OmniSciDB  ca0c39ec8f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CudaMgr_Namespace::CudaMgr Class Reference

#include <CudaMgr.h>

Public Member Functions

 CudaMgr (const int num_gpus, const int start_gpu=0)
 
 ~CudaMgr ()
 
void synchronizeDevices () const
 
int getDeviceCount () const
 
int getStartGpu () const
 
const heavyai::DeviceGroup & getDeviceGroup () const
 
void copyHostToDevice (int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0)
 
void copyDeviceToHost (int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0)
 
void copyDeviceToDevice (int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num, CUstream cuda_stream=0)
 
int8_t * allocatePinnedHostMem (const size_t num_bytes)
 
int8_t * allocateDeviceMem (const size_t num_bytes, const int device_num)
 
void freePinnedHostMem (int8_t *host_ptr)
 
void freeDeviceMem (int8_t *device_ptr)
 
void zeroDeviceMem (int8_t *device_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0)
 
void setDeviceMem (int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num, CUstream cuda_stream=0)
 
size_t getMinSharedMemoryPerBlockForAllDevices () const
 
size_t getMinNumMPsForAllDevices () const
 
const std::vector< DeviceProperties > & getAllDeviceProperties () const
 
const DeviceProperties * getDeviceProperties (const size_t device_num) const
 
bool isArchMaxwell () const
 
bool isArchMaxwellOrLater () const
 
bool isArchPascal () const
 
bool isArchPascalOrLater () const
 
bool isArchMaxwellOrLaterForAll () const
 
bool isArchVoltaOrGreaterForAll () const
 
NvidiaDeviceArch getDeviceArch () const
 
void setContext (const int device_num) const
 
int getContext () const
 

Static Public Member Functions

static std::string deviceArchToSM (const NvidiaDeviceArch arch)
 

Private Attributes

int device_count_
 
int start_gpu_
 
size_t min_shared_memory_per_block_for_all_devices
 
size_t min_num_mps_for_all_devices
 
std::vector< DeviceProperties > device_properties_
 
heavyai::DeviceGroup device_group_
 
std::vector< CUcontext > device_contexts_
 
std::mutex device_cleanup_mutex_
 

Detailed Description

Definition at line 81 of file CudaMgr.h.

Constructor & Destructor Documentation

CudaMgr_Namespace::CudaMgr::CudaMgr ( const int  num_gpus,
const int  start_gpu = 0 
)

Definition at line 49 of file CudaMgr.cpp.

References CHECK_EQ, device_count_, logger::INFO, LOG, nvidia_jit_warmup(), setContext(), and start_gpu_.

50  : start_gpu_(start_gpu)
53  checkError(cuInit(0));
54  checkError(cuDeviceGetCount(&device_count_));
55 
56  if (num_gpus > 0) { // numGpus <= 0 will just use number of gpus found
57  device_count_ = std::min(device_count_, num_gpus);
58  } else {
59  // if we are using all gpus we cannot start on a gpu other than 0
60  CHECK_EQ(start_gpu_, 0);
61  }
62  fillDeviceProperties();
63  initDeviceGroup();
64  createDeviceContexts();
65  printDeviceProperties();
66 
67  // warm up the GPU JIT
68  LOG(INFO) << "Warming up the GPU JIT Compiler... (this may take several seconds)";
69  setContext(0);
71  LOG(INFO) << "GPU JIT Compiler initialized.";
72 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
size_t min_num_mps_for_all_devices
Definition: CudaMgr.h:248
#define LOG(tag)
Definition: Logger.h:216
void nvidia_jit_warmup()
void setContext(const int device_num) const
Definition: CudaMgr.cpp:405
size_t min_shared_memory_per_block_for_all_devices
Definition: CudaMgr.h:247

+ Here is the call graph for this function:

CudaMgr_Namespace::CudaMgr::~CudaMgr ( )

Definition at line 81 of file CudaMgr.cpp.

References device_cleanup_mutex_, device_contexts_, device_count_, logger::ERROR, LOG, and synchronizeDevices().

81  {
82  try {
83  // We don't want to remove the cudaMgr before all other processes have cleaned up.
84  // This should be enforced by the lifetime policies, but take this lock to be safe.
85  std::lock_guard<std::mutex> gpu_lock(device_cleanup_mutex_);
86 
88  for (int d = 0; d < device_count_; ++d) {
89  checkError(cuCtxDestroy(device_contexts_[d]));
90  }
91  } catch (const CudaErrorException& e) {
92  if (e.getStatus() == CUDA_ERROR_DEINITIALIZED) {
93  // TODO(adb / asuhan): Verify cuModuleUnload removes the context
94  return;
95  }
96  LOG(ERROR) << "CUDA Error: " << e.what();
97  } catch (const std::runtime_error& e) {
98  LOG(ERROR) << "CUDA Error: " << e.what();
99  }
100 }
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:253
#define LOG(tag)
Definition: Logger.h:216
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:251
void synchronizeDevices() const
Definition: CudaMgr.cpp:102

+ Here is the call graph for this function:

Member Function Documentation

int8_t * CudaMgr_Namespace::CudaMgr::allocateDeviceMem ( const size_t  num_bytes,
const int  device_num 
)

Definition at line 288 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::GpuCudaBufferMgr::addSlab().

288  {
289  setContext(device_num);
290  CUdeviceptr device_ptr;
291  checkError(cuMemAlloc(&device_ptr, num_bytes));
292  return reinterpret_cast<int8_t*>(device_ptr);
293 }
unsigned long long CUdeviceptr
Definition: nocuda.h:28
void setContext(const int device_num) const
Definition: CudaMgr.cpp:405

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int8_t * CudaMgr_Namespace::CudaMgr::allocatePinnedHostMem ( const size_t  num_bytes)

Definition at line 281 of file CudaMgr.cpp.

References setContext().

281  {
282  setContext(0);
283  void* host_ptr;
284  checkError(cuMemHostAlloc(&host_ptr, num_bytes, CU_MEMHOSTALLOC_PORTABLE));
285  return reinterpret_cast<int8_t*>(host_ptr);
286 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:405

+ Here is the call graph for this function:

void CudaMgr_Namespace::CudaMgr::copyDeviceToDevice ( int8_t *  dest_ptr,
int8_t *  src_ptr,
const size_t  num_bytes,
const int  dest_device_num,
const int  src_device_num,
CUstream  cuda_stream = 0 
)

Definition at line 143 of file CudaMgr.cpp.

References device_contexts_, and setContext().

Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().

148  {
149  // dest_device_num and src_device_num are the device numbers relative to start_gpu_
150  // (real_device_num - start_gpu_)
151  if (src_device_num == dest_device_num) {
152  setContext(src_device_num);
153  if (!cuda_stream) {
154  checkError(cuMemcpy(reinterpret_cast<CUdeviceptr>(dest_ptr),
155  reinterpret_cast<CUdeviceptr>(src_ptr),
156  num_bytes));
157  } else {
158  checkError(cuMemcpyAsync(reinterpret_cast<CUdeviceptr>(dest_ptr),
159  reinterpret_cast<CUdeviceptr>(src_ptr),
160  num_bytes,
161  cuda_stream));
162  checkError(cuStreamSynchronize(cuda_stream));
163  }
164  } else {
165  if (!cuda_stream) {
166  checkError(cuMemcpyPeer(reinterpret_cast<CUdeviceptr>(dest_ptr),
167  device_contexts_[dest_device_num],
168  reinterpret_cast<CUdeviceptr>(src_ptr),
169  device_contexts_[src_device_num],
170  num_bytes)); // will we always have peer?
171  } else {
172  checkError(cuMemcpyPeerAsync(reinterpret_cast<CUdeviceptr>(dest_ptr),
173  device_contexts_[dest_device_num],
174  reinterpret_cast<CUdeviceptr>(src_ptr),
175  device_contexts_[src_device_num],
176  num_bytes,
177  cuda_stream)); // will we always have peer?
178  checkError(cuStreamSynchronize(cuda_stream));
179  }
180  }
181 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:405
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:251

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::copyDeviceToHost ( int8_t *  host_ptr,
const int8_t *  device_ptr,
const size_t  num_bytes,
const int  device_num,
CUstream  cuda_stream = 0 
)

Definition at line 125 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::CpuBuffer::writeData().

129  {
130  setContext(device_num);
131  if (!cuda_stream) {
132  checkError(cuMemcpyDtoH(
133  host_ptr, reinterpret_cast<const CUdeviceptr>(device_ptr), num_bytes));
134  } else {
135  checkError(cuMemcpyDtoHAsync(host_ptr,
136  reinterpret_cast<const CUdeviceptr>(device_ptr),
137  num_bytes,
138  cuda_stream));
139  checkError(cuStreamSynchronize(cuda_stream));
140  }
141 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:405

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::copyHostToDevice ( int8_t *  device_ptr,
const int8_t *  host_ptr,
const size_t  num_bytes,
const int  device_num,
CUstream  cuda_stream = 0 
)

Definition at line 109 of file CudaMgr.cpp.

References setContext().

Referenced by Buffer_Namespace::CpuBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().

113  {
114  setContext(device_num);
115  if (!cuda_stream) {
116  checkError(
117  cuMemcpyHtoD(reinterpret_cast<CUdeviceptr>(device_ptr), host_ptr, num_bytes));
118  } else {
119  checkError(cuMemcpyHtoDAsync(
120  reinterpret_cast<CUdeviceptr>(device_ptr), host_ptr, num_bytes, cuda_stream));
121  checkError(cuStreamSynchronize(cuda_stream));
122  }
123 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:405

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static std::string CudaMgr_Namespace::CudaMgr::deviceArchToSM ( const NvidiaDeviceArch  arch)
inlinestatic

Definition at line 156 of file CudaMgr.h.

References CudaMgr_Namespace::Ampere, CudaMgr_Namespace::Kepler, LOG, CudaMgr_Namespace::Maxwell, CudaMgr_Namespace::Pascal, CudaMgr_Namespace::Turing, UNREACHABLE, CudaMgr_Namespace::Volta, and logger::WARNING.

156  {
157  // Must match ${CUDA_COMPILATION_ARCH} CMAKE flag
158  switch (arch) {
160  return "sm_35";
162  return "sm_50";
164  return "sm_60";
166  return "sm_70";
168  return "sm_75";
170  return "sm_75";
171  default:
172  LOG(WARNING) << "Unrecognized Nvidia device architecture, falling back to "
173  "Kepler-compatibility.";
174  return "sm_35";
175  }
176  UNREACHABLE();
177  return "";
178  }
#define LOG(tag)
Definition: Logger.h:216
#define UNREACHABLE()
Definition: Logger.h:266
void CudaMgr_Namespace::CudaMgr::freeDeviceMem ( int8_t *  device_ptr)

Definition at line 299 of file CudaMgr.cpp.

References device_cleanup_mutex_.

Referenced by Buffer_Namespace::GpuCudaBufferMgr::freeAllMem().

299  {
300  std::lock_guard<std::mutex> gpu_lock(device_cleanup_mutex_);
301 
302  checkError(cuMemFree(reinterpret_cast<CUdeviceptr>(device_ptr)));
303 }
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:253

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::freePinnedHostMem ( int8_t *  host_ptr)

Definition at line 295 of file CudaMgr.cpp.

295  {
296  checkError(cuMemFreeHost(reinterpret_cast<void*>(host_ptr)));
297 }
const std::vector<DeviceProperties>& CudaMgr_Namespace::CudaMgr::getAllDeviceProperties ( ) const
inline

Definition at line 128 of file CudaMgr.h.

References device_properties_.

Referenced by Executor::blockSize(), Executor::deviceCycles(), and Executor::warpSize().

128  {
129  return device_properties_;
130  }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:249

+ Here is the caller graph for this function:

int CudaMgr_Namespace::CudaMgr::getContext ( ) const

Definition at line 411 of file CudaMgr.cpp.

References device_contexts_.

Referenced by QueryEngine::getCudaStream(), and QueryEngine::QueryEngine().

411  {
412  CUcontext cnow;
413  checkError(cuCtxGetCurrent(&cnow));
414  if (cnow == NULL) {
415  throw std::runtime_error("no cuda device context");
416  }
417  int device_num{0};
418  for (auto& c : device_contexts_) {
419  if (c == cnow) {
420  return device_num;
421  }
422  ++device_num;
423  }
424  // TODO(sy): Change device_contexts_ to have O(1) lookup? (Or maybe not worth it.)
425  throw std::runtime_error("invalid cuda device context");
426 }
int CUcontext
Definition: nocuda.h:22
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:251

+ Here is the caller graph for this function:

NvidiaDeviceArch CudaMgr_Namespace::CudaMgr::getDeviceArch ( ) const
inline

Definition at line 180 of file CudaMgr.h.

References CudaMgr_Namespace::Ampere, device_properties_, CudaMgr_Namespace::Kepler, CudaMgr_Namespace::Maxwell, CudaMgr_Namespace::Pascal, CudaMgr_Namespace::Turing, and CudaMgr_Namespace::Volta.

180  {
181  if (device_properties_.size() > 0) {
182  const auto& device_properties = device_properties_.front();
183  switch (device_properties.computeMajor) {
184  case 3:
186  case 5:
188  case 6:
190  case 7:
191  if (device_properties.computeMinor == 0) {
193  } else {
195  }
196  case 8:
198  default:
200  }
201  } else {
202  // always fallback to Kepler if an architecture cannot be detected
204  }
205  }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:249
int CudaMgr_Namespace::CudaMgr::getDeviceCount ( ) const
inline

Definition at line 87 of file CudaMgr.h.

References device_count_.

Referenced by Executor::deviceCount(), get_available_gpus(), isArchMaxwell(), isArchMaxwellOrLater(), isArchPascal(), isArchPascalOrLater(), and QueryEngine::QueryEngine().

87 { return device_count_; }

+ Here is the caller graph for this function:

const heavyai::DeviceGroup& CudaMgr_Namespace::CudaMgr::getDeviceGroup ( ) const
inline

Definition at line 89 of file CudaMgr.h.

References device_group_.

89 { return device_group_; }
heavyai::DeviceGroup device_group_
Definition: CudaMgr.h:250
const DeviceProperties* CudaMgr_Namespace::CudaMgr::getDeviceProperties ( const size_t  device_num) const
inline

Definition at line 131 of file CudaMgr.h.

References device_properties_, and to_string().

131  {
132  // device_num is the device number relative to start_gpu_ (real_device_num -
133  // start_gpu_)
134  if (device_num < device_properties_.size()) {
135  return &device_properties_[device_num];
136  }
137  throw std::runtime_error("Specified device number " + std::to_string(device_num) +
138  " is out of range of number of devices (" +
139  std::to_string(device_properties_.size()) + ")");
140  }
std::string to_string(char const *&&v)
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:249

+ Here is the call graph for this function:

size_t CudaMgr_Namespace::CudaMgr::getMinNumMPsForAllDevices ( ) const
inline

Definition at line 126 of file CudaMgr.h.

References min_num_mps_for_all_devices.

126 { return min_num_mps_for_all_devices; }
size_t min_num_mps_for_all_devices
Definition: CudaMgr.h:248
size_t CudaMgr_Namespace::CudaMgr::getMinSharedMemoryPerBlockForAllDevices ( ) const
inline

Definition at line 122 of file CudaMgr.h.

References min_shared_memory_per_block_for_all_devices.

122  {
124  }
size_t min_shared_memory_per_block_for_all_devices
Definition: CudaMgr.h:247
int CudaMgr_Namespace::CudaMgr::getStartGpu ( ) const
inline

Definition at line 88 of file CudaMgr.h.

References start_gpu_.

88 { return start_gpu_; }
bool CudaMgr_Namespace::CudaMgr::isArchMaxwell ( ) const
inline

Definition at line 141 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

141  {
142  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 5);
143  }
int getDeviceCount() const
Definition: CudaMgr.h:87
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:249

+ Here is the call graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLater ( ) const
inline

Definition at line 144 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

144  {
145  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 5);
146  }
int getDeviceCount() const
Definition: CudaMgr.h:87
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:249

+ Here is the call graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLaterForAll ( ) const

Returns true if all devices have the Maxwell micro-architecture, or later. Returns false if there is any device with compute capability < 5.0.

Definition at line 331 of file CudaMgr.cpp.

References device_count_, and device_properties_.

331  {
332  for (int i = 0; i < device_count_; i++) {
333  if (device_properties_[i].computeMajor < 5) {
334  return false;
335  }
336  }
337  return true;
338 }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:249
bool CudaMgr_Namespace::CudaMgr::isArchPascal ( ) const
inline

Definition at line 147 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

147  {
148  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 6);
149  }
int getDeviceCount() const
Definition: CudaMgr.h:87
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:249

+ Here is the call graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchPascalOrLater ( ) const
inline

Definition at line 150 of file CudaMgr.h.

References device_properties_, and getDeviceCount().

Referenced by Executor::isArchPascalOrLater().

150  {
151  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 6);
152  }
int getDeviceCount() const
Definition: CudaMgr.h:87
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:249

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool CudaMgr_Namespace::CudaMgr::isArchVoltaOrGreaterForAll ( ) const

Returns true if all devices have the Volta micro-architecture, or later. Returns false if there is any device with compute capability < 7.0.

Definition at line 344 of file CudaMgr.cpp.

References device_count_, and device_properties_.

344  {
345  for (int i = 0; i < device_count_; i++) {
346  if (device_properties_[i].computeMajor < 7) {
347  return false;
348  }
349  }
350  return true;
351 }
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:249
void CudaMgr_Namespace::CudaMgr::setContext ( const int  device_num) const

Definition at line 405 of file CudaMgr.cpp.

References CHECK_LT, and device_contexts_.

Referenced by allocateDeviceMem(), allocatePinnedHostMem(), copyDeviceToDevice(), copyDeviceToHost(), copyHostToDevice(), CudaMgr(), QueryEngine::QueryEngine(), setDeviceMem(), and synchronizeDevices().

405  {
406  // deviceNum is the device number relative to startGpu (realDeviceNum - startGpu_)
407  CHECK_LT(device_num, device_count_);
408  cuCtxSetCurrent(device_contexts_[device_num]);
409 }
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:251
#define CHECK_LT(x, y)
Definition: Logger.h:232

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::setDeviceMem ( int8_t *  device_ptr,
const unsigned char  uc,
const size_t  num_bytes,
const int  device_num,
CUstream  cuda_stream = 0 
)

Definition at line 312 of file CudaMgr.cpp.

References setContext().

Referenced by zeroDeviceMem().

316  {
317  setContext(device_num);
318  if (!cuda_stream) {
319  checkError(cuMemsetD8(reinterpret_cast<CUdeviceptr>(device_ptr), uc, num_bytes));
320  } else {
321  checkError(cuMemsetD8Async(
322  reinterpret_cast<CUdeviceptr>(device_ptr), uc, num_bytes, cuda_stream));
323  checkError(cuStreamSynchronize(cuda_stream));
324  }
325 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:405

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::synchronizeDevices ( ) const

Definition at line 102 of file CudaMgr.cpp.

References device_count_, and setContext().

Referenced by ~CudaMgr(), and Buffer_Namespace::GpuCudaBufferMgr::~GpuCudaBufferMgr().

102  {
103  for (int d = 0; d < device_count_; ++d) {
104  setContext(d);
105  checkError(cuCtxSynchronize());
106  }
107 }
void setContext(const int device_num) const
Definition: CudaMgr.cpp:405

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void CudaMgr_Namespace::CudaMgr::zeroDeviceMem ( int8_t *  device_ptr,
const size_t  num_bytes,
const int  device_num,
CUstream  cuda_stream = 0 
)

Definition at line 305 of file CudaMgr.cpp.

References setDeviceMem().

308  {
309  setDeviceMem(device_ptr, 0, num_bytes, device_num, cuda_stream);
310 }
void setDeviceMem(int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num, CUstream cuda_stream=0)
Definition: CudaMgr.cpp:312

+ Here is the call graph for this function:

Member Data Documentation

std::mutex CudaMgr_Namespace::CudaMgr::device_cleanup_mutex_
mutableprivate

Definition at line 253 of file CudaMgr.h.

Referenced by freeDeviceMem(), and ~CudaMgr().

std::vector<CUcontext> CudaMgr_Namespace::CudaMgr::device_contexts_
private

Definition at line 251 of file CudaMgr.h.

Referenced by copyDeviceToDevice(), getContext(), setContext(), and ~CudaMgr().

int CudaMgr_Namespace::CudaMgr::device_count_
private
heavyai::DeviceGroup CudaMgr_Namespace::CudaMgr::device_group_
private

Definition at line 250 of file CudaMgr.h.

Referenced by getDeviceGroup().

size_t CudaMgr_Namespace::CudaMgr::min_num_mps_for_all_devices
private

Definition at line 248 of file CudaMgr.h.

Referenced by getMinNumMPsForAllDevices().

size_t CudaMgr_Namespace::CudaMgr::min_shared_memory_per_block_for_all_devices
private

Definition at line 247 of file CudaMgr.h.

Referenced by getMinSharedMemoryPerBlockForAllDevices().

int CudaMgr_Namespace::CudaMgr::start_gpu_
private

Definition at line 246 of file CudaMgr.h.

Referenced by CudaMgr(), and getStartGpu().


The documentation for this class was generated from the following files: