OmniSciDB
c1a53651b2
|
#include <CudaMgr.h>
Public Member Functions | |
CudaMgr (const int num_gpus, const int start_gpu=0) | |
~CudaMgr () | |
void | synchronizeDevices () const |
int | getDeviceCount () const |
int | getStartGpu () const |
const heavyai::DeviceGroup & | getDeviceGroup () const |
void | copyHostToDevice (int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0) |
void | copyDeviceToHost (int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0) |
void | copyDeviceToDevice (int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num, CUstream cuda_stream=0) |
int8_t * | allocatePinnedHostMem (const size_t num_bytes) |
int8_t * | allocateDeviceMem (const size_t num_bytes, const int device_num) |
void | freePinnedHostMem (int8_t *host_ptr) |
void | freeDeviceMem (int8_t *device_ptr) |
void | zeroDeviceMem (int8_t *device_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0) |
void | setDeviceMem (int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num, CUstream cuda_stream=0) |
size_t | getMinSharedMemoryPerBlockForAllDevices () const |
size_t | getMinNumMPsForAllDevices () const |
const std::vector < DeviceProperties > & | getAllDeviceProperties () const |
const DeviceProperties * | getDeviceProperties (const size_t device_num) const |
bool | isArchMaxwell () const |
bool | isArchMaxwellOrLater () const |
bool | isArchPascal () const |
bool | isArchPascalOrLater () const |
bool | isArchMaxwellOrLaterForAll () const |
bool | isArchVoltaOrGreaterForAll () const |
NvidiaDeviceArch | getDeviceArch () const |
void | setContext (const int device_num) const |
int | getContext () const |
Static Public Member Functions | |
static std::string | deviceArchToSM (const NvidiaDeviceArch arch) |
Private Attributes | |
int | device_count_ |
int | start_gpu_ |
size_t | min_shared_memory_per_block_for_all_devices |
size_t | min_num_mps_for_all_devices |
std::vector< DeviceProperties > | device_properties_ |
heavyai::DeviceGroup | device_group_ |
std::vector< CUcontext > | device_contexts_ |
std::mutex | device_cleanup_mutex_ |
CudaMgr_Namespace::CudaMgr::CudaMgr | ( | const int | num_gpus, |
const int | start_gpu = 0 |
||
) |
Definition at line 49 of file CudaMgr.cpp.
References CHECK_EQ, device_count_, logger::INFO, LOG, nvidia_jit_warmup(), setContext(), and start_gpu_.
CudaMgr_Namespace::CudaMgr::~CudaMgr | ( | ) |
Definition at line 81 of file CudaMgr.cpp.
References device_cleanup_mutex_, device_contexts_, device_count_, logger::ERROR, LOG, and synchronizeDevices().
int8_t * CudaMgr_Namespace::CudaMgr::allocateDeviceMem | ( | const size_t | num_bytes, |
const int | device_num | ||
) |
Definition at line 288 of file CudaMgr.cpp.
References setContext().
Referenced by Buffer_Namespace::GpuCudaBufferMgr::addSlab().
int8_t * CudaMgr_Namespace::CudaMgr::allocatePinnedHostMem | ( | const size_t | num_bytes | ) |
Definition at line 281 of file CudaMgr.cpp.
References setContext().
void CudaMgr_Namespace::CudaMgr::copyDeviceToDevice | ( | int8_t * | dest_ptr, |
int8_t * | src_ptr, | ||
const size_t | num_bytes, | ||
const int | dest_device_num, | ||
const int | src_device_num, | ||
CUstream | cuda_stream = 0 |
||
) |
Definition at line 143 of file CudaMgr.cpp.
References device_contexts_, and setContext().
Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().
void CudaMgr_Namespace::CudaMgr::copyDeviceToHost | ( | int8_t * | host_ptr, |
const int8_t * | device_ptr, | ||
const size_t | num_bytes, | ||
const int | device_num, | ||
CUstream | cuda_stream = 0 |
||
) |
Definition at line 125 of file CudaMgr.cpp.
References setContext().
Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::CpuBuffer::writeData().
void CudaMgr_Namespace::CudaMgr::copyHostToDevice | ( | int8_t * | device_ptr, |
const int8_t * | host_ptr, | ||
const size_t | num_bytes, | ||
const int | device_num, | ||
CUstream | cuda_stream = 0 |
||
) |
Definition at line 109 of file CudaMgr.cpp.
References setContext().
Referenced by Buffer_Namespace::CpuBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().
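static std::string CudaMgr_Namespace::CudaMgr::deviceArchToSM | ( | const NvidiaDeviceArch | arch | ) |
inline static |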
Definition at line 156 of file CudaMgr.h.
References CudaMgr_Namespace::Ampere, CudaMgr_Namespace::Kepler, LOG, CudaMgr_Namespace::Maxwell, CudaMgr_Namespace::Pascal, CudaMgr_Namespace::Turing, UNREACHABLE, CudaMgr_Namespace::Volta, and logger::WARNING.
void CudaMgr_Namespace::CudaMgr::freeDeviceMem | ( | int8_t * | device_ptr | ) |
Definition at line 299 of file CudaMgr.cpp.
References device_cleanup_mutex_.
Referenced by Buffer_Namespace::GpuCudaBufferMgr::freeAllMem().
void CudaMgr_Namespace::CudaMgr::freePinnedHostMem | ( | int8_t * | host_ptr | ) |
Definition at line 295 of file CudaMgr.cpp.
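const std::vector< DeviceProperties > & CudaMgr_Namespace::CudaMgr::getAllDeviceProperties | ( | ) | const |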
inline |
Definition at line 128 of file CudaMgr.h.
References device_properties_.
Referenced by Executor::blockSize(), Executor::deviceCycles(), and Executor::warpSize().
int CudaMgr_Namespace::CudaMgr::getContext | ( | ) | const |
Definition at line 411 of file CudaMgr.cpp.
References device_contexts_.
Referenced by QueryEngine::getCudaStream(), and QueryEngine::QueryEngine().
NvidiaDeviceArch CudaMgr_Namespace::CudaMgr::getDeviceArch | ( | ) | const |
inline |
Definition at line 180 of file CudaMgr.h.
References CudaMgr_Namespace::Ampere, device_properties_, CudaMgr_Namespace::Kepler, CudaMgr_Namespace::Maxwell, CudaMgr_Namespace::Pascal, CudaMgr_Namespace::Turing, and CudaMgr_Namespace::Volta.
int CudaMgr_Namespace::CudaMgr::getDeviceCount | ( | ) | const |
inline |
Definition at line 87 of file CudaMgr.h.
References device_count_.
Referenced by Executor::deviceCount(), get_available_gpus(), isArchMaxwell(), isArchMaxwellOrLater(), isArchPascal(), isArchPascalOrLater(), and QueryEngine::QueryEngine().
const heavyai::DeviceGroup & CudaMgr_Namespace::CudaMgr::getDeviceGroup | ( | ) | const |
inline |
Definition at line 89 of file CudaMgr.h.
References device_group_.
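const DeviceProperties * CudaMgr_Namespace::CudaMgr::getDeviceProperties | ( | const size_t | device_num | ) | const |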
inline |
Definition at line 131 of file CudaMgr.h.
References device_properties_, and to_string().
size_t CudaMgr_Namespace::CudaMgr::getMinNumMPsForAllDevices | ( | ) | const |
inline |
Definition at line 126 of file CudaMgr.h.
References min_num_mps_for_all_devices.
size_t CudaMgr_Namespace::CudaMgr::getMinSharedMemoryPerBlockForAllDevices | ( | ) | const |
inline |
Definition at line 122 of file CudaMgr.h.
References min_shared_memory_per_block_for_all_devices.
int CudaMgr_Namespace::CudaMgr::getStartGpu | ( | ) | const |
inline |
bool CudaMgr_Namespace::CudaMgr::isArchMaxwell | ( | ) | const |
inline |
Definition at line 141 of file CudaMgr.h.
References device_properties_, and getDeviceCount().
bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLater | ( | ) | const |
inline |
Definition at line 144 of file CudaMgr.h.
References device_properties_, and getDeviceCount().
bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLaterForAll | ( | ) | const |
Returns true if all devices have the Maxwell micro-architecture or later. Returns false if any device has a compute capability below 5.0.
Definition at line 331 of file CudaMgr.cpp.
References device_count_, and device_properties_.
bool CudaMgr_Namespace::CudaMgr::isArchPascal | ( | ) | const |
inline |
Definition at line 147 of file CudaMgr.h.
References device_properties_, and getDeviceCount().
bool CudaMgr_Namespace::CudaMgr::isArchPascalOrLater | ( | ) | const |
inline |
Definition at line 150 of file CudaMgr.h.
References device_properties_, and getDeviceCount().
Referenced by Executor::isArchPascalOrLater().
bool CudaMgr_Namespace::CudaMgr::isArchVoltaOrGreaterForAll | ( | ) | const |
Returns true if all devices have the Volta micro-architecture or later. Returns false if any device is older than Volta.
Definition at line 344 of file CudaMgr.cpp.
References device_count_, and device_properties_.
void CudaMgr_Namespace::CudaMgr::setContext | ( | const int | device_num | ) | const |
Definition at line 405 of file CudaMgr.cpp.
References CHECK_LT, and device_contexts_.
Referenced by allocateDeviceMem(), allocatePinnedHostMem(), copyDeviceToDevice(), copyDeviceToHost(), copyHostToDevice(), CudaMgr(), QueryEngine::QueryEngine(), setDeviceMem(), and synchronizeDevices().
void CudaMgr_Namespace::CudaMgr::setDeviceMem | ( | int8_t * | device_ptr, |
const unsigned char | uc, | ||
const size_t | num_bytes, | ||
const int | device_num, | ||
CUstream | cuda_stream = 0 |
||
) |
Definition at line 312 of file CudaMgr.cpp.
References setContext().
Referenced by zeroDeviceMem().
void CudaMgr_Namespace::CudaMgr::synchronizeDevices | ( | ) | const |
Definition at line 102 of file CudaMgr.cpp.
References device_count_, and setContext().
Referenced by ~CudaMgr(), and Buffer_Namespace::GpuCudaBufferMgr::~GpuCudaBufferMgr().
void CudaMgr_Namespace::CudaMgr::zeroDeviceMem | ( | int8_t * | device_ptr, |
const size_t | num_bytes, | ||
const int | device_num, | ||
CUstream | cuda_stream = 0 |
||
) |
Definition at line 305 of file CudaMgr.cpp.
References setDeviceMem().
std::mutex CudaMgr_Namespace::CudaMgr::device_cleanup_mutex_ |
mutable private |
Definition at line 253 of file CudaMgr.h.
Referenced by freeDeviceMem(), and ~CudaMgr().
std::vector< CUcontext > CudaMgr_Namespace::CudaMgr::device_contexts_ |
private |
Definition at line 251 of file CudaMgr.h.
Referenced by copyDeviceToDevice(), getContext(), setContext(), and ~CudaMgr().
int CudaMgr_Namespace::CudaMgr::device_count_ |
private |
Definition at line 245 of file CudaMgr.h.
Referenced by CudaMgr(), getDeviceCount(), isArchMaxwellOrLaterForAll(), isArchVoltaOrGreaterForAll(), synchronizeDevices(), and ~CudaMgr().
heavyai::DeviceGroup CudaMgr_Namespace::CudaMgr::device_group_ |
private |
Definition at line 250 of file CudaMgr.h.
Referenced by getDeviceGroup().
std::vector< DeviceProperties > CudaMgr_Namespace::CudaMgr::device_properties_ |
private |
Definition at line 249 of file CudaMgr.h.
Referenced by getAllDeviceProperties(), getDeviceArch(), getDeviceProperties(), isArchMaxwell(), isArchMaxwellOrLater(), isArchMaxwellOrLaterForAll(), isArchPascal(), isArchPascalOrLater(), and isArchVoltaOrGreaterForAll().
size_t CudaMgr_Namespace::CudaMgr::min_num_mps_for_all_devices |
private |
Definition at line 248 of file CudaMgr.h.
Referenced by getMinNumMPsForAllDevices().
size_t CudaMgr_Namespace::CudaMgr::min_shared_memory_per_block_for_all_devices |
private |
Definition at line 247 of file CudaMgr.h.
Referenced by getMinSharedMemoryPerBlockForAllDevices().
int CudaMgr_Namespace::CudaMgr::start_gpu_ |
private |
Definition at line 246 of file CudaMgr.h.
Referenced by CudaMgr(), and getStartGpu().