OmniSciDB
0264ff685a
|
#include <CudaMgr.h>
Public Member Functions | |
CudaMgr (const int num_gpus, const int start_gpu=0) | |
~CudaMgr () | |
void | synchronizeDevices () const |
int | getDeviceCount () const |
int | getStartGpu () const |
const omnisci::DeviceGroup & | getDeviceGroup () const |
void | copyHostToDevice (int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num) |
void | copyDeviceToHost (int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num) |
void | copyDeviceToDevice (int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num) |
int8_t * | allocatePinnedHostMem (const size_t num_bytes) |
int8_t * | allocateDeviceMem (const size_t num_bytes, const int device_num) |
void | freePinnedHostMem (int8_t *host_ptr) |
void | freeDeviceMem (int8_t *device_ptr) |
void | zeroDeviceMem (int8_t *device_ptr, const size_t num_bytes, const int device_num) |
void | setDeviceMem (int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num) |
size_t | getMinSharedMemoryPerBlockForAllDevices () const |
size_t | getMinNumMPsForAllDevices () const |
const std::vector< DeviceProperties > & | getAllDeviceProperties () const |
const DeviceProperties * | getDeviceProperties (const size_t device_num) const |
bool | isArchMaxwell () const |
bool | isArchMaxwellOrLater () const |
bool | isArchPascal () const |
bool | isArchPascalOrLater () const |
bool | isArchMaxwellOrLaterForAll () const |
bool | isArchVoltaOrGreaterForAll () const |
NvidiaDeviceArch | getDeviceArch () const |
void | setContext (const int device_num) const |
Static Public Member Functions | |
static std::string | deviceArchToSM (const NvidiaDeviceArch arch) |
Private Attributes | |
int | device_count_ |
int | start_gpu_ |
size_t | min_shared_memory_per_block_for_all_devices |
size_t | min_num_mps_for_all_devices |
std::vector< DeviceProperties > | device_properties_ |
omnisci::DeviceGroup | device_group_ |
std::vector< CUcontext > | device_contexts_ |
std::mutex | device_cleanup_mutex_ |
CudaMgr_Namespace::CudaMgr::CudaMgr | ( | const int | num_gpus, |
const int | start_gpu = 0 |
||
) |
Definition at line 48 of file CudaMgr.cpp.
References CHECK_EQ, CHECK_LE, device_count_, device_group_, device_properties_, logger::INFO, LOG, nvidia_jit_warmup(), setContext(), and start_gpu_.
CudaMgr_Namespace::CudaMgr::~CudaMgr | ( | ) |
Definition at line 81 of file CudaMgr.cpp.
References device_cleanup_mutex_, device_contexts_, device_count_, logger::ERROR, LOG, and synchronizeDevices().
int8_t * CudaMgr_Namespace::CudaMgr::allocateDeviceMem | ( | const size_t | num_bytes, |
const int | device_num | ||
) |
Definition at line 253 of file CudaMgr.cpp.
References setContext().
Referenced by Buffer_Namespace::GpuCudaBufferMgr::addSlab().
int8_t * CudaMgr_Namespace::CudaMgr::allocatePinnedHostMem | ( | const size_t | num_bytes | ) |
Definition at line 246 of file CudaMgr.cpp.
References setContext().
void CudaMgr_Namespace::CudaMgr::copyDeviceToDevice | ( | int8_t * | dest_ptr, |
int8_t * | src_ptr, | ||
const size_t | num_bytes, | ||
const int | dest_device_num, | ||
const int | src_device_num | ||
) |
Definition at line 127 of file CudaMgr.cpp.
References CHECK, device_cleanup_mutex_, device_contexts_, device_count_, device_properties_, logger::ERROR, LOG, min_num_mps_for_all_devices, min_shared_memory_per_block_for_all_devices, setContext(), and start_gpu_.
Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().
void CudaMgr_Namespace::CudaMgr::copyDeviceToHost | ( | int8_t * | host_ptr, |
const int8_t * | device_ptr, | ||
const size_t | num_bytes, | ||
const int | device_num | ||
) |
Definition at line 118 of file CudaMgr.cpp.
References setContext().
Referenced by Buffer_Namespace::GpuCudaBuffer::readData(), and Buffer_Namespace::CpuBuffer::writeData().
void CudaMgr_Namespace::CudaMgr::copyHostToDevice | ( | int8_t * | device_ptr, |
const int8_t * | host_ptr, | ||
const size_t | num_bytes, | ||
const int | device_num | ||
) |
Definition at line 109 of file CudaMgr.cpp.
References setContext().
Referenced by Buffer_Namespace::CpuBuffer::readData(), and Buffer_Namespace::GpuCudaBuffer::writeData().
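static std::string CudaMgr_Namespace::CudaMgr::deviceArchToSM | ( | const NvidiaDeviceArch | arch | ) |
inline static |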
Definition at line 148 of file CudaMgr.h.
References CudaMgr_Namespace::Ampere, CudaMgr_Namespace::Kepler, LOG, CudaMgr_Namespace::Maxwell, CudaMgr_Namespace::Pascal, CudaMgr_Namespace::Turing, UNREACHABLE, CudaMgr_Namespace::Volta, and logger::WARNING.
Referenced by UdfCompiler::compileToGpuByteCode(), and CodeGenerator::initializeNVPTXBackend().
void CudaMgr_Namespace::CudaMgr::freeDeviceMem | ( | int8_t * | device_ptr | ) |
Definition at line 264 of file CudaMgr.cpp.
References device_cleanup_mutex_.
Referenced by Buffer_Namespace::GpuCudaBufferMgr::freeAllMem().
void CudaMgr_Namespace::CudaMgr::freePinnedHostMem | ( | int8_t * | host_ptr | ) |
Definition at line 260 of file CudaMgr.cpp.
|
inline |
Definition at line 120 of file CudaMgr.h.
Referenced by Executor::blockSize().
|
inline |
Definition at line 172 of file CudaMgr.h.
References CudaMgr_Namespace::Ampere, CudaMgr_Namespace::Kepler, CudaMgr_Namespace::Maxwell, CudaMgr_Namespace::Pascal, CudaMgr_Namespace::Turing, and CudaMgr_Namespace::Volta.
|
inline |
Definition at line 86 of file CudaMgr.h.
Referenced by CodeGenerator::generateNativeGPUCode(), and get_available_gpus().
|
inline |
Definition at line 88 of file CudaMgr.h.
|
inline |
Definition at line 123 of file CudaMgr.h.
References to_string().
|
inline |
Definition at line 118 of file CudaMgr.h.
|
inline |
Definition at line 114 of file CudaMgr.h.
Referenced by anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported().
|
inline |
|
inline |
Definition at line 133 of file CudaMgr.h.
|
inline |
Definition at line 136 of file CudaMgr.h.
bool CudaMgr_Namespace::CudaMgr::isArchMaxwellOrLaterForAll | ( | ) | const |
Returns true if all devices have the Maxwell micro-architecture or later. Returns false if there is any device with a compute capability of < 5.0.
Definition at line 288 of file CudaMgr.cpp.
References device_count_, and device_properties_.
Referenced by anonymous_namespace{NativeCodegen.cpp}::is_gpu_shared_mem_supported().
|
inline |
Definition at line 139 of file CudaMgr.h.
|
inline |
Definition at line 142 of file CudaMgr.h.
bool CudaMgr_Namespace::CudaMgr::isArchVoltaOrGreaterForAll | ( | ) | const |
Returns true if all devices have the Volta micro-architecture or later. Returns false if there is any pre-Volta device available.
Definition at line 301 of file CudaMgr.cpp.
References CHECK_EQ, device_contexts_, device_count_, device_properties_, logger::ERROR, CudaMgr_Namespace::errorMessage(), and LOG.
void CudaMgr_Namespace::CudaMgr::setContext | ( | const int | device_num | ) | const |
Definition at line 362 of file CudaMgr.cpp.
References CHECK_LT, device_contexts_, device_count_, device_properties_, logger::INFO, LOG, and VLOG.
Referenced by allocateDeviceMem(), allocatePinnedHostMem(), copyDeviceToDevice(), copyDeviceToHost(), copyHostToDevice(), CudaMgr(), setDeviceMem(), and synchronizeDevices().
void CudaMgr_Namespace::CudaMgr::setDeviceMem | ( | int8_t * | device_ptr, |
const unsigned char | uc, | ||
const size_t | num_bytes, | ||
const int | device_num | ||
) |
Definition at line 276 of file CudaMgr.cpp.
References setContext().
Referenced by zeroDeviceMem().
void CudaMgr_Namespace::CudaMgr::synchronizeDevices | ( | ) | const |
Definition at line 102 of file CudaMgr.cpp.
References device_count_, and setContext().
Referenced by ~CudaMgr(), and Buffer_Namespace::GpuCudaBufferMgr::~GpuCudaBufferMgr().
void CudaMgr_Namespace::CudaMgr::zeroDeviceMem | ( | int8_t * | device_ptr, |
const size_t | num_bytes, | ||
const int | device_num | ||
) |
Definition at line 270 of file CudaMgr.cpp.
References setDeviceMem().
Referenced by ResultSet::ResultSet().
std::mutex CudaMgr_Namespace::CudaMgr::device_cleanup_mutex_ |
mutable private |
Definition at line 244 of file CudaMgr.h.
Referenced by copyDeviceToDevice(), freeDeviceMem(), and ~CudaMgr().
|
private |
Definition at line 242 of file CudaMgr.h.
Referenced by copyDeviceToDevice(), isArchVoltaOrGreaterForAll(), setContext(), and ~CudaMgr().
|
private |
Definition at line 236 of file CudaMgr.h.
Referenced by copyDeviceToDevice(), CudaMgr(), isArchMaxwellOrLaterForAll(), isArchVoltaOrGreaterForAll(), setContext(), synchronizeDevices(), and ~CudaMgr().
|
private |
|
private |
Definition at line 240 of file CudaMgr.h.
Referenced by copyDeviceToDevice(), CudaMgr(), isArchMaxwellOrLaterForAll(), isArchVoltaOrGreaterForAll(), and setContext().
|
private |
Definition at line 239 of file CudaMgr.h.
Referenced by copyDeviceToDevice().
|
private |
Definition at line 238 of file CudaMgr.h.
Referenced by copyDeviceToDevice().
|
private |
Definition at line 237 of file CudaMgr.h.
Referenced by copyDeviceToDevice(), and CudaMgr().