20 namespace CudaMgr_Namespace {
33 const int8_t* host_ptr,
34 const size_t num_bytes,
40 const int8_t* device_ptr,
41 const size_t num_bytes,
48 const size_t num_bytes,
49 const int dest_device_num,
50 const int src_device_num,
70 const size_t num_bytes,
76 const unsigned char uc,
77 const size_t num_bytes,
void copyHostToDevice(int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0)
void setDeviceMem(int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num, CUstream cuda_stream=0)
int8_t * allocatePinnedHostMem(const size_t num_bytes)
void setContext(const int device_num) const
void copyDeviceToHost(int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0)
void freeDeviceMem(int8_t *device_ptr)
void copyDeviceToDevice(int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num, CUstream cuda_stream=0)
bool isArchMaxwellOrLaterForAll() const
void freePinnedHostMem(int8_t *host_ptr)
void synchronizeDevices() const
void zeroDeviceMem(int8_t *device_ptr, const size_t num_bytes, const int device_num, CUstream cuda_stream=0)
device_count_(device_count)
CudaMgr(const int num_gpus, const int start_gpu=0)
bool isArchVoltaOrGreaterForAll() const
int8_t * allocateDeviceMem(const size_t num_bytes, const int device_num)