namespace CudaMgr_Namespace {

class CudaErrorException : public std::runtime_error {
  // ...
  CUresult getStatus() const { return status_; }
};

class CudaMgr {
 public:
  CudaMgr(const int num_gpus, const int start_gpu = 0);

  void copyHostToDevice(int8_t* device_ptr,
                        const int8_t* host_ptr,
                        const size_t num_bytes,
                        const int device_num);
  void copyDeviceToHost(int8_t* host_ptr,
                        const int8_t* device_ptr,
                        const size_t num_bytes,
                        const int device_num);
  void copyDeviceToDevice(int8_t* dest_ptr,
                          int8_t* src_ptr,
                          const size_t num_bytes,
                          const int dest_device_num,
                          const int src_device_num);

  void zeroDeviceMem(int8_t* device_ptr, const size_t num_bytes, const int device_num);
  void setDeviceMem(int8_t* device_ptr,
                    const unsigned char uc,
                    const size_t num_bytes,
                    const int device_num);

  const DeviceProperties* getDeviceProperties(const size_t device_num) const {
    // ...
    throw std::runtime_error("Specified device number " + std::to_string(device_num) +
                             " is out of range of number of devices (" +
                             std::to_string(device_properties_.size()) + ")");
  }

  static std::string deviceArchToSM(const NvidiaDeviceArch arch) {
    // ...
    LOG(WARNING) << "Unrecognized Nvidia device architecture, falling back to "
                    "Kepler-compatibility.";
    // ...
  }

  NvidiaDeviceArch getDeviceArch() const {
    // ...
    switch (device_properties.computeMajor) {
      // ...
      if (device_properties.computeMinor == 0) {
        // ...
      }
      // ...
    }
    // ...
  }

  void printDeviceProperties() const;

  const std::vector<CUcontext>& getDeviceContexts() const { return device_contexts_; }
  const int getGpuDriverVersion() const { return gpu_driver_version_; }

  void loadGpuModuleData(CUmodule* module,
                         const void* image,
                         unsigned int num_options,
                         CUjit_option* options,
                         void** option_values,
                         const int device_id) const;
  void unloadGpuModuleData(CUmodule* module, const int device_id) const;

  struct CudaMemoryUsage {
    // ...
  };

  static CudaMemoryUsage getCudaMemoryUsage();

 private:
  void fillDeviceProperties();
  void initDeviceGroup();
  void createDeviceContexts();
  size_t computeMinSharedMemoryPerBlockForAllDevices() const;
  size_t computeMinNumMPsForAllDevices() const;
  void checkError(CUresult cu_result) const;

  int gpu_driver_version_;
  // ...
};

}  // namespace CudaMgr_Namespace
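
For orientation, the following is a minimal usage sketch of the memory interface declared above, assuming a CudaMgr instance constructed elsewhere and at least one visible GPU. The helper function name, buffer size, and data are illustrative; only members whose signatures appear in this file (allocateDeviceMem, copyHostToDevice, copyDeviceToHost, freeDeviceMem) are used.

// Hedged sketch: round-trip a host buffer through device 0.
// Assumes the CudaMgr header above has been included.
#include <cstddef>
#include <cstdint>
#include <vector>

void round_trip_example(CudaMgr_Namespace::CudaMgr& cuda_mgr) {
  const size_t num_bytes = 1024;               // illustrative size
  std::vector<int8_t> host_in(num_bytes, 7);   // source buffer
  std::vector<int8_t> host_out(num_bytes, 0);  // destination buffer

  // Allocate on device 0 and copy the host data over.
  int8_t* device_ptr = cuda_mgr.allocateDeviceMem(num_bytes, /*device_num=*/0);
  cuda_mgr.copyHostToDevice(device_ptr, host_in.data(), num_bytes, /*device_num=*/0);

  // Copy back to the host and release the device allocation.
  cuda_mgr.copyDeviceToHost(host_out.data(), device_ptr, num_bytes, /*device_num=*/0);
  cuda_mgr.freeDeviceMem(device_ptr);
}

The member index below lists the remaining signatures from this header.
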
size_t min_num_mps_for_all_devices_
const omnisci::DeviceGroup & getDeviceGroup() const
void copyDeviceToHost(int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num)
std::mutex device_cleanup_mutex_
int8_t * allocatePinnedHostMem(const size_t num_bytes)
void setContext(const int device_num) const
bool isArchPascalOrLater() const
size_t min_shared_memory_per_block_for_all_devices_
void copyDeviceToDevice(int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num)
size_t getMinNumMPsForAllDevices() const
std::vector< CUcontext > device_contexts_
std::string get_cuda_home(void)
void freeDeviceMem(int8_t *device_ptr)
omnisci::DeviceGroup device_group_
std::string errorMessage(CUresult const status)
int getDeviceCount() const
size_t getMinSharedMemoryPerBlockForAllDevices() const
void setDeviceMem(int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
void copyHostToDevice(int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num)
static std::string deviceArchToSM(const NvidiaDeviceArch arch)
bool isArchMaxwellOrLaterForAll() const
std::vector< DeviceProperties > device_properties_
void freePinnedHostMem(int8_t *host_ptr)
void synchronizeDevices() const
const DeviceProperties * getDeviceProperties(const size_t device_num) const
bool isArchMaxwell() const
bool isArchPascal() const
CudaMgr(const int num_gpus, const int start_gpu=0)
std::vector< DeviceIdentifier > DeviceGroup
const std::vector< DeviceProperties > & getAllDeviceProperties() const
bool isArchVoltaOrGreaterForAll() const
void zeroDeviceMem(int8_t *device_ptr, const size_t num_bytes, const int device_num)
NvidiaDeviceArch getDeviceArch() const
bool isArchMaxwellOrLater() const
int8_t * allocateDeviceMem(const size_t num_bytes, const int device_num)
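
As a second illustration, the capability queries above (getDeviceCount, isArchMaxwellOrLaterForAll, getMinSharedMemoryPerBlockForAllDevices) can be combined into a simple gating check. The function below is a sketch under the assumption that callers target the weakest managed device; the gating policy itself is not taken from the header.

// Hedged sketch: decide whether a shared-memory GPU code path is usable on
// every device managed by this CudaMgr. Only members listed above are used;
// the policy is an illustrative assumption.
#include <cstddef>

bool can_use_gpu_shared_memory(const CudaMgr_Namespace::CudaMgr& cuda_mgr,
                               const size_t required_shared_memory_bytes) {
  if (cuda_mgr.getDeviceCount() == 0) {
    return false;  // no devices available
  }
  // Gate on the lowest common denominator across all managed devices.
  return cuda_mgr.isArchMaxwellOrLaterForAll() &&
         cuda_mgr.getMinSharedMemoryPerBlockForAllDevices() >=
             required_shared_memory_bytes;
}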