OmniSciDB  0bd2ec9cf4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CudaMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cstdlib>
19 #include <mutex>
20 #include <string>
21 #include <vector>
22 
23 #include "Shared/uuid.h"
24 
25 #ifdef HAVE_CUDA
26 #include <cuda.h>
27 #else
28 #include "Shared/nocuda.h"
29 #endif // HAVE_CUDA
30 
31 namespace omnisci {
// Identifies a single GPU as a member of a DeviceGroup.
// All three fields are const, so an identifier is fixed once constructed.
32 struct DeviceIdentifier {
33  const int index;  // index into device group (currently num_gpus - start_gpu)
34  const int cuda_id;  // CUDA ID for device (ignores start_gpu)
35  const UUID uuid;  // UUID for device (hardware invariant)
36 };
37 
// A device group is a vector of DeviceIdentifier entries.
38 using DeviceGroup = std::vector<DeviceIdentifier>;
39 } // namespace omnisci
40 
41 namespace CudaMgr_Namespace {
42 
43 #ifdef HAVE_CUDA
// Maps a CUresult status to a std::string; the result is used as the exception
// message by CudaErrorException below. Only declared here (body not in this header).
44 std::string errorMessage(CUresult const);
45 
// Exception type that carries the CUresult status it was created from.
// The std::runtime_error message is whatever errorMessage(status) returns.
// NOTE(review): std::runtime_error is declared in <stdexcept>, which is not among
// the #include lines visible at the top of this listing - presumably it arrives
// transitively; confirm.
46 class CudaErrorException : public std::runtime_error {
47  public:
// Builds the base-class message from errorMessage(status) and stores status.
// NOTE(review): the constructor is not `explicit`, so a CUresult converts implicitly.
48  CudaErrorException(CUresult status)
49  : std::runtime_error(errorMessage(status)), status_(status) {}
50 
// Returns the status value passed to the constructor.
51  CUresult getStatus() const { return status_; }
52 
53  private:
54  CUresult const status_;  // immutable after construction
55 };
56 #endif
57 
// NOTE(review): the opening of this struct and several of its fields are absent
// from this rendering (the embedded listing numbers jump from 57 to 63 and skip
// 64-66, 69-71, 73-74 and 76). Presumably this is DeviceProperties, the element
// type of CudaMgr::device_properties_ - confirm against the real header.
// CudaMgr reads a `computeMajor` member that is not among the fields shown here.
63  size_t globalMem;  // presumably a size in bytes - TODO confirm
67  int numMPs;  // presumably the multiprocessor count - TODO confirm
68  int warpSize;
72  int pciBusId;
75  int memoryBusWidth; // in bits
77  int clockKhz;  // unit per the name: kHz
78  int numCore;
79  std::string arch;
80 };
81 
// Declares the CUDA device manager interface: device/host memory copies,
// allocation and release, device-property queries and context selection.
// Most members are only declared here; their bodies are not in this header.
// NOTE(review): this rendering omits several listing lines (90, 116, 171-175, 177).
// The index at the end of the page places getDeviceGroup() at line 90,
// getMaxSharedMemoryForAll() at 116, max_shared_memory_for_all_ at 175 and
// device_group_ at 177. The declarations of device_count_, start_gpu_ and
// gpu_driver_version_, which the inline getters below return, are likewise not
// shown - presumably they sit in the missing 171-174 range; confirm.
82 class CudaMgr {
83  public:
// num_gpus / start_gpu select which devices are managed; start_gpu defaults to 0.
// Exact semantics are in the out-of-line definition - not visible here.
84  CudaMgr(const int num_gpus, const int start_gpu = 0);
85  ~CudaMgr();
86 
87  void synchronizeDevices() const;
88  int getDeviceCount() const { return device_count_; }  // returns device_count_ as stored
89  int getStartGpu() const { return start_gpu_; }  // returns start_gpu_ as stored
91 
// Copy routines. Judging by the parameter names, the destination pointer comes
// first and the source second; num_bytes is the byte count. device_num is
// presumably relative to start_gpu_, as for getDeviceProperties - confirm.
92  void copyHostToDevice(int8_t* device_ptr,
93  const int8_t* host_ptr,
94  const size_t num_bytes,
95  const int device_num);
96  void copyDeviceToHost(int8_t* host_ptr,
97  const int8_t* device_ptr,
98  const size_t num_bytes,
99  const int device_num);
// NOTE(review): src_ptr is non-const here, unlike the source pointers above.
100  void copyDeviceToDevice(int8_t* dest_ptr,
101  int8_t* src_ptr,
102  const size_t num_bytes,
103  const int dest_device_num,
104  const int src_device_num);
105 
// Allocation / release pairs: pinned host memory and per-device memory.
// Failure behaviour is decided in the definitions - not visible here.
106  int8_t* allocatePinnedHostMem(const size_t num_bytes);
107  int8_t* allocateDeviceMem(const size_t num_bytes, const int device_num);
108  void freePinnedHostMem(int8_t* host_ptr);
109  void freeDeviceMem(int8_t* device_ptr);
110  void zeroDeviceMem(int8_t* device_ptr, const size_t num_bytes, const int device_num);
// uc is presumably the byte value written to each of the num_bytes bytes - confirm.
111  void setDeviceMem(int8_t* device_ptr,
112  const unsigned char uc,
113  const size_t num_bytes,
114  const int device_num);
115 
117 
// Read-only access to the whole property table.
118  const std::vector<DeviceProperties>& getAllDeviceProperties() const {
119  return device_properties_;
120  }
// Returns a pointer to the properties entry at device_num.
// Throws std::runtime_error when device_num >= device_properties_.size();
// never returns nullptr.
121  const DeviceProperties* getDeviceProperties(const size_t device_num) const {
122  // device_num is the device number relative to start_gpu_ (real_device_num -
123  // start_gpu_)
124  if (device_num < device_properties_.size()) {
125  return &device_properties_[device_num];
126  }
127  throw std::runtime_error("Specified device number " + std::to_string(device_num) +
128  " is out of range of number of devices (" +
129  std::to_string(device_properties_.size()) + ")");
130  }
// The four isArch* helpers below inspect only device_properties_[0]; they
// return false when getDeviceCount() is 0. The && short-circuit keeps the [0]
// access from running in that case. Other devices are not examined here (see
// the *ForAll variants).
// True when the first device reports computeMajor == 5.
131  inline bool isArchMaxwell() const {
132  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 5);
133  }
// True when the first device reports computeMajor >= 5.
134  inline bool isArchMaxwellOrLater() const {
135  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 5);
136  }
// True when the first device reports computeMajor == 6.
137  inline bool isArchPascal() const {
138  return (getDeviceCount() > 0 && device_properties_[0].computeMajor == 6);
139  }
// True when the first device reports computeMajor >= 6.
140  inline bool isArchPascalOrLater() const {
141  return (getDeviceCount() > 0 && device_properties_[0].computeMajor >= 6);
142  }
// Whole-group variants; only declared in this header.
143  bool isArchMaxwellOrLaterForAll() const;
144  bool isArchVoltaForAll() const;
145 
146  void setContext(const int device_num) const;
147 
// The members below exist only in builds that define HAVE_CUDA.
148 #ifdef HAVE_CUDA
149  void printDeviceProperties() const;
150 
151  const std::vector<CUcontext>& getDeviceContexts() const { return device_contexts_; }
// NOTE(review): the top-level const on this by-value return type has no effect.
152  const int getGpuDriverVersion() const { return gpu_driver_version_; }
153 
// Module load/unload for a given device_id. The option arguments are presumably
// forwarded to the CUDA driver's JIT module loader - confirm in the definition.
154  void loadGpuModuleData(CUmodule* module,
155  const void* image,
156  unsigned int num_options,
157  CUjit_option* options,
158  void** option_values,
159  const int device_id) const;
160  void unloadGpuModuleData(CUmodule* module, const int device_id) const;
161 #endif
162 
163  private:
// Private helpers, compiled only with HAVE_CUDA; bodies are not in this header.
164 #ifdef HAVE_CUDA
165  void fillDeviceProperties();
166  void initDeviceGroup();
167  void createDeviceContexts();
168  size_t computeMaxSharedMemoryForAll() const;
169  void checkError(CUresult cu_result) const;
170 #endif
171 
// Property table indexed by device number relative to start_gpu_
// (see getDeviceProperties).
176  std::vector<DeviceProperties> device_properties_;
178  std::vector<CUcontext> device_contexts_;
179 
// mutable so that const member functions can lock it; what it guards is decided
// by the out-of-line definitions - presumably device cleanup, per the name.
180  mutable std::mutex device_cleanup_mutex_;
181 };
182 
183 } // Namespace CudaMgr_Namespace
std::unique_ptr< llvm::Module > module(runtime_module_shallow_copy(cgen_state))
int CUjit_option
Definition: nocuda.h:25
const omnisci::DeviceGroup & getDeviceGroup() const
Definition: CudaMgr.h:90
void copyDeviceToHost(int8_t *host_ptr, const int8_t *device_ptr, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:97
size_t getMaxSharedMemoryForAll() const
Definition: CudaMgr.h:116
std::mutex device_cleanup_mutex_
Definition: CudaMgr.h:180
int8_t * allocatePinnedHostMem(const size_t num_bytes)
Definition: CudaMgr.cpp:217
void setContext(const int device_num) const
Definition: CudaMgr.cpp:322
No-frills UUID type class to allow easy containerization and comparison of device UUIDs from different APIs.
bool isArchPascalOrLater() const
Definition: CudaMgr.h:140
void copyDeviceToDevice(int8_t *dest_ptr, int8_t *src_ptr, const size_t num_bytes, const int dest_device_num, const int src_device_num)
Definition: CudaMgr.cpp:106
int getStartGpu() const
Definition: CudaMgr.h:89
std::string to_string(char const *&&v)
std::vector< CUcontext > device_contexts_
Definition: CudaMgr.h:178
void freeDeviceMem(int8_t *device_ptr)
Definition: CudaMgr.cpp:235
omnisci::DeviceGroup device_group_
Definition: CudaMgr.h:177
std::string errorMessage(CUresult const status)
Definition: CudaMgr.cpp:28
int getDeviceCount() const
Definition: CudaMgr.h:88
const UUID uuid
UUID for device (hardware invariant)
Definition: CudaMgr.h:35
void setDeviceMem(int8_t *device_ptr, const unsigned char uc, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:247
void copyHostToDevice(int8_t *device_ptr, const int8_t *host_ptr, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:88
bool isArchMaxwellOrLaterForAll() const
Definition: CudaMgr.cpp:259
int CUresult
Definition: nocuda.h:21
std::vector< DeviceProperties > device_properties_
Definition: CudaMgr.h:176
size_t max_shared_memory_for_all_
Definition: CudaMgr.h:175
int CUdevice
Definition: nocuda.h:20
bool isArchVoltaForAll() const
Definition: CudaMgr.cpp:272
void freePinnedHostMem(int8_t *host_ptr)
Definition: CudaMgr.cpp:231
void synchronizeDevices() const
Definition: CudaMgr.cpp:81
const DeviceProperties * getDeviceProperties(const size_t device_num) const
Definition: CudaMgr.h:121
const int index
index into device group (currently num_gpus - start_gpu)
Definition: CudaMgr.h:33
bool isArchMaxwell() const
Definition: CudaMgr.h:131
bool isArchPascal() const
Definition: CudaMgr.h:137
CudaMgr(const int num_gpus, const int start_gpu=0)
Definition: CudaMgr.cpp:35
std::vector< DeviceIdentifier > DeviceGroup
Definition: CudaMgr.h:38
const std::vector< DeviceProperties > & getAllDeviceProperties() const
Definition: CudaMgr.h:118
void zeroDeviceMem(int8_t *device_ptr, const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:241
bool isArchMaxwellOrLater() const
Definition: CudaMgr.h:134
void * CUmodule
Definition: nocuda.h:23
int8_t * allocateDeviceMem(const size_t num_bytes, const int device_num)
Definition: CudaMgr.cpp:224
const int cuda_id
Cuda ID for device (ignores start_gpu)
Definition: CudaMgr.h:34