OmniSciDB  04ee39c94c
CudaUtils.cu
Go to the documentation of this file.
#include <cuda.h>

#include <stdexcept>
#include <string>

#include "CudaUtils.h"
3 
4 namespace CudaUtils {
5 
// Allocates numElems * elemSize bytes of device memory on GPU gpuNum.
//
// devMem   - receives the device pointer; it is reset to null first, so it is
//            never left indeterminate when the request fails.
// numElems - number of elements requested.
// elemSize - size of one element in bytes.
// gpuNum   - device ordinal passed to cudaSetDevice before allocating.
//
// Throws std::runtime_error if the byte count would overflow size_t or if
// cudaSetDevice / cudaMalloc report an error.
template <typename T>
void allocGpuMem(T*& devMem,
                 const size_t numElems,
                 const size_t elemSize,
                 const int gpuNum) {
  devMem = nullptr;

  // Reject products that would wrap around and silently under-allocate.
  if (elemSize != 0 && numElems > static_cast<size_t>(-1) / elemSize) {
    throw std::runtime_error(
        "CudaUtils::allocGpuMem: numElems * elemSize overflows size_t");
  }
  const size_t numBytes = numElems * elemSize;

  cudaError_t status = cudaSetDevice(gpuNum);
  // Allocate through a real void* instead of type-punning &devMem.
  void* rawMem = nullptr;
  if (status == cudaSuccess) {
    status = cudaMalloc(&rawMem, numBytes);
  }
  if (status != cudaSuccess) {
    throw std::runtime_error(std::string("CudaUtils::allocGpuMem: ") +
                             cudaGetErrorString(status));
  }
  devMem = static_cast<T*>(rawMem);
}
14 
// Allocates numElems * elemSize bytes of host memory through cudaHostAlloc
// with the cudaHostAllocPortable flag.
//
// hostMem  - receives the host pointer; it is reset to null first, so it is
//            never left indeterminate when the request fails.
// numElems - number of elements requested.
// elemSize - size of one element in bytes.
//
// Throws std::runtime_error if the byte count would overflow size_t or if
// cudaHostAlloc reports an error.
template <typename T>
void allocPinnedHostMem(T*& hostMem, const size_t numElems, const size_t elemSize) {
  hostMem = nullptr;

  // Reject products that would wrap around and silently under-allocate.
  if (elemSize != 0 && numElems > static_cast<size_t>(-1) / elemSize) {
    throw std::runtime_error(
        "CudaUtils::allocPinnedHostMem: numElems * elemSize overflows size_t");
  }
  const size_t numBytes = numElems * elemSize;

  // Allocate through a real void* instead of type-punning &hostMem.
  void* rawMem = nullptr;
  const cudaError_t status = cudaHostAlloc(&rawMem, numBytes, cudaHostAllocPortable);
  if (status != cudaSuccess) {
    throw std::runtime_error(std::string("CudaUtils::allocPinnedHostMem: ") +
                             cudaGetErrorString(status));
  }
  hostMem = static_cast<T*>(rawMem);
}
19 
// Copies numElems * elemSize bytes from host memory to device memory with a
// blocking cudaMemcpy (cudaMemcpyHostToDevice).
//
// devMem   - destination device pointer.
// hostMem  - source host pointer.
// numElems - number of elements to copy.
// elemSize - size of one element in bytes.
// gpuNum   - device ordinal passed to cudaSetDevice before copying.
//
// Throws std::runtime_error if the byte count would overflow size_t or if
// cudaSetDevice / cudaMemcpy report an error.
template <typename T>
void copyToGpu(T* devMem,
               const T* hostMem,
               const size_t numElems,
               const size_t elemSize,
               const int gpuNum) {
  // Reject products that would wrap around and silently copy too little.
  if (elemSize != 0 && numElems > static_cast<size_t>(-1) / elemSize) {
    throw std::runtime_error(
        "CudaUtils::copyToGpu: numElems * elemSize overflows size_t");
  }
  const size_t numBytes = numElems * elemSize;

  cudaError_t status = cudaSetDevice(gpuNum);
  if (status == cudaSuccess) {
    status = cudaMemcpy(devMem, hostMem, numBytes, cudaMemcpyHostToDevice);
  }
  if (status != cudaSuccess) {
    throw std::runtime_error(std::string("CudaUtils::copyToGpu: ") +
                             cudaGetErrorString(status));
  }
}
29 
// Copies numElems * elemSize bytes from srcMem to dstMem with a blocking
// cudaMemcpy using cudaMemcpyDefault, after making dstGpuNum the current
// device.
//
// dstMem    - destination pointer.
// srcMem    - source pointer.
// numElems  - number of elements to copy.
// elemSize  - size of one element in bytes.
// dstGpuNum - device ordinal passed to cudaSetDevice before copying.
//
// Throws std::runtime_error if the byte count would overflow std::size_t or
// if cudaSetDevice / cudaMemcpy report an error.
template <typename T>
void copyGpuToGpu(T* dstMem,
                  const T* srcMem,
                  const std::size_t numElems,
                  const std::size_t elemSize,
                  const int dstGpuNum) {
  // Reject products that would wrap around and silently copy too little.
  if (elemSize != 0 && numElems > static_cast<std::size_t>(-1) / elemSize) {
    throw std::runtime_error(
        "CudaUtils::copyGpuToGpu: numElems * elemSize overflows size_t");
  }
  const std::size_t numBytes = numElems * elemSize;

  cudaError_t status = cudaSetDevice(dstGpuNum);
  if (status == cudaSuccess) {
    status = cudaMemcpy(dstMem, srcMem, numBytes, cudaMemcpyDefault);
  }
  if (status != cudaSuccess) {
    throw std::runtime_error(std::string("CudaUtils::copyGpuToGpu: ") +
                             cudaGetErrorString(status));
  }
}
39 
// Copies numElems * elemSize bytes from device memory to host memory with a
// blocking cudaMemcpy (cudaMemcpyDeviceToHost).
//
// hostMem  - destination host pointer.
// devMem   - source device pointer.
// numElems - number of elements to copy.
// elemSize - size of one element in bytes.
// gpuNum   - device ordinal passed to cudaSetDevice before copying.
//
// Throws std::runtime_error if the byte count would overflow size_t or if
// cudaSetDevice / cudaMemcpy report an error.
template <typename T>
void copyToHost(T* hostMem,
                const T* devMem,
                const size_t numElems,
                const size_t elemSize,
                const int gpuNum) {
  // Reject products that would wrap around and silently copy too little.
  if (elemSize != 0 && numElems > static_cast<size_t>(-1) / elemSize) {
    throw std::runtime_error(
        "CudaUtils::copyToHost: numElems * elemSize overflows size_t");
  }
  const size_t numBytes = numElems * elemSize;

  cudaError_t status = cudaSetDevice(gpuNum);
  if (status == cudaSuccess) {
    status = cudaMemcpy(hostMem, devMem, numBytes, cudaMemcpyDeviceToHost);
  }
  if (status != cudaSuccess) {
    throw std::runtime_error(std::string("CudaUtils::copyToHost: ") +
                             cudaGetErrorString(status));
  }
}
49 
// Passes the pointer held in devMem to cudaFree and leaves devMem null.
// The status returned by cudaFree is not inspected.
template <typename T>
void gpuFree(T*& devMem) {
  T* const released = devMem;
  devMem = nullptr;
  cudaFree(released);
}
55 
// Passes the pointer held in hostMem to cudaFreeHost and leaves hostMem null.
// The status returned by cudaFreeHost is not inspected.
template <typename T>
void hostFree(T*& hostMem) {
  T* const released = hostMem;
  hostMem = nullptr;
  cudaFreeHost(released);
}
61 
// Explicit instantiations of allocGpuMem, one per supported element type.
// Parameter names and top-level const are omitted; they do not take part in
// matching the template's signature.
template void allocGpuMem<bool>(bool*&, size_t, size_t, int);
template void allocGpuMem<char>(char*&, size_t, size_t, int);
template void allocGpuMem<unsigned char>(unsigned char*&, size_t, size_t, int);
template void allocGpuMem<int>(int*&, size_t, size_t, int);
template void allocGpuMem<int8_t>(int8_t*&, size_t, size_t, int);
template void allocGpuMem<unsigned int>(unsigned int*&, size_t, size_t, int);
template void allocGpuMem<unsigned long>(unsigned long*&, size_t, size_t, int);
template void allocGpuMem<unsigned long long int>(unsigned long long int*&,
                                                  size_t,
                                                  size_t,
                                                  int);
template void allocGpuMem<float>(float*&, size_t, size_t, int);
template void allocGpuMem<double>(double*&, size_t, size_t, int);
// Still disabled, as in the original (note it predates the gpuNum parameter):
// template void allocGpuMem <void>(void * &devMem, const size_t numElems, const size_t
// elemSize);
104 
// Explicit instantiations of allocPinnedHostMem, one per supported element
// type. Parameter names and top-level const are omitted; they do not take
// part in matching the template's signature.
template void allocPinnedHostMem<int>(int*&, size_t, size_t);
template void allocPinnedHostMem<int8_t>(int8_t*&, size_t, size_t);
template void allocPinnedHostMem<char>(char*&, size_t, size_t);
template void allocPinnedHostMem<unsigned char>(unsigned char*&, size_t, size_t);
template void allocPinnedHostMem<float>(float*&, size_t, size_t);
template void allocPinnedHostMem<unsigned int>(unsigned int*&, size_t, size_t);
template void allocPinnedHostMem<void>(void*&, size_t, size_t);
126 
// Explicit instantiations of copyToGpu, one per supported element type.
// Parameter names and top-level const are omitted; they do not take part in
// matching the template's signature.
template void copyToGpu<bool>(bool*, const bool*, size_t, size_t, int);
template void copyToGpu<char>(char*, const char*, size_t, size_t, int);
template void copyToGpu<unsigned char>(unsigned char*,
                                       const unsigned char*,
                                       size_t,
                                       size_t,
                                       int);
template void copyToGpu<int>(int*, const int*, size_t, size_t, int);
template void copyToGpu<int8_t>(int8_t*, const int8_t*, size_t, size_t, int);
template void copyToGpu<unsigned int>(unsigned int*,
                                      const unsigned int*,
                                      size_t,
                                      size_t,
                                      int);
template void copyToGpu<unsigned long>(unsigned long*,
                                       const unsigned long*,
                                       size_t,
                                       size_t,
                                       int);
template void copyToGpu<unsigned long long int>(unsigned long long int*,
                                                const unsigned long long int*,
                                                size_t,
                                                size_t,
                                                int);
template void copyToGpu<float>(float*, const float*, size_t, size_t, int);
template void copyToGpu<double>(double*, const double*, size_t, size_t, int);
177 
// Explicit instantiations of copyGpuToGpu, one per supported element type.
// Parameter names and top-level const are omitted; they do not take part in
// matching the template's signature.
//
// unsigned long is instantiated here as well, so that every element type
// accepted by allocGpuMem / copyToGpu / gpuFree in this file can also be
// copied between devices without a link error.
template void copyGpuToGpu<bool>(bool*, const bool*, std::size_t, std::size_t, int);
template void copyGpuToGpu<char>(char*, const char*, std::size_t, std::size_t, int);
template void copyGpuToGpu<unsigned char>(unsigned char*,
                                          const unsigned char*,
                                          std::size_t,
                                          std::size_t,
                                          int);
template void copyGpuToGpu<int>(int*, const int*, std::size_t, std::size_t, int);
template void copyGpuToGpu<int8_t>(int8_t*,
                                   const int8_t*,
                                   std::size_t,
                                   std::size_t,
                                   int);
template void copyGpuToGpu<unsigned int>(unsigned int*,
                                         const unsigned int*,
                                         std::size_t,
                                         std::size_t,
                                         int);
template void copyGpuToGpu<unsigned long>(unsigned long*,
                                          const unsigned long*,
                                          std::size_t,
                                          std::size_t,
                                          int);
template void copyGpuToGpu<unsigned long long int>(unsigned long long int*,
                                                   const unsigned long long int*,
                                                   std::size_t,
                                                   std::size_t,
                                                   int);
template void copyGpuToGpu<float>(float*, const float*, std::size_t, std::size_t, int);
template void copyGpuToGpu<double>(double*,
                                   const double*,
                                   std::size_t,
                                   std::size_t,
                                   int);
223 
// Explicit instantiations of copyToHost, one per supported element type.
// Parameter names and top-level const are omitted; they do not take part in
// matching the template's signature.
//
// unsigned long is instantiated here as well, so that data uploaded through
// copyToGpu<unsigned long> can be read back without a link error.
//
// Still disabled, as in the original:
// template void copyToHost <__nv_bool> (__nv_bool * hostMem, __nv_bool * devMem, const
// size_t numElems, const size_t elemSize, const int gpuNum);
template void copyToHost<bool>(bool*, const bool*, size_t, size_t, int);
template void copyToHost<char>(char*, const char*, size_t, size_t, int);
template void copyToHost<unsigned char>(unsigned char*,
                                        const unsigned char*,
                                        size_t,
                                        size_t,
                                        int);
template void copyToHost<unsigned short>(unsigned short*,
                                         const unsigned short*,
                                         size_t,
                                         size_t,
                                         int);
template void copyToHost<int>(int*, const int*, size_t, size_t, int);
template void copyToHost<int8_t>(int8_t*, const int8_t*, size_t, size_t, int);
template void copyToHost<unsigned int>(unsigned int*,
                                       const unsigned int*,
                                       size_t,
                                       size_t,
                                       int);
template void copyToHost<unsigned long>(unsigned long*,
                                        const unsigned long*,
                                        size_t,
                                        size_t,
                                        int);
template void copyToHost<unsigned long long int>(unsigned long long int*,
                                                 const unsigned long long int*,
                                                 size_t,
                                                 size_t,
                                                 int);
template void copyToHost<float>(float*, const float*, size_t, size_t, int);
template void copyToHost<double>(double*, const double*, size_t, size_t, int);
template void copyToHost<void>(void*, const void*, size_t, size_t, int);
282 
// Explicit instantiations of gpuFree, one per supported element type.
// Parameter names are omitted; only the types take part in matching.
template void gpuFree<bool>(bool*&);
template void gpuFree<char>(char*&);
template void gpuFree<int>(int*&);
template void gpuFree<int8_t>(int8_t*&);
template void gpuFree<unsigned int>(unsigned int*&);
template void gpuFree<unsigned long>(unsigned long*&);
template void gpuFree<unsigned long long int>(unsigned long long int*&);
template void gpuFree<float>(float*&);
template void gpuFree<double>(double*&);
template void gpuFree<unsigned char>(unsigned char*&);
template void gpuFree<void>(void*&);
294 
// Explicit instantiations of hostFree, one per supported element type.
// Parameter names are omitted; only the types take part in matching.
template void hostFree<bool>(bool*&);
template void hostFree<char>(char*&);
template void hostFree<int>(int*&);
template void hostFree<int8_t>(int8_t*&);
template void hostFree<unsigned int>(unsigned int*&);
template void hostFree<unsigned long long int>(unsigned long long int*&);
template void hostFree<float>(float*&);
template void hostFree<double>(double*&);
template void hostFree<unsigned char>(unsigned char*&);
// Still disabled, as in the original:
// template void hostFree <geops_size_t> (geops_size_t * &hostMem);
template void hostFree<void>(void*&);
306 } // namespace CudaUtils