OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
gpu_enabled.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Functions in gpu_enabled handle two cases:
18 // * __CUDACC__ is defined and the function call is made from device code.
19 // * __CUDACC__ is not defined and the function call is made from host code.
20 // They do NOT work when __CUDACC__ is defined and the call is made from host code.
21 
22 #pragma once
23 
24 #include "funcannotations.h"
25 
26 #include <utility> // std::forward
27 
28 #ifdef __CUDACC__
29 #include <thrust/binary_search.h>
30 #include <thrust/reduce.h>
31 #include <thrust/reverse.h>
32 #include <thrust/sequence.h>
33 #include <thrust/sort.h>
34 #else
35 #include <algorithm>
36 #include <numeric>
37 #endif
38 
namespace gpu_enabled {

// Every wrapper in this namespace perfect-forwards its arguments to exactly one
// backend, selected at compile time:
//  * __CUDACC__ defined:     the Thrust algorithm named in the wrapper body
//                            (with the thrust::device policy where one is passed).
//  * __CUDACC__ not defined: the C++ standard library algorithm named in the body.
// The accepted argument lists are therefore those of the selected backend call.

// Folds a range into a single value.
// Forwards to thrust::reduce / std::accumulate and returns that call's result.
template <typename... ARGS>
DEVICE auto accumulate(ARGS&&... args) {
#ifdef __CUDACC__
  return thrust::reduce(thrust::device, std::forward<ARGS>(args)...);
#else
  return std::accumulate(std::forward<ARGS>(args)...);
#endif
}

// Copies a range to a destination.
// Forwards to thrust::copy / std::copy and returns that call's result.
template <typename... ARGS>
DEVICE auto copy(ARGS&&... args) {
#ifdef __CUDACC__
  return thrust::copy(thrust::device, std::forward<ARGS>(args)...);
#else
  return std::copy(std::forward<ARGS>(args)...);
#endif
}

// Assigns one value to every element of a range.
// Forwards to thrust::fill / std::fill; nothing is returned.
template <typename... ARGS>
DEVICE void fill(ARGS&&... args) {
#ifdef __CUDACC__
  thrust::fill(thrust::device, std::forward<ARGS>(args)...);
#else
  std::fill(std::forward<ARGS>(args)...);
#endif
}

// Fills a range with consecutive values.
// Forwards to thrust::sequence (the Thrust call used here in place of std::iota)
// or to std::iota; nothing is returned.
template <typename... ARGS>
DEVICE void iota(ARGS&&... args) {
#ifdef __CUDACC__
  thrust::sequence(thrust::device, std::forward<ARGS>(args)...);
#else
  std::iota(std::forward<ARGS>(args)...);
#endif
}

// Binary search for a lower bound.
// Forwards to thrust::lower_bound / std::lower_bound and returns that call's result.
template <typename... ARGS>
DEVICE auto lower_bound(ARGS&&... args) {
#ifdef __CUDACC__
  return thrust::lower_bound(thrust::device, std::forward<ARGS>(args)...);
#else
  return std::lower_bound(std::forward<ARGS>(args)...);
#endif
}

// Running (inclusive) sums of a range written to a destination.
// Forwards to thrust::inclusive_scan / std::partial_sum and returns that call's
// result (the end of the written output range) instead of discarding it, so the
// wrapper mirrors the backend signature the same way copy() does. Callers that
// ignore the return value are unaffected.
template <typename... ARGS>
DEVICE auto partial_sum(ARGS&&... args) {
#ifdef __CUDACC__
  return thrust::inclusive_scan(thrust::device, std::forward<ARGS>(args)...);
#else
  return std::partial_sum(std::forward<ARGS>(args)...);
#endif
}

// Reverses a range in place.
// Forwards to thrust::reverse / std::reverse; nothing is returned.
template <typename... ARGS>
DEVICE void reverse(ARGS&&... args) {
#ifdef __CUDACC__
  thrust::reverse(thrust::device, std::forward<ARGS>(args)...);
#else
  std::reverse(std::forward<ARGS>(args)...);
#endif
}

// Sorts a range in place.
// Forwards to thrust::sort / std::sort; nothing is returned.
template <typename... ARGS>
DEVICE void sort(ARGS&&... args) {
#ifdef __CUDACC__
  thrust::sort(thrust::device, std::forward<ARGS>(args)...);
#else
  std::sort(std::forward<ARGS>(args)...);
#endif
}

// Exchanges two values.
// Forwards to thrust::swap / std::swap. Unlike the other wrappers, no execution
// policy is passed on the Thrust path.
template <typename... ARGS>
DEVICE void swap(ARGS&&... args) {
#ifdef __CUDACC__
  thrust::swap(std::forward<ARGS>(args)...);
#else
  std::swap(std::forward<ARGS>(args)...);
#endif
}

// Binary search for an upper bound.
// Forwards to thrust::upper_bound / std::upper_bound and returns that call's result.
template <typename... ARGS>
DEVICE auto upper_bound(ARGS&&... args) {
#ifdef __CUDACC__
  return thrust::upper_bound(thrust::device, std::forward<ARGS>(args)...);
#else
  return std::upper_bound(std::forward<ARGS>(args)...);
#endif
}

}  // namespace gpu_enabled
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)
#define DEVICE
DEVICE void fill(ARGS &&...args)
Definition: gpu_enabled.h:60
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
DEVICE void partial_sum(ARGS &&...args)
Definition: gpu_enabled.h:87
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
DEVICE void iota(ARGS &&...args)
Definition: gpu_enabled.h:69
DEVICE void reverse(ARGS &&...args)
Definition: gpu_enabled.h:96
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114