OmniSciDB  c07336695a
ProfileTest.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #ifndef PROFILETEST_H
26 #define PROFILETEST_H
27 
28 #include "../QueryEngine/GpuRtConstants.h"
29 
30 #include "../Shared/Logger.h"
31 
32 #ifdef HAVE_CUDA
33 #include <cuda_runtime_api.h>
34 #endif
35 
36 #if defined(HAVE_CUDA) && CUDA_VERSION >= 8000
37 #include <cuda.h>
38 
39 #define TRY_COLUMNAR
40 #define TRY_MASH
41 #define TRY_MASH_COLUMNAR
42 #if defined(TRY_MASH) || defined(TRY_MASH_COLUMNAR)
43 #define SAVE_MASH_BUF
44 #endif
45 #endif
46 
47 #include <vector>
48 
49 #ifndef __CUDACC__
50 #include <unistd.h> // sysconf
51 #include <algorithm>
/**
 * Returns twice the number of configured processors reported by sysconf(),
 * clamped so the result is never less than 1.
 */
inline long cpu_threads() {
  // std::thread::hardware_concurrency() is deliberately not used: some
  // slightly out-of-date compilers (gcc 4.7) implement it as always 0,
  // so sysconf() is queried instead.
  const long configured_cpus = sysconf(_SC_NPROCESSORS_CONF);
  const long doubled = 2 * configured_cpus;
  // A failed sysconf() call yields a negative value; fall back to 1 then.
  return doubled < 1L ? 1L : doubled;
}
58 #endif
59 
// Device kinds distinguished by this header. The enumerators keep their
// implicit values: CPU == 0, GPU == 1.
enum DEV_KIND {
  CPU,
  GPU
};
61 
63 
65 
66 #if defined(HAVE_CUDA) && CUDA_VERSION >= 8000
// Declaration only; the definition is not part of this header. Visible only
// when HAVE_CUDA is defined and CUDA_VERSION >= 8000.
//   groups      - mutable byte buffer (int8_t*).
//   group_count - NOTE(review): presumably the number of group entries in
//                 `groups` - confirm against the definition.
//   col_count   - NOTE(review): presumably the number of columns per entry.
//   col_widths  - per-column sizes; units (bytes?) are not visible here.
//   init_vals   - NOTE(review): presumably one initial value per column.
//   is_columnar - selects between two buffer layouts; exact layout is defined
//                 by the implementation.
67 void init_groups_on_device(int8_t* groups,
68  const size_t group_count,
69  const size_t col_count,
70  const std::vector<size_t>& col_widths,
71  const std::vector<size_t>& init_vals,
72  const bool is_columnar);
73 
// Declaration only; the definition is not part of this header. Visible only
// when HAVE_CUDA is defined and CUDA_VERSION >= 8000.
//   groups_buffer - mutable byte buffer (int8_t*), so the callee may write it.
//   row_buffer    - read-only byte buffer (const int8_t*).
//   group_count / row_count - NOTE(review): presumably entry counts of the
//                   two buffers - confirm against the definition.
//   key_count / val_count   - NOTE(review): presumably how many of the columns
//                   are keys vs. aggregated values - confirm.
//   col_widths    - per-column sizes; units are not visible here.
//   agg_ops       - one OP_KIND per aggregate (OP_KIND is declared elsewhere
//                   in this header).
//   is_columnar   - selects between two buffer layouts.
74 void run_query_on_device(int8_t* groups_buffer,
75  const size_t group_count,
76  const int8_t* row_buffer,
77  const size_t row_count,
78  const size_t key_count,
79  const size_t val_count,
80  const std::vector<size_t>& col_widths,
81  const std::vector<OP_KIND>& agg_ops,
82  const bool is_columnar);
83 #if defined(TRY_MASH) || defined(TRY_MASH_COLUMNAR)
// Declaration only; the definition is not part of this header. Compiled in
// only when TRY_MASH or TRY_MASH_COLUMNAR is defined (in addition to the
// enclosing HAVE_CUDA / CUDA_VERSION >= 8000 guard).
// The parameter list is identical to run_query_on_device().
// NOTE(review): how the "mash" variant differs from run_query_on_device() is
// not visible here - confirm against the definition.
84 void mash_run_query_on_device(int8_t* groups_buffer,
85  const size_t group_count,
86  const int8_t* row_buffer,
87  const size_t row_count,
88  const size_t key_count,
89  const size_t val_count,
90  const std::vector<size_t>& col_widths,
91  const std::vector<OP_KIND>& agg_ops,
92  const bool is_columnar);
93 #endif
// Declaration only; the definition is not part of this header. Visible only
// when HAVE_CUDA is defined and CUDA_VERSION >= 8000.
//   buffers     - mutable byte buffer (int8_t*).
//   row_count / col_count - NOTE(review): presumably the dimensions of the
//                 data to generate - confirm against the definition.
//   col_widths  - per-column sizes; units are not visible here.
//   ranges      - one (int64_t, int64_t) pair per entry; NOTE(review):
//                 presumably (min, max) bounds per column - confirm.
//   is_columnar - selects between two buffer layouts.
//   dists       - one DIST_KIND per entry (DIST_KIND is declared elsewhere in
//                 this header).
// Returns a bool whose meaning is not visible here.
94 bool generate_columns_on_device(int8_t* buffers,
95  const size_t row_count,
96  const size_t col_count,
97  const std::vector<size_t>& col_widths,
98  const std::vector<std::pair<int64_t, int64_t>>& ranges,
99  const bool is_columnar,
100  const std::vector<DIST_KIND>& dists);
101 
// Declaration only; the definition is not part of this header. Visible only
// when HAVE_CUDA is defined and CUDA_VERSION >= 8000.
//   columnar_buffer - mutable byte buffer (int8_t*); the only writable
//                     argument.
//   rowwise_buffer  - read-only byte buffer (const int8_t*).
//   group_count     - NOTE(review): presumably the number of group entries in
//                     both buffers - confirm against the definition.
//   col_widths      - per-column sizes; units are not visible here.
102 void columnarize_groups_on_device(int8_t* columnar_buffer,
103  const int8_t* rowwise_buffer,
104  const size_t group_count,
105  const std::vector<size_t>& col_widths);
106 
// Declaration only; the definition is not part of this header. Visible only
// when HAVE_CUDA is defined and CUDA_VERSION >= 8000.
//   row_buffer  - mutable byte buffer (int8_t*), so the callee may rewrite it.
//   row_count   - NOTE(review): presumably the number of rows on entry.
//   key_count   - NOTE(review): presumably the number of leading key columns
//                 used to detect duplicates - confirm.
//   col_widths  - per-column sizes; units are not visible here.
//   is_columnar - selects between two buffer layouts.
// Returns a size_t; NOTE(review): presumably the row count after
// de-duplication - confirm against the definition.
107 size_t deduplicate_rows_on_device(int8_t* row_buffer,
108  const size_t row_count,
109  const size_t key_count,
110  const std::vector<size_t>& col_widths,
111  const bool is_columnar);
112 
// Declaration only; the definition is not part of this header. Visible only
// when HAVE_CUDA is defined and CUDA_VERSION >= 8000.
//   dev_buffer      - mutable byte buffer (int8_t*).
//   entry_count     - NOTE(review): presumably the entry count of dev_buffer.
//   new_entry_count - NOTE(review): presumably the entry count of the
//                     returned buffer - confirm.
//   col_widths      - per-column sizes; units are not visible here.
//   agg_ops         - one OP_KIND per aggregate.
//   init_vals       - NOTE(review): presumably initial values per column.
//   is_columnar     - selects between two buffer layouts.
// Returns an int8_t*. NOTE(review): ownership and memory space of the returned
// pointer are not visible here - confirm who frees it.
113 int8_t* get_hashed_copy(int8_t* dev_buffer,
114  const size_t entry_count,
115  const size_t new_entry_count,
116  const std::vector<size_t>& col_widths,
117  const std::vector<OP_KIND>& agg_ops,
118  const std::vector<size_t>& init_vals,
119  const bool is_columnar);
120 
// Declaration only; the definition is not part of this header. Visible only
// when HAVE_CUDA is defined and CUDA_VERSION >= 8000.
//   row_buffer  - mutable byte buffer (int8_t*).
//   entry_count - NOTE(review): presumably the capacity of row_buffer in
//                 entries - confirm against the definition.
//   entry_size  - NOTE(review): presumably the size of one entry; units are
//                 not visible here.
//   row_count   - NOTE(review): presumably the number of rows present.
//   fill_rate   - a float; NOTE(review): presumably the target fraction of
//                 entries to keep - confirm range and meaning.
//   is_columnar - selects between two buffer layouts.
// Returns a size_t whose meaning is not visible here.
121 size_t drop_rows(int8_t* row_buffer,
122  const size_t entry_count,
123  const size_t entry_size,
124  const size_t row_count,
125  const float fill_rate,
126  const bool is_columnar);
127 
// Declaration only; the definition is not part of this header. Visible only
// when HAVE_CUDA is defined and CUDA_VERSION >= 8000.
//   this_dev_buffer  - pointer passed by non-const reference, so the callee
//                      can re-point the caller's buffer variable.
//   this_entry_count - passed by non-const reference, so the callee can
//                      update the caller's count.
//   this_dev_id / that_dev_id - NOTE(review): presumably device ordinals of
//                      the two buffers - confirm against the definition.
//   that_dev_buffer  - mutable byte buffer (int8_t*) of the other side.
//   that_entry_count / that_actual_row_count - NOTE(review): presumably
//                      capacity vs. occupied rows of that_dev_buffer.
//   col_widths       - per-column sizes; units are not visible here.
//   agg_ops          - one OP_KIND per aggregate.
//   init_vals        - NOTE(review): presumably initial values per column.
//   is_columnar      - selects between two buffer layouts.
128 void reduce_on_device(int8_t*& this_dev_buffer,
129  const size_t this_dev_id,
130  size_t& this_entry_count,
131  int8_t* that_dev_buffer,
132  const size_t that_dev_id,
133  const size_t that_entry_count,
134  const size_t that_actual_row_count,
135  const std::vector<size_t>& col_widths,
136  const std::vector<OP_KIND>& agg_ops,
137  const std::vector<size_t>& init_vals,
138  const bool is_columnar);
139 
// Declaration only; the definition is not part of this header. Visible only
// when HAVE_CUDA is defined and CUDA_VERSION >= 8000.
//   dev_reduced_buffers - vector of byte-buffer pointers, passed by non-const
//                         reference.
//   entry_count         - NOTE(review): presumably entries per buffer.
//   dev_id / dev_count  - NOTE(review): presumably this device's ordinal and
//                         the total number of devices - confirm.
//   col_widths          - per-column sizes; units are not visible here.
//   is_columnar         - selects between two buffer layouts.
//   start / end         - NOTE(review): presumably an entry range; whether
//                         `end` is inclusive is not visible here.
// Returns an int8_t*. NOTE(review): ownership and memory space of the returned
// pointer are not visible here - confirm who frees it.
140 int8_t* fetch_segs_from_others(std::vector<int8_t*>& dev_reduced_buffers,
141  const size_t entry_count,
142  const size_t dev_id,
143  const size_t dev_count,
144  const std::vector<size_t>& col_widths,
145  const bool is_columnar,
146  const size_t start,
147  const size_t end);
148 
// Declaration only; the definition is not part of this header. Visible only
// when HAVE_CUDA is defined and CUDA_VERSION >= 8000.
//   dev_buffer  - mutable byte buffer (int8_t*).
//   entry_count - NOTE(review): presumably the entry count of dev_buffer.
//   col_widths  - per-column sizes; units are not visible here.
//   ranges      - (int64_t, int64_t) pairs; NOTE(review): presumably
//                 (min, max) bounds per key column - confirm.
//   agg_ops     - one OP_KIND per aggregate.
//   init_vals   - NOTE(review): presumably initial values per column.
//   is_columnar - selects between two buffer layouts.
// Returns a (pointer, size_t) pair. NOTE(review): presumably the new buffer
// and its entry count; ownership of the pointer is not visible here.
149 std::pair<int8_t*, size_t> get_perfect_hashed_copy(
150  int8_t* dev_buffer,
151  const size_t entry_count,
152  const std::vector<size_t>& col_widths,
153  const std::vector<std::pair<int64_t, int64_t>>& ranges,
154  const std::vector<OP_KIND>& agg_ops,
155  const std::vector<size_t>& init_vals,
156  const bool is_columnar);
157 
// Declaration only; the definition is not part of this header. Visible only
// when HAVE_CUDA is defined and CUDA_VERSION >= 8000.
//   dev_seg_buf    - mutable byte buffer (int8_t*); the only writable
//                    argument.
//   dev_other_segs - read-only byte buffer (const int8_t*).
//   entry_count    - NOTE(review): presumably entries per segment - confirm.
//   seg_count      - NOTE(review): presumably the number of segments held in
//                    dev_other_segs - confirm.
//   col_widths     - per-column sizes; units are not visible here.
//   agg_ops        - one OP_KIND per aggregate.
//   is_columnar    - selects between two buffer layouts.
//   start / end    - NOTE(review): presumably an entry range; whether `end`
//                    is inclusive is not visible here.
158 void reduce_segment_on_device(int8_t* dev_seg_buf,
159  const int8_t* dev_other_segs,
160  const size_t entry_count,
161  const size_t seg_count,
162  const std::vector<size_t>& col_widths,
163  const std::vector<OP_KIND>& agg_ops,
164  const bool is_columnar,
165  const size_t start,
166  const size_t end);
167 #endif
168 
169 #endif /* PROFILETEST_H */
DEV_KIND
Definition: ProfileTest.h:60
OP_KIND
Definition: ProfileTest.h:64
long cpu_threads()
Definition: ProfileTest.h:52
DIST_KIND
Definition: ProfileTest.h:62
const int64_t * init_vals