OmniSciDB  eb3a3d0a03
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
GroupByRuntime.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 #include "MurmurHash.h"
19 
20 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t
21 key_hash(const int64_t* key, const uint32_t key_count, const uint32_t key_byte_width) {
22  return MurmurHash3(key, key_byte_width * key_count, 0);
23 }
24 
26  int64_t* groups_buffer,
27  const uint32_t groups_buffer_entry_count,
28  const int64_t* key,
29  const uint32_t key_count,
30  const uint32_t key_width,
31  const uint32_t row_size_quad) {
32  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
33  int64_t* matching_group = get_matching_group_value(
34  groups_buffer, h, key, key_count, key_width, row_size_quad);
35  if (matching_group) {
36  return matching_group;
37  }
38  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
39  while (h_probe != h) {
40  matching_group = get_matching_group_value(
41  groups_buffer, h_probe, key, key_count, key_width, row_size_quad);
42  if (matching_group) {
43  return matching_group;
44  }
45  h_probe = (h_probe + 1) % groups_buffer_entry_count;
46  }
47  return NULL;
48 }
49 
51 
53  int64_t* groups_buffer,
54  const uint32_t groups_buffer_entry_count,
55  const int64_t* key,
56  const uint32_t key_count,
57  const uint32_t key_width,
58  const uint32_t row_size_quad) {
59  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
60  int64_t* matching_group = get_matching_group_value(
61  groups_buffer, h, key, key_count, key_width, row_size_quad);
62  if (matching_group) {
63  return matching_group;
64  }
65  uint32_t watchdog_countdown = 100;
66  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
67  while (h_probe != h) {
68  matching_group = get_matching_group_value(
69  groups_buffer, h_probe, key, key_count, key_width, row_size_quad);
70  if (matching_group) {
71  return matching_group;
72  }
73  h_probe = (h_probe + 1) % groups_buffer_entry_count;
74  if (--watchdog_countdown == 0) {
75  if (dynamic_watchdog()) {
76  return NULL;
77  }
78  watchdog_countdown = 100;
79  }
80  }
81  return NULL;
82 }
83 
84 extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t
85 get_group_value_columnar_slot(int64_t* groups_buffer,
86  const uint32_t groups_buffer_entry_count,
87  const int64_t* key,
88  const uint32_t key_count,
89  const uint32_t key_width) {
90  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
91  int32_t matching_slot = get_matching_group_value_columnar_slot(
92  groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
93  if (matching_slot != -1) {
94  return h;
95  }
96  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
97  while (h_probe != h) {
99  groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
100  if (matching_slot != -1) {
101  return h_probe;
102  }
103  h_probe = (h_probe + 1) % groups_buffer_entry_count;
104  }
105  return -1;
106 }
107 
108 extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t
110  const uint32_t groups_buffer_entry_count,
111  const int64_t* key,
112  const uint32_t key_count,
113  const uint32_t key_width) {
114  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
115  int32_t matching_slot = get_matching_group_value_columnar_slot(
116  groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
117  if (matching_slot != -1) {
118  return h;
119  }
120  uint32_t watchdog_countdown = 100;
121  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
122  while (h_probe != h) {
124  groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
125  if (matching_slot != -1) {
126  return h_probe;
127  }
128  h_probe = (h_probe + 1) % groups_buffer_entry_count;
129  if (--watchdog_countdown == 0) {
130  if (dynamic_watchdog()) {
131  return -1;
132  }
133  watchdog_countdown = 100;
134  }
135  }
136  return -1;
137 }
138 
140  int64_t* groups_buffer,
141  const uint32_t groups_buffer_entry_count,
142  const int64_t* key,
143  const uint32_t key_qw_count) {
144  uint32_t h = key_hash(key, key_qw_count, sizeof(int64_t)) % groups_buffer_entry_count;
145  int64_t* matching_group = get_matching_group_value_columnar(
146  groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
147  if (matching_group) {
148  return matching_group;
149  }
150  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
151  while (h_probe != h) {
152  matching_group = get_matching_group_value_columnar(
153  groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
154  if (matching_group) {
155  return matching_group;
156  }
157  h_probe = (h_probe + 1) % groups_buffer_entry_count;
158  }
159  return NULL;
160 }
161 
162 extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t*
164  const uint32_t groups_buffer_entry_count,
165  const int64_t* key,
166  const uint32_t key_qw_count) {
167  uint32_t h = key_hash(key, key_qw_count, sizeof(int64_t)) % groups_buffer_entry_count;
168  int64_t* matching_group = get_matching_group_value_columnar(
169  groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
170  if (matching_group) {
171  return matching_group;
172  }
173  uint32_t watchdog_countdown = 100;
174  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
175  while (h_probe != h) {
176  matching_group = get_matching_group_value_columnar(
177  groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
178  if (matching_group) {
179  return matching_group;
180  }
181  h_probe = (h_probe + 1) % groups_buffer_entry_count;
182  if (--watchdog_countdown == 0) {
183  if (dynamic_watchdog()) {
184  return NULL;
185  }
186  watchdog_countdown = 100;
187  }
188  }
189  return NULL;
190 }
191 
193  int64_t* groups_buffer,
194  const int64_t key,
195  const int64_t min_key,
196  const int64_t bucket,
197  const uint32_t row_size_quad) {
198  int64_t key_diff = key - min_key;
199  if (bucket) {
200  key_diff /= bucket;
201  }
202  int64_t off = key_diff * row_size_quad;
203  if (groups_buffer[off] == EMPTY_KEY_64) {
204  groups_buffer[off] = key;
205  }
206  return groups_buffer + off + 1;
207 }
208 
209 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t*
211  const int64_t key,
212  const int64_t orig_key,
213  const int64_t min_key,
214  const int64_t bucket,
215  const uint32_t row_size_quad) {
216  int64_t key_diff = key - min_key;
217  if (bucket) {
218  key_diff /= bucket;
219  }
220  int64_t off = key_diff * row_size_quad;
221  if (groups_buffer[off] == EMPTY_KEY_64) {
222  groups_buffer[off] = orig_key;
223  }
224  return groups_buffer + off + 1;
225 }
226 
227 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t
228 get_columnar_group_bin_offset(int64_t* key_base_ptr,
229  const int64_t key,
230  const int64_t min_key,
231  const int64_t bucket) {
232  int64_t off = key - min_key;
233  if (bucket) {
234  off /= bucket;
235  }
236  if (key_base_ptr[off] == EMPTY_KEY_64) {
237  key_base_ptr[off] = key;
238  }
239  return off;
240 }
241 
243  int64_t* output_buffer,
244  const uint32_t output_buffer_entry_count,
245  const uint32_t pos,
246  const int64_t offset_in_fragment,
247  const uint32_t row_size_quad) {
248  uint64_t off = static_cast<uint64_t>(pos) * static_cast<uint64_t>(row_size_quad);
249  if (pos < output_buffer_entry_count) {
250  output_buffer[off] = offset_in_fragment;
251  return output_buffer + off + 1;
252  }
253  return NULL;
254 }
255 
256 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
257 get_columnar_scan_output_offset(int64_t* output_buffer,
258  const uint32_t output_buffer_entry_count,
259  const uint32_t pos,
260  const int64_t offset_in_fragment) {
261  if (pos < output_buffer_entry_count) {
262  output_buffer[pos] = offset_in_fragment;
263  return pos;
264  }
265  return -1;
266 }
267 
268 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
269 bucketized_hash_join_idx(int64_t hash_buff,
270  int64_t const key,
271  int64_t const min_key,
272  int64_t const max_key,
273  int64_t bucket_normalization) {
274  if (key >= min_key && key <= max_key) {
276  reinterpret_cast<int32_t*>(hash_buff), key, min_key, bucket_normalization);
277  }
278  return -1;
279 }
280 
281 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
282 hash_join_idx(int64_t hash_buff,
283  const int64_t key,
284  const int64_t min_key,
285  const int64_t max_key) {
286  if (key >= min_key && key <= max_key) {
287  return *SUFFIX(get_hash_slot)(reinterpret_cast<int32_t*>(hash_buff), key, min_key);
288  }
289  return -1;
290 }
291 
292 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
294  const int64_t key,
295  const int64_t min_key,
296  const int64_t max_key,
297  const int64_t null_val,
298  const int64_t bucket_normalization) {
299  return key != null_val ? bucketized_hash_join_idx(
300  hash_buff, key, min_key, max_key, bucket_normalization)
301  : -1;
302 }
303 
304 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
305 hash_join_idx_nullable(int64_t hash_buff,
306  const int64_t key,
307  const int64_t min_key,
308  const int64_t max_key,
309  const int64_t null_val) {
310  return key != null_val ? hash_join_idx(hash_buff, key, min_key, max_key) : -1;
311 }
312 
313 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
315  const int64_t key,
316  const int64_t min_key,
317  const int64_t max_key,
318  const int64_t null_val,
319  const int64_t translated_val,
320  const int64_t bucket_normalization) {
321  return key != null_val ? bucketized_hash_join_idx(
322  hash_buff, key, min_key, max_key, bucket_normalization)
323  : bucketized_hash_join_idx(hash_buff,
324  translated_val,
325  min_key,
326  translated_val,
327  bucket_normalization);
328 }
329 
330 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
331 hash_join_idx_bitwise(int64_t hash_buff,
332  const int64_t key,
333  const int64_t min_key,
334  const int64_t max_key,
335  const int64_t null_val,
336  const int64_t translated_val) {
337  return key != null_val
338  ? hash_join_idx(hash_buff, key, min_key, max_key)
339  : hash_join_idx(hash_buff, translated_val, min_key, translated_val);
340 }
341 
342 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
343 hash_join_idx_sharded(int64_t hash_buff,
344  const int64_t key,
345  const int64_t min_key,
346  const int64_t max_key,
347  const uint32_t entry_count_per_shard,
348  const uint32_t num_shards,
349  const uint32_t device_count) {
350  if (key >= min_key && key <= max_key) {
351  return *SUFFIX(get_hash_slot_sharded)(reinterpret_cast<int32_t*>(hash_buff),
352  key,
353  min_key,
354  entry_count_per_shard,
355  num_shards,
356  device_count);
357  }
358  return -1;
359 }
360 
361 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
363  const int64_t key,
364  const int64_t min_key,
365  const int64_t max_key,
366  const uint32_t entry_count_per_shard,
367  const uint32_t num_shards,
368  const uint32_t device_count,
369  const int64_t null_val) {
370  return key != null_val ? hash_join_idx_sharded(hash_buff,
371  key,
372  min_key,
373  max_key,
374  entry_count_per_shard,
375  num_shards,
376  device_count)
377  : -1;
378 }
379 
380 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
382  const int64_t key,
383  const int64_t min_key,
384  const int64_t max_key,
385  const uint32_t entry_count_per_shard,
386  const uint32_t num_shards,
387  const uint32_t device_count,
388  const int64_t null_val,
389  const int64_t translated_val) {
390  return key != null_val ? hash_join_idx_sharded(hash_buff,
391  key,
392  min_key,
393  max_key,
394  entry_count_per_shard,
395  num_shards,
396  device_count)
397  : hash_join_idx_sharded(hash_buff,
398  translated_val,
399  min_key,
400  translated_val,
401  entry_count_per_shard,
402  num_shards,
403  device_count);
404 }
405 
406 #define DEF_TRANSLATE_NULL_KEY(key_type) \
407  extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t translate_null_key_##key_type( \
408  const key_type key, const key_type null_val, const int64_t translated_val) { \
409  if (key == null_val) { \
410  return translated_val; \
411  } \
412  return key; \
413  }
414 
416 DEF_TRANSLATE_NULL_KEY(int16_t)
417 DEF_TRANSLATE_NULL_KEY(int32_t)
418 DEF_TRANSLATE_NULL_KEY(int64_t)
419 
420 #undef DEF_TRANSLATE_NULL_KEY
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t * get_group_value_fast_with_original_key(int64_t *groups_buffer, const int64_t key, const int64_t orig_key, const int64_t min_key, const int64_t bucket, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t translated_val)
__device__ bool dynamic_watchdog()
__device__ int64_t * get_matching_group_value_columnar(int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_qw_count, const size_t entry_count)
#define EMPTY_KEY_64
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t get_columnar_scan_output_offset(int64_t *output_buffer, const uint32_t output_buffer_entry_count, const uint32_t pos, const int64_t offset_in_fragment)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t bucket_normalization)
Definition: JoinHashImpl.h:66
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t * get_scan_output_slot(int64_t *output_buffer, const uint32_t output_buffer_entry_count, const uint32_t pos, const int64_t offset_in_fragment, const uint32_t row_size_quad)
#define DEF_TRANSLATE_NULL_KEY(key_type)
#define SUFFIX(name)
__device__ int64_t * get_matching_group_value(int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t get_columnar_group_bin_offset(int64_t *key_base_ptr, const int64_t key, const int64_t min_key, const int64_t bucket)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx_bitwise(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t translated_val, const int64_t bucket_normalization)
#define DEVICE
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key)
Definition: JoinHashImpl.h:74
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t bucket_normalization)
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value_columnar_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_qw_count)
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad)
#define RUNTIME_EXPORT
RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t get_group_value_columnar_slot_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx(int64_t hash_buff, int64_t const key, int64_t const min_key, int64_t const max_key, int64_t bucket_normalization)
RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t get_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val)
RUNTIME_EXPORT NEVER_INLINE DEVICE uint32_t MurmurHash3(const void *key, int len, const uint32_t seed)
Definition: MurmurHash.cpp:33
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot_sharded(int32_t *buff, const int64_t key, const int64_t min_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
Definition: JoinHashImpl.h:95
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t * get_group_value_fast(int64_t *groups_buffer, const int64_t key, const int64_t min_key, const int64_t bucket, const uint32_t row_size_quad)
#define NEVER_INLINE
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t null_val)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t key_hash(const int64_t *key, const uint32_t key_count, const uint32_t key_byte_width)
__device__ int32_t get_matching_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)
#define ALWAYS_INLINE
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value_columnar(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_qw_count)
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise_sharded(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t null_val, const int64_t translated_val)