OmniSciDB  94e8789169
GroupByRuntime.cpp
/*
 * Copyright 2017 MapD Technologies, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "JoinHashImpl.h"
#include "MurmurHash.h"

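// Hashes a group-by key of `key_count` components, each `key_byte_width` bytes wide,
// with MurmurHash; picks the initial slot for the hash-based group-by lookups below.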
extern "C" ALWAYS_INLINE DEVICE uint32_t key_hash(const int64_t* key,
                                                  const uint32_t key_count,
                                                  const uint32_t key_byte_width) {
  return MurmurHash1(key, key_byte_width * key_count, 0);
}

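// Row-wise group-by lookup: hashes the key, then linearly probes the buffer until
// get_matching_group_value claims a matching or empty entry. Returns a pointer to the
// group's value slots, or NULL when the buffer is full.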
extern "C" NEVER_INLINE DEVICE int64_t* get_group_value(
    int64_t* groups_buffer,
    const uint32_t groups_buffer_entry_count,
    const int64_t* key,
    const uint32_t key_count,
    const uint32_t key_width,
    const uint32_t row_size_quad) {
  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
  int64_t* matching_group = get_matching_group_value(
      groups_buffer, h, key, key_count, key_width, row_size_quad);
  if (matching_group) {
    return matching_group;
  }
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_group = get_matching_group_value(
        groups_buffer, h_probe, key, key_count, key_width, row_size_quad);
    if (matching_group) {
      return matching_group;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
  }
  return NULL;
}

extern "C" NEVER_INLINE DEVICE bool dynamic_watchdog();

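// Same probing loop as get_group_value, but checks dynamic_watchdog() every 100 probes
// and gives up (returns NULL) when it fires, so a long-running kernel can bail out.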
extern "C" NEVER_INLINE DEVICE int64_t* get_group_value_with_watchdog(
    int64_t* groups_buffer,
    const uint32_t groups_buffer_entry_count,
    const int64_t* key,
    const uint32_t key_count,
    const uint32_t key_width,
    const uint32_t row_size_quad) {
  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
  int64_t* matching_group = get_matching_group_value(
      groups_buffer, h, key, key_count, key_width, row_size_quad);
  if (matching_group) {
    return matching_group;
  }
  uint32_t watchdog_countdown = 100;
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_group = get_matching_group_value(
        groups_buffer, h_probe, key, key_count, key_width, row_size_quad);
    if (matching_group) {
      return matching_group;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
    if (--watchdog_countdown == 0) {
      if (dynamic_watchdog()) {
        return NULL;
      }
      watchdog_countdown = 100;
    }
  }
  return -1;
}

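// Columnar variant: returns the index of the matching slot instead of a pointer,
// or -1 if no slot could be found.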
extern "C" NEVER_INLINE DEVICE int32_t
get_group_value_columnar_slot(int64_t* groups_buffer,
                              const uint32_t groups_buffer_entry_count,
                              const int64_t* key,
                              const uint32_t key_count,
                              const uint32_t key_width) {
  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
  int32_t matching_slot = get_matching_group_value_columnar_slot(
      groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
  if (matching_slot != -1) {
    return h;
  }
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_slot = get_matching_group_value_columnar_slot(
        groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
    if (matching_slot != -1) {
      return h_probe;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
  }
  return -1;
}

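// Watchdog-checked version of get_group_value_columnar_slot.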
extern "C" NEVER_INLINE DEVICE int32_t
get_group_value_columnar_slot_with_watchdog(int64_t* groups_buffer,
                                            const uint32_t groups_buffer_entry_count,
                                            const int64_t* key,
                                            const uint32_t key_count,
                                            const uint32_t key_width) {
  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
  int32_t matching_slot = get_matching_group_value_columnar_slot(
      groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
  if (matching_slot != -1) {
    return h;
  }
  uint32_t watchdog_countdown = 100;
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_slot = get_matching_group_value_columnar_slot(
        groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
    if (matching_slot != -1) {
      return h_probe;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
    if (--watchdog_countdown == 0) {
      if (dynamic_watchdog()) {
        return -1;
      }
      watchdog_countdown = 100;
    }
  }
  return -1;
}

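// Columnar group-by lookup keyed on `key_qw_count` 64-bit components; returns a pointer
// into the columnar buffer, or NULL when the buffer is full.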
extern "C" NEVER_INLINE DEVICE int64_t* get_group_value_columnar(
    int64_t* groups_buffer,
    const uint32_t groups_buffer_entry_count,
    const int64_t* key,
    const uint32_t key_qw_count) {
  uint32_t h = key_hash(key, key_qw_count, sizeof(int64_t)) % groups_buffer_entry_count;
  int64_t* matching_group = get_matching_group_value_columnar(
      groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
  if (matching_group) {
    return matching_group;
  }
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_group = get_matching_group_value_columnar(
        groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
    if (matching_group) {
      return matching_group;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
  }
  return NULL;
}

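// Watchdog-checked version of get_group_value_columnar.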
extern "C" NEVER_INLINE DEVICE int64_t* get_group_value_columnar_with_watchdog(
    int64_t* groups_buffer,
    const uint32_t groups_buffer_entry_count,
    const int64_t* key,
    const uint32_t key_qw_count) {
  uint32_t h = key_hash(key, key_qw_count, sizeof(int64_t)) % groups_buffer_entry_count;
  int64_t* matching_group = get_matching_group_value_columnar(
      groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
  if (matching_group) {
    return matching_group;
  }
  uint32_t watchdog_countdown = 100;
  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
  while (h_probe != h) {
    matching_group = get_matching_group_value_columnar(
        groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
    if (matching_group) {
      return matching_group;
    }
    h_probe = (h_probe + 1) % groups_buffer_entry_count;
    if (--watchdog_countdown == 0) {
      if (dynamic_watchdog()) {
        return NULL;
      }
      watchdog_countdown = 100;
    }
  }
  return NULL;
}

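// "Fast" (perfect-hash) path: the key maps directly to a bin computed from min_key and
// an optional bucket size, so no probing is needed; the key slot is written on first use.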
extern "C" ALWAYS_INLINE DEVICE int64_t* get_group_value_fast(
    int64_t* groups_buffer,
    const int64_t key,
    const int64_t min_key,
    const int64_t bucket,
    const uint32_t row_size_quad) {
  int64_t key_diff = key - min_key;
  if (bucket) {
    key_diff /= bucket;
  }
  int64_t off = key_diff * row_size_quad;
  if (groups_buffer[off] == EMPTY_KEY_64) {
    groups_buffer[off] = key;
  }
  return groups_buffer + off + 1;
}

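// Same as get_group_value_fast, but stores orig_key in the key slot while binning on
// the (possibly translated) key.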
extern "C" ALWAYS_INLINE DEVICE int64_t* get_group_value_fast_with_original_key(
    int64_t* groups_buffer,
    const int64_t key,
    const int64_t orig_key,
    const int64_t min_key,
    const int64_t bucket,
    const uint32_t row_size_quad) {
  int64_t key_diff = key - min_key;
  if (bucket) {
    key_diff /= bucket;
  }
  int64_t off = key_diff * row_size_quad;
  if (groups_buffer[off] == EMPTY_KEY_64) {
    groups_buffer[off] = orig_key;
  }
  return groups_buffer + off + 1;
}

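// Columnar perfect-hash path: returns the bin offset rather than a pointer and
// initializes the key column entry on first touch.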
extern "C" ALWAYS_INLINE DEVICE uint32_t
get_columnar_group_bin_offset(int64_t* key_base_ptr,
                              const int64_t key,
                              const int64_t min_key,
                              const int64_t bucket) {
  int64_t off = key - min_key;
  if (bucket) {
    off /= bucket;
  }
  if (key_base_ptr[off] == EMPTY_KEY_64) {
    key_base_ptr[off] = key;
  }
  return off;
}

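// Projection output: row `pos` records its offset within the fragment in the row-wise
// output buffer and gets back a pointer to the remaining slots, or NULL when out of space.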
extern "C" ALWAYS_INLINE DEVICE int64_t* get_scan_output_slot(
    int64_t* output_buffer,
    const uint32_t output_buffer_entry_count,
    const uint32_t pos,
    const int64_t offset_in_fragment,
    const uint32_t row_size_quad) {
  uint64_t off = static_cast<uint64_t>(pos) * static_cast<uint64_t>(row_size_quad);
  if (pos < output_buffer_entry_count) {
    output_buffer[off] = offset_in_fragment;
    return output_buffer + off + 1;
  }
  return NULL;
}

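// Columnar counterpart of get_scan_output_slot: returns the output position, or -1
// when out of bounds.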
extern "C" ALWAYS_INLINE DEVICE int32_t
get_columnar_scan_output_offset(int64_t* output_buffer,
                                const uint32_t output_buffer_entry_count,
                                const uint32_t pos,
                                const int64_t offset_in_fragment) {
  if (pos < output_buffer_entry_count) {
    output_buffer[pos] = offset_in_fragment;
    return pos;
  }
  return -1;
}

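// One-to-one hash join probe with bucket normalization: returns the value stored in
// the matching hash slot, or -1 when the key falls outside [min_key, max_key].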
extern "C" ALWAYS_INLINE DEVICE int64_t
bucketized_hash_join_idx(int64_t hash_buff,
                         int64_t const key,
                         int64_t const min_key,
                         int64_t const max_key,
                         int64_t bucket_normalization) {
  if (key >= min_key && key <= max_key) {
    return *SUFFIX(get_bucketized_hash_slot)(
        reinterpret_cast<int32_t*>(hash_buff), key, min_key, bucket_normalization);
  }
  return -1;
}

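// Plain (non-bucketized) one-to-one hash join probe.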
extern "C" ALWAYS_INLINE DEVICE int64_t hash_join_idx(int64_t hash_buff,
                                                      const int64_t key,
                                                      const int64_t min_key,
                                                      const int64_t max_key) {
  if (key >= min_key && key <= max_key) {
    return *SUFFIX(get_hash_slot)(reinterpret_cast<int32_t*>(hash_buff), key, min_key);
  }
  return -1;
}

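// Nullable variant: a NULL key (key == null_val) never matches.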
extern "C" ALWAYS_INLINE DEVICE int64_t
bucketized_hash_join_idx_nullable(int64_t hash_buff,
                                  const int64_t key,
                                  const int64_t min_key,
                                  const int64_t max_key,
                                  const int64_t null_val,
                                  const int64_t bucket_normalization) {
  return key != null_val ? bucketized_hash_join_idx(
                               hash_buff, key, min_key, max_key, bucket_normalization)
                         : -1;
}

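// Nullable variant of hash_join_idx.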
extern "C" ALWAYS_INLINE DEVICE int64_t hash_join_idx_nullable(int64_t hash_buff,
                                                               const int64_t key,
                                                               const int64_t min_key,
                                                               const int64_t max_key,
                                                               const int64_t null_val) {
  return key != null_val ? hash_join_idx(hash_buff, key, min_key, max_key) : -1;
}

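// Bitwise-equality variant: a NULL key is probed under translated_val instead of being
// rejected, so NULL can still find its slot.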
extern "C" ALWAYS_INLINE DEVICE int64_t
bucketized_hash_join_idx_bitwise(int64_t hash_buff,
                                 const int64_t key,
                                 const int64_t min_key,
                                 const int64_t max_key,
                                 const int64_t null_val,
                                 const int64_t translated_val,
                                 const int64_t bucket_normalization) {
  return key != null_val ? bucketized_hash_join_idx(
                               hash_buff, key, min_key, max_key, bucket_normalization)
                         : bucketized_hash_join_idx(hash_buff,
                                                    translated_val,
                                                    min_key,
                                                    translated_val,
                                                    bucket_normalization);
}

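// Bitwise-equality variant of hash_join_idx.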
extern "C" ALWAYS_INLINE DEVICE int64_t
hash_join_idx_bitwise(int64_t hash_buff,
                      const int64_t key,
                      const int64_t min_key,
                      const int64_t max_key,
                      const int64_t null_val,
                      const int64_t translated_val) {
  return key != null_val
             ? hash_join_idx(hash_buff, key, min_key, max_key)
             : hash_join_idx(hash_buff, translated_val, min_key, translated_val);
}

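// Sharded join table probe: the slot is located inside the shard that owns the key
// (see get_hash_slot_sharded).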
extern "C" ALWAYS_INLINE DEVICE int64_t
hash_join_idx_sharded(int64_t hash_buff,
                      const int64_t key,
                      const int64_t min_key,
                      const int64_t max_key,
                      const uint32_t entry_count_per_shard,
                      const uint32_t num_shards,
                      const uint32_t device_count) {
  if (key >= min_key && key <= max_key) {
    return *SUFFIX(get_hash_slot_sharded)(reinterpret_cast<int32_t*>(hash_buff),
                                          key,
                                          min_key,
                                          entry_count_per_shard,
                                          num_shards,
                                          device_count);
  }
  return -1;
}

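// Nullable variant of hash_join_idx_sharded.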
extern "C" ALWAYS_INLINE DEVICE int64_t
hash_join_idx_sharded_nullable(int64_t hash_buff,
                               const int64_t key,
                               const int64_t min_key,
                               const int64_t max_key,
                               const uint32_t entry_count_per_shard,
                               const uint32_t num_shards,
                               const uint32_t device_count,
                               const int64_t null_val) {
  return key != null_val ? hash_join_idx_sharded(hash_buff,
                                                 key,
                                                 min_key,
                                                 max_key,
                                                 entry_count_per_shard,
                                                 num_shards,
                                                 device_count)
                         : -1;
}

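// Bitwise-equality variant of hash_join_idx_sharded: NULL keys probe under translated_val.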
extern "C" ALWAYS_INLINE DEVICE int64_t
hash_join_idx_bitwise_sharded(int64_t hash_buff,
                              const int64_t key,
                              const int64_t min_key,
                              const int64_t max_key,
                              const uint32_t entry_count_per_shard,
                              const uint32_t num_shards,
                              const uint32_t device_count,
                              const int64_t null_val,
                              const int64_t translated_val) {
  return key != null_val ? hash_join_idx_sharded(hash_buff,
                                                 key,
                                                 min_key,
                                                 max_key,
                                                 entry_count_per_shard,
                                                 num_shards,
                                                 device_count)
                         : hash_join_idx_sharded(hash_buff,
                                                 translated_val,
                                                 min_key,
                                                 translated_val,
                                                 entry_count_per_shard,
                                                 num_shards,
                                                 device_count);
}

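// Generates translate_null_key_<type> helpers that map a NULL sentinel to translated_val,
// letting NULL keys participate in the bitwise-equality join paths above.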
#define DEF_TRANSLATE_NULL_KEY(key_type)                                              \
  extern "C" NEVER_INLINE DEVICE int64_t translate_null_key_##key_type(               \
      const key_type key, const key_type null_val, const int64_t translated_val) {    \
    if (key == null_val) {                                                             \
      return translated_val;                                                           \
    }                                                                                  \
    return key;                                                                        \
  }

DEF_TRANSLATE_NULL_KEY(int8_t)
DEF_TRANSLATE_NULL_KEY(int16_t)
DEF_TRANSLATE_NULL_KEY(int32_t)
DEF_TRANSLATE_NULL_KEY(int64_t)

#undef DEF_TRANSLATE_NULL_KEY