OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
GroupByRuntime.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 #include "MurmurHash.h"
19 
20 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t
21 key_hash(const int64_t* key, const uint32_t key_count, const uint32_t key_byte_width) {
22  return MurmurHash3(key, key_byte_width * key_count, 0);
23 }
24 
26  int64_t* groups_buffer,
27  const uint32_t groups_buffer_entry_count,
28  const int64_t* key,
29  const uint32_t key_count,
30  const uint32_t key_width,
31  const uint32_t row_size_quad) {
32  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
33  int64_t* matching_group = get_matching_group_value(
34  groups_buffer, h, key, key_count, key_width, row_size_quad);
35  if (matching_group) {
36  return matching_group;
37  }
38  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
39  while (h_probe != h) {
40  matching_group = get_matching_group_value(
41  groups_buffer, h_probe, key, key_count, key_width, row_size_quad);
42  if (matching_group) {
43  return matching_group;
44  }
45  h_probe = (h_probe + 1) % groups_buffer_entry_count;
46  }
47  return NULL;
48 }
49 
51 
53  int64_t* groups_buffer,
54  const uint32_t groups_buffer_entry_count,
55  const int64_t* key,
56  const uint32_t key_count,
57  const uint32_t key_width,
58  const uint32_t row_size_quad) {
59  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
60  int64_t* matching_group = get_matching_group_value(
61  groups_buffer, h, key, key_count, key_width, row_size_quad);
62  if (matching_group) {
63  return matching_group;
64  }
65  uint32_t watchdog_countdown = 100;
66  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
67  while (h_probe != h) {
68  matching_group = get_matching_group_value(
69  groups_buffer, h_probe, key, key_count, key_width, row_size_quad);
70  if (matching_group) {
71  return matching_group;
72  }
73  h_probe = (h_probe + 1) % groups_buffer_entry_count;
74  if (--watchdog_countdown == 0) {
75  if (dynamic_watchdog()) {
76  return NULL;
77  }
78  watchdog_countdown = 100;
79  }
80  }
81  return NULL;
82 }
83 
84 extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t
85 get_group_value_columnar_slot(int64_t* groups_buffer,
86  const uint32_t groups_buffer_entry_count,
87  const int64_t* key,
88  const uint32_t key_count,
89  const uint32_t key_width) {
90  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
91  int32_t matching_slot = get_matching_group_value_columnar_slot(
92  groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
93  if (matching_slot != -1) {
94  return h;
95  }
96  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
97  while (h_probe != h) {
99  groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
100  if (matching_slot != -1) {
101  return h_probe;
102  }
103  h_probe = (h_probe + 1) % groups_buffer_entry_count;
104  }
105  return -1;
106 }
107 
108 extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t
110  const uint32_t groups_buffer_entry_count,
111  const int64_t* key,
112  const uint32_t key_count,
113  const uint32_t key_width) {
114  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
115  int32_t matching_slot = get_matching_group_value_columnar_slot(
116  groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
117  if (matching_slot != -1) {
118  return h;
119  }
120  uint32_t watchdog_countdown = 100;
121  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
122  while (h_probe != h) {
124  groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
125  if (matching_slot != -1) {
126  return h_probe;
127  }
128  h_probe = (h_probe + 1) % groups_buffer_entry_count;
129  if (--watchdog_countdown == 0) {
130  if (dynamic_watchdog()) {
131  return -1;
132  }
133  watchdog_countdown = 100;
134  }
135  }
136  return -1;
137 }
138 
140  int64_t* groups_buffer,
141  const uint32_t groups_buffer_entry_count,
142  const int64_t* key,
143  const uint32_t key_qw_count) {
144  uint32_t h = key_hash(key, key_qw_count, sizeof(int64_t)) % groups_buffer_entry_count;
145  int64_t* matching_group = get_matching_group_value_columnar(
146  groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
147  if (matching_group) {
148  return matching_group;
149  }
150  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
151  while (h_probe != h) {
152  matching_group = get_matching_group_value_columnar(
153  groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
154  if (matching_group) {
155  return matching_group;
156  }
157  h_probe = (h_probe + 1) % groups_buffer_entry_count;
158  }
159  return NULL;
160 }
161 
162 extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t*
164  const uint32_t groups_buffer_entry_count,
165  const int64_t* key,
166  const uint32_t key_qw_count) {
167  uint32_t h = key_hash(key, key_qw_count, sizeof(int64_t)) % groups_buffer_entry_count;
168  int64_t* matching_group = get_matching_group_value_columnar(
169  groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
170  if (matching_group) {
171  return matching_group;
172  }
173  uint32_t watchdog_countdown = 100;
174  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
175  while (h_probe != h) {
176  matching_group = get_matching_group_value_columnar(
177  groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
178  if (matching_group) {
179  return matching_group;
180  }
181  h_probe = (h_probe + 1) % groups_buffer_entry_count;
182  if (--watchdog_countdown == 0) {
183  if (dynamic_watchdog()) {
184  return NULL;
185  }
186  watchdog_countdown = 100;
187  }
188  }
189  return NULL;
190 }
191 
193  int64_t* groups_buffer,
194  const int64_t key,
195  const int64_t min_key,
196  const int64_t bucket,
197  const uint32_t row_size_quad) {
198  int64_t key_diff = key - min_key;
199  if (bucket) {
200  key_diff /= bucket;
201  }
202  int64_t off = key_diff * row_size_quad;
203  if (groups_buffer[off] == EMPTY_KEY_64) {
204  groups_buffer[off] = key;
205  }
206  return groups_buffer + off + 1;
207 }
208 
209 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t*
211  const int64_t key,
212  const int64_t orig_key,
213  const int64_t min_key,
214  const int64_t bucket,
215  const uint32_t row_size_quad) {
216  int64_t key_diff = key - min_key;
217  if (bucket) {
218  key_diff /= bucket;
219  }
220  int64_t off = key_diff * row_size_quad;
221  if (groups_buffer[off] == EMPTY_KEY_64) {
222  groups_buffer[off] = orig_key;
223  }
224  return groups_buffer + off + 1;
225 }
226 
227 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t
228 get_columnar_group_bin_offset(int64_t* key_base_ptr,
229  const int64_t key,
230  const int64_t min_key,
231  const int64_t bucket) {
232  int64_t off = key - min_key;
233  if (bucket) {
234  off /= bucket;
235  }
236  if (key_base_ptr[off] == EMPTY_KEY_64) {
237  key_base_ptr[off] = key;
238  }
239  return off;
240 }
241 
243  int64_t* output_buffer,
244  const uint32_t output_buffer_entry_count,
245  const uint32_t pos,
246  const int64_t offset_in_fragment,
247  const uint32_t row_size_quad) {
248  uint64_t off = static_cast<uint64_t>(pos) * static_cast<uint64_t>(row_size_quad);
249  if (pos < output_buffer_entry_count) {
250  output_buffer[off] = offset_in_fragment;
251  return output_buffer + off + 1;
252  }
253  return NULL;
254 }
255 
256 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t
257 get_columnar_scan_output_offset(int64_t* output_buffer,
258  const uint32_t output_buffer_entry_count,
259  const uint32_t pos,
260  const int64_t offset_in_fragment) {
261  if (pos < output_buffer_entry_count) {
262  output_buffer[pos] = offset_in_fragment;
263  return pos;
264  }
265  return -1;
266 }
267 
268 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
269 bucketized_hash_join_idx(int64_t hash_buff,
270  int64_t const key,
271  int64_t const min_key,
272  int64_t const max_key,
273  const int64_t translated_null_val,
274  int64_t bucket_normalization) {
275  if (hash_buff && key >= min_key && key <= max_key) {
276  return *SUFFIX(get_bucketized_hash_slot)(reinterpret_cast<int32_t*>(hash_buff),
277  key,
278  min_key / bucket_normalization,
279  translated_null_val,
280  bucket_normalization);
281  }
282  return -1;
283 }
284 
285 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
286 hash_join_idx(int64_t hash_buff,
287  const int64_t key,
288  const int64_t min_key,
289  const int64_t max_key) {
290  if (key >= min_key && key <= max_key) {
291  return *SUFFIX(get_hash_slot)(reinterpret_cast<int32_t*>(hash_buff), key, min_key);
292  }
293  return -1;
294 }
295 
296 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
298  const int64_t key,
299  const int64_t min_key,
300  const int64_t max_key,
301  const int64_t null_val,
302  const int64_t bucket_normalization) {
303  return key != null_val
305  hash_buff, key, min_key, max_key, null_val, bucket_normalization)
306  : -1;
307 }
308 
309 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
310 hash_join_idx_nullable(int64_t hash_buff,
311  const int64_t key,
312  const int64_t min_key,
313  const int64_t max_key,
314  const int64_t null_val) {
315  return key != null_val ? hash_join_idx(hash_buff, key, min_key, max_key) : -1;
316 }
317 
318 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
320  const int64_t key,
321  const int64_t min_key,
322  const int64_t max_key,
323  const int64_t null_val,
324  const int64_t translated_val,
325  const int64_t bucket_normalization) {
326  return key != null_val
328  hash_buff, key, min_key, max_key, translated_val, bucket_normalization)
329  : bucketized_hash_join_idx(hash_buff,
330  translated_val,
331  min_key,
332  max_key,
333  translated_val,
334  bucket_normalization);
335 }
336 
337 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
338 hash_join_idx_bitwise(int64_t hash_buff,
339  const int64_t key,
340  const int64_t min_key,
341  const int64_t max_key,
342  const int64_t null_val,
343  const int64_t translated_val) {
344  return key != null_val
345  ? hash_join_idx(hash_buff, key, min_key, max_key)
346  : hash_join_idx(hash_buff, translated_val, min_key, translated_val);
347 }
348 
349 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
350 hash_join_idx_sharded(int64_t hash_buff,
351  const int64_t key,
352  const int64_t min_key,
353  const int64_t max_key,
354  const uint32_t entry_count_per_shard,
355  const uint32_t num_shards,
356  const uint32_t device_count) {
357  if (hash_buff && key >= min_key && key <= max_key) {
358  return *SUFFIX(get_hash_slot_sharded)(reinterpret_cast<int32_t*>(hash_buff),
359  key,
360  min_key,
361  entry_count_per_shard,
362  num_shards,
363  device_count);
364  }
365  return -1;
366 }
367 
368 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
370  const int64_t key,
371  const int64_t min_key,
372  const int64_t max_key,
373  const uint32_t entry_count_per_shard,
374  const uint32_t num_shards,
375  const uint32_t device_count,
376  const int64_t null_val) {
377  return key != null_val ? hash_join_idx_sharded(hash_buff,
378  key,
379  min_key,
380  max_key,
381  entry_count_per_shard,
382  num_shards,
383  device_count)
384  : -1;
385 }
386 
387 extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t
389  const int64_t key,
390  const int64_t min_key,
391  const int64_t max_key,
392  const uint32_t entry_count_per_shard,
393  const uint32_t num_shards,
394  const uint32_t device_count,
395  const int64_t null_val,
396  const int64_t translated_val) {
397  return key != null_val ? hash_join_idx_sharded(hash_buff,
398  key,
399  min_key,
400  max_key,
401  entry_count_per_shard,
402  num_shards,
403  device_count)
404  : hash_join_idx_sharded(hash_buff,
405  translated_val,
406  min_key,
407  translated_val,
408  entry_count_per_shard,
409  num_shards,
410  device_count);
411 }
412 
413 #define DEF_TRANSLATE_NULL_KEY(key_type) \
414  extern "C" RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t translate_null_key_##key_type( \
415  const key_type key, const key_type null_val, const int64_t translated_val) { \
416  if (key == null_val) { \
417  return translated_val; \
418  } \
419  return key; \
420  }
421 
423 DEF_TRANSLATE_NULL_KEY(int16_t)
424 DEF_TRANSLATE_NULL_KEY(int32_t)
425 DEF_TRANSLATE_NULL_KEY(int64_t)
426 
427 #undef DEF_TRANSLATE_NULL_KEY
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t * get_group_value_fast_with_original_key(int64_t *groups_buffer, const int64_t key, const int64_t orig_key, const int64_t min_key, const int64_t bucket, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t translated_val)
__device__ bool dynamic_watchdog()
__device__ int64_t * get_matching_group_value_columnar(int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_qw_count, const size_t entry_count)
#define EMPTY_KEY_64
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t get_columnar_scan_output_offset(int64_t *output_buffer, const uint32_t output_buffer_entry_count, const uint32_t pos, const int64_t offset_in_fragment)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t * get_scan_output_slot(int64_t *output_buffer, const uint32_t output_buffer_entry_count, const uint32_t pos, const int64_t offset_in_fragment, const uint32_t row_size_quad)
#define DEF_TRANSLATE_NULL_KEY(key_type)
#define SUFFIX(name)
__device__ int64_t * get_matching_group_value(int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t get_columnar_group_bin_offset(int64_t *key_base_ptr, const int64_t key, const int64_t min_key, const int64_t bucket)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx_bitwise(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t translated_val, const int64_t bucket_normalization)
#define DEVICE
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key)
Definition: JoinHashImpl.h:76
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t translated_null_val, const int64_t bucket_normalization)
Definition: JoinHashImpl.h:66
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t bucket_normalization)
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value_columnar_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_qw_count)
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx(int64_t hash_buff, int64_t const key, int64_t const min_key, int64_t const max_key, const int64_t translated_null_val, int64_t bucket_normalization)
#define RUNTIME_EXPORT
RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t get_group_value_columnar_slot_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
RUNTIME_EXPORT NEVER_INLINE DEVICE int32_t get_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val)
RUNTIME_EXPORT NEVER_INLINE DEVICE uint32_t MurmurHash3(const void *key, int len, const uint32_t seed)
Definition: MurmurHash.cpp:33
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot_sharded(int32_t *buff, const int64_t key, const int64_t min_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
Definition: JoinHashImpl.h:108
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t * get_group_value_fast(int64_t *groups_buffer, const int64_t key, const int64_t min_key, const int64_t bucket, const uint32_t row_size_quad)
#define NEVER_INLINE
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t null_val)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE uint32_t key_hash(const int64_t *key, const uint32_t key_count, const uint32_t key_byte_width)
__device__ int32_t get_matching_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)
#define ALWAYS_INLINE
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value_columnar(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_qw_count)
RUNTIME_EXPORT NEVER_INLINE DEVICE int64_t * get_group_value_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise_sharded(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t null_val, const int64_t translated_val)