OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
GroupByRuntime.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "JoinHashImpl.h"
18 #include "MurmurHash.h"
19 
20 extern "C" ALWAYS_INLINE DEVICE uint32_t key_hash(const int64_t* key,
21  const uint32_t key_count,
22  const uint32_t key_byte_width) {
23  return MurmurHash1(key, key_byte_width * key_count, 0);
24 }
25 
26 extern "C" NEVER_INLINE DEVICE int64_t* get_group_value(
27  int64_t* groups_buffer,
28  const uint32_t groups_buffer_entry_count,
29  const int64_t* key,
30  const uint32_t key_count,
31  const uint32_t key_width,
32  const uint32_t row_size_quad,
33  const int64_t* init_vals) {
34  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
35  int64_t* matching_group = get_matching_group_value(
36  groups_buffer, h, key, key_count, key_width, row_size_quad, init_vals);
37  if (matching_group) {
38  return matching_group;
39  }
40  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
41  while (h_probe != h) {
42  matching_group = get_matching_group_value(
43  groups_buffer, h_probe, key, key_count, key_width, row_size_quad, init_vals);
44  if (matching_group) {
45  return matching_group;
46  }
47  h_probe = (h_probe + 1) % groups_buffer_entry_count;
48  }
49  return NULL;
50 }
51 
52 extern "C" NEVER_INLINE DEVICE bool dynamic_watchdog();
53 
55  int64_t* groups_buffer,
56  const uint32_t groups_buffer_entry_count,
57  const int64_t* key,
58  const uint32_t key_count,
59  const uint32_t key_width,
60  const uint32_t row_size_quad,
61  const int64_t* init_vals) {
62  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
63  int64_t* matching_group = get_matching_group_value(
64  groups_buffer, h, key, key_count, key_width, row_size_quad, init_vals);
65  if (matching_group) {
66  return matching_group;
67  }
68  uint32_t watchdog_countdown = 100;
69  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
70  while (h_probe != h) {
71  matching_group = get_matching_group_value(
72  groups_buffer, h_probe, key, key_count, key_width, row_size_quad, init_vals);
73  if (matching_group) {
74  return matching_group;
75  }
76  h_probe = (h_probe + 1) % groups_buffer_entry_count;
77  if (--watchdog_countdown == 0) {
78  if (dynamic_watchdog()) {
79  return NULL;
80  }
81  watchdog_countdown = 100;
82  }
83  }
84  return NULL;
85 }
86 
87 extern "C" NEVER_INLINE DEVICE int32_t
89  const uint32_t groups_buffer_entry_count,
90  const int64_t* key,
91  const uint32_t key_count,
92  const uint32_t key_width) {
93  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
94  int32_t matching_slot = get_matching_group_value_columnar_slot(
95  groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
96  if (matching_slot != -1) {
97  return h;
98  }
99  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
100  while (h_probe != h) {
102  groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
103  if (matching_slot != -1) {
104  return h_probe;
105  }
106  h_probe = (h_probe + 1) % groups_buffer_entry_count;
107  }
108  return -1;
109 }
110 
111 extern "C" NEVER_INLINE DEVICE int32_t
113  const uint32_t groups_buffer_entry_count,
114  const int64_t* key,
115  const uint32_t key_count,
116  const uint32_t key_width) {
117  uint32_t h = key_hash(key, key_count, key_width) % groups_buffer_entry_count;
118  int32_t matching_slot = get_matching_group_value_columnar_slot(
119  groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
120  if (matching_slot != -1) {
121  return h;
122  }
123  uint32_t watchdog_countdown = 100;
124  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
125  while (h_probe != h) {
127  groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
128  if (matching_slot != -1) {
129  return h_probe;
130  }
131  h_probe = (h_probe + 1) % groups_buffer_entry_count;
132  if (--watchdog_countdown == 0) {
133  if (dynamic_watchdog()) {
134  return -1;
135  }
136  watchdog_countdown = 100;
137  }
138  }
139  return -1;
140 }
141 
143  int64_t* groups_buffer,
144  const uint32_t groups_buffer_entry_count,
145  const int64_t* key,
146  const uint32_t key_qw_count) {
147  uint32_t h = key_hash(key, key_qw_count, sizeof(int64_t)) % groups_buffer_entry_count;
148  int64_t* matching_group = get_matching_group_value_columnar(
149  groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
150  if (matching_group) {
151  return matching_group;
152  }
153  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
154  while (h_probe != h) {
155  matching_group = get_matching_group_value_columnar(
156  groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
157  if (matching_group) {
158  return matching_group;
159  }
160  h_probe = (h_probe + 1) % groups_buffer_entry_count;
161  }
162  return NULL;
163 }
164 
166  int64_t* groups_buffer,
167  const uint32_t groups_buffer_entry_count,
168  const int64_t* key,
169  const uint32_t key_qw_count) {
170  uint32_t h = key_hash(key, key_qw_count, sizeof(int64_t)) % groups_buffer_entry_count;
171  int64_t* matching_group = get_matching_group_value_columnar(
172  groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
173  if (matching_group) {
174  return matching_group;
175  }
176  uint32_t watchdog_countdown = 100;
177  uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
178  while (h_probe != h) {
179  matching_group = get_matching_group_value_columnar(
180  groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
181  if (matching_group) {
182  return matching_group;
183  }
184  h_probe = (h_probe + 1) % groups_buffer_entry_count;
185  if (--watchdog_countdown == 0) {
186  if (dynamic_watchdog()) {
187  return NULL;
188  }
189  watchdog_countdown = 100;
190  }
191  }
192  return NULL;
193 }
194 
196  int64_t* groups_buffer,
197  const int64_t key,
198  const int64_t min_key,
199  const int64_t bucket,
200  const uint32_t row_size_quad) {
201  int64_t key_diff = key - min_key;
202  if (bucket) {
203  key_diff /= bucket;
204  }
205  int64_t off = key_diff * row_size_quad;
206  if (groups_buffer[off] == EMPTY_KEY_64) {
207  groups_buffer[off] = key;
208  }
209  return groups_buffer + off + 1;
210 }
211 
213  int64_t* groups_buffer,
214  const int64_t key,
215  const int64_t orig_key,
216  const int64_t min_key,
217  const int64_t bucket,
218  const uint32_t row_size_quad) {
219  int64_t key_diff = key - min_key;
220  if (bucket) {
221  key_diff /= bucket;
222  }
223  int64_t off = key_diff * row_size_quad;
224  if (groups_buffer[off] == EMPTY_KEY_64) {
225  groups_buffer[off] = orig_key;
226  }
227  return groups_buffer + off + 1;
228 }
229 
230 extern "C" ALWAYS_INLINE DEVICE uint32_t
231 get_columnar_group_bin_offset(int64_t* key_base_ptr,
232  const int64_t key,
233  const int64_t min_key,
234  const int64_t bucket) {
235  int64_t off = key - min_key;
236  if (bucket) {
237  off /= bucket;
238  }
239  if (key_base_ptr[off] == EMPTY_KEY_64) {
240  key_base_ptr[off] = key;
241  }
242  return off;
243 }
244 
246  int64_t* output_buffer,
247  const uint32_t output_buffer_entry_count,
248  const uint32_t pos,
249  const int64_t offset_in_fragment,
250  const uint32_t row_size_quad) {
251  uint64_t off = static_cast<uint64_t>(pos) * static_cast<uint64_t>(row_size_quad);
252  if (pos < output_buffer_entry_count) {
253  output_buffer[off] = offset_in_fragment;
254  return output_buffer + off + 1;
255  }
256  return NULL;
257 }
258 
259 extern "C" ALWAYS_INLINE DEVICE int32_t
260 get_columnar_scan_output_offset(int64_t* output_buffer,
261  const uint32_t output_buffer_entry_count,
262  const uint32_t pos,
263  const int64_t offset_in_fragment) {
264  if (pos < output_buffer_entry_count) {
265  output_buffer[pos] = offset_in_fragment;
266  return pos;
267  }
268  return -1;
269 }
270 
271 extern "C" ALWAYS_INLINE DEVICE int64_t
272 bucketized_hash_join_idx(int64_t hash_buff,
273  int64_t const key,
274  int64_t const min_key,
275  int64_t const max_key,
276  int64_t bucket_normalization) {
277  if (key >= min_key && key <= max_key) {
279  reinterpret_cast<int32_t*>(hash_buff), key, min_key, bucket_normalization);
280  }
281  return -1;
282 }
283 
284 extern "C" ALWAYS_INLINE DEVICE int64_t hash_join_idx(int64_t hash_buff,
285  const int64_t key,
286  const int64_t min_key,
287  const int64_t max_key) {
288  if (key >= min_key && key <= max_key) {
289  return *SUFFIX(get_hash_slot)(reinterpret_cast<int32_t*>(hash_buff), key, min_key);
290  }
291  return -1;
292 }
293 
294 extern "C" ALWAYS_INLINE DEVICE int64_t
296  const int64_t key,
297  const int64_t min_key,
298  const int64_t max_key,
299  const int64_t null_val,
300  const int64_t bucket_normalization) {
301  return key != null_val ? bucketized_hash_join_idx(
302  hash_buff, key, min_key, max_key, bucket_normalization)
303  : -1;
304 }
305 
306 extern "C" ALWAYS_INLINE DEVICE int64_t hash_join_idx_nullable(int64_t hash_buff,
307  const int64_t key,
308  const int64_t min_key,
309  const int64_t max_key,
310  const int64_t null_val) {
311  return key != null_val ? hash_join_idx(hash_buff, key, min_key, max_key) : -1;
312 }
313 
314 extern "C" ALWAYS_INLINE DEVICE int64_t
316  const int64_t key,
317  const int64_t min_key,
318  const int64_t max_key,
319  const int64_t null_val,
320  const int64_t translated_val,
321  const int64_t bucket_normalization) {
322  return key != null_val ? bucketized_hash_join_idx(
323  hash_buff, key, min_key, max_key, bucket_normalization)
324  : bucketized_hash_join_idx(hash_buff,
325  translated_val,
326  min_key,
327  translated_val,
328  bucket_normalization);
329 }
330 
331 extern "C" ALWAYS_INLINE DEVICE int64_t
332 hash_join_idx_bitwise(int64_t hash_buff,
333  const int64_t key,
334  const int64_t min_key,
335  const int64_t max_key,
336  const int64_t null_val,
337  const int64_t translated_val) {
338  return key != null_val
339  ? hash_join_idx(hash_buff, key, min_key, max_key)
340  : hash_join_idx(hash_buff, translated_val, min_key, translated_val);
341 }
342 
343 extern "C" ALWAYS_INLINE DEVICE int64_t
344 hash_join_idx_sharded(int64_t hash_buff,
345  const int64_t key,
346  const int64_t min_key,
347  const int64_t max_key,
348  const uint32_t entry_count_per_shard,
349  const uint32_t num_shards,
350  const uint32_t device_count) {
351  if (key >= min_key && key <= max_key) {
352  return *SUFFIX(get_hash_slot_sharded)(reinterpret_cast<int32_t*>(hash_buff),
353  key,
354  min_key,
355  entry_count_per_shard,
356  num_shards,
357  device_count);
358  }
359  return -1;
360 }
361 
362 extern "C" ALWAYS_INLINE DEVICE int64_t
364  const int64_t key,
365  const int64_t min_key,
366  const int64_t max_key,
367  const uint32_t entry_count_per_shard,
368  const uint32_t num_shards,
369  const uint32_t device_count,
370  const int64_t null_val) {
371  return key != null_val ? hash_join_idx_sharded(hash_buff,
372  key,
373  min_key,
374  max_key,
375  entry_count_per_shard,
376  num_shards,
377  device_count)
378  : -1;
379 }
380 
381 extern "C" ALWAYS_INLINE DEVICE int64_t
383  const int64_t key,
384  const int64_t min_key,
385  const int64_t max_key,
386  const uint32_t entry_count_per_shard,
387  const uint32_t num_shards,
388  const uint32_t device_count,
389  const int64_t null_val,
390  const int64_t translated_val) {
391  return key != null_val ? hash_join_idx_sharded(hash_buff,
392  key,
393  min_key,
394  max_key,
395  entry_count_per_shard,
396  num_shards,
397  device_count)
398  : hash_join_idx_sharded(hash_buff,
399  translated_val,
400  min_key,
401  translated_val,
402  entry_count_per_shard,
403  num_shards,
404  device_count);
405 }
406 
407 #define DEF_TRANSLATE_NULL_KEY(key_type) \
408  extern "C" NEVER_INLINE DEVICE int64_t translate_null_key_##key_type( \
409  const key_type key, const key_type null_val, const int64_t translated_val) { \
410  if (key == null_val) { \
411  return translated_val; \
412  } \
413  return key; \
414  }
415 
417 DEF_TRANSLATE_NULL_KEY(int16_t)
418 DEF_TRANSLATE_NULL_KEY(int32_t)
419 DEF_TRANSLATE_NULL_KEY(int64_t)
420 
421 #undef DEF_TRANSLATE_NULL_KEY
NEVER_INLINE DEVICE uint32_t MurmurHash1(const void *key, int len, const uint32_t seed)
Definition: MurmurHash.cpp:20
ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t bucket_normalization)
const int32_t groups_buffer_size return groups_buffer
NEVER_INLINE DEVICE int64_t * get_group_value_columnar(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_qw_count)
ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx_bitwise(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t translated_val, const int64_t bucket_normalization)
__device__ bool dynamic_watchdog()
__device__ int64_t * get_matching_group_value_columnar(int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_qw_count, const size_t entry_count)
#define EMPTY_KEY_64
ALWAYS_INLINE DEVICE int64_t * get_scan_output_slot(int64_t *output_buffer, const uint32_t output_buffer_entry_count, const uint32_t pos, const int64_t offset_in_fragment, const uint32_t row_size_quad)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t bucket_normalization)
Definition: JoinHashImpl.h:31
ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
NEVER_INLINE DEVICE int64_t * get_group_value_columnar_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_qw_count)
ALWAYS_INLINE DEVICE uint32_t key_hash(const int64_t *key, const uint32_t key_count, const uint32_t key_byte_width)
NEVER_INLINE DEVICE int32_t get_group_value_columnar_slot_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
#define DEF_TRANSLATE_NULL_KEY(key_type)
ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise_sharded(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t null_val, const int64_t translated_val)
#define SUFFIX(name)
__device__ int64_t * get_matching_group_value(int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)
NEVER_INLINE DEVICE int64_t * get_group_value_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad, const int64_t *init_vals)
ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t null_val)
ALWAYS_INLINE DEVICE int64_t * get_group_value_fast(int64_t *groups_buffer, const int64_t key, const int64_t min_key, const int64_t bucket, const uint32_t row_size_quad)
const int64_t const uint32_t groups_buffer_entry_count
const int64_t const uint32_t const uint32_t key_qw_count
ALWAYS_INLINE DEVICE int64_t * get_group_value_fast_with_original_key(int64_t *groups_buffer, const int64_t key, const int64_t orig_key, const int64_t min_key, const int64_t bucket, const uint32_t row_size_quad)
#define DEVICE
ALWAYS_INLINE DEVICE int64_t hash_join_idx_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key)
Definition: JoinHashImpl.h:39
ALWAYS_INLINE DEVICE int64_t hash_join_idx(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key)
NEVER_INLINE DEVICE int64_t * get_group_value(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad, const int64_t *init_vals)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot_sharded(int32_t *buff, const int64_t key, const int64_t min_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
Definition: JoinHashImpl.h:60
ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx(int64_t hash_buff, int64_t const key, int64_t const min_key, int64_t const max_key, int64_t bucket_normalization)
#define NEVER_INLINE
ALWAYS_INLINE DEVICE int32_t get_columnar_scan_output_offset(int64_t *output_buffer, const uint32_t output_buffer_entry_count, const uint32_t pos, const int64_t offset_in_fragment)
NEVER_INLINE DEVICE int32_t get_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
ALWAYS_INLINE DEVICE uint32_t get_columnar_group_bin_offset(int64_t *key_base_ptr, const int64_t key, const int64_t min_key, const int64_t bucket)
ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t translated_val)
const int64_t * init_vals
__device__ int32_t get_matching_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)
#define ALWAYS_INLINE