21 const uint32_t key_count,
22 const uint32_t key_byte_width) {
23 return MurmurHash1(key, key_byte_width * key_count, 0);
// NOTE(review): fragment of get_group_value (per the signature index appended
// to this excerpt). The function header, the get_matching_group_value() call
// lines, braces, and the final miss-return are missing here — recover the
// complete body from version control before editing.
// Visible logic: hash the composite key into a home bucket, try that slot,
// then linearly probe with modulo wrap-around until the probe returns to h.
27 int64_t* groups_buffer,
28 const uint32_t groups_buffer_entry_count,
30 const uint32_t key_count,
31 const uint32_t key_width,
32 const uint32_t row_size_quad) {
// h: home bucket — key hash reduced modulo the table entry count.
33 uint32_t h =
key_hash(key, key_count, key_width) % groups_buffer_entry_count;
35 groups_buffer, h, key, key_count, key_width, row_size_quad);
37 return matching_group;
// Linear probing: start one past the home slot; loop ends when it wraps to h.
39 uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
40 while (h_probe != h) {
42 groups_buffer, h_probe, key, key_count, key_width, row_size_quad);
44 return matching_group;
46 h_probe = (h_probe + 1) % groups_buffer_entry_count;
// NOTE(review): fragment of get_group_value_with_watchdog (see signature
// index). Same linear-probing lookup as get_group_value, plus a countdown so
// the (missing) dynamic_watchdog() check runs once every 100 probe steps —
// interior lines are missing; confirm against the full source.
54 int64_t* groups_buffer,
55 const uint32_t groups_buffer_entry_count,
57 const uint32_t key_count,
58 const uint32_t key_width,
59 const uint32_t row_size_quad) {
60 uint32_t h =
key_hash(key, key_count, key_width) % groups_buffer_entry_count;
62 groups_buffer, h, key, key_count, key_width, row_size_quad);
64 return matching_group;
// Check the watchdog only every 100 iterations to amortize its cost.
66 uint32_t watchdog_countdown = 100;
67 uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
68 while (h_probe != h) {
70 groups_buffer, h_probe, key, key_count, key_width, row_size_quad);
72 return matching_group;
74 h_probe = (h_probe + 1) % groups_buffer_entry_count;
75 if (--watchdog_countdown == 0) {
// Countdown is re-armed after the periodic check fires.
79 watchdog_countdown = 100;
// NOTE(review): fragment of get_group_value_columnar_slot (returns an int32_t
// slot index per the signature index; -1 is the "no match" sentinel tested
// below). Header, call lines, and the final miss-return are missing.
87 const uint32_t groups_buffer_entry_count,
89 const uint32_t key_count,
90 const uint32_t key_width) {
91 uint32_t h =
key_hash(key, key_count, key_width) % groups_buffer_entry_count;
93 groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
94 if (matching_slot != -1) {
// Linear probe with wrap-around, same scheme as the row-wise variants.
97 uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
98 while (h_probe != h) {
100 groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
101 if (matching_slot != -1) {
104 h_probe = (h_probe + 1) % groups_buffer_entry_count;
// NOTE(review): fragment of get_group_value_columnar_slot_with_watchdog.
// Identical probing to get_group_value_columnar_slot, plus the every-100-steps
// watchdog countdown. Interior lines (header, call sites, watchdog body,
// braces) are missing from this excerpt.
111 const uint32_t groups_buffer_entry_count,
113 const uint32_t key_count,
114 const uint32_t key_width) {
115 uint32_t h =
key_hash(key, key_count, key_width) % groups_buffer_entry_count;
117 groups_buffer, groups_buffer_entry_count, h, key, key_count, key_width);
118 if (matching_slot != -1) {
121 uint32_t watchdog_countdown = 100;
122 uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
123 while (h_probe != h) {
125 groups_buffer, groups_buffer_entry_count, h_probe, key, key_count, key_width);
126 if (matching_slot != -1) {
129 h_probe = (h_probe + 1) % groups_buffer_entry_count;
130 if (--watchdog_countdown == 0) {
134 watchdog_countdown = 100;
// NOTE(review): fragment of get_group_value_columnar (signature index:
// NEVER_INLINE DEVICE int64_t*). Columnar keys are always 8-byte quadwords,
// hence the fixed sizeof(int64_t) key width passed to key_hash. Header and
// call-site lines are missing here.
141 int64_t* groups_buffer,
142 const uint32_t groups_buffer_entry_count,
144 const uint32_t key_qw_count) {
145 uint32_t h =
key_hash(key, key_qw_count,
sizeof(int64_t)) % groups_buffer_entry_count;
147 groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
148 if (matching_group) {
149 return matching_group;
// Linear probe with modulo wrap; terminates when the probe returns to h.
151 uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
152 while (h_probe != h) {
154 groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
155 if (matching_group) {
156 return matching_group;
158 h_probe = (h_probe + 1) % groups_buffer_entry_count;
// NOTE(review): fragment of get_group_value_columnar_with_watchdog — the
// columnar lookup above plus the every-100-probes watchdog countdown.
// Interior lines are missing; recover the full body before editing.
164 int64_t* groups_buffer,
165 const uint32_t groups_buffer_entry_count,
167 const uint32_t key_qw_count) {
168 uint32_t h =
key_hash(key, key_qw_count,
sizeof(int64_t)) % groups_buffer_entry_count;
170 groups_buffer, h, key, key_qw_count, groups_buffer_entry_count);
171 if (matching_group) {
172 return matching_group;
174 uint32_t watchdog_countdown = 100;
175 uint32_t h_probe = (h + 1) % groups_buffer_entry_count;
176 while (h_probe != h) {
178 groups_buffer, h_probe, key, key_qw_count, groups_buffer_entry_count);
179 if (matching_group) {
180 return matching_group;
182 h_probe = (h_probe + 1) % groups_buffer_entry_count;
183 if (--watchdog_countdown == 0) {
187 watchdog_countdown = 100;
// NOTE(review): fragment of get_group_value_fast — perfect-hash ("fast")
// path: the slot is computed directly from (key - min_key), no probing.
// Missing lines 200-202 presumably apply the `bucket` normalization to
// key_diff — TODO confirm against the full source.
194 int64_t* groups_buffer,
196 const int64_t min_key,
197 const int64_t bucket,
198 const uint32_t row_size_quad) {
199 int64_t key_diff = key - min_key;
// Row-wise layout: each entry occupies row_size_quad quadwords.
203 int64_t off = key_diff * row_size_quad;
// Slot 0 of the row stores the key; the payload starts right after it.
205 groups_buffer[off] = key;
207 return groups_buffer + off + 1;
// NOTE(review): fragment of get_group_value_fast_with_original_key — same as
// get_group_value_fast, but the slot is addressed by the (normalized) `key`
// while `orig_key` is what gets stored in the row header.
211 int64_t* groups_buffer,
213 const int64_t orig_key,
214 const int64_t min_key,
215 const int64_t bucket,
216 const uint32_t row_size_quad) {
217 int64_t key_diff = key - min_key;
221 int64_t off = key_diff * row_size_quad;
// Store the original (pre-normalization) key, not the bucketized one.
223 groups_buffer[off] = orig_key;
225 return groups_buffer + off + 1;
// NOTE(review): fragment of get_columnar_group_bin_offset — perfect-hash bin
// offset for the columnar layout; writes the key into its bin and (per the
// signature index) returns a uint32_t offset. Missing lines 234-237 likely
// handle the `bucket` normalization — TODO confirm.
231 const int64_t min_key,
232 const int64_t bucket) {
233 int64_t off = key - min_key;
238 key_base_ptr[off] = key;
// NOTE(review): fragment of get_scan_output_slot — writes the fragment offset
// into row `pos` of the projection output buffer and returns a pointer just
// past the written slot. The function header and final return (for the
// pos-out-of-range case) are missing from this excerpt.
244 int64_t* output_buffer,
245 const uint32_t output_buffer_entry_count,
247 const int64_t offset_in_fragment,
248 const uint32_t row_size_quad) {
// Widen to uint64_t before multiplying so large pos * row_size_quad
// cannot overflow 32-bit arithmetic.
249 uint64_t off =
static_cast<uint64_t
>(pos) * static_cast<uint64_t>(row_size_quad);
250 if (pos < output_buffer_entry_count) {
251 output_buffer[off] = offset_in_fragment;
252 return output_buffer + off + 1;
// NOTE(review): fragment of get_columnar_scan_output_offset — columnar
// counterpart of get_scan_output_slot; bounds-checks pos before writing the
// fragment offset. Header and out-of-range return are missing here.
259 const uint32_t output_buffer_entry_count,
261 const int64_t offset_in_fragment) {
262 if (pos < output_buffer_entry_count) {
263 output_buffer[pos] = offset_in_fragment;
// NOTE(review): fragment of bucketized_hash_join_idx — guards the key against
// [min_key, max_key], then dereferences the bucketized hash slot inside the
// int32 hash buffer. The header and the out-of-range return are missing.
272 int64_t
const min_key,
273 int64_t
const max_key,
274 int64_t bucket_normalization) {
// Range guard: only keys inside [min_key, max_key] index the hash buffer.
275 if (key >= min_key && key <= max_key) {
// hash_buff is carried as int64_t and reinterpreted as an int32_t slot array.
277 reinterpret_cast<int32_t*
>(hash_buff), key, min_key, bucket_normalization);
// NOTE(review): fragment of hash_join_idx — non-bucketized variant; same
// [min_key, max_key] range guard. Header, slot lookup, and the miss return
// are missing from this excerpt.
284 const int64_t min_key,
285 const int64_t max_key) {
286 if (key >= min_key && key <= max_key) {
// NOTE(review): fragment of bucketized_hash_join_idx_nullable — per the
// signature index this forwards to bucketized_hash_join_idx when key is not
// the null sentinel; the null branch (presumably returning -1, mirroring
// hash_join_idx_nullable below) is missing from this excerpt.
295 const int64_t min_key,
296 const int64_t max_key,
297 const int64_t null_val,
298 const int64_t bucket_normalization) {
300 hash_buff, key, min_key, max_key, bucket_normalization)
// NOTE(review): fragment of hash_join_idx_nullable — null-aware wrapper:
// a null key short-circuits to -1 (no match), otherwise defer to
// hash_join_idx. Only the function header line is missing here.
306 const int64_t min_key,
307 const int64_t max_key,
308 const int64_t null_val) {
309 return key != null_val ?
hash_join_idx(hash_buff, key, min_key, max_key) : -1;
// NOTE(review): fragment of bucketized_hash_join_idx_bitwise — "bitwise"
// null handling: instead of returning -1 for nulls, the null key is looked up
// under translated_val. Lines 322-325 (the non-null result and the start of
// the null-branch call) are missing — verify the argument lists there.
315 const int64_t min_key,
316 const int64_t max_key,
317 const int64_t null_val,
318 const int64_t translated_val,
319 const int64_t bucket_normalization) {
321 hash_buff, key, min_key, max_key, bucket_normalization)
326 bucket_normalization);
// NOTE(review): fragment of hash_join_idx_bitwise — "bitwise" null handling:
// a null key is looked up under its translated value rather than rejected.
// BUG FIX: the null branch passed `translated_val` as the 4th argument of
// hash_join_idx, which is the `max_key` range bound (see hash_join_idx's
// `key >= min_key && key <= max_key` guard). That made the upper-bound check
// compare translated_val against itself, defeating the range guard. Pass the
// real `max_key` instead, mirroring the non-null branch.
332 const int64_t min_key,
333 const int64_t max_key,
334 const int64_t null_val,
335 const int64_t translated_val) {
336 return key != null_val
338 :
hash_join_idx(hash_buff, translated_val, min_key, max_key);
// NOTE(review): fragment of hash_join_idx_sharded — sharded hash-join lookup;
// same [min_key, max_key] guard, then (per the signature index) the slot is
// resolved through the sharded hash-slot helper using entry_count_per_shard,
// num_shards, and device_count. Most call-site lines are missing here.
344 const int64_t min_key,
345 const int64_t max_key,
346 const uint32_t entry_count_per_shard,
347 const uint32_t num_shards,
348 const uint32_t device_count) {
349 if (key >= min_key && key <= max_key) {
353 entry_count_per_shard,
// NOTE(review): fragment of hash_join_idx_sharded_nullable — null-aware
// sharded lookup; presumably returns -1 for null keys and otherwise forwards
// to hash_join_idx_sharded (the conditional and both branch bodies are
// missing from this excerpt — TODO confirm).
363 const int64_t min_key,
364 const int64_t max_key,
365 const uint32_t entry_count_per_shard,
366 const uint32_t num_shards,
367 const uint32_t device_count,
368 const int64_t null_val) {
373 entry_count_per_shard,
// NOTE(review): fragment of hash_join_idx_bitwise_sharded — sharded lookup
// with "bitwise" null handling: two call sites are visible (lines 393 and
// 400), consistent with a non-null branch on `key` and a null branch on
// `translated_val`; the surrounding conditional lines are missing.
382 const int64_t min_key,
383 const int64_t max_key,
384 const uint32_t entry_count_per_shard,
385 const uint32_t num_shards,
386 const uint32_t device_count,
387 const int64_t null_val,
388 const int64_t translated_val) {
393 entry_count_per_shard,
400 entry_count_per_shard,
// NOTE(review): DEF_TRANSLATE_NULL_KEY generates translate_null_key_<T>
// helpers that map a type-specific null sentinel onto translated_val (the
// value used by the bitwise hash-join paths above). The macro body's closing
// lines (the non-null return and closing brace) and the per-type
// instantiation lines between the definition and the #undef are missing from
// this excerpt — recover them from version control.
405 #define DEF_TRANSLATE_NULL_KEY(key_type) \
406 extern "C" NEVER_INLINE DEVICE int64_t translate_null_key_##key_type( \
407 const key_type key, const key_type null_val, const int64_t translated_val) { \
408 if (key == null_val) { \
409 return translated_val; \
419 #undef DEF_TRANSLATE_NULL_KEY
NEVER_INLINE DEVICE uint32_t MurmurHash1(const void *key, int len, const uint32_t seed)
ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t bucket_normalization)
NEVER_INLINE DEVICE int64_t * get_group_value_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad)
NEVER_INLINE DEVICE int64_t * get_group_value_columnar(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_qw_count)
ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx_bitwise(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t translated_val, const int64_t bucket_normalization)
__device__ bool dynamic_watchdog()
__device__ int64_t * get_matching_group_value_columnar(int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_qw_count, const size_t entry_count)
ALWAYS_INLINE DEVICE int64_t * get_scan_output_slot(int64_t *output_buffer, const uint32_t output_buffer_entry_count, const uint32_t pos, const int64_t offset_in_fragment, const uint32_t row_size_quad)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t bucket_normalization)
ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
NEVER_INLINE DEVICE int64_t * get_group_value_columnar_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_qw_count)
ALWAYS_INLINE DEVICE uint32_t key_hash(const int64_t *key, const uint32_t key_count, const uint32_t key_byte_width)
NEVER_INLINE DEVICE int32_t get_group_value_columnar_slot_with_watchdog(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
#define DEF_TRANSLATE_NULL_KEY(key_type)
ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise_sharded(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t null_val, const int64_t translated_val)
__device__ int64_t * get_matching_group_value(int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)
ALWAYS_INLINE DEVICE int64_t hash_join_idx_sharded_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t null_val)
ALWAYS_INLINE DEVICE int64_t * get_group_value_fast(int64_t *groups_buffer, const int64_t key, const int64_t min_key, const int64_t bucket, const uint32_t row_size_quad)
ALWAYS_INLINE DEVICE int64_t * get_group_value_fast_with_original_key(int64_t *groups_buffer, const int64_t key, const int64_t orig_key, const int64_t min_key, const int64_t bucket, const uint32_t row_size_quad)
ALWAYS_INLINE DEVICE int64_t hash_join_idx_nullable(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key)
ALWAYS_INLINE DEVICE int64_t hash_join_idx(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key)
NEVER_INLINE DEVICE int64_t * get_group_value(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot_sharded(int32_t *buff, const int64_t key, const int64_t min_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
ALWAYS_INLINE DEVICE int64_t bucketized_hash_join_idx(int64_t hash_buff, int64_t const key, int64_t const min_key, int64_t const max_key, int64_t bucket_normalization)
ALWAYS_INLINE DEVICE int32_t get_columnar_scan_output_offset(int64_t *output_buffer, const uint32_t output_buffer_entry_count, const uint32_t pos, const int64_t offset_in_fragment)
NEVER_INLINE DEVICE int32_t get_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t groups_buffer_entry_count, const int64_t *key, const uint32_t key_count, const uint32_t key_width)
ALWAYS_INLINE DEVICE uint32_t get_columnar_group_bin_offset(int64_t *key_base_ptr, const int64_t key, const int64_t min_key, const int64_t bucket)
ALWAYS_INLINE DEVICE int64_t hash_join_idx_bitwise(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key, const int64_t null_val, const int64_t translated_val)
__device__ int32_t get_matching_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)