OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
JoinHashImpl.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #ifndef QUERYENGINE_GROUPBYFASTIMPL_H
24 #define QUERYENGINE_GROUPBYFASTIMPL_H
25 
26 #include <cstdint>
27 #include <functional>
28 #include "../../../Shared/funcannotations.h"
29 #include "../../../Shared/shard_key.h"
30 
31 #ifdef __CUDACC__
32 #define insert_key_cas(address, compare, val) atomicCAS(address, compare, val)
33 #elif defined(_WIN32)
34 #include "Shared/clean_windows.h"
35 #define insert_key_cas(address, compare, val) \
36  InterlockedCompareExchange(reinterpret_cast<volatile long*>(address), \
37  static_cast<long>(val), \
38  static_cast<long>(compare))
39 #else
40 #define insert_key_cas(address, compare, val) \
41  __sync_val_compare_and_swap(address, compare, val)
42 #endif
43 
45  size_t idx,
46  int32_t* entry_ptr,
47  const int32_t invalid_slot_val) {
48  if (insert_key_cas(entry_ptr, invalid_slot_val, idx) != invalid_slot_val) {
49  return -1;
50  }
51  return 0;
52 }
53 
55  size_t idx,
56  int32_t* entry_ptr,
57  const int32_t invalid_slot_val) {
58  // just mark the existence of value to the corresponding hash slot
59  // regardless of hashtable collision
60  insert_key_cas(entry_ptr, invalid_slot_val, idx);
61  return 0;
62 }
63 
64 #undef insert_key_cas
65 
67  int32_t* buff,
68  const int64_t key,
69  const int64_t min_key,
70  const int64_t translated_null_val,
71  const int64_t bucket_normalization) {
72  auto hash_slot = key / bucket_normalization - min_key + (key == translated_null_val);
73  return buff + hash_slot;
74 }
75 
76 extern "C" ALWAYS_INLINE DEVICE int32_t* SUFFIX(get_hash_slot)(int32_t* buff,
77  const int64_t key,
78  const int64_t min_key) {
79  return buff + (key - min_key);
80 }
81 
83  int32_t* buff,
84  const int64_t key,
85  const int64_t min_key,
86  const int64_t translated_null_val,
87  const uint32_t entry_count_per_shard,
88  const uint32_t num_shards,
89  const uint32_t device_count,
90  const int64_t bucket_normalization) {
91  const uint32_t shard = SHARD_FOR_KEY(key, num_shards);
92  const uint32_t shard_buffer_index =
93  shard / device_count; // shard sub-buffer index within `buff`
94  int32_t* shard_buffer = buff + shard_buffer_index * entry_count_per_shard;
95  auto hash_slot = ((key / bucket_normalization) - min_key) / num_shards +
96  (key == translated_null_val);
97  return shard_buffer + hash_slot;
98 }
99 
101  int32_t* buff,
102  const int64_t key,
103  const int64_t min_key,
104  const uint32_t entry_count_per_shard,
105  const uint32_t num_shards,
106  const uint32_t device_count) {
107  const uint32_t shard = SHARD_FOR_KEY(key, num_shards);
108  const uint32_t shard_buffer_index =
109  shard / device_count; // shard sub-buffer index within `buff`
110  int32_t* shard_buffer = buff + shard_buffer_index * entry_count_per_shard;
111  return shard_buffer + (key - min_key) / num_shards;
112 }
113 
115  int32_t* buff,
116  const int64_t key,
117  const int64_t min_key,
118  const int64_t translated_null_val,
119  const uint32_t entry_count_per_shard,
120  const uint32_t shard,
121  const uint32_t num_shards,
122  const uint32_t device_count,
123  const int64_t bucket_normalization) {
124  const uint32_t shard_buffer_index =
125  shard / device_count; // shard sub-buffer index within `buff`
126  int32_t* shard_buffer = buff + shard_buffer_index * entry_count_per_shard;
127  int64_t hash_slot = ((key / bucket_normalization) - min_key) / num_shards +
128  (key == translated_null_val);
129  return shard_buffer + hash_slot;
130 }
131 
133  int32_t* buff,
134  const int64_t key,
135  const int64_t min_key,
136  const uint32_t entry_count_per_shard,
137  const uint32_t shard,
138  const uint32_t num_shards,
139  const uint32_t device_count) {
140  const uint32_t shard_buffer_index =
141  shard / device_count; // shard sub-buffer index within `buff`
142  int32_t* shard_buffer = buff + shard_buffer_index * entry_count_per_shard;
143  return shard_buffer + (key - min_key) / num_shards;
144 }
145 
146 #endif // QUERYENGINE_GROUPBYFASTIMPL_H
#define SUFFIX(name)
ALWAYS_INLINE DEVICE int SUFFIX() fill_hashtable_for_semi_join(size_t idx, int32_t *entry_ptr, const int32_t invalid_slot_val)
Definition: JoinHashImpl.h:54
#define DEVICE
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key)
Definition: JoinHashImpl.h:76
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t translated_null_val, const int64_t bucket_normalization)
Definition: JoinHashImpl.h:66
#define insert_key_cas(address, compare, val)
Definition: JoinHashImpl.h:40
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot_sharded_opt(int32_t *buff, const int64_t key, const int64_t min_key, const uint32_t entry_count_per_shard, const uint32_t shard, const uint32_t num_shards, const uint32_t device_count)
Definition: JoinHashImpl.h:132
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot_sharded(int32_t *buff, const int64_t key, const int64_t min_key, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count)
Definition: JoinHashImpl.h:100
ALWAYS_INLINE DEVICE int SUFFIX() fill_one_to_one_hashtable(size_t idx, int32_t *entry_ptr, const int32_t invalid_slot_val)
Definition: JoinHashImpl.h:44
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot_sharded_opt(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t translated_null_val, const uint32_t entry_count_per_shard, const uint32_t shard, const uint32_t num_shards, const uint32_t device_count, const int64_t bucket_normalization)
Definition: JoinHashImpl.h:114
#define ALWAYS_INLINE
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot_sharded(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t translated_null_val, const uint32_t entry_count_per_shard, const uint32_t num_shards, const uint32_t device_count, const int64_t bucket_normalization)
Definition: JoinHashImpl.h:82
#define SHARD_FOR_KEY(key, num_shards)
Definition: shard_key.h:20