OmniSciDB  04ee39c94c
JoinHashTableQueryRuntime.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "../Shared/geo_compression.h"
18 #include "CompareKeysInl.h"
19 #include "MurmurHash.h"
20 
21 DEVICE bool compare_to_key(const int8_t* entry,
22  const int8_t* key,
23  const size_t key_bytes) {
24  for (size_t i = 0; i < key_bytes; ++i) {
25  if (entry[i] != key[i]) {
26  return false;
27  }
28  }
29  return true;
30 }
31 
32 namespace {
33 
34 const int kNoMatch = -1;
35 const int kNotPresent = -2;
36 
37 } // namespace
38 
39 template <class T>
40 DEVICE int64_t get_matching_slot(const int8_t* hash_buff,
41  const uint32_t h,
42  const int8_t* key,
43  const size_t key_bytes) {
44  const auto lookup_result_ptr = hash_buff + h * (key_bytes + sizeof(T));
45  if (compare_to_key(lookup_result_ptr, key, key_bytes)) {
46  return *reinterpret_cast<const T*>(lookup_result_ptr + key_bytes);
47  }
48  if (*reinterpret_cast<const T*>(lookup_result_ptr) ==
49  SUFFIX(get_invalid_key) < T > ()) {
50  return kNotPresent;
51  }
52  return kNoMatch;
53 }
54 
55 template <class T>
56 FORCE_INLINE DEVICE int64_t baseline_hash_join_idx_impl(const int8_t* hash_buff,
57  const int8_t* key,
58  const size_t key_bytes,
59  const size_t entry_count) {
60  if (!entry_count) {
61  return kNoMatch;
62  }
63  const uint32_t h = MurmurHash1(key, key_bytes, 0) % entry_count;
64  int64_t matching_slot = get_matching_slot<T>(hash_buff, h, key, key_bytes);
65  if (matching_slot != kNoMatch) {
66  return matching_slot;
67  }
68  uint32_t h_probe = (h + 1) % entry_count;
69  while (h_probe != h) {
70  matching_slot = get_matching_slot<T>(hash_buff, h_probe, key, key_bytes);
71  if (matching_slot != kNoMatch) {
72  return matching_slot;
73  }
74  h_probe = (h_probe + 1) % entry_count;
75  }
76  return kNoMatch;
77 }
78 
79 extern "C" NEVER_INLINE DEVICE int64_t
80 baseline_hash_join_idx_32(const int8_t* hash_buff,
81  const int8_t* key,
82  const size_t key_bytes,
83  const size_t entry_count) {
84  return baseline_hash_join_idx_impl<int32_t>(hash_buff, key, key_bytes, entry_count);
85 }
86 
87 extern "C" NEVER_INLINE DEVICE int64_t
88 baseline_hash_join_idx_64(const int8_t* hash_buff,
89  const int8_t* key,
90  const size_t key_bytes,
91  const size_t entry_count) {
92  return baseline_hash_join_idx_impl<int64_t>(hash_buff, key, key_bytes, entry_count);
93 }
94 
95 template <typename T>
97  const double bucket_size) {
98  return static_cast<int64_t>(floor(static_cast<double>(value) * bucket_size));
99 }
100 
101 extern "C" NEVER_INLINE DEVICE int64_t
102 get_bucket_key_for_range_double(const int8_t* range_bytes,
103  const size_t range_component_index,
104  const double bucket_size) {
105  const auto range = reinterpret_cast<const double*>(range_bytes);
106  return get_bucket_key_for_value_impl(range[range_component_index], bucket_size);
107 }
108 
109 FORCE_INLINE DEVICE int64_t
111  const size_t range_component_index,
112  const double bucket_size) {
113  const auto range_ptr = reinterpret_cast<const int32_t*>(range);
114  if (range_component_index % 2 == 0) {
117  range_ptr[range_component_index]),
118  bucket_size);
119  } else {
122  range_ptr[range_component_index]),
123  bucket_size);
124  }
125 }
126 
127 extern "C" NEVER_INLINE DEVICE int64_t
129  const size_t range_component_index,
130  const double bucket_size) {
132  range, range_component_index, bucket_size);
133 }
134 
135 template <typename T>
137  const size_t key_component_count,
138  const T* composite_key_dict,
139  const size_t entry_count) {
140  const uint32_t h = MurmurHash1(key, key_component_count * sizeof(T), 0) % entry_count;
141  uint32_t off = h * key_component_count;
142  if (keys_are_equal(&composite_key_dict[off], key, key_component_count)) {
143  return h;
144  }
145  uint32_t h_probe = (h + 1) % entry_count;
146  while (h_probe != h) {
147  off = h_probe * key_component_count;
148  if (keys_are_equal(&composite_key_dict[off], key, key_component_count)) {
149  return h_probe;
150  }
151  if (composite_key_dict[off] == SUFFIX(get_invalid_key) < T > ()) {
152  return -1;
153  }
154  h_probe = (h_probe + 1) % entry_count;
155  }
156  return -1;
157 }
158 
159 extern "C" NEVER_INLINE DEVICE int64_t
160 get_composite_key_index_32(const int32_t* key,
161  const size_t key_component_count,
162  const int32_t* composite_key_dict,
163  const size_t entry_count) {
165  key, key_component_count, composite_key_dict, entry_count);
166 }
167 
168 extern "C" NEVER_INLINE DEVICE int64_t
169 get_composite_key_index_64(const int64_t* key,
170  const size_t key_component_count,
171  const int64_t* composite_key_dict,
172  const size_t entry_count) {
174  key, key_component_count, composite_key_dict, entry_count);
175 }
NEVER_INLINE DEVICE uint32_t MurmurHash1(const void *key, int len, const uint32_t seed)
Definition: MurmurHash.cpp:20
DEVICE bool compare_to_key(const int8_t *entry, const int8_t *key, const size_t key_bytes)
bool keys_are_equal(const T *key1, const T *key2, const size_t key_component_count)
DEVICE int64_t get_matching_slot(const int8_t *hash_buff, const uint32_t h, const int8_t *key, const size_t key_bytes)
#define SUFFIX(name)
NEVER_INLINE DEVICE int64_t baseline_hash_join_idx_64(const int8_t *hash_buff, const int8_t *key, const size_t key_bytes, const size_t entry_count)
FORCE_INLINE DEVICE int64_t get_bucket_key_for_value_impl(const T value, const double bucket_size)
NEVER_INLINE DEVICE int64_t get_bucket_key_for_range_compressed(const int8_t *range, const size_t range_component_index, const double bucket_size)
NEVER_INLINE DEVICE int64_t get_bucket_key_for_range_double(const int8_t *range_bytes, const size_t range_component_index, const double bucket_size)
#define DEVICE
DEVICE T SUFFIX() get_invalid_key()
DEVICE double decompress_lattitude_coord_geoint32(const int32_t compressed)
#define FORCE_INLINE
FORCE_INLINE DEVICE int64_t get_bucket_key_for_range_compressed_impl(const int8_t *range, const size_t range_component_index, const double bucket_size)
FORCE_INLINE DEVICE int64_t baseline_hash_join_idx_impl(const int8_t *hash_buff, const int8_t *key, const size_t key_bytes, const size_t entry_count)
#define NEVER_INLINE
NEVER_INLINE DEVICE int64_t baseline_hash_join_idx_32(const int8_t *hash_buff, const int8_t *key, const size_t key_bytes, const size_t entry_count)
DEVICE double decompress_longitude_coord_geoint32(const int32_t compressed)
NEVER_INLINE DEVICE int64_t get_composite_key_index_32(const int32_t *key, const size_t key_component_count, const int32_t *composite_key_dict, const size_t entry_count)
NEVER_INLINE DEVICE int64_t get_composite_key_index_64(const int64_t *key, const size_t key_component_count, const int64_t *composite_key_dict, const size_t entry_count)
FORCE_INLINE DEVICE int64_t get_composite_key_index_impl(const T *key, const size_t key_component_count, const T *composite_key_dict, const size_t entry_count)