OmniSciDB  95562058bd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
JoinHashTableQueryRuntime.cpp File Reference
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "Geospatial/CompressionRuntime.h"
#include "QueryEngine/CompareKeysInl.h"
#include "QueryEngine/MurmurHash.h"
+ Include dependency graph for JoinHashTableQueryRuntime.cpp:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  BufferRange
 
struct  Bounds
 

Namespaces

 anonymous_namespace{JoinHashTableQueryRuntime.cpp}
 

Functions

DEVICE bool compare_to_key (const int8_t *entry, const int8_t *key, const size_t key_bytes)
 
template<class T >
DEVICE int64_t get_matching_slot (const int8_t *hash_buff, const uint32_t h, const int8_t *key, const size_t key_bytes)
 
template<class T >
FORCE_INLINE DEVICE int64_t baseline_hash_join_idx_impl (const int8_t *hash_buff, const int8_t *key, const size_t key_bytes, const size_t entry_count)
 
NEVER_INLINE DEVICE int64_t baseline_hash_join_idx_32 (const int8_t *hash_buff, const int8_t *key, const size_t key_bytes, const size_t entry_count)
 
NEVER_INLINE DEVICE int64_t baseline_hash_join_idx_64 (const int8_t *hash_buff, const int8_t *key, const size_t key_bytes, const size_t entry_count)
 
template<typename T >
FORCE_INLINE DEVICE int64_t get_bucket_key_for_value_impl (const T value, const double bucket_size)
 
NEVER_INLINE DEVICE int64_t get_bucket_key_for_range_double (const int8_t *range_bytes, const size_t range_component_index, const double bucket_size)
 
FORCE_INLINE DEVICE int64_t get_bucket_key_for_range_compressed_impl (const int8_t *range, const size_t range_component_index, const double bucket_size)
 
NEVER_INLINE DEVICE int64_t get_bucket_key_for_range_compressed (const int8_t *range, const size_t range_component_index, const double bucket_size)
 
template<typename T >
FORCE_INLINE DEVICE int64_t get_composite_key_index_impl (const T *key, const size_t key_component_count, const T *composite_key_dict, const size_t entry_count)
 
NEVER_INLINE DEVICE int64_t get_composite_key_index_32 (const int32_t *key, const size_t key_component_count, const int32_t *composite_key_dict, const size_t entry_count)
 
NEVER_INLINE DEVICE int64_t get_composite_key_index_64 (const int64_t *key, const size_t key_component_count, const int64_t *composite_key_dict, const size_t entry_count)
 
NEVER_INLINE DEVICE int32_t insert_sorted (int32_t *arr, size_t elem_count, int32_t elem)
 
ALWAYS_INLINE DEVICE int64_t overlaps_hash_join_idx (int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key)
 
ALWAYS_INLINE DEVICE BufferRange get_row_id_buffer_ptr (int64_t *hash_table_ptr, const int64_t *key, const int64_t key_component_count, const int64_t entry_count, const int64_t offset_buffer_ptr_offset, const int64_t sub_buff_size)
 
NEVER_INLINE DEVICE int64_t get_candidate_rows (int32_t *out_arr, const uint32_t max_arr_size, const int8_t *range_bytes, const int32_t range_component_index, const double bucket_size_x, const double bucket_size_y, const int32_t keys_count, const int64_t key_component_count, int64_t *hash_table_ptr, const int64_t entry_count, const int64_t offset_buffer_ptr_offset, const int64_t sub_buff_size)
 
NEVER_INLINE DEVICE int32_t get_num_buckets_for_bounds (const int8_t *range_bytes, const int32_t range_component_index, const double bucket_size_x, const double bucket_size_y)
 

Variables

const int anonymous_namespace{JoinHashTableQueryRuntime.cpp}::kNoMatch = -1
 
const int anonymous_namespace{JoinHashTableQueryRuntime.cpp}::kNotPresent = -2
 

Function Documentation

NEVER_INLINE DEVICE int64_t baseline_hash_join_idx_32 ( const int8_t *  hash_buff,
const int8_t *  key,
const size_t  key_bytes,
const size_t  entry_count 
)

Definition at line 84 of file JoinHashTableQueryRuntime.cpp.

87  {
88  return baseline_hash_join_idx_impl<int32_t>(hash_buff, key, key_bytes, entry_count);
89 }
NEVER_INLINE DEVICE int64_t baseline_hash_join_idx_64 ( const int8_t *  hash_buff,
const int8_t *  key,
const size_t  key_bytes,
const size_t  entry_count 
)

Definition at line 92 of file JoinHashTableQueryRuntime.cpp.

95  {
96  return baseline_hash_join_idx_impl<int64_t>(hash_buff, key, key_bytes, entry_count);
97 }
template<class T >
FORCE_INLINE DEVICE int64_t baseline_hash_join_idx_impl ( const int8_t *  hash_buff,
const int8_t *  key,
const size_t  key_bytes,
const size_t  entry_count 
)

Definition at line 60 of file JoinHashTableQueryRuntime.cpp.

References anonymous_namespace{JoinHashTableQueryRuntime.cpp}::kNoMatch, and MurmurHash1().

63  {
64  if (!entry_count) {
65  return kNoMatch;
66  }
67  const uint32_t h = MurmurHash1(key, key_bytes, 0) % entry_count;
68  int64_t matching_slot = get_matching_slot<T>(hash_buff, h, key, key_bytes);
69  if (matching_slot != kNoMatch) {
70  return matching_slot;
71  }
72  uint32_t h_probe = (h + 1) % entry_count;
73  while (h_probe != h) {
74  matching_slot = get_matching_slot<T>(hash_buff, h_probe, key, key_bytes);
75  if (matching_slot != kNoMatch) {
76  return matching_slot;
77  }
78  h_probe = (h_probe + 1) % entry_count;
79  }
80  return kNoMatch;
81 }
NEVER_INLINE DEVICE uint32_t MurmurHash1(const void *key, int len, const uint32_t seed)
Definition: MurmurHash.cpp:20

+ Here is the call graph for this function:

DEVICE bool compare_to_key ( const int8_t *  entry,
const int8_t *  key,
const size_t  key_bytes 
)

Definition at line 25 of file JoinHashTableQueryRuntime.cpp.

Referenced by get_matching_slot().

27  {
28  for (size_t i = 0; i < key_bytes; ++i) {
29  if (entry[i] != key[i]) {
30  return false;
31  }
32  }
33  return true;
34 }

+ Here is the caller graph for this function:

NEVER_INLINE DEVICE int64_t get_bucket_key_for_range_compressed ( const int8_t *  range,
const size_t  range_component_index,
const double  bucket_size 
)

Definition at line 130 of file JoinHashTableQueryRuntime.cpp.

References get_bucket_key_for_range_compressed_impl().

132  {
134  range, range_component_index, bucket_size);
135 }
FORCE_INLINE DEVICE int64_t get_bucket_key_for_range_compressed_impl(const int8_t *range, const size_t range_component_index, const double bucket_size)

+ Here is the call graph for this function:

FORCE_INLINE DEVICE int64_t get_bucket_key_for_range_compressed_impl ( const int8_t *  range,
const size_t  range_component_index,
const double  bucket_size 
)

Definition at line 114 of file JoinHashTableQueryRuntime.cpp.

References Geospatial::decompress_lattitude_coord_geoint32(), Geospatial::decompress_longitude_coord_geoint32(), and get_bucket_key_for_value_impl().

Referenced by get_bucket_key_for_range_compressed().

116  {
117  const auto range_ptr = reinterpret_cast<const int32_t*>(range);
118  if (range_component_index % 2 == 0) {
120  Geospatial::decompress_longitude_coord_geoint32(range_ptr[range_component_index]),
121  bucket_size);
122  } else {
124  Geospatial::decompress_lattitude_coord_geoint32(range_ptr[range_component_index]),
125  bucket_size);
126  }
127 }
DEVICE double decompress_lattitude_coord_geoint32(const int32_t compressed)
FORCE_INLINE DEVICE int64_t get_bucket_key_for_value_impl(const T value, const double bucket_size)
DEVICE double decompress_longitude_coord_geoint32(const int32_t compressed)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

NEVER_INLINE DEVICE int64_t get_bucket_key_for_range_double ( const int8_t *  range_bytes,
const size_t  range_component_index,
const double  bucket_size 
)

Definition at line 106 of file JoinHashTableQueryRuntime.cpp.

References get_bucket_key_for_value_impl().

108  {
109  const auto range = reinterpret_cast<const double*>(range_bytes);
110  return get_bucket_key_for_value_impl(range[range_component_index], bucket_size);
111 }
FORCE_INLINE DEVICE int64_t get_bucket_key_for_value_impl(const T value, const double bucket_size)

+ Here is the call graph for this function:

template<typename T >
FORCE_INLINE DEVICE int64_t get_bucket_key_for_value_impl ( const T  value,
const double  bucket_size 
)

Definition at line 100 of file JoinHashTableQueryRuntime.cpp.

Referenced by get_bucket_key_for_range_compressed_impl(), and get_bucket_key_for_range_double().

101  {
102  return static_cast<int64_t>(floor(static_cast<double>(value) * bucket_size));
103 }

+ Here is the caller graph for this function:

NEVER_INLINE DEVICE int64_t get_candidate_rows ( int32_t *  out_arr,
const uint32_t  max_arr_size,
const int8_t *  range_bytes,
const int32_t  range_component_index,
const double  bucket_size_x,
const double  bucket_size_y,
const int32_t  keys_count,
const int64_t  key_component_count,
int64_t *  hash_table_ptr,
const int64_t  entry_count,
const int64_t  offset_buffer_ptr_offset,
const int64_t  sub_buff_size 
)

Getting overlapping candidates for the overlaps join algorithm works as follows:

  1. Take the bounds of the Polygon and use the bucket sizes to split the bounding box into the hash keys it falls into.
  2. Iterate over the keys and look them up in the hash table.
  3. When looking up the values of each key, we use a series of offsets to get to the array of row ids.
  4. Since it is possible (likely) we encounter the same row id in several buckets, we need to ensure we remove the duplicates we encounter. A simple ordered insertion is used which ignores duplicate values. Since the N elements we insert can be considered relatively small (N < 200) this exhibits a good tradeoff to conserve space since we are constrained by the stack size on the GPU.

RETURNS: Unique Row IDs are placed on the fixed size stack array that is passed in as out_arr. The number of row ids in this array is returned.

Definition at line 287 of file JoinHashTableQueryRuntime.cpp.

References assert(), get_row_id_buffer_ptr(), insert_sorted(), and Bounds::min_X.

298  {
299  const auto range = reinterpret_cast<const double*>(range_bytes);
300 
301  size_t elem_count = 0;
302 
303  const auto bounds =
304  Bounds{.min_X = range[0], .min_Y = range[1], .max_X = range[2], .max_Y = range[3]};
305 
306  for (int64_t x = floor(bounds.min_X * bucket_size_x);
307  x <= floor(bounds.max_X * bucket_size_x);
308  x++) {
309  for (int64_t y = floor(bounds.min_Y * bucket_size_y);
310  y <= floor(bounds.max_Y * bucket_size_y);
311  y++) {
312  int64_t cur_key[2];
313  cur_key[0] = static_cast<int64_t>(x);
314  cur_key[1] = static_cast<int64_t>(y);
315 
316  const auto buffer_range = get_row_id_buffer_ptr(hash_table_ptr,
317  cur_key,
318  key_component_count,
319  entry_count,
320  offset_buffer_ptr_offset,
321  sub_buff_size);
322 
323  for (int64_t j = 0; j < buffer_range.element_count; j++) {
324  const auto rowid = buffer_range.buffer[j];
325  elem_count += insert_sorted(out_arr, elem_count, rowid);
326  assert(max_arr_size >= elem_count);
327  }
328  }
329  }
330 
331  return elem_count;
332 }
ALWAYS_INLINE DEVICE BufferRange get_row_id_buffer_ptr(int64_t *hash_table_ptr, const int64_t *key, const int64_t key_component_count, const int64_t entry_count, const int64_t offset_buffer_ptr_offset, const int64_t sub_buff_size)
NEVER_INLINE DEVICE int32_t insert_sorted(int32_t *arr, size_t elem_count, int32_t elem)
int64_t const int32_t sz assert(dest)

+ Here is the call graph for this function:

NEVER_INLINE DEVICE int64_t get_composite_key_index_32 ( const int32_t *  key,
const size_t  key_component_count,
const int32_t *  composite_key_dict,
const size_t  entry_count 
)

Definition at line 162 of file JoinHashTableQueryRuntime.cpp.

References get_composite_key_index_impl().

165  {
167  key, key_component_count, composite_key_dict, entry_count);
168 }
FORCE_INLINE DEVICE int64_t get_composite_key_index_impl(const T *key, const size_t key_component_count, const T *composite_key_dict, const size_t entry_count)

+ Here is the call graph for this function:

NEVER_INLINE DEVICE int64_t get_composite_key_index_64 ( const int64_t *  key,
const size_t  key_component_count,
const int64_t *  composite_key_dict,
const size_t  entry_count 
)

Definition at line 171 of file JoinHashTableQueryRuntime.cpp.

References get_composite_key_index_impl().

Referenced by get_row_id_buffer_ptr().

174  {
176  key, key_component_count, composite_key_dict, entry_count);
177 }
FORCE_INLINE DEVICE int64_t get_composite_key_index_impl(const T *key, const size_t key_component_count, const T *composite_key_dict, const size_t entry_count)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
FORCE_INLINE DEVICE int64_t get_composite_key_index_impl ( const T *  key,
const size_t  key_component_count,
const T *  composite_key_dict,
const size_t  entry_count 
)

Definition at line 138 of file JoinHashTableQueryRuntime.cpp.

References get_invalid_key(), keys_are_equal(), MurmurHash1(), SUFFIX, and omnisci.dtypes::T.

Referenced by get_composite_key_index_32(), and get_composite_key_index_64().

141  {
142  const uint32_t h = MurmurHash1(key, key_component_count * sizeof(T), 0) % entry_count;
143  uint32_t off = h * key_component_count;
144  if (keys_are_equal(&composite_key_dict[off], key, key_component_count)) {
145  return h;
146  }
147  uint32_t h_probe = (h + 1) % entry_count;
148  while (h_probe != h) {
149  off = h_probe * key_component_count;
150  if (keys_are_equal(&composite_key_dict[off], key, key_component_count)) {
151  return h_probe;
152  }
153  if (composite_key_dict[off] == SUFFIX(get_invalid_key) < T > ()) {
154  return -1;
155  }
156  h_probe = (h_probe + 1) % entry_count;
157  }
158  return -1;
159 }
NEVER_INLINE DEVICE uint32_t MurmurHash1(const void *key, int len, const uint32_t seed)
Definition: MurmurHash.cpp:20
bool keys_are_equal(const T *key1, const T *key2, const size_t key_component_count)
#define SUFFIX(name)
DEVICE T SUFFIX() get_invalid_key()

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<class T >
DEVICE int64_t get_matching_slot ( const int8_t *  hash_buff,
const uint32_t  h,
const int8_t *  key,
const size_t  key_bytes 
)

Definition at line 44 of file JoinHashTableQueryRuntime.cpp.

References compare_to_key(), get_invalid_key(), anonymous_namespace{JoinHashTableQueryRuntime.cpp}::kNoMatch, anonymous_namespace{JoinHashTableQueryRuntime.cpp}::kNotPresent, SUFFIX, and omnisci.dtypes::T.

47  {
48  const auto lookup_result_ptr = hash_buff + h * (key_bytes + sizeof(T));
49  if (compare_to_key(lookup_result_ptr, key, key_bytes)) {
50  return *reinterpret_cast<const T*>(lookup_result_ptr + key_bytes);
51  }
52  if (*reinterpret_cast<const T*>(lookup_result_ptr) ==
53  SUFFIX(get_invalid_key) < T > ()) {
54  return kNotPresent;
55  }
56  return kNoMatch;
57 }
DEVICE bool compare_to_key(const int8_t *entry, const int8_t *key, const size_t key_bytes)
#define SUFFIX(name)
DEVICE T SUFFIX() get_invalid_key()

+ Here is the call graph for this function:

NEVER_INLINE DEVICE int32_t get_num_buckets_for_bounds ( const int8_t *  range_bytes,
const int32_t  range_component_index,
const double  bucket_size_x,
const double  bucket_size_y 
)

Definition at line 338 of file JoinHashTableQueryRuntime.cpp.

341  {
342  const auto range = reinterpret_cast<const double*>(range_bytes);
343 
344  const auto bounds_min_x = range[0];
345  const auto bounds_min_y = range[1];
346  const auto bounds_max_x = range[2];
347  const auto bounds_max_y = range[3];
348 
349  const auto num_x =
350  floor(bounds_max_x * bucket_size_x) - floor(bounds_min_x * bucket_size_x);
351  const auto num_y =
352  floor(bounds_max_y * bucket_size_y) - floor(bounds_min_y * bucket_size_y);
353  const auto num_buckets = (num_x + 1) * (num_y + 1);
354 
355  return num_buckets;
356 }
ALWAYS_INLINE DEVICE BufferRange get_row_id_buffer_ptr ( int64_t *  hash_table_ptr,
const int64_t *  key,
const int64_t  key_component_count,
const int64_t  entry_count,
const int64_t  offset_buffer_ptr_offset,
const int64_t  sub_buff_size 
)

Definition at line 216 of file JoinHashTableQueryRuntime.cpp.

References BufferRange::buffer, get_composite_key_index_64(), and overlaps_hash_join_idx().

Referenced by get_candidate_rows().

221  {
222  const int64_t min_key = 0;
223  const int64_t max_key = entry_count - 1;
224 
225  auto key_idx =
226  get_composite_key_index_64(key, key_component_count, hash_table_ptr, entry_count);
227 
228  if (key_idx < -1) {
229  return BufferRange{.buffer = nullptr, .element_count = 0};
230  }
231 
232  int8_t* one_to_many_ptr = reinterpret_cast<int8_t*>(hash_table_ptr);
233  one_to_many_ptr += offset_buffer_ptr_offset;
234 
235  // Returns an index used to fetch row count and row ids.
236  const auto slot = overlaps_hash_join_idx(
237  reinterpret_cast<int64_t>(one_to_many_ptr), key_idx, min_key, max_key);
238  if (slot < 0) {
239  return BufferRange{.buffer = nullptr, .element_count = 0};
240  }
241 
242  // Offset into the row count section of buffer
243  int8_t* count_ptr = one_to_many_ptr + sub_buff_size;
244 
245  const int64_t matched_row_count = overlaps_hash_join_idx(
246  reinterpret_cast<int64_t>(count_ptr), key_idx, min_key, max_key);
247 
248  // Offset into payload section, containing an array of row ids
249  int32_t* rowid_buffer = (int32_t*)(one_to_many_ptr + 2 * sub_buff_size);
250  const auto rowidoff_ptr = &rowid_buffer[slot];
251 
252  return BufferRange{.buffer = rowidoff_ptr, .element_count = matched_row_count};
253 }
ALWAYS_INLINE DEVICE int64_t overlaps_hash_join_idx(int64_t hash_buff, const int64_t key, const int64_t min_key, const int64_t max_key)
NEVER_INLINE DEVICE int64_t get_composite_key_index_64(const int64_t *key, const size_t key_component_count, const int64_t *composite_key_dict, const size_t entry_count)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

NEVER_INLINE DEVICE int32_t insert_sorted ( int32_t *  arr,
size_t  elem_count,
int32_t  elem 
)

Definition at line 179 of file JoinHashTableQueryRuntime.cpp.

Referenced by get_candidate_rows().

181  {
182  for (size_t i = 0; i < elem_count; i++) {
183  if (elem == arr[i])
184  return 0;
185 
186  if (elem > arr[i])
187  continue;
188 
189  for (size_t j = elem_count; i < j; j--) {
190  arr[j] = arr[j - 1];
191  }
192  arr[i] = elem;
193  return 1;
194  }
195 
196  arr[elem_count] = elem;
197  return 1;
198 }

+ Here is the caller graph for this function:

ALWAYS_INLINE DEVICE int64_t overlaps_hash_join_idx ( int64_t  hash_buff,
const int64_t  key,
const int64_t  min_key,
const int64_t  max_key 
)

Definition at line 200 of file JoinHashTableQueryRuntime.cpp.

Referenced by get_row_id_buffer_ptr().

203  {
204  if (key >= min_key && key <= max_key) {
205  return *(reinterpret_cast<int32_t*>(hash_buff) + (key - min_key));
206  }
207  return -1;
208 }

+ Here is the caller graph for this function: