BaselineHashTableBuilder.h
/*
 * Copyright 2020 OmniSci, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "Shared/thread_count.h"

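// The templated dispatchers below pick the 32-bit or 64-bit runtime entry point
// at compile time based on the key width (SIZE), and the generic or overlaps
// variant based on the KEY_HANDLER type.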
template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, SIZE>::type* = nullptr>
int fill_baseline_hash_join_buff(int8_t* hash_buff,
                                 const size_t entry_count,
                                 const int32_t invalid_slot_val,
                                 const size_t key_component_count,
                                 const bool with_val_slot,
                                 const KEY_HANDLER* key_handler,
                                 const size_t num_elems,
                                 const int32_t cpu_thread_idx,
                                 const int32_t cpu_thread_count) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    return fill_baseline_hash_join_buff_32(hash_buff,
                                           entry_count,
                                           invalid_slot_val,
                                           key_component_count,
                                           with_val_slot,
                                           key_handler,
                                           num_elems,
                                           cpu_thread_idx,
                                           cpu_thread_count);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    return overlaps_fill_baseline_hash_join_buff_32(hash_buff,
                                                    entry_count,
                                                    invalid_slot_val,
                                                    key_component_count,
                                                    with_val_slot,
                                                    key_handler,
                                                    num_elems,
                                                    cpu_thread_idx,
                                                    cpu_thread_count);
  }
}

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, SIZE>::type* = nullptr>
int fill_baseline_hash_join_buff(int8_t* hash_buff,
                                 const size_t entry_count,
                                 const int32_t invalid_slot_val,
                                 const size_t key_component_count,
                                 const bool with_val_slot,
                                 const KEY_HANDLER* key_handler,
                                 const size_t num_elems,
                                 const int32_t cpu_thread_idx,
                                 const int32_t cpu_thread_count) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    return fill_baseline_hash_join_buff_64(hash_buff,
                                           entry_count,
                                           invalid_slot_val,
                                           key_component_count,
                                           with_val_slot,
                                           key_handler,
                                           num_elems,
                                           cpu_thread_idx,
                                           cpu_thread_count);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    return overlaps_fill_baseline_hash_join_buff_64(hash_buff,
                                                    entry_count,
                                                    invalid_slot_val,
                                                    key_component_count,
                                                    with_val_slot,
                                                    key_handler,
                                                    num_elems,
                                                    cpu_thread_idx,
                                                    cpu_thread_count);
  }
}

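// Device-side counterparts of the host dispatch above: these launch the CUDA
// fill kernels with the given grid/block dimensions and report failures through
// dev_err_buff.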
template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, SIZE>::type* = nullptr>
void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
                                            const size_t entry_count,
                                            const int32_t invalid_slot_val,
                                            const size_t key_component_count,
                                            const bool with_val_slot,
                                            int* dev_err_buff,
                                            const KEY_HANDLER* key_handler,
                                            const size_t num_elems,
                                            const size_t block_size_x,
                                            const size_t grid_size_x) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_baseline_hash_join_buff_on_device_32(hash_buff,
                                              entry_count,
                                              invalid_slot_val,
                                              key_component_count,
                                              with_val_slot,
                                              dev_err_buff,
                                              key_handler,
                                              num_elems,
                                              block_size_x,
                                              grid_size_x);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
  }
}

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, SIZE>::type* = nullptr>
void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
                                            const size_t entry_count,
                                            const int32_t invalid_slot_val,
                                            const size_t key_component_count,
                                            const bool with_val_slot,
                                            int* dev_err_buff,
                                            const KEY_HANDLER* key_handler,
                                            const size_t num_elems,
                                            const size_t block_size_x,
                                            const size_t grid_size_x) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_baseline_hash_join_buff_on_device_64(hash_buff,
                                              entry_count,
                                              invalid_slot_val,
                                              key_component_count,
                                              with_val_slot,
                                              dev_err_buff,
                                              key_handler,
                                              num_elems,
                                              block_size_x,
                                              grid_size_x);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    overlaps_fill_baseline_hash_join_buff_on_device_64(hash_buff,
                                                       entry_count,
                                                       invalid_slot_val,
                                                       key_component_count,
                                                       with_val_slot,
                                                       dev_err_buff,
                                                       key_handler,
                                                       num_elems,
                                                       block_size_x,
                                                       grid_size_x);
  }
}

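// Dispatch helpers for populating the one-to-many buffer on the device, using
// the composite key dictionary written by the fill step above.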
template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, SIZE>::type* = nullptr>
void fill_one_to_many_baseline_hash_table_on_device(int32_t* buff,
                                                    const SIZE* composite_key_dict,
                                                    const size_t hash_entry_count,
                                                    const int32_t invalid_slot_val,
                                                    const size_t key_component_count,
                                                    const KEY_HANDLER* key_handler,
                                                    const size_t num_elems,
                                                    const size_t block_size_x,
                                                    const size_t grid_size_x) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_one_to_many_baseline_hash_table_on_device_32(buff,
                                                      composite_key_dict,
                                                      hash_entry_count,
                                                      invalid_slot_val,
                                                      key_component_count,
                                                      key_handler,
                                                      num_elems,
                                                      block_size_x,
                                                      grid_size_x);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
  }
}

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, SIZE>::type* = nullptr>
void fill_one_to_many_baseline_hash_table_on_device(int32_t* buff,
                                                    const SIZE* composite_key_dict,
                                                    const size_t hash_entry_count,
                                                    const int32_t invalid_slot_val,
                                                    const size_t key_component_count,
                                                    const KEY_HANDLER* key_handler,
                                                    const size_t num_elems,
                                                    const size_t block_size_x,
                                                    const size_t grid_size_x) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_one_to_many_baseline_hash_table_on_device_64(buff,
                                                      composite_key_dict,
                                                      hash_entry_count,
                                                      invalid_slot_val,
                                                      key_handler,
                                                      num_elems,
                                                      block_size_x,
                                                      grid_size_x);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    overlaps_fill_one_to_many_baseline_hash_table_on_device_64(buff,
                                                               composite_key_dict,
                                                               hash_entry_count,
                                                               invalid_slot_val,
                                                               key_handler,
                                                               num_elems,
                                                               block_size_x,
                                                               grid_size_x);
  }
}

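// Builds a baseline (composite-key) join hash table, either on the CPU or on a
// CUDA device, and hands the finished table to the caller via getHashTable().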
class BaselineJoinHashTableBuilder {
 public:
  BaselineJoinHashTableBuilder(const Catalog_Namespace::Catalog* catalog)
      : catalog_(catalog) {}

  template <class KEY_HANDLER>
  int initHashTableOnCpu(KEY_HANDLER* key_handler,
                         const CompositeKeyInfo& composite_key_info,
                         const std::vector<JoinColumn>& join_columns,
                         const std::vector<JoinColumnTypeInfo>& join_column_types,
                         const std::vector<JoinBucketInfo>& join_bucket_info,
                         const size_t keyspace_entry_count,
                         const size_t keys_for_all_rows,
                         const JoinHashTableInterface::HashType layout,
                         const size_t key_component_width,
                         const size_t key_component_count) {
    auto timer = DEBUG_TIMER(__func__);
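    // Each entry stores key_component_count key components plus, for one-to-one
    // layouts, one value slot. Layouts that need a one-to-many section get an
    // extra 2 * keyspace_entry_count + keys_for_all_rows int32 slots.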
    const auto entry_size =
        (key_component_count +
         (layout == JoinHashTableInterface::HashType::OneToOne ? 1 : 0)) *
        key_component_width;
    const size_t one_to_many_hash_entries =
        JoinHashTableInterface::layoutRequiresAdditionalBuffers(layout)
            ? 2 * keyspace_entry_count + keys_for_all_rows
            : 0;
    const size_t hash_table_size =
        entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);

    // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
    if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
      throw TooManyHashEntries(
          "Hash tables for GPU requiring larger than 2GB contiguous memory not supported "
          "yet");
    }

    VLOG(1) << "Initializing CPU Join Hash Table with " << keyspace_entry_count
            << " hash entries and " << one_to_many_hash_entries
            << " entries in the one to many buffer";
    VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";

    hash_table_ = std::make_unique<BaselineHashTable>(
        catalog_, layout, keyspace_entry_count, keys_for_all_rows, hash_table_size);
    auto cpu_hash_table_ptr = hash_table_->getCpuBuffer();
    int thread_count = cpu_threads();
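    // First pass: initialize every slot to the invalid sentinel (-1), with the
    // keyspace partitioned across cpu_threads() workers.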
    std::vector<std::future<void>> init_cpu_buff_threads;
    for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
      init_cpu_buff_threads.emplace_back(
          std::async(std::launch::async,
                     [keyspace_entry_count,
                      key_component_count,
                      key_component_width,
                      thread_idx,
                      thread_count,
                      cpu_hash_table_ptr,
                      layout] {
                       switch (key_component_width) {
                         case 4:
                           init_baseline_hash_join_buff_32(
                               cpu_hash_table_ptr,
                               keyspace_entry_count,
                               key_component_count,
                               layout == JoinHashTableInterface::HashType::OneToOne,
                               -1,
                               thread_idx,
                               thread_count);
                           break;
                         case 8:
                           init_baseline_hash_join_buff_64(
                               cpu_hash_table_ptr,
                               keyspace_entry_count,
                               key_component_count,
                               layout == JoinHashTableInterface::HashType::OneToOne,
                               -1,
                               thread_idx,
                               thread_count);
                           break;
                         default:
                           CHECK(false);
                       }
                     }));
    }
    for (auto& child : init_cpu_buff_threads) {
      child.get();
    }
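    // Second pass: insert the keys, again partitioned across threads. Workers
    // return the runtime's error code; any nonzero result aborts the build.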
    std::vector<std::future<int>> fill_cpu_buff_threads;
    for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
      fill_cpu_buff_threads.emplace_back(
          std::async(std::launch::async,
                     [key_handler,
                      keyspace_entry_count,
                      &join_columns,
                      key_component_count,
                      key_component_width,
                      layout,
                      thread_idx,
                      cpu_hash_table_ptr,
                      thread_count] {
                       switch (key_component_width) {
                         case 4: {
                           return fill_baseline_hash_join_buff<int32_t>(
                               cpu_hash_table_ptr,
                               keyspace_entry_count,
                               -1,
                               key_component_count,
                               layout == JoinHashTableInterface::HashType::OneToOne,
                               key_handler,
                               join_columns[0].num_elems,
                               thread_idx,
                               thread_count);
                           break;
                         }
                         case 8: {
                           return fill_baseline_hash_join_buff<int64_t>(
                               cpu_hash_table_ptr,
                               keyspace_entry_count,
                               -1,
                               key_component_count,
                               layout == JoinHashTableInterface::HashType::OneToOne,
                               key_handler,
                               join_columns[0].num_elems,
                               thread_idx,
                               thread_count);
                           break;
                         }
                         default:
                           CHECK(false);
                       }
                       return -1;
                     }));
    }
    int err = 0;
    for (auto& child : fill_cpu_buff_threads) {
      int partial_err = child.get();
      if (partial_err) {
        err = partial_err;
      }
    }
    if (err) {
      return err;
    }
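    // For layouts with a one-to-many section, initialize and then populate the
    // auxiliary buffer that follows the composite key dictionary in the same
    // allocation.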
    if (JoinHashTableInterface::layoutRequiresAdditionalBuffers(layout)) {
      auto one_to_many_buff = reinterpret_cast<int32_t*>(
          cpu_hash_table_ptr + keyspace_entry_count * entry_size);
      init_hash_join_buff(one_to_many_buff, keyspace_entry_count, -1, 0, 1);
      switch (key_component_width) {
        case 4: {
          const auto composite_key_dict = reinterpret_cast<int32_t*>(cpu_hash_table_ptr);
          fill_one_to_many_baseline_hash_table_32(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              join_columns,
              join_column_types,
              join_bucket_info,
              composite_key_info.sd_inner_proxy_per_key,
              composite_key_info.sd_outer_proxy_per_key,
              thread_count);
          break;
        }
        case 8: {
          const auto composite_key_dict = reinterpret_cast<int64_t*>(cpu_hash_table_ptr);
          fill_one_to_many_baseline_hash_table_64(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              join_columns,
              join_column_types,
              join_bucket_info,
              composite_key_info.sd_inner_proxy_per_key,
              composite_key_info.sd_outer_proxy_per_key,
              thread_count);
          break;
        }
        default:
          CHECK(false);
      }
    }
    return err;
  }

  void allocateDeviceMemory(const JoinHashTableInterface::HashType layout,
                            const size_t key_component_width,
                            const size_t key_component_count,
                            const size_t keyspace_entry_count,
                            const size_t emitted_keys_count,
                            const int device_id) {
#ifdef HAVE_CUDA
    const auto entry_size =
        (key_component_count +
         (layout == JoinHashTableInterface::HashType::OneToOne ? 1 : 0)) *
        key_component_width;
    const size_t one_to_many_hash_entries =
        JoinHashTableInterface::layoutRequiresAdditionalBuffers(layout)
            ? 2 * keyspace_entry_count + emitted_keys_count
            : 0;
    const size_t hash_table_size =
        entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);

    // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
    if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
      throw TooManyHashEntries(
          "Hash tables for GPU requiring larger than 2GB contiguous memory not supported "
          "yet");
    }

    VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
            << keyspace_entry_count << " hash entries and " << one_to_many_hash_entries
            << " entries in the one to many buffer";
    VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";

    hash_table_ = std::make_unique<BaselineHashTable>(catalog_,
                                                      layout,
                                                      keyspace_entry_count,
                                                      emitted_keys_count,
                                                      hash_table_size,
                                                      device_id);
#else
    UNREACHABLE();
#endif
  }

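  // GPU build path: allocates the device buffer, runs the init and fill
  // kernels, and reads back the device-side error buffer after each fill
  // launch.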
  template <class KEY_HANDLER>
  int initHashTableOnGpu(KEY_HANDLER* key_handler,
                         const std::vector<JoinColumn>& join_columns,
                         const JoinHashTableInterface::HashType layout,
                         const size_t key_component_width,
                         const size_t key_component_count,
                         const size_t keyspace_entry_count,
                         const size_t emitted_keys_count,
                         const int device_id,
                         const unsigned block_size,
                         const unsigned grid_size) {
    auto timer = DEBUG_TIMER(__func__);
    int err = 0;
#ifdef HAVE_CUDA
    allocateDeviceMemory(layout,
                         key_component_width,
                         key_component_count,
                         keyspace_entry_count,
                         emitted_keys_count,
                         device_id);
    auto& data_mgr = catalog_->getDataMgr();
    CudaAllocator allocator(&data_mgr, device_id);
    auto dev_err_buff = reinterpret_cast<CUdeviceptr>(allocator.alloc(sizeof(int)));
    copy_to_gpu(&data_mgr, dev_err_buff, &err, sizeof(err), device_id);
    auto gpu_hash_table_buff = hash_table_->getGpuBuffer();
    CHECK(gpu_hash_table_buff);
    auto hash_buff = gpu_hash_table_buff->getMemoryPtr();

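    // Clear the device-side table to the invalid sentinel before inserting keys.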
    switch (key_component_width) {
      case 4:
        init_baseline_hash_join_buff_on_device_32(
            hash_buff,
            keyspace_entry_count,
            key_component_count,
            layout == JoinHashTableInterface::HashType::OneToOne,
            -1,
            block_size,
            grid_size);
        break;
      case 8:
        init_baseline_hash_join_buff_on_device_64(
            hash_buff,
            keyspace_entry_count,
            key_component_count,
            layout == JoinHashTableInterface::HashType::OneToOne,
            -1,
            block_size,
            grid_size);
        break;
      default:
        UNREACHABLE();
    }
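    // The key handler is a flat host-side object; copy it to the device so the
    // fill kernels can use it, then read the error code back after each launch.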
    const auto key_handler_gpu = transfer_flat_object_to_gpu(*key_handler, allocator);
    switch (key_component_width) {
      case 4: {
        fill_baseline_hash_join_buff_on_device<int32_t>(
            hash_buff,
            keyspace_entry_count,
            -1,
            key_component_count,
            layout == JoinHashTableInterface::HashType::OneToOne,
            reinterpret_cast<int*>(dev_err_buff),
            key_handler_gpu,
            join_columns.front().num_elems,
            block_size,
            grid_size);
        copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
        break;
      }
      case 8: {
        fill_baseline_hash_join_buff_on_device<int64_t>(
            hash_buff,
            keyspace_entry_count,
            -1,
            key_component_count,
            layout == JoinHashTableInterface::HashType::OneToOne,
            reinterpret_cast<int*>(dev_err_buff),
            key_handler_gpu,
            join_columns.front().num_elems,
            block_size,
            grid_size);
        copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
        break;
      }
      default:
        UNREACHABLE();
    }
    if (err) {
      return err;
    }
    if (JoinHashTableInterface::layoutRequiresAdditionalBuffers(layout)) {
      const auto entry_size = key_component_count * key_component_width;
      auto one_to_many_buff =
          reinterpret_cast<int32_t*>(hash_buff + keyspace_entry_count * entry_size);
      init_hash_join_buff_on_device(
          one_to_many_buff, keyspace_entry_count, -1, block_size, grid_size);
      switch (key_component_width) {
        case 4: {
          const auto composite_key_dict = reinterpret_cast<int32_t*>(hash_buff);
          fill_one_to_many_baseline_hash_table_on_device<int32_t>(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              key_handler_gpu,
              join_columns.front().num_elems,
              block_size,
              grid_size);

          break;
        }
        case 8: {
          const auto composite_key_dict = reinterpret_cast<int64_t*>(hash_buff);
          fill_one_to_many_baseline_hash_table_on_device<int64_t>(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              key_handler_gpu,
              join_columns.front().num_elems,
              block_size,
              grid_size);

          break;
        }
        default:
          UNREACHABLE();
      }
    }
#else
    UNREACHABLE();
#endif
    return err;
  }

  std::unique_ptr<BaselineHashTable> getHashTable() { return std::move(hash_table_); }

 private:
  const Catalog_Namespace::Catalog* catalog_;

  std::unique_ptr<BaselineHashTable> hash_table_;
};
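
// A minimal CPU-side usage sketch (illustrative only: the catalog, key handler,
// and column metadata are assumed to come from the surrounding join framework):
//
//   BaselineJoinHashTableBuilder builder(catalog);
//   const int err = builder.initHashTableOnCpu(&key_handler,
//                                              composite_key_info,
//                                              join_columns,
//                                              join_column_types,
//                                              join_bucket_info,
//                                              keyspace_entry_count,
//                                              keys_for_all_rows,
//                                              JoinHashTableInterface::HashType::OneToOne,
//                                              key_component_width,
//                                              key_component_count);
//   if (!err) {
//     auto hash_table = builder.getHashTable();  // ownership moves to the caller
//   }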