OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BaselineHashTableBuilder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
24 #include "Shared/thread_count.h"
25 
// CPU-side dispatch helper for filling a baseline hash join buffer.
//
// When KEY_HANDLER is GenericKeyHandler, every argument is forwarded unchanged
// to fill_baseline_hash_join_buff_32 and its int result is returned. Any other
// KEY_HANDLER must be OverlapsKeyHandler; the static_assert rejects everything
// else at compile time.
//
// NOTE(review): listing lines 28 and 51 are absent from this extract (the
// numbering jumps 27 -> 29 and 50 -> 52). The last template parameter and the
// callee of the else-branch are therefore not visible here; the callee is
// presumably overlaps_fill_baseline_hash_join_buff_32 — confirm against the
// original header.
26 template <typename SIZE,
27  class KEY_HANDLER,
29 int fill_baseline_hash_join_buff(int8_t* hash_buff,
30  const size_t entry_count,
31  const int32_t invalid_slot_val,
32  const size_t key_component_count,
33  const bool with_val_slot,
34  const KEY_HANDLER* key_handler,
35  const size_t num_elems,
36  const int32_t cpu_thread_idx,
37  const int32_t cpu_thread_count) {
   // Compile-time selection: only the branch matching KEY_HANDLER is instantiated.
38  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
39  return fill_baseline_hash_join_buff_32(hash_buff,
40  entry_count,
41  invalid_slot_val,
42  key_component_count,
43  with_val_slot,
44  key_handler,
45  num_elems,
46  cpu_thread_idx,
47  cpu_thread_count);
48  } else {
49  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
50  "Only Generic or Overlaps Key Handlers are supported.");
   // (call target on the missing listing line 51; the arguments below mirror
   // the generic branch minus the first one, which was on the missing line)
52  entry_count,
53  invalid_slot_val,
54  key_component_count,
55  with_val_slot,
56  key_handler,
57  num_elems,
58  cpu_thread_idx,
59  cpu_thread_count);
60  }
61 }
62 
// CPU-side dispatch helper for filling a baseline hash join buffer; this
// overload's GenericKeyHandler branch forwards to the 64-bit implementation.
//
// When KEY_HANDLER is GenericKeyHandler, every argument is forwarded unchanged
// to fill_baseline_hash_join_buff_64 and its int result is returned. Any other
// KEY_HANDLER must be OverlapsKeyHandler (enforced by the static_assert).
//
// NOTE(review): listing lines 65 and 88 are absent from this extract, so the
// last template parameter (what distinguishes this overload from the 32-bit
// one) and the callee of the else-branch are not visible; the callee is
// presumably overlaps_fill_baseline_hash_join_buff_64 — confirm against the
// original header.
63 template <typename SIZE,
64  class KEY_HANDLER,
66 int fill_baseline_hash_join_buff(int8_t* hash_buff,
67  const size_t entry_count,
68  const int32_t invalid_slot_val,
69  const size_t key_component_count,
70  const bool with_val_slot,
71  const KEY_HANDLER* key_handler,
72  const size_t num_elems,
73  const int32_t cpu_thread_idx,
74  const int32_t cpu_thread_count) {
   // Compile-time selection: only the branch matching KEY_HANDLER is instantiated.
75  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
76  return fill_baseline_hash_join_buff_64(hash_buff,
77  entry_count,
78  invalid_slot_val,
79  key_component_count,
80  with_val_slot,
81  key_handler,
82  num_elems,
83  cpu_thread_idx,
84  cpu_thread_count);
85  } else {
86  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
87  "Only Generic or Overlaps Key Handlers are supported.");
   // (call target on the missing listing line 88)
89  entry_count,
90  invalid_slot_val,
91  key_component_count,
92  with_val_slot,
93  key_handler,
94  num_elems,
95  cpu_thread_idx,
96  cpu_thread_count);
97  }
98 }
99 
// Dispatch helper taking a dev_err_buff error pointer; judging by the fatal
// message below, this is the 32-bit-key variant.
//
// For GenericKeyHandler the arguments are forwarded to a callee whose name is
// on a missing listing line. For OverlapsKeyHandler there is no 32-bit
// implementation: the branch compiles, but reaching it at run time logs a
// FATAL message.
//
// NOTE(review): listing lines 102-103 (last template parameter, return type,
// function name and first parameter) and line 112 (callee) are absent from
// this extract. Presumably this is fill_baseline_hash_join_buff_on_device
// forwarding to fill_baseline_hash_join_buff_on_device_32 — confirm against
// the original header.
100 template <typename SIZE,
101  class KEY_HANDLER,
104  const size_t entry_count,
105  const int32_t invalid_slot_val,
106  const size_t key_component_count,
107  const bool with_val_slot,
108  int* dev_err_buff,
109  const KEY_HANDLER* key_handler,
110  const size_t num_elems) {
111  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
   // (call target and first argument on the missing listing line 112)
113  entry_count,
114  invalid_slot_val,
115  key_component_count,
116  with_val_slot,
117  dev_err_buff,
118  key_handler,
119  num_elems);
120  } else {
121  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
122  "Only Generic or Overlaps Key Handlers are supported.");
   // Run-time failure rather than a compile-time one: this combination still
   // instantiates successfully.
123  LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
124  }
125 }
126 
// Dispatch helper for filling a baseline hash join buffer through the
// "_on_device" entry points; errors are reported through dev_err_buff rather
// than a return value (the function returns void).
//
// Both branches forward the same argument list; KEY_HANDLER must be either
// GenericKeyHandler or OverlapsKeyHandler (enforced by the static_assert).
//
// NOTE(review): listing lines 129, 139 and 150 are absent from this extract,
// so the last template parameter and both callees are not visible. They are
// presumably fill_baseline_hash_join_buff_on_device_64 and
// overlaps_fill_baseline_hash_join_buff_on_device_64 — confirm against the
// original header.
127 template <typename SIZE,
128  class KEY_HANDLER,
130 void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
131  const size_t entry_count,
132  const int32_t invalid_slot_val,
133  const size_t key_component_count,
134  const bool with_val_slot,
135  int* dev_err_buff,
136  const KEY_HANDLER* key_handler,
137  const size_t num_elems) {
138  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
   // (call target and first argument on the missing listing line 139)
140  entry_count,
141  invalid_slot_val,
142  key_component_count,
143  with_val_slot,
144  dev_err_buff,
145  key_handler,
146  num_elems);
147  } else {
148  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
149  "Only Generic or Overlaps Key Handlers are supported.");
   // (call target and first argument on the missing listing line 150)
151  entry_count,
152  invalid_slot_val,
153  key_component_count,
154  with_val_slot,
155  dev_err_buff,
156  key_handler,
157  num_elems);
158  }
159 }
160 
// Dispatch helper that receives the composite key dictionary (const SIZE*)
// plus a key handler; judging by the fatal message below, this is the
// 32-bit-key variant.
//
// For GenericKeyHandler the arguments, including key_component_count, are
// forwarded to a callee on a missing listing line. For OverlapsKeyHandler no
// 32-bit implementation exists, so reaching that branch at run time logs a
// FATAL message.
//
// NOTE(review): listing lines 163-164 (last template parameter, return type,
// function name and first parameter) and line 172 (callee) are absent from
// this extract. Presumably this is
// fill_one_to_many_baseline_hash_table_on_device forwarding to
// fill_one_to_many_baseline_hash_table_on_device_32 — confirm against the
// original header.
161 template <typename SIZE,
162  class KEY_HANDLER,
165  const SIZE* composite_key_dict,
166  const size_t hash_entry_count,
167  const int32_t invalid_slot_val,
168  const size_t key_component_count,
169  const KEY_HANDLER* key_handler,
170  const size_t num_elems) {
171  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
   // (call target and first argument on the missing listing line 172)
173  composite_key_dict,
174  hash_entry_count,
175  invalid_slot_val,
176  key_component_count,
177  key_handler,
178  num_elems);
179  } else {
180  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
181  "Only Generic or Overlaps Key Handlers are supported.");
   // Run-time failure rather than a compile-time one: this combination still
   // instantiates successfully.
182  LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
183  }
184 }
185 
// Dispatch helper that receives the composite key dictionary (const SIZE*)
// plus a key handler and forwards to a handler-specific callee.
//
// Unlike the sibling whose else-branch logs a FATAL message, both branches
// here make a call, and neither visible argument list passes
// key_component_count: the parameter is accepted but not forwarded.
//
// NOTE(review): listing lines 188-189 (last template parameter, return type,
// function name and first parameter) and lines 197 / 206 (callees) are absent
// from this extract. Presumably the callees are
// fill_one_to_many_baseline_hash_table_on_device_64 and
// overlaps_fill_one_to_many_baseline_hash_table_on_device_64 — confirm against
// the original header.
186 template <typename SIZE,
187  class KEY_HANDLER,
190  const SIZE* composite_key_dict,
191  const size_t hash_entry_count,
192  const int32_t invalid_slot_val,
193  const size_t key_component_count,
194  const KEY_HANDLER* key_handler,
195  const size_t num_elems) {
196  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
   // (call target and first argument on the missing listing line 197)
198  composite_key_dict,
199  hash_entry_count,
200  invalid_slot_val,
201  key_handler,
202  num_elems);
203  } else {
204  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
205  "Only Generic or Overlaps Key Handlers are supported.");
   // (call target and first argument on the missing listing line 206)
207  composite_key_dict,
208  hash_entry_count,
209  invalid_slot_val,
210  key_handler,
211  num_elems);
212  }
213 }
214 
// NOTE(review): listing lines 215 and 217 (the class header and the
// constructor signature) are absent from this extract; only the access
// specifier and the constructor's initializer list remain.
216  public:
   // Constructor initializer list: stores the `catalog` argument in catalog_.
   // The constructor body is empty.
218  : catalog_(catalog) {}
219 
// Builds a baseline join hash table in CPU memory and stores it in hash_table_.
//
// Visible steps:
//  1. Compute the per-entry size (key components, plus one extra slot for the
//     OneToOne layout) and the total buffer size; throw TooManyHashEntries if
//     the total exceeds INT32_MAX bytes.
//  2. Create the BaselineHashTable, then initialise its CPU buffer in parallel
//     (one std::async task per cpu_threads()) using -1 as the invalid slot value.
//  3. Fill the buffer in parallel through fill_baseline_hash_join_buff; if any
//     task reports a nonzero code, return it (the last nonzero code in thread
//     order wins).
//  4. In a guarded section, initialise and fill the one-to-many buffer that
//     follows the keyspace_entry_count key entries.
//
// key_component_width must be 4 or 8; any other value hits CHECK(false).
// Returns 0 on success, otherwise the error code from a fill task.
//
// NOTE(review): listing lines 236, 350, 357 and 373 are absent from this
// extract: the condition selecting one_to_many_hash_entries, the guard that
// opens the one-to-many section, and the two one-to-many fill callees
// (presumably fill_one_to_many_baseline_hash_table_32 / _64) — confirm against
// the original header.
220  template <class KEY_HANDLER>
221  int initHashTableOnCpu(KEY_HANDLER* key_handler,
222  const CompositeKeyInfo& composite_key_info,
223  const std::vector<JoinColumn>& join_columns,
224  const std::vector<JoinColumnTypeInfo>& join_column_types,
225  const std::vector<JoinBucketInfo>& join_bucket_info,
226  const size_t keyspace_entry_count,
227  const size_t keys_for_all_rows,
228  const HashType layout,
229  const size_t key_component_width,
230  const size_t key_component_count) {
231  auto timer = DEBUG_TIMER(__func__);
   // OneToOne entries carry one extra slot of key_component_width bytes.
232  const auto entry_size =
233  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
234  key_component_width;
   // (the condition of this ternary is on the missing listing line 236)
235  const size_t one_to_many_hash_entries =
237  ? 2 * keyspace_entry_count + keys_for_all_rows
238  : 0;
   // The extra entries are int32_t-sized regardless of key width.
239  const size_t hash_table_size =
240  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
241 
242  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
   // NOTE(review): this is the CPU build path, yet the comment above and the
   // exception text below both refer to GPU; the same 2GB (INT32_MAX) cap is
   // applied here. Confirm whether the limit and wording are intended for CPU.
243  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
244  throw TooManyHashEntries(
245  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
246  "yet");
247  }
248 
249  VLOG(1) << "Initializing CPU Join Hash Table with " << keyspace_entry_count
250  << " hash entries and " << one_to_many_hash_entries
251  << " entries in the one to many buffer";
252  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
253 
254  hash_table_ = std::make_unique<BaselineHashTable>(
255  catalog_, layout, keyspace_entry_count, keys_for_all_rows, hash_table_size);
256  auto cpu_hash_table_ptr = hash_table_->getCpuBuffer();
257  int thread_count = cpu_threads();
   // Phase 1: parallel initialisation. Each task gets its own thread_idx and
   // the shared thread_count; everything is captured by value.
258  std::vector<std::future<void>> init_cpu_buff_threads;
259  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
260  init_cpu_buff_threads.emplace_back(
261  std::async(std::launch::async,
262  [keyspace_entry_count,
263  key_component_count,
264  key_component_width,
265  thread_idx,
266  thread_count,
267  cpu_hash_table_ptr,
268  layout] {
269  switch (key_component_width) {
270  case 4:
271  init_baseline_hash_join_buff_32(cpu_hash_table_ptr,
272  keyspace_entry_count,
273  key_component_count,
274  layout == HashType::OneToOne,
275  -1,
276  thread_idx,
277  thread_count);
278  break;
279  case 8:
280  init_baseline_hash_join_buff_64(cpu_hash_table_ptr,
281  keyspace_entry_count,
282  key_component_count,
283  layout == HashType::OneToOne,
284  -1,
285  thread_idx,
286  thread_count);
287  break;
288  default:
289  CHECK(false);
290  }
291  }));
292  }
   // Wait for every init task before filling; get() also rethrows any
   // exception raised inside a task.
293  for (auto& child : init_cpu_buff_threads) {
294  child.get();
295  }
   // Phase 2: parallel fill. join_columns is captured by reference; every
   // future is waited on with get() further down in this function.
296  std::vector<std::future<int>> fill_cpu_buff_threads;
297  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
298  fill_cpu_buff_threads.emplace_back(std::async(
299  std::launch::async,
300  [key_handler,
301  keyspace_entry_count,
302  &join_columns,
303  key_component_count,
304  key_component_width,
305  layout,
306  thread_idx,
307  cpu_hash_table_ptr,
308  thread_count] {
309  switch (key_component_width) {
310  case 4: {
   // Only the first join column's num_elems is passed as the element count.
311  return fill_baseline_hash_join_buff<int32_t>(cpu_hash_table_ptr,
312  keyspace_entry_count,
313  -1,
314  key_component_count,
315  layout == HashType::OneToOne,
316  key_handler,
317  join_columns[0].num_elems,
318  thread_idx,
319  thread_count);
   // Unreachable: follows an unconditional return.
320  break;
321  }
322  case 8: {
323  return fill_baseline_hash_join_buff<int64_t>(cpu_hash_table_ptr,
324  keyspace_entry_count,
325  -1,
326  key_component_count,
327  layout == HashType::OneToOne,
328  key_handler,
329  join_columns[0].num_elems,
330  thread_idx,
331  thread_count);
   // Unreachable: follows an unconditional return.
332  break;
333  }
334  default:
335  CHECK(false);
336  }
   // Only reachable via the default case, and only if CHECK(false) returns
   // (presumably it aborts — confirm in Logger.h).
337  return -1;
338  }));
339  }
   // Collect results from all tasks; a later nonzero code overwrites an
   // earlier one.
340  int err = 0;
341  for (auto& child : fill_cpu_buff_threads) {
342  int partial_err = child.get();
343  if (partial_err) {
344  err = partial_err;
345  }
346  }
347  if (err) {
348  return err;
349  }
   // (the guard opening this section is on the missing listing line 350; its
   // closing brace is at listing line 390)
   // The one-to-many buffer starts right after the keyspace_entry_count key
   // entries in the same allocation.
351  auto one_to_many_buff = reinterpret_cast<int32_t*>(
352  cpu_hash_table_ptr + keyspace_entry_count * entry_size);
   // Initialised in a single call (thread index 0 of 1) with -1 as the
   // invalid slot value.
353  init_hash_join_buff(one_to_many_buff, keyspace_entry_count, -1, 0, 1);
354  switch (key_component_width) {
355  case 4: {
   // The start of the buffer is reinterpreted as the composite key dictionary.
356  const auto composite_key_dict = reinterpret_cast<int32_t*>(cpu_hash_table_ptr);
   // (call target on the missing listing line 357)
358  one_to_many_buff,
359  composite_key_dict,
360  keyspace_entry_count,
361  -1,
362  key_component_count,
363  join_columns,
364  join_column_types,
365  join_bucket_info,
366  composite_key_info.sd_inner_proxy_per_key,
367  composite_key_info.sd_outer_proxy_per_key,
368  thread_count);
369  break;
370  }
371  case 8: {
372  const auto composite_key_dict = reinterpret_cast<int64_t*>(cpu_hash_table_ptr);
   // (call target on the missing listing line 373)
374  one_to_many_buff,
375  composite_key_dict,
376  keyspace_entry_count,
377  -1,
378  key_component_count,
379  join_columns,
380  join_column_types,
381  join_bucket_info,
382  composite_key_info.sd_inner_proxy_per_key,
383  composite_key_info.sd_outer_proxy_per_key,
384  thread_count);
385  break;
386  }
387  default:
388  CHECK(false);
389  }
390  }
   // err is necessarily 0 here: nonzero values returned early above.
391  return err;
392  }
393 
// Sizes and creates the BaselineHashTable for a given device, storing it in
// hash_table_.
//
// With HAVE_CUDA defined: computes the same entry-size / total-size figures as
// the CPU path, throws TooManyHashEntries when the total exceeds INT32_MAX
// bytes, logs the sizes, and constructs the table with device_id.
// Without HAVE_CUDA the body is just UNREACHABLE().
//
// NOTE(review): listing line 405 (the condition selecting
// one_to_many_hash_entries) is absent from this extract.
394  void allocateDeviceMemory(const HashType layout,
395  const size_t key_component_width,
396  const size_t key_component_count,
397  const size_t keyspace_entry_count,
398  const size_t emitted_keys_count,
399  const int device_id) {
400 #ifdef HAVE_CUDA
   // OneToOne entries carry one extra slot of key_component_width bytes.
401  const auto entry_size =
402  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
403  key_component_width;
   // (the condition of this ternary is on the missing listing line 405)
404  const size_t one_to_many_hash_entries =
406  ? 2 * keyspace_entry_count + emitted_keys_count
407  : 0;
408  const size_t hash_table_size =
409  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
410 
411  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
412  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
413  throw TooManyHashEntries(
414  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
415  "yet");
416  }
417 
418  VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
419  << keyspace_entry_count << " hash entries and " << one_to_many_hash_entries
420  << " entries in the one to many buffer";
421  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
422 
   // Replaces any table previously held in hash_table_.
423  hash_table_ = std::make_unique<BaselineHashTable>(catalog_,
424  layout,
425  keyspace_entry_count,
426  emitted_keys_count,
427  hash_table_size,
428  device_id);
429 #else
430  UNREACHABLE();
431 #endif
432  }
433 
// Builds a baseline join hash table on the given device and stores it in
// hash_table_.
//
// With HAVE_CUDA defined, the visible steps are:
//  1. allocateDeviceMemory() sizes and creates the table; an empty keyspace
//     returns 0 right after that allocation.
//  2. A device-side int error cell is allocated and seeded with the host value
//     of err (0).
//  3. The buffer is initialised with -1 as the invalid slot value, the key
//     handler is copied to the device, and the buffer is filled; err is then
//     read back from the device and returned if nonzero.
//  4. In a guarded section, the one-to-many buffer following the key entries
//     is initialised and filled.
// Without HAVE_CUDA the body is UNREACHABLE() followed by `return err`.
//
// key_component_width must be 4 or 8; any other value hits UNREACHABLE().
// Returns 0 on success, otherwise the error code read back from the device.
//
// NOTE(review): listing line 516 (the guard that opens the one-to-many
// section, closed at listing line 551) is absent from this extract.
434  template <class KEY_HANDLER>
435  int initHashTableOnGpu(KEY_HANDLER* key_handler,
436  const std::vector<JoinColumn>& join_columns,
437  const HashType layout,
438  const size_t key_component_width,
439  const size_t key_component_count,
440  const size_t keyspace_entry_count,
441  const size_t emitted_keys_count,
442  const int device_id) {
443  auto timer = DEBUG_TIMER(__func__);
444  int err = 0;
445 #ifdef HAVE_CUDA
446  allocateDeviceMemory(layout,
447  key_component_width,
448  key_component_count,
449  keyspace_entry_count,
450  emitted_keys_count,
451  device_id);
452  if (!keyspace_entry_count) {
453  // need to "allocate" the empty hash table first
454  CHECK(!emitted_keys_count);
455  return 0;
456  }
457  auto& data_mgr = catalog_->getDataMgr();
458  CudaAllocator allocator(&data_mgr, device_id);
   // Device-side error cell, seeded with the current host value of err (0).
459  auto dev_err_buff = reinterpret_cast<CUdeviceptr>(allocator.alloc(sizeof(int)));
460  copy_to_gpu(&data_mgr, dev_err_buff, &err, sizeof(err), device_id);
461  auto gpu_hash_table_buff = hash_table_->getGpuBuffer();
462  CHECK(gpu_hash_table_buff);
463 
   // Initialise every slot with -1 as the invalid slot value.
464  switch (key_component_width) {
465  case 4:
466  init_baseline_hash_join_buff_on_device_32(gpu_hash_table_buff,
467  keyspace_entry_count,
468  key_component_count,
469  layout == HashType::OneToOne,
470  -1);
471  break;
472  case 8:
473  init_baseline_hash_join_buff_on_device_64(gpu_hash_table_buff,
474  keyspace_entry_count,
475  key_component_count,
476  layout == HashType::OneToOne,
477  -1);
478  break;
479  default:
480  UNREACHABLE();
481  }
   // Copy the host key handler object to the device through the same allocator.
482  const auto key_handler_gpu = transfer_flat_object_to_gpu(*key_handler, allocator);
483  switch (key_component_width) {
484  case 4: {
485  fill_baseline_hash_join_buff_on_device<int32_t>(
486  gpu_hash_table_buff,
487  keyspace_entry_count,
488  -1,
489  key_component_count,
490  layout == HashType::OneToOne,
491  reinterpret_cast<int*>(dev_err_buff),
492  key_handler_gpu,
493  join_columns.front().num_elems);
   // Read the device error cell back into err.
494  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
495  break;
496  }
497  case 8: {
498  fill_baseline_hash_join_buff_on_device<int64_t>(
499  gpu_hash_table_buff,
500  keyspace_entry_count,
501  -1,
502  key_component_count,
503  layout == HashType::OneToOne,
504  reinterpret_cast<int*>(dev_err_buff),
505  key_handler_gpu,
506  join_columns.front().num_elems);
   // Read the device error cell back into err.
507  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
508  break;
509  }
510  default:
511  UNREACHABLE();
512  }
513  if (err) {
514  return err;
515  }
   // (the guard opening this section is on the missing listing line 516)
   // Entry size here has no extra value slot: key components only.
517  const auto entry_size = key_component_count * key_component_width;
   // The one-to-many buffer starts right after the keyspace_entry_count key
   // entries in the same allocation.
518  auto one_to_many_buff = reinterpret_cast<int32_t*>(
519  gpu_hash_table_buff + keyspace_entry_count * entry_size);
520  init_hash_join_buff_on_device(one_to_many_buff, keyspace_entry_count, -1);
521  switch (key_component_width) {
522  case 4: {
   // The start of the buffer is reinterpreted as the composite key dictionary.
523  const auto composite_key_dict = reinterpret_cast<int32_t*>(gpu_hash_table_buff);
524  fill_one_to_many_baseline_hash_table_on_device<int32_t>(
525  one_to_many_buff,
526  composite_key_dict,
527  keyspace_entry_count,
528  -1,
529  key_component_count,
530  key_handler_gpu,
531  join_columns.front().num_elems);
532 
533  break;
534  }
535  case 8: {
536  const auto composite_key_dict = reinterpret_cast<int64_t*>(gpu_hash_table_buff);
537  fill_one_to_many_baseline_hash_table_on_device<int64_t>(
538  one_to_many_buff,
539  composite_key_dict,
540  keyspace_entry_count,
541  -1,
542  key_component_count,
543  key_handler_gpu,
544  join_columns.front().num_elems);
545 
546  break;
547  }
548  default:
549  UNREACHABLE();
550  }
551  }
552 #else
553  UNREACHABLE();
554 #endif
   // In the CUDA path err is necessarily 0 here: nonzero values returned
   // early above.
555  return err;
556  }
557 
// Transfers ownership of the built table to the caller. hash_table_ is moved
// from, so the builder holds a null pointer afterwards and a second call
// returns an empty unique_ptr.
558  std::unique_ptr<BaselineHashTable> getHashTable() { return std::move(hash_table_); }
559 
560  private:
   // NOTE(review): listing line 561 is absent from this extract; presumably it
   // declares the catalog_ member that the constructor initialises — confirm
   // against the original header.
562 
   // The table under construction: assigned by initHashTableOnCpu() and
   // allocateDeviceMemory(), handed to the caller by getHashTable().
563  std::unique_ptr<BaselineHashTable> hash_table_;
564 };
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:102
void fill_one_to_many_baseline_hash_table_on_device(int32_t *buff, const SIZE *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const KEY_HANDLER *key_handler, const size_t num_elems)
void init_baseline_hash_join_buff_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void * > &sd_inner_proxy_per_key, const std::vector< const void * > &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
std::vector< const void * > sd_inner_proxy_per_key
Definition: HashJoin.h:96
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:223
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
BaselineJoinHashTableBuilder(const Catalog_Namespace::Catalog *catalog)
const Catalog_Namespace::Catalog * catalog_
#define LOG(tag)
Definition: Logger.h:194
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void overlaps_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
unsigned long long CUdeviceptr
Definition: nocuda.h:27
#define UNREACHABLE()
Definition: Logger.h:247
void init_baseline_hash_join_buff_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::unique_ptr< BaselineHashTable > hash_table_
void overlaps_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
int fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:30
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const GenericKeyHandler *key_handler, const int64_t num_elems)
std::vector< const void * > sd_outer_proxy_per_key
Definition: HashJoin.h:97
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
T * transfer_flat_object_to_gpu(const T &object, CudaAllocator &allocator)
int initHashTableOnGpu(KEY_HANDLER *key_handler, const std::vector< JoinColumn > &join_columns, const HashType layout, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id)
int initHashTableOnCpu(KEY_HANDLER *key_handler, const CompositeKeyInfo &composite_key_info, const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const size_t keyspace_entry_count, const size_t keys_for_all_rows, const HashType layout, const size_t key_component_width, const size_t key_component_count)
void init_hash_join_buff_on_device(int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val)
void fill_one_to_many_baseline_hash_table_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void * > &sd_inner_proxy_per_key, const std::vector< const void * > &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
int8_t * alloc(const size_t num_bytes) override
void allocateDeviceMemory(const HashType layout, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id)
std::unique_ptr< BaselineHashTable > getHashTable()
int overlaps_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int64_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int overlaps_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_baseline_hash_join_buff_on_device(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const KEY_HANDLER *key_handler, const size_t num_elems)
#define CHECK(condition)
Definition: Logger.h:203
#define DEBUG_TIMER(name)
Definition: Logger.h:319
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
Allocate GPU memory using GpuBuffers via DataMgr.
int cpu_threads()
Definition: thread_count.h:24
HashType
Definition: HashTable.h:19
int fill_baseline_hash_join_buff(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const KEY_HANDLER *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define VLOG(n)
Definition: Logger.h:297
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:129
int fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)