BaselineHashTableBuilder.h
/*
 * Copyright 2020 OmniSci, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "Shared/thread_count.h"
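// Dispatch helpers. The SIZE template parameter (4- or 8-byte key components)
// selects the 32-bit or 64-bit runtime implementation via SFINAE, and the
// KEY_HANDLER type (GenericKeyHandler or OverlapsKeyHandler) picks the generic
// or overlaps fill routine at compile time.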
template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, SIZE>::type* = nullptr>
int fill_baseline_hash_join_buff(int8_t* hash_buff,
                                 const size_t entry_count,
                                 const int32_t invalid_slot_val,
                                 const bool for_semi_join,
                                 const size_t key_component_count,
                                 const bool with_val_slot,
                                 const KEY_HANDLER* key_handler,
                                 const size_t num_elems,
                                 const int32_t cpu_thread_idx,
                                 const int32_t cpu_thread_count) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    return fill_baseline_hash_join_buff_32(hash_buff,
                                           entry_count,
                                           invalid_slot_val,
                                           for_semi_join,
                                           key_component_count,
                                           with_val_slot,
                                           key_handler,
                                           num_elems,
                                           cpu_thread_idx,
                                           cpu_thread_count);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    return overlaps_fill_baseline_hash_join_buff_32(hash_buff,
                                                    entry_count,
                                                    invalid_slot_val,
                                                    key_component_count,
                                                    with_val_slot,
                                                    key_handler,
                                                    num_elems,
                                                    cpu_thread_idx,
                                                    cpu_thread_count);
  }
}
template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, SIZE>::type* = nullptr>
int fill_baseline_hash_join_buff(int8_t* hash_buff,
                                 const size_t entry_count,
                                 const int32_t invalid_slot_val,
                                 const bool for_semi_join,
                                 const size_t key_component_count,
                                 const bool with_val_slot,
                                 const KEY_HANDLER* key_handler,
                                 const size_t num_elems,
                                 const int32_t cpu_thread_idx,
                                 const int32_t cpu_thread_count) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    return fill_baseline_hash_join_buff_64(hash_buff,
                                           entry_count,
                                           invalid_slot_val,
                                           for_semi_join,
                                           key_component_count,
                                           with_val_slot,
                                           key_handler,
                                           num_elems,
                                           cpu_thread_idx,
                                           cpu_thread_count);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    return overlaps_fill_baseline_hash_join_buff_64(hash_buff,
                                                    entry_count,
                                                    invalid_slot_val,
                                                    key_component_count,
                                                    with_val_slot,
                                                    key_handler,
                                                    num_elems,
                                                    cpu_thread_idx,
                                                    cpu_thread_count);
  }
}
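// For example (illustrative only; names and values here are assumptions, not
// part of the original header), a call such as
//
//   GenericKeyHandler handler{/* ... */};
//   fill_baseline_hash_join_buff<int64_t>(hash_buff, entry_count, -1, false,
//                                         key_component_count, true, &handler,
//                                         num_elems, thread_idx, thread_count);
//
// resolves to the sizeof(SIZE) == 8 overload above and, since the handler is a
// GenericKeyHandler, lowers to a call to fill_baseline_hash_join_buff_64.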

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, SIZE>::type* = nullptr>
void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
                                            const size_t entry_count,
                                            const int32_t invalid_slot_val,
                                            const bool for_semi_join,
                                            const size_t key_component_count,
                                            const bool with_val_slot,
                                            int* dev_err_buff,
                                            const KEY_HANDLER* key_handler,
                                            const size_t num_elems) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_baseline_hash_join_buff_on_device_32(hash_buff,
                                              entry_count,
                                              invalid_slot_val,
                                              for_semi_join,
                                              key_component_count,
                                              with_val_slot,
                                              dev_err_buff,
                                              key_handler,
                                              num_elems);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
  }
}

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, SIZE>::type* = nullptr>
void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
                                            const size_t entry_count,
                                            const int32_t invalid_slot_val,
                                            const bool for_semi_join,
                                            const size_t key_component_count,
                                            const bool with_val_slot,
                                            int* dev_err_buff,
                                            const KEY_HANDLER* key_handler,
                                            const size_t num_elems) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_baseline_hash_join_buff_on_device_64(hash_buff,
                                              entry_count,
                                              invalid_slot_val,
                                              for_semi_join,
                                              key_component_count,
                                              with_val_slot,
                                              dev_err_buff,
                                              key_handler,
                                              num_elems);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    overlaps_fill_baseline_hash_join_buff_on_device_64(hash_buff,
                                                       entry_count,
                                                       invalid_slot_val,
                                                       key_component_count,
                                                       with_val_slot,
                                                       dev_err_buff,
                                                       key_handler,
                                                       num_elems);
  }
}
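// The device variants report kernel failures through dev_err_buff, a single
// int in GPU memory; the caller (see initHashTableOnGpu below) copies it back
// with copy_from_gpu to surface fill errors on the host.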

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, SIZE>::type* = nullptr>
void fill_one_to_many_baseline_hash_table_on_device(int32_t* buff,
                                                    const SIZE* composite_key_dict,
                                                    const size_t hash_entry_count,
                                                    const int32_t invalid_slot_val,
                                                    const size_t key_component_count,
                                                    const KEY_HANDLER* key_handler,
                                                    const size_t num_elems) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_one_to_many_baseline_hash_table_on_device_32(buff,
                                                      composite_key_dict,
                                                      hash_entry_count,
                                                      invalid_slot_val,
                                                      key_component_count,
                                                      key_handler,
                                                      num_elems);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
  }
}

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, SIZE>::type* = nullptr>
void fill_one_to_many_baseline_hash_table_on_device(int32_t* buff,
                                                    const SIZE* composite_key_dict,
                                                    const size_t hash_entry_count,
                                                    const int32_t invalid_slot_val,
                                                    const size_t key_component_count,
                                                    const KEY_HANDLER* key_handler,
                                                    const size_t num_elems) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_one_to_many_baseline_hash_table_on_device_64(buff,
                                                      composite_key_dict,
                                                      hash_entry_count,
                                                      invalid_slot_val,
                                                      key_handler,
                                                      num_elems);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic or Overlaps Key Handlers are supported.");
    overlaps_fill_one_to_many_baseline_hash_table_on_device_64(buff,
                                                               composite_key_dict,
                                                               hash_entry_count,
                                                               invalid_slot_val,
                                                               key_handler,
                                                               num_elems);
  }
}
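// Builds a baseline (composite-key) join hash table on CPU or GPU. The buffer
// starts with the composite key dictionary (keyspace_entry_count entries of
// entry_size bytes each); for layouts that require additional buffers (e.g.
// one-to-many), an int32 buffer of 2 * keyspace_entry_count + keys_for_all_rows
// slots follows, per the sizing arithmetic in the init methods below.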
class BaselineJoinHashTableBuilder {
 public:
  BaselineJoinHashTableBuilder() = default;

  template <class KEY_HANDLER>
  int initHashTableOnCpu(KEY_HANDLER* key_handler,
                         const CompositeKeyInfo& composite_key_info,
                         const std::vector<JoinColumn>& join_columns,
                         const std::vector<JoinColumnTypeInfo>& join_column_types,
                         const std::vector<JoinBucketInfo>& join_bucket_info,
                         const size_t keyspace_entry_count,
                         const size_t keys_for_all_rows,
                         const HashType layout,
                         const JoinType join_type,
                         const size_t key_component_width,
                         const size_t key_component_count) {
    auto timer = DEBUG_TIMER(__func__);
    const auto entry_size =
        (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
        key_component_width;
    const size_t one_to_many_hash_entries =
        HashJoin::layoutRequiresAdditionalBuffers(layout)
            ? 2 * keyspace_entry_count + keys_for_all_rows
            : 0;
    const size_t hash_table_size =
        entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
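    // A hypothetical worked example (numbers are illustrative, not from the
    // source): a OneToMany layout with keyspace_entry_count = 1M, two 8-byte
    // key components, and keys_for_all_rows = 3M gives
    //   entry_size               = 2 * 8 = 16 bytes (no value slot),
    //   one_to_many_hash_entries = 2 * 1M + 3M = 5M int32 slots,
    //   hash_table_size          = 16 * 1M + 4 * 5M = 36 MB.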

    // We can't allocate more than 2GB of contiguous memory on GPU and each entry is 4 bytes.
    if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
      throw TooManyHashEntries(
          "Hash tables for GPU requiring larger than 2GB contiguous memory not supported "
          "yet");
    }
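    // Semi and anti joins with a one-to-one layout take a dedicated fill path,
    // since a single matching entry per key suffices.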
    const bool for_semi_join =
        (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
        layout == HashType::OneToOne;

    VLOG(1) << "Initializing CPU Join Hash Table with " << keyspace_entry_count
            << " hash entries and " << one_to_many_hash_entries
            << " entries in the one to many buffer";
    VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";

    hash_table_ = std::make_unique<BaselineHashTable>(
        layout, keyspace_entry_count, keys_for_all_rows, hash_table_size);
    auto cpu_hash_table_ptr = hash_table_->getCpuBuffer();
    int thread_count = cpu_threads();
    std::vector<std::future<void>> init_cpu_buff_threads;
    for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
      init_cpu_buff_threads.emplace_back(
          std::async(std::launch::async,
                     [keyspace_entry_count,
                      key_component_count,
                      key_component_width,
                      thread_idx,
                      thread_count,
                      cpu_hash_table_ptr,
                      layout] {
                       switch (key_component_width) {
                         case 4:
                           init_baseline_hash_join_buff_32(cpu_hash_table_ptr,
                                                           keyspace_entry_count,
                                                           key_component_count,
                                                           layout == HashType::OneToOne,
                                                           -1,
                                                           thread_idx,
                                                           thread_count);
                           break;
                         case 8:
                           init_baseline_hash_join_buff_64(cpu_hash_table_ptr,
                                                           keyspace_entry_count,
                                                           key_component_count,
                                                           layout == HashType::OneToOne,
                                                           -1,
                                                           thread_idx,
                                                           thread_count);
                           break;
                         default:
                           CHECK(false);
                       }
                     }));
    }
    for (auto& child : init_cpu_buff_threads) {
      child.get();
    }
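    // With every slot initialized to the invalid value, fill the table in
    // parallel; each worker returns a partial error code, collected below.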
    std::vector<std::future<int>> fill_cpu_buff_threads;
    for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
      fill_cpu_buff_threads.emplace_back(std::async(
          std::launch::async,
          [key_handler,
           keyspace_entry_count,
           &join_columns,
           key_component_count,
           key_component_width,
           layout,
           thread_idx,
           cpu_hash_table_ptr,
           thread_count,
           for_semi_join] {
            switch (key_component_width) {
              case 4: {
                return fill_baseline_hash_join_buff<int32_t>(cpu_hash_table_ptr,
                                                             keyspace_entry_count,
                                                             -1,
                                                             for_semi_join,
                                                             key_component_count,
                                                             layout == HashType::OneToOne,
                                                             key_handler,
                                                             join_columns[0].num_elems,
                                                             thread_idx,
                                                             thread_count);
                break;
              }
              case 8: {
                return fill_baseline_hash_join_buff<int64_t>(cpu_hash_table_ptr,
                                                             keyspace_entry_count,
                                                             -1,
                                                             for_semi_join,
                                                             key_component_count,
                                                             layout == HashType::OneToOne,
                                                             key_handler,
                                                             join_columns[0].num_elems,
                                                             thread_idx,
                                                             thread_count);
                break;
              }
              default:
                CHECK(false);
            }
            return -1;
          }));
    }
    int err = 0;
    for (auto& child : fill_cpu_buff_threads) {
      int partial_err = child.get();
      if (partial_err) {
        err = partial_err;
      }
    }
    if (err) {
      return err;
    }
    if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
      auto one_to_many_buff = reinterpret_cast<int32_t*>(
          cpu_hash_table_ptr + keyspace_entry_count * entry_size);
      init_hash_join_buff(one_to_many_buff, keyspace_entry_count, -1, 0, 1);
      switch (key_component_width) {
        case 4: {
          const auto composite_key_dict = reinterpret_cast<int32_t*>(cpu_hash_table_ptr);
          fill_one_to_many_baseline_hash_table_32(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              join_columns,
              join_column_types,
              join_bucket_info,
              composite_key_info.sd_inner_proxy_per_key,
              composite_key_info.sd_outer_proxy_per_key,
              thread_count);
          break;
        }
        case 8: {
          const auto composite_key_dict = reinterpret_cast<int64_t*>(cpu_hash_table_ptr);
          fill_one_to_many_baseline_hash_table_64(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              join_columns,
              join_column_types,
              join_bucket_info,
              composite_key_info.sd_inner_proxy_per_key,
              composite_key_info.sd_outer_proxy_per_key,
              thread_count);
          break;
        }
        default:
          CHECK(false);
      }
    }
    return err;
  }

  void allocateDeviceMemory(const HashType layout,
                            const size_t key_component_width,
                            const size_t key_component_count,
                            const size_t keyspace_entry_count,
                            const size_t emitted_keys_count,
                            const int device_id,
                            const Executor* executor) {
#ifdef HAVE_CUDA
    const auto entry_size =
        (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
        key_component_width;
    const size_t one_to_many_hash_entries =
        HashJoin::layoutRequiresAdditionalBuffers(layout)
            ? 2 * keyspace_entry_count + emitted_keys_count
            : 0;
    const size_t hash_table_size =
        entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);

    // We can't allocate more than 2GB of contiguous memory on GPU and each entry is 4 bytes.
    if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
      throw TooManyHashEntries(
          "Hash tables for GPU requiring larger than 2GB contiguous memory not supported "
          "yet");
    }

    VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
            << keyspace_entry_count << " hash entries and " << one_to_many_hash_entries
            << " entries in the one to many buffer";
    VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";

    hash_table_ = std::make_unique<BaselineHashTable>(executor->getDataMgr(),
                                                      layout,
                                                      keyspace_entry_count,
                                                      emitted_keys_count,
                                                      hash_table_size,
                                                      device_id);
#else
    UNREACHABLE();
#endif
  }
  template <class KEY_HANDLER>
  int initHashTableOnGpu(KEY_HANDLER* key_handler,
                         const std::vector<JoinColumn>& join_columns,
                         const HashType layout,
                         const JoinType join_type,
                         const size_t key_component_width,
                         const size_t key_component_count,
                         const size_t keyspace_entry_count,
                         const size_t emitted_keys_count,
                         const int device_id,
                         const Executor* executor) {
    auto timer = DEBUG_TIMER(__func__);
    int err = 0;
#ifdef HAVE_CUDA
    allocateDeviceMemory(layout,
                         key_component_width,
                         key_component_count,
                         keyspace_entry_count,
                         emitted_keys_count,
                         device_id,
                         executor);
    if (!keyspace_entry_count) {
      // need to "allocate" the empty hash table first
      CHECK(!emitted_keys_count);
      return 0;
    }
    auto data_mgr = executor->getDataMgr();
    CudaAllocator allocator(data_mgr, device_id);
    auto dev_err_buff = reinterpret_cast<CUdeviceptr>(allocator.alloc(sizeof(int)));
    copy_to_gpu(data_mgr, dev_err_buff, &err, sizeof(err), device_id);
    auto gpu_hash_table_buff = hash_table_->getGpuBuffer();
    CHECK(gpu_hash_table_buff);
    const bool for_semi_join =
        (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
        layout == HashType::OneToOne;
    switch (key_component_width) {
      case 4:
        init_baseline_hash_join_buff_on_device_32(gpu_hash_table_buff,
                                                  keyspace_entry_count,
                                                  key_component_count,
                                                  layout == HashType::OneToOne,
                                                  -1);
        break;
      case 8:
        init_baseline_hash_join_buff_on_device_64(gpu_hash_table_buff,
                                                  keyspace_entry_count,
                                                  key_component_count,
                                                  layout == HashType::OneToOne,
                                                  -1);
        break;
      default:
        UNREACHABLE();
    }
    const auto key_handler_gpu = transfer_flat_object_to_gpu(*key_handler, allocator);
    switch (key_component_width) {
      case 4: {
        fill_baseline_hash_join_buff_on_device<int32_t>(
            gpu_hash_table_buff,
            keyspace_entry_count,
            -1,
            for_semi_join,
            key_component_count,
            layout == HashType::OneToOne,
            reinterpret_cast<int*>(dev_err_buff),
            key_handler_gpu,
            join_columns.front().num_elems);
        copy_from_gpu(data_mgr, &err, dev_err_buff, sizeof(err), device_id);
        break;
      }
      case 8: {
        fill_baseline_hash_join_buff_on_device<int64_t>(
            gpu_hash_table_buff,
            keyspace_entry_count,
            -1,
            for_semi_join,
            key_component_count,
            layout == HashType::OneToOne,
            reinterpret_cast<int*>(dev_err_buff),
            key_handler_gpu,
            join_columns.front().num_elems);
        copy_from_gpu(data_mgr, &err, dev_err_buff, sizeof(err), device_id);
        break;
      }
      default:
        UNREACHABLE();
    }
    if (err) {
      return err;
    }
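    // For layouts with additional buffers, the int32 one-to-many buffer starts
    // directly after the composite key dictionary. Note that entry_size below
    // has no value-slot term: one-to-many layouts keep their payload in the
    // trailing buffer rather than next to each key.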
    if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
      const auto entry_size = key_component_count * key_component_width;
      auto one_to_many_buff = reinterpret_cast<int32_t*>(
          gpu_hash_table_buff + keyspace_entry_count * entry_size);
      init_hash_join_buff_on_device(one_to_many_buff, keyspace_entry_count, -1);
      switch (key_component_width) {
        case 4: {
          const auto composite_key_dict = reinterpret_cast<int32_t*>(gpu_hash_table_buff);
          fill_one_to_many_baseline_hash_table_on_device<int32_t>(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              key_handler_gpu,
              join_columns.front().num_elems);
          break;
        }
        case 8: {
          const auto composite_key_dict = reinterpret_cast<int64_t*>(gpu_hash_table_buff);
          fill_one_to_many_baseline_hash_table_on_device<int64_t>(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              key_handler_gpu,
              join_columns.front().num_elems);
          break;
        }
        default:
          UNREACHABLE();
      }
    }
#else
    UNREACHABLE();
#endif
    return err;
  }

  std::unique_ptr<BaselineHashTable> getHashTable() { return std::move(hash_table_); }

 private:
  std::unique_ptr<BaselineHashTable> hash_table_;
};
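
// A minimal usage sketch (hypothetical; the surrounding setup and argument
// values are assumptions, not part of this header):
//
//   BaselineJoinHashTableBuilder builder;
//   GenericKeyHandler key_handler{/* ... */};
//   const int err = builder.initHashTableOnCpu(&key_handler,
//                                              composite_key_info,
//                                              join_columns,
//                                              join_column_types,
//                                              join_bucket_info,
//                                              keyspace_entry_count,
//                                              keys_for_all_rows,
//                                              HashType::OneToMany,
//                                              JoinType::INNER,
//                                              /*key_component_width=*/8,
//                                              key_component_count);
//   if (!err) {
//     auto hash_table = builder.getHashTable();  // transfers table ownership
//   }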