BaselineHashTableBuilder.h
/*
 * Copyright 2020 OmniSci, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "Shared/thread_count.h"

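// Dispatch pattern used by the fill_* helpers below: the SIZE template
// parameter (4- vs. 8-byte key components) selects an overload via the
// trailing std::enable_if, and `if constexpr` on KEY_HANDLER picks the
// matching runtime entry point (generic, range, or overlaps), so only one
// branch is instantiated per specialization. A minimal call-site sketch,
// assuming a GenericKeyHandler `handler` built elsewhere:
//
//   int err = fill_baseline_hash_join_buff<int32_t>(
//       buff, entry_count, /*invalid_slot_val=*/-1, /*for_semi_join=*/false,
//       key_component_count, /*with_val_slot=*/true, &handler, num_elems,
//       thread_idx, thread_count);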
template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, int>::type = 0>
int fill_baseline_hash_join_buff(int8_t* hash_buff,
                                 const size_t entry_count,
                                 const int32_t invalid_slot_val,
                                 const bool for_semi_join,
                                 const size_t key_component_count,
                                 const bool with_val_slot,
                                 const KEY_HANDLER* key_handler,
                                 const size_t num_elems,
                                 const int32_t cpu_thread_idx,
                                 const int32_t cpu_thread_count) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    return fill_baseline_hash_join_buff_32(hash_buff,
                                           entry_count,
                                           invalid_slot_val,
                                           for_semi_join,
                                           key_component_count,
                                           with_val_slot,
                                           key_handler,
                                           num_elems,
                                           cpu_thread_idx,
                                           cpu_thread_count);
  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
    return range_fill_baseline_hash_join_buff_32(hash_buff,
                                                 entry_count,
                                                 invalid_slot_val,
                                                 key_component_count,
                                                 with_val_slot,
                                                 key_handler,
                                                 num_elems,
                                                 cpu_thread_idx,
                                                 cpu_thread_count);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    return overlaps_fill_baseline_hash_join_buff_32(hash_buff,
                                                    entry_count,
                                                    invalid_slot_val,
                                                    key_component_count,
                                                    with_val_slot,
                                                    key_handler,
                                                    num_elems,
                                                    cpu_thread_idx,
                                                    cpu_thread_count);
  }
}

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, int>::type = 0>
int fill_baseline_hash_join_buff(int8_t* hash_buff,
                                 const size_t entry_count,
                                 const int32_t invalid_slot_val,
                                 const bool for_semi_join,
                                 const size_t key_component_count,
                                 const bool with_val_slot,
                                 const KEY_HANDLER* key_handler,
                                 const size_t num_elems,
                                 const int32_t cpu_thread_idx,
                                 const int32_t cpu_thread_count) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    return fill_baseline_hash_join_buff_64(hash_buff,
                                           entry_count,
                                           invalid_slot_val,
                                           for_semi_join,
                                           key_component_count,
                                           with_val_slot,
                                           key_handler,
                                           num_elems,
                                           cpu_thread_idx,
                                           cpu_thread_count);
  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
    return range_fill_baseline_hash_join_buff_64(hash_buff,
                                                 entry_count,
                                                 invalid_slot_val,
                                                 key_component_count,
                                                 with_val_slot,
                                                 key_handler,
                                                 num_elems,
                                                 cpu_thread_idx,
                                                 cpu_thread_count);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    return overlaps_fill_baseline_hash_join_buff_64(hash_buff,
                                                    entry_count,
                                                    invalid_slot_val,
                                                    key_component_count,
                                                    with_val_slot,
                                                    key_handler,
                                                    num_elems,
                                                    cpu_thread_idx,
                                                    cpu_thread_count);
  }
}

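// The on-device variants below mirror the CPU dispatch but launch CUDA
// kernels; errors are reported through dev_err_buff, which the caller copies
// back to the host. Note the asymmetry: with 32-bit keys only the generic
// handler is implemented, while range and overlaps joins require 64-bit keys.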
template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, int>::type = 0>
void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
                                            const size_t entry_count,
                                            const int32_t invalid_slot_val,
                                            const bool for_semi_join,
                                            const size_t key_component_count,
                                            const bool with_val_slot,
                                            int* dev_err_buff,
                                            const KEY_HANDLER* key_handler,
                                            const size_t num_elems) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_baseline_hash_join_buff_on_device_32(hash_buff,
                                              entry_count,
                                              invalid_slot_val,
                                              for_semi_join,
                                              key_component_count,
                                              with_val_slot,
                                              dev_err_buff,
                                              key_handler,
                                              num_elems);
  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
    UNREACHABLE();
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
  }
}

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, int>::type = 0>
void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
                                            const size_t entry_count,
                                            const int32_t invalid_slot_val,
                                            const bool for_semi_join,
                                            const size_t key_component_count,
                                            const bool with_val_slot,
                                            int* dev_err_buff,
                                            const KEY_HANDLER* key_handler,
                                            const size_t num_elems) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_baseline_hash_join_buff_on_device_64(hash_buff,
                                              entry_count,
                                              invalid_slot_val,
                                              for_semi_join,
                                              key_component_count,
                                              with_val_slot,
                                              dev_err_buff,
                                              key_handler,
                                              num_elems);
  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
    range_fill_baseline_hash_join_buff_on_device_64(hash_buff,
                                                    entry_count,
                                                    invalid_slot_val,
                                                    key_component_count,
                                                    with_val_slot,
                                                    dev_err_buff,
                                                    key_handler,
                                                    num_elems);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    overlaps_fill_baseline_hash_join_buff_on_device_64(hash_buff,
                                                       entry_count,
                                                       invalid_slot_val,
                                                       key_component_count,
                                                       with_val_slot,
                                                       dev_err_buff,
                                                       key_handler,
                                                       num_elems);
  }
}

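// One-to-many fill: after the composite-key dictionary, the table carries
// 2 * entry_count + num_keys int32_t values (matching the sizing arithmetic
// in the builder below: two per-entry values plus one row id per key); the
// dispatchers below populate that region on the device per handler type.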
template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, int>::type = 0>
void fill_one_to_many_baseline_hash_table_on_device(int32_t* buff,
                                                    const SIZE* composite_key_dict,
                                                    const size_t hash_entry_count,
                                                    const int32_t invalid_slot_val,
                                                    const size_t key_component_count,
                                                    const KEY_HANDLER* key_handler,
                                                    const size_t num_elems) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_one_to_many_baseline_hash_table_on_device_32(buff,
                                                      composite_key_dict,
                                                      hash_entry_count,
                                                      invalid_slot_val,
                                                      key_component_count,
                                                      key_handler,
                                                      num_elems);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value ||
                      std::is_same<KEY_HANDLER, RangeKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
  }
}

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, int>::type = 0>
void fill_one_to_many_baseline_hash_table_on_device(int32_t* buff,
                                                    const SIZE* composite_key_dict,
                                                    const size_t hash_entry_count,
                                                    const int32_t invalid_slot_val,
                                                    const size_t key_component_count,
                                                    const KEY_HANDLER* key_handler,
                                                    const size_t num_elems) {
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_one_to_many_baseline_hash_table_on_device_64(buff,
                                                      composite_key_dict,
                                                      hash_entry_count,
                                                      invalid_slot_val,
                                                      key_handler,
                                                      num_elems);
  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
    range_fill_one_to_many_baseline_hash_table_on_device_64(buff,
                                                            composite_key_dict,
                                                            hash_entry_count,
                                                            invalid_slot_val,
                                                            key_handler,
                                                            num_elems);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    overlaps_fill_one_to_many_baseline_hash_table_on_device_64(buff,
                                                               composite_key_dict,
                                                               hash_entry_count,
                                                               invalid_slot_val,
                                                               key_handler,
                                                               num_elems);
  }
}

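// Builds a baseline (composite-key) join hash table on CPU or GPU and hands
// ownership of the finished table to the caller via getHashTable().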
class BaselineJoinHashTableBuilder {
 public:
  BaselineJoinHashTableBuilder() = default;

  template <class KEY_HANDLER>
  int initHashTableOnCpu(KEY_HANDLER* key_handler,
                         const CompositeKeyInfo& composite_key_info,
                         const std::vector<JoinColumn>& join_columns,
                         const std::vector<JoinColumnTypeInfo>& join_column_types,
                         const std::vector<JoinBucketInfo>& join_bucket_info,
                         const size_t keyspace_entry_count,
                         const size_t keys_for_all_rows,
                         const HashType layout,
                         const JoinType join_type,
                         const size_t key_component_width,
                         const size_t key_component_count) {
    auto timer = DEBUG_TIMER(__func__);
    const auto entry_size =
        (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
        key_component_width;
    const size_t one_to_many_hash_entries =
        HashJoin::layoutRequiresAdditionalBuffers(layout)
            ? 2 * keyspace_entry_count + keys_for_all_rows
            : 0;
    const size_t hash_table_size =
        entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
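    // Sizing sketch (illustrative numbers, not from the source): with 4-byte
    // key components, two components per key, and a OneToOne layout over 1M
    // keyspace entries, entry_size = (2 + 1) * 4 = 12 bytes, so
    // hash_table_size = 12,000,000 bytes. A OneToMany layout over the same
    // keyspace with 5M total keys drops the value slot (entry_size = 8) and
    // appends (2 * 1,000,000 + 5,000,000) * sizeof(int32_t) = 28,000,000
    // bytes for the one-to-many buffers.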

    // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
    if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
      throw TooManyHashEntries(
          "Hash tables for GPU requiring larger than 2GB contiguous memory not supported "
          "yet");
    }
    const bool for_semi_join =
        (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
        layout == HashType::OneToOne;

    VLOG(1) << "Initializing CPU Join Hash Table with " << keyspace_entry_count
            << " hash entries and " << one_to_many_hash_entries
            << " entries in the one to many buffer";
    VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";

    hash_table_ = std::make_unique<BaselineHashTable>(
        layout, keyspace_entry_count, keys_for_all_rows, hash_table_size);
    auto cpu_hash_table_ptr = hash_table_->getCpuBuffer();
    int thread_count = cpu_threads();
    std::vector<std::future<void>> init_cpu_buff_threads;
    setHashLayout(layout);
    for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
      init_cpu_buff_threads.emplace_back(std::async(
          std::launch::async,
          [keyspace_entry_count,
           key_component_count,
           key_component_width,
           thread_idx,
           thread_count,
           cpu_hash_table_ptr,
           layout] {
            switch (key_component_width) {
              case 4:
                init_baseline_hash_join_buff_32(cpu_hash_table_ptr,
                                                keyspace_entry_count,
                                                key_component_count,
                                                layout == HashType::OneToOne,
                                                -1,
                                                thread_idx,
                                                thread_count);
                break;
              case 8:
                init_baseline_hash_join_buff_64(cpu_hash_table_ptr,
                                                keyspace_entry_count,
                                                key_component_count,
                                                layout == HashType::OneToOne,
                                                -1,
                                                thread_idx,
                                                thread_count);
                break;
              default:
                CHECK(false);
            }
          }));
    }
    for (auto& child : init_cpu_buff_threads) {
      child.get();
    }
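    // Two-phase build: the init_* pass above writes the invalid sentinel
    // (-1) into every slot in parallel; the fill_* pass below inserts the
    // keys, returning a nonzero error code if insertion fails.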
    std::vector<std::future<int>> fill_cpu_buff_threads;
    for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
      fill_cpu_buff_threads.emplace_back(std::async(
          std::launch::async,
          [key_handler,
           keyspace_entry_count,
           &join_columns,
           key_component_count,
           key_component_width,
           layout,
           thread_idx,
           cpu_hash_table_ptr,
           thread_count,
           for_semi_join] {
            switch (key_component_width) {
              case 4: {
                return fill_baseline_hash_join_buff<int32_t>(cpu_hash_table_ptr,
                                                             keyspace_entry_count,
                                                             -1,
                                                             for_semi_join,
                                                             key_component_count,
                                                             layout == HashType::OneToOne,
                                                             key_handler,
                                                             join_columns[0].num_elems,
                                                             thread_idx,
                                                             thread_count);
              }
              case 8: {
                return fill_baseline_hash_join_buff<int64_t>(cpu_hash_table_ptr,
                                                             keyspace_entry_count,
                                                             -1,
                                                             for_semi_join,
                                                             key_component_count,
                                                             layout == HashType::OneToOne,
                                                             key_handler,
                                                             join_columns[0].num_elems,
                                                             thread_idx,
                                                             thread_count);
              }
              default:
                CHECK(false);
            }
            return -1;
          }));
    }
    int err = 0;
    for (auto& child : fill_cpu_buff_threads) {
      int partial_err = child.get();
      if (partial_err) {
        err = partial_err;
      }
    }
    if (err) {
      return err;
    }
    if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
      auto one_to_many_buff = reinterpret_cast<int32_t*>(
          cpu_hash_table_ptr + keyspace_entry_count * entry_size);
      init_hash_join_buff(one_to_many_buff, keyspace_entry_count, -1, 0, 1);
      bool is_geo_compressed = false;
      if constexpr (std::is_same_v<KEY_HANDLER, RangeKeyHandler>) {
        if (const auto range_handler =
                reinterpret_cast<const RangeKeyHandler*>(key_handler)) {
          is_geo_compressed = range_handler->is_compressed_;
        }
      }
      setHashLayout(layout);
      switch (key_component_width) {
        case 4: {
          const auto composite_key_dict = reinterpret_cast<int32_t*>(cpu_hash_table_ptr);
          fill_one_to_many_baseline_hash_table_32(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              join_columns,
              join_column_types,
              join_bucket_info,
              composite_key_info.sd_inner_proxy_per_key,
              composite_key_info.sd_outer_proxy_per_key,
              thread_count,
              std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
              is_geo_compressed);
          break;
        }
        case 8: {
          const auto composite_key_dict = reinterpret_cast<int64_t*>(cpu_hash_table_ptr);
          fill_one_to_many_baseline_hash_table_64(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              join_columns,
              join_column_types,
              join_bucket_info,
              composite_key_info.sd_inner_proxy_per_key,
              composite_key_info.sd_outer_proxy_per_key,
              thread_count,
              std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
              is_geo_compressed);
          break;
        }
        default:
          CHECK(false);
      }
    }
    return err;
  }

  void allocateDeviceMemory(const HashType layout,
                            const size_t key_component_width,
                            const size_t key_component_count,
                            const size_t keyspace_entry_count,
                            const size_t emitted_keys_count,
                            const int device_id,
                            const Executor* executor) {
#ifdef HAVE_CUDA
    const auto entry_size =
        (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
        key_component_width;
    const size_t one_to_many_hash_entries =
        HashJoin::layoutRequiresAdditionalBuffers(layout)
            ? 2 * keyspace_entry_count + emitted_keys_count
            : 0;
    const size_t hash_table_size =
        entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);

    // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
    if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
      throw TooManyHashEntries(
          "Hash tables for GPU requiring larger than 2GB contiguous memory not supported "
          "yet");
    }

    VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
            << keyspace_entry_count << " hash entries and " << one_to_many_hash_entries
            << " entries in the " << HashJoin::getHashTypeString(layout) << " buffer";
    VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";

    hash_table_ = std::make_unique<BaselineHashTable>(executor->getDataMgr(),
                                                      layout,
                                                      keyspace_entry_count,
                                                      emitted_keys_count,
                                                      hash_table_size,
                                                      device_id);
#else
    UNREACHABLE();
#endif
  }

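  // GPU build flow (summarizing the method below): allocate device memory,
  // initialize slots to the invalid sentinel, fill keys via the on-device
  // dispatchers above, copy the device-side error code back to the host,
  // and, for layouts with additional buffers, fill the one-to-many region.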
  template <class KEY_HANDLER>
  int initHashTableOnGpu(KEY_HANDLER* key_handler,
                         const std::vector<JoinColumn>& join_columns,
                         const HashType layout,
                         const JoinType join_type,
                         const size_t key_component_width,
                         const size_t key_component_count,
                         const size_t keyspace_entry_count,
                         const size_t emitted_keys_count,
                         const int device_id,
                         const Executor* executor) {
    auto timer = DEBUG_TIMER(__func__);
    int err = 0;
#ifdef HAVE_CUDA
    allocateDeviceMemory(layout,
                         key_component_width,
                         key_component_count,
                         keyspace_entry_count,
                         emitted_keys_count,
                         device_id,
                         executor);
    if (!keyspace_entry_count) {
      // need to "allocate" the empty hash table first
      CHECK(!emitted_keys_count);
      return 0;
    }
    auto data_mgr = executor->getDataMgr();
    auto allocator = data_mgr->createGpuAllocator(device_id);
    auto dev_err_buff = allocator->alloc(sizeof(int));

    allocator->copyToDevice(dev_err_buff, &err, sizeof(err));
    auto gpu_hash_table_buff = hash_table_->getGpuBuffer();
    CHECK(gpu_hash_table_buff);
    const bool for_semi_join =
        (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
        layout == HashType::OneToOne;
    setHashLayout(layout);
    const auto key_handler_gpu = transfer_flat_object_to_gpu(*key_handler, *allocator);
    switch (key_component_width) {
      case 4:
        init_baseline_hash_join_buff_on_device_32(gpu_hash_table_buff,
                                                  keyspace_entry_count,
                                                  key_component_count,
                                                  layout == HashType::OneToOne,
                                                  -1);
        break;
      case 8:
        init_baseline_hash_join_buff_on_device_64(gpu_hash_table_buff,
                                                  keyspace_entry_count,
                                                  key_component_count,
                                                  layout == HashType::OneToOne,
                                                  -1);
        break;
      default:
        UNREACHABLE();
    }
    switch (key_component_width) {
      case 4: {
        fill_baseline_hash_join_buff_on_device<int32_t>(
            gpu_hash_table_buff,
            keyspace_entry_count,
            -1,
            for_semi_join,
            key_component_count,
            layout == HashType::OneToOne,
            reinterpret_cast<int*>(dev_err_buff),
            key_handler_gpu,
            join_columns.front().num_elems);
        allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
        break;
      }
      case 8: {
        fill_baseline_hash_join_buff_on_device<int64_t>(
            gpu_hash_table_buff,
            keyspace_entry_count,
            -1,
            for_semi_join,
            key_component_count,
            layout == HashType::OneToOne,
            reinterpret_cast<int*>(dev_err_buff),
            key_handler_gpu,
            join_columns.front().num_elems);
        allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
        break;
      }
      default:
        UNREACHABLE();
    }
    if (err) {
      return err;
    }
    if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
      const auto entry_size = key_component_count * key_component_width;
      auto one_to_many_buff = reinterpret_cast<int32_t*>(
          gpu_hash_table_buff + keyspace_entry_count * entry_size);
      init_hash_join_buff_on_device(one_to_many_buff, keyspace_entry_count, -1);
      setHashLayout(layout);
      switch (key_component_width) {
        case 4: {
          const auto composite_key_dict = reinterpret_cast<int32_t*>(gpu_hash_table_buff);
          fill_one_to_many_baseline_hash_table_on_device<int32_t>(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              key_handler_gpu,
              join_columns.front().num_elems);
          break;
        }
        case 8: {
          const auto composite_key_dict = reinterpret_cast<int64_t*>(gpu_hash_table_buff);
          fill_one_to_many_baseline_hash_table_on_device<int64_t>(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              -1,
              key_component_count,
              key_handler_gpu,
              join_columns.front().num_elems);
          break;
        }
        default:
          UNREACHABLE();
      }
    }
#else
    UNREACHABLE();
#endif
    return err;
  }

  std::unique_ptr<BaselineHashTable> getHashTable() { return std::move(hash_table_); }

  void setHashLayout(HashType layout) { layout_ = layout; }

  HashType getHashLayout() const { return layout_; }

 private:
  std::unique_ptr<BaselineHashTable> hash_table_;
  HashType layout_;
};
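
// Usage sketch (hypothetical caller; the actual call sites live in the hash
// join code and the argument names here are assumptions):
//
//   BaselineJoinHashTableBuilder builder;
//   if (auto err = builder.initHashTableOnCpu(&key_handler,
//                                             composite_key_info,
//                                             join_columns,
//                                             join_column_types,
//                                             join_bucket_info,
//                                             keyspace_entry_count,
//                                             keys_for_all_rows,
//                                             layout,
//                                             join_type,
//                                             key_component_width,
//                                             key_component_count)) {
//     // handle / propagate the error code
//   }
//   auto hash_table = builder.getHashTable();  // takes ownership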