OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BaselineHashTableBuilder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
25 #include "Shared/thread_count.h"
26 
// CPU dispatcher: forwards a baseline hash-join buffer fill to the implementation
// matching KEY_HANDLER and returns that implementation's int result.
//  - GenericKeyHandler: calls fill_baseline_hash_join_buff_32 with every argument.
//  - RangeKeyHandler / OverlapsKeyHandler: the visible argument lists omit
//    for_semi_join.
//  - Any other KEY_HANDLER fails the static_assert in the final branch.
// NOTE(review): this rendered listing skips original lines 29, 53 and 65 (the rest
// of the template header and the callee names of the last two branches), so they
// cannot be documented from here. Presumably this is the overload for 4-byte SIZE
// and the callees are the *_32 range/overlaps variants -- confirm against the real
// header.
27 template <typename SIZE,
28  class KEY_HANDLER,
30 int fill_baseline_hash_join_buff(int8_t* hash_buff,
31  const size_t entry_count,
32  const int32_t invalid_slot_val,
33  const bool for_semi_join,
34  const size_t key_component_count,
35  const bool with_val_slot,
36  const KEY_HANDLER* key_handler,
37  const size_t num_elems,
38  const int32_t cpu_thread_idx,
39  const int32_t cpu_thread_count) {
40  auto timer = DEBUG_TIMER(__func__);
// The branch is chosen at compile time, so only the call matching KEY_HANDLER is
// instantiated.
41  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
42  return fill_baseline_hash_join_buff_32(hash_buff,
43  entry_count,
44  invalid_slot_val,
45  for_semi_join,
46  key_component_count,
47  with_val_slot,
48  key_handler,
49  num_elems,
50  cpu_thread_idx,
51  cpu_thread_count);
52  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
// NOTE(review): the call head (listing line 53) is missing from this view.
54  entry_count,
55  invalid_slot_val,
56  key_component_count,
57  with_val_slot,
58  key_handler,
59  num_elems,
60  cpu_thread_idx,
61  cpu_thread_count);
62  } else {
63  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
64  "Only Generic, Overlaps, and Range Key Handlers are supported.");
// NOTE(review): the call head (listing line 65) is missing from this view.
66  entry_count,
67  invalid_slot_val,
68  key_component_count,
69  with_val_slot,
70  key_handler,
71  num_elems,
72  cpu_thread_idx,
73  cpu_thread_count);
74  }
75 }
76 
// CPU dispatcher, second overload: forwards a baseline hash-join buffer fill to the
// *_64 implementation matching KEY_HANDLER and returns its int result.
//  - GenericKeyHandler: fill_baseline_hash_join_buff_64, all arguments forwarded.
//  - RangeKeyHandler: range_fill_baseline_hash_join_buff_64; for_semi_join is not
//    forwarded.
//  - Otherwise KEY_HANDLER must be OverlapsKeyHandler (static_assert); the visible
//    argument list again omits for_semi_join.
// NOTE(review): listing lines 79 (rest of the template header) and 115 (callee name
// of the overlaps branch) are missing from this view. Presumably this overload is
// selected for 8-byte SIZE -- confirm against the real header.
77 template <typename SIZE,
78  class KEY_HANDLER,
80 int fill_baseline_hash_join_buff(int8_t* hash_buff,
81  const size_t entry_count,
82  const int32_t invalid_slot_val,
83  const bool for_semi_join,
84  const size_t key_component_count,
85  const bool with_val_slot,
86  const KEY_HANDLER* key_handler,
87  const size_t num_elems,
88  const int32_t cpu_thread_idx,
89  const int32_t cpu_thread_count) {
90  auto timer = DEBUG_TIMER(__func__);
91  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
92  return fill_baseline_hash_join_buff_64(hash_buff,
93  entry_count,
94  invalid_slot_val,
95  for_semi_join,
96  key_component_count,
97  with_val_slot,
98  key_handler,
99  num_elems,
100  cpu_thread_idx,
101  cpu_thread_count);
102  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
103  return range_fill_baseline_hash_join_buff_64(hash_buff,
104  entry_count,
105  invalid_slot_val,
106  key_component_count,
107  with_val_slot,
108  key_handler,
109  num_elems,
110  cpu_thread_idx,
111  cpu_thread_count);
112  } else {
113  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
114  "Only Generic, Overlaps, and Range Key Handlers are supported.");
// NOTE(review): the call head (listing line 115) is missing from this view.
116  entry_count,
117  invalid_slot_val,
118  key_component_count,
119  with_val_slot,
120  key_handler,
121  num_elems,
122  cpu_thread_idx,
123  cpu_thread_count);
124  }
125 }
126 
// Device-side fill dispatcher taking a dev_err_buff pointer in addition to the
// usual fill arguments.
//  - GenericKeyHandler: forwards the arguments (including for_semi_join and
//    dev_err_buff) to a callee whose name is not visible here.
//  - RangeKeyHandler: UNREACHABLE().
//  - Otherwise KEY_HANDLER must be OverlapsKeyHandler (static_assert) and the call
//    ends in LOG(FATAL).
// NOTE(review): listing lines 129-130 (rest of the template header and the line
// carrying the function name / first parameter) and 141 (callee name) are missing
// from this view. By symmetry with the sibling below this is presumably the
// 4-byte-SIZE overload of fill_baseline_hash_join_buff_on_device -- confirm.
127 template <typename SIZE,
128  class KEY_HANDLER,
131  const size_t entry_count,
132  const int32_t invalid_slot_val,
133  const bool for_semi_join,
134  const size_t key_component_count,
135  const bool with_val_slot,
136  int* dev_err_buff,
137  const KEY_HANDLER* key_handler,
138  const size_t num_elems) {
139  auto timer = DEBUG_TIMER(__func__);
140  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
// NOTE(review): the call head (listing line 141) is missing from this view.
142  entry_count,
143  invalid_slot_val,
144  for_semi_join,
145  key_component_count,
146  with_val_slot,
147  dev_err_buff,
148  key_handler,
149  num_elems);
150  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
151  UNREACHABLE();
152  } else {
153  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
154  "Only Generic, Overlaps, and Range Key Handlers are supported.");
155  LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
156  }
157 }
158 
// Device-side fill dispatcher, second overload: every branch forwards to a
// handler-specific callee and returns nothing; errors are expected to travel
// through dev_err_buff, which is passed along in all three branches.
//  - GenericKeyHandler: for_semi_join is forwarded.
//  - RangeKeyHandler / OverlapsKeyHandler: the visible argument lists omit
//    for_semi_join.
//  - Any other KEY_HANDLER fails the static_assert in the final branch.
// NOTE(review): listing lines 161 (rest of the template header) and 173, 183, 194
// (callee names) are missing from this view; presumably the *_on_device_64
// variants -- confirm against the real header.
159 template <typename SIZE,
160  class KEY_HANDLER,
162 void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
163  const size_t entry_count,
164  const int32_t invalid_slot_val,
165  const bool for_semi_join,
166  const size_t key_component_count,
167  const bool with_val_slot,
168  int* dev_err_buff,
169  const KEY_HANDLER* key_handler,
170  const size_t num_elems) {
171  auto timer = DEBUG_TIMER(__func__);
172  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
// NOTE(review): the call head (listing line 173) is missing from this view.
174  entry_count,
175  invalid_slot_val,
176  for_semi_join,
177  key_component_count,
178  with_val_slot,
179  dev_err_buff,
180  key_handler,
181  num_elems);
182  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
// NOTE(review): the call head (listing line 183) is missing from this view.
184  entry_count,
185  invalid_slot_val,
186  key_component_count,
187  with_val_slot,
188  dev_err_buff,
189  key_handler,
190  num_elems);
191  } else {
192  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
193  "Only Generic, Overlaps, and Range Key Handlers are supported.");
// NOTE(review): the call head (listing line 194) is missing from this view.
195  entry_count,
196  invalid_slot_val,
197  key_component_count,
198  with_val_slot,
199  dev_err_buff,
200  key_handler,
201  num_elems);
202  }
203 }
204 
// Device-side one-to-many fill dispatcher taking a SIZE-typed composite key
// dictionary.
//  - GenericKeyHandler: forwards composite_key_dict, hash_entry_count,
//    key_component_count, key_handler and num_elems to a callee whose name is not
//    visible here.
//  - Otherwise KEY_HANDLER must be OverlapsKeyHandler or RangeKeyHandler
//    (static_assert) and the call ends in LOG(FATAL).
// NOTE(review): listing lines 207-208 (rest of the template header and the line
// carrying the function name / first parameter) and 216 (callee name) are missing
// from this view. Presumably this is the 4-byte-SIZE overload of
// fill_one_to_many_baseline_hash_table_on_device -- confirm.
205 template <typename SIZE,
206  class KEY_HANDLER,
209  const SIZE* composite_key_dict,
210  const size_t hash_entry_count,
211  const size_t key_component_count,
212  const KEY_HANDLER* key_handler,
213  const size_t num_elems) {
214  auto timer = DEBUG_TIMER(__func__);
215  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
// NOTE(review): the call head (listing line 216) is missing from this view.
217  composite_key_dict,
218  hash_entry_count,
219  key_component_count,
220  key_handler,
221  num_elems);
222  } else {
223  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value ||
224  std::is_same<KEY_HANDLER, RangeKeyHandler>::value,
225  "Only Generic, Overlaps, and Range Key Handlers are supported.");
// NOTE(review): this branch is also taken for RangeKeyHandler, yet the message
// below names only overlaps join.
226  LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
227  }
228 }
229 
// Device-side one-to-many fill dispatcher, second overload: each branch passes
// buff, composite_key_dict, hash_entry_count, key_handler and num_elems to a
// handler-specific callee. key_component_count is accepted but not forwarded in
// any of the visible calls. Any KEY_HANDLER other than Generic, Range or Overlaps
// fails the static_assert in the final branch.
// NOTE(review): listing lines 232-233 (rest of the template header and the line
// carrying the function name / first parameter) and 241, 244, 249 (callee names)
// are missing from this view. Presumably this is the 8-byte-SIZE overload of
// fill_one_to_many_baseline_hash_table_on_device -- confirm.
230 template <typename SIZE,
231  class KEY_HANDLER,
234  const SIZE* composite_key_dict,
235  const size_t hash_entry_count,
236  const size_t key_component_count,
237  const KEY_HANDLER* key_handler,
238  const size_t num_elems) {
239  auto timer = DEBUG_TIMER(__func__);
240  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
242  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
243  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
245  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
246  } else {
247  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
248  "Only Generic, Overlaps, and Range Key Handlers are supported.");
250  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
251  }
252 }
253 
// NOTE(review): listing line 254 (presumably the class header) is missing from
// this view.
255  public:
// Default constructor; the hash_table_ member starts out as a null unique_ptr.
256  BaselineJoinHashTableBuilder() = default;
// Builds the baseline join hash table in a CPU buffer and stores it in hash_table_.
// Steps visible in this listing:
//  1. Compute the per-entry size (key components, plus one extra slot for
//     HashType::OneToOne) and the total byte size; throw TooManyHashEntries when
//     the total exceeds the int32_t maximum.
//  2. Allocate a BaselineHashTable and initialise its buffer: the TBB helpers when
//     HAVE_TBB is defined, otherwise one future per thread index, all of which are
//     waited on before continuing.
//  3. Fill the buffer with one std::async task per thread index. The last non-zero
//     task result becomes the error code and is returned immediately.
//  4. Initialise and fill the additional int32_t buffer that starts right after
//     keyspace_entry_count * entry_size bytes (guarded by a condition on listing
//     line 421, which is not visible here).
// key_component_width must be 4 or 8; any other value hits CHECK(false).
// Returns 0 when every fill task returned 0, otherwise the non-zero task result.
// NOTE(review): composite_key_info is not referenced in any visible line.
// NOTE(review): listing lines 263, 276, 327, 367, 421, 441 and 458 are missing from
// this view (parameter type, a condition, two launch calls, a guard and two callee
// names), so those parts are not documented here.
257  template <class KEY_HANDLER>
258  int initHashTableOnCpu(KEY_HANDLER* key_handler,
259  const CompositeKeyInfo& composite_key_info,
260  const std::vector<JoinColumn>& join_columns,
261  const std::vector<JoinColumnTypeInfo>& join_column_types,
262  const std::vector<JoinBucketInfo>& join_bucket_info,
264  str_proxy_translation_maps_ptrs_and_offsets,
265  const size_t keyspace_entry_count,
266  const size_t keys_for_all_rows,
267  const HashType layout,
268  const JoinType join_type,
269  const size_t key_component_width,
270  const size_t key_component_count) {
271  auto timer = DEBUG_TIMER(__func__);
// OneToOne entries carry one extra component-sized slot next to the key.
272  const auto entry_size =
273  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
274  key_component_width;
275  const size_t one_to_many_hash_entries =
277  ? 2 * keyspace_entry_count + keys_for_all_rows
278  : 0;
279  const size_t hash_table_size =
280  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
281 
// NOTE(review): the comment and exception text below talk about GPU memory although
// this is the CPU build path; the same wording appears in allocateDeviceMemory, so
// it looks copied. "contigious" in the message is a typo for "contiguous"; it is
// runtime text and therefore left untouched here.
282  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
283  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
284  throw TooManyHashEntries(
285  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
286  "yet");
287  }
// Semi-join handling is only requested for SEMI/ANTI joins on a OneToOne layout.
288  const bool for_semi_join =
289  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
290  layout == HashType::OneToOne;
291 
292  VLOG(1) << "Initializing CPU Join Hash Table with " << keyspace_entry_count
293  << " hash entries and " << one_to_many_hash_entries
294  << " entries in the one to many buffer";
295  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
296 
297  hash_table_ = std::make_unique<BaselineHashTable>(
298  layout, keyspace_entry_count, keys_for_all_rows, hash_table_size);
299  auto cpu_hash_table_ptr = hash_table_->getCpuBuffer();
300  int thread_count = cpu_threads();
301  std::vector<std::future<void>> init_cpu_buff_threads;
302  setHashLayout(layout);
303  {
// NOTE(review): the timer label names the _32 initialiser, but this scope also
// covers the 8-byte (_64) path.
304  auto timer_init = DEBUG_TIMER("CPU Baseline-Hash: init_baseline_hash_join_buff_32");
305 #ifdef HAVE_TBB
306  switch (key_component_width) {
307  case 4:
308  init_baseline_hash_join_buff_tbb_32(cpu_hash_table_ptr,
309  keyspace_entry_count,
310  key_component_count,
311  layout == HashType::OneToOne,
312  -1);
313  break;
314  case 8:
315  init_baseline_hash_join_buff_tbb_64(cpu_hash_table_ptr,
316  keyspace_entry_count,
317  key_component_count,
318  layout == HashType::OneToOne,
319  -1);
320  break;
321  default:
322  CHECK(false);
323  }
324 #else // #ifdef HAVE_TBB
325  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
326  init_cpu_buff_threads.emplace_back(
// NOTE(review): the launch call (listing line 327) is missing from this view.
328  [keyspace_entry_count,
329  key_component_count,
330  key_component_width,
331  thread_idx,
332  thread_count,
333  cpu_hash_table_ptr,
334  layout] {
335  switch (key_component_width) {
336  case 4:
337  init_baseline_hash_join_buff_32(cpu_hash_table_ptr,
338  keyspace_entry_count,
339  key_component_count,
340  layout == HashType::OneToOne,
341  -1,
342  thread_idx,
343  thread_count);
344  break;
345  case 8:
346  init_baseline_hash_join_buff_64(cpu_hash_table_ptr,
347  keyspace_entry_count,
348  key_component_count,
349  layout == HashType::OneToOne,
350  -1,
351  thread_idx,
352  thread_count);
353  break;
354  default:
355  CHECK(false);
356  }
357  }));
358  }
// Wait for every initialisation task before the fill phase starts.
359  for (auto& child : init_cpu_buff_threads) {
360  child.get();
361  }
362 #endif // !HAVE_TBB
363  }
364  std::vector<std::future<int>> fill_cpu_buff_threads;
365  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
366  fill_cpu_buff_threads.emplace_back(std::async(
// join_columns is captured by reference; the futures are drained below, before
// this function returns.
368  [key_handler,
369  keyspace_entry_count,
370  &join_columns,
371  key_component_count,
372  key_component_width,
373  layout,
374  thread_idx,
375  cpu_hash_table_ptr,
376  thread_count,
377  for_semi_join] {
378  switch (key_component_width) {
379  case 4: {
380  return fill_baseline_hash_join_buff<int32_t>(cpu_hash_table_ptr,
381  keyspace_entry_count,
382  -1,
383  for_semi_join,
384  key_component_count,
385  layout == HashType::OneToOne,
386  key_handler,
387  join_columns[0].num_elems,
388  thread_idx,
389  thread_count);
390  break;
391  }
392  case 8: {
393  return fill_baseline_hash_join_buff<int64_t>(cpu_hash_table_ptr,
394  keyspace_entry_count,
395  -1,
396  for_semi_join,
397  key_component_count,
398  layout == HashType::OneToOne,
399  key_handler,
400  join_columns[0].num_elems,
401  thread_idx,
402  thread_count);
403  break;
404  }
405  default:
406  CHECK(false);
407  }
// Only reachable through the default case, since cases 4 and 8 return directly.
408  return -1;
409  }));
410  }
// Collect every task result; a later non-zero result overwrites an earlier one.
411  int err = 0;
412  for (auto& child : fill_cpu_buff_threads) {
413  int partial_err = child.get();
414  if (partial_err) {
415  err = partial_err;
416  }
417  }
418  if (err) {
419  return err;
420  }
// NOTE(review): listing line 421 is missing; it presumably opens the conditional
// block that the brace at listing line 476 closes.
// The additional buffer begins directly behind the key entries.
422  auto one_to_many_buff = reinterpret_cast<int32_t*>(
423  cpu_hash_table_ptr + keyspace_entry_count * entry_size);
424  {
425  auto timer_init_additional_buffers =
426  DEBUG_TIMER("CPU Baseline-Hash: Additional Buffers init_hash_join_buff");
427  init_hash_join_buff(one_to_many_buff, keyspace_entry_count, -1, 0, 1);
428  }
// is_geo_compressed is read from the handler only when KEY_HANDLER is
// RangeKeyHandler and the pointer is non-null; otherwise it stays false.
429  bool is_geo_compressed = false;
430  if constexpr (std::is_same_v<KEY_HANDLER, RangeKeyHandler>) {
431  if (const auto range_handler =
432  reinterpret_cast<const RangeKeyHandler*>(key_handler)) {
433  is_geo_compressed = range_handler->is_compressed_;
434  }
435  }
// Repeats the earlier setHashLayout call with the same value.
436  setHashLayout(layout);
437 
438  switch (key_component_width) {
439  case 4: {
440  const auto composite_key_dict = reinterpret_cast<int32_t*>(cpu_hash_table_ptr);
// NOTE(review): the call head (listing line 441) is missing from this view.
442  one_to_many_buff,
443  composite_key_dict,
444  keyspace_entry_count,
445  key_component_count,
446  join_columns,
447  join_column_types,
448  join_bucket_info,
449  str_proxy_translation_maps_ptrs_and_offsets.first,
450  str_proxy_translation_maps_ptrs_and_offsets.second,
451  thread_count,
452  std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
453  is_geo_compressed);
454  break;
455  }
456  case 8: {
457  const auto composite_key_dict = reinterpret_cast<int64_t*>(cpu_hash_table_ptr);
// NOTE(review): the call head (listing line 458) is missing from this view.
459  one_to_many_buff,
460  composite_key_dict,
461  keyspace_entry_count,
462  key_component_count,
463  join_columns,
464  join_column_types,
465  join_bucket_info,
466  str_proxy_translation_maps_ptrs_and_offsets.first,
467  str_proxy_translation_maps_ptrs_and_offsets.second,
468  thread_count,
469  std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
470  is_geo_compressed);
471  break;
472  }
473  default:
474  CHECK(false);
475  }
476  }
// err is 0 whenever execution reaches this point.
477  return err;
478  }
479 
// Sizes and allocates the hash table for one device and stores it in hash_table_.
// With HAVE_CUDA: computes the per-entry size (key components, plus one extra slot
// for HashType::OneToOne) and the total byte size, throws TooManyHashEntries when
// the total exceeds the int32_t maximum, logs the sizes, then constructs a
// BaselineHashTable from executor->getDataMgr(), the layout, both entry counts,
// the byte size and device_id.
// Without HAVE_CUDA the body is UNREACHABLE().
// NOTE(review): listing line 492 (the condition selecting the additional-buffer
// entry count) is missing from this view.
480  void allocateDeviceMemory(const HashType layout,
481  const size_t key_component_width,
482  const size_t key_component_count,
483  const size_t keyspace_entry_count,
484  const size_t emitted_keys_count,
485  const int device_id,
486  const Executor* executor) {
487 #ifdef HAVE_CUDA
488  const auto entry_size =
489  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
490  key_component_width;
491  const size_t one_to_many_hash_entries =
493  ? 2 * keyspace_entry_count + emitted_keys_count
494  : 0;
495  const size_t hash_table_size =
496  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
497 
// NOTE(review): "contigious" in the message below is a typo for "contiguous"; it
// is runtime text and therefore left untouched here.
498  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
499  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
500  throw TooManyHashEntries(
501  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
502  "yet");
503  }
504 
505  VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
506  << keyspace_entry_count << " hash entries and " << one_to_many_hash_entries
507  << " entries in the " << HashJoin::getHashTypeString(layout) << " buffer";
508  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
509 
510  hash_table_ = std::make_unique<BaselineHashTable>(executor->getDataMgr(),
511  layout,
512  keyspace_entry_count,
513  emitted_keys_count,
514  hash_table_size,
515  device_id);
516 #else
517  UNREACHABLE();
518 #endif
519  }
520 
// Builds the baseline join hash table on a GPU device and returns an error code.
// With HAVE_CUDA:
//  1. allocateDeviceMemory() creates hash_table_; an empty keyspace returns 0 right
//     after that allocation.
//  2. A CudaAllocator provides a device-side int (dev_err_buff) that is seeded from
//     err (0), and the key handler is copied to the device via
//     transfer_flat_object_to_gpu.
//  3. The device buffer is initialised and filled by the 4- or 8-byte variant; err
//     is then read back from dev_err_buff and a non-zero value is returned at once.
//  4. The additional int32_t buffer behind the key entries is initialised and
//     filled (inside a block opened on listing line 613, which is not visible).
// key_component_width must be 4 or 8; other values hit UNREACHABLE().
// Without HAVE_CUDA the body is UNREACHABLE().
521  template <class KEY_HANDLER>
522  int initHashTableOnGpu(KEY_HANDLER* key_handler,
523  const std::vector<JoinColumn>& join_columns,
524  const HashType layout,
525  const JoinType join_type,
526  const size_t key_component_width,
527  const size_t key_component_count,
528  const size_t keyspace_entry_count,
529  const size_t emitted_keys_count,
530  const int device_id,
531  const Executor* executor) {
532  auto timer = DEBUG_TIMER(__func__);
533  int err = 0;
534 #ifdef HAVE_CUDA
535  allocateDeviceMemory(layout,
536  key_component_width,
537  key_component_count,
538  keyspace_entry_count,
539  emitted_keys_count,
540  device_id,
541  executor);
542  if (!keyspace_entry_count) {
543  // need to "allocate" the empty hash table first
544  CHECK(!emitted_keys_count);
545  return 0;
546  }
547  auto data_mgr = executor->getDataMgr();
548  auto allocator = std::make_unique<CudaAllocator>(
549  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
550  auto dev_err_buff = allocator->alloc(sizeof(int));
551 
// Seed the device-side error slot with the current value of err (0).
552  allocator->copyToDevice(dev_err_buff, &err, sizeof(err));
553  auto gpu_hash_table_buff = hash_table_->getGpuBuffer();
554  CHECK(gpu_hash_table_buff);
// Semi-join handling is only requested for SEMI/ANTI joins on a OneToOne layout.
555  const bool for_semi_join =
556  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
557  layout == HashType::OneToOne;
558  setHashLayout(layout);
559  const auto key_handler_gpu = transfer_flat_object_to_gpu(*key_handler, *allocator);
560  switch (key_component_width) {
561  case 4:
562  init_baseline_hash_join_buff_on_device_32(gpu_hash_table_buff,
563  keyspace_entry_count,
564  key_component_count,
565  layout == HashType::OneToOne,
566  -1);
567  break;
568  case 8:
569  init_baseline_hash_join_buff_on_device_64(gpu_hash_table_buff,
570  keyspace_entry_count,
571  key_component_count,
572  layout == HashType::OneToOne,
573  -1);
574  break;
575  default:
576  UNREACHABLE();
577  }
578  switch (key_component_width) {
579  case 4: {
580  fill_baseline_hash_join_buff_on_device<int32_t>(
581  gpu_hash_table_buff,
582  keyspace_entry_count,
583  -1,
584  for_semi_join,
585  key_component_count,
586  layout == HashType::OneToOne,
587  reinterpret_cast<int*>(dev_err_buff),
588  key_handler_gpu,
589  join_columns.front().num_elems);
// Read the device-side error slot back into err.
590  allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
591  break;
592  }
593  case 8: {
594  fill_baseline_hash_join_buff_on_device<int64_t>(
595  gpu_hash_table_buff,
596  keyspace_entry_count,
597  -1,
598  for_semi_join,
599  key_component_count,
600  layout == HashType::OneToOne,
601  reinterpret_cast<int*>(dev_err_buff),
602  key_handler_gpu,
603  join_columns.front().num_elems);
604  allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
605  break;
606  }
607  default:
608  UNREACHABLE();
609  }
610  if (err) {
611  return err;
612  }
// NOTE(review): listing line 613 is missing; it presumably opens the conditional
// block that the brace at listing line 647 closes.
// NOTE(review): unlike allocateDeviceMemory, this entry_size has no extra slot for
// OneToOne. That is consistent only if this section never runs for OneToOne
// layouts -- confirm against the guard on the missing line.
614  const auto entry_size = key_component_count * key_component_width;
615  auto one_to_many_buff = reinterpret_cast<int32_t*>(
616  gpu_hash_table_buff + keyspace_entry_count * entry_size);
617  init_hash_join_buff_on_device(one_to_many_buff, keyspace_entry_count, -1);
// Repeats the earlier setHashLayout call with the same value.
618  setHashLayout(layout);
619  switch (key_component_width) {
620  case 4: {
621  const auto composite_key_dict = reinterpret_cast<int32_t*>(gpu_hash_table_buff);
622  fill_one_to_many_baseline_hash_table_on_device<int32_t>(
623  one_to_many_buff,
624  composite_key_dict,
625  keyspace_entry_count,
626  key_component_count,
627  key_handler_gpu,
628  join_columns.front().num_elems);
629 
630  break;
631  }
632  case 8: {
633  const auto composite_key_dict = reinterpret_cast<int64_t*>(gpu_hash_table_buff);
634  fill_one_to_many_baseline_hash_table_on_device<int64_t>(
635  one_to_many_buff,
636  composite_key_dict,
637  keyspace_entry_count,
638  key_component_count,
639  key_handler_gpu,
640  join_columns.front().num_elems);
641 
642  break;
643  }
644  default:
645  UNREACHABLE();
646  }
647  }
648 #else
649  UNREACHABLE();
650 #endif
651  return err;
652  }
653 
// Hands the built table to the caller by moving hash_table_ out; the member is
// null afterwards, so a second call returns an empty pointer.
654  std::unique_ptr<BaselineHashTable> getHashTable() { return std::move(hash_table_); }
655 
// Records the layout in layout_; the init* methods call this with the layout they
// were given.
656  void setHashLayout(HashType layout) { layout_ = layout; }
657 
// Returns the layout most recently stored by setHashLayout.
658  HashType getHashLayout() const { return layout_; }
659 
660  private:
// Table produced by the init*/allocate methods; ownership leaves through
// getHashTable().
661  std::unique_ptr<BaselineHashTable> hash_table_;
// NOTE(review): listing line 662 is missing from this view; the layout_ member used
// by setHashLayout/getHashLayout is presumably declared there.
663 };
JoinType
Definition: sqldefs.h:136
void fill_baseline_hash_join_buff_on_device(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const KEY_HANDLER *key_handler, const size_t num_elems)
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
void init_baseline_hash_join_buff_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_on_device(int32_t *buff, const SIZE *composite_key_dict, const size_t hash_entry_count, const size_t key_component_count, const KEY_HANDLER *key_handler, const size_t num_elems)
int initHashTableOnCpu(KEY_HANDLER *key_handler, const CompositeKeyInfo &composite_key_info, const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const StrProxyTranslationMapsPtrsAndOffsets &str_proxy_translation_maps_ptrs_and_offsets, const size_t keyspace_entry_count, const size_t keys_for_all_rows, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count)
T * transfer_flat_object_to_gpu(const T &object, DeviceAllocator &allocator)
#define LOG(tag)
Definition: Logger.h:217
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
#define UNREACHABLE()
Definition: Logger.h:267
void range_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const RangeKeyHandler *key_handler, const size_t num_elems)
void init_baseline_hash_join_buff_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::unique_ptr< BaselineHashTable > hash_table_
void overlaps_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
future< Result > async(Fn &&fn, Args &&...args)
void fill_one_to_many_baseline_hash_table_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed)
int initHashTableOnGpu(KEY_HANDLER *key_handler, const std::vector< JoinColumn > &join_columns, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id, const Executor *executor)
int fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void init_hash_join_buff_on_device(int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val)
int fill_baseline_hash_join_buff(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const KEY_HANDLER *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void range_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const RangeKeyHandler *key_handler, const size_t num_elems)
int range_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::pair< std::vector< const int32_t * >, std::vector< int32_t >> StrProxyTranslationMapsPtrsAndOffsets
std::unique_ptr< BaselineHashTable > getHashTable()
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:154
int overlaps_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int range_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int64_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
int overlaps_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void allocateDeviceMemory(const HashType layout, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id, const Executor *executor)
#define CHECK(condition)
Definition: Logger.h:223
#define DEBUG_TIMER(name)
Definition: Logger.h:370
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
void overlaps_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void fill_one_to_many_baseline_hash_table_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed)
Allocate GPU memory using GpuBuffers via DataMgr.
int fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int cpu_threads()
Definition: thread_count.h:24
HashType
Definition: HashTable.h:19
#define VLOG(n)
Definition: Logger.h:317
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:150