OmniSciDB  c1a53651b2
BaselineHashTableBuilder.h
/*
 * Copyright 2022 HEAVY.AI, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include "Shared/thread_count.h"

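// The two fill_baseline_hash_join_buff overloads below select the 32-bit or
// 64-bit key path via enable_if on sizeof(SIZE), then dispatch at compile time
// on the concrete KEY_HANDLER type (Generic, Range, or Overlaps) to the
// matching non-templated runtime entry point.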
template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, SIZE>::type* = nullptr>
int fill_baseline_hash_join_buff(int8_t* hash_buff,
                                 const size_t entry_count,
                                 const int32_t invalid_slot_val,
                                 const bool for_semi_join,
                                 const size_t key_component_count,
                                 const bool with_val_slot,
                                 const KEY_HANDLER* key_handler,
                                 const size_t num_elems,
                                 const int32_t cpu_thread_idx,
                                 const int32_t cpu_thread_count) {
  auto timer = DEBUG_TIMER(__func__);
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    return fill_baseline_hash_join_buff_32(hash_buff,
                                           entry_count,
                                           invalid_slot_val,
                                           for_semi_join,
                                           key_component_count,
                                           with_val_slot,
                                           key_handler,
                                           num_elems,
                                           cpu_thread_idx,
                                           cpu_thread_count);
  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
    return range_fill_baseline_hash_join_buff_32(hash_buff,
                                                 entry_count,
                                                 invalid_slot_val,
                                                 key_component_count,
                                                 with_val_slot,
                                                 key_handler,
                                                 num_elems,
                                                 cpu_thread_idx,
                                                 cpu_thread_count);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    return overlaps_fill_baseline_hash_join_buff_32(hash_buff,
                                                    entry_count,
                                                    invalid_slot_val,
                                                    key_component_count,
                                                    with_val_slot,
                                                    key_handler,
                                                    num_elems,
                                                    cpu_thread_idx,
                                                    cpu_thread_count);
  }
}

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, SIZE>::type* = nullptr>
int fill_baseline_hash_join_buff(int8_t* hash_buff,
                                 const size_t entry_count,
                                 const int32_t invalid_slot_val,
                                 const bool for_semi_join,
                                 const size_t key_component_count,
                                 const bool with_val_slot,
                                 const KEY_HANDLER* key_handler,
                                 const size_t num_elems,
                                 const int32_t cpu_thread_idx,
                                 const int32_t cpu_thread_count) {
  auto timer = DEBUG_TIMER(__func__);
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    return fill_baseline_hash_join_buff_64(hash_buff,
                                           entry_count,
                                           invalid_slot_val,
                                           for_semi_join,
                                           key_component_count,
                                           with_val_slot,
                                           key_handler,
                                           num_elems,
                                           cpu_thread_idx,
                                           cpu_thread_count);
  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
    return range_fill_baseline_hash_join_buff_64(hash_buff,
                                                 entry_count,
                                                 invalid_slot_val,
                                                 key_component_count,
                                                 with_val_slot,
                                                 key_handler,
                                                 num_elems,
                                                 cpu_thread_idx,
                                                 cpu_thread_count);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    return overlaps_fill_baseline_hash_join_buff_64(hash_buff,
                                                    entry_count,
                                                    invalid_slot_val,
                                                    key_component_count,
                                                    with_val_slot,
                                                    key_handler,
                                                    num_elems,
                                                    cpu_thread_idx,
                                                    cpu_thread_count);
  }
}

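// Device-side counterparts of the dispatchers above. The GPU runtime reports
// failures through dev_err_buff rather than a return value. Note that on the
// 32-bit key path, range and overlaps joins are not implemented on device.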
template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, SIZE>::type* = nullptr>
void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
                                            const size_t entry_count,
                                            const int32_t invalid_slot_val,
                                            const bool for_semi_join,
                                            const size_t key_component_count,
                                            const bool with_val_slot,
                                            int* dev_err_buff,
                                            const KEY_HANDLER* key_handler,
                                            const size_t num_elems) {
  auto timer = DEBUG_TIMER(__func__);
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_baseline_hash_join_buff_on_device_32(hash_buff,
                                              entry_count,
                                              invalid_slot_val,
                                              for_semi_join,
                                              key_component_count,
                                              with_val_slot,
                                              dev_err_buff,
                                              key_handler,
                                              num_elems);
  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
    UNREACHABLE();
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
  }
}

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, SIZE>::type* = nullptr>
void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
                                            const size_t entry_count,
                                            const int32_t invalid_slot_val,
                                            const bool for_semi_join,
                                            const size_t key_component_count,
                                            const bool with_val_slot,
                                            int* dev_err_buff,
                                            const KEY_HANDLER* key_handler,
                                            const size_t num_elems) {
  auto timer = DEBUG_TIMER(__func__);
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_baseline_hash_join_buff_on_device_64(hash_buff,
                                              entry_count,
                                              invalid_slot_val,
                                              for_semi_join,
                                              key_component_count,
                                              with_val_slot,
                                              dev_err_buff,
                                              key_handler,
                                              num_elems);
  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
    range_fill_baseline_hash_join_buff_on_device_64(hash_buff,
                                                    entry_count,
                                                    invalid_slot_val,
                                                    key_component_count,
                                                    with_val_slot,
                                                    dev_err_buff,
                                                    key_handler,
                                                    num_elems);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    overlaps_fill_baseline_hash_join_buff_on_device_64(hash_buff,
                                                       entry_count,
                                                       invalid_slot_val,
                                                       key_component_count,
                                                       with_val_slot,
                                                       dev_err_buff,
                                                       key_handler,
                                                       num_elems);
  }
}

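// Dispatchers for the one-to-many payload buffers: once the composite key
// dictionary is built, these scatter row ids into the offset/count/payload
// region that follows it on the device.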
template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 4, SIZE>::type* = nullptr>
void fill_one_to_many_baseline_hash_table_on_device(int32_t* buff,
                                                    const SIZE* composite_key_dict,
                                                    const size_t hash_entry_count,
                                                    const size_t key_component_count,
                                                    const KEY_HANDLER* key_handler,
                                                    const size_t num_elems,
                                                    const bool for_window_framing) {
  auto timer = DEBUG_TIMER(__func__);
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_one_to_many_baseline_hash_table_on_device_32(buff,
                                                      composite_key_dict,
                                                      hash_entry_count,
                                                      key_component_count,
                                                      key_handler,
                                                      num_elems,
                                                      for_window_framing);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value ||
                      std::is_same<KEY_HANDLER, RangeKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
  }
}

template <typename SIZE,
          class KEY_HANDLER,
          typename std::enable_if<sizeof(SIZE) == 8, SIZE>::type* = nullptr>
void fill_one_to_many_baseline_hash_table_on_device(int32_t* buff,
                                                    const SIZE* composite_key_dict,
                                                    const size_t hash_entry_count,
                                                    const size_t key_component_count,
                                                    const KEY_HANDLER* key_handler,
                                                    const size_t num_elems,
                                                    const bool for_window_framing) {
  auto timer = DEBUG_TIMER(__func__);
  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
    fill_one_to_many_baseline_hash_table_on_device_64(buff,
                                                      composite_key_dict,
                                                      hash_entry_count,
                                                      key_handler,
                                                      num_elems,
                                                      for_window_framing);
  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
    range_fill_one_to_many_baseline_hash_table_on_device_64(
        buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
  } else {
    static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
                  "Only Generic, Overlaps, and Range Key Handlers are supported.");
    overlaps_fill_one_to_many_baseline_hash_table_on_device_64(
        buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
  }
}

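// Builds a baseline (composite-key) join hash table on the CPU or on a single
// GPU device. The build runs in two phases: initialize all key slots to an
// empty sentinel, then fill them from the join columns; layouts that require
// additional buffers (one-to-many) append them after the key dictionary.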
class BaselineJoinHashTableBuilder {
 public:
  BaselineJoinHashTableBuilder() = default;

  template <class KEY_HANDLER>
  int initHashTableOnCpu(KEY_HANDLER* key_handler,
                         const CompositeKeyInfo& composite_key_info,
                         const std::vector<JoinColumn>& join_columns,
                         const std::vector<JoinColumnTypeInfo>& join_column_types,
                         const std::vector<JoinBucketInfo>& join_bucket_info,
                         const StrProxyTranslationMapsPtrsAndOffsets&
                             str_proxy_translation_maps_ptrs_and_offsets,
                         const size_t keyspace_entry_count,
                         const size_t keys_for_all_rows,
                         const HashType layout,
                         const JoinType join_type,
                         const size_t key_component_width,
                         const size_t key_component_count,
                         const RegisteredQueryHint& query_hint) {
    auto timer = DEBUG_TIMER(__func__);
    auto const entry_cnt = (key_component_count + (layout == HashType::OneToOne ? 1 : 0));
    auto const entry_size = entry_cnt * key_component_width;
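    // For layouts needing additional buffers, the payload region appears to
    // hold two int32 arrays of keyspace_entry_count entries (offsets and
    // counts) plus one row-id slot per emitted key, with the row-id payload
    // doubled for WINDOW_FUNCTION_FRAMING joins.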
    size_t const one_to_many_hash_entries =
        HashJoin::layoutRequiresAdditionalBuffers(layout)
            ? 2 * keyspace_entry_count +
                  (keys_for_all_rows *
                   (1 + (join_type == JoinType::WINDOW_FUNCTION_FRAMING)))
            : 0;
    size_t const hash_table_size =
        entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);

    if (query_hint.isHintRegistered(QueryHint::kMaxJoinHashTableSize) &&
        hash_table_size > query_hint.max_join_hash_table_size) {
      throw JoinHashTableTooBig(hash_table_size, query_hint.max_join_hash_table_size);
    }

    // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
    if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
      throw TooManyHashEntries(
          "Hash tables for GPU requiring larger than 2GB contiguous memory not supported "
          "yet");
    }
    const bool for_semi_join =
        (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
        layout == HashType::OneToOne;

    hash_table_ = std::make_unique<BaselineHashTable>(
        layout, keyspace_entry_count, keys_for_all_rows, hash_table_size);
    VLOG(1) << "Initialize a CPU baseline hash table for join type "
            << HashJoin::getHashTypeString(layout)
            << ", hash table size: " << hash_table_size << " Bytes"
            << ", # hash entries: " << entry_cnt << ", entry_size: " << entry_size
            << ", # entries in the payload buffer: " << one_to_many_hash_entries
            << " (# non-null hash entries: " << keyspace_entry_count
            << ", # entries stored in the payload buffer: " << keys_for_all_rows << ")";
    auto cpu_hash_table_ptr = hash_table_->getCpuBuffer();
    int thread_count = cpu_threads();
    std::vector<std::future<void>> init_cpu_buff_threads;
    setHashLayout(layout);
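    // Phase 1: initialize every key slot to the empty sentinel (-1), either
    // via TBB or by partitioning the buffer across cpu_threads() async workers.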
    {
      auto timer_init = DEBUG_TIMER("CPU Baseline-Hash: init_baseline_hash_join_buff_32");
#ifdef HAVE_TBB
      switch (key_component_width) {
        case 4:
          init_baseline_hash_join_buff_tbb_32(cpu_hash_table_ptr,
                                              keyspace_entry_count,
                                              key_component_count,
                                              layout == HashType::OneToOne,
                                              -1);
          break;
        case 8:
          init_baseline_hash_join_buff_tbb_64(cpu_hash_table_ptr,
                                              keyspace_entry_count,
                                              key_component_count,
                                              layout == HashType::OneToOne,
                                              -1);
          break;
        default:
          CHECK(false);
      }
#else  // #ifdef HAVE_TBB
      for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
        init_cpu_buff_threads.emplace_back(std::async(
            std::launch::async,
            [keyspace_entry_count,
             key_component_count,
             key_component_width,
             thread_idx,
             thread_count,
             cpu_hash_table_ptr,
             layout,
             parent_thread_local_ids = logger::thread_local_ids()] {
              logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
              DEBUG_TIMER_NEW_THREAD(parent_thread_local_ids.thread_id_);
              switch (key_component_width) {
                case 4:
                  init_baseline_hash_join_buff_32(cpu_hash_table_ptr,
                                                  keyspace_entry_count,
                                                  key_component_count,
                                                  layout == HashType::OneToOne,
                                                  -1,
                                                  thread_idx,
                                                  thread_count);
                  break;
                case 8:
                  init_baseline_hash_join_buff_64(cpu_hash_table_ptr,
                                                  keyspace_entry_count,
                                                  key_component_count,
                                                  layout == HashType::OneToOne,
                                                  -1,
                                                  thread_idx,
                                                  thread_count);
                  break;
                default:
                  UNREACHABLE();
              }
            }));
      }
      for (auto& child : init_cpu_buff_threads) {
        child.get();
      }
#endif  // !HAVE_TBB
    }
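    // Phase 2: each worker walks the input rows and claims key slots in the
    // shared buffer; a non-zero return from any worker (e.g. a duplicate key
    // under a one-to-one layout) is propagated as the build error.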
    std::vector<std::future<int>> fill_cpu_buff_threads;
    for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
      fill_cpu_buff_threads.emplace_back(std::async(
          std::launch::async,
          [key_handler,
           keyspace_entry_count,
           &join_columns,
           key_component_count,
           key_component_width,
           layout,
           thread_idx,
           cpu_hash_table_ptr,
           thread_count,
           for_semi_join,
           parent_thread_local_ids = logger::thread_local_ids()] {
            logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
            DEBUG_TIMER_NEW_THREAD(parent_thread_local_ids.thread_id_);
            switch (key_component_width) {
              case 4: {
                return fill_baseline_hash_join_buff<int32_t>(cpu_hash_table_ptr,
                                                             keyspace_entry_count,
                                                             -1,
                                                             for_semi_join,
                                                             key_component_count,
                                                             layout == HashType::OneToOne,
                                                             key_handler,
                                                             join_columns[0].num_elems,
                                                             thread_idx,
                                                             thread_count);
              }
              case 8: {
                return fill_baseline_hash_join_buff<int64_t>(cpu_hash_table_ptr,
                                                             keyspace_entry_count,
                                                             -1,
                                                             for_semi_join,
                                                             key_component_count,
                                                             layout == HashType::OneToOne,
                                                             key_handler,
                                                             join_columns[0].num_elems,
                                                             thread_idx,
                                                             thread_count);
              }
              default:
                CHECK(false);
            }
            return -1;
          }));
    }
    int err = 0;
    for (auto& child : fill_cpu_buff_threads) {
      int partial_err = child.get();
      if (partial_err) {
        err = partial_err;
      }
    }
    if (err) {
      return err;
    }
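    // One-to-many layouts additionally build the offset/count/payload buffers
    // that follow the composite key dictionary in the same allocation.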
    if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
      auto one_to_many_buff = reinterpret_cast<int32_t*>(
          cpu_hash_table_ptr + keyspace_entry_count * entry_size);
      {
        auto timer_init_additional_buffers =
            DEBUG_TIMER("CPU Baseline-Hash: Additional Buffers init_hash_join_buff");
        init_hash_join_buff(one_to_many_buff, keyspace_entry_count, -1, 0, 1);
      }
      bool is_geo_compressed = false;
      if constexpr (std::is_same_v<KEY_HANDLER, RangeKeyHandler>) {
        if (const auto range_handler =
                reinterpret_cast<const RangeKeyHandler*>(key_handler)) {
          is_geo_compressed = range_handler->is_compressed_;
        }
      }
      setHashLayout(layout);
      switch (key_component_width) {
        case 4: {
          const auto composite_key_dict = reinterpret_cast<int32_t*>(cpu_hash_table_ptr);
          fill_one_to_many_baseline_hash_table_32(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              key_component_count,
              join_columns,
              join_column_types,
              join_bucket_info,
              str_proxy_translation_maps_ptrs_and_offsets.first,
              str_proxy_translation_maps_ptrs_and_offsets.second,
              thread_count,
              std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
              is_geo_compressed,
              join_type == JoinType::WINDOW_FUNCTION_FRAMING);
          break;
        }
        case 8: {
          const auto composite_key_dict = reinterpret_cast<int64_t*>(cpu_hash_table_ptr);
          fill_one_to_many_baseline_hash_table_64(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              key_component_count,
              join_columns,
              join_column_types,
              join_bucket_info,
              str_proxy_translation_maps_ptrs_and_offsets.first,
              str_proxy_translation_maps_ptrs_and_offsets.second,
              thread_count,
              std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
              is_geo_compressed,
              join_type == JoinType::WINDOW_FUNCTION_FRAMING);
          break;
        }
        default:
          CHECK(false);
      }
    }
    return err;
  }

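  // Sizes and allocates the device-side table through the executor's DataMgr,
  // mirroring the sizing logic of initHashTableOnCpu; compiled out when CUDA
  // support is unavailable.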
  void allocateDeviceMemory(const HashType layout,
                            const size_t key_component_width,
                            const size_t key_component_count,
                            const size_t keyspace_entry_count,
                            const size_t emitted_keys_count,
                            const int device_id,
                            const Executor* executor,
                            const RegisteredQueryHint& query_hint) {
#ifdef HAVE_CUDA
    const auto num_hash_entries =
        (key_component_count + (layout == HashType::OneToOne ? 1 : 0));
    const auto entry_size = num_hash_entries * key_component_width;
    const size_t one_to_many_hash_entries =
        HashJoin::layoutRequiresAdditionalBuffers(layout)
            ? 2 * keyspace_entry_count + emitted_keys_count
            : 0;
    const size_t hash_table_size =
        entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);

    if (query_hint.isHintRegistered(QueryHint::kMaxJoinHashTableSize) &&
        hash_table_size > query_hint.max_join_hash_table_size) {
      throw JoinHashTableTooBig(hash_table_size, query_hint.max_join_hash_table_size);
    }

    // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
    if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
      throw TooManyHashEntries(
          "Hash tables for GPU requiring larger than 2GB contiguous memory not supported "
          "yet");
    }

    VLOG(1) << "Initialize a GPU baseline hash table for device " << device_id
            << " with join type " << HashJoin::getHashTypeString(layout)
            << ", hash table size: " << hash_table_size << " Bytes"
            << ", # hash entries: " << num_hash_entries << ", entry_size: " << entry_size
            << ", # entries in the payload buffer: " << one_to_many_hash_entries
            << " (# non-null hash entries: " << keyspace_entry_count
            << ", # entries stored in the payload buffer: " << emitted_keys_count << ")";

    hash_table_ = std::make_unique<BaselineHashTable>(executor->getDataMgr(),
                                                      layout,
                                                      keyspace_entry_count,
                                                      emitted_keys_count,
                                                      hash_table_size,
                                                      device_id);
#else
    UNREACHABLE();
#endif
  }

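  // Builds the table directly in device memory: allocate, initialize the key
  // slots on device, run the fill kernels, and copy the error code back from
  // dev_err_buff after each step that can fail.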
  template <class KEY_HANDLER>
  int initHashTableOnGpu(KEY_HANDLER* key_handler,
                         const std::vector<JoinColumn>& join_columns,
                         const HashType layout,
                         const JoinType join_type,
                         const size_t key_component_width,
                         const size_t key_component_count,
                         const size_t keyspace_entry_count,
                         const size_t emitted_keys_count,
                         const int device_id,
                         const Executor* executor,
                         const RegisteredQueryHint& query_hint) {
    auto timer = DEBUG_TIMER(__func__);
    int err = 0;
#ifdef HAVE_CUDA
    allocateDeviceMemory(layout,
                         key_component_width,
                         key_component_count,
                         keyspace_entry_count,
                         emitted_keys_count,
                         device_id,
                         executor,
                         query_hint);
    if (!keyspace_entry_count) {
      // need to "allocate" the empty hash table first
      CHECK(!emitted_keys_count);
      return 0;
    }
    auto data_mgr = executor->getDataMgr();
    auto allocator = std::make_unique<CudaAllocator>(
        data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
    auto dev_err_buff = allocator->alloc(sizeof(int));

    allocator->copyToDevice(dev_err_buff, &err, sizeof(err));
    auto gpu_hash_table_buff = hash_table_->getGpuBuffer();
    CHECK(gpu_hash_table_buff);
    const bool for_semi_join =
        (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
        layout == HashType::OneToOne;
    setHashLayout(layout);
    const auto key_handler_gpu = transfer_flat_object_to_gpu(*key_handler, *allocator);
    switch (key_component_width) {
      case 4:
        init_baseline_hash_join_buff_on_device_32(gpu_hash_table_buff,
                                                  keyspace_entry_count,
                                                  key_component_count,
                                                  layout == HashType::OneToOne,
                                                  -1);
        break;
      case 8:
        init_baseline_hash_join_buff_on_device_64(gpu_hash_table_buff,
                                                  keyspace_entry_count,
                                                  key_component_count,
                                                  layout == HashType::OneToOne,
                                                  -1);
        break;
      default:
        UNREACHABLE();
    }
    switch (key_component_width) {
      case 4: {
        fill_baseline_hash_join_buff_on_device<int32_t>(
            gpu_hash_table_buff,
            keyspace_entry_count,
            -1,
            for_semi_join,
            key_component_count,
            layout == HashType::OneToOne,
            reinterpret_cast<int*>(dev_err_buff),
            key_handler_gpu,
            join_columns.front().num_elems);
        allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
        break;
      }
      case 8: {
        fill_baseline_hash_join_buff_on_device<int64_t>(
            gpu_hash_table_buff,
            keyspace_entry_count,
            -1,
            for_semi_join,
            key_component_count,
            layout == HashType::OneToOne,
            reinterpret_cast<int*>(dev_err_buff),
            key_handler_gpu,
            join_columns.front().num_elems);
        allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
        break;
      }
      default:
        UNREACHABLE();
    }
    if (err) {
      return err;
    }
    if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
      const auto entry_size = key_component_count * key_component_width;
      auto one_to_many_buff = reinterpret_cast<int32_t*>(
          gpu_hash_table_buff + keyspace_entry_count * entry_size);
      init_hash_join_buff_on_device(one_to_many_buff, keyspace_entry_count, -1);
      setHashLayout(layout);
      switch (key_component_width) {
        case 4: {
          const auto composite_key_dict = reinterpret_cast<int32_t*>(gpu_hash_table_buff);
          fill_one_to_many_baseline_hash_table_on_device<int32_t>(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              key_component_count,
              key_handler_gpu,
              join_columns.front().num_elems,
              join_type == JoinType::WINDOW_FUNCTION_FRAMING);
          break;
        }
        case 8: {
          const auto composite_key_dict = reinterpret_cast<int64_t*>(gpu_hash_table_buff);
          fill_one_to_many_baseline_hash_table_on_device<int64_t>(
              one_to_many_buff,
              composite_key_dict,
              keyspace_entry_count,
              key_component_count,
              key_handler_gpu,
              join_columns.front().num_elems,
              join_type == JoinType::WINDOW_FUNCTION_FRAMING);
          break;
        }
        default:
          UNREACHABLE();
      }
    }
#else
    UNREACHABLE();
#endif
    return err;
  }

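  // Transfers ownership of the finished table to the caller; the builder is
  // left empty afterwards.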
  std::unique_ptr<BaselineHashTable> getHashTable() { return std::move(hash_table_); }

  void setHashLayout(HashType layout) { layout_ = layout; }

  HashType getHashLayout() const { return layout_; }

 private:
  std::unique_ptr<BaselineHashTable> hash_table_;
  HashType layout_;
};
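
// A minimal usage sketch (illustrative only; not part of the original file).
// The handler constructor arguments and the surrounding variables are assumed
// for the example; in-tree callers such as BaselineJoinHashTable wire these up
// from catalog and query state.
//
//   BaselineJoinHashTableBuilder builder;
//   GenericKeyHandler key_handler(/* handler-specific arguments, assumed */);
//   const int err = builder.initHashTableOnCpu(&key_handler,
//                                              composite_key_info,
//                                              join_columns,
//                                              join_column_types,
//                                              join_bucket_info,
//                                              translation_maps_and_offsets,
//                                              keyspace_entry_count,
//                                              keys_for_all_rows,
//                                              HashType::OneToMany,
//                                              JoinType::INNER,
//                                              /* key_component_width = */ 8,
//                                              key_component_count,
//                                              query_hint);
//   if (!err) {
//     auto hash_table = builder.getHashTable();  // builder relinquishes ownership
//   }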