OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BaselineHashTableBuilder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
25 #include "Shared/thread_count.h"
26 
// CPU fill dispatcher: selects a key-handler-specific fill routine at compile time
// (if constexpr on KEY_HANDLER) and returns that routine's int result.
//   - GenericKeyHandler: forwards every argument, including for_semi_join, to
//     fill_baseline_hash_join_buff_32.
//   - RangeKeyHandler and BoundingBoxIntersectKeyHandler: forward the same arguments
//     except for_semi_join; the lines naming the callees are absent from this listing.
//   - Any other KEY_HANDLER is rejected by the static_assert in the final branch.
// NOTE(review): listing line 29 (the end of the template parameter list) is missing.
// Presumably it constrains SIZE so this overload is picked for 4-byte keys and the
// sibling overload for 8-byte keys -- confirm against the real header.
27 template <typename SIZE,
28  class KEY_HANDLER,
30 int fill_baseline_hash_join_buff(int8_t* hash_buff,
31  const size_t entry_count,
32  const int32_t invalid_slot_val,
33  const bool for_semi_join,
34  const size_t key_component_count,
35  const bool with_val_slot,
36  const KEY_HANDLER* key_handler,
37  const size_t num_elems,
38  const int32_t cpu_thread_idx,
39  const int32_t cpu_thread_count) {
40  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
41  return fill_baseline_hash_join_buff_32(hash_buff,
42  entry_count,
43  invalid_slot_val,
44  for_semi_join,
45  key_component_count,
46  with_val_slot,
47  key_handler,
48  num_elems,
49  cpu_thread_idx,
50  cpu_thread_count);
51  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
// NOTE(review): listing line 52 (callee name and first argument) is missing here.
53  entry_count,
54  invalid_slot_val,
55  key_component_count,
56  with_val_slot,
57  key_handler,
58  num_elems,
59  cpu_thread_idx,
60  cpu_thread_count);
61  } else {
// Compile-time guard: only the bounding-box handler may reach this branch.
62  static_assert(
63  std::is_same<KEY_HANDLER, BoundingBoxIntersectKeyHandler>::value,
64  "Only Generic, Bounding Box Intersect, and Range Key Handlers are supported.");
// NOTE(review): listing line 65 (callee name and first argument) is missing here.
66  entry_count,
67  invalid_slot_val,
68  key_component_count,
69  with_val_slot,
70  key_handler,
71  num_elems,
72  cpu_thread_idx,
73  cpu_thread_count);
74  }
75 }
76 
// CPU fill dispatcher that forwards to the *_64 fill routines. The routine is chosen at
// compile time (if constexpr on KEY_HANDLER) and its int result is returned unchanged.
//   - GenericKeyHandler: fill_baseline_hash_join_buff_64, for_semi_join included.
//   - RangeKeyHandler: range_fill_baseline_hash_join_buff_64, for_semi_join not passed.
//   - BoundingBoxIntersectKeyHandler: same argument list as the range branch; the line
//     naming the callee is absent from this listing.
//   - Any other KEY_HANDLER is rejected by the static_assert in the final branch.
// NOTE(review): listing line 79 (the end of the template parameter list) is missing;
// presumably it restricts this overload to 8-byte SIZE -- confirm against the real header.
77 template <typename SIZE,
78  class KEY_HANDLER,
80 int fill_baseline_hash_join_buff(int8_t* hash_buff,
81  const size_t entry_count,
82  const int32_t invalid_slot_val,
83  const bool for_semi_join,
84  const size_t key_component_count,
85  const bool with_val_slot,
86  const KEY_HANDLER* key_handler,
87  const size_t num_elems,
88  const int32_t cpu_thread_idx,
89  const int32_t cpu_thread_count) {
90  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
91  return fill_baseline_hash_join_buff_64(hash_buff,
92  entry_count,
93  invalid_slot_val,
94  for_semi_join,
95  key_component_count,
96  with_val_slot,
97  key_handler,
98  num_elems,
99  cpu_thread_idx,
100  cpu_thread_count);
101  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
102  return range_fill_baseline_hash_join_buff_64(hash_buff,
103  entry_count,
104  invalid_slot_val,
105  key_component_count,
106  with_val_slot,
107  key_handler,
108  num_elems,
109  cpu_thread_idx,
110  cpu_thread_count);
111  } else {
// Compile-time guard: only the bounding-box handler may reach this branch.
112  static_assert(
113  std::is_same<KEY_HANDLER, BoundingBoxIntersectKeyHandler>::value,
114  "Only Generic, Bounding Box Intersection, and Range Key Handlers are supported.");
// NOTE(review): listing line 115 (callee name and first argument) is missing here.
116  entry_count,
117  invalid_slot_val,
118  key_component_count,
119  with_val_slot,
120  key_handler,
121  num_elems,
122  cpu_thread_idx,
123  cpu_thread_count);
124  }
125 }
126 
// Device-side fill dispatcher. The fatal log message in the last branch refers to 32-bit
// keys, so this appears to be the 32-bit-key overload -- TODO confirm.
//   - GenericKeyHandler: forwards the arguments (including for_semi_join and dev_err_buff)
//     to a callee whose name is on a line absent from this listing.
//   - RangeKeyHandler: UNREACHABLE().
//   - BoundingBoxIntersectKeyHandler: LOG(FATAL); other handlers fail the static_assert.
// NOTE(review): listing lines 129-130 (end of the template parameter list, return type,
// function name and first parameter) are missing from this listing.
127 template <typename SIZE,
128  class KEY_HANDLER,
131  const size_t entry_count,
132  const int32_t invalid_slot_val,
133  const bool for_semi_join,
134  const size_t key_component_count,
135  const bool with_val_slot,
136  int* dev_err_buff,
137  const KEY_HANDLER* key_handler,
138  const size_t num_elems) {
139  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
// NOTE(review): listing line 140 (callee name and first argument) is missing here.
141  entry_count,
142  invalid_slot_val,
143  for_semi_join,
144  key_component_count,
145  with_val_slot,
146  dev_err_buff,
147  key_handler,
148  num_elems);
149  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
150  UNREACHABLE();
151  } else {
// The handler type is validated at compile time; reaching this branch at run time is fatal.
152  static_assert(
153  std::is_same<KEY_HANDLER, BoundingBoxIntersectKeyHandler>::value,
154  "Only Generic, Bounding Box Intersection, and Range Key Handlers are supported.");
155  LOG(FATAL) << "32-bit keys not yet supported for bounding box intersect.";
156  }
157 }
158 
// Device-side fill dispatcher; returns nothing. The target routine is chosen at compile
// time (if constexpr on KEY_HANDLER).
//   - GenericKeyHandler branch passes for_semi_join; the Range and bounding-box branches
//     do not. All three pass dev_err_buff through to the callee.
//   - Any other KEY_HANDLER is rejected by the static_assert in the final branch.
// NOTE(review): the lines naming the three callees (listing lines 172, 182, 194) and the
// end of the template parameter list (line 161) are missing from this listing. Presumably
// this is the 64-bit-key overload, given the sibling whose log message mentions 32-bit
// keys -- confirm against the real header.
159 template <typename SIZE,
160  class KEY_HANDLER,
162 void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
163  const size_t entry_count,
164  const int32_t invalid_slot_val,
165  const bool for_semi_join,
166  const size_t key_component_count,
167  const bool with_val_slot,
168  int* dev_err_buff,
169  const KEY_HANDLER* key_handler,
170  const size_t num_elems) {
171  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
173  entry_count,
174  invalid_slot_val,
175  for_semi_join,
176  key_component_count,
177  with_val_slot,
178  dev_err_buff,
179  key_handler,
180  num_elems);
181  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
183  entry_count,
184  invalid_slot_val,
185  key_component_count,
186  with_val_slot,
187  dev_err_buff,
188  key_handler,
189  num_elems);
190  } else {
// Compile-time guard: only the bounding-box handler may reach this branch.
191  static_assert(
192  std::is_same<KEY_HANDLER, BoundingBoxIntersectKeyHandler>::value,
193  "Only Generic, Bounding Box Intersect, and Range Key Handlers are supported.");
195  entry_count,
196  invalid_slot_val,
197  key_component_count,
198  with_val_slot,
199  dev_err_buff,
200  key_handler,
201  num_elems);
202  }
203 }
204 
// Device-side one-to-many fill dispatcher. The fatal log message refers to 32-bit keys, so
// this appears to be the 32-bit-key overload -- TODO confirm.
//   - GenericKeyHandler: forwards composite_key_dict, hash_entry_count,
//     key_component_count, key_handler, num_elems and for_window_framing to a callee whose
//     name is on a line absent from this listing.
//   - RangeKeyHandler / BoundingBoxIntersectKeyHandler: accepted by the static_assert but
//     end in LOG(FATAL).
// NOTE(review): listing lines 207-208 (end of the template parameter list, return type,
// function name and first parameter) and line 216 (callee) are missing from this listing.
205 template <typename SIZE,
206  class KEY_HANDLER,
209  const SIZE* composite_key_dict,
210  const size_t hash_entry_count,
211  const size_t key_component_count,
212  const KEY_HANDLER* key_handler,
213  const size_t num_elems,
214  const bool for_window_framing) {
215  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
217  composite_key_dict,
218  hash_entry_count,
219  key_component_count,
220  key_handler,
221  num_elems,
222  for_window_framing);
223  } else {
// Both non-generic handlers compile, but neither is implemented for this overload.
224  static_assert(
225  std::is_same<KEY_HANDLER, BoundingBoxIntersectKeyHandler>::value ||
226  std::is_same<KEY_HANDLER, RangeKeyHandler>::value,
227  "Only Generic, Bounding Box Intersection, and Range Key Handlers are supported.");
228  LOG(FATAL) << "32-bit keys not yet supported for bounding box intersect.";
229  }
230 }
231 
// Device-side one-to-many fill dispatcher with a separate branch per key handler.
//   - GenericKeyHandler: forwards composite_key_dict, hash_entry_count, key_handler,
//     num_elems and for_window_framing.
//   - RangeKeyHandler / BoundingBoxIntersectKeyHandler: forward buff, composite_key_dict,
//     hash_entry_count, key_handler and num_elems (no for_window_framing).
//   - key_component_count is not forwarded in any of the visible argument lists.
//   - Any other KEY_HANDLER is rejected by the static_assert in the final branch.
// NOTE(review): listing lines 234-235 (end of the template parameter list, return type,
// function name and first parameter) and the callee lines 243, 250, 256 are missing.
// Presumably this is the 64-bit-key overload -- confirm against the real header.
232 template <typename SIZE,
233  class KEY_HANDLER,
236  const SIZE* composite_key_dict,
237  const size_t hash_entry_count,
238  const size_t key_component_count,
239  const KEY_HANDLER* key_handler,
240  const size_t num_elems,
241  const bool for_window_framing) {
242  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
244  composite_key_dict,
245  hash_entry_count,
246  key_handler,
247  num_elems,
248  for_window_framing);
249  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
251  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
252  } else {
// Compile-time guard: only the bounding-box handler may reach this branch.
253  static_assert(
254  std::is_same<KEY_HANDLER, BoundingBoxIntersectKeyHandler>::value,
255  "Only Generic, Bounding Box Intersect, and Range Key Handlers are supported.");
257  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
258  }
259 }
260 
262  public:
// Defaulted constructor: no hash table is built until one of the init* methods runs.
263  BaselineJoinHashTableBuilder() = default;
// Builds the baseline join hash table in CPU memory and stores it in hash_table_.
//
// Steps, as visible in this listing:
//   1. Size check: throws JoinHashTableTooBig when the computed table size exceeds
//      query_hint.max_join_hash_table_size (the first half of that condition is on
//      listing line 279, which is missing here).
//   2. Allocates a BaselineHashTable at MemoryLevel::CPU_LEVEL and records the layout.
//   3. Returns 0 early when the entry info reports zero keys.
//   4. Initializes the buffer: a single TBB-backed call when HAVE_TBB is defined,
//      otherwise one std::async task per thread index.
//   5. Fills the buffer with one std::async task per thread index in
//      [0, cpu_threads()); a non-zero task result is returned to the caller.
//   6. For layouts where HashJoin::layoutRequiresAdditionalBuffers() is true,
//      initializes and fills the int32_t region that follows the key area.
//
// Returns 0 on success, otherwise a non-zero code reported by one of the fill tasks.
// NOTE(review): composite_key_info and executor are not referenced in the visible lines.
// NOTE(review): listing line 270 (the type of str_proxy_translation_maps_ptrs_and_offsets)
// is missing from this listing.
264  template <class KEY_HANDLER>
265  int initHashTableOnCpu(KEY_HANDLER* key_handler,
266  const CompositeKeyInfo& composite_key_info,
267  const std::vector<JoinColumn>& join_columns,
268  const std::vector<JoinColumnTypeInfo>& join_column_types,
269  const std::vector<JoinBucketInfo>& join_bucket_info,
271  str_proxy_translation_maps_ptrs_and_offsets,
272  const BaselineHashTableEntryInfo hash_table_entry_info,
273  const JoinType join_type,
274  const Executor* executor,
275  const RegisteredQueryHint& query_hint) {
276  auto timer = DEBUG_TIMER(__func__);
277  auto const hash_table_layout = hash_table_entry_info.getHashTableLayout();
278  size_t const hash_table_size = hash_table_entry_info.computeHashTableSize();
280  hash_table_size > query_hint.max_join_hash_table_size) {
281  throw JoinHashTableTooBig(hash_table_size, query_hint.max_join_hash_table_size);
282  }
// Semi-join handling is only requested for SEMI/ANTI joins on a OneToOne layout.
283  const bool for_semi_join =
284  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
285  hash_table_layout == HashType::OneToOne;
286  hash_table_ = std::make_unique<BaselineHashTable>(
287  MemoryLevel::CPU_LEVEL, hash_table_entry_info, nullptr, -1);
288  setHashLayout(hash_table_layout);
// The (empty) table object has already been created above, so getHashTable() still
// hands back a table after this early return.
289  if (hash_table_entry_info.getNumKeys() == 0) {
290  VLOG(1) << "Stop building a hash table: the input table is empty";
291  return 0;
292  }
293  auto cpu_hash_table_ptr = hash_table_->getCpuBuffer();
294  int thread_count = cpu_threads();
295  std::vector<std::future<void>> init_cpu_buff_threads;
296  {
297  auto timer_init = DEBUG_TIMER("Initialize CPU Baseline Join Hash Table");
298 #ifdef HAVE_TBB
// Key width (4 or 8 bytes) selects the 32- or 64-bit init routine; -1 is passed as the
// invalid-slot value.
299  switch (hash_table_entry_info.getJoinKeysSize()) {
300  case 4:
301  init_baseline_hash_join_buff_tbb_32(cpu_hash_table_ptr,
302  hash_table_entry_info.getNumHashEntries(),
303  hash_table_entry_info.getNumJoinKeys(),
304  hash_table_layout == HashType::OneToOne,
305  -1);
306  break;
307  case 8:
308  init_baseline_hash_join_buff_tbb_64(cpu_hash_table_ptr,
309  hash_table_entry_info.getNumHashEntries(),
310  hash_table_entry_info.getNumJoinKeys(),
311  hash_table_layout == HashType::OneToOne,
312  -1);
313  break;
314  default:
315  CHECK(false);
316  }
317 #else // #ifdef HAVE_TBB
// NOTE(review): keyspace_entry_count, key_component_count, key_component_width and layout
// are captured below but are not declared anywhere in the visible part of this function.
// Confirm this branch still compiles when HAVE_TBB is undefined. Listing line 320 (between
// std::async( and the capture list) is also missing from this listing.
318  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
319  init_cpu_buff_threads.emplace_back(std::async(
321  [keyspace_entry_count,
322  key_component_count,
323  key_component_width,
324  thread_idx,
325  thread_count,
326  cpu_hash_table_ptr,
327  layout,
328  parent_thread_local_ids = logger::thread_local_ids()] {
329  logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
330  DEBUG_TIMER_NEW_THREAD(parent_thread_local_ids.thread_id_);
331  switch (key_component_width) {
332  case 4:
333  init_baseline_hash_join_buff_32(cpu_hash_table_ptr,
334  keyspace_entry_count,
335  key_component_count,
336  layout == HashType::OneToOne,
337  -1,
338  thread_idx,
339  thread_count);
340  break;
341  case 8:
342  init_baseline_hash_join_buff_64(cpu_hash_table_ptr,
343  keyspace_entry_count,
344  key_component_count,
345  layout == HashType::OneToOne,
346  -1,
347  thread_idx,
348  thread_count);
349  break;
350  default:
351  UNREACHABLE();
352  }
353  }));
354  }
// Wait for every init task before the fill phase starts.
355  for (auto& child : init_cpu_buff_threads) {
356  child.get();
357  }
358 #endif // !HAVE_TBB
359  }
360  std::vector<std::future<int>> fill_cpu_buff_threads;
361  auto timer_fill = DEBUG_TIMER("Fill CPU Baseline Join Hash Table");
// One fill task per thread index. join_columns is captured by reference; every future is
// consumed by the get() loop below before this function returns.
// NOTE(review): listing line 364 (between std::async( and the capture list) is missing.
362  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
363  fill_cpu_buff_threads.emplace_back(std::async(
365  [key_handler,
366  &join_columns,
367  hash_table_entry_info,
368  thread_idx,
369  cpu_hash_table_ptr,
370  thread_count,
371  for_semi_join,
372  hash_table_layout,
373  parent_thread_local_ids = logger::thread_local_ids()] {
374  logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
375  DEBUG_TIMER_NEW_THREAD(parent_thread_local_ids.thread_id_);
376  switch (hash_table_entry_info.getJoinKeysSize()) {
377  case 4: {
378  return fill_baseline_hash_join_buff<int32_t>(
379  cpu_hash_table_ptr,
380  hash_table_entry_info.getNumHashEntries(),
381  -1,
382  for_semi_join,
383  hash_table_entry_info.getNumJoinKeys(),
384  hash_table_layout == HashType::OneToOne,
385  key_handler,
386  join_columns[0].num_elems,
387  thread_idx,
388  thread_count);
389  }
390  case 8: {
391  return fill_baseline_hash_join_buff<int64_t>(
392  cpu_hash_table_ptr,
393  hash_table_entry_info.getNumHashEntries(),
394  -1,
395  for_semi_join,
396  hash_table_entry_info.getNumJoinKeys(),
397  hash_table_layout == HashType::OneToOne,
398  key_handler,
399  join_columns[0].num_elems,
400  thread_idx,
401  thread_count);
402  }
403  default:
404  UNREACHABLE() << "Unexpected hash join key size: "
405  << hash_table_entry_info.getJoinKeysSize();
406  }
// Fallback result for the default case above.
407  return -1;
408  }));
409  }
// Collect every task result; if several tasks report an error, the last non-zero one wins.
410  int err = 0;
411  for (auto& child : fill_cpu_buff_threads) {
412  int partial_err = child.get();
413  if (partial_err) {
414  err = partial_err;
415  }
416  }
417  if (err) {
418  return err;
419  }
420  if (HashJoin::layoutRequiresAdditionalBuffers(hash_table_layout)) {
// The additional buffer starts right after the key area:
// base pointer + getNumHashEntries() * computeKeySize().
421  auto one_to_many_buff = reinterpret_cast<int32_t*>(
422  cpu_hash_table_ptr + hash_table_entry_info.getNumHashEntries() *
423  hash_table_entry_info.computeKeySize());
424  {
425  auto timer_init_additional_buffers =
426  DEBUG_TIMER("Initialize Additional Buffers for CPU Baseline Join Hash Table");
// NOTE(review): listing line 427 (the callee name) is missing; the visible arguments are
// the buffer, the entry count, -1, 0 and 1.
428  one_to_many_buff, hash_table_entry_info.getNumHashEntries(), -1, 0, 1);
429  }
// is_geo_compressed is only read from the handler when KEY_HANDLER is RangeKeyHandler;
// for every other handler it stays false.
430  bool is_geo_compressed = false;
431  if constexpr (std::is_same_v<KEY_HANDLER, RangeKeyHandler>) {
432  if (const auto range_handler =
433  reinterpret_cast<const RangeKeyHandler*>(key_handler)) {
434  is_geo_compressed = range_handler->is_compressed_;
435  }
436  }
437  auto timer_fill_additional_buffers =
438  DEBUG_TIMER("Fill Additional Buffers for CPU Baseline Join Hash Table");
// Same layout value that was already stored near the top of this function.
439  setHashLayout(hash_table_layout);
// NOTE(review): the callee names for both cases (listing lines 443 and 461) are missing
// from this listing; the two cases pass identical argument lists apart from the key type.
440  switch (hash_table_entry_info.getJoinKeysSize()) {
441  case 4: {
442  const auto composite_key_dict = reinterpret_cast<int32_t*>(cpu_hash_table_ptr);
444  one_to_many_buff,
445  composite_key_dict,
446  hash_table_entry_info.getNumHashEntries(),
447  hash_table_entry_info.getNumJoinKeys(),
448  join_columns,
449  join_column_types,
450  join_bucket_info,
451  str_proxy_translation_maps_ptrs_and_offsets.first,
452  str_proxy_translation_maps_ptrs_and_offsets.second,
453  thread_count,
454  std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
455  is_geo_compressed,
456  join_type == JoinType::WINDOW_FUNCTION_FRAMING);
457  break;
458  }
459  case 8: {
460  const auto composite_key_dict = reinterpret_cast<int64_t*>(cpu_hash_table_ptr);
462  one_to_many_buff,
463  composite_key_dict,
464  hash_table_entry_info.getNumHashEntries(),
465  hash_table_entry_info.getNumJoinKeys(),
466  join_columns,
467  join_column_types,
468  join_bucket_info,
469  str_proxy_translation_maps_ptrs_and_offsets.first,
470  str_proxy_translation_maps_ptrs_and_offsets.second,
471  thread_count,
472  std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
473  is_geo_compressed,
474  join_type == JoinType::WINDOW_FUNCTION_FRAMING);
475  break;
476  }
477  default:
478  CHECK(false);
479  }
480  }
481  return 0;
482  }
483 
// Creates the GPU-resident BaselineHashTable for device_id and stores it in hash_table_.
//
// Throws JoinHashTableTooBig when the computed table size exceeds
// query_hint.max_join_hash_table_size (the first half of that condition is on listing
// line 490, which is missing here) or exceeds executor->maxGpuSlabSize().
// Without HAVE_CUDA the body is just UNREACHABLE().
484  void allocateDeviceMemory(const BaselineHashTableEntryInfo hash_table_entry_info,
485  const int device_id,
486  const Executor* executor,
487  const RegisteredQueryHint& query_hint) {
488 #ifdef HAVE_CUDA
489  const size_t hash_table_size = hash_table_entry_info.computeHashTableSize();
491  hash_table_size > query_hint.max_join_hash_table_size) {
492  throw JoinHashTableTooBig(hash_table_size, query_hint.max_join_hash_table_size);
493  }
// Second limit: the table must also fit within the executor's maximum GPU slab size.
494  if (hash_table_size > executor->maxGpuSlabSize()) {
495  throw JoinHashTableTooBig(hash_table_size, executor->maxGpuSlabSize());
496  }
497 
498  hash_table_ = std::make_unique<BaselineHashTable>(
499  MemoryLevel::GPU_LEVEL, hash_table_entry_info, executor->getDataMgr(), device_id);
500 #else
501  UNREACHABLE();
502 #endif
503  }
504 
// Builds the baseline join hash table in GPU memory for device_id.
//
// Steps with HAVE_CUDA, as visible in this listing:
//   1. allocateDeviceMemory() creates the GPU table (and may throw JoinHashTableTooBig).
//   2. Returns 0 early when the entry info reports zero keys.
//   3. Allocates a device-side int error slot, seeds it with err (0), and copies the key
//      handler to the device with transfer_flat_object_to_gpu().
//   4. Initializes and fills the hash buffer, choosing 32- or 64-bit routines from
//      getJoinKeysSize(); the error slot is copied back to err after the fill.
//   5. For layouts where HashJoin::layoutRequiresAdditionalBuffers() is true,
//      initializes and fills the int32_t region that follows the key area.
//
// Returns err: 0 on success, or the non-zero value read back from the device error slot.
// Without HAVE_CUDA the body is UNREACHABLE() followed by return err.
505  template <class KEY_HANDLER>
506  int initHashTableOnGpu(KEY_HANDLER* key_handler,
507  const std::vector<JoinColumn>& join_columns,
508  const JoinType join_type,
509  const BaselineHashTableEntryInfo hash_table_entry_info,
510  const int device_id,
511  const Executor* executor,
512  const RegisteredQueryHint& query_hint) {
513  auto timer = DEBUG_TIMER(__func__);
514  int err = 0;
515 #ifdef HAVE_CUDA
516  allocateDeviceMemory(hash_table_entry_info, device_id, executor, query_hint);
517  auto const hash_table_layout = hash_table_entry_info.getHashTableLayout();
518  setHashLayout(hash_table_layout);
519  if (hash_table_entry_info.getNumKeys() == 0) {
520  VLOG(1) << "Stop building a hash table based on a column: an input table is empty";
521  return 0;
522  }
523  auto data_mgr = executor->getDataMgr();
524  auto allocator = std::make_unique<CudaAllocator>(
525  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
// Device-side error slot, seeded with the host value of err (0 at this point).
526  auto dev_err_buff = allocator->alloc(sizeof(int));
527  allocator->copyToDevice(dev_err_buff, &err, sizeof(err));
528  auto gpu_hash_table_buff = hash_table_->getGpuBuffer();
529  CHECK(gpu_hash_table_buff);
// Semi-join handling is only requested for SEMI/ANTI joins on a OneToOne layout.
530  const bool for_semi_join =
531  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
532  hash_table_layout == HashType::OneToOne;
533  const auto key_handler_gpu = transfer_flat_object_to_gpu(*key_handler, *allocator);
534  {
535  auto timer_init = DEBUG_TIMER("Initialize GPU Baseline Join Hash Table");
// NOTE(review): the callee names for both cases (listing lines 538 and 546) are missing
// from this listing; both cases pass the same five arguments.
536  switch (hash_table_entry_info.getJoinKeysSize()) {
537  case 4:
539  gpu_hash_table_buff,
540  hash_table_entry_info.getNumHashEntries(),
541  hash_table_entry_info.getNumJoinKeys(),
542  hash_table_layout == HashType::OneToOne,
543  -1);
544  break;
545  case 8:
547  gpu_hash_table_buff,
548  hash_table_entry_info.getNumHashEntries(),
549  hash_table_entry_info.getNumJoinKeys(),
550  hash_table_layout == HashType::OneToOne,
551  -1);
552  break;
553  default:
554  UNREACHABLE();
555  }
556  }
557  auto timer_fill = DEBUG_TIMER("Fill GPU Baseline Join Hash Table");
// Fill phase: the element count comes from the first join column, and the device error
// slot is copied back into err after each fill call.
558  switch (hash_table_entry_info.getJoinKeysSize()) {
559  case 4: {
560  fill_baseline_hash_join_buff_on_device<int32_t>(
561  gpu_hash_table_buff,
562  hash_table_entry_info.getNumHashEntries(),
563  -1,
564  for_semi_join,
565  hash_table_entry_info.getNumJoinKeys(),
566  hash_table_layout == HashType::OneToOne,
567  reinterpret_cast<int*>(dev_err_buff),
568  key_handler_gpu,
569  join_columns.front().num_elems);
570  allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
571  break;
572  }
573  case 8: {
574  fill_baseline_hash_join_buff_on_device<int64_t>(
575  gpu_hash_table_buff,
576  hash_table_entry_info.getNumHashEntries(),
577  -1,
578  for_semi_join,
579  hash_table_entry_info.getNumJoinKeys(),
580  hash_table_layout == HashType::OneToOne,
581  reinterpret_cast<int*>(dev_err_buff),
582  key_handler_gpu,
583  join_columns.front().num_elems);
584  allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
585  break;
586  }
587  default:
588  UNREACHABLE();
589  }
// Stop before touching the additional buffers if the fill reported an error.
590  if (err) {
591  return err;
592  }
593  if (HashJoin::layoutRequiresAdditionalBuffers(hash_table_layout)) {
// The additional buffer starts right after the key area:
// base pointer + getNumHashEntries() * computeKeySize().
594  auto one_to_many_buff = reinterpret_cast<int32_t*>(
595  gpu_hash_table_buff + hash_table_entry_info.getNumHashEntries() *
596  hash_table_entry_info.computeKeySize());
597  {
598  auto timer_init_additional_buf =
599  DEBUG_TIMER("Initialize Additional Buffer for GPU Baseline Join Hash Table");
// NOTE(review): listing line 600 (the callee name) is missing from this listing.
601  one_to_many_buff, hash_table_entry_info.getNumHashEntries(), -1);
602  }
// Same layout value that was already stored after allocateDeviceMemory() above.
603  setHashLayout(hash_table_layout);
604  auto timer_fill_additional_buf =
605  DEBUG_TIMER("Fill Additional Buffer for GPU Baseline Join Hash Table");
606  switch (hash_table_entry_info.getJoinKeysSize()) {
607  case 4: {
608  const auto composite_key_dict = reinterpret_cast<int32_t*>(gpu_hash_table_buff);
609  fill_one_to_many_baseline_hash_table_on_device<int32_t>(
610  one_to_many_buff,
611  composite_key_dict,
612  hash_table_entry_info.getNumHashEntries(),
613  hash_table_entry_info.getNumJoinKeys(),
614  key_handler_gpu,
615  join_columns.front().num_elems,
616  join_type == JoinType::WINDOW_FUNCTION_FRAMING);
617 
618  break;
619  }
620  case 8: {
621  const auto composite_key_dict = reinterpret_cast<int64_t*>(gpu_hash_table_buff);
622  fill_one_to_many_baseline_hash_table_on_device<int64_t>(
623  one_to_many_buff,
624  composite_key_dict,
625  hash_table_entry_info.getNumHashEntries(),
626  hash_table_entry_info.getNumJoinKeys(),
627  key_handler_gpu,
628  join_columns.front().num_elems,
629  join_type == JoinType::WINDOW_FUNCTION_FRAMING);
630 
631  break;
632  }
633  default:
634  UNREACHABLE();
635  }
636  }
637 #else
638  UNREACHABLE();
639 #endif
640  return err;
641  }
642 
// Transfers ownership of the built table to the caller by moving hash_table_ out;
// the builder's hash_table_ is left in a moved-from (empty) state afterwards.
643  std::unique_ptr<BaselineHashTable> getHashTable() {
644  return std::move(hash_table_);
645  }
646 
// Stores the given layout in layout_.
647  void setHashLayout(HashType layout) {
648  layout_ = layout;
649  }
650 
// Returns the stored layout_ value.
// NOTE(review): the accessor's signature (listing line 651) is missing from this listing.
652  return layout_;
653  }
654 
655  private:
// Table under construction: assigned by initHashTableOnCpu() / allocateDeviceMemory()
// and moved out by getHashTable().
656  std::unique_ptr<BaselineHashTable> hash_table_;
658 };
JoinType
Definition: sqldefs.h:174
void fill_baseline_hash_join_buff_on_device(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const KEY_HANDLER *key_handler, const size_t num_elems)
void init_baseline_hash_join_buff_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed, const bool for_window_framing)
void fill_one_to_many_baseline_hash_table_on_device(int32_t *buff, const SIZE *composite_key_dict, const size_t hash_entry_count, const size_t key_component_count, const KEY_HANDLER *key_handler, const size_t num_elems, const bool for_window_framing)
size_t computeKeySize() const
T * transfer_flat_object_to_gpu(const T &object, DeviceAllocator &allocator)
#define LOG(tag)
Definition: Logger.h:285
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
void fill_one_to_many_baseline_hash_table_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed, const bool for_window_framing)
#define UNREACHABLE()
Definition: Logger.h:338
void range_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const RangeKeyHandler *key_handler, const size_t num_elems)
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:417
void init_baseline_hash_join_buff_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::unique_ptr< BaselineHashTable > hash_table_
void allocateDeviceMemory(const BaselineHashTableEntryInfo hash_table_entry_info, const int device_id, const Executor *executor, const RegisteredQueryHint &query_hint)
int initHashTableOnGpu(KEY_HANDLER *key_handler, const std::vector< JoinColumn > &join_columns, const JoinType join_type, const BaselineHashTableEntryInfo hash_table_entry_info, const int device_id, const Executor *executor, const RegisteredQueryHint &query_hint)
size_t max_join_hash_table_size
Definition: QueryHint.h:358
size_t computeHashTableSize() const override
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
future< Result > async(Fn &&fn, Args &&...args)
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems, const bool for_window_framing)
HashType getHashTableLayout() const
Definition: HashTable.h:53
size_t getNumHashEntries() const
Definition: HashTable.h:50
void bbox_intersect_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems)
int bbox_intersect_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int bbox_intersect_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const GenericKeyHandler *key_handler, const int64_t num_elems, const bool for_window_framing)
int fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void init_hash_join_buff_on_device(int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val)
int fill_baseline_hash_join_buff(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const KEY_HANDLER *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void range_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const RangeKeyHandler *key_handler, const size_t num_elems)
size_t getNumKeys() const
Definition: HashTable.h:51
int range_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::pair< std::vector< const int32_t * >, std::vector< int32_t >> StrProxyTranslationMapsPtrsAndOffsets
std::unique_ptr< BaselineHashTable > getHashTable()
int range_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
bool isHintRegistered(const QueryHint hint) const
Definition: QueryHint.h:383
int initHashTableOnCpu(KEY_HANDLER *key_handler, const CompositeKeyInfo &composite_key_info, const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const StrProxyTranslationMapsPtrsAndOffsets &str_proxy_translation_maps_ptrs_and_offsets, const BaselineHashTableEntryInfo hash_table_entry_info, const JoinType join_type, const Executor *executor, const RegisteredQueryHint &query_hint)
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int64_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
void bbox_intersect_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const BoundingBoxIntersectKeyHandler *key_handler, const int64_t num_elems)
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
size_t getNumJoinKeys() const
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
Allocate GPU memory using GpuBuffers via DataMgr.
size_t getJoinKeysSize() const
int fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int cpu_threads()
Definition: thread_count.h:25
HashType
Definition: HashTable.h:19
ThreadLocalIds thread_local_ids()
Definition: Logger.cpp:880
#define VLOG(n)
Definition: Logger.h:388
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:175