OmniSciDB  b28c0d5765
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BaselineHashTableBuilder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
25 #include "Shared/thread_count.h"
26 
27 template <typename SIZE,
28  class KEY_HANDLER,
30 int fill_baseline_hash_join_buff(int8_t* hash_buff,
31  const size_t entry_count,
32  const int32_t invalid_slot_val,
33  const bool for_semi_join,
34  const size_t key_component_count,
35  const bool with_val_slot,
36  const KEY_HANDLER* key_handler,
37  const size_t num_elems,
38  const int32_t cpu_thread_idx,
39  const int32_t cpu_thread_count) {
40  auto timer = DEBUG_TIMER(__func__);
41  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
42  return fill_baseline_hash_join_buff_32(hash_buff,
43  entry_count,
44  invalid_slot_val,
45  for_semi_join,
46  key_component_count,
47  with_val_slot,
48  key_handler,
49  num_elems,
50  cpu_thread_idx,
51  cpu_thread_count);
52  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
54  entry_count,
55  invalid_slot_val,
56  key_component_count,
57  with_val_slot,
58  key_handler,
59  num_elems,
60  cpu_thread_idx,
61  cpu_thread_count);
62  } else {
63  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
64  "Only Generic, Overlaps, and Range Key Handlers are supported.");
66  entry_count,
67  invalid_slot_val,
68  key_component_count,
69  with_val_slot,
70  key_handler,
71  num_elems,
72  cpu_thread_idx,
73  cpu_thread_count);
74  }
75 }
76 
77 template <typename SIZE,
78  class KEY_HANDLER,
80 int fill_baseline_hash_join_buff(int8_t* hash_buff,
81  const size_t entry_count,
82  const int32_t invalid_slot_val,
83  const bool for_semi_join,
84  const size_t key_component_count,
85  const bool with_val_slot,
86  const KEY_HANDLER* key_handler,
87  const size_t num_elems,
88  const int32_t cpu_thread_idx,
89  const int32_t cpu_thread_count) {
90  auto timer = DEBUG_TIMER(__func__);
91  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
92  return fill_baseline_hash_join_buff_64(hash_buff,
93  entry_count,
94  invalid_slot_val,
95  for_semi_join,
96  key_component_count,
97  with_val_slot,
98  key_handler,
99  num_elems,
100  cpu_thread_idx,
101  cpu_thread_count);
102  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
103  return range_fill_baseline_hash_join_buff_64(hash_buff,
104  entry_count,
105  invalid_slot_val,
106  key_component_count,
107  with_val_slot,
108  key_handler,
109  num_elems,
110  cpu_thread_idx,
111  cpu_thread_count);
112  } else {
113  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
114  "Only Generic, Overlaps, and Range Key Handlers are supported.");
116  entry_count,
117  invalid_slot_val,
118  key_component_count,
119  with_val_slot,
120  key_handler,
121  num_elems,
122  cpu_thread_idx,
123  cpu_thread_count);
124  }
125 }
126 
127 template <typename SIZE,
128  class KEY_HANDLER,
131  const size_t entry_count,
132  const int32_t invalid_slot_val,
133  const bool for_semi_join,
134  const size_t key_component_count,
135  const bool with_val_slot,
136  int* dev_err_buff,
137  const KEY_HANDLER* key_handler,
138  const size_t num_elems) {
139  auto timer = DEBUG_TIMER(__func__);
140  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
142  entry_count,
143  invalid_slot_val,
144  for_semi_join,
145  key_component_count,
146  with_val_slot,
147  dev_err_buff,
148  key_handler,
149  num_elems);
150  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
151  UNREACHABLE();
152  } else {
153  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
154  "Only Generic, Overlaps, and Range Key Handlers are supported.");
155  LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
156  }
157 }
158 
159 template <typename SIZE,
160  class KEY_HANDLER,
162 void fill_baseline_hash_join_buff_on_device(int8_t* hash_buff,
163  const size_t entry_count,
164  const int32_t invalid_slot_val,
165  const bool for_semi_join,
166  const size_t key_component_count,
167  const bool with_val_slot,
168  int* dev_err_buff,
169  const KEY_HANDLER* key_handler,
170  const size_t num_elems) {
171  auto timer = DEBUG_TIMER(__func__);
172  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
174  entry_count,
175  invalid_slot_val,
176  for_semi_join,
177  key_component_count,
178  with_val_slot,
179  dev_err_buff,
180  key_handler,
181  num_elems);
182  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
184  entry_count,
185  invalid_slot_val,
186  key_component_count,
187  with_val_slot,
188  dev_err_buff,
189  key_handler,
190  num_elems);
191  } else {
192  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
193  "Only Generic, Overlaps, and Range Key Handlers are supported.");
195  entry_count,
196  invalid_slot_val,
197  key_component_count,
198  with_val_slot,
199  dev_err_buff,
200  key_handler,
201  num_elems);
202  }
203 }
204 
205 template <typename SIZE,
206  class KEY_HANDLER,
209  const SIZE* composite_key_dict,
210  const size_t hash_entry_count,
211  const size_t key_component_count,
212  const KEY_HANDLER* key_handler,
213  const size_t num_elems) {
214  auto timer = DEBUG_TIMER(__func__);
215  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
217  composite_key_dict,
218  hash_entry_count,
219  key_component_count,
220  key_handler,
221  num_elems);
222  } else {
223  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value ||
224  std::is_same<KEY_HANDLER, RangeKeyHandler>::value,
225  "Only Generic, Overlaps, and Range Key Handlers are supported.");
226  LOG(FATAL) << "32-bit keys not yet supported for overlaps join.";
227  }
228 }
229 
230 template <typename SIZE,
231  class KEY_HANDLER,
234  const SIZE* composite_key_dict,
235  const size_t hash_entry_count,
236  const size_t key_component_count,
237  const KEY_HANDLER* key_handler,
238  const size_t num_elems) {
239  auto timer = DEBUG_TIMER(__func__);
240  if constexpr (std::is_same<KEY_HANDLER, GenericKeyHandler>::value) {
242  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
243  } else if constexpr (std::is_same<KEY_HANDLER, RangeKeyHandler>::value) {
245  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
246  } else {
247  static_assert(std::is_same<KEY_HANDLER, OverlapsKeyHandler>::value,
248  "Only Generic, Overlaps, and Range Key Handlers are supported.");
250  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
251  }
252 }
253 
 public:
  // Default-constructed builder; the hash table itself is created by the
  // initHashTableOnCpu/initHashTableOnGpu methods and handed to the caller
  // via getHashTable().
  BaselineJoinHashTableBuilder() = default;
257  template <class KEY_HANDLER>
258  int initHashTableOnCpu(KEY_HANDLER* key_handler,
259  const CompositeKeyInfo& composite_key_info,
260  const std::vector<JoinColumn>& join_columns,
261  const std::vector<JoinColumnTypeInfo>& join_column_types,
262  const std::vector<JoinBucketInfo>& join_bucket_info,
264  str_proxy_translation_maps_ptrs_and_offsets,
265  const size_t keyspace_entry_count,
266  const size_t keys_for_all_rows,
267  const HashType layout,
268  const JoinType join_type,
269  const size_t key_component_width,
270  const size_t key_component_count,
271  const RegisteredQueryHint& query_hint) {
272  auto timer = DEBUG_TIMER(__func__);
273  const auto entry_size =
274  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
275  key_component_width;
276  const size_t one_to_many_hash_entries =
278  ? 2 * keyspace_entry_count + keys_for_all_rows
279  : 0;
280  const size_t hash_table_size =
281  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
282 
284  hash_table_size > query_hint.max_join_hash_table_size) {
285  throw JoinHashTableTooBig(hash_table_size, query_hint.max_join_hash_table_size);
286  }
287 
288  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
289  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
290  throw TooManyHashEntries(
291  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
292  "yet");
293  }
294  const bool for_semi_join =
295  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
296  layout == HashType::OneToOne;
297 
298  VLOG(1) << "Initializing CPU Join Hash Table with " << keyspace_entry_count
299  << " hash entries and " << one_to_many_hash_entries
300  << " entries in the one to many buffer";
301  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
302 
303  hash_table_ = std::make_unique<BaselineHashTable>(
304  layout, keyspace_entry_count, keys_for_all_rows, hash_table_size);
305  auto cpu_hash_table_ptr = hash_table_->getCpuBuffer();
306  int thread_count = cpu_threads();
307  std::vector<std::future<void>> init_cpu_buff_threads;
308  setHashLayout(layout);
309  {
310  auto timer_init = DEBUG_TIMER("CPU Baseline-Hash: init_baseline_hash_join_buff_32");
311 #ifdef HAVE_TBB
312  switch (key_component_width) {
313  case 4:
314  init_baseline_hash_join_buff_tbb_32(cpu_hash_table_ptr,
315  keyspace_entry_count,
316  key_component_count,
317  layout == HashType::OneToOne,
318  -1);
319  break;
320  case 8:
321  init_baseline_hash_join_buff_tbb_64(cpu_hash_table_ptr,
322  keyspace_entry_count,
323  key_component_count,
324  layout == HashType::OneToOne,
325  -1);
326  break;
327  default:
328  CHECK(false);
329  }
330 #else // #ifdef HAVE_TBB
331  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
332  init_cpu_buff_threads.emplace_back(
334  [keyspace_entry_count,
335  key_component_count,
336  key_component_width,
337  thread_idx,
338  thread_count,
339  cpu_hash_table_ptr,
340  layout] {
341  switch (key_component_width) {
342  case 4:
343  init_baseline_hash_join_buff_32(cpu_hash_table_ptr,
344  keyspace_entry_count,
345  key_component_count,
346  layout == HashType::OneToOne,
347  -1,
348  thread_idx,
349  thread_count);
350  break;
351  case 8:
352  init_baseline_hash_join_buff_64(cpu_hash_table_ptr,
353  keyspace_entry_count,
354  key_component_count,
355  layout == HashType::OneToOne,
356  -1,
357  thread_idx,
358  thread_count);
359  break;
360  default:
361  CHECK(false);
362  }
363  }));
364  }
365  for (auto& child : init_cpu_buff_threads) {
366  child.get();
367  }
368 #endif // !HAVE_TBB
369  }
370  std::vector<std::future<int>> fill_cpu_buff_threads;
371  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
372  fill_cpu_buff_threads.emplace_back(std::async(
374  [key_handler,
375  keyspace_entry_count,
376  &join_columns,
377  key_component_count,
378  key_component_width,
379  layout,
380  thread_idx,
381  cpu_hash_table_ptr,
382  thread_count,
383  for_semi_join] {
384  switch (key_component_width) {
385  case 4: {
386  return fill_baseline_hash_join_buff<int32_t>(cpu_hash_table_ptr,
387  keyspace_entry_count,
388  -1,
389  for_semi_join,
390  key_component_count,
391  layout == HashType::OneToOne,
392  key_handler,
393  join_columns[0].num_elems,
394  thread_idx,
395  thread_count);
396  break;
397  }
398  case 8: {
399  return fill_baseline_hash_join_buff<int64_t>(cpu_hash_table_ptr,
400  keyspace_entry_count,
401  -1,
402  for_semi_join,
403  key_component_count,
404  layout == HashType::OneToOne,
405  key_handler,
406  join_columns[0].num_elems,
407  thread_idx,
408  thread_count);
409  break;
410  }
411  default:
412  CHECK(false);
413  }
414  return -1;
415  }));
416  }
417  int err = 0;
418  for (auto& child : fill_cpu_buff_threads) {
419  int partial_err = child.get();
420  if (partial_err) {
421  err = partial_err;
422  }
423  }
424  if (err) {
425  return err;
426  }
428  auto one_to_many_buff = reinterpret_cast<int32_t*>(
429  cpu_hash_table_ptr + keyspace_entry_count * entry_size);
430  {
431  auto timer_init_additional_buffers =
432  DEBUG_TIMER("CPU Baseline-Hash: Additional Buffers init_hash_join_buff");
433  init_hash_join_buff(one_to_many_buff, keyspace_entry_count, -1, 0, 1);
434  }
435  bool is_geo_compressed = false;
436  if constexpr (std::is_same_v<KEY_HANDLER, RangeKeyHandler>) {
437  if (const auto range_handler =
438  reinterpret_cast<const RangeKeyHandler*>(key_handler)) {
439  is_geo_compressed = range_handler->is_compressed_;
440  }
441  }
442  setHashLayout(layout);
443 
444  switch (key_component_width) {
445  case 4: {
446  const auto composite_key_dict = reinterpret_cast<int32_t*>(cpu_hash_table_ptr);
448  one_to_many_buff,
449  composite_key_dict,
450  keyspace_entry_count,
451  key_component_count,
452  join_columns,
453  join_column_types,
454  join_bucket_info,
455  str_proxy_translation_maps_ptrs_and_offsets.first,
456  str_proxy_translation_maps_ptrs_and_offsets.second,
457  thread_count,
458  std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
459  is_geo_compressed);
460  break;
461  }
462  case 8: {
463  const auto composite_key_dict = reinterpret_cast<int64_t*>(cpu_hash_table_ptr);
465  one_to_many_buff,
466  composite_key_dict,
467  keyspace_entry_count,
468  key_component_count,
469  join_columns,
470  join_column_types,
471  join_bucket_info,
472  str_proxy_translation_maps_ptrs_and_offsets.first,
473  str_proxy_translation_maps_ptrs_and_offsets.second,
474  thread_count,
475  std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
476  is_geo_compressed);
477  break;
478  }
479  default:
480  CHECK(false);
481  }
482  }
483  return err;
484  }
485 
486  void allocateDeviceMemory(const HashType layout,
487  const size_t key_component_width,
488  const size_t key_component_count,
489  const size_t keyspace_entry_count,
490  const size_t emitted_keys_count,
491  const int device_id,
492  const Executor* executor,
493  const RegisteredQueryHint& query_hint) {
494 #ifdef HAVE_CUDA
495  const auto entry_size =
496  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
497  key_component_width;
498  const size_t one_to_many_hash_entries =
500  ? 2 * keyspace_entry_count + emitted_keys_count
501  : 0;
502  const size_t hash_table_size =
503  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
504 
506  hash_table_size > query_hint.max_join_hash_table_size) {
507  throw JoinHashTableTooBig(hash_table_size, query_hint.max_join_hash_table_size);
508  }
509 
510  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
511  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
512  throw TooManyHashEntries(
513  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
514  "yet");
515  }
516 
517  VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
518  << keyspace_entry_count << " hash entries and " << one_to_many_hash_entries
519  << " entries in the " << HashJoin::getHashTypeString(layout) << " buffer";
520  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
521 
522  hash_table_ = std::make_unique<BaselineHashTable>(executor->getDataMgr(),
523  layout,
524  keyspace_entry_count,
525  emitted_keys_count,
526  hash_table_size,
527  device_id);
528 #else
529  UNREACHABLE();
530 #endif
531  }
532 
533  template <class KEY_HANDLER>
534  int initHashTableOnGpu(KEY_HANDLER* key_handler,
535  const std::vector<JoinColumn>& join_columns,
536  const HashType layout,
537  const JoinType join_type,
538  const size_t key_component_width,
539  const size_t key_component_count,
540  const size_t keyspace_entry_count,
541  const size_t emitted_keys_count,
542  const int device_id,
543  const Executor* executor,
544  const RegisteredQueryHint& query_hint) {
545  auto timer = DEBUG_TIMER(__func__);
546  int err = 0;
547 #ifdef HAVE_CUDA
548  allocateDeviceMemory(layout,
549  key_component_width,
550  key_component_count,
551  keyspace_entry_count,
552  emitted_keys_count,
553  device_id,
554  executor,
555  query_hint);
556  if (!keyspace_entry_count) {
557  // need to "allocate" the empty hash table first
558  CHECK(!emitted_keys_count);
559  return 0;
560  }
561  auto data_mgr = executor->getDataMgr();
562  auto allocator = std::make_unique<CudaAllocator>(
563  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
564  auto dev_err_buff = allocator->alloc(sizeof(int));
565 
566  allocator->copyToDevice(dev_err_buff, &err, sizeof(err));
567  auto gpu_hash_table_buff = hash_table_->getGpuBuffer();
568  CHECK(gpu_hash_table_buff);
569  const bool for_semi_join =
570  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
571  layout == HashType::OneToOne;
572  setHashLayout(layout);
573  const auto key_handler_gpu = transfer_flat_object_to_gpu(*key_handler, *allocator);
574  switch (key_component_width) {
575  case 4:
576  init_baseline_hash_join_buff_on_device_32(gpu_hash_table_buff,
577  keyspace_entry_count,
578  key_component_count,
579  layout == HashType::OneToOne,
580  -1);
581  break;
582  case 8:
583  init_baseline_hash_join_buff_on_device_64(gpu_hash_table_buff,
584  keyspace_entry_count,
585  key_component_count,
586  layout == HashType::OneToOne,
587  -1);
588  break;
589  default:
590  UNREACHABLE();
591  }
592  switch (key_component_width) {
593  case 4: {
594  fill_baseline_hash_join_buff_on_device<int32_t>(
595  gpu_hash_table_buff,
596  keyspace_entry_count,
597  -1,
598  for_semi_join,
599  key_component_count,
600  layout == HashType::OneToOne,
601  reinterpret_cast<int*>(dev_err_buff),
602  key_handler_gpu,
603  join_columns.front().num_elems);
604  allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
605  break;
606  }
607  case 8: {
608  fill_baseline_hash_join_buff_on_device<int64_t>(
609  gpu_hash_table_buff,
610  keyspace_entry_count,
611  -1,
612  for_semi_join,
613  key_component_count,
614  layout == HashType::OneToOne,
615  reinterpret_cast<int*>(dev_err_buff),
616  key_handler_gpu,
617  join_columns.front().num_elems);
618  allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
619  break;
620  }
621  default:
622  UNREACHABLE();
623  }
624  if (err) {
625  return err;
626  }
628  const auto entry_size = key_component_count * key_component_width;
629  auto one_to_many_buff = reinterpret_cast<int32_t*>(
630  gpu_hash_table_buff + keyspace_entry_count * entry_size);
631  init_hash_join_buff_on_device(one_to_many_buff, keyspace_entry_count, -1);
632  setHashLayout(layout);
633  switch (key_component_width) {
634  case 4: {
635  const auto composite_key_dict = reinterpret_cast<int32_t*>(gpu_hash_table_buff);
636  fill_one_to_many_baseline_hash_table_on_device<int32_t>(
637  one_to_many_buff,
638  composite_key_dict,
639  keyspace_entry_count,
640  key_component_count,
641  key_handler_gpu,
642  join_columns.front().num_elems);
643 
644  break;
645  }
646  case 8: {
647  const auto composite_key_dict = reinterpret_cast<int64_t*>(gpu_hash_table_buff);
648  fill_one_to_many_baseline_hash_table_on_device<int64_t>(
649  one_to_many_buff,
650  composite_key_dict,
651  keyspace_entry_count,
652  key_component_count,
653  key_handler_gpu,
654  join_columns.front().num_elems);
655 
656  break;
657  }
658  default:
659  UNREACHABLE();
660  }
661  }
662 #else
663  UNREACHABLE();
664 #endif
665  return err;
666  }
667 
668  std::unique_ptr<BaselineHashTable> getHashTable() { return std::move(hash_table_); }
669 
670  void setHashLayout(HashType layout) { layout_ = layout; }
671 
672  HashType getHashLayout() const { return layout_; }
673 
674  private:
675  std::unique_ptr<BaselineHashTable> hash_table_;
677 };
JoinType
Definition: sqldefs.h:164
void fill_baseline_hash_join_buff_on_device(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const KEY_HANDLER *key_handler, const size_t num_elems)
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
void init_baseline_hash_join_buff_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_on_device(int32_t *buff, const SIZE *composite_key_dict, const size_t hash_entry_count, const size_t key_component_count, const KEY_HANDLER *key_handler, const size_t num_elems)
T * transfer_flat_object_to_gpu(const T &object, DeviceAllocator &allocator)
#define LOG(tag)
Definition: Logger.h:216
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
#define UNREACHABLE()
Definition: Logger.h:266
void range_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const RangeKeyHandler *key_handler, const size_t num_elems)
int initHashTableOnGpu(KEY_HANDLER *key_handler, const std::vector< JoinColumn > &join_columns, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id, const Executor *executor, const RegisteredQueryHint &query_hint)
int initHashTableOnCpu(KEY_HANDLER *key_handler, const CompositeKeyInfo &composite_key_info, const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const StrProxyTranslationMapsPtrsAndOffsets &str_proxy_translation_maps_ptrs_and_offsets, const size_t keyspace_entry_count, const size_t keys_for_all_rows, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count, const RegisteredQueryHint &query_hint)
void init_baseline_hash_join_buff_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::unique_ptr< BaselineHashTable > hash_table_
size_t max_join_hash_table_size
Definition: QueryHint.h:325
void overlaps_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
future< Result > async(Fn &&fn, Args &&...args)
void fill_one_to_many_baseline_hash_table_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed)
int fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void allocateDeviceMemory(const HashType layout, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id, const Executor *executor, const RegisteredQueryHint &query_hint)
void init_hash_join_buff_on_device(int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val)
int fill_baseline_hash_join_buff(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const KEY_HANDLER *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void range_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const RangeKeyHandler *key_handler, const size_t num_elems)
int range_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::pair< std::vector< const int32_t * >, std::vector< int32_t >> StrProxyTranslationMapsPtrsAndOffsets
std::unique_ptr< BaselineHashTable > getHashTable()
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:164
int overlaps_fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int range_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
bool isHintRegistered(const QueryHint hint) const
Definition: QueryHint.h:348
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int64_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
int overlaps_fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define CHECK(condition)
Definition: Logger.h:222
#define DEBUG_TIMER(name)
Definition: Logger.h:374
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
void overlaps_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void fill_one_to_many_baseline_hash_table_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join, const bool is_geo_compressed)
Allocate GPU memory using GpuBuffers via DataMgr.
int fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int cpu_threads()
Definition: thread_count.h:25
HashType
Definition: HashTable.h:19
#define VLOG(n)
Definition: Logger.h:316
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:160