OmniSciDB  cde582ebc3
BaselineJoinHashTableBuilder Class Reference

#include <BaselineHashTableBuilder.h>

Public Member Functions

 BaselineJoinHashTableBuilder ()=default
 
template<class KEY_HANDLER >
int initHashTableOnCpu (KEY_HANDLER *key_handler, const CompositeKeyInfo &composite_key_info, const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const StrProxyTranslationMapsPtrsAndOffsets &str_proxy_translation_maps_ptrs_and_offsets, const size_t keyspace_entry_count, const size_t keys_for_all_rows, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count)
 
void allocateDeviceMemory (const HashType layout, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id, const Executor *executor)
 
template<class KEY_HANDLER >
int initHashTableOnGpu (KEY_HANDLER *key_handler, const std::vector< JoinColumn > &join_columns, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id, const Executor *executor)
 
std::unique_ptr< BaselineHashTable > getHashTable ()
 
void setHashLayout (HashType layout)
 
HashType getHashLayout () const
 

Private Attributes

std::unique_ptr< BaselineHashTable > hash_table_
 
HashType layout_
 

Detailed Description

Definition at line 254 of file BaselineHashTableBuilder.h.
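
The builder is driven in two steps by callers such as BaselineJoinHashTable::initHashTableForDevice(): one of the init* methods populates an internal BaselineHashTable, and getHashTable() then releases ownership of it. A minimal usage sketch of that flow follows; every input shown (key handler, column metadata, counts) is a placeholder for values the real callers compute themselves.

// Hypothetical usage sketch; inputs are placeholders, not real API output.
BaselineJoinHashTableBuilder builder;
const int err = builder.initHashTableOnCpu(&key_handler,
                                           composite_key_info,
                                           join_columns,
                                           join_column_types,
                                           join_bucket_info,
                                           str_proxy_translation_maps_ptrs_and_offsets,
                                           keyspace_entry_count,
                                           keys_for_all_rows,
                                           HashType::OneToMany,
                                           JoinType::INNER,
                                           /*key_component_width=*/8,
                                           key_component_count);
if (!err) {
  // The builder hands over ownership of the finished table exactly once.
  std::unique_ptr<BaselineHashTable> table = builder.getHashTable();
}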

Constructor & Destructor Documentation

BaselineJoinHashTableBuilder::BaselineJoinHashTableBuilder ( )
default

Member Function Documentation

void BaselineJoinHashTableBuilder::allocateDeviceMemory ( const HashType  layout,
const size_t  key_component_width,
const size_t  key_component_count,
const size_t  keyspace_entry_count,
const size_t  emitted_keys_count,
const int  device_id,
const Executor *  executor 
)
inline

Definition at line 480 of file BaselineHashTableBuilder.h.

References HashJoin::getHashTypeString(), hash_table_, HashJoin::layoutRequiresAdditionalBuffers(), OneToOne, UNREACHABLE, and VLOG.

Referenced by BaselineJoinHashTable::copyCpuHashTableToGpu(), and initHashTableOnGpu().

486  {
487 #ifdef HAVE_CUDA
488  const auto entry_size =
489  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
490  key_component_width;
491  const size_t one_to_many_hash_entries =
492  HashJoin::layoutRequiresAdditionalBuffers(layout)
493  ? 2 * keyspace_entry_count + emitted_keys_count
494  : 0;
495  const size_t hash_table_size =
496  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
497 
498  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
499  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
500  throw TooManyHashEntries(
501  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
502  "yet");
503  }
504 
505  VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
506  << keyspace_entry_count << " hash entries and " << one_to_many_hash_entries
507  << " entries in the " << HashJoin::getHashTypeString(layout) << " buffer";
508  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
509 
510  hash_table_ = std::make_unique<BaselineHashTable>(executor->getDataMgr(),
511  layout,
512  keyspace_entry_count,
513  emitted_keys_count,
514  hash_table_size,
515  device_id);
516 #else
517  UNREACHABLE();
518 #endif
519  }
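The size arithmetic above can be illustrated with concrete, made-up numbers. A small standalone sketch, assuming a OneToMany layout with two 8-byte key components (all counts below are invented for illustration):

#include <cstdint>
#include <cstddef>

// Illustrative only: repeats allocateDeviceMemory()'s size arithmetic with made-up counts.
const size_t key_component_width = 8;        // bytes per key component
const size_t key_component_count = 2;        // composite key of two components
const size_t keyspace_entry_count = 1000000; // distinct composite keys
const size_t emitted_keys_count = 5000000;   // total (key, row) pairs emitted

// OneToMany adds no extra value slot per entry, so entry_size = 2 * 8 = 16 bytes.
const size_t entry_size = key_component_count * key_component_width;
// Additional buffer: 2 * keyspace_entry_count int32 slots of per-entry bookkeeping
// plus one int32 slot per emitted key.
const size_t one_to_many_hash_entries = 2 * keyspace_entry_count + emitted_keys_count;
// 16 MB of key dictionary + 28 MB of int32 buffer = 44 MB, well under the 2 GB cap checked above.
const size_t hash_table_size =
    entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);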

HashType BaselineJoinHashTableBuilder::getHashLayout ( ) const
inline

Definition at line 658 of file BaselineHashTableBuilder.h.

References layout_.

658 { return layout_; }

std::unique_ptr<BaselineHashTable> BaselineJoinHashTableBuilder::getHashTable ( )
inline

Definition at line 654 of file BaselineHashTableBuilder.h.

References hash_table_.

Referenced by BaselineJoinHashTable::copyCpuHashTableToGpu(), and BaselineJoinHashTable::initHashTableForDevice().

654 { return std::move(hash_table_); }
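Since the getter returns std::move(hash_table_), ownership leaves the builder on the first call and a second call yields an empty pointer. A brief sketch, assuming a builder whose init step already succeeded:

auto table = builder.getHashTable();   // non-null: ownership transferred out of the builder
auto again = builder.getHashTable();   // null: hash_table_ was moved from on the first call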

template<class KEY_HANDLER >
int BaselineJoinHashTableBuilder::initHashTableOnCpu ( KEY_HANDLER *  key_handler,
const CompositeKeyInfo &  composite_key_info,
const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const StrProxyTranslationMapsPtrsAndOffsets &  str_proxy_translation_maps_ptrs_and_offsets,
const size_t  keyspace_entry_count,
const size_t  keys_for_all_rows,
const HashType  layout,
const JoinType  join_type,
const size_t  key_component_width,
const size_t  key_component_count 
)
inline

Definition at line 258 of file BaselineHashTableBuilder.h.

References ANTI, threading_serial::async(), CHECK, cpu_threads(), DEBUG_TIMER, fill_one_to_many_baseline_hash_table_32(), fill_one_to_many_baseline_hash_table_64(), hash_table_, init_baseline_hash_join_buff_32(), init_baseline_hash_join_buff_64(), init_hash_join_buff(), HashJoin::layoutRequiresAdditionalBuffers(), OneToOne, SEMI, setHashLayout(), and VLOG.

Referenced by BaselineJoinHashTable::initHashTableForDevice(), and OverlapsJoinHashTable::initHashTableOnCpu().

270  {
271  auto timer = DEBUG_TIMER(__func__);
272  const auto entry_size =
273  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
274  key_component_width;
275  const size_t one_to_many_hash_entries =
276  HashJoin::layoutRequiresAdditionalBuffers(layout)
277  ? 2 * keyspace_entry_count + keys_for_all_rows
278  : 0;
279  const size_t hash_table_size =
280  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
281 
282  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
283  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
284  throw TooManyHashEntries(
285  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
286  "yet");
287  }
288  const bool for_semi_join =
289  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
290  layout == HashType::OneToOne;
291 
292  VLOG(1) << "Initializing CPU Join Hash Table with " << keyspace_entry_count
293  << " hash entries and " << one_to_many_hash_entries
294  << " entries in the one to many buffer";
295  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
296 
297  hash_table_ = std::make_unique<BaselineHashTable>(
298  layout, keyspace_entry_count, keys_for_all_rows, hash_table_size);
299  auto cpu_hash_table_ptr = hash_table_->getCpuBuffer();
300  int thread_count = cpu_threads();
301  std::vector<std::future<void>> init_cpu_buff_threads;
302  setHashLayout(layout);
303  {
304  auto timer_init = DEBUG_TIMER("CPU Baseline-Hash: init_baseline_hash_join_buff_32");
305 #ifdef HAVE_TBB
306  switch (key_component_width) {
307  case 4:
308  init_baseline_hash_join_buff_tbb_32(cpu_hash_table_ptr,
309  keyspace_entry_count,
310  key_component_count,
311  layout == HashType::OneToOne,
312  -1);
313  break;
314  case 8:
315  init_baseline_hash_join_buff_tbb_64(cpu_hash_table_ptr,
316  keyspace_entry_count,
317  key_component_count,
318  layout == HashType::OneToOne,
319  -1);
320  break;
321  default:
322  CHECK(false);
323  }
324 #else // #ifdef HAVE_TBB
325  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
326  init_cpu_buff_threads.emplace_back(
327  std::async(std::launch::async,
328  [keyspace_entry_count,
329  key_component_count,
330  key_component_width,
331  thread_idx,
332  thread_count,
333  cpu_hash_table_ptr,
334  layout] {
335  switch (key_component_width) {
336  case 4:
337  init_baseline_hash_join_buff_32(cpu_hash_table_ptr,
338  keyspace_entry_count,
339  key_component_count,
340  layout == HashType::OneToOne,
341  -1,
342  thread_idx,
343  thread_count);
344  break;
345  case 8:
346  init_baseline_hash_join_buff_64(cpu_hash_table_ptr,
347  keyspace_entry_count,
348  key_component_count,
349  layout == HashType::OneToOne,
350  -1,
351  thread_idx,
352  thread_count);
353  break;
354  default:
355  CHECK(false);
356  }
357  }));
358  }
359  for (auto& child : init_cpu_buff_threads) {
360  child.get();
361  }
362 #endif // !HAVE_TBB
363  }
364  std::vector<std::future<int>> fill_cpu_buff_threads;
365  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
366  fill_cpu_buff_threads.emplace_back(std::async(
367  std::launch::async,
368  [key_handler,
369  keyspace_entry_count,
370  &join_columns,
371  key_component_count,
372  key_component_width,
373  layout,
374  thread_idx,
375  cpu_hash_table_ptr,
376  thread_count,
377  for_semi_join] {
378  switch (key_component_width) {
379  case 4: {
380  return fill_baseline_hash_join_buff<int32_t>(cpu_hash_table_ptr,
381  keyspace_entry_count,
382  -1,
383  for_semi_join,
384  key_component_count,
385  layout == HashType::OneToOne,
386  key_handler,
387  join_columns[0].num_elems,
388  thread_idx,
389  thread_count);
390  break;
391  }
392  case 8: {
393  return fill_baseline_hash_join_buff<int64_t>(cpu_hash_table_ptr,
394  keyspace_entry_count,
395  -1,
396  for_semi_join,
397  key_component_count,
398  layout == HashType::OneToOne,
399  key_handler,
400  join_columns[0].num_elems,
401  thread_idx,
402  thread_count);
403  break;
404  }
405  default:
406  CHECK(false);
407  }
408  return -1;
409  }));
410  }
411  int err = 0;
412  for (auto& child : fill_cpu_buff_threads) {
413  int partial_err = child.get();
414  if (partial_err) {
415  err = partial_err;
416  }
417  }
418  if (err) {
419  return err;
420  }
421  if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
422  auto one_to_many_buff = reinterpret_cast<int32_t*>(
423  cpu_hash_table_ptr + keyspace_entry_count * entry_size);
424  {
425  auto timer_init_additional_buffers =
426  DEBUG_TIMER("CPU Baseline-Hash: Additional Buffers init_hash_join_buff");
427  init_hash_join_buff(one_to_many_buff, keyspace_entry_count, -1, 0, 1);
428  }
429  bool is_geo_compressed = false;
430  if constexpr (std::is_same_v<KEY_HANDLER, RangeKeyHandler>) {
431  if (const auto range_handler =
432  reinterpret_cast<const RangeKeyHandler*>(key_handler)) {
433  is_geo_compressed = range_handler->is_compressed_;
434  }
435  }
436  setHashLayout(layout);
437 
438  switch (key_component_width) {
439  case 4: {
440  const auto composite_key_dict = reinterpret_cast<int32_t*>(cpu_hash_table_ptr);
441  fill_one_to_many_baseline_hash_table_32(
442  one_to_many_buff,
443  composite_key_dict,
444  keyspace_entry_count,
445  key_component_count,
446  join_columns,
447  join_column_types,
448  join_bucket_info,
449  str_proxy_translation_maps_ptrs_and_offsets.first,
450  str_proxy_translation_maps_ptrs_and_offsets.second,
451  thread_count,
452  std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
453  is_geo_compressed);
454  break;
455  }
456  case 8: {
457  const auto composite_key_dict = reinterpret_cast<int64_t*>(cpu_hash_table_ptr);
458  fill_one_to_many_baseline_hash_table_64(
459  one_to_many_buff,
460  composite_key_dict,
461  keyspace_entry_count,
462  key_component_count,
463  join_columns,
464  join_column_types,
465  join_bucket_info,
466  str_proxy_translation_maps_ptrs_and_offsets.first,
467  str_proxy_translation_maps_ptrs_and_offsets.second,
468  thread_count,
469  std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
470  is_geo_compressed);
471  break;
472  }
473  default:
474  CHECK(false);
475  }
476  }
477  return err;
478  }
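The init and fill loops above parallelize by striding: each of the cpu_threads() workers receives its thread_idx plus the total thread_count, and the helpers (init_baseline_hash_join_buff_32/64, fill_baseline_hash_join_buff) visit every thread_count-th entry or row. A self-contained sketch of that pattern follows; the loop body is a placeholder, not the real hash-table kernel code.

#include <cstddef>
#include <future>
#include <vector>

// Sketch of the thread_idx / thread_count striding used by the CPU init/fill helpers above.
void for_each_entry_strided(const size_t entry_count, const int thread_count) {
  std::vector<std::future<void>> workers;
  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
    workers.emplace_back(std::async(std::launch::async, [=] {
      for (size_t i = thread_idx; i < entry_count; i += thread_count) {
        // placeholder: e.g. write the "empty" sentinel (-1) into entry i's key slots
      }
    }));
  }
  for (auto& w : workers) {
    w.get();  // join and propagate any exception
  }
}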

template<class KEY_HANDLER >
int BaselineJoinHashTableBuilder::initHashTableOnGpu ( KEY_HANDLER *  key_handler,
const std::vector< JoinColumn > &  join_columns,
const HashType  layout,
const JoinType  join_type,
const size_t  key_component_width,
const size_t  key_component_count,
const size_t  keyspace_entry_count,
const size_t  emitted_keys_count,
const int  device_id,
const Executor *  executor 
)
inline

Definition at line 522 of file BaselineHashTableBuilder.h.

References allocateDeviceMemory(), ANTI, CHECK, DEBUG_TIMER, getQueryEngineCudaStreamForDevice(), hash_table_, init_baseline_hash_join_buff_on_device_32(), init_baseline_hash_join_buff_on_device_64(), init_hash_join_buff_on_device(), HashJoin::layoutRequiresAdditionalBuffers(), OneToOne, SEMI, setHashLayout(), transfer_flat_object_to_gpu(), and UNREACHABLE.

Referenced by BaselineJoinHashTable::initHashTableForDevice().

531  {
532  auto timer = DEBUG_TIMER(__func__);
533  int err = 0;
534 #ifdef HAVE_CUDA
535  allocateDeviceMemory(layout,
536  key_component_width,
537  key_component_count,
538  keyspace_entry_count,
539  emitted_keys_count,
540  device_id,
541  executor);
542  if (!keyspace_entry_count) {
543  // need to "allocate" the empty hash table first
544  CHECK(!emitted_keys_count);
545  return 0;
546  }
547  auto data_mgr = executor->getDataMgr();
548  auto allocator = std::make_unique<CudaAllocator>(
549  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
550  auto dev_err_buff = allocator->alloc(sizeof(int));
551 
552  allocator->copyToDevice(dev_err_buff, &err, sizeof(err));
553  auto gpu_hash_table_buff = hash_table_->getGpuBuffer();
554  CHECK(gpu_hash_table_buff);
555  const bool for_semi_join =
556  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
557  layout == HashType::OneToOne;
558  setHashLayout(layout);
559  const auto key_handler_gpu = transfer_flat_object_to_gpu(*key_handler, *allocator);
560  switch (key_component_width) {
561  case 4:
562  init_baseline_hash_join_buff_on_device_32(gpu_hash_table_buff,
563  keyspace_entry_count,
564  key_component_count,
565  layout == HashType::OneToOne,
566  -1);
567  break;
568  case 8:
569  init_baseline_hash_join_buff_on_device_64(gpu_hash_table_buff,
570  keyspace_entry_count,
571  key_component_count,
572  layout == HashType::OneToOne,
573  -1);
574  break;
575  default:
576  UNREACHABLE();
577  }
578  switch (key_component_width) {
579  case 4: {
580  fill_baseline_hash_join_buff_on_device<int32_t>(
581  gpu_hash_table_buff,
582  keyspace_entry_count,
583  -1,
584  for_semi_join,
585  key_component_count,
586  layout == HashType::OneToOne,
587  reinterpret_cast<int*>(dev_err_buff),
588  key_handler_gpu,
589  join_columns.front().num_elems);
590  allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
591  break;
592  }
593  case 8: {
594  fill_baseline_hash_join_buff_on_device<int64_t>(
595  gpu_hash_table_buff,
596  keyspace_entry_count,
597  -1,
598  for_semi_join,
599  key_component_count,
600  layout == HashType::OneToOne,
601  reinterpret_cast<int*>(dev_err_buff),
602  key_handler_gpu,
603  join_columns.front().num_elems);
604  allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
605  break;
606  }
607  default:
608  UNREACHABLE();
609  }
610  if (err) {
611  return err;
612  }
613  if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
614  const auto entry_size = key_component_count * key_component_width;
615  auto one_to_many_buff = reinterpret_cast<int32_t*>(
616  gpu_hash_table_buff + keyspace_entry_count * entry_size);
617  init_hash_join_buff_on_device(one_to_many_buff, keyspace_entry_count, -1);
618  setHashLayout(layout);
619  switch (key_component_width) {
620  case 4: {
621  const auto composite_key_dict = reinterpret_cast<int32_t*>(gpu_hash_table_buff);
622  fill_one_to_many_baseline_hash_table_on_device<int32_t>(
623  one_to_many_buff,
624  composite_key_dict,
625  keyspace_entry_count,
626  key_component_count,
627  key_handler_gpu,
628  join_columns.front().num_elems);
629 
630  break;
631  }
632  case 8: {
633  const auto composite_key_dict = reinterpret_cast<int64_t*>(gpu_hash_table_buff);
634  fill_one_to_many_baseline_hash_table_on_device<int64_t>(
635  one_to_many_buff,
636  composite_key_dict,
637  keyspace_entry_count,
638  key_component_count,
639  key_handler_gpu,
640  join_columns.front().num_elems);
641 
642  break;
643  }
644  default:
645  UNREACHABLE();
646  }
647  }
648 #else
649  UNREACHABLE();
650 #endif
651  return err;
652  }
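The err handling above is a small device round trip: one int is allocated on the GPU, zeroed from the host, possibly set by the fill kernels (for example on a one-to-one collision), and copied back before being checked. A condensed sketch of just that pattern, reusing the allocator calls (alloc, copyToDevice, copyFromDevice) shown in the listing:

// Condensed from the listing above: device-side error flag round trip.
int err = 0;
int8_t* dev_err_buff = allocator->alloc(sizeof(int));          // one int on the device
allocator->copyToDevice(dev_err_buff, &err, sizeof(err));      // clear the flag
// ... fill_baseline_hash_join_buff_on_device<...> writes to
//     reinterpret_cast<int*>(dev_err_buff) if it hits an error ...
allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));    // read the flag back
if (err) {
  return err;  // surface the device-reported failure to the caller
}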

void BaselineJoinHashTableBuilder::setHashLayout ( HashType  layout)
inline

Definition at line 656 of file BaselineHashTableBuilder.h.

References layout_.

Referenced by initHashTableOnCpu(), and initHashTableOnGpu().

656 { layout_ = layout; }


Member Data Documentation

std::unique_ptr<BaselineHashTable> BaselineJoinHashTableBuilder::hash_table_
private
HashType BaselineJoinHashTableBuilder::layout_
private

Definition at line 662 of file BaselineHashTableBuilder.h.

Referenced by getHashLayout(), and setHashLayout().


The documentation for this class was generated from the following file:

BaselineHashTableBuilder.h