OmniSciDB  16c4e035a1
BaselineJoinHashTableBuilder Class Reference

#include <BaselineHashTableBuilder.h>

Public Member Functions

 BaselineJoinHashTableBuilder ()=default
 
template<class KEY_HANDLER >
int initHashTableOnCpu (KEY_HANDLER *key_handler, const CompositeKeyInfo &composite_key_info, const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const size_t keyspace_entry_count, const size_t keys_for_all_rows, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count)
 
void allocateDeviceMemory (const HashType layout, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id, const Executor *executor)
 
template<class KEY_HANDLER >
int initHashTableOnGpu (KEY_HANDLER *key_handler, const std::vector< JoinColumn > &join_columns, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id, const Executor *executor)
 
std::unique_ptr< BaselineHashTable > getHashTable ()
 
void setHashLayout (HashType layout)
 
HashType getHashLayout () const
 

Private Attributes

std::unique_ptr< BaselineHashTable > hash_table_
 
HashType layout_
 

Detailed Description

Definition at line 262 of file BaselineHashTableBuilder.h.
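
For orientation, here is a minimal usage sketch of the CPU build path, loosely modeled on the call sequence in BaselineJoinHashTable::initHashTableForDevice(). Every argument value and surrounding variable (key_handler, composite_key_info, join_columns, and so on) is a hypothetical placeholder rather than library code:

BaselineJoinHashTableBuilder builder;

// The caller is assumed to have prepared the key handler, the composite key
// metadata and the join column descriptors beforehand.
const int err = builder.initHashTableOnCpu(key_handler,
                                           composite_key_info,
                                           join_columns,
                                           join_column_types,
                                           join_bucket_info,
                                           keyspace_entry_count,
                                           keys_for_all_rows,
                                           HashType::OneToOne,
                                           JoinType::INNER,
                                           /*key_component_width=*/8,
                                           /*key_component_count=*/2);
if (!err) {
  // Ownership of the finished table moves out of the builder.
  std::unique_ptr<BaselineHashTable> table = builder.getHashTable();
}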

Constructor & Destructor Documentation

BaselineJoinHashTableBuilder::BaselineJoinHashTableBuilder ( )
default

Member Function Documentation

void BaselineJoinHashTableBuilder::allocateDeviceMemory ( const HashType  layout,
const size_t  key_component_width,
const size_t  key_component_count,
const size_t  keyspace_entry_count,
const size_t  emitted_keys_count,
const int  device_id,
const Executor *  executor 
)
inline

Definition at line 460 of file BaselineHashTableBuilder.h.

References HashJoin::getHashTypeString(), hash_table_, HashJoin::layoutRequiresAdditionalBuffers(), OneToOne, UNREACHABLE, and VLOG.

Referenced by BaselineJoinHashTable::initHashTableForDevice(), and initHashTableOnGpu().

466  {
467 #ifdef HAVE_CUDA
468  const auto entry_size =
469  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
470  key_component_width;
471  const size_t one_to_many_hash_entries =
472  HashJoin::layoutRequiresAdditionalBuffers(layout)
473  ? 2 * keyspace_entry_count + emitted_keys_count
474  : 0;
475  const size_t hash_table_size =
476  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
477 
478  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
479  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
480  throw TooManyHashEntries(
481  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
482  "yet");
483  }
484 
485  VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
486  << keyspace_entry_count << " hash entries and " << one_to_many_hash_entries
487  << " entries in the " << HashJoin::getHashTypeString(layout) << " buffer";
488  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
489 
490  hash_table_ = std::make_unique<BaselineHashTable>(executor->getDataMgr(),
491  layout,
492  keyspace_entry_count,
493  emitted_keys_count,
494  hash_table_size,
495  device_id);
496 #else
497  UNREACHABLE();
498 #endif
499  }

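The sizing arithmetic above is easiest to follow with concrete numbers. The standalone sketch below (not library code) reproduces the formula for a hypothetical two-component, 8-byte key; the one_to_many flag stands in for HashJoin::layoutRequiresAdditionalBuffers(layout):

#include <cstddef>
#include <cstdint>
#include <iostream>

int main() {
  const size_t key_component_width = 8;         // 64-bit key components
  const size_t key_component_count = 2;         // composite key over two columns
  const size_t keyspace_entry_count = 1'000'000;
  const size_t emitted_keys_count = 5'000'000;  // keys emitted into the payload buffer
  const bool one_to_many = true;                // layout requiring additional buffers

  // OneToOne layouts append one value slot per entry; OneToMany layouts do not.
  const size_t entry_size =
      (key_component_count + (one_to_many ? 0 : 1)) * key_component_width;
  // OneToMany layouts add offset and count buffers plus the emitted key ids.
  const size_t one_to_many_hash_entries =
      one_to_many ? 2 * keyspace_entry_count + emitted_keys_count : 0;
  const size_t hash_table_size =
      entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);

  std::cout << "hash_table_size = " << hash_table_size << " bytes\n";
  // allocateDeviceMemory() throws TooManyHashEntries above this bound.
  std::cout << std::boolalpha << "fits in a single GPU allocation: "
            << (hash_table_size <= static_cast<size_t>(INT32_MAX)) << '\n';
}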

HashType BaselineJoinHashTableBuilder::getHashLayout ( ) const
inline

Definition at line 639 of file BaselineHashTableBuilder.h.

References layout_.

std::unique_ptr<BaselineHashTable> BaselineJoinHashTableBuilder::getHashTable ( )
inline

Definition at line 635 of file BaselineHashTableBuilder.h.

References hash_table_.

Referenced by BaselineJoinHashTable::initHashTableForDevice().

635 { return std::move(hash_table_); }

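Note that the body above std::moves the owned table out of the builder, so a given builder yields its table exactly once. With a hypothetical builder instance:

auto table = builder.getHashTable();  // takes ownership of the BaselineHashTable
auto again = builder.getHashTable();  // hash_table_ was moved from, so 'again' is empty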

template<class KEY_HANDLER >
int BaselineJoinHashTableBuilder::initHashTableOnCpu ( KEY_HANDLER *  key_handler,
const CompositeKeyInfo &  composite_key_info,
const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const size_t  keyspace_entry_count,
const size_t  keys_for_all_rows,
const HashType  layout,
const JoinType  join_type,
const size_t  key_component_width,
const size_t  key_component_count 
)
inline

Definition at line 267 of file BaselineHashTableBuilder.h.

References ANTI, threading_serial::async(), CHECK, cpu_threads(), DEBUG_TIMER, fill_one_to_many_baseline_hash_table_32(), fill_one_to_many_baseline_hash_table_64(), hash_table_, init_baseline_hash_join_buff_32(), init_baseline_hash_join_buff_64(), init_hash_join_buff(), HashJoin::layoutRequiresAdditionalBuffers(), OneToOne, CompositeKeyInfo::sd_inner_proxy_per_key, CompositeKeyInfo::sd_outer_proxy_per_key, SEMI, setHashLayout(), and VLOG.

Referenced by BaselineJoinHashTable::initHashTableForDevice(), and OverlapsJoinHashTable::initHashTableOnCpu().

277  {
278  auto timer = DEBUG_TIMER(__func__);
279  const auto entry_size =
280  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
281  key_component_width;
282  const size_t one_to_many_hash_entries =
283  HashJoin::layoutRequiresAdditionalBuffers(layout)
284  ? 2 * keyspace_entry_count + keys_for_all_rows
285  : 0;
286  const size_t hash_table_size =
287  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
288 
289  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
290  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
291  throw TooManyHashEntries(
292  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
293  "yet");
294  }
295  const bool for_semi_join =
296  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
297  layout == HashType::OneToOne;
298 
299  VLOG(1) << "Initializing CPU Join Hash Table with " << keyspace_entry_count
300  << " hash entries and " << one_to_many_hash_entries
301  << " entries in the one to many buffer";
302  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
303 
304  hash_table_ = std::make_unique<BaselineHashTable>(
305  layout, keyspace_entry_count, keys_for_all_rows, hash_table_size);
306  auto cpu_hash_table_ptr = hash_table_->getCpuBuffer();
307  int thread_count = cpu_threads();
308  std::vector<std::future<void>> init_cpu_buff_threads;
309  setHashLayout(layout);
310  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
311  init_cpu_buff_threads.emplace_back(
312  std::async(std::launch::async,
313  [keyspace_entry_count,
314  key_component_count,
315  key_component_width,
316  thread_idx,
317  thread_count,
318  cpu_hash_table_ptr,
319  layout] {
320  switch (key_component_width) {
321  case 4:
322  init_baseline_hash_join_buff_32(cpu_hash_table_ptr,
323  keyspace_entry_count,
324  key_component_count,
325  layout == HashType::OneToOne,
326  -1,
327  thread_idx,
328  thread_count);
329  break;
330  case 8:
331  init_baseline_hash_join_buff_64(cpu_hash_table_ptr,
332  keyspace_entry_count,
333  key_component_count,
334  layout == HashType::OneToOne,
335  -1,
336  thread_idx,
337  thread_count);
338  break;
339  default:
340  CHECK(false);
341  }
342  }));
343  }
344  for (auto& child : init_cpu_buff_threads) {
345  child.get();
346  }
347  std::vector<std::future<int>> fill_cpu_buff_threads;
348  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
349  fill_cpu_buff_threads.emplace_back(std::async(
350  std::launch::async,
351  [key_handler,
352  keyspace_entry_count,
353  &join_columns,
354  key_component_count,
355  key_component_width,
356  layout,
357  thread_idx,
358  cpu_hash_table_ptr,
359  thread_count,
360  for_semi_join] {
361  switch (key_component_width) {
362  case 4: {
363  return fill_baseline_hash_join_buff<int32_t>(cpu_hash_table_ptr,
364  keyspace_entry_count,
365  -1,
366  for_semi_join,
367  key_component_count,
368  layout == HashType::OneToOne,
369  key_handler,
370  join_columns[0].num_elems,
371  thread_idx,
372  thread_count);
373  break;
374  }
375  case 8: {
376  return fill_baseline_hash_join_buff<int64_t>(cpu_hash_table_ptr,
377  keyspace_entry_count,
378  -1,
379  for_semi_join,
380  key_component_count,
381  layout == HashType::OneToOne,
382  key_handler,
383  join_columns[0].num_elems,
384  thread_idx,
385  thread_count);
386  break;
387  }
388  default:
389  CHECK(false);
390  }
391  return -1;
392  }));
393  }
394  int err = 0;
395  for (auto& child : fill_cpu_buff_threads) {
396  int partial_err = child.get();
397  if (partial_err) {
398  err = partial_err;
399  }
400  }
401  if (err) {
402  return err;
403  }
404  if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
405  auto one_to_many_buff = reinterpret_cast<int32_t*>(
406  cpu_hash_table_ptr + keyspace_entry_count * entry_size);
407  init_hash_join_buff(one_to_many_buff, keyspace_entry_count, -1, 0, 1);
408  bool is_geo_compressed = false;
409  if constexpr (std::is_same_v<KEY_HANDLER, RangeKeyHandler>) {
410  if (const auto range_handler =
411  reinterpret_cast<const RangeKeyHandler*>(key_handler)) {
412  is_geo_compressed = range_handler->is_compressed_;
413  }
414  }
415  setHashLayout(layout);
416  switch (key_component_width) {
417  case 4: {
418  const auto composite_key_dict = reinterpret_cast<int32_t*>(cpu_hash_table_ptr);
419  fill_one_to_many_baseline_hash_table_32(
420  one_to_many_buff,
421  composite_key_dict,
422  keyspace_entry_count,
423  -1,
424  key_component_count,
425  join_columns,
426  join_column_types,
427  join_bucket_info,
428  composite_key_info.sd_inner_proxy_per_key,
429  composite_key_info.sd_outer_proxy_per_key,
430  thread_count,
431  std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
432  is_geo_compressed);
433  break;
434  }
435  case 8: {
436  const auto composite_key_dict = reinterpret_cast<int64_t*>(cpu_hash_table_ptr);
437  fill_one_to_many_baseline_hash_table_64(
438  one_to_many_buff,
439  composite_key_dict,
440  keyspace_entry_count,
441  -1,
442  key_component_count,
443  join_columns,
444  join_column_types,
445  join_bucket_info,
446  composite_key_info.sd_inner_proxy_per_key,
447  composite_key_info.sd_outer_proxy_per_key,
448  thread_count,
449  std::is_same_v<KEY_HANDLER, RangeKeyHandler>,
450  is_geo_compressed);
451  break;
452  }
453  default:
454  CHECK(false);
455  }
456  }
457  return err;
458  }

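The init and fill loops above follow a common fan-out pattern: one std::async task per CPU thread, with work interleaved by thread index, and a join pass that keeps any non-zero error code. A self-contained sketch of that pattern, where fill_partition is an illustrative stand-in for fill_baseline_hash_join_buff<T>() and not a library symbol:

#include <future>
#include <iostream>
#include <vector>

// Dummy per-thread work function; returns 0 on success.
int fill_partition(int thread_idx, int thread_count) {
  // A real worker would process rows thread_idx, thread_idx + thread_count, ...
  return thread_idx < thread_count ? 0 : -1;
}

int main() {
  const int thread_count = 8;  // the builder uses cpu_threads()
  std::vector<std::future<int>> workers;
  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
    workers.emplace_back(std::async(
        std::launch::async, [thread_idx, thread_count] {
          return fill_partition(thread_idx, thread_count);
        }));
  }
  int err = 0;
  for (auto& worker : workers) {
    if (const int partial_err = worker.get()) {
      err = partial_err;  // remember a non-zero error, as initHashTableOnCpu() does
    }
  }
  std::cout << "err = " << err << '\n';
  return err;
}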

template<class KEY_HANDLER >
int BaselineJoinHashTableBuilder::initHashTableOnGpu ( KEY_HANDLER *  key_handler,
const std::vector< JoinColumn > &  join_columns,
const HashType  layout,
const JoinType  join_type,
const size_t  key_component_width,
const size_t  key_component_count,
const size_t  keyspace_entry_count,
const size_t  emitted_keys_count,
const int  device_id,
const Executor *  executor 
)
inline

Definition at line 502 of file BaselineHashTableBuilder.h.

References allocateDeviceMemory(), ANTI, CHECK, DEBUG_TIMER, hash_table_, init_baseline_hash_join_buff_on_device_32(), init_baseline_hash_join_buff_on_device_64(), init_hash_join_buff_on_device(), HashJoin::layoutRequiresAdditionalBuffers(), OneToOne, SEMI, setHashLayout(), transfer_flat_object_to_gpu(), and UNREACHABLE.

Referenced by BaselineJoinHashTable::initHashTableForDevice().

511  {
512  auto timer = DEBUG_TIMER(__func__);
513  int err = 0;
514 #ifdef HAVE_CUDA
515  allocateDeviceMemory(layout,
516  key_component_width,
517  key_component_count,
518  keyspace_entry_count,
519  emitted_keys_count,
520  device_id,
521  executor);
522  if (!keyspace_entry_count) {
523  // need to "allocate" the empty hash table first
524  CHECK(!emitted_keys_count);
525  return 0;
526  }
527  auto data_mgr = executor->getDataMgr();
528  auto allocator = data_mgr->createGpuAllocator(device_id);
529  auto dev_err_buff = allocator->alloc(sizeof(int));
530 
531  allocator->copyToDevice(dev_err_buff, &err, sizeof(err));
532  auto gpu_hash_table_buff = hash_table_->getGpuBuffer();
533  CHECK(gpu_hash_table_buff);
534  const bool for_semi_join =
535  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
536  layout == HashType::OneToOne;
537  setHashLayout(layout);
538  const auto key_handler_gpu = transfer_flat_object_to_gpu(*key_handler, *allocator);
539  switch (key_component_width) {
540  case 4:
541  init_baseline_hash_join_buff_on_device_32(gpu_hash_table_buff,
542  keyspace_entry_count,
543  key_component_count,
544  layout == HashType::OneToOne,
545  -1);
546  break;
547  case 8:
548  init_baseline_hash_join_buff_on_device_64(gpu_hash_table_buff,
549  keyspace_entry_count,
550  key_component_count,
551  layout == HashType::OneToOne,
552  -1);
553  break;
554  default:
555  UNREACHABLE();
556  }
557  switch (key_component_width) {
558  case 4: {
559  fill_baseline_hash_join_buff_on_device<int32_t>(
560  gpu_hash_table_buff,
561  keyspace_entry_count,
562  -1,
563  for_semi_join,
564  key_component_count,
565  layout == HashType::OneToOne,
566  reinterpret_cast<int*>(dev_err_buff),
567  key_handler_gpu,
568  join_columns.front().num_elems);
569  allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
570  break;
571  }
572  case 8: {
573  fill_baseline_hash_join_buff_on_device<int64_t>(
574  gpu_hash_table_buff,
575  keyspace_entry_count,
576  -1,
577  for_semi_join,
578  key_component_count,
579  layout == HashType::OneToOne,
580  reinterpret_cast<int*>(dev_err_buff),
581  key_handler_gpu,
582  join_columns.front().num_elems);
583  allocator->copyFromDevice(&err, dev_err_buff, sizeof(err));
584  break;
585  }
586  default:
587  UNREACHABLE();
588  }
589  if (err) {
590  return err;
591  }
592  if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
593  const auto entry_size = key_component_count * key_component_width;
594  auto one_to_many_buff = reinterpret_cast<int32_t*>(
595  gpu_hash_table_buff + keyspace_entry_count * entry_size);
596  init_hash_join_buff_on_device(one_to_many_buff, keyspace_entry_count, -1);
597  setHashLayout(layout);
598  switch (key_component_width) {
599  case 4: {
600  const auto composite_key_dict = reinterpret_cast<int32_t*>(gpu_hash_table_buff);
601  fill_one_to_many_baseline_hash_table_on_device<int32_t>(
602  one_to_many_buff,
603  composite_key_dict,
604  keyspace_entry_count,
605  -1,
606  key_component_count,
607  key_handler_gpu,
608  join_columns.front().num_elems);
609 
610  break;
611  }
612  case 8: {
613  const auto composite_key_dict = reinterpret_cast<int64_t*>(gpu_hash_table_buff);
614  fill_one_to_many_baseline_hash_table_on_device<int64_t>(
615  one_to_many_buff,
616  composite_key_dict,
617  keyspace_entry_count,
618  -1,
619  key_component_count,
620  key_handler_gpu,
621  join_columns.front().num_elems);
622 
623  break;
624  }
625  default:
626  UNREACHABLE();
627  }
628  }
629 #else
630  UNREACHABLE();
631 #endif
632  return err;
633  }

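Kernel-side failures are reported through a small device-resident error word: err is copied to the device before the fill kernels run and copied back afterwards. Below is a minimal CUDA-runtime sketch of that round trip; the real code goes through the DataMgr allocator's copyToDevice()/copyFromDevice() rather than raw cudaMemcpy, and fill_kernel is a hypothetical stand-in for fill_baseline_hash_join_buff_on_device():

#include <cuda_runtime.h>
#include <cstdio>

// Stand-in for a fill kernel that records a non-zero code on failure,
// e.g. when a one-to-one layout encounters a duplicate key.
__global__ void fill_kernel(int* dev_err) {
  if (threadIdx.x == 0 && blockIdx.x == 0) {
    *dev_err = 0;  // a real kernel would write an error code here on failure
  }
}

int main() {
  int err = 0;
  int* dev_err_buff = nullptr;
  cudaMalloc(&dev_err_buff, sizeof(int));
  cudaMemcpy(dev_err_buff, &err, sizeof(err), cudaMemcpyHostToDevice);

  fill_kernel<<<64, 128>>>(dev_err_buff);

  // Read the error word back; a non-zero value aborts the build, mirroring
  // the early 'return err;' in initHashTableOnGpu().
  cudaMemcpy(&err, dev_err_buff, sizeof(err), cudaMemcpyDeviceToHost);
  cudaFree(dev_err_buff);
  std::printf("err = %d\n", err);
  return err;
}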

void BaselineJoinHashTableBuilder::setHashLayout ( HashType  layout)
inline

Definition at line 637 of file BaselineHashTableBuilder.h.

References layout_.

Referenced by initHashTableOnCpu(), and initHashTableOnGpu().

637 { layout_ = layout; }


Member Data Documentation

std::unique_ptr<BaselineHashTable> BaselineJoinHashTableBuilder::hash_table_
private

Referenced by allocateDeviceMemory(), getHashTable(), initHashTableOnCpu(), and initHashTableOnGpu().

HashType BaselineJoinHashTableBuilder::layout_
private

Definition at line 643 of file BaselineHashTableBuilder.h.

Referenced by getHashLayout(), and setHashLayout().


The documentation for this class was generated from the following file:

BaselineHashTableBuilder.h