OmniSciDB  ba1bac9284
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BaselineJoinHashTableBuilder Class Reference

#include <BaselineHashTableBuilder.h>

+ Collaboration diagram for BaselineJoinHashTableBuilder:

Public Member Functions

 BaselineJoinHashTableBuilder (const Catalog_Namespace::Catalog *catalog)
 
template<class KEY_HANDLER >
int initHashTableOnCpu (KEY_HANDLER *key_handler, const CompositeKeyInfo &composite_key_info, const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const size_t keyspace_entry_count, const size_t keys_for_all_rows, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count)
 
void allocateDeviceMemory (const HashType layout, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id)
 
template<class KEY_HANDLER >
int initHashTableOnGpu (KEY_HANDLER *key_handler, const std::vector< JoinColumn > &join_columns, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id)
 
std::unique_ptr
< BaselineHashTable
getHashTable ()
 

Private Attributes

const Catalog_Namespace::Catalog * catalog_
 
std::unique_ptr
< BaselineHashTable
hash_table_
 

Detailed Description

Definition at line 223 of file BaselineHashTableBuilder.h.

Constructor & Destructor Documentation

BaselineJoinHashTableBuilder::BaselineJoinHashTableBuilder ( const Catalog_Namespace::Catalog *  catalog)
inline

Definition at line 225 of file BaselineHashTableBuilder.h.

226  : catalog_(catalog) {}
const Catalog_Namespace::Catalog * catalog_

Member Function Documentation

void BaselineJoinHashTableBuilder::allocateDeviceMemory ( const HashType  layout,
const size_t  key_component_width,
const size_t  key_component_count,
const size_t  keyspace_entry_count,
const size_t  emitted_keys_count,
const int  device_id 
)
inline

Definition at line 409 of file BaselineHashTableBuilder.h.

References catalog_, hash_table_, HashJoin::layoutRequiresAdditionalBuffers(), OneToOne, UNREACHABLE, and VLOG.

Referenced by BaselineJoinHashTable::initHashTableForDevice(), and initHashTableOnGpu().

414  {
415 #ifdef HAVE_CUDA
416  const auto entry_size =
417  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
418  key_component_width;
 419  const size_t one_to_many_hash_entries =
 420      HashJoin::layoutRequiresAdditionalBuffers(layout)
421  ? 2 * keyspace_entry_count + emitted_keys_count
422  : 0;
423  const size_t hash_table_size =
424  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
425 
426  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
427  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
428  throw TooManyHashEntries(
 429  "Hash tables for GPU requiring larger than 2GB contiguous memory not supported "
430  "yet");
431  }
432 
433  VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
434  << keyspace_entry_count << " hash entries and " << one_to_many_hash_entries
435  << " entries in the one to many buffer";
436  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
437 
438  hash_table_ = std::make_unique<BaselineHashTable>(catalog_,
439  layout,
440  keyspace_entry_count,
441  emitted_keys_count,
442  hash_table_size,
443  device_id);
444 #else
445  UNREACHABLE();
446 #endif
447  }
const Catalog_Namespace::Catalog * catalog_
#define UNREACHABLE()
Definition: Logger.h:250
std::unique_ptr< BaselineHashTable > hash_table_
#define VLOG(n)
Definition: Logger.h:300
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:129

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr<BaselineHashTable> BaselineJoinHashTableBuilder::getHashTable ( )
inline

Definition at line 579 of file BaselineHashTableBuilder.h.

References hash_table_.

Referenced by BaselineJoinHashTable::initHashTableForDevice(), and OverlapsJoinHashTable::initHashTableOnCpu().

579 { return std::move(hash_table_); }
std::unique_ptr< BaselineHashTable > hash_table_

+ Here is the caller graph for this function:

template<class KEY_HANDLER >
int BaselineJoinHashTableBuilder::initHashTableOnCpu ( KEY_HANDLER *  key_handler,
const CompositeKeyInfo composite_key_info,
const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const size_t  keyspace_entry_count,
const size_t  keys_for_all_rows,
const HashType  layout,
const JoinType  join_type,
const size_t  key_component_width,
const size_t  key_component_count 
)
inline

Definition at line 229 of file BaselineHashTableBuilder.h.

References ANTI, catalog_, CHECK, cpu_threads(), DEBUG_TIMER, fill_one_to_many_baseline_hash_table_32(), fill_one_to_many_baseline_hash_table_64(), hash_table_, init_baseline_hash_join_buff_32(), init_baseline_hash_join_buff_64(), init_hash_join_buff(), HashJoin::layoutRequiresAdditionalBuffers(), OneToOne, CompositeKeyInfo::sd_inner_proxy_per_key, CompositeKeyInfo::sd_outer_proxy_per_key, SEMI, and VLOG.

Referenced by BaselineJoinHashTable::initHashTableForDevice(), and OverlapsJoinHashTable::initHashTableOnCpu().

239  {
240  auto timer = DEBUG_TIMER(__func__);
241  const auto entry_size =
242  (key_component_count + (layout == HashType::OneToOne ? 1 : 0)) *
243  key_component_width;
 244  const size_t one_to_many_hash_entries =
 245      HashJoin::layoutRequiresAdditionalBuffers(layout)
246  ? 2 * keyspace_entry_count + keys_for_all_rows
247  : 0;
248  const size_t hash_table_size =
249  entry_size * keyspace_entry_count + one_to_many_hash_entries * sizeof(int32_t);
250 
251  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
252  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
253  throw TooManyHashEntries(
 254  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
 254  "Hash tables for GPU requiring larger than 2GB contiguous memory not supported "
255  "yet");
256  }
257  const bool for_semi_join =
258  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
259  layout == HashType::OneToOne;
260 
261  VLOG(1) << "Initializing CPU Join Hash Table with " << keyspace_entry_count
262  << " hash entries and " << one_to_many_hash_entries
263  << " entries in the one to many buffer";
264  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
265 
266  hash_table_ = std::make_unique<BaselineHashTable>(
267  catalog_, layout, keyspace_entry_count, keys_for_all_rows, hash_table_size);
268  auto cpu_hash_table_ptr = hash_table_->getCpuBuffer();
269  int thread_count = cpu_threads();
270  std::vector<std::future<void>> init_cpu_buff_threads;
271  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
272  init_cpu_buff_threads.emplace_back(
273  std::async(std::launch::async,
274  [keyspace_entry_count,
275  key_component_count,
276  key_component_width,
277  thread_idx,
278  thread_count,
279  cpu_hash_table_ptr,
280  layout] {
281  switch (key_component_width) {
282  case 4:
283  init_baseline_hash_join_buff_32(cpu_hash_table_ptr,
284  keyspace_entry_count,
285  key_component_count,
286  layout == HashType::OneToOne,
287  -1,
288  thread_idx,
289  thread_count);
290  break;
291  case 8:
292  init_baseline_hash_join_buff_64(cpu_hash_table_ptr,
293  keyspace_entry_count,
294  key_component_count,
295  layout == HashType::OneToOne,
296  -1,
297  thread_idx,
298  thread_count);
299  break;
300  default:
301  CHECK(false);
302  }
303  }));
304  }
305  for (auto& child : init_cpu_buff_threads) {
306  child.get();
307  }
308  std::vector<std::future<int>> fill_cpu_buff_threads;
309  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
310  fill_cpu_buff_threads.emplace_back(std::async(
311  std::launch::async,
312  [key_handler,
313  keyspace_entry_count,
314  &join_columns,
315  key_component_count,
316  key_component_width,
317  layout,
318  thread_idx,
319  cpu_hash_table_ptr,
320  thread_count,
321  for_semi_join] {
322  switch (key_component_width) {
323  case 4: {
324  return fill_baseline_hash_join_buff<int32_t>(cpu_hash_table_ptr,
325  keyspace_entry_count,
326  -1,
327  for_semi_join,
328  key_component_count,
329  layout == HashType::OneToOne,
330  key_handler,
331  join_columns[0].num_elems,
332  thread_idx,
333  thread_count);
334  break;
335  }
336  case 8: {
337  return fill_baseline_hash_join_buff<int64_t>(cpu_hash_table_ptr,
338  keyspace_entry_count,
339  -1,
340  for_semi_join,
341  key_component_count,
342  layout == HashType::OneToOne,
343  key_handler,
344  join_columns[0].num_elems,
345  thread_idx,
346  thread_count);
347  break;
348  }
349  default:
350  CHECK(false);
351  }
352  return -1;
353  }));
354  }
355  int err = 0;
356  for (auto& child : fill_cpu_buff_threads) {
357  int partial_err = child.get();
358  if (partial_err) {
359  err = partial_err;
360  }
361  }
362  if (err) {
363  return err;
364  }
 365  if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
 366  auto one_to_many_buff = reinterpret_cast<int32_t*>(
367  cpu_hash_table_ptr + keyspace_entry_count * entry_size);
368  init_hash_join_buff(one_to_many_buff, keyspace_entry_count, -1, 0, 1);
369  switch (key_component_width) {
370  case 4: {
 371  const auto composite_key_dict = reinterpret_cast<int32_t*>(cpu_hash_table_ptr);
 372  fill_one_to_many_baseline_hash_table_32(
373  one_to_many_buff,
374  composite_key_dict,
375  keyspace_entry_count,
376  -1,
377  key_component_count,
378  join_columns,
379  join_column_types,
380  join_bucket_info,
381  composite_key_info.sd_inner_proxy_per_key,
382  composite_key_info.sd_outer_proxy_per_key,
383  thread_count);
384  break;
385  }
386  case 8: {
 387  const auto composite_key_dict = reinterpret_cast<int64_t*>(cpu_hash_table_ptr);
 388  fill_one_to_many_baseline_hash_table_64(
389  one_to_many_buff,
390  composite_key_dict,
391  keyspace_entry_count,
392  -1,
393  key_component_count,
394  join_columns,
395  join_column_types,
396  join_bucket_info,
397  composite_key_info.sd_inner_proxy_per_key,
398  composite_key_info.sd_outer_proxy_per_key,
399  thread_count);
400  break;
401  }
402  default:
403  CHECK(false);
404  }
405  }
406  return err;
407  }
void init_baseline_hash_join_buff_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void * > &sd_inner_proxy_per_key, const std::vector< const void * > &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
std::vector< const void * > sd_inner_proxy_per_key
Definition: HashJoin.h:96
const Catalog_Namespace::Catalog * catalog_
void init_baseline_hash_join_buff_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::unique_ptr< BaselineHashTable > hash_table_
std::vector< const void * > sd_outer_proxy_per_key
Definition: HashJoin.h:97
void fill_one_to_many_baseline_hash_table_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void * > &sd_inner_proxy_per_key, const std::vector< const void * > &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int64_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define CHECK(condition)
Definition: Logger.h:206
#define DEBUG_TIMER(name)
Definition: Logger.h:322
int cpu_threads()
Definition: thread_count.h:24
#define VLOG(n)
Definition: Logger.h:300
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:129

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<class KEY_HANDLER >
int BaselineJoinHashTableBuilder::initHashTableOnGpu ( KEY_HANDLER *  key_handler,
const std::vector< JoinColumn > &  join_columns,
const HashType  layout,
const JoinType  join_type,
const size_t  key_component_width,
const size_t  key_component_count,
const size_t  keyspace_entry_count,
const size_t  emitted_keys_count,
const int  device_id 
)
inline

Definition at line 450 of file BaselineHashTableBuilder.h.

References CudaAllocator::alloc(), allocateDeviceMemory(), ANTI, catalog_, CHECK, copy_from_gpu(), copy_to_gpu(), DEBUG_TIMER, Catalog_Namespace::Catalog::getDataMgr(), hash_table_, init_baseline_hash_join_buff_on_device_32(), init_baseline_hash_join_buff_on_device_64(), init_hash_join_buff_on_device(), HashJoin::layoutRequiresAdditionalBuffers(), OneToOne, SEMI, transfer_flat_object_to_gpu(), and UNREACHABLE.

Referenced by BaselineJoinHashTable::initHashTableForDevice().

458  {
459  auto timer = DEBUG_TIMER(__func__);
460  int err = 0;
461 #ifdef HAVE_CUDA
462  allocateDeviceMemory(layout,
463  key_component_width,
464  key_component_count,
465  keyspace_entry_count,
466  emitted_keys_count,
467  device_id);
468  if (!keyspace_entry_count) {
469  // need to "allocate" the empty hash table first
470  CHECK(!emitted_keys_count);
471  return 0;
472  }
473  auto& data_mgr = catalog_->getDataMgr();
474  CudaAllocator allocator(&data_mgr, device_id);
475  auto dev_err_buff = reinterpret_cast<CUdeviceptr>(allocator.alloc(sizeof(int)));
476  copy_to_gpu(&data_mgr, dev_err_buff, &err, sizeof(err), device_id);
477  auto gpu_hash_table_buff = hash_table_->getGpuBuffer();
478  CHECK(gpu_hash_table_buff);
479  const bool for_semi_join =
480  (join_type == JoinType::SEMI || join_type == JoinType::ANTI) &&
481  layout == HashType::OneToOne;
482 
483  switch (key_component_width) {
484  case 4:
485  init_baseline_hash_join_buff_on_device_32(gpu_hash_table_buff,
486  keyspace_entry_count,
487  key_component_count,
488  layout == HashType::OneToOne,
489  -1);
490  break;
491  case 8:
492  init_baseline_hash_join_buff_on_device_64(gpu_hash_table_buff,
493  keyspace_entry_count,
494  key_component_count,
495  layout == HashType::OneToOne,
496  -1);
497  break;
498  default:
499  UNREACHABLE();
500  }
501  const auto key_handler_gpu = transfer_flat_object_to_gpu(*key_handler, allocator);
502  switch (key_component_width) {
503  case 4: {
504  fill_baseline_hash_join_buff_on_device<int32_t>(
505  gpu_hash_table_buff,
506  keyspace_entry_count,
507  -1,
508  for_semi_join,
509  key_component_count,
510  layout == HashType::OneToOne,
511  reinterpret_cast<int*>(dev_err_buff),
512  key_handler_gpu,
513  join_columns.front().num_elems);
514  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
515  break;
516  }
517  case 8: {
518  fill_baseline_hash_join_buff_on_device<int64_t>(
519  gpu_hash_table_buff,
520  keyspace_entry_count,
521  -1,
522  for_semi_join,
523  key_component_count,
524  layout == HashType::OneToOne,
525  reinterpret_cast<int*>(dev_err_buff),
526  key_handler_gpu,
527  join_columns.front().num_elems);
528  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
529  break;
530  }
531  default:
532  UNREACHABLE();
533  }
534  if (err) {
535  return err;
536  }
 537  if (HashJoin::layoutRequiresAdditionalBuffers(layout)) {
 538  const auto entry_size = key_component_count * key_component_width;
539  auto one_to_many_buff = reinterpret_cast<int32_t*>(
540  gpu_hash_table_buff + keyspace_entry_count * entry_size);
541  init_hash_join_buff_on_device(one_to_many_buff, keyspace_entry_count, -1);
542  switch (key_component_width) {
543  case 4: {
544  const auto composite_key_dict = reinterpret_cast<int32_t*>(gpu_hash_table_buff);
545  fill_one_to_many_baseline_hash_table_on_device<int32_t>(
546  one_to_many_buff,
547  composite_key_dict,
548  keyspace_entry_count,
549  -1,
550  key_component_count,
551  key_handler_gpu,
552  join_columns.front().num_elems);
553 
554  break;
555  }
556  case 8: {
557  const auto composite_key_dict = reinterpret_cast<int64_t*>(gpu_hash_table_buff);
558  fill_one_to_many_baseline_hash_table_on_device<int64_t>(
559  one_to_many_buff,
560  composite_key_dict,
561  keyspace_entry_count,
562  -1,
563  key_component_count,
564  key_handler_gpu,
565  join_columns.front().num_elems);
566 
567  break;
568  }
569  default:
570  UNREACHABLE();
571  }
572  }
573 #else
574  UNREACHABLE();
575 #endif
576  return err;
577  }
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:223
const Catalog_Namespace::Catalog * catalog_
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
unsigned long long CUdeviceptr
Definition: nocuda.h:27
#define UNREACHABLE()
Definition: Logger.h:250
std::unique_ptr< BaselineHashTable > hash_table_
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:30
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
T * transfer_flat_object_to_gpu(const T &object, CudaAllocator &allocator)
void init_hash_join_buff_on_device(int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val)
void allocateDeviceMemory(const HashType layout, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id)
#define CHECK(condition)
Definition: Logger.h:206
#define DEBUG_TIMER(name)
Definition: Logger.h:322
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:129

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

const Catalog_Namespace::Catalog* BaselineJoinHashTableBuilder::catalog_
private
std::unique_ptr<BaselineHashTable> BaselineJoinHashTableBuilder::hash_table_
private

The documentation for this class was generated from the following file: