OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HashJoinRuntimeGpu.cu File Reference
#include "HashJoinRuntime.cpp"
#include <thrust/device_ptr.h>
#include <thrust/scan.h>
+ Include dependency graph for HashJoinRuntimeGpu.cu:

Go to the source code of this file.

Macros

#define VALID_POS_FLAG   0
 

Functions

__global__ void fill_hash_join_buff_wrapper (int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, int *err)
 
__global__ void fill_hash_join_buff_bucketized_wrapper (int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, int *err, const int64_t bucket_normalization)
 
void fill_hash_join_buff_on_device_bucketized (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const size_t block_size_x, const size_t grid_size_x, const int64_t bucket_normalization)
 
void fill_hash_join_buff_on_device (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const size_t block_size_x, const size_t grid_size_x)
 
__global__ void fill_hash_join_buff_wrapper_sharded_bucketized (int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, int *err, const int64_t bucket_normalization)
 
__global__ void fill_hash_join_buff_wrapper_sharded (int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, int *err)
 
void fill_hash_join_buff_on_device_sharded_bucketized (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const size_t block_size_x, const size_t grid_size_x, const int64_t bucket_normalization)
 
void fill_hash_join_buff_on_device_sharded (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const size_t block_size_x, const size_t grid_size_x)
 
__global__ void init_hash_join_buff_wrapper (int32_t *buff, const int32_t hash_entry_count, const int32_t invalid_slot_val)
 
void init_hash_join_buff_on_device (int32_t *buff, const int32_t hash_entry_count, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
 
__global__ void set_valid_pos_flag (int32_t *pos_buff, const int32_t *count_buff, const int32_t entry_count)
 
__global__ void set_valid_pos (int32_t *pos_buff, int32_t *count_buff, const int32_t entry_count)
 
template<typename COUNT_MATCHES_FUNCTOR , typename FILL_ROW_IDS_FUNCTOR >
void fill_one_to_many_hash_table_on_device_impl (int32_t *buff, const int32_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const size_t block_size_x, const size_t grid_size_x, COUNT_MATCHES_FUNCTOR count_matches_func, FILL_ROW_IDS_FUNCTOR fill_row_ids_func)
 
void fill_one_to_many_hash_table_on_device (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const size_t block_size_x, const size_t grid_size_x)
 
void fill_one_to_many_hash_table_on_device_bucketized (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const size_t block_size_x, const size_t grid_size_x)
 
void fill_one_to_many_hash_table_on_device_sharded (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info, const size_t block_size_x, const size_t grid_size_x)
 
template<typename T , typename KEY_HANDLER >
void fill_one_to_many_baseline_hash_table_on_device (int32_t *buff, const T *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const KEY_HANDLER *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
template<typename T >
__global__ void init_baseline_hash_join_buff_wrapper (int8_t *hash_join_buff, const size_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
 
void init_baseline_hash_join_buff_on_device_32 (int8_t *hash_join_buff, const int32_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
 
void init_baseline_hash_join_buff_on_device_64 (int8_t *hash_join_buff, const int32_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
 
template<typename T , typename KEY_HANDLER >
__global__ void fill_baseline_hash_join_buff_wrapper (int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *err, const KEY_HANDLER *key_handler, const size_t num_elems)
 
void fill_baseline_hash_join_buff_on_device_32 (int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void overlaps_fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const OverlapsKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void fill_one_to_many_baseline_hash_table_on_device_32 (int32_t *buff, const int32_t *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void overlaps_fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const OverlapsKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void approximate_distinct_tuples_on_device_overlaps (uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const OverlapsKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void approximate_distinct_tuples_on_device (uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void compute_bucket_sizes_on_device (double *bucket_sizes_buffer, const JoinColumn *join_column, const double bucket_sz_threshold, const size_t block_size_x, const size_t grid_size_x)
 

Macro Definition Documentation

#define VALID_POS_FLAG   0

Definition at line 152 of file HashJoinRuntimeGpu.cu.

Referenced by set_valid_pos(), and set_valid_pos_flag().

Function Documentation

void approximate_distinct_tuples_on_device ( uint8_t *  hll_buffer,
const uint32_t  b,
const GenericKeyHandler key_handler,
const size_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 546 of file HashJoinRuntimeGpu.cu.

Referenced by BaselineJoinHashTable::approximateTupleCount().

551  {
552  approximate_distinct_tuples_impl_gpu<<<grid_size_x, block_size_x>>>(
553  hll_buffer, nullptr, b, num_elems, key_handler);
554 }

+ Here is the caller graph for this function:

void approximate_distinct_tuples_on_device_overlaps ( uint8_t *  hll_buffer,
const uint32_t  b,
int32_t *  row_counts_buffer,
const OverlapsKeyHandler key_handler,
const size_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 531 of file HashJoinRuntimeGpu.cu.

References inclusive_scan().

Referenced by OverlapsJoinHashTable::approximateTupleCount().

537  {
538  approximate_distinct_tuples_impl_gpu<<<grid_size_x, block_size_x>>>(
539  hll_buffer, row_counts_buffer, b, num_elems, key_handler);
540 
 541  auto row_counts_buffer_ptr = thrust::device_pointer_cast(row_counts_buffer);
 542  thrust::inclusive_scan(
 543  row_counts_buffer_ptr, row_counts_buffer_ptr + num_elems, row_counts_buffer_ptr);
544 }
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void compute_bucket_sizes_on_device ( double *  bucket_sizes_buffer,
const JoinColumn join_column,
const double  bucket_sz_threshold,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 556 of file HashJoinRuntimeGpu.cu.

Referenced by OverlapsJoinHashTable::computeBucketSizes().

560  {
561  compute_bucket_sizes_impl_gpu<2><<<grid_size_x, block_size_x>>>(
562  bucket_sizes_buffer, join_column, bucket_sz_threshold, block_size_x, grid_size_x);
563 }

+ Here is the caller graph for this function:

void fill_baseline_hash_join_buff_on_device_32 ( int8_t *  hash_buff,
const size_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const GenericKeyHandler key_handler,
const size_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 409 of file HashJoinRuntimeGpu.cu.

Referenced by BaselineJoinHashTable::initHashTableOnGpu().

418  {
419  fill_baseline_hash_join_buff_wrapper<int32_t>
420  <<<grid_size_x, block_size_x>>>(hash_buff,
421  entry_count,
422  invalid_slot_val,
423  key_component_count,
424  with_val_slot,
425  dev_err_buff,
426  key_handler,
427  num_elems);
428 }

+ Here is the caller graph for this function:

void fill_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_buff,
const size_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const GenericKeyHandler key_handler,
const size_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 430 of file HashJoinRuntimeGpu.cu.

Referenced by BaselineJoinHashTable::initHashTableOnGpu().

439  {
440  fill_baseline_hash_join_buff_wrapper<unsigned long long>
441  <<<grid_size_x, block_size_x>>>(hash_buff,
442  entry_count,
443  invalid_slot_val,
444  key_component_count,
445  with_val_slot,
446  dev_err_buff,
447  key_handler,
448  num_elems);
449 }

+ Here is the caller graph for this function:

template<typename T , typename KEY_HANDLER >
__global__ void fill_baseline_hash_join_buff_wrapper ( int8_t *  hash_buff,
const size_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  err,
const KEY_HANDLER *  key_handler,
const size_t  num_elems 
)

Definition at line 389 of file HashJoinRuntimeGpu.cu.

References fill_baseline_hash_join_buff(), and SUFFIX.

396  {
397  int partial_err = SUFFIX(fill_baseline_hash_join_buff)<T>(hash_buff,
398  entry_count,
399  invalid_slot_val,
400  key_component_count,
401  with_val_slot,
402  key_handler,
403  num_elems,
404  -1,
405  -1);
406  atomicCAS(err, 0, partial_err);
407 }
#define SUFFIX(name)
DEVICE int SUFFIX() fill_baseline_hash_join_buff(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const FILL_HANDLER *f, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)

+ Here is the call graph for this function:

__global__ void fill_hash_join_buff_bucketized_wrapper ( int32_t *  buff,
const int32_t  invalid_slot_val,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
int *  err,
const int64_t  bucket_normalization 
)

Definition at line 31 of file HashJoinRuntimeGpu.cu.

References fill_hash_join_buff_bucketized(), and SUFFIX.

37  {
38  int partial_err = SUFFIX(fill_hash_join_buff_bucketized)(buff,
39  invalid_slot_val,
40  join_column,
41  type_info,
42  NULL,
43  NULL,
44  -1,
45  -1,
46  bucket_normalization);
47  atomicCAS(err, 0, partial_err);
48 }
#define SUFFIX(name)
DEVICE int SUFFIX() fill_hash_join_buff_bucketized(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)

+ Here is the call graph for this function:

void fill_hash_join_buff_on_device ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 62 of file HashJoinRuntimeGpu.cu.

68  {
69  fill_hash_join_buff_wrapper<<<grid_size_x, block_size_x>>>(
70  buff, invalid_slot_val, join_column, type_info, dev_err_buff);
71 }
void fill_hash_join_buff_on_device_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const size_t  block_size_x,
const size_t  grid_size_x,
const int64_t  bucket_normalization 
)

Definition at line 50 of file HashJoinRuntimeGpu.cu.

Referenced by JoinHashTable::initHashTableForDevice().

57  {
58  fill_hash_join_buff_bucketized_wrapper<<<grid_size_x, block_size_x>>>(
59  buff, invalid_slot_val, join_column, type_info, dev_err_buff, bucket_normalization);
60 }

+ Here is the caller graph for this function:

void fill_hash_join_buff_on_device_sharded ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const ShardInfo  shard_info,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 125 of file HashJoinRuntimeGpu.cu.

132  {
133  fill_hash_join_buff_wrapper_sharded<<<grid_size_x, block_size_x>>>(
134  buff, invalid_slot_val, join_column, type_info, shard_info, dev_err_buff);
135 }
void fill_hash_join_buff_on_device_sharded_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const ShardInfo  shard_info,
const size_t  block_size_x,
const size_t  grid_size_x,
const int64_t  bucket_normalization 
)

Definition at line 105 of file HashJoinRuntimeGpu.cu.

Referenced by JoinHashTable::initHashTableForDevice().

114  {
115  fill_hash_join_buff_wrapper_sharded_bucketized<<<grid_size_x, block_size_x>>>(
116  buff,
117  invalid_slot_val,
118  join_column,
119  type_info,
120  shard_info,
121  dev_err_buff,
122  bucket_normalization);
123 }

+ Here is the caller graph for this function:

__global__ void fill_hash_join_buff_wrapper ( int32_t *  buff,
const int32_t  invalid_slot_val,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
int *  err 
)

Definition at line 21 of file HashJoinRuntimeGpu.cu.

References fill_hash_join_buff(), and SUFFIX.

25  {
26  int partial_err = SUFFIX(fill_hash_join_buff)(
27  buff, invalid_slot_val, join_column, type_info, NULL, NULL, -1, -1);
28  atomicCAS(err, 0, partial_err);
29 }
#define SUFFIX(name)
DEVICE int SUFFIX() fill_hash_join_buff(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)

+ Here is the call graph for this function:

__global__ void fill_hash_join_buff_wrapper_sharded ( int32_t *  buff,
const int32_t  invalid_slot_val,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const ShardInfo  shard_info,
int *  err 
)

Definition at line 94 of file HashJoinRuntimeGpu.cu.

References fill_hash_join_buff_sharded(), and SUFFIX.

99  {
100  int partial_err = SUFFIX(fill_hash_join_buff_sharded)(
101  buff, invalid_slot_val, join_column, type_info, shard_info, NULL, NULL, -1, -1);
102  atomicCAS(err, 0, partial_err);
103 }
#define SUFFIX(name)
DEVICE int SUFFIX() fill_hash_join_buff_sharded(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)

+ Here is the call graph for this function:

__global__ void fill_hash_join_buff_wrapper_sharded_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const ShardInfo  shard_info,
int *  err,
const int64_t  bucket_normalization 
)

Definition at line 73 of file HashJoinRuntimeGpu.cu.

References fill_hash_join_buff_sharded_bucketized(), and SUFFIX.

80  {
81  int partial_err = SUFFIX(fill_hash_join_buff_sharded_bucketized)(buff,
82  invalid_slot_val,
83  join_column,
84  type_info,
85  shard_info,
86  NULL,
87  NULL,
88  -1,
89  -1,
90  bucket_normalization);
91  atomicCAS(err, 0, partial_err);
92 }
#define SUFFIX(name)
DEVICE int SUFFIX() fill_hash_join_buff_sharded_bucketized(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)

+ Here is the call graph for this function:

template<typename T , typename KEY_HANDLER >
void fill_one_to_many_baseline_hash_table_on_device ( int32_t *  buff,
const T *  composite_key_dict,
const size_t  hash_entry_count,
const int32_t  invalid_slot_val,
const KEY_HANDLER *  key_handler,
const size_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 321 of file HashJoinRuntimeGpu.cu.

References inclusive_scan().

328  {
329  auto pos_buff = buff;
330  auto count_buff = buff + hash_entry_count;
331  cudaMemset(count_buff, 0, hash_entry_count * sizeof(int32_t));
332  count_matches_baseline_gpu<<<grid_size_x, block_size_x>>>(
333  count_buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
334 
335  set_valid_pos_flag<<<grid_size_x, block_size_x>>>(
336  pos_buff, count_buff, hash_entry_count);
337 
 338  auto count_buff_dev_ptr = thrust::device_pointer_cast(count_buff);
 339  thrust::inclusive_scan(
 340  count_buff_dev_ptr, count_buff_dev_ptr + hash_entry_count, count_buff_dev_ptr);
341  set_valid_pos<<<grid_size_x, block_size_x>>>(pos_buff, count_buff, hash_entry_count);
342  cudaMemset(count_buff, 0, hash_entry_count * sizeof(int32_t));
343  fill_row_ids_baseline_gpu<<<grid_size_x, block_size_x>>>(buff,
344  composite_key_dict,
345  hash_entry_count,
346  invalid_slot_val,
347  key_handler,
348  num_elems);
349 }
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)

+ Here is the call graph for this function:

void fill_one_to_many_baseline_hash_table_on_device_32 ( int32_t *  buff,
const int32_t *  composite_key_dict,
const size_t  hash_entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const GenericKeyHandler key_handler,
const size_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 473 of file HashJoinRuntimeGpu.cu.

Referenced by BaselineJoinHashTable::initHashTableOnGpu().

482  {
483  fill_one_to_many_baseline_hash_table_on_device<int32_t>(buff,
484  composite_key_dict,
485  hash_entry_count,
486  invalid_slot_val,
487  key_handler,
488  num_elems,
489  block_size_x,
490  grid_size_x);
491 }

+ Here is the caller graph for this function:

void fill_one_to_many_baseline_hash_table_on_device_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const size_t  hash_entry_count,
const int32_t  invalid_slot_val,
const GenericKeyHandler key_handler,
const size_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 493 of file HashJoinRuntimeGpu.cu.

Referenced by BaselineJoinHashTable::initHashTableOnGpu().

501  {
502  fill_one_to_many_baseline_hash_table_on_device<int64_t>(buff,
503  composite_key_dict,
504  hash_entry_count,
505  invalid_slot_val,
506  key_handler,
507  num_elems,
508  block_size_x,
509  grid_size_x);
510 }

+ Here is the caller graph for this function:

void fill_one_to_many_hash_table_on_device ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 204 of file HashJoinRuntimeGpu.cu.

References count_matches(), fill_one_to_many_hash_table_on_device_impl(), fill_row_ids(), HashEntryInfo::hash_entry_count, and SUFFIX.

Referenced by JoinHashTable::initOneToManyHashTable().

210  {
211  auto hash_entry_count = hash_entry_info.hash_entry_count;
212  auto count_matches_func = [hash_entry_count,
213  grid_size_x,
214  block_size_x,
215  count_buff = buff + hash_entry_count,
216  invalid_slot_val,
217  join_column,
218  type_info] {
219  SUFFIX(count_matches)<<<grid_size_x, block_size_x>>>(
220  count_buff, invalid_slot_val, join_column, type_info);
221  };
222 
223  auto fill_row_ids_func = [grid_size_x,
224  block_size_x,
225  buff,
226  hash_entry_count,
227  invalid_slot_val,
228  join_column,
229  type_info] {
230  SUFFIX(fill_row_ids)<<<grid_size_x, block_size_x>>>(
231  buff, hash_entry_count, invalid_slot_val, join_column, type_info);
232  };
233 
 234  fill_one_to_many_hash_table_on_device_impl(buff,
 235  hash_entry_count,
236  invalid_slot_val,
237  join_column,
238  type_info,
239  block_size_x,
240  grid_size_x,
241  count_matches_func,
242  fill_row_ids_func);
243 }
#define SUFFIX(name)
void fill_one_to_many_hash_table_on_device_impl(int32_t *buff, const int32_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const size_t block_size_x, const size_t grid_size_x, COUNT_MATCHES_FUNCTOR count_matches_func, FILL_ROW_IDS_FUNCTOR fill_row_ids_func)
GLOBAL void SUFFIX() fill_row_ids(int32_t *buff, const int32_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
size_t hash_entry_count
GLOBAL void SUFFIX() count_matches(int32_t *count_buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fill_one_to_many_hash_table_on_device_bucketized ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 245 of file HashJoinRuntimeGpu.cu.

References HashEntryInfo::bucket_normalization, count_matches_bucketized(), fill_one_to_many_hash_table_on_device_impl(), fill_row_ids_bucketized(), HashEntryInfo::getNormalizedHashEntryCount(), and SUFFIX.

Referenced by JoinHashTable::initOneToManyHashTable().

251  {
252  auto hash_entry_count = hash_entry_info.getNormalizedHashEntryCount();
253  auto count_matches_func = [grid_size_x,
254  block_size_x,
255  count_buff = buff + hash_entry_count,
256  invalid_slot_val,
257  join_column,
258  type_info,
259  bucket_normalization =
260  hash_entry_info.bucket_normalization] {
261  SUFFIX(count_matches_bucketized)<<<grid_size_x, block_size_x>>>(
262  count_buff, invalid_slot_val, join_column, type_info, bucket_normalization);
263  };
264 
265  auto fill_row_ids_func = [grid_size_x,
266  block_size_x,
267  buff,
268  hash_entry_count =
269  hash_entry_info.getNormalizedHashEntryCount(),
270  invalid_slot_val,
271  join_column,
272  type_info,
273  bucket_normalization = hash_entry_info.bucket_normalization] {
274  SUFFIX(fill_row_ids_bucketized)<<<grid_size_x, block_size_x>>>(buff,
275  hash_entry_count,
276  invalid_slot_val,
277  join_column,
278  type_info,
279  bucket_normalization);
280  };
281 
 282  fill_one_to_many_hash_table_on_device_impl(buff,
 283  hash_entry_count,
284  invalid_slot_val,
285  join_column,
286  type_info,
287  block_size_x,
288  grid_size_x,
289  count_matches_func,
290  fill_row_ids_func);
291 }
GLOBAL void SUFFIX() count_matches_bucketized(int32_t *count_buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
GLOBAL void SUFFIX() fill_row_ids_bucketized(int32_t *buff, const int32_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
#define SUFFIX(name)
void fill_one_to_many_hash_table_on_device_impl(int32_t *buff, const int32_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const size_t block_size_x, const size_t grid_size_x, COUNT_MATCHES_FUNCTOR count_matches_func, FILL_ROW_IDS_FUNCTOR fill_row_ids_func)
int64_t bucket_normalization
size_t getNormalizedHashEntryCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename COUNT_MATCHES_FUNCTOR , typename FILL_ROW_IDS_FUNCTOR >
void fill_one_to_many_hash_table_on_device_impl ( int32_t *  buff,
const int32_t  hash_entry_count,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const size_t  block_size_x,
const size_t  grid_size_x,
COUNT_MATCHES_FUNCTOR  count_matches_func,
FILL_ROW_IDS_FUNCTOR  fill_row_ids_func 
)

Definition at line 179 of file HashJoinRuntimeGpu.cu.

References inclusive_scan().

Referenced by fill_one_to_many_hash_table_on_device(), and fill_one_to_many_hash_table_on_device_bucketized().

187  {
188  int32_t* pos_buff = buff;
189  int32_t* count_buff = buff + hash_entry_count;
190  cudaMemset(count_buff, 0, hash_entry_count * sizeof(int32_t));
191  count_matches_func();
192 
193  set_valid_pos_flag<<<grid_size_x, block_size_x>>>(
194  pos_buff, count_buff, hash_entry_count);
195 
 196  auto count_buff_dev_ptr = thrust::device_pointer_cast(count_buff);
 197  thrust::inclusive_scan(
 198  count_buff_dev_ptr, count_buff_dev_ptr + hash_entry_count, count_buff_dev_ptr);
199  set_valid_pos<<<grid_size_x, block_size_x>>>(pos_buff, count_buff, hash_entry_count);
200  cudaMemset(count_buff, 0, hash_entry_count * sizeof(int32_t));
201  fill_row_ids_func();
202 }
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fill_one_to_many_hash_table_on_device_sharded ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const ShardInfo shard_info,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 293 of file HashJoinRuntimeGpu.cu.

References count_matches_sharded(), fill_row_ids_sharded(), HashEntryInfo::hash_entry_count, inclusive_scan(), and SUFFIX.

Referenced by JoinHashTable::initOneToManyHashTable().

300  {
301  auto hash_entry_count = hash_entry_info.hash_entry_count;
302  int32_t* pos_buff = buff;
303  int32_t* count_buff = buff + hash_entry_count;
304  cudaMemset(count_buff, 0, hash_entry_count * sizeof(int32_t));
305  SUFFIX(count_matches_sharded)<<<grid_size_x, block_size_x>>>(
306  count_buff, invalid_slot_val, join_column, type_info, shard_info);
307 
308  set_valid_pos_flag<<<grid_size_x, block_size_x>>>(
309  pos_buff, count_buff, hash_entry_count);
310 
 311  auto count_buff_dev_ptr = thrust::device_pointer_cast(count_buff);
 312  thrust::inclusive_scan(
 313  count_buff_dev_ptr, count_buff_dev_ptr + hash_entry_count, count_buff_dev_ptr);
314  set_valid_pos<<<grid_size_x, block_size_x>>>(pos_buff, count_buff, hash_entry_count);
315  cudaMemset(count_buff, 0, hash_entry_count * sizeof(int32_t));
316  SUFFIX(fill_row_ids_sharded)<<<grid_size_x, block_size_x>>>(
317  buff, hash_entry_count, invalid_slot_val, join_column, type_info, shard_info);
318 }
GLOBAL void SUFFIX() fill_row_ids_sharded(int32_t *buff, const int32_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define SUFFIX(name)
GLOBAL void SUFFIX() count_matches_sharded(int32_t *count_buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)
size_t hash_entry_count

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void init_baseline_hash_join_buff_on_device_32 ( int8_t *  hash_join_buff,
const int32_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 366 of file HashJoinRuntimeGpu.cu.

Referenced by OverlapsJoinHashTable::initHashTableOnGpu(), and BaselineJoinHashTable::initHashTableOnGpu().

372  {
373  init_baseline_hash_join_buff_wrapper<int32_t><<<grid_size_x, block_size_x>>>(
374  hash_join_buff, entry_count, key_component_count, with_val_slot, invalid_slot_val);
375 }

+ Here is the caller graph for this function:

void init_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_join_buff,
const int32_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 377 of file HashJoinRuntimeGpu.cu.

Referenced by OverlapsJoinHashTable::initHashTableOnGpu(), and BaselineJoinHashTable::initHashTableOnGpu().

383  {
384  init_baseline_hash_join_buff_wrapper<int64_t><<<grid_size_x, block_size_x>>>(
385  hash_join_buff, entry_count, key_component_count, with_val_slot, invalid_slot_val);
386 }

+ Here is the caller graph for this function:

template<typename T >
__global__ void init_baseline_hash_join_buff_wrapper ( int8_t *  hash_join_buff,
const size_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val 
)

Definition at line 352 of file HashJoinRuntimeGpu.cu.

References init_baseline_hash_join_buff(), and SUFFIX.

356  {  // Kernel body: delegate to the shared (CPU/GPU) init routine via the SUFFIX macro.
357  SUFFIX(init_baseline_hash_join_buff)<T>(hash_join_buff,
358  entry_count,
359  key_component_count,
360  with_val_slot,
361  invalid_slot_val,
362  -1,  // cpu_thread_idx: -1 selects the GPU code path (see the shared routine's trailing params) — TODO confirm in HashJoinRuntime.cpp
363  -1); // cpu_thread_count: likewise unused/-1 on the GPU path
364 }
#define SUFFIX(name)
DEVICE void SUFFIX() init_baseline_hash_join_buff(int8_t *hash_buff, const size_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)

+ Here is the call graph for this function:

void init_hash_join_buff_on_device ( int32_t *  buff,
const int32_t  hash_entry_count,
const int32_t  invalid_slot_val,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 143 of file HashJoinRuntimeGpu.cu.

Referenced by JoinHashTable::initHashTableForDevice(), OverlapsJoinHashTable::initHashTableOnGpu(), BaselineJoinHashTable::initHashTableOnGpu(), and JoinHashTable::initOneToManyHashTable().

147  {  // Host wrapper: fill a one-to-one hash-join buffer with invalid_slot_val on the GPU.
148  init_hash_join_buff_wrapper<<<grid_size_x, block_size_x>>>(  // 1-D launch shape supplied by the caller
149  buff, hash_entry_count, invalid_slot_val);  // NOTE(review): launch is unchecked — errors surface only at a later sync; confirm the caller checks.
150 }

+ Here is the caller graph for this function:

__global__ void init_hash_join_buff_wrapper ( int32_t *  buff,
const int32_t  hash_entry_count,
const int32_t  invalid_slot_val 
)

Definition at line 137 of file HashJoinRuntimeGpu.cu.

References init_hash_join_buff(), and SUFFIX.

139  {  // Kernel body: delegate to the shared (CPU/GPU) init routine via the SUFFIX macro.
140  SUFFIX(init_hash_join_buff)(buff, hash_entry_count, invalid_slot_val, -1, -1);  // trailing -1, -1 are cpu_thread_idx / cpu_thread_count — unused on the GPU path, per the shared declaration
141 }
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int32_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define SUFFIX(name)

+ Here is the call graph for this function:

void overlaps_fill_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_buff,
const size_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const OverlapsKeyHandler key_handler,
const size_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 451 of file HashJoinRuntimeGpu.cu.

Referenced by OverlapsJoinHashTable::initHashTableOnGpu().

461  {  // Host wrapper: populate the 64-bit baseline hash table for overlaps joins on the GPU.
462  fill_baseline_hash_join_buff_wrapper<unsigned long long>  // NOTE(review): sibling one-to-many path instantiates with int64_t; presumably both are 64-bit-wide on target platforms — confirm intentional
463  <<<grid_size_x, block_size_x>>>(hash_buff,
464  entry_count,
465  invalid_slot_val,
466  key_component_count,
467  with_val_slot,
468  dev_err_buff,  // device-side error flag; kernel reports collisions/failures here rather than via return value
469  key_handler,
470  num_elems);
471 }

+ Here is the caller graph for this function:

void overlaps_fill_one_to_many_baseline_hash_table_on_device_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const size_t  hash_entry_count,
const int32_t  invalid_slot_val,
const OverlapsKeyHandler key_handler,
const size_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 512 of file HashJoinRuntimeGpu.cu.

Referenced by OverlapsJoinHashTable::initHashTableOnGpu().

520  {  // Host wrapper: build the one-to-many baseline hash table (64-bit composite keys) for overlaps joins.
521  fill_one_to_many_baseline_hash_table_on_device<int64_t>(buff,  // delegates launch-config handling to the templated helper
522  composite_key_dict,
523  hash_entry_count,
524  invalid_slot_val,
525  key_handler,
526  num_elems,
527  block_size_x,
528  grid_size_x);
529 }

+ Here is the caller graph for this function:

__global__ void set_valid_pos ( int32_t *  pos_buff,
int32_t *  count_buff,
const int32_t  entry_count 
)

Definition at line 166 of file HashJoinRuntimeGpu.cu.

References VALID_POS_FLAG.

168  {  // Kernel: turn scanned match counts into start offsets for entries previously marked valid.
169  const int32_t start = threadIdx.x + blockDim.x * blockIdx.x;  // flat global thread index
170  const int32_t step = blockDim.x * gridDim.x;  // grid-stride loop: total threads in the grid
171  for (int32_t i = start; i < entry_count; i += step) {
172  if (VALID_POS_FLAG == pos_buff[i]) {  // only entries flagged by set_valid_pos_flag get an offset
173  pos_buff[i] = !i ? 0 : count_buff[i - 1];  // entry 0 starts at 0; entry i starts at the running total of earlier entries — assumes count_buff holds an inclusive prefix sum by now (see inclusive_scan); TODO confirm call order
174  }
175  }
176 }
#define VALID_POS_FLAG
__global__ void set_valid_pos_flag ( int32_t *  pos_buff,
const int32_t *  count_buff,
const int32_t  entry_count 
)

Definition at line 154 of file HashJoinRuntimeGpu.cu.

References VALID_POS_FLAG.

156  {  // Kernel: mark every hash entry that has at least one match with VALID_POS_FLAG.
157  const int32_t start = threadIdx.x + blockDim.x * blockIdx.x;  // flat global thread index
158  const int32_t step = blockDim.x * gridDim.x;  // grid-stride loop: total threads in the grid
159  for (int32_t i = start; i < entry_count; i += step) {
160  if (count_buff[i]) {  // non-zero match count → entry is occupied
161  pos_buff[i] = VALID_POS_FLAG;  // sentinel consumed by set_valid_pos, which overwrites it with the real offset
162  }
163  }
164 }
#define VALID_POS_FLAG