OmniSciDB
bf83d84833
Go to the source code of this file.
Macros
#define checkCudaErrors(err) CHECK_EQ(err, cudaSuccess)
#define VALID_POS_FLAG 0
Functions
template<typename F, typename... ARGS>
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)
__global__ void fill_hash_join_buff_wrapper(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, int *err)
__global__ void fill_hash_join_buff_bucketized_wrapper(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, int *err, const int64_t bucket_normalization)
void fill_hash_join_buff_on_device_bucketized(int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int64_t bucket_normalization)
void fill_hash_join_buff_on_device(int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info)
__global__ void fill_hash_join_buff_wrapper_sharded_bucketized(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, int *err, const int64_t bucket_normalization)
__global__ void fill_hash_join_buff_wrapper_sharded(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, int *err)
void fill_hash_join_buff_on_device_sharded_bucketized(int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const int64_t bucket_normalization)
void fill_hash_join_buff_on_device_sharded(int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info)
__global__ void init_hash_join_buff_wrapper(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val)
void init_hash_join_buff_on_device(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val)
__global__ void set_valid_pos_flag(int32_t *pos_buff, const int32_t *count_buff, const int64_t entry_count)
__global__ void set_valid_pos(int32_t *pos_buff, int32_t *count_buff, const int64_t entry_count)
template<typename COUNT_MATCHES_FUNCTOR, typename FILL_ROW_IDS_FUNCTOR>
void fill_one_to_many_hash_table_on_device_impl(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, COUNT_MATCHES_FUNCTOR count_matches_func, FILL_ROW_IDS_FUNCTOR fill_row_ids_func)
void fill_one_to_many_hash_table_on_device(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info)
void fill_one_to_many_hash_table_on_device_bucketized(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info)
void fill_one_to_many_hash_table_on_device_sharded(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info)
template<typename T, typename KEY_HANDLER>
void fill_one_to_many_baseline_hash_table_on_device(int32_t *buff, const T *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const KEY_HANDLER *key_handler, const size_t num_elems)
template<typename T>
__global__ void init_baseline_hash_join_buff_wrapper(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
template<typename T, typename KEY_HANDLER>
__global__ void fill_baseline_hash_join_buff_wrapper(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *err, const KEY_HANDLER *key_handler, const int64_t num_elems)
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
void overlaps_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const GenericKeyHandler *key_handler, const int64_t num_elems)
void overlaps_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void approximate_distinct_tuples_on_device_overlaps(uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
void approximate_distinct_tuples_on_device(uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const int64_t num_elems)
void compute_bucket_sizes_on_device(double *bucket_sizes_buffer, const JoinColumn *join_column, const JoinColumnTypeInfo *type_info, const double bucket_sz_threshold)
#define checkCudaErrors(err) CHECK_EQ(err, cudaSuccess)
Definition at line 21 of file HashJoinRuntimeGpu.cu.
Referenced by cuda_kernel_launch_wrapper().
#define VALID_POS_FLAG 0
Definition at line 167 of file HashJoinRuntimeGpu.cu.
Referenced by set_valid_pos(), and set_valid_pos_flag().
void approximate_distinct_tuples_on_device(uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const int64_t num_elems)
Definition at line 550 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
Referenced by BaselineJoinHashTable::approximateTupleCount().
void approximate_distinct_tuples_on_device_overlaps(uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
Definition at line 533 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper(), and inclusive_scan().
Referenced by OverlapsJoinHashTable::approximateTupleCount().
void compute_bucket_sizes_on_device(double *bucket_sizes_buffer, const JoinColumn *join_column, const JoinColumnTypeInfo *type_info, const double bucket_sz_threshold)
Definition at line 562 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
Referenced by OverlapsJoinHashTable::computeBucketSizes().
template<typename F, typename... ARGS>
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)
Definition at line 24 of file HashJoinRuntimeGpu.cu.
References checkCudaErrors.
Referenced by approximate_distinct_tuples_on_device(), approximate_distinct_tuples_on_device_overlaps(), compute_bucket_sizes_on_device(), fill_baseline_hash_join_buff_on_device_32(), fill_baseline_hash_join_buff_on_device_64(), fill_hash_join_buff_on_device(), fill_hash_join_buff_on_device_bucketized(), fill_hash_join_buff_on_device_sharded(), fill_hash_join_buff_on_device_sharded_bucketized(), fill_one_to_many_baseline_hash_table_on_device(), fill_one_to_many_hash_table_on_device(), fill_one_to_many_hash_table_on_device_bucketized(), fill_one_to_many_hash_table_on_device_impl(), fill_one_to_many_hash_table_on_device_sharded(), init_baseline_hash_join_buff_on_device_32(), init_baseline_hash_join_buff_on_device_64(), init_hash_join_buff_on_device(), and overlaps_fill_baseline_hash_join_buff_on_device_64().
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
Definition at line 426 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
Referenced by fill_baseline_hash_join_buff_on_device().
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
Definition at line 446 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
template<typename T, typename KEY_HANDLER>
__global__ void fill_baseline_hash_join_buff_wrapper(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *err, const KEY_HANDLER *key_handler, const int64_t num_elems)
Definition at line 406 of file HashJoinRuntimeGpu.cu.
References fill_baseline_hash_join_buff() and SUFFIX.
__global__ void fill_hash_join_buff_bucketized_wrapper(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, int *err, const int64_t bucket_normalization)
Definition at line 42 of file HashJoinRuntimeGpu.cu.
References fill_hash_join_buff_bucketized(), and SUFFIX.
Referenced by fill_hash_join_buff_on_device_bucketized().
void fill_hash_join_buff_on_device(int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info)
Definition at line 76 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_wrapper().
void fill_hash_join_buff_on_device_bucketized(int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int64_t bucket_normalization)
Definition at line 61 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_bucketized_wrapper().
void fill_hash_join_buff_on_device_sharded(int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info)
Definition at line 139 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_wrapper_sharded().
void fill_hash_join_buff_on_device_sharded_bucketized(int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const int64_t bucket_normalization)
Definition at line 121 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_wrapper_sharded_bucketized().
__global__ void fill_hash_join_buff_wrapper(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, int *err)
Definition at line 32 of file HashJoinRuntimeGpu.cu.
References fill_hash_join_buff(), and SUFFIX.
Referenced by fill_hash_join_buff_on_device().
__global__ void fill_hash_join_buff_wrapper_sharded(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, int *err)
Definition at line 110 of file HashJoinRuntimeGpu.cu.
References fill_hash_join_buff_sharded(), and SUFFIX.
Referenced by fill_hash_join_buff_on_device_sharded().
__global__ void fill_hash_join_buff_wrapper_sharded_bucketized(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, int *err, const int64_t bucket_normalization)
Definition at line 89 of file HashJoinRuntimeGpu.cu.
References fill_hash_join_buff_sharded_bucketized(), and SUFFIX.
Referenced by fill_hash_join_buff_on_device_sharded_bucketized().
template<typename T, typename KEY_HANDLER>
void fill_one_to_many_baseline_hash_table_on_device(int32_t *buff, const T *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const KEY_HANDLER *key_handler, const size_t num_elems)
Definition at line 331 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper(), inclusive_scan(), set_valid_pos(), and set_valid_pos_flag().
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
Definition at line 487 of file HashJoinRuntimeGpu.cu.
Referenced by fill_one_to_many_baseline_hash_table_on_device().
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const GenericKeyHandler *key_handler, const int64_t num_elems)
Definition at line 503 of file HashJoinRuntimeGpu.cu.
void fill_one_to_many_hash_table_on_device(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info)
Definition at line 217 of file HashJoinRuntimeGpu.cu.
References count_matches(), cuda_kernel_launch_wrapper(), fill_one_to_many_hash_table_on_device_impl(), fill_row_ids(), HashEntryInfo::hash_entry_count, and SUFFIX.
void fill_one_to_many_hash_table_on_device_bucketized(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info)
Definition at line 251 of file HashJoinRuntimeGpu.cu.
References HashEntryInfo::bucket_normalization, count_matches_bucketized(), cuda_kernel_launch_wrapper(), fill_one_to_many_hash_table_on_device_impl(), fill_row_ids_bucketized(), HashEntryInfo::getNormalizedHashEntryCount(), and SUFFIX.
template<typename COUNT_MATCHES_FUNCTOR, typename FILL_ROW_IDS_FUNCTOR>
void fill_one_to_many_hash_table_on_device_impl(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, COUNT_MATCHES_FUNCTOR count_matches_func, FILL_ROW_IDS_FUNCTOR fill_row_ids_func)
Definition at line 194 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper(), inclusive_scan(), set_valid_pos(), and set_valid_pos_flag().
Referenced by fill_one_to_many_hash_table_on_device(), and fill_one_to_many_hash_table_on_device_bucketized().
void fill_one_to_many_hash_table_on_device_sharded(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info)
Definition at line 297 of file HashJoinRuntimeGpu.cu.
References count_matches_sharded(), cuda_kernel_launch_wrapper(), fill_row_ids_sharded(), HashEntryInfo::hash_entry_count, inclusive_scan(), set_valid_pos(), set_valid_pos_flag(), and SUFFIX.
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
Definition at line 379 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
Definition at line 392 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().
template<typename T>
__global__ void init_baseline_hash_join_buff_wrapper(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
Definition at line 365 of file HashJoinRuntimeGpu.cu.
References init_baseline_hash_join_buff() and SUFFIX.
void init_hash_join_buff_on_device(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val)
Definition at line 160 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper(), and init_hash_join_buff_wrapper().
Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().
__global__ void init_hash_join_buff_wrapper(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val)
Definition at line 154 of file HashJoinRuntimeGpu.cu.
References init_hash_join_buff(), and SUFFIX.
Referenced by init_hash_join_buff_on_device().
void overlaps_fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
Definition at line 466 of file HashJoinRuntimeGpu.cu.
References cuda_kernel_launch_wrapper().
void overlaps_fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
Definition at line 518 of file HashJoinRuntimeGpu.cu.
__global__ void set_valid_pos(int32_t *pos_buff, int32_t *count_buff, const int64_t entry_count)
Definition at line 181 of file HashJoinRuntimeGpu.cu.
References VALID_POS_FLAG.
Referenced by fill_one_to_many_baseline_hash_table_on_device(), fill_one_to_many_hash_table_on_device_impl(), and fill_one_to_many_hash_table_on_device_sharded().
__global__ void set_valid_pos_flag(int32_t *pos_buff, const int32_t *count_buff, const int64_t entry_count)
Definition at line 169 of file HashJoinRuntimeGpu.cu.
References VALID_POS_FLAG.
Referenced by fill_one_to_many_baseline_hash_table_on_device(), fill_one_to_many_hash_table_on_device_impl(), and fill_one_to_many_hash_table_on_device_sharded().