OmniSciDB  085a039ca4
HashJoinRuntime.h File Reference
#include <cstddef>
#include <cstdint>
#include <vector>
#include "../../../Shared/SqlTypesLayout.h"
#include "../../../Shared/sqltypes.h"
#include "../../RuntimeFunctions.h"
#include "../../../Shared/funcannotations.h"


Classes

struct  HashEntryInfo
 
struct  JoinChunk
 
struct  JoinColumn
 
struct  JoinColumnTypeInfo
 
struct  JoinBucketInfo
 
struct  ShardInfo
 

Enumerations

enum  ColumnType { SmallDate = 0, Signed = 1, Unsigned = 2, Double = 3 }
 

Functions

void init_hash_join_buff (int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void init_hash_join_buff_on_device (int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val)
 
void init_baseline_hash_join_buff_32 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void init_baseline_hash_join_buff_64 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void init_baseline_hash_join_buff_on_device_32 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
 
void init_baseline_hash_join_buff_on_device_64 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
 
ColumnType get_join_column_type_kind (const SQLTypeInfo &ti)
 
int fill_hash_join_buff_bucketized (int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
 
int fill_hash_join_buff (int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void fill_hash_join_buff_on_device (int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info)
 
void fill_hash_join_buff_on_device_bucketized (int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int64_t bucket_normalization)
 
void fill_hash_join_buff_on_device_sharded (int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info)
 
void fill_hash_join_buff_on_device_sharded_bucketized (int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const int64_t bucket_normalization)
 
void fill_one_to_many_hash_table (int32_t *buff, const HashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const unsigned cpu_thread_count)
 
void fill_one_to_many_hash_table_bucketized (int32_t *buff, const HashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const unsigned cpu_thread_count)
 
void fill_one_to_many_hash_table_sharded_bucketized (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const unsigned cpu_thread_count)
 
void fill_one_to_many_hash_table_on_device (int32_t *buff, const HashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info)
 
void fill_one_to_many_hash_table_on_device_bucketized (int32_t *buff, const HashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info)
 
void fill_one_to_many_hash_table_on_device_sharded (int32_t *buff, const HashEntryInfo hash_entry_info, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info)
 
int fill_baseline_hash_join_buff_32 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
int overlaps_fill_baseline_hash_join_buff_32 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
int range_fill_baseline_hash_join_buff_32 (int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
int fill_baseline_hash_join_buff_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
int overlaps_fill_baseline_hash_join_buff_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
int range_fill_baseline_hash_join_buff_64 (int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const RangeKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void fill_baseline_hash_join_buff_on_device_32 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
 
void fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const bool for_semi_join, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
 
void overlaps_fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
 
void range_fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const RangeKeyHandler *key_handler, const size_t num_elems)
 
void fill_one_to_many_baseline_hash_table_32 (int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join=false, const bool is_geo_compressed=false)
 
void fill_one_to_many_baseline_hash_table_64 (int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const int32_t * > &sd_inner_to_outer_translation_maps, const std::vector< int32_t > &sd_min_inner_elems, const int32_t cpu_thread_count, const bool is_range_join=false, const bool is_geo_compressed=false)
 
void fill_one_to_many_baseline_hash_table_on_device_32 (int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
 
void fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
 
void overlaps_fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
 
void range_fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const RangeKeyHandler *key_handler, const size_t num_elems)
 
void approximate_distinct_tuples (uint8_t *hll_buffer_all_cpus, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const int thread_count)
 
void approximate_distinct_tuples_overlaps (uint8_t *hll_buffer_all_cpus, std::vector< int32_t > &row_counts, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_buckets_per_key, const int thread_count)
 
void approximate_distinct_tuples_range (uint8_t *hll_buffer_all_cpus, std::vector< int32_t > &row_counts, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_buckets_per_key, const bool is_compressed, const int thread_count)
 
void approximate_distinct_tuples_on_device (uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const int64_t num_elems)
 
void approximate_distinct_tuples_on_device_overlaps (uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
 
void compute_bucket_sizes_on_cpu (std::vector< double > &bucket_sizes_for_dimension, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const std::vector< double > &bucket_size_thresholds, const int thread_count)
 
void approximate_distinct_tuples_on_device_range (uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const RangeKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void compute_bucket_sizes_on_device (double *bucket_sizes_buffer, const JoinColumn *join_column, const JoinColumnTypeInfo *type_info, const double *bucket_size_thresholds)
 

Variables

const size_t g_maximum_conditions_to_coalesce {8}
 

Enumeration Type Documentation

enum ColumnType
Enumerator
SmallDate 
Signed 
Unsigned 
Double 

Definition at line 129 of file HashJoinRuntime.h.

Function Documentation

void approximate_distinct_tuples ( uint8_t *  hll_buffer_all_cpus,
const uint32_t  b,
const size_t  padded_size_bytes,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const int  thread_count 
)

Definition at line 2149 of file HashJoinRuntime.cpp.

References approximate_distinct_tuples_impl(), threading_serial::async(), CHECK, and CHECK_EQ.

Referenced by BaselineJoinHashTable::approximateTupleCount().

2154  {
2155  CHECK_EQ(join_column_per_key.size(), type_info_per_key.size());
2156  CHECK(!join_column_per_key.empty());
2157 
2158  std::vector<std::future<void>> approx_distinct_threads;
2159  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2160  approx_distinct_threads.push_back(std::async(
2161  std::launch::async,
2162  [&join_column_per_key,
2163  &type_info_per_key,
2164  b,
2165  hll_buffer_all_cpus,
2166  padded_size_bytes,
2167  thread_idx,
2168  thread_count] {
2169  auto hll_buffer = hll_buffer_all_cpus + thread_idx * padded_size_bytes;
2170 
2171  const auto key_handler = GenericKeyHandler(join_column_per_key.size(),
2172  false,
2173  &join_column_per_key[0],
2174  &type_info_per_key[0],
2175  nullptr,
2176  nullptr);
2177  approximate_distinct_tuples_impl(hll_buffer,
2178  nullptr,
2179  b,
2180  join_column_per_key[0].num_elems,
2181  &key_handler,
2182  thread_idx,
2183  thread_count);
2184  }));
2185  }
2186  for (auto& child : approx_distinct_threads) {
2187  child.get();
2188  }
2189 }
#define CHECK_EQ(x, y)
Definition: Logger.h:231
future< Result > async(Fn &&fn, Args &&...args)
GLOBAL void SUFFIX() approximate_distinct_tuples_impl(uint8_t *hll_buffer, int32_t *row_count_buffer, const uint32_t b, const int64_t num_elems, const KEY_HANDLER *f, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define CHECK(condition)
Definition: Logger.h:223

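Usage sketch (added for illustration, not generated from the source): each worker thread writes into its own block of padded_size_bytes HLL registers inside hll_buffer_all_cpus, so the caller allocates thread_count contiguous blocks up front. The register count (1 << b), the thread count, and the join_columns / type_infos vectors below are assumptions.

// requires <thread>, <vector>, <cstdint> and "HashJoinRuntime.h"
const uint32_t b = 11;                                // 2^11 HLL registers (assumed)
const size_t padded_size_bytes = size_t(1) << b;      // one byte per register (assumed)
const int thread_count = static_cast<int>(std::thread::hardware_concurrency());
std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes, 0);

approximate_distinct_tuples(hll_buffer_all_cpus.data(),
                            b,
                            padded_size_bytes,
                            join_columns,   // std::vector<JoinColumn>, fetched elsewhere
                            type_infos,     // std::vector<JoinColumnTypeInfo>
                            thread_count);
// The caller combines the per-thread register blocks before reading off the
// cardinality estimate (see BaselineJoinHashTable::approximateTupleCount).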

void approximate_distinct_tuples_on_device ( uint8_t *  hll_buffer,
const uint32_t  b,
const GenericKeyHandler key_handler,
const int64_t  num_elems 
)

Definition at line 606 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

Referenced by BaselineJoinHashTable::approximateTupleCount().

609  {
610  cuda_kernel_launch_wrapper(approximate_distinct_tuples_impl_gpu<GenericKeyHandler>,
611  hll_buffer,
612  nullptr,
613  b,
614  num_elems,
615  key_handler);
616 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void approximate_distinct_tuples_on_device_overlaps ( uint8_t *  hll_buffer,
const uint32_t  b,
int32_t *  row_counts_buffer,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems 
)

Definition at line 572 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and inclusive_scan().

Referenced by OverlapsJoinHashTable::approximateTupleCount().

576  {
577  cuda_kernel_launch_wrapper(approximate_distinct_tuples_impl_gpu<OverlapsKeyHandler>,
578  hll_buffer,
579  row_counts_buffer,
580  b,
581  num_elems,
582  key_handler);
583 
584  auto row_counts_buffer_ptr = thrust::device_pointer_cast(row_counts_buffer);
585  inclusive_scan(
586  row_counts_buffer_ptr, row_counts_buffer_ptr + num_elems, row_counts_buffer_ptr);
587 }
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void approximate_distinct_tuples_on_device_range ( uint8_t *  hll_buffer,
const uint32_t  b,
int32_t *  row_counts_buffer,
const RangeKeyHandler key_handler,
const size_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Definition at line 589 of file HashJoinRuntimeGpu.cu.

References checkCudaErrors, getQueryEngineCudaStream(), and inclusive_scan().

Referenced by RangeJoinHashTable::approximateTupleCount().

595  {
596  auto qe_cuda_stream = getQueryEngineCudaStream();
597  approximate_distinct_tuples_impl_gpu<<<grid_size_x, block_size_x, 0, qe_cuda_stream>>>(
598  hll_buffer, row_counts_buffer, b, num_elems, key_handler);
599  checkCudaErrors(cudaStreamSynchronize(qe_cuda_stream));
600 
601  auto row_counts_buffer_ptr = thrust::device_pointer_cast(row_counts_buffer);
602  inclusive_scan(
603  row_counts_buffer_ptr, row_counts_buffer_ptr + num_elems, row_counts_buffer_ptr);
604 }
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)
CUstream getQueryEngineCudaStream()
Definition: QueryEngine.cpp:3
#define checkCudaErrors(err)
Definition: GpuInitGroups.cu:9


void approximate_distinct_tuples_overlaps ( uint8_t *  hll_buffer_all_cpus,
std::vector< int32_t > &  row_counts,
const uint32_t  b,
const size_t  padded_size_bytes,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const std::vector< JoinBucketInfo > &  join_buckets_per_key,
const int  thread_count 
)

Definition at line 2191 of file HashJoinRuntime.cpp.

References approximate_distinct_tuples_impl(), threading_serial::async(), CHECK, CHECK_EQ, and inclusive_scan().

Referenced by OverlapsJoinHashTable::approximateTupleCount().

2199  {
2200  CHECK_EQ(join_column_per_key.size(), join_buckets_per_key.size());
2201  CHECK_EQ(join_column_per_key.size(), type_info_per_key.size());
2202  CHECK(!join_column_per_key.empty());
2203 
2204  std::vector<std::future<void>> approx_distinct_threads;
2205  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2206  approx_distinct_threads.push_back(std::async(
2207  std::launch::async,
2208  [&join_column_per_key,
2209  &join_buckets_per_key,
2210  &row_counts,
2211  b,
2212  hll_buffer_all_cpus,
2213  padded_size_bytes,
2214  thread_idx,
2215  thread_count] {
2216  auto hll_buffer = hll_buffer_all_cpus + thread_idx * padded_size_bytes;
2217 
2218  const auto key_handler = OverlapsKeyHandler(
2219  join_buckets_per_key[0].inverse_bucket_sizes_for_dimension.size(),
2220  &join_column_per_key[0],
2221  join_buckets_per_key[0].inverse_bucket_sizes_for_dimension.data());
2222  approximate_distinct_tuples_impl(hll_buffer,
2223  row_counts.data(),
2224  b,
2225  join_column_per_key[0].num_elems,
2226  &key_handler,
2227  thread_idx,
2228  thread_count);
2229  }));
2230  }
2231  for (auto& child : approx_distinct_threads) {
2232  child.get();
2233  }
2234 
2235  inclusive_scan(
2236  row_counts.begin(), row_counts.end(), row_counts.begin(), thread_count);
2237 }
#define CHECK_EQ(x, y)
Definition: Logger.h:231
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)
future< Result > async(Fn &&fn, Args &&...args)
GLOBAL void SUFFIX() approximate_distinct_tuples_impl(uint8_t *hll_buffer, int32_t *row_count_buffer, const uint32_t b, const int64_t num_elems, const KEY_HANDLER *f, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define CHECK(condition)
Definition: Logger.h:223


void approximate_distinct_tuples_range ( uint8_t *  hll_buffer_all_cpus,
std::vector< int32_t > &  row_counts,
const uint32_t  b,
const size_t  padded_size_bytes,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const std::vector< JoinBucketInfo > &  join_buckets_per_key,
const bool  is_compressed,
const int  thread_count 
)

Definition at line 2239 of file HashJoinRuntime.cpp.

References approximate_distinct_tuples_impl(), threading_serial::async(), CHECK, CHECK_EQ, and inclusive_scan().

Referenced by RangeJoinHashTable::approximateTupleCount().

2248  {
2249  CHECK_EQ(join_column_per_key.size(), join_buckets_per_key.size());
2250  CHECK_EQ(join_column_per_key.size(), type_info_per_key.size());
2251  CHECK(!join_column_per_key.empty());
2252 
2253  std::vector<std::future<void>> approx_distinct_threads;
2254  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2255  approx_distinct_threads.push_back(std::async(
2256  std::launch::async,
2257  [&join_column_per_key,
2258  &join_buckets_per_key,
2259  &row_counts,
2260  b,
2261  hll_buffer_all_cpus,
2262  padded_size_bytes,
2263  thread_idx,
2264  is_compressed,
2265  thread_count] {
2266  auto hll_buffer = hll_buffer_all_cpus + thread_idx * padded_size_bytes;
2267 
2268  const auto key_handler = RangeKeyHandler(
2269  is_compressed,
2270  join_buckets_per_key[0].inverse_bucket_sizes_for_dimension.size(),
2271  &join_column_per_key[0],
2272  join_buckets_per_key[0].inverse_bucket_sizes_for_dimension.data());
2273  approximate_distinct_tuples_impl(hll_buffer,
2274  row_counts.data(),
2275  b,
2276  join_column_per_key[0].num_elems,
2277  &key_handler,
2278  thread_idx,
2279  thread_count);
2280  }));
2281  }
2282  for (auto& child : approx_distinct_threads) {
2283  child.get();
2284  }
2285 
2286  inclusive_scan(
2287  row_counts.begin(), row_counts.end(), row_counts.begin(), thread_count);
2288 }
#define CHECK_EQ(x, y)
Definition: Logger.h:231
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)
future< Result > async(Fn &&fn, Args &&...args)
GLOBAL void SUFFIX() approximate_distinct_tuples_impl(uint8_t *hll_buffer, int32_t *row_count_buffer, const uint32_t b, const int64_t num_elems, const KEY_HANDLER *f, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define CHECK(condition)
Definition: Logger.h:223


void compute_bucket_sizes_on_cpu ( std::vector< double > &  bucket_sizes_for_dimension,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const std::vector< double > &  bucket_size_thresholds,
const int  thread_count 
)

Definition at line 2290 of file HashJoinRuntime.cpp.

References threading_serial::async().

Referenced by anonymous_namespace{OverlapsJoinHashTable.cpp}::compute_bucket_sizes().

2294  {
2295  std::vector<std::vector<double>> bucket_sizes_for_threads;
2296  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2297  bucket_sizes_for_threads.emplace_back(bucket_sizes_for_dimension.size(), 0.0);
2298  }
2299  std::vector<std::future<void>> threads;
2300  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2301  threads.push_back(std::async(std::launch::async,
2302  compute_bucket_sizes_impl<2>,
2303  bucket_sizes_for_threads[thread_idx].data(),
2304  &join_column,
2305  &type_info,
2306  bucket_size_thresholds.data(),
2307  thread_idx,
2308  thread_count));
2309  }
2310  for (auto& child : threads) {
2311  child.get();
2312  }
2313 
2314  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2315  for (size_t i = 0; i < bucket_sizes_for_dimension.size(); i++) {
2316  if (bucket_sizes_for_threads[thread_idx][i] > bucket_sizes_for_dimension[i]) {
2317  bucket_sizes_for_dimension[i] = bucket_sizes_for_threads[thread_idx][i];
2318  }
2319  }
2320  }
2321 }
future< Result > async(Fn &&fn, Args &&...args)

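Usage sketch (illustrative only): the function spawns thread_count workers over compute_bucket_sizes_impl<2> and keeps the per-dimension maximum, so the length of the output vector fixes the number of dimensions. The 2-D setup and threshold values are assumptions; join_column and type_info are presumed to describe an already-fetched inner column.

std::vector<double> bucket_sizes_for_dimension(2, 0.0);    // 2-D overlaps join (assumed)
const std::vector<double> bucket_size_thresholds(2, 0.1);  // illustrative thresholds
const int thread_count = 8;

compute_bucket_sizes_on_cpu(bucket_sizes_for_dimension,
                            join_column,
                            type_info,
                            bucket_size_thresholds,
                            thread_count);
// bucket_sizes_for_dimension now holds, per dimension, the largest candidate
// produced by any worker thread.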

void compute_bucket_sizes_on_device ( double *  bucket_sizes_buffer,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const double *  bucket_size_thresholds 
)

Definition at line 618 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

Referenced by anonymous_namespace{OverlapsJoinHashTable.cpp}::compute_bucket_sizes().

621  {
622  cuda_kernel_launch_wrapper(compute_bucket_sizes_impl_gpu<2>,
623  bucket_sizes_buffer,
624  join_column,
625  type_info,
626  bucket_sz_threshold);
627 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


int fill_baseline_hash_join_buff_32 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const bool  for_semi_join,
const size_t  key_component_count,
const bool  with_val_slot,
const GenericKeyHandler key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1758 of file HashJoinRuntime.cpp.

Referenced by fill_baseline_hash_join_buff().

1767  {
1768  return fill_baseline_hash_join_buff<int32_t>(hash_buff,
1769  entry_count,
1770  invalid_slot_val,
1771  for_semi_join,
1772  key_component_count,
1773  with_val_slot,
1774  key_handler,
1775  num_elems,
1776  cpu_thread_idx,
1777  cpu_thread_count);
1778 }

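Single-threaded CPU sketch of the baseline (composite-key) build path (illustrative, not from the source): seed the buffer with init_baseline_hash_join_buff_32, then fill it through a GenericKeyHandler. Buffer sizing, entry count, and handler arguments follow the patterns visible elsewhere on this page but are assumptions here; constructing a GenericKeyHandler requires its own header.

const int64_t entry_count = 1 << 16;                       // illustrative
const size_t key_component_count = join_columns.size();    // one JoinColumn per key part
const bool with_val_slot = true;
const int32_t invalid_slot_val = -1;
// One int32 slot per key component plus an optional value slot per entry (assumed layout).
std::vector<int8_t> hash_buff(entry_count * (key_component_count + 1) * sizeof(int32_t));

init_baseline_hash_join_buff_32(hash_buff.data(), entry_count, key_component_count,
                                with_val_slot, invalid_slot_val,
                                /*cpu_thread_idx=*/0, /*cpu_thread_count=*/1);

GenericKeyHandler key_handler(key_component_count, false,
                              join_columns.data(), type_infos.data(),
                              nullptr, nullptr);  // argument order as in the CPU code above
const int err = fill_baseline_hash_join_buff_32(
    hash_buff.data(), entry_count, invalid_slot_val, /*for_semi_join=*/false,
    key_component_count, with_val_slot, &key_handler,
    join_columns[0].num_elems, /*cpu_thread_idx=*/0, /*cpu_thread_count=*/1);
// Callers treat a non-zero return as a failed build (assumption).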

int fill_baseline_hash_join_buff_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const bool  for_semi_join,
const size_t  key_component_count,
const bool  with_val_slot,
const GenericKeyHandler key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1822 of file HashJoinRuntime.cpp.

1831  {
1832  return fill_baseline_hash_join_buff<int64_t>(hash_buff,
1833  entry_count,
1834  invalid_slot_val,
1835  for_semi_join,
1836  key_component_count,
1837  with_val_slot,
1838  key_handler,
1839  num_elems,
1840  cpu_thread_idx,
1841  cpu_thread_count);
1842 }
void fill_baseline_hash_join_buff_on_device_32 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const bool  for_semi_join,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const GenericKeyHandler key_handler,
const int64_t  num_elems 
)

Definition at line 444 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

Referenced by fill_baseline_hash_join_buff_on_device().

452  {
453  cuda_kernel_launch_wrapper(
454  fill_baseline_hash_join_buff_wrapper<int32_t, GenericKeyHandler>,
455  hash_buff,
456  entry_count,
457  invalid_slot_val,
458  for_semi_join,
459  key_component_count,
460  with_val_slot,
461  dev_err_buff,
462  key_handler,
463  num_elems);
464 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void fill_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const bool  for_semi_join,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const GenericKeyHandler key_handler,
const int64_t  num_elems 
)

Definition at line 466 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

474  {
475  cuda_kernel_launch_wrapper(
476  fill_baseline_hash_join_buff_wrapper<unsigned long long, GenericKeyHandler>,
477  hash_buff,
478  entry_count,
479  invalid_slot_val,
480  for_semi_join,
481  key_component_count,
482  with_val_slot,
483  dev_err_buff,
484  key_handler,
485  num_elems);
486 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


int fill_hash_join_buff ( int32_t *  buff,
const int32_t  invalid_slot_val,
const bool  for_semi_join,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const int32_t *  sd_inner_to_outer_translation_map,
const int32_t  min_inner_elem,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 195 of file HashJoinRuntime.cpp.

References fill_hash_join_buff_impl(), fill_hashtable_for_semi_join(), fill_one_to_one_hashtable(), get_hash_slot(), and SUFFIX.

Referenced by fill_hash_join_buff_wrapper().

203  {
204  auto filling_func = for_semi_join ? SUFFIX(fill_hashtable_for_semi_join)
205  : SUFFIX(fill_one_to_one_hashtable);
206  auto hashtable_filling_func = [&](auto elem, size_t index) {
207  auto entry_ptr = SUFFIX(get_hash_slot)(buff, elem, type_info.min_val);
208  return filling_func(index, entry_ptr, invalid_slot_val);
209  };
210 
211  return fill_hash_join_buff_impl(buff,
212  join_column,
213  type_info,
214  sd_inner_to_outer_translation_map,
215  min_inner_elem,
216  cpu_thread_idx,
217  cpu_thread_count,
218  hashtable_filling_func);
219 }
#define SUFFIX(name)
ALWAYS_INLINE DEVICE int SUFFIX() fill_hashtable_for_semi_join(size_t idx, int32_t *entry_ptr, const int32_t invalid_slot_val)
Definition: JoinHashImpl.h:55
DEVICE auto fill_hash_join_buff_impl(int32_t *buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, HASHTABLE_FILLING_FUNC filling_func)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key)
Definition: JoinHashImpl.h:75
const int64_t min_val
ALWAYS_INLINE DEVICE int SUFFIX() fill_one_to_one_hashtable(size_t idx, int32_t *entry_ptr, const int32_t invalid_slot_val)
Definition: JoinHashImpl.h:45

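Single-threaded sketch of the perfect one-to-one CPU build (illustrative, not from the source): seed the buffer with the invalid sentinel via init_hash_join_buff, then fill it. Deriving the entry count from the column's value range assumes JoinColumnTypeInfo carries a max_val alongside the min_val shown above.

const int64_t entry_count = type_info.max_val - type_info.min_val + 1;  // assumed sizing
const int32_t invalid_slot_val = -1;
std::vector<int32_t> buff(entry_count, 0);

init_hash_join_buff(buff.data(), entry_count, invalid_slot_val,
                    /*cpu_thread_idx=*/0, /*cpu_thread_count=*/1);

const int err = fill_hash_join_buff(buff.data(), invalid_slot_val,
                                    /*for_semi_join=*/false,
                                    join_column, type_info,
                                    /*sd_inner_to_outer_translation_map=*/nullptr,
                                    /*min_inner_elem=*/0,
                                    /*cpu_thread_idx=*/0, /*cpu_thread_count=*/1);
// A non-zero return means a slot was already occupied, i.e. the key column is not
// one-to-one and the caller should fall back to a one-to-many layout (assumption).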

int fill_hash_join_buff_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
const bool  for_semi_join,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const int32_t *  sd_inner_to_outer_translation_map,
const int32_t  min_inner_elem,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count,
const int64_t  bucket_normalization 
)

Definition at line 166 of file HashJoinRuntime.cpp.

References fill_hash_join_buff_impl(), fill_hashtable_for_semi_join(), fill_one_to_one_hashtable(), get_bucketized_hash_slot(), and SUFFIX.

Referenced by fill_hash_join_buff_bucketized_wrapper(), and PerfectJoinHashTableBuilder::initOneToOneHashTableOnCpu().

176  {
177  auto filling_func = for_semi_join ? SUFFIX(fill_hashtable_for_semi_join)
178  : SUFFIX(fill_one_to_one_hashtable);
179  auto hashtable_filling_func = [&](auto elem, size_t index) {
180  auto entry_ptr = SUFFIX(get_bucketized_hash_slot)(
181  buff, elem, type_info.min_val, bucket_normalization);
182  return filling_func(index, entry_ptr, invalid_slot_val);
183  };
184 
185  return fill_hash_join_buff_impl(buff,
186  join_column,
187  type_info,
188  sd_inner_to_outer_translation_map,
189  min_inner_elem,
190  cpu_thread_idx,
191  cpu_thread_count,
192  hashtable_filling_func);
193 }
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t bucket_normalization)
Definition: JoinHashImpl.h:67
#define SUFFIX(name)
ALWAYS_INLINE DEVICE int SUFFIX() fill_hashtable_for_semi_join(size_t idx, int32_t *entry_ptr, const int32_t invalid_slot_val)
Definition: JoinHashImpl.h:55
DEVICE auto fill_hash_join_buff_impl(int32_t *buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, HASHTABLE_FILLING_FUNC filling_func)
const int64_t min_val
ALWAYS_INLINE DEVICE int SUFFIX() fill_one_to_one_hashtable(size_t idx, int32_t *entry_ptr, const int32_t invalid_slot_val)
Definition: JoinHashImpl.h:45


void fill_hash_join_buff_on_device ( int32_t *  buff,
const int32_t  invalid_slot_val,
const bool  for_semi_join,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info 
)

Definition at line 85 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_wrapper().

90  {
91  cuda_kernel_launch_wrapper(fill_hash_join_buff_wrapper,
92  buff,
93  invalid_slot_val,
94  for_semi_join,
95  join_column,
96  type_info,
97  dev_err_buff);
98 }
__global__ void fill_hash_join_buff_wrapper(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, const JoinColumn join_column, const JoinColumnTypeInfo type_info, int *err)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void fill_hash_join_buff_on_device_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
const bool  for_semi_join,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const int64_t  bucket_normalization 
)

Definition at line 68 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_bucketized_wrapper().

74  {
75  cuda_kernel_launch_wrapper(fill_hash_join_buff_bucketized_wrapper,
76  buff,
77  invalid_slot_val,
78  for_semi_join,
79  join_column,
80  type_info,
81  dev_err_buff,
82  bucket_normalization);
83 }
__global__ void fill_hash_join_buff_bucketized_wrapper(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, const JoinColumn join_column, const JoinColumnTypeInfo type_info, int *err, const int64_t bucket_normalization)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void fill_hash_join_buff_on_device_sharded ( int32_t *  buff,
const int32_t  invalid_slot_val,
const bool  for_semi_join,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const ShardInfo  shard_info 
)

Definition at line 163 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_wrapper_sharded().

169  {
170  cuda_kernel_launch_wrapper(fill_hash_join_buff_wrapper_sharded,
171  buff,
172  invalid_slot_val,
173  for_semi_join,
174  join_column,
175  type_info,
176  shard_info,
177  dev_err_buff);
178 }
__global__ void fill_hash_join_buff_wrapper_sharded(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, int *err)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void fill_hash_join_buff_on_device_sharded_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
const bool  for_semi_join,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const ShardInfo  shard_info,
const int64_t  bucket_normalization 
)

Definition at line 143 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_wrapper_sharded_bucketized().

151  {
152  cuda_kernel_launch_wrapper(fill_hash_join_buff_wrapper_sharded_bucketized,
153  buff,
154  invalid_slot_val,
155  for_semi_join,
156  join_column,
157  type_info,
158  shard_info,
159  dev_err_buff,
160  bucket_normalization);
161 }
__global__ void fill_hash_join_buff_wrapper_sharded_bucketized(int32_t *buff, const int32_t invalid_slot_val, const bool for_semi_join, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, int *err, const int64_t bucket_normalization)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void fill_one_to_many_baseline_hash_table_32 ( int32_t *  buff,
const int32_t *  composite_key_dict,
const int64_t  hash_entry_count,
const size_t  key_component_count,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const std::vector< const int32_t * > &  sd_inner_to_outer_translation_maps,
const std::vector< int32_t > &  sd_min_inner_elems,
const int32_t  cpu_thread_count,
const bool  is_range_join = false,
const bool  is_geo_compressed = false 
)

Definition at line 2095 of file HashJoinRuntime.cpp.

Referenced by BaselineJoinHashTableBuilder::initHashTableOnCpu().

2107  {
2108  fill_one_to_many_baseline_hash_table<int32_t>(buff,
2109  composite_key_dict,
2110  hash_entry_count,
2111  key_component_count,
2112  join_column_per_key,
2113  type_info_per_key,
2114  join_bucket_info,
2115  sd_inner_to_outer_translation_maps,
2116  sd_min_inner_elems,
2117  cpu_thread_count,
2118  is_range_join,
2119  is_geo_compressed);
2120 }


void fill_one_to_many_baseline_hash_table_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const int64_t  hash_entry_count,
const size_t  key_component_count,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const std::vector< const int32_t * > &  sd_inner_to_outer_translation_maps,
const std::vector< int32_t > &  sd_min_inner_elems,
const int32_t  cpu_thread_count,
const bool  is_range_join = false,
const bool  is_geo_compressed = false 
)

Definition at line 2122 of file HashJoinRuntime.cpp.

Referenced by BaselineJoinHashTableBuilder::initHashTableOnCpu().

2134  {
2135  fill_one_to_many_baseline_hash_table<int64_t>(buff,
2136  composite_key_dict,
2137  hash_entry_count,
2138  key_component_count,
2139  join_column_per_key,
2140  type_info_per_key,
2141  join_bucket_info,
2142  sd_inner_to_outer_translation_maps,
2143  sd_min_inner_elems,
2144  cpu_thread_count,
2145  is_range_join,
2146  is_geo_compressed);
2147 }


void fill_one_to_many_baseline_hash_table_on_device_32 ( int32_t *  buff,
const int32_t *  composite_key_dict,
const int64_t  hash_entry_count,
const size_t  key_component_count,
const GenericKeyHandler key_handler,
const int64_t  num_elems 
)

Definition at line 531 of file HashJoinRuntimeGpu.cu.

Referenced by fill_one_to_many_baseline_hash_table_on_device().

537  {
538  fill_one_to_many_baseline_hash_table_on_device<int32_t>(
539  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
540 }


void fill_one_to_many_baseline_hash_table_on_device_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const int64_t  hash_entry_count,
const GenericKeyHandler key_handler,
const int64_t  num_elems 
)

Definition at line 542 of file HashJoinRuntimeGpu.cu.

547  {
548  fill_one_to_many_baseline_hash_table_on_device<int64_t>(
549  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
550 }
void fill_one_to_many_hash_table ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const int32_t *  sd_inner_to_outer_translation_map,
const int32_t  min_inner_elem,
const unsigned  cpu_thread_count 
)

Definition at line 1458 of file HashJoinRuntime.cpp.

References count_matches(), DEBUG_TIMER, fill_one_to_many_hash_table_impl(), fill_row_ids(), HashEntryInfo::hash_entry_count, and SUFFIX.

Referenced by PerfectJoinHashTableBuilder::initOneToManyHashTableOnCpu().

1464  {
1465  auto timer = DEBUG_TIMER(__func__);
1466  auto launch_count_matches = [count_buff = buff + hash_entry_info.hash_entry_count,
1467  &join_column,
1468  &type_info,
1469  sd_inner_to_outer_translation_map,
1470  min_inner_elem](auto cpu_thread_idx,
1471  auto cpu_thread_count) {
1472  SUFFIX(count_matches)
1473  (count_buff,
1474  join_column,
1475  type_info,
1476  sd_inner_to_outer_translation_map,
1477  min_inner_elem,
1478  cpu_thread_idx,
1479  cpu_thread_count);
1480  };
1481  auto launch_fill_row_ids = [hash_entry_count = hash_entry_info.hash_entry_count,
1482  buff,
1483  &join_column,
1484  &type_info,
1485  sd_inner_to_outer_translation_map,
1486  min_inner_elem](auto cpu_thread_idx,
1487  auto cpu_thread_count) {
1488  SUFFIX(fill_row_ids)
1489  (buff,
1490  hash_entry_count,
1491  join_column,
1492  type_info,
1493  sd_inner_to_outer_translation_map,
1494  min_inner_elem,
1495  cpu_thread_idx,
1496  cpu_thread_count);
1497  };
1498 
1499  fill_one_to_many_hash_table_impl(buff,
1500  hash_entry_info.hash_entry_count,
1501  join_column,
1502  type_info,
1503  sd_inner_to_outer_translation_map,
1504  min_inner_elem,
1505  cpu_thread_count,
1506  launch_count_matches,
1507  launch_fill_row_ids);
1508 }
void fill_one_to_many_hash_table_impl(int32_t *buff, const int64_t hash_entry_count, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const unsigned cpu_thread_count, COUNT_MATCHES_LAUNCH_FUNCTOR count_matches_func, FILL_ROW_IDS_LAUNCH_FUNCTOR fill_row_ids_func)
#define SUFFIX(name)
GLOBAL void SUFFIX() fill_row_ids(int32_t *buff, const int64_t hash_entry_count, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
GLOBAL void SUFFIX() count_matches(int32_t *count_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
size_t hash_entry_count
#define DEBUG_TIMER(name)
Definition: Logger.h:370

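Sketch of the one-to-many CPU build (illustrative): the body above addresses its count buffer at buff + hash_entry_count, which implies a layout of per-entry offsets, then per-entry counts, then a row-id payload; the total size used below is an assumption.

const size_t entry_count = 1 << 16;                        // illustrative
HashEntryInfo hash_entry_info;
hash_entry_info.hash_entry_count = entry_count;
hash_entry_info.bucket_normalization = 1;                  // unused by this path (assumption)
const size_t num_rows = join_column.num_elems;
std::vector<int32_t> buff(2 * entry_count + num_rows, 0);  // offsets + counts + row ids (assumed)

fill_one_to_many_hash_table(buff.data(),
                            hash_entry_info,
                            join_column,
                            type_info,
                            /*sd_inner_to_outer_translation_map=*/nullptr,
                            /*min_inner_elem=*/0,
                            std::thread::hardware_concurrency());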

void fill_one_to_many_hash_table_bucketized ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const int32_t *  sd_inner_to_outer_translation_map,
const int32_t  min_inner_elem,
const unsigned  cpu_thread_count 
)

Definition at line 1510 of file HashJoinRuntime.cpp.

References HashEntryInfo::bucket_normalization, count_matches_bucketized(), DEBUG_TIMER, fill_one_to_many_hash_table_impl(), fill_row_ids_bucketized(), HashEntryInfo::getNormalizedHashEntryCount(), and SUFFIX.

Referenced by PerfectJoinHashTableBuilder::initOneToManyHashTableOnCpu().

1517  {
1518  auto timer = DEBUG_TIMER(__func__);
1519  auto bucket_normalization = hash_entry_info.bucket_normalization;
1520  auto hash_entry_count = hash_entry_info.getNormalizedHashEntryCount();
1521  auto launch_count_matches = [bucket_normalization,
1522  count_buff = buff + hash_entry_count,
1523  &join_column,
1524  &type_info,
1525  sd_inner_to_outer_translation_map,
1526  min_inner_elem](auto cpu_thread_idx,
1527  auto cpu_thread_count) {
1528  SUFFIX(count_matches_bucketized)
1529  (count_buff,
1530  join_column,
1531  type_info,
1532  sd_inner_to_outer_translation_map,
1533  min_inner_elem,
1534  cpu_thread_idx,
1535  cpu_thread_count,
1536  bucket_normalization);
1537  };
1538  auto launch_fill_row_ids = [bucket_normalization,
1539  hash_entry_count,
1540  buff,
1541  &join_column,
1542  &type_info,
1543  sd_inner_to_outer_translation_map,
1544  min_inner_elem](auto cpu_thread_idx,
1545  auto cpu_thread_count) {
1546  SUFFIX(fill_row_ids_bucketized)
1547  (buff,
1548  hash_entry_count,
1549  join_column,
1550  type_info,
1551  sd_inner_to_outer_translation_map,
1552  min_inner_elem,
1553  cpu_thread_idx,
1554  cpu_thread_count,
1555  bucket_normalization);
1556  };
1557 
1558  fill_one_to_many_hash_table_impl(buff,
1559  hash_entry_count,
1560  join_column,
1561  type_info,
1562  sd_inner_to_outer_translation_map,
1563  min_inner_elem,
1564  cpu_thread_count,
1565  launch_count_matches,
1566  launch_fill_row_ids);
1567 }
void fill_one_to_many_hash_table_impl(int32_t *buff, const int64_t hash_entry_count, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const unsigned cpu_thread_count, COUNT_MATCHES_LAUNCH_FUNCTOR count_matches_func, FILL_ROW_IDS_LAUNCH_FUNCTOR fill_row_ids_func)
#define SUFFIX(name)
int64_t bucket_normalization
GLOBAL void SUFFIX() fill_row_ids_bucketized(int32_t *buff, const int64_t hash_entry_count, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
size_t getNormalizedHashEntryCount() const
#define DEBUG_TIMER(name)
Definition: Logger.h:370
GLOBAL void SUFFIX() count_matches_bucketized(int32_t *count_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)


void fill_one_to_many_hash_table_on_device ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info 
)

Definition at line 247 of file HashJoinRuntimeGpu.cu.

References count_matches(), cuda_kernel_launch_wrapper(), fill_one_to_many_hash_table_on_device_impl(), fill_row_ids(), HashEntryInfo::hash_entry_count, and SUFFIX.

250  {
251  auto hash_entry_count = hash_entry_info.hash_entry_count;
252  auto count_matches_func = [count_buff = buff + hash_entry_count,
253  join_column,
254  type_info] {
255  cuda_kernel_launch_wrapper(SUFFIX(count_matches), count_buff, join_column, type_info);
256  };
257 
258  auto fill_row_ids_func = [buff, hash_entry_count, join_column, type_info] {
259  cuda_kernel_launch_wrapper(
260  SUFFIX(fill_row_ids), buff, hash_entry_count, join_column, type_info);
261  };
262 
263  fill_one_to_many_hash_table_on_device_impl(buff,
264  hash_entry_count,
265  join_column,
266  type_info,
267  count_matches_func,
268  fill_row_ids_func);
269 }
#define SUFFIX(name)
GLOBAL void SUFFIX() fill_row_ids(int32_t *buff, const int64_t hash_entry_count, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_hash_table_on_device_impl(int32_t *buff, const int64_t hash_entry_count, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, COUNT_MATCHES_FUNCTOR count_matches_func, FILL_ROW_IDS_FUNCTOR fill_row_ids_func)
GLOBAL void SUFFIX() count_matches(int32_t *count_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
size_t hash_entry_count
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void fill_one_to_many_hash_table_on_device_bucketized ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info 
)

Definition at line 271 of file HashJoinRuntimeGpu.cu.

References HashEntryInfo::bucket_normalization, count_matches_bucketized(), cuda_kernel_launch_wrapper(), fill_one_to_many_hash_table_on_device_impl(), fill_row_ids_bucketized(), HashEntryInfo::getNormalizedHashEntryCount(), and SUFFIX.

275  {
276  auto hash_entry_count = hash_entry_info.getNormalizedHashEntryCount();
277  auto count_matches_func = [count_buff = buff + hash_entry_count,
278  join_column,
279  type_info,
280  bucket_normalization =
281  hash_entry_info.bucket_normalization] {
282  cuda_kernel_launch_wrapper(SUFFIX(count_matches_bucketized),
283  count_buff,
284  join_column,
285  type_info,
286  bucket_normalization);
287  };
288 
289  auto fill_row_ids_func = [buff,
290  hash_entry_count =
291  hash_entry_info.getNormalizedHashEntryCount(),
292  join_column,
293  type_info,
294  bucket_normalization = hash_entry_info.bucket_normalization] {
295  cuda_kernel_launch_wrapper(SUFFIX(fill_row_ids_bucketized),
296  buff,
297  hash_entry_count,
298  join_column,
299  type_info,
300  bucket_normalization);
301  };
302 
303  fill_one_to_many_hash_table_on_device_impl(buff,
304  hash_entry_count,
305  join_column,
306  type_info,
307  count_matches_func,
308  fill_row_ids_func);
309 }
#define SUFFIX(name)
void fill_one_to_many_hash_table_on_device_impl(int32_t *buff, const int64_t hash_entry_count, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, COUNT_MATCHES_FUNCTOR count_matches_func, FILL_ROW_IDS_FUNCTOR fill_row_ids_func)
int64_t bucket_normalization
GLOBAL void SUFFIX() fill_row_ids_bucketized(int32_t *buff, const int64_t hash_entry_count, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)
size_t getNormalizedHashEntryCount() const
GLOBAL void SUFFIX() count_matches_bucketized(int32_t *count_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)


void fill_one_to_many_hash_table_on_device_sharded ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const ShardInfo shard_info 
)

Definition at line 311 of file HashJoinRuntimeGpu.cu.

References checkCudaErrors, count_matches_sharded(), cuda_kernel_launch_wrapper(), fill_row_ids_sharded(), getQueryEngineCudaStream(), HashEntryInfo::hash_entry_count, inclusive_scan(), set_valid_pos(), set_valid_pos_flag(), and SUFFIX.

315  {
316  auto hash_entry_count = hash_entry_info.hash_entry_count;
317  int32_t* pos_buff = buff;
318  int32_t* count_buff = buff + hash_entry_count;
319  auto qe_cuda_stream = getQueryEngineCudaStream();
320  checkCudaErrors(
321  cudaMemsetAsync(count_buff, 0, hash_entry_count * sizeof(int32_t), qe_cuda_stream));
322  checkCudaErrors(cudaStreamSynchronize(qe_cuda_stream));
323  cuda_kernel_launch_wrapper(
324  SUFFIX(count_matches_sharded), count_buff, join_column, type_info, shard_info);
325 
326  cuda_kernel_launch_wrapper(set_valid_pos_flag, pos_buff, count_buff, hash_entry_count);
327 
328  auto count_buff_dev_ptr = thrust::device_pointer_cast(count_buff);
329  inclusive_scan(
330  count_buff_dev_ptr, count_buff_dev_ptr + hash_entry_count, count_buff_dev_ptr);
331  cuda_kernel_launch_wrapper(set_valid_pos, pos_buff, count_buff, hash_entry_count);
332  checkCudaErrors(
333  cudaMemsetAsync(count_buff, 0, hash_entry_count * sizeof(int32_t), qe_cuda_stream));
334  checkCudaErrors(cudaStreamSynchronize(qe_cuda_stream));
335  cuda_kernel_launch_wrapper(SUFFIX(fill_row_ids_sharded),
336  buff,
337  hash_entry_count,
338  join_column,
339  type_info,
340  shard_info);
341 }
GLOBAL void SUFFIX() count_matches_sharded(int32_t *count_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define SUFFIX(name)
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)
CUstream getQueryEngineCudaStream()
Definition: QueryEngine.cpp:3
GLOBAL void SUFFIX() fill_row_ids_sharded(int32_t *buff, const int64_t hash_entry_count, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const int32_t *sd_inner_to_outer_translation_map, const int32_t min_inner_elem, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
size_t hash_entry_count
__global__ void set_valid_pos_flag(int32_t *pos_buff, const int32_t *count_buff, const int64_t entry_count)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)
#define checkCudaErrors(err)
Definition: GpuInitGroups.cu:9
__global__ void set_valid_pos(int32_t *pos_buff, int32_t *count_buff, const int64_t entry_count)


void fill_one_to_many_hash_table_sharded_bucketized ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const ShardInfo shard_info,
const int32_t *  sd_inner_to_outer_translation_map,
const int32_t  min_inner_elem,
const unsigned  cpu_thread_count 
)
ColumnType get_join_column_type_kind ( const SQLTypeInfo ti)
inline

Definition at line 156 of file HashJoinRuntime.h.

References SQLTypeInfo::is_date_in_days(), is_unsigned_type(), Signed, SmallDate, and Unsigned.

Referenced by OverlapsJoinHashTable::fetchColumnsForDevice(), PerfectJoinHashTable::fetchColumnsForDevice(), BaselineJoinHashTable::fetchColumnsForDevice(), PerfectJoinHashTableBuilder::initOneToManyHashTableOnCpu(), and PerfectJoinHashTableBuilder::initOneToOneHashTableOnCpu().

156  {
157  if (ti.is_date_in_days()) {
158  return SmallDate;
159  } else {
160  return is_unsigned_type(ti) ? Unsigned : Signed;
161  }
162 }
bool is_date_in_days() const
Definition: sqltypes.h:873
bool is_unsigned_type(const SQLTypeInfo &ti)

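Dispatch sketch (illustrative): ColumnType is an unscoped enum, so callers typically switch directly on the result to pick the matching build/probe path for the inner column; inner_col_ti stands in for a SQLTypeInfo obtained elsewhere.

switch (get_join_column_type_kind(inner_col_ti)) {
  case SmallDate:
    // DATE encoded in days; the *_bucketized build paths apply
    break;
  case Unsigned:
    // values are decoded as unsigned integers
    break;
  case Double:
    // not produced by this helper; listed only for completeness
    break;
  case Signed:
  default:
    // default signed-integer treatment
    break;
}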

void init_baseline_hash_join_buff_32 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1702 of file HashJoinRuntime.cpp.

Referenced by BaselineJoinHashTableBuilder::initHashTableOnCpu().

1708  {
1709  init_baseline_hash_join_buff<int32_t>(hash_join_buff,
1710  entry_count,
1711  key_component_count,
1712  with_val_slot,
1713  invalid_slot_val,
1714  cpu_thread_idx,
1715  cpu_thread_count);
1716 }


void init_baseline_hash_join_buff_64 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1718 of file HashJoinRuntime.cpp.

Referenced by BaselineJoinHashTableBuilder::initHashTableOnCpu().

1724  {
1725  init_baseline_hash_join_buff<int64_t>(hash_join_buff,
1726  entry_count,
1727  key_component_count,
1728  with_val_slot,
1729  invalid_slot_val,
1730  cpu_thread_idx,
1731  cpu_thread_count);
1732 }


void init_baseline_hash_join_buff_on_device_32 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val 
)

Definition at line 395 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().

399  {
400  cuda_kernel_launch_wrapper(init_baseline_hash_join_buff_wrapper<int32_t>,
401  hash_join_buff,
402  entry_count,
403  key_component_count,
404  with_val_slot,
405  invalid_slot_val);
406 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void init_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val 
)

Definition at line 408 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().

412  {
413  cuda_kernel_launch_wrapper(init_baseline_hash_join_buff_wrapper<int64_t>,
414  hash_join_buff,
415  entry_count,
416  key_component_count,
417  with_val_slot,
418  invalid_slot_val);
419 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void init_hash_join_buff ( int32_t *  buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 70 of file HashJoinRuntime.cpp.

Referenced by init_hash_join_buff_wrapper(), BaselineJoinHashTableBuilder::initHashTableOnCpu(), PerfectJoinHashTableBuilder::initOneToManyHashTableOnCpu(), and PerfectJoinHashTableBuilder::initOneToOneHashTableOnCpu().

74  {
75 #ifdef __CUDACC__
76  int32_t start = threadIdx.x + blockDim.x * blockIdx.x;
77  int32_t step = blockDim.x * gridDim.x;
78 #else
79  int32_t start = cpu_thread_idx;
80  int32_t step = cpu_thread_count;
81 #endif
82  for (int64_t i = start; i < hash_entry_count; i += step) {
83  groups_buffer[i] = invalid_slot_val;
84  }
85 }

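Sketch of the CPU threading convention (illustrative): the body above strides by cpu_thread_idx / cpu_thread_count (or by the CUDA grid when compiled as device code), so a caller can split initialization across workers simply by handing each one its index. buff and entry_count are assumed to exist.

// requires <thread> and <vector>
const int32_t invalid_slot_val = -1;
const int32_t cpu_thread_count =
    static_cast<int32_t>(std::thread::hardware_concurrency());
std::vector<std::thread> workers;
for (int32_t thread_idx = 0; thread_idx < cpu_thread_count; ++thread_idx) {
  // Each worker initializes slots thread_idx, thread_idx + cpu_thread_count, ...
  workers.emplace_back(init_hash_join_buff, buff.data(), entry_count,
                       invalid_slot_val, thread_idx, cpu_thread_count);
}
for (auto& w : workers) {
  w.join();
}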

void init_hash_join_buff_on_device ( int32_t *  buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val 
)

Definition at line 186 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and init_hash_join_buff_wrapper().

Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().

188  {
189  cuda_kernel_launch_wrapper(
190  init_hash_join_buff_wrapper, buff, hash_entry_count, invalid_slot_val);
191 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)
__global__ void init_hash_join_buff_wrapper(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val)


int overlaps_fill_baseline_hash_join_buff_32 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1780 of file HashJoinRuntime.cpp.

Referenced by fill_baseline_hash_join_buff().

1788  {
1789  return fill_baseline_hash_join_buff<int32_t>(hash_buff,
1790  entry_count,
1791  invalid_slot_val,
1792  false,
1793  key_component_count,
1794  with_val_slot,
1795  key_handler,
1796  num_elems,
1797  cpu_thread_idx,
1798  cpu_thread_count);
1799 }


int overlaps_fill_baseline_hash_join_buff_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1844 of file HashJoinRuntime.cpp.

1852  {
1853  return fill_baseline_hash_join_buff<int64_t>(hash_buff,
1854  entry_count,
1855  invalid_slot_val,
1856  false,
1857  key_component_count,
1858  with_val_slot,
1859  key_handler,
1860  num_elems,
1861  cpu_thread_idx,
1862  cpu_thread_count);
1863 }
void overlaps_fill_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems 
)

Definition at line 488 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

496  {
497  cuda_kernel_launch_wrapper(
498  fill_baseline_hash_join_buff_wrapper<unsigned long long, OverlapsKeyHandler>,
499  hash_buff,
500  entry_count,
501  invalid_slot_val,
502  false,
503  key_component_count,
504  with_val_slot,
505  dev_err_buff,
506  key_handler,
507  num_elems);
508 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void overlaps_fill_one_to_many_baseline_hash_table_on_device_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const int64_t  hash_entry_count,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems 
)

Definition at line 552 of file HashJoinRuntimeGpu.cu.

557  {
558  fill_one_to_many_baseline_hash_table_on_device<int64_t>(
559  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
560 }
int range_fill_baseline_hash_join_buff_32 ( int8_t *  hash_buff,
const size_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const RangeKeyHandler key_handler,
const size_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1801 of file HashJoinRuntime.cpp.

Referenced by fill_baseline_hash_join_buff().

1809  {
1810  return fill_baseline_hash_join_buff<int32_t>(hash_buff,
1811  entry_count,
1812  invalid_slot_val,
1813  false,
1814  key_component_count,
1815  with_val_slot,
1816  key_handler,
1817  num_elems,
1818  cpu_thread_idx,
1819  cpu_thread_count);
1820 }


int range_fill_baseline_hash_join_buff_64 ( int8_t *  hash_buff,
const size_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const RangeKeyHandler key_handler,
const size_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1865 of file HashJoinRuntime.cpp.

1873  {
1874  return fill_baseline_hash_join_buff<int64_t>(hash_buff,
1875  entry_count,
1876  invalid_slot_val,
1877  false,
1878  key_component_count,
1879  with_val_slot,
1880  key_handler,
1881  num_elems,
1882  cpu_thread_idx,
1883  cpu_thread_count);
1884 }
void range_fill_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const RangeKeyHandler key_handler,
const size_t  num_elems 
)

Definition at line 510 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

517  {
518  cuda_kernel_launch_wrapper(
519  fill_baseline_hash_join_buff_wrapper<unsigned long long, RangeKeyHandler>,
520  hash_buff,
521  entry_count,
522  invalid_slot_val,
523  false,
524  key_component_count,
525  with_val_slot,
526  dev_err_buff,
527  key_handler,
528  num_elems);
529 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)


void range_fill_one_to_many_baseline_hash_table_on_device_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const size_t  hash_entry_count,
const RangeKeyHandler key_handler,
const size_t  num_elems 
)

Definition at line 562 of file HashJoinRuntimeGpu.cu.

567  {
568  fill_one_to_many_baseline_hash_table_on_device<int64_t>(
569  buff, composite_key_dict, hash_entry_count, key_handler, num_elems);
570 }

Variable Documentation