OmniSciDB  85c2d10cdc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HashJoinRuntime.h File Reference
#include <cstddef>
#include <cstdint>
#include <vector>
#include "../../../Shared/SqlTypesLayout.h"
#include "../../../Shared/sqltypes.h"
#include "../../RuntimeFunctions.h"
#include "../../../Shared/funcannotations.h"
+ Include dependency graph for HashJoinRuntime.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  HashEntryInfo
 
struct  JoinChunk
 
struct  JoinColumn
 
struct  JoinColumnTypeInfo
 
struct  JoinBucketInfo
 
struct  ShardInfo
 

Enumerations

enum  ColumnType { SmallDate = 0, Signed = 1, Unsigned = 2, Double = 3 }
 

Functions

void init_hash_join_buff (int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void init_hash_join_buff_on_device (int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val)
 
void init_baseline_hash_join_buff_32 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void init_baseline_hash_join_buff_64 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void init_baseline_hash_join_buff_on_device_32 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
 
void init_baseline_hash_join_buff_on_device_64 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val)
 
ColumnType get_join_column_type_kind (const SQLTypeInfo &ti)
 
int fill_hash_join_buff_bucketized (int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner, const void *sd_outer, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
 
int fill_hash_join_buff (int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner, const void *sd_outer, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void fill_hash_join_buff_on_device (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info)
 
void fill_hash_join_buff_on_device_bucketized (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const int64_t bucket_normalization)
 
void fill_hash_join_buff_on_device_sharded (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info)
 
void fill_hash_join_buff_on_device_sharded_bucketized (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const int64_t bucket_normalization)
 
void fill_one_to_many_hash_table (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count)
 
void fill_one_to_many_hash_table_bucketized (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count)
 
void fill_one_to_many_hash_table_sharded_bucketized (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count)
 
void fill_one_to_many_hash_table_on_device (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info)
 
void fill_one_to_many_hash_table_on_device_bucketized (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info)
 
void fill_one_to_many_hash_table_on_device_sharded (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info)
 
int fill_baseline_hash_join_buff_32 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
int overlaps_fill_baseline_hash_join_buff_32 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
int fill_baseline_hash_join_buff_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
int overlaps_fill_baseline_hash_join_buff_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void fill_baseline_hash_join_buff_on_device_32 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
 
void fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems)
 
void overlaps_fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
 
void fill_one_to_many_baseline_hash_table_32 (int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void * > &sd_inner_proxy_per_key, const std::vector< const void * > &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
 
void fill_one_to_many_baseline_hash_table_64 (int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void * > &sd_inner_proxy_per_key, const std::vector< const void * > &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
 
void fill_one_to_many_baseline_hash_table_on_device_32 (int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems)
 
void fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const GenericKeyHandler *key_handler, const int64_t num_elems)
 
void overlaps_fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
 
void approximate_distinct_tuples (uint8_t *hll_buffer_all_cpus, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const int thread_count)
 
void approximate_distinct_tuples_overlaps (uint8_t *hll_buffer_all_cpus, std::vector< int32_t > &row_counts, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_buckets_per_key, const int thread_count)
 
void approximate_distinct_tuples_on_device (uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const int64_t num_elems)
 
void approximate_distinct_tuples_on_device_overlaps (uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const OverlapsKeyHandler *key_handler, const int64_t num_elems)
 
void compute_bucket_sizes_on_cpu (std::vector< double > &bucket_sizes_for_dimension, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const std::vector< double > &bucket_size_thresholds, const int thread_count)
 
void compute_bucket_sizes_on_device (double *bucket_sizes_buffer, const JoinColumn *join_column, const JoinColumnTypeInfo *type_info, const double *bucket_size_thresholds)
 

Variables

const size_t g_maximum_conditions_to_coalesce {8}
 

Enumeration Type Documentation

enum ColumnType
Enumerator
SmallDate 
Signed 
Unsigned 
Double 

Definition at line 100 of file HashJoinRuntime.h.

Function Documentation

void approximate_distinct_tuples ( uint8_t *  hll_buffer_all_cpus,
const uint32_t  b,
const size_t  padded_size_bytes,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const int  thread_count 
)

Definition at line 1964 of file HashJoinRuntime.cpp.

References approximate_distinct_tuples_impl(), CHECK, and CHECK_EQ.

Referenced by BaselineJoinHashTable::approximateTupleCount().

1969  {
1970  CHECK_EQ(join_column_per_key.size(), type_info_per_key.size());
1971  CHECK(!join_column_per_key.empty());
1972 
1973  std::vector<std::future<void>> approx_distinct_threads;
1974  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
1975  approx_distinct_threads.push_back(std::async(
1976  std::launch::async,
1977  [&join_column_per_key,
1978  &type_info_per_key,
1979  b,
1980  hll_buffer_all_cpus,
1981  padded_size_bytes,
1982  thread_idx,
1983  thread_count] {
1984  auto hll_buffer = hll_buffer_all_cpus + thread_idx * padded_size_bytes;
1985 
1986  const auto key_handler = GenericKeyHandler(join_column_per_key.size(),
1987  false,
1988  &join_column_per_key[0],
1989  &type_info_per_key[0],
1990  nullptr,
1991  nullptr);
1992  approximate_distinct_tuples_impl(hll_buffer,
1993  nullptr,
1994  b,
1995  join_column_per_key[0].num_elems,
1996  &key_handler,
1997  thread_idx,
1998  thread_count);
1999  }));
2000  }
2001  for (auto& child : approx_distinct_threads) {
2002  child.get();
2003  }
2004 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
GLOBAL void SUFFIX() approximate_distinct_tuples_impl(uint8_t *hll_buffer, int32_t *row_count_buffer, const uint32_t b, const int64_t num_elems, const KEY_HANDLER *f, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void approximate_distinct_tuples_on_device ( uint8_t *  hll_buffer,
const uint32_t  b,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems 
)

Definition at line 550 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

Referenced by BaselineJoinHashTable::approximateTupleCount().

553  {
554  cuda_kernel_launch_wrapper(approximate_distinct_tuples_impl_gpu<GenericKeyHandler>,
555  hll_buffer,
556  nullptr,
557  b,
558  num_elems,
559  key_handler);
560 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void approximate_distinct_tuples_on_device_overlaps ( uint8_t *  hll_buffer,
const uint32_t  b,
int32_t *  row_counts_buffer,
const OverlapsKeyHandler *  key_handler,
const int64_t  num_elems 
)

Definition at line 533 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and inclusive_scan().

Referenced by OverlapsJoinHashTable::approximateTupleCount().

537  {
538  cuda_kernel_launch_wrapper(approximate_distinct_tuples_impl_gpu<OverlapsKeyHandler>,
539  hll_buffer,
540  row_counts_buffer,
541  b,
542  num_elems,
543  key_handler);
544 
545  auto row_counts_buffer_ptr = thrust::device_pointer_cast(row_counts_buffer);
546  thrust::inclusive_scan(
547  row_counts_buffer_ptr, row_counts_buffer_ptr + num_elems, row_counts_buffer_ptr);
548 }
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void approximate_distinct_tuples_overlaps ( uint8_t *  hll_buffer_all_cpus,
std::vector< int32_t > &  row_counts,
const uint32_t  b,
const size_t  padded_size_bytes,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const std::vector< JoinBucketInfo > &  join_buckets_per_key,
const int  thread_count 
)

Definition at line 2006 of file HashJoinRuntime.cpp.

References approximate_distinct_tuples_impl(), CHECK, CHECK_EQ, and inclusive_scan().

Referenced by OverlapsJoinHashTable::approximateTupleCount().

2014  {
2015  CHECK_EQ(join_column_per_key.size(), join_buckets_per_key.size());
2016  CHECK_EQ(join_column_per_key.size(), type_info_per_key.size());
2017  CHECK(!join_column_per_key.empty());
2018 
2019  std::vector<std::future<void>> approx_distinct_threads;
2020  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2021  approx_distinct_threads.push_back(std::async(
2022  std::launch::async,
2023  [&join_column_per_key,
2024  &join_buckets_per_key,
2025  &row_counts,
2026  b,
2027  hll_buffer_all_cpus,
2028  padded_size_bytes,
2029  thread_idx,
2030  thread_count] {
2031  auto hll_buffer = hll_buffer_all_cpus + thread_idx * padded_size_bytes;
2032 
2033  const auto key_handler = OverlapsKeyHandler(
2034  join_buckets_per_key[0].inverse_bucket_sizes_for_dimension.size(),
2035  &join_column_per_key[0],
2036  join_buckets_per_key[0].inverse_bucket_sizes_for_dimension.data());
2037  approximate_distinct_tuples_impl(hll_buffer,
2038  row_counts.data(),
2039  b,
2040  join_column_per_key[0].num_elems,
2041  &key_handler,
2042  thread_idx,
2043  thread_count);
2044  }));
2045  }
2046  for (auto& child : approx_distinct_threads) {
2047  child.get();
2048  }
2049 
2050  inclusive_scan(
2051  row_counts.begin(), row_counts.end(), row_counts.begin(), thread_count);
2052 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)
GLOBAL void SUFFIX() approximate_distinct_tuples_impl(uint8_t *hll_buffer, int32_t *row_count_buffer, const uint32_t b, const int64_t num_elems, const KEY_HANDLER *f, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void compute_bucket_sizes_on_cpu ( std::vector< double > &  bucket_sizes_for_dimension,
const JoinColumn &  join_column,
const JoinColumnTypeInfo &  type_info,
const std::vector< double > &  bucket_size_thresholds,
const int  thread_count 
)

Definition at line 2054 of file HashJoinRuntime.cpp.

References i.

Referenced by anonymous_namespace{OverlapsJoinHashTable.cpp}::compute_bucket_sizes().

2058  {
2059  std::vector<std::vector<double>> bucket_sizes_for_threads;
2060  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2061  bucket_sizes_for_threads.emplace_back(bucket_sizes_for_dimension.size(), 0.0);
2062  }
2063  std::vector<std::future<void>> threads;
2064  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2065  threads.push_back(std::async(std::launch::async,
2066  compute_bucket_sizes_impl<2>,
2067  bucket_sizes_for_threads[thread_idx].data(),
2068  &join_column,
2069  &type_info,
2070  bucket_size_thresholds.data(),
2071  thread_idx,
2072  thread_count));
2073  }
2074  for (auto& child : threads) {
2075  child.get();
2076  }
2077 
2078  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2079  for (size_t i = 0; i < bucket_sizes_for_dimension.size(); i++) {
2080  if (bucket_sizes_for_threads[thread_idx][i] > bucket_sizes_for_dimension[i]) {
2081  bucket_sizes_for_dimension[i] = bucket_sizes_for_threads[thread_idx][i];
2082  }
2083  }
2084  }
2085 }

+ Here is the caller graph for this function:

void compute_bucket_sizes_on_device ( double *  bucket_sizes_buffer,
const JoinColumn *  join_column,
const JoinColumnTypeInfo *  type_info,
const double *  bucket_size_thresholds 
)

Definition at line 562 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

Referenced by anonymous_namespace{OverlapsJoinHashTable.cpp}::compute_bucket_sizes().

565  {
566  cuda_kernel_launch_wrapper(compute_bucket_sizes_impl_gpu<2>,
567  bucket_sizes_buffer,
568  join_column,
569  type_info,
570  bucket_sz_threshold);
571 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int fill_baseline_hash_join_buff_32 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1672 of file HashJoinRuntime.cpp.

Referenced by fill_baseline_hash_join_buff().

1680  {
1681  return fill_baseline_hash_join_buff<int32_t>(hash_buff,
1682  entry_count,
1683  invalid_slot_val,
1684  key_component_count,
1685  with_val_slot,
1686  key_handler,
1687  num_elems,
1688  cpu_thread_idx,
1689  cpu_thread_count);
1690 }

+ Here is the caller graph for this function:

int fill_baseline_hash_join_buff_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1712 of file HashJoinRuntime.cpp.

1720  {
1721  return fill_baseline_hash_join_buff<int64_t>(hash_buff,
1722  entry_count,
1723  invalid_slot_val,
1724  key_component_count,
1725  with_val_slot,
1726  key_handler,
1727  num_elems,
1728  cpu_thread_idx,
1729  cpu_thread_count);
1730 }
void fill_baseline_hash_join_buff_on_device_32 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems 
)

Definition at line 426 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

Referenced by fill_baseline_hash_join_buff_on_device().

433  {
434  cuda_kernel_launch_wrapper(
435  fill_baseline_hash_join_buff_wrapper<int32_t, GenericKeyHandler>,
436  hash_buff,
437  entry_count,
438  invalid_slot_val,
439  key_component_count,
440  with_val_slot,
441  dev_err_buff,
442  key_handler,
443  num_elems);
444 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fill_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems 
)

Definition at line 446 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

453  {
454  cuda_kernel_launch_wrapper(
455  fill_baseline_hash_join_buff_wrapper<unsigned long long, GenericKeyHandler>,
456  hash_buff,
457  entry_count,
458  invalid_slot_val,
459  key_component_count,
460  with_val_slot,
461  dev_err_buff,
462  key_handler,
463  num_elems);
464 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

int fill_hash_join_buff ( int32_t *  buff,
const int32_t  invalid_slot_val,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const void *  sd_inner,
const void *  sd_outer,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 193 of file HashJoinRuntime.cpp.

References fill_hash_join_buff_impl(), get_hash_slot(), and SUFFIX.

Referenced by fill_hash_join_buff_wrapper().

200  {
201  auto slot_selector = [&](auto elem) {
202  return SUFFIX(get_hash_slot)(buff, elem, type_info.min_val);
203  };
204  return fill_hash_join_buff_impl(buff,
205  invalid_slot_val,
206  join_column,
207  type_info,
208  sd_inner_proxy,
209  sd_outer_proxy,
210  cpu_thread_idx,
211  cpu_thread_count,
212  slot_selector);
213 }
#define SUFFIX(name)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key)
Definition: JoinHashImpl.h:39
DEVICE auto fill_hash_join_buff_impl(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, SLOT_SELECTOR slot_sel)
const int64_t min_val

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int fill_hash_join_buff_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const void *  sd_inner,
const void *  sd_outer,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count,
const int64_t  bucket_normalization 
)

Definition at line 169 of file HashJoinRuntime.cpp.

References fill_hash_join_buff_impl(), get_bucketized_hash_slot(), and SUFFIX.

Referenced by fill_hash_join_buff_bucketized_wrapper(), and PerfectJoinHashTableBuilder::initOneToOneHashTableOnCpu().

177  {
178  auto slot_selector = [&](auto elem) {
179  return SUFFIX(get_bucketized_hash_slot)(
180  buff, elem, type_info.min_val, bucket_normalization);
181  };
182  return fill_hash_join_buff_impl(buff,
183  invalid_slot_val,
184  join_column,
185  type_info,
186  sd_inner_proxy,
187  sd_outer_proxy,
188  cpu_thread_idx,
189  cpu_thread_count,
190  slot_selector);
191 }
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t bucket_normalization)
Definition: JoinHashImpl.h:31
#define SUFFIX(name)
DEVICE auto fill_hash_join_buff_impl(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, SLOT_SELECTOR slot_sel)
const int64_t min_val

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fill_hash_join_buff_on_device ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info 
)

Definition at line 76 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_wrapper().

80  {
81  cuda_kernel_launch_wrapper(fill_hash_join_buff_wrapper,
82  buff,
83  invalid_slot_val,
84  join_column,
85  type_info,
86  dev_err_buff);
87 }
__global__ void fill_hash_join_buff_wrapper(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, int *err)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

void fill_hash_join_buff_on_device_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const int64_t  bucket_normalization 
)

Definition at line 61 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_bucketized_wrapper().

66  {
67  cuda_kernel_launch_wrapper(fill_hash_join_buff_bucketized_wrapper,
68  buff,
69  invalid_slot_val,
70  join_column,
71  type_info,
72  dev_err_buff,
73  bucket_normalization);
74 }
__global__ void fill_hash_join_buff_bucketized_wrapper(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, int *err, const int64_t bucket_normalization)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

void fill_hash_join_buff_on_device_sharded ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const ShardInfo  shard_info 
)

Definition at line 139 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_wrapper_sharded().

144  {
145  cuda_kernel_launch_wrapper(fill_hash_join_buff_wrapper_sharded,
146  buff,
147  invalid_slot_val,
148  join_column,
149  type_info,
150  shard_info,
151  dev_err_buff);
152 }
__global__ void fill_hash_join_buff_wrapper_sharded(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, int *err)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

void fill_hash_join_buff_on_device_sharded_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const ShardInfo  shard_info,
const int64_t  bucket_normalization 
)

Definition at line 121 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and fill_hash_join_buff_wrapper_sharded_bucketized().

128  {
129  cuda_kernel_launch_wrapper(fill_hash_join_buff_wrapper_sharded_bucketized,
130  buff,
131  invalid_slot_val,
132  join_column,
133  type_info,
134  shard_info,
135  dev_err_buff,
136  bucket_normalization);
137 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)
__global__ void fill_hash_join_buff_wrapper_sharded_bucketized(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, int *err, const int64_t bucket_normalization)

+ Here is the call graph for this function:

void fill_one_to_many_baseline_hash_table_32 ( int32_t *  buff,
const int32_t *  composite_key_dict,
const int64_t  hash_entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const std::vector< const void * > &  sd_inner_proxy_per_key,
const std::vector< const void * > &  sd_outer_proxy_per_key,
const int32_t  cpu_thread_count 
)

Definition at line 1914 of file HashJoinRuntime.cpp.

Referenced by BaselineJoinHashTableBuilder::initHashTableOnCpu().

1925  {
1926  fill_one_to_many_baseline_hash_table<int32_t>(buff,
1927  composite_key_dict,
1928  hash_entry_count,
1929  invalid_slot_val,
1930  key_component_count,
1931  join_column_per_key,
1932  type_info_per_key,
1933  join_bucket_info,
1934  sd_inner_proxy_per_key,
1935  sd_outer_proxy_per_key,
1936  cpu_thread_count);
1937 }

+ Here is the caller graph for this function:

void fill_one_to_many_baseline_hash_table_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const int64_t  hash_entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const std::vector< const void * > &  sd_inner_proxy_per_key,
const std::vector< const void * > &  sd_outer_proxy_per_key,
const int32_t  cpu_thread_count 
)

Definition at line 1939 of file HashJoinRuntime.cpp.

Referenced by BaselineJoinHashTableBuilder::initHashTableOnCpu().

1950  {
1951  fill_one_to_many_baseline_hash_table<int64_t>(buff,
1952  composite_key_dict,
1953  hash_entry_count,
1954  invalid_slot_val,
1955  key_component_count,
1956  join_column_per_key,
1957  type_info_per_key,
1958  join_bucket_info,
1959  sd_inner_proxy_per_key,
1960  sd_outer_proxy_per_key,
1961  cpu_thread_count);
1962 }

+ Here is the caller graph for this function:

void fill_one_to_many_baseline_hash_table_on_device_32 ( int32_t *  buff,
const int32_t *  composite_key_dict,
const int64_t  hash_entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems 
)

Definition at line 487 of file HashJoinRuntimeGpu.cu.

Referenced by fill_one_to_many_baseline_hash_table_on_device().

494  {
495  fill_one_to_many_baseline_hash_table_on_device<int32_t>(buff,
496  composite_key_dict,
497  hash_entry_count,
498  invalid_slot_val,
499  key_handler,
500  num_elems);
501 }

+ Here is the caller graph for this function:

void fill_one_to_many_baseline_hash_table_on_device_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const int64_t  hash_entry_count,
const int32_t  invalid_slot_val,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems 
)

Definition at line 503 of file HashJoinRuntimeGpu.cu.

509  {
510  fill_one_to_many_baseline_hash_table_on_device<int64_t>(buff,
511  composite_key_dict,
512  hash_entry_count,
513  invalid_slot_val,
514  key_handler,
515  num_elems);
516 }
void fill_one_to_many_hash_table ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn &  join_column,
const JoinColumnTypeInfo &  type_info,
const void *  sd_inner_proxy,
const void *  sd_outer_proxy,
const unsigned  cpu_thread_count 
)

Definition at line 1381 of file HashJoinRuntime.cpp.

References count_matches(), fill_one_to_many_hash_table_impl(), fill_row_ids(), HashEntryInfo::hash_entry_count, and SUFFIX.

Referenced by PerfectJoinHashTableBuilder::initOneToManyHashTableOnCpu().

1388  {
1389  auto launch_count_matches = [count_buff = buff + hash_entry_info.hash_entry_count,
1390  invalid_slot_val,
1391  &join_column,
1392  &type_info,
1393  sd_inner_proxy,
1394  sd_outer_proxy](auto cpu_thread_idx,
1395  auto cpu_thread_count) {
1396  SUFFIX(count_matches)
1397  (count_buff,
1398  invalid_slot_val,
1399  join_column,
1400  type_info,
1401  sd_inner_proxy,
1402  sd_outer_proxy,
1403  cpu_thread_idx,
1404  cpu_thread_count);
1405  };
1406  auto launch_fill_row_ids = [hash_entry_count = hash_entry_info.hash_entry_count,
1407  buff,
1408  invalid_slot_val,
1409  &join_column,
1410  &type_info,
1411  sd_inner_proxy,
1412  sd_outer_proxy](auto cpu_thread_idx,
1413  auto cpu_thread_count) {
1414  SUFFIX(fill_row_ids)
1415  (buff,
1416  hash_entry_count,
1417  invalid_slot_val,
1418  join_column,
1419  type_info,
1420  sd_inner_proxy,
1421  sd_outer_proxy,
1422  cpu_thread_idx,
1423  cpu_thread_count);
1424  };
1425 
1426  fill_one_to_many_hash_table_impl(buff,
1427  hash_entry_info.hash_entry_count,
1428  invalid_slot_val,
1429  join_column,
1430  type_info,
1431  sd_inner_proxy,
1432  sd_outer_proxy,
1433  cpu_thread_count,
1434  launch_count_matches,
1435  launch_fill_row_ids);
1436 }
#define SUFFIX(name)
void fill_one_to_many_hash_table_impl(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count, COUNT_MATCHES_LAUNCH_FUNCTOR count_matches_func, FILL_ROW_IDS_LAUNCH_FUNCTOR fill_row_ids_func)
size_t hash_entry_count
GLOBAL void SUFFIX() count_matches(int32_t *count_buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
GLOBAL void SUFFIX() fill_row_ids(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fill_one_to_many_hash_table_bucketized ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const void *  sd_inner_proxy,
const void *  sd_outer_proxy,
const unsigned  cpu_thread_count 
)

Definition at line 1438 of file HashJoinRuntime.cpp.

References HashEntryInfo::bucket_normalization, count_matches_bucketized(), fill_one_to_many_hash_table_impl(), fill_row_ids_bucketized(), HashEntryInfo::getNormalizedHashEntryCount(), and SUFFIX.

Referenced by PerfectJoinHashTableBuilder::initOneToManyHashTableOnCpu().

1445  {
1446  auto bucket_normalization = hash_entry_info.bucket_normalization;
1447  auto hash_entry_count = hash_entry_info.getNormalizedHashEntryCount();
1448  auto launch_count_matches = [bucket_normalization,
1449  count_buff = buff + hash_entry_count,
1450  invalid_slot_val,
1451  &join_column,
1452  &type_info,
1453  sd_inner_proxy,
1454  sd_outer_proxy](auto cpu_thread_idx,
1455  auto cpu_thread_count) {
1456  SUFFIX(count_matches_bucketized)
1457  (count_buff,
1458  invalid_slot_val,
1459  join_column,
1460  type_info,
1461  sd_inner_proxy,
1462  sd_outer_proxy,
1463  cpu_thread_idx,
1464  cpu_thread_count,
1465  bucket_normalization);
1466  };
1467  auto launch_fill_row_ids = [bucket_normalization,
1468  hash_entry_count,
1469  buff,
1470  invalid_slot_val,
1471  &join_column,
1472  &type_info,
1473  sd_inner_proxy,
1474  sd_outer_proxy](auto cpu_thread_idx,
1475  auto cpu_thread_count) {
1476  SUFFIX(fill_row_ids_bucketized)
1477  (buff,
1478  hash_entry_count,
1479  invalid_slot_val,
1480  join_column,
1481  type_info,
1482  sd_inner_proxy,
1483  sd_outer_proxy,
1484  cpu_thread_idx,
1485  cpu_thread_count,
1486  bucket_normalization);
1487  };
1488 
1489  fill_one_to_many_hash_table_impl(buff,
1490  hash_entry_count,
1491  invalid_slot_val,
1492  join_column,
1493  type_info,
1494  sd_inner_proxy,
1495  sd_outer_proxy,
1496  cpu_thread_count,
1497  launch_count_matches,
1498  launch_fill_row_ids);
1499 }
GLOBAL void SUFFIX() count_matches_bucketized(int32_t *count_buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
#define SUFFIX(name)
void fill_one_to_many_hash_table_impl(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count, COUNT_MATCHES_LAUNCH_FUNCTOR count_matches_func, FILL_ROW_IDS_LAUNCH_FUNCTOR fill_row_ids_func)
int64_t bucket_normalization
GLOBAL void SUFFIX() fill_row_ids_bucketized(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
size_t getNormalizedHashEntryCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void fill_one_to_many_hash_table_on_device ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info 
)

Definition at line 217 of file HashJoinRuntimeGpu.cu.

References count_matches(), cuda_kernel_launch_wrapper(), fill_one_to_many_hash_table_on_device_impl(), fill_row_ids(), HashEntryInfo::hash_entry_count, and SUFFIX.

221  {
222  auto hash_entry_count = hash_entry_info.hash_entry_count;
223  auto count_matches_func = [hash_entry_count,
224  count_buff = buff + hash_entry_count,
225  invalid_slot_val,
226  join_column,
227  type_info] {
228  cuda_kernel_launch_wrapper(
229  SUFFIX(count_matches), count_buff, invalid_slot_val, join_column, type_info);
230  };
231 
232  auto fill_row_ids_func =
233  [buff, hash_entry_count, invalid_slot_val, join_column, type_info] {
234  cuda_kernel_launch_wrapper(SUFFIX(fill_row_ids),
235  buff,
236  hash_entry_count,
237  invalid_slot_val,
238  join_column,
239  type_info);
240  };
241 
242  fill_one_to_many_hash_table_on_device_impl(buff,
243  hash_entry_count,
244  invalid_slot_val,
245  join_column,
246  type_info,
247  count_matches_func,
248  fill_row_ids_func);
249 }
void fill_one_to_many_hash_table_on_device_impl(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, COUNT_MATCHES_FUNCTOR count_matches_func, FILL_ROW_IDS_FUNCTOR fill_row_ids_func)
#define SUFFIX(name)
size_t hash_entry_count
GLOBAL void SUFFIX() count_matches(int32_t *count_buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
GLOBAL void SUFFIX() fill_row_ids(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

void fill_one_to_many_hash_table_on_device_bucketized ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info 
)

Definition at line 251 of file HashJoinRuntimeGpu.cu.

References HashEntryInfo::bucket_normalization, count_matches_bucketized(), cuda_kernel_launch_wrapper(), fill_one_to_many_hash_table_on_device_impl(), fill_row_ids_bucketized(), HashEntryInfo::getNormalizedHashEntryCount(), and SUFFIX.

256  {
257  auto hash_entry_count = hash_entry_info.getNormalizedHashEntryCount();
258  auto count_matches_func = [count_buff = buff + hash_entry_count,
259  invalid_slot_val,
260  join_column,
261  type_info,
262  bucket_normalization =
263  hash_entry_info.bucket_normalization] {
264  cuda_kernel_launch_wrapper(SUFFIX(count_matches_bucketized),
265  count_buff,
266  invalid_slot_val,
267  join_column,
268  type_info,
269  bucket_normalization);
270  };
271 
272  auto fill_row_ids_func = [buff,
273  hash_entry_count =
274  hash_entry_info.getNormalizedHashEntryCount(),
275  invalid_slot_val,
276  join_column,
277  type_info,
278  bucket_normalization = hash_entry_info.bucket_normalization] {
279  cuda_kernel_launch_wrapper(SUFFIX(fill_row_ids_bucketized),
280  buff,
281  hash_entry_count,
282  invalid_slot_val,
283  join_column,
284  type_info,
285  bucket_normalization);
286  };
287 
288  fill_one_to_many_hash_table_on_device_impl(buff,
289  hash_entry_count,
290  invalid_slot_val,
291  join_column,
292  type_info,
293  count_matches_func,
294  fill_row_ids_func);
295 }
GLOBAL void SUFFIX() count_matches_bucketized(int32_t *count_buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
void fill_one_to_many_hash_table_on_device_impl(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, COUNT_MATCHES_FUNCTOR count_matches_func, FILL_ROW_IDS_FUNCTOR fill_row_ids_func)
#define SUFFIX(name)
int64_t bucket_normalization
GLOBAL void SUFFIX() fill_row_ids_bucketized(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)
size_t getNormalizedHashEntryCount() const

+ Here is the call graph for this function:

void fill_one_to_many_hash_table_on_device_sharded ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const ShardInfo shard_info 
)

Definition at line 297 of file HashJoinRuntimeGpu.cu.

References count_matches_sharded(), cuda_kernel_launch_wrapper(), fill_row_ids_sharded(), HashEntryInfo::hash_entry_count, inclusive_scan(), set_valid_pos(), set_valid_pos_flag(), and SUFFIX.

302  {
303  auto hash_entry_count = hash_entry_info.hash_entry_count;
304  int32_t* pos_buff = buff;
305  int32_t* count_buff = buff + hash_entry_count;
306  cudaMemset(count_buff, 0, hash_entry_count * sizeof(int32_t));
307  cuda_kernel_launch_wrapper(SUFFIX(count_matches_sharded),
308  count_buff,
309  invalid_slot_val,
310  join_column,
311  type_info,
312  shard_info);
313 
314  cuda_kernel_launch_wrapper(set_valid_pos_flag, pos_buff, count_buff, hash_entry_count);
315 
316  auto count_buff_dev_ptr = thrust::device_pointer_cast(count_buff);
317  thrust::inclusive_scan(
318  count_buff_dev_ptr, count_buff_dev_ptr + hash_entry_count, count_buff_dev_ptr);
319  cuda_kernel_launch_wrapper(set_valid_pos, pos_buff, count_buff, hash_entry_count);
320  cudaMemset(count_buff, 0, hash_entry_count * sizeof(int32_t));
321  cuda_kernel_launch_wrapper(SUFFIX(fill_row_ids_sharded),
322  buff,
323  hash_entry_count,
324  invalid_slot_val,
325  join_column,
326  type_info,
327  shard_info);
328 }
GLOBAL void SUFFIX() fill_row_ids_sharded(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define SUFFIX(name)
GLOBAL void SUFFIX() count_matches_sharded(int32_t *count_buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)
size_t hash_entry_count
__global__ void set_valid_pos_flag(int32_t *pos_buff, const int32_t *count_buff, const int64_t entry_count)
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)
__global__ void set_valid_pos(int32_t *pos_buff, int32_t *count_buff, const int64_t entry_count)

+ Here is the call graph for this function:

void fill_one_to_many_hash_table_sharded_bucketized ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const ShardInfo shard_info,
const void *  sd_inner_proxy,
const void *  sd_outer_proxy,
const unsigned  cpu_thread_count 
)
ColumnType get_join_column_type_kind ( const SQLTypeInfo & ti)
inline

Definition at line 127 of file HashJoinRuntime.h.

References SQLTypeInfo::is_date_in_days(), is_unsigned_type(), Signed, SmallDate, and Unsigned.

Referenced by PerfectJoinHashTable::fetchColumnsForDevice(), BaselineJoinHashTable::fetchColumnsForDevice(), OverlapsJoinHashTable::fetchColumnsForDevice(), PerfectJoinHashTableBuilder::initOneToManyHashTableOnCpu(), and PerfectJoinHashTableBuilder::initOneToOneHashTableOnCpu().

127  {
128  if (ti.is_date_in_days()) {
129  return SmallDate;
130  } else {
131  return is_unsigned_type(ti) ? Unsigned : Signed;
132  }
133 }
bool is_date_in_days() const
Definition: sqltypes.h:720
bool is_unsigned_type(const SQLTypeInfo &ti)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void init_baseline_hash_join_buff_32 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1640 of file HashJoinRuntime.cpp.

Referenced by BaselineJoinHashTableBuilder::initHashTableOnCpu().

1646  {
1647  init_baseline_hash_join_buff<int32_t>(hash_join_buff,
1648  entry_count,
1649  key_component_count,
1650  with_val_slot,
1651  invalid_slot_val,
1652  cpu_thread_idx,
1653  cpu_thread_count);
1654 }

+ Here is the caller graph for this function:

void init_baseline_hash_join_buff_64 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1656 of file HashJoinRuntime.cpp.

Referenced by BaselineJoinHashTableBuilder::initHashTableOnCpu().

1662  {
1663  init_baseline_hash_join_buff<int64_t>(hash_join_buff,
1664  entry_count,
1665  key_component_count,
1666  with_val_slot,
1667  invalid_slot_val,
1668  cpu_thread_idx,
1669  cpu_thread_count);
1670 }

+ Here is the caller graph for this function:

void init_baseline_hash_join_buff_on_device_32 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val 
)

Definition at line 379 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().

383  {
384  cuda_kernel_launch_wrapper(init_baseline_hash_join_buff_wrapper<int32_t>,
385  hash_join_buff,
386  entry_count,
387  key_component_count,
388  with_val_slot,
389  invalid_slot_val);
390 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void init_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val 
)

Definition at line 392 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().

396  {
397  cuda_kernel_launch_wrapper(init_baseline_hash_join_buff_wrapper<int64_t>,
398  hash_join_buff,
399  entry_count,
400  key_component_count,
401  with_val_slot,
402  invalid_slot_val);
403 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void init_hash_join_buff ( int32_t *  buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 92 of file HashJoinRuntime.cpp.

References i.

Referenced by init_hash_join_buff_wrapper(), BaselineJoinHashTableBuilder::initHashTableOnCpu(), PerfectJoinHashTableBuilder::initOneToManyHashTableOnCpu(), and PerfectJoinHashTableBuilder::initOneToOneHashTableOnCpu().

96  {
97 #ifdef __CUDACC__
98  int32_t start = threadIdx.x + blockDim.x * blockIdx.x;
99  int32_t step = blockDim.x * gridDim.x;
100 #else
101  int32_t start = cpu_thread_idx;
102  int32_t step = cpu_thread_count;
103 #endif
104  for (int64_t i = start; i < hash_entry_count; i += step) {
105  groups_buffer[i] = invalid_slot_val;
106  }
107 }

+ Here is the caller graph for this function:

void init_hash_join_buff_on_device ( int32_t *  buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val 
)

Definition at line 160 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper(), and init_hash_join_buff_wrapper().

Referenced by BaselineJoinHashTableBuilder::initHashTableOnGpu().

162  {
163  cuda_kernel_launch_wrapper(
164  init_hash_join_buff_wrapper, buff, hash_entry_count, invalid_slot_val);
165 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)
__global__ void init_hash_join_buff_wrapper(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int overlaps_fill_baseline_hash_join_buff_32 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1692 of file HashJoinRuntime.cpp.

Referenced by fill_baseline_hash_join_buff().

1700  {
1701  return fill_baseline_hash_join_buff<int32_t>(hash_buff,
1702  entry_count,
1703  invalid_slot_val,
1704  key_component_count,
1705  with_val_slot,
1706  key_handler,
1707  num_elems,
1708  cpu_thread_idx,
1709  cpu_thread_count);
1710 }

+ Here is the caller graph for this function:

int overlaps_fill_baseline_hash_join_buff_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1732 of file HashJoinRuntime.cpp.

1740  {
1741  return fill_baseline_hash_join_buff<int64_t>(hash_buff,
1742  entry_count,
1743  invalid_slot_val,
1744  key_component_count,
1745  with_val_slot,
1746  key_handler,
1747  num_elems,
1748  cpu_thread_idx,
1749  cpu_thread_count);
1750 }
void overlaps_fill_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems 
)

Definition at line 466 of file HashJoinRuntimeGpu.cu.

References cuda_kernel_launch_wrapper().

474  {
475  cuda_kernel_launch_wrapper(
476  fill_baseline_hash_join_buff_wrapper<unsigned long long, OverlapsKeyHandler>,
477  hash_buff,
478  entry_count,
479  invalid_slot_val,
480  key_component_count,
481  with_val_slot,
482  dev_err_buff,
483  key_handler,
484  num_elems);
485 }
void cuda_kernel_launch_wrapper(F func, ARGS &&...args)

+ Here is the call graph for this function:

void overlaps_fill_one_to_many_baseline_hash_table_on_device_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const int64_t  hash_entry_count,
const int32_t  invalid_slot_val,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems 
)

Definition at line 518 of file HashJoinRuntimeGpu.cu.

524  {
525  fill_one_to_many_baseline_hash_table_on_device<int64_t>(buff,
526  composite_key_dict,
527  hash_entry_count,
528  invalid_slot_val,
529  key_handler,
530  num_elems);
531 }

Variable Documentation