OmniSciDB  dfae7c3b14
HashJoinRuntime.h File Reference
#include <cstddef>
#include <cstdint>
#include <vector>
#include "../../Shared/SqlTypesLayout.h"
#include "../../Shared/sqltypes.h"
#include "../RuntimeFunctions.h"
#include "../../Shared/funcannotations.h"
+ Include dependency graph for HashJoinRuntime.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

struct  HashEntryInfo
 
struct  JoinChunk
 
struct  JoinColumn
 
struct  JoinColumnTypeInfo
 
struct  JoinBucketInfo
 
struct  ShardInfo
 

Enumerations

enum  ColumnType { SmallDate = 0, Signed = 1, Unsigned = 2, Double = 3 }
 

Functions

void init_hash_join_buff (int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void init_hash_join_buff_on_device (int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
 
void init_baseline_hash_join_buff_32 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void init_baseline_hash_join_buff_64 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void init_baseline_hash_join_buff_on_device_32 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
 
void init_baseline_hash_join_buff_on_device_64 (int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
 
ColumnType get_join_column_type_kind (const SQLTypeInfo &ti)
 
int fill_hash_join_buff_bucketized (int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner, const void *sd_outer, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
 
int fill_hash_join_buff (int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner, const void *sd_outer, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void fill_hash_join_buff_on_device (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const size_t block_size_x, const size_t grid_size_x)
 
void fill_hash_join_buff_on_device_bucketized (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const size_t block_size_x, const size_t grid_size_x, const int64_t bucket_normalization)
 
void fill_hash_join_buff_on_device_sharded (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const size_t block_size_x, const size_t grid_size_x)
 
void fill_hash_join_buff_on_device_sharded_bucketized (int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const size_t block_size_x, const size_t grid_size_x, const int64_t bucket_normalization)
 
void fill_one_to_many_hash_table (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count)
 
void fill_one_to_many_hash_table_bucketized (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count)
 
void fill_one_to_many_hash_table_sharded_bucketized (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count)
 
void fill_one_to_many_hash_table_on_device (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const size_t block_size_x, const size_t grid_size_x)
 
void fill_one_to_many_hash_table_on_device_bucketized (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const size_t block_size_x, const size_t grid_size_x)
 
void fill_one_to_many_hash_table_on_device_sharded (int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info, const size_t block_size_x, const size_t grid_size_x)
 
int fill_baseline_hash_join_buff_32 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
int overlaps_fill_baseline_hash_join_buff_32 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
int fill_baseline_hash_join_buff_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
int overlaps_fill_baseline_hash_join_buff_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
 
void fill_baseline_hash_join_buff_on_device_32 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void overlaps_fill_baseline_hash_join_buff_on_device_64 (int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void fill_one_to_many_baseline_hash_table_32 (int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void *> &sd_inner_proxy_per_key, const std::vector< const void *> &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
 
void fill_one_to_many_baseline_hash_table_64 (int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void *> &sd_inner_proxy_per_key, const std::vector< const void *> &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
 
void fill_one_to_many_baseline_hash_table_on_device_32 (int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const GenericKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void overlaps_fill_one_to_many_baseline_hash_table_on_device_64 (int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void approximate_distinct_tuples (uint8_t *hll_buffer_all_cpus, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const int thread_count)
 
void approximate_distinct_tuples_overlaps (uint8_t *hll_buffer_all_cpus, std::vector< int32_t > &row_counts, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_buckets_per_key, const int thread_count)
 
void approximate_distinct_tuples_on_device (uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void approximate_distinct_tuples_on_device_overlaps (uint8_t *hll_buffer, const uint32_t b, int32_t *row_counts_buffer, const OverlapsKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
 
void compute_bucket_sizes (std::vector< double > &bucket_sizes_for_dimension, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const double bucket_size_threshold, const int thread_count)
 
void compute_bucket_sizes_on_device (double *bucket_sizes_buffer, const JoinColumn *join_column, const JoinColumnTypeInfo *type_info, const double bucket_sz_threshold, const size_t block_size_x, const size_t grid_size_x)
 

Variables

const size_t g_maximum_conditions_to_coalesce {8}
 

Enumeration Type Documentation

◆ ColumnType

enum ColumnType
Enumerator
SmallDate 
Signed 
Unsigned 
Double 

Definition at line 106 of file HashJoinRuntime.h.

Function Documentation

◆ approximate_distinct_tuples()

void approximate_distinct_tuples ( uint8_t *  hll_buffer_all_cpus,
const uint32_t  b,
const size_t  padded_size_bytes,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const int  thread_count 
)

Definition at line 1939 of file HashJoinRuntime.cpp.

References approximate_distinct_tuples_impl(), CHECK, and CHECK_EQ.

Referenced by BaselineJoinHashTable::approximateTupleCount().

1944  {
1945  CHECK_EQ(join_column_per_key.size(), type_info_per_key.size());
1946  CHECK(!join_column_per_key.empty());
1947 
1948  std::vector<std::future<void>> approx_distinct_threads;
1949  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
1950  approx_distinct_threads.push_back(std::async(
1951  std::launch::async,
1952  [&join_column_per_key,
1953  &type_info_per_key,
1954  b,
1955  hll_buffer_all_cpus,
1956  padded_size_bytes,
1957  thread_idx,
1958  thread_count] {
1959  auto hll_buffer = hll_buffer_all_cpus + thread_idx * padded_size_bytes;
1960 
1961  const auto key_handler = GenericKeyHandler(join_column_per_key.size(),
1962  false,
1963  &join_column_per_key[0],
1964  &type_info_per_key[0],
1965  nullptr,
1966  nullptr);
1967  approximate_distinct_tuples_impl(hll_buffer,
1968  nullptr,
1969  b,
1970  join_column_per_key[0].num_elems,
1971  &key_handler,
1972  thread_idx,
1973  thread_count);
1974  }));
1975  }
1976  for (auto& child : approx_distinct_threads) {
1977  child.get();
1978  }
1979 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
GLOBAL void SUFFIX() approximate_distinct_tuples_impl(uint8_t *hll_buffer, int32_t *row_count_buffer, const uint32_t b, const int64_t num_elems, const KEY_HANDLER *f, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ approximate_distinct_tuples_on_device()

void approximate_distinct_tuples_on_device ( uint8_t *  hll_buffer,
const uint32_t  b,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by BaselineJoinHashTable::approximateTupleCount().

+ Here is the caller graph for this function:

◆ approximate_distinct_tuples_on_device_overlaps()

void approximate_distinct_tuples_on_device_overlaps ( uint8_t *  hll_buffer,
const uint32_t  b,
int32_t *  row_counts_buffer,
const OverlapsKeyHandler *  key_handler,
const int64_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by OverlapsJoinHashTable::approximateTupleCount().

+ Here is the caller graph for this function:

◆ approximate_distinct_tuples_overlaps()

void approximate_distinct_tuples_overlaps ( uint8_t *  hll_buffer_all_cpus,
std::vector< int32_t > &  row_counts,
const uint32_t  b,
const size_t  padded_size_bytes,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const std::vector< JoinBucketInfo > &  join_buckets_per_key,
const int  thread_count 
)

Definition at line 1981 of file HashJoinRuntime.cpp.

References approximate_distinct_tuples_impl(), CHECK, CHECK_EQ, and inclusive_scan().

Referenced by OverlapsJoinHashTable::approximateTupleCount().

1989  {
1990  CHECK_EQ(join_column_per_key.size(), join_buckets_per_key.size());
1991  CHECK_EQ(join_column_per_key.size(), type_info_per_key.size());
1992  CHECK(!join_column_per_key.empty());
1993 
1994  std::vector<std::future<void>> approx_distinct_threads;
1995  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
1996  approx_distinct_threads.push_back(std::async(
1997  std::launch::async,
1998  [&join_column_per_key,
1999  &join_buckets_per_key,
2000  &row_counts,
2001  b,
2002  hll_buffer_all_cpus,
2003  padded_size_bytes,
2004  thread_idx,
2005  thread_count] {
2006  auto hll_buffer = hll_buffer_all_cpus + thread_idx * padded_size_bytes;
2007 
2008  const auto key_handler = OverlapsKeyHandler(
2009  join_buckets_per_key[0].bucket_sizes_for_dimension.size(),
2010  &join_column_per_key[0],
2011  join_buckets_per_key[0].bucket_sizes_for_dimension.data());
2012  approximate_distinct_tuples_impl(hll_buffer,
2013  row_counts.data(),
2014  b,
2015  join_column_per_key[0].num_elems,
2016  &key_handler,
2017  thread_idx,
2018  thread_count);
2019  }));
2020  }
2021  for (auto& child : approx_distinct_threads) {
2022  child.get();
2023  }
2024 
2025  inclusive_scan(
2026  row_counts.begin(), row_counts.end(), row_counts.begin(), thread_count);
2027 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void inclusive_scan(InputIterator first, InputIterator last, OutputIterator out, const size_t thread_count)
GLOBAL void SUFFIX() approximate_distinct_tuples_impl(uint8_t *hll_buffer, int32_t *row_count_buffer, const uint32_t b, const int64_t num_elems, const KEY_HANDLER *f, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ compute_bucket_sizes()

void compute_bucket_sizes ( std::vector< double > &  bucket_sizes_for_dimension,
const JoinColumn &  join_column,
const JoinColumnTypeInfo &  type_info,
const double  bucket_size_threshold,
const int  thread_count 
)

Definition at line 2029 of file HashJoinRuntime.cpp.

Referenced by OverlapsJoinHashTable::computeBucketSizes().

2033  {
2034  std::vector<std::vector<double>> bucket_sizes_for_threads;
2035  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2036  bucket_sizes_for_threads.emplace_back(bucket_sizes_for_dimension.size(),
2037  std::numeric_limits<double>::max());
2038  }
2039  std::vector<std::future<void>> threads;
2040  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2041  threads.push_back(std::async(std::launch::async,
2042  compute_bucket_sizes_impl<2>,
2043  bucket_sizes_for_threads[thread_idx].data(),
2044  &join_column,
2045  &type_info,
2046  bucket_size_threshold,
2047  thread_idx,
2048  thread_count));
2049  }
2050  for (auto& child : threads) {
2051  child.get();
2052  }
2053 
2054  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
2055  for (size_t i = 0; i < bucket_sizes_for_dimension.size(); i++) {
2056  if (bucket_sizes_for_threads[thread_idx][i] < bucket_sizes_for_dimension[i]) {
2057  bucket_sizes_for_dimension[i] = bucket_sizes_for_threads[thread_idx][i];
2058  }
2059  }
2060  }
2061 }
+ Here is the caller graph for this function:

◆ compute_bucket_sizes_on_device()

void compute_bucket_sizes_on_device ( double *  bucket_sizes_buffer,
const JoinColumn *  join_column,
const JoinColumnTypeInfo *  type_info,
const double  bucket_sz_threshold,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by OverlapsJoinHashTable::computeBucketSizes().

+ Here is the caller graph for this function:

◆ fill_baseline_hash_join_buff_32()

int fill_baseline_hash_join_buff_32 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1647 of file HashJoinRuntime.cpp.

Referenced by BaselineJoinHashTable::initHashTableOnCpu().

1655  {
1656  return fill_baseline_hash_join_buff<int32_t>(hash_buff,
1657  entry_count,
1658  invalid_slot_val,
1659  key_component_count,
1660  with_val_slot,
1661  key_handler,
1662  num_elems,
1663  cpu_thread_idx,
1664  cpu_thread_count);
1665 }
+ Here is the caller graph for this function:

◆ fill_baseline_hash_join_buff_64()

int fill_baseline_hash_join_buff_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1687 of file HashJoinRuntime.cpp.

Referenced by BaselineJoinHashTable::initHashTableOnCpu().

1695  {
1696  return fill_baseline_hash_join_buff<int64_t>(hash_buff,
1697  entry_count,
1698  invalid_slot_val,
1699  key_component_count,
1700  with_val_slot,
1701  key_handler,
1702  num_elems,
1703  cpu_thread_idx,
1704  cpu_thread_count);
1705 }
+ Here is the caller graph for this function:

◆ fill_baseline_hash_join_buff_on_device_32()

void fill_baseline_hash_join_buff_on_device_32 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by BaselineJoinHashTable::initHashTableOnGpu().

+ Here is the caller graph for this function:

◆ fill_baseline_hash_join_buff_on_device_64()

void fill_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by BaselineJoinHashTable::initHashTableOnGpu().

+ Here is the caller graph for this function:

◆ fill_hash_join_buff()

int fill_hash_join_buff ( int32_t *  buff,
const int32_t  invalid_slot_val,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const void *  sd_inner,
const void *  sd_outer,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 188 of file HashJoinRuntime.cpp.

References fill_hash_join_buff_impl(), get_hash_slot(), and SUFFIX.

195  {
196  auto slot_selector = [&](auto elem) {
197  return SUFFIX(get_hash_slot)(buff, elem, type_info.min_val);
198  };
199  return fill_hash_join_buff_impl(buff,
200  invalid_slot_val,
201  join_column,
202  type_info,
203  sd_inner_proxy,
204  sd_outer_proxy,
205  cpu_thread_idx,
206  cpu_thread_count,
207  slot_selector);
208 }
#define SUFFIX(name)
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key)
Definition: JoinHashImpl.h:39
DEVICE auto fill_hash_join_buff_impl(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, SLOT_SELECTOR slot_sel)
const int64_t min_val
+ Here is the call graph for this function:

◆ fill_hash_join_buff_bucketized()

int fill_hash_join_buff_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const void *  sd_inner,
const void *  sd_outer,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count,
const int64_t  bucket_normalization 
)

Definition at line 164 of file HashJoinRuntime.cpp.

References fill_hash_join_buff_impl(), get_bucketized_hash_slot(), and SUFFIX.

Referenced by JoinHashTable::initOneToOneHashTableOnCpu().

172  {
173  auto slot_selector = [&](auto elem) {
174  return SUFFIX(get_bucketized_hash_slot)(
175  buff, elem, type_info.min_val, bucket_normalization);
176  };
177  return fill_hash_join_buff_impl(buff,
178  invalid_slot_val,
179  join_column,
180  type_info,
181  sd_inner_proxy,
182  sd_outer_proxy,
183  cpu_thread_idx,
184  cpu_thread_count,
185  slot_selector);
186 }
ALWAYS_INLINE DEVICE int32_t *SUFFIX() get_bucketized_hash_slot(int32_t *buff, const int64_t key, const int64_t min_key, const int64_t bucket_normalization)
Definition: JoinHashImpl.h:31
#define SUFFIX(name)
DEVICE auto fill_hash_join_buff_impl(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, SLOT_SELECTOR slot_sel)
const int64_t min_val
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ fill_hash_join_buff_on_device()

void fill_hash_join_buff_on_device ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const size_t  block_size_x,
const size_t  grid_size_x 
)

◆ fill_hash_join_buff_on_device_bucketized()

void fill_hash_join_buff_on_device_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const size_t  block_size_x,
const size_t  grid_size_x,
const int64_t  bucket_normalization 
)

Referenced by JoinHashTable::initOneToOneHashTable().

+ Here is the caller graph for this function:

◆ fill_hash_join_buff_on_device_sharded()

void fill_hash_join_buff_on_device_sharded ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const ShardInfo  shard_info,
const size_t  block_size_x,
const size_t  grid_size_x 
)

◆ fill_hash_join_buff_on_device_sharded_bucketized()

void fill_hash_join_buff_on_device_sharded_bucketized ( int32_t *  buff,
const int32_t  invalid_slot_val,
int *  dev_err_buff,
const JoinColumn  join_column,
const JoinColumnTypeInfo  type_info,
const ShardInfo  shard_info,
const size_t  block_size_x,
const size_t  grid_size_x,
const int64_t  bucket_normalization 
)

Referenced by JoinHashTable::initOneToOneHashTable().

+ Here is the caller graph for this function:

◆ fill_one_to_many_baseline_hash_table_32()

void fill_one_to_many_baseline_hash_table_32 ( int32_t *  buff,
const int32_t *  composite_key_dict,
const int64_t  hash_entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const std::vector< const void *> &  sd_inner_proxy_per_key,
const std::vector< const void *> &  sd_outer_proxy_per_key,
const int32_t  cpu_thread_count 
)

Definition at line 1889 of file HashJoinRuntime.cpp.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu(), and BaselineJoinHashTable::initHashTableOnCpu().

1900  {
1901  fill_one_to_many_baseline_hash_table<int32_t>(buff,
1902  composite_key_dict,
1903  hash_entry_count,
1904  invalid_slot_val,
1905  key_component_count,
1906  join_column_per_key,
1907  type_info_per_key,
1908  join_bucket_info,
1909  sd_inner_proxy_per_key,
1910  sd_outer_proxy_per_key,
1911  cpu_thread_count);
1912 }
+ Here is the caller graph for this function:

◆ fill_one_to_many_baseline_hash_table_64()

void fill_one_to_many_baseline_hash_table_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const int64_t  hash_entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const std::vector< JoinColumn > &  join_column_per_key,
const std::vector< JoinColumnTypeInfo > &  type_info_per_key,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const std::vector< const void *> &  sd_inner_proxy_per_key,
const std::vector< const void *> &  sd_outer_proxy_per_key,
const int32_t  cpu_thread_count 
)

Definition at line 1914 of file HashJoinRuntime.cpp.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu(), and BaselineJoinHashTable::initHashTableOnCpu().

1925  {
1926  fill_one_to_many_baseline_hash_table<int64_t>(buff,
1927  composite_key_dict,
1928  hash_entry_count,
1929  invalid_slot_val,
1930  key_component_count,
1931  join_column_per_key,
1932  type_info_per_key,
1933  join_bucket_info,
1934  sd_inner_proxy_per_key,
1935  sd_outer_proxy_per_key,
1936  cpu_thread_count);
1937 }
+ Here is the caller graph for this function:

◆ fill_one_to_many_baseline_hash_table_on_device_32()

void fill_one_to_many_baseline_hash_table_on_device_32 ( int32_t *  buff,
const int32_t *  composite_key_dict,
const int64_t  hash_entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by BaselineJoinHashTable::initHashTableOnGpu().

+ Here is the caller graph for this function:

◆ fill_one_to_many_baseline_hash_table_on_device_64()

void fill_one_to_many_baseline_hash_table_on_device_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const int64_t  hash_entry_count,
const int32_t  invalid_slot_val,
const GenericKeyHandler *  key_handler,
const int64_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by BaselineJoinHashTable::initHashTableOnGpu().

+ Here is the caller graph for this function:

◆ fill_one_to_many_hash_table()

void fill_one_to_many_hash_table ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn &  join_column,
const JoinColumnTypeInfo &  type_info,
const void *  sd_inner_proxy,
const void *  sd_outer_proxy,
const unsigned  cpu_thread_count 
)

Definition at line 1356 of file HashJoinRuntime.cpp.

References count_matches(), fill_one_to_many_hash_table_impl(), fill_row_ids(), HashEntryInfo::hash_entry_count, and SUFFIX.

Referenced by JoinHashTable::initOneToManyHashTableOnCpu().

1363  {
1364  auto launch_count_matches = [count_buff = buff + hash_entry_info.hash_entry_count,
1365  invalid_slot_val,
1366  &join_column,
1367  &type_info,
1368  sd_inner_proxy,
1369  sd_outer_proxy](auto cpu_thread_idx,
1370  auto cpu_thread_count) {
1371  SUFFIX(count_matches)
1372  (count_buff,
1373  invalid_slot_val,
1374  join_column,
1375  type_info,
1376  sd_inner_proxy,
1377  sd_outer_proxy,
1378  cpu_thread_idx,
1379  cpu_thread_count);
1380  };
1381  auto launch_fill_row_ids = [hash_entry_count = hash_entry_info.hash_entry_count,
1382  buff,
1383  invalid_slot_val,
1384  &join_column,
1385  &type_info,
1386  sd_inner_proxy,
1387  sd_outer_proxy](auto cpu_thread_idx,
1388  auto cpu_thread_count) {
1389  SUFFIX(fill_row_ids)
1390  (buff,
1391  hash_entry_count,
1392  invalid_slot_val,
1393  join_column,
1394  type_info,
1395  sd_inner_proxy,
1396  sd_outer_proxy,
1397  cpu_thread_idx,
1398  cpu_thread_count);
1399  };
1400 
1401  fill_one_to_many_hash_table_impl(buff,
1402  hash_entry_info.hash_entry_count,
1403  invalid_slot_val,
1404  join_column,
1405  type_info,
1406  sd_inner_proxy,
1407  sd_outer_proxy,
1408  cpu_thread_count,
1409  launch_count_matches,
1410  launch_fill_row_ids);
1411 }
#define SUFFIX(name)
void fill_one_to_many_hash_table_impl(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count, COUNT_MATCHES_LAUNCH_FUNCTOR count_matches_func, FILL_ROW_IDS_LAUNCH_FUNCTOR fill_row_ids_func)
size_t hash_entry_count
GLOBAL void SUFFIX() count_matches(int32_t *count_buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
GLOBAL void SUFFIX() fill_row_ids(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ fill_one_to_many_hash_table_bucketized()

void fill_one_to_many_hash_table_bucketized ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn &  join_column,
const JoinColumnTypeInfo &  type_info,
const void *  sd_inner_proxy,
const void *  sd_outer_proxy,
const unsigned  cpu_thread_count 
)

Definition at line 1413 of file HashJoinRuntime.cpp.

References HashEntryInfo::bucket_normalization, count_matches_bucketized(), fill_one_to_many_hash_table_impl(), fill_row_ids_bucketized(), HashEntryInfo::getNormalizedHashEntryCount(), and SUFFIX.

Referenced by JoinHashTable::initOneToManyHashTableOnCpu().

1420  {
1421  auto bucket_normalization = hash_entry_info.bucket_normalization;
1422  auto hash_entry_count = hash_entry_info.getNormalizedHashEntryCount();
1423  auto launch_count_matches = [bucket_normalization,
1424  count_buff = buff + hash_entry_count,
1425  invalid_slot_val,
1426  &join_column,
1427  &type_info,
1428  sd_inner_proxy,
1429  sd_outer_proxy](auto cpu_thread_idx,
1430  auto cpu_thread_count) {
1431  SUFFIX(count_matches_bucketized)
1432  (count_buff,
1433  invalid_slot_val,
1434  join_column,
1435  type_info,
1436  sd_inner_proxy,
1437  sd_outer_proxy,
1438  cpu_thread_idx,
1439  cpu_thread_count,
1440  bucket_normalization);
1441  };
1442  auto launch_fill_row_ids = [bucket_normalization,
1443  hash_entry_count,
1444  buff,
1445  invalid_slot_val,
1446  &join_column,
1447  &type_info,
1448  sd_inner_proxy,
1449  sd_outer_proxy](auto cpu_thread_idx,
1450  auto cpu_thread_count) {
1451  SUFFIX(fill_row_ids_bucketized)
1452  (buff,
1453  hash_entry_count,
1454  invalid_slot_val,
1455  join_column,
1456  type_info,
1457  sd_inner_proxy,
1458  sd_outer_proxy,
1459  cpu_thread_idx,
1460  cpu_thread_count,
1461  bucket_normalization);
1462  };
1463 
1464  fill_one_to_many_hash_table_impl(buff,
1465  hash_entry_count,
1466  invalid_slot_val,
1467  join_column,
1468  type_info,
1469  sd_inner_proxy,
1470  sd_outer_proxy,
1471  cpu_thread_count,
1472  launch_count_matches,
1473  launch_fill_row_ids);
1474 }
GLOBAL void SUFFIX() count_matches_bucketized(int32_t *count_buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
size_t getNormalizedHashEntryCount() const
#define SUFFIX(name)
void fill_one_to_many_hash_table_impl(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count, COUNT_MATCHES_LAUNCH_FUNCTOR count_matches_func, FILL_ROW_IDS_LAUNCH_FUNCTOR fill_row_ids_func)
int64_t bucket_normalization
GLOBAL void SUFFIX() fill_row_ids_bucketized(int32_t *buff, const int64_t hash_entry_count, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ fill_one_to_many_hash_table_on_device()

void fill_one_to_many_hash_table_on_device ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by JoinHashTable::initOneToManyHashTable().

+ Here is the caller graph for this function:

◆ fill_one_to_many_hash_table_on_device_bucketized()

void fill_one_to_many_hash_table_on_device_bucketized ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by JoinHashTable::initOneToManyHashTable().

+ Here is the caller graph for this function:

◆ fill_one_to_many_hash_table_on_device_sharded()

void fill_one_to_many_hash_table_on_device_sharded ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const ShardInfo shard_info,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by JoinHashTable::initOneToManyHashTable().

+ Here is the caller graph for this function:

◆ fill_one_to_many_hash_table_sharded_bucketized()

void fill_one_to_many_hash_table_sharded_bucketized ( int32_t *  buff,
const HashEntryInfo  hash_entry_info,
const int32_t  invalid_slot_val,
const JoinColumn join_column,
const JoinColumnTypeInfo type_info,
const ShardInfo shard_info,
const void *  sd_inner_proxy,
const void *  sd_outer_proxy,
const unsigned  cpu_thread_count 
)

◆ get_join_column_type_kind()

ColumnType get_join_column_type_kind ( const SQLTypeInfo &  ti)
inline

Definition at line 133 of file HashJoinRuntime.h.

References SQLTypeInfo::is_date_in_days(), is_unsigned_type(), Signed, SmallDate, and Unsigned.

Referenced by OverlapsJoinHashTable::fetchColumnsForDevice(), BaselineJoinHashTable::fetchColumnsForDevice(), JoinHashTable::initOneToManyHashTable(), JoinHashTable::initOneToManyHashTableOnCpu(), JoinHashTable::initOneToOneHashTable(), and JoinHashTable::initOneToOneHashTableOnCpu().

133  {
134  if (ti.is_date_in_days()) {
135  return SmallDate;
136  } else {
137  return is_unsigned_type(ti) ? Unsigned : Signed;
138  }
139 }
bool is_unsigned_type(const SQLTypeInfo &ti)
bool is_date_in_days() const
Definition: sqltypes.h:632
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ init_baseline_hash_join_buff_32()

void init_baseline_hash_join_buff_32 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1615 of file HashJoinRuntime.cpp.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu(), and BaselineJoinHashTable::initHashTableOnCpu().

1621  {
1622  init_baseline_hash_join_buff<int32_t>(hash_join_buff,
1623  entry_count,
1624  key_component_count,
1625  with_val_slot,
1626  invalid_slot_val,
1627  cpu_thread_idx,
1628  cpu_thread_count);
1629 }
+ Here is the caller graph for this function:

◆ init_baseline_hash_join_buff_64()

void init_baseline_hash_join_buff_64 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1631 of file HashJoinRuntime.cpp.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu(), and BaselineJoinHashTable::initHashTableOnCpu().

1637  {
1638  init_baseline_hash_join_buff<int64_t>(hash_join_buff,
1639  entry_count,
1640  key_component_count,
1641  with_val_slot,
1642  invalid_slot_val,
1643  cpu_thread_idx,
1644  cpu_thread_count);
1645 }
+ Here is the caller graph for this function:

◆ init_baseline_hash_join_buff_on_device_32()

void init_baseline_hash_join_buff_on_device_32 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by OverlapsJoinHashTable::initHashTableOnGpu(), and BaselineJoinHashTable::initHashTableOnGpu().

+ Here is the caller graph for this function:

◆ init_baseline_hash_join_buff_on_device_64()

void init_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_join_buff,
const int64_t  entry_count,
const size_t  key_component_count,
const bool  with_val_slot,
const int32_t  invalid_slot_val,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by OverlapsJoinHashTable::initHashTableOnGpu(), and BaselineJoinHashTable::initHashTableOnGpu().

+ Here is the caller graph for this function:

◆ init_hash_join_buff()

void init_hash_join_buff ( int32_t *  buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 92 of file HashJoinRuntime.cpp.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu(), BaselineJoinHashTable::initHashTableOnCpu(), JoinHashTable::initOneToManyHashTableOnCpu(), and JoinHashTable::initOneToOneHashTableOnCpu().

96  {
97 #ifdef __CUDACC__
98  int32_t start = threadIdx.x + blockDim.x * blockIdx.x;
99  int32_t step = blockDim.x * gridDim.x;
100 #else
101  int32_t start = cpu_thread_idx;
102  int32_t step = cpu_thread_count;
103 #endif
104  for (int64_t i = start; i < hash_entry_count; i += step) {
105  groups_buffer[i] = invalid_slot_val;
106  }
107 }
+ Here is the caller graph for this function:

◆ init_hash_join_buff_on_device()

void init_hash_join_buff_on_device ( int32_t *  buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by OverlapsJoinHashTable::initHashTableOnGpu(), BaselineJoinHashTable::initHashTableOnGpu(), JoinHashTable::initOneToManyHashTable(), and JoinHashTable::initOneToOneHashTable().

+ Here is the caller graph for this function:

◆ overlaps_fill_baseline_hash_join_buff_32()

int overlaps_fill_baseline_hash_join_buff_32 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1667 of file HashJoinRuntime.cpp.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu().

1675  {
1676  return fill_baseline_hash_join_buff<int32_t>(hash_buff,
1677  entry_count,
1678  invalid_slot_val,
1679  key_component_count,
1680  with_val_slot,
1681  key_handler,
1682  num_elems,
1683  cpu_thread_idx,
1684  cpu_thread_count);
1685 }
+ Here is the caller graph for this function:

◆ overlaps_fill_baseline_hash_join_buff_64()

int overlaps_fill_baseline_hash_join_buff_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems,
const int32_t  cpu_thread_idx,
const int32_t  cpu_thread_count 
)

Definition at line 1707 of file HashJoinRuntime.cpp.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu().

1715  {
1716  return fill_baseline_hash_join_buff<int64_t>(hash_buff,
1717  entry_count,
1718  invalid_slot_val,
1719  key_component_count,
1720  with_val_slot,
1721  key_handler,
1722  num_elems,
1723  cpu_thread_idx,
1724  cpu_thread_count);
1725 }
+ Here is the caller graph for this function:

◆ overlaps_fill_baseline_hash_join_buff_on_device_64()

void overlaps_fill_baseline_hash_join_buff_on_device_64 ( int8_t *  hash_buff,
const int64_t  entry_count,
const int32_t  invalid_slot_val,
const size_t  key_component_count,
const bool  with_val_slot,
int *  dev_err_buff,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by OverlapsJoinHashTable::initHashTableOnGpu().

+ Here is the caller graph for this function:

◆ overlaps_fill_one_to_many_baseline_hash_table_on_device_64()

void overlaps_fill_one_to_many_baseline_hash_table_on_device_64 ( int32_t *  buff,
const int64_t *  composite_key_dict,
const int64_t  hash_entry_count,
const int32_t  invalid_slot_val,
const OverlapsKeyHandler key_handler,
const int64_t  num_elems,
const size_t  block_size_x,
const size_t  grid_size_x 
)

Referenced by OverlapsJoinHashTable::initHashTableOnGpu().

+ Here is the caller graph for this function:

Variable Documentation

◆ g_maximum_conditions_to_coalesce