OmniSciDB  ba1bac9284
OverlapsJoinHashTable Class Reference

#include <OverlapsJoinHashTable.h>


Public Member Functions

 OverlapsJoinHashTable (const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count)
 
virtual ~OverlapsJoinHashTable ()
 
- Public Member Functions inherited from HashJoin
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int64_t getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static std::shared_ptr< OverlapsJoinHashTable > getInstance (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const RegisteredQueryHint &query_hint)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static auto getCacheInvalidator () -> std::function< void()>
 
static size_t getCombinedHashTableCacheSize ()
 
- Static Public Member Functions inherited from HashJoin
static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const RegisteredQueryHint &query_hint)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static void checkHashJoinReplicationConstraint (const int table_id, const size_t shard_count, const Executor *executor)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor)
 

Protected Member Functions

void reify (const HashType preferred_layout)
 
void reifyWithLayout (const HashType layout)
 
virtual void reifyImpl (std::vector< ColumnsForDevice > &columns_per_device, const Fragmenter_Namespace::TableInfo &query_info, const HashType layout, const size_t shard_count, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
void reifyForDevice (const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold, const int device_id, const logger::ThreadId parent_thread_id)
 
size_t calculateHashTableSize (size_t number_of_dimensions, size_t emitted_keys_count, size_t entry_count) const
 
ColumnsForDevice fetchColumnsForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
 
virtual std::pair< size_t, size_t > approximateTupleCount (const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
virtual std::pair< size_t, size_t > computeHashTableCounts (const size_t shard_count, const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
void setInverseBucketSizeInfo (const std::vector< double > &inverse_bucket_sizes, std::vector< ColumnsForDevice > &columns_per_device, const size_t device_count)
 
size_t getKeyComponentWidth () const
 
size_t getKeyComponentCount () const
 
HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
std::shared_ptr< BaselineHashTable > initHashTableOnCpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
DecodedJoinHashBufferSet toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
const RegisteredQueryHint & getRegisteredQueryHint ()
 
void registerQueryHint (const RegisteredQueryHint &query_hint)
 

Private Types

using HashTableCacheValue = std::shared_ptr< HashTable >
 
using BucketThreshold = double
 
using BucketSizes = std::vector< double >
 

Private Member Functions

size_t getEntryCount () const
 
size_t getEmittedKeysCount () const
 
size_t getComponentBufferSize () const noexcept override
 
size_t shardCount () const
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
size_t getKeyBufferSize () const noexcept
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
std::string getHashJoinType () const final
 
std::shared_ptr< HashTable > initHashTableOnCpuFromCache (const OverlapsHashTableCacheKey &key)
 
std::optional< std::pair< size_t, size_t > > getApproximateTupleCountFromCache (const OverlapsHashTableCacheKey &)
 
void putHashTableOnCpuToCache (const OverlapsHashTableCacheKey &key, std::shared_ptr< HashTable > hash_table)
 
llvm::Value * codegenKey (const CompilationOptions &)
 
std::vector< llvm::Value * > codegenManyKey (const CompilationOptions &)
 

Private Attributes

const std::shared_ptr< Analyzer::BinOper > condition_
 
const JoinType join_type_
 
const std::vector< InputTableInfo > & query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
std::vector< InnerOuter > inner_outer_pairs_
 
const int device_count_
 
std::vector< double > inverse_bucket_sizes_for_dimension_
 
std::optional< HashType > layout_override_
 
std::mutex cpu_hash_table_buff_mutex_
 
RegisteredQueryHint query_hint_
 

Static Private Attributes

static std::unique_ptr< OverlapsHashTableCache< OverlapsHashTableCacheKey, HashTableCacheValue > > hash_table_cache_
 
static std::unique_ptr< HashTableCache< OverlapsHashTableCacheKey, std::pair< BucketThreshold, BucketSizes > > > auto_tuner_cache_
 

Additional Inherited Members

- Protected Attributes inherited from HashJoin
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
 

Detailed Description

Definition at line 87 of file OverlapsJoinHashTable.h.

Member Typedef Documentation

using OverlapsJoinHashTable::BucketSizes = std::vector<double>
private

Definition at line 371 of file OverlapsJoinHashTable.h.

using OverlapsJoinHashTable::BucketThreshold = double
private

Definition at line 370 of file OverlapsJoinHashTable.h.

using OverlapsJoinHashTable::HashTableCacheValue = std::shared_ptr<HashTable>
private

Definition at line 364 of file OverlapsJoinHashTable.h.

Constructor & Destructor Documentation

OverlapsJoinHashTable::OverlapsJoinHashTable ( const std::shared_ptr< Analyzer::BinOper >  condition,
const JoinType  join_type,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
ColumnCacheMap &  column_cache,
Executor *  executor,
const std::vector< InnerOuter > &  inner_outer_pairs,
const int  device_count 
)
inline

Definition at line 89 of file OverlapsJoinHashTable.h.

References CHECK_GT, RegisteredQueryHint::defaults(), device_count_, HashJoin::hash_tables_for_device_, and query_hint_.

97  : condition_(condition)
98  , join_type_(join_type)
99  , query_infos_(query_infos)
100  , memory_level_(memory_level)
101  , executor_(executor)
102  , column_cache_(column_cache)
103  , inner_outer_pairs_(inner_outer_pairs)
104  , device_count_(device_count) {
105  CHECK_GT(device_count_, 0);
106  hash_tables_for_device_.resize(std::max(device_count_, 1));
107  query_hint_ = RegisteredQueryHint::defaults();
108  }

virtual OverlapsJoinHashTable::~OverlapsJoinHashTable ( )
inline, virtual

Definition at line 110 of file OverlapsJoinHashTable.h.

110 {}

Member Function Documentation

std::pair< size_t, size_t > OverlapsJoinHashTable::approximateTupleCount ( const std::vector< double > &  inverse_bucket_sizes_for_dimension,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected, virtual

Definition at line 878 of file OverlapsJoinHashTable.cpp.

References gpu_enabled::accumulate(), CudaAllocator::alloc(), approximate_distinct_tuples_on_device_overlaps(), approximate_distinct_tuples_overlaps(), Bitmap, CHECK, CHECK_EQ, CHECK_GT, condition_, copy_from_gpu(), copy_to_gpu(), CPU, Data_Namespace::CPU_LEVEL, cpu_threads(), device_count_, executor_, getApproximateTupleCountFromCache(), HashJoin::getCompositeKeyInfo(), getEffectiveMemoryLevel(), GPU, Data_Namespace::GPU_LEVEL, hll_size(), hll_unify(), i, inner_outer_pairs_, transfer_flat_object_to_gpu(), transfer_vector_of_flat_objects_to_gpu(), UNREACHABLE, and VLOG.

Referenced by computeHashTableCounts().

882  {
883  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
884  CountDistinctDescriptor count_distinct_desc{
885  CountDistinctImplType::Bitmap,
886  0,
887  11,
888  true,
889  effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
890  ? ExecutorDeviceType::GPU
891  : ExecutorDeviceType::CPU,
892  1};
893  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();
894 
895  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());
896  if (columns_per_device.front().join_columns.front().num_elems == 0) {
897  return std::make_pair(0, 0);
898  }
899 
900  // TODO: state management in here should be revisited, but this should be safe enough
901  // for now
902  // re-compute bucket counts per device based on global bucket size
903  for (size_t device_id = 0; device_id < columns_per_device.size(); ++device_id) {
904  auto& columns_for_device = columns_per_device[device_id];
905  columns_for_device.setBucketInfo(inverse_bucket_sizes_for_dimension,
906  inner_outer_pairs_);
907  }
908 
909  // Number of keys must match dimension of buckets
910  CHECK_EQ(columns_per_device.front().join_columns.size(),
911  columns_per_device.front().join_buckets.size());
912  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
913  // Note that this path assumes each device has the same hash table (for GPU hash join
914  // w/ hash table built on CPU)
915  const auto composite_key_info =
916  HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);
917  OverlapsHashTableCacheKey cache_key{
918  columns_per_device.front().join_columns.front().num_elems,
919  composite_key_info.cache_key_chunks,
920  condition_->get_optype(),
921  chosen_max_hashtable_size,
922  chosen_bucket_threshold,
923  inverse_bucket_sizes_for_dimension};
924  const auto cached_count_info = getApproximateTupleCountFromCache(cache_key);
925  if (cached_count_info) {
926  VLOG(1) << "Using a cached tuple count: " << cached_count_info->first
927  << ", emitted keys count: " << cached_count_info->second;
928  return *cached_count_info;
929  }
930  int thread_count = cpu_threads();
931  std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
932  auto hll_result = &hll_buffer_all_cpus[0];
933 
934  std::vector<int32_t> num_keys_for_row;
935  // TODO(adb): support multi-column overlaps join
936  num_keys_for_row.resize(columns_per_device.front().join_columns[0].num_elems);
937 
938  approximate_distinct_tuples_overlaps(hll_result,
939  num_keys_for_row,
940  count_distinct_desc.bitmap_sz_bits,
941  padded_size_bytes,
942  columns_per_device.front().join_columns,
943  columns_per_device.front().join_column_types,
944  columns_per_device.front().join_buckets,
945  thread_count);
946  for (int i = 1; i < thread_count; ++i) {
947  hll_unify(hll_result,
948  hll_result + i * padded_size_bytes,
949  1 << count_distinct_desc.bitmap_sz_bits);
950  }
951  return std::make_pair(
952  hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
953  static_cast<size_t>(num_keys_for_row.size() > 0 ? num_keys_for_row.back() : 0));
954  }
955 #ifdef HAVE_CUDA
956  auto& data_mgr = executor_->getCatalog()->getDataMgr();
957  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
958  for (auto& host_hll_buffer : host_hll_buffers) {
959  host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
960  }
961  std::vector<size_t> emitted_keys_count_device_threads(device_count_, 0);
962  std::vector<std::future<void>> approximate_distinct_device_threads;
963  for (int device_id = 0; device_id < device_count_; ++device_id) {
964  approximate_distinct_device_threads.emplace_back(std::async(
965  std::launch::async,
966  [device_id,
967  &columns_per_device,
968  &count_distinct_desc,
969  &data_mgr,
970  &host_hll_buffers,
971  &emitted_keys_count_device_threads] {
972  CudaAllocator allocator(&data_mgr, device_id);
973  auto device_hll_buffer =
974  allocator.alloc(count_distinct_desc.bitmapPaddedSizeBytes());
975  data_mgr.getCudaMgr()->zeroDeviceMem(
976  device_hll_buffer, count_distinct_desc.bitmapPaddedSizeBytes(), device_id);
977  const auto& columns_for_device = columns_per_device[device_id];
978  auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(
979  columns_for_device.join_columns, allocator);
980 
981  CHECK_GT(columns_for_device.join_buckets.size(), 0u);
982  const auto& inverse_bucket_sizes_for_dimension =
983  columns_for_device.join_buckets[0].inverse_bucket_sizes_for_dimension;
984  auto inverse_bucket_sizes_gpu =
985  allocator.alloc(inverse_bucket_sizes_for_dimension.size() * sizeof(double));
986  copy_to_gpu(&data_mgr,
987  reinterpret_cast<CUdeviceptr>(inverse_bucket_sizes_gpu),
988  inverse_bucket_sizes_for_dimension.data(),
989  inverse_bucket_sizes_for_dimension.size() * sizeof(double),
990  device_id);
991  const size_t row_counts_buffer_sz =
992  columns_per_device.front().join_columns[0].num_elems * sizeof(int32_t);
993  auto row_counts_buffer = allocator.alloc(row_counts_buffer_sz);
994  data_mgr.getCudaMgr()->zeroDeviceMem(
995  row_counts_buffer, row_counts_buffer_sz, device_id);
996  const auto key_handler =
997  OverlapsKeyHandler(inverse_bucket_sizes_for_dimension.size(),
998  join_columns_gpu,
999  reinterpret_cast<double*>(inverse_bucket_sizes_gpu));
1000  const auto key_handler_gpu =
1001  transfer_flat_object_to_gpu(key_handler, allocator);
1002  approximate_distinct_tuples_on_device_overlaps(
1003  reinterpret_cast<uint8_t*>(device_hll_buffer),
1004  count_distinct_desc.bitmap_sz_bits,
1005  reinterpret_cast<int32_t*>(row_counts_buffer),
1006  key_handler_gpu,
1007  columns_for_device.join_columns[0].num_elems);
1008 
1009  auto& host_emitted_keys_count = emitted_keys_count_device_threads[device_id];
1010  copy_from_gpu(&data_mgr,
1011  &host_emitted_keys_count,
1012  reinterpret_cast<CUdeviceptr>(
1013  row_counts_buffer +
1014  (columns_per_device.front().join_columns[0].num_elems - 1) *
1015  sizeof(int32_t)),
1016  sizeof(int32_t),
1017  device_id);
1018 
1019  auto& host_hll_buffer = host_hll_buffers[device_id];
1020  copy_from_gpu(&data_mgr,
1021  &host_hll_buffer[0],
1022  reinterpret_cast<CUdeviceptr>(device_hll_buffer),
1023  count_distinct_desc.bitmapPaddedSizeBytes(),
1024  device_id);
1025  }));
1026  }
1027  for (auto& child : approximate_distinct_device_threads) {
1028  child.get();
1029  }
1030  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
1031  auto& result_hll_buffer = host_hll_buffers.front();
1032  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
1033  for (int device_id = 1; device_id < device_count_; ++device_id) {
1034  auto& host_hll_buffer = host_hll_buffers[device_id];
1035  hll_unify(hll_result,
1036  reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
1037  1 << count_distinct_desc.bitmap_sz_bits);
1038  }
1039  const size_t emitted_keys_count =
1040  std::accumulate(emitted_keys_count_device_threads.begin(),
1041  emitted_keys_count_device_threads.end(),
1042  0);
1043  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
1044  emitted_keys_count);
1045 #else
1046  UNREACHABLE();
1047  return {0, 0};
1048 #endif // HAVE_CUDA
1049 }
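
The CPU path above shards the input across cpu_threads() workers, gives each worker its own HyperLogLog register buffer inside hll_buffer_all_cpus, and folds the per-thread buffers together with hll_unify (a register-wise max) before reading off the estimate with hll_size. Below is a minimal, self-contained sketch of that merge pattern; toy_hll_unify is a simplified stand-in for the real HyperLogLog.h implementation, not the library function itself.

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Simplified stand-in for hll_unify: merging two HLL sketches is a
    // register-wise max over their register arrays.
    void toy_hll_unify(uint8_t* lhs, const uint8_t* rhs, const size_t m) {
      for (size_t i = 0; i < m; ++i) {
        lhs[i] = std::max(lhs[i], rhs[i]);
      }
    }

    int main() {
      const int thread_count = 4;
      const size_t num_registers = size_t(1) << 11;  // bitmap_sz_bits = 11, as above
      // One contiguous allocation holding every thread's register set,
      // mirroring hll_buffer_all_cpus in approximateTupleCount().
      std::vector<uint8_t> hll_buffer_all_cpus(thread_count * num_registers);
      // ... each worker fills its own slice of the buffer here ...
      auto hll_result = &hll_buffer_all_cpus[0];
      for (int i = 1; i < thread_count; ++i) {
        toy_hll_unify(hll_result, hll_result + i * num_registers, num_registers);
      }
      return 0;
    }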

size_t OverlapsJoinHashTable::calculateHashTableSize ( size_t  number_of_dimensions,
size_t  emitted_keys_count,
size_t  entry_count 
) const
protected

Definition at line 806 of file OverlapsJoinHashTable.cpp.

References getKeyComponentWidth().

Referenced by reifyWithLayout().

808  {
809  const auto key_component_width = getKeyComponentWidth();
810  const auto key_component_count = number_of_dimensions;
811  const auto entry_size = key_component_count * key_component_width;
812  const auto keys_for_all_rows = emitted_keys_count;
813  const size_t one_to_many_hash_entries = 2 * entry_count + keys_for_all_rows;
814  const size_t hash_table_size =
815  entry_size * entry_count + one_to_many_hash_entries * sizeof(int32_t);
816  return hash_table_size;
817 }

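A worked example of the formula above (illustrative numbers, not from the source): with the fixed 8-byte key component width, number_of_dimensions = 2, entry_count = 1024, and emitted_keys_count = 4096, the key section takes 2 * 8 * 1024 = 16384 bytes and the one-to-many section (2 * 1024 + 4096) * sizeof(int32_t) = 24576 bytes, 40960 bytes in total. A standalone restatement of the computation:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    // Mirrors calculateHashTableSize() above, using the 8-byte key
    // component width that getKeyComponentWidth() reports.
    size_t overlaps_hash_table_size(const size_t number_of_dimensions,
                                    const size_t emitted_keys_count,
                                    const size_t entry_count) {
      const size_t key_component_width = 8;
      const size_t entry_size = number_of_dimensions * key_component_width;
      const size_t one_to_many_hash_entries = 2 * entry_count + emitted_keys_count;
      return entry_size * entry_count + one_to_many_hash_entries * sizeof(int32_t);
    }

    int main() {
      std::cout << overlaps_hash_table_size(2, 4096, 1024) << '\n';  // 40960
      return 0;
    }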

llvm::Value * OverlapsJoinHashTable::codegenKey ( const CompilationOptions &  co)
private

Definition at line 1362 of file OverlapsJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::castArrayPointer(), CHECK, CHECK_EQ, CodeGenerator::codegen(), executor_, getKeyComponentCount(), getKeyComponentWidth(), inner_outer_pairs_, inverse_bucket_sizes_for_dimension_, kPOINT, kTINYINT, LL_BUILDER, LL_CONTEXT, LL_INT, and CodeGenerator::posArg().

Referenced by codegenMatchingSet().

1362  {
1363  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1364  const auto key_component_width = getKeyComponentWidth();
1365  CHECK(key_component_width == 4 || key_component_width == 8);
1366  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
1367  llvm::Value* key_buff_lv{nullptr};
1368  switch (key_component_width) {
1369  case 4:
1370  key_buff_lv =
1371  LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv);
1372  break;
1373  case 8:
1374  key_buff_lv =
1375  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1376  break;
1377  default:
1378  CHECK(false);
1379  }
1380 
1381  const auto& inner_outer_pair = inner_outer_pairs_[0];
1382  const auto outer_geo = inner_outer_pair.second;
1383  const auto outer_geo_ti = outer_geo->get_type_info();
1384 
1385  llvm::Value* arr_ptr = nullptr;
1386  CodeGenerator code_generator(executor_);
1387  CHECK_EQ(inverse_bucket_sizes_for_dimension_.size(), static_cast<size_t>(2));
1388 
1389  if (outer_geo_ti.is_geometry()) {
1390  // TODO(adb): for points we will use the coords array, but for other geometries we
1391  // will need to use the bounding box. For now only support points.
1392  CHECK_EQ(outer_geo_ti.get_type(), kPOINT);
1393 
1394  if (const auto outer_geo_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_geo)) {
1395  const auto outer_geo_col_lvs = code_generator.codegen(outer_geo_col, true, co);
1396  CHECK_EQ(outer_geo_col_lvs.size(), size_t(1));
1397  const auto coords_cd = executor_->getCatalog()->getMetadataForColumn(
1398  outer_geo_col->get_table_id(), outer_geo_col->get_column_id() + 1);
1399  CHECK(coords_cd);
1400 
1401  const auto array_ptr = executor_->cgen_state_->emitExternalCall(
1402  "array_buff",
1403  llvm::Type::getInt8PtrTy(executor_->cgen_state_->context_),
1404  {outer_geo_col_lvs.front(), code_generator.posArg(outer_geo_col)});
1405  CHECK(coords_cd->columnType.get_elem_type().get_type() == kTINYINT)
1406  << "Only TINYINT coordinates columns are supported in geo overlaps hash join.";
1407  arr_ptr = code_generator.castArrayPointer(array_ptr,
1408  coords_cd->columnType.get_elem_type());
1409  }
1410  } else if (outer_geo_ti.is_fixlen_array()) {
1411  // Process dynamically constructed points
1412  const auto outer_geo_cast_coord_array =
1413  dynamic_cast<const Analyzer::UOper*>(outer_geo);
1414  CHECK_EQ(outer_geo_cast_coord_array->get_optype(), kCAST);
1415  const auto outer_geo_coord_array = dynamic_cast<const Analyzer::ArrayExpr*>(
1416  outer_geo_cast_coord_array->get_operand());
1417  CHECK(outer_geo_coord_array);
1418  CHECK(outer_geo_coord_array->isLocalAlloc());
1419  CHECK_EQ(outer_geo_coord_array->getElementCount(), 2);
1420  auto elem_size = (outer_geo_ti.get_compression() == kENCODING_GEOINT)
1421  ? sizeof(int32_t)
1422  : sizeof(double);
1423  CHECK_EQ(outer_geo_ti.get_size(), int(2 * elem_size));
1424  const auto outer_geo_constructed_lvs = code_generator.codegen(outer_geo, true, co);
1425  // CHECK_EQ(outer_geo_constructed_lvs.size(), size_t(2)); // Pointer and size
1426  const auto array_ptr = outer_geo_constructed_lvs.front(); // Just need the pointer
1427  arr_ptr = LL_BUILDER.CreateGEP(array_ptr, LL_INT(0));
1428  arr_ptr = code_generator.castArrayPointer(array_ptr, SQLTypeInfo(kTINYINT, true));
1429  }
1430  if (!arr_ptr) {
1431  LOG(FATAL) << "Overlaps key currently only supported for geospatial columns and "
1432  "constructed points.";
1433  }
1434 
1435  for (size_t i = 0; i < 2; i++) {
1436  const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(key_buff_lv, LL_INT(i));
1437 
1438  // Note that get_bucket_key_for_range_compressed will need to be specialized for
1439  // future compression schemes
1440  auto bucket_key =
1441  outer_geo_ti.get_compression() == kENCODING_GEOINT
1442  ? executor_->cgen_state_->emitExternalCall(
1443  "get_bucket_key_for_range_compressed",
1444  get_int_type(64, LL_CONTEXT),
1445  {arr_ptr, LL_INT(i), LL_FP(inverse_bucket_sizes_for_dimension_[i])})
1446  : executor_->cgen_state_->emitExternalCall(
1447  "get_bucket_key_for_range_double",
1448  get_int_type(64, LL_CONTEXT),
1449  {arr_ptr, LL_INT(i), LL_FP(inverse_bucket_sizes_for_dimension_[i])});
1450  const auto col_lv = LL_BUILDER.CreateSExt(
1451  bucket_key, get_int_type(key_component_width * 8, LL_CONTEXT));
1452  LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
1453  }
1454  return key_buff_lv;
1455 }
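
Each loop iteration above emits a call to get_bucket_key_for_range_double (or the _compressed variant for geoint-encoded coordinates) with the coordinate array, the dimension index, and that dimension's inverse bucket size. A hedged sketch of the bucketing arithmetic such a helper performs; this illustrates the multiply-and-floor scheme and is not the runtime function's actual definition:

    #include <cmath>
    #include <cstdint>

    // A coordinate maps to a bucket index by scaling with the inverse
    // bucket size and flooring: with inverse_bucket_size = 10.0 (bucket
    // width 0.1), coordinate 1.23 lands in bucket 12.
    int64_t bucket_key_for_range(const double coord,
                                 const double inverse_bucket_size) {
      return static_cast<int64_t>(std::floor(coord * inverse_bucket_size));
    }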

std::vector< llvm::Value * > OverlapsJoinHashTable::codegenManyKey ( const CompilationOptions &  co)
private

Definition at line 1457 of file OverlapsJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), executor_, HashJoin::getHashTableForDevice(), getHashType(), getKeyComponentWidth(), inner_outer_pairs_, ManyToMany, CodeGenerator::posArg(), and VLOG.

Referenced by codegenMatchingSet().

1458  {
1459  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1460  const auto key_component_width = getKeyComponentWidth();
1461  CHECK(key_component_width == 4 || key_component_width == 8);
1462  auto hash_table = getHashTableForDevice(size_t(0));
1463  CHECK(hash_table);
1464  CHECK(getHashType() == HashType::ManyToMany);
1465 
1466  VLOG(1) << "Performing codgen for ManyToMany";
1467  const auto& inner_outer_pair = inner_outer_pairs_[0];
1468  const auto outer_col = inner_outer_pair.second;
1469 
1470  CodeGenerator code_generator(executor_);
1471  const auto col_lvs = code_generator.codegen(outer_col, true, co);
1472  CHECK_EQ(col_lvs.size(), size_t(1));
1473 
1474  const auto outer_col_var = dynamic_cast<const Analyzer::ColumnVar*>(outer_col);
1475  CHECK(outer_col_var);
1476  const auto coords_cd = executor_->getCatalog()->getMetadataForColumn(
1477  outer_col_var->get_table_id(), outer_col_var->get_column_id());
1478  CHECK(coords_cd);
1479 
1480  const auto array_ptr = executor_->cgen_state_->emitExternalCall(
1481  "array_buff",
1482  llvm::Type::getInt8PtrTy(executor_->cgen_state_->context_),
1483  {col_lvs.front(), code_generator.posArg(outer_col)});
1484 
1485  // TODO(jclay): this seems to cast to double, and causes the GPU build to fail.
1486  // const auto arr_ptr =
1487  // code_generator.castArrayPointer(array_ptr,
1488  // coords_cd->columnType.get_elem_type());
1489  array_ptr->setName("array_ptr");
1490 
1491  auto num_keys_lv = executor_->cgen_state_->emitExternalCall(
1492  "get_num_buckets_for_bounds",
1493  get_int_type(32, LL_CONTEXT),
1494  {array_ptr,
1495  LL_INT(0),
1496  LL_FP(inverse_bucket_sizes_for_dimension_[0]),
1497  LL_FP(inverse_bucket_sizes_for_dimension_[1])});
1498  num_keys_lv->setName("num_keys_lv");
1499 
1500  return {num_keys_lv, array_ptr};
1501 }

HashJoinMatchingSet OverlapsJoinHashTable::codegenMatchingSet ( const CompilationOptions &  co,
const size_t  index 
)
override, protected, virtual

Implements HashJoin.

Definition at line 1503 of file OverlapsJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, HashJoin::codegenHashTableLoad(), codegenKey(), codegenManyKey(), HashJoin::codegenMatchingSet(), executor_, get_int_array_type(), get_int_type(), getComponentBufferSize(), getEntryCount(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), inverse_bucket_sizes_for_dimension_, LL_BUILDER, LL_CONTEXT, LL_FP, LL_INT, ManyToMany, offsetBufferOff(), OneToMany, to_string(), UNREACHABLE, and VLOG.

1505  {
1506  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1507  if (getHashType() == HashType::ManyToMany) {
1508  VLOG(1) << "Building codegenMatchingSet for ManyToMany";
1509  const auto key_component_width = getKeyComponentWidth();
1510  CHECK(key_component_width == 4 || key_component_width == 8);
1511  auto many_to_many_args = codegenManyKey(co);
1512  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
1513  const auto composite_dict_ptr_type =
1514  llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
1515  const auto composite_key_dict =
1516  hash_ptr->getType()->isPointerTy()
1517  ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
1518  : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
1519  const auto key_component_count = getKeyComponentCount();
1520 
1521  auto one_to_many_ptr = hash_ptr;
1522 
1523  if (one_to_many_ptr->getType()->isPointerTy()) {
1524  one_to_many_ptr =
1525  LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
1526  } else {
1527  CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
1528  }
1529 
1530  const auto composite_key_dict_size = offsetBufferOff();
1531  one_to_many_ptr =
1532  LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
1533 
1534  // NOTE(jclay): A fixed array of size 200 is allocated on the stack.
1535  // this is likely the maximum value we can do that is safe to use across
1536  // all supported GPU architectures.
1537  const int max_array_size = 200;
1538  const auto arr_type = get_int_array_type(32, max_array_size, LL_CONTEXT);
1539  const auto out_arr_lv = LL_BUILDER.CreateAlloca(arr_type);
1540  out_arr_lv->setName("out_arr");
1541 
1542  const auto casted_out_arr_lv =
1543  LL_BUILDER.CreatePointerCast(out_arr_lv, arr_type->getPointerTo());
1544 
1545  const auto element_ptr = LL_BUILDER.CreateGEP(arr_type, casted_out_arr_lv, LL_INT(0));
1546 
1547  auto rowid_ptr_i32 =
1548  LL_BUILDER.CreatePointerCast(element_ptr, llvm::Type::getInt32PtrTy(LL_CONTEXT));
1549 
1550  const auto candidate_count_lv = executor_->cgen_state_->emitExternalCall(
1551  "get_candidate_rows",
1552  llvm::Type::getInt64Ty(LL_CONTEXT),
1553  {
1554  rowid_ptr_i32,
1555  LL_INT(max_array_size),
1556  many_to_many_args[1],
1557  LL_INT(0),
1558  LL_FP(inverse_bucket_sizes_for_dimension_[0]),
1559  LL_FP(inverse_bucket_sizes_for_dimension_[1]),
1560  many_to_many_args[0],
1561  LL_INT(key_component_count), // key_component_count
1562  composite_key_dict, // ptr to hash table
1563  LL_INT(getEntryCount()), // entry_count
1564  LL_INT(composite_key_dict_size), // offset_buffer_ptr_offset
1565  LL_INT(getEntryCount() * sizeof(int32_t)) // sub_buff_size
1566  });
1567 
1568  const auto slot_lv = LL_INT(int64_t(0));
1569 
1570  return {rowid_ptr_i32, candidate_count_lv, slot_lv};
1571  } else {
1572  VLOG(1) << "Building codegenMatchingSet for Baseline";
1573  // TODO: duplicated w/ BaselineJoinHashTable -- push into the hash table builder?
1574  const auto key_component_width = getKeyComponentWidth();
1575  CHECK(key_component_width == 4 || key_component_width == 8);
1576  auto key_buff_lv = codegenKey(co);
1578  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
1579  const auto composite_dict_ptr_type =
1580  llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
1581  const auto composite_key_dict =
1582  hash_ptr->getType()->isPointerTy()
1583  ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
1584  : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
1585  const auto key_component_count = getKeyComponentCount();
1586  const auto key = executor_->cgen_state_->emitExternalCall(
1587  "get_composite_key_index_" + std::to_string(key_component_width * 8),
1588  get_int_type(64, LL_CONTEXT),
1589  {key_buff_lv,
1590  LL_INT(key_component_count),
1591  composite_key_dict,
1592  LL_INT(getEntryCount())});
1593  auto one_to_many_ptr = hash_ptr;
1594  if (one_to_many_ptr->getType()->isPointerTy()) {
1595  one_to_many_ptr =
1596  LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
1597  } else {
1598  CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
1599  }
1600  const auto composite_key_dict_size = offsetBufferOff();
1601  one_to_many_ptr =
1602  LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
1603  return HashJoin::codegenMatchingSet(
1604  std::vector<llvm::Value*>{
1605  one_to_many_ptr, key, LL_INT(int64_t(0)), LL_INT(getEntryCount() - 1)},
1606  false,
1607  false,
1608  false,
1609  getComponentBufferSize(),
1610  executor_);
1611  }
1612  UNREACHABLE();
1613  return HashJoinMatchingSet{};
1614 }

llvm::Value* OverlapsJoinHashTable::codegenSlot ( const CompilationOptions & ,
const size_t   
)
inline, override, protected, virtual

Implements HashJoin.

Definition at line 253 of file OverlapsJoinHashTable.h.

References UNREACHABLE.

253  {
254  UNREACHABLE(); // not applicable for overlaps join
255  return nullptr;
256  }
std::pair< size_t, size_t > OverlapsJoinHashTable::computeHashTableCounts ( const size_t  shard_count,
const std::vector< double > &  inverse_bucket_sizes_for_dimension,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected, virtual

Definition at line 859 of file OverlapsJoinHashTable.cpp.

References approximateTupleCount(), CHECK, device_count_, get_entries_per_device(), and memory_level_.

Referenced by reifyWithLayout().

864  {
865  CHECK(!inverse_bucket_sizes_for_dimension.empty());
866  const auto [tuple_count, emitted_keys_count] =
867  approximateTupleCount(inverse_bucket_sizes_for_dimension,
868  columns_per_device,
869  chosen_max_hashtable_size,
870  chosen_bucket_threshold);
871  const auto entry_count = 2 * std::max(tuple_count, size_t(1));
872 
873  return std::make_pair(
874  get_entries_per_device(entry_count, shard_count, device_count_, memory_level_),
875  emitted_keys_count);
876 }
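
The sizing policy here is deliberately simple: reserve twice the HLL-estimated distinct tuple count (so the table runs at roughly 50% occupancy), with tuple_count floored at one, and then let get_entries_per_device split the total across shards and devices. A one-function sketch of the occupancy calculation:

    #include <algorithm>
    #include <cstddef>

    // Entry count reserved for the hash table: twice the approximate
    // distinct tuple count, never less than 2.
    size_t entry_count_for(const size_t approximate_tuple_count) {
      return 2 * std::max(approximate_tuple_count, size_t(1));
    }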

size_t OverlapsJoinHashTable::countBufferOff ( ) const
inline, override, private, virtual, noexcept

Implements HashJoin.

Definition at line 316 of file OverlapsJoinHashTable.h.

References getComponentBufferSize(), getHashType(), getKeyBufferSize(), HashJoin::layoutRequiresAdditionalBuffers(), and offsetBufferOff().

Referenced by payloadBufferOff(), toSet(), and toString().

316  {
317  if (layoutRequiresAdditionalBuffers(getHashType())) {
318  return offsetBufferOff() + getComponentBufferSize();
319  } else {
320  return getKeyBufferSize();
321  }
322  }
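
Together with offsetBufferOff() and payloadBufferOff(), this defines the buffer layout for layouts that need the additional one-to-many buffers: the key dictionary is followed by an offsets buffer, a counts buffer, and the row-id payload, where the offsets and counts sections are each getComponentBufferSize() bytes (one int32_t per entry). A sketch of that arithmetic, assuming offsetBufferOff() returns the key buffer size (consistent with its use as composite_key_dict_size in codegenMatchingSet()):

    #include <cstddef>
    #include <cstdint>

    // One-to-many layout arithmetic: [keys][offsets][counts][payload].
    struct OneToManyLayoutOffsets {
      size_t key_buffer_size;  // getKeyBufferSize()
      size_t entry_count;      // getEntryCount()

      size_t component_buffer_size() const {  // getComponentBufferSize()
        return entry_count * sizeof(int32_t);
      }
      size_t offset_buffer_off() const { return key_buffer_size; }
      size_t count_buffer_off() const {
        return offset_buffer_off() + component_buffer_size();
      }
      size_t payload_buffer_off() const {
        return count_buffer_off() + component_buffer_size();
      }
    };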

ColumnsForDevice OverlapsJoinHashTable::fetchColumnsForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
DeviceAllocator *  dev_buff_owner 
)
protected

Definition at line 819 of file OverlapsJoinHashTable.cpp.

References CHECK, column_cache_, executor_, HashJoin::fetchJoinColumn(), get_column_descriptor_maybe(), get_join_column_type_kind(), getEffectiveMemoryLevel(), and inner_outer_pairs_.

Referenced by reifyWithLayout().

822  {
823  const auto& catalog = *executor_->getCatalog();
824  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
825 
826  std::vector<JoinColumn> join_columns;
827  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
828  std::vector<JoinColumnTypeInfo> join_column_types;
829  std::vector<std::shared_ptr<void>> malloc_owner;
830  for (const auto& inner_outer_pair : inner_outer_pairs_) {
831  const auto inner_col = inner_outer_pair.first;
832  const auto inner_cd = get_column_descriptor_maybe(
833  inner_col->get_column_id(), inner_col->get_table_id(), catalog);
834  if (inner_cd && inner_cd->isVirtualCol) {
835  throw FailedToJoinOnVirtualColumn();
836  }
837  join_columns.emplace_back(fetchJoinColumn(inner_col,
838  fragments,
839  effective_memory_level,
840  device_id,
841  chunks_owner,
842  dev_buff_owner,
843  malloc_owner,
844  executor_,
845  &column_cache_));
846  const auto& ti = inner_col->get_type_info();
847  join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
848  0,
849  0,
850  inline_int_null_value<int64_t>(),
851  false,
852  0,
853  get_join_column_type_kind(ti)});
854  CHECK(ti.is_array()) << "Overlaps join currently only supported for arrays.";
855  }
856  return {join_columns, join_column_types, chunks_owner, {}, malloc_owner};
857 }

std::optional< std::pair< size_t, size_t > > OverlapsJoinHashTable::getApproximateTupleCountFromCache ( const OverlapsHashTableCacheKey &  key)
private

Definition at line 1731 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_GE, OverlapsHashTableCacheKey::chunk_keys, and hash_table_cache_.

Referenced by approximateTupleCount().

1732  {
1733  for (auto chunk_key : key.chunk_keys) {
1734  CHECK_GE(chunk_key.size(), size_t(2));
1735  if (chunk_key[1] < 0) {
1736  return std::nullopt;
1738  }
1739  }
1740 
1741  CHECK(hash_table_cache_);
1742  auto hash_table_opt = hash_table_cache_->getWithKey(key);
1743  if (hash_table_opt) {
1744  auto hash_table = hash_table_opt->second;
1745  return std::make_pair(hash_table->getEntryCount() / 2,
1746  hash_table->getEmittedKeysCount());
1747  }
1748  return std::nullopt;
1749 }

static auto OverlapsJoinHashTable::getCacheInvalidator ( ) -> std::function<void()>
inline, static

Definition at line 123 of file OverlapsJoinHashTable.h.

References auto_tuner_cache_, CHECK, and hash_table_cache_.

123  {
124  return []() -> void {
125  CHECK(auto_tuner_cache_);
126  auto auto_tuner_cache_invalidator = auto_tuner_cache_->getCacheInvalidator();
127  auto_tuner_cache_invalidator();
128 
129  CHECK(hash_table_cache_);
130  auto main_cache_invalidator = hash_table_cache_->getCacheInvalidator();
131  main_cache_invalidator();
132  };
133  }
static size_t OverlapsJoinHashTable::getCombinedHashTableCacheSize ( )
inline, static

Definition at line 135 of file OverlapsJoinHashTable.h.

References auto_tuner_cache_, CHECK, and hash_table_cache_.

Referenced by QueryRunner::QueryRunner::getNumberOfCachedOverlapsHashTables().

135  {
136  // for unit tests
137  CHECK(hash_table_cache_ && auto_tuner_cache_);
138  return hash_table_cache_->getNumberOfCachedHashTables() +
139  auto_tuner_cache_->getNumberOfCachedHashTables();
140  }

size_t OverlapsJoinHashTable::getComponentBufferSize ( ) const
inline, override, private, virtual, noexcept

Implements HashJoin.

Definition at line 277 of file OverlapsJoinHashTable.h.

References CHECK, and HashJoin::hash_tables_for_device_.

Referenced by codegenMatchingSet(), countBufferOff(), and payloadBufferOff().

277  {
278  CHECK(!hash_tables_for_device_.empty());
279  auto hash_table = hash_tables_for_device_.front();
280  CHECK(hash_table);
281  return hash_table->getEntryCount() * sizeof(int32_t);
282  }

int OverlapsJoinHashTable::getDeviceCount ( ) const
inline, override, protected, virtual, noexcept

Implements HashJoin.

Definition at line 212 of file OverlapsJoinHashTable.h.

References device_count_.

212 { return device_count_; };
Data_Namespace::MemoryLevel OverlapsJoinHashTable::getEffectiveMemoryLevel ( const std::vector< InnerOuter > &  inner_outer_pairs) const
private

Definition at line 1697 of file OverlapsJoinHashTable.cpp.

References Data_Namespace::CPU_LEVEL, RegisteredQueryHint::isHintRegistered(), kOverlapsAllowGpuBuild, memory_level_, RegisteredQueryHint::overlaps_allow_gpu_build, and query_hint_.

Referenced by approximateTupleCount(), fetchColumnsForDevice(), reifyForDevice(), and reifyWithLayout().

1698  {
1699  // always build on CPU
1700  if (query_hint_.isHintRegistered(kOverlapsAllowGpuBuild) &&
1701      query_hint_.overlaps_allow_gpu_build) {
1702  return memory_level_;
1703  }
1704  return Data_Namespace::MemoryLevel::CPU_LEVEL;
1705 }

size_t OverlapsJoinHashTable::getEmittedKeysCount ( ) const
inline, private

Definition at line 271 of file OverlapsJoinHashTable.h.

References CHECK, and HashJoin::getHashTableForDevice().

271  {
272  auto hash_table = getHashTableForDevice(0);
273  CHECK(hash_table);
274  return hash_table->getEmittedKeysCount();
275  }

size_t OverlapsJoinHashTable::getEntryCount ( ) const
inlineprivate

Definition at line 265 of file OverlapsJoinHashTable.h.

References CHECK, and HashJoin::getHashTableForDevice().

Referenced by codegenMatchingSet(), and getKeyBufferSize().

265  {
266  auto hash_table = getHashTableForDevice(0);
267  CHECK(hash_table);
268  return hash_table->getEntryCount();
269  }

std::string OverlapsJoinHashTable::getHashJoinType ( ) const
inline, final, private, virtual

Implements HashJoin.

Definition at line 332 of file OverlapsJoinHashTable.h.

332 { return "Overlaps"; }
HashType OverlapsJoinHashTable::getHashType ( ) const
inline, override, protected, virtual, noexcept

Implements HashJoin.

Definition at line 199 of file OverlapsJoinHashTable.h.

References CHECK, HashJoin::getHashTableForDevice(), and layout_override_.

Referenced by codegenManyKey(), codegenMatchingSet(), countBufferOff(), getKeyBufferSize(), payloadBufferOff(), toSet(), and toString().

199  {
200  if (layout_override_) {
201  return *layout_override_;
202  }
203  auto hash_table = getHashTableForDevice(0);
204  CHECK(hash_table);
205  return hash_table->getLayout();
206  }

int OverlapsJoinHashTable::getInnerTableId ( ) const
override, private, virtual, noexcept

Implements HashJoin.

Definition at line 1707 of file OverlapsJoinHashTable.cpp.

References CHECK, HashJoin::getInnerTableId(), and inner_outer_pairs_.

1707  {
1708  try {
1709  return HashJoin::getInnerTableId(inner_outer_pairs_);
1710  } catch (...) {
1711  CHECK(false);
1712  }
1713  return 0;
1714 }

int OverlapsJoinHashTable::getInnerTableRteIdx ( ) const
inline, override, private, virtual, noexcept

Implements HashJoin.

Definition at line 297 of file OverlapsJoinHashTable.h.

References CHECK, and inner_outer_pairs_.

297  {
298  CHECK(!inner_outer_pairs_.empty());
299  const auto first_inner_col = inner_outer_pairs_.front().first;
300  return first_inner_col->get_rte_idx();
301  }
std::shared_ptr< OverlapsJoinHashTable > OverlapsJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper >  condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const int  device_count,
ColumnCacheMap &  column_cache,
Executor *  executor,
const RegisteredQueryHint &  query_hint 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 43 of file OverlapsJoinHashTable.cpp.

References CHECK_EQ, count, get_inner_query_info(), HashJoin::getHashTypeString(), HashJoin::getInnerTableId(), Fragmenter_Namespace::TableInfo::getNumTuplesUpperBound(), InputTableInfo::info, RegisteredQueryHint::isAnyQueryHintDelivered(), ManyToMany, normalize_column_pairs(), OneToMany, VLOG, and VLOGGING.

Referenced by HashJoin::getInstance().

51  {
52  decltype(std::chrono::steady_clock::now()) ts1, ts2;
53  auto inner_outer_pairs = normalize_column_pairs(
54  condition.get(), *executor->getCatalog(), executor->getTemporaryTables());
55 
56  const auto getHashTableType =
57  [](const std::shared_ptr<Analyzer::BinOper> condition,
58  const std::vector<InnerOuter>& inner_outer_pairs) -> HashType {
59  HashType layout = HashType::OneToMany;
60  if (condition->is_overlaps_oper()) {
61  CHECK_EQ(inner_outer_pairs.size(), size_t(1));
62  if (inner_outer_pairs[0].first->get_type_info().is_array() &&
63  inner_outer_pairs[0].second->get_type_info().is_array() &&
64  // Bounds vs constructed points, former should yield ManyToMany
65  inner_outer_pairs[0].second->get_type_info().get_size() == 32) {
66  layout = HashType::ManyToMany;
67  }
68  }
69  return layout;
70  };
71 
72  auto layout = getHashTableType(condition, inner_outer_pairs);
73 
74  if (VLOGGING(1)) {
75  VLOG(1) << "Building geo hash table " << getHashTypeString(layout)
76  << " for qual: " << condition->toString();
77  ts1 = std::chrono::steady_clock::now();
78  }
79 
80  const auto qi_0 = query_infos[0].info.getNumTuplesUpperBound();
81  const auto qi_1 = query_infos[1].info.getNumTuplesUpperBound();
82 
83  VLOG(1) << "table_id = " << query_infos[0].table_id << " has " << qi_0 << " tuples.";
84  VLOG(1) << "table_id = " << query_infos[1].table_id << " has " << qi_1 << " tuples.";
85 
86  const auto& query_info =
87  get_inner_query_info(HashJoin::getInnerTableId(inner_outer_pairs), query_infos)
88  .info;
89  const auto total_entries = 2 * query_info.getNumTuplesUpperBound();
90  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
91  throw TooManyHashEntries();
92  }
93 
94  auto join_hash_table = std::make_shared<OverlapsJoinHashTable>(condition,
95  join_type,
96  query_infos,
97  memory_level,
98  column_cache,
99  executor,
100  inner_outer_pairs,
101  device_count);
102  if (query_hint.isAnyQueryHintDelivered()) {
103  join_hash_table->registerQueryHint(query_hint);
104  }
105  try {
106  join_hash_table->reify(layout);
107  } catch (const HashJoinFail& e) {
108  throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
109  "involved in overlaps join | ") +
110  e.what());
111  } catch (const ColumnarConversionNotSupported& e) {
112  throw HashJoinFail(std::string("Could not build hash tables for overlaps join | "
113  "Inner table too big. Attempt manual table reordering "
114  "or create a single fragment inner table. | ") +
115  e.what());
116  } catch (const std::exception& e) {
117  throw HashJoinFail(std::string("Failed to build hash tables for overlaps join | ") +
118  e.what());
119  }
120  if (VLOGGING(1)) {
121  ts2 = std::chrono::steady_clock::now();
122  VLOG(1) << "Built geo hash table " << getHashTypeString(layout) << " in "
123  << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
124  << " ms";
125  }
126  return join_hash_table;
127 }
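
A hedged sketch of a call site (the wrapper function is hypothetical, and every input is assumed to come from the surrounding query pipeline; as the handlers above show, getInstance() surfaces failures as HashJoinFail):

    // Sketch only: all arguments are supplied by the query pipeline,
    // and the OverlapsJoinHashTable.h header is assumed to be included.
    std::shared_ptr<OverlapsJoinHashTable> build_overlaps_hash_table(
        const std::shared_ptr<Analyzer::BinOper>& condition,
        const std::vector<InputTableInfo>& query_infos,
        ColumnCacheMap& column_cache,
        Executor* executor,
        const RegisteredQueryHint& query_hint,
        const int device_count) {
      // Build on CPU for an inner join; may throw HashJoinFail.
      return OverlapsJoinHashTable::getInstance(condition,
                                                query_infos,
                                                Data_Namespace::CPU_LEVEL,
                                                JoinType::INNER,
                                                device_count,
                                                column_cache,
                                                executor,
                                                query_hint);
    }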

size_t OverlapsJoinHashTable::getKeyBufferSize ( ) const
inline, private, noexcept

Definition at line 303 of file OverlapsJoinHashTable.h.

References CHECK, getEntryCount(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by countBufferOff(), offsetBufferOff(), and payloadBufferOff().

303  {
304  const auto key_component_width = getKeyComponentWidth();
305  CHECK(key_component_width == 4 || key_component_width == 8);
306  const auto key_component_count = getKeyComponentCount();
307  if (layoutRequiresAdditionalBuffers(getHashType())) {
308  return getEntryCount() * key_component_count * key_component_width;
309  } else {
310  return getEntryCount() * (key_component_count + 1) * key_component_width;
311  }
312  }
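
The two branches above correspond to the two key dictionary shapes: layouts that carry the additional one-to-many buffers store exactly key_component_count components per entry, while the others append one extra component-width slot per entry. A compact restatement of the branch (a sketch, not the member function itself):

    #include <cstddef>

    // Key buffer size per getKeyBufferSize(): entry_count keys of
    // key_component_count components, plus one extra slot per entry for
    // layouts without the additional one-to-many buffers.
    size_t key_buffer_size(const size_t entry_count,
                           const size_t key_component_count,
                           const size_t key_component_width,
                           const bool requires_additional_buffers) {
      const size_t components = requires_additional_buffers
                                    ? key_component_count
                                    : key_component_count + 1;
      return entry_count * components * key_component_width;
    }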

size_t OverlapsJoinHashTable::getKeyComponentCount ( ) const
protected

Definition at line 1071 of file OverlapsJoinHashTable.cpp.

References CHECK, and inverse_bucket_sizes_for_dimension_.

Referenced by codegenKey(), codegenMatchingSet(), getKeyBufferSize(), initHashTableOnCpu(), toSet(), and toString().

1071  {
1072  CHECK(!inverse_bucket_sizes_for_dimension_.empty());
1073  return inverse_bucket_sizes_for_dimension_.size();
1074 }

size_t OverlapsJoinHashTable::getKeyComponentWidth ( ) const
protected

Definition at line 1067 of file OverlapsJoinHashTable.cpp.

Referenced by calculateHashTableSize(), codegenKey(), codegenManyKey(), codegenMatchingSet(), getKeyBufferSize(), initHashTableOnCpu(), reifyForDevice(), toSet(), and toString().

1067  {
1068  return 8;
1069 }


Data_Namespace::MemoryLevel OverlapsJoinHashTable::getMemoryLevel ( ) const
inline, override, protected, virtual, noexcept

Implements HashJoin.

Definition at line 208 of file OverlapsJoinHashTable.h.

References memory_level_.

208  {
209  return memory_level_;
210  }
const RegisteredQueryHint& OverlapsJoinHashTable::getRegisteredQueryHint ( )
inline protected

Definition at line 258 of file OverlapsJoinHashTable.h.

References query_hint_.

Referenced by reifyWithLayout().

258 { return query_hint_; }

std::shared_ptr< BaselineHashTable > OverlapsJoinHashTable::initHashTableOnCpu ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const HashType  layout,
const size_t  entry_count,
const size_t  emitted_keys_count,
const bool  skip_hashtable_caching,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected

Definition at line 1197 of file OverlapsJoinHashTable.cpp.

References CHECK, condition_, cpu_hash_table_buff_mutex_, DEBUG_TIMER, executor_, HashJoin::getCompositeKeyInfo(), BaselineJoinHashTableBuilder::getHashTable(), HashJoin::getInnerTableId(), getKeyComponentCount(), getKeyComponentWidth(), BaselineJoinHashTableBuilder::initHashTableOnCpu(), initHashTableOnCpuFromCache(), inner_outer_pairs_, inverse_bucket_sizes_for_dimension_, join_type_, layout_override_, HashJoin::layoutRequiresAdditionalBuffers(), ManyToMany, OneToMany, putHashTableOnCpuToCache(), to_string(), and VLOG.

Referenced by reifyForDevice().

1206  {
1207  auto timer = DEBUG_TIMER(__func__);
1208  const auto composite_key_info =
1209  HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);
1210  CHECK(!join_columns.empty());
1211  CHECK(!join_bucket_info.empty());
1212  OverlapsHashTableCacheKey cache_key{join_columns.front().num_elems,
1213  composite_key_info.cache_key_chunks,
1214  condition_->get_optype(),
1215  chosen_max_hashtable_size,
1216  chosen_bucket_threshold,
1217  inverse_bucket_sizes_for_dimension_};
1218 
1219  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
1220  if (auto generic_hash_table = initHashTableOnCpuFromCache(cache_key)) {
1221  if (auto hash_table =
1222  std::dynamic_pointer_cast<BaselineHashTable>(generic_hash_table)) {
1223  VLOG(1) << "Using cached CPU hash table for initialization.";
1224  // See if a hash table of a different layout was returned.
1225  // If it was OneToMany, we can reuse it on ManyToMany.
1226  if (layout == HashType::ManyToMany &&
1227  hash_table->getLayout() == HashType::OneToMany) {
1228  // use the cached hash table
1229  layout_override_ = HashType::ManyToMany;
1230  }
1231  return hash_table;
1232  }
1233  }
1234  CHECK(layoutRequiresAdditionalBuffers(layout));
1235  const auto key_component_count =
1236  join_bucket_info[0].inverse_bucket_sizes_for_dimension.size();
1237 
1238  const auto key_handler =
1239  OverlapsKeyHandler(key_component_count,
1240  &join_columns[0],
1241  join_bucket_info[0].inverse_bucket_sizes_for_dimension.data());
1242  const auto catalog = executor_->getCatalog();
1243  BaselineJoinHashTableBuilder builder(catalog);
1244  const auto err = builder.initHashTableOnCpu(&key_handler,
1245  composite_key_info,
1246  join_columns,
1247  join_column_types,
1248  join_bucket_info,
1249  entry_count,
1250  emitted_keys_count,
1251  layout,
1252  join_type_,
1253  getKeyComponentWidth(),
1254  getKeyComponentCount());
1255  if (err) {
1256  throw HashJoinFail(
1257  std::string("Unrecognized error when initializing CPU overlaps hash table (") +
1258  std::to_string(err) + std::string(")"));
1259  }
1260  std::shared_ptr<BaselineHashTable> hash_table = builder.getHashTable();
1261  if (getInnerTableId() > 0) {
1262  if (skip_hashtable_caching) {
1263  VLOG(1) << "Skip to cache overlaps join hashtable";
1264  } else {
1265  putHashTableOnCpuToCache(cache_key, hash_table);
1266  }
1267  }
1268  return hash_table;
1269 }
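
The cache branch above lets a table that was built with a OneToMany layout satisfy a ManyToMany request, recording the change in layout_override_ instead of rebuilding. A hypothetical mini-cache illustrating just that rule; Layout, CachedTable, and the integer key are stand-ins, not OmniSciDB types:

#include <cstdio>
#include <map>
#include <memory>
#include <optional>

enum class Layout { OneToMany, ManyToMany };

struct CachedTable {
  Layout layout;
};

// Returns the cached table on a hit; when a ManyToMany table is requested
// but a OneToMany table is found, the table is reused and the caller's
// layout override is set instead of rebuilding.
std::shared_ptr<CachedTable> lookup(
    const std::map<int, std::shared_ptr<CachedTable>>& cache,
    int key,
    Layout requested,
    std::optional<Layout>& layout_override) {
  auto it = cache.find(key);
  if (it == cache.end()) {
    return nullptr;  // miss: caller builds a fresh table
  }
  if (requested == Layout::ManyToMany && it->second->layout == Layout::OneToMany) {
    layout_override = Layout::ManyToMany;
  }
  return it->second;
}

int main() {
  std::map<int, std::shared_ptr<CachedTable>> cache;
  cache[42] = std::make_shared<CachedTable>(CachedTable{Layout::OneToMany});
  std::optional<Layout> layout_override;
  auto hit = lookup(cache, 42, Layout::ManyToMany, layout_override);
  std::printf("hit=%d override=%d\n",
              static_cast<int>(hit != nullptr),
              static_cast<int>(layout_override.has_value()));
}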

std::shared_ptr< HashTable > OverlapsJoinHashTable::initHashTableOnCpuFromCache ( const OverlapsHashTableCacheKey key)
private

Definition at line 1716 of file OverlapsJoinHashTable.cpp.

References CHECK, DEBUG_TIMER, hash_table_cache_, inverse_bucket_sizes_for_dimension_, and VLOG.

Referenced by initHashTableOnCpu().

1717  {
1718  auto timer = DEBUG_TIMER(__func__);
1719  VLOG(1) << "Checking CPU hash table cache.";
1720  CHECK(hash_table_cache_);
1721  auto hash_table_opt = hash_table_cache_->getWithKey(key);
1722  if (hash_table_opt) {
1723  CHECK(inverse_bucket_sizes_for_dimension_ ==
1724  hash_table_opt->first.inverse_bucket_sizes);
1725  return hash_table_opt->second;
1726  }
1727  return nullptr;
1728 }

size_t OverlapsJoinHashTable::offsetBufferOff ( ) const
inline override private virtual noexcept

Implements HashJoin.

Definition at line 314 of file OverlapsJoinHashTable.h.

References getKeyBufferSize().

Referenced by codegenMatchingSet(), countBufferOff(), toSet(), and toString().

314 { return getKeyBufferSize(); }

size_t OverlapsJoinHashTable::payloadBufferOff ( ) const
inline override private virtual noexcept

Implements HashJoin.

Definition at line 324 of file OverlapsJoinHashTable.h.

References countBufferOff(), getComponentBufferSize(), getHashType(), getKeyBufferSize(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by toSet(), and toString().

324  {
325  if (layoutRequiresAdditionalBuffers(getHashType())) {
326  return countBufferOff() + getComponentBufferSize();
327  } else {
328  return getKeyBufferSize();
329  }
330  }

void OverlapsJoinHashTable::putHashTableOnCpuToCache ( const OverlapsHashTableCacheKey key,
std::shared_ptr< HashTable hash_table 
)
private

Definition at line 1751 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_GE, OverlapsHashTableCacheKey::chunk_keys, and hash_table_cache_.

Referenced by initHashTableOnCpu().

1753  {
1754  for (auto chunk_key : key.chunk_keys) {
1755  CHECK_GE(chunk_key.size(), size_t(2));
1756  if (chunk_key[1] < 0) {
1757  return;
1758  }
1759  }
1760  CHECK(hash_table_cache_);
1761  hash_table_cache_->insert(key, hash_table);
1762 }
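
The loop above bails out when any chunk key's second component is negative, which (assuming the usual OmniSciDB ChunkKey layout of {db_id, table_id, ...}, where temporary tables get negative ids) marks a hash table built over an intermediate result as non-cacheable. The same guard in isolation, with made-up sample keys:

#include <cstdio>
#include <vector>

using ChunkKey = std::vector<int>;  // assumed {db_id, table_id, ...} layout

// Any key referencing a negative table id makes the table non-cacheable.
bool cacheable(const std::vector<ChunkKey>& chunk_keys) {
  for (const auto& key : chunk_keys) {
    if (key.size() >= 2 && key[1] < 0) {
      return false;
    }
  }
  return true;
}

int main() {
  std::printf("%d\n", cacheable({{1, 5, 0}}));   // 1: persistent table
  std::printf("%d\n", cacheable({{1, -3, 0}}));  // 0: intermediate table
}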

void OverlapsJoinHashTable::registerQueryHint ( const RegisteredQueryHint query_hint)
inline protected

Definition at line 260 of file OverlapsJoinHashTable.h.

References query_hint_.

260  {
261  query_hint_ = query_hint;
262  }
void OverlapsJoinHashTable::reify ( const HashType  preferred_layout)
protected

Definition at line 1076 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_EQ, CHECK_LT, condition_, DEBUG_TIMER, device_count_, executor_, HashJoin::getCompositeKeyInfo(), inner_outer_pairs_, ManyToMany, OneToMany, reifyWithLayout(), and VLOG.

1076  {
1077  auto timer = DEBUG_TIMER(__func__);
1078  CHECK_LT(0, device_count_);
1079  const auto composite_key_info =
1080  HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);
1081 
1082  CHECK(condition_->is_overlaps_oper());
1083  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
1084  HashType layout;
1085  if (inner_outer_pairs_[0].second->get_type_info().is_fixlen_array() &&
1086  inner_outer_pairs_[0].second->get_type_info().get_size() == 32) {
1087  // bounds array
1088  layout = HashType::ManyToMany;
1089  } else {
1090  layout = HashType::OneToMany;
1091  }
1092  try {
1093  reifyWithLayout(layout);
1094  return;
1095  } catch (const std::exception& e) {
1096  VLOG(1) << "Caught exception while building overlaps baseline hash table: "
1097  << e.what();
1098  throw;
1099  }
1100 }
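
The layout choice above keys off the inner geometry column's physical type: a fixed-length 32-byte array (presumably a 4-double bounding box, per the "bounds array" comment) forces ManyToMany, anything else gets OneToMany. A standalone restatement; is_fixlen_array and size_bytes are stand-ins for the Analyzer type-info queries:

#include <cstdio>

enum class HashType { OneToMany, ManyToMany };

HashType choose_layout(bool is_fixlen_array, int size_bytes) {
  if (is_fixlen_array && size_bytes == 32) {
    return HashType::ManyToMany;  // bounds array
  }
  return HashType::OneToMany;
}

int main() {
  std::printf("%d\n", static_cast<int>(choose_layout(true, 32)));   // 1 (ManyToMany)
  std::printf("%d\n", static_cast<int>(choose_layout(false, 16)));  // 0 (OneToMany)
}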

void OverlapsJoinHashTable::reifyForDevice ( const ColumnsForDevice columns_for_device,
const HashType  layout,
const size_t  entry_count,
const size_t  emitted_keys_count,
const bool  skip_hashtable_caching,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold,
const int  device_id,
const logger::ThreadId  parent_thread_id 
)
protected

Definition at line 1138 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_EQ, CHECK_LT, Data_Namespace::CPU_LEVEL, DEBUG_TIMER_NEW_THREAD, getEffectiveMemoryLevel(), getKeyComponentWidth(), Data_Namespace::GPU_LEVEL, HashJoin::hash_tables_for_device_, initHashTableOnCpu(), inner_outer_pairs_, ColumnsForDevice::join_buckets, ColumnsForDevice::join_column_types, ColumnsForDevice::join_columns, HashJoin::layoutRequiresAdditionalBuffers(), memory_level_, UNREACHABLE, and VLOG.

Referenced by reifyImpl().

1146  {
1147  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
1148  CHECK_EQ(getKeyComponentWidth(), size_t(8));
1149  CHECK(layoutRequiresAdditionalBuffers(layout));
1150  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
1151 
1152  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
1153  VLOG(1) << "Building overlaps join hash table on CPU.";
1154  auto hash_table = initHashTableOnCpu(columns_for_device.join_columns,
1155  columns_for_device.join_column_types,
1156  columns_for_device.join_buckets,
1157  layout,
1158  entry_count,
1159  emitted_keys_count,
1160  skip_hashtable_caching,
1161  chosen_max_hashtable_size,
1162  chosen_bucket_threshold);
1163  CHECK(hash_table);
1164 
1165 #ifdef HAVE_CUDA
1166  if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
1167  auto gpu_hash_table = copyCpuHashTableToGpu(
1168  std::move(hash_table), layout, entry_count, emitted_keys_count, device_id);
1169  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
1170  hash_tables_for_device_[device_id] = std::move(gpu_hash_table);
1171  } else {
1172 #else
1173  CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
1174 #endif
1175  CHECK_EQ(hash_tables_for_device_.size(), size_t(1));
1176  hash_tables_for_device_[0] = std::move(hash_table);
1177 #ifdef HAVE_CUDA
1178  }
1179 #endif
1180  } else {
1181 #ifdef HAVE_CUDA
1182  auto hash_table = initHashTableOnGpu(columns_for_device.join_columns,
1183  columns_for_device.join_column_types,
1184  columns_for_device.join_buckets,
1185  layout,
1186  entry_count,
1187  emitted_keys_count,
1188  device_id);
1189  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
1190  hash_tables_for_device_[device_id] = std::move(hash_table);
1191 #else
1192  UNREACHABLE();
1193 #endif
1194  }
1195 }
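
Stripped of the #ifdef plumbing, the function makes a three-way decision. A sketch of just that decision, with MemoryLevel as a stand-in enum for the Data_Namespace type:

#include <cstdio>

enum class MemoryLevel { CPU_LEVEL, GPU_LEVEL };

// Where a device's table comes from: CPU builds are copied to the device
// when the table is GPU-resident overall; otherwise the table is built
// directly on the GPU (CUDA builds only).
const char* build_strategy(MemoryLevel effective, MemoryLevel requested) {
  if (effective == MemoryLevel::CPU_LEVEL) {
    return requested == MemoryLevel::GPU_LEVEL ? "build on CPU, copy to GPU"
                                               : "build and keep on CPU";
  }
  return "build directly on GPU";
}

int main() {
  std::printf("%s\n", build_strategy(MemoryLevel::CPU_LEVEL, MemoryLevel::GPU_LEVEL));
}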

void OverlapsJoinHashTable::reifyImpl ( std::vector< ColumnsForDevice > &  columns_per_device,
const Fragmenter_Namespace::TableInfo query_info,
const HashType  layout,
const size_t  shard_count,
const size_t  entry_count,
const size_t  emitted_keys_count,
const bool  skip_hashtable_caching,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected virtual

Definition at line 1102 of file OverlapsJoinHashTable.cpp.

References device_count_, Fragmenter_Namespace::TableInfo::fragments, only_shards_for_device(), reifyForDevice(), and logger::thread_id().

Referenced by reifyWithLayout().

1110  {
1111  std::vector<std::future<void>> init_threads;
1112  for (int device_id = 0; device_id < device_count_; ++device_id) {
1113  const auto fragments =
1114  shard_count
1115  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
1116  : query_info.fragments;
1117  init_threads.push_back(std::async(std::launch::async,
1118  &OverlapsJoinHashTable::reifyForDevice,
1119  this,
1120  columns_per_device[device_id],
1121  layout,
1122  entry_count,
1123  emitted_keys_count,
1124  skip_hashtable_caching,
1125  chosen_max_hashtable_size,
1126  chosen_bucket_threshold,
1127  device_id,
1128  logger::thread_id()));
1129  }
1130  for (auto& init_thread : init_threads) {
1131  init_thread.wait();
1132  }
1133  for (auto& init_thread : init_threads) {
1134  init_thread.get();
1135  }
1136 }
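
Note the two passes over init_threads: wait() first so every per-device build runs to completion, then get() so any stored exception is rethrown only after all workers are done. The same pattern in a self-contained toy; the device count and the failing worker are invented:

#include <cstdio>
#include <future>
#include <stdexcept>
#include <vector>

int main() {
  const int device_count = 4;
  std::vector<std::future<void>> init_threads;
  for (int device_id = 0; device_id < device_count; ++device_id) {
    // one worker per device; device 2 fails to show error propagation
    init_threads.push_back(std::async(std::launch::async, [device_id] {
      if (device_id == 2) {
        throw std::runtime_error("simulated per-device build failure");
      }
    }));
  }
  for (auto& t : init_threads) {
    t.wait();  // let every device finish before any error propagates
  }
  try {
    for (auto& t : init_threads) {
      t.get();  // rethrows the stored exception, if any
    }
  } catch (const std::exception& e) {
    // in OverlapsJoinHashTable this surfaces to the HashJoinFail wrapper
    std::printf("%s\n", e.what());
  }
}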

void OverlapsJoinHashTable::reifyWithLayout ( const HashType  layout)
protected

Definition at line 515 of file OverlapsJoinHashTable.cpp.

References gpu_enabled::accumulate(), auto_tuner_cache_, calculateHashTableSize(), CHECK, CHECK_EQ, CHECK_GE, computeHashTableCounts(), condition_, DEBUG_TIMER, device_count_, executor_, fetchColumnsForDevice(), g_overlaps_max_table_size_bytes, g_overlaps_target_entries_per_bin, get_inner_query_info(), HashJoin::getCompositeKeyInfo(), getEffectiveMemoryLevel(), HashJoin::getHashTypeString(), HashJoin::getInnerTableId(), getRegisteredQueryHint(), Data_Namespace::GPU_LEVEL, hash_table_cache_, InputTableInfo::info, inner_outer_pairs_, inverse_bucket_sizes_for_dimension_, kOverlapsBucketThreshold, kOverlapsKeysPerBin, kOverlapsMaxSize, kOverlapsNoCache, HashJoin::layoutRequiresAdditionalBuffers(), memory_level_, only_shards_for_device(), query_infos_, reifyImpl(), setInverseBucketSizeInfo(), shardCount(), and VLOG.

Referenced by reify().

515  {
516  auto timer = DEBUG_TIMER(__func__);
517  CHECK(layoutRequiresAdditionalBuffers(layout));
518  const auto& query_info =
519  get_inner_query_info(HashJoin::getInnerTableId(inner_outer_pairs_), query_infos_)
520  .info;
521  VLOG(1) << "Reify with layout " << getHashTypeString(layout)
522  << "for table_id: " << HashJoin::getInnerTableId(inner_outer_pairs_);
523  if (query_info.fragments.empty()) {
524  return;
525  }
526 
527  auto overlaps_max_table_size_bytes = g_overlaps_max_table_size_bytes;
528  std::optional<double> overlaps_threshold_override;
529  double overlaps_target_entries_per_bin = g_overlaps_target_entries_per_bin;
530  auto query_hint = getRegisteredQueryHint();
531  auto skip_hashtable_caching = false;
532  if (query_hint.isHintRegistered(QueryHint::kOverlapsBucketThreshold)) {
533  VLOG(1) << "Setting overlaps bucket threshold "
534  "\'overlaps_hashjoin_bucket_threshold\' via "
535  "query hint: "
536  << query_hint.overlaps_bucket_threshold;
537  overlaps_threshold_override = query_hint.overlaps_bucket_threshold;
538  }
539  if (query_hint.isHintRegistered(QueryHint::kOverlapsMaxSize)) {
540  std::ostringstream oss;
541  oss << "User requests to change a threshold \'overlaps_max_table_size_bytes\' via "
542  "query hint";
543  if (!overlaps_threshold_override.has_value()) {
544  oss << ": " << overlaps_max_table_size_bytes << " -> "
545  << query_hint.overlaps_max_size;
546  overlaps_max_table_size_bytes = query_hint.overlaps_max_size;
547  } else {
548  oss << ", but is skipped since the query hint also changes the threshold "
549  "\'overlaps_hashjoin_bucket_threshold\'";
550  }
551  VLOG(1) << oss.str();
552  }
553  if (query_hint.isHintRegistered(QueryHint::kOverlapsNoCache)) {
554  VLOG(1) << "User requests to skip caching overlaps join hashtable and its tuned "
555  "parameters for this query";
556  skip_hashtable_caching = true;
557  }
558  if (query_hint.isHintRegistered(QueryHint::kOverlapsKeysPerBin)) {
559  VLOG(1) << "User requests to change a threshold \'overlaps_keys_per_bin\' via query "
560  "hint: "
561  << overlaps_target_entries_per_bin << " -> "
562  << query_hint.overlaps_keys_per_bin;
563  overlaps_target_entries_per_bin = query_hint.overlaps_keys_per_bin;
564  }
565 
566  std::vector<ColumnsForDevice> columns_per_device;
567  const auto catalog = executor_->getCatalog();
568  CHECK(catalog);
569  auto& data_mgr = catalog->getDataMgr();
570  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
571  if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
572  for (int device_id = 0; device_id < device_count_; ++device_id) {
573  dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(&data_mgr, device_id));
574  }
575  }
576  const auto shard_count = shardCount();
577  size_t total_num_tuples = 0;
578  for (int device_id = 0; device_id < device_count_; ++device_id) {
579  const auto fragments =
580  shard_count
581  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
582  : query_info.fragments;
583  const size_t crt_num_tuples =
584  std::accumulate(fragments.begin(),
585  fragments.end(),
586  size_t(0),
587  [](const auto& sum, const auto& fragment) {
588  return sum + fragment.getNumTuples();
589  });
590  total_num_tuples += crt_num_tuples;
591  const auto columns_for_device =
592  fetchColumnsForDevice(fragments,
593  device_id,
594  memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL
595  ? dev_buff_owners[device_id].get()
596  : nullptr);
597  columns_per_device.push_back(columns_for_device);
598  }
599 
600  // Prepare to calculate the size of the hash table.
601  const auto composite_key_info =
602  HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);
603 
604  auto cache_key_contains_intermediate_table = [](const auto cache_key) {
605  for (auto key : cache_key.chunk_keys) {
606  CHECK_GE(key.size(), size_t(2));
607  if (key[1] < 0) {
608  return true;
609  }
610  }
611  return false;
612  };
613 
614  if (overlaps_threshold_override) {
615  // compute bucket sizes based on the user provided threshold
616  BucketSizeTuner tuner(/*initial_threshold=*/*overlaps_threshold_override,
617  /*step=*/1.0,
618  /*min_threshold=*/0.0,
619  getEffectiveMemoryLevel(inner_outer_pairs_),
620  columns_per_device,
621  inner_outer_pairs_,
622  total_num_tuples,
623  executor_);
624  const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();
625 
626  auto [entry_count, emitted_keys_count] =
627  computeHashTableCounts(shard_count,
628  inverse_bucket_sizes,
629  columns_per_device,
630  overlaps_max_table_size_bytes,
631  *overlaps_threshold_override);
632  setInverseBucketSizeInfo(inverse_bucket_sizes, columns_per_device, device_count_);
633  // reifyImpl will check the hash table cache for an appropriate hash table w/ those
634  // bucket sizes (or within tolerances) if a hash table exists use it, otherwise build
635  // one
636  reifyImpl(columns_per_device,
637  query_info,
638  layout,
639  shard_count,
640  entry_count,
641  emitted_keys_count,
642  skip_hashtable_caching,
643  overlaps_max_table_size_bytes,
644  *overlaps_threshold_override);
645  } else {
646  double overlaps_bucket_threshold = std::numeric_limits<double>::max();
647  OverlapsHashTableCacheKey cache_key{
648  columns_per_device.front().join_columns.front().num_elems,
649  composite_key_info.cache_key_chunks,
650  condition_->get_optype(),
651  overlaps_max_table_size_bytes,
652  overlaps_bucket_threshold};
653  auto cached_bucket_threshold_opt = auto_tuner_cache_->get(cache_key);
654  if (cached_bucket_threshold_opt) {
655  overlaps_bucket_threshold = cached_bucket_threshold_opt->first;
656  auto inverse_bucket_sizes = cached_bucket_threshold_opt->second;
657 
658  OverlapsHashTableCacheKey hash_table_cache_key(cache_key,
659  overlaps_max_table_size_bytes,
660  overlaps_bucket_threshold,
661  inverse_bucket_sizes);
662  if (auto hash_table_cache_opt =
663  hash_table_cache_->getWithKey(hash_table_cache_key)) {
664  // if we already have a built hash table, we can skip the scans required for
665  // computing bucket size and tuple count
666  auto key = hash_table_cache_opt->first;
667  // reset as the hash table sizes can vary a bit
668  setInverseBucketSizeInfo(
669  key.inverse_bucket_sizes, columns_per_device, device_count_);
670  auto hash_table = hash_table_cache_opt->second;
671  CHECK(hash_table);
672 
673  VLOG(1) << "Using cached hash table bucket size";
674 
675  reifyImpl(columns_per_device,
676  query_info,
677  layout,
678  shard_count,
679  hash_table->getEntryCount(),
680  hash_table->getEmittedKeysCount(),
681  skip_hashtable_caching,
682  overlaps_max_table_size_bytes,
683  overlaps_bucket_threshold);
684  } else {
685  VLOG(1) << "Computing bucket size for cached bucket threshold";
686  // compute bucket size using our cached tuner value
687  BucketSizeTuner tuner(/*initial_threshold=*/overlaps_bucket_threshold,
688  /*step=*/1.0,
689  /*min_threshold=*/0.0,
690  getEffectiveMemoryLevel(inner_outer_pairs_),
691  columns_per_device,
692  inner_outer_pairs_,
693  total_num_tuples,
694  executor_);
695 
696  const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();
697 
698  auto [entry_count, emitted_keys_count] =
699  computeHashTableCounts(shard_count,
700  inverse_bucket_sizes,
701  columns_per_device,
702  overlaps_max_table_size_bytes,
703  overlaps_bucket_threshold);
704  setInverseBucketSizeInfo(inverse_bucket_sizes, columns_per_device, device_count_);
705 
706  reifyImpl(columns_per_device,
707  query_info,
708  layout,
709  shard_count,
710  entry_count,
711  emitted_keys_count,
712  skip_hashtable_caching,
713  overlaps_max_table_size_bytes,
714  overlaps_bucket_threshold);
715  }
716  } else {
717  // compute bucket size using the auto tuner
718  BucketSizeTuner tuner(
719  /*initial_threshold=*/overlaps_bucket_threshold,
720  /*step=*/2.0,
721  /*min_threshold=*/1e-7,
722  getEffectiveMemoryLevel(inner_outer_pairs_),
723  columns_per_device,
724  inner_outer_pairs_,
725  total_num_tuples,
726  executor_);
727 
728  VLOG(1) << "Running overlaps join size auto tune with parameters: " << tuner;
729 
730  // manages the tuning state machine
731  TuningState tuning_state(overlaps_max_table_size_bytes,
732  overlaps_target_entries_per_bin);
733  while (tuner.tuneOneStep(tuning_state.tuning_direction)) {
734  const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();
735 
736  const auto [crt_entry_count, crt_emitted_keys_count] =
737  computeHashTableCounts(shard_count,
738  inverse_bucket_sizes,
739  columns_per_device,
740  tuning_state.overlaps_max_table_size_bytes,
741  tuning_state.chosen_overlaps_threshold);
742  const size_t hash_table_size = calculateHashTableSize(
743  inverse_bucket_sizes.size(), crt_emitted_keys_count, crt_entry_count);
744  HashTableProps crt_props(crt_entry_count,
745  crt_emitted_keys_count,
746  hash_table_size,
747  inverse_bucket_sizes);
748  VLOG(1) << "Tuner output: " << tuner << " with properties " << crt_props;
749 
750  const auto should_continue = tuning_state(crt_props, tuner.getMinBucketSize());
751  setInverseBucketSizeInfo(
752  tuning_state.crt_props.bucket_sizes, columns_per_device, device_count_);
753  if (!should_continue) {
754  break;
755  }
756  }
757 
758  const auto& crt_props = tuning_state.crt_props;
759  // sanity check that the hash table size has not changed. this is a fairly
760  // inexpensive check to ensure the above algorithm is consistent
761  const size_t hash_table_size =
762  calculateHashTableSize(inverse_bucket_sizes_for_dimension_.size(),
763  crt_props.emitted_keys_count,
764  crt_props.entry_count);
765  CHECK_EQ(crt_props.hash_table_size, hash_table_size);
766 
767  if (inverse_bucket_sizes_for_dimension_.empty() ||
768  hash_table_size > overlaps_max_table_size_bytes) {
769  VLOG(1) << "Could not find suitable overlaps join parameters to create hash "
770  "table under max allowed size ("
771  << overlaps_max_table_size_bytes << ") bytes.";
772  throw OverlapsHashTableTooBig(overlaps_max_table_size_bytes);
773  }
774 
775  VLOG(1) << "Final tuner output: " << tuner << " with properties " << crt_props;
777  VLOG(1) << "Final bucket sizes: ";
778  for (size_t dim = 0; dim < inverse_bucket_sizes_for_dimension_.size(); dim++) {
779  VLOG(1) << "dim[" << dim
780  << "]: " << 1.0 / inverse_bucket_sizes_for_dimension_[dim];
781  }
782  CHECK_GE(tuning_state.chosen_overlaps_threshold, double(0));
783  if (!cache_key_contains_intermediate_table(cache_key)) {
784  if (skip_hashtable_caching) {
785  VLOG(1) << "Skip to add tuned parameters to auto tuner";
786  } else {
787  auto cache_value = std::make_pair(tuning_state.chosen_overlaps_threshold,
788  inverse_bucket_sizes_for_dimension_);
789  auto_tuner_cache_->insert(cache_key, cache_value);
790  }
791  }
792  overlaps_bucket_threshold = tuning_state.chosen_overlaps_threshold;
793  reifyImpl(columns_per_device,
794  query_info,
795  layout,
796  shard_count,
797  crt_props.entry_count,
798  crt_props.emitted_keys_count,
799  skip_hashtable_caching,
800  overlaps_max_table_size_bytes,
801  overlaps_bucket_threshold);
802  }
803  }
804 }
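
Stripped of the tuning details, reifyWithLayout picks one of three paths for choosing bucket sizes. A sketch of that dispatch; Path and the booleans are illustrative stand-ins:

#include <cstdio>
#include <optional>

enum class Path { HintOverride, CachedThreshold, AutoTune };

// 1. an overlaps_hashjoin_bucket_threshold query hint wins outright;
// 2. otherwise a cached auto-tuner threshold (and possibly a cached
//    hash table) is reused;
// 3. otherwise the tuner loop searches for a threshold whose table
//    fits under the size budget, then caches the chosen parameters.
Path choose_path(std::optional<double> hint_threshold, bool tuner_cache_hit) {
  if (hint_threshold) {
    return Path::HintOverride;
  }
  return tuner_cache_hit ? Path::CachedThreshold : Path::AutoTune;
}

int main() {
  std::printf("%d\n", static_cast<int>(choose_path(std::nullopt, false)));  // 2
  std::printf("%d\n", static_cast<int>(choose_path(0.1, false)));           // 0
}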

void OverlapsJoinHashTable::setInverseBucketSizeInfo ( const std::vector< double > &  inverse_bucket_sizes,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  device_count 
)
protected

Definition at line 1051 of file OverlapsJoinHashTable.cpp.

References CHECK_EQ, inner_outer_pairs_, and inverse_bucket_sizes_for_dimension_.

Referenced by reifyWithLayout().

1054  {
1055  // set global bucket size
1056  inverse_bucket_sizes_for_dimension_ = inverse_bucket_sizes;
1057 
1058  // re-compute bucket counts per device based on global bucket size
1059  CHECK_EQ(columns_per_device.size(), size_t(device_count));
1060  for (size_t device_id = 0; device_id < device_count; ++device_id) {
1061  auto& columns_for_device = columns_per_device[device_id];
1062  columns_for_device.setBucketInfo(inverse_bucket_sizes_for_dimension_,
1063  inner_outer_pairs_);
1064  }
1065 }
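
The sizes are stored as inverses (note the 1.0 / inverse_bucket_sizes_for_dimension_[dim] used when reifyWithLayout logs the final bucket sizes), presumably so that mapping a coordinate to its bucket is a multiply rather than a divide. With illustrative values:

#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  const double bucket_size = 0.25;  // bucket width in coordinate units
  const double inverse_bucket_size = 1.0 / bucket_size;
  const double x = 3.7;
  // bucket index via multiplication with the stored inverse
  const auto bucket = static_cast<int64_t>(std::floor(x * inverse_bucket_size));
  std::printf("%lld\n", static_cast<long long>(bucket));  // 14
}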

size_t OverlapsJoinHashTable::shardCount ( ) const
inline private

Definition at line 284 of file OverlapsJoinHashTable.h.

References condition_, executor_, BaselineJoinHashTable::getShardCountForCondition(), Data_Namespace::GPU_LEVEL, inner_outer_pairs_, and memory_level_.

Referenced by reifyWithLayout().

284  {
285  if (memory_level_ != Data_Namespace::MemoryLevel::GPU_LEVEL) {
286  return 0;
287  }
288  return BaselineJoinHashTable::getShardCountForCondition(
289  condition_.get(), executor_, inner_outer_pairs_);
290  }

std::set< DecodedJoinHashBufferEntry > OverlapsJoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override protected virtual

Implements HashJoin.

Definition at line 1660 of file OverlapsJoinHashTable.cpp.

References CHECK, copy_from_gpu(), countBufferOff(), executor_, HashJoin::getHashTableForDevice(), getHashType(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), GPU, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toSet().

1662  {
1663  auto buffer = getJoinHashBuffer(device_type, device_id);
1664  auto hash_table = getHashTableForDevice(device_id);
1665  CHECK(hash_table);
1666  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
1667 #ifdef HAVE_CUDA
1668  std::unique_ptr<int8_t[]> buffer_copy;
1669  if (device_type == ExecutorDeviceType::GPU) {
1670  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
1671  CHECK(executor_);
1672  auto& data_mgr = executor_->getCatalog()->getDataMgr();
1673  copy_from_gpu(&data_mgr,
1674  buffer_copy.get(),
1675  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
1676  buffer_size,
1677  device_id);
1678  }
1679  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
1680 #else
1681  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
1682 #endif // HAVE_CUDA
1683  auto ptr2 = ptr1 + offsetBufferOff();
1684  auto ptr3 = ptr1 + countBufferOff();
1685  auto ptr4 = ptr1 + payloadBufferOff();
1686  const auto layout = getHashType();
1687  return HashTable::toSet(getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
1688  getKeyComponentWidth(),
1689  hash_table->getEntryCount(),
1690  ptr1,
1691  ptr2,
1692  ptr3,
1693  ptr4,
1694  buffer_size);
1695 }
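
The four pointers passed to HashTable::toSet() carve one flat buffer into the regions of a baseline hash table: keys (ptr1), one-to-many offsets (ptr2), counts (ptr3), and payload (ptr4). A sketch of that addressing with made-up region sizes; the real offsets come from getKeyBufferSize() and getComponentBufferSize():

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  alignas(8) int8_t buffer[4096] = {};
  const std::size_t key_buffer_size = 1024;       // keys region (offsetBufferOff)
  const std::size_t component_buffer_size = 512;  // offsets and counts regions
  const int8_t* ptr1 = buffer;                        // keys
  const int8_t* ptr2 = ptr1 + key_buffer_size;        // offsets
  const int8_t* ptr3 = ptr2 + component_buffer_size;  // counts
  const int8_t* ptr4 = ptr3 + component_buffer_size;  // payload
  std::printf("%td %td %td\n", ptr2 - ptr1, ptr3 - ptr1, ptr4 - ptr1);
}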

std::string OverlapsJoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
override protected virtual

Implements HashJoin.

Definition at line 1616 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_LT, copy_from_gpu(), countBufferOff(), executor_, getHashType(), HashJoin::getHashTypeString(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), GPU, HashJoin::hash_tables_for_device_, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toString().

1618  {
1619  auto buffer = getJoinHashBuffer(device_type, device_id);
1620  CHECK_LT(device_id, hash_tables_for_device_.size());
1621  auto hash_table = hash_tables_for_device_[device_id];
1622  CHECK(hash_table);
1623  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
1624 #ifdef HAVE_CUDA
1625  std::unique_ptr<int8_t[]> buffer_copy;
1626  if (device_type == ExecutorDeviceType::GPU) {
1627  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
1628  CHECK(executor_);
1629  auto& data_mgr = executor_->getCatalog()->getDataMgr();
1630 
1631  copy_from_gpu(&data_mgr,
1632  buffer_copy.get(),
1633  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
1634  buffer_size,
1635  device_id);
1636  }
1637  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
1638 #else
1639  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
1640 #endif // HAVE_CUDA
1641  auto ptr2 = ptr1 + offsetBufferOff();
1642  auto ptr3 = ptr1 + countBufferOff();
1643  auto ptr4 = ptr1 + payloadBufferOff();
1644  CHECK(hash_table);
1645  const auto layout = getHashType();
1646  return HashTable::toString(
1647  "geo",
1648  getHashTypeString(layout),
1649  getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
1650  getKeyComponentWidth(),
1651  hash_table->getEntryCount(),
1652  ptr1,
1653  ptr2,
1654  ptr3,
1655  ptr4,
1656  buffer_size,
1657  raw);
1658 }

Member Data Documentation

ColumnCacheMap& OverlapsJoinHashTable::column_cache_
private

Definition at line 352 of file OverlapsJoinHashTable.h.

Referenced by fetchColumnsForDevice().

const std::shared_ptr<Analyzer::BinOper> OverlapsJoinHashTable::condition_
private
std::mutex OverlapsJoinHashTable::cpu_hash_table_buff_mutex_
private

Definition at line 362 of file OverlapsJoinHashTable.h.

Referenced by initHashTableOnCpu().

const int OverlapsJoinHashTable::device_count_
private
std::vector<double> OverlapsJoinHashTable::inverse_bucket_sizes_for_dimension_
private
const JoinType OverlapsJoinHashTable::join_type_
private

Definition at line 347 of file OverlapsJoinHashTable.h.

Referenced by initHashTableOnCpu().

std::optional<HashType> OverlapsJoinHashTable::layout_override_
private

Definition at line 360 of file OverlapsJoinHashTable.h.

Referenced by getHashType(), and initHashTableOnCpu().

RegisteredQueryHint OverlapsJoinHashTable::query_hint_
private
const std::vector<InputTableInfo>& OverlapsJoinHashTable::query_infos_
private

Definition at line 348 of file OverlapsJoinHashTable.h.

Referenced by reifyWithLayout().


The documentation for this class was generated from the following files:

OverlapsJoinHashTable.h
OverlapsJoinHashTable.cpp