OmniSciDB  c0231cc57d
OverlapsJoinHashTable Class Reference

#include <OverlapsJoinHashTable.h>


Classes

struct  AlternativeCacheKeyForOverlapsHashJoin
 

Public Member Functions

 OverlapsJoinHashTable (const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map)
 
virtual ~OverlapsJoinHashTable ()
 
Public Member Functions inherited from HashJoin
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int8_t * getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static std::shared_ptr< OverlapsJoinHashTable > getInstance (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static void invalidateCache ()
 
static void markCachedItemAsDirty (size_t table_key)
 
static HashtableRecycler * getHashTableCache ()
 
static OverlapsTuningParamRecycler * getOverlapsTuningParamCache ()
 
Static Public Member Functions inherited from HashJoin
static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static std::pair< std::string, std::shared_ptr< HashJoin > > getSyntheticInstance (std::vector< std::shared_ptr< Analyzer::BinOper >>, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 
static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static bool canAccessHashTable (bool allow_hash_table_recycling, bool invalid_cache_key, JoinType join_type)
 
static void checkHashJoinReplicationConstraint (const int table_id, const size_t shard_count, const Executor *executor)
 
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
 
template<typename T >
static const T * getHashJoinColumn (const Analyzer::Expr *expr)
 
static std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > normalizeColumnPairs (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 
static std::vector< int > collectFragmentIds (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs={})
 
static std::vector< const StringDictionaryProxy::IdMap * > translateCompositeStrDictProxies (const CompositeKeyInfo &composite_key_info, const std::vector< InnerOuterStringOpInfos > &string_op_infos_for_keys, const Executor *executor)
 
static std::pair< const StringDictionaryProxy *, StringDictionaryProxy * > getStrDictProxies (const InnerOuter &cols, const Executor *executor, const bool has_string_ops)
 
static const StringDictionaryProxy::IdMap * translateInnerToOuterStrDictProxies (const InnerOuter &cols, const InnerOuterStringOpInfos &inner_outer_string_op_infos, ExpressionRange &old_col_range, const Executor *executor)
 

Protected Member Functions

void reify (const HashType preferred_layout)
 
virtual void reifyWithLayout (const HashType layout)
 
virtual void reifyImpl (std::vector< ColumnsForDevice > &columns_per_device, const Fragmenter_Namespace::TableInfo &query_info, const HashType layout, const size_t shard_count, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
void reifyForDevice (const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const int device_id, const logger::ThreadId parent_thread_id)
 
size_t calculateHashTableSize (size_t number_of_dimensions, size_t emitted_keys_count, size_t entry_count) const
 
ColumnsForDevice fetchColumnsForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
 
virtual std::pair< size_t, size_t > approximateTupleCount (const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
virtual std::pair< size_t, size_t > computeHashTableCounts (const size_t shard_count, const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
void setInverseBucketSizeInfo (const std::vector< double > &inverse_bucket_sizes, std::vector< ColumnsForDevice > &columns_per_device, const size_t device_count)
 
size_t getKeyComponentWidth () const
 
size_t getKeyComponentCount () const
 
HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
std::shared_ptr< BaselineHashTable > initHashTableOnCpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching)
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
DecodedJoinHashBufferSet toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
const RegisteredQueryHint & getRegisteredQueryHint ()
 
void registerQueryHint (const RegisteredQueryHint &query_hint)
 
size_t getEntryCount () const
 
size_t getEmittedKeysCount () const
 
size_t getComponentBufferSize () const noexcept override
 
size_t shardCount () const
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
size_t getKeyBufferSize () const noexcept
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
std::string getHashJoinType () const final
 
bool isBitwiseEq () const override
 
std::shared_ptr< HashTable > initHashTableOnCpuFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
 
std::optional< std::pair< size_t, size_t > > getApproximateTupleCountFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
 
void putHashTableOnCpuToCache (QueryPlanHash key, CacheItemType item_type, std::shared_ptr< HashTable > hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
 
llvm::Value * codegenKey (const CompilationOptions &)
 
std::vector< llvm::Value * > codegenManyKey (const CompilationOptions &)
 
std::optional< OverlapsHashTableMetaInfo > getOverlapsHashTableMetaInfo ()
 
QueryPlanHash getAlternativeCacheKey (AlternativeCacheKeyForOverlapsHashJoin &info)
 
void generateCacheKey (const size_t max_hashtable_size, const double bucket_threshold, const std::vector< double > &bucket_sizes, std::vector< std::vector< Fragmenter_Namespace::FragmentInfo >> &fragments_per_device, int device_count)
 
QueryPlanHash getCacheKey (int device_id) const
 
const std::vector< InnerOuter > & getInnerOuterPairs () const
 
void setOverlapsHashtableMetaInfo (size_t max_table_size_bytes, double bucket_threshold, std::vector< double > &bucket_sizes)
 

Protected Attributes

const std::shared_ptr< Analyzer::BinOper > condition_
 
const JoinType join_type_
 
const std::vector< InputTableInfo > & query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
std::vector< InnerOuter > inner_outer_pairs_
 
const int device_count_
 
std::vector< double > inverse_bucket_sizes_for_dimension_
 
double chosen_overlaps_bucket_threshold_
 
size_t chosen_overlaps_max_table_size_bytes_
 
CompositeKeyInfo composite_key_info_
 
std::optional< HashTypelayout_override_
 
std::mutex cpu_hash_table_buff_mutex_
 
RegisteredQueryHint query_hint_
 
HashTableBuildDagMap hashtable_build_dag_map_
 
QueryPlanDAG query_plan_dag_
 
std::vector< QueryPlanHash > hashtable_cache_key_
 
HashtableCacheMetaInfo hashtable_cache_meta_info_
 
std::unordered_set< size_t > table_keys_
 
const TableIdToNodeMap table_id_to_node_map_
 
Protected Attributes inherited from HashJoin
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
 

Static Protected Attributes

static std::unique_ptr< HashtableRecycler > hash_table_cache_
 
static std::unique_ptr< OverlapsTuningParamRecycler > auto_tuner_cache_
 

Additional Inherited Members

Static Protected Member Functions inherited from HashJoin
static llvm::Value * codegenColOrStringOper (const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
 

Detailed Description

Definition at line 24 of file OverlapsJoinHashTable.h.

Constructor & Destructor Documentation

OverlapsJoinHashTable::OverlapsJoinHashTable ( const std::shared_ptr< Analyzer::BinOper > condition,
const JoinType join_type,
const std::vector< InputTableInfo > & query_infos,
const Data_Namespace::MemoryLevel memory_level,
ColumnCacheMap & column_cache,
Executor * executor,
const std::vector< InnerOuter > & inner_outer_pairs,
const int device_count,
const HashTableBuildDagMap & hashtable_build_dag_map,
const TableIdToNodeMap & table_id_to_node_map
)
inline

Definition at line 26 of file OverlapsJoinHashTable.h.

References CHECK_GT, RegisteredQueryHint::defaults(), device_count_, HashJoin::hash_tables_for_device_, and query_hint_.

  : condition_(condition)
  , join_type_(join_type)
  , query_infos_(query_infos)
  , memory_level_(memory_level)
  , executor_(executor)
  , column_cache_(column_cache)
  , inner_outer_pairs_(inner_outer_pairs)
  , device_count_(device_count)
  , hashtable_build_dag_map_(hashtable_build_dag_map)
  , table_id_to_node_map_(table_id_to_node_map) {
  CHECK_GT(device_count_, 0);
  hash_tables_for_device_.resize(std::max(device_count_, 1));
  query_hint_ = RegisteredQueryHint::defaults();
}


virtual OverlapsJoinHashTable::~OverlapsJoinHashTable ( )
inline virtual

Definition at line 51 of file OverlapsJoinHashTable.h.

{}

Member Function Documentation

std::pair< size_t, size_t > OverlapsJoinHashTable::approximateTupleCount ( const std::vector< double > &  inverse_bucket_sizes_for_dimension,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected virtual

Reimplemented in RangeJoinHashTable.

Definition at line 982 of file OverlapsJoinHashTable.cpp.

References gpu_enabled::accumulate(), approximate_distinct_tuples_on_device_overlaps(), approximate_distinct_tuples_overlaps(), threading_serial::async(), Bitmap, CHECK, CHECK_EQ, CHECK_GT, CPU, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, Data_Namespace::CPU_LEVEL, cpu_threads(), device_count_, executor_, getApproximateTupleCountFromCache(), getEffectiveMemoryLevel(), getQueryEngineCudaStreamForDevice(), GPU, Data_Namespace::GPU_LEVEL, hashtable_cache_key_, hll_size(), hll_unify(), inner_outer_pairs_, OVERLAPS_HT, transfer_flat_object_to_gpu(), transfer_vector_of_flat_objects_to_gpu(), UNREACHABLE, and VLOG.

Referenced by computeHashTableCounts().

{
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
  CountDistinctDescriptor count_distinct_desc{
      CountDistinctImplType::Bitmap,
      0,
      11,
      true,
      effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
          ? ExecutorDeviceType::GPU
          : ExecutorDeviceType::CPU,
      1};
  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();

  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());
  if (columns_per_device.front().join_columns.front().num_elems == 0) {
    return std::make_pair(0, 0);
  }

  // TODO: state management in here should be revisited, but this should be safe enough
  // for now
  // re-compute bucket counts per device based on global bucket size
  for (size_t device_id = 0; device_id < columns_per_device.size(); ++device_id) {
    auto& columns_for_device = columns_per_device[device_id];
    columns_for_device.setBucketInfo(inverse_bucket_sizes_for_dimension,
                                     inner_outer_pairs_);
  }

  // Number of keys must match dimension of buckets
  CHECK_EQ(columns_per_device.front().join_columns.size(),
           columns_per_device.front().join_buckets.size());
  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
    // Note that this path assumes each device has the same hash table (for GPU hash
    // join w/ hash table built on CPU)
    const auto cached_count_info =
        getApproximateTupleCountFromCache(hashtable_cache_key_.front(),
                                          CacheItemType::OVERLAPS_HT,
                                          DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
    if (cached_count_info) {
      VLOG(1) << "Using a cached tuple count: " << cached_count_info->first
              << ", emitted keys count: " << cached_count_info->second;
      return *cached_count_info;
    }
    int thread_count = cpu_threads();
    std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
    auto hll_result = &hll_buffer_all_cpus[0];

    std::vector<int32_t> num_keys_for_row;
    // TODO(adb): support multi-column overlaps join
    num_keys_for_row.resize(columns_per_device.front().join_columns[0].num_elems);

    approximate_distinct_tuples_overlaps(hll_result,
                                         num_keys_for_row,
                                         count_distinct_desc.bitmap_sz_bits,
                                         padded_size_bytes,
                                         columns_per_device.front().join_columns,
                                         columns_per_device.front().join_column_types,
                                         columns_per_device.front().join_buckets,
                                         thread_count);
    for (int i = 1; i < thread_count; ++i) {
      hll_unify(hll_result,
                hll_result + i * padded_size_bytes,
                1 << count_distinct_desc.bitmap_sz_bits);
    }
    return std::make_pair(
        hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
        static_cast<size_t>(num_keys_for_row.size() > 0 ? num_keys_for_row.back() : 0));
  }
#ifdef HAVE_CUDA
  auto data_mgr = executor_->getDataMgr();
  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
  for (auto& host_hll_buffer : host_hll_buffers) {
    host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
  }
  std::vector<size_t> emitted_keys_count_device_threads(device_count_, 0);
  std::vector<std::future<void>> approximate_distinct_device_threads;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    approximate_distinct_device_threads.emplace_back(std::async(
        std::launch::async,
        [device_id,
         &columns_per_device,
         &count_distinct_desc,
         data_mgr,
         &host_hll_buffers,
         &emitted_keys_count_device_threads] {
          auto allocator = std::make_unique<CudaAllocator>(
              data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
          auto device_hll_buffer =
              allocator->alloc(count_distinct_desc.bitmapPaddedSizeBytes());
          data_mgr->getCudaMgr()->zeroDeviceMem(
              device_hll_buffer,
              count_distinct_desc.bitmapPaddedSizeBytes(),
              device_id,
              getQueryEngineCudaStreamForDevice(device_id));
          const auto& columns_for_device = columns_per_device[device_id];
          auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(
              columns_for_device.join_columns, *allocator);

          CHECK_GT(columns_for_device.join_buckets.size(), 0u);
          const auto& inverse_bucket_sizes_for_dimension =
              columns_for_device.join_buckets[0].inverse_bucket_sizes_for_dimension;
          auto inverse_bucket_sizes_gpu = allocator->alloc(
              inverse_bucket_sizes_for_dimension.size() * sizeof(double));
          allocator->copyToDevice(
              inverse_bucket_sizes_gpu,
              inverse_bucket_sizes_for_dimension.data(),
              inverse_bucket_sizes_for_dimension.size() * sizeof(double));
          const size_t row_counts_buffer_sz =
              columns_per_device.front().join_columns[0].num_elems * sizeof(int32_t);
          auto row_counts_buffer = allocator->alloc(row_counts_buffer_sz);
          data_mgr->getCudaMgr()->zeroDeviceMem(
              row_counts_buffer,
              row_counts_buffer_sz,
              device_id,
              getQueryEngineCudaStreamForDevice(device_id));
          const auto key_handler =
              OverlapsKeyHandler(inverse_bucket_sizes_for_dimension.size(),
                                 join_columns_gpu,
                                 reinterpret_cast<double*>(inverse_bucket_sizes_gpu));
          const auto key_handler_gpu =
              transfer_flat_object_to_gpu(key_handler, *allocator);
          approximate_distinct_tuples_on_device_overlaps(
              reinterpret_cast<uint8_t*>(device_hll_buffer),
              count_distinct_desc.bitmap_sz_bits,
              reinterpret_cast<int32_t*>(row_counts_buffer),
              key_handler_gpu,
              columns_for_device.join_columns[0].num_elems);

          auto& host_emitted_keys_count = emitted_keys_count_device_threads[device_id];
          allocator->copyFromDevice(
              &host_emitted_keys_count,
              row_counts_buffer +
                  (columns_per_device.front().join_columns[0].num_elems - 1) *
                      sizeof(int32_t),
              sizeof(int32_t));

          auto& host_hll_buffer = host_hll_buffers[device_id];
          allocator->copyFromDevice(&host_hll_buffer[0],
                                    device_hll_buffer,
                                    count_distinct_desc.bitmapPaddedSizeBytes());
        }));
  }
  for (auto& child : approximate_distinct_device_threads) {
    child.get();
  }
  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
  auto& result_hll_buffer = host_hll_buffers.front();
  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
  for (int device_id = 1; device_id < device_count_; ++device_id) {
    auto& host_hll_buffer = host_hll_buffers[device_id];
    hll_unify(hll_result,
              reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
              1 << count_distinct_desc.bitmap_sz_bits);
  }
  const size_t emitted_keys_count =
      std::accumulate(emitted_keys_count_device_threads.begin(),
                      emitted_keys_count_device_threads.end(),
                      0);
  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
                        emitted_keys_count);
#else
  UNREACHABLE();
  return {0, 0};
#endif  // HAVE_CUDA
}

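Both execution paths above follow the same estimate-and-unify pattern: each worker (CPU thread or CUDA device) fills a private HyperLogLog register buffer, the buffers are folded together with hll_unify (an element-wise register max), and hll_size turns the merged registers into one cardinality estimate. Below is a minimal, self-contained sketch of that pattern using a toy HyperLogLog; it is illustrative only, not the engine's hll_* implementation, and all names in it are made up for the example.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

// splitmix64 finalizer, used here as a stand-in hash with good bit dispersion.
uint64_t mix(uint64_t x) {
  x += 0x9e3779b97f4a7c15ull;
  x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ull;
  x = (x ^ (x >> 27)) * 0x94d049bb133111ebull;
  return x ^ (x >> 31);
}

// Toy HyperLogLog with m = 2^b registers. "Unify" is an element-wise max,
// which is why independent per-thread (or per-device) sketches merge cheaply.
struct Hll {
  explicit Hll(int b) : b_(b), regs_(size_t(1) << b, 0) {}
  void add(uint64_t v) {
    const uint64_t h = mix(v);
    const size_t idx = h >> (64 - b_);  // top b bits select a register
    uint8_t rank = 1;                   // 1 + leading zeros of the remaining bits
    for (uint64_t rest = h << b_; rank <= 64 - b_ && !(rest >> 63); rest <<= 1) {
      ++rank;
    }
    regs_[idx] = std::max(regs_[idx], rank);
  }
  void unify(const Hll& other) {  // analogous to hll_unify() above
    for (size_t i = 0; i < regs_.size(); ++i) {
      regs_[i] = std::max(regs_[i], other.regs_[i]);
    }
  }
  double estimate() const {  // analogous to hll_size(): raw HLL estimator
    double sum = 0;
    for (auto r : regs_) {
      sum += std::pow(2.0, -double(r));
    }
    const double m = double(regs_.size());
    return 0.7213 / (1.0 + 1.079 / m) * m * m / sum;
  }
  int b_;
  std::vector<uint8_t> regs_;
};

int main() {
  Hll a(11), b(11);  // 11 bitmap bits, matching the descriptor above
  for (uint64_t i = 0; i < 50000; ++i) a.add(i);       // "thread 0" keys
  for (uint64_t i = 25000; i < 100000; ++i) b.add(i);  // "thread 1" keys
  a.unify(b);  // merge the sketches, then read one estimate off the union
  std::cout << "~distinct keys: " << a.estimate() << '\n';  // close to 100000
}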

size_t OverlapsJoinHashTable::calculateHashTableSize ( size_t  number_of_dimensions,
size_t  emitted_keys_count,
size_t  entry_count 
) const
protected

Definition at line 910 of file OverlapsJoinHashTable.cpp.

References getKeyComponentWidth().

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  const auto key_component_width = getKeyComponentWidth();
  const auto key_component_count = number_of_dimensions;
  const auto entry_size = key_component_count * key_component_width;
  const auto keys_for_all_rows = emitted_keys_count;
  const size_t one_to_many_hash_entries = 2 * entry_count + keys_for_all_rows;
  const size_t hash_table_size =
      entry_size * entry_count + one_to_many_hash_entries * sizeof(int32_t);
  return hash_table_size;
}

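For intuition, the formula can be evaluated standalone: the table is the key area (entry_size * entry_count) plus a one-to-many section of int32 slots covering an offset buffer and a count buffer (entry_count each) and the emitted-keys payload. A small sketch with hypothetical numbers (2-D keys, 8-byte components, 1000 entries, 5000 emitted keys):

#include <cstddef>
#include <cstdint>
#include <iostream>

// Mirrors calculateHashTableSize(): keys area + one-to-many area
// (offset buffer + count buffer = 2 * entry_count slots, plus the payload
// of emitted row ids), all int32 in the one-to-many section.
size_t calculate_hash_table_size(size_t key_component_width,
                                 size_t number_of_dimensions,
                                 size_t emitted_keys_count,
                                 size_t entry_count) {
  const size_t entry_size = number_of_dimensions * key_component_width;
  const size_t one_to_many_hash_entries = 2 * entry_count + emitted_keys_count;
  return entry_size * entry_count + one_to_many_hash_entries * sizeof(int32_t);
}

int main() {
  // 2 dims * 8 bytes = 16-byte keys; 16 * 1000 + (2*1000 + 5000) * 4 = 44,000
  std::cout << calculate_hash_table_size(8, 2, 5000, 1000) << " bytes\n";
}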

llvm::Value * OverlapsJoinHashTable::codegenKey ( const CompilationOptions & co)
protected

Definition at line 1465 of file OverlapsJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::castArrayPointer(), CHECK, CHECK_EQ, CodeGenerator::codegen(), executor_, getKeyComponentCount(), getKeyComponentWidth(), inner_outer_pairs_, inverse_bucket_sizes_for_dimension_, kPOINT, kTINYINT, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), and UNREACHABLE.

Referenced by codegenMatchingSet().

{
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
  llvm::Value* key_buff_lv{nullptr};
  switch (key_component_width) {
    case 4:
      key_buff_lv =
          LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv);
      break;
    case 8:
      key_buff_lv =
          LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
      break;
    default:
      CHECK(false);
  }

  const auto& inner_outer_pair = inner_outer_pairs_[0];
  const auto outer_geo = inner_outer_pair.second;
  const auto outer_geo_ti = outer_geo->get_type_info();

  llvm::Value* arr_ptr = nullptr;
  CodeGenerator code_generator(executor_);
  CHECK_EQ(inverse_bucket_sizes_for_dimension_.size(), static_cast<size_t>(2));

  if (outer_geo_ti.is_geometry()) {
    // TODO(adb): for points we will use the coords array, but for other geometries we
    // will need to use the bounding box. For now only support points.
    CHECK_EQ(outer_geo_ti.get_type(), kPOINT);

    if (const auto outer_geo_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_geo)) {
      const auto outer_geo_col_lvs = code_generator.codegen(outer_geo_col, true, co);
      CHECK_EQ(outer_geo_col_lvs.size(), size_t(1));
      const auto coords_cd = executor_->getCatalog()->getMetadataForColumn(
          outer_geo_col->get_table_id(), outer_geo_col->get_column_id() + 1);
      CHECK(coords_cd);

      const auto array_ptr = executor_->cgen_state_->emitExternalCall(
          "array_buff",
          llvm::Type::getInt8PtrTy(executor_->cgen_state_->context_),
          {outer_geo_col_lvs.front(), code_generator.posArg(outer_geo_col)});
      CHECK(coords_cd->columnType.get_elem_type().get_type() == kTINYINT)
          << "Only TINYINT coordinates columns are supported in geo overlaps hash "
             "join.";
      arr_ptr = code_generator.castArrayPointer(array_ptr,
                                                coords_cd->columnType.get_elem_type());
    } else if (const auto outer_geo_function_operator =
                   dynamic_cast<const Analyzer::GeoOperator*>(outer_geo)) {
      // Process points dynamically constructed by geo function operators
      const auto outer_geo_function_operator_lvs =
          code_generator.codegen(outer_geo_function_operator, true, co);
      CHECK_EQ(outer_geo_function_operator_lvs.size(), size_t(2));
      arr_ptr = outer_geo_function_operator_lvs.front();
    } else if (const auto outer_geo_expr =
                   dynamic_cast<const Analyzer::GeoExpr*>(outer_geo)) {
      UNREACHABLE() << outer_geo_expr->toString();
    }
  } else if (outer_geo_ti.is_fixlen_array()) {
    // Process dynamically constructed points
    const auto outer_geo_cast_coord_array =
        dynamic_cast<const Analyzer::UOper*>(outer_geo);
    CHECK_EQ(outer_geo_cast_coord_array->get_optype(), kCAST);
    const auto outer_geo_coord_array = dynamic_cast<const Analyzer::ArrayExpr*>(
        outer_geo_cast_coord_array->get_operand());
    CHECK(outer_geo_coord_array);
    CHECK(outer_geo_coord_array->isLocalAlloc());
    CHECK_EQ(outer_geo_coord_array->getElementCount(), 2);
    auto elem_size = (outer_geo_ti.get_compression() == kENCODING_GEOINT)
                         ? sizeof(int32_t)
                         : sizeof(double);
    CHECK_EQ(outer_geo_ti.get_size(), int(2 * elem_size));
    const auto outer_geo_constructed_lvs = code_generator.codegen(outer_geo, true, co);
    // CHECK_EQ(outer_geo_constructed_lvs.size(), size_t(2)); // Pointer and size
    const auto array_ptr = outer_geo_constructed_lvs.front();  // Just need the pointer
    arr_ptr = LL_BUILDER.CreateGEP(
        array_ptr->getType()->getScalarType()->getPointerElementType(),
        array_ptr,
        LL_INT(0));
    arr_ptr = code_generator.castArrayPointer(array_ptr, SQLTypeInfo(kTINYINT, true));
  }
  if (!arr_ptr) {
    LOG(FATAL) << "Overlaps key currently only supported for geospatial columns and "
                  "constructed points.";
  }

  for (size_t i = 0; i < 2; i++) {
    const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(
        key_buff_lv->getType()->getScalarType()->getPointerElementType(),
        key_buff_lv,
        LL_INT(i));

    // Note that get_bucket_key_for_range_compressed will need to be specialized for
    // future compression schemes
    auto bucket_key =
        outer_geo_ti.get_compression() == kENCODING_GEOINT
            ? executor_->cgen_state_->emitExternalCall(
                  "get_bucket_key_for_range_compressed",
                  get_int_type(64, LL_CONTEXT),
                  {arr_ptr, LL_INT(i), LL_FP(inverse_bucket_sizes_for_dimension_[i])})
            : executor_->cgen_state_->emitExternalCall(
                  "get_bucket_key_for_range_double",
                  get_int_type(64, LL_CONTEXT),
                  {arr_ptr, LL_INT(i), LL_FP(inverse_bucket_sizes_for_dimension_[i])});
    const auto col_lv = LL_BUILDER.CreateSExt(
        bucket_key, get_int_type(key_component_width * 8, LL_CONTEXT));
    LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
  }
  return key_buff_lv;
}

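The emitted IR calls the runtime helpers get_bucket_key_for_range_double / get_bucket_key_for_range_compressed once per dimension; judging from the call sites above, each key component is the coordinate snapped to a bucket index via the inverse bucket size. A plain C++ sketch of that bucketing math (an inference from the usage here, not the JIT-emitted code; the sample values are illustrative):

#include <cmath>
#include <cstdint>
#include <iostream>

// One hash-key component per dimension: floor(coord * inverse_bucket_size).
// An inverse bucket size of 10.0 maps coordinates into 0.1-unit cells.
int64_t bucket_key_for_dimension(double coord, double inverse_bucket_size) {
  return static_cast<int64_t>(std::floor(coord * inverse_bucket_size));
}

int main() {
  const double inverse_bucket_sizes[2] = {10.0, 10.0};  // illustrative values
  const double point[2] = {-122.41, 37.77};             // lon, lat
  for (size_t i = 0; i < 2; ++i) {
    std::cout << "key[" << i << "] = "
              << bucket_key_for_dimension(point[i], inverse_bucket_sizes[i]) << '\n';
  }
  // key[0] = -1225, key[1] = 377
}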

std::vector< llvm::Value * > OverlapsJoinHashTable::codegenManyKey ( const CompilationOptions & co)
protected

Definition at line 1577 of file OverlapsJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), executor_, HashJoin::getHashTableForDevice(), getHashType(), getKeyComponentWidth(), inner_outer_pairs_, ManyToMany, CodeGenerator::posArg(), and VLOG.

Referenced by codegenMatchingSet().

{
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  auto hash_table = getHashTableForDevice(size_t(0));
  CHECK(hash_table);
  CHECK(getHashType() == HashType::ManyToMany);

  VLOG(1) << "Performing codegen for ManyToMany";
  const auto& inner_outer_pair = inner_outer_pairs_[0];
  const auto outer_col = inner_outer_pair.second;

  CodeGenerator code_generator(executor_);
  const auto col_lvs = code_generator.codegen(outer_col, true, co);
  CHECK_EQ(col_lvs.size(), size_t(1));

  const auto outer_col_var = dynamic_cast<const Analyzer::ColumnVar*>(outer_col);
  CHECK(outer_col_var);
  const auto coords_cd = executor_->getCatalog()->getMetadataForColumn(
      outer_col_var->get_table_id(), outer_col_var->get_column_id());
  CHECK(coords_cd);

  const auto array_ptr = executor_->cgen_state_->emitExternalCall(
      "array_buff",
      llvm::Type::getInt8PtrTy(executor_->cgen_state_->context_),
      {col_lvs.front(), code_generator.posArg(outer_col)});

  // TODO(jclay): this seems to cast to double, and causes the GPU build to fail.
  // const auto arr_ptr =
  //     code_generator.castArrayPointer(array_ptr,
  //     coords_cd->columnType.get_elem_type());
  array_ptr->setName("array_ptr");

  auto num_keys_lv = executor_->cgen_state_->emitExternalCall(
      "get_num_buckets_for_bounds",
      get_int_type(32, LL_CONTEXT),
      {array_ptr,
       LL_INT(0),
       LL_FP(inverse_bucket_sizes_for_dimension_[0]),
       LL_FP(inverse_bucket_sizes_for_dimension_[1])});
  num_keys_lv->setName("num_keys_lv");

  return {num_keys_lv, array_ptr};
}


HashJoinMatchingSet OverlapsJoinHashTable::codegenMatchingSet ( const CompilationOptions & co,
const size_t  index 
)
override protected virtual

Implements HashJoin.

Definition at line 1623 of file OverlapsJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, HashJoin::codegenHashTableLoad(), codegenKey(), codegenManyKey(), HashJoin::codegenMatchingSet(), executor_, get_int_array_type(), get_int_type(), getComponentBufferSize(), getEntryCount(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), inverse_bucket_sizes_for_dimension_, LL_BUILDER, LL_CONTEXT, LL_FP, LL_INT, ManyToMany, offsetBufferOff(), OneToMany, to_string(), UNREACHABLE, and VLOG.

{
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  if (getHashType() == HashType::ManyToMany) {
    VLOG(1) << "Building codegenMatchingSet for ManyToMany";
    const auto key_component_width = getKeyComponentWidth();
    CHECK(key_component_width == 4 || key_component_width == 8);
    auto many_to_many_args = codegenManyKey(co);
    auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
    const auto composite_dict_ptr_type =
        llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
    const auto composite_key_dict =
        hash_ptr->getType()->isPointerTy()
            ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
            : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
    const auto key_component_count = getKeyComponentCount();

    auto one_to_many_ptr = hash_ptr;

    if (one_to_many_ptr->getType()->isPointerTy()) {
      one_to_many_ptr =
          LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
    } else {
      CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
    }

    const auto composite_key_dict_size = offsetBufferOff();
    one_to_many_ptr =
        LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));

    // NOTE(jclay): A fixed array of size 200 is allocated on the stack.
    // this is likely the maximum value we can do that is safe to use across
    // all supported GPU architectures.
    const int max_array_size = 200;
    const auto arr_type = get_int_array_type(32, max_array_size, LL_CONTEXT);
    const auto out_arr_lv = LL_BUILDER.CreateAlloca(arr_type);
    out_arr_lv->setName("out_arr");

    const auto casted_out_arr_lv =
        LL_BUILDER.CreatePointerCast(out_arr_lv, arr_type->getPointerTo());

    const auto element_ptr = LL_BUILDER.CreateGEP(arr_type, casted_out_arr_lv, LL_INT(0));

    auto rowid_ptr_i32 =
        LL_BUILDER.CreatePointerCast(element_ptr, llvm::Type::getInt32PtrTy(LL_CONTEXT));

    const auto candidate_count_lv = executor_->cgen_state_->emitExternalCall(
        "get_candidate_rows",
        llvm::Type::getInt64Ty(LL_CONTEXT),
        {
            rowid_ptr_i32,
            LL_INT(max_array_size),
            many_to_many_args[1],
            LL_INT(0),
            LL_FP(inverse_bucket_sizes_for_dimension_[0]),
            LL_FP(inverse_bucket_sizes_for_dimension_[1]),
            many_to_many_args[0],
            LL_INT(key_component_count),               // key_component_count
            composite_key_dict,                        // ptr to hash table
            LL_INT(getEntryCount()),                   // entry_count
            LL_INT(composite_key_dict_size),           // offset_buffer_ptr_offset
            LL_INT(getEntryCount() * sizeof(int32_t))  // sub_buff_size
        });

    const auto slot_lv = LL_INT(int64_t(0));

    return {rowid_ptr_i32, candidate_count_lv, slot_lv};
  } else {
    VLOG(1) << "Building codegenMatchingSet for Baseline";
    // TODO: duplicated w/ BaselineJoinHashTable -- push into the hash table builder?
    const auto key_component_width = getKeyComponentWidth();
    CHECK(key_component_width == 4 || key_component_width == 8);
    auto key_buff_lv = codegenKey(co);
    CHECK(getHashType() == HashType::OneToMany);
    auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
    const auto composite_dict_ptr_type =
        llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
    const auto composite_key_dict =
        hash_ptr->getType()->isPointerTy()
            ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
            : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
    const auto key_component_count = getKeyComponentCount();
    const auto key = executor_->cgen_state_->emitExternalCall(
        "get_composite_key_index_" + std::to_string(key_component_width * 8),
        get_int_type(64, LL_CONTEXT),
        {key_buff_lv,
         LL_INT(key_component_count),
         composite_key_dict,
         LL_INT(getEntryCount())});
    auto one_to_many_ptr = hash_ptr;
    if (one_to_many_ptr->getType()->isPointerTy()) {
      one_to_many_ptr =
          LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
    } else {
      CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
    }
    const auto composite_key_dict_size = offsetBufferOff();
    one_to_many_ptr =
        LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
    return HashJoin::codegenMatchingSet(
        std::vector<llvm::Value*>{
            one_to_many_ptr, key, LL_INT(int64_t(0)), LL_INT(getEntryCount() - 1)},
        false,
        false,
        false,
        getComponentBufferSize(),
        executor_);
  }
  UNREACHABLE();
  return HashJoinMatchingSet{};
}

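Either branch hands back a HashJoinMatchingSet: a pointer to candidate row ids, a count, and a slot value. Since bucket hits are only candidates, the exact overlaps predicate is still evaluated downstream against each candidate row. A hypothetical consumer loop, written as plain C++ rather than the generated IR (all names here are illustrative):

#include <cstdint>
#include <functional>
#include <vector>

// Illustrative shape of what codegenMatchingSet() returns to the probe loop.
struct MatchingSet {
  const int32_t* elements;  // candidate row ids (rowid_ptr_i32 above)
  int64_t count;            // number of candidates (candidate_count_lv above)
};

// Hypothetical probe: every bucket candidate is re-checked against the
// exact join predicate before it counts as a match.
std::vector<int32_t> probe(const MatchingSet& ms,
                           const std::function<bool(int32_t)>& exact_overlaps) {
  std::vector<int32_t> matches;
  for (int64_t i = 0; i < ms.count; ++i) {
    const int32_t inner_row = ms.elements[i];
    if (exact_overlaps(inner_row)) {  // drop bucketing false positives
      matches.push_back(inner_row);
    }
  }
  return matches;
}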

llvm::Value* OverlapsJoinHashTable::codegenSlot ( const CompilationOptions &,
const size_t   
)
inline override protected virtual

Implements HashJoin.

Definition at line 208 of file OverlapsJoinHashTable.h.

References UNREACHABLE.

{
  UNREACHABLE();  // not applicable for overlaps join
  return nullptr;
}
std::pair< size_t, size_t > OverlapsJoinHashTable::computeHashTableCounts ( const size_t  shard_count,
const std::vector< double > &  inverse_bucket_sizes_for_dimension,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected virtual

Definition at line 963 of file OverlapsJoinHashTable.cpp.

References approximateTupleCount(), CHECK, device_count_, get_entries_per_device(), and memory_level_.

Referenced by reifyWithLayout().

{
  CHECK(!inverse_bucket_sizes_for_dimension.empty());
  const auto [tuple_count, emitted_keys_count] =
      approximateTupleCount(inverse_bucket_sizes_for_dimension,
                            columns_per_device,
                            chosen_max_hashtable_size,
                            chosen_bucket_threshold);
  const auto entry_count = 2 * std::max(tuple_count, size_t(1));

  return std::make_pair(
      get_entries_per_device(entry_count, shard_count, device_count_, memory_level_),
      emitted_keys_count);
}

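The arithmetic is deliberately simple: the entry count is twice the estimated distinct tuple count (floored at one) to keep the table sparse, and get_entries_per_device then splits it across devices. A worked sketch; the shard split shown is an assumption about how get_entries_per_device divides entries, included only to make the numbers concrete:

#include <algorithm>
#include <cstddef>
#include <iostream>

// Assumed shard split: divide total entries over shards, rounding up;
// unsharded tables keep the full entry count on every device.
size_t entries_per_device(size_t total_entries, size_t shard_count) {
  return shard_count ? (total_entries + shard_count - 1) / shard_count
                     : total_entries;
}

int main() {
  const size_t tuple_count = 42000;  // HLL estimate from approximateTupleCount()
  const size_t entry_count = 2 * std::max(tuple_count, size_t(1));  // 84000
  std::cout << entries_per_device(entry_count, 0) << '\n';  // unsharded: 84000
  std::cout << entries_per_device(entry_count, 4) << '\n';  // 4 shards: 21000
}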

size_t OverlapsJoinHashTable::countBufferOff ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 270 of file OverlapsJoinHashTable.h.

References getComponentBufferSize(), getHashType(), getKeyBufferSize(), HashJoin::layoutRequiresAdditionalBuffers(), and offsetBufferOff().

Referenced by payloadBufferOff(), toSet(), and toString().

{
  if (layoutRequiresAdditionalBuffers(getHashType())) {
    return offsetBufferOff() + getComponentBufferSize();
  } else {
    return getKeyBufferSize();
  }
}

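Together with offsetBufferOff() and payloadBufferOff(), this pins down the one-to-many buffer layout: the composite key dictionary first, then three int32 sections (offsets, counts, payload), where the offset and count sections each hold entry_count slots. A sketch of the resulting offsets, with the key-buffer size inferred from the *BufferOff() implementations on this page:

#include <cstddef>
#include <cstdint>
#include <iostream>

// One-to-many layout: [keys][offsets][counts][payload]; the three trailing
// sections are int32 sub-buffers.
struct OverlapsLayout {
  size_t entry_count;          // hash slots
  size_t key_component_count;  // dimensions (2 for 2-D overlaps keys)
  size_t key_component_width;  // 4 or 8 bytes

  size_t keyBufferSize() const {  // composite key dictionary (assumed layout)
    return entry_count * key_component_count * key_component_width;
  }
  size_t componentBufferSize() const { return entry_count * sizeof(int32_t); }
  size_t offsetBufferOff() const { return keyBufferSize(); }
  size_t countBufferOff() const { return offsetBufferOff() + componentBufferSize(); }
  size_t payloadBufferOff() const { return countBufferOff() + componentBufferSize(); }
};

int main() {
  const OverlapsLayout l{1000, 2, 8};
  std::cout << l.offsetBufferOff() << ' '     // 16000
            << l.countBufferOff() << ' '      // 20000
            << l.payloadBufferOff() << '\n';  // 24000
}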

ColumnsForDevice OverlapsJoinHashTable::fetchColumnsForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
DeviceAllocator dev_buff_owner 
)
protected

Definition at line 923 of file OverlapsJoinHashTable.cpp.

References CHECK, column_cache_, executor_, HashJoin::fetchJoinColumn(), get_column_descriptor_maybe(), get_join_column_type_kind(), getEffectiveMemoryLevel(), and inner_outer_pairs_.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  const auto& catalog = *executor_->getCatalog();
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);

  std::vector<JoinColumn> join_columns;
  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
  std::vector<JoinColumnTypeInfo> join_column_types;
  std::vector<std::shared_ptr<void>> malloc_owner;
  for (const auto& inner_outer_pair : inner_outer_pairs_) {
    const auto inner_col = inner_outer_pair.first;
    const auto inner_cd = get_column_descriptor_maybe(
        inner_col->get_column_id(), inner_col->get_table_id(), catalog);
    if (inner_cd && inner_cd->isVirtualCol) {
      throw FailedToJoinOnVirtualColumn();
    }
    join_columns.emplace_back(fetchJoinColumn(inner_col,
                                              fragments,
                                              effective_memory_level,
                                              device_id,
                                              chunks_owner,
                                              dev_buff_owner,
                                              malloc_owner,
                                              executor_,
                                              &column_cache_));
    const auto& ti = inner_col->get_type_info();
    join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
                                                      0,
                                                      0,
                                                      inline_int_null_value<int64_t>(),
                                                      false,
                                                      0,
                                                      get_join_column_type_kind(ti)});
    CHECK(ti.is_array()) << "Overlaps join currently only supported for arrays.";
  }
  return {join_columns, join_column_types, chunks_owner, {}, malloc_owner};
}


void OverlapsJoinHashTable::generateCacheKey ( const size_t  max_hashtable_size,
const double  bucket_threshold,
const std::vector< double > &  bucket_sizes,
std::vector< std::vector< Fragmenter_Namespace::FragmentInfo >> &  fragments_per_device,
int  device_count 
)
inline protected

Definition at line 342 of file OverlapsJoinHashTable.h.

References HashJoin::collectFragmentIds(), hash_table_cache_, hashtable_cache_key_, and table_keys_.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  for (int device_id = 0; device_id < device_count; ++device_id) {
    auto hash_val = boost::hash_value(hashtable_cache_key_[device_id]);
    boost::hash_combine(hash_val, max_hashtable_size);
    boost::hash_combine(hash_val, bucket_threshold);
    boost::hash_combine(hash_val, bucket_sizes);
    boost::hash_combine(hash_val,
                        HashJoin::collectFragmentIds(fragments_per_device[device_id]));
    hashtable_cache_key_[device_id] = hash_val;
    hash_table_cache_->addQueryPlanDagForTableKeys(hashtable_cache_key_[device_id],
                                                   table_keys_);
  }
}

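The per-device cache key is thus a boost::hash_combine fold over the prior key, the tuning parameters, and the device's fragment ids, so changing any of these yields a different key. A standalone sketch of the folding with illustrative values:

#include <boost/functional/hash.hpp>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  size_t hash_val = boost::hash_value(size_t(987654321));     // prior per-device key
  boost::hash_combine(hash_val, size_t(1024) * 1024 * 1024);  // max_hashtable_size
  boost::hash_combine(hash_val, 0.1);                         // bucket_threshold
  boost::hash_combine(hash_val, std::vector<double>{50.0, 50.0});  // bucket_sizes
  boost::hash_combine(hash_val, std::vector<int>{0, 1, 2});   // fragment ids
  std::cout << "cache key: " << hash_val << '\n';
}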

QueryPlanHash OverlapsJoinHashTable::getAlternativeCacheKey ( AlternativeCacheKeyForOverlapsHashJoin & info)
inline protected

Definition at line 323 of file OverlapsJoinHashTable.h.

References OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::bucket_threshold, OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::chunk_key_hash, OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::inner_outer_pairs, OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::inverse_bucket_sizes, OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::max_hashtable_size, OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::num_elements, and OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::optype.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  auto hash = info.chunk_key_hash;
  for (InnerOuter inner_outer : info.inner_outer_pairs) {
    auto inner_col = inner_outer.first;
    auto rhs_col_var = dynamic_cast<const Analyzer::ColumnVar*>(inner_outer.second);
    auto outer_col = rhs_col_var ? rhs_col_var : inner_col;
    boost::hash_combine(hash, inner_col->toString());
    if (inner_col->get_type_info().is_string()) {
      boost::hash_combine(hash, outer_col->toString());
    }
  }
  boost::hash_combine(hash, info.num_elements);
  boost::hash_combine(hash, info.optype);
  boost::hash_combine(hash, info.max_hashtable_size);
  boost::hash_combine(hash, info.bucket_threshold);
  boost::hash_combine(hash, info.inverse_bucket_sizes);
  return hash;
}


std::optional< std::pair< size_t, size_t > > OverlapsJoinHashTable::getApproximateTupleCountFromCache ( QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier 
)
protected

Definition at line 1853 of file OverlapsJoinHashTable.cpp.

References CHECK, getOverlapsHashTableMetaInfo(), hash_table_cache_, and HashtableCacheMetaInfo::overlaps_meta_info.

Referenced by approximateTupleCount().

{
  CHECK(hash_table_cache_);
  HashtableCacheMetaInfo metaInfo;
  metaInfo.overlaps_meta_info = getOverlapsHashTableMetaInfo();
  auto cached_hashtable =
      hash_table_cache_->getItemFromCache(key, item_type, device_identifier, metaInfo);
  if (cached_hashtable) {
    return std::make_pair(cached_hashtable->getEntryCount() / 2,
                          cached_hashtable->getEmittedKeysCount());
  }
  return std::nullopt;
}


QueryPlanHash OverlapsJoinHashTable::getCacheKey ( int  device_id) const
inline protected

Definition at line 361 of file OverlapsJoinHashTable.h.

References hashtable_cache_key_.

{
  return hashtable_cache_key_[device_id];
}
size_t OverlapsJoinHashTable::getComponentBufferSize ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 231 of file OverlapsJoinHashTable.h.

References CHECK, and HashJoin::hash_tables_for_device_.

Referenced by codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), countBufferOff(), and payloadBufferOff().

{
  CHECK(!hash_tables_for_device_.empty());
  auto hash_table = hash_tables_for_device_.front();
  CHECK(hash_table);
  return hash_table->getEntryCount() * sizeof(int32_t);
}


int OverlapsJoinHashTable::getDeviceCount ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 169 of file OverlapsJoinHashTable.h.

References device_count_.

{ return device_count_; }
Data_Namespace::MemoryLevel OverlapsJoinHashTable::getEffectiveMemoryLevel ( const std::vector< InnerOuter > &  inner_outer_pairs) const
protected

Definition at line 1814 of file OverlapsJoinHashTable.cpp.

References Data_Namespace::CPU_LEVEL, executor_, Data_Namespace::GPU_LEVEL, RegisteredQueryHint::isHintRegistered(), kOverlapsAllowGpuBuild, memory_level_, RegisteredQueryHint::overlaps_allow_gpu_build, and query_hint_.

Referenced by approximateTupleCount(), fetchColumnsForDevice(), reifyForDevice(), RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  if (query_hint_.isHintRegistered(QueryHint::kOverlapsAllowGpuBuild) &&
      query_hint_.overlaps_allow_gpu_build &&
      this->executor_->getDataMgr()->gpusPresent() &&
      memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
    return Data_Namespace::MemoryLevel::GPU_LEVEL;
  }
  // otherwise, try to build on CPU
  return Data_Namespace::MemoryLevel::CPU_LEVEL;
}


size_t OverlapsJoinHashTable::getEmittedKeysCount ( ) const
inline protected

Definition at line 225 of file OverlapsJoinHashTable.h.

References CHECK, and HashJoin::getHashTableForDevice().

{
  auto hash_table = getHashTableForDevice(0);
  CHECK(hash_table);
  return hash_table->getEmittedKeysCount();
}


size_t OverlapsJoinHashTable::getEntryCount ( ) const
inline protected

Definition at line 219 of file OverlapsJoinHashTable.h.

References CHECK, and HashJoin::getHashTableForDevice().

Referenced by codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), and getKeyBufferSize().

{
  auto hash_table = getHashTableForDevice(0);
  CHECK(hash_table);
  return hash_table->getEntryCount();
}


std::string OverlapsJoinHashTable::getHashJoinType ( ) const
inline final protected virtual

Implements HashJoin.

Definition at line 286 of file OverlapsJoinHashTable.h.

{ return "Overlaps"; }
static HashtableRecycler* OverlapsJoinHashTable::getHashTableCache ( )
inline static

Definition at line 91 of file OverlapsJoinHashTable.h.

References CHECK, and hash_table_cache_.

Referenced by QueryRunner::QueryRunner::getCachedHashtableWithoutCacheKey(), QueryRunner::QueryRunner::getCacheItemMetric(), and QueryRunner::QueryRunner::getNumberOfCachedItem().

{
  CHECK(hash_table_cache_);
  return hash_table_cache_.get();
}


HashType OverlapsJoinHashTable::getHashType ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Reimplemented in RangeJoinHashTable.

Definition at line 156 of file OverlapsJoinHashTable.h.

References CHECK, HashJoin::getHashTableForDevice(), and layout_override_.

Referenced by codegenManyKey(), codegenMatchingSet(), countBufferOff(), getKeyBufferSize(), payloadBufferOff(), toSet(), and toString().

{
  if (layout_override_) {
    return *layout_override_;
  }
  auto hash_table = getHashTableForDevice(0);
  CHECK(hash_table);
  return hash_table->getLayout();
}


const std::vector<InnerOuter>& OverlapsJoinHashTable::getInnerOuterPairs ( ) const
inline protected

Definition at line 365 of file OverlapsJoinHashTable.h.

References inner_outer_pairs_.

Referenced by RangeJoinHashTable::isProbeCompressed().

{ return inner_outer_pairs_; }


int OverlapsJoinHashTable::getInnerTableId ( ) const
override protected virtual noexcept

Implements HashJoin.

Definition at line 1826 of file OverlapsJoinHashTable.cpp.

References CHECK, HashJoin::getInnerTableId(), and inner_outer_pairs_.

Referenced by RangeJoinHashTable::reifyWithLayout().

{
  try {
    return HashJoin::getInnerTableId(inner_outer_pairs_);
  } catch (...) {
    CHECK(false);
  }
  return 0;
}


int OverlapsJoinHashTable::getInnerTableRteIdx ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 251 of file OverlapsJoinHashTable.h.

References CHECK, and inner_outer_pairs_.

{
  CHECK(!inner_outer_pairs_.empty());
  const auto first_inner_col = inner_outer_pairs_.front().first;
  return first_inner_col->get_rte_idx();
}
std::shared_ptr< OverlapsJoinHashTable > OverlapsJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const int  device_count,
ColumnCacheMap column_cache,
Executor executor,
const HashTableBuildDagMap hashtable_build_dag_map,
const RegisteredQueryHint query_hint,
const TableIdToNodeMap table_id_to_node_map 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 37 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_EQ, get_inner_query_info(), HashJoin::getHashTypeString(), HashJoin::getInnerTableId(), RangeJoinHashTable::getInstance(), Fragmenter_Namespace::TableInfo::getNumTuplesUpperBound(), InputTableInfo::info, RegisteredQueryHint::isAnyQueryHintDelivered(), ManyToMany, HashJoin::normalizeColumnPairs(), OneToMany, VLOG, and VLOGGING.

Referenced by HashJoin::getInstance().

47  {
48  decltype(std::chrono::steady_clock::now()) ts1, ts2;
49 
50  std::vector<InnerOuter> inner_outer_pairs;
51 
52  if (const auto range_expr =
53  dynamic_cast<const Analyzer::RangeOper*>(condition->get_right_operand())) {
54  return RangeJoinHashTable::getInstance(condition,
55  range_expr,
56  query_infos,
57  memory_level,
58  join_type,
59  device_count,
60  column_cache,
61  executor,
62  hashtable_build_dag_map,
63  query_hint,
64  table_id_to_node_map);
65  } else {
66  inner_outer_pairs =
67  HashJoin::normalizeColumnPairs(
68  condition.get(), *executor->getCatalog(), executor->getTemporaryTables())
69  .first;
70  }
71  CHECK(!inner_outer_pairs.empty());
72 
73  const auto getHashTableType =
74  [](const std::shared_ptr<Analyzer::BinOper> condition,
75  const std::vector<InnerOuter>& inner_outer_pairs) -> HashType {
76  HashType layout = HashType::OneToMany;
77  if (condition->is_overlaps_oper()) {
78  CHECK_EQ(inner_outer_pairs.size(), size_t(1));
79  if (inner_outer_pairs[0].first->get_type_info().is_array() &&
80  inner_outer_pairs[0].second->get_type_info().is_array() &&
81  // Bounds vs constructed points, former should yield ManyToMany
82  inner_outer_pairs[0].second->get_type_info().get_size() == 32) {
83  layout = HashType::ManyToMany;
84  }
85  }
86  return layout;
87  };
88 
89  const auto layout = getHashTableType(condition, inner_outer_pairs);
90 
91  if (VLOGGING(1)) {
92  VLOG(1) << "Building geo hash table " << getHashTypeString(layout)
93  << " for qual: " << condition->toString();
94  ts1 = std::chrono::steady_clock::now();
95  }
96 
97  const auto qi_0 = query_infos[0].info.getNumTuplesUpperBound();
98  const auto qi_1 = query_infos[1].info.getNumTuplesUpperBound();
99 
100  VLOG(1) << "table_id = " << query_infos[0].table_id << " has " << qi_0 << " tuples.";
101  VLOG(1) << "table_id = " << query_infos[1].table_id << " has " << qi_1 << " tuples.";
102 
103  const auto& query_info =
104  get_inner_query_info(HashJoin::getInnerTableId(inner_outer_pairs), query_infos)
105  .info;
106  const auto total_entries = 2 * query_info.getNumTuplesUpperBound();
107  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
108  throw TooManyHashEntries();
109  }
110 
111  auto join_hash_table = std::make_shared<OverlapsJoinHashTable>(condition,
112  join_type,
113  query_infos,
114  memory_level,
115  column_cache,
116  executor,
117  inner_outer_pairs,
118  device_count,
119  hashtable_build_dag_map,
120  table_id_to_node_map);
121  if (query_hint.isAnyQueryHintDelivered()) {
122  join_hash_table->registerQueryHint(query_hint);
123  }
124  try {
125  join_hash_table->reify(layout);
126  } catch (const HashJoinFail& e) {
127  throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
128  "involved in overlaps join | ") +
129  e.what());
130  } catch (const ColumnarConversionNotSupported& e) {
131  throw HashJoinFail(std::string("Could not build hash tables for overlaps join | "
132  "Inner table too big. Attempt manual table reordering "
133  "or create a single fragment inner table. | ") +
134  e.what());
135  } catch (const std::exception& e) {
136  throw HashJoinFail(std::string("Failed to build hash tables for overlaps join | ") +
137  e.what());
138  }
139  if (VLOGGING(1)) {
140  ts2 = std::chrono::steady_clock::now();
141  VLOG(1) << "Built geo hash table " << getHashTypeString(layout) << " in "
142  << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
143  << " ms";
144  }
145  return join_hash_table;
146 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
Definition: HashJoin.h:95
static std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > normalizeColumnPairs(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:989
static std::shared_ptr< RangeJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const Analyzer::RangeOper *range_expr, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
virtual int getInnerTableId() const noexcept=0
#define VLOGGING(n)
Definition: Logger.h:220
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:154
#define CHECK(condition)
Definition: Logger.h:222
HashType
Definition: HashTable.h:19
bool isAnyQueryHintDelivered() const
Definition: QueryHint.h:256
#define VLOG(n)
Definition: Logger.h:316

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
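For orientation, here is a hedged sketch of a call site; it is not from the source, and every argument (condition, query_infos, column_cache, executor, hashtable_build_dag_map, query_hint, table_id_to_node_map) is assumed to be prepared by the surrounding executor machinery, as HashJoin::getInstance() does before dispatching here:

   // Hypothetical call site; all arguments are assumed to be in scope.
   auto overlaps_ht =
       OverlapsJoinHashTable::getInstance(condition,  // Analyzer::BinOper for the overlaps qual
                                          query_infos,
                                          Data_Namespace::MemoryLevel::CPU_LEVEL,
                                          JoinType::INNER,
                                          /*device_count=*/1,
                                          column_cache,
                                          executor,
                                          hashtable_build_dag_map,
                                          query_hint,
                                          table_id_to_node_map);
   // On success the table is already reified; HashJoinFail is thrown otherwise.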

size_t OverlapsJoinHashTable::getKeyBufferSize ( ) const
inline protected noexcept

Definition at line 257 of file OverlapsJoinHashTable.h.

References CHECK, getEntryCount(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by countBufferOff(), offsetBufferOff(), and payloadBufferOff().

257  {
258  const auto key_component_width = getKeyComponentWidth();
259  CHECK(key_component_width == 4 || key_component_width == 8);
260  const auto key_component_count = getKeyComponentCount();
261  if (layoutRequiresAdditionalBuffers(getHashType())) {
262  return getEntryCount() * key_component_count * key_component_width;
263  } else {
264  return getEntryCount() * (key_component_count + 1) * key_component_width;
265  }
266  }
HashType getHashType() const noexcept override
#define CHECK(condition)
Definition: Logger.h:222
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:150

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
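As a worked example (all numbers invented): with getKeyComponentWidth() == 8, getKeyComponentCount() == 2 (one component per bucketed dimension), and getEntryCount() == 1024, a OneToMany layout requires additional buffers, so the key buffer occupies 1024 * 2 * 8 = 16384 bytes; a OneToOne layout stores one extra component per entry, giving 1024 * (2 + 1) * 8 = 24576 bytes.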

size_t OverlapsJoinHashTable::getKeyComponentCount ( ) const
protected

Definition at line 1171 of file OverlapsJoinHashTable.cpp.

References CHECK, and inverse_bucket_sizes_for_dimension_.

Referenced by RangeJoinHashTable::codegenKey(), codegenKey(), codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), getKeyBufferSize(), RangeJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), toSet(), and toString().

1171  {
1172  CHECK(!inverse_bucket_sizes_for_dimension_.empty());
1173  return inverse_bucket_sizes_for_dimension_.size();
1174 }
std::vector< double > inverse_bucket_sizes_for_dimension_
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the caller graph for this function:

size_t OverlapsJoinHashTable::getKeyComponentWidth ( ) const
protected

Definition at line 1167 of file OverlapsJoinHashTable.cpp.

Referenced by calculateHashTableSize(), RangeJoinHashTable::codegenKey(), codegenKey(), codegenManyKey(), codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), getKeyBufferSize(), RangeJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), RangeJoinHashTable::reifyForDevice(), reifyForDevice(), toSet(), and toString().

1167  {
1168  return 8;
1169 }

+ Here is the caller graph for this function:

Data_Namespace::MemoryLevel OverlapsJoinHashTable::getMemoryLevel ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 165 of file OverlapsJoinHashTable.h.

References memory_level_.

165  {
166  return memory_level_;
167  }
const Data_Namespace::MemoryLevel memory_level_
std::optional<OverlapsHashTableMetaInfo> OverlapsJoinHashTable::getOverlapsHashTableMetaInfo ( )
inline protected

Definition at line 309 of file OverlapsJoinHashTable.h.

References hashtable_cache_meta_info_, and HashtableCacheMetaInfo::overlaps_meta_info.

Referenced by getApproximateTupleCountFromCache(), initHashTableOnCpuFromCache(), and putHashTableOnCpuToCache().

309  {
310  return hashtable_cache_meta_info_.overlaps_meta_info;
311  }
std::optional< OverlapsHashTableMetaInfo > overlaps_meta_info
HashtableCacheMetaInfo hashtable_cache_meta_info_

+ Here is the caller graph for this function:

static OverlapsTuningParamRecycler* OverlapsJoinHashTable::getOverlapsTuningParamCache ( )
inline static

Definition at line 96 of file OverlapsJoinHashTable.h.

References auto_tuner_cache_, and CHECK.

Referenced by QueryRunner::QueryRunner::getNumberOfCachedItem().

96  {
97  CHECK(auto_tuner_cache_);
98  return auto_tuner_cache_.get();
99  }
static std::unique_ptr< OverlapsTuningParamRecycler > auto_tuner_cache_
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the caller graph for this function:

const RegisteredQueryHint& OverlapsJoinHashTable::getRegisteredQueryHint ( )
inline protected

Definition at line 213 of file OverlapsJoinHashTable.h.

References query_hint_.

Referenced by reifyWithLayout().

213 { return query_hint_; }
RegisteredQueryHint query_hint_

+ Here is the caller graph for this function:

std::shared_ptr< BaselineHashTable > OverlapsJoinHashTable::initHashTableOnCpu ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const HashType  layout,
const size_t  entry_count,
const size_t  emitted_keys_count,
const bool  skip_hashtable_caching 
)
protected

Definition at line 1296 of file OverlapsJoinHashTable.cpp.

References CHECK, composite_key_info_, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, cpu_hash_table_buff_mutex_, DEBUG_TIMER, getKeyComponentCount(), getKeyComponentWidth(), hashtable_cache_key_, BaselineJoinHashTableBuilder::initHashTableOnCpu(), initHashTableOnCpuFromCache(), join_type_, layout_override_, HashJoin::layoutRequiresAdditionalBuffers(), ManyToMany, OneToMany, OVERLAPS_HT, putHashTableOnCpuToCache(), to_string(), and VLOG.

Referenced by reifyForDevice().

1303  {
1304  auto timer = DEBUG_TIMER(__func__);
1305  decltype(std::chrono::steady_clock::now()) ts1, ts2;
1306  ts1 = std::chrono::steady_clock::now();
1307  CHECK(!join_columns.empty());
1308  CHECK(!join_bucket_info.empty());
1309  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
1310  if (auto generic_hash_table =
1311  initHashTableOnCpuFromCache(hashtable_cache_key_.front(),
1312  CacheItemType::OVERLAPS_HT,
1313  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER)) {
1314  if (auto hash_table =
1315  std::dynamic_pointer_cast<BaselineHashTable>(generic_hash_table)) {
1316  VLOG(1) << "Using cached CPU hash table for initialization.";
1317  // See if a hash table of a different layout was returned.
1318  // If it was OneToMany, we can reuse it on ManyToMany.
1319  if (layout == HashType::ManyToMany &&
1320  hash_table->getLayout() == HashType::OneToMany) {
1321  // use the cached hash table
1322  layout_override_ = HashType::ManyToMany;
1323  return hash_table;
1324  }
1325  if (layout == hash_table->getLayout()) {
1326  return hash_table;
1327  }
1328  }
1329  }
1330  CHECK(layoutRequiresAdditionalBuffers(layout));
1331  const auto key_component_count =
1332  join_bucket_info[0].inverse_bucket_sizes_for_dimension.size();
1333 
1334  const auto key_handler =
1335  OverlapsKeyHandler(key_component_count,
1336  &join_columns[0],
1337  join_bucket_info[0].inverse_bucket_sizes_for_dimension.data());
1338  BaselineJoinHashTableBuilder builder;
1339  const StrProxyTranslationMapsPtrsAndOffsets
1340  dummy_str_proxy_translation_maps_ptrs_and_offsets;
1341  const auto err =
1342  builder.initHashTableOnCpu(&key_handler,
1343  composite_key_info_,
1344  join_columns,
1345  join_column_types,
1346  join_bucket_info,
1347  dummy_str_proxy_translation_maps_ptrs_and_offsets,
1348  entry_count,
1349  emitted_keys_count,
1350  layout,
1351  join_type_,
1352  getKeyComponentWidth(),
1353  getKeyComponentCount());
1354  ts2 = std::chrono::steady_clock::now();
1355  if (err) {
1356  throw HashJoinFail(
1357  std::string("Unrecognized error when initializing CPU overlaps hash table (") +
1358  std::to_string(err) + std::string(")"));
1359  }
1360  std::shared_ptr<BaselineHashTable> hash_table = builder.getHashTable();
1361  if (skip_hashtable_caching) {
1362  VLOG(1) << "Skip to cache overlaps join hashtable";
1363  } else {
1364  auto hashtable_build_time =
1365  std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count();
1366  putHashTableOnCpuToCache(hashtable_cache_key_.front(),
1367  CacheItemType::OVERLAPS_HT,
1368  hash_table,
1369  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
1370  hashtable_build_time);
1371  }
1372  return hash_table;
1373 }
std::lock_guard< T > lock_guard
std::shared_ptr< HashTable > initHashTableOnCpuFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
int initHashTableOnCpu(KEY_HANDLER *key_handler, const CompositeKeyInfo &composite_key_info, const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const StrProxyTranslationMapsPtrsAndOffsets &str_proxy_translation_maps_ptrs_and_offsets, const size_t keyspace_entry_count, const size_t keys_for_all_rows, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count)
void putHashTableOnCpuToCache(QueryPlanHash key, CacheItemType item_type, std::shared_ptr< HashTable > hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
std::string to_string(char const *&&v)
CacheItemType
Definition: DataRecycler.h:38
std::vector< QueryPlanHash > hashtable_cache_key_
std::optional< HashType > layout_override_
std::pair< std::vector< const int32_t * >, std::vector< int32_t >> StrProxyTranslationMapsPtrsAndOffsets
#define CHECK(condition)
Definition: Logger.h:222
#define DEBUG_TIMER(name)
Definition: Logger.h:371
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:136
CompositeKeyInfo composite_key_info_
#define VLOG(n)
Definition: Logger.h:316
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:150

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
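Two details above are worth calling out. First, a cached OneToMany table may be substituted for a requested ManyToMany build; layout_override_ records the effective layout so getHashType() reports it afterwards. Second, the key handler derives one key component per bucketed dimension. A minimal sketch of that setup, assuming 2-D bucketing (the bucket sizes are invented and join_columns is assumed in scope):

   // one inverse bucket size per dimension => two key components per key
   std::vector<double> inverse_bucket_sizes{0.1, 0.1};  // hypothetical values
   const auto key_handler = OverlapsKeyHandler(inverse_bucket_sizes.size(),
                                               &join_columns[0],
                                               inverse_bucket_sizes.data());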

std::shared_ptr< HashTable > OverlapsJoinHashTable::initHashTableOnCpuFromCache ( QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier 
)
protected

Definition at line 1835 of file OverlapsJoinHashTable.cpp.

References CHECK, DEBUG_TIMER, getOverlapsHashTableMetaInfo(), hash_table_cache_, HashtableCacheMetaInfo::overlaps_meta_info, and VLOG.

Referenced by initHashTableOnCpu(), and RangeJoinHashTable::reifyWithLayout().

1838  {
1839  auto timer = DEBUG_TIMER(__func__);
1840  VLOG(1) << "Checking CPU hash table cache.";
1841  CHECK(hash_table_cache_);
1842  HashtableCacheMetaInfo meta_info;
1843  meta_info.overlaps_meta_info = getOverlapsHashTableMetaInfo();
1844  auto cached_hashtable =
1845  hash_table_cache_->getItemFromCache(key, item_type, device_identifier, meta_info);
1846  if (cached_hashtable) {
1847  return cached_hashtable;
1848  }
1849  return nullptr;
1850 }
std::optional< OverlapsHashTableMetaInfo > overlaps_meta_info
#define CHECK(condition)
Definition: Logger.h:222
#define DEBUG_TIMER(name)
Definition: Logger.h:371
static std::unique_ptr< HashtableRecycler > hash_table_cache_
std::optional< OverlapsHashTableMetaInfo > getOverlapsHashTableMetaInfo()
#define VLOG(n)
Definition: Logger.h:316

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static void OverlapsJoinHashTable::invalidateCache ( )
inline static

Definition at line 66 of file OverlapsJoinHashTable.h.

References auto_tuner_cache_, CHECK, and hash_table_cache_.

66  {
67  CHECK(auto_tuner_cache_);
68  auto_tuner_cache_->clearCache();
69 
70  CHECK(hash_table_cache_);
71  hash_table_cache_->clearCache();
72  }
static std::unique_ptr< OverlapsTuningParamRecycler > auto_tuner_cache_
#define CHECK(condition)
Definition: Logger.h:222
static std::unique_ptr< HashtableRecycler > hash_table_cache_
bool OverlapsJoinHashTable::isBitwiseEq ( ) const
override protected virtual

Implements HashJoin.

Definition at line 1890 of file OverlapsJoinHashTable.cpp.

References condition_, and kBW_EQ.

1890  {
1891  return condition_->get_optype() == kBW_EQ;
1892 }
const std::shared_ptr< Analyzer::BinOper > condition_
Definition: sqldefs.h:30
static void OverlapsJoinHashTable::markCachedItemAsDirty ( size_t  table_key)
inline static

Definition at line 74 of file OverlapsJoinHashTable.h.

References auto_tuner_cache_, CHECK, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, hash_table_cache_, OVERLAPS_AUTO_TUNER_PARAM, and OVERLAPS_HT.

74  {
75  CHECK(hash_table_cache_);
76  CHECK(auto_tuner_cache_);
77  auto candidate_table_keys =
78  hash_table_cache_->getMappedQueryPlanDagsWithTableKey(table_key);
79  if (candidate_table_keys.has_value()) {
80  auto_tuner_cache_->markCachedItemAsDirty(table_key,
81  *candidate_table_keys,
82  CacheItemType::OVERLAPS_AUTO_TUNER_PARAM,
83  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
84  hash_table_cache_->markCachedItemAsDirty(table_key,
85  *candidate_table_keys,
86  CacheItemType::OVERLAPS_HT,
87  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
88  }
89  }
static std::unique_ptr< OverlapsTuningParamRecycler > auto_tuner_cache_
#define CHECK(condition)
Definition: Logger.h:222
static std::unique_ptr< HashtableRecycler > hash_table_cache_
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:136
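Together with invalidateCache(), this gives two levels of cache coherence; a minimal sketch (the table key value is invented):

   // After a table changes, dirty-mark the entries derived from it ...
   constexpr size_t kChangedTableKey = 42;  // hypothetical hashed table key
   OverlapsJoinHashTable::markCachedItemAsDirty(kChangedTableKey);
   // ... or, e.g. on a catalog reset, drop both caches wholesale:
   OverlapsJoinHashTable::invalidateCache();
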
size_t OverlapsJoinHashTable::offsetBufferOff ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 268 of file OverlapsJoinHashTable.h.

References getKeyBufferSize().

Referenced by codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), countBufferOff(), toSet(), and toString().

268 { return getKeyBufferSize(); }
size_t getKeyBufferSize() const noexcept

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t OverlapsJoinHashTable::payloadBufferOff ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 278 of file OverlapsJoinHashTable.h.

References countBufferOff(), getComponentBufferSize(), getHashType(), getKeyBufferSize(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by toSet(), and toString().

278  {
279  if (layoutRequiresAdditionalBuffers(getHashType())) {
280  return countBufferOff() + getComponentBufferSize();
281  } else {
282  return getKeyBufferSize();
283  }
284  }
HashType getHashType() const noexcept override
size_t countBufferOff() const noexcept override
size_t getComponentBufferSize() const noexcept override
size_t getKeyBufferSize() const noexcept
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:150

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
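offsetBufferOff(), countBufferOff(), and payloadBufferOff() are byte offsets into one contiguous hash table buffer. For a layout that requires additional buffers (OneToMany/ManyToMany), the regions line up as in this sketch (a reading of the accessors above; the countBufferOff() relation is presumed to mirror payloadBufferOff()):

   [ keys    ]  offsetBufferOff()  == getKeyBufferSize()
   [ offsets ]  countBufferOff()   == offsetBufferOff() + getComponentBufferSize()
   [ counts  ]  payloadBufferOff() == countBufferOff()  + getComponentBufferSize()
   [ payload ]  matching row ids

For a OneToOne layout the offset/count regions are absent and payloadBufferOff() simply returns getKeyBufferSize().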

void OverlapsJoinHashTable::putHashTableOnCpuToCache ( QueryPlanHash  key,
CacheItemType  item_type,
std::shared_ptr< HashTable hashtable_ptr,
DeviceIdentifier  device_identifier,
size_t  hashtable_building_time 
)
protected

Definition at line 1869 of file OverlapsJoinHashTable.cpp.

References CHECK, CPU, getOverlapsHashTableMetaInfo(), hash_table_cache_, HashtableCacheMetaInfo::overlaps_meta_info, and query_hint_.

Referenced by RangeJoinHashTable::initHashTableOnCpu(), and initHashTableOnCpu().

1874  {
1875  CHECK(hash_table_cache_);
1876  CHECK(hashtable_ptr && !hashtable_ptr->getGpuBuffer());
1877  HashtableCacheMetaInfo meta_info;
1878  meta_info.overlaps_meta_info = getOverlapsHashTableMetaInfo();
1879  meta_info.registered_query_hint = query_hint_;
1880  hash_table_cache_->putItemToCache(
1881  key,
1882  hashtable_ptr,
1883  item_type,
1884  device_identifier,
1885  hashtable_ptr->getHashTableBufferSize(ExecutorDeviceType::CPU),
1886  hashtable_building_time,
1887  meta_info);
1888 }
std::optional< OverlapsHashTableMetaInfo > overlaps_meta_info
RegisteredQueryHint query_hint_
#define CHECK(condition)
Definition: Logger.h:222
static std::unique_ptr< HashtableRecycler > hash_table_cache_
std::optional< OverlapsHashTableMetaInfo > getOverlapsHashTableMetaInfo()

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void OverlapsJoinHashTable::registerQueryHint ( const RegisteredQueryHint query_hint)
inline protected

Definition at line 215 of file OverlapsJoinHashTable.h.

References query_hint_.

215  {
216  query_hint_ = query_hint;
217  }
RegisteredQueryHint query_hint_
void OverlapsJoinHashTable::reify ( const HashType  preferred_layout)
protected

Definition at line 1176 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_EQ, CHECK_LT, composite_key_info_, condition_, DEBUG_TIMER, device_count_, executor_, HashJoin::getCompositeKeyInfo(), inner_outer_pairs_, ManyToMany, OneToMany, reifyWithLayout(), and VLOG.

1176  {
1177  auto timer = DEBUG_TIMER(__func__);
1178  CHECK_LT(0, device_count_);
1179  composite_key_info_ = HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);
1180 
1181  CHECK(condition_->is_overlaps_oper());
1182  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
1183  HashType layout;
1184  if (inner_outer_pairs_[0].second->get_type_info().is_fixlen_array() &&
1185  inner_outer_pairs_[0].second->get_type_info().get_size() == 32) {
1186  // bounds array
1187  layout = HashType::ManyToMany;
1188  } else {
1189  layout = HashType::OneToMany;
1190  }
1191  try {
1192  reifyWithLayout(layout);
1193  return;
1194  } catch (const std::exception& e) {
1195  VLOG(1) << "Caught exception while building overlaps baseline hash table: "
1196  << e.what();
1197  throw;
1198  }
1199 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
virtual void reifyWithLayout(const HashType layout)
const std::shared_ptr< Analyzer::BinOper > condition_
#define CHECK_LT(x, y)
Definition: Logger.h:232
#define CHECK(condition)
Definition: Logger.h:222
#define DEBUG_TIMER(name)
Definition: Logger.h:371
std::vector< InnerOuter > inner_outer_pairs_
CompositeKeyInfo composite_key_info_
HashType
Definition: HashTable.h:19
#define VLOG(n)
Definition: Logger.h:316
static CompositeKeyInfo getCompositeKeyInfo(const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs={})
Definition: HashJoin.cpp:455

+ Here is the call graph for this function:
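The 32-byte test above is how a geo bounds column is recognized: a fixed-length array of 32 bytes holds four doubles (a 2-D bounding box), and probing boxes rather than points forces the ManyToMany layout; other outer expressions (e.g. constructed points) build OneToMany.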

void OverlapsJoinHashTable::reifyForDevice ( const ColumnsForDevice columns_for_device,
const HashType  layout,
const size_t  entry_count,
const size_t  emitted_keys_count,
const bool  skip_hashtable_caching,
const int  device_id,
const logger::ThreadId  parent_thread_id 
)
protected

Definition at line 1241 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_EQ, CHECK_LT, Data_Namespace::CPU_LEVEL, DEBUG_TIMER_NEW_THREAD, getEffectiveMemoryLevel(), getKeyComponentWidth(), Data_Namespace::GPU_LEVEL, HashJoin::hash_tables_for_device_, initHashTableOnCpu(), inner_outer_pairs_, ColumnsForDevice::join_buckets, ColumnsForDevice::join_column_types, ColumnsForDevice::join_columns, HashJoin::layoutRequiresAdditionalBuffers(), memory_level_, UNREACHABLE, and VLOG.

Referenced by reifyImpl().

1247  {
1248  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
1249  CHECK_EQ(getKeyComponentWidth(), size_t(8));
1250  CHECK(layoutRequiresAdditionalBuffers(layout));
1251  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
1252 
1253  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
1254  VLOG(1) << "Building overlaps join hash table on CPU.";
1255  auto hash_table = initHashTableOnCpu(columns_for_device.join_columns,
1256  columns_for_device.join_column_types,
1257  columns_for_device.join_buckets,
1258  layout,
1259  entry_count,
1260  emitted_keys_count,
1261  skip_hashtable_caching);
1262  CHECK(hash_table);
1263 
1264 #ifdef HAVE_CUDA
1265  if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
1266  auto gpu_hash_table = copyCpuHashTableToGpu(
1267  hash_table, layout, entry_count, emitted_keys_count, device_id);
1268  CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
1269  hash_tables_for_device_[device_id] = std::move(gpu_hash_table);
1270  } else {
1271 #else
1272  CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
1273 #endif
1274  CHECK_EQ(hash_tables_for_device_.size(), size_t(1));
1275  hash_tables_for_device_[0] = hash_table;
1276 #ifdef HAVE_CUDA
1277  }
1278 #endif
1279  } else {
1280 #ifdef HAVE_CUDA
1281  auto hash_table = initHashTableOnGpu(columns_for_device.join_columns,
1282  columns_for_device.join_column_types,
1283  columns_for_device.join_buckets,
1284  layout,
1285  entry_count,
1286  emitted_keys_count,
1287  device_id);
1288  CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
1289  hash_tables_for_device_[device_id] = std::move(hash_table);
1290 #else
1291  UNREACHABLE();
1292 #endif
1293  }
1294 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:351
#define UNREACHABLE()
Definition: Logger.h:266
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:376
std::shared_ptr< BaselineHashTable > initHashTableOnCpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching)
const std::vector< JoinColumnTypeInfo > join_column_types
Definition: HashJoin.h:101
#define CHECK_LT(x, y)
Definition: Logger.h:232
#define CHECK(condition)
Definition: Logger.h:222
const Data_Namespace::MemoryLevel memory_level_
std::vector< InnerOuter > inner_outer_pairs_
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
std::vector< JoinBucketInfo > join_buckets
Definition: HashJoin.h:103
const std::vector< JoinColumn > join_columns
Definition: HashJoin.h:100
#define VLOG(n)
Definition: Logger.h:316
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:150

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void OverlapsJoinHashTable::reifyImpl ( std::vector< ColumnsForDevice > &  columns_per_device,
const Fragmenter_Namespace::TableInfo query_info,
const HashType  layout,
const size_t  shard_count,
const size_t  entry_count,
const size_t  emitted_keys_count,
const bool  skip_hashtable_caching,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected virtual

Definition at line 1201 of file OverlapsJoinHashTable.cpp.

References threading_serial::async(), chosen_overlaps_bucket_threshold_, chosen_overlaps_max_table_size_bytes_, device_count_, Fragmenter_Namespace::TableInfo::fragments, inverse_bucket_sizes_for_dimension_, only_shards_for_device(), reifyForDevice(), setOverlapsHashtableMetaInfo(), and logger::thread_id().

Referenced by reifyWithLayout().

1209  {
1210  std::vector<std::future<void>> init_threads;
1211  chosen_overlaps_bucket_threshold_ = chosen_bucket_threshold;
1212  chosen_overlaps_max_table_size_bytes_ = chosen_max_hashtable_size;
1213  setOverlapsHashtableMetaInfo(chosen_overlaps_max_table_size_bytes_,
1214  chosen_overlaps_bucket_threshold_,
1215  inverse_bucket_sizes_for_dimension_);
1216 
1217  for (int device_id = 0; device_id < device_count_; ++device_id) {
1218  const auto fragments =
1219  shard_count
1220  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
1221  : query_info.fragments;
1222  init_threads.push_back(std::async(std::launch::async,
1223  &OverlapsJoinHashTable::reifyForDevice,
1224  this,
1225  columns_per_device[device_id],
1226  layout,
1227  entry_count,
1228  emitted_keys_count,
1229  skip_hashtable_caching,
1230  device_id,
1231  logger::thread_id()));
1232  }
1233  for (auto& init_thread : init_threads) {
1234  init_thread.wait();
1235  }
1236  for (auto& init_thread : init_threads) {
1237  init_thread.get();
1238  }
1239 }
std::vector< FragmentInfo > fragments
Definition: Fragmenter.h:171
future< Result > async(Fn &&fn, Args &&...args)
std::vector< Fragmenter_Namespace::FragmentInfo > only_shards_for_device(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
void setOverlapsHashtableMetaInfo(size_t max_table_size_bytes, double bucket_threshold, std::vector< double > &bucket_sizes)
std::vector< double > inverse_bucket_sizes_for_dimension_
void reifyForDevice(const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const int device_id, const logger::ThreadId parent_thread_id)
ThreadId thread_id()
Definition: Logger.cpp:820

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
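One detail in the listing deserves a note: the futures are first wait()ed so every per-device build runs to completion, and only then does get() rethrow the first stored exception. The same pattern in isolation, as a self-contained sketch:

   #include <future>
   #include <vector>

   void join_all(std::vector<std::future<void>>& tasks) {
     for (auto& t : tasks) {
       t.wait();  // let every task finish, even if one has already failed
     }
     for (auto& t : tasks) {
       t.get();   // rethrows the first stored exception, if any
     }
   }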

void OverlapsJoinHashTable::reifyWithLayout ( const HashType  layout)
protected virtual

Reimplemented in RangeJoinHashTable.

Definition at line 535 of file OverlapsJoinHashTable.cpp.

References gpu_enabled::accumulate(), auto_tuner_cache_, CompositeKeyInfo::cache_key_chunks, calculateHashTableSize(), CHECK, CHECK_EQ, CHECK_GE, HashJoin::collectFragmentIds(), composite_key_info_, computeHashTableCounts(), condition_, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, DEBUG_TIMER, device_count_, executor_, fetchColumnsForDevice(), g_overlaps_max_table_size_bytes, g_overlaps_target_entries_per_bin, generateCacheKey(), get_inner_query_info(), getAlternativeCacheKey(), DataRecyclerUtil::getAlternativeTableKeys(), getEffectiveMemoryLevel(), HashtableRecycler::getHashtableAccessPathInfo(), HashJoin::getHashTypeString(), HashJoin::getInnerTableId(), getQueryEngineCudaStreamForDevice(), getRegisteredQueryHint(), Data_Namespace::GPU_LEVEL, hash_table_cache_, hashtable_build_dag_map_, hashtable_cache_key_, hashtable_cache_meta_info_, InputTableInfo::info, inner_outer_pairs_, inverse_bucket_sizes_for_dimension_, RegisteredQueryHint::isHintRegistered(), HashtableRecycler::isInvalidHashTableCacheKey(), join_type_, kOverlapsAllowGpuBuild, kOverlapsBucketThreshold, kOverlapsKeysPerBin, kOverlapsMaxSize, kOverlapsNoCache, HashJoin::layoutRequiresAdditionalBuffers(), memory_level_, only_shards_for_device(), RegisteredQueryHint::overlaps_allow_gpu_build, OVERLAPS_AUTO_TUNER_PARAM, OVERLAPS_HT, query_hint_, query_infos_, reifyImpl(), setInverseBucketSizeInfo(), setOverlapsHashtableMetaInfo(), shardCount(), table_keys_, and VLOG.

Referenced by reify().

535  {
536  auto timer = DEBUG_TIMER(__func__);
537  CHECK(layoutRequiresAdditionalBuffers(layout));
538  const auto& query_info =
539  get_inner_query_info(HashJoin::getInnerTableId(inner_outer_pairs_), query_infos_)
540  .info;
541  VLOG(1) << "Reify with layout " << getHashTypeString(layout)
542  << " for table_id: " << HashJoin::getInnerTableId(inner_outer_pairs_);
543  if (query_info.fragments.empty()) {
544  return;
545  }
546 
547  auto overlaps_max_table_size_bytes = g_overlaps_max_table_size_bytes;
548  std::optional<double> overlaps_threshold_override;
549  double overlaps_target_entries_per_bin = g_overlaps_target_entries_per_bin;
550  auto query_hint = getRegisteredQueryHint();
551  auto skip_hashtable_caching = false;
552  if (query_hint.isHintRegistered(QueryHint::kOverlapsBucketThreshold)) {
553  VLOG(1) << "Setting overlaps bucket threshold "
554  "\'overlaps_hashjoin_bucket_threshold\' via "
555  "query hint: "
556  << query_hint.overlaps_bucket_threshold;
557  overlaps_threshold_override = query_hint.overlaps_bucket_threshold;
558  }
559  if (query_hint.isHintRegistered(QueryHint::kOverlapsMaxSize)) {
560  std::ostringstream oss;
561  oss << "User requests to change a threshold \'overlaps_max_table_size_bytes\' via "
562  "query hint";
563  if (!overlaps_threshold_override.has_value()) {
564  oss << ": " << overlaps_max_table_size_bytes << " -> "
565  << query_hint.overlaps_max_size;
566  overlaps_max_table_size_bytes = query_hint.overlaps_max_size;
567  } else {
568  oss << ", but is skipped since the query hint also changes the threshold "
569  "\'overlaps_hashjoin_bucket_threshold\'";
570  }
571  VLOG(1) << oss.str();
572  }
573  if (query_hint.isHintRegistered(QueryHint::kOverlapsNoCache)) {
574  VLOG(1) << "User requests to skip caching overlaps join hashtable and its tuned "
575  "parameters for this query";
576  skip_hashtable_caching = true;
577  }
578  if (query_hint.isHintRegistered(QueryHint::kOverlapsKeysPerBin)) {
579  VLOG(1) << "User requests to change a threshold \'overlaps_keys_per_bin\' via query "
580  "hint: "
581  << overlaps_target_entries_per_bin << " -> "
582  << query_hint.overlaps_keys_per_bin;
583  overlaps_target_entries_per_bin = query_hint.overlaps_keys_per_bin;
584  }
585 
586  auto data_mgr = executor_->getDataMgr();
587  // We prioritize CPU when building an overlaps join hashtable. If a GPU is present
588  // and the user provides a hint, we selectively allow the GPU to build it; but even
589  // with a GPU available, if the user forces CPU as the execution device type we must
590  // not use the GPU for the build.
591  auto allow_gpu_hashtable_build =
592  query_hint.isHintRegistered(QueryHint::kOverlapsAllowGpuBuild) &&
593  query_hint.overlaps_allow_gpu_build;
594  if (allow_gpu_hashtable_build) {
595  if (data_mgr->gpusPresent() &&
596  memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
597  VLOG(1) << "A user forces to build GPU hash table for this overlaps join operator";
598  } else {
599  allow_gpu_hashtable_build = false;
600  VLOG(1) << "A user forces to build GPU hash table for this overlaps join operator "
601  "but we "
602  "skip it since either GPU is not presented or CPU execution mode is set";
603  }
604  }
605 
606  std::vector<ColumnsForDevice> columns_per_device;
607  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
608  if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL ||
609  allow_gpu_hashtable_build) {
610  for (int device_id = 0; device_id < device_count_; ++device_id) {
611  dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(
612  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id)));
613  }
614  }
615 
616  std::vector<std::vector<Fragmenter_Namespace::FragmentInfo>> fragments_per_device;
617  const auto shard_count = shardCount();
618  size_t total_num_tuples = 0;
619  for (int device_id = 0; device_id < device_count_; ++device_id) {
620  fragments_per_device.emplace_back(
621  shard_count
622  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
623  : query_info.fragments);
624  const size_t crt_num_tuples =
625  std::accumulate(fragments_per_device.back().begin(),
626  fragments_per_device.back().end(),
627  size_t(0),
628  [](const auto& sum, const auto& fragment) {
629  return sum + fragment.getNumTuples();
630  });
631  total_num_tuples += crt_num_tuples;
632  const auto columns_for_device =
633  fetchColumnsForDevice(fragments_per_device.back(),
634  device_id,
635  memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL ||
636  allow_gpu_hashtable_build
637  ? dev_buff_owners[device_id].get()
638  : nullptr);
639  columns_per_device.push_back(columns_for_device);
640  }
641 
642  // try to extract cache key for hash table and its relevant info
643  auto hashtable_access_path_info =
644  HashtableRecycler::getHashtableAccessPathInfo(inner_outer_pairs_,
645  {},
646  condition_->get_optype(),
647  join_type_,
648  hashtable_build_dag_map_,
649  device_count_,
650  shard_count,
651  fragments_per_device,
652  executor_);
653  hashtable_cache_key_ = hashtable_access_path_info.hashed_query_plan_dag;
654  hashtable_cache_meta_info_ = hashtable_access_path_info.meta_info;
655  table_keys_ = hashtable_access_path_info.table_keys;
656 
657  auto get_inner_table_id = [this]() {
658  return inner_outer_pairs_.front().first->get_table_id();
659  };
660 
661  if (table_keys_.empty()) {
662  table_keys_ = DataRecyclerUtil::getAlternativeTableKeys(
663  composite_key_info_.cache_key_chunks,
664  executor_->getCatalog()->getDatabaseId(),
665  get_inner_table_id());
666  }
667  CHECK(!table_keys_.empty());
668 
669  if (overlaps_threshold_override) {
670  // compute bucket sizes based on the user provided threshold
671  BucketSizeTuner tuner(/*initial_threshold=*/*overlaps_threshold_override,
672  /*step=*/1.0,
673  /*min_threshold=*/0.0,
674  getEffectiveMemoryLevel(inner_outer_pairs_),
675  columns_per_device,
676  inner_outer_pairs_,
677  total_num_tuples,
678  executor_);
679  const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();
680 
681  auto [entry_count, emitted_keys_count] =
682  computeHashTableCounts(shard_count,
683  inverse_bucket_sizes,
684  columns_per_device,
685  overlaps_max_table_size_bytes,
686  *overlaps_threshold_override);
687  setInverseBucketSizeInfo(inverse_bucket_sizes, columns_per_device, device_count_);
688  // reifyImpl will check the hash table cache for an appropriate hash table w/ those
689  // bucket sizes (or within tolerances) if a hash table exists use it, otherwise build
690  // one
691  generateCacheKey(overlaps_max_table_size_bytes,
692  *overlaps_threshold_override,
693  inverse_bucket_sizes,
694  fragments_per_device,
695  device_count_);
696  reifyImpl(columns_per_device,
697  query_info,
698  layout,
699  shard_count,
700  entry_count,
701  emitted_keys_count,
702  skip_hashtable_caching,
703  overlaps_max_table_size_bytes,
704  *overlaps_threshold_override);
705  } else {
706  double overlaps_bucket_threshold = std::numeric_limits<double>::max();
707  generateCacheKey(overlaps_max_table_size_bytes,
708  overlaps_bucket_threshold,
709  {},
710  fragments_per_device,
711  device_count_);
712  std::vector<size_t> per_device_chunk_key;
713  if (HashtableRecycler::isInvalidHashTableCacheKey(hashtable_cache_key_) &&
714  get_inner_table_id() > 0) {
715  for (int device_id = 0; device_id < device_count_; ++device_id) {
716  auto chunk_key_hash = boost::hash_value(composite_key_info_.cache_key_chunks);
717  boost::hash_combine(
718  chunk_key_hash,
719  HashJoin::collectFragmentIds(fragments_per_device[device_id]));
720  per_device_chunk_key.push_back(chunk_key_hash);
721  AlternativeCacheKeyForOverlapsHashJoin cache_key{
722  inner_outer_pairs_,
723  columns_per_device.front().join_columns.front().num_elems,
724  chunk_key_hash,
725  condition_->get_optype(),
726  overlaps_max_table_size_bytes,
727  overlaps_bucket_threshold,
728  {}};
729  hashtable_cache_key_[device_id] = getAlternativeCacheKey(cache_key);
730  hash_table_cache_->addQueryPlanDagForTableKeys(hashtable_cache_key_[device_id],
731  table_keys_);
732  }
733  }
734 
735  auto cached_bucket_threshold =
736  auto_tuner_cache_->getItemFromCache(hashtable_cache_key_.front(),
737  CacheItemType::OVERLAPS_AUTO_TUNER_PARAM,
738  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
739  if (cached_bucket_threshold) {
740  overlaps_bucket_threshold = cached_bucket_threshold->bucket_threshold;
741  auto inverse_bucket_sizes = cached_bucket_threshold->bucket_sizes;
742  setOverlapsHashtableMetaInfo(
743  overlaps_max_table_size_bytes, overlaps_bucket_threshold, inverse_bucket_sizes);
744  generateCacheKey(overlaps_max_table_size_bytes,
745  overlaps_bucket_threshold,
746  inverse_bucket_sizes,
747  fragments_per_device,
748  device_count_);
749 
750  if (auto hash_table =
751  hash_table_cache_->getItemFromCache(hashtable_cache_key_[device_count_],
752  CacheItemType::OVERLAPS_HT,
753  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
754  std::nullopt)) {
755  // if we already have a built hash table, we can skip the scans required for
756  // computing bucket size and tuple count
757  // reset as the hash table sizes can vary a bit
758  setInverseBucketSizeInfo(inverse_bucket_sizes, columns_per_device, device_count_);
759  CHECK(hash_table);
760 
761  VLOG(1) << "Using cached hash table bucket size";
762 
763  reifyImpl(columns_per_device,
764  query_info,
765  layout,
766  shard_count,
767  hash_table->getEntryCount(),
768  hash_table->getEmittedKeysCount(),
769  skip_hashtable_caching,
770  overlaps_max_table_size_bytes,
771  overlaps_bucket_threshold);
772  } else {
773  VLOG(1) << "Computing bucket size for cached bucket threshold";
774  // compute bucket size using our cached tuner value
775  BucketSizeTuner tuner(/*initial_threshold=*/overlaps_bucket_threshold,
776  /*step=*/1.0,
777  /*min_threshold=*/0.0,
778  getEffectiveMemoryLevel(inner_outer_pairs_),
779  columns_per_device,
780  inner_outer_pairs_,
781  total_num_tuples,
782  executor_);
783 
784  const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();
785 
786  auto [entry_count, emitted_keys_count] =
787  computeHashTableCounts(shard_count,
788  inverse_bucket_sizes,
789  columns_per_device,
790  overlaps_max_table_size_bytes,
791  overlaps_bucket_threshold);
792  setInverseBucketSizeInfo(inverse_bucket_sizes, columns_per_device, device_count_);
793 
794  generateCacheKey(overlaps_max_table_size_bytes,
795  overlaps_bucket_threshold,
796  inverse_bucket_sizes,
797  fragments_per_device,
798  device_count_);
799 
800  reifyImpl(columns_per_device,
801  query_info,
802  layout,
803  shard_count,
804  entry_count,
805  emitted_keys_count,
806  skip_hashtable_caching,
807  overlaps_max_table_size_bytes,
808  overlaps_bucket_threshold);
809  }
810  } else {
811  // compute bucket size using the auto tuner
812  BucketSizeTuner tuner(
813  /*initial_threshold=*/overlaps_bucket_threshold,
814  /*step=*/2.0,
815  /*min_threshold=*/1e-7,
816  getEffectiveMemoryLevel(inner_outer_pairs_),
817  columns_per_device,
818  inner_outer_pairs_,
819  total_num_tuples,
820  executor_);
821 
822  VLOG(1) << "Running overlaps join size auto tune with parameters: " << tuner;
823 
824  // manages the tuning state machine
825  TuningState tuning_state(overlaps_max_table_size_bytes,
826  overlaps_target_entries_per_bin);
827  while (tuner.tuneOneStep(tuning_state.tuning_direction)) {
828  const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();
829 
830  const auto [crt_entry_count, crt_emitted_keys_count] =
831  computeHashTableCounts(shard_count,
832  inverse_bucket_sizes,
833  columns_per_device,
834  tuning_state.overlaps_max_table_size_bytes,
835  tuning_state.chosen_overlaps_threshold);
836  const size_t hash_table_size = calculateHashTableSize(
837  inverse_bucket_sizes.size(), crt_emitted_keys_count, crt_entry_count);
838  HashTableProps crt_props(crt_entry_count,
839  crt_emitted_keys_count,
840  hash_table_size,
841  inverse_bucket_sizes);
842  VLOG(1) << "Tuner output: " << tuner << " with properties " << crt_props;
843 
844  const auto should_continue = tuning_state(crt_props, tuner.getMinBucketSize());
845  setInverseBucketSizeInfo(
846  tuning_state.crt_props.bucket_sizes, columns_per_device, device_count_);
847  if (!should_continue) {
848  break;
849  }
850  }
851 
852  const auto& crt_props = tuning_state.crt_props;
853  // sanity check that the hash table size has not changed. this is a fairly
854  // inexpensive check to ensure the above algorithm is consistent
855  const size_t hash_table_size =
856  calculateHashTableSize(inverse_bucket_sizes_for_dimension_.size(),
857  crt_props.emitted_keys_count,
858  crt_props.entry_count);
859  CHECK_EQ(crt_props.hash_table_size, hash_table_size);
860 
861  if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL &&
862  hash_table_size > overlaps_max_table_size_bytes) {
863  VLOG(1) << "Could not find suitable overlaps join parameters to create hash "
864  "table under max allowed size ("
865  << overlaps_max_table_size_bytes << ") bytes.";
866  throw OverlapsHashTableTooBig(overlaps_max_table_size_bytes);
867  }
868 
869  VLOG(1) << "Final tuner output: " << tuner << " with properties " << crt_props;
871  VLOG(1) << "Final bucket sizes: ";
872  for (size_t dim = 0; dim < inverse_bucket_sizes_for_dimension_.size(); dim++) {
873  VLOG(1) << "dim[" << dim
874  << "]: " << 1.0 / inverse_bucket_sizes_for_dimension_[dim];
875  }
876  CHECK_GE(tuning_state.chosen_overlaps_threshold, double(0));
877  generateCacheKey(tuning_state.overlaps_max_table_size_bytes,
878  tuning_state.chosen_overlaps_threshold,
879  {},
880  fragments_per_device,
881  device_count_);
882  const auto candidate_auto_tuner_cache_key = hashtable_cache_key_.front();
883  if (skip_hashtable_caching) {
884  VLOG(1) << "Skip to add tuned parameters to auto tuner";
885  } else {
886  AutoTunerMetaInfo meta_info{tuning_state.overlaps_max_table_size_bytes,
887  tuning_state.chosen_overlaps_threshold,
888  inverse_bucket_sizes_for_dimension_};
889  auto_tuner_cache_->putItemToCache(candidate_auto_tuner_cache_key,
890  meta_info,
891  CacheItemType::OVERLAPS_AUTO_TUNER_PARAM,
892  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
893  0,
894  0);
895  }
896  overlaps_bucket_threshold = tuning_state.chosen_overlaps_threshold;
897  reifyImpl(columns_per_device,
898  query_info,
899  layout,
900  shard_count,
901  crt_props.entry_count,
902  crt_props.emitted_keys_count,
903  skip_hashtable_caching,
904  overlaps_max_table_size_bytes,
905  overlaps_bucket_threshold);
906  }
907  }
908 }
static std::vector< int > collectFragmentIds(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
Definition: HashJoin.cpp:446
#define CHECK_EQ(x, y)
Definition: Logger.h:230
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
static bool isInvalidHashTableCacheKey(const std::vector< QueryPlanHash > &cache_keys)
std::vector< ChunkKey > cache_key_chunks
Definition: HashJoin.h:119
virtual void reifyImpl(std::vector< ColumnsForDevice > &columns_per_device, const Fragmenter_Namespace::TableInfo &query_info, const HashType layout, const size_t shard_count, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
bool overlaps_allow_gpu_build
Definition: QueryHint.h:241
const RegisteredQueryHint & getRegisteredQueryHint()
RegisteredQueryHint query_hint_
#define CHECK_GE(x, y)
Definition: Logger.h:235
static std::unique_ptr< OverlapsTuningParamRecycler > auto_tuner_cache_
virtual std::pair< size_t, size_t > computeHashTableCounts(const size_t shard_count, const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
size_t calculateHashTableSize(size_t number_of_dimensions, size_t emitted_keys_count, size_t entry_count) const
const std::shared_ptr< Analyzer::BinOper > condition_
QueryPlanHash getAlternativeCacheKey(AlternativeCacheKeyForOverlapsHashJoin &info)
const std::vector< InputTableInfo > & query_infos_
std::vector< Fragmenter_Namespace::FragmentInfo > only_shards_for_device(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
double g_overlaps_target_entries_per_bin
Definition: Execute.cpp:106
HashTableBuildDagMap hashtable_build_dag_map_
size_t g_overlaps_max_table_size_bytes
Definition: Execute.cpp:105
std::vector< QueryPlanHash > hashtable_cache_key_
virtual int getInnerTableId() const noexcept=0
void setOverlapsHashtableMetaInfo(size_t max_table_size_bytes, double bucket_threshold, std::vector< double > &bucket_sizes)
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)
std::vector< double > inverse_bucket_sizes_for_dimension_
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:154
static std::unordered_set< size_t > getAlternativeTableKeys(const std::vector< ChunkKey > &chunk_keys, int db_id, int inner_table_id)
Definition: DataRecycler.h:154
void setInverseBucketSizeInfo(const std::vector< double > &inverse_bucket_sizes, std::vector< ColumnsForDevice > &columns_per_device, const size_t device_count)
bool isHintRegistered(const QueryHint hint) const
Definition: QueryHint.h:266
ColumnsForDevice fetchColumnsForDevice(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
HashtableCacheMetaInfo hashtable_cache_meta_info_
#define CHECK(condition)
Definition: Logger.h:222
#define DEBUG_TIMER(name)
Definition: Logger.h:371
const Data_Namespace::MemoryLevel memory_level_
void generateCacheKey(const size_t max_hashtable_size, const double bucket_threshold, const std::vector< double > &bucket_sizes, std::vector< std::vector< Fragmenter_Namespace::FragmentInfo >> &fragments_per_device, int device_count)
static std::unique_ptr< HashtableRecycler > hash_table_cache_
std::vector< InnerOuter > inner_outer_pairs_
std::unordered_set< size_t > table_keys_
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:136
static HashtableAccessPathInfo getHashtableAccessPathInfo(const std::vector< InnerOuter > &inner_outer_pairs, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs, const SQLOps op_type, const JoinType join_type, const HashTableBuildDagMap &hashtable_build_dag_map, int device_count, int shard_count, const std::vector< std::vector< Fragmenter_Namespace::FragmentInfo >> &frags_for_device, Executor *executor)
CompositeKeyInfo composite_key_info_
#define VLOG(n)
Definition: Logger.h:316
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:150

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
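Given the function's length, a condensed outline of the branches above (a summary of the listing, not new logic):

   // 1. Read query hints: bucket threshold, max table size, keys per bin,
   //    no-cache, and allow-gpu-build (honored only when GPUs are present).
   // 2. Fetch join columns per device; derive hash table cache keys and
   //    table keys via HashtableRecycler.
   // 3. Choose bucket sizes:
   //    a. threshold hint given -> tune once with that threshold and build;
   //    b. tuner cache hit      -> reuse the cached threshold/bucket sizes,
   //       skipping the counting scans entirely when the table is cached too;
   //    c. otherwise            -> run the auto-tuner state machine (step 2.0,
   //       min threshold 1e-7) and cache the winning parameters.
   // 4. reifyImpl() builds the per-device tables with the chosen entry and
   //    emitted-key counts.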

void OverlapsJoinHashTable::setInverseBucketSizeInfo ( const std::vector< double > &  inverse_bucket_sizes,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  device_count 
)
protected

Definition at line 1151 of file OverlapsJoinHashTable.cpp.

References CHECK_EQ, inner_outer_pairs_, and inverse_bucket_sizes_for_dimension_.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

1154  {
1155  // set global bucket size
1156  inverse_bucket_sizes_for_dimension_ = inverse_bucket_sizes;
1157 
1158  // re-compute bucket counts per device based on global bucket size
1159  CHECK_EQ(columns_per_device.size(), static_cast<size_t>(device_count));
1160  for (size_t device_id = 0; device_id < device_count; ++device_id) {
1161  auto& columns_for_device = columns_per_device[device_id];
1162  columns_for_device.setBucketInfo(inverse_bucket_sizes_for_dimension_,
1163  inner_outer_pairs_);
1164  }
1165 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
std::vector< double > inverse_bucket_sizes_for_dimension_
std::vector< InnerOuter > inner_outer_pairs_

+ Here is the caller graph for this function:

void OverlapsJoinHashTable::setOverlapsHashtableMetaInfo ( size_t  max_table_size_bytes,
double  bucket_threshold,
std::vector< double > &  bucket_sizes 
)
inline protected

Definition at line 367 of file OverlapsJoinHashTable.h.

References OverlapsHashTableMetaInfo::bucket_sizes, hashtable_cache_meta_info_, OverlapsHashTableMetaInfo::overlaps_bucket_threshold, OverlapsHashTableMetaInfo::overlaps_max_table_size_bytes, and HashtableCacheMetaInfo::overlaps_meta_info.

Referenced by reifyImpl(), RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

369  {
370  OverlapsHashTableMetaInfo overlaps_meta_info;
371  overlaps_meta_info.bucket_sizes = bucket_sizes;
372  overlaps_meta_info.overlaps_max_table_size_bytes = max_table_size_bytes;
373  overlaps_meta_info.overlaps_bucket_threshold = bucket_threshold;
374  HashtableCacheMetaInfo meta_info;
375  meta_info.overlaps_meta_info = overlaps_meta_info;
376  hashtable_cache_meta_info_ = meta_info;
377  }
std::optional< OverlapsHashTableMetaInfo > overlaps_meta_info
HashtableCacheMetaInfo hashtable_cache_meta_info_
std::vector< double > bucket_sizes

+ Here is the caller graph for this function:

size_t OverlapsJoinHashTable::shardCount ( ) const
inline protected

Definition at line 238 of file OverlapsJoinHashTable.h.

References condition_, executor_, BaselineJoinHashTable::getShardCountForCondition(), Data_Namespace::GPU_LEVEL, inner_outer_pairs_, and memory_level_.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

238  {
239  if (memory_level_ != Data_Namespace::MemoryLevel::GPU_LEVEL) {
240  return 0;
241  }
242  return BaselineJoinHashTable::getShardCountForCondition(
243  condition_.get(), executor_, inner_outer_pairs_);
244  }
const std::shared_ptr< Analyzer::BinOper > condition_
const Data_Namespace::MemoryLevel memory_level_
std::vector< InnerOuter > inner_outer_pairs_
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::set< DecodedJoinHashBufferEntry > OverlapsJoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override protected virtual

Implements HashJoin.

Definition at line 1778 of file OverlapsJoinHashTable.cpp.

References CHECK, countBufferOff(), executor_, HashJoin::getHashTableForDevice(), getHashType(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), getQueryEngineCudaStreamForDevice(), GPU, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toSet().

1780  {
1781  auto buffer = getJoinHashBuffer(device_type, device_id);
1782  auto hash_table = getHashTableForDevice(device_id);
1783  CHECK(hash_table);
1784  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
1785 #ifdef HAVE_CUDA
1786  std::unique_ptr<int8_t[]> buffer_copy;
1787  if (device_type == ExecutorDeviceType::GPU) {
1788  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
1789  CHECK(executor_);
1790  auto data_mgr = executor_->getDataMgr();
1791  auto allocator = std::make_unique<CudaAllocator>(
1792  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
1793 
1794  allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
1795  }
1796  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
1797 #else
1798  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
1799 #endif // HAVE_CUDA
1800  auto ptr2 = ptr1 + offsetBufferOff();
1801  auto ptr3 = ptr1 + countBufferOff();
1802  auto ptr4 = ptr1 + payloadBufferOff();
1803  const auto layout = getHashType();
1804  return HashTable::toSet(getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
1805  getKeyComponentWidth(),
1806  hash_table->getEntryCount(),
1807  ptr1,
1808  ptr2,
1809  ptr3,
1810  ptr4,
1811  buffer_size);
1812 }
HashType getHashType() const noexcept override
size_t payloadBufferOff() const noexcept override
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
Definition: HashJoin.h:288
HashTable * getHashTableForDevice(const size_t device_id) const
Definition: HashJoin.h:269
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
size_t offsetBufferOff() const noexcept override
size_t countBufferOff() const noexcept override
#define CHECK(condition)
Definition: Logger.h:222
static DecodedJoinHashBufferSet toSet(size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size)
Decode hash table into a std::set for easy inspection and validation.
Definition: HashTable.cpp:139

+ Here is the call graph for this function:
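Because toSet() decodes the raw buffer into DecodedJoinHashBufferEntry values, it is convenient for validation; a hedged sketch (toSet() is protected, so real callers are tests or classes with access, and join_hash_table is an assumed instance):

   auto decoded = join_hash_table->toSet(ExecutorDeviceType::CPU, /*device_id=*/0);
   for (const auto& entry : decoded) {
     // each entry pairs a decoded key tuple with its matching row ids
   }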

std::string OverlapsJoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
override protected virtual

Implements HashJoin.

Definition at line 1736 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_LT, countBufferOff(), executor_, getHashType(), HashJoin::getHashTypeString(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), getQueryEngineCudaStreamForDevice(), GPU, HashJoin::hash_tables_for_device_, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toString().

1738  {
1739  auto buffer = getJoinHashBuffer(device_type, device_id);
1740  CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
1741  auto hash_table = hash_tables_for_device_[device_id];
1742  CHECK(hash_table);
1743  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
1744 #ifdef HAVE_CUDA
1745  std::unique_ptr<int8_t[]> buffer_copy;
1746  if (device_type == ExecutorDeviceType::GPU) {
1747  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
1748  CHECK(executor_);
1749  auto data_mgr = executor_->getDataMgr();
1750  auto device_allocator = std::make_unique<CudaAllocator>(
1751  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
1752 
1753  device_allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
1754  }
1755  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
1756 #else
1757  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
1758 #endif // HAVE_CUDA
1759  auto ptr2 = ptr1 + offsetBufferOff();
1760  auto ptr3 = ptr1 + countBufferOff();
1761  auto ptr4 = ptr1 + payloadBufferOff();
1762  CHECK(hash_table);
1763  const auto layout = getHashType();
1764  return HashTable::toString(
1765  "geo",
1766  getHashTypeString(layout),
1767  getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
1768  getKeyComponentWidth(),
1769  hash_table->getEntryCount(),
1770  ptr1,
1771  ptr2,
1772  ptr3,
1773  ptr4,
1774  buffer_size,
1775  raw);
1776 }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:351
HashType getHashType() const noexcept override
size_t payloadBufferOff() const noexcept override
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
Definition: HashJoin.h:288
#define CHECK_LT(x, y)
Definition: Logger.h:232
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:154
static std::string toString(const std::string &type, const std::string &layout_type, size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size, bool raw=false)
Decode hash table into a human-readable string.
Definition: HashTable.cpp:226
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
size_t offsetBufferOff() const noexcept override
size_t countBufferOff() const noexcept override
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

Member Data Documentation

std::unique_ptr< OverlapsTuningParamRecycler > OverlapsJoinHashTable::auto_tuner_cache_
static protected
Initial value:
= std::make_unique<OverlapsTuningParamRecycler>()

Definition at line 409 of file OverlapsJoinHashTable.h.

Referenced by getOverlapsTuningParamCache(), invalidateCache(), markCachedItemAsDirty(), and reifyWithLayout().

double OverlapsJoinHashTable::chosen_overlaps_bucket_threshold_
protected

Definition at line 391 of file OverlapsJoinHashTable.h.

Referenced by reifyImpl().

size_t OverlapsJoinHashTable::chosen_overlaps_max_table_size_bytes_
protected

Definition at line 392 of file OverlapsJoinHashTable.h.

Referenced by reifyImpl().

ColumnCacheMap& OverlapsJoinHashTable::column_cache_
protected

Definition at line 385 of file OverlapsJoinHashTable.h.

Referenced by fetchColumnsForDevice().

CompositeKeyInfo OverlapsJoinHashTable::composite_key_info_
protected
const std::shared_ptr<Analyzer::BinOper> OverlapsJoinHashTable::condition_
protected
std::mutex OverlapsJoinHashTable::cpu_hash_table_buff_mutex_
protected
HashTableBuildDagMap OverlapsJoinHashTable::hashtable_build_dag_map_
protected
HashtableCacheMetaInfo OverlapsJoinHashTable::hashtable_cache_meta_info_
protected
const JoinType OverlapsJoinHashTable::join_type_
protected
std::optional<HashType> OverlapsJoinHashTable::layout_override_
protected
const std::vector<InputTableInfo>& OverlapsJoinHashTable::query_infos_
protected
QueryPlanDAG OverlapsJoinHashTable::query_plan_dag_
protected

Definition at line 413 of file OverlapsJoinHashTable.h.

const TableIdToNodeMap OverlapsJoinHashTable::table_id_to_node_map_
protected

Definition at line 417 of file OverlapsJoinHashTable.h.

std::unordered_set<size_t> OverlapsJoinHashTable::table_keys_
protected

The documentation for this class was generated from the following files: