OmniSciDB d2f719934e
OverlapsJoinHashTable Class Reference

#include <OverlapsJoinHashTable.h>


Classes

struct  AlternativeCacheKeyForOverlapsHashJoin
 

Public Member Functions

 OverlapsJoinHashTable (const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, QueryPlan query_plan_dag, HashtableCacheMetaInfo hashtable_cache_meta_info, const TableIdToNodeMap &table_id_to_node_map)
 
virtual ~OverlapsJoinHashTable ()
 
- Public Member Functions inherited from HashJoin
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int8_t * getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static std::shared_ptr< OverlapsJoinHashTable > getInstance (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static auto getCacheInvalidator () -> std::function< void()>
 
static HashtableRecycler * getHashTableCache ()
 
static OverlapsTuningParamRecycler * getOverlapsTuningParamCache ()
 
- Static Public Member Functions inherited from HashJoin
static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static std::pair< std::string, std::shared_ptr< HashJoin > > getSyntheticInstance (std::vector< std::shared_ptr< Analyzer::BinOper >>, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 
static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static void checkHashJoinReplicationConstraint (const int table_id, const size_t shard_count, const Executor *executor)
 
static InnerOuter normalizeColumnPair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
 
static std::vector< InnerOuter > normalizeColumnPairs (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor)
 

Protected Member Functions

void reify (const HashType preferred_layout)
 
virtual void reifyWithLayout (const HashType layout)
 
virtual void reifyImpl (std::vector< ColumnsForDevice > &columns_per_device, const Fragmenter_Namespace::TableInfo &query_info, const HashType layout, const size_t shard_count, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
void reifyForDevice (const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const int device_id, const logger::ThreadId parent_thread_id)
 
size_t calculateHashTableSize (size_t number_of_dimensions, size_t emitted_keys_count, size_t entry_count) const
 
ColumnsForDevice fetchColumnsForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
 
virtual std::pair< size_t, size_t > approximateTupleCount (const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
virtual std::pair< size_t, size_t > computeHashTableCounts (const size_t shard_count, const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
void setInverseBucketSizeInfo (const std::vector< double > &inverse_bucket_sizes, std::vector< ColumnsForDevice > &columns_per_device, const size_t device_count)
 
size_t getKeyComponentWidth () const
 
size_t getKeyComponentCount () const
 
HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
std::shared_ptr< BaselineHashTable > initHashTableOnCpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching)
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
DecodedJoinHashBufferSet toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
const RegisteredQueryHint & getRegisteredQueryHint ()
 
void registerQueryHint (const RegisteredQueryHint &query_hint)
 
size_t getEntryCount () const
 
size_t getEmittedKeysCount () const
 
size_t getComponentBufferSize () const noexcept override
 
size_t shardCount () const
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
size_t getKeyBufferSize () const noexcept
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
std::string getHashJoinType () const final
 
bool isBitwiseEq () const override
 
std::shared_ptr< HashTable > initHashTableOnCpuFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
 
std::optional< std::pair< size_t, size_t > > getApproximateTupleCountFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
 
void putHashTableOnCpuToCache (QueryPlanHash key, CacheItemType item_type, std::shared_ptr< HashTable > hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
 
llvm::Value * codegenKey (const CompilationOptions &)
 
std::vector< llvm::Value * > codegenManyKey (const CompilationOptions &)
 
std::optional< OverlapsHashTableMetaInfo > getOverlapsHashTableMetaInfo ()
 
QueryPlanHash getAlternativeCacheKey (AlternativeCacheKeyForOverlapsHashJoin &info)
 
void generateCacheKey (const size_t max_hashtable_size, const double bucket_threshold)
 
QueryPlanHash getCacheKey () const
 
const std::vector< InnerOuter > & getInnerOuterPairs () const
 
void setOverlapsHashtableMetaInfo (size_t max_table_size_bytes, double bucket_threshold, std::vector< double > &bucket_sizes)
 

Protected Attributes

const std::shared_ptr< Analyzer::BinOper > condition_
 
const JoinType join_type_
 
const std::vector< InputTableInfo > & query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
std::vector< InnerOuter > inner_outer_pairs_
 
const int device_count_
 
std::vector< double > inverse_bucket_sizes_for_dimension_
 
double chosen_overlaps_bucket_threshold_
 
size_t chosen_overlaps_max_table_size_bytes_
 
CompositeKeyInfo composite_key_info_
 
std::optional< HashType > layout_override_
 
std::mutex cpu_hash_table_buff_mutex_
 
RegisteredQueryHint query_hint_
 
QueryPlan query_plan_dag_
 
const TableIdToNodeMap table_id_to_node_map_
 
QueryPlanHash hashtable_cache_key_
 
HashtableCacheMetaInfo hashtable_cache_meta_info_
 
- Protected Attributes inherited from HashJoin
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
 

Static Protected Attributes

static std::unique_ptr< HashtableRecycler > hash_table_cache_
 
static std::unique_ptr< OverlapsTuningParamRecycler > auto_tuner_cache_
 

Detailed Description

Definition at line 24 of file OverlapsJoinHashTable.h.

Constructor & Destructor Documentation

OverlapsJoinHashTable::OverlapsJoinHashTable ( const std::shared_ptr< Analyzer::BinOper >  condition,
const JoinType  join_type,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
ColumnCacheMap &  column_cache,
Executor *  executor,
const std::vector< InnerOuter > &  inner_outer_pairs,
const int  device_count,
QueryPlan  query_plan_dag,
HashtableCacheMetaInfo  hashtable_cache_meta_info,
const TableIdToNodeMap &  table_id_to_node_map 
)
inline

Definition at line 26 of file OverlapsJoinHashTable.h.

References CHECK_GT, RegisteredQueryHint::defaults(), device_count_, HashJoin::hash_tables_for_device_, and query_hint_.

    : condition_(condition)
    , join_type_(join_type)
    , query_infos_(query_infos)
    , memory_level_(memory_level)
    , executor_(executor)
    , column_cache_(column_cache)
    , inner_outer_pairs_(inner_outer_pairs)
    , device_count_(device_count)
    , query_plan_dag_(query_plan_dag)
    , table_id_to_node_map_(table_id_to_node_map)
    , hashtable_cache_key_(EMPTY_HASHED_PLAN_DAG_KEY)
    , hashtable_cache_meta_info_(hashtable_cache_meta_info) {
  CHECK_GT(device_count_, 0);
  hash_tables_for_device_.resize(std::max(device_count_, 1));
  query_hint_ = RegisteredQueryHint::defaults();
}


virtual OverlapsJoinHashTable::~OverlapsJoinHashTable ( )
inline virtual

Definition at line 54 of file OverlapsJoinHashTable.h.

{}

Member Function Documentation

std::pair< size_t, size_t > OverlapsJoinHashTable::approximateTupleCount ( const std::vector< double > &  inverse_bucket_sizes_for_dimension,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected virtual

Reimplemented in RangeJoinHashTable.

Definition at line 942 of file OverlapsJoinHashTable.cpp.

References gpu_enabled::accumulate(), approximate_distinct_tuples_on_device_overlaps(), approximate_distinct_tuples_overlaps(), threading_serial::async(), Bitmap, CHECK, CHECK_EQ, CHECK_GT, CPU, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, Data_Namespace::CPU_LEVEL, cpu_threads(), device_count_, executor_, getApproximateTupleCountFromCache(), getEffectiveMemoryLevel(), GPU, Data_Namespace::GPU_LEVEL, hashtable_cache_key_, hll_size(), hll_unify(), i, inner_outer_pairs_, OVERLAPS_HT, transfer_flat_object_to_gpu(), transfer_vector_of_flat_objects_to_gpu(), UNREACHABLE, and VLOG.

Referenced by computeHashTableCounts().

{
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
  CountDistinctDescriptor count_distinct_desc{
      CountDistinctImplType::Bitmap,
      0,
      11,
      true,
      effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
          ? ExecutorDeviceType::GPU
          : ExecutorDeviceType::CPU,
      1};
  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();

  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());
  if (columns_per_device.front().join_columns.front().num_elems == 0) {
    return std::make_pair(0, 0);
  }

  // TODO: state management in here should be revisited, but this should be safe enough
  // for now
  // re-compute bucket counts per device based on global bucket size
  for (size_t device_id = 0; device_id < columns_per_device.size(); ++device_id) {
    auto& columns_for_device = columns_per_device[device_id];
    columns_for_device.setBucketInfo(inverse_bucket_sizes_for_dimension,
                                     inner_outer_pairs_);
  }

  // Number of keys must match dimension of buckets
  CHECK_EQ(columns_per_device.front().join_columns.size(),
           columns_per_device.front().join_buckets.size());
  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
    // Note that this path assumes each device has the same hash table (for GPU hash join
    // w/ hash table built on CPU)
    const auto cached_count_info =
        getApproximateTupleCountFromCache(hashtable_cache_key_,
                                          CacheItemType::OVERLAPS_HT,
                                          DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
    if (cached_count_info) {
      VLOG(1) << "Using a cached tuple count: " << cached_count_info->first
              << ", emitted keys count: " << cached_count_info->second;
      return *cached_count_info;
    }
    int thread_count = cpu_threads();
    std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
    auto hll_result = &hll_buffer_all_cpus[0];

    std::vector<int32_t> num_keys_for_row;
    // TODO(adb): support multi-column overlaps join
    num_keys_for_row.resize(columns_per_device.front().join_columns[0].num_elems);

    approximate_distinct_tuples_overlaps(hll_result,
                                         num_keys_for_row,
                                         count_distinct_desc.bitmap_sz_bits,
                                         padded_size_bytes,
                                         columns_per_device.front().join_columns,
                                         columns_per_device.front().join_column_types,
                                         columns_per_device.front().join_buckets,
                                         thread_count);
    for (int i = 1; i < thread_count; ++i) {
      hll_unify(hll_result,
                hll_result + i * padded_size_bytes,
                1 << count_distinct_desc.bitmap_sz_bits);
    }
    return std::make_pair(
        hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
        static_cast<size_t>(num_keys_for_row.size() > 0 ? num_keys_for_row.back() : 0));
  }
#ifdef HAVE_CUDA
  auto data_mgr = executor_->getDataMgr();
  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
  for (auto& host_hll_buffer : host_hll_buffers) {
    host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
  }
  std::vector<size_t> emitted_keys_count_device_threads(device_count_, 0);
  std::vector<std::future<void>> approximate_distinct_device_threads;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    approximate_distinct_device_threads.emplace_back(std::async(
        std::launch::async,
        [device_id,
         &columns_per_device,
         &count_distinct_desc,
         data_mgr,
         &host_hll_buffers,
         &emitted_keys_count_device_threads] {
          auto allocator = data_mgr->createGpuAllocator(device_id);
          auto device_hll_buffer =
              allocator->alloc(count_distinct_desc.bitmapPaddedSizeBytes());
          data_mgr->getCudaMgr()->zeroDeviceMem(
              device_hll_buffer, count_distinct_desc.bitmapPaddedSizeBytes(), device_id);
          const auto& columns_for_device = columns_per_device[device_id];
          auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(
              columns_for_device.join_columns, *allocator);

          CHECK_GT(columns_for_device.join_buckets.size(), 0u);
          const auto& inverse_bucket_sizes_for_dimension =
              columns_for_device.join_buckets[0].inverse_bucket_sizes_for_dimension;
          auto inverse_bucket_sizes_gpu = allocator->alloc(
              inverse_bucket_sizes_for_dimension.size() * sizeof(double));
          allocator->copyToDevice(
              inverse_bucket_sizes_gpu,
              inverse_bucket_sizes_for_dimension.data(),
              inverse_bucket_sizes_for_dimension.size() * sizeof(double));
          const size_t row_counts_buffer_sz =
              columns_per_device.front().join_columns[0].num_elems * sizeof(int32_t);
          auto row_counts_buffer = allocator->alloc(row_counts_buffer_sz);
          data_mgr->getCudaMgr()->zeroDeviceMem(
              row_counts_buffer, row_counts_buffer_sz, device_id);
          const auto key_handler =
              OverlapsKeyHandler(inverse_bucket_sizes_for_dimension.size(),
                                 join_columns_gpu,
                                 reinterpret_cast<double*>(inverse_bucket_sizes_gpu));
          const auto key_handler_gpu =
              transfer_flat_object_to_gpu(key_handler, *allocator);
          approximate_distinct_tuples_on_device_overlaps(
              reinterpret_cast<uint8_t*>(device_hll_buffer),
              count_distinct_desc.bitmap_sz_bits,
              reinterpret_cast<int32_t*>(row_counts_buffer),
              key_handler_gpu,
              columns_for_device.join_columns[0].num_elems);

          auto& host_emitted_keys_count = emitted_keys_count_device_threads[device_id];
          allocator->copyFromDevice(
              &host_emitted_keys_count,
              row_counts_buffer +
                  (columns_per_device.front().join_columns[0].num_elems - 1) *
                      sizeof(int32_t),
              sizeof(int32_t));

          auto& host_hll_buffer = host_hll_buffers[device_id];
          allocator->copyFromDevice(&host_hll_buffer[0],
                                    device_hll_buffer,
                                    count_distinct_desc.bitmapPaddedSizeBytes());
        }));
  }
  for (auto& child : approximate_distinct_device_threads) {
    child.get();
  }
  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
  auto& result_hll_buffer = host_hll_buffers.front();
  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
  for (int device_id = 1; device_id < device_count_; ++device_id) {
    auto& host_hll_buffer = host_hll_buffers[device_id];
    hll_unify(hll_result,
              reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
              1 << count_distinct_desc.bitmap_sz_bits);
  }
  const size_t emitted_keys_count =
      std::accumulate(emitted_keys_count_device_threads.begin(),
                      emitted_keys_count_device_threads.end(),
                      0);
  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
                        emitted_keys_count);
#else
  UNREACHABLE();
  return {0, 0};
#endif  // HAVE_CUDA
}

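Both paths above build one HyperLogLog register buffer per worker (CPU thread or GPU device), merge the buffers with hll_unify, and read a single cardinality estimate off the merged buffer with hll_size. A minimal standalone sketch of that merge-then-estimate step, assuming plain byte registers and omitting the register construction that the real helpers in HyperLogLog.h perform:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

// Register-wise max is exactly the HLL union, so per-thread (or per-device)
// sketches can be built independently and merged afterwards.
void hll_unify_sketch(std::vector<uint8_t>& lhs, const std::vector<uint8_t>& rhs) {
  for (size_t i = 0; i < lhs.size(); ++i) {
    lhs[i] = std::max(lhs[i], rhs[i]);
  }
}

// Raw HLL estimate from the merged registers (small/large-range corrections
// omitted for brevity).
double hll_size_sketch(const std::vector<uint8_t>& registers) {
  const double m = static_cast<double>(registers.size());
  const double alpha = 0.7213 / (1.0 + 1.079 / m);  // bias constant, valid for m >= 128
  double inv_sum = 0.0;
  for (auto r : registers) {
    inv_sum += std::pow(2.0, -static_cast<double>(r));
  }
  return alpha * m * m / inv_sum;
}

int main() {
  std::vector<uint8_t> a(2048, 0), b(2048, 0);
  a[3] = 5;   // thread 0 saw a key hashing to register 3 with rank 5
  b[3] = 7;   // thread 1 saw a deeper rank for the same register
  b[10] = 2;
  hll_unify_sketch(a, b);
  std::cout << "estimated distinct tuples: " << hll_size_sketch(a) << "\n";
}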

size_t OverlapsJoinHashTable::calculateHashTableSize ( size_t  number_of_dimensions,
size_t  emitted_keys_count,
size_t  entry_count 
) const
protected

Definition at line 870 of file OverlapsJoinHashTable.cpp.

References getKeyComponentWidth().

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  const auto key_component_width = getKeyComponentWidth();
  const auto key_component_count = number_of_dimensions;
  const auto entry_size = key_component_count * key_component_width;
  const auto keys_for_all_rows = emitted_keys_count;
  const size_t one_to_many_hash_entries = 2 * entry_count + keys_for_all_rows;
  const size_t hash_table_size =
      entry_size * entry_count + one_to_many_hash_entries * sizeof(int32_t);
  return hash_table_size;
}

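To make the formula concrete: the keys region occupies entry_size * entry_count bytes, and the one-to-many region stores two int32 arrays of length entry_count (offsets and counts) plus one int32 per emitted key (the row-id payload). A small standalone program reproducing the arithmetic — key_component_width is a parameter here, while the member function reads it from getKeyComponentWidth():

#include <cstddef>
#include <cstdint>
#include <iostream>

std::size_t overlaps_hash_table_size(std::size_t number_of_dimensions,
                                     std::size_t emitted_keys_count,
                                     std::size_t entry_count,
                                     std::size_t key_component_width) {
  const std::size_t entry_size = number_of_dimensions * key_component_width;
  const std::size_t one_to_many_hash_entries = 2 * entry_count + emitted_keys_count;
  return entry_size * entry_count + one_to_many_hash_entries * sizeof(int32_t);
}

int main() {
  // 2-D buckets, 1M entries, 4M emitted keys, 8-byte key components:
  // 16 MB of keys + (2M + 4M) * 4 bytes = 40 MB total.
  std::cout << overlaps_hash_table_size(2, 4'000'000, 1'000'000, 8) << " bytes\n";
}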

llvm::Value * OverlapsJoinHashTable::codegenKey ( const CompilationOptions &  co )
protected

Definition at line 1428 of file OverlapsJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::castArrayPointer(), CHECK, CHECK_EQ, CodeGenerator::codegen(), executor_, getKeyComponentCount(), getKeyComponentWidth(), inner_outer_pairs_, inverse_bucket_sizes_for_dimension_, kPOINT, kTINYINT, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), and UNREACHABLE.

Referenced by codegenMatchingSet().

{
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
  llvm::Value* key_buff_lv{nullptr};
  switch (key_component_width) {
    case 4:
      key_buff_lv =
          LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv);
      break;
    case 8:
      key_buff_lv =
          LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
      break;
    default:
      CHECK(false);
  }

  const auto& inner_outer_pair = inner_outer_pairs_[0];
  const auto outer_geo = inner_outer_pair.second;
  const auto outer_geo_ti = outer_geo->get_type_info();

  llvm::Value* arr_ptr = nullptr;
  CodeGenerator code_generator(executor_);
  CHECK_EQ(inverse_bucket_sizes_for_dimension_.size(), static_cast<size_t>(2));

  if (outer_geo_ti.is_geometry()) {
    // TODO(adb): for points we will use the coords array, but for other geometries we
    // will need to use the bounding box. For now only support points.
    CHECK_EQ(outer_geo_ti.get_type(), kPOINT);

    if (const auto outer_geo_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_geo)) {
      const auto outer_geo_col_lvs = code_generator.codegen(outer_geo_col, true, co);
      CHECK_EQ(outer_geo_col_lvs.size(), size_t(1));
      const auto coords_cd = executor_->getCatalog()->getMetadataForColumn(
          outer_geo_col->get_table_id(), outer_geo_col->get_column_id() + 1);
      CHECK(coords_cd);

      const auto array_ptr = executor_->cgen_state_->emitExternalCall(
          "array_buff",
          llvm::Type::getInt8PtrTy(executor_->cgen_state_->context_),
          {outer_geo_col_lvs.front(), code_generator.posArg(outer_geo_col)});
      CHECK(coords_cd->columnType.get_elem_type().get_type() == kTINYINT)
          << "Only TINYINT coordinates columns are supported in geo overlaps hash join.";
      arr_ptr = code_generator.castArrayPointer(array_ptr,
                                                coords_cd->columnType.get_elem_type());
    } else if (const auto outer_geo_function_operator =
                   dynamic_cast<const Analyzer::GeoOperator*>(outer_geo)) {
      // Process points dynamically constructed by geo function operators
      const auto outer_geo_function_operator_lvs =
          code_generator.codegen(outer_geo_function_operator, true, co);
      CHECK_EQ(outer_geo_function_operator_lvs.size(), size_t(2));
      arr_ptr = outer_geo_function_operator_lvs.front();
    } else if (const auto outer_geo_expr =
                   dynamic_cast<const Analyzer::GeoExpr*>(outer_geo)) {
      UNREACHABLE() << outer_geo_expr->toString();
    }
  } else if (outer_geo_ti.is_fixlen_array()) {
    // Process dynamically constructed points
    const auto outer_geo_cast_coord_array =
        dynamic_cast<const Analyzer::UOper*>(outer_geo);
    CHECK_EQ(outer_geo_cast_coord_array->get_optype(), kCAST);
    const auto outer_geo_coord_array = dynamic_cast<const Analyzer::ArrayExpr*>(
        outer_geo_cast_coord_array->get_operand());
    CHECK(outer_geo_coord_array);
    CHECK(outer_geo_coord_array->isLocalAlloc());
    CHECK_EQ(outer_geo_coord_array->getElementCount(), 2);
    auto elem_size = (outer_geo_ti.get_compression() == kENCODING_GEOINT)
                         ? sizeof(int32_t)
                         : sizeof(double);
    CHECK_EQ(outer_geo_ti.get_size(), int(2 * elem_size));
    const auto outer_geo_constructed_lvs = code_generator.codegen(outer_geo, true, co);
    // CHECK_EQ(outer_geo_constructed_lvs.size(), size_t(2)); // Pointer and size
    const auto array_ptr = outer_geo_constructed_lvs.front();  // Just need the pointer
    arr_ptr = LL_BUILDER.CreateGEP(array_ptr, LL_INT(0));
    arr_ptr = code_generator.castArrayPointer(array_ptr, SQLTypeInfo(kTINYINT, true));
  }
  if (!arr_ptr) {
    LOG(FATAL) << "Overlaps key currently only supported for geospatial columns and "
                  "constructed points.";
  }

  for (size_t i = 0; i < 2; i++) {
    const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(key_buff_lv, LL_INT(i));

    // Note that get_bucket_key_for_range_compressed will need to be specialized for
    // future compression schemes
    auto bucket_key =
        outer_geo_ti.get_compression() == kENCODING_GEOINT
            ? executor_->cgen_state_->emitExternalCall(
                  "get_bucket_key_for_range_compressed",
                  get_int_type(64, LL_CONTEXT),
                  {arr_ptr, LL_INT(i), LL_FP(inverse_bucket_sizes_for_dimension_[i])})
            : executor_->cgen_state_->emitExternalCall(
                  "get_bucket_key_for_range_double",
                  get_int_type(64, LL_CONTEXT),
                  {arr_ptr, LL_INT(i), LL_FP(inverse_bucket_sizes_for_dimension_[i])});
    const auto col_lv = LL_BUILDER.CreateSExt(
        bucket_key, get_int_type(key_component_width * 8, LL_CONTEXT));
    LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
  }
  return key_buff_lv;
}


std::vector< llvm::Value * > OverlapsJoinHashTable::codegenManyKey ( const CompilationOptions &  co )
protected

Definition at line 1533 of file OverlapsJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), executor_, HashJoin::getHashTableForDevice(), getHashType(), getKeyComponentWidth(), inner_outer_pairs_, ManyToMany, CodeGenerator::posArg(), and VLOG.

Referenced by codegenMatchingSet().

{
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  auto hash_table = getHashTableForDevice(size_t(0));
  CHECK(hash_table);
  CHECK(getHashType() == HashType::ManyToMany);

  VLOG(1) << "Performing codgen for ManyToMany";
  const auto& inner_outer_pair = inner_outer_pairs_[0];
  const auto outer_col = inner_outer_pair.second;

  CodeGenerator code_generator(executor_);
  const auto col_lvs = code_generator.codegen(outer_col, true, co);
  CHECK_EQ(col_lvs.size(), size_t(1));

  const auto outer_col_var = dynamic_cast<const Analyzer::ColumnVar*>(outer_col);
  CHECK(outer_col_var);
  const auto coords_cd = executor_->getCatalog()->getMetadataForColumn(
      outer_col_var->get_table_id(), outer_col_var->get_column_id());
  CHECK(coords_cd);

  const auto array_ptr = executor_->cgen_state_->emitExternalCall(
      "array_buff",
      llvm::Type::getInt8PtrTy(executor_->cgen_state_->context_),
      {col_lvs.front(), code_generator.posArg(outer_col)});

  // TODO(jclay): this seems to cast to double, and causes the GPU build to fail.
  // const auto arr_ptr =
  //     code_generator.castArrayPointer(array_ptr,
  //     coords_cd->columnType.get_elem_type());
  array_ptr->setName("array_ptr");

  auto num_keys_lv = executor_->cgen_state_->emitExternalCall(
      "get_num_buckets_for_bounds",
      get_int_type(32, LL_CONTEXT),
      {array_ptr,
       LL_INT(0),
       LL_FP(inverse_bucket_sizes_for_dimension_[0]),
       LL_FP(inverse_bucket_sizes_for_dimension_[1])});
  num_keys_lv->setName("num_keys_lv");

  return {num_keys_lv, array_ptr};
}


HashJoinMatchingSet OverlapsJoinHashTable::codegenMatchingSet ( const CompilationOptions &  co,
const size_t  index 
)
override protected virtual

Implements HashJoin.

Definition at line 1579 of file OverlapsJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, HashJoin::codegenHashTableLoad(), codegenKey(), codegenManyKey(), HashJoin::codegenMatchingSet(), executor_, get_int_array_type(), get_int_type(), getComponentBufferSize(), getEntryCount(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), inverse_bucket_sizes_for_dimension_, LL_BUILDER, LL_CONTEXT, LL_FP, LL_INT, ManyToMany, offsetBufferOff(), OneToMany, to_string(), UNREACHABLE, and VLOG.

{
  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
  if (getHashType() == HashType::ManyToMany) {
    VLOG(1) << "Building codegenMatchingSet for ManyToMany";
    const auto key_component_width = getKeyComponentWidth();
    CHECK(key_component_width == 4 || key_component_width == 8);
    auto many_to_many_args = codegenManyKey(co);
    auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
    const auto composite_dict_ptr_type =
        llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
    const auto composite_key_dict =
        hash_ptr->getType()->isPointerTy()
            ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
            : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
    const auto key_component_count = getKeyComponentCount();

    auto one_to_many_ptr = hash_ptr;

    if (one_to_many_ptr->getType()->isPointerTy()) {
      one_to_many_ptr =
          LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
    } else {
      CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
    }

    const auto composite_key_dict_size = offsetBufferOff();
    one_to_many_ptr =
        LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));

    // NOTE(jclay): A fixed array of size 200 is allocated on the stack.
    // this is likely the maximum value we can do that is safe to use across
    // all supported GPU architectures.
    const int max_array_size = 200;
    const auto arr_type = get_int_array_type(32, max_array_size, LL_CONTEXT);
    const auto out_arr_lv = LL_BUILDER.CreateAlloca(arr_type);
    out_arr_lv->setName("out_arr");

    const auto casted_out_arr_lv =
        LL_BUILDER.CreatePointerCast(out_arr_lv, arr_type->getPointerTo());

    const auto element_ptr = LL_BUILDER.CreateGEP(arr_type, casted_out_arr_lv, LL_INT(0));

    auto rowid_ptr_i32 =
        LL_BUILDER.CreatePointerCast(element_ptr, llvm::Type::getInt32PtrTy(LL_CONTEXT));

    const auto candidate_count_lv = executor_->cgen_state_->emitExternalCall(
        "get_candidate_rows",
        llvm::Type::getInt64Ty(LL_CONTEXT),
        {
            rowid_ptr_i32,
            LL_INT(max_array_size),
            many_to_many_args[1],
            LL_INT(0),
            LL_FP(inverse_bucket_sizes_for_dimension_[0]),
            LL_FP(inverse_bucket_sizes_for_dimension_[1]),
            many_to_many_args[0],
            LL_INT(key_component_count),               // key_component_count
            composite_key_dict,                        // ptr to hash table
            LL_INT(getEntryCount()),                   // entry_count
            LL_INT(composite_key_dict_size),           // offset_buffer_ptr_offset
            LL_INT(getEntryCount() * sizeof(int32_t))  // sub_buff_size
        });

    const auto slot_lv = LL_INT(int64_t(0));

    return {rowid_ptr_i32, candidate_count_lv, slot_lv};
  } else {
    VLOG(1) << "Building codegenMatchingSet for Baseline";
    // TODO: duplicated w/ BaselineJoinHashTable -- push into the hash table builder?
    const auto key_component_width = getKeyComponentWidth();
    CHECK(key_component_width == 4 || key_component_width == 8);
    auto key_buff_lv = codegenKey(co);
    CHECK(getHashType() == HashType::OneToMany);
    auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
    const auto composite_dict_ptr_type =
        llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
    const auto composite_key_dict =
        hash_ptr->getType()->isPointerTy()
            ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
            : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
    const auto key_component_count = getKeyComponentCount();
    const auto key = executor_->cgen_state_->emitExternalCall(
        "get_composite_key_index_" + std::to_string(key_component_width * 8),
        get_int_type(64, LL_CONTEXT),
        {key_buff_lv,
         LL_INT(key_component_count),
         composite_key_dict,
         LL_INT(getEntryCount())});
    auto one_to_many_ptr = hash_ptr;
    if (one_to_many_ptr->getType()->isPointerTy()) {
      one_to_many_ptr =
          LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
    } else {
      CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
    }
    const auto composite_key_dict_size = offsetBufferOff();
    one_to_many_ptr =
        LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
    return HashJoin::codegenMatchingSet(
        std::vector<llvm::Value*>{
            one_to_many_ptr, key, LL_INT(int64_t(0)), LL_INT(getEntryCount() - 1)},
        false,
        false,
        false,
        getComponentBufferSize(),
        executor_);
  }
  UNREACHABLE();
  return HashJoinMatchingSet{};
}


llvm::Value* OverlapsJoinHashTable::codegenSlot ( const CompilationOptions & ,
const size_t   
)
inline override protected virtual

Implements HashJoin.

Definition at line 198 of file OverlapsJoinHashTable.h.

References UNREACHABLE.

{
  UNREACHABLE();  // not applicable for overlaps join
  return nullptr;
}
std::pair< size_t, size_t > OverlapsJoinHashTable::computeHashTableCounts ( const size_t  shard_count,
const std::vector< double > &  inverse_bucket_sizes_for_dimension,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected virtual

Definition at line 923 of file OverlapsJoinHashTable.cpp.

References approximateTupleCount(), CHECK, device_count_, get_entries_per_device(), and memory_level_.

Referenced by reifyWithLayout().

{
  CHECK(!inverse_bucket_sizes_for_dimension.empty());
  const auto [tuple_count, emitted_keys_count] =
      approximateTupleCount(inverse_bucket_sizes_for_dimension,
                            columns_per_device,
                            chosen_max_hashtable_size,
                            chosen_bucket_threshold);
  const auto entry_count = 2 * std::max(tuple_count, size_t(1));

  return std::make_pair(
      get_entries_per_device(entry_count, shard_count, device_count_, memory_level_),
      emitted_keys_count);
}

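The entry count returned above is twice the HLL-approximated distinct tuple count, i.e. the table is kept at most half full before being split across devices. As a sketch:

#include <algorithm>
#include <cstddef>

// Sizing rule from computeHashTableCounts(): 2x the approximate tuple count,
// floored at one entry, for a fill factor of at most 50%.
std::size_t overlaps_entry_count(std::size_t approximate_tuple_count) {
  return 2 * std::max<std::size_t>(approximate_tuple_count, 1);
}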

size_t OverlapsJoinHashTable::countBufferOff ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 260 of file OverlapsJoinHashTable.h.

References getComponentBufferSize(), getHashType(), getKeyBufferSize(), HashJoin::layoutRequiresAdditionalBuffers(), and offsetBufferOff().

Referenced by payloadBufferOff(), toSet(), and toString().

{
  if (layoutRequiresAdditionalBuffers(getHashType())) {
    return offsetBufferOff() + getComponentBufferSize();
  } else {
    return getKeyBufferSize();
  }
}

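Together with offsetBufferOff() and payloadBufferOff(), this describes a buffer where the composite key dictionary is followed by an offsets array, a counts array, and the row-id payload. A hypothetical struct (names invented here) capturing the same offset arithmetic for layouts that require the additional one-to-many buffers:

#include <cstddef>
#include <cstdint>

// Hypothetical mirror of the offset arithmetic in offsetBufferOff(),
// countBufferOff() and payloadBufferOff().
struct OneToManyLayoutSketch {
  std::size_t key_buffer_size;        // composite key dictionary, getKeyBufferSize()
  std::size_t component_buffer_size;  // entry_count * sizeof(int32_t)

  std::size_t offset_buffer_off() const { return key_buffer_size; }
  std::size_t count_buffer_off() const {
    return offset_buffer_off() + component_buffer_size;
  }
  std::size_t payload_buffer_off() const {
    return count_buffer_off() + component_buffer_size;
  }
};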

ColumnsForDevice OverlapsJoinHashTable::fetchColumnsForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
DeviceAllocator dev_buff_owner 
)
protected

Definition at line 883 of file OverlapsJoinHashTable.cpp.

References CHECK, column_cache_, executor_, HashJoin::fetchJoinColumn(), get_column_descriptor_maybe(), get_join_column_type_kind(), getEffectiveMemoryLevel(), and inner_outer_pairs_.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  const auto& catalog = *executor_->getCatalog();
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);

  std::vector<JoinColumn> join_columns;
  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
  std::vector<JoinColumnTypeInfo> join_column_types;
  std::vector<std::shared_ptr<void>> malloc_owner;
  for (const auto& inner_outer_pair : inner_outer_pairs_) {
    const auto inner_col = inner_outer_pair.first;
    const auto inner_cd = get_column_descriptor_maybe(
        inner_col->get_column_id(), inner_col->get_table_id(), catalog);
    if (inner_cd && inner_cd->isVirtualCol) {
      throw FailedToJoinOnVirtualColumn();
    }
    join_columns.emplace_back(fetchJoinColumn(inner_col,
                                              fragments,
                                              effective_memory_level,
                                              device_id,
                                              chunks_owner,
                                              dev_buff_owner,
                                              malloc_owner,
                                              executor_,
                                              &column_cache_));
    const auto& ti = inner_col->get_type_info();
    join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
                                                      0,
                                                      0,
                                                      inline_int_null_value<int64_t>(),
                                                      false,
                                                      0,
                                                      get_join_column_type_kind(ti)});
    CHECK(ti.is_array()) << "Overlaps join currently only supported for arrays.";
  }
  return {join_columns, join_column_types, chunks_owner, {}, malloc_owner};
}


void OverlapsJoinHashTable::generateCacheKey ( const size_t  max_hashtable_size,
const double  bucket_threshold 
)
inline protected

Definition at line 332 of file OverlapsJoinHashTable.h.

References hashtable_cache_key_, and query_plan_dag_.

Referenced by RangeJoinHashTable::initHashTableOnCpu(), and reifyWithLayout().

{
  std::ostringstream oss;
  oss << query_plan_dag_;
  oss << max_hashtable_size << "|";
  oss << bucket_threshold;
  hashtable_cache_key_ = boost::hash_value(oss.str());
}

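Since the key is just a hash of the serialized DAG and the two tuning knobs, an equivalent standalone sketch (using boost::hash_value exactly as the member function does) is:

#include <boost/functional/hash.hpp>
#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>

// Serialize the query plan DAG and the tuning parameters, then hash the
// resulting string into a single cache key.
std::size_t make_overlaps_cache_key(const std::string& query_plan_dag,
                                    std::size_t max_hashtable_size,
                                    double bucket_threshold) {
  std::ostringstream oss;
  oss << query_plan_dag;
  oss << max_hashtable_size << "|";
  oss << bucket_threshold;
  return boost::hash_value(oss.str());
}

int main() {
  // Different tuning parameters for the same DAG yield different cache keys.
  std::cout << make_overlaps_cache_key("scan|join", 1 << 30, 0.1) << "\n"
            << make_overlaps_cache_key("scan|join", 1 << 30, 0.2) << "\n";
}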

QueryPlanHash OverlapsJoinHashTable::getAlternativeCacheKey ( AlternativeCacheKeyForOverlapsHashJoin info)
inline protected

Definition at line 313 of file OverlapsJoinHashTable.h.

References OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::bucket_threshold, OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::chunk_key, OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::inner_outer_pairs, OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::inverse_bucket_sizes, OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::max_hashtable_size, OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::num_elements, OverlapsJoinHashTable::AlternativeCacheKeyForOverlapsHashJoin::optype, and toString().

Referenced by RangeJoinHashTable::initHashTableOnCpu(), reifyImpl(), and reifyWithLayout().

{
  auto hash = boost::hash_value(::toString(info.chunk_key));
  for (InnerOuter inner_outer : info.inner_outer_pairs) {
    auto inner_col = inner_outer.first;
    auto rhs_col_var = dynamic_cast<const Analyzer::ColumnVar*>(inner_outer.second);
    auto outer_col = rhs_col_var ? rhs_col_var : inner_col;
    boost::hash_combine(hash, inner_col->toString());
    if (inner_col->get_type_info().is_string()) {
      boost::hash_combine(hash, outer_col->toString());
    }
  }
  boost::hash_combine(hash, info.num_elements);
  boost::hash_combine(hash, ::toString(info.optype));
  boost::hash_combine(hash, info.max_hashtable_size);
  boost::hash_combine(hash, info.bucket_threshold);
  boost::hash_combine(hash, ::toString(info.inverse_bucket_sizes));
  return hash;
}

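A standalone illustration of the boost::hash_combine chaining used above: each field folds into the running seed, so any differing field yields a different cache key. Hashing the bucket sizes with boost::hash_range is a simplification here; the member function stringifies them via ::toString first.

#include <boost/functional/hash.hpp>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

std::size_t combine_fields(const std::string& chunk_key_str,
                           std::size_t num_elements,
                           std::size_t max_hashtable_size,
                           double bucket_threshold,
                           const std::vector<double>& inverse_bucket_sizes) {
  auto hash = boost::hash_value(chunk_key_str);
  boost::hash_combine(hash, num_elements);
  boost::hash_combine(hash, max_hashtable_size);
  boost::hash_combine(hash, bucket_threshold);
  boost::hash_combine(hash, boost::hash_range(inverse_bucket_sizes.begin(),
                                              inverse_bucket_sizes.end()));
  return hash;
}

int main() {
  std::cout << combine_fields("[1,2,3]", 1000, 1 << 28, 0.1, {10.0, 10.0}) << "\n";
}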

std::optional< std::pair< size_t, size_t > > OverlapsJoinHashTable::getApproximateTupleCountFromCache ( QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier 
)
protected

Definition at line 1807 of file OverlapsJoinHashTable.cpp.

References CHECK, getOverlapsHashTableMetaInfo(), hash_table_cache_, and HashtableCacheMetaInfo::overlaps_meta_info.

Referenced by RangeJoinHashTable::approximateTupleCount(), and approximateTupleCount().

{
  CHECK(hash_table_cache_);
  HashtableCacheMetaInfo metaInfo;
  metaInfo.overlaps_meta_info = getOverlapsHashTableMetaInfo();
  auto cached_hashtable =
      hash_table_cache_->getItemFromCache(key, item_type, device_identifier, metaInfo);
  if (cached_hashtable) {
    return std::make_pair(cached_hashtable->getEntryCount() / 2,
                          cached_hashtable->getEmittedKeysCount());
  }
  return std::nullopt;
}


static auto OverlapsJoinHashTable::getCacheInvalidator ( ) -> std::function<void()>
inline static

Definition at line 69 of file OverlapsJoinHashTable.h.

References auto_tuner_cache_, CHECK, and hash_table_cache_.

{
  return []() -> void {
    CHECK(auto_tuner_cache_);
    auto auto_tuner_cache_invalidator = auto_tuner_cache_->getCacheInvalidator();
    auto_tuner_cache_invalidator();

    CHECK(hash_table_cache_);
    auto main_cache_invalidator = hash_table_cache_->getCacheInvalidator();
    main_cache_invalidator();
  };
}
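A typical use is to capture the invalidator once and run it whenever cached overlaps hash tables must be dropped; a usage sketch (surrounding context omitted):

// Clears both the auto-tuner parameter cache and the hash table cache.
auto invalidate_overlaps_caches = OverlapsJoinHashTable::getCacheInvalidator();
invalidate_overlaps_caches();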
QueryPlanHash OverlapsJoinHashTable::getCacheKey ( ) const
inline protected

Definition at line 340 of file OverlapsJoinHashTable.h.

References hashtable_cache_key_.

Referenced by reifyWithLayout().

{ return hashtable_cache_key_; }


size_t OverlapsJoinHashTable::getComponentBufferSize ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 221 of file OverlapsJoinHashTable.h.

References CHECK, and HashJoin::hash_tables_for_device_.

Referenced by codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), countBufferOff(), and payloadBufferOff().

{
  CHECK(!hash_tables_for_device_.empty());
  auto hash_table = hash_tables_for_device_.front();
  CHECK(hash_table);
  return hash_table->getEntryCount() * sizeof(int32_t);
}


int OverlapsJoinHashTable::getDeviceCount ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 159 of file OverlapsJoinHashTable.h.

References device_count_.

{ return device_count_; };
Data_Namespace::MemoryLevel OverlapsJoinHashTable::getEffectiveMemoryLevel ( const std::vector< InnerOuter > &  inner_outer_pairs) const
protected

Definition at line 1768 of file OverlapsJoinHashTable.cpp.

References Data_Namespace::CPU_LEVEL, executor_, Data_Namespace::GPU_LEVEL, RegisteredQueryHint::isHintRegistered(), kOverlapsAllowGpuBuild, memory_level_, RegisteredQueryHint::overlaps_allow_gpu_build, and query_hint_.

Referenced by RangeJoinHashTable::approximateTupleCount(), approximateTupleCount(), fetchColumnsForDevice(), RangeJoinHashTable::reifyForDevice(), reifyForDevice(), and reifyWithLayout().

{
  if (query_hint_.isHintRegistered(QueryHint::kOverlapsAllowGpuBuild) &&
      query_hint_.overlaps_allow_gpu_build &&
      this->executor_->getDataMgr()->gpusPresent() &&
      memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
    return Data_Namespace::MemoryLevel::GPU_LEVEL;
  }
  // otherwise, try to build on CPU
  return Data_Namespace::MemoryLevel::CPU_LEVEL;
}


size_t OverlapsJoinHashTable::getEmittedKeysCount ( ) const
inline protected

Definition at line 215 of file OverlapsJoinHashTable.h.

References CHECK, and HashJoin::getHashTableForDevice().

{
  auto hash_table = getHashTableForDevice(0);
  CHECK(hash_table);
  return hash_table->getEmittedKeysCount();
}


size_t OverlapsJoinHashTable::getEntryCount ( ) const
inline protected

Definition at line 209 of file OverlapsJoinHashTable.h.

References CHECK, and HashJoin::getHashTableForDevice().

Referenced by codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), and getKeyBufferSize().

{
  auto hash_table = getHashTableForDevice(0);
  CHECK(hash_table);
  return hash_table->getEntryCount();
}


std::string OverlapsJoinHashTable::getHashJoinType ( ) const
inline final protected virtual

Implements HashJoin.

Definition at line 276 of file OverlapsJoinHashTable.h.

{ return "Overlaps"; }
static HashtableRecycler* OverlapsJoinHashTable::getHashTableCache ( )
inline static

Definition at line 81 of file OverlapsJoinHashTable.h.

References CHECK, and hash_table_cache_.

Referenced by QueryRunner::QueryRunner::getCachedHashtableWithoutCacheKey(), QueryRunner::QueryRunner::getCacheItemMetric(), and QueryRunner::QueryRunner::getNumberOfCachedOverlapsHashTables().

{
  CHECK(hash_table_cache_);
  return hash_table_cache_.get();
}


HashType OverlapsJoinHashTable::getHashType ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Reimplemented in RangeJoinHashTable.

Definition at line 146 of file OverlapsJoinHashTable.h.

References CHECK, HashJoin::getHashTableForDevice(), and layout_override_.

Referenced by codegenManyKey(), codegenMatchingSet(), countBufferOff(), getKeyBufferSize(), payloadBufferOff(), toSet(), and toString().

{
  if (layout_override_) {
    return *layout_override_;
  }
  auto hash_table = getHashTableForDevice(0);
  CHECK(hash_table);
  return hash_table->getLayout();
}


const std::vector<InnerOuter>& OverlapsJoinHashTable::getInnerOuterPairs ( ) const
inline protected

Definition at line 342 of file OverlapsJoinHashTable.h.

References inner_outer_pairs_.

Referenced by RangeJoinHashTable::isProbeCompressed().

{ return inner_outer_pairs_; }


int OverlapsJoinHashTable::getInnerTableId ( ) const
override protected virtual noexcept

Implements HashJoin.

Definition at line 1780 of file OverlapsJoinHashTable.cpp.

References CHECK, HashJoin::getInnerTableId(), and inner_outer_pairs_.

Referenced by RangeJoinHashTable::reifyWithLayout().

{
  try {
    return HashJoin::getInnerTableId(inner_outer_pairs_);
  } catch (...) {
    CHECK(false);
  }
  return 0;
}


int OverlapsJoinHashTable::getInnerTableRteIdx ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 241 of file OverlapsJoinHashTable.h.

References CHECK, and inner_outer_pairs_.

{
  CHECK(!inner_outer_pairs_.empty());
  const auto first_inner_col = inner_outer_pairs_.front().first;
  return first_inner_col->get_rte_idx();
}
std::shared_ptr< OverlapsJoinHashTable > OverlapsJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper >  condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const int  device_count,
ColumnCacheMap &  column_cache,
Executor *  executor,
const HashTableBuildDagMap &  hashtable_build_dag_map,
const RegisteredQueryHint &  query_hint,
const TableIdToNodeMap &  table_id_to_node_map 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 37 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_EQ, count, get_inner_query_info(), HashtableRecycler::getHashtableKeyString(), HashJoin::getHashTypeString(), HashJoin::getInnerTableId(), RangeJoinHashTable::getInstance(), Fragmenter_Namespace::TableInfo::getNumTuplesUpperBound(), InputTableInfo::info, RegisteredQueryHint::isAnyQueryHintDelivered(), ManyToMany, HashJoin::normalizeColumnPairs(), OneToMany, VLOG, and VLOGGING.

Referenced by HashJoin::getInstance().

{
  decltype(std::chrono::steady_clock::now()) ts1, ts2;

  std::vector<InnerOuter> inner_outer_pairs;

  if (const auto range_expr =
          dynamic_cast<const Analyzer::RangeOper*>(condition->get_right_operand())) {
    return RangeJoinHashTable::getInstance(condition,
                                           range_expr,
                                           query_infos,
                                           memory_level,
                                           join_type,
                                           device_count,
                                           column_cache,
                                           executor,
                                           hashtable_build_dag_map,
                                           query_hint,
                                           table_id_to_node_map);
  } else {
    inner_outer_pairs = HashJoin::normalizeColumnPairs(
        condition.get(), *executor->getCatalog(), executor->getTemporaryTables());
  }
  CHECK(!inner_outer_pairs.empty());

  const auto getHashTableType =
      [](const std::shared_ptr<Analyzer::BinOper> condition,
         const std::vector<InnerOuter>& inner_outer_pairs) -> HashType {
    HashType layout = HashType::OneToMany;
    if (condition->is_overlaps_oper()) {
      CHECK_EQ(inner_outer_pairs.size(), size_t(1));
      if (inner_outer_pairs[0].first->get_type_info().is_array() &&
          inner_outer_pairs[0].second->get_type_info().is_array() &&
          // Bounds vs constructed points, former should yield ManyToMany
          inner_outer_pairs[0].second->get_type_info().get_size() == 32) {
        layout = HashType::ManyToMany;
      }
    }
    return layout;
  };

  const auto layout = getHashTableType(condition, inner_outer_pairs);

  if (VLOGGING(1)) {
    VLOG(1) << "Building geo hash table " << getHashTypeString(layout)
            << " for qual: " << condition->toString();
    ts1 = std::chrono::steady_clock::now();
  }

  const auto qi_0 = query_infos[0].info.getNumTuplesUpperBound();
  const auto qi_1 = query_infos[1].info.getNumTuplesUpperBound();

  VLOG(1) << "table_id = " << query_infos[0].table_id << " has " << qi_0 << " tuples.";
  VLOG(1) << "table_id = " << query_infos[1].table_id << " has " << qi_1 << " tuples.";

  const auto& query_info =
      get_inner_query_info(HashJoin::getInnerTableId(inner_outer_pairs), query_infos)
          .info;
  const auto total_entries = 2 * query_info.getNumTuplesUpperBound();
  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
    throw TooManyHashEntries();
  }

  auto hashtable_cache_key_string =
      HashtableRecycler::getHashtableKeyString(inner_outer_pairs,
                                               condition->get_optype(),
                                               join_type,
                                               hashtable_build_dag_map,
                                               executor);

  auto join_hash_table =
      std::make_shared<OverlapsJoinHashTable>(condition,
                                              join_type,
                                              query_infos,
                                              memory_level,
                                              column_cache,
                                              executor,
                                              inner_outer_pairs,
                                              device_count,
                                              hashtable_cache_key_string.first,
                                              hashtable_cache_key_string.second,
                                              table_id_to_node_map);
  if (query_hint.isAnyQueryHintDelivered()) {
    join_hash_table->registerQueryHint(query_hint);
  }
  try {
    join_hash_table->reify(layout);
  } catch (const HashJoinFail& e) {
    throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
                                   "involved in overlaps join | ") +
                       e.what());
  } catch (const ColumnarConversionNotSupported& e) {
    throw HashJoinFail(std::string("Could not build hash tables for overlaps join | "
                                   "Inner table too big. Attempt manual table reordering "
                                   "or create a single fragment inner table. | ") +
                       e.what());
  } catch (const std::exception& e) {
    throw HashJoinFail(std::string("Failed to build hash tables for overlaps join | ") +
                       e.what());
  }
  if (VLOGGING(1)) {
    ts2 = std::chrono::steady_clock::now();
    VLOG(1) << "Built geo hash table " << getHashTypeString(layout) << " in "
            << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
            << " ms";
  }
  return join_hash_table;
}
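For orientation, the layout choice above reduces to a simple rule: an overlaps qual whose inner and outer operands are both arrays, with a 32-byte outer operand (a fixed-length bounds array of four doubles), selects HashType::ManyToMany; every other overlaps qual stays HashType::OneToMany. The following standalone sketch restates that rule; TypeInfo and choose_layout are illustrative stand-ins, not engine types.

#include <iostream>

// Stand-ins for this sketch only; the engine consults Analyzer type info instead.
enum class HashType { OneToMany, ManyToMany };

struct TypeInfo {
  bool is_array;
  int size_bytes;
};

HashType choose_layout(bool is_overlaps_oper, const TypeInfo& inner, const TypeInfo& outer) {
  HashType layout = HashType::OneToMany;
  // Bounds vs constructed points: the former (32 bytes, 4 doubles) yields ManyToMany.
  if (is_overlaps_oper && inner.is_array && outer.is_array && outer.size_bytes == 32) {
    layout = HashType::ManyToMany;
  }
  return layout;
}

int main() {
  const TypeInfo bounds{true, 32};  // fixed-length bounds array: 4 * sizeof(double)
  const TypeInfo point{true, 16};   // constructed point: 2 * sizeof(double)
  std::cout << (choose_layout(true, bounds, bounds) == HashType::ManyToMany) << '\n';  // 1
  std::cout << (choose_layout(true, bounds, point) == HashType::ManyToMany) << '\n';   // 0
}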

size_t OverlapsJoinHashTable::getKeyBufferSize ( ) const
inline, protected, noexcept

Definition at line 247 of file OverlapsJoinHashTable.h.

References CHECK, getEntryCount(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by countBufferOff(), offsetBufferOff(), and payloadBufferOff().

{
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  const auto key_component_count = getKeyComponentCount();
  if (layoutRequiresAdditionalBuffers(getHashType())) {
    return getEntryCount() * key_component_count * key_component_width;
  } else {
    return getEntryCount() * (key_component_count + 1) * key_component_width;
  }
}

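The arithmetic above is easy to restate: layouts that carry separate offset and count buffers (OneToMany, ManyToMany) store only the key components in the key buffer, while a OneToOne layout appends one extra component per entry for the payload row id. A minimal self-contained re-derivation, with names local to this sketch:

#include <cassert>
#include <cstddef>

size_t key_buffer_size(size_t entry_count,
                       size_t key_component_count,
                       size_t key_component_width,  // 4 or 8 bytes
                       bool layout_requires_additional_buffers) {
  assert(key_component_width == 4 || key_component_width == 8);
  if (layout_requires_additional_buffers) {
    // OneToMany / ManyToMany: keys only; offsets and counts live in separate regions.
    return entry_count * key_component_count * key_component_width;
  }
  // OneToOne: one extra slot per entry holds the payload row id.
  return entry_count * (key_component_count + 1) * key_component_width;
}

// e.g. 1024 entries, 2 key components, 8-byte width, OneToMany:
// 1024 * 2 * 8 = 16384 bytes of key storage.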

size_t OverlapsJoinHashTable::getKeyComponentCount ( ) const
protected

Definition at line 1124 of file OverlapsJoinHashTable.cpp.

References CHECK, and inverse_bucket_sizes_for_dimension_.

Referenced by RangeJoinHashTable::codegenKey(), codegenKey(), codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), getKeyBufferSize(), RangeJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), toSet(), and toString().

{
  CHECK(!inverse_bucket_sizes_for_dimension_.empty());
  return inverse_bucket_sizes_for_dimension_.size();
}

size_t OverlapsJoinHashTable::getKeyComponentWidth ( ) const
protected

Definition at line 1120 of file OverlapsJoinHashTable.cpp.

Referenced by calculateHashTableSize(), RangeJoinHashTable::codegenKey(), codegenKey(), codegenManyKey(), codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), getKeyBufferSize(), RangeJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), RangeJoinHashTable::reifyForDevice(), reifyForDevice(), toSet(), and toString().

{
  return 8;
}


Data_Namespace::MemoryLevel OverlapsJoinHashTable::getMemoryLevel ( ) const
inline, override, protected, virtual, noexcept

Implements HashJoin.

Definition at line 155 of file OverlapsJoinHashTable.h.

References memory_level_.

{
  return memory_level_;
}

std::optional<OverlapsHashTableMetaInfo> OverlapsJoinHashTable::getOverlapsHashTableMetaInfo ( )
inline, protected

Definition at line 299 of file OverlapsJoinHashTable.h.

References hashtable_cache_meta_info_, and HashtableCacheMetaInfo::overlaps_meta_info.

Referenced by getApproximateTupleCountFromCache(), initHashTableOnCpuFromCache(), and putHashTableOnCpuToCache().

{
  return hashtable_cache_meta_info_.overlaps_meta_info;
}

static OverlapsTuningParamRecycler* OverlapsJoinHashTable::getOverlapsTuningParamCache ( )
inline, static

Definition at line 86 of file OverlapsJoinHashTable.h.

References auto_tuner_cache_, and CHECK.

Referenced by QueryRunner::QueryRunner::getNumberOfCachedOverlapsHashTableTuringParams().

{
  CHECK(auto_tuner_cache_);
  return auto_tuner_cache_.get();
}

const RegisteredQueryHint& OverlapsJoinHashTable::getRegisteredQueryHint ( )
inline, protected

Definition at line 203 of file OverlapsJoinHashTable.h.

References query_hint_.

Referenced by reifyWithLayout().

{ return query_hint_; }

std::shared_ptr< BaselineHashTable > OverlapsJoinHashTable::initHashTableOnCpu ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const HashType  layout,
const size_t  entry_count,
const size_t  emitted_keys_count,
const bool  skip_hashtable_caching 
)
protected

Definition at line 1265 of file OverlapsJoinHashTable.cpp.

References CHECK, composite_key_info_, count, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, cpu_hash_table_buff_mutex_, DEBUG_TIMER, getKeyComponentCount(), getKeyComponentWidth(), hashtable_cache_key_, BaselineJoinHashTableBuilder::initHashTableOnCpu(), initHashTableOnCpuFromCache(), join_type_, layout_override_, HashJoin::layoutRequiresAdditionalBuffers(), ManyToMany, OneToMany, OVERLAPS_HT, putHashTableOnCpuToCache(), to_string(), and VLOG.

Referenced by reifyForDevice().

{
  auto timer = DEBUG_TIMER(__func__);
  decltype(std::chrono::steady_clock::now()) ts1, ts2;
  ts1 = std::chrono::steady_clock::now();
  CHECK(!join_columns.empty());
  CHECK(!join_bucket_info.empty());
  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
  if (auto generic_hash_table =
          initHashTableOnCpuFromCache(hashtable_cache_key_,
                                      CacheItemType::OVERLAPS_HT,
                                      DataRecyclerUtil::CPU_DEVICE_IDENTIFIER)) {
    if (auto hash_table =
            std::dynamic_pointer_cast<BaselineHashTable>(generic_hash_table)) {
      VLOG(1) << "Using cached CPU hash table for initialization.";
      // See if a hash table of a different layout was returned.
      // If it was OneToMany, we can reuse it on ManyToMany.
      if (layout == HashType::ManyToMany &&
          hash_table->getLayout() == HashType::OneToMany) {
        // use the cached hash table
        layout_override_ = HashType::ManyToMany;
        return hash_table;
      }
      if (layout == hash_table->getLayout()) {
        return hash_table;
      }
    }
  }
  CHECK(layoutRequiresAdditionalBuffers(layout));
  const auto key_component_count =
      join_bucket_info[0].inverse_bucket_sizes_for_dimension.size();

  const auto key_handler =
      OverlapsKeyHandler(key_component_count,
                         &join_columns[0],
                         join_bucket_info[0].inverse_bucket_sizes_for_dimension.data());
  BaselineJoinHashTableBuilder builder;
  const auto err = builder.initHashTableOnCpu(&key_handler,
                                              composite_key_info_,
                                              join_columns,
                                              join_column_types,
                                              join_bucket_info,
                                              entry_count,
                                              emitted_keys_count,
                                              layout,
                                              join_type_,
                                              getKeyComponentWidth(),
                                              getKeyComponentCount());
  ts2 = std::chrono::steady_clock::now();
  if (err) {
    throw HashJoinFail(
        std::string("Unrecognized error when initializing CPU overlaps hash table (") +
        std::to_string(err) + std::string(")"));
  }
  std::shared_ptr<BaselineHashTable> hash_table = builder.getHashTable();
  if (skip_hashtable_caching) {
    VLOG(1) << "Skip to cache overlaps join hashtable";
  } else {
    auto hashtable_build_time =
        std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count();
    putHashTableOnCpuToCache(hashtable_cache_key_,
                             CacheItemType::OVERLAPS_HT,
                             hash_table,
                             DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
                             hashtable_build_time);
  }
  return hash_table;
}

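Note the reuse rule in the cache lookup above: an exact layout match is returned directly, and a cached OneToMany table also satisfies a ManyToMany request (recorded through layout_override_). A small sketch of just that rule, using local stand-in types rather than the engine's BaselineHashTable:

#include <memory>

enum class HashType { OneToOne, OneToMany, ManyToMany };

struct CachedTable {
  HashType layout;
};

// Returns the cached table when it is usable for the requested layout, else nullptr
// (which forces a fresh build).
std::shared_ptr<CachedTable> try_reuse(std::shared_ptr<CachedTable> cached,
                                       HashType requested) {
  if (!cached) {
    return nullptr;
  }
  if (requested == cached->layout) {
    return cached;  // exact match
  }
  if (requested == HashType::ManyToMany && cached->layout == HashType::OneToMany) {
    return cached;  // a OneToMany table is structurally reusable for ManyToMany
  }
  return nullptr;
}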

std::shared_ptr< HashTable > OverlapsJoinHashTable::initHashTableOnCpuFromCache ( QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier 
)
protected

Definition at line 1789 of file OverlapsJoinHashTable.cpp.

References CHECK, DEBUG_TIMER, getOverlapsHashTableMetaInfo(), hash_table_cache_, HashtableCacheMetaInfo::overlaps_meta_info, and VLOG.

Referenced by RangeJoinHashTable::initHashTableOnCpu(), and initHashTableOnCpu().

{
  auto timer = DEBUG_TIMER(__func__);
  VLOG(1) << "Checking CPU hash table cache.";
  CHECK(hash_table_cache_);
  HashtableCacheMetaInfo meta_info;
  meta_info.overlaps_meta_info = getOverlapsHashTableMetaInfo();
  auto cached_hashtable =
      hash_table_cache_->getItemFromCache(key, item_type, device_identifier, meta_info);
  if (cached_hashtable) {
    return cached_hashtable;
  }
  return nullptr;
}

bool OverlapsJoinHashTable::isBitwiseEq ( ) const
override, protected, virtual

Implements HashJoin.

Definition at line 1844 of file OverlapsJoinHashTable.cpp.

References condition_, and kBW_EQ.

{
  return condition_->get_optype() == kBW_EQ;
}

size_t OverlapsJoinHashTable::offsetBufferOff ( ) const
inline, override, protected, virtual, noexcept

Implements HashJoin.

Definition at line 258 of file OverlapsJoinHashTable.h.

References getKeyBufferSize().

Referenced by codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), countBufferOff(), toSet(), and toString().

{ return getKeyBufferSize(); }

size_t OverlapsJoinHashTable::payloadBufferOff ( ) const
inline, override, protected, virtual, noexcept

Implements HashJoin.

Definition at line 268 of file OverlapsJoinHashTable.h.

References countBufferOff(), getComponentBufferSize(), getHashType(), getKeyBufferSize(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by toSet(), and toString().

{
  if (layoutRequiresAdditionalBuffers(getHashType())) {
    return countBufferOff() + getComponentBufferSize();
  } else {
    return getKeyBufferSize();
  }
}

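Together with offsetBufferOff() and countBufferOff(), this describes one contiguous allocation: keys first, then, for layouts with additional buffers, an offsets region and a counts region of one component buffer each, then the payload. A standalone sketch of the offset arithmetic (the struct and function are illustrative, not engine API):

#include <cstddef>

struct BufferOffsets {
  size_t offset_off;   // where the offsets region starts
  size_t count_off;    // where the counts region starts
  size_t payload_off;  // where the payload region starts
};

BufferOffsets compute_offsets(size_t key_buffer_size,
                              size_t component_buffer_size,
                              bool layout_requires_additional_buffers) {
  if (layout_requires_additional_buffers) {  // OneToMany / ManyToMany
    const size_t offset_off = key_buffer_size;
    const size_t count_off = offset_off + component_buffer_size;
    const size_t payload_off = count_off + component_buffer_size;
    return {offset_off, count_off, payload_off};
  }
  // OneToOne: no offsets/counts regions; everything collapses onto the key buffer end.
  return {key_buffer_size, key_buffer_size, key_buffer_size};
}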

void OverlapsJoinHashTable::putHashTableOnCpuToCache ( QueryPlanHash  key,
CacheItemType  item_type,
std::shared_ptr< HashTable >  hashtable_ptr,
DeviceIdentifier  device_identifier,
size_t  hashtable_building_time 
)
protected

Definition at line 1823 of file OverlapsJoinHashTable.cpp.

References CHECK, CPU, getOverlapsHashTableMetaInfo(), hash_table_cache_, HashtableCacheMetaInfo::overlaps_meta_info, and query_hint_.

Referenced by RangeJoinHashTable::initHashTableOnCpu(), and initHashTableOnCpu().

{
  CHECK(hash_table_cache_);
  CHECK(hashtable_ptr && !hashtable_ptr->getGpuBuffer());
  HashtableCacheMetaInfo meta_info;
  meta_info.overlaps_meta_info = getOverlapsHashTableMetaInfo();
  meta_info.registered_query_hint = query_hint_;
  hash_table_cache_->putItemToCache(
      key,
      hashtable_ptr,
      item_type,
      device_identifier,
      hashtable_ptr->getHashTableBufferSize(ExecutorDeviceType::CPU),
      hashtable_building_time,
      meta_info);
}

void OverlapsJoinHashTable::registerQueryHint ( const RegisteredQueryHint query_hint)
inline, protected

Definition at line 205 of file OverlapsJoinHashTable.h.

References query_hint_.

{
  query_hint_ = query_hint;
}

void OverlapsJoinHashTable::reify ( const HashType  preferred_layout)
protected

Definition at line 1129 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_EQ, CHECK_LT, composite_key_info_, condition_, DEBUG_TIMER, device_count_, executor_, HashJoin::getCompositeKeyInfo(), inner_outer_pairs_, ManyToMany, OneToMany, reifyWithLayout(), and VLOG.

{
  auto timer = DEBUG_TIMER(__func__);
  CHECK_LT(0, device_count_);
  composite_key_info_ = HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);

  CHECK(condition_->is_overlaps_oper());
  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
  HashType layout;
  if (inner_outer_pairs_[0].second->get_type_info().is_fixlen_array() &&
      inner_outer_pairs_[0].second->get_type_info().get_size() == 32) {
    // bounds array
    layout = HashType::ManyToMany;
  } else {
    layout = HashType::OneToMany;
  }
  try {
    reifyWithLayout(layout);
    return;
  } catch (const std::exception& e) {
    VLOG(1) << "Caught exception while building overlaps baseline hash table: "
            << e.what();
    throw;
  }
}

void OverlapsJoinHashTable::reifyForDevice ( const ColumnsForDevice &  columns_for_device,
const HashType  layout,
const size_t  entry_count,
const size_t  emitted_keys_count,
const bool  skip_hashtable_caching,
const int  device_id,
const logger::ThreadId  parent_thread_id 
)
protected

Definition at line 1210 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_EQ, CHECK_LT, Data_Namespace::CPU_LEVEL, DEBUG_TIMER_NEW_THREAD, getEffectiveMemoryLevel(), getKeyComponentWidth(), Data_Namespace::GPU_LEVEL, HashJoin::hash_tables_for_device_, initHashTableOnCpu(), inner_outer_pairs_, ColumnsForDevice::join_buckets, ColumnsForDevice::join_column_types, ColumnsForDevice::join_columns, HashJoin::layoutRequiresAdditionalBuffers(), memory_level_, UNREACHABLE, and VLOG.

Referenced by reifyImpl().

{
  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
  CHECK_EQ(getKeyComponentWidth(), size_t(8));
  CHECK(layoutRequiresAdditionalBuffers(layout));
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);

  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
    VLOG(1) << "Building overlaps join hash table on CPU.";
    auto hash_table = initHashTableOnCpu(columns_for_device.join_columns,
                                         columns_for_device.join_column_types,
                                         columns_for_device.join_buckets,
                                         layout,
                                         entry_count,
                                         emitted_keys_count,
                                         skip_hashtable_caching);
    CHECK(hash_table);

#ifdef HAVE_CUDA
    if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
      auto gpu_hash_table = copyCpuHashTableToGpu(
          std::move(hash_table), layout, entry_count, emitted_keys_count, device_id);
      CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
      hash_tables_for_device_[device_id] = std::move(gpu_hash_table);
    } else {
#else
    CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
#endif
      CHECK_EQ(hash_tables_for_device_.size(), size_t(1));
      hash_tables_for_device_[0] = std::move(hash_table);
#ifdef HAVE_CUDA
    }
#endif
  } else {
#ifdef HAVE_CUDA
    auto hash_table = initHashTableOnGpu(columns_for_device.join_columns,
                                         columns_for_device.join_column_types,
                                         columns_for_device.join_buckets,
                                         layout,
                                         entry_count,
                                         emitted_keys_count,
                                         device_id);
    CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
    hash_tables_for_device_[device_id] = std::move(hash_table);
#else
    UNREACHABLE();
#endif
  }
}

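The branching above amounts to a three-way dispatch: build on CPU when the effective memory level is CPU, additionally copy the CPU table to the GPU when the requested memory level is GPU (CUDA builds only), or build directly on the GPU. A compact model of that dispatch, with placeholder build and copy callables supplied by the caller:

#include <utility>

enum class MemoryLevel { CPU_LEVEL, GPU_LEVEL };

// Table must be named explicitly at the call site; the callables stand in for
// initHashTableOnCpu / initHashTableOnGpu / copyCpuHashTableToGpu in this sketch.
template <typename Table, typename CpuBuild, typename GpuBuild, typename CpuToGpuCopy>
Table reify_for_device(MemoryLevel effective_level,
                       MemoryLevel requested_level,
                       CpuBuild build_on_cpu,
                       GpuBuild build_on_gpu,
                       CpuToGpuCopy copy_to_gpu) {
  if (effective_level == MemoryLevel::CPU_LEVEL) {
    Table table = build_on_cpu();
    if (requested_level == MemoryLevel::GPU_LEVEL) {
      return copy_to_gpu(std::move(table));  // CPU build, GPU residence (needs CUDA)
    }
    return table;  // CPU build, CPU residence
  }
  return build_on_gpu();  // GPU-resident build (CUDA-only path)
}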

void OverlapsJoinHashTable::reifyImpl ( std::vector< ColumnsForDevice > &  columns_per_device,
const Fragmenter_Namespace::TableInfo &  query_info,
const HashType  layout,
const size_t  shard_count,
const size_t  entry_count,
const size_t  emitted_keys_count,
const bool  skip_hashtable_caching,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protectedvirtual

Definition at line 1154 of file OverlapsJoinHashTable.cpp.

References threading_serial::async(), CompositeKeyInfo::cache_key_chunks, chosen_overlaps_bucket_threshold_, chosen_overlaps_max_table_size_bytes_, composite_key_info_, condition_, device_count_, EMPTY_HASHED_PLAN_DAG_KEY, EMPTY_QUERY_PLAN, Fragmenter_Namespace::TableInfo::fragments, getAlternativeCacheKey(), hashtable_cache_key_, inner_outer_pairs_, inverse_bucket_sizes_for_dimension_, only_shards_for_device(), query_plan_dag_, reifyForDevice(), setOverlapsHashtableMetaInfo(), logger::thread_id(), and VLOG.

Referenced by reifyWithLayout().

{
  std::vector<std::future<void>> init_threads;
  chosen_overlaps_bucket_threshold_ = chosen_bucket_threshold;
  chosen_overlaps_max_table_size_bytes_ = chosen_max_hashtable_size;
  setOverlapsHashtableMetaInfo(chosen_overlaps_max_table_size_bytes_,
                               chosen_overlaps_bucket_threshold_,
                               inverse_bucket_sizes_for_dimension_);
  if ((query_plan_dag_.compare(EMPTY_QUERY_PLAN) == 0 ||
       hashtable_cache_key_ == EMPTY_HASHED_PLAN_DAG_KEY) &&
      inner_outer_pairs_.front().first->get_table_id() > 0) {
    // sometimes we cannot retrieve the query plan dag, so we try the recycler cache
    // with the old-fashioned cache key when dealing with a hashtable for a
    // non-temporary table
    AlternativeCacheKeyForOverlapsHashJoin cache_key{
        inner_outer_pairs_,
        columns_per_device.front().join_columns.front().num_elems,
        composite_key_info_.cache_key_chunks,
        condition_->get_optype(),
        chosen_overlaps_max_table_size_bytes_,
        chosen_overlaps_bucket_threshold_,
        inverse_bucket_sizes_for_dimension_};
    hashtable_cache_key_ = getAlternativeCacheKey(cache_key);
    VLOG(2) << "Use alternative hashtable cache key due to unavailable query plan dag "
               "extraction";
  }
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    const auto fragments =
        shard_count
            ? only_shards_for_device(query_info.fragments, device_id, device_count_)
            : query_info.fragments;
    init_threads.push_back(std::async(std::launch::async,
                                      &OverlapsJoinHashTable::reifyForDevice,
                                      this,
                                      columns_per_device[device_id],
                                      layout,
                                      entry_count,
                                      emitted_keys_count,
                                      skip_hashtable_caching,
                                      device_id,
                                      logger::thread_id()));
  }
  for (auto& init_thread : init_threads) {
    init_thread.wait();
  }
  for (auto& init_thread : init_threads) {
    init_thread.get();
  }
}

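The per-device fan-out above follows a common pattern: launch one std::async task per device, then wait() on every future before calling get(), so all builds finish before the first stored exception is rethrown. A runnable miniature with placeholder per-device work:

#include <future>
#include <iostream>
#include <vector>

int main() {
  const int device_count = 4;
  std::vector<std::future<void>> init_threads;
  for (int device_id = 0; device_id < device_count; ++device_id) {
    init_threads.push_back(std::async(std::launch::async, [device_id] {
      std::cout << "building hash table for device " << device_id << '\n';
    }));
  }
  for (auto& t : init_threads) {
    t.wait();  // join everything first...
  }
  for (auto& t : init_threads) {
    t.get();   // ...then surface any stored exception
  }
}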

void OverlapsJoinHashTable::reifyWithLayout ( const HashType  layout)
protected, virtual

Reimplemented in RangeJoinHashTable.

Definition at line 541 of file OverlapsJoinHashTable.cpp.

References gpu_enabled::accumulate(), auto_tuner_cache_, CompositeKeyInfo::cache_key_chunks, calculateHashTableSize(), CHECK, CHECK_EQ, CHECK_GE, composite_key_info_, computeHashTableCounts(), condition_, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, DEBUG_TIMER, device_count_, EMPTY_HASHED_PLAN_DAG_KEY, EMPTY_QUERY_PLAN, executor_, fetchColumnsForDevice(), g_overlaps_max_table_size_bytes, g_overlaps_target_entries_per_bin, generateCacheKey(), get_inner_query_info(), getAlternativeCacheKey(), getCacheKey(), getEffectiveMemoryLevel(), HashJoin::getHashTypeString(), HashJoin::getInnerTableId(), getRegisteredQueryHint(), Data_Namespace::GPU_LEVEL, hash_table_cache_, hashtable_cache_key_, InputTableInfo::info, inner_outer_pairs_, inverse_bucket_sizes_for_dimension_, RegisteredQueryHint::isHintRegistered(), kOverlapsAllowGpuBuild, kOverlapsBucketThreshold, kOverlapsKeysPerBin, kOverlapsMaxSize, kOverlapsNoCache, HashJoin::layoutRequiresAdditionalBuffers(), memory_level_, only_shards_for_device(), RegisteredQueryHint::overlaps_allow_gpu_build, OVERLAPS_AUTO_TUNER_PARAM, OVERLAPS_HT, query_hint_, query_infos_, query_plan_dag_, reifyImpl(), setInverseBucketSizeInfo(), setOverlapsHashtableMetaInfo(), shardCount(), and VLOG.

Referenced by reify().

{
  auto timer = DEBUG_TIMER(__func__);
  CHECK(layoutRequiresAdditionalBuffers(layout));
  const auto& query_info =
      get_inner_query_info(HashJoin::getInnerTableId(inner_outer_pairs_), query_infos_)
          .info;
  VLOG(1) << "Reify with layout " << getHashTypeString(layout)
          << "for table_id: " << HashJoin::getInnerTableId(inner_outer_pairs_);
  if (query_info.fragments.empty()) {
    return;
  }

  auto overlaps_max_table_size_bytes = g_overlaps_max_table_size_bytes;
  std::optional<double> overlaps_threshold_override;
  double overlaps_target_entries_per_bin = g_overlaps_target_entries_per_bin;
  auto query_hint = getRegisteredQueryHint();
  auto skip_hashtable_caching = false;
  if (query_hint.isHintRegistered(QueryHint::kOverlapsBucketThreshold)) {
    VLOG(1) << "Setting overlaps bucket threshold "
               "\'overlaps_hashjoin_bucket_threshold\' via "
               "query hint: "
            << query_hint.overlaps_bucket_threshold;
    overlaps_threshold_override = query_hint.overlaps_bucket_threshold;
  }
  if (query_hint.isHintRegistered(QueryHint::kOverlapsMaxSize)) {
    std::ostringstream oss;
    oss << "User requests to change a threshold \'overlaps_max_table_size_bytes\' via "
           "query hint";
    if (!overlaps_threshold_override.has_value()) {
      oss << ": " << overlaps_max_table_size_bytes << " -> "
          << query_hint.overlaps_max_size;
      overlaps_max_table_size_bytes = query_hint.overlaps_max_size;
    } else {
      oss << ", but is skipped since the query hint also changes the threshold "
             "\'overlaps_hashjoin_bucket_threshold\'";
    }
    VLOG(1) << oss.str();
  }
  if (query_hint.isHintRegistered(QueryHint::kOverlapsNoCache)) {
    VLOG(1) << "User requests to skip caching overlaps join hashtable and its tuned "
               "parameters for this query";
    skip_hashtable_caching = true;
  }
  if (query_hint.isHintRegistered(QueryHint::kOverlapsKeysPerBin)) {
    VLOG(1) << "User requests to change a threshold \'overlaps_keys_per_bin\' via query "
               "hint: "
            << overlaps_target_entries_per_bin << " -> "
            << query_hint.overlaps_keys_per_bin;
    overlaps_target_entries_per_bin = query_hint.overlaps_keys_per_bin;
  }
  auto data_mgr = executor_->getDataMgr();
  // We prioritize the CPU when building an overlaps join hashtable. If a GPU is
  // present and the corresponding hint is delivered we selectively allow the GPU to
  // build it, but if the user forces CPU execution mode we must not use the GPU for
  // the build.
  auto allow_gpu_hashtable_build =
      query_hint.isHintRegistered(QueryHint::kOverlapsAllowGpuBuild) &&
      query_hint.overlaps_allow_gpu_build;
  if (allow_gpu_hashtable_build) {
    if (data_mgr->gpusPresent() &&
        memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
      VLOG(1) << "A user forces to build GPU hash table for this overlaps join operator";
    } else {
      allow_gpu_hashtable_build = false;
      VLOG(1) << "A user forces to build GPU hash table for this overlaps join operator "
                 "but we "
                 "skip it since either GPU is not presented or CPU execution mode is set";
    }
  }

  std::vector<ColumnsForDevice> columns_per_device;
  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
  if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL &&
      allow_gpu_hashtable_build) {
    for (int device_id = 0; device_id < device_count_; ++device_id) {
      dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(data_mgr, device_id));
    }
  }
  const auto shard_count = shardCount();
  size_t total_num_tuples = 0;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    const auto fragments =
        shard_count
            ? only_shards_for_device(query_info.fragments, device_id, device_count_)
            : query_info.fragments;
    const size_t crt_num_tuples =
        std::accumulate(fragments.begin(),
                        fragments.end(),
                        size_t(0),
                        [](const auto& sum, const auto& fragment) {
                          return sum + fragment.getNumTuples();
                        });
    total_num_tuples += crt_num_tuples;
    const auto columns_for_device =
        fetchColumnsForDevice(fragments,
                              device_id,
                              memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL &&
                                      allow_gpu_hashtable_build
                                  ? dev_buff_owners[device_id].get()
                                  : nullptr);
    columns_per_device.push_back(columns_for_device);
  }

  if (overlaps_threshold_override) {
    // compute bucket sizes based on the user provided threshold
    BucketSizeTuner tuner(/*initial_threshold=*/*overlaps_threshold_override,
                          /*step=*/1.0,
                          /*min_threshold=*/0.0,
                          getEffectiveMemoryLevel(inner_outer_pairs_),
                          columns_per_device,
                          inner_outer_pairs_,
                          total_num_tuples,
                          executor_);
    const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();

    auto [entry_count, emitted_keys_count] =
        computeHashTableCounts(shard_count,
                               inverse_bucket_sizes,
                               columns_per_device,
                               overlaps_max_table_size_bytes,
                               *overlaps_threshold_override);
    setInverseBucketSizeInfo(inverse_bucket_sizes, columns_per_device, device_count_);
    // reifyImpl will check the hash table cache for an appropriate hash table w/ those
    // bucket sizes (or within tolerances) if a hash table exists use it, otherwise build
    // one
    generateCacheKey(overlaps_max_table_size_bytes, *overlaps_threshold_override);
    reifyImpl(columns_per_device,
              query_info,
              layout,
              shard_count,
              entry_count,
              emitted_keys_count,
              skip_hashtable_caching,
              overlaps_max_table_size_bytes,
              *overlaps_threshold_override);
  } else {
    double overlaps_bucket_threshold = std::numeric_limits<double>::max();
    generateCacheKey(overlaps_max_table_size_bytes, overlaps_bucket_threshold);
    auto candidate_auto_tuner_cache_key = getCacheKey();
    if ((query_plan_dag_.compare(EMPTY_QUERY_PLAN) == 0 ||
         hashtable_cache_key_ == EMPTY_HASHED_PLAN_DAG_KEY) &&
        inner_outer_pairs_.front().first->get_table_id() > 0) {
      AlternativeCacheKeyForOverlapsHashJoin cache_key{
          inner_outer_pairs_,
          columns_per_device.front().join_columns.front().num_elems,
          composite_key_info_.cache_key_chunks,
          condition_->get_optype(),
          overlaps_max_table_size_bytes,
          overlaps_bucket_threshold};
      candidate_auto_tuner_cache_key = getAlternativeCacheKey(cache_key);
      VLOG(2) << "Use alternative auto tuner cache key due to unavailable query plan dag "
                 "extraction";
    }
    auto cached_bucket_threshold =
        auto_tuner_cache_->getItemFromCache(candidate_auto_tuner_cache_key,
                                            CacheItemType::OVERLAPS_AUTO_TUNER_PARAM,
                                            DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
    if (cached_bucket_threshold) {
      overlaps_bucket_threshold = cached_bucket_threshold->bucket_threshold;
      auto inverse_bucket_sizes = cached_bucket_threshold->bucket_sizes;
      setOverlapsHashtableMetaInfo(
          overlaps_max_table_size_bytes, overlaps_bucket_threshold, inverse_bucket_sizes);
      generateCacheKey(overlaps_max_table_size_bytes, overlaps_bucket_threshold);
      if ((query_plan_dag_.compare(EMPTY_QUERY_PLAN) == 0 ||
           hashtable_cache_key_ == EMPTY_HASHED_PLAN_DAG_KEY) &&
          inner_outer_pairs_.front().first->get_table_id() > 0) {
        AlternativeCacheKeyForOverlapsHashJoin cache_key{
            inner_outer_pairs_,
            columns_per_device.front().join_columns.front().num_elems,
            composite_key_info_.cache_key_chunks,
            condition_->get_optype(),
            overlaps_max_table_size_bytes,
            overlaps_bucket_threshold,
            inverse_bucket_sizes};
        hashtable_cache_key_ = getAlternativeCacheKey(cache_key);
        VLOG(2) << "Use alternative hashtable cache key due to unavailable query plan "
                   "dag extraction";
      }
      if (auto hash_table =
              hash_table_cache_->getItemFromCache(hashtable_cache_key_,
                                                  CacheItemType::OVERLAPS_HT,
                                                  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
                                                  std::nullopt)) {
        // if we already have a built hash table, we can skip the scans required for
        // computing bucket size and tuple count
        // reset as the hash table sizes can vary a bit
        setInverseBucketSizeInfo(inverse_bucket_sizes, columns_per_device, device_count_);
        CHECK(hash_table);

        VLOG(1) << "Using cached hash table bucket size";

        reifyImpl(columns_per_device,
                  query_info,
                  layout,
                  shard_count,
                  hash_table->getEntryCount(),
                  hash_table->getEmittedKeysCount(),
                  skip_hashtable_caching,
                  overlaps_max_table_size_bytes,
                  overlaps_bucket_threshold);
      } else {
        VLOG(1) << "Computing bucket size for cached bucket threshold";
        // compute bucket size using our cached tuner value
        BucketSizeTuner tuner(/*initial_threshold=*/overlaps_bucket_threshold,
                              /*step=*/1.0,
                              /*min_threshold=*/0.0,
                              getEffectiveMemoryLevel(inner_outer_pairs_),
                              columns_per_device,
                              inner_outer_pairs_,
                              total_num_tuples,
                              executor_);

        const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();

        auto [entry_count, emitted_keys_count] =
            computeHashTableCounts(shard_count,
                                   inverse_bucket_sizes,
                                   columns_per_device,
                                   overlaps_max_table_size_bytes,
                                   overlaps_bucket_threshold);
        setInverseBucketSizeInfo(inverse_bucket_sizes, columns_per_device, device_count_);

        reifyImpl(columns_per_device,
                  query_info,
                  layout,
                  shard_count,
                  entry_count,
                  emitted_keys_count,
                  skip_hashtable_caching,
                  overlaps_max_table_size_bytes,
                  overlaps_bucket_threshold);
      }
    } else {
      // compute bucket size using the auto tuner
      BucketSizeTuner tuner(
          /*initial_threshold=*/overlaps_bucket_threshold,
          /*step=*/2.0,
          /*min_threshold=*/1e-7,
          getEffectiveMemoryLevel(inner_outer_pairs_),
          columns_per_device,
          inner_outer_pairs_,
          total_num_tuples,
          executor_);

      VLOG(1) << "Running overlaps join size auto tune with parameters: " << tuner;

      // manages the tuning state machine
      TuningState tuning_state(overlaps_max_table_size_bytes,
                               overlaps_target_entries_per_bin);
      while (tuner.tuneOneStep(tuning_state.tuning_direction)) {
        const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();

        const auto [crt_entry_count, crt_emitted_keys_count] =
            computeHashTableCounts(shard_count,
                                   inverse_bucket_sizes,
                                   columns_per_device,
                                   tuning_state.overlaps_max_table_size_bytes,
                                   tuning_state.chosen_overlaps_threshold);
        const size_t hash_table_size = calculateHashTableSize(
            inverse_bucket_sizes.size(), crt_emitted_keys_count, crt_entry_count);
        HashTableProps crt_props(crt_entry_count,
                                 crt_emitted_keys_count,
                                 hash_table_size,
                                 inverse_bucket_sizes);
        VLOG(1) << "Tuner output: " << tuner << " with properties " << crt_props;

        const auto should_continue = tuning_state(crt_props, tuner.getMinBucketSize());
        setInverseBucketSizeInfo(
            tuning_state.crt_props.bucket_sizes, columns_per_device, device_count_);
        if (!should_continue) {
          break;
        }
      }

      const auto& crt_props = tuning_state.crt_props;
      // sanity check that the hash table size has not changed. this is a fairly
      // inexpensive check to ensure the above algorithm is consistent
      const size_t hash_table_size =
          calculateHashTableSize(inverse_bucket_sizes_for_dimension_.size(),
                                 crt_props.emitted_keys_count,
                                 crt_props.entry_count);
      CHECK_EQ(crt_props.hash_table_size, hash_table_size);

      if (inverse_bucket_sizes_for_dimension_.empty() ||
          hash_table_size > overlaps_max_table_size_bytes) {
        VLOG(1) << "Could not find suitable overlaps join parameters to create hash "
                   "table under max allowed size ("
                << overlaps_max_table_size_bytes << ") bytes.";
        throw OverlapsHashTableTooBig(overlaps_max_table_size_bytes);
      }

      VLOG(1) << "Final tuner output: " << tuner << " with properties " << crt_props;
      CHECK(!inverse_bucket_sizes_for_dimension_.empty());
      VLOG(1) << "Final bucket sizes: ";
      for (size_t dim = 0; dim < inverse_bucket_sizes_for_dimension_.size(); dim++) {
        VLOG(1) << "dim[" << dim
                << "]: " << 1.0 / inverse_bucket_sizes_for_dimension_[dim];
      }
      CHECK_GE(tuning_state.chosen_overlaps_threshold, double(0));
      generateCacheKey(tuning_state.overlaps_max_table_size_bytes,
                       tuning_state.chosen_overlaps_threshold);
      candidate_auto_tuner_cache_key = getCacheKey();
      if (skip_hashtable_caching) {
        VLOG(1) << "Skip to add tuned parameters to auto tuner";
      } else {
        AutoTunerMetaInfo meta_info{tuning_state.overlaps_max_table_size_bytes,
                                    tuning_state.chosen_overlaps_threshold,
                                    inverse_bucket_sizes_for_dimension_};
        auto_tuner_cache_->putItemToCache(candidate_auto_tuner_cache_key,
                                          meta_info,
                                          CacheItemType::OVERLAPS_AUTO_TUNER_PARAM,
                                          DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
                                          0,
                                          0);
      }
      overlaps_bucket_threshold = tuning_state.chosen_overlaps_threshold;
      reifyImpl(columns_per_device,
                query_info,
                layout,
                shard_count,
                crt_props.entry_count,
                crt_props.emitted_keys_count,
                skip_hashtable_caching,
                overlaps_max_table_size_bytes,
                overlaps_bucket_threshold);
    }
  }
}

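For orientation, the auto-tuning branch reduces to: step the candidate bucket threshold, project the resulting hash table size, and stop when the tuning state says so, throwing when nothing fits the budget. The sketch below is only a schematic reduction with an invented linear cost model; the real BucketSizeTuner and TuningState also track keys per bin, tuning direction, and reverse-search iterations.

#include <cstddef>
#include <cstdio>

int main() {
  const size_t max_table_size_bytes = 1 << 20;  // budget, like overlaps_max_table_size_bytes
  double bucket_threshold = 1024.0;             // candidate bucket size (illustrative units)
  const double step = 2.0;                      // shrink factor per tuning step
  const double min_threshold = 1e-7;

  double chosen = -1.0;
  while (bucket_threshold > min_threshold) {
    // Invented stand-in cost model: smaller buckets -> each geometry spans more bins
    // -> more emitted keys -> larger table.
    const auto projected_size = static_cast<size_t>(1e9 / bucket_threshold);
    if (projected_size > max_table_size_bytes) {
      break;  // this candidate overshoots the budget; keep the previous one
    }
    chosen = bucket_threshold;  // fits; try a finer bucket size
    bucket_threshold /= step;
  }
  if (chosen < 0) {
    std::printf("no suitable parameters found\n");  // maps to OverlapsHashTableTooBig
    return 1;
  }
  std::printf("chosen threshold: %g\n", chosen);
  return 0;
}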

void OverlapsJoinHashTable::setInverseBucketSizeInfo ( const std::vector< double > &  inverse_bucket_sizes,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  device_count 
)
protected

Definition at line 1104 of file OverlapsJoinHashTable.cpp.

References CHECK_EQ, inner_outer_pairs_, and inverse_bucket_sizes_for_dimension_.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  // set global bucket size
  inverse_bucket_sizes_for_dimension_ = inverse_bucket_sizes;

  // re-compute bucket counts per device based on global bucket size
  CHECK_EQ(columns_per_device.size(), size_t(device_count));
  for (size_t device_id = 0; device_id < device_count; ++device_id) {
    auto& columns_for_device = columns_per_device[device_id];
    columns_for_device.setBucketInfo(inverse_bucket_sizes_for_dimension_,
                                     inner_outer_pairs_);
  }
}

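A note on why inverses are stored at all: binning a coordinate then becomes a multiply rather than a divide in the hot key-generation path. Illustrative values only:

#include <cmath>
#include <cstdio>

int main() {
  const double bucket_size_deg = 0.25;                       // 0.25-degree bins
  const double inverse_bucket_size = 1.0 / bucket_size_deg;  // 4.0
  const double lon = -122.41;
  const auto bin = static_cast<long>(std::floor(lon * inverse_bucket_size));
  std::printf("lon %.2f falls in bin %ld\n", lon, bin);  // bin -490
}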

void OverlapsJoinHashTable::setOverlapsHashtableMetaInfo ( size_t  max_table_size_bytes,
double  bucket_threshold,
std::vector< double > &  bucket_sizes 
)
inline, protected

Definition at line 344 of file OverlapsJoinHashTable.h.

References OverlapsHashTableMetaInfo::bucket_sizes, hashtable_cache_meta_info_, OverlapsHashTableMetaInfo::overlaps_bucket_threshold, OverlapsHashTableMetaInfo::overlaps_max_table_size_bytes, and HashtableCacheMetaInfo::overlaps_meta_info.

Referenced by RangeJoinHashTable::initHashTableOnCpu(), reifyImpl(), and reifyWithLayout().

{
  OverlapsHashTableMetaInfo overlaps_meta_info;
  overlaps_meta_info.bucket_sizes = bucket_sizes;
  overlaps_meta_info.overlaps_max_table_size_bytes = max_table_size_bytes;
  overlaps_meta_info.overlaps_bucket_threshold = bucket_threshold;
  HashtableCacheMetaInfo meta_info;
  meta_info.overlaps_meta_info = overlaps_meta_info;
  hashtable_cache_meta_info_ = meta_info;
}

size_t OverlapsJoinHashTable::shardCount ( ) const
inline, protected

Definition at line 228 of file OverlapsJoinHashTable.h.

References condition_, executor_, BaselineJoinHashTable::getShardCountForCondition(), Data_Namespace::GPU_LEVEL, inner_outer_pairs_, and memory_level_.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  if (memory_level_ != Data_Namespace::MemoryLevel::GPU_LEVEL) {
    return 0;
  }
  return BaselineJoinHashTable::getShardCountForCondition(
      condition_.get(), executor_, inner_outer_pairs_);
}

std::set< DecodedJoinHashBufferEntry > OverlapsJoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override, protected, virtual

Implements HashJoin.

Definition at line 1733 of file OverlapsJoinHashTable.cpp.

References CHECK, countBufferOff(), executor_, HashJoin::getHashTableForDevice(), getHashType(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), GPU, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toSet().

{
  auto buffer = getJoinHashBuffer(device_type, device_id);
  auto hash_table = getHashTableForDevice(device_id);
  CHECK(hash_table);
  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
#ifdef HAVE_CUDA
  std::unique_ptr<int8_t[]> buffer_copy;
  if (device_type == ExecutorDeviceType::GPU) {
    buffer_copy = std::make_unique<int8_t[]>(buffer_size);
    CHECK(executor_);
    auto data_mgr = executor_->getDataMgr();
    auto allocator = data_mgr->createGpuAllocator(device_id);

    allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
  }
  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
#else
  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
#endif  // HAVE_CUDA
  auto ptr2 = ptr1 + offsetBufferOff();
  auto ptr3 = ptr1 + countBufferOff();
  auto ptr4 = ptr1 + payloadBufferOff();
  const auto layout = getHashType();
  return HashTable::toSet(getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
                          getKeyComponentWidth(),
                          hash_table->getEntryCount(),
                          ptr1,
                          ptr2,
                          ptr3,
                          ptr4,
                          buffer_size);
}

std::string OverlapsJoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
override, protected, virtual

Implements HashJoin.

Definition at line 1692 of file OverlapsJoinHashTable.cpp.

References CHECK, CHECK_LT, countBufferOff(), executor_, getHashType(), HashJoin::getHashTypeString(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), GPU, HashJoin::hash_tables_for_device_, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toString().

Referenced by getAlternativeCacheKey().

{
  auto buffer = getJoinHashBuffer(device_type, device_id);
  CHECK_LT(device_id, hash_tables_for_device_.size());
  auto hash_table = hash_tables_for_device_[device_id];
  CHECK(hash_table);
  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
#ifdef HAVE_CUDA
  std::unique_ptr<int8_t[]> buffer_copy;
  if (device_type == ExecutorDeviceType::GPU) {
    buffer_copy = std::make_unique<int8_t[]>(buffer_size);
    CHECK(executor_);
    auto data_mgr = executor_->getDataMgr();
    auto device_allocator = data_mgr->createGpuAllocator(device_id);

    device_allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
  }
  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
#else
  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
#endif  // HAVE_CUDA
  auto ptr2 = ptr1 + offsetBufferOff();
  auto ptr3 = ptr1 + countBufferOff();
  auto ptr4 = ptr1 + payloadBufferOff();
  CHECK(hash_table);
  const auto layout = getHashType();
  return HashTable::toString(
      "geo",
      getHashTypeString(layout),
      getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
      getKeyComponentWidth(),
      hash_table->getEntryCount(),
      ptr1,
      ptr2,
      ptr3,
      ptr4,
      buffer_size,
      raw);
}

Member Data Documentation

std::unique_ptr< OverlapsTuningParamRecycler > OverlapsJoinHashTable::auto_tuner_cache_
static, protected

Initial value:
= std::make_unique<OverlapsTuningParamRecycler>()

Definition at line 386 of file OverlapsJoinHashTable.h.

Referenced by getCacheInvalidator(), getOverlapsTuningParamCache(), and reifyWithLayout().

double OverlapsJoinHashTable::chosen_overlaps_bucket_threshold_
protected

Definition at line 368 of file OverlapsJoinHashTable.h.

Referenced by reifyImpl().

size_t OverlapsJoinHashTable::chosen_overlaps_max_table_size_bytes_
protected

Definition at line 369 of file OverlapsJoinHashTable.h.

Referenced by reifyImpl().

ColumnCacheMap& OverlapsJoinHashTable::column_cache_
protected

Definition at line 362 of file OverlapsJoinHashTable.h.

Referenced by fetchColumnsForDevice().

CompositeKeyInfo OverlapsJoinHashTable::composite_key_info_
protected

const std::shared_ptr<Analyzer::BinOper> OverlapsJoinHashTable::condition_
protected

std::mutex OverlapsJoinHashTable::cpu_hash_table_buff_mutex_
protected

std::unique_ptr< HashtableRecycler > OverlapsJoinHashTable::hash_table_cache_
static, protected

HashtableCacheMetaInfo OverlapsJoinHashTable::hashtable_cache_meta_info_
protected

const JoinType OverlapsJoinHashTable::join_type_
protected

std::optional<HashType> OverlapsJoinHashTable::layout_override_
protected

const std::vector<InputTableInfo>& OverlapsJoinHashTable::query_infos_
protected

QueryPlan OverlapsJoinHashTable::query_plan_dag_
protected

const TableIdToNodeMap OverlapsJoinHashTable::table_id_to_node_map_
protected

Definition at line 390 of file OverlapsJoinHashTable.h.


The documentation for this class was generated from the following files:

OverlapsJoinHashTable.h
OverlapsJoinHashTable.cpp