OmniSciDB  72c90bc290
BoundingBoxIntersectJoinHashTable Class Reference

#include <BoundingBoxIntersectJoinHashTable.h>


Classes

struct  AlternativeCacheKeyForBoundingBoxIntersection
 

Public Member Functions

 BoundingBoxIntersectJoinHashTable (const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, const RegisteredQueryHint &query_hints, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map)
 
virtual ~BoundingBoxIntersectJoinHashTable ()
 
- Public Member Functions inherited from HashJoin
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int8_t * getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static std::shared_ptr< BoundingBoxIntersectJoinHashTable > getInstance (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static void invalidateCache ()
 
static void markCachedItemAsDirty (size_t table_key)
 
static HashtableRecycler * getHashTableCache ()
 
static BoundingBoxIntersectTuningParamRecycler * getBoundingBoxIntersectTuningParamCache ()
 
- Static Public Member Functions inherited from HashJoin
static size_t getMaximumNumHashEntriesCanHold (MemoryLevel memory_level, const Executor *executor, size_t rowid_size) noexcept
 
static std::string generateTooManyHashEntriesErrMsg (size_t num_entries, size_t threshold, MemoryLevel memory_level)
 
static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, const Catalog_Namespace::Catalog &catalog1, std::string_view table2, std::string_view column2, const Catalog_Namespace::Catalog &catalog2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static std::pair< std::string, std::shared_ptr< HashJoin > > getSyntheticInstance (std::vector< std::shared_ptr< Analyzer::BinOper >>, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 
static shared::TableKey getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static bool canAccessHashTable (bool allow_hash_table_recycling, bool invalid_cache_key, JoinType join_type)
 
static void checkHashJoinReplicationConstraint (const shared::TableKey &table_key, const size_t shard_count, const Executor *executor)
 
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const TemporaryTables *temporary_tables, const bool is_bbox_intersect=false)
 
template<typename T >
static const T * getHashJoinColumn (const Analyzer::Expr *expr)
 
static std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > normalizeColumnPairs (const Analyzer::BinOper *condition, const TemporaryTables *temporary_tables)
 
static std::vector< int > collectFragmentIds (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs={})
 
static std::vector< const StringDictionaryProxy::IdMap * > translateCompositeStrDictProxies (const CompositeKeyInfo &composite_key_info, const std::vector< InnerOuterStringOpInfos > &string_op_infos_for_keys, const Executor *executor)
 
static std::pair< const StringDictionaryProxy *, StringDictionaryProxy * > getStrDictProxies (const InnerOuter &cols, const Executor *executor, const bool has_string_ops)
 
static const StringDictionaryProxy::IdMap * translateInnerToOuterStrDictProxies (const InnerOuter &cols, const InnerOuterStringOpInfos &inner_outer_string_op_infos, ExpressionRange &old_col_range, const Executor *executor)
 

Protected Member Functions

void reify (const HashType preferred_layout)
 
virtual void reifyWithLayout (const HashType layout)
 
virtual void reifyImpl (std::vector< ColumnsForDevice > &columns_per_device, const Fragmenter_Namespace::TableInfo &query_info, const HashType layout, const size_t shard_count, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
void reifyForDevice (const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const int device_id, const logger::ThreadLocalIds parent_thread_local_ids)
 
size_t calculateHashTableSize (size_t number_of_dimensions, size_t emitted_keys_count, size_t entry_count) const
 
ColumnsForDevice fetchColumnsForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
 
virtual std::pair< size_t, size_t > approximateTupleCount (const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
virtual std::pair< size_t, size_t > computeHashTableCounts (const size_t shard_count, const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
void setInverseBucketSizeInfo (const std::vector< double > &inverse_bucket_sizes, std::vector< ColumnsForDevice > &columns_per_device, const size_t device_count)
 
size_t getKeyComponentWidth () const
 
size_t getKeyComponentCount () const
 
HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
std::shared_ptr< BaselineHashTable > initHashTableOnCpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const BaselineHashTableEntryInfo hash_table_entry_info, const bool skip_hashtable_caching)
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
DecodedJoinHashBufferSet toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
const RegisteredQueryHint & getRegisteredQueryHint ()
 
size_t getEntryCount () const
 
size_t getEmittedKeysCount () const
 
size_t getComponentBufferSize () const noexcept override
 
size_t shardCount () const
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
shared::TableKey getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
size_t getKeyBufferSize () const noexcept
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
std::string getHashJoinType () const final
 
bool isBitwiseEq () const override
 
std::shared_ptr< HashTable > initHashTableOnCpuFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
 
std::optional< std::pair< size_t, size_t > > getApproximateTupleCountFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
 
void putHashTableOnCpuToCache (QueryPlanHash key, CacheItemType item_type, std::shared_ptr< HashTable > hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
 
llvm::Value * codegenKey (const CompilationOptions &)
 
std::vector< llvm::Value * > codegenManyKey (const CompilationOptions &)
 
std::optional< BoundingBoxIntersectMetaInfo > getBoundingBoxIntersectMetaInfo ()
 
QueryPlanHash getAlternativeCacheKey (AlternativeCacheKeyForBoundingBoxIntersection &info)
 
void generateCacheKey (const size_t max_hashtable_size, const double bucket_threshold, const std::vector< double > &bucket_sizes, std::vector< std::vector< Fragmenter_Namespace::FragmentInfo >> &fragments_per_device, int device_count)
 
QueryPlanHash getCacheKey (int device_id) const
 
const std::vector< InnerOuter > & getInnerOuterPairs () const
 
void setBoundingBoxIntersectionMetaInfo (size_t max_table_size_bytes, double bucket_threshold, std::vector< double > &bucket_sizes)
 

Protected Attributes

const std::shared_ptr< Analyzer::BinOper > condition_
 
const JoinType join_type_
 
const std::vector< InputTableInfo > & query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
std::vector< InnerOuter > inner_outer_pairs_
 
const int device_count_
 
RegisteredQueryHint query_hints_
 
std::vector< double > inverse_bucket_sizes_for_dimension_
 
double chosen_bbox_intersect_bucket_threshold_
 
size_t chosen_bbox_intersect_max_table_size_bytes_
 
CompositeKeyInfo composite_key_info_
 
std::optional< HashType > layout_override_
 
std::mutex cpu_hash_table_buff_mutex_
 
HashTableBuildDagMap hashtable_build_dag_map_
 
QueryPlanDAG query_plan_dag_
 
std::vector< QueryPlanHash > hashtable_cache_key_
 
HashtableCacheMetaInfo hashtable_cache_meta_info_
 
std::unordered_set< size_t > table_keys_
 
const TableIdToNodeMap table_id_to_node_map_
 
- Protected Attributes inherited from HashJoin
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
 

Static Protected Attributes

static std::unique_ptr< HashtableRecycler > hash_table_cache_
 
static std::unique_ptr< BoundingBoxIntersectTuningParamRecycler > auto_tuner_cache_
 

Additional Inherited Members

- Static Public Attributes inherited from HashJoin
static constexpr size_t MAX_NUM_HASH_ENTRIES = size_t(1) << 31
 
- Static Protected Member Functions inherited from HashJoin
static llvm::Value * codegenColOrStringOper (const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
 

Detailed Description

Definition at line 24 of file BoundingBoxIntersectJoinHashTable.h.

Constructor & Destructor Documentation

BoundingBoxIntersectJoinHashTable::BoundingBoxIntersectJoinHashTable ( const std::shared_ptr< Analyzer::BinOper >  condition,
const JoinType  join_type,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
ColumnCacheMap &  column_cache,
Executor *  executor,
const std::vector< InnerOuter > &  inner_outer_pairs,
const int  device_count,
const RegisteredQueryHint &  query_hints,
const HashTableBuildDagMap &  hashtable_build_dag_map,
const TableIdToNodeMap &  table_id_to_node_map 
)
inline

Definition at line 26 of file BoundingBoxIntersectJoinHashTable.h.

References CHECK_GT, device_count_, and HashJoin::hash_tables_for_device_.

37  : condition_(condition)
38  , join_type_(join_type)
39  , query_infos_(query_infos)
40  , memory_level_(memory_level)
41  , executor_(executor)
42  , column_cache_(column_cache)
43  , inner_outer_pairs_(inner_outer_pairs)
44  , device_count_(device_count)
45  , query_hints_(query_hints)
46  , hashtable_build_dag_map_(hashtable_build_dag_map)
47  , table_id_to_node_map_(table_id_to_node_map) {
48  CHECK_GT(device_count_, 0);
49  hash_tables_for_device_.resize(std::max(device_count_, 1));
50  }
virtual BoundingBoxIntersectJoinHashTable::~BoundingBoxIntersectJoinHashTable ( )
inline virtual

Definition at line 52 of file BoundingBoxIntersectJoinHashTable.h.

52 {}

Member Function Documentation

std::pair< size_t, size_t > BoundingBoxIntersectJoinHashTable::approximateTupleCount ( const std::vector< double > &  inverse_bucket_sizes_for_dimension,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected virtual

Reimplemented in RangeJoinHashTable.

Definition at line 1004 of file BoundingBoxIntersectJoinHashTable.cpp.

References gpu_enabled::accumulate(), approximate_distinct_tuples_bbox_intersect(), approximate_distinct_tuples_on_device_bbox_intersect(), threading_serial::async(), BBOX_INTERSECT_HT, Bitmap, CHECK, CHECK_EQ, CHECK_GT, CPU, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, Data_Namespace::CPU_LEVEL, cpu_threads(), device_count_, executor_, getApproximateTupleCountFromCache(), getEffectiveMemoryLevel(), getQueryEngineCudaStreamForDevice(), GPU, Data_Namespace::GPU_LEVEL, hashtable_cache_key_, hll_size(), hll_unify(), inner_outer_pairs_, transfer_flat_object_to_gpu(), transfer_vector_of_flat_objects_to_gpu(), UNREACHABLE, and VLOG.

Referenced by computeHashTableCounts().

1008  {
1009  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
1010  CountDistinctDescriptor count_distinct_desc{
1011  CountDistinctImplType::Bitmap,
1012  0,
1013  0,
1014  11,
1015  true,
1016  effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
1017  ? ExecutorDeviceType::GPU
1018  : ExecutorDeviceType::CPU,
1019  1};
1020  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();
1021 
1022  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());
1023  if (columns_per_device.front().join_columns.front().num_elems == 0) {
1024  return std::make_pair(0, 0);
1025  }
1026 
1027  // TODO: state management in here should be revisited, but this should be safe enough
1028  // for now
1029  // re-compute bucket counts per device based on global bucket size
1030  for (size_t device_id = 0; device_id < columns_per_device.size(); ++device_id) {
1031  auto& columns_for_device = columns_per_device[device_id];
1032  columns_for_device.setBucketInfo(inverse_bucket_sizes_for_dimension,
1033  inner_outer_pairs_);
1034  }
1035 
1036  // Number of keys must match dimension of buckets
1037  CHECK_EQ(columns_per_device.front().join_columns.size(),
1038  columns_per_device.front().join_buckets.size());
1039  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
1040  // Note that this path assumes each device has the same hash table (for GPU hash
1041  // join w/ hash table built on CPU)
1042  const auto cached_count_info =
1043  getApproximateTupleCountFromCache(hashtable_cache_key_.front(),
1044  CacheItemType::BBOX_INTERSECT_HT,
1045  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
1046  if (cached_count_info) {
1047  VLOG(1) << "Using a cached tuple count: " << cached_count_info->first
1048  << ", emitted keys count: " << cached_count_info->second;
1049  return *cached_count_info;
1050  }
1051  int thread_count = cpu_threads();
1052  std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
1053  auto hll_result = &hll_buffer_all_cpus[0];
1054 
1055  std::vector<int32_t> num_keys_for_row;
1056  // TODO(adb): support multi-column bounding box intersection
1057  num_keys_for_row.resize(columns_per_device.front().join_columns[0].num_elems);
1058 
1059  approximate_distinct_tuples_bbox_intersect(
1060  hll_result,
1061  num_keys_for_row,
1062  count_distinct_desc.bitmap_sz_bits,
1063  padded_size_bytes,
1064  columns_per_device.front().join_columns,
1065  columns_per_device.front().join_column_types,
1066  columns_per_device.front().join_buckets,
1067  thread_count);
1068  for (int i = 1; i < thread_count; ++i) {
1069  hll_unify(hll_result,
1070  hll_result + i * padded_size_bytes,
1071  size_t(1) << count_distinct_desc.bitmap_sz_bits);
1072  }
1073  return std::make_pair(
1074  hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
1075  static_cast<size_t>(num_keys_for_row.size() > 0 ? num_keys_for_row.back() : 0));
1076  }
1077 #ifdef HAVE_CUDA
1078  auto data_mgr = executor_->getDataMgr();
1079  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
1080  for (auto& host_hll_buffer : host_hll_buffers) {
1081  host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
1082  }
1083  std::vector<size_t> emitted_keys_count_device_threads(device_count_, 0);
1084  std::vector<std::future<void>> approximate_distinct_device_threads;
1085  for (int device_id = 0; device_id < device_count_; ++device_id) {
1086  approximate_distinct_device_threads.emplace_back(std::async(
1087  std::launch::async,
1088  [device_id,
1089  &columns_per_device,
1090  &count_distinct_desc,
1091  data_mgr,
1092  &host_hll_buffers,
1093  &emitted_keys_count_device_threads] {
1094  auto allocator = std::make_unique<CudaAllocator>(
1095  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
1096  auto device_hll_buffer =
1097  allocator->alloc(count_distinct_desc.bitmapPaddedSizeBytes());
1098  data_mgr->getCudaMgr()->zeroDeviceMem(
1099  device_hll_buffer,
1100  count_distinct_desc.bitmapPaddedSizeBytes(),
1101  device_id,
1102  getQueryEngineCudaStreamForDevice(device_id));
1103  const auto& columns_for_device = columns_per_device[device_id];
1104  auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(
1105  columns_for_device.join_columns, *allocator);
1106 
1107  CHECK_GT(columns_for_device.join_buckets.size(), 0u);
1108  const auto& inverse_bucket_sizes_for_dimension =
1109  columns_for_device.join_buckets[0].inverse_bucket_sizes_for_dimension;
1110  auto inverse_bucket_sizes_gpu = allocator->alloc(
1111  inverse_bucket_sizes_for_dimension.size() * sizeof(double));
1112  allocator->copyToDevice(
1113  inverse_bucket_sizes_gpu,
1114  inverse_bucket_sizes_for_dimension.data(),
1115  inverse_bucket_sizes_for_dimension.size() * sizeof(double));
1116  const size_t row_counts_buffer_sz =
1117  columns_per_device.front().join_columns[0].num_elems * sizeof(int32_t);
1118  auto row_counts_buffer = allocator->alloc(row_counts_buffer_sz);
1119  data_mgr->getCudaMgr()->zeroDeviceMem(
1120  row_counts_buffer,
1121  row_counts_buffer_sz,
1122  device_id,
1123  getQueryEngineCudaStreamForDevice(device_id));
1124  const auto key_handler = BoundingBoxIntersectKeyHandler(
1125  inverse_bucket_sizes_for_dimension.size(),
1126  join_columns_gpu,
1127  reinterpret_cast<double*>(inverse_bucket_sizes_gpu));
1128  const auto key_handler_gpu =
1129  transfer_flat_object_to_gpu(key_handler, *allocator);
1130  approximate_distinct_tuples_on_device_bbox_intersect(
1131  reinterpret_cast<uint8_t*>(device_hll_buffer),
1132  count_distinct_desc.bitmap_sz_bits,
1133  reinterpret_cast<int32_t*>(row_counts_buffer),
1134  key_handler_gpu,
1135  columns_for_device.join_columns[0].num_elems);
1136 
1137  auto& host_emitted_keys_count = emitted_keys_count_device_threads[device_id];
1138  allocator->copyFromDevice(
1139  &host_emitted_keys_count,
1140  row_counts_buffer +
1141  (columns_per_device.front().join_columns[0].num_elems - 1) *
1142  sizeof(int32_t),
1143  sizeof(int32_t));
1144 
1145  auto& host_hll_buffer = host_hll_buffers[device_id];
1146  allocator->copyFromDevice(&host_hll_buffer[0],
1147  device_hll_buffer,
1148  count_distinct_desc.bitmapPaddedSizeBytes());
1149  }));
1150  }
1151  for (auto& child : approximate_distinct_device_threads) {
1152  child.get();
1153  }
1154  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
1155  auto& result_hll_buffer = host_hll_buffers.front();
1156  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
1157  for (int device_id = 1; device_id < device_count_; ++device_id) {
1158  auto& host_hll_buffer = host_hll_buffers[device_id];
1159  hll_unify(hll_result,
1160  reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
1161  size_t(1) << count_distinct_desc.bitmap_sz_bits);
1162  }
1163  const size_t emitted_keys_count =
1164  std::accumulate(emitted_keys_count_device_threads.begin(),
1165  emitted_keys_count_device_threads.end(),
1166  0);
1167  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
1168  emitted_keys_count);
1169 #else
1170  UNREACHABLE();
1171  return {0, 0};
1172 #endif // HAVE_CUDA
1173 }

size_t BoundingBoxIntersectJoinHashTable::calculateHashTableSize ( size_t  number_of_dimensions,
size_t  emitted_keys_count,
size_t  entry_count 
) const
protected

Definition at line 932 of file BoundingBoxIntersectJoinHashTable.cpp.

References getKeyComponentWidth().

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

935  {
936  const auto key_component_width = getKeyComponentWidth();
937  const auto key_component_count = number_of_dimensions;
938  const auto entry_size = key_component_count * key_component_width;
939  const auto keys_for_all_rows = emitted_keys_count;
940  const size_t one_to_many_hash_entries = 2 * entry_count + keys_for_all_rows;
941  const size_t hash_table_size =
942  entry_size * entry_count + one_to_many_hash_entries * sizeof(int32_t);
943  return hash_table_size;
944 }


llvm::Value * BoundingBoxIntersectJoinHashTable::codegenKey ( const CompilationOptions &  co)
protected

Definition at line 1482 of file BoundingBoxIntersectJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::castArrayPointer(), CHECK, CHECK_EQ, CodeGenerator::codegen(), Catalog_Namespace::get_metadata_for_column(), getKeyComponentCount(), getKeyComponentWidth(), inner_outer_pairs_, inverse_bucket_sizes_for_dimension_, kPOINT, kTINYINT, LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), and UNREACHABLE.

Referenced by codegenMatchingSet().

1482  {
1483  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1484  const auto key_component_width = getKeyComponentWidth();
1485  CHECK(key_component_width == 4 || key_component_width == 8);
1486  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
1487  llvm::Value* key_buff_lv{nullptr};
1488  switch (key_component_width) {
1489  case 4:
1490  key_buff_lv =
1491  LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv);
1492  break;
1493  case 8:
1494  key_buff_lv =
1495  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1496  break;
1497  default:
1498  CHECK(false);
1499  }
1500 
1501  const auto& inner_outer_pair = inner_outer_pairs_[0];
1502  const auto outer_geo = inner_outer_pair.second;
1503  const auto outer_geo_ti = outer_geo->get_type_info();
1504 
1505  llvm::Value* arr_ptr = nullptr;
1506  CodeGenerator code_generator(executor_);
1507  CHECK_EQ(inverse_bucket_sizes_for_dimension_.size(), static_cast<size_t>(2));
1508 
1509  if (outer_geo_ti.is_geometry()) {
1510  // TODO(adb): for points we will use the coords array, but for other geometries we
1511  // will need to use the bounding box. For now only support points.
1512  CHECK_EQ(outer_geo_ti.get_type(), kPOINT);
1513 
1514  if (const auto outer_geo_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_geo)) {
1515  const auto outer_geo_col_lvs = code_generator.codegen(outer_geo_col, true, co);
1516  CHECK_EQ(outer_geo_col_lvs.size(), size_t(1));
1517  auto column_key = outer_geo_col->getColumnKey();
1518  column_key.column_id = column_key.column_id + 1;
1519  const auto coords_cd = Catalog_Namespace::get_metadata_for_column(column_key);
1520  CHECK(coords_cd);
1521 
1522  const auto array_ptr = executor_->cgen_state_->emitExternalCall(
1523  "array_buff",
1524  llvm::Type::getInt8PtrTy(executor_->cgen_state_->context_),
1525  {outer_geo_col_lvs.front(), code_generator.posArg(outer_geo_col)});
1526  CHECK(coords_cd->columnType.get_elem_type().get_type() == kTINYINT)
1527  << "Bounding box intersection only supports TINYINT coordinates columns.";
1528  arr_ptr = code_generator.castArrayPointer(array_ptr,
1529  coords_cd->columnType.get_elem_type());
1530  } else if (const auto outer_geo_function_operator =
1531  dynamic_cast<const Analyzer::GeoOperator*>(outer_geo)) {
1532  // Process points dynamically constructed by geo function operators
1533  const auto outer_geo_function_operator_lvs =
1534  code_generator.codegen(outer_geo_function_operator, true, co);
1535  CHECK_EQ(outer_geo_function_operator_lvs.size(), size_t(2));
1536  arr_ptr = outer_geo_function_operator_lvs.front();
1537  } else if (const auto outer_geo_expr =
1538  dynamic_cast<const Analyzer::GeoExpr*>(outer_geo)) {
1539  UNREACHABLE() << outer_geo_expr->toString();
1540  }
1541  } else if (outer_geo_ti.is_fixlen_array()) {
1542  // Process dynamically constructed points
1543  const auto outer_geo_cast_coord_array =
1544  dynamic_cast<const Analyzer::UOper*>(outer_geo);
1545  CHECK_EQ(outer_geo_cast_coord_array->get_optype(), kCAST);
1546  const auto outer_geo_coord_array = dynamic_cast<const Analyzer::ArrayExpr*>(
1547  outer_geo_cast_coord_array->get_operand());
1548  CHECK(outer_geo_coord_array);
1549  CHECK(outer_geo_coord_array->isLocalAlloc());
1550  CHECK_EQ(outer_geo_coord_array->getElementCount(), 2);
1551  auto elem_size = (outer_geo_ti.get_compression() == kENCODING_GEOINT)
1552  ? sizeof(int32_t)
1553  : sizeof(double);
1554  CHECK_EQ(outer_geo_ti.get_size(), int(2 * elem_size));
1555  const auto outer_geo_constructed_lvs = code_generator.codegen(outer_geo, true, co);
1556  // CHECK_EQ(outer_geo_constructed_lvs.size(), size_t(2)); // Pointer and size
1557  const auto array_ptr = outer_geo_constructed_lvs.front(); // Just need the pointer
1558  arr_ptr = LL_BUILDER.CreateGEP(
1559  array_ptr->getType()->getScalarType()->getPointerElementType(),
1560  array_ptr,
1561  LL_INT(0));
1562  arr_ptr = code_generator.castArrayPointer(array_ptr, SQLTypeInfo(kTINYINT, true));
1563  }
1564  if (!arr_ptr) {
1565  LOG(FATAL)
1566  << "Bounding box intersection currently only supports geospatial columns and "
1567  "constructed points.";
1568  }
1569 
1570  for (size_t i = 0; i < 2; i++) {
1571  const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(
1572  key_buff_lv->getType()->getScalarType()->getPointerElementType(),
1573  key_buff_lv,
1574  LL_INT(i));
1575 
1576  // Note that get_bucket_key_for_range_compressed will need to be specialized for
1577  // future compression schemes
1578  auto bucket_key =
1579  outer_geo_ti.get_compression() == kENCODING_GEOINT
1580  ? executor_->cgen_state_->emitExternalCall(
1581  "get_bucket_key_for_range_compressed",
1582  get_int_type(64, LL_CONTEXT),
1583  {arr_ptr, LL_INT(i), LL_FP(inverse_bucket_sizes_for_dimension_[i])})
1584  : executor_->cgen_state_->emitExternalCall(
1585  "get_bucket_key_for_range_double",
1586  get_int_type(64, LL_CONTEXT),
1587  {arr_ptr, LL_INT(i), LL_FP(inverse_bucket_sizes_for_dimension_[i])});
1588  const auto col_lv = LL_BUILDER.CreateSExt(
1589  bucket_key, get_int_type(key_component_width * 8, LL_CONTEXT));
1590  LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
1591  }
1592  return key_buff_lv;
1593 }

std::vector< llvm::Value * > BoundingBoxIntersectJoinHashTable::codegenManyKey ( const CompilationOptions &  co)
protected

Definition at line 1595 of file BoundingBoxIntersectJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), Catalog_Namespace::get_metadata_for_column(), HashJoin::getHashTableForDevice(), getHashType(), getKeyComponentWidth(), inner_outer_pairs_, ManyToMany, CodeGenerator::posArg(), and VLOG.

Referenced by codegenMatchingSet().

1596  {
1597  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1598  const auto key_component_width = getKeyComponentWidth();
1599  CHECK(key_component_width == 4 || key_component_width == 8);
1600  auto hash_table = getHashTableForDevice(size_t(0));
1601  CHECK(hash_table);
1602  CHECK(getHashType() == HashType::ManyToMany);
1603 
1604  VLOG(1) << "Performing codgen for ManyToMany";
1605  const auto& inner_outer_pair = inner_outer_pairs_[0];
1606  const auto outer_col = inner_outer_pair.second;
1607 
1608  CodeGenerator code_generator(executor_);
1609  const auto col_lvs = code_generator.codegen(outer_col, true, co);
1610  CHECK_EQ(col_lvs.size(), size_t(1));
1611 
1612  const auto outer_col_var = dynamic_cast<const Analyzer::ColumnVar*>(outer_col);
1613  CHECK(outer_col_var);
1614  const auto coords_cd =
1615  Catalog_Namespace::get_metadata_for_column(outer_col_var->getColumnKey());
1616  CHECK(coords_cd);
1617 
1618  const auto array_ptr = executor_->cgen_state_->emitExternalCall(
1619  "array_buff",
1620  llvm::Type::getInt8PtrTy(executor_->cgen_state_->context_),
1621  {col_lvs.front(), code_generator.posArg(outer_col)});
1622 
1623  // TODO(jclay): this seems to cast to double, and causes the GPU build to fail.
1624  // const auto arr_ptr =
1625  // code_generator.castArrayPointer(array_ptr,
1626  // coords_cd->columnType.get_elem_type());
1627  array_ptr->setName("array_ptr");
1628 
1629  auto num_keys_lv = executor_->cgen_state_->emitExternalCall(
1630  "get_num_buckets_for_bounds",
1631  get_int_type(32, LL_CONTEXT),
1632  {array_ptr,
1633  LL_INT(0),
1634  LL_FP(inverse_bucket_sizes_for_dimension_[0]),
1635  LL_FP(inverse_bucket_sizes_for_dimension_[1])});
1636  num_keys_lv->setName("num_keys_lv");
1637 
1638  return {num_keys_lv, array_ptr};
1639 }

HashJoinMatchingSet BoundingBoxIntersectJoinHashTable::codegenMatchingSet ( const CompilationOptions &  co,
const size_t  index 
)
override protected virtual

Implements HashJoin.

Definition at line 1641 of file BoundingBoxIntersectJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, HashJoin::codegenHashTableLoad(), codegenKey(), codegenManyKey(), HashJoin::codegenMatchingSet(), executor_, get_int_array_type(), get_int_type(), getComponentBufferSize(), getEntryCount(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), inverse_bucket_sizes_for_dimension_, LL_BUILDER, LL_CONTEXT, LL_FP, LL_INT, ManyToMany, offsetBufferOff(), OneToMany, to_string(), UNREACHABLE, and VLOG.

1643  {
1644  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1645  if (getHashType() == HashType::ManyToMany) {
1646  VLOG(1) << "Building codegenMatchingSet for ManyToMany";
1647  const auto key_component_width = getKeyComponentWidth();
1648  CHECK(key_component_width == 4 || key_component_width == 8);
1649  auto many_to_many_args = codegenManyKey(co);
1650  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
1651  const auto composite_dict_ptr_type =
1652  llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
1653  const auto composite_key_dict =
1654  hash_ptr->getType()->isPointerTy()
1655  ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
1656  : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
1657  const auto key_component_count = getKeyComponentCount();
1658 
1659  auto one_to_many_ptr = hash_ptr;
1660 
1661  if (one_to_many_ptr->getType()->isPointerTy()) {
1662  one_to_many_ptr =
1663  LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
1664  } else {
1665  CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
1666  }
1667 
1668  const auto composite_key_dict_size = offsetBufferOff();
1669  one_to_many_ptr =
1670  LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
1671 
1672  // NOTE(jclay): A fixed array of size 200 is allocated on the stack.
1673  // this is likely the maximum value we can do that is safe to use across
1674  // all supported GPU architectures.
1675  const int max_array_size = 200;
1676  const auto arr_type = get_int_array_type(32, max_array_size, LL_CONTEXT);
1677  const auto out_arr_lv = LL_BUILDER.CreateAlloca(arr_type);
1678  out_arr_lv->setName("out_arr");
1679 
1680  const auto casted_out_arr_lv =
1681  LL_BUILDER.CreatePointerCast(out_arr_lv, arr_type->getPointerTo());
1682 
1683  const auto element_ptr = LL_BUILDER.CreateGEP(arr_type, casted_out_arr_lv, LL_INT(0));
1684 
1685  auto rowid_ptr_i32 =
1686  LL_BUILDER.CreatePointerCast(element_ptr, llvm::Type::getInt32PtrTy(LL_CONTEXT));
1687 
1688  const auto candidate_count_lv = executor_->cgen_state_->emitExternalCall(
1689  "get_candidate_rows",
1690  llvm::Type::getInt64Ty(LL_CONTEXT),
1691  {
1692  rowid_ptr_i32,
1693  LL_INT(max_array_size),
1694  many_to_many_args[1],
1695  LL_INT(0),
1696  LL_FP(inverse_bucket_sizes_for_dimension_[0]),
1697  LL_FP(inverse_bucket_sizes_for_dimension_[1]),
1698  many_to_many_args[0],
1699  LL_INT(key_component_count), // key_component_count
1700  composite_key_dict, // ptr to hash table
1701  LL_INT(getEntryCount()), // entry_count
1702  LL_INT(composite_key_dict_size), // offset_buffer_ptr_offset
1703  LL_INT(getEntryCount() * sizeof(int32_t)) // sub_buff_size
1704  });
1705 
1706  const auto slot_lv = LL_INT(int64_t(0));
1707 
1708  return {rowid_ptr_i32, candidate_count_lv, slot_lv};
1709  } else {
1710  VLOG(1) << "Building codegenMatchingSet for Baseline";
1711  // TODO: duplicated w/ BaselineJoinHashTable -- push into the hash table builder?
1712  const auto key_component_width = getKeyComponentWidth();
1713  CHECK(key_component_width == 4 || key_component_width == 8);
1714  auto key_buff_lv = codegenKey(co);
1715  CHECK(getHashType() == HashType::OneToMany);
1716  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
1717  const auto composite_dict_ptr_type =
1718  llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
1719  const auto composite_key_dict =
1720  hash_ptr->getType()->isPointerTy()
1721  ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
1722  : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
1723  const auto key_component_count = getKeyComponentCount();
1724  const auto key = executor_->cgen_state_->emitExternalCall(
1725  "get_composite_key_index_" + std::to_string(key_component_width * 8),
1726  get_int_type(64, LL_CONTEXT),
1727  {key_buff_lv,
1728  LL_INT(key_component_count),
1729  composite_key_dict,
1730  LL_INT(getEntryCount())});
1731  auto one_to_many_ptr = hash_ptr;
1732  if (one_to_many_ptr->getType()->isPointerTy()) {
1733  one_to_many_ptr =
1734  LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
1735  } else {
1736  CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
1737  }
1738  const auto composite_key_dict_size = offsetBufferOff();
1739  one_to_many_ptr =
1740  LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
1741  return HashJoin::codegenMatchingSet(
1742  std::vector<llvm::Value*>{
1743  one_to_many_ptr, key, LL_INT(int64_t(0)), LL_INT(getEntryCount() - 1)},
1744  false,
1745  false,
1746  false,
1747  getComponentBufferSize(),
1748  executor_);
1749  }
1750  UNREACHABLE();
1751  return HashJoinMatchingSet{};
1752 }

llvm::Value* BoundingBoxIntersectJoinHashTable::codegenSlot ( const CompilationOptions & ,
const size_t   
)
inline override protected virtual

Implements HashJoin.

Definition at line 204 of file BoundingBoxIntersectJoinHashTable.h.

References UNREACHABLE.

204  {
205  UNREACHABLE(); // not applicable for bounding box intersection
206  return nullptr;
207  }
std::pair< size_t, size_t > BoundingBoxIntersectJoinHashTable::computeHashTableCounts ( const size_t  shard_count,
const std::vector< double > &  inverse_bucket_sizes_for_dimension,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected virtual

Definition at line 985 of file BoundingBoxIntersectJoinHashTable.cpp.

References approximateTupleCount(), CHECK, device_count_, get_entries_per_device(), and memory_level_.

Referenced by reifyWithLayout().

990  {
991  CHECK(!inverse_bucket_sizes_for_dimension.empty());
992  const auto [tuple_count, emitted_keys_count] =
993  approximateTupleCount(inverse_bucket_sizes_for_dimension,
994  columns_per_device,
995  chosen_max_hashtable_size,
996  chosen_bucket_threshold);
997  const auto entry_count = 2 * std::max(tuple_count, size_t(1));
998 
999  return std::make_pair(
1000  get_entries_per_device(entry_count, shard_count, device_count_, memory_level_),
1001  emitted_keys_count);
1002 }

size_t BoundingBoxIntersectJoinHashTable::countBufferOff ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 266 of file BoundingBoxIntersectJoinHashTable.h.

References getComponentBufferSize(), getHashType(), getKeyBufferSize(), HashJoin::layoutRequiresAdditionalBuffers(), and offsetBufferOff().

Referenced by payloadBufferOff(), toSet(), and toString().

266  {
267  if (layoutRequiresAdditionalBuffers(getHashType())) {
268  return offsetBufferOff() + getComponentBufferSize();
269  } else {
270  return getKeyBufferSize();
271  }
272  }
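
Together with getComponentBufferSize() (entry_count * sizeof(int32_t), documented below), this fixes the one-to-many buffer layout as keys, then offsets, then counts, then the row-id payload. The standalone sketch below spells out that offset arithmetic; the bodies of offsetBufferOff() and payloadBufferOff() are not reproduced on this page, so their placement here is inferred from the References lists rather than quoted.

#include <cstdint>
#include <cstdio>

// Inferred one-to-many layout: [keys][offsets][counts][payload row ids].
struct OneToManyLayoutSketch {
  size_t key_buffer_size;  // stand-in for getKeyBufferSize()
  size_t entry_count;      // stand-in for getEntryCount()

  size_t componentBufferSize() const { return entry_count * sizeof(int32_t); }
  size_t offsetBufferOff() const { return key_buffer_size; }
  size_t countBufferOff() const { return offsetBufferOff() + componentBufferSize(); }
  size_t payloadBufferOff() const { return countBufferOff() + componentBufferSize(); }
};

int main() {
  const OneToManyLayoutSketch layout{16384, 1024};
  std::printf("offsets at %zu, counts at %zu, payload at %zu\n",
              layout.offsetBufferOff(),
              layout.countBufferOff(),
              layout.payloadBufferOff());
}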

ColumnsForDevice BoundingBoxIntersectJoinHashTable::fetchColumnsForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
DeviceAllocator *  dev_buff_owner 
)
protected

Definition at line 946 of file BoundingBoxIntersectJoinHashTable.cpp.

References CHECK, column_cache_, executor_, HashJoin::fetchJoinColumn(), get_column_descriptor_maybe(), get_join_column_type_kind(), getEffectiveMemoryLevel(), and inner_outer_pairs_.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

949  {
950  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
951 
952  std::vector<JoinColumn> join_columns;
953  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
954  std::vector<JoinColumnTypeInfo> join_column_types;
955  std::vector<std::shared_ptr<void>> malloc_owner;
956  for (const auto& inner_outer_pair : inner_outer_pairs_) {
957  const auto inner_col = inner_outer_pair.first;
958  const auto inner_cd = get_column_descriptor_maybe(inner_col->getColumnKey());
959  if (inner_cd && inner_cd->isVirtualCol) {
960  throw FailedToJoinOnVirtualColumn();
961  }
962  join_columns.emplace_back(fetchJoinColumn(inner_col,
963  fragments,
964  effective_memory_level,
965  device_id,
966  chunks_owner,
967  dev_buff_owner,
968  malloc_owner,
969  executor_,
970  &column_cache_));
971  const auto& ti = inner_col->get_type_info();
972  join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
973  0,
974  0,
975  inline_int_null_value<int64_t>(),
976  false,
977  0,
978  get_join_column_type_kind(ti)});
979  CHECK(ti.is_array())
980  << "Bounding box intersection currently only supported for arrays.";
981  }
982  return {join_columns, join_column_types, chunks_owner, {}, malloc_owner};
983 }

void BoundingBoxIntersectJoinHashTable::generateCacheKey ( const size_t  max_hashtable_size,
const double  bucket_threshold,
const std::vector< double > &  bucket_sizes,
std::vector< std::vector< Fragmenter_Namespace::FragmentInfo >> &  fragments_per_device,
int  device_count 
)
inline protected

Definition at line 341 of file BoundingBoxIntersectJoinHashTable.h.

References HashJoin::collectFragmentIds(), hash_table_cache_, hash_value(), hashtable_cache_key_, and table_keys_.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

346  {
347  for (int device_id = 0; device_id < device_count; ++device_id) {
348  auto hash_val = boost::hash_value(hashtable_cache_key_[device_id]);
349  boost::hash_combine(hash_val, max_hashtable_size);
350  boost::hash_combine(hash_val, bucket_threshold);
351  boost::hash_combine(hash_val, bucket_sizes);
352  boost::hash_combine(hash_val,
353  HashJoin::collectFragmentIds(fragments_per_device[device_id]));
354  hashtable_cache_key_[device_id] = hash_val;
355  hash_table_cache_->addQueryPlanDagForTableKeys(hashtable_cache_key_[device_id],
356  table_keys_);
357  }
358  }
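
The resulting key is order-sensitive: each device starts from its current cache key and folds in the tuning parameters and that device's fragment ids via boost::hash_combine. A minimal standalone illustration of the same pattern (every value below is invented):

#include <cstddef>
#include <cstdio>
#include <vector>
#include <boost/functional/hash.hpp>

int main() {
  std::size_t hash_val = boost::hash_value(std::size_t{12345});  // per-device base key
  boost::hash_combine(hash_val, std::size_t{2097152});           // max_hashtable_size
  boost::hash_combine(hash_val, 0.1);                            // bucket_threshold
  boost::hash_combine(hash_val, std::vector<double>{0.5, 0.5});  // bucket_sizes
  boost::hash_combine(hash_val, std::vector<int>{0, 1, 2});      // fragment ids
  std::printf("cache key: %zu\n", hash_val);
}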

QueryPlanHash BoundingBoxIntersectJoinHashTable::getAlternativeCacheKey ( AlternativeCacheKeyForBoundingBoxIntersection &  info)
inline protected

Definition at line 321 of file BoundingBoxIntersectJoinHashTable.h.

References BoundingBoxIntersectJoinHashTable::AlternativeCacheKeyForBoundingBoxIntersection::bucket_threshold, BoundingBoxIntersectJoinHashTable::AlternativeCacheKeyForBoundingBoxIntersection::chunk_key_hash, BoundingBoxIntersectJoinHashTable::AlternativeCacheKeyForBoundingBoxIntersection::inner_outer_pairs, BoundingBoxIntersectJoinHashTable::AlternativeCacheKeyForBoundingBoxIntersection::inverse_bucket_sizes, BoundingBoxIntersectJoinHashTable::AlternativeCacheKeyForBoundingBoxIntersection::max_hashtable_size, BoundingBoxIntersectJoinHashTable::AlternativeCacheKeyForBoundingBoxIntersection::num_elements, and BoundingBoxIntersectJoinHashTable::AlternativeCacheKeyForBoundingBoxIntersection::optype.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

322  {
323  auto hash = info.chunk_key_hash;
324  for (InnerOuter inner_outer : info.inner_outer_pairs) {
325  auto inner_col = inner_outer.first;
326  auto rhs_col_var = dynamic_cast<const Analyzer::ColumnVar*>(inner_outer.second);
327  auto outer_col = rhs_col_var ? rhs_col_var : inner_col;
328  boost::hash_combine(hash, inner_col->toString());
329  if (inner_col->get_type_info().is_string()) {
330  boost::hash_combine(hash, outer_col->toString());
331  }
332  }
333  boost::hash_combine(hash, info.num_elements);
334  boost::hash_combine(hash, info.optype);
335  boost::hash_combine(hash, info.max_hashtable_size);
336  boost::hash_combine(hash, info.bucket_threshold);
337  boost::hash_combine(hash, info.inverse_bucket_sizes);
338  return hash;
339  }

std::optional< std::pair< size_t, size_t > > BoundingBoxIntersectJoinHashTable::getApproximateTupleCountFromCache ( QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier 
)
protected

Definition at line 1875 of file BoundingBoxIntersectJoinHashTable.cpp.

References HashtableCacheMetaInfo::bbox_intersect_meta_info, CHECK, getBoundingBoxIntersectMetaInfo(), and hash_table_cache_.

Referenced by approximateTupleCount().

1878  {
1879  CHECK(hash_table_cache_);
1880  HashtableCacheMetaInfo metaInfo;
1881  metaInfo.bbox_intersect_meta_info = getBoundingBoxIntersectMetaInfo();
1882  auto cached_hashtable =
1883  hash_table_cache_->getItemFromCache(key, item_type, device_identifier, metaInfo);
1884  if (cached_hashtable) {
1885  return std::make_pair(cached_hashtable->getEntryCount() / 2,
1886  cached_hashtable->getEmittedKeysCount());
1887  }
1888  return std::nullopt;
1889 }

std::optional<BoundingBoxIntersectMetaInfo> BoundingBoxIntersectJoinHashTable::getBoundingBoxIntersectMetaInfo ( )
inline protected

Definition at line 307 of file BoundingBoxIntersectJoinHashTable.h.

References HashtableCacheMetaInfo::bbox_intersect_meta_info, and hashtable_cache_meta_info_.

Referenced by getApproximateTupleCountFromCache(), initHashTableOnCpuFromCache(), and putHashTableOnCpuToCache().

307  {
308  return hashtable_cache_meta_info_.bbox_intersect_meta_info;
309  }

static BoundingBoxIntersectTuningParamRecycler* BoundingBoxIntersectJoinHashTable::getBoundingBoxIntersectTuningParamCache ( )
inline static

Definition at line 99 of file BoundingBoxIntersectJoinHashTable.h.

References auto_tuner_cache_, and CHECK.

Referenced by QueryRunner::QueryRunner::getNumberOfCachedItem(), and anonymous_namespace{DBHandler.cpp}::log_cache_size().

99  {
100  CHECK(auto_tuner_cache_);
101  return auto_tuner_cache_.get();
102  }

QueryPlanHash BoundingBoxIntersectJoinHashTable::getCacheKey ( int  device_id) const
inline protected

Definition at line 360 of file BoundingBoxIntersectJoinHashTable.h.

References hashtable_cache_key_.

360  {
361  return hashtable_cache_key_[device_id];
362  }
size_t BoundingBoxIntersectJoinHashTable::getComponentBufferSize ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 225 of file BoundingBoxIntersectJoinHashTable.h.

References CHECK, and HashJoin::hash_tables_for_device_.

Referenced by codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), countBufferOff(), and payloadBufferOff().

225  {
226  CHECK(!hash_tables_for_device_.empty());
227  auto hash_table = hash_tables_for_device_.front();
228  CHECK(hash_table);
229  return hash_table->getEntryCount() * sizeof(int32_t);
230  }

int BoundingBoxIntersectJoinHashTable::getDeviceCount ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 172 of file BoundingBoxIntersectJoinHashTable.h.

References device_count_.

Data_Namespace::MemoryLevel BoundingBoxIntersectJoinHashTable::getEffectiveMemoryLevel ( const std::vector< InnerOuter > &  inner_outer_pairs) const
protected

Definition at line 1836 of file BoundingBoxIntersectJoinHashTable.cpp.

References RegisteredQueryHint::bbox_intersect_allow_gpu_build, Data_Namespace::CPU_LEVEL, Data_Namespace::GPU_LEVEL, RegisteredQueryHint::isHintRegistered(), kBBoxIntersectAllowGpuBuild, memory_level_, and query_hints_.

Referenced by approximateTupleCount(), fetchColumnsForDevice(), reifyForDevice(), RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().


size_t BoundingBoxIntersectJoinHashTable::getEmittedKeysCount ( ) const
inline protected

Definition at line 219 of file BoundingBoxIntersectJoinHashTable.h.

References CHECK, and HashJoin::getHashTableForDevice().

219  {
220  auto hash_table = getHashTableForDevice(0);
221  CHECK(hash_table);
222  return hash_table->getEmittedKeysCount();
223  }

size_t BoundingBoxIntersectJoinHashTable::getEntryCount ( ) const
inline protected

Definition at line 213 of file BoundingBoxIntersectJoinHashTable.h.

References CHECK, and HashJoin::getHashTableForDevice().

Referenced by codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), and getKeyBufferSize().

213  {
214  auto hash_table = getHashTableForDevice(0);
215  CHECK(hash_table);
216  return hash_table->getEntryCount();
217  }

std::string BoundingBoxIntersectJoinHashTable::getHashJoinType ( ) const
inline final protected virtual

Implements HashJoin.

Definition at line 282 of file BoundingBoxIntersectJoinHashTable.h.

282  {
283  return "BoundingBoxIntersect";
284  }
static HashtableRecycler* BoundingBoxIntersectJoinHashTable::getHashTableCache ( )
inline static

Definition at line 93 of file BoundingBoxIntersectJoinHashTable.h.

References CHECK, and hash_table_cache_.

Referenced by QueryRunner::QueryRunner::getCachedHashtableWithoutCacheKey(), QueryRunner::QueryRunner::getCacheItemMetric(), QueryRunner::QueryRunner::getNumberOfCachedItem(), anonymous_namespace{DBHandler.cpp}::log_cache_size(), and CommandLineOptions::parse_command_line().

93  {
94  CHECK(hash_table_cache_);
95  return hash_table_cache_.get();
96  }

HashType BoundingBoxIntersectJoinHashTable::getHashType ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Reimplemented in RangeJoinHashTable.

Definition at line 159 of file BoundingBoxIntersectJoinHashTable.h.

References CHECK, HashJoin::getHashTableForDevice(), and layout_override_.

Referenced by codegenManyKey(), codegenMatchingSet(), countBufferOff(), getKeyBufferSize(), payloadBufferOff(), toSet(), and toString().

159  {
160  if (layout_override_) {
161  return *layout_override_;
162  }
163  auto hash_table = getHashTableForDevice(0);
164  CHECK(hash_table);
165  return hash_table->getLayout();
166  }

const std::vector<InnerOuter>& BoundingBoxIntersectJoinHashTable::getInnerOuterPairs ( ) const
inline protected

Definition at line 364 of file BoundingBoxIntersectJoinHashTable.h.

References inner_outer_pairs_.

Referenced by RangeJoinHashTable::isProbeCompressed().

{
  return inner_outer_pairs_;
}

shared::TableKey BoundingBoxIntersectJoinHashTable::getInnerTableId ( ) const
override protected virtual noexcept

Implements HashJoin.

Definition at line 1848 of file BoundingBoxIntersectJoinHashTable.cpp.

References CHECK, HashJoin::getInnerTableId(), and inner_outer_pairs_.

Referenced by RangeJoinHashTable::reifyWithLayout().

{
  try {
    return HashJoin::getInnerTableId(inner_outer_pairs_);
  } catch (...) {
    CHECK(false);
  }
  return {};
}

int BoundingBoxIntersectJoinHashTable::getInnerTableRteIdx ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 245 of file BoundingBoxIntersectJoinHashTable.h.

References CHECK, and inner_outer_pairs_.

{
  CHECK(!inner_outer_pairs_.empty());
  const auto first_inner_col = inner_outer_pairs_.front().first;
  return first_inner_col->get_rte_idx();
}

std::shared_ptr< BoundingBoxIntersectJoinHashTable > BoundingBoxIntersectJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper >  condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const int  device_count,
ColumnCacheMap &  column_cache,
Executor *  executor,
const HashTableBuildDagMap &  hashtable_build_dag_map,
const RegisteredQueryHint &  query_hint,
const TableIdToNodeMap &  table_id_to_node_map 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 39 of file BoundingBoxIntersectJoinHashTable.cpp.

References CHECK, CHECK_EQ, RegisteredQueryHint::force_baseline_hash_join, RegisteredQueryHint::force_one_to_many_hash_join, get_inner_query_info(), HashJoin::getHashTypeString(), HashJoin::getInnerTableId(), RangeJoinHashTable::getInstance(), Fragmenter_Namespace::TableInfo::getNumTuplesUpperBound(), InputTableInfo::info, logger::INFO, LOG, ManyToMany, HashJoin::MAX_NUM_HASH_ENTRIES, HashJoin::normalizeColumnPairs(), OneToMany, VLOG, and VLOGGING.

Referenced by HashJoin::getInstance().

{
  decltype(std::chrono::steady_clock::now()) ts1, ts2;
  auto copied_query_hints = query_hints;
  if (query_hints.force_one_to_many_hash_join) {
    LOG(INFO) << "Ignoring query hint \'force_one_to_many_hash_join\' for bounding box "
                 "intersection";
    copied_query_hints.force_one_to_many_hash_join = false;
  }
  if (query_hints.force_baseline_hash_join) {
    LOG(INFO) << "Ignoring query hint \'force_baseline_hash_join\' for bounding box "
                 "intersection";
    copied_query_hints.force_baseline_hash_join = false;
  }
  std::vector<InnerOuter> inner_outer_pairs;
  if (const auto range_expr =
          dynamic_cast<const Analyzer::RangeOper*>(condition->get_right_operand())) {
    return RangeJoinHashTable::getInstance(condition,
                                           range_expr,
                                           query_infos,
                                           memory_level,
                                           join_type,
                                           device_count,
                                           column_cache,
                                           executor,
                                           hashtable_build_dag_map,
                                           copied_query_hints,
                                           table_id_to_node_map);
  } else {
    inner_outer_pairs =
        HashJoin::normalizeColumnPairs(condition.get(), executor->getTemporaryTables())
            .first;
  }
  CHECK(!inner_outer_pairs.empty());

  const auto getHashTableType =
      [](const std::shared_ptr<Analyzer::BinOper> condition,
         const std::vector<InnerOuter>& inner_outer_pairs) -> HashType {
    HashType layout = HashType::OneToMany;
    if (condition->is_bbox_intersect_oper()) {
      CHECK_EQ(inner_outer_pairs.size(), size_t(1));
      if (inner_outer_pairs[0].first->get_type_info().is_array() &&
          inner_outer_pairs[0].second->get_type_info().is_array() &&
          // Bounds vs constructed points, former should yield ManyToMany
          inner_outer_pairs[0].second->get_type_info().get_size() == 32) {
        layout = HashType::ManyToMany;
      }
    }
    return layout;
  };

  const auto layout = getHashTableType(condition, inner_outer_pairs);

  if (VLOGGING(1)) {
    VLOG(1) << "Building geo hash table " << getHashTypeString(layout)
            << " for qual: " << condition->toString();
    ts1 = std::chrono::steady_clock::now();
  }

  const auto qi_0 = query_infos[0].info.getNumTuplesUpperBound();
  const auto qi_1 = query_infos[1].info.getNumTuplesUpperBound();

  VLOG(1) << "table_key = " << query_infos[0].table_key << " has " << qi_0 << " tuples.";
  VLOG(1) << "table_key = " << query_infos[1].table_key << " has " << qi_1 << " tuples.";

  const auto& query_info =
      get_inner_query_info(HashJoin::getInnerTableId(inner_outer_pairs), query_infos)
          .info;
  const auto total_entries = 2 * query_info.getNumTuplesUpperBound();
  if (total_entries > HashJoin::MAX_NUM_HASH_ENTRIES) {
    throw TooManyHashEntries();
  }

  auto join_hash_table =
      std::make_shared<BoundingBoxIntersectJoinHashTable>(condition,
                                                          join_type,
                                                          query_infos,
                                                          memory_level,
                                                          column_cache,
                                                          executor,
                                                          inner_outer_pairs,
                                                          device_count,
                                                          copied_query_hints,
                                                          hashtable_build_dag_map,
                                                          table_id_to_node_map);
  try {
    join_hash_table->reify(layout);
  } catch (const HashJoinFail& e) {
    throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
                                   "involved in bounding box intersection | ") +
                       e.what());
  } catch (const ColumnarConversionNotSupported& e) {
    throw HashJoinFail(
        std::string("Could not build hash tables for bounding box intersection | "
                    "Inner table too big. Attempt manual table reordering "
                    "or create a single fragment inner table. | ") +
        e.what());
  } catch (const JoinHashTableTooBig& e) {
    throw e;
  } catch (const std::exception& e) {
    throw HashJoinFail(
        std::string("Failed to build hash tables for bounding box intersection | ") +
        e.what());
  }
  if (VLOGGING(1)) {
    ts2 = std::chrono::steady_clock::now();
    VLOG(1) << "Built geo hash table " << getHashTypeString(layout) << " in "
            << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
            << " ms";
  }
  return join_hash_table;
}
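
For orientation, a minimal caller sketch. This is hypothetical: the variable names below (condition, query_infos, column_cache, executor, dag_map, hints, node_map) stand in for state owned by the in-flight query, and in the engine this dispatch is normally reached through HashJoin::getInstance rather than called directly.

  // Hedged sketch only; all inputs are assumed to exist in the calling context.
  auto bbox_ht = BoundingBoxIntersectJoinHashTable::getInstance(
      condition,                    // bbox-intersect qual (Analyzer::BinOper)
      query_infos,                  // per-input-table tuple counts and fragments
      Data_Namespace::CPU_LEVEL,    // preferred memory level for the build
      JoinType::INNER,
      /*device_count=*/1,
      column_cache,
      executor,
      dag_map,
      hints,                        // RegisteredQueryHint for this query
      node_map);

Note that a RangeOper right operand short-circuits to RangeJoinHashTable::getInstance before any of the layout selection above runs.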

size_t BoundingBoxIntersectJoinHashTable::getKeyBufferSize ( ) const
inline protected noexcept

Definition at line 251 of file BoundingBoxIntersectJoinHashTable.h.

References CHECK, getEntryCount(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by countBufferOff(), offsetBufferOff(), and payloadBufferOff().

{
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  const auto key_component_count = getKeyComponentCount();
  if (layoutRequiresAdditionalBuffers(getHashType())) {
    return getEntryCount() * key_component_count * key_component_width;
  } else {
    return getEntryCount() * (key_component_count + 1) * key_component_width;
  }
}

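For intuition, a worked example with assumed numbers (not taken from the source):

  // Assume getKeyComponentWidth() == 8, getKeyComponentCount() == 2 bucketed
  // dimensions, and getEntryCount() == 1,000,000.
  //   Layout with additional buffers (OneToMany / ManyToMany):
  //     1,000,000 * 2 * 8       = 16,000,000 bytes of keys
  //   OneToOne layout (one extra component per entry):
  //     1,000,000 * (2 + 1) * 8 = 24,000,000 bytes of keys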

size_t BoundingBoxIntersectJoinHashTable::getKeyComponentCount ( ) const
protected

Definition at line 1195 of file BoundingBoxIntersectJoinHashTable.cpp.

References CHECK, and inverse_bucket_sizes_for_dimension_.

Referenced by RangeJoinHashTable::codegenKey(), codegenKey(), codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), getKeyBufferSize(), RangeJoinHashTable::reifyForDevice(), reifyForDevice(), toSet(), and toString().

{
  CHECK(!inverse_bucket_sizes_for_dimension_.empty());
  return inverse_bucket_sizes_for_dimension_.size();
}

size_t BoundingBoxIntersectJoinHashTable::getKeyComponentWidth ( ) const
protected

Definition at line 1191 of file BoundingBoxIntersectJoinHashTable.cpp.

Referenced by calculateHashTableSize(), RangeJoinHashTable::codegenKey(), codegenKey(), codegenManyKey(), codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), getKeyBufferSize(), RangeJoinHashTable::reifyForDevice(), reifyForDevice(), toSet(), and toString().

{
  return 8;
}

Data_Namespace::MemoryLevel BoundingBoxIntersectJoinHashTable::getMemoryLevel ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 168 of file BoundingBoxIntersectJoinHashTable.h.

References memory_level_.

{
  return memory_level_;
}

const RegisteredQueryHint& BoundingBoxIntersectJoinHashTable::getRegisteredQueryHint ( )
inline protected

Definition at line 209 of file BoundingBoxIntersectJoinHashTable.h.

References query_hints_.

{
  return query_hints_;
}

std::shared_ptr< BaselineHashTable > BoundingBoxIntersectJoinHashTable::initHashTableOnCpu ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const BaselineHashTableEntryInfo  hash_table_entry_info,
const bool  skip_hashtable_caching 
)
protected

Definition at line 1327 of file BoundingBoxIntersectJoinHashTable.cpp.

References BBOX_INTERSECT_HT, CHECK, composite_key_info_, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, cpu_hash_table_buff_mutex_, DEBUG_TIMER, executor_, HashTableEntryInfo::getHashTableLayout(), hashtable_cache_key_, BaselineJoinHashTableBuilder::initHashTableOnCpu(), initHashTableOnCpuFromCache(), join_type_, layout_override_, HashJoin::layoutRequiresAdditionalBuffers(), ManyToMany, OneToMany, putHashTableOnCpuToCache(), query_hints_, to_string(), and VLOG.

Referenced by reifyForDevice().

{
  auto timer = DEBUG_TIMER(__func__);
  decltype(std::chrono::steady_clock::now()) ts1, ts2;
  ts1 = std::chrono::steady_clock::now();
  CHECK(!join_columns.empty());
  CHECK(!join_bucket_info.empty());
  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
  auto const hash_table_layout = hash_table_entry_info.getHashTableLayout();
  if (auto generic_hash_table =
          initHashTableOnCpuFromCache(hashtable_cache_key_.front(),
                                      CacheItemType::BBOX_INTERSECT_HT,
                                      DataRecyclerUtil::CPU_DEVICE_IDENTIFIER)) {
    if (auto hash_table =
            std::dynamic_pointer_cast<BaselineHashTable>(generic_hash_table)) {
      VLOG(1) << "Using cached CPU hash table for initialization.";
      // See if a hash table of a different layout was returned.
      // If it was OneToMany, we can reuse it on ManyToMany.
      if (hash_table_layout == HashType::ManyToMany &&
          hash_table->getLayout() == HashType::OneToMany) {
        // use the cached hash table
        layout_override_ = HashType::ManyToMany;
        return hash_table;
      }
      if (hash_table_layout == hash_table->getLayout()) {
        return hash_table;
      }
    }
  }
  CHECK(layoutRequiresAdditionalBuffers(hash_table_layout));
  const auto key_component_count =
      join_bucket_info[0].inverse_bucket_sizes_for_dimension.size();

  const auto key_handler = BoundingBoxIntersectKeyHandler(
      key_component_count,
      &join_columns[0],
      join_bucket_info[0].inverse_bucket_sizes_for_dimension.data());
  BaselineJoinHashTableBuilder builder;
  const StrProxyTranslationMapsPtrsAndOffsets
      dummy_str_proxy_translation_maps_ptrs_and_offsets;
  const auto err =
      builder.initHashTableOnCpu(&key_handler,
                                 composite_key_info_,
                                 join_columns,
                                 join_column_types,
                                 join_bucket_info,
                                 dummy_str_proxy_translation_maps_ptrs_and_offsets,
                                 hash_table_entry_info,
                                 join_type_,
                                 executor_,
                                 query_hints_);
  ts2 = std::chrono::steady_clock::now();
  if (err) {
    throw HashJoinFail(std::string("Unrecognized error when initializing CPU hash table "
                                   "for bounding box intersection(") +
                       std::to_string(err) + std::string(")"));
  }
  std::shared_ptr<BaselineHashTable> hash_table = builder.getHashTable();
  if (skip_hashtable_caching) {
    VLOG(1) << "Skip to cache join hashtable for bounding box intersection";
  } else {
    auto hashtable_build_time =
        std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count();
    putHashTableOnCpuToCache(hashtable_cache_key_.front(),
                             CacheItemType::BBOX_INTERSECT_HT,
                             hash_table,
                             DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
                             hashtable_build_time);
  }
  return hash_table;
}


std::shared_ptr< HashTable > BoundingBoxIntersectJoinHashTable::initHashTableOnCpuFromCache ( QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier 
)
protected

Definition at line 1857 of file BoundingBoxIntersectJoinHashTable.cpp.

References HashtableCacheMetaInfo::bbox_intersect_meta_info, CHECK, DEBUG_TIMER, getBoundingBoxIntersectMetaInfo(), hash_table_cache_, and VLOG.

Referenced by initHashTableOnCpu(), and RangeJoinHashTable::reifyWithLayout().

{
  auto timer = DEBUG_TIMER(__func__);
  VLOG(1) << "Checking CPU hash table cache.";
  CHECK(hash_table_cache_);
  HashtableCacheMetaInfo meta_info;
  meta_info.bbox_intersect_meta_info = getBoundingBoxIntersectMetaInfo();
  auto cached_hashtable =
      hash_table_cache_->getItemFromCache(key, item_type, device_identifier, meta_info);
  if (cached_hashtable) {
    return cached_hashtable;
  }
  return nullptr;
}


static void BoundingBoxIntersectJoinHashTable::invalidateCache ( )
inline static

Definition at line 67 of file BoundingBoxIntersectJoinHashTable.h.

References auto_tuner_cache_, CHECK, and hash_table_cache_.

{
  CHECK(auto_tuner_cache_);
  auto_tuner_cache_->clearCache();

  CHECK(hash_table_cache_);
  hash_table_cache_->clearCache();
}

bool BoundingBoxIntersectJoinHashTable::isBitwiseEq ( ) const
override protected virtual

Implements HashJoin.

Definition at line 1912 of file BoundingBoxIntersectJoinHashTable.cpp.

References condition_, and kBW_EQ.

{
  return condition_->get_optype() == kBW_EQ;
}

static void BoundingBoxIntersectJoinHashTable::markCachedItemAsDirty ( size_t  table_key)
inline static

Definition at line 75 of file BoundingBoxIntersectJoinHashTable.h.

References auto_tuner_cache_, BBOX_INTERSECT_AUTO_TUNER_PARAM, BBOX_INTERSECT_HT, CHECK, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, and hash_table_cache_.

{
  CHECK(hash_table_cache_);
  CHECK(auto_tuner_cache_);
  auto candidate_table_keys =
      hash_table_cache_->getMappedQueryPlanDagsWithTableKey(table_key);
  if (candidate_table_keys.has_value()) {
    auto_tuner_cache_->markCachedItemAsDirty(
        table_key,
        *candidate_table_keys,
        CacheItemType::BBOX_INTERSECT_AUTO_TUNER_PARAM,
        DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
    hash_table_cache_->markCachedItemAsDirty(table_key,
                                             *candidate_table_keys,
                                             CacheItemType::BBOX_INTERSECT_HT,
                                             DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
  }
}

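A hedged usage sketch of the two static cache-maintenance entry points; the table key value here is purely illustrative:

  // After a table is updated or dropped, mark every cached item derived
  // from it as dirty so it will not be reused.
  const size_t table_key = hashed_table_identity;  // hypothetical value
  BoundingBoxIntersectJoinHashTable::markCachedItemAsDirty(table_key);

  // Or wipe both the hash table cache and the tuning-parameter cache.
  BoundingBoxIntersectJoinHashTable::invalidateCache();
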
size_t BoundingBoxIntersectJoinHashTable::offsetBufferOff ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 262 of file BoundingBoxIntersectJoinHashTable.h.

References getKeyBufferSize().

Referenced by codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), countBufferOff(), toSet(), and toString().

{
  return getKeyBufferSize();
}

size_t BoundingBoxIntersectJoinHashTable::payloadBufferOff ( ) const
inline override protected virtual noexcept

Implements HashJoin.

Definition at line 274 of file BoundingBoxIntersectJoinHashTable.h.

References countBufferOff(), getComponentBufferSize(), getHashType(), getKeyBufferSize(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by toSet(), and toString().

{
  if (layoutRequiresAdditionalBuffers(getHashType())) {
    return countBufferOff() + getComponentBufferSize();
  } else {
    return getKeyBufferSize();
  }
}

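Taken together with offsetBufferOff() and countBufferOff(), these accessors imply the following flat buffer layout for layouts that require additional buffers (a sketch inferred from the offsets above, not a normative diagram):

  // [ keys ................. ]  offset 0, getKeyBufferSize() bytes
  // [ offset buffer ........ ]  starts at offsetBufferOff() == getKeyBufferSize()
  // [ count buffer ......... ]  starts at countBufferOff()
  // [ payload (row ids) .... ]  starts at payloadBufferOff()
  //                                == countBufferOff() + getComponentBufferSize()
  // For OneToOne layouts the trailing buffers are absent and
  // payloadBufferOff() == offsetBufferOff() == getKeyBufferSize().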

void BoundingBoxIntersectJoinHashTable::putHashTableOnCpuToCache ( QueryPlanHash  key,
CacheItemType  item_type,
std::shared_ptr< HashTable >  hashtable_ptr,
DeviceIdentifier  device_identifier,
size_t  hashtable_building_time 
)
protected

Definition at line 1891 of file BoundingBoxIntersectJoinHashTable.cpp.

References HashtableCacheMetaInfo::bbox_intersect_meta_info, CHECK, CPU, getBoundingBoxIntersectMetaInfo(), hash_table_cache_, and query_hints_.

Referenced by RangeJoinHashTable::initHashTableOnCpu(), and initHashTableOnCpu().

{
  CHECK(hash_table_cache_);
  CHECK(hashtable_ptr && !hashtable_ptr->getGpuBuffer());
  HashtableCacheMetaInfo meta_info;
  meta_info.bbox_intersect_meta_info = getBoundingBoxIntersectMetaInfo();
  meta_info.registered_query_hint = query_hints_;
  hash_table_cache_->putItemToCache(
      key,
      hashtable_ptr,
      item_type,
      device_identifier,
      hashtable_ptr->getHashTableBufferSize(ExecutorDeviceType::CPU),
      hashtable_building_time,
      meta_info);
}


void BoundingBoxIntersectJoinHashTable::reify ( const HashType  preferred_layout)
protected

Definition at line 1200 of file BoundingBoxIntersectJoinHashTable.cpp.

References CHECK, CHECK_EQ, CHECK_LT, composite_key_info_, condition_, DEBUG_TIMER, device_count_, executor_, HashJoin::getCompositeKeyInfo(), inner_outer_pairs_, ManyToMany, OneToMany, reifyWithLayout(), and VLOG.

{
  auto timer = DEBUG_TIMER(__func__);
  CHECK_LT(0, device_count_);
  composite_key_info_ = HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);

  CHECK(condition_->is_bbox_intersect_oper());
  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
  HashType layout;
  if (inner_outer_pairs_[0].second->get_type_info().is_fixlen_array() &&
      inner_outer_pairs_[0].second->get_type_info().get_size() == 32) {
    // bounds array
    layout = HashType::ManyToMany;
  } else {
    layout = HashType::OneToMany;
  }
  try {
    reifyWithLayout(layout);
    return;
  } catch (const JoinHashTableTooBig& e) {
    throw e;
  } catch (const std::exception& e) {
    VLOG(1) << "Caught exception while building baseline hash table for bounding box "
               "intersection: "
            << e.what();
    throw;
  }
}

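The 32-byte fixed-length array test above is a bounds-column check: assuming the usual 2D bounds encoding, 32 bytes is four 8-byte doubles (xmin, ymin, xmax, ymax), so a precomputed bounds array on the outer side selects ManyToMany while everything else falls back to OneToMany.

  // Decision implemented above, restated:
  //   outer side is a fixlen array of 32 bytes (a bounds box) -> ManyToMany
  //   otherwise                                                -> OneToMany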

void BoundingBoxIntersectJoinHashTable::reifyForDevice ( const ColumnsForDevice &  columns_for_device,
const HashType  layout,
const size_t  entry_count,
const size_t  emitted_keys_count,
const bool  skip_hashtable_caching,
const int  device_id,
const logger::ThreadLocalIds  parent_thread_local_ids 
)
protected

Definition at line 1269 of file BoundingBoxIntersectJoinHashTable.cpp.

References CHECK, CHECK_EQ, CHECK_LT, Data_Namespace::CPU_LEVEL, DEBUG_TIMER_NEW_THREAD, getEffectiveMemoryLevel(), getKeyComponentCount(), getKeyComponentWidth(), Data_Namespace::GPU_LEVEL, HashJoin::hash_tables_for_device_, initHashTableOnCpu(), inner_outer_pairs_, ColumnsForDevice::join_buckets, ColumnsForDevice::join_column_types, ColumnsForDevice::join_columns, HashJoin::layoutRequiresAdditionalBuffers(), memory_level_, logger::ThreadLocalIds::setNewThreadId(), logger::ThreadLocalIds::thread_id_, UNREACHABLE, and VLOG.

Referenced by reifyImpl().

{
  logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
  DEBUG_TIMER_NEW_THREAD(parent_thread_local_ids.thread_id_);
  CHECK_EQ(getKeyComponentWidth(), size_t(8));
  CHECK(layoutRequiresAdditionalBuffers(layout));
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
  BaselineHashTableEntryInfo hash_table_entry_info(entry_count,
                                                   emitted_keys_count,
                                                   sizeof(int32_t),
                                                   getKeyComponentCount(),
                                                   getKeyComponentWidth(),
                                                   layout,
                                                   false);
  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
    VLOG(1) << "Building join hash table for bounding box intersection on CPU.";
    auto hash_table = initHashTableOnCpu(columns_for_device.join_columns,
                                         columns_for_device.join_column_types,
                                         columns_for_device.join_buckets,
                                         hash_table_entry_info,
                                         skip_hashtable_caching);
    CHECK(hash_table);

#ifdef HAVE_CUDA
    if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
      auto gpu_hash_table = copyCpuHashTableToGpu(hash_table, device_id);
      CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
      hash_tables_for_device_[device_id] = std::move(gpu_hash_table);
    } else {
#else
    CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
#endif
      CHECK_EQ(hash_tables_for_device_.size(), size_t(1));
      hash_tables_for_device_[0] = hash_table;
#ifdef HAVE_CUDA
    }
#endif
  } else {
#ifdef HAVE_CUDA
    auto hash_table = initHashTableOnGpu(columns_for_device.join_columns,
                                         columns_for_device.join_column_types,
                                         columns_for_device.join_buckets,
                                         hash_table_entry_info,
                                         device_id);
    CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
    hash_tables_for_device_[device_id] = std::move(hash_table);
#else
    UNREACHABLE();
#endif
  }
}

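A condensed restatement of the branches above (the HAVE_CUDA guards are as in the code; nothing new is added):

  // effective_memory_level == CPU_LEVEL:
  //   build the table on CPU via initHashTableOnCpu();
  //   if memory_level_ == GPU_LEVEL (and CUDA is available), copy the CPU
  //   table to the device with copyCpuHashTableToGpu(), one per device_id
  // effective_memory_level == GPU_LEVEL:
  //   build directly on the device via initHashTableOnGpu();
  //   UNREACHABLE() in CPU-only builds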

void BoundingBoxIntersectJoinHashTable::reifyImpl ( std::vector< ColumnsForDevice > &  columns_per_device,
const Fragmenter_Namespace::TableInfo &  query_info,
const HashType  layout,
const size_t  shard_count,
const size_t  entry_count,
const size_t  emitted_keys_count,
const bool  skip_hashtable_caching,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
protected virtual

Definition at line 1228 of file BoundingBoxIntersectJoinHashTable.cpp.

References threading_serial::async(), chosen_bbox_intersect_bucket_threshold_, chosen_bbox_intersect_max_table_size_bytes_, device_count_, Fragmenter_Namespace::TableInfo::fragments, inverse_bucket_sizes_for_dimension_, only_shards_for_device(), reifyForDevice(), setBoundingBoxIntersectionMetaInfo(), and logger::thread_local_ids().

Referenced by reifyWithLayout().

{
  std::vector<std::future<void>> init_threads;
  chosen_bbox_intersect_bucket_threshold_ = chosen_bucket_threshold;
  chosen_bbox_intersect_max_table_size_bytes_ = chosen_max_hashtable_size;
  setBoundingBoxIntersectionMetaInfo(chosen_bbox_intersect_max_table_size_bytes_,
                                     chosen_bbox_intersect_bucket_threshold_,
                                     inverse_bucket_sizes_for_dimension_);

  for (int device_id = 0; device_id < device_count_; ++device_id) {
    const auto fragments =
        shard_count
            ? only_shards_for_device(query_info.fragments, device_id, device_count_)
            : query_info.fragments;
    init_threads.push_back(std::async(std::launch::async,
                                      &BoundingBoxIntersectJoinHashTable::reifyForDevice,
                                      this,
                                      columns_per_device[device_id],
                                      layout,
                                      entry_count,
                                      emitted_keys_count,
                                      skip_hashtable_caching,
                                      device_id,
                                      logger::thread_local_ids()));
  }
  // Wait on every builder future first, then call get() so that any exception
  // thrown on a worker thread is propagated only after all threads finished.
  for (auto& init_thread : init_threads) {
    init_thread.wait();
  }
  for (auto& init_thread : init_threads) {
    init_thread.get();
  }
}


void BoundingBoxIntersectJoinHashTable::reifyWithLayout ( const HashType  layout)
protected virtual

Reimplemented in RangeJoinHashTable.

Definition at line 552 of file BoundingBoxIntersectJoinHashTable.cpp.

References gpu_enabled::accumulate(), auto_tuner_cache_, RegisteredQueryHint::bbox_intersect_allow_gpu_build, BBOX_INTERSECT_AUTO_TUNER_PARAM, RegisteredQueryHint::bbox_intersect_bucket_threshold, BBOX_INTERSECT_HT, RegisteredQueryHint::bbox_intersect_keys_per_bin, RegisteredQueryHint::bbox_intersect_max_size, CompositeKeyInfo::cache_key_chunks, calculateHashTableSize(), CHECK, CHECK_EQ, CHECK_GE, HashJoin::collectFragmentIds(), composite_key_info_, computeHashTableCounts(), condition_, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, DEBUG_TIMER, device_count_, executor_, fetchColumnsForDevice(), g_bbox_intersect_max_table_size_bytes, g_bbox_intersect_target_entries_per_bin, generateCacheKey(), get_inner_query_info(), getAlternativeCacheKey(), DataRecyclerUtil::getAlternativeTableKeys(), getEffectiveMemoryLevel(), HashtableRecycler::getHashtableAccessPathInfo(), HashJoin::getHashTypeString(), HashJoin::getInnerTableId(), getQueryEngineCudaStreamForDevice(), Data_Namespace::GPU_LEVEL, hash_table_cache_, hash_value(), hashtable_build_dag_map_, hashtable_cache_key_, hashtable_cache_meta_info_, InputTableInfo::info, inner_outer_pairs_, inverse_bucket_sizes_for_dimension_, RegisteredQueryHint::isHintRegistered(), HashtableRecycler::isInvalidHashTableCacheKey(), join_type_, kBBoxIntersectAllowGpuBuild, kBBoxIntersectBucketThreshold, kBBoxIntersectKeysPerBin, kBBoxIntersectMaxSize, kBBoxIntersectNoCache, HashJoin::layoutRequiresAdditionalBuffers(), memory_level_, only_shards_for_device(), query_hints_, query_infos_, reifyImpl(), setBoundingBoxIntersectionMetaInfo(), setInverseBucketSizeInfo(), shardCount(), table_keys_, and VLOG.

Referenced by reify().

{
  auto timer = DEBUG_TIMER(__func__);
  CHECK(layoutRequiresAdditionalBuffers(layout));
  const auto& query_info =
      get_inner_query_info(HashJoin::getInnerTableId(inner_outer_pairs_), query_infos_)
          .info;
  auto [db_id, table_id] = HashJoin::getInnerTableId(inner_outer_pairs_);
  VLOG(1) << "Reify with layout " << getHashTypeString(layout) << " for db_id: " << db_id
          << ", table_id: " << table_id;
  if (query_info.fragments.empty()) {
    return;
  }

  auto bbox_intersect_max_table_size_bytes = g_bbox_intersect_max_table_size_bytes;
  std::optional<double> bbox_intersect_threshold_override;
  double bbox_intersect_target_entries_per_bin = g_bbox_intersect_target_entries_per_bin;
  auto skip_hashtable_caching = false;
  if (query_hints_.isHintRegistered(kBBoxIntersectBucketThreshold)) {
    VLOG(1) << "Setting bounding box intersection bucket threshold "
               "\'bbox_intersect_bucket_threshold\' via "
               "query hint: "
            << query_hints_.bbox_intersect_bucket_threshold;
    bbox_intersect_threshold_override = query_hints_.bbox_intersect_bucket_threshold;
  }
  if (query_hints_.isHintRegistered(kBBoxIntersectMaxSize)) {
    std::ostringstream oss;
    oss << "User requests to change a threshold \'bbox_intersect_max_table_size_bytes\' "
           "via "
           "query hint";
    if (!bbox_intersect_threshold_override.has_value()) {
      oss << ": " << bbox_intersect_max_table_size_bytes << " -> "
          << query_hints_.bbox_intersect_max_size;
      bbox_intersect_max_table_size_bytes = query_hints_.bbox_intersect_max_size;
    } else {
      oss << ", but is skipped since the query hint also changes the threshold "
             "\'bbox_intersect_bucket_threshold\'";
    }
    VLOG(1) << oss.str();
  }
  if (query_hints_.isHintRegistered(kBBoxIntersectNoCache)) {
    VLOG(1) << "User requests to skip caching join hashtable for bounding box "
               "intersection and its tuned "
               "parameters for this query";
    skip_hashtable_caching = true;
  }
  if (query_hints_.isHintRegistered(kBBoxIntersectKeysPerBin)) {
    VLOG(1) << "User requests to change a threshold \'bbox_intersect_keys_per_bin\' via "
               "query "
               "hint: "
            << bbox_intersect_target_entries_per_bin << " -> "
            << query_hints_.bbox_intersect_keys_per_bin;
    bbox_intersect_target_entries_per_bin = query_hints_.bbox_intersect_keys_per_bin;
  }

  auto data_mgr = executor_->getDataMgr();
  // We prioritize CPU when building a join hashtable for bounding box intersection,
  // but if a GPU is present and the corresponding hint is given, we selectively allow
  // the GPU to build it. Even with a GPU available, if the user forces CPU execution
  // mode we must not use the GPU for the build.
  auto allow_gpu_hashtable_build =
      query_hints_.isHintRegistered(kBBoxIntersectAllowGpuBuild) &&
      query_hints_.bbox_intersect_allow_gpu_build;
  if (allow_gpu_hashtable_build) {
    if (data_mgr->gpusPresent() &&
        memory_level_ == Data_Namespace::GPU_LEVEL) {
      VLOG(1) << "A user forces to build GPU hash table for bounding box intersection";
    } else {
      allow_gpu_hashtable_build = false;
      VLOG(1) << "A user forces to build GPU hash table for bounding box intersection "
                 "but we skip it since either GPU is not presented or CPU execution mode "
                 "is set";
    }
  }

  std::vector<ColumnsForDevice> columns_per_device;
  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
  if (memory_level_ == Data_Namespace::GPU_LEVEL &&
      allow_gpu_hashtable_build) {
    for (int device_id = 0; device_id < device_count_; ++device_id) {
      dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(
          data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id)));
    }
  }

  std::vector<std::vector<Fragmenter_Namespace::FragmentInfo>> fragments_per_device;
  const auto shard_count = shardCount();
  size_t total_num_tuples = 0;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    fragments_per_device.emplace_back(
        shard_count
            ? only_shards_for_device(query_info.fragments, device_id, device_count_)
            : query_info.fragments);
    const size_t crt_num_tuples =
        std::accumulate(fragments_per_device.back().begin(),
                        fragments_per_device.back().end(),
                        size_t(0),
                        [](const auto& sum, const auto& fragment) {
                          return sum + fragment.getNumTuples();
                        });
    total_num_tuples += crt_num_tuples;
    const auto columns_for_device =
        fetchColumnsForDevice(fragments_per_device.back(),
                              device_id,
                              memory_level_ == Data_Namespace::GPU_LEVEL &&
                                      allow_gpu_hashtable_build
                                  ? dev_buff_owners[device_id].get()
                                  : nullptr);
    columns_per_device.push_back(columns_for_device);
  }

  // try to extract cache key for hash table and its relevant info
  auto hashtable_access_path_info =
      HashtableRecycler::getHashtableAccessPathInfo(inner_outer_pairs_,
                                                    {},
                                                    condition_->get_optype(),
                                                    join_type_,
                                                    hashtable_build_dag_map_,
                                                    device_count_,
                                                    shard_count,
                                                    fragments_per_device,
                                                    executor_);
  hashtable_cache_key_ = hashtable_access_path_info.hashed_query_plan_dag;
  hashtable_cache_meta_info_ = hashtable_access_path_info.meta_info;
  table_keys_ = hashtable_access_path_info.table_keys;

  auto get_inner_table_key = [this]() {
    auto col_var = inner_outer_pairs_.front().first;
    return col_var->getTableKey();
  };

  if (table_keys_.empty()) {
    const auto& table_key = get_inner_table_key();
    table_keys_ = DataRecyclerUtil::getAlternativeTableKeys(
        composite_key_info_.cache_key_chunks, table_key);
  }
  CHECK(!table_keys_.empty());

  if (bbox_intersect_threshold_override) {
    // compute bucket sizes based on the user provided threshold
    BucketSizeTuner tuner(/*initial_threshold=*/*bbox_intersect_threshold_override,
                          /*step=*/1.0,
                          /*min_threshold=*/0.0,
                          getEffectiveMemoryLevel(inner_outer_pairs_),
                          columns_per_device,
                          inner_outer_pairs_,
                          total_num_tuples,
                          executor_);
    const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();

    auto [entry_count, emitted_keys_count] =
        computeHashTableCounts(shard_count,
                               inverse_bucket_sizes,
                               columns_per_device,
                               bbox_intersect_max_table_size_bytes,
                               *bbox_intersect_threshold_override);
    setInverseBucketSizeInfo(inverse_bucket_sizes, columns_per_device, device_count_);
    // reifyImpl will check the hash table cache for an appropriate hash table w/ those
    // bucket sizes (or within tolerances) if a hash table exists use it, otherwise build
    // one
    generateCacheKey(bbox_intersect_max_table_size_bytes,
                     *bbox_intersect_threshold_override,
                     inverse_bucket_sizes,
                     fragments_per_device,
                     device_count_);
    reifyImpl(columns_per_device,
              query_info,
              layout,
              shard_count,
              entry_count,
              emitted_keys_count,
              skip_hashtable_caching,
              bbox_intersect_max_table_size_bytes,
              *bbox_intersect_threshold_override);
  } else {
    double bbox_intersect_bucket_threshold = std::numeric_limits<double>::max();
    generateCacheKey(bbox_intersect_max_table_size_bytes,
                     bbox_intersect_bucket_threshold,
                     {},
                     fragments_per_device,
                     device_count_);
    std::vector<size_t> per_device_chunk_key;
    if (HashtableRecycler::isInvalidHashTableCacheKey(hashtable_cache_key_) &&
        get_inner_table_key().table_id > 0) {
      for (int device_id = 0; device_id < device_count_; ++device_id) {
        auto chunk_key_hash =
            boost::hash_value(composite_key_info_.cache_key_chunks[device_id]);
        boost::hash_combine(
            chunk_key_hash,
            HashJoin::collectFragmentIds(fragments_per_device[device_id]));
        per_device_chunk_key.push_back(chunk_key_hash);
        AlternativeCacheKeyForBoundingBoxIntersection cache_key{
            inner_outer_pairs_,
            columns_per_device.front().join_columns.front().num_elems,
            chunk_key_hash,
            condition_->get_optype(),
            bbox_intersect_max_table_size_bytes,
            bbox_intersect_bucket_threshold,
            {}};
        hashtable_cache_key_[device_id] = getAlternativeCacheKey(cache_key);
        hash_table_cache_->addQueryPlanDagForTableKeys(hashtable_cache_key_[device_id],
                                                       table_keys_);
      }
    }

    auto cached_bucket_threshold = auto_tuner_cache_->getItemFromCache(
        hashtable_cache_key_.front(),
        CacheItemType::BBOX_INTERSECT_AUTO_TUNER_PARAM,
        DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
    if (cached_bucket_threshold) {
      bbox_intersect_bucket_threshold = cached_bucket_threshold->bucket_threshold;
      auto inverse_bucket_sizes = cached_bucket_threshold->bucket_sizes;
      setBoundingBoxIntersectionMetaInfo(bbox_intersect_max_table_size_bytes,
                                         bbox_intersect_bucket_threshold,
                                         inverse_bucket_sizes);
      generateCacheKey(bbox_intersect_max_table_size_bytes,
                       bbox_intersect_bucket_threshold,
                       inverse_bucket_sizes,
                       fragments_per_device,
                       device_count_);

      if (auto hash_table =
              hash_table_cache_->getItemFromCache(hashtable_cache_key_[device_count_],
                                                  CacheItemType::BBOX_INTERSECT_HT,
                                                  DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
                                                  std::nullopt)) {
        // if we already have a built hash table, we can skip the scans required for
        // computing bucket size and tuple count
        // reset as the hash table sizes can vary a bit
        setInverseBucketSizeInfo(inverse_bucket_sizes, columns_per_device, device_count_);
        CHECK(hash_table);

        VLOG(1) << "Using cached hash table bucket size";

        reifyImpl(columns_per_device,
                  query_info,
                  layout,
                  shard_count,
                  hash_table->getEntryCount(),
                  hash_table->getEmittedKeysCount(),
                  skip_hashtable_caching,
                  bbox_intersect_max_table_size_bytes,
                  bbox_intersect_bucket_threshold);
      } else {
        VLOG(1) << "Computing bucket size for cached bucket threshold";
        // compute bucket size using our cached tuner value
        BucketSizeTuner tuner(/*initial_threshold=*/bbox_intersect_bucket_threshold,
                              /*step=*/1.0,
                              /*min_threshold=*/0.0,
                              getEffectiveMemoryLevel(inner_outer_pairs_),
                              columns_per_device,
                              inner_outer_pairs_,
                              total_num_tuples,
                              executor_);

        const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();

        auto [entry_count, emitted_keys_count] =
            computeHashTableCounts(shard_count,
                                   inverse_bucket_sizes,
                                   columns_per_device,
                                   bbox_intersect_max_table_size_bytes,
                                   bbox_intersect_bucket_threshold);
        setInverseBucketSizeInfo(inverse_bucket_sizes, columns_per_device, device_count_);

        generateCacheKey(bbox_intersect_max_table_size_bytes,
                         bbox_intersect_bucket_threshold,
                         inverse_bucket_sizes,
                         fragments_per_device,
                         device_count_);

        reifyImpl(columns_per_device,
                  query_info,
                  layout,
                  shard_count,
                  entry_count,
                  emitted_keys_count,
                  skip_hashtable_caching,
                  bbox_intersect_max_table_size_bytes,
                  bbox_intersect_bucket_threshold);
      }
    } else {
      // compute bucket size using the auto tuner
      BucketSizeTuner tuner(
          /*initial_threshold=*/bbox_intersect_bucket_threshold,
          /*step=*/2.0,
          /*min_threshold=*/1e-7,
          getEffectiveMemoryLevel(inner_outer_pairs_),
          columns_per_device,
          inner_outer_pairs_,
          total_num_tuples,
          executor_);

      VLOG(1) << "Running auto tune logic for bounding box intersection with parameters: "
              << tuner;

      // manages the tuning state machine
      TuningState tuning_state(bbox_intersect_max_table_size_bytes,
                               bbox_intersect_target_entries_per_bin);
      while (tuner.tuneOneStep(tuning_state.tuning_direction)) {
        const auto inverse_bucket_sizes = tuner.getInverseBucketSizes();

        const auto [crt_entry_count, crt_emitted_keys_count] =
            computeHashTableCounts(shard_count,
                                   inverse_bucket_sizes,
                                   columns_per_device,
                                   tuning_state.bbox_intersect_max_table_size_bytes,
                                   tuning_state.chosen_bbox_intersect_threshold);
        const size_t hash_table_size = calculateHashTableSize(
            inverse_bucket_sizes.size(), crt_emitted_keys_count, crt_entry_count);
        HashTableProps crt_props(crt_entry_count,
                                 crt_emitted_keys_count,
                                 hash_table_size,
                                 inverse_bucket_sizes);
        VLOG(1) << "Tuner output: " << tuner << " with properties " << crt_props;

        const auto should_continue = tuning_state(crt_props, tuner.getMinBucketSize());
        setInverseBucketSizeInfo(
            tuning_state.crt_props.bucket_sizes, columns_per_device, device_count_);
        if (!should_continue) {
          break;
        }
      }

      const auto& crt_props = tuning_state.crt_props;
      // sanity check that the hash table size has not changed. this is a fairly
      // inexpensive check to ensure the above algorithm is consistent
      const size_t hash_table_size =
          calculateHashTableSize(inverse_bucket_sizes_for_dimension_.size(),
                                 crt_props.emitted_keys_count,
                                 crt_props.entry_count);
      CHECK_EQ(crt_props.hash_table_size, hash_table_size);

      if (inverse_bucket_sizes_for_dimension_.empty() ||
          hash_table_size > bbox_intersect_max_table_size_bytes) {
        VLOG(1) << "Could not find suitable parameters to create hash "
                   "table for bounding box intersection under max allowed size ("
                << bbox_intersect_max_table_size_bytes << ") bytes.";
        throw TooBigHashTableForBoundingBoxIntersect(bbox_intersect_max_table_size_bytes);
      }

      VLOG(1) << "Final tuner output: " << tuner << " with properties " << crt_props;
      CHECK(!inverse_bucket_sizes_for_dimension_.empty());
      VLOG(1) << "Final bucket sizes: ";
      for (size_t dim = 0; dim < inverse_bucket_sizes_for_dimension_.size(); dim++) {
        VLOG(1) << "dim[" << dim
                << "]: " << 1.0 / inverse_bucket_sizes_for_dimension_[dim];
      }
      CHECK_GE(tuning_state.chosen_bbox_intersect_threshold, double(0));
      generateCacheKey(tuning_state.bbox_intersect_max_table_size_bytes,
                       tuning_state.chosen_bbox_intersect_threshold,
                       {},
                       fragments_per_device,
                       device_count_);
      const auto candidate_auto_tuner_cache_key = hashtable_cache_key_.front();
      if (skip_hashtable_caching) {
        VLOG(1) << "Skip to add tuned parameters to auto tuner";
      } else {
        AutoTunerMetaInfo meta_info{tuning_state.bbox_intersect_max_table_size_bytes,
                                    tuning_state.chosen_bbox_intersect_threshold,
                                    inverse_bucket_sizes_for_dimension_};
        auto_tuner_cache_->putItemToCache(candidate_auto_tuner_cache_key,
                                          meta_info,
                                          CacheItemType::BBOX_INTERSECT_AUTO_TUNER_PARAM,
                                          DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
                                          0,
                                          0);
      }
      bbox_intersect_bucket_threshold = tuning_state.chosen_bbox_intersect_threshold;
      reifyImpl(columns_per_device,
                query_info,
                layout,
                shard_count,
                crt_props.entry_count,
                crt_props.emitted_keys_count,
                skip_hashtable_caching,
                bbox_intersect_max_table_size_bytes,
                bbox_intersect_bucket_threshold);
    }
  }
}
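
Because this function is long, a schematic of its control flow may help; this is a paraphrase of the code above, not additional behavior:

  // 1. apply query hints: bucket threshold, max table size, keys per bin,
  //    no-cache, allow-gpu-build
  // 2. fetch join columns per device (through CUDA allocators when a GPU
  //    build is allowed)
  // 3. derive hash table cache keys from the query plan DAG (or an
  //    alternative chunk-based key when the DAG key is invalid)
  // 4. if the user pinned a bucket threshold: tune buckets once, then reify
  // 5. else if the auto-tuner cache holds a threshold: reuse it, and reuse a
  //    cached hash table outright when one matches
  // 6. else run the BucketSizeTuner state machine, cache the tuned
  //    parameters, then reify with the chosen bucket sizes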


void BoundingBoxIntersectJoinHashTable::setBoundingBoxIntersectionMetaInfo ( size_t  max_table_size_bytes,
double  bucket_threshold,
std::vector< double > &  bucket_sizes 
)
inline protected

Definition at line 368 of file BoundingBoxIntersectJoinHashTable.h.

References BoundingBoxIntersectMetaInfo::bbox_intersect_bucket_threshold, BoundingBoxIntersectMetaInfo::bbox_intersect_max_table_size_bytes, HashtableCacheMetaInfo::bbox_intersect_meta_info, BoundingBoxIntersectMetaInfo::bucket_sizes, and hashtable_cache_meta_info_.

Referenced by reifyImpl(), RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  BoundingBoxIntersectMetaInfo bbox_intersect_meta_info;
  bbox_intersect_meta_info.bucket_sizes = bucket_sizes;
  bbox_intersect_meta_info.bbox_intersect_max_table_size_bytes = max_table_size_bytes;
  bbox_intersect_meta_info.bbox_intersect_bucket_threshold = bucket_threshold;
  HashtableCacheMetaInfo meta_info;
  meta_info.bbox_intersect_meta_info = bbox_intersect_meta_info;
  hashtable_cache_meta_info_ = meta_info;
}

void BoundingBoxIntersectJoinHashTable::setInverseBucketSizeInfo ( const std::vector< double > &  inverse_bucket_sizes,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  device_count 
)
protected

Definition at line 1175 of file BoundingBoxIntersectJoinHashTable.cpp.

References CHECK_EQ, inner_outer_pairs_, and inverse_bucket_sizes_for_dimension_.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  // set global bucket size
  inverse_bucket_sizes_for_dimension_ = inverse_bucket_sizes;

  // re-compute bucket counts per device based on global bucket size
  CHECK_EQ(columns_per_device.size(), static_cast<size_t>(device_count));
  for (size_t device_id = 0; device_id < device_count; ++device_id) {
    auto& columns_for_device = columns_per_device[device_id];
    columns_for_device.setBucketInfo(inverse_bucket_sizes_for_dimension_,
                                     inner_outer_pairs_);
  }
}

size_t BoundingBoxIntersectJoinHashTable::shardCount ( ) const
inline protected

Definition at line 232 of file BoundingBoxIntersectJoinHashTable.h.

References condition_, executor_, BaselineJoinHashTable::getShardCountForCondition(), Data_Namespace::GPU_LEVEL, inner_outer_pairs_, and memory_level_.

Referenced by RangeJoinHashTable::reifyWithLayout(), and reifyWithLayout().

{
  if (memory_level_ != Data_Namespace::GPU_LEVEL) {
    return 0;
  }
  return BaselineJoinHashTable::getShardCountForCondition(
      condition_.get(), executor_, inner_outer_pairs_);
}

std::set< DecodedJoinHashBufferEntry > BoundingBoxIntersectJoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override protected virtual

Implements HashJoin.

Definition at line 1800 of file BoundingBoxIntersectJoinHashTable.cpp.

References CHECK, countBufferOff(), HashJoin::getHashTableForDevice(), getHashType(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), getQueryEngineCudaStreamForDevice(), GPU, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toSet().

{
  auto buffer = getJoinHashBuffer(device_type, device_id);
  auto hash_table = getHashTableForDevice(device_id);
  CHECK(hash_table);
  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
#ifdef HAVE_CUDA
  std::unique_ptr<int8_t[]> buffer_copy;
  if (device_type == ExecutorDeviceType::GPU) {
    buffer_copy = std::make_unique<int8_t[]>(buffer_size);
    CHECK(executor_);
    auto data_mgr = executor_->getDataMgr();
    auto allocator = std::make_unique<CudaAllocator>(
        data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));

    allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
  }
  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
#else
  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
#endif  // HAVE_CUDA
  auto ptr2 = ptr1 + offsetBufferOff();
  auto ptr3 = ptr1 + countBufferOff();
  auto ptr4 = ptr1 + payloadBufferOff();
  const auto layout = getHashType();
  return HashTable::toSet(getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
                          getKeyComponentWidth(),
                          hash_table->getEntryCount(),
                          ptr1,
                          ptr2,
                          ptr3,
                          ptr4,
                          buffer_size);
}

std::string BoundingBoxIntersectJoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
override protected virtual

Implements HashJoin.

Definition at line 1754 of file BoundingBoxIntersectJoinHashTable.cpp.

References CHECK, CHECK_LT, countBufferOff(), getHashType(), HashJoin::getHashTypeString(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), getQueryEngineCudaStreamForDevice(), GPU, HashJoin::hash_tables_for_device_, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toString().

{
  auto buffer = getJoinHashBuffer(device_type, device_id);
  if (!buffer) {
    return "EMPTY";
  }
  CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
  auto hash_table = hash_tables_for_device_[device_id];
  CHECK(hash_table);
  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
#ifdef HAVE_CUDA
  std::unique_ptr<int8_t[]> buffer_copy;
  if (device_type == ExecutorDeviceType::GPU) {
    buffer_copy = std::make_unique<int8_t[]>(buffer_size);
    CHECK(executor_);
    auto data_mgr = executor_->getDataMgr();
    auto device_allocator = std::make_unique<CudaAllocator>(
        data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));

    device_allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
  }
  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
#else
  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
#endif  // HAVE_CUDA
  auto ptr2 = ptr1 + offsetBufferOff();
  auto ptr3 = ptr1 + countBufferOff();
  auto ptr4 = ptr1 + payloadBufferOff();
  CHECK(hash_table);
  const auto layout = getHashType();
  return HashTable::toString(
      "geo",
      getHashTypeString(layout),
      getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
      getKeyComponentWidth(),
      hash_table->getEntryCount(),
      ptr1,
      ptr2,
      ptr3,
      ptr4,
      buffer_size,
      raw);
}

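Both decoders are protected, so call sites of this shape would live in the class itself, a subclass, or test code; a hedged sketch:

  // Dump a CPU-resident hash table in human-readable form for debugging.
  VLOG(1) << toString(ExecutorDeviceType::CPU, /*device_id=*/0);

  // Or decode it into a std::set for programmatic validation in tests.
  auto entries = toSet(ExecutorDeviceType::CPU, /*device_id=*/0);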

Member Data Documentation

std::unique_ptr< BoundingBoxIntersectTuningParamRecycler > BoundingBoxIntersectJoinHashTable::auto_tuner_cache_
static protected
Initial value:
=
std::make_unique<BoundingBoxIntersectTuningParamRecycler>()

Definition at line 411 of file BoundingBoxIntersectJoinHashTable.h.

Referenced by getBoundingBoxIntersectTuningParamCache(), invalidateCache(), markCachedItemAsDirty(), and reifyWithLayout().

double BoundingBoxIntersectJoinHashTable::chosen_bbox_intersect_bucket_threshold_
protected

Definition at line 393 of file BoundingBoxIntersectJoinHashTable.h.

Referenced by reifyImpl().

size_t BoundingBoxIntersectJoinHashTable::chosen_bbox_intersect_max_table_size_bytes_
protected

Definition at line 394 of file BoundingBoxIntersectJoinHashTable.h.

Referenced by reifyImpl().

ColumnCacheMap& BoundingBoxIntersectJoinHashTable::column_cache_
protected

Definition at line 386 of file BoundingBoxIntersectJoinHashTable.h.

Referenced by fetchColumnsForDevice().

CompositeKeyInfo BoundingBoxIntersectJoinHashTable::composite_key_info_
protected

const std::shared_ptr<Analyzer::BinOper> BoundingBoxIntersectJoinHashTable::condition_
protected

std::mutex BoundingBoxIntersectJoinHashTable::cpu_hash_table_buff_mutex_
protected

HashTableBuildDagMap BoundingBoxIntersectJoinHashTable::hashtable_build_dag_map_
protected

HashtableCacheMetaInfo BoundingBoxIntersectJoinHashTable::hashtable_cache_meta_info_
protected

const JoinType BoundingBoxIntersectJoinHashTable::join_type_
protected

std::optional<HashType> BoundingBoxIntersectJoinHashTable::layout_override_
protected

const std::vector<InputTableInfo>& BoundingBoxIntersectJoinHashTable::query_infos_
protected

QueryPlanDAG BoundingBoxIntersectJoinHashTable::query_plan_dag_
protected

Definition at line 414 of file BoundingBoxIntersectJoinHashTable.h.

const TableIdToNodeMap BoundingBoxIntersectJoinHashTable::table_id_to_node_map_
protected

Definition at line 418 of file BoundingBoxIntersectJoinHashTable.h.

std::unordered_set<size_t> BoundingBoxIntersectJoinHashTable::table_keys_
protected

The documentation for this class was generated from the following files:

BoundingBoxIntersectJoinHashTable.h
BoundingBoxIntersectJoinHashTable.cpp