OmniSciDB  d2f719934e
RangeJoinHashTable Class Reference (final)

#include <RangeJoinHashTable.h>

Inheritance diagram for RangeJoinHashTable
Collaboration diagram for RangeJoinHashTable

Public Member Functions

 RangeJoinHashTable (const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const Analyzer::RangeOper *range_expr, std::shared_ptr< Analyzer::ColumnVar > inner_col_expr, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, QueryPlan query_plan_dag, HashtableCacheMetaInfo hashtable_cache_meta_info, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map)
 
 ~RangeJoinHashTable () override=default
 
llvm::Value * codegenKey (const CompilationOptions &co, llvm::Value *offset)
 
HashJoinMatchingSet codegenMatchingSetWithOffset (const CompilationOptions &, const size_t, llvm::Value *)
 
- Public Member Functions inherited from OverlapsJoinHashTable
 OverlapsJoinHashTable (const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, QueryPlan query_plan_dag, HashtableCacheMetaInfo hashtable_cache_meta_info, const TableIdToNodeMap &table_id_to_node_map)
 
virtual ~OverlapsJoinHashTable ()
 
- Public Member Functions inherited from HashJoin
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int8_t * getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static std::shared_ptr< RangeJoinHashTable > getInstance (const std::shared_ptr< Analyzer::BinOper > condition, const Analyzer::RangeOper *range_expr, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 
- Static Public Member Functions inherited from OverlapsJoinHashTable
static std::shared_ptr< OverlapsJoinHashTable > getInstance (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static auto getCacheInvalidator () -> std::function< void()>
 
static HashtableRecycler * getHashTableCache ()
 
static OverlapsTuningParamRecycler * getOverlapsTuningParamCache ()
 
- Static Public Member Functions inherited from HashJoin
static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static std::pair< std::string, std::shared_ptr< HashJoin > > getSyntheticInstance (std::vector< std::shared_ptr< Analyzer::BinOper >>, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 
static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static void checkHashJoinReplicationConstraint (const int table_id, const size_t shard_count, const Executor *executor)
 
static InnerOuter normalizeColumnPair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
 
static std::vector< InnerOuter > normalizeColumnPairs (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor)
 

Protected Member Functions

void reifyWithLayout (const HashType layout) override
 
void reifyForDevice (const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const int device_id, const logger::ThreadId parent_thread_id)
 
std::shared_ptr< BaselineHashTable > initHashTableOnCpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const HashType layout, const size_t entry_count, const size_t emitted_keys_count)
 
HashType getHashType () const noexcept override
 
std::pair< size_t, size_t > approximateTupleCount (const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold) override
 
std::pair< size_t, size_t > computeRangeHashTableCounts (const size_t shard_count, std::vector< ColumnsForDevice > &columns_per_device)
 
- Protected Member Functions inherited from OverlapsJoinHashTable
void reify (const HashType preferred_layout)
 
virtual void reifyImpl (std::vector< ColumnsForDevice > &columns_per_device, const Fragmenter_Namespace::TableInfo &query_info, const HashType layout, const size_t shard_count, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
void reifyForDevice (const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const int device_id, const logger::ThreadId parent_thread_id)
 
size_t calculateHashTableSize (size_t number_of_dimensions, size_t emitted_keys_count, size_t entry_count) const
 
ColumnsForDevice fetchColumnsForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
 
virtual std::pair< size_t, size_t > computeHashTableCounts (const size_t shard_count, const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
 
void setInverseBucketSizeInfo (const std::vector< double > &inverse_bucket_sizes, std::vector< ColumnsForDevice > &columns_per_device, const size_t device_count)
 
size_t getKeyComponentWidth () const
 
size_t getKeyComponentCount () const
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
std::shared_ptr< BaselineHashTable > initHashTableOnCpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching)
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
DecodedJoinHashBufferSet toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
const RegisteredQueryHint & getRegisteredQueryHint ()
 
void registerQueryHint (const RegisteredQueryHint &query_hint)
 
size_t getEntryCount () const
 
size_t getEmittedKeysCount () const
 
size_t getComponentBufferSize () const noexcept override
 
size_t shardCount () const
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
size_t getKeyBufferSize () const noexcept
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
std::string getHashJoinType () const final
 
bool isBitwiseEq () const override
 
std::shared_ptr< HashTable > initHashTableOnCpuFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
 
std::optional< std::pair< size_t, size_t > > getApproximateTupleCountFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
 
void putHashTableOnCpuToCache (QueryPlanHash key, CacheItemType item_type, std::shared_ptr< HashTable > hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
 
llvm::Value * codegenKey (const CompilationOptions &)
 
std::vector< llvm::Value * > codegenManyKey (const CompilationOptions &)
 
std::optional< OverlapsHashTableMetaInfo > getOverlapsHashTableMetaInfo ()
 
QueryPlanHash getAlternativeCacheKey (AlternativeCacheKeyForOverlapsHashJoin &info)
 
void generateCacheKey (const size_t max_hashtable_size, const double bucket_threshold)
 
QueryPlanHash getCacheKey () const
 
const std::vector< InnerOuter > & getInnerOuterPairs () const
 
void setOverlapsHashtableMetaInfo (size_t max_table_size_bytes, double bucket_threshold, std::vector< double > &bucket_sizes)
 

Private Member Functions

bool isInnerColCompressed () const
 
bool isProbeCompressed () const
 

Private Attributes

const Analyzer::RangeOper * range_expr_
 
std::shared_ptr< Analyzer::ColumnVar > inner_col_expr_
 
const double bucket_threshold_ {std::numeric_limits<double>::max()}
 
const size_t max_hashtable_size_ {std::numeric_limits<size_t>::max()}
 
HashtableCacheMetaInfo overlaps_hashtable_cache_meta_info_
 

Additional Inherited Members

- Protected Attributes inherited from OverlapsJoinHashTable
const std::shared_ptr< Analyzer::BinOper > condition_
 
const JoinType join_type_
 
const std::vector< InputTableInfo > & query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
std::vector< InnerOuter > inner_outer_pairs_
 
const int device_count_
 
std::vector< double > inverse_bucket_sizes_for_dimension_
 
double chosen_overlaps_bucket_threshold_
 
size_t chosen_overlaps_max_table_size_bytes_
 
CompositeKeyInfo composite_key_info_
 
std::optional< HashType > layout_override_
 
std::mutex cpu_hash_table_buff_mutex_
 
RegisteredQueryHint query_hint_
 
QueryPlan query_plan_dag_
 
const TableIdToNodeMap table_id_to_node_map_
 
QueryPlanHash hashtable_cache_key_
 
HashtableCacheMetaInfo hashtable_cache_meta_info_
 
- Protected Attributes inherited from HashJoin
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
 
- Static Protected Attributes inherited from OverlapsJoinHashTable
static std::unique_ptr< HashtableRecycler > hash_table_cache_
 
static std::unique_ptr< OverlapsTuningParamRecycler > auto_tuner_cache_
 

Detailed Description

Definition at line 21 of file RangeJoinHashTable.h.

Constructor & Destructor Documentation

RangeJoinHashTable::RangeJoinHashTable ( const std::shared_ptr< Analyzer::BinOper >  condition,
const JoinType  join_type,
const Analyzer::RangeOper *  range_expr,
std::shared_ptr< Analyzer::ColumnVar >  inner_col_expr,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
ColumnCacheMap &  column_cache,
Executor *  executor,
const std::vector< InnerOuter > &  inner_outer_pairs,
const int  device_count,
QueryPlan  query_plan_dag,
HashtableCacheMetaInfo  hashtable_cache_meta_info,
const HashTableBuildDagMap &  hashtable_build_dag_map,
const TableIdToNodeMap &  table_id_to_node_map 
)
inline

Definition at line 23 of file RangeJoinHashTable.h.

    : OverlapsJoinHashTable(condition,
                            join_type,
                            query_infos,
                            memory_level,
                            column_cache,
                            executor,
                            inner_outer_pairs,
                            device_count,
                            query_plan_dag,
                            hashtable_cache_meta_info,
                            table_id_to_node_map)
    , range_expr_(range_expr)
    , inner_col_expr_(std::move(inner_col_expr)) {}
RangeJoinHashTable::~RangeJoinHashTable ( )
override default

Member Function Documentation

std::pair< size_t, size_t > RangeJoinHashTable::approximateTupleCount ( const std::vector< double > &  inverse_bucket_sizes_for_dimension,
std::vector< ColumnsForDevice > &  columns_per_device,
const size_t  chosen_max_hashtable_size,
const double  chosen_bucket_threshold 
)
override protected virtual

Reimplemented from OverlapsJoinHashTable.

Definition at line 472 of file RangeJoinHashTable.cpp.

References approximate_distinct_tuples_on_device_range(), approximate_distinct_tuples_range(), threading_serial::async(), Bitmap, CHECK, CHECK_EQ, CHECK_GT, CPU, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, Data_Namespace::CPU_LEVEL, cpu_threads(), OverlapsJoinHashTable::device_count_, OverlapsJoinHashTable::executor_, OverlapsJoinHashTable::getApproximateTupleCountFromCache(), HashJoin::getCompositeKeyInfo(), OverlapsJoinHashTable::getEffectiveMemoryLevel(), GPU, Data_Namespace::GPU_LEVEL, OverlapsJoinHashTable::hashtable_cache_key_, hll_size(), hll_unify(), i, CountDistinctDescriptor::impl_type_, OverlapsJoinHashTable::inner_outer_pairs_, isInnerColCompressed(), OVERLAPS_HT, transfer_flat_object_to_gpu(), transfer_vector_of_flat_objects_to_gpu(), UNREACHABLE, and VLOG.

Referenced by computeRangeHashTableCounts().

{
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
#ifdef _WIN32
  // WIN32 needs to have C++20 set for designated initialisation to work
  CountDistinctDescriptor count_distinct_desc{
      CountDistinctImplType::Bitmap,
      0,
      11,
      true,
      effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
          ? ExecutorDeviceType::GPU
          : ExecutorDeviceType::CPU,
      1,
  };
#else
  CountDistinctDescriptor count_distinct_desc{
      .impl_type_ = CountDistinctImplType::Bitmap,
      .min_val = 0,
      .bitmap_sz_bits = 11,
      .approximate = true,
      .device_type = effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
                         ? ExecutorDeviceType::GPU
                         : ExecutorDeviceType::CPU,
      .sub_bitmap_count = 1,
  };
#endif
  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();

  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());
  if (columns_per_device.front().join_columns.front().num_elems == 0) {
    return std::make_pair(0, 0);
  }

  for (auto& columns_for_device : columns_per_device) {
    columns_for_device.setBucketInfo(inverse_bucket_sizes_for_dimension,
                                     inner_outer_pairs_);
  }

  // Number of keys must match dimension of buckets
  CHECK_EQ(columns_per_device.front().join_columns.size(),
           columns_per_device.front().join_buckets.size());
  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
    const auto composite_key_info =
        HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);

    const auto cached_count_info =
        getApproximateTupleCountFromCache(hashtable_cache_key_,
                                          CacheItemType::OVERLAPS_HT,
                                          DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
    if (cached_count_info.has_value() && cached_count_info.value().first) {
      VLOG(1) << "Using a cached tuple count: " << cached_count_info.value().first
              << ", emitted keys count: " << cached_count_info.value().second;
      return std::make_pair(cached_count_info.value().first,
                            cached_count_info.value().second);
    }
    int thread_count = cpu_threads();
    std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
    auto hll_result = &hll_buffer_all_cpus[0];

    std::vector<int32_t> num_keys_for_row;
    num_keys_for_row.resize(columns_per_device.front().join_columns[0].num_elems);

    approximate_distinct_tuples_range(hll_result,
                                      num_keys_for_row,
                                      count_distinct_desc.bitmap_sz_bits,
                                      padded_size_bytes,
                                      columns_per_device.front().join_columns,
                                      columns_per_device.front().join_column_types,
                                      columns_per_device.front().join_buckets,
                                      isInnerColCompressed(),
                                      thread_count);

    for (int i = 1; i < thread_count; ++i) {
      hll_unify(hll_result,
                hll_result + i * padded_size_bytes,
                1 << count_distinct_desc.bitmap_sz_bits);
    }
    return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
                          num_keys_for_row.size() > 0 ? num_keys_for_row.back() : 0);
  }
#ifdef HAVE_CUDA
  auto& data_mgr = executor_->getCatalog()->getDataMgr();
  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
  for (auto& host_hll_buffer : host_hll_buffers) {
    host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
  }
  std::vector<size_t> emitted_keys_count_device_threads(device_count_, 0);
  std::vector<std::future<void>> approximate_distinct_device_threads;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    approximate_distinct_device_threads.emplace_back(std::async(
        std::launch::async,
        [device_id,
         &columns_per_device,
         &count_distinct_desc,
         &data_mgr,
         &host_hll_buffers,
         &emitted_keys_count_device_threads,
         this] {
          auto allocator = data_mgr.createGpuAllocator(device_id);
          auto device_hll_buffer =
              allocator->alloc(count_distinct_desc.bitmapPaddedSizeBytes());
          data_mgr.getCudaMgr()->zeroDeviceMem(
              device_hll_buffer, count_distinct_desc.bitmapPaddedSizeBytes(), device_id);
          const auto& columns_for_device = columns_per_device[device_id];
          auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(
              columns_for_device.join_columns, *allocator);

          CHECK_GT(columns_for_device.join_buckets.size(), 0u);
          const auto& bucket_sizes_for_dimension =
              columns_for_device.join_buckets[0].inverse_bucket_sizes_for_dimension;
          auto bucket_sizes_gpu =
              allocator->alloc(bucket_sizes_for_dimension.size() * sizeof(double));
          allocator->copyToDevice(bucket_sizes_gpu,
                                  bucket_sizes_for_dimension.data(),
                                  bucket_sizes_for_dimension.size() * sizeof(double));
          const size_t row_counts_buffer_sz =
              columns_per_device.front().join_columns[0].num_elems * sizeof(int32_t);
          auto row_counts_buffer = allocator->alloc(row_counts_buffer_sz);
          data_mgr.getCudaMgr()->zeroDeviceMem(
              row_counts_buffer, row_counts_buffer_sz, device_id);
          const auto key_handler =
              RangeKeyHandler(isInnerColCompressed(),
                              bucket_sizes_for_dimension.size(),
                              join_columns_gpu,
                              reinterpret_cast<double*>(bucket_sizes_gpu));
          const auto key_handler_gpu =
              transfer_flat_object_to_gpu(key_handler, *allocator);
          approximate_distinct_tuples_on_device_range(
              reinterpret_cast<uint8_t*>(device_hll_buffer),
              count_distinct_desc.bitmap_sz_bits,
              reinterpret_cast<int32_t*>(row_counts_buffer),
              key_handler_gpu,
              columns_for_device.join_columns[0].num_elems,
              executor_->blockSize(),
              executor_->gridSize());

          auto& host_emitted_keys_count = emitted_keys_count_device_threads[device_id];
          allocator->copyFromDevice(
              &host_emitted_keys_count,
              row_counts_buffer +
                  (columns_per_device.front().join_columns[0].num_elems - 1) *
                      sizeof(int32_t),
              sizeof(int32_t));

          auto& host_hll_buffer = host_hll_buffers[device_id];
          allocator->copyFromDevice(&host_hll_buffer[0],
                                    device_hll_buffer,
                                    count_distinct_desc.bitmapPaddedSizeBytes());
        }));
  }
  for (auto& child : approximate_distinct_device_threads) {
    child.get();
  }
  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
  auto& result_hll_buffer = host_hll_buffers.front();
  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
  for (int device_id = 1; device_id < device_count_; ++device_id) {
    auto& host_hll_buffer = host_hll_buffers[device_id];
    hll_unify(hll_result,
              reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
              1 << count_distinct_desc.bitmap_sz_bits);
  }
  size_t emitted_keys_count = 0;
  for (auto& emitted_keys_count_device : emitted_keys_count_device_threads) {
    emitted_keys_count += emitted_keys_count_device;
  }
  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits),
                        emitted_keys_count);
#else
  UNREACHABLE();
  return {0, 0};
#endif  // HAVE_CUDA
}
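
The CPU path builds one HLL buffer per thread and the GPU path one per device; both are folded with hll_unify() before a single hll_size() readout. A self-contained toy version of that merge step, independent of OmniSciDB (hll_unify_sketch is a hypothetical name, not the library's hll_unify):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// HyperLogLog registers merge by element-wise max, so sketches built in
// parallel can be unified losslessly before one cardinality estimate is read.
void hll_unify_sketch(std::vector<uint8_t>& lhs, const std::vector<uint8_t>& rhs) {
  for (std::size_t i = 0; i < lhs.size() && i < rhs.size(); ++i) {
    lhs[i] = std::max(lhs[i], rhs[i]);
  }
}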


llvm::Value * RangeJoinHashTable::codegenKey ( const CompilationOptions &  co,
llvm::Value *  offset 
)

Definition at line 656 of file RangeJoinHashTable.cpp.

References CHECK, CHECK_EQ, CodeGenerator::codegen(), OverlapsJoinHashTable::executor_, get_int_type(), OverlapsJoinHashTable::getKeyComponentCount(), OverlapsJoinHashTable::getKeyComponentWidth(), i, OverlapsJoinHashTable::inner_outer_pairs_, OverlapsJoinHashTable::inverse_bucket_sizes_for_dimension_, isProbeCompressed(), kPOINT, kTINYINT, LL_BUILDER, LL_CONTEXT, LL_FP, LL_INT, and CodeGenerator::posArg().

Referenced by codegenMatchingSetWithOffset().

{
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);
  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
  llvm::Value* key_buff_lv{nullptr};
  switch (key_component_width) {
    case 4:
      key_buff_lv =
          LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv);
      break;
    case 8:
      key_buff_lv =
          LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
      break;
    default:
      CHECK(false);
  }

  const auto& inner_outer_pair = inner_outer_pairs_[0];
  const auto outer_col = inner_outer_pair.second;
  const auto outer_col_ti = outer_col->get_type_info();

  if (outer_col_ti.is_geometry()) {
    CodeGenerator code_generator(executor_);
    // TODO(adb): for points we will use the coords array, but for other
    // geometries we will need to use the bounding box. For now only support
    // points.
    CHECK_EQ(outer_col_ti.get_type(), kPOINT);
    CHECK_EQ(inverse_bucket_sizes_for_dimension_.size(), static_cast<size_t>(2));

    const auto col_lvs = code_generator.codegen(outer_col, true, co);
    CHECK_EQ(col_lvs.size(), size_t(1));

    const auto outer_col_var = dynamic_cast<const Analyzer::ColumnVar*>(outer_col);
    CHECK(outer_col_var);

    const auto coords_cd = executor_->getCatalog()->getMetadataForColumn(
        outer_col_var->get_table_id(), outer_col_var->get_column_id() + 1);
    CHECK(coords_cd);

    const auto array_ptr = executor_->cgen_state_->emitExternalCall(
        "array_buff",
        llvm::Type::getInt8PtrTy(executor_->cgen_state_->context_),
        {col_lvs.front(), code_generator.posArg(outer_col)});
    CHECK(coords_cd->columnType.get_elem_type().get_type() == kTINYINT)
        << "Only TINYINT coordinates columns are supported in geo overlaps "
           "hash join.";

    const auto arr_ptr =
        code_generator.castArrayPointer(array_ptr, coords_cd->columnType.get_elem_type());

    // load and unpack offsets
    const auto offset = LL_BUILDER.CreateLoad(offset_ptr, "packed_bucket_offset");
    const auto x_offset =
        LL_BUILDER.CreateTrunc(offset, llvm::Type::getInt32Ty(LL_CONTEXT));

    const auto y_offset_shifted =
        LL_BUILDER.CreateLShr(offset, LL_INT(static_cast<int64_t>(32)));
    const auto y_offset =
        LL_BUILDER.CreateTrunc(y_offset_shifted, llvm::Type::getInt32Ty(LL_CONTEXT));

    const auto x_bucket_offset =
        LL_BUILDER.CreateSExt(x_offset, llvm::Type::getInt64Ty(LL_CONTEXT));
    const auto y_bucket_offset =
        LL_BUILDER.CreateSExt(y_offset, llvm::Type::getInt64Ty(LL_CONTEXT));

    for (size_t i = 0; i < 2; i++) {
      const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(key_buff_lv, LL_INT(i));

      const auto funcName = isProbeCompressed() ? "get_bucket_key_for_range_compressed"
                                                : "get_bucket_key_for_range_double";

      // Note that get_bucket_key_for_range_compressed will need to be
      // specialized for future compression schemes
      auto bucket_key = executor_->cgen_state_->emitExternalCall(
          funcName,
          get_int_type(64, LL_CONTEXT),
          {arr_ptr, LL_INT(i), LL_FP(inverse_bucket_sizes_for_dimension_[i])});

      auto bucket_key_shifted = i == 0
                                    ? LL_BUILDER.CreateAdd(x_bucket_offset, bucket_key)
                                    : LL_BUILDER.CreateAdd(y_bucket_offset, bucket_key);

      const auto col_lv = LL_BUILDER.CreateSExt(
          bucket_key_shifted, get_int_type(key_component_width * 8, LL_CONTEXT));
      LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
    }
  } else {
    LOG(FATAL) << "Range join key currently only supported for geospatial types.";
  }
  return key_buff_lv;
}
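
The Trunc/LShr sequence above implies the packed bucket offset carries the x offset in its low 32 bits and the y offset in its high 32 bits. A small sketch of that layout under that assumption (the helper names are illustrative, not OmniSciDB API):

#include <cstdint>

// Pack two 32-bit bucket offsets into one 64-bit value: x low, y high.
int64_t pack_offsets(int32_t x, int32_t y) {
  return (static_cast<int64_t>(static_cast<uint32_t>(y)) << 32) |
         static_cast<int64_t>(static_cast<uint32_t>(x));
}

// Unpacking mirrors the generated IR: truncate for x, shift right 32 for y,
// then sign-extend each back to 64 bits before adding the bucket key.
int32_t unpack_x(int64_t packed) {
  return static_cast<int32_t>(static_cast<uint32_t>(packed));
}
int32_t unpack_y(int64_t packed) {
  return static_cast<int32_t>(static_cast<uint64_t>(packed) >> 32);
}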


HashJoinMatchingSet RangeJoinHashTable::codegenMatchingSetWithOffset ( const CompilationOptions &  co,
const size_t  index,
llvm::Value *  range_offset 
)

Definition at line 750 of file RangeJoinHashTable.cpp.

References CHECK, HashJoin::codegenHashTableLoad(), codegenKey(), HashJoin::codegenMatchingSet(), OverlapsJoinHashTable::executor_, get_int_type(), OverlapsJoinHashTable::getComponentBufferSize(), OverlapsJoinHashTable::getEntryCount(), getHashType(), OverlapsJoinHashTable::getKeyComponentCount(), OverlapsJoinHashTable::getKeyComponentWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, OverlapsJoinHashTable::offsetBufferOff(), OneToMany, and to_string().

{
  const auto key_component_width = getKeyComponentWidth();
  CHECK(key_component_width == 4 || key_component_width == 8);

  auto key_buff_lv = codegenKey(co, range_offset);
  CHECK(getHashType() == HashType::OneToMany);

  auto hash_ptr = codegenHashTableLoad(index, executor_);
  const auto composite_dict_ptr_type =
      llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);

  const auto composite_key_dict =
      hash_ptr->getType()->isPointerTy()
          ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
          : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);

  const auto key_component_count = getKeyComponentCount();

  const auto funcName =
      "get_composite_key_index_" + std::to_string(key_component_width * 8);

  const auto key = executor_->cgen_state_->emitExternalCall(funcName,
                                                            get_int_type(64, LL_CONTEXT),
                                                            {key_buff_lv,
                                                             LL_INT(key_component_count),
                                                             composite_key_dict,
                                                             LL_INT(getEntryCount())});

  auto one_to_many_ptr = hash_ptr;
  if (one_to_many_ptr->getType()->isPointerTy()) {
    one_to_many_ptr =
        LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
  } else {
    CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
  }
  const auto composite_key_dict_size = offsetBufferOff();
  one_to_many_ptr =
      LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));

  return HashJoin::codegenMatchingSet(
      /* hash_join_idx_args_in */ {one_to_many_ptr,
                                   key,
                                   LL_INT(int64_t(0)),
                                   LL_INT(getEntryCount() - 1)},
      /* is_sharded */ false,
      /* is_nullable */ false,
      /* is_bw_eq */ false,
      /* sub_buff_size */ getComponentBufferSize(),
      /* executor */ executor_);
}


std::pair< size_t, size_t > RangeJoinHashTable::computeRangeHashTableCounts ( const size_t  shard_count,
std::vector< ColumnsForDevice > &  columns_per_device 
)
protected

Definition at line 456 of file RangeJoinHashTable.cpp.

References approximateTupleCount(), bucket_threshold_, CHECK, OverlapsJoinHashTable::device_count_, get_entries_per_device(), OverlapsJoinHashTable::inverse_bucket_sizes_for_dimension_, max_hashtable_size_, and OverlapsJoinHashTable::memory_level_.

Referenced by reifyWithLayout().

{
  CHECK(!inverse_bucket_sizes_for_dimension_.empty());
  const auto [tuple_count, emitted_keys_count] =
      approximateTupleCount(inverse_bucket_sizes_for_dimension_,
                            columns_per_device,
                            max_hashtable_size_,
                            bucket_threshold_);
  const auto entry_count = 2 * std::max(tuple_count, size_t(1));

  return std::make_pair(
      get_entries_per_device(entry_count, shard_count, device_count_, memory_level_),
      emitted_keys_count);
}
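
In words: the entry count is the approximate distinct tuple count doubled, with a floor of one, before get_entries_per_device() splits it across shards and devices; the doubling plausibly buys headroom against HLL undercounting, though the source does not state this. A minimal restatement of the policy (range_hash_entry_count is a hypothetical helper):

#include <algorithm>
#include <cstddef>

// Sizing policy from computeRangeHashTableCounts(): entry_count = 2 * max(n, 1).
std::size_t range_hash_entry_count(std::size_t approx_tuple_count) {
  return 2 * std::max<std::size_t>(approx_tuple_count, 1);
}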


HashType RangeJoinHashTable::getHashType ( ) const
inline override protected virtual noexcept

Reimplemented from OverlapsJoinHashTable.

Definition at line 95 of file RangeJoinHashTable.h.

References OneToMany.

Referenced by codegenMatchingSetWithOffset().


std::shared_ptr< RangeJoinHashTable > RangeJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper >  condition,
const Analyzer::RangeOper *  range_expr,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const int  device_count,
ColumnCacheMap &  column_cache,
Executor *  executor,
const HashTableBuildDagMap &  hashtable_build_dag_map,
const RegisteredQueryHint &  query_hint,
const TableIdToNodeMap &  table_id_to_node_map 
)
static

NOTE(jclay): Handling Range Joins With Mixed Compression:

First, let's take a concrete example of a query that is rewritten as a range join. Notice in the first code block that the condition operator is an Overlaps operator: the LHS is a column, and the RHS is the range operator. In order for the hash table build and probe to work properly, we need to ensure that the appropriate runtime functions are selected. The following breakdown documents how the appropriate runtime function is selected.

  • The LHS of the RangeOper is used to build the hash table
  • The LHS of the OverlapsOper + the RHS of the RangeOper is used as probe

SELECT count(*) FROM t1, t2 where ST_Distance(t1.p1_comp32, t2.p1) <= 6.3;

BinOper condition

((OVERLAPS) (ColumnVar table: (t1) column: (p1_comp32) GEOMETRY(POINT, 4326) ENCODING COMPRESSED(32)) (RangeOper) (ColumnVar table: (t2) column: (p1) GEOMETRY(POINT, 4326) ENCODING NONE), (Const 6.330000))

RangeOper condition

[(ColumnVar table: 5 (t2) column: 1 rte: 1 GEOMETRY(POINT, 4326) ENCODING NONE), (Const 6.330000)]

Same example as above, annotated:

SELECT count(*) FROM t1, t2 where ST_Distance(
    t1.p1_comp32,    << Overlaps Condition LHS
    t2.p1            << RangeOper LHS
) <= 6.3;            << RangeOper RHS

In this case, we select the uncompressed runtime functions when building the hash table over t2.p1. When performing the probe, we must select the compressed runtime functions.
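
Concretely, the compression check drives which runtime function name is emitted at codegen time. A minimal sketch of that dispatch, mirroring isProbeCompressed() and the probe-side function names used in codegenKey() below (the free function and its parameter are illustrative, not part of the class):

#include <string>

// Probe-side bucket-key function selection: a GEOINT-compressed probe column
// uses the compressed variant; an uncompressed (double) column the other.
std::string probeBucketKeyFunction(bool probe_col_is_geoint_compressed) {
  return probe_col_is_geoint_compressed ? "get_bucket_key_for_range_compressed"
                                        : "get_bucket_key_for_range_double";
}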

Definition at line 72 of file RangeJoinHashTable.cpp.

References cat(), CHECK, HashJoin::checkHashJoinReplicationConstraint(), logger::FATAL, get_inner_query_info(), Analyzer::RangeOper::get_left_operand(), HashtableRecycler::getHashtableKeyString(), HashJoin::getInnerTableId(), Fragmenter_Namespace::TableInfo::getNumTuplesUpperBound(), BaselineJoinHashTable::getShardCountForCondition(), Data_Namespace::GPU_LEVEL, InputTableInfo::info, LOG, OneToMany, and Analyzer::RangeOper::toString().

Referenced by OverlapsJoinHashTable::getInstance().

{
  // the hash table is built over the LHS of the range oper. we then use the lhs
  // of the bin oper + the rhs of the range oper for the probe
  auto range_expr_col_var =
      dynamic_cast<const Analyzer::ColumnVar*>(range_expr->get_left_operand());
  if (!range_expr_col_var || !range_expr_col_var->get_type_info().is_geometry()) {
    throw HashJoinFail("Could not build hash tables for range join | " +
                       range_expr->toString());
  }
  auto cat = executor->getCatalog();
  CHECK(cat);
  CHECK(range_expr_col_var->get_type_info().is_geometry());

  auto coords_cd = cat->getMetadataForColumn(range_expr_col_var->get_table_id(),
                                             range_expr_col_var->get_column_id() + 1);
  CHECK(coords_cd);

  auto range_join_inner_col_expr =
      makeExpr<Analyzer::ColumnVar>(coords_cd->columnType,
                                    coords_cd->tableId,
                                    coords_cd->columnId,
                                    range_expr_col_var->get_rte_idx());

  std::vector<InnerOuter> inner_outer_pairs;
  inner_outer_pairs.emplace_back(
      InnerOuter{dynamic_cast<Analyzer::ColumnVar*>(range_join_inner_col_expr.get()),
                 condition->get_left_operand()});

  const auto& query_info =
      get_inner_query_info(HashJoin::getInnerTableId(inner_outer_pairs), query_infos)
          .info;

  const auto total_entries = 2 * query_info.getNumTuplesUpperBound();
  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
    throw TooManyHashEntries();
  }

  const auto shard_count = memory_level == Data_Namespace::GPU_LEVEL
                               ? BaselineJoinHashTable::getShardCountForCondition(
                                     condition.get(), executor, inner_outer_pairs)
                               : 0;

  auto hashtable_cache_key_string =
      HashtableRecycler::getHashtableKeyString(inner_outer_pairs,
                                               condition->get_optype(),
                                               join_type,
                                               hashtable_build_dag_map,
                                               executor);

  auto join_hash_table =
      std::make_shared<RangeJoinHashTable>(condition,
                                           join_type,
                                           range_expr,
                                           range_join_inner_col_expr,
                                           query_infos,
                                           memory_level,
                                           column_cache,
                                           executor,
                                           inner_outer_pairs,
                                           device_count,
                                           hashtable_cache_key_string.first,
                                           hashtable_cache_key_string.second,
                                           hashtable_build_dag_map,
                                           table_id_to_node_map);
  HashJoin::checkHashJoinReplicationConstraint(
      HashJoin::getInnerTableId(inner_outer_pairs), shard_count, executor);
  try {
    join_hash_table->reifyWithLayout(HashType::OneToMany);
  } catch (const HashJoinFail& e) {
    throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
                                   "involved in equijoin | ") +
                       e.what());
  } catch (const ColumnarConversionNotSupported& e) {
    throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
                       e.what());
  } catch (const std::exception& e) {
    LOG(FATAL) << "Fatal error while attempting to build hash tables for join: "
               << e.what();
  }

  return join_hash_table;
}


std::shared_ptr< BaselineHashTable > RangeJoinHashTable::initHashTableOnCpu ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const HashType  layout,
const size_t  entry_count,
const size_t  emitted_keys_count 
)
protected

Definition at line 362 of file RangeJoinHashTable.cpp.

References bucket_threshold_, CompositeKeyInfo::cache_key_chunks, CHECK, OverlapsJoinHashTable::composite_key_info_, OverlapsJoinHashTable::condition_, count, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, OverlapsJoinHashTable::cpu_hash_table_buff_mutex_, DEBUG_TIMER, EMPTY_HASHED_PLAN_DAG_KEY, EMPTY_QUERY_PLAN, OverlapsJoinHashTable::executor_, OverlapsJoinHashTable::generateCacheKey(), OverlapsJoinHashTable::getAlternativeCacheKey(), HashJoin::getCompositeKeyInfo(), OverlapsJoinHashTable::getKeyComponentCount(), OverlapsJoinHashTable::getKeyComponentWidth(), OverlapsJoinHashTable::hashtable_cache_key_, OverlapsJoinHashTable::initHashTableOnCpuFromCache(), OverlapsJoinHashTable::inner_outer_pairs_, OverlapsJoinHashTable::inverse_bucket_sizes_for_dimension_, isInnerColCompressed(), OverlapsJoinHashTable::join_type_, OverlapsJoinHashTable::layout_override_, HashJoin::layoutRequiresAdditionalBuffers(), ManyToMany, max_hashtable_size_, OneToMany, OVERLAPS_HT, OverlapsJoinHashTable::putHashTableOnCpuToCache(), OverlapsJoinHashTable::query_plan_dag_, OverlapsJoinHashTable::setOverlapsHashtableMetaInfo(), to_string(), and VLOG.

Referenced by reifyForDevice().

{
  auto timer = DEBUG_TIMER(__func__);
  decltype(std::chrono::steady_clock::now()) ts1, ts2;
  ts1 = std::chrono::steady_clock::now();
  const auto composite_key_info =
      HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);
  CHECK(!join_columns.empty());
  CHECK(!join_bucket_info.empty());

  setOverlapsHashtableMetaInfo(
      max_hashtable_size_, bucket_threshold_, inverse_bucket_sizes_for_dimension_);
  generateCacheKey(max_hashtable_size_, max_hashtable_size_);

  if ((query_plan_dag_.compare(EMPTY_QUERY_PLAN) == 0 ||
       hashtable_cache_key_ == EMPTY_HASHED_PLAN_DAG_KEY) &&
      inner_outer_pairs_.front().first->get_table_id() > 0) {
    // sometimes we cannot retrieve query plan dag, so try to recycler cache
    // with the old-passioned cache key if we deal with hashtable of non-temporary table
    AlternativeCacheKeyForOverlapsHashJoin cache_key{inner_outer_pairs_,
                                                     join_columns.front().num_elems,
                                                     composite_key_info.cache_key_chunks,
                                                     condition_->get_optype(),
                                                     max_hashtable_size_,
                                                     bucket_threshold_,
                                                     inverse_bucket_sizes_for_dimension_};
    hashtable_cache_key_ = getAlternativeCacheKey(cache_key);
    VLOG(2) << "Use alternative hashtable cache key due to unavailable query plan dag "
               "extraction (hashtable_cache_key: "
            << hashtable_cache_key_ << ")";
  }

  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
  if (auto generic_hash_table =
          initHashTableOnCpuFromCache(hashtable_cache_key_,
                                      CacheItemType::OVERLAPS_HT,
                                      DataRecyclerUtil::CPU_DEVICE_IDENTIFIER)) {
    if (auto hash_table =
            std::dynamic_pointer_cast<BaselineHashTable>(generic_hash_table)) {
      // See if a hash table of a different layout was returned.
      // If it was OneToMany, we can reuse it on ManyToMany.
      if (layout == HashType::ManyToMany &&
          hash_table->getLayout() == HashType::OneToMany) {
        // use the cached hash table
        layout_override_ = HashType::ManyToMany;
        return hash_table;
      }
    }
  }

  CHECK(layoutRequiresAdditionalBuffers(layout));
  const auto key_component_count =
      join_bucket_info[0].inverse_bucket_sizes_for_dimension.size();

  auto key_handler =
      RangeKeyHandler(isInnerColCompressed(),
                      key_component_count,
                      &join_columns[0],
                      join_bucket_info[0].inverse_bucket_sizes_for_dimension.data());

  BaselineJoinHashTableBuilder builder;
  const auto err = builder.initHashTableOnCpu(&key_handler,
                                              composite_key_info,
                                              join_columns,
                                              join_column_types,
                                              join_bucket_info,
                                              entry_count,
                                              emitted_keys_count,
                                              layout,
                                              join_type_,
                                              getKeyComponentWidth(),
                                              getKeyComponentCount());
  ts2 = std::chrono::steady_clock::now();
  if (err) {
    throw HashJoinFail(std::string("Unrecognized error when initializing CPU "
                                   "range join hash table (") +
                       std::to_string(err) + std::string(")"));
  }
  std::shared_ptr<BaselineHashTable> hash_table = builder.getHashTable();
  auto hashtable_build_time =
      std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count();
  putHashTableOnCpuToCache(hashtable_cache_key_,
                           CacheItemType::OVERLAPS_HT,
                           hash_table,
                           DataRecyclerUtil::CPU_DEVICE_IDENTIFIER,
                           hashtable_build_time);
  return hash_table;
}
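
One detail from the listing deserves emphasis: a cached OneToMany table can stand in for a requested ManyToMany layout, but not the reverse. A compact restatement of that rule, assuming (beyond what the listing shows) that an exact layout match is also reusable; the enum here merely mirrors the HashType values referenced above:

enum class HashType { OneToOne, OneToMany, ManyToMany };

// Cache-reuse rule sketched from initHashTableOnCpu(): exact match, or a
// cached OneToMany serving a ManyToMany request.
bool can_reuse_cached_layout(HashType requested, HashType cached) {
  return requested == cached ||
         (requested == HashType::ManyToMany && cached == HashType::OneToMany);
}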


bool RangeJoinHashTable::isInnerColCompressed ( ) const
inline private

Definition at line 115 of file RangeJoinHashTable.h.

References SQLTypeInfo::get_compression(), Analyzer::RangeOper::get_left_operand(), Analyzer::Expr::get_type_info(), kENCODING_GEOINT, and range_expr_.

Referenced by approximateTupleCount(), and initHashTableOnCpu().

{
  return range_expr_->get_left_operand()->get_type_info().get_compression() ==
         kENCODING_GEOINT;
}


bool RangeJoinHashTable::isProbeCompressed ( ) const
inline private

Definition at line 120 of file RangeJoinHashTable.h.

References OverlapsJoinHashTable::getInnerOuterPairs(), and kENCODING_GEOINT.

Referenced by codegenKey().

{
  const auto& inner_outer_pair = getInnerOuterPairs()[0];
  const auto outer_col = inner_outer_pair.second;
  const auto outer_col_ti = outer_col->get_type_info();

  return outer_col_ti.get_compression() == kENCODING_GEOINT;
}


void RangeJoinHashTable::reifyForDevice ( const ColumnsForDevice &  columns_for_device,
const HashType  layout,
const size_t  entry_count,
const size_t  emitted_keys_count,
const int  device_id,
const logger::ThreadId  parent_thread_id 
)
protected

Definition at line 258 of file RangeJoinHashTable.cpp.

References CHECK, CHECK_EQ, CHECK_LT, Data_Namespace::CPU_LEVEL, DEBUG_TIMER_NEW_THREAD, OverlapsJoinHashTable::getEffectiveMemoryLevel(), OverlapsJoinHashTable::getKeyComponentWidth(), Data_Namespace::GPU_LEVEL, HashJoin::hash_tables_for_device_, initHashTableOnCpu(), OverlapsJoinHashTable::inner_outer_pairs_, ColumnsForDevice::join_buckets, ColumnsForDevice::join_column_types, ColumnsForDevice::join_columns, HashJoin::layoutRequiresAdditionalBuffers(), OverlapsJoinHashTable::memory_level_, UNREACHABLE, and VLOG.

Referenced by reifyWithLayout().

{
  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
  CHECK_EQ(getKeyComponentWidth(), size_t(8));
  CHECK(layoutRequiresAdditionalBuffers(layout));
  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);

  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
    VLOG(1) << "Building range join hash table on CPU.";
    auto hash_table = initHashTableOnCpu(columns_for_device.join_columns,
                                         columns_for_device.join_column_types,
                                         columns_for_device.join_buckets,
                                         layout,
                                         entry_count,
                                         emitted_keys_count);
    CHECK(hash_table);

#ifdef HAVE_CUDA
    if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
      auto gpu_hash_table = copyCpuHashTableToGpu(
          std::move(hash_table), layout, entry_count, emitted_keys_count, device_id);
      CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
      hash_tables_for_device_[device_id] = std::move(gpu_hash_table);
    } else {
#else
    CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
#endif
      CHECK_EQ(hash_tables_for_device_.size(), size_t(1));
      hash_tables_for_device_[0] = std::move(hash_table);
#ifdef HAVE_CUDA
    }
#endif
  } else {
#ifdef HAVE_CUDA
    auto hash_table = initHashTableOnGpu(columns_for_device.join_columns,
                                         columns_for_device.join_column_types,
                                         columns_for_device.join_buckets,
                                         layout,
                                         entry_count,
                                         emitted_keys_count,
                                         device_id);
    CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
    hash_tables_for_device_[device_id] = std::move(hash_table);
#else
    UNREACHABLE();
#endif
  }
}


void RangeJoinHashTable::reifyWithLayout ( const HashType  layout)
override protected virtual

Reimplemented from OverlapsJoinHashTable.

Definition at line 166 of file RangeJoinHashTable.cpp.

References threading_serial::async(), OverlapsJoinHashTable::calculateHashTableSize(), CHECK, computeRangeHashTableCounts(), DEBUG_TIMER, OverlapsJoinHashTable::device_count_, OverlapsJoinHashTable::executor_, OverlapsJoinHashTable::fetchColumnsForDevice(), get_inner_query_info(), Analyzer::RangeOper::get_right_operand(), HashJoin::getHashTypeString(), HashJoin::getInnerTableId(), OverlapsJoinHashTable::getInnerTableId(), Data_Namespace::GPU_LEVEL, InputTableInfo::info, OverlapsJoinHashTable::inner_outer_pairs_, OverlapsJoinHashTable::inverse_bucket_sizes_for_dimension_, OverlapsJoinHashTable::memory_level_, OneToMany, only_shards_for_device(), OverlapsJoinHashTable::query_infos_, range_expr_, reifyForDevice(), OverlapsJoinHashTable::setInverseBucketSizeInfo(), OverlapsJoinHashTable::shardCount(), logger::thread_id(), and VLOG.

{
  auto timer = DEBUG_TIMER(__func__);
  CHECK(layout == HashType::OneToMany);

  const auto& query_info =
      get_inner_query_info(HashJoin::getInnerTableId(inner_outer_pairs_), query_infos_)
          .info;

  if (query_info.fragments.empty()) {
    return;
  }

  VLOG(1) << "Reify with layout " << getHashTypeString(layout)
          << "for table_id: " << getInnerTableId();

  std::vector<ColumnsForDevice> columns_per_device;
  const auto catalog = executor_->getCatalog();
  CHECK(catalog);

  auto& data_mgr = catalog->getDataMgr();
  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
  if (memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL) {
    for (int device_id = 0; device_id < device_count_; ++device_id) {
      dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(&data_mgr, device_id));
    }
  }
  const auto shard_count = shardCount();
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    const auto fragments =
        shard_count
            ? only_shards_for_device(query_info.fragments, device_id, device_count_)
            : query_info.fragments;
    const auto columns_for_device =
        fetchColumnsForDevice(fragments,
                              device_id,
                              memory_level_ == Data_Namespace::MemoryLevel::GPU_LEVEL
                                  ? dev_buff_owners[device_id].get()
                                  : nullptr);
    columns_per_device.push_back(columns_for_device);
  }

  inverse_bucket_sizes_for_dimension_.clear();

  const auto bucket_range =
      dynamic_cast<const Analyzer::Constant*>(range_expr_->get_right_operand());

  CHECK(bucket_range);
  CHECK(bucket_range->get_type_info().is_fp() &&
        bucket_range->get_type_info().get_size() == 8);  // TODO

  const auto bucket_range_datum = bucket_range->get_constval();

  inverse_bucket_sizes_for_dimension_.emplace_back(1. / bucket_range_datum.doubleval);
  inverse_bucket_sizes_for_dimension_.emplace_back(1. / bucket_range_datum.doubleval);

  setInverseBucketSizeInfo(
      inverse_bucket_sizes_for_dimension_, columns_per_device, device_count_);

  auto [entry_count, emitted_keys_count] =
      computeRangeHashTableCounts(shard_count, columns_per_device);

  size_t hash_table_size = OverlapsJoinHashTable::calculateHashTableSize(
      inverse_bucket_sizes_for_dimension_.size(), emitted_keys_count, entry_count);

  VLOG(1) << "Finalized range join hash table: entry count " << entry_count
          << " hash table size " << hash_table_size;

  std::vector<std::future<void>> init_threads;
  for (int device_id = 0; device_id < device_count_; ++device_id) {
    const auto fragments =
        shard_count
            ? only_shards_for_device(query_info.fragments, device_id, device_count_)
            : query_info.fragments;
    init_threads.push_back(
        std::async(std::launch::async,
                   &RangeJoinHashTable::reifyForDevice,
                   this,
                   /* columns_for_device */ columns_per_device[device_id],
                   /* layout_type */ layout,
                   /* entry_count */ entry_count,
                   /* emitted_keys_count */ emitted_keys_count,
                   /* device_id */ device_id,
                   /* parent_thread_id */ logger::thread_id()));
  }
  for (auto& init_thread : init_threads) {
    init_thread.wait();
  }
  for (auto& init_thread : init_threads) {
    init_thread.get();
  }
}
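
The bucket geometry falls directly out of the RangeOper constant: for ST_Distance(...) <= r, both spatial dimensions receive the same inverse bucket size, 1 / r. A toy illustration (makeInverseBucketSizes is a hypothetical helper, not part of the class):

#include <vector>

// Two dimensions (x, y), same inverse bucket size, as in reifyWithLayout().
std::vector<double> makeInverseBucketSizes(double bucket_range) {
  return {1.0 / bucket_range, 1.0 / bucket_range};
}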


Member Data Documentation

const double RangeJoinHashTable::bucket_threshold_ {std::numeric_limits<double>::max()}
private

Definition at line 130 of file RangeJoinHashTable.h.

Referenced by computeRangeHashTableCounts(), and initHashTableOnCpu().

std::shared_ptr<Analyzer::ColumnVar> RangeJoinHashTable::inner_col_expr_
private

Definition at line 129 of file RangeJoinHashTable.h.

const size_t RangeJoinHashTable::max_hashtable_size_ {std::numeric_limits<size_t>::max()}
private

Definition at line 131 of file RangeJoinHashTable.h.

Referenced by computeRangeHashTableCounts(), and initHashTableOnCpu().

HashtableCacheMetaInfo RangeJoinHashTable::overlaps_hashtable_cache_meta_info_
private

Definition at line 132 of file RangeJoinHashTable.h.

const Analyzer::RangeOper* RangeJoinHashTable::range_expr_
private

Definition at line 128 of file RangeJoinHashTable.h.

Referenced by isInnerColCompressed(), and reifyWithLayout().


The documentation for this class was generated from the following files:

RangeJoinHashTable.h
RangeJoinHashTable.cpp