OmniSciDB  ab4938a6a3
BaselineJoinHashTable Class Reference

#include <BaselineJoinHashTable.h>

+ Inheritance diagram for BaselineJoinHashTable:
+ Collaboration diagram for BaselineJoinHashTable:

Classes

struct  ColumnsForDevice
 
struct  CompositeKeyInfo
 
struct  HashTableCacheKey
 
struct  HashTableCacheValue
 

Public Member Functions

int64_t getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const noexcept override
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const noexcept override
 
std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
std::set< DecodedJoinHashBufferEntry > toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
JoinHashTableInterface::HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
virtual ~BaselineJoinHashTable ()
 
- Public Member Functions inherited from JoinHashTableInterface
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, ThrustAllocator &dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 

Static Public Member Functions

static std::shared_ptr< BaselineJoinHashTable > getInstance (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static size_t getShardCountForCondition (const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
 
static auto yieldCacheInvalidator () -> std::function< void()>
 
static const std::shared_ptr< std::vector< int8_t > > & getCachedHashTable (size_t idx)
 
static size_t getEntryCntCachedHashTable (size_t idx)
 
static uint64_t getNumberOfCachedHashTables ()
 
- Static Public Member Functions inherited from JoinHashTableInterface
static std::string getHashTypeString (HashType ht) noexcept
 
static DecodedJoinHashBufferSet toSet (size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size)
 Decode hash table into a std::set for easy inspection and validation. More...
 
static std::string toString (const std::string &type, const std::string &layout_type, size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size, bool raw=false)
 Decode hash table into a human-readable string. More...
 
static std::shared_ptr< JoinHashTableInterface > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static std::shared_ptr< JoinHashTableInterface > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::shared_ptr< JoinHashTableInterface > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 

Protected Member Functions

 BaselineJoinHashTable (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const size_t entry_count, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count)
 
virtual void reifyWithLayout (const JoinHashTableInterface::HashType layout)
 
virtual ColumnsForDevice fetchColumnsForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, ThrustAllocator &dev_buff_owner)
 
virtual std::pair< size_t, size_t > approximateTupleCount (const std::vector< ColumnsForDevice > &) const
 
virtual size_t getKeyComponentWidth () const
 
virtual size_t getKeyComponentCount () const
 
virtual int initHashTableOnCpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout)
 
virtual int initHashTableOnGpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout, const size_t key_component_width, const size_t key_component_count, const int device_id)
 
virtual llvm::Value * codegenKey (const CompilationOptions &)
 
size_t shardCount () const
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
CompositeKeyInfo getCompositeKeyInfo () const
 
void reify ()
 
void reifyForDevice (const ColumnsForDevice &columns_for_device, const JoinHashTableInterface::HashType layout, const int device_id, const logger::ThreadId parent_thread_id)
 
void checkHashJoinReplicationConstraint (const int table_id) const
 
int initHashTableForDevice (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_buckets, const JoinHashTableInterface::HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
 
llvm::Value * hashPtr (const size_t index)
 
void initHashTableOnCpuFromCache (const HashTableCacheKey &)
 
void putHashTableOnCpuToCache (const HashTableCacheKey &)
 
std::pair< ssize_t, size_t > getApproximateTupleCountFromCache (const HashTableCacheKey &) const
 
bool isBitwiseEq () const
 
void freeHashBufferMemory ()
 
void freeHashBufferGpuMemory ()
 
void freeHashBufferCpuMemory ()
 
bool layoutRequiresAdditionalBuffers (JoinHashTableInterface::HashType layout) const noexcept override
 
const HashTableCacheValue * findHashTableOnCpuInCache (const HashTableCacheKey &)
 

Static Protected Member Functions

static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 

Protected Attributes

const std::shared_ptr< Analyzer::BinOper > condition_
 
const std::vector< InputTableInfo > & query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
JoinHashTableInterface::HashType layout_
 
size_t entry_count_
 
size_t emitted_keys_count_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
 
std::mutex cpu_hash_table_buff_mutex_
 
std::vector< InnerOuter > inner_outer_pairs_
 
const Catalog_Namespace::Catalog * catalog_
 
const int device_count_
 

Static Protected Attributes

static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
 
static std::mutex hash_table_cache_mutex_
 
static const int ERR_FAILED_TO_FETCH_COLUMN {-3}
 
static const int ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN {-4}
 

Private Member Functions

size_t getKeyBufferSize () const noexcept
 
size_t getComponentBufferSize () const noexcept
 

Additional Inherited Members

- Public Types inherited from JoinHashTableInterface
enum  HashType : int { HashType::OneToOne, HashType::OneToMany, HashType::ManyToMany }
 

Detailed Description

Definition at line 43 of file BaselineJoinHashTable.h.

Constructor & Destructor Documentation

◆ ~BaselineJoinHashTable()

virtual BaselineJoinHashTable::~BaselineJoinHashTable ( )
inlinevirtual

Definition at line 123 of file BaselineJoinHashTable.h.

References BaselineJoinHashTable(), getComponentBufferSize(), getInnerTableId(), getKeyBufferSize(), and reifyWithLayout().

123 {}
+ Here is the call graph for this function:

◆ BaselineJoinHashTable()

BaselineJoinHashTable::BaselineJoinHashTable ( const std::shared_ptr< Analyzer::BinOper >  condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const size_t  entry_count,
ColumnCacheMap &  column_cache,
Executor *  executor,
const std::vector< InnerOuter > &  inner_outer_pairs,
const int  device_count 
)
protected

Definition at line 109 of file BaselineJoinHashTable.cpp.

References CHECK_GT, and device_count_.

Referenced by getInstance(), and ~BaselineJoinHashTable().

119  : condition_(condition)
120  , query_infos_(query_infos)
121  , memory_level_(memory_level)
122  , layout_(preferred_hash_type)
123  , entry_count_(entry_count)
125  , executor_(executor)
126  , column_cache_(column_cache)
127  , inner_outer_pairs_(inner_outer_pairs)
128  , catalog_(executor->getCatalog())
129  , device_count_(device_count)
130 #ifdef HAVE_CUDA
131  , block_size_(memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
132  ? executor->blockSize()
133  : 0)
134  , grid_size_(memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
135  ? executor->gridSize()
136  : 0) {
138 }
139 #else
140 {
142 }
JoinHashTableInterface::HashType layout_
#define CHECK_GT(x, y)
Definition: Logger.h:209
const std::vector< InputTableInfo > & query_infos_
std::vector< InnerOuter > inner_outer_pairs_
ColumnCacheMap & column_cache_
const Catalog_Namespace::Catalog * catalog_
const Data_Namespace::MemoryLevel memory_level_
const std::shared_ptr< Analyzer::BinOper > condition_
+ Here is the caller graph for this function:

Member Function Documentation

◆ approximateTupleCount()

std::pair< size_t, size_t > BaselineJoinHashTable::approximateTupleCount ( const std::vector< ColumnsForDevice > &  columns_per_device) const
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 404 of file BaselineJoinHashTable.cpp.

References ThrustAllocator::allocateScopedBuffer(), approximate_distinct_tuples(), approximate_distinct_tuples_on_device(), Bitmap, catalog_, CHECK, CHECK_EQ, condition_, copy_from_gpu(), CPU, Data_Namespace::CPU_LEVEL, cpu_threads(), device_count_, getApproximateTupleCountFromCache(), getCompositeKeyInfo(), Catalog_Namespace::Catalog::getDataMgr(), getEffectiveMemoryLevel(), GPU, Data_Namespace::GPU_LEVEL, hll_size(), hll_unify(), inner_outer_pairs_, transfer_flat_object_to_gpu(), transfer_vector_of_flat_objects_to_gpu(), and UNREACHABLE.

Referenced by reifyWithLayout().

405  {
406  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
407  CountDistinctDescriptor count_distinct_desc{
409  0,
410  11,
411  true,
412  effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
415  1};
416  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();
417 
418  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());
419 
420  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
421  const auto composite_key_info = getCompositeKeyInfo();
422  HashTableCacheKey cache_key{columns_per_device.front().join_columns.front().num_elems,
423  composite_key_info.cache_key_chunks,
424  condition_->get_optype()};
425  const auto cached_count_info = getApproximateTupleCountFromCache(cache_key);
426  if (cached_count_info.first >= 0) {
427  return std::make_pair(cached_count_info.first, cached_count_info.second);
428  }
429  int thread_count = cpu_threads();
430  std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
431  auto hll_result = &hll_buffer_all_cpus[0];
432 
433  approximate_distinct_tuples(hll_result,
434  count_distinct_desc.bitmap_sz_bits,
435  padded_size_bytes,
436  columns_per_device.front().join_columns,
437  columns_per_device.front().join_column_types,
438  thread_count);
439  for (int i = 1; i < thread_count; ++i) {
440  hll_unify(hll_result,
441  hll_result + i * padded_size_bytes,
442  1 << count_distinct_desc.bitmap_sz_bits);
443  }
444  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
445  }
446 #ifdef HAVE_CUDA
447  auto& data_mgr = catalog_->getDataMgr();
448  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
449  for (auto& host_hll_buffer : host_hll_buffers) {
450  host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
451  }
452  std::vector<std::future<void>> approximate_distinct_device_threads;
453  for (int device_id = 0; device_id < device_count_; ++device_id) {
454  approximate_distinct_device_threads.emplace_back(std::async(
455  std::launch::async,
456  [device_id,
457  &columns_per_device,
458  &count_distinct_desc,
459  &data_mgr,
460  &host_hll_buffers,
461  this] {
462  ThrustAllocator allocator(&data_mgr, device_id);
463  auto device_hll_buffer =
464  allocator.allocateScopedBuffer(count_distinct_desc.bitmapPaddedSizeBytes());
465  data_mgr.getCudaMgr()->zeroDeviceMem(
466  device_hll_buffer, count_distinct_desc.bitmapPaddedSizeBytes(), device_id);
467  const auto& columns_for_device = columns_per_device[device_id];
468  auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(
469  columns_for_device.join_columns, allocator);
470  auto join_column_types_gpu = transfer_vector_of_flat_objects_to_gpu(
471  columns_for_device.join_column_types, allocator);
472  const auto key_handler =
473  GenericKeyHandler(columns_for_device.join_columns.size(),
474  true,
475  join_columns_gpu,
476  join_column_types_gpu,
477  nullptr,
478  nullptr);
479  const auto key_handler_gpu =
480  transfer_flat_object_to_gpu(key_handler, allocator);
482  reinterpret_cast<uint8_t*>(device_hll_buffer),
483  count_distinct_desc.bitmap_sz_bits,
484  key_handler_gpu,
485  columns_for_device.join_columns[0].num_elems,
486  block_size_,
487  grid_size_);
488 
489  auto& host_hll_buffer = host_hll_buffers[device_id];
490  copy_from_gpu(&data_mgr,
491  &host_hll_buffer[0],
492  reinterpret_cast<CUdeviceptr>(device_hll_buffer),
493  count_distinct_desc.bitmapPaddedSizeBytes(),
494  device_id);
495  }));
496  }
497  for (auto& child : approximate_distinct_device_threads) {
498  child.get();
499  }
500  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
501  auto& result_hll_buffer = host_hll_buffers.front();
502  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
503  for (int device_id = 1; device_id < device_count_; ++device_id) {
504  auto& host_hll_buffer = host_hll_buffers[device_id];
505  hll_unify(hll_result,
506  reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
507  1 << count_distinct_desc.bitmap_sz_bits);
508  }
509  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
510 #else
511  UNREACHABLE();
512  return {0, 0};
513 #endif // HAVE_CUDA
514 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::pair< ssize_t, size_t > getApproximateTupleCountFromCache(const HashTableCacheKey &) const
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:183
void hll_unify(T1 *lhs, T2 *rhs, const size_t m)
Definition: HyperLogLog.h:109
#define UNREACHABLE()
Definition: Logger.h:241
CompositeKeyInfo getCompositeKeyInfo() const
size_t hll_size(const T *M, const size_t bitmap_sz_bits)
Definition: HyperLogLog.h:90
T * transfer_vector_of_flat_objects_to_gpu(const std::vector< T > &vec, ThrustAllocator &allocator)
std::vector< InnerOuter > inner_outer_pairs_
void approximate_distinct_tuples(uint8_t *hll_buffer_all_cpus, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const int thread_count)
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
void approximate_distinct_tuples_on_device(uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
const Catalog_Namespace::Catalog * catalog_
#define CHECK(condition)
Definition: Logger.h:197
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
T * transfer_flat_object_to_gpu(const T &object, ThrustAllocator &allocator)
int cpu_threads()
Definition: thread_count.h:25
const std::shared_ptr< Analyzer::BinOper > condition_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ checkHashJoinReplicationConstraint()

void BaselineJoinHashTable::checkHashJoinReplicationConstraint ( const int  table_id) const
protected

Definition at line 1189 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK, g_cluster, Catalog_Namespace::Catalog::getMetadataForTable(), shardCount(), and table_is_replicated().

1189  {
1190  if (!g_cluster) {
1191  return;
1192  }
1193  if (table_id >= 0) {
1194  const auto inner_td = catalog_->getMetadataForTable(table_id);
1195  CHECK(inner_td);
1196  const auto shard_count = shardCount();
1197  if (!shard_count && !table_is_replicated(inner_td)) {
1198  throw TableMustBeReplicated(inner_td->tableName);
1199  }
1200  }
1201 }
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
bool table_is_replicated(const TableDescriptor *td)
const Catalog_Namespace::Catalog * catalog_
#define CHECK(condition)
Definition: Logger.h:197
bool g_cluster
+ Here is the call graph for this function:

◆ codegenKey()

llvm::Value * BaselineJoinHashTable::codegenKey ( const CompilationOptions &  co)
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 1118 of file BaselineJoinHashTable.cpp.

References CHECK, CHECK_EQ, CodeGenerator::codegen(), executor_, get_int_type(), getKeyComponentCount(), getKeyComponentWidth(), inner_outer_pairs_, LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenMatchingSet(), and codegenSlot().

1118  {
1119  const auto key_component_width = getKeyComponentWidth();
1120  CHECK(key_component_width == 4 || key_component_width == 8);
1121  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
1122  llvm::Value* key_buff_lv{nullptr};
1123  switch (key_component_width) {
1124  case 4:
1125  key_buff_lv =
1126  LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv);
1127  break;
1128  case 8:
1129  key_buff_lv =
1130  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1131  break;
1132  default:
1133  CHECK(false);
1134  }
1135 
1136  CodeGenerator code_generator(executor_);
1137  for (size_t i = 0; i < getKeyComponentCount(); ++i) {
1138  const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(key_buff_lv, LL_INT(i));
1139  const auto& inner_outer_pair = inner_outer_pairs_[i];
1140  const auto outer_col = inner_outer_pair.second;
1141  const auto col_lvs = code_generator.codegen(outer_col, true, co);
1142  CHECK_EQ(size_t(1), col_lvs.size());
1143  const auto col_lv = LL_BUILDER.CreateSExt(
1144  col_lvs.front(), get_int_type(key_component_width * 8, LL_CONTEXT));
1145  LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
1146  }
1147  return key_buff_lv;
1148 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
virtual size_t getKeyComponentCount() const
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::vector< InnerOuter > inner_outer_pairs_
#define LL_INT(v)
#define LL_CONTEXT
#define LL_BUILDER
virtual size_t getKeyComponentWidth() const
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ codegenMatchingSet()

HashJoinMatchingSet BaselineJoinHashTable::codegenMatchingSet ( const CompilationOptions &  co,
const size_t  index 
)
overridevirtual

Implements JoinHashTableInterface.

Reimplemented in OverlapsJoinHashTable.

Definition at line 1042 of file BaselineJoinHashTable.cpp.

References CHECK, JoinHashTable::codegenHashTableLoad(), codegenKey(), JoinHashTable::codegenMatchingSet(), entry_count_, executor_, get_int_type(), getComponentBufferSize(), getKeyComponentCount(), getKeyComponentWidth(), layout_, LL_BUILDER, LL_CONTEXT, LL_INT, offsetBufferOff(), JoinHashTableInterface::OneToMany, and to_string().

Referenced by OverlapsJoinHashTable::codegenMatchingSet().

1044  {
1045  const auto key_component_width = getKeyComponentWidth();
1046  CHECK(key_component_width == 4 || key_component_width == 8);
1047  auto key_buff_lv = codegenKey(co);
1049  auto hash_ptr = JoinHashTable::codegenHashTableLoad(index, executor_);
1050  const auto composite_dict_ptr_type =
1051  llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
1052  const auto composite_key_dict =
1053  hash_ptr->getType()->isPointerTy()
1054  ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
1055  : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
1056  const auto key_component_count = getKeyComponentCount();
1057  const auto key = executor_->cgen_state_->emitExternalCall(
1058  "get_composite_key_index_" + std::to_string(key_component_width * 8),
1059  get_int_type(64, LL_CONTEXT),
1060  {key_buff_lv,
1061  LL_INT(key_component_count),
1062  composite_key_dict,
1063  LL_INT(entry_count_)});
1064  auto one_to_many_ptr = hash_ptr;
1065  if (one_to_many_ptr->getType()->isPointerTy()) {
1066  one_to_many_ptr =
1067  LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
1068  } else {
1069  CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
1070  }
1071  const auto composite_key_dict_size = offsetBufferOff();
1072  one_to_many_ptr =
1073  LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
1075  {one_to_many_ptr, key, LL_INT(int64_t(0)), LL_INT(entry_count_ - 1)},
1076  false,
1077  false,
1078  false,
1080  executor_);
1081 }
virtual size_t getKeyComponentCount() const
size_t getComponentBufferSize() const noexcept
JoinHashTableInterface::HashType layout_
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
virtual llvm::Value * codegenKey(const CompilationOptions &)
#define LL_INT(v)
#define LL_CONTEXT
#define LL_BUILDER
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
virtual size_t getKeyComponentWidth() const
size_t offsetBufferOff() const noexcept override
HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t) override
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ codegenSlot()

llvm::Value * BaselineJoinHashTable::codegenSlot ( const CompilationOptions &  co,
const size_t  index 
)
overridevirtual

Implements JoinHashTableInterface.

Definition at line 1026 of file BaselineJoinHashTable.cpp.

References CHECK, codegenKey(), entry_count_, executor_, get_int_type(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), hashPtr(), LL_BUILDER, LL_CONTEXT, LL_INT, JoinHashTableInterface::OneToOne, and to_string().

1027  {
1029  const auto key_component_width = getKeyComponentWidth();
1030  CHECK(key_component_width == 4 || key_component_width == 8);
1031  auto key_buff_lv = codegenKey(co);
1032  const auto hash_ptr = hashPtr(index);
1033  const auto key_ptr_lv =
1034  LL_BUILDER.CreatePointerCast(key_buff_lv, llvm::Type::getInt8PtrTy(LL_CONTEXT));
1035  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
1036  return executor_->cgen_state_->emitExternalCall(
1037  "baseline_hash_join_idx_" + std::to_string(key_component_width * 8),
1038  get_int_type(64, LL_CONTEXT),
1039  {hash_ptr, key_ptr_lv, key_size_lv, LL_INT(entry_count_)});
1040 }
virtual size_t getKeyComponentCount() const
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
virtual llvm::Value * codegenKey(const CompilationOptions &)
#define LL_INT(v)
#define LL_CONTEXT
#define LL_BUILDER
virtual size_t getKeyComponentWidth() const
llvm::Value * hashPtr(const size_t index)
#define CHECK(condition)
Definition: Logger.h:197
JoinHashTableInterface::HashType getHashType() const noexcept override
+ Here is the call graph for this function:

◆ countBufferOff()

size_t BaselineJoinHashTable::countBufferOff ( ) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 1087 of file BaselineJoinHashTable.cpp.

References getComponentBufferSize(), getKeyBufferSize(), layout_, layoutRequiresAdditionalBuffers(), and offsetBufferOff().

Referenced by getDeviceCount(), payloadBufferOff(), toSet(), and toString().

1087  {
1090  } else {
1091  return getKeyBufferSize();
1092  }
1093 }
size_t getComponentBufferSize() const noexcept
bool layoutRequiresAdditionalBuffers(JoinHashTableInterface::HashType layout) const noexcept override
size_t getKeyBufferSize() const noexcept
JoinHashTableInterface::HashType layout_
size_t offsetBufferOff() const noexcept override
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ fetchColumnsForDevice()

BaselineJoinHashTable::ColumnsForDevice BaselineJoinHashTable::fetchColumnsForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
ThrustAllocator &  dev_buff_owner 
)
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 516 of file BaselineJoinHashTable.cpp.

References catalog_, column_cache_, executor_, JoinHashTableInterface::fetchJoinColumn(), get_column_descriptor_maybe(), get_join_column_type_kind(), getEffectiveMemoryLevel(), inline_fixed_encoding_null_val(), inner_outer_pairs_, and isBitwiseEq().

Referenced by reifyWithLayout().

519  {
520  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
521 
522  std::vector<JoinColumn> join_columns;
523  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
524  std::vector<JoinColumnTypeInfo> join_column_types;
525  std::vector<JoinBucketInfo> join_bucket_info;
526  std::vector<std::shared_ptr<void>> malloc_owner;
527  for (const auto& inner_outer_pair : inner_outer_pairs_) {
528  const auto inner_col = inner_outer_pair.first;
529  const auto inner_cd = get_column_descriptor_maybe(
530  inner_col->get_column_id(), inner_col->get_table_id(), *catalog_);
531  if (inner_cd && inner_cd->isVirtualCol) {
533  }
534  join_columns.emplace_back(fetchJoinColumn(inner_col,
535  fragments,
536  effective_memory_level,
537  device_id,
538  chunks_owner,
539  dev_buff_owner,
540  malloc_owner,
541  executor_,
542  &column_cache_));
543  const auto& ti = inner_col->get_type_info();
544  join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
545  0,
546  0,
548  isBitwiseEq(),
549  0,
551  }
552  return {join_columns, join_column_types, chunks_owner, join_bucket_info, malloc_owner};
553 }
std::vector< InnerOuter > inner_outer_pairs_
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:177
ColumnCacheMap & column_cache_
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, ThrustAllocator &dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
const Catalog_Namespace::Catalog * catalog_
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ findHashTableOnCpuInCache()

const BaselineJoinHashTable::HashTableCacheValue * BaselineJoinHashTable::findHashTableOnCpuInCache ( const HashTableCacheKey &  key)
protected

Definition at line 1204 of file BaselineJoinHashTable.cpp.

References hash_table_cache_, and hash_table_cache_mutex_.

1204  {
1205  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1206  for (const auto& kv : hash_table_cache_) {
1207  if (kv.first == key) {
1208  return &kv.second;
1209  }
1210  }
1211  return nullptr;
1212 }
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_

◆ freeHashBufferCpuMemory()

void BaselineJoinHashTable::freeHashBufferCpuMemory ( )
protected

Definition at line 1301 of file BaselineJoinHashTable.cpp.

References cpu_hash_table_buff_, HashTypeCache::hash_type_cache_, and HashTypeCache::hash_type_cache_mutex_.

Referenced by freeHashBufferMemory(), and BaselineJoinHashTable::HashTableCacheKey::operator<().

1301  {
1302  cpu_hash_table_buff_.reset();
1303 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
+ Here is the caller graph for this function:

◆ freeHashBufferGpuMemory()

void BaselineJoinHashTable::freeHashBufferGpuMemory ( )
protected

Definition at line 1287 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK, CudaAllocator::freeGpuAbstractBuffer(), and Catalog_Namespace::Catalog::getDataMgr().

Referenced by freeHashBufferMemory(), and BaselineJoinHashTable::HashTableCacheKey::operator<().

1287  {
1288 #ifdef HAVE_CUDA
1289  auto& data_mgr = catalog_->getDataMgr();
1290  for (auto& buf : gpu_hash_table_buff_) {
1291  if (buf) {
1292  CudaAllocator::freeGpuAbstractBuffer(&data_mgr, buf);
1293  buf = nullptr;
1294  }
1295  }
1296 #else
1297  CHECK(false);
1298 #endif // HAVE_CUDA
1299 }
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:183
static void freeGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, Data_Namespace::AbstractBuffer *ab)
const Catalog_Namespace::Catalog * catalog_
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ freeHashBufferMemory()

void BaselineJoinHashTable::freeHashBufferMemory ( )
protected

Definition at line 1280 of file BaselineJoinHashTable.cpp.

References freeHashBufferCpuMemory(), and freeHashBufferGpuMemory().

Referenced by getInstance(), and reify().

1280  {
1281 #ifdef HAVE_CUDA
1283 #endif
1285 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getApproximateTupleCountFromCache()

std::pair< ssize_t, size_t > BaselineJoinHashTable::getApproximateTupleCountFromCache ( const HashTableCacheKey key) const
protected

Definition at line 1257 of file BaselineJoinHashTable.cpp.

References CHECK_GE, BaselineJoinHashTable::HashTableCacheKey::chunk_keys, hash_table_cache_, and hash_table_cache_mutex_.

Referenced by OverlapsJoinHashTable::approximateTupleCount(), and approximateTupleCount().

1258  {
1259  for (auto chunk_key : key.chunk_keys) {
1260  CHECK_GE(chunk_key.size(), size_t(2));
1261  if (chunk_key[1] < 0) {
1262  return std::make_pair(-1, 0);
1263  ;
1264  }
1265  }
1266 
1267  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1268  for (const auto& kv : hash_table_cache_) {
1269  if (kv.first == key) {
1270  return std::make_pair(kv.second.entry_count / 2, kv.second.emitted_keys_count);
1271  }
1272  }
1273  return std::make_pair(-1, 0);
1274 }
#define CHECK_GE(x, y)
Definition: Logger.h:210
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
+ Here is the caller graph for this function:

◆ getCachedHashTable()

static const std::shared_ptr<std::vector<int8_t> >& BaselineJoinHashTable::getCachedHashTable ( size_t  idx)
inlinestatic

Definition at line 104 of file BaselineJoinHashTable.h.

References CHECK, CHECK_LT, hash_table_cache_, and hash_table_cache_mutex_.

Referenced by QueryRunner::QueryRunner::getCachedBaselineHashTable().

104  {
105  std::lock_guard<std::mutex> guard(hash_table_cache_mutex_);
106  CHECK(!hash_table_cache_.empty());
107  CHECK_LT(idx, hash_table_cache_.size());
108  return hash_table_cache_.at(idx).second.buffer;
109  }
#define CHECK_LT(x, y)
Definition: Logger.h:207
static std::mutex hash_table_cache_mutex_
#define CHECK(condition)
Definition: Logger.h:197
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
+ Here is the caller graph for this function:

◆ getComponentBufferSize()

size_t BaselineJoinHashTable::getComponentBufferSize ( ) const
privatenoexcept

Definition at line 1114 of file BaselineJoinHashTable.cpp.

References entry_count_.

Referenced by codegenMatchingSet(), countBufferOff(), payloadBufferOff(), and ~BaselineJoinHashTable().

1114  {
1115  return entry_count_ * sizeof(int32_t);
1116 }
+ Here is the caller graph for this function:

◆ getCompositeKeyInfo()

BaselineJoinHashTable::CompositeKeyInfo BaselineJoinHashTable::getCompositeKeyInfo ( ) const
protected

Definition at line 274 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK, Catalog_Namespace::DBMetadata::dbId, executor_, Catalog_Namespace::Catalog::getCurrentDB(), inner_outer_pairs_, and kENCODING_DICT.

Referenced by OverlapsJoinHashTable::approximateTupleCount(), approximateTupleCount(), OverlapsJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), reify(), and OverlapsJoinHashTable::reifyWithLayout().

275  {
276  std::vector<const void*> sd_inner_proxy_per_key;
277  std::vector<const void*> sd_outer_proxy_per_key;
278  std::vector<ChunkKey> cache_key_chunks; // used for the cache key
279  for (const auto& inner_outer_pair : inner_outer_pairs_) {
280  const auto inner_col = inner_outer_pair.first;
281  const auto outer_col = inner_outer_pair.second;
282  const auto& inner_ti = inner_col->get_type_info();
283  const auto& outer_ti = outer_col->get_type_info();
284  ChunkKey cache_key_chunks_for_column{catalog_->getCurrentDB().dbId,
285  inner_col->get_table_id(),
286  inner_col->get_column_id()};
287  if (inner_ti.is_string()) {
288  CHECK(outer_ti.is_string());
289  CHECK(inner_ti.get_compression() == kENCODING_DICT &&
290  outer_ti.get_compression() == kENCODING_DICT);
291  const auto sd_inner_proxy = executor_->getStringDictionaryProxy(
292  inner_ti.get_comp_param(), executor_->getRowSetMemoryOwner(), true);
293  const auto sd_outer_proxy = executor_->getStringDictionaryProxy(
294  outer_ti.get_comp_param(), executor_->getRowSetMemoryOwner(), true);
295  CHECK(sd_inner_proxy && sd_outer_proxy);
296  sd_inner_proxy_per_key.push_back(sd_inner_proxy);
297  sd_outer_proxy_per_key.push_back(sd_outer_proxy);
298  cache_key_chunks_for_column.push_back(sd_outer_proxy->getGeneration());
299  } else {
300  sd_inner_proxy_per_key.emplace_back();
301  sd_outer_proxy_per_key.emplace_back();
302  }
303  cache_key_chunks.push_back(cache_key_chunks_for_column);
304  }
305  return {sd_inner_proxy_per_key, sd_outer_proxy_per_key, cache_key_chunks};
306 }
std::vector< InnerOuter > inner_outer_pairs_
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:182
const Catalog_Namespace::Catalog * catalog_
#define CHECK(condition)
Definition: Logger.h:197
std::vector< int > ChunkKey
Definition: types.h:35
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getDeviceCount()

int BaselineJoinHashTable::getDeviceCount ( ) const
inlineoverridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 88 of file BaselineJoinHashTable.h.

References countBufferOff(), device_count_, offsetBufferOff(), and payloadBufferOff().

88 { return device_count_; };
+ Here is the call graph for this function:

◆ getEffectiveMemoryLevel()

Data_Namespace::MemoryLevel BaselineJoinHashTable::getEffectiveMemoryLevel ( const std::vector< InnerOuter > &  inner_outer_pairs) const
protected

Definition at line 605 of file BaselineJoinHashTable.cpp.

References Data_Namespace::CPU_LEVEL, executor_, memory_level_, and needs_dictionary_translation().

Referenced by OverlapsJoinHashTable::approximateTupleCount(), approximateTupleCount(), OverlapsJoinHashTable::computeBucketSizes(), OverlapsJoinHashTable::fetchColumnsForDevice(), fetchColumnsForDevice(), and reifyForDevice().

606  {
607  for (const auto& inner_outer_pair : inner_outer_pairs) {
609  inner_outer_pair.first, inner_outer_pair.second, executor_)) {
611  }
612  }
613  return memory_level_;
614 }
bool needs_dictionary_translation(const Analyzer::ColumnVar *inner_col, const Analyzer::Expr *outer_col_expr, const Executor *executor)
const Data_Namespace::MemoryLevel memory_level_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getEntryCntCachedHashTable()

static size_t BaselineJoinHashTable::getEntryCntCachedHashTable ( size_t  idx)
inlinestatic

Definition at line 111 of file BaselineJoinHashTable.h.

References CHECK, CHECK_LT, hash_table_cache_, and hash_table_cache_mutex_.

Referenced by QueryRunner::QueryRunner::getEntryCntCachedBaselineHashTable().

111  {
112  std::lock_guard<std::mutex> guard(hash_table_cache_mutex_);
113  CHECK(!hash_table_cache_.empty());
114  CHECK_LT(idx, hash_table_cache_.size());
115  return hash_table_cache_.at(idx).second.entry_count;
116  }
#define CHECK_LT(x, y)
Definition: Logger.h:207
static std::mutex hash_table_cache_mutex_
#define CHECK(condition)
Definition: Logger.h:197
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
+ Here is the caller graph for this function:

◆ getHashType()

JoinHashTableInterface::HashType BaselineJoinHashTable::getHashType ( ) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 1178 of file BaselineJoinHashTable.cpp.

References layout_.

Referenced by OverlapsJoinHashTable::codegenMatchingSet(), and codegenSlot().

1178  {
1179  return layout_;
1180 }
JoinHashTableInterface::HashType layout_
+ Here is the caller graph for this function:

◆ getInnerTableId() [1/2]

int BaselineJoinHashTable::getInnerTableId ( ) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 1163 of file BaselineJoinHashTable.cpp.

References CHECK, and inner_outer_pairs_.

Referenced by getInstance(), OverlapsJoinHashTable::getInstance(), OverlapsJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), OverlapsJoinHashTable::reifyWithLayout(), reifyWithLayout(), and ~BaselineJoinHashTable().

1163  {
1164  try {
1166  } catch (...) {
1167  CHECK(false);
1168  }
1169  return 0;
1170 }
std::vector< InnerOuter > inner_outer_pairs_
int getInnerTableId() const noexcept override
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ getInnerTableId() [2/2]

int BaselineJoinHashTable::getInnerTableId ( const std::vector< InnerOuter > &  inner_outer_pairs)
staticprotected

Definition at line 1182 of file BaselineJoinHashTable.cpp.

References CHECK.

1183  {
1184  CHECK(!inner_outer_pairs.empty());
1185  const auto first_inner_col = inner_outer_pairs.front().first;
1186  return first_inner_col->get_table_id();
1187 }
#define CHECK(condition)
Definition: Logger.h:197

◆ getInnerTableRteIdx()

int BaselineJoinHashTable::getInnerTableRteIdx ( ) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 1172 of file BaselineJoinHashTable.cpp.

References CHECK, and inner_outer_pairs_.

1172  {
1173  CHECK(!inner_outer_pairs_.empty());
1174  const auto first_inner_col = inner_outer_pairs_.front().first;
1175  return first_inner_col->get_rte_idx();
1176 }
std::vector< InnerOuter > inner_outer_pairs_
#define CHECK(condition)
Definition: Logger.h:197

◆ getInstance()

std::shared_ptr< BaselineJoinHashTable > BaselineJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap column_cache,
Executor executor 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 33 of file BaselineJoinHashTable.cpp.

References BaselineJoinHashTable(), get_entries_per_device(), get_inner_query_info(), JoinHashTableInterface::getHashTypeString(), getInnerTableId(), Fragmenter_Namespace::TableInfo::getNumTuplesUpperBound(), getShardCountForCondition(), Data_Namespace::GPU_LEVEL, InputTableInfo::info, normalize_column_pairs(), VLOG, and VLOGGING.

Referenced by JoinHashTableInterface::getInstance().

40  {
41  decltype(std::chrono::steady_clock::now()) ts1, ts2;
42 
43  if (VLOGGING(1)) {
44  VLOG(1) << "Building keyed hash table " << getHashTypeString(preferred_hash_type)
45  << " for qual: " << condition->toString();
46  ts1 = std::chrono::steady_clock::now();
47  }
48  auto inner_outer_pairs = normalize_column_pairs(
49  condition.get(), *executor->getCatalog(), executor->getTemporaryTables());
50 
51  const auto& query_info =
52  get_inner_query_info(getInnerTableId(inner_outer_pairs), query_infos).info;
53  const auto total_entries = 2 * query_info.getNumTuplesUpperBound();
54  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
55  throw TooManyHashEntries();
56  }
57  const auto shard_count = memory_level == Data_Namespace::GPU_LEVEL
59  condition.get(), executor, inner_outer_pairs)
60  : 0;
61  const auto entries_per_device =
62  get_entries_per_device(total_entries, shard_count, device_count, memory_level);
63  auto join_hash_table = std::shared_ptr<BaselineJoinHashTable>(
64  new BaselineJoinHashTable(condition,
65  query_infos,
66  memory_level,
67  preferred_hash_type,
68  entries_per_device,
69  column_cache,
70  executor,
71  inner_outer_pairs,
72  device_count));
73  join_hash_table->checkHashJoinReplicationConstraint(getInnerTableId(inner_outer_pairs));
74  try {
75  join_hash_table->reify();
76  } catch (const TableMustBeReplicated& e) {
77  // Throw a runtime error to abort the query
78  join_hash_table->freeHashBufferMemory();
79  throw std::runtime_error(e.what());
80  } catch (const HashJoinFail& e) {
81  // HashJoinFail exceptions log an error and trigger a retry with a join loop (if
82  // possible)
83  join_hash_table->freeHashBufferMemory();
84  throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
85  "involved in equijoin | ") +
86  e.what());
87  } catch (const ColumnarConversionNotSupported& e) {
88  throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
89  e.what());
90  } catch (const OutOfMemory& e) {
91  throw HashJoinFail(
92  std::string("Ran out of memory while building hash tables for equijoin | ") +
93  e.what());
94  } catch (const std::exception& e) {
95  throw std::runtime_error(
96  std::string("Fatal error while attempting to build hash tables for join: ") +
97  e.what());
98  }
99  if (VLOGGING(1)) {
100  ts2 = std::chrono::steady_clock::now();
101  VLOG(1) << "Built keyed hash table "
102  << getHashTypeString(join_hash_table->getHashType()) << " in "
103  << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
104  << " ms";
105  }
106  return join_hash_table;
107 }
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
std::vector< InnerOuter > normalize_column_pairs(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
BaselineJoinHashTable(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const size_t entry_count, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count)
size_t get_entries_per_device(const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
int getInnerTableId() const noexcept override
#define VLOGGING(n)
Definition: Logger.h:195
static std::string getHashTypeString(HashType ht) noexcept
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)
#define VLOG(n)
Definition: Logger.h:291
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getJoinHashBuffer()

int64_t BaselineJoinHashTable::getJoinHashBuffer ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 158 of file BaselineJoinHashTable.cpp.

References CHECK, CHECK_LT, CPU, and cpu_hash_table_buff_.

Referenced by toSet(), and toString().

159  {
160  if (device_type == ExecutorDeviceType::CPU && !cpu_hash_table_buff_) {
161  return 0;
162  }
163 #ifdef HAVE_CUDA
164  CHECK_LT(static_cast<size_t>(device_id), gpu_hash_table_buff_.size());
165  if (device_type == ExecutorDeviceType::CPU) {
166  return reinterpret_cast<int64_t>(&(*cpu_hash_table_buff_)[0]);
167  } else {
168  return gpu_hash_table_buff_[device_id]
169  ? reinterpret_cast<CUdeviceptr>(
170  gpu_hash_table_buff_[device_id]->getMemoryPtr())
171  : reinterpret_cast<CUdeviceptr>(nullptr);
172  }
173 #else
174  CHECK(device_type == ExecutorDeviceType::CPU);
175  return reinterpret_cast<int64_t>(&(*cpu_hash_table_buff_)[0]);
176 #endif
177 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
unsigned long long CUdeviceptr
Definition: nocuda.h:27
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ getJoinHashBufferSize()

size_t BaselineJoinHashTable::getJoinHashBufferSize ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 179 of file BaselineJoinHashTable.cpp.

References CHECK, CHECK_LT, CPU, and cpu_hash_table_buff_.

Referenced by toSet(), and toString().

180  {
181  if (device_type == ExecutorDeviceType::CPU && !cpu_hash_table_buff_) {
182  return 0;
183  }
184 #ifdef HAVE_CUDA
185  CHECK_LT(static_cast<size_t>(device_id), gpu_hash_table_buff_.size());
186  if (device_type == ExecutorDeviceType::CPU) {
187  return cpu_hash_table_buff_->size() *
188  sizeof(decltype(cpu_hash_table_buff_)::element_type::value_type);
189  } else {
190  return gpu_hash_table_buff_[device_id]
191  ? gpu_hash_table_buff_[device_id]->reservedSize()
192  : 0;
193  }
194 #else
195  CHECK(device_type == ExecutorDeviceType::CPU);
196  return cpu_hash_table_buff_->size() *
197  sizeof(decltype(cpu_hash_table_buff_)::element_type::value_type);
198 #endif
199 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ getKeyBufferSize()

size_t BaselineJoinHashTable::getKeyBufferSize ( ) const
privatenoexcept

Definition at line 1103 of file BaselineJoinHashTable.cpp.

References CHECK, entry_count_, getKeyComponentCount(), getKeyComponentWidth(), layout_, and layoutRequiresAdditionalBuffers().

Referenced by countBufferOff(), offsetBufferOff(), payloadBufferOff(), and ~BaselineJoinHashTable().

1103  {
1104  const auto key_component_width = getKeyComponentWidth();
1105  CHECK(key_component_width == 4 || key_component_width == 8);
1106  const auto key_component_count = getKeyComponentCount();
1108  return entry_count_ * key_component_count * key_component_width;
1109  } else {
1110  return entry_count_ * (key_component_count + 1) * key_component_width;
1111  }
1112 }
virtual size_t getKeyComponentCount() const
bool layoutRequiresAdditionalBuffers(JoinHashTableInterface::HashType layout) const noexcept override
JoinHashTableInterface::HashType layout_
virtual size_t getKeyComponentWidth() const
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getKeyComponentCount()

size_t BaselineJoinHashTable::getKeyComponentCount ( ) const
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 601 of file BaselineJoinHashTable.cpp.

References inner_outer_pairs_.

Referenced by codegenKey(), codegenMatchingSet(), codegenSlot(), getKeyBufferSize(), initHashTableForDevice(), initHashTableOnCpu(), toSet(), and toString().

601  {
602  return inner_outer_pairs_.size();
603 }
std::vector< InnerOuter > inner_outer_pairs_
+ Here is the caller graph for this function:

◆ getKeyComponentWidth()

size_t BaselineJoinHashTable::getKeyComponentWidth ( ) const
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 589 of file BaselineJoinHashTable.cpp.

References CHECK_EQ, and inner_outer_pairs_.

Referenced by codegenKey(), codegenMatchingSet(), codegenSlot(), getKeyBufferSize(), initHashTableForDevice(), initHashTableOnCpu(), toSet(), and toString().

589  {
590  for (const auto& inner_outer_pair : inner_outer_pairs_) {
591  const auto inner_col = inner_outer_pair.first;
592  const auto& inner_col_ti = inner_col->get_type_info();
593  if (inner_col_ti.get_logical_size() > 4) {
594  CHECK_EQ(8, inner_col_ti.get_logical_size());
595  return 8;
596  }
597  }
598  return 4;
599 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< InnerOuter > inner_outer_pairs_
+ Here is the caller graph for this function:

◆ getMemoryLevel()

Data_Namespace::MemoryLevel BaselineJoinHashTable::getMemoryLevel ( ) const
inlineoverridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 84 of file BaselineJoinHashTable.h.

References memory_level_.

84  {
85  return memory_level_;
86  };
const Data_Namespace::MemoryLevel memory_level_

◆ getNumberOfCachedHashTables()

static uint64_t BaselineJoinHashTable::getNumberOfCachedHashTables ( )
inlinestatic

Definition at line 118 of file BaselineJoinHashTable.h.

References hash_table_cache_, and hash_table_cache_mutex_.

Referenced by QueryRunner::QueryRunner::getNumberOfCachedBaselineJoinHashTables().

118  {
119  std::lock_guard<std::mutex> guard(hash_table_cache_mutex_);
120  return hash_table_cache_.size();
121  }
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
+ Here is the caller graph for this function:

◆ getShardCountForCondition()

size_t BaselineJoinHashTable::getShardCountForCondition ( const Analyzer::BinOper condition,
const Executor executor,
const std::vector< InnerOuter > &  inner_outer_pairs 
)
static

Definition at line 145 of file BaselineJoinHashTable.cpp.

References get_shard_count().

Referenced by getInstance(), OverlapsJoinHashTable::getInstance(), shardCount(), and Executor::skipFragmentPair().

148  {
149  for (const auto& inner_outer_pair : inner_outer_pairs) {
150  const auto pair_shard_count = get_shard_count(inner_outer_pair, executor);
151  if (pair_shard_count) {
152  return pair_shard_count;
153  }
154  }
155  return 0;
156 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ hashPtr()

llvm::Value * BaselineJoinHashTable::hashPtr ( const size_t  index)
protected

Definition at line 1150 of file BaselineJoinHashTable.cpp.

References JoinHashTable::codegenHashTableLoad(), executor_, LL_BUILDER, and LL_CONTEXT.

Referenced by codegenSlot().

1150  {
1151  auto hash_ptr = JoinHashTable::codegenHashTableLoad(index, executor_);
1152  const auto pi8_type = llvm::Type::getInt8PtrTy(LL_CONTEXT);
1153  return hash_ptr->getType()->isPointerTy()
1154  ? LL_BUILDER.CreatePointerCast(hash_ptr, pi8_type)
1155  : LL_BUILDER.CreateIntToPtr(hash_ptr, pi8_type);
1156 }
#define LL_CONTEXT
#define LL_BUILDER
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ initHashTableForDevice()

int BaselineJoinHashTable::initHashTableForDevice ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_buckets,
const JoinHashTableInterface::HashType  layout,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id 
)
protected

Definition at line 950 of file BaselineJoinHashTable.cpp.

References CudaAllocator::allocGpuAbstractBuffer(), catalog_, CHECK, CHECK_EQ, copy_to_gpu(), cpu_hash_table_buff_, cpu_hash_table_buff_mutex_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, emitted_keys_count_, entry_count_, Catalog_Namespace::Catalog::getDataMgr(), getKeyComponentCount(), getKeyComponentWidth(), Data_Namespace::GPU_LEVEL, initHashTableOnCpu(), initHashTableOnGpu(), layoutRequiresAdditionalBuffers(), memory_level_, JoinHashTableInterface::OneToOne, and VLOG.

Referenced by reifyForDevice().

956  {
957  auto timer = DEBUG_TIMER(__func__);
958  const auto key_component_width = getKeyComponentWidth();
959  const auto key_component_count = getKeyComponentCount();
960  int err = 0;
961 #ifdef HAVE_CUDA
962  auto& data_mgr = catalog_->getDataMgr();
964  const auto entry_size =
965  (key_component_count +
966  (layout == JoinHashTableInterface::HashType::OneToOne ? 1 : 0)) *
967  key_component_width;
968  const auto keys_for_all_rows = emitted_keys_count_;
969  const size_t one_to_many_hash_entries = layoutRequiresAdditionalBuffers(layout)
970  ? 2 * entry_count_ + keys_for_all_rows
971  : 0;
972  const size_t hash_table_size =
973  entry_size * entry_count_ + one_to_many_hash_entries * sizeof(int32_t);
974 
975  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
976  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
977  throw TooManyHashEntries();
978  }
979 
980  VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
981  << entry_count_ << " hash entries and " << one_to_many_hash_entries
982  << " entries in the one to many buffer";
983  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
984  gpu_hash_table_buff_[device_id] =
985  CudaAllocator::allocGpuAbstractBuffer(&data_mgr, hash_table_size, device_id);
986  }
987 #else
988  CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
989 #endif
990  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
991  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
992  err = initHashTableOnCpu(join_columns, join_column_types, join_bucket_info, layout);
993  // Transfer the hash table on the GPU if we've only built it on CPU
994  // but the query runs on GPU (join on dictionary encoded columns).
995  // Don't transfer the buffer if there was an error since we'll bail anyway.
996  if (memory_level_ == Data_Namespace::GPU_LEVEL && !err) {
997 #ifdef HAVE_CUDA
998  copy_to_gpu(
999  &data_mgr,
1000  reinterpret_cast<CUdeviceptr>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1001  &(*cpu_hash_table_buff_)[0],
1002  cpu_hash_table_buff_->size() * sizeof((*cpu_hash_table_buff_)[0]),
1003  device_id);
1004 #else
1005  CHECK(false);
1006 #endif
1007  }
1008  } else {
1009  err = initHashTableOnGpu(join_columns,
1010  join_column_types,
1011  join_bucket_info,
1012  layout,
1013  key_component_width,
1014  key_component_count,
1015  device_id);
1016  }
1017  return err;
1018 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
virtual int initHashTableOnGpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout, const size_t key_component_width, const size_t key_component_count, const int device_id)
virtual size_t getKeyComponentCount() const
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:183
bool layoutRequiresAdditionalBuffers(JoinHashTableInterface::HashType layout) const noexcept override
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:31
const Catalog_Namespace::Catalog * catalog_
virtual size_t getKeyComponentWidth() const
static Data_Namespace::AbstractBuffer * allocGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, const size_t num_bytes, const int device_id)
const Data_Namespace::MemoryLevel memory_level_
virtual int initHashTableOnCpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout)
#define CHECK(condition)
Definition: Logger.h:197
#define DEBUG_TIMER(name)
Definition: Logger.h:313
#define VLOG(n)
Definition: Logger.h:291
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ initHashTableOnCpu()

int BaselineJoinHashTable::initHashTableOnCpu ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const JoinHashTableInterface::HashType  layout 
)
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 616 of file BaselineJoinHashTable.cpp.

References CHECK, condition_, cpu_hash_table_buff_, cpu_threads(), DEBUG_TIMER, entry_count_, fill_baseline_hash_join_buff_32(), fill_baseline_hash_join_buff_64(), fill_one_to_many_baseline_hash_table_32(), fill_one_to_many_baseline_hash_table_64(), getCompositeKeyInfo(), getInnerTableId(), getKeyComponentCount(), getKeyComponentWidth(), init_baseline_hash_join_buff_32(), init_baseline_hash_join_buff_64(), init_hash_join_buff(), initHashTableOnCpuFromCache(), JoinHashTableInterface::OneToMany, JoinHashTableInterface::OneToOne, putHashTableOnCpuToCache(), and VLOG.

Referenced by initHashTableForDevice().

620  {
621  auto timer = DEBUG_TIMER(__func__);
622  const auto composite_key_info = getCompositeKeyInfo();
623  CHECK(!join_columns.empty());
624  HashTableCacheKey cache_key{join_columns.front().num_elems,
625  composite_key_info.cache_key_chunks,
626  condition_->get_optype()};
627  initHashTableOnCpuFromCache(cache_key);
628  if (cpu_hash_table_buff_) {
629  return 0;
630  }
631  const auto key_component_width = getKeyComponentWidth();
632  const auto key_component_count = getKeyComponentCount();
633  const auto entry_size =
634  (key_component_count +
635  (layout == JoinHashTableInterface::HashType::OneToOne ? 1 : 0)) *
636  key_component_width;
637  const auto keys_for_all_rows = join_columns.front().num_elems;
638  const size_t one_to_many_hash_entries =
640  ? 2 * entry_count_ + keys_for_all_rows
641  : 0;
642  const size_t hash_table_size =
643  entry_size * entry_count_ + one_to_many_hash_entries * sizeof(int32_t);
644 
645  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
646  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
647  throw TooManyHashEntries();
648  }
649 
650  VLOG(1) << "Initializing CPU Join Hash Table with " << entry_count_
651  << " hash entries and " << one_to_many_hash_entries
652  << " entries in the one to many buffer";
653  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
654 
655  cpu_hash_table_buff_.reset(new std::vector<int8_t>(hash_table_size));
656  int thread_count = cpu_threads();
657  std::vector<std::future<void>> init_cpu_buff_threads;
658  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
659  init_cpu_buff_threads.emplace_back(
660  std::async(std::launch::async,
661  [this,
662  key_component_count,
663  key_component_width,
664  thread_idx,
665  thread_count,
666  layout] {
667  switch (key_component_width) {
668  case 4:
670  &(*cpu_hash_table_buff_)[0],
671  entry_count_,
672  key_component_count,
674  -1,
675  thread_idx,
676  thread_count);
677  break;
678  case 8:
680  &(*cpu_hash_table_buff_)[0],
681  entry_count_,
682  key_component_count,
684  -1,
685  thread_idx,
686  thread_count);
687  break;
688  default:
689  CHECK(false);
690  }
691  }));
692  }
693  for (auto& child : init_cpu_buff_threads) {
694  child.get();
695  }
696  std::vector<std::future<int>> fill_cpu_buff_threads;
697  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
698  fill_cpu_buff_threads.emplace_back(std::async(
699  std::launch::async,
700  [this,
701  &composite_key_info,
702  &join_columns,
703  &join_column_types,
704  key_component_count,
705  key_component_width,
706  layout,
707  thread_idx,
708  thread_count] {
709  switch (key_component_width) {
710  case 4: {
711  const auto key_handler =
712  GenericKeyHandler(key_component_count,
713  true,
714  &join_columns[0],
715  &join_column_types[0],
716  &composite_key_info.sd_inner_proxy_per_key[0],
717  &composite_key_info.sd_outer_proxy_per_key[0]);
719  &(*cpu_hash_table_buff_)[0],
720  entry_count_,
721  -1,
722  key_component_count,
724  &key_handler,
725  join_columns[0].num_elems,
726  thread_idx,
727  thread_count);
728  break;
729  }
730  case 8: {
731  const auto key_handler =
732  GenericKeyHandler(key_component_count,
733  true,
734  &join_columns[0],
735  &join_column_types[0],
736  &composite_key_info.sd_inner_proxy_per_key[0],
737  &composite_key_info.sd_outer_proxy_per_key[0]);
739  &(*cpu_hash_table_buff_)[0],
740  entry_count_,
741  -1,
742  key_component_count,
744  &key_handler,
745  join_columns[0].num_elems,
746  thread_idx,
747  thread_count);
748  break;
749  }
750  default:
751  CHECK(false);
752  }
753  return -1;
754  }));
755  }
756  int err = 0;
757  for (auto& child : fill_cpu_buff_threads) {
758  int partial_err = child.get();
759  if (partial_err) {
760  err = partial_err;
761  }
762  }
763  if (err) {
764  cpu_hash_table_buff_.reset();
765  return err;
766  }
768  auto one_to_many_buff = reinterpret_cast<int32_t*>(&(*cpu_hash_table_buff_)[0] +
769  entry_count_ * entry_size);
770  init_hash_join_buff(one_to_many_buff, entry_count_, -1, 0, 1);
771  switch (key_component_width) {
772  case 4: {
773  const auto composite_key_dict =
774  reinterpret_cast<int32_t*>(&(*cpu_hash_table_buff_)[0]);
776  composite_key_dict,
777  entry_count_,
778  -1,
779  key_component_count,
780  join_columns,
781  join_column_types,
782  join_bucket_info,
783  composite_key_info.sd_inner_proxy_per_key,
784  composite_key_info.sd_outer_proxy_per_key,
785  thread_count);
786  break;
787  }
788  case 8: {
789  const auto composite_key_dict =
790  reinterpret_cast<int64_t*>(&(*cpu_hash_table_buff_)[0]);
792  composite_key_dict,
793  entry_count_,
794  -1,
795  key_component_count,
796  join_columns,
797  join_column_types,
798  join_bucket_info,
799  composite_key_info.sd_inner_proxy_per_key,
800  composite_key_info.sd_outer_proxy_per_key,
801  thread_count);
802  break;
803  }
804  default:
805  CHECK(false);
806  }
807  }
808  if (!err && getInnerTableId() > 0) {
809  putHashTableOnCpuToCache(cache_key);
810  }
811  return err;
812 }
void fill_one_to_many_baseline_hash_table_32(int32_t *buff, const int32_t *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void *> &sd_inner_proxy_per_key, const std::vector< const void *> &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
void putHashTableOnCpuToCache(const HashTableCacheKey &)
virtual size_t getKeyComponentCount() const
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int32_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void init_baseline_hash_join_buff_64(int8_t *hash_join_buff, const int32_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_64(int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void *> &sd_inner_proxy_per_key, const std::vector< const void *> &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
CompositeKeyInfo getCompositeKeyInfo() const
int fill_baseline_hash_join_buff_64(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int getInnerTableId() const noexcept override
virtual size_t getKeyComponentWidth() const
void initHashTableOnCpuFromCache(const HashTableCacheKey &)
#define CHECK(condition)
Definition: Logger.h:197
#define DEBUG_TIMER(name)
Definition: Logger.h:313
int cpu_threads()
Definition: thread_count.h:25
#define VLOG(n)
Definition: Logger.h:291
int fill_baseline_hash_join_buff_32(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const size_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
const std::shared_ptr< Analyzer::BinOper > condition_
void init_baseline_hash_join_buff_32(int8_t *hash_join_buff, const int32_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ initHashTableOnCpuFromCache()

void BaselineJoinHashTable::initHashTableOnCpuFromCache ( const HashTableCacheKey key)
protected

Definition at line 1214 of file BaselineJoinHashTable.cpp.

References cpu_hash_table_buff_, DEBUG_TIMER, emitted_keys_count_, entry_count_, hash_table_cache_, hash_table_cache_mutex_, layout_, and VLOG.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), and BaselineJoinHashTable::HashTableCacheKey::operator<().

1214  {
1215  auto timer = DEBUG_TIMER(__func__);
1216  VLOG(1) << "Checking CPU hash table cache.";
1217  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1218  if (hash_table_cache_.size() == 0) {
1219  VLOG(1) << "CPU hash table cache was empty.";
1220  }
1221  for (const auto& kv : hash_table_cache_) {
1222  if (kv.first == key) {
1223  VLOG(1) << "Found a suitable hash table in the cache.";
1224  cpu_hash_table_buff_ = kv.second.buffer;
1225  layout_ = kv.second.type;
1226  entry_count_ = kv.second.entry_count;
1227  emitted_keys_count_ = kv.second.emitted_keys_count;
1228  break;
1229  } else {
1230  VLOG(1) << hash_table_cache_.size()
1231  << " hash tables found in cache. None were suitable for this query.";
1232  }
1233  }
1234 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
JoinHashTableInterface::HashType layout_
static std::mutex hash_table_cache_mutex_
#define DEBUG_TIMER(name)
Definition: Logger.h:313
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
#define VLOG(n)
Definition: Logger.h:291
+ Here is the caller graph for this function:

◆ initHashTableOnGpu()

int BaselineJoinHashTable::initHashTableOnGpu ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const JoinHashTableInterface::HashType  layout,
const size_t  key_component_width,
const size_t  key_component_count,
const int  device_id 
)
protected virtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 814 of file BaselineJoinHashTable.cpp.

References ThrustAllocator::allocateScopedBuffer(), catalog_, copy_from_gpu(), copy_to_gpu(), DEBUG_TIMER, entry_count_, fill_baseline_hash_join_buff_on_device_32(), fill_baseline_hash_join_buff_on_device_64(), fill_one_to_many_baseline_hash_table_on_device_32(), fill_one_to_many_baseline_hash_table_on_device_64(), Catalog_Namespace::Catalog::getDataMgr(), init_baseline_hash_join_buff_on_device_32(), init_baseline_hash_join_buff_on_device_64(), init_hash_join_buff_on_device(), JoinHashTableInterface::OneToMany, JoinHashTableInterface::OneToOne, transfer_flat_object_to_gpu(), transfer_vector_of_flat_objects_to_gpu(), and UNREACHABLE.

Referenced by initHashTableForDevice().

821  {
822  auto timer = DEBUG_TIMER(__func__);
823  int err = 0;
824 #ifdef HAVE_CUDA
825  auto& data_mgr = catalog_->getDataMgr();
826  ThrustAllocator allocator(&data_mgr, device_id);
827  auto dev_err_buff =
828  reinterpret_cast<CUdeviceptr>(allocator.allocateScopedBuffer(sizeof(int)));
829  copy_to_gpu(&data_mgr, dev_err_buff, &err, sizeof(err), device_id);
830  switch (key_component_width) {
831  case 4:
833  reinterpret_cast<int8_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
834  entry_count_,
835  key_component_count,
837  -1,
838  block_size_,
839  grid_size_);
840  break;
841  case 8:
843  reinterpret_cast<int8_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
844  entry_count_,
845  key_component_count,
847  -1,
848  block_size_,
849  grid_size_);
850  break;
851  default:
852  UNREACHABLE();
853  }
854  auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(join_columns, allocator);
855  auto hash_buff =
856  reinterpret_cast<int8_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr());
857  auto join_column_types_gpu =
858  transfer_vector_of_flat_objects_to_gpu(join_column_types, allocator);
859 
860  const auto key_handler = GenericKeyHandler(key_component_count,
861  true,
862  join_columns_gpu,
863  join_column_types_gpu,
864  nullptr,
865  nullptr);
866  const auto key_handler_gpu = transfer_flat_object_to_gpu(key_handler, allocator);
867  switch (key_component_width) {
868  case 4: {
870  hash_buff,
871  entry_count_,
872  -1,
873  key_component_count,
875  reinterpret_cast<int*>(dev_err_buff),
876  key_handler_gpu,
877  join_columns.front().num_elems,
878  block_size_,
879  grid_size_);
880  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
881  break;
882  }
883  case 8: {
885  hash_buff,
886  entry_count_,
887  -1,
888  key_component_count,
890  reinterpret_cast<int*>(dev_err_buff),
891  key_handler_gpu,
892  join_columns.front().num_elems,
893  block_size_,
894  grid_size_);
895  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
896  break;
897  }
898  default:
899  UNREACHABLE();
900  }
901  if (err) {
902  return err;
903  }
905  const auto entry_size = key_component_count * key_component_width;
906  auto one_to_many_buff = reinterpret_cast<int32_t*>(
907  gpu_hash_table_buff_[device_id]->getMemoryPtr() + entry_count_ * entry_size);
908  switch (key_component_width) {
909  case 4: {
910  const auto composite_key_dict =
911  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr());
913  one_to_many_buff, entry_count_, -1, block_size_, grid_size_);
915  composite_key_dict,
916  entry_count_,
917  -1,
918  key_component_count,
919  key_handler_gpu,
920  join_columns.front().num_elems,
921  block_size_,
922  grid_size_);
923  break;
924  }
925  case 8: {
926  const auto composite_key_dict =
927  reinterpret_cast<int64_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr());
929  one_to_many_buff, entry_count_, -1, block_size_, grid_size_);
931  composite_key_dict,
932  entry_count_,
933  -1,
934  key_handler_gpu,
935  join_columns.front().num_elems,
936  block_size_,
937  grid_size_);
938  break;
939  }
940  default:
941  UNREACHABLE();
942  }
943  }
944 #else
945  UNREACHABLE();
946 #endif
947  return err;
948 }
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:183
unsigned long long CUdeviceptr
Definition: nocuda.h:27
#define UNREACHABLE()
Definition: Logger.h:241
T * transfer_vector_of_flat_objects_to_gpu(const std::vector< T > &vec, ThrustAllocator &allocator)
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:31
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int32_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int32_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const size_t hash_entry_count, const int32_t invalid_slot_val, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
const Catalog_Namespace::Catalog * catalog_
void init_hash_join_buff_on_device(int32_t *buff, const int32_t entry_count, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const size_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const size_t num_elems, const size_t block_size_x, const size_t grid_size_x)
#define DEBUG_TIMER(name)
Definition: Logger.h:313
T * transfer_flat_object_to_gpu(const T &object, ThrustAllocator &allocator)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ isBitwiseEq()

bool BaselineJoinHashTable::isBitwiseEq ( ) const
protected

Definition at line 1276 of file BaselineJoinHashTable.cpp.

References condition_, and kBW_EQ.

Referenced by OverlapsJoinHashTable::fetchColumnsForDevice(), fetchColumnsForDevice(), and BaselineJoinHashTable::HashTableCacheKey::operator<().

1276  {
1277  return condition_->get_optype() == kBW_EQ;
1278 }
Definition: sqldefs.h:31
const std::shared_ptr< Analyzer::BinOper > condition_
+ Here is the caller graph for this function:

◆ layoutRequiresAdditionalBuffers()

bool BaselineJoinHashTable::layoutRequiresAdditionalBuffers ( JoinHashTableInterface::HashType  layout) const
inline override protected virtual noexcept

◆ offsetBufferOff()

size_t BaselineJoinHashTable::offsetBufferOff ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1083 of file BaselineJoinHashTable.cpp.

References getKeyBufferSize().

Referenced by codegenMatchingSet(), OverlapsJoinHashTable::codegenMatchingSet(), countBufferOff(), getDeviceCount(), toSet(), and toString().

1083  {
1084  return getKeyBufferSize();
1085 }
size_t getKeyBufferSize() const noexcept
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ payloadBufferOff()

size_t BaselineJoinHashTable::payloadBufferOff ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1095 of file BaselineJoinHashTable.cpp.

References countBufferOff(), getComponentBufferSize(), getKeyBufferSize(), layout_, and layoutRequiresAdditionalBuffers().

Referenced by getDeviceCount(), toSet(), and toString().

1095  {
1098  } else {
1099  return getKeyBufferSize();
1100  }
1101 }
size_t countBufferOff() const noexcept override
size_t getComponentBufferSize() const noexcept
bool layoutRequiresAdditionalBuffers(JoinHashTableInterface::HashType layout) const noexcept override
size_t getKeyBufferSize() const noexcept
JoinHashTableInterface::HashType layout_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ putHashTableOnCpuToCache()

void BaselineJoinHashTable::putHashTableOnCpuToCache ( const HashTableCacheKey key)
protected

Definition at line 1236 of file BaselineJoinHashTable.cpp.

References CHECK_GE, BaselineJoinHashTable::HashTableCacheKey::chunk_keys, cpu_hash_table_buff_, emitted_keys_count_, entry_count_, hash_table_cache_, hash_table_cache_mutex_, layout_, and VLOG.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), and BaselineJoinHashTable::HashTableCacheKey::operator<().

1236  {
1237  for (auto chunk_key : key.chunk_keys) {
1238  CHECK_GE(chunk_key.size(), size_t(2));
1239  if (chunk_key[1] < 0) {
1240  return;
1241  }
1242  }
1243 
1244  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1245  VLOG(1) << "Storing hash table in cache.";
1246  for (const auto& kv : hash_table_cache_) {
1247  if (std::get<0>(kv) == key) {
1248  return;
1249  }
1250  }
1251  hash_table_cache_.emplace_back(
1252  key,
1253  HashTableCacheValue{
1255 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
#define CHECK_GE(x, y)
Definition: Logger.h:210
JoinHashTableInterface::HashType layout_
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
#define VLOG(n)
Definition: Logger.h:291
+ Here is the caller graph for this function:

◆ reify()

void BaselineJoinHashTable::reify ( )
protected

Definition at line 308 of file BaselineJoinHashTable.cpp.

References CHECK_EQ, CHECK_LT, condition_, DEBUG_TIMER, device_count_, freeHashBufferMemory(), HashTypeCache::get(), getCompositeKeyInfo(), inner_outer_pairs_, layout_, JoinHashTableInterface::ManyToMany, JoinHashTableInterface::OneToMany, reifyWithLayout(), HashTypeCache::set(), and VLOG.

308  {
309  auto timer = DEBUG_TIMER(__func__);
311 #ifdef HAVE_CUDA
312  gpu_hash_table_buff_.resize(device_count_);
313 #endif // HAVE_CUDA
314  const auto composite_key_info = getCompositeKeyInfo();
315  const auto type_and_found = HashTypeCache::get(composite_key_info.cache_key_chunks);
316  const auto layout = type_and_found.second ? type_and_found.first : layout_;
317 
318  if (condition_->is_overlaps_oper()) {
319  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
321 
322  if (inner_outer_pairs_[0].second->get_type_info().is_array()) {
324  } else {
326  }
327  try {
328  reifyWithLayout(layout);
329  return;
330  } catch (const std::exception& e) {
331  VLOG(1) << "Caught exception while building overlaps baseline hash table: "
332  << e.what();
333  throw;
334  }
335  }
336 
337  try {
338  reifyWithLayout(layout);
339  } catch (const std::exception& e) {
340  VLOG(1) << "Caught exception while building baseline hash table: " << e.what();
342  HashTypeCache::set(composite_key_info.cache_key_chunks,
345  }
346 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
static void set(const std::vector< ChunkKey > &key, const JoinHashTableInterface::HashType hash_type)
JoinHashTableInterface::HashType layout_
CompositeKeyInfo getCompositeKeyInfo() const
std::vector< InnerOuter > inner_outer_pairs_
virtual void reifyWithLayout(const JoinHashTableInterface::HashType layout)
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define DEBUG_TIMER(name)
Definition: Logger.h:313
#define VLOG(n)
Definition: Logger.h:291
const std::shared_ptr< Analyzer::BinOper > condition_
static std::pair< JoinHashTableInterface::HashType, bool > get(const std::vector< ChunkKey > &key)
+ Here is the call graph for this function:

◆ reifyForDevice()

void BaselineJoinHashTable::reifyForDevice ( const ColumnsForDevice columns_for_device,
const JoinHashTableInterface::HashType  layout,
const int  device_id,
const logger::ThreadId  parent_thread_id 
)
protected

Definition at line 555 of file BaselineJoinHashTable.cpp.

References DEBUG_TIMER_NEW_THREAD, ERR_FAILED_TO_FETCH_COLUMN, ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN, getEffectiveMemoryLevel(), initHashTableForDevice(), inner_outer_pairs_, BaselineJoinHashTable::ColumnsForDevice::join_buckets, BaselineJoinHashTable::ColumnsForDevice::join_column_types, BaselineJoinHashTable::ColumnsForDevice::join_columns, and to_string().

Referenced by OverlapsJoinHashTable::reifyWithLayout(), and reifyWithLayout().

558  {
559  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
560  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
561  const auto err = initHashTableForDevice(columns_for_device.join_columns,
562  columns_for_device.join_column_types,
563  columns_for_device.join_buckets,
564  layout,
565  effective_memory_level,
566  device_id);
567  if (err) {
568  switch (err) {
570  throw FailedToFetchColumn();
573  default:
574  throw HashJoinFail(
575  std::string("Unrecognized error when initializing baseline hash table (") +
576  std::to_string(err) + std::string(")"));
577  }
578  }
579 }
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:315
std::string to_string(char const *&&v)
std::vector< InnerOuter > inner_outer_pairs_
static const int ERR_FAILED_TO_FETCH_COLUMN
int initHashTableForDevice(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_buckets, const JoinHashTableInterface::HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
static const int ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ reifyWithLayout()

void BaselineJoinHashTable::reifyWithLayout ( const JoinHashTableInterface::HashType  layout)
protected virtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 348 of file BaselineJoinHashTable.cpp.

References approximateTupleCount(), catalog_, CHECK, device_count_, emitted_keys_count_, entry_count_, fetchColumnsForDevice(), get_entries_per_device(), get_inner_query_info(), Catalog_Namespace::Catalog::getDataMgr(), getInnerTableId(), InputTableInfo::info, layout_, memory_level_, JoinHashTableInterface::OneToMany, only_shards_for_device(), query_infos_, reifyForDevice(), shardCount(), and logger::thread_id().

Referenced by reify(), and ~BaselineJoinHashTable().

349  {
350  layout_ = layout;
351  const auto& query_info = get_inner_query_info(getInnerTableId(), query_infos_).info;
352  if (query_info.fragments.empty()) {
353  return;
354  }
355  auto& data_mgr = catalog_->getDataMgr();
356  auto dev_buff_owners =
357  std::make_unique<std::unique_ptr<ThrustAllocator>[]>(device_count_);
358  for (int device_id = 0; device_id < device_count_; ++device_id) {
359  dev_buff_owners[device_id] = std::make_unique<ThrustAllocator>(&data_mgr, device_id);
360  }
361  std::vector<BaselineJoinHashTable::ColumnsForDevice> columns_per_device;
362  const auto shard_count = shardCount();
363  for (int device_id = 0; device_id < device_count_; ++device_id) {
364  const auto fragments =
365  shard_count
366  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
367  : query_info.fragments;
368  const auto columns_for_device =
369  fetchColumnsForDevice(fragments, device_id, *dev_buff_owners[device_id]);
370  columns_per_device.push_back(columns_for_device);
371  }
373  CHECK(!columns_per_device.front().join_columns.empty());
374  emitted_keys_count_ = columns_per_device.front().join_columns.front().num_elems;
375  size_t tuple_count;
376  std::tie(tuple_count, std::ignore) = approximateTupleCount(columns_per_device);
377  const auto entry_count = 2 * std::max(tuple_count, size_t(1));
378 
379  entry_count_ =
380  get_entries_per_device(entry_count, shard_count, device_count_, memory_level_);
381  }
382  std::vector<std::future<void>> init_threads;
383  for (int device_id = 0; device_id < device_count_; ++device_id) {
384  const auto fragments =
385  shard_count
386  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
387  : query_info.fragments;
388  init_threads.push_back(std::async(std::launch::async,
390  this,
391  columns_per_device[device_id],
392  layout,
393  device_id,
394  logger::thread_id()));
395  }
396  for (auto& init_thread : init_threads) {
397  init_thread.wait();
398  }
399  for (auto& init_thread : init_threads) {
400  init_thread.get();
401  }
402 }
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:183
JoinHashTableInterface::HashType layout_
size_t get_entries_per_device(const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
std::vector< Fragmenter_Namespace::FragmentInfo > only_shards_for_device(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
const std::vector< InputTableInfo > & query_infos_
void reifyForDevice(const ColumnsForDevice &columns_for_device, const JoinHashTableInterface::HashType layout, const int device_id, const logger::ThreadId parent_thread_id)
int getInnerTableId() const noexcept override
const Catalog_Namespace::Catalog * catalog_
const Data_Namespace::MemoryLevel memory_level_
ThreadId thread_id()
Definition: Logger.cpp:715
#define CHECK(condition)
Definition: Logger.h:197
virtual std::pair< size_t, size_t > approximateTupleCount(const std::vector< ColumnsForDevice > &) const
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)
virtual ColumnsForDevice fetchColumnsForDevice(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, ThrustAllocator &dev_buff_owner)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ shardCount()

size_t BaselineJoinHashTable::shardCount ( ) const
protected

Definition at line 581 of file BaselineJoinHashTable.cpp.

References condition_, executor_, getShardCountForCondition(), Data_Namespace::GPU_LEVEL, inner_outer_pairs_, and memory_level_.

Referenced by checkHashJoinReplicationConstraint(), OverlapsJoinHashTable::reifyWithLayout(), and reifyWithLayout().

581  {
583  return 0;
584  }
587 }
std::vector< InnerOuter > inner_outer_pairs_
const Data_Namespace::MemoryLevel memory_level_
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
const std::shared_ptr< Analyzer::BinOper > condition_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ toSet()

std::set< DecodedJoinHashBufferEntry > BaselineJoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override virtual

Implements JoinHashTableInterface.

Definition at line 239 of file BaselineJoinHashTable.cpp.

References catalog_, copy_from_gpu(), countBufferOff(), entry_count_, Catalog_Namespace::Catalog::getDataMgr(), getJoinHashBuffer(), getJoinHashBufferSize(), getKeyComponentCount(), getKeyComponentWidth(), GPU, layout_, offsetBufferOff(), JoinHashTableInterface::OneToOne, payloadBufferOff(), and JoinHashTableInterface::toSet().

241  {
242  auto buffer = getJoinHashBuffer(device_type, device_id);
243  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
244 #ifdef HAVE_CUDA
245  std::unique_ptr<int8_t[]> buffer_copy;
246  if (device_type == ExecutorDeviceType::GPU) {
247  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
248 
250  buffer_copy.get(),
251  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
252  buffer_size,
253  device_id);
254  }
255  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
256 #else
257  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
258 #endif // HAVE_CUDA
259  auto ptr2 = ptr1 + offsetBufferOff();
260  auto ptr3 = ptr1 + countBufferOff();
261  auto ptr4 = ptr1 + payloadBufferOff();
266  entry_count_,
267  ptr1,
268  ptr2,
269  ptr3,
270  ptr4,
271  buffer_size);
272 }
size_t countBufferOff() const noexcept override
int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const noexcept override
virtual size_t getKeyComponentCount() const
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:183
unsigned long long CUdeviceptr
Definition: nocuda.h:27
JoinHashTableInterface::HashType layout_
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const noexcept override
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
size_t payloadBufferOff() const noexcept override
const Catalog_Namespace::Catalog * catalog_
virtual size_t getKeyComponentWidth() const
virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type, const int device_id) const =0
size_t offsetBufferOff() const noexcept override
+ Here is the call graph for this function:

◆ toString()

std::string BaselineJoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
override virtual

Implements JoinHashTableInterface.

Definition at line 201 of file BaselineJoinHashTable.cpp.

References catalog_, condition_, copy_from_gpu(), countBufferOff(), entry_count_, Catalog_Namespace::Catalog::getDataMgr(), JoinHashTableInterface::getHashTypeString(), getJoinHashBuffer(), getJoinHashBufferSize(), getKeyComponentCount(), getKeyComponentWidth(), GPU, layout_, offsetBufferOff(), JoinHashTableInterface::OneToOne, payloadBufferOff(), and JoinHashTableInterface::toString().

203  {
204  auto buffer = getJoinHashBuffer(device_type, device_id);
205  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
206 #ifdef HAVE_CUDA
207  std::unique_ptr<int8_t[]> buffer_copy;
208  if (device_type == ExecutorDeviceType::GPU) {
209  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
210 
212  buffer_copy.get(),
213  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
214  buffer_size,
215  device_id);
216  }
217  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
218 #else
219  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
220 #endif // HAVE_CUDA
221  auto ptr2 = ptr1 + offsetBufferOff();
222  auto ptr3 = ptr1 + countBufferOff();
223  auto ptr4 = ptr1 + payloadBufferOff();
225  !condition_->is_overlaps_oper() ? "keyed" : "geo",
230  entry_count_,
231  ptr1,
232  ptr2,
233  ptr3,
234  ptr4,
235  buffer_size,
236  raw);
237 }
size_t countBufferOff() const noexcept override
int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const noexcept override
virtual size_t getKeyComponentCount() const
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:183
unsigned long long CUdeviceptr
Definition: nocuda.h:27
JoinHashTableInterface::HashType layout_
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const noexcept override
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
size_t payloadBufferOff() const noexcept override
const Catalog_Namespace::Catalog * catalog_
virtual size_t getKeyComponentWidth() const
size_t offsetBufferOff() const noexcept override
static std::string getHashTypeString(HashType ht) noexcept
virtual std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
const std::shared_ptr< Analyzer::BinOper > condition_
+ Here is the call graph for this function:

◆ yieldCacheInvalidator()

static auto BaselineJoinHashTable::yieldCacheInvalidator ( ) -> std::function<void()>
inline static

Definition at line 96 of file BaselineJoinHashTable.h.

References hash_table_cache_, hash_table_cache_mutex_, and VLOG.

96  {
97  VLOG(1) << "Invalidate " << hash_table_cache_.size() << " cached baseline hashtable.";
98  return []() -> void {
99  std::lock_guard<std::mutex> guard(hash_table_cache_mutex_);
100  hash_table_cache_.clear();
101  };
102  }
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
#define VLOG(n)
Definition: Logger.h:291

Member Data Documentation

◆ catalog_

◆ column_cache_

ColumnCacheMap& BaselineJoinHashTable::column_cache_
protected

◆ condition_

◆ cpu_hash_table_buff_

std::shared_ptr<std::vector<int8_t> > BaselineJoinHashTable::cpu_hash_table_buff_
protected

◆ cpu_hash_table_buff_mutex_

std::mutex BaselineJoinHashTable::cpu_hash_table_buff_mutex_
protected

Definition at line 272 of file BaselineJoinHashTable.h.

Referenced by initHashTableForDevice().

◆ device_count_

◆ emitted_keys_count_

◆ entry_count_

◆ ERR_FAILED_TO_FETCH_COLUMN

const int BaselineJoinHashTable::ERR_FAILED_TO_FETCH_COLUMN {-3}
static protected

Definition at line 296 of file BaselineJoinHashTable.h.

Referenced by reifyForDevice().

◆ ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN

const int BaselineJoinHashTable::ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN {-4}
static protected

Definition at line 297 of file BaselineJoinHashTable.h.

Referenced by reifyForDevice().

◆ executor_

◆ hash_table_cache_

◆ hash_table_cache_mutex_

◆ inner_outer_pairs_

◆ layout_

◆ memory_level_

◆ query_infos_

const std::vector<InputTableInfo>& BaselineJoinHashTable::query_infos_
protected

The documentation for this class was generated from the following files: