OmniSciDB  ba1bac9284
BaselineJoinHashTable Class Reference

#include <BaselineJoinHashTable.h>


Public Member Functions

std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
std::set< DecodedJoinHashBufferEntry > toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
std::string getHashJoinType () const final
 
virtual ~BaselineJoinHashTable ()
 
Public Member Functions inherited from HashJoin
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int64_t getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static std::shared_ptr< BaselineJoinHashTable > getInstance (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static size_t getShardCountForCondition (const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
 
static auto getCacheInvalidator () -> std::function< void()>
 
static auto * getHashTableCache ()
 
Static Public Member Functions inherited from HashJoin
static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const RegisteredQueryHint &query_hint)
 Make hash table from an in-flight SQL query's parse tree etc.
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing).
 
static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static void checkHashJoinReplicationConstraint (const int table_id, const size_t shard_count, const Executor *executor)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor)
 

Protected Types

using HashTableCacheValue = std::shared_ptr< HashTable >
 

Protected Member Functions

 BaselineJoinHashTable (const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count)
 
size_t getComponentBufferSize () const noexcept override
 
size_t getKeyBufferSize () const noexcept
 
virtual void reifyWithLayout (const HashType layout)
 
virtual ColumnsForDevice fetchColumnsForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
 
virtual std::pair< size_t, size_t > approximateTupleCount (const std::vector< ColumnsForDevice > &) const
 
virtual size_t getKeyComponentWidth () const
 
virtual size_t getKeyComponentCount () const
 
virtual llvm::Value * codegenKey (const CompilationOptions &)
 
size_t shardCount () const
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
void reify (const HashType preferred_layout)
 
virtual void reifyForDevice (const ColumnsForDevice &columns_for_device, const HashType layout, const int device_id, const size_t entry_count, const size_t emitted_keys_count, const logger::ThreadId parent_thread_id)
 
virtual int initHashTableForDevice (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_buckets, const HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const size_t entry_count, const size_t emitted_keys_count, const int device_id)
 
llvm::Value * hashPtr (const size_t index)
 
std::shared_ptr< HashTableinitHashTableOnCpuFromCache (const HashTableCacheKey &)
 
void putHashTableOnCpuToCache (const HashTableCacheKey &, std::shared_ptr< HashTable > &hash_table)
 
std::pair< std::optional< size_t >, size_t > getApproximateTupleCountFromCache (const HashTableCacheKey &) const
 
bool isBitwiseEq () const
 

Static Protected Member Functions

static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 

Protected Attributes

const std::shared_ptr< Analyzer::BinOper > condition_
 
const JoinType join_type_
 
const std::vector< InputTableInfo > & query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
std::mutex cpu_hash_table_buff_mutex_
 
std::vector< InnerOuter > inner_outer_pairs_
 
const Catalog_Namespace::Catalog * catalog_
 
const int device_count_
 
std::optional< HashType > layout_override_
 
Protected Attributes inherited from HashJoin
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
 

Static Protected Attributes

static std::unique_ptr< HashTableCache< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
 

Detailed Description

Definition at line 70 of file BaselineJoinHashTable.h.

Member Typedef Documentation

using BaselineJoinHashTable::HashTableCacheValue = std::shared_ptr<HashTable>
protected

Definition at line 219 of file BaselineJoinHashTable.h.

Constructor & Destructor Documentation

virtual BaselineJoinHashTable::~BaselineJoinHashTable ( )
inline virtual

Definition at line 134 of file BaselineJoinHashTable.h.

134 {}
BaselineJoinHashTable::BaselineJoinHashTable ( const std::shared_ptr< Analyzer::BinOper >  condition,
const JoinType  join_type,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
ColumnCacheMap &  column_cache,
Executor *  executor,
const std::vector< InnerOuter > &  inner_outer_pairs,
const int  device_count 
)
protected

Definition at line 101 of file BaselineJoinHashTable.cpp.

References CHECK_GT, device_count_, and HashJoin::hash_tables_for_device_.

Referenced by getInstance().

110  : condition_(condition)
111  , join_type_(join_type)
112  , query_infos_(query_infos)
113  , memory_level_(memory_level)
114  , executor_(executor)
115  , column_cache_(column_cache)
116  , inner_outer_pairs_(inner_outer_pairs)
117  , catalog_(executor->getCatalog())
118  , device_count_(device_count) {
119  CHECK_GT(device_count_, 0);
120  hash_tables_for_device_.resize(std::max(device_count_, 1));
121 }

Member Function Documentation

std::pair< size_t, size_t > BaselineJoinHashTable::approximateTupleCount ( const std::vector< ColumnsForDevice > &  columns_per_device) const
protected virtual

Definition at line 329 of file BaselineJoinHashTable.cpp.

References CudaAllocator::alloc(), approximate_distinct_tuples(), approximate_distinct_tuples_on_device(), Bitmap, catalog_, CHECK, CHECK_EQ, condition_, copy_from_gpu(), CPU, Data_Namespace::CPU_LEVEL, cpu_threads(), device_count_, executor_, getApproximateTupleCountFromCache(), HashJoin::getCompositeKeyInfo(), Catalog_Namespace::Catalog::getDataMgr(), getEffectiveMemoryLevel(), GPU, Data_Namespace::GPU_LEVEL, hll_size(), hll_unify(), i, inner_outer_pairs_, join_type_, transfer_flat_object_to_gpu(), transfer_vector_of_flat_objects_to_gpu(), UNREACHABLE, and VLOG.

Referenced by reifyWithLayout().

330  {
331  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
332  CountDistinctDescriptor count_distinct_desc{
333  CountDistinctImplType::Bitmap,
334  0,
335  11,
336  true,
337  effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
338  ? ExecutorDeviceType::GPU
339  : ExecutorDeviceType::CPU,
340  1};
341  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();
342 
343  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());
344 
345  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
346  const auto composite_key_info =
347  HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);
348  HashTableCacheKey cache_key{columns_per_device.front().join_columns.front().num_elems,
349  composite_key_info.cache_key_chunks,
350  condition_->get_optype(),
351  join_type_};
352  const auto cached_count_info = getApproximateTupleCountFromCache(cache_key);
353  if (cached_count_info.first) {
354  VLOG(1) << "Using a cached tuple count: " << *cached_count_info.first
355  << ", emitted keys count: " << cached_count_info.second;
356  return std::make_pair(*cached_count_info.first, cached_count_info.second);
357  }
358  int thread_count = cpu_threads();
359  std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
360  auto hll_result = &hll_buffer_all_cpus[0];
361 
362  approximate_distinct_tuples(hll_result,
363  count_distinct_desc.bitmap_sz_bits,
364  padded_size_bytes,
365  columns_per_device.front().join_columns,
366  columns_per_device.front().join_column_types,
367  thread_count);
368  for (int i = 1; i < thread_count; ++i) {
369  hll_unify(hll_result,
370  hll_result + i * padded_size_bytes,
371  1 << count_distinct_desc.bitmap_sz_bits);
372  }
373  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
374  }
375 #ifdef HAVE_CUDA
376  auto& data_mgr = catalog_->getDataMgr();
377  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
378  for (auto& host_hll_buffer : host_hll_buffers) {
379  host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
380  }
381  std::vector<std::future<void>> approximate_distinct_device_threads;
382  for (int device_id = 0; device_id < device_count_; ++device_id) {
383  approximate_distinct_device_threads.emplace_back(std::async(
384  std::launch::async,
385  [device_id,
386  &columns_per_device,
387  &count_distinct_desc,
388  &data_mgr,
389  &host_hll_buffers] {
390  CudaAllocator allocator(&data_mgr, device_id);
391  auto device_hll_buffer =
392  allocator.alloc(count_distinct_desc.bitmapPaddedSizeBytes());
393  data_mgr.getCudaMgr()->zeroDeviceMem(
394  device_hll_buffer, count_distinct_desc.bitmapPaddedSizeBytes(), device_id);
395  const auto& columns_for_device = columns_per_device[device_id];
396  auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(
397  columns_for_device.join_columns, allocator);
398  auto join_column_types_gpu = transfer_vector_of_flat_objects_to_gpu(
399  columns_for_device.join_column_types, allocator);
400  const auto key_handler =
401  GenericKeyHandler(columns_for_device.join_columns.size(),
402  true,
403  join_columns_gpu,
404  join_column_types_gpu,
405  nullptr,
406  nullptr);
407  const auto key_handler_gpu =
408  transfer_flat_object_to_gpu(key_handler, allocator);
409  approximate_distinct_tuples_on_device(
410  reinterpret_cast<uint8_t*>(device_hll_buffer),
411  count_distinct_desc.bitmap_sz_bits,
412  key_handler_gpu,
413  columns_for_device.join_columns[0].num_elems);
414 
415  auto& host_hll_buffer = host_hll_buffers[device_id];
416  copy_from_gpu(&data_mgr,
417  &host_hll_buffer[0],
418  reinterpret_cast<CUdeviceptr>(device_hll_buffer),
419  count_distinct_desc.bitmapPaddedSizeBytes(),
420  device_id);
421  }));
422  }
423  for (auto& child : approximate_distinct_device_threads) {
424  child.get();
425  }
426  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
427  auto& result_hll_buffer = host_hll_buffers.front();
428  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
429  for (int device_id = 1; device_id < device_count_; ++device_id) {
430  auto& host_hll_buffer = host_hll_buffers[device_id];
431  hll_unify(hll_result,
432  reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
433  1 << count_distinct_desc.bitmap_sz_bits);
434  }
435  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
436 #else
437  UNREACHABLE();
438  return {0, 0};
439 #endif // HAVE_CUDA
440 }
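Below, the CPU branch's merge step is restated as a minimal self-contained sketch: each worker fills its own HyperLogLog register array, and an hll_unify-style register-wise max folds the partials together without changing the estimate. All names here are illustrative, not the engine's.

  #include <algorithm>
  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Register-wise max merges two HLL register arrays; the merged array
  // estimates the cardinality of the union of the two inputs.
  void unify(std::vector<uint8_t>& lhs, const std::vector<uint8_t>& rhs) {
    for (size_t i = 0; i < lhs.size(); ++i) {
      lhs[i] = std::max(lhs[i], rhs[i]);
    }
  }

  int main() {
    const size_t m = 1 << 11;  // 2^11 registers, matching bitmap_sz_bits = 11 above
    std::vector<std::vector<uint8_t>> per_thread(4, std::vector<uint8_t>(m, 0));
    // ... each worker hashes its slice of the join keys into its own buffer ...
    auto& result = per_thread.front();
    for (size_t t = 1; t < per_thread.size(); ++t) {
      unify(result, per_thread[t]);
    }
    // An hll_size-style estimator would then turn `result` into a tuple count.
  }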

llvm::Value * BaselineJoinHashTable::codegenKey ( const CompilationOptions &  co)
protected virtual

Definition at line 775 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), executor_, get_int_type(), get_max_rte_scan_table(), getKeyComponentCount(), getKeyComponentWidth(), i, inner_outer_pairs_, LL_BUILDER, LL_CONTEXT, LL_INT, and self_join_not_covered_by_left_deep_tree().

Referenced by codegenMatchingSet(), and codegenSlot().

775  {
776  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
777  const auto key_component_width = getKeyComponentWidth();
778  CHECK(key_component_width == 4 || key_component_width == 8);
779  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
780  llvm::Value* key_buff_lv{nullptr};
781  switch (key_component_width) {
782  case 4:
783  key_buff_lv =
784  LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv);
785  break;
786  case 8:
787  key_buff_lv =
788  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
789  break;
790  default:
791  CHECK(false);
792  }
793 
794  CodeGenerator code_generator(executor_);
795  for (size_t i = 0; i < getKeyComponentCount(); ++i) {
796  const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(key_buff_lv, LL_INT(i));
797  const auto& inner_outer_pair = inner_outer_pairs_[i];
798  const auto outer_col = inner_outer_pair.second;
799  const auto key_col_var = dynamic_cast<const Analyzer::ColumnVar*>(outer_col);
800  const auto val_col_var =
801  dynamic_cast<const Analyzer::ColumnVar*>(inner_outer_pair.first);
802  if (key_col_var && val_col_var &&
803  self_join_not_covered_by_left_deep_tree(
804  key_col_var,
805  val_col_var,
806  get_max_rte_scan_table(executor_->cgen_state_->scan_idx_to_hash_pos_))) {
807  throw std::runtime_error(
808  "Query execution fails because the query contains not supported self-join "
809  "pattern. We suspect the query requires multiple left-deep join tree due to "
810  "the join condition of the self-join and is not supported for now. Please "
811  "consider rewriting table order in "
812  "FROM clause.");
813  }
814  const auto col_lvs = code_generator.codegen(outer_col, true, co);
815  CHECK_EQ(size_t(1), col_lvs.size());
816  const auto col_lv = LL_BUILDER.CreateSExt(
817  col_lvs.front(), get_int_type(key_component_width * 8, LL_CONTEXT));
818  LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
819  }
820  return key_buff_lv;
821 }
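At runtime, the IR emitted above amounts to filling a small on-stack buffer with sign-extended key components. A plain C++ restatement of that effect (an illustrative sketch, not the generated code):

  #include <cstddef>
  #include <cstdint>

  // Mirrors the CreateSExt + CreateStore pair per component: widen each
  // outer-column value to the key component width, store contiguously.
  void build_key(int64_t* key_buff, const int32_t* outer_vals, size_t component_count) {
    for (size_t i = 0; i < component_count; ++i) {
      key_buff[i] = static_cast<int64_t>(outer_vals[i]);
    }
  }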

HashJoinMatchingSet BaselineJoinHashTable::codegenMatchingSet ( const CompilationOptions &  co,
const size_t  index 
)
override virtual

Implements HashJoin.

Definition at line 693 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, HashJoin::codegenHashTableLoad(), codegenKey(), HashJoin::codegenMatchingSet(), executor_, get_int_type(), getComponentBufferSize(), HashJoin::getHashTableForDevice(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, offsetBufferOff(), OneToMany, and to_string().

695  {
696  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
697  const auto hash_table = getHashTableForDevice(size_t(0));
698  CHECK(hash_table);
699  const auto key_component_width = getKeyComponentWidth();
700  CHECK(key_component_width == 4 || key_component_width == 8);
701  auto key_buff_lv = codegenKey(co);
702  CHECK(getHashType() == HashType::OneToMany);
703  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
704  const auto composite_dict_ptr_type =
705  llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
706  const auto composite_key_dict =
707  hash_ptr->getType()->isPointerTy()
708  ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
709  : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
710  const auto key_component_count = getKeyComponentCount();
711  const auto key = executor_->cgen_state_->emitExternalCall(
712  "get_composite_key_index_" + std::to_string(key_component_width * 8),
713  get_int_type(64, LL_CONTEXT),
714  {key_buff_lv,
715  LL_INT(key_component_count),
716  composite_key_dict,
717  LL_INT(hash_table->getEntryCount())});
718  auto one_to_many_ptr = hash_ptr;
719  if (one_to_many_ptr->getType()->isPointerTy()) {
720  one_to_many_ptr =
721  LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
722  } else {
723  CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
724  }
725  const auto composite_key_dict_size = offsetBufferOff();
726  one_to_many_ptr =
727  LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
728  return HashJoin::codegenMatchingSet(
729  {one_to_many_ptr, key, LL_INT(int64_t(0)), LL_INT(hash_table->getEntryCount() - 1)},
730  false,
731  false,
732  false,
733  getComponentBufferSize(),
734  executor_);
735 }
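Conceptually, the probe above resolves a slot in the composite-key dictionary and then consults the offset, count, and payload buffers appended after it. A sketch of that pointer arithmetic in plain C++, with the buffer order taken from offsetBufferOff(), countBufferOff(), and payloadBufferOff() on this page (struct and function names are assumptions):

  #include <cstddef>
  #include <cstdint>

  struct MatchingSet {
    const int32_t* payload;  // row ids matching the key
    int32_t count;           // number of matching rows
  };

  MatchingSet probe(const int8_t* table, size_t key_dict_size, size_t entry_count,
                    int64_t slot) {
    const auto* offsets = reinterpret_cast<const int32_t*>(table + key_dict_size);
    const auto* counts = offsets + entry_count;  // one int32_t per entry per sub-buffer
    const auto* payload = counts + entry_count;
    return {payload + offsets[slot], counts[slot]};
  }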

llvm::Value * BaselineJoinHashTable::codegenSlot ( const CompilationOptions &  co,
const size_t  index 
)
override virtual

Implements HashJoin.

Definition at line 675 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenKey(), executor_, get_int_type(), HashJoin::getHashTableForDevice(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), hashPtr(), LL_BUILDER, LL_CONTEXT, LL_INT, OneToOne, and to_string().

676  {
677  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
678  CHECK(getHashType() == HashType::OneToOne);
679  const auto key_component_width = getKeyComponentWidth();
680  CHECK(key_component_width == 4 || key_component_width == 8);
681  auto key_buff_lv = codegenKey(co);
682  const auto hash_ptr = hashPtr(index);
683  const auto key_ptr_lv =
684  LL_BUILDER.CreatePointerCast(key_buff_lv, llvm::Type::getInt8PtrTy(LL_CONTEXT));
685  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
686  const auto hash_table = getHashTableForDevice(size_t(0));
687  return executor_->cgen_state_->emitExternalCall(
688  "baseline_hash_join_idx_" + std::to_string(key_component_width * 8),
689  get_int_type(64, LL_CONTEXT),
690  {hash_ptr, key_ptr_lv, key_size_lv, LL_INT(hash_table->getEntryCount())});
691 }
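For a one-to-one layout, the emitted baseline_hash_join_idx_* call resolves the key to a single slot, or -1 when there is no match. A self-contained sketch of such an exact-match probe over a flat key dictionary; the hash and probing scheme here are stand-ins, not the runtime's:

  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  int64_t baseline_probe(const int64_t* dict, size_t entry_count,
                         const int64_t* key, size_t components) {
    const uint64_t h = static_cast<uint64_t>(key[0]);  // stand-in for the real hash
    for (size_t i = 0; i < entry_count; ++i) {
      const size_t slot = (h + i) % entry_count;  // linear probing
      if (std::memcmp(dict + slot * components, key,
                      components * sizeof(int64_t)) == 0) {
        return static_cast<int64_t>(slot);
      }
    }
    return -1;  // no match
  }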

size_t BaselineJoinHashTable::countBufferOff ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 741 of file BaselineJoinHashTable.cpp.

References getComponentBufferSize(), getHashType(), getKeyBufferSize(), HashJoin::layoutRequiresAdditionalBuffers(), and offsetBufferOff().

Referenced by payloadBufferOff(), toSet(), and toString().

741  {
742  if (layoutRequiresAdditionalBuffers(getHashType())) {
743  return offsetBufferOff() + getComponentBufferSize();
744  } else {
745  return getKeyBufferSize();
746  }
747 }
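Together with offsetBufferOff() and payloadBufferOff() (documented below), this fixes the buffer layout: keys, then one int32_t offset per entry, then one int32_t count per entry, then the payload. A standalone restatement of the arithmetic for a layout with additional buffers, under assumed sizes:

  #include <cassert>
  #include <cstddef>
  #include <cstdint>

  int main() {
    const size_t entry_count = 1024, components = 2, width = 8;
    const size_t key_buffer = entry_count * components * width;  // getKeyBufferSize()
    const size_t component = entry_count * sizeof(int32_t);      // getComponentBufferSize()
    const size_t offset_off = key_buffer;                        // offsetBufferOff()
    const size_t count_off = offset_off + component;             // countBufferOff()
    const size_t payload_off = count_off + component;            // payloadBufferOff()
    assert(payload_off == key_buffer + 2 * component);
  }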

ColumnsForDevice BaselineJoinHashTable::fetchColumnsForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
DeviceAllocator *  dev_buff_owner 
)
protected virtual

Definition at line 442 of file BaselineJoinHashTable.cpp.

References catalog_, column_cache_, executor_, HashJoin::fetchJoinColumn(), get_column_descriptor_maybe(), get_join_column_type_kind(), getEffectiveMemoryLevel(), inline_fixed_encoding_null_val(), inner_outer_pairs_, and isBitwiseEq().

Referenced by reifyWithLayout().

445  {
446  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
447 
448  std::vector<JoinColumn> join_columns;
449  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
450  std::vector<JoinColumnTypeInfo> join_column_types;
451  std::vector<JoinBucketInfo> join_bucket_info;
452  std::vector<std::shared_ptr<void>> malloc_owner;
453  for (const auto& inner_outer_pair : inner_outer_pairs_) {
454  const auto inner_col = inner_outer_pair.first;
455  const auto inner_cd = get_column_descriptor_maybe(
456  inner_col->get_column_id(), inner_col->get_table_id(), *catalog_);
457  if (inner_cd && inner_cd->isVirtualCol) {
458  throw FailedToJoinOnVirtualColumn();
459  }
460  join_columns.emplace_back(fetchJoinColumn(inner_col,
461  fragments,
462  effective_memory_level,
463  device_id,
464  chunks_owner,
465  dev_buff_owner,
466  malloc_owner,
467  executor_,
468  &column_cache_));
469  const auto& ti = inner_col->get_type_info();
470  join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
471  0,
472  0,
473  inline_fixed_encoding_null_val(ti),
474  isBitwiseEq(),
475  0,
476  get_join_column_type_kind(ti)});
477  }
478  return {join_columns, join_column_types, chunks_owner, join_bucket_info, malloc_owner};
479 }

std::pair< std::optional< size_t >, size_t > BaselineJoinHashTable::getApproximateTupleCountFromCache ( const HashTableCacheKey &  key) const
protected

Definition at line 892 of file BaselineJoinHashTable.cpp.

References CHECK, CHECK_GE, HashTableCacheKey::chunk_keys, and hash_table_cache_.

Referenced by approximateTupleCount().

893  {
894  for (auto chunk_key : key.chunk_keys) {
895  CHECK_GE(chunk_key.size(), size_t(2));
896  if (chunk_key[1] < 0) {
897  return std::make_pair(std::nullopt, 0);
898  ;
899  }
900  }
901 
902  CHECK(hash_table_cache_);
903  auto hash_table_opt = hash_table_cache_->get(key);
904  if (hash_table_opt) {
905  auto hash_table = *hash_table_opt;
906  return std::make_pair(hash_table->getEntryCount() / 2,
907  hash_table->getEmittedKeysCount());
908  }
909  return std::make_pair(std::nullopt, 0);
910 }
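The entry_count / 2 above inverts the sizing rule used by reifyWithLayout() further down, which reserves twice the approximate tuple count; restated:

  #include <algorithm>
  #include <cstddef>

  size_t entries_from_tuples(size_t tuple_count) {  // reifyWithLayout's sizing
    return 2 * std::max(tuple_count, size_t(1));
  }

  size_t tuples_from_entries(size_t entry_count) {  // this cache lookup's inverse
    return entry_count / 2;
  }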

static auto BaselineJoinHashTable::getCacheInvalidator ( ) -> std::function<void()>
inline static

Definition at line 120 of file BaselineJoinHashTable.h.

References CHECK, HashTypeCache::clear(), and hash_table_cache_.

120  {
121  return []() -> void {
122  // TODO: make hash type cache part of the main cache
123  CHECK(hash_table_cache_);
124  hash_table_cache_->clear();
125  HashTypeCache::clear();
126  };
127  }

size_t BaselineJoinHashTable::getComponentBufferSize ( ) const
override protected virtual noexcept

Implements HashJoin.

Definition at line 770 of file BaselineJoinHashTable.cpp.

References HashJoin::getHashTableForDevice().

Referenced by codegenMatchingSet(), countBufferOff(), and payloadBufferOff().

770  {
771  const auto hash_table = getHashTableForDevice(size_t(0));
772  return hash_table->getEntryCount() * sizeof(int32_t);
773 }

int BaselineJoinHashTable::getDeviceCount ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 110 of file BaselineJoinHashTable.h.

References device_count_.

110 { return device_count_; };
Data_Namespace::MemoryLevel BaselineJoinHashTable::getEffectiveMemoryLevel ( const std::vector< InnerOuter > &  inner_outer_pairs) const
protected

Definition at line 528 of file BaselineJoinHashTable.cpp.

References Data_Namespace::CPU_LEVEL, executor_, memory_level_, and needs_dictionary_translation().

Referenced by approximateTupleCount(), fetchColumnsForDevice(), and reifyForDevice().

529  {
530  for (const auto& inner_outer_pair : inner_outer_pairs) {
531  if (needs_dictionary_translation(
532  inner_outer_pair.first, inner_outer_pair.second, executor_)) {
533  return Data_Namespace::CPU_LEVEL;
534  }
535  }
536  return memory_level_;
537 }
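The rule: if any inner/outer pair needs dictionary translation, the build is forced to CPU; otherwise the requested memory level stands. A minimal restatement (the enum and flags here are illustrative):

  #include <vector>

  enum class Level { CPU, GPU };

  // needs_translation holds one flag per inner/outer pair.
  Level effective_level(const std::vector<bool>& needs_translation, Level requested) {
    for (const bool needs : needs_translation) {
      if (needs) {
        return Level::CPU;  // dictionary translation happens host-side
      }
    }
    return requested;
  }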

std::string BaselineJoinHashTable::getHashJoinType ( ) const
inline final virtual

Implements HashJoin.

Definition at line 118 of file BaselineJoinHashTable.h.

118 { return "Baseline"; }
static auto* BaselineJoinHashTable::getHashTableCache ( )
inline static

Definition at line 129 of file BaselineJoinHashTable.h.

References CHECK, and hash_table_cache_.

Referenced by QueryRunner::QueryRunner::getCachedBaselineHashTable(), QueryRunner::QueryRunner::getEntryCntCachedBaselineHashTable(), and QueryRunner::QueryRunner::getNumberOfCachedBaselineJoinHashTables().

129  {
130  CHECK(hash_table_cache_);
131  return hash_table_cache_.get();
132  }

HashType BaselineJoinHashTable::getHashType ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 852 of file BaselineJoinHashTable.cpp.

References CHECK, HashJoin::getHashTableForDevice(), and layout_override_.

Referenced by codegenMatchingSet(), codegenSlot(), countBufferOff(), payloadBufferOff(), and toString().

852  {
853  auto hash_table = getHashTableForDevice(size_t(0));
854  CHECK(hash_table);
855  if (layout_override_) {
856  return *layout_override_;
857  } else {
858  return hash_table->getLayout();
859  }
860 }

int BaselineJoinHashTable::getInnerTableId ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 837 of file BaselineJoinHashTable.cpp.

References CHECK, and inner_outer_pairs_.

Referenced by initHashTableForDevice(), reify(), and reifyWithLayout().

837  {
838  try {
839  return getInnerTableId(inner_outer_pairs_);
840  } catch (...) {
841  CHECK(false);
842  }
843  return 0;
844 }

int BaselineJoinHashTable::getInnerTableId ( const std::vector< InnerOuter > &  inner_outer_pairs)
static protected

Definition at line 862 of file BaselineJoinHashTable.cpp.

References CHECK.

863  {
864  CHECK(!inner_outer_pairs.empty());
865  const auto first_inner_col = inner_outer_pairs.front().first;
866  return first_inner_col->get_table_id();
867 }
int BaselineJoinHashTable::getInnerTableRteIdx ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 846 of file BaselineJoinHashTable.cpp.

References CHECK, and inner_outer_pairs_.

846  {
847  CHECK(!inner_outer_pairs_.empty());
848  const auto first_inner_col = inner_outer_pairs_.front().first;
849  return first_inner_col->get_rte_idx();
850 }
std::shared_ptr< BaselineJoinHashTable > BaselineJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper >  condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap &  column_cache,
Executor *  executor 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 38 of file BaselineJoinHashTable.cpp.

References BaselineJoinHashTable(), HashJoin::getHashTypeString(), normalize_column_pairs(), VLOG, and VLOGGING.

Referenced by HashJoin::getInstance().

46  {
47  decltype(std::chrono::steady_clock::now()) ts1, ts2;
48 
49  if (VLOGGING(1)) {
50  VLOG(1) << "Building keyed hash table " << getHashTypeString(preferred_hash_type)
51  << " for qual: " << condition->toString();
52  ts1 = std::chrono::steady_clock::now();
53  }
54  auto inner_outer_pairs = normalize_column_pairs(
55  condition.get(), *executor->getCatalog(), executor->getTemporaryTables());
56 
57  auto join_hash_table =
58  std::shared_ptr<BaselineJoinHashTable>(new BaselineJoinHashTable(condition,
59  join_type,
60  query_infos,
61  memory_level,
62  column_cache,
63  executor,
64  inner_outer_pairs,
65  device_count));
66  try {
67  join_hash_table->reify(preferred_hash_type);
68  } catch (const TableMustBeReplicated& e) {
69  // Throw a runtime error to abort the query
70  join_hash_table->freeHashBufferMemory();
71  throw std::runtime_error(e.what());
72  } catch (const HashJoinFail& e) {
73  // HashJoinFail exceptions log an error and trigger a retry with a join loop (if
74  // possible)
75  join_hash_table->freeHashBufferMemory();
76  throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
77  "involved in equijoin | ") +
78  e.what());
79  } catch (const ColumnarConversionNotSupported& e) {
80  throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
81  e.what());
82  } catch (const OutOfMemory& e) {
83  throw HashJoinFail(
84  std::string("Ran out of memory while building hash tables for equijoin | ") +
85  e.what());
86  } catch (const std::exception& e) {
87  throw std::runtime_error(
88  std::string("Fatal error while attempting to build hash tables for join: ") +
89  e.what());
90  }
91  if (VLOGGING(1)) {
92  ts2 = std::chrono::steady_clock::now();
93  VLOG(1) << "Built keyed hash table "
94  << getHashTypeString(join_hash_table->getHashType()) << " in "
95  << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
96  << " ms";
97  }
98  return join_hash_table;
99 }
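A hypothetical call site, shown only to make the factory flow and the error contract concrete; condition, query_infos, column_cache, and executor are assumed to come from an in-flight query, and JoinType::INNER is an assumed enum value:

  std::shared_ptr<BaselineJoinHashTable> table;
  try {
    table = BaselineJoinHashTable::getInstance(condition,
                                               query_infos,
                                               Data_Namespace::MemoryLevel::CPU_LEVEL,
                                               JoinType::INNER,
                                               HashType::OneToOne,
                                               /*device_count=*/1,
                                               column_cache,
                                               executor);
  } catch (const HashJoinFail&) {
    // Per the error translation above: fall back to a join loop.
  }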

size_t BaselineJoinHashTable::getKeyBufferSize ( ) const
protected noexcept

Definition at line 757 of file BaselineJoinHashTable.cpp.

References CHECK, HashJoin::getHashTableForDevice(), getKeyComponentCount(), getKeyComponentWidth(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by countBufferOff(), offsetBufferOff(), and payloadBufferOff().

757  {
758  const auto key_component_width = getKeyComponentWidth();
759  CHECK(key_component_width == 4 || key_component_width == 8);
760  const auto key_component_count = getKeyComponentCount();
761  auto hash_table = getHashTableForDevice(size_t(0));
762  CHECK(hash_table);
763  if (layoutRequiresAdditionalBuffers(hash_table->getLayout())) {
764  return hash_table->getEntryCount() * key_component_count * key_component_width;
765  } else {
766  return hash_table->getEntryCount() * (key_component_count + 1) * key_component_width;
767  }
768 }

size_t BaselineJoinHashTable::getKeyComponentCount ( ) const
protected virtual

Definition at line 524 of file BaselineJoinHashTable.cpp.

References inner_outer_pairs_.

Referenced by codegenKey(), codegenMatchingSet(), codegenSlot(), getKeyBufferSize(), initHashTableForDevice(), toSet(), and toString().

524  {
525  return inner_outer_pairs_.size();
526 }

size_t BaselineJoinHashTable::getKeyComponentWidth ( ) const
protected virtual

Definition at line 512 of file BaselineJoinHashTable.cpp.

References CHECK_EQ, and inner_outer_pairs_.

Referenced by codegenKey(), codegenMatchingSet(), codegenSlot(), getKeyBufferSize(), initHashTableForDevice(), toSet(), and toString().

512  {
513  for (const auto& inner_outer_pair : inner_outer_pairs_) {
514  const auto inner_col = inner_outer_pair.first;
515  const auto& inner_col_ti = inner_col->get_type_info();
516  if (inner_col_ti.get_logical_size() > 4) {
517  CHECK_EQ(8, inner_col_ti.get_logical_size());
518  return 8;
519  }
520  }
521  return 4;
522 }
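The width rule above, restated standalone: a single key component with a logical size over four bytes widens every component of the key to eight bytes:

  #include <cstddef>
  #include <vector>

  size_t key_component_width(const std::vector<size_t>& logical_sizes) {
    for (const size_t sz : logical_sizes) {
      if (sz > 4) {
        return 8;  // one wide column promotes the whole key
      }
    }
    return 4;
  }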

Data_Namespace::MemoryLevel BaselineJoinHashTable::getMemoryLevel ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 106 of file BaselineJoinHashTable.h.

References memory_level_.

106  {
107  return memory_level_;
108  };
size_t BaselineJoinHashTable::getShardCountForCondition ( const Analyzer::BinOper *  condition,
const Executor *  executor,
const std::vector< InnerOuter > &  inner_outer_pairs 
)
static

Definition at line 123 of file BaselineJoinHashTable.cpp.

References get_shard_count().

Referenced by reify(), shardCount(), OverlapsJoinHashTable::shardCount(), and Executor::skipFragmentPair().

126  {
127  for (const auto& inner_outer_pair : inner_outer_pairs) {
128  const auto pair_shard_count = get_shard_count(inner_outer_pair, executor);
129  if (pair_shard_count) {
130  return pair_shard_count;
131  }
132  }
133  return 0;
134 }

llvm::Value * BaselineJoinHashTable::hashPtr ( const size_t  index)
protected

Definition at line 823 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, HashJoin::codegenHashTableLoad(), executor_, LL_BUILDER, and LL_CONTEXT.

Referenced by codegenSlot().

823  {
824  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
825  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
826  const auto pi8_type = llvm::Type::getInt8PtrTy(LL_CONTEXT);
827  return hash_ptr->getType()->isPointerTy()
828  ? LL_BUILDER.CreatePointerCast(hash_ptr, pi8_type)
829  : LL_BUILDER.CreateIntToPtr(hash_ptr, pi8_type);
830 }

int BaselineJoinHashTable::initHashTableForDevice ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_buckets,
const HashType  layout,
const Data_Namespace::MemoryLevel  effective_memory_level,
const size_t  entry_count,
const size_t  emitted_keys_count,
const int  device_id 
)
protected virtual

Definition at line 539 of file BaselineJoinHashTable.cpp.

References BaselineJoinHashTableBuilder::allocateDeviceMemory(), catalog_, CHECK, CHECK_EQ, CHECK_LT, condition_, copy_to_gpu(), CPU, cpu_hash_table_buff_mutex_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, executor_, HashJoin::getCompositeKeyInfo(), Catalog_Namespace::Catalog::getDataMgr(), BaselineJoinHashTableBuilder::getHashTable(), getInnerTableId(), getKeyComponentCount(), getKeyComponentWidth(), Data_Namespace::GPU_LEVEL, HashJoin::hash_tables_for_device_, BaselineJoinHashTableBuilder::initHashTableOnCpu(), initHashTableOnCpuFromCache(), BaselineJoinHashTableBuilder::initHashTableOnGpu(), inner_outer_pairs_, join_type_, memory_level_, putHashTableOnCpuToCache(), transfer_vector_of_flat_objects_to_gpu(), and UNREACHABLE.

Referenced by reifyForDevice().

547  {
548  auto timer = DEBUG_TIMER(__func__);
549  const auto key_component_count = getKeyComponentCount();
550  int err = 0;
551 
552  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
553  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
554 
555  const auto composite_key_info =
556  HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);
557 
558  CHECK(!join_columns.empty());
559  HashTableCacheKey cache_key{join_columns.front().num_elems,
560  composite_key_info.cache_key_chunks,
561  condition_->get_optype(),
562  join_type_};
563 
564  if (memory_level_ == Data_Namespace::MemoryLevel::CPU_LEVEL) {
565  CHECK_EQ(device_id, size_t(0));
566  }
567  CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
568 
569  auto hash_table = initHashTableOnCpuFromCache(cache_key);
570  if (hash_table) {
571  hash_tables_for_device_[device_id] = hash_table;
572  } else {
573  BaselineJoinHashTableBuilder builder(catalog_);
574 
575  const auto key_handler =
576  GenericKeyHandler(key_component_count,
577  true,
578  &join_columns[0],
579  &join_column_types[0],
580  &composite_key_info.sd_inner_proxy_per_key[0],
581  &composite_key_info.sd_outer_proxy_per_key[0]);
582  err = builder.initHashTableOnCpu(&key_handler,
583  composite_key_info,
584  join_columns,
585  join_column_types,
586  join_bucket_info,
587  entry_count,
588  join_columns.front().num_elems,
589  layout,
590  join_type_,
591  getKeyComponentWidth(),
592  getKeyComponentCount());
593  hash_tables_for_device_[device_id] = builder.getHashTable();
594 
595  if (!err) {
596  if (getInnerTableId() > 0) {
597  putHashTableOnCpuToCache(cache_key, hash_tables_for_device_[device_id]);
598  }
599  }
600  }
601  // Transfer the hash table on the GPU if we've only built it on CPU
602  // but the query runs on GPU (join on dictionary encoded columns).
603  // Don't transfer the buffer if there was an error since we'll bail anyway.
604  if (memory_level_ == Data_Namespace::GPU_LEVEL && !err) {
605 #ifdef HAVE_CUDA
606  BaselineJoinHashTableBuilder builder(catalog_);
607 
608  builder.allocateDeviceMemory(layout,
609  getKeyComponentWidth(),
610  getKeyComponentCount(),
611  entry_count,
612  emitted_keys_count,
613  device_id);
614 
615  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
616  auto cpu_source_hash_table = hash_tables_for_device_[device_id];
617  CHECK(cpu_source_hash_table);
618  auto gpu_target_hash_table = builder.getHashTable();
619  CHECK(gpu_target_hash_table);
620 
621  const auto gpu_buff = gpu_target_hash_table->getGpuBuffer();
622  CHECK(gpu_buff);
623  auto& data_mgr = catalog_->getDataMgr();
624  copy_to_gpu(&data_mgr,
625  reinterpret_cast<CUdeviceptr>(gpu_buff),
626  cpu_source_hash_table->getCpuBuffer(),
627  cpu_source_hash_table->getHashTableBufferSize(ExecutorDeviceType::CPU),
628  device_id);
629  hash_tables_for_device_[device_id] = std::move(gpu_target_hash_table);
630 #else
631  CHECK(false);
632 #endif
633  }
634  } else {
635 #ifdef HAVE_CUDA
636  BaselineJoinHashTableBuilder builder(catalog_);
637 
638  auto& data_mgr = catalog_->getDataMgr();
639  CudaAllocator allocator(&data_mgr, device_id);
640  auto join_column_types_gpu =
641  transfer_vector_of_flat_objects_to_gpu(join_column_types, allocator);
642  auto join_columns_gpu =
643  transfer_vector_of_flat_objects_to_gpu(join_columns, allocator);
644  const auto key_handler = GenericKeyHandler(key_component_count,
645  true,
646  join_columns_gpu,
647  join_column_types_gpu,
648  nullptr,
649  nullptr);
650 
651  err = builder.initHashTableOnGpu(&key_handler,
652  join_columns,
653  layout,
654  join_type_,
655  getKeyComponentWidth(),
656  getKeyComponentCount(),
657  entry_count,
658  emitted_keys_count,
659  device_id);
660  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
661  hash_tables_for_device_[device_id] = builder.getHashTable();
662 #else
663  UNREACHABLE();
664 #endif
665  }
666  return err;
667 }

std::shared_ptr< HashTable > BaselineJoinHashTable::initHashTableOnCpuFromCache ( const HashTableCacheKey key)
protected

Definition at line 869 of file BaselineJoinHashTable.cpp.

References CHECK, DEBUG_TIMER, hash_table_cache_, and VLOG.

Referenced by initHashTableForDevice().

870  {
871  auto timer = DEBUG_TIMER(__func__);
872  VLOG(1) << "Checking CPU hash table cache.";
873  CHECK(hash_table_cache_);
874  auto hash_table_opt = (hash_table_cache_->get(key));
875  return hash_table_opt ? *hash_table_opt : nullptr;
876 }

bool BaselineJoinHashTable::isBitwiseEq ( ) const
protected

Definition at line 912 of file BaselineJoinHashTable.cpp.

References condition_, and kBW_EQ.

Referenced by fetchColumnsForDevice().

912  {
913  return condition_->get_optype() == kBW_EQ;
914 }

size_t BaselineJoinHashTable::offsetBufferOff ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 737 of file BaselineJoinHashTable.cpp.

References getKeyBufferSize().

Referenced by codegenMatchingSet(), countBufferOff(), toSet(), and toString().

737  {
738  return getKeyBufferSize();
739 }

size_t BaselineJoinHashTable::payloadBufferOff ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 749 of file BaselineJoinHashTable.cpp.

References countBufferOff(), getComponentBufferSize(), getHashType(), getKeyBufferSize(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by toSet(), and toString().

749  {
750  if (layoutRequiresAdditionalBuffers(getHashType())) {
751  return countBufferOff() + getComponentBufferSize();
752  } else {
753  return getKeyBufferSize();
754  }
755 }

void BaselineJoinHashTable::putHashTableOnCpuToCache ( const HashTableCacheKey &  key,
std::shared_ptr< HashTable > &  hash_table 
)
protected

Definition at line 878 of file BaselineJoinHashTable.cpp.

References CHECK, CHECK_GE, HashTableCacheKey::chunk_keys, and hash_table_cache_.

Referenced by initHashTableForDevice().

880  {
881  for (auto chunk_key : key.chunk_keys) {
882  CHECK_GE(chunk_key.size(), size_t(2));
883  if (chunk_key[1] < 0) {
884  return;
885  }
886  }
887  CHECK(hash_table_cache_);
888  hash_table_cache_->insert(key, hash_table);
889 }
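The chunk_key[1] < 0 guard skips caching whenever a chunk key carries a negative table id, i.e. a temporary table whose contents are not stable across queries. Restated, treating a chunk key as [db_id, table_id, ...] (an assumption about its layout):

  #include <vector>

  bool cacheable(const std::vector<std::vector<int>>& chunk_keys) {
    for (const auto& ck : chunk_keys) {
      if (ck.size() >= 2 && ck[1] < 0) {
        return false;  // temporary table: never cache
      }
    }
    return true;
  }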

void BaselineJoinHashTable::reify ( const HashType  preferred_layout)
protected

Definition at line 214 of file BaselineJoinHashTable.cpp.

References CHECK_EQ, CHECK_LT, HashJoin::checkHashJoinReplicationConstraint(), condition_, DEBUG_TIMER, device_count_, executor_, HashJoin::freeHashBufferMemory(), HashTypeCache::get(), HashJoin::getCompositeKeyInfo(), getInnerTableId(), getShardCountForCondition(), inner_outer_pairs_, ManyToMany, OneToMany, reifyWithLayout(), HashTypeCache::set(), and VLOG.

214  {
215  auto timer = DEBUG_TIMER(__func__);
216  CHECK_LT(0, device_count_);
217  const auto composite_key_info =
218  HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);
219  const auto type_and_found = HashTypeCache::get(composite_key_info.cache_key_chunks);
220  const auto layout = type_and_found.second ? type_and_found.first : preferred_layout;
221 
222  checkHashJoinReplicationConstraint(
223  getInnerTableId(),
224  getShardCountForCondition(
225  condition_.get(), executor_, inner_outer_pairs_),
226  executor_);
227 
228  if (condition_->is_overlaps_oper()) {
229  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
230  HashType layout;
231 
232  if (inner_outer_pairs_[0].second->get_type_info().is_array()) {
233  layout = HashType::ManyToMany;
234  } else {
235  layout = HashType::OneToMany;
236  }
237  try {
238  reifyWithLayout(layout);
239  return;
240  } catch (const std::exception& e) {
241  VLOG(1) << "Caught exception while building overlaps baseline hash table: "
242  << e.what();
243  throw;
244  }
245  }
246 
247  try {
248  reifyWithLayout(layout);
249  } catch (const std::exception& e) {
250  VLOG(1) << "Caught exception while building baseline hash table: " << e.what();
251  freeHashBufferMemory();
252  HashTypeCache::set(composite_key_info.cache_key_chunks, HashType::OneToMany);
253  reifyWithLayout(HashType::OneToMany);
254  }
255 }

void BaselineJoinHashTable::reifyForDevice ( const ColumnsForDevice &  columns_for_device,
const HashType  layout,
const int  device_id,
const size_t  entry_count,
const size_t  emitted_keys_count,
const logger::ThreadId  parent_thread_id 
)
protected virtual

Definition at line 481 of file BaselineJoinHashTable.cpp.

References DEBUG_TIMER_NEW_THREAD, getEffectiveMemoryLevel(), initHashTableForDevice(), inner_outer_pairs_, ColumnsForDevice::join_buckets, ColumnsForDevice::join_column_types, ColumnsForDevice::join_columns, and to_string().

Referenced by reifyWithLayout().

486  {
487  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
488  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
489  const auto err = initHashTableForDevice(columns_for_device.join_columns,
490  columns_for_device.join_column_types,
491  columns_for_device.join_buckets,
492  layout,
493  effective_memory_level,
494  entry_count,
495  emitted_keys_count,
496  device_id);
497  if (err) {
498  throw HashJoinFail(
499  std::string("Unrecognized error when initializing baseline hash table (") +
500  std::to_string(err) + std::string(")"));
501  }
502 }

void BaselineJoinHashTable::reifyWithLayout ( const HashType  layout)
protected virtual

Definition at line 257 of file BaselineJoinHashTable.cpp.

References approximateTupleCount(), catalog_, CHECK, device_count_, fetchColumnsForDevice(), get_entries_per_device(), get_inner_query_info(), Catalog_Namespace::Catalog::getDataMgr(), getInnerTableId(), Fragmenter_Namespace::TableInfo::getNumTuplesUpperBound(), Data_Namespace::GPU_LEVEL, InputTableInfo::info, memory_level_, OneToMany, only_shards_for_device(), query_infos_, reifyForDevice(), shardCount(), and logger::thread_id().

Referenced by reify().

257  {
258  const auto& query_info = get_inner_query_info(getInnerTableId(), query_infos_).info;
259  if (query_info.fragments.empty()) {
260  return;
261  }
262 
263  const auto total_entries = 2 * query_info.getNumTuplesUpperBound();
264  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
265  throw TooManyHashEntries();
266  }
267 
268  auto& data_mgr = catalog_->getDataMgr();
269  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
270  if (memory_level_ == Data_Namespace::GPU_LEVEL) {
271  for (int device_id = 0; device_id < device_count_; ++device_id) {
272  dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(&data_mgr, device_id));
273  }
274  }
275  std::vector<ColumnsForDevice> columns_per_device;
276  const auto shard_count = shardCount();
277  auto entries_per_device =
278  get_entries_per_device(total_entries, shard_count, device_count_, memory_level_);
279 
280  for (int device_id = 0; device_id < device_count_; ++device_id) {
281  const auto fragments =
282  shard_count
283  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
284  : query_info.fragments;
285  const auto columns_for_device =
286  fetchColumnsForDevice(fragments,
287  device_id,
288  memory_level_ == Data_Namespace::GPU_LEVEL
289  ? dev_buff_owners[device_id].get()
290  : nullptr);
291  columns_per_device.push_back(columns_for_device);
292  }
293  size_t emitted_keys_count = 0;
294  if (layout == HashType::OneToMany) {
295  CHECK(!columns_per_device.front().join_columns.empty());
296  emitted_keys_count = columns_per_device.front().join_columns.front().num_elems;
297  size_t tuple_count;
298  std::tie(tuple_count, std::ignore) = approximateTupleCount(columns_per_device);
299  const auto entry_count = 2 * std::max(tuple_count, size_t(1));
300 
301  // reset entries per device with one to many info
302  entries_per_device =
303  get_entries_per_device(entry_count, shard_count, device_count_, memory_level_);
304  }
305  std::vector<std::future<void>> init_threads;
306  for (int device_id = 0; device_id < device_count_; ++device_id) {
307  const auto fragments =
308  shard_count
309  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
310  : query_info.fragments;
311  init_threads.push_back(std::async(std::launch::async,
312  &BaselineJoinHashTable::reifyForDevice,
313  this,
314  columns_per_device[device_id],
315  layout,
316  device_id,
317  entries_per_device,
318  emitted_keys_count,
319  logger::thread_id()));
320  }
321  for (auto& init_thread : init_threads) {
322  init_thread.wait();
323  }
324  for (auto& init_thread : init_threads) {
325  init_thread.get();
326  }
327 }
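
Note the two-pass drain of the per-device futures at source lines 321-326: wait() first joins every worker so no device build is torn down mid-flight, and only then does get() rethrow the first stored exception. A minimal self-contained sketch of the same pattern (build_for_device is a hypothetical stand-in for the bound reifyForDevice call):

// Illustrative sketch of the wait-all-then-get pattern used above.
std::vector<std::future<void>> workers;
for (int device_id = 0; device_id < device_count; ++device_id) {
  workers.push_back(std::async(std::launch::async, build_for_device, device_id));
}
for (auto& w : workers) {
  w.wait();  // join every worker before surfacing any failure
}
for (auto& w : workers) {
  w.get();  // rethrows the first stored exception, if any
}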

size_t BaselineJoinHashTable::shardCount ( ) const
protected

Definition at line 504 of file BaselineJoinHashTable.cpp.

References condition_, executor_, getShardCountForCondition(), Data_Namespace::GPU_LEVEL, inner_outer_pairs_, and memory_level_.

Referenced by reifyWithLayout().

504  {
505  if (memory_level_ != Data_Namespace::GPU_LEVEL) {
506  return 0;
507  }
508  return BaselineJoinHashTable::getShardCountForCondition(
509  condition_.get(), executor_, inner_outer_pairs_);
510 }

std::set< DecodedJoinHashBufferEntry > BaselineJoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override virtual

Implements HashJoin.

Definition at line 178 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK, copy_from_gpu(), countBufferOff(), Catalog_Namespace::Catalog::getDataMgr(), HashJoin::getHashTableForDevice(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), GPU, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toSet().

180  {
181  auto buffer = getJoinHashBuffer(device_type, device_id);
182  auto hash_table = getHashTableForDevice(device_id);
183  CHECK(hash_table);
184  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
185 #ifdef HAVE_CUDA
186  std::unique_ptr<int8_t[]> buffer_copy;
187  if (device_type == ExecutorDeviceType::GPU) {
188  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
189 
190  copy_from_gpu(&catalog_->getDataMgr(),
191  buffer_copy.get(),
192  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
193  buffer_size,
194  device_id);
195  }
196  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
197 #else
198  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
199 #endif // HAVE_CUDA
200  auto ptr2 = ptr1 + offsetBufferOff();
201  auto ptr3 = ptr1 + countBufferOff();
202  auto ptr4 = ptr1 + payloadBufferOff();
203  const auto layout = hash_table->getLayout();
204  return HashTable::toSet(getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
205  getKeyComponentWidth(),
206  hash_table->getEntryCount(),
207  ptr1,
208  ptr2,
209  ptr3,
210  ptr4,
211  buffer_size);
212 }
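
A hedged usage sketch: decoding a built table into a std::set makes its contents easy to assert on in a test. join_hash_table is assumed to be an already-reified BaselineJoinHashTable:

// Illustrative only: inspect the CPU-resident copy of the table.
const auto entries =
    join_hash_table->toSet(ExecutorDeviceType::CPU, /*device_id=*/0);
for (const auto& entry : entries) {
  // Each DecodedJoinHashBufferEntry pairs a composite key with the set of
  // matching payload row ids; compare against expected join pairs here.
}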

std::string BaselineJoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
override virtual

Implements HashJoin.

Definition at line 136 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK, CHECK_LT, copy_from_gpu(), countBufferOff(), Catalog_Namespace::Catalog::getDataMgr(), getHashType(), HashJoin::getHashTypeString(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), GPU, HashJoin::hash_tables_for_device_, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toString().

138  {
139  auto buffer = getJoinHashBuffer(device_type, device_id);
140  CHECK_LT(device_id, hash_tables_for_device_.size());
141  auto hash_table = hash_tables_for_device_[device_id];
142  CHECK(hash_table);
143  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
144 #ifdef HAVE_CUDA
145  std::unique_ptr<int8_t[]> buffer_copy;
146  if (device_type == ExecutorDeviceType::GPU) {
147  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
148 
149  copy_from_gpu(&catalog_->getDataMgr(),
150  buffer_copy.get(),
151  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
152  buffer_size,
153  device_id);
154  }
155  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
156 #else
157  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
158 #endif // HAVE_CUDA
159  auto ptr2 = ptr1 + offsetBufferOff();
160  auto ptr3 = ptr1 + countBufferOff();
161  auto ptr4 = ptr1 + payloadBufferOff();
162  CHECK(hash_table);
163  const auto layout = getHashType();
164  return HashTable::toString(
165  "keyed",
166  getHashTypeString(layout),
167  getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
168  getKeyComponentWidth(),
169  hash_table->getEntryCount(),
170  ptr1,
171  ptr2,
172  ptr3,
173  ptr4,
174  buffer_size,
175  raw);
176 }

Member Data Documentation

ColumnCacheMap& BaselineJoinHashTable::column_cache_
protected

Definition at line 209 of file BaselineJoinHashTable.h.

Referenced by fetchColumnsForDevice().

const std::shared_ptr<Analyzer::BinOper> BaselineJoinHashTable::condition_
protected

std::mutex BaselineJoinHashTable::cpu_hash_table_buff_mutex_
protected

Definition at line 210 of file BaselineJoinHashTable.h.

Referenced by initHashTableForDevice().

const int BaselineJoinHashTable::device_count_
protected

const JoinType BaselineJoinHashTable::join_type_
protected

Definition at line 205 of file BaselineJoinHashTable.h.

Referenced by approximateTupleCount(), and initHashTableForDevice().

std::optional<HashType> BaselineJoinHashTable::layout_override_
protected

Definition at line 217 of file BaselineJoinHashTable.h.

Referenced by getHashType().

const Data_Namespace::MemoryLevel BaselineJoinHashTable::memory_level_
protected

const std::vector<InputTableInfo>& BaselineJoinHashTable::query_infos_
protected

Definition at line 206 of file BaselineJoinHashTable.h.

Referenced by reifyWithLayout().


The documentation for this class was generated from the following files:
BaselineJoinHashTable.h
BaselineJoinHashTable.cpp