OmniSciDB  95562058bd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BaselineJoinHashTable Class Reference

#include <BaselineJoinHashTable.h>

+ Inheritance diagram for BaselineJoinHashTable:
+ Collaboration diagram for BaselineJoinHashTable:

Classes

struct  ColumnsForDevice
 
struct  CompositeKeyInfo
 
struct  HashTableCacheKey
 
struct  HashTableCacheValue
 

Public Member Functions

int64_t getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const noexcept override
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const noexcept override
 
std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
std::set
< DecodedJoinHashBufferEntry > 
toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
JoinHashTableInterface::HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
virtual ~BaselineJoinHashTable ()
 
- Public Member Functions inherited from JoinHashTableInterface
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 

Static Public Member Functions

static std::shared_ptr
< BaselineJoinHashTable > 
getInstance (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static size_t getShardCountForCondition (const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
 
static auto yieldCacheInvalidator () -> std::function< void()>
 
static const std::shared_ptr
< std::vector< int8_t > > & 
getCachedHashTable (size_t idx)
 
static size_t getEntryCntCachedHashTable (size_t idx)
 
static uint64_t getNumberOfCachedHashTables ()
 
- Static Public Member Functions inherited from JoinHashTableInterface
static std::string getHashTypeString (HashType ht) noexcept
 
static DecodedJoinHashBufferSet toSet (size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size)
 Decode hash table into a std::set for easy inspection and validation. More...
 
static std::string toString (const std::string &type, const std::string &layout_type, size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size, bool raw=false)
 Decode hash table into a human-readable string. More...
 
static std::shared_ptr
< JoinHashTableInterface > 
getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static std::shared_ptr
< JoinHashTableInterface > 
getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::shared_ptr
< JoinHashTableInterface > 
getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 

Protected Member Functions

 BaselineJoinHashTable (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const size_t entry_count, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count)
 
virtual void reifyWithLayout (const JoinHashTableInterface::HashType layout)
 
virtual ColumnsForDevice fetchColumnsForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
 
virtual std::pair< size_t, size_t > approximateTupleCount (const std::vector< ColumnsForDevice > &) const
 
virtual size_t getKeyComponentWidth () const
 
virtual size_t getKeyComponentCount () const
 
virtual int initHashTableOnCpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout)
 
virtual int initHashTableOnGpu (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout, const size_t key_component_width, const size_t key_component_count, const int device_id)
 
virtual llvm::Value * codegenKey (const CompilationOptions &)
 
size_t shardCount () const
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
CompositeKeyInfo getCompositeKeyInfo () const
 
void reify ()
 
void reifyForDevice (const ColumnsForDevice &columns_for_device, const JoinHashTableInterface::HashType layout, const int device_id, const logger::ThreadId parent_thread_id)
 
void checkHashJoinReplicationConstraint (const int table_id) const
 
int initHashTableForDevice (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_buckets, const JoinHashTableInterface::HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
 
llvm::Value * hashPtr (const size_t index)
 
void initHashTableOnCpuFromCache (const HashTableCacheKey &)
 
void putHashTableOnCpuToCache (const HashTableCacheKey &)
 
std::pair< std::optional
< size_t >, size_t > 
getApproximateTupleCountFromCache (const HashTableCacheKey &) const
 
bool isBitwiseEq () const
 
void freeHashBufferMemory ()
 
void freeHashBufferGpuMemory ()
 
void freeHashBufferCpuMemory ()
 
bool layoutRequiresAdditionalBuffers (JoinHashTableInterface::HashType layout) const noexcept override
 
const HashTableCacheValue * findHashTableOnCpuInCache (const HashTableCacheKey &)
 

Static Protected Member Functions

static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 

Protected Attributes

const std::shared_ptr
< Analyzer::BinOper > 
condition_
 
const std::vector
< InputTableInfo > & 
query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
JoinHashTableInterface::HashType layout_
 
size_t entry_count_
 
size_t emitted_keys_count_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
std::shared_ptr< std::vector
< int8_t > > 
cpu_hash_table_buff_
 
std::mutex cpu_hash_table_buff_mutex_
 
std::vector< InnerOuter > inner_outer_pairs_
 
const Catalog_Namespace::Catalog * catalog_
 
const int device_count_
 

Static Protected Attributes

static std::vector< std::pair
< HashTableCacheKey,
HashTableCacheValue > > 
hash_table_cache_
 
static std::mutex hash_table_cache_mutex_
 
static const int ERR_FAILED_TO_FETCH_COLUMN
 
static const int ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN {-4}
 

Private Member Functions

size_t getKeyBufferSize () const noexcept
 
size_t getComponentBufferSize () const noexcept
 

Additional Inherited Members

- Public Types inherited from JoinHashTableInterface
enum  HashType : int { HashType::OneToOne, HashType::OneToMany, HashType::ManyToMany }
 

Detailed Description

Definition at line 43 of file BaselineJoinHashTable.h.

Constructor & Destructor Documentation

BaselineJoinHashTable::~BaselineJoinHashTable ( )
virtual

Definition at line 1118 of file BaselineJoinHashTable.cpp.

References CHECK, and executor_.

1118  {
1119  // TODO: use freeHashBufferMemory?
1120 #ifdef HAVE_CUDA
1121  CHECK(executor_);
1122  CHECK(executor_->catalog_);
1123  auto& data_mgr = executor_->catalog_->getDataMgr();
1124  for (auto& gpu_buffer : gpu_hash_table_buff_) {
1125  if (gpu_buffer) {
1126  data_mgr.free(gpu_buffer);
1127  }
1128  }
1129 #endif
1130 }
#define CHECK(condition)
Definition: Logger.h:197
BaselineJoinHashTable::BaselineJoinHashTable ( const std::shared_ptr< Analyzer::BinOper >  condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const size_t  entry_count,
ColumnCacheMap &  column_cache,
Executor *  executor,
const std::vector< InnerOuter > &  inner_outer_pairs,
const int  device_count 
)
protected

Definition at line 111 of file BaselineJoinHashTable.cpp.

References CHECK_GT, and device_count_.

Referenced by getInstance().

121  : condition_(condition)
122  , query_infos_(query_infos)
123  , memory_level_(memory_level)
124  , layout_(preferred_hash_type)
125  , entry_count_(entry_count)
127  , executor_(executor)
128  , column_cache_(column_cache)
129  , inner_outer_pairs_(inner_outer_pairs)
130  , catalog_(executor->getCatalog())
131  , device_count_(device_count)
132 #ifdef HAVE_CUDA
133  , block_size_(memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
134  ? executor->blockSize()
135  : 0)
136  , grid_size_(memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
137  ? executor->gridSize()
138  : 0) {
140 }
141 #else
142 {
144 }
JoinHashTableInterface::HashType layout_
#define CHECK_GT(x, y)
Definition: Logger.h:209
const std::vector< InputTableInfo > & query_infos_
std::vector< InnerOuter > inner_outer_pairs_
ColumnCacheMap & column_cache_
const Catalog_Namespace::Catalog * catalog_
const Data_Namespace::MemoryLevel memory_level_
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the caller graph for this function:

Member Function Documentation

std::pair< size_t, size_t > BaselineJoinHashTable::approximateTupleCount ( const std::vector< ColumnsForDevice > &  columns_per_device) const
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 412 of file BaselineJoinHashTable.cpp.

References CudaAllocator::alloc(), approximate_distinct_tuples(), approximate_distinct_tuples_on_device(), Bitmap, catalog_, CHECK, CHECK_EQ, condition_, copy_from_gpu(), CPU, Data_Namespace::CPU_LEVEL, cpu_threads(), device_count_, getApproximateTupleCountFromCache(), getCompositeKeyInfo(), Catalog_Namespace::Catalog::getDataMgr(), getEffectiveMemoryLevel(), GPU, Data_Namespace::GPU_LEVEL, hll_size(), hll_unify(), inner_outer_pairs_, transfer_flat_object_to_gpu(), transfer_vector_of_flat_objects_to_gpu(), UNREACHABLE, and VLOG.

Referenced by reifyWithLayout().

413  {
414  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
415  CountDistinctDescriptor count_distinct_desc{
417  0,
418  11,
419  true,
420  effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
423  1};
424  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();
425 
426  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());
427 
428  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
429  const auto composite_key_info = getCompositeKeyInfo();
430  HashTableCacheKey cache_key{columns_per_device.front().join_columns.front().num_elems,
431  composite_key_info.cache_key_chunks,
432  condition_->get_optype()};
433  const auto cached_count_info = getApproximateTupleCountFromCache(cache_key);
434  if (cached_count_info.first) {
435  VLOG(1) << "Using a cached tuple count: " << *cached_count_info.first
436  << ", emitted keys count: " << cached_count_info.second;
437  return std::make_pair(*cached_count_info.first, cached_count_info.second);
438  }
439  int thread_count = cpu_threads();
440  std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
441  auto hll_result = &hll_buffer_all_cpus[0];
442 
443  approximate_distinct_tuples(hll_result,
444  count_distinct_desc.bitmap_sz_bits,
445  padded_size_bytes,
446  columns_per_device.front().join_columns,
447  columns_per_device.front().join_column_types,
448  thread_count);
449  for (int i = 1; i < thread_count; ++i) {
450  hll_unify(hll_result,
451  hll_result + i * padded_size_bytes,
452  1 << count_distinct_desc.bitmap_sz_bits);
453  }
454  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
455  }
456 #ifdef HAVE_CUDA
457  auto& data_mgr = catalog_->getDataMgr();
458  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
459  for (auto& host_hll_buffer : host_hll_buffers) {
460  host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
461  }
462  std::vector<std::future<void>> approximate_distinct_device_threads;
463  for (int device_id = 0; device_id < device_count_; ++device_id) {
464  approximate_distinct_device_threads.emplace_back(std::async(
465  std::launch::async,
466  [device_id,
467  &columns_per_device,
468  &count_distinct_desc,
469  &data_mgr,
470  &host_hll_buffers,
471  this] {
472  CudaAllocator allocator(&data_mgr, device_id);
473  auto device_hll_buffer =
474  allocator.alloc(count_distinct_desc.bitmapPaddedSizeBytes());
475  data_mgr.getCudaMgr()->zeroDeviceMem(
476  device_hll_buffer, count_distinct_desc.bitmapPaddedSizeBytes(), device_id);
477  const auto& columns_for_device = columns_per_device[device_id];
478  auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(
479  columns_for_device.join_columns, allocator);
480  auto join_column_types_gpu = transfer_vector_of_flat_objects_to_gpu(
481  columns_for_device.join_column_types, allocator);
482  const auto key_handler =
483  GenericKeyHandler(columns_for_device.join_columns.size(),
484  true,
485  join_columns_gpu,
486  join_column_types_gpu,
487  nullptr,
488  nullptr);
489  const auto key_handler_gpu =
490  transfer_flat_object_to_gpu(key_handler, allocator);
492  reinterpret_cast<uint8_t*>(device_hll_buffer),
493  count_distinct_desc.bitmap_sz_bits,
494  key_handler_gpu,
495  columns_for_device.join_columns[0].num_elems,
496  block_size_,
497  grid_size_);
498 
499  auto& host_hll_buffer = host_hll_buffers[device_id];
500  copy_from_gpu(&data_mgr,
501  &host_hll_buffer[0],
502  reinterpret_cast<CUdeviceptr>(device_hll_buffer),
503  count_distinct_desc.bitmapPaddedSizeBytes(),
504  device_id);
505  }));
506  }
507  for (auto& child : approximate_distinct_device_threads) {
508  child.get();
509  }
510  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
511  auto& result_hll_buffer = host_hll_buffers.front();
512  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
513  for (int device_id = 1; device_id < device_count_; ++device_id) {
514  auto& host_hll_buffer = host_hll_buffers[device_id];
515  hll_unify(hll_result,
516  reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
517  1 << count_distinct_desc.bitmap_sz_bits);
518  }
519  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
520 #else
521  UNREACHABLE();
522  return {0, 0};
523 #endif // HAVE_CUDA
524 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:209
void hll_unify(T1 *lhs, T2 *rhs, const size_t m)
Definition: HyperLogLog.h:109
#define UNREACHABLE()
Definition: Logger.h:241
size_t hll_size(const T *M, const size_t bitmap_sz_bits)
Definition: HyperLogLog.h:90
std::pair< std::optional< size_t >, size_t > getApproximateTupleCountFromCache(const HashTableCacheKey &) const
std::vector< InnerOuter > inner_outer_pairs_
void approximate_distinct_tuples(uint8_t *hll_buffer_all_cpus, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const int thread_count)
CompositeKeyInfo getCompositeKeyInfo() const
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
T * transfer_flat_object_to_gpu(const T &object, CudaAllocator &allocator)
void approximate_distinct_tuples_on_device(uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
const Catalog_Namespace::Catalog * catalog_
#define CHECK(condition)
Definition: Logger.h:197
T * transfer_vector_of_flat_objects_to_gpu(const std::vector< T > &vec, CudaAllocator &allocator)
int cpu_threads()
Definition: thread_count.h:24
#define VLOG(n)
Definition: Logger.h:291
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void BaselineJoinHashTable::checkHashJoinReplicationConstraint ( const int  table_id) const
protected

Definition at line 1235 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK, g_cluster, Catalog_Namespace::Catalog::getMetadataForTable(), shardCount(), and table_is_replicated().

1235  {
1236  if (!g_cluster) {
1237  return;
1238  }
1239  if (table_id >= 0) {
1240  const auto inner_td = catalog_->getMetadataForTable(table_id);
1241  CHECK(inner_td);
1242  const auto shard_count = shardCount();
1243  if (!shard_count && !table_is_replicated(inner_td)) {
1244  throw TableMustBeReplicated(inner_td->tableName);
1245  }
1246  }
1247 }
bool table_is_replicated(const TableDescriptor *td)
const Catalog_Namespace::Catalog * catalog_
#define CHECK(condition)
Definition: Logger.h:197
bool g_cluster
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.

+ Here is the call graph for this function:

llvm::Value * BaselineJoinHashTable::codegenKey ( const CompilationOptions &  co)
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 1147 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), executor_, get_int_type(), get_max_rte_scan_table(), getKeyComponentCount(), getKeyComponentWidth(), inner_outer_pairs_, LL_BUILDER, LL_CONTEXT, LL_INT, and self_join_not_covered_by_left_deep_tree().

Referenced by codegenMatchingSet(), and codegenSlot().

1147  {
1148  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1149  const auto key_component_width = getKeyComponentWidth();
1150  CHECK(key_component_width == 4 || key_component_width == 8);
1151  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
1152  llvm::Value* key_buff_lv{nullptr};
1153  switch (key_component_width) {
1154  case 4:
1155  key_buff_lv =
1156  LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv);
1157  break;
1158  case 8:
1159  key_buff_lv =
1160  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1161  break;
1162  default:
1163  CHECK(false);
1164  }
1165 
1166  CodeGenerator code_generator(executor_);
1167  for (size_t i = 0; i < getKeyComponentCount(); ++i) {
1168  const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(key_buff_lv, LL_INT(i));
1169  const auto& inner_outer_pair = inner_outer_pairs_[i];
1170  const auto outer_col = inner_outer_pair.second;
1171  const auto key_col_var = dynamic_cast<const Analyzer::ColumnVar*>(outer_col);
1172  const auto val_col_var =
1173  dynamic_cast<const Analyzer::ColumnVar*>(inner_outer_pair.first);
1174  if (key_col_var && val_col_var &&
1176  key_col_var,
1177  val_col_var,
1178  get_max_rte_scan_table(executor_->cgen_state_->scan_idx_to_hash_pos_))) {
1179  throw std::runtime_error(
1180  "Query execution fails because the query contains not supported self-join "
1181  "pattern. We suspect the query requires multiple left-deep join tree due to "
1182  "the join condition of the self-join and is not supported for now. Please "
1183  "consider rewriting table order in "
1184  "FROM clause.");
1185  }
1186  const auto col_lvs = code_generator.codegen(outer_col, true, co);
1187  CHECK_EQ(size_t(1), col_lvs.size());
1188  const auto col_lv = LL_BUILDER.CreateSExt(
1189  col_lvs.front(), get_int_type(key_component_width * 8, LL_CONTEXT));
1190  LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
1191  }
1192  return key_buff_lv;
1193 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
bool self_join_not_covered_by_left_deep_tree(const Analyzer::ColumnVar *key_side, const Analyzer::ColumnVar *val_side, const int max_rte_covered)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const int get_max_rte_scan_table(std::unordered_map< int, llvm::Value * > &scan_idx_to_hash_pos)
std::vector< InnerOuter > inner_outer_pairs_
#define LL_INT(v)
#define LL_CONTEXT
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define LL_BUILDER
#define CHECK(condition)
Definition: Logger.h:197
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

HashJoinMatchingSet BaselineJoinHashTable::codegenMatchingSet ( const CompilationOptions &  co,
const size_t  index 
)
overridevirtual

Implements JoinHashTableInterface.

Reimplemented in OverlapsJoinHashTable.

Definition at line 1056 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, JoinHashTable::codegenHashTableLoad(), codegenKey(), JoinHashTable::codegenMatchingSet(), entry_count_, executor_, get_int_type(), getComponentBufferSize(), getKeyComponentCount(), getKeyComponentWidth(), layout_, LL_BUILDER, LL_CONTEXT, LL_INT, offsetBufferOff(), JoinHashTableInterface::OneToMany, and to_string().

Referenced by OverlapsJoinHashTable::codegenMatchingSet().

1058  {
1059  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1060  const auto key_component_width = getKeyComponentWidth();
1061  CHECK(key_component_width == 4 || key_component_width == 8);
1062  auto key_buff_lv = codegenKey(co);
1064  auto hash_ptr = JoinHashTable::codegenHashTableLoad(index, executor_);
1065  const auto composite_dict_ptr_type =
1066  llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
1067  const auto composite_key_dict =
1068  hash_ptr->getType()->isPointerTy()
1069  ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
1070  : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
1071  const auto key_component_count = getKeyComponentCount();
1072  const auto key = executor_->cgen_state_->emitExternalCall(
1073  "get_composite_key_index_" + std::to_string(key_component_width * 8),
1074  get_int_type(64, LL_CONTEXT),
1075  {key_buff_lv,
1076  LL_INT(key_component_count),
1077  composite_key_dict,
1078  LL_INT(entry_count_)});
1079  auto one_to_many_ptr = hash_ptr;
1080  if (one_to_many_ptr->getType()->isPointerTy()) {
1081  one_to_many_ptr =
1082  LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
1083  } else {
1084  CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
1085  }
1086  const auto composite_key_dict_size = offsetBufferOff();
1087  one_to_many_ptr =
1088  LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
1090  {one_to_many_ptr, key, LL_INT(int64_t(0)), LL_INT(entry_count_ - 1)},
1091  false,
1092  false,
1093  false,
1095  executor_);
1096 }
size_t offsetBufferOff() const noexceptoverride
size_t getComponentBufferSize() const noexcept
JoinHashTableInterface::HashType layout_
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
virtual llvm::Value * codegenKey(const CompilationOptions &)
#define LL_INT(v)
#define LL_CONTEXT
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define LL_BUILDER
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t) override
#define CHECK(condition)
Definition: Logger.h:197
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * BaselineJoinHashTable::codegenSlot ( const CompilationOptions &  co,
const size_t  index 
)
overridevirtual

Implements JoinHashTableInterface.

Definition at line 1039 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenKey(), entry_count_, executor_, get_int_type(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), hashPtr(), LL_BUILDER, LL_CONTEXT, LL_INT, JoinHashTableInterface::OneToOne, and to_string().

1040  {
1041  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1043  const auto key_component_width = getKeyComponentWidth();
1044  CHECK(key_component_width == 4 || key_component_width == 8);
1045  auto key_buff_lv = codegenKey(co);
1046  const auto hash_ptr = hashPtr(index);
1047  const auto key_ptr_lv =
1048  LL_BUILDER.CreatePointerCast(key_buff_lv, llvm::Type::getInt8PtrTy(LL_CONTEXT));
1049  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
1050  return executor_->cgen_state_->emitExternalCall(
1051  "baseline_hash_join_idx_" + std::to_string(key_component_width * 8),
1052  get_int_type(64, LL_CONTEXT),
1053  {hash_ptr, key_ptr_lv, key_size_lv, LL_INT(entry_count_)});
1054 }
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
virtual llvm::Value * codegenKey(const CompilationOptions &)
JoinHashTableInterface::HashType getHashType() const noexceptoverride
#define LL_INT(v)
#define LL_CONTEXT
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define LL_BUILDER
llvm::Value * hashPtr(const size_t index)
#define CHECK(condition)
Definition: Logger.h:197
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const

+ Here is the call graph for this function:

size_t BaselineJoinHashTable::countBufferOff ( ) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 1102 of file BaselineJoinHashTable.cpp.

References getComponentBufferSize(), getKeyBufferSize(), layout_, layoutRequiresAdditionalBuffers(), and offsetBufferOff().

Referenced by payloadBufferOff(), toSet(), and toString().

1102  {
1105  } else {
1106  return getKeyBufferSize();
1107  }
1108 }
size_t offsetBufferOff() const noexceptoverride
bool layoutRequiresAdditionalBuffers(JoinHashTableInterface::HashType layout) const noexceptoverride
size_t getComponentBufferSize() const noexcept
size_t getKeyBufferSize() const noexcept
JoinHashTableInterface::HashType layout_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

BaselineJoinHashTable::ColumnsForDevice BaselineJoinHashTable::fetchColumnsForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
DeviceAllocator *  dev_buff_owner 
)
protectedvirtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 526 of file BaselineJoinHashTable.cpp.

References catalog_, column_cache_, executor_, JoinHashTableInterface::fetchJoinColumn(), get_column_descriptor_maybe(), get_join_column_type_kind(), getEffectiveMemoryLevel(), inline_fixed_encoding_null_val(), inner_outer_pairs_, and isBitwiseEq().

Referenced by reifyWithLayout().

529  {
530  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
531 
532  std::vector<JoinColumn> join_columns;
533  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
534  std::vector<JoinColumnTypeInfo> join_column_types;
535  std::vector<JoinBucketInfo> join_bucket_info;
536  std::vector<std::shared_ptr<void>> malloc_owner;
537  for (const auto& inner_outer_pair : inner_outer_pairs_) {
538  const auto inner_col = inner_outer_pair.first;
539  const auto inner_cd = get_column_descriptor_maybe(
540  inner_col->get_column_id(), inner_col->get_table_id(), *catalog_);
541  if (inner_cd && inner_cd->isVirtualCol) {
543  }
544  join_columns.emplace_back(fetchJoinColumn(inner_col,
545  fragments,
546  effective_memory_level,
547  device_id,
548  chunks_owner,
549  dev_buff_owner,
550  malloc_owner,
551  executor_,
552  &column_cache_));
553  const auto& ti = inner_col->get_type_info();
554  join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
555  0,
556  0,
558  isBitwiseEq(),
559  0,
561  }
562  return {join_columns, join_column_types, chunks_owner, join_bucket_info, malloc_owner};
563 }
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
std::vector< InnerOuter > inner_outer_pairs_
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:183
ColumnCacheMap & column_cache_
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
const Catalog_Namespace::Catalog * catalog_
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const BaselineJoinHashTable::HashTableCacheValue * BaselineJoinHashTable::findHashTableOnCpuInCache ( const HashTableCacheKey &  key)
protected

Definition at line 1250 of file BaselineJoinHashTable.cpp.

References hash_table_cache_, and hash_table_cache_mutex_.

1250  {
1251  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1252  for (const auto& kv : hash_table_cache_) {
1253  if (kv.first == key) {
1254  return &kv.second;
1255  }
1256  }
1257  return nullptr;
1258 }
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
void BaselineJoinHashTable::freeHashBufferCpuMemory ( )
protected

Definition at line 1348 of file BaselineJoinHashTable.cpp.

References cpu_hash_table_buff_.

Referenced by freeHashBufferMemory().

1348  {
1349  cpu_hash_table_buff_.reset();
1350 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_

+ Here is the caller graph for this function:

void BaselineJoinHashTable::freeHashBufferGpuMemory ( )
protected

Definition at line 1334 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK, CudaAllocator::freeGpuAbstractBuffer(), and Catalog_Namespace::Catalog::getDataMgr().

Referenced by freeHashBufferMemory().

1334  {
1335 #ifdef HAVE_CUDA
1336  auto& data_mgr = catalog_->getDataMgr();
1337  for (auto& buf : gpu_hash_table_buff_) {
1338  if (buf) {
1339  CudaAllocator::freeGpuAbstractBuffer(&data_mgr, buf);
1340  buf = nullptr;
1341  }
1342  }
1343 #else
1344  CHECK(false);
1345 #endif // HAVE_CUDA
1346 }
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:209
static void freeGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, Data_Namespace::AbstractBuffer *ab)
const Catalog_Namespace::Catalog * catalog_
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void BaselineJoinHashTable::freeHashBufferMemory ( )
protected

Definition at line 1327 of file BaselineJoinHashTable.cpp.

References freeHashBufferCpuMemory(), and freeHashBufferGpuMemory().

Referenced by reify().

1327  {
1328 #ifdef HAVE_CUDA
1330 #endif
1332 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< std::optional< size_t >, size_t > BaselineJoinHashTable::getApproximateTupleCountFromCache ( const HashTableCacheKey key) const
protected

Definition at line 1304 of file BaselineJoinHashTable.cpp.

References CHECK_GE, BaselineJoinHashTable::HashTableCacheKey::chunk_keys, hash_table_cache_, and hash_table_cache_mutex_.

Referenced by OverlapsJoinHashTable::approximateTupleCount(), and approximateTupleCount().

1305  {
1306  for (auto chunk_key : key.chunk_keys) {
1307  CHECK_GE(chunk_key.size(), size_t(2));
1308  if (chunk_key[1] < 0) {
1309  return std::make_pair(std::nullopt, 0);
1310  ;
1311  }
1312  }
1313 
1314  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1315  for (const auto& kv : hash_table_cache_) {
1316  if (kv.first == key) {
1317  return std::make_pair(kv.second.entry_count / 2, kv.second.emitted_keys_count);
1318  }
1319  }
1320  return std::make_pair(std::nullopt, 0);
1321 }
#define CHECK_GE(x, y)
Definition: Logger.h:210
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_

+ Here is the caller graph for this function:

static const std::shared_ptr<std::vector<int8_t> >& BaselineJoinHashTable::getCachedHashTable ( size_t  idx)
inline static

Definition at line 104 of file BaselineJoinHashTable.h.

References CHECK, CHECK_LT, hash_table_cache_, and hash_table_cache_mutex_.

Referenced by QueryRunner::QueryRunner::getCachedBaselineHashTable().

104  {
105  std::lock_guard<std::mutex> guard(hash_table_cache_mutex_);
106  CHECK(!hash_table_cache_.empty());
107  CHECK_LT(idx, hash_table_cache_.size());
108  return hash_table_cache_.at(idx).second.buffer;
109  }
#define CHECK_LT(x, y)
Definition: Logger.h:207
static std::mutex hash_table_cache_mutex_
#define CHECK(condition)
Definition: Logger.h:197
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getComponentBufferSize ( ) const
private noexcept

Definition at line 1143 of file BaselineJoinHashTable.cpp.

References entry_count_.

Referenced by codegenMatchingSet(), countBufferOff(), and payloadBufferOff().

1143  {
1144  return entry_count_ * sizeof(int32_t);
1145 }

+ Here is the caller graph for this function:

BaselineJoinHashTable::CompositeKeyInfo BaselineJoinHashTable::getCompositeKeyInfo ( ) const
protected

Definition at line 276 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK, Catalog_Namespace::DBMetadata::dbId, executor_, Catalog_Namespace::Catalog::getCurrentDB(), inner_outer_pairs_, and kENCODING_DICT.

Referenced by OverlapsJoinHashTable::approximateTupleCount(), approximateTupleCount(), OverlapsJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), reify(), and OverlapsJoinHashTable::reifyWithLayout().

277  {
278  std::vector<const void*> sd_inner_proxy_per_key;
279  std::vector<const void*> sd_outer_proxy_per_key;
280  std::vector<ChunkKey> cache_key_chunks; // used for the cache key
281  for (const auto& inner_outer_pair : inner_outer_pairs_) {
282  const auto inner_col = inner_outer_pair.first;
283  const auto outer_col = inner_outer_pair.second;
284  const auto& inner_ti = inner_col->get_type_info();
285  const auto& outer_ti = outer_col->get_type_info();
286  ChunkKey cache_key_chunks_for_column{catalog_->getCurrentDB().dbId,
287  inner_col->get_table_id(),
288  inner_col->get_column_id()};
289  if (inner_ti.is_string() &&
290  !(inner_ti.get_comp_param() == outer_ti.get_comp_param())) {
291  CHECK(outer_ti.is_string());
292  CHECK(inner_ti.get_compression() == kENCODING_DICT &&
293  outer_ti.get_compression() == kENCODING_DICT);
294  const auto sd_inner_proxy = executor_->getStringDictionaryProxy(
295  inner_ti.get_comp_param(), executor_->getRowSetMemoryOwner(), true);
296  const auto sd_outer_proxy = executor_->getStringDictionaryProxy(
297  outer_ti.get_comp_param(), executor_->getRowSetMemoryOwner(), true);
298  CHECK(sd_inner_proxy && sd_outer_proxy);
299  sd_inner_proxy_per_key.push_back(sd_inner_proxy);
300  sd_outer_proxy_per_key.push_back(sd_outer_proxy);
301  cache_key_chunks_for_column.push_back(sd_outer_proxy->getGeneration());
302  } else {
303  sd_inner_proxy_per_key.emplace_back();
304  sd_outer_proxy_per_key.emplace_back();
305  }
306  cache_key_chunks.push_back(cache_key_chunks_for_column);
307  }
308  return {sd_inner_proxy_per_key, sd_outer_proxy_per_key, cache_key_chunks};
309 }
std::vector< int > ChunkKey
Definition: types.h:37
std::vector< InnerOuter > inner_outer_pairs_
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:208
const Catalog_Namespace::Catalog * catalog_
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int BaselineJoinHashTable::getDeviceCount ( ) const
inline override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 88 of file BaselineJoinHashTable.h.

References device_count_.

88 { return device_count_; };
Data_Namespace::MemoryLevel BaselineJoinHashTable::getEffectiveMemoryLevel ( const std::vector< InnerOuter > &  inner_outer_pairs) const
protected

Definition at line 615 of file BaselineJoinHashTable.cpp.

References Data_Namespace::CPU_LEVEL, executor_, memory_level_, and needs_dictionary_translation().

Referenced by OverlapsJoinHashTable::approximateTupleCount(), approximateTupleCount(), OverlapsJoinHashTable::computeBucketSizes(), OverlapsJoinHashTable::fetchColumnsForDevice(), fetchColumnsForDevice(), and reifyForDevice().

616  {
617  for (const auto& inner_outer_pair : inner_outer_pairs) {
619  inner_outer_pair.first, inner_outer_pair.second, executor_)) {
621  }
622  }
623  return memory_level_;
624 }
bool needs_dictionary_translation(const Analyzer::ColumnVar *inner_col, const Analyzer::Expr *outer_col_expr, const Executor *executor)
const Data_Namespace::MemoryLevel memory_level_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static size_t BaselineJoinHashTable::getEntryCntCachedHashTable ( size_t  idx)
inline static

Definition at line 111 of file BaselineJoinHashTable.h.

References CHECK, CHECK_LT, hash_table_cache_, and hash_table_cache_mutex_.

Referenced by QueryRunner::QueryRunner::getEntryCntCachedBaselineHashTable().

111  {
112  std::lock_guard<std::mutex> guard(hash_table_cache_mutex_);
113  CHECK(!hash_table_cache_.empty());
114  CHECK_LT(idx, hash_table_cache_.size());
115  return hash_table_cache_.at(idx).second.entry_count;
116  }
#define CHECK_LT(x, y)
Definition: Logger.h:207
static std::mutex hash_table_cache_mutex_
#define CHECK(condition)
Definition: Logger.h:197
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_

+ Here is the caller graph for this function:

JoinHashTableInterface::HashType BaselineJoinHashTable::getHashType ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1224 of file BaselineJoinHashTable.cpp.

References layout_.

Referenced by OverlapsJoinHashTable::codegenMatchingSet(), and codegenSlot().

1224  {
1225  return layout_;
1226 }
JoinHashTableInterface::HashType layout_

+ Here is the caller graph for this function:

int BaselineJoinHashTable::getInnerTableId ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1209 of file BaselineJoinHashTable.cpp.

References CHECK, and inner_outer_pairs_.

Referenced by getInstance(), OverlapsJoinHashTable::getInstance(), OverlapsJoinHashTable::initHashTableOnCpu(), initHashTableOnCpu(), OverlapsJoinHashTable::reifyWithLayout(), and reifyWithLayout().

1209  {
1210  try {
1212  } catch (...) {
1213  CHECK(false);
1214  }
1215  return 0;
1216 }
std::vector< InnerOuter > inner_outer_pairs_
int getInnerTableId() const noexcept override
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

int BaselineJoinHashTable::getInnerTableId ( const std::vector< InnerOuter > &  inner_outer_pairs)
static protected

Definition at line 1228 of file BaselineJoinHashTable.cpp.

References CHECK.

1229  {
1230  CHECK(!inner_outer_pairs.empty());
1231  const auto first_inner_col = inner_outer_pairs.front().first;
1232  return first_inner_col->get_table_id();
1233 }
#define CHECK(condition)
Definition: Logger.h:197
int BaselineJoinHashTable::getInnerTableRteIdx ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1218 of file BaselineJoinHashTable.cpp.

References CHECK, and inner_outer_pairs_.

1218  {
1219  CHECK(!inner_outer_pairs_.empty());
1220  const auto first_inner_col = inner_outer_pairs_.front().first;
1221  return first_inner_col->get_rte_idx();
1222 }
std::vector< InnerOuter > inner_outer_pairs_
#define CHECK(condition)
Definition: Logger.h:197
std::shared_ptr< BaselineJoinHashTable > BaselineJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap column_cache,
Executor executor 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 35 of file BaselineJoinHashTable.cpp.

References BaselineJoinHashTable(), get_entries_per_device(), get_inner_query_info(), JoinHashTableInterface::getHashTypeString(), getInnerTableId(), Fragmenter_Namespace::TableInfo::getNumTuplesUpperBound(), getShardCountForCondition(), Data_Namespace::GPU_LEVEL, InputTableInfo::info, normalize_column_pairs(), VLOG, and VLOGGING.

Referenced by JoinHashTableInterface::getInstance().

42  {
43  decltype(std::chrono::steady_clock::now()) ts1, ts2;
44 
45  if (VLOGGING(1)) {
46  VLOG(1) << "Building keyed hash table " << getHashTypeString(preferred_hash_type)
47  << " for qual: " << condition->toString();
48  ts1 = std::chrono::steady_clock::now();
49  }
50  auto inner_outer_pairs = normalize_column_pairs(
51  condition.get(), *executor->getCatalog(), executor->getTemporaryTables());
52 
53  const auto& query_info =
54  get_inner_query_info(getInnerTableId(inner_outer_pairs), query_infos).info;
55  const auto total_entries = 2 * query_info.getNumTuplesUpperBound();
56  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
57  throw TooManyHashEntries();
58  }
59  const auto shard_count = memory_level == Data_Namespace::GPU_LEVEL
61  condition.get(), executor, inner_outer_pairs)
62  : 0;
63  const auto entries_per_device =
64  get_entries_per_device(total_entries, shard_count, device_count, memory_level);
65  auto join_hash_table = std::shared_ptr<BaselineJoinHashTable>(
66  new BaselineJoinHashTable(condition,
67  query_infos,
68  memory_level,
69  preferred_hash_type,
70  entries_per_device,
71  column_cache,
72  executor,
73  inner_outer_pairs,
74  device_count));
75  join_hash_table->checkHashJoinReplicationConstraint(getInnerTableId(inner_outer_pairs));
76  try {
77  join_hash_table->reify();
78  } catch (const TableMustBeReplicated& e) {
79  // Throw a runtime error to abort the query
80  join_hash_table->freeHashBufferMemory();
81  throw std::runtime_error(e.what());
82  } catch (const HashJoinFail& e) {
83  // HashJoinFail exceptions log an error and trigger a retry with a join loop (if
84  // possible)
85  join_hash_table->freeHashBufferMemory();
86  throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
87  "involved in equijoin | ") +
88  e.what());
89  } catch (const ColumnarConversionNotSupported& e) {
90  throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
91  e.what());
92  } catch (const OutOfMemory& e) {
93  throw HashJoinFail(
94  std::string("Ran out of memory while building hash tables for equijoin | ") +
95  e.what());
96  } catch (const std::exception& e) {
97  throw std::runtime_error(
98  std::string("Fatal error while attempting to build hash tables for join: ") +
99  e.what());
100  }
101  if (VLOGGING(1)) {
102  ts2 = std::chrono::steady_clock::now();
103  VLOG(1) << "Built keyed hash table "
104  << getHashTypeString(join_hash_table->getHashType()) << " in "
105  << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
106  << " ms";
107  }
108  return join_hash_table;
109 }
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
std::vector< InnerOuter > normalize_column_pairs(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
BaselineJoinHashTable(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const size_t entry_count, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count)
size_t get_entries_per_device(const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
#define VLOGGING(n)
Definition: Logger.h:195
int getInnerTableId() const noexcept override
static std::string getHashTypeString(HashType ht) noexcept
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)
#define VLOG(n)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t BaselineJoinHashTable::getJoinHashBuffer ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 160 of file BaselineJoinHashTable.cpp.

References CHECK, CHECK_LT, and CPU.

Referenced by toSet(), and toString().

161  {
162  if (device_type == ExecutorDeviceType::CPU && !cpu_hash_table_buff_) {
163  return 0;
164  }
165 #ifdef HAVE_CUDA
166  CHECK_LT(static_cast<size_t>(device_id), gpu_hash_table_buff_.size());
167  if (device_type == ExecutorDeviceType::CPU) {
168  return reinterpret_cast<int64_t>(&(*cpu_hash_table_buff_)[0]);
169  } else {
170  return gpu_hash_table_buff_[device_id]
171  ? reinterpret_cast<CUdeviceptr>(
172  gpu_hash_table_buff_[device_id]->getMemoryPtr())
173  : reinterpret_cast<CUdeviceptr>(nullptr);
174  }
175 #else
176  CHECK(device_type == ExecutorDeviceType::CPU);
177  return reinterpret_cast<int64_t>(&(*cpu_hash_table_buff_)[0]);
178 #endif
179 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
unsigned long long CUdeviceptr
Definition: nocuda.h:27
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getJoinHashBufferSize ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 181 of file BaselineJoinHashTable.cpp.

References CHECK, CHECK_LT, and CPU.

Referenced by toSet(), and toString().

182  {
183  if (device_type == ExecutorDeviceType::CPU && !cpu_hash_table_buff_) {
184  return 0;
185  }
186 #ifdef HAVE_CUDA
187  CHECK_LT(static_cast<size_t>(device_id), gpu_hash_table_buff_.size());
188  if (device_type == ExecutorDeviceType::CPU) {
189  return cpu_hash_table_buff_->size() *
190  sizeof(decltype(cpu_hash_table_buff_)::element_type::value_type);
191  } else {
192  return gpu_hash_table_buff_[device_id]
193  ? gpu_hash_table_buff_[device_id]->reservedSize()
194  : 0;
195  }
196 #else
197  CHECK(device_type == ExecutorDeviceType::CPU);
198  return cpu_hash_table_buff_->size() *
199  sizeof(decltype(cpu_hash_table_buff_)::element_type::value_type);
200 #endif
201 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getKeyBufferSize ( ) const
private noexcept

Definition at line 1132 of file BaselineJoinHashTable.cpp.

References CHECK, entry_count_, getKeyComponentCount(), getKeyComponentWidth(), layout_, and layoutRequiresAdditionalBuffers().

Referenced by countBufferOff(), offsetBufferOff(), and payloadBufferOff().

1132  {
1133  const auto key_component_width = getKeyComponentWidth();
1134  CHECK(key_component_width == 4 || key_component_width == 8);
1135  const auto key_component_count = getKeyComponentCount();
1137  return entry_count_ * key_component_count * key_component_width;
1138  } else {
1139  return entry_count_ * (key_component_count + 1) * key_component_width;
1140  }
1141 }
bool layoutRequiresAdditionalBuffers(JoinHashTableInterface::HashType layout) const noexcept override
JoinHashTableInterface::HashType layout_
#define CHECK(condition)
Definition: Logger.h:197
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getKeyComponentCount ( ) const
protected virtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 611 of file BaselineJoinHashTable.cpp.

References inner_outer_pairs_.

Referenced by codegenKey(), codegenMatchingSet(), codegenSlot(), getKeyBufferSize(), initHashTableForDevice(), initHashTableOnCpu(), toSet(), and toString().

611  {
612  return inner_outer_pairs_.size();
613 }
std::vector< InnerOuter > inner_outer_pairs_

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getKeyComponentWidth ( ) const
protected virtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 599 of file BaselineJoinHashTable.cpp.

References CHECK_EQ, and inner_outer_pairs_.

Referenced by codegenKey(), codegenMatchingSet(), codegenSlot(), getKeyBufferSize(), initHashTableForDevice(), initHashTableOnCpu(), toSet(), and toString().

599  {
600  for (const auto& inner_outer_pair : inner_outer_pairs_) {
601  const auto inner_col = inner_outer_pair.first;
602  const auto& inner_col_ti = inner_col->get_type_info();
603  if (inner_col_ti.get_logical_size() > 4) {
604  CHECK_EQ(8, inner_col_ti.get_logical_size());
605  return 8;
606  }
607  }
608  return 4;
609 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< InnerOuter > inner_outer_pairs_

+ Here is the caller graph for this function:

Data_Namespace::MemoryLevel BaselineJoinHashTable::getMemoryLevel ( ) const
inline override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 84 of file BaselineJoinHashTable.h.

References memory_level_.

84  {
85  return memory_level_;
86  };
const Data_Namespace::MemoryLevel memory_level_
static uint64_t BaselineJoinHashTable::getNumberOfCachedHashTables ( )
inline static

Definition at line 118 of file BaselineJoinHashTable.h.

References hash_table_cache_, and hash_table_cache_mutex_.

Referenced by QueryRunner::QueryRunner::getNumberOfCachedBaselineJoinHashTables().

118  {
119  std::lock_guard<std::mutex> guard(hash_table_cache_mutex_);
120  return hash_table_cache_.size();
121  }
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getShardCountForCondition ( const Analyzer::BinOper condition,
const Executor executor,
const std::vector< InnerOuter > &  inner_outer_pairs 
)
static

Definition at line 147 of file BaselineJoinHashTable.cpp.

References get_shard_count().

Referenced by getInstance(), OverlapsJoinHashTable::getInstance(), shardCount(), and Executor::skipFragmentPair().

150  {
151  for (const auto& inner_outer_pair : inner_outer_pairs) {
152  const auto pair_shard_count = get_shard_count(inner_outer_pair, executor);
153  if (pair_shard_count) {
154  return pair_shard_count;
155  }
156  }
157  return 0;
158 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * BaselineJoinHashTable::hashPtr ( const size_t  index)
protected

Definition at line 1195 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, JoinHashTable::codegenHashTableLoad(), executor_, LL_BUILDER, and LL_CONTEXT.

Referenced by codegenSlot().

1195  {
1196  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1197  auto hash_ptr = JoinHashTable::codegenHashTableLoad(index, executor_);
1198  const auto pi8_type = llvm::Type::getInt8PtrTy(LL_CONTEXT);
1199  return hash_ptr->getType()->isPointerTy()
1200  ? LL_BUILDER.CreatePointerCast(hash_ptr, pi8_type)
1201  : LL_BUILDER.CreateIntToPtr(hash_ptr, pi8_type);
1202 }
#define LL_CONTEXT
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define LL_BUILDER
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int BaselineJoinHashTable::initHashTableForDevice ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_buckets,
const JoinHashTableInterface::HashType  layout,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id 
)
protected

Definition at line 961 of file BaselineJoinHashTable.cpp.

References CudaAllocator::allocGpuAbstractBuffer(), catalog_, CHECK, CHECK_EQ, copy_to_gpu(), cpu_hash_table_buff_, cpu_hash_table_buff_mutex_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, emitted_keys_count_, entry_count_, Catalog_Namespace::Catalog::getDataMgr(), getKeyComponentCount(), getKeyComponentWidth(), Data_Namespace::GPU_LEVEL, initHashTableOnCpu(), initHashTableOnGpu(), layoutRequiresAdditionalBuffers(), memory_level_, JoinHashTableInterface::OneToOne, and VLOG.

Referenced by reifyForDevice().

967  {
968  auto timer = DEBUG_TIMER(__func__);
969  const auto key_component_width = getKeyComponentWidth();
970  const auto key_component_count = getKeyComponentCount();
971  int err = 0;
972 #ifdef HAVE_CUDA
973  auto& data_mgr = catalog_->getDataMgr();
975  const auto entry_size =
976  (key_component_count +
977  (layout == JoinHashTableInterface::HashType::OneToOne ? 1 : 0)) *
978  key_component_width;
979  const auto keys_for_all_rows = emitted_keys_count_;
980  const size_t one_to_many_hash_entries = layoutRequiresAdditionalBuffers(layout)
981  ? 2 * entry_count_ + keys_for_all_rows
982  : 0;
983  const size_t hash_table_size =
984  entry_size * entry_count_ + one_to_many_hash_entries * sizeof(int32_t);
985 
986  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
987  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
988  throw TooManyHashEntries(
989  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
990  "yet");
991  }
992 
993  VLOG(1) << "Initializing GPU Hash Table for device " << device_id << " with "
994  << entry_count_ << " hash entries and " << one_to_many_hash_entries
995  << " entries in the one to many buffer";
996  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
997  gpu_hash_table_buff_[device_id] =
998  CudaAllocator::allocGpuAbstractBuffer(&data_mgr, hash_table_size, device_id);
999  }
1000 #else
1001  CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
1002 #endif
1003  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
1004  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
1005  err = initHashTableOnCpu(join_columns, join_column_types, join_bucket_info, layout);
1006  // Transfer the hash table on the GPU if we've only built it on CPU
1007  // but the query runs on GPU (join on dictionary encoded columns).
1008  // Don't transfer the buffer if there was an error since we'll bail anyway.
1009  if (memory_level_ == Data_Namespace::GPU_LEVEL && !err) {
1010 #ifdef HAVE_CUDA
1011  copy_to_gpu(
1012  &data_mgr,
1013  reinterpret_cast<CUdeviceptr>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1014  &(*cpu_hash_table_buff_)[0],
1015  cpu_hash_table_buff_->size() * sizeof((*cpu_hash_table_buff_)[0]),
1016  device_id);
1017 #else
1018  CHECK(false);
1019 #endif
1020  }
1021  } else {
1022  err = initHashTableOnGpu(join_columns,
1023  join_column_types,
1024  join_bucket_info,
1025  layout,
1026  key_component_width,
1027  key_component_count,
1028  device_id);
1029  }
1030  return err;
1031 }
bool layoutRequiresAdditionalBuffers(JoinHashTableInterface::HashType layout) const noexcept override
#define CHECK_EQ(x, y)
Definition: Logger.h:205
virtual int initHashTableOnGpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout, const size_t key_component_width, const size_t key_component_count, const int device_id)
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:209
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:30
const Catalog_Namespace::Catalog * catalog_
static Data_Namespace::AbstractBuffer * allocGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, const size_t num_bytes, const int device_id)
const Data_Namespace::MemoryLevel memory_level_
virtual int initHashTableOnCpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const JoinHashTableInterface::HashType layout)
#define CHECK(condition)
Definition: Logger.h:197
#define DEBUG_TIMER(name)
Definition: Logger.h:313
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
#define VLOG(n)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int BaselineJoinHashTable::initHashTableOnCpu ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const JoinHashTableInterface::HashType  layout 
)
protected virtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 626 of file BaselineJoinHashTable.cpp.

References CHECK, condition_, cpu_hash_table_buff_, cpu_threads(), DEBUG_TIMER, entry_count_, fill_baseline_hash_join_buff_32(), fill_baseline_hash_join_buff_64(), fill_one_to_many_baseline_hash_table_32(), fill_one_to_many_baseline_hash_table_64(), getCompositeKeyInfo(), getInnerTableId(), getKeyComponentCount(), getKeyComponentWidth(), init_baseline_hash_join_buff_32(), init_baseline_hash_join_buff_64(), init_hash_join_buff(), initHashTableOnCpuFromCache(), JoinHashTableInterface::OneToMany, JoinHashTableInterface::OneToOne, putHashTableOnCpuToCache(), and VLOG.

Referenced by initHashTableForDevice().

630  {
631  auto timer = DEBUG_TIMER(__func__);
632  const auto composite_key_info = getCompositeKeyInfo();
633  CHECK(!join_columns.empty());
634  HashTableCacheKey cache_key{join_columns.front().num_elems,
635  composite_key_info.cache_key_chunks,
636  condition_->get_optype()};
637  initHashTableOnCpuFromCache(cache_key);
638  if (cpu_hash_table_buff_) {
639  return 0;
640  }
641  const auto key_component_width = getKeyComponentWidth();
642  const auto key_component_count = getKeyComponentCount();
643  const auto entry_size =
644  (key_component_count +
645  (layout == JoinHashTableInterface::HashType::OneToOne ? 1 : 0)) *
646  key_component_width;
647  const auto keys_for_all_rows = join_columns.front().num_elems;
648  const size_t one_to_many_hash_entries =
650  ? 2 * entry_count_ + keys_for_all_rows
651  : 0;
652  const size_t hash_table_size =
653  entry_size * entry_count_ + one_to_many_hash_entries * sizeof(int32_t);
654 
655  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
656  if (hash_table_size > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
657  throw TooManyHashEntries(
658  "Hash tables for GPU requiring larger than 2GB contigious memory not supported "
659  "yet");
660  }
661 
662  VLOG(1) << "Initializing CPU Join Hash Table with " << entry_count_
663  << " hash entries and " << one_to_many_hash_entries
664  << " entries in the one to many buffer";
665  VLOG(1) << "Total hash table size: " << hash_table_size << " Bytes";
666 
667  cpu_hash_table_buff_.reset(new std::vector<int8_t>(hash_table_size));
668  int thread_count = cpu_threads();
669  std::vector<std::future<void>> init_cpu_buff_threads;
670  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
671  init_cpu_buff_threads.emplace_back(
672  std::async(std::launch::async,
673  [this,
674  key_component_count,
675  key_component_width,
676  thread_idx,
677  thread_count,
678  layout] {
679  switch (key_component_width) {
680  case 4:
682  &(*cpu_hash_table_buff_)[0],
683  entry_count_,
684  key_component_count,
686  -1,
687  thread_idx,
688  thread_count);
689  break;
690  case 8:
692  &(*cpu_hash_table_buff_)[0],
693  entry_count_,
694  key_component_count,
696  -1,
697  thread_idx,
698  thread_count);
699  break;
700  default:
701  CHECK(false);
702  }
703  }));
704  }
705  for (auto& child : init_cpu_buff_threads) {
706  child.get();
707  }
708  std::vector<std::future<int>> fill_cpu_buff_threads;
709  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
710  fill_cpu_buff_threads.emplace_back(std::async(
711  std::launch::async,
712  [this,
713  &composite_key_info,
714  &join_columns,
715  &join_column_types,
716  key_component_count,
717  key_component_width,
718  layout,
719  thread_idx,
720  thread_count] {
721  switch (key_component_width) {
722  case 4: {
723  const auto key_handler =
724  GenericKeyHandler(key_component_count,
725  true,
726  &join_columns[0],
727  &join_column_types[0],
728  &composite_key_info.sd_inner_proxy_per_key[0],
729  &composite_key_info.sd_outer_proxy_per_key[0]);
731  &(*cpu_hash_table_buff_)[0],
732  entry_count_,
733  -1,
734  key_component_count,
736  &key_handler,
737  join_columns[0].num_elems,
738  thread_idx,
739  thread_count);
740  break;
741  }
742  case 8: {
743  const auto key_handler =
744  GenericKeyHandler(key_component_count,
745  true,
746  &join_columns[0],
747  &join_column_types[0],
748  &composite_key_info.sd_inner_proxy_per_key[0],
749  &composite_key_info.sd_outer_proxy_per_key[0]);
751  &(*cpu_hash_table_buff_)[0],
752  entry_count_,
753  -1,
754  key_component_count,
756  &key_handler,
757  join_columns[0].num_elems,
758  thread_idx,
759  thread_count);
760  break;
761  }
762  default:
763  CHECK(false);
764  }
765  return -1;
766  }));
767  }
768  int err = 0;
769  for (auto& child : fill_cpu_buff_threads) {
770  int partial_err = child.get();
771  if (partial_err) {
772  err = partial_err;
773  }
774  }
775  if (err) {
776  cpu_hash_table_buff_.reset();
777  return err;
778  }
780  auto one_to_many_buff = reinterpret_cast<int32_t*>(&(*cpu_hash_table_buff_)[0] +
781  entry_count_ * entry_size);
782  init_hash_join_buff(one_to_many_buff, entry_count_, -1, 0, 1);
783  switch (key_component_width) {
784  case 4: {
785  const auto composite_key_dict =
786  reinterpret_cast<int32_t*>(&(*cpu_hash_table_buff_)[0]);
788  composite_key_dict,
789  entry_count_,
790  -1,
791  key_component_count,
792  join_columns,
793  join_column_types,
794  join_bucket_info,
795  composite_key_info.sd_inner_proxy_per_key,
796  composite_key_info.sd_outer_proxy_per_key,
797  thread_count);
798  break;
799  }
800  case 8: {
801  const auto composite_key_dict =
802  reinterpret_cast<int64_t*>(&(*cpu_hash_table_buff_)[0]);
804  composite_key_dict,
805  entry_count_,
806  -1,
807  key_component_count,
808  join_columns,
809  join_column_types,
810  join_bucket_info,
811  composite_key_info.sd_inner_proxy_per_key,
812  composite_key_info.sd_outer_proxy_per_key,
813  thread_count);
814  break;
815  }
816  default:
817  CHECK(false);
818  }
819  }
820  if (!err && getInnerTableId() > 0) {
821  putHashTableOnCpuToCache(cache_key);
822  }
823  return err;
824 }
void putHashTableOnCpuToCache(const HashTableCacheKey &)
void init_baseline_hash_join_buff_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void fill_one_to_many_baseline_hash_table_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void * > &sd_inner_proxy_per_key, const std::vector< const void * > &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
void init_baseline_hash_join_buff_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
int fill_baseline_hash_join_buff_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
CompositeKeyInfo getCompositeKeyInfo() const
void fill_one_to_many_baseline_hash_table_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const std::vector< JoinBucketInfo > &join_bucket_info, const std::vector< const void * > &sd_inner_proxy_per_key, const std::vector< const void * > &sd_outer_proxy_per_key, const int32_t cpu_thread_count)
int getInnerTableId() const noexcept override
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int64_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
void initHashTableOnCpuFromCache(const HashTableCacheKey &)
#define CHECK(condition)
Definition: Logger.h:197
#define DEBUG_TIMER(name)
Definition: Logger.h:313
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
int cpu_threads()
Definition: thread_count.h:24
#define VLOG(n)
Definition: Logger.h:291
const std::shared_ptr< Analyzer::BinOper > condition_
int fill_baseline_hash_join_buff_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, const GenericKeyHandler *key_handler, const int64_t num_elems, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void BaselineJoinHashTable::initHashTableOnCpuFromCache ( const HashTableCacheKey key)
protected

Definition at line 1260 of file BaselineJoinHashTable.cpp.

References cpu_hash_table_buff_, DEBUG_TIMER, emitted_keys_count_, entry_count_, hash_table_cache_, hash_table_cache_mutex_, layout_, and VLOG.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu(), and initHashTableOnCpu().

1260  {
1261  auto timer = DEBUG_TIMER(__func__);
1262  VLOG(1) << "Checking CPU hash table cache.";
1263  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1264  if (hash_table_cache_.size() == 0) {
1265  VLOG(1) << "CPU hash table cache was empty.";
1266  }
1267  for (const auto& kv : hash_table_cache_) {
1268  if (kv.first == key) {
1269  VLOG(1) << "Found a suitable hash table in the cache.";
1270  cpu_hash_table_buff_ = kv.second.buffer;
1271  layout_ = kv.second.type;
1272  entry_count_ = kv.second.entry_count;
1273  emitted_keys_count_ = kv.second.emitted_keys_count;
1274  break;
1275  } else {
1276  VLOG(1) << hash_table_cache_.size()
1277  << " hash tables found in cache. None were suitable for this query.";
1278  }
1279  }
1280 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
JoinHashTableInterface::HashType layout_
static std::mutex hash_table_cache_mutex_
#define DEBUG_TIMER(name)
Definition: Logger.h:313
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
#define VLOG(n)
Definition: Logger.h:291

+ Here is the caller graph for this function:

int BaselineJoinHashTable::initHashTableOnGpu ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_bucket_info,
const JoinHashTableInterface::HashType  layout,
const size_t  key_component_width,
const size_t  key_component_count,
const int  device_id 
)
protected virtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 826 of file BaselineJoinHashTable.cpp.

References CudaAllocator::alloc(), catalog_, copy_from_gpu(), copy_to_gpu(), DEBUG_TIMER, entry_count_, fill_baseline_hash_join_buff_on_device_32(), fill_baseline_hash_join_buff_on_device_64(), fill_one_to_many_baseline_hash_table_on_device_32(), fill_one_to_many_baseline_hash_table_on_device_64(), Catalog_Namespace::Catalog::getDataMgr(), init_baseline_hash_join_buff_on_device_32(), init_baseline_hash_join_buff_on_device_64(), init_hash_join_buff_on_device(), JoinHashTableInterface::OneToMany, JoinHashTableInterface::OneToOne, transfer_flat_object_to_gpu(), transfer_vector_of_flat_objects_to_gpu(), and UNREACHABLE.

Referenced by initHashTableForDevice().

833  {
834  auto timer = DEBUG_TIMER(__func__);
835  int err = 0;
836 #ifdef HAVE_CUDA
837  auto& data_mgr = catalog_->getDataMgr();
838  CudaAllocator allocator(&data_mgr, device_id);
839  auto dev_err_buff = reinterpret_cast<CUdeviceptr>(allocator.alloc(sizeof(int)));
840  copy_to_gpu(&data_mgr, dev_err_buff, &err, sizeof(err), device_id);
841  switch (key_component_width) {
842  case 4:
844  reinterpret_cast<int8_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
845  entry_count_,
846  key_component_count,
848  -1,
849  block_size_,
850  grid_size_);
851  break;
852  case 8:
854  reinterpret_cast<int8_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
855  entry_count_,
856  key_component_count,
858  -1,
859  block_size_,
860  grid_size_);
861  break;
862  default:
863  UNREACHABLE();
864  }
865  auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(join_columns, allocator);
866  auto hash_buff =
867  reinterpret_cast<int8_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr());
868  auto join_column_types_gpu =
869  transfer_vector_of_flat_objects_to_gpu(join_column_types, allocator);
870 
871  const auto key_handler = GenericKeyHandler(key_component_count,
872  true,
873  join_columns_gpu,
874  join_column_types_gpu,
875  nullptr,
876  nullptr);
877  const auto key_handler_gpu = transfer_flat_object_to_gpu(key_handler, allocator);
878  switch (key_component_width) {
879  case 4: {
881  hash_buff,
882  entry_count_,
883  -1,
884  key_component_count,
886  reinterpret_cast<int*>(dev_err_buff),
887  key_handler_gpu,
888  join_columns.front().num_elems,
889  block_size_,
890  grid_size_);
891  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
892  break;
893  }
894  case 8: {
896  hash_buff,
897  entry_count_,
898  -1,
899  key_component_count,
901  reinterpret_cast<int*>(dev_err_buff),
902  key_handler_gpu,
903  join_columns.front().num_elems,
904  block_size_,
905  grid_size_);
906  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
907  break;
908  }
909  default:
910  UNREACHABLE();
911  }
912  if (err) {
913  return err;
914  }
916  const auto entry_size = key_component_count * key_component_width;
917  auto one_to_many_buff = reinterpret_cast<int32_t*>(
918  gpu_hash_table_buff_[device_id]->getMemoryPtr() + entry_count_ * entry_size);
919  switch (key_component_width) {
920  case 4: {
921  const auto composite_key_dict =
922  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr());
924  one_to_many_buff, entry_count_, -1, block_size_, grid_size_);
926  composite_key_dict,
927  entry_count_,
928  -1,
929  key_component_count,
930  key_handler_gpu,
931  join_columns.front().num_elems,
932  block_size_,
933  grid_size_);
934  break;
935  }
936  case 8: {
937  const auto composite_key_dict =
938  reinterpret_cast<int64_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr());
940  one_to_many_buff, entry_count_, -1, block_size_, grid_size_);
942  composite_key_dict,
943  entry_count_,
944  -1,
945  key_handler_gpu,
946  join_columns.front().num_elems,
947  block_size_,
948  grid_size_);
949  break;
950  }
951  default:
952  UNREACHABLE();
953  }
954  }
955 #else
956  UNREACHABLE();
957 #endif
958  return err;
959 }
void fill_one_to_many_baseline_hash_table_on_device_32(int32_t *buff, const int32_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const GenericKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
void init_baseline_hash_join_buff_on_device_64(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
void fill_baseline_hash_join_buff_on_device_64(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:209
unsigned long long CUdeviceptr
Definition: nocuda.h:27
#define UNREACHABLE()
Definition: Logger.h:241
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:30
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
T * transfer_flat_object_to_gpu(const T &object, CudaAllocator &allocator)
void fill_baseline_hash_join_buff_on_device_32(int8_t *hash_buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t key_component_count, const bool with_val_slot, int *dev_err_buff, const GenericKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)
const Catalog_Namespace::Catalog * catalog_
void init_baseline_hash_join_buff_on_device_32(int8_t *hash_join_buff, const int64_t entry_count, const size_t key_component_count, const bool with_val_slot, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
T * transfer_vector_of_flat_objects_to_gpu(const std::vector< T > &vec, CudaAllocator &allocator)
#define DEBUG_TIMER(name)
Definition: Logger.h:313
void init_hash_join_buff_on_device(int32_t *buff, const int64_t entry_count, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
void fill_one_to_many_baseline_hash_table_on_device_64(int32_t *buff, const int64_t *composite_key_dict, const int64_t hash_entry_count, const int32_t invalid_slot_val, const GenericKeyHandler *key_handler, const int64_t num_elems, const size_t block_size_x, const size_t grid_size_x)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool BaselineJoinHashTable::isBitwiseEq ( ) const
protected

Definition at line 1323 of file BaselineJoinHashTable.cpp.

References condition_, and kBW_EQ.

Referenced by OverlapsJoinHashTable::fetchColumnsForDevice(), and fetchColumnsForDevice().

1323  {
1324  return condition_->get_optype() == kBW_EQ;
1325 }
Definition: sqldefs.h:31
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the caller graph for this function:

bool BaselineJoinHashTable::layoutRequiresAdditionalBuffers ( JoinHashTableInterface::HashType  layout) const
inline override protected virtual noexcept
size_t BaselineJoinHashTable::offsetBufferOff ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1098 of file BaselineJoinHashTable.cpp.

References getKeyBufferSize().

Referenced by codegenMatchingSet(), OverlapsJoinHashTable::codegenMatchingSet(), countBufferOff(), toSet(), and toString().

1098  {
1099  return getKeyBufferSize();
1100 }
size_t getKeyBufferSize() const noexcept

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::payloadBufferOff ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1110 of file BaselineJoinHashTable.cpp.

References countBufferOff(), getComponentBufferSize(), getKeyBufferSize(), layout_, and layoutRequiresAdditionalBuffers().

Referenced by toSet(), and toString().

1110  {
1113  } else {
1114  return getKeyBufferSize();
1115  }
1116 }
bool layoutRequiresAdditionalBuffers(JoinHashTableInterface::HashType layout) const noexcept override
size_t getComponentBufferSize() const noexcept
size_t getKeyBufferSize() const noexcept
JoinHashTableInterface::HashType layout_
size_t countBufferOff() const noexcept override

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void BaselineJoinHashTable::putHashTableOnCpuToCache ( const HashTableCacheKey key)
protected

Definition at line 1282 of file BaselineJoinHashTable.cpp.

References CHECK_GE, BaselineJoinHashTable::HashTableCacheKey::chunk_keys, cpu_hash_table_buff_, emitted_keys_count_, entry_count_, hash_table_cache_, hash_table_cache_mutex_, layout_, and VLOG.

Referenced by OverlapsJoinHashTable::initHashTableOnCpu(), and initHashTableOnCpu().

1282  {
1283  for (auto chunk_key : key.chunk_keys) {
1284  CHECK_GE(chunk_key.size(), size_t(2));
1285  if (chunk_key[1] < 0) {
1286  return;
1287  }
1288  }
1289 
1290  std::lock_guard<std::mutex> hash_table_cache_lock(hash_table_cache_mutex_);
1291  VLOG(1) << "Storing hash table in cache.";
1292  for (const auto& kv : hash_table_cache_) {
1293  if (std::get<0>(kv) == key) {
1294  return;
1295  }
1296  }
1297  hash_table_cache_.emplace_back(
1298  key,
1299  HashTableCacheValue{
1301 }
std::shared_ptr< std::vector< int8_t > > cpu_hash_table_buff_
#define CHECK_GE(x, y)
Definition: Logger.h:210
JoinHashTableInterface::HashType layout_
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
#define VLOG(n)
Definition: Logger.h:291

+ Here is the caller graph for this function:

void BaselineJoinHashTable::reify ( )
protected

Definition at line 311 of file BaselineJoinHashTable.cpp.

References CHECK_EQ, CHECK_LT, condition_, DEBUG_TIMER, device_count_, freeHashBufferMemory(), HashTypeCache::get(), getCompositeKeyInfo(), inner_outer_pairs_, layout_, JoinHashTableInterface::ManyToMany, JoinHashTableInterface::OneToMany, reifyWithLayout(), HashTypeCache::set(), and VLOG.

311  {
312  auto timer = DEBUG_TIMER(__func__);
314 #ifdef HAVE_CUDA
315  gpu_hash_table_buff_.resize(device_count_);
316 #endif // HAVE_CUDA
317  const auto composite_key_info = getCompositeKeyInfo();
318  const auto type_and_found = HashTypeCache::get(composite_key_info.cache_key_chunks);
319  const auto layout = type_and_found.second ? type_and_found.first : layout_;
320 
321  if (condition_->is_overlaps_oper()) {
322  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
324 
325  if (inner_outer_pairs_[0].second->get_type_info().is_array()) {
327  } else {
329  }
330  try {
331  reifyWithLayout(layout);
332  return;
333  } catch (const std::exception& e) {
334  VLOG(1) << "Caught exception while building overlaps baseline hash table: "
335  << e.what();
336  throw;
337  }
338  }
339 
340  try {
341  reifyWithLayout(layout);
342  } catch (const std::exception& e) {
343  VLOG(1) << "Caught exception while building baseline hash table: " << e.what();
345  HashTypeCache::set(composite_key_info.cache_key_chunks,
348  }
349 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
static void set(const std::vector< ChunkKey > &key, const JoinHashTableInterface::HashType hash_type)
JoinHashTableInterface::HashType layout_
std::vector< InnerOuter > inner_outer_pairs_
virtual void reifyWithLayout(const JoinHashTableInterface::HashType layout)
CompositeKeyInfo getCompositeKeyInfo() const
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define DEBUG_TIMER(name)
Definition: Logger.h:313
#define VLOG(n)
Definition: Logger.h:291
const std::shared_ptr< Analyzer::BinOper > condition_
static std::pair< JoinHashTableInterface::HashType, bool > get(const std::vector< ChunkKey > &key)

+ Here is the call graph for this function:

void BaselineJoinHashTable::reifyForDevice ( const ColumnsForDevice columns_for_device,
const JoinHashTableInterface::HashType  layout,
const int  device_id,
const logger::ThreadId  parent_thread_id 
)
protected

Definition at line 565 of file BaselineJoinHashTable.cpp.

References DEBUG_TIMER_NEW_THREAD, ERR_FAILED_TO_FETCH_COLUMN, ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN, getEffectiveMemoryLevel(), initHashTableForDevice(), inner_outer_pairs_, BaselineJoinHashTable::ColumnsForDevice::join_buckets, BaselineJoinHashTable::ColumnsForDevice::join_column_types, BaselineJoinHashTable::ColumnsForDevice::join_columns, and to_string().

Referenced by OverlapsJoinHashTable::reifyWithLayout(), and reifyWithLayout().

568  {
569  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
570  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
571  const auto err = initHashTableForDevice(columns_for_device.join_columns,
572  columns_for_device.join_column_types,
573  columns_for_device.join_buckets,
574  layout,
575  effective_memory_level,
576  device_id);
577  if (err) {
578  switch (err) {
580  throw FailedToFetchColumn();
583  default:
584  throw HashJoinFail(
585  std::string("Unrecognized error when initializing baseline hash table (") +
586  std::to_string(err) + std::string(")"));
587  }
588  }
589 }
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:318
std::string to_string(char const *&&v)
std::vector< InnerOuter > inner_outer_pairs_
static const int ERR_FAILED_TO_FETCH_COLUMN
int initHashTableForDevice(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_buckets, const JoinHashTableInterface::HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
static const int ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void BaselineJoinHashTable::reifyWithLayout ( const JoinHashTableInterface::HashType  layout)
protected virtual

Reimplemented in OverlapsJoinHashTable.

Definition at line 351 of file BaselineJoinHashTable.cpp.

References approximateTupleCount(), catalog_, CHECK, device_count_, emitted_keys_count_, entry_count_, fetchColumnsForDevice(), get_entries_per_device(), get_inner_query_info(), Catalog_Namespace::Catalog::getDataMgr(), getInnerTableId(), Data_Namespace::GPU_LEVEL, InputTableInfo::info, layout_, memory_level_, JoinHashTableInterface::OneToMany, only_shards_for_device(), query_infos_, reifyForDevice(), shardCount(), and logger::thread_id().

Referenced by reify().

352  {
353  layout_ = layout;
354  const auto& query_info = get_inner_query_info(getInnerTableId(), query_infos_).info;
355  if (query_info.fragments.empty()) {
356  return;
357  }
358  auto& data_mgr = catalog_->getDataMgr();
359  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
361  for (int device_id = 0; device_id < device_count_; ++device_id) {
362  dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(&data_mgr, device_id));
363  }
364  }
365  std::vector<BaselineJoinHashTable::ColumnsForDevice> columns_per_device;
366  const auto shard_count = shardCount();
367  for (int device_id = 0; device_id < device_count_; ++device_id) {
368  const auto fragments =
369  shard_count
370  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
371  : query_info.fragments;
372  const auto columns_for_device =
373  fetchColumnsForDevice(fragments,
374  device_id,
376  ? dev_buff_owners[device_id].get()
377  : nullptr);
378  columns_per_device.push_back(columns_for_device);
379  }
381  CHECK(!columns_per_device.front().join_columns.empty());
382  emitted_keys_count_ = columns_per_device.front().join_columns.front().num_elems;
383  size_t tuple_count;
384  std::tie(tuple_count, std::ignore) = approximateTupleCount(columns_per_device);
385  const auto entry_count = 2 * std::max(tuple_count, size_t(1));
386 
387  entry_count_ =
388  get_entries_per_device(entry_count, shard_count, device_count_, memory_level_);
389  }
390  std::vector<std::future<void>> init_threads;
391  for (int device_id = 0; device_id < device_count_; ++device_id) {
392  const auto fragments =
393  shard_count
394  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
395  : query_info.fragments;
396  init_threads.push_back(std::async(std::launch::async,
398  this,
399  columns_per_device[device_id],
400  layout,
401  device_id,
402  logger::thread_id()));
403  }
404  for (auto& init_thread : init_threads) {
405  init_thread.wait();
406  }
407  for (auto& init_thread : init_threads) {
408  init_thread.get();
409  }
410 }
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:209
virtual ColumnsForDevice fetchColumnsForDevice(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
JoinHashTableInterface::HashType layout_
size_t get_entries_per_device(const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
std::vector< Fragmenter_Namespace::FragmentInfo > only_shards_for_device(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
const std::vector< InputTableInfo > & query_infos_
void reifyForDevice(const ColumnsForDevice &columns_for_device, const JoinHashTableInterface::HashType layout, const int device_id, const logger::ThreadId parent_thread_id)
virtual std::pair< size_t, size_t > approximateTupleCount(const std::vector< ColumnsForDevice > &) const
int getInnerTableId() const noexcept override
const Catalog_Namespace::Catalog * catalog_
const Data_Namespace::MemoryLevel memory_level_
ThreadId thread_id()
Definition: Logger.cpp:731
#define CHECK(condition)
Definition: Logger.h:197
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::shardCount ( ) const
protected

Definition at line 591 of file BaselineJoinHashTable.cpp.

References condition_, executor_, getShardCountForCondition(), Data_Namespace::GPU_LEVEL, inner_outer_pairs_, and memory_level_.

Referenced by checkHashJoinReplicationConstraint(), OverlapsJoinHashTable::reifyWithLayout(), and reifyWithLayout().

591  {
593  return 0;
594  }
597 }
std::vector< InnerOuter > inner_outer_pairs_
const Data_Namespace::MemoryLevel memory_level_
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::set< DecodedJoinHashBufferEntry > BaselineJoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override virtual

Implements JoinHashTableInterface.

Definition at line 241 of file BaselineJoinHashTable.cpp.

References catalog_, copy_from_gpu(), countBufferOff(), entry_count_, Catalog_Namespace::Catalog::getDataMgr(), getJoinHashBuffer(), getJoinHashBufferSize(), getKeyComponentCount(), getKeyComponentWidth(), GPU, layout_, offsetBufferOff(), JoinHashTableInterface::OneToOne, payloadBufferOff(), and JoinHashTableInterface::toSet().

243  {
244  auto buffer = getJoinHashBuffer(device_type, device_id);
245  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
246 #ifdef HAVE_CUDA
247  std::unique_ptr<int8_t[]> buffer_copy;
248  if (device_type == ExecutorDeviceType::GPU) {
249  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
250 
252  buffer_copy.get(),
253  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
254  buffer_size,
255  device_id);
256  }
257  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
258 #else
259  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
260 #endif // HAVE_CUDA
261  auto ptr2 = ptr1 + offsetBufferOff();
262  auto ptr3 = ptr1 + countBufferOff();
263  auto ptr4 = ptr1 + payloadBufferOff();
268  entry_count_,
269  ptr1,
270  ptr2,
271  ptr3,
272  ptr4,
273  buffer_size);
274 }
size_t offsetBufferOff() const noexcept override
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:209
unsigned long long CUdeviceptr
Definition: nocuda.h:27
JoinHashTableInterface::HashType layout_
size_t payloadBufferOff() const noexcept override
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const noexcept override
const Catalog_Namespace::Catalog * catalog_
virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type, const int device_id) const =0
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const noexcept override
size_t countBufferOff() const noexcept override

+ Here is the call graph for this function:

std::string BaselineJoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
override virtual

Implements JoinHashTableInterface.

Definition at line 203 of file BaselineJoinHashTable.cpp.

References catalog_, condition_, copy_from_gpu(), countBufferOff(), entry_count_, Catalog_Namespace::Catalog::getDataMgr(), JoinHashTableInterface::getHashTypeString(), getJoinHashBuffer(), getJoinHashBufferSize(), getKeyComponentCount(), getKeyComponentWidth(), GPU, layout_, offsetBufferOff(), JoinHashTableInterface::OneToOne, payloadBufferOff(), and JoinHashTableInterface::toString().

205  {
206  auto buffer = getJoinHashBuffer(device_type, device_id);
207  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
208 #ifdef HAVE_CUDA
209  std::unique_ptr<int8_t[]> buffer_copy;
210  if (device_type == ExecutorDeviceType::GPU) {
211  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
212 
214  buffer_copy.get(),
215  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
216  buffer_size,
217  device_id);
218  }
219  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
220 #else
221  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
222 #endif // HAVE_CUDA
223  auto ptr2 = ptr1 + offsetBufferOff();
224  auto ptr3 = ptr1 + countBufferOff();
225  auto ptr4 = ptr1 + payloadBufferOff();
227  !condition_->is_overlaps_oper() ? "keyed" : "geo",
232  entry_count_,
233  ptr1,
234  ptr2,
235  ptr3,
236  ptr4,
237  buffer_size,
238  raw);
239 }
size_t offsetBufferOff() const noexcept override
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:209
unsigned long long CUdeviceptr
Definition: nocuda.h:27
JoinHashTableInterface::HashType layout_
size_t payloadBufferOff() const noexcept override
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const noexcept override
const Catalog_Namespace::Catalog * catalog_
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
static std::string getHashTypeString(HashType ht) noexcept
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const noexcept override
virtual std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
const std::shared_ptr< Analyzer::BinOper > condition_
size_t countBufferOff() const noexcept override

+ Here is the call graph for this function:

static auto BaselineJoinHashTable::yieldCacheInvalidator ( ) -> std::function<void()>
inlinestatic

Definition at line 96 of file BaselineJoinHashTable.h.

References hash_table_cache_, hash_table_cache_mutex_, and VLOG.

96  {
97  VLOG(1) << "Invalidate " << hash_table_cache_.size() << " cached baseline hashtable.";
98  return []() -> void {
99  std::lock_guard<std::mutex> guard(hash_table_cache_mutex_);
100  hash_table_cache_.clear();
101  };
102  }
static std::mutex hash_table_cache_mutex_
static std::vector< std::pair< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
#define VLOG(n)
Definition: Logger.h:291

Member Data Documentation

ColumnCacheMap& BaselineJoinHashTable::column_cache_
protected
std::shared_ptr<std::vector<int8_t> > BaselineJoinHashTable::cpu_hash_table_buff_
protected
std::mutex BaselineJoinHashTable::cpu_hash_table_buff_mutex_
protected

Definition at line 272 of file BaselineJoinHashTable.h.

Referenced by initHashTableForDevice().

const int BaselineJoinHashTable::ERR_FAILED_TO_FETCH_COLUMN
static protected

Definition at line 296 of file BaselineJoinHashTable.h.

Referenced by reifyForDevice().

const int BaselineJoinHashTable::ERR_FAILED_TO_JOIN_ON_VIRTUAL_COLUMN {-4}
static protected

Definition at line 297 of file BaselineJoinHashTable.h.

Referenced by reifyForDevice().

const std::vector<InputTableInfo>& BaselineJoinHashTable::query_infos_
protected

The documentation for this class was generated from the following files: