OmniSciDB  d2f719934e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BaselineJoinHashTable Class Reference

#include <BaselineJoinHashTable.h>

+ Inheritance diagram for BaselineJoinHashTable:
+ Collaboration diagram for BaselineJoinHashTable:

Classes

struct  AlternativeCacheKeyForBaselineHashJoin
 

Public Member Functions

std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
std::set
< DecodedJoinHashBufferEntry >
toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
std::string getHashJoinType () const final
 
virtual ~BaselineJoinHashTable ()
 
- Public Member Functions inherited from HashJoin
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int8_t * getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static std::shared_ptr
< BaselineJoinHashTable >
getInstance (const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static size_t getShardCountForCondition (const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
 
static auto getCacheInvalidator () -> std::function< void()>
 
static HashtableRecycler * getHashTableCache ()
 
static HashingSchemeRecycler * getHashingSchemeCache ()
 
- Static Public Member Functions inherited from HashJoin
static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::pair< std::string,
std::shared_ptr< HashJoin > > 
getSyntheticInstance (std::vector< std::shared_ptr< Analyzer::BinOper >>, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 
static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static void checkHashJoinReplicationConstraint (const int table_id, const size_t shard_count, const Executor *executor)
 
static InnerOuter normalizeColumnPair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
 
static std::vector< InnerOuter > normalizeColumnPairs (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor)
 

Protected Member Functions

 BaselineJoinHashTable (const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, QueryPlanHash hashtable_cache_key, HashtableCacheMetaInfo hashtable_cache_meta_info, const TableIdToNodeMap &table_id_to_node_map)
 
size_t getComponentBufferSize () const noexcept override
 
size_t getKeyBufferSize () const noexcept
 
virtual void reifyWithLayout (const HashType layout)
 
virtual ColumnsForDevice fetchColumnsForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
 
virtual std::pair< size_t, size_t > approximateTupleCount (const std::vector< ColumnsForDevice > &, QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier) const
 
virtual size_t getKeyComponentWidth () const
 
virtual size_t getKeyComponentCount () const
 
virtual llvm::Value * codegenKey (const CompilationOptions &)
 
size_t shardCount () const
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
void reify (const HashType preferred_layout)
 
virtual void reifyForDevice (const ColumnsForDevice &columns_for_device, const HashType layout, const int device_id, const size_t entry_count, const size_t emitted_keys_count, const logger::ThreadId parent_thread_id)
 
virtual int initHashTableForDevice (const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_buckets, const HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const size_t entry_count, const size_t emitted_keys_count, const int device_id)
 
llvm::Value * hashPtr (const size_t index)
 
std::shared_ptr< HashTable > initHashTableOnCpuFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
 
void putHashTableOnCpuToCache (QueryPlanHash key, CacheItemType item_type, std::shared_ptr< HashTable > hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
 
std::pair< std::optional
< size_t >, size_t > 
getApproximateTupleCountFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier) const
 
bool isBitwiseEq () const override
 

Static Protected Member Functions

static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static QueryPlanHash getAlternativeCacheKey (AlternativeCacheKeyForBaselineHashJoin &info)
 

Protected Attributes

const std::shared_ptr
< Analyzer::BinOper
condition_
 
const JoinType join_type_
 
const std::vector
< InputTableInfo > & 
query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
std::mutex cpu_hash_table_buff_mutex_
 
std::vector< InnerOuter > inner_outer_pairs_
 
const Catalog_Namespace::Catalog * catalog_
 
const int device_count_
 
bool needs_dict_translation_
 
std::optional< HashType > layout_override_
 
const TableIdToNodeMap table_id_to_node_map_
 
QueryPlanHash hashtable_cache_key_
 
HashtableCacheMetaInfo hashtable_cache_meta_info_
 
- Protected Attributes inherited from HashJoin
std::vector< std::shared_ptr
< HashTable > > 
hash_tables_for_device_
 

Static Protected Attributes

static std::unique_ptr
< HashtableRecycler
hash_table_cache_
 
static std::unique_ptr
< HashingSchemeRecycler
hash_table_layout_cache_
 

Detailed Description

Definition at line 46 of file BaselineJoinHashTable.h.

Constructor & Destructor Documentation

virtual BaselineJoinHashTable::~BaselineJoinHashTable ( )
inline virtual

Definition at line 119 of file BaselineJoinHashTable.h.

119 {}
BaselineJoinHashTable::BaselineJoinHashTable ( const std::shared_ptr< Analyzer::BinOper >  condition,
const JoinType  join_type,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
ColumnCacheMap &  column_cache,
Executor *  executor,
const std::vector< InnerOuter > &  inner_outer_pairs,
const int  device_count,
QueryPlanHash  hashtable_cache_key,
HashtableCacheMetaInfo  hashtable_cache_meta_info,
const TableIdToNodeMap &  table_id_to_node_map 
)
protected

Definition at line 114 of file BaselineJoinHashTable.cpp.

References CHECK_GT, device_count_, and HashJoin::hash_tables_for_device_.

Referenced by getInstance().

126  : condition_(condition)
127  , join_type_(join_type)
128  , query_infos_(query_infos)
129  , memory_level_(memory_level)
130  , executor_(executor)
131  , column_cache_(column_cache)
132  , inner_outer_pairs_(inner_outer_pairs)
133  , catalog_(executor->getCatalog())
134  , device_count_(device_count)
135  , needs_dict_translation_(false)
136  , table_id_to_node_map_(table_id_to_node_map)
137  , hashtable_cache_key_(hashtable_cache_key)
138  , hashtable_cache_meta_info_(hashtable_cache_meta_info) {
140  hash_tables_for_device_.resize(std::max(device_count_, 1));
141 }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:296
const TableIdToNodeMap table_id_to_node_map_
#define CHECK_GT(x, y)
Definition: Logger.h:223
const std::vector< InputTableInfo > & query_infos_
std::vector< InnerOuter > inner_outer_pairs_
ColumnCacheMap & column_cache_
HashtableCacheMetaInfo hashtable_cache_meta_info_
const Catalog_Namespace::Catalog * catalog_
const Data_Namespace::MemoryLevel memory_level_
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the caller graph for this function:

Member Function Documentation

std::pair< size_t, size_t > BaselineJoinHashTable::approximateTupleCount ( const std::vector< ColumnsForDevice > &  columns_per_device,
QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier 
) const
protected virtual

Definition at line 359 of file BaselineJoinHashTable.cpp.

References approximate_distinct_tuples(), approximate_distinct_tuples_on_device(), threading_serial::async(), Bitmap, CHECK, CHECK_EQ, CPU, Data_Namespace::CPU_LEVEL, cpu_threads(), device_count_, executor_, getApproximateTupleCountFromCache(), HashJoin::getCompositeKeyInfo(), getEffectiveMemoryLevel(), GPU, Data_Namespace::GPU_LEVEL, hll_size(), hll_unify(), i, inner_outer_pairs_, transfer_flat_object_to_gpu(), transfer_vector_of_flat_objects_to_gpu(), UNREACHABLE, and VLOG.

Referenced by reifyWithLayout().

363  {
364  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
365  CountDistinctDescriptor count_distinct_desc{
367  0,
368  11,
369  true,
370  effective_memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
373  1};
374  const auto padded_size_bytes = count_distinct_desc.bitmapPaddedSizeBytes();
375 
376  CHECK(!columns_per_device.empty() && !columns_per_device.front().join_columns.empty());
377 
378  if (effective_memory_level == Data_Namespace::MemoryLevel::CPU_LEVEL) {
379  const auto composite_key_info =
381  const auto cached_count_info =
382  getApproximateTupleCountFromCache(key, item_type, device_identifier);
383  if (cached_count_info.first) {
384  VLOG(1) << "Using a cached tuple count: " << *cached_count_info.first
385  << ", emitted keys count: " << cached_count_info.second;
386  return std::make_pair(*cached_count_info.first, cached_count_info.second);
387  }
388  int thread_count = cpu_threads();
389  std::vector<uint8_t> hll_buffer_all_cpus(thread_count * padded_size_bytes);
390  auto hll_result = &hll_buffer_all_cpus[0];
391 
392  approximate_distinct_tuples(hll_result,
393  count_distinct_desc.bitmap_sz_bits,
394  padded_size_bytes,
395  columns_per_device.front().join_columns,
396  columns_per_device.front().join_column_types,
397  thread_count);
398  for (int i = 1; i < thread_count; ++i) {
399  hll_unify(hll_result,
400  hll_result + i * padded_size_bytes,
401  1 << count_distinct_desc.bitmap_sz_bits);
402  }
403  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
404  }
405 #ifdef HAVE_CUDA
406  auto data_mgr = executor_->getDataMgr();
407  std::vector<std::vector<uint8_t>> host_hll_buffers(device_count_);
408  for (auto& host_hll_buffer : host_hll_buffers) {
409  host_hll_buffer.resize(count_distinct_desc.bitmapPaddedSizeBytes());
410  }
411  std::vector<std::future<void>> approximate_distinct_device_threads;
412  for (int device_id = 0; device_id < device_count_; ++device_id) {
413  approximate_distinct_device_threads.emplace_back(std::async(
415  [device_id,
416  &columns_per_device,
417  &count_distinct_desc,
418  data_mgr,
419  &host_hll_buffers] {
420  auto allocator = data_mgr->createGpuAllocator(device_id);
421  auto device_hll_buffer =
422  allocator->alloc(count_distinct_desc.bitmapPaddedSizeBytes());
423  data_mgr->getCudaMgr()->zeroDeviceMem(
424  device_hll_buffer, count_distinct_desc.bitmapPaddedSizeBytes(), device_id);
425  const auto& columns_for_device = columns_per_device[device_id];
426  auto join_columns_gpu = transfer_vector_of_flat_objects_to_gpu(
427  columns_for_device.join_columns, *allocator);
428  auto join_column_types_gpu = transfer_vector_of_flat_objects_to_gpu(
429  columns_for_device.join_column_types, *allocator);
430  const auto key_handler =
431  GenericKeyHandler(columns_for_device.join_columns.size(),
432  true,
433  join_columns_gpu,
434  join_column_types_gpu,
435  nullptr,
436  nullptr);
437  const auto key_handler_gpu =
438  transfer_flat_object_to_gpu(key_handler, *allocator);
440  reinterpret_cast<uint8_t*>(device_hll_buffer),
441  count_distinct_desc.bitmap_sz_bits,
442  key_handler_gpu,
443  columns_for_device.join_columns[0].num_elems);
444 
445  auto& host_hll_buffer = host_hll_buffers[device_id];
446  allocator->copyFromDevice(&host_hll_buffer[0],
447  device_hll_buffer,
448  count_distinct_desc.bitmapPaddedSizeBytes());
449  }));
450  }
451  for (auto& child : approximate_distinct_device_threads) {
452  child.get();
453  }
454  CHECK_EQ(Data_Namespace::MemoryLevel::GPU_LEVEL, effective_memory_level);
455  auto& result_hll_buffer = host_hll_buffers.front();
456  auto hll_result = reinterpret_cast<int32_t*>(&result_hll_buffer[0]);
457  for (int device_id = 1; device_id < device_count_; ++device_id) {
458  auto& host_hll_buffer = host_hll_buffers[device_id];
459  hll_unify(hll_result,
460  reinterpret_cast<int32_t*>(&host_hll_buffer[0]),
461  1 << count_distinct_desc.bitmap_sz_bits);
462  }
463  return std::make_pair(hll_size(hll_result, count_distinct_desc.bitmap_sz_bits), 0);
464 #else
465  UNREACHABLE();
466  return {0, 0};
467 #endif // HAVE_CUDA
468 }
#define CHECK_EQ(x, y)
Definition: Logger.h:219
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
T * transfer_flat_object_to_gpu(const T &object, DeviceAllocator &allocator)
void hll_unify(T1 *lhs, T2 *rhs, const size_t m)
Definition: HyperLogLog.h:109
#define UNREACHABLE()
Definition: Logger.h:255
std::pair< std::optional< size_t >, size_t > getApproximateTupleCountFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier) const
size_t hll_size(const T *M, const size_t bitmap_sz_bits)
Definition: HyperLogLog.h:90
std::vector< InnerOuter > inner_outer_pairs_
void approximate_distinct_tuples(uint8_t *hll_buffer_all_cpus, const uint32_t b, const size_t padded_size_bytes, const std::vector< JoinColumn > &join_column_per_key, const std::vector< JoinColumnTypeInfo > &type_info_per_key, const int thread_count)
future< Result > async(Fn &&fn, Args &&...args)
static CompositeKeyInfo getCompositeKeyInfo(const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor)
Definition: HashJoin.cpp:334
void approximate_distinct_tuples_on_device(uint8_t *hll_buffer, const uint32_t b, const GenericKeyHandler *key_handler, const int64_t num_elems)
#define CHECK(condition)
Definition: Logger.h:211
T * transfer_vector_of_flat_objects_to_gpu(const std::vector< T > &vec, DeviceAllocator &allocator)
int cpu_threads()
Definition: thread_count.h:24
#define VLOG(n)
Definition: Logger.h:305

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * BaselineJoinHashTable::codegenKey ( const CompilationOptions &  co)
protected virtual

Definition at line 847 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), executor_, get_int_type(), get_max_rte_scan_table(), getKeyComponentCount(), getKeyComponentWidth(), i, inner_outer_pairs_, LL_BUILDER, LL_CONTEXT, LL_INT, and self_join_not_covered_by_left_deep_tree().

Referenced by codegenMatchingSet(), and codegenSlot().

847  {
848  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
849  const auto key_component_width = getKeyComponentWidth();
850  CHECK(key_component_width == 4 || key_component_width == 8);
851  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
852  llvm::Value* key_buff_lv{nullptr};
853  switch (key_component_width) {
854  case 4:
855  key_buff_lv =
856  LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv);
857  break;
858  case 8:
859  key_buff_lv =
860  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
861  break;
862  default:
863  CHECK(false);
864  }
865 
866  CodeGenerator code_generator(executor_);
867  for (size_t i = 0; i < getKeyComponentCount(); ++i) {
868  const auto key_comp_dest_lv = LL_BUILDER.CreateGEP(key_buff_lv, LL_INT(i));
869  const auto& inner_outer_pair = inner_outer_pairs_[i];
870  const auto outer_col = inner_outer_pair.second;
871  const auto key_col_var = dynamic_cast<const Analyzer::ColumnVar*>(outer_col);
872  const auto val_col_var =
873  dynamic_cast<const Analyzer::ColumnVar*>(inner_outer_pair.first);
874  if (key_col_var && val_col_var &&
876  key_col_var,
877  val_col_var,
878  get_max_rte_scan_table(executor_->cgen_state_->scan_idx_to_hash_pos_))) {
879  throw std::runtime_error(
880  "Query execution fails because the query contains not supported self-join "
881  "pattern. We suspect the query requires multiple left-deep join tree due to "
882  "the join condition of the self-join and is not supported for now. Please "
883  "consider rewriting table order in "
884  "FROM clause.");
885  }
886  const auto col_lvs = code_generator.codegen(outer_col, true, co);
887  CHECK_EQ(size_t(1), col_lvs.size());
888  const auto col_lv = LL_BUILDER.CreateSExt(
889  col_lvs.front(), get_int_type(key_component_width * 8, LL_CONTEXT));
890  LL_BUILDER.CreateStore(col_lv, key_comp_dest_lv);
891  }
892  return key_buff_lv;
893 }
#define CHECK_EQ(x, y)
Definition: Logger.h:219
bool self_join_not_covered_by_left_deep_tree(const Analyzer::ColumnVar *key_side, const Analyzer::ColumnVar *val_side, const int max_rte_covered)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const int get_max_rte_scan_table(std::unordered_map< int, llvm::Value * > &scan_idx_to_hash_pos)
std::vector< InnerOuter > inner_outer_pairs_
#define LL_INT(v)
#define LL_CONTEXT
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define LL_BUILDER
#define CHECK(condition)
Definition: Logger.h:211
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

HashJoinMatchingSet BaselineJoinHashTable::codegenMatchingSet ( const CompilationOptions &  co,
const size_t  index 
)
override virtual

Implements HashJoin.

Definition at line 765 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, HashJoin::codegenHashTableLoad(), codegenKey(), HashJoin::codegenMatchingSet(), executor_, get_int_type(), getComponentBufferSize(), HashJoin::getHashTableForDevice(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, offsetBufferOff(), OneToMany, and to_string().

767  {
768  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
769  const auto hash_table = getHashTableForDevice(size_t(0));
770  CHECK(hash_table);
771  const auto key_component_width = getKeyComponentWidth();
772  CHECK(key_component_width == 4 || key_component_width == 8);
773  auto key_buff_lv = codegenKey(co);
775  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
776  const auto composite_dict_ptr_type =
777  llvm::Type::getIntNPtrTy(LL_CONTEXT, key_component_width * 8);
778  const auto composite_key_dict =
779  hash_ptr->getType()->isPointerTy()
780  ? LL_BUILDER.CreatePointerCast(hash_ptr, composite_dict_ptr_type)
781  : LL_BUILDER.CreateIntToPtr(hash_ptr, composite_dict_ptr_type);
782  const auto key_component_count = getKeyComponentCount();
783  const auto key = executor_->cgen_state_->emitExternalCall(
784  "get_composite_key_index_" + std::to_string(key_component_width * 8),
786  {key_buff_lv,
787  LL_INT(key_component_count),
788  composite_key_dict,
789  LL_INT(hash_table->getEntryCount())});
790  auto one_to_many_ptr = hash_ptr;
791  if (one_to_many_ptr->getType()->isPointerTy()) {
792  one_to_many_ptr =
793  LL_BUILDER.CreatePtrToInt(hash_ptr, llvm::Type::getInt64Ty(LL_CONTEXT));
794  } else {
795  CHECK(one_to_many_ptr->getType()->isIntegerTy(64));
796  }
797  const auto composite_key_dict_size = offsetBufferOff();
798  one_to_many_ptr =
799  LL_BUILDER.CreateAdd(one_to_many_ptr, LL_INT(composite_key_dict_size));
801  {one_to_many_ptr, key, LL_INT(int64_t(0)), LL_INT(hash_table->getEntryCount() - 1)},
802  false,
803  false,
804  false,
806  executor_);
807 }
size_t offsetBufferOff() const noexceptoverride
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t)=0
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
Definition: HashJoin.cpp:215
size_t getComponentBufferSize() const noexceptoverride
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
virtual llvm::Value * codegenKey(const CompilationOptions &)
HashType getHashType() const noexceptoverride
#define LL_INT(v)
#define LL_CONTEXT
#define AUTOMATIC_IR_METADATA(CGENSTATE)
HashTable * getHashTableForDevice(const size_t device_id) const
Definition: HashJoin.h:241
#define LL_BUILDER
#define CHECK(condition)
Definition: Logger.h:211
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const

+ Here is the call graph for this function:

llvm::Value * BaselineJoinHashTable::codegenSlot ( const CompilationOptions &  co,
const size_t  index 
)
override virtual

Implements HashJoin.

Definition at line 747 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenKey(), executor_, get_int_type(), HashJoin::getHashTableForDevice(), getHashType(), getKeyComponentCount(), getKeyComponentWidth(), hashPtr(), LL_BUILDER, LL_CONTEXT, LL_INT, OneToOne, and to_string().

748  {
749  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
751  const auto key_component_width = getKeyComponentWidth();
752  CHECK(key_component_width == 4 || key_component_width == 8);
753  auto key_buff_lv = codegenKey(co);
754  const auto hash_ptr = hashPtr(index);
755  const auto key_ptr_lv =
756  LL_BUILDER.CreatePointerCast(key_buff_lv, llvm::Type::getInt8PtrTy(LL_CONTEXT));
757  const auto key_size_lv = LL_INT(getKeyComponentCount() * key_component_width);
758  const auto hash_table = getHashTableForDevice(size_t(0));
759  return executor_->cgen_state_->emitExternalCall(
760  "baseline_hash_join_idx_" + std::to_string(key_component_width * 8),
762  {hash_ptr, key_ptr_lv, key_size_lv, LL_INT(hash_table->getEntryCount())});
763 }
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
virtual llvm::Value * codegenKey(const CompilationOptions &)
HashType getHashType() const noexceptoverride
#define LL_INT(v)
#define LL_CONTEXT
#define AUTOMATIC_IR_METADATA(CGENSTATE)
HashTable * getHashTableForDevice(const size_t device_id) const
Definition: HashJoin.h:241
#define LL_BUILDER
llvm::Value * hashPtr(const size_t index)
#define CHECK(condition)
Definition: Logger.h:211
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const

+ Here is the call graph for this function:

size_t BaselineJoinHashTable::countBufferOff ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 813 of file BaselineJoinHashTable.cpp.

References getComponentBufferSize(), getHashType(), getKeyBufferSize(), HashJoin::layoutRequiresAdditionalBuffers(), and offsetBufferOff().

Referenced by payloadBufferOff(), toSet(), and toString().

813  {
816  } else {
817  return getKeyBufferSize();
818  }
819 }
size_t offsetBufferOff() const noexceptoverride
size_t getKeyBufferSize() const noexcept
size_t getComponentBufferSize() const noexceptoverride
HashType getHashType() const noexceptoverride
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:130

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ColumnsForDevice BaselineJoinHashTable::fetchColumnsForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
DeviceAllocator *  dev_buff_owner 
)
protected virtual

Definition at line 470 of file BaselineJoinHashTable.cpp.

References catalog_, column_cache_, executor_, HashJoin::fetchJoinColumn(), get_column_descriptor_maybe(), get_join_column_type_kind(), getEffectiveMemoryLevel(), inline_fixed_encoding_null_val(), inner_outer_pairs_, and isBitwiseEq().

Referenced by reifyWithLayout().

473  {
474  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
475 
476  std::vector<JoinColumn> join_columns;
477  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
478  std::vector<JoinColumnTypeInfo> join_column_types;
479  std::vector<JoinBucketInfo> join_bucket_info;
480  std::vector<std::shared_ptr<void>> malloc_owner;
481  for (const auto& inner_outer_pair : inner_outer_pairs_) {
482  const auto inner_col = inner_outer_pair.first;
483  const auto inner_cd = get_column_descriptor_maybe(
484  inner_col->get_column_id(), inner_col->get_table_id(), *catalog_);
485  if (inner_cd && inner_cd->isVirtualCol) {
487  }
488  join_columns.emplace_back(fetchJoinColumn(inner_col,
489  fragments,
490  effective_memory_level,
491  device_id,
492  chunks_owner,
493  dev_buff_owner,
494  malloc_owner,
495  executor_,
496  &column_cache_));
497  const auto& ti = inner_col->get_type_info();
498  join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
499  0,
500  0,
502  isBitwiseEq(),
503  0,
505  }
506  return {join_columns, join_column_types, chunks_owner, join_bucket_info, malloc_owner};
507 }
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
Definition: HashJoin.cpp:54
std::vector< InnerOuter > inner_outer_pairs_
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:220
ColumnCacheMap & column_cache_
const Catalog_Namespace::Catalog * catalog_
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
bool isBitwiseEq() const override
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static QueryPlanHash BaselineJoinHashTable::getAlternativeCacheKey ( AlternativeCacheKeyForBaselineHashJoin &  info)
inline static protected

Definition at line 210 of file BaselineJoinHashTable.h.

References BaselineJoinHashTable::AlternativeCacheKeyForBaselineHashJoin::inner_outer_pairs, BaselineJoinHashTable::AlternativeCacheKeyForBaselineHashJoin::join_type, BaselineJoinHashTable::AlternativeCacheKeyForBaselineHashJoin::num_elements, BaselineJoinHashTable::AlternativeCacheKeyForBaselineHashJoin::optype, and toString().

Referenced by reifyWithLayout().

211  {
212  auto hash = boost::hash_value(::toString(info.optype));
213  for (InnerOuter inner_outer : info.inner_outer_pairs) {
214  auto inner_col = inner_outer.first;
215  auto rhs_col_var = dynamic_cast<const Analyzer::ColumnVar*>(inner_outer.second);
216  auto outer_col = rhs_col_var ? rhs_col_var : inner_col;
217  boost::hash_combine(hash, inner_col->toString());
218  if (inner_col->get_type_info().is_string()) {
219  boost::hash_combine(hash, outer_col->toString());
220  }
221  }
222  boost::hash_combine(hash, info.num_elements);
223  boost::hash_combine(hash, ::toString(info.join_type));
224  return hash;
225  }
std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
Definition: HashJoin.h:77

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< std::optional< size_t >, size_t > BaselineJoinHashTable::getApproximateTupleCountFromCache ( QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier 
) const
protected

Definition at line 969 of file BaselineJoinHashTable.cpp.

References CHECK, getInnerTableId(), hash_table_cache_, inner_outer_pairs_, HashtableRecycler::isSafeToCacheHashtable(), needs_dict_translation_, and table_id_to_node_map_.

Referenced by approximateTupleCount().

972  {
977  auto hash_table_ptr =
978  hash_table_cache_->getItemFromCache(key, item_type, device_identifier);
979  if (hash_table_ptr) {
980  return std::make_pair(hash_table_ptr->getEntryCount() / 2,
981  hash_table_ptr->getEmittedKeysCount());
982  }
983  }
984  return std::make_pair(std::nullopt, 0);
985 }
static bool isSafeToCacheHashtable(const TableIdToNodeMap &table_id_to_node_map, bool need_dict_translation, const int table_id)
const TableIdToNodeMap table_id_to_node_map_
std::vector< InnerOuter > inner_outer_pairs_
static std::unique_ptr< HashtableRecycler > hash_table_cache_
int getInnerTableId() const noexcept override
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static auto BaselineJoinHashTable::getCacheInvalidator ( ) -> std::function<void()>
inline, static

Definition at line 98 of file BaselineJoinHashTable.h.

References CHECK, hash_table_cache_, and hash_table_layout_cache_.

98  {
101  return []() -> void {
102  auto layout_cache_invalidator = hash_table_layout_cache_->getCacheInvalidator();
103  layout_cache_invalidator();
104 
105  auto main_cache_invalidator = hash_table_cache_->getCacheInvalidator();
106  main_cache_invalidator();
107  };
108  }
static std::unique_ptr< HashtableRecycler > hash_table_cache_
#define CHECK(condition)
Definition: Logger.h:211
static std::unique_ptr< HashingSchemeRecycler > hash_table_layout_cache_
size_t BaselineJoinHashTable::getComponentBufferSize ( ) const
override, protected, virtual, noexcept

Implements HashJoin.

Definition at line 842 of file BaselineJoinHashTable.cpp.

References HashJoin::getHashTableForDevice().

Referenced by codegenMatchingSet(), countBufferOff(), and payloadBufferOff().

842  {
843  const auto hash_table = getHashTableForDevice(size_t(0));
844  return hash_table->getEntryCount() * sizeof(int32_t);
845 }
HashTable * getHashTableForDevice(const size_t device_id) const
Definition: HashJoin.h:241

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int BaselineJoinHashTable::getDeviceCount ( ) const
inline, override, virtual, noexcept

Implements HashJoin.

Definition at line 88 of file BaselineJoinHashTable.h.

References device_count_.

88 { return device_count_; };
Data_Namespace::MemoryLevel BaselineJoinHashTable::getEffectiveMemoryLevel ( const std::vector< InnerOuter > &  inner_outer_pairs) const
protected

Definition at line 556 of file BaselineJoinHashTable.cpp.

References Data_Namespace::CPU_LEVEL, executor_, memory_level_, needs_dict_translation_, and needs_dictionary_translation().

Referenced by approximateTupleCount(), fetchColumnsForDevice(), and reifyForDevice().

557  {
558  for (const auto& inner_outer_pair : inner_outer_pairs) {
560  inner_outer_pair.first, inner_outer_pair.second, executor_)) {
563  }
564  }
565  return memory_level_;
566 }
bool needs_dictionary_translation(const Analyzer::ColumnVar *inner_col, const Analyzer::Expr *outer_col_expr, const Executor *executor)
const Data_Namespace::MemoryLevel memory_level_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static HashingSchemeRecycler* BaselineJoinHashTable::getHashingSchemeCache ( )
inline, static

Definition at line 114 of file BaselineJoinHashTable.h.

References CHECK, and hash_table_layout_cache_.

114  {
116  return hash_table_layout_cache_.get();
117  }
#define CHECK(condition)
Definition: Logger.h:211
static std::unique_ptr< HashingSchemeRecycler > hash_table_layout_cache_
std::string BaselineJoinHashTable::getHashJoinType ( ) const
inline, final, virtual

Implements HashJoin.

Definition at line 96 of file BaselineJoinHashTable.h.

96 { return "Baseline"; }
static HashtableRecycler* BaselineJoinHashTable::getHashTableCache ( )
inline, static

Definition at line 110 of file BaselineJoinHashTable.h.

References CHECK, and hash_table_cache_.

Referenced by QueryRunner::QueryRunner::getCachedBaselineHashTable(), QueryRunner::QueryRunner::getCachedHashtableWithoutCacheKey(), QueryRunner::QueryRunner::getCacheItemMetric(), QueryRunner::QueryRunner::getEntryCntCachedBaselineHashTable(), and QueryRunner::QueryRunner::getNumberOfCachedBaselineJoinHashTables().

110  {
112  return hash_table_cache_.get();
113  }
static std::unique_ptr< HashtableRecycler > hash_table_cache_
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the caller graph for this function:

HashType BaselineJoinHashTable::getHashType ( ) const
override, virtual, noexcept

Implements HashJoin.

Definition at line 924 of file BaselineJoinHashTable.cpp.

References CHECK, HashJoin::getHashTableForDevice(), and layout_override_.

Referenced by codegenMatchingSet(), codegenSlot(), countBufferOff(), payloadBufferOff(), and toString().

924  {
925  auto hash_table = getHashTableForDevice(size_t(0));
926  CHECK(hash_table);
927  if (layout_override_) {
928  return *layout_override_;
929  } else {
930  return hash_table->getLayout();
931  }
932 }
HashTable * getHashTableForDevice(const size_t device_id) const
Definition: HashJoin.h:241
std::optional< HashType > layout_override_
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int BaselineJoinHashTable::getInnerTableId ( ) const
override, virtual, noexcept

Implements HashJoin.

Definition at line 909 of file BaselineJoinHashTable.cpp.

References CHECK, and inner_outer_pairs_.

Referenced by getApproximateTupleCountFromCache(), initHashTableForDevice(), reify(), and reifyWithLayout().

909  {
910  try {
912  } catch (...) {
913  CHECK(false);
914  }
915  return 0;
916 }
std::vector< InnerOuter > inner_outer_pairs_
int getInnerTableId() const noexcept override
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the caller graph for this function:

int BaselineJoinHashTable::getInnerTableId ( const std::vector< InnerOuter > &  inner_outer_pairs)
static, protected

Definition at line 934 of file BaselineJoinHashTable.cpp.

References CHECK.

935  {
936  CHECK(!inner_outer_pairs.empty());
937  const auto first_inner_col = inner_outer_pairs.front().first;
938  return first_inner_col->get_table_id();
939 }
#define CHECK(condition)
Definition: Logger.h:211
int BaselineJoinHashTable::getInnerTableRteIdx ( ) const
override, virtual, noexcept

Implements HashJoin.

Definition at line 918 of file BaselineJoinHashTable.cpp.

References CHECK, and inner_outer_pairs_.

918  {
919  CHECK(!inner_outer_pairs_.empty());
920  const auto first_inner_col = inner_outer_pairs_.front().first;
921  return first_inner_col->get_rte_idx();
922 }
std::vector< InnerOuter > inner_outer_pairs_
#define CHECK(condition)
Definition: Logger.h:211
std::shared_ptr< BaselineJoinHashTable > BaselineJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper condition,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap column_cache,
Executor executor,
const HashTableBuildDagMap hashtable_build_dag_map,
const TableIdToNodeMap table_id_to_node_map 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 41 of file BaselineJoinHashTable.cpp.

References BaselineJoinHashTable(), HashtableRecycler::getHashtableCacheKey(), HashJoin::getHashTypeString(), HashJoin::normalizeColumnPairs(), VLOG, and VLOGGING.

Referenced by HashJoin::getInstance().

51  {
52  decltype(std::chrono::steady_clock::now()) ts1, ts2;
53 
54  if (VLOGGING(1)) {
55  VLOG(1) << "Building keyed hash table " << getHashTypeString(preferred_hash_type)
56  << " for qual: " << condition->toString();
57  ts1 = std::chrono::steady_clock::now();
58  }
59  auto inner_outer_pairs = HashJoin::normalizeColumnPairs(
60  condition.get(), *executor->getCatalog(), executor->getTemporaryTables());
61  auto hashtable_cache_key =
63  condition->get_optype(),
64  join_type,
65  hashtable_build_dag_map,
66  executor);
67  auto join_hash_table = std::shared_ptr<BaselineJoinHashTable>(
68  new BaselineJoinHashTable(condition,
69  join_type,
70  query_infos,
71  memory_level,
72  column_cache,
73  executor,
74  inner_outer_pairs,
75  device_count,
76  hashtable_cache_key.first,
77  hashtable_cache_key.second,
78  table_id_to_node_map));
79  try {
80  join_hash_table->reify(preferred_hash_type);
81  } catch (const TableMustBeReplicated& e) {
82  // Throw a runtime error to abort the query
83  join_hash_table->freeHashBufferMemory();
84  throw std::runtime_error(e.what());
85  } catch (const HashJoinFail& e) {
86  // HashJoinFail exceptions log an error and trigger a retry with a join loop (if
87  // possible)
88  join_hash_table->freeHashBufferMemory();
89  throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
90  "involved in equijoin | ") +
91  e.what());
92  } catch (const ColumnarConversionNotSupported& e) {
93  throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
94  e.what());
95  } catch (const OutOfMemory& e) {
96  throw HashJoinFail(
97  std::string("Ran out of memory while building hash tables for equijoin | ") +
98  e.what());
99  } catch (const std::exception& e) {
100  throw std::runtime_error(
101  std::string("Fatal error while attempting to build hash tables for join: ") +
102  e.what());
103  }
104  if (VLOGGING(1)) {
105  ts2 = std::chrono::steady_clock::now();
106  VLOG(1) << "Built keyed hash table "
107  << getHashTypeString(join_hash_table->getHashType()) << " in "
108  << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
109  << " ms";
110  }
111  return join_hash_table;
112 }
static std::pair< QueryPlanHash, HashtableCacheMetaInfo > getHashtableCacheKey(const std::vector< InnerOuter > &inner_outer_pairs, const SQLOps op_type, const JoinType join_type, const HashTableBuildDagMap &hashtable_build_dag_map, Executor *executor)
BaselineJoinHashTable(const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, QueryPlanHash hashtable_cache_key, HashtableCacheMetaInfo hashtable_cache_meta_info, const TableIdToNodeMap &table_id_to_node_map)
#define VLOGGING(n)
Definition: Logger.h:209
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:134
static std::vector< InnerOuter > normalizeColumnPairs(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:744
if(yyssp >=yyss+yystacksize-1)
#define VLOG(n)
Definition: Logger.h:305

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getKeyBufferSize ( ) const
protected, noexcept

Definition at line 829 of file BaselineJoinHashTable.cpp.

References CHECK, HashJoin::getHashTableForDevice(), getKeyComponentCount(), getKeyComponentWidth(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by countBufferOff(), offsetBufferOff(), and payloadBufferOff().

829  {
830  const auto key_component_width = getKeyComponentWidth();
831  CHECK(key_component_width == 4 || key_component_width == 8);
832  const auto key_component_count = getKeyComponentCount();
833  auto hash_table = getHashTableForDevice(size_t(0));
834  CHECK(hash_table);
835  if (layoutRequiresAdditionalBuffers(hash_table->getLayout())) {
836  return hash_table->getEntryCount() * key_component_count * key_component_width;
837  } else {
838  return hash_table->getEntryCount() * (key_component_count + 1) * key_component_width;
839  }
840 }
HashTable * getHashTableForDevice(const size_t device_id) const
Definition: HashJoin.h:241
#define CHECK(condition)
Definition: Logger.h:211
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:130

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getKeyComponentCount ( ) const
protectedvirtual

Definition at line 552 of file BaselineJoinHashTable.cpp.

References inner_outer_pairs_.

Referenced by codegenKey(), codegenMatchingSet(), codegenSlot(), getKeyBufferSize(), initHashTableForDevice(), toSet(), and toString().

552  {
553  return inner_outer_pairs_.size();
554 }
std::vector< InnerOuter > inner_outer_pairs_

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::getKeyComponentWidth ( ) const
protectedvirtual

Definition at line 540 of file BaselineJoinHashTable.cpp.

References CHECK_EQ, and inner_outer_pairs_.

Referenced by codegenKey(), codegenMatchingSet(), codegenSlot(), getKeyBufferSize(), initHashTableForDevice(), toSet(), and toString().

540  {
541  for (const auto& inner_outer_pair : inner_outer_pairs_) {
542  const auto inner_col = inner_outer_pair.first;
543  const auto& inner_col_ti = inner_col->get_type_info();
544  if (inner_col_ti.get_logical_size() > 4) {
545  CHECK_EQ(8, inner_col_ti.get_logical_size());
546  return 8;
547  }
548  }
549  return 4;
550 }
#define CHECK_EQ(x, y)
Definition: Logger.h:219
std::vector< InnerOuter > inner_outer_pairs_

+ Here is the caller graph for this function:

Data_Namespace::MemoryLevel BaselineJoinHashTable::getMemoryLevel ( ) const
inline, override, virtual, noexcept

Implements HashJoin.

Definition at line 84 of file BaselineJoinHashTable.h.

References memory_level_.

84  {
85  return memory_level_;
86  };
const Data_Namespace::MemoryLevel memory_level_
size_t BaselineJoinHashTable::getShardCountForCondition ( const Analyzer::BinOper condition,
const Executor executor,
const std::vector< InnerOuter > &  inner_outer_pairs 
)
static

Definition at line 143 of file BaselineJoinHashTable.cpp.

References get_shard_count().

Referenced by RangeJoinHashTable::getInstance(), reify(), shardCount(), OverlapsJoinHashTable::shardCount(), and Executor::skipFragmentPair().

146  {
147  for (const auto& inner_outer_pair : inner_outer_pairs) {
148  const auto pair_shard_count = get_shard_count(inner_outer_pair, executor);
149  if (pair_shard_count) {
150  return pair_shard_count;
151  }
152  }
153  return 0;
154 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:790

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * BaselineJoinHashTable::hashPtr ( const size_t  index)
protected

Definition at line 895 of file BaselineJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, HashJoin::codegenHashTableLoad(), executor_, LL_BUILDER, and LL_CONTEXT.

Referenced by codegenSlot().

895  {
896  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
897  auto hash_ptr = HashJoin::codegenHashTableLoad(index, executor_);
898  const auto pi8_type = llvm::Type::getInt8PtrTy(LL_CONTEXT);
899  return hash_ptr->getType()->isPointerTy()
900  ? LL_BUILDER.CreatePointerCast(hash_ptr, pi8_type)
901  : LL_BUILDER.CreateIntToPtr(hash_ptr, pi8_type);
902 }
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
Definition: HashJoin.cpp:215
#define LL_CONTEXT
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define LL_BUILDER

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int BaselineJoinHashTable::initHashTableForDevice ( const std::vector< JoinColumn > &  join_columns,
const std::vector< JoinColumnTypeInfo > &  join_column_types,
const std::vector< JoinBucketInfo > &  join_buckets,
const HashType  layout,
const Data_Namespace::MemoryLevel  effective_memory_level,
const size_t  entry_count,
const size_t  emitted_keys_count,
const int  device_id 
)
protected, virtual

Definition at line 568 of file BaselineJoinHashTable.cpp.

References BaselineJoinHashTableBuilder::allocateDeviceMemory(), BASELINE_HT, CHECK, CHECK_EQ, CHECK_LT, count, CPU, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, cpu_hash_table_buff_mutex_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, executor_, HashJoin::getCompositeKeyInfo(), BaselineJoinHashTableBuilder::getHashTable(), HashJoin::getHashTypeString(), getInnerTableId(), getKeyComponentCount(), getKeyComponentWidth(), Data_Namespace::GPU_LEVEL, hash_table_layout_cache_, HashJoin::hash_tables_for_device_, hashtable_cache_key_, HT_HASHING_SCHEME, BaselineJoinHashTableBuilder::initHashTableOnCpu(), initHashTableOnCpuFromCache(), BaselineJoinHashTableBuilder::initHashTableOnGpu(), inner_outer_pairs_, HashtableRecycler::isSafeToCacheHashtable(), join_type_, memory_level_, needs_dict_translation_, putHashTableOnCpuToCache(), table_id_to_node_map_, transfer_vector_of_flat_objects_to_gpu(), UNREACHABLE, and VLOG.

Referenced by reifyForDevice().

576  {
577  auto timer = DEBUG_TIMER(__func__);
578  const auto key_component_count = getKeyComponentCount();
579  int err = 0;
580  decltype(std::chrono::steady_clock::now()) ts1, ts2;
581  ts1 = std::chrono::steady_clock::now();
582  auto allow_hashtable_recycling =
583  HashtableRecycler::isSafeToCacheHashtable(table_id_to_node_map_,
586  HashType hashtable_layout = layout;
587  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
588  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
589 
590  const auto composite_key_info =
591  HashJoin::getCompositeKeyInfo(inner_outer_pairs_, executor_);
592 
593  CHECK(!join_columns.empty());
594 
596  CHECK_EQ(device_id, size_t(0));
597  }
598  CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
599  std::shared_ptr<HashTable> hash_table{nullptr};
600  if (allow_hashtable_recycling) {
601  auto cached_hashtable_layout_type = hash_table_layout_cache_->getItemFromCache(
605  {});
606  if (cached_hashtable_layout_type) {
607  hashtable_layout = *cached_hashtable_layout_type;
608  VLOG(1) << "Recycle hashtable layout: " << getHashTypeString(hashtable_layout);
609  }
613  }
614 
615  if (hash_table) {
616  hash_tables_for_device_[device_id] = hash_table;
617  } else {
619 
620  const auto key_handler =
621  GenericKeyHandler(key_component_count,
622  true,
623  &join_columns[0],
624  &join_column_types[0],
625  &composite_key_info.sd_inner_proxy_per_key[0],
626  &composite_key_info.sd_outer_proxy_per_key[0]);
627  err = builder.initHashTableOnCpu(&key_handler,
628  composite_key_info,
629  join_columns,
630  join_column_types,
631  join_bucket_info,
632  entry_count,
633  join_columns.front().num_elems,
634  hashtable_layout,
635  join_type_,
638  hash_tables_for_device_[device_id] = builder.getHashTable();
639  ts2 = std::chrono::steady_clock::now();
640  auto hashtable_build_time =
641  std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count();
642  if (!err && allow_hashtable_recycling && hash_tables_for_device_[device_id]) {
643  // add ht-related items to cache iff we have a valid hashtable
646  hash_tables_for_device_[device_id],
648  hashtable_build_time);
649 
650  hash_table_layout_cache_->putItemToCache(
652  hash_tables_for_device_[device_id]->getLayout(),
655  0,
656  0,
657  {});
658  }
659  }
660  // Transfer the hash table on the GPU if we've only built it on CPU
661  // but the query runs on GPU (join on dictionary encoded columns).
662  // Don't transfer the buffer if there was an error since we'll bail anyway.
663  if (memory_level_ == Data_Namespace::GPU_LEVEL && !err) {
664 #ifdef HAVE_CUDA
666 
667  builder.allocateDeviceMemory(layout,
670  entry_count,
671  emitted_keys_count,
672  device_id,
673  executor_);
674 
675  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
676  auto cpu_source_hash_table = hash_tables_for_device_[device_id];
677  CHECK(cpu_source_hash_table);
678  auto gpu_target_hash_table = builder.getHashTable();
679  CHECK(gpu_target_hash_table);
680 
681  const auto gpu_buff = gpu_target_hash_table->getGpuBuffer();
682  CHECK(gpu_buff);
683  auto data_mgr = executor_->getDataMgr();
684  auto allocator = data_mgr->createGpuAllocator(device_id);
685  allocator->copyToDevice(
686  gpu_buff,
687  cpu_source_hash_table->getCpuBuffer(),
688  cpu_source_hash_table->getHashTableBufferSize(ExecutorDeviceType::CPU));
689  hash_tables_for_device_[device_id] = std::move(gpu_target_hash_table);
690 #else
691  CHECK(false);
692 #endif
693  }
694  } else {
695 #ifdef HAVE_CUDA
697 
698  auto data_mgr = executor_->getDataMgr();
699  CudaAllocator allocator(data_mgr, device_id);
700  auto join_column_types_gpu =
701  transfer_vector_of_flat_objects_to_gpu(join_column_types, allocator);
702  auto join_columns_gpu =
703  transfer_vector_of_flat_objects_to_gpu(join_columns, allocator);
704  const auto key_handler = GenericKeyHandler(key_component_count,
705  true,
706  join_columns_gpu,
707  join_column_types_gpu,
708  nullptr,
709  nullptr);
710 
711  err = builder.initHashTableOnGpu(&key_handler,
712  join_columns,
713  hashtable_layout,
714  join_type_,
717  entry_count,
718  emitted_keys_count,
719  device_id,
720  executor_);
721  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
722  hash_tables_for_device_[device_id] = builder.getHashTable();
723  if (!err && allow_hashtable_recycling && hash_tables_for_device_[device_id]) {
724  // add layout to cache iff we have a valid hashtable
725  hash_table_layout_cache_->putItemToCache(
727  hash_tables_for_device_[device_id]->getLayout(),
730  0,
731  0,
732  {});
733  }
734 #else
735  UNREACHABLE();
736 #endif
737  }
738  return err;
739 }
#define CHECK_EQ(x, y)
Definition: Logger.h:219
void putHashTableOnCpuToCache(QueryPlanHash key, CacheItemType item_type, std::shared_ptr< HashTable > hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:296
#define UNREACHABLE()
Definition: Logger.h:255
const TableIdToNodeMap table_id_to_node_map_
std::shared_ptr< HashTable > initHashTableOnCpuFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
std::vector< InnerOuter > inner_outer_pairs_
int count
int initHashTableOnGpu(KEY_HANDLER *key_handler, const std::vector< JoinColumn > &join_columns, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id, const Executor *executor)
static CompositeKeyInfo getCompositeKeyInfo(const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor)
Definition: HashJoin.cpp:334
#define CHECK_LT(x, y)
Definition: Logger.h:221
int getInnerTableId() const noexcept override
std::unique_ptr< BaselineHashTable > getHashTable()
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:134
const Data_Namespace::MemoryLevel memory_level_
void allocateDeviceMemory(const HashType layout, const size_t key_component_width, const size_t key_component_count, const size_t keyspace_entry_count, const size_t emitted_keys_count, const int device_id, const Executor *executor)
#define CHECK(condition)
Definition: Logger.h:211
#define DEBUG_TIMER(name)
Definition: Logger.h:358
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
T * transfer_vector_of_flat_objects_to_gpu(const std::vector< T > &vec, DeviceAllocator &allocator)
static std::unique_ptr< HashingSchemeRecycler > hash_table_layout_cache_
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:132
HashType
Definition: HashTable.h:19
if(yyssp >=yyss+yystacksize-1)
#define VLOG(n)
Definition: Logger.h:305
int initHashTableOnCpu(KEY_HANDLER *key_handler, const CompositeKeyInfo &composite_key_info, const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const size_t keyspace_entry_count, const size_t keys_for_all_rows, const HashType layout, const JoinType join_type, const size_t key_component_width, const size_t key_component_count)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< HashTable > BaselineJoinHashTable::initHashTableOnCpuFromCache ( QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier 
)
protected

Definition at line 941 of file BaselineJoinHashTable.cpp.

References CHECK, DEBUG_TIMER, hash_table_cache_, and VLOG.

Referenced by initHashTableForDevice().

944  {
945  auto timer = DEBUG_TIMER(__func__);
946  VLOG(1) << "Checking CPU hash table cache.";
948  return hash_table_cache_->getItemFromCache(key, item_type, device_identifier);
949 }
static std::unique_ptr< HashtableRecycler > hash_table_cache_
#define CHECK(condition)
Definition: Logger.h:211
#define DEBUG_TIMER(name)
Definition: Logger.h:358
#define VLOG(n)
Definition: Logger.h:305

+ Here is the caller graph for this function:

bool BaselineJoinHashTable::isBitwiseEq ( ) const
override, protected, virtual

Implements HashJoin.

Definition at line 987 of file BaselineJoinHashTable.cpp.

References condition_, and kBW_EQ.

Referenced by fetchColumnsForDevice().

987  {
988  return condition_->get_optype() == kBW_EQ;
989 }
Definition: sqldefs.h:31
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::offsetBufferOff ( ) const
override, virtual, noexcept

Implements HashJoin.

Definition at line 809 of file BaselineJoinHashTable.cpp.

References getKeyBufferSize().

Referenced by codegenMatchingSet(), countBufferOff(), toSet(), and toString().

809  {
810  return getKeyBufferSize();
811 }
size_t getKeyBufferSize() const noexcept

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::payloadBufferOff ( ) const
override, virtual, noexcept

Implements HashJoin.

Definition at line 821 of file BaselineJoinHashTable.cpp.

References countBufferOff(), getComponentBufferSize(), getHashType(), getKeyBufferSize(), and HashJoin::layoutRequiresAdditionalBuffers().

Referenced by toSet(), and toString().

821  {
824  } else {
825  return getKeyBufferSize();
826  }
827 }
size_t getKeyBufferSize() const noexcept
size_t getComponentBufferSize() const noexcept override
HashType getHashType() const noexcept override
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:130
size_t countBufferOff() const noexcept override

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void BaselineJoinHashTable::putHashTableOnCpuToCache ( QueryPlanHash  key,
CacheItemType  item_type,
std::shared_ptr< HashTable hashtable_ptr,
DeviceIdentifier  device_identifier,
size_t  hashtable_building_time 
)
protected

Definition at line 951 of file BaselineJoinHashTable.cpp.

References CHECK, CPU, and hash_table_cache_.

Referenced by initHashTableForDevice().

956  {
958  CHECK(hashtable_ptr && !hashtable_ptr->getGpuBuffer());
959  hash_table_cache_->putItemToCache(
960  key,
961  hashtable_ptr,
962  item_type,
963  device_identifier,
964  hashtable_ptr->getHashTableBufferSize(ExecutorDeviceType::CPU),
965  hashtable_building_time);
966 }
static std::unique_ptr< HashtableRecycler > hash_table_cache_
#define CHECK(condition)
Definition: Logger.h:211

+ Here is the caller graph for this function:

void BaselineJoinHashTable::reify ( const HashType  preferred_layout)
protected

Definition at line 229 of file BaselineJoinHashTable.cpp.

References CHECK_EQ, CHECK_LT, HashJoin::checkHashJoinReplicationConstraint(), condition_, DEBUG_TIMER, device_count_, executor_, HashJoin::freeHashBufferMemory(), HashJoin::getCompositeKeyInfo(), getInnerTableId(), getShardCountForCondition(), inner_outer_pairs_, ManyToMany, OneToMany, reifyWithLayout(), and VLOG.

229  {
230  auto timer = DEBUG_TIMER(__func__);
232  const auto composite_key_info =
234 
239  executor_);
240 
241  if (condition_->is_overlaps_oper()) {
242  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
243  HashType layout;
244 
245  if (inner_outer_pairs_[0].second->get_type_info().is_array()) {
246  layout = HashType::ManyToMany;
247  } else {
248  layout = HashType::OneToMany;
249  }
250  try {
251  reifyWithLayout(layout);
252  return;
253  } catch (const std::exception& e) {
254  VLOG(1) << "Caught exception while building overlaps baseline hash table: "
255  << e.what();
256  throw;
257  }
258  }
259 
260  try {
261  reifyWithLayout(preferred_layout);
262  } catch (const std::exception& e) {
263  VLOG(1) << "Caught exception while building baseline hash table: " << e.what();
266  }
267 }
#define CHECK_EQ(x, y)
Definition: Logger.h:219
void freeHashBufferMemory()
Definition: HashJoin.h:283
std::vector< InnerOuter > inner_outer_pairs_
static void checkHashJoinReplicationConstraint(const int table_id, const size_t shard_count, const Executor *executor)
Definition: HashJoin.cpp:587
virtual void reifyWithLayout(const HashType layout)
static CompositeKeyInfo getCompositeKeyInfo(const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor)
Definition: HashJoin.cpp:334
#define CHECK_LT(x, y)
Definition: Logger.h:221
int getInnerTableId() const noexcept override
#define DEBUG_TIMER(name)
Definition: Logger.h:358
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
HashType
Definition: HashTable.h:19
#define VLOG(n)
Definition: Logger.h:305
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the call graph for this function:

void BaselineJoinHashTable::reifyForDevice ( const ColumnsForDevice columns_for_device,
const HashType  layout,
const int  device_id,
const size_t  entry_count,
const size_t  emitted_keys_count,
const logger::ThreadId  parent_thread_id 
)
protectedvirtual

Definition at line 509 of file BaselineJoinHashTable.cpp.

References DEBUG_TIMER_NEW_THREAD, getEffectiveMemoryLevel(), initHashTableForDevice(), inner_outer_pairs_, ColumnsForDevice::join_buckets, ColumnsForDevice::join_column_types, ColumnsForDevice::join_columns, and to_string().

Referenced by reifyWithLayout().

514  {
515  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
516  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
517  const auto err = initHashTableForDevice(columns_for_device.join_columns,
518  columns_for_device.join_column_types,
519  columns_for_device.join_buckets,
520  layout,
521  effective_memory_level,
522  entry_count,
523  emitted_keys_count,
524  device_id);
525  if (err) {
526  throw HashJoinFail(
527  std::string("Unrecognized error when initializing baseline hash table (") +
528  std::to_string(err) + std::string(")"));
529  }
530 }
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:363
std::string to_string(char const *&&v)
std::vector< InnerOuter > inner_outer_pairs_
const std::vector< JoinColumnTypeInfo > join_column_types
Definition: HashJoin.h:81
virtual int initHashTableForDevice(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_buckets, const HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const size_t entry_count, const size_t emitted_keys_count, const int device_id)
std::vector< JoinBucketInfo > join_buckets
Definition: HashJoin.h:83
const std::vector< JoinColumn > join_columns
Definition: HashJoin.h:80

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void BaselineJoinHashTable::reifyWithLayout ( const HashType  layout)
protected virtual

Definition at line 269 of file BaselineJoinHashTable.cpp.

References approximateTupleCount(), threading_serial::async(), BASELINE_HT, CHECK, condition_, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, device_count_, EMPTY_HASHED_PLAN_DAG_KEY, executor_, fetchColumnsForDevice(), get_entries_per_device(), get_inner_query_info(), getAlternativeCacheKey(), getInnerTableId(), Fragmenter_Namespace::TableInfo::getNumTuplesUpperBound(), Data_Namespace::GPU_LEVEL, hashtable_cache_key_, InputTableInfo::info, inner_outer_pairs_, join_type_, memory_level_, OneToMany, only_shards_for_device(), query_infos_, reifyForDevice(), shardCount(), logger::thread_id(), and VLOG.

Referenced by reify().

269  {
270  const auto& query_info = get_inner_query_info(getInnerTableId(), query_infos_).info;
271  if (query_info.fragments.empty()) {
272  return;
273  }
274 
275  const auto total_entries = 2 * query_info.getNumTuplesUpperBound();
276  if (total_entries > static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
277  throw TooManyHashEntries();
278  }
279 
280  auto data_mgr = executor_->getDataMgr();
281  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
283  for (int device_id = 0; device_id < device_count_; ++device_id) {
284  dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(data_mgr, device_id));
285  }
286  }
287  std::vector<ColumnsForDevice> columns_per_device;
288  const auto shard_count = shardCount();
289  auto entries_per_device =
290  get_entries_per_device(total_entries, shard_count, device_count_, memory_level_);
291 
292  for (int device_id = 0; device_id < device_count_; ++device_id) {
293  const auto fragments =
294  shard_count
295  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
296  : query_info.fragments;
297  const auto columns_for_device =
298  fetchColumnsForDevice(fragments,
299  device_id,
301  ? dev_buff_owners[device_id].get()
302  : nullptr);
303  columns_per_device.push_back(columns_for_device);
304  }
305  auto hashtable_layout_type = layout;
307  // sometimes we cannot retrieve the query plan DAG, so try to recycle the cache
308  // with the old-fashioned cache key if we deal with a hashtable of a non-temporary table
309  AlternativeCacheKeyForBaselineHashJoin cache_key{
311  columns_per_device.front().join_columns.front().num_elems,
312  condition_->get_optype(),
313  join_type_};
315  VLOG(2) << "Use alternative hashtable cache key due to unavailable query plan dag "
316  "extraction";
317  }
318 
319  size_t emitted_keys_count = 0;
320  if (hashtable_layout_type == HashType::OneToMany) {
321  CHECK(!columns_per_device.front().join_columns.empty());
322  emitted_keys_count = columns_per_device.front().join_columns.front().num_elems;
323  size_t tuple_count;
324  std::tie(tuple_count, std::ignore) =
325  approximateTupleCount(columns_per_device,
329  const auto entry_count = 2 * std::max(tuple_count, size_t(1));
330 
331  // reset entries per device with one to many info
332  entries_per_device =
333  get_entries_per_device(entry_count, shard_count, device_count_, memory_level_);
334  }
335  std::vector<std::future<void>> init_threads;
336  for (int device_id = 0; device_id < device_count_; ++device_id) {
337  const auto fragments =
338  shard_count
339  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
340  : query_info.fragments;
341  init_threads.push_back(std::async(std::launch::async,
343  this,
344  columns_per_device[device_id],
345  hashtable_layout_type,
346  device_id,
347  entries_per_device,
348  emitted_keys_count,
349  logger::thread_id()));
350  }
351  for (auto& init_thread : init_threads) {
352  init_thread.wait();
353  }
354  for (auto& init_thread : init_threads) {
355  init_thread.get();
356  }
357 }
virtual std::pair< size_t, size_t > approximateTupleCount(const std::vector< ColumnsForDevice > &, QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier) const
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
virtual void reifyForDevice(const ColumnsForDevice &columns_for_device, const HashType layout, const int device_id, const size_t entry_count, const size_t emitted_keys_count, const logger::ThreadId parent_thread_id)
virtual ColumnsForDevice fetchColumnsForDevice(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
const std::vector< InputTableInfo > & query_infos_
std::vector< InnerOuter > inner_outer_pairs_
future< Result > async(Fn &&fn, Args &&...args)
static QueryPlanHash getAlternativeCacheKey(AlternativeCacheKeyForBaselineHashJoin &info)
std::vector< Fragmenter_Namespace::FragmentInfo > only_shards_for_device(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)
int getInnerTableId() const noexcept override
size_t get_entries_per_device(const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
const Data_Namespace::MemoryLevel memory_level_
ThreadId thread_id()
Definition: Logger.cpp:816
#define CHECK(condition)
Definition: Logger.h:211
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:132
#define VLOG(n)
Definition: Logger.h:305
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t BaselineJoinHashTable::shardCount ( ) const
protected

Definition at line 532 of file BaselineJoinHashTable.cpp.

References condition_, executor_, getShardCountForCondition(), Data_Namespace::GPU_LEVEL, inner_outer_pairs_, and memory_level_.

Referenced by reifyWithLayout().

532  {
534  return 0;
535  }
538 }
std::vector< InnerOuter > inner_outer_pairs_
const Data_Namespace::MemoryLevel memory_level_
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
const std::shared_ptr< Analyzer::BinOper > condition_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::set< DecodedJoinHashBufferEntry > BaselineJoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override virtual

Implements HashJoin.

Definition at line 196 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK, countBufferOff(), Data_Namespace::DataMgr::createGpuAllocator(), Catalog_Namespace::Catalog::getDataMgr(), HashJoin::getHashTableForDevice(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), GPU, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toSet().

198  {
199  auto buffer = getJoinHashBuffer(device_type, device_id);
200  auto hash_table = getHashTableForDevice(device_id);
201  CHECK(hash_table);
202  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
203 #ifdef HAVE_CUDA
204  std::unique_ptr<int8_t[]> buffer_copy;
205  if (device_type == ExecutorDeviceType::GPU) {
206  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
207  auto& data_mgr = catalog_->getDataMgr();
208  auto device_allocator = data_mgr.createGpuAllocator(device_id);
209  device_allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
210  }
211  auto ptr1 = buffer_copy ? buffer_copy.get() : buffer;
212 #else
213  auto ptr1 = buffer;
214 #endif // HAVE_CUDA
215  auto ptr2 = ptr1 + offsetBufferOff();
216  auto ptr3 = ptr1 + countBufferOff();
217  auto ptr4 = ptr1 + payloadBufferOff();
218  const auto layout = hash_table->getLayout();
219  return HashTable::toSet(getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
221  hash_table->getEntryCount(),
222  ptr1,
223  ptr2,
224  ptr3,
225  ptr4,
226  buffer_size);
227 }
size_t offsetBufferOff() const noexcept override
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:226
size_t payloadBufferOff() const noexcept override
std::unique_ptr< DeviceAllocator > createGpuAllocator(int device_id)
Definition: DataMgr.cpp:526
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
Definition: HashJoin.h:260
HashTable * getHashTableForDevice(const size_t device_id) const
Definition: HashJoin.h:241
const Catalog_Namespace::Catalog * catalog_
#define CHECK(condition)
Definition: Logger.h:211
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
static DecodedJoinHashBufferSet toSet(size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size)
Decode hash table into a std::set for easy inspection and validation.
Definition: HashTable.cpp:139
size_t countBufferOff() const noexcept override

+ Here is the call graph for this function:

std::string BaselineJoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
override virtual

Implements HashJoin.

Definition at line 156 of file BaselineJoinHashTable.cpp.

References catalog_, CHECK, CHECK_LT, countBufferOff(), Data_Namespace::DataMgr::createGpuAllocator(), Catalog_Namespace::Catalog::getDataMgr(), getHashType(), HashJoin::getHashTypeString(), HashJoin::getJoinHashBuffer(), getKeyComponentCount(), getKeyComponentWidth(), GPU, HashJoin::hash_tables_for_device_, offsetBufferOff(), OneToOne, payloadBufferOff(), and HashTable::toString().

Referenced by getAlternativeCacheKey().

158  {
159  auto buffer = getJoinHashBuffer(device_type, device_id);
160  CHECK_LT(device_id, hash_tables_for_device_.size());
161  auto hash_table = hash_tables_for_device_[device_id];
162  CHECK(hash_table);
163  auto buffer_size = hash_table->getHashTableBufferSize(device_type);
164 #ifdef HAVE_CUDA
165  std::unique_ptr<int8_t[]> buffer_copy;
166  if (device_type == ExecutorDeviceType::GPU) {
167  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
168 
169  auto& data_mgr = catalog_->getDataMgr();
170  auto device_allocator = data_mgr.createGpuAllocator(device_id);
171  device_allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
172  }
173  auto ptr1 = buffer_copy ? buffer_copy.get() : buffer;
174 #else
175  auto ptr1 = buffer;
176 #endif // HAVE_CUDA
177  auto ptr2 = ptr1 + offsetBufferOff();
178  auto ptr3 = ptr1 + countBufferOff();
179  auto ptr4 = ptr1 + payloadBufferOff();
180  CHECK(hash_table);
181  const auto layout = getHashType();
182  return HashTable::toString(
183  "keyed",
184  getHashTypeString(layout),
185  getKeyComponentCount() + (layout == HashType::OneToOne ? 1 : 0),
187  hash_table->getEntryCount(),
188  ptr1,
189  ptr2,
190  ptr3,
191  ptr4,
192  buffer_size,
193  raw);
194 }
size_t offsetBufferOff() const noexcept override
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:226
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:296
size_t payloadBufferOff() const noexcept override
HashType getHashType() const noexcept override
std::unique_ptr< DeviceAllocator > createGpuAllocator(int device_id)
Definition: DataMgr.cpp:526
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
Definition: HashJoin.h:260
#define CHECK_LT(x, y)
Definition: Logger.h:221
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:134
static std::string toString(const std::string &type, const std::string &layout_type, size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size, bool raw=false)
Decode hash table into a human-readable string.
Definition: HashTable.cpp:226
const Catalog_Namespace::Catalog * catalog_
#define CHECK(condition)
Definition: Logger.h:211
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
size_t countBufferOff() const noexcept override

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

const Catalog_Namespace::Catalog* BaselineJoinHashTable::catalog_
protected

Definition at line 236 of file BaselineJoinHashTable.h.

Referenced by fetchColumnsForDevice(), toSet(), and toString().

ColumnCacheMap& BaselineJoinHashTable::column_cache_
protected

Definition at line 232 of file BaselineJoinHashTable.h.

Referenced by fetchColumnsForDevice().

const std::shared_ptr<Analyzer::BinOper> BaselineJoinHashTable::condition_
protected

Definition at line 227 of file BaselineJoinHashTable.h.

Referenced by isBitwiseEq(), reify(), reifyWithLayout(), and shardCount().

std::mutex BaselineJoinHashTable::cpu_hash_table_buff_mutex_
protected

Definition at line 233 of file BaselineJoinHashTable.h.

Referenced by initHashTableForDevice().

const int BaselineJoinHashTable::device_count_
protected
std::unique_ptr< HashtableRecycler > BaselineJoinHashTable::hash_table_cache_
static protected
std::unique_ptr< HashingSchemeRecycler > BaselineJoinHashTable::hash_table_layout_cache_
static protected
Initial value:
=
std::make_unique<HashingSchemeRecycler>()

Definition at line 247 of file BaselineJoinHashTable.h.

Referenced by getCacheInvalidator(), getHashingSchemeCache(), and initHashTableForDevice().

QueryPlanHash BaselineJoinHashTable::hashtable_cache_key_
protected

Definition at line 243 of file BaselineJoinHashTable.h.

Referenced by initHashTableForDevice(), and reifyWithLayout().

HashtableCacheMetaInfo BaselineJoinHashTable::hashtable_cache_meta_info_
protected

Definition at line 244 of file BaselineJoinHashTable.h.

const JoinType BaselineJoinHashTable::join_type_
protected

Definition at line 228 of file BaselineJoinHashTable.h.

Referenced by initHashTableForDevice(), and reifyWithLayout().

std::optional<HashType> BaselineJoinHashTable::layout_override_
protected

Definition at line 240 of file BaselineJoinHashTable.h.

Referenced by getHashType().

const Data_Namespace::MemoryLevel BaselineJoinHashTable::memory_level_
protected
bool BaselineJoinHashTable::needs_dict_translation_
mutable protected
const std::vector<InputTableInfo>& BaselineJoinHashTable::query_infos_
protected

Definition at line 229 of file BaselineJoinHashTable.h.

Referenced by reifyWithLayout().

const TableIdToNodeMap BaselineJoinHashTable::table_id_to_node_map_
protected

The documentation for this class was generated from the following files: