OmniSciDB  ba1bac9284
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
PerfectJoinHashTable Class Reference

#include <PerfectJoinHashTable.h>

+ Inheritance diagram for PerfectJoinHashTable:
+ Collaboration diagram for PerfectJoinHashTable:

Classes

struct  JoinHashTableCacheKey
 

Public Types

using HashTableCacheValue = std::shared_ptr< PerfectHashTable >
 

Public Member Functions

std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
std::set
< DecodedJoinHashBufferEntry > 
toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
std::string getHashJoinType () const final
 
virtual ~PerfectJoinHashTable ()
 
- Public Member Functions inherited from HashJoin
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int64_t getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static std::shared_ptr
< PerfectJoinHashTable > 
getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static auto getHashTableCache ()
 
static auto getCacheInvalidator () -> std::function< void()>
 
- Static Public Member Functions inherited from HashJoin
static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const RegisteredQueryHint &query_hint)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static void checkHashJoinReplicationConstraint (const int table_id, const size_t shard_count, const Executor *executor)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor)
 

Private Member Functions

ColumnsForDevice fetchColumnsForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
 
void reifyForDevice (const ChunkKey &hash_table_key, const ColumnsForDevice &columns_for_device, const HashType layout, const int device_id, const logger::ThreadId parent_thread_id)
 
int initHashTableForDevice (const ChunkKey &chunk_key, const JoinColumn &join_column, const InnerOuter &cols, const HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
 PerfectJoinHashTable (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Analyzer::ColumnVar *col_var, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const ExpressionRange &col_range, ColumnCacheMap &column_cache, Executor *executor, const int device_count)
 
ChunkKey genHashTableKey (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Analyzer::Expr *outer_col, const Analyzer::ColumnVar *inner_col) const
 
void reify ()
 
std::shared_ptr< PerfectHashTable > initHashTableOnCpuFromCache (const ChunkKey &chunk_key, const size_t num_elements, const InnerOuter &cols)
 
void putHashTableOnCpuToCache (const ChunkKey &chunk_key, const size_t num_elements, HashTableCacheValue hash_table, const InnerOuter &cols)
 
const InputTableInfo & getInnerQueryInfo (const Analyzer::ColumnVar *inner_col) const
 
size_t shardCount () const
 
llvm::Value * codegenHashTableLoad (const size_t table_idx)
 
std::vector< llvm::Value * > getHashJoinArgs (llvm::Value *hash_ptr, const Analyzer::Expr *key_col, const int shard_count, const CompilationOptions &co)
 
bool isBitwiseEq () const
 
size_t getComponentBufferSize () const noexcept override
 
HashTable * getHashTableForDevice (const size_t device_id) const
 

Private Attributes

std::vector< InnerOuter > inner_outer_pairs_
 
Catalog_Namespace::Catalog * catalog_
 
std::shared_ptr
< Analyzer::BinOper > 
qual_bin_oper_
 
const JoinType join_type_
 
std::shared_ptr
< Analyzer::ColumnVar > 
col_var_
 
const std::vector
< InputTableInfo > & 
query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
HashType hash_type_
 
std::mutex cpu_hash_table_buff_mutex_
 
ExpressionRange col_range_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
const int device_count_
 

Static Private Attributes

static std::unique_ptr
< HashTableCache
< JoinHashTableCacheKey,
HashTableCacheValue > > 
hash_table_cache_
 

Additional Inherited Members

- Protected Attributes inherited from HashJoin
std::vector< std::shared_ptr
< HashTable > > 
hash_tables_for_device_
 

Detailed Description

Definition at line 50 of file PerfectJoinHashTable.h.

Member Typedef Documentation

Definition at line 52 of file PerfectJoinHashTable.h.

Constructor & Destructor Documentation

virtual PerfectJoinHashTable::~PerfectJoinHashTable ( )
inline virtual

Definition at line 108 of file PerfectJoinHashTable.h.

108 {}
PerfectJoinHashTable::PerfectJoinHashTable ( const std::shared_ptr< Analyzer::BinOper > &#8203;qual_bin_oper,
const Analyzer::ColumnVar *  col_var,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
const ExpressionRange &  col_range,
ColumnCacheMap &  column_cache,
Executor *  executor,
const int  device_count 
)
inline private

Definition at line 136 of file PerfectJoinHashTable.h.

References CHECK, CHECK_GT, device_count_, ExpressionRange::getType(), HashJoin::hash_tables_for_device_, and Integer.

Referenced by getInstance().

146  : qual_bin_oper_(qual_bin_oper)
147  , join_type_(join_type)
148  , col_var_(std::dynamic_pointer_cast<Analyzer::ColumnVar>(col_var->deep_copy()))
149  , query_infos_(query_infos)
150  , memory_level_(memory_level)
151  , hash_type_(preferred_hash_type)
152  , col_range_(col_range)
153  , executor_(executor)
154  , column_cache_(column_cache)
155  , device_count_(device_count) {
159  }
const Data_Namespace::MemoryLevel memory_level_
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:270
std::shared_ptr< Analyzer::Expr > deep_copy() const override
Definition: Analyzer.cpp:60
#define CHECK_GT(x, y)
Definition: Logger.h:218
const std::vector< InputTableInfo > & query_infos_
ColumnCacheMap & column_cache_
std::shared_ptr< Analyzer::ColumnVar > col_var_
ExpressionRangeType getType() const
#define CHECK(condition)
Definition: Logger.h:206

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Function Documentation

llvm::Value * PerfectJoinHashTable::codegenHashTableLoad ( const size_t  table_idx)
private

Definition at line 690 of file PerfectJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, HashJoin::codegenHashTableLoad(), executor_, and get_arg_by_name().

Referenced by codegenMatchingSet(), and codegenSlot().

690  {
691  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
692  const auto hash_ptr = HashJoin::codegenHashTableLoad(table_idx, executor_);
693  if (hash_ptr->getType()->isIntegerTy(64)) {
694  return hash_ptr;
695  }
696  CHECK(hash_ptr->getType()->isPointerTy());
697  return executor_->cgen_state_->ir_builder_.CreatePtrToInt(
698  get_arg_by_name(executor_->cgen_state_->row_func_, "join_hash_tables"),
699  llvm::Type::getInt64Ty(executor_->cgen_state_->context_));
700 }
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
Definition: HashJoin.cpp:215
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:206

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

HashJoinMatchingSet PerfectJoinHashTable::codegenMatchingSet ( const CompilationOptions &  co,
const size_t  index 
)
override virtual

Implements HashJoin.

Definition at line 754 of file PerfectJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenHashTableLoad(), HashJoin::codegenMatchingSet(), executor_, anonymous_namespace{HashJoin.cpp}::get_cols(), get_max_rte_scan_table(), getComponentBufferSize(), getHashJoinArgs(), isBitwiseEq(), kDATE, qual_bin_oper_, self_join_not_covered_by_left_deep_tree(), and shardCount().

755  {
756  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
757  const auto cols = get_cols(
758  qual_bin_oper_.get(), *executor_->getCatalog(), executor_->temporary_tables_);
759  auto key_col = cols.second;
760  CHECK(key_col);
761  auto val_col = cols.first;
762  CHECK(val_col);
763  auto pos_ptr = codegenHashTableLoad(index);
764  CHECK(pos_ptr);
765  const int shard_count = shardCount();
766  const auto key_col_var = dynamic_cast<const Analyzer::ColumnVar*>(key_col);
767  const auto val_col_var = dynamic_cast<const Analyzer::ColumnVar*>(val_col);
768  if (key_col_var && val_col_var &&
770  key_col_var,
771  val_col_var,
772  get_max_rte_scan_table(executor_->cgen_state_->scan_idx_to_hash_pos_))) {
773  throw std::runtime_error(
774  "Query execution fails because the query contains not supported self-join "
775  "pattern. We suspect the query requires multiple left-deep join tree due to "
776  "the "
777  "join condition of the self-join and is not supported for now. Please consider "
778  "rewriting table order in "
779  "FROM clause.");
780  }
781  auto hash_join_idx_args = getHashJoinArgs(pos_ptr, key_col, shard_count, co);
782  const int64_t sub_buff_size = getComponentBufferSize();
783  const auto& key_col_ti = key_col->get_type_info();
784 
785  auto bucketize = (key_col_ti.get_type() == kDATE);
786  return HashJoin::codegenMatchingSet(hash_join_idx_args,
787  shard_count,
788  !key_col_ti.get_notnull(),
789  isBitwiseEq(),
790  sub_buff_size,
791  executor_,
792  bucketize);
793 }
llvm::Value * codegenHashTableLoad(const size_t table_idx)
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t)=0
bool self_join_not_covered_by_left_deep_tree(const Analyzer::ColumnVar *key_side, const Analyzer::ColumnVar *val_side, const int max_rte_covered)
size_t getComponentBufferSize() const noexceptoverride
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
const int get_max_rte_scan_table(std::unordered_map< int, llvm::Value * > &scan_idx_to_hash_pos)
std::vector< llvm::Value * > getHashJoinArgs(llvm::Value *hash_ptr, const Analyzer::Expr *key_col, const int shard_count, const CompilationOptions &co)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
Definition: sqltypes.h:52
#define CHECK(condition)
Definition: Logger.h:206
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:550

+ Here is the call graph for this function:

llvm::Value * PerfectJoinHashTable::codegenSlot ( const CompilationOptions &  co,
const size_t  index 
)
override virtual

Implements HashJoin.

Definition at line 895 of file PerfectJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CodeGenerator::codegen(), codegenHashTableLoad(), executor_, anonymous_namespace{HashJoin.cpp}::get_cols(), get_max_rte_scan_table(), Analyzer::Expr::get_type_info(), getHashJoinArgs(), getHashType(), isBitwiseEq(), kDATE, OneToOne, qual_bin_oper_, self_join_not_covered_by_left_deep_tree(), and shardCount().

896  {
897  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
898  using namespace std::string_literals;
899 
901  const auto cols = get_cols(
902  qual_bin_oper_.get(), *executor_->getCatalog(), executor_->temporary_tables_);
903  auto key_col = cols.second;
904  CHECK(key_col);
905  auto val_col = cols.first;
906  CHECK(val_col);
907  CodeGenerator code_generator(executor_);
908  const auto key_col_var = dynamic_cast<const Analyzer::ColumnVar*>(key_col);
909  const auto val_col_var = dynamic_cast<const Analyzer::ColumnVar*>(val_col);
910  if (key_col_var && val_col_var &&
912  key_col_var,
913  val_col_var,
914  get_max_rte_scan_table(executor_->cgen_state_->scan_idx_to_hash_pos_))) {
915  throw std::runtime_error(
916  "Query execution fails because the query contains not supported self-join "
917  "pattern. We suspect the query requires multiple left-deep join tree due to "
918  "the "
919  "join condition of the self-join and is not supported for now. Please consider "
920  "rewriting table order in "
921  "FROM clause.");
922  }
923  const auto key_lvs = code_generator.codegen(key_col, true, co);
924  CHECK_EQ(size_t(1), key_lvs.size());
925  auto hash_ptr = codegenHashTableLoad(index);
926  CHECK(hash_ptr);
927  const int shard_count = shardCount();
928  const auto hash_join_idx_args = getHashJoinArgs(hash_ptr, key_col, shard_count, co);
929 
930  const auto& key_col_ti = key_col->get_type_info();
931  std::string fname((key_col_ti.get_type() == kDATE) ? "bucketized_hash_join_idx"s
932  : "hash_join_idx"s);
933 
934  if (isBitwiseEq()) {
935  fname += "_bitwise";
936  }
937  if (shard_count) {
938  fname += "_sharded";
939  }
940 
941  if (!isBitwiseEq() && !key_col_ti.get_notnull()) {
942  fname += "_nullable";
943  }
944  return executor_->cgen_state_->emitCall(fname, hash_join_idx_args);
945 }
llvm::Value * codegenHashTableLoad(const size_t table_idx)
#define CHECK_EQ(x, y)
Definition: Logger.h:214
bool self_join_not_covered_by_left_deep_tree(const Analyzer::ColumnVar *key_side, const Analyzer::ColumnVar *val_side, const int max_rte_covered)
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
const int get_max_rte_scan_table(std::unordered_map< int, llvm::Value * > &scan_idx_to_hash_pos)
HashType getHashType() const noexceptoverride
std::vector< llvm::Value * > getHashJoinArgs(llvm::Value *hash_ptr, const Analyzer::Expr *key_col, const int shard_count, const CompilationOptions &co)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
Definition: sqltypes.h:52
#define CHECK(condition)
Definition: Logger.h:206
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:550

+ Here is the call graph for this function:

size_t PerfectJoinHashTable::countBufferOff ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 799 of file PerfectJoinHashTable.cpp.

References getComponentBufferSize().

Referenced by toSet(), and toString().

799  {
800  return getComponentBufferSize();
801 }
size_t getComponentBufferSize() const noexceptoverride

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ColumnsForDevice PerfectJoinHashTable::fetchColumnsForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
DeviceAllocator *  dev_buff_owner 
)
private

Definition at line 412 of file PerfectJoinHashTable.cpp.

References catalog_, column_cache_, executor_, HashJoin::fetchJoinColumn(), get_column_descriptor_maybe(), get_join_column_type_kind(), getEffectiveMemoryLevel(), inline_fixed_encoding_null_val(), inner_outer_pairs_, and isBitwiseEq().

Referenced by reify().

415  {
416  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
417 
418  std::vector<JoinColumn> join_columns;
419  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
420  std::vector<JoinColumnTypeInfo> join_column_types;
421  std::vector<JoinBucketInfo> join_bucket_info;
422  std::vector<std::shared_ptr<void>> malloc_owner;
423  for (const auto& inner_outer_pair : inner_outer_pairs_) {
424  const auto inner_col = inner_outer_pair.first;
425  const auto inner_cd = get_column_descriptor_maybe(
426  inner_col->get_column_id(), inner_col->get_table_id(), *catalog_);
427  if (inner_cd && inner_cd->isVirtualCol) {
429  }
430  join_columns.emplace_back(fetchJoinColumn(inner_col,
431  fragments,
432  effective_memory_level,
433  device_id,
434  chunks_owner,
435  dev_buff_owner,
436  malloc_owner,
437  executor_,
438  &column_cache_));
439  const auto& ti = inner_col->get_type_info();
440  join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
441  0,
442  0,
444  isBitwiseEq(),
445  0,
447  }
448  return {join_columns, join_column_types, chunks_owner, join_bucket_info, malloc_owner};
449 }
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
Definition: HashJoin.cpp:54
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:221
std::vector< InnerOuter > inner_outer_pairs_
ColumnCacheMap & column_cache_
Catalog_Namespace::Catalog * catalog_
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ChunkKey PerfectJoinHashTable::genHashTableKey ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const Analyzer::Expr *  outer_col,
const Analyzer::ColumnVar *  inner_col 
) const
private

Definition at line 621 of file PerfectJoinHashTable.cpp.

References CHECK, CHECK_EQ, executor_, Analyzer::ColumnVar::get_column_id(), Analyzer::ColumnVar::get_table_id(), Analyzer::Expr::get_type_info(), getInnerQueryInfo(), Fragmenter_Namespace::TableInfo::getNumTuples(), InputTableInfo::info, and kENCODING_DICT.

Referenced by reify().

624  {
625  ChunkKey hash_table_key{executor_->getCatalog()->getCurrentDB().dbId,
626  inner_col->get_table_id(),
627  inner_col->get_column_id()};
628  const auto& ti = inner_col->get_type_info();
629  if (ti.is_string()) {
630  CHECK_EQ(kENCODING_DICT, ti.get_compression());
631  size_t outer_elem_count = 0;
632  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_col_expr);
633  CHECK(outer_col);
634  const auto& outer_query_info = getInnerQueryInfo(outer_col).info;
635  for (auto& frag : outer_query_info.fragments) {
636  outer_elem_count = frag.getNumTuples();
637  }
638  hash_table_key.push_back(outer_elem_count);
639  }
640  if (fragments.size() < 2) {
641  hash_table_key.push_back(fragments.front().fragmentId);
642  }
643  return hash_table_key;
644 }
int get_table_id() const
Definition: Analyzer.h:194
#define CHECK_EQ(x, y)
Definition: Logger.h:214
std::vector< int > ChunkKey
Definition: types.h:37
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
const InputTableInfo & getInnerQueryInfo(const Analyzer::ColumnVar *inner_col) const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
#define CHECK(condition)
Definition: Logger.h:206
int get_column_id() const
Definition: Analyzer.h:195

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static auto PerfectJoinHashTable::getCacheInvalidator ( ) -> std::function<void()>
inline static

Definition at line 103 of file PerfectJoinHashTable.h.

References CHECK, and hash_table_cache_.

103  {
105  return hash_table_cache_->getCacheInvalidator();
106  }
static std::unique_ptr< HashTableCache< JoinHashTableCacheKey, HashTableCacheValue > > hash_table_cache_
#define CHECK(condition)
Definition: Logger.h:206
size_t PerfectJoinHashTable::getComponentBufferSize ( ) const
override private virtual noexcept

Implements HashJoin.

Definition at line 807 of file PerfectJoinHashTable.cpp.

References HashJoin::hash_tables_for_device_, and OneToMany.

Referenced by codegenMatchingSet(), countBufferOff(), and payloadBufferOff().

807  {
808  if (hash_tables_for_device_.empty()) {
809  return 0;
810  }
811  auto hash_table = hash_tables_for_device_.front();
812  if (hash_table && hash_table->getLayout() == HashType::OneToMany) {
813  return hash_table->getEntryCount() * sizeof(int32_t);
814  } else {
815  return 0;
816  }
817 }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:270

+ Here is the caller graph for this function:

int PerfectJoinHashTable::getDeviceCount ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 91 of file PerfectJoinHashTable.h.

References device_count_.

91 { return device_count_; };
Data_Namespace::MemoryLevel PerfectJoinHashTable::getEffectiveMemoryLevel ( const std::vector< InnerOuter > &  inner_outer_pairs) const
private

Definition at line 401 of file PerfectJoinHashTable.cpp.

References Data_Namespace::CPU_LEVEL, executor_, memory_level_, and needs_dictionary_translation().

Referenced by fetchColumnsForDevice(), and reifyForDevice().

402  {
403  for (const auto& inner_outer_pair : inner_outer_pairs) {
405  inner_outer_pair.first, inner_outer_pair.second, executor_)) {
407  }
408  }
409  return memory_level_;
410 }
const Data_Namespace::MemoryLevel memory_level_
bool needs_dictionary_translation(const Analyzer::ColumnVar *inner_col, const Analyzer::Expr *outer_col_expr, const Executor *executor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< llvm::Value * > PerfectJoinHashTable::getHashJoinArgs ( llvm::Value *  hash_ptr,
const Analyzer::Expr *  key_col,
const int  shard_count,
const CompilationOptions &  co 
)
private

Definition at line 702 of file PerfectJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK_EQ, col_range_, device_count_, executor_, anonymous_namespace{PerfectJoinHashTable.cpp}::get_bucketized_hash_entry_info(), anonymous_namespace{PerfectJoinHashTable.cpp}::get_hash_entry_count(), get_logical_type_info(), SQLTypeInfo::get_type(), Analyzer::Expr::get_type_info(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), inline_fixed_encoding_null_val(), isBitwiseEq(), and kDATE.

Referenced by codegenMatchingSet(), and codegenSlot().

706  {
707  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
708  CodeGenerator code_generator(executor_);
709  const auto key_lvs = code_generator.codegen(key_col, true, co);
710  CHECK_EQ(size_t(1), key_lvs.size());
711  auto const& key_col_ti = key_col->get_type_info();
712  auto hash_entry_info =
714 
715  std::vector<llvm::Value*> hash_join_idx_args{
716  hash_ptr,
717  executor_->cgen_state_->castToTypeIn(key_lvs.front(), 64),
718  executor_->cgen_state_->llInt(col_range_.getIntMin()),
719  executor_->cgen_state_->llInt(col_range_.getIntMax())};
720  if (shard_count) {
721  const auto expected_hash_entry_count =
723  const auto entry_count_per_shard =
724  (expected_hash_entry_count + shard_count - 1) / shard_count;
725  hash_join_idx_args.push_back(
726  executor_->cgen_state_->llInt<uint32_t>(entry_count_per_shard));
727  hash_join_idx_args.push_back(executor_->cgen_state_->llInt<uint32_t>(shard_count));
728  hash_join_idx_args.push_back(executor_->cgen_state_->llInt<uint32_t>(device_count_));
729  }
730  auto key_col_logical_ti = get_logical_type_info(key_col->get_type_info());
731  if (!key_col_logical_ti.get_notnull() || isBitwiseEq()) {
732  hash_join_idx_args.push_back(executor_->cgen_state_->llInt(
733  inline_fixed_encoding_null_val(key_col_logical_ti)));
734  }
735  auto special_date_bucketization_case = key_col_ti.get_type() == kDATE;
736  if (isBitwiseEq()) {
737  if (special_date_bucketization_case) {
738  hash_join_idx_args.push_back(executor_->cgen_state_->llInt(
739  col_range_.getIntMax() / hash_entry_info.bucket_normalization + 1));
740  } else {
741  hash_join_idx_args.push_back(
742  executor_->cgen_state_->llInt(col_range_.getIntMax() + 1));
743  }
744  }
745 
746  if (special_date_bucketization_case) {
747  hash_join_idx_args.emplace_back(
748  executor_->cgen_state_->llInt(hash_entry_info.bucket_normalization));
749  }
750 
751  return hash_join_idx_args;
752 }
int64_t getIntMin() const
#define CHECK_EQ(x, y)
Definition: Logger.h:214
size_t get_hash_entry_count(const ExpressionRange &col_range, const bool is_bw_eq)
HashEntryInfo get_bucketized_hash_entry_info(SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:912
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
Definition: sqltypes.h:52
int64_t getIntMax() const
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string PerfectJoinHashTable::getHashJoinType ( ) const
inline final virtual

Implements HashJoin.

Definition at line 99 of file PerfectJoinHashTable.h.

99 { return "Perfect"; }
static auto PerfectJoinHashTable::getHashTableCache ( )
inline static

Definition at line 101 of file PerfectJoinHashTable.h.

References hash_table_cache_.

Referenced by QueryRunner::QueryRunner::getCachedJoinHashTable(), and QueryRunner::QueryRunner::getNumberOfCachedJoinHashTables().

101 { return hash_table_cache_.get(); }
static std::unique_ptr< HashTableCache< JoinHashTableCacheKey, HashTableCacheValue > > hash_table_cache_

+ Here is the caller graph for this function:

HashTable * PerfectJoinHashTable::getHashTableForDevice ( const size_t  device_id) const
private

Definition at line 819 of file PerfectJoinHashTable.cpp.

References CHECK_LT, and HashJoin::hash_tables_for_device_.

Referenced by toSet(), and toString().

819  {
820  CHECK_LT(device_id, hash_tables_for_device_.size());
821  return hash_tables_for_device_[device_id].get();
822 }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:270
#define CHECK_LT(x, y)
Definition: Logger.h:216

+ Here is the caller graph for this function:

HashType PerfectJoinHashTable::getHashType ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 85 of file PerfectJoinHashTable.h.

References hash_type_.

Referenced by codegenSlot().

85 { return hash_type_; }

+ Here is the caller graph for this function:

const InputTableInfo & PerfectJoinHashTable::getInnerQueryInfo ( const Analyzer::ColumnVar *  inner_col) const
private

Definition at line 947 of file PerfectJoinHashTable.cpp.

References get_inner_query_info(), Analyzer::ColumnVar::get_table_id(), and query_infos_.

Referenced by genHashTableKey(), and reify().

948  {
949  return get_inner_query_info(inner_col->get_table_id(), query_infos_);
950 }
int get_table_id() const
Definition: Analyzer.h:194
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)
const std::vector< InputTableInfo > & query_infos_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int PerfectJoinHashTable::getInnerTableId ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 77 of file PerfectJoinHashTable.h.

References col_var_.

77  {
78  return col_var_.get()->get_table_id();
79  };
std::shared_ptr< Analyzer::ColumnVar > col_var_
int PerfectJoinHashTable::getInnerTableRteIdx ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 81 of file PerfectJoinHashTable.h.

References col_var_.

81  {
82  return col_var_.get()->get_rte_idx();
83  };
std::shared_ptr< Analyzer::ColumnVar > col_var_
std::shared_ptr< PerfectJoinHashTable > PerfectJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper >  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap &  column_cache,
Executor *  executor
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 146 of file PerfectJoinHashTable.cpp.

References CHECK, CHECK_EQ, anonymous_namespace{PerfectJoinHashTable.cpp}::get_bucketized_hash_entry_info(), anonymous_namespace{HashJoin.cpp}::get_cols(), getExpressionRange(), HashJoin::getHashTypeString(), HashEntryInfo::getNormalizedHashEntryCount(), Data_Namespace::GPU_LEVEL, Invalid, IS_EQUIVALENCE, kBW_EQ, ExpressionRange::makeIntRange(), PerfectJoinHashTable(), VLOG, and VLOGGING.

Referenced by HashJoin::getInstance().

154  {
155  decltype(std::chrono::steady_clock::now()) ts1, ts2;
156  if (VLOGGING(1)) {
157  VLOG(1) << "Building perfect hash table " << getHashTypeString(preferred_hash_type)
158  << " for qual: " << qual_bin_oper->toString();
159  ts1 = std::chrono::steady_clock::now();
160  }
161  CHECK(IS_EQUIVALENCE(qual_bin_oper->get_optype()));
162  const auto cols =
163  get_cols(qual_bin_oper.get(), *executor->getCatalog(), executor->temporary_tables_);
164  const auto inner_col = cols.first;
165  CHECK(inner_col);
166  const auto& ti = inner_col->get_type_info();
167  auto col_range =
168  getExpressionRange(ti.is_string() ? cols.second : inner_col, query_infos, executor);
169  if (col_range.getType() == ExpressionRangeType::Invalid) {
170  throw HashJoinFail(
171  "Could not compute range for the expressions involved in the equijoin");
172  }
173  if (ti.is_string()) {
174  // The nullable info must be the same as the source column.
175  const auto source_col_range = getExpressionRange(inner_col, query_infos, executor);
176  if (source_col_range.getType() == ExpressionRangeType::Invalid) {
177  throw HashJoinFail(
178  "Could not compute range for the expressions involved in the equijoin");
179  }
180  if (source_col_range.getIntMin() > source_col_range.getIntMax()) {
181  // If the inner column expression range is empty, use the inner col range
182  CHECK_EQ(source_col_range.getIntMin(), int64_t(0));
183  CHECK_EQ(source_col_range.getIntMax(), int64_t(-1));
184  col_range = source_col_range;
185  } else {
186  col_range = ExpressionRange::makeIntRange(
187  std::min(source_col_range.getIntMin(), col_range.getIntMin()),
188  std::max(source_col_range.getIntMax(), col_range.getIntMax()),
189  0,
190  source_col_range.hasNulls());
191  }
192  }
193  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
194  const auto max_hash_entry_count =
195  memory_level == Data_Namespace::MemoryLevel::GPU_LEVEL
196  ? static_cast<size_t>(std::numeric_limits<int32_t>::max() / sizeof(int32_t))
197  : static_cast<size_t>(std::numeric_limits<int32_t>::max());
198 
199  auto bucketized_entry_count_info = get_bucketized_hash_entry_info(
200  ti, col_range, qual_bin_oper->get_optype() == kBW_EQ);
201  auto bucketized_entry_count = bucketized_entry_count_info.getNormalizedHashEntryCount();
202 
203  if (bucketized_entry_count > max_hash_entry_count) {
204  throw TooManyHashEntries();
205  }
206 
207  if (qual_bin_oper->get_optype() == kBW_EQ &&
208  col_range.getIntMax() >= std::numeric_limits<int64_t>::max()) {
209  throw HashJoinFail("Cannot translate null value for kBW_EQ");
210  }
211  auto join_hash_table =
212  std::shared_ptr<PerfectJoinHashTable>(new PerfectJoinHashTable(qual_bin_oper,
213  inner_col,
214  query_infos,
215  memory_level,
216  join_type,
217  preferred_hash_type,
218  col_range,
219  column_cache,
220  executor,
221  device_count));
222  try {
223  join_hash_table->reify();
224  } catch (const TableMustBeReplicated& e) {
225  // Throw a runtime error to abort the query
226  join_hash_table->freeHashBufferMemory();
227  throw std::runtime_error(e.what());
228  } catch (const HashJoinFail& e) {
229  // HashJoinFail exceptions log an error and trigger a retry with a join loop (if
230  // possible)
231  join_hash_table->freeHashBufferMemory();
232  throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
233  "involved in equijoin | ") +
234  e.what());
235  } catch (const ColumnarConversionNotSupported& e) {
236  throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
237  e.what());
238  } catch (const OutOfMemory& e) {
239  throw HashJoinFail(
240  std::string("Ran out of memory while building hash tables for equijoin | ") +
241  e.what());
242  } catch (const std::exception& e) {
243  throw std::runtime_error(
244  std::string("Fatal error while attempting to build hash tables for join: ") +
245  e.what());
246  }
247  if (VLOGGING(1)) {
248  ts2 = std::chrono::steady_clock::now();
249  VLOG(1) << "Built perfect hash table "
250  << getHashTypeString(join_hash_table->getHashType()) << " in "
251  << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
252  << " ms";
253  }
254  return join_hash_table;
255 }
#define CHECK_EQ(x, y)
Definition: Logger.h:214
HashEntryInfo get_bucketized_hash_entry_info(SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:67
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
static ExpressionRange makeIntRange(const int64_t int_min, const int64_t int_max, const int64_t bucket, const bool has_nulls)
#define VLOGGING(n)
Definition: Logger.h:204
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:133
PerfectJoinHashTable(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Analyzer::ColumnVar *col_var, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const ExpressionRange &col_range, ColumnCacheMap &column_cache, Executor *executor, const int device_count)
size_t getNormalizedHashEntryCount() const
#define CHECK(condition)
Definition: Logger.h:206
Definition: sqldefs.h:31
if(yyssp >=yyss+yystacksize-1)
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:550
#define VLOG(n)
Definition: Logger.h:300

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Data_Namespace::MemoryLevel PerfectJoinHashTable::getMemoryLevel ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 87 of file PerfectJoinHashTable.h.

References memory_level_.

87  {
88  return memory_level_;
89  };
const Data_Namespace::MemoryLevel memory_level_
int PerfectJoinHashTable::initHashTableForDevice ( const ChunkKey &  chunk_key,
const JoinColumn &  join_column,
const InnerOuter &  cols,
const HashType  layout,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id 
)
private

Definition at line 486 of file PerfectJoinHashTable.cpp.

References PerfectJoinHashTableBuilder::allocateDeviceMemory(), CHECK, CHECK_EQ, CHECK_LE, CHECK_LT, col_range_, copy_to_gpu(), CPU, cpu_hash_table_buff_mutex_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, device_count_, executor_, anonymous_namespace{PerfectJoinHashTable.cpp}::get_bucketized_hash_entry_info(), PerfectJoinHashTableBuilder::getHashTable(), GPU, Data_Namespace::GPU_LEVEL, HashJoin::hash_tables_for_device_, initHashTableOnCpuFromCache(), PerfectJoinHashTableBuilder::initOneToManyHashTableOnCpu(), PerfectJoinHashTableBuilder::initOneToOneHashTableOnCpu(), isBitwiseEq(), join_type_, memory_level_, JoinColumn::num_elems, OneToOne, putHashTableOnCpuToCache(), shardCount(), and UNREACHABLE.

Referenced by reifyForDevice().

492  {
493  auto timer = DEBUG_TIMER(__func__);
494  const auto inner_col = cols.first;
495  CHECK(inner_col);
496 
497  auto hash_entry_info = get_bucketized_hash_entry_info(
498  inner_col->get_type_info(), col_range_, isBitwiseEq());
499  if (!hash_entry_info && layout == HashType::OneToOne) {
500  // TODO: what is this for?
501  return 0;
502  }
503 #ifndef HAVE_CUDA
504  CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
505 #endif
506  int err{0};
507  const int32_t hash_join_invalid_val{-1};
508  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
509  CHECK(!chunk_key.empty());
510 
511  auto hash_table = initHashTableOnCpuFromCache(chunk_key, join_column.num_elems, cols);
512  {
513  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
514  if (!hash_table) {
515  PerfectJoinHashTableBuilder builder(executor_->catalog_);
516  if (layout == HashType::OneToOne) {
517  builder.initOneToOneHashTableOnCpu(join_column,
518  col_range_,
519  isBitwiseEq(),
520  cols,
521  join_type_,
522  layout,
523  hash_entry_info,
524  hash_join_invalid_val,
525  executor_);
526  hash_table = builder.getHashTable();
527  } else {
528  builder.initOneToManyHashTableOnCpu(join_column,
529  col_range_,
530  isBitwiseEq(),
531  cols,
532  hash_entry_info,
533  hash_join_invalid_val,
534  executor_);
535  hash_table = builder.getHashTable();
536  }
537  } else {
538  if (layout == HashType::OneToOne &&
539  hash_table->getHashTableBufferSize(ExecutorDeviceType::CPU) >
540  hash_entry_info.getNormalizedHashEntryCount() * sizeof(int32_t)) {
541  // TODO: can this ever happen?
542  // Too many hash entries, need to retry with a 1:many table
543  throw NeedsOneToManyHash();
544  }
545  }
546  }
547  if (inner_col->get_table_id() > 0) {
548  putHashTableOnCpuToCache(chunk_key, join_column.num_elems, hash_table, cols);
549  }
550  // Transfer the hash table on the GPU if we've only built it on CPU
551  // but the query runs on GPU (join on dictionary encoded columns).
553 #ifdef HAVE_CUDA
554  const auto& ti = inner_col->get_type_info();
555  CHECK(ti.is_string());
556  auto catalog = executor_->getCatalog();
557  CHECK(catalog);
558  auto& data_mgr = catalog->getDataMgr();
559  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
560 
561  PerfectJoinHashTableBuilder gpu_builder(executor_->catalog_);
562  gpu_builder.allocateDeviceMemory(join_column,
563  hash_table->getLayout(),
564  hash_entry_info,
565  shardCount(),
566  device_id,
567  device_count_);
568  std::shared_ptr<PerfectHashTable> gpu_hash_table = gpu_builder.getHashTable();
569  CHECK(gpu_hash_table);
570  auto gpu_buffer_ptr = gpu_hash_table->getGpuBuffer();
571  CHECK(gpu_buffer_ptr);
572 
573  CHECK(hash_table);
574  // GPU size returns reserved size
575  CHECK_LE(hash_table->getHashTableBufferSize(ExecutorDeviceType::CPU),
576  gpu_hash_table->getHashTableBufferSize(ExecutorDeviceType::GPU));
577  copy_to_gpu(&data_mgr,
578  reinterpret_cast<CUdeviceptr>(gpu_buffer_ptr),
579  hash_table->getCpuBuffer(),
580  hash_table->getHashTableBufferSize(ExecutorDeviceType::CPU),
581  device_id);
582  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
583  hash_tables_for_device_[device_id] = std::move(gpu_hash_table);
584 #else
585  UNREACHABLE();
586 #endif
587  } else {
588  CHECK(hash_table);
589  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
590  hash_tables_for_device_[device_id] = hash_table;
591  }
592  } else {
593 #ifdef HAVE_CUDA
594  PerfectJoinHashTableBuilder builder(executor_->catalog_);
595  CHECK_EQ(Data_Namespace::GPU_LEVEL, effective_memory_level);
596  builder.allocateDeviceMemory(
597  join_column, layout, hash_entry_info, shardCount(), device_id, device_count_);
598  builder.initHashTableOnGpu(chunk_key,
599  join_column,
600  col_range_,
601  isBitwiseEq(),
602  cols,
603  join_type_,
604  layout,
605  hash_entry_info,
606  shardCount(),
607  hash_join_invalid_val,
608  device_id,
610  executor_);
611  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
612  hash_tables_for_device_[device_id] = builder.getHashTable();
613 #else
614  UNREACHABLE();
615 #endif
616  }
617 
618  return err;
619 }
#define CHECK_EQ(x, y)
Definition: Logger.h:214
HashEntryInfo get_bucketized_hash_entry_info(SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
const Data_Namespace::MemoryLevel memory_level_
size_t num_elems
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:270
#define UNREACHABLE()
Definition: Logger.h:250
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:30
std::shared_ptr< PerfectHashTable > initHashTableOnCpuFromCache(const ChunkKey &chunk_key, const size_t num_elements, const InnerOuter &cols)
#define CHECK_LT(x, y)
Definition: Logger.h:216
#define CHECK_LE(x, y)
Definition: Logger.h:217
#define CHECK(condition)
Definition: Logger.h:206
#define DEBUG_TIMER(name)
Definition: Logger.h:322
void putHashTableOnCpuToCache(const ChunkKey &chunk_key, const size_t num_elements, HashTableCacheValue hash_table, const InnerOuter &cols)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< PerfectHashTable > PerfectJoinHashTable::initHashTableOnCpuFromCache ( const ChunkKey &  chunk_key,
const size_t  num_elements,
const InnerOuter &  cols
)
private

Definition at line 646 of file PerfectJoinHashTable.cpp.

References CHECK_GE, col_range_, DEBUG_TIMER, hash_table_cache_, join_type_, and qual_bin_oper_.

Referenced by initHashTableForDevice().

649  {
650  auto timer = DEBUG_TIMER(__func__);
651  CHECK_GE(chunk_key.size(), size_t(2));
652  if (chunk_key[1] < 0) {
653  // Do not cache hash tables over intermediate results
654  return nullptr;
655  }
656  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
657  JoinHashTableCacheKey cache_key{col_range_,
658  *cols.first,
659  outer_col ? *outer_col : *cols.first,
660  num_elements,
661  chunk_key,
662  qual_bin_oper_->get_optype(),
663  join_type_};
664  auto hash_table_opt = (hash_table_cache_->get(cache_key));
665  return hash_table_opt ? *hash_table_opt : nullptr;
666 }
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
#define CHECK_GE(x, y)
Definition: Logger.h:219
static std::unique_ptr< HashTableCache< JoinHashTableCacheKey, HashTableCacheValue > > hash_table_cache_
#define DEBUG_TIMER(name)
Definition: Logger.h:322

+ Here is the caller graph for this function:

bool PerfectJoinHashTable::isBitwiseEq ( ) const
private

Definition at line 987 of file PerfectJoinHashTable.cpp.

References kBW_EQ, and qual_bin_oper_.

Referenced by codegenMatchingSet(), codegenSlot(), fetchColumnsForDevice(), getHashJoinArgs(), and initHashTableForDevice().

987  {
988  return qual_bin_oper_->get_optype() == kBW_EQ;
989 }
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
Definition: sqldefs.h:31

+ Here is the caller graph for this function:

size_t PerfectJoinHashTable::offsetBufferOff ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 795 of file PerfectJoinHashTable.cpp.

Referenced by toSet(), and toString().

795  {
796  return 0;
797 }

+ Here is the caller graph for this function:

size_t PerfectJoinHashTable::payloadBufferOff ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 803 of file PerfectJoinHashTable.cpp.

References getComponentBufferSize().

Referenced by toSet(), and toString().

803  {
804  return 2 * getComponentBufferSize();
805 }
size_t getComponentBufferSize() const noexcept override

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void PerfectJoinHashTable::putHashTableOnCpuToCache ( const ChunkKey &  chunk_key,
const size_t  num_elements,
HashTableCacheValue  hash_table,
const InnerOuter &  cols
)
private

Definition at line 668 of file PerfectJoinHashTable.cpp.

References CHECK, CHECK_GE, col_range_, hash_table_cache_, join_type_, and qual_bin_oper_.

Referenced by initHashTableForDevice().

671  {
672  CHECK_GE(chunk_key.size(), size_t(2));
673  if (chunk_key[1] < 0) {
674  // Do not cache hash tables over intermediate results
675  return;
676  }
677  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
678  JoinHashTableCacheKey cache_key{col_range_,
679  *cols.first,
680  outer_col ? *outer_col : *cols.first,
681  num_elements,
682  chunk_key,
683  qual_bin_oper_->get_optype(),
684  join_type_};
686  CHECK(hash_table && !hash_table->getGpuBuffer());
687  hash_table_cache_->insert(cache_key, hash_table);
688 }
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
#define CHECK_GE(x, y)
Definition: Logger.h:219
static std::unique_ptr< HashTableCache< JoinHashTableCacheKey, HashTableCacheValue > > hash_table_cache_
#define CHECK(condition)
Definition: Logger.h:206

+ Here is the caller graph for this function:

void PerfectJoinHashTable::reify ( )
private

Definition at line 303 of file PerfectJoinHashTable.cpp.

References catalog_, CHECK_EQ, CHECK_LT, HashJoin::checkHashJoinReplicationConstraint(), DEBUG_TIMER, device_count_, executor_, fetchColumnsForDevice(), HashJoin::freeHashBufferMemory(), genHashTableKey(), anonymous_namespace{HashJoin.cpp}::get_cols(), get_shard_count(), Catalog_Namespace::Catalog::getDataMgr(), getInnerQueryInfo(), Data_Namespace::GPU_LEVEL, hash_type_, InputTableInfo::info, inner_outer_pairs_, memory_level_, OneToMany, only_shards_for_device(), qual_bin_oper_, reifyForDevice(), shardCount(), and logger::thread_id().

303  {
304  auto timer = DEBUG_TIMER(__func__);
306  catalog_ = const_cast<Catalog_Namespace::Catalog*>(executor_->getCatalog());
307  const auto cols =
308  get_cols(qual_bin_oper_.get(), *catalog_, executor_->temporary_tables_);
309  const auto inner_col = cols.first;
311  inner_col->get_table_id(),
313  executor_);
314  const auto& query_info = getInnerQueryInfo(inner_col).info;
315  if (query_info.fragments.empty()) {
316  return;
317  }
318  if (query_info.getNumTuplesUpperBound() >
319  static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
320  throw TooManyHashEntries();
321  }
322  std::vector<std::future<void>> init_threads;
323  const int shard_count = shardCount();
324 
325  inner_outer_pairs_.push_back(cols);
326  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
327 
328  std::vector<ColumnsForDevice> columns_per_device;
329  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
330  try {
331  auto& data_mgr = catalog_->getDataMgr();
333  for (int device_id = 0; device_id < device_count_; ++device_id) {
334  dev_buff_owners.emplace_back(
335  std::make_unique<CudaAllocator>(&data_mgr, device_id));
336  }
337  }
338  for (int device_id = 0; device_id < device_count_; ++device_id) {
339  const auto fragments =
340  shard_count
341  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
342  : query_info.fragments;
343  const auto columns_for_device =
344  fetchColumnsForDevice(fragments,
345  device_id,
347  ? dev_buff_owners[device_id].get()
348  : nullptr);
349  columns_per_device.push_back(columns_for_device);
350  const auto hash_table_key = genHashTableKey(
351  fragments, inner_outer_pairs_.front().second, inner_outer_pairs_.front().first);
352  init_threads.push_back(std::async(std::launch::async,
354  this,
355  hash_table_key,
356  columns_per_device[device_id],
357  hash_type_,
358  device_id,
359  logger::thread_id()));
360  }
361  for (auto& init_thread : init_threads) {
362  init_thread.wait();
363  }
364  for (auto& init_thread : init_threads) {
365  init_thread.get();
366  }
367 
368  } catch (const NeedsOneToManyHash& e) {
371  init_threads.clear();
373  CHECK_EQ(dev_buff_owners.size(), size_t(device_count_));
374  }
375  CHECK_EQ(columns_per_device.size(), size_t(device_count_));
376  for (int device_id = 0; device_id < device_count_; ++device_id) {
377  const auto fragments =
378  shard_count
379  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
380  : query_info.fragments;
381  const auto hash_table_key = genHashTableKey(
382  fragments, inner_outer_pairs_.front().second, inner_outer_pairs_.front().first);
383  init_threads.push_back(std::async(std::launch::async,
385  this,
386  hash_table_key,
387  columns_per_device[device_id],
388  hash_type_,
389  device_id,
390  logger::thread_id()));
391  }
392  for (auto& init_thread : init_threads) {
393  init_thread.wait();
394  }
395  for (auto& init_thread : init_threads) {
396  init_thread.get();
397  }
398  }
399 }
void reifyForDevice(const ChunkKey &hash_table_key, const ColumnsForDevice &columns_for_device, const HashType layout, const int device_id, const logger::ThreadId parent_thread_id)
#define CHECK_EQ(x, y)
Definition: Logger.h:214
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:102
const Data_Namespace::MemoryLevel memory_level_
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:223
const InputTableInfo & getInnerQueryInfo(const Analyzer::ColumnVar *inner_col) const
ColumnsForDevice fetchColumnsForDevice(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
void freeHashBufferMemory()
Definition: HashJoin.h:257
std::vector< Fragmenter_Namespace::FragmentInfo > only_shards_for_device(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
static void checkHashJoinReplicationConstraint(const int table_id, const size_t shard_count, const Executor *executor)
Definition: HashJoin.cpp:532
std::vector< InnerOuter > inner_outer_pairs_
#define CHECK_LT(x, y)
Definition: Logger.h:216
ThreadId thread_id()
Definition: Logger.cpp:732
Catalog_Namespace::Catalog * catalog_
#define DEBUG_TIMER(name)
Definition: Logger.h:322
ChunkKey genHashTableKey(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Analyzer::Expr *outer_col, const Analyzer::ColumnVar *inner_col) const
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:560
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:550

+ Here is the call graph for this function:

void PerfectJoinHashTable::reifyForDevice ( const ChunkKey &  hash_table_key,
const ColumnsForDevice &  columns_for_device,
const HashType  layout,
const int  device_id,
const logger::ThreadId  parent_thread_id 
)
private

Definition at line 451 of file PerfectJoinHashTable.cpp.

References CHECK_EQ, DEBUG_TIMER_NEW_THREAD, getEffectiveMemoryLevel(), initHashTableForDevice(), inner_outer_pairs_, ColumnsForDevice::join_columns, OneToMany, OneToOne, and to_string().

Referenced by reify().

455  {
456  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
457  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
458 
459  CHECK_EQ(columns_for_device.join_columns.size(), size_t(1));
460  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
461  auto& join_column = columns_for_device.join_columns.front();
462  if (layout == HashType::OneToOne) {
463  const auto err = initHashTableForDevice(hash_table_key,
464  join_column,
465  inner_outer_pairs_.front(),
466  layout,
467  effective_memory_level,
468  device_id);
469  if (err) {
470  throw NeedsOneToManyHash();
471  }
472  } else {
473  const auto err = initHashTableForDevice(hash_table_key,
474  join_column,
475  inner_outer_pairs_.front(),
476  HashType::OneToMany,
477  effective_memory_level,
478  device_id);
479  if (err) {
480  throw std::runtime_error("Unexpected error building one to many hash table: " +
481  std::to_string(err));
482  }
483  }
484 }
#define CHECK_EQ(x, y)
Definition: Logger.h:214
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:327
std::string to_string(char const *&&v)
std::vector< InnerOuter > inner_outer_pairs_
int initHashTableForDevice(const ChunkKey &chunk_key, const JoinColumn &join_column, const InnerOuter &cols, const HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
const std::vector< JoinColumn > join_columns
Definition: HashJoin.h:79

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t PerfectJoinHashTable::shardCount ( ) const
private

Definition at line 981 of file PerfectJoinHashTable.cpp.

References executor_, get_shard_count(), Data_Namespace::GPU_LEVEL, memory_level_, and qual_bin_oper_.

Referenced by codegenMatchingSet(), codegenSlot(), initHashTableForDevice(), and reify().

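981  {
982  return memory_level_ == Data_Namespace::GPU_LEVEL
983  ? get_shard_count(qual_bin_oper_.get(), executor_)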
984  : 0;
985 }
const Data_Namespace::MemoryLevel memory_level_
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:560

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::set< DecodedJoinHashBufferEntry > PerfectJoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override virtual

Implements HashJoin.

Definition at line 861 of file PerfectJoinHashTable.cpp.

References copy_from_gpu(), countBufferOff(), executor_, getHashTableForDevice(), HashJoin::getJoinHashBuffer(), HashJoin::getJoinHashBufferSize(), GPU, offsetBufferOff(), payloadBufferOff(), and HashTable::toSet().

863  {
864  auto buffer = getJoinHashBuffer(device_type, device_id);
865  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
866  auto hash_table = getHashTableForDevice(device_id);
867 #ifdef HAVE_CUDA
868  std::unique_ptr<int8_t[]> buffer_copy;
869  if (device_type == ExecutorDeviceType::GPU) {
870  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
871 
872  copy_from_gpu(&executor_->getCatalog()->getDataMgr(),
873  buffer_copy.get(),
874  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
875  buffer_size,
876  device_id);
877  }
878  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
879 #else
880  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
881 #endif // HAVE_CUDA
882  auto ptr2 = ptr1 + offsetBufferOff();
883  auto ptr3 = ptr1 + countBufferOff();
884  auto ptr4 = ptr1 + payloadBufferOff();
885  return HashTable::toSet(0,
886  0,
887  hash_table ? hash_table->getEntryCount() : 0,
888  ptr1,
889  ptr2,
890  ptr3,
891  ptr4,
892  buffer_size);
893 }
int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
Definition: HashJoin.h:234
size_t offsetBufferOff() const noexcept override
unsigned long long CUdeviceptr
Definition: nocuda.h:27
size_t payloadBufferOff() const noexcept override
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
Definition: HashJoin.h:220
size_t countBufferOff() const noexcept override
static DecodedJoinHashBufferSet toSet(size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size)
Decode hash table into a std::set for easy inspection and validation.
Definition: HashTable.cpp:139
HashTable * getHashTableForDevice(const size_t device_id) const

+ Here is the call graph for this function:

std::string PerfectJoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
override virtual

Implements HashJoin.

Definition at line 824 of file PerfectJoinHashTable.cpp.

References copy_from_gpu(), countBufferOff(), executor_, getHashTableForDevice(), HashJoin::getHashTypeString(), HashJoin::getJoinHashBuffer(), HashJoin::getJoinHashBufferSize(), GPU, hash_type_, offsetBufferOff(), payloadBufferOff(), and HashTable::toString().

826  {
827  auto buffer = getJoinHashBuffer(device_type, device_id);
828  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
829  auto hash_table = getHashTableForDevice(device_id);
830 #ifdef HAVE_CUDA
831  std::unique_ptr<int8_t[]> buffer_copy;
832  if (device_type == ExecutorDeviceType::GPU) {
833  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
834 
835  copy_from_gpu(&executor_->getCatalog()->getDataMgr(),
836  buffer_copy.get(),
837  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
838  buffer_size,
839  device_id);
840  }
841  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
842 #else
843  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
844 #endif // HAVE_CUDA
845  auto ptr2 = ptr1 + offsetBufferOff();
846  auto ptr3 = ptr1 + countBufferOff();
847  auto ptr4 = ptr1 + payloadBufferOff();
848  return HashTable::toString("perfect",
849  getHashTypeString(hash_type_),
850  0,
851  0,
852  hash_table ? hash_table->getEntryCount() : 0,
853  ptr1,
854  ptr2,
855  ptr3,
856  ptr4,
857  buffer_size,
858  raw);
859 }
int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
Definition: HashJoin.h:234
size_t offsetBufferOff() const noexcept override
unsigned long long CUdeviceptr
Definition: nocuda.h:27
size_t payloadBufferOff() const noexcept override
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:133
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
Definition: HashJoin.h:220
static std::string toString(const std::string &type, const std::string &layout_type, size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size, bool raw=false)
Decode hash table into a human-readable string.
Definition: HashTable.cpp:226
size_t countBufferOff() const noexcept override
HashTable * getHashTableForDevice(const size_t device_id) const

+ Here is the call graph for this function:

Member Data Documentation

Catalog_Namespace::Catalog* PerfectJoinHashTable::catalog_
private

Definition at line 134 of file PerfectJoinHashTable.h.

Referenced by fetchColumnsForDevice(), and reify().

ExpressionRange PerfectJoinHashTable::col_range_
private
std::shared_ptr<Analyzer::ColumnVar> PerfectJoinHashTable::col_var_
private

Definition at line 194 of file PerfectJoinHashTable.h.

Referenced by getInnerTableId(), and getInnerTableRteIdx().

ColumnCacheMap& PerfectJoinHashTable::column_cache_
private

Definition at line 202 of file PerfectJoinHashTable.h.

Referenced by fetchColumnsForDevice().

std::mutex PerfectJoinHashTable::cpu_hash_table_buff_mutex_
private

Definition at line 199 of file PerfectJoinHashTable.h.

Referenced by initHashTableForDevice().

const int PerfectJoinHashTable::device_count_
private
HashType PerfectJoinHashTable::hash_type_
private

Definition at line 197 of file PerfectJoinHashTable.h.

Referenced by getHashType(), reify(), and toString().

std::vector<InnerOuter> PerfectJoinHashTable::inner_outer_pairs_
private

Definition at line 133 of file PerfectJoinHashTable.h.

Referenced by fetchColumnsForDevice(), reify(), and reifyForDevice().

const JoinType PerfectJoinHashTable::join_type_
private
const Data_Namespace::MemoryLevel PerfectJoinHashTable::memory_level_
private
std::shared_ptr<Analyzer::BinOper> PerfectJoinHashTable::qual_bin_oper_
private
const std::vector<InputTableInfo>& PerfectJoinHashTable::query_infos_
private

Definition at line 195 of file PerfectJoinHashTable.h.

Referenced by getInnerQueryInfo().


The documentation for this class was generated from the following files: