OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
PerfectJoinHashTable Class Reference

#include <PerfectJoinHashTable.h>

+ Inheritance diagram for PerfectJoinHashTable:
+ Collaboration diagram for PerfectJoinHashTable:

Classes

struct  AlternativeCacheKeyForPerfectHashJoin
 

Public Member Functions

std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
std::set
< DecodedJoinHashBufferEntry > 
toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
shared::TableKey getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
std::string getHashJoinType () const final
 
const RegisteredQueryHint & getRegisteredQueryHint ()
 
BucketizedHashEntryInfo getHashEntryInfo () const
 
size_t getNormalizedHashEntryCount () const
 
virtual ~PerfectJoinHashTable ()
 
- Public Member Functions inherited from HashJoin
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int8_t * getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static std::shared_ptr
< PerfectJoinHashTable > 
getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hints, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static HashtableRecycler * getHashTableCache ()
 
static HashingSchemeRecycler * getHashingSchemeCache ()
 
static void invalidateCache ()
 
static void markCachedItemAsDirty (size_t table_key)
 
- Static Public Member Functions inherited from HashJoin
static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, const Catalog_Namespace::Catalog &catalog1, std::string_view table2, std::string_view column2, const Catalog_Namespace::Catalog &catalog2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::pair< std::string,
std::shared_ptr< HashJoin > > 
getSyntheticInstance (std::vector< std::shared_ptr< Analyzer::BinOper >>, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 
static shared::TableKey getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static bool canAccessHashTable (bool allow_hash_table_recycling, bool invalid_cache_key, JoinType join_type)
 
static void checkHashJoinReplicationConstraint (const shared::TableKey &table_key, const size_t shard_count, const Executor *executor)
 
static std::pair< InnerOuter,
InnerOuterStringOpInfos > 
normalizeColumnPair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
 
template<typename T >
static const T * getHashJoinColumn (const Analyzer::Expr *expr)
 
static std::pair< std::vector
< InnerOuter >, std::vector
< InnerOuterStringOpInfos > > 
normalizeColumnPairs (const Analyzer::BinOper *condition, const TemporaryTables *temporary_tables)
 
static std::vector< int > collectFragmentIds (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs={})
 
static std::vector< const
StringDictionaryProxy::IdMap * > 
translateCompositeStrDictProxies (const CompositeKeyInfo &composite_key_info, const std::vector< InnerOuterStringOpInfos > &string_op_infos_for_keys, const Executor *executor)
 
static std::pair< const
StringDictionaryProxy
*, StringDictionaryProxy * > 
getStrDictProxies (const InnerOuter &cols, const Executor *executor, const bool has_string_ops)
 
static const
StringDictionaryProxy::IdMap * 
translateInnerToOuterStrDictProxies (const InnerOuter &cols, const InnerOuterStringOpInfos &inner_outer_string_op_infos, ExpressionRange &old_col_range, const Executor *executor)
 

Private Member Functions

bool isOneToOneHashPossible (const std::vector< ColumnsForDevice > &columns_per_device) const
 
ColumnsForDevice fetchColumnsForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
 
void reifyForDevice (const ChunkKey &hash_table_key, const ColumnsForDevice &columns_for_device, const HashType layout, const int device_id, const logger::ThreadLocalIds)
 
int initHashTableForDevice (const ChunkKey &chunk_key, const JoinColumn &join_column, const InnerOuter &cols, const HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
 
Data_Namespace::MemoryLevel getEffectiveMemoryLevel (const std::vector< InnerOuter > &inner_outer_pairs) const
 
 PerfectJoinHashTable (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Analyzer::ColumnVar *col_var, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const ExpressionRange &col_range, const ExpressionRange &rhs_source_col_range, const BucketizedHashEntryInfo hash_entry_info, ColumnCacheMap &column_cache, Executor *executor, const int device_count, const RegisteredQueryHint &query_hints, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map, const InnerOuterStringOpInfos &inner_outer_string_op_infos={})
 
ChunkKey genChunkKey (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Analyzer::Expr *outer_col, const Analyzer::ColumnVar *inner_col) const
 
void reify ()
 
std::shared_ptr< PerfectHashTable > initHashTableOnCpuFromCache (QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
 
void putHashTableOnCpuToCache (QueryPlanHash key, CacheItemType item_type, std::shared_ptr< PerfectHashTable > hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
 
const InputTableInfo & getInnerQueryInfo (const Analyzer::ColumnVar *inner_col) const
 
size_t shardCount () const
 
llvm::Value * codegenHashTableLoad (const size_t table_idx)
 
std::vector< llvm::Value * > getHashJoinArgs (llvm::Value *hash_ptr, llvm::Value *key_lvs, const Analyzer::Expr *key_col, const int shard_count, const CompilationOptions &co)
 
bool isBitwiseEq () const override
 
size_t getComponentBufferSize () const noexcept override
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
void copyCpuHashTableToGpu (std::shared_ptr< PerfectHashTable > &cpu_hash_table, const int device_id, Data_Namespace::DataMgr *data_mgr)
 

Static Private Member Functions

static QueryPlanHash getAlternativeCacheKey (AlternativeCacheKeyForPerfectHashJoin &info)
 

Private Attributes

std::vector< InnerOuter > inner_outer_pairs_
 
std::shared_ptr
< Analyzer::BinOper > 
qual_bin_oper_
 
const JoinType join_type_
 
std::shared_ptr
< Analyzer::ColumnVar > 
col_var_
 
const std::vector
< InputTableInfo > & 
query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
HashType hash_type_
 
std::mutex cpu_hash_table_buff_mutex_
 
std::mutex str_proxy_translation_mutex_
 
const
StringDictionaryProxy::IdMap * 
str_proxy_translation_map_ {nullptr}
 
ExpressionRange col_range_
 
ExpressionRange rhs_source_col_range_
 
BucketizedHashEntryInfo hash_entry_info_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
const int device_count_
 
RegisteredQueryHint query_hints_
 
bool needs_dict_translation_
 
HashTableBuildDagMap hashtable_build_dag_map_
 
std::vector< QueryPlanHash > hashtable_cache_key_
 
HashtableCacheMetaInfo hashtable_cache_meta_info_
 
std::unordered_set< size_t > table_keys_
 
const TableIdToNodeMap table_id_to_node_map_
 
const InnerOuterStringOpInfos inner_outer_string_op_infos_
 

Static Private Attributes

static std::unique_ptr
< HashtableRecycler > 
hash_table_cache_
 
static std::unique_ptr
< HashingSchemeRecycler > 
hash_table_layout_cache_
 

Additional Inherited Members

- Static Protected Member Functions inherited from HashJoin
static llvm::Value * codegenColOrStringOper (const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
 
- Protected Attributes inherited from HashJoin
std::vector< std::shared_ptr
< HashTable > > 
hash_tables_for_device_
 

Detailed Description

Definition at line 50 of file PerfectJoinHashTable.h.

Constructor & Destructor Documentation

virtual PerfectJoinHashTable::~PerfectJoinHashTable ( )
inline, virtual

Definition at line 145 of file PerfectJoinHashTable.h.

145 {}
PerfectJoinHashTable::PerfectJoinHashTable ( const std::shared_ptr< Analyzer::BinOper > qual_bin_oper,
const Analyzer::ColumnVar * col_var,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
const ExpressionRange & col_range,
const ExpressionRange & rhs_source_col_range,
const BucketizedHashEntryInfo  hash_entry_info,
ColumnCacheMap & column_cache,
Executor * executor,
const int  device_count,
const RegisteredQueryHint & query_hints,
const HashTableBuildDagMap & hashtable_build_dag_map,
const TableIdToNodeMap & table_id_to_node_map,
const InnerOuterStringOpInfos & inner_outer_string_op_infos = {} 
)
inline, private

Definition at line 175 of file PerfectJoinHashTable.h.

Referenced by getInstance().

190  {})
191  : qual_bin_oper_(qual_bin_oper)
192  , join_type_(join_type)
193  , col_var_(std::dynamic_pointer_cast<Analyzer::ColumnVar>(col_var->deep_copy()))
194  , query_infos_(query_infos)
195  , memory_level_(memory_level)
196  , hash_type_(preferred_hash_type)
197  , col_range_(col_range)
198  , rhs_source_col_range_(rhs_source_col_range)
199  , hash_entry_info_(hash_entry_info)
200  , executor_(executor)
201  , column_cache_(column_cache)
202  , device_count_(device_count)
203  , query_hints_(query_hints)
204  , needs_dict_translation_(false)
205  , hashtable_build_dag_map_(hashtable_build_dag_map)
206  , table_id_to_node_map_(table_id_to_node_map)
207  , inner_outer_string_op_infos_(inner_outer_string_op_infos) {
211  }
BucketizedHashEntryInfo hash_entry_info_
ExpressionRange rhs_source_col_range_
const Data_Namespace::MemoryLevel memory_level_
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
const TableIdToNodeMap table_id_to_node_map_
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:363
HashTableBuildDagMap hashtable_build_dag_map_
std::shared_ptr< Analyzer::Expr > deep_copy() const override
Definition: Analyzer.cpp:66
#define CHECK_GT(x, y)
Definition: Logger.h:305
const std::vector< InputTableInfo > & query_infos_
ColumnCacheMap & column_cache_
std::shared_ptr< Analyzer::ColumnVar > col_var_
RegisteredQueryHint query_hints_
const InnerOuterStringOpInfos inner_outer_string_op_infos_
ExpressionRangeType getType() const
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

Member Function Documentation

llvm::Value * PerfectJoinHashTable::codegenHashTableLoad ( const size_t  table_idx)
private

Definition at line 951 of file PerfectJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, HashJoin::codegenHashTableLoad(), executor_, and get_arg_by_name().

Referenced by codegenMatchingSet(), and codegenSlot().

951  {
952  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
953  const auto hash_ptr = HashJoin::codegenHashTableLoad(table_idx, executor_);
954  if (hash_ptr->getType()->isIntegerTy(64)) {
955  return hash_ptr;
956  }
957  CHECK(hash_ptr->getType()->isPointerTy());
958  return executor_->cgen_state_->ir_builder_.CreatePtrToInt(
959  get_arg_by_name(executor_->cgen_state_->row_func_, "join_hash_tables"),
960  llvm::Type::getInt64Ty(executor_->cgen_state_->context_));
961 }
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
Definition: HashJoin.cpp:257
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

HashJoinMatchingSet PerfectJoinHashTable::codegenMatchingSet ( const CompilationOptions & co,
const size_t  index 
)
override, virtual

Implements HashJoin.

Definition at line 1017 of file PerfectJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, HashJoin::codegenColOrStringOper(), codegenHashTableLoad(), HashJoin::codegenMatchingSet(), executor_, anonymous_namespace{HashJoin.cpp}::get_cols(), get_max_rte_scan_table(), getComponentBufferSize(), getHashJoinArgs(), inner_outer_string_op_infos_, isBitwiseEq(), kDATE, qual_bin_oper_, self_join_not_covered_by_left_deep_tree(), and shardCount().

1018  {
1019  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1020  const auto cols = get_cols(qual_bin_oper_.get(), executor_->temporary_tables_).first;
1021  auto key_col = cols.second;
1022  CHECK(key_col);
1023  auto val_col = cols.first;
1024  CHECK(val_col);
1025  auto pos_ptr = codegenHashTableLoad(index);
1026  CHECK(pos_ptr);
1027  const int shard_count = shardCount();
1028  const auto key_col_var = dynamic_cast<const Analyzer::ColumnVar*>(key_col);
1029  const auto val_col_var = dynamic_cast<const Analyzer::ColumnVar*>(val_col);
1030  if (key_col_var && val_col_var &&
1031  self_join_not_covered_by_left_deep_tree(
1032  key_col_var,
1033  val_col_var,
1034  get_max_rte_scan_table(executor_->cgen_state_->scan_idx_to_hash_pos_))) {
1035  throw std::runtime_error(
1036  "Query execution fails because the query contains not supported self-join "
1037  "pattern. We suspect the query requires multiple left-deep join tree due to "
1038  "the "
1039  "join condition of the self-join and is not supported for now. Please consider "
1040  "rewriting table order in "
1041  "FROM clause.");
1042  }
1043  CodeGenerator code_generator(executor_);
1044 
1045  auto key_lv = HashJoin::codegenColOrStringOper(
1046  key_col, inner_outer_string_op_infos_.second, code_generator, co);
1047 
1048  auto hash_join_idx_args = getHashJoinArgs(pos_ptr, key_lv, key_col, shard_count, co);
1049  const int64_t sub_buff_size = getComponentBufferSize();
1050  const auto& key_col_ti = key_col->get_type_info();
1051 
1052  auto bucketize = (key_col_ti.get_type() == kDATE);
1053  return HashJoin::codegenMatchingSet(hash_join_idx_args,
1054  shard_count,
1055  !key_col_ti.get_notnull(),
1056  isBitwiseEq(),
1057  sub_buff_size,
1058  executor_,
1059  bucketize);
1060 }
llvm::Value * codegenHashTableLoad(const size_t table_idx)
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t)=0
bool self_join_not_covered_by_left_deep_tree(const Analyzer::ColumnVar *key_side, const Analyzer::ColumnVar *val_side, const int max_rte_covered)
size_t getComponentBufferSize() const noexcept override
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:1039
static llvm::Value * codegenColOrStringOper(const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
Definition: HashJoin.cpp:545
const int get_max_rte_scan_table(std::unordered_map< int, llvm::Value * > &scan_idx_to_hash_pos)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::vector< llvm::Value * > getHashJoinArgs(llvm::Value *hash_ptr, llvm::Value *key_lvs, const Analyzer::Expr *key_col, const int shard_count, const CompilationOptions &co)
Definition: sqltypes.h:70
const InnerOuterStringOpInfos inner_outer_string_op_infos_
#define CHECK(condition)
Definition: Logger.h:291
bool isBitwiseEq() const override

+ Here is the call graph for this function:

llvm::Value * PerfectJoinHashTable::codegenSlot ( const CompilationOptions & co,
const size_t  index 
)
override, virtual

Implements HashJoin.

Definition at line 1196 of file PerfectJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, CHECK, HashJoin::codegenColOrStringOper(), codegenHashTableLoad(), executor_, anonymous_namespace{HashJoin.cpp}::get_cols(), get_max_rte_scan_table(), Analyzer::Expr::get_type_info(), getHashJoinArgs(), getHashType(), isBitwiseEq(), kDATE, OneToOne, qual_bin_oper_, self_join_not_covered_by_left_deep_tree(), and shardCount().

1197  {
1198  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1199  using namespace std::string_literals;
1200 
1202  const auto cols_and_string_op_infos =
1203  get_cols(qual_bin_oper_.get(), executor_->temporary_tables_);
1204  const auto& cols = cols_and_string_op_infos.first;
1205  const auto& inner_outer_string_op_infos = cols_and_string_op_infos.second;
1206  auto key_col = cols.second;
1207  CHECK(key_col);
1208  auto val_col = cols.first;
1209  CHECK(val_col);
1210  CodeGenerator code_generator(executor_);
1211  const auto key_col_var = dynamic_cast<const Analyzer::ColumnVar*>(key_col);
1212  const auto val_col_var = dynamic_cast<const Analyzer::ColumnVar*>(val_col);
1213  if (key_col_var && val_col_var &&
1214  self_join_not_covered_by_left_deep_tree(
1215  key_col_var,
1216  val_col_var,
1217  get_max_rte_scan_table(executor_->cgen_state_->scan_idx_to_hash_pos_))) {
1218  throw std::runtime_error(
1219  "Query execution failed because the query contains not supported self-join "
1220  "pattern. We suspect the query requires multiple left-deep join tree due to "
1221  "the join condition of the self-join and is not supported for now. Please "
1222  "consider chaning the table order in the FROM clause.");
1223  }
1224 
1225  auto key_lv = HashJoin::codegenColOrStringOper(
1226  key_col, inner_outer_string_op_infos.second, code_generator, co);
1227 
1228  // CHECK_EQ(size_t(1), key_lvs.size());
1229  auto hash_ptr = codegenHashTableLoad(index);
1230  CHECK(hash_ptr);
1231  const int shard_count = shardCount();
1232  const auto hash_join_idx_args =
1233  getHashJoinArgs(hash_ptr, key_lv, key_col, shard_count, co);
1234 
1235  const auto& key_col_ti = key_col->get_type_info();
1236  std::string fname((key_col_ti.get_type() == kDATE) ? "bucketized_hash_join_idx"s
1237  : "hash_join_idx"s);
1238 
1239  if (isBitwiseEq()) {
1240  fname += "_bitwise";
1241  }
1242  if (shard_count) {
1243  fname += "_sharded";
1244  }
1245 
1246  if (!isBitwiseEq() && !key_col_ti.get_notnull()) {
1247  fname += "_nullable";
1248  }
1249  return executor_->cgen_state_->emitCall(fname, hash_join_idx_args);
1250 }
llvm::Value * codegenHashTableLoad(const size_t table_idx)
bool self_join_not_covered_by_left_deep_tree(const Analyzer::ColumnVar *key_side, const Analyzer::ColumnVar *val_side, const int max_rte_covered)
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:1039
static llvm::Value * codegenColOrStringOper(const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
Definition: HashJoin.cpp:545
const int get_max_rte_scan_table(std::unordered_map< int, llvm::Value * > &scan_idx_to_hash_pos)
HashType getHashType() const noexcept override
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::vector< llvm::Value * > getHashJoinArgs(llvm::Value *hash_ptr, llvm::Value *key_lvs, const Analyzer::Expr *key_col, const int shard_count, const CompilationOptions &co)
Definition: sqltypes.h:70
#define CHECK(condition)
Definition: Logger.h:291
bool isBitwiseEq() const override

+ Here is the call graph for this function:

void PerfectJoinHashTable::copyCpuHashTableToGpu ( std::shared_ptr< PerfectHashTable > &  cpu_hash_table,
const int  device_id,
Data_Namespace::DataMgr * data_mgr 
)
private

Definition at line 1091 of file PerfectJoinHashTable.cpp.

References PerfectJoinHashTableBuilder::allocateDeviceMemory(), CHECK, CHECK_EQ, CHECK_LE, CHECK_LT, CPU, cpu_hash_table_buff_mutex_, device_count_, executor_, PerfectJoinHashTableBuilder::getHashTable(), getQueryEngineCudaStreamForDevice(), GPU, Data_Namespace::GPU_LEVEL, HashJoin::hash_tables_for_device_, memory_level_, and shardCount().

Referenced by initHashTableForDevice(), and reify().

1094  {
1096  CHECK(data_mgr);
1097  CHECK(cpu_hash_table);
1098 
1099  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
1100  PerfectJoinHashTableBuilder gpu_builder;
1101  gpu_builder.allocateDeviceMemory(cpu_hash_table->getColumnNumElems(),
1102  cpu_hash_table->getLayout(),
1103  cpu_hash_table->getHashEntryInfo(),
1104  shardCount(),
1105  device_id,
1106  device_count_,
1107  executor_);
1108 
1109  std::shared_ptr<PerfectHashTable> gpu_hash_table = gpu_builder.getHashTable();
1110  CHECK(gpu_hash_table);
1111  auto gpu_buffer_ptr = gpu_hash_table->getGpuBuffer();
1112  CHECK(gpu_buffer_ptr);
1113 
1114  CHECK_LE(cpu_hash_table->getHashTableBufferSize(ExecutorDeviceType::CPU),
1115  gpu_hash_table->getHashTableBufferSize(ExecutorDeviceType::GPU));
1116 
1117  auto device_allocator = std::make_unique<CudaAllocator>(
1118  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
1119  device_allocator->copyToDevice(
1120  gpu_buffer_ptr,
1121  cpu_hash_table->getCpuBuffer(),
1122  cpu_hash_table->getHashTableBufferSize(ExecutorDeviceType::CPU));
1123  CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
1124  hash_tables_for_device_[device_id] = std::move(gpu_hash_table);
1125 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const Data_Namespace::MemoryLevel memory_level_
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:363
void allocateDeviceMemory(const size_t num_column_elems, const HashType layout, BucketizedHashEntryInfo hash_entry_info, const size_t shard_count, const int device_id, const int device_count, const Executor *executor)
std::unique_ptr< PerfectHashTable > getHashTable()
#define CHECK_LT(x, y)
Definition: Logger.h:303
#define CHECK_LE(x, y)
Definition: Logger.h:304
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t PerfectJoinHashTable::countBufferOff ( ) const
override, virtual, noexcept

Implements HashJoin.

Definition at line 1066 of file PerfectJoinHashTable.cpp.

References getComponentBufferSize().

Referenced by toSet(), and toString().

1066  {
1067  return getComponentBufferSize();
1068 }
size_t getComponentBufferSize() const noexcept override

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ColumnsForDevice PerfectJoinHashTable::fetchColumnsForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
DeviceAllocator * dev_buff_owner 
)
private

Definition at line 651 of file PerfectJoinHashTable.cpp.

References column_cache_, executor_, HashJoin::fetchJoinColumn(), get_column_descriptor_maybe(), get_effective_memory_level(), get_join_column_type_kind(), inline_fixed_encoding_null_val(), inner_outer_pairs_, isBitwiseEq(), memory_level_, and needs_dict_translation_.

Referenced by reify().

654  {
655  std::vector<JoinColumn> join_columns;
656  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
657  std::vector<JoinColumnTypeInfo> join_column_types;
658  std::vector<JoinBucketInfo> join_bucket_info;
659  std::vector<std::shared_ptr<void>> malloc_owner;
660  const auto effective_memory_level =
662  for (const auto& inner_outer_pair : inner_outer_pairs_) {
663  const auto inner_col = inner_outer_pair.first;
664  const auto inner_cd = get_column_descriptor_maybe(inner_col->getColumnKey());
665  if (inner_cd && inner_cd->isVirtualCol) {
667  }
668  join_columns.emplace_back(fetchJoinColumn(inner_col,
669  fragments,
670  effective_memory_level,
671  device_id,
672  chunks_owner,
673  dev_buff_owner,
674  malloc_owner,
675  executor_,
676  &column_cache_));
677  const auto& ti = inner_col->get_type_info();
678  join_column_types.emplace_back(JoinColumnTypeInfo{static_cast<size_t>(ti.get_size()),
679  0,
680  0,
682  isBitwiseEq(),
683  0,
685  }
686  return {join_columns, join_column_types, chunks_owner, join_bucket_info, malloc_owner};
687 }
const Data_Namespace::MemoryLevel memory_level_
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
Definition: HashJoin.cpp:58
Data_Namespace::MemoryLevel get_effective_memory_level(const Data_Namespace::MemoryLevel memory_level, const bool needs_dict_translation)
const ColumnDescriptor * get_column_descriptor_maybe(const shared::ColumnKey &column_key)
Definition: Execute.h:220
std::vector< InnerOuter > inner_outer_pairs_
ColumnCacheMap & column_cache_
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
bool isBitwiseEq() const override

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ChunkKey PerfectJoinHashTable::genChunkKey ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const Analyzer::Expr * outer_col,
const Analyzer::ColumnVar * inner_col 
) const
private

Definition at line 889 of file PerfectJoinHashTable.cpp.

References gpu_enabled::accumulate(), CHECK, CHECK_EQ, shared::ColumnKey::db_id, Analyzer::Expr::get_type_info(), Analyzer::ColumnVar::getColumnKey(), getInnerQueryInfo(), InputTableInfo::info, and kENCODING_DICT.

Referenced by reify().

892  {
893  const auto& column_key = inner_col->getColumnKey();
894  ChunkKey chunk_key{column_key.db_id, column_key.table_id, column_key.column_id};
895  const auto& ti = inner_col->get_type_info();
896  std::for_each(fragments.cbegin(), fragments.cend(), [&chunk_key](const auto& fragment) {
897  // collect all frag ids to correctly generated cache key for a cached hash table
898  chunk_key.push_back(fragment.fragmentId);
899  });
900  if (ti.is_string()) {
901  CHECK_EQ(kENCODING_DICT, ti.get_compression());
902  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_col_expr);
903  CHECK(outer_col);
904  const auto& outer_query_info = getInnerQueryInfo(outer_col).info;
905  size_t outer_elem_count =
906  std::accumulate(outer_query_info.fragments.begin(),
907  outer_query_info.fragments.end(),
908  size_t(0),
909  [&chunk_key](size_t sum, const auto& fragment) {
910  chunk_key.push_back(fragment.fragmentId);
911  return sum + fragment.getNumTuples();
912  });
913  chunk_key.push_back(outer_elem_count);
914  }
915 
916  return chunk_key;
917 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< int > ChunkKey
Definition: types.h:36
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
const InputTableInfo & getInnerQueryInfo(const Analyzer::ColumnVar *inner_col) const
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
const shared::ColumnKey & getColumnKey() const
Definition: Analyzer.h:198
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static QueryPlanHash PerfectJoinHashTable::getAlternativeCacheKey ( AlternativeCacheKeyForPerfectHashJoin info)
inline static private

Definition at line 261 of file PerfectJoinHashTable.h.

References PerfectJoinHashTable::AlternativeCacheKeyForPerfectHashJoin::chunk_key, PerfectJoinHashTable::AlternativeCacheKeyForPerfectHashJoin::col_range, Analyzer::Expr::get_type_info(), PerfectJoinHashTable::AlternativeCacheKeyForPerfectHashJoin::inner_col, PerfectJoinHashTable::AlternativeCacheKeyForPerfectHashJoin::inner_outer_string_op_infos, SQLTypeInfo::is_string(), PerfectJoinHashTable::AlternativeCacheKeyForPerfectHashJoin::join_type, PerfectJoinHashTable::AlternativeCacheKeyForPerfectHashJoin::num_elements, PerfectJoinHashTable::AlternativeCacheKeyForPerfectHashJoin::optype, PerfectJoinHashTable::AlternativeCacheKeyForPerfectHashJoin::outer_col, toString(), ExpressionRange::toString(), and Analyzer::ColumnVar::toString().

Referenced by reify().

262  {
263  auto hash = boost::hash_value(::toString(info.chunk_key));
264  boost::hash_combine(hash, info.inner_col->toString());
265  if (info.inner_col->get_type_info().is_string()) {
266  boost::hash_combine(hash, info.outer_col->toString());
267  }
268  boost::hash_combine(hash, ::toString(info.inner_outer_string_op_infos));
269  boost::hash_combine(hash, info.col_range.toString());
270  boost::hash_combine(hash, info.num_elements);
271  boost::hash_combine(hash, info.optype);
272  boost::hash_combine(hash, info.join_type);
273  return hash;
274  }
std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t PerfectJoinHashTable::getComponentBufferSize ( ) const
override private virtual noexcept

Implements HashJoin.

Definition at line 1074 of file PerfectJoinHashTable.cpp.

References HashJoin::hash_tables_for_device_, and OneToMany.

Referenced by codegenMatchingSet(), countBufferOff(), and payloadBufferOff().

1074  {
1075  if (hash_tables_for_device_.empty()) {
1076  return 0;
1077  }
1078  auto hash_table = hash_tables_for_device_.front();
1079  if (hash_table && hash_table->getLayout() == HashType::OneToMany) {
1080  return hash_table->getEntryCount() * sizeof(int32_t);
1081  } else {
1082  return 0;
1083  }
1084 }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:363

+ Here is the caller graph for this function:

int PerfectJoinHashTable::getDeviceCount ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 92 of file PerfectJoinHashTable.h.

References device_count_.

92 { return device_count_; };
Data_Namespace::MemoryLevel PerfectJoinHashTable::getEffectiveMemoryLevel ( const std::vector< InnerOuter > &  inner_outer_pairs) const
private

Definition at line 641 of file PerfectJoinHashTable.cpp.

References Data_Namespace::CPU_LEVEL, executor_, inner_outer_string_op_infos_, memory_level_, needs_dict_translation_, and needs_dictionary_translation().

Referenced by reify().

642  {
644  inner_outer_pairs.front(), inner_outer_string_op_infos_, executor_)) {
647  }
648  return memory_level_;
649 }
const Data_Namespace::MemoryLevel memory_level_
bool needs_dictionary_translation(const std::vector< InnerOuter > &inner_outer_pairs, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs, const Executor *executor)
const InnerOuterStringOpInfos inner_outer_string_op_infos_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

BucketizedHashEntryInfo PerfectJoinHashTable::getHashEntryInfo ( ) const
inline

Definition at line 139 of file PerfectJoinHashTable.h.

References hash_entry_info_.

139 { return hash_entry_info_; }
BucketizedHashEntryInfo hash_entry_info_
static HashingSchemeRecycler* PerfectJoinHashTable::getHashingSchemeCache ( )
inline static

Definition at line 106 of file PerfectJoinHashTable.h.

References CHECK, and hash_table_layout_cache_.

106  {
108  return hash_table_layout_cache_.get();
109  }
static std::unique_ptr< HashingSchemeRecycler > hash_table_layout_cache_
#define CHECK(condition)
Definition: Logger.h:291
std::vector< llvm::Value * > PerfectJoinHashTable::getHashJoinArgs ( llvm::Value *  hash_ptr,
llvm::Value *  key_lvs,
const Analyzer::Expr key_col,
const int  shard_count,
const CompilationOptions co 
)
private

Definition at line 963 of file PerfectJoinHashTable.cpp.

References AUTOMATIC_IR_METADATA, BucketizedHashEntryInfo::bucket_normalization, CHECK, col_range_, device_count_, executor_, anonymous_namespace{PerfectJoinHashTable.cpp}::get_hash_entry_count(), get_logical_type_info(), SQLTypeInfo::get_type(), Analyzer::Expr::get_type_info(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), hash_entry_info_, inline_fixed_encoding_null_val(), isBitwiseEq(), and kDATE.

Referenced by codegenMatchingSet(), and codegenSlot().

968  {
969  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
970  CodeGenerator code_generator(executor_);
971  CHECK(key_lv);
972  // Todo(todd): Fix below, it's gross (but didn't want to redo the plumbing yet)
973  // const auto key_lv = key_lvs.size() && key_lvs[0]
974  // ? key_lvs[0]
975  // : code_generator.codegen(key_col, true, co)[0];
976  auto const& key_col_ti = key_col->get_type_info();
977 
978  std::vector<llvm::Value*> hash_join_idx_args{
979  hash_ptr,
980  executor_->cgen_state_->castToTypeIn(key_lv, 64),
981  executor_->cgen_state_->llInt(col_range_.getIntMin()),
982  executor_->cgen_state_->llInt(col_range_.getIntMax())};
983  if (shard_count) {
984  const auto expected_hash_entry_count =
986  const auto entry_count_per_shard =
987  (expected_hash_entry_count + shard_count - 1) / shard_count;
988  hash_join_idx_args.push_back(
989  executor_->cgen_state_->llInt<uint32_t>(entry_count_per_shard));
990  hash_join_idx_args.push_back(executor_->cgen_state_->llInt<uint32_t>(shard_count));
991  hash_join_idx_args.push_back(executor_->cgen_state_->llInt<uint32_t>(device_count_));
992  }
993  auto key_col_logical_ti = get_logical_type_info(key_col->get_type_info());
994  if (!key_col_logical_ti.get_notnull() || isBitwiseEq()) {
995  hash_join_idx_args.push_back(executor_->cgen_state_->llInt(
996  inline_fixed_encoding_null_val(key_col_logical_ti)));
997  }
998  auto special_date_bucketization_case = key_col_ti.get_type() == kDATE;
999  if (isBitwiseEq()) {
1000  if (special_date_bucketization_case) {
1001  hash_join_idx_args.push_back(executor_->cgen_state_->llInt(
1003  } else {
1004  hash_join_idx_args.push_back(
1005  executor_->cgen_state_->llInt(col_range_.getIntMax() + 1));
1006  }
1007  }
1008 
1009  if (special_date_bucketization_case) {
1010  hash_join_idx_args.emplace_back(
1011  executor_->cgen_state_->llInt(hash_entry_info_.bucket_normalization));
1012  }
1013 
1014  return hash_join_idx_args;
1015 }
BucketizedHashEntryInfo hash_entry_info_
int64_t getIntMin() const
size_t get_hash_entry_count(const ExpressionRange &col_range, const bool is_bw_eq)
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1235
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:381
int64_t bucket_normalization
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
Definition: sqltypes.h:70
int64_t getIntMax() const
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
bool isBitwiseEq() const override

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string PerfectJoinHashTable::getHashJoinType ( ) const
inline final virtual

Implements HashJoin.

Definition at line 100 of file PerfectJoinHashTable.h.

100 { return "Perfect"; }
static HashtableRecycler* PerfectJoinHashTable::getHashTableCache ( )
inline static

Definition at line 102 of file PerfectJoinHashTable.h.

References CHECK, and hash_table_cache_.

Referenced by QueryRunner::QueryRunner::getCachedHashtableWithoutCacheKey(), QueryRunner::QueryRunner::getCacheItemMetric(), and QueryRunner::QueryRunner::getNumberOfCachedItem().

102  {
104  return hash_table_cache_.get();
105  }
static std::unique_ptr< HashtableRecycler > hash_table_cache_
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

HashTable * PerfectJoinHashTable::getHashTableForDevice ( const size_t  device_id) const
private

Definition at line 1086 of file PerfectJoinHashTable.cpp.

References CHECK_LT, and HashJoin::hash_tables_for_device_.

Referenced by toSet(), and toString().

1086  {
1087  CHECK_LT(device_id, hash_tables_for_device_.size());
1088  return hash_tables_for_device_[device_id].get();
1089 }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:363
#define CHECK_LT(x, y)
Definition: Logger.h:303

+ Here is the caller graph for this function:

HashType PerfectJoinHashTable::getHashType ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 86 of file PerfectJoinHashTable.h.

References hash_type_.

Referenced by codegenSlot().

86 { return hash_type_; }

+ Here is the caller graph for this function:

const InputTableInfo & PerfectJoinHashTable::getInnerQueryInfo ( const Analyzer::ColumnVar inner_col) const
private

Definition at line 1252 of file PerfectJoinHashTable.cpp.

References get_inner_query_info(), Analyzer::ColumnVar::getTableKey(), and query_infos_.

Referenced by genChunkKey(), and reify().

1253  {
1254  return get_inner_query_info(inner_col->getTableKey(), query_infos_);
1255 }
const InputTableInfo & get_inner_query_info(const shared::TableKey &inner_table_key, const std::vector< InputTableInfo > &query_infos)
const std::vector< InputTableInfo > & query_infos_
shared::TableKey getTableKey() const
Definition: Analyzer.h:199

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

shared::TableKey PerfectJoinHashTable::getInnerTableId ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 78 of file PerfectJoinHashTable.h.

References col_var_.

Referenced by reify().

78  {
79  return col_var_->getTableKey();
80  };
std::shared_ptr< Analyzer::ColumnVar > col_var_

+ Here is the caller graph for this function:

int PerfectJoinHashTable::getInnerTableRteIdx ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 82 of file PerfectJoinHashTable.h.

References col_var_.

82  {
83  return col_var_.get()->get_rte_idx();
84  };
std::shared_ptr< Analyzer::ColumnVar > col_var_
std::shared_ptr< PerfectJoinHashTable > PerfectJoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap column_cache,
Executor executor,
const HashTableBuildDagMap hashtable_build_dag_map,
const RegisteredQueryHint query_hints,
const TableIdToNodeMap table_id_to_node_map 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 151 of file PerfectJoinHashTable.cpp.

References CHECK, CHECK_EQ, anonymous_namespace{PerfectJoinHashTable.cpp}::get_bucketized_hash_entry_info(), anonymous_namespace{HashJoin.cpp}::get_cols(), getExpressionRange(), HashJoin::getHashTypeString(), BucketizedHashEntryInfo::getNormalizedHashEntryCount(), Data_Namespace::GPU_LEVEL, Invalid, IS_EQUIVALENCE, kBW_EQ, ExpressionRange::makeIntRange(), PerfectJoinHashTable(), VLOG, and VLOGGING.

Referenced by HashJoin::getInstance().

162  {
163  CHECK(IS_EQUIVALENCE(qual_bin_oper->get_optype()));
164  const auto cols_and_string_op_infos =
165  get_cols(qual_bin_oper.get(), executor->temporary_tables_);
166  const auto& cols = cols_and_string_op_infos.first;
167  const auto& inner_outer_string_op_infos = cols_and_string_op_infos.second;
168  const auto inner_col = cols.first;
169  CHECK(inner_col);
170  const auto& ti = inner_col->get_type_info();
171  auto col_range =
172  getExpressionRange(ti.is_string() ? cols.second : inner_col, query_infos, executor);
173  if (col_range.getType() == ExpressionRangeType::Invalid) {
174  throw HashJoinFail(
175  "Could not compute range for the expressions involved in the equijoin");
176  }
177  const auto rhs_source_col_range =
178  ti.is_string() ? getExpressionRange(inner_col, query_infos, executor) : col_range;
179  if (ti.is_string()) {
180  // The nullable info must be the same as the source column.
181  if (rhs_source_col_range.getType() == ExpressionRangeType::Invalid) {
182  throw HashJoinFail(
183  "Could not compute range for the expressions involved in the equijoin");
184  }
185  if (rhs_source_col_range.getIntMin() > rhs_source_col_range.getIntMax()) {
186  // If the inner column expression range is empty, use the inner col range
187  CHECK_EQ(rhs_source_col_range.getIntMin(), int64_t(0));
188  CHECK_EQ(rhs_source_col_range.getIntMax(), int64_t(-1));
189  col_range = rhs_source_col_range;
190  } else {
191  col_range = ExpressionRange::makeIntRange(
192  std::min(rhs_source_col_range.getIntMin(), col_range.getIntMin()),
193  std::max(rhs_source_col_range.getIntMax(), col_range.getIntMax()),
194  0,
195  rhs_source_col_range.hasNulls());
196  }
197  }
198 
199  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
200  const auto max_hash_entry_count =
202  ? static_cast<size_t>(std::numeric_limits<int32_t>::max() / sizeof(int32_t))
203  : static_cast<size_t>(std::numeric_limits<int32_t>::max());
204 
205  auto bucketized_entry_count_info = get_bucketized_hash_entry_info(
206  ti, col_range, qual_bin_oper->get_optype() == kBW_EQ);
207  auto bucketized_entry_count = bucketized_entry_count_info.getNormalizedHashEntryCount();
208  if (bucketized_entry_count > max_hash_entry_count) {
209  throw TooManyHashEntries();
210  }
211 
212  if (qual_bin_oper->get_optype() == kBW_EQ &&
213  col_range.getIntMax() >= std::numeric_limits<int64_t>::max()) {
214  throw HashJoinFail("Cannot translate null value for kBW_EQ");
215  }
216  decltype(std::chrono::steady_clock::now()) ts1, ts2;
217  if (VLOGGING(1)) {
218  ts1 = std::chrono::steady_clock::now();
219  }
220 
221  auto join_hash_table = std::shared_ptr<PerfectJoinHashTable>(
222  new PerfectJoinHashTable(qual_bin_oper,
223  inner_col,
224  query_infos,
225  memory_level,
226  join_type,
227  preferred_hash_type,
228  col_range,
229  rhs_source_col_range,
230  bucketized_entry_count_info,
231  column_cache,
232  executor,
233  device_count,
234  query_hints,
235  hashtable_build_dag_map,
236  table_id_to_node_map,
237  inner_outer_string_op_infos));
238  try {
239  join_hash_table->reify();
240  } catch (const TableMustBeReplicated& e) {
241  // Throw a runtime error to abort the query
242  join_hash_table->freeHashBufferMemory();
243  throw std::runtime_error(e.what());
244  } catch (const HashJoinFail& e) {
245  // HashJoinFail exceptions log an error and trigger a retry with a join loop (if
246  // possible)
247  join_hash_table->freeHashBufferMemory();
248  throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
249  "involved in equijoin | ") +
250  e.what());
251  } catch (const ColumnarConversionNotSupported& e) {
252  throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
253  e.what());
254  } catch (const OutOfMemory& e) {
255  throw HashJoinFail(
256  std::string("Ran out of memory while building hash tables for equijoin | ") +
257  e.what());
258  } catch (const JoinHashTableTooBig& e) {
259  throw e;
260  } catch (const std::exception& e) {
261  throw std::runtime_error(
262  std::string("Fatal error while attempting to build hash tables for join: ") +
263  e.what());
264  }
265  if (VLOGGING(1)) {
266  ts2 = std::chrono::steady_clock::now();
267  VLOG(1) << "Built perfect hash table "
268  << getHashTypeString(join_hash_table->getHashType()) << " in "
269  << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
270  << " ms";
271  }
272  return join_hash_table;
273 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:69
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:1039
BucketizedHashEntryInfo get_bucketized_hash_entry_info(SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
static ExpressionRange makeIntRange(const int64_t int_min, const int64_t int_max, const int64_t bucket, const bool has_nulls)
#define VLOGGING(n)
Definition: Logger.h:289
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:165
size_t getNormalizedHashEntryCount() const
PerfectJoinHashTable(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Analyzer::ColumnVar *col_var, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const ExpressionRange &col_range, const ExpressionRange &rhs_source_col_range, const BucketizedHashEntryInfo hash_entry_info, ColumnCacheMap &column_cache, Executor *executor, const int device_count, const RegisteredQueryHint &query_hints, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map, const InnerOuterStringOpInfos &inner_outer_string_op_infos={})
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqldefs.h:30
#define VLOG(n)
Definition: Logger.h:387

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Data_Namespace::MemoryLevel PerfectJoinHashTable::getMemoryLevel ( ) const
inline override virtual noexcept

Implements HashJoin.

Definition at line 88 of file PerfectJoinHashTable.h.

References memory_level_.

88  {
89  return memory_level_;
90  };
const Data_Namespace::MemoryLevel memory_level_
size_t PerfectJoinHashTable::getNormalizedHashEntryCount ( ) const
inline

Definition at line 141 of file PerfectJoinHashTable.h.

References BucketizedHashEntryInfo::getNormalizedHashEntryCount(), and hash_entry_info_.

Referenced by initHashTableForDevice(), and isOneToOneHashPossible().

141  {
143  }
BucketizedHashEntryInfo hash_entry_info_
size_t getNormalizedHashEntryCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const RegisteredQueryHint& PerfectJoinHashTable::getRegisteredQueryHint ( )
inline

Definition at line 137 of file PerfectJoinHashTable.h.

References query_hints_.

137 { return query_hints_; }
RegisteredQueryHint query_hints_
int PerfectJoinHashTable::initHashTableForDevice ( const ChunkKey chunk_key,
const JoinColumn join_column,
const InnerOuter cols,
const HashType  layout,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id 
)
private

Definition at line 728 of file PerfectJoinHashTable.cpp.

References PerfectJoinHashTableBuilder::allocateDeviceMemory(), BucketizedHashEntryInfo::bucketized_hash_entry_count, CHECK, CHECK_EQ, CHECK_LT, col_range_, copyCpuHashTableToGpu(), DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, cpu_hash_table_buff_mutex_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, device_count_, executor_, PerfectJoinHashTableBuilder::getHashTable(), getNormalizedHashEntryCount(), Data_Namespace::GPU_LEVEL, hash_entry_info_, hash_table_layout_cache_, HashJoin::hash_tables_for_device_, hash_type_, hashtable_cache_key_, HT_HASHING_SCHEME, PerfectJoinHashTableBuilder::initOneToManyHashTableOnCpu(), PerfectJoinHashTableBuilder::initOneToOneHashTableOnCpu(), inner_outer_string_op_infos_, isBitwiseEq(), RegisteredQueryHint::isHintRegistered(), HashtableRecycler::isSafeToCacheHashtable(), join_type_, kMaxJoinHashTableSize, RegisteredQueryHint::max_join_hash_table_size, memory_level_, needs_dict_translation_, JoinColumn::num_elems, OneToOne, PERFECT_HT, putHashTableOnCpuToCache(), query_hints_, shardCount(), str_proxy_translation_map_, table_id_to_node_map_, UNREACHABLE, and VLOG.

Referenced by reifyForDevice().

734  {
735  auto timer = DEBUG_TIMER(__func__);
736  const auto inner_col = cols.first;
737  CHECK(inner_col);
739  // the reason for checking that the layout is OneToOne is that we start building a
740  // hash table with the OneToOne layout
741  VLOG(1) << "Stop building a hash table based on a column " << inner_col->toString()
742  << ": it is from an empty table";
743  return 0;
744  }
745 #ifndef HAVE_CUDA
746  CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
747 #endif
748  int err{0};
749  const int32_t hash_join_invalid_val{-1};
750  auto hashtable_layout = layout;
751  auto allow_hashtable_recycling =
755  inner_col->getTableKey());
756  if (allow_hashtable_recycling) {
757  auto cached_hashtable_layout_type = hash_table_layout_cache_->getItemFromCache(
758  hashtable_cache_key_[device_id],
761  {});
762  if (cached_hashtable_layout_type) {
763  hash_type_ = *cached_hashtable_layout_type;
764  hashtable_layout = hash_type_;
765  }
766  }
767  const auto entry_count = getNormalizedHashEntryCount();
768  const auto hash_table_entry_count = hashtable_layout == HashType::OneToOne
769  ? entry_count
770  : 2 * entry_count + join_column.num_elems;
771  const auto hash_table_size = hash_table_entry_count * sizeof(int32_t);
775  }
776  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
777  CHECK(!chunk_key.empty());
778  std::shared_ptr<PerfectHashTable> hash_table{nullptr};
779  decltype(std::chrono::steady_clock::now()) ts1, ts2;
780  ts1 = std::chrono::steady_clock::now();
781  {
782  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
784  if (hashtable_layout == HashType::OneToOne) {
785  builder.initOneToOneHashTableOnCpu(join_column,
786  col_range_,
787  isBitwiseEq(),
788  cols,
790  join_type_,
791  hashtable_layout,
793  hash_join_invalid_val,
794  executor_);
795  hash_table = builder.getHashTable();
796  } else {
797  builder.initOneToManyHashTableOnCpu(join_column,
798  col_range_,
799  isBitwiseEq(),
800  cols,
802  join_type_,
804  hash_join_invalid_val,
805  executor_);
806  hash_table = builder.getHashTable();
807  }
808  ts2 = std::chrono::steady_clock::now();
809  auto build_time =
810  std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count();
811  hash_table->setHashEntryInfo(hash_entry_info_);
812  hash_table->setColumnNumElems(join_column.num_elems);
813  if (allow_hashtable_recycling && hash_table) {
814  // add ht-related items to cache iff we have a valid hashtable
815  hash_table_layout_cache_->putItemToCache(hashtable_cache_key_[device_id],
816  hashtable_layout,
819  0,
820  0,
821  {});
824  hash_table,
826  build_time);
827  }
828  }
829  // Transfer the hash table to the GPU if we've only built it on the CPU
830  // but the query runs on GPU (join on dictionary encoded columns).
832 #ifdef HAVE_CUDA
833  const auto& ti = inner_col->get_type_info();
834  CHECK(ti.is_string());
835  auto data_mgr = executor_->getDataMgr();
836  copyCpuHashTableToGpu(hash_table, device_id, data_mgr);
837 #else
838  UNREACHABLE();
839 #endif
840  } else {
841  CHECK(hash_table);
842  CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
843  hash_tables_for_device_[device_id] = hash_table;
844  }
845  } else {
846 #ifdef HAVE_CUDA
848  CHECK_EQ(Data_Namespace::GPU_LEVEL, effective_memory_level);
849  builder.allocateDeviceMemory(join_column,
850  hashtable_layout,
852  shardCount(),
853  device_id,
855  executor_);
856  builder.initHashTableOnGpu(chunk_key,
857  join_column,
858  col_range_,
859  isBitwiseEq(),
860  cols,
861  join_type_,
862  hashtable_layout,
864  shardCount(),
865  hash_join_invalid_val,
866  device_id,
868  executor_);
869  CHECK_LT(static_cast<size_t>(device_id), hash_tables_for_device_.size());
870  hash_tables_for_device_[device_id] = builder.getHashTable();
871  if (!err && allow_hashtable_recycling && hash_tables_for_device_[device_id]) {
872  // add layout to cache iff we have a valid hashtable
873  hash_table_layout_cache_->putItemToCache(
874  hashtable_cache_key_[device_id],
875  hash_tables_for_device_[device_id]->getLayout(),
878  0,
879  0,
880  {});
881  }
882 #else
883  UNREACHABLE();
884 #endif
885  }
886  return err;
887 }
BucketizedHashEntryInfo hash_entry_info_
#define CHECK_EQ(x, y)
Definition: Logger.h:301
size_t getNormalizedHashEntryCount() const
const Data_Namespace::MemoryLevel memory_level_
size_t num_elems
const TableIdToNodeMap table_id_to_node_map_
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:363
#define UNREACHABLE()
Definition: Logger.h:337
size_t max_join_hash_table_size
Definition: QueryHint.h:325
void copyCpuHashTableToGpu(std::shared_ptr< PerfectHashTable > &cpu_hash_table, const int device_id, Data_Namespace::DataMgr *data_mgr)
void allocateDeviceMemory(const size_t num_column_elems, const HashType layout, BucketizedHashEntryInfo hash_entry_info, const size_t shard_count, const int device_id, const int device_count, const Executor *executor)
void initOneToManyHashTableOnCpu(const JoinColumn &join_column, const ExpressionRange &col_range, const bool is_bitwise_eq, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const StringDictionaryProxy::IdMap *str_proxy_translation_map, const JoinType join_type, const BucketizedHashEntryInfo hash_entry_info, const int32_t hash_join_invalid_val, const Executor *executor)
static std::unique_ptr< HashingSchemeRecycler > hash_table_layout_cache_
std::unique_ptr< PerfectHashTable > getHashTable()
#define CHECK_LT(x, y)
Definition: Logger.h:303
void putHashTableOnCpuToCache(QueryPlanHash key, CacheItemType item_type, std::shared_ptr< PerfectHashTable > hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
bool isHintRegistered(const QueryHint hint) const
Definition: QueryHint.h:348
RegisteredQueryHint query_hints_
const InnerOuterStringOpInfos inner_outer_string_op_infos_
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:411
static bool isSafeToCacheHashtable(const TableIdToNodeMap &table_id_to_node_map, bool need_dict_translation, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_info_pairs, const shared::TableKey &table_key)
void initOneToOneHashTableOnCpu(const JoinColumn &join_column, const ExpressionRange &col_range, const bool is_bitwise_eq, const InnerOuter &cols, const StringDictionaryProxy::IdMap *str_proxy_translation_map, const JoinType join_type, const HashType hash_type, const BucketizedHashEntryInfo hash_entry_info, const int32_t hash_join_invalid_val, const Executor *executor)
std::vector< QueryPlanHash > hashtable_cache_key_
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:136
const StringDictionaryProxy::IdMap * str_proxy_translation_map_
size_t bucketized_hash_entry_count
#define VLOG(n)
Definition: Logger.h:387
bool isBitwiseEq() const override

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr< PerfectHashTable > PerfectJoinHashTable::initHashTableOnCpuFromCache ( QueryPlanHash  key,
CacheItemType  item_type,
DeviceIdentifier  device_identifier 
)
private

Definition at line 919 of file PerfectJoinHashTable.cpp.

References CHECK, DEBUG_TIMER, hash_table_cache_, and VLOG.

Referenced by reify().

922  {
924  auto timer = DEBUG_TIMER(__func__);
925  VLOG(1) << "Checking CPU hash table cache.";
926  auto hashtable_ptr =
927  hash_table_cache_->getItemFromCache(key, item_type, device_identifier);
928  if (hashtable_ptr) {
929  return std::dynamic_pointer_cast<PerfectHashTable>(hashtable_ptr);
930  }
931  return nullptr;
932 }
static std::unique_ptr< HashtableRecycler > hash_table_cache_
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:411
#define VLOG(n)
Definition: Logger.h:387

+ Here is the caller graph for this function:

static void PerfectJoinHashTable::invalidateCache ( )
inline static

Definition at line 111 of file PerfectJoinHashTable.h.

References CHECK, hash_table_cache_, and hash_table_layout_cache_.

111  {
113  hash_table_layout_cache_->clearCache();
114 
116  hash_table_cache_->clearCache();
117  }
static std::unique_ptr< HashtableRecycler > hash_table_cache_
static std::unique_ptr< HashingSchemeRecycler > hash_table_layout_cache_
#define CHECK(condition)
Definition: Logger.h:291
bool PerfectJoinHashTable::isBitwiseEq ( ) const
override private virtual

Implements HashJoin.

Definition at line 1292 of file PerfectJoinHashTable.cpp.

References kBW_EQ, and qual_bin_oper_.

Referenced by codegenMatchingSet(), codegenSlot(), fetchColumnsForDevice(), getHashJoinArgs(), initHashTableForDevice(), and reify().

1292  {
1293  return qual_bin_oper_->get_optype() == kBW_EQ;
1294 }
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
Definition: sqldefs.h:30

+ Here is the caller graph for this function:

bool PerfectJoinHashTable::isOneToOneHashPossible ( const std::vector< ColumnsForDevice > &  columns_per_device) const
private

Definition at line 336 of file PerfectJoinHashTable.cpp.

References CHECK, anonymous_namespace{PerfectJoinHashTable.cpp}::get_bucketized_hash_entry_info(), getNormalizedHashEntryCount(), ExpressionRange::hasNulls(), inner_outer_pairs_, kBW_EQ, qual_bin_oper_, rhs_source_col_range_, and VLOG.

Referenced by reify().

337  {
338  CHECK(!inner_outer_pairs_.empty());
339  const auto& rhs_col_ti = inner_outer_pairs_.front().first->get_type_info();
340  const auto max_unique_hash_input_entries =
342  rhs_col_ti, rhs_source_col_range_, qual_bin_oper_->get_optype() == kBW_EQ)
345  for (const auto& device_columns : columns_per_device) {
346  CHECK(!device_columns.join_columns.empty());
347  const auto rhs_join_col_num_entries = device_columns.join_columns.front().num_elems;
348  if (rhs_join_col_num_entries > max_unique_hash_input_entries) {
349  VLOG(1) << "Skipping attempt to build perfect hash one-to-one table as number of "
350  "rhs column entries ("
351  << rhs_join_col_num_entries << ") exceeds range for rhs join column ("
352  << max_unique_hash_input_entries << ").";
353  return false;
354  }
355  }
356  return true;
357 }
ExpressionRange rhs_source_col_range_
size_t getNormalizedHashEntryCount() const
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
BucketizedHashEntryInfo get_bucketized_hash_entry_info(SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
bool hasNulls() const
std::vector< InnerOuter > inner_outer_pairs_
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqldefs.h:30
#define VLOG(n)
Definition: Logger.h:387

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static void PerfectJoinHashTable::markCachedItemAsDirty ( size_t  table_key)
inline static

Definition at line 119 of file PerfectJoinHashTable.h.

References CHECK, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, hash_table_cache_, hash_table_layout_cache_, HT_HASHING_SCHEME, and PERFECT_HT.

119  {
122  auto candidate_table_keys =
123  hash_table_cache_->getMappedQueryPlanDagsWithTableKey(table_key);
124  if (candidate_table_keys.has_value()) {
125  hash_table_layout_cache_->markCachedItemAsDirty(
126  table_key,
127  *candidate_table_keys,
130  hash_table_cache_->markCachedItemAsDirty(table_key,
131  *candidate_table_keys,
134  }
135  }
static std::unique_ptr< HashtableRecycler > hash_table_cache_
static std::unique_ptr< HashingSchemeRecycler > hash_table_layout_cache_
#define CHECK(condition)
Definition: Logger.h:291
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:136
size_t PerfectJoinHashTable::offsetBufferOff ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 1062 of file PerfectJoinHashTable.cpp.

Referenced by toSet(), and toString().

1062  {
1063  return 0;
1064 }

+ Here is the caller graph for this function:

size_t PerfectJoinHashTable::payloadBufferOff ( ) const
override virtual noexcept

Implements HashJoin.

Definition at line 1070 of file PerfectJoinHashTable.cpp.

References getComponentBufferSize().

Referenced by toSet(), and toString().

1070  {
1071  return 2 * getComponentBufferSize();
1072 }
size_t getComponentBufferSize() const noexceptoverride

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void PerfectJoinHashTable::putHashTableOnCpuToCache ( QueryPlanHash  key,
CacheItemType  item_type,
std::shared_ptr< PerfectHashTable >  hashtable_ptr,
DeviceIdentifier  device_identifier,
size_t  hashtable_building_time 
)
private

Definition at line 934 of file PerfectJoinHashTable.cpp.

References CHECK, CPU, and hash_table_cache_.

Referenced by initHashTableForDevice().

939  {
941  CHECK(hashtable_ptr && !hashtable_ptr->getGpuBuffer());
942  hash_table_cache_->putItemToCache(
943  key,
944  hashtable_ptr,
945  item_type,
946  device_identifier,
947  hashtable_ptr->getHashTableBufferSize(ExecutorDeviceType::CPU),
948  hashtable_building_time);
949 }
static std::unique_ptr< HashtableRecycler > hash_table_cache_
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

void PerfectJoinHashTable::reify ( )
private

Definition at line 359 of file PerfectJoinHashTable.cpp.

References gpu_enabled::accumulate(), threading_serial::async(), HashJoin::canAccessHashTable(), CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, HashJoin::checkHashJoinReplicationConstraint(), col_range_, copyCpuHashTableToGpu(), DataRecyclerUtil::CPU_DEVICE_IDENTIFIER, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, device_count_, executor_, fetchColumnsForDevice(), HashJoin::freeHashBufferMemory(), genChunkKey(), anonymous_namespace{PerfectJoinHashTable.cpp}::get_bucketized_hash_entry_info(), anonymous_namespace{HashJoin.cpp}::get_cols(), get_shard_count(), getAlternativeCacheKey(), DataRecyclerUtil::getAlternativeTableKeys(), getEffectiveMemoryLevel(), HashtableRecycler::getHashtableAccessPathInfo(), getInnerQueryInfo(), getInnerTableId(), getQueryEngineCudaStreamForDevice(), Data_Namespace::GPU_LEVEL, hash_entry_info_, hash_table_cache_, HashJoin::hash_tables_for_device_, hash_type_, hashtable_build_dag_map_, hashtable_cache_key_, hashtable_cache_meta_info_, InputTableInfo::info, initHashTableOnCpuFromCache(), inner_outer_pairs_, inner_outer_string_op_infos_, isBitwiseEq(), HashtableRecycler::isInvalidHashTableCacheKey(), isOneToOneHashPossible(), HashtableRecycler::isSafeToCacheHashtable(), join_type_, memory_level_, needs_dict_translation_, needs_dictionary_translation(), OneToMany, OneToOne, only_shards_for_device(), PERFECT_HT, qual_bin_oper_, reifyForDevice(), shardCount(), str_proxy_translation_map_, str_proxy_translation_mutex_, shared::TableKey::table_id, table_id_to_node_map_, table_keys_, logger::thread_local_ids(), HashJoin::translateInnerToOuterStrDictProxies(), UNREACHABLE, and VLOG.

359  {
360  auto timer = DEBUG_TIMER(__func__);
362  const auto cols = get_cols(qual_bin_oper_.get(), executor_->temporary_tables_).first;
363  const auto inner_col = cols.first;
365  inner_col->getTableKey(),
367  executor_);
368  const auto& query_info = getInnerQueryInfo(inner_col).info;
369  if (query_info.fragments.empty()) {
370  return;
371  }
372  if (query_info.getNumTuplesUpperBound() >
373  static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
374  throw TooManyHashEntries();
375  }
376  std::vector<std::future<void>> init_threads;
377  const int shard_count = shardCount();
378 
379  inner_outer_pairs_.push_back(cols);
380  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
381  // Todo(todd): Clean up the fact that we store the inner outer column pairs as a vector,
382  // even though only one is ever valid for perfect hash layout. Either move to 1 or keep
383  // the vector but move it to the HashTable parent class
386 
387  std::vector<std::vector<Fragmenter_Namespace::FragmentInfo>> fragments_per_device;
388  std::vector<ColumnsForDevice> columns_per_device;
389  std::vector<std::unique_ptr<CudaAllocator>> dev_buff_owners;
390 
391  auto data_mgr = executor_->getDataMgr();
392  // check the existence of cached hash table here before fetching columns
393  // if available, skip the rest of logic and copy it to GPU if necessary
394  // there are a few things to consider:
395  // 1. if table is sharded? --> deploy per-device logic
396  // here, each device may load different set of fragments, so their cache keys are
397  // different accordingly
398  // 2. otherwise, each device has the same hash table built from "all" fragments
399  // and their cache keys are the same (but we stick to per-device cache key vector)
400  // here, for CPU, we consider its # device to be one
401  // for GPU, each device builds its own hash table, or we build a single hash table on
402  // CPU and then copy it to each device
403  // 3. if cache key is not available? --> use alternative cache key
404 
405  // retrieve fragment lists and chunk key per device
406  std::vector<ChunkKey> chunk_key_per_device;
407  auto outer_col =
408  dynamic_cast<const Analyzer::ColumnVar*>(inner_outer_pairs_.front().second);
409  for (int device_id = 0; device_id < device_count_; ++device_id) {
410  fragments_per_device.emplace_back(
411  shard_count
412  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
413  : query_info.fragments);
415  dev_buff_owners.emplace_back(std::make_unique<CudaAllocator>(
416  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id)));
417  }
418  const auto chunk_key =
419  genChunkKey(fragments_per_device[device_id], outer_col, inner_col);
420  chunk_key_per_device.emplace_back(std::move(chunk_key));
421  }
422 
423  // try to extract cache key for hash table and its relevant info
424  auto hashtable_access_path_info =
427  qual_bin_oper_->get_optype(),
428  join_type_,
431  shard_count,
432  fragments_per_device,
433  executor_);
434  hashtable_cache_key_ = hashtable_access_path_info.hashed_query_plan_dag;
435  hashtable_cache_meta_info_ = hashtable_access_path_info.meta_info;
436  table_keys_ = hashtable_access_path_info.table_keys;
437 
438  if (table_keys_.empty()) {
439  // the actual chunks fetched per device can be different but they constitute the same
440  // table in the same db, so we can exploit this to create an alternative table key
441  const auto& inner_table_key = getInnerTableId();
442  table_keys_ =
443  DataRecyclerUtil::getAlternativeTableKeys(chunk_key_per_device, inner_table_key);
444  }
445  CHECK(!table_keys_.empty());
446 
447  if (HashtableRecycler::isInvalidHashTableCacheKey(hashtable_cache_key_) &&
448  getInnerTableId().table_id > 0) {
449  // sometimes we cannot retrieve the query plan dag, so try the recycler cache
450  // with the old-fashioned cache key if we deal with a hashtable of a non-temporary table
451  for (int device_id = 0; device_id < device_count_; ++device_id) {
452  const auto num_tuples = std::accumulate(
453  fragments_per_device[device_id].begin(),
454  fragments_per_device[device_id].end(),
455  size_t(0),
456  [](size_t sum, const auto& fragment) { return sum + fragment.getNumTuples(); });
457  AlternativeCacheKeyForPerfectHashJoin cache_key{col_range_,
458  inner_col,
459  outer_col ? outer_col : inner_col,
461  chunk_key_per_device[device_id],
462  num_tuples,
463  qual_bin_oper_->get_optype(),
464  join_type_};
465  hashtable_cache_key_[device_id] = getAlternativeCacheKey(cache_key);
466  }
467  }
468 
469  // register a mapping between cache key and its input table info for per-table cache
470  // invalidation if we have valid cache key for "all" devices (otherwise, we skip to use
471  // cached hash table for safety)
472  const bool invalid_cache_key =
473  HashtableRecycler::isInvalidHashTableCacheKey(hashtable_cache_key_);
474  if (!invalid_cache_key) {
475  if (!shard_count) {
476  hash_table_cache_->addQueryPlanDagForTableKeys(hashtable_cache_key_.front(),
477  table_keys_);
478  } else {
479  std::for_each(hashtable_cache_key_.cbegin(),
480  hashtable_cache_key_.cend(),
481  [this](QueryPlanHash key) {
482  hash_table_cache_->addQueryPlanDagForTableKeys(key, table_keys_);
483  });
484  }
485  }
486 
487  const auto effective_memory_level = getEffectiveMemoryLevel(inner_outer_pairs_);
488 
489  // Assume we will need one-to-many if we have a string operation, as these tend
490  // to be cardinality-reducing operations, i.e. |S(t)| < |t|
491  // Todo(todd): Ostensibly only string ops on the rhs/inner expression cause rhs dups and
492  // so we may be too conservative here, but validate
493 
494  const bool has_string_ops = inner_outer_string_op_infos_.first.size() ||
495  inner_outer_string_op_infos_.second.size();
496 
497  // Also check if the number of entries per column exceeds the rhs join hash table
498  // range, and skip trying to build a One-to-One hash table if so. There is a slight edge
499  // case where this can be overly pessimistic, and that is if the non-null values are all
500  // unique, but there are multiple null values, but we currently don't have the metadata
501  // to track null counts (only column nullability from the ddl and null existence from
502  // the encoded data), and this is probably too much of an edge case to worry about for
503  // now given the general performance benefits of skipping 1:1 if we are fairly confident
504  // it is doomed up front
505 
506  // Now check if the number of entries per column exceeds the rhs join hash table
507  // range, and skip trying to build a One-to-One hash table if so
509  (has_string_ops || !isOneToOneHashPossible(columns_per_device))) {
511  }
512 
513  // todo (yoonmin) : support dictionary proxy cache for join including string op(s)
514  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
515  // construct string dictionary proxies if necessary
516  std::unique_lock<std::mutex> str_proxy_translation_lock(str_proxy_translation_mutex_);
517  if (needs_dict_translation_ && !str_proxy_translation_map_) {
518  CHECK_GE(inner_outer_pairs_.size(), 1UL);
519  auto const copied_col_range = col_range_;
523  col_range_,
524  executor_);
525  // update hash entry info if necessary
526  if (!(col_range_ == copied_col_range)) {
528  inner_col->get_type_info(), col_range_, isBitwiseEq());
529  }
530  }
531  }
532 
533  auto allow_hashtable_recycling =
535  needs_dict_translation_,
537  inner_col->getTableKey());
538  bool has_invalid_cached_hash_table = false;
539  if (effective_memory_level == Data_Namespace::CPU_LEVEL &&
541  allow_hashtable_recycling, invalid_cache_key, join_type_)) {
542  // build a hash table on CPU, and we have a chance to recycle the cached one if
543  // available
544  for (int device_id = 0; device_id < device_count_; ++device_id) {
545  auto hash_table =
546  initHashTableOnCpuFromCache(hashtable_cache_key_[device_id],
549  if (hash_table) {
550  hash_tables_for_device_[device_id] = hash_table;
551  hash_type_ = hash_table->getLayout();
552  } else {
553  has_invalid_cached_hash_table = true;
554  break;
555  }
556  }
557 
558  if (has_invalid_cached_hash_table) {
559  hash_tables_for_device_.clear();
560  hash_tables_for_device_.resize(device_count_);
561  } else {
563 #ifdef HAVE_CUDA
564  for (int device_id = 0; device_id < device_count_; ++device_id) {
565  auto cpu_hash_table = std::dynamic_pointer_cast<PerfectHashTable>(
566  hash_tables_for_device_[device_id]);
567  copyCpuHashTableToGpu(cpu_hash_table, device_id, data_mgr);
568  }
569 #else
570  UNREACHABLE();
571 #endif
572  }
573  return;
574  }
575  }
576 
577  // we have no cached hash table for this qual
578  // so, start building the hash table by fetching columns for devices
579  for (int device_id = 0; device_id < device_count_; ++device_id) {
580  columns_per_device.emplace_back(
581  fetchColumnsForDevice(fragments_per_device[device_id],
582  device_id,
584  ? dev_buff_owners[device_id].get()
585  : nullptr));
586  }
587 
588  try {
589  for (int device_id = 0; device_id < device_count_; ++device_id) {
590  const auto chunk_key = genChunkKey(fragments_per_device[device_id],
591  inner_outer_pairs_.front().second,
592  inner_outer_pairs_.front().first);
593  init_threads.push_back(std::async(std::launch::async,
595  this,
596  chunk_key,
597  columns_per_device[device_id],
598  hash_type_,
599  device_id,
601  }
602  for (auto& init_thread : init_threads) {
603  init_thread.wait();
604  }
605  for (auto& init_thread : init_threads) {
606  init_thread.get();
607  }
608  } catch (const NeedsOneToManyHash& e) {
609  VLOG(1) << "RHS/Inner hash join values detected to not be unique, falling back to "
610  "One-to-Many hash layout.";
614  init_threads.clear();
616  CHECK_EQ(dev_buff_owners.size(), size_t(device_count_));
617  }
618  CHECK_EQ(columns_per_device.size(), size_t(device_count_));
619  for (int device_id = 0; device_id < device_count_; ++device_id) {
620  const auto chunk_key = genChunkKey(fragments_per_device[device_id],
621  inner_outer_pairs_.front().second,
622  inner_outer_pairs_.front().first);
623  init_threads.push_back(std::async(std::launch::async,
625  this,
626  chunk_key,
627  columns_per_device[device_id],
628  hash_type_,
629  device_id,
631  }
632  for (auto& init_thread : init_threads) {
633  init_thread.wait();
634  }
635  for (auto& init_thread : init_threads) {
636  init_thread.get();
637  }
638  }
639 }
BucketizedHashEntryInfo hash_entry_info_
#define CHECK_EQ(x, y)
Definition: Logger.h:301
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
static bool isInvalidHashTableCacheKey(const std::vector< QueryPlanHash > &cache_keys)
static bool canAccessHashTable(bool allow_hash_table_recycling, bool invalid_cache_key, JoinType join_type)
Definition: HashJoin.cpp:1030
const Data_Namespace::MemoryLevel memory_level_
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
std::mutex str_proxy_translation_mutex_
ChunkKey genChunkKey(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Analyzer::Expr *outer_col, const Analyzer::ColumnVar *inner_col) const
static void checkHashJoinReplicationConstraint(const shared::TableKey &table_key, const size_t shard_count, const Executor *executor)
Definition: HashJoin.cpp:779
const TableIdToNodeMap table_id_to_node_map_
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:363
#define UNREACHABLE()
Definition: Logger.h:337
#define CHECK_GE(x, y)
Definition: Logger.h:306
HashTableBuildDagMap hashtable_build_dag_map_
const InputTableInfo & getInnerQueryInfo(const Analyzer::ColumnVar *inner_col) const
ColumnsForDevice fetchColumnsForDevice(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
InnerOuter get_cols(const Analyzer::BinOper *qual_bin_oper, const TemporaryTables *temporary_tables)
Definition: HashJoin.cpp:1039
std::shared_ptr< PerfectHashTable > initHashTableOnCpuFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
bool needs_dictionary_translation(const std::vector< InnerOuter > &inner_outer_pairs, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs, const Executor *executor)
void freeHashBufferMemory()
Definition: HashJoin.h:323
BucketizedHashEntryInfo get_bucketized_hash_entry_info(SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
void copyCpuHashTableToGpu(std::shared_ptr< PerfectHashTable > &cpu_hash_table, const int device_id, Data_Namespace::DataMgr *data_mgr)
HashtableCacheMetaInfo hashtable_cache_meta_info_
static std::unique_ptr< HashtableRecycler > hash_table_cache_
future< Result > async(Fn &&fn, Args &&...args)
shared::TableKey getInnerTableId() const noexceptoverride
void reifyForDevice(const ChunkKey &hash_table_key, const ColumnsForDevice &columns_for_device, const HashType layout, const int device_id, const logger::ThreadLocalIds)
std::vector< Fragmenter_Namespace::FragmentInfo > only_shards_for_device(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
bool isOneToOneHashPossible(const std::vector< ColumnsForDevice > &columns_per_device) const
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
static std::unordered_set< size_t > getAlternativeTableKeys(const std::vector< ChunkKey > &chunk_keys, const shared::TableKey &inner_table_key)
Definition: DataRecycler.h:154
std::vector< InnerOuter > inner_outer_pairs_
static const StringDictionaryProxy::IdMap * translateInnerToOuterStrDictProxies(const InnerOuter &cols, const InnerOuterStringOpInfos &inner_outer_string_op_infos, ExpressionRange &old_col_range, const Executor *executor)
Definition: HashJoin.cpp:414
#define CHECK_LT(x, y)
Definition: Logger.h:303
static QueryPlanHash getAlternativeCacheKey(AlternativeCacheKeyForPerfectHashJoin &info)
size_t QueryPlanHash
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
const InnerOuterStringOpInfos inner_outer_string_op_infos_
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:411
static bool isSafeToCacheHashtable(const TableIdToNodeMap &table_id_to_node_map, bool need_dict_translation, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_info_pairs, const shared::TableKey &table_key)
std::vector< QueryPlanHash > hashtable_cache_key_
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:136
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:1048
static HashtableAccessPathInfo getHashtableAccessPathInfo(const std::vector< InnerOuter > &inner_outer_pairs, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs, const SQLOps op_type, const JoinType join_type, const HashTableBuildDagMap &hashtable_build_dag_map, int device_count, int shard_count, const std::vector< std::vector< Fragmenter_Namespace::FragmentInfo >> &frags_for_device, Executor *executor)
ThreadLocalIds thread_local_ids()
Definition: Logger.cpp:874
const StringDictionaryProxy::IdMap * str_proxy_translation_map_
std::unordered_set< size_t > table_keys_
#define VLOG(n)
Definition: Logger.h:387
bool isBitwiseEq() const override

+ Here is the call graph for this function:

void PerfectJoinHashTable::reifyForDevice ( const ChunkKey &  hash_table_key,
const ColumnsForDevice &  columns_for_device,
const HashType  layout,
const int  device_id,
const logger::ThreadLocalIds  parent_thread_local_ids 
)
private

Definition at line 689 of file PerfectJoinHashTable.cpp.

References CHECK_EQ, DEBUG_TIMER_NEW_THREAD, get_effective_memory_level(), initHashTableForDevice(), inner_outer_pairs_, ColumnsForDevice::join_columns, memory_level_, needs_dict_translation_, OneToMany, OneToOne, logger::ThreadLocalIds::setNewThreadId(), logger::ThreadLocalIds::thread_id_, and to_string().

Referenced by reify().

694  {
695  logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
696  DEBUG_TIMER_NEW_THREAD(parent_thread_local_ids.thread_id_);
697  const auto effective_memory_level =
699 
700  CHECK_EQ(columns_for_device.join_columns.size(), size_t(1));
701  CHECK_EQ(inner_outer_pairs_.size(), size_t(1));
702  auto& join_column = columns_for_device.join_columns.front();
703  if (layout == HashType::OneToOne) {
704  const auto err = initHashTableForDevice(chunk_key,
705  join_column,
706  inner_outer_pairs_.front(),
707  layout,
708 
709  effective_memory_level,
710  device_id);
711  if (err) {
712  throw NeedsOneToManyHash();
713  }
714  } else {
715  const auto err = initHashTableForDevice(chunk_key,
716  join_column,
717  inner_outer_pairs_.front(),
719  effective_memory_level,
720  device_id);
721  if (err) {
722  throw std::runtime_error("Unexpected error building one to many hash table: " +
723  std::to_string(err));
724  }
725  }
726 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const Data_Namespace::MemoryLevel memory_level_
Data_Namespace::MemoryLevel get_effective_memory_level(const Data_Namespace::MemoryLevel memory_level, const bool needs_dict_translation)
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:416
std::string to_string(char const *&&v)
std::vector< InnerOuter > inner_outer_pairs_
LocalIdsScopeGuard setNewThreadId() const
Definition: Logger.cpp:532
int initHashTableForDevice(const ChunkKey &chunk_key, const JoinColumn &join_column, const InnerOuter &cols, const HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
ThreadId thread_id_
Definition: Logger.h:138
const std::vector< JoinColumn > join_columns
Definition: HashJoin.h:111

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t PerfectJoinHashTable::shardCount ( ) const
private

Definition at line 1286 of file PerfectJoinHashTable.cpp.

References executor_, get_shard_count(), Data_Namespace::GPU_LEVEL, memory_level_, and qual_bin_oper_.

Referenced by codegenMatchingSet(), codegenSlot(), copyCpuHashTableToGpu(), initHashTableForDevice(), and reify().

1286  {
1289  : 0;
1290 }
const Data_Namespace::MemoryLevel memory_level_
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Definition: HashJoin.cpp:1048

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::set< DecodedJoinHashBufferEntry > PerfectJoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override virtual

Implements HashJoin.

Definition at line 1163 of file PerfectJoinHashTable.cpp.

References countBufferOff(), executor_, getHashTableForDevice(), HashJoin::getJoinHashBuffer(), HashJoin::getJoinHashBufferSize(), getQueryEngineCudaStreamForDevice(), GPU, offsetBufferOff(), payloadBufferOff(), and HashTable::toSet().

1165  {
1166  auto buffer = getJoinHashBuffer(device_type, device_id);
1167  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
1168  auto hash_table = getHashTableForDevice(device_id);
1169 #ifdef HAVE_CUDA
1170  std::unique_ptr<int8_t[]> buffer_copy;
1171  if (device_type == ExecutorDeviceType::GPU) {
1172  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
1173 
1174  auto data_mgr = executor_->getDataMgr();
1175  auto device_allocator = std::make_unique<CudaAllocator>(
1176  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
1177  device_allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
1178  }
1179  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
1180 #else
1181  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
1182 #endif // HAVE_CUDA
1183  auto ptr2 = ptr1 + offsetBufferOff();
1184  auto ptr3 = ptr1 + countBufferOff();
1185  auto ptr4 = ptr1 + payloadBufferOff();
1186  return HashTable::toSet(0,
1187  0,
1188  hash_table ? hash_table->getEntryCount() : 0,
1189  ptr1,
1190  ptr2,
1191  ptr3,
1192  ptr4,
1193  buffer_size);
1194 }
size_t offsetBufferOff() const noexceptoverride
size_t payloadBufferOff() const noexceptoverride
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
Definition: HashJoin.h:300
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
Definition: HashJoin.h:286
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
size_t countBufferOff() const noexceptoverride
static DecodedJoinHashBufferSet toSet(size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size)
Decode hash table into a std::set for easy inspection and validation.
Definition: HashTable.cpp:139
HashTable * getHashTableForDevice(const size_t device_id) const

+ Here is the call graph for this function:

std::string PerfectJoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
override virtual

Implements HashJoin.

Definition at line 1127 of file PerfectJoinHashTable.cpp.

References countBufferOff(), executor_, getHashTableForDevice(), HashJoin::getHashTypeString(), HashJoin::getJoinHashBuffer(), HashJoin::getJoinHashBufferSize(), getQueryEngineCudaStreamForDevice(), GPU, hash_type_, offsetBufferOff(), payloadBufferOff(), and HashTable::toString().

Referenced by getAlternativeCacheKey().

1129  {
1130  auto buffer = getJoinHashBuffer(device_type, device_id);
1131  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
1132  auto hash_table = getHashTableForDevice(device_id);
1133 #ifdef HAVE_CUDA
1134  std::unique_ptr<int8_t[]> buffer_copy;
1135  if (device_type == ExecutorDeviceType::GPU) {
1136  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
1137 
1138  auto data_mgr = executor_->getDataMgr();
1139  auto device_allocator = std::make_unique<CudaAllocator>(
1140  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
1141  device_allocator->copyFromDevice(buffer_copy.get(), buffer, buffer_size);
1142  }
1143  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
1144 #else
1145  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
1146 #endif // HAVE_CUDA
1147  auto ptr2 = ptr1 + offsetBufferOff();
1148  auto ptr3 = ptr1 + countBufferOff();
1149  auto ptr4 = ptr1 + payloadBufferOff();
1150  return HashTable::toString("perfect",
1152  0,
1153  0,
1154  hash_table ? hash_table->getEntryCount() : 0,
1155  ptr1,
1156  ptr2,
1157  ptr3,
1158  ptr4,
1159  buffer_size,
1160  raw);
1161 }
size_t offsetBufferOff() const noexceptoverride
size_t payloadBufferOff() const noexceptoverride
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
Definition: HashJoin.h:300
static std::string getHashTypeString(HashType ht) noexcept
Definition: HashJoin.h:165
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
Definition: HashJoin.h:286
static std::string toString(const std::string &type, const std::string &layout_type, size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size, bool raw=false)
Decode hash table into a human-readable string.
Definition: HashTable.cpp:226
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
size_t countBufferOff() const noexceptoverride
HashTable * getHashTableForDevice(const size_t device_id) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

ExpressionRange PerfectJoinHashTable::col_range_
private

Definition at line 285 of file PerfectJoinHashTable.h.

Referenced by getHashJoinArgs(), initHashTableForDevice(), and reify().

std::shared_ptr<Analyzer::ColumnVar> PerfectJoinHashTable::col_var_
private

Definition at line 278 of file PerfectJoinHashTable.h.

Referenced by getInnerTableId(), and getInnerTableRteIdx().

ColumnCacheMap& PerfectJoinHashTable::column_cache_
private

Definition at line 289 of file PerfectJoinHashTable.h.

Referenced by fetchColumnsForDevice().

std::mutex PerfectJoinHashTable::cpu_hash_table_buff_mutex_
private

Definition at line 282 of file PerfectJoinHashTable.h.

Referenced by copyCpuHashTableToGpu(), and initHashTableForDevice().

const int PerfectJoinHashTable::device_count_
private
BucketizedHashEntryInfo PerfectJoinHashTable::hash_entry_info_
mutable private
std::unique_ptr< HashtableRecycler > PerfectJoinHashTable::hash_table_cache_
static private
std::unique_ptr< HashingSchemeRecycler > PerfectJoinHashTable::hash_table_layout_cache_
static private
Initial value:
=
std::make_unique<HashingSchemeRecycler>()

Definition at line 302 of file PerfectJoinHashTable.h.

Referenced by getHashingSchemeCache(), initHashTableForDevice(), invalidateCache(), and markCachedItemAsDirty().

HashType PerfectJoinHashTable::hash_type_
private

Definition at line 281 of file PerfectJoinHashTable.h.

Referenced by getHashType(), initHashTableForDevice(), reify(), and toString().

HashTableBuildDagMap PerfectJoinHashTable::hashtable_build_dag_map_
private

Definition at line 293 of file PerfectJoinHashTable.h.

Referenced by reify().

std::vector<QueryPlanHash> PerfectJoinHashTable::hashtable_cache_key_
private

Definition at line 295 of file PerfectJoinHashTable.h.

Referenced by initHashTableForDevice(), and reify().

HashtableCacheMetaInfo PerfectJoinHashTable::hashtable_cache_meta_info_
private

Definition at line 296 of file PerfectJoinHashTable.h.

Referenced by reify().

std::vector<InnerOuter> PerfectJoinHashTable::inner_outer_pairs_
private
const InnerOuterStringOpInfos PerfectJoinHashTable::inner_outer_string_op_infos_
private
const JoinType PerfectJoinHashTable::join_type_
private

Definition at line 277 of file PerfectJoinHashTable.h.

Referenced by initHashTableForDevice(), and reify().

bool PerfectJoinHashTable::needs_dict_translation_
mutable private
std::shared_ptr<Analyzer::BinOper> PerfectJoinHashTable::qual_bin_oper_
private
RegisteredQueryHint PerfectJoinHashTable::query_hints_
private

Definition at line 291 of file PerfectJoinHashTable.h.

Referenced by getRegisteredQueryHint(), and initHashTableForDevice().

const std::vector<InputTableInfo>& PerfectJoinHashTable::query_infos_
private

Definition at line 279 of file PerfectJoinHashTable.h.

Referenced by getInnerQueryInfo().

ExpressionRange PerfectJoinHashTable::rhs_source_col_range_
private

Definition at line 286 of file PerfectJoinHashTable.h.

Referenced by isOneToOneHashPossible().

const StringDictionaryProxy::IdMap* PerfectJoinHashTable::str_proxy_translation_map_ {nullptr}
private

Definition at line 284 of file PerfectJoinHashTable.h.

Referenced by initHashTableForDevice(), and reify().

std::mutex PerfectJoinHashTable::str_proxy_translation_mutex_
private

Definition at line 283 of file PerfectJoinHashTable.h.

Referenced by reify().

const TableIdToNodeMap PerfectJoinHashTable::table_id_to_node_map_
private

Definition at line 298 of file PerfectJoinHashTable.h.

Referenced by initHashTableForDevice(), and reify().

std::unordered_set<size_t> PerfectJoinHashTable::table_keys_
private

Definition at line 297 of file PerfectJoinHashTable.h.

Referenced by reify().


The documentation for this class was generated from the following files: