OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
JoinHashTable Class Reference

#include <JoinHashTable.h>

+ Inheritance diagram for JoinHashTable:
+ Collaboration diagram for JoinHashTable:

Classes

struct  JoinHashTableCacheKey
 

Public Member Functions

int64_t getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const noexcept override
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const noexcept override
 
std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
std::set
< DecodedJoinHashBufferEntry > 
toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
virtual ~JoinHashTable ()
 
- Public Member Functions inherited from JoinHashTableInterface
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 

Static Public Member Functions

static std::shared_ptr
< JoinHashTable > 
getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static auto yieldCacheInvalidator () -> std::function< void()>
 
static const std::shared_ptr
< std::vector< int32_t > > & 
getCachedHashTable (size_t idx)
 
static uint64_t getNumberOfCachedHashTables ()
 
- Static Public Member Functions inherited from JoinHashTableInterface
static std::string getHashTypeString (HashType ht) noexcept
 
static DecodedJoinHashBufferSet toSet (size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size)
 Decode hash table into a std::set for easy inspection and validation. More...
 
static std::string toString (const std::string &type, const std::string &layout_type, size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size, bool raw=false)
 Decode hash table into a human-readable string. More...
 
static std::shared_ptr
< JoinHashTableInterface > 
getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static std::shared_ptr
< JoinHashTableInterface > 
getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::shared_ptr
< JoinHashTableInterface > 
getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 

Private Member Functions

 JoinHashTable (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Analyzer::ColumnVar *col_var, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const ExpressionRange &col_range, ColumnCacheMap &column_cache, Executor *executor, const int device_count)
 
ChunkKey genHashTableKey (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Analyzer::Expr *outer_col, const Analyzer::ColumnVar *inner_col) const
 
void reify ()
 
void reifyOneToOneForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const logger::ThreadId parent_thread_id)
 
void reifyOneToManyForDevice (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const logger::ThreadId parent_thread_id)
 
void checkHashJoinReplicationConstraint (const int table_id) const
 
void initOneToOneHashTable (const ChunkKey &chunk_key, const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
 
void initOneToManyHashTable (const ChunkKey &chunk_key, const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
 
void initHashTableOnCpuFromCache (const ChunkKey &chunk_key, const size_t num_elements, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols)
 
void putHashTableOnCpuToCache (const ChunkKey &chunk_key, const size_t num_elements, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols)
 
void initOneToOneHashTableOnCpu (const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const HashEntryInfo hash_entry_info, const int32_t hash_join_invalid_val)
 
void initOneToManyHashTableOnCpu (const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const HashEntryInfo hash_entry_info, const int32_t hash_join_invalid_val)
 
const InputTableInfo & getInnerQueryInfo (const Analyzer::ColumnVar *inner_col) const
 
size_t shardCount () const
 
llvm::Value * codegenHashTableLoad (const size_t table_idx)
 
std::vector< llvm::Value * > getHashJoinArgs (llvm::Value *hash_ptr, const Analyzer::Expr *key_col, const int shard_count, const CompilationOptions &co)
 
bool isBitwiseEq () const
 
void freeHashBufferMemory ()
 
void freeHashBufferGpuMemory ()
 
void freeHashBufferCpuMemory ()
 
size_t getComponentBufferSize () const noexcept
 
bool layoutRequiresAdditionalBuffers (JoinHashTableInterface::HashType layout) const noexcept override
 

Private Attributes

std::shared_ptr
< Analyzer::BinOper > 
qual_bin_oper_
 
std::shared_ptr
< Analyzer::ColumnVar > 
col_var_
 
const std::vector
< InputTableInfo > & 
query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
HashType hash_type_
 
size_t hash_entry_count_
 
std::shared_ptr< std::vector
< int32_t > > 
cpu_hash_table_buff_
 
std::mutex cpu_hash_table_buff_mutex_
 
ExpressionRange col_range_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
const int device_count_
 

Static Private Attributes

static std::vector< std::pair
< JoinHashTableCacheKey,
std::shared_ptr< std::vector
< int32_t > > > > 
join_hash_table_cache_
 
static std::mutex join_hash_table_cache_mutex_
 

Additional Inherited Members

- Public Types inherited from JoinHashTableInterface
enum  HashType : int { HashType::OneToOne, HashType::OneToMany, HashType::ManyToMany }
 

Detailed Description

Definition at line 51 of file JoinHashTable.h.

Constructor & Destructor Documentation

JoinHashTable::~JoinHashTable ( )
virtual

Definition at line 563 of file JoinHashTable.cpp.

References CHECK(), and executor_.

563  {
564 #ifdef HAVE_CUDA
565  CHECK(executor_);
566  CHECK(executor_->catalog_);
567  auto& data_mgr = executor_->catalog_->getDataMgr();
568  for (auto& gpu_buffer : gpu_hash_table_buff_) {
569  if (gpu_buffer) {
570  data_mgr.free(gpu_buffer);
571  }
572  }
573  for (auto& gpu_buffer : gpu_hash_table_err_buff_) {
574  if (gpu_buffer) {
575  data_mgr.free(gpu_buffer);
576  }
577  }
578 #endif
579 }
CHECK(cgen_state)
Executor * executor_

+ Here is the call graph for this function:

JoinHashTable::JoinHashTable ( const std::shared_ptr< Analyzer::BinOper >  qual_bin_oper,
const Analyzer::ColumnVar *  col_var,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const ExpressionRange &  col_range,
ColumnCacheMap &  column_cache,
Executor *  executor,
const int  device_count 
)
inlineprivate

Definition at line 138 of file JoinHashTable.h.

References CHECK(), CHECK_GT, device_count_, ExpressionRange::getType(), and Integer.

Referenced by getInstance().

147  : qual_bin_oper_(qual_bin_oper)
148  , col_var_(std::dynamic_pointer_cast<Analyzer::ColumnVar>(col_var->deep_copy()))
149  , query_infos_(query_infos)
150  , memory_level_(memory_level)
151  , hash_type_(preferred_hash_type)
152  , hash_entry_count_(0)
153  , col_range_(col_range)
154  , executor_(executor)
155  , column_cache_(column_cache)
156  , device_count_(device_count) {
159  }
const std::vector< InputTableInfo > & query_infos_
const int device_count_
ColumnCacheMap & column_cache_
std::shared_ptr< Analyzer::Expr > deep_copy() const override
Definition: Analyzer.cpp:59
HashType hash_type_
#define CHECK_GT(x, y)
Definition: Logger.h:209
CHECK(cgen_state)
Executor * executor_
ExpressionRange col_range_
ExpressionRangeType getType() const
std::shared_ptr< Analyzer::ColumnVar > col_var_
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
const Data_Namespace::MemoryLevel memory_level_
size_t hash_entry_count_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Function Documentation

void JoinHashTable::checkHashJoinReplicationConstraint ( const int  table_id) const
private

Definition at line 720 of file JoinHashTable.cpp.

References CHECK(), executor_, g_cluster, get_shard_count(), qual_bin_oper_, and table_is_replicated().

Referenced by reify().

720  {
721  if (!g_cluster) {
722  return;
723  }
724  if (table_id >= 0) {
725  const auto inner_td = executor_->getCatalog()->getMetadataForTable(table_id);
726  CHECK(inner_td);
727  size_t shard_count{0};
728  shard_count = get_shard_count(qual_bin_oper_.get(), executor_);
729  if (!shard_count && !table_is_replicated(inner_td)) {
730  throw TableMustBeReplicated(inner_td->tableName);
731  }
732  }
733 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
CHECK(cgen_state)
Executor * executor_
bool table_is_replicated(const TableDescriptor *td)
bool g_cluster
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * JoinHashTable::codegenHashTableLoad ( const size_t  table_idx,
Executor executor 
)
static

Definition at line 1246 of file JoinHashTable.cpp.

References CHECK(), CHECK_LT, and get_arg_by_name().

Referenced by codegenHashTableLoad(), BaselineJoinHashTable::codegenMatchingSet(), codegenMatchingSet(), OverlapsJoinHashTable::codegenMatchingSet(), codegenSlot(), and BaselineJoinHashTable::hashPtr().

1247  {
1248  llvm::Value* hash_ptr = nullptr;
1249  const auto total_table_count =
1250  executor->plan_state_->join_info_.join_hash_tables_.size();
1251  CHECK_LT(table_idx, total_table_count);
1252  if (total_table_count > 1) {
1253  auto hash_tables_ptr =
1254  get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
1255  auto hash_pptr =
1256  table_idx > 0 ? executor->cgen_state_->ir_builder_.CreateGEP(
1257  hash_tables_ptr,
1258  executor->cgen_state_->llInt(static_cast<int64_t>(table_idx)))
1259  : hash_tables_ptr;
1260  hash_ptr = executor->cgen_state_->ir_builder_.CreateLoad(hash_pptr);
1261  } else {
1262  hash_ptr = get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
1263  }
1264  CHECK(hash_ptr);
1265  return hash_ptr;
1266 }
CHECK(cgen_state)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:95
#define CHECK_LT(x, y)
Definition: Logger.h:207

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * JoinHashTable::codegenHashTableLoad ( const size_t  table_idx)
private

Definition at line 1235 of file JoinHashTable.cpp.

References CHECK(), codegenHashTableLoad(), executor_, and get_arg_by_name().

1235  {
1236  const auto hash_ptr = codegenHashTableLoad(table_idx, executor_);
1237  if (hash_ptr->getType()->isIntegerTy(64)) {
1238  return hash_ptr;
1239  }
1240  CHECK(hash_ptr->getType()->isPointerTy());
1241  return executor_->cgen_state_->ir_builder_.CreatePtrToInt(
1242  get_arg_by_name(executor_->cgen_state_->row_func_, "join_hash_tables"),
1243  llvm::Type::getInt64Ty(executor_->cgen_state_->context_));
1244 }
CHECK(cgen_state)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:95
Executor * executor_
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)

+ Here is the call graph for this function:

HashJoinMatchingSet JoinHashTable::codegenMatchingSet ( const CompilationOptions co,
const size_t  index 
)
overridevirtual

Implements JoinHashTableInterface.

Definition at line 1318 of file JoinHashTable.cpp.

References CHECK(), codegenHashTableLoad(), executor_, anonymous_namespace{JoinHashTable.cpp}::get_cols(), getComponentBufferSize(), getHashJoinArgs(), isBitwiseEq(), kDATE, qual_bin_oper_, and shardCount().

Referenced by BaselineJoinHashTable::codegenMatchingSet().

1319  {
1320  const auto cols = get_cols(
1321  qual_bin_oper_.get(), *executor_->getCatalog(), executor_->temporary_tables_);
1322  auto key_col = cols.second;
1323  CHECK(key_col);
1324  auto val_col = cols.first;
1325  CHECK(val_col);
1326  auto pos_ptr = codegenHashTableLoad(index);
1327  CHECK(pos_ptr);
1328  const int shard_count = shardCount();
1329  auto hash_join_idx_args = getHashJoinArgs(pos_ptr, key_col, shard_count, co);
1330  const int64_t sub_buff_size = getComponentBufferSize();
1331  const auto& key_col_ti = key_col->get_type_info();
1332 
1333  auto bucketize = (key_col_ti.get_type() == kDATE);
1334  return codegenMatchingSet(hash_join_idx_args,
1335  shard_count,
1336  !key_col_ti.get_notnull(),
1337  isBitwiseEq(),
1338  sub_buff_size,
1339  executor_,
1340  bucketize);
1341 }
bool isBitwiseEq() const
std::vector< llvm::Value * > getHashJoinArgs(llvm::Value *hash_ptr, const Analyzer::Expr *key_col, const int shard_count, const CompilationOptions &co)
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
CHECK(cgen_state)
size_t shardCount() const
Definition: sqltypes.h:54
Executor * executor_
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
size_t getComponentBufferSize() const noexcept
HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t) override
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

HashJoinMatchingSet JoinHashTable::codegenMatchingSet ( const std::vector< llvm::Value * > &  hash_join_idx_args_in,
const bool  is_sharded,
const bool  col_is_nullable,
const bool  is_bw_eq,
const int64_t  sub_buff_size,
Executor executor,
const bool  is_bucketized = false 
)
static

Definition at line 1343 of file JoinHashTable.cpp.

References CHECK().

1350  {
1351  using namespace std::string_literals;
1352 
1353  std::string fname(is_bucketized ? "bucketized_hash_join_idx"s : "hash_join_idx"s);
1354 
1355  if (is_bw_eq) {
1356  fname += "_bitwise";
1357  }
1358  if (is_sharded) {
1359  fname += "_sharded";
1360  }
1361  if (!is_bw_eq && col_is_nullable) {
1362  fname += "_nullable";
1363  }
1364 
1365  const auto slot_lv = executor->cgen_state_->emitCall(fname, hash_join_idx_args_in);
1366  const auto slot_valid_lv = executor->cgen_state_->ir_builder_.CreateICmpSGE(
1367  slot_lv, executor->cgen_state_->llInt(int64_t(0)));
1368 
1369  auto pos_ptr = hash_join_idx_args_in[0];
1370  CHECK(pos_ptr);
1371 
1372  auto count_ptr = executor->cgen_state_->ir_builder_.CreateAdd(
1373  pos_ptr, executor->cgen_state_->llInt(sub_buff_size));
1374  auto hash_join_idx_args = hash_join_idx_args_in;
1375  hash_join_idx_args[0] = executor->cgen_state_->ir_builder_.CreatePtrToInt(
1376  count_ptr, llvm::Type::getInt64Ty(executor->cgen_state_->context_));
1377 
1378  const auto row_count_lv = executor->cgen_state_->ir_builder_.CreateSelect(
1379  slot_valid_lv,
1380  executor->cgen_state_->emitCall(fname, hash_join_idx_args),
1381  executor->cgen_state_->llInt(int64_t(0)));
1382  auto rowid_base_i32 = executor->cgen_state_->ir_builder_.CreateIntToPtr(
1383  executor->cgen_state_->ir_builder_.CreateAdd(
1384  pos_ptr, executor->cgen_state_->llInt(2 * sub_buff_size)),
1385  llvm::Type::getInt32PtrTy(executor->cgen_state_->context_));
1386  auto rowid_ptr_i32 =
1387  executor->cgen_state_->ir_builder_.CreateGEP(rowid_base_i32, slot_lv);
1388  return {rowid_ptr_i32, row_count_lv, slot_lv};
1389 }
CHECK(cgen_state)

+ Here is the call graph for this function:

llvm::Value * JoinHashTable::codegenSlot ( const CompilationOptions co,
const size_t  index 
)
overridevirtual

Implements JoinHashTableInterface.

Definition at line 1517 of file JoinHashTable.cpp.

References CHECK(), CHECK_EQ, CodeGenerator::codegen(), codegenHashTableLoad(), executor_, anonymous_namespace{JoinHashTable.cpp}::get_cols(), Analyzer::Expr::get_type_info(), getHashJoinArgs(), getHashType(), isBitwiseEq(), kDATE, JoinHashTableInterface::OneToOne, qual_bin_oper_, and shardCount().

1518  {
1519  using namespace std::string_literals;
1520 
1522  const auto cols = get_cols(
1523  qual_bin_oper_.get(), *executor_->getCatalog(), executor_->temporary_tables_);
1524  auto key_col = cols.second;
1525  CHECK(key_col);
1526  auto val_col = cols.first;
1527  CHECK(val_col);
1528  CodeGenerator code_generator(executor_);
1529  const auto key_lvs = code_generator.codegen(key_col, true, co);
1530  CHECK_EQ(size_t(1), key_lvs.size());
1531  auto hash_ptr = codegenHashTableLoad(index);
1532  CHECK(hash_ptr);
1533  const int shard_count = shardCount();
1534  const auto hash_join_idx_args = getHashJoinArgs(hash_ptr, key_col, shard_count, co);
1535 
1536  const auto& key_col_ti = key_col->get_type_info();
1537  std::string fname((key_col_ti.get_type() == kDATE) ? "bucketized_hash_join_idx"s
1538  : "hash_join_idx"s);
1539 
1540  if (isBitwiseEq()) {
1541  fname += "_bitwise";
1542  }
1543  if (shard_count) {
1544  fname += "_sharded";
1545  }
1546 
1547  if (!isBitwiseEq() && !key_col_ti.get_notnull()) {
1548  fname += "_nullable";
1549  }
1550  return executor_->cgen_state_->emitCall(fname, hash_join_idx_args);
1551 }
bool isBitwiseEq() const
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< llvm::Value * > getHashJoinArgs(llvm::Value *hash_ptr, const Analyzer::Expr *key_col, const int shard_count, const CompilationOptions &co)
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
CHECK(cgen_state)
size_t shardCount() const
Definition: sqltypes.h:54
Executor * executor_
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
HashType getHashType() const noexceptoverride
Definition: JoinHashTable.h:89

+ Here is the call graph for this function:

size_t JoinHashTable::countBufferOff ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1395 of file JoinHashTable.cpp.

References getComponentBufferSize().

Referenced by toSet(), and toString().

1395  {
1396  return getComponentBufferSize();
1397 }
size_t getComponentBufferSize() const noexcept

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void JoinHashTable::freeHashBufferCpuMemory ( )
private

Definition at line 1626 of file JoinHashTable.cpp.

References cpu_hash_table_buff_.

Referenced by freeHashBufferMemory().

1626  {
1627  cpu_hash_table_buff_.reset();
1628 }
std::shared_ptr< std::vector< int32_t > > cpu_hash_table_buff_

+ Here is the caller graph for this function:

void JoinHashTable::freeHashBufferGpuMemory ( )
private

Definition at line 1605 of file JoinHashTable.cpp.

References CHECK(), executor_, and CudaAllocator::freeGpuAbstractBuffer().

Referenced by freeHashBufferMemory().

1605  {
1606 #ifdef HAVE_CUDA
1607  const auto& catalog = *executor_->getCatalog();
1608  auto& data_mgr = catalog.getDataMgr();
1609  for (auto& buf : gpu_hash_table_buff_) {
1610  if (buf) {
1611  CudaAllocator::freeGpuAbstractBuffer(&data_mgr, buf);
1612  buf = nullptr;
1613  }
1614  }
1615  for (auto& buf : gpu_hash_table_err_buff_) {
1616  if (buf) {
1617  CudaAllocator::freeGpuAbstractBuffer(&data_mgr, buf);
1618  buf = nullptr;
1619  }
1620  }
1621 #else
1622  CHECK(false);
1623 #endif // HAVE_CUDA
1624 }
CHECK(cgen_state)
Executor * executor_
static void freeGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, Data_Namespace::AbstractBuffer *ab)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void JoinHashTable::freeHashBufferMemory ( )
private

Definition at line 1598 of file JoinHashTable.cpp.

References freeHashBufferCpuMemory(), and freeHashBufferGpuMemory().

Referenced by reify().

1598  {
1599 #ifdef HAVE_CUDA
1601 #endif
1603 }
void freeHashBufferGpuMemory()
void freeHashBufferCpuMemory()

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ChunkKey JoinHashTable::genHashTableKey ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const Analyzer::Expr outer_col,
const Analyzer::ColumnVar inner_col 
) const
private

Definition at line 581 of file JoinHashTable.cpp.

References CHECK(), CHECK_EQ, executor_, Analyzer::ColumnVar::get_column_id(), Analyzer::ColumnVar::get_table_id(), Analyzer::Expr::get_type_info(), getInnerQueryInfo(), Fragmenter_Namespace::TableInfo::getNumTuples(), InputTableInfo::info, and kENCODING_DICT.

Referenced by reifyOneToManyForDevice(), and reifyOneToOneForDevice().

584  {
585  ChunkKey hash_table_key{executor_->getCatalog()->getCurrentDB().dbId,
586  inner_col->get_table_id(),
587  inner_col->get_column_id()};
588  const auto& ti = inner_col->get_type_info();
589  if (ti.is_string()) {
590  CHECK_EQ(kENCODING_DICT, ti.get_compression());
591  size_t outer_elem_count = 0;
592  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_col_expr);
593  CHECK(outer_col);
594  const auto& outer_query_info = getInnerQueryInfo(outer_col).info;
595  for (auto& frag : outer_query_info.fragments) {
596  outer_elem_count = frag.getNumTuples();
597  }
598  hash_table_key.push_back(outer_elem_count);
599  }
600  if (fragments.size() < 2) {
601  hash_table_key.push_back(fragments.front().fragmentId);
602  }
603  return hash_table_key;
604 }
int get_table_id() const
Definition: Analyzer.h:195
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< int > ChunkKey
Definition: types.h:35
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
CHECK(cgen_state)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
Executor * executor_
int get_column_id() const
Definition: Analyzer.h:196
const InputTableInfo & getInnerQueryInfo(const Analyzer::ColumnVar *inner_col) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static const std::shared_ptr<std::vector<int32_t> >& JoinHashTable::getCachedHashTable ( size_t  idx)
inlinestatic

Definition at line 123 of file JoinHashTable.h.

References CHECK(), CHECK_LT, join_hash_table_cache_, and join_hash_table_cache_mutex_.

Referenced by QueryRunner::QueryRunner::getCachedJoinHashTable().

123  {
124  std::lock_guard<std::mutex> guard(join_hash_table_cache_mutex_);
125  CHECK(!join_hash_table_cache_.empty());
126  CHECK_LT(idx, join_hash_table_cache_.size());
127  return join_hash_table_cache_.at(idx).second;
128  }
static std::mutex join_hash_table_cache_mutex_
CHECK(cgen_state)
static std::vector< std::pair< JoinHashTableCacheKey, std::shared_ptr< std::vector< int32_t > > > > join_hash_table_cache_
#define CHECK_LT(x, y)
Definition: Logger.h:207

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t JoinHashTable::getComponentBufferSize ( ) const
privatenoexcept

Definition at line 1403 of file JoinHashTable.cpp.

References hash_entry_count_, hash_type_, and JoinHashTableInterface::OneToMany.

Referenced by codegenMatchingSet(), countBufferOff(), and payloadBufferOff().

1403  {
1405  return hash_entry_count_ * sizeof(int32_t);
1406  } else {
1407  return 0;
1408  }
1409 }
HashType hash_type_
size_t hash_entry_count_

+ Here is the caller graph for this function:

int JoinHashTable::getDeviceCount ( ) const
inlineoverridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 95 of file JoinHashTable.h.

References device_count_.

95 { return device_count_; };
const int device_count_
std::vector< llvm::Value * > JoinHashTable::getHashJoinArgs ( llvm::Value *  hash_ptr,
const Analyzer::Expr key_col,
const int  shard_count,
const CompilationOptions co 
)
private

Definition at line 1268 of file JoinHashTable.cpp.

References CHECK_EQ, CodeGenerator::codegen(), col_range_, device_count_, executor_, anonymous_namespace{JoinHashTable.cpp}::get_bucketized_hash_entry_info(), anonymous_namespace{JoinHashTable.cpp}::get_hash_entry_count(), get_logical_type_info(), SQLTypeInfo::get_type(), Analyzer::Expr::get_type_info(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), inline_fixed_encoding_null_val(), isBitwiseEq(), and kDATE.

Referenced by codegenMatchingSet(), and codegenSlot().

1271  {
1272  CodeGenerator code_generator(executor_);
1273  const auto key_lvs = code_generator.codegen(key_col, true, co);
1274  CHECK_EQ(size_t(1), key_lvs.size());
1275  auto const& key_col_ti = key_col->get_type_info();
1276  auto hash_entry_info =
1278 
1279  std::vector<llvm::Value*> hash_join_idx_args{
1280  hash_ptr,
1281  executor_->cgen_state_->castToTypeIn(key_lvs.front(), 64),
1282  executor_->cgen_state_->llInt(col_range_.getIntMin()),
1283  executor_->cgen_state_->llInt(col_range_.getIntMax())};
1284  if (shard_count) {
1285  const auto expected_hash_entry_count =
1287  const auto entry_count_per_shard =
1288  (expected_hash_entry_count + shard_count - 1) / shard_count;
1289  hash_join_idx_args.push_back(
1290  executor_->cgen_state_->llInt<uint32_t>(entry_count_per_shard));
1291  hash_join_idx_args.push_back(executor_->cgen_state_->llInt<uint32_t>(shard_count));
1292  hash_join_idx_args.push_back(executor_->cgen_state_->llInt<uint32_t>(device_count_));
1293  }
1294  auto key_col_logical_ti = get_logical_type_info(key_col->get_type_info());
1295  if (!key_col_logical_ti.get_notnull() || isBitwiseEq()) {
1296  hash_join_idx_args.push_back(executor_->cgen_state_->llInt(
1297  inline_fixed_encoding_null_val(key_col_logical_ti)));
1298  }
1299  auto special_date_bucketization_case = key_col_ti.get_type() == kDATE;
1300  if (isBitwiseEq()) {
1301  if (special_date_bucketization_case) {
1302  hash_join_idx_args.push_back(executor_->cgen_state_->llInt(
1303  col_range_.getIntMax() / hash_entry_info.bucket_normalization + 1));
1304  } else {
1305  hash_join_idx_args.push_back(
1306  executor_->cgen_state_->llInt(col_range_.getIntMax() + 1));
1307  }
1308  }
1309 
1310  if (special_date_bucketization_case) {
1311  hash_join_idx_args.emplace_back(
1312  executor_->cgen_state_->llInt(hash_entry_info.bucket_normalization));
1313  }
1314 
1315  return hash_join_idx_args;
1316 }
int64_t getIntMin() const
bool isBitwiseEq() const
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const int device_count_
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:818
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:258
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
HashEntryInfo get_bucketized_hash_entry_info(SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
Definition: sqltypes.h:54
Executor * executor_
ExpressionRange col_range_
int64_t getIntMax() const
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
size_t get_hash_entry_count(const ExpressionRange &col_range, const bool is_bw_eq)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

HashType JoinHashTable::getHashType ( ) const
inlineoverridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 89 of file JoinHashTable.h.

References hash_type_.

Referenced by codegenSlot().

89 { return hash_type_; }
HashType hash_type_

+ Here is the caller graph for this function:

const InputTableInfo & JoinHashTable::getInnerQueryInfo ( const Analyzer::ColumnVar inner_col) const
private

Definition at line 1553 of file JoinHashTable.cpp.

References get_inner_query_info(), Analyzer::ColumnVar::get_table_id(), and query_infos_.

Referenced by genHashTableKey(), and reify().

1554  {
1555  return get_inner_query_info(inner_col->get_table_id(), query_infos_);
1556 }
int get_table_id() const
Definition: Analyzer.h:195
const std::vector< InputTableInfo > & query_infos_
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int JoinHashTable::getInnerTableId ( ) const
inlineoverridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 81 of file JoinHashTable.h.

References col_var_.

81  {
82  return col_var_.get()->get_table_id();
83  };
std::shared_ptr< Analyzer::ColumnVar > col_var_
int JoinHashTable::getInnerTableRteIdx ( ) const
inlineoverridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 85 of file JoinHashTable.h.

References col_var_.

85  {
86  return col_var_.get()->get_rte_idx();
87  };
std::shared_ptr< Analyzer::ColumnVar > col_var_
std::shared_ptr< JoinHashTable > JoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper > qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap &  column_cache,
Executor *  executor 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 337 of file JoinHashTable.cpp.

References CHECK(), CHECK_EQ, anonymous_namespace{JoinHashTable.cpp}::get_bucketized_hash_entry_info(), anonymous_namespace{JoinHashTable.cpp}::get_cols(), getExpressionRange(), JoinHashTableInterface::getHashTypeString(), HashEntryInfo::getNormalizedHashEntryCount(), Data_Namespace::GPU_LEVEL, Invalid, IS_EQUIVALENCE, JoinHashTable(), kBW_EQ, ExpressionRange::makeIntRange(), VLOG, and VLOGGING.

Referenced by JoinHashTableInterface::getInstance().

344  {
345  decltype(std::chrono::steady_clock::now()) ts1, ts2;
346  if (VLOGGING(1)) {
347  VLOG(1) << "Building perfect hash table " << getHashTypeString(preferred_hash_type)
348  << " for qual: " << qual_bin_oper->toString();
349  ts1 = std::chrono::steady_clock::now();
350  }
351  CHECK(IS_EQUIVALENCE(qual_bin_oper->get_optype()));
352  const auto cols =
353  get_cols(qual_bin_oper.get(), *executor->getCatalog(), executor->temporary_tables_);
354  const auto inner_col = cols.first;
355  CHECK(inner_col);
356  const auto& ti = inner_col->get_type_info();
357  auto col_range =
358  getExpressionRange(ti.is_string() ? cols.second : inner_col, query_infos, executor);
359  if (col_range.getType() == ExpressionRangeType::Invalid) {
360  throw HashJoinFail(
361  "Could not compute range for the expressions involved in the equijoin");
362  }
363  if (ti.is_string()) {
364  // The nullable info must be the same as the source column.
365  const auto source_col_range = getExpressionRange(inner_col, query_infos, executor);
366  if (source_col_range.getType() == ExpressionRangeType::Invalid) {
367  throw HashJoinFail(
368  "Could not compute range for the expressions involved in the equijoin");
369  }
370  if (source_col_range.getIntMin() > source_col_range.getIntMax()) {
371  // If the inner column expression range is empty, use the inner col range
372  CHECK_EQ(source_col_range.getIntMin(), int64_t(0));
373  CHECK_EQ(source_col_range.getIntMax(), int64_t(-1));
374  col_range = source_col_range;
375  } else {
376  col_range = ExpressionRange::makeIntRange(
377  std::min(source_col_range.getIntMin(), col_range.getIntMin()),
378  std::max(source_col_range.getIntMax(), col_range.getIntMax()),
379  0,
380  source_col_range.hasNulls());
381  }
382  }
383  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
384  const auto max_hash_entry_count =
386  ? static_cast<size_t>(std::numeric_limits<int32_t>::max() / sizeof(int32_t))
387  : static_cast<size_t>(std::numeric_limits<int32_t>::max());
388 
389  auto bucketized_entry_count_info = get_bucketized_hash_entry_info(
390  ti, col_range, qual_bin_oper->get_optype() == kBW_EQ);
391  auto bucketized_entry_count = bucketized_entry_count_info.getNormalizedHashEntryCount();
392 
393  if (bucketized_entry_count > max_hash_entry_count) {
394  throw TooManyHashEntries();
395  }
396 
397  if (qual_bin_oper->get_optype() == kBW_EQ &&
398  col_range.getIntMax() >= std::numeric_limits<int64_t>::max()) {
399  throw HashJoinFail("Cannot translate null value for kBW_EQ");
400  }
401  auto join_hash_table =
402  std::shared_ptr<JoinHashTable>(new JoinHashTable(qual_bin_oper,
403  inner_col,
404  query_infos,
405  memory_level,
406  preferred_hash_type,
407  col_range,
408  column_cache,
409  executor,
410  device_count));
411  try {
412  join_hash_table->reify();
413  } catch (const TableMustBeReplicated& e) {
414  // Throw a runtime error to abort the query
415  join_hash_table->freeHashBufferMemory();
416  throw std::runtime_error(e.what());
417  } catch (const HashJoinFail& e) {
418  // HashJoinFail exceptions log an error and trigger a retry with a join loop (if
419  // possible)
420  join_hash_table->freeHashBufferMemory();
421  throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
422  "involved in equijoin | ") +
423  e.what());
424  } catch (const ColumnarConversionNotSupported& e) {
425  throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
426  e.what());
427  } catch (const OutOfMemory& e) {
428  throw HashJoinFail(
429  std::string("Ran out of memory while building hash tables for equijoin | ") +
430  e.what());
431  } catch (const std::exception& e) {
432  throw std::runtime_error(
433  std::string("Fatal error while attempting to build hash tables for join: ") +
434  e.what());
435  }
436  if (VLOGGING(1)) {
437  ts2 = std::chrono::steady_clock::now();
438  VLOG(1) << "Built perfect hash table "
439  << getHashTypeString(join_hash_table->getHashType()) << " in "
440  << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
441  << " ms";
442  }
443  return join_hash_table;
444 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define IS_EQUIVALENCE(X)
Definition: sqldefs.h:67
JoinHashTable(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Analyzer::ColumnVar *col_var, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const ExpressionRange &col_range, ColumnCacheMap &column_cache, Executor *executor, const int device_count)
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
CHECK(cgen_state)
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
static ExpressionRange makeIntRange(const int64_t int_min, const int64_t int_max, const int64_t bucket, const bool has_nulls)
#define VLOGGING(n)
Definition: Logger.h:195
HashEntryInfo get_bucketized_hash_entry_info(SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
size_t getNormalizedHashEntryCount() const
Definition: sqldefs.h:31
static std::string getHashTypeString(HashType ht) noexcept
#define VLOG(n)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t JoinHashTable::getJoinHashBuffer ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override, virtual, noexcept

Implements JoinHashTableInterface.

Definition at line 1411 of file JoinHashTable.cpp.

References CHECK(), CHECK_LT, CPU, and cpu_hash_table_buff_.

Referenced by toSet(), and toString().

1412  {
1413  if (device_type == ExecutorDeviceType::CPU && !cpu_hash_table_buff_) {
1414  return 0;
1415  }
1416 #ifdef HAVE_CUDA
1417  CHECK_LT(static_cast<size_t>(device_id), gpu_hash_table_buff_.size());
1418  if (device_type == ExecutorDeviceType::CPU) {
1419  return reinterpret_cast<int64_t>(&(*cpu_hash_table_buff_)[0]);
1420  } else {
1421  return gpu_hash_table_buff_[device_id]
1422  ? reinterpret_cast<CUdeviceptr>(
1423  gpu_hash_table_buff_[device_id]->getMemoryPtr())
1424  : reinterpret_cast<CUdeviceptr>(nullptr);
1425  }
1426 #else
1427  CHECK(device_type == ExecutorDeviceType::CPU);
1428  return reinterpret_cast<int64_t>(&(*cpu_hash_table_buff_)[0]);
1429 #endif
1430 }
unsigned long long CUdeviceptr
Definition: nocuda.h:27
std::shared_ptr< std::vector< int32_t > > cpu_hash_table_buff_
CHECK(cgen_state)
#define CHECK_LT(x, y)
Definition: Logger.h:207

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t JoinHashTable::getJoinHashBufferSize ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override, virtual, noexcept

Implements JoinHashTableInterface.

Definition at line 1432 of file JoinHashTable.cpp.

References CHECK(), CHECK_LT, CPU, and cpu_hash_table_buff_.

Referenced by toSet(), and toString().

1433  {
1434  if (device_type == ExecutorDeviceType::CPU && !cpu_hash_table_buff_) {
1435  return 0;
1436  }
1437 #ifdef HAVE_CUDA
1438  CHECK_LT(static_cast<size_t>(device_id), gpu_hash_table_buff_.size());
1439  if (device_type == ExecutorDeviceType::CPU) {
1440  return cpu_hash_table_buff_->size() *
1441  sizeof(decltype(cpu_hash_table_buff_)::element_type::value_type);
1442  } else {
1443  return gpu_hash_table_buff_[device_id]
1444  ? gpu_hash_table_buff_[device_id]->reservedSize()
1445  : 0;
1446  }
1447 #else
1448  CHECK(device_type == ExecutorDeviceType::CPU);
1449  return cpu_hash_table_buff_->size() *
1450  sizeof(decltype(cpu_hash_table_buff_)::element_type::value_type);
1451 #endif
1452 }
std::shared_ptr< std::vector< int32_t > > cpu_hash_table_buff_
CHECK(cgen_state)
#define CHECK_LT(x, y)
Definition: Logger.h:207

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Data_Namespace::MemoryLevel JoinHashTable::getMemoryLevel ( ) const
inline, override, virtual, noexcept

Implements JoinHashTableInterface.

Definition at line 91 of file JoinHashTable.h.

References memory_level_.

91  {
92  return memory_level_;
93  };
const Data_Namespace::MemoryLevel memory_level_
static uint64_t JoinHashTable::getNumberOfCachedHashTables ( )
inline, static

Definition at line 130 of file JoinHashTable.h.

References join_hash_table_cache_, and join_hash_table_cache_mutex_.

Referenced by QueryRunner::QueryRunner::getNumberOfCachedJoinHashTables().

130  {
131  std::lock_guard<std::mutex> guard(join_hash_table_cache_mutex_);
132  return join_hash_table_cache_.size();
133  }
static std::mutex join_hash_table_cache_mutex_
static std::vector< std::pair< JoinHashTableCacheKey, std::shared_ptr< std::vector< int32_t > > > > join_hash_table_cache_

+ Here is the caller graph for this function:

void JoinHashTable::initHashTableOnCpuFromCache ( const ChunkKey &  chunk_key,
const size_t  num_elements,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols 
)
private

Definition at line 1183 of file JoinHashTable.cpp.

References CHECK_GE, col_range_, cpu_hash_table_buff_, cpu_hash_table_buff_mutex_, DEBUG_TIMER, join_hash_table_cache_, join_hash_table_cache_mutex_, and qual_bin_oper_.

Referenced by initOneToManyHashTable(), and initOneToOneHashTable().

1186  {
1187  auto timer = DEBUG_TIMER(__func__);
1188  CHECK_GE(chunk_key.size(), size_t(2));
1189  if (chunk_key[1] < 0) {
1190  // Do not cache hash tables over intermediate results
1191  return;
1192  }
1193  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
1194  JoinHashTableCacheKey cache_key{col_range_,
1195  *cols.first,
1196  outer_col ? *outer_col : *cols.first,
1197  num_elements,
1198  chunk_key,
1199  qual_bin_oper_->get_optype()};
1200  std::lock_guard<std::mutex> join_hash_table_cache_lock(join_hash_table_cache_mutex_);
1201  for (const auto& kv : join_hash_table_cache_) {
1202  if (kv.first == cache_key) {
1203  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
1204  cpu_hash_table_buff_ = kv.second;
1205  break;
1206  }
1207  }
1208 }
std::shared_ptr< std::vector< int32_t > > cpu_hash_table_buff_
#define CHECK_GE(x, y)
Definition: Logger.h:210
static std::mutex join_hash_table_cache_mutex_
static std::vector< std::pair< JoinHashTableCacheKey, std::shared_ptr< std::vector< int32_t > > > > join_hash_table_cache_
ExpressionRange col_range_
std::mutex cpu_hash_table_buff_mutex_
#define DEBUG_TIMER(name)
Definition: Logger.h:313
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_

+ Here is the caller graph for this function:

void JoinHashTable::initOneToManyHashTable ( const ChunkKey &  chunk_key,
const JoinColumn join_column,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id 
)
private

Definition at line 1053 of file JoinHashTable.cpp.

References CudaAllocator::allocGpuAbstractBuffer(), CHECK(), CHECK_EQ, CHECK_GT, col_range_, copy_to_gpu(), cpu_hash_table_buff_, cpu_hash_table_buff_mutex_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, device_count_, executor_, fill_one_to_many_hash_table_on_device(), fill_one_to_many_hash_table_on_device_bucketized(), fill_one_to_many_hash_table_on_device_sharded(), anonymous_namespace{JoinHashTable.cpp}::get_bucketized_hash_entry_info(), get_join_column_type_kind(), get_shard_count(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), Data_Namespace::GPU_LEVEL, hash_entry_count_, init_hash_join_buff_on_device(), initHashTableOnCpuFromCache(), initOneToManyHashTableOnCpu(), inline_fixed_encoding_null_val(), isBitwiseEq(), kDATE, memory_level_, JoinColumn::num_elems, putHashTableOnCpuToCache(), and qual_bin_oper_.

Referenced by reifyOneToManyForDevice().

1058  {
1059  auto timer = DEBUG_TIMER(__func__);
1060  auto const inner_col = cols.first;
1061  CHECK(inner_col);
1062 
1063  auto hash_entry_info = get_bucketized_hash_entry_info(
1064  inner_col->get_type_info(), col_range_, isBitwiseEq());
1065 
1066 #ifdef HAVE_CUDA
1067  const auto shard_count = get_shard_count(qual_bin_oper_.get(), executor_);
1068  const size_t entries_per_shard =
1069  (shard_count ? get_entries_per_shard(hash_entry_info.hash_entry_count, shard_count)
1070  : 0);
1071  // Even if we join on dictionary encoded strings, the memory on the GPU is still
1072  // needed once the join hash table has been built on the CPU.
1073  if (memory_level_ == Data_Namespace::GPU_LEVEL && shard_count) {
1074  const auto shards_per_device = (shard_count + device_count_ - 1) / device_count_;
1075  CHECK_GT(shards_per_device, 0u);
1076  hash_entry_info.hash_entry_count = entries_per_shard * shards_per_device;
1077  }
1078 #else
1079  CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
1080 #endif
1081  if (!device_id) {
1082  hash_entry_count_ = hash_entry_info.getNormalizedHashEntryCount();
1083  }
1084 
1085 #ifdef HAVE_CUDA
1086  const auto& ti = inner_col->get_type_info();
1087  auto& data_mgr = executor_->getCatalog()->getDataMgr();
1089  const size_t total_count =
1090  2 * hash_entry_info.getNormalizedHashEntryCount() + join_column.num_elems;
1091  gpu_hash_table_buff_[device_id] = CudaAllocator::allocGpuAbstractBuffer(
1092  &data_mgr, total_count * sizeof(int32_t), device_id);
1093  }
1094 #endif
1095  const int32_t hash_join_invalid_val{-1};
1096  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
1097  initHashTableOnCpuFromCache(chunk_key, join_column.num_elems, cols);
1098  {
1099  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
1101  join_column, cols, hash_entry_info, hash_join_invalid_val);
1102  }
1103  if (inner_col->get_table_id() > 0) {
1104  putHashTableOnCpuToCache(chunk_key, join_column.num_elems, cols);
1105  }
1106  // Transfer the hash table on the GPU if we've only built it on CPU
1107  // but the query runs on GPU (join on dictionary encoded columns).
1108  // Don't transfer the buffer if there was an error since we'll bail anyway.
1110 #ifdef HAVE_CUDA
1111  CHECK(ti.is_string());
1112  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
1113  copy_to_gpu(
1114  &data_mgr,
1115  reinterpret_cast<CUdeviceptr>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1116  &(*cpu_hash_table_buff_)[0],
1117  cpu_hash_table_buff_->size() * sizeof((*cpu_hash_table_buff_)[0]),
1118  device_id);
1119 #else
1120  CHECK(false);
1121 #endif
1122  }
1123  } else {
1124 #ifdef HAVE_CUDA
1125  CHECK_EQ(Data_Namespace::GPU_LEVEL, effective_memory_level);
1126  data_mgr.getCudaMgr()->setContext(device_id);
1128  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1129  hash_entry_info.getNormalizedHashEntryCount(),
1130  hash_join_invalid_val,
1131  executor_->blockSize(),
1132  executor_->gridSize());
1133  JoinColumnTypeInfo type_info{static_cast<size_t>(ti.get_size()),
1137  isBitwiseEq(),
1138  col_range_.getIntMax() + 1,
1140  auto use_bucketization = inner_col->get_type_info().get_type() == kDATE;
1141 
1142  if (shard_count) {
1143  CHECK_GT(device_count_, 0);
1144  for (size_t shard = device_id; shard < shard_count; shard += device_count_) {
1145  ShardInfo shard_info{shard, entries_per_shard, shard_count, device_count_};
1147  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1148  hash_entry_info,
1149  hash_join_invalid_val,
1150  join_column,
1151  type_info,
1152  shard_info,
1153  executor_->blockSize(),
1154  executor_->gridSize());
1155  }
1156  } else {
1157  if (use_bucketization) {
1159  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1160  hash_entry_info,
1161  hash_join_invalid_val,
1162  join_column,
1163  type_info,
1164  executor_->blockSize(),
1165  executor_->gridSize());
1166  } else {
1168  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1169  hash_entry_info,
1170  hash_join_invalid_val,
1171  join_column,
1172  type_info,
1173  executor_->blockSize(),
1174  executor_->gridSize());
1175  }
1176  }
1177 #else
1178  CHECK(false);
1179 #endif
1180  }
1181 }
void fill_one_to_many_hash_table_on_device(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const size_t block_size_x, const size_t grid_size_x)
int64_t getIntMin() const
bool isBitwiseEq() const
#define CHECK_EQ(x, y)
Definition: Logger.h:205
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
const int device_count_
size_t num_elems
void initHashTableOnCpuFromCache(const ChunkKey &chunk_key, const size_t num_elements, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols)
void fill_one_to_many_hash_table_on_device_sharded(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const ShardInfo &shard_info, const size_t block_size_x, const size_t grid_size_x)
std::shared_ptr< std::vector< int32_t > > cpu_hash_table_buff_
void putHashTableOnCpuToCache(const ChunkKey &chunk_key, const size_t num_elements, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols)
#define CHECK_GT(x, y)
Definition: Logger.h:209
CHECK(cgen_state)
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:30
void fill_one_to_many_hash_table_on_device_bucketized(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const size_t block_size_x, const size_t grid_size_x)
void initOneToManyHashTableOnCpu(const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const HashEntryInfo hash_entry_info, const int32_t hash_join_invalid_val)
HashEntryInfo get_bucketized_hash_entry_info(SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
Definition: sqltypes.h:54
Executor * executor_
ExpressionRange col_range_
std::mutex cpu_hash_table_buff_mutex_
void init_hash_join_buff_on_device(int32_t *buff, const int32_t entry_count, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
static Data_Namespace::AbstractBuffer * allocGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, const size_t num_bytes, const int device_id)
int64_t getIntMax() const
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
#define DEBUG_TIMER(name)
Definition: Logger.h:313
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
const Data_Namespace::MemoryLevel memory_level_
size_t hash_entry_count_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void JoinHashTable::initOneToManyHashTableOnCpu ( const JoinColumn &  join_column,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols,
const HashEntryInfo  hash_entry_info,
const int32_t  hash_join_invalid_val 
)
private

Definition at line 824 of file JoinHashTable.cpp.

References CHECK(), CHECK_EQ, col_range_, cpu_hash_table_buff_, cpu_threads(), DEBUG_TIMER, executor_, fill_one_to_many_hash_table(), fill_one_to_many_hash_table_bucketized(), get_join_column_type_kind(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), HashEntryInfo::getNormalizedHashEntryCount(), init_hash_join_buff(), inline_fixed_encoding_null_val(), isBitwiseEq(), kDATE, kENCODING_DICT, and JoinColumn::num_elems.

Referenced by initOneToManyHashTable().

828  {
829  auto timer = DEBUG_TIMER(__func__);
830  const auto inner_col = cols.first;
831  CHECK(inner_col);
832  const auto& ti = inner_col->get_type_info();
833  if (cpu_hash_table_buff_) {
834  return;
835  }
836  cpu_hash_table_buff_ = std::make_shared<std::vector<int32_t>>(
837  2 * hash_entry_info.getNormalizedHashEntryCount() + join_column.num_elems);
838  const StringDictionaryProxy* sd_inner_proxy{nullptr};
839  const StringDictionaryProxy* sd_outer_proxy{nullptr};
840  if (ti.is_string()) {
841  CHECK_EQ(kENCODING_DICT, ti.get_compression());
842  sd_inner_proxy = executor_->getStringDictionaryProxy(
843  inner_col->get_comp_param(), executor_->row_set_mem_owner_, true);
844  CHECK(sd_inner_proxy);
845  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
846  CHECK(outer_col);
847  sd_outer_proxy = executor_->getStringDictionaryProxy(
848  outer_col->get_comp_param(), executor_->row_set_mem_owner_, true);
849  CHECK(sd_outer_proxy);
850  }
851  int thread_count = cpu_threads();
852  std::vector<std::future<void>> init_threads;
853  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
854  init_threads.emplace_back(std::async(std::launch::async,
856  &(*cpu_hash_table_buff_)[0],
857  hash_entry_info.getNormalizedHashEntryCount(),
858  hash_join_invalid_val,
859  thread_idx,
860  thread_count));
861  }
862  for (auto& child : init_threads) {
863  child.wait();
864  }
865  for (auto& child : init_threads) {
866  child.get();
867  }
868 
869  if (ti.get_type() == kDATE) {
871  hash_entry_info,
872  hash_join_invalid_val,
873  join_column,
874  {static_cast<size_t>(ti.get_size()),
878  isBitwiseEq(),
879  col_range_.getIntMax() + 1,
881  sd_inner_proxy,
882  sd_outer_proxy,
883  thread_count);
884  } else {
886  hash_entry_info,
887  hash_join_invalid_val,
888  join_column,
889  {static_cast<size_t>(ti.get_size()),
893  isBitwiseEq(),
894  col_range_.getIntMax() + 1,
896  sd_inner_proxy,
897  sd_outer_proxy,
898  thread_count);
899  }
900 }
int64_t getIntMin() const
bool isBitwiseEq() const
#define CHECK_EQ(x, y)
Definition: Logger.h:205
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int32_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
size_t num_elems
std::shared_ptr< std::vector< int32_t > > cpu_hash_table_buff_
void fill_one_to_many_hash_table_bucketized(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count)
CHECK(cgen_state)
Definition: sqltypes.h:54
Executor * executor_
void fill_one_to_many_hash_table(int32_t *buff, const HashEntryInfo hash_entry_info, const int32_t invalid_slot_val, const JoinColumn &join_column, const JoinColumnTypeInfo &type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const unsigned cpu_thread_count)
ExpressionRange col_range_
size_t getNormalizedHashEntryCount() const
int64_t getIntMax() const
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
#define DEBUG_TIMER(name)
Definition: Logger.h:313
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
int cpu_threads()
Definition: thread_count.h:25

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void JoinHashTable::initOneToOneHashTable ( const ChunkKey &  chunk_key,
const JoinColumn join_column,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id 
)
private

Definition at line 914 of file JoinHashTable.cpp.

References CudaAllocator::allocGpuAbstractBuffer(), CHECK(), CHECK_EQ, CHECK_GT, col_range_, copy_from_gpu(), copy_to_gpu(), cpu_hash_table_buff_, cpu_hash_table_buff_mutex_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, device_count_, executor_, fill_hash_join_buff_on_device_bucketized(), fill_hash_join_buff_on_device_sharded_bucketized(), anonymous_namespace{JoinHashTable.cpp}::get_bucketized_hash_entry_info(), get_join_column_type_kind(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), Data_Namespace::GPU_LEVEL, hash_entry_count_, init_hash_join_buff_on_device(), initHashTableOnCpuFromCache(), initOneToOneHashTableOnCpu(), inline_fixed_encoding_null_val(), isBitwiseEq(), memory_level_, JoinColumn::num_elems, putHashTableOnCpuToCache(), and shardCount().

Referenced by reifyOneToOneForDevice().

919  {
920  auto timer = DEBUG_TIMER(__func__);
921  const auto inner_col = cols.first;
922  CHECK(inner_col);
923 
924  auto hash_entry_info = get_bucketized_hash_entry_info(
925  inner_col->get_type_info(), col_range_, isBitwiseEq());
926  if (!hash_entry_info) {
927  return;
928  }
929 
930 #ifdef HAVE_CUDA
931  const auto shard_count = shardCount();
932  const size_t entries_per_shard{
933  shard_count ? get_entries_per_shard(hash_entry_info.hash_entry_count, shard_count)
934  : 0};
935  // Even if we join on dictionary encoded strings, the memory on the GPU is still
936  // needed once the join hash table has been built on the CPU.
937  const auto catalog = executor_->getCatalog();
939  auto& data_mgr = catalog->getDataMgr();
940  if (shard_count) {
941  const auto shards_per_device = (shard_count + device_count_ - 1) / device_count_;
942  CHECK_GT(shards_per_device, 0u);
943  hash_entry_info.hash_entry_count = entries_per_shard * shards_per_device;
944  }
945  gpu_hash_table_buff_[device_id] = CudaAllocator::allocGpuAbstractBuffer(
946  &data_mgr,
947  hash_entry_info.getNormalizedHashEntryCount() * sizeof(int32_t),
948  device_id);
949  }
950 #else
951  CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
952 #endif
953  if (!device_id) {
954  hash_entry_count_ = hash_entry_info.getNormalizedHashEntryCount();
955  }
956 
957 #ifdef HAVE_CUDA
958  const auto& ti = inner_col->get_type_info();
959 #endif
960  const int32_t hash_join_invalid_val{-1};
961  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
962  CHECK(!chunk_key.empty());
963  initHashTableOnCpuFromCache(chunk_key, join_column.num_elems, cols);
964  {
965  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
967  join_column, cols, hash_entry_info, hash_join_invalid_val);
968  }
969  if (inner_col->get_table_id() > 0) {
970  putHashTableOnCpuToCache(chunk_key, join_column.num_elems, cols);
971  }
972  // Transfer the hash table on the GPU if we've only built it on CPU
973  // but the query runs on GPU (join on dictionary encoded columns).
975 #ifdef HAVE_CUDA
976  CHECK(ti.is_string());
977  auto& data_mgr = catalog->getDataMgr();
978  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
979 
980  copy_to_gpu(
981  &data_mgr,
982  reinterpret_cast<CUdeviceptr>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
983  &(*cpu_hash_table_buff_)[0],
984  cpu_hash_table_buff_->size() * sizeof((*cpu_hash_table_buff_)[0]),
985  device_id);
986 #else
987  CHECK(false);
988 #endif
989  }
990  } else {
991 #ifdef HAVE_CUDA
992  int err{0};
993  CHECK_EQ(Data_Namespace::GPU_LEVEL, effective_memory_level);
994  auto& data_mgr = catalog->getDataMgr();
995  gpu_hash_table_err_buff_[device_id] =
996  CudaAllocator::allocGpuAbstractBuffer(&data_mgr, sizeof(int), device_id);
997  auto dev_err_buff = reinterpret_cast<CUdeviceptr>(
998  gpu_hash_table_err_buff_[device_id]->getMemoryPtr());
999  copy_to_gpu(&data_mgr, dev_err_buff, &err, sizeof(err), device_id);
1001  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1002  hash_entry_info.getNormalizedHashEntryCount(),
1003  hash_join_invalid_val,
1004  executor_->blockSize(),
1005  executor_->gridSize());
1006  if (chunk_key.empty()) {
1007  return;
1008  }
1009  JoinColumnTypeInfo type_info{static_cast<size_t>(ti.get_size()),
1013  isBitwiseEq(),
1014  col_range_.getIntMax() + 1,
1016  if (shard_count) {
1017  CHECK_GT(device_count_, 0);
1018  for (size_t shard = device_id; shard < shard_count; shard += device_count_) {
1019  ShardInfo shard_info{shard, entries_per_shard, shard_count, device_count_};
1021  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1022  hash_join_invalid_val,
1023  reinterpret_cast<int*>(dev_err_buff),
1024  join_column,
1025  type_info,
1026  shard_info,
1027  executor_->blockSize(),
1028  executor_->gridSize(),
1029  hash_entry_info.bucket_normalization);
1030  }
1031  } else {
1033  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1034  hash_join_invalid_val,
1035  reinterpret_cast<int*>(dev_err_buff),
1036  join_column,
1037  type_info,
1038  executor_->blockSize(),
1039  executor_->gridSize(),
1040  hash_entry_info.bucket_normalization);
1041  }
1042  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
1043 
1044  if (err) {
1045  throw NeedsOneToManyHash();
1046  }
1047 #else
1048  CHECK(false);
1049 #endif
1050  }
1051 }
int64_t getIntMin() const
bool isBitwiseEq() const
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const int device_count_
size_t num_elems
void initHashTableOnCpuFromCache(const ChunkKey &chunk_key, const size_t num_elements, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols)
unsigned long long CUdeviceptr
Definition: nocuda.h:27
std::shared_ptr< std::vector< int32_t > > cpu_hash_table_buff_
void initOneToOneHashTableOnCpu(const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const HashEntryInfo hash_entry_info, const int32_t hash_join_invalid_val)
void putHashTableOnCpuToCache(const ChunkKey &chunk_key, const size_t num_elements, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols)
#define CHECK_GT(x, y)
Definition: Logger.h:209
CHECK(cgen_state)
void copy_to_gpu(Data_Namespace::DataMgr *data_mgr, CUdeviceptr dst, const void *src, const size_t num_bytes, const int device_id)
Definition: GpuMemUtils.cpp:30
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
HashEntryInfo get_bucketized_hash_entry_info(SQLTypeInfo const &context_ti, ExpressionRange const &col_range, bool const is_bw_eq)
size_t shardCount() const
Executor * executor_
ExpressionRange col_range_
std::mutex cpu_hash_table_buff_mutex_
void init_hash_join_buff_on_device(int32_t *buff, const int32_t entry_count, const int32_t invalid_slot_val, const size_t block_size_x, const size_t grid_size_x)
static Data_Namespace::AbstractBuffer * allocGpuAbstractBuffer(Data_Namespace::DataMgr *data_mgr, const size_t num_bytes, const int device_id)
int64_t getIntMax() const
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
#define DEBUG_TIMER(name)
Definition: Logger.h:313
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
const Data_Namespace::MemoryLevel memory_level_
size_t hash_entry_count_
void fill_hash_join_buff_on_device_sharded_bucketized(int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const ShardInfo shard_info, const size_t block_size_x, const size_t grid_size_x, const int64_t bucket_normalization)
void fill_hash_join_buff_on_device_bucketized(int32_t *buff, const int32_t invalid_slot_val, int *dev_err_buff, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const size_t block_size_x, const size_t grid_size_x, const int64_t bucket_normalization)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void JoinHashTable::initOneToOneHashTableOnCpu ( const JoinColumn join_column,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols,
const HashEntryInfo  hash_entry_info,
const int32_t  hash_join_invalid_val 
)
private

Definition at line 735 of file JoinHashTable.cpp.

References HashEntryInfo::bucket_normalization, CHECK(), CHECK_EQ, col_range_, cpu_hash_table_buff_, cpu_threads(), DEBUG_TIMER, executor_, fill_hash_join_buff_bucketized(), get_join_column_type_kind(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), HashEntryInfo::getNormalizedHashEntryCount(), init_hash_join_buff(), inline_fixed_encoding_null_val(), isBitwiseEq(), and kENCODING_DICT.

Referenced by initOneToOneHashTable().

739  {
// Builds the one-to-one (perfect) hash table for this join on the host CPU.
// On hash-slot collision overflow the workers report an error, the buffer is
// discarded, and NeedsOneToManyHash is thrown so the caller retries 1:N.
740  auto timer = DEBUG_TIMER(__func__);
741  const auto inner_col = cols.first;
742  CHECK(inner_col);
743  const auto& ti = inner_col->get_type_info();
// Only build once: a second call (e.g. for another device) reuses the
// CPU-side buffer and merely re-validates its size in the else-branch below.
744  if (!cpu_hash_table_buff_) {
745  cpu_hash_table_buff_ = std::make_shared<std::vector<int32_t>>(
746  hash_entry_info.getNormalizedHashEntryCount());
// Dictionary proxies are needed only when both sides are strings encoded
// with *different* dictionaries; ids must then be translated at build time.
747  const StringDictionaryProxy* sd_inner_proxy{nullptr};
748  const StringDictionaryProxy* sd_outer_proxy{nullptr};
749  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
750  if (ti.is_string() &&
751  (outer_col && !(inner_col->get_comp_param() == outer_col->get_comp_param()))) {
752  CHECK_EQ(kENCODING_DICT, ti.get_compression());
753  sd_inner_proxy = executor_->getStringDictionaryProxy(
754  inner_col->get_comp_param(), executor_->row_set_mem_owner_, true);
755  CHECK(sd_inner_proxy);
756  CHECK(outer_col);
757  sd_outer_proxy = executor_->getStringDictionaryProxy(
758  outer_col->get_comp_param(), executor_->row_set_mem_owner_, true);
759  CHECK(sd_outer_proxy);
760  }
// Phase 1: initialize every slot to hash_join_invalid_val, striped across
// cpu_threads() workers.
761  int thread_count = cpu_threads();
762  std::vector<std::thread> init_cpu_buff_threads;
763  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
764  init_cpu_buff_threads.emplace_back(
765  [this, hash_entry_info, hash_join_invalid_val, thread_idx, thread_count] {
// NOTE(review): Doxygen extraction dropped source line 766 here — per the
// References list above it is the call head `init_hash_join_buff(...)` whose
// trailing arguments follow. Confirm against JoinHashTable.cpp:766.
767  hash_entry_info.getNormalizedHashEntryCount(),
768  hash_join_invalid_val,
769  thread_idx,
770  thread_count);
771  });
772  }
773  for (auto& t : init_cpu_buff_threads) {
774  t.join();
775  }
// Phase 2: reuse the thread vector to fill the table from join_column.
776  init_cpu_buff_threads.clear();
777  int err{0};
778  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
779  init_cpu_buff_threads.emplace_back([this,
780  hash_join_invalid_val,
781  &join_column,
782  sd_inner_proxy,
783  sd_outer_proxy,
784  thread_idx,
785  thread_count,
786  &ti,
787  &err,
788  hash_entry_info] {
789  int partial_err =
// NOTE(review): source line 790 — the `fill_hash_join_buff_bucketized(...)`
// call head (see References) — was dropped by extraction.
791  hash_join_invalid_val,
792  join_column,
// JoinColumnTypeInfo aggregate starts here; extraction dropped lines
// 794-796 (presumably the remaining fields: getIntMin(), the inline null
// value, and get_join_column_type_kind(ti), per References — TODO confirm).
793  {static_cast<size_t>(ti.get_size()),
797  isBitwiseEq(),
798  col_range_.getIntMax() + 1,
// NOTE(review): line 799 (likely get_join_column_type_kind(ti)) missing.
800  sd_inner_proxy,
801  sd_outer_proxy,
802  thread_idx,
803  thread_count,
804  hash_entry_info.bucket_normalization);
// First nonzero partial error wins; GCC atomic builtin avoids a mutex.
805  __sync_val_compare_and_swap(&err, 0, partial_err);
806  });
807  }
808  for (auto& t : init_cpu_buff_threads) {
809  t.join();
810  }
811  if (err) {
812  cpu_hash_table_buff_.reset();
813  // Too many hash entries, need to retry with a 1:many table
814  throw NeedsOneToManyHash();
815  }
816  } else {
// Buffer already built (by a previous device/pass): it is only reusable if
// it is at least as large as this call requires.
817  if (cpu_hash_table_buff_->size() > hash_entry_info.getNormalizedHashEntryCount()) {
818  // Too many hash entries, need to retry with a 1:many table
819  throw NeedsOneToManyHash();
820  }
821  }
822 }
int64_t getIntMin() const
bool isBitwiseEq() const
#define CHECK_EQ(x, y)
Definition: Logger.h:205
DEVICE void SUFFIX() init_hash_join_buff(int32_t *groups_buffer, const int32_t hash_entry_count, const int32_t invalid_slot_val, const int32_t cpu_thread_idx, const int32_t cpu_thread_count)
std::shared_ptr< std::vector< int32_t > > cpu_hash_table_buff_
CHECK(cgen_state)
int64_t bucket_normalization
Executor * executor_
ExpressionRange col_range_
size_t getNormalizedHashEntryCount() const
int64_t getIntMax() const
ColumnType get_join_column_type_kind(const SQLTypeInfo &ti)
#define DEBUG_TIMER(name)
Definition: Logger.h:313
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
DEVICE int SUFFIX() fill_hash_join_buff_bucketized(int32_t *buff, const int32_t invalid_slot_val, const JoinColumn join_column, const JoinColumnTypeInfo type_info, const void *sd_inner_proxy, const void *sd_outer_proxy, const int32_t cpu_thread_idx, const int32_t cpu_thread_count, const int64_t bucket_normalization)
int cpu_threads()
Definition: thread_count.h:25

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool JoinHashTable::isBitwiseEq ( ) const
private

Definition at line 1594 of file JoinHashTable.cpp.

References kBW_EQ, and qual_bin_oper_.

Referenced by codegenMatchingSet(), codegenSlot(), getHashJoinArgs(), initOneToManyHashTable(), initOneToManyHashTableOnCpu(), initOneToOneHashTable(), and initOneToOneHashTableOnCpu().

1594  {
1595  return qual_bin_oper_->get_optype() == kBW_EQ;
1596 }
Definition: sqldefs.h:31
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_

+ Here is the caller graph for this function:

bool JoinHashTable::layoutRequiresAdditionalBuffers ( JoinHashTableInterface::HashType  layout) const
inline override private virtual noexcept
size_t JoinHashTable::offsetBufferOff ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1391 of file JoinHashTable.cpp.

Referenced by toSet(), and toString().

1391  {
1392  return 0;
1393 }

+ Here is the caller graph for this function:

size_t JoinHashTable::payloadBufferOff ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1399 of file JoinHashTable.cpp.

References getComponentBufferSize().

Referenced by toSet(), and toString().

1399  {
1400  return 2 * getComponentBufferSize();
1401 }
size_t getComponentBufferSize() const noexcept

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void JoinHashTable::putHashTableOnCpuToCache ( const ChunkKey chunk_key,
const size_t  num_elements,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols 
)
private

Definition at line 1210 of file JoinHashTable.cpp.

References CHECK_GE, col_range_, cpu_hash_table_buff_, join_hash_table_cache_, join_hash_table_cache_mutex_, and qual_bin_oper_.

Referenced by initOneToManyHashTable(), and initOneToOneHashTable().

1213  {
1214  CHECK_GE(chunk_key.size(), size_t(2));
1215  if (chunk_key[1] < 0) {
1216  // Do not cache hash tables over intermediate results
1217  return;
1218  }
1219  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
1220  JoinHashTableCacheKey cache_key{col_range_,
1221  *cols.first,
1222  outer_col ? *outer_col : *cols.first,
1223  num_elements,
1224  chunk_key,
1225  qual_bin_oper_->get_optype()};
1226  std::lock_guard<std::mutex> join_hash_table_cache_lock(join_hash_table_cache_mutex_);
1227  for (const auto& kv : join_hash_table_cache_) {
1228  if (kv.first == cache_key) {
1229  return;
1230  }
1231  }
1232  join_hash_table_cache_.emplace_back(cache_key, cpu_hash_table_buff_);
1233 }
std::shared_ptr< std::vector< int32_t > > cpu_hash_table_buff_
#define CHECK_GE(x, y)
Definition: Logger.h:210
static std::mutex join_hash_table_cache_mutex_
static std::vector< std::pair< JoinHashTableCacheKey, std::shared_ptr< std::vector< int32_t > > > > join_hash_table_cache_
ExpressionRange col_range_
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_

+ Here is the caller graph for this function:

void JoinHashTable::reify ( )
private

Definition at line 492 of file JoinHashTable.cpp.

References CHECK_LT, checkHashJoinReplicationConstraint(), DEBUG_TIMER, device_count_, executor_, freeHashBufferMemory(), anonymous_namespace{JoinHashTable.cpp}::get_cols(), getInnerQueryInfo(), hash_type_, InputTableInfo::info, JoinHashTableInterface::OneToMany, JoinHashTableInterface::OneToOne, only_shards_for_device(), qual_bin_oper_, reifyOneToManyForDevice(), reifyOneToOneForDevice(), shardCount(), and logger::thread_id().

492  {
// Drives hash table construction: launches one async build per device,
// first attempting a one-to-one (perfect) layout and falling back to
// one-to-many if any device throws NeedsOneToManyHash.
493  auto timer = DEBUG_TIMER(__func__);
// NOTE(review): source line 494 was dropped by Doxygen extraction —
// References cite CHECK_LT, so it is presumably a CHECK_LT sanity check;
// confirm against JoinHashTable.cpp:494.
495  const auto& catalog = *executor_->getCatalog();
496  const auto cols = get_cols(qual_bin_oper_.get(), catalog, executor_->temporary_tables_);
497  const auto inner_col = cols.first;
498  checkHashJoinReplicationConstraint(inner_col->get_table_id());
499  const auto& query_info = getInnerQueryInfo(inner_col).info;
// Empty inner table: nothing to build.
500  if (query_info.fragments.empty()) {
501  return;
502  }
// Entries are indexed by int32_t, so anything beyond INT32_MAX tuples
// cannot be represented in this table.
503  if (query_info.getNumTuplesUpperBound() >
504  static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
505  throw TooManyHashEntries();
506  }
507 #ifdef HAVE_CUDA
508  gpu_hash_table_buff_.resize(device_count_);
509  gpu_hash_table_err_buff_.resize(device_count_);
510 #endif // HAVE_CUDA
511  std::vector<std::future<void>> init_threads;
512  const int shard_count = shardCount();
513 
514  try {
515  for (int device_id = 0; device_id < device_count_; ++device_id) {
// Sharded tables assign each device only its own shards; otherwise every
// device sees all fragments.
516  const auto fragments =
517  shard_count
518  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
519  : query_info.fragments;
520  init_threads.push_back(
521  std::async(std::launch::async,
// NOTE(review): lines 522-524 missing here — by symmetry with the catch
// branch below this is `&JoinHashTable::reifyOneToOneForDevice,` (References
// list reifyOneToOneForDevice); confirm against JoinHashTable.cpp:522.
525  this,
526  fragments,
527  device_id,
528  logger::thread_id()));
529  }
// wait() all futures first so every task finishes before get() starts
// rethrowing; get() then surfaces the first stored exception.
530  for (auto& init_thread : init_threads) {
531  init_thread.wait();
532  }
533  for (auto& init_thread : init_threads) {
534  init_thread.get();
535  }
536 
537  } catch (const NeedsOneToManyHash& e) {
// NOTE(review): lines 538-539 missing — References cite hash_type_,
// OneToMany and freeHashBufferMemory(), so presumably the layout is switched
// to OneToMany and partial buffers are freed here; confirm in the source.
540  init_threads.clear();
541  for (int device_id = 0; device_id < device_count_; ++device_id) {
542  const auto fragments =
543  shard_count
544  ? only_shards_for_device(query_info.fragments, device_id, device_count_)
545  : query_info.fragments;
546 
547  init_threads.push_back(std::async(std::launch::async,
548  &JoinHashTable::reifyOneToManyForDevice,
549  this,
550  fragments,
551  device_id,
552  logger::thread_id()));
553  }
554  for (auto& init_thread : init_threads) {
555  init_thread.wait();
556  }
557  for (auto& init_thread : init_threads) {
558  init_thread.get();
559  }
560  }
561 }
Fragmenter_Namespace::TableInfo info
Definition: InputMetadata.h:35
const int device_count_
void freeHashBufferMemory()
HashType hash_type_
std::vector< Fragmenter_Namespace::FragmentInfo > only_shards_for_device(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
void checkHashJoinReplicationConstraint(const int table_id) const
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
void reifyOneToOneForDevice(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const logger::ThreadId parent_thread_id)
void reifyOneToManyForDevice(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const logger::ThreadId parent_thread_id)
#define CHECK_LT(x, y)
Definition: Logger.h:207
size_t shardCount() const
Executor * executor_
ThreadId thread_id()
Definition: Logger.cpp:715
#define DEBUG_TIMER(name)
Definition: Logger.h:313
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
const InputTableInfo & getInnerQueryInfo(const Analyzer::ColumnVar *inner_col) const

+ Here is the call graph for this function:

void JoinHashTable::reifyOneToManyForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
const logger::ThreadId  parent_thread_id 
)
private

Definition at line 664 of file JoinHashTable.cpp.

References CHECK(), column_cache_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER_NEW_THREAD, executor_, JoinHashTableInterface::fetchJoinColumn(), genHashTableKey(), anonymous_namespace{JoinHashTable.cpp}::get_cols(), get_column_descriptor_maybe(), Data_Namespace::GPU_LEVEL, initOneToManyHashTable(), memory_level_, needs_dictionary_translation(), and qual_bin_oper_.

Referenced by reify().

667  {
// Per-device build of the one-to-many hash table: fetches the inner join
// column for this device's fragments and hands it to initOneToManyHashTable.
668  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
669  const auto& catalog = *executor_->getCatalog();
670  auto& data_mgr = catalog.getDataMgr();
671  const auto cols = get_cols(qual_bin_oper_.get(), catalog, executor_->temporary_tables_);
672  const auto inner_col = cols.first;
673  CHECK(inner_col);
674  const auto inner_cd = get_column_descriptor_maybe(
675  inner_col->get_column_id(), inner_col->get_table_id(), catalog);
676  if (inner_cd && inner_cd->isVirtualCol) {
// NOTE(review): source line 677 was dropped by extraction — presumably a
// throw rejecting joins on virtual columns (the CHECK below would otherwise
// make this branch pointless); confirm against JoinHashTable.cpp:677.
678  }
679  CHECK(!inner_cd || !(inner_cd->isVirtualCol));
680  // Since we don't have the string dictionary payloads on the GPU, we'll build
681  // the join hash table on the CPU and transfer it to the GPU.
682  const auto effective_memory_level =
683  needs_dictionary_translation(inner_col, cols.second, executor_)
// NOTE(review): line 684 missing — per References (Data_Namespace::CPU_LEVEL)
// it is the ternary's true-arm `? Data_Namespace::CPU_LEVEL`; confirm.
685  : memory_level_;
// No fragments for this device: still build an (empty) table so codegen
// has a valid buffer to probe.
686  if (fragments.empty()) {
687  ChunkKey empty_chunk;
688  initOneToManyHashTable(empty_chunk,
689  JoinColumn{nullptr, 0, 0, 0, 0},
690  cols,
691  effective_memory_level,
692  device_id);
693  return;
694  }
695 
696  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
697  std::unique_ptr<DeviceAllocator> device_allocator;
698  if (effective_memory_level == MemoryLevel::GPU_LEVEL) {
699  device_allocator = std::make_unique<CudaAllocator>(&data_mgr, device_id);
700  }
// malloc_owner keeps CPU-side column buffers alive for the build's duration.
701  std::vector<std::shared_ptr<void>> malloc_owner;
702 
703  JoinColumn join_column = fetchJoinColumn(inner_col,
704  fragments,
705  effective_memory_level,
706  device_id,
707  chunks_owner,
708  device_allocator.get(),
709  malloc_owner,
710  executor_,
711  &column_cache_);
712 
713  initOneToManyHashTable(genHashTableKey(fragments, cols.second, inner_col),
714  join_column,
715  cols,
716  effective_memory_level,
717  device_id);
718 }
void initOneToManyHashTable(const ChunkKey &chunk_key, const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
std::vector< int > ChunkKey
Definition: types.h:35
bool needs_dictionary_translation(const Analyzer::ColumnVar *inner_col, const Analyzer::Expr *outer_col_expr, const Executor *executor)
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:315
ChunkKey genHashTableKey(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Analyzer::Expr *outer_col, const Analyzer::ColumnVar *inner_col) const
ColumnCacheMap & column_cache_
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
CHECK(cgen_state)
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:149
Executor * executor_
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
const Data_Namespace::MemoryLevel memory_level_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void JoinHashTable::reifyOneToOneForDevice ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
const logger::ThreadId  parent_thread_id 
)
private

Definition at line 606 of file JoinHashTable.cpp.

References CHECK(), column_cache_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER_NEW_THREAD, executor_, JoinHashTableInterface::fetchJoinColumn(), genHashTableKey(), anonymous_namespace{JoinHashTable.cpp}::get_cols(), get_column_descriptor_maybe(), Data_Namespace::GPU_LEVEL, initOneToOneHashTable(), memory_level_, needs_dictionary_translation(), and qual_bin_oper_.

Referenced by reify().

609  {
// Per-device build of the one-to-one (perfect) hash table; mirrors
// reifyOneToManyForDevice but targets initOneToOneHashTable, which may
// throw NeedsOneToManyHash if the column is not unique enough.
610  DEBUG_TIMER_NEW_THREAD(parent_thread_id);
611  const auto& catalog = *executor_->getCatalog();
612  auto& data_mgr = catalog.getDataMgr();
613  const auto cols = get_cols(qual_bin_oper_.get(), catalog, executor_->temporary_tables_);
614  const auto inner_col = cols.first;
615  CHECK(inner_col);
616  const auto inner_cd = get_column_descriptor_maybe(
617  inner_col->get_column_id(), inner_col->get_table_id(), catalog);
618  if (inner_cd && inner_cd->isVirtualCol) {
// NOTE(review): source line 619 dropped by extraction — presumably a throw
// rejecting joins on virtual columns, as in reifyOneToManyForDevice; confirm.
620  }
621  CHECK(!inner_cd || !(inner_cd->isVirtualCol));
622  // Since we don't have the string dictionary payloads on the GPU, we'll build
623  // the join hash table on the CPU and transfer it to the GPU.
624  const auto effective_memory_level =
625  needs_dictionary_translation(inner_col, cols.second, executor_)
// NOTE(review): line 626 missing — presumably `? Data_Namespace::CPU_LEVEL`
// (References cite Data_Namespace::CPU_LEVEL); confirm against the source.
627  : memory_level_;
628  if (fragments.empty()) {
629  // No data in this fragment. Still need to create a hash table and initialize it
630  // properly.
631  ChunkKey empty_chunk;
632  initOneToOneHashTable(empty_chunk,
633  JoinColumn{nullptr, 0, 0, 0, 0},
634  cols,
635  effective_memory_level,
636  device_id);
637  return;
638  }
639 
640  std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
641  std::unique_ptr<DeviceAllocator> device_allocator;
642  if (effective_memory_level == MemoryLevel::GPU_LEVEL) {
643  device_allocator = std::make_unique<CudaAllocator>(&data_mgr, device_id);
644  }
// malloc_owner keeps CPU-side column buffers alive for the build's duration.
645  std::vector<std::shared_ptr<void>> malloc_owner;
646 
647  JoinColumn join_column = fetchJoinColumn(inner_col,
648  fragments,
649  effective_memory_level,
650  device_id,
651  chunks_owner,
652  device_allocator.get(),
653  malloc_owner,
654  executor_,
655  &column_cache_);
656 
657  initOneToOneHashTable(genHashTableKey(fragments, cols.second, inner_col),
658  join_column,
659  cols,
660  effective_memory_level,
661  device_id);
662 }
std::vector< int > ChunkKey
Definition: types.h:35
bool needs_dictionary_translation(const Analyzer::ColumnVar *inner_col, const Analyzer::Expr *outer_col_expr, const Executor *executor)
#define DEBUG_TIMER_NEW_THREAD(parent_thread_id)
Definition: Logger.h:315
ChunkKey genHashTableKey(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Analyzer::Expr *outer_col, const Analyzer::ColumnVar *inner_col) const
ColumnCacheMap & column_cache_
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > get_cols(const Analyzer::BinOper *qual_bin_oper, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
CHECK(cgen_state)
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:149
Executor * executor_
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
void initOneToOneHashTable(const ChunkKey &chunk_key, const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
const Data_Namespace::MemoryLevel memory_level_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t JoinHashTable::shardCount ( ) const
private

Definition at line 1588 of file JoinHashTable.cpp.

References executor_, get_shard_count(), Data_Namespace::GPU_LEVEL, memory_level_, and qual_bin_oper_.

Referenced by codegenMatchingSet(), codegenSlot(), initOneToOneHashTable(), and reify().

1588  {
// Returns the shard count relevant to this join, or 0 when sharding does
// not apply.
// NOTE(review): source lines 1589-1590 were dropped by Doxygen extraction.
// Per the References list (get_shard_count, Data_Namespace::GPU_LEVEL,
// memory_level_, qual_bin_oper_, executor_) the missing condition is
// presumably `memory_level_ == Data_Namespace::GPU_LEVEL
//   ? get_shard_count(qual_bin_oper_.get(), executor_)` — confirm against
// JoinHashTable.cpp:1588-1592.
1591  : 0;
1592 }
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Executor * executor_
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
const Data_Namespace::MemoryLevel memory_level_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::set< DecodedJoinHashBufferEntry > JoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
overridevirtual

Implements JoinHashTableInterface.

Definition at line 1490 of file JoinHashTable.cpp.

References copy_from_gpu(), countBufferOff(), executor_, getJoinHashBuffer(), getJoinHashBufferSize(), GPU, hash_entry_count_, offsetBufferOff(), payloadBufferOff(), and JoinHashTableInterface::toSet().

1492  {
// Decodes the hash table buffer (copying it host-side first when it lives
// on a GPU) into a set of entries for inspection/testing.
1493  auto buffer = getJoinHashBuffer(device_type, device_id);
1494  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
1495 #ifdef HAVE_CUDA
1496  std::unique_ptr<int8_t[]> buffer_copy;
1497  if (device_type == ExecutorDeviceType::GPU) {
1498  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
1499 
1500  copy_from_gpu(&executor_->getCatalog()->getDataMgr(),
1501  buffer_copy.get(),
1502  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
1503  buffer_size,
1504  device_id);
1505  }
1506  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
1507 #else
1508  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
1509 #endif // HAVE_CUDA
// ptr2/ptr3/ptr4 delimit the offset, count and payload sections within the
// flat buffer, using the *BufferOff() layout accessors.
1510  auto ptr2 = ptr1 + offsetBufferOff();
1511  auto ptr3 = ptr1 + countBufferOff();
1512  auto ptr4 = ptr1 + payloadBufferOff();
// NOTE(review): source line 1513 was dropped by extraction — per References
// it is the call head `return JoinHashTableInterface::toSet(`; confirm
// against JoinHashTable.cpp:1513.
1514  0, 0, hash_entry_count_, ptr1, ptr2, ptr3, ptr4, buffer_size);
1515 }
size_t payloadBufferOff() const noexceptoverride
unsigned long long CUdeviceptr
Definition: nocuda.h:27
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const noexceptoverride
size_t offsetBufferOff() const noexceptoverride
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const noexceptoverride
Executor * executor_
virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type, const int device_id) const =0
size_t countBufferOff() const noexceptoverride
size_t hash_entry_count_

+ Here is the call graph for this function:

std::string JoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
overridevirtual

Implements JoinHashTableInterface.

Definition at line 1454 of file JoinHashTable.cpp.

References copy_from_gpu(), countBufferOff(), executor_, JoinHashTableInterface::getHashTypeString(), getJoinHashBuffer(), getJoinHashBufferSize(), GPU, hash_entry_count_, hash_type_, offsetBufferOff(), payloadBufferOff(), and JoinHashTableInterface::toString().

1456  {
// Renders the hash table buffer as a human-readable string (copying it
// host-side first when it lives on a GPU); raw=true dumps unformatted.
1457  auto buffer = getJoinHashBuffer(device_type, device_id);
1458  auto buffer_size = getJoinHashBufferSize(device_type, device_id);
1459 #ifdef HAVE_CUDA
1460  std::unique_ptr<int8_t[]> buffer_copy;
1461  if (device_type == ExecutorDeviceType::GPU) {
1462  buffer_copy = std::make_unique<int8_t[]>(buffer_size);
1463 
1464  copy_from_gpu(&executor_->getCatalog()->getDataMgr(),
1465  buffer_copy.get(),
1466  reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
1467  buffer_size,
1468  device_id);
1469  }
1470  auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
1471 #else
1472  auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
1473 #endif // HAVE_CUDA
// Section boundaries within the flat buffer, per the *BufferOff() layout.
1474  auto ptr2 = ptr1 + offsetBufferOff();
1475  auto ptr3 = ptr1 + countBufferOff();
1476  auto ptr4 = ptr1 + payloadBufferOff();
1477  return JoinHashTableInterface::toString("perfect",
// NOTE(review): source line 1478 dropped by extraction — per References
// presumably `getHashTypeString(hash_type_),`; confirm in the source.
1479  0,
1480  0,
// NOTE(review): line 1481 also missing — presumably `hash_entry_count_,`
// (References cite hash_entry_count_); confirm against JoinHashTable.cpp.
1482  ptr1,
1483  ptr2,
1484  ptr3,
1485  ptr4,
1486  buffer_size,
1487  raw);
1488 }
size_t payloadBufferOff() const noexceptoverride
unsigned long long CUdeviceptr
Definition: nocuda.h:27
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const noexceptoverride
HashType hash_type_
size_t offsetBufferOff() const noexceptoverride
void copy_from_gpu(Data_Namespace::DataMgr *data_mgr, void *dst, const CUdeviceptr src, const size_t num_bytes, const int device_id)
int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const noexceptoverride
Executor * executor_
size_t countBufferOff() const noexceptoverride
static std::string getHashTypeString(HashType ht) noexcept
virtual std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
size_t hash_entry_count_

+ Here is the call graph for this function:

static auto JoinHashTable::yieldCacheInvalidator ( ) -> std::function<void()>
inline static

Definition at line 114 of file JoinHashTable.h.

References join_hash_table_cache_, join_hash_table_cache_mutex_, and VLOG.

114  {
115  VLOG(1) << "Invalidate " << join_hash_table_cache_.size()
116  << " cached baseline hashtable.";
117  return []() -> void {
118  std::lock_guard<std::mutex> guard(join_hash_table_cache_mutex_);
119  join_hash_table_cache_.clear();
120  };
121  }
static std::mutex join_hash_table_cache_mutex_
static std::vector< std::pair< JoinHashTableCacheKey, std::shared_ptr< std::vector< int32_t > > > > join_hash_table_cache_
#define VLOG(n)
Definition: Logger.h:291

Member Data Documentation

std::shared_ptr<Analyzer::ColumnVar> JoinHashTable::col_var_
private

Definition at line 233 of file JoinHashTable.h.

Referenced by getInnerTableId(), and getInnerTableRteIdx().

ColumnCacheMap& JoinHashTable::column_cache_
private

Definition at line 246 of file JoinHashTable.h.

Referenced by reifyOneToManyForDevice(), and reifyOneToOneForDevice().

std::mutex JoinHashTable::cpu_hash_table_buff_mutex_
private
const int JoinHashTable::device_count_
private
size_t JoinHashTable::hash_entry_count_
private
HashType JoinHashTable::hash_type_
private

Definition at line 236 of file JoinHashTable.h.

Referenced by getComponentBufferSize(), getHashType(), reify(), and toString().

std::vector< std::pair< JoinHashTable::JoinHashTableCacheKey, std::shared_ptr< std::vector< int32_t > > > > JoinHashTable::join_hash_table_cache_
staticprivate
std::mutex JoinHashTable::join_hash_table_cache_mutex_
staticprivate
const std::vector<InputTableInfo>& JoinHashTable::query_infos_
private

Definition at line 234 of file JoinHashTable.h.

Referenced by getInnerQueryInfo().


The documentation for this class was generated from the following files: