OmniSciDB  cecceef8da
JoinHashTable Class Reference

#include <JoinHashTable.h>


Classes

struct  JoinHashTableCacheKey
 

Public Member Functions

int64_t getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const noexcept override
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const noexcept override
 
std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
 
std::set< DecodedJoinHashBufferEntry > toSet (const ExecutorDeviceType device_type, const int device_id) const override
 
llvm::Value * codegenSlot (const CompilationOptions &, const size_t) override
 
HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t) override
 
int getInnerTableId () const noexcept override
 
int getInnerTableRteIdx () const noexcept override
 
HashType getHashType () const noexcept override
 
Data_Namespace::MemoryLevel getMemoryLevel () const noexcept override
 
int getDeviceCount () const noexcept override
 
size_t offsetBufferOff () const noexcept override
 
size_t countBufferOff () const noexcept override
 
size_t payloadBufferOff () const noexcept override
 
virtual ~JoinHashTable ()
 
Public Member Functions inherited from JoinHashTableInterface
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::deque< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, ThrustAllocator &dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 

Static Public Member Functions

static std::shared_ptr< JoinHashTable > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static auto yieldCacheInvalidator () -> std::function< void()>
 
Static Public Member Functions inherited from JoinHashTableInterface
static DecodedJoinHashBufferSet toSet (size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size)
 Decode hash table into a std::set for easy inspection and validation. More...
 
static std::string toString (const std::string &type, size_t key_component_count, size_t key_component_width, size_t entry_count, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size, bool raw=false)
 Decode hash table into a human-readable string. More...
 
static std::shared_ptr< JoinHashTableInterface > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static std::shared_ptr< JoinHashTableInterface > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::shared_ptr< JoinHashTableInterface > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 

Private Member Functions

 JoinHashTable (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Analyzer::ColumnVar *col_var, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const ExpressionRange &col_range, ColumnCacheMap &column_cache, Executor *executor, const int device_count)
 
ChunkKey genHashTableKey (const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, const Analyzer::Expr *outer_col, const Analyzer::ColumnVar *inner_col) const
 
void reify ()
 
void reifyOneToOneForDevice (const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const logger::ThreadId parent_thread_id)
 
void reifyOneToManyForDevice (const std::deque< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const logger::ThreadId parent_thread_id)
 
void checkHashJoinReplicationConstraint (const int table_id) const
 
void initOneToOneHashTable (const ChunkKey &chunk_key, const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
 
void initOneToManyHashTable (const ChunkKey &chunk_key, const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
 
void initHashTableOnCpuFromCache (const ChunkKey &chunk_key, const size_t num_elements, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols)
 
void putHashTableOnCpuToCache (const ChunkKey &chunk_key, const size_t num_elements, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols)
 
void initOneToOneHashTableOnCpu (const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const HashEntryInfo hash_entry_info, const int32_t hash_join_invalid_val)
 
void initOneToManyHashTableOnCpu (const JoinColumn &join_column, const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &cols, const HashEntryInfo hash_entry_info, const int32_t hash_join_invalid_val)
 
const InputTableInfo & getInnerQueryInfo (const Analyzer::ColumnVar *inner_col) const
 
size_t shardCount () const
 
llvm::Value * codegenHashTableLoad (const size_t table_idx)
 
std::vector< llvm::Value * > getHashJoinArgs (llvm::Value *hash_ptr, const Analyzer::Expr *key_col, const int shard_count, const CompilationOptions &co)
 
bool isBitwiseEq () const
 
void freeHashBufferMemory ()
 
void freeHashBufferGpuMemory ()
 
void freeHashBufferCpuMemory ()
 
size_t getComponentBufferSize () const noexcept
 

Private Attributes

std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
 
std::shared_ptr< Analyzer::ColumnVar > col_var_
 
const std::vector< InputTableInfo > & query_infos_
 
const Data_Namespace::MemoryLevel memory_level_
 
HashType hash_type_
 
size_t hash_entry_count_
 
std::shared_ptr< std::vector< int32_t > > cpu_hash_table_buff_
 
std::mutex cpu_hash_table_buff_mutex_
 
ExpressionRange col_range_
 
Executor * executor_
 
ColumnCacheMap & column_cache_
 
const int device_count_
 

Static Private Attributes

static std::vector< std::pair< JoinHashTableCacheKey, std::shared_ptr< std::vector< int32_t > > > > join_hash_table_cache_
 
static std::mutex join_hash_table_cache_mutex_
 

Additional Inherited Members

Public Types inherited from JoinHashTableInterface
enum  HashType { HashType::OneToOne, HashType::OneToMany }
 
Protected Types inherited from JoinHashTableInterface
using LinearizedColumn = std::pair< const int8_t *, size_t >
 
using LinearizedColumnCacheKey = std::pair< int, int >
 
Protected Attributes inherited from JoinHashTableInterface
std::map< LinearizedColumnCacheKey, LinearizedColumn > linearized_multifrag_columns_
 
std::mutex linearized_multifrag_column_mutex_
 
RowSetMemoryOwner linearized_multifrag_column_owner_
 

Detailed Description

Definition at line 52 of file JoinHashTable.h.

Constructor & Destructor Documentation

virtual JoinHashTable::~JoinHashTable ( )
inline virtual

Definition at line 122 of file JoinHashTable.h.

122 {}
JoinHashTable::JoinHashTable ( const std::shared_ptr< Analyzer::BinOper >  qual_bin_oper,
const Analyzer::ColumnVar *  col_var,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const ExpressionRange &  col_range,
ColumnCacheMap &  column_cache,
Executor *  executor,
const int  device_count 
)
inline private

Definition at line 125 of file JoinHashTable.h.

References CHECK(), CHECK_GT, device_count_, ExpressionRange::getType(), and Integer.

Referenced by getInstance().

134  : qual_bin_oper_(qual_bin_oper)
135  , col_var_(std::dynamic_pointer_cast<Analyzer::ColumnVar>(col_var->deep_copy()))
136  , query_infos_(query_infos)
137  , memory_level_(memory_level)
138  , hash_type_(preferred_hash_type)
139  , hash_entry_count_(0)
140  , col_range_(col_range)
141  , executor_(executor)
142  , column_cache_(column_cache)
143  , device_count_(device_count) {
144  CHECK(col_range.getType() == ExpressionRangeType::Integer);
145  CHECK_GT(device_count_, 0);
146  }

Member Function Documentation

void JoinHashTable::checkHashJoinReplicationConstraint ( const int  table_id) const
private

Definition at line 670 of file JoinHashTable.cpp.

References CHECK(), executor_, g_cluster, get_shard_count(), qual_bin_oper_, and table_is_replicated().

Referenced by reify().

670  {
671  if (!g_cluster) {
672  return;
673  }
674  if (table_id >= 0) {
675  const auto inner_td = executor_->getCatalog()->getMetadataForTable(table_id);
676  CHECK(inner_td);
677  size_t shard_count{0};
678  shard_count = get_shard_count(qual_bin_oper_.get(), executor_);
679  if (!shard_count && !table_is_replicated(inner_td)) {
680  throw TableMustBeReplicated(inner_td->tableName);
681  }
682  }
683 }

llvm::Value * JoinHashTable::codegenHashTableLoad ( const size_t  table_idx,
Executor *  executor 
)
static

Definition at line 1185 of file JoinHashTable.cpp.

References CHECK(), CHECK_LT, and get_arg_by_name().

Referenced by codegenHashTableLoad(), BaselineJoinHashTable::codegenMatchingSet(), codegenMatchingSet(), codegenSlot(), and BaselineJoinHashTable::hashPtr().

1186  {
1187  llvm::Value* hash_ptr = nullptr;
1188  const auto total_table_count =
1189  executor->plan_state_->join_info_.join_hash_tables_.size();
1190  CHECK_LT(table_idx, total_table_count);
1191  if (total_table_count > 1) {
1192  auto hash_tables_ptr =
1193  get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
1194  auto hash_pptr =
1195  table_idx > 0 ? executor->cgen_state_->ir_builder_.CreateGEP(
1196  hash_tables_ptr,
1197  executor->cgen_state_->llInt(static_cast<int64_t>(table_idx)))
1198  : hash_tables_ptr;
1199  hash_ptr = executor->cgen_state_->ir_builder_.CreateLoad(hash_pptr);
1200  } else {
1201  hash_ptr = get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
1202  }
1203  CHECK(hash_ptr);
1204  return hash_ptr;
1205 }

llvm::Value * JoinHashTable::codegenHashTableLoad ( const size_t  table_idx)
private

Definition at line 1174 of file JoinHashTable.cpp.

References CHECK(), codegenHashTableLoad(), executor_, and get_arg_by_name().

1174  {
1175  const auto hash_ptr = codegenHashTableLoad(table_idx, executor_);
1176  if (hash_ptr->getType()->isIntegerTy(64)) {
1177  return hash_ptr;
1178  }
1179  CHECK(hash_ptr->getType()->isPointerTy());
1180  return executor_->cgen_state_->ir_builder_.CreatePtrToInt(
1181  get_arg_by_name(executor_->cgen_state_->row_func_, "join_hash_tables"),
1182  llvm::Type::getInt64Ty(executor_->cgen_state_->context_));
1183 }

HashJoinMatchingSet JoinHashTable::codegenMatchingSet ( const CompilationOptions &  co,
const size_t  index 
)
override virtual

Implements JoinHashTableInterface.

Definition at line 1257 of file JoinHashTable.cpp.

References CHECK(), codegenHashTableLoad(), executor_, anonymous_namespace{JoinHashTable.cpp}::get_cols(), getComponentBufferSize(), getHashJoinArgs(), isBitwiseEq(), kDATE, qual_bin_oper_, and shardCount().

Referenced by BaselineJoinHashTable::codegenMatchingSet().

1258  {
1259  const auto cols = get_cols(
1260  qual_bin_oper_.get(), *executor_->getCatalog(), executor_->temporary_tables_);
1261  auto key_col = cols.second;
1262  CHECK(key_col);
1263  auto val_col = cols.first;
1264  CHECK(val_col);
1265  auto pos_ptr = codegenHashTableLoad(index);
1266  CHECK(pos_ptr);
1267  const int shard_count = shardCount();
1268  auto hash_join_idx_args = getHashJoinArgs(pos_ptr, key_col, shard_count, co);
1269  const int64_t sub_buff_size = getComponentBufferSize();
1270  const auto& key_col_ti = key_col->get_type_info();
1271 
1272  auto bucketize = (key_col_ti.get_type() == kDATE);
1273  return codegenMatchingSet(hash_join_idx_args,
1274  shard_count,
1275  !key_col_ti.get_notnull(),
1276  isBitwiseEq(),
1277  sub_buff_size,
1278  executor_,
1279  bucketize);
1280 }

HashJoinMatchingSet JoinHashTable::codegenMatchingSet ( const std::vector< llvm::Value * > &  hash_join_idx_args_in,
const bool  is_sharded,
const bool  col_is_nullable,
const bool  is_bw_eq,
const int64_t  sub_buff_size,
Executor *  executor,
const bool  is_bucketized = false 
)
static

Definition at line 1282 of file JoinHashTable.cpp.

References CHECK().

1289  {
1290  using namespace std::string_literals;
1291 
1292  std::string fname(is_bucketized ? "bucketized_hash_join_idx"s : "hash_join_idx"s);
1293 
1294  if (is_bw_eq) {
1295  fname += "_bitwise";
1296  }
1297  if (is_sharded) {
1298  fname += "_sharded";
1299  }
1300  if (!is_bw_eq && col_is_nullable) {
1301  fname += "_nullable";
1302  }
1303 
1304  const auto slot_lv = executor->cgen_state_->emitCall(fname, hash_join_idx_args_in);
1305  const auto slot_valid_lv = executor->cgen_state_->ir_builder_.CreateICmpSGE(
1306  slot_lv, executor->cgen_state_->llInt(int64_t(0)));
1307 
1308  auto pos_ptr = hash_join_idx_args_in[0];
1309  CHECK(pos_ptr);
1310 
1311  auto count_ptr = executor->cgen_state_->ir_builder_.CreateAdd(
1312  pos_ptr, executor->cgen_state_->llInt(sub_buff_size));
1313  auto hash_join_idx_args = hash_join_idx_args_in;
1314  hash_join_idx_args[0] = executor->cgen_state_->ir_builder_.CreatePtrToInt(
1315  count_ptr, llvm::Type::getInt64Ty(executor->cgen_state_->context_));
1316 
1317  const auto row_count_lv = executor->cgen_state_->ir_builder_.CreateSelect(
1318  slot_valid_lv,
1319  executor->cgen_state_->emitCall(fname, hash_join_idx_args),
1320  executor->cgen_state_->llInt(int64_t(0)));
1321  auto rowid_base_i32 = executor->cgen_state_->ir_builder_.CreateIntToPtr(
1322  executor->cgen_state_->ir_builder_.CreateAdd(
1323  pos_ptr, executor->cgen_state_->llInt(2 * sub_buff_size)),
1324  llvm::Type::getInt32PtrTy(executor->cgen_state_->context_));
1325  auto rowid_ptr_i32 =
1326  executor->cgen_state_->ir_builder_.CreateGEP(rowid_base_i32, slot_lv);
1327  return {rowid_ptr_i32, row_count_lv, slot_lv};
1328 }
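
The runtime function that the generated code calls is selected purely by name suffix, exactly as in the listing above. The following standalone sketch reproduces that selection logic; the helper name hash_join_idx_fname is illustrative and not part of the codebase.

#include <iostream>
#include <string>

// Mirrors the suffix composition above: the generated call targets one of the
// hash_join_idx* runtime functions, chosen by the join's properties.
std::string hash_join_idx_fname(bool is_bucketized,
                                bool is_bw_eq,
                                bool is_sharded,
                                bool col_is_nullable) {
  std::string fname(is_bucketized ? "bucketized_hash_join_idx" : "hash_join_idx");
  if (is_bw_eq) {
    fname += "_bitwise";  // IS NOT DISTINCT FROM style comparison
  }
  if (is_sharded) {
    fname += "_sharded";  // inner table is sharded
  }
  if (!is_bw_eq && col_is_nullable) {
    fname += "_nullable";  // nullable key column needs a null sentinel argument
  }
  return fname;
}

int main() {
  // e.g. a sharded, nullable, non-bucketized equijoin:
  std::cout << hash_join_idx_fname(false, false, true, true) << '\n';
  // prints: hash_join_idx_sharded_nullable
}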

llvm::Value * JoinHashTable::codegenSlot ( const CompilationOptions &  co,
const size_t  index 
)
override virtual

Implements JoinHashTableInterface.

Definition at line 1447 of file JoinHashTable.cpp.

References CHECK(), CHECK_EQ, CodeGenerator::codegen(), codegenHashTableLoad(), executor_, anonymous_namespace{JoinHashTable.cpp}::get_cols(), Analyzer::Expr::get_type_info(), getHashJoinArgs(), getHashType(), isBitwiseEq(), kDATE, JoinHashTableInterface::OneToOne, qual_bin_oper_, and shardCount().

1448  {
1449  using namespace std::string_literals;
1450 
1451  CHECK(getHashType() == JoinHashTableInterface::HashType::OneToOne);
1452  const auto cols = get_cols(
1453  qual_bin_oper_.get(), *executor_->getCatalog(), executor_->temporary_tables_);
1454  auto key_col = cols.second;
1455  CHECK(key_col);
1456  auto val_col = cols.first;
1457  CHECK(val_col);
1458  CodeGenerator code_generator(executor_);
1459  const auto key_lvs = code_generator.codegen(key_col, true, co);
1460  CHECK_EQ(size_t(1), key_lvs.size());
1461  auto hash_ptr = codegenHashTableLoad(index);
1462  CHECK(hash_ptr);
1463  const int shard_count = shardCount();
1464  const auto hash_join_idx_args = getHashJoinArgs(hash_ptr, key_col, shard_count, co);
1465 
1466  const auto& key_col_ti = key_col->get_type_info();
1467  std::string fname((key_col_ti.get_type() == kDATE) ? "bucketized_hash_join_idx"s
1468  : "hash_join_idx"s);
1469 
1470  if (isBitwiseEq()) {
1471  fname += "_bitwise";
1472  }
1473  if (shard_count) {
1474  fname += "_sharded";
1475  }
1476 
1477  if (!isBitwiseEq() && !key_col_ti.get_notnull()) {
1478  fname += "_nullable";
1479  }
1480  return executor_->cgen_state_->emitCall(fname, hash_join_idx_args);
1481 }

size_t JoinHashTable::countBufferOff ( ) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1334 of file JoinHashTable.cpp.

References getComponentBufferSize().

Referenced by toSet(), and toString().

1334  {
1335  return getComponentBufferSize();
1336 }
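
countBufferOff() returns one component-buffer length, i.e. the byte offset of the per-entry match-count section. Below is a minimal standalone sketch of the layout arithmetic, assuming the [offsets | counts | row ids] arrangement implied by getComponentBufferSize() and the pointer arithmetic in codegenMatchingSet(); the struct and helper are illustrative only.

#include <cstddef>
#include <cstdint>
#include <iostream>

// Byte offsets of the three sections of a one-to-many perfect hash table
// buffer; the first two sections each hold hash_entry_count int32_t slots.
struct OneToManyLayout {
  std::size_t offsets_off;  // per-entry offsets into the row-id payload
  std::size_t counts_off;   // per-entry match counts
  std::size_t payload_off;  // row ids of the matching inner rows
};

OneToManyLayout make_layout(std::size_t hash_entry_count) {
  const std::size_t component = hash_entry_count * sizeof(int32_t);
  return {0, component, 2 * component};
}

int main() {
  const auto layout = make_layout(1024);
  std::cout << "counts at byte " << layout.counts_off << ", payload at byte "
            << layout.payload_off << '\n';
}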

void JoinHashTable::freeHashBufferCpuMemory ( )
private

Definition at line 1556 of file JoinHashTable.cpp.

References cpu_hash_table_buff_.

Referenced by freeHashBufferMemory().

1556  {
1557  cpu_hash_table_buff_.reset();
1558 }

void JoinHashTable::freeHashBufferGpuMemory ( )
private

Definition at line 1535 of file JoinHashTable.cpp.

References CHECK(), executor_, and CudaAllocator::freeGpuAbstractBuffer().

Referenced by freeHashBufferMemory().

1535  {
1536 #ifdef HAVE_CUDA
1537  const auto& catalog = *executor_->getCatalog();
1538  auto& data_mgr = catalog.getDataMgr();
1539  for (auto& buf : gpu_hash_table_buff_) {
1540  if (buf) {
1541  CudaAllocator::freeGpuAbstractBuffer(&data_mgr, buf);
1542  buf = nullptr;
1543  }
1544  }
1545  for (auto& buf : gpu_hash_table_err_buff_) {
1546  if (buf) {
1547  CudaAllocator::freeGpuAbstractBuffer(&data_mgr, buf);
1548  buf = nullptr;
1549  }
1550  }
1551 #else
1552  CHECK(false);
1553 #endif // HAVE_CUDA
1554 }

void JoinHashTable::freeHashBufferMemory ( )
private

Definition at line 1528 of file JoinHashTable.cpp.

References freeHashBufferCpuMemory(), and freeHashBufferGpuMemory().

Referenced by reify().

1528  {
1529 #ifdef HAVE_CUDA
1530  freeHashBufferGpuMemory();
1531 #endif
1532  freeHashBufferCpuMemory();
1533 }

ChunkKey JoinHashTable::genHashTableKey ( const std::deque< Fragmenter_Namespace::FragmentInfo > &  fragments,
const Analyzer::Expr *  outer_col_expr,
const Analyzer::ColumnVar *  inner_col 
) const
private

Definition at line 537 of file JoinHashTable.cpp.

References CHECK(), CHECK_EQ, executor_, Analyzer::ColumnVar::get_column_id(), Analyzer::ColumnVar::get_table_id(), Analyzer::Expr::get_type_info(), getInnerQueryInfo(), Fragmenter_Namespace::TableInfo::getNumTuples(), InputTableInfo::info, and kENCODING_DICT.

Referenced by reifyOneToManyForDevice(), and reifyOneToOneForDevice().

540  {
541  ChunkKey hash_table_key{executor_->getCatalog()->getCurrentDB().dbId,
542  inner_col->get_table_id(),
543  inner_col->get_column_id()};
544  const auto& ti = inner_col->get_type_info();
545  if (ti.is_string()) {
546  CHECK_EQ(kENCODING_DICT, ti.get_compression());
547  size_t outer_elem_count = 0;
548  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(outer_col_expr);
549  CHECK(outer_col);
550  const auto& outer_query_info = getInnerQueryInfo(outer_col).info;
551  for (auto& frag : outer_query_info.fragments) {
552  outer_elem_count = frag.getNumTuples();
553  }
554  hash_table_key.push_back(outer_elem_count);
555  }
556  if (fragments.size() < 2) {
557  hash_table_key.push_back(fragments.front().fragmentId);
558  }
559  return hash_table_key;
560 }
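
A minimal standalone sketch of how the cache key is assembled above: database id, inner table id and column id, optionally the outer table's tuple count (dictionary-encoded string joins), and the fragment id when the inner table has a single fragment. The helper and its parameters are illustrative stand-ins for the catalog and fragment metadata used by the real code.

#include <iostream>
#include <vector>

std::vector<int> make_hash_table_key(int db_id,
                                     int inner_table_id,
                                     int inner_column_id,
                                     bool inner_is_dict_string,
                                     int outer_elem_count,
                                     bool single_fragment,
                                     int fragment_id) {
  std::vector<int> key{db_id, inner_table_id, inner_column_id};
  if (inner_is_dict_string) {
    key.push_back(outer_elem_count);  // outer cardinality affects translation
  }
  if (single_fragment) {
    key.push_back(fragment_id);  // pin the key to the only fragment scanned
  }
  return key;
}

int main() {
  for (int k : make_hash_table_key(1, 7, 3, true, 100000, true, 0)) {
    std::cout << k << ' ';
  }
  std::cout << '\n';
}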

size_t JoinHashTable::getComponentBufferSize ( ) const
private noexcept

Definition at line 1342 of file JoinHashTable.cpp.

References hash_entry_count_, hash_type_, and JoinHashTableInterface::OneToMany.

Referenced by codegenMatchingSet(), countBufferOff(), and payloadBufferOff().

1342  {
1343  if (hash_type_ == JoinHashTableInterface::HashType::OneToMany) {
1344  return hash_entry_count_ * sizeof(int32_t);
1345  } else {
1346  return 0;
1347  }
1348 }

int JoinHashTable::getDeviceCount ( ) const
inline override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 96 of file JoinHashTable.h.

References device_count_.

96 { return device_count_; };
std::vector< llvm::Value * > JoinHashTable::getHashJoinArgs ( llvm::Value *  hash_ptr,
const Analyzer::Expr *  key_col,
const int  shard_count,
const CompilationOptions &  co 
)
private

Definition at line 1207 of file JoinHashTable.cpp.

References CHECK_EQ, CodeGenerator::codegen(), col_range_, device_count_, executor_, anonymous_namespace{JoinHashTable.cpp}::get_bucketized_hash_entry_info(), anonymous_namespace{JoinHashTable.cpp}::get_hash_entry_count(), get_logical_type_info(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), Analyzer::Expr::get_type_info(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), inline_fixed_encoding_null_val(), isBitwiseEq(), and kDATE.

Referenced by codegenMatchingSet(), and codegenSlot().

1210  {
1211  CodeGenerator code_generator(executor_);
1212  const auto key_lvs = code_generator.codegen(key_col, true, co);
1213  CHECK_EQ(size_t(1), key_lvs.size());
1214  auto const& key_col_ti = key_col->get_type_info();
1215  auto hash_entry_info =
1216  get_bucketized_hash_entry_info(key_col_ti, col_range_, isBitwiseEq());
1217 
1218  std::vector<llvm::Value*> hash_join_idx_args{
1219  hash_ptr,
1220  executor_->cgen_state_->castToTypeIn(key_lvs.front(), 64),
1221  executor_->cgen_state_->llInt(col_range_.getIntMin()),
1222  executor_->cgen_state_->llInt(col_range_.getIntMax())};
1223  if (shard_count) {
1224  const auto expected_hash_entry_count =
1225  get_hash_entry_count(col_range_, isBitwiseEq());
1226  const auto entry_count_per_shard =
1227  (expected_hash_entry_count + shard_count - 1) / shard_count;
1228  hash_join_idx_args.push_back(
1229  executor_->cgen_state_->llInt<uint32_t>(entry_count_per_shard));
1230  hash_join_idx_args.push_back(executor_->cgen_state_->llInt<uint32_t>(shard_count));
1231  hash_join_idx_args.push_back(executor_->cgen_state_->llInt<uint32_t>(device_count_));
1232  }
1233  auto key_col_logical_ti = get_logical_type_info(key_col->get_type_info());
1234  if (!key_col_logical_ti.get_notnull() || isBitwiseEq()) {
1235  hash_join_idx_args.push_back(executor_->cgen_state_->llInt(
1236  inline_fixed_encoding_null_val(key_col_logical_ti)));
1237  }
1238  auto special_date_bucketization_case = key_col_ti.get_type() == kDATE;
1239  if (isBitwiseEq()) {
1240  if (special_date_bucketization_case) {
1241  hash_join_idx_args.push_back(executor_->cgen_state_->llInt(
1242  col_range_.getIntMax() / hash_entry_info.bucket_normalization + 1));
1243  } else {
1244  hash_join_idx_args.push_back(
1245  executor_->cgen_state_->llInt(col_range_.getIntMax() + 1));
1246  }
1247  }
1248 
1249  if (special_date_bucketization_case) {
1250  hash_join_idx_args.emplace_back(
1251  executor_->cgen_state_->llInt(hash_entry_info.bucket_normalization));
1252  }
1253 
1254  return hash_join_idx_args;
1255 }
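
When the inner table is sharded, the expected entry count is divided across shards with a ceiling division, as in the listing above. A standalone illustration of that arithmetic:

#include <cstddef>
#include <iostream>

std::size_t entries_per_shard(std::size_t expected_hash_entry_count,
                              std::size_t shard_count) {
  // Round up so the last shard is allowed to be under-filled.
  return (expected_hash_entry_count + shard_count - 1) / shard_count;
}

int main() {
  std::cout << entries_per_shard(10, 3) << '\n';  // prints 4
}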

HashType JoinHashTable::getHashType ( ) const
inline override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 90 of file JoinHashTable.h.

References hash_type_.

Referenced by codegenSlot().

90 { return hash_type_; }

const InputTableInfo & JoinHashTable::getInnerQueryInfo ( const Analyzer::ColumnVar *  inner_col) const
private

Definition at line 1483 of file JoinHashTable.cpp.

References get_inner_query_info(), Analyzer::ColumnVar::get_table_id(), and query_infos_.

Referenced by genHashTableKey(), and reify().

1484  {
1485  return get_inner_query_info(inner_col->get_table_id(), query_infos_);
1486 }

int JoinHashTable::getInnerTableId ( ) const
inline override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 82 of file JoinHashTable.h.

References col_var_.

82  {
83  return col_var_.get()->get_table_id();
84  };
int JoinHashTable::getInnerTableRteIdx ( ) const
inline override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 86 of file JoinHashTable.h.

References col_var_.

86  {
87  return col_var_.get()->get_rte_idx();
88  };
std::shared_ptr< JoinHashTable > JoinHashTable::getInstance ( const std::shared_ptr< Analyzer::BinOper >  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap &  column_cache,
Executor *  executor 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 304 of file JoinHashTable.cpp.

References CHECK(), CHECK_EQ, anonymous_namespace{JoinHashTable.cpp}::get_bucketized_hash_entry_info(), anonymous_namespace{JoinHashTable.cpp}::get_cols(), getExpressionRange(), HashEntryInfo::getNormalizedHashEntryCount(), Data_Namespace::GPU_LEVEL, Invalid, IS_EQUIVALENCE, JoinHashTable(), kBW_EQ, ExpressionRange::makeIntRange(), JoinHashTableInterface::OneToOne, VLOG, and VLOGGING.

Referenced by JoinHashTableInterface::getInstance().

311  {
312  decltype(std::chrono::steady_clock::now()) ts1, ts2;
313  if (VLOGGING(1)) {
314  VLOG(1) << "Building perfect hash table "
315  << (preferred_hash_type == JoinHashTableInterface::HashType::OneToOne
316  ? "OneToOne"
317  : "OneToMany")
318  << " for qual: " << qual_bin_oper->toString();
319  ts1 = std::chrono::steady_clock::now();
320  }
321  CHECK(IS_EQUIVALENCE(qual_bin_oper->get_optype()));
322  const auto cols =
323  get_cols(qual_bin_oper.get(), *executor->getCatalog(), executor->temporary_tables_);
324  const auto inner_col = cols.first;
325  CHECK(inner_col);
326  const auto& ti = inner_col->get_type_info();
327  auto col_range =
328  getExpressionRange(ti.is_string() ? cols.second : inner_col, query_infos, executor);
329  if (col_range.getType() == ExpressionRangeType::Invalid) {
330  throw HashJoinFail(
331  "Could not compute range for the expressions involved in the equijoin");
332  }
333  if (ti.is_string()) {
334  // The nullable info must be the same as the source column.
335  const auto source_col_range = getExpressionRange(inner_col, query_infos, executor);
336  if (source_col_range.getType() == ExpressionRangeType::Invalid) {
337  throw HashJoinFail(
338  "Could not compute range for the expressions involved in the equijoin");
339  }
340  if (source_col_range.getIntMin() > source_col_range.getIntMax()) {
341  // If the inner column expression range is empty, use the inner col range
342  CHECK_EQ(source_col_range.getIntMin(), int64_t(0));
343  CHECK_EQ(source_col_range.getIntMax(), int64_t(-1));
344  col_range = source_col_range;
345  } else {
346  col_range = ExpressionRange::makeIntRange(
347  std::min(source_col_range.getIntMin(), col_range.getIntMin()),
348  std::max(source_col_range.getIntMax(), col_range.getIntMax()),
349  0,
350  source_col_range.hasNulls());
351  }
352  }
353  // We can't allocate more than 2GB contiguous memory on GPU and each entry is 4 bytes.
354  const auto max_hash_entry_count =
355  memory_level == Data_Namespace::GPU_LEVEL
356  ? static_cast<size_t>(std::numeric_limits<int32_t>::max() / sizeof(int32_t))
357  : static_cast<size_t>(std::numeric_limits<int32_t>::max());
358 
359  auto bucketized_entry_count_info = get_bucketized_hash_entry_info(
360  ti, col_range, qual_bin_oper->get_optype() == kBW_EQ);
361  auto bucketized_entry_count = bucketized_entry_count_info.getNormalizedHashEntryCount();
362 
363  if (bucketized_entry_count > max_hash_entry_count) {
364  throw TooManyHashEntries();
365  }
366 
367  if (qual_bin_oper->get_optype() == kBW_EQ &&
368  col_range.getIntMax() >= std::numeric_limits<int64_t>::max()) {
369  throw HashJoinFail("Cannot translate null value for kBW_EQ");
370  }
371  auto join_hash_table =
372  std::shared_ptr<JoinHashTable>(new JoinHashTable(qual_bin_oper,
373  inner_col,
374  query_infos,
375  memory_level,
376  preferred_hash_type,
377  col_range,
378  column_cache,
379  executor,
380  device_count));
381  try {
382  join_hash_table->reify();
383  } catch (const TableMustBeReplicated& e) {
384  // Throw a runtime error to abort the query
385  join_hash_table->freeHashBufferMemory();
386  throw std::runtime_error(e.what());
387  } catch (const HashJoinFail& e) {
388  // HashJoinFail exceptions log an error and trigger a retry with a join loop (if
389  // possible)
390  join_hash_table->freeHashBufferMemory();
391  throw HashJoinFail(std::string("Could not build a 1-to-1 correspondence for columns "
392  "involved in equijoin | ") +
393  e.what());
394  } catch (const ColumnarConversionNotSupported& e) {
395  throw HashJoinFail(std::string("Could not build hash tables for equijoin | ") +
396  e.what());
397  } catch (const OutOfMemory& e) {
398  throw HashJoinFail(
399  std::string("Ran out of memory while building hash tables for equijoin | ") +
400  e.what());
401  } catch (const std::exception& e) {
402  throw std::runtime_error(
403  std::string("Fatal error while attempting to build hash tables for join: ") +
404  e.what());
405  }
406  if (VLOGGING(1)) {
407  ts2 = std::chrono::steady_clock::now();
408  VLOG(1) << "Built perfect hash table "
409  << (join_hash_table->getHashType() ==
410  JoinHashTableInterface::HashType::OneToOne
411  ? "OneToOne"
412  : "OneToMany")
413  << " in "
414  << std::chrono::duration_cast<std::chrono::milliseconds>(ts2 - ts1).count()
415  << " ms";
416  }
417  return join_hash_table;
418 }
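
Before building, getInstance() caps the number of hash entries: each slot is a 4-byte int32_t and a single contiguous GPU allocation is limited to roughly 2GB, so the GPU cap is INT32_MAX / sizeof(int32_t) entries. A standalone sketch of that check; the helper name is illustrative.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <limits>

std::size_t max_hash_entry_count(bool gpu_memory_level) {
  return gpu_memory_level
             ? static_cast<std::size_t>(std::numeric_limits<int32_t>::max() /
                                        sizeof(int32_t))
             : static_cast<std::size_t>(std::numeric_limits<int32_t>::max());
}

int main() {
  std::cout << "GPU cap: " << max_hash_entry_count(true)
            << " entries, CPU cap: " << max_hash_entry_count(false) << " entries\n";
  // A bucketized entry count above the cap makes getInstance() throw
  // TooManyHashEntries, which typically forces a different join strategy.
}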

int64_t JoinHashTable::getJoinHashBuffer ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1350 of file JoinHashTable.cpp.

References CHECK(), CHECK_LT, CPU, and cpu_hash_table_buff_.

Referenced by toSet(), and toString().

1351  {
1352  if (device_type == ExecutorDeviceType::CPU && !cpu_hash_table_buff_) {
1353  return 0;
1354  }
1355 #ifdef HAVE_CUDA
1356  CHECK_LT(static_cast<size_t>(device_id), gpu_hash_table_buff_.size());
1357  if (device_type == ExecutorDeviceType::CPU) {
1358  return reinterpret_cast<int64_t>(&(*cpu_hash_table_buff_)[0]);
1359  } else {
1360  return gpu_hash_table_buff_[device_id]
1361  ? reinterpret_cast<CUdeviceptr>(
1362  gpu_hash_table_buff_[device_id]->getMemoryPtr())
1363  : reinterpret_cast<CUdeviceptr>(nullptr);
1364  }
1365 #else
1366  CHECK(device_type == ExecutorDeviceType::CPU);
1367  return reinterpret_cast<int64_t>(&(*cpu_hash_table_buff_)[0]);
1368 #endif
1369 }

size_t JoinHashTable::getJoinHashBufferSize ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 1371 of file JoinHashTable.cpp.

References CHECK(), CHECK_LT, CPU, and cpu_hash_table_buff_.

Referenced by toSet(), and toString().

1372  {
1373  if (device_type == ExecutorDeviceType::CPU && !cpu_hash_table_buff_) {
1374  return 0;
1375  }
1376 #ifdef HAVE_CUDA
1377  CHECK_LT(static_cast<size_t>(device_id), gpu_hash_table_buff_.size());
1378  if (device_type == ExecutorDeviceType::CPU) {
1379  return cpu_hash_table_buff_->size() *
1380  sizeof(decltype(cpu_hash_table_buff_)::element_type::value_type);
1381  } else {
1382  return gpu_hash_table_buff_[device_id]
1383  ? gpu_hash_table_buff_[device_id]->reservedSize()
1384  : 0;
1385  }
1386 #else
1387  CHECK(device_type == ExecutorDeviceType::CPU);
1388  return cpu_hash_table_buff_->size() *
1389  sizeof(decltype(cpu_hash_table_buff_)::element_type::value_type);
1390 #endif
1391 }

Data_Namespace::MemoryLevel JoinHashTable::getMemoryLevel ( ) const
inline override virtual noexcept

Implements JoinHashTableInterface.

Definition at line 92 of file JoinHashTable.h.

References memory_level_.

92  {
93  return memory_level_;
94  };
void JoinHashTable::initHashTableOnCpuFromCache ( const ChunkKey &  chunk_key,
const size_t  num_elements,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols 
)
private

Definition at line 1132 of file JoinHashTable.cpp.

References col_range_, cpu_hash_table_buff_, cpu_hash_table_buff_mutex_, DEBUG_TIMER, join_hash_table_cache_, join_hash_table_cache_mutex_, and qual_bin_oper_.

Referenced by initOneToManyHashTable(), and initOneToOneHashTable().

1135  {
1136  auto timer = DEBUG_TIMER(__func__);
1137  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
1138  JoinHashTableCacheKey cache_key{col_range_,
1139  *cols.first,
1140  outer_col ? *outer_col : *cols.first,
1141  num_elements,
1142  chunk_key,
1143  qual_bin_oper_->get_optype()};
1144  std::lock_guard<std::mutex> join_hash_table_cache_lock(join_hash_table_cache_mutex_);
1145  for (const auto& kv : join_hash_table_cache_) {
1146  if (kv.first == cache_key) {
1147  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
1148  cpu_hash_table_buff_ = kv.second;
1149  break;
1150  }
1151  }
1152 }
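
A minimal standalone sketch of the cache-lookup pattern above: a static vector of (key, buffer) pairs is scanned linearly under a single mutex, and a hit shares ownership of the already-built CPU buffer. CacheKey below is an illustrative stand-in for JoinHashTableCacheKey.

#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <utility>
#include <vector>

struct CacheKey {
  int inner_table_id;
  int inner_column_id;
  std::size_t num_elements;
  bool operator==(const CacheKey& o) const {
    return inner_table_id == o.inner_table_id &&
           inner_column_id == o.inner_column_id && num_elements == o.num_elements;
  }
};

static std::vector<std::pair<CacheKey, std::shared_ptr<std::vector<int32_t>>>> g_cache;
static std::mutex g_cache_mutex;

std::shared_ptr<std::vector<int32_t>> lookup_cached_buffer(const CacheKey& key) {
  std::lock_guard<std::mutex> lock(g_cache_mutex);
  for (const auto& kv : g_cache) {
    if (kv.first == key) {
      return kv.second;  // hit: share the cached hash table buffer
    }
  }
  return nullptr;  // miss: the caller builds the table and caches it afterwards
}

int main() {
  g_cache.push_back({{7, 3, 100}, std::make_shared<std::vector<int32_t>>(100, -1)});
  const auto hit = lookup_cached_buffer({7, 3, 100});
  return hit ? 0 : 1;
}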

void JoinHashTable::initOneToManyHashTable ( const ChunkKey &  chunk_key,
const JoinColumn &  join_column,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id 
)
private

Definition at line 1002 of file JoinHashTable.cpp.

References CudaAllocator::allocGpuAbstractBuffer(), CHECK(), CHECK_EQ, CHECK_GT, col_range_, copy_to_gpu(), cpu_hash_table_buff_, cpu_hash_table_buff_mutex_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, device_count_, executor_, fill_one_to_many_hash_table_on_device(), fill_one_to_many_hash_table_on_device_bucketized(), fill_one_to_many_hash_table_on_device_sharded(), anonymous_namespace{JoinHashTable.cpp}::get_bucketized_hash_entry_info(), get_join_column_type_kind(), get_shard_count(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), Data_Namespace::GPU_LEVEL, hash_entry_count_, init_hash_join_buff_on_device(), initHashTableOnCpuFromCache(), initOneToManyHashTableOnCpu(), inline_fixed_encoding_null_val(), isBitwiseEq(), kDATE, memory_level_, JoinColumn::num_elems, putHashTableOnCpuToCache(), and qual_bin_oper_.

Referenced by reifyOneToManyForDevice().

1007  {
1008  auto timer = DEBUG_TIMER(__func__);
1009  auto const inner_col = cols.first;
1010  CHECK(inner_col);
1011 
1012  auto hash_entry_info = get_bucketized_hash_entry_info(
1013  inner_col->get_type_info(), col_range_, isBitwiseEq());
1014 
1015 #ifdef HAVE_CUDA
1016  const auto shard_count = get_shard_count(qual_bin_oper_.get(), executor_);
1017  const size_t entries_per_shard =
1018  (shard_count ? get_entries_per_shard(hash_entry_info.hash_entry_count, shard_count)
1019  : 0);
1020  // Even if we join on dictionary encoded strings, the memory on the GPU is still
1021  // needed once the join hash table has been built on the CPU.
1022  if (memory_level_ == Data_Namespace::GPU_LEVEL && shard_count) {
1023  const auto shards_per_device = (shard_count + device_count_ - 1) / device_count_;
1024  CHECK_GT(shards_per_device, 0u);
1025  hash_entry_info.hash_entry_count = entries_per_shard * shards_per_device;
1026  }
1027 #else
1028  CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
1029 #endif
1030  if (!device_id) {
1031  hash_entry_count_ = hash_entry_info.getNormalizedHashEntryCount();
1032  }
1033 
1034 #ifdef HAVE_CUDA
1035  const auto& ti = inner_col->get_type_info();
1036  auto& data_mgr = executor_->getCatalog()->getDataMgr();
1037  if (memory_level_ == Data_Namespace::GPU_LEVEL) {
1038  const size_t total_count =
1039  2 * hash_entry_info.getNormalizedHashEntryCount() + join_column.num_elems;
1040  gpu_hash_table_buff_[device_id] = CudaAllocator::allocGpuAbstractBuffer(
1041  &data_mgr, total_count * sizeof(int32_t), device_id);
1042  }
1043 #endif
1044  const int32_t hash_join_invalid_val{-1};
1045  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
1046  initHashTableOnCpuFromCache(chunk_key, join_column.num_elems, cols);
1047  {
1048  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
1049  initOneToManyHashTableOnCpu(
1050  join_column, cols, hash_entry_info, hash_join_invalid_val);
1051  }
1052  if (inner_col->get_table_id() > 0) {
1053  putHashTableOnCpuToCache(chunk_key, join_column.num_elems, cols);
1054  }
1055  // Transfer the hash table on the GPU if we've only built it on CPU
1056  // but the query runs on GPU (join on dictionary encoded columns).
1057  // Don't transfer the buffer if there was an error since we'll bail anyway.
1058  if (memory_level_ == Data_Namespace::GPU_LEVEL) {
1059 #ifdef HAVE_CUDA
1060  CHECK(ti.is_string());
1061  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
1062  copy_to_gpu(
1063  &data_mgr,
1064  reinterpret_cast<CUdeviceptr>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1065  &(*cpu_hash_table_buff_)[0],
1066  cpu_hash_table_buff_->size() * sizeof((*cpu_hash_table_buff_)[0]),
1067  device_id);
1068 #else
1069  CHECK(false);
1070 #endif
1071  }
1072  } else {
1073 #ifdef HAVE_CUDA
1074  CHECK_EQ(Data_Namespace::GPU_LEVEL, effective_memory_level);
1075  data_mgr.getCudaMgr()->setContext(device_id);
1076  init_hash_join_buff_on_device(
1077  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1078  hash_entry_info.getNormalizedHashEntryCount(),
1079  hash_join_invalid_val,
1080  executor_->blockSize(),
1081  executor_->gridSize());
1082  JoinColumnTypeInfo type_info{static_cast<size_t>(ti.get_size()),
1083  col_range_.getIntMin(),
1084  col_range_.getIntMax(),
1085  inline_fixed_encoding_null_val(ti),
1086  isBitwiseEq(),
1087  col_range_.getIntMax() + 1,
1088  get_join_column_type_kind(ti)};
1089  auto use_bucketization = inner_col->get_type_info().get_type() == kDATE;
1090 
1091  if (shard_count) {
1092  CHECK_GT(device_count_, 0);
1093  for (size_t shard = device_id; shard < shard_count; shard += device_count_) {
1094  ShardInfo shard_info{shard, entries_per_shard, shard_count, device_count_};
1095  fill_one_to_many_hash_table_on_device_sharded(
1096  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1097  hash_entry_info,
1098  hash_join_invalid_val,
1099  join_column,
1100  type_info,
1101  shard_info,
1102  executor_->blockSize(),
1103  executor_->gridSize());
1104  }
1105  } else {
1106  if (use_bucketization) {
1107  fill_one_to_many_hash_table_on_device_bucketized(
1108  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1109  hash_entry_info,
1110  hash_join_invalid_val,
1111  join_column,
1112  type_info,
1113  executor_->blockSize(),
1114  executor_->gridSize());
1115  } else {
1116  fill_one_to_many_hash_table_on_device(
1117  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
1118  hash_entry_info,
1119  hash_join_invalid_val,
1120  join_column,
1121  type_info,
1122  executor_->blockSize(),
1123  executor_->gridSize());
1124  }
1125  }
1126 #else
1127  CHECK(false);
1128 #endif
1129  }
1130 }

void JoinHashTable::initOneToManyHashTableOnCpu ( const JoinColumn &  join_column,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols,
const HashEntryInfo  hash_entry_info,
const int32_t  hash_join_invalid_val 
)
private

Definition at line 773 of file JoinHashTable.cpp.

References CHECK(), CHECK_EQ, col_range_, cpu_hash_table_buff_, cpu_threads(), DEBUG_TIMER, executor_, fill_one_to_many_hash_table(), fill_one_to_many_hash_table_bucketized(), get_join_column_type_kind(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), HashEntryInfo::getNormalizedHashEntryCount(), init_hash_join_buff(), inline_fixed_encoding_null_val(), isBitwiseEq(), kDATE, kENCODING_DICT, and JoinColumn::num_elems.

Referenced by initOneToManyHashTable().

777  {
778  auto timer = DEBUG_TIMER(__func__);
779  const auto inner_col = cols.first;
780  CHECK(inner_col);
781  const auto& ti = inner_col->get_type_info();
782  if (cpu_hash_table_buff_) {
783  return;
784  }
785  cpu_hash_table_buff_ = std::make_shared<std::vector<int32_t>>(
786  2 * hash_entry_info.getNormalizedHashEntryCount() + join_column.num_elems);
787  const StringDictionaryProxy* sd_inner_proxy{nullptr};
788  const StringDictionaryProxy* sd_outer_proxy{nullptr};
789  if (ti.is_string()) {
790  CHECK_EQ(kENCODING_DICT, ti.get_compression());
791  sd_inner_proxy = executor_->getStringDictionaryProxy(
792  inner_col->get_comp_param(), executor_->row_set_mem_owner_, true);
793  CHECK(sd_inner_proxy);
794  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
795  CHECK(outer_col);
796  sd_outer_proxy = executor_->getStringDictionaryProxy(
797  outer_col->get_comp_param(), executor_->row_set_mem_owner_, true);
798  CHECK(sd_outer_proxy);
799  }
800  int thread_count = cpu_threads();
801  std::vector<std::future<void>> init_threads;
802  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
803  init_threads.emplace_back(std::async(std::launch::async,
804  init_hash_join_buff,
805  &(*cpu_hash_table_buff_)[0],
806  hash_entry_info.getNormalizedHashEntryCount(),
807  hash_join_invalid_val,
808  thread_idx,
809  thread_count));
810  }
811  for (auto& child : init_threads) {
812  child.wait();
813  }
814  for (auto& child : init_threads) {
815  child.get();
816  }
817 
818  if (ti.get_type() == kDATE) {
819  fill_one_to_many_hash_table_bucketized(&(*cpu_hash_table_buff_)[0],
820  hash_entry_info,
821  hash_join_invalid_val,
822  join_column,
823  {static_cast<size_t>(ti.get_size()),
824  col_range_.getIntMin(),
825  col_range_.getIntMax(),
826  inline_fixed_encoding_null_val(ti),
827  isBitwiseEq(),
828  col_range_.getIntMax() + 1,
829  get_join_column_type_kind(ti)},
830  sd_inner_proxy,
831  sd_outer_proxy,
832  thread_count);
833  } else {
834  fill_one_to_many_hash_table(&(*cpu_hash_table_buff_)[0],
835  hash_entry_info,
836  hash_join_invalid_val,
837  join_column,
838  {static_cast<size_t>(ti.get_size()),
839  col_range_.getIntMin(),
840  col_range_.getIntMax(),
841  inline_fixed_encoding_null_val(ti),
842  isBitwiseEq(),
843  col_range_.getIntMax() + 1,
844  get_join_column_type_kind(ti)},
845  sd_inner_proxy,
846  sd_outer_proxy,
847  thread_count);
848  }
849 }
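
A minimal standalone sketch of the parallel initialization pattern above: the buffer is filled with the invalid-slot sentinel by a pool of std::async workers, each striding over the entries it owns, and the futures are then drained. init_slots is an illustrative stand-in for init_hash_join_buff().

#include <cstdint>
#include <future>
#include <vector>

void init_slots(int32_t* buff,
                int32_t entry_count,
                int32_t invalid_slot_val,
                int32_t thread_idx,
                int32_t thread_count) {
  for (int32_t i = thread_idx; i < entry_count; i += thread_count) {
    buff[i] = invalid_slot_val;
  }
}

int main() {
  const int32_t hash_join_invalid_val{-1};
  const int thread_count = 4;  // stand-in for cpu_threads()
  std::vector<int32_t> buff(1 << 16);
  std::vector<std::future<void>> init_threads;
  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
    init_threads.emplace_back(std::async(std::launch::async,
                                         init_slots,
                                         buff.data(),
                                         static_cast<int32_t>(buff.size()),
                                         hash_join_invalid_val,
                                         thread_idx,
                                         thread_count));
  }
  for (auto& child : init_threads) {
    child.wait();
  }
  for (auto& child : init_threads) {
    child.get();  // propagate any exception thrown by a worker
  }
}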

void JoinHashTable::initOneToOneHashTable ( const ChunkKey &  chunk_key,
const JoinColumn &  join_column,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id 
)
private

Definition at line 863 of file JoinHashTable.cpp.

References CudaAllocator::allocGpuAbstractBuffer(), CHECK(), CHECK_EQ, CHECK_GT, col_range_, copy_from_gpu(), copy_to_gpu(), cpu_hash_table_buff_, cpu_hash_table_buff_mutex_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER, device_count_, executor_, fill_hash_join_buff_on_device_bucketized(), fill_hash_join_buff_on_device_sharded_bucketized(), anonymous_namespace{JoinHashTable.cpp}::get_bucketized_hash_entry_info(), get_join_column_type_kind(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), Data_Namespace::GPU_LEVEL, hash_entry_count_, init_hash_join_buff_on_device(), initHashTableOnCpuFromCache(), initOneToOneHashTableOnCpu(), inline_fixed_encoding_null_val(), isBitwiseEq(), memory_level_, JoinColumn::num_elems, putHashTableOnCpuToCache(), and shardCount().

Referenced by reifyOneToOneForDevice().

868  {
869  auto timer = DEBUG_TIMER(__func__);
870  const auto inner_col = cols.first;
871  CHECK(inner_col);
872 
873  auto hash_entry_info = get_bucketized_hash_entry_info(
874  inner_col->get_type_info(), col_range_, isBitwiseEq());
875  if (!hash_entry_info) {
876  return;
877  }
878 
879 #ifdef HAVE_CUDA
880  const auto shard_count = shardCount();
881  const size_t entries_per_shard{
882  shard_count ? get_entries_per_shard(hash_entry_info.hash_entry_count, shard_count)
883  : 0};
884  // Even if we join on dictionary encoded strings, the memory on the GPU is still
885  // needed once the join hash table has been built on the CPU.
886  const auto catalog = executor_->getCatalog();
887  if (memory_level_ == Data_Namespace::GPU_LEVEL) {
888  auto& data_mgr = catalog->getDataMgr();
889  if (shard_count) {
890  const auto shards_per_device = (shard_count + device_count_ - 1) / device_count_;
891  CHECK_GT(shards_per_device, 0u);
892  hash_entry_info.hash_entry_count = entries_per_shard * shards_per_device;
893  }
894  gpu_hash_table_buff_[device_id] = CudaAllocator::allocGpuAbstractBuffer(
895  &data_mgr,
896  hash_entry_info.getNormalizedHashEntryCount() * sizeof(int32_t),
897  device_id);
898  }
899 #else
900  CHECK_EQ(Data_Namespace::CPU_LEVEL, effective_memory_level);
901 #endif
902  if (!device_id) {
903  hash_entry_count_ = hash_entry_info.getNormalizedHashEntryCount();
904  }
905 
906 #ifdef HAVE_CUDA
907  const auto& ti = inner_col->get_type_info();
908 #endif
909  const int32_t hash_join_invalid_val{-1};
910  if (effective_memory_level == Data_Namespace::CPU_LEVEL) {
911  CHECK(!chunk_key.empty());
912  initHashTableOnCpuFromCache(chunk_key, join_column.num_elems, cols);
913  {
914  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
915  initOneToOneHashTableOnCpu(
916  join_column, cols, hash_entry_info, hash_join_invalid_val);
917  }
918  if (inner_col->get_table_id() > 0) {
919  putHashTableOnCpuToCache(chunk_key, join_column.num_elems, cols);
920  }
921  // Transfer the hash table on the GPU if we've only built it on CPU
922  // but the query runs on GPU (join on dictionary encoded columns).
924 #ifdef HAVE_CUDA
925  CHECK(ti.is_string());
926  auto& data_mgr = catalog->getDataMgr();
927  std::lock_guard<std::mutex> cpu_hash_table_buff_lock(cpu_hash_table_buff_mutex_);
928 
929  copy_to_gpu(
930  &data_mgr,
931  reinterpret_cast<CUdeviceptr>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
932  &(*cpu_hash_table_buff_)[0],
933  cpu_hash_table_buff_->size() * sizeof((*cpu_hash_table_buff_)[0]),
934  device_id);
935 #else
936  CHECK(false);
937 #endif
938  }
939  } else {
940 #ifdef HAVE_CUDA
941  int err{0};
942  CHECK_EQ(Data_Namespace::GPU_LEVEL, effective_memory_level);
943  auto& data_mgr = catalog->getDataMgr();
944  gpu_hash_table_err_buff_[device_id] =
945  CudaAllocator::allocGpuAbstractBuffer(&data_mgr, sizeof(int), device_id);
946  auto dev_err_buff = reinterpret_cast<CUdeviceptr>(
947  gpu_hash_table_err_buff_[device_id]->getMemoryPtr());
948  copy_to_gpu(&data_mgr, dev_err_buff, &err, sizeof(err), device_id);
950  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
951  hash_entry_info.getNormalizedHashEntryCount(),
952  hash_join_invalid_val,
953  executor_->blockSize(),
954  executor_->gridSize());
955  if (chunk_key.empty()) {
956  return;
957  }
958  JoinColumnTypeInfo type_info{static_cast<size_t>(ti.get_size()),
962  isBitwiseEq(),
963  col_range_.getIntMax() + 1,
965  if (shard_count) {
967  for (size_t shard = device_id; shard < shard_count; shard += device_count_) {
968  ShardInfo shard_info{shard, entries_per_shard, shard_count, device_count_};
970  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
971  hash_join_invalid_val,
972  reinterpret_cast<int*>(dev_err_buff),
973  join_column,
974  type_info,
975  shard_info,
976  executor_->blockSize(),
977  executor_->gridSize(),
978  hash_entry_info.bucket_normalization);
979  }
980  } else {
982  reinterpret_cast<int32_t*>(gpu_hash_table_buff_[device_id]->getMemoryPtr()),
983  hash_join_invalid_val,
984  reinterpret_cast<int*>(dev_err_buff),
985  join_column,
986  type_info,
987  executor_->blockSize(),
988  executor_->gridSize(),
989  hash_entry_info.bucket_normalization);
990  }
991  copy_from_gpu(&data_mgr, &err, dev_err_buff, sizeof(err), device_id);
992 
993  if (err) {
994  throw NeedsOneToManyHash();
995  }
996 #else
997  CHECK(false);
998 #endif
999  }
1000 }
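The one-to-one ("perfect hash") layout keeps at most one row id per slot, so a slot index can be computed directly from the join key rather than probed. A minimal sketch of the idea, assuming the bucketized normalization that get_bucketized_hash_entry_info() sets up above (names here are illustrative, not the library's):

#include <cstdint>

// Illustrative only: a key is shifted by the column's minimum value and divided by the
// bucket width, which is why the table is sized from the column's expression range.
int64_t perfect_hash_slot(int64_t key, int64_t min_key, int64_t bucket_normalization) {
  return (key - min_key) / bucket_normalization;
}

If two keys land in the same slot, the fill routines report an error and the caller retries with a one-to-many table (see the NeedsOneToManyHash path at the end of the listing).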

void JoinHashTable::initOneToOneHashTableOnCpu ( const JoinColumn &  join_column,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols,
const HashEntryInfo  hash_entry_info,
const int32_t  hash_join_invalid_val 
)
private

Definition at line 685 of file JoinHashTable.cpp.

References HashEntryInfo::bucket_normalization, CHECK(), CHECK_EQ, col_range_, cpu_hash_table_buff_, cpu_threads(), DEBUG_TIMER, executor_, fill_hash_join_buff_bucketized(), get_join_column_type_kind(), ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), HashEntryInfo::getNormalizedHashEntryCount(), init_hash_join_buff(), inline_fixed_encoding_null_val(), isBitwiseEq(), and kENCODING_DICT.

Referenced by initOneToOneHashTable().

689  {
690    auto timer = DEBUG_TIMER(__func__);
691    const auto inner_col = cols.first;
692    CHECK(inner_col);
693    const auto& ti = inner_col->get_type_info();
694    if (!cpu_hash_table_buff_) {
695      cpu_hash_table_buff_ = std::make_shared<std::vector<int32_t>>(
696          hash_entry_info.getNormalizedHashEntryCount());
697      const StringDictionaryProxy* sd_inner_proxy{nullptr};
698      const StringDictionaryProxy* sd_outer_proxy{nullptr};
699      if (ti.is_string()) {
700        CHECK_EQ(kENCODING_DICT, ti.get_compression());
701        sd_inner_proxy = executor_->getStringDictionaryProxy(
702            inner_col->get_comp_param(), executor_->row_set_mem_owner_, true);
703        CHECK(sd_inner_proxy);
704        const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
705        CHECK(outer_col);
706        sd_outer_proxy = executor_->getStringDictionaryProxy(
707            outer_col->get_comp_param(), executor_->row_set_mem_owner_, true);
708        CHECK(sd_outer_proxy);
709      }
710      int thread_count = cpu_threads();
711      std::vector<std::thread> init_cpu_buff_threads;
712      for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
713        init_cpu_buff_threads.emplace_back(
714            [this, hash_entry_info, hash_join_invalid_val, thread_idx, thread_count] {
715              init_hash_join_buff(&(*cpu_hash_table_buff_)[0],
716                                  hash_entry_info.getNormalizedHashEntryCount(),
717                                  hash_join_invalid_val,
718                                  thread_idx,
719                                  thread_count);
720            });
721      }
722      for (auto& t : init_cpu_buff_threads) {
723        t.join();
724      }
725      init_cpu_buff_threads.clear();
726      int err{0};
727      for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
728        init_cpu_buff_threads.emplace_back([this,
729                                            hash_join_invalid_val,
730                                            &join_column,
731                                            sd_inner_proxy,
732                                            sd_outer_proxy,
733                                            thread_idx,
734                                            thread_count,
735                                            &ti,
736                                            &err,
737                                            hash_entry_info] {
738          int partial_err =
739              fill_hash_join_buff_bucketized(&(*cpu_hash_table_buff_)[0],
740                                             hash_join_invalid_val,
741                                             join_column,
742                                             {static_cast<size_t>(ti.get_size()),
743                                              col_range_.getIntMin(),
744                                              col_range_.getIntMax(),
745                                              inline_fixed_encoding_null_val(ti),
746                                              isBitwiseEq(),
747                                              col_range_.getIntMax() + 1,
748                                              get_join_column_type_kind(ti)},
749                                             sd_inner_proxy,
750                                             sd_outer_proxy,
751                                             thread_idx,
752                                             thread_count,
753                                             hash_entry_info.bucket_normalization);
754          __sync_val_compare_and_swap(&err, 0, partial_err);
755        });
756      }
757      for (auto& t : init_cpu_buff_threads) {
758        t.join();
759      }
760      if (err) {
761        cpu_hash_table_buff_.reset();
762        // Too many hash entries, need to retry with a 1:many table
763        throw NeedsOneToManyHash();
764      }
765    } else {
766      if (cpu_hash_table_buff_->size() > hash_entry_info.getNormalizedHashEntryCount()) {
767        // Too many hash entries, need to retry with a 1:many table
768        throw NeedsOneToManyHash();
769      }
770    }
771  }
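Both passes above, buffer initialization and filling, are partitioned across cpu_threads() workers, and the only shared write is the compare-and-swap on the error code. A self-contained sketch of the same interleaved partitioning (illustrative only, not the library's code):

#include <cstdint>
#include <thread>
#include <vector>

// Each worker handles entries thread_idx, thread_idx + thread_count, ... so the
// workers never write the same slot and need no locking.
void parallel_fill(std::vector<int32_t>& buff, const int32_t fill_val, const int thread_count) {
  std::vector<std::thread> workers;
  for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
    workers.emplace_back([&buff, fill_val, thread_idx, thread_count] {
      for (size_t i = thread_idx; i < buff.size(); i += thread_count) {
        buff[i] = fill_val;
      }
    });
  }
  for (auto& w : workers) {
    w.join();
  }
}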

bool JoinHashTable::isBitwiseEq ( ) const
private

Definition at line 1524 of file JoinHashTable.cpp.

References kBW_EQ, and qual_bin_oper_.

Referenced by codegenMatchingSet(), codegenSlot(), getHashJoinArgs(), initOneToManyHashTable(), initOneToManyHashTableOnCpu(), initOneToOneHashTable(), and initOneToOneHashTableOnCpu().

1524  {
1525  return qual_bin_oper_->get_optype() == kBW_EQ;
1526 }

size_t JoinHashTable::offsetBufferOff ( ) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 1330 of file JoinHashTable.cpp.

Referenced by toSet(), and toString().

1330  {
1331  return 0;
1332 }


size_t JoinHashTable::payloadBufferOff ( ) const
overridevirtualnoexcept

Implements JoinHashTableInterface.

Definition at line 1338 of file JoinHashTable.cpp.

References getComponentBufferSize().

Referenced by toSet(), and toString().

1338  {
1339  return 2 * getComponentBufferSize();
1340 }
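offsetBufferOff(), countBufferOff() and payloadBufferOff() describe the layout of a one-to-many hash table buffer as three consecutive sections: per-slot offsets, per-slot counts, and the flat row-id payload (offsetBufferOff() is 0 and payloadBufferOff() is two component sizes in, so countBufferOff() presumably sits one component size in). A hedged sketch of slicing the flat buffer the same way toString()/toSet() do below; hash_table and buffer are assumed to come from the surrounding code, e.g. getJoinHashBuffer():

const auto* base    = reinterpret_cast<const int8_t*>(buffer);
const auto* offsets = base + hash_table.offsetBufferOff();   // slot -> first payload index
const auto* counts  = base + hash_table.countBufferOff();    // slot -> number of matches
const auto* payload = base + hash_table.payloadBufferOff();  // matching row ids, back to back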

void JoinHashTable::putHashTableOnCpuToCache ( const ChunkKey &  chunk_key,
const size_t  num_elements,
const std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > &  cols 
)
private

Definition at line 1154 of file JoinHashTable.cpp.

References col_range_, cpu_hash_table_buff_, join_hash_table_cache_, join_hash_table_cache_mutex_, and qual_bin_oper_.

Referenced by initOneToManyHashTable(), and initOneToOneHashTable().

1157  {
1158  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
1159  JoinHashTableCacheKey cache_key{col_range_,
1160  *cols.first,
1161  outer_col ? *outer_col : *cols.first,
1162  num_elements,
1163  chunk_key,
1164  qual_bin_oper_->get_optype()};
1165  std::lock_guard<std::mutex> join_hash_table_cache_lock(join_hash_table_cache_mutex_);
1166  for (const auto& kv : join_hash_table_cache_) {
1167  if (kv.first == cache_key) {
1168  return;
1169  }
1170  }
1171  join_hash_table_cache_.emplace_back(cache_key, cpu_hash_table_buff_);
1172 }

void JoinHashTable::reify ( )
private

Definition at line 466 of file JoinHashTable.cpp.

References CHECK_LT, checkHashJoinReplicationConstraint(), DEBUG_TIMER, device_count_, executor_, freeHashBufferMemory(), anonymous_namespace{JoinHashTable.cpp}::get_cols(), getInnerQueryInfo(), hash_type_, InputTableInfo::info, JoinHashTableInterface::OneToMany, JoinHashTableInterface::OneToOne, only_shards_for_device(), qual_bin_oper_, reifyOneToManyForDevice(), reifyOneToOneForDevice(), shardCount(), and logger::thread_id().

466  {
467    auto timer = DEBUG_TIMER(__func__);
468    CHECK_LT(0, device_count_);
469    const auto& catalog = *executor_->getCatalog();
470    const auto cols = get_cols(qual_bin_oper_.get(), catalog, executor_->temporary_tables_);
471    const auto inner_col = cols.first;
472    checkHashJoinReplicationConstraint(inner_col->get_table_id());
473    const auto& query_info = getInnerQueryInfo(inner_col).info;
474    if (query_info.fragments.empty()) {
475      return;
476    }
477    if (query_info.getNumTuplesUpperBound() >
478        static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
479      throw TooManyHashEntries();
480    }
481  #ifdef HAVE_CUDA
482    gpu_hash_table_buff_.resize(device_count_);
483    gpu_hash_table_err_buff_.resize(device_count_);
484  #endif  // HAVE_CUDA
485    std::vector<std::future<void>> init_threads;
486    const int shard_count = shardCount();
487 
488    try {
489      for (int device_id = 0; device_id < device_count_; ++device_id) {
490        const auto fragments =
491            shard_count
492                ? only_shards_for_device(query_info.fragments, device_id, device_count_)
493                : query_info.fragments;
494        init_threads.push_back(
495            std::async(std::launch::async,
496                       hash_type_ == JoinHashTableInterface::HashType::OneToOne
497                           ? &JoinHashTable::reifyOneToOneForDevice
498                           : &JoinHashTable::reifyOneToManyForDevice,
499                       this,
500                       fragments,
501                       device_id,
502                       logger::thread_id()));
503      }
504      for (auto& init_thread : init_threads) {
505        init_thread.wait();
506      }
507      for (auto& init_thread : init_threads) {
508        init_thread.get();
509      }
510 
511    } catch (const NeedsOneToManyHash& e) {
512      hash_type_ = JoinHashTableInterface::HashType::OneToMany;
513      freeHashBufferMemory();
514      init_threads.clear();
515      for (int device_id = 0; device_id < device_count_; ++device_id) {
516        const auto fragments =
517            shard_count
518                ? only_shards_for_device(query_info.fragments, device_id, device_count_)
519                : query_info.fragments;
520 
521        init_threads.push_back(std::async(std::launch::async,
522                                          &JoinHashTable::reifyOneToManyForDevice,
523                                          this,
524                                          fragments,
525                                          device_id,
526                                          logger::thread_id()));
527      }
528      for (auto& init_thread : init_threads) {
529        init_thread.wait();
530      }
531      for (auto& init_thread : init_threads) {
532        init_thread.get();
533      }
534    }
535  }
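reify() is private and is driven from the static getInstance() factory, which builds the table for every device before any code generation takes place. A hedged usage sketch, with qual_bin_oper, query_infos, column_cache, device_count and executor assumed to come from the surrounding query compilation (not shown here):

auto join_hash_table =
    JoinHashTable::getInstance(qual_bin_oper,                              // inner = outer equi-join qual
                               query_infos,
                               Data_Namespace::GPU_LEVEL,
                               JoinHashTableInterface::HashType::OneToOne, // preferred layout
                               device_count,
                               column_cache,
                               executor);
// If any device reports a duplicate key, the NeedsOneToManyHash path above downgrades
// the layout to one-to-many and rebuilds before the factory returns.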

void JoinHashTable::reifyOneToManyForDevice ( const std::deque< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
const logger::ThreadId  parent_thread_id 
)
private

Definition at line 617 of file JoinHashTable.cpp.

References CHECK(), column_cache_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER_NEW_THREAD, executor_, JoinHashTableInterface::fetchJoinColumn(), genHashTableKey(), anonymous_namespace{JoinHashTable.cpp}::get_cols(), get_column_descriptor_maybe(), initOneToManyHashTable(), memory_level_, needs_dictionary_translation(), and qual_bin_oper_.

Referenced by reify().

620  {
621    DEBUG_TIMER_NEW_THREAD(parent_thread_id);
622    const auto& catalog = *executor_->getCatalog();
623    auto& data_mgr = catalog.getDataMgr();
624    const auto cols = get_cols(qual_bin_oper_.get(), catalog, executor_->temporary_tables_);
625    const auto inner_col = cols.first;
626    CHECK(inner_col);
627    const auto inner_cd = get_column_descriptor_maybe(
628        inner_col->get_column_id(), inner_col->get_table_id(), catalog);
629    if (inner_cd && inner_cd->isVirtualCol) {
630      throw FailedToJoinOnVirtualColumn();
631    }
632    CHECK(!inner_cd || !(inner_cd->isVirtualCol));
633    // Since we don't have the string dictionary payloads on the GPU, we'll build
634    // the join hash table on the CPU and transfer it to the GPU.
635    const auto effective_memory_level =
636        needs_dictionary_translation(inner_col, cols.second, executor_)
637            ? Data_Namespace::CPU_LEVEL
638            : memory_level_;
639    if (fragments.empty()) {
640      ChunkKey empty_chunk;
641      initOneToManyHashTable(empty_chunk,
642                             JoinColumn{nullptr, 0, 0, 0, 0},
643                             cols,
644                             effective_memory_level,
645                             device_id);
646      return;
647    }
648 
649    std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
650    ThrustAllocator dev_buff_owner(&data_mgr, device_id);
651    std::vector<std::shared_ptr<void>> malloc_owner;
652 
653    JoinColumn join_column = fetchJoinColumn(inner_col,
654                                             fragments,
655                                             effective_memory_level,
656                                             device_id,
657                                             chunks_owner,
658                                             dev_buff_owner,
659                                             malloc_owner,
660                                             executor_,
661                                             &column_cache_);
662 
663    initOneToManyHashTable(genHashTableKey(fragments, cols.second, inner_col),
664                           join_column,
665                           cols,
666                           effective_memory_level,
667                           device_id);
668  }

void JoinHashTable::reifyOneToOneForDevice ( const std::deque< Fragmenter_Namespace::FragmentInfo > &  fragments,
const int  device_id,
const logger::ThreadId  parent_thread_id 
)
private

Definition at line 562 of file JoinHashTable.cpp.

References CHECK(), column_cache_, Data_Namespace::CPU_LEVEL, DEBUG_TIMER_NEW_THREAD, executor_, JoinHashTableInterface::fetchJoinColumn(), genHashTableKey(), anonymous_namespace{JoinHashTable.cpp}::get_cols(), get_column_descriptor_maybe(), initOneToOneHashTable(), memory_level_, needs_dictionary_translation(), and qual_bin_oper_.

Referenced by reify().

565  {
566    DEBUG_TIMER_NEW_THREAD(parent_thread_id);
567    const auto& catalog = *executor_->getCatalog();
568    auto& data_mgr = catalog.getDataMgr();
569    const auto cols = get_cols(qual_bin_oper_.get(), catalog, executor_->temporary_tables_);
570    const auto inner_col = cols.first;
571    CHECK(inner_col);
572    const auto inner_cd = get_column_descriptor_maybe(
573        inner_col->get_column_id(), inner_col->get_table_id(), catalog);
574    if (inner_cd && inner_cd->isVirtualCol) {
575      throw FailedToJoinOnVirtualColumn();
576    }
577    CHECK(!inner_cd || !(inner_cd->isVirtualCol));
578    // Since we don't have the string dictionary payloads on the GPU, we'll build
579    // the join hash table on the CPU and transfer it to the GPU.
580    const auto effective_memory_level =
581        needs_dictionary_translation(inner_col, cols.second, executor_)
582            ? Data_Namespace::CPU_LEVEL
583            : memory_level_;
584    if (fragments.empty()) {
585      // No data in this fragment. Still need to create a hash table and initialize it
586      // properly.
587      ChunkKey empty_chunk;
588      initOneToOneHashTable(empty_chunk,
589                            JoinColumn{nullptr, 0, 0, 0, 0},
590                            cols,
591                            effective_memory_level,
592                            device_id);
593      return;
594    }
595 
596    std::vector<std::shared_ptr<Chunk_NS::Chunk>> chunks_owner;
597    ThrustAllocator dev_buff_owner(&data_mgr, device_id);
598    std::vector<std::shared_ptr<void>> malloc_owner;
599 
600    JoinColumn join_column = fetchJoinColumn(inner_col,
601                                             fragments,
602                                             effective_memory_level,
603                                             device_id,
604                                             chunks_owner,
605                                             dev_buff_owner,
606                                             malloc_owner,
607                                             executor_,
608                                             &column_cache_);
609 
610    initOneToOneHashTable(genHashTableKey(fragments, cols.second, inner_col),
611                          join_column,
612                          cols,
613                          effective_memory_level,
614                          device_id);
615  }

size_t JoinHashTable::shardCount ( ) const
private

Definition at line 1518 of file JoinHashTable.cpp.

References executor_, get_shard_count(), Data_Namespace::GPU_LEVEL, memory_level_, and qual_bin_oper_.

Referenced by codegenMatchingSet(), codegenSlot(), initOneToOneHashTable(), and reify().

1518  {
1519    return memory_level_ == Data_Namespace::GPU_LEVEL
1520               ? get_shard_count(qual_bin_oper_.get(), executor_)
1521               : 0;
1522  }

std::set< DecodedJoinHashBufferEntry > JoinHashTable::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
overridevirtual

Implements JoinHashTableInterface.

Definition at line 1420 of file JoinHashTable.cpp.

References copy_from_gpu(), countBufferOff(), executor_, getJoinHashBuffer(), getJoinHashBufferSize(), GPU, hash_entry_count_, offsetBufferOff(), payloadBufferOff(), and JoinHashTableInterface::toSet().

1422  {
1423    auto buffer = getJoinHashBuffer(device_type, device_id);
1424    auto buffer_size = getJoinHashBufferSize(device_type, device_id);
1425  #ifdef HAVE_CUDA
1426    std::unique_ptr<int8_t[]> buffer_copy;
1427    if (device_type == ExecutorDeviceType::GPU) {
1428      buffer_copy = std::make_unique<int8_t[]>(buffer_size);
1429 
1430      copy_from_gpu(&executor_->getCatalog()->getDataMgr(),
1431                    buffer_copy.get(),
1432                    reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
1433                    buffer_size,
1434                    device_id);
1435    }
1436    auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
1437  #else
1438    auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
1439  #endif  // HAVE_CUDA
1440    auto ptr2 = ptr1 + offsetBufferOff();
1441    auto ptr3 = ptr1 + countBufferOff();
1442    auto ptr4 = ptr1 + payloadBufferOff();
1443    return JoinHashTableInterface::toSet(
1444        0, 0, hash_entry_count_, ptr1, ptr2, ptr3, ptr4, buffer_size);
1445  }

std::string JoinHashTable::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
overridevirtual

Implements JoinHashTableInterface.

Definition at line 1393 of file JoinHashTable.cpp.

References copy_from_gpu(), countBufferOff(), executor_, getJoinHashBuffer(), getJoinHashBufferSize(), GPU, hash_entry_count_, offsetBufferOff(), payloadBufferOff(), and JoinHashTableInterface::toString().

1395  {
1396    auto buffer = getJoinHashBuffer(device_type, device_id);
1397    auto buffer_size = getJoinHashBufferSize(device_type, device_id);
1398  #ifdef HAVE_CUDA
1399    std::unique_ptr<int8_t[]> buffer_copy;
1400    if (device_type == ExecutorDeviceType::GPU) {
1401      buffer_copy = std::make_unique<int8_t[]>(buffer_size);
1402 
1403      copy_from_gpu(&executor_->getCatalog()->getDataMgr(),
1404                    buffer_copy.get(),
1405                    reinterpret_cast<CUdeviceptr>(reinterpret_cast<int8_t*>(buffer)),
1406                    buffer_size,
1407                    device_id);
1408    }
1409    auto ptr1 = buffer_copy ? buffer_copy.get() : reinterpret_cast<const int8_t*>(buffer);
1410  #else
1411    auto ptr1 = reinterpret_cast<const int8_t*>(buffer);
1412  #endif  // HAVE_CUDA
1413    auto ptr2 = ptr1 + offsetBufferOff();
1414    auto ptr3 = ptr1 + countBufferOff();
1415    auto ptr4 = ptr1 + payloadBufferOff();
1416    return JoinHashTableInterface::toString(
1417        "perfect", 0, 0, hash_entry_count_, ptr1, ptr2, ptr3, ptr4, buffer_size, raw);
1418  }
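A hedged debugging example (join_hash_table is assumed to be an already built table; for GPU-resident tables the buffer is copied back to host first, as the listing shows):

#include <iostream>

// Dump the perfect-hash layout of the table held on device 0 in decoded form.
std::cout << join_hash_table->toString(ExecutorDeviceType::GPU, /*device_id=*/0, /*raw=*/false)
          << std::endl;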

static auto JoinHashTable::yieldCacheInvalidator ( ) -> std::function<void()>
inlinestatic

Definition at line 115 of file JoinHashTable.h.

References join_hash_table_cache_, and join_hash_table_cache_mutex_.

115  {
116  return []() -> void {
117  std::lock_guard<std::mutex> guard(join_hash_table_cache_mutex_);
118  join_hash_table_cache_.clear();
119  };
120  }
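A usage sketch: the caller stashes the returned closure and invokes it whenever the cached CPU hash tables may be stale, for example after the data they were built from changes (when and where that happens is outside this class):

auto invalidate_join_hash_cache = JoinHashTable::yieldCacheInvalidator();
// ... later, after the underlying tables have been modified ...
invalidate_join_hash_cache();  // clears join_hash_table_cache_ under join_hash_table_cache_mutex_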

Member Data Documentation

std::shared_ptr<Analyzer::ColumnVar> JoinHashTable::col_var_
private

Definition at line 214 of file JoinHashTable.h.

Referenced by getInnerTableId(), and getInnerTableRteIdx().

ColumnCacheMap& JoinHashTable::column_cache_
private

Definition at line 227 of file JoinHashTable.h.

Referenced by reifyOneToManyForDevice(), and reifyOneToOneForDevice().

std::mutex JoinHashTable::cpu_hash_table_buff_mutex_
private
const int JoinHashTable::device_count_
private
size_t JoinHashTable::hash_entry_count_
private
HashType JoinHashTable::hash_type_
private

Definition at line 217 of file JoinHashTable.h.

Referenced by getComponentBufferSize(), getHashType(), and reify().

std::vector< std::pair< JoinHashTable::JoinHashTableCacheKey, std::shared_ptr< std::vector< int32_t > > > > JoinHashTable::join_hash_table_cache_
staticprivate
std::mutex JoinHashTable::join_hash_table_cache_mutex_
staticprivate
const std::vector<InputTableInfo>& JoinHashTable::query_infos_
private

Definition at line 215 of file JoinHashTable.h.

Referenced by getInnerQueryInfo().


The documentation for this class was generated from the following files: