OmniSciDB  94e8789169
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HashJoin Class Reference (abstract)

#include <HashJoin.h>

+ Inheritance diagram for HashJoin:

Public Member Functions

virtual std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
 
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual DecodedJoinHashBufferSet toSet (const ExecutorDeviceType device_type, const int device_id) const =0
 
virtual llvm::Value * codegenSlot (const CompilationOptions &, const size_t)=0
 
virtual HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t)=0
 
virtual int getInnerTableId () const noexcept=0
 
virtual int getInnerTableRteIdx () const noexcept=0
 
virtual HashType getHashType () const noexcept=0
 
virtual Data_Namespace::MemoryLevel getMemoryLevel () const noexcept=0
 
virtual int getDeviceCount () const noexcept=0
 
virtual size_t offsetBufferOff () const noexcept=0
 
virtual size_t countBufferOff () const noexcept=0
 
virtual size_t payloadBufferOff () const noexcept=0
 
virtual std::string getHashJoinType () const =0
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int64_t getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const QueryHint &query_hint)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static void checkHashJoinReplicationConstraint (const int table_id, const size_t shard_count, const Executor *executor)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor)
 

Protected Member Functions

virtual size_t getComponentBufferSize () const noexcept=0
 

Protected Attributes

std::vector< std::shared_ptr
< HashTable > > 
hash_tables_for_device_
 

Detailed Description

Definition at line 95 of file HashJoin.h.

Member Function Documentation

void HashJoin::checkHashJoinReplicationConstraint ( const int  table_id,
const size_t  shard_count,
const Executor * executor
)
static

Definition at line 524 of file HashJoin.cpp.

References CHECK, g_cluster, and table_is_replicated().

Referenced by PerfectJoinHashTable::reify(), and BaselineJoinHashTable::reify().

526  {
527  if (!g_cluster) {
528  return;
529  }
530  if (table_id >= 0) {
531  CHECK(executor);
532  const auto inner_td = executor->getCatalog()->getMetadataForTable(table_id);
533  CHECK(inner_td);
534  if (!shard_count && !table_is_replicated(inner_td)) {
535  throw TableMustBeReplicated(inner_td->tableName);
536  }
537  }
538 }
bool table_is_replicated(const TableDescriptor *td)
#define CHECK(condition)
Definition: Logger.h:197
bool g_cluster

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * HashJoin::codegenHashTableLoad ( const size_t  table_idx,
Executor * executor
)
static

Definition at line 214 of file HashJoin.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_LT, and get_arg_by_name().

Referenced by PerfectJoinHashTable::codegenHashTableLoad(), BaselineJoinHashTable::codegenMatchingSet(), OverlapsJoinHashTable::codegenMatchingSet(), and BaselineJoinHashTable::hashPtr().

214  {
215  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
216  llvm::Value* hash_ptr = nullptr;
217  const auto total_table_count =
218  executor->plan_state_->join_info_.join_hash_tables_.size();
219  CHECK_LT(table_idx, total_table_count);
220  if (total_table_count > 1) {
221  auto hash_tables_ptr =
222  get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
223  auto hash_pptr =
224  table_idx > 0 ? executor->cgen_state_->ir_builder_.CreateGEP(
225  hash_tables_ptr,
226  executor->cgen_state_->llInt(static_cast<int64_t>(table_idx)))
227  : hash_tables_ptr;
228  hash_ptr = executor->cgen_state_->ir_builder_.CreateLoad(hash_pptr);
229  } else {
230  hash_ptr = get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
231  }
232  CHECK(hash_ptr);
233  return hash_ptr;
234 }
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:162
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual HashJoinMatchingSet HashJoin::codegenMatchingSet ( const CompilationOptions & ,
const size_t   
)
pure virtual

Implemented in OverlapsJoinHashTable, BaselineJoinHashTable, and PerfectJoinHashTable.

Referenced by PerfectJoinHashTable::codegenMatchingSet(), BaselineJoinHashTable::codegenMatchingSet(), and OverlapsJoinHashTable::codegenMatchingSet().

+ Here is the caller graph for this function:

HashJoinMatchingSet HashJoin::codegenMatchingSet ( const std::vector< llvm::Value * > &  hash_join_idx_args_in,
const bool  is_sharded,
const bool  col_is_nullable,
const bool  is_bw_eq,
const int64_t  sub_buff_size,
Executor * executor,
const bool  is_bucketized = false 
)
static

Definition at line 165 of file HashJoin.cpp.

References AUTOMATIC_IR_METADATA, and CHECK.

172  {
173  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
174  using namespace std::string_literals;
175 
176  std::string fname(is_bucketized ? "bucketized_hash_join_idx"s : "hash_join_idx"s);
177 
178  if (is_bw_eq) {
179  fname += "_bitwise";
180  }
181  if (is_sharded) {
182  fname += "_sharded";
183  }
184  if (!is_bw_eq && col_is_nullable) {
185  fname += "_nullable";
186  }
187 
188  const auto slot_lv = executor->cgen_state_->emitCall(fname, hash_join_idx_args_in);
189  const auto slot_valid_lv = executor->cgen_state_->ir_builder_.CreateICmpSGE(
190  slot_lv, executor->cgen_state_->llInt(int64_t(0)));
191 
192  auto pos_ptr = hash_join_idx_args_in[0];
193  CHECK(pos_ptr);
194 
195  auto count_ptr = executor->cgen_state_->ir_builder_.CreateAdd(
196  pos_ptr, executor->cgen_state_->llInt(sub_buff_size));
197  auto hash_join_idx_args = hash_join_idx_args_in;
198  hash_join_idx_args[0] = executor->cgen_state_->ir_builder_.CreatePtrToInt(
199  count_ptr, llvm::Type::getInt64Ty(executor->cgen_state_->context_));
200 
201  const auto row_count_lv = executor->cgen_state_->ir_builder_.CreateSelect(
202  slot_valid_lv,
203  executor->cgen_state_->emitCall(fname, hash_join_idx_args),
204  executor->cgen_state_->llInt(int64_t(0)));
205  auto rowid_base_i32 = executor->cgen_state_->ir_builder_.CreateIntToPtr(
206  executor->cgen_state_->ir_builder_.CreateAdd(
207  pos_ptr, executor->cgen_state_->llInt(2 * sub_buff_size)),
208  llvm::Type::getInt32PtrTy(executor->cgen_state_->context_));
209  auto rowid_ptr_i32 =
210  executor->cgen_state_->ir_builder_.CreateGEP(rowid_base_i32, slot_lv);
211  return {rowid_ptr_i32, row_count_lv, slot_lv};
212 }
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:197
virtual llvm::Value* HashJoin::codegenSlot ( const CompilationOptions & ,
const size_t   
)
pure virtual
virtual size_t HashJoin::countBufferOff ( ) const
pure virtual noexcept
JoinColumn HashJoin::fetchJoinColumn ( const Analyzer::ColumnVar * hash_col,
const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragment_info,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id,
std::vector< std::shared_ptr< Chunk_NS::Chunk >> &  chunks_owner,
DeviceAllocator * dev_buff_owner,
std::vector< std::shared_ptr< void >> &  malloc_owner,
Executor * executor,
ColumnCacheMap * column_cache
)

fetchJoinColumn() calls ColumnFetcher::makeJoinColumn(), then copies the JoinColumn's col_chunks_buff memory onto the GPU if required by the effective_memory_level parameter. The dev_buff_owner parameter will manage the GPU memory.

Definition at line 54 of file HashJoin.cpp.

References Allocator::alloc(), CHECK, JoinColumn::col_chunks_buff, JoinColumn::col_chunks_buff_sz, DeviceAllocator::copyToDevice(), Data_Namespace::GPU_LEVEL, and ColumnFetcher::makeJoinColumn().

Referenced by PerfectJoinHashTable::fetchColumnsForDevice(), BaselineJoinHashTable::fetchColumnsForDevice(), and OverlapsJoinHashTable::fetchColumnsForDevice().

63  {
64  static std::mutex fragment_fetch_mutex;
65  std::lock_guard<std::mutex> fragment_fetch_lock(fragment_fetch_mutex);
66  try {
67  JoinColumn join_column = ColumnFetcher::makeJoinColumn(executor,
68  *hash_col,
69  fragment_info,
70  effective_memory_level,
71  device_id,
72  dev_buff_owner,
73  chunks_owner,
74  malloc_owner,
75  *column_cache);
76  if (effective_memory_level == Data_Namespace::GPU_LEVEL) {
77  CHECK(dev_buff_owner);
78  auto device_col_chunks_buff = dev_buff_owner->alloc(join_column.col_chunks_buff_sz);
79  dev_buff_owner->copyToDevice(device_col_chunks_buff,
80  join_column.col_chunks_buff,
81  join_column.col_chunks_buff_sz);
82  join_column.col_chunks_buff = device_col_chunks_buff;
83  }
84  return join_column;
85  } catch (...) {
86  throw FailedToFetchColumn();
87  }
88 }
virtual void copyToDevice(int8_t *device_dst, const int8_t *host_src, const size_t num_bytes) const =0
virtual int8_t * alloc(const size_t num_bytes)=0
static JoinColumn makeJoinColumn(Executor *executor, const Analyzer::ColumnVar &hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Data_Namespace::MemoryLevel effective_mem_lvl, const int device_id, DeviceAllocator *device_allocator, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, std::vector< std::shared_ptr< void >> &malloc_owner, ColumnCacheMap &column_cache)
Creates a JoinColumn struct containing an array of JoinChunk structs.
size_t col_chunks_buff_sz
const int8_t * col_chunks_buff
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void HashJoin::freeHashBufferMemory ( )
inline

Definition at line 248 of file HashJoin.h.

References hash_tables_for_device_.

Referenced by PerfectJoinHashTable::reify(), and BaselineJoinHashTable::reify().

248  {
249  auto empty_hash_tables =
250  decltype(hash_tables_for_device_)(hash_tables_for_device_.size());
251  hash_tables_for_device_.swap(empty_hash_tables);
252  }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:261

+ Here is the caller graph for this function:

virtual size_t HashJoin::getComponentBufferSize ( ) const
protected pure virtual noexcept
CompositeKeyInfo HashJoin::getCompositeKeyInfo ( const std::vector< InnerOuter > &  inner_outer_pairs,
const Executor * executor
)
static

Definition at line 318 of file HashJoin.cpp.

References CHECK, and kENCODING_DICT.

Referenced by BaselineJoinHashTable::approximateTupleCount(), OverlapsJoinHashTable::approximateTupleCount(), BaselineJoinHashTable::initHashTableForDevice(), OverlapsJoinHashTable::initHashTableOnCpu(), OverlapsJoinHashTable::reify(), BaselineJoinHashTable::reify(), and OverlapsJoinHashTable::reifyWithLayout().

320  {
321  CHECK(executor);
322  std::vector<const void*> sd_inner_proxy_per_key;
323  std::vector<const void*> sd_outer_proxy_per_key;
324  std::vector<ChunkKey> cache_key_chunks; // used for the cache key
325  const auto db_id = executor->getCatalog()->getCurrentDB().dbId;
326  for (const auto& inner_outer_pair : inner_outer_pairs) {
327  const auto inner_col = inner_outer_pair.first;
328  const auto outer_col = inner_outer_pair.second;
329  const auto& inner_ti = inner_col->get_type_info();
330  const auto& outer_ti = outer_col->get_type_info();
331  ChunkKey cache_key_chunks_for_column{
332  db_id, inner_col->get_table_id(), inner_col->get_column_id()};
333  if (inner_ti.is_string() &&
334  !(inner_ti.get_comp_param() == outer_ti.get_comp_param())) {
335  CHECK(outer_ti.is_string());
336  CHECK(inner_ti.get_compression() == kENCODING_DICT &&
337  outer_ti.get_compression() == kENCODING_DICT);
338  const auto sd_inner_proxy = executor->getStringDictionaryProxy(
339  inner_ti.get_comp_param(), executor->getRowSetMemoryOwner(), true);
340  const auto sd_outer_proxy = executor->getStringDictionaryProxy(
341  outer_ti.get_comp_param(), executor->getRowSetMemoryOwner(), true);
342  CHECK(sd_inner_proxy && sd_outer_proxy);
343  sd_inner_proxy_per_key.push_back(sd_inner_proxy);
344  sd_outer_proxy_per_key.push_back(sd_outer_proxy);
345  cache_key_chunks_for_column.push_back(sd_outer_proxy->getGeneration());
346  } else {
347  sd_inner_proxy_per_key.emplace_back();
348  sd_outer_proxy_per_key.emplace_back();
349  }
350  cache_key_chunks.push_back(cache_key_chunks_for_column);
351  }
352  return {sd_inner_proxy_per_key, sd_outer_proxy_per_key, cache_key_chunks};
353 }
std::vector< int > ChunkKey
Definition: types.h:37
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

virtual int HashJoin::getDeviceCount ( ) const
pure virtual noexcept
virtual std::string HashJoin::getHashJoinType ( ) const
pure virtual
HashTable* HashJoin::getHashTableForDevice ( const size_t  device_id) const
inline

Definition at line 206 of file HashJoin.h.

References CHECK_LT, and hash_tables_for_device_.

Referenced by OverlapsJoinHashTable::codegenManyKey(), BaselineJoinHashTable::codegenMatchingSet(), BaselineJoinHashTable::codegenSlot(), BaselineJoinHashTable::getComponentBufferSize(), OverlapsJoinHashTable::getEmittedKeysCount(), OverlapsJoinHashTable::getEntryCount(), BaselineJoinHashTable::getHashType(), OverlapsJoinHashTable::getHashType(), getJoinHashBufferSize(), BaselineJoinHashTable::getKeyBufferSize(), BaselineJoinHashTable::toSet(), and OverlapsJoinHashTable::toSet().

206  {
207  CHECK_LT(device_id, hash_tables_for_device_.size());
208  return hash_tables_for_device_[device_id].get();
209  }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:261
#define CHECK_LT(x, y)
Definition: Logger.h:207

+ Here is the caller graph for this function:

virtual HashType HashJoin::getHashType ( ) const
pure virtual noexcept
static std::string HashJoin::getHashTypeString ( HashType  ht)
inline static noexcept

Definition at line 125 of file HashJoin.h.

Referenced by PerfectJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), OverlapsJoinHashTable::reifyWithLayout(), PerfectJoinHashTable::toString(), BaselineJoinHashTable::toString(), and OverlapsJoinHashTable::toString().

125  {
126  const char* HashTypeStrings[3] = {"OneToOne", "OneToMany", "ManyToMany"};
127  return HashTypeStrings[static_cast<int>(ht)];
128  };

+ Here is the caller graph for this function:

virtual int HashJoin::getInnerTableId ( ) const
pure virtual noexcept

Implemented in OverlapsJoinHashTable, BaselineJoinHashTable, and PerfectJoinHashTable.

Referenced by OverlapsJoinHashTable::getInnerTableId(), OverlapsJoinHashTable::getInstance(), OverlapsJoinHashTable::initHashTableOnCpu(), and OverlapsJoinHashTable::reifyWithLayout().

+ Here is the caller graph for this function:

static int HashJoin::getInnerTableId ( const std::vector< InnerOuter > &  inner_outer_pairs)
inline static

Definition at line 196 of file HashJoin.h.

References CHECK.

196  {
197  CHECK(!inner_outer_pairs.empty());
198  const auto first_inner_col = inner_outer_pairs.front().first;
199  return first_inner_col->get_table_id();
200  }
#define CHECK(condition)
Definition: Logger.h:197
virtual int HashJoin::getInnerTableRteIdx ( ) const
pure virtual noexcept
std::shared_ptr< HashJoin > HashJoin::getInstance ( const std::shared_ptr< Analyzer::BinOper > qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap & column_cache,
Executor * executor,
const QueryHint & query_hint
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 237 of file HashJoin.cpp.

References CHECK, CHECK_EQ, CHECK_GT, coalesce_singleton_equi_join(), CPU, DEBUG_TIMER, g_enable_overlaps_hashjoin, PerfectJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), GPU, Data_Namespace::GPU_LEVEL, VLOG, and VLOGGING.

Referenced by Executor::buildHashTableForQualifier(), and getSyntheticInstance().

245  {
246  auto timer = DEBUG_TIMER(__func__);
247  std::shared_ptr<HashJoin> join_hash_table;
248  CHECK_GT(device_count, 0);
249  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
250  throw std::runtime_error(
251  "Overlaps hash join disabled, attempting to fall back to loop join");
252  }
253  if (qual_bin_oper->is_overlaps_oper()) {
254  VLOG(1) << "Trying to build geo hash table:";
255  join_hash_table = OverlapsJoinHashTable::getInstance(qual_bin_oper,
256  query_infos,
257  memory_level,
258  device_count,
259  column_cache,
260  executor,
261  query_hint);
262  } else if (dynamic_cast<const Analyzer::ExpressionTuple*>(
263  qual_bin_oper->get_left_operand())) {
264  VLOG(1) << "Trying to build keyed hash table:";
265  join_hash_table = BaselineJoinHashTable::getInstance(qual_bin_oper,
266  query_infos,
267  memory_level,
268  preferred_hash_type,
269  device_count,
270  column_cache,
271  executor);
272  } else {
273  try {
274  VLOG(1) << "Trying to build perfect hash table:";
275  join_hash_table = PerfectJoinHashTable::getInstance(qual_bin_oper,
276  query_infos,
277  memory_level,
278  preferred_hash_type,
279  device_count,
280  column_cache,
281  executor);
282  } catch (TooManyHashEntries&) {
283  const auto join_quals = coalesce_singleton_equi_join(qual_bin_oper);
284  CHECK_EQ(join_quals.size(), size_t(1));
285  const auto join_qual =
286  std::dynamic_pointer_cast<Analyzer::BinOper>(join_quals.front());
287  VLOG(1) << "Trying to build keyed hash table after perfect hash table:";
288  join_hash_table = BaselineJoinHashTable::getInstance(join_qual,
289  query_infos,
290  memory_level,
291  preferred_hash_type,
292  device_count,
293  column_cache,
294  executor);
295  }
296  }
297  CHECK(join_hash_table);
298  if (VLOGGING(2)) {
299  if (join_hash_table->getMemoryLevel() == Data_Namespace::MemoryLevel::GPU_LEVEL) {
300  for (int device_id = 0; device_id < join_hash_table->getDeviceCount();
301  ++device_id) {
302  if (join_hash_table->getJoinHashBufferSize(ExecutorDeviceType::GPU, device_id) <=
303  1000) {
304  VLOG(2) << "Built GPU hash table: "
305  << join_hash_table->toString(ExecutorDeviceType::GPU, device_id);
306  }
307  }
308  } else {
309  if (join_hash_table->getJoinHashBufferSize(ExecutorDeviceType::CPU) <= 1000) {
310  VLOG(2) << "Built CPU hash table: "
311  << join_hash_table->toString(ExecutorDeviceType::CPU);
312  }
313  }
314  }
315  return join_hash_table;
316 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
static std::shared_ptr< OverlapsJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const QueryHint &query_hint)
Make hash table from an in-flight SQL query's parse tree etc.
std::list< std::shared_ptr< Analyzer::Expr > > coalesce_singleton_equi_join(const std::shared_ptr< Analyzer::BinOper > &join_qual)
static std::shared_ptr< PerfectJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from an in-flight SQL query's parse tree etc.
#define CHECK_GT(x, y)
Definition: Logger.h:209
static std::shared_ptr< BaselineJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from an in-flight SQL query's parse tree etc.
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:94
#define VLOGGING(n)
Definition: Logger.h:195
#define CHECK(condition)
Definition: Logger.h:197
#define DEBUG_TIMER(name)
Definition: Logger.h:313
#define VLOG(n)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t HashJoin::getJoinHashBuffer ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
inline

Definition at line 225 of file HashJoin.h.

References CHECK, CHECK_LT, CPU, and hash_tables_for_device_.

Referenced by PerfectJoinHashTable::toSet(), BaselineJoinHashTable::toSet(), OverlapsJoinHashTable::toSet(), PerfectJoinHashTable::toString(), BaselineJoinHashTable::toString(), OverlapsJoinHashTable::toString(), and anonymous_namespace{HashJoin.cpp}::toStringFlat().

226  {
227  // TODO: just make device_id a size_t
228  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
229  if (!hash_tables_for_device_[device_id]) {
230  return 0;
231  }
232  CHECK(hash_tables_for_device_[device_id]);
233  auto hash_table = hash_tables_for_device_[device_id].get();
234 #ifdef HAVE_CUDA
235  if (device_type == ExecutorDeviceType::CPU) {
236  return reinterpret_cast<int64_t>(hash_table->getCpuBuffer());
237  } else {
238  CHECK(hash_table);
239  const auto gpu_buff = hash_table->getGpuBuffer();
240  return reinterpret_cast<CUdeviceptr>(gpu_buff);
241  }
242 #else
243  CHECK(device_type == ExecutorDeviceType::CPU);
244  return reinterpret_cast<int64_t>(hash_table->getCpuBuffer());
245 #endif
246  }
unsigned long long CUdeviceptr
Definition: nocuda.h:27
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:261
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

size_t HashJoin::getJoinHashBufferSize ( const ExecutorDeviceType  device_type)
inline

Definition at line 211 of file HashJoin.h.

References CHECK, and CPU.

Referenced by PerfectJoinHashTable::toSet(), PerfectJoinHashTable::toString(), and anonymous_namespace{HashJoin.cpp}::toStringFlat().

211  {
212  CHECK(device_type == ExecutorDeviceType::CPU);
213  return getJoinHashBufferSize(device_type, 0);
214  }
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
Definition: HashJoin.h:211
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

size_t HashJoin::getJoinHashBufferSize ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
inline

Definition at line 216 of file HashJoin.h.

References getHashTableForDevice().

217  {
218  auto hash_table = getHashTableForDevice(device_id);
219  if (!hash_table) {
220  return 0;
221  }
222  return hash_table->getHashTableBufferSize(device_type);
223  }
HashTable * getHashTableForDevice(const size_t device_id) const
Definition: HashJoin.h:206

+ Here is the call graph for this function:

virtual Data_Namespace::MemoryLevel HashJoin::getMemoryLevel ( ) const
pure virtual noexcept
std::shared_ptr< HashJoin > HashJoin::getSyntheticInstance ( std::string_view  table1,
std::string_view  column1,
std::string_view  table2,
std::string_view  column2,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap & column_cache,
Executor * executor
)
static

Make hash table from named tables and columns (such as for testing).

Definition at line 467 of file HashJoin.cpp.

References QueryHint::defaults(), getInstance(), getSyntheticColumnVar(), getSyntheticInputTableInfo(), kBOOLEAN, kEQ, kONE, setupSyntheticCaching(), and ScalarExprVisitor< T >::visit().

476  {
477  auto a1 = getSyntheticColumnVar(table1, column1, 0, executor);
478  auto a2 = getSyntheticColumnVar(table2, column2, 1, executor);
479 
480  auto qual_bin_oper = std::make_shared<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, a1, a2);
481 
482  std::set<const Analyzer::ColumnVar*> cvs =
483  AllColumnVarsVisitor().visit(qual_bin_oper.get());
484  auto query_infos = getSyntheticInputTableInfo(cvs, executor);
485  setupSyntheticCaching(cvs, executor);
486  QueryHint query_hint = QueryHint::defaults();
487 
488  auto hash_table = HashJoin::getInstance(qual_bin_oper,
489  query_infos,
490  memory_level,
491  preferred_hash_type,
492  device_count,
493  column_cache,
494  executor,
495  query_hint);
496  return hash_table;
497 }
std::vector< InputTableInfo > getSyntheticInputTableInfo(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:440
Definition: sqldefs.h:30
T visit(const Analyzer::Expr *expr) const
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const QueryHint &query_hint)
Make hash table from an in-flight SQL query's parse tree etc.
Definition: HashJoin.cpp:237
static QueryHint defaults()
Definition: QueryHint.h:74
void setupSyntheticCaching(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:426
Definition: sqldefs.h:69
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, Executor *executor)
Definition: HashJoin.cpp:355

+ Here is the call graph for this function:

std::shared_ptr< HashJoin > HashJoin::getSyntheticInstance ( const std::shared_ptr< Analyzer::BinOper > qual_bin_oper,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap & column_cache,
Executor * executor
)
static

Make hash table from named tables and columns (such as for testing).

Definition at line 500 of file HashJoin.cpp.

References QueryHint::defaults(), getInstance(), getSyntheticInputTableInfo(), setupSyntheticCaching(), and ScalarExprVisitor< T >::visit().

506  {
507  std::set<const Analyzer::ColumnVar*> cvs =
508  AllColumnVarsVisitor().visit(qual_bin_oper.get());
509  auto query_infos = getSyntheticInputTableInfo(cvs, executor);
510  setupSyntheticCaching(cvs, executor);
511  QueryHint query_hint = QueryHint::defaults();
512 
513  auto hash_table = HashJoin::getInstance(qual_bin_oper,
514  query_infos,
515  memory_level,
516  preferred_hash_type,
517  device_count,
518  column_cache,
519  executor,
520  query_hint);
521  return hash_table;
522 }
std::vector< InputTableInfo > getSyntheticInputTableInfo(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:440
T visit(const Analyzer::Expr *expr) const
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const QueryHint &query_hint)
Make hash table from an in-flight SQL query's parse tree etc.
Definition: HashJoin.cpp:237
static QueryHint defaults()
Definition: QueryHint.h:74
void setupSyntheticCaching(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:426

+ Here is the call graph for this function:

virtual size_t HashJoin::offsetBufferOff ( ) const
pure virtual noexcept
virtual size_t HashJoin::payloadBufferOff ( ) const
pure virtual noexcept
virtual DecodedJoinHashBufferSet HashJoin::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
pure virtual
virtual std::string HashJoin::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
pure virtual
std::string HashJoin::toStringFlat32 ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
virtual

Definition at line 116 of file HashJoin.cpp.

117  {
118  return toStringFlat<int32_t>(this, device_type, device_id);
119 }
std::string HashJoin::toStringFlat64 ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
virtual

Definition at line 111 of file HashJoin.cpp.

112  {
113  return toStringFlat<int64_t>(this, device_type, device_id);
114 }

Member Data Documentation


The documentation for this class was generated from the following files: