OmniSciDB  ba1bac9284
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
HashJoin Class Reference (abstract)

#include <HashJoin.h>

+ Inheritance diagram for HashJoin:

Public Member Functions

virtual std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
 
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual DecodedJoinHashBufferSet toSet (const ExecutorDeviceType device_type, const int device_id) const =0
 
virtual llvm::Value * codegenSlot (const CompilationOptions &, const size_t)=0
 
virtual HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t)=0
 
virtual int getInnerTableId () const noexcept=0
 
virtual int getInnerTableRteIdx () const noexcept=0
 
virtual HashType getHashType () const noexcept=0
 
virtual Data_Namespace::MemoryLevel getMemoryLevel () const noexcept=0
 
virtual int getDeviceCount () const noexcept=0
 
virtual size_t offsetBufferOff () const noexcept=0
 
virtual size_t countBufferOff () const noexcept=0
 
virtual size_t payloadBufferOff () const noexcept=0
 
virtual std::string getHashJoinType () const =0
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int64_t getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const RegisteredQueryHint &query_hint)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static void checkHashJoinReplicationConstraint (const int table_id, const size_t shard_count, const Executor *executor)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor)
 

Protected Member Functions

virtual size_t getComponentBufferSize () const noexcept=0
 

Protected Attributes

std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
 

Detailed Description

Definition at line 103 of file HashJoin.h.

Member Function Documentation

void HashJoin::checkHashJoinReplicationConstraint ( const int  table_id,
const size_t  shard_count,
const Executor * executor 
)
static

Definition at line 532 of file HashJoin.cpp.

References CHECK, g_cluster, and table_is_replicated().

Referenced by PerfectJoinHashTable::reify(), and BaselineJoinHashTable::reify().

534  {
535  if (!g_cluster) {
536  return;
537  }
538  if (table_id >= 0) {
539  CHECK(executor);
540  const auto inner_td = executor->getCatalog()->getMetadataForTable(table_id);
541  CHECK(inner_td);
542  if (!shard_count && !table_is_replicated(inner_td)) {
543  throw TableMustBeReplicated(inner_td->tableName);
544  }
545  }
546 }
bool table_is_replicated(const TableDescriptor *td)
#define CHECK(condition)
Definition: Logger.h:206
bool g_cluster

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * HashJoin::codegenHashTableLoad ( const size_t  table_idx,
Executor * executor 
)
static

Definition at line 215 of file HashJoin.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_LT, and get_arg_by_name().

Referenced by PerfectJoinHashTable::codegenHashTableLoad(), BaselineJoinHashTable::codegenMatchingSet(), OverlapsJoinHashTable::codegenMatchingSet(), and BaselineJoinHashTable::hashPtr().

215  {
216  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
217  llvm::Value* hash_ptr = nullptr;
218  const auto total_table_count =
219  executor->plan_state_->join_info_.join_hash_tables_.size();
220  CHECK_LT(table_idx, total_table_count);
221  if (total_table_count > 1) {
222  auto hash_tables_ptr =
223  get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
224  auto hash_pptr =
225  table_idx > 0 ? executor->cgen_state_->ir_builder_.CreateGEP(
226  hash_tables_ptr,
227  executor->cgen_state_->llInt(static_cast<int64_t>(table_idx)))
228  : hash_tables_ptr;
229  hash_ptr = executor->cgen_state_->ir_builder_.CreateLoad(hash_pptr);
230  } else {
231  hash_ptr = get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
232  }
233  CHECK(hash_ptr);
234  return hash_ptr;
235 }
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:167
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:216
#define CHECK(condition)
Definition: Logger.h:206

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual HashJoinMatchingSet HashJoin::codegenMatchingSet ( const CompilationOptions & ,
const size_t   
)
pure virtual

Implemented in OverlapsJoinHashTable, BaselineJoinHashTable, and PerfectJoinHashTable.

Referenced by PerfectJoinHashTable::codegenMatchingSet(), BaselineJoinHashTable::codegenMatchingSet(), and OverlapsJoinHashTable::codegenMatchingSet().

+ Here is the caller graph for this function:

HashJoinMatchingSet HashJoin::codegenMatchingSet ( const std::vector< llvm::Value * > &  hash_join_idx_args_in,
const bool  is_sharded,
const bool  col_is_nullable,
const bool  is_bw_eq,
const int64_t  sub_buff_size,
Executor * executor,
const bool  is_bucketized = false 
)
static

Definition at line 166 of file HashJoin.cpp.

References AUTOMATIC_IR_METADATA, and CHECK.

173  {
174  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
175  using namespace std::string_literals;
176 
177  std::string fname(is_bucketized ? "bucketized_hash_join_idx"s : "hash_join_idx"s);
178 
179  if (is_bw_eq) {
180  fname += "_bitwise";
181  }
182  if (is_sharded) {
183  fname += "_sharded";
184  }
185  if (!is_bw_eq && col_is_nullable) {
186  fname += "_nullable";
187  }
188 
189  const auto slot_lv = executor->cgen_state_->emitCall(fname, hash_join_idx_args_in);
190  const auto slot_valid_lv = executor->cgen_state_->ir_builder_.CreateICmpSGE(
191  slot_lv, executor->cgen_state_->llInt(int64_t(0)));
192 
193  auto pos_ptr = hash_join_idx_args_in[0];
194  CHECK(pos_ptr);
195 
196  auto count_ptr = executor->cgen_state_->ir_builder_.CreateAdd(
197  pos_ptr, executor->cgen_state_->llInt(sub_buff_size));
198  auto hash_join_idx_args = hash_join_idx_args_in;
199  hash_join_idx_args[0] = executor->cgen_state_->ir_builder_.CreatePtrToInt(
200  count_ptr, llvm::Type::getInt64Ty(executor->cgen_state_->context_));
201 
202  const auto row_count_lv = executor->cgen_state_->ir_builder_.CreateSelect(
203  slot_valid_lv,
204  executor->cgen_state_->emitCall(fname, hash_join_idx_args),
205  executor->cgen_state_->llInt(int64_t(0)));
206  auto rowid_base_i32 = executor->cgen_state_->ir_builder_.CreateIntToPtr(
207  executor->cgen_state_->ir_builder_.CreateAdd(
208  pos_ptr, executor->cgen_state_->llInt(2 * sub_buff_size)),
209  llvm::Type::getInt32PtrTy(executor->cgen_state_->context_));
210  auto rowid_ptr_i32 =
211  executor->cgen_state_->ir_builder_.CreateGEP(rowid_base_i32, slot_lv);
212  return {rowid_ptr_i32, row_count_lv, slot_lv};
213 }
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:206
virtual llvm::Value* HashJoin::codegenSlot ( const CompilationOptions & ,
const size_t   
)
pure virtual
virtual size_t HashJoin::countBufferOff ( ) const
pure virtual noexcept
JoinColumn HashJoin::fetchJoinColumn ( const Analyzer::ColumnVar * hash_col,
const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragment_info,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id,
std::vector< std::shared_ptr< Chunk_NS::Chunk >> &  chunks_owner,
DeviceAllocator * dev_buff_owner,
std::vector< std::shared_ptr< void >> &  malloc_owner,
Executor * executor,
ColumnCacheMap * column_cache 
)

fetchJoinColumn() calls ColumnFetcher::makeJoinColumn(), then copies the JoinColumn's col_chunks_buff memory onto the GPU if required by the effective_memory_level parameter. The dev_buff_owner parameter will manage the GPU memory.

Definition at line 54 of file HashJoin.cpp.

References Allocator::alloc(), CHECK, JoinColumn::col_chunks_buff, JoinColumn::col_chunks_buff_sz, DeviceAllocator::copyToDevice(), Data_Namespace::GPU_LEVEL, and ColumnFetcher::makeJoinColumn().

Referenced by PerfectJoinHashTable::fetchColumnsForDevice(), BaselineJoinHashTable::fetchColumnsForDevice(), and OverlapsJoinHashTable::fetchColumnsForDevice().

63  {
64  static std::mutex fragment_fetch_mutex;
65  std::lock_guard<std::mutex> fragment_fetch_lock(fragment_fetch_mutex);
66  try {
67  JoinColumn join_column = ColumnFetcher::makeJoinColumn(executor,
68  *hash_col,
69  fragment_info,
70  effective_memory_level,
71  device_id,
72  dev_buff_owner,
73  /*thread_idx=*/0,
74  chunks_owner,
75  malloc_owner,
76  *column_cache);
77  if (effective_memory_level == Data_Namespace::GPU_LEVEL) {
78  CHECK(dev_buff_owner);
79  auto device_col_chunks_buff = dev_buff_owner->alloc(join_column.col_chunks_buff_sz);
80  dev_buff_owner->copyToDevice(device_col_chunks_buff,
81  join_column.col_chunks_buff,
82  join_column.col_chunks_buff_sz);
83  join_column.col_chunks_buff = device_col_chunks_buff;
84  }
85  return join_column;
86  } catch (...) {
87  throw FailedToFetchColumn();
88  }
89 }
static JoinColumn makeJoinColumn(Executor *executor, const Analyzer::ColumnVar &hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Data_Namespace::MemoryLevel effective_mem_lvl, const int device_id, DeviceAllocator *device_allocator, const size_t thread_idx, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, std::vector< std::shared_ptr< void >> &malloc_owner, ColumnCacheMap &column_cache)
Creates a JoinColumn struct containing an array of JoinChunk structs.
virtual void copyToDevice(int8_t *device_dst, const int8_t *host_src, const size_t num_bytes) const =0
virtual int8_t * alloc(const size_t num_bytes)=0
size_t col_chunks_buff_sz
const int8_t * col_chunks_buff
#define CHECK(condition)
Definition: Logger.h:206

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void HashJoin::freeHashBufferMemory ( )
inline

Definition at line 257 of file HashJoin.h.

References hash_tables_for_device_.

Referenced by PerfectJoinHashTable::reify(), and BaselineJoinHashTable::reify().

257  {
258  auto empty_hash_tables =
259  decltype(hash_tables_for_device_)(hash_tables_for_device_.size());
260  hash_tables_for_device_.swap(empty_hash_tables);
261  }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:270

+ Here is the caller graph for this function:

virtual size_t HashJoin::getComponentBufferSize ( ) const
protected pure virtual noexcept
CompositeKeyInfo HashJoin::getCompositeKeyInfo ( const std::vector< InnerOuter > &  inner_outer_pairs,
const Executor * executor 
)
static

Definition at line 324 of file HashJoin.cpp.

References CHECK, and kENCODING_DICT.

Referenced by BaselineJoinHashTable::approximateTupleCount(), OverlapsJoinHashTable::approximateTupleCount(), BaselineJoinHashTable::initHashTableForDevice(), OverlapsJoinHashTable::initHashTableOnCpu(), OverlapsJoinHashTable::reify(), BaselineJoinHashTable::reify(), and OverlapsJoinHashTable::reifyWithLayout().

326  {
327  CHECK(executor);
328  std::vector<const void*> sd_inner_proxy_per_key;
329  std::vector<const void*> sd_outer_proxy_per_key;
330  std::vector<ChunkKey> cache_key_chunks; // used for the cache key
331  const auto db_id = executor->getCatalog()->getCurrentDB().dbId;
332  for (const auto& inner_outer_pair : inner_outer_pairs) {
333  const auto inner_col = inner_outer_pair.first;
334  const auto outer_col = inner_outer_pair.second;
335  const auto& inner_ti = inner_col->get_type_info();
336  const auto& outer_ti = outer_col->get_type_info();
337  ChunkKey cache_key_chunks_for_column{
338  db_id, inner_col->get_table_id(), inner_col->get_column_id()};
339  if (inner_ti.is_string() &&
340  !(inner_ti.get_comp_param() == outer_ti.get_comp_param())) {
341  CHECK(outer_ti.is_string());
342  CHECK(inner_ti.get_compression() == kENCODING_DICT &&
343  outer_ti.get_compression() == kENCODING_DICT);
344  const auto sd_inner_proxy = executor->getStringDictionaryProxy(
345  inner_ti.get_comp_param(), executor->getRowSetMemoryOwner(), true);
346  const auto sd_outer_proxy = executor->getStringDictionaryProxy(
347  outer_ti.get_comp_param(), executor->getRowSetMemoryOwner(), true);
348  CHECK(sd_inner_proxy && sd_outer_proxy);
349  sd_inner_proxy_per_key.push_back(sd_inner_proxy);
350  sd_outer_proxy_per_key.push_back(sd_outer_proxy);
351  cache_key_chunks_for_column.push_back(sd_outer_proxy->getGeneration());
352  } else {
353  sd_inner_proxy_per_key.emplace_back();
354  sd_outer_proxy_per_key.emplace_back();
355  }
356  cache_key_chunks.push_back(cache_key_chunks_for_column);
357  }
358  return {sd_inner_proxy_per_key, sd_outer_proxy_per_key, cache_key_chunks};
359 }
std::vector< int > ChunkKey
Definition: types.h:37
#define CHECK(condition)
Definition: Logger.h:206

+ Here is the caller graph for this function:

virtual int HashJoin::getDeviceCount ( ) const
pure virtual noexcept
virtual std::string HashJoin::getHashJoinType ( ) const
pure virtual
HashTable* HashJoin::getHashTableForDevice ( const size_t  device_id) const
inline

Definition at line 215 of file HashJoin.h.

References CHECK_LT, and hash_tables_for_device_.

Referenced by OverlapsJoinHashTable::codegenManyKey(), BaselineJoinHashTable::codegenMatchingSet(), BaselineJoinHashTable::codegenSlot(), BaselineJoinHashTable::getComponentBufferSize(), OverlapsJoinHashTable::getEmittedKeysCount(), OverlapsJoinHashTable::getEntryCount(), BaselineJoinHashTable::getHashType(), OverlapsJoinHashTable::getHashType(), getJoinHashBufferSize(), BaselineJoinHashTable::getKeyBufferSize(), BaselineJoinHashTable::toSet(), and OverlapsJoinHashTable::toSet().

215  {
216  CHECK_LT(device_id, hash_tables_for_device_.size());
217  return hash_tables_for_device_[device_id].get();
218  }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:270
#define CHECK_LT(x, y)
Definition: Logger.h:216

+ Here is the caller graph for this function:

virtual HashType HashJoin::getHashType ( ) const
pure virtual noexcept
static std::string HashJoin::getHashTypeString ( HashType  ht)
inline static noexcept

Definition at line 133 of file HashJoin.h.

Referenced by PerfectJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), OverlapsJoinHashTable::reifyWithLayout(), PerfectJoinHashTable::toString(), BaselineJoinHashTable::toString(), and OverlapsJoinHashTable::toString().

133  {
134  const char* HashTypeStrings[3] = {"OneToOne", "OneToMany", "ManyToMany"};
135  return HashTypeStrings[static_cast<int>(ht)];
136  };

+ Here is the caller graph for this function:

virtual int HashJoin::getInnerTableId ( ) const
pure virtual noexcept

Implemented in OverlapsJoinHashTable, BaselineJoinHashTable, and PerfectJoinHashTable.

Referenced by OverlapsJoinHashTable::getInnerTableId(), OverlapsJoinHashTable::getInstance(), OverlapsJoinHashTable::initHashTableOnCpu(), and OverlapsJoinHashTable::reifyWithLayout().

+ Here is the caller graph for this function:

static int HashJoin::getInnerTableId ( const std::vector< InnerOuter > &  inner_outer_pairs)
inline static

Definition at line 205 of file HashJoin.h.

References CHECK.

205  {
206  CHECK(!inner_outer_pairs.empty());
207  const auto first_inner_col = inner_outer_pairs.front().first;
208  return first_inner_col->get_table_id();
209  }
#define CHECK(condition)
Definition: Logger.h:206
virtual int HashJoin::getInnerTableRteIdx ( ) const
pure virtual noexcept
std::shared_ptr< HashJoin > HashJoin::getInstance ( const std::shared_ptr< Analyzer::BinOper >  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap & column_cache,
Executor * executor,
const RegisteredQueryHint & query_hint 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 238 of file HashJoin.cpp.

References CHECK, CHECK_EQ, CHECK_GT, coalesce_singleton_equi_join(), CPU, DEBUG_TIMER, g_enable_overlaps_hashjoin, PerfectJoinHashTable::getInstance(), BaselineJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), GPU, Data_Namespace::GPU_LEVEL, VLOG, and VLOGGING.

Referenced by Executor::buildHashTableForQualifier(), and getSyntheticInstance().

247  {
248  auto timer = DEBUG_TIMER(__func__);
249  std::shared_ptr<HashJoin> join_hash_table;
250  CHECK_GT(device_count, 0);
251  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
252  throw std::runtime_error(
253  "Overlaps hash join disabled, attempting to fall back to loop join");
254  }
255  if (qual_bin_oper->is_overlaps_oper()) {
256  VLOG(1) << "Trying to build geo hash table:";
257  join_hash_table = OverlapsJoinHashTable::getInstance(qual_bin_oper,
258  query_infos,
259  memory_level,
260  join_type,
261  device_count,
262  column_cache,
263  executor,
264  query_hint);
265  } else if (dynamic_cast<const Analyzer::ExpressionTuple*>(
266  qual_bin_oper->get_left_operand())) {
267  VLOG(1) << "Trying to build keyed hash table:";
268  join_hash_table = BaselineJoinHashTable::getInstance(qual_bin_oper,
269  query_infos,
270  memory_level,
271  join_type,
272  preferred_hash_type,
273  device_count,
274  column_cache,
275  executor);
276  } else {
277  try {
278  VLOG(1) << "Trying to build perfect hash table:";
279  join_hash_table = PerfectJoinHashTable::getInstance(qual_bin_oper,
280  query_infos,
281  memory_level,
282  join_type,
283  preferred_hash_type,
284  device_count,
285  column_cache,
286  executor);
287  } catch (TooManyHashEntries&) {
288  const auto join_quals = coalesce_singleton_equi_join(qual_bin_oper);
289  CHECK_EQ(join_quals.size(), size_t(1));
290  const auto join_qual =
291  std::dynamic_pointer_cast<Analyzer::BinOper>(join_quals.front());
292  VLOG(1) << "Trying to build keyed hash table after perfect hash table:";
293  join_hash_table = BaselineJoinHashTable::getInstance(join_qual,
294  query_infos,
295  memory_level,
296  join_type,
297  preferred_hash_type,
298  device_count,
299  column_cache,
300  executor);
301  }
302  }
303  CHECK(join_hash_table);
304  if (VLOGGING(2)) {
305  if (join_hash_table->getMemoryLevel() == Data_Namespace::MemoryLevel::GPU_LEVEL) {
306  for (int device_id = 0; device_id < join_hash_table->getDeviceCount();
307  ++device_id) {
308  if (join_hash_table->getJoinHashBufferSize(ExecutorDeviceType::GPU, device_id) <=
309  1000) {
310  VLOG(2) << "Built GPU hash table: "
311  << join_hash_table->toString(ExecutorDeviceType::GPU, device_id);
312  }
313  }
314  } else {
315  if (join_hash_table->getJoinHashBufferSize(ExecutorDeviceType::CPU) <= 1000) {
316  VLOG(2) << "Built CPU hash table: "
317  << join_hash_table->toString(ExecutorDeviceType::CPU);
318  }
319  }
320  }
321  return join_hash_table;
322 }
#define CHECK_EQ(x, y)
Definition: Logger.h:214
std::list< std::shared_ptr< Analyzer::Expr > > coalesce_singleton_equi_join(const std::shared_ptr< Analyzer::BinOper > &join_qual)
static std::shared_ptr< OverlapsJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const RegisteredQueryHint &query_hint)
Make hash table from an in-flight SQL query's parse tree etc.
#define CHECK_GT(x, y)
Definition: Logger.h:218
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:96
static std::shared_ptr< BaselineJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from an in-flight SQL query's parse tree etc.
#define VLOGGING(n)
Definition: Logger.h:204
static std::shared_ptr< PerfectJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from an in-flight SQL query's parse tree etc.
#define CHECK(condition)
Definition: Logger.h:206
#define DEBUG_TIMER(name)
Definition: Logger.h:322
#define VLOG(n)
Definition: Logger.h:300

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t HashJoin::getJoinHashBuffer ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
inline

Definition at line 234 of file HashJoin.h.

References CHECK, CHECK_LT, CPU, and hash_tables_for_device_.

Referenced by PerfectJoinHashTable::toSet(), BaselineJoinHashTable::toSet(), OverlapsJoinHashTable::toSet(), PerfectJoinHashTable::toString(), BaselineJoinHashTable::toString(), OverlapsJoinHashTable::toString(), and anonymous_namespace{HashJoin.cpp}::toStringFlat().

235  {
236  // TODO: just make device_id a size_t
237  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
238  if (!hash_tables_for_device_[device_id]) {
239  return 0;
240  }
241  CHECK(hash_tables_for_device_[device_id]);
242  auto hash_table = hash_tables_for_device_[device_id].get();
243 #ifdef HAVE_CUDA
244  if (device_type == ExecutorDeviceType::CPU) {
245  return reinterpret_cast<int64_t>(hash_table->getCpuBuffer());
246  } else {
247  CHECK(hash_table);
248  const auto gpu_buff = hash_table->getGpuBuffer();
249  return reinterpret_cast<CUdeviceptr>(gpu_buff);
250  }
251 #else
252  CHECK(device_type == ExecutorDeviceType::CPU);
253  return reinterpret_cast<int64_t>(hash_table->getCpuBuffer());
254 #endif
255  }
unsigned long long CUdeviceptr
Definition: nocuda.h:27
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:270
#define CHECK_LT(x, y)
Definition: Logger.h:216
#define CHECK(condition)
Definition: Logger.h:206

+ Here is the caller graph for this function:

size_t HashJoin::getJoinHashBufferSize ( const ExecutorDeviceType  device_type)
inline

Definition at line 220 of file HashJoin.h.

References CHECK, and CPU.

Referenced by PerfectJoinHashTable::toSet(), PerfectJoinHashTable::toString(), and anonymous_namespace{HashJoin.cpp}::toStringFlat().

220  {
221  CHECK(device_type == ExecutorDeviceType::CPU);
222  return getJoinHashBufferSize(device_type, 0);
223  }
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
Definition: HashJoin.h:220
#define CHECK(condition)
Definition: Logger.h:206

+ Here is the caller graph for this function:

size_t HashJoin::getJoinHashBufferSize ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
inline

Definition at line 225 of file HashJoin.h.

References getHashTableForDevice().

226  {
227  auto hash_table = getHashTableForDevice(device_id);
228  if (!hash_table) {
229  return 0;
230  }
231  return hash_table->getHashTableBufferSize(device_type);
232  }
HashTable * getHashTableForDevice(const size_t device_id) const
Definition: HashJoin.h:215

+ Here is the call graph for this function:

virtual Data_Namespace::MemoryLevel HashJoin::getMemoryLevel ( ) const
pure virtual noexcept
std::shared_ptr< HashJoin > HashJoin::getSyntheticInstance ( std::string_view  table1,
std::string_view  column1,
std::string_view  table2,
std::string_view  column2,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap & column_cache,
Executor * executor 
)
static

Make hash table from named tables and columns (such as for testing).

Definition at line 473 of file HashJoin.cpp.

References RegisteredQueryHint::defaults(), getInstance(), getSyntheticColumnVar(), getSyntheticInputTableInfo(), INNER, kBOOLEAN, kEQ, kONE, setupSyntheticCaching(), and ScalarExprVisitor< T >::visit().

482  {
483  auto a1 = getSyntheticColumnVar(table1, column1, 0, executor);
484  auto a2 = getSyntheticColumnVar(table2, column2, 1, executor);
485 
486  auto qual_bin_oper = std::make_shared<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, a1, a2);
487 
488  std::set<const Analyzer::ColumnVar*> cvs =
489  AllColumnVarsVisitor().visit(qual_bin_oper.get());
490  auto query_infos = getSyntheticInputTableInfo(cvs, executor);
491  setupSyntheticCaching(cvs, executor);
492  RegisteredQueryHint query_hint = RegisteredQueryHint::defaults();
493 
494  auto hash_table = HashJoin::getInstance(qual_bin_oper,
495  query_infos,
496  memory_level,
497  JoinType::INNER,
498  preferred_hash_type,
499  device_count,
500  column_cache,
501  executor,
502  query_hint);
503  return hash_table;
504 }
std::vector< InputTableInfo > getSyntheticInputTableInfo(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:446
Definition: sqldefs.h:30
T visit(const Analyzer::Expr *expr) const
void setupSyntheticCaching(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:432
static RegisteredQueryHint defaults()
Definition: QueryHint.h:175
Definition: sqldefs.h:69
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const RegisteredQueryHint &query_hint)
Make hash table from an in-flight SQL query's parse tree etc.
Definition: HashJoin.cpp:238
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, Executor *executor)
Definition: HashJoin.cpp:361

+ Here is the call graph for this function:

std::shared_ptr< HashJoin > HashJoin::getSyntheticInstance ( const std::shared_ptr< Analyzer::BinOper >  qual_bin_oper,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap & column_cache,
Executor * executor 
)
static

Make hash table from named tables and columns (such as for testing).

Definition at line 507 of file HashJoin.cpp.

References RegisteredQueryHint::defaults(), getInstance(), getSyntheticInputTableInfo(), INNER, setupSyntheticCaching(), and ScalarExprVisitor< T >::visit().

513  {
514  std::set<const Analyzer::ColumnVar*> cvs =
515  AllColumnVarsVisitor().visit(qual_bin_oper.get());
516  auto query_infos = getSyntheticInputTableInfo(cvs, executor);
517  setupSyntheticCaching(cvs, executor);
518  RegisteredQueryHint query_hint = RegisteredQueryHint::defaults();
519 
520  auto hash_table = HashJoin::getInstance(qual_bin_oper,
521  query_infos,
522  memory_level,
523  JoinType::INNER,
524  preferred_hash_type,
525  device_count,
526  column_cache,
527  executor,
528  query_hint);
529  return hash_table;
530 }
std::vector< InputTableInfo > getSyntheticInputTableInfo(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:446
T visit(const Analyzer::Expr *expr) const
void setupSyntheticCaching(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:432
static RegisteredQueryHint defaults()
Definition: QueryHint.h:175
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const RegisteredQueryHint &query_hint)
Make hash table from an in-flight SQL query's parse tree etc.
Definition: HashJoin.cpp:238

+ Here is the call graph for this function:

virtual size_t HashJoin::offsetBufferOff ( ) const
pure virtual noexcept
virtual size_t HashJoin::payloadBufferOff ( ) const
pure virtual noexcept
virtual DecodedJoinHashBufferSet HashJoin::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
pure virtual
virtual std::string HashJoin::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
pure virtual
std::string HashJoin::toStringFlat32 ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
virtual

Definition at line 117 of file HashJoin.cpp.

118  {
119  return toStringFlat<int32_t>(this, device_type, device_id);
120 }
std::string HashJoin::toStringFlat64 ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
virtual

Definition at line 112 of file HashJoin.cpp.

113  {
114  return toStringFlat<int64_t>(this, device_type, device_id);
115 }

Member Data Documentation


The documentation for this class was generated from the following files: