OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashJoin Class Reference (abstract)

#include <HashJoin.h>

+ Inheritance diagram for HashJoin:

Public Member Functions

virtual std::string toString (const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
 
virtual std::string toStringFlat64 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual std::string toStringFlat32 (const ExecutorDeviceType device_type, const int device_id) const
 
virtual DecodedJoinHashBufferSet toSet (const ExecutorDeviceType device_type, const int device_id) const =0
 
virtual llvm::Value * codegenSlot (const CompilationOptions &, const size_t)=0
 
virtual HashJoinMatchingSet codegenMatchingSet (const CompilationOptions &, const size_t)=0
 
virtual int getInnerTableId () const noexcept=0
 
virtual int getInnerTableRteIdx () const noexcept=0
 
virtual HashType getHashType () const noexcept=0
 
virtual Data_Namespace::MemoryLevel getMemoryLevel () const noexcept=0
 
virtual int getDeviceCount () const noexcept=0
 
virtual size_t offsetBufferOff () const noexcept=0
 
virtual size_t countBufferOff () const noexcept=0
 
virtual size_t payloadBufferOff () const noexcept=0
 
virtual std::string getHashJoinType () const =0
 
virtual bool isBitwiseEq () const =0
 
JoinColumn fetchJoinColumn (const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
 
HashTable * getHashTableForDevice (const size_t device_id) const
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type)
 
size_t getJoinHashBufferSize (const ExecutorDeviceType device_type, const int device_id) const
 
int8_t * getJoinHashBuffer (const ExecutorDeviceType device_type, const int device_id) const
 
void freeHashBufferMemory ()
 

Static Public Member Functions

static bool layoutRequiresAdditionalBuffers (HashType layout) noexcept
 
static std::string getHashTypeString (HashType ht) noexcept
 
static HashJoinMatchingSet codegenMatchingSet (const std::vector< llvm::Value * > &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized=false)
 
static llvm::Value * codegenHashTableLoad (const size_t table_idx, Executor *executor)
 
static std::shared_ptr< HashJoin > getInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
 Make hash table from an in-flight SQL query's parse tree etc. More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::shared_ptr< HashJoin > getSyntheticInstance (const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 Make hash table from named tables and columns (such as for testing). More...
 
static std::pair< std::string,
std::shared_ptr< HashJoin > > 
getSyntheticInstance (std::vector< std::shared_ptr< Analyzer::BinOper >>, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
 
static int getInnerTableId (const std::vector< InnerOuter > &inner_outer_pairs)
 
static bool canAccessHashTable (bool allow_hash_table_recycling, bool invalid_cache_key, JoinType join_type)
 
static void checkHashJoinReplicationConstraint (const int table_id, const size_t shard_count, const Executor *executor)
 
static std::pair< InnerOuter,
InnerOuterStringOpInfos > 
normalizeColumnPair (const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
 
template<typename T >
static const T * getHashJoinColumn (const Analyzer::Expr *expr)
 
static std::pair< std::vector
< InnerOuter >, std::vector
< InnerOuterStringOpInfos > > 
normalizeColumnPairs (const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
 
static std::vector< int > collectFragmentIds (const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
 
static CompositeKeyInfo getCompositeKeyInfo (const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs={})
 
static std::vector< const
StringDictionaryProxy::IdMap * > 
translateCompositeStrDictProxies (const CompositeKeyInfo &composite_key_info, const std::vector< InnerOuterStringOpInfos > &string_op_infos_for_keys, const Executor *executor)
 
static std::pair< const
StringDictionaryProxy
*, StringDictionaryProxy * > 
getStrDictProxies (const InnerOuter &cols, const Executor *executor, const bool has_string_ops)
 
static const
StringDictionaryProxy::IdMap
translateInnerToOuterStrDictProxies (const InnerOuter &cols, const InnerOuterStringOpInfos &inner_outer_string_op_infos, ExpressionRange &old_col_range, const Executor *executor)
 

Protected Member Functions

virtual size_t getComponentBufferSize () const noexcept=0
 

Static Protected Member Functions

static llvm::Value * codegenColOrStringOper (const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
 

Protected Attributes

std::vector< std::shared_ptr
< HashTable > > 
hash_tables_for_device_
 

Detailed Description

Definition at line 134 of file HashJoin.h.

Member Function Documentation

bool HashJoin::canAccessHashTable ( bool  allow_hash_table_recycling,
bool  invalid_cache_key,
JoinType  join_type 
)
static

Definition at line 1033 of file HashJoin.cpp.

References g_enable_data_recycler, g_use_hashtable_cache, and INVALID.

Referenced by PerfectJoinHashTable::reify(), and BaselineJoinHashTable::reifyWithLayout().

1035  {
1036  return g_enable_data_recycler && g_use_hashtable_cache && !invalid_cache_key &&
1037  allow_hash_table_recycling && join_type != JoinType::INVALID;
1038 }
bool g_enable_data_recycler
Definition: Execute.cpp:146
bool g_use_hashtable_cache
Definition: Execute.cpp:147

+ Here is the caller graph for this function:

void HashJoin::checkHashJoinReplicationConstraint ( const int  table_id,
const size_t  shard_count,
const Executor *  executor 
)
static

Definition at line 776 of file HashJoin.cpp.

References CHECK, g_cluster, and table_is_replicated().

Referenced by RangeJoinHashTable::getInstance(), BaselineJoinHashTable::reify(), and PerfectJoinHashTable::reify().

778  {
779  if (!g_cluster) {
780  return;
781  }
782  if (table_id >= 0) {
783  CHECK(executor);
784  const auto inner_td = executor->getCatalog()->getMetadataForTable(table_id);
785  CHECK(inner_td);
786  if (!shard_count && !table_is_replicated(inner_td)) {
787  throw TableMustBeReplicated(inner_td->tableName);
788  }
789  }
790 }
bool table_is_replicated(const TableDescriptor *td)
#define CHECK(condition)
Definition: Logger.h:289
bool g_cluster

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * HashJoin::codegenColOrStringOper ( const Analyzer::Expr *  col_or_string_oper,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos,
CodeGenerator &  code_generator,
const CompilationOptions &  co 
)
static protected

Definition at line 544 of file HashJoin.cpp.

References CHECK, CodeGenerator::codegen(), and CodeGenerator::codegenPseudoStringOper().

Referenced by BaselineJoinHashTable::codegenKey(), PerfectJoinHashTable::codegenMatchingSet(), and PerfectJoinHashTable::codegenSlot().

548  {
549  if (!string_op_infos.empty()) {
550  const auto coerced_col_var =
551  dynamic_cast<const Analyzer::ColumnVar*>(col_or_string_oper);
552  CHECK(coerced_col_var);
553  std::vector<llvm::Value*> codegen_val_vec{
554  code_generator.codegenPseudoStringOper(coerced_col_var, string_op_infos, co)};
555  return codegen_val_vec[0];
556  }
557  return code_generator.codegen(col_or_string_oper, true, co)[0];
558 }
llvm::Value * codegenPseudoStringOper(const Analyzer::ColumnVar *, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, const CompilationOptions &)
std::vector< llvm::Value * > codegen(const Analyzer::Expr *, const bool fetch_columns, const CompilationOptions &)
Definition: IRCodegen.cpp:30
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * HashJoin::codegenHashTableLoad ( const size_t  table_idx,
Executor *  executor 
)
static

Definition at line 257 of file HashJoin.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_LT, and get_arg_by_name().

Referenced by PerfectJoinHashTable::codegenHashTableLoad(), BaselineJoinHashTable::codegenMatchingSet(), OverlapsJoinHashTable::codegenMatchingSet(), RangeJoinHashTable::codegenMatchingSetWithOffset(), and BaselineJoinHashTable::hashPtr().

257  {
258  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
259  llvm::Value* hash_ptr = nullptr;
260  const auto total_table_count =
261  executor->plan_state_->join_info_.join_hash_tables_.size();
262  CHECK_LT(table_idx, total_table_count);
263  if (total_table_count > 1) {
264  auto hash_tables_ptr =
265  get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
266  auto hash_pptr =
267  table_idx > 0
268  ? executor->cgen_state_->ir_builder_.CreateGEP(
269  hash_tables_ptr->getType()->getScalarType()->getPointerElementType(),
270  hash_tables_ptr,
271  executor->cgen_state_->llInt(static_cast<int64_t>(table_idx)))
272  : hash_tables_ptr;
273  hash_ptr = executor->cgen_state_->ir_builder_.CreateLoad(
274  hash_pptr->getType()->getPointerElementType(), hash_pptr);
275  } else {
276  hash_ptr = get_arg_by_name(executor->cgen_state_->row_func_, "join_hash_tables");
277  }
278  CHECK(hash_ptr);
279  return hash_ptr;
280 }
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:166
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK_LT(x, y)
Definition: Logger.h:299
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual HashJoinMatchingSet HashJoin::codegenMatchingSet ( const CompilationOptions & ,
const size_t   
)
pure virtual

Implemented in OverlapsJoinHashTable, BaselineJoinHashTable, and PerfectJoinHashTable.

Referenced by PerfectJoinHashTable::codegenMatchingSet(), BaselineJoinHashTable::codegenMatchingSet(), OverlapsJoinHashTable::codegenMatchingSet(), and RangeJoinHashTable::codegenMatchingSetWithOffset().

+ Here is the caller graph for this function:

HashJoinMatchingSet HashJoin::codegenMatchingSet ( const std::vector< llvm::Value * > &  hash_join_idx_args_in,
const bool  is_sharded,
const bool  col_is_nullable,
const bool  is_bw_eq,
const int64_t  sub_buff_size,
Executor *  executor,
const bool  is_bucketized = false 
)
static

Definition at line 206 of file HashJoin.cpp.

References AUTOMATIC_IR_METADATA, and CHECK.

213  {
214  AUTOMATIC_IR_METADATA(executor->cgen_state_.get());
215  using namespace std::string_literals;
216 
217  std::string fname(is_bucketized ? "bucketized_hash_join_idx"s : "hash_join_idx"s);
218 
219  if (is_bw_eq) {
220  fname += "_bitwise";
221  }
222  if (is_sharded) {
223  fname += "_sharded";
224  }
225  if (!is_bw_eq && col_is_nullable) {
226  fname += "_nullable";
227  }
228 
229  const auto slot_lv = executor->cgen_state_->emitCall(fname, hash_join_idx_args_in);
230  const auto slot_valid_lv = executor->cgen_state_->ir_builder_.CreateICmpSGE(
231  slot_lv, executor->cgen_state_->llInt(int64_t(0)));
232 
233  auto pos_ptr = hash_join_idx_args_in[0];
234  CHECK(pos_ptr);
235 
236  auto count_ptr = executor->cgen_state_->ir_builder_.CreateAdd(
237  pos_ptr, executor->cgen_state_->llInt(sub_buff_size));
238  auto hash_join_idx_args = hash_join_idx_args_in;
239  hash_join_idx_args[0] = executor->cgen_state_->ir_builder_.CreatePtrToInt(
240  count_ptr, llvm::Type::getInt64Ty(executor->cgen_state_->context_));
241 
242  const auto row_count_lv = executor->cgen_state_->ir_builder_.CreateSelect(
243  slot_valid_lv,
244  executor->cgen_state_->emitCall(fname, hash_join_idx_args),
245  executor->cgen_state_->llInt(int64_t(0)));
246  auto rowid_base_i32 = executor->cgen_state_->ir_builder_.CreateIntToPtr(
247  executor->cgen_state_->ir_builder_.CreateAdd(
248  pos_ptr, executor->cgen_state_->llInt(2 * sub_buff_size)),
249  llvm::Type::getInt32PtrTy(executor->cgen_state_->context_));
250  auto rowid_ptr_i32 = executor->cgen_state_->ir_builder_.CreateGEP(
251  rowid_base_i32->getType()->getScalarType()->getPointerElementType(),
252  rowid_base_i32,
253  slot_lv);
254  return {rowid_ptr_i32, row_count_lv, slot_lv};
255 }
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:289
virtual llvm::Value* HashJoin::codegenSlot ( const CompilationOptions & ,
const size_t   
)
pure virtual
std::vector< int > HashJoin::collectFragmentIds ( const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragments)
static

Definition at line 451 of file HashJoin.cpp.

References gpu_enabled::sort(), and shared::transform().

Referenced by OverlapsJoinHashTable::generateCacheKey(), HashtableRecycler::getHashtableAccessPathInfo(), RangeJoinHashTable::reifyWithLayout(), and OverlapsJoinHashTable::reifyWithLayout().

452  {
453  auto const fragment_id = [](auto const& frag_info) { return frag_info.fragmentId; };
454  std::vector<int> frag_ids(fragments.size());
455  std::transform(fragments.cbegin(), fragments.cend(), frag_ids.begin(), fragment_id);
456  std::sort(frag_ids.begin(), frag_ids.end());
457  return frag_ids;
458 }
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
OUTPUT transform(INPUT const &input, FUNC const &func)
Definition: misc.h:320

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual size_t HashJoin::countBufferOff ( ) const
pure virtual noexcept
JoinColumn HashJoin::fetchJoinColumn ( const Analyzer::ColumnVar *  hash_col,
const std::vector< Fragmenter_Namespace::FragmentInfo > &  fragment_info,
const Data_Namespace::MemoryLevel  effective_memory_level,
const int  device_id,
std::vector< std::shared_ptr< Chunk_NS::Chunk >> &  chunks_owner,
DeviceAllocator *  dev_buff_owner,
std::vector< std::shared_ptr< void >> &  malloc_owner,
Executor *  executor,
ColumnCacheMap *  column_cache 
)

fetchJoinColumn() calls ColumnFetcher::makeJoinColumn(), then copies the JoinColumn's col_chunks_buff memory onto the GPU if required by the effective_memory_level parameter. The dev_buff_owner parameter will manage the GPU memory.

Definition at line 58 of file HashJoin.cpp.

References Allocator::alloc(), CHECK, JoinColumn::col_chunks_buff, JoinColumn::col_chunks_buff_sz, DeviceAllocator::copyToDevice(), Data_Namespace::GPU_LEVEL, and ColumnFetcher::makeJoinColumn().

Referenced by OverlapsJoinHashTable::fetchColumnsForDevice(), PerfectJoinHashTable::fetchColumnsForDevice(), and BaselineJoinHashTable::fetchColumnsForDevice().

67  {
68  static std::mutex fragment_fetch_mutex;
69  std::lock_guard<std::mutex> fragment_fetch_lock(fragment_fetch_mutex);
70  try {
71  JoinColumn join_column = ColumnFetcher::makeJoinColumn(executor,
72  *hash_col,
73  fragment_info,
74  effective_memory_level,
75  device_id,
76  dev_buff_owner,
77  /*thread_idx=*/0,
78  chunks_owner,
79  malloc_owner,
80  *column_cache);
81  if (effective_memory_level == Data_Namespace::GPU_LEVEL) {
82  CHECK(dev_buff_owner);
83  auto device_col_chunks_buff = dev_buff_owner->alloc(join_column.col_chunks_buff_sz);
84  dev_buff_owner->copyToDevice(device_col_chunks_buff,
85  join_column.col_chunks_buff,
86  join_column.col_chunks_buff_sz);
87  join_column.col_chunks_buff = device_col_chunks_buff;
88  }
89  return join_column;
90  } catch (...) {
91  throw FailedToFetchColumn();
92  }
93 }
static JoinColumn makeJoinColumn(Executor *executor, const Analyzer::ColumnVar &hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Data_Namespace::MemoryLevel effective_mem_lvl, const int device_id, DeviceAllocator *device_allocator, const size_t thread_idx, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, std::vector< std::shared_ptr< void >> &malloc_owner, ColumnCacheMap &column_cache)
Creates a JoinColumn struct containing an array of JoinChunk structs.
virtual int8_t * alloc(const size_t num_bytes)=0
virtual void copyToDevice(void *device_dst, const void *host_src, const size_t num_bytes) const =0
size_t col_chunks_buff_sz
const int8_t * col_chunks_buff
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void HashJoin::freeHashBufferMemory ( )
inline

Definition at line 321 of file HashJoin.h.

References hash_tables_for_device_.

Referenced by BaselineJoinHashTable::reify(), and PerfectJoinHashTable::reify().

321  {
322  auto empty_hash_tables =
324  hash_tables_for_device_.swap(empty_hash_tables);
325  }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:361

+ Here is the caller graph for this function:

virtual size_t HashJoin::getComponentBufferSize ( ) const
protected pure virtual noexcept
CompositeKeyInfo HashJoin::getCompositeKeyInfo ( const std::vector< InnerOuter > &  inner_outer_pairs,
const Executor *  executor,
const std::vector< InnerOuterStringOpInfos > &  inner_outer_string_op_infos_pairs = {} 
)
static

Definition at line 460 of file HashJoin.cpp.

References CHECK, CHECK_EQ, and kENCODING_DICT.

Referenced by RangeJoinHashTable::approximateTupleCount(), BaselineJoinHashTable::initHashTableForDevice(), RangeJoinHashTable::initHashTableOnCpu(), OverlapsJoinHashTable::reify(), BaselineJoinHashTable::reify(), and BaselineJoinHashTable::reifyWithLayout().

463  {
464  CHECK(executor);
465  std::vector<const void*> sd_inner_proxy_per_key;
466  std::vector<void*> sd_outer_proxy_per_key;
467  std::vector<ChunkKey> cache_key_chunks; // used for the cache key
468  const auto db_id = executor->getCatalog()->getCurrentDB().dbId;
469  const bool has_string_op_infos = inner_outer_string_op_infos_pairs.size();
470  if (has_string_op_infos) {
471  CHECK_EQ(inner_outer_pairs.size(), inner_outer_string_op_infos_pairs.size());
472  }
473  size_t string_op_info_pairs_idx = 0;
474  for (const auto& inner_outer_pair : inner_outer_pairs) {
475  const auto inner_col = inner_outer_pair.first;
476  const auto outer_col = inner_outer_pair.second;
477  const auto& inner_ti = inner_col->get_type_info();
478  const auto& outer_ti = outer_col->get_type_info();
479  ChunkKey cache_key_chunks_for_column{
480  db_id, inner_col->get_table_id(), inner_col->get_column_id()};
481  if (inner_ti.is_string() &&
482  (!(inner_ti.get_comp_param() == outer_ti.get_comp_param()) ||
483  (has_string_op_infos &&
484  (inner_outer_string_op_infos_pairs[string_op_info_pairs_idx].first.size() ||
485  inner_outer_string_op_infos_pairs[string_op_info_pairs_idx].second.size())))) {
486  CHECK(outer_ti.is_string());
487  CHECK(inner_ti.get_compression() == kENCODING_DICT &&
488  outer_ti.get_compression() == kENCODING_DICT);
489  const auto sd_inner_proxy = executor->getStringDictionaryProxy(
490  inner_ti.get_comp_param(), executor->getRowSetMemoryOwner(), true);
491  auto sd_outer_proxy = executor->getStringDictionaryProxy(
492  outer_ti.get_comp_param(), executor->getRowSetMemoryOwner(), true);
493  CHECK(sd_inner_proxy && sd_outer_proxy);
494  sd_inner_proxy_per_key.push_back(sd_inner_proxy);
495  sd_outer_proxy_per_key.push_back(sd_outer_proxy);
496  cache_key_chunks_for_column.push_back(sd_outer_proxy->getGeneration());
497  } else {
498  sd_inner_proxy_per_key.emplace_back();
499  sd_outer_proxy_per_key.emplace_back();
500  }
501  cache_key_chunks.push_back(cache_key_chunks_for_column);
502  string_op_info_pairs_idx++;
503  }
504  return {sd_inner_proxy_per_key, sd_outer_proxy_per_key, cache_key_chunks};
505 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297
std::vector< int > ChunkKey
Definition: types.h:36
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the caller graph for this function:

virtual int HashJoin::getDeviceCount ( ) const
pure virtual noexcept
template<typename T >
const T * HashJoin::getHashJoinColumn ( const Analyzer::Expr *  expr)
static

Definition at line 793 of file HashJoin.cpp.

References CHECK, and heavydb.dtypes::T.

793  {
794  auto* target_expr = expr;
795  if (auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr)) {
796  target_expr = cast_expr->get_operand();
797  }
798  CHECK(target_expr);
799  return dynamic_cast<const T*>(target_expr);
800 }
#define CHECK(condition)
Definition: Logger.h:289
virtual std::string HashJoin::getHashJoinType ( ) const
pure virtual
HashTable* HashJoin::getHashTableForDevice ( const size_t  device_id) const
inline

Definition at line 279 of file HashJoin.h.

References CHECK_LT, and hash_tables_for_device_.

Referenced by OverlapsJoinHashTable::codegenManyKey(), BaselineJoinHashTable::codegenMatchingSet(), BaselineJoinHashTable::codegenSlot(), BaselineJoinHashTable::getComponentBufferSize(), OverlapsJoinHashTable::getEmittedKeysCount(), OverlapsJoinHashTable::getEntryCount(), BaselineJoinHashTable::getHashType(), OverlapsJoinHashTable::getHashType(), getJoinHashBufferSize(), BaselineJoinHashTable::getKeyBufferSize(), BaselineJoinHashTable::toSet(), and OverlapsJoinHashTable::toSet().

279  {
280  CHECK_LT(device_id, hash_tables_for_device_.size());
281  return hash_tables_for_device_[device_id].get();
282  }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:361
#define CHECK_LT(x, y)
Definition: Logger.h:299

+ Here is the caller graph for this function:

virtual HashType HashJoin::getHashType ( ) const
pure virtual noexcept
static std::string HashJoin::getHashTypeString ( HashType  ht)
inline static noexcept

Definition at line 164 of file HashJoin.h.

Referenced by BaselineJoinHashTableBuilder::allocateDeviceMemory(), BaselineJoinHashTable::getInstance(), PerfectJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), BaselineJoinHashTableBuilder::initHashTableOnCpu(), RangeJoinHashTable::reifyWithLayout(), OverlapsJoinHashTable::reifyWithLayout(), HashingSchemeRecycler::toString(), PerfectJoinHashTable::toString(), BaselineJoinHashTable::toString(), and OverlapsJoinHashTable::toString().

164  {
165  const char* HashTypeStrings[3] = {"OneToOne", "OneToMany", "ManyToMany"};
166  return HashTypeStrings[static_cast<int>(ht)];
167  };

+ Here is the caller graph for this function:

virtual int HashJoin::getInnerTableId ( ) const
pure virtual noexcept

Implemented in OverlapsJoinHashTable, BaselineJoinHashTable, and PerfectJoinHashTable.

Referenced by OverlapsJoinHashTable::getInnerTableId(), RangeJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), RangeJoinHashTable::reifyWithLayout(), and OverlapsJoinHashTable::reifyWithLayout().

+ Here is the caller graph for this function:

static int HashJoin::getInnerTableId ( const std::vector< InnerOuter > &  inner_outer_pairs)
inline static

Definition at line 248 of file HashJoin.h.

References CHECK.

248  {
249  CHECK(!inner_outer_pairs.empty());
250  const auto first_inner_col = inner_outer_pairs.front().first;
251  return first_inner_col->get_table_id();
252  }
#define CHECK(condition)
Definition: Logger.h:289
virtual int HashJoin::getInnerTableRteIdx ( ) const
pure virtual noexcept
std::shared_ptr< HashJoin > HashJoin::getInstance ( const std::shared_ptr< Analyzer::BinOper >  qual_bin_oper,
const std::vector< InputTableInfo > &  query_infos,
const Data_Namespace::MemoryLevel  memory_level,
const JoinType  join_type,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap &  column_cache,
Executor *  executor,
const HashTableBuildDagMap &  hashtable_build_dag_map,
const RegisteredQueryHint &  query_hint,
const TableIdToNodeMap &  table_id_to_node_map 
)
static

Make hash table from an in-flight SQL query's parse tree etc.

Definition at line 283 of file HashJoin.cpp.

References CHECK, CHECK_EQ, CHECK_GT, coalesce_singleton_equi_join(), CPU, DEBUG_TIMER, g_enable_overlaps_hashjoin, BaselineJoinHashTable::getInstance(), PerfectJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), GPU, Data_Namespace::GPU_LEVEL, VLOG, and VLOGGING.

Referenced by Executor::buildHashTableForQualifier(), and getSyntheticInstance().

294  {
295  auto timer = DEBUG_TIMER(__func__);
296  std::shared_ptr<HashJoin> join_hash_table;
297  CHECK_GT(device_count, 0);
298  if (!g_enable_overlaps_hashjoin && qual_bin_oper->is_overlaps_oper()) {
299  throw std::runtime_error(
300  "Overlaps hash join disabled, attempting to fall back to loop join");
301  }
302  if (qual_bin_oper->is_overlaps_oper()) {
303  VLOG(1) << "Trying to build geo hash table:";
304  join_hash_table = OverlapsJoinHashTable::getInstance(qual_bin_oper,
305  query_infos,
306  memory_level,
307  join_type,
308  device_count,
309  column_cache,
310  executor,
311  hashtable_build_dag_map,
312  query_hint,
313  table_id_to_node_map);
314  } else if (dynamic_cast<const Analyzer::ExpressionTuple*>(
315  qual_bin_oper->get_left_operand())) {
316  VLOG(1) << "Trying to build keyed hash table:";
317  join_hash_table = BaselineJoinHashTable::getInstance(qual_bin_oper,
318  query_infos,
319  memory_level,
320  join_type,
321  preferred_hash_type,
322  device_count,
323  column_cache,
324  executor,
325  hashtable_build_dag_map,
326  query_hint,
327  table_id_to_node_map);
328  } else {
329  try {
330  VLOG(1) << "Trying to build perfect hash table:";
331  join_hash_table = PerfectJoinHashTable::getInstance(qual_bin_oper,
332  query_infos,
333  memory_level,
334  join_type,
335  preferred_hash_type,
336  device_count,
337  column_cache,
338  executor,
339  hashtable_build_dag_map,
340  query_hint,
341  table_id_to_node_map);
342  } catch (JoinHashTableTooBig& e) {
343  throw e;
344  } catch (TooManyHashEntries&) {
345  const auto join_quals = coalesce_singleton_equi_join(qual_bin_oper);
346  CHECK_EQ(join_quals.size(), size_t(1));
347  const auto join_qual =
348  std::dynamic_pointer_cast<Analyzer::BinOper>(join_quals.front());
349  VLOG(1) << "Trying to build keyed hash table after perfect hash table:";
350  join_hash_table = BaselineJoinHashTable::getInstance(join_qual,
351  query_infos,
352  memory_level,
353  join_type,
354  preferred_hash_type,
355  device_count,
356  column_cache,
357  executor,
358  hashtable_build_dag_map,
359  query_hint,
360  table_id_to_node_map);
361  }
362  }
363  CHECK(join_hash_table);
364  if (VLOGGING(2)) {
365  if (join_hash_table->getMemoryLevel() == Data_Namespace::MemoryLevel::GPU_LEVEL) {
366  for (int device_id = 0; device_id < join_hash_table->getDeviceCount();
367  ++device_id) {
368  if (join_hash_table->getJoinHashBufferSize(ExecutorDeviceType::GPU, device_id) <=
369  1000) {
370  VLOG(2) << "Built GPU hash table: "
371  << join_hash_table->toString(ExecutorDeviceType::GPU, device_id);
372  }
373  }
374  } else {
375  if (join_hash_table->getJoinHashBufferSize(ExecutorDeviceType::CPU) <= 1000) {
376  VLOG(2) << "Built CPU hash table: "
377  << join_hash_table->toString(ExecutorDeviceType::CPU);
378  }
379  }
380  }
381  return join_hash_table;
382 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297
std::list< std::shared_ptr< Analyzer::Expr > > coalesce_singleton_equi_join(const std::shared_ptr< Analyzer::BinOper > &join_qual)
static std::shared_ptr< OverlapsJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
#define CHECK_GT(x, y)
Definition: Logger.h:301
bool g_enable_overlaps_hashjoin
Definition: Execute.cpp:102
#define VLOGGING(n)
Definition: Logger.h:287
static std::shared_ptr< BaselineJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hints, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
static std::shared_ptr< PerfectJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hints, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
#define CHECK(condition)
Definition: Logger.h:289
#define DEBUG_TIMER(name)
Definition: Logger.h:407
#define VLOG(n)
Definition: Logger.h:383

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int8_t* HashJoin::getJoinHashBuffer ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
inline

Definition at line 298 of file HashJoin.h.

References CHECK, CHECK_LT, CPU, and hash_tables_for_device_.

Referenced by PerfectJoinHashTable::toSet(), BaselineJoinHashTable::toSet(), OverlapsJoinHashTable::toSet(), PerfectJoinHashTable::toString(), BaselineJoinHashTable::toString(), OverlapsJoinHashTable::toString(), and anonymous_namespace{HashJoin.cpp}::toStringFlat().

299  {
300  // TODO: just make device_id a size_t
301  CHECK_LT(size_t(device_id), hash_tables_for_device_.size());
302  if (!hash_tables_for_device_[device_id]) {
303  return nullptr;
304  }
305  CHECK(hash_tables_for_device_[device_id]);
306  auto hash_table = hash_tables_for_device_[device_id].get();
307 #ifdef HAVE_CUDA
308  if (device_type == ExecutorDeviceType::CPU) {
309  return hash_table->getCpuBuffer();
310  } else {
311  CHECK(hash_table);
312  const auto gpu_buff = hash_table->getGpuBuffer();
313  return gpu_buff;
314  }
315 #else
316  CHECK(device_type == ExecutorDeviceType::CPU);
317  return hash_table->getCpuBuffer();
318 #endif
319  }
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:361
#define CHECK_LT(x, y)
Definition: Logger.h:299
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the caller graph for this function:

size_t HashJoin::getJoinHashBufferSize ( const ExecutorDeviceType  device_type)
inline

Definition at line 284 of file HashJoin.h.

References CHECK, and CPU.

Referenced by PerfectJoinHashTable::toSet(), PerfectJoinHashTable::toString(), and anonymous_namespace{HashJoin.cpp}::toStringFlat().

284  {
285  CHECK(device_type == ExecutorDeviceType::CPU);
286  return getJoinHashBufferSize(device_type, 0);
287  }
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
Definition: HashJoin.h:284
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the caller graph for this function:

size_t HashJoin::getJoinHashBufferSize ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
inline

Definition at line 289 of file HashJoin.h.

References getHashTableForDevice().

290  {
291  auto hash_table = getHashTableForDevice(device_id);
292  if (!hash_table) {
293  return 0;
294  }
295  return hash_table->getHashTableBufferSize(device_type);
296  }
HashTable * getHashTableForDevice(const size_t device_id) const
Definition: HashJoin.h:279

+ Here is the call graph for this function:

virtual Data_Namespace::MemoryLevel HashJoin::getMemoryLevel ( ) const
pure virtual noexcept
std::pair< const StringDictionaryProxy *, StringDictionaryProxy * > HashJoin::getStrDictProxies ( const InnerOuter &  cols,
const Executor *  executor,
const bool  has_string_ops 
)
static

Definition at line 385 of file HashJoin.cpp.

References CHECK.

Referenced by translateInnerToOuterStrDictProxies().

387  {
388  const auto inner_col = cols.first;
389  CHECK(inner_col);
390  const auto inner_ti = inner_col->get_type_info();
391  const auto outer_col = dynamic_cast<const Analyzer::ColumnVar*>(cols.second);
392  std::pair<const StringDictionaryProxy*, StringDictionaryProxy*>
393  inner_outer_str_dict_proxies{nullptr, nullptr};
394  if (inner_ti.is_string() && outer_col) {
395  CHECK(outer_col->get_type_info().is_string());
396  inner_outer_str_dict_proxies.first =
397  executor->getStringDictionaryProxy(inner_col->get_comp_param(), true);
398  CHECK(inner_outer_str_dict_proxies.first);
399  inner_outer_str_dict_proxies.second =
400  executor->getStringDictionaryProxy(outer_col->get_comp_param(), true);
401  CHECK(inner_outer_str_dict_proxies.second);
402  if (!has_string_ops &&
403  *inner_outer_str_dict_proxies.first == *inner_outer_str_dict_proxies.second) {
404  // Dictionaries are the same - don't need to translate
405  CHECK(inner_col->get_comp_param() == outer_col->get_comp_param());
406  inner_outer_str_dict_proxies.first = nullptr;
407  inner_outer_str_dict_proxies.second = nullptr;
408  }
409  }
410  return inner_outer_str_dict_proxies;
411 }
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the caller graph for this function:

std::shared_ptr< HashJoin > HashJoin::getSyntheticInstance ( std::string_view  table1,
std::string_view  column1,
std::string_view  table2,
std::string_view  column2,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap column_cache,
Executor executor 
)
static

Make hash table from named tables and columns (such as for testing).

Definition at line 672 of file HashJoin.cpp.

References RegisteredQueryHint::defaults(), getInstance(), getSyntheticColumnVar(), getSyntheticInputTableInfo(), INNER, kBOOLEAN, kEQ, kONE, setupSyntheticCaching(), and ScalarExprVisitor< T >::visit().

681  {
682  auto a1 = getSyntheticColumnVar(table1, column1, 0, executor);
683  auto a2 = getSyntheticColumnVar(table2, column2, 1, executor);
684 
685  auto qual_bin_oper = std::make_shared<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, a1, a2);
686 
687  std::set<const Analyzer::ColumnVar*> cvs =
688  AllColumnVarsVisitor().visit(qual_bin_oper.get());
689  auto query_infos = getSyntheticInputTableInfo(cvs, executor);
690  setupSyntheticCaching(cvs, executor);
692 
693  auto hash_table = HashJoin::getInstance(qual_bin_oper,
694  query_infos,
695  memory_level,
697  preferred_hash_type,
698  device_count,
699  column_cache,
700  executor,
701  {},
702  query_hint,
703  {});
704  return hash_table;
705 }
std::vector< InputTableInfo > getSyntheticInputTableInfo(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:645
Definition: sqldefs.h:29
T visit(const Analyzer::Expr *expr) const
void setupSyntheticCaching(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:631
static RegisteredQueryHint defaults()
Definition: QueryHint.h:329
Definition: sqldefs.h:71
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
Definition: HashJoin.cpp:283
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, Executor *executor)
Definition: HashJoin.cpp:560

+ Here is the call graph for this function:

std::shared_ptr< HashJoin > HashJoin::getSyntheticInstance ( const std::shared_ptr< Analyzer::BinOper qual_bin_oper,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap column_cache,
Executor executor 
)
static

Make hash table from named tables and columns (such as for testing).

Definition at line 708 of file HashJoin.cpp.

References RegisteredQueryHint::defaults(), getInstance(), getSyntheticInputTableInfo(), INNER, setupSyntheticCaching(), and ScalarExprVisitor< T >::visit().

714  {
715  std::set<const Analyzer::ColumnVar*> cvs =
716  AllColumnVarsVisitor().visit(qual_bin_oper.get());
717  auto query_infos = getSyntheticInputTableInfo(cvs, executor);
718  setupSyntheticCaching(cvs, executor);
720 
721  auto hash_table = HashJoin::getInstance(qual_bin_oper,
722  query_infos,
723  memory_level,
725  preferred_hash_type,
726  device_count,
727  column_cache,
728  executor,
729  {},
730  query_hint,
731  {});
732  return hash_table;
733 }
std::vector< InputTableInfo > getSyntheticInputTableInfo(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:645
T visit(const Analyzer::Expr *expr) const
void setupSyntheticCaching(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:631
static RegisteredQueryHint defaults()
Definition: QueryHint.h:329
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
Definition: HashJoin.cpp:283

+ Here is the call graph for this function:

std::pair< std::string, std::shared_ptr< HashJoin > > HashJoin::getSyntheticInstance ( std::vector< std::shared_ptr< Analyzer::BinOper >>  qual_bin_opers,
const Data_Namespace::MemoryLevel  memory_level,
const HashType  preferred_hash_type,
const int  device_count,
ColumnCacheMap column_cache,
Executor executor 
)
static

Definition at line 735 of file HashJoin.cpp.

References RegisteredQueryHint::defaults(), getInstance(), getSyntheticInputTableInfo(), INNER, setupSyntheticCaching(), and ScalarExprVisitor< T >::visit().

741  {
742  std::set<const Analyzer::ColumnVar*> cvs;
743  for (auto& qual : qual_bin_opers) {
744  auto cv = AllColumnVarsVisitor().visit(qual.get());
745  cvs.insert(cv.begin(), cv.end());
746  }
747  auto query_infos = getSyntheticInputTableInfo(cvs, executor);
748  setupSyntheticCaching(cvs, executor);
750  std::shared_ptr<HashJoin> hash_table;
751  std::string error_msg;
752  for (auto& qual : qual_bin_opers) {
753  try {
754  auto candidate_hash_table = HashJoin::getInstance(qual,
755  query_infos,
756  memory_level,
758  preferred_hash_type,
759  device_count,
760  column_cache,
761  executor,
762  {},
763  query_hint,
764  {});
765  if (candidate_hash_table) {
766  hash_table = candidate_hash_table;
767  }
768  } catch (HashJoinFail& e) {
769  error_msg = e.what();
770  continue;
771  }
772  }
773  return std::make_pair(error_msg, hash_table);
774 }
std::vector< InputTableInfo > getSyntheticInputTableInfo(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:645
T visit(const Analyzer::Expr *expr) const
void setupSyntheticCaching(std::set< const Analyzer::ColumnVar * > cvs, Executor *executor)
Definition: HashJoin.cpp:631
static RegisteredQueryHint defaults()
Definition: QueryHint.h:329
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
Definition: HashJoin.cpp:283

+ Here is the call graph for this function:

virtual bool HashJoin::isBitwiseEq ( ) const
pure virtual
std::pair< InnerOuter, InnerOuterStringOpInfos > HashJoin::normalizeColumnPair ( const Analyzer::Expr *  lhs,
const Analyzer::Expr *  rhs,
const Catalog_Namespace::Catalog &  cat,
const TemporaryTables *  temporary_tables,
const bool  is_overlaps_join = false 
)
static

Definition at line 802 of file HashJoin.cpp.

References cat(), CHECK, CHECK_GT, get_column_descriptor_maybe(), get_column_type(), SQLTypeInfo::get_precision(), SQLTypeInfo::get_scale(), SQLTypeInfo::get_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::get_type_name(), is_constructed_point(), SQLTypeInfo::is_decimal(), SQLTypeInfo::is_integer(), SQLTypeInfo::is_string(), SQLTypeInfo::is_time(), kCAST, kENCODING_DICT, kPOINT, LHS, RHS, gpu_enabled::swap(), UNKNOWN, and ScalarExprVisitor< T >::visit().

Referenced by anonymous_namespace{PerfectJoinHashTable.cpp}::get_cols(), anonymous_namespace{HashJoin.cpp}::get_cols(), normalizeColumnPairs(), and QueryPlanDagExtractor::normalizeColumnsPair().

807  {
808  SQLTypeInfo lhs_ti = lhs->get_type_info();
809  SQLTypeInfo rhs_ti = rhs->get_type_info();
810  if (!is_overlaps_join) {
811  if (lhs_ti.get_type() != rhs_ti.get_type()) {
812  throw HashJoinFail("Equijoin types must be identical, found: " +
813  lhs_ti.get_type_name() + ", " + rhs_ti.get_type_name());
814  }
815  if (!lhs_ti.is_integer() && !lhs_ti.is_time() && !lhs_ti.is_string() &&
816  !lhs_ti.is_decimal()) {
817  throw HashJoinFail("Cannot apply hash join to inner column type " +
818  lhs_ti.get_type_name());
819  }
820  // Decimal types should be identical.
821  if (lhs_ti.is_decimal() && (lhs_ti.get_scale() != rhs_ti.get_scale() ||
822  lhs_ti.get_precision() != rhs_ti.get_precision())) {
823  throw HashJoinFail("Equijoin with different decimal types");
824  }
825  }
826 
827  const auto lhs_cast = dynamic_cast<const Analyzer::UOper*>(lhs);
828  const auto rhs_cast = dynamic_cast<const Analyzer::UOper*>(rhs);
829  if (lhs_ti.is_string() && (static_cast<bool>(lhs_cast) != static_cast<bool>(rhs_cast) ||
830  (lhs_cast && lhs_cast->get_optype() != kCAST) ||
831  (rhs_cast && rhs_cast->get_optype() != kCAST))) {
832  throw HashJoinFail(
833  "Cannot use hash join for given expression (non-cast unary operator)");
834  }
835  // Casts to decimal are not supported.
836  if (lhs_ti.is_decimal() && (lhs_cast || rhs_cast)) {
837  throw HashJoinFail("Cannot use hash join for given expression (cast to decimal)");
838  }
839  auto lhs_col = getHashJoinColumn<Analyzer::ColumnVar>(lhs);
840  auto rhs_col = getHashJoinColumn<Analyzer::ColumnVar>(rhs);
841 
842  const auto lhs_string_oper = getHashJoinColumn<Analyzer::StringOper>(lhs);
843  const auto rhs_string_oper = getHashJoinColumn<Analyzer::StringOper>(rhs);
844 
845  auto process_string_op_infos = [](const auto& string_oper, auto& col, auto& ti) {
846  std::vector<StringOps_Namespace::StringOpInfo> string_op_infos;
847  if (string_oper) {
848  col = dynamic_cast<const Analyzer::ColumnVar*>(string_oper->getArg(0));
849  if (!col) {
850  // Todo (todd): Allow for non-colvar inputs into string operators for
851  // join predicates
852  // We now guard against non constant/colvar/stringoper inputs
853  // in Analyzer::StringOper::check_operand_types, but keeping this to not
854  // depend on that logic if and when it changes as allowing non-colvar inputs
855  // for hash joins will be additional work on top of allowing them
856  // outside of join predicates
857  throw HashJoinFail(
858  "Hash joins involving string operators currently restricted to column inputs "
859  "(i.e. not case statements).");
860  }
861  ti = col->get_type_info();
862  CHECK(ti.is_dict_encoded_string());
863  const auto chained_string_op_exprs = string_oper->getChainedStringOpExprs();
864  CHECK_GT(chained_string_op_exprs.size(), 0UL);
865  for (const auto& chained_string_op_expr : chained_string_op_exprs) {
866  auto chained_string_op =
867  dynamic_cast<const Analyzer::StringOper*>(chained_string_op_expr.get());
868  CHECK(chained_string_op);
869  StringOps_Namespace::StringOpInfo string_op_info(
870  chained_string_op->get_kind(),
871  chained_string_op->get_type_info(),
872  chained_string_op->getLiteralArgs());
873  string_op_infos.emplace_back(string_op_info);
874  }
875  }
876  return string_op_infos;
877  };
878 
879  auto outer_string_op_infos = process_string_op_infos(lhs_string_oper, lhs_col, lhs_ti);
880  auto inner_string_op_infos = process_string_op_infos(rhs_string_oper, rhs_col, rhs_ti);
881 
882  if (!lhs_col && !rhs_col) {
883  throw HashJoinFail(
884  "Cannot use hash join for given expression (both lhs and rhs are invalid)",
886  }
887 
888  const Analyzer::ColumnVar* inner_col{nullptr};
889  const Analyzer::ColumnVar* outer_col{nullptr};
890  auto outer_ti = lhs_ti;
891  auto inner_ti = rhs_ti;
892  const Analyzer::Expr* outer_expr{lhs};
893  InnerQualDecision inner_qual_decision = InnerQualDecision::UNKNOWN;
894  if (!lhs_col || (rhs_col && lhs_col->get_rte_idx() < rhs_col->get_rte_idx())) {
895  inner_qual_decision = InnerQualDecision::RHS;
896  inner_col = rhs_col;
897  outer_col = lhs_col;
898  } else {
899  inner_qual_decision = InnerQualDecision::LHS;
900  if (lhs_col && lhs_col->get_rte_idx() == 0) {
901  throw HashJoinFail(
902  "Cannot use hash join for given expression (lhs' rte idx is zero)",
903  inner_qual_decision);
904  }
905  inner_col = lhs_col;
906  outer_col = rhs_col;
907  std::swap(outer_ti, inner_ti);
908  std::swap(outer_string_op_infos, inner_string_op_infos);
909  outer_expr = rhs;
910  }
911  if (!inner_col) {
912  throw HashJoinFail("Cannot use hash join for given expression (invalid inner col)",
913  inner_qual_decision);
914  }
915  if (!outer_col) {
916  // check whether outer_col is a constant, i.e., inner_col = K;
917  const auto outer_constant_col = dynamic_cast<const Analyzer::Constant*>(outer_expr);
918  if (outer_constant_col) {
919  throw HashJoinFail(
920  "Cannot use hash join for given expression: try to join with a constant "
921  "value",
922  inner_qual_decision);
923  }
924  MaxRangeTableIndexVisitor rte_idx_visitor;
925  int outer_rte_idx = rte_idx_visitor.visit(outer_expr);
926  // The inner column candidate is not actually inner; the outer
927  // expression contains columns which are at least as deep.
928  if (inner_col->get_rte_idx() <= outer_rte_idx) {
929  throw HashJoinFail(
930  "Cannot use hash join for given expression (inner's rte <= outer's rte)",
931  inner_qual_decision);
932  }
933  }
934  // We need to fetch the actual type information from the catalog since Analyzer
935  // always reports nullable as true for inner table columns in left joins.
936  const auto inner_col_cd = get_column_descriptor_maybe(
937  inner_col->get_column_id(), inner_col->get_table_id(), cat);
938  const auto inner_col_real_ti = get_column_type(inner_col->get_column_id(),
939  inner_col->get_table_id(),
940  inner_col_cd,
941  temporary_tables);
942  const auto& outer_col_ti =
943  !(dynamic_cast<const Analyzer::FunctionOper*>(lhs)) && outer_col
944  ? outer_col->get_type_info()
945  : outer_ti;
946  // Casts from decimal are not supported.
947  if ((inner_col_real_ti.is_decimal() || outer_col_ti.is_decimal()) &&
948  (lhs_cast || rhs_cast)) {
949  throw HashJoinFail("Cannot use hash join for given expression (cast from decimal)");
950  }
951  if (is_overlaps_join) {
952  if (!inner_col_real_ti.is_array()) {
953  throw HashJoinFail(
954  "Overlaps join only supported for inner columns with array type");
955  }
956  auto is_bounds_array = [](const auto ti) {
957  return ti.is_fixlen_array() && ti.get_size() == 32;
958  };
959  if (!is_bounds_array(inner_col_real_ti)) {
960  throw HashJoinFail(
961  "Overlaps join only supported for 4-element double fixed length arrays");
962  }
963  if (!(outer_col_ti.get_type() == kPOINT || is_bounds_array(outer_col_ti) ||
964  is_constructed_point(outer_expr))) {
965  throw HashJoinFail(
966  "Overlaps join only supported for geometry outer columns of type point, "
967  "geometry columns with bounds or constructed points");
968  }
969  } else {
970  if (!(inner_col_real_ti.is_integer() || inner_col_real_ti.is_time() ||
971  inner_col_real_ti.is_decimal() ||
972  (inner_col_real_ti.is_string() &&
973  inner_col_real_ti.get_compression() == kENCODING_DICT))) {
974  throw HashJoinFail(
975  "Can only apply hash join to integer-like types and dictionary encoded "
976  "strings");
977  }
978  }
979 
980  auto normalized_inner_col = inner_col;
981  auto normalized_outer_col = outer_col ? outer_col : outer_expr;
982 
983  const auto& normalized_inner_ti = normalized_inner_col->get_type_info();
984  const auto& normalized_outer_ti = normalized_outer_col->get_type_info();
985 
986  if (normalized_inner_ti.is_string() != normalized_outer_ti.is_string()) {
987  throw HashJoinFail(std::string("Could not build hash tables for incompatible types " +
988  normalized_inner_ti.get_type_name() + " and " +
989  normalized_outer_ti.get_type_name()));
990  }
991  return std::make_pair(std::make_pair(normalized_inner_col, normalized_outer_col),
992  std::make_pair(inner_string_op_infos, outer_string_op_infos));
993 }
std::string cat(Ts &&...args)
HOST DEVICE int get_scale() const
Definition: sqltypes.h:385
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1516
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:236
Definition: sqldefs.h:48
T visit(const Analyzer::Expr *expr) const
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
#define CHECK_GT(x, y)
Definition: Logger.h:301
bool is_time() const
Definition: sqltypes.h:582
const ColumnDescriptor * get_column_descriptor_maybe(const int col_id, const int table_id, const Catalog_Namespace::Catalog &cat)
Definition: Execute.h:220
bool is_integer() const
Definition: sqltypes.h:578
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:83
int get_precision() const
Definition: sqltypes.h:383
Expression class for string functions. The "arg" constructor parameter must be an expression that reso...
Definition: Analyzer.h:1475
std::string get_type_name() const
Definition: sqltypes.h:504
#define CHECK(condition)
Definition: Logger.h:289
InnerQualDecision
Definition: HashJoin.h:62
bool is_string() const
Definition: sqltypes.h:576
bool is_decimal() const
Definition: sqltypes.h:579
DEVICE void swap(ARGS &&...args)
Definition: gpu_enabled.h:114

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > HashJoin::normalizeColumnPairs ( const Analyzer::BinOper condition,
const Catalog_Namespace::Catalog cat,
const TemporaryTables temporary_tables 
)
static

Definition at line 996 of file HashJoin.cpp.

References cat(), CHECK, CHECK_EQ, Analyzer::BinOper::get_left_operand(), Analyzer::BinOper::get_right_operand(), Analyzer::BinOper::is_overlaps_oper(), normalizeColumnPair(), and run_benchmark_import::result.

Referenced by anonymous_namespace{FromTableReordering.cpp}::get_join_qual_cost(), BaselineJoinHashTable::getInstance(), OverlapsJoinHashTable::getInstance(), BaselineJoinHashTable::reifyWithLayout(), and Executor::skipFragmentPair().

998  {
999  std::pair<std::vector<InnerOuter>, std::vector<InnerOuterStringOpInfos>> result;
1000  const auto lhs_tuple_expr =
1001  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_left_operand());
1002  const auto rhs_tuple_expr =
1003  dynamic_cast<const Analyzer::ExpressionTuple*>(condition->get_right_operand());
1004 
1005  CHECK_EQ(static_cast<bool>(lhs_tuple_expr), static_cast<bool>(rhs_tuple_expr));
1006  if (lhs_tuple_expr) {
1007  const auto& lhs_tuple = lhs_tuple_expr->getTuple();
1008  const auto& rhs_tuple = rhs_tuple_expr->getTuple();
1009  CHECK_EQ(lhs_tuple.size(), rhs_tuple.size());
1010  for (size_t i = 0; i < lhs_tuple.size(); ++i) {
1011  const auto col_pair = normalizeColumnPair(lhs_tuple[i].get(),
1012  rhs_tuple[i].get(),
1013  cat,
1014  temporary_tables,
1015  condition->is_overlaps_oper());
1016  result.first.emplace_back(col_pair.first);
1017  result.second.emplace_back(col_pair.second);
1018  }
1019  } else {
1020  CHECK(!lhs_tuple_expr && !rhs_tuple_expr);
1021  const auto col_pair = normalizeColumnPair(condition->get_left_operand(),
1022  condition->get_right_operand(),
1023  cat,
1024  temporary_tables,
1025  condition->is_overlaps_oper());
1026  result.first.emplace_back(col_pair.first);
1027  result.second.emplace_back(col_pair.second);
1028  }
1029 
1030  return result;
1031 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297
std::string cat(Ts &&...args)
const Expr * get_right_operand() const
Definition: Analyzer.h:452
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
Definition: HashJoin.cpp:802
#define CHECK(condition)
Definition: Logger.h:289
const Expr * get_left_operand() const
Definition: Analyzer.h:451
bool is_overlaps_oper() const
Definition: Analyzer.h:449

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual size_t HashJoin::offsetBufferOff ( ) const
pure virtual noexcept
virtual size_t HashJoin::payloadBufferOff ( ) const
pure virtual noexcept
virtual DecodedJoinHashBufferSet HashJoin::toSet ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
pure virtual
virtual std::string HashJoin::toString ( const ExecutorDeviceType  device_type,
const int  device_id = 0,
bool  raw = false 
) const
pure virtual
std::string HashJoin::toStringFlat32 ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
virtual

Definition at line 121 of file HashJoin.cpp.

122  {
123  return toStringFlat<int32_t>(this, device_type, device_id);
124 }
std::string HashJoin::toStringFlat64 ( const ExecutorDeviceType  device_type,
const int  device_id 
) const
virtual

Definition at line 116 of file HashJoin.cpp.

117  {
118  return toStringFlat<int64_t>(this, device_type, device_id);
119 }
std::vector< const StringDictionaryProxy::IdMap * > HashJoin::translateCompositeStrDictProxies ( const CompositeKeyInfo composite_key_info,
const std::vector< InnerOuterStringOpInfos > &  string_op_infos_for_keys,
const Executor executor 
)
static

Definition at line 508 of file HashJoin.cpp.

References CHECK, CHECK_EQ, CHECK_NE, CompositeKeyInfo::sd_inner_proxy_per_key, and CompositeKeyInfo::sd_outer_proxy_per_key.

Referenced by BaselineJoinHashTable::reifyWithLayout().

511  {
512  const auto& inner_proxies = composite_key_info.sd_inner_proxy_per_key;
513  const auto& outer_proxies = composite_key_info.sd_outer_proxy_per_key;
514  const size_t num_proxies = inner_proxies.size();
515  CHECK_EQ(num_proxies, outer_proxies.size());
516  std::vector<const StringDictionaryProxy::IdMap*> proxy_translation_maps;
517  proxy_translation_maps.reserve(num_proxies);
518  for (size_t proxy_pair_idx = 0; proxy_pair_idx < num_proxies; ++proxy_pair_idx) {
519  const bool translate_proxies =
520  inner_proxies[proxy_pair_idx] && outer_proxies[proxy_pair_idx];
521  if (translate_proxies) {
522  const auto inner_proxy =
523  reinterpret_cast<const StringDictionaryProxy*>(inner_proxies[proxy_pair_idx]);
524  auto outer_proxy =
525  reinterpret_cast<StringDictionaryProxy*>(outer_proxies[proxy_pair_idx]);
526  CHECK(inner_proxy);
527  CHECK(outer_proxy);
528 
529  CHECK_NE(inner_proxy->getDictId(), outer_proxy->getDictId());
530  proxy_translation_maps.emplace_back(
531  executor->getJoinIntersectionStringProxyTranslationMap(
532  inner_proxy,
533  outer_proxy,
534  string_op_infos_for_keys[proxy_pair_idx].first,
535  string_op_infos_for_keys[proxy_pair_idx].second,
536  executor->getRowSetMemoryOwner()));
537  } else {
538  proxy_translation_maps.emplace_back(nullptr);
539  }
540  }
541  return proxy_translation_maps;
542 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297
std::vector< const void * > sd_inner_proxy_per_key
Definition: HashJoin.h:127
std::vector< void * > sd_outer_proxy_per_key
Definition: HashJoin.h:128
#define CHECK_NE(x, y)
Definition: Logger.h:298
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the caller graph for this function:

const StringDictionaryProxy::IdMap * HashJoin::translateInnerToOuterStrDictProxies ( const InnerOuter cols,
const InnerOuterStringOpInfos inner_outer_string_op_infos,
ExpressionRange old_col_range,
const Executor executor 
)
static

Definition at line 413 of file HashJoin.cpp.

References CHECK, ExpressionRange::getIntMax(), ExpressionRange::getIntMin(), getStrDictProxies(), ExpressionRange::hasNulls(), ExpressionRange::makeIntRange(), and StringDictionaryProxy::TranslationMap< T >::size().

Referenced by PerfectJoinHashTable::reify().

417  {
418  const bool has_string_ops = inner_outer_string_op_infos.first.size() ||
419  inner_outer_string_op_infos.second.size();
420  const auto inner_outer_proxies =
421  HashJoin::getStrDictProxies(cols, executor, has_string_ops);
422  const bool translate_dictionary =
423  inner_outer_proxies.first && inner_outer_proxies.second;
424  if (translate_dictionary) {
425  const auto inner_dict_id = inner_outer_proxies.first->getDictId();
426  const auto outer_dict_id = inner_outer_proxies.second->getDictId();
427  CHECK(has_string_ops || inner_dict_id != outer_dict_id);
428  const auto id_map = executor->getJoinIntersectionStringProxyTranslationMap(
429  inner_outer_proxies.first,
430  inner_outer_proxies.second,
431  inner_outer_string_op_infos.first,
432  inner_outer_string_op_infos.second,
433  executor->getRowSetMemoryOwner());
434  if (!inner_outer_string_op_infos.second.empty()) {
435  // String op was applied to lhs table,
436  // need to expand column range appropriately
437  col_range = ExpressionRange::makeIntRange(
438  std::min(col_range.getIntMin(),
439  static_cast<int64_t>(
440  inner_outer_proxies.second->transientEntryCount() + 1) *
441  -1),
442  col_range.getIntMax(),
443  0,
444  col_range.hasNulls());
445  }
446  return id_map;
447  }
448  return nullptr;
449 }
static std::pair< const StringDictionaryProxy *, StringDictionaryProxy * > getStrDictProxies(const InnerOuter &cols, const Executor *executor, const bool has_string_ops)
Definition: HashJoin.cpp:385
static ExpressionRange makeIntRange(const int64_t int_min, const int64_t int_max, const int64_t bucket, const bool has_nulls)
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation


The documentation for this class was generated from the following files: