19 #include <llvm/IR/Value.h> 35 :
std::runtime_error(
"Hash tables with more than 2B entries not supported yet") {}
43 :
std::runtime_error(
"Hash join failed: Table '" + table_name +
44 "' must be replicated.") {}
60 :
HashJoinFail(
"Not enough memory for columns involved in join") {}
82 using InnerOuter = std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*>;
95 const int device_id = 0,
96 bool raw =
false)
const = 0;
99 const int device_id)
const;
102 const int device_id)
const;
105 const int device_id)
const = 0;
112 virtual int getInnerTableId()
const noexcept = 0;
114 virtual int getInnerTableRteIdx()
const noexcept = 0;
116 virtual HashType getHashType()
const noexcept = 0;
123 const char* HashTypeStrings[3] = {
"OneToOne",
"OneToMany",
"ManyToMany"};
124 return HashTypeStrings[
static_cast<int>(ht)];
128 const std::vector<llvm::Value*>& hash_join_idx_args_in,
129 const bool is_sharded,
130 const bool col_is_nullable,
132 const int64_t sub_buff_size,
134 const bool is_bucketized =
false);
136 static llvm::Value* codegenHashTableLoad(
const size_t table_idx, Executor* executor);
140 virtual int getDeviceCount()
const noexcept = 0;
142 virtual size_t offsetBufferOff()
const noexcept = 0;
144 virtual size_t countBufferOff()
const noexcept = 0;
146 virtual size_t payloadBufferOff()
const noexcept = 0;
148 virtual std::string getHashJoinType()
const = 0;
152 const std::vector<Fragmenter_Namespace::FragmentInfo>& fragment_info,
155 std::vector<std::shared_ptr<Chunk_NS::Chunk>>& chunks_owner,
157 std::vector<std::shared_ptr<void>>& malloc_owner,
162 static std::shared_ptr<HashJoin> getInstance(
163 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
164 const std::vector<InputTableInfo>& query_infos,
167 const int device_count,
173 static std::shared_ptr<HashJoin> getSyntheticInstance(
174 std::string_view table1,
175 std::string_view column1,
176 std::string_view table2,
177 std::string_view column2,
180 const int device_count,
185 static std::shared_ptr<HashJoin> getSyntheticInstance(
186 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
189 const int device_count,
194 CHECK(!inner_outer_pairs.empty());
195 const auto first_inner_col = inner_outer_pairs.front().first;
196 return first_inner_col->get_table_id();
199 static void checkHashJoinReplicationConstraint(
const int table_id,
200 const size_t shard_count,
201 const Executor* executor);
204 CHECK_LT(device_id, hash_tables_for_device_.size());
205 return hash_tables_for_device_[device_id].get();
210 return getJoinHashBufferSize(device_type, 0);
214 const int device_id)
const {
215 auto hash_table = getHashTableForDevice(device_id);
219 return hash_table->getHashTableBufferSize(device_type);
223 const int device_id)
const {
225 CHECK_LT(
size_t(device_id), hash_tables_for_device_.size());
226 if (!hash_tables_for_device_[device_id]) {
229 CHECK(hash_tables_for_device_[device_id]);
230 auto hash_table = hash_tables_for_device_[device_id].get();
233 return reinterpret_cast<int64_t
>(hash_table->getCpuBuffer());
236 const auto gpu_buff = hash_table->getGpuBuffer();
241 return reinterpret_cast<int64_t
>(hash_table->getCpuBuffer());
246 auto empty_hash_tables =
247 decltype(hash_tables_for_device_)(hash_tables_for_device_.size());
248 hash_tables_for_device_.swap(empty_hash_tables);
252 const std::vector<InnerOuter>& inner_outer_pairs,
253 const Executor* executor);
256 virtual size_t getComponentBufferSize()
const noexcept = 0;
266 std::string_view column,
273 std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*> equi_pair,
274 const Executor* executor);
281 const bool is_overlaps_join =
false);
Defines data structures for the semantic analysis phase of query processing.
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const
std::string toString(const ExtArgumentType &sig_type)
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
Class for a per-database catalog. Also includes metadata for the current database and the current use...
FailedToJoinOnVirtualColumn()
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
std::vector< ChunkKey > cache_key_chunks
std::vector< const void * > sd_inner_proxy_per_key
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > >> ColumnCacheMap
unsigned long long CUdeviceptr
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
TableMustBeReplicated(const std::string &table_name)
void freeHashBufferMemory()
InnerOuter normalize_column_pair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, Executor *executor)
const std::vector< JoinColumnTypeInfo > join_column_types
std::ostream & operator<<(std::ostream &os, const DecodedJoinHashBufferEntry &e)
HashJoinFail(const std::string &reason)
std::vector< const void * > sd_outer_proxy_per_key
int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
HashTable * getHashTableForDevice(const size_t device_id) const
const std::vector< std::shared_ptr< Chunk_NS::Chunk > > chunks_owner
TooManyHashEntries(const std::string &reason)
static std::string getHashTypeString(HashType ht) noexcept
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
static int getInnerTableId(const std::vector< InnerOuter > &inner_outer_pairs)
std::vector< InnerOuter > normalize_column_pairs(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
const std::vector< std::shared_ptr< void > > malloc_owner
std::vector< JoinBucketInfo > join_buckets
const std::vector< JoinColumn > join_columns
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept