19 #include <llvm/IR/Value.h>
41 : std::runtime_error(
"The size of hash table is larger than a threshold (" +
42 ::
toString(cur_hash_table_size) +
" > " +
49 : std::runtime_error(
"Hash tables with more than 4B entries not supported yet") {}
57 : std::runtime_error(
"Hash join failed: Table '" + table_name +
58 "' must be replicated.") {}
65 constexpr
char const* strings[]{
"IGNORE",
"UNKNOWN",
"LHS",
"RHS"};
66 return os << strings[static_cast<int>(decision)];
88 :
HashJoinFail(
"Not enough memory for columns involved in join") {}
100 "Could not create hash table for bounding box intersection with less than "
101 "max allowed size of " +
102 std::
to_string(bbox_intersect_hash_table_max_bytes) +
" bytes") {}
105 using InnerOuter = std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*>;
107 std::vector<StringOps_Namespace::StringOpInfo>>;
116 void setBucketInfo(
const std::vector<double>& bucket_sizes_for_dimension,
117 const std::vector<InnerOuter> inner_outer_pairs);
138 const int device_id = 0,
139 bool raw =
false)
const = 0;
142 const int device_id)
const;
145 const int device_id)
const;
148 const int device_id)
const = 0;
162 const Executor* executor,
163 size_t rowid_size) noexcept;
168 std::ostringstream oss;
169 oss <<
"Hash tables with more than " << threshold
170 <<
" entries (# hash entries: " << num_entries <<
") on "
171 <<
::toString(memory_level) <<
" not supported yet";
180 const char* HashTypeStrings[3] = {
"OneToOne",
"OneToMany",
"ManyToMany"};
181 return HashTypeStrings[
static_cast<int>(ht)];
185 const std::vector<llvm::Value*>& hash_join_idx_args_in,
186 const bool is_sharded,
187 const bool col_is_nullable,
189 const int64_t sub_buff_size,
191 const bool is_bucketized =
false);
210 const Analyzer::ColumnVar* hash_col,
211 const std::vector<Fragmenter_Namespace::FragmentInfo>& fragment_info,
212 const Data_Namespace::
MemoryLevel effective_memory_level,
214 std::vector<std::shared_ptr<Chunk_NS::
Chunk>>& chunks_owner,
216 std::vector<std::shared_ptr<
void>>& malloc_owner,
222 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
227 const
int device_count,
236 std::string_view table1,
237 std::string_view column1,
238 const Catalog_Namespace::Catalog& catalog1,
239 std::string_view table2,
240 std::string_view column2,
241 const Catalog_Namespace::Catalog& catalog2,
244 const
int device_count,
250 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
253 const
int device_count,
258 std::vector<std::shared_ptr<Analyzer::BinOper>>,
261 const
int device_count,
266 const std::vector<
InnerOuter>& inner_outer_pairs) {
267 CHECK(!inner_outer_pairs.empty());
268 const auto first_inner_col = inner_outer_pairs.front().first;
269 return first_inner_col->getTableKey();
273 bool invalid_cache_key,
277 const size_t shard_count,
278 const Executor* executor);
285 const bool is_bbox_intersect =
false);
287 template <
typename T>
291 static std::pair<std::vector<InnerOuter>, std::vector<InnerOuterStringOpInfos>>
306 const int device_id)
const {
311 return hash_table->getHashTableBufferSize(device_type);
315 const int device_id)
const {
325 return hash_table->getCpuBuffer();
328 const auto gpu_buff = hash_table->getGpuBuffer();
333 return hash_table->getCpuBuffer();
338 auto empty_hash_tables =
344 const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments);
347 const std::vector<InnerOuter>& inner_outer_pairs,
348 const Executor* executor,
349 const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs = {});
351 static std::vector<const StringDictionaryProxy::IdMap*>
354 const std::vector<InnerOuterStringOpInfos>& string_op_infos_for_keys,
355 const Executor* executor);
357 static std::pair<const StringDictionaryProxy*, StringDictionaryProxy*>
359 const Executor* executor,
360 const bool has_string_ops);
366 const Executor* executor);
371 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
384 std::ostream& operator<<(std::ostream& os,
386 std::ostream& operator<<(
396 std::string_view table,
397 std::string_view column,
399 const Catalog_Namespace::Catalog& catalog);
401 size_t get_shard_count(const Analyzer::BinOper* join_condition, const Executor* executor);
404 std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*> equi_pair,
405 const Executor* executor);
static std::vector< int > collectFragmentIds(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
static std::shared_ptr< HashJoin > getSyntheticInstance(std::string_view table1, std::string_view column1, const Catalog_Namespace::Catalog &catalog1, std::string_view table2, std::string_view column2, const Catalog_Namespace::Catalog &catalog2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from named tables and columns (such as for testing).
Defines data structures for the semantic analysis phase of query processing.
virtual int getInnerTableRteIdx() const noexcept=0
virtual size_t payloadBufferOff() const noexcept=0
virtual std::string getHashJoinType() const =0
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t)=0
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
FailedToJoinOnVirtualColumn()
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
static bool canAccessHashTable(bool allow_hash_table_recycling, bool invalid_cache_key, JoinType join_type)
virtual HashType getHashType() const noexcept=0
std::vector< ChunkKey > cache_key_chunks
std::vector< const void * > sd_inner_proxy_per_key
virtual int getDeviceCount() const noexcept=0
virtual std::string toStringFlat64(const ExecutorDeviceType device_type, const int device_id) const
static void checkHashJoinReplicationConstraint(const shared::TableKey &table_key, const size_t shard_count, const Executor *executor)
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
void setBucketInfo(const std::vector< double > &bucket_sizes_for_dimension, const std::vector< InnerOuter > inner_outer_pairs)
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
virtual Data_Namespace::MemoryLevel getMemoryLevel() const noexcept=0
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
static std::pair< const StringDictionaryProxy *, StringDictionaryProxy * > getStrDictProxies(const InnerOuter &cols, const Executor *executor, const bool has_string_ops)
virtual llvm::Value * codegenSlot(const CompilationOptions &, const size_t)=0
TableMustBeReplicated(const std::string &table_name)
std::string toString(const QueryDescriptionType &type)
static llvm::Value * codegenColOrStringOper(const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
void freeHashBufferMemory()
virtual size_t offsetBufferOff() const noexcept=0
virtual std::string toStringFlat32(const ExecutorDeviceType device_type, const int device_id) const
virtual size_t countBufferOff() const noexcept=0
const std::vector< JoinColumnTypeInfo > join_column_types
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
std::vector< void * > sd_outer_proxy_per_key
HashJoinFail(const std::string &err_msg, InnerQualDecision qual_decision)
static size_t getMaximumNumHashEntriesCanHold(MemoryLevel memory_level, const Executor *executor, size_t rowid_size) noexcept
static std::string generateTooManyHashEntriesErrMsg(size_t num_entries, size_t threshold, MemoryLevel memory_level)
static constexpr size_t MAX_NUM_HASH_ENTRIES
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
static std::vector< const StringDictionaryProxy::IdMap * > translateCompositeStrDictProxies(const CompositeKeyInfo &composite_key_info, const std::vector< InnerOuterStringOpInfos > &string_op_infos_for_keys, const Executor *executor)
JoinHashTableTooBig(size_t cur_hash_table_size, size_t threshold_size)
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const
virtual size_t getComponentBufferSize() const noexcept=0
const std::vector< std::shared_ptr< Chunk_NS::Chunk > > chunks_owner
static const StringDictionaryProxy::IdMap * translateInnerToOuterStrDictProxies(const InnerOuter &cols, const InnerOuterStringOpInfos &inner_outer_string_op_infos, ExpressionRange &old_col_range, const Executor *executor)
HashTable * getHashTableForDevice(const size_t device_id) const
virtual shared::TableKey getInnerTableId() const noexcept=0
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
TooManyHashEntries(const std::string &reason)
static std::string getHashTypeString(HashType ht) noexcept
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
HashJoinFail(const std::string &err_msg)
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, const Catalog_Namespace::Catalog &catalog)
TooBigHashTableForBoundingBoxIntersect(const size_t bbox_intersect_hash_table_max_bytes)
std::pair< std::vector< StringOps_Namespace::StringOpInfo >, std::vector< StringOps_Namespace::StringOpInfo >> InnerOuterStringOpInfos
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const TemporaryTables *temporary_tables, const bool is_bbox_intersect=false)
static const T * getHashJoinColumn(const Analyzer::Expr *expr)
static std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > normalizeColumnPairs(const Analyzer::BinOper *condition, const TemporaryTables *temporary_tables)
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
const std::vector< std::shared_ptr< void > > malloc_owner
virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type, const int device_id) const =0
std::vector< JoinBucketInfo > join_buckets
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
virtual std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
InnerQualDecision inner_qual_decision
const std::vector< JoinColumn > join_columns
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
virtual bool isBitwiseEq() const =0
static CompositeKeyInfo getCompositeKeyInfo(const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs={})