19 #include <llvm/IR/Value.h>
42 "The size of hash table is larger than a threshold defined in the query hint "
44 ::
toString(cur_hash_table_size) +
" > " + ::
toString(query_hint_size) +
")") {
51 : std::runtime_error(
"Hash tables with more than 2B entries not supported yet") {}
59 : std::runtime_error(
"Hash join failed: Table '" + table_name +
60 "' must be replicated.") {}
67 constexpr
char const* strings[]{
"IGNORE",
"UNKNOWN",
"LHS",
"RHS"};
68 return os << strings[static_cast<int>(decision)];
90 :
HashJoinFail(
"Not enough memory for columns involved in join") {}
102 "Could not create overlaps hash table with less than max allowed size of " +
103 std::
to_string(overlaps_hash_table_max_bytes) +
" bytes") {}
106 using InnerOuter = std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*>;
108 std::vector<StringOps_Namespace::StringOpInfo>>;
117 void setBucketInfo(
const std::vector<double>& bucket_sizes_for_dimension,
118 const std::vector<InnerOuter> inner_outer_pairs);
138 const int device_id = 0,
139 bool raw =
false)
const = 0;
142 const int device_id)
const;
145 const int device_id)
const;
148 const int device_id)
const = 0;
166 const char* HashTypeStrings[3] = {
"OneToOne",
"OneToMany",
"ManyToMany"};
167 return HashTypeStrings[
static_cast<int>(ht)];
171 const std::vector<llvm::Value*>& hash_join_idx_args_in,
172 const bool is_sharded,
173 const bool col_is_nullable,
175 const int64_t sub_buff_size,
177 const bool is_bucketized =
false);
196 const Analyzer::ColumnVar* hash_col,
197 const std::vector<Fragmenter_Namespace::FragmentInfo>& fragment_info,
198 const Data_Namespace::
MemoryLevel effective_memory_level,
200 std::vector<std::shared_ptr<Chunk_NS::
Chunk>>& chunks_owner,
202 std::vector<std::shared_ptr<
void>>& malloc_owner,
208 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
213 const
int device_count,
222 std::string_view table1,
223 std::string_view column1,
224 const Catalog_Namespace::Catalog& catalog1,
225 std::string_view table2,
226 std::string_view column2,
227 const Catalog_Namespace::Catalog& catalog2,
230 const
int device_count,
236 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
239 const
int device_count,
244 std::vector<std::shared_ptr<Analyzer::BinOper>>,
247 const
int device_count,
252 const std::vector<
InnerOuter>& inner_outer_pairs) {
253 CHECK(!inner_outer_pairs.empty());
254 const auto first_inner_col = inner_outer_pairs.front().first;
255 return first_inner_col->getTableKey();
259 bool invalid_cache_key,
263 const size_t shard_count,
264 const Executor* executor);
271 const bool is_overlaps_join =
false);
273 template <
typename T>
277 static std::pair<std::vector<InnerOuter>, std::vector<InnerOuterStringOpInfos>>
292 const int device_id)
const {
297 return hash_table->getHashTableBufferSize(device_type);
301 const int device_id)
const {
311 return hash_table->getCpuBuffer();
314 const auto gpu_buff = hash_table->getGpuBuffer();
319 return hash_table->getCpuBuffer();
324 auto empty_hash_tables =
330 const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments);
333 const std::vector<InnerOuter>& inner_outer_pairs,
334 const Executor* executor,
335 const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs = {});
337 static std::vector<const StringDictionaryProxy::IdMap*>
340 const std::vector<InnerOuterStringOpInfos>& string_op_infos_for_keys,
341 const Executor* executor);
343 static std::pair<const StringDictionaryProxy*, StringDictionaryProxy*>
345 const Executor* executor,
346 const bool has_string_ops);
352 const Executor* executor);
357 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
370 std::ostream& operator<<(std::ostream& os,
372 std::ostream& operator<<(
382 std::string_view table,
383 std::string_view column,
385 const Catalog_Namespace::Catalog& catalog);
387 size_t get_shard_count(const Analyzer::BinOper* join_condition, const Executor* executor);
390 std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*> equi_pair,
391 const Executor* executor);
static std::vector< int > collectFragmentIds(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
static std::shared_ptr< HashJoin > getSyntheticInstance(std::string_view table1, std::string_view column1, const Catalog_Namespace::Catalog &catalog1, std::string_view table2, std::string_view column2, const Catalog_Namespace::Catalog &catalog2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from named tables and columns (such as for testing).
Defines data structures for the semantic analysis phase of query processing.
virtual int getInnerTableRteIdx() const noexcept=0
virtual size_t payloadBufferOff() const noexcept=0
virtual std::string getHashJoinType() const =0
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t)=0
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
FailedToJoinOnVirtualColumn()
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
static bool canAccessHashTable(bool allow_hash_table_recycling, bool invalid_cache_key, JoinType join_type)
virtual HashType getHashType() const noexcept=0
std::vector< ChunkKey > cache_key_chunks
std::vector< const void * > sd_inner_proxy_per_key
virtual int getDeviceCount() const noexcept=0
virtual std::string toStringFlat64(const ExecutorDeviceType device_type, const int device_id) const
static void checkHashJoinReplicationConstraint(const shared::TableKey &table_key, const size_t shard_count, const Executor *executor)
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
void setBucketInfo(const std::vector< double > &bucket_sizes_for_dimension, const std::vector< InnerOuter > inner_outer_pairs)
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
virtual Data_Namespace::MemoryLevel getMemoryLevel() const noexcept=0
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
static std::pair< const StringDictionaryProxy *, StringDictionaryProxy * > getStrDictProxies(const InnerOuter &cols, const Executor *executor, const bool has_string_ops)
OverlapsHashTableTooBig(const size_t overlaps_hash_table_max_bytes)
virtual llvm::Value * codegenSlot(const CompilationOptions &, const size_t)=0
TableMustBeReplicated(const std::string &table_name)
static llvm::Value * codegenColOrStringOper(const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
void freeHashBufferMemory()
virtual size_t offsetBufferOff() const noexcept=0
virtual std::string toStringFlat32(const ExecutorDeviceType device_type, const int device_id) const
virtual size_t countBufferOff() const noexcept=0
JoinHashTableTooBig(size_t cur_hash_table_size, size_t query_hint_size)
const std::vector< JoinColumnTypeInfo > join_column_types
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
std::vector< void * > sd_outer_proxy_per_key
HashJoinFail(const std::string &err_msg, InnerQualDecision qual_decision)
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
static std::vector< const StringDictionaryProxy::IdMap * > translateCompositeStrDictProxies(const CompositeKeyInfo &composite_key_info, const std::vector< InnerOuterStringOpInfos > &string_op_infos_for_keys, const Executor *executor)
std::string toString(const ExecutorDeviceType &device_type)
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const
virtual size_t getComponentBufferSize() const noexcept=0
const std::vector< std::shared_ptr< Chunk_NS::Chunk > > chunks_owner
static const StringDictionaryProxy::IdMap * translateInnerToOuterStrDictProxies(const InnerOuter &cols, const InnerOuterStringOpInfos &inner_outer_string_op_infos, ExpressionRange &old_col_range, const Executor *executor)
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
HashTable * getHashTableForDevice(const size_t device_id) const
virtual shared::TableKey getInnerTableId() const noexcept=0
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
TooManyHashEntries(const std::string &reason)
static std::string getHashTypeString(HashType ht) noexcept
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
HashJoinFail(const std::string &err_msg)
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, const Catalog_Namespace::Catalog &catalog)
std::pair< std::vector< StringOps_Namespace::StringOpInfo >, std::vector< StringOps_Namespace::StringOpInfo >> InnerOuterStringOpInfos
static const T * getHashJoinColumn(const Analyzer::Expr *expr)
static std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > normalizeColumnPairs(const Analyzer::BinOper *condition, const TemporaryTables *temporary_tables)
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
const std::vector< std::shared_ptr< void > > malloc_owner
virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type, const int device_id) const =0
std::vector< JoinBucketInfo > join_buckets
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
virtual std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
InnerQualDecision inner_qual_decision
const std::vector< JoinColumn > join_columns
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
virtual bool isBitwiseEq() const =0
static CompositeKeyInfo getCompositeKeyInfo(const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs={})