19 #include <llvm/IR/Value.h>
40 : std::runtime_error(
"Hash tables with more than 2B entries not supported yet") {}
48 : std::runtime_error(
"Hash join failed: Table '" + table_name +
49 "' must be replicated.") {}
56 constexpr
char const* strings[]{
"IGNORE",
"UNKNOWN",
"LHS",
"RHS"};
57 return os << strings[static_cast<int>(decision)];
79 :
HashJoinFail(
"Not enough memory for columns involved in join") {}
91 "Could not create overlaps hash table with less than max allowed size of " +
92 std::
to_string(overlaps_hash_table_max_bytes) +
" bytes") {}
95 using InnerOuter = std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*>;
97 std::vector<StringOps_Namespace::StringOpInfo>>;
106 void setBucketInfo(
const std::vector<double>& bucket_sizes_for_dimension,
107 const std::vector<InnerOuter> inner_outer_pairs);
127 const int device_id = 0,
128 bool raw =
false)
const = 0;
131 const int device_id)
const;
134 const int device_id)
const;
137 const int device_id)
const = 0;
155 const char* HashTypeStrings[3] = {
"OneToOne",
"OneToMany",
"ManyToMany"};
156 return HashTypeStrings[
static_cast<int>(ht)];
160 const std::vector<llvm::Value*>& hash_join_idx_args_in,
161 const bool is_sharded,
162 const bool col_is_nullable,
164 const int64_t sub_buff_size,
166 const bool is_bucketized =
false);
185 const Analyzer::ColumnVar* hash_col,
186 const std::vector<Fragmenter_Namespace::FragmentInfo>& fragment_info,
187 const Data_Namespace::
MemoryLevel effective_memory_level,
189 std::vector<std::shared_ptr<Chunk_NS::
Chunk>>& chunks_owner,
191 std::vector<std::shared_ptr<
void>>& malloc_owner,
197 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
202 const
int device_count,
211 std::string_view table1,
212 std::string_view column1,
213 std::string_view table2,
214 std::string_view column2,
217 const
int device_count,
223 const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
226 const
int device_count,
231 std::vector<std::shared_ptr<Analyzer::BinOper>>,
234 const
int device_count,
239 CHECK(!inner_outer_pairs.empty());
240 const auto first_inner_col = inner_outer_pairs.front().first;
241 return first_inner_col->get_table_id();
245 bool invalid_cache_key,
249 const size_t shard_count,
250 const Executor* executor);
258 const bool is_overlaps_join =
false);
260 template <
typename T>
264 static std::pair<std::vector<InnerOuter>, std::vector<InnerOuterStringOpInfos>>
280 const int device_id)
const {
285 return hash_table->getHashTableBufferSize(device_type);
289 const int device_id)
const {
299 return hash_table->getCpuBuffer();
302 const auto gpu_buff = hash_table->getGpuBuffer();
307 return hash_table->getCpuBuffer();
312 auto empty_hash_tables =
318 const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments);
321 const std::vector<InnerOuter>& inner_outer_pairs,
322 const Executor* executor,
323 const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs = {});
325 static std::vector<const StringDictionaryProxy::IdMap*>
328 const std::vector<InnerOuterStringOpInfos>& string_op_infos_for_keys,
329 const Executor* executor);
331 static std::pair<const StringDictionaryProxy*, StringDictionaryProxy*>
333 const Executor* executor,
334 const bool has_string_ops);
340 const Executor* executor);
345 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
358 std::ostream& operator<<(std::ostream& os,
360 std::ostream& operator<<(
370 std::string_view column,
374 size_t get_shard_count(const Analyzer::BinOper* join_condition, const Executor* executor);
377 std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*> equi_pair,
378 const Executor* executor);
static std::vector< int > collectFragmentIds(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
Defines data structures for the semantic analysis phase of query processing.
virtual int getInnerTableRteIdx() const noexcept=0
virtual size_t payloadBufferOff() const noexcept=0
virtual std::string getHashJoinType() const =0
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t)=0
static llvm::Value * codegenHashTableLoad(const size_t table_idx, Executor *executor)
Class for a per-database catalog. Also includes metadata for the current database and the current use...
FailedToJoinOnVirtualColumn()
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
static bool canAccessHashTable(bool allow_hash_table_recycling, bool invalid_cache_key, JoinType join_type)
virtual HashType getHashType() const noexcept=0
std::vector< ChunkKey > cache_key_chunks
std::vector< const void * > sd_inner_proxy_per_key
virtual int getDeviceCount() const noexcept=0
virtual std::string toStringFlat64(const ExecutorDeviceType device_type, const int device_id) const
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
static std::pair< std::vector< InnerOuter >, std::vector< InnerOuterStringOpInfos > > normalizeColumnPairs(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
void setBucketInfo(const std::vector< double > &bucket_sizes_for_dimension, const std::vector< InnerOuter > inner_outer_pairs)
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
virtual Data_Namespace::MemoryLevel getMemoryLevel() const noexcept=0
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
static std::pair< const StringDictionaryProxy *, StringDictionaryProxy * > getStrDictProxies(const InnerOuter &cols, const Executor *executor, const bool has_string_ops)
OverlapsHashTableTooBig(const size_t overlaps_hash_table_max_bytes)
virtual llvm::Value * codegenSlot(const CompilationOptions &, const size_t)=0
TableMustBeReplicated(const std::string &table_name)
static llvm::Value * codegenColOrStringOper(const Analyzer::Expr *col_or_string_oper, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, CodeGenerator &code_generator, const CompilationOptions &co)
void freeHashBufferMemory()
virtual size_t offsetBufferOff() const noexcept=0
virtual std::string toStringFlat32(const ExecutorDeviceType device_type, const int device_id) const
virtual size_t countBufferOff() const noexcept=0
const std::vector< JoinColumnTypeInfo > join_column_types
static std::pair< InnerOuter, InnerOuterStringOpInfos > normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join=false)
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
std::vector< void * > sd_outer_proxy_per_key
std::unordered_map< int, const RelAlgNode * > TableIdToNodeMap
HashJoinFail(const std::string &err_msg, InnerQualDecision qual_decision)
int8_t * getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
static std::vector< const StringDictionaryProxy::IdMap * > translateCompositeStrDictProxies(const CompositeKeyInfo &composite_key_info, const std::vector< InnerOuterStringOpInfos > &string_op_infos_for_keys, const Executor *executor)
virtual int getInnerTableId() const noexcept=0
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const
virtual size_t getComponentBufferSize() const noexcept=0
static void checkHashJoinReplicationConstraint(const int table_id, const size_t shard_count, const Executor *executor)
const std::vector< std::shared_ptr< Chunk_NS::Chunk > > chunks_owner
static const StringDictionaryProxy::IdMap * translateInnerToOuterStrDictProxies(const InnerOuter &cols, const InnerOuterStringOpInfos &inner_outer_string_op_infos, ExpressionRange &old_col_range, const Executor *executor)
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
HashTable * getHashTableForDevice(const size_t device_id) const
TooManyHashEntries(const std::string &reason)
static std::string getHashTypeString(HashType ht) noexcept
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
HashJoinFail(const std::string &err_msg)
std::pair< std::vector< StringOps_Namespace::StringOpInfo >, std::vector< StringOps_Namespace::StringOpInfo >> InnerOuterStringOpInfos
static const T * getHashJoinColumn(const Analyzer::Expr *expr)
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
const std::vector< std::shared_ptr< void > > malloc_owner
static std::shared_ptr< HashJoin > getSyntheticInstance(std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from named tables and columns (such as for testing).
virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type, const int device_id) const =0
std::vector< JoinBucketInfo > join_buckets
size_t get_shard_count(const Analyzer::BinOper *join_condition, const Executor *executor)
static std::shared_ptr< HashJoin > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
virtual std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, Executor *executor)
InnerQualDecision inner_qual_decision
const std::vector< JoinColumn > join_columns
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
virtual bool isBitwiseEq() const =0
static CompositeKeyInfo getCompositeKeyInfo(const std::vector< InnerOuter > &inner_outer_pairs, const Executor *executor, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs={})