OmniSciDB  72180abbfe
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
JoinHashTableInterface.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef QUERYENGINE_JOINHASHTABLEINTERFACE_H
17 #define QUERYENGINE_JOINHASHTABLEINTERFACE_H
18 
19 #include <llvm/IR/Value.h>
20 #include <cstdint>
21 #include <set>
22 #include <string>
23 #include "Analyzer/Analyzer.h"
24 #include "ColumnarResults.h"
25 #include "CompilationOptions.h"
28 #include "HashJoinRuntime.h"
29 
// Exception type derived from std::runtime_error. It always carries the fixed
// message stating that hash tables with more than 2B entries are not supported.
class TooManyHashEntries : public std::runtime_error {
 public:
  // Takes no arguments: the message is fixed.
  TooManyHashEntries()
      : std::runtime_error("Hash tables with more than 2B entries not supported yet") {}
};
35 
// Exception type derived from std::runtime_error whose message names the table
// that must be replicated.
class TableMustBeReplicated : public std::runtime_error {
 public:
  // table_name is embedded, single-quoted, in the message:
  //   Hash join failed: Table '<table_name>' must be replicated.
  TableMustBeReplicated(const std::string& table_name)
      : std::runtime_error("Hash join failed: Table '" + table_name +
                           "' must be replicated.") {}
};
42 
// General hash-join failure, derived from std::runtime_error. Other exception
// classes in this header pass their fixed messages to this constructor.
class HashJoinFail : public std::runtime_error {
 public:
  // reason is forwarded unchanged as the exception message.
  HashJoinFail(const std::string& reason) : std::runtime_error(reason) {}
};
47 
49  public:
51  : HashJoinFail("Not enough memory for columns involved in join") {}
52 };
53 
55  public:
56  FailedToJoinOnVirtualColumn() : HashJoinFail("Cannot join on rowid") {}
57 };
58 
60  llvm::Value* elements;
61  llvm::Value* count;
62  llvm::Value* slot;
63 };
64 
// One decoded join-hash-buffer entry: a key (a vector of int64_t parts) and a
// set of int32_t payload values.
struct DecodedJoinHashBufferEntry {
  std::vector<int64_t> key;
  std::set<int32_t> payload;

  // Orders entries lexicographically by (key, payload), so they can be stored
  // in an ordered container such as std::set.
  bool operator<(const DecodedJoinHashBufferEntry& other) const {
    return std::tie(key, payload) < std::tie(other.key, other.payload);
  }

  // Two entries are equal only when both key and payload are equal.
  bool operator==(const DecodedJoinHashBufferEntry& other) const {
    return key == other.key && payload == other.payload;
  }
};  // struct DecodedJoinHashBufferEntry
77 
78 using DecodedJoinHashBufferSet = std::set<DecodedJoinHashBufferEntry>;
79 
80 using InnerOuter = std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*>;
81 
82 class DeviceAllocator;
83 
85  public:
// Pure virtual, non-throwing accessor for the join hash buffer of the given
// device type and device id (device_id defaults to 0).
// NOTE(review): the int64_t presumably encodes the buffer's address - confirm
// against the implementations.
86  virtual int64_t getJoinHashBuffer(const ExecutorDeviceType device_type,
87  const int device_id = 0) const noexcept = 0;
88 
// Pure virtual, non-throwing accessor for the size, in bytes, of the join hash
// buffer for the given device type and device id (device_id defaults to 0).
89  virtual size_t getJoinHashBufferSize(const ExecutorDeviceType device_type,
90  const int device_id = 0) const
91  noexcept = 0; // bytes
92 
// Pure virtual: string representation of the hash table for the given device.
// device_id defaults to 0 and raw defaults to false.
// NOTE(review): what `raw` changes in the output is not visible in this
// header - confirm against the implementations.
93  virtual std::string toString(const ExecutorDeviceType device_type,
94  const int device_id = 0,
95  bool raw = false) const = 0;
96 
// Virtual but not pure, so a definition is expected outside this header.
// Unlike toString, device_id has no default here.
// NOTE(review): presumably renders the buffer as a flat list of 64-bit
// values - confirm against the definition.
97  virtual std::string toStringFlat64(const ExecutorDeviceType device_type,
98  const int device_id) const;
99 
// Virtual but not pure, so a definition is expected outside this header.
// Unlike toString, device_id has no default here.
// NOTE(review): presumably renders the buffer as a flat list of 32-bit
// values - confirm against the definition.
100  virtual std::string toStringFlat32(const ExecutorDeviceType device_type,
101  const int device_id) const;
102 
// Pure virtual: produces a DecodedJoinHashBufferSet (a std::set of
// DecodedJoinHashBufferEntry) for the given device type and device id.
103  virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type,
104  const int device_id) const = 0;
105 
// Pure virtual code-generation hook that yields an llvm::Value*. Both
// parameters are unnamed in this declaration.
// NOTE(review): the meaning of the size_t argument is not visible here -
// confirm against the implementations.
106  virtual llvm::Value* codegenSlot(const CompilationOptions&, const size_t) = 0;
107 
109  const size_t) = 0;
110 
// Pure virtual, non-throwing accessor for the inner table's id.
111  virtual int getInnerTableId() const noexcept = 0;
112 
// Pure virtual, non-throwing accessor for the inner table's "rte" index.
// NOTE(review): "rte" presumably means range-table entry - confirm.
113  virtual int getInnerTableRteIdx() const noexcept = 0;
114 
// Kinds of hash table (layoutRequiresAdditionalBuffers calls this a "layout").
// The underlying type is int; the enumerators take the values 0, 1 and 2 in
// declaration order.
115  enum class HashType : int { OneToOne, OneToMany, ManyToMany };
116 
// Pure virtual, non-throwing accessor for this table's HashType.
117  virtual HashType getHashType() const noexcept = 0;
118 
// Pure virtual, non-throwing predicate on a HashType value.
// NOTE(review): which layouts need additional buffers, and what those buffers
// are, is decided by the implementations - not visible in this header.
119  virtual bool layoutRequiresAdditionalBuffers(
120  JoinHashTableInterface::HashType layout) const noexcept = 0;
121 
122  static std::string getHashTypeString(HashType ht) noexcept {
123  const char* HashTypeStrings[3] = {"OneToOne", "OneToMany", "ManyToMany"};
124  return HashTypeStrings[static_cast<int>(ht)];
125  };
126 
// Pure virtual, non-throwing accessor for the Data_Namespace::MemoryLevel
// associated with this hash table.
127  virtual Data_Namespace::MemoryLevel getMemoryLevel() const noexcept = 0;
128 
// Pure virtual, non-throwing accessor for the device count.
129  virtual int getDeviceCount() const noexcept = 0;
130 
// Pure virtual, non-throwing accessor.
// NOTE(review): presumably the offset of the "offsets" section within the
// hash buffer; the unit is not stated here - confirm.
131  virtual size_t offsetBufferOff() const noexcept = 0;
132 
// Pure virtual, non-throwing accessor.
// NOTE(review): presumably the offset of the "counts" section within the
// hash buffer; the unit is not stated here - confirm.
133  virtual size_t countBufferOff() const noexcept = 0;
134 
// Pure virtual, non-throwing accessor.
// NOTE(review): presumably the offset of the "payloads" section within the
// hash buffer; the unit is not stated here - confirm.
135  virtual size_t payloadBufferOff() const noexcept = 0;
136 
138  const Analyzer::ColumnVar* hash_col,
139  const std::vector<Fragmenter_Namespace::FragmentInfo>& fragment_info,
140  const Data_Namespace::MemoryLevel effective_memory_level,
141  const int device_id,
142  std::vector<std::shared_ptr<Chunk_NS::Chunk>>& chunks_owner,
143  DeviceAllocator* dev_buff_owner,
144  std::vector<std::shared_ptr<void>>& malloc_owner,
145  Executor* executor,
146  ColumnCacheMap* column_cache);
147 
148  public:
151  size_t key_component_count, // number of key parts
152  size_t key_component_width, // width of a key part
153  size_t entry_count, // number of hashable entries
154  const int8_t* ptr1, // hash entries
155  const int8_t* ptr2, // offsets
156  const int8_t* ptr3, // counts
157  const int8_t* ptr4, // payloads (rowids)
158  size_t buffer_size);
159 
// Static helper that builds a string from a hash table described by its raw
// parts: a type and layout label, the key shape, the entry count, four buffer
// pointers and the buffer size. raw defaults to false.
// NOTE(review): what `raw` changes in the output and the unit of buffer_size
// are not visible in this header - confirm against the definition.
161  static std::string toString(
162  const std::string& type, // perfect, keyed, or geo
163  const std::string& layout_type, // one-to-one, one-to-many, many-to-many
164  size_t key_component_count, // number of key parts
165  size_t key_component_width, // width of a key part
166  size_t entry_count, // number of hashable entries
167  const int8_t* ptr1, // hash entries
168  const int8_t* ptr2, // offsets
169  const int8_t* ptr3, // counts
170  const int8_t* ptr4, // payloads (rowids)
171  size_t buffer_size,
172  bool raw = false);
173 
// Make hash table from an in-flight SQL query's parse tree etc.
// Returns the table as a shared_ptr to the interface type. column_cache is
// taken by non-const reference, so the call may modify it.
175  static std::shared_ptr<JoinHashTableInterface> getInstance(
176  const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
177  const std::vector<InputTableInfo>& query_infos,
178  const Data_Namespace::MemoryLevel memory_level,
179  const HashType preferred_hash_type,
180  const int device_count,
181  ColumnCacheMap& column_cache,
182  Executor* executor);
183 
// Make hash table from named tables and columns (such as for testing).
// The two table/column pairs are given by name. column_cache is taken by
// non-const reference, so the call may modify it.
185  static std::shared_ptr<JoinHashTableInterface> getSyntheticInstance(
186  std::string_view table1,
187  std::string_view column1,
188  std::string_view table2,
189  std::string_view column2,
190  const Data_Namespace::MemoryLevel memory_level,
191  const HashType preferred_hash_type,
192  const int device_count,
193  ColumnCacheMap& column_cache,
194  Executor* executor);
195 
// Overload of getSyntheticInstance that takes an already-built
// Analyzer::BinOper qualifier instead of table and column names. It takes no
// query_infos argument, unlike getInstance.
// NOTE(review): presumably also intended for tests - confirm.
197  static std::shared_ptr<JoinHashTableInterface> getSyntheticInstance(
198  const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
199  const Data_Namespace::MemoryLevel memory_level,
200  const HashType preferred_hash_type,
201  const int device_count,
202  ColumnCacheMap& column_cache,
203  Executor* executor);
204 
205 }; // class JoinHashTableInterface
206 
// Stream-insertion operator for a single DecodedJoinHashBufferEntry. It is
// declared here and defined elsewhere; the output format is not visible in
// this header.
207 std::ostream& operator<<(std::ostream& os, const DecodedJoinHashBufferEntry& e);
208 
// Stream-insertion operator for a whole DecodedJoinHashBufferSet. It is
// declared here and defined elsewhere; the output format is not visible in
// this header.
209 std::ostream& operator<<(std::ostream& os, const DecodedJoinHashBufferSet& s);
210 
// Free function returning a shared Analyzer::ColumnVar for a table and column
// given by name, with the supplied rte_idx. Declared here, defined elsewhere.
// NOTE(review): presumably resolves the names through the executor - confirm
// against the definition.
211 std::shared_ptr<Analyzer::ColumnVar> getSyntheticColumnVar(std::string_view table,
212  std::string_view column,
213  int rte_idx,
214  Executor* executor);
215 
216 #endif // QUERYENGINE_JOINHASHTABLEINTERFACE_H
Defines data structures for the semantic analysis phase of query processing.
virtual int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id=0) const noexcept=0
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, Executor *executor)
ExecutorDeviceType
static std::shared_ptr< JoinHashTableInterface > getSyntheticInstance(std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from named tables and columns (such as for testing).
virtual llvm::Value * codegenSlot(const CompilationOptions &, const size_t)=0
virtual int getDeviceCount() const noexcept=0
TableMustBeReplicated(const std::string &table_name)
virtual bool layoutRequiresAdditionalBuffers(JoinHashTableInterface::HashType layout) const noexcept=0
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
virtual int getInnerTableId() const noexcept=0
HashJoinFail(const std::string &reason)
virtual int getInnerTableRteIdx() const noexcept=0
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t)=0
virtual size_t payloadBufferOff() const noexcept=0
virtual HashType getHashType() const noexcept=0
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
static std::shared_ptr< JoinHashTableInterface > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from an in-flight SQL query's parse tree etc.
virtual Data_Namespace::MemoryLevel getMemoryLevel() const noexcept=0
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type, const int device_id) const =0
virtual std::string toStringFlat32(const ExecutorDeviceType device_type, const int device_id) const
bool g_enable_watchdog false
Definition: Execute.cpp:74
bool operator==(const DecodedJoinHashBufferEntry &other) const
virtual size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id=0) const noexcept=0
std::set< int32_t > payload
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
Definition: FileMgr.h:67
virtual std::string toStringFlat64(const ExecutorDeviceType device_type, const int device_id) const
static std::string getHashTypeString(HashType ht) noexcept
std::vector< int64_t > key
virtual size_t offsetBufferOff() const noexcept=0
virtual std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
virtual size_t countBufferOff() const noexcept=0
bool operator<(const DecodedJoinHashBufferEntry &other) const