OmniSciDB  8a228a1076
JoinHashTableInterface.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef QUERYENGINE_JOINHASHTABLEINTERFACE_H
17 #define QUERYENGINE_JOINHASHTABLEINTERFACE_H
18 
19 #include <llvm/IR/Value.h>
20 #include <cstdint>
21 #include <set>
22 #include <string>
23 
24 #include "Analyzer/Analyzer.h"
30 
/**
 * Exception carrying the fixed message
 * "Hash tables with more than 2B entries not supported yet".
 *
 * The message takes no arguments, so the type is default-constructible.
 */
class TooManyHashEntries : public std::runtime_error {
 public:
  TooManyHashEntries()
      : std::runtime_error("Hash tables with more than 2B entries not supported yet") {}
};
36 
/**
 * Exception reporting that a hash join failed because the named table must be
 * replicated.
 */
class TableMustBeReplicated : public std::runtime_error {
 public:
  /**
   * @param table_name Table name; inserted verbatim between single quotes in
   *                   the message returned by what().
   *
   * Marked explicit so a plain string is never silently converted into an
   * exception object.
   */
  explicit TableMustBeReplicated(const std::string& table_name)
      : std::runtime_error("Hash join failed: Table '" + table_name +
                           "' must be replicated.") {}
};
43 
/**
 * Exception for hash join failures that carries a caller-supplied reason.
 *
 * Other exception types in this header pass a fixed reason string to this
 * constructor from their member-initializer lists.
 */
class HashJoinFail : public std::runtime_error {
 public:
  /**
   * @param reason Text returned by what().
   *
   * Marked explicit so a plain string is never silently converted into an
   * exception object; direct initialization (including from a base-class
   * initializer with a string literal) is unaffected.
   */
  explicit HashJoinFail(const std::string& reason) : std::runtime_error(reason) {}
};
48 
// NOTE(review): the `class ... {` line and the constructor declarator that
// belong in front of this fragment are missing from this listing. The
// initializer below forwards a fixed "not enough memory" reason to
// HashJoinFail, so this is presumably a HashJoinFail-derived exception with a
// default constructor -- confirm the class name against the original header.
50  public:
52  : HashJoinFail("Not enough memory for columns involved in join") {}
53 };
54 
56  public:
57  FailedToJoinOnVirtualColumn() : HashJoinFail("Cannot join on rowid") {}
58 };
59 
// NOTE(review): the opening `struct ... {` line is missing from this listing.
// The aggregate holds three llvm::Value pointers. It is presumably the
// HashJoinMatchingSet type returned by codegenMatchingSet() further down in
// this header -- confirm against the original header.
61  llvm::Value* elements;
62  llvm::Value* count;
63  llvm::Value* slot;
64 };
65 
/**
 * One decoded hash table entry: a key made of int64_t components and the set
 * of int32_t payload values associated with it.
 *
 * Ordering and equality consider the key first and the payload second, which
 * lets entries be stored in an ordered std::set.
 */
struct DecodedJoinHashBufferEntry {
  std::vector<int64_t> key;
  std::set<int32_t> payload;

  // Lexicographic order over (key, payload).
  bool operator<(const DecodedJoinHashBufferEntry& other) const {
    return std::tie(key, payload) < std::tie(other.key, other.payload);
  }

  // Equal only when both key and payload match.
  bool operator==(const DecodedJoinHashBufferEntry& other) const {
    return key == other.key && payload == other.payload;
  }
};  // struct DecodedJoinHashBufferEntry
78 
// Ordered collection of decoded entries; ordering comes from
// DecodedJoinHashBufferEntry::operator<.
79 using DecodedJoinHashBufferSet = std::set<DecodedJoinHashBufferEntry>;
80 
// Pair of a column variable and an expression. NOTE(review): judging by the
// alias name, `first` is presumably the inner side of a join and `second` the
// outer side -- confirm against the users of this alias.
81 using InnerOuter = std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*>;
82 
// Forward declaration; this header only uses DeviceAllocator through a
// pointer (see fetchJoinColumn).
83 class DeviceAllocator;
84 
86  public:
87  virtual int64_t getJoinHashBuffer(const ExecutorDeviceType device_type,
88  const int device_id = 0) const noexcept = 0;
89 
90  virtual size_t getJoinHashBufferSize(const ExecutorDeviceType device_type,
91  const int device_id = 0) const
92  noexcept = 0; // bytes
93 
94  virtual std::string toString(const ExecutorDeviceType device_type,
95  const int device_id = 0,
96  bool raw = false) const = 0;
97 
98  virtual std::string toStringFlat64(const ExecutorDeviceType device_type,
99  const int device_id) const;
100 
101  virtual std::string toStringFlat32(const ExecutorDeviceType device_type,
102  const int device_id) const;
103 
104  virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type,
105  const int device_id) const = 0;
106 
107  virtual llvm::Value* codegenSlot(const CompilationOptions&, const size_t) = 0;
108 
109  virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions&,
110  const size_t) = 0;
111 
112  virtual int getInnerTableId() const noexcept = 0;
113 
114  virtual int getInnerTableRteIdx() const noexcept = 0;
115 
116  enum class HashType : int { OneToOne, OneToMany, ManyToMany };
117 
118  virtual HashType getHashType() const noexcept = 0;
119 
120  virtual bool layoutRequiresAdditionalBuffers(
121  JoinHashTableInterface::HashType layout) const noexcept = 0;
122 
123  static std::string getHashTypeString(HashType ht) noexcept {
124  const char* HashTypeStrings[3] = {"OneToOne", "OneToMany", "ManyToMany"};
125  return HashTypeStrings[static_cast<int>(ht)];
126  };
127 
128  virtual Data_Namespace::MemoryLevel getMemoryLevel() const noexcept = 0;
129 
130  virtual int getDeviceCount() const noexcept = 0;
131 
132  virtual size_t offsetBufferOff() const noexcept = 0;
133 
134  virtual size_t countBufferOff() const noexcept = 0;
135 
136  virtual size_t payloadBufferOff() const noexcept = 0;
137 
138  JoinColumn fetchJoinColumn(
139  const Analyzer::ColumnVar* hash_col,
140  const std::vector<Fragmenter_Namespace::FragmentInfo>& fragment_info,
141  const Data_Namespace::MemoryLevel effective_memory_level,
142  const int device_id,
143  std::vector<std::shared_ptr<Chunk_NS::Chunk>>& chunks_owner,
144  DeviceAllocator* dev_buff_owner,
145  std::vector<std::shared_ptr<void>>& malloc_owner,
146  Executor* executor,
147  ColumnCacheMap* column_cache);
148 
149  public:
151  static DecodedJoinHashBufferSet toSet(
152  size_t key_component_count, // number of key parts
153  size_t key_component_width, // width of a key part
154  size_t entry_count, // number of hashable entries
155  const int8_t* ptr1, // hash entries
156  const int8_t* ptr2, // offsets
157  const int8_t* ptr3, // counts
158  const int8_t* ptr4, // payloads (rowids)
159  size_t buffer_size);
160 
162  static std::string toString(
163  const std::string& type, // perfect, keyed, or geo
164  const std::string& layout_type, // one-to-one, one-to-many, many-to-many
165  size_t key_component_count, // number of key parts
166  size_t key_component_width, // width of a key part
167  size_t entry_count, // number of hashable entries
168  const int8_t* ptr1, // hash entries
169  const int8_t* ptr2, // offsets
170  const int8_t* ptr3, // counts
171  const int8_t* ptr4, // payloads (rowids)
172  size_t buffer_size,
173  bool raw = false);
174 
176  static std::shared_ptr<JoinHashTableInterface> getInstance(
177  const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
178  const std::vector<InputTableInfo>& query_infos,
179  const Data_Namespace::MemoryLevel memory_level,
180  const HashType preferred_hash_type,
181  const int device_count,
182  ColumnCacheMap& column_cache,
183  Executor* executor);
184 
186  static std::shared_ptr<JoinHashTableInterface> getSyntheticInstance(
187  std::string_view table1,
188  std::string_view column1,
189  std::string_view table2,
190  std::string_view column2,
191  const Data_Namespace::MemoryLevel memory_level,
192  const HashType preferred_hash_type,
193  const int device_count,
194  ColumnCacheMap& column_cache,
195  Executor* executor);
196 
198  static std::shared_ptr<JoinHashTableInterface> getSyntheticInstance(
199  const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
200  const Data_Namespace::MemoryLevel memory_level,
201  const HashType preferred_hash_type,
202  const int device_count,
203  ColumnCacheMap& column_cache,
204  Executor* executor);
205 
206 }; // class JoinHashTableInterface
207 
// Stream insertion for a single decoded hash table entry (declaration only;
// the output format is defined with the definition).
208 std::ostream& operator<<(std::ostream& os, const DecodedJoinHashBufferEntry& e);
209 
// Stream insertion for a whole set of decoded entries (declaration only).
210 std::ostream& operator<<(std::ostream& os, const DecodedJoinHashBufferSet& s);
211 
// Returns a shared Analyzer::ColumnVar for the column identified by table and
// column name. NOTE(review): `rte_idx` is presumably the range table entry
// index assigned to the column variable -- confirm against the definition.
212 std::shared_ptr<Analyzer::ColumnVar> getSyntheticColumnVar(std::string_view table,
213  std::string_view column,
214  int rte_idx,
215  Executor* executor);
216 
217 #endif // QUERYENGINE_JOINHASHTABLEINTERFACE_H
Defines data structures for the semantic analysis phase of query processing.
std::ostream & operator<<(std::ostream &os, const DecodedJoinHashBufferEntry &e)
ExecutorDeviceType
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults > >> ColumnCacheMap
bool operator==(const DecodedJoinHashBufferEntry &other) const
TableMustBeReplicated(const std::string &table_name)
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
HashJoinFail(const std::string &reason)
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, Executor *executor)
bool operator<(const DecodedJoinHashBufferEntry &other) const
std::set< int32_t > payload
static std::string getHashTypeString(HashType ht) noexcept
std::vector< int64_t > key