OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
JoinHashTableInterface.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef QUERYENGINE_JOINHASHTABLEINTERFACE_H
17 #define QUERYENGINE_JOINHASHTABLEINTERFACE_H
18 
19 #include <llvm/IR/Value.h>
20 #include <cstdint>
21 #include <set>
22 #include <string>
23 
24 #include "Analyzer/Analyzer.h"
30 
// Error type for hash tables that exceed the supported number of entries.
// The default message states that tables with more than 2B entries are not
// supported yet; callers may supply their own text instead.
class TooManyHashEntries : public std::runtime_error {
 public:
  // Builds the exception with the fixed "more than 2B entries" message.
  // The declarator for this constructor was absent from the listing, which
  // left the member-initializer list below dangling.
  TooManyHashEntries()
      : std::runtime_error("Hash tables with more than 2B entries not supported yet") {}

  // Builds the exception with a caller-supplied message.
  // @param reason Text that what() will return.
  // explicit: a bare std::string should never silently convert into this
  // exception type.
  explicit TooManyHashEntries(const std::string& reason) : std::runtime_error(reason) {}
};
38 
// Error type for a hash join that fails because the named table must be
// replicated; what() embeds the table name.
class TableMustBeReplicated : public std::runtime_error {
 public:
  // @param table_name Name placed between the quotes of the message
  //        "Hash join failed: Table '<name>' must be replicated."
  // explicit: a bare std::string should never silently convert into this
  // exception type.
  explicit TableMustBeReplicated(const std::string& table_name)
      : std::runtime_error("Hash join failed: Table '" + table_name +
                           "' must be replicated.") {}
};
45 
// General hash join failure carrying a free-form reason. Other exception
// classes in this header pass their fixed messages to this constructor.
class HashJoinFail : public std::runtime_error {
 public:
  // @param reason Text that what() will return.
  // explicit: a bare std::string should never silently convert into this
  // exception type. Direct initialization from a string literal, as done in
  // member-initializer lists, keeps working.
  explicit HashJoinFail(const std::string& reason) : std::runtime_error(reason) {}
};
50 
52  public:
54  : HashJoinFail("Not enough memory for columns involved in join") {}
55 };
56 
58  public:
59  FailedToJoinOnVirtualColumn() : HashJoinFail("Cannot join on rowid") {}
60 };
61 
63  llvm::Value* elements;
64  llvm::Value* count;
65  llvm::Value* slot;
66 };
67 
69  std::vector<int64_t> key;
70  std::set<int32_t> payload;
71 
72  bool operator<(const DecodedJoinHashBufferEntry& other) const {
73  return std::tie(key, payload) < std::tie(other.key, other.payload);
74  }
75 
76  bool operator==(const DecodedJoinHashBufferEntry& other) const {
77  return key == other.key && payload == other.payload;
78  }
79 }; // struct DecodedJoinHashBufferEntry
80 
// Ordered, duplicate-free collection of decoded entries; std::set orders its
// elements with DecodedJoinHashBufferEntry::operator<.
81 using DecodedJoinHashBufferSet = std::set<DecodedJoinHashBufferEntry>;
82 
// Pair of a column variable and an expression. Going by the alias name, the
// first is presumably the inner-side join column and the second the
// outer-side expression -- TODO confirm against users of this alias.
83 using InnerOuter = std::pair<const Analyzer::ColumnVar*, const Analyzer::Expr*>;
84 
// Forward declaration: this header only uses DeviceAllocator through a
// pointer parameter (dev_buff_owner), so the full definition is not needed.
85 class DeviceAllocator;
86 
88  public:
// Returns a 64-bit integer identifying the join hash buffer for the given
// device type and device id (device_id defaults to 0).
// NOTE(review): presumably the buffer's address carried as an integer --
// confirm with the implementing classes.
89  virtual int64_t getJoinHashBuffer(const ExecutorDeviceType device_type,
90  const int device_id = 0) const noexcept = 0;
91 
// Returns the size, in bytes, of the join hash buffer for the given device
// type and device id (device_id defaults to 0).
92  virtual size_t getJoinHashBufferSize(const ExecutorDeviceType device_type,
93  const int device_id = 0) const
94  noexcept = 0; // bytes
95 
// Produces a string describing the hash table held for the given device type
// and device id (device_id defaults to 0).
// NOTE(review): `raw` defaults to false; presumably it selects an undecoded
// dump of the buffer -- confirm with the implementing classes.
96  virtual std::string toString(const ExecutorDeviceType device_type,
97  const int device_id = 0,
98  bool raw = false) const = 0;
99 
// Virtual but not pure: a definition exists outside this header.
// NOTE(review): by its name, presumably renders the buffer as a flat list of
// 64-bit values -- TODO confirm against the definition.
100  virtual std::string toStringFlat64(const ExecutorDeviceType device_type,
101  const int device_id) const;
102 
// Virtual but not pure: a definition exists outside this header.
// NOTE(review): by its name, presumably renders the buffer as a flat list of
// 32-bit values -- TODO confirm against the definition.
103  virtual std::string toStringFlat32(const ExecutorDeviceType device_type,
104  const int device_id) const;
105 
// Returns a DecodedJoinHashBufferSet (an ordered set of key/payload entries)
// for the given device type and device id.
// NOTE(review): presumably the decoded contents of that device's hash table
// -- confirm with the implementing classes.
106  virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type,
107  const int device_id) const = 0;
108 
// Code-generation hook returning an llvm::Value*.
// NOTE(review): both parameters are unnamed in this declaration; the meaning
// of the size_t argument must be taken from the implementing classes.
109  virtual llvm::Value* codegenSlot(const CompilationOptions&, const size_t) = 0;
110 
112  const size_t) = 0;
113 
// Returns an int table id; by its name, presumably that of the inner table of
// the join -- TODO confirm.
114  virtual int getInnerTableId() const noexcept = 0;
115 
// Returns an int index for the inner table.
// NOTE(review): "Rte" is presumably "range table entry" -- TODO confirm.
116  virtual int getInnerTableRteIdx() const noexcept = 0;
117 
// Kinds of hash table layout. The underlying type is int and the enumerators
// take the values 0, 1 and 2 in declaration order.
118  enum class HashType : int { OneToOne, OneToMany, ManyToMany };
119 
// Returns the HashType reported by the implementing hash table.
120  virtual HashType getHashType() const noexcept = 0;
121 
123  JoinHashTableInterface::HashType layout) noexcept {
126  }
127 
128  static std::string getHashTypeString(HashType ht) noexcept {
129  const char* HashTypeStrings[3] = {"OneToOne", "OneToMany", "ManyToMany"};
130  return HashTypeStrings[static_cast<int>(ht)];
131  };
132 
// Returns the Data_Namespace::MemoryLevel reported by the implementing hash
// table.
// NOTE(review): presumably the level at which the table's buffers live --
// confirm.
133  virtual Data_Namespace::MemoryLevel getMemoryLevel() const noexcept = 0;
134 
// Returns the number of devices as an int.
// NOTE(review): presumably the device_count the table was built with --
// confirm.
135  virtual int getDeviceCount() const noexcept = 0;
136 
// NOTE(review): by its name, presumably the offset of the "offsets" section
// within the join hash buffer; units are not stated here -- TODO confirm.
137  virtual size_t offsetBufferOff() const noexcept = 0;
138 
// NOTE(review): by its name, presumably the offset of the "counts" section
// within the join hash buffer; units are not stated here -- TODO confirm.
139  virtual size_t countBufferOff() const noexcept = 0;
140 
// NOTE(review): by its name, presumably the offset of the "payloads" section
// within the join hash buffer; units are not stated here -- TODO confirm.
141  virtual size_t payloadBufferOff() const noexcept = 0;
142 
144  const Analyzer::ColumnVar* hash_col,
145  const std::vector<Fragmenter_Namespace::FragmentInfo>& fragment_info,
146  const Data_Namespace::MemoryLevel effective_memory_level,
147  const int device_id,
148  std::vector<std::shared_ptr<Chunk_NS::Chunk>>& chunks_owner,
149  DeviceAllocator* dev_buff_owner,
150  std::vector<std::shared_ptr<void>>& malloc_owner,
151  Executor* executor,
152  ColumnCacheMap* column_cache);
153 
154  public:
157  size_t key_component_count, // number of key parts
158  size_t key_component_width, // width of a key part
159  size_t entry_count, // number of hashable entries
160  const int8_t* ptr1, // hash entries
161  const int8_t* ptr2, // offsets
162  const int8_t* ptr3, // counts
163  const int8_t* ptr4, // payloads (rowids)
164  size_t buffer_size);
165 
// Static overload that builds a string from a hash table described by raw
// pointers rather than by a device type and id. The role of each argument is
// given by its inline comment; `raw` defaults to false.
// NOTE(review): the units of buffer_size and the effect of `raw` are not
// stated in this declaration -- confirm against the definition.
167  static std::string toString(
168  const std::string& type, // perfect, keyed, or geo
169  const std::string& layout_type, // one-to-one, one-to-many, many-to-many
170  size_t key_component_count, // number of key parts
171  size_t key_component_width, // width of a key part
172  size_t entry_count, // number of hashable entries
173  const int8_t* ptr1, // hash entries
174  const int8_t* ptr2, // offsets
175  const int8_t* ptr3, // counts
176  const int8_t* ptr4, // payloads (rowids)
177  size_t buffer_size,
178  bool raw = false);
179 
// Makes a hash table from an in-flight SQL query's parse tree etc.
// Returns shared ownership of the resulting JoinHashTableInterface.
// NOTE(review): how preferred_hash_type is honoured and which exceptions may
// be thrown are not visible in this declaration -- see the definition.
181  static std::shared_ptr<JoinHashTableInterface> getInstance(
182  const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
183  const std::vector<InputTableInfo>& query_infos,
184  const Data_Namespace::MemoryLevel memory_level,
185  const HashType preferred_hash_type,
186  const int device_count,
187  ColumnCacheMap& column_cache,
188  Executor* executor);
189 
// Makes a hash table from named tables and columns (such as for testing).
// Returns shared ownership of the resulting JoinHashTableInterface.
// NOTE(review): presumably table1.column1 is joined against table2.column2 --
// confirm against the definition.
191  static std::shared_ptr<JoinHashTableInterface> getSyntheticInstance(
192  std::string_view table1,
193  std::string_view column1,
194  std::string_view table2,
195  std::string_view column2,
196  const Data_Namespace::MemoryLevel memory_level,
197  const HashType preferred_hash_type,
198  const int device_count,
199  ColumnCacheMap& column_cache,
200  Executor* executor);
201 
// Overload of getSyntheticInstance that takes a binary-operator expression in
// place of table and column names; the remaining parameters match the
// name-based overload.
// NOTE(review): presumably qual_bin_oper is the join qualifier -- confirm
// against the definition.
203  static std::shared_ptr<JoinHashTableInterface> getSyntheticInstance(
204  const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
205  const Data_Namespace::MemoryLevel memory_level,
206  const HashType preferred_hash_type,
207  const int device_count,
208  ColumnCacheMap& column_cache,
209  Executor* executor);
210 }; // class JoinHashTableInterface
211 
// Stream insertion for a single decoded entry; defined outside this header.
// The output format is not specified in this declaration.
212 std::ostream& operator<<(std::ostream& os, const DecodedJoinHashBufferEntry& e);
213 
// Stream insertion for a whole set of decoded entries; defined outside this
// header. The output format is not specified in this declaration.
214 std::ostream& operator<<(std::ostream& os, const DecodedJoinHashBufferSet& s);
215 
// Returns a shared Analyzer::ColumnVar for the column named `column` of the
// table named `table`, with the supplied rte_idx.
// NOTE(review): presumably the names are resolved through `executor` --
// confirm against the definition, which lives outside this header.
216 std::shared_ptr<Analyzer::ColumnVar> getSyntheticColumnVar(std::string_view table,
217  std::string_view column,
218  int rte_idx,
219  Executor* executor);
220 
221 #endif // QUERYENGINE_JOINHASHTABLEINTERFACE_H
Defines data structures for the semantic analysis phase of query processing.
virtual int64_t getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id=0) const noexcept=0
std::shared_ptr< Analyzer::ColumnVar > getSyntheticColumnVar(std::string_view table, std::string_view column, int rte_idx, Executor *executor)
ExecutorDeviceType
static std::shared_ptr< JoinHashTableInterface > getSyntheticInstance(std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from named tables and columns (such as for testing).
virtual llvm::Value * codegenSlot(const CompilationOptions &, const size_t)=0
virtual int getDeviceCount() const noexcept=0
TableMustBeReplicated(const std::string &table_name)
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
virtual int getInnerTableId() const noexcept=0
HashJoinFail(const std::string &reason)
virtual int getInnerTableRteIdx() const noexcept=0
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t)=0
virtual size_t payloadBufferOff() const noexcept=0
static bool layoutRequiresAdditionalBuffers(JoinHashTableInterface::HashType layout) noexcept
virtual HashType getHashType() const noexcept=0
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
static std::shared_ptr< JoinHashTableInterface > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from an in-flight SQL query's parse tree etc.
TooManyHashEntries(const std::string &reason)
virtual Data_Namespace::MemoryLevel getMemoryLevel() const noexcept=0
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector< Fragmenter_Namespace::FragmentInfo > &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector< std::shared_ptr< Chunk_NS::Chunk >> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector< std::shared_ptr< void >> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)
virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type, const int device_id) const =0
virtual std::string toStringFlat32(const ExecutorDeviceType device_type, const int device_id) const
bool g_enable_watchdog false
Definition: Execute.cpp:73
bool operator==(const DecodedJoinHashBufferEntry &other) const
virtual size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id=0) const noexcept=0
std::set< int32_t > payload
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
Definition: FileMgr.h:67
virtual std::string toStringFlat64(const ExecutorDeviceType device_type, const int device_id) const
static std::string getHashTypeString(HashType ht) noexcept
std::vector< int64_t > key
virtual size_t offsetBufferOff() const noexcept=0
virtual std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const =0
virtual size_t countBufferOff() const noexcept=0
bool operator<(const DecodedJoinHashBufferEntry &other) const