OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
PerfectJoinHashTable.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file PerfectJoinHashTable.h
19  * @author Alex Suhan <alex@mapd.com>
20  *
21  */
22 
23 #pragma once
24 
25 #include "Analyzer/Analyzer.h"
27 #include "DataMgr/Chunk/Chunk.h"
37 
38 #include <llvm/IR/Value.h>
39 
40 #ifdef HAVE_CUDA
41 #include <cuda.h>
42 #endif
43 #include <functional>
44 #include <memory>
45 #include <mutex>
46 #include <stdexcept>
47 
48 struct HashEntryInfo;
49 
51  public:
//! Make hash table from an in-flight SQL query's parse tree etc.
// Static factory: hands back the table as a std::shared_ptr<PerfectJoinHashTable>.
// NOTE(review): the private constructor takes a ColumnVar, an ExpressionRange and a
// cache key that are not parameters here, so they are presumably derived inside
// this function from qual_bin_oper / hashtable_build_dag_map -- confirm in the .cpp.
53  static std::shared_ptr<PerfectJoinHashTable> getInstance(
54  const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
55  const std::vector<InputTableInfo>& query_infos,
56  const Data_Namespace::MemoryLevel memory_level,
57  const JoinType join_type,
58  const HashType preferred_hash_type,
59  const int device_count,
60  ColumnCacheMap& column_cache,
61  Executor* executor,
62  const HashTableBuildDagMap& hashtable_build_dag_map,
63  const TableIdToNodeMap& table_id_to_node_map);
64 
65  std::string toString(const ExecutorDeviceType device_type,
66  const int device_id = 0,
67  bool raw = false) const override;
68 
69  std::set<DecodedJoinHashBufferEntry> toSet(const ExecutorDeviceType device_type,
70  const int device_id) const override;
71 
72  llvm::Value* codegenSlot(const CompilationOptions&, const size_t) override;
73 
75  const size_t) override;
76 
// Returns the table id reported by col_var_ (the ColumnVar that the constructor
// deep-copies from its col_var argument).
// NOTE(review): col_var_ is produced by a std::dynamic_pointer_cast in the
// constructor and is dereferenced here without a null check, inside a noexcept
// function -- confirm the cast can never yield nullptr.
77  int getInnerTableId() const noexcept override {
78  return col_var_.get()->get_table_id();
79  };
80 
// Returns whatever col_var_'s get_rte_idx() reports for the inner join column.
// NOTE(review): same unchecked dereference of col_var_ as in getInnerTableId().
81  int getInnerTableRteIdx() const noexcept override {
82  return col_var_.get()->get_rte_idx();
83  };
84 
// Returns hash_type_; the constructor initializes it from preferred_hash_type.
// NOTE(review): whether it is reassigned later is not visible in this header.
85  HashType getHashType() const noexcept override { return hash_type_; }
86 
88  return memory_level_;
89  };
90 
// Returns device_count_, a const member set once from the constructor argument.
91  int getDeviceCount() const noexcept override { return device_count_; };
92 
93  size_t offsetBufferOff() const noexcept override;
94 
95  size_t countBufferOff() const noexcept override;
96 
97  size_t payloadBufferOff() const noexcept override;
98 
// Returns the fixed label "Perfect" for this hash join implementation; declared
// final, so subclasses cannot change it.
99  std::string getHashJoinType() const final { return "Perfect"; }
100 
103  return hash_table_cache_.get();
104  }
107  return hash_table_layout_cache_.get();
108  }
109 
// Returns a captureless callable that, when invoked, first runs the invalidator
// obtained from hash_table_layout_cache_ and then the one obtained from
// hash_table_cache_ (both are static std::unique_ptr members of this class).
// NOTE(review): listing lines 111-112 are absent from this view; the visible code
// dereferences both unique_ptrs without a null check -- confirm the missing lines
// guard against uninitialized caches.
110  static auto getCacheInvalidator() -> std::function<void()> {
113  return []() -> void {
114  auto layout_cache_invalidator = hash_table_layout_cache_->getCacheInvalidator();
115  layout_cache_invalidator();
116 
117  auto main_cache_invalidator = hash_table_cache_->getCacheInvalidator();
118  main_cache_invalidator();
119  };
120  }
121 
123 
124  private:
125  // Equijoin API
127  const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments,
128  const int device_id,
129  DeviceAllocator* dev_buff_owner,
130  const Catalog_Namespace::Catalog& catalog);
131 
132  void reifyForDevice(const ChunkKey& hash_table_key,
133  const ColumnsForDevice& columns_for_device,
134  const HashType layout,
135  const int device_id,
136  const logger::ThreadId parent_thread_id);
137 
138  int initHashTableForDevice(const ChunkKey& chunk_key,
139  const JoinColumn& join_column,
140  const InnerOuter& cols,
141  const HashType layout,
142  const Data_Namespace::MemoryLevel effective_memory_level,
143  const int device_id);
144 
146  const std::vector<InnerOuter>& inner_outer_pairs) const;
147 
148  std::vector<InnerOuter> inner_outer_pairs_;
149 
150  PerfectJoinHashTable(const std::shared_ptr<Analyzer::BinOper> qual_bin_oper,
151  const Analyzer::ColumnVar* col_var,
152  const std::vector<InputTableInfo>& query_infos,
153  const Data_Namespace::MemoryLevel memory_level,
154  const JoinType join_type,
155  const HashType preferred_hash_type,
156  const ExpressionRange& col_range,
157  ColumnCacheMap& column_cache,
158  Executor* executor,
159  const int device_count,
160  QueryPlanHash hashtable_cache_key,
161  HashtableCacheMetaInfo hashtable_cache_meta_info,
162  const TableIdToNodeMap& table_id_to_node_map)
163  : qual_bin_oper_(qual_bin_oper)
164  , join_type_(join_type)
165  , col_var_(std::dynamic_pointer_cast<Analyzer::ColumnVar>(col_var->deep_copy()))
166  , query_infos_(query_infos)
167  , memory_level_(memory_level)
168  , hash_type_(preferred_hash_type)
169  , col_range_(col_range)
170  , executor_(executor)
171  , column_cache_(column_cache)
172  , device_count_(device_count)
174  , table_id_to_node_map_(table_id_to_node_map)
175  , hashtable_cache_key_(hashtable_cache_key)
176  , hashtable_cache_meta_info_(hashtable_cache_meta_info) {
180  }
181 
182  ChunkKey genChunkKey(const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments,
183  const Analyzer::Expr* outer_col,
184  const Analyzer::ColumnVar* inner_col) const;
185 
186  void reify();
187  std::shared_ptr<PerfectHashTable> initHashTableOnCpuFromCache(
188  QueryPlanHash key,
189  CacheItemType item_type,
190  DeviceIdentifier device_identifier);
192  CacheItemType item_type,
193  std::shared_ptr<PerfectHashTable> hashtable_ptr,
194  DeviceIdentifier device_identifier,
195  size_t hashtable_building_time);
196 
197  const InputTableInfo& getInnerQueryInfo(const Analyzer::ColumnVar* inner_col) const;
198 
199  size_t shardCount() const;
200 
201  llvm::Value* codegenHashTableLoad(const size_t table_idx);
202 
203  std::vector<llvm::Value*> getHashJoinArgs(llvm::Value* hash_ptr,
204  const Analyzer::Expr* key_col,
205  const int shard_count,
206  const CompilationOptions& co);
207 
208  bool isBitwiseEq() const;
209 
210  size_t getComponentBufferSize() const noexcept override;
211 
212  HashTable* getHashTableForDevice(const size_t device_id) const;
213 
219  const size_t num_elements;
220  const SQLOps optype;
222  };
223 
226  auto hash = boost::hash_value(::toString(info.chunk_key));
227  boost::hash_combine(hash, info.inner_col->toString());
228  if (info.inner_col->get_type_info().is_string()) {
229  boost::hash_combine(hash, info.outer_col->toString());
230  }
231  boost::hash_combine(hash, info.col_range.toString());
232  boost::hash_combine(hash, info.num_elements);
233  boost::hash_combine(hash, ::toString(info.optype));
234  boost::hash_combine(hash, ::toString(info.join_type));
235  return hash;
236  }
237 
238  std::shared_ptr<Analyzer::BinOper> qual_bin_oper_;
240  std::shared_ptr<Analyzer::ColumnVar> col_var_;
241  const std::vector<InputTableInfo>& query_infos_;
244 
247  Executor* executor_;
249  const int device_count_;
254 
255  static std::unique_ptr<HashtableRecycler> hash_table_cache_;
256  static std::unique_ptr<HashingSchemeRecycler> hash_table_layout_cache_;
257 };
258 
260  const Analyzer::Expr* outer_col,
261  const Executor* executor);
262 
263 std::vector<Fragmenter_Namespace::FragmentInfo> only_shards_for_device(
264  const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments,
265  const int device_id,
266  const int device_count);
267 
269  const int inner_table_id,
270  const std::vector<InputTableInfo>& query_infos);
271 
272 size_t get_entries_per_device(const size_t total_entries,
273  const size_t shard_count,
274  const size_t device_count,
275  const Data_Namespace::MemoryLevel memory_level);
Defines data structures for the semantic analysis phase of query processing.
llvm::Value * codegenHashTableLoad(const size_t table_idx)
void reifyForDevice(const ChunkKey &hash_table_key, const ColumnsForDevice &columns_for_device, const HashType layout, const int device_id, const logger::ThreadId parent_thread_id)
std::string toString() const
std::vector< int > ChunkKey
Definition: types.h:37
size_t DeviceIdentifier
Definition: DataRecycler.h:111
JoinType
Definition: sqldefs.h:108
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:111
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
Definition: HashJoin.h:77
const Data_Namespace::MemoryLevel memory_level_
size_t getComponentBufferSize() const noexcept override
ExecutorDeviceType
std::shared_ptr< Analyzer::BinOper > qual_bin_oper_
QueryPlanHash hashtable_cache_key_
ChunkKey genChunkKey(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const Analyzer::Expr *outer_col, const Analyzer::ColumnVar *inner_col) const
#define const
const TableIdToNodeMap table_id_to_node_map_
static auto getCacheInvalidator() -> std::function< void()>
SQLOps
Definition: sqldefs.h:29
size_t offsetBufferOff() const noexcept override
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:294
int getDeviceCount() const noexcept override
const InputTableInfo & getInnerQueryInfo(const Analyzer::ColumnVar *inner_col) const
size_t payloadBufferOff() const noexcept override
bool needs_dictionary_translation(const Analyzer::ColumnVar *inner_col, const Analyzer::Expr *outer_col_expr, const Executor *executor)
std::shared_ptr< PerfectHashTable > initHashTableOnCpuFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
PerfectJoinHashTable(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const Analyzer::ColumnVar *col_var, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const ExpressionRange &col_range, ColumnCacheMap &column_cache, Executor *executor, const int device_count, QueryPlanHash hashtable_cache_key, HashtableCacheMetaInfo hashtable_cache_meta_info, const TableIdToNodeMap &table_id_to_node_map)
#define CHECK_GT(x, y)
Definition: Logger.h:221
HashType getHashType() const noexcept override
std::vector< llvm::Value * > getHashJoinArgs(llvm::Value *hash_ptr, const Analyzer::Expr *key_col, const int shard_count, const CompilationOptions &co)
HashtableCacheMetaInfo hashtable_cache_meta_info_
static std::unique_ptr< HashtableRecycler > hash_table_cache_
ColumnsForDevice fetchColumnsForDevice(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner, const Catalog_Namespace::Catalog &catalog)
std::string getHashJoinType() const final
CacheItemType
Definition: DataRecycler.h:36
static HashtableRecycler * getHashTableCache()
std::string toString() const override
Definition: Analyzer.cpp:2511
std::unordered_map< int, const RelAlgNode * > TableIdToNodeMap
std::vector< Fragmenter_Namespace::FragmentInfo > only_shards_for_device(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, const int device_count)
static HashingSchemeRecycler * getHashingSchemeCache()
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:77
std::vector< InnerOuter > inner_outer_pairs_
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
static std::unique_ptr< HashingSchemeRecycler > hash_table_layout_cache_
const InputTableInfo & get_inner_query_info(const int inner_table_id, const std::vector< InputTableInfo > &query_infos)
static QueryPlanHash getAlternativeCacheKey(AlternativeCacheKeyForPerfectHashJoin &info)
const std::vector< InputTableInfo > & query_infos_
static std::shared_ptr< PerfectJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
int getInnerTableId() const noexcept override
void putHashTableOnCpuToCache(QueryPlanHash key, CacheItemType item_type, std::shared_ptr< PerfectHashTable > hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
ColumnCacheMap & column_cache_
size_t get_entries_per_device(const size_t total_entries, const size_t shard_count, const size_t device_count, const Data_Namespace::MemoryLevel memory_level)
std::shared_ptr< Analyzer::ColumnVar > col_var_
uint64_t ThreadId
Definition: Logger.h:345
size_t QueryPlanHash
llvm::Value * codegenSlot(const CompilationOptions &, const size_t) override
ExpressionRangeType getType() const
bool g_enable_watchdog false
Definition: Execute.cpp:76
#define CHECK(condition)
Definition: Logger.h:209
size_t countBufferOff() const noexcept override
int initHashTableForDevice(const ChunkKey &chunk_key, const JoinColumn &join_column, const InnerOuter &cols, const HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id)
bool is_string() const
Definition: sqltypes.h:509
HashTable * getHashTableForDevice(const size_t device_id) const
Data_Namespace::MemoryLevel getMemoryLevel() const noexcept override
std::set< DecodedJoinHashBufferEntry > toSet(const ExecutorDeviceType device_type, const int device_id) const override
HashType
Definition: HashTable.h:19
HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t) override
int getInnerTableRteIdx() const noexcept override
std::unordered_map< JoinColumnsInfo, HashTableBuildDag > HashTableBuildDagMap