OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
BaselineJoinHashTable.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #ifdef HAVE_CUDA
20 #include <cuda.h>
21 #endif
22 #include <cstdint>
23 #include <map>
24 #include <mutex>
25 #include <thread>
26 #include <unordered_set>
27 #include <vector>
28 
29 #include "Analyzer/Analyzer.h"
30 #include "DataMgr/MemoryLevel.h"
38 
39 class Executor;
40 
42  const size_t num_elements;
43  const std::vector<ChunkKey> chunk_keys;
44  const SQLOps optype;
45 
// Memberwise equality for HashTableCacheKey: two keys match only when
// num_elements, chunk_keys and optype all compare equal.
46  bool operator==(const struct HashTableCacheKey& that) const {
47  return num_elements == that.num_elements && chunk_keys == that.chunk_keys &&
48  optype == that.optype;
49  }
50 };
51 
53  public:
54  static void set(const std::vector<ChunkKey>& key, const HashType hash_type);
55 
56  static std::pair<HashType, bool> get(const std::vector<ChunkKey>& key);
57 
58  static void clear();
59 
60  private:
// Static (class-wide) map from a vector of chunk keys to a HashType, declared
// together with a static mutex.
// NOTE(review): presumably the mutex guards the map in set()/get()/clear();
// the implementation is not visible here -- confirm in the .cpp.
61  static std::map<std::vector<ChunkKey>, HashType> hash_type_cache_;
62  static std::mutex hash_type_cache_mutex_;
63 };
64 
65 // Representation for a hash table using the baseline layout: an open-addressing
66 // hash with a fill rate of 50%. It is used for equi-joins on multiple columns and
67 // on single sparse columns (with very wide range), typically big integer. As of
68 // now, such tuples must be unique within the inner table.
70  public:
// Make hash table from an in-flight SQL query's parse tree etc.
// Static factory: callers receive the table as a
// std::shared_ptr<BaselineJoinHashTable>.
72  static std::shared_ptr<BaselineJoinHashTable> getInstance(
73  const std::shared_ptr<Analyzer::BinOper> condition,
74  const std::vector<InputTableInfo>& query_infos,
75  const Data_Namespace::MemoryLevel memory_level,
76  const HashType preferred_hash_type,
77  const int device_count,
78  ColumnCacheMap& column_cache,
79  Executor* executor);
80 
81  static size_t getShardCountForCondition(
82  const Analyzer::BinOper* condition,
83  const Executor* executor,
84  const std::vector<InnerOuter>& inner_outer_pairs);
85 
86  std::string toString(const ExecutorDeviceType device_type,
87  const int device_id = 0,
88  bool raw = false) const override;
89 
90  std::set<DecodedJoinHashBufferEntry> toSet(const ExecutorDeviceType device_type,
91  const int device_id) const override;
92 
93  llvm::Value* codegenSlot(const CompilationOptions&, const size_t) override;
94 
96  const size_t) override;
97 
98  int getInnerTableId() const noexcept override;
99 
100  int getInnerTableRteIdx() const noexcept override;
101 
102  HashType getHashType() const noexcept override;
103 
105  return memory_level_;
106  };
107 
// Returns the device count stored in the const member device_count_.
108  int getDeviceCount() const noexcept override { return device_count_; };
109 
110  size_t offsetBufferOff() const noexcept override;
111 
112  size_t countBufferOff() const noexcept override;
113 
114  size_t payloadBufferOff() const noexcept override;
115 
// Identifies this hash join implementation by the fixed string "Baseline".
// Declared final, so derived classes cannot override it.
116  std::string getHashJoinType() const final { return "Baseline"; }
117 
// Returns a std::function<void()> wrapping a capture-less lambda that calls
// clear() on the static hash_table_cache_ when invoked.
// NOTE(review): listing lines 121 and 123 are absent from this rendering, so
// the lambda body may contain statements not shown here -- confirm against
// the original header before relying on this description.
118  static auto getCacheInvalidator() -> std::function<void()> {
119  return []() -> void {
120  // TODO: make hash type cache part of the main cache
122  hash_table_cache_->clear();
124  };
125  }
126 
// Returns a raw pointer to the static hash_table_cache_, obtained through
// get() on the owning smart pointer; ownership is not transferred.
// NOTE(review): listing line 128 is absent from this rendering, so a
// statement preceding the return may be missing -- confirm against the
// original header.
127  static auto* getHashTableCache() {
129  return hash_table_cache_.get();
130  }
131 
133 
134  protected:
135  BaselineJoinHashTable(const std::shared_ptr<Analyzer::BinOper> condition,
136  const std::vector<InputTableInfo>& query_infos,
137  const Data_Namespace::MemoryLevel memory_level,
138  ColumnCacheMap& column_cache,
139  Executor* executor,
140  const std::vector<InnerOuter>& inner_outer_pairs,
141  const int device_count);
142 
143  size_t getComponentBufferSize() const noexcept override;
144 
145  size_t getKeyBufferSize() const noexcept;
146 
147  static int getInnerTableId(const std::vector<InnerOuter>& inner_outer_pairs);
148 
149  virtual void reifyWithLayout(const HashType layout);
150 
152  const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments,
153  const int device_id,
154  DeviceAllocator* dev_buff_owner);
155 
156  virtual std::pair<size_t, size_t> approximateTupleCount(
157  const std::vector<ColumnsForDevice>&) const;
158 
159  virtual size_t getKeyComponentWidth() const;
160 
161  virtual size_t getKeyComponentCount() const;
162 
163  virtual llvm::Value* codegenKey(const CompilationOptions&);
164 
165  size_t shardCount() const;
166 
167  Data_Namespace::MemoryLevel getEffectiveMemoryLevel(
168  const std::vector<InnerOuter>& inner_outer_pairs) const;
169 
170  void reify(const HashType preferred_layout);
171 
172  virtual void reifyForDevice(const ColumnsForDevice& columns_for_device,
173  const HashType layout,
174  const int device_id,
175  const size_t entry_count,
176  const size_t emitted_keys_count,
177  const logger::ThreadId parent_thread_id);
178 
179  virtual int initHashTableForDevice(
180  const std::vector<JoinColumn>& join_columns,
181  const std::vector<JoinColumnTypeInfo>& join_column_types,
182  const std::vector<JoinBucketInfo>& join_buckets,
183  const HashType layout,
184  const Data_Namespace::MemoryLevel effective_memory_level,
185  const size_t entry_count,
186  const size_t emitted_keys_count,
187  const int device_id);
188 
189  llvm::Value* hashPtr(const size_t index);
190 
192 
193  void putHashTableOnCpuToCache(const HashTableCacheKey&,
194  std::shared_ptr<HashTable>& hash_table);
195 
196  std::pair<std::optional<size_t>, size_t> getApproximateTupleCountFromCache(
197  const HashTableCacheKey&) const;
198 
199  bool isBitwiseEq() const;
200 
201  const std::shared_ptr<Analyzer::BinOper> condition_;
202  const std::vector<InputTableInfo>& query_infos_;
203  const Data_Namespace::MemoryLevel memory_level_;
204  Executor* executor_;
207 
209  const Catalog_Namespace::Catalog* catalog_;
210  const int device_count_;
211 
212  std::optional<HashType>
213  layout_override_; // allows us to use a 1:many hash table for many:many
214 
215  using HashTableCacheValue = std::shared_ptr<HashTable>;
216  static std::unique_ptr<HashTableCache<HashTableCacheKey, HashTableCacheValue>>
218 };
Defines data structures for the semantic analysis phase of query processing.
size_t offsetBufferOff() const noexcept override
std::set< DecodedJoinHashBufferEntry > toSet(const ExecutorDeviceType device_type, const int device_id) const override
void putHashTableOnCpuToCache(const HashTableCacheKey &, std::shared_ptr< HashTable > &hash_table)
static std::unique_ptr< HashTableCache< HashTableCacheKey, HashTableCacheValue > > hash_table_cache_
std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
Definition: HashJoin.h:76
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
static std::mutex hash_type_cache_mutex_
ExecutorDeviceType
#define const
std::shared_ptr< HashTable > HashTableCacheValue
static std::map< std::vector< ChunkKey >, HashType > hash_type_cache_
HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t) override
SQLOps
Definition: sqldefs.h:29
size_t getKeyBufferSize() const noexcept
size_t getComponentBufferSize() const noexcept override
static std::shared_ptr< BaselineJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
Make hash table from an in-flight SQL query's parse tree etc.
int getInnerTableRteIdx() const noexcept override
virtual void reifyForDevice(const ColumnsForDevice &columns_for_device, const HashType layout, const int device_id, const size_t entry_count, const size_t emitted_keys_count, const logger::ThreadId parent_thread_id)
virtual ColumnsForDevice fetchColumnsForDevice(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
const std::vector< InputTableInfo > & query_infos_
std::pair< std::optional< size_t >, size_t > getApproximateTupleCountFromCache(const HashTableCacheKey &) const
virtual llvm::Value * codegenKey(const CompilationOptions &)
static auto * getHashTableCache()
size_t payloadBufferOff() const noexcept override
std::vector< InnerOuter > inner_outer_pairs_
void reify(const HashType preferred_layout)
HashType getHashType() const noexcept override
ColumnCacheMap & column_cache_
BaselineJoinHashTable(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count)
std::string getHashJoinType() const final
Data_Namespace::MemoryLevel getMemoryLevel() const noexcept override
virtual std::pair< size_t, size_t > approximateTupleCount(const std::vector< ColumnsForDevice > &) const
virtual void reifyWithLayout(const HashType layout)
std::unordered_map< int, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
int getInnerTableId() const noexcept override
std::optional< HashType > layout_override_
const Catalog_Namespace::Catalog * catalog_
std::shared_ptr< HashTable > initHashTableOnCpuFromCache(const HashTableCacheKey &)
uint64_t ThreadId
Definition: Logger.h:312
const Data_Namespace::MemoryLevel memory_level_
bool operator==(const struct HashTableCacheKey &that) const
llvm::Value * hashPtr(const size_t index)
virtual int initHashTableForDevice(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_buckets, const HashType layout, const Data_Namespace::MemoryLevel effective_memory_level, const size_t entry_count, const size_t emitted_keys_count, const int device_id)
llvm::Value * codegenSlot(const CompilationOptions &, const size_t) override
#define CHECK(condition)
Definition: Logger.h:203
virtual size_t getKeyComponentCount() const
virtual size_t getKeyComponentWidth() const
Executor(const ExecutorId id, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:144
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
static void set(const std::vector< ChunkKey > &key, const HashType hash_type)
int getDeviceCount() const noexcept override
static auto getCacheInvalidator() -> std::function< void()>
HashType
Definition: HashTable.h:19
const std::shared_ptr< Analyzer::BinOper > condition_
size_t countBufferOff() const noexcept override
const std::vector< ChunkKey > chunk_keys