OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BoundingBoxIntersectJoinHashTable.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
23 
25  public:
26  BoundingBoxIntersectJoinHashTable(const std::shared_ptr<Analyzer::BinOper> condition,
27  const JoinType join_type,
28  const std::vector<InputTableInfo>& query_infos,
29  const Data_Namespace::MemoryLevel memory_level,
30  ColumnCacheMap& column_cache,
31  Executor* executor,
32  const std::vector<InnerOuter>& inner_outer_pairs,
33  const int device_count,
34  const RegisteredQueryHint& query_hints,
35  const HashTableBuildDagMap& hashtable_build_dag_map,
36  const TableIdToNodeMap& table_id_to_node_map)
37  : condition_(condition)
38  , join_type_(join_type)
39  , query_infos_(query_infos)
40  , memory_level_(memory_level)
41  , executor_(executor)
42  , column_cache_(column_cache)
43  , inner_outer_pairs_(inner_outer_pairs)
44  , device_count_(device_count)
45  , query_hints_(query_hints)
46  , hashtable_build_dag_map_(hashtable_build_dag_map)
47  , table_id_to_node_map_(table_id_to_node_map) {
49  hash_tables_for_device_.resize(std::max(device_count_, 1));
50  }
51 
53 
55  static std::shared_ptr<BoundingBoxIntersectJoinHashTable> getInstance(
56  const std::shared_ptr<Analyzer::BinOper> condition,
57  const std::vector<InputTableInfo>& query_infos,
58  const Data_Namespace::MemoryLevel memory_level,
59  const JoinType join_type,
60  const int device_count,
61  ColumnCacheMap& column_cache,
62  Executor* executor,
63  const HashTableBuildDagMap& hashtable_build_dag_map,
64  const RegisteredQueryHint& query_hint,
65  const TableIdToNodeMap& table_id_to_node_map);
66 
67  static void invalidateCache() {
69  auto_tuner_cache_->clearCache();
70 
72  hash_table_cache_->clearCache();
73  }
74 
75  static void markCachedItemAsDirty(size_t table_key) {
78  auto candidate_table_keys =
79  hash_table_cache_->getMappedQueryPlanDagsWithTableKey(table_key);
80  if (candidate_table_keys.has_value()) {
81  auto_tuner_cache_->markCachedItemAsDirty(
82  table_key,
83  *candidate_table_keys,
86  hash_table_cache_->markCachedItemAsDirty(table_key,
87  *candidate_table_keys,
90  }
91  }
92 
95  return hash_table_cache_.get();
96  }
97 
101  return auto_tuner_cache_.get();
102  }
103 
104  protected:
105  void reify(const HashType preferred_layout);
106 
107  virtual void reifyWithLayout(const HashType layout);
108 
109  virtual void reifyImpl(std::vector<ColumnsForDevice>& columns_per_device,
110  const Fragmenter_Namespace::TableInfo& query_info,
111  const HashType layout,
112  const size_t shard_count,
113  const size_t entry_count,
114  const size_t emitted_keys_count,
115  const bool skip_hashtable_caching,
116  const size_t chosen_max_hashtable_size,
117  const double chosen_bucket_threshold);
118 
119  void reifyForDevice(const ColumnsForDevice& columns_for_device,
120  const HashType layout,
121  const size_t entry_count,
122  const size_t emitted_keys_count,
123  const bool skip_hashtable_caching,
124  const int device_id,
125  const logger::ThreadLocalIds parent_thread_local_ids);
126 
127  size_t calculateHashTableSize(size_t number_of_dimensions,
128  size_t emitted_keys_count,
129  size_t entry_count) const;
130 
132  const std::vector<Fragmenter_Namespace::FragmentInfo>& fragments,
133  const int device_id,
134  DeviceAllocator* dev_buff_owner);
135 
136  // returns entry_count, emitted_keys_count
137  virtual std::pair<size_t, size_t> approximateTupleCount(
138  const std::vector<double>& inverse_bucket_sizes_for_dimension,
139  std::vector<ColumnsForDevice>&,
140  const size_t chosen_max_hashtable_size,
141  const double chosen_bucket_threshold);
142 
143  // returns entry_count, emitted_keys_count
144  virtual std::pair<size_t, size_t> computeHashTableCounts(
145  const size_t shard_count,
146  const std::vector<double>& inverse_bucket_sizes_for_dimension,
147  std::vector<ColumnsForDevice>& columns_per_device,
148  const size_t chosen_max_hashtable_size,
149  const double chosen_bucket_threshold);
150 
151  void setInverseBucketSizeInfo(const std::vector<double>& inverse_bucket_sizes,
152  std::vector<ColumnsForDevice>& columns_per_device,
153  const size_t device_count);
154 
155  size_t getKeyComponentWidth() const;
156 
157  size_t getKeyComponentCount() const;
158 
159  HashType getHashType() const noexcept override {
160  if (layout_override_) {
161  return *layout_override_;
162  }
163  auto hash_table = getHashTableForDevice(0);
164  CHECK(hash_table);
165  return hash_table->getLayout();
166  }
167 
168  Data_Namespace::MemoryLevel getMemoryLevel() const noexcept override {
169  return memory_level_;
170  }
171 
172  int getDeviceCount() const noexcept override { return device_count_; };
173 
174  std::shared_ptr<BaselineHashTable> initHashTableOnCpu(
175  const std::vector<JoinColumn>& join_columns,
176  const std::vector<JoinColumnTypeInfo>& join_column_types,
177  const std::vector<JoinBucketInfo>& join_bucket_info,
178  const BaselineHashTableEntryInfo hash_table_entry_info,
179  const bool skip_hashtable_caching);
180 
181 #ifdef HAVE_CUDA
182  std::shared_ptr<BaselineHashTable> initHashTableOnGpu(
183  const std::vector<JoinColumn>& join_columns,
184  const std::vector<JoinColumnTypeInfo>& join_column_types,
185  const std::vector<JoinBucketInfo>& join_bucket_info,
186  const BaselineHashTableEntryInfo hash_table_entry_info,
187  const size_t device_id);
188 
189  std::shared_ptr<BaselineHashTable> copyCpuHashTableToGpu(
190  std::shared_ptr<BaselineHashTable>& cpu_hash_table,
191  const size_t device_id);
192 #endif // HAVE_CUDA
193 
195  const size_t) override;
196 
197  std::string toString(const ExecutorDeviceType device_type,
198  const int device_id = 0,
199  bool raw = false) const override;
200 
202  const int device_id) const override;
203 
204  llvm::Value* codegenSlot(const CompilationOptions&, const size_t) override {
205  UNREACHABLE(); // not applicable for bounding box intersection
206  return nullptr;
207  }
208 
210  return query_hints_;
211  }
212 
213  size_t getEntryCount() const {
214  auto hash_table = getHashTableForDevice(0);
215  CHECK(hash_table);
216  return hash_table->getEntryCount();
217  }
218 
219  size_t getEmittedKeysCount() const {
220  auto hash_table = getHashTableForDevice(0);
221  CHECK(hash_table);
222  return hash_table->getEmittedKeysCount();
223  }
224 
225  size_t getComponentBufferSize() const noexcept override {
226  CHECK(!hash_tables_for_device_.empty());
227  auto hash_table = hash_tables_for_device_.front();
228  CHECK(hash_table);
229  return hash_table->getEntryCount() * sizeof(int32_t);
230  }
231 
232  size_t shardCount() const {
234  return 0;
235  }
238  }
239 
241  const std::vector<InnerOuter>& inner_outer_pairs) const;
242 
243  shared::TableKey getInnerTableId() const noexcept override;
244 
245  int getInnerTableRteIdx() const noexcept override {
246  CHECK(!inner_outer_pairs_.empty());
247  const auto first_inner_col = inner_outer_pairs_.front().first;
248  return first_inner_col->get_rte_idx();
249  }
250 
251  size_t getKeyBufferSize() const noexcept {
252  const auto key_component_width = getKeyComponentWidth();
253  CHECK(key_component_width == 4 || key_component_width == 8);
254  const auto key_component_count = getKeyComponentCount();
256  return getEntryCount() * key_component_count * key_component_width;
257  } else {
258  return getEntryCount() * (key_component_count + 1) * key_component_width;
259  }
260  }
261 
262  size_t offsetBufferOff() const noexcept override {
263  return getKeyBufferSize();
264  }
265 
266  size_t countBufferOff() const noexcept override {
269  } else {
270  return getKeyBufferSize();
271  }
272  }
273 
274  size_t payloadBufferOff() const noexcept override {
277  } else {
278  return getKeyBufferSize();
279  }
280  }
281 
282  std::string getHashJoinType() const final {
283  return "BoundingBoxIntersect";
284  }
285 
286  bool isBitwiseEq() const override;
287 
288  std::shared_ptr<HashTable> initHashTableOnCpuFromCache(
289  QueryPlanHash key,
290  CacheItemType item_type,
291  DeviceIdentifier device_identifier);
292 
293  std::optional<std::pair<size_t, size_t>> getApproximateTupleCountFromCache(
294  QueryPlanHash key,
295  CacheItemType item_type,
296  DeviceIdentifier device_identifier);
297 
299  CacheItemType item_type,
300  std::shared_ptr<HashTable> hashtable_ptr,
301  DeviceIdentifier device_identifier,
302  size_t hashtable_building_time);
303 
304  llvm::Value* codegenKey(const CompilationOptions&);
305  std::vector<llvm::Value*> codegenManyKey(const CompilationOptions&);
306 
307  std::optional<BoundingBoxIntersectMetaInfo> getBoundingBoxIntersectMetaInfo() {
309  }
310 
312  std::vector<InnerOuter> inner_outer_pairs;
313  const size_t num_elements;
314  const size_t chunk_key_hash;
315  const SQLOps optype;
316  const size_t max_hashtable_size;
317  const double bucket_threshold;
318  const std::vector<double> inverse_bucket_sizes = {};
319  };
320 
323  auto hash = info.chunk_key_hash;
324  for (InnerOuter inner_outer : info.inner_outer_pairs) {
325  auto inner_col = inner_outer.first;
326  auto rhs_col_var = dynamic_cast<const Analyzer::ColumnVar*>(inner_outer.second);
327  auto outer_col = rhs_col_var ? rhs_col_var : inner_col;
328  boost::hash_combine(hash, inner_col->toString());
329  if (inner_col->get_type_info().is_string()) {
330  boost::hash_combine(hash, outer_col->toString());
331  }
332  }
333  boost::hash_combine(hash, info.num_elements);
334  boost::hash_combine(hash, info.optype);
335  boost::hash_combine(hash, info.max_hashtable_size);
336  boost::hash_combine(hash, info.bucket_threshold);
337  boost::hash_combine(hash, info.inverse_bucket_sizes);
338  return hash;
339  }
340 
342  const size_t max_hashtable_size,
343  const double bucket_threshold,
344  const std::vector<double>& bucket_sizes,
345  std::vector<std::vector<Fragmenter_Namespace::FragmentInfo>>& fragments_per_device,
346  int device_count) {
347  for (int device_id = 0; device_id < device_count; ++device_id) {
348  auto hash_val = boost::hash_value(hashtable_cache_key_[device_id]);
349  boost::hash_combine(hash_val, max_hashtable_size);
350  boost::hash_combine(hash_val, bucket_threshold);
351  boost::hash_combine(hash_val, bucket_sizes);
352  boost::hash_combine(hash_val,
353  HashJoin::collectFragmentIds(fragments_per_device[device_id]));
354  hashtable_cache_key_[device_id] = hash_val;
355  hash_table_cache_->addQueryPlanDagForTableKeys(hashtable_cache_key_[device_id],
356  table_keys_);
357  }
358  }
359 
360  QueryPlanHash getCacheKey(int device_id) const {
361  return hashtable_cache_key_[device_id];
362  }
363 
364  const std::vector<InnerOuter>& getInnerOuterPairs() const {
365  return inner_outer_pairs_;
366  }
367 
368  void setBoundingBoxIntersectionMetaInfo(size_t max_table_size_bytes,
369  double bucket_threshold,
370  std::vector<double>& bucket_sizes) {
371  BoundingBoxIntersectMetaInfo bbox_intersect_meta_info;
372  bbox_intersect_meta_info.bucket_sizes = bucket_sizes;
373  bbox_intersect_meta_info.bbox_intersect_max_table_size_bytes = max_table_size_bytes;
374  bbox_intersect_meta_info.bbox_intersect_bucket_threshold = bucket_threshold;
375  HashtableCacheMetaInfo meta_info;
376  meta_info.bbox_intersect_meta_info = bbox_intersect_meta_info;
377  hashtable_cache_meta_info_ = meta_info;
378  }
379 
380  const std::shared_ptr<Analyzer::BinOper> condition_;
382  const std::vector<InputTableInfo>& query_infos_;
384 
385  Executor* executor_;
387 
388  std::vector<InnerOuter> inner_outer_pairs_;
389  const int device_count_;
391 
396 
397  std::optional<HashType>
398  layout_override_; // allows us to use a 1:many hash table for many:many
399 
401 
402  // cache a hashtable based on the cache key C
403  // C = query plan dag D + join col J + hashtable params P
404  // by varying a parameters P for bounding box intersect, we can build
405  // multiple (and different) hashtables for the same query plan dag D
406  // in this scenario, the rule we follow is cache everything
407  // with the assumption that varying P is intended by user
408  // for the performance and so worth to keep it for future recycling
409  static std::unique_ptr<HashtableRecycler> hash_table_cache_;
410  // auto tuner cache is maintained separately with hashtable cache
411  static std::unique_ptr<BoundingBoxIntersectTuningParamRecycler> auto_tuner_cache_;
412 
415  std::vector<QueryPlanHash> hashtable_cache_key_;
417  std::unordered_set<size_t> table_keys_;
419 };
static std::vector< int > collectFragmentIds(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments)
Definition: HashJoin.cpp:461
const RegisteredQueryHint & getRegisteredQueryHint()
static std::unique_ptr< BoundingBoxIntersectTuningParamRecycler > auto_tuner_cache_
size_t DeviceIdentifier
Definition: DataRecycler.h:129
void setBoundingBoxIntersectionMetaInfo(size_t max_table_size_bytes, double bucket_threshold, std::vector< double > &bucket_sizes)
JoinType
Definition: sqldefs.h:174
llvm::Value * codegenSlot(const CompilationOptions &, const size_t) override
virtual std::pair< size_t, size_t > computeHashTableCounts(const size_t shard_count, const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector< InnerOuter > &inner_outer_pairs) const
Data_Namespace::MemoryLevel getMemoryLevel() const noexceptoverride
size_t calculateHashTableSize(size_t number_of_dimensions, size_t emitted_keys_count, size_t entry_count) const
std::pair< const Analyzer::ColumnVar *, const Analyzer::Expr * > InnerOuter
Definition: HashJoin.h:105
llvm::Value * codegenKey(const CompilationOptions &)
shared::TableKey getInnerTableId() const noexceptoverride
HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &, const size_t) override
std::string QueryPlanDAG
SQLOps
Definition: sqldefs.h:28
void reify(const HashType preferred_layout)
std::vector< std::shared_ptr< HashTable > > hash_tables_for_device_
Definition: HashJoin.h:377
const std::shared_ptr< Analyzer::BinOper > condition_
#define UNREACHABLE()
Definition: Logger.h:338
std::shared_ptr< BaselineHashTable > initHashTableOnCpu(const std::vector< JoinColumn > &join_columns, const std::vector< JoinColumnTypeInfo > &join_column_types, const std::vector< JoinBucketInfo > &join_bucket_info, const BaselineHashTableEntryInfo hash_table_entry_info, const bool skip_hashtable_caching)
DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type, const int device_id) const override
BoundingBoxIntersectJoinHashTable(const std::shared_ptr< Analyzer::BinOper > condition, const JoinType join_type, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs, const int device_count, const RegisteredQueryHint &query_hints, const HashTableBuildDagMap &hashtable_build_dag_map, const TableIdToNodeMap &table_id_to_node_map)
virtual std::pair< size_t, size_t > approximateTupleCount(const std::vector< double > &inverse_bucket_sizes_for_dimension, std::vector< ColumnsForDevice > &, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
std::vector< llvm::Value * > codegenManyKey(const CompilationOptions &)
#define CHECK_GT(x, y)
Definition: Logger.h:305
ExecutorDeviceType
void setInverseBucketSizeInfo(const std::vector< double > &inverse_bucket_sizes, std::vector< ColumnsForDevice > &columns_per_device, const size_t device_count)
std::optional< BoundingBoxIntersectMetaInfo > bbox_intersect_meta_info
const std::vector< InnerOuter > & getInnerOuterPairs() const
ColumnsForDevice fetchColumnsForDevice(const std::vector< Fragmenter_Namespace::FragmentInfo > &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
std::optional< BoundingBoxIntersectMetaInfo > getBoundingBoxIntersectMetaInfo()
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
CacheItemType
Definition: DataRecycler.h:38
const Data_Namespace::MemoryLevel memory_level_
static std::unique_ptr< HashtableRecycler > hash_table_cache_
void generateCacheKey(const size_t max_hashtable_size, const double bucket_threshold, const std::vector< double > &bucket_sizes, std::vector< std::vector< Fragmenter_Namespace::FragmentInfo >> &fragments_per_device, int device_count)
static std::shared_ptr< BoundingBoxIntersectJoinHashTable > getInstance(const std::shared_ptr< Analyzer::BinOper > condition, const std::vector< InputTableInfo > &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Make hash table from an in-flight SQL query's parse tree etc.
const std::vector< InputTableInfo > & query_infos_
virtual void reifyImpl(std::vector< ColumnsForDevice > &columns_per_device, const Fragmenter_Namespace::TableInfo &query_info, const HashType layout, const size_t shard_count, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
QueryPlanHash getAlternativeCacheKey(AlternativeCacheKeyForBoundingBoxIntersection &info)
size_t payloadBufferOff() const noexceptoverride
HashTable * getHashTableForDevice(const size_t device_id) const
Definition: HashJoin.h:295
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
HashType getHashType() const noexceptoverride
std::set< DecodedJoinHashBufferEntry > DecodedJoinHashBufferSet
Definition: HashTable.h:72
static void markCachedItemAsDirty(size_t table_key)
virtual void reifyWithLayout(const HashType layout)
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
size_t QueryPlanHash
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3525
void putHashTableOnCpuToCache(QueryPlanHash key, CacheItemType item_type, std::shared_ptr< HashTable > hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
#define CHECK(condition)
Definition: Logger.h:291
void reifyForDevice(const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const int device_id, const logger::ThreadLocalIds parent_thread_local_ids)
std::vector< double > bucket_sizes
QueryPlanHash getCacheKey(int device_id) const
std::optional< std::pair< size_t, size_t > > getApproximateTupleCountFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
static size_t getShardCountForCondition(const Analyzer::BinOper *condition, const Executor *executor, const std::vector< InnerOuter > &inner_outer_pairs)
std::shared_ptr< HashTable > initHashTableOnCpuFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:136
std::string toString(const ExecutorDeviceType device_type, const int device_id=0, bool raw=false) const override
HashType
Definition: HashTable.h:19
static BoundingBoxIntersectTuningParamRecycler * getBoundingBoxIntersectTuningParamCache()
static bool layoutRequiresAdditionalBuffers(HashType layout) noexcept
Definition: HashJoin.h:175
size_t getComponentBufferSize() const noexceptoverride