OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashtableRecycler.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "DataRecycler.h"
21 #include "QueryEngine/QueryHint.h"
22 
26  std::vector<double> bucket_sizes;
27 };
28 
30  std::optional<OverlapsHashTableMetaInfo> overlaps_meta_info;
31  std::optional<RegisteredQueryHint> registered_query_hint;
32 
34  : overlaps_meta_info(std::nullopt), registered_query_hint(std::nullopt){};
35 };
36 
38  std::vector<QueryPlanHash> hashed_query_plan_dag;
40  std::unordered_set<size_t> table_keys;
41 
42  HashtableAccessPathInfo(int device_count)
44  // each shard can build a different hash table,
45  // and each device fetches a different set of sharded columns
46  // (based on round-robin shard distribution),
47  // so we need to keep a cache key per device
48  // (all devices have the same key if the table is not sharded)
50  }
51 };
52 
54  : public DataRecycler<std::shared_ptr<HashTable>, HashtableCacheMetaInfo> {
55  public:
57  : DataRecycler({hashtable_type},
61 
62  std::shared_ptr<HashTable> getItemFromCache(
63  QueryPlanHash key,
64  CacheItemType item_type,
65  DeviceIdentifier device_identifier,
66  std::optional<HashtableCacheMetaInfo> meta_info = std::nullopt) override;
67 
68  void putItemToCache(
69  QueryPlanHash key,
70  std::shared_ptr<HashTable> item_ptr,
71  CacheItemType item_type,
72  DeviceIdentifier device_identifier,
73  size_t item_size,
74  size_t compute_time,
75  std::optional<HashtableCacheMetaInfo> meta_info = std::nullopt) override;
76 
77  // the hashtable recycler has nothing to initialize
78  void initCache() override {}
79 
80  void clearCache() override;
81 
82  void markCachedItemAsDirty(size_t table_key,
83  std::unordered_set<QueryPlanHash>& key_set,
84  CacheItemType item_type,
85  DeviceIdentifier device_identifier) override;
86 
87  std::string toString() const override;
88 
90  const OverlapsHashTableMetaInfo& candidate_bucket_dim,
91  const OverlapsHashTableMetaInfo& target_bucket_dim) const;
92 
94  const std::vector<InnerOuter>& inner_outer_pairs,
95  const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_infos_pairs,
96  const SQLOps op_type,
97  const JoinType join_type,
98  const HashTableBuildDagMap& hashtable_build_dag_map,
99  int device_count,
100  int shard_count,
101  const std::vector<std::vector<Fragmenter_Namespace::FragmentInfo>>&
102  frags_for_device,
103  Executor* executor);
104 
105  static size_t getJoinColumnInfoHash(std::vector<const Analyzer::ColumnVar*>& inner_cols,
106  std::vector<const Analyzer::ColumnVar*>& outer_cols,
107  Executor* executor);
108 
109  static bool isSafeToCacheHashtable(
110  const TableIdToNodeMap& table_id_to_node_map,
111  bool need_dict_translation,
112  const std::vector<InnerOuterStringOpInfos>& inner_outer_string_op_info_pairs,
113  const int table_id);
114 
115  static bool isInvalidHashTableCacheKey(const std::vector<QueryPlanHash>& cache_keys);
116 
117  // this function is required to test the data recycler
118  // specifically, it is tricky to get a hashtable cache key when we only know
119  // the target query's SQL in test code
120  // so this function accesses our hashtable recycler in a non-standard way
121  // but provides the cached hashtable needed to perform the test
122  // the set "visited" contains the cached hashtable keys that we have retrieved so far
123  // based on that, this function iterates over the hashtable cache and returns a cached
124  // hashtable whose cache key has not been visited yet
125  // for instance, if we call this function with an empty "visited" set, we return
126  // the first hashtable that the iterator visits
127  std::tuple<QueryPlanHash,
128  std::shared_ptr<HashTable>,
129  std::optional<HashtableCacheMetaInfo>>
130  getCachedHashtableWithoutCacheKey(std::set<size_t>& visited,
131  CacheItemType hash_table_type,
132  DeviceIdentifier device_identifier);
133 
134  void addQueryPlanDagForTableKeys(size_t hashed_query_plan_dag,
135  const std::unordered_set<size_t>& table_keys);
136 
137  std::optional<std::unordered_set<size_t>> getMappedQueryPlanDagsWithTableKey(
138  size_t table_key) const;
139 
140  void removeTableKeyInfoFromQueryPlanDagMap(size_t table_key);
141 
142  private:
143  bool hasItemInCache(
144  QueryPlanHash key,
145  CacheItemType item_type,
146  DeviceIdentifier device_identifier,
147  std::lock_guard<std::mutex>& lock,
148  std::optional<HashtableCacheMetaInfo> meta_info = std::nullopt) const override;
149 
150  void removeItemFromCache(
151  QueryPlanHash key,
152  CacheItemType item_type,
153  DeviceIdentifier device_identifier,
154  std::lock_guard<std::mutex>& lock,
155  std::optional<HashtableCacheMetaInfo> meta_info = std::nullopt) override;
156 
158  CacheItemType item_type,
159  DeviceIdentifier device_identifier,
160  size_t required_size,
161  std::lock_guard<std::mutex>& lock,
162  std::optional<HashtableCacheMetaInfo> meta_info = std::nullopt) override;
163 
164  // we maintain the mapping from a hashed table_key -> a set of hashed query plan DAGs
165  // only in the hashtable recycler to minimize the memory footprint,
166  // so the other types of data recyclers related to the hashtable cache,
167  // i.e., the hashing scheme recycler and the overlaps tuning param recycler, should use
168  // the key_set that we retrieve from here; see the `markCachedItemAsDirty` function
169  std::unordered_map<size_t, std::unordered_set<size_t>> table_key_to_query_plan_dag_map_;
170 };
bool hasItemInCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) const override
size_t DeviceIdentifier
Definition: DataRecycler.h:129
JoinType
Definition: sqldefs.h:136
HashtableRecycler(CacheItemType hashtable_type, int num_gpus)
void putItemToCache(QueryPlanHash key, std::shared_ptr< HashTable > item_ptr, CacheItemType item_type, DeviceIdentifier device_identifier, size_t item_size, size_t compute_time, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
static bool isInvalidHashTableCacheKey(const std::vector< QueryPlanHash > &cache_keys)
HashtableAccessPathInfo(int device_count)
constexpr QueryPlanHash EMPTY_HASHED_PLAN_DAG_KEY
SQLOps
Definition: sqldefs.h:29
std::optional< OverlapsHashTableMetaInfo > overlaps_meta_info
static size_t getJoinColumnInfoHash(std::vector< const Analyzer::ColumnVar * > &inner_cols, std::vector< const Analyzer::ColumnVar * > &outer_cols, Executor *executor)
void addQueryPlanDagForTableKeys(size_t hashed_query_plan_dag, const std::unordered_set< size_t > &table_keys)
void cleanupCacheForInsertion(CacheItemType item_type, DeviceIdentifier device_identifier, size_t required_size, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
std::optional< RegisteredQueryHint > registered_query_hint
std::unordered_set< size_t > table_keys
std::vector< QueryPlanHash > hashed_query_plan_dag
void initCache() override
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
std::unordered_map< size_t, std::unordered_set< size_t > > table_key_to_query_plan_dag_map_
CacheItemType
Definition: DataRecycler.h:38
std::unordered_map< int, const RelAlgNode * > TableIdToNodeMap
void removeItemFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
HashtableCacheMetaInfo meta_info
size_t QueryPlanHash
std::optional< std::unordered_set< size_t > > getMappedQueryPlanDagsWithTableKey(size_t table_key) const
std::string toString() const override
void markCachedItemAsDirty(size_t table_key, std::unordered_set< QueryPlanHash > &key_set, CacheItemType item_type, DeviceIdentifier device_identifier) override
void clearCache() override
bool checkOverlapsHashtableBucketCompatability(const OverlapsHashTableMetaInfo &candidate_bucket_dim, const OverlapsHashTableMetaInfo &target_bucket_dim) const
void removeTableKeyInfoFromQueryPlanDagMap(size_t table_key)
std::tuple< QueryPlanHash, std::shared_ptr< HashTable >, std::optional< HashtableCacheMetaInfo > > getCachedHashtableWithoutCacheKey(std::set< size_t > &visited, CacheItemType hash_table_type, DeviceIdentifier device_identifier)
virtual std::shared_ptr< HashTable > getItemFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt)=0
std::vector< double > bucket_sizes
static HashtableAccessPathInfo getHashtableAccessPathInfo(const std::vector< InnerOuter > &inner_outer_pairs, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_infos_pairs, const SQLOps op_type, const JoinType join_type, const HashTableBuildDagMap &hashtable_build_dag_map, int device_count, int shard_count, const std::vector< std::vector< Fragmenter_Namespace::FragmentInfo >> &frags_for_device, Executor *executor)
static bool isSafeToCacheHashtable(const TableIdToNodeMap &table_id_to_node_map, bool need_dict_translation, const std::vector< InnerOuterStringOpInfos > &inner_outer_string_op_info_pairs, const int table_id)