OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HashtableRecycler.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "DataRecycler.h"
21 
24  std::string inner_col_info_string;
25 };
26 
30  std::vector<double> bucket_sizes;
31 };
32 
34  std::optional<QueryPlanMetaInfo> query_plan_meta_info;
35  std::optional<OverlapsHashTableMetaInfo> overlaps_meta_info;
36 };
37 
39  : public DataRecycler<std::shared_ptr<HashTable>, HashtableCacheMetaInfo> {
40  public:
42  : DataRecycler({hashtable_type},
46 
// Hashtable-specific override of the recycler's cached-item getter (const member).
// Parameters: the query plan hash `key`, the cache `item_type`, the
// `device_identifier`, and an optional `meta_info` that defaults to std::nullopt.
// Returns a std::shared_ptr<HashTable>.
// NOTE(review): what is returned on a cache miss is decided in the implementation
// file and is not visible from this declaration -- confirm before relying on it.
47  std::shared_ptr<HashTable> getItemFromCache(
48  QueryPlanHash key,
49  CacheItemType item_type,
50  DeviceIdentifier device_identifier,
51  std::optional<HashtableCacheMetaInfo> meta_info = std::nullopt) const override;
52 
// Hashtable-specific override of the recycler's cache-insertion entry point.
// Parameters: the query plan hash `key`, the hashtable `item_ptr` (shared
// ownership), the cache `item_type`, the `device_identifier`, plus `item_size`
// and `compute_time` (both size_t) and an optional `meta_info` that defaults to
// std::nullopt.
// NOTE(review): units of item_size / compute_time (presumably bytes and a time
// duration) are not stated here -- confirm against the implementation.
53  void putItemToCache(
54  QueryPlanHash key,
55  std::shared_ptr<HashTable> item_ptr,
56  CacheItemType item_type,
57  DeviceIdentifier device_identifier,
58  size_t item_size,
59  size_t compute_time,
60  std::optional<HashtableCacheMetaInfo> meta_info = std::nullopt) override;
61 
62  // Intentionally empty: the hashtable recycler has nothing to do at cache initialization.
63  void initCache() override {}
64 
// Override of the recycler's clearCache(); defined out of line.
// NOTE(review): presumably drops every cached hashtable -- confirm in the implementation.
65  void clearCache() override;
66 
// Override of the recycler's toString() (const member); returns a std::string.
// NOTE(review): presumably a human-readable dump of the cache state -- the exact
// format is defined in the implementation file.
67  std::string toString() const override;
68 
70  const OverlapsHashTableMetaInfo& candidate_bucket_dim,
71  const OverlapsHashTableMetaInfo& target_bucket_dim) const;
72 
// Static helper returning a pair of (QueryPlanHash, HashtableCacheMetaInfo).
// Inputs are the join's inner/outer column pairs, the operator type, the join
// type, the hashtable build DAG map and the executor.
// Takes the same parameter list as getHashtableKeyString(), which returns a
// QueryPlan string instead of a hash as the first pair element.
// NOTE(review): how the hash is derived from these inputs is not visible here.
73  static std::pair<QueryPlanHash, HashtableCacheMetaInfo> getHashtableCacheKey(
74  const std::vector<InnerOuter>& inner_outer_pairs,
75  const SQLOps op_type,
76  const JoinType join_type,
77  const HashTableBuildDagMap& hashtable_build_dag_map,
78  Executor* executor);
79 
// Static helper returning a pair of (QueryPlan, HashtableCacheMetaInfo).
// Takes the same parameter list as getHashtableCacheKey(): the join's
// inner/outer column pairs, the operator type, the join type, the hashtable
// build DAG map and the executor.
// NOTE(review): the layout of the returned key string is defined in the
// implementation file -- do not assume a format from this declaration.
80  static std::pair<QueryPlan, HashtableCacheMetaInfo> getHashtableKeyString(
81  const std::vector<InnerOuter>& inner_outer_pairs,
82  const SQLOps op_type,
83  const JoinType join_type,
84  const HashTableBuildDagMap& hashtable_build_dag_map,
85  Executor* executor);
86 
// Static helper returning a std::string built from the inner and outer join
// columns and the executor.
// NOTE(review): both column vectors are taken by non-const reference; whether
// the function modifies them is not visible from this declaration -- confirm.
87  static std::string getJoinColumnInfoString(
88  std::vector<const Analyzer::ColumnVar*>& inner_cols,
89  std::vector<const Analyzer::ColumnVar*>& outer_cols,
90  Executor* executor);
91 
// Static predicate returning a bool for the given table id, using the
// table-id-to-node map and the `need_dict_translation` flag.
// NOTE(review): the exact conditions under which caching is considered safe are
// defined in the implementation file -- not derivable from this declaration.
92  static bool isSafeToCacheHashtable(const TableIdToNodeMap& table_id_to_node_map,
93  bool need_dict_translation,
94  const int table_id);
95 
96  // Test-only helper for exercising the data recycler.
97  // It is tricky to obtain a hashtable cache key when test code only knows
98  // the SQL text of the target query,
99  // so this function accesses the hashtable recycler in an unorthodox way
100  // in order to hand a cached hashtable back to the test.
101  // The set "visited" holds the cached hashtable keys retrieved so far;
102  // based on that, this function iterates over the hashtable cache and returns a
103  // cached hashtable whose cache key has not been visited yet.
104  // For instance, when called with an empty "visited" set, it returns
105  // the first hashtable that its iterator visits.
106  std::tuple<QueryPlanHash,
107  std::shared_ptr<HashTable>,
108  std::optional<HashtableCacheMetaInfo>>
109  getCachedHashtableWithoutCacheKey(std::set<size_t>& visited,
110  CacheItemType hash_table_type,
111  DeviceIdentifier device_identifier);
112 
113  private:
// Private override (const member) returning a bool for the given key, item
// type and device identifier; `meta_info` is optional and defaults to
// std::nullopt.
// The caller passes a std::lock_guard<std::mutex> by reference.
// NOTE(review): presumably this means the caller must already hold the cache
// mutex when calling -- confirm against the base class and call sites.
114  bool hasItemInCache(
115  QueryPlanHash key,
116  CacheItemType item_type,
117  DeviceIdentifier device_identifier,
118  std::lock_guard<std::mutex>& lock,
119  std::optional<HashtableCacheMetaInfo> meta_info = std::nullopt) const override;
120 
// Private override taking the key, item type and device identifier of the
// cache entry concerned; `meta_info` is optional and defaults to std::nullopt.
// The caller passes a std::lock_guard<std::mutex> by reference.
// NOTE(review): presumably this means the caller must already hold the cache
// mutex when calling -- confirm against the base class and call sites.
121  void removeItemFromCache(
122  QueryPlanHash key,
123  CacheItemType item_type,
124  DeviceIdentifier device_identifier,
125  std::lock_guard<std::mutex>& lock,
126  std::optional<HashtableCacheMetaInfo> meta_info = std::nullopt) override;
127 
129  CacheItemType item_type,
130  DeviceIdentifier device_identifier,
131  size_t required_size,
132  std::lock_guard<std::mutex>& lock,
133  std::optional<HashtableCacheMetaInfo> meta_info = std::nullopt) override;
134 };
bool hasItemInCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) const override
size_t DeviceIdentifier
Definition: DataRecycler.h:111
JoinType
Definition: sqldefs.h:108
HashtableRecycler(CacheItemType hashtable_type, int num_gpus)
void putItemToCache(QueryPlanHash key, std::shared_ptr< HashTable > item_ptr, CacheItemType item_type, DeviceIdentifier device_identifier, size_t item_size, size_t compute_time, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
std::string inner_col_info_string
static bool isSafeToCacheHashtable(const TableIdToNodeMap &table_id_to_node_map, bool need_dict_translation, const int table_id)
static std::pair< QueryPlanHash, HashtableCacheMetaInfo > getHashtableCacheKey(const std::vector< InnerOuter > &inner_outer_pairs, const SQLOps op_type, const JoinType join_type, const HashTableBuildDagMap &hashtable_build_dag_map, Executor *executor)
SQLOps
Definition: sqldefs.h:29
std::optional< OverlapsHashTableMetaInfo > overlaps_meta_info
std::optional< QueryPlanMetaInfo > query_plan_meta_info
static std::string getJoinColumnInfoString(std::vector< const Analyzer::ColumnVar * > &inner_cols, std::vector< const Analyzer::ColumnVar * > &outer_cols, Executor *executor)
void cleanupCacheForInsertion(CacheItemType item_type, DeviceIdentifier device_identifier, size_t required_size, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
static std::pair< QueryPlan, HashtableCacheMetaInfo > getHashtableKeyString(const std::vector< InnerOuter > &inner_outer_pairs, const SQLOps op_type, const JoinType join_type, const HashTableBuildDagMap &hashtable_build_dag_map, Executor *executor)
void initCache() override
CacheItemType
Definition: DataRecycler.h:36
std::unordered_map< int, const RelAlgNode * > TableIdToNodeMap
virtual std::shared_ptr< HashTable > getItemFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) const =0
void removeItemFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier, std::lock_guard< std::mutex > &lock, std::optional< HashtableCacheMetaInfo > meta_info=std::nullopt) override
size_t QueryPlanHash
std::string toString() const override
void clearCache() override
bool checkOverlapsHashtableBucketCompatability(const OverlapsHashTableMetaInfo &candidate_bucket_dim, const OverlapsHashTableMetaInfo &target_bucket_dim) const
std::tuple< QueryPlanHash, std::shared_ptr< HashTable >, std::optional< HashtableCacheMetaInfo > > getCachedHashtableWithoutCacheKey(std::set< size_t > &visited, CacheItemType hash_table_type, DeviceIdentifier device_identifier)
std::vector< double > bucket_sizes
std::string QueryPlan
std::unordered_map< JoinColumnsInfo, HashTableBuildDag > HashTableBuildDagMap