OmniSciDB  2e3a973ef4
ForeignStorageCache.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
26 #pragma once
27 
28 #include <gtest/gtest.h>
29 #include "../Shared/mapd_shared_mutex.h"
34 #include "ForeignDataWrapper.h"
35 
/**
 * Exception type named for the "cache too small" failure case.
 *
 * Derives from std::runtime_error, so existing handlers that catch
 * std::runtime_error or std::exception keep working. The throw sites are not
 * part of this header.
 */
class CacheTooSmallException : public std::runtime_error {
 public:
  /**
   * @param msg Description of the failure; forwarded unchanged to
   *            std::runtime_error and later returned by what().
   *
   * Marked explicit so that a std::string is never silently converted into an
   * exception object (for example when passed to a function expecting one).
   */
  explicit CacheTooSmallException(const std::string& msg) : std::runtime_error(msg) {}
};
40 
// Selects which kinds of tables the disk cache serves. Enumerator order (and
// therefore the underlying values 0..3) is unchanged.
enum class DiskCacheLevel {
  none,     // isEnabled() reports false for this level
  fsi,      // accepted by isEnabledForFSI()
  non_fsi,  // accepted by isEnabledForMutableTables()
  all       // accepted by both isEnabledForFSI() and isEnabledForMutableTables()
};
// Filesystem path for the cache. Presumably the cache's root directory;
// confirm against the ForeignStorageCache implementation.
43  std::string path;
// Cache size limit. The default 21474836480 equals 20 * 2^30, i.e. 20 GiB
// expressed in bytes.
45  uint64_t size_limit = 21474836480; // 20GB default
// Reader thread count; defaults to 0.
// NOTE(review): what 0 means (e.g. "choose automatically") is not visible in
// this header; confirm at the point of use.
46  size_t num_reader_threads = 0;
47  inline bool isEnabledForMutableTables() const {
48  return enabled_level == DiskCacheLevel::non_fsi ||
49  enabled_level == DiskCacheLevel::all;
50  }
51  inline bool isEnabledForFSI() const {
52  return enabled_level == DiskCacheLevel::fsi || enabled_level == DiskCacheLevel::all;
53  }
54  inline bool isEnabled() const { return enabled_level != DiskCacheLevel::none; }
55 };
56 
57 using namespace Data_Namespace;
58 
59 namespace foreign_storage {
60 
// NOTE(review): the line declaring the enclosing type is missing from this
// listing; it is presumably TableEvictionTracker (the mapped type of
// ForeignStorageCache::eviction_tracker_map_). Confirm.
//
// Owned eviction policy, held through the CacheEvictionAlgorithm base type
// and initialised by default with an LRUEvictionAlgorithm.
62  // We can swap out different eviction algorithms here.
63  std::unique_ptr<CacheEvictionAlgorithm> eviction_alg_ =
64  std::make_unique<LRUEvictionAlgorithm>();
// Page counter, starting at 0. Presumably the number of pages currently
// accounted to this tracker; confirm in the implementation.
65  size_t num_pages_ = 0;
66 };
67 
69  public:
// Public interface of the cache. Only declarations are visible here; the
// definitions live outside this header, so behavioural notes below are hedged.
//
// Constructs the cache from a DiskCacheConfig.
// NOTE(review): the constructor is not explicit, so a DiskCacheConfig can
// convert implicitly to a ForeignStorageCache; confirm that is intended.
70  ForeignStorageCache(const DiskCacheConfig& config);
71 
// Chunk insertion: by a list of keys, or by a single key plus a raw buffer
// pointer (buffer ownership is not expressed in the signature).
80  void cacheTableChunks(const std::vector<ChunkKey>& chunk_keys);
81  void cacheChunk(const ChunkKey&, AbstractBuffer*);
82 
// Returns a raw AbstractBuffer pointer. Presumably nullptr when the chunk is
// not cached; confirm.
83  AbstractBuffer* getCachedChunkIfExists(const ChunkKey&);
// Metadata queries and insertion. getCachedMetadataVecForKeyPrefix takes its
// ChunkMetadataVector by non-const reference, i.e. as an output parameter.
84  bool isMetadataCached(const ChunkKey&) const;
85  void cacheMetadataVec(const ChunkMetadataVector&);
86  void getCachedMetadataVecForKeyPrefix(ChunkMetadataVector&, const ChunkKey&) const;
87  bool hasCachedMetadataForKeyPrefix(const ChunkKey&) const;
// Removal, for one table prefix or for everything.
88  void clearForTablePrefix(const ChunkKey&);
89  void clear();
// Sets the cache limit. NOTE(review): the unit is presumably bytes, matching
// what getLimit() reports; confirm.
90  void setLimit(uint64_t limit);
91  std::vector<ChunkKey> getCachedChunksForKeyPrefix(const ChunkKey&) const;
// Returns a bool status and takes the metadata vector as an output parameter.
// NOTE(review): the meaning of the returned flag is not visible here.
92  bool recoverCacheForTable(ChunkMetadataVector&, const ChunkKey&);
// Maps each key to a raw buffer pointer; ownership is not expressed in the type.
93  std::map<ChunkKey, AbstractBuffer*> getChunkBuffersForCaching(
94  const std::vector<ChunkKey>& chunk_keys) const;
95  void deleteBufferIfExists(const ChunkKey& chunk_key);
96 
97  // Exists for testing purposes.
98  inline uint64_t getLimit() const {
99  return max_pages_per_table_ * global_file_mgr_->getDefaultPageSize();
100  }
101  inline size_t getNumCachedChunks() const { return cached_chunks_.size(); }
102  inline size_t getNumCachedMetadata() const { return cached_metadata_.size(); }
103  size_t getNumChunksAdded() const { return num_chunks_added_; }
104  size_t getNumMetadataAdded() const { return num_metadata_added_; }
105 
106  // Useful for debugging.
// Each returns a std::string and is const. Presumably a human-readable
// rendering of the cached chunk keys, the cached metadata keys and the
// eviction queue respectively; the exact format is defined in the
// implementation.
107  std::string dumpCachedChunkEntries() const;
108  std::string dumpCachedMetadataEntries() const;
109  std::string dumpEvictionQueue() const;
110 
112  return global_file_mgr_.get();
113  }
114 
// Returns a directory path as a string for the given key. Presumably the
// on-disk location of that table's cache data; confirm.
115  std::string getCacheDirectoryForTablePrefix(const ChunkKey&) const;
// NOTE(review): going by the name, only metadata entries whose fragment id is
// >= frag_id are cached; confirm against the definition.
116  void cacheMetadataWithFragIdGreaterOrEqualTo(const ChunkMetadataVector& metadata_vec,
117  const int frag_id);
// Public counterpart of the private evictThenEraseChunkUnlocked(). Presumably
// acquires the relevant lock before doing the same work; confirm.
118  void evictThenEraseChunk(const ChunkKey&);
119 
120  private:
121  // These methods are private and assume locks are already acquired when called.
// Iterator-based erase helpers return a std::set<ChunkKey>::iterator.
// Presumably the position following the erased element, mirroring
// std::set::erase; confirm.
122  std::set<ChunkKey>::iterator eraseChunk(const std::set<ChunkKey>::iterator&);
// Key-based overload; the caller supplies the TableEvictionTracker to update.
123  void eraseChunk(const ChunkKey&, TableEvictionTracker& tracker);
124  std::set<ChunkKey>::iterator eraseChunkByIterator(
125  const std::set<ChunkKey>::iterator& chunk_it);
// Variant of evictThenEraseChunk() that does no locking of its own (see the
// comment at the top of this group).
126  void evictThenEraseChunkUnlocked(const ChunkKey&);
// Returns void. NOTE(review): how an invalid path is reported (e.g. by
// throwing) is not visible here.
127  void validatePath(const std::string&) const;
// NOTE(review): the meaning of the bool result and of the unnamed size_t
// argument is not visible in this header.
128  bool insertChunkIntoEvictionAlg(const ChunkKey&, const size_t);
129  void createTrackerMapEntryIfNoneExists(const ChunkKey& chunk_key);
130 
// Eviction state, one TableEvictionTracker per key. Presumably keyed by table
// prefix (see createTrackerMapEntryIfNoneExists); confirm.
// NOTE(review): the key type is spelled `const ChunkKey`; std::map already
// stores its keys as const, so the qualifier looks redundant.
131  std::map<const ChunkKey, TableEvictionTracker> eviction_tracker_map_;
133 
134  // Underlying storage is handled by a GlobalFileMgr owned exclusively by the cache.
135  std::unique_ptr<File_Namespace::GlobalFileMgr> global_file_mgr_;
136 
137  // Keep track of which Chunks/ChunkMetadata are cached.
138  std::set<ChunkKey> cached_chunks_;
139  std::set<ChunkKey> cached_metadata_;
140 
141  // Count, for testing purposes, how many times chunks or metadata have been cached.
144 
145  // Separate mutexes for chunks/metadata.
148 
149  // Maximum number of chunk bytes that can be in the cache before eviction.
151 }; // ForeignStorageCache
152 } // namespace foreign_storage
DiskCacheLevel
bool isEnabledForMutableTables() const
File_Namespace::GlobalFileMgr * getGlobalFileMgr() const
std::map< const ChunkKey, TableEvictionTracker > eviction_tracker_map_
std::shared_timed_mutex mapd_shared_mutex
bool isEnabledForFSI() const
An AbstractBuffer is a unit of data management for a data manager.
CacheTooSmallException(const std::string &msg)
bool isEnabled() const
std::unique_ptr< File_Namespace::GlobalFileMgr > global_file_mgr_
std::vector< int > ChunkKey
Definition: types.h:37
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata > >> ChunkMetadataVector