OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CachingFileMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 Omnisci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
29 #pragma once
30 
32 #include "FileMgr.h"
33 #include "Shared/File.h"
34 
35 namespace File_Namespace {
36 
37 enum class DiskCacheLevel { none, fsi, non_fsi, all };
39  // Note the suffix UL is not portable. 'long' is a different size
40  // on *nix vs Windows. Hence the explicit cast to size_t.
41  static constexpr size_t DEFAULT_MAX_SIZE{(size_t)1024 * (size_t)1024 * (size_t)1024 *
42  (size_t)100}; // 100G default
43  std::string path;
45  size_t num_reader_threads = 0;
48  inline bool isEnabledForMutableTables() const {
51  }
52  inline bool isEnabledForFSI() const {
54  }
55  inline bool isEnabled() const { return enabled_level != DiskCacheLevel::none; }
56  std::string dump() const {
57  std::stringstream ss;
58  ss << "DiskCacheConfig(path = " << path << ", level = " << levelAsString()
59  << ", threads = " << num_reader_threads << ", size limit = " << size_limit
60  << ", page size = " << page_size << ")";
61  return ss.str();
62  }
63  std::string levelAsString() const {
64  switch (enabled_level) {
66  return "none";
68  return "fsi";
70  return "non_fsi";
72  return "all";
73  }
74  return "";
75  }
// Returns the default disk-cache directory for the given base path: the base
// path with "/omnisci_disk_cache" appended.
static std::string getDefaultPath(const std::string& base_path) {
  std::string cache_dir{base_path};
  cache_dir += "/omnisci_disk_cache";
  return cache_dir;
}
79 };
80 
// Builds the directory name used for a table: "table_<db_id>_<tb_id>/"
// (note the trailing slash).
inline std::string get_dir_name_for_table(int db_id, int tb_id) {
  std::ostringstream dir_name;
  dir_name << "table_" << db_id;
  dir_name << "_" << tb_id;
  dir_name << "/";
  return dir_name.str();
}
86 
87 // Class to control access to the table-specific directories and data created inside a
88 // CachingFileMgr.
89 class TableFileMgr {
90  public:
91  TableFileMgr(const std::string& table_path);
93 
97  void incrementEpoch();
98 
103 
107  int32_t getEpoch() const;
108 
112  void removeDiskContent() const;
113 
117  size_t getReservedSpace() const;
118 
122  void deleteWrapperFile() const;
123 
127  void writeWrapperFile(const std::string& doc) const;
128 
129  private:
130  std::string table_path_;
131  std::string epoch_file_path_;
132  std::string wrapper_file_path_;
134  bool is_checkpointed_ = true;
135  FILE* epoch_file_ = nullptr;
136 
138 };
139 
140 // Extension of FileBuffer with restricted behaviour.
142  public:
144  // The cache can only be appended to, not written, as it lets us maintain a single
145  // version of the data. This override is to make sure we don't accidentally start
146  // writing to cache buffers.
147  void write(int8_t* src,
148  const size_t numBytes,
149  const size_t offset = 0,
150  const MemoryLevel srcMemoryLevel = CPU_LEVEL,
151  const int32_t deviceId = -1) override {
152  UNREACHABLE() << "Cache buffers support append(), but not write()";
153  }
154 };
155 
164 class CachingFileMgr : public FileMgr {
165  public:
166  static constexpr char WRAPPER_FILE_NAME[] = "wrapper_metadata.json";
167  // We currently assign 10% of the cache to data wrapper space arbitrarily.
168  // static constexpr size_t WRAPPER_SPACE_RATIO{10};
169  // Portion of the CFM space reserved for metadata (metadata files and data wrappers)
170  static constexpr float METADATA_SPACE_PERCENTAGE{0.1};
171  // Portion of the CFM metadata space reserved for metadata files (subset of above).
172  static constexpr float METADATA_FILE_SPACE_PERCENTAGE{0.01};
173 
174  static size_t getMinimumSize() {
175  // Currently the minimum default size is based on the metadata file size and
176  // percentage usage.
179  }
180 
181  CachingFileMgr(const DiskCacheConfig& config);
182 
183  ~CachingFileMgr() override;
184 
185  // Simple getters.
186  inline MgrType getMgrType() override { return CACHING_FILE_MGR; };
187  inline std::string getStringMgrType() override { return ToString(CACHING_FILE_MGR); }
188  inline size_t getDefaultPageSize() { return defaultPageSize_; }
189  inline size_t getMaxSize() override { return max_size_; }
190  inline size_t getMaxDataFiles() const { return max_num_data_files_; }
191  inline size_t getMaxMetaFiles() const { return max_num_meta_files_; }
192  inline size_t getMaxWrapperSize() const { return max_wrapper_space_; }
193  inline size_t getDataFileSize() const {
195  }
196  inline size_t getMetadataFileSize() const {
198  }
199 
200  size_t getNumDataFiles() const;
201  size_t getNumMetaFiles() const;
202  inline size_t getAvailableSpace() { return max_size_ - getAllocated(); }
203  inline size_t getAvailableWrapperSpace() {
205  }
206  inline size_t getAllocated() override {
207  return getFilesSize() + getTableFileMgrsSize();
208  }
209 
213  void removeChunkKeepMetadata(const ChunkKey& key);
214 
218  void clearForTable(int32_t db_id, int32_t tb_id);
219 
224  inline bool hasFileMgrKey() const override { return false; }
225 
229  void closeRemovePhysical() override;
230 
234  size_t getChunkSpaceReservedByTable(int32_t db_id, int32_t tb_id) const;
235  size_t getMetadataSpaceReservedByTable(int32_t db_id, int32_t tb_id) const;
236  size_t getTableFileMgrSpaceReserved(int32_t db_id, int32_t tb_id) const;
237  size_t getSpaceReservedByTable(int32_t db_id, int32_t tb_id) const;
238 
242  std::string describeSelf() const override;
243 
248  void checkpoint(const int32_t db_id, const int32_t tb_id) override;
249 
253  int32_t epoch(int32_t db_id, int32_t tb_id) const override;
254 
258  FileBuffer* putBuffer(const ChunkKey& key,
259  AbstractBuffer* srcBuffer,
260  const size_t numBytes = 0) override;
265  CachingFileBuffer* allocateBuffer(const size_t page_size,
266  const ChunkKey& key,
267  const size_t num_bytes = 0) override;
269  const ChunkKey& key,
270  const std::vector<HeaderInfo>::const_iterator& headerStartIt,
271  const std::vector<HeaderInfo>::const_iterator& headerEndIt) override;
272 
276  bool updatePageIfDeleted(FileInfo* file_info,
277  ChunkKey& chunk_key,
278  int32_t contingent,
279  int32_t page_epoch,
280  int32_t page_num) override;
281 
285  inline bool failOnReadError() const override { return false; }
286 
290  void deleteBufferIfExists(const ChunkKey& key);
291 
296  size_t getNumChunksWithMetadata() const;
297 
301  size_t getNumDataChunks() const;
302 
306  std::vector<ChunkKey> getChunkKeysForPrefix(const ChunkKey& prefix) const;
307 
311  std::unique_ptr<CachingFileMgr> reconstruct() const;
312 
316  void deleteWrapperFile(int32_t db, int32_t tb);
317 
321  void writeWrapperFile(const std::string& doc, int32_t db, int32_t tb);
322 
323  std::string getTableFileMgrPath(int32_t db, int32_t tb) const;
324 
329  size_t getFilesSize() const;
330 
335  size_t getTableFileMgrsSize() const;
336 
340  std::optional<FileBuffer*> getBufferIfExists(const ChunkKey& key);
341 
346  void free_page(std::pair<FileInfo*, int32_t>&& page) override;
347 
349  const ChunkKey& keyPrefix) override;
350 
351  // Useful for debugging.
352  std::string dumpKeysWithMetadata() const;
353  std::string dumpKeysWithChunkData() const;
354  std::string dumpTableQueue() const { return table_evict_alg_.dumpEvictionQueue(); }
355 
356  // Used for unit testing
357  void setMaxNumDataFiles(size_t max) { max_num_data_files_ = max; }
358  void setMaxNumMetadataFiles(size_t max) { max_num_meta_files_ = max; }
359  void setMaxWrapperSpace(size_t max) { max_wrapper_space_ = max; }
360  std::set<ChunkKey> getKeysWithMetadata() const;
361 
362  private:
366  void incrementEpoch(int32_t db_id, int32_t tb_id);
367 
372  void init(const size_t num_reader_threads);
373 
377  void writeAndSyncEpochToDisk(int32_t db_id, int32_t tb_id);
378 
383  void readTableFileMgrs();
384 
389  const ChunkKey& key,
390  const std::vector<HeaderInfo>::const_iterator& startIt,
391  const std::vector<HeaderInfo>::const_iterator& endIt) override;
392 
397  size_t pageSize = 0,
398  const size_t numBytes = 0) override;
399 
403  void createTableFileMgrIfNoneExists(const int32_t db_id, const int32_t tb_id);
404 
408  void incrementAllEpochs();
409 
413  void removeTableFileMgr(int32_t db_id, int32_t tb_id);
414 
418  void removeTableBuffers(int32_t db_id, int32_t tb_id);
419 
423  void writeDirtyBuffers(int32_t db_id, int32_t tb_id);
424 
429  Page requestFreePage(size_t pagesize, const bool isMetadata) override;
430 
434  void touchKey(const ChunkKey& key) const;
435  void removeKey(const ChunkKey& key) const;
436 
441  std::vector<ChunkKey> getKeysForTable(int32_t db_id, int32_t tb_id) const;
442 
449 
455  FileInfo* evictPages();
456 
462  void deleteCacheIfTooLarge();
463 
468  void setMaxSizes();
469 
470  FileBuffer* getBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it,
471  const size_t numBytes = 0) override;
472  ChunkKeyToChunkMap::iterator deleteBufferUnlocked(
473  const ChunkKeyToChunkMap::iterator chunk_it,
474  const bool purge = true) override;
475 
476  mutable mapd_shared_mutex table_dirs_mutex_; // mutex for table_dirs_.
477  // each table gets a separate epoch. Uses pointers for move semantics.
478  std::map<TablePair, std::unique_ptr<TableFileMgr>> table_dirs_;
479 
480  size_t max_num_data_files_; // set based on max_size_.
481  size_t max_num_meta_files_; // set based on max_size_.
482  size_t max_wrapper_space_; // set based on max_size_.
483  size_t max_size_;
484  mutable LRUEvictionAlgorithm chunk_evict_alg_; // last chunk touched.
485  mutable LRUEvictionAlgorithm table_evict_alg_; // last table touched.
486 };
487 
488 } // namespace File_Namespace
size_t getTableFileMgrSpaceReserved(int32_t db_id, int32_t tb_id) const
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix) override
#define METADATA_PAGE_SIZE
Definition: FileBuffer.h:37
std::vector< int > ChunkKey
Definition: types.h:37
LRUEvictionAlgorithm table_evict_alg_
std::string dumpTableQueue() const
void removeDiskContent() const
Removes all disk data for the subdir.
std::string getStringMgrType() override
static constexpr size_t DEFAULT_NUM_PAGES_PER_METADATA_FILE
Definition: FileMgr.h:363
static constexpr float METADATA_SPACE_PERCENTAGE
std::string get_dir_name_for_table(int db_id, int tb_id)
void writeWrapperFile(const std::string &doc, int32_t db, int32_t tb)
Writes a wrapper file to a table subdir.
A logical page (Page) belongs to a file on disk.
Definition: Page.h:46
size_t getFilesSize() const
Get the total size of page files (data and metadata files). This includes allocated, but unused space.
std::vector< ChunkKey > getChunkKeysForPrefix(const ChunkKey &prefix) const
Returns the keys for chunks with chunk data that match the given prefix.
void setMaxSizes()
Sets the maximum number of files/space for each type of storage based on the maximum size...
mapd_shared_mutex table_dirs_mutex_
void writeAndSyncEpochToDisk()
Write and flush the epoch to the epoch file on disk.
#define UNREACHABLE()
Definition: Logger.h:253
std::string describeSelf() const override
describes this FileMgr for logging purposes.
size_t getSpaceReservedByTable(int32_t db_id, int32_t tb_id) const
void closeRemovePhysical() override
Closes files and removes the caching directory.
void touchKey(const ChunkKey &key) const
Used to track which tables/chunks were least recently used.
size_t getMetadataSpaceReservedByTable(int32_t db_id, int32_t tb_id) const
void createTableFileMgrIfNoneExists(const int32_t db_id, const int32_t tb_id)
Create and initialize a subdirectory for a table if none exists.
Represents/provides access to contiguous data stored in the file system.
Definition: FileBuffer.h:58
void checkpoint() override
Fsyncs data files, writes out epoch and fsyncs that.
Definition: FileMgr.cpp:678
Page requestFreePage(size_t pagesize, const bool isMetadata) override
requests a free page similar to FileMgr, but this override will also evict existing pages to make spa...
void deleteWrapperFile(int32_t db, int32_t tb)
Deletes the wrapper file from a table subdir.
ChunkKeyToChunkMap::iterator deleteBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true) override
CachingFileMgr(const DiskCacheConfig &config)
static size_t num_pages_per_data_file_
Definition: FileMgr.h:407
std::optional< FileBuffer * > getBufferIfExists(const ChunkKey &key)
an optional version of get buffer if we are not sure a chunk exists.
mapd_shared_mutex table_mutex_
std::set< ChunkKey > getKeysWithMetadata() const
bool failOnReadError() const override
True if a read error should cause a fatal error.
FileInfo * evictPages()
evicts all data pages for the least recently used Chunk (metadata pages persist). Returns the first F...
FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
Creates a buffer.
void deleteCacheIfTooLarge()
When the cache is read from disk, we don't know which chunks were least recently used. Rather than try to evict random pages to get down to size we just reset the cache to make sure we have space.
void incrementAllEpochs()
Increment epochs for each table in the CFM.
CachingFileBuffer * allocateBuffer(const size_t page_size, const ChunkKey &key, const size_t num_bytes=0) override
allocates a new CachingFileBuffer and tracks its use in the eviction algorithms. ...
int32_t incrementEpoch()
Definition: FileMgr.h:275
size_t getNumDataChunks() const
Returns the number of buffers with chunk data in the CFM.
std::shared_timed_mutex mapd_shared_mutex
std::unique_ptr< CachingFileMgr > reconstruct() const
Initializes a new CFM using the initialization values in the current CFM.
size_t getTableFileMgrsSize() const
Returns the total size of all subdirectory files. Each table represented in the CFM has a subdirector...
void removeTableBuffers(int32_t db_id, int32_t tb_id)
Erases and cleans up all buffers for a table.
TableFileMgr(const std::string &table_path)
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
static constexpr size_t DEFAULT_MAX_SIZE
bool updatePageIfDeleted(FileInfo *file_info, ChunkKey &chunk_key, int32_t contingent, int32_t page_epoch, int32_t page_num) override
checks whether a page should be deleted.
void deleteWrapperFile() const
Deletes only the wrapper file on disk.
An AbstractBuffer is a unit of data management for a data manager.
void incrementEpoch()
increment the epoch for this subdir (not synced to disk).
void setMaxNumDataFiles(size_t max)
void deleteBufferIfExists(const ChunkKey &key)
deletes a buffer if it exists in the mgr. Otherwise do nothing.
FileBuffer * getBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it, const size_t numBytes=0) override
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:408
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:631
void removeTableFileMgr(int32_t db_id, int32_t tb_id)
Removes the subdirectory content for a table.
#define DEFAULT_PAGE_SIZE
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:392
void setMaxNumMetadataFiles(size_t max)
void removeChunkKeepMetadata(const ChunkKey &key)
Free pages for chunk and remove it from the chunk eviction algorithm.
void writeWrapperFile(const std::string &doc) const
Writes wrapper file to disk.
bool hasFileMgrKey() const override
Query to determine if the contained pages will have their database and table ids overridden by the fil...
std::string dumpKeysWithChunkData() const
Definition: Epoch.h:30
void write(int8_t *src, const size_t numBytes, const size_t offset=0, const MemoryLevel srcMemoryLevel=CPU_LEVEL, const int32_t deviceId=-1) override
static constexpr char WRAPPER_FILE_NAME[]
size_t getReservedSpace() const
Returns the disk space used (in bytes) for the subdir.
std::map< TablePair, std::unique_ptr< TableFileMgr > > table_dirs_
std::string dumpKeysWithMetadata() const
std::vector< ChunkKey > getKeysForTable(int32_t db_id, int32_t tb_id) const
returns set of keys contained in chunkIndex_ that match the given table prefix.
void readTableFileMgrs()
Checks for any sub-directories containing table-specific data and creates epochs from found files...
FileInfo * evictMetadataPages()
evicts all metadata pages for the least recently used table. Returns the first FileInfo that a page w...
FileBuffer(FileMgr *fm, const size_t pageSize, const ChunkKey &chunkKey, const size_t initialSize=0)
Constructs a FileBuffer object.
Definition: FileBuffer.cpp:38
int32_t epoch() const
Definition: FileMgr.h:506
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:119
void init(const size_t num_reader_threads)
Initializes a CFM, parsing any existing files and initializing data structures appropriately (current...
static std::string getDefaultPath(const std::string &base_path)
void free_page(std::pair< FileInfo *, int32_t > &&page) override
Unlike the FileMgr, the CFM frees pages immediately instead of holding them until the next checkpoint...
size_t getNumChunksWithMetadata() const
Returns the number of buffers with metadata in the CFM. Any buffer with an encoder counts...
size_t getChunkSpaceReservedByTable(int32_t db_id, int32_t tb_id) const
static constexpr float METADATA_FILE_SPACE_PERCENTAGE
int32_t getEpoch() const
Returns the current epoch (locked)
std::string getTableFileMgrPath(int32_t db, int32_t tb) const
A selection of helper methods for File I/O.
void clearForTable(int32_t db_id, int32_t tb_id)
Removes all data related to the given table (pages and subdirectories).
void removeKey(const ChunkKey &key) const
A FileMgr capable of limiting its size and storing data from multiple tables in a shared directory...
void setMaxWrapperSpace(size_t max)
LRUEvictionAlgorithm chunk_evict_alg_
std::string levelAsString() const
FileBuffer * putBuffer(const ChunkKey &key, AbstractBuffer *srcBuffer, const size_t numBytes=0) override
deletes any existing buffer for the given key then copies in a new one.
FileBuffer * createBufferFromHeaders(const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &startIt, const std::vector< HeaderInfo >::const_iterator &endIt) override
Creates a buffer and initializes it with info read from files on disk.