OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CachingFileMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2021 Omnisci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
29 #pragma once
30 
31 #include "FileMgr.h"
32 #include "Shared/File.h"
33 
34 namespace File_Namespace {
35 
/**
 * @brief Builds the relative directory name used for a single table.
 *
 * The ids are formatted with std::to_string instead of being streamed, so the
 * result does not depend on the global C++ locale (a locale with digit grouping
 * would otherwise be free to render id 1000 as "1,000").
 *
 * @param db_id database id of the table.
 * @param tb_id table id.
 * @return the string "table_<db_id>_<tb_id>/" (note the trailing slash).
 */
inline std::string get_dir_name_for_table(int db_id, int tb_id) {
  return "table_" + std::to_string(db_id) + "_" + std::to_string(tb_id) + "/";
}
41 
42 // Struct to group data related to a single epoch. Manages epoch file pointers.
43 struct EpochInfo {
45  FILE* epoch_file = nullptr;
46  bool is_checkpointed = true;
47  EpochInfo(FILE* f) {
48  CHECK(f) << "Cannot create EpochInfo from null file descriptor";
49  epoch = Epoch();
50  epoch_file = f;
51  is_checkpointed = true;
52  }
54  void increment() {
55  epoch.increment();
56  is_checkpointed = false;
58  << "Epoch greater than maximum allowed value (" << epoch.ceiling() << " > "
59  << Epoch::max_allowable_epoch() << ").";
60  }
61 };
62 
63 // Extension of FileBuffer with restricted behaviour.
64 class CachingFileBuffer : public FileBuffer {
65  public:
67  // The cache can only be appended to, not written, as it lets us maintain a single
68  // version of the data. This override is to make sure we don't accidentally start
69  // writing to cache buffers.
70  void write(int8_t* src,
71  const size_t numBytes,
72  const size_t offset = 0,
73  const MemoryLevel srcMemoryLevel = CPU_LEVEL,
74  const int32_t deviceId = -1) override {
75  UNREACHABLE() << "Cache buffers support append(), but not write()";
76  }
77 };
78 
87 class CachingFileMgr : public FileMgr {
88  public:
89  CachingFileMgr(const std::string& base_path,
90  const size_t num_reader_threads = 0,
91  const size_t default_page_size = DEFAULT_PAGE_SIZE);
92  virtual ~CachingFileMgr();
93 
94  // Simple getters.
95  inline MgrType getMgrType() override { return CACHING_FILE_MGR; };
96  inline std::string getStringMgrType() override { return ToString(CACHING_FILE_MGR); }
97  inline size_t getDefaultPageSize() { return defaultPageSize_; }
98 
99  // TODO(Misiu): These are unimplemented for now, but will become necessary when we want
100  // to limit the size.
101  inline size_t getMaxSize() override {
102  UNREACHABLE() << "Unimplemented";
103  return 0;
104  }
105  inline size_t getInUseSize() override {
106  UNREACHABLE() << "Unimplemented";
107  return 0;
108  }
109  inline size_t getAllocated() override {
110  UNREACHABLE() << "Unimplemented";
111  return 0;
112  }
113  inline bool isAllocationCapped() override { return false; }
114 
118  void clearForTable(int32_t db_id, int32_t tb_id);
119 
124  std::string getOrAddTableDir(int db_id, int tb_id);
125 
130  inline bool hasFileMgrKey() const override { return false; }
134  void closeRemovePhysical() override;
135 
139  uint64_t getChunkSpaceReservedByTable(int db_id, int tb_id);
140  uint64_t getMetadataSpaceReservedByTable(int db_id, int tb_id);
141  uint64_t getWrapperSpaceReservedByTable(int db_id, int tb_id);
142  uint64_t getSpaceReservedByTable(int db_id, int tb_id);
143 
147  std::string describeSelf() override;
148 
153  void checkpoint(const int32_t db_id, const int32_t tb_id) override;
154 
158  int32_t epoch(int32_t db_id, int32_t tb_id) const override;
159 
163  FileBuffer* putBuffer(const ChunkKey& key,
164  AbstractBuffer* srcBuffer,
165  const size_t numBytes = 0) override;
169  CachingFileBuffer* allocateBuffer(const size_t page_size,
170  const ChunkKey& key,
171  const size_t num_bytes) override;
172 
176  bool updatePageIfDeleted(FileInfo* file_info,
177  ChunkKey& chunk_key,
178  int32_t contingent,
179  int32_t page_epoch,
180  int32_t page_num) override;
181 
185  inline bool failOnReadError() const override { return false; }
186 
190  void deleteBufferIfExists(const ChunkKey& key);
191 
192  private:
193  void openOrCreateEpochIfNotExists(int32_t db_id, int32_t tb_id);
194  void openAndReadEpochFileUnlocked(int32_t db_id, int32_t tb_id);
195  void incrementEpoch(int32_t db_id, int32_t tb_id);
196  void init(const size_t num_reader_threads);
197  void createEpochFileUnlocked(int32_t db_id, int32_t tb_id);
198  void writeAndSyncEpochToDisk(int32_t db_id, int32_t tb_id);
199  std::string getOrAddTableDirUnlocked(int db_id, int tb_id);
200  void readTableDirs();
201  void createBufferFromHeaders(const ChunkKey& key,
202  const std::vector<HeaderInfo>::const_iterator& startIt,
203  const std::vector<HeaderInfo>::const_iterator& endIt);
205  size_t pageSize = 0,
206  const size_t numBytes = 0) override;
207  void incrementAllEpochs();
208  void removeTableDirectory(int32_t db_id, int32_t tb_id);
209  void removeTableBuffers(int32_t db_id, int32_t tb_id);
210  void writeDirtyBuffers(int32_t db_id, int32_t tb_id);
211 
212  mutable mapd_shared_mutex epochs_mutex_; // mutex for table_epochs_.
213  // each table gets a separate epoch. Uses pointers for move semantics.
214  std::map<TablePair, std::unique_ptr<EpochInfo>> table_epochs_;
215 };
216 
217 } // namespace File_Namespace
uint64_t getSpaceReservedByTable(int db_id, int tb_id)
uint64_t getMetadataSpaceReservedByTable(int db_id, int tb_id)
std::vector< int > ChunkKey
Definition: types.h:37
uint64_t getWrapperSpaceReservedByTable(int db_id, int tb_id)
std::string getStringMgrType() override
std::string get_dir_name_for_table(int db_id, int tb_id)
#define UNREACHABLE()
Definition: Logger.h:247
std::string getOrAddTableDir(int db_id, int tb_id)
Returns (and optionally creates) a subdirectory for table-specific persistent data (e...
void closeRemovePhysical() override
Closes files and removes the caching directory.
int32_t ceiling() const
Definition: Epoch.h:44
Represents/provides access to contiguous data stored in the file system.
Definition: FileBuffer.h:58
void checkpoint() override
Fsyncs data files, writes out epoch and fsyncs that.
Definition: FileMgr.cpp:685
static int64_t max_allowable_epoch()
Definition: Epoch.h:69
std::string describeSelf() override
describes this FileMgr for logging purposes.
bool failOnReadError() const override
True if a read error should cause a fatal error.
FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
void createBufferFromHeaders(const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &startIt, const std::vector< HeaderInfo >::const_iterator &endIt)
int32_t incrementEpoch()
Definition: FileMgr.h:275
int32_t increment()
Definition: Epoch.h:54
std::shared_timed_mutex mapd_shared_mutex
std::string getOrAddTableDirUnlocked(int db_id, int tb_id)
void removeTableBuffers(int32_t db_id, int32_t tb_id)
bool updatePageIfDeleted(FileInfo *file_info, ChunkKey &chunk_key, int32_t contingent, int32_t page_epoch, int32_t page_num) override
checks whether a page should be deleted.
An AbstractBuffer is a unit of data management for a data manager.
void deleteBufferIfExists(const ChunkKey &key)
deletes a buffer if it exists in the mgr. Otherwise do nothing.
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:638
void openOrCreateEpochIfNotExists(int32_t db_id, int32_t tb_id)
#define DEFAULT_PAGE_SIZE
void openAndReadEpochFileUnlocked(int32_t db_id, int32_t tb_id)
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:394
MgrType getMgrType() override
bool hasFileMgrKey() const override
Query to determine if the contained pages will have their database and table ids overridden by the fil...
Definition: Epoch.h:30
void write(int8_t *src, const size_t numBytes, const size_t offset=0, const MemoryLevel srcMemoryLevel=CPU_LEVEL, const int32_t deviceId=-1) override
FileBuffer(FileMgr *fm, const size_t pageSize, const ChunkKey &chunkKey, const size_t initialSize=0)
Constructs a FileBuffer object.
Definition: FileBuffer.cpp:38
CachingFileBuffer * allocateBuffer(const size_t page_size, const ChunkKey &key, const size_t num_bytes) override
allocates a new CachingFileBuffer.
int32_t epoch() const
Definition: FileMgr.h:500
#define CHECK(condition)
Definition: Logger.h:203
uint64_t getChunkSpaceReservedByTable(int db_id, int tb_id)
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:119
void removeTableDirectory(int32_t db_id, int32_t tb_id)
char * f
void init(const size_t num_reader_threads)
void createEpochFileUnlocked(int32_t db_id, int32_t tb_id)
CachingFileMgr(const std::string &base_path, const size_t num_reader_threads=0, const size_t default_page_size=DEFAULT_PAGE_SIZE)
std::map< TablePair, std::unique_ptr< EpochInfo > > table_epochs_
A selection of helper methods for File I/O.
void clearForTable(int32_t db_id, int32_t tb_id)
Removes all data related to the given table (pages and subdirectories).
A FileMgr capable of limiting its size and storing data from multiple tables in a shared directory...
FileBuffer * putBuffer(const ChunkKey &key, AbstractBuffer *srcBuffer, const size_t numBytes=0) override
deletes any existing buffer for the given key then copies in a new one.