OmniSciDB  94e8789169
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FileMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
26 #pragma once
27 
28 #include <future>
29 #include <iostream>
30 #include <map>
31 #include <mutex>
32 #include <set>
33 #include <vector>
34 
35 #include "DataMgr/AbstractBuffer.h"
37 #include "DataMgr/FileMgr/Epoch.h"
40 #include "DataMgr/FileMgr/Page.h"
42 
43 using namespace Data_Namespace;
44 
45 namespace File_Namespace {
46 
/// Default value of the `defaultPageSize` constructor argument of FileMgr.
/// 2097152 == 2 * 1024 * 1024 (presumably bytes, i.e. 2 MiB -- confirm units).
47 #define DEFAULT_PAGE_SIZE 2097152
48 
49 class GlobalFileMgr; // forward declaration
/// Maps logical page sizes to files (multimap: several int32_t values may
/// share one page size; the values are presumably file ids -- confirm).
58 using PageSizeFileMMap = std::multimap<size_t, int32_t>;
59 
/// In this namespace a "Chunk" is simply another name for FileBuffer.
70 using Chunk = FileBuffer;
71 
/// Maps ChunkKeys (unique ids for Chunks) to Chunk objects, held as raw
/// FileBuffer pointers.
82 using ChunkKeyToChunkMap = std::map<ChunkKey, FileBuffer*>;
83 
/// Descriptive attributes of a single file; this is the value type returned
/// by FileMgr::getMetadataForFile().
/// NOTE(review): the listing jumps from original line 89 to 91, so a member
/// declared on line 90 may be missing here -- confirm against the real header.
84 struct FileMetadata {
85  int32_t file_id;  ///< Identifier of the file.
86  std::string file_path;  ///< Path of the file.
87  size_t page_size;  ///< Page size used by the file (presumably bytes -- confirm).
88  size_t file_size;  ///< Size of the file (presumably bytes -- confirm).
89  size_t num_pages;  ///< Number of pages in the file.
91 };
92 
/// Storage statistics; this is the value type returned by
/// FileMgr::getStorageStats(). Every counter is zero-initialised and both
/// "free page" counts start out empty (no value).
/// NOTE(review): std::optional is used here but <optional> is not among the
/// includes visible in this listing -- presumably it arrives transitively;
/// confirm.
93 struct StorageStats {
94  int32_t epoch{0};
95  int32_t epoch_floor{0};
96  uint64_t metadata_file_count{0};
97  uint64_t total_metadata_file_size{0};
98  uint64_t total_metadata_page_count{0};
    // Empty until a value is assigned; when it is populated is not visible here.
99  std::optional<uint64_t> total_free_metadata_page_count{};
100  uint64_t data_file_count{0};
101  uint64_t total_data_file_size{0};
102  uint64_t total_data_page_count{0};
     // Empty until a value is assigned; when it is populated is not visible here.
103  std::optional<uint64_t> total_free_data_page_count{};
104 
105  StorageStats() = default;
     // Declaring the copy constructor and the destructor means no implicit move
     // operations are generated; moving a StorageStats falls back to copying.
106  StorageStats(const StorageStats& storage_stats) = default;
     // Virtual destructor: the struct can be deleted safely through a base
     // pointer if other types derive from it.
107  virtual ~StorageStats() = default;
108 };
109 
/// Buffer manager derived from AbstractBufferMgr that reports its type as
/// FILE_MGR. It overrides the base-class buffer calls (create / get / fetch /
/// put / delete), checkpointing and chunk-metadata retrieval, and adds epoch,
/// page and file bookkeeping. GlobalFileMgr is a friend.
///
/// NOTE(review): this listing skips several original line numbers (for
/// example 317, 319, 323-325, 327, 339-340, 342). Members used below such as
/// epoch_, chunkIndex_, chunkIndexMutex_, maxRollbackEpochs_ and
/// num_reader_threads_ are declared on lines that are not shown here.
114 class FileMgr : public AbstractBufferMgr { // implements
115  friend class GlobalFileMgr;
116 
117  public:
     /// Main constructor. max_rollback_epochs and epoch default to -1,
     /// num_reader_threads to 0 and defaultPageSize to DEFAULT_PAGE_SIZE; the
     /// meaning of the -1 / 0 sentinels is not visible in this header
     /// (presumably "unset" -- confirm in the implementation file).
119  FileMgr(const int32_t deviceId,
120  GlobalFileMgr* gfm,
121  const std::pair<const int32_t, const int32_t> fileMgrKey,
122  const int32_t max_rollback_epochs = -1,
123  const size_t num_reader_threads = 0,
124  const int32_t epoch = -1,
125  const size_t defaultPageSize = DEFAULT_PAGE_SIZE);
126 
127  // used only to initialize enough to drop or to get basic metadata
128  FileMgr(const int32_t deviceId,
129  GlobalFileMgr* gfm,
130  const std::pair<const int32_t, const int32_t> fileMgrKey,
131  const size_t defaultPageSize,
132  const bool runCoreInit);
133 
     /// Constructor taking only the global manager, a default page size and a
     /// base path (taken by value).
134  FileMgr(GlobalFileMgr* gfm, const size_t defaultPageSize, std::string basePath);
135 
137  ~FileMgr() override;
138 
     /// Returns a StorageStats object by value.
139  StorageStats getStorageStats();
     /// Creates the buffer for `key`. pageSize and numBytes both default to 0
     /// (presumably "use the manager default" / "no initial size" -- confirm).
141  FileBuffer* createBuffer(const ChunkKey& key,
142  size_t pageSize = 0,
143  const size_t numBytes = 0) override;
144 
145  bool isBufferOnDevice(const ChunkKey& key) override;
147  // Purge == true means delete the data chunks -
148  // can't undelete and revert to previous
149  // state - reclaims disk space for chunk
150  void deleteBuffer(const ChunkKey& key, const bool purge = true) override;
151 
     /// Prefix variant of deleteBuffer(); `purge` defaults to true here as well.
152  void deleteBuffersWithPrefix(const ChunkKey& keyPrefix,
153  const bool purge = true) override;
154 
     /// Returns the buffer for `key`; numBytes defaults to 0.
156  FileBuffer* getBuffer(const ChunkKey& key, const size_t numBytes = 0) override;
157 
     /// Takes a key, a destination buffer and a byte count (no default);
     /// presumably copies the chunk for `key` into destBuffer -- confirm.
158  void fetchBuffer(const ChunkKey& key,
159  AbstractBuffer* destBuffer,
160  const size_t numBytes) override;
161 
     /// Takes a key, a source buffer `d` and a byte count (default 0) and
     /// returns a FileBuffer pointer; presumably stores `d` under `key` --
     /// confirm.
168  FileBuffer* putBuffer(const ChunkKey& key,
169  AbstractBuffer* d,
170  const size_t numBytes = 0) override;
171 
172  // Buffer API
173  AbstractBuffer* alloc(const size_t numBytes) override;
174  void free(AbstractBuffer* buffer) override;
     /// Returns a single Page for the given page size; isMetadata selects
     /// between metadata and data pages (presumably -- confirm).
175  Page requestFreePage(size_t pagesize, const bool isMetadata);
176 
     // The overrides below return fixed values for this manager: type is
     // FILE_MGR, slab printing is "Not Implemented", clearSlabs() does nothing,
     // sizes are reported as 0 and allocation is never reported as capped.
177  inline MgrType getMgrType() override { return FILE_MGR; };
178  inline std::string getStringMgrType() override { return ToString(FILE_MGR); }
179  inline std::string printSlabs() override { return "Not Implemented"; }
180  inline void clearSlabs() override { /* noop */
181  }
182  inline size_t getMaxSize() override { return 0; }
183  inline size_t getInUseSize() override { return 0; }
184  inline size_t getAllocated() override { return 0; }
185  inline bool isAllocationCapped() override { return false; }
186 
     /// Indexes files_ directly with fileId. No bounds check and no locking is
     /// performed in this accessor, so fileId must be a valid index.
187  inline FileInfo* getFileInfoForFileId(const int32_t fileId) { return files_[fileId]; }
188 
189  uint64_t getTotalFileSize() const;
     /// Builds a FileMetadata from the entry a directory iterator points at.
190  FileMetadata getMetadataForFile(
191  const boost::filesystem::directory_iterator& fileIterator);
192 
     // Two initialisation overloads: one takes a reader-thread count, the other
     // a data path to convert from; both take an epoch override.
193  void init(const size_t num_reader_threads, const int32_t epochOverride);
194  void init(const std::string& dataPathToConvertFrom, const int32_t epochOverride);
195 
     /// Core initialisation step; the meaning of the bool result is not visible
     /// in this header -- confirm in the implementation file.
202  bool coreInit();
203 
     /// Takes a source page, a destination FileMgr and page, a reserved header
     /// size, a byte count and an offset; presumably copies page contents
     /// between the two pages -- confirm.
204  void copyPage(Page& srcPage,
205  FileMgr* destFileMgr,
206  Page& destPage,
207  const size_t reservedHeaderSize,
208  const size_t numBytes,
209  const size_t offset);
210 
     /// Multi-page variant of requestFreePage(). `pages` is a non-const
     /// reference, presumably filled with the obtained pages -- confirm.
224  void requestFreePages(size_t npages,
225  size_t pagesize,
226  std::vector<Page>& pages,
227  const bool isMetadata);
228 
229  void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector& chunkMetadataVec,
230  const ChunkKey& keyPrefix) override;
231 
237  void checkpoint() override;
     /// Unsupported overload: ignores both arguments and logs at FATAL level.
     /// Callers are expected to use checkpoint() instead.
238  void checkpoint(const int32_t db_id, const int32_t tb_id) override {
239  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
240  }
     /// Returns epoch_.ceiling() narrowed to int32_t.
245  inline int32_t epoch() { return static_cast<int32_t>(epoch_.ceiling()); }
246 
     /// Returns epoch_.floor() narrowed to int32_t.
247  inline int32_t epochFloor() { return static_cast<int32_t>(epoch_.floor()); }
248 
     /// Increments epoch_, marks the new epoch as not yet checkpointed, and
     /// returns the new value. Logs at FATAL level if the new value exceeds
     /// Epoch::max_allowable_epoch().
249  inline int32_t incrementEpoch() {
250  int32_t newEpoch = epoch_.increment();
251  epochIsCheckpointed_ = false;
252  // We test for error here instead of in Epoch::increment so we can log FileMgr
253  // metadata
254  if (newEpoch > Epoch::max_allowable_epoch()) {
255  LOG(FATAL) << "Epoch for table (" << fileMgrKey_.first << ", " << fileMgrKey_.second
256  << ") greater than maximum allowed value of "
257  << Epoch::max_allowable_epoch() << ".";
258  }
259  return newEpoch;
260  }
261 
     /// Returns epoch() when epochIsCheckpointed_ is true, otherwise epoch() - 1.
265  inline int32_t lastCheckpointedEpoch() {
266  return epoch() - (epochIsCheckpointed_ ? 0 : 1);
267  }
268 
     /// Returns maxRollbackEpochs_.
272  inline int32_t maxRollbackEpochs() { return maxRollbackEpochs_; }
273 
     /// Returns num_reader_threads_.
278  inline size_t getNumReaderThreads() { return num_reader_threads_; }
279 
     /// Returns a C FILE* for the given file id.
287  FILE* getFileForFileId(const int32_t fileId);
288 
     /// Returns chunkIndex_.size(); a mapd_shared_lock on chunkIndexMutex_ is
     /// held while the size is read.
289  inline size_t getNumChunks() override {
290  mapd_shared_lock<mapd_shared_mutex> read_lock(chunkIndexMutex_);
291  return chunkIndex_.size();
292  }
293  size_t getNumUsedPages() const;
294  size_t getNumUsedMetadataPages() const;
295  size_t getNumUsedMetadataPagesForChunkKey(const ChunkKey& chunkKey) const;
296 
298  // #TM Not sure if we need this below
299  int32_t getDBVersion() const;
300  bool getDBConvert() const;
301  void createTopLevelMetadata(); // create metadata shared by all tables of all DBs
     /// Returns a copy of fileMgrBasePath_.
302  std::string getFileMgrBasePath() const { return fileMgrBasePath_; }
303  void closeRemovePhysical();
304 
305  void removeTableRelatedDS(const int32_t db_id, const int32_t table_id) override;
306 
     /// Takes a (FileInfo*, int32_t) pair by rvalue reference; presumably the
     /// pair is recorded in free_pages_ -- confirm.
307  void free_page(std::pair<FileInfo*, int32_t>&& page);
     /// Returns a copy of fileMgrKey_.
308  const std::pair<const int32_t, const int32_t> get_fileMgrKey() const {
309  return fileMgrKey_;
310  }
311 
312  protected:
313  // For testing purposes only
     // NOTE(review): single-argument constructor that is not marked explicit.
314  FileMgr(const int epoch);
315 
316  private:
     // Key pair for this manager; incrementEpoch() logs it as
     // "table (first, second)" (presumably database id and table id -- confirm).
318  std::pair<const int32_t, const int32_t> fileMgrKey_;
320  std::string fileMgrBasePath_;
     // Indexed by file id in getFileInfoForFileId().
321  std::vector<FileInfo*> files_;
326  unsigned nextFileId_;
     // Cleared by incrementEpoch(); read by lastCheckpointedEpoch().
328  bool epochIsCheckpointed_ = true;
329  // int64_t epoch_; /// the current epoch (time of last checkpoint)
330  // int64_t epochFloor_; /// the minimum epoch we can roll back to
331  FILE* epochFile_ = nullptr;
332  int32_t db_version_;
333  int32_t fileMgrVersion_;
335  const int32_t latestFileMgrVersion_{1};
336  FILE* DBMetaFile_ = nullptr;
337  // bool isDirty_; /// true if metadata changed since last writeState()
338  std::mutex getPageMutex_;
341 
343  std::vector<std::pair<FileInfo*, int32_t>> free_pages_;
344  bool isFullyInitted_{false};
345 
     // File and epoch-file helpers.
362  FileInfo* createFile(const size_t pageSize, const size_t numPages);
363  FileInfo* openExistingFile(const std::string& path,
364  const int32_t fileId,
365  const size_t pageSize,
366  const size_t numPages,
367  std::vector<HeaderInfo>& headerVec);
368  void createEpochFile(const std::string& epochFileName);
369  int32_t openAndReadLegacyEpochFile(const std::string& epochFileName);
370  void openAndReadEpochFile(const std::string& epochFileName);
371  void writeAndSyncEpochToDisk();
372  void setEpoch(const int32_t newEpoch); // resets current value of epoch at startup
373  void freePagesBeforeEpoch(const int32_t minRollbackEpoch);
374 
375  void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint);
376  // int32_t checkEpochFloor(const std::string& epochFloorFilePath) const;
377  // void setEpochFloor(const std::string& epochFloorFilePath, const int32_t epochFloor);
378 
     // Version-file helpers.
379  int32_t readVersionFromDisk(const std::string& versionFileName) const;
380  void writeAndSyncVersionToDisk(const std::string& versionFileName,
381  const int32_t version);
     // Takes a vector of futures that each yield a vector<HeaderInfo>, plus a
     // headerVec reference (presumably the merged output -- confirm).
382  void processFileFutures(std::vector<std::future<std::vector<HeaderInfo>>>& file_futures,
383  std::vector<HeaderInfo>& headerVec);
     // Same parameters and defaults as createBuffer(); per its name it is the
     // variant that does not take a lock itself (presumably the caller already
     // holds it -- confirm).
384  FileBuffer* createBufferUnlocked(const ChunkKey& key,
385  size_t pageSize = 0,
386  const size_t numBytes = 0);
387 
388  // Migration functions
389  void migrateToLatestFileMgrVersion();
390  void migrateEpochFileV0();
391 };
392 
393 } // namespace File_Namespace
std::vector< int > ChunkKey
Definition: types.h:37
mapd_shared_mutex mutex_free_page_
Definition: FileMgr.h:342
A logical page (Page) belongs to a file on disk.
Definition: Page.h:46
#define LOG(tag)
Definition: Logger.h:188
std::string printSlabs() override
Definition: FileMgr.h:179
size_t getMaxSize() override
Definition: FileMgr.h:182
std::string getFileMgrBasePath() const
Definition: FileMgr.h:302
GlobalFileMgr * gfm_
Definition: FileMgr.h:317
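std::mutex getPageMutex_
Mutex member of FileMgr. The generated description "pointer to DB level metadata" belongs to the preceding member DBMetaFile_ (FileMgr.h:336), not to this mutex.
Definition: FileMgr.h:338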
Represents/provides access to contiguous data stored in the file system.
Definition: FileBuffer.h:56
static int64_t max_allowable_epoch()
Definition: Epoch.h:69
MgrType getMgrType() override
Definition: FileMgr.h:177
std::string fileMgrBasePath_
Definition: FileMgr.h:320
std::multimap< size_t, int32_t > PageSizeFileMMap
Maps logical page sizes to files.
Definition: FileMgr.h:58
size_t getInUseSize() override
Definition: FileMgr.h:183
const std::pair< const int32_t, const int32_t > get_fileMgrKey() const
Definition: FileMgr.h:308
int32_t epochFloor()
Definition: FileMgr.h:247
#define DEFAULT_PAGE_SIZE
Definition: FileMgr.h:47
size_t getNumChunks() override
Definition: FileMgr.h:289
int32_t incrementEpoch()
Definition: FileMgr.h:249
void init(LogOptions const &log_opts)
Definition: Logger.cpp:280
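std::pair< const int32_t, const int32_t > fileMgrKey_
Key pair of this FileMgr; incrementEpoch() logs it as "table (first, second)". The generated description "Global FileMgr." belongs to the preceding member gfm_ (FileMgr.h:317).
Definition: FileMgr.h:318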
std::shared_timed_mutex mapd_shared_mutex
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:297
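PageSizeFileMMap fileIndex_
Maps page sizes to files (see PageSizeFileMMap). The generated description "A vector of files accessible via a file identifier." belongs to the member files_ (FileMgr.h:321).
Definition: FileMgr.h:323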
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:343
An AbstractBuffer is a unit of data management for a data manager.
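size_t num_reader_threads_
Number of threads used when loading data (returned by getNumReaderThreads()). The generated description "Maps page sizes to FileInfo objects." belongs to the preceding member fileIndex_ (FileMgr.h:323).
Definition: FileMgr.h:324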
std::map< ChunkKey, FileBuffer * > ChunkKeyToChunkMap
Maps ChunkKeys (unique ids for Chunks) to Chunk objects.
Definition: FileMgr.h:82
FileInfo * getFileInfoForFileId(const int32_t fileId)
Definition: FileMgr.h:187
version
Definition: setup.py:65
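size_t defaultPageSize_
Default page size of this FileMgr (presumably set from the constructors' defaultPageSize argument). The generated description "number of threads used when loading data" belongs to the preceding member num_reader_threads_ (FileMgr.h:324).
Definition: FileMgr.h:325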
int32_t maxRollbackEpochs_
Definition: FileMgr.h:319
Definition: Epoch.h:30
void checkpoint(const int32_t db_id, const int32_t tb_id) override
Definition: FileMgr.h:238
std::string getStringMgrType() override
Definition: FileMgr.h:178
void clearSlabs() override
Definition: FileMgr.h:180
size_t getAllocated() override
Definition: FileMgr.h:184
int32_t epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:245
mapd_shared_lock< mapd_shared_mutex > read_lock
int32_t maxRollbackEpochs()
Returns value max_rollback_epochs.
Definition: FileMgr.h:272
bool isAllocationCapped() override
Definition: FileMgr.h:185
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:339
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:340
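Epoch epoch_
Epoch state of this FileMgr; epoch() returns its ceiling and epochFloor() its floor. The generated description "the index of the next file id" belongs to the preceding member nextFileId_ (FileMgr.h:326).
Definition: FileMgr.h:327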
int32_t lastCheckpointedEpoch()
Returns value of epoch at last checkpoint.
Definition: FileMgr.h:265
size_t getNumReaderThreads()
Returns number of threads defined by parameter num-reader-threads which should be used during initial...
Definition: FileMgr.h:278