OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FileMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
26 #pragma once
27 
28 #include <future>
29 #include <iostream>
30 #include <map>
31 #include <mutex>
32 #include <set>
33 #include <vector>
34 
35 #include "DataMgr/AbstractBuffer.h"
37 #include "DataMgr/FileMgr/Epoch.h"
40 #include "DataMgr/FileMgr/Page.h"
43 
44 using namespace Data_Namespace;
45 
46 namespace File_Namespace {
47 class GlobalFileMgr; // forward declaration
// Maps logical page sizes to files: key is a page size, value is an int32_t
// file id. A multimap, so several files may share the same page size.
56 using PageSizeFileMMap = std::multimap<size_t, int32_t>;
57 
// Chunk is just another name for FileBuffer in this namespace.
68 using Chunk = FileBuffer;
69 
// Maps ChunkKeys (unique ids for Chunks) to Chunk objects.
// The mapped value is a raw FileBuffer pointer.
80 using ChunkKeyToChunkMap = std::map<ChunkKey, FileBuffer*>;
// Pair of ids used as the key of a FileMgr (see FileMgr::get_fileMgrKey()).
// NOTE(review): presumably (db_id, table_id) - incrementEpoch() prints it as the
// table identity - confirm the ordering against the callers.
86 using TablePair = std::pair<const int32_t, const int32_t>;
87 
// Plain record describing a single file; this is the return type of
// FileMgr::getMetadataForFile().
// NOTE(review): units of page_size / file_size are not stated in this header -
// presumably bytes; confirm against getMetadataForFile()'s implementation.
88 struct FileMetadata {
89  int32_t file_id;
90  std::string file_path;
91  size_t page_size;
92  size_t file_size;
93  size_t num_pages;
95 };
96 
/**
 * @brief Storage statistics: epoch values plus file counts, total sizes and
 * page counts, kept separately for metadata files and data files.
 *
 * All counters start at zero. The two free-page counts are optional and stay
 * unset until a value is assigned to them. The destructor is virtual, so
 * deleting a derived object through a StorageStats pointer is well-defined.
 */
struct StorageStats {
  int32_t epoch{0};
  int32_t epoch_floor{0};
  uint64_t metadata_file_count{0};
  uint64_t total_metadata_file_size{0};
  uint64_t total_metadata_page_count{0};
  std::optional<uint64_t> total_free_metadata_page_count{};
  uint64_t data_file_count{0};
  uint64_t total_data_file_size{0};
  uint64_t total_data_page_count{0};
  std::optional<uint64_t> total_free_data_page_count{};

  StorageStats() = default;
  StorageStats(const StorageStats& storage_stats) = default;
  // Explicitly defaulted: relying on the implicitly generated copy assignment is
  // deprecated once a copy constructor or destructor is user-declared.
  StorageStats& operator=(const StorageStats& storage_stats) = default;
  virtual ~StorageStats() = default;
};
113 
115  std::vector<HeaderInfo> header_infos;
116  int32_t max_file_id;
118 };
119 
120 // Page header size is serialized/deserialized as an int.
// The alias fixes the in-memory type at 32 bits (int32_t).
// NOTE(review): confirm the serialized header field uses the same width.
121 using PageHeaderSizeType = int32_t;
122 
// Describes one page relocation: a source page (file id, page number and page
// header size) together with the destination page (file id, page number).
// Vectors of PageMapping are passed to FileMgr's compaction helpers, e.g.
// writePageMappingsToStatusFile() and updateMappedPagesVisibility().
123 struct PageMapping {
125 
// Stores each argument in the member of the same name.
126  PageMapping(int32_t source_file_id,
127  size_t source_page_num,
128  PageHeaderSizeType source_page_header_size,
129  int32_t destination_file_id,
130  size_t destination_page_num)
131  : source_file_id(source_file_id)
132  , source_page_num(source_page_num)
133  , source_page_header_size(source_page_header_size)
134  , destination_file_id(destination_file_id)
135  , destination_page_num(destination_page_num) {}
136 
137  int32_t source_file_id;
142 };
143 
148 class FileMgr : public AbstractBufferMgr { // implements
149  friend class GlobalFileMgr;
150 
151  public:
153  FileMgr(const int32_t deviceId,
154  GlobalFileMgr* gfm,
155  const TablePair fileMgrKey,
156  const int32_t max_rollback_epochs = -1,
157  const size_t num_reader_threads = 0,
158  const int32_t epoch = -1,
159  const size_t defaultPageSize = DEFAULT_PAGE_SIZE);
160 
161  // used only to initialize enough to drop or to get basic metadata
162  FileMgr(const int32_t deviceId,
163  GlobalFileMgr* gfm,
164  const TablePair fileMgrKey,
165  const size_t defaultPageSize,
166  const bool runCoreInit);
167 
168  FileMgr(GlobalFileMgr* gfm, const size_t defaultPageSize, std::string basePath);
169 
171  virtual ~FileMgr() override;
172 
173  StorageStats getStorageStats();
175  FileBuffer* createBuffer(const ChunkKey& key,
176  size_t pageSize = 0,
177  const size_t numBytes = 0) override;
178 
179  bool isBufferOnDevice(const ChunkKey& key) override;
181  // Purge == true means delete the data chunks -
182  // can't undelete and revert to previous
183  // state - reclaims disk space for chunk
184  void deleteBuffer(const ChunkKey& key, const bool purge = true) override;
185 
186  void deleteBuffersWithPrefix(const ChunkKey& keyPrefix,
187  const bool purge = true) override;
188 
190  FileBuffer* getBuffer(const ChunkKey& key, const size_t numBytes = 0) override;
191 
192  void fetchBuffer(const ChunkKey& key,
193  AbstractBuffer* destBuffer,
194  const size_t numBytes) override;
195 
202  FileBuffer* putBuffer(const ChunkKey& key,
203  AbstractBuffer* d,
204  const size_t numBytes = 0) override;
205 
206  // Buffer API
207  AbstractBuffer* alloc(const size_t numBytes) override;
208  void free(AbstractBuffer* buffer) override;
209  Page requestFreePage(size_t pagesize, const bool isMetadata);
210 
// Fixed-answer overrides: the manager type is FILE_MGR (also available as a
// string), printSlabs() returns a placeholder string, clearSlabs() does
// nothing, the three size queries always return 0, and allocation is reported
// as not capped.
211  inline MgrType getMgrType() override { return FILE_MGR; };
212  inline std::string getStringMgrType() override { return ToString(FILE_MGR); }
213  inline std::string printSlabs() override { return "Not Implemented"; }
214  inline void clearSlabs() override {} // noop
215  inline size_t getMaxSize() override { return 0; }
216  inline size_t getInUseSize() override { return 0; }
217  inline size_t getAllocated() override { return 0; }
218  inline bool isAllocationCapped() override { return false; }
219 
220  inline FileInfo* getFileInfoForFileId(const int32_t fileId) { return files_[fileId]; }
221 
222  FileMetadata getMetadataForFile(
223  const boost::filesystem::directory_iterator& fileIterator);
224 
225  void init(const size_t num_reader_threads, const int32_t epochOverride);
226  void init(const std::string& dataPathToConvertFrom, const int32_t epochOverride);
227 
228  void copyPage(Page& srcPage,
229  FileMgr* destFileMgr,
230  Page& destPage,
231  const size_t reservedHeaderSize,
232  const size_t numBytes,
233  const size_t offset);
234 
248  void requestFreePages(size_t npages,
249  size_t pagesize,
250  std::vector<Page>& pages,
251  const bool isMetadata);
252 
253  void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector& chunkMetadataVec,
254  const ChunkKey& keyPrefix) override;
255 
261  void checkpoint() override;
// Per-table checkpoint overload. Not supported by this manager: it only logs a
// FATAL message telling the caller to use the parameterless checkpoint().
// Both arguments are ignored.
262  void checkpoint(const int32_t db_id, const int32_t tb_id) override {
263  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
264  }
// Returns current value of epoch - should be one greater than recorded at last
// checkpoint. Because FileMgr only contains buffers from one table we can just
// return the FileMgr's epoch instead of finding a table-specific epoch, so
// db_id and tb_id are ignored.
271  inline virtual int32_t epoch(int32_t db_id, int32_t tb_id) const { return epoch(); }
272 
// Returns epoch_.floor() converted to int32_t.
273  inline int32_t epochFloor() const { return static_cast<int32_t>(epoch_.floor()); }
274 
// Advances the epoch through epoch_.increment(), marks the new epoch as not
// yet checkpointed, and returns the new value. Logs FATAL, naming the table
// key, when the new epoch is greater than Epoch::max_allowable_epoch().
// NOTE(review): Epoch::max_allowable_epoch() returns int64_t while newEpoch is
// int32_t; if increment() returns a wider type, the value is narrowed before
// the comparison - confirm the overflow check can still fire.
275  inline int32_t incrementEpoch() {
276  int32_t newEpoch = epoch_.increment();
277  epochIsCheckpointed_ = false;
278  // We test for error here instead of in Epoch::increment so we can log FileMgr
279  // metadata
280  if (newEpoch > Epoch::max_allowable_epoch()) {
281  LOG(FATAL) << "Epoch for table (" << fileMgrKey_.first << ", " << fileMgrKey_.second
282  << ") greater than maximum allowed value of "
283  << Epoch::max_allowable_epoch() << ".";
284  }
285  return newEpoch;
286  }
287 
// Returns value of epoch at last checkpoint: the current epoch when
// epochIsCheckpointed_ is set, otherwise the current epoch minus one.
291  inline int32_t lastCheckpointedEpoch() {
292  return epoch() - (epochIsCheckpointed_ ? 0 : 1);
293  }
294 
// Returns value max_rollback_epochs (the maxRollbackEpochs_ member).
298  inline int32_t maxRollbackEpochs() { return maxRollbackEpochs_; }
299 
// Returns num_reader_threads_. Per the generated documentation this is the
// number of threads defined by parameter num-reader-threads.
304  inline size_t getNumReaderThreads() { return num_reader_threads_; }
305 
313  FILE* getFileForFileId(const int32_t fileId);
314 
// Returns the number of entries in chunkIndex_. The size is read while holding
// a shared (reader) lock on chunkIndexMutex_.
315  inline size_t getNumChunks() override {
316  mapd_shared_lock<mapd_shared_mutex> read_lock(chunkIndexMutex_);
317  return chunkIndex_.size();
318  }
319  size_t getNumUsedPages() const;
320  size_t getNumUsedMetadataPages() const;
321  size_t getNumUsedMetadataPagesForChunkKey(const ChunkKey& chunkKey) const;
322 
324  // #TM Not sure if we need this below
325  int32_t getDBVersion() const;
326  bool getDBConvert() const;
327  void createTopLevelMetadata(); // create metadata shared by all tables of all DBs
328  inline std::string getFileMgrBasePath() const { return fileMgrBasePath_; }
329  virtual void closeRemovePhysical();
330 
331  void removeTableRelatedDS(const int32_t db_id, const int32_t table_id) override;
332 
333  void free_page(std::pair<FileInfo*, int32_t>&& page);
334  inline virtual bool hasFileMgrKey() const { return true; }
335  const TablePair get_fileMgrKey() const { return fileMgrKey_; }
336 
// Builds the path of file_name inside this manager's base directory by
// appending file_name to fileMgrBasePath_. Only composes the path; nothing in
// this function touches the file system.
337  inline boost::filesystem::path getFilePath(const std::string& file_name) {
338  return boost::filesystem::path(fileMgrBasePath_) / file_name;
339  }
340 
341  // Visible for use in unit tests.
342  void writePageMappingsToStatusFile(const std::vector<PageMapping>& page_mappings);
343 
344  // Visible for use in unit tests.
345  void renameCompactionStatusFile(const char* const from_status,
346  const char* const to_status);
347 
348  void compactFiles();
349 
353  virtual bool updatePageIfDeleted(FileInfo* file_info,
354  ChunkKey& chunk_key,
355  int32_t contingent,
356  int32_t page_epoch,
357  int32_t page_num);
358 
// True if a read error should cause a fatal error. This base implementation
// always returns true; the method is virtual so subclasses can override it.
362  inline virtual bool failOnReadError() const { return true; }
363 
364  static constexpr size_t DEFAULT_NUM_PAGES_PER_DATA_FILE{256};
365  static constexpr size_t DEFAULT_NUM_PAGES_PER_METADATA_FILE{4096};
366 
367  // Name of files that indicate the different statuses/phases of data compaction.
368  static constexpr char const* COPY_PAGES_STATUS{"pending_data_compaction_0"};
369  static constexpr char const* UPDATE_PAGE_VISIBILITY_STATUS{"pending_data_compaction_1"};
370  static constexpr char const* DELETE_EMPTY_FILES_STATUS{"pending_data_compaction_2"};
371 
372  // Methods that enable override of number of pages per data/metadata file
373  // for use in unit tests.
374  static void setNumPagesPerDataFile(size_t num_pages);
375  static void setNumPagesPerMetadataFile(size_t num_pages);
376 
377  static constexpr char LEGACY_EPOCH_FILENAME[] = "epoch";
378  static constexpr char EPOCH_FILENAME[] = "epoch_metadata";
379  static constexpr char DB_META_FILENAME[] = "dbmeta";
380  static constexpr char FILE_MGR_VERSION_FILENAME[] = "filemgr_version";
381  static constexpr int32_t INVALID_VERSION = -1;
382 
383  protected:
384  // Used to initialize CachingFileMgr.
385  FileMgr();
386 
388  std::string fileMgrBasePath_;
389  std::map<int32_t, FileInfo*>
395  unsigned nextFileId_;
396  int32_t db_version_;
397  int32_t fileMgrVersion_;
399  const int32_t latestFileMgrVersion_{1};
400  FILE* DBMetaFile_ = nullptr;
401  std::mutex getPageMutex_;
404 
406  std::vector<std::pair<FileInfo*, int32_t>> free_pages_;
407  bool isFullyInitted_{false};
408 
411 
428  FileInfo* createFile(const size_t pageSize, const size_t numPages);
429  FileInfo* openExistingFile(const std::string& path,
430  const int32_t fileId,
431  const size_t pageSize,
432  const size_t numPages,
433  std::vector<HeaderInfo>& headerVec);
434  void createEpochFile(const std::string& epochFileName);
435  int32_t openAndReadLegacyEpochFile(const std::string& epochFileName);
436  void openAndReadEpochFile(const std::string& epochFileName);
437  void writeAndSyncEpochToDisk();
438  void setEpoch(const int32_t newEpoch); // resets current value of epoch at startup
439  int32_t readVersionFromDisk(const std::string& versionFileName) const;
440  void writeAndSyncVersionToDisk(const std::string& versionFileName,
441  const int32_t version);
442  void processFileFutures(std::vector<std::future<std::vector<HeaderInfo>>>& file_futures,
443  std::vector<HeaderInfo>& headerVec);
444  virtual FileBuffer* createBufferUnlocked(const ChunkKey& key,
445  size_t pageSize = 0,
446  const size_t numBytes = 0);
447 
448  // Migration functions
449  void migrateToLatestFileMgrVersion();
450  void migrateEpochFileV0();
451 
452  OpenFilesResult openFiles();
453 
454  void clearFileInfos();
455 
456  // Data compaction methods
457  void copySourcePageForCompaction(const Page& source_page,
458  FileInfo* destination_file_info,
459  std::vector<PageMapping>& page_mappings,
460  std::set<Page>& touched_pages);
461  int32_t copyPageWithoutHeaderSize(const Page& source_page,
462  const Page& destination_page);
463  void sortAndCopyFilePagesForCompaction(size_t page_size,
464  std::vector<PageMapping>& page_mappings,
465  std::set<Page>& touched_pages);
466  void updateMappedPagesVisibility(const std::vector<PageMapping>& page_mappings);
467  void deleteEmptyFiles();
468  void resumeFileCompaction(const std::string& status_file_name);
469  std::vector<PageMapping> readPageMappingsFromStatusFile();
470 
471  // For testing purposes only
472  FileMgr(const int epoch);
473 
474  // Used to describe the manager in logging and error messages.
475  virtual std::string describeSelf();
476 
477  void closePhysicalUnlocked();
478  void syncFilesToDisk();
479  void freePages();
480  void initializeNumThreads(size_t num_reader_threads = 0);
481  virtual FileBuffer* allocateBuffer(const size_t page_size,
482  const ChunkKey& key,
483  const size_t num_bytes);
484  void deleteBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it,
485  const bool purge = true);
486 
487  private:
488  void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint);
489  void freePagesBeforeEpoch(const int32_t min_epoch);
490  void freePagesBeforeEpochUnlocked(const int32_t min_epoch,
491  const ChunkKeyToChunkMap::iterator lower_bound,
492  const ChunkKeyToChunkMap::iterator upper_bound);
493  FileBuffer* getOrCreateBuffer(const ChunkKey& key);
499  bool coreInit();
// Returns epoch_.ceiling() converted to int32_t. The public
// epoch(db_id, tb_id) overload forwards to this one.
500  inline int32_t epoch() const { return static_cast<int32_t>(epoch_.ceiling()); }
501  void writeDirtyBuffers();
502 
505 
507  bool epochIsCheckpointed_ = true;
508  FILE* epochFile_ = nullptr;
509 };
510 
511 } // namespace File_Namespace
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
virtual int32_t epoch(int32_t db_id, int32_t tb_id) const
Returns current value of epoch - should be one greater than recorded at last checkpoint. Because FileMgr only contains buffers from one table we can just return the FileMgr's epoch instead of finding a table-specific epoch.
Definition: FileMgr.h:271
std::vector< int > ChunkKey
Definition: types.h:37
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:504
mapd_shared_mutex mutex_free_page_
Definition: FileMgr.h:405
virtual bool hasFileMgrKey() const
Definition: FileMgr.h:334
tuple d
Definition: test_fsi.py:9
std::vector< HeaderInfo > header_infos
Definition: FileMgr.h:115
A logical page (Page) belongs to a file on disk.
Definition: Page.h:46
#define LOG(tag)
Definition: Logger.h:194
std::string printSlabs() override
Definition: FileMgr.h:213
size_t getMaxSize() override
Definition: FileMgr.h:215
std::string getFileMgrBasePath() const
Definition: FileMgr.h:328
GlobalFileMgr * gfm_
Definition: FileMgr.h:503
std::mutex getPageMutex_
pointer to DB level metadata
Definition: FileMgr.h:401
Represents/provides access to contiguous data stored in the file system.
Definition: FileBuffer.h:58
static int64_t max_allowable_epoch()
Definition: Epoch.h:69
MgrType getMgrType() override
Definition: FileMgr.h:211
std::string fileMgrBasePath_
Definition: FileMgr.h:388
std::multimap< size_t, int32_t > PageSizeFileMMap
Maps logical page sizes to files.
Definition: FileMgr.h:56
size_t getInUseSize() override
Definition: FileMgr.h:216
static size_t num_pages_per_data_file_
Definition: FileMgr.h:409
int32_t PageHeaderSizeType
Definition: FileMgr.h:121
int32_t db_version_
the index of the next file id
Definition: FileMgr.h:396
PageMapping(int32_t source_file_id, size_t source_page_num, PageHeaderSizeType source_page_header_size, int32_t destination_file_id, size_t destination_page_num)
Definition: FileMgr.h:126
size_t getNumChunks() override
Definition: FileMgr.h:315
int32_t incrementEpoch()
Definition: FileMgr.h:275
void init(LogOptions const &log_opts)
Definition: Logger.cpp:280
std::shared_timed_mutex mapd_shared_mutex
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:392
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:406
An AbstractBuffer is a unit of data management for a data manager.
size_t num_reader_threads_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:393
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:410
std::map< ChunkKey, FileBuffer * > ChunkKeyToChunkMap
Maps ChunkKeys (unique ids for Chunks) to Chunk objects.
Definition: FileMgr.h:80
#define DEFAULT_PAGE_SIZE
std::string compaction_status_file_name
Definition: FileMgr.h:117
FileInfo * getFileInfoForFileId(const int32_t fileId)
Definition: FileMgr.h:220
const TablePair get_fileMgrKey() const
Definition: FileMgr.h:335
string version
Definition: setup.in.py:73
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:394
int32_t maxRollbackEpochs_
Definition: FileMgr.h:387
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
Definition: Epoch.h:30
void checkpoint(const int32_t db_id, const int32_t tb_id) override
Definition: FileMgr.h:262
virtual bool failOnReadError() const
True if a read error should cause a fatal error.
Definition: FileMgr.h:362
std::string getStringMgrType() override
Definition: FileMgr.h:212
void clearSlabs() override
Definition: FileMgr.h:214
size_t getAllocated() override
Definition: FileMgr.h:217
mapd_shared_lock< mapd_shared_mutex > read_lock
int32_t epoch() const
Definition: FileMgr.h:500
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
int32_t maxRollbackEpochs()
Returns value max_rollback_epochs.
Definition: FileMgr.h:298
std::pair< const int32_t, const int32_t > TablePair
Definition: FileMgr.h:86
int32_t epochFloor() const
Definition: FileMgr.h:273
PageHeaderSizeType source_page_header_size
Definition: FileMgr.h:139
bool isAllocationCapped() override
Definition: FileMgr.h:218
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:403
int32_t lastCheckpointedEpoch()
Returns value of epoch at last checkpoint.
Definition: FileMgr.h:291
size_t getNumReaderThreads()
Returns number of threads defined by parameter num-reader-threads which should be used during initial...
Definition: FileMgr.h:304
boost::filesystem::path getFilePath(const std::string &file_name)
Definition: FileMgr.h:337