OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FileMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
26 #pragma once
27 
28 #include <future>
29 #include <iostream>
30 #include <map>
31 #include <mutex>
32 #include <set>
33 #include <vector>
34 
35 #include "DataMgr/AbstractBuffer.h"
37 #include "DataMgr/FileMgr/Epoch.h"
40 #include "DataMgr/FileMgr/Page.h"
43 
44 using namespace Data_Namespace;
45 
46 namespace File_Namespace {
47 class GlobalFileMgr; // forward declaration
// Maps logical page sizes to files.
56 using PageSizeFileMMap = std::multimap<size_t, int32_t>;
57 
// Chunk is simply another name for FileBuffer.
68 using Chunk = FileBuffer;
69 
// Maps ChunkKeys (unique ids for Chunks) to Chunk objects.
80 using ChunkKeyToChunkMap = std::map<ChunkKey, FileBuffer*>;
// Key type used by FileMgr (see the fileMgrKey constructor argument). Its two
// components are logged as "table (first, second)" by FileMgr::incrementEpoch();
// presumably they are (db_id, table_id) -- TODO confirm.
86 using TablePair = std::pair<const int32_t, const int32_t>;
87 
// Metadata for one file; this is the return type of
// FileMgr::getMetadataForFile(), which is given a directory iterator entry.
88 struct FileMetadata {
89  int32_t file_id;
90  std::string file_path;
    // presumably both sizes are in bytes -- TODO confirm
91  size_t page_size;
92  size_t file_size;
93  size_t num_pages;
    // NOTE(review): original source line 94 is absent from this listing, so a
    // member may be missing here.
95 };
96 
// Storage statistics: the epoch and epoch floor, plus file/page counters kept
// separately for metadata files and for data files. This is the return type of
// FileMgr::getStorageStats().
//
// Every counter defaults to 0. The two free-page counters are std::optional and
// default to empty (no value).
//
// The destructor is virtual, so the type can be used as a polymorphic base.
// Since a destructor and a copy constructor are user-declared, the remaining
// special members are declared explicitly too (Rule of Five): otherwise the
// move operations would not be generated at all and the implicitly generated
// copy assignment would rely on deprecated behavior.
struct StorageStats {
  int32_t epoch{0};
  int32_t epoch_floor{0};

  // Metadata file counters.
  uint64_t metadata_file_count{0};
  uint64_t total_metadata_file_size{0};
  uint64_t total_metadata_page_count{0};
  std::optional<uint64_t> total_free_metadata_page_count{};

  // Data file counters.
  uint64_t data_file_count{0};
  uint64_t total_data_file_size{0};
  uint64_t total_data_page_count{0};
  std::optional<uint64_t> total_free_data_page_count{};

  StorageStats() = default;
  StorageStats(const StorageStats& storage_stats) = default;
  StorageStats(StorageStats&& storage_stats) = default;
  StorageStats& operator=(const StorageStats& storage_stats) = default;
  StorageStats& operator=(StorageStats&& storage_stats) = default;
  virtual ~StorageStats() = default;
};
113 
// NOTE(review): the line that opens this definition (original source line 114)
// is absent from this listing. Presumably these are the members of
// OpenFilesResult, the return type of FileMgr::openFiles() -- confirm against
// the original header.
115  std::vector<HeaderInfo> header_infos;
116  int32_t max_file_id;
// NOTE(review): original source line 117 is also absent; the symbol index at the
// end of this page lists `std::string compaction_status_file_name` at FileMgr.h:117.
118 };
119 
120 // Page header size is serialized/deserialized as an int.
// The PageMapping constructor below takes its source_page_header_size argument
// as this type.
121 using PageHeaderSizeType = int32_t;
122 
// Pairs a source page location (file id, page number, page header size) with a
// destination page location (file id, page number). Vectors of PageMapping are
// passed between FileMgr's data compaction methods and are written to / read
// from the compaction status file (see writePageMappingsToStatusFile() and
// readPageMappingsFromStatusFile()).
123 struct PageMapping {
// NOTE(review): original source line 124 is absent from this listing.
125 
    // Initializes each field from the like-named argument.
126  PageMapping(int32_t source_file_id,
127  size_t source_page_num,
128  PageHeaderSizeType source_page_header_size,
129  int32_t destination_file_id,
130  size_t destination_page_num)
131  : source_file_id(source_file_id)
132  , source_page_num(source_page_num)
133  , source_page_header_size(source_page_header_size)
134  , destination_file_id(destination_file_id)
135  , destination_page_num(destination_page_num) {}
136 
137  int32_t source_file_id;
// NOTE(review): original source lines 138-141 are absent from this listing. The
// other fields set by the constructor (source_page_num, source_page_header_size,
// destination_file_id, destination_page_num) are presumably declared there; the
// symbol index places source_page_header_size at FileMgr.h:139.
142 };
143 
// FileMgr is an AbstractBufferMgr implementation whose buffers are FileBuffer
// objects addressed by ChunkKey. It also tracks an epoch (see epoch(),
// incrementEpoch(), checkpoint()) and exposes file compaction (compactFiles()).
//
// NOTE(review): this listing omits a number of original source lines (the
// embedded line numbers skip). Some members used below -- e.g. files_, epoch_,
// fileMgrKey_, gfm_, chunkIndex_ -- are therefore not declared in the visible
// text, although the symbol index at the end of the page names them.
148 class FileMgr : public AbstractBufferMgr { // implements
149  friend class GlobalFileMgr;
150 
151  public:
153  FileMgr(const int32_t deviceId,
154  GlobalFileMgr* gfm,
155  const TablePair fileMgrKey,
156  const int32_t max_rollback_epochs = -1,
157  const size_t num_reader_threads = 0,
158  const int32_t epoch = -1,
159  const size_t defaultPageSize = DEFAULT_PAGE_SIZE);
160 
161  // used only to initialize enough to drop or to get basic metadata
162  FileMgr(const int32_t deviceId,
163  GlobalFileMgr* gfm,
164  const TablePair fileMgrKey,
165  const size_t defaultPageSize,
166  const bool runCoreInit);
167 
168  FileMgr(GlobalFileMgr* gfm, const size_t defaultPageSize, std::string basePath);
169 
171  ~FileMgr() override;
172 
173  StorageStats getStorageStats();
175  FileBuffer* createBuffer(const ChunkKey& key,
176  size_t pageSize = 0,
177  const size_t numBytes = 0) override;
178 
179  bool isBufferOnDevice(const ChunkKey& key) override;
181  // Purge == true means delete the data chunks -
182  // can't undelete and revert to previous
183  // state - reclaims disk space for chunk
184  void deleteBuffer(const ChunkKey& key, const bool purge = true) override;
185  void deleteBuffersWithPrefix(const ChunkKey& keyPrefix,
186  const bool purge = true) override;
187 
189  FileBuffer* getBuffer(const ChunkKey& key, const size_t numBytes = 0) override;
190 
191  void fetchBuffer(const ChunkKey& key,
192  AbstractBuffer* destBuffer,
193  const size_t numBytes) override;
194 
201  FileBuffer* putBuffer(const ChunkKey& key,
202  AbstractBuffer* d,
203  const size_t numBytes = 0) override;
204 
205  // Buffer API
206  AbstractBuffer* alloc(const size_t numBytes) override;
207  void free(AbstractBuffer* buffer) override;
208  virtual Page requestFreePage(size_t pagesize, const bool isMetadata);
209 
     // Manager identification and size reporting. For this manager every
     // size-related query returns 0, the allocation is never reported as capped,
     // and printSlabs() only returns the text "Not Implemented".
210  inline MgrType getMgrType() override { return FILE_MGR; };
211  inline std::string getStringMgrType() override { return ToString(FILE_MGR); }
212  inline std::string printSlabs() override { return "Not Implemented"; }
213  inline size_t getMaxSize() override { return 0; }
214  inline size_t getInUseSize() override { return 0; }
215  inline size_t getAllocated() override { return 0; }
216  inline bool isAllocationCapped() override { return false; }
217 
     // Looks up the FileInfo stored under fileId using files_.at(). The symbol
     // index lists files_ as a std::map<int32_t, FileInfo*>, so an unknown id
     // makes at() throw std::out_of_range rather than return nullptr.
218  inline FileInfo* getFileInfoForFileId(const int32_t fileId) const {
219  return files_.at(fileId);
220  }
221 
222  FileMetadata getMetadataForFile(
223  const boost::filesystem::directory_iterator& fileIterator);
224 
225  void init(const size_t num_reader_threads, const int32_t epochOverride);
226  void init(const std::string& dataPathToConvertFrom, const int32_t epochOverride);
227 
228  void copyPage(Page& srcPage,
229  FileMgr* destFileMgr,
230  Page& destPage,
231  const size_t reservedHeaderSize,
232  const size_t numBytes,
233  const size_t offset);
234 
248  void requestFreePages(size_t npages,
249  size_t pagesize,
250  std::vector<Page>& pages,
251  const bool isMetadata);
252 
253  void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector& chunkMetadataVec,
254  const ChunkKey& keyPrefix) override;
255 
261  void checkpoint() override;
     // The per-table overload is not supported here: it logs at FATAL level and
     // points callers at the no-argument checkpoint() instead.
262  void checkpoint(const int32_t db_id, const int32_t tb_id) override {
263  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
264  }
     // Returns current value of epoch - should be one greater than recorded at
     // last checkpoint. Because FileMgr only contains buffers from one table we
     // can just return the FileMgr's epoch instead of finding a table-specific
     // epoch; both arguments are ignored.
271  inline virtual int32_t epoch(int32_t db_id, int32_t tb_id) const { return epoch(); }
272 
     // Returns epoch_.floor() converted to int32_t.
273  inline int32_t epochFloor() const { return static_cast<int32_t>(epoch_.floor()); }
274 
     // Advances epoch_, records that the new epoch has not been checkpointed yet,
     // and returns the new value. Logs at FATAL level when the new value exceeds
     // Epoch::max_allowable_epoch().
275  inline int32_t incrementEpoch() {
276  int32_t newEpoch = epoch_.increment();
277  epochIsCheckpointed_ = false;
278  // We test for error here instead of in Epoch::increment so we can log FileMgr
279  // metadata
280  if (newEpoch > Epoch::max_allowable_epoch()) {
281  LOG(FATAL) << "Epoch for table (" << fileMgrKey_.first << ", " << fileMgrKey_.second
282  << ") greater than maximum allowed value of "
283  << Epoch::max_allowable_epoch() << ".";
284  }
285  return newEpoch;
286  }
287 
     // Returns value of epoch at last checkpoint: the current epoch when
     // epochIsCheckpointed_ is set, otherwise the current epoch minus one.
291  inline int32_t lastCheckpointedEpoch() {
292  return epoch() - (epochIsCheckpointed_ ? 0 : 1);
293  }
294 
     // Returns value max_rollback_epochs.
298  inline int32_t maxRollbackEpochs() { return maxRollbackEpochs_; }
299 
     // Returns number of threads defined by parameter num-reader-threads.
304  inline size_t getNumReaderThreads() { return num_reader_threads_; }
305 
313  FILE* getFileForFileId(const int32_t fileId);
314 
315  size_t getNumChunks() override;
316  size_t getNumUsedMetadataPagesForChunkKey(const ChunkKey& chunkKey) const;
317 
319  // #TM Not sure if we need this below
320  int32_t getDBVersion() const;
321  bool getDBConvert() const;
322  void createTopLevelMetadata(); // create metadata shared by all tables of all DBs
323  inline std::string getFileMgrBasePath() const { return fileMgrBasePath_; }
324  virtual void closeRemovePhysical();
325 
326  void removeTableRelatedDS(const int32_t db_id, const int32_t table_id) override;
327 
328  virtual void free_page(std::pair<FileInfo*, int32_t>&& page);
329  inline virtual bool hasFileMgrKey() const { return true; }
330  const TablePair get_fileMgrKey() const { return fileMgrKey_; }
331 
     // Returns file_name appended to fileMgrBasePath_ as a boost::filesystem path.
332  inline boost::filesystem::path getFilePath(const std::string& file_name) {
333  return boost::filesystem::path(fileMgrBasePath_) / file_name;
334  }
335 
336  // Visible for use in unit tests.
337  void writePageMappingsToStatusFile(const std::vector<PageMapping>& page_mappings);
338 
339  // Visible for use in unit tests.
340  void renameCompactionStatusFile(const char* const from_status,
341  const char* const to_status);
342 
343  void compactFiles();
344 
348  virtual bool updatePageIfDeleted(FileInfo* file_info,
349  ChunkKey& chunk_key,
350  int32_t contingent,
351  int32_t page_epoch,
352  int32_t page_num);
353 
     // True if a read error should cause a fatal error.
357  inline virtual bool failOnReadError() const { return true; }
358 
359  // Used to describe the manager in logging and error messages.
360  virtual std::string describeSelf() const;
361 
362  static constexpr size_t DEFAULT_NUM_PAGES_PER_DATA_FILE{256};
363  static constexpr size_t DEFAULT_NUM_PAGES_PER_METADATA_FILE{4096};
364 
365  // Name of files that indicate the different statuses/phases of data compaction.
366  static constexpr char const* COPY_PAGES_STATUS{"pending_data_compaction_0"};
367  static constexpr char const* UPDATE_PAGE_VISIBILITY_STATUS{"pending_data_compaction_1"};
368  static constexpr char const* DELETE_EMPTY_FILES_STATUS{"pending_data_compaction_2"};
369 
370  // Methods that enable override of number of pages per data/metadata file
371  // for use in unit tests.
372  static void setNumPagesPerDataFile(size_t num_pages);
373  static void setNumPagesPerMetadataFile(size_t num_pages);
374 
375  static constexpr char LEGACY_EPOCH_FILENAME[] = "epoch";
376  static constexpr char EPOCH_FILENAME[] = "epoch_metadata";
377  static constexpr char DB_META_FILENAME[] = "dbmeta";
378  static constexpr char FILE_MGR_VERSION_FILENAME[] = "filemgr_version";
379  static constexpr int32_t INVALID_VERSION = -1;
380 
381  protected:
382  // Used to initialize CachingFileMgr.
383  FileMgr();
384 
386  std::string fileMgrBasePath_;
     // NOTE(review): original source lines 388-392 are absent from this listing,
     // which leaves the declaration that starts on the next line cut short. The
     // symbol index places files_ (389), fileIndex_ (390), num_reader_threads_
     // (391) and defaultPageSize_ (392) in that gap.
387  std::map<int32_t, FileInfo*>
393  unsigned nextFileId_;
394  int32_t db_version_;
395  int32_t fileMgrVersion_;
397  const int32_t latestFileMgrVersion_{1};
398  FILE* DBMetaFile_ = nullptr;
399  std::mutex getPageMutex_;
402 
404  std::vector<std::pair<FileInfo*, int32_t>> free_pages_;
405  bool isFullyInitted_{false};
406 
409 
426  FileInfo* createFile(const size_t pageSize, const size_t numPages);
427  FileInfo* openExistingFile(const std::string& path,
428  const int32_t fileId,
429  const size_t pageSize,
430  const size_t numPages,
431  std::vector<HeaderInfo>& headerVec);
432  void createEpochFile(const std::string& epochFileName);
433  int32_t openAndReadLegacyEpochFile(const std::string& epochFileName);
434  void openAndReadEpochFile(const std::string& epochFileName);
435  void writeAndSyncEpochToDisk();
436  void setEpoch(const int32_t newEpoch); // resets current value of epoch at startup
437  int32_t readVersionFromDisk(const std::string& versionFileName) const;
438  void writeAndSyncVersionToDisk(const std::string& versionFileName,
439  const int32_t version);
440  void processFileFutures(std::vector<std::future<std::vector<HeaderInfo>>>& file_futures,
441  std::vector<HeaderInfo>& headerVec);
442  virtual FileBuffer* createBufferUnlocked(const ChunkKey& key,
443  size_t pageSize = 0,
444  const size_t numBytes = 0);
445  virtual FileBuffer* createBufferFromHeaders(
446  const ChunkKey& key,
447  const std::vector<HeaderInfo>::const_iterator& headerStartIt,
448  const std::vector<HeaderInfo>::const_iterator& headerEndIt);
449 
450  // Migration functions
451  void migrateToLatestFileMgrVersion();
452  void migrateEpochFileV0();
453 
454  OpenFilesResult openFiles();
455 
456  void clearFileInfos();
457 
458  // Data compaction methods
459  void copySourcePageForCompaction(const Page& source_page,
460  FileInfo* destination_file_info,
461  std::vector<PageMapping>& page_mappings,
462  std::set<Page>& touched_pages);
463  int32_t copyPageWithoutHeaderSize(const Page& source_page,
464  const Page& destination_page);
465  void sortAndCopyFilePagesForCompaction(size_t page_size,
466  std::vector<PageMapping>& page_mappings,
467  std::set<Page>& touched_pages);
468  void updateMappedPagesVisibility(const std::vector<PageMapping>& page_mappings);
469  void deleteEmptyFiles();
470  void resumeFileCompaction(const std::string& status_file_name);
471  std::vector<PageMapping> readPageMappingsFromStatusFile();
472 
473  // For testing purposes only
474  FileMgr(const int epoch);
475 
476  void closePhysicalUnlocked();
477  void syncFilesToDisk();
478  void freePages();
479  void initializeNumThreads(size_t num_reader_threads = 0);
480  virtual FileBuffer* allocateBuffer(const size_t page_size,
481  const ChunkKey& key,
482  const size_t num_bytes = 0);
483  virtual FileBuffer* allocateBuffer(
484  const ChunkKey& key,
485  const std::vector<HeaderInfo>::const_iterator& headerStartIt,
486  const std::vector<HeaderInfo>::const_iterator& headerEndIt);
487  virtual ChunkKeyToChunkMap::iterator deleteBufferUnlocked(
488  const ChunkKeyToChunkMap::iterator chunk_it,
489  const bool purge = true);
490  virtual FileBuffer* getBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it,
491  const size_t numBytes = 0);
492 
493  private:
494  void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint);
495  void freePagesBeforeEpoch(const int32_t min_epoch);
496  void freePagesBeforeEpochUnlocked(const int32_t min_epoch,
497  const ChunkKeyToChunkMap::iterator lower_bound,
498  const ChunkKeyToChunkMap::iterator upper_bound);
499  FileBuffer* getOrCreateBuffer(const ChunkKey& key);
505  bool coreInit();
     // Current epoch: epoch_.ceiling() converted to int32_t. The public
     // two-argument epoch() overload forwards here.
506  inline int32_t epoch() const { return static_cast<int32_t>(epoch_.ceiling()); }
507  void writeDirtyBuffers();
508 
511 
     // Starts out true, is cleared by incrementEpoch(), and is consulted by
     // lastCheckpointedEpoch().
513  bool epochIsCheckpointed_ = true;
514  FILE* epochFile_ = nullptr;
515 };
516 
517 } // namespace File_Namespace
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
virtual int32_t epoch(int32_t db_id, int32_t tb_id) const
Returns current value of epoch - should be one greater than recorded at last checkpoint. Because FileMgr only contains buffers from one table we can just return the FileMgr's epoch instead of finding a table-specific epoch.
Definition: FileMgr.h:271
std::vector< int > ChunkKey
Definition: types.h:37
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:510
mapd_shared_mutex mutex_free_page_
Definition: FileMgr.h:403
virtual bool hasFileMgrKey() const
Definition: FileMgr.h:329
std::vector< HeaderInfo > header_infos
Definition: FileMgr.h:115
A logical page (Page) belongs to a file on disk.
Definition: Page.h:46
#define LOG(tag)
Definition: Logger.h:203
std::string printSlabs() override
Definition: FileMgr.h:212
size_t getMaxSize() override
Definition: FileMgr.h:213
std::string getFileMgrBasePath() const
Definition: FileMgr.h:323
GlobalFileMgr * gfm_
Definition: FileMgr.h:509
std::mutex getPageMutex_
pointer to DB level metadata
Definition: FileMgr.h:399
Represents/provides access to contiguous data stored in the file system.
Definition: FileBuffer.h:58
static int64_t max_allowable_epoch()
Definition: Epoch.h:69
MgrType getMgrType() override
Definition: FileMgr.h:210
std::string fileMgrBasePath_
Definition: FileMgr.h:386
std::multimap< size_t, int32_t > PageSizeFileMMap
Maps logical page sizes to files.
Definition: FileMgr.h:56
size_t getInUseSize() override
Definition: FileMgr.h:214
static size_t num_pages_per_data_file_
Definition: FileMgr.h:407
int32_t PageHeaderSizeType
Definition: FileMgr.h:121
int32_t db_version_
the index of the next file id
Definition: FileMgr.h:394
PageMapping(int32_t source_file_id, size_t source_page_num, PageHeaderSizeType source_page_header_size, int32_t destination_file_id, size_t destination_page_num)
Definition: FileMgr.h:126
int32_t incrementEpoch()
Definition: FileMgr.h:275
void init(LogOptions const &log_opts)
Definition: Logger.cpp:290
std::shared_timed_mutex mapd_shared_mutex
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:318
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:390
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:404
An AbstractBuffer is a unit of data management for a data manager.
size_t num_reader_threads_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:391
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:408
std::map< ChunkKey, FileBuffer * > ChunkKeyToChunkMap
Maps ChunkKeys (unique ids for Chunks) to Chunk objects.
Definition: FileMgr.h:80
#define DEFAULT_PAGE_SIZE
std::string compaction_status_file_name
Definition: FileMgr.h:117
const TablePair get_fileMgrKey() const
Definition: FileMgr.h:330
string version
Definition: setup.in.py:73
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:392
int32_t maxRollbackEpochs_
Definition: FileMgr.h:385
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
Definition: Epoch.h:30
void checkpoint(const int32_t db_id, const int32_t tb_id) override
Definition: FileMgr.h:262
virtual bool failOnReadError() const
True if a read error should cause a fatal error.
Definition: FileMgr.h:357
std::string getStringMgrType() override
Definition: FileMgr.h:211
size_t getAllocated() override
Definition: FileMgr.h:215
int32_t epoch() const
Definition: FileMgr.h:506
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:389
FileInfo * getFileInfoForFileId(const int32_t fileId) const
Definition: FileMgr.h:218
int32_t maxRollbackEpochs()
Returns value max_rollback_epochs.
Definition: FileMgr.h:298
std::pair< const int32_t, const int32_t > TablePair
Definition: FileMgr.h:86
int32_t epochFloor() const
Definition: FileMgr.h:273
PageHeaderSizeType source_page_header_size
Definition: FileMgr.h:139
bool isAllocationCapped() override
Definition: FileMgr.h:216
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:400
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:401
int32_t lastCheckpointedEpoch()
Returns value of epoch at last checkpoint.
Definition: FileMgr.h:291
size_t getNumReaderThreads()
Returns number of threads defined by parameter num-reader-threads which should be used during initial...
Definition: FileMgr.h:304
boost::filesystem::path getFilePath(const std::string &file_name)
Definition: FileMgr.h:332