OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FileMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
26 #pragma once
27 
28 #include <future>
29 #include <iostream>
30 #include <map>
31 #include <mutex>
32 #include <set>
33 #include <vector>
34 
35 #include "DataMgr/AbstractBuffer.h"
37 #include "DataMgr/FileMgr/Epoch.h"
40 #include "DataMgr/FileMgr/Page.h"
43 
44 using namespace Data_Namespace;  // NOTE(review): using-directive at file scope of a `#pragma once` header; it is visible to every file that includes this one
45 
46 namespace boost {
47 namespace filesystem {
48 class directory_iterator;  // forward declaration; used as a parameter type by FileMgr::getMetadataForFile()
49 }
50 } // namespace boost
51 
52 namespace File_Namespace {
53 class GlobalFileMgr; // forward declaration
62 using PageSizeFileMMap = std::multimap<size_t, int32_t>;  // Maps logical page sizes to files (int32_t values; presumably file ids -- confirm).
63 
74 using Chunk = FileBuffer;  // Chunk is another name for FileBuffer.
75 
86 using ChunkKeyToChunkMap = std::map<ChunkKey, FileBuffer*>;  // Maps ChunkKeys (unique ids for Chunks) to FileBuffer pointers.
92 using TablePair = std::pair<const int32_t, const int32_t>;  // Type of FileMgr's fileMgrKey; presumably (db_id, table_id) -- confirm.
93 
94 struct FileMetadata {  // Per-file description; the return type of FileMgr::getMetadataForFile().
95  int32_t file_id;
96  std::string file_path;
97  size_t page_size;
98  size_t file_size;  // NOTE(review): units presumably bytes -- confirm
99  size_t num_pages;  // NOTE(review): listing numbering jumps 99 -> 101 here; a line may be missing from this listing
101 };
102 
103 struct StorageStats {  // Storage counters; returned by value from FileMgr::getStorageStats().
104  int32_t epoch{0};
105  int32_t epoch_floor{0};
106  uint32_t metadata_file_count{0};
107  uint64_t total_metadata_file_size{0};
108  uint64_t total_metadata_page_count{0};
109  std::optional<uint64_t> total_free_metadata_page_count{};  // optional: default-initialized to "no value"
110  uint32_t data_file_count{0};
111  uint64_t total_data_file_size{0};
112  uint64_t total_data_page_count{0};
113  std::optional<uint64_t> total_free_data_page_count{};  // optional: default-initialized to "no value"
114  std::optional<uint32_t> fragment_count{};  // optional: default-initialized to "no value"
115 
116  StorageStats() = default;
117  StorageStats(const StorageStats& storage_stats) = default;
118  virtual ~StorageStats() = default;  // virtual, so a derived stats type can be destroyed through a StorageStats pointer
119 };
120 
122  std::vector<HeaderInfo> header_infos;
123  int32_t max_file_id;
125 };
126 
127 // Page header size is serialized/deserialized as an int.
128 using PageHeaderSizeType = int32_t;
129 
130 struct PageMapping {  // Pairs a source page (file id, page number, header size) with a destination page (file id, page number).
132 
133  PageMapping(int32_t source_file_id,  // member-wise constructor: each argument initializes the member of the same name
134  size_t source_page_num,
135  PageHeaderSizeType source_page_header_size,
136  int32_t destination_file_id,
137  size_t destination_page_num)
138  : source_file_id(source_file_id)
139  , source_page_num(source_page_num)
140  , source_page_header_size(source_page_header_size)
141  , destination_file_id(destination_file_id)
142  , destination_page_num(destination_page_num) {}
143 
144  int32_t source_file_id;  // NOTE(review): listing numbering jumps 144 -> 149; the other four member declarations are not shown in this listing
149 };
150 
155 class FileMgr : public AbstractBufferMgr { // implements AbstractBufferMgr; a FileMgr only contains buffers from one table
156  friend class GlobalFileMgr;
157 
158  public:
160  FileMgr(const int32_t deviceId,  // primary constructor; the last four parameters are defaulted
161  GlobalFileMgr* gfm,
162  const TablePair fileMgrKey,
163  const int32_t max_rollback_epochs = -1,
164  const size_t num_reader_threads = 0,
165  const int32_t epoch = -1,
166  const size_t defaultPageSize = DEFAULT_PAGE_SIZE);
167 
168  // used only to initialize enough to drop or to get basic metadata
169  FileMgr(const int32_t deviceId,
170  GlobalFileMgr* gfm,
171  const TablePair fileMgrKey,
172  const size_t defaultPageSize,
173  const bool runCoreInit);
174 
175  FileMgr(GlobalFileMgr* gfm, const size_t defaultPageSize, std::string basePath);
176 
178  ~FileMgr() override;
179 
180  StorageStats getStorageStats() const;  // returns a StorageStats by value
182  FileBuffer* createBuffer(const ChunkKey& key,
183  size_t pageSize = 0,
184  const size_t numBytes = 0) override;
185 
186  bool isBufferOnDevice(const ChunkKey& key) override;
188  // Purge == true means delete the data chunks -
189  // can't undelete and revert to previous
190  // state - reclaims disk space for chunk
191  void deleteBuffer(const ChunkKey& key, const bool purge = true) override;
192  void deleteBuffersWithPrefix(const ChunkKey& keyPrefix,
193  const bool purge = true) override;
194 
196  FileBuffer* getBuffer(const ChunkKey& key, const size_t numBytes = 0) override;
197 
198  void fetchBuffer(const ChunkKey& key,
199  AbstractBuffer* destBuffer,
200  const size_t numBytes) override;
201 
208  FileBuffer* putBuffer(const ChunkKey& key,
209  AbstractBuffer* d,
210  const size_t numBytes = 0) override;
211 
212  // Buffer API
213  AbstractBuffer* alloc(const size_t numBytes) override;
214  void free(AbstractBuffer* buffer) override;
215  virtual Page requestFreePage(size_t pagesize, const bool isMetadata);
216 
217  inline MgrType getMgrType() override { return FILE_MGR; };
218  inline std::string getStringMgrType() override { return ToString(FILE_MGR); }
219  inline std::string printSlabs() override { return "Not Implemented"; }  // no slab report is produced; always returns this fixed string
220  inline size_t getMaxSize() override { return 0; }  // getMaxSize/getInUseSize/getAllocated all return 0 in this class
221  inline size_t getInUseSize() override { return 0; }
222  inline size_t getAllocated() override { return 0; }
223  inline bool isAllocationCapped() override { return false; }  // always reports "not capped"
224 
225  inline FileInfo* getFileInfoForFileId(const int32_t fileId) const {  // NOTE(review): if files_ is the std::map<int32_t, FileInfo*> named in the index at the end of this listing, at() throws std::out_of_range for an unknown id -- confirm
226  return files_.at(fileId);
227  }
228 
229  FileMetadata getMetadataForFile(
230  const boost::filesystem::directory_iterator& fileIterator) const;
231 
232  void init(const size_t num_reader_threads, const int32_t epochOverride);
233  void init(const std::string& dataPathToConvertFrom, const int32_t epochOverride);
234 
235  void copyPage(Page& srcPage,
236  FileMgr* destFileMgr,
237  Page& destPage,
238  const size_t reservedHeaderSize,
239  const size_t numBytes,
240  const size_t offset);
241 
255  void requestFreePages(size_t npages,
256  size_t pagesize,
257  std::vector<Page>& pages,
258  const bool isMetadata);
259 
260  void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector& chunkMetadataVec,
261  const ChunkKey& keyPrefix) override;
262 
263  bool hasChunkMetadataForKeyPrefix(const ChunkKey& keyPrefix);
264 
270  void checkpoint() override;
271  void checkpoint(const int32_t db_id, const int32_t tb_id) override {  // table-scoped overload is not supported by this class: it only logs FATAL
272  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
273  }
280  inline virtual int32_t epoch(int32_t db_id, int32_t tb_id) const { return epoch(); }  // Returns current value of epoch; db_id/tb_id are ignored because a FileMgr only contains buffers from one table
281 
282  inline int32_t epochFloor() const { return static_cast<int32_t>(epoch_.floor()); }  // epoch_.floor() converted to int32_t
283 
284  inline int32_t incrementEpoch() {  // advances epoch_, clears epochIsCheckpointed_, and returns the new epoch value
285  int32_t newEpoch = epoch_.increment();
286  epochIsCheckpointed_ = false;
287  // We test for error here instead of in Epoch::increment so we can log FileMgr
288  // metadata
289  if (newEpoch > Epoch::max_allowable_epoch()) {
290  LOG(FATAL) << "Epoch for table (" << fileMgrKey_.first << ", " << fileMgrKey_.second
291  << ") greater than maximum allowed value of "
292  << Epoch::max_allowable_epoch() << ".";
293  }
294  return newEpoch;
295  }
296 
300  inline int32_t lastCheckpointedEpoch() const {  // Returns value of epoch at last checkpoint: epoch() if checkpointed, otherwise epoch() - 1
301  return epoch() - (epochIsCheckpointed_ ? 0 : 1);
302  }
303 
304  inline void resetEpochFloor() { epoch_.floor(epoch_.ceiling()); }  // sets the epoch floor to the current epoch ceiling
305 
309  inline int32_t maxRollbackEpochs() { return maxRollbackEpochs_; }  // Returns value max_rollback_epochs.
310 
315  inline size_t getNumReaderThreads() { return num_reader_threads_; }  // returns the stored num_reader_threads_ value
316 
324  FILE* getFileForFileId(const int32_t fileId);
325 
326  size_t getNumChunks() override;
327  size_t getNumUsedMetadataPagesForChunkKey(const ChunkKey& chunkKey) const;
328 
330  // #TM Not sure if we need this below
331  int32_t getDBVersion() const;
332  bool getDBConvert() const;
333  void createTopLevelMetadata(); // create metadata shared by all tables of all DBs
334  inline std::string getFileMgrBasePath() const { return fileMgrBasePath_; }  // returns a copy of fileMgrBasePath_
335  virtual void closeRemovePhysical();
336 
337  void removeTableRelatedDS(const int32_t db_id, const int32_t table_id) override;
338 
339  virtual void free_page(std::pair<FileInfo*, int32_t>&& page);
340  inline virtual bool hasFileMgrKey() const { return true; }  // this class always answers true; virtual, so a subclass may answer differently
341  const TablePair get_fileMgrKey() const { return fileMgrKey_; }  // returns a copy of fileMgrKey_
342 
343  boost::filesystem::path getFilePath(const std::string& file_name) const;
344 
345  // Visible for use in unit tests.
346  void writePageMappingsToStatusFile(const std::vector<PageMapping>& page_mappings);
347 
348  // Visible for use in unit tests.
349  void renameCompactionStatusFile(const char* const from_status,
350  const char* const to_status);
351 
352  void compactFiles();
353 
357  virtual bool updatePageIfDeleted(FileInfo* file_info,
358  ChunkKey& chunk_key,
359  int32_t contingent,
360  int32_t page_epoch,
361  int32_t page_num);
362 
366  inline virtual bool failOnReadError() const { return true; }  // True if a read error should cause a fatal error; this class always returns true
367 
368  // Used to describe the manager in logging and error messages.
369  virtual std::string describeSelf() const;
370 
371  static constexpr size_t DEFAULT_NUM_PAGES_PER_DATA_FILE{256};  // compile-time defaults; presumably the values the setNumPagesPer*File() test hooks below replace -- confirm
372  static constexpr size_t DEFAULT_NUM_PAGES_PER_METADATA_FILE{4096};
373 
374  // Name of files that indicate the different statuses/phases of data compaction.
375  static constexpr char const* COPY_PAGES_STATUS{"pending_data_compaction_0"};
376  static constexpr char const* UPDATE_PAGE_VISIBILITY_STATUS{"pending_data_compaction_1"};
377  static constexpr char const* DELETE_EMPTY_FILES_STATUS{"pending_data_compaction_2"};
378 
379  // Methods that enable override of number of pages per data/metadata file
380  // for use in unit tests.
381  static void setNumPagesPerDataFile(size_t num_pages);
382  static void setNumPagesPerMetadataFile(size_t num_pages);
383 
384  static void renameAndSymlinkLegacyFiles(const std::string& table_data_dir);
385 
386  static constexpr char LEGACY_EPOCH_FILENAME[] = "epoch";
387  static constexpr char EPOCH_FILENAME[] = "epoch_metadata";
388  static constexpr char DB_META_FILENAME[] = "dbmeta";
389  static constexpr char FILE_MGR_VERSION_FILENAME[] = "filemgr_version";
390  static constexpr int32_t INVALID_VERSION = -1;  // NOTE(review): presumably the sentinel for a missing/unreadable version -- confirm
391 
392  protected:
393  // Used to initialize CachingFileMgr.
394  FileMgr();
395 
397  std::string fileMgrBasePath_;
398  std::map<int32_t, FileInfo*>  // NOTE(review): declarator is missing from this listing (numbering jumps 398 -> 404); the index at the end of the listing names this member files_
404  unsigned nextFileId_;
405  int32_t db_version_;
406  int32_t fileMgrVersion_;
408  const int32_t latestFileMgrVersion_{2};  // NOTE(review): presumably the version migrateToLatestFileMgrVersion() migrates to -- confirm
409  FILE* DBMetaFile_ = nullptr;
410  std::mutex getPageMutex_;
413 
415  std::vector<std::pair<FileInfo*, int32_t>> free_pages_;  // same element type that free_page() accepts
416  bool isFullyInitted_{false};
417 
420 
437  FileInfo* createFile(const size_t pageSize, const size_t numPages);
438  FileInfo* openExistingFile(const std::string& path,
439  const int32_t fileId,
440  const size_t pageSize,
441  const size_t numPages,
442  std::vector<HeaderInfo>& headerVec);
443  void createEpochFile(const std::string& epochFileName);
444  int32_t openAndReadLegacyEpochFile(const std::string& epochFileName);
445  void openAndReadEpochFile(const std::string& epochFileName);
446  void writeAndSyncEpochToDisk();
447  void setEpoch(const int32_t newEpoch); // resets current value of epoch at startup
448  int32_t readVersionFromDisk(const std::string& versionFileName) const;
449  void writeAndSyncVersionToDisk(const std::string& versionFileName,
450  const int32_t version);
451  void processFileFutures(std::vector<std::future<std::vector<HeaderInfo>>>& file_futures,
452  std::vector<HeaderInfo>& headerVec);
453  virtual FileBuffer* createBufferUnlocked(const ChunkKey& key,
454  size_t pageSize = 0,
455  const size_t numBytes = 0);
456  virtual FileBuffer* createBufferFromHeaders(
457  const ChunkKey& key,
458  const std::vector<HeaderInfo>::const_iterator& headerStartIt,
459  const std::vector<HeaderInfo>::const_iterator& headerEndIt);
460 
461  // Migration functions
462  void migrateToLatestFileMgrVersion();
463  void migrateEpochFileV0();
464  void migrateLegacyFilesV1();
465 
466  OpenFilesResult openFiles();
467 
468  void clearFileInfos();
469 
470  // Data compaction methods
471  void copySourcePageForCompaction(const Page& source_page,
472  FileInfo* destination_file_info,
473  std::vector<PageMapping>& page_mappings,
474  std::set<Page>& touched_pages);
475  int32_t copyPageWithoutHeaderSize(const Page& source_page,
476  const Page& destination_page);
477  void sortAndCopyFilePagesForCompaction(size_t page_size,
478  std::vector<PageMapping>& page_mappings,
479  std::set<Page>& touched_pages);
480  void updateMappedPagesVisibility(const std::vector<PageMapping>& page_mappings);
481  void deleteEmptyFiles();
482  void resumeFileCompaction(const std::string& status_file_name);
483  std::vector<PageMapping> readPageMappingsFromStatusFile();
484 
485  // For testing purposes only
486  FileMgr(const int epoch);  // NOTE(review): single-argument constructor is not marked explicit
487 
488  void closePhysicalUnlocked();
489  void syncFilesToDisk();
490  void freePages();
491  void initializeNumThreads(size_t num_reader_threads = 0);
492  virtual FileBuffer* allocateBuffer(const size_t page_size,
493  const ChunkKey& key,
494  const size_t num_bytes = 0);
495  virtual FileBuffer* allocateBuffer(
496  const ChunkKey& key,
497  const std::vector<HeaderInfo>::const_iterator& headerStartIt,
498  const std::vector<HeaderInfo>::const_iterator& headerEndIt);
499  virtual ChunkKeyToChunkMap::iterator deleteBufferUnlocked(
500  const ChunkKeyToChunkMap::iterator chunk_it,
501  const bool purge = true);
502  virtual FileBuffer* getBufferUnlocked(const ChunkKey& key,
503  const size_t numBytes = 0) const;
504 
505  private:
506  void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint);
507  void freePagesBeforeEpoch(const int32_t min_epoch);
508  void freePagesBeforeEpochUnlocked(const int32_t min_epoch,
509  const ChunkKeyToChunkMap::iterator lower_bound,
510  const ChunkKeyToChunkMap::iterator upper_bound);
511  FileBuffer* getOrCreateBuffer(const ChunkKey& key);
517  bool coreInit();
518  inline int32_t epoch() const { return static_cast<int32_t>(epoch_.ceiling()); }  // current epoch: epoch_.ceiling() converted to int32_t
519  void writeDirtyBuffers();
520 
521  void setDataAndMetadataFileStats(StorageStats& storage_stats) const;
522  uint32_t getFragmentCount() const;
523 
526 
528  bool epochIsCheckpointed_ = true;  // cleared by incrementEpoch(); read by lastCheckpointedEpoch()
529  FILE* epochFile_ = nullptr;
530 };
531 
532 } // namespace File_Namespace
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
virtual int32_t epoch(int32_t db_id, int32_t tb_id) const
Returns current value of epoch - should be one greater than recorded at last checkpoint. Because FileMgr only contains buffers from one table we can just return the FileMgr's epoch instead of finding a table-specific epoch.
Definition: FileMgr.h:280
std::vector< int > ChunkKey
Definition: types.h:37
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:525
mapd_shared_mutex mutex_free_page_
Definition: FileMgr.h:414
virtual bool hasFileMgrKey() const
Definition: FileMgr.h:340
std::vector< HeaderInfo > header_infos
Definition: FileMgr.h:122
A logical page (Page) belongs to a file on disk.
Definition: Page.h:46
#define LOG(tag)
Definition: Logger.h:217
std::string printSlabs() override
Definition: FileMgr.h:219
size_t getMaxSize() override
Definition: FileMgr.h:220
std::string getFileMgrBasePath() const
Definition: FileMgr.h:334
GlobalFileMgr * gfm_
Definition: FileMgr.h:524
std::mutex getPageMutex_
pointer to DB level metadata
Definition: FileMgr.h:410
Represents/provides access to contiguous data stored in the file system.
Definition: FileBuffer.h:58
static int64_t max_allowable_epoch()
Definition: Epoch.h:69
MgrType getMgrType() override
Definition: FileMgr.h:217
std::string fileMgrBasePath_
Definition: FileMgr.h:397
std::multimap< size_t, int32_t > PageSizeFileMMap
Maps logical page sizes to files.
Definition: FileMgr.h:62
int32_t lastCheckpointedEpoch() const
Returns value of epoch at last checkpoint.
Definition: FileMgr.h:300
size_t getInUseSize() override
Definition: FileMgr.h:221
static size_t num_pages_per_data_file_
Definition: FileMgr.h:418
int32_t PageHeaderSizeType
Definition: FileMgr.h:128
int32_t db_version_
the index of the next file id
Definition: FileMgr.h:405
PageMapping(int32_t source_file_id, size_t source_page_num, PageHeaderSizeType source_page_header_size, int32_t destination_file_id, size_t destination_page_num)
Definition: FileMgr.h:133
int32_t incrementEpoch()
Definition: FileMgr.h:284
void init(LogOptions const &log_opts)
Definition: Logger.cpp:306
std::shared_timed_mutex mapd_shared_mutex
string version
Definition: setup.in.py:73
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:329
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:401
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:415
An AbstractBuffer is a unit of data management for a data manager.
size_t num_reader_threads_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:402
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:419
std::map< ChunkKey, FileBuffer * > ChunkKeyToChunkMap
Maps ChunkKeys (unique ids for Chunks) to Chunk objects.
Definition: FileMgr.h:86
#define DEFAULT_PAGE_SIZE
std::string compaction_status_file_name
Definition: FileMgr.h:124
const TablePair get_fileMgrKey() const
Definition: FileMgr.h:341
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:403
int32_t maxRollbackEpochs_
Definition: FileMgr.h:396
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
Definition: Epoch.h:30
void checkpoint(const int32_t db_id, const int32_t tb_id) override
Definition: FileMgr.h:271
virtual bool failOnReadError() const
True if a read error should cause a fatal error.
Definition: FileMgr.h:366
std::string getStringMgrType() override
Definition: FileMgr.h:218
size_t getAllocated() override
Definition: FileMgr.h:222
int32_t epoch() const
Definition: FileMgr.h:518
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
FileInfo * getFileInfoForFileId(const int32_t fileId) const
Definition: FileMgr.h:225
int32_t maxRollbackEpochs()
Returns value max_rollback_epochs.
Definition: FileMgr.h:309
std::pair< const int32_t, const int32_t > TablePair
Definition: FileMgr.h:92
int32_t epochFloor() const
Definition: FileMgr.h:282
PageHeaderSizeType source_page_header_size
Definition: FileMgr.h:146
bool isAllocationCapped() override
Definition: FileMgr.h:223
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:411
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:412
size_t getNumReaderThreads()
Returns number of threads defined by parameter num-reader-threads which should be used during initial...
Definition: FileMgr.h:315