OmniSciDB  a47db9e897
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FileMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
26 #pragma once
27 
28 #include <future>
29 #include <iostream>
30 #include <map>
31 #include <mutex>
32 #include <set>
33 #include <vector>
34 
35 #include "DataMgr/AbstractBuffer.h"
39 #include "DataMgr/FileMgr/Page.h"
41 
42 using namespace Data_Namespace;
43 
44 namespace File_Namespace {
45 
46 class GlobalFileMgr; // forward declaration
// Maps logical page sizes to files. The mapped int is presumably a file id
// (compare getFileInfoForFileId(const int fileId)) - TODO confirm.
55 typedef std::multimap<size_t, int> PageSizeFileMMap;
56 
// Chunk is simply another name for FileBuffer.
67 using Chunk = FileBuffer;
68 
// Maps ChunkKeys (unique ids for Chunks) to Chunk objects, held by raw pointer.
79 typedef std::map<ChunkKey, FileBuffer*> ChunkKeyToChunkMap;
80 
// FileMgr: the AbstractBufferMgr subclass that reports FILE_MGR as its manager
// type. GlobalFileMgr is declared a friend of this class.
// NOTE(review): this is a rendered listing with gaps in its embedded line
// numbers; members that the inline code below uses but that are not declared
// here (num_reader_threads_, chunkIndex_) are presumably on the omitted lines.
85 class FileMgr : public AbstractBufferMgr { // implements
86  friend class GlobalFileMgr;
87 
88  public:
  // Primary constructor. Defaults visible here: num_reader_threads = 0,
  // epoch = -1, defaultPageSize = 2097152 (2 * 1024 * 1024; presumably
  // bytes - TODO confirm).
90  FileMgr(const int deviceId,
91  GlobalFileMgr* gfm,
92  const std::pair<const int, const int> fileMgrKey,
93  const size_t num_reader_threads = 0,
94  const int epoch = -1,
95  const size_t defaultPageSize = 2097152);
96 
97  // used only to initialize enough to drop
98  FileMgr(const int deviceId,
99  GlobalFileMgr* gfm,
100  const std::pair<const int, const int> fileMgrKey,
101  const bool initOnly);
102 
  // Overload taking an explicit default page size and base path.
103  FileMgr(GlobalFileMgr* gfm, const size_t defaultPageSize, std::string basePath);
104 
106  ~FileMgr() override;
107 
  // pageSize and numBytes both default to 0; what 0 selects is decided in the
  // implementation, which is not visible in this header.
109  AbstractBuffer* createBuffer(const ChunkKey& key,
110  size_t pageSize = 0,
111  const size_t numBytes = 0) override;
112 
113  bool isBufferOnDevice(const ChunkKey& key) override;
115  // purge == true means the data chunks are deleted and their disk
116  // space is reclaimed; the deletion cannot be undone or reverted
117  // to the previous state.
118  void deleteBuffer(const ChunkKey& key, const bool purge = true) override;
119 
120  void deleteBuffersWithPrefix(const ChunkKey& keyPrefix,
121  const bool purge = true) override;
122 
124  AbstractBuffer* getBuffer(const ChunkKey& key, const size_t numBytes = 0) override;
125 
126  void fetchBuffer(const ChunkKey& key,
127  AbstractBuffer* destBuffer,
128  const size_t numBytes) override;
129 
136  AbstractBuffer* putBuffer(const ChunkKey& key,
137  AbstractBuffer* d,
138  const size_t numBytes = 0) override;
139 
140  // Buffer API
141  AbstractBuffer* alloc(const size_t numBytes) override;
142  void free(AbstractBuffer* buffer) override;
143  Page requestFreePage(size_t pagesize, const bool isMetadata);
144 
  // Manager-type queries: always FILE_MGR, and its ToString() form.
145  inline MgrType getMgrType() override { return FILE_MGR; };
146  inline std::string getStringMgrType() override { return ToString(FILE_MGR); }
  // The slab and size queries below are fixed stubs in this class:
  // printSlabs() returns a placeholder string, clearSlabs() does nothing,
  // the three size getters return 0 and isAllocationCapped() returns false.
147  inline std::string printSlabs() override { return "Not Implemented"; }
148  inline void clearSlabs() override { /* noop */
149  }
150  inline size_t getMaxSize() override { return 0; }
151  inline size_t getInUseSize() override { return 0; }
152  inline size_t getAllocated() override { return 0; }
153  inline bool isAllocationCapped() override { return false; }
154 
  // Returns files_[fileId]. The index is not range-checked here, so the
  // caller must pass an id that is valid for files_.
155  inline FileInfo* getFileInfoForFileId(const int fileId) { return files_[fileId]; }
156 
157  void init(const size_t num_reader_threads);
158  void init(const std::string dataPathToConvertFrom);
159 
160  void copyPage(Page& srcPage,
161  FileMgr* destFileMgr,
162  Page& destPage,
163  const size_t reservedHeaderSize,
164  const size_t numBytes,
165  const size_t offset);
166 
  // Multi-page counterpart of requestFreePage(); results are delivered
  // through the pages out-parameter.
180  void requestFreePages(size_t npages,
181  size_t pagesize,
182  std::vector<Page>& pages,
183  const bool isMetadata);
184 
185  void getChunkMetadataVec(
186  std::vector<std::pair<ChunkKey, ChunkMetadata>>& chunkMetadataVec) override;
187  void getChunkMetadataVecForKeyPrefix(
188  std::vector<std::pair<ChunkKey, ChunkMetadata>>& chunkMetadataVec,
189  const ChunkKey& keyPrefix) override;
190 
196  void checkpoint() override;
  // Per-table overload: not supported by this class. It ignores both
  // arguments and only logs the message below at FATAL severity.
197  void checkpoint(const int db_id, const int tb_id) override {
198  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
199  }
  // Returns epoch_. Per the generated docs for this header: the current value
  // of epoch, which should be one greater than recorded at last checkpoint.
204  inline int epoch() { return epoch_; }
205 
  // Returns num_reader_threads_ (per the generated docs, the thread count
  // defined by the num-reader-threads parameter).
210  inline size_t getNumReaderThreads() { return num_reader_threads_; }
211 
219  FILE* getFileForFileId(const int fileId);
220 
  // Returns the number of entries in chunkIndex_.
221  inline size_t getNumChunks() override {
222  // @todo this read is unsynchronized and should be locked - tolerated because this is more for testing now
223  return chunkIndex_.size();
224  }
226  // #TM Not sure if we need this below
227  int getDBVersion() const;
228  bool getDBConvert() const;
229  void createTopLevelMetadata(); // create metadata shared by all tables of all DBs
  // Returns a copy of fileMgrBasePath_.
230  std::string getFileMgrBasePath() const { return fileMgrBasePath_; }
231  void closeRemovePhysical();
232 
  // Takes a (FileInfo*, int) pair by rvalue reference; free_pages below
  // stores pairs of the same type.
233  void free_page(std::pair<FileInfo*, int>&& page);
  // Returns a copy of fileMgrKey_.
234  const std::pair<const int, const int> get_fileMgrKey() const { return fileMgrKey_; }
235 
236  private:
  // NOTE(review): the member descriptions below come from the generated
  // cross-reference text for this header, which appears to attach each
  // description to the declaration FOLLOWING the one it documents; they are
  // re-attached here - confirm against the original header.
238  std::pair<const int, const int> fileMgrKey_;
239  std::string fileMgrBasePath_;
  // A vector of files accessible via a file identifier.
240  std::vector<FileInfo*> files_;
  // The index of the next file id.
245  unsigned nextFileId_;
  // The current epoch (time of last checkpoint).
246  int epoch_;
247  FILE* epochFile_;
  // Pointer to DB level metadata.
249  FILE* DBMetaFile_;
251  // bool isDirty_; /// true if metadata changed since last writeState()
252  std::mutex getPageMutex_;
255 
  // (FileInfo*, int) pairs, the same pair type that free_page() accepts.
257  std::vector<std::pair<FileInfo*, int>> free_pages;
258 
  // Private helpers for data files, the epoch file and the DB meta file.
275  FileInfo* createFile(const size_t pageSize, const size_t numPages);
276  FileInfo* openExistingFile(const std::string& path,
277  const int fileId,
278  const size_t pageSize,
279  const size_t numPages,
280  std::vector<HeaderInfo>& headerVec);
281  void createEpochFile(const std::string& epochFileName);
282  void openEpochFile(const std::string& epochFileName);
283  void writeAndSyncEpochToDisk();
284  void createDBMetaFile(const std::string& DBMetaFileName);
285  bool openDBMetaFile(const std::string& DBMetaFileName);
286  void writeAndSyncDBMetaToDisk();
287  void setEpoch(int epoch); // resets current value of epoch at startup
  // Consumes futures that each yield a vector<HeaderInfo>; headerVec is the
  // output vector (presumably the per-file results are gathered into it).
288  void processFileFutures(std::vector<std::future<std::vector<HeaderInfo>>>& file_futures,
289  std::vector<HeaderInfo>& headerVec);
290 };
291 
292 } // namespace File_Namespace
std::vector< int > ChunkKey
Definition: types.h:35
FileInfo * getFileInfoForFileId(const int fileId)
Definition: FileMgr.h:155
A logical page (Page) belongs to a file on disk.
Definition: Page.h:46
#define LOG(tag)
Definition: Logger.h:185
std::string printSlabs() override
Definition: FileMgr.h:147
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:257
size_t getMaxSize() override
Definition: FileMgr.h:150
std::string getFileMgrBasePath() const
Definition: FileMgr.h:230
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
std::mutex getPageMutex_
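pointer to DB level metadata (description misattached by the generator: it documents DBMetaFile_ at FileMgr.h:249, not this mutex)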
Definition: FileMgr.h:252
Represents/provides access to contiguous data stored in the file system.
Definition: FileBuffer.h:55
MgrType getMgrType() override
Definition: FileMgr.h:145
std::string fileMgrBasePath_
Definition: FileMgr.h:239
size_t getInUseSize() override
Definition: FileMgr.h:151
FILE * epochFile_
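the current epoch (time of last checkpoint) (description misattached by the generator: it documents epoch_ at FileMgr.h:246, not this file handle)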
Definition: FileMgr.h:247
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:204
std::map< ChunkKey, FileBuffer * > ChunkKeyToChunkMap
Maps ChunkKeys (unique ids for Chunks) to Chunk objects.
Definition: FileMgr.h:79
size_t getNumChunks() override
Definition: FileMgr.h:221
void init(LogOptions const &log_opts)
Definition: Logger.cpp:265
std::shared_timed_mutex mapd_shared_mutex
int epoch_
the index of the next file id (description misattached by the generator: it documents nextFileId_ at FileMgr.h:245, not epoch_)
Definition: FileMgr.h:246
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
PageSizeFileMMap fileIndex_
A vector of files accessible via a file identifier. (Description misattached by the generator: it documents files_ at FileMgr.h:240, not fileIndex_.)
Definition: FileMgr.h:242
An AbstractBuffer is a unit of data management for a data manager.
size_t num_reader_threads_
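Maps page sizes to FileInfo objects. (Description misattached by the generator: it documents fileIndex_ at FileMgr.h:242, not num_reader_threads_.)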
Definition: FileMgr.h:243
size_t defaultPageSize_
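number of threads used when loading data (description misattached by the generator: it documents num_reader_threads_ at FileMgr.h:243, not defaultPageSize_)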
Definition: FileMgr.h:244
std::string getStringMgrType() override
Definition: FileMgr.h:146
void clearSlabs() override
Definition: FileMgr.h:148
std::multimap< size_t, int > PageSizeFileMMap
Maps logical page sizes to files.
Definition: FileMgr.h:46
size_t getAllocated() override
Definition: FileMgr.h:152
void checkpoint(const int db_id, const int tb_id) override
Definition: FileMgr.h:197
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:256
const std::pair< const int, const int > get_fileMgrKey() const
Definition: FileMgr.h:234
bool isAllocationCapped() override
Definition: FileMgr.h:153
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:254
std::pair< const int, const int > fileMgrKey_
Global FileMgr. (Description misattached by the generator: it documents gfm_ at FileMgr.h:237, not fileMgrKey_.)
Definition: FileMgr.h:238
size_t getNumReaderThreads()
Returns number of threads defined by parameter num-reader-threads which should be used during initial...
Definition: FileMgr.h:210