OmniSciDB  c07336695a
FileMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
26 #ifndef DATAMGR_MEMORY_FILE_FILEMGR_H
27 #define DATAMGR_MEMORY_FILE_FILEMGR_H
28 
29 #include <future>
30 #include <iostream>
31 #include <map>
32 #include <mutex>
33 #include <set>
34 #include <vector>
35 
36 #include "../AbstractBuffer.h"
37 #include "../AbstractBufferMgr.h"
38 #include "../Shared/mapd_shared_mutex.h"
39 #include "FileBuffer.h"
40 #include "FileInfo.h"
41 #include "Page.h"
42 
43 using namespace Data_Namespace;
44 
45 namespace File_Namespace {
46 
47 class GlobalFileMgr; // forward declaration
56 typedef std::multimap<size_t, int> PageSizeFileMMap;
57 
68 using Chunk = FileBuffer;
69 
80 typedef std::map<ChunkKey, FileBuffer*> ChunkKeyToChunkMap;
81 
86 class FileMgr : public AbstractBufferMgr { // implements
87  friend class GlobalFileMgr;
88 
89  public:
91  FileMgr(const int deviceId,
92  GlobalFileMgr* gfm,
93  const std::pair<const int, const int> fileMgrKey,
94  const size_t num_reader_threads = 0,
95  const int epoch = -1,
96  const size_t defaultPageSize = 2097152);
97 
98  // used only to initialize enough to drop
99  FileMgr(const int deviceId,
100  GlobalFileMgr* gfm,
101  const std::pair<const int, const int> fileMgrKey,
102  const bool initOnly);
103 
104  FileMgr(GlobalFileMgr* gfm, const size_t defaultPageSize, std::string basePath);
105 
107  ~FileMgr() override;
108 
110  AbstractBuffer* createBuffer(const ChunkKey& key,
111  size_t pageSize = 0,
112  const size_t numBytes = 0) override;
113 
114  bool isBufferOnDevice(const ChunkKey& key) override;
116  // Purge == true means delete the data chunks -
117  // can't undelete and revert to previous
118  // state - reclaims disk space for chunk
119  void deleteBuffer(const ChunkKey& key, const bool purge = true) override;
120 
121  void deleteBuffersWithPrefix(const ChunkKey& keyPrefix,
122  const bool purge = true) override;
123 
125  AbstractBuffer* getBuffer(const ChunkKey& key, const size_t numBytes = 0) override;
126 
127  void fetchBuffer(const ChunkKey& key,
128  AbstractBuffer* destBuffer,
129  const size_t numBytes) override;
130 
137  AbstractBuffer* putBuffer(const ChunkKey& key,
138  AbstractBuffer* d,
139  const size_t numBytes = 0) override;
140 
141  // Buffer API
142  AbstractBuffer* alloc(const size_t numBytes) override;
143  void free(AbstractBuffer* buffer) override;
144  // virtual AbstractBuffer* putBuffer(AbstractBuffer *d);
145  Page requestFreePage(size_t pagesize, const bool isMetadata);
146 
147  inline MgrType getMgrType() override { return FILE_MGR; };
148  inline std::string getStringMgrType() override { return ToString(FILE_MGR); }
149  inline std::string printSlabs() override { return "Not Implemented"; }
150  inline void clearSlabs() override { /* noop */
151  }
152  inline size_t getMaxSize() override { return 0; }
153  inline size_t getInUseSize() override { return 0; }
154  inline size_t getAllocated() override { return 0; }
155  inline bool isAllocationCapped() override { return false; }
156 
157  inline FileInfo* getFileInfoForFileId(const int fileId) { return files_[fileId]; }
158 
159  void init(const size_t num_reader_threads);
160  void init(const std::string dataPathToConvertFrom);
161 
162  void copyPage(Page& srcPage,
163  FileMgr* destFileMgr,
164  Page& destPage,
165  const size_t reservedHeaderSize,
166  const size_t numBytes,
167  const size_t offset);
168 
182  void requestFreePages(size_t npages,
183  size_t pagesize,
184  std::vector<Page>& pages,
185  const bool isMetadata);
186 
187  void getChunkMetadataVec(
188  std::vector<std::pair<ChunkKey, ChunkMetadata>>& chunkMetadataVec) override;
189  void getChunkMetadataVecForKeyPrefix(
190  std::vector<std::pair<ChunkKey, ChunkMetadata>>& chunkMetadataVec,
191  const ChunkKey& keyPrefix) override;
192 
198  void checkpoint() override;
199  void checkpoint(const int db_id, const int tb_id) override {
200  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
201  }
206  inline int epoch() { return epoch_; }
207 
212  inline size_t getNumReaderThreads() { return num_reader_threads_; }
213 
221  FILE* getFileForFileId(const int fileId);
222 
223  inline size_t getNumChunks() override {
224  // @todo should be locked - but this is more for testing now
225  return chunkIndex_.size();
226  }
227  ChunkKeyToChunkMap chunkIndex_;
228  // #TM Not sure if we need this below
229  int getDBVersion() const;
230  bool getDBConvert() const;
231  void createTopLevelMetadata(); // create metadata shared by all tables of all DBs
232  std::string getFileMgrBasePath() const { return fileMgrBasePath_; }
233  void closeRemovePhysical();
234 
235  void free_page(std::pair<FileInfo*, int>&& page);
236  const std::pair<const int, const int> get_fileMgrKey() const { return fileMgrKey_; }
237 
238  private:
240  std::pair<const int, const int> fileMgrKey_;
241  std::string fileMgrBasePath_;
242  std::vector<FileInfo*> files_;
244  PageSizeFileMMap fileIndex_;
247  unsigned nextFileId_;
248  int epoch_;
249  FILE* epochFile_;
251  FILE* DBMetaFile_;
253  // bool isDirty_; /// true if metadata changed since last writeState()
254  std::mutex getPageMutex_;
257 
259  std::vector<std::pair<FileInfo*, int>> free_pages;
260 
277  FileInfo* createFile(const size_t pageSize, const size_t numPages);
278  FileInfo* openExistingFile(const std::string& path,
279  const int fileId,
280  const size_t pageSize,
281  const size_t numPages,
282  std::vector<HeaderInfo>& headerVec);
283  void createEpochFile(const std::string& epochFileName);
284  void openEpochFile(const std::string& epochFileName);
285  void writeAndSyncEpochToDisk();
286  void createDBMetaFile(const std::string& DBMetaFileName);
287  bool openDBMetaFile(const std::string& DBMetaFileName);
288  void writeAndSyncDBMetaToDisk();
289  void setEpoch(int epoch); // resets current value of epoch at startup
290  void processFileFutures(std::vector<std::future<std::vector<HeaderInfo>>>& file_futures,
291  std::vector<HeaderInfo>& headerVec);
292 };
293 
294 } // namespace File_Namespace
295 
296 #endif // DATAMGR_MEMORY_FILE_FILEMGR_H
FileInfo * getFileInfoForFileId(const int fileId)
Definition: FileMgr.h:157
void d(const SQLTypes expected_type, const std::string &str)
Definition: ImportTest.cpp:268
A logical page (Page) belongs to a file on disk.
Definition: Page.h:46
#define LOG(tag)
Definition: Logger.h:182
std::string printSlabs() override
Definition: FileMgr.h:149
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:259
size_t getMaxSize() override
Definition: FileMgr.h:152
GlobalFileMgr * gfm_
Definition: FileMgr.h:239
std::mutex getPageMutex_
pointer to DB level metadata
Definition: FileMgr.h:254
Represents/provides access to contiguous data stored in the file system.
Definition: FileBuffer.h:55
MgrType getMgrType() override
Definition: FileMgr.h:147
std::string fileMgrBasePath_
Definition: FileMgr.h:241
size_t getInUseSize() override
Definition: FileMgr.h:153
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:249
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:206
std::map< ChunkKey, FileBuffer * > ChunkKeyToChunkMap
Maps ChunkKeys (unique ids for Chunks) to Chunk objects.
Definition: FileMgr.h:80
size_t getNumChunks() override
Definition: FileMgr.h:223
void init(LogOptions const &log_opts)
Definition: Logger.cpp:260
std::shared_timed_mutex mapd_shared_mutex
int epoch_
the index of the next file id
Definition: FileMgr.h:248
std::string getFileMgrBasePath() const
Definition: FileMgr.h:232
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
PageSizeFileMMap fileIndex_
A vector of files accessible via a file identifier.
Definition: FileMgr.h:244
An AbstractBuffer is a unit of data management for a data manager.
size_t num_reader_threads_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:245
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:246
const std::pair< const int, const int > get_fileMgrKey() const
Definition: FileMgr.h:236
std::string getStringMgrType() override
Definition: FileMgr.h:148
void clearSlabs() override
Definition: FileMgr.h:150
std::multimap< size_t, int > PageSizeFileMMap
Maps logical page sizes to files.
Definition: FileMgr.h:47
size_t getAllocated() override
Definition: FileMgr.h:154
std::vector< int > ChunkKey
Definition: types.h:35
void checkpoint(const int db_id, const int tb_id) override
Definition: FileMgr.h:199
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:258
bool isAllocationCapped() override
Definition: FileMgr.h:155
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:255
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:256
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:240
size_t getNumReaderThreads()
Returns number of threads defined by parameter num-reader-threads which should be used during initial...
Definition: FileMgr.h:212