OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
GlobalFileMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #ifndef DATAMGR_MEMORY_FILE_GLOBAL_FILEMGR_H
26 #define DATAMGR_MEMORY_FILE_GLOBAL_FILEMGR_H
27 
28 #include <iostream>
29 #include <map>
30 #include <mutex>
31 #include <set>
32 #include "../Shared/heavyai_shared_mutex.h"
33 
34 #include "../AbstractBuffer.h"
35 #include "../AbstractBufferMgr.h"
36 #include "FileMgr.h"
37 
39 
40 using namespace Data_Namespace;
41 
42 namespace File_Namespace {
43 
/// Per-table parameters passed to GlobalFileMgr::setFileMgrParams().
/// Both fields are initialised to -1 by the default constructor.
/// NOTE(review): -1 presumably means "not set" — confirm against the
/// setFileMgrParams() implementation.
struct FileMgrParams {
  FileMgrParams() : epoch(-1), max_rollback_epochs(-1) {}
  int32_t epoch;
  // Restored declaration: the constructor initialises this member, so it must
  // be declared. The type matches GlobalFileMgr::max_rollback_epochs_per_table_.
  int32_t max_rollback_epochs;
};
49 
54 class GlobalFileMgr : public AbstractBufferMgr { // implements
55 
56  public:
58  GlobalFileMgr(const int32_t device_id,
59  std::shared_ptr<ForeignStorageInterface> fsi,
60  std::string base_path = ".",
61  const size_t num_reader_threads = 0,
62  const size_t page_size = DEFAULT_PAGE_SIZE,
63  const size_t metadata_page_size = DEFAULT_METADATA_PAGE_SIZE);
64 
65  ~GlobalFileMgr() override {}
66 
69  size_t pageSize = 0,
70  const size_t numBytes = 0) override {
71  return getFileMgr(key)->createBuffer(key, pageSize, numBytes);
72  }
73 
74  bool isBufferOnDevice(const ChunkKey& key) override {
75  return getFileMgr(key)->isBufferOnDevice(key);
76  }
77 
79  // Purge == true means delete the data chunks -
80  // can't undelete and revert to previous
81  // state - reclaims disk space for chunk
82  void deleteBuffer(const ChunkKey& key, const bool purge = true) override {
83  return getFileMgr(key)->deleteBuffer(key, purge);
84  }
85 
86  void deleteBuffersWithPrefix(const ChunkKey& keyPrefix,
87  const bool purge = true) override;
88 
90  AbstractBuffer* getBuffer(const ChunkKey& key, const size_t numBytes = 0) override {
91  return getFileMgr(key)->getBuffer(key, numBytes);
92  }
93 
94  void fetchBuffer(const ChunkKey& key,
95  AbstractBuffer* destBuffer,
96  const size_t numBytes) override {
97  return getFileMgr(key)->fetchBuffer(key, destBuffer, numBytes);
98  }
99 
107  AbstractBuffer* d,
108  const size_t numBytes = 0) override {
109  return getFileMgr(key)->putBuffer(key, d, numBytes);
110  }
111 
112  // Buffer API
113  AbstractBuffer* alloc(const size_t numBytes) override {
114  LOG(FATAL) << "Operation not supported";
115  return nullptr; // satisfy return-type warning
116  }
117 
118  void free(AbstractBuffer* buffer) override { LOG(FATAL) << "Operation not supported"; }
119 
120  inline MgrType getMgrType() override { return GLOBAL_FILE_MGR; };
121  inline std::string getStringMgrType() override { return ToString(GLOBAL_FILE_MGR); }
122  inline std::string printSlabs() override { return "Not Implemented"; }
123  inline size_t getMaxSize() override { return 0; }
124  inline size_t getInUseSize() override { return 0; }
125  inline size_t getAllocated() override { return 0; }
126  inline bool isAllocationCapped() override { return false; }
127 
128  void init();
129 
131  const ChunkKey& keyPrefix) override {
132  return getFileMgr(keyPrefix)->getChunkMetadataVecForKeyPrefix(chunkMetadataVec,
133  keyPrefix);
134  }
135 
140  void checkpoint() override;
141  void checkpoint(const int32_t db_id, const int32_t tb_id) override;
142 
147  inline size_t getNumReaderThreads() { return num_reader_threads_; }
148 
149  size_t getNumChunks() override;
150 
151  void compactDataFiles(const int32_t db_id, const int32_t tb_id);
152 
153  private:
154  AbstractBufferMgr* findFileMgrUnlocked(const int32_t db_id, const int32_t tb_id);
155  void deleteFileMgr(const int32_t db_id, const int32_t tb_id);
156 
157  public:
158  AbstractBufferMgr* findFileMgr(const int32_t db_id, const int32_t tb_id) {
160  return findFileMgrUnlocked(db_id, tb_id);
161  }
162  void setFileMgrParams(const int32_t db_id,
163  const int32_t tb_id,
164  const FileMgrParams& file_mgr_params);
165  AbstractBufferMgr* getFileMgr(const int32_t db_id, const int32_t tb_id);
166  AbstractBufferMgr* getFileMgr(const ChunkKey& key) {
167  return getFileMgr(key[0], key[1]);
168  }
169 
170  std::string getBasePath() const { return basePath_; }
171  size_t getPageSize() const { return page_size_; }
172  size_t getMetadataPageSize() const { return metadata_page_size_; }
173 
174  void writeFileMgrData(FileMgr* fileMgr = 0);
175 
176  inline int32_t getDBVersion() const { return omnisci_db_version_; }
177  inline bool getDBConvert() const { return dbConvert_; }
178  inline void setDBConvert(bool val) { dbConvert_ = val; }
179 
180  void removeTableRelatedDS(const int32_t db_id, const int32_t tb_id) override;
181  void setTableEpoch(const int32_t db_id, const int32_t tb_id, const int32_t start_epoch);
182  size_t getTableEpoch(const int32_t db_id, const int32_t tb_id);
183  void resetTableEpochFloor(const int32_t db_id, const int32_t tb_id);
184  StorageStats getStorageStats(const int32_t db_id, const int32_t tb_id);
185 
186  // For testing purposes only
187  std::shared_ptr<FileMgr> getSharedFileMgr(const int db_id, const int table_id);
188 
189  // For testing purposes only
190  void setFileMgr(const int db_id, const int table_id, std::shared_ptr<FileMgr> file_mgr);
191  void closeFileMgr(const int32_t db_id,
192  const int32_t tb_id); // A locked public wrapper for deleteFileMgr,
193  // for now for unit testing
194  protected:
195  std::shared_ptr<ForeignStorageInterface> fsi_;
196 
197  private:
198  bool existsDiffBetweenFileMgrParamsAndFileMgr(
199  FileMgr* file_mgr,
200  const FileMgrParams& file_mgr_params) const;
201  std::string basePath_;
203  int32_t
204  epoch_; /* the current epoch (time of last checkpoint) will be used for all
205  * tables except of the one for which the value of the epoch has been reset
206  * using --start-epoch option at start up to rollback this table's updates.
207  */
208  const size_t page_size_;
209  const size_t metadata_page_size_;
210  // bool isDirty_; /// true if metadata changed since last writeState()
211 
213  /* In future omnisci_db_version_ may be added to AbstractBufferMgr class.
215  * This will allow support of different dbVersions for different tables, so
216  * original tables can be generated by different versions of mapd software.
217  */
218  bool dbConvert_;
219 
221  std::map<TablePair, std::shared_ptr<FileMgr>> ownedFileMgrs_;
222  std::map<TablePair, AbstractBufferMgr*> allFileMgrs_;
223  std::map<TablePair, int32_t> max_rollback_epochs_per_table_;
224  std::map<TablePair, StorageStats> lazy_initialized_stats_;
225 
227 };
228 
229 } // namespace File_Namespace
230 
231 #endif // DATAMGR_MEMORY_FILE_GLOBAL_FILEMGR_H
AbstractBuffer * putBuffer(const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override
Puts the contents of d into the Chunk with the given key.
std::vector< int > ChunkKey
Definition: types.h:36
std::string getBasePath() const
int32_t epoch_
number of threads used when loading data
std::shared_ptr< ForeignStorageInterface > fsi_
heavyai::shared_lock< heavyai::shared_mutex > read_lock
bool isBufferOnDevice(const ChunkKey &key) override
Definition: GlobalFileMgr.h:74
AbstractBuffer * createBuffer(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
Creates a chunk with the specified key and page size.
Definition: GlobalFileMgr.h:68
std::map< TablePair, std::shared_ptr< FileMgr > > ownedFileMgrs_
#define LOG(tag)
Definition: Logger.h:285
bool isAllocationCapped() override
AbstractBuffer * alloc(const size_t numBytes) override
This file includes the class specification for the FILE manager (FileMgr), and related data structure...
AbstractBufferMgr * getFileMgr(const ChunkKey &key)
#define DEFAULT_METADATA_PAGE_SIZE
std::string getStringMgrType() override
std::shared_lock< T > shared_lock
std::map< TablePair, AbstractBufferMgr * > allFileMgrs_
size_t getAllocated() override
void init(LogOptions const &log_opts)
Definition: Logger.cpp:360
const size_t metadata_page_size_
used to set FileMgr page_size_
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
int32_t omnisci_db_version_
used to set FileMgr metadata_page_size_
An AbstractBuffer is a unit of data management for a data manager.
size_t getMetadataPageSize() const
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix) override
std::string printSlabs() override
#define DEFAULT_PAGE_SIZE
void deleteBuffer(const ChunkKey &key, const bool purge=true) override
Deletes the chunk with the specified key.
Definition: GlobalFileMgr.h:82
MgrType getMgrType() override
size_t getMaxSize() override
AbstractBuffer * getBuffer(const ChunkKey &key, const size_t numBytes=0) override
Returns a pointer to the chunk with the specified key.
Definition: GlobalFileMgr.h:90
void free(AbstractBuffer *buffer) override
size_t num_reader_threads_
The OS file system path containing the files.
std::map< TablePair, int32_t > max_rollback_epochs_per_table_
AbstractBufferMgr * findFileMgr(const int32_t db_id, const int32_t tb_id)
std::map< TablePair, StorageStats > lazy_initialized_stats_
void fetchBuffer(const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override
Definition: GlobalFileMgr.h:94
std::shared_timed_mutex shared_mutex
size_t getNumReaderThreads()
Returns number of threads defined by parameter num-reader-threads which should be used during initial...
size_t getInUseSize() override
heavyai::shared_mutex fileMgrs_mutex_