OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
GlobalFileMgr.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 
25 #include <fcntl.h>
26 #include <algorithm>
27 #include <boost/filesystem.hpp>
28 #include <boost/lexical_cast.hpp>
29 #include <string>
30 #include <thread>
31 #include <utility>
32 #include <vector>
33 
36 #include "Shared/File.h"
37 
38 using namespace std;
39 
40 namespace File_Namespace {
41 
42 GlobalFileMgr::GlobalFileMgr(const int32_t device_id,
43  std::shared_ptr<ForeignStorageInterface> fsi,
44  std::string base_path,
45  const size_t num_reader_threads,
46  const size_t page_size,
47  const size_t metadata_page_size)
48  : AbstractBufferMgr(device_id)
49  , fsi_(fsi)
50  , basePath_(base_path)
51  , num_reader_threads_(num_reader_threads)
52  , epoch_(-1) // set the default epoch for all tables corresponding to the time of
53  // last checkpoint
54  , page_size_(page_size)
55  , metadata_page_size_(metadata_page_size) {
56  // DS changes also triggered by individual FileMgr per table project (release 2.1.0)
57  dbConvert_ = false;
58  init();
59 }
60 
// NOTE(review): the function header for this body (listing line 61) is absent from this
// extraction. The constructor calls init(), so this is presumably GlobalFileMgr::init()
// -- confirm against the original file.
//
// Ensures basePath_ ends with '/' and that the directory it names exists. A path that
// exists but is not a directory, or a directory that cannot be created, is reported
// through LOG(FATAL).
62  // check if basePath_ already exists, and if not create one
// `path` is built before the trailing '/' is appended, so it holds basePath_ as it was
// on entry.
63  boost::filesystem::path path(basePath_);
64  if (basePath_.size() > 0 && basePath_[basePath_.size() - 1] != '/') {
65  basePath_.push_back('/');
66  }
67  if (boost::filesystem::exists(path)) {
68  if (!boost::filesystem::is_directory(path)) {
69  LOG(FATAL) << "Specified path is not a directory.";
70  }
71  } else { // data directory does not exist
// create_directory() is called on `path` itself; nothing here creates parent directories.
72  if (!boost::filesystem::create_directory(path)) {
73  LOG(FATAL) << "Could not create data directory";
74  }
75  }
76 }
77 
// NOTE(review): the function header for this body (listing line 79) is absent from this
// extraction; presumably the no-argument GlobalFileMgr::checkpoint() -- confirm against
// the original file.
//
// Calls checkpoint() on every buffer manager currently registered in allFileMgrs_.
80  for (auto fileMgrsIt = allFileMgrs_.begin(); fileMgrsIt != allFileMgrs_.end();
81  ++fileMgrsIt) {
82  fileMgrsIt->second->checkpoint();
83  }
84 }
85 
86 void GlobalFileMgr::checkpoint(const int32_t db_id, const int32_t tb_id) {
87  getFileMgr(db_id, tb_id)->checkpoint();
88 }
89 
// NOTE(review): the function header for this body (listing line 91) is absent from this
// extraction; presumably GlobalFileMgr::getNumChunks() -- confirm against the original
// file.
//
// Returns the sum of getNumChunks() over every buffer manager registered in
// allFileMgrs_ (0 when none are registered).
92  size_t num_chunks = 0;
93  for (auto fileMgrsIt = allFileMgrs_.begin(); fileMgrsIt != allFileMgrs_.end();
94  ++fileMgrsIt) {
95  num_chunks += fileMgrsIt->second->getNumChunks();
96  }
97 
98  return num_chunks;
99 }
100 
101 void GlobalFileMgr::deleteBuffersWithPrefix(const ChunkKey& keyPrefix, const bool purge) {
102  /* keyPrefix[0] can be -1 only for gpu or cpu buffers but not for FileMgr.
103  * There is no assert here, as GlobalFileMgr is being called with -1 value as well in
104  * the same loop with other buffers. So the case of -1 will just be ignored, as nothing
105  * needs to be done.
106  */
107  if (keyPrefix[0] != -1) {
108  return getFileMgr(keyPrefix)->deleteBuffersWithPrefix(keyPrefix, purge);
109  }
110 }
111 
112 AbstractBufferMgr* GlobalFileMgr::findFileMgrUnlocked(const int32_t db_id,
113  const int32_t tb_id) {
114  // NOTE: only call this private function after locking is already in place
115  AbstractBufferMgr* fm = nullptr;
116  const auto file_mgr_key = std::make_pair(db_id, tb_id);
117  if (auto it = allFileMgrs_.find(file_mgr_key); it != allFileMgrs_.end()) {
118  fm = it->second;
119  }
120  return fm;
121 }
122 
123 void GlobalFileMgr::deleteFileMgr(const int32_t db_id, const int32_t tb_id) {
124  // NOTE: only call this private function after locking is already in place
125  const auto file_mgr_key = std::make_pair(db_id, tb_id);
126  if (auto it = ownedFileMgrs_.find(file_mgr_key); it != ownedFileMgrs_.end()) {
127  ownedFileMgrs_.erase(it);
128  }
129  if (auto it = allFileMgrs_.find(file_mgr_key); it != allFileMgrs_.end()) {
130  allFileMgrs_.erase(it);
131  }
132 }
133 
// Closes the manager for (db_id, tb_id) by delegating to deleteFileMgr(), which drops
// the table's entries from ownedFileMgrs_ and allFileMgrs_.
134 void GlobalFileMgr::closeFileMgr(const int32_t db_id, const int32_t tb_id) {
// NOTE(review): listing line 135 is absent from this extraction. deleteFileMgr() is
// documented as requiring locking to be in place, so the missing line is presumably
// the lock acquisition -- confirm against the original file.
136  deleteFileMgr(db_id, tb_id);
137 }
138 
// NOTE(review): the first line of this definition (listing line 139) is absent from this
// extraction; presumably GlobalFileMgr::existsDiffBetweenFileMgrParamsAndFileMgr --
// confirm against the original file.
//
// Returns true when a parameter that is set (i.e. not -1) in file_mgr_params differs
// from the corresponding value reported by file_mgr; returns false otherwise.
// file_mgr is dereferenced without a null check.
140  FileMgr* file_mgr,
141  const FileMgrParams& file_mgr_params) const {
// epoch is compared against the manager's last checkpointed epoch
142  if (file_mgr_params.epoch != -1 &&
143  file_mgr_params.epoch != file_mgr->lastCheckpointedEpoch()) {
144  return true;
145  }
146  if (file_mgr_params.max_rollback_epochs != -1 &&
147  file_mgr_params.max_rollback_epochs != file_mgr->maxRollbackEpochs()) {
148  return true;
149  }
150  return false;
151 }
152 
// Replaces the FileMgr for (db_id, tb_id) with a freshly constructed one that uses the
// given parameters.
//
// Steps visible here: any FileMgr found for the table is removed via deleteFileMgr();
// a new FileMgr is constructed and registered in both ownedFileMgrs_ and allFileMgrs_
// (each insert is CHECKed to have succeeded); the effective max_rollback_epochs is
// recorded in max_rollback_epochs_per_table_; and the cached entry in
// lazy_initialized_stats_ is dropped.
153 void GlobalFileMgr::setFileMgrParams(const int32_t db_id,
154  const int32_t tb_id,
155  const FileMgrParams& file_mgr_params) {
// fm is null both when nothing is registered and when the registered manager is not a
// FileMgr (the dynamic_cast fails).
156  auto fm = dynamic_cast<File_Namespace::FileMgr*>(findFileMgr(db_id, tb_id));
// NOTE(review): listing line 157 is absent from this extraction; presumably a lock
// acquisition preceding the map updates below -- confirm against the original file.
158  if (fm) {
159  deleteFileMgr(db_id, tb_id);
160  }
161  const auto file_mgr_key = std::make_pair(db_id, tb_id);
// negative max_rollback_epochs values are normalized to -1
162  auto max_rollback_epochs =
163  (file_mgr_params.max_rollback_epochs >= 0 ? file_mgr_params.max_rollback_epochs
164  : -1);
165  auto s = std::make_shared<FileMgr>(
166  0,
167  this,
168  file_mgr_key,
169  max_rollback_epochs,
// NOTE(review): listing line 170 (one constructor argument) is absent from this
// extraction; the equivalent call in getFileMgr() passes num_reader_threads_ in this
// position -- confirm against the original file.
// an epoch of -1 in the params falls back to the manager-wide epoch_
171  file_mgr_params.epoch != -1 ? file_mgr_params.epoch : epoch_);
172  CHECK(ownedFileMgrs_.insert(std::make_pair(file_mgr_key, s)).second);
173  CHECK(allFileMgrs_.insert(std::make_pair(file_mgr_key, s.get())).second);
174  max_rollback_epochs_per_table_[file_mgr_key] = max_rollback_epochs;
175  lazy_initialized_stats_.erase(file_mgr_key);
176  return;
177 }
178 
// Returns the buffer manager registered for (db_id, tb_id), creating and registering
// one when none exists yet.
//
// When fsi_->lookupBufferManager() yields a manager for the table, that manager is
// registered in allFileMgrs_ only and returned. Otherwise a new FileMgr is constructed,
// registered in both ownedFileMgrs_ and allFileMgrs_, and returned; any cached entry in
// lazy_initialized_stats_ for the table is dropped at that point.
179 AbstractBufferMgr* GlobalFileMgr::getFileMgr(const int32_t db_id, const int32_t tb_id) {
180  { // check if FileMgr already exists for (db_id, tb_id)
// NOTE(review): listing line 181 is absent from this extraction; judging by the
// "read lock" comment further down it presumably acquires a read lock -- confirm.
182  AbstractBufferMgr* fm = findFileMgrUnlocked(db_id, tb_id);
183  if (fm) {
184  return fm;
185  }
186  }
187 
188  { // create new FileMgr for (db_id, tb_id)
// NOTE(review): listing line 189 is absent from this extraction; presumably the write
// lock referred to by the comment below -- confirm.
// The lookup is repeated here before anything is created.
190  AbstractBufferMgr* fm = findFileMgrUnlocked(db_id, tb_id);
191  if (fm) {
192  return fm; // mgr was added between the read lock and the write lock
193  }
194  const auto file_mgr_key = std::make_pair(db_id, tb_id);
195  const auto foreign_buffer_manager = fsi_->lookupBufferManager(db_id, tb_id);
196  if (foreign_buffer_manager) {
// registered in allFileMgrs_ only; no entry is added to ownedFileMgrs_ on this path
197  CHECK(allFileMgrs_.insert(std::make_pair(file_mgr_key, foreign_buffer_manager))
198  .second);
199  return foreign_buffer_manager;
200  } else {
// use the per-table max rollback epochs if one was recorded, otherwise -1
201  int32_t max_rollback_epochs{-1};
202  if (max_rollback_epochs_per_table_.find(file_mgr_key) !=
// NOTE(review): listing line 203 is absent from this extraction; presumably the
// right-hand side of this comparison (the map's end()) and the opening brace -- confirm.
204  max_rollback_epochs = max_rollback_epochs_per_table_[file_mgr_key];
205  }
206  auto s = std::make_shared<FileMgr>(
207  0, this, file_mgr_key, max_rollback_epochs, num_reader_threads_, epoch_);
208  CHECK(ownedFileMgrs_.insert(std::make_pair(file_mgr_key, s)).second);
209  CHECK(allFileMgrs_.insert(std::make_pair(file_mgr_key, s.get())).second);
210  lazy_initialized_stats_.erase(file_mgr_key);
211  return s.get();
212  }
213  }
214 }
215 
216 // For testing purposes only
217 std::shared_ptr<FileMgr> GlobalFileMgr::getSharedFileMgr(const int db_id,
218  const int table_id) {
219  const auto table_key = std::make_pair(db_id, table_id);
220  if (ownedFileMgrs_.find(table_key) == ownedFileMgrs_.end()) {
221  return nullptr;
222  }
223  return ownedFileMgrs_[table_key];
224 }
225 
226 // For testing purposes only
227 void GlobalFileMgr::setFileMgr(const int db_id,
228  const int table_id,
229  std::shared_ptr<FileMgr> file_mgr) {
230  TablePair file_mgr_key{db_id, table_id};
231  allFileMgrs_[file_mgr_key] = file_mgr.get();
232  ownedFileMgrs_[file_mgr_key] = file_mgr;
233  lazy_initialized_stats_.erase(file_mgr_key);
234 }
235 
// NOTE(review): the first line of this definition (listing line 236) is absent from this
// extraction; presumably GlobalFileMgr::writeFileMgrData -- confirm against the
// original file.
//
// Walks every manager in allFileMgrs_ (each is CHECKed to be a FileMgr) and, for each
// entry of its chunkIndex_, calls write() on the chunk. When fileMgr is non-null only
// that manager is processed; a null fileMgr processes all of them.
237  FileMgr* fileMgr) { // this function is not used, keep it for now for future needs
// NOTE(review): listing line 238 is absent from this extraction; presumably a lock
// acquisition -- confirm.
239  for (auto fileMgrIt = allFileMgrs_.begin(); fileMgrIt != allFileMgrs_.end();
240  fileMgrIt++) {
241  FileMgr* fm = dynamic_cast<FileMgr*>(fileMgrIt->second);
242  CHECK(fm);
243  if ((fileMgr != 0) && (fileMgr != fm)) {
244  continue;
245  }
246  for (auto chunkIt = fm->chunkIndex_.begin(); chunkIt != fm->chunkIndex_.end();
247  chunkIt++) {
// NOTE(review): the source pointer handed to write() is the chunk object pointer itself
// cast to int8_t*, not a data buffer obtained from it -- verify this is intended before
// reviving this function.
248  chunkIt->second->write((int8_t*)chunkIt->second, chunkIt->second->size(), 0);
249  }
250  }
251 }
252 
// Removes the storage belonging to table (db_id, tb_id) and then forgets the table in
// this manager's in-memory maps.
//
// Three cases, depending on what is registered for the table:
//  - a FileMgr: closeRemovePhysical() is called on it;
//  - a ForeignStorageBufferMgr: its removeTableRelatedDS() is called and the buffer
//    manager is dropped from fsi_;
//  - anything else (including nothing registered): a temporary FileMgr is constructed
//    just to call closeRemovePhysical().
// Afterwards deleteFileMgr() is called and the table's entry in
// max_rollback_epochs_per_table_ is erased.
253 void GlobalFileMgr::removeTableRelatedDS(const int32_t db_id, const int32_t tb_id) {
// NOTE(review): listing line 254 is absent from this extraction. Both
// findFileMgrUnlocked() and deleteFileMgr() are documented as requiring locking to be
// in place, so the missing line is presumably the lock acquisition -- confirm.
255  auto abm = findFileMgrUnlocked(db_id, tb_id);
256  if (auto fm = dynamic_cast<File_Namespace::FileMgr*>(abm)) {
257  fm->closeRemovePhysical();
258  } else if (dynamic_cast<ForeignStorageBufferMgr*>(abm)) {
259  abm->removeTableRelatedDS(db_id, tb_id);
260  fsi_->dropBufferManager(db_id, tb_id);
261  } else {
262  // fileMgr has not been initialized so there is no need to
263  // spend the time initializing
264  // initialize just enough to have to rename
265  const auto file_mgr_key = std::make_pair(db_id, tb_id);
266  auto u = std::make_unique<FileMgr>(0, this, file_mgr_key, true);
267  u->closeRemovePhysical();
268  }
269  // remove table related in-memory DS only if directory was removed successfully
// NOTE(review): no result of the removal is tested here before the cleanup below;
// presumably a failed removal does not return normally from closeRemovePhysical() --
// confirm.
270 
271  deleteFileMgr(db_id, tb_id);
272  max_rollback_epochs_per_table_.erase({db_id, tb_id});
273 }
274 
275 void GlobalFileMgr::setTableEpoch(const int32_t db_id,
276  const int32_t tb_id,
277  const int32_t start_epoch) {
278  AbstractBufferMgr* opened_fm = findFileMgr(db_id, tb_id);
279  if (opened_fm) {
280  // Delete this FileMgr to ensure epoch change occurs in constructor with other
281  // reads/writes locked out
282  deleteFileMgr(db_id, tb_id);
283  }
284  const auto file_mgr_key = std::make_pair(db_id, tb_id);
285  // this is where the real rollback of any data ahead of the currently set epoch is
286  // performed
287  // Will call set_epoch with start_epoch internally
288  auto u = std::make_unique<FileMgr>(
289  0, this, file_mgr_key, -1, num_reader_threads_, start_epoch);
290  // remove the dummy one we built
291  u.reset();
292 }
293 
// Returns the last checkpointed epoch of table (db_id, tb_id).
//
// If a manager is already open for the table its lastCheckpointedEpoch() is returned;
// otherwise a temporary FileMgr is constructed just to read the value and is released
// before returning.
294 size_t GlobalFileMgr::getTableEpoch(const int32_t db_id, const int32_t tb_id) {
295  // UX change was made to this function Oct 2020 to return checkpointed epoch. In turn,
296  // setTableEpoch was changed to set the epoch at the user's input, instead of input - 1
// NOTE(review): listing line 297 is absent from this extraction; presumably a lock
// acquisition -- confirm against the original file.
298  AbstractBufferMgr* opened_fm = findFileMgr(db_id, tb_id);
299  if (opened_fm) {
// NOTE(review): the dynamic_cast result is dereferenced without a null check, so this
// assumes the open manager is a FileMgr and not a foreign buffer manager -- confirm.
300  return dynamic_cast<FileMgr*>(opened_fm)->lastCheckpointedEpoch();
301  }
302  // Do not do full init of table just to get table epoch, just check file instead
303  const auto file_mgr_key = std::make_pair(db_id, tb_id);
304  auto u = std::make_unique<FileMgr>(0, this, file_mgr_key, true);
305  const auto epoch = u->lastCheckpointedEpoch();
306  u.reset();
307  return epoch;
308 }
309 
310 void GlobalFileMgr::resetTableEpochFloor(const int32_t db_id, const int32_t tb_id) {
311  AbstractBufferMgr* fm = getFileMgr(db_id, tb_id);
312  CHECK(fm);
313  dynamic_cast<FileMgr*>(fm)->resetEpochFloor();
314 }
315 
// Returns storage statistics for table (db_id, tb_id).
//
// If a manager is already open for the table its getStorageStats() result is returned.
// Otherwise the value cached in lazy_initialized_stats_ is returned; on a cache miss a
// temporary FileMgr is constructed to compute the stats, which are stored in the cache
// before being returned.
316 StorageStats GlobalFileMgr::getStorageStats(const int32_t db_id, const int32_t tb_id) {
// NOTE(review): listing line 317 is absent from this extraction; presumably a lock
// acquisition -- confirm against the original file.
318  AbstractBufferMgr* opened_fm = findFileMgr(db_id, tb_id);
319  if (opened_fm) {
// NOTE(review): the dynamic_cast result is dereferenced without a null check, so this
// assumes the open manager is a FileMgr -- confirm.
320  return dynamic_cast<FileMgr*>(opened_fm)->getStorageStats();
321  }
322  TablePair file_mgr_key{db_id, tb_id};
323  auto it = lazy_initialized_stats_.find(file_mgr_key);
324  if (it != lazy_initialized_stats_.end()) {
325  return it->second;
326  } else {
327  // Do not do full init of table just to get storage stats, just check file instead
328  auto u = std::make_unique<FileMgr>(0, this, file_mgr_key, true);
329  lazy_initialized_stats_[file_mgr_key] = u->getStorageStats();
330  u.reset();
331  return lazy_initialized_stats_[file_mgr_key];
332  }
333 }
334 
// Compacts the data files of table (db_id, tb_id) and re-opens its manager.
//
// If a FileMgr is currently open for the table, compactFiles() is called on it and the
// manager is then removed via deleteFileMgr(). In every case getFileMgr() is called at
// the end, which creates a manager for the table if none is registered.
335 void GlobalFileMgr::compactDataFiles(const int32_t db_id, const int32_t tb_id) {
// file_mgr is null when nothing is open for the table or the open manager is not a
// FileMgr (the dynamic_cast fails).
336  auto file_mgr = dynamic_cast<File_Namespace::FileMgr*>(findFileMgr(db_id, tb_id));
337  {
// NOTE(review): listing line 338 is absent from this extraction; presumably a lock
// scoped to this inner block -- confirm against the original file.
339  if (file_mgr) {
340  file_mgr->compactFiles();
341  deleteFileMgr(db_id, tb_id);
342  }
343  }
344 
345  // Re-initialize file manager
346  getFileMgr(db_id, tb_id);
347 }
348 } // namespace File_Namespace
void writeFileMgrData(FileMgr *fileMgr=0)
void deleteBuffersWithPrefix(const ChunkKey &keyPrefix, const bool purge=true) override
std::vector< int > ChunkKey
Definition: types.h:36
void deleteFileMgr(const int32_t db_id, const int32_t tb_id)
int32_t epoch_
number of threads used when loading data
std::shared_ptr< ForeignStorageInterface > fsi_
This file includes the class specification for the FILE manager (FileMgr), and related data structure...
void checkpoint() override
Fsyncs data files, writes out epoch and fsyncs that.
std::map< TablePair, std::shared_ptr< FileMgr > > ownedFileMgrs_
#define LOG(tag)
Definition: Logger.h:285
size_t getNumChunks() override
int32_t lastCheckpointedEpoch() const
Returns value of epoch at last checkpoint.
Definition: FileMgr.h:301
std::shared_lock< T > shared_lock
std::map< TablePair, AbstractBufferMgr * > allFileMgrs_
void resetTableEpochFloor(const int32_t db_id, const int32_t tb_id)
void setTableEpoch(const int32_t db_id, const int32_t tb_id, const int32_t start_epoch)
StorageStats getStorageStats(const int32_t db_id, const int32_t tb_id)
AbstractBufferMgr * findFileMgrUnlocked(const int32_t db_id, const int32_t tb_id)
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
std::unique_lock< T > unique_lock
std::shared_ptr< FileMgr > getSharedFileMgr(const int db_id, const int table_id)
bool existsDiffBetweenFileMgrParamsAndFileMgr(FileMgr *file_mgr, const FileMgrParams &file_mgr_params) const
void compactDataFiles(const int32_t db_id, const int32_t tb_id)
AbstractBufferMgr * getFileMgr(const int32_t db_id, const int32_t tb_id)
size_t num_reader_threads_
The OS file system path containing the files.
std::map< TablePair, int32_t > max_rollback_epochs_per_table_
void setFileMgr(const int db_id, const int table_id, std::shared_ptr< FileMgr > file_mgr)
void closeFileMgr(const int32_t db_id, const int32_t tb_id)
void setFileMgrParams(const int32_t db_id, const int32_t tb_id, const FileMgrParams &file_mgr_params)
#define CHECK(condition)
Definition: Logger.h:291
AbstractBufferMgr * findFileMgr(const int32_t db_id, const int32_t tb_id)
std::map< TablePair, StorageStats > lazy_initialized_stats_
int32_t maxRollbackEpochs()
Returns value max_rollback_epochs.
Definition: FileMgr.h:310
std::pair< const int32_t, const int32_t > TablePair
Definition: FileMgr.h:98
void removeTableRelatedDS(const int32_t db_id, const int32_t tb_id) override
heavyai::shared_mutex fileMgrs_mutex_
A selection of helper methods for File I/O.
bool dbConvert_
used to set FileMgr metadata_page_size_
size_t getTableEpoch(const int32_t db_id, const int32_t tb_id)