OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
GlobalFileMgr.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 
25 #include <fcntl.h>
26 #include <algorithm>
27 #include <boost/filesystem.hpp>
28 #include <boost/lexical_cast.hpp>
29 #include <string>
30 #include <thread>
31 #include <utility>
32 #include <vector>
33 
36 #include "Shared/File.h"
37 
38 using namespace std;
39 
40 namespace File_Namespace {
41 
// Constructs the global file manager. The initializer list forwards deviceId to
// AbstractBufferMgr and stores the foreign storage interface, base path, reader-thread
// count and default page size; epoch_ starts at -1. The body clears dbConvert_ and then
// calls init().
// NOTE(review): listing line 55 is absent from this extract, so a statement at the start
// of the constructor body may be missing here -- confirm against the real source file.
42 GlobalFileMgr::GlobalFileMgr(const int32_t deviceId,
43  std::shared_ptr<ForeignStorageInterface> fsi,
44  std::string basePath,
45  const size_t num_reader_threads,
46  const size_t defaultPageSize)
47  : AbstractBufferMgr(deviceId)
48  , fsi_(fsi)
49  , basePath_(basePath)
50  , num_reader_threads_(num_reader_threads)
51  , epoch_(-1)
52  , // set the default epoch for all tables corresponding to the time of
53  // last checkpoint
54  defaultPageSize_(defaultPageSize) {
56  // DS changes also triggered by individual FileMgr per table project (release 2.1.0)
57  dbConvert_ = false;
58  init();
59 }
60 
62  // check if basePath_ already exists, and if not create one
63  boost::filesystem::path path(basePath_);
64  if (basePath_.size() > 0 && basePath_[basePath_.size() - 1] != '/') {
65  basePath_.push_back('/');
66  }
67  if (boost::filesystem::exists(path)) {
68  if (!boost::filesystem::is_directory(path)) {
69  LOG(FATAL) << "Specified path is not a directory.";
70  }
71  } else { // data directory does not exist
72  if (!boost::filesystem::create_directory(path)) {
73  LOG(FATAL) << "Could not create data directory";
74  }
75  }
76 }
77 
80  for (auto fileMgrsIt = allFileMgrs_.begin(); fileMgrsIt != allFileMgrs_.end();
81  ++fileMgrsIt) {
82  fileMgrsIt->second->checkpoint();
83  }
84 }
85 
86 void GlobalFileMgr::checkpoint(const int32_t db_id, const int32_t tb_id) {
87  getFileMgr(db_id, tb_id)->checkpoint();
88 }
89 
92  size_t num_chunks = 0;
93  for (auto fileMgrsIt = allFileMgrs_.begin(); fileMgrsIt != allFileMgrs_.end();
94  ++fileMgrsIt) {
95  num_chunks += fileMgrsIt->second->getNumChunks();
96  }
97 
98  return num_chunks;
99 }
100 
101 void GlobalFileMgr::deleteBuffersWithPrefix(const ChunkKey& keyPrefix, const bool purge) {
102  /* keyPrefix[0] can be -1 only for gpu or cpu buffers but not for FileMgr.
103  * There is no assert here, as GlobalFileMgr is being called with -1 value as well in
104  * the same loop with other buffers. So the case of -1 will just be ignored, as nothing
105  * needs to be done.
106  */
107  if (keyPrefix[0] != -1) {
108  return getFileMgr(keyPrefix)->deleteBuffersWithPrefix(keyPrefix, purge);
109  }
110 }
111 
112 AbstractBufferMgr* GlobalFileMgr::findFileMgrUnlocked(const int32_t db_id,
113  const int32_t tb_id) {
114  // NOTE: only call this private function after locking is already in place
115  AbstractBufferMgr* fm = nullptr;
116  const auto file_mgr_key = std::make_pair(db_id, tb_id);
117  if (auto it = allFileMgrs_.find(file_mgr_key); it != allFileMgrs_.end()) {
118  fm = it->second;
119  }
120  return fm;
121 }
122 
123 void GlobalFileMgr::deleteFileMgr(const int32_t db_id, const int32_t tb_id) {
124  // NOTE: only call this private function after locking is already in place
125  const auto file_mgr_key = std::make_pair(db_id, tb_id);
126  if (auto it = ownedFileMgrs_.find(file_mgr_key); it != ownedFileMgrs_.end()) {
127  ownedFileMgrs_.erase(it);
128  }
129  if (auto it = allFileMgrs_.find(file_mgr_key); it != allFileMgrs_.end()) {
130  allFileMgrs_.erase(it);
131  }
132 }
133 
// Closes the manager for table (db_id, tb_id) by delegating to deleteFileMgr().
// NOTE(review): listing line 135 is absent from this extract. deleteFileMgr() is
// commented as requiring a lock to already be held, so the missing line presumably
// acquires one -- confirm against the real source file.
134 void GlobalFileMgr::closeFileMgr(const int32_t db_id, const int32_t tb_id) {
136  deleteFileMgr(db_id, tb_id);
137 }
138 
140  FileMgr* file_mgr,
141  const FileMgrParams& file_mgr_params) const {
142  if (file_mgr_params.epoch != -1 &&
143  file_mgr_params.epoch != file_mgr->lastCheckpointedEpoch()) {
144  return true;
145  }
146  if (file_mgr_params.max_rollback_epochs != -1 &&
147  file_mgr_params.max_rollback_epochs != file_mgr->maxRollbackEpochs()) {
148  return true;
149  }
150  return false;
151 }
152 
// Replaces the FileMgr of table (db_id, tb_id) with a freshly constructed one that uses
// the supplied parameters.
//  - Any FileMgr currently found via findFileMgr() is deleted first.
//  - A negative max_rollback_epochs in the params is normalized to -1.
//  - The epoch passed to the new FileMgr is the params' epoch, or epoch_ when the
//    params' epoch is -1.
//  - The new manager is registered in ownedFileMgrs_ and allFileMgrs_ (both inserts are
//    CHECKed to succeed), the rollback setting is remembered per table, and any cached
//    lazily-initialized storage stats for the table are dropped.
// NOTE(review): listing lines 157, 170 and 172 are absent from this extract (likely a
// lock acquisition and two constructor arguments) -- confirm against the real source.
153 void GlobalFileMgr::setFileMgrParams(const int32_t db_id,
154  const int32_t tb_id,
155  const FileMgrParams& file_mgr_params) {
156  auto fm = dynamic_cast<File_Namespace::FileMgr*>(findFileMgr(db_id, tb_id));
158  if (fm) {
159  deleteFileMgr(db_id, tb_id);
160  }
161  const auto file_mgr_key = std::make_pair(db_id, tb_id);
162  auto max_rollback_epochs =
163  (file_mgr_params.max_rollback_epochs >= 0 ? file_mgr_params.max_rollback_epochs
164  : -1);
165  auto s = std::make_shared<FileMgr>(
166  0,
167  this,
168  file_mgr_key,
169  max_rollback_epochs,
171  file_mgr_params.epoch != -1 ? file_mgr_params.epoch : epoch_,
173  CHECK(ownedFileMgrs_.insert(std::make_pair(file_mgr_key, s)).second);
174  CHECK(allFileMgrs_.insert(std::make_pair(file_mgr_key, s.get())).second);
175  max_rollback_epochs_per_table_[file_mgr_key] = max_rollback_epochs;
176  lazy_initialized_stats_.erase(file_mgr_key);
177  return;
178 }
179 
// Returns the buffer manager for table (db_id, tb_id), creating and registering one when
// none exists yet.
//  1. If findFileMgrUnlocked() already knows the table, that manager is returned.
//  2. Otherwise the lookup is repeated, then fsi_ is asked for a foreign buffer manager;
//     if one exists it is registered in allFileMgrs_ only (not in ownedFileMgrs_).
//  3. Otherwise a new FileMgr is constructed, using the per-table max rollback epochs if
//     one was recorded (else -1) and epoch_, and is registered in both maps.
// NOTE(review): listing lines 182, 190, 204, 211 and 213 are absent from this extract.
// The in-code remark about "the read lock and the write lock" suggests 182/190 acquire
// those locks; the others look like the tail of a condition and constructor arguments --
// confirm against the real source file.
180 AbstractBufferMgr* GlobalFileMgr::getFileMgr(const int32_t db_id, const int32_t tb_id) {
181  { // check if FileMgr already exists for (db_id, tb_id)
183  AbstractBufferMgr* fm = findFileMgrUnlocked(db_id, tb_id);
184  if (fm) {
185  return fm;
186  }
187  }
188 
189  { // create new FileMgr for (db_id, tb_id)
191  AbstractBufferMgr* fm = findFileMgrUnlocked(db_id, tb_id);
192  if (fm) {
193  return fm; // mgr was added between the read lock and the write lock
194  }
195  const auto file_mgr_key = std::make_pair(db_id, tb_id);
196  const auto foreign_buffer_manager = fsi_->lookupBufferManager(db_id, tb_id);
197  if (foreign_buffer_manager) {
198  CHECK(allFileMgrs_.insert(std::make_pair(file_mgr_key, foreign_buffer_manager))
199  .second);
200  return foreign_buffer_manager;
201  } else {
202  int32_t max_rollback_epochs{-1};
203  if (max_rollback_epochs_per_table_.find(file_mgr_key) !=
205  max_rollback_epochs = max_rollback_epochs_per_table_[file_mgr_key];
206  }
207  auto s = std::make_shared<FileMgr>(0,
208  this,
209  file_mgr_key,
210  max_rollback_epochs,
212  epoch_,
214  CHECK(ownedFileMgrs_.insert(std::make_pair(file_mgr_key, s)).second);
215  CHECK(allFileMgrs_.insert(std::make_pair(file_mgr_key, s.get())).second);
216  // Stats cached while the table had no manager are stale once a real one exists.
216  lazy_initialized_stats_.erase(file_mgr_key);
217  return s.get();
218  }
219  }
220 }
221 
222 // For testing purposes only
223 std::shared_ptr<FileMgr> GlobalFileMgr::getSharedFileMgr(const int db_id,
224  const int table_id) {
225  const auto table_key = std::make_pair(db_id, table_id);
226  if (ownedFileMgrs_.find(table_key) == ownedFileMgrs_.end()) {
227  return nullptr;
228  }
229  return ownedFileMgrs_[table_key];
230 }
231 
232 // For testing purposes only
233 void GlobalFileMgr::setFileMgr(const int db_id,
234  const int table_id,
235  std::shared_ptr<FileMgr> file_mgr) {
236  TablePair file_mgr_key{db_id, table_id};
237  allFileMgrs_[file_mgr_key] = file_mgr.get();
238  ownedFileMgrs_[file_mgr_key] = file_mgr;
239  lazy_initialized_stats_.erase(file_mgr_key);
240 }
241 
243  FileMgr* fileMgr) { // this function is not used, keep it for now for future needs
245  for (auto fileMgrIt = allFileMgrs_.begin(); fileMgrIt != allFileMgrs_.end();
246  fileMgrIt++) {
247  FileMgr* fm = dynamic_cast<FileMgr*>(fileMgrIt->second);
248  CHECK(fm);
249  if ((fileMgr != 0) && (fileMgr != fm)) {
250  continue;
251  }
252  for (auto chunkIt = fm->chunkIndex_.begin(); chunkIt != fm->chunkIndex_.end();
253  chunkIt++) {
254  chunkIt->second->write((int8_t*)chunkIt->second, chunkIt->second->size(), 0);
255  }
256  }
257 }
258 
// Removes the storage and in-memory bookkeeping of table (db_id, tb_id).
//  - An opened FileMgr has closeRemovePhysical() called on it.
//  - A ForeignStorageBufferMgr is asked to remove the table's data and is then dropped
//    from fsi_.
//  - With no opened manager, a temporary FileMgr is built with the
//    (0, this, key, defaultPageSize_, true) constructor form just to call
//    closeRemovePhysical().
// Afterwards the table is erased from the manager maps and from
// max_rollback_epochs_per_table_.
// NOTE(review): listing line 260 is absent from this extract; since
// findFileMgrUnlocked() and deleteFileMgr() both expect a lock to be held, it presumably
// acquires one -- confirm against the real source file.
259 void GlobalFileMgr::removeTableRelatedDS(const int32_t db_id, const int32_t tb_id) {
261  auto abm = findFileMgrUnlocked(db_id, tb_id);
262  if (auto fm = dynamic_cast<File_Namespace::FileMgr*>(abm)) {
263  fm->closeRemovePhysical();
264  } else if (dynamic_cast<ForeignStorageBufferMgr*>(abm)) {
265  abm->removeTableRelatedDS(db_id, tb_id);
266  fsi_->dropBufferManager(db_id, tb_id);
267  } else {
268  // fileMgr has not been initialized so there is no need to
269  // spend the time initializing
270  // initialize just enough to have to rename
271  const auto file_mgr_key = std::make_pair(db_id, tb_id);
272  auto u = std::make_unique<FileMgr>(0, this, file_mgr_key, defaultPageSize_, true);
273  u->closeRemovePhysical();
274  }
275  // remove table related in-memory DS only if directory was removed successfully
276 
277  deleteFileMgr(db_id, tb_id);
278  max_rollback_epochs_per_table_.erase({db_id, tb_id});
279 }
280 
281 void GlobalFileMgr::setTableEpoch(const int32_t db_id,
282  const int32_t tb_id,
283  const int32_t start_epoch) {
284  AbstractBufferMgr* opened_fm = findFileMgr(db_id, tb_id);
285  if (opened_fm) {
286  // Delete this FileMgr to ensure epoch change occurs in constructor with other
287  // reads/writes locked out
288  deleteFileMgr(db_id, tb_id);
289  }
290  const auto file_mgr_key = std::make_pair(db_id, tb_id);
291  // this is where the real rollback of any data ahead of the currently set epoch is
292  // performed
293  // Will call set_epoch with start_epoch internally
294  auto u = std::make_unique<FileMgr>(
295  0, this, file_mgr_key, -1, num_reader_threads_, start_epoch, defaultPageSize_);
296  // remove the dummy one we built
297  u.reset();
298 }
299 
// Returns the last checkpointed epoch of table (db_id, tb_id).
// When findFileMgr() yields an opened manager it is queried directly; otherwise a
// temporary FileMgr is built with the (0, this, key, defaultPageSize_, true) constructor
// form only to read the epoch, and is destroyed before returning.
// NOTE(review): listing line 303 is absent from this extract -- confirm against the real
// source file.
// NOTE(review): the dynamic_cast result below is dereferenced without a null check, while
// getFileMgr() can register foreign (non-FileMgr) buffer managers in allFileMgrs_ --
// confirm whether findFileMgr() can return one of those here.
// NOTE(review): lastCheckpointedEpoch() is declared as returning int32_t but this
// function returns size_t, so a negative epoch would be converted to a large unsigned
// value -- confirm this is intended.
300 size_t GlobalFileMgr::getTableEpoch(const int32_t db_id, const int32_t tb_id) {
301  // UX change was made to this function Oct 2020 to return checkpointed epoch. In turn,
302  // setTableEpoch was changed to set the epoch at the user's input, instead of input - 1
304  AbstractBufferMgr* opened_fm = findFileMgr(db_id, tb_id);
305  if (opened_fm) {
306  return dynamic_cast<FileMgr*>(opened_fm)->lastCheckpointedEpoch();
307  }
308  // Do not do full init of table just to get table epoch, just check file instead
309  const auto file_mgr_key = std::make_pair(db_id, tb_id);
310  auto u = std::make_unique<FileMgr>(0, this, file_mgr_key, defaultPageSize_, true);
311  const auto epoch = u->lastCheckpointedEpoch();
312  u.reset();
313  return epoch;
314 }
315 
316 void GlobalFileMgr::resetTableEpochFloor(const int32_t db_id, const int32_t tb_id) {
317  AbstractBufferMgr* fm = getFileMgr(db_id, tb_id);
318  CHECK(fm);
319  dynamic_cast<FileMgr*>(fm)->resetEpochFloor();
320 }
321 
// Returns storage statistics for table (db_id, tb_id).
//  - An opened manager found via findFileMgr() is queried directly.
//  - Otherwise a previously cached entry in lazy_initialized_stats_ is returned.
//  - Otherwise a temporary FileMgr is built with the
//    (0, this, key, defaultPageSize_, true) constructor form, its stats are cached in
//    lazy_initialized_stats_, and the cached value is returned.
// NOTE(review): listing line 323 is absent from this extract -- confirm against the real
// source file.
// NOTE(review): the dynamic_cast result below is dereferenced without a null check;
// confirm findFileMgr() cannot return a non-FileMgr manager here.
322 StorageStats GlobalFileMgr::getStorageStats(const int32_t db_id, const int32_t tb_id) {
324  AbstractBufferMgr* opened_fm = findFileMgr(db_id, tb_id);
325  if (opened_fm) {
326  return dynamic_cast<FileMgr*>(opened_fm)->getStorageStats();
327  }
328  TablePair file_mgr_key{db_id, tb_id};
329  auto it = lazy_initialized_stats_.find(file_mgr_key);
330  if (it != lazy_initialized_stats_.end()) {
331  return it->second;
332  } else {
333  // Do not do full init of table just to get storage stats, just check file instead
334  auto u = std::make_unique<FileMgr>(0, this, file_mgr_key, defaultPageSize_, true);
335  lazy_initialized_stats_[file_mgr_key] = u->getStorageStats();
336  u.reset();
337  return lazy_initialized_stats_[file_mgr_key];
338  }
339 }
340 
// Compacts the data files of table (db_id, tb_id) and then re-creates its manager.
// If findFileMgr() yields a FileMgr, compactFiles() is called on it and the manager is
// deleted; getFileMgr() is then called unconditionally so a manager exists afterwards.
// NOTE(review): listing line 344 is absent from this extract; given the dedicated inner
// scope and the call to deleteFileMgr(), it presumably acquires a lock -- confirm against
// the real source file.
341 void GlobalFileMgr::compactDataFiles(const int32_t db_id, const int32_t tb_id) {
342  auto file_mgr = dynamic_cast<File_Namespace::FileMgr*>(findFileMgr(db_id, tb_id));
343  {
345  if (file_mgr) {
346  file_mgr->compactFiles();
347  deleteFileMgr(db_id, tb_id);
348  }
349  }
350 
351  // Re-initialize file manager
352  getFileMgr(db_id, tb_id);
353 }
354 } // namespace File_Namespace
void writeFileMgrData(FileMgr *fileMgr=0)
void deleteBuffersWithPrefix(const ChunkKey &keyPrefix, const bool purge=true) override
std::vector< int > ChunkKey
Definition: types.h:36
void deleteFileMgr(const int32_t db_id, const int32_t tb_id)
int32_t epoch_
number of threads used when loading data
std::shared_ptr< ForeignStorageInterface > fsi_
heavyai::shared_lock< heavyai::shared_mutex > read_lock
This file includes the class specification for the FILE manager (FileMgr), and related data structure...
void checkpoint() override
Fsyncs data files, writes out epoch and fsyncs that.
std::map< TablePair, std::shared_ptr< FileMgr > > ownedFileMgrs_
#define LOG(tag)
Definition: Logger.h:216
heavyai::unique_lock< heavyai::shared_mutex > write_lock
size_t getNumChunks() override
int32_t lastCheckpointedEpoch() const
Returns value of epoch at last checkpoint.
Definition: FileMgr.h:299
std::shared_lock< T > shared_lock
std::map< TablePair, AbstractBufferMgr * > allFileMgrs_
void resetTableEpochFloor(const int32_t db_id, const int32_t tb_id)
void setTableEpoch(const int32_t db_id, const int32_t tb_id, const int32_t start_epoch)
StorageStats getStorageStats(const int32_t db_id, const int32_t tb_id)
AbstractBufferMgr * findFileMgrUnlocked(const int32_t db_id, const int32_t tb_id)
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
std::unique_lock< T > unique_lock
std::shared_ptr< FileMgr > getSharedFileMgr(const int db_id, const int table_id)
int32_t omnisci_db_version_
default page size, used to set FileMgr defaultPageSize_
bool existsDiffBetweenFileMgrParamsAndFileMgr(FileMgr *file_mgr, const FileMgrParams &file_mgr_params) const
void compactDataFiles(const int32_t db_id, const int32_t tb_id)
AbstractBufferMgr * getFileMgr(const int32_t db_id, const int32_t tb_id)
size_t num_reader_threads_
The OS file system path containing the files.
std::map< TablePair, int32_t > max_rollback_epochs_per_table_
void setFileMgr(const int db_id, const int table_id, std::shared_ptr< FileMgr > file_mgr)
void closeFileMgr(const int32_t db_id, const int32_t tb_id)
void setFileMgrParams(const int32_t db_id, const int32_t tb_id, const FileMgrParams &file_mgr_params)
#define CHECK(condition)
Definition: Logger.h:222
AbstractBufferMgr * findFileMgr(const int32_t db_id, const int32_t tb_id)
std::map< TablePair, StorageStats > lazy_initialized_stats_
int32_t maxRollbackEpochs()
Returns value max_rollback_epochs.
Definition: FileMgr.h:308
std::pair< const int32_t, const int32_t > TablePair
Definition: FileMgr.h:91
void removeTableRelatedDS(const int32_t db_id, const int32_t tb_id) override
heavyai::shared_mutex fileMgrs_mutex_
A selection of helper methods for File I/O.
size_t getTableEpoch(const int32_t db_id, const int32_t tb_id)