OmniSciDB  a667adc9c8
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
CachingFileMgr.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2021 Omnisci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
22 #include <boost/filesystem.hpp>
23 #include "Shared/File.h"
24 
// File name "epoch_metadata". The use site is not visible in this listing;
// presumably this is the name handed to openAndReadEpochFile() -- TODO confirm.
25 constexpr char EPOCH_FILENAME[] = "epoch_metadata";
26 
27 namespace File_Namespace {
28 namespace bf = boost::filesystem;
29 
/**
 * Constructs a CachingFileMgr whose cache directory is base_path.
 *
 * Stores base_path in fileMgrBasePath_, resets nextFileId_ to 0 and then calls
 * init(num_reader_threads, -1); the -1 is passed as init()'s epochOverride argument.
 *
 * NOTE(review): the embedded listing numbers jump from 32 to 35 inside this body, so
 * some statements may be missing from this view -- confirm against the real file.
 *
 * @param base_path path of the cache directory
 * @param num_reader_threads forwarded unchanged to init()
 */
30 CachingFileMgr::CachingFileMgr(const std::string& base_path,
31  const size_t num_reader_threads) {
32  fileMgrBasePath_ = base_path;
35  nextFileId_ = 0;
36  init(num_reader_threads, -1);
37 }
38 
40  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
41  bf::path path(fileMgrBasePath_);
42  if (bf::exists(path)) {
43  if (!bf::is_directory(path)) {
44  LOG(FATAL) << "Specified path '" << fileMgrBasePath_
45  << "' for disk cache is not a directory.";
46  }
49  return true;
50  }
51  LOG(FATAL) << "Cache path: " << fileMgrBasePath_ << "does not exit.";
52  return false;
53 }
54 
55 void CachingFileMgr::clearForTable(int db_id, int tb_id) {
56  {
57  mapd_unique_lock<mapd_shared_mutex> write_lock(chunkIndexMutex_);
58  for (auto it = chunkIndex_.begin(); it != chunkIndex_.end();) {
59  auto& [key, buffer] = *it;
60  if (in_same_table(key, {db_id, tb_id})) {
61  buffer->freePages();
62  delete buffer;
63  it = chunkIndex_.erase(it);
64  } else {
65  ++it;
66  }
67  }
68  auto dir_name = getFileMgrBasePath() + "/" + get_dir_name_for_table(db_id, tb_id);
69  if (bf::exists(dir_name)) {
70  bf::remove_all(dir_name);
71  }
72  }
73  checkpoint();
74  // TODO(Misiu): Implement background file removal.
75  // Currently the renameForDelete idiom will only work in the mapd/ directory as the
76  // cleanup thread is targetted there. If we want it to work for arbitrary directories
77  // we will need to add a new dir to the thread, or start a second thread.
78  // File_Namespace::renameForDelete(get_dir_name_for_table(db_id, tb_id));
79 }
80 
81 std::string CachingFileMgr::getOrAddTableDir(int db_id, int tb_id) {
82  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
83  std::string table_dir =
84  getFileMgrBasePath() + "/" + get_dir_name_for_table(db_id, tb_id);
85  if (!bf::exists(table_dir)) {
86  bf::create_directory(table_dir);
87  } else {
88  if (!bf::is_directory(table_dir)) {
89  LOG(FATAL) << "Specified path '" << table_dir
90  << "' for cache table data is not a directory.";
91  }
92  }
93  return table_dir;
94 }
95 
97  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
99  auto dir_name = getFileMgrBasePath();
100  if (bf::exists(dir_name)) {
101  bf::remove_all(dir_name);
102  }
103 
104  // TODO(Misiu): Implement background file removal.
105  // Currently the renameForDelete idiom will only work in the mapd/ directory as the
106  // cleanup thread is targeted there. If we want it to work for arbitrary directories
107  // we will need to add a new dir to the thread, or start a second thread.
108  // File_Namespace::renameForDelete(getFileMgrBasePath());
109 }
110 
111 uint64_t CachingFileMgr::getChunkSpaceReservedByTable(int db_id, int tb_id) {
112  mapd_shared_lock<mapd_shared_mutex> read_lock(chunkIndexMutex_);
113  uint64_t space_used = 0;
114  for (const auto& [key, buffer] : chunkIndex_) {
115  if (key[CHUNK_KEY_DB_IDX] == db_id && key[CHUNK_KEY_TABLE_IDX] == tb_id) {
116  space_used += buffer->reservedSize();
117  }
118  }
119  return space_used;
120 }
121 
122 uint64_t CachingFileMgr::getMetadataSpaceReservedByTable(int db_id, int tb_id) {
123  mapd_shared_lock<mapd_shared_mutex> read_lock(chunkIndexMutex_);
124  uint64_t space_used = 0;
125  for (const auto& [key, buffer] : chunkIndex_) {
126  if (key[CHUNK_KEY_DB_IDX] == db_id && key[CHUNK_KEY_TABLE_IDX] == tb_id) {
127  space_used += (buffer->numMetadataPages() * METADATA_PAGE_SIZE);
128  }
129  }
130  return space_used;
131 }
132 
133 uint64_t CachingFileMgr::getWrapperSpaceReservedByTable(int db_id, int tb_id) {
134  mapd_shared_lock<mapd_shared_mutex> read_lock(files_rw_mutex_);
135  uint64_t space_used = 0;
136  std::string table_dir =
137  getFileMgrBasePath() + "/" + get_dir_name_for_table(db_id, tb_id);
138  if (bf::exists(table_dir)) {
139  for (const auto& file : bf::recursive_directory_iterator(table_dir)) {
140  if (bf::is_regular_file(file.path())) {
141  space_used += bf::file_size(file.path());
142  }
143  }
144  }
145  return space_used;
146 }
147 
148 uint64_t CachingFileMgr::getSpaceReservedByTable(int db_id, int tb_id) {
149  auto chunkSpace = getChunkSpaceReservedByTable(db_id, tb_id);
150  auto metaSpace = getMetadataSpaceReservedByTable(db_id, tb_id);
151  auto wrapperSpace = getWrapperSpaceReservedByTable(db_id, tb_id);
152  return chunkSpace + metaSpace + wrapperSpace;
153 }
154 
156  return "cache";
157 }
158 
159 } // namespace File_Namespace
uint64_t getSpaceReservedByTable(int db_id, int tb_id)
#define METADATA_PAGE_SIZE
Definition: FileBuffer.h:37
uint64_t getMetadataSpaceReservedByTable(int db_id, int tb_id)
uint64_t getWrapperSpaceReservedByTable(int db_id, int tb_id)
constexpr char EPOCH_FILENAME[]
std::string get_dir_name_for_table(int db_id, int tb_id)
#define LOG(tag)
Definition: Logger.h:188
#define CHUNK_KEY_DB_IDX
Definition: types.h:39
void migrateToLatestFileMgrVersion()
Definition: FileMgr.cpp:1161
std::string getOrAddTableDir(int db_id, int tb_id)
Returns (and optionally creates) a subdirectory for table-specific persistent data (e...
void init(const size_t num_reader_threads, const int32_t epochOverride)
Definition: FileMgr.cpp:255
std::string getFileMgrBasePath() const
Definition: FileMgr.h:330
void checkpoint() override
Fsyncs data files, writes out epoch and fsyncs that.
Definition: FileMgr.cpp:705
std::string fileMgrBasePath_
Definition: FileMgr.h:372
std::string describeSelf() override
void clearForTable(int db_id, int tb_id)
Removes all data related to the given table (pages and subdirectories).
bool coreInit() override
Determines file path, and if exists, runs file migration and opens and reads epoch file...
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:325
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:40
#define DEFAULT_PAGE_SIZE
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:378
int32_t maxRollbackEpochs_
Definition: FileMgr.h:371
void openAndReadEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:642
mapd_shared_lock< mapd_shared_mutex > read_lock
uint64_t getChunkSpaceReservedByTable(int db_id, int tb_id)
CachingFileMgr(const std::string &base_path, const size_t num_reader_threads=0)
mapd_unique_lock< mapd_shared_mutex > write_lock
bool in_same_table(const ChunkKey &left_key, const ChunkKey &right_key)
Definition: types.h:78
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:392
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:393
A selection of helper methods for File I/O.
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31