OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FileInfo.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "FileInfo.h"
18 #include <iostream>
19 #include "../../Shared/File.h"
20 #include "FileMgr.h"
21 #include "Page.h"
22 
23 #include <utility>
24 using namespace std;
25 
26 namespace File_Namespace {
27 
28 FileInfo::FileInfo(FileMgr* fileMgr,
29  const int32_t fileId,
30  FILE* f,
31  const size_t pageSize,
32  size_t numPages,
33  bool init)
34  : fileMgr(fileMgr), fileId(fileId), f(f), pageSize(pageSize), numPages(numPages) {
35  if (init) {
36  initNewFile();
37  }
38 }
39 
41  // close file, if applicable
42  if (f) {
43  close(f);
44  }
45 }
46 
48  // initialize pages and free page list
49  // Also zeroes out first four bytes of every header
50 
51  int32_t headerSize = 0;
52  int8_t* headerSizePtr = (int8_t*)(&headerSize);
53  for (size_t pageId = 0; pageId < numPages; ++pageId) {
54  File_Namespace::write(f, pageId * pageSize, sizeof(int32_t), headerSizePtr);
55  freePages.insert(pageId);
56  }
57  isDirty = true;
58 }
59 
60 size_t FileInfo::write(const size_t offset, const size_t size, int8_t* buf) {
61  std::lock_guard<std::mutex> lock(readWriteMutex_);
62  isDirty = true;
63  return File_Namespace::write(f, offset, size, buf);
64 }
65 
66 size_t FileInfo::read(const size_t offset, const size_t size, int8_t* buf) {
67  std::lock_guard<std::mutex> lock(readWriteMutex_);
68  return File_Namespace::read(f, offset, size, buf);
69 }
70 
71 void FileInfo::openExistingFile(std::vector<HeaderInfo>& headerVec) {
72  // HeaderInfo is defined in Page.h
73 
74  // Oct 2020: Changing semantics such that fileMgrEpoch should be last checkpointed
75  // epoch, not incremented epoch. This changes some of the gt/gte/lt/lte comparison below
76  ChunkKey oldChunkKey(4);
77  int32_t oldPageId = -99;
78  int32_t oldVersionEpoch = -99;
79  int32_t skipped = 0;
80  for (size_t pageNum = 0; pageNum < numPages; ++pageNum) {
81  constexpr size_t MAX_INTS_TO_READ{10}; // currently use 1+6 ints
82  int32_t ints[MAX_INTS_TO_READ];
83  CHECK_EQ(fseek(f, pageNum * pageSize, SEEK_SET), 0);
84  CHECK_EQ(fread(ints, sizeof(int32_t), MAX_INTS_TO_READ, f), MAX_INTS_TO_READ);
85 
86  auto headerSize = ints[0];
87  if (headerSize == 0) {
88  // no header for this page - insert into free list
89  freePages.insert(pageNum);
90  continue;
91  }
92 
93  // headerSize doesn't include headerSize itself
94  // We're tying ourself to headers of ints here
95  size_t numHeaderElems = headerSize / sizeof(int32_t);
96  CHECK_GE(numHeaderElems, size_t(2));
97  // We don't want to read headerSize in our header - so start
98  // reading 4 bytes past it
99  ChunkKey chunkKey(&ints[1], &ints[1 + numHeaderElems - 2]);
100  if (fileMgr->updatePageIfDeleted(this, chunkKey, ints[1], ints[2], pageNum)) {
101  continue;
102  }
103  // Last two elements of header are always PageId and Version
104  // epoch - these are not in the chunk key so seperate them
105  int32_t pageId = ints[1 + numHeaderElems - 2];
106  int32_t versionEpoch = ints[1 + numHeaderElems - 1];
107  if (chunkKey != oldChunkKey || oldPageId != pageId - (1 + skipped)) {
108  if (skipped > 0) {
109  VLOG(4) << "FId.PSz: " << fileId << "." << pageSize
110  << " Chunk key: " << show_chunk(oldChunkKey)
111  << " Page id from : " << oldPageId << " to : " << oldPageId + skipped
112  << " Epoch: " << oldVersionEpoch;
113  } else if (oldPageId != -99) {
114  VLOG(4) << "FId.PSz: " << fileId << "." << pageSize
115  << " Chunk key: " << show_chunk(oldChunkKey) << " Page id: " << oldPageId
116  << " Epoch: " << oldVersionEpoch;
117  }
118  oldPageId = pageId;
119  oldVersionEpoch = versionEpoch;
120  oldChunkKey = chunkKey;
121  skipped = 0;
122  } else {
123  skipped++;
124  }
125 
126  /* Check if version epoch is equal to
127  * or greater (note: should never be greater)
128  * than FileMgr epoch_ - this means that this
129  * page wasn't checkpointed and thus we should
130  * not use it
131  */
132  int32_t fileMgrEpoch =
133  fileMgr->epoch(chunkKey[CHUNK_KEY_DB_IDX], chunkKey[CHUNK_KEY_TABLE_IDX]);
134  if (versionEpoch > fileMgrEpoch) {
135  // First write 0 to first four bytes of
136  // header to mark as free
137  if (!g_read_only) {
138  freePageImmediate(pageNum);
139  }
140  LOG(WARNING) << "Was not checkpointed: Chunk key: " << show_chunk(chunkKey)
141  << " Page id: " << pageId << " Epoch: " << versionEpoch
142  << " FileMgrEpoch " << fileMgrEpoch << endl;
143  } else { // page was checkpointed properly
144  Page page(fileId, pageNum);
145  headerVec.emplace_back(chunkKey, pageId, versionEpoch, page);
146  }
147  }
148  // printlast
149  if (oldPageId != -99) {
150  if (skipped > 0) {
151  VLOG(4) << "FId.PSz: " << fileId << "." << pageSize
152  << " Chunk key: " << show_chunk(oldChunkKey)
153  << " Page id from : " << oldPageId << " to : " << oldPageId + skipped
154  << " Epoch: " << oldVersionEpoch;
155  } else {
156  VLOG(4) << "FId.PSz: " << fileId << "." << pageSize
157  << " Chunk key: " << show_chunk(oldChunkKey) << " Page id: " << oldPageId
158  << " Epoch: " << oldVersionEpoch;
159  }
160  }
161 }
162 
163 void FileInfo::freePageDeferred(int32_t pageId) {
164  std::lock_guard<std::mutex> lock(freePagesMutex_);
165  freePages.insert(pageId);
166 }
167 
#ifdef ENABLE_CRASH_CORRUPTION_TEST
#warning "!!!!! DB corruption crash test is enabled !!!!!"
#include <signal.h>
// Flag raised by the signal handler below. It is written from a signal handler,
// so it must be a volatile sig_atomic_t rather than a plain bool.
static volatile sig_atomic_t goto_crash = 0;
// Signal handler: arms the crash test, but only when the
// ENABLE_CRASH_CORRUPTION_TEST environment variable is set.
// NOTE(review): getenv() is not on the POSIX async-signal-safe list; acceptable
// for a test-only build, but confirm before relying on it.
static void sighandler(int /*sig*/) {
  if (getenv("ENABLE_CRASH_CORRUPTION_TEST")) {
    goto_crash = 1;
  }
}
#endif
177 
178 void FileInfo::freePage(int pageId, const bool isRolloff, int32_t epoch) {
179  std::lock_guard<std::mutex> lock(readWriteMutex_);
180 #define RESILIENT_PAGE_HEADER
181 #ifdef RESILIENT_PAGE_HEADER
182  int32_t epoch_freed_page[2] = {DELETE_CONTINGENT, epoch};
183  if (isRolloff) {
184  epoch_freed_page[0] = ROLLOFF_CONTINGENT;
185  }
187  pageId * pageSize + sizeof(int32_t),
188  sizeof(epoch_freed_page),
189  (int8_t*)epoch_freed_page);
190  fileMgr->free_page(std::make_pair(this, pageId));
191 #else
192  freePageImmediate(pageId);
193 #endif // RESILIENT_PAGE_HEADER
194  isDirty = true;
195 
196 #ifdef ENABLE_CRASH_CORRUPTION_TEST
197  signal(SIGUSR2, sighandler);
198  if (goto_crash)
199  CHECK(pageId % 8 != 4);
200 #endif
201 }
202 
204  // returns -1 if there is no free page
205  std::lock_guard<std::mutex> lock(freePagesMutex_);
206  if (freePages.size() == 0) {
207  return -1;
208  }
209  auto pageIt = freePages.begin();
210  int32_t pageNum = *pageIt;
211  freePages.erase(pageIt);
212  return pageNum;
213 }
214 
215 void FileInfo::print(bool pagesummary) {
216  std::cout << "File: " << fileId << std::endl;
217  std::cout << "Size: " << size() << std::endl;
218  std::cout << "Used: " << used() << std::endl;
219  std::cout << "Free: " << available() << std::endl;
220  if (!pagesummary) {
221  return;
222  }
223 }
225  std::lock_guard<std::mutex> lock(readWriteMutex_);
226  if (isDirty) {
227  if (fflush(f) != 0) {
228  LOG(FATAL) << "Error trying to flush changes to disk, the error was: "
229  << std::strerror(errno);
230  }
231 #ifdef __APPLE__
232  const int32_t sync_result = fcntl(fileno(f), 51);
233 #else
234  const int32_t sync_result = omnisci::fsync(fileno(f));
235 #endif
236  if (sync_result == 0) {
237  isDirty = false;
238  }
239  return sync_result;
240  }
241  return 0; // if file was not dirty and no syncing was needed
242 }
243 
244 void FileInfo::freePageImmediate(int32_t page_num) {
245  std::lock_guard<std::mutex> lock(freePagesMutex_);
246  int32_t zero{0};
248  f, page_num * pageSize, sizeof(int32_t), reinterpret_cast<int8_t*>(&zero));
249  freePages.insert(page_num);
250 }
251 
252 // Overwrites delete/rollback contingents by re-writing chunk key to page.
253 void FileInfo::recoverPage(const ChunkKey& chunk_key, int32_t page_num) {
255  page_num * pageSize + sizeof(int32_t),
256  2 * sizeof(int32_t),
257  reinterpret_cast<const int8_t*>(chunk_key.data()));
258 }
259 } // namespace File_Namespace
virtual int32_t epoch(int32_t db_id, int32_t tb_id) const
Returns current value of epoch - should be one greater than recorded at last checkpoint. Because FileMgr only contains buffers from one table we can just return the FileMgr's epoch instead of finding a table-specific epoch.
Definition: FileMgr.h:271
#define CHECK_EQ(x, y)
Definition: Logger.h:211
std::vector< int > ChunkKey
Definition: types.h:37
A logical page (Page) belongs to a file on disk.
Definition: Page.h:46
#define LOG(tag)
Definition: Logger.h:194
std::mutex readWriteMutex_
Definition: FileInfo.h:64
#define CHUNK_KEY_DB_IDX
Definition: types.h:39
void freePageImmediate(int32_t page_num)
Definition: FileInfo.cpp:244
#define CHECK_GE(x, y)
Definition: Logger.h:216
virtual bool updatePageIfDeleted(FileInfo *file_info, ChunkKey &chunk_key, int32_t contingent, int32_t page_epoch, int32_t page_num)
deletes or recovers a page based on last checkpointed epoch.
Definition: FileMgr.cpp:1570
std::string show_chunk(const ChunkKey &key)
Definition: types.h:85
void freePage(int32_t pageId, const bool isRolloff, int32_t epoch)
Definition: FileInfo.cpp:178
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:141
size_t write(const size_t offset, const size_t size, int8_t *buf)
Definition: FileInfo.cpp:60
std::set< size_t > freePages
Definition: FileInfo.h:62
size_t pageSize
file stream object for the represented file
Definition: FileInfo.h:59
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:133
constexpr int32_t DELETE_CONTINGENT
A FileInfo type has a file pointer and metadata about a file.
Definition: FileInfo.h:51
void init(LogOptions const &log_opts)
Definition: Logger.cpp:280
void initNewFile()
Adds all pages to freePages and zeroes first four bytes of header.
Definition: FileInfo.cpp:47
size_t size() const
Returns the number of bytes used by the file.
Definition: FileInfo.h:92
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:40
std::mutex freePagesMutex_
set of page numbers of free pages
Definition: FileInfo.h:63
int fsync(int fd)
Definition: omnisci_fs.cpp:60
constexpr int32_t ROLLOFF_CONTINGENT
Definition: FileInfo.h:52
bool g_read_only
Definition: File.cpp:38
~FileInfo()
Destructor.
Definition: FileInfo.cpp:40
size_t read(const size_t offset, const size_t size, int8_t *buf)
Definition: FileInfo.cpp:66
void free_page(std::pair< FileInfo *, int32_t > &&page)
Definition: FileMgr.cpp:1164
FILE * f
unique file identifier (i.e., used for a file name)
Definition: FileInfo.h:58
void openExistingFile(std::vector< HeaderInfo > &headerVec)
Definition: FileInfo.cpp:71
#define CHECK(condition)
Definition: Logger.h:203
void recoverPage(const ChunkKey &chunk_key, int32_t page_num)
Definition: FileInfo.cpp:253
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:119
void freePageDeferred(int32_t pageId)
Definition: FileInfo.cpp:163
size_t used()
Returns the amount of used bytes; size() - available()
Definition: FileInfo.h:108
char * f
void print(bool pagesummary)
Prints a summary of the file to stdout.
Definition: FileInfo.cpp:215
size_t numPages
the fixed size of each page in the file
Definition: FileInfo.h:60
bool isDirty
the number of pages in the file
Definition: FileInfo.h:61
#define VLOG(n)
Definition: Logger.h:297
size_t available()
Returns the number of free bytes available.
Definition: FileInfo.h:99