OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FileInfo.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "FileInfo.h"
18 #include <iostream>
19 #include "../../Shared/File.h"
20 #include "FileMgr.h"
21 #include "Page.h"
22 
23 #include <utility>
24 using namespace std;
25 
26 namespace File_Namespace {
27 
28 FileInfo::FileInfo(FileMgr* fileMgr,
29  const int fileId,
30  FILE* f,
31  const size_t pageSize,
32  size_t numPages,
33  bool init)
34  : fileMgr(fileMgr), fileId(fileId), f(f), pageSize(pageSize), numPages(numPages) {
35  if (init) {
36  initNewFile();
37  }
38 }
39 
41  // close file, if applicable
42  if (f) {
43  close(f);
44  }
45 }
46 
48  // initialize pages and free page list
49  // Also zeroes out first four bytes of every header
50 
51  int headerSize = 0;
52  int8_t* headerSizePtr = (int8_t*)(&headerSize);
53  for (size_t pageId = 0; pageId < numPages; ++pageId) {
54  File_Namespace::write(f, pageId * pageSize, sizeof(int), headerSizePtr);
55  freePages.insert(pageId);
56  }
57 }
58 
59 size_t FileInfo::write(const size_t offset, const size_t size, int8_t* buf) {
60  std::lock_guard<std::mutex> lock(readWriteMutex_);
61  return File_Namespace::write(f, offset, size, buf);
62 }
63 
64 size_t FileInfo::read(const size_t offset, const size_t size, int8_t* buf) {
65  std::lock_guard<std::mutex> lock(readWriteMutex_);
66  return File_Namespace::read(f, offset, size, buf);
67 }
68 
69 void FileInfo::openExistingFile(std::vector<HeaderInfo>& headerVec,
70  const int fileMgrEpoch) {
71  // HeaderInfo is defined in Page.h
72  ChunkKey oldChunkKey(4);
73  int oldPageId = -99;
74  int oldVersionEpoch = -99;
75  int skipped = 0;
76  for (size_t pageNum = 0; pageNum < numPages; ++pageNum) {
77  int headerSize;
78 
79  constexpr size_t MAX_INTS_TO_READ{10}; // currently use 1+6 ints
80  int ints[MAX_INTS_TO_READ];
81  CHECK_EQ(fseek(f, pageNum * pageSize, SEEK_SET), 0);
82  CHECK_EQ(fread(ints, sizeof(int), MAX_INTS_TO_READ, f), MAX_INTS_TO_READ);
83 
84  headerSize = ints[0];
85  if (0 != headerSize) {
86  if (DELETE_CONTINGENT == ints[1]) {
87  if (fileMgr->epoch() > ints[2]) {
88  int zero{0};
89  File_Namespace::write(f, pageNum * pageSize, sizeof(int), (int8_t*)&zero);
90  headerSize = 0;
91  }
92  }
93  }
94 
95  if (headerSize != 0) {
96  // headerSize doesn't include headerSize itself
97  // We're tying ourself to headers of ints here
98  size_t numHeaderElems = headerSize / sizeof(int);
99  CHECK_GE(numHeaderElems, size_t(2));
100  // size_t chunkSize;
101  // We don't want to read headerSize in our header - so start
102  // reading 4 bytes past it
103 
104  // always derive dbid/tbid from FileMgr
105  ChunkKey chunkKey(&ints[1], &ints[1 + numHeaderElems - 2]);
106  chunkKey[0] = fileMgr->get_fileMgrKey().first;
107  chunkKey[1] = fileMgr->get_fileMgrKey().second;
108  // recover page in case a crash failed deletion of this page
109  if (DELETE_CONTINGENT == ints[1]) {
111  f, pageNum * pageSize + sizeof(int), 2 * sizeof(int), (int8_t*)&chunkKey[0]);
112  }
113 
114  // cout << "Chunk key: " << showChunk(chunkKey) << endl;
115  // Last two elements of header are always PageId and Version
116  // epoch - these are not in the chunk key so seperate them
117  int pageId = ints[1 + numHeaderElems - 2];
118  // cout << "Page id: " << pageId << endl;
119  int versionEpoch = ints[1 + numHeaderElems - 1];
120  if (chunkKey != oldChunkKey || oldPageId != pageId - (1 + skipped)) {
121  if (skipped > 0) {
122  VLOG(4) << "FId.PSz: " << fileId << "." << pageSize
123  << " Chunk key: " << showChunk(oldChunkKey)
124  << " Page id from : " << oldPageId << " to : " << oldPageId + skipped
125  << " Epoch: " << oldVersionEpoch;
126  } else if (oldPageId != -99) {
127  VLOG(4) << "FId.PSz: " << fileId << "." << pageSize
128  << " Chunk key: " << showChunk(oldChunkKey) << " Page id: " << oldPageId
129  << " Epoch: " << oldVersionEpoch;
130  }
131  oldPageId = pageId;
132  oldVersionEpoch = versionEpoch;
133  oldChunkKey = chunkKey;
134  skipped = 0;
135  } else {
136  skipped++;
137  }
138  // read(f,pageNum*pageSize+sizeof(int),headerSize-2*sizeof(int),(int8_t
139  // *)(&chunkKey[0])); read(f,pageNum*pageSize+sizeof(int) + headerSize -
140  // 2*sizeof(int),sizeof(int),(int8_t *)(&pageId));
141  // read(f,pageNum*pageSize+sizeof(int) + headerSize -
142  // sizeof(int),sizeof(int),(int8_t *)(&versionEpoch));
143  // read(f,pageNum*pageSize+sizeof(int) + headerSize -
144  // sizeof(size_t),sizeof(size_t),(int8_t *)(&chunkSize));
145 
146  /* Check if version epoch is equal to
147  * or greater (note: should never be greater)
148  * than FileMgr epoch_ - this means that this
149  * page wasn't checkpointed and thus we should
150  * not use it
151  */
152  if (versionEpoch >= fileMgrEpoch) {
153  // First write 0 to first four bytes of
154  // header to mark as free
155  headerSize = 0;
156  File_Namespace::write(f, pageNum * pageSize, sizeof(int), (int8_t*)&headerSize);
157  // Now add page to free list
158  freePages.insert(pageNum);
159  LOG(WARNING) << "Was not checkpointed: Chunk key: " << showChunk(chunkKey)
160  << " Page id: " << pageId << " Epoch: " << versionEpoch
161  << " FileMgrEpoch " << fileMgrEpoch << endl;
162 
163  } else { // page was checkpointed properly
164  Page page(fileId, pageNum);
165  headerVec.emplace_back(chunkKey, pageId, versionEpoch, page);
166  // std::cout << "Inserted into headerVec" << std::endl;
167  }
168  } else { // no header for this page - insert into free list
169  freePages.insert(pageNum);
170  }
171  }
172  // printlast
173  if (oldPageId != -99) {
174  if (skipped > 0) {
175  VLOG(4) << "FId.PSz: " << fileId << "." << pageSize
176  << " Chunk key: " << showChunk(oldChunkKey)
177  << " Page id from : " << oldPageId << " to : " << oldPageId + skipped
178  << " Epoch: " << oldVersionEpoch;
179  } else {
180  VLOG(4) << "FId.PSz: " << fileId << "." << pageSize
181  << " Chunk key: " << showChunk(oldChunkKey) << " Page id: " << oldPageId
182  << " Epoch: " << oldVersionEpoch;
183  }
184  }
185 }
186 
187 void FileInfo::freePageDeferred(int pageId) {
188  std::lock_guard<std::mutex> lock(freePagesMutex_);
189  freePages.insert(pageId);
190 }
191 
#ifdef ENABLE_CRASH_CORRUPTION_TEST
#warning "!!!!! DB corruption crash test is enabled !!!!!"
#include <signal.h>
// Set by sighandler() and polled by freePage(). volatile sig_atomic_t is the
// object type a signal handler may portably write to.
static volatile sig_atomic_t goto_crash;
// Signal handler for the crash test: raises goto_crash, but only when the
// ENABLE_CRASH_CORRUPTION_TEST environment variable is set.
static void sighandler(int /*sig*/) {
  // NOTE(review): getenv() is not listed as async-signal-safe by POSIX; left in
  // place so the test keeps its current behavior.
  if (getenv("ENABLE_CRASH_CORRUPTION_TEST")) {
    goto_crash = 1;
  }
}
#endif
201 
202 void FileInfo::freePage(int pageId) {
203 #define RESILIENT_PAGE_HEADER
204 #ifdef RESILIENT_PAGE_HEADER
205  int epoch_freed_page[2] = {DELETE_CONTINGENT, fileMgr->epoch()};
207  pageId * pageSize + sizeof(int),
208  sizeof(epoch_freed_page),
209  (int8_t*)epoch_freed_page);
210  fileMgr->free_page(std::make_pair(this, pageId));
211 #else
212  int zeroVal = 0;
213  int8_t* zeroAddr = reinterpret_cast<int8_t*>(&zeroVal);
214  File_Namespace::write(f, pageId * pageSize, sizeof(int), zeroAddr);
215  std::lock_guard<std::mutex> lock(freePagesMutex_);
216  freePages.insert(pageId);
217 #endif // RESILIENT_PAGE_HEADER
218 
219 #ifdef ENABLE_CRASH_CORRUPTION_TEST
220  signal(SIGUSR2, sighandler);
221  if (goto_crash)
222  CHECK(pageId % 8 != 4);
223 #endif
224 }
225 
// NOTE(review): the signature line of this function (listing line 226) is
// missing from this extract, so its name and declared return type are not
// visible here - restore it from the original FileInfo.cpp.
// The body below takes one page number out of freePages while holding
// freePagesMutex_ and returns it.
227  // returns -1 if there is no free page
228  std::lock_guard<std::mutex> lock(freePagesMutex_);
229  if (freePages.size() == 0) {
230  return -1;
231  }
// Take the first element of the set and erase it so it is not handed out again.
232  auto pageIt = freePages.begin();
233  int pageNum = *pageIt;
234  freePages.erase(pageIt);
235  return pageNum;
236 }
237 
238 void FileInfo::print(bool pagesummary) {
239  std::cout << "File: " << fileId << std::endl;
240  std::cout << "Size: " << size() << std::endl;
241  std::cout << "Used: " << used() << std::endl;
242  std::cout << "Free: " << available() << std::endl;
243  if (!pagesummary) {
244  return;
245  }
246 
247  // for (size_t i = 0; i < pages.size(); ++i) {
248  // // @todo page summary
249  //}
250 }
251 } // namespace File_Namespace
#define CHECK_EQ(x, y)
Definition: Logger.h:198
std::vector< int > ChunkKey
Definition: types.h:35
A logical page (Page) belongs to a file on disk.
Definition: Page.h:46
#define LOG(tag)
Definition: Logger.h:185
std::mutex readWriteMutex_
Definition: FileInfo.h:62
void freePageDeferred(int pageId)
Definition: FileInfo.cpp:187
#define CHECK_GE(x, y)
Definition: Logger.h:203
size_t size()
Returns the number of bytes used by the file.
Definition: FileInfo.h:90
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:204
size_t write(const size_t offset, const size_t size, int8_t *buf)
Definition: FileInfo.cpp:59
void free_page(std::pair< FileInfo *, int > &&page)
Definition: FileMgr.cpp:997
std::set< size_t > freePages
set of page numbers of free pages
Definition: FileInfo.h:60
size_t pageSize
the fixed size of each page in the file
Definition: FileInfo.h:56
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:113
CHECK(cgen_state)
void init(LogOptions const &log_opts)
Definition: Logger.cpp:265
void initNewFile()
Adds all pages to freePages and zeroes first four bytes of header.
Definition: FileInfo.cpp:47
std::mutex freePagesMutex_
mutex guarding access to freePages
Definition: FileInfo.h:61
void freePage(int pageId)
Definition: FileInfo.cpp:202
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
#define DELETE_CONTINGENT
A FileInfo type has a file pointer and metadata about a file.
Definition: FileInfo.h:49
~FileInfo()
Destructor.
Definition: FileInfo.cpp:40
size_t read(const size_t offset, const size_t size, int8_t *buf)
Definition: FileInfo.cpp:64
FILE * f
file stream object for the represented file
Definition: FileInfo.h:55
void openExistingFile(std::vector< HeaderInfo > &headerVec, const int fileMgrEpoch)
Definition: FileInfo.cpp:69
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:121
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:102
size_t used()
Returns the amount of used bytes; size() - available()
Definition: FileInfo.h:114
void print(bool pagesummary)
Prints a summary of the file to stdout.
Definition: FileInfo.cpp:238
const std::pair< const int, const int > get_fileMgrKey() const
Definition: FileMgr.h:234
size_t numPages
the number of pages in the file
Definition: FileInfo.h:57
#define VLOG(n)
Definition: Logger.h:280
size_t available()
Returns the number of free bytes available.
Definition: FileInfo.h:105