OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
File.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "Shared/File.h"
24 
25 #include <algorithm>
26 #include <atomic>
27 #include <cerrno>
28 #include <chrono>
29 #include <cstdio>
30 #include <cstring>
31 #include <iostream>
32 #include <stdexcept>
33 #include <string>
34 
35 #include "Logger/Logger.h"
36 #include "OSDependent/heavyai_fs.h"
37 
38 #include <boost/filesystem.hpp>
39 
// Process-wide read-only switch. When true, the helpers in this file that
// create, write to or remove files report LOG(FATAL) instead of proceeding,
// and the open() helpers use mode "rb" rather than "r+b".
bool g_read_only = false;
41 
42 namespace File_Namespace {
43 
44 std::string get_data_file_path(const std::string& base_path,
45  int file_id,
46  size_t page_size) {
47  return base_path + "/" + std::to_string(file_id) + "." + std::to_string(page_size) +
48  std::string(DATA_FILE_EXT); // DATA_FILE_EXT has preceding "."
49 }
50 
51 std::string get_legacy_data_file_path(const std::string& new_data_file_path) {
52  auto legacy_path = boost::filesystem::canonical(new_data_file_path);
53  legacy_path.replace_extension(kLegacyDataFileExtension);
54  return legacy_path.string();
55 }
56 
57 std::pair<FILE*, std::string> create(const std::string& basePath,
58  const int fileId,
59  const size_t pageSize,
60  const size_t numPages) {
61  auto path = get_data_file_path(basePath, fileId, pageSize);
62  if (numPages < 1 || pageSize < 1) {
63  LOG(FATAL) << "Error trying to create file '" << path
64  << "', Number of pages and page size must be positive integers. numPages "
65  << numPages << " pageSize " << pageSize;
66  }
67  FILE* f = heavyai::fopen(path.c_str(), "w+b");
68  if (f == nullptr) {
69  LOG(FATAL) << "Error trying to create file '" << path
70  << "', the error was: " << std::strerror(errno);
71  }
72  fseek(f, static_cast<long>((pageSize * numPages) - 1), SEEK_SET);
73  fputc(EOF, f);
74  fseek(f, 0, SEEK_SET); // rewind
75  if (fileSize(f) != pageSize * numPages) {
76  LOG(FATAL) << "Error trying to create file '" << path << "', file size "
77  << fileSize(f) << " does not equal pageSize * numPages "
78  << pageSize * numPages;
79  }
80  boost::filesystem::create_symlink(boost::filesystem::canonical(path).filename(),
82  return {f, path};
83 }
84 
85 FILE* create(const std::string& fullPath, const size_t requestedFileSize) {
86  if (g_read_only) {
87  LOG(FATAL) << "Error trying to create file '" << fullPath
88  << "', not allowed read only ";
89  }
90  FILE* f = heavyai::fopen(fullPath.c_str(), "w+b");
91  if (f == nullptr) {
92  LOG(FATAL) << "Error trying to create file '" << fullPath
93  << "', the error was: " << std::strerror(errno);
94  ;
95  }
96  fseek(f, static_cast<long>(requestedFileSize - 1), SEEK_SET);
97  fputc(EOF, f);
98  fseek(f, 0, SEEK_SET); // rewind
99  if (fileSize(f) != requestedFileSize) {
100  LOG(FATAL) << "Error trying to create file '" << fullPath << "', file size "
101  << fileSize(f) << " does not equal requestedFileSize "
102  << requestedFileSize;
103  }
104  return f;
105 }
106 
107 FILE* open(int fileId) {
108  std::string s(std::to_string(fileId) + std::string(DATA_FILE_EXT));
109  FILE* f = heavyai::fopen(
110  s.c_str(), g_read_only ? "rb" : "r+b"); // opens existing file for updates
111  if (f == nullptr) {
112  LOG(FATAL) << "Error trying to open file '" << s
113  << "', the error was: " << std::strerror(errno);
114  }
115  return f;
116 }
117 
118 FILE* open(const std::string& path) {
119  FILE* f = heavyai::fopen(
120  path.c_str(), g_read_only ? "rb" : "r+b"); // opens existing file for updates
121  if (f == nullptr) {
122  LOG(FATAL) << "Error trying to open file '" << path
123  << "', the errno was: " << std::strerror(errno);
124  }
125  return f;
126 }
127 
128 void close(FILE* f) {
129  CHECK(f);
130  CHECK_EQ(fflush(f), 0);
131  CHECK_EQ(fclose(f), 0);
132 }
133 
134 bool removeFile(const std::string basePath, const std::string filename) {
135  if (g_read_only) {
136  LOG(FATAL) << "Error trying to remove file '" << filename << "', running readonly";
137  }
138  const std::string filePath = basePath + filename;
139  return remove(filePath.c_str()) == 0;
140 }
141 
142 size_t read(FILE* f,
143  const size_t offset,
144  const size_t size,
145  int8_t* buf,
146  const std::string& file_path) {
147  // read "size" bytes from the offset location in the file into the buffer
148  CHECK_EQ(fseek(f, static_cast<long>(offset), SEEK_SET), 0);
149  size_t bytesRead = fread(buf, sizeof(int8_t), size, f);
150  auto expected_bytes_read = sizeof(int8_t) * size;
151  CHECK_EQ(bytesRead, expected_bytes_read)
152  << "Unexpected number of bytes read from file: " << file_path
153  << ". Expected bytes read: " << expected_bytes_read
154  << ", actual bytes read: " << bytesRead << ", offset: " << offset
155  << ", file stream error set: " << (std::ferror(f) ? "true" : "false")
156  << ", EOF reached: " << (std::feof(f) ? "true" : "false");
157  return bytesRead;
158 }
159 
160 size_t write(FILE* f, const size_t offset, const size_t size, const int8_t* buf) {
161  if (g_read_only) {
162  LOG(FATAL) << "Error trying to write file '" << f << "', running readonly";
163  }
164  // write size bytes from the buffer to the offset location in the file
165  if (fseek(f, static_cast<long>(offset), SEEK_SET) != 0) {
166  LOG(FATAL)
167  << "Error trying to write to file (during positioning seek) the error was: "
168  << std::strerror(errno);
169  }
170  size_t bytesWritten = fwrite(buf, sizeof(int8_t), size, f);
171  if (bytesWritten != sizeof(int8_t) * size) {
172  LOG(FATAL) << "Error trying to write to file (during fwrite) the error was: "
173  << std::strerror(errno);
174  }
175  return bytesWritten;
176 }
177 
178 size_t append(FILE* f, const size_t size, const int8_t* buf) {
179  if (g_read_only) {
180  LOG(FATAL) << "Error trying to append file '" << f << "', running readonly";
181  }
182  return write(f, fileSize(f), size, buf);
183 }
184 
185 size_t readPage(FILE* f,
186  const size_t pageSize,
187  const size_t pageNum,
188  int8_t* buf,
189  const std::string& file_path) {
190  return read(f, pageNum * pageSize, pageSize, buf, file_path);
191 }
192 
193 size_t readPartialPage(FILE* f,
194  const size_t pageSize,
195  const size_t offset,
196  const size_t readSize,
197  const size_t pageNum,
198  int8_t* buf,
199  const std::string& file_path) {
200  return read(f, pageNum * pageSize + offset, readSize, buf, file_path);
201 }
202 
203 size_t writePage(FILE* f, const size_t pageSize, const size_t pageNum, int8_t* buf) {
204  if (g_read_only) {
205  LOG(FATAL) << "Error trying to writePage file '" << f << "', running readonly";
206  }
207  return write(f, pageNum * pageSize, pageSize, buf);
208 }
209 
210 size_t writePartialPage(FILE* f,
211  const size_t pageSize,
212  const size_t offset,
213  const size_t writeSize,
214  const size_t pageNum,
215  int8_t* buf) {
216  if (g_read_only) {
217  LOG(FATAL) << "Error trying to writePartialPage file '" << f << "', running readonly";
218  }
219  return write(f, pageNum * pageSize + offset, writeSize, buf);
220 }
221 
222 size_t appendPage(FILE* f, const size_t pageSize, int8_t* buf) {
223  if (g_read_only) {
224  LOG(FATAL) << "Error trying to appendPage file '" << f << "', running readonly";
225  }
226  return write(f, fileSize(f), pageSize, buf);
227 }
228 
/**
 * Returns the size of file f in bytes.
 *
 * The size is measured by seeking to the end of the stream and reading the
 * position with ftell(). As a side effect the stream is left positioned at the
 * start of the file. The return codes of fseek()/ftell() are not checked, so a
 * failing ftell() (-1) is returned converted to size_t.
 *
 * @param f open file handle
 * @return the end-of-file position reported by ftell()
 */
size_t fileSize(FILE* f) {
  fseek(f, 0, SEEK_END);
  const long end_position = ftell(f);
  fseek(f, 0, SEEK_SET);  // leave the stream rewound
  return static_cast<size_t>(end_position);
}
236 
237 // this is a helper function to rename existing directories
238 // allowing for an async process to actually remove the physical directries
239 // and subfolders and files later
240 // it is required due to the large amount of time it can take to delete
241 // physical files from large disks
242 void renameForDelete(const std::string directoryName) {
243  boost::system::error_code ec;
244  boost::filesystem::path directoryPath(directoryName);
245  using namespace std::chrono;
246  milliseconds ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch());
247 
248  if (boost::filesystem::exists(directoryPath) &&
249  boost::filesystem::is_directory(directoryPath)) {
250  boost::filesystem::path newDirectoryPath(directoryName + "_" +
251  std::to_string(ms.count()) + "_DELETE_ME");
252  boost::filesystem::rename(directoryPath, newDirectoryPath, ec);
253 
254 #ifdef _WIN32
255  // On Windows we sometimes fail to rename a directory with System: 5 error
256  // code (access denied). An attempt to stop in debugger and look for opened
257  // handles for some of directory content shows no opened handles and actually
258  // allows renaming to execute successfully. It's not clear why, but a short
259  // pause allows to rename directory successfully. Until reasons are known,
260  // use this retry loop as a workaround.
261  int tries = 10;
262  while (ec.value() != boost::system::errc::success && tries) {
263  LOG(ERROR) << "Failed to rename directory " << directoryPath << " error was " << ec
264  << " (" << tries << " attempts left)";
265  std::this_thread::sleep_for(std::chrono::milliseconds(100 / tries));
266  tries--;
267  boost::filesystem::rename(directoryPath, newDirectoryPath, ec);
268  }
269 #endif
270 
271  if (ec.value() == boost::system::errc::success) {
272  std::thread th([newDirectoryPath]() {
273  boost::system::error_code ec;
274  boost::filesystem::remove_all(newDirectoryPath, ec);
275  // We dont check error on remove here as we cant log the
276  // issue fromdetached thrad, its not safe to LOG from here
277  // This is under investigation as clang detects TSAN issue data race
278  // the main system wide file_delete_thread will clean up any missed files
279  });
280  // let it run free so we can return
281  // if it fails the file_delete_thread in DBHandler will clean up
282  th.detach();
283 
284  return;
285  }
286 
287  LOG(FATAL) << "Failed to rename file " << directoryName << " to "
288  << directoryName + "_" + std::to_string(ms.count()) + "_DELETE_ME Error: "
289  << ec;
290  }
291 }
292 
293 } // namespace File_Namespace
294 
295 // Still temporary location but avoids the link errors in the new distributed branch.
296 // See the comment in file_delete.h
297 
298 #include <atomic>
299 #include <boost/algorithm/string/predicate.hpp>
300 #include <boost/filesystem.hpp>
301 #include <chrono>
302 #include <thread>
303 
304 void file_delete(std::atomic<bool>& program_is_running,
305  const unsigned int wait_interval_seconds,
306  const std::string base_path) {
307  const auto wait_duration = std::chrono::seconds(wait_interval_seconds);
308  const boost::filesystem::path path(base_path);
309  while (program_is_running) {
310  using vec = std::vector<boost::filesystem::path>; // store paths,
311  vec v;
312  boost::system::error_code ec;
313 
314  // copy vector from iterator as was getting weird random errors if
315  // removed direct from iterator
316  copy(boost::filesystem::directory_iterator(path),
317  boost::filesystem::directory_iterator(),
318  back_inserter(v));
319  for (vec::const_iterator it(v.begin()); it != v.end(); ++it) {
320  std::string object_name(it->string());
321 
322  if (boost::algorithm::ends_with(object_name, "DELETE_ME")) {
323  LOG(INFO) << " removing object " << object_name;
324  boost::filesystem::remove_all(*it, ec);
325  if (ec.value() != boost::system::errc::success) {
326  LOG(ERROR) << "Failed to remove object " << object_name << " error was " << ec;
327  }
328  }
329  }
330 
331  std::this_thread::sleep_for(wait_duration);
332  }
333 }
size_t appendPage(FILE *f, const size_t pageSize, int8_t *buf)
Appends a page from buf to the file.
Definition: File.cpp:222
#define CHECK_EQ(x, y)
Definition: Logger.h:301
size_t append(FILE *f, const size_t size, const int8_t *buf)
Appends the specified number of bytes to the end of the file f from buf.
Definition: File.cpp:178
std::string get_legacy_data_file_path(const std::string &new_data_file_path)
Definition: File.cpp:51
size_t readPartialPage(FILE *f, const size_t pageSize, const size_t offset, const size_t readSize, const size_t pageNum, int8_t *buf, const std::string &file_path)
Definition: File.cpp:193
#define LOG(tag)
Definition: Logger.h:285
size_t writePartialPage(FILE *f, const size_t pageSize, const size_t offset, const size_t writeSize, const size_t pageNum, int8_t *buf)
Definition: File.cpp:210
#define DATA_FILE_EXT
Definition: File.h:25
size_t writePage(FILE *f, const size_t pageSize, const size_t pageNum, int8_t *buf)
Writes a page from buf to the file.
Definition: File.cpp:203
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:57
constexpr double f
Definition: Utm.h:31
std::string to_string(char const *&&v)
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:160
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
::FILE * fopen(const char *filename, const char *mode)
Definition: heavyai_fs.cpp:74
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf, const std::string &file_path)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:142
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:230
bool g_read_only
Definition: File.cpp:40
FILE * open(int fileId)
Opens the existing data file with the given id; a failure to open is reported as a fatal error.
Definition: File.cpp:107
#define CHECK(condition)
Definition: Logger.h:291
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128
bool removeFile(const std::string basePath, const std::string filename)
Deletes the file whose path is basePath followed by filename.
Definition: File.cpp:134
void file_delete(std::atomic< bool > &program_is_running, const unsigned int wait_interval_seconds, const std::string base_path)
Definition: File.cpp:304
std::string get_data_file_path(const std::string &base_path, int file_id, size_t page_size)
Definition: File.cpp:44
void renameForDelete(const std::string directoryName)
Renames a directory to <oldname>_<EPOCH>_DELETE_ME.
Definition: File.cpp:242
constexpr auto kLegacyDataFileExtension
Definition: File.h:36
A selection of helper methods for File I/O.
size_t readPage(FILE *f, const size_t pageSize, const size_t pageNum, int8_t *buf, const std::string &file_path)
Reads the specified page from the file f into buf.
Definition: File.cpp:185