OmniSciDB  ca0c39ec8f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
File.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "Shared/File.h"
24 
25 #include <algorithm>
26 #include <atomic>
27 #include <cerrno>
28 #include <chrono>
29 #include <cstdio>
30 #include <cstring>
31 #include <iostream>
32 #include <stdexcept>
33 #include <string>
34 
35 #include "Logger/Logger.h"
36 #include "OSDependent/heavyai_fs.h"
37 
38 #include <boost/filesystem.hpp>
39 
// Process-wide read-only switch. When set, the open() helpers in this file use
// mode "rb" instead of "r+b", and the mutating helpers log a FATAL error
// instead of modifying anything.
bool g_read_only = false;
41 
42 namespace File_Namespace {
43 
44 std::string get_data_file_path(const std::string& base_path,
45  int file_id,
46  size_t page_size) {
47  return base_path + "/" + std::to_string(file_id) + "." + std::to_string(page_size) +
48  std::string(DATA_FILE_EXT); // DATA_FILE_EXT has preceding "."
49 }
50 
51 std::string get_legacy_data_file_path(const std::string& new_data_file_path) {
52  auto legacy_path = boost::filesystem::canonical(new_data_file_path);
53  legacy_path.replace_extension(kLegacyDataFileExtension);
54  return legacy_path.string();
55 }
56 
57 FILE* create(const std::string& basePath,
58  const int fileId,
59  const size_t pageSize,
60  const size_t numPages) {
61  auto path = get_data_file_path(basePath, fileId, pageSize);
62  if (numPages < 1 || pageSize < 1) {
63  LOG(FATAL) << "Error trying to create file '" << path
64  << "', Number of pages and page size must be positive integers. numPages "
65  << numPages << " pageSize " << pageSize;
66  }
67  FILE* f = heavyai::fopen(path.c_str(), "w+b");
68  if (f == nullptr) {
69  LOG(FATAL) << "Error trying to create file '" << path
70  << "', the error was: " << std::strerror(errno);
71  }
72  fseek(f, static_cast<long>((pageSize * numPages) - 1), SEEK_SET);
73  fputc(EOF, f);
74  fseek(f, 0, SEEK_SET); // rewind
75  if (fileSize(f) != pageSize * numPages) {
76  LOG(FATAL) << "Error trying to create file '" << path << "', file size "
77  << fileSize(f) << " does not equal pageSize * numPages "
78  << pageSize * numPages;
79  }
80  boost::filesystem::create_symlink(boost::filesystem::canonical(path).filename(),
82  return f;
83 }
84 
85 FILE* create(const std::string& fullPath, const size_t requestedFileSize) {
86  if (g_read_only) {
87  LOG(FATAL) << "Error trying to create file '" << fullPath
88  << "', not allowed read only ";
89  }
90  FILE* f = heavyai::fopen(fullPath.c_str(), "w+b");
91  if (f == nullptr) {
92  LOG(FATAL) << "Error trying to create file '" << fullPath
93  << "', the error was: " << std::strerror(errno);
94  ;
95  }
96  fseek(f, static_cast<long>(requestedFileSize - 1), SEEK_SET);
97  fputc(EOF, f);
98  fseek(f, 0, SEEK_SET); // rewind
99  if (fileSize(f) != requestedFileSize) {
100  LOG(FATAL) << "Error trying to create file '" << fullPath << "', file size "
101  << fileSize(f) << " does not equal requestedFileSize "
102  << requestedFileSize;
103  }
104  return f;
105 }
106 
107 FILE* open(int fileId) {
108  std::string s(std::to_string(fileId) + std::string(DATA_FILE_EXT));
109  FILE* f = heavyai::fopen(
110  s.c_str(), g_read_only ? "rb" : "r+b"); // opens existing file for updates
111  if (f == nullptr) {
112  LOG(FATAL) << "Error trying to open file '" << s
113  << "', the error was: " << std::strerror(errno);
114  }
115  return f;
116 }
117 
118 FILE* open(const std::string& path) {
119  FILE* f = heavyai::fopen(
120  path.c_str(), g_read_only ? "rb" : "r+b"); // opens existing file for updates
121  if (f == nullptr) {
122  LOG(FATAL) << "Error trying to open file '" << path
123  << "', the errno was: " << std::strerror(errno);
124  }
125  return f;
126 }
127 
128 void close(FILE* f) {
129  CHECK(f);
130  CHECK_EQ(fflush(f), 0);
131  CHECK_EQ(fclose(f), 0);
132 }
133 
134 bool removeFile(const std::string basePath, const std::string filename) {
135  if (g_read_only) {
136  LOG(FATAL) << "Error trying to remove file '" << filename << "', running readonly";
137  }
138  const std::string filePath = basePath + filename;
139  return remove(filePath.c_str()) == 0;
140 }
141 
142 size_t read(FILE* f, const size_t offset, const size_t size, int8_t* buf) {
143  // read "size" bytes from the offset location in the file into the buffer
144  CHECK_EQ(fseek(f, static_cast<long>(offset), SEEK_SET), 0);
145  size_t bytesRead = fread(buf, sizeof(int8_t), size, f);
146  CHECK_EQ(bytesRead, sizeof(int8_t) * size);
147  return bytesRead;
148 }
149 
150 size_t write(FILE* f, const size_t offset, const size_t size, const int8_t* buf) {
151  if (g_read_only) {
152  LOG(FATAL) << "Error trying to write file '" << f << "', running readonly";
153  }
154  // write size bytes from the buffer to the offset location in the file
155  if (fseek(f, static_cast<long>(offset), SEEK_SET) != 0) {
156  LOG(FATAL)
157  << "Error trying to write to file (during positioning seek) the error was: "
158  << std::strerror(errno);
159  }
160  size_t bytesWritten = fwrite(buf, sizeof(int8_t), size, f);
161  if (bytesWritten != sizeof(int8_t) * size) {
162  LOG(FATAL) << "Error trying to write to file (during fwrite) the error was: "
163  << std::strerror(errno);
164  }
165  return bytesWritten;
166 }
167 
168 size_t append(FILE* f, const size_t size, const int8_t* buf) {
169  if (g_read_only) {
170  LOG(FATAL) << "Error trying to append file '" << f << "', running readonly";
171  }
172  return write(f, fileSize(f), size, buf);
173 }
174 
175 size_t readPage(FILE* f, const size_t pageSize, const size_t pageNum, int8_t* buf) {
176  return read(f, pageNum * pageSize, pageSize, buf);
177 }
178 
179 size_t readPartialPage(FILE* f,
180  const size_t pageSize,
181  const size_t offset,
182  const size_t readSize,
183  const size_t pageNum,
184  int8_t* buf) {
185  return read(f, pageNum * pageSize + offset, readSize, buf);
186 }
187 
188 size_t writePage(FILE* f, const size_t pageSize, const size_t pageNum, int8_t* buf) {
189  if (g_read_only) {
190  LOG(FATAL) << "Error trying to writePage file '" << f << "', running readonly";
191  }
192  return write(f, pageNum * pageSize, pageSize, buf);
193 }
194 
195 size_t writePartialPage(FILE* f,
196  const size_t pageSize,
197  const size_t offset,
198  const size_t writeSize,
199  const size_t pageNum,
200  int8_t* buf) {
201  if (g_read_only) {
202  LOG(FATAL) << "Error trying to writePartialPage file '" << f << "', running readonly";
203  }
204  return write(f, pageNum * pageSize + offset, writeSize, buf);
205 }
206 
207 size_t appendPage(FILE* f, const size_t pageSize, int8_t* buf) {
208  if (g_read_only) {
209  LOG(FATAL) << "Error trying to appendPage file '" << f << "', running readonly";
210  }
211  return write(f, fileSize(f), pageSize, buf);
212 }
213 
215 size_t fileSize(FILE* f) {
216  fseek(f, 0, SEEK_END);
217  size_t size = (size_t)ftell(f);
218  fseek(f, 0, SEEK_SET);
219  return size;
220 }
221 
222 // this is a helper function to rename existing directories
223 // allowing for an async process to actually remove the physical directries
224 // and subfolders and files later
225 // it is required due to the large amount of time it can take to delete
226 // physical files from large disks
227 void renameForDelete(const std::string directoryName) {
228  boost::system::error_code ec;
229  boost::filesystem::path directoryPath(directoryName);
230  using namespace std::chrono;
231  milliseconds ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch());
232 
233  if (boost::filesystem::exists(directoryPath) &&
234  boost::filesystem::is_directory(directoryPath)) {
235  boost::filesystem::path newDirectoryPath(directoryName + "_" +
236  std::to_string(ms.count()) + "_DELETE_ME");
237  boost::filesystem::rename(directoryPath, newDirectoryPath, ec);
238 
239 #ifdef _WIN32
240  // On Windows we sometimes fail to rename a directory with System: 5 error
241  // code (access denied). An attempt to stop in debugger and look for opened
242  // handles for some of directory content shows no opened handles and actually
243  // allows renaming to execute successfully. It's not clear why, but a short
244  // pause allows to rename directory successfully. Until reasons are known,
245  // use this retry loop as a workaround.
246  int tries = 10;
247  while (ec.value() != boost::system::errc::success && tries) {
248  LOG(ERROR) << "Failed to rename directory " << directoryPath << " error was " << ec
249  << " (" << tries << " attempts left)";
250  std::this_thread::sleep_for(std::chrono::milliseconds(100 / tries));
251  tries--;
252  boost::filesystem::rename(directoryPath, newDirectoryPath, ec);
253  }
254 #endif
255 
256  if (ec.value() == boost::system::errc::success) {
257  std::thread th([newDirectoryPath]() {
258  boost::system::error_code ec;
259  boost::filesystem::remove_all(newDirectoryPath, ec);
260  // We dont check error on remove here as we cant log the
261  // issue fromdetached thrad, its not safe to LOG from here
262  // This is under investigation as clang detects TSAN issue data race
263  // the main system wide file_delete_thread will clean up any missed files
264  });
265  // let it run free so we can return
266  // if it fails the file_delete_thread in DBHandler will clean up
267  th.detach();
268 
269  return;
270  }
271 
272  LOG(FATAL) << "Failed to rename file " << directoryName << " to "
273  << directoryName + "_" + std::to_string(ms.count()) + "_DELETE_ME Error: "
274  << ec;
275  }
276 }
277 
278 } // namespace File_Namespace
279 
280 // Still temporary location but avoids the link errors in the new distributed branch.
281 // See the comment file_delete.h
282 
283 #include <atomic>
284 #include <boost/algorithm/string/predicate.hpp>
285 #include <boost/filesystem.hpp>
286 #include <chrono>
287 #include <thread>
288 
289 void file_delete(std::atomic<bool>& program_is_running,
290  const unsigned int wait_interval_seconds,
291  const std::string base_path) {
292  const auto wait_duration = std::chrono::seconds(wait_interval_seconds);
293  const boost::filesystem::path path(base_path);
294  while (program_is_running) {
295  using vec = std::vector<boost::filesystem::path>; // store paths,
296  vec v;
297  boost::system::error_code ec;
298 
299  // copy vector from iterator as was getting weird random errors if
300  // removed direct from iterator
301  copy(boost::filesystem::directory_iterator(path),
302  boost::filesystem::directory_iterator(),
303  back_inserter(v));
304  for (vec::const_iterator it(v.begin()); it != v.end(); ++it) {
305  std::string object_name(it->string());
306 
307  if (boost::algorithm::ends_with(object_name, "DELETE_ME")) {
308  LOG(INFO) << " removing object " << object_name;
309  boost::filesystem::remove_all(*it, ec);
310  if (ec.value() != boost::system::errc::success) {
311  LOG(ERROR) << "Failed to remove object " << object_name << " error was " << ec;
312  }
313  }
314  }
315 
316  std::this_thread::sleep_for(wait_duration);
317  }
318 }
size_t appendPage(FILE *f, const size_t pageSize, int8_t *buf)
Appends a page from buf to the file.
Definition: File.cpp:207
#define CHECK_EQ(x, y)
Definition: Logger.h:230
size_t append(FILE *f, const size_t size, const int8_t *buf)
Appends the specified number of bytes to the end of the file f from buf.
Definition: File.cpp:168
std::string get_legacy_data_file_path(const std::string &new_data_file_path)
Definition: File.cpp:51
#define LOG(tag)
Definition: Logger.h:216
size_t writePartialPage(FILE *f, const size_t pageSize, const size_t offset, const size_t writeSize, const size_t pageNum, int8_t *buf)
Definition: File.cpp:195
#define DATA_FILE_EXT
Definition: File.h:25
size_t writePage(FILE *f, const size_t pageSize, const size_t pageNum, int8_t *buf)
Writes a page from buf to the file.
Definition: File.cpp:188
constexpr double f
Definition: Utm.h:31
std::string to_string(char const *&&v)
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:57
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:150
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:142
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
::FILE * fopen(const char *filename, const char *mode)
Definition: heavyai_fs.cpp:74
size_t readPage(FILE *f, const size_t pageSize, const size_t pageNum, int8_t *buf)
Reads the specified page from the file f into buf.
Definition: File.cpp:175
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:215
bool g_read_only
Definition: File.cpp:40
size_t readPartialPage(FILE *f, const size_t pageSize, const size_t offset, const size_t readSize, const size_t pageNum, int8_t *buf)
Definition: File.cpp:179
FILE * open(int fileId)
Opens the existing file with the given id; logs a fatal error if it cannot be opened.
Definition: File.cpp:107
#define CHECK(condition)
Definition: Logger.h:222
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128
bool removeFile(const std::string basePath, const std::string filename)
Deletes the file whose path is basePath followed by filename.
Definition: File.cpp:134
void file_delete(std::atomic< bool > &program_is_running, const unsigned int wait_interval_seconds, const std::string base_path)
Definition: File.cpp:289
std::string get_data_file_path(const std::string &base_path, int file_id, size_t page_size)
Definition: File.cpp:44
void renameForDelete(const std::string directoryName)
Renames a directory to <oldname>_<EPOCH>_DELETE_ME.
Definition: File.cpp:227
constexpr auto kLegacyDataFileExtension
Definition: File.h:36
A selection of helper methods for File I/O.