OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
File.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "Shared/File.h"
24 
25 #include <algorithm>
26 #include <atomic>
27 #include <cerrno>
28 #include <chrono>
29 #include <cstdio>
30 #include <cstring>
31 #include <iostream>
32 #include <stdexcept>
33 #include <string>
34 
35 #include "Logger/Logger.h"
36 #include "OSDependent/heavyai_fs.h"
37 
38 #include <boost/filesystem.hpp>
39 
40 namespace File_Namespace {
41 
42 std::string get_data_file_path(const std::string& base_path,
43  int file_id,
44  size_t page_size) {
45  return base_path + "/" + std::to_string(file_id) + "." + std::to_string(page_size) +
46  std::string(DATA_FILE_EXT); // DATA_FILE_EXT has preceding "."
47 }
48 
49 std::string get_legacy_data_file_path(const std::string& new_data_file_path) {
50  auto legacy_path = boost::filesystem::canonical(new_data_file_path);
51  legacy_path.replace_extension(kLegacyDataFileExtension);
52  return legacy_path.string();
53 }
54 
55 std::pair<FILE*, std::string> create(const std::string& basePath,
56  const int fileId,
57  const size_t pageSize,
58  const size_t numPages) {
59  auto path = get_data_file_path(basePath, fileId, pageSize);
60  if (numPages < 1 || pageSize < 1) {
61  LOG(FATAL) << "Error trying to create file '" << path
62  << "', Number of pages and page size must be positive integers. numPages "
63  << numPages << " pageSize " << pageSize;
64  }
65  FILE* f = heavyai::fopen(path.c_str(), "w+b");
66  if (f == nullptr) {
67  LOG(FATAL) << "Error trying to create file '" << path
68  << "', the error was: " << std::strerror(errno);
69  }
70  fseek(f, static_cast<long>((pageSize * numPages) - 1), SEEK_SET);
71  fputc(EOF, f);
72  fseek(f, 0, SEEK_SET); // rewind
73  if (fileSize(f) != pageSize * numPages) {
74  LOG(FATAL) << "Error trying to create file '" << path << "', file size "
75  << fileSize(f) << " does not equal pageSize * numPages "
76  << pageSize * numPages;
77  }
78  boost::filesystem::create_symlink(boost::filesystem::canonical(path).filename(),
80  return {f, path};
81 }
82 
83 FILE* create(const std::string& full_path, const size_t requested_file_size) {
84  FILE* f = heavyai::fopen(full_path.c_str(), "w+b");
85  if (f == nullptr) {
86  LOG(FATAL) << "Error trying to create file '" << full_path
87  << "', the error was: " << std::strerror(errno);
88  }
89  fseek(f, static_cast<long>(requested_file_size - 1), SEEK_SET);
90  fputc(EOF, f);
91  fseek(f, 0, SEEK_SET); // rewind
92  if (fileSize(f) != requested_file_size) {
93  LOG(FATAL) << "Error trying to create file '" << full_path << "', file size "
94  << fileSize(f) << " does not equal requested_file_size "
95  << requested_file_size;
96  }
97  return f;
98 }
99 
100 FILE* open(int file_id) {
101  std::string s(std::to_string(file_id) + std::string(DATA_FILE_EXT));
102  return open(s);
103 }
104 
105 FILE* open(const std::string& path) {
106  FILE* f = heavyai::fopen(path.c_str(), "r+b");
107  if (f == nullptr) {
108  LOG(FATAL) << "Error trying to open file '" << path
109  << "', the errno was: " << std::strerror(errno);
110  }
111  return f;
112 }
113 
114 void close(FILE* f) {
115  CHECK(f);
116  CHECK_EQ(fflush(f), 0);
117  CHECK_EQ(fclose(f), 0);
118 }
119 
/**
 * Deletes the file located at `base_path + filename`.
 *
 * The two arguments are concatenated verbatim; no path separator is inserted
 * between them.
 *
 * @param base_path leading part of the path
 * @param filename  trailing part of the path
 * @return true if the file was removed, false otherwise
 */
bool removeFile(const std::string& base_path, const std::string& filename) {
  std::string target{base_path};
  target.append(filename);
  const int status = remove(target.c_str());
  return status == 0;
}
124 
125 size_t read(FILE* f,
126  const size_t offset,
127  const size_t size,
128  int8_t* buf,
129  const std::string& file_path) {
130  // read "size" bytes from the offset location in the file into the buffer
131  CHECK_EQ(fseek(f, static_cast<long>(offset), SEEK_SET), 0);
132  size_t bytesRead = fread(buf, sizeof(int8_t), size, f);
133  auto expected_bytes_read = sizeof(int8_t) * size;
134  CHECK_EQ(bytesRead, expected_bytes_read)
135  << "Unexpected number of bytes read from file: " << file_path
136  << ". Expected bytes read: " << expected_bytes_read
137  << ", actual bytes read: " << bytesRead << ", offset: " << offset
138  << ", file stream error set: " << (std::ferror(f) ? "true" : "false")
139  << ", EOF reached: " << (std::feof(f) ? "true" : "false");
140  return bytesRead;
141 }
142 
143 size_t write(FILE* f, const size_t offset, const size_t size, const int8_t* buf) {
144  // write size bytes from the buffer to the offset location in the file
145  if (fseek(f, static_cast<long>(offset), SEEK_SET) != 0) {
146  LOG(FATAL)
147  << "Error trying to write to file (during positioning seek) the error was: "
148  << std::strerror(errno);
149  }
150  size_t bytesWritten = fwrite(buf, sizeof(int8_t), size, f);
151  if (bytesWritten != sizeof(int8_t) * size) {
152  LOG(FATAL) << "Error trying to write to file (during fwrite) the error was: "
153  << std::strerror(errno);
154  }
155  return bytesWritten;
156 }
157 
158 size_t append(FILE* f, const size_t size, const int8_t* buf) {
159  return write(f, fileSize(f), size, buf);
160 }
161 
162 size_t readPage(FILE* f,
163  const size_t pageSize,
164  const size_t pageNum,
165  int8_t* buf,
166  const std::string& file_path) {
167  return read(f, pageNum * pageSize, pageSize, buf, file_path);
168 }
169 
170 size_t readPartialPage(FILE* f,
171  const size_t pageSize,
172  const size_t offset,
173  const size_t readSize,
174  const size_t pageNum,
175  int8_t* buf,
176  const std::string& file_path) {
177  return read(f, pageNum * pageSize + offset, readSize, buf, file_path);
178 }
179 
180 size_t writePage(FILE* f, const size_t pageSize, const size_t pageNum, int8_t* buf) {
181  return write(f, pageNum * pageSize, pageSize, buf);
182 }
183 
184 size_t writePartialPage(FILE* f,
185  const size_t pageSize,
186  const size_t offset,
187  const size_t writeSize,
188  const size_t pageNum,
189  int8_t* buf) {
190  return write(f, pageNum * pageSize + offset, writeSize, buf);
191 }
192 
193 size_t appendPage(FILE* f, const size_t pageSize, int8_t* buf) {
194  return write(f, fileSize(f), pageSize, buf);
195 }
196 
198 size_t fileSize(FILE* f) {
199  fseek(f, 0, SEEK_END);
200  size_t size = (size_t)ftell(f);
201  fseek(f, 0, SEEK_SET);
202  return size;
203 }
204 
205 // this is a helper function to rename existing directories
206 // allowing for an async process to actually remove the physical directries
207 // and subfolders and files later
208 // it is required due to the large amount of time it can take to delete
209 // physical files from large disks
210 void renameForDelete(const std::string directoryName) {
212  boost::filesystem::path directoryPath(directoryName);
213  using namespace std::chrono;
214  milliseconds ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch());
215 
216  if (boost::filesystem::exists(directoryPath) &&
217  boost::filesystem::is_directory(directoryPath)) {
218  boost::filesystem::path newDirectoryPath(directoryName + "_" +
219  std::to_string(ms.count()) + "_DELETE_ME");
220  boost::filesystem::rename(directoryPath, newDirectoryPath, ec);
221 
222 #ifdef _WIN32
223  // On Windows we sometimes fail to rename a directory with System: 5 error
224  // code (access denied). An attempt to stop in debugger and look for opened
225  // handles for some of directory content shows no opened handles and actually
226  // allows renaming to execute successfully. It's not clear why, but a short
227  // pause allows to rename directory successfully. Until reasons are known,
228  // use this retry loop as a workaround.
229  int tries = 10;
230  while (ec.value() != boost::system::errc::success && tries) {
231  LOG(ERROR) << "Failed to rename directory " << directoryPath << " error was " << ec
232  << " (" << tries << " attempts left)";
233  std::this_thread::sleep_for(std::chrono::milliseconds(100 / tries));
234  tries--;
235  boost::filesystem::rename(directoryPath, newDirectoryPath, ec);
236  }
237 #endif
238 
239  if (ec.value() == boost::system::errc::success) {
240  std::thread th([newDirectoryPath]() {
242  boost::filesystem::remove_all(newDirectoryPath, ec);
243  // We dont check error on remove here as we cant log the
244  // issue fromdetached thrad, its not safe to LOG from here
245  // This is under investigation as clang detects TSAN issue data race
246  // the main system wide file_delete_thread will clean up any missed files
247  });
248  // let it run free so we can return
249  // if it fails the file_delete_thread in DBHandler will clean up
250  th.detach();
251 
252  return;
253  }
254 
255  LOG(FATAL) << "Failed to rename file " << directoryName << " to "
256  << directoryName + "_" + std::to_string(ms.count()) + "_DELETE_ME Error: "
257  << ec;
258  }
259 }
260 
261 } // namespace File_Namespace
262 
263 // Still temporary location but avoids the link errors in the new distributed branch.
// See the comment in file_delete.h
265 
266 #include <atomic>
267 #include <boost/algorithm/string/predicate.hpp>
268 #include <boost/filesystem.hpp>
269 #include <chrono>
270 #include <thread>
271 
272 void file_delete(std::atomic<bool>& program_is_running,
273  const unsigned int wait_interval_seconds,
274  const std::string base_path) {
275  const auto wait_duration = std::chrono::seconds(wait_interval_seconds);
276  const boost::filesystem::path path(base_path);
277  while (program_is_running) {
278  using vec = std::vector<boost::filesystem::path>; // store paths,
279  vec v;
281 
282  // copy vector from iterator as was getting weird random errors if
283  // removed direct from iterator
284  copy(boost::filesystem::directory_iterator(path),
285  boost::filesystem::directory_iterator(),
286  back_inserter(v));
287  for (vec::const_iterator it(v.begin()); it != v.end(); ++it) {
288  std::string object_name(it->string());
289 
290  if (boost::algorithm::ends_with(object_name, "DELETE_ME")) {
291  LOG(INFO) << " removing object " << object_name;
292  boost::filesystem::remove_all(*it, ec);
293  if (ec.value() != boost::system::errc::success) {
294  LOG(ERROR) << "Failed to remove object " << object_name << " error was " << ec;
295  }
296  }
297  }
298 
299  std::this_thread::sleep_for(wait_duration);
300  }
301 }
size_t appendPage(FILE *f, const size_t pageSize, int8_t *buf)
Appends a page from buf to the file.
Definition: File.cpp:193
#define CHECK_EQ(x, y)
Definition: Logger.h:301
size_t append(FILE *f, const size_t size, const int8_t *buf)
Appends the specified number of bytes to the end of the file f from buf.
Definition: File.cpp:158
std::string get_legacy_data_file_path(const std::string &new_data_file_path)
Definition: File.cpp:49
size_t readPartialPage(FILE *f, const size_t pageSize, const size_t offset, const size_t readSize, const size_t pageNum, int8_t *buf, const std::string &file_path)
Definition: File.cpp:170
#define LOG(tag)
Definition: Logger.h:285
size_t writePartialPage(FILE *f, const size_t pageSize, const size_t offset, const size_t writeSize, const size_t pageNum, int8_t *buf)
Definition: File.cpp:184
#define DATA_FILE_EXT
Definition: File.h:25
size_t writePage(FILE *f, const size_t pageSize, const size_t pageNum, int8_t *buf)
Writes a page from buf to the file.
Definition: File.cpp:180
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:55
std::string to_string(char const *&&v)
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:143
bool removeFile(const std::string &base_path, const std::string &filename)
Deletes the file at the path formed by concatenating base_path and filename.
Definition: File.cpp:120
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
::FILE * fopen(const char *filename, const char *mode)
Definition: heavyai_fs.cpp:74
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf, const std::string &file_path)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:125
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:198
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
FILE * open(int file_id)
Opens the file with the given id; fatal crash on error.
Definition: File.cpp:100
def error_code
Definition: report.py:244
#define CHECK(condition)
Definition: Logger.h:291
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:114
void file_delete(std::atomic< bool > &program_is_running, const unsigned int wait_interval_seconds, const std::string base_path)
Definition: File.cpp:272
std::string get_data_file_path(const std::string &base_path, int file_id, size_t page_size)
Definition: File.cpp:42
void renameForDelete(const std::string directoryName)
Renames a directory to <oldname>_<EPOCH>_DELETE_ME.
Definition: File.cpp:210
constexpr auto kLegacyDataFileExtension
Definition: File.h:36
A selection of helper methods for File I/O.
size_t readPage(FILE *f, const size_t pageSize, const size_t pageNum, int8_t *buf, const std::string &file_path)
Reads the specified page from the file f into buf.
Definition: File.cpp:162