OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
File.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "Shared/File.h"
24 
25 #include <algorithm>
26 #include <atomic>
27 #include <cerrno>
28 #include <chrono>
29 #include <cstdio>
30 #include <cstring>
31 #include <iostream>
32 #include <stdexcept>
33 #include <string>
34 
35 #include "Logger/Logger.h"
36 #include "OSDependent/omnisci_fs.h"
37 
// Global switch: when true, the mutating helpers in this file (create by full
// path, removeFile, write, append, ...) log a FATAL error instead of touching
// the file system, and open() uses a read-only fopen mode.
bool g_read_only = false;
39 
40 namespace File_Namespace {
41 
42 std::string get_data_file_path(const std::string& base_path,
43  int file_id,
44  size_t page_size) {
45  return base_path + "/" + std::to_string(file_id) + "." + std::to_string(page_size) +
46  std::string(MAPD_FILE_EXT); // MAPD_FILE_EXT has preceding "."
47 }
48 
49 FILE* create(const std::string& basePath,
50  const int fileId,
51  const size_t pageSize,
52  const size_t numPages) {
53  auto path = get_data_file_path(basePath, fileId, pageSize);
54  if (numPages < 1 || pageSize < 1) {
55  LOG(FATAL) << "Error trying to create file '" << path
56  << "', Number of pages and page size must be positive integers. numPages "
57  << numPages << " pageSize " << pageSize;
58  }
59  FILE* f = omnisci::fopen(path.c_str(), "w+b");
60  if (f == nullptr) {
61  LOG(FATAL) << "Error trying to create file '" << path
62  << "', the error was: " << std::strerror(errno);
63  }
64  fseek(f, static_cast<long>((pageSize * numPages) - 1), SEEK_SET);
65  fputc(EOF, f);
66  fseek(f, 0, SEEK_SET); // rewind
67  if (fileSize(f) != pageSize * numPages) {
68  LOG(FATAL) << "Error trying to create file '" << path << "', file size "
69  << fileSize(f) << " does not equal pageSize * numPages "
70  << pageSize * numPages;
71  }
72 
73  return f;
74 }
75 
76 FILE* create(const std::string& fullPath, const size_t requestedFileSize) {
77  if (g_read_only) {
78  LOG(FATAL) << "Error trying to create file '" << fullPath
79  << "', not allowed read only ";
80  }
81  FILE* f = omnisci::fopen(fullPath.c_str(), "w+b");
82  if (f == nullptr) {
83  LOG(FATAL) << "Error trying to create file '" << fullPath
84  << "', the error was: " << std::strerror(errno);
85  ;
86  }
87  fseek(f, static_cast<long>(requestedFileSize - 1), SEEK_SET);
88  fputc(EOF, f);
89  fseek(f, 0, SEEK_SET); // rewind
90  if (fileSize(f) != requestedFileSize) {
91  LOG(FATAL) << "Error trying to create file '" << fullPath << "', file size "
92  << fileSize(f) << " does not equal requestedFileSize "
93  << requestedFileSize;
94  }
95  return f;
96 }
97 
98 FILE* open(int fileId) {
99  std::string s(std::to_string(fileId) + std::string(MAPD_FILE_EXT));
100  FILE* f = omnisci::fopen(
101  s.c_str(), g_read_only ? "rb" : "r+b"); // opens existing file for updates
102  if (f == nullptr) {
103  LOG(FATAL) << "Error trying to open file '" << s
104  << "', the error was: " << std::strerror(errno);
105  }
106  return f;
107 }
108 
109 FILE* open(const std::string& path) {
110  FILE* f = omnisci::fopen(
111  path.c_str(), g_read_only ? "rb" : "r+b"); // opens existing file for updates
112  if (f == nullptr) {
113  LOG(FATAL) << "Error trying to open file '" << path
114  << "', the errno was: " << std::strerror(errno);
115  }
116  return f;
117 }
118 
119 void close(FILE* f) {
120  CHECK(f);
121  CHECK_EQ(fflush(f), 0);
122  CHECK_EQ(fclose(f), 0);
123 }
124 
125 bool removeFile(const std::string basePath, const std::string filename) {
126  if (g_read_only) {
127  LOG(FATAL) << "Error trying to remove file '" << filename << "', running readonly";
128  }
129  const std::string filePath = basePath + filename;
130  return remove(filePath.c_str()) == 0;
131 }
132 
133 size_t read(FILE* f, const size_t offset, const size_t size, int8_t* buf) {
134  // read "size" bytes from the offset location in the file into the buffer
135  CHECK_EQ(fseek(f, static_cast<long>(offset), SEEK_SET), 0);
136  size_t bytesRead = fread(buf, sizeof(int8_t), size, f);
137  CHECK_EQ(bytesRead, sizeof(int8_t) * size);
138  return bytesRead;
139 }
140 
141 size_t write(FILE* f, const size_t offset, const size_t size, const int8_t* buf) {
142  if (g_read_only) {
143  LOG(FATAL) << "Error trying to write file '" << f << "', running readonly";
144  }
145  // write size bytes from the buffer to the offset location in the file
146  if (fseek(f, static_cast<long>(offset), SEEK_SET) != 0) {
147  LOG(FATAL)
148  << "Error trying to write to file (during positioning seek) the error was: "
149  << std::strerror(errno);
150  }
151  size_t bytesWritten = fwrite(buf, sizeof(int8_t), size, f);
152  if (bytesWritten != sizeof(int8_t) * size) {
153  LOG(FATAL) << "Error trying to write to file (during fwrite) the error was: "
154  << std::strerror(errno);
155  }
156  return bytesWritten;
157 }
158 
159 size_t append(FILE* f, const size_t size, const int8_t* buf) {
160  if (g_read_only) {
161  LOG(FATAL) << "Error trying to append file '" << f << "', running readonly";
162  }
163  return write(f, fileSize(f), size, buf);
164 }
165 
166 size_t readPage(FILE* f, const size_t pageSize, const size_t pageNum, int8_t* buf) {
167  return read(f, pageNum * pageSize, pageSize, buf);
168 }
169 
170 size_t readPartialPage(FILE* f,
171  const size_t pageSize,
172  const size_t offset,
173  const size_t readSize,
174  const size_t pageNum,
175  int8_t* buf) {
176  return read(f, pageNum * pageSize + offset, readSize, buf);
177 }
178 
179 size_t writePage(FILE* f, const size_t pageSize, const size_t pageNum, int8_t* buf) {
180  if (g_read_only) {
181  LOG(FATAL) << "Error trying to writePage file '" << f << "', running readonly";
182  }
183  return write(f, pageNum * pageSize, pageSize, buf);
184 }
185 
186 size_t writePartialPage(FILE* f,
187  const size_t pageSize,
188  const size_t offset,
189  const size_t writeSize,
190  const size_t pageNum,
191  int8_t* buf) {
192  if (g_read_only) {
193  LOG(FATAL) << "Error trying to writePartialPage file '" << f << "', running readonly";
194  }
195  return write(f, pageNum * pageSize + offset, writeSize, buf);
196 }
197 
198 size_t appendPage(FILE* f, const size_t pageSize, int8_t* buf) {
199  if (g_read_only) {
200  LOG(FATAL) << "Error trying to appendPage file '" << f << "', running readonly";
201  }
202  return write(f, fileSize(f), pageSize, buf);
203 }
204 
/**
 * @brief Returns the size of file f in bytes.
 *
 * The size is taken from the stream position after seeking to the end of the
 * file. As a side effect the stream is left positioned at the start of the
 * file, regardless of where it was before the call.
 *
 * @param f file whose size is queried
 * @return the stream position at end-of-file, converted to size_t
 */
size_t fileSize(FILE* f) {
  fseek(f, 0L, SEEK_END);
  const long end_position = ftell(f);
  fseek(f, 0L, SEEK_SET);  // always rewind to the beginning
  return static_cast<size_t>(end_position);
}
212 
213 // this is a helper function to rename existing directories
214 // allowing for an async process to actually remove the physical directries
215 // and subfolders and files later
216 // it is required due to the large amount of time it can take to delete
217 // physical files from large disks
218 void renameForDelete(const std::string directoryName) {
219  boost::system::error_code ec;
220  boost::filesystem::path directoryPath(directoryName);
221  using namespace std::chrono;
222  milliseconds ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch());
223 
224  if (boost::filesystem::exists(directoryPath) &&
225  boost::filesystem::is_directory(directoryPath)) {
226  boost::filesystem::path newDirectoryPath(directoryName + "_" +
227  std::to_string(ms.count()) + "_DELETE_ME");
228  boost::filesystem::rename(directoryPath, newDirectoryPath, ec);
229 
230 #ifdef _WIN32
231  // On Windows we sometimes fail to rename a directory with System: 5 error
232  // code (access denied). An attempt to stop in debugger and look for opened
233  // handles for some of directory content shows no opened handles and actually
234  // allows renaming to execute successfully. It's not clear why, but a short
235  // pause allows to rename directory successfully. Until reasons are known,
236  // use this retry loop as a workaround.
237  int tries = 10;
238  while (ec.value() != boost::system::errc::success && tries) {
239  LOG(ERROR) << "Failed to rename directory " << directoryPath << " error was " << ec
240  << " (" << tries << " attempts left)";
241  std::this_thread::sleep_for(std::chrono::milliseconds(100 / tries));
242  tries--;
243  boost::filesystem::rename(directoryPath, newDirectoryPath, ec);
244  }
245 #endif
246 
247  if (ec.value() == boost::system::errc::success) {
248  std::thread th([newDirectoryPath]() {
249  boost::system::error_code ec;
250  boost::filesystem::remove_all(newDirectoryPath, ec);
251  // We dont check error on remove here as we cant log the
252  // issue fromdetached thrad, its not safe to LOG from here
253  // This is under investigation as clang detects TSAN issue data race
254  // the main system wide file_delete_thread will clean up any missed files
255  });
256  // let it run free so we can return
257  // if it fails the file_delete_thread in DBHandler will clean up
258  th.detach();
259 
260  return;
261  }
262 
263  LOG(FATAL) << "Failed to rename file " << directoryName << " to "
264  << directoryName + "_" + std::to_string(ms.count()) + "_DELETE_ME Error: "
265  << ec;
266  }
267 }
268 
269 } // namespace File_Namespace
270 
271 // Still a temporary location, but it avoids the link errors in the new distributed branch.
272 // See the comment in file_delete.h
273 
274 #include <atomic>
275 #include <boost/algorithm/string/predicate.hpp>
276 #include <boost/filesystem.hpp>
277 #include <chrono>
278 #include <thread>
279 
280 void file_delete(std::atomic<bool>& program_is_running,
281  const unsigned int wait_interval_seconds,
282  const std::string base_path) {
283  const auto wait_duration = std::chrono::seconds(wait_interval_seconds);
284  const boost::filesystem::path path(base_path);
285  while (program_is_running) {
286  using vec = std::vector<boost::filesystem::path>; // store paths,
287  vec v;
288  boost::system::error_code ec;
289 
290  // copy vector from iterator as was getting weird random errors if
291  // removed direct from iterator
292  copy(boost::filesystem::directory_iterator(path),
293  boost::filesystem::directory_iterator(),
294  back_inserter(v));
295  for (vec::const_iterator it(v.begin()); it != v.end(); ++it) {
296  std::string object_name(it->string());
297 
298  if (boost::algorithm::ends_with(object_name, "DELETE_ME")) {
299  LOG(INFO) << " removing object " << object_name;
300  boost::filesystem::remove_all(*it, ec);
301  if (ec.value() != boost::system::errc::success) {
302  LOG(ERROR) << "Failed to remove object " << object_name << " error was " << ec;
303  }
304  }
305  }
306 
307  std::this_thread::sleep_for(wait_duration);
308  }
309 }
size_t appendPage(FILE *f, const size_t pageSize, int8_t *buf)
Appends a page from buf to the file.
Definition: File.cpp:198
#define CHECK_EQ(x, y)
Definition: Logger.h:211
std::string filename(char const *path)
Definition: Logger.cpp:62
size_t append(FILE *f, const size_t size, const int8_t *buf)
Appends the specified number of bytes to the end of the file f from buf.
Definition: File.cpp:159
::FILE * fopen(const char *filename, const char *mode)
Definition: omnisci_fs.cpp:72
#define LOG(tag)
Definition: Logger.h:194
size_t writePartialPage(FILE *f, const size_t pageSize, const size_t offset, const size_t writeSize, const size_t pageNum, int8_t *buf)
Definition: File.cpp:186
#define MAPD_FILE_EXT
Definition: File.h:25
size_t writePage(FILE *f, const size_t pageSize, const size_t pageNum, int8_t *buf)
Writes a page from buf to the file.
Definition: File.cpp:179
std::string to_string(char const *&&v)
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:49
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:141
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:133
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
size_t readPage(FILE *f, const size_t pageSize, const size_t pageNum, int8_t *buf)
Reads the specified page from the file f into buf.
Definition: File.cpp:166
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:206
bool g_read_only
Definition: File.cpp:38
size_t readPartialPage(FILE *f, const size_t pageSize, const size_t offset, const size_t readSize, const size_t pageNum, int8_t *buf)
Definition: File.cpp:170
FILE * open(int fileId)
Opens the existing file with the given id; logs a FATAL error if it cannot be opened.
Definition: File.cpp:98
#define CHECK(condition)
Definition: Logger.h:203
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:119
char * f
bool removeFile(const std::string basePath, const std::string filename)
Deletes the file at basePath + filename; returns true on success.
Definition: File.cpp:125
void file_delete(std::atomic< bool > &program_is_running, const unsigned int wait_interval_seconds, const std::string base_path)
Definition: File.cpp:280
std::string get_data_file_path(const std::string &base_path, int file_id, size_t page_size)
Definition: File.cpp:42
void renameForDelete(const std::string directoryName)
Renames a directory to <oldname>_<EPOCH>_DELETE_ME.
Definition: File.cpp:218
A selection of helper methods for File I/O.