OmniSciDB  d2f719934e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
File.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "Shared/File.h"
24 
25 #include <algorithm>
26 #include <atomic>
27 #include <cerrno>
28 #include <chrono>
29 #include <cstdio>
30 #include <cstring>
31 #include <iostream>
32 #include <stdexcept>
33 #include <string>
34 
35 #include "Logger/Logger.h"
36 #include "OSDependent/omnisci_fs.h"
37 
38 #include <boost/filesystem.hpp>
39 
// Process-wide read-only switch (defaults to false). When it is true the open()
// helpers in this file use mode "rb" instead of "r+b", and the helpers that
// check it (create(fullPath, ...), removeFile, write, append, writePage,
// writePartialPage, appendPage) log a FATAL error instead of modifying a file.
bool g_read_only{false};
41 
42 namespace File_Namespace {
43 
44 std::string get_data_file_path(const std::string& base_path,
45  int file_id,
46  size_t page_size) {
47  return base_path + "/" + std::to_string(file_id) + "." + std::to_string(page_size) +
48  std::string(MAPD_FILE_EXT); // MAPD_FILE_EXT has preceding "."
49 }
50 
51 FILE* create(const std::string& basePath,
52  const int fileId,
53  const size_t pageSize,
54  const size_t numPages) {
55  auto path = get_data_file_path(basePath, fileId, pageSize);
56  if (numPages < 1 || pageSize < 1) {
57  LOG(FATAL) << "Error trying to create file '" << path
58  << "', Number of pages and page size must be positive integers. numPages "
59  << numPages << " pageSize " << pageSize;
60  }
61  FILE* f = omnisci::fopen(path.c_str(), "w+b");
62  if (f == nullptr) {
63  LOG(FATAL) << "Error trying to create file '" << path
64  << "', the error was: " << std::strerror(errno);
65  }
66  fseek(f, static_cast<long>((pageSize * numPages) - 1), SEEK_SET);
67  fputc(EOF, f);
68  fseek(f, 0, SEEK_SET); // rewind
69  if (fileSize(f) != pageSize * numPages) {
70  LOG(FATAL) << "Error trying to create file '" << path << "', file size "
71  << fileSize(f) << " does not equal pageSize * numPages "
72  << pageSize * numPages;
73  }
74 
75  return f;
76 }
77 
78 FILE* create(const std::string& fullPath, const size_t requestedFileSize) {
79  if (g_read_only) {
80  LOG(FATAL) << "Error trying to create file '" << fullPath
81  << "', not allowed read only ";
82  }
83  FILE* f = omnisci::fopen(fullPath.c_str(), "w+b");
84  if (f == nullptr) {
85  LOG(FATAL) << "Error trying to create file '" << fullPath
86  << "', the error was: " << std::strerror(errno);
87  ;
88  }
89  fseek(f, static_cast<long>(requestedFileSize - 1), SEEK_SET);
90  fputc(EOF, f);
91  fseek(f, 0, SEEK_SET); // rewind
92  if (fileSize(f) != requestedFileSize) {
93  LOG(FATAL) << "Error trying to create file '" << fullPath << "', file size "
94  << fileSize(f) << " does not equal requestedFileSize "
95  << requestedFileSize;
96  }
97  return f;
98 }
99 
100 FILE* open(int fileId) {
101  std::string s(std::to_string(fileId) + std::string(MAPD_FILE_EXT));
102  FILE* f = omnisci::fopen(
103  s.c_str(), g_read_only ? "rb" : "r+b"); // opens existing file for updates
104  if (f == nullptr) {
105  LOG(FATAL) << "Error trying to open file '" << s
106  << "', the error was: " << std::strerror(errno);
107  }
108  return f;
109 }
110 
111 FILE* open(const std::string& path) {
112  FILE* f = omnisci::fopen(
113  path.c_str(), g_read_only ? "rb" : "r+b"); // opens existing file for updates
114  if (f == nullptr) {
115  LOG(FATAL) << "Error trying to open file '" << path
116  << "', the errno was: " << std::strerror(errno);
117  }
118  return f;
119 }
120 
/**
 * Flushes and closes the stream f.
 *
 * f must be non-null, and both fflush and fclose must return 0; each of these
 * conditions is enforced with a CHECK.
 *
 * @param f stream to close
 */
void close(FILE* f) {
  CHECK(f);
  // Flush explicitly first so that a failed flush is caught by its own check
  // rather than being folded into the fclose result.
  CHECK_EQ(fflush(f), 0);
  CHECK_EQ(fclose(f), 0);
}
126 
127 bool removeFile(const std::string basePath, const std::string filename) {
128  if (g_read_only) {
129  LOG(FATAL) << "Error trying to remove file '" << filename << "', running readonly";
130  }
131  const std::string filePath = basePath + filename;
132  return remove(filePath.c_str()) == 0;
133 }
134 
135 size_t read(FILE* f, const size_t offset, const size_t size, int8_t* buf) {
136  // read "size" bytes from the offset location in the file into the buffer
137  CHECK_EQ(fseek(f, static_cast<long>(offset), SEEK_SET), 0);
138  size_t bytesRead = fread(buf, sizeof(int8_t), size, f);
139  CHECK_EQ(bytesRead, sizeof(int8_t) * size);
140  return bytesRead;
141 }
142 
143 size_t write(FILE* f, const size_t offset, const size_t size, const int8_t* buf) {
144  if (g_read_only) {
145  LOG(FATAL) << "Error trying to write file '" << f << "', running readonly";
146  }
147  // write size bytes from the buffer to the offset location in the file
148  if (fseek(f, static_cast<long>(offset), SEEK_SET) != 0) {
149  LOG(FATAL)
150  << "Error trying to write to file (during positioning seek) the error was: "
151  << std::strerror(errno);
152  }
153  size_t bytesWritten = fwrite(buf, sizeof(int8_t), size, f);
154  if (bytesWritten != sizeof(int8_t) * size) {
155  LOG(FATAL) << "Error trying to write to file (during fwrite) the error was: "
156  << std::strerror(errno);
157  }
158  return bytesWritten;
159 }
160 
161 size_t append(FILE* f, const size_t size, const int8_t* buf) {
162  if (g_read_only) {
163  LOG(FATAL) << "Error trying to append file '" << f << "', running readonly";
164  }
165  return write(f, fileSize(f), size, buf);
166 }
167 
168 size_t readPage(FILE* f, const size_t pageSize, const size_t pageNum, int8_t* buf) {
169  return read(f, pageNum * pageSize, pageSize, buf);
170 }
171 
172 size_t readPartialPage(FILE* f,
173  const size_t pageSize,
174  const size_t offset,
175  const size_t readSize,
176  const size_t pageNum,
177  int8_t* buf) {
178  return read(f, pageNum * pageSize + offset, readSize, buf);
179 }
180 
181 size_t writePage(FILE* f, const size_t pageSize, const size_t pageNum, int8_t* buf) {
182  if (g_read_only) {
183  LOG(FATAL) << "Error trying to writePage file '" << f << "', running readonly";
184  }
185  return write(f, pageNum * pageSize, pageSize, buf);
186 }
187 
188 size_t writePartialPage(FILE* f,
189  const size_t pageSize,
190  const size_t offset,
191  const size_t writeSize,
192  const size_t pageNum,
193  int8_t* buf) {
194  if (g_read_only) {
195  LOG(FATAL) << "Error trying to writePartialPage file '" << f << "', running readonly";
196  }
197  return write(f, pageNum * pageSize + offset, writeSize, buf);
198 }
199 
200 size_t appendPage(FILE* f, const size_t pageSize, int8_t* buf) {
201  if (g_read_only) {
202  LOG(FATAL) << "Error trying to appendPage file '" << f << "', running readonly";
203  }
204  return write(f, fileSize(f), pageSize, buf);
205 }
206 
208 size_t fileSize(FILE* f) {
209  fseek(f, 0, SEEK_END);
210  size_t size = (size_t)ftell(f);
211  fseek(f, 0, SEEK_SET);
212  return size;
213 }
214 
215 // this is a helper function to rename existing directories
216 // allowing for an async process to actually remove the physical directries
217 // and subfolders and files later
218 // it is required due to the large amount of time it can take to delete
219 // physical files from large disks
220 void renameForDelete(const std::string directoryName) {
221  boost::system::error_code ec;
222  boost::filesystem::path directoryPath(directoryName);
223  using namespace std::chrono;
224  milliseconds ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch());
225 
226  if (boost::filesystem::exists(directoryPath) &&
227  boost::filesystem::is_directory(directoryPath)) {
228  boost::filesystem::path newDirectoryPath(directoryName + "_" +
229  std::to_string(ms.count()) + "_DELETE_ME");
230  boost::filesystem::rename(directoryPath, newDirectoryPath, ec);
231 
232 #ifdef _WIN32
233  // On Windows we sometimes fail to rename a directory with System: 5 error
234  // code (access denied). An attempt to stop in debugger and look for opened
235  // handles for some of directory content shows no opened handles and actually
236  // allows renaming to execute successfully. It's not clear why, but a short
237  // pause allows to rename directory successfully. Until reasons are known,
238  // use this retry loop as a workaround.
239  int tries = 10;
240  while (ec.value() != boost::system::errc::success && tries) {
241  LOG(ERROR) << "Failed to rename directory " << directoryPath << " error was " << ec
242  << " (" << tries << " attempts left)";
243  std::this_thread::sleep_for(std::chrono::milliseconds(100 / tries));
244  tries--;
245  boost::filesystem::rename(directoryPath, newDirectoryPath, ec);
246  }
247 #endif
248 
249  if (ec.value() == boost::system::errc::success) {
250  std::thread th([newDirectoryPath]() {
251  boost::system::error_code ec;
252  boost::filesystem::remove_all(newDirectoryPath, ec);
253  // We dont check error on remove here as we cant log the
254  // issue fromdetached thrad, its not safe to LOG from here
255  // This is under investigation as clang detects TSAN issue data race
256  // the main system wide file_delete_thread will clean up any missed files
257  });
258  // let it run free so we can return
259  // if it fails the file_delete_thread in DBHandler will clean up
260  th.detach();
261 
262  return;
263  }
264 
265  LOG(FATAL) << "Failed to rename file " << directoryName << " to "
266  << directoryName + "_" + std::to_string(ms.count()) + "_DELETE_ME Error: "
267  << ec;
268  }
269 }
270 
271 } // namespace File_Namespace
272 
273 // Still a temporary location, but it avoids the link errors in the new distributed branch.
274 // See the comment in file_delete.h
275 
276 #include <atomic>
277 #include <boost/algorithm/string/predicate.hpp>
278 #include <boost/filesystem.hpp>
279 #include <chrono>
280 #include <thread>
281 
282 void file_delete(std::atomic<bool>& program_is_running,
283  const unsigned int wait_interval_seconds,
284  const std::string base_path) {
285  const auto wait_duration = std::chrono::seconds(wait_interval_seconds);
286  const boost::filesystem::path path(base_path);
287  while (program_is_running) {
288  using vec = std::vector<boost::filesystem::path>; // store paths,
289  vec v;
290  boost::system::error_code ec;
291 
292  // copy vector from iterator as was getting weird random errors if
293  // removed direct from iterator
294  copy(boost::filesystem::directory_iterator(path),
295  boost::filesystem::directory_iterator(),
296  back_inserter(v));
297  for (vec::const_iterator it(v.begin()); it != v.end(); ++it) {
298  std::string object_name(it->string());
299 
300  if (boost::algorithm::ends_with(object_name, "DELETE_ME")) {
301  LOG(INFO) << " removing object " << object_name;
302  boost::filesystem::remove_all(*it, ec);
303  if (ec.value() != boost::system::errc::success) {
304  LOG(ERROR) << "Failed to remove object " << object_name << " error was " << ec;
305  }
306  }
307  }
308 
309  std::this_thread::sleep_for(wait_duration);
310  }
311 }
size_t appendPage(FILE *f, const size_t pageSize, int8_t *buf)
Appends a page from buf to the file.
Definition: File.cpp:200
#define CHECK_EQ(x, y)
Definition: Logger.h:219
size_t append(FILE *f, const size_t size, const int8_t *buf)
Appends the specified number of bytes to the end of the file f from buf.
Definition: File.cpp:161
::FILE * fopen(const char *filename, const char *mode)
Definition: omnisci_fs.cpp:72
#define LOG(tag)
Definition: Logger.h:205
size_t writePartialPage(FILE *f, const size_t pageSize, const size_t offset, const size_t writeSize, const size_t pageNum, int8_t *buf)
Definition: File.cpp:188
#define MAPD_FILE_EXT
Definition: File.h:25
size_t writePage(FILE *f, const size_t pageSize, const size_t pageNum, int8_t *buf)
Writes a page from buf to the file.
Definition: File.cpp:181
std::string to_string(char const *&&v)
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:51
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:143
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:135
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
size_t readPage(FILE *f, const size_t pageSize, const size_t pageNum, int8_t *buf)
Reads the specified page from the file f into buf.
Definition: File.cpp:168
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:208
bool g_read_only
Definition: File.cpp:40
size_t readPartialPage(FILE *f, const size_t pageSize, const size_t offset, const size_t readSize, const size_t pageNum, int8_t *buf)
Definition: File.cpp:172
FILE * open(int fileId)
Opens the existing file with the given id; logs a FATAL error if it cannot be opened.
Definition: File.cpp:100
#define CHECK(condition)
Definition: Logger.h:211
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:121
char * f
bool removeFile(const std::string basePath, const std::string filename)
Deletes the file whose path is basePath + filename; returns true on success.
Definition: File.cpp:127
void file_delete(std::atomic< bool > &program_is_running, const unsigned int wait_interval_seconds, const std::string base_path)
Definition: File.cpp:282
std::string get_data_file_path(const std::string &base_path, int file_id, size_t page_size)
Definition: File.cpp:44
void renameForDelete(const std::string directoryName)
Renames a directory to <oldname>_<EPOCH>_DELETE_ME, where <EPOCH> is the current time in milliseconds.
Definition: File.cpp:220
A selection of helper methods for File I/O.