OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{TableArchiver.cpp} Namespace Reference

Functions

std::string abs_path (const File_Namespace::GlobalFileMgr *global_file_mgr)
 
std::string run (const std::string &cmd, const std::string &chdir="")
 
std::string simple_file_cat (const std::string &archive_path, const std::string &file_name, const std::string &compression)
 
std::string get_table_schema (const std::string &archive_path, const std::string &table, const std::string &compression)
 
void rewrite_column_ids_in_page_headers (const boost::filesystem::path &path, const std::unordered_map< int, int > &column_ids_map, const int32_t table_epoch)
 
void adjust_altered_table_files (const int32_t table_epoch, const std::string &temp_data_dir, const std::unordered_map< int, int > &column_ids_map)
 
void delete_old_symlinks (const std::string &table_data_dir)
 
void add_data_file_symlinks (const std::string &table_data_dir)
 
void rename_table_directories (const File_Namespace::GlobalFileMgr *global_file_mgr, const std::string &temp_data_dir, const std::vector< std::string > &target_paths, const std::string &name_prefix)
 

Variables

auto simple_file_closer = [](FILE* f) { std::fclose(f); }
 

Function Documentation

std::string anonymous_namespace{TableArchiver.cpp}::abs_path ( const File_Namespace::GlobalFileMgr *  global_file_mgr)
inline

Definition at line 78 of file TableArchiver.cpp.

References File_Namespace::GlobalFileMgr::getBasePath().

Referenced by TableArchiver::dumpTable(), rename_table_directories(), and TableArchiver::restoreTable().

// Returns, as a std::string, the canonical form of the path reported by
// global_file_mgr->getBasePath().
// boost::filesystem::canonical is called without an error_code argument, so a
// failure is reported by exception rather than by return value.
// NOTE(review): global_file_mgr is dereferenced without a null check; presumably
// callers always pass a valid pointer -- confirm.
78  {
79  return boost::filesystem::canonical(global_file_mgr->getBasePath()).string();
80 }
std::string getBasePath() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{TableArchiver.cpp}::add_data_file_symlinks ( const std::string &  table_data_dir)

Definition at line 285 of file TableArchiver.cpp.

References DATA_FILE_EXT, and File_Namespace::kLegacyDataFileExtension.

Referenced by rename_table_directories().

// For each direct entry of table_data_dir whose canonical path has the
// DATA_FILE_EXT extension, creates a symlink that has the same path but with the
// File_Namespace::kLegacyDataFileExtension extension, unless something already
// exists at that legacy path. The scan is not recursive.
285  {
// Maps legacy-extension path (link to create) -> canonical data file path.
286  std::map<boost::filesystem::path, boost::filesystem::path> old_to_new_paths;
287  for (boost::filesystem::directory_iterator it(table_data_dir), end_it; it != end_it;
288  it++) {
289  const auto path = boost::filesystem::canonical(it->path());
290  if (path.extension().string() == DATA_FILE_EXT) {
291  auto old_path = path;
292  old_path.replace_extension(File_Namespace::kLegacyDataFileExtension);
293  // Add a symlink to data file, if one does not exist.
294  if (!boost::filesystem::exists(old_path)) {
295  old_to_new_paths[old_path] = path;
296  }
297  }
298  }
// Links are created only after the scan has finished; presumably this avoids
// adding entries to the directory while it is being iterated.
// The link target is just the data file's name (no directory part), i.e. it is
// relative to the directory that contains the link.
299  for (const auto& [old_path, new_path] : old_to_new_paths) {
300  boost::filesystem::create_symlink(new_path.filename(), old_path);
301  }
302 }
#define DATA_FILE_EXT
Definition: File.h:25
constexpr auto kLegacyDataFileExtension
Definition: File.h:36

+ Here is the caller graph for this function:

void anonymous_namespace{TableArchiver.cpp}::adjust_altered_table_files ( const int32_t  table_epoch,
const std::string &  temp_data_dir,
const std::unordered_map< int, int > &  column_ids_map 
)

Definition at line 254 of file TableArchiver.cpp.

References ThreadController_NS::SimpleThreadController< FutureReturnType >::checkThreadsStatus(), cpu_threads(), ThreadController_NS::SimpleThreadController< FutureReturnType >::finish(), rewrite_column_ids_in_page_headers(), and ThreadController_NS::SimpleThreadController< FutureReturnType >::startThread().

Referenced by TableArchiver::restoreTable().

// Walks temp_data_dir recursively and, for every entry that is a regular file and
// not a symlink, starts rewrite_column_ids_in_page_headers(path, column_ids_map,
// table_epoch) through thread_controller. Returns after thread_controller.finish().
// NOTE(review): the declaration of thread_controller (source line 259) is not part
// of this listing. The "References" section suggests it is a
// ThreadController_NS::SimpleThreadController sized with cpu_threads() -- confirm
// against TableArchiver.cpp.
256  {
257  boost::filesystem::path base_path(temp_data_dir);
258  boost::filesystem::recursive_directory_iterator end_it;
260  for (boost::filesystem::recursive_directory_iterator fit(base_path); fit != end_it;
261  ++fit) {
// Symlinks are skipped explicitly, so only real files are handed to the rewriter.
262  if (!boost::filesystem::is_symlink(fit->path()) &&
263  boost::filesystem::is_regular_file(fit->status())) {
264  thread_controller.startThread(
265  rewrite_column_ids_in_page_headers, fit->path(), column_ids_map, table_epoch);
// Checked after every started task; the exact semantics (throttling, error
// propagation) live in SimpleThreadController and are not visible here.
266  thread_controller.checkThreadsStatus();
267  }
268  }
269  thread_controller.finish();
270 }
void rewrite_column_ids_in_page_headers(const boost::filesystem::path &path, const std::unordered_map< int, int > &column_ids_map, const int32_t table_epoch)
int cpu_threads()
Definition: thread_count.h:25

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{TableArchiver.cpp}::delete_old_symlinks ( const std::string &  table_data_dir)

Definition at line 272 of file TableArchiver.cpp.

Referenced by rename_table_directories().

// Removes every symlink found directly inside table_data_dir (the scan is not
// recursive). Non-symlink entries are left untouched.
272  {
// Paths are collected first and removed in a second pass, so nothing is deleted
// while the directory is still being iterated.
273  std::vector<boost::filesystem::path> symlinks;
274  for (boost::filesystem::directory_iterator it(table_data_dir), end_it; it != end_it;
275  it++) {
276  if (boost::filesystem::is_symlink(it->path())) {
277  symlinks.emplace_back(it->path());
278  }
279  }
280  for (const auto& symlink : symlinks) {
281  boost::filesystem::remove_all(symlink);
282  }
283 }

+ Here is the caller graph for this function:

std::string anonymous_namespace{TableArchiver.cpp}::get_table_schema ( const std::string &  archive_path,
const std::string &  table,
const std::string &  compression 
)
inline

Definition at line 172 of file TableArchiver.cpp.

References simple_file_cat(), and table_schema_filename.

Referenced by TableArchiver::restoreTable().

// Reads the file named table_schema_filename out of the archive at archive_path
// (via simple_file_cat, using the given compression option) and returns its
// content with every match of the regex "@T" replaced by table.
// NOTE(review): table is passed as the *format* string of std::regex_replace, so
// sequences such as "$&" or "$1" inside it would be expanded rather than copied
// literally -- confirm table names can never contain '$'.
174  {
175  const auto schema_str =
176  simple_file_cat(archive_path, table_schema_filename, compression);
177  std::regex regex("@T");
178  return std::regex_replace(schema_str, regex, table);
179 }
static constexpr char const * table_schema_filename
std::string simple_file_cat(const std::string &archive_path, const std::string &file_name, const std::string &compression)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{TableArchiver.cpp}::rename_table_directories ( const File_Namespace::GlobalFileMgr *  global_file_mgr,
const std::string &  temp_data_dir,
const std::vector< std::string > &  target_paths,
const std::string &  name_prefix 
)

Definition at line 304 of file TableArchiver.cpp.

References abs_path(), add_data_file_symlinks(), delete_old_symlinks(), and File_Namespace::FileMgr::renameAndSymlinkLegacyFiles().

Referenced by TableArchiver::restoreTable().

// Scans the direct entries of temp_data_dir. Every entry that is not a regular
// file and whose name starts with name_prefix (case-insensitive, istarts_with) is
// renamed to "<abs_path(global_file_mgr)>/<target_paths[i]>", where i counts the
// matches seen so far. After each rename the symlinks inside the new directory
// are deleted and data-file symlinks are re-created.
// Throws std::runtime_error (including strerror(errno)) when std::rename fails.
// NOTE(review): source lines 304-306 (signature) and 324 are not part of this
// listing; the "References" section lists FileMgr::renameAndSymlinkLegacyFiles,
// which is presumably called on line 324 -- confirm against TableArchiver.cpp.
307  {
308  boost::filesystem::path base_path(temp_data_dir);
309  boost::filesystem::directory_iterator end_it;
310  int target_path_index = 0;
311  for (boost::filesystem::directory_iterator fit(base_path); fit != end_it; ++fit) {
312  if (!boost::filesystem::is_regular_file(fit->status())) {
313  const std::string file_path = fit->path().string();
314  const std::string file_name = fit->path().filename().string();
315  if (boost::istarts_with(file_name, name_prefix)) {
// NOTE(review): target_path_index is not checked against target_paths.size(), and
// the pairing of matched directories with target_paths follows the directory
// iteration order -- confirm callers guarantee both count and order.
316  const std::string target_path =
317  abs_path(global_file_mgr) + "/" + target_paths[target_path_index++];
318  if (std::rename(file_path.c_str(), target_path.c_str())) {
319  throw std::runtime_error("Failed to rename file " + file_path + " to " +
320  target_path + ": " + std::strerror(errno));
321  }
322  // Delete any old/invalid symlinks contained in table dump.
323  delete_old_symlinks(target_path);
325  // For post-rebrand table dumps, symlinks need to be added here, since file mgr
326  // migration would already have been executed for the dumped table.
327  add_data_file_symlinks(target_path);
328  }
329  }
330  }
331 }
void delete_old_symlinks(const std::string &table_data_dir)
std::string abs_path(const File_Namespace::GlobalFileMgr *global_file_mgr)
static void renameAndSymlinkLegacyFiles(const std::string &table_data_dir)
Definition: FileMgr.cpp:1125
void add_data_file_symlinks(const std::string &table_data_dir)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void anonymous_namespace{TableArchiver.cpp}::rewrite_column_ids_in_page_headers ( const boost::filesystem::path &  path,
const std::unordered_map< int, int > &  column_ids_map,
const int32_t  table_epoch 
)

Definition at line 183 of file TableArchiver.cpp.

References CHECK, DATA_FILE_EXT, heavyai::file_size(), heavyai::fopen(), File_Namespace::is_page_deleted_with_checkpoint(), simple_file_closer, split(), and to_string().

Referenced by adjust_altered_table_files().

// Rewrites, in place, the column id stored in the page headers of one data file.
// path:           file to process; files whose name does not look like
//                 <id>.<page_size>.<ext> are ignored.
// column_ids_map: old column id -> new column id; every column id found in a
//                 processed header must be present (enforced with CHECK).
// table_epoch:    passed to the page-deleted check together with the header's
//                 epoch and "contingent" fields.
// Throws std::runtime_error when the file cannot be opened, or when a seek, read
// or write fails.
186  {
187  const std::string file_path = path.string();
188  const std::string file_name = path.filename().string();
189  std::vector<std::string> tokens;
190  boost::split(tokens, file_name, boost::is_any_of("."));
191 
192  // ref. FileMgr::init for hint of data file name layout
// Accepts only names with at least three dot-separated tokens whose third token
// is either DATA_FILE_EXT without its leading dot or the literal "mapd".
193  if (tokens.size() <= 2 || !(DATA_FILE_EXT == "." + tokens[2] || tokens[2] == "mapd")) {
194  // We are only interested in files in the form <id>.<page_size>.<DATA_FILE_EXT>
195  return;
196  }
197 
// NOTE(review): lexical_cast throws on a non-numeric token, and a page_size of 0
// would make the loop condition below divide by zero -- confirm file names are
// always well-formed.
198  const auto page_size = boost::lexical_cast<int64_t>(tokens[1]);
199  const auto file_size = boost::filesystem::file_size(file_path);
// "r+" opens the existing file for both reading and writing; the unique_ptr with
// simple_file_closer closes it on every exit path, including exceptions.
200  std::unique_ptr<FILE, decltype(simple_file_closer)> fp(
201  std::fopen(file_path.c_str(), "r+"), simple_file_closer);
202  if (!fp) {
203  throw std::runtime_error("Failed to open " + file_path +
204  " for update: " + std::strerror(errno));
205  }
206  // TODO(Misiu): Rather than reference an external layout we should de-duplicate this
207  // page-reading code in a single location. This will also reduce the need for comments
208  // below.
209  // ref. FileInfo::openExistingFile for hint of chunk header layout
// Only whole pages are visited (file_size / page_size truncates); the first
// eight int32 values of each page are read as the header.
210  for (size_t page = 0; page < file_size / page_size; ++page) {
211  int32_t header_info[8];
212  if (0 != std::fseek(fp.get(), page * page_size, SEEK_SET)) {
// NOTE(review): the message below concatenates the page number and file_path
// with no separator between them.
213  throw std::runtime_error("Failed to seek to page# " + std::to_string(page) +
214  file_path + " for read: " + std::strerror(errno));
215  }
216  if (1 != fread(header_info, sizeof header_info, 1, fp.get())) {
217  throw std::runtime_error("Failed to read " + file_path + ": " +
218  std::strerror(errno));
219  }
// Pages whose first header word is not positive are skipped.
220  if (const auto header_size = header_info[0]; header_size > 0) {
221  // header_info[1] is the page's db_id; but can also be used as an "is deleted"
222  // indicator if negative.
223  auto& contingent = header_info[1];
224  // header_info[2] is the page's table_id; but can also used to store the page's
225  // epoch since the FileMgr stores table_id information separately.
226  auto& epoch = header_info[2];
227  auto& col_id = header_info[3];
// NOTE(review): source line 228 is not part of this listing; per the "References"
// section it presumably opens `if (File_Namespace::is_page_deleted_with_checkpoint(`
// -- confirm against TableArchiver.cpp. Pages for which that check holds are skipped.
229  table_epoch, epoch, contingent)) {
230  continue;
231  }
232  auto column_map_it = column_ids_map.find(col_id);
233  CHECK(column_map_it != column_ids_map.end()) << "could not find " << col_id;
234  // If a header contains a column id that is remapped to new location
235  // then write that change to the file.
236  if (const auto dest_col_id = column_map_it->second; col_id != dest_col_id) {
// col_id is a reference into header_info, so this assignment updates the buffer
// that is written back below.
237  col_id = dest_col_id;
// Seek back to the start of the page: the read above advanced the file position.
238  if (0 != std::fseek(fp.get(), page * page_size, SEEK_SET)) {
239  throw std::runtime_error("Failed to seek to page# " + std::to_string(page) +
240  file_path + " for write: " + std::strerror(errno));
241  }
242  if (1 != fwrite(header_info, sizeof header_info, 1, fp.get())) {
243  throw std::runtime_error("Failed to write " + file_path + ": " +
244  std::strerror(errno));
245  }
246  }
247  }
248  }
249 }
#define DATA_FILE_EXT
Definition: File.h:25
std::string to_string(char const *&&v)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
::FILE * fopen(const char *filename, const char *mode)
Definition: heavyai_fs.cpp:74
bool is_page_deleted_with_checkpoint(int32_t table_epoch, int32_t page_epoch, int32_t contingent)
Definition: FileInfo.cpp:277
#define CHECK(condition)
Definition: Logger.h:291
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string anonymous_namespace{TableArchiver.cpp}::run ( const std::string &  cmd,
const std::string &  chdir = "" 
)
inline

Definition at line 82 of file TableArchiver.cpp.

References logger::ERROR, measure< TimeT >::execution(), LOG, to_lower(), to_string(), and VLOG.

82  {
83  VLOG(3) << "running cmd: " << cmd;
84  int rcode;
85  std::error_code ec;
86  std::string output, errors;
87  const auto time_ms = measure<>::execution([&]() {
88  using namespace boost::process;
89  ipstream stdout, stderr;
90  if (!chdir.empty()) {
91  rcode = system(cmd, std_out > stdout, std_err > stderr, ec, start_dir = chdir);
92  } else {
93  rcode = system(cmd, std_out > stdout, std_err > stderr, ec);
94  }
95  std::ostringstream ss_output, ss_errors;
96  stdout >> ss_output.rdbuf();
97  stderr >> ss_errors.rdbuf();
98  output = ss_output.str();
99  errors = ss_errors.str();
100  });
101  if (rcode || ec) {
102  LOG(ERROR) << "failed cmd: " << cmd;
103  LOG(ERROR) << "exit code: " << rcode;
104  LOG(ERROR) << "error code: " << ec.value() << " - " << ec.message();
105  LOG(ERROR) << "stdout: " << output;
106  LOG(ERROR) << "stderr: " << errors;
107 #if defined(__APPLE__)
108  // osx bsdtar options "--use-compress-program" and "--fast-read" together
109  // run into pipe write error after tar extracts the first occurrence of a
110  // file and closes the read end while the decompression program still writes
111  // to the pipe. bsdtar doesn't handle this situation well like gnu tar does.
112  if (1 == rcode && cmd.find("--fast-read") &&
113  (errors.find("cannot write decoded block") != std::string::npos ||
114  errors.find("Broken pipe") != std::string::npos)) {
115  // ignore this error, or lose speed advantage of "--fast-read" on osx.
116  LOG(ERROR) << "tar error ignored on osx for --fast-read";
117  } else
118 #endif
119  // circumvent tar warning on reading file that is "changed as we read it".
120  // this warning results from reading a table file under concurrent inserts
121  if (1 == rcode && errors.find("changed as we read") != std::string::npos) {
122  LOG(ERROR) << "tar error ignored under concurrent inserts";
123  } else {
124  int error_code;
125  std::string error_message;
126  if (ec) {
127  error_code = ec.value();
128  error_message = ec.message();
129  } else {
130  error_code = rcode;
131  // Show a more concise message for permission errors instead of the default
132  // verbose message. Error logs will still contain all details.
133  if (to_lower(errors).find("permission denied") != std::string::npos) {
134  error_message = "Insufficient file read/write permission.";
135  } else {
136  error_message = errors;
137  }
138  }
139  throw std::runtime_error(
140  "An error occurred while executing an internal command. Error code: " +
141  std::to_string(error_code) + ", message: " + error_message);
142  }
143  } else {
144  VLOG(3) << "finished cmd: " << cmd;
145  VLOG(3) << "time: " << time_ms << " ms";
146  VLOG(3) << "stdout: " << output;
147  }
148  return output;
149 }
std::string to_lower(const std::string &str)
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
#define LOG(tag)
Definition: Logger.h:285
std::string to_string(char const *&&v)
#define VLOG(n)
Definition: Logger.h:387

+ Here is the call graph for this function:

std::string anonymous_namespace{TableArchiver.cpp}::simple_file_cat ( const std::string &  archive_path,
const std::string &  file_name,
const std::string &  compression 
)
inline

Definition at line 151 of file TableArchiver.cpp.

References get_quoted_string(), ddl_utils::IMPORT, run, and ddl_utils::validate_allowed_file_path().

Referenced by get_table_schema(), and TableArchiver::restoreTable().

// Extracts the file file_name from the tar archive at archive_path into a freshly
// created temporary directory, returns its content as a string, and removes the
// temporary directory again.
// compression: tar option string inserted verbatim right after "tar".
// Both steps go through run(), so a failure of tar or cat surfaces as the
// exception thrown by run().
// NOTE(review): source lines 154-155 are not part of this listing; the
// "References" section lists ddl_utils::validate_allowed_file_path, which is
// presumably called there on archive_path -- confirm against TableArchiver.cpp.
153  {
// Option that makes tar stop at the first matching entry; the spelling differs
// between the tar used on macOS and the one used elsewhere.
156 #if defined(__APPLE__)
157  constexpr static auto opt_occurrence = "--fast-read";
158 #else
159  constexpr static auto opt_occurrence = "--occurrence=1";
160 #endif
161  boost::filesystem::path temp_dir =
162  boost::filesystem::temp_directory_path() / boost::filesystem::unique_path();
163  boost::filesystem::create_directories(temp_dir);
// tar is started with temp_dir as its working directory, so the entry is
// extracted there.
// NOTE(review): archive_path is quoted but file_name (and the temp path passed to
// cat below) is not -- confirm these never contain whitespace or quote characters.
164  run("tar " + compression + " -xvf " + get_quoted_string(archive_path) + " " +
165  opt_occurrence + " " + file_name,
166  temp_dir.string());
// NOTE(review): if either run() call throws, temp_dir is not removed.
167  const auto output = run("cat " + (temp_dir / file_name).string());
168  boost::filesystem::remove_all(temp_dir);
169  return output;
170 }
std::string get_quoted_string(const std::string &filename, char quote, char escape)
Quote a string while escaping any existing quotes in the string.
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:785
static bool run

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Variable Documentation

auto anonymous_namespace{TableArchiver.cpp}::simple_file_closer = [](FILE* f) { std::fclose(f); }
inline