OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MigrationMgr.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #include <algorithm>
20 #include <exception>
21 #include <filesystem>
22 #include <sstream>
23 #include <string>
24 #include <unordered_map>
25 #include <vector>
26 
27 #include "Logger/Logger.h"
28 #include "QueryEngine/Execute.h"
30 #include "Shared/SysDefinitions.h"
31 #include "Shared/sqltypes.h"
32 
33 #include "MapDRelease.h"
34 
35 extern bool g_multi_instance;
36 
37 namespace migrations {
38 
39 void MigrationMgr::takeMigrationLock(const std::string& base_path) {
40 // TODO: support lock on Windows
41 #ifndef _WIN32
42  // Only used for --multi-instance clusters.
43  if (!g_multi_instance) {
44  migration_enabled_ = true;
45  return;
46  }
47 
48  // If we already have the migration lock then do nothing.
49  if (migration_mutex_) {
50  return;
51  }
52 
53  // Initialize the migration mutex. Will be locked until process exit.
54  migration_mutex_ = std::make_unique<heavyai::DistributedSharedMutex>(
55  std::filesystem::path(base_path) / shared::kLockfilesDirectoryName /
56  "migration.lockfile");
57 
58  // Take an exclusive lock if we can. If we get the exclusive lock, then later it will be
59  // relaxed to a shared lock, after we run migrations.
61  if (!g_multi_instance && !migration_enabled_) {
62  throw std::runtime_error(
63  "another HeavyDB server instance is already using data directory: " + base_path);
64  }
65 
66  // If we didn't get the exclusive lock, we'll wait for a shared lock instead, and we
67  // won't run migrations.
68  if (!migration_enabled_) {
69  migration_mutex_->lock_shared();
70  }
71 #else
72  migration_enabled_ = true;
73 #endif // _WIN32
74 }
75 
77 // TODO: support lock on Windows
78 #ifndef _WIN32
79  // Only used for --multi-instance clusters.
80  if (!g_multi_instance) {
81  return;
82  }
83 
84  // If we ran migrations, now relax the exclusive lock to a shared lock.
86  migration_mutex_->convert_lock_shared();
87  }
88 #endif // _WIN32
89 }
90 
92  const Catalog_Namespace::TableDescriptorMapById& table_descriptors_by_id,
93  const int database_id,
95  SqliteConnector& sqlite) {
96  std::vector<int> tables_migrated = {};
97  std::unordered_map<int, std::vector<std::string>> tables_to_migrate;
98  sqlite.query("BEGIN TRANSACTION");
99  try {
100  sqlite.query(
101  "select name from sqlite_master WHERE type='table' AND "
102  "name='mapd_version_history'");
103  if (sqlite.getNumRows() == 0) {
104  sqlite.query(
105  "CREATE TABLE mapd_version_history(version integer, migration_history text "
106  "unique)");
107  sqlite.query(
108  "CREATE TABLE mapd_date_in_days_column_migration_tmp(table_id integer primary "
109  "key)");
110  } else {
111  sqlite.query(
112  "select * from mapd_version_history where migration_history = "
113  "'date_in_days_column'");
114  if (sqlite.getNumRows() != 0) {
115  // no need for further execution
116  sqlite.query("END TRANSACTION");
117  return;
118  }
119  LOG(INFO) << "Checking for date columns requiring metadata migration.";
120  sqlite.query(
121  "select name from sqlite_master where type='table' AND "
122  "name='mapd_date_in_days_column_migration_tmp'");
123  if (sqlite.getNumRows() != 0) {
124  sqlite.query("select table_id from mapd_date_in_days_column_migration_tmp");
125  if (sqlite.getNumRows() != 0) {
126  for (size_t i = 0; i < sqlite.getNumRows(); i++) {
127  tables_migrated.push_back(sqlite.getData<int>(i, 0));
128  }
129  }
130  } else {
131  sqlite.query(
132  "CREATE TABLE mapd_date_in_days_column_migration_tmp(table_id integer "
133  "primary key)");
134  }
135  }
136  sqlite.query_with_text_params(
137  "SELECT tables.tableid, tables.name, columns.name FROM mapd_tables tables, "
138  "mapd_columns columns where tables.tableid = columns.tableid AND "
139  "columns.coltype = ?1 AND columns.compression = ?2",
140  std::vector<std::string>{
141  std::to_string(static_cast<int>(SQLTypes::kDATE)),
143  if (sqlite.getNumRows() != 0) {
144  for (size_t i = 0; i < sqlite.getNumRows(); i++) {
145  tables_to_migrate[sqlite.getData<int>(i, 0)] = {
146  sqlite.getData<std::string>(i, 1), sqlite.getData<std::string>(i, 2)};
147  }
148  }
149  } catch (const std::exception& e) {
150  LOG(ERROR) << "Failed to complete migration on date in days column metadata: "
151  << e.what();
152  sqlite.query("ROLLBACK");
153  throw;
154  }
155  sqlite.query("END TRANSACTION");
156 
157  for (auto& id_names : tables_to_migrate) {
158  if (std::find(tables_migrated.begin(), tables_migrated.end(), id_names.first) ==
159  tables_migrated.end()) {
160  sqlite.query("BEGIN TRANSACTION");
161  try {
162  LOG(INFO) << "Table: " << id_names.second[0]
163  << " may suffer from issues with DATE column: " << id_names.second[1]
164  << ". Running an OPTIMIZE command to solve any issues with metadata.";
165 
166  // TODO(adb): Could have the TableOptimizer get the Executor and avoid including
167  // Execute.h
169  auto table_desc_itr = table_descriptors_by_id.find(id_names.first);
170  if (table_desc_itr == table_descriptors_by_id.end()) {
171  throw std::runtime_error("Table descriptor does not exist for table " +
172  id_names.second[0] + " does not exist.");
173  }
174  auto td = table_desc_itr->second;
175  TableOptimizer optimizer(td, executor.get(), *cat);
176  optimizer.recomputeMetadata();
177 
178  sqlite.query_with_text_params(
179  "INSERT INTO mapd_date_in_days_column_migration_tmp VALUES(?)",
180  std::vector<std::string>{std::to_string(id_names.first)});
181  } catch (const std::exception& e) {
182  LOG(ERROR) << "Failed to complete metadata migration on date in days column: "
183  << e.what();
184  sqlite.query("ROLLBACK");
185  throw;
186  }
187  sqlite.query("COMMIT");
188  }
189  }
190 
191  sqlite.query("BEGIN TRANSACTION");
192  try {
193  sqlite.query("DROP TABLE mapd_date_in_days_column_migration_tmp");
194  sqlite.query_with_text_params(
195  "INSERT INTO mapd_version_history(version, migration_history) values(?,?)",
196  std::vector<std::string>{std::to_string(MAPD_VERSION), "date_in_days_column"});
197  } catch (const std::exception& e) {
198  LOG(ERROR) << "Failed to complete migraion on date in days column: " << e.what();
199  sqlite.query("ROLLBACK");
200  throw;
201  }
202  sqlite.query("END TRANSACTION");
203  LOG(INFO) << "Successfully migrated all date in days column metadata.";
204 }
205 
206 namespace {
/**
 * Moves a legacy file or directory to its rebranded name and leaves a relative symlink at
 * the legacy location that points to the new name.
 *
 * Outcomes:
 *  - old_path is already a symlink to new_path.filename() and new_path exists: nothing to
 *    do, returns false.
 *  - old_path is a regular file/directory and new_path is absent: old_path is renamed to
 *    new_path and a symlink old_path -> new_path.filename() is created, returns true.
 *  - old_path is absent and new_path exists: only the symlink is created, returns true.
 *  - neither path exists: returns false.
 *
 * The symlink check inspects the link itself rather than its target, so a dangling legacy
 * symlink is validated (and reported) instead of being treated as "nothing at old_path".
 *
 * @param old_path Legacy path.
 * @param new_path Rebranded path; the symlink target is new_path.filename(), so callers
 *                 are expected to pass paths that share a parent directory.
 * @return true if a rename and/or symlink creation happened, false otherwise.
 * @throws std::runtime_error if old_path is a symlink that does not reference
 *         new_path.filename(), if old_path is a symlink but new_path does not exist, or
 *         if non-symlink entries exist at both paths.
 */
bool rename_and_symlink_path(const std::filesystem::path& old_path,
                             const std::filesystem::path& new_path) {
  // is_symlink() does not follow the link, so this also catches dangling symlinks, which
  // exists() would report as absent.
  if (std::filesystem::is_symlink(old_path)) {
    if (std::filesystem::read_symlink(old_path) != new_path.filename()) {
      std::stringstream ss;
      ss << "Rebrand migration: Encountered an unexpected symlink at path: " << old_path
         << ". Symlink does not reference file: " << new_path.filename();
      throw std::runtime_error(ss.str());
    }
    if (!std::filesystem::exists(new_path)) {
      std::stringstream ss;
      ss << "Rebrand migration: Encountered symlink at legacy path: " << old_path
         << " but no corresponding file at new path: " << new_path;
      throw std::runtime_error(ss.str());
    }
    // The expected symlink is already in place.
    return false;
  }

  bool file_updated{false};
  if (std::filesystem::exists(old_path)) {
    if (std::filesystem::exists(new_path)) {
      std::stringstream ss;
      ss << "Rebrand migration: Encountered existing non-symlink files at the legacy "
            "path: "
         << old_path << " and new path: " << new_path;
      throw std::runtime_error(ss.str());
    }
    std::filesystem::rename(old_path, new_path);
    std::cout << "Rebrand migration: Renamed " << old_path << " to " << new_path
              << std::endl;
    file_updated = true;

    // Sanity check: the legacy path must be free before the symlink is created.
    if (std::filesystem::exists(old_path)) {
      std::stringstream ss;
      ss << "Rebrand migration: An unexpected error occurred. A symlink should have been "
            "created at "
         << old_path;
      throw std::runtime_error(ss.str());
    }
  }

  if (std::filesystem::exists(new_path)) {
    // Relative target: the link stays valid if the whole data directory is moved.
    std::filesystem::create_symlink(new_path.filename(), old_path);
    std::cout << "Rebrand migration: Added symlink from " << old_path << " to "
              << new_path.filename() << std::endl;
    file_updated = true;
  }
  return file_updated;
}
262 
263 bool rename_and_symlink_file(const std::filesystem::path& base_path,
264  const std::string& dir_name,
265  const std::string& old_file_name,
266  const std::string& new_file_name) {
267  auto old_path = std::filesystem::canonical(base_path);
268  auto new_path = std::filesystem::canonical(base_path);
269  if (!dir_name.empty()) {
270  old_path /= dir_name;
271  new_path /= dir_name;
272  }
273  if (old_file_name.empty()) {
274  throw std::runtime_error(
275  "Unexpected error in rename_and_symlink_file: old_file_name is empty");
276  }
277  old_path /= old_file_name;
278 
279  if (new_file_name.empty()) {
280  throw std::runtime_error(
281  "Unexpected error in rename_and_symlink_file: new_file_name is empty");
282  }
283  new_path /= new_file_name;
284 
285  return rename_and_symlink_path(old_path, new_path);
286 }
287 } // namespace
288 
289 void MigrationMgr::executeRebrandMigration(const std::string& base_path) {
290  bool migration_occurred{false};
291 
292  // clang-format off
293  const std::map<std::string, std::string> old_to_new_dir_names {
294  {"mapd_catalogs", shared::kCatalogDirectoryName},
295  {"mapd_data", shared::kDataDirectoryName},
296  {"mapd_log", shared::kDefaultLogDirName},
297  {"mapd_export", shared::kDefaultExportDirName},
298  {"mapd_import", shared::kDefaultImportDirName},
299  {"omnisci_key_store", shared::kDefaultKeyStoreDirName}
300  };
301  // clang-format on
302 
303  const auto storage_base_path = std::filesystem::canonical(base_path);
304  // Rename legacy directories (if they exist), and create symlinks from legacy directory
305  // names to the new names (if they don't already exist).
306  for (const auto& [old_dir_name, new_dir_name] : old_to_new_dir_names) {
307  auto old_path = storage_base_path / old_dir_name;
308  auto new_path = storage_base_path / new_dir_name;
309  if (rename_and_symlink_path(old_path, new_path)) {
310  migration_occurred = true;
311  }
312  }
313 
314  // Rename legacy files and create symlinks to them.
315  const auto license_updated = rename_and_symlink_file(
316  storage_base_path, "", "omnisci.license", shared::kDefaultLicenseFileName);
317  const auto key_updated = rename_and_symlink_file(storage_base_path,
319  "omnisci.pem",
321  const auto sys_catalog_updated = rename_and_symlink_file(storage_base_path,
323  "omnisci_system_catalog",
325  if (license_updated || key_updated || sys_catalog_updated) {
326  migration_occurred = true;
327  }
328 
329  // Delete the disk cache directory and legacy files that will no longer be used.
330  const std::array<std::filesystem::path, 9> files_to_delete{
331  storage_base_path / "omnisci_disk_cache",
332  storage_base_path / "omnisci_server_pid.lck",
333  storage_base_path / "mapd_server_pid.lck",
334  storage_base_path / shared::kDefaultLogDirName / "omnisci_server.FATAL",
335  storage_base_path / shared::kDefaultLogDirName / "omnisci_server.ERROR",
336  storage_base_path / shared::kDefaultLogDirName / "omnisci_server.WARNING",
337  storage_base_path / shared::kDefaultLogDirName / "omnisci_server.INFO",
338  storage_base_path / shared::kDefaultLogDirName / "omnisci_web_server.ALL",
339  storage_base_path / shared::kDefaultLogDirName / "omnisci_web_server.ACCESS"};
340 
341  for (const auto& file_path : files_to_delete) {
342  if (std::filesystem::exists(file_path)) {
343  std::filesystem::remove_all(file_path);
344  std::cout << "Rebrand migration: Deleted file " << file_path << std::endl;
345  migration_occurred = true;
346  }
347  }
348  if (migration_occurred) {
349  std::cout << "Rebrand migration completed" << std::endl;
350  }
351 }
352 } // namespace migrations
const std::string kDataDirectoryName
bool g_multi_instance
Definition: heavyai_locks.h:21
std::string cat(Ts &&...args)
T getData(const int row, const int col)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
virtual void query_with_text_params(std::string const &query_only)
#define LOG(tag)
Definition: Logger.h:285
static void relaxMigrationLock()
const std::string kDefaultLogDirName
const std::string kSystemCatalogName
virtual void query(const std::string &queryString)
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanu...
Constants for Builtin SQL Types supported by HEAVY.AI.
const std::string kDefaultExportDirName
std::string to_string(char const *&&v)
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
Definition: Execute.cpp:475
bool rename_and_symlink_path(const std::filesystem::path &old_path, const std::filesystem::path &new_path)
const std::string kDefaultImportDirName
std::map< int, TableDescriptor * > TableDescriptorMapById
Definition: Types.h:35
static bool migration_enabled_
Definition: MigrationMgr.h:54
static const int32_t MAPD_VERSION
Definition: release.h:32
const std::string kDefaultKeyFileName
const std::string kDefaultKeyStoreDirName
Definition: sqltypes.h:70
static void executeRebrandMigration(const std::string &base_path)
static void takeMigrationLock(const std::string &base_path)
bool rename_and_symlink_file(const std::filesystem::path &base_path, const std::string &dir_name, const std::string &old_file_name, const std::string &new_file_name)
const std::string kCatalogDirectoryName
const std::string kDefaultLicenseFileName
static void migrateDateInDaysMetadata(const Catalog_Namespace::TableDescriptorMapById &table_descriptors_by_id, const int database_id, Catalog_Namespace::Catalog *cat, SqliteConnector &sqlite)
static std::unique_ptr< heavyai::DistributedSharedMutex > migration_mutex_
Definition: MigrationMgr.h:53
const std::string kLockfilesDirectoryName
virtual size_t getNumRows() const
void recomputeMetadata() const
Recomputes per-chunk metadata for each fragment in the table. Updates and deletes can cause chunk met...
static const ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:373