OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
AlterTableAlterColumnCommandRecoveryMgr Class Reference

#include <AlterColumnRecovery.h>

+ Collaboration diagram for AlterTableAlterColumnCommandRecoveryMgr:

Classes

struct  ColumnAltered
 
struct  RecoveryInfo
 
struct  RecoveryParamFilepathInfo
 

Public Types

using TypePairs = alter_column_shared::TypePairs
 

Public Member Functions

 AlterTableAlterColumnCommandRecoveryMgr (Catalog_Namespace::Catalog &catalog)
 
void rollback (const TableDescriptor *td, const RecoveryInfo &param)
 
void cleanup (const TableDescriptor *td, const TypePairs &src_dst_cds)
 
void checkpoint (const TableDescriptor *td, const TypePairs &src_dst_cds)
 
RecoveryInfo deserializeRecoveryInformation (const std::string &filename)
 
std::string serializeRecoveryInformation (const RecoveryInfo &param)
 
void writeSerializedRecoveryInformation (const RecoveryInfo &param, const RecoveryParamFilepathInfo &filepath_info)
 
void readSerializedRecoveryInformation (RecoveryInfo &param, const RecoveryParamFilepathInfo &filepath_info)
 
std::string recoveryFilepath (const RecoveryParamFilepathInfo &filepath_info)
 
RecoveryParamFilepathInfo getRecoveryFilepathInfo (const int32_t table_id=-1)
 

Static Public Member Functions

static void resolveIncompleteAlterColumnCommandsForAllCatalogs ()
 

Static Public Attributes

static const std::string kRecoveryDirectoryName = "crash_recovery"
 

Private Member Functions

void cleanupDeleteDictionaries (const TableDescriptor *td, const TypePairs &src_dst_cds)
 
void cleanupClearChunk (const ChunkKey &key, const MemoryLevel mem_level)
 
void cleanupClearChunk (const ChunkKey &key)
 
void cleanupClearRemainingChunks (const TableDescriptor *td, const TypePairs &src_dst_cds)
 
void cleanupDropSourceGeoColumns (const TableDescriptor *td, const TypePairs &src_dst_cds)
 
std::list< std::pair
< ColumnDescriptor,
ColumnDescriptor > > 
toPairedCds (const std::list< ColumnAltered > &altered_columns)
 
void recoverAlterTableAlterColumnFromFile (const std::string &filename)
 
std::list< std::filesystem::path > getRecoveryFiles ()
 
TypePairs getSrcDstCds (int table_id, std::list< std::pair< ColumnDescriptor, ColumnDescriptor >> &pairs_list)
 
void resolveIncompleteAlterColumnCommands ()
 

Static Private Member Functions

static std::list< ColumnAltered > fromPairedCds (const std::list< std::pair< ColumnDescriptor, ColumnDescriptor >> &altered_columns)
 
static std::filesystem::path getRecoveryPrefix (const std::string &base_path)
 
static std::map< std::string,
AlterTableAlterColumnCommandRecoveryMgr > 
createRecoveryManagersForCatalogs ()
 

Private Attributes

Catalog_Namespace::Catalog & catalog_
 

Detailed Description

Definition at line 22 of file AlterColumnRecovery.h.

Member Typedef Documentation

Constructor & Destructor Documentation

AlterTableAlterColumnCommandRecoveryMgr::AlterTableAlterColumnCommandRecoveryMgr ( Catalog_Namespace::Catalog & catalog)

Definition at line 27 of file AlterColumnRecovery.cpp.

29  : catalog_(catalog) {}

Member Function Documentation

void AlterTableAlterColumnCommandRecoveryMgr::checkpoint ( const TableDescriptor * td,
const TypePairs & src_dst_cds 
)

Definition at line 348 of file AlterColumnRecovery.cpp.

References catalog_, Catalog_Namespace::Catalog::checkpointWithAutoRollback(), ddl_utils::alter_column_utils::compare_column_descriptors(), Catalog_Namespace::Catalog::getMetadataForDict(), ddl_utils::alter_column_utils::CompareResult::sql_types_match, and TableDescriptor::tableId.

Referenced by AlterTableAlterColumnCommand::alterColumnTypes(), and cleanup().

349  {
350  for (auto& [src_cd, dst_cd] : src_dst_cds) {
351  if (!dst_cd->columnType.is_dict_encoded_type()) {
352  continue;
353  }
355  .sql_types_match) {
356  continue;
357  }
358  auto string_dictionary =
359  catalog_.getMetadataForDict(dst_cd->columnType.get_comp_param(), true)
360  ->stringDict.get();
361  if (!string_dictionary->checkpoint()) {
362  throw std::runtime_error("Failed to checkpoint dictionary while altering column " +
363  dst_cd->columnName + ".");
364  }
365  }
367 }
CompareResult compare_column_descriptors(const ColumnDescriptor *lhs, const ColumnDescriptor *rhs)
Definition: DdlUtils.cpp:52
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1904
void checkpointWithAutoRollback(const int logical_table_id) const
Definition: Catalog.cpp:5030

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::cleanup ( const TableDescriptor * td,
const TypePairs & src_dst_cds 
)

Definition at line 317 of file AlterColumnRecovery.cpp.

References catalog_, checkpoint(), cleanupClearRemainingChunks(), cleanupDeleteDictionaries(), cleanupDropSourceGeoColumns(), LOG, Catalog_Namespace::Catalog::resetTableEpochFloor(), TableDescriptor::tableId, and logger::WARNING.

Referenced by AlterTableAlterColumnCommand::alterColumnTypes(), and recoverAlterTableAlterColumnFromFile().

318  {
319  try {
320  cleanupDeleteDictionaries(td, src_dst_cds);
321  } catch (std::exception& except) {
322  LOG(WARNING) << "Alter column type: failed to clear source dictionaries: "
323  << except.what();
324  throw;
325  }
326 
327  try {
328  cleanupClearRemainingChunks(td, src_dst_cds);
329  } catch (std::exception& except) {
330  LOG(WARNING) << "Alter column type: failed to clear remaining chunks: "
331  << except.what();
332  throw;
333  }
334 
335  try {
336  cleanupDropSourceGeoColumns(td, src_dst_cds);
337  } catch (std::exception& except) {
338  LOG(WARNING) << "Alter column type: failed to remove geo's source column : "
339  << except.what();
340  throw;
341  }
342 
343  // Data is removed data, rollback no longer possible
344  checkpoint(td, src_dst_cds);
346 }
void cleanupDeleteDictionaries(const TableDescriptor *td, const TypePairs &src_dst_cds)
#define LOG(tag)
Definition: Logger.h:285
void resetTableEpochFloor(const int logicalTableId) const
Definition: Catalog.cpp:5040
void cleanupDropSourceGeoColumns(const TableDescriptor *td, const TypePairs &src_dst_cds)
void checkpoint(const TableDescriptor *td, const TypePairs &src_dst_cds)
void cleanupClearRemainingChunks(const TableDescriptor *td, const TypePairs &src_dst_cds)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::cleanupClearChunk ( const ChunkKey & key,
const MemoryLevel  mem_level 
)
private

Definition at line 115 of file AlterColumnRecovery.cpp.

References catalog_, Data_Namespace::DataMgr::deleteChunk(), and Catalog_Namespace::Catalog::getDataMgr().

Referenced by cleanupClearChunk(), and cleanupClearRemainingChunks().

117  {
118  auto& data_mgr = catalog_.getDataMgr();
119  if (mem_level >= data_mgr.levelSizes_.size()) {
120  return;
121  }
122  for (int device = 0; device < data_mgr.levelSizes_[mem_level]; ++device) {
123  if (data_mgr.isBufferOnDevice(key, mem_level, device)) {
124  data_mgr.deleteChunk(key, mem_level, device);
125  }
126  }
127 }
void deleteChunk(const ChunkKey &key, const MemoryLevel mem_level, const int device_id)
Definition: DataMgr.cpp:547
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:266

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::cleanupClearChunk ( const ChunkKey & key)
private

Definition at line 129 of file AlterColumnRecovery.cpp.

References cleanupClearChunk(), Data_Namespace::CPU_LEVEL, Data_Namespace::DISK_LEVEL, and Data_Namespace::GPU_LEVEL.

+ Here is the call graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::cleanupClearRemainingChunks ( const TableDescriptor * td,
const TypePairs & src_dst_cds 
)
private

Definition at line 135 of file AlterColumnRecovery.cpp.

References catalog_, CHUNK_KEY_FRAGMENT_IDX, cleanupClearChunk(), Data_Namespace::DataMgr::getChunkMetadataVecForKeyPrefix(), Catalog_Namespace::Catalog::getDatabaseId(), Catalog_Namespace::Catalog::getDataMgr(), and TableDescriptor::tableId.

Referenced by cleanup().

137  {
138  // for (non-geo) cases where the chunk keys change, chunks that remain with old chunk
139  // key must be removed
140  for (auto& [src_cd, dst_cd] : src_dst_cds) {
141  if (src_cd->columnType.is_varlen_indeed() != dst_cd->columnType.is_varlen_indeed()) {
142  ChunkMetadataVector chunk_metadata;
144  chunk_metadata, {catalog_.getDatabaseId(), td->tableId, dst_cd->columnId});
145  std::set<int> fragment_ids;
146  for (const auto& [key, _] : chunk_metadata) {
147  fragment_ids.insert(key[CHUNK_KEY_FRAGMENT_IDX]);
148  }
149  for (const auto& frag_id : fragment_ids) {
150  ChunkKey key = {catalog_.getDatabaseId(), td->tableId, src_cd->columnId, frag_id};
151  if (src_cd->columnType.is_varlen_indeed()) {
152  auto data_key = key;
153  data_key.push_back(1);
154  cleanupClearChunk(data_key);
155  auto index_key = key;
156  index_key.push_back(2);
157  cleanupClearChunk(index_key);
158  } else { // no varlen case
159  cleanupClearChunk(key);
160  }
161  }
162  }
163  }
164 }
std::vector< int > ChunkKey
Definition: types.h:36
void cleanupClearChunk(const ChunkKey &key, const MemoryLevel mem_level)
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:266
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:496
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
int getDatabaseId() const
Definition: Catalog.h:326

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::cleanupDeleteDictionaries ( const TableDescriptor * td,
const TypePairs & src_dst_cds 
)
private

Definition at line 101 of file AlterColumnRecovery.cpp.

References catalog_, ddl_utils::alter_column_utils::compare_column_descriptors(), Catalog_Namespace::Catalog::delDictionaryTransactional(), and ddl_utils::alter_column_utils::CompareResult::sql_types_match.

Referenced by cleanup().

103  {
104  for (auto& [src_cd, dst_cd] : src_dst_cds) {
105  if (!src_cd->columnType.is_dict_encoded_type()) {
106  continue;
107  }
109  .sql_types_match) {
111  }
112  }
113 }
CompareResult compare_column_descriptors(const ColumnDescriptor *lhs, const ColumnDescriptor *rhs)
Definition: DdlUtils.cpp:52
void delDictionaryTransactional(const ColumnDescriptor &cd)
Definition: Catalog.cpp:2267

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::cleanupDropSourceGeoColumns ( const TableDescriptor * td,
const TypePairs & src_dst_cds 
)
private

Definition at line 166 of file AlterColumnRecovery.cpp.

References catalog_, Data_Namespace::CPU_LEVEL, Catalog_Namespace::DBMetadata::dbId, Data_Namespace::DataMgr::deleteChunksWithPrefix(), Data_Namespace::DISK_LEVEL, Catalog_Namespace::Catalog::dropColumnTransactional(), Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDataMgr(), Catalog_Namespace::Catalog::getMetadataForColumn(), Data_Namespace::GPU_LEVEL, and TableDescriptor::tableId.

Referenced by cleanup().

168  {
169  for (auto& [src_cd, dst_cd] : src_dst_cds) {
170  if (!dst_cd->columnType.is_geometry()) {
171  continue;
172  }
173  auto catalog_cd = catalog_.getMetadataForColumn(src_cd->tableId, src_cd->columnId);
174  catalog_.dropColumnTransactional(*td, *catalog_cd);
175  ChunkKey col_key{catalog_.getCurrentDB().dbId, td->tableId, src_cd->columnId};
176  auto& data_mgr = catalog_.getDataMgr();
178  data_mgr.deleteChunksWithPrefix(col_key, MemoryLevel::CPU_LEVEL);
179  data_mgr.deleteChunksWithPrefix(col_key, MemoryLevel::DISK_LEVEL);
180  }
181 }
std::vector< int > ChunkKey
Definition: types.h:36
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:266
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:265
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
void deleteChunksWithPrefix(const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:522
void dropColumnTransactional(const TableDescriptor &td, const ColumnDescriptor &cd)
Definition: Catalog.cpp:2591

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::map< std::string, AlterTableAlterColumnCommandRecoveryMgr > AlterTableAlterColumnCommandRecoveryMgr::createRecoveryManagersForCatalogs ( )
staticprivate

Definition at line 509 of file AlterColumnRecovery.cpp.

References CHECK, getRecoveryPrefix(), Catalog_Namespace::SysCatalog::instance(), and run_benchmark_import::result.

Referenced by resolveIncompleteAlterColumnCommandsForAllCatalogs().

509  {
510  std::map<std::string, AlterTableAlterColumnCommandRecoveryMgr> result;
511 
513  auto base_path = syscat.getCatalogBasePath();
514  auto prefix = getRecoveryPrefix(base_path);
515  if (!std::filesystem::exists(prefix)) {
516  return {};
517  }
518 
519  auto catalog_metadata = syscat.getAllDBMetadata();
520 
521  for (const auto& entry : std::filesystem::directory_iterator(prefix)) {
522  auto entry_path = entry.path().string();
523 
524  for (const auto& db_metadata : catalog_metadata) {
525  if (result.count(db_metadata.dbName)) {
526  continue;
527  }
528  auto match_db = entry_path.find("alter_column_recovery_db_" + db_metadata.dbName);
529  if (match_db == std::string::npos) {
530  continue;
531  }
532  auto catalog = syscat.getCatalog(db_metadata.dbName);
533  CHECK(catalog.get());
534  result.emplace(db_metadata.dbName, *catalog);
535  }
536  }
537 
538  return result;
539 }
static SysCatalog & instance()
Definition: SysCatalog.h:343
static std::filesystem::path getRecoveryPrefix(const std::string &base_path)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo AlterTableAlterColumnCommandRecoveryMgr::deserializeRecoveryInformation ( const std::string &  filename)

Definition at line 204 of file AlterColumnRecovery.cpp.

References AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::added_columns, AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::altered_columns, CHECK, fromPairedCds(), json_utils::get_value_from_object(), AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::is_vacuumed, json_utils::read_from_file(), AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::renamed_columns, AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::src_dst_cds, AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::table_epoch, and AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::updated_dict_cds.

Referenced by readSerializedRecoveryInformation(), and recoverAlterTableAlterColumnFromFile().

205  {
206  RecoveryInfo param;
208  CHECK(d.IsObject());
209 
210  json_utils::get_value_from_object(d, param.added_columns, "added_columns");
211 
212  std::list<std::pair<ColumnDescriptor, ColumnDescriptor>> altered_column_pairs;
213  json_utils::get_value_from_object(d, altered_column_pairs, "altered_columns");
214  param.altered_columns = fromPairedCds(altered_column_pairs);
215 
216  json_utils::get_value_from_object(d, param.renamed_columns, "renamed_columns");
217 
218  json_utils::get_value_from_object(d, param.updated_dict_cds, "updated_dict_cds");
219 
220  json_utils::get_value_from_object(d, param.table_epoch, "table_epoch");
221  json_utils::get_value_from_object(d, param.is_vacuumed, "is_vacuumed");
222 
223  json_utils::get_value_from_object(d, param.src_dst_cds, "src_dst_cds");
224 
225  return param;
226 }
void get_value_from_object(const rapidjson::Value &object, T &value, const std::string &name)
Definition: JsonUtils.h:270
rapidjson::Document read_from_file(const std::string &file_path)
Definition: JsonUtils.cpp:201
#define CHECK(condition)
Definition: Logger.h:291
static std::list< ColumnAltered > fromPairedCds(const std::list< std::pair< ColumnDescriptor, ColumnDescriptor >> &altered_columns)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::list< AlterTableAlterColumnCommandRecoveryMgr::ColumnAltered > AlterTableAlterColumnCommandRecoveryMgr::fromPairedCds ( const std::list< std::pair< ColumnDescriptor, ColumnDescriptor >> &  altered_columns)
staticprivate

Definition at line 184 of file AlterColumnRecovery.cpp.

Referenced by deserializeRecoveryInformation().

185  {
186  std::list<ColumnAltered> retval;
187  for (const auto& [old_cd, new_cd] : altered_columns) {
188  retval.emplace_back(old_cd, new_cd);
189  }
190  return retval;
191 }

+ Here is the caller graph for this function:

AlterTableAlterColumnCommandRecoveryMgr::RecoveryParamFilepathInfo AlterTableAlterColumnCommandRecoveryMgr::getRecoveryFilepathInfo ( const int32_t  table_id = -1)

Definition at line 248 of file AlterColumnRecovery.cpp.

References AlterTableAlterColumnCommandRecoveryMgr::RecoveryParamFilepathInfo::base_path, catalog_, AlterTableAlterColumnCommandRecoveryMgr::RecoveryParamFilepathInfo::db_name, Catalog_Namespace::DBMetadata::dbName, Catalog_Namespace::Catalog::getCatalogBasePath(), Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getTableName(), and AlterTableAlterColumnCommandRecoveryMgr::RecoveryParamFilepathInfo::table_name.

Referenced by AlterTableAlterColumnCommand::cleanupRecoveryInfo(), getRecoveryFiles(), and AlterTableAlterColumnCommand::populateAndWriteRecoveryInfo().

248  {
249  RecoveryParamFilepathInfo path_info;
250  path_info.base_path = catalog_.getCatalogBasePath();
251  path_info.db_name = catalog_.getCurrentDB().dbName;
252  path_info.table_name =
253  table_id >= 0 ? catalog_.getTableName(table_id).value() : std::string{};
254  return path_info;
255 }
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:265
std::optional< std::string > getTableName(int32_t table_id) const
Definition: Catalog.cpp:1869
const std::string & getCatalogBasePath() const
Definition: Catalog.h:273

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::list< std::filesystem::path > AlterTableAlterColumnCommandRecoveryMgr::getRecoveryFiles ( )
private

Definition at line 370 of file AlterColumnRecovery.cpp.

References catalog_, getRecoveryFilepathInfo(), logger::INFO, LOG, Catalog_Namespace::Catalog::name(), recoveryFilepath(), and run_benchmark_import::result.

Referenced by resolveIncompleteAlterColumnCommands().

370  {
371  std::list<std::filesystem::path> result;
372  std::string path = recoveryFilepath(getRecoveryFilepathInfo());
373 
374  if (!std::filesystem::exists(path)) {
375  return {};
376  }
377 
378  for (const auto& entry : std::filesystem::directory_iterator(path)) {
379  auto entry_path = entry.path().string();
380  if (entry_path.find("alter_column_recovery_db_" + catalog_.name() + "_table_") !=
381  std::string::npos &&
382  entry_path.find(".json") != std::string::npos) {
383  if (entry_path.find(".tmp") != std::string::npos &&
384  entry_path.find(".tmp") == entry_path.size() - std::string{".tmp"}.size()) {
385  if (!std::filesystem::remove(entry_path)) {
386  throw std::runtime_error("Failed to remove incomplete recovery file: " +
387  entry_path);
388  } else {
389  LOG(INFO) << "Removing incomplete ALTER COLUMN recovery file: " + entry_path;
390  }
391  } else {
392  result.emplace_back(entry.path());
393  }
394  }
395  }
396  return result;
397 }
RecoveryParamFilepathInfo getRecoveryFilepathInfo(const int32_t table_id=-1)
#define LOG(tag)
Definition: Logger.h:285
std::string name() const
Definition: Catalog.h:348
std::string recoveryFilepath(const RecoveryParamFilepathInfo &filepath_info)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::filesystem::path AlterTableAlterColumnCommandRecoveryMgr::getRecoveryPrefix ( const std::string &  base_path)
staticprivate

Definition at line 228 of file AlterColumnRecovery.cpp.

References kRecoveryDirectoryName.

Referenced by createRecoveryManagersForCatalogs(), recoveryFilepath(), and writeSerializedRecoveryInformation().

229  {
230  return std::filesystem::path(base_path) / std::filesystem::path(kRecoveryDirectoryName);
231 }
static const std::string kRecoveryDirectoryName

+ Here is the caller graph for this function:

AlterTableAlterColumnCommandRecoveryMgr::TypePairs AlterTableAlterColumnCommandRecoveryMgr::getSrcDstCds ( int  table_id,
std::list< std::pair< ColumnDescriptor, ColumnDescriptor >> &  pairs_list 
)
private

Definition at line 400 of file AlterColumnRecovery.cpp.

References catalog_, Catalog_Namespace::Catalog::getMetadataForColumn(), and run_benchmark_import::result.

Referenced by recoverAlterTableAlterColumnFromFile().

402  {
403  // Source columns must be obtained from catalog to ensure correctness/consistency
404  std::list<std::pair<const ColumnDescriptor*, ColumnDescriptor*>> result;
405  for (auto& [src, dst] : pairs_list) {
406  auto catalog_cd = catalog_.getMetadataForColumn(table_id, src.columnId);
407  if (!catalog_cd) {
408  // If column is missing in catalog, operate under the assumption it was
409  // already successfully removed in cleanup, along with all related
410  // components such as dictionaries
411  continue;
412  }
413  result.emplace_back(catalog_cd, &dst);
414  }
415  return result;
416 }
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::readSerializedRecoveryInformation ( RecoveryInfo & param,
const RecoveryParamFilepathInfo & filepath_info 
)

Definition at line 257 of file AlterColumnRecovery.cpp.

References deserializeRecoveryInformation(), nvtx_helpers::anonymous_namespace{nvtx_helpers.cpp}::filename(), and recoveryFilepath().

259  {
260  auto filename = recoveryFilepath(filepath_info);
261  std::ifstream ifs(filename);
262  if (!ifs) {
263  throw std::runtime_error{"Error trying to read file \"" + filename +
264  "\". The error was: " + std::strerror(errno)};
265  }
266  std::string json_string;
267  ifs >> json_string;
268  param = deserializeRecoveryInformation(json_string);
269 }
RecoveryInfo deserializeRecoveryInformation(const std::string &filename)
std::string recoveryFilepath(const RecoveryParamFilepathInfo &filepath_info)

+ Here is the call graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::recoverAlterTableAlterColumnFromFile ( const std::string &  filename)
private

Definition at line 418 of file AlterColumnRecovery.cpp.

References catalog_, CHECK, CHECK_EQ, CHECK_GT, cleanup(), Data_Namespace::CPU_LEVEL, Catalog_Namespace::DBMetadata::dbId, Data_Namespace::DataMgr::deleteChunksWithPrefix(), deserializeRecoveryInformation(), Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDatabaseId(), Catalog_Namespace::Catalog::getDataMgr(), getSrcDstCds(), Catalog_Namespace::Catalog::getTableEpochs(), Catalog_Namespace::Catalog::getTableName(), Data_Namespace::GPU_LEVEL, hash_value(), logger::INFO, CacheInvalidator< CACHE_HOLDING_TYPES >::invalidateCachesByTable(), AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::is_vacuumed, LOG, rollback(), Catalog_Namespace::Catalog::setTableEpoch(), AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::src_dst_cds, and AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::table_epoch.

Referenced by resolveIncompleteAlterColumnCommands().

419  {
421 
422  recovery_param = deserializeRecoveryInformation(filename);
423 
424  CHECK_GT(recovery_param.src_dst_cds.size(), 0UL);
425 
426  auto table_id = recovery_param.src_dst_cds.begin()->first.tableId;
427  auto table_name_opt = catalog_.getTableName(table_id);
428  CHECK(table_name_opt.has_value());
429  auto table_name = table_name_opt.value();
430 
431  const auto td_with_lock =
433  catalog_, table_name, false);
434  const auto td = td_with_lock();
435 
436  CHECK(td);
437  LOG(INFO) << "Starting crash recovery for table: " << td->tableName;
438 
439  auto table_epochs = catalog_.getTableEpochs(catalog_.getDatabaseId(), td->tableId);
440  CHECK_GT(table_epochs.size(), 0UL);
441  auto current_first_epoch = table_epochs[0].table_epoch;
442 
443  // The following semantics apply to the three epochs referenced below.
444  //
445  // alter_catalog_checkpoint_epoch: This refers to the checkpoint prior to any
446  // data modification, only operations that have modified the catalog are
447  // expected to have happened.
448  //
449  // alter_data_checkpoint_epoch: This refers to the checkpoint immediately
450  // after data conversion is complete.
451  //
452  // completed_alter_column_checkpoint_epoch: This refers to the checkpoint
453  // after all cleanup operations complete for the alter table command.
454  //
455  // NOTE: If the table may have deleted but unvacuumed elements, there may be
456  // an additional epoch introduced at the start requiring an additional offset.
457  int alter_catalog_checkpoint_epoch = recovery_param.table_epoch;
458 
459  if (recovery_param.is_vacuumed) {
460  alter_catalog_checkpoint_epoch++;
461  }
462  int alter_data_checkpoint_epoch = alter_catalog_checkpoint_epoch + 1;
463  int completed_alter_column_checkpoint_epoch = alter_data_checkpoint_epoch + 1;
464 
465  if (current_first_epoch == alter_catalog_checkpoint_epoch) {
466  rollback(td, recovery_param);
467  // If a vacuum operation was performed prior to alter column, revert the
468  // operation.
469  if (recovery_param.is_vacuumed) {
471  catalog_.getDatabaseId(), td->tableId, recovery_param.table_epoch);
472  }
473  } else if (current_first_epoch == alter_data_checkpoint_epoch) {
474  auto src_dst_cds = getSrcDstCds(table_id, recovery_param.src_dst_cds);
475  try {
476  cleanup(td, src_dst_cds);
477  } catch (std::exception& except) {
478  throw std::runtime_error("Alter column recovery error during cleanup: " +
479  std::string(except.what()));
480  }
481  ChunkKey table_key{catalog_.getCurrentDB().dbId, td->tableId};
483  boost::hash_value(table_key));
486 
487  } else {
488  CHECK_EQ(current_first_epoch, completed_alter_column_checkpoint_epoch);
489  // no-op, last checkpoint reached in processing
490  }
491  LOG(INFO) << "Completed crash recovery for table: " << td->tableName;
492 }
static void invalidateCachesByTable(size_t table_key)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< int > ChunkKey
Definition: types.h:36
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:266
#define LOG(tag)
Definition: Logger.h:285
RecoveryInfo deserializeRecoveryInformation(const std::string &filename)
#define CHECK_GT(x, y)
Definition: Logger.h:305
std::list< std::pair< ColumnDescriptor, ColumnDescriptor > > src_dst_cds
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:265
int getDatabaseId() const
Definition: Catalog.h:326
TypePairs getSrcDstCds(int table_id, std::list< std::pair< ColumnDescriptor, ColumnDescriptor >> &pairs_list)
void deleteChunksWithPrefix(const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:522
void setTableEpoch(const int db_id, const int table_id, const int new_epoch)
Definition: Catalog.cpp:3647
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3525
void cleanup(const TableDescriptor *td, const TypePairs &src_dst_cds)
#define CHECK(condition)
Definition: Logger.h:291
std::optional< std::string > getTableName(int32_t table_id) const
Definition: Catalog.cpp:1869
void rollback(const TableDescriptor *td, const RecoveryInfo &param)
std::vector< TableEpochInfo > getTableEpochs(const int32_t db_id, const int32_t table_id) const
Definition: Catalog.cpp:3821

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string AlterTableAlterColumnCommandRecoveryMgr::recoveryFilepath ( const RecoveryParamFilepathInfo & filepath_info)

Definition at line 233 of file AlterColumnRecovery.cpp.

References AlterTableAlterColumnCommandRecoveryMgr::RecoveryParamFilepathInfo::base_path, AlterTableAlterColumnCommandRecoveryMgr::RecoveryParamFilepathInfo::db_name, getRecoveryPrefix(), and AlterTableAlterColumnCommandRecoveryMgr::RecoveryParamFilepathInfo::table_name.

Referenced by AlterTableAlterColumnCommand::cleanupRecoveryInfo(), getRecoveryFiles(), AlterTableAlterColumnCommand::populateAndWriteRecoveryInfo(), readSerializedRecoveryInformation(), and writeSerializedRecoveryInformation().

234  {
235  auto prefix = getRecoveryPrefix(filepath_info.base_path);
236 
237  if (filepath_info.table_name.empty()) {
238  return prefix.string();
239  }
240 
241  return (prefix /
242  std::filesystem::path("alter_column_recovery_db_" + filepath_info.db_name +
243  "_table_" + filepath_info.table_name + ".json"))
244  .string();
245 }
static std::filesystem::path getRecoveryPrefix(const std::string &base_path)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::resolveIncompleteAlterColumnCommands ( )
private

Definition at line 494 of file AlterColumnRecovery.cpp.

References catalog_, getRecoveryFiles(), logger::INFO, LOG, Catalog_Namespace::Catalog::name(), and recoverAlterTableAlterColumnFromFile().

494  {
495  auto recovery_files = getRecoveryFiles();
496  if (recovery_files.empty()) {
497  return;
498  }
499 
500  LOG(INFO) << "Starting crash recovery for tables in catalog: " << catalog_.name();
501  for (const auto& filepath : recovery_files) {
502  recoverAlterTableAlterColumnFromFile(filepath.string());
503  std::filesystem::remove(filepath);
504  }
505  LOG(INFO) << "Completed crash recovery for tables in catalog: " << catalog_.name();
506 }
#define LOG(tag)
Definition: Logger.h:285
void recoverAlterTableAlterColumnFromFile(const std::string &filename)
std::string name() const
Definition: Catalog.h:348
std::list< std::filesystem::path > getRecoveryFiles()

+ Here is the call graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::resolveIncompleteAlterColumnCommandsForAllCatalogs ( )
static

Definition at line 542 of file AlterColumnRecovery.cpp.

References createRecoveryManagersForCatalogs().

Referenced by startHeavyDBServer().

542  {
543  auto recovery_mgrs = createRecoveryManagersForCatalogs();
544 
545  for (auto& [dbname, recovery_mgr] : recovery_mgrs) {
546  recovery_mgr.resolveIncompleteAlterColumnCommands();
547  }
548 }
static std::map< std::string, AlterTableAlterColumnCommandRecoveryMgr > createRecoveryManagersForCatalogs()

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::rollback ( const TableDescriptor * td,
const RecoveryInfo & param 
)

Definition at line 31 of file AlterColumnRecovery.cpp.

References AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::added_columns, Catalog_Namespace::Catalog::alterColumnTypeTransactional(), AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::altered_columns, catalog_, Data_Namespace::CPU_LEVEL, Catalog_Namespace::DBMetadata::dbId, Catalog_Namespace::Catalog::delDictionaryTransactional(), Data_Namespace::DataMgr::deleteChunksWithPrefix(), Catalog_Namespace::Catalog::dropColumnTransactional(), Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Catalog_Namespace::Catalog::getAllDictionariesWithColumnInName(), Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDatabaseId(), Catalog_Namespace::Catalog::getDataMgr(), Data_Namespace::GPU_LEVEL, Catalog_Namespace::Catalog::renameColumn(), AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::renamed_columns, TableDescriptor::tableId, and AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::updated_dict_cds.

Referenced by AlterTableAlterColumnCommand::alterColumnTypes(), and recoverAlterTableAlterColumnFromFile().

32  {
33  auto cds = catalog_.getAllColumnMetadataForTable(td->tableId, false, false, true);
34 
35  // Drop any columns that were added
36  for (auto& added_column : param.added_columns) {
37  auto cd_it =
38  std::find_if(cds.begin(), cds.end(), [&added_column](const ColumnDescriptor* cd) {
39  return added_column.columnId == cd->columnId;
40  });
41  auto cd = *cd_it;
42  ChunkKey col_key{catalog_.getCurrentDB().dbId, td->tableId, cd->columnId};
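43  catalog_.dropColumnTransactional(*td, *cd);
44  auto& data_mgr = catalog_.getDataMgr();
45  data_mgr.deleteChunksWithPrefix(col_key, MemoryLevel::GPU_LEVEL);
46  data_mgr.deleteChunksWithPrefix(col_key, MemoryLevel::CPU_LEVEL);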
47  cds.erase(cd_it);
48  }
49 
50  // Rename any columns back to original name
51  for (auto& renamed_column : param.renamed_columns) {
52  auto cd_it = std::find_if(
53  cds.begin(), cds.end(), [&renamed_column](const ColumnDescriptor* cd) {
54  return renamed_column.columnId == cd->columnId;
55  });
56  if (cd_it != cds.end()) {
57  auto cd = *cd_it;
58  auto old_name = renamed_column.columnName;
59  if (cd->columnName != old_name) {
60  catalog_.renameColumn(td, cd, old_name);
61  }
62  }
63  }
64 
65  // Remove any added dictionary
66  for (auto& added_dict : param.updated_dict_cds) {
67  auto cd_it =
68  std::find_if(cds.begin(), cds.end(), [&added_dict](const ColumnDescriptor* cd) {
69  return added_dict.columnId == cd->columnId;
70  });
71  if (cd_it != cds.end()) {
72  auto cd = *cd_it;
73 
74  // Find all dictionaries, delete dictionaries which are defunct
75  auto dds = catalog_.getAllDictionariesWithColumnInName(cd);
76  for (const auto& dd : dds) {
77  if (!added_dict.columnType.is_dict_encoded_type() ||
78  dd->dictRef.dictId != added_dict.columnType.get_comp_param()) {
79  auto temp_cd = *cd;
80  temp_cd.columnType.set_comp_param(dd->dictRef.dictId);
81  temp_cd.columnType.setStringDictKey(
82  {catalog_.getDatabaseId(), dd->dictRef.dictId});
83  catalog_.delDictionaryTransactional(temp_cd);
84  }
85  }
86  }
87  }
88 
89  // Undo any altered column
90  for (auto& altered_column : param.altered_columns) {
91  auto cd_it = std::find_if(
92  cds.begin(), cds.end(), [&altered_column](const ColumnDescriptor* cd) {
93  return altered_column.new_cd.columnId == cd->columnId;
94  });
95  if (cd_it != cds.end()) {
96  catalog_.alterColumnTypeTransactional(altered_column.old_cd);
97  }
98  }
99 }
std::vector< int > ChunkKey
Definition: types.h:36
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:266
void delDictionaryTransactional(const ColumnDescriptor &cd)
Definition: Catalog.cpp:2267
void renameColumn(const TableDescriptor *td, const ColumnDescriptor *cd, const std::string &newColumnName)
Definition: Catalog.cpp:4621
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:265
std::list< const DictDescriptor * > getAllDictionariesWithColumnInName(const ColumnDescriptor *cd)
Definition: Catalog.cpp:2323
int getDatabaseId() const
Definition: Catalog.h:326
specifies the content in-memory of a row in the column metadata table
void deleteChunksWithPrefix(const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:522
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2172
void dropColumnTransactional(const TableDescriptor &td, const ColumnDescriptor &cd)
Definition: Catalog.cpp:2591
void alterColumnTypeTransactional(const ColumnDescriptor &cd)
Definition: Catalog.cpp:2383

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string AlterTableAlterColumnCommandRecoveryMgr::serializeRecoveryInformation ( const RecoveryInfo &  param)

Definition at line 271 of file AlterColumnRecovery.cpp.

References json_utils::add_value_to_object(), AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::added_columns, AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::altered_columns, AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::is_vacuumed, AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::renamed_columns, AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::src_dst_cds, AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::table_epoch, toPairedCds(), AlterTableAlterColumnCommandRecoveryMgr::RecoveryInfo::updated_dict_cds, and json_utils::write_to_string().

Referenced by writeSerializedRecoveryInformation().

272  {
273  rapidjson::Document d;
274  d.SetObject();
275 
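276  json_utils::add_value_to_object(
277  d, param.added_columns, "added_columns", d.GetAllocator());
278  json_utils::add_value_to_object(
279  d, toPairedCds(param.altered_columns), "altered_columns", d.GetAllocator());
280  json_utils::add_value_to_object(
281  d, param.renamed_columns, "renamed_columns", d.GetAllocator());
282  json_utils::add_value_to_object(
283  d, param.updated_dict_cds, "updated_dict_cds", d.GetAllocator());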
284  json_utils::add_value_to_object(d, param.table_epoch, "table_epoch", d.GetAllocator());
285  json_utils::add_value_to_object(d, param.is_vacuumed, "is_vacuumed", d.GetAllocator());
286 
287  json_utils::add_value_to_object(d, param.src_dst_cds, "src_dst_cds", d.GetAllocator());
288 
289  return json_utils::write_to_string(d);
290 }
std::list< std::pair< ColumnDescriptor, ColumnDescriptor > > toPairedCds(const std::list< ColumnAltered > &altered_columns)
std::string write_to_string(const rapidjson::Document &document)
Definition: JsonUtils.cpp:225
void add_value_to_object(rapidjson::Value &object, const T &value, const std::string &name, rapidjson::Document::AllocatorType &allocator)
Definition: JsonUtils.h:255

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::list< std::pair< ColumnDescriptor, ColumnDescriptor > > AlterTableAlterColumnCommandRecoveryMgr::toPairedCds ( const std::list< ColumnAltered > &  altered_columns)
private

Definition at line 194 of file AlterColumnRecovery.cpp.

Referenced by serializeRecoveryInformation().

195  {
196  std::list<std::pair<ColumnDescriptor, ColumnDescriptor>> retval;
197  for (const auto& [old_cd, new_cd] : altered_columns) {
198  retval.emplace_back(old_cd, new_cd);
199  }
200  return retval;
201 }

+ Here is the caller graph for this function:

void AlterTableAlterColumnCommandRecoveryMgr::writeSerializedRecoveryInformation ( const RecoveryInfo &  param,
const RecoveryParamFilepathInfo &  filepath_info 
)

Definition at line 292 of file AlterColumnRecovery.cpp.

References AlterTableAlterColumnCommandRecoveryMgr::RecoveryParamFilepathInfo::base_path, nvtx_helpers::anonymous_namespace{nvtx_helpers.cpp}::filename(), getRecoveryPrefix(), recoveryFilepath(), and serializeRecoveryInformation().

Referenced by AlterTableAlterColumnCommand::populateAndWriteRecoveryInfo().

294  {
295  auto filename = recoveryFilepath(filepath_info);
296 
297  // Create crash recovery directory if non-existent
298  auto prefix = getRecoveryPrefix(filepath_info.base_path).string();
299  if (!std::filesystem::exists(prefix)) {
300  if (!std::filesystem::create_directory(prefix)) {
301  throw std::runtime_error{"Error trying to create crash recovery directory \"" +
302  prefix + "\". The error was: " + std::strerror(errno)};
303  }
304  }
305 
306  // Use a temporary file name to indicate file has not been written yet
307  std::ofstream ofs(filename + ".tmp");
308  if (!ofs) {
309  throw std::runtime_error{"Error trying to create file \"" + filename +
310  "\". The error was: " + std::strerror(errno)};
311  }
312  ofs << serializeRecoveryInformation(param);
313  // Rename to target filename to indicate file was successfully written
314  std::filesystem::rename(filename + ".tmp", filename);
315 }
std::string serializeRecoveryInformation(const RecoveryInfo &param)
std::string recoveryFilepath(const RecoveryParamFilepathInfo &filepath_info)
static std::filesystem::path getRecoveryPrefix(const std::string &base_path)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

const std::string AlterTableAlterColumnCommandRecoveryMgr::kRecoveryDirectoryName = "crash_recovery"
inlinestatic

Definition at line 76 of file AlterColumnRecovery.h.

Referenced by getRecoveryPrefix().


The documentation for this class was generated from the following files: