OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
File_Namespace::FileMgr Class Reference

#include <FileMgr.h>

+ Inheritance diagram for File_Namespace::FileMgr:
+ Collaboration diagram for File_Namespace::FileMgr:

Public Member Functions

 FileMgr (const int32_t device_id, GlobalFileMgr *gfm, const TablePair file_mgr_key, const int32_t max_rollback_epochs=-1, const size_t num_reader_threads=0, const int32_t epoch=-1)
 Constructor. More...
 
 FileMgr (const int32_t device_id, GlobalFileMgr *gfm, const TablePair file_mgr_key, const bool run_core_init)
 
 FileMgr (GlobalFileMgr *gfm, std::string basePath)
 
 ~FileMgr () override
 Destructor. More...
 
StorageStats getStorageStats () const
 
FileBuffer * createBuffer (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
 Creates a chunk with the specified key and page size. More...
 
bool isBufferOnDevice (const ChunkKey &key) override
 
void deleteBuffer (const ChunkKey &key, const bool purge=true) override
 Deletes the chunk with the specified key. More...
 
void deleteBuffersWithPrefix (const ChunkKey &keyPrefix, const bool purge=true) override
 
FileBuffer * getBuffer (const ChunkKey &key, const size_t numBytes=0) override
 Returns a pointer to the chunk with the specified key. More...
 
void fetchBuffer (const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override
 
FileBuffer * putBuffer (const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override
 Puts the contents of d into the Chunk with the given key. More...
 
AbstractBuffer * alloc (const size_t numBytes) override
 
void free (AbstractBuffer *buffer) override
 
virtual Page requestFreePage (size_t pagesize, const bool isMetadata)
 
MgrType getMgrType () override
 
std::string getStringMgrType () override
 
std::string printSlabs () override
 
size_t getMaxSize () override
 
size_t getInUseSize () override
 
size_t getAllocated () override
 
bool isAllocationCapped () override
 
FileInfo * getFileInfoForFileId (const int32_t fileId) const
 
FileMetadata getMetadataForFile (const boost::filesystem::directory_iterator &fileIterator) const
 
void init (const size_t num_reader_threads, const int32_t epochOverride)
 
void init (const std::string &dataPathToConvertFrom, const int32_t epochOverride)
 
void copyPage (Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
 
void requestFreePages (size_t npages, size_t pagesize, std::vector< Page > &pages, const bool isMetadata)
 Obtains free pages – creates new files if necessary – of the requested size. More...
 
void getChunkMetadataVecForKeyPrefix (ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix) override
 
bool hasChunkMetadataForKeyPrefix (const ChunkKey &keyPrefix)
 
void checkpoint () override
 Fsyncs data files, writes out epoch and fsyncs that. More...
 
void checkpoint (const int32_t db_id, const int32_t tb_id) override
 
virtual int32_t epoch (int32_t db_id, int32_t tb_id) const
 Returns current value of epoch - should be one greater than recorded at last checkpoint. Because FileMgr only contains buffers from one table we can just return the FileMgr's epoch instead of finding a table-specific epoch. More...
 
int32_t epochFloor () const
 
int32_t incrementEpoch ()
 
int32_t lastCheckpointedEpoch () const
 Returns value of epoch at last checkpoint. More...
 
void resetEpochFloor ()
 
int32_t maxRollbackEpochs ()
 Returns the value of max_rollback_epochs. More...
 
size_t getNumReaderThreads ()
 Returns the number of threads, defined by the parameter num-reader-threads, that should be used during the initial load and subsequent reads of data. More...
 
FILE * getFileForFileId (const int32_t fileId)
 Returns FILE pointer associated with requested fileId. More...
 
size_t getNumChunks () override
 
size_t getNumUsedMetadataPagesForChunkKey (const ChunkKey &chunkKey) const
 
int32_t getDBVersion () const
 Index for looking up chunks. More...
 
bool getDBConvert () const
 
void createTopLevelMetadata ()
 
std::string getFileMgrBasePath () const
 
virtual void closeRemovePhysical ()
 
void removeTableRelatedDS (const int32_t db_id, const int32_t table_id) override
 
virtual void free_page (std::pair< FileInfo *, int32_t > &&page)
 
virtual bool hasFileMgrKey () const
 
const TablePair get_fileMgrKey () const
 
boost::filesystem::path getFilePath (const std::string &file_name) const
 
void writePageMappingsToStatusFile (const std::vector< PageMapping > &page_mappings)
 
void renameCompactionStatusFile (const char *const from_status, const char *const to_status)
 
void compactFiles ()
 
virtual bool updatePageIfDeleted (FileInfo *file_info, ChunkKey &chunk_key, int32_t contingent, int32_t page_epoch, int32_t page_num)
 Deletes or recovers a page based on the last checkpointed epoch. More...
 
virtual bool failOnReadError () const
 True if a read error should cause a fatal error. More...
 
size_t getPageSize () const
 
size_t getMetadataPageSize () const
 
virtual std::string describeSelf () const
 

Static Public Member Functions

static void setNumPagesPerDataFile (size_t num_pages)
 
static void setNumPagesPerMetadataFile (size_t num_pages)
 
static void renameAndSymlinkLegacyFiles (const std::string &table_data_dir)
 

Public Attributes

ChunkKeyToChunkMap chunkIndex_
 

Static Public Attributes

static constexpr size_t DEFAULT_NUM_PAGES_PER_DATA_FILE {256}
 
static constexpr size_t DEFAULT_NUM_PAGES_PER_METADATA_FILE {4096}
 
static constexpr char const * COPY_PAGES_STATUS {"pending_data_compaction_0"}
 
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS {"pending_data_compaction_1"}
 
static constexpr char const * DELETE_EMPTY_FILES_STATUS {"pending_data_compaction_2"}
 
static constexpr char LEGACY_EPOCH_FILENAME [] = "epoch"
 
static constexpr char EPOCH_FILENAME [] = "epoch_metadata"
 
static constexpr char DB_META_FILENAME [] = "dbmeta"
 
static constexpr char FILE_MGR_VERSION_FILENAME [] = "filemgr_version"
 
static constexpr int32_t INVALID_VERSION = -1
 

Protected Member Functions

 FileMgr (const size_t defaultPageSize, const size_t defaultMetadataPageSize)
 
FileInfo * createFile (const size_t pageSize, const size_t numPages)
 Adds a file to the file manager repository. More...
 
FileInfo * openExistingFile (const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
 
void createEpochFile (const std::string &epochFileName)
 
int32_t openAndReadLegacyEpochFile (const std::string &epochFileName)
 
void openAndReadEpochFile (const std::string &epochFileName)
 
void writeAndSyncEpochToDisk ()
 
void setEpoch (const int32_t newEpoch)
 
int32_t readVersionFromDisk (const std::string &versionFileName) const
 
void writeAndSyncVersionToDisk (const std::string &versionFileName, const int32_t version)
 
void processFileFutures (std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
 
virtual FileBuffer * createBufferUnlocked (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
 
virtual FileBuffer * createBufferFromHeaders (const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
 
void migrateToLatestFileMgrVersion ()
 
void migrateEpochFileV0 ()
 
void migrateLegacyFilesV1 ()
 
OpenFilesResult openFiles ()
 
void clearFileInfos ()
 
void copySourcePageForCompaction (const Page &source_page, FileInfo *destination_file_info, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
 
int32_t copyPageWithoutHeaderSize (const Page &source_page, const Page &destination_page)
 
void sortAndCopyFilePagesForCompaction (size_t page_size, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
 
void updateMappedPagesVisibility (const std::vector< PageMapping > &page_mappings)
 
void deleteEmptyFiles ()
 
void resumeFileCompaction (const std::string &status_file_name)
 
std::vector< PageMapping > readPageMappingsFromStatusFile ()
 
 FileMgr (const int epoch)
 
void closePhysicalUnlocked ()
 
void syncFilesToDisk ()
 
void freePages ()
 
void initializeNumThreads (size_t num_reader_threads=0)
 
virtual FileBuffer * allocateBuffer (const size_t page_size, const ChunkKey &key, const size_t num_bytes=0)
 
virtual FileBuffer * allocateBuffer (const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
 
virtual
ChunkKeyToChunkMap::iterator 
deleteBufferUnlocked (const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true)
 
virtual FileBuffer * getBufferUnlocked (const ChunkKey &key, const size_t numBytes=0) const
 

Protected Attributes

int32_t maxRollbackEpochs_
 
std::string fileMgrBasePath_
 
std::map< int32_t, FileInfo * > files_
 A map of files accessible via a file identifier. More...
 
PageSizeFileMMap fileIndex_
 Maps page sizes to FileInfo objects. More...
 
size_t num_reader_threads_
 number of threads used when loading data More...
 
unsigned nextFileId_
 the index of the next file id More...
 
int32_t db_version_
 
int32_t fileMgrVersion_
 
const int32_t latestFileMgrVersion_ {2}
 
FILE * DBMetaFile_ = nullptr
 pointer to DB level metadata More...
 
std::mutex getPageMutex_
 
heavyai::shared_mutex chunkIndexMutex_
 
heavyai::shared_mutex files_rw_mutex_
 
heavyai::shared_mutex mutex_free_page_
 
std::vector< std::pair
< FileInfo *, int32_t > > 
free_pages_
 
bool isFullyInitted_ {false}
 
const size_t page_size_
 
const size_t metadata_page_size_
 

Static Protected Attributes

static size_t num_pages_per_data_file_ {DEFAULT_NUM_PAGES_PER_DATA_FILE}
 
static size_t num_pages_per_metadata_file_ {DEFAULT_NUM_PAGES_PER_METADATA_FILE}
 

Private Member Functions

void rollOffOldData (const int32_t epochCeiling, const bool shouldCheckpoint)
 
void freePagesBeforeEpoch (const int32_t min_epoch)
 
void freePagesBeforeEpochUnlocked (const int32_t min_epoch, const ChunkKeyToChunkMap::iterator lower_bound, const ChunkKeyToChunkMap::iterator upper_bound)
 
FileBuffer * getOrCreateBuffer (const ChunkKey &key)
 
bool coreInit ()
 Determines file path, and if exists, runs file migration and opens and reads epoch file. More...
 
int32_t epoch () const
 
void writeDirtyBuffers ()
 
void setDataAndMetadataFileStats (StorageStats &storage_stats) const
 
uint32_t getFragmentCount () const
 

Private Attributes

GlobalFileMgr * gfm_
 Global FileMgr. More...
 
TablePair fileMgrKey_
 
Epoch epoch_
 
bool epochIsCheckpointed_ = true
 
FILE * epochFile_ = nullptr
 

Friends

class GlobalFileMgr
 

Detailed Description

Definition at line 154 of file FileMgr.h.

Constructor & Destructor Documentation

File_Namespace::FileMgr::FileMgr ( const int32_t  device_id,
GlobalFileMgr gfm,
const TablePair  file_mgr_key,
const int32_t  max_rollback_epochs = -1,
const size_t  num_reader_threads = 0,
const int32_t  epoch = -1 
)

Constructor.

Definition at line 47 of file FileMgr.cpp.

References init().

53  : AbstractBufferMgr(device_id)
54  , maxRollbackEpochs_(max_rollback_epochs)
55  , nextFileId_(0)
56  , gfm_(gfm)
57  , fileMgrKey_(file_mgr_key)
58  , page_size_(gfm->getPageSize())
59  , metadata_page_size_(gfm->getMetadataPageSize()) {
60  init(num_reader_threads, epoch);
61 }
const size_t metadata_page_size_
Definition: FileMgr.h:536
TablePair fileMgrKey_
Definition: FileMgr.h:524
const size_t page_size_
Definition: FileMgr.h:535
void init(const size_t num_reader_threads, const int32_t epochOverride)
Definition: FileMgr.cpp:256
GlobalFileMgr * gfm_
Global FileMgr.
Definition: FileMgr.h:523
int32_t maxRollbackEpochs_
Definition: FileMgr.h:396
int32_t epoch() const
Definition: FileMgr.h:517
unsigned nextFileId_
the index of the next file id
Definition: FileMgr.h:403

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( const int32_t  device_id,
GlobalFileMgr gfm,
const TablePair  file_mgr_key,
const bool  run_core_init 
)

Definition at line 64 of file FileMgr.cpp.

References coreInit(), epochFile_, fileMgrBasePath_, fileMgrKey_, files_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, and to_string().

68  : AbstractBufferMgr(device_id)
69  , maxRollbackEpochs_(-1)
70  , nextFileId_(0)
71  , gfm_(gfm)
72  , fileMgrKey_(file_mgr_key)
73  , page_size_(gfm->getPageSize())
74  , metadata_page_size_(gfm->getMetadataPageSize()) {
75  const std::string fileMgrDirPrefix("table");
76  const std::string fileMgrDirDelim("_");
77  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + fileMgrDirDelim +
78  std::to_string(fileMgrKey_.first) + // db_id
79  fileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
80  epochFile_ = nullptr;
81  files_.clear();
82  if (run_core_init) {
83  coreInit();
84  }
85 }
const size_t metadata_page_size_
Definition: FileMgr.h:536
TablePair fileMgrKey_
Definition: FileMgr.h:524
std::string getBasePath() const
const size_t page_size_
Definition: FileMgr.h:535
GlobalFileMgr * gfm_
Global FileMgr.
Definition: FileMgr.h:523
std::string fileMgrBasePath_
Definition: FileMgr.h:397
std::string to_string(char const *&&v)
int32_t maxRollbackEpochs_
Definition: FileMgr.h:396
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
bool coreInit()
Determines file path, and if exists, runs file migration and opens and reads epoch file...
Definition: FileMgr.cpp:133
unsigned nextFileId_
the index of the next file id
Definition: FileMgr.h:403

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( GlobalFileMgr gfm,
std::string  basePath 
)

Definition at line 87 of file FileMgr.cpp.

References init().

88  : AbstractBufferMgr(0)
89  , maxRollbackEpochs_(-1)
90  , fileMgrBasePath_(base_path)
91  , nextFileId_(0)
92  , gfm_(gfm)
93  , fileMgrKey_(0, 0)
94  , page_size_(gfm->getPageSize())
95  , metadata_page_size_(gfm->getMetadataPageSize()) {
96  init(base_path, -1);
97 }
const size_t metadata_page_size_
Definition: FileMgr.h:536
TablePair fileMgrKey_
Definition: FileMgr.h:524
const size_t page_size_
Definition: FileMgr.h:535
void init(const size_t num_reader_threads, const int32_t epochOverride)
Definition: FileMgr.cpp:256
GlobalFileMgr * gfm_
Global FileMgr.
Definition: FileMgr.h:523
std::string fileMgrBasePath_
Definition: FileMgr.h:397
int32_t maxRollbackEpochs_
Definition: FileMgr.h:396
unsigned nextFileId_
the index of the next file id
Definition: FileMgr.h:403

+ Here is the call graph for this function:

File_Namespace::FileMgr::~FileMgr ( )
override

Destructor.

Definition at line 113 of file FileMgr.cpp.

References chunkIndex_, File_Namespace::close(), DBMetaFile_, epochFile_, and files_.

113  {
114  // free memory used by FileInfo objects
115  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
116  delete chunkIt->second;
117  }
118  for (auto file_info_entry : files_) {
119  delete file_info_entry.second;
120  }
121 
122  if (epochFile_) {
123  close(epochFile_);
124  epochFile_ = nullptr;
125  }
126 
127  if (DBMetaFile_) {
129  DBMetaFile_ = nullptr;
130  }
131 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( const size_t  defaultPageSize,
const size_t  defaultMetadataPageSize 
)
protected

Definition at line 108 of file FileMgr.cpp.

109  : AbstractBufferMgr(0)
110  , page_size_(page_size)
111  , metadata_page_size_(metadata_page_size) {}
const size_t metadata_page_size_
Definition: FileMgr.h:536
const size_t page_size_
Definition: FileMgr.h:535
File_Namespace::FileMgr::FileMgr ( const int  epoch)
protected

Definition at line 100 of file FileMgr.cpp.

References Epoch::ceiling(), and epoch_.

101  : AbstractBufferMgr(-1)
105 }
const size_t metadata_page_size_
Definition: FileMgr.h:536
const size_t page_size_
Definition: FileMgr.h:535
#define DEFAULT_METADATA_PAGE_SIZE
int32_t ceiling() const
Definition: Epoch.h:44
#define DEFAULT_PAGE_SIZE
int32_t epoch() const
Definition: FileMgr.h:517

+ Here is the call graph for this function:

Member Function Documentation

AbstractBuffer * File_Namespace::FileMgr::alloc ( const size_t  numBytes = 0)
override

Definition at line 866 of file FileMgr.cpp.

References logger::FATAL, and LOG.

866  {
867  LOG(FATAL) << "Operation not supported";
868  return nullptr; // satisfy return-type warning
869 }
#define LOG(tag)
Definition: Logger.h:285
FileBuffer * File_Namespace::FileMgr::allocateBuffer ( const size_t  page_size,
const ChunkKey key,
const size_t  num_bytes = 0 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1601 of file FileMgr.cpp.

Referenced by createBufferFromHeaders(), and createBufferUnlocked().

1603  {
1604  return new FileBuffer(this, page_size, key, num_bytes);
1605 }

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::allocateBuffer ( const ChunkKey key,
const std::vector< HeaderInfo >::const_iterator &  headerStartIt,
const std::vector< HeaderInfo >::const_iterator &  headerEndIt 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1607 of file FileMgr.cpp.

1610  {
1611  return new FileBuffer(this, key, headerStartIt, headerEndIt);
1612 }
void File_Namespace::FileMgr::checkpoint ( )
override

Fsyncs data files, writes out epoch and fsyncs that.

Definition at line 703 of file FileMgr.cpp.

References describeSelf(), epoch(), freePages(), incrementEpoch(), rollOffOldData(), syncFilesToDisk(), VLOG, writeAndSyncEpochToDisk(), and writeDirtyBuffers().

Referenced by rollOffOldData().

703  {
704  VLOG(2) << "Checkpointing " << describeSelf() << " epoch: " << epoch();
706  rollOffOldData(epoch(), false /* shouldCheckpoint */);
707  syncFilesToDisk();
709  incrementEpoch();
710  freePages();
711 }
void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint)
Definition: FileMgr.cpp:684
int32_t incrementEpoch()
Definition: FileMgr.h:281
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:656
virtual std::string describeSelf() const
Definition: FileMgr.cpp:697
int32_t epoch() const
Definition: FileMgr.h:517
#define VLOG(n)
Definition: Logger.h:387

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::checkpoint ( const int32_t  db_id,
const int32_t  tb_id 
)
inline override

Definition at line 268 of file FileMgr.h.

References logger::FATAL, and LOG.

268  {
269  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
270  }
#define LOG(tag)
Definition: Logger.h:285
void File_Namespace::FileMgr::clearFileInfos ( )
protected

Definition at line 243 of file FileMgr.cpp.

References File_Namespace::close(), fileIndex_, and files_.

Referenced by init().

243  {
244  for (auto file_info_entry : files_) {
245  auto file_info = file_info_entry.second;
246  if (file_info->f) {
247  close(file_info->f);
248  file_info->f = nullptr;
249  }
250  delete file_info;
251  }
252  files_.clear();
253  fileIndex_.clear();
254 }
PageSizeFileMMap fileIndex_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:401
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::closePhysicalUnlocked ( )
protected

Definition at line 558 of file FileMgr.cpp.

References File_Namespace::close(), DBMetaFile_, epochFile_, and files_.

Referenced by File_Namespace::CachingFileMgr::closeRemovePhysical(), and closeRemovePhysical().

558  {
559  for (auto& [idx, file_info] : files_) {
560  if (file_info->f) {
561  close(file_info->f);
562  file_info->f = nullptr;
563  }
564  }
565 
566  if (DBMetaFile_) {
568  DBMetaFile_ = nullptr;
569  }
570 
571  if (epochFile_) {
572  close(epochFile_);
573  epochFile_ = nullptr;
574  }
575 }
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::closeRemovePhysical ( )
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 577 of file FileMgr.cpp.

References closePhysicalUnlocked(), files_rw_mutex_, getFileMgrBasePath(), and File_Namespace::renameForDelete().

577  {
580  /* rename for later deletion the directory containing table related data */
582 }
std::string getFileMgrBasePath() const
Definition: FileMgr.h:331
heavyai::unique_lock< heavyai::shared_mutex > write_lock
std::unique_lock< T > unique_lock
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411
void renameForDelete(const std::string directoryName)
Renames a directory to DELETE_ME_<EPOCH>_<oldname>.
Definition: File.cpp:242

+ Here is the call graph for this function:

void File_Namespace::FileMgr::compactFiles ( )

Compacts metadata and data file pages and deletes resulting empty files (if any exist). Compaction occurs in 3 idempotent phases in order to enable graceful recovery if a crash/process interruption occurs in the middle of data compaction.

Phase 1: Create a status file that indicates initiation of this phase. Sort metadata/data files in order of files with the lowest number of free pages to those with the highest number of free pages. Copy over used pages from files at the end of the sorted order (files with the highest number of free pages) to those at the beginning of the sorted order (files with the lowest number of free pages). Keep destination/copied to pages as free while copying. Keep track of copied source to destination page mapping. Write page mapping to the status file (to be used during crash recovery if needed).

Phase 2: Rename status file to a file name that indicates initiation of this phase. Go through page mapping and mark source/copied from pages as free while making the destination/copied to pages as used.

Phase 3: Rename status file to a file name that indicates initiation of this phase. Delete all empty files (files containing only free pages). Delete status file.

Definition at line 1265 of file FileMgr.cpp.

References CHECK, COPY_PAGES_STATUS, DELETE_EMPTY_FILES_STATUS, deleteEmptyFiles(), files_, files_rw_mutex_, getFilePath(), renameCompactionStatusFile(), sortAndCopyFilePagesForCompaction(), UPDATE_PAGE_VISIBILITY_STATUS, updateMappedPagesVisibility(), and writePageMappingsToStatusFile().

Referenced by resumeFileCompaction().

1265  {
1267  if (files_.empty()) {
1268  return;
1269  }
1270 
1271  auto copy_pages_status_file_path = getFilePath(COPY_PAGES_STATUS);
1272  CHECK(!boost::filesystem::exists(copy_pages_status_file_path));
1273  std::ofstream status_file(copy_pages_status_file_path.string(),
1274  std::ios::out | std::ios::binary);
1275  status_file.close();
1276 
1277  std::vector<PageMapping> page_mappings;
1278  std::set<Page> touched_pages;
1279  std::set<size_t> page_sizes;
1280  for (auto [file_id, file_info] : files_) {
1281  page_sizes.emplace(file_info->pageSize);
1282  }
1283  for (auto page_size : page_sizes) {
1284  sortAndCopyFilePagesForCompaction(page_size, page_mappings, touched_pages);
1285  }
1286 
1287  writePageMappingsToStatusFile(page_mappings);
1289 
1290  updateMappedPagesVisibility(page_mappings);
1292 
1293  deleteEmptyFiles();
1294 }
void sortAndCopyFilePagesForCompaction(size_t page_size, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
Definition: FileMgr.cpp:1302
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:376
heavyai::unique_lock< heavyai::shared_mutex > write_lock
void writePageMappingsToStatusFile(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1512
std::unique_lock< T > unique_lock
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1667
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:377
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:375
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
void updateMappedPagesVisibility(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1457
#define CHECK(condition)
Definition: Logger.h:291
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411
void renameCompactionStatusFile(const char *const from_status, const char *const to_status)
Definition: FileMgr.cpp:1555

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::copyPage ( Page srcPage,
FileMgr destFileMgr,
Page destPage,
const size_t  reservedHeaderSize,
const size_t  numBytes,
const size_t  offset 
)

Definition at line 584 of file FileMgr.cpp.

References CHECK, checked_malloc(), File_Namespace::Page::fileId, free(), getFileInfoForFileId(), page_size_, File_Namespace::Page::pageNum, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by init().

589  {
590  CHECK(offset + numBytes <= page_size_);
591  FileInfo* srcFileInfo = getFileInfoForFileId(srcPage.fileId);
592  FileInfo* destFileInfo = destFileMgr->getFileInfoForFileId(destPage.fileId);
593  int8_t* buffer = reinterpret_cast<int8_t*>(checked_malloc(numBytes));
594 
595  size_t bytesRead = srcFileInfo->read(
596  srcPage.pageNum * page_size_ + offset + reservedHeaderSize, numBytes, buffer);
597  CHECK(bytesRead == numBytes);
598  size_t bytesWritten = destFileInfo->write(
599  destPage.pageNum * page_size_ + offset + reservedHeaderSize, numBytes, buffer);
600  CHECK(bytesWritten == numBytes);
601  ::free(buffer);
602 }
const size_t page_size_
Definition: FileMgr.h:535
void free(AbstractBuffer *buffer) override
Definition: FileMgr.cpp:871
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
#define CHECK(condition)
Definition: Logger.h:291
FileInfo * getFileInfoForFileId(const int32_t fileId) const
Definition: FileMgr.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::copyPageWithoutHeaderSize ( const Page source_page,
const Page destination_page 
)
protected

Copies content of source_page to destination_page without copying over the source_page header size. The header size is instead returned by the method. Not copying over the header size enables a use case where destination_page has all the content of the source_page but is still marked as a free page.

Definition at line 1427 of file FileMgr.cpp.

References CHECK, CHECK_EQ, File_Namespace::Page::fileId, File_Namespace::FileInfo::fileId, files_, File_Namespace::Page::pageNum, File_Namespace::FileInfo::pageSize, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by copySourcePageForCompaction().

1428  {
1429  FileInfo* source_file_info = files_.at(source_page.fileId);
1430  CHECK(source_file_info);
1431  CHECK_EQ(source_file_info->fileId, source_page.fileId);
1432 
1433  FileInfo* destination_file_info = files_.at(destination_page.fileId);
1434  CHECK(destination_file_info);
1435  CHECK_EQ(destination_file_info->fileId, destination_page.fileId);
1436  CHECK_EQ(source_file_info->pageSize, destination_file_info->pageSize);
1437 
1438  auto page_size = source_file_info->pageSize;
1439  auto buffer = std::make_unique<int8_t[]>(page_size);
1440  size_t bytes_read =
1441  source_file_info->read(source_page.pageNum * page_size, page_size, buffer.get());
1442  CHECK_EQ(page_size, bytes_read);
1443 
1444  auto header_size_offset = sizeof(int32_t);
1445  size_t bytes_written = destination_file_info->write(
1446  (destination_page.pageNum * page_size) + header_size_offset,
1447  page_size - header_size_offset,
1448  buffer.get() + header_size_offset);
1449  CHECK_EQ(page_size - header_size_offset, bytes_written);
1450  return reinterpret_cast<int32_t*>(buffer.get())[0];
1451 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::copySourcePageForCompaction ( const Page source_page,
FileInfo destination_file_info,
std::vector< PageMapping > &  page_mappings,
std::set< Page > &  touched_pages 
)
protected

Copies a used page (indicated by the top of the source_used_pages set) from the given source file to a free page in the given destination file. Source and destination pages are recorded in the given page_mappings vector after copying is done.

Definition at line 1397 of file FileMgr.cpp.

References CHECK, CHECK_NE, copyPageWithoutHeaderSize(), File_Namespace::Page::fileId, File_Namespace::FileInfo::fileId, File_Namespace::FileInfo::getFreePage(), and File_Namespace::Page::pageNum.

Referenced by sortAndCopyFilePagesForCompaction().

1400  {
1401  size_t destination_page_num = destination_file_info->getFreePage();
1402  CHECK_NE(destination_page_num, static_cast<size_t>(-1));
1403  Page destination_page{destination_file_info->fileId, destination_page_num};
1404 
1405  // Assert that the same pages are not copied or overridden multiple times
1406  CHECK(touched_pages.find(source_page) == touched_pages.end());
1407  touched_pages.emplace(source_page);
1408 
1409  CHECK(touched_pages.find(destination_page) == touched_pages.end());
1410  touched_pages.emplace(destination_page);
1411 
1412  auto header_size = copyPageWithoutHeaderSize(source_page, destination_page);
1413  page_mappings.emplace_back(static_cast<size_t>(source_page.fileId),
1414  source_page.pageNum,
1415  header_size,
1416  static_cast<size_t>(destination_page.fileId),
1417  destination_page.pageNum);
1418 }
int32_t copyPageWithoutHeaderSize(const Page &source_page, const Page &destination_page)
Definition: FileMgr.cpp:1427
#define CHECK_NE(x, y)
Definition: Logger.h:302
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::coreInit ( )
private

Determines file path, and if exists, runs file migration and opens and reads epoch file.

Returns
a boolean representing whether the directory path existed

Definition at line 133 of file FileMgr.cpp.

References EPOCH_FILENAME, logger::FATAL, fileMgrBasePath_, fileMgrKey_, files_rw_mutex_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, LOG, migrateToLatestFileMgrVersion(), openAndReadEpochFile(), and to_string().

Referenced by FileMgr(), and init().

133  {
135  const std::string fileMgrDirPrefix("table");
136  const std::string FileMgrDirDelim("_");
137  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
138  std::to_string(fileMgrKey_.first) + // db_id
139  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
140  boost::filesystem::path path(fileMgrBasePath_);
141  if (boost::filesystem::exists(path)) {
142  if (!boost::filesystem::is_directory(path)) {
143  LOG(FATAL) << "Specified path '" << fileMgrBasePath_
144  << "' for table data is not a directory.";
145  }
148  return true;
149  }
150  return false;
151 }
TablePair fileMgrKey_
Definition: FileMgr.h:524
std::string getBasePath() const
#define LOG(tag)
Definition: Logger.h:285
void migrateToLatestFileMgrVersion()
Definition: FileMgr.cpp:1146
GlobalFileMgr * gfm_
Global FileMgr.
Definition: FileMgr.h:523
heavyai::unique_lock< heavyai::shared_mutex > write_lock
std::string fileMgrBasePath_
Definition: FileMgr.h:397
std::string to_string(char const *&&v)
std::unique_lock< T > unique_lock
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:387
void openAndReadEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:636
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::createBuffer ( const ChunkKey key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
override

Creates a chunk with the specified key and page size.

Definition at line 713 of file FileMgr.cpp.

References CHECK, chunkIndex_, chunkIndexMutex_, createBufferUnlocked(), and show_chunk().

Referenced by init().

715  {
717  CHECK(chunkIndex_.find(key) == chunkIndex_.end())
718  << "Chunk already exists for key: " << show_chunk(key);
719  return createBufferUnlocked(key, page_size, num_bytes);
720 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
virtual FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:723
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::createBufferFromHeaders ( const ChunkKey key,
const std::vector< HeaderInfo >::const_iterator &  headerStartIt,
const std::vector< HeaderInfo >::const_iterator &  headerEndIt 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 734 of file FileMgr.cpp.

References allocateBuffer(), CHECK, chunkIndex_, chunkIndexMutex_, and show_chunk().

Referenced by init().

737  {
739  CHECK(chunkIndex_.find(key) == chunkIndex_.end())
740  << "Chunk already exists for key: " << show_chunk(key);
741  chunkIndex_[key] = allocateBuffer(key, headerStartIt, headerEndIt);
742  return (chunkIndex_[key]);
743 }
virtual FileBuffer * allocateBuffer(const size_t page_size, const ChunkKey &key, const size_t num_bytes=0)
Definition: FileMgr.cpp:1601
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::createBufferUnlocked ( const ChunkKey key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 723 of file FileMgr.cpp.

References allocateBuffer(), chunkIndex_, and page_size_.

Referenced by createBuffer(), and getOrCreateBuffer().

725  {
726  size_t actual_page_size = page_size;
727  if (actual_page_size == 0) {
728  actual_page_size = page_size_;
729  }
730  chunkIndex_[key] = allocateBuffer(actual_page_size, key, num_bytes);
731  return (chunkIndex_[key]);
732 }
const size_t page_size_
Definition: FileMgr.h:535
virtual FileBuffer * allocateBuffer(const size_t page_size, const ChunkKey &key, const size_t num_bytes=0)
Definition: FileMgr.cpp:1601
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createEpochFile ( const std::string &  epochFileName)
protected

Definition at line 604 of file FileMgr.cpp.

References Epoch::byte_size(), File_Namespace::create(), epochFile_, logger::FATAL, fileMgrBasePath_, LOG, and writeAndSyncEpochToDisk().

Referenced by init(), and migrateEpochFileV0().

604  {
605  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
606  if (boost::filesystem::exists(epochFilePath)) {
607  LOG(FATAL) << "Epoch file `" << epochFilePath << "` already exists";
608  }
609  epochFile_ = create(epochFilePath, sizeof(Epoch::byte_size()));
610  // Write out current epoch to file - which if this
611  // function is being called should be 0
613 }
#define LOG(tag)
Definition: Logger.h:285
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:57
std::string fileMgrBasePath_
Definition: FileMgr.h:397
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:656
static size_t byte_size()
Definition: Epoch.h:63

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::createFile ( const size_t  pageSize,
const size_t  numPages 
)
protected

Adds a file to the file manager repository.

This method will create a FileInfo object for the file being added, and it will create the corresponding file on physical disk with the indicated number of pages pre-allocated.

A pointer to the FileInfo object is returned, which itself has a file pointer (FILE*) and a file identifier (int32_t fileId).

Parameters
fileName  The name given to the file in physical storage. (Note: not a parameter of the current signature.)
pageSize  The logical page size for the pages in the file.
numPages  The number of logical pages to initially allocate for the file.
Returns
FileInfo* A pointer to the FileInfo object of the added file.

Definition at line 960 of file FileMgr.cpp.

References CHECK, File_Namespace::create(), anonymous_namespace{Utm.h}::f, logger::FATAL, fileIndex_, fileMgrBasePath_, files_, files_rw_mutex_, LOG, and nextFileId_.

Referenced by requestFreePage(), and requestFreePages().

960  {
961  // check arguments
962  if (pageSize == 0 || numPages == 0) {
963  LOG(FATAL) << "File creation failed: pageSize and numPages must be greater than 0.";
964  }
965 
966  // create the new file
967  auto [f, file_path] = create(fileMgrBasePath_,
968  nextFileId_,
969  pageSize,
970  numPages); // TM: not sure if I like naming scheme here -
971  // should be in separate namespace?
972  CHECK(f);
973 
974  // instantiate a new FileInfo for the newly created file
975  int32_t fileId = nextFileId_++;
976  FileInfo* fInfo = new FileInfo(
977  this, fileId, f, pageSize, numPages, file_path, true); // true means init file
978  CHECK(fInfo);
979 
981  // update file manager data structures
982  files_[fileId] = fInfo;
983  fileIndex_.insert(std::pair<size_t, int32_t>(pageSize, fileId));
984 
985  return fInfo;
986 }
#define LOG(tag)
Definition: Logger.h:285
heavyai::unique_lock< heavyai::shared_mutex > write_lock
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:57
std::string fileMgrBasePath_
Definition: FileMgr.h:397
constexpr double f
Definition: Utm.h:31
PageSizeFileMMap fileIndex_
Maps page sizes to the ids of the files that use that page size.
Definition: FileMgr.h:401
std::unique_lock< T > unique_lock
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
#define CHECK(condition)
Definition: Logger.h:291
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411
unsigned nextFileId_
The index of the next file id.
Definition: FileMgr.h:403

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createTopLevelMetadata ( )

Definition at line 1045 of file FileMgr.cpp.

References DB_META_FILENAME, db_version_, logger::FATAL, getDBVersion(), INVALID_VERSION, LOG, readVersionFromDisk(), and writeAndSyncVersionToDisk().

1045  {
1047 
1048  if (db_version_ > getDBVersion()) {
1049  LOG(FATAL) << "DB forward compatibility is not supported. Version of HeavyDB "
1050  "software used is older than the version of DB being read: "
1051  << db_version_;
1052  }
1054  // new system, or we are moving forward versions
1055  // system wide migration would go here if required
1057  return;
1058  }
1059 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1061
#define LOG(tag)
Definition: Logger.h:285
int32_t db_version_
DB version associated with this FileMgr (checked against getDBVersion() in createTopLevelMetadata()).
Definition: FileMgr.h:404
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1079
int32_t getDBVersion() const
Returns the DB version reported by the GlobalFileMgr.
Definition: FileMgr.cpp:1037
static constexpr char DB_META_FILENAME[]
Definition: FileMgr.h:388
static constexpr int32_t INVALID_VERSION
Definition: FileMgr.h:390

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffer ( const ChunkKey key,
const bool  purge = true 
)
override

Deletes the chunk with the specified key.

Definition at line 750 of file FileMgr.cpp.

References CHECK, chunkIndex_, chunkIndexMutex_, deleteBufferUnlocked(), and show_chunk().

750  {
752  auto chunk_it = chunkIndex_.find(key);
753  CHECK(chunk_it != chunkIndex_.end())
754  << "Chunk does not exist for key: " << show_chunk(key);
755  deleteBufferUnlocked(chunk_it, purge);
756 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
std::unique_lock< T > unique_lock
virtual ChunkKeyToChunkMap::iterator deleteBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true)
Definition: FileMgr.cpp:758
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffersWithPrefix ( const ChunkKey keyPrefix,
const bool  purge = true 
)
override

Definition at line 768 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, and deleteBufferUnlocked().

768  {
770  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
771  if (chunkIt == chunkIndex_.end()) {
772  return; // should we throw?
773  }
774  while (chunkIt != chunkIndex_.end() &&
775  std::search(chunkIt->first.begin(),
776  chunkIt->first.begin() + keyPrefix.size(),
777  keyPrefix.begin(),
778  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
779  deleteBufferUnlocked(chunkIt++, purge);
780  }
781 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
std::unique_lock< T > unique_lock
virtual ChunkKeyToChunkMap::iterator deleteBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true)
Definition: FileMgr.cpp:758
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410

+ Here is the call graph for this function:

ChunkKeyToChunkMap::iterator File_Namespace::FileMgr::deleteBufferUnlocked ( const ChunkKeyToChunkMap::iterator  chunk_it,
const bool  purge = true 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 758 of file FileMgr.cpp.

References chunkIndex_.

Referenced by deleteBuffer(), and deleteBuffersWithPrefix().

760  {
761  if (purge) {
762  chunk_it->second->freePages();
763  }
764  delete chunk_it->second;
765  return chunkIndex_.erase(chunk_it);
766 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::deleteEmptyFiles ( )
protected

Deletes files that contain only free pages. Also deletes the compaction status file.

Definition at line 1490 of file FileMgr.cpp.

References CHECK, CHECK_EQ, DELETE_EMPTY_FILES_STATUS, fileMgrBasePath_, files_, File_Namespace::get_data_file_path(), File_Namespace::get_legacy_data_file_path(), and getFilePath().

Referenced by compactFiles(), and resumeFileCompaction().

1490  {
1491  for (auto [file_id, file_info] : files_) {
1492  CHECK_EQ(file_id, file_info->fileId);
1493  if (file_info->freePages.size() == file_info->numPages) {
1494  fclose(file_info->f);
1495  file_info->f = nullptr;
1496  auto file_path = get_data_file_path(fileMgrBasePath_, file_id, file_info->pageSize);
1497  boost::filesystem::remove(get_legacy_data_file_path(file_path));
1498  boost::filesystem::remove(file_path);
1499  }
1500  }
1501 
1502  auto status_file_path = getFilePath(DELETE_EMPTY_FILES_STATUS);
1503  CHECK(boost::filesystem::exists(status_file_path));
1504  boost::filesystem::remove(status_file_path);
1505 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::string get_legacy_data_file_path(const std::string &new_data_file_path)
Definition: File.cpp:51
std::string fileMgrBasePath_
Definition: FileMgr.h:397
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1667
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:377
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
#define CHECK(condition)
Definition: Logger.h:291
std::string get_data_file_path(const std::string &base_path, int file_id, size_t page_size)
Definition: File.cpp:44

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::describeSelf ( ) const
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 697 of file FileMgr.cpp.

References fileMgrKey_.

Referenced by checkpoint(), and setEpoch().

697  {
698  stringstream ss;
699  ss << "table (" << fileMgrKey_.first << ", " << fileMgrKey_.second << ")";
700  return ss.str();
701 }
TablePair fileMgrKey_
Key (db_id, tb_id) identifying this FileMgr.
Definition: FileMgr.h:524

+ Here is the caller graph for this function:

virtual int32_t File_Namespace::FileMgr::epoch ( int32_t  db_id,
int32_t  tb_id 
) const
inline virtual

Returns current value of epoch - should be one greater than recorded at last checkpoint. Because FileMgr only contains buffers from one table we can just return the FileMgr's epoch instead of finding a table-specific epoch.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 277 of file FileMgr.h.

References epoch().

Referenced by epoch(), File_Namespace::FileBuffer::getFileMgrEpoch(), and File_Namespace::FileInfo::openExistingFile().

277 { return epoch(); }
int32_t epoch() const
Definition: FileMgr.h:517

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::epoch ( ) const
inline private

Definition at line 517 of file FileMgr.h.

Referenced by checkpoint(), init(), openAndReadLegacyEpochFile(), updatePageIfDeleted(), and writeDirtyBuffers().

517 { return static_cast<int32_t>(epoch_.ceiling()); }
int32_t ceiling() const
Definition: Epoch.h:44

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::epochFloor ( ) const
inline

Definition at line 279 of file FileMgr.h.

Referenced by setDataAndMetadataFileStats().

279 { return static_cast<int32_t>(epoch_.floor()); }
int32_t floor() const
Definition: Epoch.h:43

+ Here is the caller graph for this function:

virtual bool File_Namespace::FileMgr::failOnReadError ( ) const
inline virtual

True if a read error should cause a fatal error.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 363 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::FileBuffer().

363 { return true; }

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::fetchBuffer ( const ChunkKey key,
AbstractBuffer * destBuffer,
const size_t  numBytes 
)
override

Definition at line 795 of file FileMgr.cpp.

References CHECK, Data_Namespace::AbstractBuffer::copyTo(), logger::FATAL, getBuffer(), Data_Namespace::AbstractBuffer::isDirty(), LOG, show_chunk(), and Data_Namespace::AbstractBuffer::size().

797  {
798  // reads chunk specified by ChunkKey into AbstractBuffer provided by
799  // destBuffer
800  CHECK(!destBuffer->isDirty())
801  << "Aborting attempt to fetch a chunk marked dirty. Chunk inconsistency for key: "
802  << show_chunk(key);
803  AbstractBuffer* chunk = getBuffer(key);
804  // chunk's size is either specified in function call with numBytes or we
805  // just look at pageSize * numPages in FileBuffer
806  if (numBytes > 0 && numBytes > chunk->size()) {
807  LOG(FATAL) << "Chunk retrieved for key `" << show_chunk(key) << "` is smaller ("
808  << chunk->size() << ") than number of bytes requested (" << numBytes
809  << ")";
810  }
811  chunk->copyTo(destBuffer, numBytes);
812 }
#define LOG(tag)
Definition: Logger.h:285
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
An AbstractBuffer is a unit of data management for a data manager.
void copyTo(AbstractBuffer *destination_buffer, const size_t num_bytes=0)
#define CHECK(condition)
Definition: Logger.h:291
FileBuffer * getBuffer(const ChunkKey &key, const size_t numBytes=0) override
Returns a pointer to the chunk with the specified key.
Definition: FileMgr.cpp:783

+ Here is the call graph for this function:

void File_Namespace::FileMgr::free ( AbstractBuffer * buffer)
override

Definition at line 871 of file FileMgr.cpp.

References logger::FATAL, and LOG.

Referenced by copyPage().

871  {
872  LOG(FATAL) << "Operation not supported";
873 }
#define LOG(tag)
Definition: Logger.h:285

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::free_page ( std::pair< FileInfo *, int32_t > &&  page)
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1201 of file FileMgr.cpp.

References free_pages_, and mutex_free_page_.

Referenced by File_Namespace::FileInfo::freePage(), and freePages().

1201  {
1202  std::unique_lock<heavyai::shared_mutex> lock(mutex_free_page_);
1203  free_pages_.push_back(page);
1204 }
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:414
heavyai::shared_mutex mutex_free_page_
Definition: FileMgr.h:413

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePages ( )
protected

Definition at line 1593 of file FileMgr.cpp.

References free_page(), free_pages_, and mutex_free_page_.

Referenced by checkpoint(), File_Namespace::CachingFileMgr::clearForTable(), init(), File_Namespace::CachingFileMgr::init(), and sortAndCopyFilePagesForCompaction().

1593  {
1595  for (auto& free_page : free_pages_) {
1596  free_page.first->freePageDeferred(free_page.second);
1597  }
1598  free_pages_.clear();
1599 }
std::unique_lock< T > unique_lock
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:414
heavyai::shared_mutex mutex_free_page_
Definition: FileMgr.h:413
virtual void free_page(std::pair< FileInfo *, int32_t > &&page)
Definition: FileMgr.cpp:1201

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePagesBeforeEpoch ( const int32_t  min_epoch)
private

Definition at line 670 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, and freePagesBeforeEpochUnlocked().

Referenced by rollOffOldData().

670  {
672  freePagesBeforeEpochUnlocked(min_epoch, chunkIndex_.begin(), chunkIndex_.end());
673 }
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
void freePagesBeforeEpochUnlocked(const int32_t min_epoch, const ChunkKeyToChunkMap::iterator lower_bound, const ChunkKeyToChunkMap::iterator upper_bound)
Definition: FileMgr.cpp:675

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePagesBeforeEpochUnlocked ( const int32_t  min_epoch,
const ChunkKeyToChunkMap::iterator  lower_bound,
const ChunkKeyToChunkMap::iterator  upper_bound 
)
private

Definition at line 675 of file FileMgr.cpp.

References gpu_enabled::upper_bound().

Referenced by freePagesBeforeEpoch().

678  {
679  for (auto chunkIt = lower_bound; chunkIt != upper_bound; ++chunkIt) {
680  chunkIt->second->freePagesBeforeEpoch(min_epoch);
681  }
682 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const TablePair File_Namespace::FileMgr::get_fileMgrKey ( ) const
inline

Definition at line 338 of file FileMgr.h.

Referenced by updatePageIfDeleted().

338 { return fileMgrKey_; }
TablePair fileMgrKey_
Key (db_id, tb_id) identifying this FileMgr.
Definition: FileMgr.h:524

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getAllocated ( )
inline override

Definition at line 219 of file FileMgr.h.

219 { return 0; }
FileBuffer * File_Namespace::FileMgr::getBuffer ( const ChunkKey key,
const size_t  numBytes = 0 
)
override

Returns a pointer to the chunk with the specified key.

Definition at line 783 of file FileMgr.cpp.

References chunkIndexMutex_, and getBufferUnlocked().

Referenced by fetchBuffer().

783  {
785  return getBufferUnlocked(key, num_bytes);
786 }
std::shared_lock< T > shared_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
virtual FileBuffer * getBufferUnlocked(const ChunkKey &key, const size_t numBytes=0) const
Definition: FileMgr.cpp:788

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::getBufferUnlocked ( const ChunkKey key,
const size_t  numBytes = 0 
) const
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 788 of file FileMgr.cpp.

References CHECK, chunkIndex_, and show_chunk().

Referenced by getBuffer(), and getOrCreateBuffer().

789  {
790  auto chunk_it = chunkIndex_.find(key);
791  CHECK(chunk_it != chunkIndex_.end()) << "Chunk does not exist: " << show_chunk(key);
792  return chunk_it->second;
793 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::getChunkMetadataVecForKeyPrefix ( ChunkMetadataVector chunkMetadataVec,
const ChunkKey keyPrefix 
)
override

Definition at line 1006 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

1007  {
1009  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
1010  if (chunkIt == chunkIndex_.end()) {
1011  return; // throw?
1012  }
1013  while (chunkIt != chunkIndex_.end() &&
1014  std::search(chunkIt->first.begin(),
1015  chunkIt->first.begin() + keyPrefix.size(),
1016  keyPrefix.begin(),
1017  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
1018  if (chunkIt->second->hasEncoder()) {
1019  auto chunk_metadata = std::make_shared<ChunkMetadata>();
1020  chunkIt->second->encoder_->getMetadata(chunk_metadata);
1021  chunkMetadataVec.emplace_back(chunkIt->first, chunk_metadata);
1022  }
1023  chunkIt++;
1024  }
1025 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
bool File_Namespace::FileMgr::getDBConvert ( ) const

Definition at line 1041 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBConvert(), and gfm_.

1041  {
1042  return gfm_->getDBConvert();
1043 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:523

+ Here is the call graph for this function:

int32_t File_Namespace::FileMgr::getDBVersion ( ) const

Returns the DB version reported by the GlobalFileMgr.

Definition at line 1037 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBVersion(), and gfm_.

Referenced by createTopLevelMetadata().

1037  {
1038  return gfm_->getDBVersion();
1039 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:523

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FILE * File_Namespace::FileMgr::getFileForFileId ( const int32_t  fileId)

Returns FILE pointer associated with requested fileId.

See Also
FileBuffer

Definition at line 988 of file FileMgr.cpp.

References CHECK, and files_.

Referenced by File_Namespace::FileBuffer::readMetadata(), and File_Namespace::FileBuffer::writeMetadata().

988  {
989  CHECK(fileId >= 0);
990  CHECK(files_.find(fileId) != files_.end());
991  return files_.at(fileId)->f;
992 }
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

FileInfo* File_Namespace::FileMgr::getFileInfoForFileId ( const int32_t  fileId) const
inline

Definition at line 222 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileBuffer::copyPage(), copyPage(), File_Namespace::FileBuffer::freePage(), File_Namespace::readForThread(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeHeader().

222  {
223  return files_.at(fileId);
224  }
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::getFileMgrBasePath ( ) const
inline

Definition at line 331 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::closeRemovePhysical(), closeRemovePhysical(), and File_Namespace::CachingFileMgr::getTableFileMgrPath().

331 { return fileMgrBasePath_; }
std::string fileMgrBasePath_
Definition: FileMgr.h:397

+ Here is the caller graph for this function:

boost::filesystem::path File_Namespace::FileMgr::getFilePath ( const std::string &  file_name) const

Definition at line 1667 of file FileMgr.cpp.

References fileMgrBasePath_.

Referenced by compactFiles(), deleteEmptyFiles(), readPageMappingsFromStatusFile(), renameCompactionStatusFile(), resumeFileCompaction(), and writePageMappingsToStatusFile().

1667  {
1668  return boost::filesystem::path(fileMgrBasePath_) / file_name;
1669 }
std::string fileMgrBasePath_
Definition: FileMgr.h:397

+ Here is the caller graph for this function:

uint32_t File_Namespace::FileMgr::getFragmentCount ( ) const
private

Definition at line 410 of file FileMgr.cpp.

References CHUNK_KEY_FRAGMENT_IDX, chunkIndex_, and chunkIndexMutex_.

Referenced by getStorageStats().

410  {
412  std::set<int32_t> fragment_ids;
413  for (const auto& [chunk_key, file_buffer] : chunkIndex_) {
414  fragment_ids.emplace(chunk_key[CHUNK_KEY_FRAGMENT_IDX]);
415  }
416  return static_cast<uint32_t>(fragment_ids.size());
417 }
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getInUseSize ( )
inline override

Definition at line 218 of file FileMgr.h.

218 { return 0; }
size_t File_Namespace::FileMgr::getMaxSize ( )
inline override

Definition at line 217 of file FileMgr.h.

217 { return 0; }
FileMetadata File_Namespace::FileMgr::getMetadataForFile ( const boost::filesystem::directory_iterator &  fileIterator) const

Definition at line 153 of file FileMgr.cpp.

References CHECK_EQ, DATA_FILE_EXT, logger::FATAL, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, heavyai::file_size(), File_Namespace::FileMetadata::file_size, File_Namespace::FileMetadata::is_data_file, LOG, File_Namespace::FileMetadata::num_pages, and File_Namespace::FileMetadata::page_size.

Referenced by init(), openFiles(), and setDataAndMetadataFileStats().

154  {
155  FileMetadata fileMetadata;
156  fileMetadata.is_data_file = false;
157  fileMetadata.file_path = fileIterator->path().string();
158  if (!boost::filesystem::is_regular_file(fileIterator->status())) {
159  return fileMetadata;
160  }
161  // note that boost::filesystem leaves preceding dot on
162  // extension - hence DATA_FILE_EXT is ".data"
163  std::string extension(fileIterator->path().extension().string());
164  if (extension == DATA_FILE_EXT) {
165  std::string fileStem(fileIterator->path().stem().string());
166  // remove trailing dot if any
167  if (fileStem.size() > 0 && fileStem.back() == '.') {
168  fileStem = fileStem.substr(0, fileStem.size() - 1);
169  }
170  size_t dotPos = fileStem.find_last_of("."); // should only be one
171  if (dotPos == std::string::npos) {
172  LOG(FATAL) << "File `" << fileIterator->path()
173  << "` does not carry page size information in the filename.";
174  }
175  fileMetadata.is_data_file = true;
176  fileMetadata.file_id = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
177  fileMetadata.page_size =
178  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
179 
180  fileMetadata.file_size = boost::filesystem::file_size(fileMetadata.file_path);
181  CHECK_EQ(fileMetadata.file_size % fileMetadata.page_size,
182  size_t(0)); // should be no partial pages
183  fileMetadata.num_pages = fileMetadata.file_size / fileMetadata.page_size;
184  }
185  return fileMetadata;
186 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define LOG(tag)
Definition: Logger.h:285
#define DATA_FILE_EXT
Definition: File.h:25
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getMetadataPageSize ( ) const
inline

Definition at line 366 of file FileMgr.h.

366 { return metadata_page_size_; }
const size_t metadata_page_size_
Definition: FileMgr.h:536
MgrType File_Namespace::FileMgr::getMgrType ( )
inline override

Definition at line 214 of file FileMgr.h.

214 { return FILE_MGR; };
size_t File_Namespace::FileMgr::getNumChunks ( )
override

Definition at line 1662 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

1662  {
1664  return chunkIndex_.size();
1665 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
size_t File_Namespace::FileMgr::getNumReaderThreads ( )
inline

Returns the number of threads, as defined by the num-reader-threads parameter, that should be used during the initial load and subsequent reads of data.

Definition at line 312 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::read().

312 { return num_reader_threads_; }
size_t num_reader_threads_
Number of threads used when loading data.
Definition: FileMgr.h:402

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getNumUsedMetadataPagesForChunkKey ( const ChunkKey chunkKey) const

Definition at line 1027 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

1027  {
1029  const auto& chunkIt = chunkIndex_.find(chunkKey);
1030  if (chunkIt != chunkIndex_.end()) {
1031  return chunkIt->second->numMetadataPages();
1032  } else {
1033  throw std::runtime_error("Chunk was not found.");
1034  }
1035 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
FileBuffer * File_Namespace::FileMgr::getOrCreateBuffer ( const ChunkKey key)
private

Definition at line 1640 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, createBufferUnlocked(), and getBufferUnlocked().

Referenced by putBuffer().

1640  {
1641  FileBuffer* buf;
1643  auto chunk_it = chunkIndex_.find(key);
1644  if (chunk_it == chunkIndex_.end()) {
1645  buf = createBufferUnlocked(key);
1646  } else {
1647  buf = getBufferUnlocked(key);
1648  }
1649  return buf;
1650 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
virtual FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:723
virtual FileBuffer * getBufferUnlocked(const ChunkKey &key, const size_t numBytes=0) const
Definition: FileMgr.cpp:788

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getPageSize ( ) const
inline

Definition at line 365 of file FileMgr.h.

365 { return page_size_; }
const size_t page_size_
Definition: FileMgr.h:535
StorageStats File_Namespace::FileMgr::getStorageStats ( ) const

Definition at line 337 of file FileMgr.cpp.

References File_Namespace::StorageStats::fragment_count, getFragmentCount(), isFullyInitted_, and setDataAndMetadataFileStats().

337  {
338  StorageStats storage_stats;
339  setDataAndMetadataFileStats(storage_stats);
340  if (isFullyInitted_) {
341  storage_stats.fragment_count = getFragmentCount();
342  }
343  return storage_stats;
344 }
void setDataAndMetadataFileStats(StorageStats &storage_stats) const
Definition: FileMgr.cpp:346
uint32_t getFragmentCount() const
Definition: FileMgr.cpp:410

+ Here is the call graph for this function:

std::string File_Namespace::FileMgr::getStringMgrType ( )
inline override

Definition at line 215 of file FileMgr.h.

215 { return ToString(FILE_MGR); }
bool File_Namespace::FileMgr::hasChunkMetadataForKeyPrefix ( const ChunkKey keyPrefix)

Definition at line 994 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

994  {
996  auto chunk_it = chunkIndex_.lower_bound(key_prefix);
997  if (chunk_it == chunkIndex_.end()) {
998  return false;
999  } else {
1000  auto it_pair =
1001  std::mismatch(key_prefix.begin(), key_prefix.end(), chunk_it->first.begin());
1002  return it_pair.first == key_prefix.end();
1003  }
1004 }
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
virtual bool File_Namespace::FileMgr::hasFileMgrKey ( ) const
inline virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 337 of file FileMgr.h.

337 { return true; }
int32_t File_Namespace::FileMgr::incrementEpoch ( )
inline

Definition at line 281 of file FileMgr.h.

References logger::FATAL, LOG, and Epoch::max_allowable_epoch().

Referenced by checkpoint(), and init().

281  {
282  int32_t newEpoch = epoch_.increment();
283  epochIsCheckpointed_ = false;
284  // We test for error here instead of in Epoch::increment so we can log FileMgr
285  // metadata
286  if (newEpoch > Epoch::max_allowable_epoch()) {
287  LOG(FATAL) << "Epoch for table (" << fileMgrKey_.first << ", " << fileMgrKey_.second
288  << ") greater than maximum allowed value of "
289  << Epoch::max_allowable_epoch() << ".";
290  }
291  return newEpoch;
292  }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:524
#define LOG(tag)
Definition: Logger.h:285
static int64_t max_allowable_epoch()
Definition: Epoch.h:69
int32_t increment()
Definition: Epoch.h:54

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::init ( const size_t  num_reader_threads,
const int32_t  epochOverride 
)

Definition at line 256 of file FileMgr.cpp.

References Epoch::ceiling(), CHECK, clearFileInfos(), coreInit(), createBufferFromHeaders(), createEpochFile(), epoch(), epoch_, EPOCH_FILENAME, logger::FATAL, FILE_MGR_VERSION_FILENAME, fileMgrBasePath_, fileMgrVersion_, Epoch::floor(), freePages(), incrementEpoch(), initializeNumThreads(), isFullyInitted_, latestFileMgrVersion_, LOG, nextFileId_, openFiles(), resumeFileCompaction(), rollOffOldData(), setEpoch(), gpu_enabled::sort(), VLOG, and writeAndSyncVersionToDisk().

Referenced by FileMgr().

256  {
257  // if epochCeiling = -1 this means open from epoch file
258 
259  const bool dataExists = coreInit();
260  if (dataExists) {
261  if (epochOverride != -1) { // if opening at specified epoch
262  setEpoch(epochOverride);
263  }
264 
265  auto open_files_result = openFiles();
266  if (!open_files_result.compaction_status_file_name.empty()) {
267  resumeFileCompaction(open_files_result.compaction_status_file_name);
268  clearFileInfos();
269  open_files_result = openFiles();
270  CHECK(open_files_result.compaction_status_file_name.empty());
271  }
272 
273  /* Sort headerVec so that all HeaderInfos
274  * from a chunk will be grouped together
275  * and in order of increasing PageId
276  * - Version Epoch */
277  auto& header_vec = open_files_result.header_infos;
278  std::sort(header_vec.begin(), header_vec.end());
279 
280  /* Goal of next section is to find sequences in the
281  * sorted headerVec of the same ChunkId, which we
282  * can then initiate a FileBuffer with */
283 
284  VLOG(3) << "Number of Headers in Vector: " << header_vec.size();
285  if (header_vec.size() > 0) {
286  ChunkKey lastChunkKey = header_vec.begin()->chunkKey;
287  auto startIt = header_vec.begin();
288 
289  for (auto headerIt = header_vec.begin() + 1; headerIt != header_vec.end();
290  ++headerIt) {
291  if (headerIt->chunkKey != lastChunkKey) {
292  createBufferFromHeaders(lastChunkKey, startIt, headerIt);
293  lastChunkKey = headerIt->chunkKey;
294  startIt = headerIt;
295  }
296  }
297  // now need to insert last Chunk
298  createBufferFromHeaders(lastChunkKey, startIt, header_vec.end());
299  }
300  nextFileId_ = open_files_result.max_file_id + 1;
301  rollOffOldData(epoch(), true /* only checkpoint if data is rolled off */);
302  incrementEpoch();
303  freePages();
304  } else {
305  boost::filesystem::path path(fileMgrBasePath_);
306  if (!boost::filesystem::create_directory(path)) {
307  LOG(FATAL) << "Could not create data directory: " << path;
308  }
310  if (epochOverride != -1) {
311  epoch_.floor(epochOverride);
312  epoch_.ceiling(epochOverride);
313  } else {
314  // These are default constructor values for epoch_, but resetting here for clarity
315  epoch_.floor(0);
316  epoch_.ceiling(0);
317  }
320  incrementEpoch();
321  }
322 
323  initializeNumThreads(num_reader_threads);
324  isFullyInitted_ = true;
325 }
virtual FileBuffer * createBufferFromHeaders(const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
Definition: FileMgr.cpp:734
std::vector< int > ChunkKey
Definition: types.h:36
OpenFilesResult openFiles()
Definition: FileMgr.cpp:196
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:604
#define LOG(tag)
Definition: Logger.h:285
void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint)
Definition: FileMgr.cpp:684
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:397
const int32_t latestFileMgrVersion_
Definition: FileMgr.h:407
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1079
int32_t incrementEpoch()
Definition: FileMgr.h:281
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:389
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:387
int32_t epoch() const
Definition: FileMgr.h:517
#define CHECK(condition)
Definition: Logger.h:291
void setEpoch(const int32_t newEpoch)
Definition: FileMgr.cpp:1190
bool coreInit()
Determines file path, and if exists, runs file migration and opens and reads epoch file...
Definition: FileMgr.cpp:133
void initializeNumThreads(size_t num_reader_threads=0)
Definition: FileMgr.cpp:1582
unsigned nextFileId_
number of threads used when loading data
Definition: FileMgr.h:403
#define VLOG(n)
Definition: Logger.h:387
void resumeFileCompaction(const std::string &status_file_name)
Definition: FileMgr.cpp:1215

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::init ( const std::string &  dataPathToConvertFrom,
const int32_t  epochOverride 
)

Definition at line 433 of file FileMgr.cpp.

References threading_serial::async(), CHECK, copyPage(), createBuffer(), createBufferFromHeaders(), EPOCH_FILENAME, logger::FATAL, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, File_Namespace::GlobalFileMgr::getFileMgr(), getMetadataForFile(), gfm_, File_Namespace::FileMetadata::is_data_file, isFullyInitted_, LOG, nextFileId_, File_Namespace::FileMetadata::num_pages, openAndReadEpochFile(), openExistingFile(), File_Namespace::FileMetadata::page_size, processFileFutures(), File_Namespace::MultiPage::push(), requestFreePage(), setEpoch(), gpu_enabled::sort(), and Data_Namespace::AbstractBuffer::syncEncoder().

434  {
435  int32_t converted_data_epoch = 0;
436  boost::filesystem::path path(dataPathToConvertFrom);
437  if (boost::filesystem::exists(path)) {
438  if (!boost::filesystem::is_directory(path)) {
439  LOG(FATAL) << "Specified path `" << path << "` is not a directory.";
440  }
442 
443  if (epochOverride != -1) { // if opening at previous epoch
444  setEpoch(epochOverride);
445  }
446 
447  boost::filesystem::directory_iterator
448  endItr; // default construction yields past-the-end
449  int32_t maxFileId = -1;
450  int32_t fileCount = 0;
451  int32_t threadCount = std::thread::hardware_concurrency();
452  std::vector<HeaderInfo> headerVec;
453  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
454  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
455  FileMetadata fileMetadata = getMetadataForFile(fileIt);
456  if (fileMetadata.is_data_file) {
457  maxFileId = std::max(maxFileId, fileMetadata.file_id);
458  file_futures.emplace_back(std::async(std::launch::async, [fileMetadata, this] {
459  std::vector<HeaderInfo> tempHeaderVec;
460  openExistingFile(fileMetadata.file_path,
461  fileMetadata.file_id,
462  fileMetadata.page_size,
463  fileMetadata.num_pages,
464  tempHeaderVec);
465  return tempHeaderVec;
466  }));
467  fileCount++;
468  if (fileCount % threadCount) {
469  processFileFutures(file_futures, headerVec);
470  }
471  }
472  }
473 
474  if (file_futures.size() > 0) {
475  processFileFutures(file_futures, headerVec);
476  }
477 
478  /* Sort headerVec so that all HeaderInfos
479  * from a chunk will be grouped together
480  * and in order of increasing PageId
481  * - Version Epoch */
482 
483  std::sort(headerVec.begin(), headerVec.end());
484 
485  /* Goal of next section is to find sequences in the
486  * sorted headerVec of the same ChunkId, which we
487  * can then initiate a FileBuffer with */
488 
489  if (headerVec.size() > 0) {
490  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
491  auto startIt = headerVec.begin();
492 
493  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
494  ++headerIt) {
495  if (headerIt->chunkKey != lastChunkKey) {
496  FileMgr* c_fm_ =
497  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
498  CHECK(c_fm_);
499  auto srcBuf = createBufferFromHeaders(lastChunkKey, startIt, headerIt);
500  auto destBuf = c_fm_->createBuffer(lastChunkKey, srcBuf->pageSize());
501  destBuf->syncEncoder(srcBuf);
502  destBuf->setSize(srcBuf->size());
503  destBuf->setDirty(); // this needs to be set to force writing out metadata
504  // files from "checkpoint()" call
505 
506  size_t totalNumPages = srcBuf->getMultiPage().size();
507  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
508  Page srcPage = srcBuf->getMultiPage()[pageNum].current().page;
509  Page destPage = c_fm_->requestFreePage(
510  srcBuf->pageSize(),
511  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
512  MultiPage multiPage(srcBuf->pageSize());
513  multiPage.push(destPage, converted_data_epoch);
514  destBuf->multiPages_.push_back(multiPage);
515  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
516  copyPage(
517  srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
518  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
519  }
520  lastChunkKey = headerIt->chunkKey;
521  startIt = headerIt;
522  }
523  }
524 
525  // now need to insert last Chunk
526  FileMgr* c_fm_ =
527  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
528  auto srcBuf = createBufferFromHeaders(lastChunkKey, startIt, headerVec.end());
529  auto destBuf = c_fm_->createBuffer(lastChunkKey, srcBuf->pageSize());
530  destBuf->syncEncoder(srcBuf);
531  destBuf->setSize(srcBuf->size());
532  destBuf->setDirty(); // this needs to be set to write out metadata file from the
533  // "checkpoint()" call
534 
535  size_t totalNumPages = srcBuf->getMultiPage().size();
536  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
537  Page srcPage = srcBuf->getMultiPage()[pageNum].current().page;
538  Page destPage = c_fm_->requestFreePage(
539  srcBuf->pageSize(),
540  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
541  MultiPage multiPage(srcBuf->pageSize());
542  multiPage.push(destPage, converted_data_epoch);
543  destBuf->multiPages_.push_back(multiPage);
544  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
545  copyPage(srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
546  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
547  }
548  }
549  nextFileId_ = maxFileId + 1;
550  } else {
551  if (!boost::filesystem::create_directory(path)) {
552  LOG(FATAL) << "Specified path does not exist: " << path;
553  }
554  }
555  isFullyInitted_ = true;
556 }
virtual FileBuffer * createBufferFromHeaders(const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
Definition: FileMgr.cpp:734
std::vector< int > ChunkKey
Definition: types.h:36
FileMgr(const int32_t device_id, GlobalFileMgr *gfm, const TablePair file_mgr_key, const int32_t max_rollback_epochs=-1, const size_t num_reader_threads=0, const int32_t epoch=-1)
Constructor.
Definition: FileMgr.cpp:47
void syncEncoder(const AbstractBuffer *src_buffer)
#define LOG(tag)
Definition: Logger.h:285
void copyPage(Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
Definition: FileMgr.cpp:584
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
GlobalFileMgr * gfm_
Definition: FileMgr.h:523
future< Result > async(Fn &&fn, Args &&...args)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:419
FileInfo * openExistingFile(const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:944
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:387
void openAndReadEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:636
AbstractBufferMgr * getFileMgr(const int32_t db_id, const int32_t tb_id)
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator) const
Definition: FileMgr.cpp:153
#define CHECK(condition)
Definition: Logger.h:291
void setEpoch(const int32_t newEpoch)
Definition: FileMgr.cpp:1190
unsigned nextFileId_
number of threads used when loading data
Definition: FileMgr.h:403

+ Here is the call graph for this function:

void File_Namespace::FileMgr::initializeNumThreads ( size_t  num_reader_threads = 0)
protected

Definition at line 1582 of file FileMgr.cpp.

References num_reader_threads_.

Referenced by init(), and File_Namespace::CachingFileMgr::init().

1582  {
1583  // # of threads is based on # of cores on the host
1584  size_t num_hardware_based_threads = std::thread::hardware_concurrency();
1585  if (num_reader_threads == 0 || num_reader_threads > num_hardware_based_threads) {
1586  // # of threads has not been defined by user
1587  num_reader_threads_ = num_hardware_based_threads;
1588  } else {
1589  num_reader_threads_ = num_reader_threads;
1590  }
1591 }
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:402

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::isAllocationCapped ( )
inline override

Definition at line 220 of file FileMgr.h.

220 { return false; }
bool File_Namespace::FileMgr::isBufferOnDevice ( const ChunkKey key)
override

Definition at line 745 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

745  {
747  return chunkIndex_.find(key) != chunkIndex_.end();
748 }
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
int32_t File_Namespace::FileMgr::lastCheckpointedEpoch ( ) const
inline

Returns value of epoch at last checkpoint.

Definition at line 297 of file FileMgr.h.

Referenced by File_Namespace::GlobalFileMgr::existsDiffBetweenFileMgrParamsAndFileMgr(), and setDataAndMetadataFileStats().

297  {
298  return epoch() - (epochIsCheckpointed_ ? 0 : 1);
299  }
int32_t epoch() const
Definition: FileMgr.h:517

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::maxRollbackEpochs ( )
inline

Returns the value of max_rollback_epochs.

Definition at line 306 of file FileMgr.h.

Referenced by File_Namespace::GlobalFileMgr::existsDiffBetweenFileMgrParamsAndFileMgr().

306 { return maxRollbackEpochs_; }
int32_t maxRollbackEpochs_
Definition: FileMgr.h:396

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::migrateEpochFileV0 ( )
protected

Definition at line 1107 of file FileMgr.cpp.

References Epoch::ceiling(), createEpochFile(), epoch_, EPOCH_FILENAME, FILE_MGR_VERSION_FILENAME, fileMgrBasePath_, Epoch::floor(), logger::INFO, LEGACY_EPOCH_FILENAME, LOG, Epoch::min_allowable_epoch(), openAndReadLegacyEpochFile(), writeAndSyncEpochToDisk(), and writeAndSyncVersionToDisk().

Referenced by migrateToLatestFileMgrVersion().

1107  {
1108  const std::string versionFilePath(fileMgrBasePath_ + "/" + FILE_MGR_VERSION_FILENAME);
1109  LOG(INFO) << "Migrating file format version from 0 to 1 for `" << versionFilePath;
1114  int32_t migrationCompleteVersion = 1;
1115  writeAndSyncVersionToDisk(FILE_MGR_VERSION_FILENAME, migrationCompleteVersion);
1116 }
int32_t openAndReadLegacyEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:615
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:604
#define LOG(tag)
Definition: Logger.h:285
static int64_t min_allowable_epoch()
Definition: Epoch.h:65
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:397
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1079
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:389
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:656
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:387
static constexpr char LEGACY_EPOCH_FILENAME[]
Definition: FileMgr.h:386

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::migrateLegacyFilesV1 ( )
protected

Definition at line 1118 of file FileMgr.cpp.

References FILE_MGR_VERSION_FILENAME, fileMgrBasePath_, logger::INFO, LOG, renameAndSymlinkLegacyFiles(), and writeAndSyncVersionToDisk().

Referenced by migrateToLatestFileMgrVersion().

1118  {
1119  LOG(INFO) << "Migrating file format version from 1 to 2";
1121  constexpr int32_t migration_complete_version{2};
1122  writeAndSyncVersionToDisk(FILE_MGR_VERSION_FILENAME, migration_complete_version);
1123 }
#define LOG(tag)
Definition: Logger.h:285
std::string fileMgrBasePath_
Definition: FileMgr.h:397
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1079
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:389
static void renameAndSymlinkLegacyFiles(const std::string &table_data_dir)
Definition: FileMgr.cpp:1125

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::migrateToLatestFileMgrVersion ( )
protected

Definition at line 1146 of file FileMgr.cpp.

References logger::FATAL, FILE_MGR_VERSION_FILENAME, fileMgrVersion_, INVALID_VERSION, latestFileMgrVersion_, LOG, migrateEpochFileV0(), migrateLegacyFilesV1(), readVersionFromDisk(), UNREACHABLE, and writeAndSyncVersionToDisk().

Referenced by coreInit().

1146  {
1149  fileMgrVersion_ = 0;
1151  } else if (fileMgrVersion_ > latestFileMgrVersion_) {
1152  LOG(FATAL)
1153  << "Table storage forward compatibility is not supported. Version of HeavyDB "
1154  "software used is older than the version of table being read: "
1155  << fileMgrVersion_;
1156  }
1157 
1160  switch (fileMgrVersion_) {
1161  case 0: {
1163  break;
1164  }
1165  case 1: {
1167  break;
1168  }
1169  default: {
1170  UNREACHABLE();
1171  }
1172  }
1173  fileMgrVersion_++;
1174  }
1175  }
1176 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1061
#define LOG(tag)
Definition: Logger.h:285
#define UNREACHABLE()
Definition: Logger.h:337
const int32_t latestFileMgrVersion_
Definition: FileMgr.h:407
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1079
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:389
static constexpr int32_t INVALID_VERSION
Definition: FileMgr.h:390

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::openAndReadEpochFile ( const std::string &  epochFileName)
protected

Definition at line 636 of file FileMgr.cpp.

References Epoch::byte_size(), epoch_, epochFile_, logger::FATAL, heavyai::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), File_Namespace::read(), and Epoch::storage_ptr().

Referenced by coreInit(), and init().

636  {
637  if (!epochFile_) { // Check to see if already open
638  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
639  if (!boost::filesystem::exists(epochFilePath)) {
640  LOG(FATAL) << "Epoch file `" << epochFilePath << "` does not exist";
641  }
642  if (!boost::filesystem::is_regular_file(epochFilePath)) {
643  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
644  }
645  if (boost::filesystem::file_size(epochFilePath) != Epoch::byte_size()) {
646  LOG(FATAL) << "Epoch file `" << epochFilePath
647  << "` is not sized properly (current size: "
648  << boost::filesystem::file_size(epochFilePath)
649  << ", expected size: " << Epoch::byte_size() << ")";
650  }
651  epochFile_ = open(epochFilePath);
652  }
653  read(epochFile_, 0, Epoch::byte_size(), epoch_.storage_ptr(), epochFileName);
654 }
int8_t * storage_ptr()
Definition: Epoch.h:61
#define LOG(tag)
Definition: Logger.h:285
std::string fileMgrBasePath_
Definition: FileMgr.h:397
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf, const std::string &file_path)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:142
static size_t byte_size()
Definition: Epoch.h:63
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:107
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::openAndReadLegacyEpochFile ( const std::string &  epochFileName)
protected

Definition at line 615 of file FileMgr.cpp.

References File_Namespace::close(), epoch(), logger::FATAL, heavyai::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), and File_Namespace::read().

Referenced by migrateEpochFileV0().

615  {
616  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
617  if (!boost::filesystem::exists(epochFilePath)) {
618  return 0;
619  }
620 
621  if (!boost::filesystem::is_regular_file(epochFilePath)) {
622  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
623  }
624  if (boost::filesystem::file_size(epochFilePath) < 4) {
625  LOG(FATAL) << "Epoch file `" << epochFilePath
626  << "` is not sized properly (current size: "
627  << boost::filesystem::file_size(epochFilePath) << ", expected size: 4)";
628  }
629  FILE* legacyEpochFile = open(epochFilePath);
630  int32_t epoch;
631  read(legacyEpochFile, 0, sizeof(int32_t), (int8_t*)&epoch, epochFilePath);
632  close(legacyEpochFile);
633  return epoch;
634 }
#define LOG(tag)
Definition: Logger.h:285
std::string fileMgrBasePath_
Definition: FileMgr.h:397
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf, const std::string &file_path)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:142
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:107
int32_t epoch() const
Definition: FileMgr.h:517
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::openExistingFile ( const std::string &  path,
const int32_t  fileId,
const size_t  pageSize,
const size_t  numPages,
std::vector< HeaderInfo > &  headerVec 
)
protected

Definition at line 944 of file FileMgr.cpp.

References anonymous_namespace{Utm.h}::f, fileIndex_, files_, files_rw_mutex_, File_Namespace::open(), and File_Namespace::FileInfo::openExistingFile().

Referenced by init(), and openFiles().

948  {
949  FILE* f = open(path);
950  FileInfo* fInfo = new FileInfo(
951  this, fileId, f, pageSize, numPages, path, false); // false means don't init file
952 
953  fInfo->openExistingFile(headerVec);
955  files_[fileId] = fInfo;
956  fileIndex_.insert(std::pair<size_t, int32_t>(pageSize, fileId));
957  return fInfo;
958 }
heavyai::unique_lock< heavyai::shared_mutex > write_lock
constexpr double f
Definition: Utm.h:31
PageSizeFileMMap fileIndex_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:401
std::unique_lock< T > unique_lock
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:107
std::map< int32_t, FileInfo * > files_
A map of files accessible via a file identifier.
Definition: FileMgr.h:400
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

OpenFilesResult File_Namespace::FileMgr::openFiles ( )
protected

Definition at line 196 of file FileMgr.cpp.

References threading_serial::async(), Epoch::ceiling(), CHECK, File_Namespace::OpenFilesResult::compaction_status_file_name, epoch_, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, fileMgrBasePath_, getMetadataForFile(), File_Namespace::OpenFilesResult::header_infos, logger::INFO, File_Namespace::anonymous_namespace{FileMgr.cpp}::is_compaction_status_file(), File_Namespace::FileMetadata::is_data_file, LOG, File_Namespace::OpenFilesResult::max_file_id, File_Namespace::FileMetadata::num_pages, openExistingFile(), File_Namespace::FileMetadata::page_size, processFileFutures(), run_benchmark_import::result, timer_start(), and timer_stop().

Referenced by init(), and File_Namespace::CachingFileMgr::init().

196  {
197  auto clock_begin = timer_start();
198  boost::filesystem::directory_iterator
199  end_itr; // default construction yields past-the-end
200  OpenFilesResult result;
201  result.max_file_id = -1;
202  int32_t file_count = 0;
203  int32_t thread_count = std::thread::hardware_concurrency();
204  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
205  boost::filesystem::path path(fileMgrBasePath_);
206  for (boost::filesystem::directory_iterator file_it(path); file_it != end_itr;
207  ++file_it) {
208  FileMetadata file_metadata = getMetadataForFile(file_it);
209  if (file_metadata.is_data_file) {
210  result.max_file_id = std::max(result.max_file_id, file_metadata.file_id);
211  file_futures.emplace_back(std::async(std::launch::async, [file_metadata, this] {
212  std::vector<HeaderInfo> temp_header_vec;
213  openExistingFile(file_metadata.file_path,
214  file_metadata.file_id,
215  file_metadata.page_size,
216  file_metadata.num_pages,
217  temp_header_vec);
218  return temp_header_vec;
219  }));
220  file_count++;
221  if (file_count % thread_count == 0) {
222  processFileFutures(file_futures, result.header_infos);
223  }
224  }
225 
226  if (is_compaction_status_file(file_it->path().filename().string())) {
227  CHECK(result.compaction_status_file_name.empty());
228  result.compaction_status_file_name = file_it->path().filename().string();
229  }
230  }
231 
232  if (file_futures.size() > 0) {
233  processFileFutures(file_futures, result.header_infos);
234  }
235 
236  int64_t queue_time_ms = timer_stop(clock_begin);
237  LOG(INFO) << "Completed Reading table's file metadata, Elapsed time : " << queue_time_ms
238  << "ms Epoch: " << epoch_.ceiling() << " files read: " << file_count
239  << " table location: '" << fileMgrBasePath_ << "'";
240  return result;
241 }
#define LOG(tag)
Definition: Logger.h:285
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
bool is_compaction_status_file(const std::string &file_name)
Definition: FileMgr.cpp:189
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:397
future< Result > async(Fn &&fn, Args &&...args)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:419
FileInfo * openExistingFile(const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:944
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator) const
Definition: FileMgr.cpp:153
#define CHECK(condition)
Definition: Logger.h:291
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::printSlabs ( )
inline override

Definition at line 216 of file FileMgr.h.

216 { return "Not Implemented"; }
void File_Namespace::FileMgr::processFileFutures ( std::vector< std::future< std::vector< HeaderInfo >>> &  file_futures,
std::vector< HeaderInfo > &  headerVec 
)
protected

Definition at line 419 of file FileMgr.cpp.

Referenced by init(), and openFiles().

421  {
422  for (auto& file_future : file_futures) {
423  file_future.wait();
424  }
425  // concatenate the vectors after thread completes
426  for (auto& file_future : file_futures) {
427  auto tempHeaderVec = file_future.get();
428  headerVec.insert(headerVec.end(), tempHeaderVec.begin(), tempHeaderVec.end());
429  }
430  file_futures.clear();
431 }

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::putBuffer ( const ChunkKey key,
AbstractBuffer d,
const size_t  numBytes = 0 
)
override

Puts the contents of d into the Chunk with the given key.

Parameters
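key- Unique identifier for a Chunk.
d- An object representing the source data for the Chunk.
numBytes- Size in bytes of the source data to store; when 0, the size of d is used.
Returns
FileBuffer* - the Chunk stored under the given key, after the data from d has been written or appended to it.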

Definition at line 814 of file FileMgr.cpp.

References CHECK, CHECK_LT, Data_Namespace::AbstractBuffer::clearDirtyBits(), logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), getOrCreateBuffer(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isAppended(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, show_chunk(), and Data_Namespace::AbstractBuffer::size().

816  {
817  auto chunk = getOrCreateBuffer(key);
818  size_t oldChunkSize = chunk->size();
819  // write the buffer's data to the Chunk
820  size_t newChunkSize = (numBytes == 0) ? srcBuffer->size() : numBytes;
821  if (chunk->isDirty()) {
822  // multiple appends are allowed,
823  // but only single update is allowed
824  if (srcBuffer->isUpdated() && chunk->isUpdated()) {
825  LOG(FATAL) << "Aborting attempt to write a chunk marked dirty. Chunk inconsistency "
826  "for key: "
827  << show_chunk(key);
828  }
829  }
830  CHECK(srcBuffer->isDirty()) << "putBuffer expects a dirty buffer";
831  if (srcBuffer->isUpdated()) {
832  // chunk size is not changed when fixed rows are updated or are marked as deleted.
833  // but when rows are vacuumed or varlen rows are updated (new rows are appended),
834  // chunk size will change. For vacuum, checkpoint should sync size from cpu to disk.
835  // For varlen update, it takes another route via fragmenter using disk-level buffer.
836  if (0 == numBytes && !chunk->isDirty()) {
837  chunk->setSize(newChunkSize);
838  }
839  //@todo use dirty flags to only flush pages of chunk that need to
840  // be flushed
841  chunk->write((int8_t*)srcBuffer->getMemoryPtr(),
842  newChunkSize,
843  0,
844  srcBuffer->getType(),
845  srcBuffer->getDeviceId());
846  } else if (srcBuffer->isAppended()) {
847  CHECK_LT(oldChunkSize, newChunkSize);
848  chunk->append((int8_t*)srcBuffer->getMemoryPtr() + oldChunkSize,
849  newChunkSize - oldChunkSize,
850  srcBuffer->getType(),
851  srcBuffer->getDeviceId());
852  } else {
853  // If dirty buffer comes in unmarked, it must be empty.
854  // Encoder sync is still required to flush the metadata.
855  CHECK(numBytes == 0)
856  << "Dirty buffer with size > 0 must be marked as isAppended() or isUpdated()";
857  }
858  // chunk->clearDirtyBits(); // Hack: because write and append will set dirty bits
859  //@todo commenting out line above will make sure this metadata is set
860  // but will trigger error on fetch chunk
861  srcBuffer->clearDirtyBits();
862  chunk->syncEncoder(srcBuffer);
863  return chunk;
864 }
#define LOG(tag)
Definition: Logger.h:285
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
FileBuffer * getOrCreateBuffer(const ChunkKey &key)
Definition: FileMgr.cpp:1640
#define CHECK_LT(x, y)
Definition: Logger.h:303
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

std::vector< PageMapping > File_Namespace::FileMgr::readPageMappingsFromStatusFile ( )
protected

Deserializes a page mapping vector from the expected status file (UPDATE_PAGE_VISIBILITY_STATUS).

Definition at line 1528 of file FileMgr.cpp.

References CHECK, CHECK_EQ, CHECK_GE, heavyai::file_size(), getFilePath(), and UPDATE_PAGE_VISIBILITY_STATUS.

Referenced by resumeFileCompaction().

1528  {
1529  auto file_path = getFilePath(UPDATE_PAGE_VISIBILITY_STATUS);
1530  CHECK(boost::filesystem::exists(file_path));
1531  std::ifstream status_file{file_path.string(),
1532  std::ios::in | std::ios::binary | std::ios::ate};
1533  CHECK(status_file.is_open());
1534  size_t file_size = status_file.tellg();
1535  status_file.seekg(0, std::ios::beg);
1536  CHECK_GE(file_size, sizeof(int64_t));
1537 
1538  int64_t page_mappings_count;
1539  status_file.read(reinterpret_cast<char*>(&page_mappings_count), sizeof(int64_t));
1540  auto page_mappings_byte_size = file_size - sizeof(int64_t);
1541  CHECK_EQ(page_mappings_byte_size % sizeof(PageMapping), static_cast<size_t>(0));
1542  CHECK_EQ(static_cast<size_t>(page_mappings_count),
1543  page_mappings_byte_size / sizeof(PageMapping));
1544 
1545  std::vector<PageMapping> page_mappings(page_mappings_count);
1546  status_file.read(reinterpret_cast<char*>(page_mappings.data()),
1547  page_mappings_byte_size);
1548  status_file.close();
1549  return page_mappings;
1550 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:376
#define CHECK_GE(x, y)
Definition: Logger.h:306
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1667
#define CHECK(condition)
Definition: Logger.h:291
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::readVersionFromDisk ( const std::string &  versionFileName) const
protected

Definition at line 1061 of file FileMgr.cpp.

References File_Namespace::close(), heavyai::file_size(), fileMgrBasePath_, File_Namespace::open(), File_Namespace::read(), and setup::version.

Referenced by createTopLevelMetadata(), migrateToLatestFileMgrVersion(), and writeAndSyncVersionToDisk().

1061  {
1062  const std::string versionFilePath(fileMgrBasePath_ + "/" + versionFileName);
1063  if (!boost::filesystem::exists(versionFilePath)) {
1064  return -1;
1065  }
1066  if (!boost::filesystem::is_regular_file(versionFilePath)) {
1067  return -1;
1068  }
1069  if (boost::filesystem::file_size(versionFilePath) < 4) {
1070  return -1;
1071  }
1072  FILE* versionFile = open(versionFilePath);
1073  int32_t version;
1074  read(versionFile, 0, sizeof(int32_t), (int8_t*)&version, versionFilePath);
1075  close(versionFile);
1076  return version;
1077 }
std::string fileMgrBasePath_
Definition: FileMgr.h:397
string version
Definition: setup.in.py:73
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf, const std::string &file_path)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:142
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:107
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::removeTableRelatedDS ( const int32_t  db_id,
const int32_t  table_id 
)
override

Definition at line 1206 of file FileMgr.cpp.

References UNREACHABLE.

1206  {
1207  UNREACHABLE();
1208 }
#define UNREACHABLE()
Definition: Logger.h:337
void File_Namespace::FileMgr::renameAndSymlinkLegacyFiles ( const std::string &  table_data_dir)
static

Definition at line 1125 of file FileMgr.cpp.

References DATA_FILE_EXT, logger::INFO, File_Namespace::kLegacyDataFileExtension, and LOG.

Referenced by migrateLegacyFilesV1(), and anonymous_namespace{TableArchiver.cpp}::rename_table_directories().

1125  {
1126  std::map<boost::filesystem::path, boost::filesystem::path> old_to_new_paths;
1127  for (boost::filesystem::directory_iterator it(table_data_dir), end_it; it != end_it;
1128  it++) {
1129  const auto old_path = boost::filesystem::canonical(it->path());
1130  if (boost::filesystem::is_regular_file(it->status()) &&
1131  old_path.extension().string() == kLegacyDataFileExtension) {
1132  auto new_path = old_path;
1133  new_path.replace_extension(DATA_FILE_EXT);
1134  old_to_new_paths[old_path] = new_path;
1135  }
1136  }
1137  for (const auto& [old_path, new_path] : old_to_new_paths) {
1138  boost::filesystem::rename(old_path, new_path);
1139  LOG(INFO) << "Rebrand migration: Renamed " << old_path << " to " << new_path;
1140  boost::filesystem::create_symlink(new_path.filename(), old_path);
1141  LOG(INFO) << "Rebrand migration: Added symlink from " << old_path << " to "
1142  << new_path.filename();
1143  }
1144 }
#define LOG(tag)
Definition: Logger.h:285
#define DATA_FILE_EXT
Definition: File.h:25
constexpr auto kLegacyDataFileExtension
Definition: File.h:36

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::renameCompactionStatusFile ( const char *const  from_status,
const char *const  to_status 
)

Renames the compaction status file named from_status to to_status. The source status file must exist and the destination status file must not already exist.

Definition at line 1555 of file FileMgr.cpp.

References CHECK, and getFilePath().

Referenced by compactFiles(), and resumeFileCompaction().

1556  {
1557  auto from_status_file_path = getFilePath(from_status);
1558  auto to_status_file_path = getFilePath(to_status);
1559  CHECK(boost::filesystem::exists(from_status_file_path));
1560  CHECK(!boost::filesystem::exists(to_status_file_path));
1561  boost::filesystem::rename(from_status_file_path, to_status_file_path);
1562 }
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1667
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Page File_Namespace::FileMgr::requestFreePage ( size_t  pagesize,
const bool  isMetadata 
)
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 875 of file FileMgr.cpp.

References CHECK, createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, num_pages_per_data_file_, and num_pages_per_metadata_file_.

Referenced by File_Namespace::FileBuffer::addNewMultiPage(), init(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeMetadata().

875  {
876  std::lock_guard<std::mutex> lock(getPageMutex_);
877 
878  auto candidateFiles = fileIndex_.equal_range(pageSize);
879  int32_t pageNum = -1;
880  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
881  FileInfo* fileInfo = files_.at(fileIt->second);
882  pageNum = fileInfo->getFreePage();
883  if (pageNum != -1) {
884  return (Page(fileInfo->fileId, pageNum));
885  }
886  }
887  // if here then we need to add a file
888  FileInfo* fileInfo;
889  if (isMetadata) {
890  fileInfo = createFile(pageSize, num_pages_per_metadata_file_);
891  } else {
892  fileInfo = createFile(pageSize, num_pages_per_data_file_);
893  }
894  pageNum = fileInfo->getFreePage();
895  CHECK(pageNum != -1);
896  return (Page(fileInfo->fileId, pageNum));
897 }
std::mutex getPageMutex_
pointer to DB level metadata
Definition: FileMgr.h:409
static size_t num_pages_per_data_file_
Definition: FileMgr.h:417
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:401
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:418
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:960
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::requestFreePages ( size_t  npages,
size_t  pagesize,
std::vector< Page > &  pages,
const bool  isMetadata 
)

Obtains free pages – creates new files if necessary – of the requested size.

Given a page size and number of pages, this method updates the vector "pages" to include free pages of the requested size. These pages are immediately removed from the free list of the affected file(s). If there are not enough pages available among current files, new files are created and their pages are included in the vector.

Parameters
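npages- The number of free pages requested
pagesize- The size of each requested page
pages- A vector containing the free pages obtained by this method
isMetadata- Whether newly created files are sized as metadata files (num_pages_per_metadata_file_ pages) rather than data files (num_pages_per_data_file_ pages)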

Definition at line 899 of file FileMgr.cpp.

References CHECK, createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, num_pages_per_data_file_, and num_pages_per_metadata_file_.

902  {
903  // not used currently
904  // @todo add method to FileInfo to get more than one page
905  std::lock_guard<std::mutex> lock(getPageMutex_);
906  auto candidateFiles = fileIndex_.equal_range(pageSize);
907  size_t numPagesNeeded = numPagesRequested;
908  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
909  FileInfo* fileInfo = files_.at(fileIt->second);
910  int32_t pageNum;
911  do {
912  pageNum = fileInfo->getFreePage();
913  if (pageNum != -1) {
914  pages.emplace_back(fileInfo->fileId, pageNum);
915  numPagesNeeded--;
916  }
917  } while (pageNum != -1 && numPagesNeeded > 0);
918  if (numPagesNeeded == 0) {
919  break;
920  }
921  }
922  while (numPagesNeeded > 0) {
923  FileInfo* fileInfo;
924  if (isMetadata) {
925  fileInfo = createFile(pageSize, num_pages_per_metadata_file_);
926  } else {
927  fileInfo = createFile(pageSize, num_pages_per_data_file_);
928  }
929  int32_t pageNum;
930  do {
931  pageNum = fileInfo->getFreePage();
932  if (pageNum != -1) {
933  pages.emplace_back(fileInfo->fileId, pageNum);
934  numPagesNeeded--;
935  }
936  } while (pageNum != -1 && numPagesNeeded > 0);
937  if (numPagesNeeded == 0) {
938  break;
939  }
940  }
941  CHECK(pages.size() == numPagesRequested);
942 }
std::mutex getPageMutex_
pointer to DB level metadata
Definition: FileMgr.h:409
static size_t num_pages_per_data_file_
Definition: FileMgr.h:417
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:401
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:418
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:960
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void File_Namespace::FileMgr::resetEpochFloor ( )
inline

Definition at line 301 of file FileMgr.h.

301 { epoch_.floor(epoch_.ceiling()); }
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
void File_Namespace::FileMgr::resumeFileCompaction ( const std::string &  status_file_name)
protected

Resumes an interrupted file compaction process. This method would normally only be called when re-initializing the file manager after a crash occurred in the middle of file compaction.

Definition at line 1215 of file FileMgr.cpp.

References CHECK, compactFiles(), COPY_PAGES_STATUS, DELETE_EMPTY_FILES_STATUS, deleteEmptyFiles(), files_rw_mutex_, getFilePath(), readPageMappingsFromStatusFile(), renameCompactionStatusFile(), UNREACHABLE, UPDATE_PAGE_VISIBILITY_STATUS, and updateMappedPagesVisibility().

Referenced by init().

1215  {
1216  if (status_file_name == COPY_PAGES_STATUS) {
1217  // Delete status file and restart data compaction process
1218  auto file_path = getFilePath(status_file_name);
1219  CHECK(boost::filesystem::exists(file_path));
1220  boost::filesystem::remove(file_path);
1221  compactFiles();
1222  } else if (status_file_name == UPDATE_PAGE_VISIBILITY_STATUS) {
1223  // Execute second and third phases of data compaction
1225  auto page_mappings = readPageMappingsFromStatusFile();
1226  updateMappedPagesVisibility(page_mappings);
1228  deleteEmptyFiles();
1229  } else if (status_file_name == DELETE_EMPTY_FILES_STATUS) {
1230  // Execute last phase of data compaction
1232  deleteEmptyFiles();
1233  } else {
1234  UNREACHABLE() << "Unexpected status file name: " << status_file_name;
1235  }
1236 }
std::vector< PageMapping > readPageMappingsFromStatusFile()
Definition: FileMgr.cpp:1528
#define UNREACHABLE()
Definition: Logger.h:337
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:376
heavyai::unique_lock< heavyai::shared_mutex > write_lock
std::unique_lock< T > unique_lock
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1667
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:377
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:375
void updateMappedPagesVisibility(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1457
#define CHECK(condition)
Definition: Logger.h:291
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411
void renameCompactionStatusFile(const char *const from_status, const char *const to_status)
Definition: FileMgr.cpp:1555

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::rollOffOldData ( const int32_t  epochCeiling,
const bool  shouldCheckpoint 
)
private

Definition at line 684 of file FileMgr.cpp.

References checkpoint(), epoch_, Epoch::floor(), freePagesBeforeEpoch(), and maxRollbackEpochs_.

Referenced by checkpoint(), and init().

684  {
685  if (maxRollbackEpochs_ >= 0) {
686  auto min_epoch = std::max(epoch_ceiling - maxRollbackEpochs_, epoch_.floor());
687  if (min_epoch > epoch_.floor()) {
688  freePagesBeforeEpoch(min_epoch);
689  epoch_.floor(min_epoch);
690  if (should_checkpoint) {
691  checkpoint();
692  }
693  }
694  }
695 }
int32_t floor() const
Definition: Epoch.h:43
void checkpoint() override
Fsyncs data files, writes out epoch and fsyncs that.
Definition: FileMgr.cpp:703
void freePagesBeforeEpoch(const int32_t min_epoch)
Definition: FileMgr.cpp:670
int32_t maxRollbackEpochs_
Definition: FileMgr.h:396

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::setDataAndMetadataFileStats ( StorageStats &  storage_stats) const
private

Definition at line 346 of file FileMgr.cpp.

References CHECK, File_Namespace::StorageStats::data_file_count, File_Namespace::StorageStats::epoch, File_Namespace::StorageStats::epoch_floor, epochFloor(), logger::FATAL, File_Namespace::FileMetadata::file_size, fileMgrBasePath_, files_, files_rw_mutex_, getMetadataForFile(), File_Namespace::FileMetadata::is_data_file, File_Namespace::anonymous_namespace{FileMgr.cpp}::is_metadata_file(), isFullyInitted_, lastCheckpointedEpoch(), LOG, File_Namespace::StorageStats::metadata_file_count, metadata_page_size_, File_Namespace::FileMetadata::num_pages, num_pages_per_metadata_file_, File_Namespace::FileMetadata::page_size, File_Namespace::StorageStats::total_data_file_size, File_Namespace::StorageStats::total_data_page_count, File_Namespace::StorageStats::total_free_data_page_count, File_Namespace::StorageStats::total_free_metadata_page_count, File_Namespace::StorageStats::total_metadata_file_size, and File_Namespace::StorageStats::total_metadata_page_count.

Referenced by getStorageStats().

346  {
348  if (!isFullyInitted_) {
349  CHECK(!fileMgrBasePath_.empty());
350  boost::filesystem::path path(fileMgrBasePath_);
351  if (boost::filesystem::exists(path)) {
352  if (!boost::filesystem::is_directory(path)) {
353  LOG(FATAL) << "getStorageStats: Specified path '" << fileMgrBasePath_
354  << "' for table data is not a directory.";
355  }
356 
357  storage_stats.epoch = lastCheckpointedEpoch();
358  storage_stats.epoch_floor = epochFloor();
359  boost::filesystem::directory_iterator
360  endItr; // default construction yields past-the-end
361  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr;
362  ++fileIt) {
363  FileMetadata file_metadata = getMetadataForFile(fileIt);
364  if (file_metadata.is_data_file) {
365  if (is_metadata_file(file_metadata.file_size,
366  file_metadata.page_size,
369  storage_stats.metadata_file_count++;
370  storage_stats.total_metadata_file_size += file_metadata.file_size;
371  storage_stats.total_metadata_page_count += file_metadata.num_pages;
372  } else {
373  storage_stats.data_file_count++;
374  storage_stats.total_data_file_size += file_metadata.file_size;
375  storage_stats.total_data_page_count += file_metadata.num_pages;
376  }
377  }
378  }
379  }
380  } else {
381  storage_stats.epoch = lastCheckpointedEpoch();
382  storage_stats.epoch_floor = epochFloor();
383  storage_stats.total_free_metadata_page_count = 0;
384  storage_stats.total_free_data_page_count = 0;
385 
386  // We already initialized this table so take the faster path of walking through the
387  // FileInfo objects and getting metadata from there
388  for (const auto& file_info_entry : files_) {
389  const auto file_info = file_info_entry.second;
390  if (is_metadata_file(file_info->size(),
391  file_info->pageSize,
394  storage_stats.metadata_file_count++;
395  storage_stats.total_metadata_file_size +=
396  file_info->pageSize * file_info->numPages;
397  storage_stats.total_metadata_page_count += file_info->numPages;
398  storage_stats.total_free_metadata_page_count.value() +=
399  file_info->freePages.size();
400  } else {
401  storage_stats.data_file_count++;
402  storage_stats.total_data_file_size += file_info->pageSize * file_info->numPages;
403  storage_stats.total_data_page_count += file_info->numPages;
404  storage_stats.total_free_data_page_count.value() += file_info->freePages.size();
405  }
406  }
407  }
408 }
const size_t metadata_page_size_
Definition: FileMgr.h:536
heavyai::shared_lock< heavyai::shared_mutex > read_lock
#define LOG(tag)
Definition: Logger.h:285
std::string fileMgrBasePath_
Definition: FileMgr.h:397
int32_t lastCheckpointedEpoch() const
Returns value of epoch at last checkpoint.
Definition: FileMgr.h:297
std::shared_lock< T > shared_lock
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:418
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator) const
Definition: FileMgr.cpp:153
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
#define CHECK(condition)
Definition: Logger.h:291
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411
int32_t epochFloor() const
Definition: FileMgr.h:279
bool is_metadata_file(size_t file_size, size_t page_size, size_t metadata_page_size, size_t num_pages_per_metadata_file)
Definition: FileMgr.cpp:328

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::setEpoch ( const int32_t  newEpoch)
protected

Definition at line 1190 of file FileMgr.cpp.

References Epoch::ceiling(), describeSelf(), epoch_, Epoch::floor(), and writeAndSyncEpochToDisk().

Referenced by init().

1190  {
1191  if (newEpoch < epoch_.floor()) {
1192  std::stringstream error_message;
1193  error_message << "Cannot set epoch for " << describeSelf()
1194  << " lower than the minimum rollback epoch (" << epoch_.floor() << ").";
1195  throw std::runtime_error(error_message.str());
1196  }
1197  epoch_.ceiling(newEpoch);
1199 }
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:656
virtual std::string describeSelf() const
Definition: FileMgr.cpp:697

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::setNumPagesPerDataFile ( size_t  num_pages)
static

Definition at line 1566 of file FileMgr.cpp.

References num_pages_per_data_file_.

1566  {
1567  num_pages_per_data_file_ = num_pages;
1568 }
static size_t num_pages_per_data_file_
Definition: FileMgr.h:417
void File_Namespace::FileMgr::setNumPagesPerMetadataFile ( size_t  num_pages)
static

Definition at line 1570 of file FileMgr.cpp.

References num_pages_per_metadata_file_.

1570  {
1571  num_pages_per_metadata_file_ = num_pages;
1572 }
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:418
void File_Namespace::FileMgr::sortAndCopyFilePagesForCompaction ( size_t  page_size,
std::vector< PageMapping > &  page_mappings,
std::set< Page > &  touched_pages 
)
protected

Sorts all files with the given page size in ascending order of number of free pages, then copies pages from files with more free pages to those with fewer free pages. Destination (copied-to) pages are left marked as free while copying. Copied source and destination pages are recorded in the page mappings.

Definition at line 1302 of file FileMgr.cpp.

References CHECK, copySourcePageForCompaction(), File_Namespace::Page::fileId, fileIndex_, files_, File_Namespace::FileInfo::freePages, freePages(), and gpu_enabled::sort().

Referenced by compactFiles().

1304  {
1305  std::vector<FileInfo*> sorted_file_infos;
1306  auto range = fileIndex_.equal_range(page_size);
1307  for (auto it = range.first; it != range.second; it++) {
1308  sorted_file_infos.emplace_back(files_.at(it->second));
1309  }
1310  if (sorted_file_infos.empty()) {
1311  return;
1312  }
1313 
1314  // Sort file infos in ascending order of free pages count i.e. from files with
1315  // the least number of free pages to those with the highest number of free pages.
1316  std::sort(sorted_file_infos.begin(),
1317  sorted_file_infos.end(),
1318  [](const FileInfo* file_1, const FileInfo* file_2) {
1319  return file_1->freePages.size() < file_2->freePages.size();
1320  });
1321 
1322  size_t destination_index = 0, source_index = sorted_file_infos.size() - 1;
1323 
1324  // For page copy destinations, skip files without free pages.
1325  while (destination_index < source_index &&
1326  sorted_file_infos[destination_index]->freePages.empty()) {
1327  destination_index++;
1328  }
1329 
1330  // For page copy sources, skip files with only free pages.
1331  while (destination_index < source_index &&
1332  sorted_file_infos[source_index]->freePages.size() ==
1333  sorted_file_infos[source_index]->numPages) {
1334  source_index--;
1335  }
1336 
1337  std::set<size_t> source_used_pages;
1338  CHECK(destination_index <= source_index);
1339 
1340  // Get the total number of free pages available for compaction
1341  int64_t total_free_pages{0};
1342  for (size_t i = destination_index; i <= source_index; i++) {
1343  total_free_pages += sorted_file_infos[i]->numFreePages();
1344  }
1345 
1346  while (destination_index < source_index) {
1347  if (source_used_pages.empty()) {
1348  // Populate source_used_pages with only used pages in the source file.
1349  auto source_file_info = sorted_file_infos[source_index];
1350  auto& free_pages = source_file_info->freePages;
1351  for (size_t page_num = 0; page_num < source_file_info->numPages; page_num++) {
1352  if (free_pages.find(page_num) == free_pages.end()) {
1353  source_used_pages.emplace(page_num);
1354  }
1355  }
1356 
1357  // Free pages of current source file will not be copy destinations
1358  total_free_pages -= source_file_info->numFreePages();
1359  }
1360 
1361  // Exit early if there are not enough free pages to empty the next file
1362  if (total_free_pages - static_cast<int64_t>(source_used_pages.size()) < 0) {
1363  return;
1364  }
1365 
1366  // Copy pages from source files to destination files
1367  auto dest_file_info = sorted_file_infos[destination_index];
1368  while (!source_used_pages.empty() && !dest_file_info->freePages.empty()) {
1369  // Get next page to copy
1370  size_t source_page_num = *source_used_pages.begin();
1371  source_used_pages.erase(source_page_num);
1372 
1373  Page source_page{sorted_file_infos[source_index]->fileId, source_page_num};
1374  copySourcePageForCompaction(source_page,
1375  sorted_file_infos[destination_index],
1376  page_mappings,
1377  touched_pages);
1378  total_free_pages--;
1379  }
1380 
1381  if (source_used_pages.empty()) {
1382  source_index--;
1383  }
1384 
1385  if (dest_file_info->freePages.empty()) {
1386  destination_index++;
1387  }
1388  }
1389 }
void copySourcePageForCompaction(const Page &source_page, FileInfo *destination_file_info, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
Definition: FileMgr.cpp:1397
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:401
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::syncFilesToDisk ( )
protected

Definition at line 1574 of file FileMgr.cpp.

References CHECK, files_, and files_rw_mutex_.

Referenced by checkpoint().

1574  {
1576  for (auto file_info_entry : files_) {
1577  int32_t status = file_info_entry.second->syncToDisk();
1578  CHECK(status == 0) << "Could not sync file to disk";
1579  }
1580 }
std::shared_lock< T > shared_lock
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400
#define CHECK(condition)
Definition: Logger.h:291
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::updateMappedPagesVisibility ( const std::vector< PageMapping > &  page_mappings)
protected

Goes through the given page mappings and marks source (copied-from) pages as free while marking destination (copied-to) pages as used (by setting the header size).

Definition at line 1457 of file FileMgr.cpp.

References CHECK_GT, logger::FATAL, files_, and LOG.

Referenced by compactFiles(), and resumeFileCompaction().

1457  {
1458  for (const auto& page_mapping : page_mappings) {
1459  auto destination_file = files_.at(page_mapping.destination_file_id);
1460 
1461  // Set destination page header size
1462  auto header_size = page_mapping.source_page_header_size;
1463  CHECK_GT(header_size, 0);
1464  destination_file->write(
1465  page_mapping.destination_page_num * destination_file->pageSize,
1466  sizeof(PageHeaderSizeType),
1467  reinterpret_cast<int8_t*>(&header_size));
1468  auto source_file = files_.at(page_mapping.source_file_id);
1469 
1470  // Free source page
1471  PageHeaderSizeType free_page_header_size{0};
1472  source_file->write(page_mapping.source_page_num * source_file->pageSize,
1473  sizeof(PageHeaderSizeType),
1474  reinterpret_cast<int8_t*>(&free_page_header_size));
1475  source_file->freePageDeferred(page_mapping.source_page_num);
1476  }
1477 
1478  for (auto file_info_entry : files_) {
1479  int32_t status = file_info_entry.second->syncToDisk();
1480  if (status != 0) {
1481  LOG(FATAL) << "Could not sync file to disk";
1482  }
1483  }
1484 }
#define LOG(tag)
Definition: Logger.h:285
#define CHECK_GT(x, y)
Definition: Logger.h:305
int32_t PageHeaderSizeType
Definition: FileMgr.h:127
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:400

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::updatePageIfDeleted ( FileInfo file_info,
ChunkKey chunk_key,
int32_t  contingent,
int32_t  page_epoch,
int32_t  page_num 
)
virtual

Deletes or recovers a page based on the last checkpointed epoch.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1615 of file FileMgr.cpp.

References CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, epoch(), File_Namespace::FileInfo::freePageImmediate(), get_fileMgrKey(), File_Namespace::is_page_deleted_with_checkpoint(), File_Namespace::is_page_deleted_without_checkpoint(), and File_Namespace::FileInfo::recoverPage().

Referenced by File_Namespace::FileInfo::openExistingFile().

1619  {
1620  // If the parent FileMgr has a fileMgrKey, then all keys are locked to one table and
1621  // can be set from the manager.
1622  auto [db_id, tb_id] = get_fileMgrKey();
1623  chunk_key[CHUNK_KEY_DB_IDX] = db_id;
1624  chunk_key[CHUNK_KEY_TABLE_IDX] = tb_id;
1625 
1626  auto table_epoch = epoch(db_id, tb_id);
1627 
1628  if (is_page_deleted_with_checkpoint(table_epoch, page_epoch, contingent)) {
1629  file_info->freePageImmediate(page_num);
1630  return true;
1631  }
1632 
1633  // Recover page if it was deleted but not checkpointed.
1634  if (is_page_deleted_without_checkpoint(table_epoch, page_epoch, contingent)) {
1635  file_info->recoverPage(chunk_key, page_num);
1636  }
1637  return false;
1638 }
bool is_page_deleted_without_checkpoint(int32_t table_epoch, int32_t page_epoch, int32_t contingent)
Definition: FileInfo.cpp:289
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
const TablePair get_fileMgrKey() const
Definition: FileMgr.h:338
bool is_page_deleted_with_checkpoint(int32_t table_epoch, int32_t page_epoch, int32_t contingent)
Definition: FileInfo.cpp:277
int32_t epoch() const
Definition: FileMgr.h:517

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeAndSyncEpochToDisk ( )
protected

Definition at line 656 of file FileMgr.cpp.

References Epoch::byte_size(), CHECK, epoch_, epochFile_, epochIsCheckpointed_, heavyai::fsync(), Epoch::storage_ptr(), and File_Namespace::write().

Referenced by checkpoint(), createEpochFile(), migrateEpochFileV0(), and setEpoch().

656  {
657  CHECK(epochFile_);
659  int32_t status = fflush(epochFile_);
660  CHECK(status == 0) << "Could not flush epoch file to disk";
661 #ifdef __APPLE__
662  status = fcntl(fileno(epochFile_), 51);
663 #else
664  status = heavyai::fsync(fileno(epochFile_));
665 #endif
666  CHECK(status == 0) << "Could not sync epoch file to disk";
667  epochIsCheckpointed_ = true;
668 }
int8_t * storage_ptr()
Definition: Epoch.h:61
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:160
static size_t byte_size()
Definition: Epoch.h:63
int fsync(int fd)
Definition: heavyai_fs.cpp:62
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeAndSyncVersionToDisk ( const std::string &  versionFileName,
const int32_t  version 
)
protected

Definition at line 1079 of file FileMgr.cpp.

References File_Namespace::close(), File_Namespace::create(), epochFile_, logger::FATAL, fileMgrBasePath_, heavyai::fsync(), logger::INFO, LOG, File_Namespace::open(), readVersionFromDisk(), and File_Namespace::write().

Referenced by createTopLevelMetadata(), init(), migrateEpochFileV0(), migrateLegacyFilesV1(), and migrateToLatestFileMgrVersion().

1080  {
1081  const std::string versionFilePath(fileMgrBasePath_ + "/" + versionFileName);
1082  FILE* versionFile;
1083  if (boost::filesystem::exists(versionFilePath)) {
1084  int32_t oldVersion = readVersionFromDisk(versionFileName);
1085  LOG(INFO) << "Storage version file `" << versionFilePath
1086  << "` already exists, its current version is " << oldVersion;
1087  versionFile = open(versionFilePath);
1088  } else {
1089  versionFile = create(versionFilePath, sizeof(int32_t));
1090  }
1091  write(versionFile, 0, sizeof(int32_t), (int8_t*)&version);
1092  int32_t status = fflush(versionFile);
1093  if (status != 0) {
1094  LOG(FATAL) << "Could not flush version file " << versionFilePath << " to disk";
1095  }
1096 #ifdef __APPLE__
1097  status = fcntl(fileno(epochFile_), 51);
1098 #else
1099  status = heavyai::fsync(fileno(versionFile));
1100 #endif
1101  if (status != 0) {
1102  LOG(FATAL) << "Could not sync version file " << versionFilePath << " to disk";
1103  }
1104  close(versionFile);
1105 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1061
#define LOG(tag)
Definition: Logger.h:285
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:57
std::string fileMgrBasePath_
Definition: FileMgr.h:397
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:160
string version
Definition: setup.in.py:73
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:107
int fsync(int fd)
Definition: heavyai_fs.cpp:62
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeDirtyBuffers ( )
private

Definition at line 1652 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, and epoch().

Referenced by checkpoint().

1652  {
1654  for (auto [key, buf] : chunkIndex_) {
1655  if (buf->isDirty()) {
1656  buf->writeMetadata(epoch());
1657  buf->clearDirtyBits();
1658  }
1659  }
1660 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:326
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
int32_t epoch() const
Definition: FileMgr.h:517

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writePageMappingsToStatusFile ( const std::vector< PageMapping > &  page_mappings)

Serializes a page mapping vector to the expected status file (COPY_PAGES_STATUS), which must already exist and be empty. The page mapping vector is serialized in the following format: [{number of page mappings, as an int64_t}, {page mapping vector data bytes ...}]

Definition at line 1512 of file FileMgr.cpp.

References CHECK, COPY_PAGES_STATUS, and getFilePath().

Referenced by compactFiles().

1513  {
1514  auto file_path = getFilePath(COPY_PAGES_STATUS);
1515  CHECK(boost::filesystem::exists(file_path));
1516  CHECK(boost::filesystem::is_empty(file_path));
1517  std::ofstream status_file{file_path.string(), std::ios::out | std::ios::binary};
1518  int64_t page_mappings_count = page_mappings.size();
1519  status_file.write(reinterpret_cast<const char*>(&page_mappings_count), sizeof(int64_t));
1520  status_file.write(reinterpret_cast<const char*>(page_mappings.data()),
1521  page_mappings_count * sizeof(PageMapping));
1522  status_file.close();
1523 }
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1667
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:375
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class GlobalFileMgr
friend

Definition at line 155 of file FileMgr.h.

Member Data Documentation

constexpr char const* File_Namespace::FileMgr::COPY_PAGES_STATUS {"pending_data_compaction_0"}
static
constexpr char File_Namespace::FileMgr::DB_META_FILENAME[] = "dbmeta"
static

Definition at line 388 of file FileMgr.h.

Referenced by createTopLevelMetadata().

int32_t File_Namespace::FileMgr::db_version_
protected

DB version from dbmeta file, should be compatible with GlobalFileMgr::omnisci_db_version_

Definition at line 404 of file FileMgr.h.

Referenced by createTopLevelMetadata().

FILE* File_Namespace::FileMgr::DBMetaFile_ = nullptr
protected

Pointer to the DB level metadata file.

Definition at line 408 of file FileMgr.h.

Referenced by closePhysicalUnlocked(), and ~FileMgr().

constexpr size_t File_Namespace::FileMgr::DEFAULT_NUM_PAGES_PER_DATA_FILE {256}
static

Definition at line 371 of file FileMgr.h.

constexpr size_t File_Namespace::FileMgr::DEFAULT_NUM_PAGES_PER_METADATA_FILE {4096}
static

Definition at line 372 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::getMinimumSize().

constexpr char const* File_Namespace::FileMgr::DELETE_EMPTY_FILES_STATUS {"pending_data_compaction_2"}
static
Epoch File_Namespace::FileMgr::epoch_
private
constexpr char File_Namespace::FileMgr::EPOCH_FILENAME[] = "epoch_metadata"
static

Definition at line 387 of file FileMgr.h.

Referenced by coreInit(), init(), and migrateEpochFileV0().

FILE* File_Namespace::FileMgr::epochFile_ = nullptr
private
bool File_Namespace::FileMgr::epochIsCheckpointed_ = true
private

Definition at line 527 of file FileMgr.h.

Referenced by writeAndSyncEpochToDisk().

constexpr char File_Namespace::FileMgr::FILE_MGR_VERSION_FILENAME[] = "filemgr_version"
static
PageSizeFileMMap File_Namespace::FileMgr::fileIndex_
protected

Maps page sizes to FileInfo objects.

Definition at line 401 of file FileMgr.h.

Referenced by clearFileInfos(), createFile(), openExistingFile(), requestFreePage(), requestFreePages(), and sortAndCopyFilePagesForCompaction().

TablePair File_Namespace::FileMgr::fileMgrKey_
private

Key of this FileMgr: a TablePair that callers unpack as {db_id, tb_id} (see get_fileMgrKey()).

Definition at line 524 of file FileMgr.h.

Referenced by coreInit(), describeSelf(), and FileMgr().

int32_t File_Namespace::FileMgr::fileMgrVersion_
protected

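Version of the FileMgr storage format currently in use (see latestFileMgrVersion_ and migrateToLatestFileMgrVersion()). NOTE(review): presumably the value stored in the FILE_MGR_VERSION_FILENAME file; confirm against init().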

Definition at line 406 of file FileMgr.h.

Referenced by init(), and migrateToLatestFileMgrVersion().

std::vector<std::pair<FileInfo*, int32_t> > File_Namespace::FileMgr::free_pages_
protected

Definition at line 414 of file FileMgr.h.

Referenced by free_page(), and freePages().

std::mutex File_Namespace::FileMgr::getPageMutex_
protected

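Mutex used by requestFreePage() and requestFreePages(). NOTE(review): presumably serializes free-page allocation; confirm against FileMgr.cpp.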

Definition at line 409 of file FileMgr.h.

Referenced by requestFreePage(), and requestFreePages().

GlobalFileMgr* File_Namespace::FileMgr::gfm_
private

Pointer to the global FileMgr.

Definition at line 523 of file FileMgr.h.

Referenced by coreInit(), FileMgr(), getDBConvert(), getDBVersion(), and init().

constexpr int32_t File_Namespace::FileMgr::INVALID_VERSION = -1
static

Definition at line 390 of file FileMgr.h.

Referenced by createTopLevelMetadata(), and migrateToLatestFileMgrVersion().

bool File_Namespace::FileMgr::isFullyInitted_ {false}
protected
const int32_t File_Namespace::FileMgr::latestFileMgrVersion_ {2}
protected

Definition at line 407 of file FileMgr.h.

Referenced by init(), and migrateToLatestFileMgrVersion().

constexpr char File_Namespace::FileMgr::LEGACY_EPOCH_FILENAME[] = "epoch"
static

Definition at line 386 of file FileMgr.h.

Referenced by migrateEpochFileV0().

int32_t File_Namespace::FileMgr::maxRollbackEpochs_
protected

Definition at line 396 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::CachingFileMgr(), and rollOffOldData().

const size_t File_Namespace::FileMgr::metadata_page_size_
protected
heavyai::shared_mutex File_Namespace::FileMgr::mutex_free_page_
mutable protected

Definition at line 413 of file FileMgr.h.

Referenced by free_page(), and freePages().

unsigned File_Namespace::FileMgr::nextFileId_
protected

the index of the next file id

Definition at line 403 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::CachingFileMgr(), createFile(), init(), and File_Namespace::CachingFileMgr::init().

size_t File_Namespace::FileMgr::num_pages_per_data_file_ {DEFAULT_NUM_PAGES_PER_DATA_FILE}
static protected
size_t File_Namespace::FileMgr::num_pages_per_metadata_file_ {DEFAULT_NUM_PAGES_PER_METADATA_FILE}
static protected
size_t File_Namespace::FileMgr::num_reader_threads_
protected

number of threads used when loading data

Definition at line 402 of file FileMgr.h.

Referenced by initializeNumThreads().

constexpr char const* File_Namespace::FileMgr::UPDATE_PAGE_VISIBILITY_STATUS {"pending_data_compaction_1"}
static

The documentation for this class was generated from the following files: