OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
File_Namespace::FileMgr Class Reference

#include <FileMgr.h>

+ Inheritance diagram for File_Namespace::FileMgr:
+ Collaboration diagram for File_Namespace::FileMgr:

Public Member Functions

 FileMgr (const int32_t device_id, GlobalFileMgr *gfm, const TablePair file_mgr_key, const int32_t max_rollback_epochs=-1, const size_t num_reader_threads=0, const int32_t epoch=-1)
 Constructor. More...
 
 FileMgr (const int32_t device_id, GlobalFileMgr *gfm, const TablePair file_mgr_key, const bool run_core_init)
 
 FileMgr (GlobalFileMgr *gfm, std::string basePath)
 
 ~FileMgr () override
 Destructor. More...
 
StorageStats getStorageStats () const
 
FileBuffer * createBuffer (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
 Creates a chunk with the specified key and page size. More...
 
bool isBufferOnDevice (const ChunkKey &key) override
 
void deleteBuffer (const ChunkKey &key, const bool purge=true) override
 Deletes the chunk with the specified key. More...
 
void deleteBuffersWithPrefix (const ChunkKey &keyPrefix, const bool purge=true) override
 
FileBuffer * getBuffer (const ChunkKey &key, const size_t numBytes=0) override
 Returns a pointer to the chunk with the specified key. More...
 
void fetchBuffer (const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override
 
FileBuffer * putBuffer (const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override
 Puts the contents of d into the Chunk with the given key. More...
 
AbstractBuffer * alloc (const size_t numBytes) override
 
void free (AbstractBuffer *buffer) override
 
virtual Page requestFreePage (size_t pagesize, const bool isMetadata)
 
MgrType getMgrType () override
 
std::string getStringMgrType () override
 
std::string printSlabs () override
 
size_t getMaxSize () override
 
size_t getInUseSize () override
 
size_t getAllocated () override
 
bool isAllocationCapped () override
 
FileInfo * getFileInfoForFileId (const int32_t fileId) const
 
FileMetadata getMetadataForFile (const boost::filesystem::directory_iterator &fileIterator) const
 
void copyPage (Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
 
void requestFreePages (size_t npages, size_t pagesize, std::vector< Page > &pages, const bool isMetadata)
 Obtains free pages – creates new files if necessary – of the requested size. More...
 
void getChunkMetadataVecForKeyPrefix (ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix) override
 
bool hasChunkMetadataForKeyPrefix (const ChunkKey &keyPrefix)
 
void checkpoint () override
 Fsyncs data files, writes out epoch and fsyncs that. More...
 
void checkpoint (const int32_t db_id, const int32_t tb_id) override
 
virtual int32_t epoch (int32_t db_id, int32_t tb_id) const
 Returns current value of epoch - should be one greater than recorded at last checkpoint. Because FileMgr only contains buffers from one table we can just return the FileMgr's epoch instead of finding a table-specific epoch. More...
 
int32_t epochFloor () const
 
int32_t incrementEpoch ()
 
int32_t lastCheckpointedEpoch () const
 Returns value of epoch at last checkpoint. More...
 
void resetEpochFloor ()
 
int32_t maxRollbackEpochs ()
 Returns value max_rollback_epochs. More...
 
size_t getNumReaderThreads ()
 Returns number of threads defined by parameter num-reader-threads which should be used during initial load and consequent read of data. More...
 
FILE * getFileForFileId (const int32_t fileId)
 Returns FILE pointer associated with requested fileId. More...
 
size_t getNumChunks () override
 
size_t getNumUsedMetadataPagesForChunkKey (const ChunkKey &chunkKey) const
 
bool getDBConvert () const
 Index for looking up chunks. More...
 
void createOrMigrateTopLevelMetadata ()
 
std::string getFileMgrBasePath () const
 
virtual void closeRemovePhysical ()
 
void removeTableRelatedDS (const int32_t db_id, const int32_t table_id) override
 
virtual void free_page (std::pair< FileInfo *, int32_t > &&page)
 
virtual bool hasFileMgrKey () const
 
const TablePair get_fileMgrKey () const
 
boost::filesystem::path getFilePath (const std::string &file_name) const
 
void writePageMappingsToStatusFile (const std::vector< PageMapping > &page_mappings)
 
void renameCompactionStatusFile (const char *const from_status, const char *const to_status)
 
void compactFiles ()
 
virtual bool updatePageIfDeleted (FileInfo *file_info, ChunkKey &chunk_key, int32_t contingent, int32_t page_epoch, int32_t page_num)
 deletes or recovers a page based on last checkpointed epoch. More...
 
virtual bool failOnReadError () const
 True if a read error should cause a fatal error. More...
 
size_t getPageSize () const
 
size_t getMetadataPageSize () const
 
virtual std::string describeSelf () const
 
FILE * createFile (const std::string &full_path, const size_t requested_file_size) const
 
std::pair< FILE *, std::string > createFile (const std::string &base_path, const int file_id, const size_t page_size, const size_t num_pages) const
 
size_t writeFile (FILE *f, const size_t offset, const size_t size, const int8_t *buf) const
 

Static Public Member Functions

static void setNumPagesPerDataFile (size_t num_pages)
 
static void setNumPagesPerMetadataFile (size_t num_pages)
 
static void renameAndSymlinkLegacyFiles (const std::string &table_data_dir)
 

Public Attributes

ChunkKeyToChunkMap chunkIndex_
 

Static Public Attributes

static constexpr size_t DEFAULT_NUM_PAGES_PER_DATA_FILE {256}
 
static constexpr size_t DEFAULT_NUM_PAGES_PER_METADATA_FILE {4096}
 
static constexpr char const * COPY_PAGES_STATUS {"pending_data_compaction_0"}
 
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS {"pending_data_compaction_1"}
 
static constexpr char const * DELETE_EMPTY_FILES_STATUS {"pending_data_compaction_2"}
 
static constexpr char LEGACY_EPOCH_FILENAME [] = "epoch"
 
static constexpr char EPOCH_FILENAME [] = "epoch_metadata"
 
static constexpr char DB_META_FILENAME [] = "dbmeta"
 
static constexpr char FILE_MGR_VERSION_FILENAME [] = "filemgr_version"
 
static constexpr int32_t INVALID_VERSION = -1
 
static constexpr int32_t LATEST_FILE_MGR_VERSION = 2
 

Protected Member Functions

 FileMgr (const size_t defaultPageSize, const size_t defaultMetadataPageSize)
 
FileInfo * createFileInfo (const size_t pageSize, const size_t numPages)
 Adds a file to the file manager repository. More...
 
FileInfo * openExistingFile (const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
 
void createEpochFile (const std::string &epochFileName)
 
int32_t openAndReadLegacyEpochFile (const std::string &epochFileName)
 
void openAndReadEpochFile (const std::string &epochFileName)
 
void writeAndSyncEpochToDisk ()
 
void setEpoch (const int32_t newEpoch)
 
int32_t readVersionFromDisk (const std::string &versionFileName) const
 
void writeAndSyncVersionToDisk (const std::string &versionFileName, const int32_t version)
 
void processFileFutures (std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
 
virtual FileBuffer * createBufferUnlocked (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
 
virtual FileBuffer * createBufferFromHeaders (const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
 
void migrateToLatestFileMgrVersion ()
 
void migrateEpochFileV0 ()
 
void migrateLegacyFilesV1 ()
 
OpenFilesResult openFiles ()
 
void clearFileInfos ()
 
void copySourcePageForCompaction (const Page &source_page, FileInfo *destination_file_info, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
 
int32_t copyPageWithoutHeaderSize (const Page &source_page, const Page &destination_page)
 
void sortAndCopyFilePagesForCompaction (size_t page_size, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
 
void updateMappedPagesVisibility (const std::vector< PageMapping > &page_mappings)
 
void deleteEmptyFiles ()
 
void resumeFileCompaction (const std::string &status_file_name)
 
std::vector< PageMapping > readPageMappingsFromStatusFile ()
 
 FileMgr (const int epoch)
 
void closePhysicalUnlocked ()
 
void syncFilesToDisk ()
 
void freePages ()
 
void initializeNumThreads (size_t num_reader_threads=0)
 
virtual FileBuffer * allocateBuffer (const size_t page_size, const ChunkKey &key, const size_t num_bytes=0)
 
virtual FileBuffer * allocateBuffer (const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
 
virtual
ChunkKeyToChunkMap::iterator 
deleteBufferUnlocked (const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true)
 
virtual FileBuffer * getBufferUnlocked (const ChunkKey &key, const size_t numBytes=0) const
 

Protected Attributes

int32_t maxRollbackEpochs_
 
std::string fileMgrBasePath_
 
std::map< int32_t,
std::unique_ptr< FileInfo > > 
files_
 
PageSizeFileMMap fileIndex_
 Maps page sizes to FileInfo objects. More...
 
size_t num_reader_threads_
 number of threads used when loading data More...
 
unsigned nextFileId_
 the index of the next file id More...
 
int32_t fileMgrVersion_
 
FILE * DBMetaFile_ = nullptr
 pointer to DB level metadata More...
 
std::mutex getPageMutex_
 
heavyai::shared_mutex chunkIndexMutex_
 
heavyai::shared_mutex files_rw_mutex_
 
heavyai::shared_mutex mutex_free_page_
 
std::vector< std::pair
< FileInfo *, int32_t > > 
free_pages_
 
bool isFullyInitted_ {false}
 
const size_t page_size_
 
const size_t metadata_page_size_
 

Static Protected Attributes

static size_t num_pages_per_data_file_ {DEFAULT_NUM_PAGES_PER_DATA_FILE}
 
static size_t num_pages_per_metadata_file_ {DEFAULT_NUM_PAGES_PER_METADATA_FILE}
 

Private Member Functions

void init (const size_t num_reader_threads, const int32_t epochOverride)
 
void init (const std::string &dataPathToConvertFrom, const int32_t epochOverride)
 
void rollOffOldData (const int32_t epochCeiling, const bool shouldCheckpoint)
 
void freePagesBeforeEpoch (const int32_t min_epoch)
 
void freePagesBeforeEpochUnlocked (const int32_t min_epoch, const ChunkKeyToChunkMap::iterator lower_bound, const ChunkKeyToChunkMap::iterator upper_bound)
 
FileBuffer * getOrCreateBuffer (const ChunkKey &key)
 
bool coreInit ()
 Determines file path, and if exists, runs file migration and opens and reads epoch file. More...
 
int32_t epoch () const
 
void writeDirtyBuffers ()
 
void setDataAndMetadataFileStats (StorageStats &storage_stats) const
 
uint32_t getFragmentCount () const
 
virtual void readOnlyCheck (const std::string &action, const std::optional< std::string > &file_name={}) const
 

Private Attributes

GlobalFileMgr * gfm_
 Global FileMgr. More...
 
TablePair fileMgrKey_
 
Epoch epoch_
 
bool epochIsCheckpointed_ = true
 
FILE * epochFile_ = nullptr
 

Friends

class GlobalFileMgr
 

Detailed Description

Definition at line 161 of file FileMgr.h.

Constructor & Destructor Documentation

File_Namespace::FileMgr::FileMgr ( const int32_t  device_id,
GlobalFileMgr * gfm,
const TablePair  file_mgr_key,
const int32_t  max_rollback_epochs = -1,
const size_t  num_reader_threads = 0,
const int32_t  epoch = -1 
)

Constructor.

Definition at line 51 of file FileMgr.cpp.

References init().

57  : AbstractBufferMgr(device_id)
58  , maxRollbackEpochs_(max_rollback_epochs)
59  , nextFileId_(0)
60  , gfm_(gfm)
61  , fileMgrKey_(file_mgr_key)
62  , page_size_(gfm->getPageSize())
63  , metadata_page_size_(gfm->getMetadataPageSize()) {
64  init(num_reader_threads, epoch);
65 }
const size_t metadata_page_size_
Definition: FileMgr.h:552
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:540
const size_t page_size_
Definition: FileMgr.h:551
void init(const size_t num_reader_threads, const int32_t epochOverride)
Definition: FileMgr.cpp:252
GlobalFileMgr * gfm_
Definition: FileMgr.h:539
int32_t maxRollbackEpochs_
Definition: FileMgr.h:410
int32_t epoch() const
Definition: FileMgr.h:530
unsigned nextFileId_
number of threads used when loading data
Definition: FileMgr.h:416

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( const int32_t  device_id,
GlobalFileMgr * gfm,
const TablePair  file_mgr_key,
const bool  run_core_init 
)

Definition at line 68 of file FileMgr.cpp.

References coreInit(), epochFile_, fileMgrBasePath_, fileMgrKey_, files_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, and to_string().

72  : AbstractBufferMgr(device_id)
73  , maxRollbackEpochs_(-1)
74  , nextFileId_(0)
75  , gfm_(gfm)
76  , fileMgrKey_(file_mgr_key)
77  , page_size_(gfm->getPageSize())
78  , metadata_page_size_(gfm->getMetadataPageSize()) {
79  // TODO(Misiu): Standardize prefix and delim.
80  const std::string fileMgrDirPrefix("table");
81  const std::string fileMgrDirDelim("_");
82  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + fileMgrDirDelim +
83  std::to_string(fileMgrKey_.first) + // db_id
84  fileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
85  epochFile_ = nullptr;
86  files_.clear();
87  if (run_core_init) {
88  coreInit();
89  }
90 }
const size_t metadata_page_size_
Definition: FileMgr.h:552
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:540
std::string getBasePath() const
const size_t page_size_
Definition: FileMgr.h:551
GlobalFileMgr * gfm_
Definition: FileMgr.h:539
std::string fileMgrBasePath_
Definition: FileMgr.h:411
std::string to_string(char const *&&v)
int32_t maxRollbackEpochs_
Definition: FileMgr.h:410
bool coreInit()
Determines file path, and if exists, runs file migration and opens and reads epoch file...
Definition: FileMgr.cpp:137
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413
unsigned nextFileId_
number of threads used when loading data
Definition: FileMgr.h:416

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( GlobalFileMgr * gfm,
std::string  basePath 
)

Definition at line 92 of file FileMgr.cpp.

References init().

93  : AbstractBufferMgr(0)
94  , maxRollbackEpochs_(-1)
95  , fileMgrBasePath_(base_path)
96  , nextFileId_(0)
97  , gfm_(gfm)
98  , fileMgrKey_(0, 0)
99  , page_size_(gfm->getPageSize())
100  , metadata_page_size_(gfm->getMetadataPageSize()) {
101  init(base_path, -1);
102 }
const size_t metadata_page_size_
Definition: FileMgr.h:552
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:540
const size_t page_size_
Definition: FileMgr.h:551
void init(const size_t num_reader_threads, const int32_t epochOverride)
Definition: FileMgr.cpp:252
GlobalFileMgr * gfm_
Definition: FileMgr.h:539
std::string fileMgrBasePath_
Definition: FileMgr.h:411
int32_t maxRollbackEpochs_
Definition: FileMgr.h:410
unsigned nextFileId_
number of threads used when loading data
Definition: FileMgr.h:416

+ Here is the call graph for this function:

File_Namespace::FileMgr::~FileMgr ( )
override

Destructor.

Definition at line 118 of file FileMgr.cpp.

References chunkIndex_, File_Namespace::close(), DBMetaFile_, epochFile_, and files_.

118  {
119  // free memory used by FileInfo objects
120  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
121  delete chunkIt->second;
122  }
123 
124  files_.clear();
125 
126  if (epochFile_) {
127  close(epochFile_);
128  epochFile_ = nullptr;
129  }
130 
131  if (DBMetaFile_) {
133  DBMetaFile_ = nullptr;
134  }
135 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:114
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( const size_t  defaultPageSize,
const size_t  defaultMetadataPageSize 
)
protected

Definition at line 113 of file FileMgr.cpp.

114  : AbstractBufferMgr(0)
115  , page_size_(page_size)
116  , metadata_page_size_(metadata_page_size) {}
const size_t metadata_page_size_
Definition: FileMgr.h:552
const size_t page_size_
Definition: FileMgr.h:551
File_Namespace::FileMgr::FileMgr ( const int  epoch)
protected

Definition at line 105 of file FileMgr.cpp.

References Epoch::ceiling(), and epoch_.

106  : AbstractBufferMgr(-1)
110 }
const size_t metadata_page_size_
Definition: FileMgr.h:552
const size_t page_size_
Definition: FileMgr.h:551
#define DEFAULT_METADATA_PAGE_SIZE
int32_t ceiling() const
Definition: Epoch.h:44
#define DEFAULT_PAGE_SIZE
int32_t epoch() const
Definition: FileMgr.h:530

+ Here is the call graph for this function:

Member Function Documentation

AbstractBuffer * File_Namespace::FileMgr::alloc ( const size_t  numBytes = 0)
override

Definition at line 868 of file FileMgr.cpp.

References logger::FATAL, and LOG.

868  {
869  LOG(FATAL) << "Operation not supported";
870  return nullptr; // satisfy return-type warning
871 }
#define LOG(tag)
Definition: Logger.h:285
FileBuffer * File_Namespace::FileMgr::allocateBuffer ( const size_t  page_size,
const ChunkKey & key,
const size_t  num_bytes = 0 
)
protectedvirtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1623 of file FileMgr.cpp.

Referenced by createBufferFromHeaders(), and createBufferUnlocked().

1625  {
1626  return new FileBuffer(this, page_size, key, num_bytes);
1627 }

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::allocateBuffer ( const ChunkKey & key,
const std::vector< HeaderInfo >::const_iterator &  headerStartIt,
const std::vector< HeaderInfo >::const_iterator &  headerEndIt 
)
protectedvirtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1629 of file FileMgr.cpp.

1632  {
1633  return new FileBuffer(this, key, headerStartIt, headerEndIt);
1634 }
void File_Namespace::FileMgr::checkpoint ( )
override

Fsyncs data files, writes out epoch and fsyncs that.

Definition at line 706 of file FileMgr.cpp.

References describeSelf(), epoch(), freePages(), incrementEpoch(), rollOffOldData(), syncFilesToDisk(), VLOG, writeAndSyncEpochToDisk(), and writeDirtyBuffers().

Referenced by rollOffOldData().

706  {
707  VLOG(2) << "Checkpointing " << describeSelf() << " epoch: " << epoch();
709  rollOffOldData(epoch(), false /* shouldCheckpoint */);
710  syncFilesToDisk();
712  incrementEpoch();
713  freePages();
714 }
void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint)
Definition: FileMgr.cpp:687
int32_t incrementEpoch()
Definition: FileMgr.h:285
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:659
virtual std::string describeSelf() const
Definition: FileMgr.cpp:700
int32_t epoch() const
Definition: FileMgr.h:530
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::checkpoint ( const int32_t  db_id,
const int32_t  tb_id 
)
inlineoverride

Definition at line 272 of file FileMgr.h.

References logger::FATAL, and LOG.

272  {
273  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
274  }
#define LOG(tag)
Definition: Logger.h:285
void File_Namespace::FileMgr::clearFileInfos ( )
protected

Definition at line 247 of file FileMgr.cpp.

References fileIndex_, and files_.

Referenced by init().

247  {
248  files_.clear();
249  fileIndex_.clear();
250 }
PageSizeFileMMap fileIndex_
Definition: FileMgr.h:414
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::closePhysicalUnlocked ( )
protected

Definition at line 556 of file FileMgr.cpp.

References File_Namespace::close(), DBMetaFile_, epochFile_, and files_.

Referenced by File_Namespace::CachingFileMgr::closeRemovePhysical(), and closeRemovePhysical().

556  {
557  for (const auto& [idx, file_info] : files_) {
558  if (file_info->f) {
559  close(file_info->f);
560  file_info->f = nullptr;
561  }
562  }
563 
564  if (DBMetaFile_) {
566  DBMetaFile_ = nullptr;
567  }
568 
569  if (epochFile_) {
570  close(epochFile_);
571  epochFile_ = nullptr;
572  }
573 }
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:114
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::closeRemovePhysical ( )
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 575 of file FileMgr.cpp.

References closePhysicalUnlocked(), files_rw_mutex_, getFileMgrBasePath(), readOnlyCheck(), and File_Namespace::renameForDelete().

575  {
577  auto path = getFileMgrBasePath();
578  readOnlyCheck("rename file", path);
579 
581  /* rename for later deletion the directory containing table related data */
583 }
std::string getFileMgrBasePath() const
Definition: FileMgr.h:334
std::unique_lock< T > unique_lock
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:421
void renameForDelete(const std::string directoryName)
Renames a directory to DELETE_ME_<EPOCH>_<oldname>.
Definition: File.cpp:210

+ Here is the call graph for this function:

void File_Namespace::FileMgr::compactFiles ( )

Compacts metadata and data file pages and deletes resulting empty files (if any exist). Compaction occurs in 3 idempotent phases in order to enable graceful recovery if a crash/process interruption occurs in the middle of data compaction.

Phase 1: Create a status file that indicates initiation of this phase. Sort metadata/data files in order of files with the lowest number of free pages to those with the highest number of free pages. Copy over used pages from files at the end of the sorted order (files with the highest number of free pages) to those at the beginning of the sorted order (files with the lowest number of free pages). Keep destination/copied to pages as free while copying. Keep track of copied source to destination page mapping. Write page mapping to the status file (to be used during crash recovery if needed).

Phase 2: Rename status file to a file name that indicates initiation of this phase. Go through page mapping and mark source/copied from pages as free while making the destination/copied to pages as used.

Phase 3: Rename status file to a file name that indicates initiation of this phase. Delete all empty files (files containing only free pages). Delete status file.

Definition at line 1276 of file FileMgr.cpp.

References CHECK, COPY_PAGES_STATUS, DELETE_EMPTY_FILES_STATUS, deleteEmptyFiles(), files_, files_rw_mutex_, getFilePath(), readOnlyCheck(), renameCompactionStatusFile(), sortAndCopyFilePagesForCompaction(), UPDATE_PAGE_VISIBILITY_STATUS, updateMappedPagesVisibility(), and writePageMappingsToStatusFile().

Referenced by resumeFileCompaction().

1276  {
1278 
1279  readOnlyCheck("run file compaction");
1280 
1281  if (files_.empty()) {
1282  return;
1283  }
1284 
1285  auto copy_pages_status_file_path = getFilePath(COPY_PAGES_STATUS);
1286  CHECK(!boost::filesystem::exists(copy_pages_status_file_path));
1287  std::ofstream status_file(copy_pages_status_file_path.string(),
1288  std::ios::out | std::ios::binary);
1289  status_file.close();
1290 
1291  std::vector<PageMapping> page_mappings;
1292  std::set<Page> touched_pages;
1293  std::set<size_t> page_sizes;
1294  for (const auto& [file_id, file_info] : files_) {
1295  page_sizes.emplace(file_info->pageSize);
1296  }
1297  for (auto page_size : page_sizes) {
1298  sortAndCopyFilePagesForCompaction(page_size, page_mappings, touched_pages);
1299  }
1300 
1301  writePageMappingsToStatusFile(page_mappings);
1303 
1304  updateMappedPagesVisibility(page_mappings);
1306 
1307  deleteEmptyFiles();
1308 }
void sortAndCopyFilePagesForCompaction(size_t page_size, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
Definition: FileMgr.cpp:1316
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:389
void writePageMappingsToStatusFile(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1528
std::unique_lock< T > unique_lock
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1697
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:390
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:388
void updateMappedPagesVisibility(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1471
#define CHECK(condition)
Definition: Logger.h:291
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:421
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413
void renameCompactionStatusFile(const char *const from_status, const char *const to_status)
Definition: FileMgr.cpp:1574

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::copyPage ( Page & srcPage,
FileMgr * destFileMgr,
Page & destPage,
const size_t  reservedHeaderSize,
const size_t  numBytes,
const size_t  offset 
)

Definition at line 586 of file FileMgr.cpp.

References CHECK, checked_malloc(), File_Namespace::Page::fileId, free(), getFileInfoForFileId(), page_size_, File_Namespace::Page::pageNum, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by init().

591  {
592  CHECK(offset + numBytes <= page_size_);
593  FileInfo* srcFileInfo = getFileInfoForFileId(srcPage.fileId);
594  FileInfo* destFileInfo = destFileMgr->getFileInfoForFileId(destPage.fileId);
595  int8_t* buffer = reinterpret_cast<int8_t*>(checked_malloc(numBytes));
596  ScopeGuard guard = [&buffer] { ::free(buffer); };
597 
598  size_t bytesRead = srcFileInfo->read(
599  srcPage.pageNum * page_size_ + offset + reservedHeaderSize, numBytes, buffer);
600  CHECK(bytesRead == numBytes);
601  size_t bytesWritten = destFileInfo->write(
602  destPage.pageNum * page_size_ + offset + reservedHeaderSize, numBytes, buffer);
603  CHECK(bytesWritten == numBytes);
604 }
const size_t page_size_
Definition: FileMgr.h:551
void free(AbstractBuffer *buffer) override
Definition: FileMgr.cpp:873
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
#define CHECK(condition)
Definition: Logger.h:291
FileInfo * getFileInfoForFileId(const int32_t fileId) const
Definition: FileMgr.h:229

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::copyPageWithoutHeaderSize ( const Page & source_page,
const Page & destination_page 
)
protected

Copies content of source_page to destination_page without copying over the source_page header size. The header size is instead returned by the method. Not copying over the header size enables a use case where destination_page has all the content of the source_page but is still marked as a free page.

Definition at line 1441 of file FileMgr.cpp.

References CHECK, CHECK_EQ, File_Namespace::Page::fileId, File_Namespace::FileInfo::fileId, getFileInfoForFileId(), File_Namespace::Page::pageNum, File_Namespace::FileInfo::pageSize, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by copySourcePageForCompaction().

1442  {
1443  FileInfo* source_file_info = getFileInfoForFileId(source_page.fileId);
1444  CHECK(source_file_info);
1445  CHECK_EQ(source_file_info->fileId, source_page.fileId);
1446 
1447  FileInfo* destination_file_info = getFileInfoForFileId(destination_page.fileId);
1448  CHECK(destination_file_info);
1449  CHECK_EQ(destination_file_info->fileId, destination_page.fileId);
1450  CHECK_EQ(source_file_info->pageSize, destination_file_info->pageSize);
1451 
1452  auto page_size = source_file_info->pageSize;
1453  auto buffer = std::make_unique<int8_t[]>(page_size);
1454  size_t bytes_read =
1455  source_file_info->read(source_page.pageNum * page_size, page_size, buffer.get());
1456  CHECK_EQ(page_size, bytes_read);
1457 
1458  auto header_size_offset = sizeof(int32_t);
1459  size_t bytes_written = destination_file_info->write(
1460  (destination_page.pageNum * page_size) + header_size_offset,
1461  page_size - header_size_offset,
1462  buffer.get() + header_size_offset);
1463  CHECK_EQ(page_size - header_size_offset, bytes_written);
1464  return reinterpret_cast<int32_t*>(buffer.get())[0];
1465 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define CHECK(condition)
Definition: Logger.h:291
FileInfo * getFileInfoForFileId(const int32_t fileId) const
Definition: FileMgr.h:229

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::copySourcePageForCompaction ( const Page & source_page,
FileInfo * destination_file_info,
std::vector< PageMapping > &  page_mappings,
std::set< Page > &  touched_pages 
)
protected

Copies a used page (indicated by the top of the source_used_pages set) from the given source file to a free page in the given destination file. Source and destination pages are recorded in the given page_mappings vector after copying is done.

Definition at line 1411 of file FileMgr.cpp.

References CHECK, CHECK_NE, copyPageWithoutHeaderSize(), File_Namespace::Page::fileId, File_Namespace::FileInfo::fileId, File_Namespace::FileInfo::getFreePage(), and File_Namespace::Page::pageNum.

Referenced by sortAndCopyFilePagesForCompaction().

1414  {
1415  size_t destination_page_num = destination_file_info->getFreePage();
1416  CHECK_NE(destination_page_num, static_cast<size_t>(-1));
1417  Page destination_page{destination_file_info->fileId, destination_page_num};
1418 
1419  // Assert that the same pages are not copied or overridden multiple times
1420  CHECK(touched_pages.find(source_page) == touched_pages.end());
1421  touched_pages.emplace(source_page);
1422 
1423  CHECK(touched_pages.find(destination_page) == touched_pages.end());
1424  touched_pages.emplace(destination_page);
1425 
1426  auto header_size = copyPageWithoutHeaderSize(source_page, destination_page);
1427  page_mappings.emplace_back(static_cast<size_t>(source_page.fileId),
1428  source_page.pageNum,
1429  header_size,
1430  static_cast<size_t>(destination_page.fileId),
1431  destination_page.pageNum);
1432 }
int32_t copyPageWithoutHeaderSize(const Page &source_page, const Page &destination_page)
Definition: FileMgr.cpp:1441
#define CHECK_NE(x, y)
Definition: Logger.h:302
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::coreInit ( )
private

Determines file path, and if exists, runs file migration and opens and reads epoch file.

Returns
a boolean representing whether the directory path existed

Definition at line 137 of file FileMgr.cpp.

References EPOCH_FILENAME, logger::FATAL, fileMgrBasePath_, fileMgrKey_, files_rw_mutex_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, LOG, migrateToLatestFileMgrVersion(), openAndReadEpochFile(), and to_string().

Referenced by FileMgr(), and init().

137  {
139  const std::string fileMgrDirPrefix("table");
140  const std::string FileMgrDirDelim("_");
141  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
142  std::to_string(fileMgrKey_.first) + // db_id
143  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
144  boost::filesystem::path path(fileMgrBasePath_);
145  if (boost::filesystem::exists(path)) {
146  if (!boost::filesystem::is_directory(path)) {
147  LOG(FATAL) << "Specified path '" << fileMgrBasePath_
148  << "' for table data is not a directory.";
149  }
152  return true;
153  }
154  return false;
155 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:540
std::string getBasePath() const
#define LOG(tag)
Definition: Logger.h:285
void migrateToLatestFileMgrVersion()
Definition: FileMgr.cpp:1157
GlobalFileMgr * gfm_
Definition: FileMgr.h:539
std::string fileMgrBasePath_
Definition: FileMgr.h:411
std::string to_string(char const *&&v)
std::unique_lock< T > unique_lock
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:400
void openAndReadEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:639
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:421

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::createBuffer ( const ChunkKey & key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
override

Creates a chunk with the specified key and page size.

Definition at line 716 of file FileMgr.cpp.

References CHECK, chunkIndex_, chunkIndexMutex_, createBufferUnlocked(), and show_chunk().

Referenced by init().

718  {
720  CHECK(chunkIndex_.find(key) == chunkIndex_.end())
721  << "Chunk already exists: " + show_chunk(key);
722  return createBufferUnlocked(key, page_size, num_bytes);
723 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
virtual FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:726
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::createBufferFromHeaders ( const ChunkKey key,
const std::vector< HeaderInfo >::const_iterator &  headerStartIt,
const std::vector< HeaderInfo >::const_iterator &  headerEndIt 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 737 of file FileMgr.cpp.

References allocateBuffer(), CHECK, chunkIndex_, chunkIndexMutex_, and show_chunk().

Referenced by init().

740  {
742  CHECK(chunkIndex_.find(key) == chunkIndex_.end())
743  << "Chunk already exists for key: " << show_chunk(key);
744  chunkIndex_[key] = allocateBuffer(key, headerStartIt, headerEndIt);
745  return (chunkIndex_[key]);
746 }
virtual FileBuffer * allocateBuffer(const size_t page_size, const ChunkKey &key, const size_t num_bytes=0)
Definition: FileMgr.cpp:1623
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::createBufferUnlocked ( const ChunkKey & key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 726 of file FileMgr.cpp.

References allocateBuffer(), chunkIndex_, and page_size_.

Referenced by createBuffer(), and getOrCreateBuffer().

728  {
729  size_t actual_page_size = page_size;
730  if (actual_page_size == 0) {
731  actual_page_size = page_size_;
732  }
733  chunkIndex_[key] = allocateBuffer(actual_page_size, key, num_bytes);
734  return (chunkIndex_[key]);
735 }
const size_t page_size_
Definition: FileMgr.h:551
virtual FileBuffer * allocateBuffer(const size_t page_size, const ChunkKey &key, const size_t num_bytes=0)
Definition: FileMgr.cpp:1623
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createEpochFile ( const std::string &  epochFileName)
protected

Definition at line 606 of file FileMgr.cpp.

References Epoch::byte_size(), File_Namespace::create(), epochFile_, logger::FATAL, fileMgrBasePath_, LOG, readOnlyCheck(), and writeAndSyncEpochToDisk().

Referenced by init(), and migrateEpochFileV0().

606  {
607  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
608  readOnlyCheck("create file", epochFilePath);
609  if (boost::filesystem::exists(epochFilePath)) {
610  LOG(FATAL) << "Epoch file '" << epochFilePath << "' already exists";
611  }
612  epochFile_ = create(epochFilePath, sizeof(Epoch::byte_size()));
613  // Write out current epoch to file - which if this
614  // function is being called should be 0
616 }
#define LOG(tag)
Definition: Logger.h:285
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:55
std::string fileMgrBasePath_
Definition: FileMgr.h:411
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:659
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
static size_t byte_size()
Definition: Epoch.h:63

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FILE * File_Namespace::FileMgr::createFile ( const std::string &  full_path,
const size_t  requested_file_size 
) const

Definition at line 1701 of file FileMgr.cpp.

References File_Namespace::create(), and readOnlyCheck().

1702  {
1703  readOnlyCheck("create file", full_path);
1704  return create(full_path, requested_file_size);
1705 }
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:55
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723

+ Here is the call graph for this function:

std::pair< FILE *, std::string > File_Namespace::FileMgr::createFile ( const std::string &  base_path,
const int  file_id,
const size_t  page_size,
const size_t  num_pages 
) const

Definition at line 1707 of file FileMgr.cpp.

References File_Namespace::create(), File_Namespace::get_data_file_path(), and readOnlyCheck().

1710  {
1711  readOnlyCheck("create file", get_data_file_path(base_path, file_id, page_size));
1712  return create(base_path, file_id, page_size, num_pages);
1713 }
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:55
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
std::string get_data_file_path(const std::string &base_path, int file_id, size_t page_size)
Definition: File.cpp:42

+ Here is the call graph for this function:

FileInfo * File_Namespace::FileMgr::createFileInfo ( const size_t  pageSize,
const size_t  numPages 
)
protected

Adds a file to the file manager repository.

This method will create a FileInfo object for the file being added, and it will create the corresponding file on physical disk with the indicated number of pages pre-allocated.

A pointer to the FileInfo object is returned, which itself has a file pointer (FILE*) and a file identifier (int32_t fileId).

Parameters
fileName: The name given to the file in physical storage.
pageSize: The logical page size for the pages in the file.
numPages: The number of logical pages to initially allocate for the file.
Returns
FileInfo* A pointer to the FileInfo object of the added file.

Definition at line 968 of file FileMgr.cpp.

References CHECK, File_Namespace::create(), f(), logger::FATAL, fileIndex_, fileMgrBasePath_, files_, files_rw_mutex_, File_Namespace::get_data_file_path(), getFileInfoForFileId(), LOG, nextFileId_, and readOnlyCheck().

Referenced by requestFreePage(), and requestFreePages().

968  {
969  readOnlyCheck("create file",
971  // check arguments
972  if (pageSize == 0 || numPages == 0) {
973  LOG(FATAL) << "File creation failed: pageSize and numPages must be greater than 0.";
974  }
975 
976  // create the new file
977  auto [f, file_path] = create(fileMgrBasePath_,
978  nextFileId_,
979  pageSize,
980  numPages); // TM: not sure if I like naming scheme here -
981  // should be in separate namespace?
982  CHECK(f);
983 
984  // instantiate a new FileInfo for the newly created file
985  int32_t fileId = nextFileId_++;
986  auto fInfo = std::make_unique<FileInfo>(this,
987  fileId,
988  f,
989  pageSize,
990  numPages,
991  file_path,
992  true); // true means init file
993  CHECK(fInfo);
994 
996  // update file manager data structures
997  files_[fileId] = std::move(fInfo);
998  fileIndex_.insert(std::pair<size_t, int32_t>(pageSize, fileId));
999 
1000  return getFileInfoForFileId(fileId);
1001 }
#define LOG(tag)
Definition: Logger.h:285
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:55
std::string fileMgrBasePath_
Definition: FileMgr.h:411
PageSizeFileMMap fileIndex_
Definition: FileMgr.h:414
std::unique_lock< T > unique_lock
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
#define CHECK(condition)
Definition: Logger.h:291
FileInfo * getFileInfoForFileId(const int32_t fileId) const
Definition: FileMgr.h:229
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:421
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413
std::string get_data_file_path(const std::string &base_path, int file_id, size_t page_size)
Definition: File.cpp:42
unsigned nextFileId_
number of threads used when loading data
Definition: FileMgr.h:416

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createOrMigrateTopLevelMetadata ( )

Definition at line 1056 of file FileMgr.cpp.

References DB_META_FILENAME, File_Namespace::GlobalFileMgr::db_version_, logger::FATAL, gfm_, INVALID_VERSION, LOG, readVersionFromDisk(), and writeAndSyncVersionToDisk().

1056  {
1057  auto file_version = readVersionFromDisk(DB_META_FILENAME);
1058  auto gfm_version = gfm_->db_version_;
1059 
1060  if (file_version > gfm_version) {
1061  LOG(FATAL) << "DB forward compatibility is not supported. Version of HeavyDB "
1062  "software used is older than the version of DB being read: "
1063  << file_version;
1064  }
1065  if (file_version == INVALID_VERSION || file_version < gfm_version) {
1066  // new system, or we are moving forward versions
1067  // system wide migration would go here if required
1069  return;
1070  }
1071 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1073
#define LOG(tag)
Definition: Logger.h:285
GlobalFileMgr * gfm_
Definition: FileMgr.h:539
static constexpr int32_t db_version_
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1087
static constexpr char DB_META_FILENAME[]
Definition: FileMgr.h:401
static constexpr int32_t INVALID_VERSION
Definition: FileMgr.h:403

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffer ( const ChunkKey & key,
const bool  purge = true 
)
override

Deletes the chunk with the specified key.

Definition at line 753 of file FileMgr.cpp.

References CHECK, chunkIndex_, chunkIndexMutex_, deleteBufferUnlocked(), and show_chunk().

753  {
755  auto chunk_it = chunkIndex_.find(key);
756  CHECK(chunk_it != chunkIndex_.end()) << "Chunk does not exist: " << show_chunk(key);
757  deleteBufferUnlocked(chunk_it, purge);
758 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
std::unique_lock< T > unique_lock
virtual ChunkKeyToChunkMap::iterator deleteBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true)
Definition: FileMgr.cpp:760
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffersWithPrefix ( const ChunkKey & keyPrefix,
const bool  purge = true 
)
override

Definition at line 770 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, and deleteBufferUnlocked().

770  {
772  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
773  if (chunkIt == chunkIndex_.end()) {
774  return; // should we throw?
775  }
776  while (chunkIt != chunkIndex_.end() &&
777  std::search(chunkIt->first.begin(),
778  chunkIt->first.begin() + keyPrefix.size(),
779  keyPrefix.begin(),
780  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
781  deleteBufferUnlocked(chunkIt++, purge);
782  }
783 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
std::unique_lock< T > unique_lock
virtual ChunkKeyToChunkMap::iterator deleteBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true)
Definition: FileMgr.cpp:760
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420

+ Here is the call graph for this function:

ChunkKeyToChunkMap::iterator File_Namespace::FileMgr::deleteBufferUnlocked ( const ChunkKeyToChunkMap::iterator  chunk_it,
const bool  purge = true 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 760 of file FileMgr.cpp.

References chunkIndex_.

Referenced by deleteBuffer(), and deleteBuffersWithPrefix().

762  {
763  if (purge) {
764  chunk_it->second->freePages();
765  }
766  delete chunk_it->second;
767  return chunkIndex_.erase(chunk_it);
768 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::deleteEmptyFiles ( )
protected

Deletes files that contain only free pages. Also deletes the compaction status file.

Definition at line 1504 of file FileMgr.cpp.

References CHECK, CHECK_EQ, DELETE_EMPTY_FILES_STATUS, fileMgrBasePath_, files_, File_Namespace::get_data_file_path(), File_Namespace::get_legacy_data_file_path(), getFilePath(), and readOnlyCheck().

Referenced by compactFiles(), and resumeFileCompaction().

1504  {
1505  for (const auto& [file_id, file_info] : files_) {
1506  CHECK_EQ(file_id, file_info->fileId);
1507  if (file_info->freePages.size() == file_info->numPages) {
1508  fclose(file_info->f);
1509  file_info->f = nullptr;
1510  auto file_path = get_data_file_path(fileMgrBasePath_, file_id, file_info->pageSize);
1511  readOnlyCheck("delete file", file_path);
1512  boost::filesystem::remove(get_legacy_data_file_path(file_path));
1513  boost::filesystem::remove(file_path);
1514  }
1515  }
1516 
1517  auto status_file_path = getFilePath(DELETE_EMPTY_FILES_STATUS);
1518  CHECK(boost::filesystem::exists(status_file_path));
1519  readOnlyCheck("delete file", status_file_path.string());
1520  boost::filesystem::remove(status_file_path);
1521 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::string get_legacy_data_file_path(const std::string &new_data_file_path)
Definition: File.cpp:49
std::string fileMgrBasePath_
Definition: FileMgr.h:411
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1697
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:390
#define CHECK(condition)
Definition: Logger.h:291
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413
std::string get_data_file_path(const std::string &base_path, int file_id, size_t page_size)
Definition: File.cpp:42

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::describeSelf ( ) const
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 700 of file FileMgr.cpp.

References fileMgrKey_.

Referenced by checkpoint(), and setEpoch().

700  {
701  stringstream ss;
702  ss << "table (" << fileMgrKey_.first << ", " << fileMgrKey_.second << ")";
703  return ss.str();
704 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:540

+ Here is the caller graph for this function:

virtual int32_t File_Namespace::FileMgr::epoch ( int32_t  db_id,
int32_t  tb_id 
) const
inline virtual

Returns current value of epoch - should be one greater than recorded at last checkpoint. Because FileMgr only contains buffers from one table we can just return the FileMgr's epoch instead of finding a table-specific epoch.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 281 of file FileMgr.h.

References epoch().

Referenced by epoch(), File_Namespace::FileBuffer::getFileMgrEpoch(), and File_Namespace::FileInfo::openExistingFile().

281 { return epoch(); }
int32_t epoch() const
Definition: FileMgr.h:530

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::epoch ( ) const
inline private

Definition at line 530 of file FileMgr.h.

Referenced by checkpoint(), init(), openAndReadLegacyEpochFile(), updatePageIfDeleted(), and writeDirtyBuffers().

530 { return static_cast<int32_t>(epoch_.ceiling()); }
int32_t ceiling() const
Definition: Epoch.h:44

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::epochFloor ( ) const
inline

Definition at line 283 of file FileMgr.h.

Referenced by setDataAndMetadataFileStats().

283 { return static_cast<int32_t>(epoch_.floor()); }
int32_t floor() const
Definition: Epoch.h:43

+ Here is the caller graph for this function:

virtual bool File_Namespace::FileMgr::failOnReadError ( ) const
inline virtual

True if a read error should cause a fatal error.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 366 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::FileBuffer().

366 { return true; }

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::fetchBuffer ( const ChunkKey & key,
AbstractBuffer * destBuffer,
const size_t  numBytes 
)
override

Definition at line 797 of file FileMgr.cpp.

References CHECK, Data_Namespace::AbstractBuffer::copyTo(), logger::FATAL, getBuffer(), Data_Namespace::AbstractBuffer::isDirty(), LOG, show_chunk(), and Data_Namespace::AbstractBuffer::size().

799  {
800  // reads chunk specified by ChunkKey into AbstractBuffer provided by
801  // destBuffer
802  CHECK(!destBuffer->isDirty())
803  << "Aborting attempt to fetch a chunk marked dirty. Chunk inconsistency for key: "
804  << show_chunk(key);
805  AbstractBuffer* chunk = getBuffer(key);
806  // chunk's size is either specified in function call with numBytes or we
807  // just look at pageSize * numPages in FileBuffer
808  if (numBytes > 0 && numBytes > chunk->size()) {
809  LOG(FATAL) << "Chunk retrieved for key `" << show_chunk(key) << "` is smaller ("
810  << chunk->size() << ") than number of bytes requested (" << numBytes
811  << ")";
812  }
813  chunk->copyTo(destBuffer, numBytes);
814 }
#define LOG(tag)
Definition: Logger.h:285
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
An AbstractBuffer is a unit of data management for a data manager.
void copyTo(AbstractBuffer *destination_buffer, const size_t num_bytes=0)
#define CHECK(condition)
Definition: Logger.h:291
FileBuffer * getBuffer(const ChunkKey &key, const size_t numBytes=0) override
Returns a pointer to the chunk with the specified key.
Definition: FileMgr.cpp:785

+ Here is the call graph for this function:

void File_Namespace::FileMgr::free ( AbstractBuffer * buffer)
override

Definition at line 873 of file FileMgr.cpp.

References logger::FATAL, and LOG.

Referenced by copyPage().

873  {
874  LOG(FATAL) << "Operation not supported";
875 }
#define LOG(tag)
Definition: Logger.h:285

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::free_page ( std::pair< FileInfo *, int32_t > &&  page)
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1210 of file FileMgr.cpp.

References free_pages_, and mutex_free_page_.

Referenced by File_Namespace::FileInfo::freePage(), and freePages().

1210  {
1211  std::unique_lock<heavyai::shared_mutex> lock(mutex_free_page_);
1212  free_pages_.push_back(page);
1213 }
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:424
heavyai::shared_mutex mutex_free_page_
Definition: FileMgr.h:423

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePages ( )
protected

Definition at line 1615 of file FileMgr.cpp.

References free_page(), free_pages_, and mutex_free_page_.

Referenced by checkpoint(), File_Namespace::CachingFileMgr::clearForTable(), File_Namespace::CachingFileMgr::init(), init(), and sortAndCopyFilePagesForCompaction().

1615  {
1617  for (auto& free_page : free_pages_) {
1618  free_page.first->freePageDeferred(free_page.second);
1619  }
1620  free_pages_.clear();
1621 }
std::unique_lock< T > unique_lock
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:424
heavyai::shared_mutex mutex_free_page_
Definition: FileMgr.h:423
virtual void free_page(std::pair< FileInfo *, int32_t > &&page)
Definition: FileMgr.cpp:1210

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePagesBeforeEpoch ( const int32_t  min_epoch)
private

Definition at line 673 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, and freePagesBeforeEpochUnlocked().

Referenced by rollOffOldData().

673  {
675  freePagesBeforeEpochUnlocked(min_epoch, chunkIndex_.begin(), chunkIndex_.end());
676 }
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
void freePagesBeforeEpochUnlocked(const int32_t min_epoch, const ChunkKeyToChunkMap::iterator lower_bound, const ChunkKeyToChunkMap::iterator upper_bound)
Definition: FileMgr.cpp:678

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePagesBeforeEpochUnlocked ( const int32_t  min_epoch,
const ChunkKeyToChunkMap::iterator  lower_bound,
const ChunkKeyToChunkMap::iterator  upper_bound 
)
private

Definition at line 678 of file FileMgr.cpp.

References gpu_enabled::upper_bound().

Referenced by freePagesBeforeEpoch().

681  {
682  for (auto chunkIt = lower_bound; chunkIt != upper_bound; ++chunkIt) {
683  chunkIt->second->freePagesBeforeEpoch(min_epoch);
684  }
685 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const TablePair File_Namespace::FileMgr::get_fileMgrKey ( ) const
inline

Definition at line 341 of file FileMgr.h.

Referenced by updatePageIfDeleted().

341 { return fileMgrKey_; }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:540

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getAllocated ( )
inline override

Definition at line 226 of file FileMgr.h.

226 { return 0; }
FileBuffer * File_Namespace::FileMgr::getBuffer ( const ChunkKey & key,
const size_t  numBytes = 0 
)
override

Returns a pointer to the chunk with the specified key.

Definition at line 785 of file FileMgr.cpp.

References chunkIndexMutex_, and getBufferUnlocked().

Referenced by fetchBuffer().

785  {
787  return getBufferUnlocked(key, num_bytes);
788 }
std::shared_lock< T > shared_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
virtual FileBuffer * getBufferUnlocked(const ChunkKey &key, const size_t numBytes=0) const
Definition: FileMgr.cpp:790

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::getBufferUnlocked ( const ChunkKey & key,
const size_t  numBytes = 0 
) const
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 790 of file FileMgr.cpp.

References CHECK, chunkIndex_, and show_chunk().

Referenced by getBuffer(), and getOrCreateBuffer().

791  {
792  auto chunk_it = chunkIndex_.find(key);
793  CHECK(chunk_it != chunkIndex_.end()) << "Chunk does not exist: " << show_chunk(key);
794  return chunk_it->second;
795 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::getChunkMetadataVecForKeyPrefix ( ChunkMetadataVector chunkMetadataVec,
const ChunkKey keyPrefix 
)
override

Definition at line 1021 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

1022  {
1024  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
1025  if (chunkIt == chunkIndex_.end()) {
1026  return; // throw?
1027  }
1028  while (chunkIt != chunkIndex_.end() &&
1029  std::search(chunkIt->first.begin(),
1030  chunkIt->first.begin() + keyPrefix.size(),
1031  keyPrefix.begin(),
1032  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
1033  if (chunkIt->second->hasEncoder()) {
1034  auto chunk_metadata = std::make_shared<ChunkMetadata>();
1035  chunkIt->second->encoder_->getMetadata(chunk_metadata);
1036  chunkMetadataVec.emplace_back(chunkIt->first, chunk_metadata);
1037  }
1038  chunkIt++;
1039  }
1040 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
bool File_Namespace::FileMgr::getDBConvert ( ) const

Index for looking up chunks.

Definition at line 1052 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBConvert(), and gfm_.

1052  {
1053  return gfm_->getDBConvert();
1054 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:539

+ Here is the call graph for this function:

FILE * File_Namespace::FileMgr::getFileForFileId ( const int32_t  fileId)

Returns FILE pointer associated with requested fileId.

See Also
FileBuffer

Definition at line 1003 of file FileMgr.cpp.

References CHECK, and files_.

Referenced by File_Namespace::FileBuffer::readMetadata(), and File_Namespace::FileBuffer::writeMetadata().

1003  {
1004  CHECK(fileId >= 0);
1005  CHECK(files_.find(fileId) != files_.end()) << "File does not exist for id: " << fileId;
1006  return files_.at(fileId)->f;
1007 }
#define CHECK(condition)
Definition: Logger.h:291
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413

+ Here is the caller graph for this function:

FileInfo* File_Namespace::FileMgr::getFileInfoForFileId ( const int32_t  fileId) const
inline

Definition at line 229 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileBuffer::copyPage(), copyPage(), copyPageWithoutHeaderSize(), createFileInfo(), File_Namespace::FileBuffer::freePage(), openExistingFile(), File_Namespace::readForThread(), requestFreePage(), requestFreePages(), updateMappedPagesVisibility(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeHeader().

229  {
230  return files_.at(fileId).get();
231  }
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::getFileMgrBasePath ( ) const
inline

Definition at line 334 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::closeRemovePhysical(), closeRemovePhysical(), and File_Namespace::CachingFileMgr::getTableFileMgrPath().

334 { return fileMgrBasePath_; }
std::string fileMgrBasePath_
Definition: FileMgr.h:411

+ Here is the caller graph for this function:

boost::filesystem::path File_Namespace::FileMgr::getFilePath ( const std::string &  file_name) const

Definition at line 1697 of file FileMgr.cpp.

References fileMgrBasePath_.

Referenced by compactFiles(), deleteEmptyFiles(), readPageMappingsFromStatusFile(), renameCompactionStatusFile(), resumeFileCompaction(), and writePageMappingsToStatusFile().

1697  {
1698  return boost::filesystem::path(fileMgrBasePath_) / file_name;
1699 }
std::string fileMgrBasePath_
Definition: FileMgr.h:411

+ Here is the caller graph for this function:

uint32_t File_Namespace::FileMgr::getFragmentCount ( ) const
private

Definition at line 407 of file FileMgr.cpp.

References CHUNK_KEY_FRAGMENT_IDX, chunkIndex_, and chunkIndexMutex_.

Referenced by getStorageStats().

407  {
409  std::set<int32_t> fragment_ids;
410  for (const auto& [chunk_key, file_buffer] : chunkIndex_) {
411  fragment_ids.emplace(chunk_key[CHUNK_KEY_FRAGMENT_IDX]);
412  }
413  return static_cast<uint32_t>(fragment_ids.size());
414 }
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getInUseSize ( )
inline override

Definition at line 225 of file FileMgr.h.

225 { return 0; }
size_t File_Namespace::FileMgr::getMaxSize ( )
inline override

Definition at line 224 of file FileMgr.h.

224 { return 0; }
FileMetadata File_Namespace::FileMgr::getMetadataForFile ( const boost::filesystem::directory_iterator &  fileIterator) const

Definition at line 157 of file FileMgr.cpp.

References CHECK_EQ, DATA_FILE_EXT, logger::FATAL, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, heavyai::file_size(), File_Namespace::FileMetadata::file_size, File_Namespace::FileMetadata::is_data_file, LOG, File_Namespace::FileMetadata::num_pages, and File_Namespace::FileMetadata::page_size.

Referenced by init(), openFiles(), and setDataAndMetadataFileStats().

158  {
159  FileMetadata fileMetadata;
160  fileMetadata.is_data_file = false;
161  fileMetadata.file_path = fileIterator->path().string();
162  if (!boost::filesystem::is_regular_file(fileIterator->status())) {
163  return fileMetadata;
164  }
165  // note that boost::filesystem leaves preceding dot on
166  // extension - hence DATA_FILE_EXT is ".data"
167  std::string extension(fileIterator->path().extension().string());
168  if (extension == DATA_FILE_EXT) {
169  std::string fileStem(fileIterator->path().stem().string());
170  // remove trailing dot if any
171  if (fileStem.size() > 0 && fileStem.back() == '.') {
172  fileStem = fileStem.substr(0, fileStem.size() - 1);
173  }
174  size_t dotPos = fileStem.find_last_of("."); // should only be one
175  if (dotPos == std::string::npos) {
176  LOG(FATAL) << "File `" << fileIterator->path()
177  << "` does not carry page size information in the filename.";
178  }
179  fileMetadata.is_data_file = true;
180  fileMetadata.file_id = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
181  fileMetadata.page_size =
182  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
183 
184  fileMetadata.file_size = boost::filesystem::file_size(fileMetadata.file_path);
185  CHECK_EQ(fileMetadata.file_size % fileMetadata.page_size,
186  size_t(0)); // should be no partial pages
187  fileMetadata.num_pages = fileMetadata.file_size / fileMetadata.page_size;
188  }
189  return fileMetadata;
190 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define LOG(tag)
Definition: Logger.h:285
#define DATA_FILE_EXT
Definition: File.h:25
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getMetadataPageSize ( ) const
inline

Definition at line 369 of file FileMgr.h.

369 { return metadata_page_size_; }
const size_t metadata_page_size_
Definition: FileMgr.h:552
MgrType File_Namespace::FileMgr::getMgrType ( )
inlineoverride

Definition at line 221 of file FileMgr.h.

221 { return FILE_MGR; };
size_t File_Namespace::FileMgr::getNumChunks ( )
override

Definition at line 1692 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

1692  {
1694  return chunkIndex_.size();
1695 }
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
size_t File_Namespace::FileMgr::getNumReaderThreads ( )
inline

Returns the number of reader threads (configured via the num-reader-threads parameter) that should be used during the initial load and subsequent reads of data.

Definition at line 316 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::read().

316 { return num_reader_threads_; }
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:415

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getNumUsedMetadataPagesForChunkKey ( const ChunkKey chunkKey) const

Definition at line 1042 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

1042  {
1044  const auto& chunkIt = chunkIndex_.find(chunkKey);
1045  if (chunkIt != chunkIndex_.end()) {
1046  return chunkIt->second->numMetadataPages();
1047  } else {
1048  throw std::runtime_error("Chunk was not found.");
1049  }
1050 }
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
FileBuffer * File_Namespace::FileMgr::getOrCreateBuffer ( const ChunkKey key)
private

Definition at line 1670 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, createBufferUnlocked(), and getBufferUnlocked().

Referenced by putBuffer().

1670  {
1671  FileBuffer* buf;
1673  auto chunk_it = chunkIndex_.find(key);
1674  if (chunk_it == chunkIndex_.end()) {
1675  buf = createBufferUnlocked(key);
1676  } else {
1677  buf = getBufferUnlocked(key);
1678  }
1679  return buf;
1680 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
virtual FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:726
virtual FileBuffer * getBufferUnlocked(const ChunkKey &key, const size_t numBytes=0) const
Definition: FileMgr.cpp:790

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getPageSize ( ) const
inline

Definition at line 368 of file FileMgr.h.

368 { return page_size_; }
const size_t page_size_
Definition: FileMgr.h:551
StorageStats File_Namespace::FileMgr::getStorageStats ( ) const

Definition at line 334 of file FileMgr.cpp.

References File_Namespace::StorageStats::fragment_count, getFragmentCount(), isFullyInitted_, and setDataAndMetadataFileStats().

334  {
335  StorageStats storage_stats;
336  setDataAndMetadataFileStats(storage_stats);
337  if (isFullyInitted_) {
338  storage_stats.fragment_count = getFragmentCount();
339  }
340  return storage_stats;
341 }
void setDataAndMetadataFileStats(StorageStats &storage_stats) const
Definition: FileMgr.cpp:343
uint32_t getFragmentCount() const
Definition: FileMgr.cpp:407

+ Here is the call graph for this function:

std::string File_Namespace::FileMgr::getStringMgrType ( )
inlineoverride

Definition at line 222 of file FileMgr.h.

222 { return ToString(FILE_MGR); }
bool File_Namespace::FileMgr::hasChunkMetadataForKeyPrefix ( const ChunkKey keyPrefix)

Definition at line 1009 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

1009  {
1011  auto chunk_it = chunkIndex_.lower_bound(key_prefix);
1012  if (chunk_it == chunkIndex_.end()) {
1013  return false;
1014  } else {
1015  auto it_pair =
1016  std::mismatch(key_prefix.begin(), key_prefix.end(), chunk_it->first.begin());
1017  return it_pair.first == key_prefix.end();
1018  }
1019 }
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
virtual bool File_Namespace::FileMgr::hasFileMgrKey ( ) const
inlinevirtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 340 of file FileMgr.h.

340 { return true; }
int32_t File_Namespace::FileMgr::incrementEpoch ( )
inline

Definition at line 285 of file FileMgr.h.

References logger::FATAL, LOG, and Epoch::max_allowable_epoch().

Referenced by checkpoint(), and init().

285  {
286  int32_t newEpoch = epoch_.increment();
287  epochIsCheckpointed_ = false;
288  // We test for error here instead of in Epoch::increment so we can log FileMgr
289  // metadata
290  if (newEpoch > Epoch::max_allowable_epoch()) {
291  LOG(FATAL) << "Epoch for table (" << fileMgrKey_.first << ", " << fileMgrKey_.second
292  << ") greater than maximum allowed value of "
293  << Epoch::max_allowable_epoch() << ".";
294  }
295  return newEpoch;
296  }
TablePair fileMgrKey_
Key (TablePair) identifying the table that this FileMgr manages.
Definition: FileMgr.h:540
#define LOG(tag)
Definition: Logger.h:285
static int64_t max_allowable_epoch()
Definition: Epoch.h:69
int32_t increment()
Definition: Epoch.h:54

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::init ( const size_t  num_reader_threads,
const int32_t  epochOverride 
)
private

Definition at line 252 of file FileMgr.cpp.

References Epoch::ceiling(), CHECK, clearFileInfos(), coreInit(), createBufferFromHeaders(), createEpochFile(), epoch(), epoch_, EPOCH_FILENAME, logger::FATAL, FILE_MGR_VERSION_FILENAME, fileMgrBasePath_, fileMgrVersion_, Epoch::floor(), freePages(), incrementEpoch(), initializeNumThreads(), isFullyInitted_, LATEST_FILE_MGR_VERSION, LOG, nextFileId_, openFiles(), readOnlyCheck(), resumeFileCompaction(), rollOffOldData(), setEpoch(), gpu_enabled::sort(), VLOG, and writeAndSyncVersionToDisk().

Referenced by FileMgr().

252  {
253  // if epochCeiling = -1 this means open from epoch file
254 
255  const bool dataExists = coreInit();
256  if (dataExists) {
257  if (epochOverride != -1) { // if opening at specified epoch
258  setEpoch(epochOverride);
259  }
260 
261  auto open_files_result = openFiles();
262  if (!open_files_result.compaction_status_file_name.empty()) {
263  resumeFileCompaction(open_files_result.compaction_status_file_name);
264  clearFileInfos();
265  open_files_result = openFiles();
266  CHECK(open_files_result.compaction_status_file_name.empty());
267  }
268 
269  /* Sort headerVec so that all HeaderInfos
270  * from a chunk will be grouped together
271  * and in order of increasing PageId
272  * - Version Epoch */
273  auto& header_vec = open_files_result.header_infos;
274  std::sort(header_vec.begin(), header_vec.end());
275 
276  /* Goal of next section is to find sequences in the
277  * sorted headerVec of the same ChunkId, which we
278  * can then initiate a FileBuffer with */
279 
280  VLOG(3) << "Number of Headers in Vector: " << header_vec.size();
281  if (header_vec.size() > 0) {
282  ChunkKey lastChunkKey = header_vec.begin()->chunkKey;
283  auto startIt = header_vec.begin();
284 
285  for (auto headerIt = header_vec.begin() + 1; headerIt != header_vec.end();
286  ++headerIt) {
287  if (headerIt->chunkKey != lastChunkKey) {
288  createBufferFromHeaders(lastChunkKey, startIt, headerIt);
289  lastChunkKey = headerIt->chunkKey;
290  startIt = headerIt;
291  }
292  }
293  // now need to insert last Chunk
294  createBufferFromHeaders(lastChunkKey, startIt, header_vec.end());
295  }
296  nextFileId_ = open_files_result.max_file_id + 1;
297  rollOffOldData(epoch(), true /* only checkpoint if data is rolled off */);
298  incrementEpoch();
299  freePages();
300  } else {
301  boost::filesystem::path path(fileMgrBasePath_);
302  readOnlyCheck("create file", path.string());
303  if (!boost::filesystem::create_directory(path)) {
304  LOG(FATAL) << "Could not create data directory: " << path;
305  }
307  if (epochOverride != -1) {
308  epoch_.floor(epochOverride);
309  epoch_.ceiling(epochOverride);
310  } else {
311  // These are default constructor values for epoch_, but resetting here for clarity
312  epoch_.floor(0);
313  epoch_.ceiling(0);
314  }
317  incrementEpoch();
318  }
319 
320  initializeNumThreads(num_reader_threads);
321  isFullyInitted_ = true;
322 }
virtual FileBuffer * createBufferFromHeaders(const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
Definition: FileMgr.cpp:737
std::vector< int > ChunkKey
Definition: types.h:36
OpenFilesResult openFiles()
Definition: FileMgr.cpp:200
static constexpr int32_t LATEST_FILE_MGR_VERSION
Definition: FileMgr.h:404
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:606
#define LOG(tag)
Definition: Logger.h:285
void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint)
Definition: FileMgr.cpp:687
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:411
int32_t fileMgrVersion_
version of the FileMgr on-disk file format
Definition: FileMgr.h:417
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1087
int32_t incrementEpoch()
Definition: FileMgr.h:285
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:402
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:400
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
int32_t epoch() const
Definition: FileMgr.h:530
#define CHECK(condition)
Definition: Logger.h:291
void setEpoch(const int32_t newEpoch)
Definition: FileMgr.cpp:1199
bool coreInit()
Determines file path, and if exists, runs file migration and opens and reads epoch file...
Definition: FileMgr.cpp:137
void initializeNumThreads(size_t num_reader_threads=0)
Definition: FileMgr.cpp:1604
unsigned nextFileId_
the index of the next file id
Definition: FileMgr.h:416
#define VLOG(n)
Definition: Logger.h:388
void resumeFileCompaction(const std::string &status_file_name)
Definition: FileMgr.cpp:1224

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::init ( const std::string &  dataPathToConvertFrom,
const int32_t  epochOverride 
)
private

Definition at line 430 of file FileMgr.cpp.

References threading_serial::async(), CHECK, copyPage(), createBuffer(), createBufferFromHeaders(), EPOCH_FILENAME, logger::FATAL, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, File_Namespace::GlobalFileMgr::getFileMgr(), getMetadataForFile(), gfm_, File_Namespace::FileMetadata::is_data_file, isFullyInitted_, LOG, nextFileId_, File_Namespace::FileMetadata::num_pages, openAndReadEpochFile(), openExistingFile(), File_Namespace::FileMetadata::page_size, processFileFutures(), File_Namespace::MultiPage::push(), readOnlyCheck(), requestFreePage(), setEpoch(), gpu_enabled::sort(), and Data_Namespace::AbstractBuffer::syncEncoder().

431  {
432  int32_t converted_data_epoch = 0;
433  boost::filesystem::path path(dataPathToConvertFrom);
434  if (boost::filesystem::exists(path)) {
435  if (!boost::filesystem::is_directory(path)) {
436  LOG(FATAL) << "Specified path `" << path << "` is not a directory.";
437  }
439 
440  if (epochOverride != -1) { // if opening at previous epoch
441  setEpoch(epochOverride);
442  }
443 
444  boost::filesystem::directory_iterator
445  endItr; // default construction yields past-the-end
446  int32_t maxFileId = -1;
447  int32_t fileCount = 0;
448  int32_t threadCount = std::thread::hardware_concurrency();
449  std::vector<HeaderInfo> headerVec;
450  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
451  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
452  FileMetadata fileMetadata = getMetadataForFile(fileIt);
453  if (fileMetadata.is_data_file) {
454  maxFileId = std::max(maxFileId, fileMetadata.file_id);
455  file_futures.emplace_back(std::async(std::launch::async, [fileMetadata, this] {
456  std::vector<HeaderInfo> tempHeaderVec;
457  openExistingFile(fileMetadata.file_path,
458  fileMetadata.file_id,
459  fileMetadata.page_size,
460  fileMetadata.num_pages,
461  tempHeaderVec);
462  return tempHeaderVec;
463  }));
464  fileCount++;
465  if (fileCount % threadCount) {
466  processFileFutures(file_futures, headerVec);
467  }
468  }
469  }
470 
471  if (file_futures.size() > 0) {
472  processFileFutures(file_futures, headerVec);
473  }
474 
475  /* Sort headerVec so that all HeaderInfos
476  * from a chunk will be grouped together
477  * and in order of increasing PageId
478  * - Version Epoch */
479 
480  std::sort(headerVec.begin(), headerVec.end());
481 
482  /* Goal of next section is to find sequences in the
483  * sorted headerVec of the same ChunkId, which we
484  * can then initiate a FileBuffer with */
485 
486  if (headerVec.size() > 0) {
487  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
488  auto startIt = headerVec.begin();
489 
490  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
491  ++headerIt) {
492  if (headerIt->chunkKey != lastChunkKey) {
493  FileMgr* c_fm_ =
494  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
495  CHECK(c_fm_);
496  auto srcBuf = createBufferFromHeaders(lastChunkKey, startIt, headerIt);
497  auto destBuf = c_fm_->createBuffer(lastChunkKey, srcBuf->pageSize());
498  destBuf->syncEncoder(srcBuf);
499  destBuf->setSize(srcBuf->size());
500  destBuf->setDirty(); // this needs to be set to force writing out metadata
501  // files from "checkpoint()" call
502 
503  size_t totalNumPages = srcBuf->getMultiPage().size();
504  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
505  Page srcPage = srcBuf->getMultiPage()[pageNum].current().page;
506  Page destPage = c_fm_->requestFreePage(
507  srcBuf->pageSize(),
508  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
509  MultiPage multiPage(srcBuf->pageSize());
510  multiPage.push(destPage, converted_data_epoch);
511  destBuf->multiPages_.push_back(multiPage);
512  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
513  copyPage(
514  srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
515  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
516  }
517  lastChunkKey = headerIt->chunkKey;
518  startIt = headerIt;
519  }
520  }
521 
522  // now need to insert last Chunk
523  FileMgr* c_fm_ =
524  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
525  auto srcBuf = createBufferFromHeaders(lastChunkKey, startIt, headerVec.end());
526  auto destBuf = c_fm_->createBuffer(lastChunkKey, srcBuf->pageSize());
527  destBuf->syncEncoder(srcBuf);
528  destBuf->setSize(srcBuf->size());
529  destBuf->setDirty(); // this needs to be set to write out metadata file from the
530  // "checkpoint()" call
531 
532  size_t totalNumPages = srcBuf->getMultiPage().size();
533  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
534  Page srcPage = srcBuf->getMultiPage()[pageNum].current().page;
535  Page destPage = c_fm_->requestFreePage(
536  srcBuf->pageSize(),
537  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
538  MultiPage multiPage(srcBuf->pageSize());
539  multiPage.push(destPage, converted_data_epoch);
540  destBuf->multiPages_.push_back(multiPage);
541  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
542  copyPage(srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
543  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
544  }
545  }
546  nextFileId_ = maxFileId + 1;
547  } else {
548  readOnlyCheck("create file", path.string());
549  if (!boost::filesystem::create_directory(path)) {
550  LOG(FATAL) << "Specified path does not exist: " << path;
551  }
552  }
553  isFullyInitted_ = true;
554 }
virtual FileBuffer * createBufferFromHeaders(const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
Definition: FileMgr.cpp:737
std::vector< int > ChunkKey
Definition: types.h:36
FileMgr(const int32_t device_id, GlobalFileMgr *gfm, const TablePair file_mgr_key, const int32_t max_rollback_epochs=-1, const size_t num_reader_threads=0, const int32_t epoch=-1)
Constructor.
Definition: FileMgr.cpp:51
void syncEncoder(const AbstractBuffer *src_buffer)
#define LOG(tag)
Definition: Logger.h:285
void copyPage(Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
Definition: FileMgr.cpp:586
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
GlobalFileMgr * gfm_
Definition: FileMgr.h:539
future< Result > async(Fn &&fn, Args &&...args)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:416
FileInfo * openExistingFile(const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:946
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:400
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
void openAndReadEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:639
AbstractBufferMgr * getFileMgr(const int32_t db_id, const int32_t tb_id)
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator) const
Definition: FileMgr.cpp:157
#define CHECK(condition)
Definition: Logger.h:291
void setEpoch(const int32_t newEpoch)
Definition: FileMgr.cpp:1199
unsigned nextFileId_
the index of the next file id
Definition: FileMgr.h:416

+ Here is the call graph for this function:

void File_Namespace::FileMgr::initializeNumThreads ( size_t  num_reader_threads = 0)
protected

Definition at line 1604 of file FileMgr.cpp.

References num_reader_threads_.

Referenced by File_Namespace::CachingFileMgr::init(), and init().

1604  {
1605  // # of threads is based on # of cores on the host
1606  size_t num_hardware_based_threads = std::thread::hardware_concurrency();
1607  if (num_reader_threads == 0 || num_reader_threads > num_hardware_based_threads) {
1608  // # of threads has not been defined by user
1609  num_reader_threads_ = num_hardware_based_threads;
1610  } else {
1611  num_reader_threads_ = num_reader_threads;
1612  }
1613 }
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:415

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::isAllocationCapped ( )
inlineoverride

Definition at line 227 of file FileMgr.h.

227 { return false; }
bool File_Namespace::FileMgr::isBufferOnDevice ( const ChunkKey key)
override

Definition at line 748 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

748  {
750  return chunkIndex_.find(key) != chunkIndex_.end();
751 }
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
int32_t File_Namespace::FileMgr::lastCheckpointedEpoch ( ) const
inline

Returns value of epoch at last checkpoint.

Definition at line 301 of file FileMgr.h.

Referenced by File_Namespace::GlobalFileMgr::existsDiffBetweenFileMgrParamsAndFileMgr(), and setDataAndMetadataFileStats().

301  {
302  return epoch() - (epochIsCheckpointed_ ? 0 : 1);
303  }
int32_t epoch() const
Definition: FileMgr.h:530

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::maxRollbackEpochs ( )
inline

Returns the value of max_rollback_epochs.

Definition at line 310 of file FileMgr.h.

Referenced by File_Namespace::GlobalFileMgr::existsDiffBetweenFileMgrParamsAndFileMgr().

310 { return maxRollbackEpochs_; }
int32_t maxRollbackEpochs_
Definition: FileMgr.h:410

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::migrateEpochFileV0 ( )
protected

Definition at line 1116 of file FileMgr.cpp.

References Epoch::ceiling(), createEpochFile(), epoch_, EPOCH_FILENAME, FILE_MGR_VERSION_FILENAME, fileMgrBasePath_, Epoch::floor(), logger::INFO, LEGACY_EPOCH_FILENAME, LOG, Epoch::min_allowable_epoch(), openAndReadLegacyEpochFile(), readOnlyCheck(), writeAndSyncEpochToDisk(), and writeAndSyncVersionToDisk().

Referenced by migrateToLatestFileMgrVersion().

1116  {
1117  const std::string versionFilePath(fileMgrBasePath_ + "/" + FILE_MGR_VERSION_FILENAME);
1118  LOG(INFO) << "Migrating file format version from 0 to 1 for `" << versionFilePath;
1119  readOnlyCheck("migrate epoch file", versionFilePath);
1124  int32_t migrationCompleteVersion = 1;
1125  writeAndSyncVersionToDisk(FILE_MGR_VERSION_FILENAME, migrationCompleteVersion);
1126 }
int32_t openAndReadLegacyEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:618
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:606
#define LOG(tag)
Definition: Logger.h:285
static int64_t min_allowable_epoch()
Definition: Epoch.h:65
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:411
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1087
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:402
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:659
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:400
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
static constexpr char LEGACY_EPOCH_FILENAME[]
Definition: FileMgr.h:399

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::migrateLegacyFilesV1 ( )
protected

Definition at line 1128 of file FileMgr.cpp.

References FILE_MGR_VERSION_FILENAME, fileMgrBasePath_, logger::INFO, LOG, readOnlyCheck(), renameAndSymlinkLegacyFiles(), and writeAndSyncVersionToDisk().

Referenced by migrateToLatestFileMgrVersion().

1128  {
1129  LOG(INFO) << "Migrating file format version from 1 to 2";
1130  readOnlyCheck("migrate file format version");
1132  constexpr int32_t migration_complete_version{2};
1133  writeAndSyncVersionToDisk(FILE_MGR_VERSION_FILENAME, migration_complete_version);
1134 }
#define LOG(tag)
Definition: Logger.h:285
std::string fileMgrBasePath_
Definition: FileMgr.h:411
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1087
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:402
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
static void renameAndSymlinkLegacyFiles(const std::string &table_data_dir)
Definition: FileMgr.cpp:1136

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::migrateToLatestFileMgrVersion ( )
protected

Definition at line 1157 of file FileMgr.cpp.

References logger::FATAL, FILE_MGR_VERSION_FILENAME, fileMgrVersion_, INVALID_VERSION, LATEST_FILE_MGR_VERSION, LOG, migrateEpochFileV0(), migrateLegacyFilesV1(), readVersionFromDisk(), UNREACHABLE, and writeAndSyncVersionToDisk().

Referenced by coreInit().

1157  {
1160  fileMgrVersion_ = 0;
1163  LOG(FATAL)
1164  << "Table storage forward compatibility is not supported. Version of HeavyDB "
1165  "software used is older than the version of table being read: "
1166  << fileMgrVersion_;
1167  }
1168 
1170  switch (fileMgrVersion_) {
1171  case 0: {
1173  break;
1174  }
1175  case 1: {
1177  break;
1178  }
1179  default: {
1180  UNREACHABLE();
1181  }
1182  }
1183  fileMgrVersion_++;
1184  }
1185 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1073
static constexpr int32_t LATEST_FILE_MGR_VERSION
Definition: FileMgr.h:404
#define LOG(tag)
Definition: Logger.h:285
#define UNREACHABLE()
Definition: Logger.h:338
int32_t fileMgrVersion_
version of the FileMgr on-disk file format
Definition: FileMgr.h:417
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1087
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:402
static constexpr int32_t INVALID_VERSION
Definition: FileMgr.h:403

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::openAndReadEpochFile ( const std::string &  epochFileName)
protected

Definition at line 639 of file FileMgr.cpp.

References Epoch::byte_size(), epoch_, epochFile_, logger::FATAL, heavyai::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), File_Namespace::read(), and Epoch::storage_ptr().

Referenced by coreInit(), and init().

639  {
640  if (!epochFile_) { // Check to see if already open
641  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
642  if (!boost::filesystem::exists(epochFilePath)) {
643  LOG(FATAL) << "Epoch file `" << epochFilePath << "` does not exist";
644  }
645  if (!boost::filesystem::is_regular_file(epochFilePath)) {
646  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
647  }
648  if (boost::filesystem::file_size(epochFilePath) != Epoch::byte_size()) {
649  LOG(FATAL) << "Epoch file `" << epochFilePath
650  << "` is not sized properly (current size: "
651  << boost::filesystem::file_size(epochFilePath)
652  << ", expected size: " << Epoch::byte_size() << ")";
653  }
654  epochFile_ = open(epochFilePath);
655  }
656  read(epochFile_, 0, Epoch::byte_size(), epoch_.storage_ptr(), epochFileName);
657 }
int8_t * storage_ptr()
Definition: Epoch.h:61
#define LOG(tag)
Definition: Logger.h:285
std::string fileMgrBasePath_
Definition: FileMgr.h:411
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf, const std::string &file_path)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:125
static size_t byte_size()
Definition: Epoch.h:63
FILE * open(int file_id)
Opens the file with the given id; fatal crash on error.
Definition: File.cpp:100
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::openAndReadLegacyEpochFile ( const std::string &  epochFileName)
protected

Definition at line 618 of file FileMgr.cpp.

References File_Namespace::close(), epoch(), logger::FATAL, heavyai::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), and File_Namespace::read().

Referenced by migrateEpochFileV0().

618  {
619  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
620  if (!boost::filesystem::exists(epochFilePath)) {
621  return 0;
622  }
623 
624  if (!boost::filesystem::is_regular_file(epochFilePath)) {
625  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
626  }
627  if (boost::filesystem::file_size(epochFilePath) < 4) {
628  LOG(FATAL) << "Epoch file `" << epochFilePath
629  << "` is not sized properly (current size: "
630  << boost::filesystem::file_size(epochFilePath) << ", expected size: 4)";
631  }
632  FILE* legacyEpochFile = open(epochFilePath);
633  int32_t epoch;
634  read(legacyEpochFile, 0, sizeof(int32_t), (int8_t*)&epoch, epochFilePath);
635  close(legacyEpochFile);
636  return epoch;
637 }
#define LOG(tag)
Definition: Logger.h:285
std::string fileMgrBasePath_
Definition: FileMgr.h:411
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf, const std::string &file_path)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:125
FILE * open(int file_id)
Opens the file with the given id; fatal crash on error.
Definition: File.cpp:100
int32_t epoch() const
Definition: FileMgr.h:530
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:114
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::openExistingFile ( const std::string &  path,
const int32_t  fileId,
const size_t  pageSize,
const size_t  numPages,
std::vector< HeaderInfo > &  headerVec 
)
protected

Definition at line 946 of file FileMgr.cpp.

References CHECK, f(), fileIndex_, files_, files_rw_mutex_, getFileInfoForFileId(), and File_Namespace::open().

Referenced by init(), and openFiles().

950  {
951  FILE* f = open(path);
952  auto file_info = std::make_unique<FileInfo>(this,
953  fileId,
954  f,
955  pageSize,
956  numPages,
957  path,
958  false); // false means don't init file
959 
960  file_info->openExistingFile(headerVec);
962  CHECK(files_.find(fileId) == files_.end()) << "Attempting to re-open file";
963  files_.emplace(fileId, std::move(file_info));
964  fileIndex_.insert(std::pair<size_t, int32_t>(pageSize, fileId));
965  return getFileInfoForFileId(fileId);
966 }
PageSizeFileMMap fileIndex_
Definition: FileMgr.h:414
std::unique_lock< T > unique_lock
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
FILE * open(int file_id)
Opens the file with the given id; fatal crash on error.
Definition: File.cpp:100
#define CHECK(condition)
Definition: Logger.h:291
FileInfo * getFileInfoForFileId(const int32_t fileId) const
Definition: FileMgr.h:229
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:421
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

OpenFilesResult File_Namespace::FileMgr::openFiles ( )
protected

Definition at line 200 of file FileMgr.cpp.

References threading_serial::async(), Epoch::ceiling(), CHECK, File_Namespace::OpenFilesResult::compaction_status_file_name, epoch_, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, fileMgrBasePath_, getMetadataForFile(), File_Namespace::OpenFilesResult::header_infos, logger::INFO, File_Namespace::anonymous_namespace{FileMgr.cpp}::is_compaction_status_file(), File_Namespace::FileMetadata::is_data_file, LOG, File_Namespace::OpenFilesResult::max_file_id, File_Namespace::FileMetadata::num_pages, openExistingFile(), File_Namespace::FileMetadata::page_size, processFileFutures(), run_benchmark_import::result, timer_start(), and timer_stop().

Referenced by File_Namespace::CachingFileMgr::init(), and init().

200  {
201  auto clock_begin = timer_start();
202  boost::filesystem::directory_iterator
203  end_itr; // default construction yields past-the-end
204  OpenFilesResult result;
205  result.max_file_id = -1;
206  int32_t file_count = 0;
207  int32_t thread_count = std::thread::hardware_concurrency();
208  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
209  boost::filesystem::path path(fileMgrBasePath_);
210  for (boost::filesystem::directory_iterator file_it(path); file_it != end_itr;
211  ++file_it) {
212  FileMetadata file_metadata = getMetadataForFile(file_it);
213  if (file_metadata.is_data_file) {
214  result.max_file_id = std::max(result.max_file_id, file_metadata.file_id);
215  file_futures.emplace_back(std::async(std::launch::async, [file_metadata, this] {
216  std::vector<HeaderInfo> temp_header_vec;
217  openExistingFile(file_metadata.file_path,
218  file_metadata.file_id,
219  file_metadata.page_size,
220  file_metadata.num_pages,
221  temp_header_vec);
222  return temp_header_vec;
223  }));
224  file_count++;
225  if (file_count % thread_count == 0) {
226  processFileFutures(file_futures, result.header_infos);
227  }
228  }
229 
230  if (is_compaction_status_file(file_it->path().filename().string())) {
231  CHECK(result.compaction_status_file_name.empty());
232  result.compaction_status_file_name = file_it->path().filename().string();
233  }
234  }
235 
236  if (file_futures.size() > 0) {
237  processFileFutures(file_futures, result.header_infos);
238  }
239 
240  int64_t queue_time_ms = timer_stop(clock_begin);
241  LOG(INFO) << "Completed Reading table's file metadata, Elapsed time : " << queue_time_ms
242  << "ms Epoch: " << epoch_.ceiling() << " files read: " << file_count
243  << " table location: '" << fileMgrBasePath_ << "'";
244  return result;
245 }
#define LOG(tag)
Definition: Logger.h:285
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
bool is_compaction_status_file(const std::string &file_name)
Definition: FileMgr.cpp:193
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:411
future< Result > async(Fn &&fn, Args &&...args)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:416
FileInfo * openExistingFile(const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:946
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator) const
Definition: FileMgr.cpp:157
#define CHECK(condition)
Definition: Logger.h:291
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::printSlabs ( )
inlineoverride

Definition at line 223 of file FileMgr.h.

// Always returns the literal string "Not Implemented".
223 { return "Not Implemented"; }
void File_Namespace::FileMgr::processFileFutures ( std::vector< std::future< std::vector< HeaderInfo >>> &  file_futures,
std::vector< HeaderInfo > &  headerVec 
)
protected

Definition at line 416 of file FileMgr.cpp.

Referenced by init(), and openFiles().

// Drains file_futures: waits for every future, appends each future's
// HeaderInfo vector to headerVec in iteration order, then empties file_futures.
418  {
// First pass only waits, so every task has finished before any result is taken.
419  for (auto& file_future : file_futures) {
420  file_future.wait();
421  }
422  // concatenate the vectors after thread completes
423  for (auto& file_future : file_futures) {
424  auto tempHeaderVec = file_future.get();
425  headerVec.insert(headerVec.end(), tempHeaderVec.begin(), tempHeaderVec.end());
426  }
// The futures have been consumed by get(); clear them so the caller can reuse the vector.
427  file_futures.clear();
428 }

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::putBuffer ( const ChunkKey key,
AbstractBuffer d,
const size_t  numBytes = 0 
)
override

Puts the contents of d into the Chunk with the given key.

Parameters
key- Unique identifier for a Chunk.
d- An object representing the source data for the Chunk.
Returns
FileBuffer* – the chunk (obtained via getOrCreateBuffer(key)) that received the data

Definition at line 816 of file FileMgr.cpp.

References CHECK, CHECK_LT, Data_Namespace::AbstractBuffer::clearDirtyBits(), logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), getOrCreateBuffer(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isAppended(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, show_chunk(), and Data_Namespace::AbstractBuffer::size().

// Copies the contents of the source buffer into the chunk returned by
// getOrCreateBuffer(key), clears the source buffer's dirty bits, syncs the
// chunk's encoder from the source buffer, and returns the chunk.
// NOTE: this listing refers to the source buffer as srcBuffer; the member
// header above documents the same parameter under the name d.
818  {
819  auto chunk = getOrCreateBuffer(key);
820  size_t oldChunkSize = chunk->size();
821  // write the buffer's data to the Chunk
// numBytes == 0 means "use the source buffer's full size".
822  size_t newChunkSize = (numBytes == 0) ? srcBuffer->size() : numBytes;
823  if (chunk->isDirty()) {
824  // multiple appends are allowed,
825  // but only single update is allowed
826  if (srcBuffer->isUpdated() && chunk->isUpdated()) {
827  LOG(FATAL) << "Aborting attempt to write a chunk marked dirty. Chunk inconsistency "
828  "for key: "
829  << show_chunk(key);
830  }
831  }
832  CHECK(srcBuffer->isDirty()) << "putBuffer expects a dirty buffer";
// Three cases follow: updated source (rewrite from offset 0), appended source
// (write only the new tail), or unmarked source (must carry no bytes).
833  if (srcBuffer->isUpdated()) {
834  // chunk size is not changed when fixed rows are updated or are marked as deleted.
835  // but when rows are vacuumed or varlen rows are updated (new rows are appended),
836  // chunk size will change. For vacuum, checkpoint should sync size from cpu to disk.
837  // For varlen update, it takes another route via fragmenter using disk-level buffer.
838  if (0 == numBytes && !chunk->isDirty()) {
839  chunk->setSize(newChunkSize);
840  }
841  //@todo use dirty flags to only flush pages of chunk that need to
842  // be flushed
843  chunk->write((int8_t*)srcBuffer->getMemoryPtr(),
844  newChunkSize,
845  0,
846  srcBuffer->getType(),
847  srcBuffer->getDeviceId());
848  } else if (srcBuffer->isAppended()) {
// An append must grow the chunk; only the bytes past the chunk's previous
// size are taken from the source buffer.
849  CHECK_LT(oldChunkSize, newChunkSize);
850  chunk->append((int8_t*)srcBuffer->getMemoryPtr() + oldChunkSize,
851  newChunkSize - oldChunkSize,
852  srcBuffer->getType(),
853  srcBuffer->getDeviceId());
854  } else {
855  // If dirty buffer comes in unmarked, it must be empty.
856  // Encoder sync is still required to flush the metadata.
857  CHECK(numBytes == 0)
858  << "Dirty buffer with size > 0 must be marked as isAppended() or isUpdated()";
859  }
860  // chunk->clearDirtyBits(); // Hack: because write and append will set dirty bits
861  //@todo commenting out line above will make sure this metadata is set
862  // but will trigger error on fetch chunk
863  srcBuffer->clearDirtyBits();
864  chunk->syncEncoder(srcBuffer);
865  return chunk;
866 }
#define LOG(tag)
Definition: Logger.h:285
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
FileBuffer * getOrCreateBuffer(const ChunkKey &key)
Definition: FileMgr.cpp:1670
#define CHECK_LT(x, y)
Definition: Logger.h:303
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void File_Namespace::FileMgr::readOnlyCheck ( const std::string &  action,
const std::optional< std::string > &  file_name = {} 
) const
privatevirtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1723 of file FileMgr.cpp.

References CHECK, and g_read_only.

Referenced by closeRemovePhysical(), compactFiles(), createEpochFile(), createFile(), createFileInfo(), deleteEmptyFiles(), init(), migrateEpochFileV0(), migrateLegacyFilesV1(), renameCompactionStatusFile(), resumeFileCompaction(), writeAndSyncVersionToDisk(), writeFile(), and writePageMappingsToStatusFile().

// Guard for mutating operations: the CHECK fails when g_read_only is set.
// The failure message names the attempted action and, when file_name holds a
// value, the quoted file name.
1724  {
1725  CHECK(!g_read_only) << "Error trying to " << action
1726  << (file_name.has_value() ? (": '" + file_name.value() + "'") : "")
1727  << ". Not allowed in read only mode.";
1728 }
bool g_read_only
Definition: heavyai_locks.h:21
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the caller graph for this function:

std::vector< PageMapping > File_Namespace::FileMgr::readPageMappingsFromStatusFile ( )
protected

Deserializes a page mapping vector from expected status file.

Definition at line 1547 of file FileMgr.cpp.

References CHECK, CHECK_EQ, CHECK_GE, heavyai::file_size(), getFilePath(), and UPDATE_PAGE_VISIBILITY_STATUS.

Referenced by resumeFileCompaction().

// Reads the UPDATE_PAGE_VISIBILITY_STATUS file and returns its contents as a
// vector of PageMapping. Layout consumed here: one int64_t record count
// followed by that many raw PageMapping structs. Every inconsistency
// (missing file, open failure, size mismatch) is a CHECK failure.
1547  {
1548  auto file_path = getFilePath(UPDATE_PAGE_VISIBILITY_STATUS);
1549  CHECK(boost::filesystem::exists(file_path));
// Opened with std::ios::ate so the initial tellg() reports the file size.
1550  std::ifstream status_file{file_path.string(),
1551  std::ios::in | std::ios::binary | std::ios::ate};
1552  CHECK(status_file.is_open());
// NOTE(review): the tellg() result is stored in a size_t without testing for
// a failure value first - confirm this cannot fail after the is_open() check.
1553  size_t file_size = status_file.tellg();
1554  status_file.seekg(0, std::ios::beg);
1555  CHECK_GE(file_size, sizeof(int64_t));
1556 
1557  int64_t page_mappings_count;
1558  status_file.read(reinterpret_cast<char*>(&page_mappings_count), sizeof(int64_t));
// The payload after the count must be a whole number of PageMapping records
// and must agree with the stored count.
1559  auto page_mappings_byte_size = file_size - sizeof(int64_t);
1560  CHECK_EQ(page_mappings_byte_size % sizeof(PageMapping), static_cast<size_t>(0));
1561  CHECK_EQ(static_cast<size_t>(page_mappings_count),
1562  page_mappings_byte_size / sizeof(PageMapping));
1563 
1564  std::vector<PageMapping> page_mappings(page_mappings_count);
1565  status_file.read(reinterpret_cast<char*>(page_mappings.data()),
1566  page_mappings_byte_size);
1567  status_file.close();
1568  return page_mappings;
1569 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:389
#define CHECK_GE(x, y)
Definition: Logger.h:306
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1697
#define CHECK(condition)
Definition: Logger.h:291
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::readVersionFromDisk ( const std::string &  versionFileName) const
protected

Definition at line 1073 of file FileMgr.cpp.

References File_Namespace::close(), heavyai::file_size(), fileMgrBasePath_, INVALID_VERSION, File_Namespace::open(), File_Namespace::read(), and setup::version.

Referenced by createOrMigrateTopLevelMetadata(), migrateToLatestFileMgrVersion(), and writeAndSyncVersionToDisk().

// Reads an int32_t version from the start of <fileMgrBasePath_>/<versionFileName>.
// Returns INVALID_VERSION when the path does not exist, is not a regular file,
// or is smaller than 4 bytes; otherwise returns the value read at offset 0.
1073  {
1074  const std::string versionFilePath(fileMgrBasePath_ + "/" + versionFileName);
1075  if (!boost::filesystem::exists(versionFilePath) ||
1076  !boost::filesystem::is_regular_file(versionFilePath) ||
1077  boost::filesystem::file_size(versionFilePath) < 4) {
1078  return INVALID_VERSION;
1079  }
1080  FILE* versionFile = open(versionFilePath);
1081  int32_t version;
1082  read(versionFile, 0, sizeof(int32_t), (int8_t*)&version, versionFilePath);
1083  close(versionFile);
1084  return version;
1085 }
std::string fileMgrBasePath_
Definition: FileMgr.h:411
string version
Definition: setup.in.py:73
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf, const std::string &file_path)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:125
FILE * open(int file_id)
Opens the file with the given id; fatal crash on error.
Definition: File.cpp:100
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:114
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33
static constexpr int32_t INVALID_VERSION
Definition: FileMgr.h:403

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::removeTableRelatedDS ( const int32_t  db_id,
const int32_t  table_id 
)
override

Definition at line 1215 of file FileMgr.cpp.

References UNREACHABLE.

// The body consists solely of UNREACHABLE(); db_id and table_id are not used.
1215  {
1216  UNREACHABLE();
1217 }
#define UNREACHABLE()
Definition: Logger.h:338
void File_Namespace::FileMgr::renameAndSymlinkLegacyFiles ( const std::string &  table_data_dir)
static

Definition at line 1136 of file FileMgr.cpp.

References DATA_FILE_EXT, logger::INFO, File_Namespace::kLegacyDataFileExtension, and LOG.

Referenced by migrateLegacyFilesV1(), and anonymous_namespace{TableArchiver.cpp}::rename_table_directories().

// For every regular file in table_data_dir whose extension equals
// kLegacyDataFileExtension: renames it to the same path with DATA_FILE_EXT and
// creates a symlink at the old path pointing to the new file name.
1136  {
// Pass 1 only collects old -> new paths; the renames happen in pass 2
// (presumably so the directory is not modified while it is being iterated).
1137  std::map<boost::filesystem::path, boost::filesystem::path> old_to_new_paths;
1138  for (boost::filesystem::directory_iterator it(table_data_dir), end_it; it != end_it;
1139  it++) {
1140  const auto old_path = boost::filesystem::canonical(it->path());
1141  if (boost::filesystem::is_regular_file(it->status()) &&
1142  old_path.extension().string() == kLegacyDataFileExtension) {
1143  auto new_path = old_path;
1144  new_path.replace_extension(DATA_FILE_EXT);
1145  old_to_new_paths[old_path] = new_path;
1146  }
1147  }
1148  for (const auto& [old_path, new_path] : old_to_new_paths) {
1149  boost::filesystem::rename(old_path, new_path);
1150  LOG(INFO) << "Rebrand migration: Renamed " << old_path << " to " << new_path;
// The symlink target is the bare file name (new_path.filename()), not the full path.
1151  boost::filesystem::create_symlink(new_path.filename(), old_path);
1152  LOG(INFO) << "Rebrand migration: Added symlink from " << old_path << " to "
1153  << new_path.filename();
1154  }
1155 }
#define LOG(tag)
Definition: Logger.h:285
#define DATA_FILE_EXT
Definition: File.h:25
constexpr auto kLegacyDataFileExtension
Definition: File.h:36

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::renameCompactionStatusFile ( const char *const  from_status,
const char *const  to_status 
)

Renames a given status file name to a new given file name.

Definition at line 1574 of file FileMgr.cpp.

References CHECK, getFilePath(), and readOnlyCheck().

Referenced by compactFiles(), and resumeFileCompaction().

// Renames the status file resolved from from_status to the path resolved from
// to_status. Preconditions enforced here: not in read-only mode, the source
// file exists, and the destination does not exist yet.
1575  {
1576  auto from_status_file_path = getFilePath(from_status);
1577  auto to_status_file_path = getFilePath(to_status);
1578 
1579  readOnlyCheck("write file", from_status_file_path.string());
1580 
1581  CHECK(boost::filesystem::exists(from_status_file_path));
1582  CHECK(!boost::filesystem::exists(to_status_file_path));
1583  boost::filesystem::rename(from_status_file_path, to_status_file_path);
1584 }
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1697
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Page File_Namespace::FileMgr::requestFreePage ( size_t  pagesize,
const bool  isMetadata 
)
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 877 of file FileMgr.cpp.

References CHECK, createFileInfo(), File_Namespace::FileInfo::fileId, fileIndex_, getFileInfoForFileId(), File_Namespace::FileInfo::getFreePage(), getPageMutex_, num_pages_per_data_file_, and num_pages_per_metadata_file_.

Referenced by File_Namespace::FileBuffer::addNewMultiPage(), init(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeMetadata().

// Returns one free Page of the requested page size, creating a new file when
// no existing file of that page size has a free page. getPageMutex_ is held
// for the whole call.
// NOTE: this listing names the first parameter pageSize; the member header
// above shows it as pagesize.
877  {
878  std::lock_guard<std::mutex> lock(getPageMutex_);
879 
// fileIndex_ is keyed by page size; scan every file registered under pageSize.
880  auto candidateFiles = fileIndex_.equal_range(pageSize);
881  int32_t pageNum = -1;
882  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
883  FileInfo* fileInfo = getFileInfoForFileId(fileIt->second);
// getFreePage() yielding -1 is treated as "no free page in this file".
884  pageNum = fileInfo->getFreePage();
885  if (pageNum != -1) {
886  return (Page(fileInfo->fileId, pageNum));
887  }
888  }
889  // if here then we need to add a file
// isMetadata only selects how many pages the newly created file gets.
890  FileInfo* fileInfo;
891  if (isMetadata) {
892  fileInfo = createFileInfo(pageSize, num_pages_per_metadata_file_);
893  } else {
894  fileInfo = createFileInfo(pageSize, num_pages_per_data_file_);
895  }
896  pageNum = fileInfo->getFreePage();
897  CHECK(pageNum != -1);
898  return (Page(fileInfo->fileId, pageNum));
899 }
FileInfo * createFileInfo(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:968
std::mutex getPageMutex_
Mutex held while free pages are handed out (locked by requestFreePage() and requestFreePages())
Definition: FileMgr.h:419
static size_t num_pages_per_data_file_
Definition: FileMgr.h:427
PageSizeFileMMap fileIndex_
Definition: FileMgr.h:414
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:428
#define CHECK(condition)
Definition: Logger.h:291
FileInfo * getFileInfoForFileId(const int32_t fileId) const
Definition: FileMgr.h:229

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::requestFreePages ( size_t  npages,
size_t  pagesize,
std::vector< Page > &  pages,
const bool  isMetadata 
)

Obtains free pages – creates new files if necessary – of the requested size.

Given a page size and number of pages, this method updates the vector "pages" to include free pages of the requested size. These pages are immediately removed from the free list of the affected file(s). If there are not enough pages available among current files, new files are created and their pages are included in the vector.

Parameters
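npages – The number of free pages requested
pagesize – The size of each requested page
pages – A vector containing the free pages obtained by this method
isMetadata – When new files must be created, selects whether they are sized with the metadata-file or the data-file page count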

Definition at line 901 of file FileMgr.cpp.

References CHECK, createFileInfo(), File_Namespace::FileInfo::fileId, fileIndex_, getFileInfoForFileId(), File_Namespace::FileInfo::getFreePage(), getPageMutex_, num_pages_per_data_file_, and num_pages_per_metadata_file_.

// Appends numPagesRequested free pages of size pageSize to `pages`, taking
// them from existing files of that page size first and creating new files
// for whatever is still missing. getPageMutex_ is held for the whole call.
// NOTE(review): pages is only appended to, while the final CHECK compares
// pages.size() with numPagesRequested, so this presumably expects `pages` to
// be empty on entry - confirm against callers.
904  {
905  // not used currently
906  // @todo add method to FileInfo to get more than one page
907  std::lock_guard<std::mutex> lock(getPageMutex_);
908  auto candidateFiles = fileIndex_.equal_range(pageSize);
909  size_t numPagesNeeded = numPagesRequested;
// Phase 1: take free pages from each existing file until it runs out
// (getFreePage() yields -1) or the request is satisfied.
910  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
911  FileInfo* fileInfo = getFileInfoForFileId(fileIt->second);
912  int32_t pageNum;
913  do {
914  pageNum = fileInfo->getFreePage();
915  if (pageNum != -1) {
916  pages.emplace_back(fileInfo->fileId, pageNum);
917  numPagesNeeded--;
918  }
919  } while (pageNum != -1 && numPagesNeeded > 0);
920  if (numPagesNeeded == 0) {
921  break;
922  }
923  }
// Phase 2: keep creating files (sized by isMetadata) and draining them until
// the remaining pages have been obtained.
924  while (numPagesNeeded > 0) {
925  FileInfo* fileInfo;
926  if (isMetadata) {
927  fileInfo = createFileInfo(pageSize, num_pages_per_metadata_file_);
928  } else {
929  fileInfo = createFileInfo(pageSize, num_pages_per_data_file_);
930  }
931  int32_t pageNum;
932  do {
933  pageNum = fileInfo->getFreePage();
934  if (pageNum != -1) {
935  pages.emplace_back(fileInfo->fileId, pageNum);
936  numPagesNeeded--;
937  }
938  } while (pageNum != -1 && numPagesNeeded > 0);
939  if (numPagesNeeded == 0) {
940  break;
941  }
942  }
943  CHECK(pages.size() == numPagesRequested);
944 }
FileInfo * createFileInfo(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:968
std::mutex getPageMutex_
Mutex held while free pages are handed out (locked by requestFreePage() and requestFreePages())
Definition: FileMgr.h:419
static size_t num_pages_per_data_file_
Definition: FileMgr.h:427
PageSizeFileMMap fileIndex_
Definition: FileMgr.h:414
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:428
#define CHECK(condition)
Definition: Logger.h:291
FileInfo * getFileInfoForFileId(const int32_t fileId) const
Definition: FileMgr.h:229

+ Here is the call graph for this function:

void File_Namespace::FileMgr::resetEpochFloor ( )
inline

Definition at line 305 of file FileMgr.h.

// Passes the current epoch ceiling to epoch_.floor(...), i.e. moves the epoch floor up to the ceiling.
305 { epoch_.floor(epoch_.ceiling()); }
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
void File_Namespace::FileMgr::resumeFileCompaction ( const std::string &  status_file_name)
protected

Resumes an interrupted file compaction process. This method would normally only be called when re-initializing the file manager after a crash occurred in the middle of file compaction.

Definition at line 1224 of file FileMgr.cpp.

References CHECK, compactFiles(), COPY_PAGES_STATUS, DELETE_EMPTY_FILES_STATUS, deleteEmptyFiles(), files_rw_mutex_, getFilePath(), readOnlyCheck(), readPageMappingsFromStatusFile(), renameCompactionStatusFile(), UNREACHABLE, UPDATE_PAGE_VISIBILITY_STATUS, and updateMappedPagesVisibility().

Referenced by init().

// Continues a file compaction from the phase identified by status_file_name:
//   COPY_PAGES_STATUS             -> remove the status file and run compactFiles() again
//   UPDATE_PAGE_VISIBILITY_STATUS -> re-apply the recorded page mappings, then delete empty files
//   DELETE_EMPTY_FILES_STATUS     -> delete empty files only
// Any other name hits UNREACHABLE().
// NOTE(review): source lines 1235, 1238 and 1242 are absent from this
// rendering of the listing; the "References" list above also names
// files_rw_mutex_ and renameCompactionStatusFile(), which presumably appear
// on those lines - confirm against FileMgr.cpp.
1224  {
1225  readOnlyCheck("resume file compaction", status_file_name);
1226 
1227  if (status_file_name == COPY_PAGES_STATUS) {
1228  // Delete status file and restart data compaction process
1229  auto file_path = getFilePath(status_file_name);
1230  CHECK(boost::filesystem::exists(file_path));
1231  boost::filesystem::remove(file_path);
1232  compactFiles();
1233  } else if (status_file_name == UPDATE_PAGE_VISIBILITY_STATUS) {
1234  // Execute second and third phases of data compaction
1236  auto page_mappings = readPageMappingsFromStatusFile();
1237  updateMappedPagesVisibility(page_mappings);
1239  deleteEmptyFiles();
1240  } else if (status_file_name == DELETE_EMPTY_FILES_STATUS) {
1241  // Execute last phase of data compaction
1243  deleteEmptyFiles();
1244  } else {
1245  UNREACHABLE() << "Unexpected status file name: " << status_file_name;
1246  }
1247 }
std::vector< PageMapping > readPageMappingsFromStatusFile()
Definition: FileMgr.cpp:1547
#define UNREACHABLE()
Definition: Logger.h:338
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:389
std::unique_lock< T > unique_lock
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1697
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:390
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:388
void updateMappedPagesVisibility(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1471
#define CHECK(condition)
Definition: Logger.h:291
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:421
void renameCompactionStatusFile(const char *const from_status, const char *const to_status)
Definition: FileMgr.cpp:1574

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::rollOffOldData ( const int32_t  epochCeiling,
const bool  shouldCheckpoint 
)
private

Definition at line 687 of file FileMgr.cpp.

References checkpoint(), epoch_, Epoch::floor(), freePagesBeforeEpoch(), and maxRollbackEpochs_.

Referenced by checkpoint(), and init().

// Raises the epoch floor so that at most maxRollbackEpochs_ epochs below
// epoch_ceiling are kept, freeing pages older than the new floor. Does
// nothing when maxRollbackEpochs_ is negative or the floor would not move.
// NOTE: this listing names the parameters epoch_ceiling / should_checkpoint;
// the member header above shows them as epochCeiling / shouldCheckpoint.
687  {
688  if (maxRollbackEpochs_ >= 0) {
// std::max keeps the candidate from ever falling below the current floor.
689  auto min_epoch = std::max(epoch_ceiling - maxRollbackEpochs_, epoch_.floor());
690  if (min_epoch > epoch_.floor()) {
691  freePagesBeforeEpoch(min_epoch);
692  epoch_.floor(min_epoch);
693  if (should_checkpoint) {
694  checkpoint();
695  }
696  }
697  }
698 }
int32_t floor() const
Definition: Epoch.h:43
void checkpoint() override
Fsyncs data files, writes out epoch and fsyncs that.
Definition: FileMgr.cpp:706
void freePagesBeforeEpoch(const int32_t min_epoch)
Definition: FileMgr.cpp:673
int32_t maxRollbackEpochs_
Definition: FileMgr.h:410

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::setDataAndMetadataFileStats ( StorageStats storage_stats) const
private

Definition at line 343 of file FileMgr.cpp.

References CHECK, File_Namespace::StorageStats::data_file_count, File_Namespace::StorageStats::epoch, File_Namespace::StorageStats::epoch_floor, epochFloor(), logger::FATAL, File_Namespace::FileMetadata::file_size, fileMgrBasePath_, files_, files_rw_mutex_, getMetadataForFile(), File_Namespace::FileMetadata::is_data_file, File_Namespace::anonymous_namespace{FileMgr.cpp}::is_metadata_file(), isFullyInitted_, lastCheckpointedEpoch(), LOG, File_Namespace::StorageStats::metadata_file_count, metadata_page_size_, File_Namespace::FileMetadata::num_pages, num_pages_per_metadata_file_, File_Namespace::FileMetadata::page_size, File_Namespace::StorageStats::total_data_file_size, File_Namespace::StorageStats::total_data_page_count, File_Namespace::StorageStats::total_free_data_page_count, File_Namespace::StorageStats::total_free_metadata_page_count, File_Namespace::StorageStats::total_metadata_file_size, and File_Namespace::StorageStats::total_metadata_page_count.

Referenced by getStorageStats().

// Fills the file-count, file-size and page-count fields of storage_stats,
// split into metadata files and data files via is_metadata_file(...).
// Two paths: when the manager is not fully initialised the table directory is
// scanned on disk; otherwise the in-memory FileInfo objects in files_ are used,
// and only that path also fills the free-page counts.
// NOTE(review): source lines 344, 364-365 and 389-390 are absent from this
// rendering of the listing, so both is_metadata_file(...) calls appear
// truncated here - consult FileMgr.cpp for the full argument lists.
343  {
345  if (!isFullyInitted_) {
346  CHECK(!fileMgrBasePath_.empty());
347  boost::filesystem::path path(fileMgrBasePath_);
348  if (boost::filesystem::exists(path)) {
349  if (!boost::filesystem::is_directory(path)) {
350  LOG(FATAL) << "getStorageStats: Specified path '" << fileMgrBasePath_
351  << "' for table data is not a directory.";
352  }
353 
354  storage_stats.epoch = lastCheckpointedEpoch();
355  storage_stats.epoch_floor = epochFloor();
356  boost::filesystem::directory_iterator
357  endItr; // default construction yields past-the-end
358  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr;
359  ++fileIt) {
360  FileMetadata file_metadata = getMetadataForFile(fileIt);
// Entries that are not data files are ignored.
361  if (file_metadata.is_data_file) {
362  if (is_metadata_file(file_metadata.file_size,
363  file_metadata.page_size,
366  storage_stats.metadata_file_count++;
367  storage_stats.total_metadata_file_size += file_metadata.file_size;
368  storage_stats.total_metadata_page_count += file_metadata.num_pages;
369  } else {
370  storage_stats.data_file_count++;
371  storage_stats.total_data_file_size += file_metadata.file_size;
372  storage_stats.total_data_page_count += file_metadata.num_pages;
373  }
374  }
375  }
376  }
377  } else {
378  storage_stats.epoch = lastCheckpointedEpoch();
379  storage_stats.epoch_floor = epochFloor();
// The free-page counters are set to 0 here so the .value() += ... updates
// below operate on an engaged value.
380  storage_stats.total_free_metadata_page_count = 0;
381  storage_stats.total_free_data_page_count = 0;
382 
383  // We already initialized this table so take the faster path of walking through the
384  // FileInfo objects and getting metadata from there
385  for (const auto& file_info_entry : files_) {
386  const auto file_info = file_info_entry.second.get();
387  if (is_metadata_file(file_info->size(),
388  file_info->pageSize,
391  storage_stats.metadata_file_count++;
// On this path file sizes are computed as pageSize * numPages.
392  storage_stats.total_metadata_file_size +=
393  file_info->pageSize * file_info->numPages;
394  storage_stats.total_metadata_page_count += file_info->numPages;
395  storage_stats.total_free_metadata_page_count.value() +=
396  file_info->freePages.size();
397  } else {
398  storage_stats.data_file_count++;
399  storage_stats.total_data_file_size += file_info->pageSize * file_info->numPages;
400  storage_stats.total_data_page_count += file_info->numPages;
401  storage_stats.total_free_data_page_count.value() += file_info->freePages.size();
402  }
403  }
404  }
405 }
const size_t metadata_page_size_
Definition: FileMgr.h:552
#define LOG(tag)
Definition: Logger.h:285
std::string fileMgrBasePath_
Definition: FileMgr.h:411
int32_t lastCheckpointedEpoch() const
Returns value of epoch at last checkpoint.
Definition: FileMgr.h:301
std::shared_lock< T > shared_lock
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:428
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator) const
Definition: FileMgr.cpp:157
#define CHECK(condition)
Definition: Logger.h:291
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:421
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413
int32_t epochFloor() const
Definition: FileMgr.h:283
bool is_metadata_file(size_t file_size, size_t page_size, size_t metadata_page_size, size_t num_pages_per_metadata_file)
Definition: FileMgr.cpp:325

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::setEpoch ( const int32_t  newEpoch)
protected

Definition at line 1199 of file FileMgr.cpp.

References Epoch::ceiling(), describeSelf(), epoch_, Epoch::floor(), and writeAndSyncEpochToDisk().

Referenced by init().

// Sets the epoch ceiling to newEpoch.
// Throws std::runtime_error when newEpoch is lower than the current epoch floor.
// NOTE(review): source line 1207 is absent from this rendering of the listing;
// the "References" list above names writeAndSyncEpochToDisk(), which presumably
// is called there - confirm against FileMgr.cpp.
1199  {
1200  if (newEpoch < epoch_.floor()) {
1201  std::stringstream error_message;
1202  error_message << "Cannot set epoch for " << describeSelf()
1203  << " lower than the minimum rollback epoch (" << epoch_.floor() << ").";
1204  throw std::runtime_error(error_message.str());
1205  }
1206  epoch_.ceiling(newEpoch);
1208 }
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:659
virtual std::string describeSelf() const
Definition: FileMgr.cpp:700

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::setNumPagesPerDataFile ( size_t  num_pages)
static

Definition at line 1588 of file FileMgr.cpp.

References num_pages_per_data_file_.

// Overwrites the static num_pages_per_data_file_, the page count that
// requestFreePage()/requestFreePages() pass to createFileInfo() for data files.
1588  {
1589  num_pages_per_data_file_ = num_pages;
1590 }
static size_t num_pages_per_data_file_
Definition: FileMgr.h:427
void File_Namespace::FileMgr::setNumPagesPerMetadataFile ( size_t  num_pages)
static

Definition at line 1592 of file FileMgr.cpp.

References num_pages_per_metadata_file_.

// Overwrites the static num_pages_per_metadata_file_, the page count that
// requestFreePage()/requestFreePages() pass to createFileInfo() for metadata files.
1592  {
1593  num_pages_per_metadata_file_ = num_pages;
1594 }
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:428
void File_Namespace::FileMgr::sortAndCopyFilePagesForCompaction ( size_t  page_size,
std::vector< PageMapping > &  page_mappings,
std::set< Page > &  touched_pages 
)
protected

Sorts all files with the given page size in ascending order of number of free pages, then copies pages from files with more free pages to files with fewer free pages. Destination (copied-to) pages are left marked as free while copying. Each copied source/destination page pair is recorded in the page mappings.

Definition at line 1316 of file FileMgr.cpp.

References CHECK, copySourcePageForCompaction(), File_Namespace::Page::fileId, fileIndex_, files_, File_Namespace::FileInfo::freePages, freePages(), and gpu_enabled::sort().

Referenced by compactFiles().

// Two-pointer compaction over the files of one page size: files are sorted by
// free-page count, then used pages are copied from the file at source_index
// (most free pages) into the file at destination_index (fewest free pages)
// until the two indexes meet or the remaining free pages cannot empty the
// next source file.
1318  {
1319  std::vector<FileInfo*> sorted_file_infos;
1320  auto range = fileIndex_.equal_range(page_size);
1321  for (auto it = range.first; it != range.second; it++) {
1322  sorted_file_infos.emplace_back(files_.at(it->second).get());
1323  }
// The empty check also keeps size() - 1 below from wrapping around.
1324  if (sorted_file_infos.empty()) {
1325  return;
1326  }
1327 
1328  // Sort file infos in ascending order of free pages count i.e. from files with
1329  // the least number of free pages to those with the highest number of free pages.
1330  std::sort(sorted_file_infos.begin(),
1331  sorted_file_infos.end(),
1332  [](const FileInfo* file_1, const FileInfo* file_2) {
1333  return file_1->freePages.size() < file_2->freePages.size();
1334  });
1335 
1336  size_t destination_index = 0, source_index = sorted_file_infos.size() - 1;
1337 
1338  // For page copy destinations, skip files without free pages.
1339  while (destination_index < source_index &&
1340  sorted_file_infos[destination_index]->freePages.empty()) {
1341  destination_index++;
1342  }
1343 
1344  // For page copy sources, skip files with only free pages.
1345  while (destination_index < source_index &&
1346  sorted_file_infos[source_index]->freePages.size() ==
1347  sorted_file_infos[source_index]->numPages) {
1348  source_index--;
1349  }
1350 
// Used pages of the current source file that still have to be copied.
1351  std::set<size_t> source_used_pages;
1352  CHECK(destination_index <= source_index);
1353 
1354  // Get the total number of free pages available for compaction
1355  int64_t total_free_pages{0};
1356  for (size_t i = destination_index; i <= source_index; i++) {
1357  total_free_pages += sorted_file_infos[i]->numFreePages();
1358  }
1359 
1360  while (destination_index < source_index) {
// An empty set means a new source file is being started.
1361  if (source_used_pages.empty()) {
1362  // Populate source_used_pages with only used pages in the source file.
1363  auto source_file_info = sorted_file_infos[source_index];
1364  auto& free_pages = source_file_info->freePages;
1365  for (size_t page_num = 0; page_num < source_file_info->numPages; page_num++) {
1366  if (free_pages.find(page_num) == free_pages.end()) {
1367  source_used_pages.emplace(page_num);
1368  }
1369  }
1370 
1371  // Free pages of current source file will not be copy destinations
1372  total_free_pages -= source_file_info->numFreePages();
1373  }
1374 
1375  // Exit early if there are not enough free pages to empty the next file
1376  if (total_free_pages - static_cast<int64_t>(source_used_pages.size()) < 0) {
1377  return;
1378  }
1379 
1380  // Copy pages from source files to destination files
1381  auto dest_file_info = sorted_file_infos[destination_index];
1382  while (!source_used_pages.empty() && !dest_file_info->freePages.empty()) {
1383  // Get next page to copy
1384  size_t source_page_num = *source_used_pages.begin();
1385  source_used_pages.erase(source_page_num);
1386 
1387  Page source_page{sorted_file_infos[source_index]->fileId, source_page_num};
1388  copySourcePageForCompaction(source_page,
1389  sorted_file_infos[destination_index],
1390  page_mappings,
1391  touched_pages);
1392  total_free_pages--;
1393  }
1394 
// Advance whichever side was exhausted: a fully copied source file and/or a
// destination file with no free pages left.
1395  if (source_used_pages.empty()) {
1396  source_index--;
1397  }
1398 
1399  if (dest_file_info->freePages.empty()) {
1400  destination_index++;
1401  }
1402  }
1403 }
void copySourcePageForCompaction(const Page &source_page, FileInfo *destination_file_info, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
Definition: FileMgr.cpp:1411
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
PageSizeFileMMap fileIndex_
Definition: FileMgr.h:414
#define CHECK(condition)
Definition: Logger.h:291
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::syncFilesToDisk ( )
protected

Definition at line 1596 of file FileMgr.cpp.

References CHECK, files_, and files_rw_mutex_.

Referenced by checkpoint().

// Calls syncToDisk() on every FileInfo in files_; a non-zero status fails the CHECK.
// NOTE(review): source line 1597 is absent from this rendering of the listing;
// the "References" list above names files_rw_mutex_, so a lock is presumably
// taken there - confirm against FileMgr.cpp.
1596  {
1598  for (const auto& file_info_entry : files_) {
1599  int32_t status = file_info_entry.second->syncToDisk();
1600  CHECK(status == 0) << "Could not sync file to disk";
1601  }
1602 }
std::shared_lock< T > shared_lock
#define CHECK(condition)
Definition: Logger.h:291
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:421
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::updateMappedPagesVisibility ( const std::vector< PageMapping > &  page_mappings)
protected

Goes through the given page mappings, marking each source (copied-from) page as free and each destination (copied-to) page as used (by setting its header size).

Definition at line 1471 of file FileMgr.cpp.

References CHECK_GT, logger::FATAL, files_, getFileInfoForFileId(), and LOG.

Referenced by compactFiles(), and resumeFileCompaction().

1471  {
1472  for (const auto& page_mapping : page_mappings) {
1473  auto destination_file = getFileInfoForFileId(page_mapping.destination_file_id);
1474 
1475  // Set destination page header size
1476  auto header_size = page_mapping.source_page_header_size;
1477  CHECK_GT(header_size, 0);
1478  destination_file->write(
1479  page_mapping.destination_page_num * destination_file->pageSize,
1480  sizeof(PageHeaderSizeType),
1481  reinterpret_cast<int8_t*>(&header_size));
1482  auto source_file = getFileInfoForFileId(page_mapping.source_file_id);
1483 
1484  // Free source page
1485  PageHeaderSizeType free_page_header_size{0};
1486  source_file->write(page_mapping.source_page_num * source_file->pageSize,
1487  sizeof(PageHeaderSizeType),
1488  reinterpret_cast<int8_t*>(&free_page_header_size));
1489  source_file->freePageDeferred(page_mapping.source_page_num);
1490  }
1491 
1492  for (const auto& file_info_entry : files_) {
1493  int32_t status = file_info_entry.second->syncToDisk();
1494  if (status != 0) {
1495  LOG(FATAL) << "Could not sync file to disk";
1496  }
1497  }
1498 }
#define LOG(tag)
Definition: Logger.h:285
#define CHECK_GT(x, y)
Definition: Logger.h:305
int32_t PageHeaderSizeType
Definition: FileMgr.h:134
FileInfo * getFileInfoForFileId(const int32_t fileId) const
Definition: FileMgr.h:229
std::map< int32_t, std::unique_ptr< FileInfo > > files_
Definition: FileMgr.h:413

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::updatePageIfDeleted ( FileInfo file_info,
ChunkKey chunk_key,
int32_t  contingent,
int32_t  page_epoch,
int32_t  page_num 
)
virtual

Deletes or recovers a page based on the last checkpointed epoch.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1637 of file FileMgr.cpp.

References CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, epoch(), File_Namespace::FileInfo::freePageImmediate(), g_multi_instance, g_read_only, get_fileMgrKey(), File_Namespace::is_page_deleted_with_checkpoint(), File_Namespace::is_page_deleted_without_checkpoint(), and File_Namespace::FileInfo::recoverPage().

Referenced by File_Namespace::FileInfo::openExistingFile().

1641  {
1642  // If the parent FileMgr has a fileMgrKey, then all keys are locked to one table and
1643  // can be set from the manager.
1644  auto [db_id, tb_id] = get_fileMgrKey();
1645  chunk_key[CHUNK_KEY_DB_IDX] = db_id;
1646  chunk_key[CHUNK_KEY_TABLE_IDX] = tb_id;
1647 
1648  auto table_epoch = epoch(db_id, tb_id);
1649 
1650  if (is_page_deleted_with_checkpoint(table_epoch, page_epoch, contingent)) {
1651  if (!g_read_only && !g_multi_instance) {
1652  // Read-only mode can find pages like this if the server was previously run in
1653  // write-mode but is not allowed to free them.
1654  file_info->freePageImmediate(page_num);
1655  }
1656  return true;
1657  }
1658 
1659  // Recover page if it was deleted but not checkpointed.
1660  if (is_page_deleted_without_checkpoint(table_epoch, page_epoch, contingent)) {
1661  if (!g_read_only && !g_multi_instance) {
1662  // Read-only mode can find pages like this if the server was previously run in
1663  // write-mode but is not allowed to free them.
1664  file_info->recoverPage(chunk_key, page_num);
1665  }
1666  }
1667  return false;
1668 }
bool g_multi_instance
Definition: heavyai_locks.h:22
bool is_page_deleted_without_checkpoint(int32_t table_epoch, int32_t page_epoch, int32_t contingent)
Definition: FileInfo.cpp:271
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
const TablePair get_fileMgrKey() const
Definition: FileMgr.h:341
bool g_read_only
Definition: heavyai_locks.h:21
bool is_page_deleted_with_checkpoint(int32_t table_epoch, int32_t page_epoch, int32_t contingent)
Definition: FileInfo.cpp:259
int32_t epoch() const
Definition: FileMgr.h:530

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeAndSyncEpochToDisk ( )
protected

Definition at line 659 of file FileMgr.cpp.

References Epoch::byte_size(), CHECK, epoch_, epochFile_, epochIsCheckpointed_, heavyai::fsync(), Epoch::storage_ptr(), and writeFile().

Referenced by checkpoint(), createEpochFile(), migrateEpochFileV0(), and setEpoch().

659  {
660  CHECK(epochFile_);
662  int32_t status = fflush(epochFile_);
663  CHECK(status == 0) << "Could not flush epoch file to disk";
664 #ifdef __APPLE__
665  status = fcntl(fileno(epochFile_), 51);
666 #else
667  status = heavyai::fsync(fileno(epochFile_));
668 #endif
669  CHECK(status == 0) << "Could not sync epoch file to disk";
670  epochIsCheckpointed_ = true;
671 }
int8_t * storage_ptr()
Definition: Epoch.h:61
static size_t byte_size()
Definition: Epoch.h:63
int fsync(int fd)
Definition: heavyai_fs.cpp:62
#define CHECK(condition)
Definition: Logger.h:291
size_t writeFile(FILE *f, const size_t offset, const size_t size, const int8_t *buf) const
Definition: FileMgr.cpp:1715

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeAndSyncVersionToDisk ( const std::string &  versionFileName,
const int32_t  version 
)
protected

Definition at line 1087 of file FileMgr.cpp.

References File_Namespace::close(), File_Namespace::create(), epochFile_, logger::FATAL, fileMgrBasePath_, heavyai::fsync(), logger::INFO, LOG, File_Namespace::open(), readOnlyCheck(), readVersionFromDisk(), and File_Namespace::write().

Referenced by createOrMigrateTopLevelMetadata(), init(), migrateEpochFileV0(), migrateLegacyFilesV1(), and migrateToLatestFileMgrVersion().

1088  {
1089  const std::string versionFilePath(fileMgrBasePath_ + "/" + versionFileName);
1090  readOnlyCheck("write file", versionFilePath);
1091  FILE* versionFile;
1092  if (boost::filesystem::exists(versionFilePath)) {
1093  int32_t oldVersion = readVersionFromDisk(versionFileName);
1094  LOG(INFO) << "Storage version file `" << versionFilePath
1095  << "` already exists, its current version is " << oldVersion;
1096  versionFile = open(versionFilePath);
1097  } else {
1098  versionFile = create(versionFilePath, sizeof(int32_t));
1099  }
1100  write(versionFile, 0, sizeof(int32_t), (int8_t*)&version);
1101  int32_t status = fflush(versionFile);
1102  if (status != 0) {
1103  LOG(FATAL) << "Could not flush version file " << versionFilePath << " to disk";
1104  }
1105 #ifdef __APPLE__
1106  status = fcntl(fileno(epochFile_), 51);
1107 #else
1108  status = heavyai::fsync(fileno(versionFile));
1109 #endif
1110  if (status != 0) {
1111  LOG(FATAL) << "Could not sync version file " << versionFilePath << " to disk";
1112  }
1113  close(versionFile);
1114 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1073
#define LOG(tag)
Definition: Logger.h:285
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:55
std::string fileMgrBasePath_
Definition: FileMgr.h:411
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:143
string version
Definition: setup.in.py:73
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
FILE * open(int file_id)
Opens the file with the given id; fatal crash on error.
Definition: File.cpp:100
int fsync(int fd)
Definition: heavyai_fs.cpp:62
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:114

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeDirtyBuffers ( )
private

Definition at line 1682 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, and epoch().

Referenced by checkpoint().

1682  {
1684  for (auto [key, buf] : chunkIndex_) {
1685  if (buf->isDirty()) {
1686  buf->writeMetadata(epoch());
1687  buf->clearDirtyBits();
1688  }
1689  }
1690 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420
int32_t epoch() const
Definition: FileMgr.h:530

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::writeFile ( FILE *  f,
const size_t  offset,
const size_t  size,
const int8_t *  buf 
) const

Definition at line 1715 of file FileMgr.cpp.

References readOnlyCheck(), and File_Namespace::write().

Referenced by File_Namespace::FileInfo::write(), and writeAndSyncEpochToDisk().

1718  {
1719  readOnlyCheck("write file");
1720  return write(f, offset, size, buf);
1721 }
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:143
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writePageMappingsToStatusFile ( const std::vector< PageMapping > &  page_mappings)

Serializes a page mapping vector to the expected status file. The page mapping vector is serialized in the following format: [{page mapping vector size}, {page mapping vector data bytes ...}]

Definition at line 1528 of file FileMgr.cpp.

References CHECK, COPY_PAGES_STATUS, getFilePath(), and readOnlyCheck().

Referenced by compactFiles().

1529  {
1530  auto file_path = getFilePath(COPY_PAGES_STATUS);
1531 
1532  readOnlyCheck("write file", file_path.string());
1533 
1534  CHECK(boost::filesystem::exists(file_path));
1535  CHECK(boost::filesystem::is_empty(file_path));
1536  std::ofstream status_file{file_path.string(), std::ios::out | std::ios::binary};
1537  int64_t page_mappings_count = page_mappings.size();
1538  status_file.write(reinterpret_cast<const char*>(&page_mappings_count), sizeof(int64_t));
1539  status_file.write(reinterpret_cast<const char*>(page_mappings.data()),
1540  page_mappings_count * sizeof(PageMapping));
1541  status_file.close();
1542 }
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1697
virtual void readOnlyCheck(const std::string &action, const std::optional< std::string > &file_name={}) const
Definition: FileMgr.cpp:1723
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:388
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class GlobalFileMgr
friend

Definition at line 162 of file FileMgr.h.

Member Data Documentation

constexpr char const* File_Namespace::FileMgr::COPY_PAGES_STATUS {"pending_data_compaction_0"}
static
constexpr char File_Namespace::FileMgr::DB_META_FILENAME[] = "dbmeta"
static

Definition at line 401 of file FileMgr.h.

Referenced by createOrMigrateTopLevelMetadata().

FILE* File_Namespace::FileMgr::DBMetaFile_ = nullptr
protected

Definition at line 418 of file FileMgr.h.

Referenced by closePhysicalUnlocked(), and ~FileMgr().

constexpr size_t File_Namespace::FileMgr::DEFAULT_NUM_PAGES_PER_DATA_FILE {256}
static

Definition at line 384 of file FileMgr.h.

constexpr size_t File_Namespace::FileMgr::DEFAULT_NUM_PAGES_PER_METADATA_FILE {4096}
static

Definition at line 385 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::getMinimumSize().

constexpr char const* File_Namespace::FileMgr::DELETE_EMPTY_FILES_STATUS {"pending_data_compaction_2"}
static
Epoch File_Namespace::FileMgr::epoch_
private
constexpr char File_Namespace::FileMgr::EPOCH_FILENAME[] = "epoch_metadata"
static

Definition at line 400 of file FileMgr.h.

Referenced by coreInit(), init(), and migrateEpochFileV0().

FILE* File_Namespace::FileMgr::epochFile_ = nullptr
private
bool File_Namespace::FileMgr::epochIsCheckpointed_ = true
private

Definition at line 543 of file FileMgr.h.

Referenced by writeAndSyncEpochToDisk().

constexpr char File_Namespace::FileMgr::FILE_MGR_VERSION_FILENAME[] = "filemgr_version"
static
PageSizeFileMMap File_Namespace::FileMgr::fileIndex_
protected
TablePair File_Namespace::FileMgr::fileMgrKey_
private

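TablePair key of this FileMgr (presumably the (database id, table id) pair returned by get_fileMgrKey()). Note: the generated description "Global FileMgr." appears to belong to the adjacent member gfm_ (FileMgr.h line 539), not to this member.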

Definition at line 540 of file FileMgr.h.

Referenced by coreInit(), describeSelf(), and FileMgr().

int32_t File_Namespace::FileMgr::fileMgrVersion_
protected

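Version number of this FileMgr (presumably compared against LATEST_FILE_MGR_VERSION during migration). Note: the generated description "the index of the next file id" appears to belong to the adjacent member nextFileId_ (FileMgr.h line 416), not to this member.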

Definition at line 417 of file FileMgr.h.

Referenced by init(), and migrateToLatestFileMgrVersion().

std::map<int32_t, std::unique_ptr<FileInfo> > File_Namespace::FileMgr::files_
protected
std::vector<std::pair<FileInfo*, int32_t> > File_Namespace::FileMgr::free_pages_
protected

Definition at line 424 of file FileMgr.h.

Referenced by free_page(), and freePages().

std::mutex File_Namespace::FileMgr::getPageMutex_
protected

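Mutex referenced by requestFreePage() and requestFreePages(). Note: the generated description "pointer to DB level metadata" appears to belong to the adjacent member DBMetaFile_ (FileMgr.h line 418), not to this mutex.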

Definition at line 419 of file FileMgr.h.

Referenced by requestFreePage(), and requestFreePages().

GlobalFileMgr* File_Namespace::FileMgr::gfm_
private

Definition at line 539 of file FileMgr.h.

Referenced by coreInit(), createOrMigrateTopLevelMetadata(), FileMgr(), getDBConvert(), and init().

constexpr int32_t File_Namespace::FileMgr::INVALID_VERSION = -1
static
bool File_Namespace::FileMgr::isFullyInitted_ {false}
protected
constexpr int32_t File_Namespace::FileMgr::LATEST_FILE_MGR_VERSION = 2
static

Definition at line 404 of file FileMgr.h.

Referenced by init(), and migrateToLatestFileMgrVersion().

constexpr char File_Namespace::FileMgr::LEGACY_EPOCH_FILENAME[] = "epoch"
static

Definition at line 399 of file FileMgr.h.

Referenced by migrateEpochFileV0().

int32_t File_Namespace::FileMgr::maxRollbackEpochs_
protected

Definition at line 410 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::CachingFileMgr(), and rollOffOldData().

const size_t File_Namespace::FileMgr::metadata_page_size_
protected
heavyai::shared_mutex File_Namespace::FileMgr::mutex_free_page_
mutableprotected

Definition at line 423 of file FileMgr.h.

Referenced by free_page(), and freePages().

unsigned File_Namespace::FileMgr::nextFileId_
protected

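The index of the next file id. Note: the generated description "number of threads used when loading data" appears to belong to the adjacent member num_reader_threads_ (FileMgr.h line 415), not to this member.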

Definition at line 416 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::CachingFileMgr(), createFileInfo(), File_Namespace::CachingFileMgr::init(), and init().

size_t File_Namespace::FileMgr::num_pages_per_data_file_ {DEFAULT_NUM_PAGES_PER_DATA_FILE}
staticprotected
size_t File_Namespace::FileMgr::num_pages_per_metadata_file_ {DEFAULT_NUM_PAGES_PER_METADATA_FILE}
staticprotected
size_t File_Namespace::FileMgr::num_reader_threads_
protected

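Number of threads used when loading data. Note: the generated description "Maps page sizes to FileInfo objects." appears to belong to the adjacent member fileIndex_ (FileMgr.h line 414), not to this member.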

Definition at line 415 of file FileMgr.h.

Referenced by initializeNumThreads().

constexpr char const* File_Namespace::FileMgr::UPDATE_PAGE_VISIBILITY_STATUS {"pending_data_compaction_1"}
static

The documentation for this class was generated from the following files: