OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
File_Namespace::FileMgr Class Reference

#include <FileMgr.h>

+ Inheritance diagram for File_Namespace::FileMgr:
+ Collaboration diagram for File_Namespace::FileMgr:

Public Member Functions

 FileMgr (const int32_t deviceId, GlobalFileMgr *gfm, const TablePair fileMgrKey, const int32_t max_rollback_epochs=-1, const size_t num_reader_threads=0, const int32_t epoch=-1, const size_t defaultPageSize=DEFAULT_PAGE_SIZE)
 Constructor. More...
 
 FileMgr (const int32_t deviceId, GlobalFileMgr *gfm, const TablePair fileMgrKey, const size_t defaultPageSize, const bool runCoreInit)
 
 FileMgr (GlobalFileMgr *gfm, const size_t defaultPageSize, std::string basePath)
 
 ~FileMgr () override
 Destructor. More...
 
StorageStats getStorageStats () const
 
FileBuffer * createBuffer (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
 Creates a chunk with the specified key and page size. More...
 
bool isBufferOnDevice (const ChunkKey &key) override
 
void deleteBuffer (const ChunkKey &key, const bool purge=true) override
 Deletes the chunk with the specified key. More...
 
void deleteBuffersWithPrefix (const ChunkKey &keyPrefix, const bool purge=true) override
 
FileBuffer * getBuffer (const ChunkKey &key, const size_t numBytes=0) override
 Returns a pointer to the chunk with the specified key. More...
 
void fetchBuffer (const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override
 
FileBuffer * putBuffer (const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override
 Puts the contents of d into the Chunk with the given key. More...
 
AbstractBuffer * alloc (const size_t numBytes) override
 
void free (AbstractBuffer *buffer) override
 
virtual Page requestFreePage (size_t pagesize, const bool isMetadata)
 
MgrType getMgrType () override
 
std::string getStringMgrType () override
 
std::string printSlabs () override
 
size_t getMaxSize () override
 
size_t getInUseSize () override
 
size_t getAllocated () override
 
bool isAllocationCapped () override
 
FileInfo * getFileInfoForFileId (const int32_t fileId) const
 
FileMetadata getMetadataForFile (const boost::filesystem::directory_iterator &fileIterator) const
 
void init (const size_t num_reader_threads, const int32_t epochOverride)
 
void init (const std::string &dataPathToConvertFrom, const int32_t epochOverride)
 
void copyPage (Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
 
void requestFreePages (size_t npages, size_t pagesize, std::vector< Page > &pages, const bool isMetadata)
 Obtains free pages – creates new files if necessary – of the requested size. More...
 
void getChunkMetadataVecForKeyPrefix (ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix) override
 
bool hasChunkMetadataForKeyPrefix (const ChunkKey &keyPrefix)
 
void checkpoint () override
 Fsyncs data files, writes out epoch and fsyncs that. More...
 
void checkpoint (const int32_t db_id, const int32_t tb_id) override
 
virtual int32_t epoch (int32_t db_id, int32_t tb_id) const
 Returns current value of epoch - should be one greater than recorded at last checkpoint. Because FileMgr only contains buffers from one table we can just return the FileMgr's epoch instead of finding a table-specific epoch. More...
 
int32_t epochFloor () const
 
int32_t incrementEpoch ()
 
int32_t lastCheckpointedEpoch () const
 Returns value of epoch at last checkpoint. More...
 
void resetEpochFloor ()
 
int32_t maxRollbackEpochs ()
 Returns the value of max_rollback_epochs. More...
 
size_t getNumReaderThreads ()
 Returns the number of threads defined by the num-reader-threads parameter, which should be used during the initial load and subsequent reads of data. More...
 
FILE * getFileForFileId (const int32_t fileId)
 Returns FILE pointer associated with requested fileId. More...
 
size_t getNumChunks () override
 
size_t getNumUsedMetadataPagesForChunkKey (const ChunkKey &chunkKey) const
 
int32_t getDBVersion () const
 Index for looking up chunks. More...
 
bool getDBConvert () const
 
void createTopLevelMetadata ()
 
std::string getFileMgrBasePath () const
 
virtual void closeRemovePhysical ()
 
void removeTableRelatedDS (const int32_t db_id, const int32_t table_id) override
 
virtual void free_page (std::pair< FileInfo *, int32_t > &&page)
 
virtual bool hasFileMgrKey () const
 
const TablePair get_fileMgrKey () const
 
boost::filesystem::path getFilePath (const std::string &file_name) const
 
void writePageMappingsToStatusFile (const std::vector< PageMapping > &page_mappings)
 
void renameCompactionStatusFile (const char *const from_status, const char *const to_status)
 
void compactFiles ()
 
virtual bool updatePageIfDeleted (FileInfo *file_info, ChunkKey &chunk_key, int32_t contingent, int32_t page_epoch, int32_t page_num)
 Deletes or recovers a page based on the last checkpointed epoch. More...
 
virtual bool failOnReadError () const
 True if a read error should cause a fatal error. More...
 
virtual std::string describeSelf () const
 

Static Public Member Functions

static void setNumPagesPerDataFile (size_t num_pages)
 
static void setNumPagesPerMetadataFile (size_t num_pages)
 
static void renameAndSymlinkLegacyFiles (const std::string &table_data_dir)
 

Public Attributes

ChunkKeyToChunkMap chunkIndex_
 

Static Public Attributes

static constexpr size_t DEFAULT_NUM_PAGES_PER_DATA_FILE {256}
 
static constexpr size_t DEFAULT_NUM_PAGES_PER_METADATA_FILE {4096}
 
static constexpr char const * COPY_PAGES_STATUS {"pending_data_compaction_0"}
 
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS {"pending_data_compaction_1"}
 
static constexpr char const * DELETE_EMPTY_FILES_STATUS {"pending_data_compaction_2"}
 
static constexpr char LEGACY_EPOCH_FILENAME [] = "epoch"
 
static constexpr char EPOCH_FILENAME [] = "epoch_metadata"
 
static constexpr char DB_META_FILENAME [] = "dbmeta"
 
static constexpr char FILE_MGR_VERSION_FILENAME [] = "filemgr_version"
 
static constexpr int32_t INVALID_VERSION = -1
 

Protected Member Functions

 FileMgr ()
 
FileInfo * createFile (const size_t pageSize, const size_t numPages)
 Adds a file to the file manager repository. More...
 
FileInfo * openExistingFile (const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
 
void createEpochFile (const std::string &epochFileName)
 
int32_t openAndReadLegacyEpochFile (const std::string &epochFileName)
 
void openAndReadEpochFile (const std::string &epochFileName)
 
void writeAndSyncEpochToDisk ()
 
void setEpoch (const int32_t newEpoch)
 
int32_t readVersionFromDisk (const std::string &versionFileName) const
 
void writeAndSyncVersionToDisk (const std::string &versionFileName, const int32_t version)
 
void processFileFutures (std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
 
virtual FileBuffer * createBufferUnlocked (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
 
virtual FileBuffer * createBufferFromHeaders (const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
 
void migrateToLatestFileMgrVersion ()
 
void migrateEpochFileV0 ()
 
void migrateLegacyFilesV1 ()
 
OpenFilesResult openFiles ()
 
void clearFileInfos ()
 
void copySourcePageForCompaction (const Page &source_page, FileInfo *destination_file_info, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
 
int32_t copyPageWithoutHeaderSize (const Page &source_page, const Page &destination_page)
 
void sortAndCopyFilePagesForCompaction (size_t page_size, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
 
void updateMappedPagesVisibility (const std::vector< PageMapping > &page_mappings)
 
void deleteEmptyFiles ()
 
void resumeFileCompaction (const std::string &status_file_name)
 
std::vector< PageMapping > readPageMappingsFromStatusFile ()
 
 FileMgr (const int epoch)
 
void closePhysicalUnlocked ()
 
void syncFilesToDisk ()
 
void freePages ()
 
void initializeNumThreads (size_t num_reader_threads=0)
 
virtual FileBuffer * allocateBuffer (const size_t page_size, const ChunkKey &key, const size_t num_bytes=0)
 
virtual FileBuffer * allocateBuffer (const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
 
virtual
ChunkKeyToChunkMap::iterator 
deleteBufferUnlocked (const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true)
 
virtual FileBuffer * getBufferUnlocked (const ChunkKey &key, const size_t numBytes=0) const
 

Protected Attributes

int32_t maxRollbackEpochs_
 
std::string fileMgrBasePath_
 
std::map< int32_t, FileInfo * > files_
 
PageSizeFileMMap fileIndex_
 A map of files accessible via a file identifier. More...
 
size_t num_reader_threads_
 Maps page sizes to FileInfo objects. More...
 
size_t defaultPageSize_
 number of threads used when loading data More...
 
unsigned nextFileId_
 
int32_t db_version_
 the index of the next file id More...
 
int32_t fileMgrVersion_
 
const int32_t latestFileMgrVersion_ {2}
 
FILE * DBMetaFile_ = nullptr
 
std::mutex getPageMutex_
 pointer to DB level metadata More...
 
heavyai::shared_mutex chunkIndexMutex_
 
heavyai::shared_mutex files_rw_mutex_
 
heavyai::shared_mutex mutex_free_page_
 
std::vector< std::pair
< FileInfo *, int32_t > > 
free_pages_
 
bool isFullyInitted_ {false}
 

Static Protected Attributes

static size_t num_pages_per_data_file_ {DEFAULT_NUM_PAGES_PER_DATA_FILE}
 
static size_t num_pages_per_metadata_file_ {DEFAULT_NUM_PAGES_PER_METADATA_FILE}
 

Private Member Functions

void rollOffOldData (const int32_t epochCeiling, const bool shouldCheckpoint)
 
void freePagesBeforeEpoch (const int32_t min_epoch)
 
void freePagesBeforeEpochUnlocked (const int32_t min_epoch, const ChunkKeyToChunkMap::iterator lower_bound, const ChunkKeyToChunkMap::iterator upper_bound)
 
FileBuffer * getOrCreateBuffer (const ChunkKey &key)
 
bool coreInit ()
 Determines file path, and if exists, runs file migration and opens and reads epoch file. More...
 
int32_t epoch () const
 
void writeDirtyBuffers ()
 
void setDataAndMetadataFileStats (StorageStats &storage_stats) const
 
uint32_t getFragmentCount () const
 

Private Attributes

GlobalFileMgr * gfm_
 
TablePair fileMgrKey_
 Global FileMgr. More...
 
Epoch epoch_
 
bool epochIsCheckpointed_ = true
 
FILE * epochFile_ = nullptr
 

Friends

class GlobalFileMgr
 

Detailed Description

Definition at line 154 of file FileMgr.h.

Constructor & Destructor Documentation

File_Namespace::FileMgr::FileMgr ( const int32_t  deviceId,
GlobalFileMgr * gfm,
const TablePair  fileMgrKey,
const int32_t  max_rollback_epochs = -1,
const size_t  num_reader_threads = 0,
const int32_t  epoch = -1,
const size_t  defaultPageSize = DEFAULT_PAGE_SIZE 
)

Constructor.

Definition at line 47 of file FileMgr.cpp.

References init().

54  : AbstractBufferMgr(deviceId)
56  , defaultPageSize_(defaultPageSize)
57  , nextFileId_(0)
58  , gfm_(gfm)
59  , fileMgrKey_(fileMgrKey) {
60  init(num_reader_threads, epoch);
61 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:524
void init(const size_t num_reader_threads, const int32_t epochOverride)
Definition: FileMgr.cpp:249
GlobalFileMgr * gfm_
Definition: FileMgr.h:523
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:402
int32_t maxRollbackEpochs_
Definition: FileMgr.h:395
int32_t epoch() const
Definition: FileMgr.h:517
int32_t maxRollbackEpochs()
Returns the value of max_rollback_epochs.
Definition: FileMgr.h:308

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( const int32_t  deviceId,
GlobalFileMgr * gfm,
const TablePair  fileMgrKey,
const size_t  defaultPageSize,
const bool  runCoreInit 
)

Definition at line 64 of file FileMgr.cpp.

References coreInit(), epochFile_, fileMgrBasePath_, fileMgrKey_, files_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, and to_string().

69  : AbstractBufferMgr(deviceId)
70  , maxRollbackEpochs_(-1)
71  , defaultPageSize_(defaultPageSize)
72  , nextFileId_(0)
73  , gfm_(gfm)
74  , fileMgrKey_(fileMgrKey) {
75  const std::string fileMgrDirPrefix("table");
76  const std::string FileMgrDirDelim("_");
77  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
78  std::to_string(fileMgrKey_.first) + // db_id
79  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
80  epochFile_ = nullptr;
81  files_.clear();
82  if (runCoreInit) {
83  coreInit();
84  }
85 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:524
std::string getBasePath() const
GlobalFileMgr * gfm_
Definition: FileMgr.h:523
std::string fileMgrBasePath_
Definition: FileMgr.h:396
std::string to_string(char const *&&v)
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:402
int32_t maxRollbackEpochs_
Definition: FileMgr.h:395
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
bool coreInit()
Determines file path, and if exists, runs file migration and opens and reads epoch file...
Definition: FileMgr.cpp:126

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( GlobalFileMgr * gfm,
const size_t  defaultPageSize,
std::string  basePath 
)

Definition at line 87 of file FileMgr.cpp.

References init().

88  : AbstractBufferMgr(0)
89  , maxRollbackEpochs_(-1)
90  , fileMgrBasePath_(basePath)
91  , defaultPageSize_(defaultPageSize)
92  , nextFileId_(0)
93  , gfm_(gfm)
94  , fileMgrKey_(0, 0) {
95  init(basePath, -1);
96 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:524
void init(const size_t num_reader_threads, const int32_t epochOverride)
Definition: FileMgr.cpp:249
GlobalFileMgr * gfm_
Definition: FileMgr.h:523
std::string fileMgrBasePath_
Definition: FileMgr.h:396
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:402
int32_t maxRollbackEpochs_
Definition: FileMgr.h:395

+ Here is the call graph for this function:

File_Namespace::FileMgr::~FileMgr ( )
override

Destructor.

Definition at line 106 of file FileMgr.cpp.

References chunkIndex_, File_Namespace::close(), DBMetaFile_, epochFile_, and files_.

106  {
107  // free memory used by FileInfo objects
108  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
109  delete chunkIt->second;
110  }
111  for (auto file_info_entry : files_) {
112  delete file_info_entry.second;
113  }
114 
115  if (epochFile_) {
116  close(epochFile_);
117  epochFile_ = nullptr;
118  }
119 
120  if (DBMetaFile_) {
122  DBMetaFile_ = nullptr;
123  }
124 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( )
protected

Definition at line 104 of file FileMgr.cpp.

104 : AbstractBufferMgr(0) {}
File_Namespace::FileMgr::FileMgr ( const int  epoch)
protected

Definition at line 99 of file FileMgr.cpp.

References Epoch::ceiling(), and epoch_.

99  : AbstractBufferMgr(-1) {
101 }
int32_t ceiling() const
Definition: Epoch.h:44
int32_t epoch() const
Definition: FileMgr.h:517

+ Here is the call graph for this function:

Member Function Documentation

AbstractBuffer * File_Namespace::FileMgr::alloc ( const size_t  numBytes = 0)
override

Definition at line 857 of file FileMgr.cpp.

References logger::FATAL, and LOG.

857  {
858  LOG(FATAL) << "Operation not supported";
859  return nullptr; // satisfy return-type warning
860 }
#define LOG(tag)
Definition: Logger.h:216
FileBuffer * File_Namespace::FileMgr::allocateBuffer ( const size_t  page_size,
const ChunkKey & key,
const size_t  num_bytes = 0 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1592 of file FileMgr.cpp.

Referenced by createBufferFromHeaders(), and createBufferUnlocked().

1594  {
1595  return new FileBuffer(this, page_size, key, num_bytes);
1596 }

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::allocateBuffer ( const ChunkKey & key,
const std::vector< HeaderInfo >::const_iterator &  headerStartIt,
const std::vector< HeaderInfo >::const_iterator &  headerEndIt 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1598 of file FileMgr.cpp.

1601  {
1602  return new FileBuffer(this, key, headerStartIt, headerEndIt);
1603 }
void File_Namespace::FileMgr::checkpoint ( )
override

Fsyncs data files, writes out epoch and fsyncs that.

Definition at line 694 of file FileMgr.cpp.

References describeSelf(), epoch(), freePages(), incrementEpoch(), rollOffOldData(), syncFilesToDisk(), VLOG, writeAndSyncEpochToDisk(), and writeDirtyBuffers().

Referenced by rollOffOldData().

694  {
695  VLOG(2) << "Checkpointing " << describeSelf() << " epoch: " << epoch();
697  rollOffOldData(epoch(), false /* shouldCheckpoint */);
698  syncFilesToDisk();
700  incrementEpoch();
701  freePages();
702 }
void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint)
Definition: FileMgr.cpp:675
int32_t incrementEpoch()
Definition: FileMgr.h:283
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:647
virtual std::string describeSelf() const
Definition: FileMgr.cpp:688
int32_t epoch() const
Definition: FileMgr.h:517
#define VLOG(n)
Definition: Logger.h:316

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::checkpoint ( const int32_t  db_id,
const int32_t  tb_id 
)
inline override

Definition at line 270 of file FileMgr.h.

References logger::FATAL, and LOG.

270  {
271  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
272  }
#define LOG(tag)
Definition: Logger.h:216
void File_Namespace::FileMgr::clearFileInfos ( )
protected

Definition at line 236 of file FileMgr.cpp.

References File_Namespace::close(), fileIndex_, and files_.

Referenced by init().

236  {
237  for (auto file_info_entry : files_) {
238  auto file_info = file_info_entry.second;
239  if (file_info->f) {
240  close(file_info->f);
241  file_info->f = nullptr;
242  }
243  delete file_info;
244  }
245  files_.clear();
246  fileIndex_.clear();
247 }
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:400
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::closePhysicalUnlocked ( )
protected

Definition at line 547 of file FileMgr.cpp.

References File_Namespace::close(), DBMetaFile_, epochFile_, and files_.

Referenced by File_Namespace::CachingFileMgr::closeRemovePhysical(), and closeRemovePhysical().

547  {
548  for (auto& [idx, file_info] : files_) {
549  if (file_info->f) {
550  close(file_info->f);
551  file_info->f = nullptr;
552  }
553  }
554 
555  if (DBMetaFile_) {
557  DBMetaFile_ = nullptr;
558  }
559 
560  if (epochFile_) {
561  close(epochFile_);
562  epochFile_ = nullptr;
563  }
564 }
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::closeRemovePhysical ( )
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 566 of file FileMgr.cpp.

References closePhysicalUnlocked(), files_rw_mutex_, getFileMgrBasePath(), and File_Namespace::renameForDelete().

566  {
569  /* rename for later deletion the directory containing table related data */
571 }
std::string getFileMgrBasePath() const
Definition: FileMgr.h:333
heavyai::unique_lock< heavyai::shared_mutex > write_lock
std::unique_lock< T > unique_lock
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411
void renameForDelete(const std::string directoryName)
Renames a directory to DELETE_ME_<EPOCH>_<oldname>.
Definition: File.cpp:227

+ Here is the call graph for this function:

void File_Namespace::FileMgr::compactFiles ( )

Compacts metadata and data file pages and deletes any resulting empty files. Compaction occurs in 3 idempotent phases in order to enable graceful recovery if a crash or process interruption occurs in the middle of data compaction.

Phase 1: Create a status file that indicates initiation of this phase. Sort metadata/data files in order of files with the lowest number of free pages to those with the highest number of free pages. Copy over used pages from files at the end of the sorted order (files with the highest number of free pages) to those at the beginning of the sorted order (files with the lowest number of free pages). Keep destination/copied to pages as free while copying. Keep track of copied source to destination page mapping. Write page mapping to the status file (to be used during crash recovery if needed).

Phase 2: Rename status file to a file name that indicates initiation of this phase. Go through page mapping and mark source/copied from pages as free while making the destination/copied to pages as used.

Phase 3: Rename status file to a file name that indicates initiation of this phase. Delete all empty files (files containing only free pages). Delete status file.

Definition at line 1256 of file FileMgr.cpp.

References CHECK, COPY_PAGES_STATUS, DELETE_EMPTY_FILES_STATUS, deleteEmptyFiles(), files_, files_rw_mutex_, getFilePath(), renameCompactionStatusFile(), sortAndCopyFilePagesForCompaction(), UPDATE_PAGE_VISIBILITY_STATUS, updateMappedPagesVisibility(), and writePageMappingsToStatusFile().

Referenced by resumeFileCompaction().

1256  {
1258  if (files_.empty()) {
1259  return;
1260  }
1261 
1262  auto copy_pages_status_file_path = getFilePath(COPY_PAGES_STATUS);
1263  CHECK(!boost::filesystem::exists(copy_pages_status_file_path));
1264  std::ofstream status_file(copy_pages_status_file_path.string(),
1265  std::ios::out | std::ios::binary);
1266  status_file.close();
1267 
1268  std::vector<PageMapping> page_mappings;
1269  std::set<Page> touched_pages;
1270  std::set<size_t> page_sizes;
1271  for (auto [file_id, file_info] : files_) {
1272  page_sizes.emplace(file_info->pageSize);
1273  }
1274  for (auto page_size : page_sizes) {
1275  sortAndCopyFilePagesForCompaction(page_size, page_mappings, touched_pages);
1276  }
1277 
1278  writePageMappingsToStatusFile(page_mappings);
1280 
1281  updateMappedPagesVisibility(page_mappings);
1283 
1284  deleteEmptyFiles();
1285 }
void sortAndCopyFilePagesForCompaction(size_t page_size, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
Definition: FileMgr.cpp:1293
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:375
heavyai::unique_lock< heavyai::shared_mutex > write_lock
void writePageMappingsToStatusFile(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1503
std::unique_lock< T > unique_lock
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1658
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:376
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:374
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
void updateMappedPagesVisibility(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1448
#define CHECK(condition)
Definition: Logger.h:222
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411
void renameCompactionStatusFile(const char *const from_status, const char *const to_status)
Definition: FileMgr.cpp:1546

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::copyPage ( Page & srcPage,
FileMgr * destFileMgr,
Page & destPage,
const size_t  reservedHeaderSize,
const size_t  numBytes,
const size_t  offset 
)

Definition at line 573 of file FileMgr.cpp.

References CHECK, checked_malloc(), defaultPageSize_, File_Namespace::Page::fileId, free(), getFileInfoForFileId(), File_Namespace::Page::pageNum, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by init().

578  {
579  CHECK(offset + numBytes <= defaultPageSize_);
580  FileInfo* srcFileInfo = getFileInfoForFileId(srcPage.fileId);
581  FileInfo* destFileInfo = destFileMgr->getFileInfoForFileId(destPage.fileId);
582  int8_t* buffer = reinterpret_cast<int8_t*>(checked_malloc(numBytes));
583 
584  size_t bytesRead = srcFileInfo->read(
585  srcPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize, numBytes, buffer);
586  CHECK(bytesRead == numBytes);
587  size_t bytesWritten = destFileInfo->write(
588  destPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize,
589  numBytes,
590  buffer);
591  CHECK(bytesWritten == numBytes);
592  ::free(buffer);
593 }
void free(AbstractBuffer *buffer) override
Definition: FileMgr.cpp:862
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:402
#define CHECK(condition)
Definition: Logger.h:222
FileInfo * getFileInfoForFileId(const int32_t fileId) const
Definition: FileMgr.h:224

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::copyPageWithoutHeaderSize ( const Page & source_page,
const Page & destination_page 
)
protected

Copies content of source_page to destination_page without copying over the source_page header size. The header size is instead returned by the method. Not copying over the header size enables a use case where destination_page has all the content of the source_page but is still marked as a free page.

Definition at line 1418 of file FileMgr.cpp.

References CHECK, CHECK_EQ, File_Namespace::Page::fileId, File_Namespace::FileInfo::fileId, files_, File_Namespace::Page::pageNum, File_Namespace::FileInfo::pageSize, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by copySourcePageForCompaction().

1419  {
1420  FileInfo* source_file_info = files_.at(source_page.fileId);
1421  CHECK(source_file_info);
1422  CHECK_EQ(source_file_info->fileId, source_page.fileId);
1423 
1424  FileInfo* destination_file_info = files_.at(destination_page.fileId);
1425  CHECK(destination_file_info);
1426  CHECK_EQ(destination_file_info->fileId, destination_page.fileId);
1427  CHECK_EQ(source_file_info->pageSize, destination_file_info->pageSize);
1428 
1429  auto page_size = source_file_info->pageSize;
1430  auto buffer = std::make_unique<int8_t[]>(page_size);
1431  size_t bytes_read =
1432  source_file_info->read(source_page.pageNum * page_size, page_size, buffer.get());
1433  CHECK_EQ(page_size, bytes_read);
1434 
1435  auto header_size_offset = sizeof(int32_t);
1436  size_t bytes_written = destination_file_info->write(
1437  (destination_page.pageNum * page_size) + header_size_offset,
1438  page_size - header_size_offset,
1439  buffer.get() + header_size_offset);
1440  CHECK_EQ(page_size - header_size_offset, bytes_written);
1441  return reinterpret_cast<int32_t*>(buffer.get())[0];
1442 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::copySourcePageForCompaction ( const Page & source_page,
FileInfo * destination_file_info,
std::vector< PageMapping > &  page_mappings,
std::set< Page > &  touched_pages 
)
protected

Copies a used page (indicated by the top of the source_used_pages set) from the given source file to a free page in the given destination file. Source and destination pages are recorded in the given page_mappings vector after copying is done.

Definition at line 1388 of file FileMgr.cpp.

References CHECK, CHECK_NE, copyPageWithoutHeaderSize(), File_Namespace::Page::fileId, File_Namespace::FileInfo::fileId, File_Namespace::FileInfo::getFreePage(), and File_Namespace::Page::pageNum.

Referenced by sortAndCopyFilePagesForCompaction().

1391  {
1392  size_t destination_page_num = destination_file_info->getFreePage();
1393  CHECK_NE(destination_page_num, static_cast<size_t>(-1));
1394  Page destination_page{destination_file_info->fileId, destination_page_num};
1395 
1396  // Assert that the same pages are not copied or overridden multiple times
1397  CHECK(touched_pages.find(source_page) == touched_pages.end());
1398  touched_pages.emplace(source_page);
1399 
1400  CHECK(touched_pages.find(destination_page) == touched_pages.end());
1401  touched_pages.emplace(destination_page);
1402 
1403  auto header_size = copyPageWithoutHeaderSize(source_page, destination_page);
1404  page_mappings.emplace_back(static_cast<size_t>(source_page.fileId),
1405  source_page.pageNum,
1406  header_size,
1407  static_cast<size_t>(destination_page.fileId),
1408  destination_page.pageNum);
1409 }
int32_t copyPageWithoutHeaderSize(const Page &source_page, const Page &destination_page)
Definition: FileMgr.cpp:1418
#define CHECK_NE(x, y)
Definition: Logger.h:231
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::coreInit ( )
private

Determines file path, and if exists, runs file migration and opens and reads epoch file.

Returns
a boolean representing whether the directory path existed

Definition at line 126 of file FileMgr.cpp.

References EPOCH_FILENAME, logger::FATAL, fileMgrBasePath_, fileMgrKey_, files_rw_mutex_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, LOG, migrateToLatestFileMgrVersion(), openAndReadEpochFile(), and to_string().

Referenced by FileMgr(), and init().

126  {
128  const std::string fileMgrDirPrefix("table");
129  const std::string FileMgrDirDelim("_");
130  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
131  std::to_string(fileMgrKey_.first) + // db_id
132  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
133  boost::filesystem::path path(fileMgrBasePath_);
134  if (boost::filesystem::exists(path)) {
135  if (!boost::filesystem::is_directory(path)) {
136  LOG(FATAL) << "Specified path '" << fileMgrBasePath_
137  << "' for table data is not a directory.";
138  }
141  return true;
142  }
143  return false;
144 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:524
std::string getBasePath() const
#define LOG(tag)
Definition: Logger.h:216
void migrateToLatestFileMgrVersion()
Definition: FileMgr.cpp:1137
GlobalFileMgr * gfm_
Definition: FileMgr.h:523
heavyai::unique_lock< heavyai::shared_mutex > write_lock
std::string fileMgrBasePath_
Definition: FileMgr.h:396
std::string to_string(char const *&&v)
std::unique_lock< T > unique_lock
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:386
void openAndReadEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:627
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::createBuffer ( const ChunkKey & key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
override

Creates a chunk with the specified key and page size.

Definition at line 704 of file FileMgr.cpp.

References CHECK, chunkIndex_, chunkIndexMutex_, createBufferUnlocked(), and show_chunk().

Referenced by init().

706  {
708  CHECK(chunkIndex_.find(key) == chunkIndex_.end())
709  << "Chunk already exists for key: " << show_chunk(key);
710  return createBufferUnlocked(key, page_size, num_bytes);
711 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
virtual FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:714
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::createBufferFromHeaders ( const ChunkKey key,
const std::vector< HeaderInfo >::const_iterator &  headerStartIt,
const std::vector< HeaderInfo >::const_iterator &  headerEndIt 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 725 of file FileMgr.cpp.

References allocateBuffer(), CHECK, chunkIndex_, chunkIndexMutex_, and show_chunk().

Referenced by init().

728  {
730  CHECK(chunkIndex_.find(key) == chunkIndex_.end())
731  << "Chunk already exists for key: " << show_chunk(key);
732  chunkIndex_[key] = allocateBuffer(key, headerStartIt, headerEndIt);
733  return (chunkIndex_[key]);
734 }
virtual FileBuffer * allocateBuffer(const size_t page_size, const ChunkKey &key, const size_t num_bytes=0)
Definition: FileMgr.cpp:1592
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::createBufferUnlocked ( const ChunkKey & key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 714 of file FileMgr.cpp.

References allocateBuffer(), chunkIndex_, and defaultPageSize_.

Referenced by createBuffer(), and getOrCreateBuffer().

716  {
717  size_t actual_page_size = page_size;
718  if (actual_page_size == 0) {
719  actual_page_size = defaultPageSize_;
720  }
721  chunkIndex_[key] = allocateBuffer(actual_page_size, key, num_bytes);
722  return (chunkIndex_[key]);
723 }
virtual FileBuffer * allocateBuffer(const size_t page_size, const ChunkKey &key, const size_t num_bytes=0)
Definition: FileMgr.cpp:1592
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:402

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createEpochFile ( const std::string &  epochFileName)
protected

Definition at line 595 of file FileMgr.cpp.

References Epoch::byte_size(), File_Namespace::create(), epochFile_, logger::FATAL, fileMgrBasePath_, LOG, and writeAndSyncEpochToDisk().

Referenced by init(), and migrateEpochFileV0().

595  {
596  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
597  if (boost::filesystem::exists(epochFilePath)) {
598  LOG(FATAL) << "Epoch file `" << epochFilePath << "` already exists";
599  }
600  epochFile_ = create(epochFilePath, sizeof(Epoch::byte_size()));
601  // Write out current epoch to file - which if this
602  // function is being called should be 0
604 }
#define LOG(tag)
Definition: Logger.h:216
std::string fileMgrBasePath_
Definition: FileMgr.h:396
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:57
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:647
static size_t byte_size()
Definition: Epoch.h:63

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::createFile ( const size_t  pageSize,
const size_t  numPages 
)
protected

Adds a file to the file manager repository.

This method will create a FileInfo object for the file being added, and it will create the corresponding file on physical disk with the indicated number of pages pre-allocated.

A pointer to the FileInfo object is returned, which itself has a file pointer (FILE*) and a file identifier (int32_t fileId).

Parameters
fileName - The name given to the file in physical storage.
pageSize - The logical page size for the pages in the file.
numPages - The number of logical pages to initially allocate for the file.
Returns
FileInfo* A pointer to the FileInfo object of the added file.

Definition at line 951 of file FileMgr.cpp.

References CHECK, File_Namespace::create(), anonymous_namespace{Utm.h}::f, logger::FATAL, fileIndex_, fileMgrBasePath_, files_, files_rw_mutex_, LOG, and nextFileId_.

Referenced by requestFreePage(), and requestFreePages().

951  {
952  // check arguments
953  if (pageSize == 0 || numPages == 0) {
954  LOG(FATAL) << "File creation failed: pageSize and numPages must be greater than 0.";
955  }
956 
957  // create the new file
958  FILE* f = create(fileMgrBasePath_,
959  nextFileId_,
960  pageSize,
961  numPages); // TM: not sure if I like naming scheme here - should be in
962  // separate namespace?
963  CHECK(f);
964 
965  // instantiate a new FileInfo for the newly created file
966  int32_t fileId = nextFileId_++;
967  FileInfo* fInfo =
968  new FileInfo(this, fileId, f, pageSize, numPages, true); // true means init file
969  CHECK(fInfo);
970 
972  // update file manager data structures
973  files_[fileId] = fInfo;
974  fileIndex_.insert(std::pair<size_t, int32_t>(pageSize, fileId));
975 
976  return fInfo;
977 }
#define LOG(tag)
Definition: Logger.h:216
heavyai::unique_lock< heavyai::shared_mutex > write_lock
std::string fileMgrBasePath_
Definition: FileMgr.h:396
constexpr double f
Definition: Utm.h:31
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:57
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:400
std::unique_lock< T > unique_lock
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
#define CHECK(condition)
Definition: Logger.h:222
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createTopLevelMetadata ( )

Definition at line 1036 of file FileMgr.cpp.

References DB_META_FILENAME, db_version_, logger::FATAL, getDBVersion(), INVALID_VERSION, LOG, readVersionFromDisk(), and writeAndSyncVersionToDisk().

1036  {
1038 
1039  if (db_version_ > getDBVersion()) {
1040  LOG(FATAL) << "DB forward compatibility is not supported. Version of HeavyDB "
1041  "software used is older than the version of DB being read: "
1042  << db_version_;
1043  }
1045  // new system, or we are moving forward versions
1046  // system wide migration would go here if required
1048  return;
1049  }
1050 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1052
#define LOG(tag)
Definition: Logger.h:216
int32_t db_version_
the index of the next file id
Definition: FileMgr.h:404
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1070
int32_t getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:1028
static constexpr char DB_META_FILENAME[]
Definition: FileMgr.h:387
static constexpr int32_t INVALID_VERSION
Definition: FileMgr.h:389

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffer ( const ChunkKey & key,
const bool  purge = true 
)
override

Deletes the chunk with the specified key.

Definition at line 741 of file FileMgr.cpp.

References CHECK, chunkIndex_, chunkIndexMutex_, deleteBufferUnlocked(), and show_chunk().

741  {
743  auto chunk_it = chunkIndex_.find(key);
744  CHECK(chunk_it != chunkIndex_.end())
745  << "Chunk does not exist for key: " << show_chunk(key);
746  deleteBufferUnlocked(chunk_it, purge);
747 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
std::unique_lock< T > unique_lock
virtual ChunkKeyToChunkMap::iterator deleteBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true)
Definition: FileMgr.cpp:749
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffersWithPrefix ( const ChunkKey & keyPrefix,
const bool  purge = true 
)
override

Definition at line 759 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, and deleteBufferUnlocked().

759  {
761  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
762  if (chunkIt == chunkIndex_.end()) {
763  return; // should we throw?
764  }
765  while (chunkIt != chunkIndex_.end() &&
766  std::search(chunkIt->first.begin(),
767  chunkIt->first.begin() + keyPrefix.size(),
768  keyPrefix.begin(),
769  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
770  deleteBufferUnlocked(chunkIt++, purge);
771  }
772 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
std::unique_lock< T > unique_lock
virtual ChunkKeyToChunkMap::iterator deleteBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true)
Definition: FileMgr.cpp:749
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410

+ Here is the call graph for this function:

ChunkKeyToChunkMap::iterator File_Namespace::FileMgr::deleteBufferUnlocked ( const ChunkKeyToChunkMap::iterator  chunk_it,
const bool  purge = true 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 749 of file FileMgr.cpp.

References chunkIndex_.

Referenced by deleteBuffer(), and deleteBuffersWithPrefix().

751  {
752  if (purge) {
753  chunk_it->second->freePages();
754  }
755  delete chunk_it->second;
756  return chunkIndex_.erase(chunk_it);
757 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::deleteEmptyFiles ( )
protected

Deletes files that contain only free pages. Also deletes the compaction status file.

Definition at line 1481 of file FileMgr.cpp.

References CHECK, CHECK_EQ, DELETE_EMPTY_FILES_STATUS, fileMgrBasePath_, files_, File_Namespace::get_data_file_path(), File_Namespace::get_legacy_data_file_path(), and getFilePath().

Referenced by compactFiles(), and resumeFileCompaction().

1481  {
1482  for (auto [file_id, file_info] : files_) {
1483  CHECK_EQ(file_id, file_info->fileId);
1484  if (file_info->freePages.size() == file_info->numPages) {
1485  fclose(file_info->f);
1486  file_info->f = nullptr;
1487  auto file_path = get_data_file_path(fileMgrBasePath_, file_id, file_info->pageSize);
1488  boost::filesystem::remove(get_legacy_data_file_path(file_path));
1489  boost::filesystem::remove(file_path);
1490  }
1491  }
1492 
1493  auto status_file_path = getFilePath(DELETE_EMPTY_FILES_STATUS);
1494  CHECK(boost::filesystem::exists(status_file_path));
1495  boost::filesystem::remove(status_file_path);
1496 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
std::string get_legacy_data_file_path(const std::string &new_data_file_path)
Definition: File.cpp:51
std::string fileMgrBasePath_
Definition: FileMgr.h:396
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1658
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:376
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
#define CHECK(condition)
Definition: Logger.h:222
std::string get_data_file_path(const std::string &base_path, int file_id, size_t page_size)
Definition: File.cpp:44

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::describeSelf ( ) const
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 688 of file FileMgr.cpp.

References fileMgrKey_.

Referenced by checkpoint(), and setEpoch().

688  {
689  stringstream ss;
690  ss << "table (" << fileMgrKey_.first << ", " << fileMgrKey_.second << ")";
691  return ss.str();
692 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:524

+ Here is the caller graph for this function:

virtual int32_t File_Namespace::FileMgr::epoch ( int32_t  db_id,
int32_t  tb_id 
) const
inline virtual

Returns current value of epoch - should be one greater than recorded at last checkpoint. Because FileMgr only contains buffers from one table we can just return the FileMgr's epoch instead of finding a table-specific epoch.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 279 of file FileMgr.h.

References epoch().

Referenced by epoch(), File_Namespace::FileBuffer::getFileMgrEpoch(), and File_Namespace::FileInfo::openExistingFile().

279 { return epoch(); }
int32_t epoch() const
Definition: FileMgr.h:517

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::epoch ( ) const
inline private

Definition at line 517 of file FileMgr.h.

Referenced by checkpoint(), init(), openAndReadLegacyEpochFile(), updatePageIfDeleted(), and writeDirtyBuffers().

517 { return static_cast<int32_t>(epoch_.ceiling()); }
int32_t ceiling() const
Definition: Epoch.h:44

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::epochFloor ( ) const
inline

Definition at line 281 of file FileMgr.h.

Referenced by setDataAndMetadataFileStats().

281 { return static_cast<int32_t>(epoch_.floor()); }
int32_t floor() const
Definition: Epoch.h:43

+ Here is the caller graph for this function:

virtual bool File_Namespace::FileMgr::failOnReadError ( ) const
inline virtual

True if a read error should cause a fatal error.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 365 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::FileBuffer().

365 { return true; }

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::fetchBuffer ( const ChunkKey & key,
AbstractBuffer * destBuffer,
const size_t  numBytes 
)
override

Definition at line 786 of file FileMgr.cpp.

References CHECK, Data_Namespace::AbstractBuffer::copyTo(), logger::FATAL, getBuffer(), Data_Namespace::AbstractBuffer::isDirty(), LOG, show_chunk(), and Data_Namespace::AbstractBuffer::size().

788  {
789  // reads chunk specified by ChunkKey into AbstractBuffer provided by
790  // destBuffer
791  CHECK(!destBuffer->isDirty())
792  << "Aborting attempt to fetch a chunk marked dirty. Chunk inconsistency for key: "
793  << show_chunk(key);
794  AbstractBuffer* chunk = getBuffer(key);
795  // chunk's size is either specified in function call with numBytes or we
796  // just look at pageSize * numPages in FileBuffer
797  if (numBytes > 0 && numBytes > chunk->size()) {
798  LOG(FATAL) << "Chunk retrieved for key `" << show_chunk(key) << "` is smaller ("
799  << chunk->size() << ") than number of bytes requested (" << numBytes
800  << ")";
801  }
802  chunk->copyTo(destBuffer, numBytes);
803 }
#define LOG(tag)
Definition: Logger.h:216
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
An AbstractBuffer is a unit of data management for a data manager.
void copyTo(AbstractBuffer *destination_buffer, const size_t num_bytes=0)
#define CHECK(condition)
Definition: Logger.h:222
FileBuffer * getBuffer(const ChunkKey &key, const size_t numBytes=0) override
Returns a pointer to the chunk with the specified key.
Definition: FileMgr.cpp:774

+ Here is the call graph for this function:

void File_Namespace::FileMgr::free ( AbstractBuffer * buffer)
override

Definition at line 862 of file FileMgr.cpp.

References logger::FATAL, and LOG.

Referenced by copyPage().

862  {
863  LOG(FATAL) << "Operation not supported";
864 }
#define LOG(tag)
Definition: Logger.h:216

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::free_page ( std::pair< FileInfo *, int32_t > &&  page)
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1192 of file FileMgr.cpp.

References free_pages_, and mutex_free_page_.

Referenced by File_Namespace::FileInfo::freePage(), and freePages().

1192  {
1193  std::unique_lock<heavyai::shared_mutex> lock(mutex_free_page_);
1194  free_pages_.push_back(page);
1195 }
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:414
heavyai::shared_mutex mutex_free_page_
Definition: FileMgr.h:413

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePages ( )
protected

Definition at line 1584 of file FileMgr.cpp.

References free_page(), free_pages_, and mutex_free_page_.

Referenced by checkpoint(), File_Namespace::CachingFileMgr::clearForTable(), init(), File_Namespace::CachingFileMgr::init(), and sortAndCopyFilePagesForCompaction().

1584  {
1586  for (auto& free_page : free_pages_) {
1587  free_page.first->freePageDeferred(free_page.second);
1588  }
1589  free_pages_.clear();
1590 }
std::unique_lock< T > unique_lock
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:414
heavyai::shared_mutex mutex_free_page_
Definition: FileMgr.h:413
virtual void free_page(std::pair< FileInfo *, int32_t > &&page)
Definition: FileMgr.cpp:1192

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePagesBeforeEpoch ( const int32_t  min_epoch)
private

Definition at line 661 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, and freePagesBeforeEpochUnlocked().

Referenced by rollOffOldData().

661  {
663  freePagesBeforeEpochUnlocked(min_epoch, chunkIndex_.begin(), chunkIndex_.end());
664 }
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
void freePagesBeforeEpochUnlocked(const int32_t min_epoch, const ChunkKeyToChunkMap::iterator lower_bound, const ChunkKeyToChunkMap::iterator upper_bound)
Definition: FileMgr.cpp:666

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePagesBeforeEpochUnlocked ( const int32_t  min_epoch,
const ChunkKeyToChunkMap::iterator  lower_bound,
const ChunkKeyToChunkMap::iterator  upper_bound 
)
private

Definition at line 666 of file FileMgr.cpp.

References gpu_enabled::upper_bound().

Referenced by freePagesBeforeEpoch().

669  {
670  for (auto chunkIt = lower_bound; chunkIt != upper_bound; ++chunkIt) {
671  chunkIt->second->freePagesBeforeEpoch(min_epoch);
672  }
673 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const TablePair File_Namespace::FileMgr::get_fileMgrKey ( ) const
inline

Definition at line 340 of file FileMgr.h.

Referenced by updatePageIfDeleted().

340 { return fileMgrKey_; }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:524

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getAllocated ( )
inline override

Definition at line 221 of file FileMgr.h.

221 { return 0; }
FileBuffer * File_Namespace::FileMgr::getBuffer ( const ChunkKey & key,
const size_t  numBytes = 0 
)
override

Returns a pointer to the chunk with the specified key.

Definition at line 774 of file FileMgr.cpp.

References chunkIndexMutex_, and getBufferUnlocked().

Referenced by fetchBuffer().

774  {
776  return getBufferUnlocked(key, num_bytes);
777 }
std::shared_lock< T > shared_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
virtual FileBuffer * getBufferUnlocked(const ChunkKey &key, const size_t numBytes=0) const
Definition: FileMgr.cpp:779

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::getBufferUnlocked ( const ChunkKey & key,
const size_t  numBytes = 0 
) const
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 779 of file FileMgr.cpp.

References CHECK, chunkIndex_, and show_chunk().

Referenced by getBuffer(), and getOrCreateBuffer().

780  {
781  auto chunk_it = chunkIndex_.find(key);
782  CHECK(chunk_it != chunkIndex_.end()) << "Chunk does not exist: " << show_chunk(key);
783  return chunk_it->second;
784 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::getChunkMetadataVecForKeyPrefix ( ChunkMetadataVector chunkMetadataVec,
const ChunkKey keyPrefix 
)
override

Definition at line 997 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

998  {
1000  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
1001  if (chunkIt == chunkIndex_.end()) {
1002  return; // throw?
1003  }
1004  while (chunkIt != chunkIndex_.end() &&
1005  std::search(chunkIt->first.begin(),
1006  chunkIt->first.begin() + keyPrefix.size(),
1007  keyPrefix.begin(),
1008  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
1009  if (chunkIt->second->hasEncoder()) {
1010  auto chunk_metadata = std::make_shared<ChunkMetadata>();
1011  chunkIt->second->encoder_->getMetadata(chunk_metadata);
1012  chunkMetadataVec.emplace_back(chunkIt->first, chunk_metadata);
1013  }
1014  chunkIt++;
1015  }
1016 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
bool File_Namespace::FileMgr::getDBConvert ( ) const

Definition at line 1032 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBConvert(), and gfm_.

1032  {
1033  return gfm_->getDBConvert();
1034 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:523

+ Here is the call graph for this function:

int32_t File_Namespace::FileMgr::getDBVersion ( ) const

Returns the database version reported by the GlobalFileMgr (gfm_).

Definition at line 1028 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBVersion(), and gfm_.

Referenced by createTopLevelMetadata().

1028  {
1029  return gfm_->getDBVersion();
1030 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:523

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FILE * File_Namespace::FileMgr::getFileForFileId ( const int32_t  fileId)

Returns FILE pointer associated with requested fileId.

See Also
FileBuffer

Definition at line 979 of file FileMgr.cpp.

References CHECK, and files_.

Referenced by File_Namespace::FileBuffer::readMetadata(), and File_Namespace::FileBuffer::writeMetadata().

979  {
980  CHECK(fileId >= 0);
981  CHECK(files_.find(fileId) != files_.end());
982  return files_.at(fileId)->f;
983 }
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the caller graph for this function:

FileInfo* File_Namespace::FileMgr::getFileInfoForFileId ( const int32_t  fileId) const
inline

Definition at line 224 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileBuffer::copyPage(), copyPage(), File_Namespace::FileBuffer::freePage(), File_Namespace::readForThread(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeHeader().

224  {
225  return files_.at(fileId);
226  }
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::getFileMgrBasePath ( ) const
inline

Definition at line 333 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::closeRemovePhysical(), closeRemovePhysical(), and File_Namespace::CachingFileMgr::getTableFileMgrPath().

333 { return fileMgrBasePath_; }
std::string fileMgrBasePath_
Definition: FileMgr.h:396

+ Here is the caller graph for this function:

boost::filesystem::path File_Namespace::FileMgr::getFilePath ( const std::string &  file_name) const

Definition at line 1658 of file FileMgr.cpp.

References fileMgrBasePath_.

Referenced by compactFiles(), deleteEmptyFiles(), readPageMappingsFromStatusFile(), renameCompactionStatusFile(), resumeFileCompaction(), and writePageMappingsToStatusFile().

1658  {
1659  return boost::filesystem::path(fileMgrBasePath_) / file_name;
1660 }
std::string fileMgrBasePath_
Definition: FileMgr.h:396

+ Here is the caller graph for this function:

uint32_t File_Namespace::FileMgr::getFragmentCount ( ) const
private

Definition at line 399 of file FileMgr.cpp.

References CHUNK_KEY_FRAGMENT_IDX, chunkIndex_, and chunkIndexMutex_.

Referenced by getStorageStats().

399  {
401  std::set<int32_t> fragment_ids;
402  for (const auto& [chunk_key, file_buffer] : chunkIndex_) {
403  fragment_ids.emplace(chunk_key[CHUNK_KEY_FRAGMENT_IDX]);
404  }
405  return static_cast<uint32_t>(fragment_ids.size());
406 }
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getInUseSize ( )
inline override

Definition at line 220 of file FileMgr.h.

220 { return 0; }
size_t File_Namespace::FileMgr::getMaxSize ( )
inline override

Definition at line 219 of file FileMgr.h.

219 { return 0; }
FileMetadata File_Namespace::FileMgr::getMetadataForFile ( const boost::filesystem::directory_iterator &  fileIterator) const

Definition at line 146 of file FileMgr.cpp.

References CHECK_EQ, DATA_FILE_EXT, logger::FATAL, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, heavyai::file_size(), File_Namespace::FileMetadata::file_size, File_Namespace::FileMetadata::is_data_file, LOG, File_Namespace::FileMetadata::num_pages, and File_Namespace::FileMetadata::page_size.

Referenced by init(), openFiles(), and setDataAndMetadataFileStats().

147  {
148  FileMetadata fileMetadata;
149  fileMetadata.is_data_file = false;
150  fileMetadata.file_path = fileIterator->path().string();
151  if (!boost::filesystem::is_regular_file(fileIterator->status())) {
152  return fileMetadata;
153  }
154  // note that boost::filesystem leaves preceding dot on
155  // extension - hence DATA_FILE_EXT is ".data"
156  std::string extension(fileIterator->path().extension().string());
157  if (extension == DATA_FILE_EXT) {
158  std::string fileStem(fileIterator->path().stem().string());
159  // remove trailing dot if any
160  if (fileStem.size() > 0 && fileStem.back() == '.') {
161  fileStem = fileStem.substr(0, fileStem.size() - 1);
162  }
163  size_t dotPos = fileStem.find_last_of("."); // should only be one
164  if (dotPos == std::string::npos) {
165  LOG(FATAL) << "File `" << fileIterator->path()
166  << "` does not carry page size information in the filename.";
167  }
168  fileMetadata.is_data_file = true;
169  fileMetadata.file_id = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
170  fileMetadata.page_size =
171  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
172 
173  fileMetadata.file_size = boost::filesystem::file_size(fileMetadata.file_path);
174  CHECK_EQ(fileMetadata.file_size % fileMetadata.page_size,
175  size_t(0)); // should be no partial pages
176  fileMetadata.num_pages = fileMetadata.file_size / fileMetadata.page_size;
177  }
178  return fileMetadata;
179 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
#define LOG(tag)
Definition: Logger.h:216
#define DATA_FILE_EXT
Definition: File.h:25
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

MgrType File_Namespace::FileMgr::getMgrType ( )
inline override

Definition at line 216 of file FileMgr.h.

216 { return FILE_MGR; };
size_t File_Namespace::FileMgr::getNumChunks ( )
override

Definition at line 1653 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

1653  {
1655  return chunkIndex_.size();
1656 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
size_t File_Namespace::FileMgr::getNumReaderThreads ( )
inline

Returns the number of threads, as defined by the num-reader-threads parameter, that should be used during the initial load and subsequent reads of data.

Definition at line 314 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::read().

314 { return num_reader_threads_; }
size_t num_reader_threads_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:401

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getNumUsedMetadataPagesForChunkKey ( const ChunkKey chunkKey) const

Definition at line 1018 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

1018  {
1020  const auto& chunkIt = chunkIndex_.find(chunkKey);
1021  if (chunkIt != chunkIndex_.end()) {
1022  return chunkIt->second->numMetadataPages();
1023  } else {
1024  throw std::runtime_error("Chunk was not found.");
1025  }
1026 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
FileBuffer * File_Namespace::FileMgr::getOrCreateBuffer ( const ChunkKey key)
private

Definition at line 1631 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, createBufferUnlocked(), and getBufferUnlocked().

Referenced by putBuffer().

1631  {
1632  FileBuffer* buf;
1634  auto chunk_it = chunkIndex_.find(key);
1635  if (chunk_it == chunkIndex_.end()) {
1636  buf = createBufferUnlocked(key);
1637  } else {
1638  buf = getBufferUnlocked(key);
1639  }
1640  return buf;
1641 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
virtual FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:714
virtual FileBuffer * getBufferUnlocked(const ChunkKey &key, const size_t numBytes=0) const
Definition: FileMgr.cpp:779

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StorageStats File_Namespace::FileMgr::getStorageStats ( ) const

Definition at line 329 of file FileMgr.cpp.

References File_Namespace::StorageStats::fragment_count, getFragmentCount(), isFullyInitted_, and setDataAndMetadataFileStats().

329  {
330  StorageStats storage_stats;
331  setDataAndMetadataFileStats(storage_stats);
332  if (isFullyInitted_) {
333  storage_stats.fragment_count = getFragmentCount();
334  }
335  return storage_stats;
336 }
void setDataAndMetadataFileStats(StorageStats &storage_stats) const
Definition: FileMgr.cpp:338
uint32_t getFragmentCount() const
Definition: FileMgr.cpp:399

+ Here is the call graph for this function:

std::string File_Namespace::FileMgr::getStringMgrType ( )
inline override

Definition at line 217 of file FileMgr.h.

217 { return ToString(FILE_MGR); }
bool File_Namespace::FileMgr::hasChunkMetadataForKeyPrefix ( const ChunkKey &  keyPrefix)

Definition at line 985 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

985  {
987  auto chunk_it = chunkIndex_.lower_bound(key_prefix);
988  if (chunk_it == chunkIndex_.end()) {
989  return false;
990  } else {
991  auto it_pair =
992  std::mismatch(key_prefix.begin(), key_prefix.end(), chunk_it->first.begin());
993  return it_pair.first == key_prefix.end();
994  }
995 }
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
virtual bool File_Namespace::FileMgr::hasFileMgrKey ( ) const
inlinevirtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 339 of file FileMgr.h.

339 { return true; }
int32_t File_Namespace::FileMgr::incrementEpoch ( )
inline

Definition at line 283 of file FileMgr.h.

References logger::FATAL, LOG, and Epoch::max_allowable_epoch().

Referenced by checkpoint(), and init().

283  {
284  int32_t newEpoch = epoch_.increment();
285  epochIsCheckpointed_ = false;
286  // We test for error here instead of in Epoch::increment so we can log FileMgr
287  // metadata
288  if (newEpoch > Epoch::max_allowable_epoch()) {
289  LOG(FATAL) << "Epoch for table (" << fileMgrKey_.first << ", " << fileMgrKey_.second
290  << ") greater than maximum allowed value of "
291  << Epoch::max_allowable_epoch() << ".";
292  }
293  return newEpoch;
294  }
TablePair fileMgrKey_
Key (TablePair) identifying the table this FileMgr manages.
Definition: FileMgr.h:524
#define LOG(tag)
Definition: Logger.h:216
static int64_t max_allowable_epoch()
Definition: Epoch.h:69
int32_t increment()
Definition: Epoch.h:54

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::init ( const size_t  num_reader_threads,
const int32_t  epochOverride 
)

Definition at line 249 of file FileMgr.cpp.

References Epoch::ceiling(), CHECK, clearFileInfos(), coreInit(), createBufferFromHeaders(), createEpochFile(), epoch(), epoch_, EPOCH_FILENAME, logger::FATAL, FILE_MGR_VERSION_FILENAME, fileMgrBasePath_, fileMgrVersion_, Epoch::floor(), freePages(), incrementEpoch(), initializeNumThreads(), isFullyInitted_, latestFileMgrVersion_, LOG, nextFileId_, openFiles(), resumeFileCompaction(), rollOffOldData(), setEpoch(), gpu_enabled::sort(), VLOG, and writeAndSyncVersionToDisk().

Referenced by FileMgr().

249  {
250  // if epochCeiling = -1 this means open from epoch file
251 
252  const bool dataExists = coreInit();
253  if (dataExists) {
254  if (epochOverride != -1) { // if opening at specified epoch
255  setEpoch(epochOverride);
256  }
257 
258  auto open_files_result = openFiles();
259  if (!open_files_result.compaction_status_file_name.empty()) {
260  resumeFileCompaction(open_files_result.compaction_status_file_name);
261  clearFileInfos();
262  open_files_result = openFiles();
263  CHECK(open_files_result.compaction_status_file_name.empty());
264  }
265 
266  /* Sort headerVec so that all HeaderInfos
267  * from a chunk will be grouped together
268  * and in order of increasing PageId
269  * - Version Epoch */
270  auto& header_vec = open_files_result.header_infos;
271  std::sort(header_vec.begin(), header_vec.end());
272 
273  /* Goal of next section is to find sequences in the
274  * sorted headerVec of the same ChunkId, which we
275  * can then initiate a FileBuffer with */
276 
277  VLOG(3) << "Number of Headers in Vector: " << header_vec.size();
278  if (header_vec.size() > 0) {
279  ChunkKey lastChunkKey = header_vec.begin()->chunkKey;
280  auto startIt = header_vec.begin();
281 
282  for (auto headerIt = header_vec.begin() + 1; headerIt != header_vec.end();
283  ++headerIt) {
284  if (headerIt->chunkKey != lastChunkKey) {
285  createBufferFromHeaders(lastChunkKey, startIt, headerIt);
286  lastChunkKey = headerIt->chunkKey;
287  startIt = headerIt;
288  }
289  }
290  // now need to insert last Chunk
291  createBufferFromHeaders(lastChunkKey, startIt, header_vec.end());
292  }
293  nextFileId_ = open_files_result.max_file_id + 1;
294  rollOffOldData(epoch(), true /* only checkpoint if data is rolled off */);
295  incrementEpoch();
296  freePages();
297  } else {
298  boost::filesystem::path path(fileMgrBasePath_);
299  if (!boost::filesystem::create_directory(path)) {
300  LOG(FATAL) << "Could not create data directory: " << path;
301  }
303  if (epochOverride != -1) {
304  epoch_.floor(epochOverride);
305  epoch_.ceiling(epochOverride);
306  } else {
307  // These are default constructor values for epoch_, but resetting here for clarity
308  epoch_.floor(0);
309  epoch_.ceiling(0);
310  }
313  incrementEpoch();
314  }
315 
316  initializeNumThreads(num_reader_threads);
317  isFullyInitted_ = true;
318 }
virtual FileBuffer * createBufferFromHeaders(const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
Definition: FileMgr.cpp:725
std::vector< int > ChunkKey
Definition: types.h:36
OpenFilesResult openFiles()
Definition: FileMgr.cpp:189
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:595
#define LOG(tag)
Definition: Logger.h:216
void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint)
Definition: FileMgr.cpp:675
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:396
const int32_t latestFileMgrVersion_
Definition: FileMgr.h:407
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1070
int32_t incrementEpoch()
Definition: FileMgr.h:283
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:388
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:386
int32_t epoch() const
Definition: FileMgr.h:517
#define CHECK(condition)
Definition: Logger.h:222
void setEpoch(const int32_t newEpoch)
Definition: FileMgr.cpp:1181
bool coreInit()
Determines file path, and if exists, runs file migration and opens and reads epoch file...
Definition: FileMgr.cpp:126
void initializeNumThreads(size_t num_reader_threads=0)
Definition: FileMgr.cpp:1573
#define VLOG(n)
Definition: Logger.h:316
void resumeFileCompaction(const std::string &status_file_name)
Definition: FileMgr.cpp:1206

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::init ( const std::string &  dataPathToConvertFrom,
const int32_t  epochOverride 
)

Definition at line 422 of file FileMgr.cpp.

References threading_serial::async(), CHECK, copyPage(), createBuffer(), createBufferFromHeaders(), EPOCH_FILENAME, logger::FATAL, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, File_Namespace::GlobalFileMgr::getFileMgr(), getMetadataForFile(), gfm_, File_Namespace::FileMetadata::is_data_file, isFullyInitted_, LOG, nextFileId_, File_Namespace::FileMetadata::num_pages, openAndReadEpochFile(), openExistingFile(), File_Namespace::FileMetadata::page_size, processFileFutures(), File_Namespace::MultiPage::push(), requestFreePage(), setEpoch(), gpu_enabled::sort(), and Data_Namespace::AbstractBuffer::syncEncoder().

423  {
424  int32_t converted_data_epoch = 0;
425  boost::filesystem::path path(dataPathToConvertFrom);
426  if (boost::filesystem::exists(path)) {
427  if (!boost::filesystem::is_directory(path)) {
428  LOG(FATAL) << "Specified path `" << path << "` is not a directory.";
429  }
431 
432  if (epochOverride != -1) { // if opening at previous epoch
433  setEpoch(epochOverride);
434  }
435 
436  boost::filesystem::directory_iterator
437  endItr; // default construction yields past-the-end
438  int32_t maxFileId = -1;
439  int32_t fileCount = 0;
440  int32_t threadCount = std::thread::hardware_concurrency();
441  std::vector<HeaderInfo> headerVec;
442  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
443  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
444  FileMetadata fileMetadata = getMetadataForFile(fileIt);
445  if (fileMetadata.is_data_file) {
446  maxFileId = std::max(maxFileId, fileMetadata.file_id);
447  file_futures.emplace_back(std::async(std::launch::async, [fileMetadata, this] {
448  std::vector<HeaderInfo> tempHeaderVec;
449  openExistingFile(fileMetadata.file_path,
450  fileMetadata.file_id,
451  fileMetadata.page_size,
452  fileMetadata.num_pages,
453  tempHeaderVec);
454  return tempHeaderVec;
455  }));
456  fileCount++;
457  if (fileCount % threadCount) {
458  processFileFutures(file_futures, headerVec);
459  }
460  }
461  }
462 
463  if (file_futures.size() > 0) {
464  processFileFutures(file_futures, headerVec);
465  }
466 
467  /* Sort headerVec so that all HeaderInfos
468  * from a chunk will be grouped together
469  * and in order of increasing PageId
470  * - Version Epoch */
471 
472  std::sort(headerVec.begin(), headerVec.end());
473 
474  /* Goal of next section is to find sequences in the
475  * sorted headerVec of the same ChunkId, which we
476  * can then initiate a FileBuffer with */
477 
478  if (headerVec.size() > 0) {
479  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
480  auto startIt = headerVec.begin();
481 
482  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
483  ++headerIt) {
484  if (headerIt->chunkKey != lastChunkKey) {
485  FileMgr* c_fm_ =
486  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
487  CHECK(c_fm_);
488  auto srcBuf = createBufferFromHeaders(lastChunkKey, startIt, headerIt);
489  auto destBuf = c_fm_->createBuffer(lastChunkKey, srcBuf->pageSize());
490  destBuf->syncEncoder(srcBuf);
491  destBuf->setSize(srcBuf->size());
492  destBuf->setDirty(); // this needs to be set to force writing out metadata
493  // files from "checkpoint()" call
494 
495  size_t totalNumPages = srcBuf->getMultiPage().size();
496  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
497  Page srcPage = srcBuf->getMultiPage()[pageNum].current().page;
498  Page destPage = c_fm_->requestFreePage(
499  srcBuf->pageSize(),
500  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
501  MultiPage multiPage(srcBuf->pageSize());
502  multiPage.push(destPage, converted_data_epoch);
503  destBuf->multiPages_.push_back(multiPage);
504  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
505  copyPage(
506  srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
507  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
508  }
509  lastChunkKey = headerIt->chunkKey;
510  startIt = headerIt;
511  }
512  }
513 
514  // now need to insert last Chunk
515  FileMgr* c_fm_ =
516  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
517  auto srcBuf = createBufferFromHeaders(lastChunkKey, startIt, headerVec.end());
518  auto destBuf = c_fm_->createBuffer(lastChunkKey, srcBuf->pageSize());
519  destBuf->syncEncoder(srcBuf);
520  destBuf->setSize(srcBuf->size());
521  destBuf->setDirty(); // this needs to be set to write out metadata file from the
522  // "checkpoint()" call
523 
524  size_t totalNumPages = srcBuf->getMultiPage().size();
525  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
526  Page srcPage = srcBuf->getMultiPage()[pageNum].current().page;
527  Page destPage = c_fm_->requestFreePage(
528  srcBuf->pageSize(),
529  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
530  MultiPage multiPage(srcBuf->pageSize());
531  multiPage.push(destPage, converted_data_epoch);
532  destBuf->multiPages_.push_back(multiPage);
533  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
534  copyPage(srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
535  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
536  }
537  }
538  nextFileId_ = maxFileId + 1;
539  } else {
540  if (!boost::filesystem::create_directory(path)) {
541  LOG(FATAL) << "Specified path does not exist: " << path;
542  }
543  }
544  isFullyInitted_ = true;
545 }
virtual FileBuffer * createBufferFromHeaders(const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &headerStartIt, const std::vector< HeaderInfo >::const_iterator &headerEndIt)
Definition: FileMgr.cpp:725
std::vector< int > ChunkKey
Definition: types.h:36
void syncEncoder(const AbstractBuffer *src_buffer)
#define LOG(tag)
Definition: Logger.h:216
void copyPage(Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
Definition: FileMgr.cpp:573
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
GlobalFileMgr * gfm_
Definition: FileMgr.h:523
future< Result > async(Fn &&fn, Args &&...args)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:408
FileInfo * openExistingFile(const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:935
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:386
void openAndReadEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:627
AbstractBufferMgr * getFileMgr(const int32_t db_id, const int32_t tb_id)
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator) const
Definition: FileMgr.cpp:146
#define CHECK(condition)
Definition: Logger.h:222
void setEpoch(const int32_t newEpoch)
Definition: FileMgr.cpp:1181

+ Here is the call graph for this function:

void File_Namespace::FileMgr::initializeNumThreads ( size_t  num_reader_threads = 0)
protected

Definition at line 1573 of file FileMgr.cpp.

References num_reader_threads_.

Referenced by init(), and File_Namespace::CachingFileMgr::init().

1573  {
1574  // # of threads is based on # of cores on the host
1575  size_t num_hardware_based_threads = std::thread::hardware_concurrency();
1576  if (num_reader_threads == 0 || num_reader_threads > num_hardware_based_threads) {
1577  // # of threads has not been defined by user
1578  num_reader_threads_ = num_hardware_based_threads;
1579  } else {
1580  num_reader_threads_ = num_reader_threads;
1581  }
1582 }
size_t num_reader_threads_
Number of reader threads, as set by initializeNumThreads().
Definition: FileMgr.h:401

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::isAllocationCapped ( )
inlineoverride

Definition at line 222 of file FileMgr.h.

222 { return false; }
bool File_Namespace::FileMgr::isBufferOnDevice ( const ChunkKey &  key)
override

Definition at line 736 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

736  {
738  return chunkIndex_.find(key) != chunkIndex_.end();
739 }
std::shared_lock< T > shared_lock
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
int32_t File_Namespace::FileMgr::lastCheckpointedEpoch ( ) const
inline

Returns value of epoch at last checkpoint.

Definition at line 299 of file FileMgr.h.

Referenced by File_Namespace::GlobalFileMgr::existsDiffBetweenFileMgrParamsAndFileMgr(), and setDataAndMetadataFileStats().

299  {
300  return epoch() - (epochIsCheckpointed_ ? 0 : 1);
301  }
int32_t epoch() const
Definition: FileMgr.h:517

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::maxRollbackEpochs ( )
inline

Returns the value of max_rollback_epochs.

Definition at line 308 of file FileMgr.h.

Referenced by File_Namespace::GlobalFileMgr::existsDiffBetweenFileMgrParamsAndFileMgr().

308 { return maxRollbackEpochs_; }
int32_t maxRollbackEpochs_
Definition: FileMgr.h:395

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::migrateEpochFileV0 ( )
protected

Definition at line 1098 of file FileMgr.cpp.

References Epoch::ceiling(), createEpochFile(), epoch_, EPOCH_FILENAME, FILE_MGR_VERSION_FILENAME, fileMgrBasePath_, Epoch::floor(), logger::INFO, LEGACY_EPOCH_FILENAME, LOG, Epoch::min_allowable_epoch(), openAndReadLegacyEpochFile(), writeAndSyncEpochToDisk(), and writeAndSyncVersionToDisk().

Referenced by migrateToLatestFileMgrVersion().

1098  {
1099  const std::string versionFilePath(fileMgrBasePath_ + "/" + FILE_MGR_VERSION_FILENAME);
1100  LOG(INFO) << "Migrating file format version from 0 to 1 for `" << versionFilePath;
1105  int32_t migrationCompleteVersion = 1;
1106  writeAndSyncVersionToDisk(FILE_MGR_VERSION_FILENAME, migrationCompleteVersion);
1107 }
int32_t openAndReadLegacyEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:606
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:595
#define LOG(tag)
Definition: Logger.h:216
static int64_t min_allowable_epoch()
Definition: Epoch.h:65
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:396
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1070
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:388
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:647
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:386
static constexpr char LEGACY_EPOCH_FILENAME[]
Definition: FileMgr.h:385

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::migrateLegacyFilesV1 ( )
protected

Definition at line 1109 of file FileMgr.cpp.

References FILE_MGR_VERSION_FILENAME, fileMgrBasePath_, logger::INFO, LOG, renameAndSymlinkLegacyFiles(), and writeAndSyncVersionToDisk().

Referenced by migrateToLatestFileMgrVersion().

1109  {
1110  LOG(INFO) << "Migrating file format version from 1 to 2";
1112  constexpr int32_t migration_complete_version{2};
1113  writeAndSyncVersionToDisk(FILE_MGR_VERSION_FILENAME, migration_complete_version);
1114 }
#define LOG(tag)
Definition: Logger.h:216
std::string fileMgrBasePath_
Definition: FileMgr.h:396
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1070
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:388
static void renameAndSymlinkLegacyFiles(const std::string &table_data_dir)
Definition: FileMgr.cpp:1116

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::migrateToLatestFileMgrVersion ( )
protected

Definition at line 1137 of file FileMgr.cpp.

References logger::FATAL, FILE_MGR_VERSION_FILENAME, fileMgrVersion_, INVALID_VERSION, latestFileMgrVersion_, LOG, migrateEpochFileV0(), migrateLegacyFilesV1(), readVersionFromDisk(), UNREACHABLE, and writeAndSyncVersionToDisk().

Referenced by coreInit().

1137  {
1140  fileMgrVersion_ = 0;
1142  } else if (fileMgrVersion_ > latestFileMgrVersion_) {
1143  LOG(FATAL)
1144  << "Table storage forward compatibility is not supported. Version of HeavyDB "
1145  "software used is older than the version of table being read: "
1146  << fileMgrVersion_;
1147  }
1148 
1151  switch (fileMgrVersion_) {
1152  case 0: {
1154  break;
1155  }
1156  case 1: {
1158  break;
1159  }
1160  default: {
1161  UNREACHABLE();
1162  }
1163  }
1164  fileMgrVersion_++;
1165  }
1166  }
1167 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1052
#define LOG(tag)
Definition: Logger.h:216
#define UNREACHABLE()
Definition: Logger.h:266
const int32_t latestFileMgrVersion_
Definition: FileMgr.h:407
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1070
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:388
static constexpr int32_t INVALID_VERSION
Definition: FileMgr.h:389

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::openAndReadEpochFile ( const std::string &  epochFileName)
protected

Definition at line 627 of file FileMgr.cpp.

References Epoch::byte_size(), epoch_, epochFile_, logger::FATAL, heavyai::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), File_Namespace::read(), and Epoch::storage_ptr().

Referenced by coreInit(), and init().

627  {
628  if (!epochFile_) { // Check to see if already open
629  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
630  if (!boost::filesystem::exists(epochFilePath)) {
631  LOG(FATAL) << "Epoch file `" << epochFilePath << "` does not exist";
632  }
633  if (!boost::filesystem::is_regular_file(epochFilePath)) {
634  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
635  }
636  if (boost::filesystem::file_size(epochFilePath) != Epoch::byte_size()) {
637  LOG(FATAL) << "Epoch file `" << epochFilePath
638  << "` is not sized properly (current size: "
639  << boost::filesystem::file_size(epochFilePath)
640  << ", expected size: " << Epoch::byte_size() << ")";
641  }
642  epochFile_ = open(epochFilePath);
643  }
645 }
int8_t * storage_ptr()
Definition: Epoch.h:61
#define LOG(tag)
Definition: Logger.h:216
std::string fileMgrBasePath_
Definition: FileMgr.h:396
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:142
static size_t byte_size()
Definition: Epoch.h:63
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:107
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::openAndReadLegacyEpochFile ( const std::string &  epochFileName)
protected

Definition at line 606 of file FileMgr.cpp.

References File_Namespace::close(), epoch(), logger::FATAL, heavyai::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), and File_Namespace::read().

Referenced by migrateEpochFileV0().

606  {
607  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
608  if (!boost::filesystem::exists(epochFilePath)) {
609  return 0;
610  }
611 
612  if (!boost::filesystem::is_regular_file(epochFilePath)) {
613  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
614  }
615  if (boost::filesystem::file_size(epochFilePath) < 4) {
616  LOG(FATAL) << "Epoch file `" << epochFilePath
617  << "` is not sized properly (current size: "
618  << boost::filesystem::file_size(epochFilePath) << ", expected size: 4)";
619  }
620  FILE* legacyEpochFile = open(epochFilePath);
621  int32_t epoch;
622  read(legacyEpochFile, 0, sizeof(int32_t), (int8_t*)&epoch);
623  close(legacyEpochFile);
624  return epoch;
625 }
#define LOG(tag)
Definition: Logger.h:216
std::string fileMgrBasePath_
Definition: FileMgr.h:396
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:142
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:107
int32_t epoch() const
Definition: FileMgr.h:517
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::openExistingFile ( const std::string &  path,
const int32_t  fileId,
const size_t  pageSize,
const size_t  numPages,
std::vector< HeaderInfo > &  headerVec 
)
protected

Definition at line 935 of file FileMgr.cpp.

References anonymous_namespace{Utm.h}::f, fileIndex_, files_, files_rw_mutex_, File_Namespace::open(), and File_Namespace::FileInfo::openExistingFile().

Referenced by init(), and openFiles().

939  {
940  FILE* f = open(path);
941  FileInfo* fInfo = new FileInfo(
942  this, fileId, f, pageSize, numPages, false); // false means don't init file
943 
944  fInfo->openExistingFile(headerVec);
946  files_[fileId] = fInfo;
947  fileIndex_.insert(std::pair<size_t, int32_t>(pageSize, fileId));
948  return fInfo;
949 }
heavyai::unique_lock< heavyai::shared_mutex > write_lock
constexpr double f
Definition: Utm.h:31
PageSizeFileMMap fileIndex_
Maps page sizes to file identifiers.
Definition: FileMgr.h:400
std::unique_lock< T > unique_lock
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:107
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

OpenFilesResult File_Namespace::FileMgr::openFiles ( )
protected

Definition at line 189 of file FileMgr.cpp.

References threading_serial::async(), Epoch::ceiling(), CHECK, File_Namespace::OpenFilesResult::compaction_status_file_name, epoch_, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, fileMgrBasePath_, getMetadataForFile(), File_Namespace::OpenFilesResult::header_infos, logger::INFO, File_Namespace::anonymous_namespace{FileMgr.cpp}::is_compaction_status_file(), File_Namespace::FileMetadata::is_data_file, LOG, File_Namespace::OpenFilesResult::max_file_id, File_Namespace::FileMetadata::num_pages, openExistingFile(), File_Namespace::FileMetadata::page_size, processFileFutures(), run_benchmark_import::result, timer_start(), and timer_stop().

Referenced by init(), and File_Namespace::CachingFileMgr::init().

189  {
190  auto clock_begin = timer_start();
191  boost::filesystem::directory_iterator
192  end_itr; // default construction yields past-the-end
193  OpenFilesResult result;
194  result.max_file_id = -1;
195  int32_t file_count = 0;
196  int32_t thread_count = std::thread::hardware_concurrency();
197  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
198  boost::filesystem::path path(fileMgrBasePath_);
199  for (boost::filesystem::directory_iterator file_it(path); file_it != end_itr;
200  ++file_it) {
201  FileMetadata file_metadata = getMetadataForFile(file_it);
202  if (file_metadata.is_data_file) {
203  result.max_file_id = std::max(result.max_file_id, file_metadata.file_id);
204  file_futures.emplace_back(std::async(std::launch::async, [file_metadata, this] {
205  std::vector<HeaderInfo> temp_header_vec;
206  openExistingFile(file_metadata.file_path,
207  file_metadata.file_id,
208  file_metadata.page_size,
209  file_metadata.num_pages,
210  temp_header_vec);
211  return temp_header_vec;
212  }));
213  file_count++;
214  if (file_count % thread_count == 0) {
215  processFileFutures(file_futures, result.header_infos);
216  }
217  }
218 
219  if (is_compaction_status_file(file_it->path().filename().string())) {
220  CHECK(result.compaction_status_file_name.empty());
221  result.compaction_status_file_name = file_it->path().filename().string();
222  }
223  }
224 
225  if (file_futures.size() > 0) {
226  processFileFutures(file_futures, result.header_infos);
227  }
228 
229  int64_t queue_time_ms = timer_stop(clock_begin);
230  LOG(INFO) << "Completed Reading table's file metadata, Elapsed time : " << queue_time_ms
231  << "ms Epoch: " << epoch_.ceiling() << " files read: " << file_count
232  << " table location: '" << fileMgrBasePath_ << "'";
233  return result;
234 }
#define LOG(tag)
Definition: Logger.h:216
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
bool is_compaction_status_file(const std::string &file_name)
Definition: FileMgr.cpp:182
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:396
future< Result > async(Fn &&fn, Args &&...args)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:408
FileInfo * openExistingFile(const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:935
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator) const
Definition: FileMgr.cpp:146
#define CHECK(condition)
Definition: Logger.h:222
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::printSlabs ( )
inlineoverride

Definition at line 218 of file FileMgr.h.

218 { return "Not Implemented"; }
void File_Namespace::FileMgr::processFileFutures ( std::vector< std::future< std::vector< HeaderInfo >>> &  file_futures,
std::vector< HeaderInfo > &  headerVec 
)
protected

Definition at line 408 of file FileMgr.cpp.

Referenced by init(), and openFiles().

410  {
411  for (auto& file_future : file_futures) {
412  file_future.wait();
413  }
414  // concatenate the vectors after thread completes
415  for (auto& file_future : file_futures) {
416  auto tempHeaderVec = file_future.get();
417  headerVec.insert(headerVec.end(), tempHeaderVec.begin(), tempHeaderVec.end());
418  }
419  file_futures.clear();
420 }

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::putBuffer ( const ChunkKey &  key,
AbstractBuffer *  d,
const size_t  numBytes = 0 
)
override

Puts the contents of d into the Chunk with the given key.

Parameters
key - Unique identifier for a Chunk.
d - An object representing the source data for the Chunk (named srcBuffer in the definition).
Returns
FileBuffer* - the buffer of the Chunk that the source data was written to.

Definition at line 805 of file FileMgr.cpp.

References CHECK, CHECK_LT, Data_Namespace::AbstractBuffer::clearDirtyBits(), logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), getOrCreateBuffer(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isAppended(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, show_chunk(), and Data_Namespace::AbstractBuffer::size().

807  {
808  auto chunk = getOrCreateBuffer(key);
809  size_t oldChunkSize = chunk->size();
810  // write the buffer's data to the Chunk
811  size_t newChunkSize = (numBytes == 0) ? srcBuffer->size() : numBytes;
812  if (chunk->isDirty()) {
813  // multiple appends are allowed,
814  // but only single update is allowed
815  if (srcBuffer->isUpdated() && chunk->isUpdated()) {
816  LOG(FATAL) << "Aborting attempt to write a chunk marked dirty. Chunk inconsistency "
817  "for key: "
818  << show_chunk(key);
819  }
820  }
821  CHECK(srcBuffer->isDirty()) << "putBuffer expects a dirty buffer";
822  if (srcBuffer->isUpdated()) {
823  // chunk size is not changed when fixed rows are updated or are marked as deleted.
824  // but when rows are vacuumed or varlen rows are updated (new rows are appended),
825  // chunk size will change. For vacuum, checkpoint should sync size from cpu to disk.
826  // For varlen update, it takes another route via fragmenter using disk-level buffer.
827  if (0 == numBytes && !chunk->isDirty()) {
828  chunk->setSize(newChunkSize);
829  }
830  //@todo use dirty flags to only flush pages of chunk that need to
831  // be flushed
832  chunk->write((int8_t*)srcBuffer->getMemoryPtr(),
833  newChunkSize,
834  0,
835  srcBuffer->getType(),
836  srcBuffer->getDeviceId());
837  } else if (srcBuffer->isAppended()) {
838  CHECK_LT(oldChunkSize, newChunkSize);
839  chunk->append((int8_t*)srcBuffer->getMemoryPtr() + oldChunkSize,
840  newChunkSize - oldChunkSize,
841  srcBuffer->getType(),
842  srcBuffer->getDeviceId());
843  } else {
844  // If dirty buffer comes in unmarked, it must be empty.
845  // Encoder sync is still required to flush the metadata.
846  CHECK(numBytes == 0)
847  << "Dirty buffer with size > 0 must be marked as isAppended() or isUpdated()";
848  }
849  // chunk->clearDirtyBits(); // Hack: because write and append will set dirty bits
850  //@todo commenting out line above will make sure this metadata is set
851  // but will trigger error on fetch chunk
852  srcBuffer->clearDirtyBits();
853  chunk->syncEncoder(srcBuffer);
854  return chunk;
855 }
#define LOG(tag)
Definition: Logger.h:216
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
FileBuffer * getOrCreateBuffer(const ChunkKey &key)
Definition: FileMgr.cpp:1631
#define CHECK_LT(x, y)
Definition: Logger.h:232
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

std::vector< PageMapping > File_Namespace::FileMgr::readPageMappingsFromStatusFile ( )
protected

Deserializes a page mapping vector from the expected status file.

Definition at line 1519 of file FileMgr.cpp.

References CHECK, CHECK_EQ, CHECK_GE, heavyai::file_size(), getFilePath(), and UPDATE_PAGE_VISIBILITY_STATUS.

Referenced by resumeFileCompaction().

1519  {
1520  auto file_path = getFilePath(UPDATE_PAGE_VISIBILITY_STATUS);
1521  CHECK(boost::filesystem::exists(file_path));
1522  std::ifstream status_file{file_path.string(),
1523  std::ios::in | std::ios::binary | std::ios::ate};
1524  CHECK(status_file.is_open());
1525  size_t file_size = status_file.tellg();
1526  status_file.seekg(0, std::ios::beg);
1527  CHECK_GE(file_size, sizeof(int64_t));
1528 
1529  int64_t page_mappings_count;
1530  status_file.read(reinterpret_cast<char*>(&page_mappings_count), sizeof(int64_t));
1531  auto page_mappings_byte_size = file_size - sizeof(int64_t);
1532  CHECK_EQ(page_mappings_byte_size % sizeof(PageMapping), static_cast<size_t>(0));
1533  CHECK_EQ(static_cast<size_t>(page_mappings_count),
1534  page_mappings_byte_size / sizeof(PageMapping));
1535 
1536  std::vector<PageMapping> page_mappings(page_mappings_count);
1537  status_file.read(reinterpret_cast<char*>(page_mappings.data()),
1538  page_mappings_byte_size);
1539  status_file.close();
1540  return page_mappings;
1541 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:375
#define CHECK_GE(x, y)
Definition: Logger.h:235
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1658
#define CHECK(condition)
Definition: Logger.h:222
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::readVersionFromDisk ( const std::string &  versionFileName) const
protected

Definition at line 1052 of file FileMgr.cpp.

References File_Namespace::close(), heavyai::file_size(), fileMgrBasePath_, File_Namespace::open(), File_Namespace::read(), and setup::version.

Referenced by createTopLevelMetadata(), migrateToLatestFileMgrVersion(), and writeAndSyncVersionToDisk().

1052  {
1053  const std::string versionFilePath(fileMgrBasePath_ + "/" + versionFileName);
1054  if (!boost::filesystem::exists(versionFilePath)) {
1055  return -1;
1056  }
1057  if (!boost::filesystem::is_regular_file(versionFilePath)) {
1058  return -1;
1059  }
1060  if (boost::filesystem::file_size(versionFilePath) < 4) {
1061  return -1;
1062  }
1063  FILE* versionFile = open(versionFilePath);
1064  int32_t version;
1065  read(versionFile, 0, sizeof(int32_t), (int8_t*)&version);
1066  close(versionFile);
1067  return version;
1068 }
std::string fileMgrBasePath_
Definition: FileMgr.h:396
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:142
string version
Definition: setup.in.py:73
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:107
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::removeTableRelatedDS ( const int32_t  db_id,
const int32_t  table_id 
)
override

Definition at line 1197 of file FileMgr.cpp.

References UNREACHABLE.

1197  {
1198  UNREACHABLE();
1199 }
#define UNREACHABLE()
Definition: Logger.h:266
void File_Namespace::FileMgr::renameAndSymlinkLegacyFiles ( const std::string &  table_data_dir)
static

Definition at line 1116 of file FileMgr.cpp.

References DATA_FILE_EXT, logger::INFO, File_Namespace::kLegacyDataFileExtension, and LOG.

Referenced by migrateLegacyFilesV1(), and anonymous_namespace{TableArchiver.cpp}::rename_table_directories().

1116  {
1117  std::map<boost::filesystem::path, boost::filesystem::path> old_to_new_paths;
1118  for (boost::filesystem::directory_iterator it(table_data_dir), end_it; it != end_it;
1119  it++) {
1120  const auto old_path = boost::filesystem::canonical(it->path());
1121  if (boost::filesystem::is_regular_file(it->status()) &&
1122  old_path.extension().string() == kLegacyDataFileExtension) {
1123  auto new_path = old_path;
1124  new_path.replace_extension(DATA_FILE_EXT);
1125  old_to_new_paths[old_path] = new_path;
1126  }
1127  }
1128  for (const auto& [old_path, new_path] : old_to_new_paths) {
1129  boost::filesystem::rename(old_path, new_path);
1130  LOG(INFO) << "Rebrand migration: Renamed " << old_path << " to " << new_path;
1131  boost::filesystem::create_symlink(new_path.filename(), old_path);
1132  LOG(INFO) << "Rebrand migration: Added symlink from " << old_path << " to "
1133  << new_path.filename();
1134  }
1135 }
#define LOG(tag)
Definition: Logger.h:216
#define DATA_FILE_EXT
Definition: File.h:25
constexpr auto kLegacyDataFileExtension
Definition: File.h:36

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::renameCompactionStatusFile ( const char *const  from_status,
const char *const  to_status 
)

Renames a given status file name to a new given file name.

Definition at line 1546 of file FileMgr.cpp.

References CHECK, and getFilePath().

Referenced by compactFiles(), and resumeFileCompaction().

1547  {
1548  auto from_status_file_path = getFilePath(from_status);
1549  auto to_status_file_path = getFilePath(to_status);
1550  CHECK(boost::filesystem::exists(from_status_file_path));
1551  CHECK(!boost::filesystem::exists(to_status_file_path));
1552  boost::filesystem::rename(from_status_file_path, to_status_file_path);
1553 }
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1658
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Page File_Namespace::FileMgr::requestFreePage ( size_t  pagesize,
const bool  isMetadata 
)
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 866 of file FileMgr.cpp.

References CHECK, createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, num_pages_per_data_file_, and num_pages_per_metadata_file_.

Referenced by File_Namespace::FileBuffer::addNewMultiPage(), init(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeMetadata().

866  {
867  std::lock_guard<std::mutex> lock(getPageMutex_);
868 
869  auto candidateFiles = fileIndex_.equal_range(pageSize);
870  int32_t pageNum = -1;
871  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
872  FileInfo* fileInfo = files_.at(fileIt->second);
873  pageNum = fileInfo->getFreePage();
874  if (pageNum != -1) {
875  return (Page(fileInfo->fileId, pageNum));
876  }
877  }
878  // if here then we need to add a file
879  FileInfo* fileInfo;
880  if (isMetadata) {
881  fileInfo = createFile(pageSize, num_pages_per_metadata_file_);
882  } else {
883  fileInfo = createFile(pageSize, num_pages_per_data_file_);
884  }
885  pageNum = fileInfo->getFreePage();
886  CHECK(pageNum != -1);
887  return (Page(fileInfo->fileId, pageNum));
888 }
std::mutex getPageMutex_
pointer to DB level metadata
Definition: FileMgr.h:409
static size_t num_pages_per_data_file_
Definition: FileMgr.h:417
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:400
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:418
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:951
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::requestFreePages ( size_t  npages,
size_t  pagesize,
std::vector< Page > &  pages,
const bool  isMetadata 
)

Obtains free pages – creates new files if necessary – of the requested size.

Given a page size and number of pages, this method updates the vector "pages" to include free pages of the requested size. These pages are immediately removed from the free list of the affected file(s). If there are not enough pages available among current files, new files are created and their pages are included in the vector.

Parameters
npages: The number of free pages requested
pagesize: The size of each requested page
pages: A vector containing the free pages obtained by this method

Definition at line 890 of file FileMgr.cpp.

References CHECK, createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, num_pages_per_data_file_, and num_pages_per_metadata_file_.

893  {
894  // not used currently
895  // @todo add method to FileInfo to get more than one page
896  std::lock_guard<std::mutex> lock(getPageMutex_);
897  auto candidateFiles = fileIndex_.equal_range(pageSize);
898  size_t numPagesNeeded = numPagesRequested;
899  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
900  FileInfo* fileInfo = files_.at(fileIt->second);
901  int32_t pageNum;
902  do {
903  pageNum = fileInfo->getFreePage();
904  if (pageNum != -1) {
905  pages.emplace_back(fileInfo->fileId, pageNum);
906  numPagesNeeded--;
907  }
908  } while (pageNum != -1 && numPagesNeeded > 0);
909  if (numPagesNeeded == 0) {
910  break;
911  }
912  }
913  while (numPagesNeeded > 0) {
914  FileInfo* fileInfo;
915  if (isMetadata) {
916  fileInfo = createFile(pageSize, num_pages_per_metadata_file_);
917  } else {
918  fileInfo = createFile(pageSize, num_pages_per_data_file_);
919  }
920  int32_t pageNum;
921  do {
922  pageNum = fileInfo->getFreePage();
923  if (pageNum != -1) {
924  pages.emplace_back(fileInfo->fileId, pageNum);
925  numPagesNeeded--;
926  }
927  } while (pageNum != -1 && numPagesNeeded > 0);
928  if (numPagesNeeded == 0) {
929  break;
930  }
931  }
932  CHECK(pages.size() == numPagesRequested);
933 }
std::mutex getPageMutex_
pointer to DB level metadata
Definition: FileMgr.h:409
static size_t num_pages_per_data_file_
Definition: FileMgr.h:417
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:400
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:418
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:951
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

void File_Namespace::FileMgr::resetEpochFloor ( )
inline

Definition at line 303 of file FileMgr.h.

303 { epoch_.floor(epoch_.ceiling()); }
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
void File_Namespace::FileMgr::resumeFileCompaction ( const std::string &  status_file_name)
protected

Resumes an interrupted file compaction process. This method would normally only be called when re-initializing the file manager after a crash occurred in the middle of file compaction.

Definition at line 1206 of file FileMgr.cpp.

References CHECK, compactFiles(), COPY_PAGES_STATUS, DELETE_EMPTY_FILES_STATUS, deleteEmptyFiles(), files_rw_mutex_, getFilePath(), readPageMappingsFromStatusFile(), renameCompactionStatusFile(), UNREACHABLE, UPDATE_PAGE_VISIBILITY_STATUS, and updateMappedPagesVisibility().

Referenced by init().

1206  {
1207  if (status_file_name == COPY_PAGES_STATUS) {
1208  // Delete status file and restart data compaction process
1209  auto file_path = getFilePath(status_file_name);
1210  CHECK(boost::filesystem::exists(file_path));
1211  boost::filesystem::remove(file_path);
1212  compactFiles();
1213  } else if (status_file_name == UPDATE_PAGE_VISIBILITY_STATUS) {
1214  // Execute second and third phases of data compaction
1216  auto page_mappings = readPageMappingsFromStatusFile();
1217  updateMappedPagesVisibility(page_mappings);
1219  deleteEmptyFiles();
1220  } else if (status_file_name == DELETE_EMPTY_FILES_STATUS) {
1221  // Execute last phase of data compaction
1223  deleteEmptyFiles();
1224  } else {
1225  UNREACHABLE() << "Unexpected status file name: " << status_file_name;
1226  }
1227 }
std::vector< PageMapping > readPageMappingsFromStatusFile()
Definition: FileMgr.cpp:1519
#define UNREACHABLE()
Definition: Logger.h:266
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:375
heavyai::unique_lock< heavyai::shared_mutex > write_lock
std::unique_lock< T > unique_lock
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1658
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:376
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:374
void updateMappedPagesVisibility(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1448
#define CHECK(condition)
Definition: Logger.h:222
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411
void renameCompactionStatusFile(const char *const from_status, const char *const to_status)
Definition: FileMgr.cpp:1546

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::rollOffOldData ( const int32_t  epochCeiling,
const bool  shouldCheckpoint 
)
private

Definition at line 675 of file FileMgr.cpp.

References checkpoint(), epoch_, Epoch::floor(), freePagesBeforeEpoch(), and maxRollbackEpochs_.

Referenced by checkpoint(), and init().

675  {
676  if (maxRollbackEpochs_ >= 0) {
677  auto min_epoch = std::max(epoch_ceiling - maxRollbackEpochs_, epoch_.floor());
678  if (min_epoch > epoch_.floor()) {
679  freePagesBeforeEpoch(min_epoch);
680  epoch_.floor(min_epoch);
681  if (should_checkpoint) {
682  checkpoint();
683  }
684  }
685  }
686 }
int32_t floor() const
Definition: Epoch.h:43
void checkpoint() override
Fsyncs data files, writes out epoch and fsyncs that.
Definition: FileMgr.cpp:694
void freePagesBeforeEpoch(const int32_t min_epoch)
Definition: FileMgr.cpp:661
int32_t maxRollbackEpochs_
Definition: FileMgr.h:395

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::setDataAndMetadataFileStats ( StorageStats storage_stats) const
private

Definition at line 338 of file FileMgr.cpp.

References CHECK, File_Namespace::StorageStats::data_file_count, File_Namespace::StorageStats::epoch, File_Namespace::StorageStats::epoch_floor, epochFloor(), logger::FATAL, File_Namespace::FileMetadata::file_size, fileMgrBasePath_, files_, files_rw_mutex_, getMetadataForFile(), File_Namespace::FileMetadata::is_data_file, File_Namespace::anonymous_namespace{FileMgr.cpp}::is_metadata_file(), isFullyInitted_, lastCheckpointedEpoch(), LOG, File_Namespace::StorageStats::metadata_file_count, File_Namespace::FileMetadata::num_pages, num_pages_per_metadata_file_, File_Namespace::FileMetadata::page_size, File_Namespace::StorageStats::total_data_file_size, File_Namespace::StorageStats::total_data_page_count, File_Namespace::StorageStats::total_free_data_page_count, File_Namespace::StorageStats::total_free_metadata_page_count, File_Namespace::StorageStats::total_metadata_file_size, and File_Namespace::StorageStats::total_metadata_page_count.

Referenced by getStorageStats().

338  {
340  if (!isFullyInitted_) {
341  CHECK(!fileMgrBasePath_.empty());
342  boost::filesystem::path path(fileMgrBasePath_);
343  if (boost::filesystem::exists(path)) {
344  if (!boost::filesystem::is_directory(path)) {
345  LOG(FATAL) << "getStorageStats: Specified path '" << fileMgrBasePath_
346  << "' for table data is not a directory.";
347  }
348 
349  storage_stats.epoch = lastCheckpointedEpoch();
350  storage_stats.epoch_floor = epochFloor();
351  boost::filesystem::directory_iterator
352  endItr; // default construction yields past-the-end
353  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr;
354  ++fileIt) {
355  FileMetadata file_metadata = getMetadataForFile(fileIt);
356  if (file_metadata.is_data_file) {
357  if (is_metadata_file(file_metadata.file_size,
358  file_metadata.page_size,
360  storage_stats.metadata_file_count++;
361  storage_stats.total_metadata_file_size += file_metadata.file_size;
362  storage_stats.total_metadata_page_count += file_metadata.num_pages;
363  } else {
364  storage_stats.data_file_count++;
365  storage_stats.total_data_file_size += file_metadata.file_size;
366  storage_stats.total_data_page_count += file_metadata.num_pages;
367  }
368  }
369  }
370  }
371  } else {
372  storage_stats.epoch = lastCheckpointedEpoch();
373  storage_stats.epoch_floor = epochFloor();
374  storage_stats.total_free_metadata_page_count = 0;
375  storage_stats.total_free_data_page_count = 0;
376 
377  // We already initialized this table so take the faster path of walking through the
378  // FileInfo objects and getting metadata from there
379  for (const auto& file_info_entry : files_) {
380  const auto file_info = file_info_entry.second;
381  if (is_metadata_file(
382  file_info->size(), file_info->pageSize, num_pages_per_metadata_file_)) {
383  storage_stats.metadata_file_count++;
384  storage_stats.total_metadata_file_size +=
385  file_info->pageSize * file_info->numPages;
386  storage_stats.total_metadata_page_count += file_info->numPages;
387  storage_stats.total_free_metadata_page_count.value() +=
388  file_info->freePages.size();
389  } else {
390  storage_stats.data_file_count++;
391  storage_stats.total_data_file_size += file_info->pageSize * file_info->numPages;
392  storage_stats.total_data_page_count += file_info->numPages;
393  storage_stats.total_free_data_page_count.value() += file_info->freePages.size();
394  }
395  }
396  }
397 }
heavyai::shared_lock< heavyai::shared_mutex > read_lock
#define LOG(tag)
Definition: Logger.h:216
std::string fileMgrBasePath_
Definition: FileMgr.h:396
int32_t lastCheckpointedEpoch() const
Returns value of epoch at last checkpoint.
Definition: FileMgr.h:299
std::shared_lock< T > shared_lock
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:418
bool is_metadata_file(size_t file_size, size_t page_size, size_t num_pages_per_metadata_file)
Definition: FileMgr.cpp:321
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator) const
Definition: FileMgr.cpp:146
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
#define CHECK(condition)
Definition: Logger.h:222
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411
int32_t epochFloor() const
Definition: FileMgr.h:281

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::setEpoch ( const int32_t  newEpoch)
protected

Definition at line 1181 of file FileMgr.cpp.

References Epoch::ceiling(), describeSelf(), epoch_, Epoch::floor(), and writeAndSyncEpochToDisk().

Referenced by init().

1181  {
1182  if (newEpoch < epoch_.floor()) {
1183  std::stringstream error_message;
1184  error_message << "Cannot set epoch for " << describeSelf()
1185  << " lower than the minimum rollback epoch (" << epoch_.floor() << ").";
1186  throw std::runtime_error(error_message.str());
1187  }
1188  epoch_.ceiling(newEpoch);
1190 }
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:647
virtual std::string describeSelf() const
Definition: FileMgr.cpp:688

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::setNumPagesPerDataFile ( size_t  num_pages)
static

Definition at line 1557 of file FileMgr.cpp.

References num_pages_per_data_file_.

1557  {
1558  num_pages_per_data_file_ = num_pages;
1559 }
static size_t num_pages_per_data_file_
Definition: FileMgr.h:417
void File_Namespace::FileMgr::setNumPagesPerMetadataFile ( size_t  num_pages)
static

Definition at line 1561 of file FileMgr.cpp.

References num_pages_per_metadata_file_.

1561  {
1562  num_pages_per_metadata_file_ = num_pages;
1563 }
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:418
void File_Namespace::FileMgr::sortAndCopyFilePagesForCompaction ( size_t  page_size,
std::vector< PageMapping > &  page_mappings,
std::set< Page > &  touched_pages 
)
protected

Sorts all files with the given page size in ascending order of number of free pages, then copies pages from files with more free pages to those with fewer free pages. Destination (copied-to) pages are left as free when copying. Copied source and destination pages are recorded in the page mapping.

Definition at line 1293 of file FileMgr.cpp.

References CHECK, copySourcePageForCompaction(), File_Namespace::Page::fileId, fileIndex_, files_, File_Namespace::FileInfo::freePages, freePages(), and gpu_enabled::sort().

Referenced by compactFiles().

1295  {
1296  std::vector<FileInfo*> sorted_file_infos;
1297  auto range = fileIndex_.equal_range(page_size);
1298  for (auto it = range.first; it != range.second; it++) {
1299  sorted_file_infos.emplace_back(files_.at(it->second));
1300  }
1301  if (sorted_file_infos.empty()) {
1302  return;
1303  }
1304 
1305  // Sort file infos in ascending order of free pages count i.e. from files with
1306  // the least number of free pages to those with the highest number of free pages.
1307  std::sort(sorted_file_infos.begin(),
1308  sorted_file_infos.end(),
1309  [](const FileInfo* file_1, const FileInfo* file_2) {
1310  return file_1->freePages.size() < file_2->freePages.size();
1311  });
1312 
1313  size_t destination_index = 0, source_index = sorted_file_infos.size() - 1;
1314 
1315  // For page copy destinations, skip files without free pages.
1316  while (destination_index < source_index &&
1317  sorted_file_infos[destination_index]->freePages.empty()) {
1318  destination_index++;
1319  }
1320 
1321  // For page copy sources, skip files with only free pages.
1322  while (destination_index < source_index &&
1323  sorted_file_infos[source_index]->freePages.size() ==
1324  sorted_file_infos[source_index]->numPages) {
1325  source_index--;
1326  }
1327 
1328  std::set<size_t> source_used_pages;
1329  CHECK(destination_index <= source_index);
1330 
1331  // Get the total number of free pages available for compaction
1332  int64_t total_free_pages{0};
1333  for (size_t i = destination_index; i <= source_index; i++) {
1334  total_free_pages += sorted_file_infos[i]->numFreePages();
1335  }
1336 
1337  while (destination_index < source_index) {
1338  if (source_used_pages.empty()) {
1339  // Populate source_used_pages with only used pages in the source file.
1340  auto source_file_info = sorted_file_infos[source_index];
1341  auto& free_pages = source_file_info->freePages;
1342  for (size_t page_num = 0; page_num < source_file_info->numPages; page_num++) {
1343  if (free_pages.find(page_num) == free_pages.end()) {
1344  source_used_pages.emplace(page_num);
1345  }
1346  }
1347 
1348  // Free pages of current source file will not be copy destinations
1349  total_free_pages -= source_file_info->numFreePages();
1350  }
1351 
1352  // Exit early if there are not enough free pages to empty the next file
1353  if (total_free_pages - static_cast<int64_t>(source_used_pages.size()) < 0) {
1354  return;
1355  }
1356 
1357  // Copy pages from source files to destination files
1358  auto dest_file_info = sorted_file_infos[destination_index];
1359  while (!source_used_pages.empty() && !dest_file_info->freePages.empty()) {
1360  // Get next page to copy
1361  size_t source_page_num = *source_used_pages.begin();
1362  source_used_pages.erase(source_page_num);
1363 
1364  Page source_page{sorted_file_infos[source_index]->fileId, source_page_num};
1365  copySourcePageForCompaction(source_page,
1366  sorted_file_infos[destination_index],
1367  page_mappings,
1368  touched_pages);
1369  total_free_pages--;
1370  }
1371 
1372  if (source_used_pages.empty()) {
1373  source_index--;
1374  }
1375 
1376  if (dest_file_info->freePages.empty()) {
1377  destination_index++;
1378  }
1379  }
1380 }
void copySourcePageForCompaction(const Page &source_page, FileInfo *destination_file_info, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
Definition: FileMgr.cpp:1388
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:400
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::syncFilesToDisk ( )
protected

Definition at line 1565 of file FileMgr.cpp.

References CHECK, files_, and files_rw_mutex_.

Referenced by checkpoint().

1565  {
1567  for (auto file_info_entry : files_) {
1568  int32_t status = file_info_entry.second->syncToDisk();
1569  CHECK(status == 0) << "Could not sync file to disk";
1570  }
1571 }
std::shared_lock< T > shared_lock
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399
#define CHECK(condition)
Definition: Logger.h:222
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:411

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::updateMappedPagesVisibility ( const std::vector< PageMapping > &  page_mappings)
protected

Goes through the given page mapping and marks source (copied-from) pages as free while marking destination (copied-to) pages as used (by setting the header size).

Definition at line 1448 of file FileMgr.cpp.

References CHECK_GT, logger::FATAL, files_, and LOG.

Referenced by compactFiles(), and resumeFileCompaction().

1448  {
1449  for (const auto& page_mapping : page_mappings) {
1450  auto destination_file = files_.at(page_mapping.destination_file_id);
1451 
1452  // Set destination page header size
1453  auto header_size = page_mapping.source_page_header_size;
1454  CHECK_GT(header_size, 0);
1455  destination_file->write(
1456  page_mapping.destination_page_num * destination_file->pageSize,
1457  sizeof(PageHeaderSizeType),
1458  reinterpret_cast<int8_t*>(&header_size));
1459  auto source_file = files_.at(page_mapping.source_file_id);
1460 
1461  // Free source page
1462  PageHeaderSizeType free_page_header_size{0};
1463  source_file->write(page_mapping.source_page_num * source_file->pageSize,
1464  sizeof(PageHeaderSizeType),
1465  reinterpret_cast<int8_t*>(&free_page_header_size));
1466  source_file->freePageDeferred(page_mapping.source_page_num);
1467  }
1468 
1469  for (auto file_info_entry : files_) {
1470  int32_t status = file_info_entry.second->syncToDisk();
1471  if (status != 0) {
1472  LOG(FATAL) << "Could not sync file to disk";
1473  }
1474  }
1475 }
#define LOG(tag)
Definition: Logger.h:216
#define CHECK_GT(x, y)
Definition: Logger.h:234
int32_t PageHeaderSizeType
Definition: FileMgr.h:127
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:399

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::updatePageIfDeleted ( FileInfo file_info,
ChunkKey chunk_key,
int32_t  contingent,
int32_t  page_epoch,
int32_t  page_num 
)
virtual

Deletes or recovers a page based on the last checkpointed epoch.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1606 of file FileMgr.cpp.

References CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, epoch(), File_Namespace::FileInfo::freePageImmediate(), get_fileMgrKey(), File_Namespace::is_page_deleted_with_checkpoint(), File_Namespace::is_page_deleted_without_checkpoint(), and File_Namespace::FileInfo::recoverPage().

Referenced by File_Namespace::FileInfo::openExistingFile().

1610  {
1611  // If the parent FileMgr has a fileMgrKey, then all keys are locked to one table and
1612  // can be set from the manager.
1613  auto [db_id, tb_id] = get_fileMgrKey();
1614  chunk_key[CHUNK_KEY_DB_IDX] = db_id;
1615  chunk_key[CHUNK_KEY_TABLE_IDX] = tb_id;
1616 
1617  auto table_epoch = epoch(db_id, tb_id);
1618 
1619  if (is_page_deleted_with_checkpoint(table_epoch, page_epoch, contingent)) {
1620  file_info->freePageImmediate(page_num);
1621  return true;
1622  }
1623 
1624  // Recover page if it was deleted but not checkpointed.
1625  if (is_page_deleted_without_checkpoint(table_epoch, page_epoch, contingent)) {
1626  file_info->recoverPage(chunk_key, page_num);
1627  }
1628  return false;
1629 }
bool is_page_deleted_without_checkpoint(int32_t table_epoch, int32_t page_epoch, int32_t contingent)
Definition: FileInfo.cpp:283
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
const TablePair get_fileMgrKey() const
Definition: FileMgr.h:340
bool is_page_deleted_with_checkpoint(int32_t table_epoch, int32_t page_epoch, int32_t contingent)
Definition: FileInfo.cpp:271
int32_t epoch() const
Definition: FileMgr.h:517

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeAndSyncEpochToDisk ( )
protected

Definition at line 647 of file FileMgr.cpp.

References Epoch::byte_size(), CHECK, epoch_, epochFile_, epochIsCheckpointed_, heavyai::fsync(), Epoch::storage_ptr(), and File_Namespace::write().

Referenced by checkpoint(), createEpochFile(), migrateEpochFileV0(), and setEpoch().

647  {
648  CHECK(epochFile_);
650  int32_t status = fflush(epochFile_);
651  CHECK(status == 0) << "Could not flush epoch file to disk";
652 #ifdef __APPLE__
653  status = fcntl(fileno(epochFile_), 51);
654 #else
655  status = heavyai::fsync(fileno(epochFile_));
656 #endif
657  CHECK(status == 0) << "Could not sync epoch file to disk";
658  epochIsCheckpointed_ = true;
659 }
int8_t * storage_ptr()
Definition: Epoch.h:61
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:150
static size_t byte_size()
Definition: Epoch.h:63
int fsync(int fd)
Definition: heavyai_fs.cpp:62
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeAndSyncVersionToDisk ( const std::string &  versionFileName,
const int32_t  version 
)
protected

Definition at line 1070 of file FileMgr.cpp.

References File_Namespace::close(), File_Namespace::create(), epochFile_, logger::FATAL, fileMgrBasePath_, heavyai::fsync(), logger::INFO, LOG, File_Namespace::open(), readVersionFromDisk(), and File_Namespace::write().

Referenced by createTopLevelMetadata(), init(), migrateEpochFileV0(), migrateLegacyFilesV1(), and migrateToLatestFileMgrVersion().

1071  {
1072  const std::string versionFilePath(fileMgrBasePath_ + "/" + versionFileName);
1073  FILE* versionFile;
1074  if (boost::filesystem::exists(versionFilePath)) {
1075  int32_t oldVersion = readVersionFromDisk(versionFileName);
1076  LOG(INFO) << "Storage version file `" << versionFilePath
1077  << "` already exists, its current version is " << oldVersion;
1078  versionFile = open(versionFilePath);
1079  } else {
1080  versionFile = create(versionFilePath, sizeof(int32_t));
1081  }
1082  write(versionFile, 0, sizeof(int32_t), (int8_t*)&version);
1083  int32_t status = fflush(versionFile);
1084  if (status != 0) {
1085  LOG(FATAL) << "Could not flush version file " << versionFilePath << " to disk";
1086  }
1087 #ifdef __APPLE__
1088  status = fcntl(fileno(epochFile_), 51);
1089 #else
1090  status = heavyai::fsync(fileno(versionFile));
1091 #endif
1092  if (status != 0) {
1093  LOG(FATAL) << "Could not sync version file " << versionFilePath << " to disk";
1094  }
1095  close(versionFile);
1096 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1052
#define LOG(tag)
Definition: Logger.h:216
std::string fileMgrBasePath_
Definition: FileMgr.h:396
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:57
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:150
string version
Definition: setup.in.py:73
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:107
int fsync(int fd)
Definition: heavyai_fs.cpp:62
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:128

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeDirtyBuffers ( )
private

Definition at line 1643 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, and epoch().

Referenced by checkpoint().

1643  {
1645  for (auto [key, buf] : chunkIndex_) {
1646  if (buf->isDirty()) {
1647  buf->writeMetadata(epoch());
1648  buf->clearDirtyBits();
1649  }
1650  }
1651 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:328
std::unique_lock< T > unique_lock
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:410
int32_t epoch() const
Definition: FileMgr.h:517

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writePageMappingsToStatusFile ( const std::vector< PageMapping > &  page_mappings)

Serializes a page mapping vector to the expected status file. The page mapping vector is serialized in the following format: [{page mapping vector size}, {page mapping vector data bytes ...}]

Definition at line 1503 of file FileMgr.cpp.

References CHECK, COPY_PAGES_STATUS, and getFilePath().

Referenced by compactFiles().

1504  {
1505  auto file_path = getFilePath(COPY_PAGES_STATUS);
1506  CHECK(boost::filesystem::exists(file_path));
1507  CHECK(boost::filesystem::is_empty(file_path));
1508  std::ofstream status_file{file_path.string(), std::ios::out | std::ios::binary};
1509  int64_t page_mappings_count = page_mappings.size();
1510  status_file.write(reinterpret_cast<const char*>(&page_mappings_count), sizeof(int64_t));
1511  status_file.write(reinterpret_cast<const char*>(page_mappings.data()),
1512  page_mappings_count * sizeof(PageMapping));
1513  status_file.close();
1514 }
boost::filesystem::path getFilePath(const std::string &file_name) const
Definition: FileMgr.cpp:1658
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:374
#define CHECK(condition)
Definition: Logger.h:222

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class GlobalFileMgr
friend

Definition at line 155 of file FileMgr.h.

Member Data Documentation

constexpr char const* File_Namespace::FileMgr::COPY_PAGES_STATUS {"pending_data_compaction_0"}
static
constexpr char File_Namespace::FileMgr::DB_META_FILENAME[] = "dbmeta"
static

Definition at line 387 of file FileMgr.h.

Referenced by createTopLevelMetadata().

int32_t File_Namespace::FileMgr::db_version_
protected

DB version from dbmeta file, should be compatible with GlobalFileMgr::omnisci_db_version_

Definition at line 404 of file FileMgr.h.

Referenced by createTopLevelMetadata().

FILE* File_Namespace::FileMgr::DBMetaFile_ = nullptr
protected

pointer to DB level metadata

Definition at line 408 of file FileMgr.h.

Referenced by closePhysicalUnlocked(), and ~FileMgr().

constexpr size_t File_Namespace::FileMgr::DEFAULT_NUM_PAGES_PER_DATA_FILE {256}
static

Definition at line 370 of file FileMgr.h.

constexpr size_t File_Namespace::FileMgr::DEFAULT_NUM_PAGES_PER_METADATA_FILE {4096}
static

Definition at line 371 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::getMinimumSize().

constexpr char const* File_Namespace::FileMgr::DELETE_EMPTY_FILES_STATUS {"pending_data_compaction_2"}
static
Epoch File_Namespace::FileMgr::epoch_
private
constexpr char File_Namespace::FileMgr::EPOCH_FILENAME[] = "epoch_metadata"
static

Definition at line 386 of file FileMgr.h.

Referenced by coreInit(), init(), and migrateEpochFileV0().

FILE* File_Namespace::FileMgr::epochFile_ = nullptr
private
bool File_Namespace::FileMgr::epochIsCheckpointed_ = true
private

Definition at line 527 of file FileMgr.h.

Referenced by writeAndSyncEpochToDisk().

constexpr char File_Namespace::FileMgr::FILE_MGR_VERSION_FILENAME[] = "filemgr_version"
static
PageSizeFileMMap File_Namespace::FileMgr::fileIndex_
protected

Maps page sizes to FileInfo objects.

Definition at line 400 of file FileMgr.h.

Referenced by clearFileInfos(), createFile(), openExistingFile(), requestFreePage(), requestFreePages(), and sortAndCopyFilePagesForCompaction().

TablePair File_Namespace::FileMgr::fileMgrKey_
private

Key of this FileMgr (presumably the fileMgrKey TablePair passed to the constructor).

Definition at line 524 of file FileMgr.h.

Referenced by coreInit(), describeSelf(), and FileMgr().

int32_t File_Namespace::FileMgr::fileMgrVersion_
protected

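Storage version of this FileMgr (presumably the value kept in the FILE_MGR_VERSION_FILENAME file and compared against latestFileMgrVersion_; the DB version read from the dbmeta file is db_version_).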

Definition at line 406 of file FileMgr.h.

Referenced by init(), and migrateToLatestFileMgrVersion().

std::vector<std::pair<FileInfo*, int32_t> > File_Namespace::FileMgr::free_pages_
protected

Definition at line 414 of file FileMgr.h.

Referenced by free_page(), and freePages().

std::mutex File_Namespace::FileMgr::getPageMutex_
protected

Mutex used when requesting free pages (see requestFreePage() and requestFreePages()).

Definition at line 409 of file FileMgr.h.

Referenced by requestFreePage(), and requestFreePages().

GlobalFileMgr* File_Namespace::FileMgr::gfm_
private

Global FileMgr.

Definition at line 523 of file FileMgr.h.

Referenced by coreInit(), FileMgr(), getDBConvert(), getDBVersion(), and init().

constexpr int32_t File_Namespace::FileMgr::INVALID_VERSION = -1
static

Definition at line 389 of file FileMgr.h.

Referenced by createTopLevelMetadata(), and migrateToLatestFileMgrVersion().

bool File_Namespace::FileMgr::isFullyInitted_ {false}
protected
const int32_t File_Namespace::FileMgr::latestFileMgrVersion_ {2}
protected

Definition at line 407 of file FileMgr.h.

Referenced by init(), and migrateToLatestFileMgrVersion().

constexpr char File_Namespace::FileMgr::LEGACY_EPOCH_FILENAME[] = "epoch"
static

Definition at line 385 of file FileMgr.h.

Referenced by migrateEpochFileV0().

int32_t File_Namespace::FileMgr::maxRollbackEpochs_
protected

Definition at line 395 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::CachingFileMgr(), and rollOffOldData().

heavyai::shared_mutex File_Namespace::FileMgr::mutex_free_page_
mutable protected

Definition at line 413 of file FileMgr.h.

Referenced by free_page(), and freePages().

unsigned File_Namespace::FileMgr::nextFileId_
protected

the index of the next file id
size_t File_Namespace::FileMgr::num_pages_per_data_file_ {DEFAULT_NUM_PAGES_PER_DATA_FILE}
static protected
size_t File_Namespace::FileMgr::num_pages_per_metadata_file_ {DEFAULT_NUM_PAGES_PER_METADATA_FILE}
static protected
size_t File_Namespace::FileMgr::num_reader_threads_
protected

Number of threads used when reading data (see initializeNumThreads()).

Definition at line 401 of file FileMgr.h.

Referenced by initializeNumThreads().

constexpr char const* File_Namespace::FileMgr::UPDATE_PAGE_VISIBILITY_STATUS {"pending_data_compaction_1"}
static

The documentation for this class was generated from the following files: