OmniSciDB
a667adc9c8
|
A FileMgr capable of limiting its size and storing data from multiple tables in a shared directory. For any table that supports DiskCaching, the CachingFileMgr must contain either metadata for all table chunks, or for none (the cache either has no knowledge of that table, or has complete knowledge of that table). Any data chunk within a table may or may not be contained within the cache. More...
#include <CachingFileMgr.h>
Public Member Functions | |
CachingFileMgr (const std::string &base_path, const size_t num_reader_threads=0) | |
~CachingFileMgr () | |
bool | coreInit () override |
Determines file path, and if exists, runs file migration and opens and reads epoch file. More... | |
MgrType | getMgrType () override |
std::string | getStringMgrType () override |
size_t | getDefaultPageSize () |
size_t | getMaxSize () override |
size_t | getInUseSize () override |
size_t | getAllocated () override |
bool | isAllocationCapped () override |
void | clearForTable (int db_id, int tb_id) |
Removes all data related to the given table (pages and subdirectories). More... | |
std::string | getOrAddTableDir (int db_id, int tb_id) |
Returns (and optionally creates) a subdirectory for table-specific persistent data (e.g. serialized foreign data wrappers). More... | |
bool | hasFileMgrKey () const override |
Query to determine if the contained pages will have their database and table ids overridden by the filemgr key (FileMgr does this). More... | |
void | closeRemovePhysical () override |
uint64_t | getChunkSpaceReservedByTable (int db_id, int tb_id) |
uint64_t | getMetadataSpaceReservedByTable (int db_id, int tb_id) |
uint64_t | getWrapperSpaceReservedByTable (int db_id, int tb_id) |
uint64_t | getSpaceReservedByTable (int db_id, int tb_id) |
std::string | describeSelf () override |
![]() | |
FileMgr (const int32_t deviceId, GlobalFileMgr *gfm, const std::pair< const int32_t, const int32_t > fileMgrKey, const int32_t max_rollback_epochs=-1, const size_t num_reader_threads=0, const int32_t epoch=-1, const size_t defaultPageSize=DEFAULT_PAGE_SIZE) | |
Constructor. More... | |
FileMgr (const int32_t deviceId, GlobalFileMgr *gfm, const std::pair< const int32_t, const int32_t > fileMgrKey, const size_t defaultPageSize, const bool runCoreInit) | |
FileMgr (GlobalFileMgr *gfm, const size_t defaultPageSize, std::string basePath) | |
virtual | ~FileMgr () override |
Destructor. More... | |
StorageStats | getStorageStats () |
FileBuffer * | createBuffer (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override |
Creates a chunk with the specified key and page size. More... | |
bool | isBufferOnDevice (const ChunkKey &key) override |
void | deleteBuffer (const ChunkKey &key, const bool purge=true) override |
Deletes the chunk with the specified key. More... | |
void | deleteBuffersWithPrefix (const ChunkKey &keyPrefix, const bool purge=true) override |
FileBuffer * | getBuffer (const ChunkKey &key, const size_t numBytes=0) override |
Returns a pointer to the chunk with the specified key. More... | |
void | fetchBuffer (const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override |
FileBuffer * | putBuffer (const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override |
Puts the contents of d into the Chunk with the given key. More... | |
AbstractBuffer * | alloc (const size_t numBytes) override |
void | free (AbstractBuffer *buffer) override |
Page | requestFreePage (size_t pagesize, const bool isMetadata) |
MgrType | getMgrType () override |
std::string | getStringMgrType () override |
std::string | printSlabs () override |
void | clearSlabs () override |
size_t | getMaxSize () override |
size_t | getInUseSize () override |
size_t | getAllocated () override |
bool | isAllocationCapped () override |
FileInfo * | getFileInfoForFileId (const int32_t fileId) |
FileMetadata | getMetadataForFile (const boost::filesystem::directory_iterator &fileIterator) |
void | init (const size_t num_reader_threads, const int32_t epochOverride) |
void | init (const std::string &dataPathToConvertFrom, const int32_t epochOverride) |
void | copyPage (Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset) |
void | requestFreePages (size_t npages, size_t pagesize, std::vector< Page > &pages, const bool isMetadata) |
Obtains free pages – creates new files if necessary – of the requested size. More... | |
void | getChunkMetadataVecForKeyPrefix (ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix) override |
void | checkpoint () override |
Fsyncs data files, writes out epoch and fsyncs that. More... | |
void | checkpoint (const int32_t db_id, const int32_t tb_id) override |
int32_t | epoch () |
Returns current value of epoch - should be one greater than recorded at last checkpoint. More... | |
int32_t | epochFloor () |
int32_t | incrementEpoch () |
int32_t | lastCheckpointedEpoch () |
Returns value of epoch at last checkpoint. More... | |
int32_t | maxRollbackEpochs () |
Returns the value of max_rollback_epochs. More... | |
size_t | getNumReaderThreads () |
Returns the number of threads, defined by the num-reader-threads parameter, that should be used during the initial load and subsequent reads of data. More... | |
FILE * | getFileForFileId (const int32_t fileId) |
Returns FILE pointer associated with requested fileId. More... | |
size_t | getNumChunks () override |
size_t | getNumUsedPages () const |
size_t | getNumUsedMetadataPages () const |
size_t | getNumUsedMetadataPagesForChunkKey (const ChunkKey &chunkKey) const |
int32_t | getDBVersion () const |
Index for looking up chunks. More... | |
bool | getDBConvert () const |
void | createTopLevelMetadata () |
std::string | getFileMgrBasePath () const |
void | removeTableRelatedDS (const int32_t db_id, const int32_t table_id) override |
void | free_page (std::pair< FileInfo *, int32_t > &&page) |
const std::pair< const int32_t, const int32_t > | get_fileMgrKey () const |
boost::filesystem::path | getFilePath (const std::string &file_name) |
void | writePageMappingsToStatusFile (const std::vector< PageMapping > &page_mappings) |
void | renameCompactionStatusFile (const char *const from_status, const char *const to_status) |
void | compactFiles () |
Additional Inherited Members | |
![]() | |
static void | setNumPagesPerDataFile (size_t num_pages) |
static void | setNumPagesPerMetadataFile (size_t num_pages) |
![]() | |
ChunkKeyToChunkMap | chunkIndex_ |
![]() | |
static constexpr size_t | DEFAULT_NUM_PAGES_PER_DATA_FILE {256} |
static constexpr size_t | DEFAULT_NUM_PAGES_PER_METADATA_FILE {4096} |
static constexpr char const * | COPY_PAGES_STATUS {"pending_data_compaction_0"} |
static constexpr char const * | UPDATE_PAGE_VISIBILITY_STATUS {"pending_data_compaction_1"} |
static constexpr char const * | DELETE_EMPTY_FILES_STATUS {"pending_data_compaction_2"} |
![]() | |
FileMgr () | |
FileInfo * | createFile (const size_t pageSize, const size_t numPages) |
Adds a file to the file manager repository. More... | |
FileInfo * | openExistingFile (const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec) |
void | createEpochFile (const std::string &epochFileName) |
int32_t | openAndReadLegacyEpochFile (const std::string &epochFileName) |
void | openAndReadEpochFile (const std::string &epochFileName) |
void | writeAndSyncEpochToDisk () |
void | setEpoch (const int32_t newEpoch) |
void | freePagesBeforeEpoch (const int32_t minRollbackEpoch) |
void | rollOffOldData (const int32_t epochCeiling, const bool shouldCheckpoint) |
int32_t | readVersionFromDisk (const std::string &versionFileName) const |
void | writeAndSyncVersionToDisk (const std::string &versionFileName, const int32_t version) |
void | processFileFutures (std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec) |
FileBuffer * | createBufferUnlocked (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) |
void | migrateToLatestFileMgrVersion () |
void | migrateEpochFileV0 () |
OpenFilesResult | openFiles () |
void | clearFileInfos () |
void | copySourcePageForCompaction (const Page &source_page, FileInfo *destination_file_info, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages) |
int32_t | copyPageWithoutHeaderSize (const Page &source_page, const Page &destination_page) |
void | sortAndCopyFilePagesForCompaction (size_t page_size, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages) |
void | updateMappedPagesVisibility (const std::vector< PageMapping > &page_mappings) |
void | deleteEmptyFiles () |
void | resumeFileCompaction (const std::string &status_file_name) |
std::vector< PageMapping > | readPageMappingsFromStatusFile () |
FileMgr (const int epoch) | |
void | closePhysicalUnlocked () |
![]() | |
int32_t | maxRollbackEpochs_ |
std::string | fileMgrBasePath_ |
std::map< int32_t, FileInfo * > | files_ |
PageSizeFileMMap | fileIndex_ |
A map of files accessible via a file identifier. More... | |
size_t | num_reader_threads_ |
Maps page sizes to FileInfo objects. More... | |
size_t | defaultPageSize_ |
number of threads used when loading data More... | |
unsigned | nextFileId_ |
Epoch | epoch_ |
the index of the next file id More... | |
bool | epochIsCheckpointed_ = true |
FILE * | epochFile_ = nullptr |
int32_t | db_version_ |
int32_t | fileMgrVersion_ |
const int32_t | latestFileMgrVersion_ {1} |
FILE * | DBMetaFile_ = nullptr |
std::mutex | getPageMutex_ |
pointer to DB level metadata More... | |
mapd_shared_mutex | chunkIndexMutex_ |
mapd_shared_mutex | files_rw_mutex_ |
mapd_shared_mutex | mutex_free_page_ |
std::vector< std::pair < FileInfo *, int32_t > > | free_pages_ |
bool | isFullyInitted_ {false} |
![]() | |
static size_t | num_pages_per_data_file_ {DEFAULT_NUM_PAGES_PER_DATA_FILE} |
static size_t | num_pages_per_metadata_file_ {DEFAULT_NUM_PAGES_PER_METADATA_FILE} |
A FileMgr capable of limiting its size and storing data from multiple tables in a shared directory. For any table that supports DiskCaching, the CachingFileMgr must contain either metadata for all table chunks, or for none (the cache either has no knowledge of that table, or has complete knowledge of that table). Any data chunk within a table may or may not be contained within the cache.
Definition at line 44 of file CachingFileMgr.h.
File_Namespace::CachingFileMgr::CachingFileMgr | ( | const std::string & | base_path, |
const size_t | num_reader_threads = 0 |
||
) |
Definition at line 30 of file CachingFileMgr.cpp.
References DEFAULT_PAGE_SIZE, File_Namespace::FileMgr::defaultPageSize_, File_Namespace::FileMgr::fileMgrBasePath_, File_Namespace::FileMgr::init(), File_Namespace::FileMgr::maxRollbackEpochs_, and File_Namespace::FileMgr::nextFileId_.
|
inline |
Definition at line 47 of file CachingFileMgr.h.
void File_Namespace::CachingFileMgr::clearForTable | ( | int | db_id, |
int | tb_id | ||
) |
Removes all data related to the given table (pages and subdirectories).
Definition at line 55 of file CachingFileMgr.cpp.
References File_Namespace::FileMgr::checkpoint(), File_Namespace::FileMgr::chunkIndex_, File_Namespace::FileMgr::chunkIndexMutex_, File_Namespace::get_dir_name_for_table(), File_Namespace::FileMgr::getFileMgrBasePath(), and in_same_table().
|
overridevirtual |
Closes files and removes the caching directory.
Reimplemented from File_Namespace::FileMgr.
Definition at line 96 of file CachingFileMgr.cpp.
References File_Namespace::FileMgr::closePhysicalUnlocked(), File_Namespace::FileMgr::files_rw_mutex_, and File_Namespace::FileMgr::getFileMgrBasePath().
|
overridevirtual |
Determines file path, and if exists, runs file migration and opens and reads epoch file.
Reimplemented from File_Namespace::FileMgr.
Definition at line 39 of file CachingFileMgr.cpp.
References EPOCH_FILENAME, logger::FATAL, File_Namespace::FileMgr::fileMgrBasePath_, File_Namespace::FileMgr::files_rw_mutex_, LOG, File_Namespace::FileMgr::migrateToLatestFileMgrVersion(), and File_Namespace::FileMgr::openAndReadEpochFile().
|
overridevirtual |
Reimplemented from File_Namespace::FileMgr.
Definition at line 155 of file CachingFileMgr.cpp.
|
inlineoverride |
uint64_t File_Namespace::CachingFileMgr::getChunkSpaceReservedByTable | ( | int | db_id, |
int | tb_id | ||
) |
Set of functions to determine how much space is reserved in a table by type.
Definition at line 111 of file CachingFileMgr.cpp.
References CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, File_Namespace::FileMgr::chunkIndex_, and File_Namespace::FileMgr::chunkIndexMutex_.
Referenced by getSpaceReservedByTable().
|
inline |
Definition at line 58 of file CachingFileMgr.h.
References File_Namespace::FileMgr::defaultPageSize_.
|
inlineoverride |
|
inlineoverride |
uint64_t File_Namespace::CachingFileMgr::getMetadataSpaceReservedByTable | ( | int | db_id, |
int | tb_id | ||
) |
Definition at line 122 of file CachingFileMgr.cpp.
References CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, File_Namespace::FileMgr::chunkIndex_, File_Namespace::FileMgr::chunkIndexMutex_, and METADATA_PAGE_SIZE.
Referenced by getSpaceReservedByTable().
|
inlineoverride |
Definition at line 56 of file CachingFileMgr.h.
std::string File_Namespace::CachingFileMgr::getOrAddTableDir | ( | int | db_id, |
int | tb_id | ||
) |
Returns (and optionally creates) a subdirectory for table-specific persistent data (e.g. serialized foreign data wrappers).
Definition at line 81 of file CachingFileMgr.cpp.
References logger::FATAL, File_Namespace::FileMgr::files_rw_mutex_, File_Namespace::get_dir_name_for_table(), File_Namespace::FileMgr::getFileMgrBasePath(), and LOG.
uint64_t File_Namespace::CachingFileMgr::getSpaceReservedByTable | ( | int | db_id, |
int | tb_id | ||
) |
Definition at line 148 of file CachingFileMgr.cpp.
References getChunkSpaceReservedByTable(), getMetadataSpaceReservedByTable(), and getWrapperSpaceReservedByTable().
|
inlineoverride |
Definition at line 57 of file CachingFileMgr.h.
uint64_t File_Namespace::CachingFileMgr::getWrapperSpaceReservedByTable | ( | int | db_id, |
int | tb_id | ||
) |
Definition at line 133 of file CachingFileMgr.cpp.
References omnisci::file_size(), File_Namespace::FileMgr::files_rw_mutex_, File_Namespace::get_dir_name_for_table(), and File_Namespace::FileMgr::getFileMgrBasePath().
Referenced by getSpaceReservedByTable().
|
inlineoverridevirtual |
Query to determine if the contained pages will have their database and table ids overridden by the filemgr key (FileMgr does this).
Reimplemented from File_Namespace::FileMgr.
Definition at line 91 of file CachingFileMgr.h.
|
inlineoverride |
Definition at line 74 of file CachingFileMgr.h.