_caching_file_mgr_8cpp_source.html

 /*

  * Copyright 2022 HEAVY.AI, Inc.

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */


 #include "DataMgr/FileMgr/CachingFileMgr.h"

 #include <boost/filesystem.hpp>

 #include <fstream>

 #include "Shared/misc.h"


 namespace bf = boost::filesystem;


 namespace {

 // Returns the combined size, in bytes, of every regular file found recursively under
 // `dir`.  A directory that does not exist counts as zero bytes.
 size_t size_of_dir(const std::string& dir) {
   if (!bf::exists(dir)) {
     return 0;
   }
   size_t total_bytes = 0;
   for (const auto& entry : bf::recursive_directory_iterator(dir)) {
     const auto& entry_path = entry.path();
     if (bf::is_regular_file(entry_path)) {
       total_bytes += bf::file_size(entry_path);
     }
   }
   return total_bytes;
 }


 // Asks `alg` for the next chunk to evict and returns its key.  If the eviction queue is
 // empty (NoEntryFoundException) a FATAL message is logged.
 ChunkKey evict_chunk_or_fail(LRUEvictionAlgorithm& alg) {
   try {
     return alg.evictNextChunk();
   } catch (const NoEntryFoundException&) {
     LOG(FATAL) << "Disk cache needs to evict data to make space, but no data found in "
                   "eviction queue.";
   }
   // Only reachable if the FATAL log returns; hand back a default-constructed key.
   return ChunkKey{};
 }


 }  // namespace


 namespace File_Namespace {


 // Produces a human-readable report listing every buffer in chunkIndex_ (page count and
 // dirty flag) followed by the contents of the data and metadata eviction queues.
 std::string CachingFileMgr::dump() const {
   std::stringstream report;
   report << "Dump Cache:\n";
   for (const auto& [chunk_key, buffer] : chunkIndex_) {
     report << "  " << show_chunk(chunk_key);
     report << " num_pages: " << buffer->pageCount();
     report << ", is dirty: " << buffer->isDirty() << "\n";
   }
   report << "Data Eviction Queue:\n" << chunk_evict_alg_.dumpEvictionQueue();
   report << "Metadata Eviction Queue:\n" << table_evict_alg_.dumpEvictionQueue();
   report << "\n";
   return report.str();
 }


 // Builds a disk cache manager from `config`.
 //
 // The FileMgr base is constructed with the data and metadata page sizes from `config`.
 // The cache root path and size limit are then copied from `config`, the rollback epoch
 // count and next file id are reset to 0, on-disk state is loaded through init(), and the
 // per-type file limits are derived through setMaxSizes().
 CachingFileMgr::CachingFileMgr(const DiskCacheConfig& config)
     : FileMgr(config.page_size, config.meta_page_size) {
   fileMgrBasePath_ = config.path;
   maxRollbackEpochs_ = 0;
   nextFileId_ = 0;
   max_size_ = config.size_limit;
   // max_size_ must be assigned before init(): init() starts by comparing the size of the
   // existing cache directory against it.
   init(config.num_reader_threads);
   setMaxSizes();
 }


 // Nothing beyond member/base destruction is required.
 CachingFileMgr::~CachingFileMgr() = default;


 void CachingFileMgr::init(const size_t num_reader_threads) {

   deleteCacheIfTooLarge();

   readTableFileMgrs();

   auto open_files_result = openFiles();

   /* Sort headerVec so that all HeaderInfos

    * from a chunk will be grouped together

    * and in order of increasing PageId

    * - Version Epoch */

   auto& header_vec = open_files_result.header_infos;

   std::sort(header_vec.begin(), header_vec.end());


   /* Goal of next section is to find sequences in the

    * sorted headerVec of the same ChunkId, which we

    * can then initiate a FileBuffer with */

   VLOG(3) << "Number of Headers in Vector: " << header_vec.size();

   if (header_vec.size() > 0) {

     auto startIt = header_vec.begin();

     ChunkKey lastChunkKey = startIt->chunkKey;

     for (auto it = header_vec.begin() + 1; it != header_vec.end(); ++it) {

       if (it->chunkKey != lastChunkKey) {

         createBufferFromHeaders(lastChunkKey, startIt, it);

         lastChunkKey = it->chunkKey;

         startIt = it;

       }

     }

     createBufferFromHeaders(lastChunkKey, startIt, header_vec.end());

   }

   nextFileId_ = open_files_result.max_file_id + 1;

   incrementAllEpochs();

   freePages();

   initializeNumThreads(num_reader_threads);

   isFullyInitted_ = true;

 }


 // Scans the cache root for sub-directories named "table_<db_id>_<tb_id>" and registers a
 // TableFileMgr for each one in table_dirs_.
 //
 // The cache root must already exist and be a directory, and no table may be registered
 // twice; each of these conditions is enforced with a CHECK.  Holds table_dirs_mutex_
 // exclusively for the duration of the scan.
 void CachingFileMgr::readTableFileMgrs() {
   heavyai::unique_lock<heavyai::shared_mutex> write_lock(table_dirs_mutex_);
   bf::path path(fileMgrBasePath_);
   CHECK(bf::exists(path)) << "Cache path: " << fileMgrBasePath_ << " does not exist.";
   CHECK(bf::is_directory(path))
       << "Specified path '" << fileMgrBasePath_ << "' for disk cache is not a directory.";

   // Look for directories with table-specific names.
   boost::regex table_filter("table_([0-9]+)_([0-9]+)");
   for (const auto& file : bf::directory_iterator(path)) {
     boost::smatch match;
     auto file_name = file.path().filename().string();
     if (boost::regex_match(file_name, match, table_filter)) {
       // Capture group 1 is the database id, group 2 the table id.
       int32_t db_id = std::stoi(match[1]);
       int32_t tb_id = std::stoi(match[2]);
       TablePair table_pair{db_id, tb_id};
       CHECK(table_dirs_.find(table_pair) == table_dirs_.end())
           << "Trying to read data for existing table";
       table_dirs_.emplace(table_pair,
                           std::make_unique<TableFileMgr>(file.path().string()));
     }
   }
 }


 int32_t CachingFileMgr::epoch(int32_t db_id, int32_t tb_id) const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(table_dirs_mutex_);

   auto tables_it = table_dirs_.find({db_id, tb_id});

   if (tables_it == table_dirs_.end()) {

     // If there is no directory for this table, that means the cache does not recognize

     // the table that is requested.  This can happen if a table was dropped, and it's

     // pages were invalidated but not yet freed and then the server crashed before they

     // were freed.  Upon re-starting the FileMgr will find these pages and attempt to

     // compare their epoch to know if they are valid or not.  In this case we should

     // return an invalid epoch to indicate that any page for this table is not valid and

     // should be freed.

     return Epoch::min_allowable_epoch();

   }

   auto& [pair, table_dir] = *tables_it;

   return table_dir->getEpoch();

 }


 void CachingFileMgr::incrementEpoch(int32_t db_id, int32_t tb_id) {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(table_dirs_mutex_);

   auto tables_it = table_dirs_.find({db_id, tb_id});

   CHECK(tables_it != table_dirs_.end());

   auto& [pair, table_dir] = *tables_it;

   table_dir->incrementEpoch();

 }


 void CachingFileMgr::writeAndSyncEpochToDisk(int32_t db_id, int32_t tb_id) {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(table_dirs_mutex_);

   auto table_it = table_dirs_.find({db_id, tb_id});

   CHECK(table_it != table_dirs_.end());

   table_it->second->writeAndSyncEpochToDisk();

 }


 // Removes everything the cache holds for table (db_id, tb_id): first its chunk buffers,
 // then its table-specific directory manager, and finally calls freePages().
 void CachingFileMgr::clearForTable(int32_t db_id, int32_t tb_id) {
   removeTableBuffers(db_id, tb_id);
   removeTableFileMgr(db_id, tb_id);
   freePages();
 }


 // Returns "<cache root>/<table directory name>" for table (db_id, tb_id).
 std::string CachingFileMgr::getTableFileMgrPath(int32_t db_id, int32_t tb_id) const {
   const std::string table_dir_name = get_dir_name_for_table(db_id, tb_id);
   return getFileMgrBasePath() + "/" + table_dir_name;
 }


 void CachingFileMgr::closeRemovePhysical() {

   {

     heavyai::unique_lock<heavyai::shared_mutex> write_lock(files_rw_mutex_);

     closePhysicalUnlocked();

   }

   {

     heavyai::unique_lock<heavyai::shared_mutex> tables_lock(table_dirs_mutex_);

     table_dirs_.clear();

   }

   bf::remove_all(getFileMgrBasePath());

 }


 size_t CachingFileMgr::getChunkSpaceReservedByTable(int32_t db_id, int32_t tb_id) const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(chunkIndexMutex_);

   size_t space_used = 0;

   ChunkKey min_table_key{db_id, tb_id};

   ChunkKey max_table_key{db_id, tb_id, std::numeric_limits<int32_t>::max()};

   for (auto it = chunkIndex_.lower_bound(min_table_key);

        it != chunkIndex_.upper_bound(max_table_key);

        ++it) {

     auto& [key, buffer] = *it;

     space_used += (buffer->numChunkPages() * page_size_);

   }

   return space_used;

 }


 size_t CachingFileMgr::getMetadataSpaceReservedByTable(int32_t db_id,

                                                        int32_t tb_id) const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(chunkIndexMutex_);

   size_t space_used = 0;

   ChunkKey min_table_key{db_id, tb_id};

   ChunkKey max_table_key{db_id, tb_id, std::numeric_limits<int32_t>::max()};

   for (auto it = chunkIndex_.lower_bound(min_table_key);

        it != chunkIndex_.upper_bound(max_table_key);

        ++it) {

     auto& [key, buffer] = *it;

     space_used += (buffer->numMetadataPages() * metadata_page_size_);

   }

   return space_used;

 }


 size_t CachingFileMgr::getTableFileMgrSpaceReserved(int32_t db_id, int32_t tb_id) const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(table_dirs_mutex_);

   size_t space = 0;

   auto table_it = table_dirs_.find({db_id, tb_id});

   if (table_it != table_dirs_.end()) {

     space += table_it->second->getReservedSpace();

   }

   return space;

 }


 // Total space reserved by table (db_id, tb_id): chunk pages + metadata pages + the
 // contents of the table's sub-directory.
 size_t CachingFileMgr::getSpaceReservedByTable(int32_t db_id, int32_t tb_id) const {
   const auto chunk_bytes = getChunkSpaceReservedByTable(db_id, tb_id);
   const auto metadata_bytes = getMetadataSpaceReservedByTable(db_id, tb_id);
   const auto table_dir_bytes = getTableFileMgrSpaceReserved(db_id, tb_id);
   return chunk_bytes + metadata_bytes + table_dir_bytes;
 }


 // Short label identifying this manager in log messages.
 std::string CachingFileMgr::describeSelf() const {
   return std::string{"cache"};
 }


 // Similar to FileMgr::checkpoint() but only writes a subset of buffers.
 //
 // Checkpoints table (db_id, tb_id): the table must already be registered in table_dirs_
 // (enforced with a CHECK).  The table's dirty buffers are written, files are synced, the
 // table epoch is written and synced, the epoch is incremented, and freePages() is called.
 void CachingFileMgr::checkpoint(const int32_t db_id, const int32_t tb_id) {
   {
     // Scoped so the shared lock is released before the calls below, which acquire
     // table_dirs_mutex_ themselves (epoch(), writeAndSyncEpochToDisk(), incrementEpoch()).
     heavyai::shared_lock<heavyai::shared_mutex> read_lock(table_dirs_mutex_);
     CHECK(table_dirs_.find({db_id, tb_id}) != table_dirs_.end()) << "No data for table";
   }
   VLOG(2) << "Checkpointing " << describeSelf() << " (" << db_id << ", " << tb_id
           << ") epoch: " << epoch(db_id, tb_id);
   writeDirtyBuffers(db_id, tb_id);
   syncFilesToDisk();
   // The epoch is persisted first and only then advanced in memory.
   writeAndSyncEpochToDisk(db_id, tb_id);
   incrementEpoch(db_id, tb_id);
   freePages();
 }


 void CachingFileMgr::createTableFileMgrIfNoneExists(const int32_t db_id,

                                                     const int32_t tb_id) {

   heavyai::unique_lock<heavyai::shared_mutex> write_lock(table_dirs_mutex_);

   TablePair table_pair{db_id, tb_id};

   if (table_dirs_.find(table_pair) == table_dirs_.end()) {

     table_dirs_.emplace(

         table_pair, std::make_unique<TableFileMgr>(getTableFileMgrPath(db_id, tb_id)));

   }

 }


 // Creates a buffer for `key` through the base FileMgr after marking the key as recently
 // used and making sure its table has a directory manager.
 FileBuffer* CachingFileMgr::createBufferUnlocked(const ChunkKey& key,
                                                  const size_t page_size,
                                                  const size_t num_bytes) {
   touchKey(key);
   const auto table_prefix = get_table_prefix(key);
   const auto& [db_id, tb_id] = table_prefix;
   createTableFileMgrIfNoneExists(db_id, tb_id);
   return FileMgr::createBufferUnlocked(key, page_size, num_bytes);
 }


 // Rebuilds the buffer for `key` from the page headers in [startIt, endIt).
 //
 // Returns nullptr without creating anything when the first header is not a metadata
 // page.  Otherwise returns the rebuilt buffer; if it turns out to be missing pages, its
 // chunk pages are freed before it is returned.
 FileBuffer* CachingFileMgr::createBufferFromHeaders(
     const ChunkKey& key,
     const std::vector<HeaderInfo>::const_iterator& startIt,
     const std::vector<HeaderInfo>::const_iterator& endIt) {
   // A first pageId other than -1 means there is no metadata page for this key, i.e. it
   // was never checkpointed, so the chunk is skipped.
   const bool starts_with_metadata_page = (startIt->pageId == -1);
   if (!starts_with_metadata_page) {
     return nullptr;
   }
   touchKey(key);
   const auto table_prefix = get_table_prefix(key);
   const auto& [db_id, tb_id] = table_prefix;
   createTableFileMgrIfNoneExists(db_id, tb_id);
   auto restored = FileMgr::createBufferFromHeaders(key, startIt, endIt);
   // Missing pages are detected by comparing the pages read against the metadata size;
   // incomplete chunk data is discarded.
   if (restored->isMissingPages()) {
     restored->freeChunkPages();
   }
   return restored;
 }


 // Caches `src_buffer` under `key`, replacing any buffer already stored for that key.
 // The source buffer must not be dirty (enforced with a CHECK).
 FileBuffer* CachingFileMgr::putBuffer(const ChunkKey& key,
                                       AbstractBuffer* src_buffer,
                                       const size_t num_bytes) {
   CHECK(!src_buffer->isDirty()) << "Cannot cache dirty buffers.";
   deleteBufferIfExists(key);
   // The clean source buffer is flagged so the base implementation writes it: dirty when
   // only metadata is being written (size 0), appended when chunk data is present.
   // Deleting and then appending (rather than overwriting) avoids creating multiple
   // page versions.
   if (src_buffer->size() == 0) {
     src_buffer->setDirty();
   } else {
     src_buffer->setAppended();
   }
   return FileMgr::putBuffer(key, src_buffer, num_bytes);
 }


 void CachingFileMgr::incrementAllEpochs() {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(table_dirs_mutex_);

   for (auto& table_dir : table_dirs_) {

     table_dir.second->incrementEpoch();

   }

 }


 void CachingFileMgr::removeTableFileMgr(int32_t db_id, int32_t tb_id) {

   // Delete table-specific directory (stores table epoch data and serialized data wrapper)

   heavyai::unique_lock<heavyai::shared_mutex> write_lock(table_dirs_mutex_);

   auto it = table_dirs_.find({db_id, tb_id});

   if (it != table_dirs_.end()) {

     it->second->removeDiskContent();

     table_dirs_.erase(it);

   }

 }


 void CachingFileMgr::removeTableBuffers(int32_t db_id, int32_t tb_id) {

   // Free associated FileBuffers and clear buffer entries.

   heavyai::unique_lock<heavyai::shared_mutex> write_lock(chunkIndexMutex_);

   ChunkKey min_table_key{db_id, tb_id};

   ChunkKey max_table_key{db_id, tb_id, std::numeric_limits<int32_t>::max()};

   for (auto it = chunkIndex_.lower_bound(min_table_key);

        it != chunkIndex_.upper_bound(max_table_key);) {

     it = deleteBufferUnlocked(it);

   }

 }


 // Allocates a new CachingFileBuffer bound to this manager for `key`, forwarding
 // `page_size` and `num_bytes` to its constructor.  The buffer is created with `new` and
 // returned as a raw pointer.
 CachingFileBuffer* CachingFileMgr::allocateBuffer(const size_t page_size,
                                                   const ChunkKey& key,
                                                   const size_t num_bytes) {
   return new CachingFileBuffer(this, page_size, key, num_bytes);
 }


 // Allocates a new CachingFileBuffer bound to this manager for `key`, forwarding the
 // header range [headerStartIt, headerEndIt) to its constructor.  The buffer is created
 // with `new` and returned as a raw pointer.
 CachingFileBuffer* CachingFileMgr::allocateBuffer(
     const ChunkKey& key,
     const std::vector<HeaderInfo>::const_iterator& headerStartIt,
     const std::vector<HeaderInfo>::const_iterator& headerEndIt) {
   return new CachingFileBuffer(this, key, headerStartIt, headerEndIt);
 }


 // Decides whether a page must be deleted or can be recovered.  Returns true when the
 // page was freed, false when it was left untouched.
 bool CachingFileMgr::updatePageIfDeleted(FileInfo* file_info,
                                          ChunkKey& chunk_key,
                                          int32_t contingent,
                                          int32_t page_epoch,
                                          int32_t page_num) {
   const bool marked_for_removal =
       (contingent == DELETE_CONTINGENT) || (contingent == ROLLOFF_CONTINGENT);
   if (!marked_for_removal) {
     return false;
   }
   // Contingents are stored by overwriting the bytes used for chunk keys.  A fileMgr
   // without a fileMgrKey (i.e. CachingFileMgr) therefore cannot tell whether the epoch
   // of a page marked for deletion is valid, because the key is unknown.  The only option
   // is to free the page as though it had been checkpointed, which should be fine since
   // only one version of each page is maintained.
   file_info->freePage(page_num, false, page_epoch);
   return true;
 }


 void CachingFileMgr::writeDirtyBuffers(int32_t db_id, int32_t tb_id) {

   heavyai::unique_lock<heavyai::shared_mutex> chunk_index_write_lock(chunkIndexMutex_);

   ChunkKey min_table_key{db_id, tb_id};

   ChunkKey max_table_key{db_id, tb_id, std::numeric_limits<int32_t>::max()};


   for (auto chunk_it = chunkIndex_.lower_bound(min_table_key);

        chunk_it != chunkIndex_.upper_bound(max_table_key);

        ++chunk_it) {

     if (auto [key, buf] = *chunk_it; buf->isDirty()) {

       // Free previous versions first so we only have one metadata version.

       buf->freeMetadataPages();

       buf->writeMetadata(epoch(db_id, tb_id));

       buf->clearDirtyBits();

       touchKey(key);

     }

   }

 }


 void CachingFileMgr::deleteBufferIfExists(const ChunkKey& key) {

   heavyai::unique_lock<heavyai::shared_mutex> chunk_index_write_lock(chunkIndexMutex_);

   auto chunk_it = chunkIndex_.find(key);

   if (chunk_it != chunkIndex_.end()) {

     deleteBufferUnlocked(chunk_it);

   }

 }


 // Counts the buffers in the chunk index that currently hold data pages.
 // Holds chunkIndexMutex_ in shared mode while scanning.
 size_t CachingFileMgr::getNumDataChunks() const {
   heavyai::shared_lock<heavyai::shared_mutex> read_lock(chunkIndexMutex_);
   size_t num_chunks = 0;
   // Iterate by const reference; iterating by value copied every key just to count.
   for (const auto& [key, buf] : chunkIndex_) {
     if (buf->hasDataPages()) {
       num_chunks++;
     }
   }
   return num_chunks;
 }


 // Wipes and recreates the cache directory when its on-disk size exceeds max_size_.
 void CachingFileMgr::deleteCacheIfTooLarge() {
   const bool within_limit = !(size_of_dir(fileMgrBasePath_) > max_size_);
   if (within_limit) {
     return;
   }
   closeRemovePhysical();
   bf::create_directory(fileMgrBasePath_);
   LOG(INFO) << "Cache path over limit.  Existing cache deleted.";
 }


 // Hands out a free page of size `pageSize`.
 //
 // Free pages in existing files of that page size are tried first.  Failing that, a new
 // file is created if the per-type file limit has not been reached; otherwise pages are
 // evicted to make room.  `isMetadata` selects the metadata or data limits and eviction
 // path.  Serialized through getPageMutex_.
 Page CachingFileMgr::requestFreePage(size_t pageSize, const bool isMetadata) {
   std::lock_guard<std::mutex> page_lock(getPageMutex_);

   // Files are indexed by page size, which separates metadata files from data files.
   const auto same_size_files = fileIndex_.equal_range(pageSize);
   for (auto file_it = same_size_files.first; file_it != same_size_files.second;
        ++file_it) {
     FileInfo* existing_file = getFileInfoForFileId(file_it->second);
     const int32_t free_page = existing_file->getFreePage();
     if (free_page != -1) {
       return Page(existing_file->fileId, free_page);
     }
   }

   // No existing file has room: add a new file if the limit for this type allows it.
   FileInfo* target_file = nullptr;
   if (isMetadata) {
     if (getMaxMetaFiles() > getNumMetaFiles()) {
       target_file = createFileInfo(pageSize, num_pages_per_metadata_file_);
     }
   } else if (getMaxDataFiles() > getNumDataFiles()) {
     target_file = createFileInfo(pageSize, num_pages_per_data_file_);
   }

   if (target_file == nullptr) {
     // A new file could not be created, so space has to be evicted.  Eviction returns
     // the first file it freed a page from, which is therefore guaranteed to have one.
     if (isMetadata) {
       target_file = evictMetadataPages();
     } else {
       target_file = evictPages();
     }
   }
   CHECK(target_file);

   const int32_t allocated_page = target_file->getFreePage();
   CHECK(allocated_page != -1);
   return Page(target_file->fileId, allocated_page);
 }


 std::vector<ChunkKey> CachingFileMgr::getKeysForTable(int32_t db_id,

                                                       int32_t tb_id) const {

   std::vector<ChunkKey> keys;

   ChunkKey min_table_key{db_id, tb_id};

   ChunkKey max_table_key{db_id, tb_id, std::numeric_limits<int32_t>::max()};

   for (auto it = chunkIndex_.lower_bound(min_table_key);

        it != chunkIndex_.upper_bound(max_table_key);

        ++it) {

     keys.emplace_back(it->first);

   }

   return keys;

 }


 // Evicts all cached data of the next table in the table eviction queue.
 //
 // Every buffer of the chosen table is deleted and the table's wrapper file is removed.
 // Returns the FileInfo that held the metadata page of the first buffer deleted, so the
 // caller has a file known to contain a free page.  A CHECK fails if no buffer was found
 // for the evicted table.
 FileInfo* CachingFileMgr::evictMetadataPages() {
   // Locks should already be in place before calling this method.
   FileInfo* file_info{nullptr};
   auto key_to_evict = evict_chunk_or_fail(table_evict_alg_);
   auto [db_id, tb_id] = get_table_prefix(key_to_evict);
   // Keys are copied up front because the loop below erases entries from chunkIndex_.
   const auto keys = getKeysForTable(db_id, tb_id);
   for (const auto& key : keys) {
     auto chunk_it = chunkIndex_.find(key);
     CHECK(chunk_it != chunkIndex_.end());
     auto& buf = chunk_it->second;
     if (!file_info) {
       // Return the FileInfo for the first file we are freeing a page from so that the
       // caller does not have to search for a FileInfo guaranteed to have at least one
       // free page.
       CHECK(buf->getMetadataPage().pageVersions.size() > 0);
       file_info =
           getFileInfoForFileId(buf->getMetadataPage().pageVersions.front().page.fileId);
     }
     // We erase all pages and entries for the chunk, as without metadata all other
     // entries are useless.
     deleteBufferUnlocked(chunk_it);
   }
   // Serialized datawrappers require metadata to be in the cache.
   deleteWrapperFile(db_id, tb_id);
   CHECK(file_info) << "FileInfo with freed page not found";
   return file_info;
 }


 // Evicts the chunk data of the next buffer in the chunk eviction queue that actually
 // holds data pages.
 //
 // Buffers without data pages are popped from the queue and skipped.  Once a buffer with
 // data is found, its chunk pages are freed.  Returns the FileInfo that held the first
 // page of that buffer, so the caller has a file known to contain a free page.
 FileInfo* CachingFileMgr::evictPages() {
   FileInfo* file_info{nullptr};
   FileBuffer* buf{nullptr};
   while (!file_info) {
     // Each iteration pops one key from the eviction queue.
     buf = chunkIndex_.at(evict_chunk_or_fail(chunk_evict_alg_));
     CHECK(buf);
     if (!buf->hasDataPages()) {
       // This buffer contains no chunk data (metadata only, uninitialized, size == 0,
       // etc...) so we won't recover any space by evicting it.  In this case it gets
       // removed from the eviction queue (it will get re-added if it gets populated with
       // data) and we look at the next chunk in queue until we find a buffer with page
       // data.
       continue;
     }
     // Return the FileInfo for the first file we are freeing a page from so that the
     // caller does not have to search for a FileInfo guaranteed to have at least one free
     // page.
     CHECK(buf->getMultiPage().front().pageVersions.size() > 0);
     file_info = getFileInfoForFileId(
         buf->getMultiPage().front().pageVersions.front().page.fileId);
   }
   // `buf` is the buffer selected by the final loop iteration.
   auto pages_freed = buf->freeChunkPages();
   CHECK(pages_freed > 0) << "failed to evict a page";
   CHECK(file_info) << "FileInfo with freed page not found";
   return file_info;
 }


 void CachingFileMgr::touchKey(const ChunkKey& key) const {

   chunk_evict_alg_.touchChunk(key);

   table_evict_alg_.touchChunk(get_table_key(key));

 }


 void CachingFileMgr::removeKey(const ChunkKey& key) const {

   // chunkIndex lock should already be acquired.

   chunk_evict_alg_.removeChunk(key);

   auto [db_id, tb_id] = get_table_prefix(key);

   ChunkKey table_key{db_id, tb_id};

   ChunkKey max_table_key{db_id, tb_id, std::numeric_limits<int32_t>::max()};

   for (auto it = chunkIndex_.lower_bound(table_key);

        it != chunkIndex_.upper_bound(max_table_key);

        ++it) {

     if (it->first != key) {

       // If there are any keys in this table other than that one we are removing, then

       // keep the table in the eviction queue.

       return;

     }

   }

   // No other keys exist for this table, so remove it from the queue.

   table_evict_alg_.removeChunk(table_key);

 }


 size_t CachingFileMgr::getFilesSize() const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(files_rw_mutex_);

   size_t sum = 0;

   for (const auto& [id, file] : files_) {

     sum += file->size();

   }

   return sum;

 }


 size_t CachingFileMgr::getTableFileMgrsSize() const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(table_dirs_mutex_);

   size_t space_used = 0;

   for (const auto& [pair, table_dir] : table_dirs_) {

     space_used += table_dir->getReservedSpace();

   }

   return space_used;

 }


 size_t CachingFileMgr::getNumDataFiles() const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(files_rw_mutex_);

   return fileIndex_.count(page_size_);

 }


 size_t CachingFileMgr::getNumMetaFiles() const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(files_rw_mutex_);

   return fileIndex_.count(metadata_page_size_);

 }


 // Returns the keys of all buffers that are in the same table as `prefix` and currently
 // hold data pages.  Every returned key is also marked as recently used.
 //
 // Holds chunkIndexMutex_ in shared mode while scanning.
 std::vector<ChunkKey> CachingFileMgr::getChunkKeysForPrefix(
     const ChunkKey& prefix) const {
   heavyai::shared_lock<heavyai::shared_mutex> read_lock(chunkIndexMutex_);
   std::vector<ChunkKey> chunks;
   // Iterate by const reference; iterating by value copied every key in the index, even
   // those belonging to other tables.
   for (const auto& [key, buf] : chunkIndex_) {
     if (in_same_table(key, prefix) && buf->hasDataPages()) {
       chunks.emplace_back(key);
       touchKey(key);
     }
   }
   return chunks;
 }


 // Frees the chunk pages stored for `key` and removes the key from the chunk eviction
 // queue, while leaving the buffer entry itself in the index.  Does nothing when the
 // buffer is not present or holds no data pages.
 void CachingFileMgr::removeChunkKeepMetadata(const ChunkKey& key) {
   if (!isBufferOnDevice(key)) {
     return;
   }
   const auto found = chunkIndex_.find(key);
   CHECK(found != chunkIndex_.end());
   auto& buffer = found->second;
   if (!buffer->hasDataPages()) {
     return;
   }
   buffer->freeChunkPages();
   chunk_evict_alg_.removeChunk(key);
 }


 size_t CachingFileMgr::getNumChunksWithMetadata() const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(chunkIndexMutex_);

   size_t sum = 0;

   for (const auto& [key, buf] : chunkIndex_) {

     if (buf->hasEncoder()) {

       sum++;

     }

   }

   return sum;

 }


 std::string CachingFileMgr::dumpKeysWithMetadata() const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(chunkIndexMutex_);

   std::string ret_string = "CFM keys with metadata:\n";

   for (const auto& [key, buf] : chunkIndex_) {

     if (buf->hasEncoder()) {

       ret_string += "  " + show_chunk(key) + "\n";

     }

   }

   return ret_string;

 }


 std::string CachingFileMgr::dumpKeysWithChunkData() const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(chunkIndexMutex_);

   std::string ret_string = "CFM keys with chunk data:\n";

   for (const auto& [key, buf] : chunkIndex_) {

     if (buf->hasDataPages()) {

       ret_string += "  " + show_chunk(key) + "\n";

     }

   }

   return ret_string;

 }


 std::unique_ptr<CachingFileMgr> CachingFileMgr::reconstruct() const {

   DiskCacheConfig config{fileMgrBasePath_,

                          DiskCacheLevel::none,

                          num_reader_threads_,

                          max_size_,

                          page_size_,

                          metadata_page_size_};

   return std::make_unique<CachingFileMgr>(config);

 }


 void CachingFileMgr::deleteWrapperFile(int32_t db, int32_t tb) {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(table_dirs_mutex_);

   auto it = table_dirs_.find({db, tb});

   CHECK(it != table_dirs_.end()) << "Wrapper does not exist.";

   it->second->deleteWrapperFile();

 }


 // Stores the serialized wrapper document `doc` for table (db, tb).
 //
 // A directory manager is created for the table if needed.  The document must not exceed
 // getMaxWrapperSize() (enforced with a CHECK).  While it does not fit in the available
 // wrapper space, metadata is evicted one table at a time.
 void CachingFileMgr::writeWrapperFile(const std::string& doc, int32_t db, int32_t tb) {
   createTableFileMgrIfNoneExists(db, tb);
   auto wrapper_size = doc.size();
   CHECK_LE(wrapper_size, getMaxWrapperSize())
       << "Wrapper is too big to fit into the cache";
   // Each eviction also deletes the evicted table's wrapper file.
   while (wrapper_size > getAvailableWrapperSpace()) {
     evictMetadataPages();
   }
   // The lock is taken only now: the eviction above acquires table_dirs_mutex_ itself
   // through deleteWrapperFile().
   heavyai::shared_lock<heavyai::shared_mutex> read_lock(table_dirs_mutex_);
   table_dirs_.at({db, tb})->writeWrapperFile(doc);
 }


 bool CachingFileMgr::hasWrapperFile(int32_t db_id, int32_t table_id) const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(table_dirs_mutex_);

   auto it = table_dirs_.find({db_id, table_id});

   if (it != table_dirs_.end()) {

     return it->second->hasWrapperFile();

   }

   return false;

 }


 /*

  * While the CFM allows for multiple tables to share the same allocated files for chunk

  * data and metadata, space cannot be reallocated between metadata files and data files

  * (once the space has been reserve for a data file the file won't be deleted unless the

  * cache is deleted).  To prevent a case where we have allocated too many files of one

  * type to the detrement of the other, we have a minimum portion of the cache that is

  * reserved for each type.  This default ratio gives %9 of space to data wrappers, %1 to

  * metadata files, and %90 to data files.

  */

 void CachingFileMgr::setMaxSizes() {

   size_t max_meta_space = std::floor(max_size_ * METADATA_SPACE_PERCENTAGE);

   size_t max_meta_file_space = std::floor(max_size_ * METADATA_FILE_SPACE_PERCENTAGE);

   max_wrapper_space_ = max_meta_space - max_meta_file_space;

   auto max_data_space = max_size_ - max_meta_space;

   auto meta_file_size = metadata_page_size_ * num_pages_per_metadata_file_;

   auto data_file_size = page_size_ * num_pages_per_data_file_;

   max_num_data_files_ = max_data_space / data_file_size;

   max_num_meta_files_ = max_meta_file_space / meta_file_size;

   CHECK_GT(max_num_data_files_, 0U) << "Cannot create a cache of size " << max_size_

                                     << ".  Not enough space to create a data file.";

   CHECK_GT(max_num_meta_files_, 0U) << "Cannot create a cache of size " << max_size_

                                     << ".  Not enough space to create a metadata file.";

 }


 std::optional<FileBuffer*> CachingFileMgr::getBufferIfExists(const ChunkKey& key) {

   heavyai::shared_lock<heavyai::shared_mutex> chunk_index_read_lock(chunkIndexMutex_);

   auto chunk_it = chunkIndex_.find(key);

   if (chunk_it == chunkIndex_.end()) {

     return {};

   }

   return getBufferUnlocked(key);

 }


 // Deletes the buffer at `chunk_it` through the base FileMgr after removing its key from
 // the eviction queues.  Returns whatever iterator the base implementation returns.
 ChunkKeyToChunkMap::iterator CachingFileMgr::deleteBufferUnlocked(
     const ChunkKeyToChunkMap::iterator chunk_it,
     const bool purge) {
   // Must run before the base call: removeKey() reads the key through chunk_it.
   removeKey(chunk_it->first);
   return FileMgr::deleteBufferUnlocked(chunk_it, purge);
 }


 // Fills `chunkMetadataVec` through the base FileMgr, then marks every key now present
 // in the vector as recently used.
 void CachingFileMgr::getChunkMetadataVecForKeyPrefix(
     ChunkMetadataVector& chunkMetadataVec,
     const ChunkKey& keyPrefix) {
   FileMgr::getChunkMetadataVecForKeyPrefix(chunkMetadataVec, keyPrefix);
   for (const auto& [chunk_key, chunk_metadata] : chunkMetadataVec) {
     touchKey(chunk_key);
   }
 }


 // Marks `key` as recently used, then fetches its buffer through the base FileMgr.
 FileBuffer* CachingFileMgr::getBufferUnlocked(const ChunkKey& key,
                                               const size_t num_bytes) const {
   touchKey(key);
   FileBuffer* cached_buffer = FileMgr::getBufferUnlocked(key, num_bytes);
   return cached_buffer;
 }


 // Frees the page identified by `page` (file, page number) via freePageDeferred().
 void CachingFileMgr::free_page(std::pair<FileInfo*, int32_t>&& page) {
   FileInfo* owning_file = page.first;
   const int32_t page_num = page.second;
   owning_file->freePageDeferred(page_num);
 }


 std::set<ChunkKey> CachingFileMgr::getKeysWithMetadata() const {

   heavyai::shared_lock<heavyai::shared_mutex> read_lock(chunkIndexMutex_);

   std::set<ChunkKey> ret;

   for (const auto& [key, buf] : chunkIndex_) {

     if (buf->hasEncoder()) {

       ret.emplace(key);

     }

   }

   return ret;

 }


 // Maximum number of bytes data files may occupy: the explicit limit_data_size_ when one
 // is set, otherwise the maximum data file count times the size of one data file.
 size_t CachingFileMgr::getMaxDataFilesSize() const {
   if (!limit_data_size_) {
     return getMaxDataFiles() * getDataFileSize();
   }
   return *limit_data_size_;
 }


 // Opens or creates the per-table cache directory at `table_path`.
 //
 // The directory is created if missing; an existing path must be a directory.  If an
 // epoch file is already present it must be a regular file of exactly
 // Epoch::byte_size() bytes, and the epoch is loaded from it.  Otherwise a new epoch file
 // is created, the initial epoch is written and synced, and the epoch is incremented.
 TableFileMgr::TableFileMgr(const std::string& table_path)
     : table_path_(table_path)
     , epoch_file_path_(table_path_ + "/" + FileMgr::EPOCH_FILENAME)
     , wrapper_file_path_(table_path_ + "/" + CachingFileMgr::WRAPPER_FILE_NAME)
     , epoch_(Epoch())
     , is_checkpointed_(true) {
   if (!bf::exists(table_path_)) {
     bf::create_directory(table_path_);
   } else {
     CHECK(bf::is_directory(table_path_)) << "Specified path '" << table_path_
                                          << "' for cache table data is not a directory.";
   }
   if (bf::exists(epoch_file_path_)) {
     CHECK(bf::is_regular_file(epoch_file_path_))
         << "Found epoch file '" << epoch_file_path_ << "' which is not a regular file";
     CHECK(bf::file_size(epoch_file_path_) == Epoch::byte_size())
         << "Found epoch file '" << epoch_file_path_ << "' which is not of expected size";
     epoch_file_ = open(epoch_file_path_);
     read(epoch_file_, 0, Epoch::byte_size(), epoch_.storage_ptr(), epoch_file_path_);
   } else {
     // Request the real epoch size.  `sizeof(Epoch::byte_size())` is the size of the
     // function's return type, not the number of bytes an epoch occupies.
     epoch_file_ = create(epoch_file_path_, Epoch::byte_size());
     writeAndSyncEpochToDisk();
     incrementEpoch();
   }
 }


 void TableFileMgr::incrementEpoch() {

   heavyai::unique_lock<heavyai::shared_mutex> w_lock(table_mutex_);

   epoch_.increment();

   is_checkpointed_ = false;

   CHECK(epoch_.ceiling() <= Epoch::max_allowable_epoch())

       << "Epoch greater than maximum allowed value (" << epoch_.ceiling() << " > "

       << Epoch::max_allowable_epoch() << ").";

 }


 int32_t TableFileMgr::getEpoch() const {

   heavyai::shared_lock<heavyai::shared_mutex> r_lock(table_mutex_);

   return static_cast<int32_t>(epoch_.ceiling());

 }


 // Writes the in-memory epoch to the start of the epoch file, flushes the stream and asks
 // the OS to sync the file, then marks the table as checkpointed.  Each step is guarded
 // by a CHECK.  Holds table_mutex_ exclusively.
 void TableFileMgr::writeAndSyncEpochToDisk() {
   heavyai::unique_lock<heavyai::shared_mutex> w_lock(table_mutex_);
   write(epoch_file_, 0, Epoch::byte_size(), epoch_.storage_ptr());
   int32_t status = fflush(epoch_file_);
   CHECK(status == 0) << "Could not flush epoch file to disk";
 #ifdef __APPLE__
   // NOTE(review): 51 is presumably the value of F_FULLFSYNC on macOS — confirm.
   status = fcntl(fileno(epoch_file_), 51);
 #else
   status = heavyai::fsync(fileno(epoch_file_));
 #endif
   CHECK(status == 0) << "Could not sync epoch file to disk";
   is_checkpointed_ = true;
 }


 void TableFileMgr::removeDiskContent() const {

   heavyai::unique_lock<heavyai::shared_mutex> w_lock(table_mutex_);

   bf::remove_all(table_path_);

 }


 size_t TableFileMgr::getReservedSpace() const {

   heavyai::shared_lock<heavyai::shared_mutex> r_lock(table_mutex_);

   size_t space = 0;

   for (const auto& file : bf::recursive_directory_iterator(table_path_)) {

     if (bf::is_regular_file(file.path())) {

       space += bf::file_size(file.path());

     }

   }

   return space;

 }


 void TableFileMgr::deleteWrapperFile() const {

   heavyai::unique_lock<heavyai::shared_mutex> w_lock(table_mutex_);

   bf::remove_all(wrapper_file_path_);

 }


 void TableFileMgr::writeWrapperFile(const std::string& doc) const {

   heavyai::unique_lock<heavyai::shared_mutex> w_lock(table_mutex_);

   std::ofstream ofs(wrapper_file_path_);

   if (!ofs) {

     throw std::runtime_error{"Error trying to create file \"" + wrapper_file_path_ +

                              "\". The error was: " + std::strerror(errno)};

   }

   ofs << doc;

 }


 bool TableFileMgr::hasWrapperFile() const {

   heavyai::shared_lock<heavyai::shared_mutex> r_lock(table_mutex_);

   return bf::exists(wrapper_file_path_);

 }


 // Streams the display name of a DiskCacheLevel.  An unrecognized value triggers
 // UNREACHABLE() with the numeric value in the message.
 std::ostream& operator<<(std::ostream& os, DiskCacheLevel disk_cache_level) {
   switch (disk_cache_level) {
     case DiskCacheLevel::none:
       os << "None";
       break;
     case DiskCacheLevel::fsi:
       os << "ForeignTables";
       break;
     case DiskCacheLevel::non_fsi:
       os << "LocalTables";
       break;
     case DiskCacheLevel::all:
       os << "All";
       break;
     default:
       UNREACHABLE() << "Unexpected disk cache level: "
                     << static_cast<int32_t>(disk_cache_level);
       break;
   }
   return os;
 }

 }  // namespace File_Namespace

File_Namespace::FileMgr::metadata_page_size_
const size_t metadata_page_size_
Definition: FileMgr.h:552

File_Namespace::FileMgr::isFullyInitted_
bool isFullyInitted_
Definition: FileMgr.h:425

File_Namespace::CachingFileMgr::getTableFileMgrSpaceReserved
size_t getTableFileMgrSpaceReserved(int32_t db_id, int32_t tb_id) const
Definition: CachingFileMgr.cpp:224

ChunkKey
std::vector< int > ChunkKey
Definition: types.h:36

File_Namespace::FileMgr::openFiles
OpenFilesResult openFiles()
Definition: FileMgr.cpp:200

File_Namespace::operator<<
std::ostream & operator<<(std::ostream &os, DiskCacheLevel disk_cache_level)
Definition: CachingFileMgr.cpp:847

File_Namespace::CachingFileMgr::table_evict_alg_
LRUEvictionAlgorithm table_evict_alg_
Definition: CachingFileMgr.h:513

Epoch::storage_ptr
int8_t * storage_ptr()
Definition: Epoch.h:61

misc.h

File_Namespace::TableFileMgr::removeDiskContent
void removeDiskContent() const
Removes all disk data for the subdir.
Definition: CachingFileMgr.cpp:811

File_Namespace::FileMgr::page_size_
const size_t page_size_
Definition: FileMgr.h:551

File_Namespace::DiskCacheConfig
Definition: CachingFileMgr.h:46

LRUEvictionAlgorithm::evictNextChunk
const ChunkKey evictNextChunk() override
Definition: LRUEvictionAlgorithm.cpp:27

CachingFileMgr.h
This file details an extension of the FileMgr that can contain pages from multiple tables (CachingFil...

anonymous_namespace{CachingFileMgr.cpp}::size_of_dir
size_t size_of_dir(const std::string &dir)
Definition: CachingFileMgr.cpp:31

File_Namespace::CachingFileMgr::table_dirs_mutex_
heavyai::shared_mutex table_dirs_mutex_
Definition: CachingFileMgr.h:500

File_Namespace::DiskCacheLevel::fsi

File_Namespace::get_dir_name_for_table
std::string get_dir_name_for_table(int db_id, int tb_id)
Definition: CachingFileMgr.h:90

File_Namespace::Page
A logical page (Page) belongs to a file on disk.
Definition: Page.h:46

LOG
#define LOG(tag)
Definition: Logger.h:285

File_Namespace::CachingFileMgr::setMaxSizes
void setMaxSizes()
Sets the maximum number of files/space for each type of storage based on the maximum size...
Definition: CachingFileMgr.cpp:689

File_Namespace::TableFileMgr::writeAndSyncEpochToDisk
void writeAndSyncEpochToDisk()
Write and flush the epoch to the epoch file on disk.
Definition: CachingFileMgr.cpp:797

logger::FATAL
Definition: Logger.h:111

File_Namespace::CachingFileMgr::~CachingFileMgr
~CachingFileMgr() override
Definition: CachingFileMgr.cpp:81

UNREACHABLE
#define UNREACHABLE()
Definition: Logger.h:338

gpu_enabled::sort
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105

File_Namespace::CachingFileMgr::describeSelf
std::string describeSelf() const override
describes this FileMgr for logging purposes.
Definition: CachingFileMgr.cpp:241

File_Namespace::CachingFileMgr::getSpaceReservedByTable
size_t getSpaceReservedByTable(int32_t db_id, int32_t tb_id) const
Definition: CachingFileMgr.cpp:234

File_Namespace::CachingFileMgr::closeRemovePhysical
void closeRemovePhysical() override
Closes files and removes the caching directory.
Definition: CachingFileMgr.cpp:183

Epoch::min_allowable_epoch
static int64_t min_allowable_epoch()
Definition: Epoch.h:65

File_Namespace::TableFileMgr::table_path_
std::string table_path_
Definition: CachingFileMgr.h:144

File_Namespace::FileMgr::getFileMgrBasePath
std::string getFileMgrBasePath() const
Definition: FileMgr.h:334

File_Namespace::CachingFileMgr::getMetadataSpaceReservedByTable
size_t getMetadataSpaceReservedByTable(int32_t db_id, int32_t tb_id) const
Definition: CachingFileMgr.cpp:209

Epoch::ceiling
int32_t ceiling() const
Definition: Epoch.h:44

File_Namespace::create
std::pair< FILE *, std::string > create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:55

File_Namespace::FileBuffer
Represents/provides access to contiguous data stored in the file system.
Definition: FileBuffer.h:57

File_Namespace::FileMgr::checkpoint
void checkpoint() override
Fsyncs data files, writes out epoch and fsyncs that.
Definition: FileMgr.cpp:706

Epoch::max_allowable_epoch
static int64_t max_allowable_epoch()
Definition: Epoch.h:69

File_Namespace::TableFileMgr::hasWrapperFile
bool hasWrapperFile() const
Definition: CachingFileMgr.cpp:842

CHECK_GT
#define CHECK_GT(x, y)
Definition: Logger.h:305

File_Namespace::FileMgr::fileMgrBasePath_
std::string fileMgrBasePath_
Definition: FileMgr.h:411

get_table_key
ChunkKey get_table_key(const ChunkKey &key)
Definition: types.h:57

Data_Namespace::AbstractBuffer::isDirty
bool isDirty() const
Definition: AbstractBuffer.h:98

show_chunk
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98

File_Namespace::CachingFileMgr::CachingFileMgr
CachingFileMgr(const DiskCacheConfig &config)
Definition: CachingFileMgr.cpp:71

File_Namespace::FileInfo::freePage
void freePage(int32_t pageId, const bool isRolloff, int32_t epoch)
Definition: FileInfo.cpp:187

File_Namespace::FileInfo::getFreePage
int32_t getFreePage()
Definition: FileInfo.cpp:204

File_Namespace::write
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:143

File_Namespace::DiskCacheLevel::non_fsi

heavyai::shared_lock
std::shared_lock< T > shared_lock
Definition: heavyai_shared_mutex.h:41

LRUEvictionAlgorithm::dumpEvictionQueue
std::string dumpEvictionQueue()
Definition: LRUEvictionAlgorithm.cpp:59

File_Namespace::CachingFileMgr::deleteCacheIfTooLarge
void deleteCacheIfTooLarge()
When the cache is read from disk, we don't know which chunks were least recently used. Rather than try to evict random pages to get down to size we just reset the cache to make sure we have space.
Definition: CachingFileMgr.cpp:415

File_Namespace::TableFileMgr::table_mutex_
heavyai::shared_mutex table_mutex_
Definition: CachingFileMgr.h:151

File_Namespace::CachingFileMgr::incrementAllEpochs
void incrementAllEpochs()
Increment epochs for each table in the CFM.
Definition: CachingFileMgr.cpp:318

File_Namespace::DiskCacheLevel::none

File_Namespace::FileMgr
Definition: FileMgr.h:161

logger::INFO
Definition: Logger.h:108

File_Namespace::CachingFileMgr::max_size_
size_t max_size_
Definition: CachingFileMgr.h:509

File_Namespace::FileMgr::incrementEpoch
int32_t incrementEpoch()
Definition: FileMgr.h:285

Epoch::increment
int32_t increment()
Definition: Epoch.h:54

Data_Namespace::AbstractBuffer::setAppended
void setAppended()
Definition: AbstractBuffer.h:116

File_Namespace::DELETE_CONTINGENT
constexpr int32_t DELETE_CONTINGENT
A FileInfo type has a file pointer and metadata about a file.
Definition: FileInfo.h:51

File_Namespace::TableFileMgr::wrapper_file_path_
std::string wrapper_file_path_
Definition: CachingFileMgr.h:146

File_Namespace::TableFileMgr::epoch_file_path_
std::string epoch_file_path_
Definition: CachingFileMgr.h:145

File_Namespace::FileMgr::chunkIndex_
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:330

File_Namespace::CachingFileMgr::removeTableBuffers
void removeTableBuffers(int32_t db_id, int32_t tb_id)
Erases and cleans up all buffers for a table.
Definition: CachingFileMgr.cpp:335

LRUEvictionAlgorithm
Definition: LRUEvictionAlgorithm.h:39

heavyai::unique_lock
std::unique_lock< T > unique_lock
Definition: heavyai_shared_mutex.h:39

ChunkMetadataVector
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
Definition: ChunkMetadata.h:201

File_Namespace::CachingFileMgr::dump
std::string dump() const
Definition: CachingFileMgr.cpp:58

File_Namespace::TableFileMgr::deleteWrapperFile
void deleteWrapperFile() const
Deletes only the wrapper file on disk.
Definition: CachingFileMgr.cpp:827

Data_Namespace::AbstractBuffer
An AbstractBuffer is a unit of data management for a data manager.
Definition: AbstractBuffer.h:48

File_Namespace::TableFileMgr::incrementEpoch
void incrementEpoch()
increment the epoch for this subdir (not synced to disk).
Definition: CachingFileMgr.cpp:783

File_Namespace::FileInfo::fileId
int32_t fileId
Definition: FileInfo.h:57

true
bool g_enable_smem_group_by true
Definition: QueryMemoryDescriptor.cpp:28

File_Namespace::CachingFileBuffer
Definition: CachingFileMgr.h:155

Data_Namespace::AbstractBuffer::size
size_t size() const
Definition: AbstractBuffer.h:96

File_Namespace::read
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf, const std::string &file_path)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:125

File_Namespace::FileMgr::writeAndSyncEpochToDisk
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:659

File_Namespace::CachingFileMgr::removeTableFileMgr
void removeTableFileMgr(int32_t db_id, int32_t tb_id)
Removes the subdirectory content for a table.
Definition: CachingFileMgr.cpp:325

File_Namespace::ROLLOFF_CONTINGENT
constexpr int32_t ROLLOFF_CONTINGENT
Definition: FileInfo.h:52

File_Namespace::FileMgr::freePages
void freePages()
Definition: FileMgr.cpp:1615

File_Namespace::TableFileMgr::epoch_
Epoch epoch_
Definition: CachingFileMgr.h:147

anonymous_namespace{CachingFileMgr.cpp}::evict_chunk_or_fail
ChunkKey evict_chunk_or_fail(LRUEvictionAlgorithm &alg)
Definition: CachingFileMgr.cpp:43

File_Namespace::FileMgr::chunkIndexMutex_
heavyai::shared_mutex chunkIndexMutex_
Definition: FileMgr.h:420

Data_Namespace::AbstractBuffer::setDirty
void setDirty()
Definition: AbstractBuffer.h:109

File_Namespace::FileMgr::maxRollbackEpochs_
int32_t maxRollbackEpochs_
Definition: FileMgr.h:410

File_Namespace::TableFileMgr::writeWrapperFile
void writeWrapperFile(const std::string &doc) const
Writes wrapper file to disk.
Definition: CachingFileMgr.cpp:832

CHECK_LE
#define CHECK_LE(x, y)
Definition: Logger.h:304

Epoch
Definition: Epoch.h:30

File_Namespace::DiskCacheConfig::size_limit
size_t size_limit
Definition: CachingFileMgr.h:54

File_Namespace::TableFileMgr::epoch_file_
FILE * epoch_file_
Definition: CachingFileMgr.h:149

Epoch::byte_size
static size_t byte_size()
Definition: Epoch.h:63

File_Namespace::TableFileMgr::getReservedSpace
size_t getReservedSpace() const
Returns the disk space used (in bytes) for the subdir.
Definition: CachingFileMgr.cpp:816

File_Namespace::DiskCacheLevel::all

File_Namespace::CachingFileMgr::table_dirs_
std::map< TablePair, std::unique_ptr< TableFileMgr > > table_dirs_
Definition: CachingFileMgr.h:504

File_Namespace::open
FILE * open(int file_id)
Opens the file with the given id; fatal crash on error.
Definition: File.cpp:100

File_Namespace::CachingFileMgr::readTableFileMgrs
void readTableFileMgrs()
Checks for any sub-directories containing table-specific data and creates epochs from found files...
Definition: CachingFileMgr.cpp:117

get_table_prefix
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62

heavyai::fsync
int fsync(int fd)
Definition: heavyai_fs.cpp:62

File_Namespace::FileMgr::epoch
int32_t epoch() const
Definition: FileMgr.h:530

CHECK
#define CHECK(condition)
Definition: Logger.h:291

File_Namespace::FileMgr::initializeNumThreads
void initializeNumThreads(size_t num_reader_threads=0)
Definition: FileMgr.cpp:1604

File_Namespace::CachingFileMgr::init
void init(const size_t num_reader_threads)
Initializes a CFM, parsing any existing files and initializing data structures appropriately (current...
Definition: CachingFileMgr.cpp:83

File_Namespace::FileMgr::files_rw_mutex_
heavyai::shared_mutex files_rw_mutex_
Definition: FileMgr.h:421

File_Namespace::FileInfo
Definition: FileInfo.h:55

File_Namespace::FileMgr::closePhysicalUnlocked
void closePhysicalUnlocked()
Definition: FileMgr.cpp:556

in_same_table
bool in_same_table(const ChunkKey &left_key, const ChunkKey &right_key)
Definition: types.h:83

File_Namespace::DiskCacheConfig::num_reader_threads
size_t num_reader_threads
Definition: CachingFileMgr.h:53

File_Namespace::TablePair
std::pair< const int32_t, const int32_t > TablePair
Definition: FileMgr.h:98

File_Namespace::DiskCacheLevel
DiskCacheLevel
Definition: CachingFileMgr.h:42

File_Namespace::FileMgr::nextFileId_
unsigned nextFileId_
number of threads used when loading data
Definition: FileMgr.h:416

File_Namespace::CachingFileMgr::getChunkSpaceReservedByTable
size_t getChunkSpaceReservedByTable(int32_t db_id, int32_t tb_id) const
Definition: CachingFileMgr.cpp:195

heavyai::file_size
size_t file_size(const int fd)
Definition: heavyai_fs.cpp:33

File_Namespace::TableFileMgr::getEpoch
int32_t getEpoch() const
Returns the current epoch (locked)
Definition: CachingFileMgr.cpp:792

File_Namespace::CachingFileMgr::getTableFileMgrPath
std::string getTableFileMgrPath(int32_t db, int32_t tb) const
Definition: CachingFileMgr.cpp:179

File_Namespace::CachingFileMgr::clearForTable
void clearForTable(int32_t db_id, int32_t tb_id)
Removes all data related to the given table (pages and subdirectories).
Definition: CachingFileMgr.cpp:173

File_Namespace::DiskCacheConfig::path
std::string path
Definition: CachingFileMgr.h:51

File_Namespace::TableFileMgr::is_checkpointed_
bool is_checkpointed_
Definition: CachingFileMgr.h:148

File_Namespace::CachingFileMgr
A FileMgr capable of limiting its size and storing data from multiple tables in a shared directory...
Definition: CachingFileMgr.h:178

VLOG
#define VLOG(n)
Definition: Logger.h:388

File_Namespace::CachingFileMgr::chunk_evict_alg_
LRUEvictionAlgorithm chunk_evict_alg_
Definition: CachingFileMgr.h:512

NoEntryFoundException
Definition: CacheEvictionAlgorithm.h:32

File_Namespace::CachingFileMgr::createBufferFromHeaders
FileBuffer * createBufferFromHeaders(const ChunkKey &key, const std::vector< HeaderInfo >::const_iterator &startIt, const std::vector< HeaderInfo >::const_iterator &endIt) override
Creates a buffer and initializes it with info read from files on disk.
Definition: CachingFileMgr.cpp:279