OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
File_Namespace::FileMgr Class Reference

#include <FileMgr.h>

+ Inheritance diagram for File_Namespace::FileMgr:
+ Collaboration diagram for File_Namespace::FileMgr:

Public Member Functions

 FileMgr (const int32_t deviceId, GlobalFileMgr *gfm, const TablePair fileMgrKey, const int32_t max_rollback_epochs=-1, const size_t num_reader_threads=0, const int32_t epoch=-1, const size_t defaultPageSize=DEFAULT_PAGE_SIZE)
 Constructor. More...
 
 FileMgr (const int32_t deviceId, GlobalFileMgr *gfm, const TablePair fileMgrKey, const size_t defaultPageSize, const bool runCoreInit)
 
 FileMgr (GlobalFileMgr *gfm, const size_t defaultPageSize, std::string basePath)
 
virtual ~FileMgr () override
 Destructor. More...
 
StorageStats getStorageStats ()
 
FileBuffer * createBuffer (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
 Creates a chunk with the specified key and page size. More...
 
bool isBufferOnDevice (const ChunkKey &key) override
 
void deleteBuffer (const ChunkKey &key, const bool purge=true) override
 Deletes the chunk with the specified key. More...
 
void deleteBuffersWithPrefix (const ChunkKey &keyPrefix, const bool purge=true) override
 
FileBuffer * getBuffer (const ChunkKey &key, const size_t numBytes=0) override
 Returns a pointer to the chunk with the specified key. More...
 
void fetchBuffer (const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override
 
FileBuffer * putBuffer (const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override
 Puts the contents of d into the Chunk with the given key. More...
 
AbstractBuffer * alloc (const size_t numBytes) override
 
void free (AbstractBuffer *buffer) override
 
Page requestFreePage (size_t pagesize, const bool isMetadata)
 
MgrType getMgrType () override
 
std::string getStringMgrType () override
 
std::string printSlabs () override
 
void clearSlabs () override
 
size_t getMaxSize () override
 
size_t getInUseSize () override
 
size_t getAllocated () override
 
bool isAllocationCapped () override
 
FileInfo * getFileInfoForFileId (const int32_t fileId)
 
FileMetadata getMetadataForFile (const boost::filesystem::directory_iterator &fileIterator)
 
void init (const size_t num_reader_threads, const int32_t epochOverride)
 
void init (const std::string &dataPathToConvertFrom, const int32_t epochOverride)
 
void copyPage (Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
 
void requestFreePages (size_t npages, size_t pagesize, std::vector< Page > &pages, const bool isMetadata)
 Obtains free pages – creates new files if necessary – of the requested size. More...
 
void getChunkMetadataVecForKeyPrefix (ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix) override
 
void checkpoint () override
 Fsyncs data files, writes out epoch and fsyncs that. More...
 
void checkpoint (const int32_t db_id, const int32_t tb_id) override
 
virtual int32_t epoch (int32_t db_id, int32_t tb_id) const
 Returns current value of epoch - should be one greater than recorded at last checkpoint. Because FileMgr only contains buffers from one table we can just return the FileMgr's epoch instead of finding a table-specific epoch. More...
 
int32_t epochFloor () const
 
int32_t incrementEpoch ()
 
int32_t lastCheckpointedEpoch ()
 Returns value of epoch at last checkpoint. More...
 
int32_t maxRollbackEpochs ()
 Returns value max_rollback_epochs. More...
 
size_t getNumReaderThreads ()
 Returns the number of threads defined by the num-reader-threads parameter, which should be used during the initial load and subsequent reads of data. More...
 
FILE * getFileForFileId (const int32_t fileId)
 Returns FILE pointer associated with requested fileId. More...
 
size_t getNumChunks () override
 
size_t getNumUsedPages () const
 
size_t getNumUsedMetadataPages () const
 
size_t getNumUsedMetadataPagesForChunkKey (const ChunkKey &chunkKey) const
 
int32_t getDBVersion () const
 Index for looking up chunks. More...
 
bool getDBConvert () const
 
void createTopLevelMetadata ()
 
std::string getFileMgrBasePath () const
 
virtual void closeRemovePhysical ()
 
void removeTableRelatedDS (const int32_t db_id, const int32_t table_id) override
 
void free_page (std::pair< FileInfo *, int32_t > &&page)
 
virtual bool hasFileMgrKey () const
 
const TablePair get_fileMgrKey () const
 
boost::filesystem::path getFilePath (const std::string &file_name)
 
void writePageMappingsToStatusFile (const std::vector< PageMapping > &page_mappings)
 
void renameCompactionStatusFile (const char *const from_status, const char *const to_status)
 
void compactFiles ()
 
virtual bool updatePageIfDeleted (FileInfo *file_info, ChunkKey &chunk_key, int32_t contingent, int32_t page_epoch, int32_t page_num)
 Deletes or recovers a page based on the last checkpointed epoch. More...
 
virtual bool failOnReadError () const
 True if a read error should cause a fatal error. More...
 

Static Public Member Functions

static void setNumPagesPerDataFile (size_t num_pages)
 
static void setNumPagesPerMetadataFile (size_t num_pages)
 

Public Attributes

ChunkKeyToChunkMap chunkIndex_
 

Static Public Attributes

static constexpr size_t DEFAULT_NUM_PAGES_PER_DATA_FILE {256}
 
static constexpr size_t DEFAULT_NUM_PAGES_PER_METADATA_FILE {4096}
 
static constexpr char const * COPY_PAGES_STATUS {"pending_data_compaction_0"}
 
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS {"pending_data_compaction_1"}
 
static constexpr char const * DELETE_EMPTY_FILES_STATUS {"pending_data_compaction_2"}
 
static constexpr char LEGACY_EPOCH_FILENAME [] = "epoch"
 
static constexpr char EPOCH_FILENAME [] = "epoch_metadata"
 
static constexpr char DB_META_FILENAME [] = "dbmeta"
 
static constexpr char FILE_MGR_VERSION_FILENAME [] = "filemgr_version"
 
static constexpr int32_t INVALID_VERSION = -1
 

Protected Member Functions

 FileMgr ()
 
FileInfo * createFile (const size_t pageSize, const size_t numPages)
 Adds a file to the file manager repository. More...
 
FileInfo * openExistingFile (const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
 
void createEpochFile (const std::string &epochFileName)
 
int32_t openAndReadLegacyEpochFile (const std::string &epochFileName)
 
void openAndReadEpochFile (const std::string &epochFileName)
 
void writeAndSyncEpochToDisk ()
 
void setEpoch (const int32_t newEpoch)
 
int32_t readVersionFromDisk (const std::string &versionFileName) const
 
void writeAndSyncVersionToDisk (const std::string &versionFileName, const int32_t version)
 
void processFileFutures (std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
 
virtual FileBuffer * createBufferUnlocked (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
 
void migrateToLatestFileMgrVersion ()
 
void migrateEpochFileV0 ()
 
OpenFilesResult openFiles ()
 
void clearFileInfos ()
 
void copySourcePageForCompaction (const Page &source_page, FileInfo *destination_file_info, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
 
int32_t copyPageWithoutHeaderSize (const Page &source_page, const Page &destination_page)
 
void sortAndCopyFilePagesForCompaction (size_t page_size, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
 
void updateMappedPagesVisibility (const std::vector< PageMapping > &page_mappings)
 
void deleteEmptyFiles ()
 
void resumeFileCompaction (const std::string &status_file_name)
 
std::vector< PageMapping > readPageMappingsFromStatusFile ()
 
 FileMgr (const int epoch)
 
virtual std::string describeSelf ()
 
void closePhysicalUnlocked ()
 
void syncFilesToDisk ()
 
void freePages ()
 
void initializeNumThreads (size_t num_reader_threads=0)
 
virtual FileBuffer * allocateBuffer (const size_t page_size, const ChunkKey &key, const size_t num_bytes)
 
void deleteBufferUnlocked (const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true)
 

Protected Attributes

int32_t maxRollbackEpochs_
 
std::string fileMgrBasePath_
 
std::map< int32_t, FileInfo * > files_
 
PageSizeFileMMap fileIndex_
 A map of files accessible via a file identifier. More...
 
size_t num_reader_threads_
 Maps page sizes to FileInfo objects. More...
 
size_t defaultPageSize_
 number of threads used when loading data More...
 
unsigned nextFileId_
 
int32_t db_version_
 the index of the next file id More...
 
int32_t fileMgrVersion_
 
const int32_t latestFileMgrVersion_ {1}
 
FILE * DBMetaFile_ = nullptr
 
std::mutex getPageMutex_
 pointer to DB level metadata More...
 
mapd_shared_mutex chunkIndexMutex_
 
mapd_shared_mutex files_rw_mutex_
 
mapd_shared_mutex mutex_free_page_
 
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
 
bool isFullyInitted_ {false}
 

Static Protected Attributes

static size_t num_pages_per_data_file_ {DEFAULT_NUM_PAGES_PER_DATA_FILE}
 
static size_t num_pages_per_metadata_file_ {DEFAULT_NUM_PAGES_PER_METADATA_FILE}
 

Private Member Functions

void rollOffOldData (const int32_t epochCeiling, const bool shouldCheckpoint)
 
void freePagesBeforeEpoch (const int32_t min_epoch)
 
void freePagesBeforeEpochUnlocked (const int32_t min_epoch, const ChunkKeyToChunkMap::iterator lower_bound, const ChunkKeyToChunkMap::iterator upper_bound)
 
FileBuffer * getOrCreateBuffer (const ChunkKey &key)
 
bool coreInit ()
 Determines file path, and if exists, runs file migration and opens and reads epoch file. More...
 
int32_t epoch () const
 
void writeDirtyBuffers ()
 

Private Attributes

GlobalFileMgr * gfm_
 
TablePair fileMgrKey_
 Global FileMgr. More...
 
Epoch epoch_
 
bool epochIsCheckpointed_ = true
 
FILE * epochFile_ = nullptr
 

Friends

class GlobalFileMgr
 

Detailed Description

Definition at line 148 of file FileMgr.h.

Constructor & Destructor Documentation

File_Namespace::FileMgr::FileMgr ( const int32_t  deviceId,
GlobalFileMgr gfm,
const TablePair  fileMgrKey,
const int32_t  max_rollback_epochs = -1,
const size_t  num_reader_threads = 0,
const int32_t  epoch = -1,
const size_t  defaultPageSize = DEFAULT_PAGE_SIZE 
)

Constructor.

Definition at line 47 of file FileMgr.cpp.

References init().

54  : AbstractBufferMgr(deviceId)
56  , defaultPageSize_(defaultPageSize)
57  , nextFileId_(0)
58  , gfm_(gfm)
59  , fileMgrKey_(fileMgrKey) {
60  init(num_reader_threads, epoch);
61 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:504
void init(const size_t num_reader_threads, const int32_t epochOverride)
Definition: FileMgr.cpp:249
GlobalFileMgr * gfm_
Definition: FileMgr.h:503
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:394
int32_t maxRollbackEpochs_
Definition: FileMgr.h:387
int32_t epoch() const
Definition: FileMgr.h:500
int32_t maxRollbackEpochs()
Returns value max_rollback_epochs.
Definition: FileMgr.h:298

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( const int32_t  deviceId,
GlobalFileMgr gfm,
const TablePair  fileMgrKey,
const size_t  defaultPageSize,
const bool  runCoreInit 
)

Definition at line 64 of file FileMgr.cpp.

References coreInit(), epochFile_, fileMgrBasePath_, fileMgrKey_, files_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, and to_string().

69  : AbstractBufferMgr(deviceId)
70  , maxRollbackEpochs_(-1)
71  , defaultPageSize_(defaultPageSize)
72  , nextFileId_(0)
73  , gfm_(gfm)
74  , fileMgrKey_(fileMgrKey) {
75  const std::string fileMgrDirPrefix("table");
76  const std::string FileMgrDirDelim("_");
77  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
78  std::to_string(fileMgrKey_.first) + // db_id
79  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
80  epochFile_ = nullptr;
81  files_.clear();
82  if (runCoreInit) {
83  coreInit();
84  }
85 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:504
std::string getBasePath() const
GlobalFileMgr * gfm_
Definition: FileMgr.h:503
std::string fileMgrBasePath_
Definition: FileMgr.h:388
std::string to_string(char const *&&v)
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:394
int32_t maxRollbackEpochs_
Definition: FileMgr.h:387
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
bool coreInit()
Determines file path, and if exists, runs file migration and opens and reads epoch file...
Definition: FileMgr.cpp:126

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( GlobalFileMgr gfm,
const size_t  defaultPageSize,
std::string  basePath 
)

Definition at line 87 of file FileMgr.cpp.

References init().

88  : AbstractBufferMgr(0)
89  , maxRollbackEpochs_(-1)
90  , fileMgrBasePath_(basePath)
91  , defaultPageSize_(defaultPageSize)
92  , nextFileId_(0)
93  , gfm_(gfm)
94  , fileMgrKey_(0, 0) {
95  init(basePath, -1);
96 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:504
void init(const size_t num_reader_threads, const int32_t epochOverride)
Definition: FileMgr.cpp:249
GlobalFileMgr * gfm_
Definition: FileMgr.h:503
std::string fileMgrBasePath_
Definition: FileMgr.h:388
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:394
int32_t maxRollbackEpochs_
Definition: FileMgr.h:387

+ Here is the call graph for this function:

File_Namespace::FileMgr::~FileMgr ( )
override virtual

Destructor.

Definition at line 106 of file FileMgr.cpp.

References chunkIndex_, File_Namespace::close(), DBMetaFile_, epochFile_, and files_.

106  {
107  // free memory used by FileInfo objects
108  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
109  delete chunkIt->second;
110  }
111  for (auto file_info_entry : files_) {
112  delete file_info_entry.second;
113  }
114 
115  if (epochFile_) {
116  close(epochFile_);
117  epochFile_ = nullptr;
118  }
119 
120  if (DBMetaFile_) {
122  DBMetaFile_ = nullptr;
123  }
124 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:119

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( )
protected

Definition at line 104 of file FileMgr.cpp.

104 : AbstractBufferMgr(0) {}
File_Namespace::FileMgr::FileMgr ( const int  epoch)
protected

Definition at line 99 of file FileMgr.cpp.

References Epoch::ceiling(), and epoch_.

99  : AbstractBufferMgr(-1) {
101 }
int32_t ceiling() const
Definition: Epoch.h:44
int32_t epoch() const
Definition: FileMgr.h:500

+ Here is the call graph for this function:

Member Function Documentation

AbstractBuffer * File_Namespace::FileMgr::alloc ( const size_t  numBytes = 0)
override

Definition at line 852 of file FileMgr.cpp.

References logger::FATAL, and LOG.

852  {
853  LOG(FATAL) << "Operation not supported";
854  return nullptr; // satisfy return-type warning
855 }
#define LOG(tag)
Definition: Logger.h:194
FileBuffer * File_Namespace::FileMgr::allocateBuffer ( const size_t  page_size,
const ChunkKey key,
const size_t  num_bytes 
)
protected virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1563 of file FileMgr.cpp.

Referenced by createBufferUnlocked().

1565  {
1566  return new FileBuffer(this, page_size, key, num_bytes);
1567 }

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::checkpoint ( )
override

Fsyncs data files, writes out epoch and fsyncs that.

Definition at line 685 of file FileMgr.cpp.

References describeSelf(), epoch(), freePages(), incrementEpoch(), rollOffOldData(), syncFilesToDisk(), VLOG, writeAndSyncEpochToDisk(), and writeDirtyBuffers().

Referenced by rollOffOldData().

685  {
686  VLOG(2) << "Checkpointing " << describeSelf() << " epoch: " << epoch();
688  rollOffOldData(epoch(), false /* shouldCheckpoint */);
689  syncFilesToDisk();
691  incrementEpoch();
692  freePages();
693 }
void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint)
Definition: FileMgr.cpp:666
int32_t incrementEpoch()
Definition: FileMgr.h:275
virtual std::string describeSelf()
Definition: FileMgr.cpp:679
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:638
int32_t epoch() const
Definition: FileMgr.h:500
#define VLOG(n)
Definition: Logger.h:297

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::checkpoint ( const int32_t  db_id,
const int32_t  tb_id 
)
inline override

Definition at line 262 of file FileMgr.h.

References logger::FATAL, and LOG.

262  {
263  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
264  }
#define LOG(tag)
Definition: Logger.h:194
void File_Namespace::FileMgr::clearFileInfos ( )
protected

Definition at line 236 of file FileMgr.cpp.

References File_Namespace::close(), fileIndex_, and files_.

Referenced by init().

236  {
237  for (auto file_info_entry : files_) {
238  auto file_info = file_info_entry.second;
239  if (file_info->f) {
240  close(file_info->f);
241  file_info->f = nullptr;
242  }
243  delete file_info;
244  }
245  files_.clear();
246  fileIndex_.clear();
247 }
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:392
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:119

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::clearSlabs ( )
inline override

Definition at line 214 of file FileMgr.h.

214 {} // noop
void File_Namespace::FileMgr::closePhysicalUnlocked ( )
protected

Definition at line 538 of file FileMgr.cpp.

References File_Namespace::close(), DBMetaFile_, epochFile_, and files_.

Referenced by closeRemovePhysical().

538  {
539  for (auto& [idx, file_info] : files_) {
540  if (file_info->f) {
541  close(file_info->f);
542  file_info->f = nullptr;
543  }
544  }
545 
546  if (DBMetaFile_) {
548  DBMetaFile_ = nullptr;
549  }
550 
551  if (epochFile_) {
552  close(epochFile_);
553  epochFile_ = nullptr;
554  }
555 }
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:119

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::closeRemovePhysical ( )
virtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 557 of file FileMgr.cpp.

References closePhysicalUnlocked(), files_rw_mutex_, getFileMgrBasePath(), and File_Namespace::renameForDelete().

Referenced by File_Namespace::GlobalFileMgr::removeTableRelatedDS().

557  {
558  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
560  /* rename for later deletion the directory containing table related data */
562 }
std::string getFileMgrBasePath() const
Definition: FileMgr.h:328
mapd_unique_lock< mapd_shared_mutex > write_lock
void renameForDelete(const std::string directoryName)
Renames a directory to DELETE_ME_<EPOCH>_<oldname>.
Definition: File.cpp:218
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:403

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::compactFiles ( )

Compacts metadata and data file pages and deletes resulting empty files (if any exist). Compaction occurs in 3 idempotent phases in order to enable graceful recovery if a crash/process interruption occurs in the middle of data compaction.

Phase 1: Create a status file that indicates initiation of this phase. Sort metadata/data files in order of files with the lowest number of free pages to those with the highest number of free pages. Copy over used pages from files at the end of the sorted order (files with the highest number of free pages) to those at the beginning of the sorted order (files with the lowest number of free pages). Keep destination/copied to pages as free while copying. Keep track of copied source to destination page mapping. Write page mapping to the status file (to be used during crash recovery if needed).

Phase 2: Rename status file to a file name that indicates initiation of this phase. Go through page mapping and mark source/copied from pages as free while making the destination/copied to pages as used.

Phase 3: Rename status file to a file name that indicates initiation of this phase. Delete all empty files (files containing only free pages). Delete status file.

Definition at line 1228 of file FileMgr.cpp.

References CHECK, COPY_PAGES_STATUS, DELETE_EMPTY_FILES_STATUS, deleteEmptyFiles(), files_, files_rw_mutex_, getFilePath(), renameCompactionStatusFile(), sortAndCopyFilePagesForCompaction(), UPDATE_PAGE_VISIBILITY_STATUS, updateMappedPagesVisibility(), and writePageMappingsToStatusFile().

Referenced by resumeFileCompaction().

1228  {
1229  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
1230  if (files_.empty()) {
1231  return;
1232  }
1233 
1234  auto copy_pages_status_file_path = getFilePath(COPY_PAGES_STATUS);
1235  CHECK(!boost::filesystem::exists(copy_pages_status_file_path));
1236  std::ofstream status_file(copy_pages_status_file_path.string(),
1237  std::ios::out | std::ios::binary);
1238  status_file.close();
1239 
1240  std::vector<PageMapping> page_mappings;
1241  std::set<Page> touched_pages;
1242  std::set<size_t> page_sizes;
1243  for (auto [file_id, file_info] : files_) {
1244  page_sizes.emplace(file_info->pageSize);
1245  }
1246  for (auto page_size : page_sizes) {
1247  sortAndCopyFilePagesForCompaction(page_size, page_mappings, touched_pages);
1248  }
1249 
1250  writePageMappingsToStatusFile(page_mappings);
1252 
1253  updateMappedPagesVisibility(page_mappings);
1255 
1256  deleteEmptyFiles();
1257 }
void sortAndCopyFilePagesForCompaction(size_t page_size, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
Definition: FileMgr.cpp:1265
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:369
void writePageMappingsToStatusFile(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1474
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:370
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:368
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
void updateMappedPagesVisibility(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1420
#define CHECK(condition)
Definition: Logger.h:203
mapd_unique_lock< mapd_shared_mutex > write_lock
void renameCompactionStatusFile(const char *const from_status, const char *const to_status)
Definition: FileMgr.cpp:1517
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:403
boost::filesystem::path getFilePath(const std::string &file_name)
Definition: FileMgr.h:337

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::copyPage ( Page srcPage,
FileMgr destFileMgr,
Page destPage,
const size_t  reservedHeaderSize,
const size_t  numBytes,
const size_t  offset 
)

Definition at line 564 of file FileMgr.cpp.

References CHECK, checked_malloc(), defaultPageSize_, File_Namespace::Page::fileId, free(), getFileInfoForFileId(), File_Namespace::Page::pageNum, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by init().

569  {
570  CHECK(offset + numBytes <= defaultPageSize_);
571  FileInfo* srcFileInfo = getFileInfoForFileId(srcPage.fileId);
572  FileInfo* destFileInfo = destFileMgr->getFileInfoForFileId(destPage.fileId);
573  int8_t* buffer = reinterpret_cast<int8_t*>(checked_malloc(numBytes));
574 
575  size_t bytesRead = srcFileInfo->read(
576  srcPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize, numBytes, buffer);
577  CHECK(bytesRead == numBytes);
578  size_t bytesWritten = destFileInfo->write(
579  destPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize,
580  numBytes,
581  buffer);
582  CHECK(bytesWritten == numBytes);
583  ::free(buffer);
584 }
void free(AbstractBuffer *buffer) override
Definition: FileMgr.cpp:857
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
FileInfo * getFileInfoForFileId(const int32_t fileId)
Definition: FileMgr.h:220
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:394
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::copyPageWithoutHeaderSize ( const Page source_page,
const Page destination_page 
)
protected

Copies content of source_page to destination_page without copying over the source_page header size. The header size is instead returned by the method. Not copying over the header size enables a use case where destination_page has all the content of the source_page but is still marked as a free page.

Definition at line 1390 of file FileMgr.cpp.

References CHECK, CHECK_EQ, File_Namespace::Page::fileId, File_Namespace::FileInfo::fileId, files_, File_Namespace::Page::pageNum, File_Namespace::FileInfo::pageSize, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by copySourcePageForCompaction().

1391  {
1392  FileInfo* source_file_info = files_[source_page.fileId];
1393  CHECK(source_file_info);
1394  CHECK_EQ(source_file_info->fileId, source_page.fileId);
1395 
1396  FileInfo* destination_file_info = files_[destination_page.fileId];
1397  CHECK(destination_file_info);
1398  CHECK_EQ(destination_file_info->fileId, destination_page.fileId);
1399  CHECK_EQ(source_file_info->pageSize, destination_file_info->pageSize);
1400 
1401  auto page_size = source_file_info->pageSize;
1402  auto buffer = std::make_unique<int8_t[]>(page_size);
1403  size_t bytes_read =
1404  source_file_info->read(source_page.pageNum * page_size, page_size, buffer.get());
1405  CHECK_EQ(page_size, bytes_read);
1406 
1407  auto header_size_offset = sizeof(int32_t);
1408  size_t bytes_written = destination_file_info->write(
1409  (destination_page.pageNum * page_size) + header_size_offset,
1410  page_size - header_size_offset,
1411  buffer.get() + header_size_offset);
1412  CHECK_EQ(page_size - header_size_offset, bytes_written);
1413  return reinterpret_cast<int32_t*>(buffer.get())[0];
1414 }
#define CHECK_EQ(x, y)
Definition: Logger.h:211
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::copySourcePageForCompaction ( const Page source_page,
FileInfo destination_file_info,
std::vector< PageMapping > &  page_mappings,
std::set< Page > &  touched_pages 
)
protected

Copies a used page (indicated by the top of the source_used_pages set) from the given source file to a free page in the given destination file. Source and destination pages are recorded in the given page_mappings vector after copying is done.

Definition at line 1360 of file FileMgr.cpp.

References CHECK, CHECK_NE, copyPageWithoutHeaderSize(), File_Namespace::Page::fileId, File_Namespace::FileInfo::fileId, File_Namespace::FileInfo::getFreePage(), and File_Namespace::Page::pageNum.

Referenced by sortAndCopyFilePagesForCompaction().

1363  {
1364  size_t destination_page_num = destination_file_info->getFreePage();
1365  CHECK_NE(destination_page_num, static_cast<size_t>(-1));
1366  Page destination_page{destination_file_info->fileId, destination_page_num};
1367 
1368  // Assert that the same pages are not copied or overridden multiple times
1369  CHECK(touched_pages.find(source_page) == touched_pages.end());
1370  touched_pages.emplace(source_page);
1371 
1372  CHECK(touched_pages.find(destination_page) == touched_pages.end());
1373  touched_pages.emplace(destination_page);
1374 
1375  auto header_size = copyPageWithoutHeaderSize(source_page, destination_page);
1376  page_mappings.emplace_back(static_cast<size_t>(source_page.fileId),
1377  source_page.pageNum,
1378  header_size,
1379  static_cast<size_t>(destination_page.fileId),
1380  destination_page.pageNum);
1381 }
int32_t copyPageWithoutHeaderSize(const Page &source_page, const Page &destination_page)
Definition: FileMgr.cpp:1390
#define CHECK_NE(x, y)
Definition: Logger.h:212
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::coreInit ( )
private

Determines file path, and if exists, runs file migration and opens and reads epoch file.

Returns
a boolean representing whether the directory path existed

Definition at line 126 of file FileMgr.cpp.

References EPOCH_FILENAME, logger::FATAL, fileMgrBasePath_, fileMgrKey_, files_rw_mutex_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, LOG, migrateToLatestFileMgrVersion(), openAndReadEpochFile(), and to_string().

Referenced by FileMgr(), and init().

126  {
127  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
128  const std::string fileMgrDirPrefix("table");
129  const std::string FileMgrDirDelim("_");
130  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
131  std::to_string(fileMgrKey_.first) + // db_id
132  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
133  boost::filesystem::path path(fileMgrBasePath_);
134  if (boost::filesystem::exists(path)) {
135  if (!boost::filesystem::is_directory(path)) {
136  LOG(FATAL) << "Specified path '" << fileMgrBasePath_
137  << "' for table data is not a directory.";
138  }
141  return true;
142  }
143  return false;
144 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:504
std::string getBasePath() const
#define LOG(tag)
Definition: Logger.h:194
void migrateToLatestFileMgrVersion()
Definition: FileMgr.cpp:1113
GlobalFileMgr * gfm_
Definition: FileMgr.h:503
std::string fileMgrBasePath_
Definition: FileMgr.h:388
std::string to_string(char const *&&v)
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:378
void openAndReadEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:618
mapd_unique_lock< mapd_shared_mutex > write_lock
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:403

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::createBuffer ( const ChunkKey key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
override

Creates a chunk with the specified key and page size.

Definition at line 695 of file FileMgr.cpp.

References chunkIndexMutex_, and createBufferUnlocked().

697  {
698  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
699  return createBufferUnlocked(key, pageSize, numBytes);
700 }
virtual FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:705
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402

+ Here is the call graph for this function:

FileBuffer * File_Namespace::FileMgr::createBufferUnlocked ( const ChunkKey key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
protectedvirtual
Todo:
Make all accesses to chunkIndex_ thread-safe

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 705 of file FileMgr.cpp.

References allocateBuffer(), CHECK, chunkIndex_, defaultPageSize_, and show_chunk().

Referenced by createBuffer(), and getOrCreateBuffer().

707  {
708  size_t actualPageSize = pageSize;
709  if (actualPageSize == 0) {
710  actualPageSize = defaultPageSize_;
711  }
713  // we will do this lazily and not allocate space for the Chunk (i.e.
714  // FileBuffer yet)
715 
716  CHECK(chunkIndex_.find(key) == chunkIndex_.end())
717  << "Chunk already exists for key: " << show_chunk(key);
718 
719  chunkIndex_[key] = allocateBuffer(actualPageSize, key, numBytes);
720  return (chunkIndex_[key]);
721 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:85
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:394
virtual FileBuffer * allocateBuffer(const size_t page_size, const ChunkKey &key, const size_t num_bytes)
Definition: FileMgr.cpp:1563
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createEpochFile ( const std::string &  epochFileName)
protected

Definition at line 586 of file FileMgr.cpp.

References Epoch::byte_size(), File_Namespace::create(), epochFile_, logger::FATAL, fileMgrBasePath_, LOG, and writeAndSyncEpochToDisk().

Referenced by init(), and migrateEpochFileV0().

586  {
587  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
588  if (boost::filesystem::exists(epochFilePath)) {
589  LOG(FATAL) << "Epoch file `" << epochFilePath << "` already exists";
590  }
591  epochFile_ = create(epochFilePath, sizeof(Epoch::byte_size()));
592  // Write out current epoch to file - which if this
593  // function is being called should be 0
595 }
#define LOG(tag)
Definition: Logger.h:194
std::string fileMgrBasePath_
Definition: FileMgr.h:388
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:49
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:638
static size_t byte_size()
Definition: Epoch.h:63

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::createFile ( const size_t  pageSize,
const size_t  numPages 
)
protected

Adds a file to the file manager repository.

This method will create a FileInfo object for the file being added, and it will create the corresponding file on physical disk with the indicated number of pages pre-allocated.

A pointer to the FileInfo object is returned, which itself has a file pointer (FILE*) and a file identifier (int32_t fileId).

Parameters
fileName: The name given to the file in physical storage.
pageSize: The logical page size for the pages in the file.
numPages: The number of logical pages to initially allocate for the file.
Returns
FileInfo* A pointer to the FileInfo object of the added file.

Definition at line 946 of file FileMgr.cpp.

References CHECK, File_Namespace::create(), f, logger::FATAL, fileIndex_, fileMgrBasePath_, files_, files_rw_mutex_, LOG, and nextFileId_.

Referenced by requestFreePage(), and requestFreePages().

946  {
947  // check arguments
948  if (pageSize == 0 || numPages == 0) {
949  LOG(FATAL) << "File creation failed: pageSize and numPages must be greater than 0.";
950  }
951 
952  // create the new file
953  FILE* f = create(fileMgrBasePath_,
954  nextFileId_,
955  pageSize,
956  numPages); // TM: not sure if I like naming scheme here - should be in
957  // separate namespace?
958  CHECK(f);
959 
960  // instantiate a new FileInfo for the newly created file
961  int32_t fileId = nextFileId_++;
962  FileInfo* fInfo =
963  new FileInfo(this, fileId, f, pageSize, numPages, true); // true means init file
964  CHECK(fInfo);
965 
966  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
967  // update file manager data structures
968  files_[fileId] = fInfo;
969  fileIndex_.insert(std::pair<size_t, int32_t>(pageSize, fileId));
970 
971  return fInfo;
972 }
#define LOG(tag)
Definition: Logger.h:194
std::string fileMgrBasePath_
Definition: FileMgr.h:388
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:49
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:392
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
#define CHECK(condition)
Definition: Logger.h:203
char * f
mapd_unique_lock< mapd_shared_mutex > write_lock
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:403

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createTopLevelMetadata ( )

Definition at line 1040 of file FileMgr.cpp.

References DB_META_FILENAME, db_version_, logger::FATAL, getDBVersion(), INVALID_VERSION, LOG, readVersionFromDisk(), and writeAndSyncVersionToDisk().

1040  {
1042 
1043  if (db_version_ > getDBVersion()) {
1044  LOG(FATAL) << "DB forward compatibility is not supported. Version of OmniSci "
1045  "software used is older than the version of DB being read: "
1046  << db_version_;
1047  }
1049  // new system, or we are moving forward versions
1050  // system wide migration would go here if required
1052  return;
1053  }
1054 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1056
#define LOG(tag)
Definition: Logger.h:194
int32_t db_version_
the index of the next file id
Definition: FileMgr.h:396
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1074
int32_t getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:1032
static constexpr char DB_META_FILENAME[]
Definition: FileMgr.h:379
static constexpr int32_t INVALID_VERSION
Definition: FileMgr.h:381

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffer ( const ChunkKey key,
const bool  purge = true 
)
override

Deletes the chunk with the specified key.

Definition at line 728 of file FileMgr.cpp.

References CHECK, chunkIndex_, chunkIndexMutex_, deleteBufferUnlocked(), and show_chunk().

728  {
729  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
730  auto chunk_it = chunkIndex_.find(key);
731  CHECK(chunk_it != chunkIndex_.end())
732  << "Chunk does not exist for key: " << show_chunk(key);
733  deleteBufferUnlocked(chunk_it, purge);
734 }
void deleteBufferUnlocked(const ChunkKeyToChunkMap::iterator chunk_it, const bool purge=true)
Definition: FileMgr.cpp:736
std::string show_chunk(const ChunkKey &key)
Definition: types.h:85
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
#define CHECK(condition)
Definition: Logger.h:203
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffersWithPrefix ( const ChunkKey keyPrefix,
const bool  purge = true 
)
override

Definition at line 746 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

746  {
747  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
748  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
749  if (chunkIt == chunkIndex_.end()) {
750  return; // should we throw?
751  }
752  while (chunkIt != chunkIndex_.end() &&
753  std::search(chunkIt->first.begin(),
754  chunkIt->first.begin() + keyPrefix.size(),
755  keyPrefix.begin(),
756  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
757  if (purge) {
758  chunkIt->second->freePages();
759  }
760  //@todo need a way to represent delete in non purge case
761  delete chunkIt->second;
762  chunkIndex_.erase(chunkIt++);
763  }
764 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402
void File_Namespace::FileMgr::deleteBufferUnlocked ( const ChunkKeyToChunkMap::iterator  chunk_it,
const bool  purge = true 
)
protected

Definition at line 736 of file FileMgr.cpp.

References chunkIndex_.

Referenced by deleteBuffer().

737  {
738  if (purge) {
739  chunk_it->second->freePages();
740  }
741  //@todo need a way to represent delete in non purge case
742  delete chunk_it->second;
743  chunkIndex_.erase(chunk_it);
744 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::deleteEmptyFiles ( )
protected

Deletes files that contain only free pages. Also deletes the compaction status file.

Definition at line 1453 of file FileMgr.cpp.

References CHECK, CHECK_EQ, DELETE_EMPTY_FILES_STATUS, fileMgrBasePath_, files_, File_Namespace::get_data_file_path(), and getFilePath().

Referenced by compactFiles(), and resumeFileCompaction().

1453  {
1454  for (auto [file_id, file_info] : files_) {
1455  CHECK_EQ(file_id, file_info->fileId);
1456  if (file_info->freePages.size() == file_info->numPages) {
1457  fclose(file_info->f);
1458  file_info->f = nullptr;
1459  auto file_path = get_data_file_path(fileMgrBasePath_, file_id, file_info->pageSize);
1460  boost::filesystem::remove(file_path);
1461  }
1462  }
1463 
1464  auto status_file_path = getFilePath(DELETE_EMPTY_FILES_STATUS);
1465  CHECK(boost::filesystem::exists(status_file_path));
1466  boost::filesystem::remove(status_file_path);
1467 }
#define CHECK_EQ(x, y)
Definition: Logger.h:211
std::string fileMgrBasePath_
Definition: FileMgr.h:388
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:370
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
#define CHECK(condition)
Definition: Logger.h:203
std::string get_data_file_path(const std::string &base_path, int file_id, size_t page_size)
Definition: File.cpp:42
boost::filesystem::path getFilePath(const std::string &file_name)
Definition: FileMgr.h:337

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::describeSelf ( )
protectedvirtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 679 of file FileMgr.cpp.

References fileMgrKey_.

Referenced by checkpoint(), and setEpoch().

679  {
680  stringstream ss;
681  ss << "table (" << fileMgrKey_.first << ", " << fileMgrKey_.second << ")";
682  return ss.str();
683 }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:504

+ Here is the caller graph for this function:

virtual int32_t File_Namespace::FileMgr::epoch ( int32_t  db_id,
int32_t  tb_id 
) const
inlinevirtual

Returns the current value of the epoch, which should be one greater than the value recorded at the last checkpoint. Because a FileMgr only contains buffers from one table, we can just return the FileMgr's epoch instead of finding a table-specific epoch.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 271 of file FileMgr.h.

References epoch().

Referenced by epoch(), File_Namespace::FileBuffer::getFileMgrEpoch(), and File_Namespace::FileInfo::openExistingFile().

271 { return epoch(); }
int32_t epoch() const
Definition: FileMgr.h:500

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::epoch ( ) const
inlineprivate

Definition at line 500 of file FileMgr.h.

Referenced by checkpoint(), init(), openAndReadLegacyEpochFile(), updatePageIfDeleted(), and writeDirtyBuffers().

500 { return static_cast<int32_t>(epoch_.ceiling()); }
int32_t ceiling() const
Definition: Epoch.h:44

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::epochFloor ( ) const
inline

Definition at line 273 of file FileMgr.h.

Referenced by getStorageStats().

273 { return static_cast<int32_t>(epoch_.floor()); }
int32_t floor() const
Definition: Epoch.h:43

+ Here is the caller graph for this function:

virtual bool File_Namespace::FileMgr::failOnReadError ( ) const
inlinevirtual

True if a read error should cause a fatal error.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 362 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::FileBuffer().

362 { return true; }

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::fetchBuffer ( const ChunkKey key,
AbstractBuffer destBuffer,
const size_t  numBytes 
)
override

Definition at line 774 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, Data_Namespace::AbstractBuffer::copyTo(), logger::FATAL, Data_Namespace::AbstractBuffer::isDirty(), LOG, show_chunk(), and Data_Namespace::AbstractBuffer::size().

776  {
777  // reads chunk specified by ChunkKey into AbstractBuffer provided by
778  // destBuffer
779  if (destBuffer->isDirty()) {
780  LOG(FATAL)
781  << "Aborting attempt to fetch a chunk marked dirty. Chunk inconsistency for key: "
782  << show_chunk(key);
783  }
784  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
785  auto chunkIt = chunkIndex_.find(key);
786  if (chunkIt == chunkIndex_.end()) {
787  LOG(FATAL) << "Chunk does not exist for key: " << show_chunk(key);
788  }
789  chunkIndexReadLock.unlock();
790 
791  AbstractBuffer* chunk = chunkIt->second;
792  // chunk's size is either specified in function call with numBytes or we
793  // just look at pageSize * numPages in FileBuffer
794  if (numBytes > 0 && numBytes > chunk->size()) {
795  LOG(FATAL) << "Chunk retrieved for key `" << show_chunk(key) << "` is smaller ("
796  << chunk->size() << ") than number of bytes requested (" << numBytes
797  << ")";
798  }
799 
800  chunk->copyTo(destBuffer, numBytes);
801 }
#define LOG(tag)
Definition: Logger.h:194
std::string show_chunk(const ChunkKey &key)
Definition: types.h:85
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
An AbstractBuffer is a unit of data management for a data manager.
void copyTo(AbstractBuffer *destination_buffer, const size_t num_bytes=0)
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402

+ Here is the call graph for this function:

void File_Namespace::FileMgr::free ( AbstractBuffer buffer)
override

Definition at line 857 of file FileMgr.cpp.

References logger::FATAL, and LOG.

Referenced by copyPage().

857  {
858  LOG(FATAL) << "Operation not supported";
859 }
#define LOG(tag)
Definition: Logger.h:194

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::free_page ( std::pair< FileInfo *, int32_t > &&  page)

Definition at line 1164 of file FileMgr.cpp.

References free_pages_, and mutex_free_page_.

Referenced by File_Namespace::FileInfo::freePage(), and freePages().

1164  {
1165  std::unique_lock<mapd_shared_mutex> lock(mutex_free_page_);
1166  free_pages_.push_back(page);
1167 }
mapd_shared_mutex mutex_free_page_
Definition: FileMgr.h:405
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:406

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePages ( )
protected

Definition at line 1555 of file FileMgr.cpp.

References free_page(), free_pages_, and mutex_free_page_.

Referenced by checkpoint(), File_Namespace::CachingFileMgr::init(), init(), and sortAndCopyFilePagesForCompaction().

1555  {
1556  mapd_unique_lock<mapd_shared_mutex> free_pages_write_lock(mutex_free_page_);
1557  for (auto& free_page : free_pages_) {
1558  free_page.first->freePageDeferred(free_page.second);
1559  }
1560  free_pages_.clear();
1561 }
mapd_shared_mutex mutex_free_page_
Definition: FileMgr.h:405
std::vector< std::pair< FileInfo *, int32_t > > free_pages_
Definition: FileMgr.h:406
void free_page(std::pair< FileInfo *, int32_t > &&page)
Definition: FileMgr.cpp:1164

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePagesBeforeEpoch ( const int32_t  min_epoch)
private

Definition at line 652 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, and freePagesBeforeEpochUnlocked().

Referenced by rollOffOldData().

652  {
653  mapd_shared_lock<mapd_shared_mutex> chunk_index_read_lock(chunkIndexMutex_);
654  freePagesBeforeEpochUnlocked(min_epoch, chunkIndex_.begin(), chunkIndex_.end());
655 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
void freePagesBeforeEpochUnlocked(const int32_t min_epoch, const ChunkKeyToChunkMap::iterator lower_bound, const ChunkKeyToChunkMap::iterator upper_bound)
Definition: FileMgr.cpp:657
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::freePagesBeforeEpochUnlocked ( const int32_t  min_epoch,
const ChunkKeyToChunkMap::iterator  lower_bound,
const ChunkKeyToChunkMap::iterator  upper_bound 
)
private

Definition at line 657 of file FileMgr.cpp.

References gpu_enabled::upper_bound().

Referenced by freePagesBeforeEpoch().

660  {
661  for (auto chunkIt = lower_bound; chunkIt != upper_bound; ++chunkIt) {
662  chunkIt->second->freePagesBeforeEpoch(min_epoch);
663  }
664 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const TablePair File_Namespace::FileMgr::get_fileMgrKey ( ) const
inline

Definition at line 335 of file FileMgr.h.

Referenced by updatePageIfDeleted().

335 { return fileMgrKey_; }
TablePair fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:504

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getAllocated ( )
inlineoverride

Definition at line 217 of file FileMgr.h.

217 { return 0; }
FileBuffer * File_Namespace::FileMgr::getBuffer ( const ChunkKey key,
const size_t  numBytes = 0 
)
override

Returns a pointer to the chunk with the specified key.

Definition at line 766 of file FileMgr.cpp.

References CHECK, chunkIndex_, chunkIndexMutex_, and show_chunk().

766  {
767  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
768  auto chunkIt = chunkIndex_.find(key);
769  CHECK(chunkIt != chunkIndex_.end())
770  << "Chunk does not exist for key: " << show_chunk(key);
771  return chunkIt->second;
772 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:85
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
#define CHECK(condition)
Definition: Logger.h:203
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402

+ Here is the call graph for this function:

void File_Namespace::FileMgr::getChunkMetadataVecForKeyPrefix ( ChunkMetadataVector chunkMetadataVec,
const ChunkKey keyPrefix 
)
override

Definition at line 980 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

981  {
982  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(
983  chunkIndexMutex_); // is this guarding the right structure? it looks like we only
984  // read here for chunk
985  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
986  if (chunkIt == chunkIndex_.end()) {
987  return; // throw?
988  }
989  while (chunkIt != chunkIndex_.end() &&
990  std::search(chunkIt->first.begin(),
991  chunkIt->first.begin() + keyPrefix.size(),
992  keyPrefix.begin(),
993  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
994  if (chunkIt->second->hasEncoder()) {
995  auto chunk_metadata = std::make_shared<ChunkMetadata>();
996  chunkIt->second->encoder_->getMetadata(chunk_metadata);
997  chunkMetadataVec.emplace_back(chunkIt->first, chunk_metadata);
998  }
999  chunkIt++;
1000  }
1001 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402
bool File_Namespace::FileMgr::getDBConvert ( ) const

Definition at line 1036 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBConvert(), and gfm_.

1036  {
1037  return gfm_->getDBConvert();
1038 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:503

+ Here is the call graph for this function:

int32_t File_Namespace::FileMgr::getDBVersion ( ) const

Index for looking up chunks.

Definition at line 1032 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBVersion(), and gfm_.

Referenced by createTopLevelMetadata().

1032  {
1033  return gfm_->getDBVersion();
1034 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:503

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FILE * File_Namespace::FileMgr::getFileForFileId ( const int32_t  fileId)

Returns FILE pointer associated with requested fileId.

See Also
FileBuffer

Definition at line 974 of file FileMgr.cpp.

References CHECK, and files_.

Referenced by File_Namespace::FileBuffer::readMetadata(), and File_Namespace::FileBuffer::writeMetadata().

974  {
975  CHECK(fileId >= 0);
976  CHECK(files_.find(fileId) != files_.end());
977  return files_[fileId]->f;
978 }
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the caller graph for this function:

FileInfo* File_Namespace::FileMgr::getFileInfoForFileId ( const int32_t  fileId)
inline

Definition at line 220 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileBuffer::copyPage(), copyPage(), File_Namespace::FileBuffer::freePage(), File_Namespace::readForThread(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeHeader().

220 { return files_[fileId]; }
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::getFileMgrBasePath ( ) const
inline

Definition at line 328 of file FileMgr.h.

Referenced by closeRemovePhysical().

328 { return fileMgrBasePath_; }
std::string fileMgrBasePath_
Definition: FileMgr.h:388

+ Here is the caller graph for this function:

boost::filesystem::path File_Namespace::FileMgr::getFilePath ( const std::string &  file_name)
inline

Definition at line 337 of file FileMgr.h.

Referenced by compactFiles(), deleteEmptyFiles(), readPageMappingsFromStatusFile(), renameCompactionStatusFile(), resumeFileCompaction(), and writePageMappingsToStatusFile().

337  {
338  return boost::filesystem::path(fileMgrBasePath_) / file_name;
339  }
std::string fileMgrBasePath_
Definition: FileMgr.h:388

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getInUseSize ( )
inlineoverride

Definition at line 216 of file FileMgr.h.

216 { return 0; }
size_t File_Namespace::FileMgr::getMaxSize ( )
inlineoverride

Definition at line 215 of file FileMgr.h.

215 { return 0; }
FileMetadata File_Namespace::FileMgr::getMetadataForFile ( const boost::filesystem::directory_iterator &  fileIterator)

Definition at line 146 of file FileMgr.cpp.

References CHECK_EQ, logger::FATAL, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, omnisci::file_size(), File_Namespace::FileMetadata::file_size, File_Namespace::FileMetadata::is_data_file, LOG, MAPD_FILE_EXT, File_Namespace::FileMetadata::num_pages, and File_Namespace::FileMetadata::page_size.

Referenced by getStorageStats(), init(), and openFiles().

147  {
148  FileMetadata fileMetadata;
149  fileMetadata.is_data_file = false;
150  fileMetadata.file_path = fileIterator->path().string();
151  if (!boost::filesystem::is_regular_file(fileIterator->status())) {
152  return fileMetadata;
153  }
154  // note that boost::filesystem leaves preceding dot on
155  // extension - hence MAPD_FILE_EXT is ".mapd"
156  std::string extension(fileIterator->path().extension().string());
157  if (extension == MAPD_FILE_EXT) {
158  std::string fileStem(fileIterator->path().stem().string());
159  // remove trailing dot if any
160  if (fileStem.size() > 0 && fileStem.back() == '.') {
161  fileStem = fileStem.substr(0, fileStem.size() - 1);
162  }
163  size_t dotPos = fileStem.find_last_of("."); // should only be one
164  if (dotPos == std::string::npos) {
165  LOG(FATAL) << "File `" << fileIterator->path()
166  << "` does not carry page size information in the filename.";
167  }
168  fileMetadata.is_data_file = true;
169  fileMetadata.file_id = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
170  fileMetadata.page_size =
171  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
172 
173  fileMetadata.file_size = boost::filesystem::file_size(fileMetadata.file_path);
174  CHECK_EQ(fileMetadata.file_size % fileMetadata.page_size,
175  size_t(0)); // should be no partial pages
176  fileMetadata.num_pages = fileMetadata.file_size / fileMetadata.page_size;
177  }
178  return fileMetadata;
179 }
#define CHECK_EQ(x, y)
Definition: Logger.h:211
#define LOG(tag)
Definition: Logger.h:194
#define MAPD_FILE_EXT
Definition: File.h:25
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

MgrType File_Namespace::FileMgr::getMgrType ( )
inlineoverride

Definition at line 211 of file FileMgr.h.

211 { return FILE_MGR; };
size_t File_Namespace::FileMgr::getNumChunks ( )
inlineoverride

Definition at line 315 of file FileMgr.h.

315  {
316  mapd_shared_lock<mapd_shared_mutex> read_lock(chunkIndexMutex_);
317  return chunkIndex_.size();
318  }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
mapd_shared_lock< mapd_shared_mutex > read_lock
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402
size_t File_Namespace::FileMgr::getNumReaderThreads ( )
inline

Returns the number of threads, as defined by the num-reader-threads parameter, that should be used during the initial load and subsequent reads of data.

Definition at line 304 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::read().

304 { return num_reader_threads_; }
size_t num_reader_threads_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:393

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getNumUsedMetadataPages ( ) const

Definition at line 1013 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

1013  {
1014  size_t num_used_metadata_pages = 0;
1015  mapd_shared_lock<mapd_shared_mutex> read_lock(chunkIndexMutex_);
1016  for (const auto& chunkIt : chunkIndex_) {
1017  num_used_metadata_pages += chunkIt.second->numMetadataPages();
1018  }
1019  return num_used_metadata_pages;
1020 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
mapd_shared_lock< mapd_shared_mutex > read_lock
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402
size_t File_Namespace::FileMgr::getNumUsedMetadataPagesForChunkKey ( const ChunkKey chunkKey) const

Definition at line 1022 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

1022  {
1023  mapd_shared_lock<mapd_shared_mutex> read_lock(chunkIndexMutex_);
1024  const auto& chunkIt = chunkIndex_.find(chunkKey);
1025  if (chunkIt != chunkIndex_.end()) {
1026  return chunkIt->second->numMetadataPages();
1027  } else {
1028  throw std::runtime_error("Chunk was not found.");
1029  }
1030 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
mapd_shared_lock< mapd_shared_mutex > read_lock
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402
size_t File_Namespace::FileMgr::getNumUsedPages ( ) const

Definition at line 1003 of file FileMgr.cpp.

References files_, and files_rw_mutex_.

1003  {
1004  size_t num_used_pages = 0;
1005  mapd_shared_lock<mapd_shared_mutex> read_lock(files_rw_mutex_);
1006  for (const auto file_info_entry : files_) {
1007  num_used_pages +=
1008  (file_info_entry.second->numPages - file_info_entry.second->freePages.size());
1009  }
1010  return num_used_pages;
1011 }
mapd_shared_lock< mapd_shared_mutex > read_lock
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:403
FileBuffer * File_Namespace::FileMgr::getOrCreateBuffer ( const ChunkKey key)
private

Definition at line 1594 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, createBufferUnlocked(), and defaultPageSize_.

Referenced by putBuffer().

1594  {
1595  FileBuffer* buf;
1596  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
1597  auto chunkIt = chunkIndex_.find(key);
1598  if (chunkIt == chunkIndex_.end()) {
1600  } else {
1601  buf = chunkIt->second;
1602  }
1603  return buf;
1604 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:394
virtual FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:705
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StorageStats File_Namespace::FileMgr::getStorageStats ( )

Definition at line 332 of file FileMgr.cpp.

References CHECK, File_Namespace::StorageStats::data_file_count, File_Namespace::StorageStats::epoch, File_Namespace::StorageStats::epoch_floor, epochFloor(), logger::FATAL, File_Namespace::FileMetadata::file_size, fileMgrBasePath_, files_, files_rw_mutex_, getMetadataForFile(), File_Namespace::FileMetadata::is_data_file, File_Namespace::anonymous_namespace{FileMgr.cpp}::is_metadata_file(), isFullyInitted_, lastCheckpointedEpoch(), LOG, File_Namespace::StorageStats::metadata_file_count, File_Namespace::FileMetadata::num_pages, num_pages_per_metadata_file_, File_Namespace::FileMetadata::page_size, File_Namespace::StorageStats::total_data_file_size, File_Namespace::StorageStats::total_data_page_count, File_Namespace::StorageStats::total_free_data_page_count, File_Namespace::StorageStats::total_free_metadata_page_count, File_Namespace::StorageStats::total_metadata_file_size, and File_Namespace::StorageStats::total_metadata_page_count.

332  {
333  StorageStats storage_stats;
334  mapd_shared_lock<mapd_shared_mutex> read_lock(files_rw_mutex_);
335  if (!isFullyInitted_) {
336  CHECK(!fileMgrBasePath_.empty());
337  boost::filesystem::path path(fileMgrBasePath_);
338  if (boost::filesystem::exists(path)) {
339  if (!boost::filesystem::is_directory(path)) {
340  LOG(FATAL) << "getStorageStats: Specified path '" << fileMgrBasePath_
341  << "' for table data is not a directory.";
342  }
343 
344  storage_stats.epoch = lastCheckpointedEpoch();
345  storage_stats.epoch_floor = epochFloor();
346  boost::filesystem::directory_iterator
347  endItr; // default construction yields past-the-end
348  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr;
349  ++fileIt) {
350  FileMetadata file_metadata = getMetadataForFile(fileIt);
351  if (file_metadata.is_data_file) {
352  if (is_metadata_file(file_metadata.file_size,
353  file_metadata.page_size,
355  storage_stats.metadata_file_count++;
356  storage_stats.total_metadata_file_size += file_metadata.file_size;
357  storage_stats.total_metadata_page_count += file_metadata.num_pages;
358  } else {
359  storage_stats.data_file_count++;
360  storage_stats.total_data_file_size += file_metadata.file_size;
361  storage_stats.total_data_page_count += file_metadata.num_pages;
362  }
363  }
364  }
365  }
366  } else {
367  storage_stats.epoch = lastCheckpointedEpoch();
368  storage_stats.epoch_floor = epochFloor();
369  storage_stats.total_free_metadata_page_count = 0;
370  storage_stats.total_free_data_page_count = 0;
371 
372  // We already initialized this table so take the faster path of walking through the
373  // FileInfo objects and getting metadata from there
374  for (const auto& file_info_entry : files_) {
375  const auto file_info = file_info_entry.second;
376  if (is_metadata_file(
377  file_info->size(), file_info->pageSize, num_pages_per_metadata_file_)) {
378  storage_stats.metadata_file_count++;
379  storage_stats.total_metadata_file_size +=
380  file_info->pageSize * file_info->numPages;
381  storage_stats.total_metadata_page_count += file_info->numPages;
382  storage_stats.total_free_metadata_page_count.value() +=
383  file_info->freePages.size();
384  } else {
385  storage_stats.data_file_count++;
386  storage_stats.total_data_file_size += file_info->pageSize * file_info->numPages;
387  storage_stats.total_data_page_count += file_info->numPages;
388  storage_stats.total_free_data_page_count.value() += file_info->freePages.size();
389  }
390  }
391  }
392  return storage_stats;
393 }
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator)
Definition: FileMgr.cpp:146
#define LOG(tag)
Definition: Logger.h:194
std::string fileMgrBasePath_
Definition: FileMgr.h:388
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:410
bool is_metadata_file(size_t file_size, size_t page_size, size_t num_pages_per_metadata_file)
Definition: FileMgr.cpp:324
mapd_shared_lock< mapd_shared_mutex > read_lock
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
#define CHECK(condition)
Definition: Logger.h:203
int32_t epochFloor() const
Definition: FileMgr.h:273
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:403
int32_t lastCheckpointedEpoch()
Returns value of epoch at last checkpoint.
Definition: FileMgr.h:291

+ Here is the call graph for this function:

std::string File_Namespace::FileMgr::getStringMgrType ( )
inlineoverride

Definition at line 212 of file FileMgr.h.

212 { return ToString(FILE_MGR); }  // string form of the FILE_MGR manager type
virtual bool File_Namespace::FileMgr::hasFileMgrKey ( ) const
inlinevirtual

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 334 of file FileMgr.h.

334 { return true; }  // base FileMgr always answers true; CachingFileMgr reimplements this
int32_t File_Namespace::FileMgr::incrementEpoch ( )
inline

Definition at line 275 of file FileMgr.h.

References logger::FATAL, LOG, and Epoch::max_allowable_epoch().

Referenced by checkpoint(), and init().

275  {
     // Advances epoch_ by one, marks the new epoch as not yet checkpointed, and returns
     // the new epoch value.
276  int32_t newEpoch = epoch_.increment();
277  epochIsCheckpointed_ = false;
278  // We test for error here instead of in Epoch::increment so we can log FileMgr
279  // metadata
280  if (newEpoch > Epoch::max_allowable_epoch()) {
     // the two members of fileMgrKey_ are logged to identify the affected table
281  LOG(FATAL) << "Epoch for table (" << fileMgrKey_.first << ", " << fileMgrKey_.second
282  << ") greater than maximum allowed value of "
283  << Epoch::max_allowable_epoch() << ".";
284  }
285  return newEpoch;
286  }
TablePair fileMgrKey_
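Table key (pair of ids) identifying this FileMgr. Note: the brief "Global FileMgr." printed here by Doxygen describes gfm_ (FileMgr.h:503), not this member.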
Definition: FileMgr.h:504
#define LOG(tag)
Definition: Logger.h:194
static int64_t max_allowable_epoch()
Definition: Epoch.h:69
int32_t increment()
Definition: Epoch.h:54

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::init ( const size_t  num_reader_threads,
const int32_t  epochOverride 
)

Definition at line 249 of file FileMgr.cpp.

References Epoch::ceiling(), CHECK, chunkIndex_, clearFileInfos(), coreInit(), createEpochFile(), epoch(), epoch_, EPOCH_FILENAME, logger::FATAL, FILE_MGR_VERSION_FILENAME, fileMgrBasePath_, fileMgrVersion_, Epoch::floor(), freePages(), incrementEpoch(), initializeNumThreads(), isFullyInitted_, latestFileMgrVersion_, LOG, nextFileId_, openFiles(), resumeFileCompaction(), rollOffOldData(), setEpoch(), gpu_enabled::sort(), VLOG, writeAndSyncEpochToDisk(), and writeAndSyncVersionToDisk().

Referenced by FileMgr().

249  {
     // Opens existing table data when coreInit() reports that it is present; otherwise
     // creates the data directory and starts from the requested (or default) epoch.
250  // if epochOverride == -1 this means open from epoch file
251 
252  const bool dataExists = coreInit();
253  if (dataExists) {
254  if (epochOverride != -1) { // if opening at specified epoch
255  setEpoch(epochOverride);
256  }
257 
258  auto open_files_result = openFiles();
     // If a compaction status file was found: resume that compaction, call
     // clearFileInfos(), and scan the files again. The second scan must not report a
     // status file.
259  if (!open_files_result.compaction_status_file_name.empty()) {
260  resumeFileCompaction(open_files_result.compaction_status_file_name);
261  clearFileInfos();
262  open_files_result = openFiles();
263  CHECK(open_files_result.compaction_status_file_name.empty());
264  }
265 
266  /* Sort header_vec so that all HeaderInfos
267  * from a chunk will be grouped together
268  * and in order of increasing PageId
269  * - Version Epoch */
270  auto& header_vec = open_files_result.header_infos;
271  std::sort(header_vec.begin(), header_vec.end());
272 
273  /* Goal of next section is to find sequences in the
274  * sorted header_vec of the same ChunkId, which we
275  * can then initiate a FileBuffer with */
276 
277  VLOG(3) << "Number of Headers in Vector: " << header_vec.size();
278  if (header_vec.size() > 0) {
279  ChunkKey lastChunkKey = header_vec.begin()->chunkKey;
280  auto startIt = header_vec.begin();
281 
     // [startIt, headerIt) is the run of headers sharing lastChunkKey; a FileBuffer is
     // built for the run each time the key changes
282  for (auto headerIt = header_vec.begin() + 1; headerIt != header_vec.end();
283  ++headerIt) {
284  if (headerIt->chunkKey != lastChunkKey) {
285  chunkIndex_[lastChunkKey] =
286  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, headerIt);
287  lastChunkKey = headerIt->chunkKey;
288  startIt = headerIt;
289  }
290  }
291  // now need to insert last Chunk
292  chunkIndex_[lastChunkKey] =
293  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, header_vec.end());
294  }
     // new files are numbered after the highest file id found on disk
295  nextFileId_ = open_files_result.max_file_id + 1;
296  rollOffOldData(epoch(), true /* only checkpoint if data is rolled off */);
297  incrementEpoch();
298  freePages();
299  } else {
300  boost::filesystem::path path(fileMgrBasePath_);
301  if (!boost::filesystem::create_directory(path)) {
302  LOG(FATAL) << "Could not create data directory: " << path;
303  }
305  if (epochOverride != -1) {
306  epoch_.floor(epochOverride);
307  epoch_.ceiling(epochOverride);
308  } else {
309  // These are default constructor values for epoch_, but resetting here for clarity
310  epoch_.floor(0);
311  epoch_.ceiling(0);
312  }
316  incrementEpoch();
317  }
318 
319  initializeNumThreads(num_reader_threads);
320  isFullyInitted_ = true;
321 }
std::vector< int > ChunkKey
Definition: types.h:37
OpenFilesResult openFiles()
Definition: FileMgr.cpp:189
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:586
#define LOG(tag)
Definition: Logger.h:194
void rollOffOldData(const int32_t epochCeiling, const bool shouldCheckpoint)
Definition: FileMgr.cpp:666
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:388
const int32_t latestFileMgrVersion_
Definition: FileMgr.h:399
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1074
int32_t incrementEpoch()
Definition: FileMgr.h:275
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:380
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:638
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:378
int32_t epoch() const
Definition: FileMgr.h:500
#define CHECK(condition)
Definition: Logger.h:203
void setEpoch(const int32_t newEpoch)
Definition: FileMgr.cpp:1153
bool coreInit()
Determines file path, and if exists, runs file migration and opens and reads epoch file...
Definition: FileMgr.cpp:126
void initializeNumThreads(size_t num_reader_threads=0)
Definition: FileMgr.cpp:1544
#define VLOG(n)
Definition: Logger.h:297
void resumeFileCompaction(const std::string &status_file_name)
Definition: FileMgr.cpp:1178

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::init ( const std::string &  dataPathToConvertFrom,
const int32_t  epochOverride 
)

Definition at line 409 of file FileMgr.cpp.

References CHECK, chunkIndex_, copyPage(), EPOCH_FILENAME, logger::FATAL, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, File_Namespace::GlobalFileMgr::getFileMgr(), getMetadataForFile(), File_Namespace::FileBuffer::getMultiPage(), gfm_, File_Namespace::FileMetadata::is_data_file, isFullyInitted_, LOG, File_Namespace::FileBuffer::multiPages_, nextFileId_, File_Namespace::FileMetadata::num_pages, openAndReadEpochFile(), openExistingFile(), File_Namespace::FileMetadata::page_size, File_Namespace::FileBuffer::pageDataSize(), File_Namespace::FileBuffer::pageSize(), processFileFutures(), File_Namespace::MultiPage::push(), requestFreePage(), File_Namespace::FileBuffer::reservedHeaderSize(), Data_Namespace::AbstractBuffer::setDirty(), setEpoch(), Data_Namespace::AbstractBuffer::setSize(), Data_Namespace::AbstractBuffer::size(), gpu_enabled::sort(), Data_Namespace::AbstractBuffer::syncEncoder(), and File_Namespace::FileBuffer::writeHeader().

410  {
411  int32_t converted_data_epoch = 0;
412  boost::filesystem::path path(dataPathToConvertFrom);
413  if (boost::filesystem::exists(path)) {
414  if (!boost::filesystem::is_directory(path)) {
415  LOG(FATAL) << "Specified path `" << path << "` is not a directory.";
416  }
418 
419  if (epochOverride != -1) { // if opening at previous epoch
420  setEpoch(epochOverride);
421  }
422 
423  boost::filesystem::directory_iterator
424  endItr; // default construction yields past-the-end
425  int32_t maxFileId = -1;
426  int32_t fileCount = 0;
427  int32_t threadCount = std::thread::hardware_concurrency();
428  std::vector<HeaderInfo> headerVec;
429  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
430  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
431  FileMetadata fileMetadata = getMetadataForFile(fileIt);
432  if (fileMetadata.is_data_file) {
433  maxFileId = std::max(maxFileId, fileMetadata.file_id);
434  file_futures.emplace_back(std::async(std::launch::async, [fileMetadata, this] {
435  std::vector<HeaderInfo> tempHeaderVec;
436  openExistingFile(fileMetadata.file_path,
437  fileMetadata.file_id,
438  fileMetadata.page_size,
439  fileMetadata.num_pages,
440  tempHeaderVec);
441  return tempHeaderVec;
442  }));
443  fileCount++;
444  if (fileCount % threadCount) {
445  processFileFutures(file_futures, headerVec);
446  }
447  }
448  }
449 
450  if (file_futures.size() > 0) {
451  processFileFutures(file_futures, headerVec);
452  }
453 
454  /* Sort headerVec so that all HeaderInfos
455  * from a chunk will be grouped together
456  * and in order of increasing PageId
457  * - Version Epoch */
458 
459  std::sort(headerVec.begin(), headerVec.end());
460 
461  /* Goal of next section is to find sequences in the
462  * sorted headerVec of the same ChunkId, which we
463  * can then initiate a FileBuffer with */
464 
465  if (headerVec.size() > 0) {
466  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
467  auto startIt = headerVec.begin();
468 
469  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
470  ++headerIt) {
471  if (headerIt->chunkKey != lastChunkKey) {
472  FileMgr* c_fm_ =
473  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
474  CHECK(c_fm_);
475  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerIt);
476  chunkIndex_[lastChunkKey] = srcBuf;
477  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
478  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
479  destBuf->syncEncoder(srcBuf);
480  destBuf->setSize(srcBuf->size());
481  destBuf->setDirty(); // this needs to be set to force writing out metadata
482  // files from "checkpoint()" call
483 
484  size_t totalNumPages = srcBuf->getMultiPage().size();
485  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
486  Page srcPage = srcBuf->getMultiPage()[pageNum].current().page;
487  Page destPage = c_fm_->requestFreePage(
488  srcBuf->pageSize(),
489  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
490  MultiPage multiPage(srcBuf->pageSize());
491  multiPage.push(destPage, converted_data_epoch);
492  destBuf->multiPages_.push_back(multiPage);
493  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
494  copyPage(
495  srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
496  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
497  }
498  lastChunkKey = headerIt->chunkKey;
499  startIt = headerIt;
500  }
501  }
502 
503  // now need to insert last Chunk
504  FileMgr* c_fm_ =
505  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
506  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerVec.end());
507  chunkIndex_[lastChunkKey] = srcBuf;
508  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
509  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
510  destBuf->syncEncoder(srcBuf);
511  destBuf->setSize(srcBuf->size());
512  destBuf->setDirty(); // this needs to be set to write out metadata file from the
513  // "checkpoint()" call
514 
515  size_t totalNumPages = srcBuf->getMultiPage().size();
516  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
517  Page srcPage = srcBuf->getMultiPage()[pageNum].current().page;
518  Page destPage = c_fm_->requestFreePage(
519  srcBuf->pageSize(),
520  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
521  MultiPage multiPage(srcBuf->pageSize());
522  multiPage.push(destPage, converted_data_epoch);
523  destBuf->multiPages_.push_back(multiPage);
524  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
525  copyPage(srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
526  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
527  }
528  }
529  nextFileId_ = maxFileId + 1;
530  } else {
531  if (!boost::filesystem::create_directory(path)) {
532  LOG(FATAL) << "Specified path does not exist: " << path;
533  }
534  }
535  isFullyInitted_ = true;
536 }
std::vector< int > ChunkKey
Definition: types.h:37
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator)
Definition: FileMgr.cpp:146
#define LOG(tag)
Definition: Logger.h:194
void copyPage(Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
Definition: FileMgr.cpp:564
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
GlobalFileMgr * gfm_
Definition: FileMgr.h:503
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:395
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
FileInfo * openExistingFile(const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:930
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:378
void openAndReadEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:618
AbstractBufferMgr * getFileMgr(const int32_t db_id, const int32_t tb_id)
#define CHECK(condition)
Definition: Logger.h:203
void setEpoch(const int32_t newEpoch)
Definition: FileMgr.cpp:1153

+ Here is the call graph for this function:

void File_Namespace::FileMgr::initializeNumThreads ( size_t  num_reader_threads = 0)
protected

Definition at line 1544 of file FileMgr.cpp.

References num_reader_threads_.

Referenced by File_Namespace::CachingFileMgr::init(), and init().

1544  {
      // Sets num_reader_threads_ to the requested count, falling back to the host's
      // hardware thread count when the request is 0 or exceeds that count.
1545  // # of threads is based on # of cores on the host
      // hardware_concurrency() may return 0 when the value cannot be determined; treat
      // that as a single thread so num_reader_threads_ is never set to 0
1546  size_t num_hardware_based_threads = std::max<size_t>(1, std::thread::hardware_concurrency());
1547  if (num_reader_threads == 0 || num_reader_threads > num_hardware_based_threads) {
1548  // # of threads has not been defined by user, or exceeds what the host reports
1549  num_reader_threads_ = num_hardware_based_threads;
1550  } else {
1551  num_reader_threads_ = num_reader_threads;
1552  }
1553 }
size_t num_reader_threads_
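Number of reader threads, as chosen by initializeNumThreads(). Note: the brief "Maps page sizes to FileInfo objects." printed here by Doxygen describes fileIndex_ (FileMgr.h:392), not this member.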
Definition: FileMgr.h:393

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::isAllocationCapped ( )
inlineoverride

Definition at line 218 of file FileMgr.h.

218 { return false; }  // this manager always reports its allocation as not capped
bool File_Namespace::FileMgr::isBufferOnDevice ( const ChunkKey key)
override

Definition at line 723 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

723  {
     // True when key has an entry in chunkIndex_; the lookup runs under a shared lock on
     // chunkIndexMutex_.
724  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
725  return chunkIndex_.find(key) != chunkIndex_.end();
726 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402
int32_t File_Namespace::FileMgr::lastCheckpointedEpoch ( )
inline

Returns value of epoch at last checkpoint.

Definition at line 291 of file FileMgr.h.

Referenced by File_Namespace::GlobalFileMgr::existsDiffBetweenFileMgrParamsAndFileMgr(), and getStorageStats().

291  {
     // If the current epoch has not been checkpointed yet, the last checkpointed epoch is
     // the one before it; otherwise it is the current epoch.
292  return epoch() - (epochIsCheckpointed_ ? 0 : 1);
293  }
int32_t epoch() const
Definition: FileMgr.h:500

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::maxRollbackEpochs ( )
inline

Returns the value of max_rollback_epochs.

Definition at line 298 of file FileMgr.h.

Referenced by File_Namespace::GlobalFileMgr::existsDiffBetweenFileMgrParamsAndFileMgr().

298 { return maxRollbackEpochs_; }  // plain accessor for maxRollbackEpochs_
int32_t maxRollbackEpochs_
Definition: FileMgr.h:387

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::migrateEpochFileV0 ( )
protected

Definition at line 1102 of file FileMgr.cpp.

References Epoch::ceiling(), createEpochFile(), epoch_, EPOCH_FILENAME, FILE_MGR_VERSION_FILENAME, fileMgrBasePath_, Epoch::floor(), logger::INFO, LEGACY_EPOCH_FILENAME, LOG, Epoch::min_allowable_epoch(), openAndReadLegacyEpochFile(), writeAndSyncEpochToDisk(), and writeAndSyncVersionToDisk().

Referenced by migrateToLatestFileMgrVersion().

1102  {
1103  const std::string versionFilePath(fileMgrBasePath_ + "/" + FILE_MGR_VERSION_FILENAME);
1104  LOG(INFO) << "Migrating file format version from 0 to 1 for `" << versionFilePath;
1109  int32_t migrationCompleteVersion = 1;
1110  writeAndSyncVersionToDisk(FILE_MGR_VERSION_FILENAME, migrationCompleteVersion);
1111 }
int32_t openAndReadLegacyEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:597
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:586
#define LOG(tag)
Definition: Logger.h:194
static int64_t min_allowable_epoch()
Definition: Epoch.h:65
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:388
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1074
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:380
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:638
static constexpr char EPOCH_FILENAME[]
Definition: FileMgr.h:378
static constexpr char LEGACY_EPOCH_FILENAME[]
Definition: FileMgr.h:377

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::migrateToLatestFileMgrVersion ( )
protected

Definition at line 1113 of file FileMgr.cpp.

References logger::FATAL, FILE_MGR_VERSION_FILENAME, fileMgrVersion_, INVALID_VERSION, latestFileMgrVersion_, LOG, migrateEpochFileV0(), readVersionFromDisk(), UNREACHABLE, and writeAndSyncVersionToDisk().

Referenced by coreInit().

1113  {
1116  fileMgrVersion_ = 0;
1118  } else if (fileMgrVersion_ > latestFileMgrVersion_) {
1119  LOG(FATAL)
1120  << "Table storage forward compatibility is not supported. Version of OmniSci "
1121  "software used is older than the version of table being read: "
1122  << fileMgrVersion_;
1123  }
1124 
1127  switch (fileMgrVersion_) {
1128  case 0: {
1130  break;
1131  }
1132  default: {
1133  UNREACHABLE();
1134  }
1135  }
1136  fileMgrVersion_++;
1137  }
1138  }
1139 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1056
#define LOG(tag)
Definition: Logger.h:194
#define UNREACHABLE()
Definition: Logger.h:247
const int32_t latestFileMgrVersion_
Definition: FileMgr.h:399
void writeAndSyncVersionToDisk(const std::string &versionFileName, const int32_t version)
Definition: FileMgr.cpp:1074
static constexpr char FILE_MGR_VERSION_FILENAME[]
Definition: FileMgr.h:380
static constexpr int32_t INVALID_VERSION
Definition: FileMgr.h:381

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::openAndReadEpochFile ( const std::string &  epochFileName)
protected

Definition at line 618 of file FileMgr.cpp.

References Epoch::byte_size(), epoch_, epochFile_, logger::FATAL, omnisci::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), File_Namespace::read(), and Epoch::storage_ptr().

Referenced by coreInit(), and init().

618  {
619  if (!epochFile_) { // Check to see if already open
620  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
621  if (!boost::filesystem::exists(epochFilePath)) {
622  LOG(FATAL) << "Epoch file `" << epochFilePath << "` does not exist";
623  }
624  if (!boost::filesystem::is_regular_file(epochFilePath)) {
625  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
626  }
627  if (boost::filesystem::file_size(epochFilePath) != Epoch::byte_size()) {
628  LOG(FATAL) << "Epoch file `" << epochFilePath
629  << "` is not sized properly (current size: "
630  << boost::filesystem::file_size(epochFilePath)
631  << ", expected size: " << Epoch::byte_size() << ")";
632  }
633  epochFile_ = open(epochFilePath);
634  }
636 }
int8_t * storage_ptr()
Definition: Epoch.h:61
#define LOG(tag)
Definition: Logger.h:194
std::string fileMgrBasePath_
Definition: FileMgr.h:388
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:133
static size_t byte_size()
Definition: Epoch.h:63
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:98
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::openAndReadLegacyEpochFile ( const std::string &  epochFileName)
protected

Definition at line 597 of file FileMgr.cpp.

References File_Namespace::close(), epoch(), logger::FATAL, omnisci::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), and File_Namespace::read().

Referenced by migrateEpochFileV0().

597  {
     // Reads the first sizeof(int32_t) bytes of the legacy epoch file under
     // fileMgrBasePath_ and returns them as the epoch; returns 0 when the file does not
     // exist.
598  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
599  if (!boost::filesystem::exists(epochFilePath)) {
600  return 0;
601  }
602 
603  if (!boost::filesystem::is_regular_file(epochFilePath)) {
604  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
605  }
     // only files shorter than 4 bytes are rejected; larger files pass this check
606  if (boost::filesystem::file_size(epochFilePath) < 4) {
607  LOG(FATAL) << "Epoch file `" << epochFilePath
608  << "` is not sized properly (current size: "
609  << boost::filesystem::file_size(epochFilePath) << ", expected size: 4)";
610  }
611  FILE* legacyEpochFile = open(epochFilePath);
     // local variable; it hides the epoch() member function inside this function
612  int32_t epoch;
     // read from offset 0 directly into the local's storage
613  read(legacyEpochFile, 0, sizeof(int32_t), (int8_t*)&epoch);
614  close(legacyEpochFile);
615  return epoch;
616 }
#define LOG(tag)
Definition: Logger.h:194
std::string fileMgrBasePath_
Definition: FileMgr.h:388
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:133
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:98
int32_t epoch() const
Definition: FileMgr.h:500
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:119
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::openExistingFile ( const std::string &  path,
const int32_t  fileId,
const size_t  pageSize,
const size_t  numPages,
std::vector< HeaderInfo > &  headerVec 
)
protected

Definition at line 930 of file FileMgr.cpp.

References f, fileIndex_, files_, files_rw_mutex_, File_Namespace::open(), and File_Namespace::FileInfo::openExistingFile().

Referenced by init(), and openFiles().

934  {
     // Opens the file at path, wraps it in a new FileInfo, and registers that FileInfo in
     // files_ (by file id) and fileIndex_ (by page size). Returns the new FileInfo.
935  FILE* f = open(path);
936  FileInfo* fInfo = new FileInfo(
937  this, fileId, f, pageSize, numPages, false); // false means don't init file
938 
     // headerVec is handed to FileInfo::openExistingFile — presumably filled with the
     // file's page headers; confirm against FileInfo
939  fInfo->openExistingFile(headerVec);
     // the exclusive lock is taken only for the two shared-map updates below
940  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
941  files_[fileId] = fInfo;
942  fileIndex_.insert(std::pair<size_t, int32_t>(pageSize, fileId));
943  return fInfo;
944 }
PageSizeFileMMap fileIndex_
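Maps page sizes to file ids. Note: the brief "A map of files accessible via a file identifier." printed here by Doxygen describes files_ (FileMgr.h:391), not this member.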
Definition: FileMgr.h:392
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:98
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
char * f
mapd_unique_lock< mapd_shared_mutex > write_lock
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:403

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

OpenFilesResult File_Namespace::FileMgr::openFiles ( )
protected

Definition at line 189 of file FileMgr.cpp.

References Epoch::ceiling(), CHECK, File_Namespace::OpenFilesResult::compaction_status_file_name, epoch_, File_Namespace::FileMetadata::file_id, File_Namespace::FileMetadata::file_path, fileMgrBasePath_, getMetadataForFile(), File_Namespace::OpenFilesResult::header_infos, logger::INFO, File_Namespace::anonymous_namespace{FileMgr.cpp}::is_compaction_status_file(), File_Namespace::FileMetadata::is_data_file, LOG, File_Namespace::OpenFilesResult::max_file_id, File_Namespace::FileMetadata::num_pages, openExistingFile(), File_Namespace::FileMetadata::page_size, processFileFutures(), run_benchmark_import::result, timer_start(), and timer_stop().

Referenced by File_Namespace::CachingFileMgr::init(), and init().

189  {
     // Scans fileMgrBasePath_, opens every data file on an async task, and returns the
     // collected header infos, the highest file id seen (-1 if none), and the name of a
     // compaction status file if one is present.
190  auto clock_begin = timer_start();
191  boost::filesystem::directory_iterator
192  end_itr; // default construction yields past-the-end
193  OpenFilesResult result;
194  result.max_file_id = -1;
195  int32_t file_count = 0;
     // hardware_concurrency() may return 0 when the core count cannot be determined;
     // clamp to 1 so the batching modulo below never divides by zero
196  int32_t thread_count = std::max<int32_t>(1, std::thread::hardware_concurrency());
197  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
198  boost::filesystem::path path(fileMgrBasePath_);
199  for (boost::filesystem::directory_iterator file_it(path); file_it != end_itr;
200  ++file_it) {
201  FileMetadata file_metadata = getMetadataForFile(file_it);
202  if (file_metadata.is_data_file) {
203  result.max_file_id = std::max(result.max_file_id, file_metadata.file_id);
     // file_metadata is captured by copy so the task does not depend on the loop variable
204  file_futures.emplace_back(std::async(std::launch::async, [file_metadata, this] {
205  std::vector<HeaderInfo> temp_header_vec;
206  openExistingFile(file_metadata.file_path,
207  file_metadata.file_id,
208  file_metadata.page_size,
209  file_metadata.num_pages,
210  temp_header_vec);
211  return temp_header_vec;
212  }));
213  file_count++;
     // drain the pending futures once per batch of thread_count files
214  if (file_count % thread_count == 0) {
215  processFileFutures(file_futures, result.header_infos);
216  }
217  }
218 
     // at most one compaction status file may exist in the directory
219  if (is_compaction_status_file(file_it->path().filename().string())) {
220  CHECK(result.compaction_status_file_name.empty());
221  result.compaction_status_file_name = file_it->path().filename().string();
222  }
223  }
224 
     // collect whatever is left from the last, partial batch
225  if (file_futures.size() > 0) {
226  processFileFutures(file_futures, result.header_infos);
227  }
228 
229  int64_t queue_time_ms = timer_stop(clock_begin);
230  LOG(INFO) << "Completed Reading table's file metadata, Elapsed time : " << queue_time_ms
231  << "ms Epoch: " << epoch_.ceiling() << " files read: " << file_count
232  << " table location: '" << fileMgrBasePath_ << "'";
233  return result;
234 }
FileMetadata getMetadataForFile(const boost::filesystem::directory_iterator &fileIterator)
Definition: FileMgr.cpp:146
#define LOG(tag)
Definition: Logger.h:194
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
bool is_compaction_status_file(const std::string &file_name)
Definition: FileMgr.cpp:182
int32_t ceiling() const
Definition: Epoch.h:44
std::string fileMgrBasePath_
Definition: FileMgr.h:388
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:395
FileInfo * openExistingFile(const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:930
#define CHECK(condition)
Definition: Logger.h:203
Type timer_start()
Definition: measure.h:42

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::printSlabs ( )
inlineoverride

Definition at line 213 of file FileMgr.h.

213 { return "Not Implemented"; }
void File_Namespace::FileMgr::processFileFutures ( std::vector< std::future< std::vector< HeaderInfo >>> &  file_futures,
std::vector< HeaderInfo > &  headerVec 
)
protected

Definition at line 395 of file FileMgr.cpp.

Referenced by init(), and openFiles().

397  {
     // Waits for every pending future, appends each returned header vector to headerVec
     // in the order the futures were queued, and leaves file_futures empty.
398  for (auto& file_future : file_futures) {
399  file_future.wait();
400  }
401  // concatenate the vectors after thread completes
402  for (auto& file_future : file_futures) {
403  auto tempHeaderVec = file_future.get();
404  headerVec.insert(headerVec.end(), tempHeaderVec.begin(), tempHeaderVec.end());
405  }
     // the futures have been consumed by get(); empty the vector so it can be reused
406  file_futures.clear();
407 }

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::putBuffer ( const ChunkKey key,
AbstractBuffer d,
const size_t  numBytes = 0 
)
override

Puts the contents of d into the Chunk with the given key.

Parameters
key- Unique identifier for a Chunk.
d- An object representing the source data for the Chunk.
Returns
FileBuffer* — the chunk that received the data.

Definition at line 803 of file FileMgr.cpp.

References CHECK_LT, Data_Namespace::AbstractBuffer::clearDirtyBits(), logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), getOrCreateBuffer(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isAppended(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, show_chunk(), Data_Namespace::AbstractBuffer::size(), and UNREACHABLE.

805  {
     // Writes srcBuffer's data into the chunk for key (fetched or created via
     // getOrCreateBuffer), syncs the chunk's encoder from srcBuffer, and returns the
     // chunk. srcBuffer is the source-buffer parameter (shown as `d` in the documented
     // signature). An updated buffer is written from offset 0; an appended buffer only
     // has its bytes past the chunk's previous size appended.
806  auto chunk = getOrCreateBuffer(key);
807  size_t oldChunkSize = chunk->size();
808  // write the buffer's data to the Chunk
809  // numBytes == 0 means "use the source buffer's full size"
810  size_t newChunkSize = numBytes == 0 ? srcBuffer->size() : numBytes;
811  if (chunk->isDirty()) {
812  // multiple appends are allowed,
813  // but only single update is allowed
814  if (srcBuffer->isUpdated() && chunk->isUpdated()) {
815  LOG(FATAL) << "Aborting attempt to write a chunk marked dirty. Chunk inconsistency "
816  "for key: "
817  << show_chunk(key);
818  }
819  }
820  if (srcBuffer->isUpdated()) {
821  // chunk size is not changed when fixed rows are updated or are marked as deleted.
822  // but when rows are vacuumed or varlen rows are updated (new rows are appended),
823  // chunk size will change. For vacuum, checkpoint should sync size from cpu to disk.
824  // For varlen update, it takes another route via fragmenter using disk-level buffer.
825  if (0 == numBytes && !chunk->isDirty()) {
826  chunk->setSize(newChunkSize);
827  }
828  //@todo use dirty flags to only flush pages of chunk that need to
829  // be flushed
830  chunk->write((int8_t*)srcBuffer->getMemoryPtr(),
831  newChunkSize,
832  0,
833  srcBuffer->getType(),
834  srcBuffer->getDeviceId());
835  } else if (srcBuffer->isAppended()) {
     // an append must grow the chunk; only the bytes beyond oldChunkSize are written
836  CHECK_LT(oldChunkSize, newChunkSize);
837  chunk->append((int8_t*)srcBuffer->getMemoryPtr() + oldChunkSize,
838  newChunkSize - oldChunkSize,
839  srcBuffer->getType(),
840  srcBuffer->getDeviceId());
841  } else {
842  UNREACHABLE() << "putBuffer() expects a buffer marked is_updated or is_appended";
843  }
844  // chunk->clearDirtyBits(); // Hack: because write and append will set dirty bits
845  //@todo commenting out line above will make sure this metadata is set
846  // but will trigger error on fetch chunk
     // only the source buffer's dirty bits are cleared; the chunk's are left as set
847  srcBuffer->clearDirtyBits();
848  chunk->syncEncoder(srcBuffer);
849  return chunk;
850 }
#define LOG(tag)
Definition: Logger.h:194
#define UNREACHABLE()
Definition: Logger.h:247
std::string show_chunk(const ChunkKey &key)
Definition: types.h:85
FileBuffer * getOrCreateBuffer(const ChunkKey &key)
Definition: FileMgr.cpp:1594
#define CHECK_LT(x, y)
Definition: Logger.h:213

+ Here is the call graph for this function:

std::vector< PageMapping > File_Namespace::FileMgr::readPageMappingsFromStatusFile ( )
protected

Deserializes a page mapping vector from expected status file.

Definition at line 1490 of file FileMgr.cpp.

References CHECK, CHECK_EQ, CHECK_GE, omnisci::file_size(), getFilePath(), and UPDATE_PAGE_VISIBILITY_STATUS.

Referenced by resumeFileCompaction().

1490  {
1491  auto file_path = getFilePath(UPDATE_PAGE_VISIBILITY_STATUS);
1492  CHECK(boost::filesystem::exists(file_path));
1493  std::ifstream status_file{file_path.string(),
1494  std::ios::in | std::ios::binary | std::ios::ate};
1495  CHECK(status_file.is_open());
1496  size_t file_size = status_file.tellg();
1497  status_file.seekg(0, std::ios::beg);
1498  CHECK_GE(file_size, sizeof(int64_t));
1499 
1500  int64_t page_mappings_count;
1501  status_file.read(reinterpret_cast<char*>(&page_mappings_count), sizeof(int64_t));
1502  auto page_mappings_byte_size = file_size - sizeof(int64_t);
1503  CHECK_EQ(page_mappings_byte_size % sizeof(PageMapping), static_cast<size_t>(0));
1504  CHECK_EQ(static_cast<size_t>(page_mappings_count),
1505  page_mappings_byte_size / sizeof(PageMapping));
1506 
1507  std::vector<PageMapping> page_mappings(page_mappings_count);
1508  status_file.read(reinterpret_cast<char*>(page_mappings.data()),
1509  page_mappings_byte_size);
1510  status_file.close();
1511  return page_mappings;
1512 }
#define CHECK_EQ(x, y)
Definition: Logger.h:211
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:369
#define CHECK_GE(x, y)
Definition: Logger.h:216
#define CHECK(condition)
Definition: Logger.h:203
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31
boost::filesystem::path getFilePath(const std::string &file_name)
Definition: FileMgr.h:337

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t File_Namespace::FileMgr::readVersionFromDisk ( const std::string &  versionFileName) const
protected

Definition at line 1056 of file FileMgr.cpp.

References File_Namespace::close(), omnisci::file_size(), fileMgrBasePath_, File_Namespace::open(), File_Namespace::read(), and setup::version.

Referenced by createTopLevelMetadata(), migrateToLatestFileMgrVersion(), and writeAndSyncVersionToDisk().

1056  {
1057  const std::string versionFilePath(fileMgrBasePath_ + "/" + versionFileName);
1058  if (!boost::filesystem::exists(versionFilePath)) {
1059  return -1;
1060  }
1061  if (!boost::filesystem::is_regular_file(versionFilePath)) {
1062  return -1;
1063  }
1064  if (boost::filesystem::file_size(versionFilePath) < 4) {
1065  return -1;
1066  }
1067  FILE* versionFile = open(versionFilePath);
1068  int32_t version;
1069  read(versionFile, 0, sizeof(int32_t), (int8_t*)&version);
1070  close(versionFile);
1071  return version;
1072 }
std::string fileMgrBasePath_
Definition: FileMgr.h:388
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:133
string version
Definition: setup.in.py:73
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:98
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:119
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::removeTableRelatedDS ( const int32_t  db_id,
const int32_t  table_id 
)
override

Definition at line 1169 of file FileMgr.cpp.

References UNREACHABLE.

1169  {
1170  UNREACHABLE();
1171 }
#define UNREACHABLE()
Definition: Logger.h:247
void File_Namespace::FileMgr::renameCompactionStatusFile ( const char *const  from_status,
const char *const  to_status 
)

Renames a given status file name to a new given file name.

Definition at line 1517 of file FileMgr.cpp.

References CHECK, and getFilePath().

Referenced by compactFiles(), and resumeFileCompaction().

1518  {
1519  auto from_status_file_path = getFilePath(from_status);
1520  auto to_status_file_path = getFilePath(to_status);
1521  CHECK(boost::filesystem::exists(from_status_file_path));
1522  CHECK(!boost::filesystem::exists(to_status_file_path));
1523  boost::filesystem::rename(from_status_file_path, to_status_file_path);
1524 }
#define CHECK(condition)
Definition: Logger.h:203
boost::filesystem::path getFilePath(const std::string &file_name)
Definition: FileMgr.h:337

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Page File_Namespace::FileMgr::requestFreePage ( size_t  pagesize,
const bool  isMetadata 
)

Definition at line 861 of file FileMgr.cpp.

References CHECK, createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, num_pages_per_data_file_, and num_pages_per_metadata_file_.

Referenced by File_Namespace::FileBuffer::addNewMultiPage(), init(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeMetadata().

861  {
862  std::lock_guard<std::mutex> lock(getPageMutex_);
863 
864  auto candidateFiles = fileIndex_.equal_range(pageSize);
865  int32_t pageNum = -1;
866  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
867  FileInfo* fileInfo = files_[fileIt->second];
868  pageNum = fileInfo->getFreePage();
869  if (pageNum != -1) {
870  return (Page(fileInfo->fileId, pageNum));
871  }
872  }
873  // if here then we need to add a file
874  FileInfo* fileInfo;
875  if (isMetadata) {
876  fileInfo = createFile(pageSize, num_pages_per_metadata_file_);
877  } else {
878  fileInfo = createFile(pageSize, num_pages_per_data_file_);
879  }
880  pageNum = fileInfo->getFreePage();
881  CHECK(pageNum != -1);
882  return (Page(fileInfo->fileId, pageNum));
883 }
std::mutex getPageMutex_
pointer to DB level metadata
Definition: FileMgr.h:401
static size_t num_pages_per_data_file_
Definition: FileMgr.h:409
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:392
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:410
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:946
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::requestFreePages ( size_t  npages,
size_t  pagesize,
std::vector< Page > &  pages,
const bool  isMetadata 
)

Obtains free pages – creates new files if necessary – of the requested size.

Given a page size and number of pages, this method updates the vector "pages" to include free pages of the requested size. These pages are immediately removed from the free list of the affected file(s). If there are not enough pages available among current files, new files are created and their pages are included in the vector.

Parameters
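npages: The number of free pages requested
pagesize: The size of each requested page
pages: A vector containing the free pages obtained by this method
isMetadata: If true, any newly created file is sized with num_pages_per_metadata_file_; otherwise with num_pages_per_data_file_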

Definition at line 885 of file FileMgr.cpp.

References CHECK, createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, num_pages_per_data_file_, and num_pages_per_metadata_file_.

888  {
889  // not used currently
890  // @todo add method to FileInfo to get more than one page
891  std::lock_guard<std::mutex> lock(getPageMutex_);
892  auto candidateFiles = fileIndex_.equal_range(pageSize);
893  size_t numPagesNeeded = numPagesRequested;
894  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
895  FileInfo* fileInfo = files_[fileIt->second];
896  int32_t pageNum;
897  do {
898  pageNum = fileInfo->getFreePage();
899  if (pageNum != -1) {
900  pages.emplace_back(fileInfo->fileId, pageNum);
901  numPagesNeeded--;
902  }
903  } while (pageNum != -1 && numPagesNeeded > 0);
904  if (numPagesNeeded == 0) {
905  break;
906  }
907  }
908  while (numPagesNeeded > 0) {
909  FileInfo* fileInfo;
910  if (isMetadata) {
911  fileInfo = createFile(pageSize, num_pages_per_metadata_file_);
912  } else {
913  fileInfo = createFile(pageSize, num_pages_per_data_file_);
914  }
915  int32_t pageNum;
916  do {
917  pageNum = fileInfo->getFreePage();
918  if (pageNum != -1) {
919  pages.emplace_back(fileInfo->fileId, pageNum);
920  numPagesNeeded--;
921  }
922  } while (pageNum != -1 && numPagesNeeded > 0);
923  if (numPagesNeeded == 0) {
924  break;
925  }
926  }
927  CHECK(pages.size() == numPagesRequested);
928 }
std::mutex getPageMutex_
pointer to DB level metadata
Definition: FileMgr.h:401
static size_t num_pages_per_data_file_
Definition: FileMgr.h:409
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:392
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:410
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:946
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the call graph for this function:

void File_Namespace::FileMgr::resumeFileCompaction ( const std::string &  status_file_name)
protected

Resumes an interrupted file compaction process. This method would normally only be called when re-initializing the file manager after a crash occurred in the middle of file compaction.

Definition at line 1178 of file FileMgr.cpp.

References CHECK, compactFiles(), COPY_PAGES_STATUS, DELETE_EMPTY_FILES_STATUS, deleteEmptyFiles(), files_rw_mutex_, getFilePath(), readPageMappingsFromStatusFile(), renameCompactionStatusFile(), UNREACHABLE, UPDATE_PAGE_VISIBILITY_STATUS, and updateMappedPagesVisibility().

Referenced by init().

1178  {
1179  if (status_file_name == COPY_PAGES_STATUS) {
1180  // Delete status file and restart data compaction process
1181  auto file_path = getFilePath(status_file_name);
1182  CHECK(boost::filesystem::exists(file_path));
1183  boost::filesystem::remove(file_path);
1184  compactFiles();
1185  } else if (status_file_name == UPDATE_PAGE_VISIBILITY_STATUS) {
1186  // Execute second and third phases of data compaction
1187  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
1188  auto page_mappings = readPageMappingsFromStatusFile();
1189  updateMappedPagesVisibility(page_mappings);
1190  renameCompactionStatusFile(UPDATE_PAGE_VISIBILITY_STATUS, DELETE_EMPTY_FILES_STATUS);
1191  deleteEmptyFiles();
1192  } else if (status_file_name == DELETE_EMPTY_FILES_STATUS) {
1193  // Execute last phase of data compaction
1194  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
1195  deleteEmptyFiles();
1196  } else {
1197  UNREACHABLE() << "Unexpected status file name: " << status_file_name;
1198  }
1199 }
std::vector< PageMapping > readPageMappingsFromStatusFile()
Definition: FileMgr.cpp:1490
#define UNREACHABLE()
Definition: Logger.h:247
static constexpr char const * UPDATE_PAGE_VISIBILITY_STATUS
Definition: FileMgr.h:369
static constexpr char const * DELETE_EMPTY_FILES_STATUS
Definition: FileMgr.h:370
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:368
void updateMappedPagesVisibility(const std::vector< PageMapping > &page_mappings)
Definition: FileMgr.cpp:1420
#define CHECK(condition)
Definition: Logger.h:203
mapd_unique_lock< mapd_shared_mutex > write_lock
void renameCompactionStatusFile(const char *const from_status, const char *const to_status)
Definition: FileMgr.cpp:1517
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:403
boost::filesystem::path getFilePath(const std::string &file_name)
Definition: FileMgr.h:337

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::rollOffOldData ( const int32_t  epochCeiling,
const bool  shouldCheckpoint 
)
private

Definition at line 666 of file FileMgr.cpp.

References checkpoint(), epoch_, Epoch::floor(), freePagesBeforeEpoch(), and maxRollbackEpochs_.

Referenced by checkpoint(), and init().

666  {
667  if (maxRollbackEpochs_ >= 0) {
668  auto min_epoch = std::max(epoch_ceiling - maxRollbackEpochs_, epoch_.floor());
669  if (min_epoch > epoch_.floor()) {
670  freePagesBeforeEpoch(min_epoch);
671  epoch_.floor(min_epoch);
672  if (should_checkpoint) {
673  checkpoint();
674  }
675  }
676  }
677 }
int32_t floor() const
Definition: Epoch.h:43
void checkpoint() override
Fsyncs data files, writes out epoch and fsyncs that.
Definition: FileMgr.cpp:685
void freePagesBeforeEpoch(const int32_t min_epoch)
Definition: FileMgr.cpp:652
int32_t maxRollbackEpochs_
Definition: FileMgr.h:387

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::setEpoch ( const int32_t  newEpoch)
protected

Definition at line 1153 of file FileMgr.cpp.

References Epoch::ceiling(), describeSelf(), epoch_, Epoch::floor(), and writeAndSyncEpochToDisk().

Referenced by init().

1153  {
1154  if (newEpoch < epoch_.floor()) {
1155  std::stringstream error_message;
1156  error_message << "Cannot set epoch for " << describeSelf()
1157  << " lower than the minimum rollback epoch (" << epoch_.floor() << ").";
1158  throw std::runtime_error(error_message.str());
1159  }
1160  epoch_.ceiling(newEpoch);
1161  writeAndSyncEpochToDisk();
1162 }
int32_t floor() const
Definition: Epoch.h:43
int32_t ceiling() const
Definition: Epoch.h:44
virtual std::string describeSelf()
Definition: FileMgr.cpp:679
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:638

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::setNumPagesPerDataFile ( size_t  num_pages)
static

Definition at line 1528 of file FileMgr.cpp.

References num_pages_per_data_file_.

1528  {
1529  num_pages_per_data_file_ = num_pages;
1530 }
static size_t num_pages_per_data_file_
Definition: FileMgr.h:409
void File_Namespace::FileMgr::setNumPagesPerMetadataFile ( size_t  num_pages)
static

Definition at line 1532 of file FileMgr.cpp.

References num_pages_per_metadata_file_.

1532  {
1533  num_pages_per_metadata_file_ = num_pages;
1534 }
static size_t num_pages_per_metadata_file_
Definition: FileMgr.h:410
void File_Namespace::FileMgr::sortAndCopyFilePagesForCompaction ( size_t  page_size,
std::vector< PageMapping > &  page_mappings,
std::set< Page > &  touched_pages 
)
protected

Sorts all files with the given page size in ascending order of their number of free pages, then copies pages from files with more free pages into files with fewer free pages. Destination (copied-to) pages are left marked as free while copying. Each copied source/destination page pair is recorded in the page mappings.

Definition at line 1265 of file FileMgr.cpp.

References CHECK, copySourcePageForCompaction(), File_Namespace::Page::fileId, fileIndex_, files_, File_Namespace::FileInfo::freePages, freePages(), i, and gpu_enabled::sort().

Referenced by compactFiles().

1267  {
1268  std::vector<FileInfo*> sorted_file_infos;
1269  auto range = fileIndex_.equal_range(page_size);
1270  for (auto it = range.first; it != range.second; it++) {
1271  sorted_file_infos.emplace_back(files_[it->second]);
1272  }
1273  if (sorted_file_infos.empty()) {
1274  return;
1275  }
1276 
1277  // Sort file infos in ascending order of free pages count i.e. from files with
1278  // the least number of free pages to those with the highest number of free pages.
1279  std::sort(sorted_file_infos.begin(),
1280  sorted_file_infos.end(),
1281  [](const FileInfo* file_1, const FileInfo* file_2) {
1282  return file_1->freePages.size() < file_2->freePages.size();
1283  });
1284 
1285  size_t destination_index = 0, source_index = sorted_file_infos.size() - 1;
1286 
1287  // For page copy destinations, skip files without free pages.
1288  while (destination_index < source_index &&
1289  sorted_file_infos[destination_index]->freePages.empty()) {
1290  destination_index++;
1291  }
1292 
1293  // For page copy sources, skip files with only free pages.
1294  while (destination_index < source_index &&
1295  sorted_file_infos[source_index]->freePages.size() ==
1296  sorted_file_infos[source_index]->numPages) {
1297  source_index--;
1298  }
1299 
1300  std::set<size_t> source_used_pages;
1301  CHECK(destination_index <= source_index);
1302 
1303  // Get the total number of free pages available for compaction
1304  int64_t total_free_pages{0};
1305  for (size_t i = destination_index; i <= source_index; i++) {
1306  total_free_pages += sorted_file_infos[i]->numFreePages();
1307  }
1308 
1309  while (destination_index < source_index) {
1310  if (source_used_pages.empty()) {
1311  // Populate source_used_pages with only used pages in the source file.
1312  auto source_file_info = sorted_file_infos[source_index];
1313  auto& free_pages = source_file_info->freePages;
1314  for (size_t page_num = 0; page_num < source_file_info->numPages; page_num++) {
1315  if (free_pages.find(page_num) == free_pages.end()) {
1316  source_used_pages.emplace(page_num);
1317  }
1318  }
1319 
1320  // Free pages of current source file will not be copy destinations
1321  total_free_pages -= source_file_info->numFreePages();
1322  }
1323 
1324  // Exit early if there are not enough free pages to empty the next file
1325  if (total_free_pages - static_cast<int64_t>(source_used_pages.size()) < 0) {
1326  return;
1327  }
1328 
1329  // Copy pages from source files to destination files
1330  auto dest_file_info = sorted_file_infos[destination_index];
1331  while (!source_used_pages.empty() && !dest_file_info->freePages.empty()) {
1332  // Get next page to copy
1333  size_t source_page_num = *source_used_pages.begin();
1334  source_used_pages.erase(source_page_num);
1335 
1336  Page source_page{sorted_file_infos[source_index]->fileId, source_page_num};
1337  copySourcePageForCompaction(source_page,
1338  sorted_file_infos[destination_index],
1339  page_mappings,
1340  touched_pages);
1341  total_free_pages--;
1342  }
1343 
1344  if (source_used_pages.empty()) {
1345  source_index--;
1346  }
1347 
1348  if (dest_file_info->freePages.empty()) {
1349  destination_index++;
1350  }
1351  }
1352 }
void copySourcePageForCompaction(const Page &source_page, FileInfo *destination_file_info, std::vector< PageMapping > &page_mappings, std::set< Page > &touched_pages)
Definition: FileMgr.cpp:1360
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
PageSizeFileMMap fileIndex_
A map of files accessible via a file identifier.
Definition: FileMgr.h:392
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::syncFilesToDisk ( )
protected

Definition at line 1536 of file FileMgr.cpp.

References CHECK, files_, and files_rw_mutex_.

Referenced by checkpoint().

1536  {
1537  mapd_shared_lock<mapd_shared_mutex> files_read_lock(files_rw_mutex_);
1538  for (auto file_info_entry : files_) {
1539  int32_t status = file_info_entry.second->syncToDisk();
1540  CHECK(status == 0) << "Could not sync file to disk";
1541  }
1542 }
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391
#define CHECK(condition)
Definition: Logger.h:203
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:403

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::updateMappedPagesVisibility ( const std::vector< PageMapping > &  page_mappings)
protected

Goes through the given page mappings and marks each source (copied-from) page as free while marking each destination (copied-to) page as used (by setting its header size).

Definition at line 1420 of file FileMgr.cpp.

References CHECK_GT, logger::FATAL, files_, and LOG.

Referenced by compactFiles(), and resumeFileCompaction().

1420  {
1421  for (const auto& page_mapping : page_mappings) {
1422  auto destination_file = files_[page_mapping.destination_file_id];
1423 
1424  // Set destination page header size
1425  auto header_size = page_mapping.source_page_header_size;
1426  CHECK_GT(header_size, 0);
1427  destination_file->write(
1428  page_mapping.destination_page_num * destination_file->pageSize,
1429  sizeof(PageHeaderSizeType),
1430  reinterpret_cast<int8_t*>(&header_size));
1431  auto source_file = files_[page_mapping.source_file_id];
1432 
1433  // Free source page
1434  PageHeaderSizeType free_page_header_size{0};
1435  source_file->write(page_mapping.source_page_num * source_file->pageSize,
1436  sizeof(PageHeaderSizeType),
1437  reinterpret_cast<int8_t*>(&free_page_header_size));
1438  source_file->freePageDeferred(page_mapping.source_page_num);
1439  }
1440 
1441  for (auto file_info_entry : files_) {
1442  int32_t status = file_info_entry.second->syncToDisk();
1443  if (status != 0) {
1444  LOG(FATAL) << "Could not sync file to disk";
1445  }
1446  }
1447 }
#define LOG(tag)
Definition: Logger.h:194
#define CHECK_GT(x, y)
Definition: Logger.h:215
int32_t PageHeaderSizeType
Definition: FileMgr.h:121
std::map< int32_t, FileInfo * > files_
Definition: FileMgr.h:391

+ Here is the caller graph for this function:

bool File_Namespace::FileMgr::updatePageIfDeleted ( FileInfo * file_info,
ChunkKey chunk_key,
int32_t  contingent,
int32_t  page_epoch,
int32_t  page_num 
)
virtual

Deletes or recovers a page based on the last checkpointed epoch.

Reimplemented in File_Namespace::CachingFileMgr.

Definition at line 1570 of file FileMgr.cpp.

References CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, File_Namespace::DELETE_CONTINGENT, epoch(), File_Namespace::FileInfo::freePageImmediate(), g_read_only, get_fileMgrKey(), File_Namespace::FileInfo::recoverPage(), and File_Namespace::ROLLOFF_CONTINGENT.

Referenced by File_Namespace::FileInfo::openExistingFile().

1574  {
1575  // If the parent FileMgr has a fileMgrKey, then all keys are locked to one table and
1576  // can be set from the manager.
1577  auto [db_id, tb_id] = get_fileMgrKey();
1578  chunk_key[CHUNK_KEY_DB_IDX] = db_id;
1579  chunk_key[CHUNK_KEY_TABLE_IDX] = tb_id;
1580  const bool delete_contingent =
1581  (contingent == DELETE_CONTINGENT || contingent == ROLLOFF_CONTINGENT);
1582  // Check if page was deleted with a checkpointed epoch
1583  if (delete_contingent && epoch(db_id, tb_id) >= page_epoch) {
1584  file_info->freePageImmediate(page_num);
1585  return true;
1586  }
1587  // Recover page if it was deleted but not checkpointed.
1588  if (!g_read_only && delete_contingent) {
1589  file_info->recoverPage(chunk_key, page_num);
1590  }
1591  return false;
1592 }
#define CHUNK_KEY_DB_IDX
Definition: types.h:39
constexpr int32_t DELETE_CONTINGENT
A FileInfo type has a file pointer and metadata about a file.
Definition: FileInfo.h:51
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:40
constexpr int32_t ROLLOFF_CONTINGENT
Definition: FileInfo.h:52
const TablePair get_fileMgrKey() const
Definition: FileMgr.h:335
bool g_read_only
Definition: File.cpp:38
int32_t epoch() const
Definition: FileMgr.h:500

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeAndSyncEpochToDisk ( )
protected

Definition at line 638 of file FileMgr.cpp.

References Epoch::byte_size(), CHECK, epoch_, epochFile_, epochIsCheckpointed_, omnisci::fsync(), Epoch::storage_ptr(), and File_Namespace::write().

Referenced by checkpoint(), createEpochFile(), init(), migrateEpochFileV0(), and setEpoch().

638  {
639  CHECK(epochFile_);
640  write(epochFile_, 0, Epoch::byte_size(), epoch_.storage_ptr());
641  int32_t status = fflush(epochFile_);
642  CHECK(status == 0) << "Could not flush epoch file to disk";
643 #ifdef __APPLE__
644  status = fcntl(fileno(epochFile_), 51);
645 #else
646  status = omnisci::fsync(fileno(epochFile_));
647 #endif
648  CHECK(status == 0) << "Could not sync epoch file to disk";
649  epochIsCheckpointed_ = true;
650 }
int8_t * storage_ptr()
Definition: Epoch.h:61
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:141
int fsync(int fd)
Definition: omnisci_fs.cpp:60
static size_t byte_size()
Definition: Epoch.h:63
#define CHECK(condition)
Definition: Logger.h:203

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeAndSyncVersionToDisk ( const std::string &  versionFileName,
const int32_t  version 
)
protected

Definition at line 1074 of file FileMgr.cpp.

References File_Namespace::close(), File_Namespace::create(), epochFile_, logger::FATAL, fileMgrBasePath_, omnisci::fsync(), logger::INFO, LOG, File_Namespace::open(), readVersionFromDisk(), and File_Namespace::write().

Referenced by createTopLevelMetadata(), init(), migrateEpochFileV0(), and migrateToLatestFileMgrVersion().

1075  {
1076  const std::string versionFilePath(fileMgrBasePath_ + "/" + versionFileName);
1077  FILE* versionFile;
1078  if (boost::filesystem::exists(versionFilePath)) {
1079  int32_t oldVersion = readVersionFromDisk(versionFileName);
1080  LOG(INFO) << "Storage version file `" << versionFilePath
1081  << "` already exists, its current version is " << oldVersion;
1082  versionFile = open(versionFilePath);
1083  } else {
1084  versionFile = create(versionFilePath, sizeof(int32_t));
1085  }
1086  write(versionFile, 0, sizeof(int32_t), (int8_t*)&version);
1087  int32_t status = fflush(versionFile);
1088  if (status != 0) {
1089  LOG(FATAL) << "Could not flush version file " << versionFilePath << " to disk";
1090  }
1091 #ifdef __APPLE__
1092  status = fcntl(fileno(epochFile_), 51);
1093 #else
1094  status = omnisci::fsync(fileno(versionFile));
1095 #endif
1096  if (status != 0) {
1097  LOG(FATAL) << "Could not sync version file " << versionFilePath << " to disk";
1098  }
1099  close(versionFile);
1100 }
int32_t readVersionFromDisk(const std::string &versionFileName) const
Definition: FileMgr.cpp:1056
#define LOG(tag)
Definition: Logger.h:194
std::string fileMgrBasePath_
Definition: FileMgr.h:388
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:49
size_t write(FILE *f, const size_t offset, const size_t size, const int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:141
int fsync(int fd)
Definition: omnisci_fs.cpp:60
string version
Definition: setup.in.py:73
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:98
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:119

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeDirtyBuffers ( )
private

Definition at line 1606 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, and epoch().

Referenced by checkpoint().

1606  {
1607  mapd_unique_lock<mapd_shared_mutex> chunk_index_write_lock(chunkIndexMutex_);
1608  for (auto [key, buf] : chunkIndex_) {
1609  if (buf->isDirty()) {
1610  buf->writeMetadata(epoch());
1611  buf->clearDirtyBits();
1612  }
1613  }
1614 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:323
int32_t epoch() const
Definition: FileMgr.h:500
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:402

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writePageMappingsToStatusFile ( const std::vector< PageMapping > &  page_mappings)

Serializes a page mapping vector to expected status file. Page mapping vector is serialized in the following format: [{page mapping vector size}, {page mapping vector data bytes ...}]

Definition at line 1474 of file FileMgr.cpp.

References CHECK, COPY_PAGES_STATUS, and getFilePath().

Referenced by compactFiles().

1475  {
1476  auto file_path = getFilePath(COPY_PAGES_STATUS);
1477  CHECK(boost::filesystem::exists(file_path));
1478  CHECK(boost::filesystem::is_empty(file_path));
1479  std::ofstream status_file{file_path.string(), std::ios::out | std::ios::binary};
1480  int64_t page_mappings_count = page_mappings.size();
1481  status_file.write(reinterpret_cast<const char*>(&page_mappings_count), sizeof(int64_t));
1482  status_file.write(reinterpret_cast<const char*>(page_mappings.data()),
1483  page_mappings_count * sizeof(PageMapping));
1484  status_file.close();
1485 }
static constexpr char const * COPY_PAGES_STATUS
Definition: FileMgr.h:368
#define CHECK(condition)
Definition: Logger.h:203
boost::filesystem::path getFilePath(const std::string &file_name)
Definition: FileMgr.h:337

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class GlobalFileMgr
friend

Definition at line 149 of file FileMgr.h.

Member Data Documentation

constexpr char const* File_Namespace::FileMgr::COPY_PAGES_STATUS {"pending_data_compaction_0"}
static
constexpr char File_Namespace::FileMgr::DB_META_FILENAME[] = "dbmeta"
static

Definition at line 379 of file FileMgr.h.

Referenced by createTopLevelMetadata().

int32_t File_Namespace::FileMgr::db_version_
protected

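DB version from dbmeta file, should be compatible with GlobalFileMgr::omnisci_db_version_. (The generated text "the index of the next file id" appears to belong to nextFileId_.)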

Definition at line 396 of file FileMgr.h.

Referenced by createTopLevelMetadata().

FILE* File_Namespace::FileMgr::DBMetaFile_ = nullptr
protected

Definition at line 400 of file FileMgr.h.

Referenced by closePhysicalUnlocked(), and ~FileMgr().

constexpr size_t File_Namespace::FileMgr::DEFAULT_NUM_PAGES_PER_DATA_FILE {256}
static

Definition at line 364 of file FileMgr.h.

constexpr size_t File_Namespace::FileMgr::DEFAULT_NUM_PAGES_PER_METADATA_FILE {4096}
static

Definition at line 365 of file FileMgr.h.

size_t File_Namespace::FileMgr::defaultPageSize_
protected
constexpr char const* File_Namespace::FileMgr::DELETE_EMPTY_FILES_STATUS {"pending_data_compaction_2"}
static
Epoch File_Namespace::FileMgr::epoch_
private
constexpr char File_Namespace::FileMgr::EPOCH_FILENAME[] = "epoch_metadata"
static

Definition at line 378 of file FileMgr.h.

Referenced by coreInit(), init(), and migrateEpochFileV0().

FILE* File_Namespace::FileMgr::epochFile_ = nullptr
private
bool File_Namespace::FileMgr::epochIsCheckpointed_ = true
private

Definition at line 507 of file FileMgr.h.

Referenced by writeAndSyncEpochToDisk().

constexpr char File_Namespace::FileMgr::FILE_MGR_VERSION_FILENAME[] = "filemgr_version"
static

Definition at line 380 of file FileMgr.h.

Referenced by init(), migrateEpochFileV0(), and migrateToLatestFileMgrVersion().

PageSizeFileMMap File_Namespace::FileMgr::fileIndex_
protected

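Maps page sizes to the ids of the files that use that page size; the ids are then looked up in files_. (The generated text "A map of files accessible via a file identifier." appears to describe files_.)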

Definition at line 392 of file FileMgr.h.

Referenced by clearFileInfos(), createFile(), openExistingFile(), requestFreePage(), requestFreePages(), and sortAndCopyFilePagesForCompaction().

TablePair File_Namespace::FileMgr::fileMgrKey_
private

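Key (TablePair) identifying this FileMgr; presumably the value returned by get_fileMgrKey(), which callers unpack into a database id and a table id. (The generated text "Global FileMgr." appears to describe gfm_.)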

Definition at line 504 of file FileMgr.h.

Referenced by coreInit(), describeSelf(), and FileMgr().

int32_t File_Namespace::FileMgr::fileMgrVersion_
protected

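Version of this file manager's storage. NOTE(review): the generated text "DB version from dbmeta file, should be compatible with GlobalFileMgr::omnisci_db_version_" appears to describe db_version_ rather than this member; confirm against FileMgr.h.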

Definition at line 398 of file FileMgr.h.

Referenced by init(), and migrateToLatestFileMgrVersion().

std::vector<std::pair<FileInfo*, int32_t> > File_Namespace::FileMgr::free_pages_
protected

Definition at line 406 of file FileMgr.h.

Referenced by free_page(), and freePages().

std::mutex File_Namespace::FileMgr::getPageMutex_
protected

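Mutex locked while free pages are requested (see requestFreePage() and requestFreePages()). (The generated text "pointer to DB level metadata" appears to describe DBMetaFile_.)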

Definition at line 401 of file FileMgr.h.

Referenced by requestFreePage(), and requestFreePages().

GlobalFileMgr* File_Namespace::FileMgr::gfm_
private

Definition at line 503 of file FileMgr.h.

Referenced by coreInit(), FileMgr(), getDBConvert(), getDBVersion(), and init().

constexpr int32_t File_Namespace::FileMgr::INVALID_VERSION = -1
static

Definition at line 381 of file FileMgr.h.

Referenced by createTopLevelMetadata(), and migrateToLatestFileMgrVersion().

bool File_Namespace::FileMgr::isFullyInitted_ {false}
protected

Definition at line 407 of file FileMgr.h.

Referenced by getStorageStats(), File_Namespace::CachingFileMgr::init(), and init().

const int32_t File_Namespace::FileMgr::latestFileMgrVersion_ {1}
protected

Definition at line 399 of file FileMgr.h.

Referenced by init(), and migrateToLatestFileMgrVersion().

constexpr char File_Namespace::FileMgr::LEGACY_EPOCH_FILENAME[] = "epoch"
static

Definition at line 377 of file FileMgr.h.

Referenced by migrateEpochFileV0().

int32_t File_Namespace::FileMgr::maxRollbackEpochs_
protected

Definition at line 387 of file FileMgr.h.

Referenced by File_Namespace::CachingFileMgr::CachingFileMgr(), and rollOffOldData().

mapd_shared_mutex File_Namespace::FileMgr::mutex_free_page_
mutable protected

Definition at line 405 of file FileMgr.h.

Referenced by free_page(), and freePages().

unsigned File_Namespace::FileMgr::nextFileId_
protected
size_t File_Namespace::FileMgr::num_pages_per_data_file_ {DEFAULT_NUM_PAGES_PER_DATA_FILE}
static protected

Definition at line 409 of file FileMgr.h.

Referenced by requestFreePage(), requestFreePages(), and setNumPagesPerDataFile().

size_t File_Namespace::FileMgr::num_pages_per_metadata_file_ {DEFAULT_NUM_PAGES_PER_METADATA_FILE}
static protected
size_t File_Namespace::FileMgr::num_reader_threads_
protected

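Number of reader threads (presumably set from the num_reader_threads constructor argument; see initializeNumThreads()). (The generated text "Maps page sizes to FileInfo objects." appears to describe fileIndex_.)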

Definition at line 393 of file FileMgr.h.

Referenced by initializeNumThreads().

constexpr char const* File_Namespace::FileMgr::UPDATE_PAGE_VISIBILITY_STATUS {"pending_data_compaction_1"}
static

The documentation for this class was generated from the following files: