OmniSciDB  5ade3759e0
File_Namespace::FileMgr Class Reference

#include <FileMgr.h>

+ Inheritance diagram for File_Namespace::FileMgr:
+ Collaboration diagram for File_Namespace::FileMgr:

Public Member Functions

 FileMgr (const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const size_t num_reader_threads=0, const int epoch=-1, const size_t defaultPageSize=2097152)
 Constructor. More...
 
 FileMgr (const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const bool initOnly)
 
 FileMgr (GlobalFileMgr *gfm, const size_t defaultPageSize, std::string basePath)
 
 ~FileMgr () override
 Destructor. More...
 
AbstractBuffer * createBuffer (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
 Creates a chunk with the specified key and page size. More...
 
bool isBufferOnDevice (const ChunkKey &key) override
 
void deleteBuffer (const ChunkKey &key, const bool purge=true) override
 Deletes the chunk with the specified key. More...
 
void deleteBuffersWithPrefix (const ChunkKey &keyPrefix, const bool purge=true) override
 
AbstractBuffer * getBuffer (const ChunkKey &key, const size_t numBytes=0) override
 Returns a pointer to the chunk with the specified key. More...
 
void fetchBuffer (const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override
 
AbstractBuffer * putBuffer (const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override
 Puts the contents of d into the Chunk with the given key. More...
 
AbstractBuffer * alloc (const size_t numBytes) override
 
void free (AbstractBuffer *buffer) override
 
Page requestFreePage (size_t pagesize, const bool isMetadata)
 
MgrType getMgrType () override
 
std::string getStringMgrType () override
 
std::string printSlabs () override
 
void clearSlabs () override
 
size_t getMaxSize () override
 
size_t getInUseSize () override
 
size_t getAllocated () override
 
bool isAllocationCapped () override
 
FileInfo * getFileInfoForFileId (const int fileId)
 
void init (const size_t num_reader_threads)
 
void init (const std::string dataPathToConvertFrom)
 
void copyPage (Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
 
void requestFreePages (size_t npages, size_t pagesize, std::vector< Page > &pages, const bool isMetadata)
 Obtains free pages – creates new files if necessary – of the requested size. More...
 
void getChunkMetadataVec (std::vector< std::pair< ChunkKey, ChunkMetadata >> &chunkMetadataVec) override
 
void getChunkMetadataVecForKeyPrefix (std::vector< std::pair< ChunkKey, ChunkMetadata >> &chunkMetadataVec, const ChunkKey &keyPrefix) override
 
void checkpoint () override
 Fsyncs data files, writes out epoch and fsyncs that. More...
 
void checkpoint (const int db_id, const int tb_id) override
 
int epoch ()
 Returns current value of epoch - should be one greater than recorded at last checkpoint. More...
 
size_t getNumReaderThreads ()
 Returns the number of threads, as set by the num-reader-threads parameter, to be used during the initial load and subsequent reads of data. More...
 
FILE * getFileForFileId (const int fileId)
 Returns FILE pointer associated with requested fileId. More...
 
size_t getNumChunks () override
 
int getDBVersion () const
 Returns the DB version reported by the GlobalFileMgr (gfm_). More...
 
bool getDBConvert () const
 
void createTopLevelMetadata ()
 
std::string getFileMgrBasePath () const
 
void closeRemovePhysical ()
 
void free_page (std::pair< FileInfo *, int > &&page)
 
const std::pair< const int, const int > get_fileMgrKey () const
 

Public Attributes

ChunkKeyToChunkMap chunkIndex_
 Index for looking up chunks.
 

Private Member Functions

FileInfo * createFile (const size_t pageSize, const size_t numPages)
 Adds a file to the file manager repository. More...
 
FileInfo * openExistingFile (const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
 
void createEpochFile (const std::string &epochFileName)
 
void openEpochFile (const std::string &epochFileName)
 
void writeAndSyncEpochToDisk ()
 
void createDBMetaFile (const std::string &DBMetaFileName)
 
bool openDBMetaFile (const std::string &DBMetaFileName)
 
void writeAndSyncDBMetaToDisk ()
 
void setEpoch (int epoch)
 
void processFileFutures (std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
 

Private Attributes

GlobalFileMgr * gfm_
 Global FileMgr. More...
 
std::pair< const int, const int > fileMgrKey_
 
std::string fileMgrBasePath_
 
std::vector< FileInfo * > files_
 A vector of files accessible via a file identifier. More...
 
PageSizeFileMMap fileIndex_
 Maps page sizes to FileInfo objects. More...
 
size_t num_reader_threads_
 number of threads used when loading data More...
 
size_t defaultPageSize_
 
unsigned nextFileId_
 the index of the next file id More...
 
int epoch_
 the current epoch (time of last checkpoint) More...
 
FILE * epochFile_
 
int db_version_
 
FILE * DBMetaFile_
 pointer to DB level metadata More...
 
std::mutex getPageMutex_
 
mapd_shared_mutex chunkIndexMutex_
 
mapd_shared_mutex files_rw_mutex_
 
mapd_shared_mutex mutex_free_page
 
std::vector< std::pair< FileInfo *, int > > free_pages
 

Friends

class GlobalFileMgr
 

Detailed Description

Definition at line 86 of file FileMgr.h.

Constructor & Destructor Documentation

◆ FileMgr() [1/3]

File_Namespace::FileMgr::FileMgr ( const int  deviceId,
GlobalFileMgr *  gfm,
const std::pair< const int, const int >  fileMgrKey,
const size_t  num_reader_threads = 0,
const int  epoch = -1,
const size_t  defaultPageSize = 2097152 
)

Constructor.

Definition at line 65 of file FileMgr.cpp.

References init().

71  : AbstractBufferMgr(deviceId)
72  , gfm_(gfm)
73  , fileMgrKey_(fileMgrKey)
74  , defaultPageSize_(defaultPageSize)
75  , nextFileId_(0)
76  , epoch_(epoch) {
77  init(num_reader_threads);
78 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:239
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:206
void init(const size_t num_reader_threads)
Definition: FileMgr.cpp:122
int epoch_
the index of the next file id
Definition: FileMgr.h:248
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:246
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:240
+ Here is the call graph for this function:

◆ FileMgr() [2/3]

File_Namespace::FileMgr::FileMgr ( const int  deviceId,
GlobalFileMgr *  gfm,
const std::pair< const int, const int >  fileMgrKey,
const bool  initOnly 
)

Definition at line 81 of file FileMgr.cpp.

References epochFile_, fileMgrBasePath_, fileMgrKey_, files_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, and to_string().

85  : AbstractBufferMgr(deviceId)
86  , gfm_(gfm)
87  , fileMgrKey_(fileMgrKey)
88  , defaultPageSize_(0)
89  , nextFileId_(0)
90  , epoch_(0) {
91  const std::string fileMgrDirPrefix("table");
92  const std::string FileMgrDirDelim("_");
93  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
94  std::to_string(fileMgrKey_.first) + // db_id
95  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
96  epochFile_ = nullptr;
97  files_.clear();
98 }
std::string getBasePath() const
GlobalFileMgr * gfm_
Definition: FileMgr.h:239
std::string fileMgrBasePath_
Definition: FileMgr.h:241
std::string to_string(char const *&&v)
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:249
std::vector< FileInfo * > files_
Definition: FileMgr.h:243
int epoch_
the index of the next file id
Definition: FileMgr.h:248
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:246
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:240
+ Here is the call graph for this function:

◆ FileMgr() [3/3]

File_Namespace::FileMgr::FileMgr ( GlobalFileMgr *  gfm,
const size_t  defaultPageSize,
std::string  basePath 
)

Definition at line 100 of file FileMgr.cpp.

References init().

101  : AbstractBufferMgr(0)
102  , gfm_(gfm)
103  , fileMgrKey_(0, 0)
104  , fileMgrBasePath_(basePath)
105  , defaultPageSize_(defaultPageSize)
106  , nextFileId_(0)
107  , epoch_(-1) {
108  init(basePath);
109 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:239
std::string fileMgrBasePath_
Definition: FileMgr.h:241
void init(const size_t num_reader_threads)
Definition: FileMgr.cpp:122
int epoch_
the index of the next file id
Definition: FileMgr.h:248
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:246
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:240
+ Here is the call graph for this function:

◆ ~FileMgr()

File_Namespace::FileMgr::~FileMgr ( )
override

Destructor.

Definition at line 111 of file FileMgr.cpp.

References chunkIndex_, and files_.

111  {
112  // checkpoint();
113  // free memory used by FileInfo objects
114  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
115  delete chunkIt->second;
116  }
117  for (auto file_info : files_) {
118  delete file_info;
119  }
120 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:243
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227

Member Function Documentation

◆ alloc()

AbstractBuffer * File_Namespace::FileMgr::alloc ( const size_t  numBytes = 0)
override

Definition at line 788 of file FileMgr.cpp.

References logger::FATAL, and LOG.

788  {
789  LOG(FATAL) << "Operation not supported";
790  return nullptr; // satisfy return-type warning
791 }
#define LOG(tag)
Definition: Logger.h:182

◆ checkpoint() [1/2]

void File_Namespace::FileMgr::checkpoint ( )
override

Fsyncs data files, writes out epoch and fsyncs that.

Definition at line 575 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, epoch_, logger::FATAL, files_, files_rw_mutex_, free_page(), free_pages, LOG, mutex_free_page, VLOG, and writeAndSyncEpochToDisk().

Referenced by File_Namespace::GlobalFileMgr::checkpoint().

575  {
576  VLOG(2) << "Checkpointing epoch: " << epoch_;
577  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
578  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
579  /*
580  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
581  std::cout << *vecIt << ",";
582  }
583  cout << "Is dirty: " << chunkIt->second->isDirty_ << endl;
584  */
585  if (chunkIt->second->isDirty_) {
586  chunkIt->second->writeMetadata(epoch_);
587  chunkIt->second->clearDirtyBits();
588  }
589  }
590  chunkIndexWriteLock.unlock();
591 
592  mapd_shared_lock<mapd_shared_mutex> read_lock(files_rw_mutex_);
593  for (auto fileIt = files_.begin(); fileIt != files_.end(); ++fileIt) {
594  int status = (*fileIt)->syncToDisk();
595  if (status != 0) {
596  LOG(FATAL) << "Could not sync file to disk";
597  }
598  }
599 
600  writeAndSyncEpochToDisk();
601 
602  mapd_unique_lock<mapd_shared_mutex> freePagesWriteLock(mutex_free_page);
603  for (auto& free_page : free_pages) {
604  free_page.first->freePageDeferred(free_page.second);
605  }
606  free_pages.clear();
607 }
#define LOG(tag)
Definition: Logger.h:182
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:259
void free_page(std::pair< FileInfo *, int > &&page)
Definition: FileMgr.cpp:999
std::vector< FileInfo * > files_
Definition: FileMgr.h:243
int epoch_
the index of the next file id
Definition: FileMgr.h:248
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:516
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:258
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:255
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:256
#define VLOG(n)
Definition: Logger.h:277
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ checkpoint() [2/2]

void File_Namespace::FileMgr::checkpoint ( const int  db_id,
const int  tb_id 
)
inline override

Definition at line 199 of file FileMgr.h.

References logger::FATAL, and LOG.

199  {
200  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
201  }
#define LOG(tag)
Definition: Logger.h:182

◆ clearSlabs()

void File_Namespace::FileMgr::clearSlabs ( )
inline override

Definition at line 150 of file FileMgr.h.

150  { /* noop */
151  }

◆ closeRemovePhysical()

void File_Namespace::FileMgr::closeRemovePhysical ( )

Definition at line 446 of file FileMgr.cpp.

References File_Namespace::close(), epochFile_, files_, getFileMgrBasePath(), and File_Namespace::renameForDelete().

Referenced by File_Namespace::GlobalFileMgr::removeTableRelatedDS().

446  {
447  for (auto file_info : files_) {
448  if (file_info->f) {
449  close(file_info->f);
450  file_info->f = nullptr;
451  }
452  }
453 
454  if (epochFile_) {
455  close(epochFile_);
456  epochFile_ = nullptr;
457  }
458 
459  /* rename for later deletion the directory containing table related data */
460  File_Namespace::renameForDelete(getFileMgrBasePath());
461 }
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:249
std::vector< FileInfo * > files_
Definition: FileMgr.h:243
std::string getFileMgrBasePath() const
Definition: FileMgr.h:232
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:102
void renameForDelete(const std::string directoryName)
Renames a directory to DELETE_ME_<EPOCH>_<oldname>.
Definition: File.cpp:183
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ copyPage()

void File_Namespace::FileMgr::copyPage ( Page srcPage,
FileMgr destFileMgr,
Page destPage,
const size_t  reservedHeaderSize,
const size_t  numBytes,
const size_t  offset 
)

Definition at line 463 of file FileMgr.cpp.

References CHECK, defaultPageSize_, File_Namespace::Page::fileId, getFileInfoForFileId(), File_Namespace::Page::pageNum, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by init().

468  {
469  CHECK(offset + numBytes <= defaultPageSize_);
470  FileInfo* srcFileInfo = getFileInfoForFileId(srcPage.fileId);
471  FileInfo* destFileInfo = destFileMgr->getFileInfoForFileId(destPage.fileId);
472  int8_t* buffer = new int8_t[numBytes];
473 
474  size_t bytesRead = srcFileInfo->read(
475  srcPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize, numBytes, buffer);
476  CHECK(bytesRead == numBytes);
477  size_t bytesWritten = destFileInfo->write(
478  destPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize,
479  numBytes,
480  buffer);
481  CHECK(bytesWritten == numBytes);
482  delete[] buffer;
483 }
FileInfo * getFileInfoForFileId(const int fileId)
Definition: FileMgr.h:157
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:246
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ createBuffer()

AbstractBuffer * File_Namespace::FileMgr::createBuffer ( const ChunkKey &  key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
override

Creates a chunk with the specified key and page size.

Todo:
Make all accesses to chunkIndex_ thread-safe

Definition at line 609 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, defaultPageSize_, logger::FATAL, LOG, and showChunk().

Referenced by putBuffer().

611  {
612  size_t actualPageSize = pageSize;
613  if (actualPageSize == 0) {
614  actualPageSize = defaultPageSize_;
615  }
617  // we will do this lazily and not allocate space for the Chunk (i.e.
618  // FileBuffer yet)
619  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
620 
621  if (chunkIndex_.find(key) != chunkIndex_.end()) {
622  LOG(FATAL) << "Chunk already exists for key: " << showChunk(key);
623  }
624  chunkIndex_[key] = new FileBuffer(this, actualPageSize, key, numBytes);
625  chunkIndexWriteLock.unlock();
626  return (chunkIndex_[key]);
627 }
#define LOG(tag)
Definition: Logger.h:182
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:246
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:255
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ createDBMetaFile()

void File_Namespace::FileMgr::createDBMetaFile ( const std::string &  DBMetaFileName)
private

Definition at line 534 of file FileMgr.cpp.

References File_Namespace::create(), DBMetaFile_, logger::FATAL, fileMgrBasePath_, getDBVersion(), LOG, and File_Namespace::write().

Referenced by createTopLevelMetadata().

534  {
535  std::string DBMetaFilePath(fileMgrBasePath_ + "/" + DBMetaFileName);
536  if (boost::filesystem::exists(DBMetaFilePath)) {
537  LOG(FATAL) << "DB metadata file `" << DBMetaFilePath << "` already exists.";
538  }
539  DBMetaFile_ = create(DBMetaFilePath, sizeof(int));
540  int db_ver = getDBVersion();
541  write(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_ver);
542  // LOG(INFO) << "DB metadata file has been created.";
543 }
#define LOG(tag)
Definition: Logger.h:182
int getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:974
std::string fileMgrBasePath_
Definition: FileMgr.h:241
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:35
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:121
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ createEpochFile()

void File_Namespace::FileMgr::createEpochFile ( const std::string &  epochFileName)
private

Definition at line 485 of file FileMgr.cpp.

References File_Namespace::create(), epoch_, epochFile_, logger::FATAL, fileMgrBasePath_, LOG, and File_Namespace::write().

Referenced by init().

485  {
486  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
487  if (boost::filesystem::exists(epochFilePath)) {
488  LOG(FATAL) << "Epoch file `" << epochFilePath << "` already exists";
489  }
490  epochFile_ = create(epochFilePath, sizeof(int));
491  // Write out current epoch to file - which if this
492  // function is being called should be 0
493  write(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
494  epoch_++;
495 }
#define LOG(tag)
Definition: Logger.h:182
std::string fileMgrBasePath_
Definition: FileMgr.h:241
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:35
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:249
int epoch_
the index of the next file id
Definition: FileMgr.h:248
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:121
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ createFile()

FileInfo * File_Namespace::FileMgr::createFile ( const size_t  pageSize,
const size_t  numPages 
)
private

Adds a file to the file manager repository.

This method will create a FileInfo object for the file being added, and it will create the corresponding file on physical disk with the indicated number of pages pre-allocated.

A pointer to the FileInfo object is returned, which itself has a file pointer (FILE*) and a file identifier (int fileId).

Parameters
fileName  The name given to the file in physical storage. (Not a parameter in the signature shown above.)
pageSize  The logical page size for the pages in the file.
numPages  The number of logical pages to initially allocate for the file.
Returns
FileInfo* A pointer to the FileInfo object of the added file.

Definition at line 889 of file FileMgr.cpp.

References CHECK, File_Namespace::create(), logger::FATAL, fileIndex_, fileMgrBasePath_, files_, files_rw_mutex_, LOG, and nextFileId_.

Referenced by requestFreePage(), and requestFreePages().

889  {
890  // check arguments
891  if (pageSize == 0 || numPages == 0) {
892  LOG(FATAL) << "File creation failed: pageSize and numPages must be greater than 0.";
893  }
894 
895  // create the new file
896  FILE* f = create(fileMgrBasePath_,
897  nextFileId_,
898  pageSize,
899  numPages); // TM: not sure if I like naming scheme here - should be in
900  // separate namespace?
901  CHECK(f);
902 
903  // instantiate a new FileInfo for the newly created file
904  int fileId = nextFileId_++;
905  FileInfo* fInfo =
906  new FileInfo(this, fileId, f, pageSize, numPages, true); // true means init file
907  assert(fInfo);
908 
909  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
910  // update file manager data structures
911  files_.push_back(fInfo);
912  fileIndex_.insert(std::pair<size_t, int>(pageSize, fileId));
913 
914  assert(files_.back() == fInfo); // postcondition
915  return fInfo;
916 }
#define LOG(tag)
Definition: Logger.h:182
std::string fileMgrBasePath_
Definition: FileMgr.h:241
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:35
std::vector< FileInfo * > files_
Definition: FileMgr.h:243
PageSizeFileMMap fileIndex_
A vector of files accessible via a file identifier.
Definition: FileMgr.h:244
#define CHECK(condition)
Definition: Logger.h:187
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:256
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ createTopLevelMetadata()

void File_Namespace::FileMgr::createTopLevelMetadata ( )

Definition at line 982 of file FileMgr.cpp.

References createDBMetaFile(), DB_META_FILENAME, db_version_, logger::FATAL, getDBVersion(), LOG, and openDBMetaFile().

Referenced by Data_Namespace::DataMgr::createTopLevelMetadata().

982  {
983  if (openDBMetaFile(DB_META_FILENAME)) {
984  if (db_version_ > getDBVersion()) {
985  LOG(FATAL) << "DB forward compatibility is not supported. Version of OmniSci "
986  "software used is older than the version of DB being read: "
987  << db_version_;
988  }
989  } else {
990  createDBMetaFile(DB_META_FILENAME);
991  }
992 }
#define LOG(tag)
Definition: Logger.h:182
int getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:974
void createDBMetaFile(const std::string &DBMetaFileName)
Definition: FileMgr.cpp:534
#define DB_META_FILENAME
Definition: FileMgr.cpp:41
bool openDBMetaFile(const std::string &DBMetaFileName)
Definition: FileMgr.cpp:545
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ deleteBuffer()

void File_Namespace::FileMgr::deleteBuffer ( const ChunkKey &  key,
const bool  purge = true 
)
override

Deletes the chunk with the specified key.

Definition at line 634 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, LOG, and showChunk().

634  {
635  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
636  auto chunkIt = chunkIndex_.find(key);
637  // ensure the Chunk exists
638  if (chunkIt == chunkIndex_.end()) {
639  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
640  }
641  chunkIndexWriteLock.unlock();
642  // chunkIt->second->writeMetadata(-1); // writes -1 as epoch - signifies deleted
643  if (purge) {
644  chunkIt->second->freePages();
645  }
646  //@todo need a way to represent delete in non purge case
647  delete chunkIt->second;
648  chunkIndex_.erase(chunkIt);
649 }
#define LOG(tag)
Definition: Logger.h:182
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:255
+ Here is the call graph for this function:

◆ deleteBuffersWithPrefix()

void File_Namespace::FileMgr::deleteBuffersWithPrefix ( const ChunkKey &  keyPrefix,
const bool  purge = true 
)
override

Definition at line 651 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

Referenced by File_Namespace::GlobalFileMgr::deleteBuffersWithPrefix().

651  {
652  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
653  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
654  if (chunkIt == chunkIndex_.end()) {
655  return; // should we throw?
656  }
657  while (chunkIt != chunkIndex_.end() &&
658  std::search(chunkIt->first.begin(),
659  chunkIt->first.begin() + keyPrefix.size(),
660  keyPrefix.begin(),
661  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
662  /*
663  cout << "Freeing pages for chunk ";
664  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
665  std::cout << *vecIt << ",";
666  }
667  cout << endl;
668  */
669  if (purge) {
670  chunkIt->second->freePages();
671  }
672  //@todo need a way to represent delete in non purge case
673  delete chunkIt->second;
674  chunkIndex_.erase(chunkIt++);
675  }
676 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:255
+ Here is the caller graph for this function:

◆ epoch()

int File_Namespace::FileMgr::epoch ( )
inline

Returns current value of epoch - should be one greater than recorded at last checkpoint.

Definition at line 206 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileInfo::freePage(), File_Namespace::FileInfo::openExistingFile(), File_Namespace::FileBuffer::reserve(), setEpoch(), and File_Namespace::FileBuffer::write().

206 { return epoch_; }
int epoch_
the index of the next file id
Definition: FileMgr.h:248
+ Here is the caller graph for this function:

◆ fetchBuffer()

void File_Namespace::FileMgr::fetchBuffer ( const ChunkKey &  key,
AbstractBuffer *  destBuffer,
const size_t  numBytes 
)
override

Definition at line 687 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, Data_Namespace::AbstractBuffer::read(), Data_Namespace::AbstractBuffer::reserve(), Data_Namespace::AbstractBuffer::setSize(), showChunk(), Data_Namespace::AbstractBuffer::size(), and Data_Namespace::AbstractBuffer::syncEncoder().

689  {
690  // reads chunk specified by ChunkKey into AbstractBuffer provided by
691  // destBuffer
692  if (destBuffer->isDirty()) {
693  LOG(FATAL)
694  << "Aborting attempt to fetch a chunk marked dirty. Chunk inconsistency for key: "
695  << showChunk(key);
696  }
697  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
698  auto chunkIt = chunkIndex_.find(key);
699  if (chunkIt == chunkIndex_.end()) {
700  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
701  }
702  chunkIndexReadLock.unlock();
703 
704  AbstractBuffer* chunk = chunkIt->second;
705  // ChunkSize is either specified in function call with numBytes or we
706  // just look at pageSize * numPages in FileBuffer
707  size_t chunkSize = numBytes == 0 ? chunk->size() : numBytes;
708  if (numBytes > 0 && numBytes > chunk->size()) {
709  LOG(FATAL) << "Chunk retrieved for key `" << showChunk(key) << "` is smaller ("
710  << chunk->size() << ") than number of bytes requested (" << numBytes
711  << ")";
712  }
713  destBuffer->reserve(chunkSize);
714  // std::cout << "After reserve chunksize: " << chunkSize << std::endl;
715  if (chunk->isUpdated()) {
716  chunk->read(destBuffer->getMemoryPtr(),
717  chunkSize,
718  0,
719  destBuffer->getType(),
720  destBuffer->getDeviceId());
721  } else {
722  chunk->read(destBuffer->getMemoryPtr() + destBuffer->size(),
723  chunkSize - destBuffer->size(),
724  destBuffer->size(),
725  destBuffer->getType(),
726  destBuffer->getDeviceId());
727  }
728  destBuffer->setSize(chunkSize);
729  destBuffer->syncEncoder(chunk);
730 }
#define LOG(tag)
Definition: Logger.h:182
virtual size_t size() const =0
virtual int8_t * getMemoryPtr()=0
virtual MemoryLevel getType() const =0
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
virtual void reserve(size_t numBytes)=0
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
An AbstractBuffer is a unit of data management for a data manager.
virtual void read(int8_t *const dst, const size_t numBytes, const size_t offset=0, const MemoryLevel dstBufferType=CPU_LEVEL, const int dstDeviceId=-1)=0
virtual int getDeviceId() const
void setSize(const size_t size)
virtual bool isDirty() const
void syncEncoder(const AbstractBuffer *srcBuffer)
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:255
virtual bool isUpdated() const
+ Here is the call graph for this function:

◆ free()

void File_Namespace::FileMgr::free ( AbstractBuffer *  buffer)
override

Definition at line 793 of file FileMgr.cpp.

References logger::FATAL, and LOG.

793  {
794  LOG(FATAL) << "Operation not supported";
795 }
#define LOG(tag)
Definition: Logger.h:182

◆ free_page()

void File_Namespace::FileMgr::free_page ( std::pair< FileInfo *, int > &&  page)

Definition at line 999 of file FileMgr.cpp.

References free_pages, and mutex_free_page.

Referenced by checkpoint(), and File_Namespace::FileInfo::freePage().

999  {
1000  std::unique_lock<mapd_shared_mutex> lock(mutex_free_page);
1001  free_pages.push_back(page);
1002 }
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:259
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:258
+ Here is the caller graph for this function:

◆ get_fileMgrKey()

const std::pair<const int, const int> File_Namespace::FileMgr::get_fileMgrKey ( ) const
inline

Definition at line 236 of file FileMgr.h.

Referenced by File_Namespace::FileInfo::openExistingFile().

236 { return fileMgrKey_; }
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:240
+ Here is the caller graph for this function:

◆ getAllocated()

size_t File_Namespace::FileMgr::getAllocated ( )
inline override

Definition at line 154 of file FileMgr.h.

154 { return 0; }

◆ getBuffer()

AbstractBuffer * File_Namespace::FileMgr::getBuffer ( const ChunkKey &  key,
const size_t  numBytes = 0 
)
override

Returns a pointer to the chunk with the specified key.

Definition at line 678 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, LOG, and showChunk().

678  {
679  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
680  auto chunkIt = chunkIndex_.find(key);
681  if (chunkIt == chunkIndex_.end()) {
682  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
683  }
684  return chunkIt->second;
685 }
#define LOG(tag)
Definition: Logger.h:182
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:255
+ Here is the call graph for this function:

◆ getChunkMetadataVec()

void File_Namespace::FileMgr::getChunkMetadataVec ( std::vector< std::pair< ChunkKey, ChunkMetadata >> &  chunkMetadataVec)
override

Definition at line 931 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

932  {
933  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
934  chunkMetadataVec.reserve(chunkIndex_.size());
935  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
936  if (chunkIt->second->hasEncoder) {
937  ChunkMetadata chunkMetadata;
938  chunkIt->second->encoder->getMetadata(chunkMetadata);
939  chunkMetadataVec.emplace_back(chunkIt->first, chunkMetadata);
940  }
941  }
942 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:255

◆ getChunkMetadataVecForKeyPrefix()

void File_Namespace::FileMgr::getChunkMetadataVecForKeyPrefix ( std::vector< std::pair< ChunkKey, ChunkMetadata >> &  chunkMetadataVec,
const ChunkKey keyPrefix 
)
override

Definition at line 944 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

946  {
947  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(
948  chunkIndexMutex_); // is this guarding the right structure? it looks like we only
949  // read here for chunk
950  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
951  if (chunkIt == chunkIndex_.end()) {
952  return; // throw?
953  }
954  while (chunkIt != chunkIndex_.end() &&
955  std::search(chunkIt->first.begin(),
956  chunkIt->first.begin() + keyPrefix.size(),
957  keyPrefix.begin(),
958  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
959  /*
960  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
961  std::cout << *vecIt << ",";
962  }
963  cout << endl;
964  */
965  if (chunkIt->second->hasEncoder) {
966  ChunkMetadata chunkMetadata;
967  chunkIt->second->encoder->getMetadata(chunkMetadata);
968  chunkMetadataVec.emplace_back(chunkIt->first, chunkMetadata);
969  }
970  chunkIt++;
971  }
972 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:255

◆ getDBConvert()

bool File_Namespace::FileMgr::getDBConvert ( ) const

Definition at line 978 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBConvert(), and gfm_.

978  {
979  return gfm_->getDBConvert();
980 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:239
+ Here is the call graph for this function:

◆ getDBVersion()

int File_Namespace::FileMgr::getDBVersion ( ) const

Returns the DB version obtained from the global file manager (gfm_->getDBVersion()).

Definition at line 974 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBVersion(), and gfm_.

Referenced by createDBMetaFile(), createTopLevelMetadata(), and writeAndSyncDBMetaToDisk().

974  {
975  return gfm_->getDBVersion();
976 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:239
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getFileForFileId()

FILE * File_Namespace::FileMgr::getFileForFileId ( const int  fileId)

Returns FILE pointer associated with requested fileId.

See also
FileBuffer

Definition at line 918 of file FileMgr.cpp.

References files_.

Referenced by File_Namespace::FileBuffer::readMetadata(), and File_Namespace::FileBuffer::writeMetadata().

918  {
919  assert(fileId >= 0);
920  return files_[fileId]->f;
921 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:243
+ Here is the caller graph for this function:

◆ getFileInfoForFileId()

FileInfo* File_Namespace::FileMgr::getFileInfoForFileId ( const int  fileId)
inline

Definition at line 157 of file FileMgr.h.

References logger::init().

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileBuffer::copyPage(), copyPage(), File_Namespace::FileBuffer::freePages(), File_Namespace::readForThread(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeHeader().

157 { return files_[fileId]; }
std::vector< FileInfo * > files_
Definition: FileMgr.h:243
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getFileMgrBasePath()

std::string File_Namespace::FileMgr::getFileMgrBasePath ( ) const
inline

Definition at line 232 of file FileMgr.h.

Referenced by closeRemovePhysical().

232 { return fileMgrBasePath_; }
std::string fileMgrBasePath_
Definition: FileMgr.h:241
+ Here is the caller graph for this function:

◆ getInUseSize()

size_t File_Namespace::FileMgr::getInUseSize ( )
inlineoverride

Definition at line 153 of file FileMgr.h.

153 { return 0; }

◆ getMaxSize()

size_t File_Namespace::FileMgr::getMaxSize ( )
inlineoverride

Definition at line 152 of file FileMgr.h.

152 { return 0; }

◆ getMgrType()

MgrType File_Namespace::FileMgr::getMgrType ( )
inlineoverride

Definition at line 147 of file FileMgr.h.

147 { return FILE_MGR; };

◆ getNumChunks()

size_t File_Namespace::FileMgr::getNumChunks ( )
inlineoverride

Definition at line 223 of file FileMgr.h.

223  {
224  // @todo should be locked - but this is more for testing now
225  return chunkIndex_.size();
226  }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227

◆ getNumReaderThreads()

size_t File_Namespace::FileMgr::getNumReaderThreads ( )
inline

Returns the number of threads, as set by the num-reader-threads parameter, to be used during the initial load and subsequent reads of data.

Definition at line 212 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::read().

212 { return num_reader_threads_; }
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:245
+ Here is the caller graph for this function:

◆ getStringMgrType()

std::string File_Namespace::FileMgr::getStringMgrType ( )
inlineoverride

Definition at line 148 of file FileMgr.h.

148 { return ToString(FILE_MGR); }

◆ init() [1/2]

void File_Namespace::FileMgr::init ( const size_t  num_reader_threads)

Definition at line 122 of file FileMgr.cpp.

References chunkIndex_, createEpochFile(), epoch_, EPOCH_FILENAME, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, fileMgrKey_, File_Namespace::fileSize(), File_Namespace::GlobalFileMgr::getBasePath(), gfm_, File_Namespace::headerCompare(), logger::INFO, LOG, MAPD_FILE_EXT, nextFileId_, num_reader_threads_, openEpochFile(), openExistingFile(), processFileFutures(), timer_start(), timer_stop(), to_string(), and VLOG.

Referenced by FileMgr().

122  {
123  // if epoch = -1 this means open from epoch file
124  const std::string fileMgrDirPrefix("table");
125  const std::string FileMgrDirDelim("_");
126  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
127  std::to_string(fileMgrKey_.first) + // db_id
128  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
129  boost::filesystem::path path(fileMgrBasePath_);
130  if (boost::filesystem::exists(path)) {
131  if (!boost::filesystem::is_directory(path)) {
132  LOG(FATAL) << "Specified path '" << fileMgrBasePath_
133  << "' for table data is not a directory.";
134  }
135  if (epoch_ != -1) { // if opening at previous epoch
136  int epochCopy = epoch_;
137  openEpochFile(EPOCH_FILENAME);
138  epoch_ = epochCopy;
139  } else {
140  openEpochFile(EPOCH_FILENAME);
141  }
142 
143  auto clock_begin = timer_start();
144 
145  boost::filesystem::directory_iterator
146  endItr; // default construction yields past-the-end
147  int maxFileId = -1;
148  int fileCount = 0;
149  int threadCount = std::thread::hardware_concurrency();
150  std::vector<HeaderInfo> headerVec;
151  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
152  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
153  if (boost::filesystem::is_regular_file(fileIt->status())) {
154  // note that boost::filesystem leaves preceding dot on
155  // extension - hence MAPD_FILE_EXT is ".mapd"
156  std::string extension(fileIt->path().extension().string());
157 
158  if (extension == MAPD_FILE_EXT) {
159  std::string fileStem(fileIt->path().stem().string());
160  // remove trailing dot if any
161  if (fileStem.size() > 0 && fileStem.back() == '.') {
162  fileStem = fileStem.substr(0, fileStem.size() - 1);
163  }
164  size_t dotPos = fileStem.find_last_of("."); // should only be one
165  if (dotPos == std::string::npos) {
166  LOG(FATAL) << "File `" << fileIt->path()
167  << "` does not carry page size information in the filename.";
168  }
169  int fileId = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
170  if (fileId > maxFileId) {
171  maxFileId = fileId;
172  }
173  size_t pageSize =
174  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
175  std::string filePath(fileIt->path().string());
176  size_t fileSize = boost::filesystem::file_size(filePath);
177  assert(fileSize % pageSize == 0); // should be no partial pages
178  size_t numPages = fileSize / pageSize;
179 
180  VLOG(4) << "File id: " << fileId << " Page size: " << pageSize
181  << " Num pages: " << numPages;
182 
183  file_futures.emplace_back(std::async(
184  std::launch::async, [filePath, fileId, pageSize, numPages, this] {
185  std::vector<HeaderInfo> tempHeaderVec;
186  openExistingFile(filePath, fileId, pageSize, numPages, tempHeaderVec);
187  return tempHeaderVec;
188  }));
189  fileCount++;
190  if (fileCount % threadCount == 0) {
191  processFileFutures(file_futures, headerVec);
192  }
193  }
194  }
195  }
196 
197  if (file_futures.size() > 0) {
198  processFileFutures(file_futures, headerVec);
199  }
200  int64_t queue_time_ms = timer_stop(clock_begin);
201 
202  LOG(INFO) << "Completed Reading table's file metadata, Elapsed time : "
203  << queue_time_ms << "ms Epoch: " << epoch_ << " files read: " << fileCount
204  << " table location: '" << fileMgrBasePath_ << "'";
205 
206  /* Sort headerVec so that all HeaderInfos
207  * from a chunk will be grouped together
208  * and in order of increasing PageId
209  * - Version Epoch */
210 
211  std::sort(headerVec.begin(), headerVec.end(), headerCompare);
212 
213  /* Goal of next section is to find sequences in the
214  * sorted headerVec of the same ChunkId, which we
215  * can then initiate a FileBuffer with */
216 
217  VLOG(4) << "Number of Headers in Vector: " << headerVec.size();
218  if (headerVec.size() > 0) {
219  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
220  auto startIt = headerVec.begin();
221 
222  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
223  ++headerIt) {
224  // for (auto chunkIt = headerIt->chunkKey.begin(); chunkIt !=
225  // headerIt->chunkKey.end(); ++chunkIt) {
226  // std::cout << *chunkIt << " ";
227  //}
228 
229  if (headerIt->chunkKey != lastChunkKey) {
230  chunkIndex_[lastChunkKey] =
231  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, headerIt);
232  /*
233  if (startIt->versionEpoch != -1) {
234  cout << "not skipping bc version != -1" << endl;
235  // -1 means that chunk was deleted
236  // lets not read it in
237  chunkIndex_[lastChunkKey] = new FileBuffer
238  (this,/lastChunkKey,startIt,headerIt);
239 
240  }
241  else {
242  cout << "Skipping bc version == -1" << endl;
243  }
244  */
245  lastChunkKey = headerIt->chunkKey;
246  startIt = headerIt;
247  }
248  }
249  // now need to insert last Chunk
250  // size_t pageSize = files_[startIt->page.fileId]->pageSize;
251  // cout << "Inserting last chunk" << endl;
252  // if (startIt->versionEpoch != -1) {
253  chunkIndex_[lastChunkKey] =
254  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, headerVec.end());
255  //}
256  }
257  nextFileId_ = maxFileId + 1;
258  // std::cout << "next file id: " << nextFileId_ << std::endl;
259  } else {
260  if (!boost::filesystem::create_directory(path)) {
261  LOG(FATAL) << "Could not create data directory: " << path;
262  }
263  createEpochFile(EPOCH_FILENAME);
264  }
265 
266  /* define number of reader threads to be used */
267  size_t num_hardware_based_threads =
268  std::thread::hardware_concurrency(); // # of threads is based on # of cores on the
269  // host
270  if (num_reader_threads == 0) { // # of threads has not been defined by user
271  num_reader_threads_ = num_hardware_based_threads;
272  } else {
273  if (num_reader_threads > num_hardware_based_threads) {
274  num_reader_threads_ = num_hardware_based_threads;
275  } else {
276  num_reader_threads_ = num_reader_threads;
277  }
278  }
279 }
std::string getBasePath() const
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:485
void openEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:497
#define LOG(tag)
Definition: Logger.h:182
#define MAPD_FILE_EXT
Definition: File.h:26
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
GlobalFileMgr * gfm_
Definition: FileMgr.h:239
std::string fileMgrBasePath_
Definition: FileMgr.h:241
bool headerCompare(const HeaderInfo &firstElem, const HeaderInfo &secondElem)
Definition: FileMgr.cpp:47
std::string to_string(char const *&&v)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:281
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:248
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:245
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:171
std::vector< int > ChunkKey
Definition: types.h:35
#define EPOCH_FILENAME
Definition: FileMgr.cpp:40
std::pair< const int, const int > fileMgrKey_
(db_id, tb_id) pair identifying this FileMgr.
Definition: FileMgr.h:240
#define VLOG(n)
Definition: Logger.h:277
Type timer_start()
Definition: measure.h:40
FileInfo * openExistingFile(const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:870
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ init() [2/2]

void File_Namespace::FileMgr::init ( const std::string  dataPathToConvertFrom)

Definition at line 295 of file FileMgr.cpp.

References chunkIndex_, copyPage(), epoch_, EPOCH_FILENAME, File_Namespace::MultiPage::epochs, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), File_Namespace::fileSize(), File_Namespace::GlobalFileMgr::getFileMgr(), File_Namespace::FileBuffer::getMultiPage(), gfm_, File_Namespace::headerCompare(), LOG, MAPD_FILE_EXT, File_Namespace::FileBuffer::multiPages_, nextFileId_, openEpochFile(), openExistingFile(), File_Namespace::FileBuffer::pageDataSize(), File_Namespace::FileBuffer::pageSize(), processFileFutures(), requestFreePage(), File_Namespace::FileBuffer::reservedHeaderSize(), Data_Namespace::AbstractBuffer::setDirty(), Data_Namespace::AbstractBuffer::setSize(), File_Namespace::FileBuffer::size(), Data_Namespace::AbstractBuffer::syncEncoder(), and File_Namespace::FileBuffer::writeHeader().

295  {
296  int converted_data_epoch = 0;
297  boost::filesystem::path path(dataPathToConvertFrom);
298  if (boost::filesystem::exists(path)) {
299  if (!boost::filesystem::is_directory(path)) {
300  LOG(FATAL) << "Specified path `" << path << "` is not a directory.";
301  }
302 
303  if (epoch_ != -1) { // if opening at previous epoch
304  int epochCopy = epoch_;
305  openEpochFile(EPOCH_FILENAME);
306  epoch_ = epochCopy;
307  } else {
308  openEpochFile(EPOCH_FILENAME);
309  }
310 
311  boost::filesystem::directory_iterator
312  endItr; // default construction yields past-the-end
313  int maxFileId = -1;
314  int fileCount = 0;
315  int threadCount = std::thread::hardware_concurrency();
316  std::vector<HeaderInfo> headerVec;
317  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
318  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
319  if (boost::filesystem::is_regular_file(fileIt->status())) {
320  // note that boost::filesystem leaves preceding dot on
321  // extension - hence MAPD_FILE_EXT is ".mapd"
322  std::string extension(fileIt->path().extension().string());
323 
324  if (extension == MAPD_FILE_EXT) {
325  std::string fileStem(fileIt->path().stem().string());
326  // remove trailing dot if any
327  if (fileStem.size() > 0 && fileStem.back() == '.') {
328  fileStem = fileStem.substr(0, fileStem.size() - 1);
329  }
330  size_t dotPos = fileStem.find_last_of("."); // should only be one
331  if (dotPos == std::string::npos) {
332  LOG(FATAL) << "File `" + fileIt->path().string() +
333  "` does not carry page size information in the filename.";
334  }
335  int fileId = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
336  if (fileId > maxFileId) {
337  maxFileId = fileId;
338  }
339  size_t pageSize =
340  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
341  std::string filePath(fileIt->path().string());
342  size_t fileSize = boost::filesystem::file_size(filePath);
343  assert(fileSize % pageSize == 0); // should be no partial pages
344  size_t numPages = fileSize / pageSize;
345 
346  file_futures.emplace_back(std::async(
347  std::launch::async, [filePath, fileId, pageSize, numPages, this] {
348  std::vector<HeaderInfo> tempHeaderVec;
349  openExistingFile(filePath, fileId, pageSize, numPages, tempHeaderVec);
350  return tempHeaderVec;
351  }));
352  fileCount++;
353  if (fileCount % threadCount) {
354  processFileFutures(file_futures, headerVec);
355  }
356  }
357  }
358  }
359 
360  if (file_futures.size() > 0) {
361  processFileFutures(file_futures, headerVec);
362  }
363 
364  /* Sort headerVec so that all HeaderInfos
365  * from a chunk will be grouped together
366  * and in order of increasing PageId
367  * - Version Epoch */
368 
369  std::sort(headerVec.begin(), headerVec.end(), headerCompare);
370 
371  /* Goal of next section is to find sequences in the
372  * sorted headerVec of the same ChunkId, which we
373  * can then initiate a FileBuffer with */
374 
375  if (headerVec.size() > 0) {
376  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
377  auto startIt = headerVec.begin();
378 
379  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
380  ++headerIt) {
381  if (headerIt->chunkKey != lastChunkKey) {
382  FileMgr* c_fm_ = gfm_->getFileMgr(lastChunkKey);
383  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerIt);
384  chunkIndex_[lastChunkKey] = srcBuf;
385  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
386  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
387  destBuf->syncEncoder(srcBuf);
388  destBuf->setSize(srcBuf->size());
389  destBuf->setDirty(); // this needs to be set to force writing out metadata
390  // files from "checkpoint()" call
391 
392  size_t totalNumPages = srcBuf->getMultiPage().size();
393  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
394  Page srcPage = srcBuf->getMultiPage()[pageNum].current();
395  Page destPage = c_fm_->requestFreePage(
396  srcBuf->pageSize(),
397  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
398  MultiPage multiPage(srcBuf->pageSize());
399  multiPage.epochs.push_back(converted_data_epoch);
400  multiPage.pageVersions.push_back(destPage);
401  destBuf->multiPages_.push_back(multiPage);
402  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
403  copyPage(
404  srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
405  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
406  }
407  lastChunkKey = headerIt->chunkKey;
408  startIt = headerIt;
409  }
410  }
411 
412  // now need to insert last Chunk
413  FileMgr* c_fm_ = gfm_->getFileMgr(lastChunkKey);
414  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerVec.end());
415  chunkIndex_[lastChunkKey] = srcBuf;
416  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
417  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
418  destBuf->syncEncoder(srcBuf);
419  destBuf->setSize(srcBuf->size());
420  destBuf->setDirty(); // this needs to be set to write out metadata file from the
421  // "checkpoint()" call
422 
423  size_t totalNumPages = srcBuf->getMultiPage().size();
424  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
425  Page srcPage = srcBuf->getMultiPage()[pageNum].current();
426  Page destPage = c_fm_->requestFreePage(
427  srcBuf->pageSize(),
428  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
429  MultiPage multiPage(srcBuf->pageSize());
430  multiPage.epochs.push_back(converted_data_epoch);
431  multiPage.pageVersions.push_back(destPage);
432  destBuf->multiPages_.push_back(multiPage);
433  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
434  copyPage(srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
435  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
436  }
437  }
438  nextFileId_ = maxFileId + 1;
439  } else {
440  if (!boost::filesystem::create_directory(path)) {
441  LOG(FATAL) << "Specified path does not exist: " << path;
442  }
443  }
444 }
void openEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:497
#define LOG(tag)
Definition: Logger.h:182
FileMgr * getFileMgr(const int db_id, const int tb_id)
void copyPage(Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
Definition: FileMgr.cpp:463
#define MAPD_FILE_EXT
Definition: File.h:26
GlobalFileMgr * gfm_
Definition: FileMgr.h:239
bool headerCompare(const HeaderInfo &firstElem, const HeaderInfo &secondElem)
Definition: FileMgr.cpp:47
FileMgr(const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const size_t num_reader_threads=0, const int epoch=-1, const size_t defaultPageSize=2097152)
Constructor.
Definition: FileMgr.cpp:65
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:281
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:248
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:171
std::vector< int > ChunkKey
Definition: types.h:35
#define EPOCH_FILENAME
Definition: FileMgr.cpp:40
FileInfo * openExistingFile(const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:870
+ Here is the call graph for this function:

◆ isAllocationCapped()

bool File_Namespace::FileMgr::isAllocationCapped ( )
inlineoverride

Definition at line 155 of file FileMgr.h.

155 { return false; }

◆ isBufferOnDevice()

bool File_Namespace::FileMgr::isBufferOnDevice ( const ChunkKey & key)
override

Definition at line 629 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

629  {
630  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
631  return chunkIndex_.find(key) != chunkIndex_.end();
632 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:255

◆ openDBMetaFile()

bool File_Namespace::FileMgr::openDBMetaFile ( const std::string &  DBMetaFileName)
private

Definition at line 545 of file FileMgr.cpp.

References db_version_, DBMetaFile_, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, File_Namespace::open(), and File_Namespace::read().

Referenced by createTopLevelMetadata().

545  {
546  std::string DBMetaFilePath(fileMgrBasePath_ + "/" + DBMetaFileName);
547 
548  if (!boost::filesystem::exists(DBMetaFilePath)) {
549  // LOG(INFO) << "DB metadata file does not exist, one will be created.";
550  return false;
551  }
552  if (!boost::filesystem::is_regular_file(DBMetaFilePath)) {
553  // LOG(INFO) << "DB metadata file is not a regular file, one will be created.";
554  return false;
555  }
556  if (boost::filesystem::file_size(DBMetaFilePath) < 4) {
557  // LOG(INFO) << "DB metadata file is not sized properly, one will be created.";
558  return false;
559  }
560  DBMetaFile_ = open(DBMetaFilePath);
561  read(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_version_);
562 
563  return true;
564 }
std::string fileMgrBasePath_
Definition: FileMgr.h:241
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:113
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:83
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ openEpochFile()

void File_Namespace::FileMgr::openEpochFile ( const std::string &  epochFileName)
private

Definition at line 497 of file FileMgr.cpp.

References epoch_, epochFile_, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), and File_Namespace::read().

Referenced by init().

497  {
498  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
499  if (!boost::filesystem::exists(epochFilePath)) {
500  LOG(FATAL) << "Epoch file `" << epochFilePath << "` does not exist";
501  }
502  if (!boost::filesystem::is_regular_file(epochFilePath)) {
503  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
504  }
505  if (boost::filesystem::file_size(epochFilePath) < 4) {
506  LOG(FATAL) << "Epoch file `" << epochFilePath
507  << "` is not sized properly (current size: "
508  << boost::filesystem::file_size(epochFilePath) << ", expected size: 4)";
509  }
510  epochFile_ = open(epochFilePath);
511  read(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
512  // std::cout << "Epoch after open file: " << epoch_ << std::endl;
513  epoch_++; // we are in new epoch from last checkpoint
514 }
#define LOG(tag)
Definition: Logger.h:182
std::string fileMgrBasePath_
Definition: FileMgr.h:241
FILE * epochFile_
FILE handle of the epoch file (set by openEpochFile())
Definition: FileMgr.h:249
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:113
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:248
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:83
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ openExistingFile()

FileInfo * File_Namespace::FileMgr::openExistingFile ( const std::string &  path,
const int  fileId,
const size_t  pageSize,
const size_t  numPages,
std::vector< HeaderInfo > &  headerVec 
)
private

Definition at line 870 of file FileMgr.cpp.

References epoch_, fileIndex_, files_, files_rw_mutex_, File_Namespace::open(), and File_Namespace::FileInfo::openExistingFile().

Referenced by init().

874  {
875  FILE* f = open(path);
876  FileInfo* fInfo = new FileInfo(
877  this, fileId, f, pageSize, numPages, false); // false means don't init file
878 
879  fInfo->openExistingFile(headerVec, epoch_);
880  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
881  if (fileId >= static_cast<int>(files_.size())) {
882  files_.resize(fileId + 1);
883  }
884  files_[fileId] = fInfo;
885  fileIndex_.insert(std::pair<size_t, int>(pageSize, fileId));
886  return fInfo;
887 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:243
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:248
PageSizeFileMMap fileIndex_
Maps page sizes to the ids of the files that use that page size.
Definition: FileMgr.h:244
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:83
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:256
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ printSlabs()

std::string File_Namespace::FileMgr::printSlabs ( )
inlineoverride

Definition at line 149 of file FileMgr.h.

149 { return "Not Implemented"; }

◆ processFileFutures()

void File_Namespace::FileMgr::processFileFutures ( std::vector< std::future< std::vector< HeaderInfo >>> &  file_futures,
std::vector< HeaderInfo > &  headerVec 
)
private

Definition at line 281 of file FileMgr.cpp.

Referenced by init().

283  {
284  for (auto& file_future : file_futures) {
285  file_future.wait();
286  }
287  // concatenate the vectors after thread completes
288  for (auto& file_future : file_futures) {
289  auto tempHeaderVec = file_future.get();
290  headerVec.insert(headerVec.end(), tempHeaderVec.begin(), tempHeaderVec.end());
291  }
292  file_futures.clear();
293 }
+ Here is the caller graph for this function:

◆ putBuffer()

AbstractBuffer * File_Namespace::FileMgr::putBuffer ( const ChunkKey & key,
AbstractBuffer * d,
const size_t  numBytes = 0 
)
override

Puts the contents of d into the Chunk with the given key.

Parameters
key- Unique identifier for a Chunk.
d- An object representing the source data for the Chunk.
Returns
AbstractBuffer*

Definition at line 732 of file FileMgr.cpp.

References Data_Namespace::AbstractBuffer::append(), chunkIndex_, chunkIndexMutex_, Data_Namespace::AbstractBuffer::clearDirtyBits(), createBuffer(), defaultPageSize_, logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isAppended(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, Data_Namespace::AbstractBuffer::setSize(), showChunk(), Data_Namespace::AbstractBuffer::size(), Data_Namespace::AbstractBuffer::syncEncoder(), and Data_Namespace::AbstractBuffer::write().

734  {
735  // obtain a pointer to the Chunk
736  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
737  auto chunkIt = chunkIndex_.find(key);
738  AbstractBuffer* chunk;
739  if (chunkIt == chunkIndex_.end()) {
740  chunk = createBuffer(key, defaultPageSize_);
741  } else {
742  chunk = chunkIt->second;
743  }
744  chunkIndexWriteLock.unlock();
745  size_t oldChunkSize = chunk->size();
746  // write the buffer's data to the Chunk
747  // size_t newChunkSize = numBytes == 0 ? srcBuffer->size() : numBytes;
748  size_t newChunkSize = numBytes == 0 ? srcBuffer->size() : numBytes;
749  if (chunk->isDirty()) {
750  // multiple appends are allowed,
751  // but only single update is allowed
752  if (srcBuffer->isUpdated() && chunk->isUpdated()) {
753  LOG(FATAL) << "Aborting attempt to write a chunk marked dirty. Chunk inconsistency "
754  "for key: "
755  << showChunk(key);
756  }
757  }
758  if (srcBuffer->isUpdated()) {
759  // chunk size is not changed when fixed rows are updated or are marked as deleted.
760  // but when rows are vacuumed or varlen rows are updated (new rows are appended),
761  // chunk size will change. For vacuum, checkpoint should sync size from cpu to disk.
762  // For varlen update, it takes another route via fragmenter using disk-level buffer.
763  if (0 == numBytes && !chunk->isDirty()) {
764  chunk->setSize(newChunkSize);
765  }
766  //@todo use dirty flags to only flush pages of chunk that need to
767  // be flushed
768  chunk->write((int8_t*)srcBuffer->getMemoryPtr(),
769  newChunkSize,
770  0,
771  srcBuffer->getType(),
772  srcBuffer->getDeviceId());
773  } else if (srcBuffer->isAppended()) {
774  assert(oldChunkSize < newChunkSize);
775  chunk->append((int8_t*)srcBuffer->getMemoryPtr() + oldChunkSize,
776  newChunkSize - oldChunkSize,
777  srcBuffer->getType(),
778  srcBuffer->getDeviceId());
779  }
780  // chunk->clearDirtyBits(); // Hack: because write and append will set dirty bits
781  //@todo commenting out line above will make sure this metadata is set
782  // but will trigger error on fetch chunk
783  srcBuffer->clearDirtyBits();
784  chunk->syncEncoder(srcBuffer);
785  return chunk;
786 }
virtual void write(int8_t *src, const size_t numBytes, const size_t offset=0, const MemoryLevel srcBufferType=CPU_LEVEL, const int srcDeviceId=-1)=0
#define LOG(tag)
Definition: Logger.h:182
virtual size_t size() const =0
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
AbstractBuffer * createBuffer(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
Creates a chunk with the specified key and page size.
Definition: FileMgr.cpp:609
virtual void append(int8_t *src, const size_t numBytes, const MemoryLevel srcBufferType=CPU_LEVEL, const int deviceId=-1)=0
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:227
An AbstractBuffer is a unit of data management for a data manager.
size_t defaultPageSize_
default page size; putBuffer() passes it to createBuffer() when the chunk does not exist yet
Definition: FileMgr.h:246
void setSize(const size_t size)
virtual bool isDirty() const
void syncEncoder(const AbstractBuffer *srcBuffer)
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:255
virtual bool isUpdated() const
+ Here is the call graph for this function:

◆ requestFreePage()

Page File_Namespace::FileMgr::requestFreePage ( size_t  pagesize,
const bool  isMetadata 
)

Definition at line 801 of file FileMgr.cpp.

References createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, MAX_FILE_N_METADATA_PAGES, and MAX_FILE_N_PAGES.

Referenced by File_Namespace::FileBuffer::addNewMultiPage(), init(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeMetadata().

801  {
802  std::lock_guard<std::mutex> lock(getPageMutex_);
803 
804  auto candidateFiles = fileIndex_.equal_range(pageSize);
805  int pageNum = -1;
806  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
807  FileInfo* fileInfo = files_[fileIt->second];
808  pageNum = fileInfo->getFreePage();
809  if (pageNum != -1) {
810  return (Page(fileInfo->fileId, pageNum));
811  }
812  }
813  // if here then we need to add a file
814  FileInfo* fileInfo;
815  if (isMetadata) {
816  fileInfo = createFile(pageSize, MAX_FILE_N_METADATA_PAGES);
817  } else {
818  fileInfo = createFile(pageSize, MAX_FILE_N_PAGES);
819  }
820  pageNum = fileInfo->getFreePage();
821  assert(pageNum != -1);
822  return (Page(fileInfo->fileId, pageNum));
823 }
std::mutex getPageMutex_
Mutex locked by requestFreePage() and requestFreePages() while free pages are handed out
Definition: FileMgr.h:254
std::vector< FileInfo * > files_
A vector of files accessible via a file identifier.
Definition: FileMgr.h:243
#define MAX_FILE_N_METADATA_PAGES
Definition: File.h:28
PageSizeFileMMap fileIndex_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:244
#define MAX_FILE_N_PAGES
Definition: File.h:27
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:889
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ requestFreePages()

void File_Namespace::FileMgr::requestFreePages ( size_t  npages,
size_t  pagesize,
std::vector< Page > &  pages,
const bool  isMetadata 
)

Obtains free pages – creates new files if necessary – of the requested size.

Given a page size and number of pages, this method updates the vector "pages" to include free pages of the requested size. These pages are immediately removed from the free list of the affected file(s). If there are not enough pages available among current files, new files are created and their pages are included in the vector.

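Parameters
npages: The number of free pages requested (named numPagesRequested in the definition)
pagesize: The size of each requested page (named pageSize in the definition)
pages: A vector containing the free pages obtained by this method
isMetadata: If true, any newly created file is sized with MAX_FILE_N_METADATA_PAGES; otherwise with MAX_FILE_N_PAGES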

Definition at line 825 of file FileMgr.cpp.

References createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, MAX_FILE_N_METADATA_PAGES, and MAX_FILE_N_PAGES.

828  {
829  // not used currently
830  // @todo add method to FileInfo to get more than one page
831  std::lock_guard<std::mutex> lock(getPageMutex_);
832  auto candidateFiles = fileIndex_.equal_range(pageSize);
833  size_t numPagesNeeded = numPagesRequested;
834  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
835  FileInfo* fileInfo = files_[fileIt->second];
836  int pageNum;
837  do {
838  pageNum = fileInfo->getFreePage();
839  if (pageNum != -1) {
840  pages.emplace_back(fileInfo->fileId, pageNum);
841  numPagesNeeded--;
842  }
843  } while (pageNum != -1 && numPagesNeeded > 0);
844  if (numPagesNeeded == 0) {
845  break;
846  }
847  }
848  while (numPagesNeeded > 0) {
849  FileInfo* fileInfo;
850  if (isMetadata) {
851  fileInfo = createFile(pageSize, MAX_FILE_N_METADATA_PAGES);
852  } else {
853  fileInfo = createFile(pageSize, MAX_FILE_N_PAGES);
854  }
855  int pageNum;
856  do {
857  pageNum = fileInfo->getFreePage();
858  if (pageNum != -1) {
859  pages.emplace_back(fileInfo->fileId, pageNum);
860  numPagesNeeded--;
861  }
862  } while (pageNum != -1 && numPagesNeeded > 0);
863  if (numPagesNeeded == 0) {
864  break;
865  }
866  }
867  assert(pages.size() == numPagesRequested);
868 }
std::mutex getPageMutex_
Mutex locked by requestFreePage() and requestFreePages() while free pages are handed out
Definition: FileMgr.h:254
std::vector< FileInfo * > files_
A vector of files accessible via a file identifier.
Definition: FileMgr.h:243
#define MAX_FILE_N_METADATA_PAGES
Definition: File.h:28
PageSizeFileMMap fileIndex_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:244
#define MAX_FILE_N_PAGES
Definition: File.h:27
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:889
+ Here is the call graph for this function:

◆ setEpoch()

void File_Namespace::FileMgr::setEpoch ( int  epoch)
private

Definition at line 994 of file FileMgr.cpp.

References epoch(), epoch_, and writeAndSyncEpochToDisk().

Referenced by File_Namespace::GlobalFileMgr::setTableEpoch().

994  {
995  epoch_ = epoch;
996  writeAndSyncEpochToDisk();
997 }
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:206
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:248
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:516
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ writeAndSyncDBMetaToDisk()

void File_Namespace::FileMgr::writeAndSyncDBMetaToDisk ( )
private

Definition at line 566 of file FileMgr.cpp.

References DBMetaFile_, logger::FATAL, getDBVersion(), LOG, and File_Namespace::write().

566  {
567  int db_ver = getDBVersion();
568  write(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_ver);
569  int status = fflush(DBMetaFile_);
570  if (status != 0) {
571  LOG(FATAL) << "Could not sync DB metadata file to disk";
572  }
573 }
#define LOG(tag)
Definition: Logger.h:182
int getDBVersion() const
Returns the DB version (the value writeAndSyncDBMetaToDisk() writes to the DB metadata file).
Definition: FileMgr.cpp:974
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:121
+ Here is the call graph for this function:

◆ writeAndSyncEpochToDisk()

void File_Namespace::FileMgr::writeAndSyncEpochToDisk ( )
private

Definition at line 516 of file FileMgr.cpp.

References epoch_, epochFile_, logger::FATAL, LOG, and File_Namespace::write().

Referenced by checkpoint(), and setEpoch().

516  {
517  write(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
518  int status = fflush(epochFile_);
519  // int status = fcntl(fileno(epochFile_),51);
520  if (status != 0) {
521  LOG(FATAL) << "Could not flush epoch file to disk";
522  }
523 #ifdef __APPLE__
524  status = fcntl(fileno(epochFile_), 51);
525 #else
526  status = fsync(fileno(epochFile_));
527 #endif
528  if (status != 0) {
529  LOG(FATAL) << "Could not sync epoch file to disk";
530  }
531  ++epoch_;
532 }
#define LOG(tag)
Definition: Logger.h:182
FILE * epochFile_
File handle of the epoch file; writeAndSyncEpochToDisk() writes epoch_ to it
Definition: FileMgr.h:249
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:248
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:121
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Friends And Related Function Documentation

◆ GlobalFileMgr

friend class GlobalFileMgr
friend

Definition at line 87 of file FileMgr.h.

Member Data Documentation

◆ chunkIndex_

Index for looking up chunks.

◆ chunkIndexMutex_

◆ db_version_

int File_Namespace::FileMgr::db_version_
private

DB version from dbmeta file, should be compatible with GlobalFileMgr::mapd_db_version_

Definition at line 250 of file FileMgr.h.

Referenced by createTopLevelMetadata(), and openDBMetaFile().

◆ DBMetaFile_

FILE* File_Namespace::FileMgr::DBMetaFile_
private

pointer to DB level metadata

Definition at line 252 of file FileMgr.h.

Referenced by createDBMetaFile(), openDBMetaFile(), and writeAndSyncDBMetaToDisk().

◆ defaultPageSize_

size_t File_Namespace::FileMgr::defaultPageSize_
private

Default page size used by this FileMgr

Definition at line 246 of file FileMgr.h.

Referenced by copyPage(), createBuffer(), and putBuffer().

◆ epoch_

int File_Namespace::FileMgr::epoch_
private

the current epoch (time of last checkpoint)

◆ epochFile_

FILE* File_Namespace::FileMgr::epochFile_
private

File handle of the epoch file; writeAndSyncEpochToDisk() writes epoch_ to it

Definition at line 249 of file FileMgr.h.

Referenced by closeRemovePhysical(), createEpochFile(), FileMgr(), openEpochFile(), and writeAndSyncEpochToDisk().

◆ fileIndex_

PageSizeFileMMap File_Namespace::FileMgr::fileIndex_
private

Maps page sizes to FileInfo objects.

Definition at line 244 of file FileMgr.h.

Referenced by createFile(), openExistingFile(), requestFreePage(), and requestFreePages().

◆ fileMgrBasePath_

std::string File_Namespace::FileMgr::fileMgrBasePath_
private

The OS file system path containing files related to this FileMgr

◆ fileMgrKey_

std::pair<const int, const int> File_Namespace::FileMgr::fileMgrKey_
private

Key for this FileMgr (presumably the fileMgrKey pair passed to the constructor).

Definition at line 240 of file FileMgr.h.

Referenced by FileMgr(), and init().

◆ files_

std::vector<FileInfo*> File_Namespace::FileMgr::files_
private

A vector of files accessible via a file identifier.

Definition at line 243 of file FileMgr.h.

Referenced by checkpoint(), closeRemovePhysical(), createFile(), FileMgr(), getFileForFileId(), openExistingFile(), requestFreePage(), requestFreePages(), and ~FileMgr().

◆ files_rw_mutex_

mapd_shared_mutex File_Namespace::FileMgr::files_rw_mutex_
mutableprivate

Definition at line 256 of file FileMgr.h.

Referenced by checkpoint(), createFile(), and openExistingFile().

◆ free_pages

std::vector<std::pair<FileInfo*, int> > File_Namespace::FileMgr::free_pages
private

Definition at line 259 of file FileMgr.h.

Referenced by checkpoint(), and free_page().

◆ getPageMutex_

std::mutex File_Namespace::FileMgr::getPageMutex_
private

Mutex locked by requestFreePage() and requestFreePages() while free pages are handed out

Definition at line 254 of file FileMgr.h.

Referenced by requestFreePage(), and requestFreePages().

◆ gfm_

GlobalFileMgr* File_Namespace::FileMgr::gfm_
private

Global FileMgr.

Definition at line 239 of file FileMgr.h.

Referenced by FileMgr(), getDBConvert(), getDBVersion(), and init().

◆ mutex_free_page

mapd_shared_mutex File_Namespace::FileMgr::mutex_free_page
mutableprivate

Definition at line 258 of file FileMgr.h.

Referenced by checkpoint(), and free_page().

◆ nextFileId_

unsigned File_Namespace::FileMgr::nextFileId_
private

the index of the next file id

Definition at line 247 of file FileMgr.h.

Referenced by createFile(), and init().

◆ num_reader_threads_

size_t File_Namespace::FileMgr::num_reader_threads_
private

number of threads used when loading data

Definition at line 245 of file FileMgr.h.

Referenced by init().


The documentation for this class was generated from the following files: