OmniSciDB  8a228a1076
File_Namespace::FileMgr Class Reference

#include <FileMgr.h>

+ Inheritance diagram for File_Namespace::FileMgr:
+ Collaboration diagram for File_Namespace::FileMgr:

Public Member Functions

 FileMgr (const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const size_t num_reader_threads=0, const int epoch=-1, const size_t defaultPageSize=2097152)
 Constructor. More...
 
 FileMgr (const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const bool initOnly)
 
 FileMgr (GlobalFileMgr *gfm, const size_t defaultPageSize, std::string basePath)
 
 ~FileMgr () override
 Destructor. More...
 
AbstractBuffer * createBuffer (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
 Creates a chunk with the specified key and page size. More...
 
bool isBufferOnDevice (const ChunkKey &key) override
 
void deleteBuffer (const ChunkKey &key, const bool purge=true) override
 Deletes the chunk with the specified key. More...
 
void deleteBuffersWithPrefix (const ChunkKey &keyPrefix, const bool purge=true) override
 
AbstractBuffer * getBuffer (const ChunkKey &key, const size_t numBytes=0) override
 Returns a pointer to the chunk with the specified key. More...
 
void fetchBuffer (const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override
 
AbstractBuffer * putBuffer (const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override
 Puts the contents of d into the Chunk with the given key. More...
 
AbstractBuffer * alloc (const size_t numBytes) override
 
void free (AbstractBuffer *buffer) override
 
Page requestFreePage (size_t pagesize, const bool isMetadata)
 
MgrType getMgrType () override
 
std::string getStringMgrType () override
 
std::string printSlabs () override
 
void clearSlabs () override
 
size_t getMaxSize () override
 
size_t getInUseSize () override
 
size_t getAllocated () override
 
bool isAllocationCapped () override
 
FileInfo * getFileInfoForFileId (const int fileId)
 
void init (const size_t num_reader_threads)
 
void init (const std::string dataPathToConvertFrom)
 
void copyPage (Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
 
void requestFreePages (size_t npages, size_t pagesize, std::vector< Page > &pages, const bool isMetadata)
 Obtains free pages – creates new files if necessary – of the requested size. More...
 
void getChunkMetadataVec (ChunkMetadataVector &chunkMetadataVec) override
 
void getChunkMetadataVecForKeyPrefix (ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix) override
 
void checkpoint () override
 Fsyncs data files, writes out epoch and fsyncs that. More...
 
void checkpoint (const int db_id, const int tb_id) override
 
int epoch ()
 Returns current value of epoch - should be one greater than recorded at last checkpoint. More...
 
size_t getNumReaderThreads ()
 Returns the number of threads, as set by the num-reader-threads parameter, to use during the initial load and subsequent reads of data. More...
 
FILE * getFileForFileId (const int fileId)
 Returns FILE pointer associated with requested fileId. More...
 
size_t getNumChunks () override
 
int getDBVersion () const
 Index for looking up chunks. More...
 
bool getDBConvert () const
 
void createTopLevelMetadata ()
 
std::string getFileMgrBasePath () const
 
void closeRemovePhysical ()
 
void removeTableRelatedDS (const int db_id, const int table_id) override
 
void free_page (std::pair< FileInfo *, int > &&page)
 
const std::pair< const int, const int > get_fileMgrKey () const
 

Public Attributes

ChunkKeyToChunkMap chunkIndex_
 

Private Member Functions

FileInfo * createFile (const size_t pageSize, const size_t numPages)
 Adds a file to the file manager repository. More...
 
FileInfo * openExistingFile (const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
 
void createEpochFile (const std::string &epochFileName)
 
void openEpochFile (const std::string &epochFileName)
 
void writeAndSyncEpochToDisk ()
 
void createDBMetaFile (const std::string &DBMetaFileName)
 
bool openDBMetaFile (const std::string &DBMetaFileName)
 
void writeAndSyncDBMetaToDisk ()
 
void setEpoch (int epoch)
 
void processFileFutures (std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
 
AbstractBuffer * createBufferUnlocked (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
 

Private Attributes

GlobalFileMgr * gfm_
 
std::pair< const int, const int > fileMgrKey_
 Global FileMgr. More...
 
std::string fileMgrBasePath_
 
std::vector< FileInfo * > files_
 
PageSizeFileMMap fileIndex_
 A vector of files accessible via a file identifier. More...
 
size_t num_reader_threads_
 Maps page sizes to FileInfo objects. More...
 
size_t defaultPageSize_
 number of threads used when loading data More...
 
unsigned nextFileId_
 
int epoch_
 the index of the next file id More...
 
FILE * epochFile_ = nullptr
 the current epoch (time of last checkpoint) More...
 
int db_version_
 
FILE * DBMetaFile_ = nullptr
 
std::mutex getPageMutex_
 pointer to DB level metadata More...
 
mapd_shared_mutex chunkIndexMutex_
 
mapd_shared_mutex files_rw_mutex_
 
mapd_shared_mutex mutex_free_page
 
std::vector< std::pair< FileInfo *, int > > free_pages
 

Friends

class GlobalFileMgr
 

Detailed Description

Definition at line 85 of file FileMgr.h.

Constructor & Destructor Documentation

◆ FileMgr() [1/3]

File_Namespace::FileMgr::FileMgr ( const int  deviceId,
GlobalFileMgr *  gfm,
const std::pair< const int, const int >  fileMgrKey,
const size_t  num_reader_threads = 0,
const int  epoch = -1,
const size_t  defaultPageSize = 2097152 
)

Constructor.

Definition at line 68 of file FileMgr.cpp.

References init().

74  : AbstractBufferMgr(deviceId)
75  , gfm_(gfm)
76  , fileMgrKey_(fileMgrKey)
77  , defaultPageSize_(defaultPageSize)
78  , nextFileId_(0)
79  , epoch_(epoch) {
80  init(num_reader_threads);
81 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:202
void init(const size_t num_reader_threads)
Definition: FileMgr.cpp:135
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238
+ Here is the call graph for this function:

◆ FileMgr() [2/3]

File_Namespace::FileMgr::FileMgr ( const int  deviceId,
GlobalFileMgr *  gfm,
const std::pair< const int, const int >  fileMgrKey,
const bool  initOnly 
)

Definition at line 84 of file FileMgr.cpp.

References epochFile_, fileMgrBasePath_, fileMgrKey_, files_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, and to_string().

88  : AbstractBufferMgr(deviceId)
89  , gfm_(gfm)
90  , fileMgrKey_(fileMgrKey)
91  , defaultPageSize_(0)
92  , nextFileId_(0)
93  , epoch_(0) {
94  const std::string fileMgrDirPrefix("table");
95  const std::string FileMgrDirDelim("_");
96  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
97  std::to_string(fileMgrKey_.first) + // db_id
98  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
99  epochFile_ = nullptr;
100  files_.clear();
101 }
std::string getBasePath() const
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
std::string fileMgrBasePath_
Definition: FileMgr.h:239
std::string to_string(char const *&&v)
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:247
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238
+ Here is the call graph for this function:

◆ FileMgr() [3/3]

File_Namespace::FileMgr::FileMgr ( GlobalFileMgr *  gfm,
const size_t  defaultPageSize,
std::string  basePath 
)

Definition at line 103 of file FileMgr.cpp.

References init().

104  : AbstractBufferMgr(0)
105  , gfm_(gfm)
106  , fileMgrKey_(0, 0)
107  , fileMgrBasePath_(basePath)
108  , defaultPageSize_(defaultPageSize)
109  , nextFileId_(0)
110  , epoch_(-1) {
111  init(basePath);
112 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
std::string fileMgrBasePath_
Definition: FileMgr.h:239
void init(const size_t num_reader_threads)
Definition: FileMgr.cpp:135
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238
+ Here is the call graph for this function:

◆ ~FileMgr()

File_Namespace::FileMgr::~FileMgr ( )
override

Destructor.

Definition at line 114 of file FileMgr.cpp.

References chunkIndex_, File_Namespace::close(), DBMetaFile_, epochFile_, and files_.

114  {
115  // checkpoint();
116  // free memory used by FileInfo objects
117  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
118  delete chunkIt->second;
119  }
120  for (auto file_info : files_) {
121  delete file_info;
122  }
123 
124  if (epochFile_) {
125  close(epochFile_);
126  epochFile_ = nullptr;
127  }
128 
129  if (DBMetaFile_) {
130  close(DBMetaFile_);
131  DBMetaFile_ = nullptr;
132  }
133 }
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:247
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:106
+ Here is the call graph for this function:

Member Function Documentation

◆ alloc()

AbstractBuffer * File_Namespace::FileMgr::alloc ( const size_t  numBytes = 0)
override

Definition at line 814 of file FileMgr.cpp.

References logger::FATAL, and LOG.

814  {
815  LOG(FATAL) << "Operation not supported";
816  return nullptr; // satisfy return-type warning
817 }
#define LOG(tag)
Definition: Logger.h:188

◆ checkpoint() [1/2]

void File_Namespace::FileMgr::checkpoint ( )
override

Fsyncs data files, writes out epoch and fsyncs that.

Definition at line 591 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, epoch_, logger::FATAL, files_, files_rw_mutex_, free_page(), free_pages, LOG, mutex_free_page, VLOG, and writeAndSyncEpochToDisk().

591  {
592  VLOG(2) << "Checkpointing epoch: " << epoch_;
593  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
594  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
595  /*
596  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
597  std::cout << *vecIt << ",";
598  }
599  cout << "Is dirty: " << chunkIt->second->isDirty_ << endl;
600  */
601  if (chunkIt->second->is_dirty_) {
602  chunkIt->second->writeMetadata(epoch_);
603  chunkIt->second->clearDirtyBits();
604  }
605  }
606  chunkIndexWriteLock.unlock();
607 
608  mapd_shared_lock<mapd_shared_mutex> read_lock(files_rw_mutex_);
609  for (auto fileIt = files_.begin(); fileIt != files_.end(); ++fileIt) {
610  int status = (*fileIt)->syncToDisk();
611  if (status != 0) {
612  LOG(FATAL) << "Could not sync file to disk";
613  }
614  }
615 
616  writeAndSyncEpochToDisk();
617 
618  mapd_unique_lock<mapd_shared_mutex> freePagesWriteLock(mutex_free_page);
619  for (auto& free_page : free_pages) {
620  free_page.first->freePageDeferred(free_page.second);
621  }
622  free_pages.clear();
623 }
#define LOG(tag)
Definition: Logger.h:188
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:257
void free_page(std::pair< FileInfo *, int > &&page)
Definition: FileMgr.cpp:1019
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
int epoch_
the index of the next file id
Definition: FileMgr.h:246
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:532
mapd_shared_lock< mapd_shared_mutex > read_lock
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:256
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:254
#define VLOG(n)
Definition: Logger.h:291
+ Here is the call graph for this function:

◆ checkpoint() [2/2]

void File_Namespace::FileMgr::checkpoint ( const int  db_id,
const int  tb_id 
)
inline override

Definition at line 195 of file FileMgr.h.

References logger::FATAL, and LOG.

195  {
196  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
197  }
#define LOG(tag)
Definition: Logger.h:188

◆ clearSlabs()

void File_Namespace::FileMgr::clearSlabs ( )
inline override

Definition at line 148 of file FileMgr.h.

148  { /* noop */
149  }

◆ closeRemovePhysical()

void File_Namespace::FileMgr::closeRemovePhysical ( )

Definition at line 462 of file FileMgr.cpp.

References File_Namespace::close(), epochFile_, files_, getFileMgrBasePath(), and File_Namespace::renameForDelete().

Referenced by File_Namespace::GlobalFileMgr::removeTableRelatedDS().

462  {
463  for (auto file_info : files_) {
464  if (file_info->f) {
465  close(file_info->f);
466  file_info->f = nullptr;
467  }
468  }
469 
470  if (epochFile_) {
471  close(epochFile_);
472  epochFile_ = nullptr;
473  }
474 
475  /* rename for later deletion the directory containing table related data */
476  File_Namespace::renameForDelete(getFileMgrBasePath());
477 }
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:247
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
std::string getFileMgrBasePath() const
Definition: FileMgr.h:228
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:106
void renameForDelete(const std::string directoryName)
Renames a directory to DELETE_ME_<EPOCH>_<oldname>.
Definition: File.cpp:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ copyPage()

void File_Namespace::FileMgr::copyPage ( Page &  srcPage,
FileMgr *  destFileMgr,
Page &  destPage,
const size_t  reservedHeaderSize,
const size_t  numBytes,
const size_t  offset 
)

Definition at line 479 of file FileMgr.cpp.

References CHECK, checked_malloc(), defaultPageSize_, File_Namespace::Page::fileId, free(), getFileInfoForFileId(), File_Namespace::Page::pageNum, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by init().

484  {
485  CHECK(offset + numBytes <= defaultPageSize_);
486  FileInfo* srcFileInfo = getFileInfoForFileId(srcPage.fileId);
487  FileInfo* destFileInfo = destFileMgr->getFileInfoForFileId(destPage.fileId);
488  int8_t* buffer = reinterpret_cast<int8_t*>(checked_malloc(numBytes));
489 
490  size_t bytesRead = srcFileInfo->read(
491  srcPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize, numBytes, buffer);
492  CHECK(bytesRead == numBytes);
493  size_t bytesWritten = destFileInfo->write(
494  destPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize,
495  numBytes,
496  buffer);
497  CHECK(bytesWritten == numBytes);
498  ::free(buffer);
499 }
FileInfo * getFileInfoForFileId(const int fileId)
Definition: FileMgr.h:155
void free(AbstractBuffer *buffer) override
Definition: FileMgr.cpp:819
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ createBuffer()

AbstractBuffer * File_Namespace::FileMgr::createBuffer ( const ChunkKey &  key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
override

Creates a chunk with the specified key and page size.

Definition at line 625 of file FileMgr.cpp.

References chunkIndexMutex_, and createBufferUnlocked().

627  {
628  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
629  return createBufferUnlocked(key, pageSize, numBytes);
630 }
AbstractBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:635
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
+ Here is the call graph for this function:

◆ createBufferUnlocked()

AbstractBuffer * File_Namespace::FileMgr::createBufferUnlocked ( const ChunkKey &  key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
private
Todo:
Make all accesses to chunkIndex_ thread-safe

Definition at line 635 of file FileMgr.cpp.

References chunkIndex_, defaultPageSize_, logger::FATAL, LOG, and showChunk().

Referenced by createBuffer(), and putBuffer().

637  {
638  size_t actualPageSize = pageSize;
639  if (actualPageSize == 0) {
640  actualPageSize = defaultPageSize_;
641  }
643  // we will do this lazily and not allocate space for the Chunk (i.e.
644  // FileBuffer yet)
645 
646  if (chunkIndex_.find(key) != chunkIndex_.end()) {
647  LOG(FATAL) << "Chunk already exists for key: " << showChunk(key);
648  }
649  chunkIndex_[key] = new FileBuffer(this, actualPageSize, key, numBytes);
650  return (chunkIndex_[key]);
651 }
#define LOG(tag)
Definition: Logger.h:188
std::string showChunk(const ChunkKey &key)
Definition: types.h:62
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ createDBMetaFile()

void File_Namespace::FileMgr::createDBMetaFile ( const std::string &  DBMetaFileName)
private

Definition at line 550 of file FileMgr.cpp.

References File_Namespace::create(), DBMetaFile_, logger::FATAL, fileMgrBasePath_, getDBVersion(), LOG, and File_Namespace::write().

Referenced by createTopLevelMetadata().

550  {
551  std::string DBMetaFilePath(fileMgrBasePath_ + "/" + DBMetaFileName);
552  if (boost::filesystem::exists(DBMetaFilePath)) {
553  LOG(FATAL) << "DB metadata file `" << DBMetaFilePath << "` already exists.";
554  }
555  DBMetaFile_ = create(DBMetaFilePath, sizeof(int));
556  int db_ver = getDBVersion();
557  write(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_ver);
558  // LOG(INFO) << "DB metadata file has been created.";
559 }
#define LOG(tag)
Definition: Logger.h:188
int getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:994
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:39
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:125
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ createEpochFile()

void File_Namespace::FileMgr::createEpochFile ( const std::string &  epochFileName)
private

Definition at line 501 of file FileMgr.cpp.

References File_Namespace::create(), epoch_, epochFile_, logger::FATAL, fileMgrBasePath_, LOG, and File_Namespace::write().

Referenced by init().

501  {
502  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
503  if (boost::filesystem::exists(epochFilePath)) {
504  LOG(FATAL) << "Epoch file `" << epochFilePath << "` already exists";
505  }
506  epochFile_ = create(epochFilePath, sizeof(int));
507  // Write out current epoch to file - which if this
508  // function is being called should be 0
509  write(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
510  epoch_++;
511 }
#define LOG(tag)
Definition: Logger.h:188
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:39
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:247
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:125
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ createFile()

FileInfo * File_Namespace::FileMgr::createFile ( const size_t  pageSize,
const size_t  numPages 
)
private

Adds a file to the file manager repository.

This method will create a FileInfo object for the file being added, and it will create the corresponding file on physical disk with the indicated number of pages pre-allocated.

A pointer to the FileInfo object is returned, which itself has a file pointer (FILE*) and a file identifier (int fileId).

Parameters
fileName: The name given to the file in physical storage.
pageSize: The logical page size for the pages in the file.
numPages: The number of logical pages to initially allocate for the file.
Returns
FileInfo* A pointer to the FileInfo object of the added file.

Definition at line 911 of file FileMgr.cpp.

References CHECK, File_Namespace::create(), logger::FATAL, fileIndex_, fileMgrBasePath_, files_, files_rw_mutex_, LOG, and nextFileId_.

Referenced by requestFreePage(), and requestFreePages().

911  {
912  // check arguments
913  if (pageSize == 0 || numPages == 0) {
914  LOG(FATAL) << "File creation failed: pageSize and numPages must be greater than 0.";
915  }
916 
917  // create the new file
918  FILE* f = create(fileMgrBasePath_,
919  nextFileId_,
920  pageSize,
921  numPages); // TM: not sure if I like naming scheme here - should be in
922  // separate namespace?
923  CHECK(f);
924 
925  // instantiate a new FileInfo for the newly created file
926  int fileId = nextFileId_++;
927  FileInfo* fInfo =
928  new FileInfo(this, fileId, f, pageSize, numPages, true); // true means init file
929  CHECK(fInfo);
930 
931  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
932  // update file manager data structures
933  files_.push_back(fInfo);
934  fileIndex_.insert(std::pair<size_t, int>(pageSize, fileId));
935 
936  CHECK(files_.back() == fInfo); // postcondition
937  return fInfo;
938 }
#define LOG(tag)
Definition: Logger.h:188
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:39
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
PageSizeFileMMap fileIndex_
A vector of files accessible via a file identifier.
Definition: FileMgr.h:242
#define CHECK(condition)
Definition: Logger.h:197
mapd_unique_lock< mapd_shared_mutex > write_lock
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:254
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ createTopLevelMetadata()

void File_Namespace::FileMgr::createTopLevelMetadata ( )

Definition at line 1002 of file FileMgr.cpp.

References createDBMetaFile(), DB_META_FILENAME, db_version_, logger::FATAL, getDBVersion(), LOG, and openDBMetaFile().

1002  {
1003  if (openDBMetaFile(DB_META_FILENAME)) {
1004  if (db_version_ > getDBVersion()) {
1005  LOG(FATAL) << "DB forward compatibility is not supported. Version of OmniSci "
1006  "software used is older than the version of DB being read: "
1007  << db_version_;
1008  }
1009  } else {
1010  createDBMetaFile(DB_META_FILENAME);
1011  }
1012 }
#define LOG(tag)
Definition: Logger.h:188
int getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:994
void createDBMetaFile(const std::string &DBMetaFileName)
Definition: FileMgr.cpp:550
#define DB_META_FILENAME
Definition: FileMgr.cpp:44
bool openDBMetaFile(const std::string &DBMetaFileName)
Definition: FileMgr.cpp:561
+ Here is the call graph for this function:

◆ deleteBuffer()

void File_Namespace::FileMgr::deleteBuffer ( const ChunkKey &  key,
const bool  purge = true 
)
override

Deletes the chunk with the specified key.

Definition at line 658 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, LOG, and showChunk().

658  {
659  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
660  auto chunkIt = chunkIndex_.find(key);
661  // ensure the Chunk exists
662  if (chunkIt == chunkIndex_.end()) {
663  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
664  }
665  chunkIndexWriteLock.unlock();
666  // chunkIt->second->writeMetadata(-1); // writes -1 as epoch - signifies deleted
667  if (purge) {
668  chunkIt->second->freePages();
669  }
670  //@todo need a way to represent delete in non purge case
671  delete chunkIt->second;
672  chunkIndex_.erase(chunkIt);
673 }
#define LOG(tag)
Definition: Logger.h:188
std::string showChunk(const ChunkKey &key)
Definition: types.h:62
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
+ Here is the call graph for this function:

◆ deleteBuffersWithPrefix()

void File_Namespace::FileMgr::deleteBuffersWithPrefix ( const ChunkKey &  keyPrefix,
const bool  purge = true 
)
override

Definition at line 675 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

675  {
676  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
677  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
678  if (chunkIt == chunkIndex_.end()) {
679  return; // should we throw?
680  }
681  while (chunkIt != chunkIndex_.end() &&
682  std::search(chunkIt->first.begin(),
683  chunkIt->first.begin() + keyPrefix.size(),
684  keyPrefix.begin(),
685  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
686  /*
687  cout << "Freeing pages for chunk ";
688  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
689  std::cout << *vecIt << ",";
690  }
691  cout << endl;
692  */
693  if (purge) {
694  chunkIt->second->freePages();
695  }
696  //@todo need a way to represent delete in non purge case
697  delete chunkIt->second;
698  chunkIndex_.erase(chunkIt++);
699  }
700 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

◆ epoch()

int File_Namespace::FileMgr::epoch ( )
inline

Returns current value of epoch - should be one greater than recorded at last checkpoint.

Definition at line 202 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileInfo::freePage(), File_Namespace::FileInfo::openExistingFile(), File_Namespace::FileBuffer::reserve(), setEpoch(), and File_Namespace::FileBuffer::write().

202 { return epoch_; }
int epoch_
the index of the next file id
Definition: FileMgr.h:246
+ Here is the caller graph for this function:

◆ fetchBuffer()

void File_Namespace::FileMgr::fetchBuffer ( const ChunkKey &  key,
AbstractBuffer *  destBuffer,
const size_t  numBytes 
)
override

Definition at line 711 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, Data_Namespace::AbstractBuffer::read(), Data_Namespace::AbstractBuffer::reserve(), Data_Namespace::AbstractBuffer::setSize(), showChunk(), Data_Namespace::AbstractBuffer::size(), and Data_Namespace::AbstractBuffer::syncEncoder().

713  {
714  // reads chunk specified by ChunkKey into AbstractBuffer provided by
715  // destBuffer
716  if (destBuffer->isDirty()) {
717  LOG(FATAL)
718  << "Aborting attempt to fetch a chunk marked dirty. Chunk inconsistency for key: "
719  << showChunk(key);
720  }
721  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
722  auto chunkIt = chunkIndex_.find(key);
723  if (chunkIt == chunkIndex_.end()) {
724  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
725  }
726  chunkIndexReadLock.unlock();
727 
728  AbstractBuffer* chunk = chunkIt->second;
729  // ChunkSize is either specified in function call with numBytes or we
730  // just look at pageSize * numPages in FileBuffer
731  size_t chunkSize = numBytes == 0 ? chunk->size() : numBytes;
732  if (numBytes > 0 && numBytes > chunk->size()) {
733  LOG(FATAL) << "Chunk retrieved for key `" << showChunk(key) << "` is smaller ("
734  << chunk->size() << ") than number of bytes requested (" << numBytes
735  << ")";
736  }
737  destBuffer->reserve(chunkSize);
738  // std::cout << "After reserve chunksize: " << chunkSize << std::endl;
739  if (chunk->isUpdated()) {
740  chunk->read(destBuffer->getMemoryPtr(),
741  chunkSize,
742  0,
743  destBuffer->getType(),
744  destBuffer->getDeviceId());
745  } else {
746  chunk->read(destBuffer->getMemoryPtr() + destBuffer->size(),
747  chunkSize - destBuffer->size(),
748  destBuffer->size(),
749  destBuffer->getType(),
750  destBuffer->getDeviceId());
751  }
752  destBuffer->setSize(chunkSize);
753  destBuffer->syncEncoder(chunk);
754 }
void syncEncoder(const AbstractBuffer *src_buffer)
#define LOG(tag)
Definition: Logger.h:188
virtual size_t size() const =0
virtual int8_t * getMemoryPtr()=0
virtual MemoryLevel getType() const =0
std::string showChunk(const ChunkKey &key)
Definition: types.h:62
virtual void read(int8_t *const dst, const size_t num_bytes, const size_t offset=0, const MemoryLevel dst_buffer_type=CPU_LEVEL, const int dst_device_id=-1)=0
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
virtual void setSize(const size_t size)
An AbstractBuffer is a unit of data management for a data manager.
virtual int getDeviceId() const
virtual bool isDirty() const
virtual void reserve(size_t num_bytes)=0
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
virtual bool isUpdated() const
+ Here is the call graph for this function:

◆ free()

void File_Namespace::FileMgr::free ( AbstractBuffer *  buffer)
override

Definition at line 819 of file FileMgr.cpp.

References logger::FATAL, and LOG.

Referenced by copyPage().

819  {
820  LOG(FATAL) << "Operation not supported";
821 }
#define LOG(tag)
Definition: Logger.h:188
+ Here is the caller graph for this function:

◆ free_page()

void File_Namespace::FileMgr::free_page ( std::pair< FileInfo *, int > &&  page)

Definition at line 1019 of file FileMgr.cpp.

References free_pages, and mutex_free_page.

Referenced by checkpoint(), and File_Namespace::FileInfo::freePage().

1019  {
1020  std::unique_lock<mapd_shared_mutex> lock(mutex_free_page);
1021  free_pages.push_back(page);
1022 }
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:257
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:256
+ Here is the caller graph for this function:

◆ get_fileMgrKey()

const std::pair<const int, const int> File_Namespace::FileMgr::get_fileMgrKey ( ) const
inline

Definition at line 234 of file FileMgr.h.

Referenced by File_Namespace::FileInfo::openExistingFile().

234 { return fileMgrKey_; }
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238
+ Here is the caller graph for this function:

◆ getAllocated()

size_t File_Namespace::FileMgr::getAllocated ( )
inline override

Definition at line 152 of file FileMgr.h.

152 { return 0; }

◆ getBuffer()

AbstractBuffer * File_Namespace::FileMgr::getBuffer ( const ChunkKey &  key,
const size_t  numBytes = 0 
)
override

Returns a pointer to the chunk with the specified key.

Definition at line 702 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, LOG, and showChunk().

702  {
703  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
704  auto chunkIt = chunkIndex_.find(key);
705  if (chunkIt == chunkIndex_.end()) {
706  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
707  }
708  return chunkIt->second;
709 }
#define LOG(tag)
Definition: Logger.h:188
std::string showChunk(const ChunkKey &key)
Definition: types.h:62
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
+ Here is the call graph for this function:

◆ getChunkMetadataVec()

void File_Namespace::FileMgr::getChunkMetadataVec ( ChunkMetadataVector chunkMetadataVec)
override

Definition at line 953 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

// Appends one (chunk key, metadata) entry to chunkMetadataVec for every buffer in
// chunkIndex_ whose has_encoder flag is set; buffers without an encoder are skipped.
953  {
// NOTE(review): an exclusive lock is taken, yet only read accesses to chunkIndex_
// are visible in this body - confirm whether a shared lock would suffice.
954  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
955  chunkMetadataVec.reserve(chunkIndex_.size());
956  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
957  if (chunkIt->second->has_encoder) {
958  auto chunk_metadata = std::make_shared<ChunkMetadata>();
959  chunkIt->second->encoder->getMetadata(chunk_metadata);
960  chunkMetadataVec.emplace_back(chunkIt->first, chunk_metadata);
961  }
962  }
963 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

◆ getChunkMetadataVecForKeyPrefix()

void File_Namespace::FileMgr::getChunkMetadataVecForKeyPrefix ( ChunkMetadataVector chunkMetadataVec,
const ChunkKey keyPrefix 
)
override

Definition at line 965 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

// Appends (chunk key, metadata) entries to chunkMetadataVec for the run of keys in
// chunkIndex_ that starts at lower_bound(keyPrefix) and whose leading
// keyPrefix.size() elements equal keyPrefix. Buffers without an encoder are skipped.
966  {
967  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(
968  chunkIndexMutex_); // is this guarding the right structure? it looks like we only
969  // read here for chunk
970  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
971  if (chunkIt == chunkIndex_.end()) {
972  return; // throw?
973  }
// The searched window is exactly keyPrefix.size() elements long, so std::search can
// only succeed at the window start; a result equal to the window end means "no match".
// NOTE(review): `begin() + keyPrefix.size()` assumes every visited key has at least
// keyPrefix.size() elements - confirm.
// NOTE(review): for an empty keyPrefix std::search returns its first argument, which
// equals the window end, so the loop body would never run - confirm this is intended.
974  while (chunkIt != chunkIndex_.end() &&
975  std::search(chunkIt->first.begin(),
976  chunkIt->first.begin() + keyPrefix.size(),
977  keyPrefix.begin(),
978  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
979  /*
980  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
981  std::cout << *vecIt << ",";
982  }
983  cout << endl;
984  */
985  if (chunkIt->second->has_encoder) {
986  auto chunk_metadata = std::make_shared<ChunkMetadata>();
987  chunkIt->second->encoder->getMetadata(chunk_metadata);
988  chunkMetadataVec.emplace_back(chunkIt->first, chunk_metadata);
989  }
990  chunkIt++;
991  }
992 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

◆ getDBConvert()

bool File_Namespace::FileMgr::getDBConvert ( ) const

Definition at line 998 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBConvert(), and gfm_.

// Forwards to gfm_->getDBConvert() and returns its result.
998  {
999  return gfm_->getDBConvert();
1000 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
+ Here is the call graph for this function:

◆ getDBVersion()

int File_Namespace::FileMgr::getDBVersion ( ) const

Returns the database version reported by the global file manager (gfm_).

Definition at line 994 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBVersion(), and gfm_.

Referenced by createDBMetaFile(), createTopLevelMetadata(), and writeAndSyncDBMetaToDisk().

// Forwards to gfm_->getDBVersion() and returns its result.
994  {
995  return gfm_->getDBVersion();
996 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getFileForFileId()

FILE * File_Namespace::FileMgr::getFileForFileId ( const int  fileId)

Returns FILE pointer associated with requested fileId.

See also
FileBuffer

Definition at line 940 of file FileMgr.cpp.

References CHECK, and files_.

Referenced by File_Namespace::FileBuffer::readMetadata(), and File_Namespace::FileBuffer::writeMetadata().

// CHECKs that fileId is a valid index into files_, then returns that entry's `f` member.
// NOTE(review): files_ is read here without files_rw_mutex_, which openExistingFile()
// holds while resizing files_ - confirm callers cannot race with file loading.
940  {
941  CHECK(fileId >= 0 && static_cast<size_t>(fileId) < files_.size());
942  return files_[fileId]->f;
943 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ getFileInfoForFileId()

FileInfo* File_Namespace::FileMgr::getFileInfoForFileId ( const int  fileId)
inline

Definition at line 155 of file FileMgr.h.

References logger::init().

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileBuffer::copyPage(), copyPage(), File_Namespace::FileBuffer::freeChunkPages(), File_Namespace::FileBuffer::freeMetadataPages(), File_Namespace::readForThread(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeHeader().

// Returns files_[fileId] with no bounds check (getFileForFileId() CHECKs the same index).
155 { return files_[fileId]; }
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getFileMgrBasePath()

std::string File_Namespace::FileMgr::getFileMgrBasePath ( ) const
inline

Definition at line 228 of file FileMgr.h.

Referenced by closeRemovePhysical().

// Returns a copy of fileMgrBasePath_.
228 { return fileMgrBasePath_; }
std::string fileMgrBasePath_
Definition: FileMgr.h:239
+ Here is the caller graph for this function:

◆ getInUseSize()

size_t File_Namespace::FileMgr::getInUseSize ( )
inlineoverride

Definition at line 151 of file FileMgr.h.

// Always reports 0 for this manager.
151 { return 0; }

◆ getMaxSize()

size_t File_Namespace::FileMgr::getMaxSize ( )
inlineoverride

Definition at line 150 of file FileMgr.h.

// Always reports 0 for this manager.
150 { return 0; }

◆ getMgrType()

MgrType File_Namespace::FileMgr::getMgrType ( )
inlineoverride

Definition at line 145 of file FileMgr.h.

// Always returns FILE_MGR.
145 { return FILE_MGR; };

◆ getNumChunks()

size_t File_Namespace::FileMgr::getNumChunks ( )
inlineoverride

Definition at line 219 of file FileMgr.h.

// Returns the number of entries in chunkIndex_; chunkIndexMutex_ is not taken here.
219  {
220  // @todo should be locked - but this is more for testing now
221  return chunkIndex_.size();
222  }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223

◆ getNumReaderThreads()

size_t File_Namespace::FileMgr::getNumReaderThreads ( )
inline

Returns number of threads defined by parameter num-reader-threads which should be used during initial load and consequent read of data.

Definition at line 208 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::read().

// Returns num_reader_threads_.
208 { return num_reader_threads_; }
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:243
+ Here is the caller graph for this function:

◆ getStringMgrType()

std::string File_Namespace::FileMgr::getStringMgrType ( )
inlineoverride

Definition at line 146 of file FileMgr.h.

// Returns ToString(FILE_MGR).
146 { return ToString(FILE_MGR); }

◆ init() [1/2]

void File_Namespace::FileMgr::init ( const size_t  num_reader_threads)

Definition at line 135 of file FileMgr.cpp.

References CHECK_EQ, chunkIndex_, createEpochFile(), epoch_, EPOCH_FILENAME, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, fileMgrKey_, File_Namespace::fileSize(), File_Namespace::GlobalFileMgr::getBasePath(), gfm_, File_Namespace::headerCompare(), logger::INFO, LOG, MAPD_FILE_EXT, nextFileId_, num_reader_threads_, openEpochFile(), openExistingFile(), processFileFutures(), timer_start(), timer_stop(), to_string(), and VLOG.

Referenced by FileMgr().

// Initializes this FileMgr from its table directory.
//
// Builds fileMgrBasePath_ as gfm_->getBasePath() + "table_<db_id>_<tb_id>". If that
// directory exists, every regular file with extension MAPD_FILE_EXT is opened through
// openExistingFile() on a std::async task, the collected HeaderInfo entries are sorted
// with headerCompare, and one FileBuffer per distinct chunk key is stored in
// chunkIndex_; nextFileId_ becomes the largest file id seen plus one. If the directory
// does not exist it is created. Finally num_reader_threads_ is set from
// `num_reader_threads`, capped at std::thread::hardware_concurrency().
//
// NOTE(review): listing lines 150, 153 and 276 are absent from this extract. The
// reference list for this function names openEpochFile() and createEpochFile(), so
// those lines presumably hold the epoch-file calls - confirm against FileMgr.cpp.
135  {
136  // if epoch = -1 this means open from epoch file
137  const std::string fileMgrDirPrefix("table");
138  const std::string FileMgrDirDelim("_");
139  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
140  std::to_string(fileMgrKey_.first) + // db_id
141  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
142  boost::filesystem::path path(fileMgrBasePath_);
143  if (boost::filesystem::exists(path)) {
144  if (!boost::filesystem::is_directory(path)) {
145  LOG(FATAL) << "Specified path '" << fileMgrBasePath_
146  << "' for table data is not a directory.";
147  }
// epoch_ is saved and restored around the (missing) line 150, so a caller-supplied
// epoch survives whatever that line does to epoch_.
148  if (epoch_ != -1) { // if opening at previous epoch
149  int epochCopy = epoch_;
151  epoch_ = epochCopy;
152  } else {
154  }
155 
156  auto clock_begin = timer_start();
157 
158  boost::filesystem::directory_iterator
159  endItr; // default construction yields past-the-end
160  int maxFileId = -1;
161  int fileCount = 0;
// NOTE(review): std::thread::hardware_concurrency() is allowed to return 0, and
// threadCount is used as a modulus at line 203 - confirm a zero value cannot occur.
162  int threadCount = std::thread::hardware_concurrency();
163  std::vector<HeaderInfo> headerVec;
164  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
165  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
166  if (boost::filesystem::is_regular_file(fileIt->status())) {
167  // note that boost::filesystem leaves preceding dot on
168  // extension - hence MAPD_FILE_EXT is ".mapd"
169  std::string extension(fileIt->path().extension().string());
170 
171  if (extension == MAPD_FILE_EXT) {
// The stem is parsed as "<fileId>.<pageSize>": text before the last dot is the file
// id, text after it is the page size.
172  std::string fileStem(fileIt->path().stem().string());
173  // remove trailing dot if any
174  if (fileStem.size() > 0 && fileStem.back() == '.') {
175  fileStem = fileStem.substr(0, fileStem.size() - 1);
176  }
177  size_t dotPos = fileStem.find_last_of("."); // should only be one
178  if (dotPos == std::string::npos) {
179  LOG(FATAL) << "File `" << fileIt->path()
180  << "` does not carry page size information in the filename.";
181  }
182  int fileId = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
183  if (fileId > maxFileId) {
184  maxFileId = fileId;
185  }
186  size_t pageSize =
187  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
188  std::string filePath(fileIt->path().string());
189  size_t fileSize = boost::filesystem::file_size(filePath);
190  CHECK_EQ(fileSize % pageSize, size_t(0)); // should be no partial pages
191  size_t numPages = fileSize / pageSize;
192 
193  VLOG(4) << "File id: " << fileId << " Page size: " << pageSize
194  << " Num pages: " << numPages;
195 
// Each file is opened on its own async task; the per-file headers are returned through
// the future and merged by processFileFutures().
196  file_futures.emplace_back(std::async(
197  std::launch::async, [filePath, fileId, pageSize, numPages, this] {
198  std::vector<HeaderInfo> tempHeaderVec;
199  openExistingFile(filePath, fileId, pageSize, numPages, tempHeaderVec);
200  return tempHeaderVec;
201  }));
202  fileCount++;
// Drain the outstanding futures once per threadCount files, bounding the batch size.
203  if (fileCount % threadCount == 0) {
204  processFileFutures(file_futures, headerVec);
205  }
206  }
207  }
208  }
209 
// Drain whatever is left over from the last partial batch.
210  if (file_futures.size() > 0) {
211  processFileFutures(file_futures, headerVec);
212  }
213  int64_t queue_time_ms = timer_stop(clock_begin);
214 
215  LOG(INFO) << "Completed Reading table's file metadata, Elapsed time : "
216  << queue_time_ms << "ms Epoch: " << epoch_ << " files read: " << fileCount
217  << " table location: '" << fileMgrBasePath_ << "'";
218 
219  /* Sort headerVec so that all HeaderInfos
220  * from a chunk will be grouped together
221  * and in order of increasing PageId
222  * - Version Epoch */
223 
224  std::sort(headerVec.begin(), headerVec.end(), headerCompare);
225 
226  /* Goal of next section is to find sequences in the
227  * sorted headerVec of the same ChunkId, which we
228  * can then initiate a FileBuffer with */
229 
230  VLOG(4) << "Number of Headers in Vector: " << headerVec.size();
231  if (headerVec.size() > 0) {
232  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
233  auto startIt = headerVec.begin();
234 
235  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
236  ++headerIt) {
237  // for (auto chunkIt = headerIt->chunkKey.begin(); chunkIt !=
238  // headerIt->chunkKey.end(); ++chunkIt) {
239  // std::cout << *chunkIt << " ";
240  //}
241 
// A change of chunk key closes the run [startIt, headerIt) for lastChunkKey.
242  if (headerIt->chunkKey != lastChunkKey) {
243  chunkIndex_[lastChunkKey] =
244  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, headerIt);
245  /*
246  if (startIt->versionEpoch != -1) {
247  cout << "not skipping bc version != -1" << endl;
248  // -1 means that chunk was deleted
249  // lets not read it in
250  chunkIndex_[lastChunkKey] = new FileBuffer
251  (this,/lastChunkKey,startIt,headerIt);
252 
253  }
254  else {
255  cout << "Skipping bc version == -1" << endl;
256  }
257  */
258  lastChunkKey = headerIt->chunkKey;
259  startIt = headerIt;
260  }
261  }
262  // now need to insert last Chunk
263  // size_t pageSize = files_[startIt->page.fileId]->pageSize;
264  // cout << "Inserting last chunk" << endl;
265  // if (startIt->versionEpoch != -1) {
266  chunkIndex_[lastChunkKey] =
267  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, headerVec.end());
268  //}
269  }
270  nextFileId_ = maxFileId + 1;
271  // std::cout << "next file id: " << nextFileId_ << std::endl;
272  } else {
273  if (!boost::filesystem::create_directory(path)) {
274  LOG(FATAL) << "Could not create data directory: " << path;
275  }
277  }
278 
279  /* define number of reader threads to be used */
280  size_t num_hardware_based_threads =
281  std::thread::hardware_concurrency(); // # of threads is based on # of cores on the
282  // host
283  if (num_reader_threads == 0) { // # of threads has not been defined by user
284  num_reader_threads_ = num_hardware_based_threads;
285  } else {
// A user-supplied value is capped at the hardware-based thread count.
286  if (num_reader_threads > num_hardware_based_threads) {
287  num_reader_threads_ = num_hardware_based_threads;
288  } else {
289  num_reader_threads_ = num_reader_threads;
290  }
291  }
292 }
std::string getBasePath() const
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:501
void openEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:513
#define LOG(tag)
Definition: Logger.h:188
#define MAPD_FILE_EXT
Definition: File.h:25
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
std::string fileMgrBasePath_
Definition: FileMgr.h:239
bool headerCompare(const HeaderInfo &firstElem, const HeaderInfo &secondElem)
Definition: FileMgr.cpp:50
std::string to_string(char const *&&v)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:294
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:243
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:175
std::vector< int > ChunkKey
Definition: types.h:35
#define EPOCH_FILENAME
Definition: FileMgr.cpp:43
std::pair< const int, const int > fileMgrKey_
(db_id, table_id) key identifying this FileMgr.
Definition: FileMgr.h:238
#define VLOG(n)
Definition: Logger.h:291
Type timer_start()
Definition: measure.h:40
FileInfo * openExistingFile(const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:892
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ init() [2/2]

void File_Namespace::FileMgr::init ( const std::string  dataPathToConvertFrom)

Definition at line 308 of file FileMgr.cpp.

References CHECK, chunkIndex_, copyPage(), epoch_, EPOCH_FILENAME, File_Namespace::MultiPage::epochs, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), File_Namespace::fileSize(), File_Namespace::GlobalFileMgr::getFileMgr(), File_Namespace::FileBuffer::getMultiPage(), gfm_, File_Namespace::headerCompare(), LOG, MAPD_FILE_EXT, File_Namespace::FileBuffer::multiPages_, nextFileId_, openEpochFile(), openExistingFile(), File_Namespace::FileBuffer::pageDataSize(), File_Namespace::FileBuffer::pageSize(), processFileFutures(), requestFreePage(), File_Namespace::FileBuffer::reservedHeaderSize(), Data_Namespace::AbstractBuffer::setDirty(), Data_Namespace::AbstractBuffer::setSize(), File_Namespace::FileBuffer::size(), Data_Namespace::AbstractBuffer::syncEncoder(), and File_Namespace::FileBuffer::writeHeader().

// Conversion variant of init(): loads the data files found in dataPathToConvertFrom
// and copies every chunk into the FileMgr that gfm_->getFileMgr(chunkKey) returns.
//
// For each distinct chunk key a source FileBuffer (owned by this manager's
// chunkIndex_) and a destination FileBuffer (registered in the target manager's
// chunkIndex_) are created. The encoder and size are synced, the destination is
// marked dirty, and each source page is copied into a freshly requested destination
// page whose header is written with epoch `converted_data_epoch` (0).
//
// NOTE(review): listing lines 318 and 321 are absent from this extract; the reference
// list names openEpochFile(), so they presumably hold epoch-file calls - confirm
// against FileMgr.cpp.
308  {
309  int converted_data_epoch = 0;
310  boost::filesystem::path path(dataPathToConvertFrom);
311  if (boost::filesystem::exists(path)) {
312  if (!boost::filesystem::is_directory(path)) {
313  LOG(FATAL) << "Specified path `" << path << "` is not a directory.";
314  }
315 
316  if (epoch_ != -1) { // if opening at previous epoch
317  int epochCopy = epoch_;
319  epoch_ = epochCopy;
320  } else {
322  }
323 
324  boost::filesystem::directory_iterator
325  endItr; // default construction yields past-the-end
326  int maxFileId = -1;
327  int fileCount = 0;
// NOTE(review): std::thread::hardware_concurrency() is allowed to return 0, and
// threadCount is used as a modulus at line 366.
328  int threadCount = std::thread::hardware_concurrency();
329  std::vector<HeaderInfo> headerVec;
330  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
331  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
332  if (boost::filesystem::is_regular_file(fileIt->status())) {
333  // note that boost::filesystem leaves preceding dot on
334  // extension - hence MAPD_FILE_EXT is ".mapd"
335  std::string extension(fileIt->path().extension().string());
336 
337  if (extension == MAPD_FILE_EXT) {
// The stem is parsed as "<fileId>.<pageSize>".
338  std::string fileStem(fileIt->path().stem().string());
339  // remove trailing dot if any
340  if (fileStem.size() > 0 && fileStem.back() == '.') {
341  fileStem = fileStem.substr(0, fileStem.size() - 1);
342  }
343  size_t dotPos = fileStem.find_last_of("."); // should only be one
344  if (dotPos == std::string::npos) {
345  LOG(FATAL) << "File `" + fileIt->path().string() +
346  "` does not carry page size information in the filename.";
347  }
348  int fileId = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
349  if (fileId > maxFileId) {
350  maxFileId = fileId;
351  }
352  size_t pageSize =
353  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
354  std::string filePath(fileIt->path().string());
355  size_t fileSize = boost::filesystem::file_size(filePath);
356  CHECK(fileSize % pageSize == 0); // should be no partial pages
357  size_t numPages = fileSize / pageSize;
358 
359  file_futures.emplace_back(std::async(
360  std::launch::async, [filePath, fileId, pageSize, numPages, this] {
361  std::vector<HeaderInfo> tempHeaderVec;
362  openExistingFile(filePath, fileId, pageSize, numPages, tempHeaderVec);
363  return tempHeaderVec;
364  }));
365  fileCount++;
// NOTE(review): this condition is true whenever fileCount is NOT a multiple of
// threadCount, so futures are drained after almost every file. The other init()
// overload tests `fileCount % threadCount == 0`; this one looks inverted - confirm.
366  if (fileCount % threadCount) {
367  processFileFutures(file_futures, headerVec);
368  }
369  }
370  }
371  }
372 
373  if (file_futures.size() > 0) {
374  processFileFutures(file_futures, headerVec);
375  }
376 
377  /* Sort headerVec so that all HeaderInfos
378  * from a chunk will be grouped together
379  * and in order of increasing PageId
380  * - Version Epoch */
381 
382  std::sort(headerVec.begin(), headerVec.end(), headerCompare);
383 
384  /* Goal of next section is to find sequences in the
385  * sorted headerVec of the same ChunkId, which we
386  * can then initiate a FileBuffer with */
387 
388  if (headerVec.size() > 0) {
389  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
390  auto startIt = headerVec.begin();
391 
392  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
393  ++headerIt) {
// A change of chunk key closes the run [startIt, headerIt) for lastChunkKey.
394  if (headerIt->chunkKey != lastChunkKey) {
395  FileMgr* c_fm_ =
396  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
397  CHECK(c_fm_);
398  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerIt);
399  chunkIndex_[lastChunkKey] = srcBuf;
400  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
401  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
402  destBuf->syncEncoder(srcBuf);
403  destBuf->setSize(srcBuf->size());
404  destBuf->setDirty(); // this needs to be set to force writing out metadata
405  // files from "checkpoint()" call
406 
// Copy the current version of each source page into a new destination page.
407  size_t totalNumPages = srcBuf->getMultiPage().size();
408  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
409  Page srcPage = srcBuf->getMultiPage()[pageNum].current();
410  Page destPage = c_fm_->requestFreePage(
411  srcBuf->pageSize(),
412  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
413  MultiPage multiPage(srcBuf->pageSize());
414  multiPage.epochs.push_back(converted_data_epoch);
415  multiPage.pageVersions.push_back(destPage);
416  destBuf->multiPages_.push_back(multiPage);
417  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
418  copyPage(
419  srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
420  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
421  }
422  lastChunkKey = headerIt->chunkKey;
423  startIt = headerIt;
424  }
425  }
426 
// The block below repeats the loop body for the final run [startIt, headerVec.end()).
// NOTE(review): unlike line 397, the dynamic_cast result is not CHECKed before
// c_fm_ is dereferenced.
427  // now need to insert last Chunk
428  FileMgr* c_fm_ =
429  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
430  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerVec.end());
431  chunkIndex_[lastChunkKey] = srcBuf;
432  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
433  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
434  destBuf->syncEncoder(srcBuf);
435  destBuf->setSize(srcBuf->size());
436  destBuf->setDirty(); // this needs to be set to write out metadata file from the
437  // "checkpoint()" call
438 
439  size_t totalNumPages = srcBuf->getMultiPage().size();
440  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
441  Page srcPage = srcBuf->getMultiPage()[pageNum].current();
442  Page destPage = c_fm_->requestFreePage(
443  srcBuf->pageSize(),
444  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
445  MultiPage multiPage(srcBuf->pageSize());
446  multiPage.epochs.push_back(converted_data_epoch);
447  multiPage.pageVersions.push_back(destPage);
448  destBuf->multiPages_.push_back(multiPage);
449  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
450  copyPage(srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
451  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
452  }
453  }
454  nextFileId_ = maxFileId + 1;
455  } else {
// NOTE(review): a missing path is created here; the FATAL text "Specified path does
// not exist" is only emitted when that creation fails - confirm the intended message.
456  if (!boost::filesystem::create_directory(path)) {
457  LOG(FATAL) << "Specified path does not exist: " << path;
458  }
459  }
460 }
void openEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:513
#define LOG(tag)
Definition: Logger.h:188
void copyPage(Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
Definition: FileMgr.cpp:479
#define MAPD_FILE_EXT
Definition: File.h:25
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
bool headerCompare(const HeaderInfo &firstElem, const HeaderInfo &secondElem)
Definition: FileMgr.cpp:50
FileMgr(const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const size_t num_reader_threads=0, const int epoch=-1, const size_t defaultPageSize=2097152)
Constructor.
Definition: FileMgr.cpp:68
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:294
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
AbstractBufferMgr * getFileMgr(const int db_id, const int tb_id)
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:175
#define CHECK(condition)
Definition: Logger.h:197
std::vector< int > ChunkKey
Definition: types.h:35
#define EPOCH_FILENAME
Definition: FileMgr.cpp:43
FileInfo * openExistingFile(const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:892
+ Here is the call graph for this function:

◆ isAllocationCapped()

bool File_Namespace::FileMgr::isAllocationCapped ( )
inlineoverride

Definition at line 153 of file FileMgr.h.

// Always returns false for this manager.
153 { return false; }

◆ isBufferOnDevice()

bool File_Namespace::FileMgr::isBufferOnDevice ( const ChunkKey key)
override

Definition at line 653 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

// Returns true when `key` is present in chunkIndex_; the lookup runs under a shared
// lock on chunkIndexMutex_.
653  {
654  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
655  return chunkIndex_.find(key) != chunkIndex_.end();
656 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

◆ openDBMetaFile()

bool File_Namespace::FileMgr::openDBMetaFile ( const std::string &  DBMetaFileName)
private

Definition at line 561 of file FileMgr.cpp.

References db_version_, DBMetaFile_, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, File_Namespace::open(), and File_Namespace::read().

Referenced by createTopLevelMetadata().

// Tries to open the DB metadata file "<fileMgrBasePath_>/<DBMetaFileName>".
// Returns false (without logging) when the path does not exist, is not a regular
// file, or is smaller than 4 bytes. Otherwise stores the opened handle in DBMetaFile_,
// reads sizeof(int) bytes from offset 0 into db_version_, and returns true.
561  {
562  std::string DBMetaFilePath(fileMgrBasePath_ + "/" + DBMetaFileName);
563 
564  if (!boost::filesystem::exists(DBMetaFilePath)) {
565  // LOG(INFO) << "DB metadata file does not exist, one will be created.";
566  return false;
567  }
568  if (!boost::filesystem::is_regular_file(DBMetaFilePath)) {
569  // LOG(INFO) << "DB metadata file is not a regular file, one will be created.";
570  return false;
571  }
// NOTE(review): the size check uses the literal 4 while the read below uses
// sizeof(int) - presumably the same value on supported targets; confirm.
572  if (boost::filesystem::file_size(DBMetaFilePath) < 4) {
573  // LOG(INFO) << "DB metadata file is not sized properly, one will be created.";
574  return false;
575  }
576  DBMetaFile_ = open(DBMetaFilePath);
// NOTE(review): the byte count returned by read() is not checked.
577  read(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_version_);
578 
579  return true;
580 }
std::string fileMgrBasePath_
Definition: FileMgr.h:239
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:117
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:87
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ openEpochFile()

void File_Namespace::FileMgr::openEpochFile ( const std::string &  epochFileName)
private

Definition at line 513 of file FileMgr.cpp.

References epoch_, epochFile_, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), and File_Namespace::read().

Referenced by init().

// Opens the epoch file "<fileMgrBasePath_>/<epochFileName>" and loads epoch_ from it.
// LOG(FATAL) is emitted when the path does not exist, is not a regular file, or is
// smaller than 4 bytes. On success the handle is stored in epochFile_, sizeof(int)
// bytes at offset 0 are read into epoch_, and epoch_ is then incremented.
513  {
514  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
515  if (!boost::filesystem::exists(epochFilePath)) {
516  LOG(FATAL) << "Epoch file `" << epochFilePath << "` does not exist";
517  }
518  if (!boost::filesystem::is_regular_file(epochFilePath)) {
519  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
520  }
// NOTE(review): the message says "expected size: 4" but the check only rejects files
// smaller than 4 bytes; larger files pass - confirm which is intended.
521  if (boost::filesystem::file_size(epochFilePath) < 4) {
522  LOG(FATAL) << "Epoch file `" << epochFilePath
523  << "` is not sized properly (current size: "
524  << boost::filesystem::file_size(epochFilePath) << ", expected size: 4)";
525  }
526  epochFile_ = open(epochFilePath);
// NOTE(review): the byte count returned by read() is not checked.
527  read(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
528  // std::cout << "Epoch after open file: " << epoch_ << std::endl;
529  epoch_++; // we are in new epoch from last checkpoint
530 }
#define LOG(tag)
Definition: Logger.h:188
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * epochFile_
FILE handle of the opened epoch file (assigned in openEpochFile()).
Definition: FileMgr.h:247
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:117
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:87
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ openExistingFile()

FileInfo * File_Namespace::FileMgr::openExistingFile ( const std::string &  path,
const int  fileId,
const size_t  pageSize,
const size_t  numPages,
std::vector< HeaderInfo > &  headerVec 
)
private

Definition at line 892 of file FileMgr.cpp.

References epoch_, fileIndex_, files_, files_rw_mutex_, File_Namespace::open(), and File_Namespace::FileInfo::openExistingFile().

Referenced by init().

// Opens the data file at `path`, wraps it in a new FileInfo (created with its last
// constructor argument false, i.e. without initializing the file), and lets the
// FileInfo collect its headers into headerVec for the current epoch_. The FileInfo is
// then registered in files_ (grown if needed) and fileIndex_ under an exclusive lock
// on files_rw_mutex_, and returned.
// init() invokes this from std::async tasks, so the registration step can run
// concurrently for different files.
896  {
897  FILE* f = open(path);
898  FileInfo* fInfo = new FileInfo(
899  this, fileId, f, pageSize, numPages, false); // false means don't init file
900 
// Header scanning happens before the lock is taken; only the shared containers are
// updated under files_rw_mutex_.
901  fInfo->openExistingFile(headerVec, epoch_);
902  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
903  if (fileId >= static_cast<int>(files_.size())) {
904  files_.resize(fileId + 1);
905  }
906  files_[fileId] = fInfo;
907  fileIndex_.insert(std::pair<size_t, int>(pageSize, fileId));
908  return fInfo;
909 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
PageSizeFileMMap fileIndex_
Maps page sizes to file ids (indices into files_).
Definition: FileMgr.h:242
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:87
mapd_unique_lock< mapd_shared_mutex > write_lock
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:254
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ printSlabs()

std::string File_Namespace::FileMgr::printSlabs ( )
inlineoverride

Definition at line 147 of file FileMgr.h.

147 { return "Not Implemented"; }

◆ processFileFutures()

void File_Namespace::FileMgr::processFileFutures ( std::vector< std::future< std::vector< HeaderInfo >>> &  file_futures,
std::vector< HeaderInfo > &  headerVec 
)
private

Definition at line 294 of file FileMgr.cpp.

Referenced by init().

// Waits for every future in file_futures, appends each future's HeaderInfo vector to
// headerVec in the order the futures are stored, then empties file_futures.
296  {
297  for (auto& file_future : file_futures) {
298  file_future.wait();
299  }
300  // concatenate the vectors after thread completes
301  for (auto& file_future : file_futures) {
302  auto tempHeaderVec = file_future.get();
303  headerVec.insert(headerVec.end(), tempHeaderVec.begin(), tempHeaderVec.end());
304  }
305  file_futures.clear();
306 }
+ Here is the caller graph for this function:

◆ putBuffer()

AbstractBuffer * File_Namespace::FileMgr::putBuffer ( const ChunkKey & key,
AbstractBuffer * d,
const size_t  numBytes = 0 
)
override

Puts the contents of d into the Chunk with the given key.

Parameters
key- Unique identifier for a Chunk.
d- An object representing the source data for the Chunk.
Returns
AbstractBuffer*

Definition at line 756 of file FileMgr.cpp.

References Data_Namespace::AbstractBuffer::append(), CHECK_LT, chunkIndex_, chunkIndexMutex_, Data_Namespace::AbstractBuffer::clearDirtyBits(), createBufferUnlocked(), defaultPageSize_, logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isAppended(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, Data_Namespace::AbstractBuffer::setSize(), showChunk(), Data_Namespace::AbstractBuffer::size(), Data_Namespace::AbstractBuffer::syncEncoder(), UNREACHABLE, and Data_Namespace::AbstractBuffer::write().

// Writes the contents of srcBuffer into the chunk stored under `key` and returns it.
//
// The chunk is looked up in chunkIndex_ under an exclusive lock. If srcBuffer is
// marked updated, its first newChunkSize bytes are written at offset 0; if it is
// marked appended, only the bytes beyond the chunk's previous size are appended.
// Any other state hits UNREACHABLE(). Afterwards srcBuffer's dirty bits are cleared
// and the chunk's encoder is synced from srcBuffer.
//
// NOTE(review): listing line 764 is absent from this extract; the reference list
// names createBufferUnlocked() and defaultPageSize_, so it presumably creates the
// chunk when the key is missing - confirm against FileMgr.cpp.
758  {
759  // obtain a pointer to the Chunk
760  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
761  auto chunkIt = chunkIndex_.find(key);
762  AbstractBuffer* chunk;
763  if (chunkIt == chunkIndex_.end()) {
765  } else {
766  chunk = chunkIt->second;
767  }
// NOTE(review): the lock is released here; every later use of `chunk` happens
// without chunkIndexMutex_ held.
768  chunkIndexWriteLock.unlock();
769  size_t oldChunkSize = chunk->size();
770  // write the buffer's data to the Chunk
771  // size_t newChunkSize = numBytes == 0 ? srcBuffer->size() : numBytes;
// numBytes == 0 means "use the whole source buffer".
772  size_t newChunkSize = numBytes == 0 ? srcBuffer->size() : numBytes;
773  if (chunk->isDirty()) {
774  // multiple appends are allowed,
775  // but only single update is allowed
776  if (srcBuffer->isUpdated() && chunk->isUpdated()) {
777  LOG(FATAL) << "Aborting attempt to write a chunk marked dirty. Chunk inconsistency "
778  "for key: "
779  << showChunk(key);
780  }
781  }
782  if (srcBuffer->isUpdated()) {
783  // chunk size is not changed when fixed rows are updated or are marked as deleted.
784  // but when rows are vacuumed or varlen rows are updated (new rows are appended),
785  // chunk size will change. For vacuum, checkpoint should sync size from cpu to disk.
786  // For varlen update, it takes another route via fragmenter using disk-level buffer.
787  if (0 == numBytes && !chunk->isDirty()) {
788  chunk->setSize(newChunkSize);
789  }
790  //@todo use dirty flags to only flush pages of chunk that need to
791  // be flushed
792  chunk->write((int8_t*)srcBuffer->getMemoryPtr(),
793  newChunkSize,
794  0,
795  srcBuffer->getType(),
796  srcBuffer->getDeviceId());
797  } else if (srcBuffer->isAppended()) {
// Only the tail past the chunk's previous size is appended, so the source must be
// strictly larger than the existing chunk.
798  CHECK_LT(oldChunkSize, newChunkSize);
799  chunk->append((int8_t*)srcBuffer->getMemoryPtr() + oldChunkSize,
800  newChunkSize - oldChunkSize,
801  srcBuffer->getType(),
802  srcBuffer->getDeviceId());
803  } else {
804  UNREACHABLE() << "putBuffer() expects a buffer marked is_updated or is_appended";
805  }
806  // chunk->clearDirtyBits(); // Hack: because write and append will set dirty bits
807  //@todo commenting out line above will make sure this metadata is set
808  // but will trigger error on fetch chunk
809  srcBuffer->clearDirtyBits();
810  chunk->syncEncoder(srcBuffer);
811  return chunk;
812 }
void syncEncoder(const AbstractBuffer *src_buffer)
#define LOG(tag)
Definition: Logger.h:188
virtual size_t size() const =0
#define UNREACHABLE()
Definition: Logger.h:241
std::string showChunk(const ChunkKey &key)
Definition: types.h:62
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:223
virtual void setSize(const size_t size)
An AbstractBuffer is a unit of data management for a data manager.
virtual void write(int8_t *src, const size_t num_bytes, const size_t offset=0, const MemoryLevel src_buffer_type=CPU_LEVEL, const int src_device_id=-1)=0
#define CHECK_LT(x, y)
Definition: Logger.h:207
size_t defaultPageSize_
Default page size used by this FileMgr, as passed to the constructor.
Definition: FileMgr.h:244
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
AbstractBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:635
virtual bool isDirty() const
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
virtual bool isUpdated() const
+ Here is the call graph for this function:

◆ removeTableRelatedDS()

void File_Namespace::FileMgr::removeTableRelatedDS ( const int  db_id,
const int  table_id 
)
override

Definition at line 1024 of file FileMgr.cpp.

References UNREACHABLE.

1024  {
1025  UNREACHABLE();
1026 }
#define UNREACHABLE()
Definition: Logger.h:241

◆ requestFreePage()

Page File_Namespace::FileMgr::requestFreePage ( size_t  pagesize,
const bool  isMetadata 
)

Definition at line 823 of file FileMgr.cpp.

References CHECK, createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, MAX_FILE_N_METADATA_PAGES, and MAX_FILE_N_PAGES.

Referenced by File_Namespace::FileBuffer::addNewMultiPage(), init(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeMetadata().

823  {
824  std::lock_guard<std::mutex> lock(getPageMutex_);
825 
826  auto candidateFiles = fileIndex_.equal_range(pageSize);
827  int pageNum = -1;
828  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
829  FileInfo* fileInfo = files_[fileIt->second];
830  pageNum = fileInfo->getFreePage();
831  if (pageNum != -1) {
832  return (Page(fileInfo->fileId, pageNum));
833  }
834  }
835  // if here then we need to add a file
836  FileInfo* fileInfo;
837  if (isMetadata) {
838  fileInfo = createFile(pageSize, MAX_FILE_N_METADATA_PAGES);
839  } else {
840  fileInfo = createFile(pageSize, MAX_FILE_N_PAGES);
841  }
842  pageNum = fileInfo->getFreePage();
843  CHECK(pageNum != -1);
844  return (Page(fileInfo->fileId, pageNum));
845 }
std::mutex getPageMutex_
Mutex locked by requestFreePage() and requestFreePages() while free pages are handed out.
Definition: FileMgr.h:252
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
#define MAX_FILE_N_METADATA_PAGES
Definition: File.h:27
PageSizeFileMMap fileIndex_
Maps page sizes to the files (by index into files_) that hold pages of that size.
Definition: FileMgr.h:242
#define MAX_FILE_N_PAGES
Definition: File.h:26
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:911
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ requestFreePages()

void File_Namespace::FileMgr::requestFreePages ( size_t  npages,
size_t  pagesize,
std::vector< Page > &  pages,
const bool  isMetadata 
)

Obtains free pages – creates new files if necessary – of the requested size.

Given a page size and number of pages, this method updates the vector "pages" to include free pages of the requested size. These pages are immediately removed from the free list of the affected file(s). If there are not enough pages available among current files, new files are created and their pages are included in the vector.

Parameters
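npages – The number of free pages requested
pagesize – The size of each requested page
pages – A vector containing the free pages obtained by this method
isMetadata – If true, any newly created file is sized with MAX_FILE_N_METADATA_PAGES pages; otherwise with MAX_FILE_N_PAGES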

Definition at line 847 of file FileMgr.cpp.

References CHECK, createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, MAX_FILE_N_METADATA_PAGES, and MAX_FILE_N_PAGES.

850  {
851  // not used currently
852  // @todo add method to FileInfo to get more than one page
853  std::lock_guard<std::mutex> lock(getPageMutex_);
854  auto candidateFiles = fileIndex_.equal_range(pageSize);
855  size_t numPagesNeeded = numPagesRequested;
856  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
857  FileInfo* fileInfo = files_[fileIt->second];
858  int pageNum;
859  do {
860  pageNum = fileInfo->getFreePage();
861  if (pageNum != -1) {
862  pages.emplace_back(fileInfo->fileId, pageNum);
863  numPagesNeeded--;
864  }
865  } while (pageNum != -1 && numPagesNeeded > 0);
866  if (numPagesNeeded == 0) {
867  break;
868  }
869  }
870  while (numPagesNeeded > 0) {
871  FileInfo* fileInfo;
872  if (isMetadata) {
873  fileInfo = createFile(pageSize, MAX_FILE_N_METADATA_PAGES);
874  } else {
875  fileInfo = createFile(pageSize, MAX_FILE_N_PAGES);
876  }
877  int pageNum;
878  do {
879  pageNum = fileInfo->getFreePage();
880  if (pageNum != -1) {
881  pages.emplace_back(fileInfo->fileId, pageNum);
882  numPagesNeeded--;
883  }
884  } while (pageNum != -1 && numPagesNeeded > 0);
885  if (numPagesNeeded == 0) {
886  break;
887  }
888  }
889  CHECK(pages.size() == numPagesRequested);
890 }
std::mutex getPageMutex_
Mutex locked by requestFreePage() and requestFreePages() while free pages are handed out.
Definition: FileMgr.h:252
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
#define MAX_FILE_N_METADATA_PAGES
Definition: File.h:27
PageSizeFileMMap fileIndex_
Maps page sizes to the files (by index into files_) that hold pages of that size.
Definition: FileMgr.h:242
#define MAX_FILE_N_PAGES
Definition: File.h:26
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:911
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the call graph for this function:

◆ setEpoch()

void File_Namespace::FileMgr::setEpoch ( int  epoch)
private

Definition at line 1014 of file FileMgr.cpp.

References epoch(), epoch_, and writeAndSyncEpochToDisk().

1014  {
1015  epoch_ = epoch;
1016  writeAndSyncEpochToDisk();
1017 }
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:202
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:532
+ Here is the call graph for this function:

◆ writeAndSyncDBMetaToDisk()

void File_Namespace::FileMgr::writeAndSyncDBMetaToDisk ( )
private

Definition at line 582 of file FileMgr.cpp.

References DBMetaFile_, logger::FATAL, getDBVersion(), LOG, and File_Namespace::write().

582  {
583  int db_ver = getDBVersion();
584  write(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_ver);
585  int status = fflush(DBMetaFile_);
586  if (status != 0) {
587  LOG(FATAL) << "Could not sync DB metadata file to disk";
588  }
589 }
#define LOG(tag)
Definition: Logger.h:188
int getDBVersion() const
Returns the DB version (the value writeAndSyncDBMetaToDisk() writes to the DB metadata file).
Definition: FileMgr.cpp:994
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:125
+ Here is the call graph for this function:

◆ writeAndSyncEpochToDisk()

void File_Namespace::FileMgr::writeAndSyncEpochToDisk ( )
private

Definition at line 532 of file FileMgr.cpp.

References epoch_, epochFile_, logger::FATAL, LOG, and File_Namespace::write().

Referenced by checkpoint(), and setEpoch().

532  {
533  write(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
534  int status = fflush(epochFile_);
535  // int status = fcntl(fileno(epochFile_),51);
536  if (status != 0) {
537  LOG(FATAL) << "Could not flush epoch file to disk";
538  }
539 #ifdef __APPLE__
540  status = fcntl(fileno(epochFile_), 51);
541 #else
542  status = fsync(fileno(epochFile_));
543 #endif
544  if (status != 0) {
545  LOG(FATAL) << "Could not sync epoch file to disk";
546  }
547  ++epoch_;
548 }
#define LOG(tag)
Definition: Logger.h:188
FILE * epochFile_
File handle of the epoch file, to which writeAndSyncEpochToDisk() writes the current epoch.
Definition: FileMgr.h:247
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:125
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Friends And Related Function Documentation

◆ GlobalFileMgr

friend class GlobalFileMgr
friend

Definition at line 86 of file FileMgr.h.

Member Data Documentation

◆ chunkIndex_

◆ chunkIndexMutex_

◆ db_version_

int File_Namespace::FileMgr::db_version_
private

DB version from dbmeta file, should be compatible with GlobalFileMgr::mapd_db_version_

Definition at line 248 of file FileMgr.h.

Referenced by createTopLevelMetadata(), and openDBMetaFile().

◆ DBMetaFile_

FILE* File_Namespace::FileMgr::DBMetaFile_ = nullptr
private

Pointer to the DB-level metadata file. (The DB version stored in this file, db_version_, should be compatible with GlobalFileMgr::mapd_db_version_.)

Definition at line 250 of file FileMgr.h.

Referenced by createDBMetaFile(), openDBMetaFile(), writeAndSyncDBMetaToDisk(), and ~FileMgr().

◆ defaultPageSize_

size_t File_Namespace::FileMgr::defaultPageSize_
private

Default page size used by this FileMgr, as passed to the constructor.

Definition at line 244 of file FileMgr.h.

Referenced by copyPage(), createBufferUnlocked(), and putBuffer().

◆ epoch_

int File_Namespace::FileMgr::epoch_
private

The current epoch (time of last checkpoint); writeAndSyncEpochToDisk() writes it to the epoch file and then increments it.

Definition at line 246 of file FileMgr.h.

Referenced by checkpoint(), createEpochFile(), init(), openEpochFile(), openExistingFile(), setEpoch(), and writeAndSyncEpochToDisk().

◆ epochFile_

FILE* File_Namespace::FileMgr::epochFile_ = nullptr
private

File handle of the epoch file, to which writeAndSyncEpochToDisk() writes the current epoch.

Definition at line 247 of file FileMgr.h.

Referenced by closeRemovePhysical(), createEpochFile(), FileMgr(), openEpochFile(), writeAndSyncEpochToDisk(), and ~FileMgr().

◆ fileIndex_

PageSizeFileMMap File_Namespace::FileMgr::fileIndex_
private

Maps page sizes to the files (by index into files_) that hold pages of that size.

Definition at line 242 of file FileMgr.h.

Referenced by createFile(), openExistingFile(), requestFreePage(), and requestFreePages().

◆ fileMgrBasePath_

std::string File_Namespace::FileMgr::fileMgrBasePath_
private

The OS file system path containing files related to this FileMgr

◆ fileMgrKey_

std::pair<const int, const int> File_Namespace::FileMgr::fileMgrKey_
private

Key identifying this FileMgr (the fileMgrKey pair passed to the constructor).

Definition at line 238 of file FileMgr.h.

Referenced by FileMgr(), and init().

◆ files_

std::vector<FileInfo*> File_Namespace::FileMgr::files_
private

A vector of files accessible via a file identifier.

Definition at line 241 of file FileMgr.h.

Referenced by checkpoint(), closeRemovePhysical(), createFile(), FileMgr(), getFileForFileId(), openExistingFile(), requestFreePage(), requestFreePages(), and ~FileMgr().

◆ files_rw_mutex_

mapd_shared_mutex File_Namespace::FileMgr::files_rw_mutex_
mutableprivate

Definition at line 254 of file FileMgr.h.

Referenced by checkpoint(), createFile(), and openExistingFile().

◆ free_pages

std::vector<std::pair<FileInfo*, int> > File_Namespace::FileMgr::free_pages
private

Definition at line 257 of file FileMgr.h.

Referenced by checkpoint(), and free_page().

◆ getPageMutex_

std::mutex File_Namespace::FileMgr::getPageMutex_
private

Mutex locked by requestFreePage() and requestFreePages() while free pages are handed out.

Definition at line 252 of file FileMgr.h.

Referenced by requestFreePage(), and requestFreePages().

◆ gfm_

GlobalFileMgr* File_Namespace::FileMgr::gfm_
private

Global FileMgr.

Definition at line 237 of file FileMgr.h.

Referenced by FileMgr(), getDBConvert(), getDBVersion(), and init().

◆ mutex_free_page

mapd_shared_mutex File_Namespace::FileMgr::mutex_free_page
mutableprivate

Definition at line 256 of file FileMgr.h.

Referenced by checkpoint(), and free_page().

◆ nextFileId_

unsigned File_Namespace::FileMgr::nextFileId_
private

the index of the next file id

Definition at line 245 of file FileMgr.h.

Referenced by createFile(), and init().

◆ num_reader_threads_

size_t File_Namespace::FileMgr::num_reader_threads_
private

number of threads used when loading data

Definition at line 243 of file FileMgr.h.

Referenced by init().


The documentation for this class was generated from the following files: