OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
File_Namespace::FileMgr Class Reference

#include <FileMgr.h>

+ Inheritance diagram for File_Namespace::FileMgr:
+ Collaboration diagram for File_Namespace::FileMgr:

Public Member Functions

 FileMgr (const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const size_t num_reader_threads=0, const int epoch=-1, const size_t defaultPageSize=2097152)
 Constructor. More...
 
 FileMgr (const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const bool initOnly)
 
 FileMgr (GlobalFileMgr *gfm, const size_t defaultPageSize, std::string basePath)
 
 ~FileMgr () override
 Destructor. More...
 
AbstractBuffer * createBuffer (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
 Creates a chunk with the specified key and page size. More...
 
bool isBufferOnDevice (const ChunkKey &key) override
 
void deleteBuffer (const ChunkKey &key, const bool purge=true) override
 Deletes the chunk with the specified key. More...
 
void deleteBuffersWithPrefix (const ChunkKey &keyPrefix, const bool purge=true) override
 
AbstractBuffer * getBuffer (const ChunkKey &key, const size_t numBytes=0) override
 Returns a pointer to the chunk with the specified key. More...
 
void fetchBuffer (const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override
 
AbstractBuffer * putBuffer (const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override
 Puts the contents of d into the Chunk with the given key. More...
 
AbstractBuffer * alloc (const size_t numBytes) override
 
void free (AbstractBuffer *buffer) override
 
Page requestFreePage (size_t pagesize, const bool isMetadata)
 
MgrType getMgrType () override
 
std::string getStringMgrType () override
 
std::string printSlabs () override
 
void clearSlabs () override
 
size_t getMaxSize () override
 
size_t getInUseSize () override
 
size_t getAllocated () override
 
bool isAllocationCapped () override
 
FileInfo * getFileInfoForFileId (const int fileId)
 
void init (const size_t num_reader_threads)
 
void init (const std::string dataPathToConvertFrom)
 
void copyPage (Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
 
void requestFreePages (size_t npages, size_t pagesize, std::vector< Page > &pages, const bool isMetadata)
 Obtains free pages – creates new files if necessary – of the requested size. More...
 
void getChunkMetadataVec (std::vector< std::pair< ChunkKey, ChunkMetadata >> &chunkMetadataVec) override
 
void getChunkMetadataVecForKeyPrefix (std::vector< std::pair< ChunkKey, ChunkMetadata >> &chunkMetadataVec, const ChunkKey &keyPrefix) override
 
void checkpoint () override
 Fsyncs data files, writes out epoch and fsyncs that. More...
 
void checkpoint (const int db_id, const int tb_id) override
 
int epoch ()
 Returns current value of epoch - should be one greater than recorded at last checkpoint. More...
 
size_t getNumReaderThreads ()
 Returns the number of threads, as defined by the num-reader-threads parameter, which should be used during the initial load and subsequent reads of data. More...
 
FILE * getFileForFileId (const int fileId)
 Returns FILE pointer associated with requested fileId. More...
 
size_t getNumChunks () override
 
int getDBVersion () const
 Index for looking up chunks. More...
 
bool getDBConvert () const
 
void createTopLevelMetadata ()
 
std::string getFileMgrBasePath () const
 
void closeRemovePhysical ()
 
void free_page (std::pair< FileInfo *, int > &&page)
 
const std::pair< const int, const int > get_fileMgrKey () const
 

Public Attributes

ChunkKeyToChunkMap chunkIndex_
 

Private Member Functions

FileInfo * createFile (const size_t pageSize, const size_t numPages)
 Adds a file to the file manager repository. More...
 
FileInfo * openExistingFile (const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
 
void createEpochFile (const std::string &epochFileName)
 
void openEpochFile (const std::string &epochFileName)
 
void writeAndSyncEpochToDisk ()
 
void createDBMetaFile (const std::string &DBMetaFileName)
 
bool openDBMetaFile (const std::string &DBMetaFileName)
 
void writeAndSyncDBMetaToDisk ()
 
void setEpoch (int epoch)
 
void processFileFutures (std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
 

Private Attributes

GlobalFileMgr * gfm_
 
std::pair< const int, const int > fileMgrKey_
 Global FileMgr. More...
 
std::string fileMgrBasePath_
 
std::vector< FileInfo * > files_
 
PageSizeFileMMap fileIndex_
 A vector of files accessible via a file identifier. More...
 
size_t num_reader_threads_
 Maps page sizes to FileInfo objects. More...
 
size_t defaultPageSize_
 number of threads used when loading data More...
 
unsigned nextFileId_
 
int epoch_
 the index of the next file id More...
 
FILE * epochFile_
 the current epoch (time of last checkpoint) More...
 
int db_version_
 
FILE * DBMetaFile_
 
std::mutex getPageMutex_
 pointer to DB level metadata More...
 
mapd_shared_mutex chunkIndexMutex_
 
mapd_shared_mutex files_rw_mutex_
 
mapd_shared_mutex mutex_free_page
 
std::vector< std::pair< FileInfo *, int > > free_pages
 

Friends

class GlobalFileMgr
 

Detailed Description

Definition at line 85 of file FileMgr.h.

Constructor & Destructor Documentation

File_Namespace::FileMgr::FileMgr ( const int  deviceId,
GlobalFileMgr * gfm,
const std::pair< const int, const int >  fileMgrKey,
const size_t  num_reader_threads = 0,
const int  epoch = -1,
const size_t  defaultPageSize = 2097152 
)

Constructor.

Definition at line 67 of file FileMgr.cpp.

References init().

73  : AbstractBufferMgr(deviceId)
74  , gfm_(gfm)
75  , fileMgrKey_(fileMgrKey)
76  , defaultPageSize_(defaultPageSize)
77  , nextFileId_(0)
78  , epoch_(epoch) {
79  init(num_reader_threads);
80 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:204
void init(const size_t num_reader_threads)
Definition: FileMgr.cpp:124
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( const int  deviceId,
GlobalFileMgr * gfm,
const std::pair< const int, const int >  fileMgrKey,
const bool  initOnly 
)

Definition at line 83 of file FileMgr.cpp.

References epochFile_, fileMgrBasePath_, fileMgrKey_, files_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, and to_string().

87  : AbstractBufferMgr(deviceId)
88  , gfm_(gfm)
89  , fileMgrKey_(fileMgrKey)
90  , defaultPageSize_(0)
91  , nextFileId_(0)
92  , epoch_(0) {
93  const std::string fileMgrDirPrefix("table");
94  const std::string FileMgrDirDelim("_");
95  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
96  std::to_string(fileMgrKey_.first) + // db_id
97  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
98  epochFile_ = nullptr;
99  files_.clear();
100 }
std::string getBasePath() const
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
std::string fileMgrBasePath_
Definition: FileMgr.h:239
std::string to_string(char const *&&v)
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:247
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( GlobalFileMgr * gfm,
const size_t  defaultPageSize,
std::string  basePath 
)

Definition at line 102 of file FileMgr.cpp.

References init().

103  : AbstractBufferMgr(0)
104  , gfm_(gfm)
105  , fileMgrKey_(0, 0)
106  , fileMgrBasePath_(basePath)
107  , defaultPageSize_(defaultPageSize)
108  , nextFileId_(0)
109  , epoch_(-1) {
110  init(basePath);
111 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
std::string fileMgrBasePath_
Definition: FileMgr.h:239
void init(const size_t num_reader_threads)
Definition: FileMgr.cpp:124
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238

+ Here is the call graph for this function:

File_Namespace::FileMgr::~FileMgr ( )
override

Destructor.

Definition at line 113 of file FileMgr.cpp.

References chunkIndex_, and files_.

113  {
114  // checkpoint();
115  // free memory used by FileInfo objects
116  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
117  delete chunkIt->second;
118  }
119  for (auto file_info : files_) {
120  delete file_info;
121  }
122 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225

Member Function Documentation

AbstractBuffer * File_Namespace::FileMgr::alloc ( const size_t  numBytes = 0)
override

Definition at line 790 of file FileMgr.cpp.

References logger::FATAL, and LOG.

790  {
791  LOG(FATAL) << "Operation not supported";
792  return nullptr; // satisfy return-type warning
793 }
#define LOG(tag)
Definition: Logger.h:185
void File_Namespace::FileMgr::checkpoint ( )
override

Fsyncs data files, writes out epoch and fsyncs that.

Definition at line 577 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, epoch_, logger::FATAL, files_, files_rw_mutex_, free_page(), free_pages, LOG, mutex_free_page, VLOG, and writeAndSyncEpochToDisk().

Referenced by File_Namespace::GlobalFileMgr::checkpoint().

577  {
578  VLOG(2) << "Checkpointing epoch: " << epoch_;
579  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
580  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
581  /*
582  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
583  std::cout << *vecIt << ",";
584  }
585  cout << "Is dirty: " << chunkIt->second->isDirty_ << endl;
586  */
587  if (chunkIt->second->is_dirty_) {
588  chunkIt->second->writeMetadata(epoch_);
589  chunkIt->second->clearDirtyBits();
590  }
591  }
592  chunkIndexWriteLock.unlock();
593 
594  mapd_shared_lock<mapd_shared_mutex> read_lock(files_rw_mutex_);
595  for (auto fileIt = files_.begin(); fileIt != files_.end(); ++fileIt) {
596  int status = (*fileIt)->syncToDisk();
597  if (status != 0) {
598  LOG(FATAL) << "Could not sync file to disk";
599  }
600  }
601 
602  writeAndSyncEpochToDisk();
603 
604  mapd_unique_lock<mapd_shared_mutex> freePagesWriteLock(mutex_free_page);
605  for (auto& free_page : free_pages) {
606  free_page.first->freePageDeferred(free_page.second);
607  }
608  free_pages.clear();
609 }
#define LOG(tag)
Definition: Logger.h:185
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:257
void free_page(std::pair< FileInfo *, int > &&page)
Definition: FileMgr.cpp:997
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
int epoch_
the index of the next file id
Definition: FileMgr.h:246
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:518
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:256
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:254
#define VLOG(n)
Definition: Logger.h:280

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::checkpoint ( const int  db_id,
const int  tb_id 
)
inline override

Definition at line 197 of file FileMgr.h.

References logger::FATAL, and LOG.

197  {
198  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
199  }
#define LOG(tag)
Definition: Logger.h:185
void File_Namespace::FileMgr::clearSlabs ( )
inline override

Definition at line 148 of file FileMgr.h.

148  { /* noop */
149  }
void File_Namespace::FileMgr::closeRemovePhysical ( )

Definition at line 448 of file FileMgr.cpp.

References File_Namespace::close(), epochFile_, files_, getFileMgrBasePath(), and File_Namespace::renameForDelete().

Referenced by File_Namespace::GlobalFileMgr::removeTableRelatedDS().

448  {
449  for (auto file_info : files_) {
450  if (file_info->f) {
451  close(file_info->f);
452  file_info->f = nullptr;
453  }
454  }
455 
456  if (epochFile_) {
457  close(epochFile_);
458  epochFile_ = nullptr;
459  }
460 
461  /* rename for later deletion the directory containing table related data */
462  renameForDelete(getFileMgrBasePath());
463 }
std::string getFileMgrBasePath() const
Definition: FileMgr.h:230
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:247
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:102
void renameForDelete(const std::string directoryName)
Renames a directory to DELETE_ME_<EPOCH>_<oldname>.
Definition: File.cpp:183

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::copyPage ( Page & srcPage,
FileMgr * destFileMgr,
Page & destPage,
const size_t  reservedHeaderSize,
const size_t  numBytes,
const size_t  offset 
)

Definition at line 465 of file FileMgr.cpp.

References CHECK(), defaultPageSize_, File_Namespace::Page::fileId, getFileInfoForFileId(), File_Namespace::Page::pageNum, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by init().

470  {
471  CHECK(offset + numBytes <= defaultPageSize_);
472  FileInfo* srcFileInfo = getFileInfoForFileId(srcPage.fileId);
473  FileInfo* destFileInfo = destFileMgr->getFileInfoForFileId(destPage.fileId);
474  int8_t* buffer = new int8_t[numBytes];
475 
476  size_t bytesRead = srcFileInfo->read(
477  srcPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize, numBytes, buffer);
478  CHECK(bytesRead == numBytes);
479  size_t bytesWritten = destFileInfo->write(
480  destPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize,
481  numBytes,
482  buffer);
483  CHECK(bytesWritten == numBytes);
484  delete[] buffer;
485 }
FileInfo * getFileInfoForFileId(const int fileId)
Definition: FileMgr.h:155
CHECK(cgen_state)
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

AbstractBuffer * File_Namespace::FileMgr::createBuffer ( const ChunkKey & key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
override

Creates a chunk with the specified key and page size.

Todo:
Make all accesses to chunkIndex_ thread-safe

Definition at line 611 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, defaultPageSize_, logger::FATAL, LOG, and showChunk().

Referenced by putBuffer().

613  {
614  size_t actualPageSize = pageSize;
615  if (actualPageSize == 0) {
616  actualPageSize = defaultPageSize_;
617  }
619  // we will do this lazily and not allocate space for the Chunk (i.e.
620  // FileBuffer yet)
621  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
622 
623  if (chunkIndex_.find(key) != chunkIndex_.end()) {
624  LOG(FATAL) << "Chunk already exists for key: " << showChunk(key);
625  }
626  chunkIndex_[key] = new FileBuffer(this, actualPageSize, key, numBytes);
627  chunkIndexWriteLock.unlock();
628  return (chunkIndex_[key]);
629 }
#define LOG(tag)
Definition: Logger.h:185
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createDBMetaFile ( const std::string &  DBMetaFileName)
private

Definition at line 536 of file FileMgr.cpp.

References File_Namespace::create(), DBMetaFile_, logger::FATAL, fileMgrBasePath_, getDBVersion(), LOG, and File_Namespace::write().

Referenced by createTopLevelMetadata().

536  {
537  std::string DBMetaFilePath(fileMgrBasePath_ + "/" + DBMetaFileName);
538  if (boost::filesystem::exists(DBMetaFilePath)) {
539  LOG(FATAL) << "DB metadata file `" << DBMetaFilePath << "` already exists.";
540  }
541  DBMetaFile_ = create(DBMetaFilePath, sizeof(int));
542  int db_ver = getDBVersion();
543  write(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_ver);
544  // LOG(INFO) << "DB metadata file has been created.";
545 }
#define LOG(tag)
Definition: Logger.h:185
int getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:972
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:35
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:121

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createEpochFile ( const std::string &  epochFileName)
private

Definition at line 487 of file FileMgr.cpp.

References File_Namespace::create(), epoch_, epochFile_, logger::FATAL, fileMgrBasePath_, LOG, and File_Namespace::write().

Referenced by init().

487  {
488  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
489  if (boost::filesystem::exists(epochFilePath)) {
490  LOG(FATAL) << "Epoch file `" << epochFilePath << "` already exists";
491  }
492  epochFile_ = create(epochFilePath, sizeof(int));
493  // Write out current epoch to file - which if this
494  // function is being called should be 0
495  write(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
496  epoch_++;
497 }
#define LOG(tag)
Definition: Logger.h:185
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:35
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:247
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:121

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::createFile ( const size_t  pageSize,
const size_t  numPages 
)
private

Adds a file to the file manager repository.

This method will create a FileInfo object for the file being added, and it will create the corresponding file on physical disk with the indicated number of pages pre-allocated.

A pointer to the FileInfo object is returned, which itself has a file pointer (FILE*) and a file identifier (int fileId).

Parameters
fileName: The name given to the file in physical storage. (Note: the current signature takes no such parameter.)
pageSize: The logical page size for the pages in the file.
numPages: The number of logical pages to initially allocate for the file.
Returns
FileInfo* A pointer to the FileInfo object of the added file.

Definition at line 887 of file FileMgr.cpp.

References CHECK(), File_Namespace::create(), logger::FATAL, fileIndex_, fileMgrBasePath_, files_, files_rw_mutex_, LOG, and nextFileId_.

Referenced by requestFreePage(), and requestFreePages().

887  {
888  // check arguments
889  if (pageSize == 0 || numPages == 0) {
890  LOG(FATAL) << "File creation failed: pageSize and numPages must be greater than 0.";
891  }
892 
893  // create the new file
894  FILE* f = create(fileMgrBasePath_,
895  nextFileId_,
896  pageSize,
897  numPages); // TM: not sure if I like naming scheme here - should be in
898  // separate namespace?
899  CHECK(f);
900 
901  // instantiate a new FileInfo for the newly created file
902  int fileId = nextFileId_++;
903  FileInfo* fInfo =
904  new FileInfo(this, fileId, f, pageSize, numPages, true); // true means init file
905  CHECK(fInfo);
906 
907  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
908  // update file manager data structures
909  files_.push_back(fInfo);
910  fileIndex_.insert(std::pair<size_t, int>(pageSize, fileId));
911 
912  CHECK(files_.back() == fInfo); // postcondition
913  return fInfo;
914 }
#define LOG(tag)
Definition: Logger.h:185
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:35
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
CHECK(cgen_state)
PageSizeFileMMap fileIndex_
A vector of files accessible via a file identifier.
Definition: FileMgr.h:242
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:254

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createTopLevelMetadata ( )

Definition at line 980 of file FileMgr.cpp.

References createDBMetaFile(), DB_META_FILENAME, db_version_, logger::FATAL, getDBVersion(), LOG, and openDBMetaFile().

Referenced by Data_Namespace::DataMgr::createTopLevelMetadata().

980  {
981  if (openDBMetaFile(DB_META_FILENAME)) {
982  if (db_version_ > getDBVersion()) {
983  LOG(FATAL) << "DB forward compatibility is not supported. Version of OmniSci "
984  "software used is older than the version of DB being read: "
985  << db_version_;
986  }
987  } else {
988  createDBMetaFile(DB_META_FILENAME);
989  }
990 }
#define LOG(tag)
Definition: Logger.h:185
int getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:972
void createDBMetaFile(const std::string &DBMetaFileName)
Definition: FileMgr.cpp:536
#define DB_META_FILENAME
Definition: FileMgr.cpp:43
bool openDBMetaFile(const std::string &DBMetaFileName)
Definition: FileMgr.cpp:547

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::deleteBuffer ( const ChunkKey & key,
const bool  purge = true 
)
override

Deletes the chunk with the specified key.

Definition at line 636 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, LOG, and showChunk().

636  {
637  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
638  auto chunkIt = chunkIndex_.find(key);
639  // ensure the Chunk exists
640  if (chunkIt == chunkIndex_.end()) {
641  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
642  }
643  chunkIndexWriteLock.unlock();
644  // chunkIt->second->writeMetadata(-1); // writes -1 as epoch - signifies deleted
645  if (purge) {
646  chunkIt->second->freePages();
647  }
648  //@todo need a way to represent delete in non purge case
649  delete chunkIt->second;
650  chunkIndex_.erase(chunkIt);
651 }
#define LOG(tag)
Definition: Logger.h:185
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffersWithPrefix ( const ChunkKey & keyPrefix,
const bool  purge = true 
)
override

Definition at line 653 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

Referenced by File_Namespace::GlobalFileMgr::deleteBuffersWithPrefix().

653  {
654  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
655  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
656  if (chunkIt == chunkIndex_.end()) {
657  return; // should we throw?
658  }
659  while (chunkIt != chunkIndex_.end() &&
660  std::search(chunkIt->first.begin(),
661  chunkIt->first.begin() + keyPrefix.size(),
662  keyPrefix.begin(),
663  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
664  /*
665  cout << "Freeing pages for chunk ";
666  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
667  std::cout << *vecIt << ",";
668  }
669  cout << endl;
670  */
671  if (purge) {
672  chunkIt->second->freePages();
673  }
674  //@todo need a way to represent delete in non purge case
675  delete chunkIt->second;
676  chunkIndex_.erase(chunkIt++);
677  }
678 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

+ Here is the caller graph for this function:

int File_Namespace::FileMgr::epoch ( )
inline

Returns current value of epoch - should be one greater than recorded at last checkpoint.

Definition at line 204 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileInfo::freePage(), File_Namespace::FileInfo::openExistingFile(), File_Namespace::FileBuffer::reserve(), setEpoch(), and File_Namespace::FileBuffer::write().

204 { return epoch_; }
int epoch_
the index of the next file id
Definition: FileMgr.h:246

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::fetchBuffer ( const ChunkKey & key,
AbstractBuffer * destBuffer,
const size_t  numBytes 
)
override

Definition at line 689 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, Data_Namespace::AbstractBuffer::read(), Data_Namespace::AbstractBuffer::reserve(), Data_Namespace::AbstractBuffer::setSize(), showChunk(), Data_Namespace::AbstractBuffer::size(), and Data_Namespace::AbstractBuffer::syncEncoder().

691  {
692  // reads chunk specified by ChunkKey into AbstractBuffer provided by
693  // destBuffer
694  if (destBuffer->isDirty()) {
695  LOG(FATAL)
696  << "Aborting attempt to fetch a chunk marked dirty. Chunk inconsistency for key: "
697  << showChunk(key);
698  }
699  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
700  auto chunkIt = chunkIndex_.find(key);
701  if (chunkIt == chunkIndex_.end()) {
702  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
703  }
704  chunkIndexReadLock.unlock();
705 
706  AbstractBuffer* chunk = chunkIt->second;
707  // ChunkSize is either specified in function call with numBytes or we
708  // just look at pageSize * numPages in FileBuffer
709  size_t chunkSize = numBytes == 0 ? chunk->size() : numBytes;
710  if (numBytes > 0 && numBytes > chunk->size()) {
711  LOG(FATAL) << "Chunk retrieved for key `" << showChunk(key) << "` is smaller ("
712  << chunk->size() << ") than number of bytes requested (" << numBytes
713  << ")";
714  }
715  destBuffer->reserve(chunkSize);
716  // std::cout << "After reserve chunksize: " << chunkSize << std::endl;
717  if (chunk->isUpdated()) {
718  chunk->read(destBuffer->getMemoryPtr(),
719  chunkSize,
720  0,
721  destBuffer->getType(),
722  destBuffer->getDeviceId());
723  } else {
724  chunk->read(destBuffer->getMemoryPtr() + destBuffer->size(),
725  chunkSize - destBuffer->size(),
726  destBuffer->size(),
727  destBuffer->getType(),
728  destBuffer->getDeviceId());
729  }
730  destBuffer->setSize(chunkSize);
731  destBuffer->syncEncoder(chunk);
732 }
void syncEncoder(const AbstractBuffer *src_buffer)
#define LOG(tag)
Definition: Logger.h:185
virtual size_t size() const =0
virtual int8_t * getMemoryPtr()=0
virtual MemoryLevel getType() const =0
virtual bool isDirty() const
virtual void read(int8_t *const dst, const size_t num_bytes, const size_t offset=0, const MemoryLevel dst_buffer_type=CPU_LEVEL, const int dst_device_id=-1)=0
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
An AbstractBuffer is a unit of data management for a data manager.
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
virtual bool isUpdated() const
void setSize(const size_t size)
virtual int getDeviceId() const
virtual void reserve(size_t num_bytes)=0
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

+ Here is the call graph for this function:

void File_Namespace::FileMgr::free ( AbstractBuffer * buffer)
override

Definition at line 795 of file FileMgr.cpp.

References logger::FATAL, and LOG.

795  {
796  LOG(FATAL) << "Operation not supported";
797 }
#define LOG(tag)
Definition: Logger.h:185
void File_Namespace::FileMgr::free_page ( std::pair< FileInfo *, int > &&  page)

Definition at line 997 of file FileMgr.cpp.

References free_pages, and mutex_free_page.

Referenced by checkpoint(), and File_Namespace::FileInfo::freePage().

997  {
998  std::unique_lock<mapd_shared_mutex> lock(mutex_free_page);
999  free_pages.push_back(page);
1000 }
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:257
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:256

+ Here is the caller graph for this function:

const std::pair<const int, const int> File_Namespace::FileMgr::get_fileMgrKey ( ) const
inline

Definition at line 234 of file FileMgr.h.

Referenced by File_Namespace::FileInfo::openExistingFile().

234 { return fileMgrKey_; }
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getAllocated ( )
inline override

Definition at line 152 of file FileMgr.h.

152 { return 0; }
AbstractBuffer * File_Namespace::FileMgr::getBuffer ( const ChunkKey & key,
const size_t  numBytes = 0 
)
override

Returns a pointer to the chunk with the specified key.

Definition at line 680 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, LOG, and showChunk().

680  {
681  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
682  auto chunkIt = chunkIndex_.find(key);
683  if (chunkIt == chunkIndex_.end()) {
684  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
685  }
686  return chunkIt->second;
687 }
#define LOG(tag)
Definition: Logger.h:185
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

+ Here is the call graph for this function:

void File_Namespace::FileMgr::getChunkMetadataVec ( std::vector< std::pair< ChunkKey, ChunkMetadata >> &  chunkMetadataVec)
override

Definition at line 929 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

930  {
931  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
932  chunkMetadataVec.reserve(chunkIndex_.size());
933  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
934  if (chunkIt->second->has_encoder) {
935  ChunkMetadata chunkMetadata;
936  chunkIt->second->encoder->getMetadata(chunkMetadata);
937  chunkMetadataVec.emplace_back(chunkIt->first, chunkMetadata);
938  }
939  }
940 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
void File_Namespace::FileMgr::getChunkMetadataVecForKeyPrefix ( std::vector< std::pair< ChunkKey, ChunkMetadata >> &  chunkMetadataVec,
const ChunkKey keyPrefix 
)
override

Definition at line 942 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

944  {
945  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(
946  chunkIndexMutex_); // is this guarding the right structure? it look slike we oly
947  // read here for chunk
948  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
949  if (chunkIt == chunkIndex_.end()) {
950  return; // throw?
951  }
952  while (chunkIt != chunkIndex_.end() &&
953  std::search(chunkIt->first.begin(),
954  chunkIt->first.begin() + keyPrefix.size(),
955  keyPrefix.begin(),
956  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
957  /*
958  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
959  std::cout << *vecIt << ",";
960  }
961  cout << endl;
962  */
963  if (chunkIt->second->has_encoder) {
964  ChunkMetadata chunkMetadata;
965  chunkIt->second->encoder->getMetadata(chunkMetadata);
966  chunkMetadataVec.emplace_back(chunkIt->first, chunkMetadata);
967  }
968  chunkIt++;
969  }
970 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
bool File_Namespace::FileMgr::getDBConvert ( ) const

Definition at line 976 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBConvert(), and gfm_.

976  {
977  return gfm_->getDBConvert();
978 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237

+ Here is the call graph for this function:

int File_Namespace::FileMgr::getDBVersion ( ) const

Returns the database version reported by the global file manager (gfm_).

Definition at line 972 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBVersion(), and gfm_.

Referenced by createDBMetaFile(), createTopLevelMetadata(), and writeAndSyncDBMetaToDisk().

972  {
973  return gfm_->getDBVersion();
974 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FILE * File_Namespace::FileMgr::getFileForFileId ( const int  fileId)

Returns FILE pointer associated with requested fileId.

See Also
FileBuffer

Definition at line 916 of file FileMgr.cpp.

References CHECK(), and files_.

Referenced by File_Namespace::FileBuffer::readMetadata(), and File_Namespace::FileBuffer::writeMetadata().

916  {
917  CHECK(fileId >= 0 && static_cast<size_t>(fileId) < files_.size());
918  return files_[fileId]->f;
919 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
CHECK(cgen_state)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo* File_Namespace::FileMgr::getFileInfoForFileId ( const int  fileId)
inline

Definition at line 155 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileBuffer::copyPage(), copyPage(), File_Namespace::FileBuffer::freePages(), File_Namespace::readForThread(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeHeader().

155 { return files_[fileId]; }
std::vector< FileInfo * > files_
Definition: FileMgr.h:241

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::getFileMgrBasePath ( ) const
inline

Definition at line 230 of file FileMgr.h.

Referenced by closeRemovePhysical().

230 { return fileMgrBasePath_; }
std::string fileMgrBasePath_
Definition: FileMgr.h:239

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getInUseSize ( )
inline override

Definition at line 151 of file FileMgr.h.

151 { return 0; }
size_t File_Namespace::FileMgr::getMaxSize ( )
inline override

Definition at line 150 of file FileMgr.h.

150 { return 0; }
MgrType File_Namespace::FileMgr::getMgrType ( )
inline override

Definition at line 145 of file FileMgr.h.

145 { return FILE_MGR; };
size_t File_Namespace::FileMgr::getNumChunks ( )
inline override

Definition at line 221 of file FileMgr.h.

221  {
222  // @todo should be locked - but this is more for testing now
223  return chunkIndex_.size();
224  }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
size_t File_Namespace::FileMgr::getNumReaderThreads ( )
inline

Returns the number of threads, as set by the num-reader-threads parameter, to be used during the initial load and subsequent reads of data.

Definition at line 210 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::read().

210 { return num_reader_threads_; }
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:243

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::getStringMgrType ( )
inline override

Definition at line 146 of file FileMgr.h.

146 { return ToString(FILE_MGR); }
void File_Namespace::FileMgr::init ( const size_t  num_reader_threads)

Definition at line 124 of file FileMgr.cpp.

References CHECK_EQ, chunkIndex_, createEpochFile(), epoch_, EPOCH_FILENAME, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, fileMgrKey_, File_Namespace::fileSize(), File_Namespace::GlobalFileMgr::getBasePath(), gfm_, File_Namespace::headerCompare(), logger::INFO, LOG, MAPD_FILE_EXT, nextFileId_, num_reader_threads_, openEpochFile(), openExistingFile(), processFileFutures(), timer_start(), timer_stop(), to_string(), and VLOG.

Referenced by FileMgr().

124  {
125  // if epoch = -1 this means open from epoch file
126  const std::string fileMgrDirPrefix("table");
127  const std::string FileMgrDirDelim("_");
128  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
129  std::to_string(fileMgrKey_.first) + // db_id
130  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
131  boost::filesystem::path path(fileMgrBasePath_);
132  if (boost::filesystem::exists(path)) {
133  if (!boost::filesystem::is_directory(path)) {
134  LOG(FATAL) << "Specified path '" << fileMgrBasePath_
135  << "' for table data is not a directory.";
136  }
137  if (epoch_ != -1) { // if opening at previous epoch
138  int epochCopy = epoch_;
140  epoch_ = epochCopy;
141  } else {
143  }
144 
145  auto clock_begin = timer_start();
146 
147  boost::filesystem::directory_iterator
148  endItr; // default construction yields past-the-end
149  int maxFileId = -1;
150  int fileCount = 0;
151  int threadCount = std::thread::hardware_concurrency();
152  std::vector<HeaderInfo> headerVec;
153  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
154  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
155  if (boost::filesystem::is_regular_file(fileIt->status())) {
156  // note that boost::filesystem leaves preceding dot on
157  // extension - hence MAPD_FILE_EXT is ".mapd"
158  std::string extension(fileIt->path().extension().string());
159 
160  if (extension == MAPD_FILE_EXT) {
161  std::string fileStem(fileIt->path().stem().string());
162  // remove trailing dot if any
163  if (fileStem.size() > 0 && fileStem.back() == '.') {
164  fileStem = fileStem.substr(0, fileStem.size() - 1);
165  }
166  size_t dotPos = fileStem.find_last_of("."); // should only be one
167  if (dotPos == std::string::npos) {
168  LOG(FATAL) << "File `" << fileIt->path()
169  << "` does not carry page size information in the filename.";
170  }
171  int fileId = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
172  if (fileId > maxFileId) {
173  maxFileId = fileId;
174  }
175  size_t pageSize =
176  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
177  std::string filePath(fileIt->path().string());
178  size_t fileSize = boost::filesystem::file_size(filePath);
179  CHECK_EQ(fileSize % pageSize, size_t(0)); // should be no partial pages
180  size_t numPages = fileSize / pageSize;
181 
182  VLOG(4) << "File id: " << fileId << " Page size: " << pageSize
183  << " Num pages: " << numPages;
184 
185  file_futures.emplace_back(std::async(
186  std::launch::async, [filePath, fileId, pageSize, numPages, this] {
187  std::vector<HeaderInfo> tempHeaderVec;
188  openExistingFile(filePath, fileId, pageSize, numPages, tempHeaderVec);
189  return tempHeaderVec;
190  }));
191  fileCount++;
192  if (fileCount % threadCount == 0) {
193  processFileFutures(file_futures, headerVec);
194  }
195  }
196  }
197  }
198 
199  if (file_futures.size() > 0) {
200  processFileFutures(file_futures, headerVec);
201  }
202  int64_t queue_time_ms = timer_stop(clock_begin);
203 
204  LOG(INFO) << "Completed Reading table's file metadata, Elapsed time : "
205  << queue_time_ms << "ms Epoch: " << epoch_ << " files read: " << fileCount
206  << " table location: '" << fileMgrBasePath_ << "'";
207 
208  /* Sort headerVec so that all HeaderInfos
209  * from a chunk will be grouped together
210  * and in order of increasing PageId
211  * - Version Epoch */
212 
213  std::sort(headerVec.begin(), headerVec.end(), headerCompare);
214 
215  /* Goal of next section is to find sequences in the
216  * sorted headerVec of the same ChunkId, which we
217  * can then initiate a FileBuffer with */
218 
219  VLOG(4) << "Number of Headers in Vector: " << headerVec.size();
220  if (headerVec.size() > 0) {
221  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
222  auto startIt = headerVec.begin();
223 
224  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
225  ++headerIt) {
226  // for (auto chunkIt = headerIt->chunkKey.begin(); chunkIt !=
227  // headerIt->chunkKey.end(); ++chunkIt) {
228  // std::cout << *chunkIt << " ";
229  //}
230 
231  if (headerIt->chunkKey != lastChunkKey) {
232  chunkIndex_[lastChunkKey] =
233  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, headerIt);
234  /*
235  if (startIt->versionEpoch != -1) {
236  cout << "not skipping bc version != -1" << endl;
237  // -1 means that chunk was deleted
238  // lets not read it in
239  chunkIndex_[lastChunkKey] = new FileBuffer
240  (this,/lastChunkKey,startIt,headerIt);
241 
242  }
243  else {
244  cout << "Skipping bc version == -1" << endl;
245  }
246  */
247  lastChunkKey = headerIt->chunkKey;
248  startIt = headerIt;
249  }
250  }
251  // now need to insert last Chunk
252  // size_t pageSize = files_[startIt->page.fileId]->pageSize;
253  // cout << "Inserting last chunk" << endl;
254  // if (startIt->versionEpoch != -1) {
255  chunkIndex_[lastChunkKey] =
256  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, headerVec.end());
257  //}
258  }
259  nextFileId_ = maxFileId + 1;
260  // std::cout << "next file id: " << nextFileId_ << std::endl;
261  } else {
262  if (!boost::filesystem::create_directory(path)) {
263  LOG(FATAL) << "Could not create data directory: " << path;
264  }
266  }
267 
268  /* define number of reader threads to be used */
269  size_t num_hardware_based_threads =
270  std::thread::hardware_concurrency(); // # of threads is based on # of cores on the
271  // host
272  if (num_reader_threads == 0) { // # of threads has not been defined by user
273  num_reader_threads_ = num_hardware_based_threads;
274  } else {
275  if (num_reader_threads > num_hardware_based_threads) {
276  num_reader_threads_ = num_hardware_based_threads;
277  } else {
278  num_reader_threads_ = num_reader_threads;
279  }
280  }
281 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
std::vector< int > ChunkKey
Definition: types.h:35
std::string getBasePath() const
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:487
void openEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:499
#define LOG(tag)
Definition: Logger.h:185
#define MAPD_FILE_EXT
Definition: File.h:26
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
std::string fileMgrBasePath_
Definition: FileMgr.h:239
bool headerCompare(const HeaderInfo &firstElem, const HeaderInfo &secondElem)
Definition: FileMgr.cpp:49
std::string to_string(char const *&&v)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:283
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:243
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:171
#define EPOCH_FILENAME
Definition: FileMgr.cpp:42
std::pair< const int, const int > fileMgrKey_
Key of this FileMgr: the (db_id, tb_id) pair.
Definition: FileMgr.h:238
#define VLOG(n)
Definition: Logger.h:280
Type timer_start()
Definition: measure.h:40
FileInfo * openExistingFile(const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:868

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::init ( const std::string  dataPathToConvertFrom)

Definition at line 297 of file FileMgr.cpp.

References CHECK(), chunkIndex_, copyPage(), epoch_, EPOCH_FILENAME, File_Namespace::MultiPage::epochs, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), File_Namespace::fileSize(), File_Namespace::GlobalFileMgr::getFileMgr(), File_Namespace::FileBuffer::getMultiPage(), gfm_, File_Namespace::headerCompare(), LOG, MAPD_FILE_EXT, File_Namespace::FileBuffer::multiPages_, nextFileId_, openEpochFile(), openExistingFile(), File_Namespace::FileBuffer::pageDataSize(), File_Namespace::FileBuffer::pageSize(), processFileFutures(), requestFreePage(), File_Namespace::FileBuffer::reservedHeaderSize(), Data_Namespace::AbstractBuffer::setDirty(), Data_Namespace::AbstractBuffer::setSize(), File_Namespace::FileBuffer::size(), Data_Namespace::AbstractBuffer::syncEncoder(), and File_Namespace::FileBuffer::writeHeader().

297  {
298  int converted_data_epoch = 0;
299  boost::filesystem::path path(dataPathToConvertFrom);
300  if (boost::filesystem::exists(path)) {
301  if (!boost::filesystem::is_directory(path)) {
302  LOG(FATAL) << "Specified path `" << path << "` is not a directory.";
303  }
304 
305  if (epoch_ != -1) { // if opening at previous epoch
306  int epochCopy = epoch_;
308  epoch_ = epochCopy;
309  } else {
311  }
312 
313  boost::filesystem::directory_iterator
314  endItr; // default construction yields past-the-end
315  int maxFileId = -1;
316  int fileCount = 0;
317  int threadCount = std::thread::hardware_concurrency();
318  std::vector<HeaderInfo> headerVec;
319  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
320  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
321  if (boost::filesystem::is_regular_file(fileIt->status())) {
322  // note that boost::filesystem leaves preceding dot on
323  // extension - hence MAPD_FILE_EXT is ".mapd"
324  std::string extension(fileIt->path().extension().string());
325 
326  if (extension == MAPD_FILE_EXT) {
327  std::string fileStem(fileIt->path().stem().string());
328  // remove trailing dot if any
329  if (fileStem.size() > 0 && fileStem.back() == '.') {
330  fileStem = fileStem.substr(0, fileStem.size() - 1);
331  }
332  size_t dotPos = fileStem.find_last_of("."); // should only be one
333  if (dotPos == std::string::npos) {
334  LOG(FATAL) << "File `" + fileIt->path().string() +
335  "` does not carry page size information in the filename.";
336  }
337  int fileId = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
338  if (fileId > maxFileId) {
339  maxFileId = fileId;
340  }
341  size_t pageSize =
342  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
343  std::string filePath(fileIt->path().string());
344  size_t fileSize = boost::filesystem::file_size(filePath);
345  CHECK(fileSize % pageSize == 0); // should be no partial pages
346  size_t numPages = fileSize / pageSize;
347 
348  file_futures.emplace_back(std::async(
349  std::launch::async, [filePath, fileId, pageSize, numPages, this] {
350  std::vector<HeaderInfo> tempHeaderVec;
351  openExistingFile(filePath, fileId, pageSize, numPages, tempHeaderVec);
352  return tempHeaderVec;
353  }));
354  fileCount++;
355  if (fileCount % threadCount) {
356  processFileFutures(file_futures, headerVec);
357  }
358  }
359  }
360  }
361 
362  if (file_futures.size() > 0) {
363  processFileFutures(file_futures, headerVec);
364  }
365 
366  /* Sort headerVec so that all HeaderInfos
367  * from a chunk will be grouped together
368  * and in order of increasing PageId
369  * - Version Epoch */
370 
371  std::sort(headerVec.begin(), headerVec.end(), headerCompare);
372 
373  /* Goal of next section is to find sequences in the
374  * sorted headerVec of the same ChunkId, which we
375  * can then initiate a FileBuffer with */
376 
377  if (headerVec.size() > 0) {
378  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
379  auto startIt = headerVec.begin();
380 
381  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
382  ++headerIt) {
383  if (headerIt->chunkKey != lastChunkKey) {
384  FileMgr* c_fm_ = gfm_->getFileMgr(lastChunkKey);
385  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerIt);
386  chunkIndex_[lastChunkKey] = srcBuf;
387  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
388  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
389  destBuf->syncEncoder(srcBuf);
390  destBuf->setSize(srcBuf->size());
391  destBuf->setDirty(); // this needs to be set to force writing out metadata
392  // files from "checkpoint()" call
393 
394  size_t totalNumPages = srcBuf->getMultiPage().size();
395  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
396  Page srcPage = srcBuf->getMultiPage()[pageNum].current();
397  Page destPage = c_fm_->requestFreePage(
398  srcBuf->pageSize(),
399  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
400  MultiPage multiPage(srcBuf->pageSize());
401  multiPage.epochs.push_back(converted_data_epoch);
402  multiPage.pageVersions.push_back(destPage);
403  destBuf->multiPages_.push_back(multiPage);
404  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
405  copyPage(
406  srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
407  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
408  }
409  lastChunkKey = headerIt->chunkKey;
410  startIt = headerIt;
411  }
412  }
413 
414  // now need to insert last Chunk
415  FileMgr* c_fm_ = gfm_->getFileMgr(lastChunkKey);
416  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerVec.end());
417  chunkIndex_[lastChunkKey] = srcBuf;
418  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
419  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
420  destBuf->syncEncoder(srcBuf);
421  destBuf->setSize(srcBuf->size());
422  destBuf->setDirty(); // this needs to be set to write out metadata file from the
423  // "checkpoint()" call
424 
425  size_t totalNumPages = srcBuf->getMultiPage().size();
426  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
427  Page srcPage = srcBuf->getMultiPage()[pageNum].current();
428  Page destPage = c_fm_->requestFreePage(
429  srcBuf->pageSize(),
430  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
431  MultiPage multiPage(srcBuf->pageSize());
432  multiPage.epochs.push_back(converted_data_epoch);
433  multiPage.pageVersions.push_back(destPage);
434  destBuf->multiPages_.push_back(multiPage);
435  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
436  copyPage(srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
437  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
438  }
439  }
440  nextFileId_ = maxFileId + 1;
441  } else {
442  if (!boost::filesystem::create_directory(path)) {
443  LOG(FATAL) << "Specified path does not exist: " << path;
444  }
445  }
446 }
std::vector< int > ChunkKey
Definition: types.h:35
void openEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:499
#define LOG(tag)
Definition: Logger.h:185
FileMgr * getFileMgr(const int db_id, const int tb_id)
void copyPage(Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
Definition: FileMgr.cpp:465
#define MAPD_FILE_EXT
Definition: File.h:26
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
bool headerCompare(const HeaderInfo &firstElem, const HeaderInfo &secondElem)
Definition: FileMgr.cpp:49
FileMgr(const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const size_t num_reader_threads=0, const int epoch=-1, const size_t defaultPageSize=2097152)
Constructor.
Definition: FileMgr.cpp:67
CHECK(cgen_state)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:283
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:171
#define EPOCH_FILENAME
Definition: FileMgr.cpp:42
FileInfo * openExistingFile(const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:868

+ Here is the call graph for this function:

bool File_Namespace::FileMgr::isAllocationCapped ( )
inline override

Definition at line 153 of file FileMgr.h.

153 { return false; }
bool File_Namespace::FileMgr::isBufferOnDevice ( const ChunkKey key)
override

Definition at line 631 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

631  {
632  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
633  return chunkIndex_.find(key) != chunkIndex_.end();
634 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
bool File_Namespace::FileMgr::openDBMetaFile ( const std::string &  DBMetaFileName)
private

Definition at line 547 of file FileMgr.cpp.

References db_version_, DBMetaFile_, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, File_Namespace::open(), and File_Namespace::read().

Referenced by createTopLevelMetadata().

547  {
548  std::string DBMetaFilePath(fileMgrBasePath_ + "/" + DBMetaFileName);
549 
550  if (!boost::filesystem::exists(DBMetaFilePath)) {
551  // LOG(INFO) << "DB metadata file does not exist, one will be created.";
552  return false;
553  }
554  if (!boost::filesystem::is_regular_file(DBMetaFilePath)) {
555  // LOG(INFO) << "DB metadata file is not a regular file, one will be created.";
556  return false;
557  }
558  if (boost::filesystem::file_size(DBMetaFilePath) < 4) {
559  // LOG(INFO) << "DB metadata file is not sized properly, one will be created.";
560  return false;
561  }
562  DBMetaFile_ = open(DBMetaFilePath);
563  read(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_version_);
564 
565  return true;
566 }
std::string fileMgrBasePath_
Definition: FileMgr.h:239
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:113
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:83

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::openEpochFile ( const std::string &  epochFileName)
private

Definition at line 499 of file FileMgr.cpp.

References epoch_, epochFile_, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), and File_Namespace::read().

Referenced by init().

499  {
500  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
501  if (!boost::filesystem::exists(epochFilePath)) {
502  LOG(FATAL) << "Epoch file `" << epochFilePath << "` does not exist";
503  }
504  if (!boost::filesystem::is_regular_file(epochFilePath)) {
505  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
506  }
507  if (boost::filesystem::file_size(epochFilePath) < 4) {
508  LOG(FATAL) << "Epoch file `" << epochFilePath
509  << "` is not sized properly (current size: "
510  << boost::filesystem::file_size(epochFilePath) << ", expected size: 4)";
511  }
512  epochFile_ = open(epochFilePath);
513  read(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
514  // std::cout << "Epoch after open file: " << epoch_ << std::endl;
515  epoch_++; // we are in new epoch from last checkpoint
516 }
#define LOG(tag)
Definition: Logger.h:185
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * epochFile_
FILE handle of the opened epoch file
Definition: FileMgr.h:247
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:113
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:83

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::openExistingFile ( const std::string &  path,
const int  fileId,
const size_t  pageSize,
const size_t  numPages,
std::vector< HeaderInfo > &  headerVec 
)
private

Definition at line 868 of file FileMgr.cpp.

References epoch_, fileIndex_, files_, files_rw_mutex_, File_Namespace::open(), and File_Namespace::FileInfo::openExistingFile().

Referenced by init().

872  {
873  FILE* f = open(path);
874  FileInfo* fInfo = new FileInfo(
875  this, fileId, f, pageSize, numPages, false); // false means don't init file
876 
877  fInfo->openExistingFile(headerVec, epoch_);
878  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
879  if (fileId >= static_cast<int>(files_.size())) {
880  files_.resize(fileId + 1);
881  }
882  files_[fileId] = fInfo;
883  fileIndex_.insert(std::pair<size_t, int>(pageSize, fileId));
884  return fInfo;
885 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
PageSizeFileMMap fileIndex_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:242
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:83
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:254

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::printSlabs ( )
inline override

Definition at line 147 of file FileMgr.h.

147 { return "Not Implemented"; }
void File_Namespace::FileMgr::processFileFutures ( std::vector< std::future< std::vector< HeaderInfo >>> &  file_futures,
std::vector< HeaderInfo > &  headerVec 
)
private

Definition at line 283 of file FileMgr.cpp.

Referenced by init().

285  {
286  for (auto& file_future : file_futures) {
287  file_future.wait();
288  }
289  // concatenate the vectors after thread completes
290  for (auto& file_future : file_futures) {
291  auto tempHeaderVec = file_future.get();
292  headerVec.insert(headerVec.end(), tempHeaderVec.begin(), tempHeaderVec.end());
293  }
294  file_futures.clear();
295 }

+ Here is the caller graph for this function:

AbstractBuffer * File_Namespace::FileMgr::putBuffer ( const ChunkKey key,
AbstractBuffer d,
const size_t  numBytes = 0 
)
override

Puts the contents of d into the Chunk with the given key.

Parameters
key - Unique identifier for a Chunk.
d - An object representing the source data for the Chunk.
Returns
AbstractBuffer*

Definition at line 734 of file FileMgr.cpp.

References Data_Namespace::AbstractBuffer::append(), CHECK_LT, chunkIndex_, chunkIndexMutex_, Data_Namespace::AbstractBuffer::clearDirtyBits(), createBuffer(), defaultPageSize_, logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isAppended(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, Data_Namespace::AbstractBuffer::setSize(), showChunk(), Data_Namespace::AbstractBuffer::size(), Data_Namespace::AbstractBuffer::syncEncoder(), and Data_Namespace::AbstractBuffer::write().

736  {
737  // obtain a pointer to the Chunk
738  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
739  auto chunkIt = chunkIndex_.find(key);
740  AbstractBuffer* chunk;
741  if (chunkIt == chunkIndex_.end()) {
742  chunk = createBuffer(key, defaultPageSize_);
743  } else {
744  chunk = chunkIt->second;
745  }
746  chunkIndexWriteLock.unlock();
747  size_t oldChunkSize = chunk->size();
748  // write the buffer's data to the Chunk
749  // size_t newChunkSize = numBytes == 0 ? srcBuffer->size() : numBytes;
750  size_t newChunkSize = numBytes == 0 ? srcBuffer->size() : numBytes;
751  if (chunk->isDirty()) {
752  // multiple appends are allowed,
753  // but only single update is allowed
754  if (srcBuffer->isUpdated() && chunk->isUpdated()) {
755  LOG(FATAL) << "Aborting attempt to write a chunk marked dirty. Chunk inconsistency "
756  "for key: "
757  << showChunk(key);
758  }
759  }
760  if (srcBuffer->isUpdated()) {
761  // chunk size is not changed when fixed rows are updated or are marked as deleted.
762  // but when rows are vacuumed or varlen rows are updated (new rows are appended),
763  // chunk size will change. For vacuum, checkpoint should sync size from cpu to disk.
764  // For varlen update, it takes another route via fragmenter using disk-level buffer.
765  if (0 == numBytes && !chunk->isDirty()) {
766  chunk->setSize(newChunkSize);
767  }
768  //@todo use dirty flags to only flush pages of chunk that need to
769  // be flushed
770  chunk->write((int8_t*)srcBuffer->getMemoryPtr(),
771  newChunkSize,
772  0,
773  srcBuffer->getType(),
774  srcBuffer->getDeviceId());
775  } else if (srcBuffer->isAppended()) {
776  CHECK_LT(oldChunkSize, newChunkSize);
777  chunk->append((int8_t*)srcBuffer->getMemoryPtr() + oldChunkSize,
778  newChunkSize - oldChunkSize,
779  srcBuffer->getType(),
780  srcBuffer->getDeviceId());
781  }
782  // chunk->clearDirtyBits(); // Hack: because write and append will set dirty bits
783  //@todo commenting out line above will make sure this metadata is set
784  // but will trigger error on fetch chunk
785  srcBuffer->clearDirtyBits();
786  chunk->syncEncoder(srcBuffer);
787  return chunk;
788 }
void syncEncoder(const AbstractBuffer *src_buffer)
#define LOG(tag)
Definition: Logger.h:185
virtual size_t size() const =0
virtual bool isDirty() const
AbstractBuffer * createBuffer(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
Creates a chunk with the specified key and page size.
Definition: FileMgr.cpp:611
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
An AbstractBuffer is a unit of data management for a data manager.
virtual void write(int8_t *src, const size_t num_bytes, const size_t offset=0, const MemoryLevel src_buffer_type=CPU_LEVEL, const int src_device_id=-1)=0
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
virtual bool isUpdated() const
#define CHECK_LT(x, y)
Definition: Logger.h:200
size_t defaultPageSize_
default page size passed to createBuffer() when a chunk has to be created
Definition: FileMgr.h:244
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
void setSize(const size_t size)
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

+ Here is the call graph for this function:

Page File_Namespace::FileMgr::requestFreePage ( size_t  pagesize,
const bool  isMetadata 
)

Definition at line 799 of file FileMgr.cpp.

References CHECK(), createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, MAX_FILE_N_METADATA_PAGES, and MAX_FILE_N_PAGES.

Referenced by File_Namespace::FileBuffer::addNewMultiPage(), init(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeMetadata().

799  {
800  std::lock_guard<std::mutex> lock(getPageMutex_);
801 
802  auto candidateFiles = fileIndex_.equal_range(pageSize);
803  int pageNum = -1;
804  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
805  FileInfo* fileInfo = files_[fileIt->second];
806  pageNum = fileInfo->getFreePage();
807  if (pageNum != -1) {
808  return (Page(fileInfo->fileId, pageNum));
809  }
810  }
811  // if here then we need to add a file
812  FileInfo* fileInfo;
813  if (isMetadata) {
814  fileInfo = createFile(pageSize, MAX_FILE_N_METADATA_PAGES);
815  } else {
816  fileInfo = createFile(pageSize, MAX_FILE_N_PAGES);
817  }
818  pageNum = fileInfo->getFreePage();
819  CHECK(pageNum != -1);
820  return (Page(fileInfo->fileId, pageNum));
821 }
std::mutex getPageMutex_
mutex serializing free-page requests (held by requestFreePage() and requestFreePages())
Definition: FileMgr.h:252
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
CHECK(cgen_state)
#define MAX_FILE_N_METADATA_PAGES
Definition: File.h:28
PageSizeFileMMap fileIndex_
Maps page sizes to the files (as indices into files_) that contain pages of that size.
Definition: FileMgr.h:242
#define MAX_FILE_N_PAGES
Definition: File.h:27
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:887

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::requestFreePages ( size_t  npages,
size_t  pagesize,
std::vector< Page > &  pages,
const bool  isMetadata 
)

Obtains free pages – creates new files if necessary – of the requested size.

Given a page size and number of pages, this method updates the vector "pages" to include free pages of the requested size. These pages are immediately removed from the free list of the affected file(s). If there are not enough pages available among current files, new files are created and their pages are included in the vector.

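Parameters
npages: The number of free pages requested
pagesize: The size of each requested page
pages: A vector containing the free pages obtained by this method
isMetadata: Selects the capacity of any newly created file: MAX_FILE_N_METADATA_PAGES when true, MAX_FILE_N_PAGES otherwise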

Definition at line 823 of file FileMgr.cpp.

References CHECK(), createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, MAX_FILE_N_METADATA_PAGES, and MAX_FILE_N_PAGES.

826  {
827  // not used currently
828  // @todo add method to FileInfo to get more than one page
829  std::lock_guard<std::mutex> lock(getPageMutex_);
830  auto candidateFiles = fileIndex_.equal_range(pageSize);
831  size_t numPagesNeeded = numPagesRequested;
832  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
833  FileInfo* fileInfo = files_[fileIt->second];
834  int pageNum;
835  do {
836  pageNum = fileInfo->getFreePage();
837  if (pageNum != -1) {
838  pages.emplace_back(fileInfo->fileId, pageNum);
839  numPagesNeeded--;
840  }
841  } while (pageNum != -1 && numPagesNeeded > 0);
842  if (numPagesNeeded == 0) {
843  break;
844  }
845  }
846  while (numPagesNeeded > 0) {
847  FileInfo* fileInfo;
848  if (isMetadata) {
849  fileInfo = createFile(pageSize, MAX_FILE_N_METADATA_PAGES);
850  } else {
851  fileInfo = createFile(pageSize, MAX_FILE_N_PAGES);
852  }
853  int pageNum;
854  do {
855  pageNum = fileInfo->getFreePage();
856  if (pageNum != -1) {
857  pages.emplace_back(fileInfo->fileId, pageNum);
858  numPagesNeeded--;
859  }
860  } while (pageNum != -1 && numPagesNeeded > 0);
861  if (numPagesNeeded == 0) {
862  break;
863  }
864  }
865  CHECK(pages.size() == numPagesRequested);
866 }
std::mutex getPageMutex_
mutex serializing free-page requests (held by requestFreePage() and requestFreePages())
Definition: FileMgr.h:252
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
CHECK(cgen_state)
#define MAX_FILE_N_METADATA_PAGES
Definition: File.h:28
PageSizeFileMMap fileIndex_
Maps page sizes to the files (as indices into files_) that contain pages of that size.
Definition: FileMgr.h:242
#define MAX_FILE_N_PAGES
Definition: File.h:27
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:887

+ Here is the call graph for this function:

void File_Namespace::FileMgr::setEpoch ( int  epoch)
private

Definition at line 992 of file FileMgr.cpp.

References epoch(), epoch_, and writeAndSyncEpochToDisk().

Referenced by File_Namespace::GlobalFileMgr::setTableEpoch().

992  {
993  epoch_ = epoch;
994  writeAndSyncEpochToDisk();
995 }
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:204
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:518

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::writeAndSyncDBMetaToDisk ( )
private

Definition at line 568 of file FileMgr.cpp.

References DBMetaFile_, logger::FATAL, getDBVersion(), LOG, and File_Namespace::write().

568  {
569  int db_ver = getDBVersion();
570  write(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_ver);
571  int status = fflush(DBMetaFile_);
572  if (status != 0) {
573  LOG(FATAL) << "Could not sync DB metadata file to disk";
574  }
575 }
#define LOG(tag)
Definition: Logger.h:185
int getDBVersion() const
Returns the DB version number.
Definition: FileMgr.cpp:972
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:121

+ Here is the call graph for this function:

void File_Namespace::FileMgr::writeAndSyncEpochToDisk ( )
private

Definition at line 518 of file FileMgr.cpp.

References epoch_, epochFile_, logger::FATAL, LOG, and File_Namespace::write().

Referenced by checkpoint(), and setEpoch().

518  {
519  write(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
520  int status = fflush(epochFile_);
521  // int status = fcntl(fileno(epochFile_),51);
522  if (status != 0) {
523  LOG(FATAL) << "Could not flush epoch file to disk";
524  }
525 #ifdef __APPLE__
526  status = fcntl(fileno(epochFile_), 51);
527 #else
528  status = fsync(fileno(epochFile_));
529 #endif
530  if (status != 0) {
531  LOG(FATAL) << "Could not sync epoch file to disk";
532  }
533  ++epoch_;
534 }
#define LOG(tag)
Definition: Logger.h:185
FILE * epochFile_
file handle of the epoch file, to which epoch_ is written
Definition: FileMgr.h:247
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:121

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class GlobalFileMgr
friend

Definition at line 86 of file FileMgr.h.

Member Data Documentation

int File_Namespace::FileMgr::db_version_
private

DB version from dbmeta file, should be compatible with GlobalFileMgr::mapd_db_version_

Definition at line 248 of file FileMgr.h.

Referenced by createTopLevelMetadata(), and openDBMetaFile().

FILE* File_Namespace::FileMgr::DBMetaFile_
private

pointer to DB level metadata

Definition at line 250 of file FileMgr.h.

Referenced by createDBMetaFile(), openDBMetaFile(), and writeAndSyncDBMetaToDisk().

size_t File_Namespace::FileMgr::defaultPageSize_
private

The default page size (presumably used when a caller does not specify one).

Definition at line 244 of file FileMgr.h.

Referenced by copyPage(), createBuffer(), and putBuffer().

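int File_Namespace::FileMgr::epoch_
private

the current epoch (time of last checkpoint)

Definition at line 246 of file FileMgr.h.

FILE* File_Namespace::FileMgr::epochFile_
private

file handle of the epoch file, to which epoch_ is written by writeAndSyncEpochToDisk()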

Definition at line 247 of file FileMgr.h.

Referenced by closeRemovePhysical(), createEpochFile(), FileMgr(), openEpochFile(), and writeAndSyncEpochToDisk().

PageSizeFileMMap File_Namespace::FileMgr::fileIndex_
private

Maps page sizes to the files (as indices into files_) that contain pages of that size.

Definition at line 242 of file FileMgr.h.

Referenced by createFile(), openExistingFile(), requestFreePage(), and requestFreePages().

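std::string File_Namespace::FileMgr::fileMgrBasePath_
private

The OS file system path containing files related to this FileMgr

std::pair<const int, const int> File_Namespace::FileMgr::fileMgrKey_
private

Key identifying this FileMgr (presumably the fileMgrKey passed to the constructor).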

Definition at line 238 of file FileMgr.h.

Referenced by FileMgr(), and init().

std::vector<FileInfo*> File_Namespace::FileMgr::files_
private

A vector of files accessible via a file identifier.

Definition at line 241 of file FileMgr.h.

Referenced by checkpoint(), closeRemovePhysical(), createFile(), FileMgr(), getFileForFileId(), openExistingFile(), requestFreePage(), requestFreePages(), and ~FileMgr().

mapd_shared_mutex File_Namespace::FileMgr::files_rw_mutex_
mutableprivate

Definition at line 254 of file FileMgr.h.

Referenced by checkpoint(), createFile(), and openExistingFile().

std::vector<std::pair<FileInfo*, int> > File_Namespace::FileMgr::free_pages
private

Definition at line 257 of file FileMgr.h.

Referenced by checkpoint(), and free_page().

std::mutex File_Namespace::FileMgr::getPageMutex_
private

mutex serializing free-page requests (held by requestFreePage() and requestFreePages())

Definition at line 252 of file FileMgr.h.

Referenced by requestFreePage(), and requestFreePages().

GlobalFileMgr* File_Namespace::FileMgr::gfm_
private

Global FileMgr.

Definition at line 237 of file FileMgr.h.

Referenced by FileMgr(), getDBConvert(), getDBVersion(), and init().

mapd_shared_mutex File_Namespace::FileMgr::mutex_free_page
mutableprivate

Definition at line 256 of file FileMgr.h.

Referenced by checkpoint(), and free_page().

unsigned File_Namespace::FileMgr::nextFileId_
private

the index of the next file id

Definition at line 245 of file FileMgr.h.

Referenced by createFile(), and init().

size_t File_Namespace::FileMgr::num_reader_threads_
private

number of threads used when loading data

Definition at line 243 of file FileMgr.h.

Referenced by init().


The documentation for this class was generated from the following files: