OmniSciDB  0fdbebe030
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
File_Namespace::FileMgr Class Reference

#include <FileMgr.h>

+ Inheritance diagram for File_Namespace::FileMgr:
+ Collaboration diagram for File_Namespace::FileMgr:

Public Member Functions

 FileMgr (const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const size_t num_reader_threads=0, const int epoch=-1, const size_t defaultPageSize=2097152)
 Constructor. More...
 
 FileMgr (const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const bool initOnly)
 
 FileMgr (GlobalFileMgr *gfm, const size_t defaultPageSize, std::string basePath)
 
 ~FileMgr () override
 Destructor. More...
 
AbstractBuffer * createBuffer (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
 Creates a chunk with the specified key and page size. More...
 
bool isBufferOnDevice (const ChunkKey &key) override
 
void deleteBuffer (const ChunkKey &key, const bool purge=true) override
 Deletes the chunk with the specified key. More...
 
void deleteBuffersWithPrefix (const ChunkKey &keyPrefix, const bool purge=true) override
 
AbstractBuffer * getBuffer (const ChunkKey &key, const size_t numBytes=0) override
 Returns a pointer to the chunk with the specified key. More...
 
void fetchBuffer (const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override
 
AbstractBuffer * putBuffer (const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override
 Puts the contents of d into the Chunk with the given key. More...
 
AbstractBuffer * alloc (const size_t numBytes) override
 
void free (AbstractBuffer *buffer) override
 
Page requestFreePage (size_t pagesize, const bool isMetadata)
 
MgrType getMgrType () override
 
std::string getStringMgrType () override
 
std::string printSlabs () override
 
void clearSlabs () override
 
size_t getMaxSize () override
 
size_t getInUseSize () override
 
size_t getAllocated () override
 
bool isAllocationCapped () override
 
FileInfo * getFileInfoForFileId (const int fileId)
 
void init (const size_t num_reader_threads)
 
void init (const std::string dataPathToConvertFrom)
 
void copyPage (Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
 
void requestFreePages (size_t npages, size_t pagesize, std::vector< Page > &pages, const bool isMetadata)
 Obtains free pages – creates new files if necessary – of the requested size. More...
 
void getChunkMetadataVec (std::vector< std::pair< ChunkKey, ChunkMetadata >> &chunkMetadataVec) override
 
void getChunkMetadataVecForKeyPrefix (std::vector< std::pair< ChunkKey, ChunkMetadata >> &chunkMetadataVec, const ChunkKey &keyPrefix) override
 
void checkpoint () override
 Fsyncs data files, writes out epoch and fsyncs that. More...
 
void checkpoint (const int db_id, const int tb_id) override
 
int epoch ()
 Returns current value of epoch - should be one greater than recorded at last checkpoint. More...
 
size_t getNumReaderThreads ()
 Returns the number of threads, as defined by the num-reader-threads parameter, to use during the initial load and subsequent reads of data. More...
 
FILE * getFileForFileId (const int fileId)
 Returns FILE pointer associated with requested fileId. More...
 
size_t getNumChunks () override
 
int getDBVersion () const
 Index for looking up chunks. More...
 
bool getDBConvert () const
 
void createTopLevelMetadata ()
 
std::string getFileMgrBasePath () const
 
void closeRemovePhysical ()
 
void free_page (std::pair< FileInfo *, int > &&page)
 
const std::pair< const int,
const int > 
get_fileMgrKey () const
 

Public Attributes

ChunkKeyToChunkMap chunkIndex_
 

Private Member Functions

FileInfo * createFile (const size_t pageSize, const size_t numPages)
 Adds a file to the file manager repository. More...
 
FileInfo * openExistingFile (const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
 
void createEpochFile (const std::string &epochFileName)
 
void openEpochFile (const std::string &epochFileName)
 
void writeAndSyncEpochToDisk ()
 
void createDBMetaFile (const std::string &DBMetaFileName)
 
bool openDBMetaFile (const std::string &DBMetaFileName)
 
void writeAndSyncDBMetaToDisk ()
 
void setEpoch (int epoch)
 
void processFileFutures (std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
 

Private Attributes

GlobalFileMgr * gfm_
 Global FileMgr. More...
 
std::pair< const int, const int > fileMgrKey_
 
std::string fileMgrBasePath_
 
std::vector< FileInfo * > files_
 A vector of files accessible via a file identifier. More...
 
PageSizeFileMMap fileIndex_
 Maps page sizes to FileInfo objects. More...
 
size_t num_reader_threads_
 number of threads used when loading data More...
 
size_t defaultPageSize_
 
unsigned nextFileId_
 the index of the next file id More...
 
int epoch_
 the current epoch (time of last checkpoint) More...
 
FILE * epochFile_
 
int db_version_
 
FILE * DBMetaFile_
 pointer to DB level metadata More...
 
std::mutex getPageMutex_
 
mapd_shared_mutex chunkIndexMutex_
 
mapd_shared_mutex files_rw_mutex_
 
mapd_shared_mutex mutex_free_page
 
std::vector< std::pair
< FileInfo *, int > > 
free_pages
 

Friends

class GlobalFileMgr
 

Detailed Description

Definition at line 85 of file FileMgr.h.

Constructor & Destructor Documentation

File_Namespace::FileMgr::FileMgr ( const int  deviceId,
GlobalFileMgr *  gfm,
const std::pair< const int, const int >  fileMgrKey,
const size_t  num_reader_threads = 0,
const int  epoch = -1,
const size_t  defaultPageSize = 2097152 
)

Constructor.

Definition at line 67 of file FileMgr.cpp.

References init().

73  : AbstractBufferMgr(deviceId)
74  , gfm_(gfm)
75  , fileMgrKey_(fileMgrKey)
76  , defaultPageSize_(defaultPageSize)
77  , nextFileId_(0)
78  , epoch_(epoch) {
79  init(num_reader_threads);
80 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:204
void init(const size_t num_reader_threads)
Definition: FileMgr.cpp:124
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( const int  deviceId,
GlobalFileMgr *  gfm,
const std::pair< const int, const int >  fileMgrKey,
const bool  initOnly 
)

Definition at line 83 of file FileMgr.cpp.

References epochFile_, fileMgrBasePath_, fileMgrKey_, files_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, and to_string().

87  : AbstractBufferMgr(deviceId)
88  , gfm_(gfm)
89  , fileMgrKey_(fileMgrKey)
90  , defaultPageSize_(0)
91  , nextFileId_(0)
92  , epoch_(0) {
93  const std::string fileMgrDirPrefix("table");
94  const std::string FileMgrDirDelim("_");
95  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
96  std::to_string(fileMgrKey_.first) + // db_id
97  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
98  epochFile_ = nullptr;
99  files_.clear();
100 }
std::string getBasePath() const
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
std::string fileMgrBasePath_
Definition: FileMgr.h:239
std::string to_string(char const *&&v)
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:247
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( GlobalFileMgr *  gfm,
const size_t  defaultPageSize,
std::string  basePath 
)

Definition at line 102 of file FileMgr.cpp.

References init().

103  : AbstractBufferMgr(0)
104  , gfm_(gfm)
105  , fileMgrKey_(0, 0)
106  , fileMgrBasePath_(basePath)
107  , defaultPageSize_(defaultPageSize)
108  , nextFileId_(0)
109  , epoch_(-1) {
110  init(basePath);
111 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
std::string fileMgrBasePath_
Definition: FileMgr.h:239
void init(const size_t num_reader_threads)
Definition: FileMgr.cpp:124
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238

+ Here is the call graph for this function:

File_Namespace::FileMgr::~FileMgr ( )
override

Destructor.

Definition at line 113 of file FileMgr.cpp.

References chunkIndex_, and files_.

113  {
114  // checkpoint();
115  // free memory used by FileInfo objects
116  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
117  delete chunkIt->second;
118  }
119  for (auto file_info : files_) {
120  delete file_info;
121  }
122 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225

Member Function Documentation

AbstractBuffer * File_Namespace::FileMgr::alloc ( const size_t  numBytes = 0)
override

Definition at line 793 of file FileMgr.cpp.

References logger::FATAL, and LOG.

793  {
794  LOG(FATAL) << "Operation not supported";
795  return nullptr; // satisfy return-type warning
796 }
#define LOG(tag)
Definition: Logger.h:188
void File_Namespace::FileMgr::checkpoint ( )
override

Fsyncs data files, writes out epoch and fsyncs that.

Definition at line 580 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, epoch_, logger::FATAL, files_, files_rw_mutex_, free_page(), free_pages, LOG, mutex_free_page, VLOG, and writeAndSyncEpochToDisk().

580  {
581  VLOG(2) << "Checkpointing epoch: " << epoch_;
582  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
583  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
584  /*
585  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
586  std::cout << *vecIt << ",";
587  }
588  cout << "Is dirty: " << chunkIt->second->isDirty_ << endl;
589  */
590  if (chunkIt->second->is_dirty_) {
591  chunkIt->second->writeMetadata(epoch_);
592  chunkIt->second->clearDirtyBits();
593  }
594  }
595  chunkIndexWriteLock.unlock();
596 
597  mapd_shared_lock<mapd_shared_mutex> read_lock(files_rw_mutex_);
598  for (auto fileIt = files_.begin(); fileIt != files_.end(); ++fileIt) {
599  int status = (*fileIt)->syncToDisk();
600  if (status != 0) {
601  LOG(FATAL) << "Could not sync file to disk";
602  }
603  }
604 
605  writeAndSyncEpochToDisk();
606 
607  mapd_unique_lock<mapd_shared_mutex> freePagesWriteLock(mutex_free_page);
608  for (auto& free_page : free_pages) {
609  free_page.first->freePageDeferred(free_page.second);
610  }
611  free_pages.clear();
612 }
#define LOG(tag)
Definition: Logger.h:188
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:257
void free_page(std::pair< FileInfo *, int > &&page)
Definition: FileMgr.cpp:1000
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
int epoch_
the index of the next file id
Definition: FileMgr.h:246
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:521
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:256
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:254
#define VLOG(n)
Definition: Logger.h:291

+ Here is the call graph for this function:

void File_Namespace::FileMgr::checkpoint ( const int  db_id,
const int  tb_id 
)
inline override

Definition at line 197 of file FileMgr.h.

References logger::FATAL, and LOG.

197  {
198  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
199  }
#define LOG(tag)
Definition: Logger.h:188
void File_Namespace::FileMgr::clearSlabs ( )
inline override

Definition at line 148 of file FileMgr.h.

148  { /* noop */
149  }
void File_Namespace::FileMgr::closeRemovePhysical ( )

Definition at line 451 of file FileMgr.cpp.

References File_Namespace::close(), epochFile_, files_, getFileMgrBasePath(), and File_Namespace::renameForDelete().

Referenced by File_Namespace::GlobalFileMgr::removeTableRelatedDS().

451  {
452  for (auto file_info : files_) {
453  if (file_info->f) {
454  close(file_info->f);
455  file_info->f = nullptr;
456  }
457  }
458 
459  if (epochFile_) {
460  close(epochFile_);
461  epochFile_ = nullptr;
462  }
463 
464  /* rename for later deletion the directory containing table related data */
465  File_Namespace::renameForDelete(getFileMgrBasePath());
466 }
std::string getFileMgrBasePath() const
Definition: FileMgr.h:230
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:247
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:101
void renameForDelete(const std::string directoryName)
Renames a directory to DELETE_ME_<EPOCH>_<oldname>.
Definition: File.cpp:182

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::copyPage ( Page &  srcPage,
FileMgr *  destFileMgr,
Page &  destPage,
const size_t  reservedHeaderSize,
const size_t  numBytes,
const size_t  offset 
)

Definition at line 468 of file FileMgr.cpp.

References CHECK(), defaultPageSize_, File_Namespace::Page::fileId, getFileInfoForFileId(), File_Namespace::Page::pageNum, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by init().

473  {
474  CHECK(offset + numBytes <= defaultPageSize_);
475  FileInfo* srcFileInfo = getFileInfoForFileId(srcPage.fileId);
476  FileInfo* destFileInfo = destFileMgr->getFileInfoForFileId(destPage.fileId);
477  int8_t* buffer = new int8_t[numBytes];
478 
479  size_t bytesRead = srcFileInfo->read(
480  srcPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize, numBytes, buffer);
481  CHECK(bytesRead == numBytes);
482  size_t bytesWritten = destFileInfo->write(
483  destPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize,
484  numBytes,
485  buffer);
486  CHECK(bytesWritten == numBytes);
487  delete[] buffer;
488 }
FileInfo * getFileInfoForFileId(const int fileId)
Definition: FileMgr.h:155
CHECK(cgen_state)
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

AbstractBuffer * File_Namespace::FileMgr::createBuffer ( const ChunkKey &  key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
override

Creates a chunk with the specified key and page size.

Todo:
Make all accesses to chunkIndex_ thread-safe

Definition at line 614 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, defaultPageSize_, logger::FATAL, LOG, and showChunk().

Referenced by putBuffer().

616  {
617  size_t actualPageSize = pageSize;
618  if (actualPageSize == 0) {
619  actualPageSize = defaultPageSize_;
620  }
622  // we will do this lazily and not allocate space for the Chunk (i.e.
623  // FileBuffer yet)
624  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
625 
626  if (chunkIndex_.find(key) != chunkIndex_.end()) {
627  LOG(FATAL) << "Chunk already exists for key: " << showChunk(key);
628  }
629  chunkIndex_[key] = new FileBuffer(this, actualPageSize, key, numBytes);
630  chunkIndexWriteLock.unlock();
631  return (chunkIndex_[key]);
632 }
#define LOG(tag)
Definition: Logger.h:188
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:244
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createDBMetaFile ( const std::string &  DBMetaFileName)
private

Definition at line 539 of file FileMgr.cpp.

References File_Namespace::create(), DBMetaFile_, logger::FATAL, fileMgrBasePath_, getDBVersion(), LOG, and File_Namespace::write().

Referenced by createTopLevelMetadata().

539  {
540  std::string DBMetaFilePath(fileMgrBasePath_ + "/" + DBMetaFileName);
541  if (boost::filesystem::exists(DBMetaFilePath)) {
542  LOG(FATAL) << "DB metadata file `" << DBMetaFilePath << "` already exists.";
543  }
544  DBMetaFile_ = create(DBMetaFilePath, sizeof(int));
545  int db_ver = getDBVersion();
546  write(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_ver);
547  // LOG(INFO) << "DB metadata file has been created.";
548 }
#define LOG(tag)
Definition: Logger.h:188
int getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:975
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:34
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:120

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createEpochFile ( const std::string &  epochFileName)
private

Definition at line 490 of file FileMgr.cpp.

References File_Namespace::create(), epoch_, epochFile_, logger::FATAL, fileMgrBasePath_, LOG, and File_Namespace::write().

Referenced by init().

490  {
491  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
492  if (boost::filesystem::exists(epochFilePath)) {
493  LOG(FATAL) << "Epoch file `" << epochFilePath << "` already exists";
494  }
495  epochFile_ = create(epochFilePath, sizeof(int));
496  // Write out current epoch to file - which if this
497  // function is being called should be 0
498  write(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
499  epoch_++;
500 }
#define LOG(tag)
Definition: Logger.h:188
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:34
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:247
int epoch_
the index of the next file id
Definition: FileMgr.h:246
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:120

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::createFile ( const size_t  pageSize,
const size_t  numPages 
)
private

Adds a file to the file manager repository.

This method will create a FileInfo object for the file being added, and it will create the corresponding file on physical disk with the indicated number of pages pre-allocated.

A pointer to the FileInfo object is returned, which itself has a file pointer (FILE*) and a file identifier (int fileId).

Parameters
fileName The name given to the file in physical storage.
pageSize The logical page size for the pages in the file.
numPages The number of logical pages to initially allocate for the file.
Returns
FileInfo* A pointer to the FileInfo object of the added file.

Definition at line 890 of file FileMgr.cpp.

References CHECK(), File_Namespace::create(), logger::FATAL, fileIndex_, fileMgrBasePath_, files_, files_rw_mutex_, LOG, and nextFileId_.

Referenced by requestFreePage(), and requestFreePages().

890  {
891  // check arguments
892  if (pageSize == 0 || numPages == 0) {
893  LOG(FATAL) << "File creation failed: pageSize and numPages must be greater than 0.";
894  }
895 
896  // create the new file
897  FILE* f = create(fileMgrBasePath_,
898  nextFileId_,
899  pageSize,
900  numPages); // TM: not sure if I like naming scheme here - should be in
901  // separate namespace?
902  CHECK(f);
903 
904  // instantiate a new FileInfo for the newly created file
905  int fileId = nextFileId_++;
906  FileInfo* fInfo =
907  new FileInfo(this, fileId, f, pageSize, numPages, true); // true means init file
908  CHECK(fInfo);
909 
910  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
911  // update file manager data structures
912  files_.push_back(fInfo);
913  fileIndex_.insert(std::pair<size_t, int>(pageSize, fileId));
914 
915  CHECK(files_.back() == fInfo); // postcondition
916  return fInfo;
917 }
#define LOG(tag)
Definition: Logger.h:188
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:34
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
CHECK(cgen_state)
PageSizeFileMMap fileIndex_
A vector of files accessible via a file identifier.
Definition: FileMgr.h:242
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:254

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createTopLevelMetadata ( )

Definition at line 983 of file FileMgr.cpp.

References createDBMetaFile(), DB_META_FILENAME, db_version_, logger::FATAL, getDBVersion(), LOG, and openDBMetaFile().

983  {
984  if (openDBMetaFile(DB_META_FILENAME)) {
985  if (db_version_ > getDBVersion()) {
986  LOG(FATAL) << "DB forward compatibility is not supported. Version of OmniSci "
987  "software used is older than the version of DB being read: "
988  << db_version_;
989  }
990  } else {
991  createDBMetaFile(DB_META_FILENAME);
992  }
993 }
#define LOG(tag)
Definition: Logger.h:188
int getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:975
void createDBMetaFile(const std::string &DBMetaFileName)
Definition: FileMgr.cpp:539
#define DB_META_FILENAME
Definition: FileMgr.cpp:43
bool openDBMetaFile(const std::string &DBMetaFileName)
Definition: FileMgr.cpp:550

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffer ( const ChunkKey &  key,
const bool  purge = true 
)
override

Deletes the chunk with the specified key.

Definition at line 639 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, LOG, and showChunk().

639  {
640  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
641  auto chunkIt = chunkIndex_.find(key);
642  // ensure the Chunk exists
643  if (chunkIt == chunkIndex_.end()) {
644  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
645  }
646  chunkIndexWriteLock.unlock();
647  // chunkIt->second->writeMetadata(-1); // writes -1 as epoch - signifies deleted
648  if (purge) {
649  chunkIt->second->freePages();
650  }
651  //@todo need a way to represent delete in non purge case
652  delete chunkIt->second;
653  chunkIndex_.erase(chunkIt);
654 }
#define LOG(tag)
Definition: Logger.h:188
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffersWithPrefix ( const ChunkKey &  keyPrefix,
const bool  purge = true 
)
override

Definition at line 656 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

656  {
657  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
658  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
659  if (chunkIt == chunkIndex_.end()) {
660  return; // should we throw?
661  }
662  while (chunkIt != chunkIndex_.end() &&
663  std::search(chunkIt->first.begin(),
664  chunkIt->first.begin() + keyPrefix.size(),
665  keyPrefix.begin(),
666  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
667  /*
668  cout << "Freeing pages for chunk ";
669  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
670  std::cout << *vecIt << ",";
671  }
672  cout << endl;
673  */
674  if (purge) {
675  chunkIt->second->freePages();
676  }
677  //@todo need a way to represent delete in non purge case
678  delete chunkIt->second;
679  chunkIndex_.erase(chunkIt++);
680  }
681 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
int File_Namespace::FileMgr::epoch ( )
inline

Returns current value of epoch - should be one greater than recorded at last checkpoint.

Definition at line 204 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileInfo::freePage(), File_Namespace::FileInfo::openExistingFile(), File_Namespace::FileBuffer::reserve(), setEpoch(), and File_Namespace::FileBuffer::write().

204 { return epoch_; }
int epoch_
the index of the next file id
Definition: FileMgr.h:246

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::fetchBuffer ( const ChunkKey &  key,
AbstractBuffer *  destBuffer,
const size_t  numBytes 
)
override

Definition at line 692 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, Data_Namespace::AbstractBuffer::read(), Data_Namespace::AbstractBuffer::reserve(), Data_Namespace::AbstractBuffer::setSize(), showChunk(), Data_Namespace::AbstractBuffer::size(), and Data_Namespace::AbstractBuffer::syncEncoder().

694  {
695  // reads chunk specified by ChunkKey into AbstractBuffer provided by
696  // destBuffer
697  if (destBuffer->isDirty()) {
698  LOG(FATAL)
699  << "Aborting attempt to fetch a chunk marked dirty. Chunk inconsistency for key: "
700  << showChunk(key);
701  }
702  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
703  auto chunkIt = chunkIndex_.find(key);
704  if (chunkIt == chunkIndex_.end()) {
705  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
706  }
707  chunkIndexReadLock.unlock();
708 
709  AbstractBuffer* chunk = chunkIt->second;
710  // ChunkSize is either specified in function call with numBytes or we
711  // just look at pageSize * numPages in FileBuffer
712  size_t chunkSize = numBytes == 0 ? chunk->size() : numBytes;
713  if (numBytes > 0 && numBytes > chunk->size()) {
714  LOG(FATAL) << "Chunk retrieved for key `" << showChunk(key) << "` is smaller ("
715  << chunk->size() << ") than number of bytes requested (" << numBytes
716  << ")";
717  }
718  destBuffer->reserve(chunkSize);
719  // std::cout << "After reserve chunksize: " << chunkSize << std::endl;
720  if (chunk->isUpdated()) {
721  chunk->read(destBuffer->getMemoryPtr(),
722  chunkSize,
723  0,
724  destBuffer->getType(),
725  destBuffer->getDeviceId());
726  } else {
727  chunk->read(destBuffer->getMemoryPtr() + destBuffer->size(),
728  chunkSize - destBuffer->size(),
729  destBuffer->size(),
730  destBuffer->getType(),
731  destBuffer->getDeviceId());
732  }
733  destBuffer->setSize(chunkSize);
734  destBuffer->syncEncoder(chunk);
735 }
void syncEncoder(const AbstractBuffer *src_buffer)
#define LOG(tag)
Definition: Logger.h:188
virtual size_t size() const =0
virtual int8_t * getMemoryPtr()=0
virtual MemoryLevel getType() const =0
virtual bool isDirty() const
virtual void read(int8_t *const dst, const size_t num_bytes, const size_t offset=0, const MemoryLevel dst_buffer_type=CPU_LEVEL, const int dst_device_id=-1)=0
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
An AbstractBuffer is a unit of data management for a data manager.
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
virtual bool isUpdated() const
void setSize(const size_t size)
virtual int getDeviceId() const
virtual void reserve(size_t num_bytes)=0
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

+ Here is the call graph for this function:

void File_Namespace::FileMgr::free ( AbstractBuffer *  buffer)
override

Definition at line 798 of file FileMgr.cpp.

References logger::FATAL, and LOG.

798  {
799  LOG(FATAL) << "Operation not supported";
800 }
#define LOG(tag)
Definition: Logger.h:188
void File_Namespace::FileMgr::free_page ( std::pair< FileInfo *, int > &&  page)

Definition at line 1000 of file FileMgr.cpp.

References free_pages, and mutex_free_page.

Referenced by checkpoint(), and File_Namespace::FileInfo::freePage().

1000  {
1001  std::unique_lock<mapd_shared_mutex> lock(mutex_free_page);
1002  free_pages.push_back(page);
1003 }
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:257
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:256

+ Here is the caller graph for this function:

const std::pair<const int, const int> File_Namespace::FileMgr::get_fileMgrKey ( ) const
inline

Definition at line 234 of file FileMgr.h.

Referenced by File_Namespace::FileInfo::openExistingFile().

234 { return fileMgrKey_; }
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:238

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getAllocated ( )
inline override

Definition at line 152 of file FileMgr.h.

152 { return 0; }
AbstractBuffer * File_Namespace::FileMgr::getBuffer ( const ChunkKey &  key,
const size_t  numBytes = 0 
)
override

Returns a pointer to the chunk with the specified key.

Definition at line 683 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, LOG, and showChunk().

683  {
684  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
685  auto chunkIt = chunkIndex_.find(key);
686  if (chunkIt == chunkIndex_.end()) {
687  LOG(FATAL) << "Chunk does not exist for key: " << showChunk(key);
688  }
689  return chunkIt->second;
690 }
#define LOG(tag)
Definition: Logger.h:188
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

+ Here is the call graph for this function:

void File_Namespace::FileMgr::getChunkMetadataVec ( std::vector< std::pair< ChunkKey, ChunkMetadata >> &  chunkMetadataVec)
override

Definition at line 932 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

933  {
934  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
935  chunkMetadataVec.reserve(chunkIndex_.size());
936  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
937  if (chunkIt->second->has_encoder) {
938  ChunkMetadata chunkMetadata;
939  chunkIt->second->encoder->getMetadata(chunkMetadata);
940  chunkMetadataVec.emplace_back(chunkIt->first, chunkMetadata);
941  }
942  }
943 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
void File_Namespace::FileMgr::getChunkMetadataVecForKeyPrefix ( std::vector< std::pair< ChunkKey, ChunkMetadata >> &  chunkMetadataVec,
const ChunkKey &  keyPrefix 
)
override

Definition at line 945 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

947  {
948  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(
949  chunkIndexMutex_); // is this guarding the right structure? it look slike we oly
950  // read here for chunk
951  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
952  if (chunkIt == chunkIndex_.end()) {
953  return; // throw?
954  }
955  while (chunkIt != chunkIndex_.end() &&
956  std::search(chunkIt->first.begin(),
957  chunkIt->first.begin() + keyPrefix.size(),
958  keyPrefix.begin(),
959  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
960  /*
961  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
962  std::cout << *vecIt << ",";
963  }
964  cout << endl;
965  */
966  if (chunkIt->second->has_encoder) {
967  ChunkMetadata chunkMetadata;
968  chunkIt->second->encoder->getMetadata(chunkMetadata);
969  chunkMetadataVec.emplace_back(chunkIt->first, chunkMetadata);
970  }
971  chunkIt++;
972  }
973 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
bool File_Namespace::FileMgr::getDBConvert ( ) const

Definition at line 979 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBConvert(), and gfm_.

979  {
980  return gfm_->getDBConvert();
981 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237

+ Here is the call graph for this function:

int File_Namespace::FileMgr::getDBVersion ( ) const

Returns the DB version reported by the global file manager (gfm_).

Definition at line 975 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBVersion(), and gfm_.

Referenced by createDBMetaFile(), createTopLevelMetadata(), and writeAndSyncDBMetaToDisk().

975  {
976  return gfm_->getDBVersion();
977 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:237

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FILE * File_Namespace::FileMgr::getFileForFileId ( const int  fileId)

Returns FILE pointer associated with requested fileId.

See Also
FileBuffer

Definition at line 919 of file FileMgr.cpp.

References CHECK(), and files_.

Referenced by File_Namespace::FileBuffer::readMetadata(), and File_Namespace::FileBuffer::writeMetadata().

919  {
920  CHECK(fileId >= 0 && static_cast<size_t>(fileId) < files_.size());
921  return files_[fileId]->f;
922 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
CHECK(cgen_state)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo* File_Namespace::FileMgr::getFileInfoForFileId ( const int  fileId)
inline

Definition at line 155 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileBuffer::copyPage(), copyPage(), File_Namespace::FileBuffer::freePages(), File_Namespace::readForThread(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeHeader().

155 { return files_[fileId]; }
std::vector< FileInfo * > files_
Definition: FileMgr.h:241

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::getFileMgrBasePath ( ) const
inline

Definition at line 230 of file FileMgr.h.

Referenced by closeRemovePhysical().

230 { return fileMgrBasePath_; }
std::string fileMgrBasePath_
Definition: FileMgr.h:239

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getInUseSize ( )
inlineoverride

Definition at line 151 of file FileMgr.h.

151 { return 0; }
size_t File_Namespace::FileMgr::getMaxSize ( )
inlineoverride

Definition at line 150 of file FileMgr.h.

150 { return 0; }
MgrType File_Namespace::FileMgr::getMgrType ( )
inlineoverride

Definition at line 145 of file FileMgr.h.

145 { return FILE_MGR; };
size_t File_Namespace::FileMgr::getNumChunks ( )
inlineoverride

Definition at line 221 of file FileMgr.h.

221  {
222  // @todo should be locked - but this is more for testing now
223  return chunkIndex_.size();
224  }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
size_t File_Namespace::FileMgr::getNumReaderThreads ( )
inline

Returns the number of threads, as defined by the num-reader-threads parameter, that should be used during the initial load and subsequent reads of data.

Definition at line 210 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::read().

210 { return num_reader_threads_; }
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:243

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::getStringMgrType ( )
inlineoverride

Definition at line 146 of file FileMgr.h.

146 { return ToString(FILE_MGR); }
void File_Namespace::FileMgr::init ( const size_t  num_reader_threads)

Definition at line 124 of file FileMgr.cpp.

References CHECK_EQ, chunkIndex_, createEpochFile(), epoch_, EPOCH_FILENAME, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, fileMgrKey_, File_Namespace::fileSize(), File_Namespace::GlobalFileMgr::getBasePath(), gfm_, File_Namespace::headerCompare(), logger::INFO, LOG, MAPD_FILE_EXT, nextFileId_, num_reader_threads_, openEpochFile(), openExistingFile(), processFileFutures(), timer_start(), timer_stop(), to_string(), and VLOG.

Referenced by FileMgr().

124  {
125  // if epoch = -1 this means open from epoch file
126  const std::string fileMgrDirPrefix("table");
127  const std::string FileMgrDirDelim("_");
128  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
129  std::to_string(fileMgrKey_.first) + // db_id
130  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
131  boost::filesystem::path path(fileMgrBasePath_);
132  if (boost::filesystem::exists(path)) {
133  if (!boost::filesystem::is_directory(path)) {
134  LOG(FATAL) << "Specified path '" << fileMgrBasePath_
135  << "' for table data is not a directory.";
136  }
137  if (epoch_ != -1) { // if opening at previous epoch
138  int epochCopy = epoch_;
140  epoch_ = epochCopy;
141  } else {
143  }
144 
145  auto clock_begin = timer_start();
146 
147  boost::filesystem::directory_iterator
148  endItr; // default construction yields past-the-end
149  int maxFileId = -1;
150  int fileCount = 0;
151  int threadCount = std::thread::hardware_concurrency();
152  std::vector<HeaderInfo> headerVec;
153  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
154  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
155  if (boost::filesystem::is_regular_file(fileIt->status())) {
156  // note that boost::filesystem leaves preceding dot on
157  // extension - hence MAPD_FILE_EXT is ".mapd"
158  std::string extension(fileIt->path().extension().string());
159 
160  if (extension == MAPD_FILE_EXT) {
161  std::string fileStem(fileIt->path().stem().string());
162  // remove trailing dot if any
163  if (fileStem.size() > 0 && fileStem.back() == '.') {
164  fileStem = fileStem.substr(0, fileStem.size() - 1);
165  }
166  size_t dotPos = fileStem.find_last_of("."); // should only be one
167  if (dotPos == std::string::npos) {
168  LOG(FATAL) << "File `" << fileIt->path()
169  << "` does not carry page size information in the filename.";
170  }
171  int fileId = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
172  if (fileId > maxFileId) {
173  maxFileId = fileId;
174  }
175  size_t pageSize =
176  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
177  std::string filePath(fileIt->path().string());
178  size_t fileSize = boost::filesystem::file_size(filePath);
179  CHECK_EQ(fileSize % pageSize, size_t(0)); // should be no partial pages
180  size_t numPages = fileSize / pageSize;
181 
182  VLOG(4) << "File id: " << fileId << " Page size: " << pageSize
183  << " Num pages: " << numPages;
184 
185  file_futures.emplace_back(std::async(
186  std::launch::async, [filePath, fileId, pageSize, numPages, this] {
187  std::vector<HeaderInfo> tempHeaderVec;
188  openExistingFile(filePath, fileId, pageSize, numPages, tempHeaderVec);
189  return tempHeaderVec;
190  }));
191  fileCount++;
192  if (fileCount % threadCount == 0) {
193  processFileFutures(file_futures, headerVec);
194  }
195  }
196  }
197  }
198 
199  if (file_futures.size() > 0) {
200  processFileFutures(file_futures, headerVec);
201  }
202  int64_t queue_time_ms = timer_stop(clock_begin);
203 
204  LOG(INFO) << "Completed Reading table's file metadata, Elapsed time : "
205  << queue_time_ms << "ms Epoch: " << epoch_ << " files read: " << fileCount
206  << " table location: '" << fileMgrBasePath_ << "'";
207 
208  /* Sort headerVec so that all HeaderInfos
209  * from a chunk will be grouped together
210  * and in order of increasing PageId
211  * - Version Epoch */
212 
213  std::sort(headerVec.begin(), headerVec.end(), headerCompare);
214 
215  /* Goal of next section is to find sequences in the
216  * sorted headerVec of the same ChunkId, which we
217  * can then initiate a FileBuffer with */
218 
219  VLOG(4) << "Number of Headers in Vector: " << headerVec.size();
220  if (headerVec.size() > 0) {
221  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
222  auto startIt = headerVec.begin();
223 
224  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
225  ++headerIt) {
226  // for (auto chunkIt = headerIt->chunkKey.begin(); chunkIt !=
227  // headerIt->chunkKey.end(); ++chunkIt) {
228  // std::cout << *chunkIt << " ";
229  //}
230 
231  if (headerIt->chunkKey != lastChunkKey) {
232  chunkIndex_[lastChunkKey] =
233  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, headerIt);
234  /*
235  if (startIt->versionEpoch != -1) {
236  cout << "not skipping bc version != -1" << endl;
237  // -1 means that chunk was deleted
238  // lets not read it in
239  chunkIndex_[lastChunkKey] = new FileBuffer
240  (this,/lastChunkKey,startIt,headerIt);
241 
242  }
243  else {
244  cout << "Skipping bc version == -1" << endl;
245  }
246  */
247  lastChunkKey = headerIt->chunkKey;
248  startIt = headerIt;
249  }
250  }
251  // now need to insert last Chunk
252  // size_t pageSize = files_[startIt->page.fileId]->pageSize;
253  // cout << "Inserting last chunk" << endl;
254  // if (startIt->versionEpoch != -1) {
255  chunkIndex_[lastChunkKey] =
256  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, headerVec.end());
257  //}
258  }
259  nextFileId_ = maxFileId + 1;
260  // std::cout << "next file id: " << nextFileId_ << std::endl;
261  } else {
262  if (!boost::filesystem::create_directory(path)) {
263  LOG(FATAL) << "Could not create data directory: " << path;
264  }
266  }
267 
268  /* define number of reader threads to be used */
269  size_t num_hardware_based_threads =
270  std::thread::hardware_concurrency(); // # of threads is based on # of cores on the
271  // host
272  if (num_reader_threads == 0) { // # of threads has not been defined by user
273  num_reader_threads_ = num_hardware_based_threads;
274  } else {
275  if (num_reader_threads > num_hardware_based_threads) {
276  num_reader_threads_ = num_hardware_based_threads;
277  } else {
278  num_reader_threads_ = num_reader_threads;
279  }
280  }
281 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< int > ChunkKey
Definition: types.h:35
std::string getBasePath() const
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:490
void openEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:502
#define LOG(tag)
Definition: Logger.h:188
#define MAPD_FILE_EXT
Definition: File.h:26
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
std::string fileMgrBasePath_
Definition: FileMgr.h:239
bool headerCompare(const HeaderInfo &firstElem, const HeaderInfo &secondElem)
Definition: FileMgr.cpp:49
std::string to_string(char const *&&v)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:283
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:243
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:170
#define EPOCH_FILENAME
Definition: FileMgr.cpp:42
std::pair< const int, const int > fileMgrKey_
Key identifying this FileMgr: (db_id, tb_id).
Definition: FileMgr.h:238
#define VLOG(n)
Definition: Logger.h:291
Type timer_start()
Definition: measure.h:40
FileInfo * openExistingFile(const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:871

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::init ( const std::string  dataPathToConvertFrom)

Definition at line 297 of file FileMgr.cpp.

References CHECK(), chunkIndex_, copyPage(), epoch_, EPOCH_FILENAME, File_Namespace::MultiPage::epochs, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), File_Namespace::fileSize(), File_Namespace::GlobalFileMgr::getFileMgr(), File_Namespace::FileBuffer::getMultiPage(), gfm_, File_Namespace::headerCompare(), LOG, MAPD_FILE_EXT, File_Namespace::FileBuffer::multiPages_, nextFileId_, openEpochFile(), openExistingFile(), File_Namespace::FileBuffer::pageDataSize(), File_Namespace::FileBuffer::pageSize(), processFileFutures(), requestFreePage(), File_Namespace::FileBuffer::reservedHeaderSize(), Data_Namespace::AbstractBuffer::setDirty(), Data_Namespace::AbstractBuffer::setSize(), File_Namespace::FileBuffer::size(), Data_Namespace::AbstractBuffer::syncEncoder(), and File_Namespace::FileBuffer::writeHeader().

297  {
298  int converted_data_epoch = 0;
299  boost::filesystem::path path(dataPathToConvertFrom);
300  if (boost::filesystem::exists(path)) {
301  if (!boost::filesystem::is_directory(path)) {
302  LOG(FATAL) << "Specified path `" << path << "` is not a directory.";
303  }
304 
305  if (epoch_ != -1) { // if opening at previous epoch
306  int epochCopy = epoch_;
308  epoch_ = epochCopy;
309  } else {
311  }
312 
313  boost::filesystem::directory_iterator
314  endItr; // default construction yields past-the-end
315  int maxFileId = -1;
316  int fileCount = 0;
317  int threadCount = std::thread::hardware_concurrency();
318  std::vector<HeaderInfo> headerVec;
319  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
320  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
321  if (boost::filesystem::is_regular_file(fileIt->status())) {
322  // note that boost::filesystem leaves preceding dot on
323  // extension - hence MAPD_FILE_EXT is ".mapd"
324  std::string extension(fileIt->path().extension().string());
325 
326  if (extension == MAPD_FILE_EXT) {
327  std::string fileStem(fileIt->path().stem().string());
328  // remove trailing dot if any
329  if (fileStem.size() > 0 && fileStem.back() == '.') {
330  fileStem = fileStem.substr(0, fileStem.size() - 1);
331  }
332  size_t dotPos = fileStem.find_last_of("."); // should only be one
333  if (dotPos == std::string::npos) {
334  LOG(FATAL) << "File `" + fileIt->path().string() +
335  "` does not carry page size information in the filename.";
336  }
337  int fileId = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
338  if (fileId > maxFileId) {
339  maxFileId = fileId;
340  }
341  size_t pageSize =
342  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
343  std::string filePath(fileIt->path().string());
344  size_t fileSize = boost::filesystem::file_size(filePath);
345  CHECK(fileSize % pageSize == 0); // should be no partial pages
346  size_t numPages = fileSize / pageSize;
347 
348  file_futures.emplace_back(std::async(
349  std::launch::async, [filePath, fileId, pageSize, numPages, this] {
350  std::vector<HeaderInfo> tempHeaderVec;
351  openExistingFile(filePath, fileId, pageSize, numPages, tempHeaderVec);
352  return tempHeaderVec;
353  }));
354  fileCount++;
355  if (fileCount % threadCount) {
356  processFileFutures(file_futures, headerVec);
357  }
358  }
359  }
360  }
361 
362  if (file_futures.size() > 0) {
363  processFileFutures(file_futures, headerVec);
364  }
365 
366  /* Sort headerVec so that all HeaderInfos
367  * from a chunk will be grouped together
368  * and in order of increasing PageId
369  * - Version Epoch */
370 
371  std::sort(headerVec.begin(), headerVec.end(), headerCompare);
372 
373  /* Goal of next section is to find sequences in the
374  * sorted headerVec of the same ChunkId, which we
375  * can then initiate a FileBuffer with */
376 
377  if (headerVec.size() > 0) {
378  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
379  auto startIt = headerVec.begin();
380 
381  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
382  ++headerIt) {
383  if (headerIt->chunkKey != lastChunkKey) {
384  FileMgr* c_fm_ =
385  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
386  CHECK(c_fm_);
387  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerIt);
388  chunkIndex_[lastChunkKey] = srcBuf;
389  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
390  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
391  destBuf->syncEncoder(srcBuf);
392  destBuf->setSize(srcBuf->size());
393  destBuf->setDirty(); // this needs to be set to force writing out metadata
394  // files from "checkpoint()" call
395 
396  size_t totalNumPages = srcBuf->getMultiPage().size();
397  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
398  Page srcPage = srcBuf->getMultiPage()[pageNum].current();
399  Page destPage = c_fm_->requestFreePage(
400  srcBuf->pageSize(),
401  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
402  MultiPage multiPage(srcBuf->pageSize());
403  multiPage.epochs.push_back(converted_data_epoch);
404  multiPage.pageVersions.push_back(destPage);
405  destBuf->multiPages_.push_back(multiPage);
406  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
407  copyPage(
408  srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
409  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
410  }
411  lastChunkKey = headerIt->chunkKey;
412  startIt = headerIt;
413  }
414  }
415 
416  // now need to insert last Chunk
417  FileMgr* c_fm_ =
418  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
419  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerVec.end());
420  chunkIndex_[lastChunkKey] = srcBuf;
421  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
422  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
423  destBuf->syncEncoder(srcBuf);
424  destBuf->setSize(srcBuf->size());
425  destBuf->setDirty(); // this needs to be set to write out metadata file from the
426  // "checkpoint()" call
427 
428  size_t totalNumPages = srcBuf->getMultiPage().size();
429  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
430  Page srcPage = srcBuf->getMultiPage()[pageNum].current();
431  Page destPage = c_fm_->requestFreePage(
432  srcBuf->pageSize(),
433  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
434  MultiPage multiPage(srcBuf->pageSize());
435  multiPage.epochs.push_back(converted_data_epoch);
436  multiPage.pageVersions.push_back(destPage);
437  destBuf->multiPages_.push_back(multiPage);
438  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
439  copyPage(srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
440  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
441  }
442  }
443  nextFileId_ = maxFileId + 1;
444  } else {
445  if (!boost::filesystem::create_directory(path)) {
446  LOG(FATAL) << "Specified path does not exist: " << path;
447  }
448  }
449 }
std::vector< int > ChunkKey
Definition: types.h:35
void openEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:502
#define LOG(tag)
Definition: Logger.h:188
void copyPage(Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
Definition: FileMgr.cpp:468
#define MAPD_FILE_EXT
Definition: File.h:26
GlobalFileMgr * gfm_
Definition: FileMgr.h:237
bool headerCompare(const HeaderInfo &firstElem, const HeaderInfo &secondElem)
Definition: FileMgr.cpp:49
FileMgr(const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const size_t num_reader_threads=0, const int epoch=-1, const size_t defaultPageSize=2097152)
Constructor.
Definition: FileMgr.cpp:67
CHECK(cgen_state)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:283
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
AbstractBufferMgr * getFileMgr(const int db_id, const int tb_id)
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:170
#define EPOCH_FILENAME
Definition: FileMgr.cpp:42
FileInfo * openExistingFile(const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:871

+ Here is the call graph for this function:

bool File_Namespace::FileMgr::isAllocationCapped ( )
inlineoverride

Definition at line 153 of file FileMgr.h.

153 { return false; }
bool File_Namespace::FileMgr::isBufferOnDevice ( const ChunkKey key)
override

Definition at line 634 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

634  {
635  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
636  return chunkIndex_.find(key) != chunkIndex_.end();
637 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253
bool File_Namespace::FileMgr::openDBMetaFile ( const std::string &  DBMetaFileName)
private

Definition at line 550 of file FileMgr.cpp.

References db_version_, DBMetaFile_, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, File_Namespace::open(), and File_Namespace::read().

Referenced by createTopLevelMetadata().

550  {
551  std::string DBMetaFilePath(fileMgrBasePath_ + "/" + DBMetaFileName);
552 
553  if (!boost::filesystem::exists(DBMetaFilePath)) {
554  // LOG(INFO) << "DB metadata file does not exist, one will be created.";
555  return false;
556  }
557  if (!boost::filesystem::is_regular_file(DBMetaFilePath)) {
558  // LOG(INFO) << "DB metadata file is not a regular file, one will be created.";
559  return false;
560  }
561  if (boost::filesystem::file_size(DBMetaFilePath) < 4) {
562  // LOG(INFO) << "DB metadata file is not sized properly, one will be created.";
563  return false;
564  }
565  DBMetaFile_ = open(DBMetaFilePath);
566  read(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_version_);
567 
568  return true;
569 }
std::string fileMgrBasePath_
Definition: FileMgr.h:239
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:112
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:82

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::openEpochFile ( const std::string &  epochFileName)
private

Definition at line 502 of file FileMgr.cpp.

References epoch_, epochFile_, logger::FATAL, anonymous_namespace{StringDictionary.cpp}::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), and File_Namespace::read().

Referenced by init().

502  {
503  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
504  if (!boost::filesystem::exists(epochFilePath)) {
505  LOG(FATAL) << "Epoch file `" << epochFilePath << "` does not exist";
506  }
507  if (!boost::filesystem::is_regular_file(epochFilePath)) {
508  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
509  }
510  if (boost::filesystem::file_size(epochFilePath) < 4) {
511  LOG(FATAL) << "Epoch file `" << epochFilePath
512  << "` is not sized properly (current size: "
513  << boost::filesystem::file_size(epochFilePath) << ", expected size: 4)";
514  }
515  epochFile_ = open(epochFilePath);
516  read(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
517  // std::cout << "Epoch after open file: " << epoch_ << std::endl;
518  epoch_++; // we are in new epoch from last checkpoint
519 }
#define LOG(tag)
Definition: Logger.h:188
std::string fileMgrBasePath_
Definition: FileMgr.h:239
FILE * epochFile_
FILE handle of the opened epoch file
Definition: FileMgr.h:247
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:112
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:82

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::openExistingFile ( const std::string &  path,
const int  fileId,
const size_t  pageSize,
const size_t  numPages,
std::vector< HeaderInfo > &  headerVec 
)
private

Definition at line 871 of file FileMgr.cpp.

References epoch_, fileIndex_, files_, files_rw_mutex_, File_Namespace::open(), and File_Namespace::FileInfo::openExistingFile().

Referenced by init().

875  {
876  FILE* f = open(path);
877  FileInfo* fInfo = new FileInfo(
878  this, fileId, f, pageSize, numPages, false); // false means don't init file
879 
880  fInfo->openExistingFile(headerVec, epoch_);
881  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
882  if (fileId >= static_cast<int>(files_.size())) {
883  files_.resize(fileId + 1);
884  }
885  files_[fileId] = fInfo;
886  fileIndex_.insert(std::pair<size_t, int>(pageSize, fileId));
887  return fInfo;
888 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
PageSizeFileMMap fileIndex_
Maps page sizes to the ids of the files that use that page size.
Definition: FileMgr.h:242
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:82
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:254

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::printSlabs ( )
inlineoverride

Definition at line 147 of file FileMgr.h.

147 { return "Not Implemented"; }
void File_Namespace::FileMgr::processFileFutures ( std::vector< std::future< std::vector< HeaderInfo >>> &  file_futures,
std::vector< HeaderInfo > &  headerVec 
)
private

Definition at line 283 of file FileMgr.cpp.

Referenced by init().

285  {
286  for (auto& file_future : file_futures) {
287  file_future.wait();
288  }
289  // concatenate the vectors after thread completes
290  for (auto& file_future : file_futures) {
291  auto tempHeaderVec = file_future.get();
292  headerVec.insert(headerVec.end(), tempHeaderVec.begin(), tempHeaderVec.end());
293  }
294  file_futures.clear();
295 }

+ Here is the caller graph for this function:

AbstractBuffer * File_Namespace::FileMgr::putBuffer ( const ChunkKey key,
AbstractBuffer d,
const size_t  numBytes = 0 
)
override

Puts the contents of d into the Chunk with the given key.

Parameters
key- Unique identifier for a Chunk.
d- An object representing the source data for the Chunk.
Returns
AbstractBuffer*

Definition at line 737 of file FileMgr.cpp.

References Data_Namespace::AbstractBuffer::append(), CHECK_LT, chunkIndex_, chunkIndexMutex_, Data_Namespace::AbstractBuffer::clearDirtyBits(), createBuffer(), defaultPageSize_, logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isAppended(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, Data_Namespace::AbstractBuffer::setSize(), showChunk(), Data_Namespace::AbstractBuffer::size(), Data_Namespace::AbstractBuffer::syncEncoder(), and Data_Namespace::AbstractBuffer::write().

739  {
740  // obtain a pointer to the Chunk
741  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
742  auto chunkIt = chunkIndex_.find(key);
743  AbstractBuffer* chunk;
744  if (chunkIt == chunkIndex_.end()) {
745  chunk = createBuffer(key, defaultPageSize_);
746  } else {
747  chunk = chunkIt->second;
748  }
749  chunkIndexWriteLock.unlock();
750  size_t oldChunkSize = chunk->size();
751  // write the buffer's data to the Chunk
752  // size_t newChunkSize = numBytes == 0 ? srcBuffer->size() : numBytes;
753  size_t newChunkSize = numBytes == 0 ? srcBuffer->size() : numBytes;
754  if (chunk->isDirty()) {
755  // multiple appends are allowed,
756  // but only single update is allowed
757  if (srcBuffer->isUpdated() && chunk->isUpdated()) {
758  LOG(FATAL) << "Aborting attempt to write a chunk marked dirty. Chunk inconsistency "
759  "for key: "
760  << showChunk(key);
761  }
762  }
763  if (srcBuffer->isUpdated()) {
764  // chunk size is not changed when fixed rows are updated or are marked as deleted.
765  // but when rows are vacuumed or varlen rows are updated (new rows are appended),
766  // chunk size will change. For vacuum, checkpoint should sync size from cpu to disk.
767  // For varlen update, it takes another route via fragmenter using disk-level buffer.
768  if (0 == numBytes && !chunk->isDirty()) {
769  chunk->setSize(newChunkSize);
770  }
771  //@todo use dirty flags to only flush pages of chunk that need to
772  // be flushed
773  chunk->write((int8_t*)srcBuffer->getMemoryPtr(),
774  newChunkSize,
775  0,
776  srcBuffer->getType(),
777  srcBuffer->getDeviceId());
778  } else if (srcBuffer->isAppended()) {
779  CHECK_LT(oldChunkSize, newChunkSize);
780  chunk->append((int8_t*)srcBuffer->getMemoryPtr() + oldChunkSize,
781  newChunkSize - oldChunkSize,
782  srcBuffer->getType(),
783  srcBuffer->getDeviceId());
784  }
785  // chunk->clearDirtyBits(); // Hack: because write and append will set dirty bits
786  //@todo commenting out line above will make sure this metadata is set
787  // but will trigger error on fetch chunk
788  srcBuffer->clearDirtyBits();
789  chunk->syncEncoder(srcBuffer);
790  return chunk;
791 }
void syncEncoder(const AbstractBuffer *src_buffer)
#define LOG(tag)
Definition: Logger.h:188
virtual size_t size() const =0
virtual bool isDirty() const
AbstractBuffer * createBuffer(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
Creates a chunk with the specified key and page size.
Definition: FileMgr.cpp:614
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:225
An AbstractBuffer is a unit of data management for a data manager.
virtual void write(int8_t *src, const size_t num_bytes, const size_t offset=0, const MemoryLevel src_buffer_type=CPU_LEVEL, const int src_device_id=-1)=0
std::string showChunk(const ChunkKey &key)
Definition: types.h:37
virtual bool isUpdated() const
#define CHECK_LT(x, y)
Definition: Logger.h:207
size_t defaultPageSize_
page size used when creating a buffer for a chunk that is not yet in the chunk index
Definition: FileMgr.h:244
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
void setSize(const size_t size)
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:253

+ Here is the call graph for this function:

Page File_Namespace::FileMgr::requestFreePage ( size_t  pagesize,
const bool  isMetadata 
)

Definition at line 802 of file FileMgr.cpp.

References CHECK(), createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, MAX_FILE_N_METADATA_PAGES, and MAX_FILE_N_PAGES.

Referenced by File_Namespace::FileBuffer::addNewMultiPage(), init(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeMetadata().

802  {
803  std::lock_guard<std::mutex> lock(getPageMutex_);
804 
805  auto candidateFiles = fileIndex_.equal_range(pageSize);
806  int pageNum = -1;
807  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
808  FileInfo* fileInfo = files_[fileIt->second];
809  pageNum = fileInfo->getFreePage();
810  if (pageNum != -1) {
811  return (Page(fileInfo->fileId, pageNum));
812  }
813  }
814  // if here then we need to add a file
815  FileInfo* fileInfo;
816  if (isMetadata) {
817  fileInfo = createFile(pageSize, MAX_FILE_N_METADATA_PAGES);
818  } else {
819  fileInfo = createFile(pageSize, MAX_FILE_N_PAGES);
820  }
821  pageNum = fileInfo->getFreePage();
822  CHECK(pageNum != -1);
823  return (Page(fileInfo->fileId, pageNum));
824 }
std::mutex getPageMutex_
mutex held while free pages are being handed out
Definition: FileMgr.h:252
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
CHECK(cgen_state)
#define MAX_FILE_N_METADATA_PAGES
Definition: File.h:28
PageSizeFileMMap fileIndex_
Maps page sizes to FileInfo objects (via indices into files_).
Definition: FileMgr.h:242
#define MAX_FILE_N_PAGES
Definition: File.h:27
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:890

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::requestFreePages ( size_t  npages,
size_t  pagesize,
std::vector< Page > &  pages,
const bool  isMetadata 
)

Obtains free pages – creates new files if necessary – of the requested size.

Given a page size and number of pages, this method updates the vector "pages" to include free pages of the requested size. These pages are immediately removed from the free list of the affected file(s). If there are not enough pages available among current files, new files are created and their pages are included in the vector.

Parameters
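npages: The number of free pages requested
pagesize: The size of each requested page
pages: A vector containing the free pages obtained by this method
isMetadata: If true, any newly created file is sized with MAX_FILE_N_METADATA_PAGES; otherwise with MAX_FILE_N_PAGES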

Definition at line 826 of file FileMgr.cpp.

References CHECK(), createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, MAX_FILE_N_METADATA_PAGES, and MAX_FILE_N_PAGES.

829  {
830  // not used currently
831  // @todo add method to FileInfo to get more than one page
832  std::lock_guard<std::mutex> lock(getPageMutex_);
833  auto candidateFiles = fileIndex_.equal_range(pageSize);
834  size_t numPagesNeeded = numPagesRequested;
835  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
836  FileInfo* fileInfo = files_[fileIt->second];
837  int pageNum;
838  do {
839  pageNum = fileInfo->getFreePage();
840  if (pageNum != -1) {
841  pages.emplace_back(fileInfo->fileId, pageNum);
842  numPagesNeeded--;
843  }
844  } while (pageNum != -1 && numPagesNeeded > 0);
845  if (numPagesNeeded == 0) {
846  break;
847  }
848  }
849  while (numPagesNeeded > 0) {
850  FileInfo* fileInfo;
851  if (isMetadata) {
852  fileInfo = createFile(pageSize, MAX_FILE_N_METADATA_PAGES);
853  } else {
854  fileInfo = createFile(pageSize, MAX_FILE_N_PAGES);
855  }
856  int pageNum;
857  do {
858  pageNum = fileInfo->getFreePage();
859  if (pageNum != -1) {
860  pages.emplace_back(fileInfo->fileId, pageNum);
861  numPagesNeeded--;
862  }
863  } while (pageNum != -1 && numPagesNeeded > 0);
864  if (numPagesNeeded == 0) {
865  break;
866  }
867  }
868  CHECK(pages.size() == numPagesRequested);
869 }
std::mutex getPageMutex_
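mutex locked by requestFreePage() and requestFreePages() (the generated text "pointer to DB level metadata" describes DBMetaFile_)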
Definition: FileMgr.h:252
std::vector< FileInfo * > files_
Definition: FileMgr.h:241
CHECK(cgen_state)
#define MAX_FILE_N_METADATA_PAGES
Definition: File.h:28
PageSizeFileMMap fileIndex_
Maps page sizes to FileInfo objects (via indices into files_).
Definition: FileMgr.h:242
#define MAX_FILE_N_PAGES
Definition: File.h:27
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:890

+ Here is the call graph for this function:

void File_Namespace::FileMgr::setEpoch ( int  epoch)
private

Definition at line 995 of file FileMgr.cpp.

References epoch(), epoch_, and writeAndSyncEpochToDisk().

995  {
996  epoch_ = epoch;
997  writeAndSyncEpochToDisk();
998 }
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:204
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:521

+ Here is the call graph for this function:

void File_Namespace::FileMgr::writeAndSyncDBMetaToDisk ( )
private

Definition at line 571 of file FileMgr.cpp.

References DBMetaFile_, logger::FATAL, getDBVersion(), LOG, and File_Namespace::write().

571  {
572  int db_ver = getDBVersion();
573  write(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_ver);
574  int status = fflush(DBMetaFile_);
575  if (status != 0) {
576  LOG(FATAL) << "Could not sync DB metadata file to disk";
577  }
578 }
#define LOG(tag)
Definition: Logger.h:188
int getDBVersion() const
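Returns the DB version as an int. (The generated text "Index for looking up chunks." appears to be a comment from a neighbouring member declaration that Doxygen attached here; confirm against FileMgr.h.)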
Definition: FileMgr.cpp:975
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:120

+ Here is the call graph for this function:

void File_Namespace::FileMgr::writeAndSyncEpochToDisk ( )
private

Definition at line 521 of file FileMgr.cpp.

References epoch_, epochFile_, logger::FATAL, LOG, and File_Namespace::write().

Referenced by checkpoint(), and setEpoch().

521  {
522  write(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
523  int status = fflush(epochFile_);
524  // int status = fcntl(fileno(epochFile_),51);
525  if (status != 0) {
526  LOG(FATAL) << "Could not flush epoch file to disk";
527  }
528 #ifdef __APPLE__
529  status = fcntl(fileno(epochFile_), 51);
530 #else
531  status = fsync(fileno(epochFile_));
532 #endif
533  if (status != 0) {
534  LOG(FATAL) << "Could not sync epoch file to disk";
535  }
536  ++epoch_;
537 }
#define LOG(tag)
Definition: Logger.h:188
FILE * epochFile_
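file handle to which writeAndSyncEpochToDisk() writes epoch_ (the generated text "the current epoch (time of last checkpoint)" describes epoch_)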
Definition: FileMgr.h:247
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:246
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:120

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class GlobalFileMgr
friend

Definition at line 86 of file FileMgr.h.

Member Data Documentation

int File_Namespace::FileMgr::db_version_
private

DB version from dbmeta file, should be compatible with GlobalFileMgr::mapd_db_version_

Definition at line 248 of file FileMgr.h.

Referenced by createTopLevelMetadata(), and openDBMetaFile().

FILE* File_Namespace::FileMgr::DBMetaFile_
private

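Pointer to DB level metadata. (The generated text "DB version from dbmeta file, should be compatible with GlobalFileMgr::mapd_db_version_" describes db_version_; Doxygen attached that trailing comment to the following member.)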

Definition at line 250 of file FileMgr.h.

Referenced by createDBMetaFile(), openDBMetaFile(), and writeAndSyncDBMetaToDisk().

size_t File_Namespace::FileMgr::defaultPageSize_
private

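Presumably the default page size given to the constructor (defaultPageSize argument). (The generated text "number of threads used when loading data" describes num_reader_threads_; Doxygen attached that trailing comment to the following member.)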

Definition at line 244 of file FileMgr.h.

Referenced by copyPage(), createBuffer(), and putBuffer().

int File_Namespace::FileMgr::epoch_
private

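The current epoch (time of last checkpoint). (The generated text "the index of the next file id" describes nextFileId_; Doxygen attached that trailing comment to the following member.)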

Definition at line 246 of file FileMgr.h.

Referenced by checkpoint(), createEpochFile(), init(), openEpochFile(), openExistingFile(), setEpoch(), and writeAndSyncEpochToDisk().

FILE* File_Namespace::FileMgr::epochFile_
private

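File handle to which writeAndSyncEpochToDisk() writes epoch_. (The generated text "the current epoch (time of last checkpoint)" describes epoch_; Doxygen attached that trailing comment to the following member.)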

Definition at line 247 of file FileMgr.h.

Referenced by closeRemovePhysical(), createEpochFile(), FileMgr(), openEpochFile(), and writeAndSyncEpochToDisk().

PageSizeFileMMap File_Namespace::FileMgr::fileIndex_
private

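Maps page sizes to FileInfo objects (via indices into files_). (The generated text "A vector of files accessible via a file identifier." describes files_; Doxygen attached that trailing comment to the following member.)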

Definition at line 242 of file FileMgr.h.

Referenced by createFile(), openExistingFile(), requestFreePage(), and requestFreePages().

std::string File_Namespace::FileMgr::fileMgrBasePath_
private

The OS file system path containing files related to this FileMgr

std::pair<const int, const int> File_Namespace::FileMgr::fileMgrKey_
private

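Presumably the key pair passed to the constructor as fileMgrKey. (The generated text "Global FileMgr." describes gfm_; Doxygen attached that trailing comment to the following member.)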

Definition at line 238 of file FileMgr.h.

Referenced by FileMgr(), and init().

std::vector<FileInfo*> File_Namespace::FileMgr::files_
private

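A vector of files accessible via a file identifier. (The generated text "The OS file system path containing files related to this FileMgr" describes fileMgrBasePath_; Doxygen attached that trailing comment to the following member.)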

Definition at line 241 of file FileMgr.h.

Referenced by checkpoint(), closeRemovePhysical(), createFile(), FileMgr(), getFileForFileId(), openExistingFile(), requestFreePage(), requestFreePages(), and ~FileMgr().

mapd_shared_mutex File_Namespace::FileMgr::files_rw_mutex_
mutableprivate

Definition at line 254 of file FileMgr.h.

Referenced by checkpoint(), createFile(), and openExistingFile().

std::vector<std::pair<FileInfo*, int> > File_Namespace::FileMgr::free_pages
private

Definition at line 257 of file FileMgr.h.

Referenced by checkpoint(), and free_page().

std::mutex File_Namespace::FileMgr::getPageMutex_
private

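Mutex locked by requestFreePage() and requestFreePages() for the duration of each call. (The generated text "pointer to DB level metadata" describes DBMetaFile_; Doxygen attached that trailing comment to the following member.)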

Definition at line 252 of file FileMgr.h.

Referenced by requestFreePage(), and requestFreePages().

GlobalFileMgr* File_Namespace::FileMgr::gfm_
private

Global FileMgr.

Definition at line 237 of file FileMgr.h.

Referenced by FileMgr(), getDBConvert(), getDBVersion(), and init().

mapd_shared_mutex File_Namespace::FileMgr::mutex_free_page
mutableprivate

Definition at line 256 of file FileMgr.h.

Referenced by checkpoint(), and free_page().

unsigned File_Namespace::FileMgr::nextFileId_
private

the index of the next file id

Definition at line 245 of file FileMgr.h.

Referenced by createFile(), and init().

size_t File_Namespace::FileMgr::num_reader_threads_
private

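Number of threads used when loading data. (The generated text "Maps page sizes to FileInfo objects." describes fileIndex_; Doxygen attached that trailing comment to the following member.)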

Definition at line 243 of file FileMgr.h.

Referenced by init().


The documentation for this class was generated from the following files: