OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
File_Namespace::FileMgr Class Reference

#include <FileMgr.h>

+ Inheritance diagram for File_Namespace::FileMgr:
+ Collaboration diagram for File_Namespace::FileMgr:

Public Member Functions

 FileMgr (const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const size_t num_reader_threads=0, const int epoch=-1, const size_t defaultPageSize=2097152)
 Constructor. More...
 
 FileMgr (const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const bool initOnly)
 
 FileMgr (GlobalFileMgr *gfm, const size_t defaultPageSize, std::string basePath)
 
 ~FileMgr () override
 Destructor. More...
 
FileBuffer * createBuffer (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0) override
 Creates a chunk with the specified key and page size. More...
 
bool isBufferOnDevice (const ChunkKey &key) override
 
void deleteBuffer (const ChunkKey &key, const bool purge=true) override
 Deletes the chunk with the specified key. More...
 
void deleteBuffersWithPrefix (const ChunkKey &keyPrefix, const bool purge=true) override
 
FileBuffer * getBuffer (const ChunkKey &key, const size_t numBytes=0) override
 Returns a pointer to the chunk with the specified key. More...
 
void fetchBuffer (const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes) override
 
FileBuffer * putBuffer (const ChunkKey &key, AbstractBuffer *d, const size_t numBytes=0) override
 Puts the contents of d into the Chunk with the given key. More...
 
AbstractBuffer * alloc (const size_t numBytes) override
 
void free (AbstractBuffer *buffer) override
 
Page requestFreePage (size_t pagesize, const bool isMetadata)
 
MgrType getMgrType () override
 
std::string getStringMgrType () override
 
std::string printSlabs () override
 
void clearSlabs () override
 
size_t getMaxSize () override
 
size_t getInUseSize () override
 
size_t getAllocated () override
 
bool isAllocationCapped () override
 
FileInfo * getFileInfoForFileId (const int fileId)
 
void init (const size_t num_reader_threads)
 
void init (const std::string dataPathToConvertFrom)
 
void copyPage (Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
 
void requestFreePages (size_t npages, size_t pagesize, std::vector< Page > &pages, const bool isMetadata)
 Obtains free pages – creates new files if necessary – of the requested size. More...
 
void getChunkMetadataVecForKeyPrefix (ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix) override
 
void checkpoint () override
 Fsyncs data files, writes out epoch and fsyncs that. More...
 
void checkpoint (const int db_id, const int tb_id) override
 
int epoch ()
 Returns current value of epoch - should be one greater than recorded at last checkpoint. More...
 
size_t getNumReaderThreads ()
 Returns the number of threads, as set by the num-reader-threads parameter, to use during the initial load and subsequent reads of data. More...
 
FILE * getFileForFileId (const int fileId)
 Returns FILE pointer associated with requested fileId. More...
 
size_t getNumChunks () override
 
int getDBVersion () const
 Index for looking up chunks. More...
 
bool getDBConvert () const
 
void createTopLevelMetadata ()
 
std::string getFileMgrBasePath () const
 
void closeRemovePhysical ()
 
void removeTableRelatedDS (const int db_id, const int table_id) override
 
void free_page (std::pair< FileInfo *, int > &&page)
 
const std::pair< const int, const int > get_fileMgrKey () const
 

Public Attributes

ChunkKeyToChunkMap chunkIndex_
 

Protected Member Functions

 FileMgr (const int epoch)
 

Private Member Functions

FileInfo * createFile (const size_t pageSize, const size_t numPages)
 Adds a file to the file manager repository. More...
 
FileInfo * openExistingFile (const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
 
void createEpochFile (const std::string &epochFileName)
 
void openEpochFile (const std::string &epochFileName)
 
void writeAndSyncEpochToDisk ()
 
void createDBMetaFile (const std::string &DBMetaFileName)
 
bool openDBMetaFile (const std::string &DBMetaFileName)
 
void writeAndSyncDBMetaToDisk ()
 
void setEpoch (int epoch)
 
void processFileFutures (std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
 
FileBuffer * createBufferUnlocked (const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
 

Private Attributes

GlobalFileMgr * gfm_
 
std::pair< const int, const int > fileMgrKey_
 Global FileMgr. More...
 
std::string fileMgrBasePath_
 
std::vector< FileInfo * > files_
 
PageSizeFileMMap fileIndex_
 A vector of files accessible via a file identifier. More...
 
size_t num_reader_threads_
 Maps page sizes to FileInfo objects. More...
 
size_t defaultPageSize_
 number of threads used when loading data More...
 
unsigned nextFileId_
 
int epoch_
 the index of the next file id More...
 
FILE * epochFile_ = nullptr
 the current epoch (time of last checkpoint) More...
 
int db_version_
 
FILE * DBMetaFile_ = nullptr
 
std::mutex getPageMutex_
 pointer to DB level metadata More...
 
mapd_shared_mutex chunkIndexMutex_
 
mapd_shared_mutex files_rw_mutex_
 
mapd_shared_mutex mutex_free_page
 
std::vector< std::pair< FileInfo *, int > > free_pages
 

Friends

class GlobalFileMgr
 

Detailed Description

Definition at line 85 of file FileMgr.h.

Constructor & Destructor Documentation

File_Namespace::FileMgr::FileMgr ( const int  deviceId,
GlobalFileMgr *  gfm,
const std::pair< const int, const int >  fileMgrKey,
const size_t  num_reader_threads = 0,
const int  epoch = -1,
const size_t  defaultPageSize = 2097152 
)

Constructor.

Definition at line 67 of file FileMgr.cpp.

References init().

73  : AbstractBufferMgr(deviceId)
74  , gfm_(gfm)
75  , fileMgrKey_(fileMgrKey)
76  , defaultPageSize_(defaultPageSize)
77  , nextFileId_(0)
78  , epoch_(epoch) {
79  init(num_reader_threads);
80 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:240
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:201
void init(const size_t num_reader_threads)
Definition: FileMgr.cpp:137
int epoch_
the index of the next file id
Definition: FileMgr.h:249
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:247
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:241

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( const int  deviceId,
GlobalFileMgr *  gfm,
const std::pair< const int, const int >  fileMgrKey,
const bool  initOnly 
)

Definition at line 83 of file FileMgr.cpp.

References epochFile_, fileMgrBasePath_, fileMgrKey_, files_, File_Namespace::GlobalFileMgr::getBasePath(), gfm_, and to_string().

87  : AbstractBufferMgr(deviceId)
88  , gfm_(gfm)
89  , fileMgrKey_(fileMgrKey)
90  , defaultPageSize_(0)
91  , nextFileId_(0)
92  , epoch_(0) {
93  const std::string fileMgrDirPrefix("table");
94  const std::string FileMgrDirDelim("_");
95  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
96  std::to_string(fileMgrKey_.first) + // db_id
97  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
98  epochFile_ = nullptr;
99  files_.clear();
100 }
std::string getBasePath() const
GlobalFileMgr * gfm_
Definition: FileMgr.h:240
std::string fileMgrBasePath_
Definition: FileMgr.h:242
std::string to_string(char const *&&v)
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:250
std::vector< FileInfo * > files_
Definition: FileMgr.h:244
int epoch_
the index of the next file id
Definition: FileMgr.h:249
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:247
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:241

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( GlobalFileMgr *  gfm,
const size_t  defaultPageSize,
std::string  basePath 
)

Definition at line 102 of file FileMgr.cpp.

References init().

103  : AbstractBufferMgr(0)
104  , gfm_(gfm)
105  , fileMgrKey_(0, 0)
106  , fileMgrBasePath_(basePath)
107  , defaultPageSize_(defaultPageSize)
108  , nextFileId_(0)
109  , epoch_(-1) {
110  init(basePath);
111 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:240
std::string fileMgrBasePath_
Definition: FileMgr.h:242
void init(const size_t num_reader_threads)
Definition: FileMgr.cpp:137
int epoch_
the index of the next file id
Definition: FileMgr.h:249
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:247
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:241

+ Here is the call graph for this function:

File_Namespace::FileMgr::~FileMgr ( )
override

Destructor.

Definition at line 116 of file FileMgr.cpp.

References chunkIndex_, File_Namespace::close(), DBMetaFile_, epochFile_, and files_.

116  {
117  // checkpoint();
118  // free memory used by FileInfo objects
119  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
120  delete chunkIt->second;
121  }
122  for (auto file_info : files_) {
123  delete file_info;
124  }
125 
126  if (epochFile_) {
127  close(epochFile_);
128  epochFile_ = nullptr;
129  }
130 
131  if (DBMetaFile_) {
132  close(DBMetaFile_);
133  DBMetaFile_ = nullptr;
134  }
135 }
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:250
std::vector< FileInfo * > files_
Definition: FileMgr.h:244
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:107

+ Here is the call graph for this function:

File_Namespace::FileMgr::FileMgr ( const int  epoch)
protected

Definition at line 114 of file FileMgr.cpp.

114 : AbstractBufferMgr(-1), epoch_(epoch) {}
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:201
int epoch_
the index of the next file id
Definition: FileMgr.h:249

Member Function Documentation

AbstractBuffer * File_Namespace::FileMgr::alloc ( const size_t  numBytes = 0)
override

Definition at line 792 of file FileMgr.cpp.

References logger::FATAL, and LOG.

792  {
793  LOG(FATAL) << "Operation not supported";
794  return nullptr; // satisfy return-type warning
795 }
#define LOG(tag)
Definition: Logger.h:188
void File_Namespace::FileMgr::checkpoint ( )
override

Fsyncs data files, writes out epoch and fsyncs that.

Definition at line 592 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, epoch_, logger::FATAL, files_, files_rw_mutex_, free_page(), free_pages, LOG, mutex_free_page, VLOG, and writeAndSyncEpochToDisk().

592  {
593  VLOG(2) << "Checkpointing epoch: " << epoch_;
594  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
595  for (auto chunkIt = chunkIndex_.begin(); chunkIt != chunkIndex_.end(); ++chunkIt) {
596  if (chunkIt->second->isDirty()) {
597  chunkIt->second->writeMetadata(epoch_);
598  chunkIt->second->clearDirtyBits();
599  }
600  }
601  chunkIndexWriteLock.unlock();
602 
603  mapd_shared_lock<mapd_shared_mutex> read_lock(files_rw_mutex_);
604  for (auto fileIt = files_.begin(); fileIt != files_.end(); ++fileIt) {
605  int status = (*fileIt)->syncToDisk();
606  if (status != 0) {
607  LOG(FATAL) << "Could not sync file to disk";
608  }
609  }
610 
611  writeAndSyncEpochToDisk();
612 
613  mapd_unique_lock<mapd_shared_mutex> freePagesWriteLock(mutex_free_page);
614  for (auto& free_page : free_pages) {
615  free_page.first->freePageDeferred(free_page.second);
616  }
617  free_pages.clear();
618 }
#define LOG(tag)
Definition: Logger.h:188
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:260
void free_page(std::pair< FileInfo *, int > &&page)
Definition: FileMgr.cpp:971
std::vector< FileInfo * > files_
Definition: FileMgr.h:244
int epoch_
the index of the next file id
Definition: FileMgr.h:249
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:533
mapd_shared_lock< mapd_shared_mutex > read_lock
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:259
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:256
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:257
#define VLOG(n)
Definition: Logger.h:291

+ Here is the call graph for this function:

void File_Namespace::FileMgr::checkpoint ( const int  db_id,
const int  tb_id 
)
inline override

Definition at line 194 of file FileMgr.h.

References logger::FATAL, and LOG.

194  {
195  LOG(FATAL) << "Operation not supported, api checkpoint() should be used instead";
196  }
#define LOG(tag)
Definition: Logger.h:188
void File_Namespace::FileMgr::clearSlabs ( )
inline override

Definition at line 148 of file FileMgr.h.

148  { /* noop */
149  }
void File_Namespace::FileMgr::closeRemovePhysical ( )

Definition at line 464 of file FileMgr.cpp.

References File_Namespace::close(), epochFile_, files_, getFileMgrBasePath(), and File_Namespace::renameForDelete().

Referenced by File_Namespace::GlobalFileMgr::removeTableRelatedDS().

464  {
465  for (auto file_info : files_) {
466  if (file_info->f) {
467  close(file_info->f);
468  file_info->f = nullptr;
469  }
470  }
471 
472  if (epochFile_) {
473  close(epochFile_);
474  epochFile_ = nullptr;
475  }
476 
477  /* rename for later deletion the directory containing table related data */
478  File_Namespace::renameForDelete(getFileMgrBasePath());
479 }
std::string getFileMgrBasePath() const
Definition: FileMgr.h:227
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:250
std::vector< FileInfo * > files_
Definition: FileMgr.h:244
void close(FILE *f)
Closes the file pointed to by the FILE pointer.
Definition: File.cpp:107
void renameForDelete(const std::string directoryName)
Renames a directory to DELETE_ME_<EPOCH>_<oldname>.
Definition: File.cpp:188

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::copyPage ( Page &  srcPage,
FileMgr *  destFileMgr,
Page &  destPage,
const size_t  reservedHeaderSize,
const size_t  numBytes,
const size_t  offset 
)

Definition at line 481 of file FileMgr.cpp.

References CHECK, checked_malloc(), defaultPageSize_, File_Namespace::Page::fileId, free(), getFileInfoForFileId(), File_Namespace::Page::pageNum, File_Namespace::FileInfo::read(), and File_Namespace::FileInfo::write().

Referenced by init().

486  {
487  CHECK(offset + numBytes <= defaultPageSize_);
488  FileInfo* srcFileInfo = getFileInfoForFileId(srcPage.fileId);
489  FileInfo* destFileInfo = destFileMgr->getFileInfoForFileId(destPage.fileId);
490  int8_t* buffer = reinterpret_cast<int8_t*>(checked_malloc(numBytes));
491 
492  size_t bytesRead = srcFileInfo->read(
493  srcPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize, numBytes, buffer);
494  CHECK(bytesRead == numBytes);
495  size_t bytesWritten = destFileInfo->write(
496  destPage.pageNum * defaultPageSize_ + offset + reservedHeaderSize,
497  numBytes,
498  buffer);
499  CHECK(bytesWritten == numBytes);
500  ::free(buffer);
501 }
FileInfo * getFileInfoForFileId(const int fileId)
Definition: FileMgr.h:155
void free(AbstractBuffer *buffer) override
Definition: FileMgr.cpp:797
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:247
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::createBuffer ( const ChunkKey &  key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
override

Creates a chunk with the specified key and page size.

Definition at line 620 of file FileMgr.cpp.

References chunkIndexMutex_, and createBufferUnlocked().

622  {
623  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
624  return createBufferUnlocked(key, pageSize, numBytes);
625 }
FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:630
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:256

+ Here is the call graph for this function:

FileBuffer * File_Namespace::FileMgr::createBufferUnlocked ( const ChunkKey &  key,
size_t  pageSize = 0,
const size_t  numBytes = 0 
)
private
Todo:
Make all accesses to chunkIndex_ thread-safe

Definition at line 630 of file FileMgr.cpp.

References chunkIndex_, defaultPageSize_, logger::FATAL, LOG, and show_chunk().

Referenced by createBuffer(), and putBuffer().

632  {
633  size_t actualPageSize = pageSize;
634  if (actualPageSize == 0) {
635  actualPageSize = defaultPageSize_;
636  }
637  /// @todo Make all accesses to chunkIndex_ thread-safe
638  // we will do this lazily and not allocate space for the Chunk (i.e.
639  // FileBuffer yet)
640 
641  if (chunkIndex_.find(key) != chunkIndex_.end()) {
642  LOG(FATAL) << "Chunk already exists for key: " << show_chunk(key);
643  }
644  chunkIndex_[key] = new FileBuffer(this, actualPageSize, key, numBytes);
645  return (chunkIndex_[key]);
646 }
#define LOG(tag)
Definition: Logger.h:188
std::string show_chunk(const ChunkKey &key)
Definition: types.h:73
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
size_t defaultPageSize_
number of threads used when loading data
Definition: FileMgr.h:247

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createDBMetaFile ( const std::string &  DBMetaFileName)
private

Definition at line 551 of file FileMgr.cpp.

References File_Namespace::create(), DBMetaFile_, logger::FATAL, fileMgrBasePath_, getDBVersion(), LOG, and File_Namespace::write().

Referenced by createTopLevelMetadata().

551  {
552  std::string DBMetaFilePath(fileMgrBasePath_ + "/" + DBMetaFileName);
553  if (boost::filesystem::exists(DBMetaFilePath)) {
554  LOG(FATAL) << "DB metadata file `" << DBMetaFilePath << "` already exists.";
555  }
556  DBMetaFile_ = create(DBMetaFilePath, sizeof(int));
557  int db_ver = getDBVersion();
558  write(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_ver);
559  // LOG(INFO) << "DB metadata file has been created.";
560 }
#define LOG(tag)
Definition: Logger.h:188
int getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:946
std::string fileMgrBasePath_
Definition: FileMgr.h:242
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:40
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:126

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createEpochFile ( const std::string &  epochFileName)
private

Definition at line 503 of file FileMgr.cpp.

References File_Namespace::create(), epochFile_, logger::FATAL, fileMgrBasePath_, LOG, and writeAndSyncEpochToDisk().

Referenced by init().

503  {
504  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
505  if (boost::filesystem::exists(epochFilePath)) {
506  LOG(FATAL) << "Epoch file `" << epochFilePath << "` already exists";
507  }
508  epochFile_ = create(epochFilePath, sizeof(int));
509  // Write out current epoch to file - which if this
510  // function is being called should be 0
511  writeAndSyncEpochToDisk();
512 }
#define LOG(tag)
Definition: Logger.h:188
std::string fileMgrBasePath_
Definition: FileMgr.h:242
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:40
FILE * epochFile_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:250
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:533

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::createFile ( const size_t  pageSize,
const size_t  numPages 
)
private

Adds a file to the file manager repository.

This method will create a FileInfo object for the file being added, and it will create the corresponding file on physical disk with the indicated number of pages pre-allocated.

A pointer to the FileInfo object is returned, which itself has a file pointer (FILE*) and a file identifier (int fileId).

Parameters
fileName  The name given to the file in physical storage.
pageSize  The logical page size for the pages in the file.
numPages  The number of logical pages to initially allocate for the file.
Returns
FileInfo* A pointer to the FileInfo object of the added file.

Definition at line 889 of file FileMgr.cpp.

References CHECK, File_Namespace::create(), logger::FATAL, fileIndex_, fileMgrBasePath_, files_, files_rw_mutex_, LOG, and nextFileId_.

Referenced by requestFreePage(), and requestFreePages().

889  {
890  // check arguments
891  if (pageSize == 0 || numPages == 0) {
892  LOG(FATAL) << "File creation failed: pageSize and numPages must be greater than 0.";
893  }
894 
895  // create the new file
896  FILE* f = create(fileMgrBasePath_,
897  nextFileId_,
898  pageSize,
899  numPages); // TM: not sure if I like naming scheme here - should be in
900  // separate namespace?
901  CHECK(f);
902 
903  // instantiate a new FileInfo for the newly created file
904  int fileId = nextFileId_++;
905  FileInfo* fInfo =
906  new FileInfo(this, fileId, f, pageSize, numPages, true); // true means init file
907  CHECK(fInfo);
908 
909  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
910  // update file manager data structures
911  files_.push_back(fInfo);
912  fileIndex_.insert(std::pair<size_t, int>(pageSize, fileId));
913 
914  CHECK(files_.back() == fInfo); // postcondition
915  return fInfo;
916 }
#define LOG(tag)
Definition: Logger.h:188
std::string fileMgrBasePath_
Definition: FileMgr.h:242
FILE * create(const std::string &basePath, const int fileId, const size_t pageSize, const size_t numPages)
Definition: File.cpp:40
std::vector< FileInfo * > files_
Definition: FileMgr.h:244
PageSizeFileMMap fileIndex_
A vector of files accessible via a file identifier.
Definition: FileMgr.h:245
#define CHECK(condition)
Definition: Logger.h:197
mapd_unique_lock< mapd_shared_mutex > write_lock
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:257

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::createTopLevelMetadata ( )

Definition at line 954 of file FileMgr.cpp.

References createDBMetaFile(), DB_META_FILENAME, db_version_, logger::FATAL, getDBVersion(), LOG, and openDBMetaFile().

954  {
955  if (openDBMetaFile(DB_META_FILENAME)) {
956  if (db_version_ > getDBVersion()) {
957  LOG(FATAL) << "DB forward compatibility is not supported. Version of OmniSci "
958  "software used is older than the version of DB being read: "
959  << db_version_;
960  }
961  } else {
962  createDBMetaFile(DB_META_FILENAME);
963  }
964 }
#define LOG(tag)
Definition: Logger.h:188
int getDBVersion() const
Index for looking up chunks.
Definition: FileMgr.cpp:946
void createDBMetaFile(const std::string &DBMetaFileName)
Definition: FileMgr.cpp:551
#define DB_META_FILENAME
Definition: FileMgr.cpp:43
bool openDBMetaFile(const std::string &DBMetaFileName)
Definition: FileMgr.cpp:562

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffer ( const ChunkKey &  key,
const bool  purge = true 
)
override

Deletes the chunk with the specified key.

Definition at line 653 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, logger::FATAL, LOG, and show_chunk().

653  {
654  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
655  auto chunkIt = chunkIndex_.find(key);
656  // ensure the Chunk exists
657  if (chunkIt == chunkIndex_.end()) {
658  LOG(FATAL) << "Chunk does not exist for key: " << show_chunk(key);
659  }
660  chunkIndexWriteLock.unlock();
661  // chunkIt->second->writeMetadata(-1); // writes -1 as epoch - signifies deleted
662  if (purge) {
663  chunkIt->second->freePages();
664  }
665  //@todo need a way to represent delete in non purge case
666  delete chunkIt->second;
667  chunkIndex_.erase(chunkIt);
668 }
#define LOG(tag)
Definition: Logger.h:188
std::string show_chunk(const ChunkKey &key)
Definition: types.h:73
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:256

+ Here is the call graph for this function:

void File_Namespace::FileMgr::deleteBuffersWithPrefix ( const ChunkKey &  keyPrefix,
const bool  purge = true 
)
override

Definition at line 670 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

670  {
671  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
672  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
673  if (chunkIt == chunkIndex_.end()) {
674  return; // should we throw?
675  }
676  while (chunkIt != chunkIndex_.end() &&
677  std::search(chunkIt->first.begin(),
678  chunkIt->first.begin() + keyPrefix.size(),
679  keyPrefix.begin(),
680  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
681  /*
682  cout << "Freeing pages for chunk ";
683  for (auto vecIt = chunkIt->first.begin(); vecIt != chunkIt->first.end(); ++vecIt) {
684  std::cout << *vecIt << ",";
685  }
686  cout << endl;
687  */
688  if (purge) {
689  chunkIt->second->freePages();
690  }
691  //@todo need a way to represent delete in non purge case
692  delete chunkIt->second;
693  chunkIndex_.erase(chunkIt++);
694  }
695 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:256
int File_Namespace::FileMgr::epoch ( )
inline

Returns current value of epoch - should be one greater than recorded at last checkpoint.

Definition at line 201 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileInfo::freePage(), File_Namespace::FileInfo::openExistingFile(), File_Namespace::FileBuffer::reserve(), setEpoch(), and File_Namespace::FileBuffer::write().

201 { return epoch_; }
int epoch_
the index of the next file id
Definition: FileMgr.h:249

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::fetchBuffer ( const ChunkKey &  key,
AbstractBuffer *  destBuffer,
const size_t  numBytes 
)
override

Definition at line 705 of file FileMgr.cpp.

References chunkIndex_, chunkIndexMutex_, Data_Namespace::AbstractBuffer::copyTo(), logger::FATAL, Data_Namespace::AbstractBuffer::isDirty(), LOG, show_chunk(), and Data_Namespace::AbstractBuffer::size().

707  {
708  // reads chunk specified by ChunkKey into AbstractBuffer provided by
709  // destBuffer
710  if (destBuffer->isDirty()) {
711  LOG(FATAL)
712  << "Aborting attempt to fetch a chunk marked dirty. Chunk inconsistency for key: "
713  << show_chunk(key);
714  }
715  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
716  auto chunkIt = chunkIndex_.find(key);
717  if (chunkIt == chunkIndex_.end()) {
718  LOG(FATAL) << "Chunk does not exist for key: " << show_chunk(key);
719  }
720  chunkIndexReadLock.unlock();
721 
722  AbstractBuffer* chunk = chunkIt->second;
723  // chunk's size is either specified in function call with numBytes or we
724  // just look at pageSize * numPages in FileBuffer
725  if (numBytes > 0 && numBytes > chunk->size()) {
726  LOG(FATAL) << "Chunk retrieved for key `" << show_chunk(key) << "` is smaller ("
727  << chunk->size() << ") than number of bytes requested (" << numBytes
728  << ")";
729  }
730 
731  chunk->copyTo(destBuffer, numBytes);
732 }
#define LOG(tag)
Definition: Logger.h:188
std::string show_chunk(const ChunkKey &key)
Definition: types.h:73
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
An AbstractBuffer is a unit of data management for a data manager.
void copyTo(AbstractBuffer *destination_buffer, const size_t num_bytes=0)
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:256

+ Here is the call graph for this function:

void File_Namespace::FileMgr::free ( AbstractBuffer *  buffer)
override

Definition at line 797 of file FileMgr.cpp.

References logger::FATAL, and LOG.

Referenced by copyPage().

797  {
798  LOG(FATAL) << "Operation not supported";
799 }
#define LOG(tag)
Definition: Logger.h:188

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::free_page ( std::pair< FileInfo *, int > &&  page)

Definition at line 971 of file FileMgr.cpp.

References free_pages, and mutex_free_page.

Referenced by checkpoint(), and File_Namespace::FileInfo::freePage().

971  {
972  std::unique_lock<mapd_shared_mutex> lock(mutex_free_page);
973  free_pages.push_back(page);
974 }
std::vector< std::pair< FileInfo *, int > > free_pages
Definition: FileMgr.h:260
mapd_shared_mutex mutex_free_page
Definition: FileMgr.h:259

+ Here is the caller graph for this function:

const std::pair<const int, const int> File_Namespace::FileMgr::get_fileMgrKey ( ) const
inline

Definition at line 233 of file FileMgr.h.

Referenced by File_Namespace::FileInfo::openExistingFile().

233 { return fileMgrKey_; }
std::pair< const int, const int > fileMgrKey_
Global FileMgr.
Definition: FileMgr.h:241

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getAllocated ( )
inline override

Definition at line 152 of file FileMgr.h.

152 { return 0; }
FileBuffer * File_Namespace::FileMgr::getBuffer ( const ChunkKey &  key,
const size_t  numBytes = 0 
)
override

Returns a pointer to the chunk with the specified key.

Definition at line 697 of file FileMgr.cpp.

References CHECK, chunkIndex_, chunkIndexMutex_, and show_chunk().

697  {
698  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
699  auto chunkIt = chunkIndex_.find(key);
700  CHECK(chunkIt != chunkIndex_.end())
701  << "Chunk does not exist for key: " << show_chunk(key);
702  return chunkIt->second;
703 }
std::string show_chunk(const ChunkKey &key)
Definition: types.h:73
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
#define CHECK(condition)
Definition: Logger.h:197
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:256

+ Here is the call graph for this function:

void File_Namespace::FileMgr::getChunkMetadataVecForKeyPrefix ( ChunkMetadataVector &  chunkMetadataVec,
const ChunkKey &  keyPrefix 
)
override

Definition at line 923 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

924  {
925  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(
926  chunkIndexMutex_); // is this guarding the right structure? it looks like we only
927  // read here for chunk
928  auto chunkIt = chunkIndex_.lower_bound(keyPrefix);
929  if (chunkIt == chunkIndex_.end()) {
930  return; // throw?
931  }
932  while (chunkIt != chunkIndex_.end() &&
933  std::search(chunkIt->first.begin(),
934  chunkIt->first.begin() + keyPrefix.size(),
935  keyPrefix.begin(),
936  keyPrefix.end()) != chunkIt->first.begin() + keyPrefix.size()) {
937  if (chunkIt->second->hasEncoder()) {
938  auto chunk_metadata = std::make_shared<ChunkMetadata>();
939  chunkIt->second->encoder_->getMetadata(chunk_metadata);
940  chunkMetadataVec.emplace_back(chunkIt->first, chunk_metadata);
941  }
942  chunkIt++;
943  }
944 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:256
bool File_Namespace::FileMgr::getDBConvert ( ) const

Definition at line 950 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBConvert(), and gfm_.

950  {
951  return gfm_->getDBConvert();
952 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:240

+ Here is the call graph for this function:

int File_Namespace::FileMgr::getDBVersion ( ) const

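Returns the DB version obtained from the GlobalFileMgr (gfm_). (The text "Index for looking up chunks." that Doxygen attached here describes chunkIndex_, not this function.)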

Definition at line 946 of file FileMgr.cpp.

References File_Namespace::GlobalFileMgr::getDBVersion(), and gfm_.

Referenced by createDBMetaFile(), createTopLevelMetadata(), and writeAndSyncDBMetaToDisk().

946  {
947  return gfm_->getDBVersion();
948 }
GlobalFileMgr * gfm_
Definition: FileMgr.h:240

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FILE * File_Namespace::FileMgr::getFileForFileId ( const int  fileId)

Returns FILE pointer associated with requested fileId.

See Also
FileBuffer

Definition at line 918 of file FileMgr.cpp.

References CHECK, and files_.

Referenced by File_Namespace::FileBuffer::readMetadata(), and File_Namespace::FileBuffer::writeMetadata().

918  {
919  CHECK(fileId >= 0 && static_cast<size_t>(fileId) < files_.size());
920  return files_[fileId]->f;
921 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:244
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

FileInfo* File_Namespace::FileMgr::getFileInfoForFileId ( const int  fileId)
inline

Definition at line 155 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::append(), File_Namespace::FileBuffer::copyPage(), copyPage(), File_Namespace::FileBuffer::freeChunkPages(), File_Namespace::FileBuffer::freeMetadataPages(), File_Namespace::readForThread(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeHeader().

155 { return files_[fileId]; }
std::vector< FileInfo * > files_
Definition: FileMgr.h:244

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::getFileMgrBasePath ( ) const
inline

Definition at line 227 of file FileMgr.h.

Referenced by closeRemovePhysical().

227 { return fileMgrBasePath_; }
std::string fileMgrBasePath_
Definition: FileMgr.h:242

+ Here is the caller graph for this function:

size_t File_Namespace::FileMgr::getInUseSize ( )
inlineoverride

Definition at line 151 of file FileMgr.h.

151 { return 0; }
size_t File_Namespace::FileMgr::getMaxSize ( )
inlineoverride

Definition at line 150 of file FileMgr.h.

150 { return 0; }
MgrType File_Namespace::FileMgr::getMgrType ( )
inlineoverride

Definition at line 145 of file FileMgr.h.

145 { return FILE_MGR; };
size_t File_Namespace::FileMgr::getNumChunks ( )
inlineoverride

Definition at line 218 of file FileMgr.h.

218  {
219  // @todo should be locked - but this is more for testing now
220  return chunkIndex_.size();
221  }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
size_t File_Namespace::FileMgr::getNumReaderThreads ( )
inline

Returns the number of threads, as defined by the num-reader-threads parameter, to be used during the initial load and subsequent reads of data.

Definition at line 207 of file FileMgr.h.

Referenced by File_Namespace::FileBuffer::read().

207 { return num_reader_threads_; }
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:246

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::getStringMgrType ( )
inlineoverride

Definition at line 146 of file FileMgr.h.

146 { return ToString(FILE_MGR); }
void File_Namespace::FileMgr::init ( const size_t  num_reader_threads)

Definition at line 137 of file FileMgr.cpp.

References CHECK_EQ, chunkIndex_, createEpochFile(), epoch_, EPOCH_FILENAME, logger::FATAL, omnisci::file_size(), fileMgrBasePath_, fileMgrKey_, File_Namespace::fileSize(), File_Namespace::GlobalFileMgr::getBasePath(), gfm_, File_Namespace::headerCompare(), logger::INFO, LOG, MAPD_FILE_EXT, nextFileId_, num_reader_threads_, openEpochFile(), openExistingFile(), processFileFutures(), timer_start(), timer_stop(), to_string(), and VLOG.

Referenced by FileMgr().

137  {
138  // if epoch = -1 this means open from epoch file
139  const std::string fileMgrDirPrefix("table");
140  const std::string FileMgrDirDelim("_");
141  fileMgrBasePath_ = (gfm_->getBasePath() + fileMgrDirPrefix + FileMgrDirDelim +
142  std::to_string(fileMgrKey_.first) + // db_id
143  FileMgrDirDelim + std::to_string(fileMgrKey_.second)); // tb_id
144  boost::filesystem::path path(fileMgrBasePath_);
145  if (boost::filesystem::exists(path)) {
146  if (!boost::filesystem::is_directory(path)) {
147  LOG(FATAL) << "Specified path '" << fileMgrBasePath_
148  << "' for table data is not a directory.";
149  }
150  if (epoch_ != -1) { // if opening at previous epoch
151  int epochCopy = epoch_;
152  openEpochFile(EPOCH_FILENAME);
153  epoch_ = epochCopy;
154  } else {
155  openEpochFile(EPOCH_FILENAME);
156  }
157 
158  auto clock_begin = timer_start();
159 
160  boost::filesystem::directory_iterator
161  endItr; // default construction yields past-the-end
162  int maxFileId = -1;
163  int fileCount = 0;
164  int threadCount = std::thread::hardware_concurrency();
165  std::vector<HeaderInfo> headerVec;
166  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
167  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
168  if (boost::filesystem::is_regular_file(fileIt->status())) {
169  // note that boost::filesystem leaves preceding dot on
170  // extension - hence MAPD_FILE_EXT is ".mapd"
171  std::string extension(fileIt->path().extension().string());
172 
173  if (extension == MAPD_FILE_EXT) {
174  std::string fileStem(fileIt->path().stem().string());
175  // remove trailing dot if any
176  if (fileStem.size() > 0 && fileStem.back() == '.') {
177  fileStem = fileStem.substr(0, fileStem.size() - 1);
178  }
179  size_t dotPos = fileStem.find_last_of("."); // should only be one
180  if (dotPos == std::string::npos) {
181  LOG(FATAL) << "File `" << fileIt->path()
182  << "` does not carry page size information in the filename.";
183  }
184  int fileId = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
185  if (fileId > maxFileId) {
186  maxFileId = fileId;
187  }
188  size_t pageSize =
189  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
190  std::string filePath(fileIt->path().string());
191  size_t fileSize = boost::filesystem::file_size(filePath);
192  CHECK_EQ(fileSize % pageSize, size_t(0)); // should be no partial pages
193  size_t numPages = fileSize / pageSize;
194 
195  VLOG(4) << "File id: " << fileId << " Page size: " << pageSize
196  << " Num pages: " << numPages;
197 
198  file_futures.emplace_back(std::async(
199  std::launch::async, [filePath, fileId, pageSize, numPages, this] {
200  std::vector<HeaderInfo> tempHeaderVec;
201  openExistingFile(filePath, fileId, pageSize, numPages, tempHeaderVec);
202  return tempHeaderVec;
203  }));
204  fileCount++;
205  if (fileCount % threadCount == 0) {
206  processFileFutures(file_futures, headerVec);
207  }
208  }
209  }
210  }
211 
212  if (file_futures.size() > 0) {
213  processFileFutures(file_futures, headerVec);
214  }
215  int64_t queue_time_ms = timer_stop(clock_begin);
216 
217  LOG(INFO) << "Completed Reading table's file metadata, Elapsed time : "
218  << queue_time_ms << "ms Epoch: " << epoch_ << " files read: " << fileCount
219  << " table location: '" << fileMgrBasePath_ << "'";
220 
221  /* Sort headerVec so that all HeaderInfos
222  * from a chunk will be grouped together
223  * and in order of increasing PageId
224  * - Version Epoch */
225 
226  std::sort(headerVec.begin(), headerVec.end(), headerCompare);
227 
228  /* Goal of next section is to find sequences in the
229  * sorted headerVec of the same ChunkId, which we
230  * can then initiate a FileBuffer with */
231 
232  VLOG(4) << "Number of Headers in Vector: " << headerVec.size();
233  if (headerVec.size() > 0) {
234  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
235  auto startIt = headerVec.begin();
236 
237  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
238  ++headerIt) {
239  // for (auto chunkIt = headerIt->chunkKey.begin(); chunkIt !=
240  // headerIt->chunkKey.end(); ++chunkIt) {
241  // std::cout << *chunkIt << " ";
242  //}
243 
244  if (headerIt->chunkKey != lastChunkKey) {
245  chunkIndex_[lastChunkKey] =
246  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, headerIt);
247  /*
248  if (startIt->versionEpoch != -1) {
249  cout << "not skipping bc version != -1" << endl;
250  // -1 means that chunk was deleted
251  // lets not read it in
252  chunkIndex_[lastChunkKey] = new FileBuffer
253  (this,/lastChunkKey,startIt,headerIt);
254 
255  }
256  else {
257  cout << "Skipping bc version == -1" << endl;
258  }
259  */
260  lastChunkKey = headerIt->chunkKey;
261  startIt = headerIt;
262  }
263  }
264  // now need to insert last Chunk
265  // size_t pageSize = files_[startIt->page.fileId]->pageSize;
266  // cout << "Inserting last chunk" << endl;
267  // if (startIt->versionEpoch != -1) {
268  chunkIndex_[lastChunkKey] =
269  new FileBuffer(this, /*pageSize,*/ lastChunkKey, startIt, headerVec.end());
270  //}
271  }
272  nextFileId_ = maxFileId + 1;
273  // std::cout << "next file id: " << nextFileId_ << std::endl;
274  } else {
275  if (!boost::filesystem::create_directory(path)) {
276  LOG(FATAL) << "Could not create data directory: " << path;
277  }
278  createEpochFile(EPOCH_FILENAME);
279  }
280 
281  /* define number of reader threads to be used */
282  size_t num_hardware_based_threads =
283  std::thread::hardware_concurrency(); // # of threads is based on # of cores on the
284  // host
285  if (num_reader_threads == 0) { // # of threads has not been defined by user
286  num_reader_threads_ = num_hardware_based_threads;
287  } else {
288  if (num_reader_threads > num_hardware_based_threads) {
289  num_reader_threads_ = num_hardware_based_threads;
290  } else {
291  num_reader_threads_ = num_reader_threads;
292  }
293  }
294 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< int > ChunkKey
Definition: types.h:37
std::string getBasePath() const
void createEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:503
void openEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:514
#define LOG(tag)
Definition: Logger.h:188
#define MAPD_FILE_EXT
Definition: File.h:25
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
GlobalFileMgr * gfm_
Definition: FileMgr.h:240
std::string fileMgrBasePath_
Definition: FileMgr.h:242
bool headerCompare(const HeaderInfo &firstElem, const HeaderInfo &secondElem)
Definition: FileMgr.cpp:49
std::string to_string(char const *&&v)
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:296
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:249
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
size_t num_reader_threads_
number of threads used when loading data
Definition: FileMgr.h:246
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:176
#define EPOCH_FILENAME
Definition: FileMgr.cpp:42
std::pair< const int, const int > fileMgrKey_
Key identifying this FileMgr: a (db_id, tb_id) pair.
Definition: FileMgr.h:241
#define VLOG(n)
Definition: Logger.h:291
Type timer_start()
Definition: measure.h:42
FileInfo * openExistingFile(const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:870
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::init ( const std::string  dataPathToConvertFrom)

Definition at line 310 of file FileMgr.cpp.

References CHECK, chunkIndex_, copyPage(), epoch_, EPOCH_FILENAME, File_Namespace::MultiPage::epochs, logger::FATAL, omnisci::file_size(), File_Namespace::fileSize(), File_Namespace::GlobalFileMgr::getFileMgr(), File_Namespace::FileBuffer::getMultiPage(), gfm_, File_Namespace::headerCompare(), LOG, MAPD_FILE_EXT, File_Namespace::FileBuffer::multiPages_, nextFileId_, openEpochFile(), openExistingFile(), File_Namespace::FileBuffer::pageDataSize(), File_Namespace::FileBuffer::pageSize(), processFileFutures(), requestFreePage(), File_Namespace::FileBuffer::reservedHeaderSize(), Data_Namespace::AbstractBuffer::setDirty(), Data_Namespace::AbstractBuffer::setSize(), Data_Namespace::AbstractBuffer::size(), Data_Namespace::AbstractBuffer::syncEncoder(), and File_Namespace::FileBuffer::writeHeader().

310  {
311  int converted_data_epoch = 0;
312  boost::filesystem::path path(dataPathToConvertFrom);
313  if (boost::filesystem::exists(path)) {
314  if (!boost::filesystem::is_directory(path)) {
315  LOG(FATAL) << "Specified path `" << path << "` is not a directory.";
316  }
317 
318  if (epoch_ != -1) { // if opening at previous epoch
319  int epochCopy = epoch_;
320  openEpochFile(EPOCH_FILENAME);
321  epoch_ = epochCopy;
322  } else {
323  openEpochFile(EPOCH_FILENAME);
324  }
325 
326  boost::filesystem::directory_iterator
327  endItr; // default construction yields past-the-end
328  int maxFileId = -1;
329  int fileCount = 0;
330  int threadCount = std::thread::hardware_concurrency();
331  std::vector<HeaderInfo> headerVec;
332  std::vector<std::future<std::vector<HeaderInfo>>> file_futures;
333  for (boost::filesystem::directory_iterator fileIt(path); fileIt != endItr; ++fileIt) {
334  if (boost::filesystem::is_regular_file(fileIt->status())) {
335  // note that boost::filesystem leaves preceding dot on
336  // extension - hence MAPD_FILE_EXT is ".mapd"
337  std::string extension(fileIt->path().extension().string());
338 
339  if (extension == MAPD_FILE_EXT) {
340  std::string fileStem(fileIt->path().stem().string());
341  // remove trailing dot if any
342  if (fileStem.size() > 0 && fileStem.back() == '.') {
343  fileStem = fileStem.substr(0, fileStem.size() - 1);
344  }
345  size_t dotPos = fileStem.find_last_of("."); // should only be one
346  if (dotPos == std::string::npos) {
347  LOG(FATAL) << "File `" + fileIt->path().string() +
348  "` does not carry page size information in the filename.";
349  }
350  int fileId = boost::lexical_cast<int>(fileStem.substr(0, dotPos));
351  if (fileId > maxFileId) {
352  maxFileId = fileId;
353  }
354  size_t pageSize =
355  boost::lexical_cast<size_t>(fileStem.substr(dotPos + 1, fileStem.size()));
356  std::string filePath(fileIt->path().string());
357  size_t fileSize = boost::filesystem::file_size(filePath);
358  CHECK(fileSize % pageSize == 0); // should be no partial pages
359  size_t numPages = fileSize / pageSize;
360 
361  file_futures.emplace_back(std::async(
362  std::launch::async, [filePath, fileId, pageSize, numPages, this] {
363  std::vector<HeaderInfo> tempHeaderVec;
364  openExistingFile(filePath, fileId, pageSize, numPages, tempHeaderVec);
365  return tempHeaderVec;
366  }));
367  fileCount++;
368  if (fileCount % threadCount) {
369  processFileFutures(file_futures, headerVec);
370  }
371  }
372  }
373  }
374 
375  if (file_futures.size() > 0) {
376  processFileFutures(file_futures, headerVec);
377  }
378 
379  /* Sort headerVec so that all HeaderInfos
380  * from a chunk will be grouped together
381  * and in order of increasing PageId
382  * - Version Epoch */
383 
384  std::sort(headerVec.begin(), headerVec.end(), headerCompare);
385 
386  /* Goal of next section is to find sequences in the
387  * sorted headerVec of the same ChunkId, which we
388  * can then initiate a FileBuffer with */
389 
390  if (headerVec.size() > 0) {
391  ChunkKey lastChunkKey = headerVec.begin()->chunkKey;
392  auto startIt = headerVec.begin();
393 
394  for (auto headerIt = headerVec.begin() + 1; headerIt != headerVec.end();
395  ++headerIt) {
396  if (headerIt->chunkKey != lastChunkKey) {
397  FileMgr* c_fm_ =
398  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
399  CHECK(c_fm_);
400  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerIt);
401  chunkIndex_[lastChunkKey] = srcBuf;
402  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
403  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
404  destBuf->syncEncoder(srcBuf);
405  destBuf->setSize(srcBuf->size());
406  destBuf->setDirty(); // this needs to be set to force writing out metadata
407  // files from "checkpoint()" call
408 
409  size_t totalNumPages = srcBuf->getMultiPage().size();
410  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
411  Page srcPage = srcBuf->getMultiPage()[pageNum].current();
412  Page destPage = c_fm_->requestFreePage(
413  srcBuf->pageSize(),
414  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
415  MultiPage multiPage(srcBuf->pageSize());
416  multiPage.epochs.push_back(converted_data_epoch);
417  multiPage.pageVersions.push_back(destPage);
418  destBuf->multiPages_.push_back(multiPage);
419  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
420  copyPage(
421  srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
422  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
423  }
424  lastChunkKey = headerIt->chunkKey;
425  startIt = headerIt;
426  }
427  }
428 
429  // now need to insert last Chunk
430  FileMgr* c_fm_ =
431  dynamic_cast<File_Namespace::FileMgr*>(gfm_->getFileMgr(lastChunkKey));
432  FileBuffer* srcBuf = new FileBuffer(this, lastChunkKey, startIt, headerVec.end());
433  chunkIndex_[lastChunkKey] = srcBuf;
434  FileBuffer* destBuf = new FileBuffer(c_fm_, srcBuf->pageSize(), lastChunkKey);
435  c_fm_->chunkIndex_[lastChunkKey] = destBuf;
436  destBuf->syncEncoder(srcBuf);
437  destBuf->setSize(srcBuf->size());
438  destBuf->setDirty(); // this needs to be set to write out metadata file from the
439  // "checkpoint()" call
440 
441  size_t totalNumPages = srcBuf->getMultiPage().size();
442  for (size_t pageNum = 0; pageNum < totalNumPages; pageNum++) {
443  Page srcPage = srcBuf->getMultiPage()[pageNum].current();
444  Page destPage = c_fm_->requestFreePage(
445  srcBuf->pageSize(),
446  false); // may modify and use api "FileBuffer::addNewMultiPage" instead
447  MultiPage multiPage(srcBuf->pageSize());
448  multiPage.epochs.push_back(converted_data_epoch);
449  multiPage.pageVersions.push_back(destPage);
450  destBuf->multiPages_.push_back(multiPage);
451  size_t reservedHeaderSize = srcBuf->reservedHeaderSize();
452  copyPage(srcPage, c_fm_, destPage, reservedHeaderSize, srcBuf->pageDataSize(), 0);
453  destBuf->writeHeader(destPage, pageNum, converted_data_epoch, false);
454  }
455  }
456  nextFileId_ = maxFileId + 1;
457  } else {
458  if (!boost::filesystem::create_directory(path)) {
459  LOG(FATAL) << "Specified path does not exist: " << path;
460  }
461  }
462 }
std::vector< int > ChunkKey
Definition: types.h:37
void openEpochFile(const std::string &epochFileName)
Definition: FileMgr.cpp:514
#define LOG(tag)
Definition: Logger.h:188
void copyPage(Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)
Definition: FileMgr.cpp:481
#define MAPD_FILE_EXT
Definition: File.h:25
GlobalFileMgr * gfm_
Definition: FileMgr.h:240
bool headerCompare(const HeaderInfo &firstElem, const HeaderInfo &secondElem)
Definition: FileMgr.cpp:49
FileMgr(const int deviceId, GlobalFileMgr *gfm, const std::pair< const int, const int > fileMgrKey, const size_t num_reader_threads=0, const int epoch=-1, const size_t defaultPageSize=2097152)
Constructor.
Definition: FileMgr.cpp:67
void processFileFutures(std::vector< std::future< std::vector< HeaderInfo >>> &file_futures, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:296
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:249
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
AbstractBufferMgr * getFileMgr(const int db_id, const int tb_id)
size_t fileSize(FILE *f)
Returns the size of the specified file.
Definition: File.cpp:176
#define CHECK(condition)
Definition: Logger.h:197
#define EPOCH_FILENAME
Definition: FileMgr.cpp:42
FileInfo * openExistingFile(const std::string &path, const int fileId, const size_t pageSize, const size_t numPages, std::vector< HeaderInfo > &headerVec)
Definition: FileMgr.cpp:870
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31

+ Here is the call graph for this function:

bool File_Namespace::FileMgr::isAllocationCapped ( )
inlineoverride

Definition at line 153 of file FileMgr.h.

153 { return false; }
bool File_Namespace::FileMgr::isBufferOnDevice ( const ChunkKey key)
override

Definition at line 648 of file FileMgr.cpp.

References chunkIndex_, and chunkIndexMutex_.

648  {
649  mapd_shared_lock<mapd_shared_mutex> chunkIndexReadLock(chunkIndexMutex_);
650  return chunkIndex_.find(key) != chunkIndex_.end();
651 }
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:256
bool File_Namespace::FileMgr::openDBMetaFile ( const std::string &  DBMetaFileName)
private

Definition at line 562 of file FileMgr.cpp.

References db_version_, DBMetaFile_, omnisci::file_size(), fileMgrBasePath_, File_Namespace::open(), and File_Namespace::read().

Referenced by createTopLevelMetadata().

562  {
563  std::string DBMetaFilePath(fileMgrBasePath_ + "/" + DBMetaFileName);
564 
565  if (!boost::filesystem::exists(DBMetaFilePath)) {
566  // LOG(INFO) << "DB metadata file does not exist, one will be created.";
567  return false;
568  }
569  if (!boost::filesystem::is_regular_file(DBMetaFilePath)) {
570  // LOG(INFO) << "DB metadata file is not a regular file, one will be created.";
571  return false;
572  }
573  if (boost::filesystem::file_size(DBMetaFilePath) < 4) {
574  // LOG(INFO) << "DB metadata file is not sized properly, one will be created.";
575  return false;
576  }
577  DBMetaFile_ = open(DBMetaFilePath);
578  read(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_version_);
579 
580  return true;
581 }
std::string fileMgrBasePath_
Definition: FileMgr.h:242
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:118
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:88
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::openEpochFile ( const std::string &  epochFileName)
private

Definition at line 514 of file FileMgr.cpp.

References epoch_, epochFile_, logger::FATAL, omnisci::file_size(), fileMgrBasePath_, LOG, File_Namespace::open(), and File_Namespace::read().

Referenced by init().

514  {
515  std::string epochFilePath(fileMgrBasePath_ + "/" + epochFileName);
516  if (!boost::filesystem::exists(epochFilePath)) {
517  LOG(FATAL) << "Epoch file `" << epochFilePath << "` does not exist";
518  }
519  if (!boost::filesystem::is_regular_file(epochFilePath)) {
520  LOG(FATAL) << "Epoch file `" << epochFilePath << "` is not a regular file";
521  }
522  if (boost::filesystem::file_size(epochFilePath) < 4) {
523  LOG(FATAL) << "Epoch file `" << epochFilePath
524  << "` is not sized properly (current size: "
525  << boost::filesystem::file_size(epochFilePath) << ", expected size: 4)";
526  }
527  epochFile_ = open(epochFilePath);
528  read(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
529  // std::cout << "Epoch after open file: " << epoch_ << std::endl;
530  epoch_++; // we are in new epoch from last checkpoint
531 }
#define LOG(tag)
Definition: Logger.h:188
std::string fileMgrBasePath_
Definition: FileMgr.h:242
FILE * epochFile_
FILE handle of the epoch file (assigned in openEpochFile())
Definition: FileMgr.h:250
size_t read(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Reads the specified number of bytes from the offset position in file f into buf.
Definition: File.cpp:118
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:249
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:88
size_t file_size(const int fd)
Definition: omnisci_fs.cpp:31

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

FileInfo * File_Namespace::FileMgr::openExistingFile ( const std::string &  path,
const int  fileId,
const size_t  pageSize,
const size_t  numPages,
std::vector< HeaderInfo > &  headerVec 
)
private

Definition at line 870 of file FileMgr.cpp.

References epoch_, fileIndex_, files_, files_rw_mutex_, File_Namespace::open(), and File_Namespace::FileInfo::openExistingFile().

Referenced by init().

874  {
875  FILE* f = open(path);
876  FileInfo* fInfo = new FileInfo(
877  this, fileId, f, pageSize, numPages, false); // false means don't init file
878 
879  fInfo->openExistingFile(headerVec, epoch_);
880  mapd_unique_lock<mapd_shared_mutex> write_lock(files_rw_mutex_);
881  if (fileId >= static_cast<int>(files_.size())) {
882  files_.resize(fileId + 1);
883  }
884  files_[fileId] = fInfo;
885  fileIndex_.insert(std::pair<size_t, int>(pageSize, fileId));
886  return fInfo;
887 }
std::vector< FileInfo * > files_
Definition: FileMgr.h:244
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:249
PageSizeFileMMap fileIndex_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:245
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:88
mapd_unique_lock< mapd_shared_mutex > write_lock
mapd_shared_mutex files_rw_mutex_
Definition: FileMgr.h:257

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string File_Namespace::FileMgr::printSlabs ( )
inlineoverride

Definition at line 147 of file FileMgr.h.

147 { return "Not Implemented"; }
void File_Namespace::FileMgr::processFileFutures ( std::vector< std::future< std::vector< HeaderInfo >>> &  file_futures,
std::vector< HeaderInfo > &  headerVec 
)
private

Definition at line 296 of file FileMgr.cpp.

Referenced by init().

298  {
299  for (auto& file_future : file_futures) {
300  file_future.wait();
301  }
302  // concatenate the vectors after thread completes
303  for (auto& file_future : file_futures) {
304  auto tempHeaderVec = file_future.get();
305  headerVec.insert(headerVec.end(), tempHeaderVec.begin(), tempHeaderVec.end());
306  }
307  file_futures.clear();
308 }

+ Here is the caller graph for this function:

FileBuffer * File_Namespace::FileMgr::putBuffer ( const ChunkKey key,
AbstractBuffer d,
const size_t  numBytes = 0 
)
override

Puts the contents of d into the Chunk with the given key.

Parameters
key- Unique identifier for a Chunk.
d- An object representing the source data for the Chunk.
Returns
AbstractBuffer*

Definition at line 734 of file FileMgr.cpp.

References File_Namespace::FileBuffer::append(), CHECK_LT, chunkIndex_, chunkIndexMutex_, Data_Namespace::AbstractBuffer::clearDirtyBits(), createBufferUnlocked(), defaultPageSize_, logger::FATAL, Data_Namespace::AbstractBuffer::getDeviceId(), Data_Namespace::AbstractBuffer::getMemoryPtr(), Data_Namespace::AbstractBuffer::getType(), Data_Namespace::AbstractBuffer::isAppended(), Data_Namespace::AbstractBuffer::isDirty(), Data_Namespace::AbstractBuffer::isUpdated(), LOG, Data_Namespace::AbstractBuffer::setSize(), show_chunk(), Data_Namespace::AbstractBuffer::size(), Data_Namespace::AbstractBuffer::syncEncoder(), UNREACHABLE, and File_Namespace::FileBuffer::write().

736  {
737  // obtain a pointer to the Chunk
738  mapd_unique_lock<mapd_shared_mutex> chunkIndexWriteLock(chunkIndexMutex_);
739  auto chunkIt = chunkIndex_.find(key);
740  FileBuffer* chunk;
741  if (chunkIt == chunkIndex_.end()) {
742  chunk = createBufferUnlocked(key, defaultPageSize_);
743  } else {
744  chunk = chunkIt->second;
745  }
746  chunkIndexWriteLock.unlock();
747  size_t oldChunkSize = chunk->size();
748  // write the buffer's data to the Chunk
749  // size_t newChunkSize = numBytes == 0 ? srcBuffer->size() : numBytes;
750  size_t newChunkSize = numBytes == 0 ? srcBuffer->size() : numBytes;
751  if (chunk->isDirty()) {
752  // multiple appends are allowed,
753  // but only single update is allowed
754  if (srcBuffer->isUpdated() && chunk->isUpdated()) {
755  LOG(FATAL) << "Aborting attempt to write a chunk marked dirty. Chunk inconsistency "
756  "for key: "
757  << show_chunk(key);
758  }
759  }
760  if (srcBuffer->isUpdated()) {
761  // chunk size is not changed when fixed rows are updated or are marked as deleted.
762  // but when rows are vacuumed or varlen rows are updated (new rows are appended),
763  // chunk size will change. For vacuum, checkpoint should sync size from cpu to disk.
764  // For varlen update, it takes another route via fragmenter using disk-level buffer.
765  if (0 == numBytes && !chunk->isDirty()) {
766  chunk->setSize(newChunkSize);
767  }
768  //@todo use dirty flags to only flush pages of chunk that need to
769  // be flushed
770  chunk->write((int8_t*)srcBuffer->getMemoryPtr(),
771  newChunkSize,
772  0,
773  srcBuffer->getType(),
774  srcBuffer->getDeviceId());
775  } else if (srcBuffer->isAppended()) {
776  CHECK_LT(oldChunkSize, newChunkSize);
777  chunk->append((int8_t*)srcBuffer->getMemoryPtr() + oldChunkSize,
778  newChunkSize - oldChunkSize,
779  srcBuffer->getType(),
780  srcBuffer->getDeviceId());
781  } else {
782  UNREACHABLE() << "putBuffer() expects a buffer marked is_updated or is_appended";
783  }
784  // chunk->clearDirtyBits(); // Hack: because write and append will set dirty bits
785  //@todo commenting out line above will make sure this metadata is set
786  // but will trigger error on fetch chunk
787  srcBuffer->clearDirtyBits();
788  chunk->syncEncoder(srcBuffer);
789  return chunk;
790 }
#define LOG(tag)
Definition: Logger.h:188
#define UNREACHABLE()
Definition: Logger.h:241
std::string show_chunk(const ChunkKey &key)
Definition: types.h:73
ChunkKeyToChunkMap chunkIndex_
Definition: FileMgr.h:222
#define CHECK_LT(x, y)
Definition: Logger.h:207
size_t defaultPageSize_
default page size for this FileMgr (presumably set from the constructor's defaultPageSize argument)
Definition: FileMgr.h:247
FileBuffer * createBufferUnlocked(const ChunkKey &key, size_t pageSize=0, const size_t numBytes=0)
Definition: FileMgr.cpp:630
mapd_shared_mutex chunkIndexMutex_
Definition: FileMgr.h:256

+ Here is the call graph for this function:

void File_Namespace::FileMgr::removeTableRelatedDS ( const int  db_id,
const int  table_id 
)
override

Definition at line 976 of file FileMgr.cpp.

References UNREACHABLE.

976  {
977  UNREACHABLE();
978 }
#define UNREACHABLE()
Definition: Logger.h:241
Page File_Namespace::FileMgr::requestFreePage ( size_t  pagesize,
const bool  isMetadata 
)

Definition at line 801 of file FileMgr.cpp.

References CHECK, createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, MAX_FILE_N_METADATA_PAGES, and MAX_FILE_N_PAGES.

Referenced by File_Namespace::FileBuffer::addNewMultiPage(), init(), File_Namespace::FileBuffer::write(), and File_Namespace::FileBuffer::writeMetadata().

801  {
802  std::lock_guard<std::mutex> lock(getPageMutex_);
803 
804  auto candidateFiles = fileIndex_.equal_range(pageSize);
805  int pageNum = -1;
806  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
807  FileInfo* fileInfo = files_[fileIt->second];
808  pageNum = fileInfo->getFreePage();
809  if (pageNum != -1) {
810  return (Page(fileInfo->fileId, pageNum));
811  }
812  }
813  // if here then we need to add a file
814  FileInfo* fileInfo;
815  if (isMetadata) {
816  fileInfo = createFile(pageSize, MAX_FILE_N_METADATA_PAGES);
817  } else {
818  fileInfo = createFile(pageSize, MAX_FILE_N_PAGES);
819  }
820  pageNum = fileInfo->getFreePage();
821  CHECK(pageNum != -1);
822  return (Page(fileInfo->fileId, pageNum));
823 }
std::mutex getPageMutex_
Mutex locked while free pages are being requested (see requestFreePage() and requestFreePages()).
Definition: FileMgr.h:255
std::vector< FileInfo * > files_
Definition: FileMgr.h:244
#define MAX_FILE_N_METADATA_PAGES
Definition: File.h:27
PageSizeFileMMap fileIndex_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:245
#define MAX_FILE_N_PAGES
Definition: File.h:26
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:889
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void File_Namespace::FileMgr::requestFreePages ( size_t  npages,
size_t  pagesize,
std::vector< Page > &  pages,
const bool  isMetadata 
)

Obtains free pages – creates new files if necessary – of the requested size.

Given a page size and number of pages, this method updates the vector "pages" to include free pages of the requested size. These pages are immediately removed from the free list of the affected file(s). If there are not enough pages available among current files, new files are created and their pages are included in the vector.

Parameters
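npages: The number of free pages requested
pagesize: The size of each requested page
pages: A vector containing the free pages obtained by this method
isMetadata: Selects the capacity of any newly created file: MAX_FILE_N_METADATA_PAGES when true, MAX_FILE_N_PAGES otherwise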

Definition at line 825 of file FileMgr.cpp.

References CHECK, createFile(), File_Namespace::FileInfo::fileId, fileIndex_, files_, File_Namespace::FileInfo::getFreePage(), getPageMutex_, MAX_FILE_N_METADATA_PAGES, and MAX_FILE_N_PAGES.

828  {
829  // not used currently
830  // @todo add method to FileInfo to get more than one page
831  std::lock_guard<std::mutex> lock(getPageMutex_);
832  auto candidateFiles = fileIndex_.equal_range(pageSize);
833  size_t numPagesNeeded = numPagesRequested;
834  for (auto fileIt = candidateFiles.first; fileIt != candidateFiles.second; ++fileIt) {
835  FileInfo* fileInfo = files_[fileIt->second];
836  int pageNum;
837  do {
838  pageNum = fileInfo->getFreePage();
839  if (pageNum != -1) {
840  pages.emplace_back(fileInfo->fileId, pageNum);
841  numPagesNeeded--;
842  }
843  } while (pageNum != -1 && numPagesNeeded > 0);
844  if (numPagesNeeded == 0) {
845  break;
846  }
847  }
848  while (numPagesNeeded > 0) {
849  FileInfo* fileInfo;
850  if (isMetadata) {
851  fileInfo = createFile(pageSize, MAX_FILE_N_METADATA_PAGES);
852  } else {
853  fileInfo = createFile(pageSize, MAX_FILE_N_PAGES);
854  }
855  int pageNum;
856  do {
857  pageNum = fileInfo->getFreePage();
858  if (pageNum != -1) {
859  pages.emplace_back(fileInfo->fileId, pageNum);
860  numPagesNeeded--;
861  }
862  } while (pageNum != -1 && numPagesNeeded > 0);
863  if (numPagesNeeded == 0) {
864  break;
865  }
866  }
867  CHECK(pages.size() == numPagesRequested);
868 }
std::mutex getPageMutex_
Mutex locked by requestFreePage() and requestFreePages() while they hand out pages.
Definition: FileMgr.h:255
std::vector< FileInfo * > files_
Definition: FileMgr.h:244
#define MAX_FILE_N_METADATA_PAGES
Definition: File.h:27
PageSizeFileMMap fileIndex_
Maps page sizes to FileInfo objects.
Definition: FileMgr.h:245
#define MAX_FILE_N_PAGES
Definition: File.h:26
FileInfo * createFile(const size_t pageSize, const size_t numPages)
Adds a file to the file manager repository.
Definition: FileMgr.cpp:889
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

void File_Namespace::FileMgr::setEpoch ( int  epoch)
private

Definition at line 966 of file FileMgr.cpp.

References epoch(), epoch_, and writeAndSyncEpochToDisk().

966  {
967  epoch_ = epoch;
968  writeAndSyncEpochToDisk();
969 }
int epoch()
Returns current value of epoch - should be one greater than recorded at last checkpoint.
Definition: FileMgr.h:201
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:249
void writeAndSyncEpochToDisk()
Definition: FileMgr.cpp:533

+ Here is the call graph for this function:

void File_Namespace::FileMgr::writeAndSyncDBMetaToDisk ( )
private

Definition at line 583 of file FileMgr.cpp.

References DBMetaFile_, logger::FATAL, getDBVersion(), LOG, and File_Namespace::write().

583  {
584  int db_ver = getDBVersion();
585  write(DBMetaFile_, 0, sizeof(int), (int8_t*)&db_ver);
586  int status = fflush(DBMetaFile_);
587  if (status != 0) {
588  LOG(FATAL) << "Could not sync DB metadata file to disk";
589  }
590 }
#define LOG(tag)
Definition: Logger.h:188
int getDBVersion() const
Returns the DB version number (the value writeAndSyncDBMetaToDisk() writes to the DB metadata file).
Definition: FileMgr.cpp:946
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:126

+ Here is the call graph for this function:

void File_Namespace::FileMgr::writeAndSyncEpochToDisk ( )
private

Definition at line 533 of file FileMgr.cpp.

References epoch_, epochFile_, logger::FATAL, omnisci::fsync(), LOG, and File_Namespace::write().

Referenced by checkpoint(), createEpochFile(), and setEpoch().

533  {
534  write(epochFile_, 0, sizeof(int), (int8_t*)&epoch_);
535  int status = fflush(epochFile_);
536  // int status = fcntl(fileno(epochFile_),51);
537  if (status != 0) {
538  LOG(FATAL) << "Could not flush epoch file to disk";
539  }
540 #ifdef __APPLE__
541  status = fcntl(fileno(epochFile_), 51);
542 #else
543  status = fsync(fileno(epochFile_));
544 #endif
545  if (status != 0) {
546  LOG(FATAL) << "Could not sync epoch file to disk";
547  }
548  ++epoch_;
549 }
#define LOG(tag)
Definition: Logger.h:188
FILE * epochFile_
File handle of the epoch file; writeAndSyncEpochToDisk() writes the current epoch value to it.
Definition: FileMgr.h:250
int epoch_
the current epoch (time of last checkpoint)
Definition: FileMgr.h:249
int fsync(int fd)
Definition: omnisci_fs.cpp:60
size_t write(FILE *f, const size_t offset, const size_t size, int8_t *buf)
Writes the specified number of bytes to the offset position in file f from buf.
Definition: File.cpp:126

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class GlobalFileMgr
friend

Definition at line 86 of file FileMgr.h.

Member Data Documentation

mapd_shared_mutex File_Namespace::FileMgr::chunkIndexMutex_
mutable private
int File_Namespace::FileMgr::db_version_
private

DB version from dbmeta file, should be compatible with GlobalFileMgr::mapd_db_version_

Definition at line 251 of file FileMgr.h.

Referenced by createTopLevelMetadata(), and openDBMetaFile().

FILE* File_Namespace::FileMgr::DBMetaFile_ = nullptr
private

Pointer to the DB-level metadata file.

Definition at line 253 of file FileMgr.h.

Referenced by createDBMetaFile(), openDBMetaFile(), writeAndSyncDBMetaToDisk(), and ~FileMgr().

size_t File_Namespace::FileMgr::defaultPageSize_
private

Default page size (see the defaultPageSize constructor argument).

Definition at line 247 of file FileMgr.h.

Referenced by copyPage(), createBufferUnlocked(), and putBuffer().

int File_Namespace::FileMgr::epoch_
private

the current epoch (time of last checkpoint)

Definition at line 249 of file FileMgr.h.

Referenced by checkpoint(), init(), openEpochFile(), openExistingFile(), setEpoch(), and writeAndSyncEpochToDisk().

FILE* File_Namespace::FileMgr::epochFile_ = nullptr
private

File handle of the epoch file; writeAndSyncEpochToDisk() writes the current epoch value to it.

Definition at line 250 of file FileMgr.h.

Referenced by closeRemovePhysical(), createEpochFile(), FileMgr(), openEpochFile(), writeAndSyncEpochToDisk(), and ~FileMgr().

PageSizeFileMMap File_Namespace::FileMgr::fileIndex_
private

Maps page sizes to FileInfo objects.

Definition at line 245 of file FileMgr.h.

Referenced by createFile(), openExistingFile(), requestFreePage(), and requestFreePages().

std::string File_Namespace::FileMgr::fileMgrBasePath_
private

The OS file system path containing files related to this FileMgr

std::pair<const int, const int> File_Namespace::FileMgr::fileMgrKey_
private

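Key identifying this FileMgr (presumably the fileMgrKey pair passed to the constructor; confirm against FileMgr()).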

Definition at line 241 of file FileMgr.h.

Referenced by FileMgr(), and init().

std::vector<FileInfo*> File_Namespace::FileMgr::files_
private

A vector of files accessible via a file identifier.

Definition at line 244 of file FileMgr.h.

Referenced by checkpoint(), closeRemovePhysical(), createFile(), FileMgr(), getFileForFileId(), openExistingFile(), requestFreePage(), requestFreePages(), and ~FileMgr().

mapd_shared_mutex File_Namespace::FileMgr::files_rw_mutex_
mutable private

Definition at line 257 of file FileMgr.h.

Referenced by checkpoint(), createFile(), and openExistingFile().

std::vector<std::pair<FileInfo*, int> > File_Namespace::FileMgr::free_pages
private

Definition at line 260 of file FileMgr.h.

Referenced by checkpoint(), and free_page().

std::mutex File_Namespace::FileMgr::getPageMutex_
private

Mutex locked by requestFreePage() and requestFreePages() while they hand out pages.

Definition at line 255 of file FileMgr.h.

Referenced by requestFreePage(), and requestFreePages().

GlobalFileMgr* File_Namespace::FileMgr::gfm_
private

Global FileMgr.

Definition at line 240 of file FileMgr.h.

Referenced by FileMgr(), getDBConvert(), getDBVersion(), and init().

mapd_shared_mutex File_Namespace::FileMgr::mutex_free_page
mutable private

Definition at line 259 of file FileMgr.h.

Referenced by checkpoint(), and free_page().

unsigned File_Namespace::FileMgr::nextFileId_
private

the index of the next file id

Definition at line 248 of file FileMgr.h.

Referenced by createFile(), and init().

size_t File_Namespace::FileMgr::num_reader_threads_
private

number of threads used when loading data

Definition at line 246 of file FileMgr.h.

Referenced by init().


The documentation for this class was generated from the following files: