OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::CachingForeignStorageMgr Class Reference

#include <CachingForeignStorageMgr.h>

+ Inheritance diagram for foreign_storage::CachingForeignStorageMgr:
+ Collaboration diagram for foreign_storage::CachingForeignStorageMgr:

Public Member Functions

 CachingForeignStorageMgr (ForeignStorageCache *cache)
 
void fetchBuffer (const ChunkKey &chunk_key, AbstractBuffer *destination_buffer, const size_t num_bytes) override
 
void getChunkMetadataVecForKeyPrefix (ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix) override
 
void getChunkMetadataVecFromDataWrapper (ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix)
 
void removeTableRelatedDS (const int db_id, const int table_id) override
 
void refreshTable (const ChunkKey &table_key, const bool evict_cached_entries) override
 
bool createDataWrapperIfNotExists (const ChunkKey &chunk_key) override
 
bool hasStoredDataWrapper (int32_t db, int32_t tb) const
 
- Public Member Functions inherited from foreign_storage::ForeignStorageMgr
 ForeignStorageMgr ()
 
 ~ForeignStorageMgr () override
 
AbstractBuffer * createBuffer (const ChunkKey &chunk_key, const size_t page_size, const size_t initial_size) override
 
void deleteBuffer (const ChunkKey &chunk_key, const bool purge) override
 
void deleteBuffersWithPrefix (const ChunkKey &chunk_key_prefix, const bool purge) override
 
AbstractBuffer * getBuffer (const ChunkKey &chunk_key, const size_t num_bytes) override
 
void fetchBuffer (const ChunkKey &chunk_key, AbstractBuffer *destination_buffer, const size_t num_bytes) override
 
AbstractBuffer * putBuffer (const ChunkKey &chunk_key, AbstractBuffer *source_buffer, const size_t num_bytes) override
 
void getChunkMetadataVecForKeyPrefix (ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix) override
 
bool isBufferOnDevice (const ChunkKey &chunk_key) override
 
std::string printSlabs () override
 
size_t getMaxSize () override
 
size_t getInUseSize () override
 
size_t getAllocated () override
 
bool isAllocationCapped () override
 
void checkpoint () override
 
void checkpoint (const int db_id, const int tb_id) override
 
AbstractBuffer * alloc (const size_t num_bytes) override
 
void free (AbstractBuffer *buffer) override
 
MgrType getMgrType () override
 
std::string getStringMgrType () override
 
size_t getNumChunks () override
 
void removeTableRelatedDS (const int db_id, const int table_id) override
 
bool hasDataWrapperForChunk (const ChunkKey &chunk_key) const
 
bool isDatawrapperRestored (const ChunkKey &chunk_key)
 
void setDataWrapper (const ChunkKey &table_key, std::shared_ptr< MockForeignDataWrapper > data_wrapper)
 
std::shared_ptr< ForeignDataWrapper > getDataWrapper (const ChunkKey &chunk_key) const
 
void setParallelismHints (const std::map< ChunkKey, std::set< ParallelismHint >> &hints_per_table)
 

Private Member Functions

void refreshTableInCache (const ChunkKey &table_key)
 
int getHighestCachedFragId (const ChunkKey &table_key)
 
void refreshAppendTableInCache (const ChunkKey &table_key, const std::vector< ChunkKey > &old_chunk_keys)
 
void refreshNonAppendTableInCache (const ChunkKey &table_key, const std::vector< ChunkKey > &old_chunk_keys)
 
void refreshChunksInCacheByFragment (const std::vector< ChunkKey > &old_chunk_keys, int last_frag_id)
 
void populateChunkBuffersSafely (ForeignDataWrapper &data_wrapper, ChunkToBufferMap &required_buffers, ChunkToBufferMap &optional_buffers)
 
void eraseDataWrapper (const ChunkKey &key) override
 
void clearTable (const ChunkKey &table_key)
 
size_t maxFetchSize (int32_t db_id) const override
 
bool hasMaxFetchSize () const override
 
std::set< ChunkKey > getOptionalKeysWithinSizeLimit (const ChunkKey &chunk_key, const std::set< ChunkKey, decltype(set_comp)* > &same_fragment_keys, const std::set< ChunkKey, decltype(set_comp)* > &diff_fragment_keys) const override
 
bool isChunkCached (const ChunkKey &chunk_key) const override
 
void evictChunkFromCache (const ChunkKey &chunk_key) override
 
size_t getBufferSize (const ChunkKey &key) const
 
size_t getRequiredBuffersSize (const ChunkKey &chunk_key) const
 

Private Attributes

ForeignStorageCache * disk_cache_
 

Additional Inherited Members

- Public Types inherited from foreign_storage::ForeignStorageMgr
using ParallelismHint = std::pair< int, int >
 
- Protected Member Functions inherited from foreign_storage::ForeignStorageMgr
void updateFragmenterMetadata (const ChunkToBufferMap &) const
 
void createDataWrapperUnlocked (int32_t db, int32_t tb)
 
bool fetchBufferIfTempBufferMapEntryExists (const ChunkKey &chunk_key, AbstractBuffer *destination_buffer, const size_t num_bytes)
 
ChunkToBufferMap allocateTempBuffersForChunks (const std::set< ChunkKey > &chunk_keys)
 
void clearTempChunkBufferMapEntriesForTable (const ChunkKey &table_key)
 
void clearTempChunkBufferMapEntriesForTableUnlocked (const ChunkKey &table_key)
 
std::set< ChunkKey > getOptionalChunkKeySetAndNormalizeCache (const ChunkKey &chunk_key, const std::set< ChunkKey > &required_chunk_keys, const ForeignDataWrapper::ParallelismLevel parallelism_level)
 
std::pair< std::set< ChunkKey, decltype(set_comp)* >, std::set< ChunkKey, decltype(set_comp)* > > getPrefetchSets (const ChunkKey &chunk_key, const std::set< ChunkKey > &required_chunk_keys, const ForeignDataWrapper::ParallelismLevel parallelism_level) const
 
- Static Protected Member Functions inherited from foreign_storage::ForeignStorageMgr
static void checkIfS3NeedsToBeEnabled (const ChunkKey &chunk_key)
 
- Protected Attributes inherited from foreign_storage::ForeignStorageMgr
std::shared_mutex data_wrapper_mutex_
 
std::map< ChunkKey, std::shared_ptr< ForeignDataWrapper > > data_wrapper_map_
 
std::map< ChunkKey, std::shared_ptr< MockForeignDataWrapper > > mocked_wrapper_map_
 
std::map< ChunkKey, std::unique_ptr< AbstractBuffer > > temp_chunk_buffer_map_
 
std::shared_mutex temp_chunk_buffer_map_mutex_
 
std::shared_mutex parallelism_hints_mutex_
 
std::map< ChunkKey, std::set< ParallelismHint > > parallelism_hints_per_table_
 

Detailed Description

Definition at line 27 of file CachingForeignStorageMgr.h.

Constructor & Destructor Documentation

foreign_storage::CachingForeignStorageMgr::CachingForeignStorageMgr ( ForeignStorageCache * cache)

Definition at line 38 of file CachingForeignStorageMgr.cpp.

References CHECK, and disk_cache_.

39  : ForeignStorageMgr(), disk_cache_(cache) {
41 }
#define CHECK(condition)
Definition: Logger.h:291

Member Function Documentation

void foreign_storage::CachingForeignStorageMgr::clearTable ( const ChunkKey & table_key)
private

Definition at line 265 of file CachingForeignStorageMgr.cpp.

References CHECK, foreign_storage::ForeignStorageCache::clearForTablePrefix(), disk_cache_, foreign_storage::ForeignStorageMgr::eraseDataWrapper(), and foreign_storage::ForeignStorageCache::hasCachedMetadataForKeyPrefix().

Referenced by getChunkMetadataVecForKeyPrefix(), getChunkMetadataVecFromDataWrapper(), refreshNonAppendTableInCache(), and refreshTable().

265  {
266  disk_cache_->clearForTablePrefix(table_key);
269 }
virtual void eraseDataWrapper(const ChunkKey &table_key)
#define CHECK(condition)
Definition: Logger.h:291
bool hasCachedMetadataForKeyPrefix(const ChunkKey &) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::CachingForeignStorageMgr::createDataWrapperIfNotExists ( const ChunkKey & chunk_key)
override virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 379 of file CachingForeignStorageMgr.cpp.

References foreign_storage::ForeignStorageMgr::createDataWrapperUnlocked(), foreign_storage::ForeignStorageMgr::data_wrapper_map_, foreign_storage::ForeignStorageMgr::data_wrapper_mutex_, disk_cache_, shared::get_from_map(), get_table_key(), get_table_prefix(), foreign_storage::ForeignStorageCache::getCachedMetadataVecForKeyPrefix(), and foreign_storage::ForeignStorageCache::getSerializedWrapperPath().

Referenced by getChunkMetadataVecForKeyPrefix().

379  {
380  std::lock_guard data_wrapper_lock(data_wrapper_mutex_);
381  ChunkKey table_key = get_table_key(chunk_key);
382  auto data_wrapper_it = data_wrapper_map_.find(table_key);
383  if (data_wrapper_it != data_wrapper_map_.end()) {
384  return false;
385  }
386  auto [db, tb] = get_table_prefix(chunk_key);
388  auto wrapper_file = disk_cache_->getSerializedWrapperPath(db, tb);
389  if (boost::filesystem::exists(wrapper_file)) {
390  ChunkMetadataVector chunk_metadata;
391  disk_cache_->getCachedMetadataVecForKeyPrefix(chunk_metadata, table_key);
392  try {
393  auto data_wrapper = shared::get_from_map(data_wrapper_map_, table_key);
394  data_wrapper->restoreDataWrapperInternals(
395  disk_cache_->getSerializedWrapperPath(db, tb), chunk_metadata);
396  } catch (std::exception& e) {
397  throw RestoreDataWrapperException(e.what());
398  }
399  }
400  return true;
401 }
std::lock_guard< T > lock_guard
std::vector< int > ChunkKey
Definition: types.h:36
ChunkKey get_table_key(const ChunkKey &key)
Definition: types.h:57
void getCachedMetadataVecForKeyPrefix(ChunkMetadataVector &, const ChunkKey &) const
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
std::string getSerializedWrapperPath(int32_t db_id, int32_t tb_id) const
std::map< ChunkKey, std::shared_ptr< ForeignDataWrapper > > data_wrapper_map_
V & get_from_map(std::map< K, V, comp > &map, const K &key)
Definition: misc.h:61
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
void createDataWrapperUnlocked(int32_t db, int32_t tb)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::eraseDataWrapper ( const ChunkKey & key)
override private virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 253 of file CachingForeignStorageMgr.cpp.

References CHECK, foreign_storage::ForeignStorageMgr::data_wrapper_map_, foreign_storage::ForeignStorageMgr::data_wrapper_mutex_, disk_cache_, get_table_prefix(), foreign_storage::ForeignStorageCache::getSerializedWrapperPath(), and is_table_key().

Referenced by getChunkMetadataVecForKeyPrefix(), and removeTableRelatedDS().

253  {
254  CHECK(is_table_key(key));
255  std::lock_guard data_wrapper_lock(data_wrapper_mutex_);
256  auto [db, tb] = get_table_prefix(key);
257  // Need to erase serialized version on disk if it exists so we don't accidentally
258  // recover it after deleting. It is possible for a cached wrapper file to exist without
259  // a wrapper (in multi-instance-mode for instance, so we make sure to remove the file
260  // regardless).
261  boost::filesystem::remove_all(disk_cache_->getSerializedWrapperPath(db, tb));
262  data_wrapper_map_.erase(key);
263 }
std::lock_guard< T > lock_guard
bool is_table_key(const ChunkKey &key)
Definition: types.h:44
std::string getSerializedWrapperPath(int32_t db_id, int32_t tb_id) const
std::map< ChunkKey, std::shared_ptr< ForeignDataWrapper > > data_wrapper_map_
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::evictChunkFromCache ( const ChunkKey & chunk_key)
override private virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 478 of file CachingForeignStorageMgr.cpp.

References disk_cache_, and foreign_storage::ForeignStorageCache::eraseChunk().

478  {
479  disk_cache_->eraseChunk(chunk_key);
480 }
void eraseChunk(const ChunkKey &chunk_key)

+ Here is the call graph for this function:

void foreign_storage::CachingForeignStorageMgr::fetchBuffer ( const ChunkKey & chunk_key,
AbstractBuffer * destination_buffer,
const size_t  num_bytes 
)
override

Definition at line 83 of file CachingForeignStorageMgr.cpp.

References CHECK, CHUNK_KEY_DB_IDX, Data_Namespace::AbstractBuffer::copyTo(), disk_cache_, foreign_storage::ForeignStorageCache::eraseChunk(), foreign_storage::ForeignStorageMgr::fetchBuffer(), foreign_storage::get_column_key_set(), foreign_storage::ForeignStorageCache::getCachedChunkIfExists(), foreign_storage::ForeignStorageCache::getChunkBuffersForCaching(), foreign_storage::ForeignStorageMgr::getDataWrapper(), foreign_storage::ForeignStorageMgr::getOptionalChunkKeySetAndNormalizeCache(), getRequiredBuffersSize(), foreign_storage::anonymous_namespace{CachingForeignStorageMgr.cpp}::is_in_memory_system_table_chunk_key(), Data_Namespace::AbstractBuffer::isDirty(), maxFetchSize(), populateChunkBuffersSafely(), and foreign_storage::ChunkSizeValidator::validateChunkSize().

85  {
86  ChunkSizeValidator chunk_size_validator(chunk_key);
87  if (is_in_memory_system_table_chunk_key(chunk_key)) {
88  ForeignStorageMgr::fetchBuffer(chunk_key, destination_buffer, num_bytes);
89  chunk_size_validator.validateChunkSize(destination_buffer);
90  return;
91  }
92  CHECK(destination_buffer);
93  CHECK(!destination_buffer->isDirty());
94 
96  if (buffer) {
97  chunk_size_validator.validateChunkSize(buffer);
98  buffer->copyTo(destination_buffer, num_bytes);
99  return;
100  } else {
101  auto required_size = getRequiredBuffersSize(chunk_key);
102  if (required_size > maxFetchSize(chunk_key[CHUNK_KEY_DB_IDX])) {
103  // If we don't have space in the cache then skip the caching.
104  ForeignStorageMgr::fetchBuffer(chunk_key, destination_buffer, num_bytes);
105  return;
106  }
107 
108  auto column_keys = get_column_key_set(chunk_key);
109  // Avoid caching only a subset of the column key set.
110  for (const auto& key : column_keys) {
111  disk_cache_->eraseChunk(key);
112  }
113 
114  // Use hints to prefetch other chunks in fragment into cache
115  auto& data_wrapper = *getDataWrapper(chunk_key);
116  auto optional_set = getOptionalChunkKeySetAndNormalizeCache(
117  chunk_key, column_keys, data_wrapper.getCachedParallelismLevel());
118 
119  auto optional_buffers = disk_cache_->getChunkBuffersForCaching(optional_set);
120  auto required_buffers = disk_cache_->getChunkBuffersForCaching(column_keys);
121  CHECK(required_buffers.find(chunk_key) != required_buffers.end());
122  populateChunkBuffersSafely(data_wrapper, required_buffers, optional_buffers);
123 
124  AbstractBuffer* buffer = required_buffers.at(chunk_key);
125  CHECK(buffer);
126  buffer->copyTo(destination_buffer, num_bytes);
127  }
128 }
std::set< ChunkKey > getOptionalChunkKeySetAndNormalizeCache(const ChunkKey &chunk_key, const std::set< ChunkKey > &required_chunk_keys, const ForeignDataWrapper::ParallelismLevel parallelism_level)
std::shared_ptr< ForeignDataWrapper > getDataWrapper(const ChunkKey &chunk_key) const
void eraseChunk(const ChunkKey &chunk_key)
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
size_t maxFetchSize(int32_t db_id) const override
std::set< ChunkKey > get_column_key_set(const ChunkKey &destination_chunk_key)
void fetchBuffer(const ChunkKey &chunk_key, AbstractBuffer *destination_buffer, const size_t num_bytes) override
An AbstractBuffer is a unit of data management for a data manager.
void populateChunkBuffersSafely(ForeignDataWrapper &data_wrapper, ChunkToBufferMap &required_buffers, ChunkToBufferMap &optional_buffers)
ChunkToBufferMap getChunkBuffersForCaching(const std::set< ChunkKey > &chunk_keys) const
void copyTo(AbstractBuffer *destination_buffer, const size_t num_bytes=0)
#define CHECK(condition)
Definition: Logger.h:291
File_Namespace::FileBuffer * getCachedChunkIfExists(const ChunkKey &)
size_t getRequiredBuffersSize(const ChunkKey &chunk_key) const

+ Here is the call graph for this function:

size_t foreign_storage::CachingForeignStorageMgr::getBufferSize ( const ChunkKey & key) const
private

Definition at line 453 of file CachingForeignStorageMgr.cpp.

References CHECK_EQ, disk_cache_, get_fragment_key(), foreign_storage::get_max_chunk_size(), foreign_storage::ForeignStorageCache::getCachedMetadataVecForKeyPrefix(), is_varlen_data_key(), is_varlen_key(), and show_chunk().

Referenced by getOptionalKeysWithinSizeLimit(), and getRequiredBuffersSize().

453  {
454  size_t num_bytes = 0;
455  ChunkMetadataVector meta;
457  CHECK_EQ(meta.size(), 1U) << show_chunk(key);
458  auto metadata = meta.begin()->second;
459 
460  if (is_varlen_key(key)) {
461  if (is_varlen_data_key(key)) {
462  num_bytes = get_max_chunk_size(key);
463  } else {
464  num_bytes = (metadata->sqlType.is_string())
465  ? sizeof(StringOffsetT) * (metadata->numElements + 1)
466  : sizeof(ArrayOffsetT) * (metadata->numElements + 1);
467  }
468  } else {
469  num_bytes = metadata->numBytes;
470  }
471  return num_bytes;
472 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
bool is_varlen_data_key(const ChunkKey &key)
Definition: types.h:75
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
int32_t StringOffsetT
Definition: sqltypes.h:1493
void getCachedMetadataVecForKeyPrefix(ChunkMetadataVector &, const ChunkKey &) const
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
size_t get_max_chunk_size(const ChunkKey &key)
int32_t ArrayOffsetT
Definition: sqltypes.h:1494
ChunkKey get_fragment_key(const ChunkKey &key)
Definition: types.h:90
bool is_varlen_key(const ChunkKey &key)
Definition: types.h:71

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecForKeyPrefix ( ChunkMetadataVector & chunk_metadata,
const ChunkKey & chunk_key_prefix 
)
override

Definition at line 138 of file CachingForeignStorageMgr.cpp.

References foreign_storage::ForeignStorageCache::cacheMetadataVec(), CHECK, CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, clearTable(), createDataWrapperIfNotExists(), disk_cache_, eraseDataWrapper(), logger::ERROR, foreign_storage::fragment_maps_to_leaf(), get_table_key(), get_table_prefix(), foreign_storage::ForeignStorageCache::getCachedMetadataVecForKeyPrefix(), foreign_storage::ForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), getChunkMetadataVecFromDataWrapper(), has_table_prefix(), foreign_storage::ForeignStorageCache::hasCachedMetadataForKeyPrefix(), foreign_storage::ForeignStorageCache::hasStoredDataWrapperMetadata(), dist::is_distributed(), foreign_storage::anonymous_namespace{CachingForeignStorageMgr.cpp}::is_in_memory_system_table_chunk_key(), foreign_storage::is_shardable_key(), LOG, and show_chunk().

140  {
141  if (is_in_memory_system_table_chunk_key(key_prefix)) {
142  ForeignStorageMgr::getChunkMetadataVecForKeyPrefix(chunk_metadata, key_prefix);
143  return;
144  }
145  CHECK(has_table_prefix(key_prefix));
146  // If the disk has any cached metadata for a prefix then it is guaranteed to have all
147  // metadata for that table, so we can return a complete set. If it has no metadata,
148  // then it may be that the table has no data, or that it's just not cached, so we need
149  // to go to storage to check.
150  if (disk_cache_->hasCachedMetadataForKeyPrefix(key_prefix)) {
151  disk_cache_->getCachedMetadataVecForKeyPrefix(chunk_metadata, key_prefix);
152 
153  // Assert all metadata in cache is mapped to this leaf node in distributed.
154  if (is_shardable_key(key_prefix)) {
155  for (auto& [key, meta] : chunk_metadata) {
156  CHECK(fragment_maps_to_leaf(key)) << show_chunk(key);
157  }
158  }
159 
160  try {
161  // If the data in cache was restored from disk then it is possible that the wrapper
162  // does not exist yet. In this case the wrapper will be restored from disk if
163  // possible.
164  createDataWrapperIfNotExists(key_prefix);
165  return;
166  } catch (RestoreDataWrapperException& e) {
167  auto [db_id, table_id] = get_table_prefix(key_prefix);
168  LOG(ERROR) << "An error occurred while attempting to restore data wrapper using "
169  "disk cached metadata. Clearing cached data for table and proceeding "
170  "with a new data wrapper instance. Database ID: "
171  << db_id << ", table ID: " << table_id << ", error: " << e.what();
172  chunk_metadata.clear();
173  clearTable({db_id, table_id});
174  }
175  } else if (dist::is_distributed() &&
176  disk_cache_->hasStoredDataWrapperMetadata(key_prefix[CHUNK_KEY_DB_IDX],
177  key_prefix[CHUNK_KEY_TABLE_IDX])) {
178  // In distributed mode, it is possible to have all the chunk metadata filtered out for
179  // this node, after previously getting the chunk metadata from the wrapper and caching
180  // the wrapper metadata. In this case, return immediately and avoid doing a redundant
181  // metadata scan.
182  return;
183  }
184 
185  // If we have no cached data then either the data was evicted, was never populated, or
186  // the data for the table is an empty set (no chunks). In case we are hitting the first
187  // two, we should repopulate the data wrapper so just do it in all cases.
188  auto table_key = get_table_key(key_prefix);
189  eraseDataWrapper(table_key);
190  createDataWrapperIfNotExists(table_key);
191 
192  getChunkMetadataVecFromDataWrapper(chunk_metadata, key_prefix);
193  disk_cache_->cacheMetadataVec(chunk_metadata);
194 }
#define LOG(tag)
Definition: Logger.h:285
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
ChunkKey get_table_key(const ChunkKey &key)
Definition: types.h:57
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
void eraseDataWrapper(const ChunkKey &key) override
void getChunkMetadataVecFromDataWrapper(ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix)
void getCachedMetadataVecForKeyPrefix(ChunkMetadataVector &, const ChunkKey &) const
bool hasStoredDataWrapperMetadata(int32_t db_id, int32_t table_id) const
bool createDataWrapperIfNotExists(const ChunkKey &chunk_key) override
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
bool has_table_prefix(const ChunkKey &key)
Definition: types.h:48
bool fragment_maps_to_leaf(const ChunkKey &key)
bool is_shardable_key(const ChunkKey &key)
void cacheMetadataVec(const ChunkMetadataVector &)
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix) override
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
#define CHECK(condition)
Definition: Logger.h:291
bool hasCachedMetadataForKeyPrefix(const ChunkKey &) const
bool is_distributed()
Definition: distributed.cpp:21

+ Here is the call graph for this function:

void foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecFromDataWrapper ( ChunkMetadataVector & chunk_metadata,
const ChunkKey & chunk_key_prefix 
)

Definition at line 196 of file CachingForeignStorageMgr.cpp.

References CHECK, foreign_storage::ForeignStorageCache::checkpoint(), clearTable(), disk_cache_, get_table_prefix(), foreign_storage::ForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), foreign_storage::ForeignStorageMgr::getDataWrapper(), has_table_prefix(), foreign_storage::is_table_enabled_on_node(), and foreign_storage::ForeignStorageCache::storeDataWrapper().

Referenced by getChunkMetadataVecForKeyPrefix(), refreshAppendTableInCache(), and refreshNonAppendTableInCache().

198  {
199  CHECK(has_table_prefix(chunk_key_prefix));
200  auto [db_id, tb_id] = get_table_prefix(chunk_key_prefix);
201  try {
202  ForeignStorageMgr::getChunkMetadataVecForKeyPrefix(chunk_metadata, chunk_key_prefix);
203  } catch (...) {
204  clearTable({db_id, tb_id});
205  throw;
206  }
207  // If the table was disabled then we will have no wrapper to serialize.
208  if (is_table_enabled_on_node(chunk_key_prefix)) {
209  auto doc = getDataWrapper(chunk_key_prefix)->getSerializedDataWrapper();
210  disk_cache_->storeDataWrapper(doc, db_id, tb_id);
211 
212  // If the wrapper populated buffers we want that action to be checkpointed.
213  disk_cache_->checkpoint(db_id, tb_id);
214  }
215 }
std::shared_ptr< ForeignDataWrapper > getDataWrapper(const ChunkKey &chunk_key) const
void storeDataWrapper(const std::string &doc, int32_t db_id, int32_t tb_id)
bool is_table_enabled_on_node(const ChunkKey &key)
bool has_table_prefix(const ChunkKey &key)
Definition: types.h:48
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix) override
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
void checkpoint(const int32_t db_id, const int32_t tb_id)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int foreign_storage::CachingForeignStorageMgr::getHighestCachedFragId ( const ChunkKey & table_key)
private

Definition at line 271 of file CachingForeignStorageMgr.cpp.

References CHUNK_KEY_FRAGMENT_IDX, disk_cache_, foreign_storage::ForeignStorageCache::getCachedMetadataVecForKeyPrefix(), and foreign_storage::ForeignStorageCache::hasCachedMetadataForKeyPrefix().

Referenced by refreshAppendTableInCache().

271  {
272  // Determine last fragment ID
273  int last_frag_id = 0;
274  if (disk_cache_->hasCachedMetadataForKeyPrefix(table_key)) {
275  ChunkMetadataVector cached_metadata;
276  disk_cache_->getCachedMetadataVecForKeyPrefix(cached_metadata, table_key);
277  for (const auto& [key, metadata] : cached_metadata) {
278  last_frag_id = std::max(last_frag_id, key[CHUNK_KEY_FRAGMENT_IDX]);
279  }
280  }
281  return last_frag_id;
282 }
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
void getCachedMetadataVecForKeyPrefix(ChunkMetadataVector &, const ChunkKey &) const
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
bool hasCachedMetadataForKeyPrefix(const ChunkKey &) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::set< ChunkKey > foreign_storage::CachingForeignStorageMgr::getOptionalKeysWithinSizeLimit ( const ChunkKey & chunk_key,
const std::set< ChunkKey, decltype(set_comp)* > &  same_fragment_keys,
const std::set< ChunkKey, decltype(set_comp)* > &  diff_fragment_keys 
) const
override private virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 426 of file CachingForeignStorageMgr.cpp.

References CHUNK_KEY_DB_IDX, foreign_storage::get_column_key_set(), getBufferSize(), getRequiredBuffersSize(), and maxFetchSize().

429  {
430  std::set<ChunkKey> optional_keys;
431  auto total_chunk_size = getRequiredBuffersSize(chunk_key);
432  auto max_size = maxFetchSize(chunk_key[CHUNK_KEY_DB_IDX]);
433  // Add keys to the list of optional keys starting with the same fragment. If we run out
434  // of space, then exit early with what we have added so far.
435  for (const auto& keys : {same_fragment_keys, diff_fragment_keys}) {
436  for (const auto& key : keys) {
437  auto column_keys = get_column_key_set(key);
438  for (const auto& column_key : column_keys) {
439  total_chunk_size += getBufferSize(column_key);
440  }
441  // Early exit if we exceed the size limit.
442  if (total_chunk_size > max_size) {
443  return optional_keys;
444  }
445  for (const auto& column_key : column_keys) {
446  optional_keys.emplace(column_key);
447  }
448  }
449  }
450  return optional_keys;
451 }
size_t getBufferSize(const ChunkKey &key) const
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
size_t maxFetchSize(int32_t db_id) const override
std::set< ChunkKey > get_column_key_set(const ChunkKey &destination_chunk_key)
size_t getRequiredBuffersSize(const ChunkKey &chunk_key) const

+ Here is the call graph for this function:

size_t foreign_storage::CachingForeignStorageMgr::getRequiredBuffersSize ( const ChunkKey & chunk_key) const
private

Definition at line 417 of file CachingForeignStorageMgr.cpp.

References foreign_storage::get_column_key_set(), and getBufferSize().

Referenced by fetchBuffer(), and getOptionalKeysWithinSizeLimit().

417  {
418  auto key_set = get_column_key_set(chunk_key);
419  size_t total_size = 0U;
420  for (const auto& key : key_set) {
421  total_size += getBufferSize(key);
422  }
423  return total_size;
424 }
size_t getBufferSize(const ChunkKey &key) const
std::set< ChunkKey > get_column_key_set(const ChunkKey &destination_chunk_key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::CachingForeignStorageMgr::hasMaxFetchSize ( ) const
override private virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 413 of file CachingForeignStorageMgr.cpp.

413  {
414  return true;
415 }
bool foreign_storage::CachingForeignStorageMgr::hasStoredDataWrapper ( int32_t  db,
int32_t  tb 
) const

Definition at line 229 of file CachingForeignStorageMgr.cpp.

References disk_cache_, and foreign_storage::ForeignStorageCache::hasStoredDataWrapperMetadata().

229  {
230  return disk_cache_->hasStoredDataWrapperMetadata(db, tb);
231 }
bool hasStoredDataWrapperMetadata(int32_t db_id, int32_t table_id) const

+ Here is the call graph for this function:

bool foreign_storage::CachingForeignStorageMgr::isChunkCached ( const ChunkKey & chunk_key) const
override private virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 474 of file CachingForeignStorageMgr.cpp.

References disk_cache_, and foreign_storage::ForeignStorageCache::getCachedChunkIfExists().

474  {
475  return disk_cache_->getCachedChunkIfExists(chunk_key) != nullptr;
476 }
File_Namespace::FileBuffer * getCachedChunkIfExists(const ChunkKey &)

+ Here is the call graph for this function:

size_t foreign_storage::CachingForeignStorageMgr::maxFetchSize ( int32_t  db_id) const
override private virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 409 of file CachingForeignStorageMgr.cpp.

References disk_cache_, and foreign_storage::ForeignStorageCache::getMaxChunkDataSize().

Referenced by fetchBuffer(), and getOptionalKeysWithinSizeLimit().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::populateChunkBuffersSafely ( ForeignDataWrapper & data_wrapper,
ChunkToBufferMap & required_buffers,
ChunkToBufferMap & optional_buffers 
)
private

Definition at line 43 of file CachingForeignStorageMgr.cpp.

References CHECK_GT, foreign_storage::ForeignStorageCache::checkpoint(), disk_cache_, get_table_prefix(), foreign_storage::ForeignDataWrapper::populateChunkBuffers(), and foreign_storage::ForeignStorageMgr::updateFragmenterMetadata().

Referenced by fetchBuffer(), and refreshChunksInCacheByFragment().

46  {
47  CHECK_GT(required_buffers.size(), 0U) << "Must populate at least one buffer";
48  try {
49  ChunkSizeValidator chunk_size_validator(required_buffers.begin()->first);
50  data_wrapper.populateChunkBuffers(required_buffers, optional_buffers);
51  chunk_size_validator.validateChunkSizes(required_buffers);
52  chunk_size_validator.validateChunkSizes(optional_buffers);
53  updateFragmenterMetadata(required_buffers);
54  updateFragmenterMetadata(optional_buffers);
55  } catch (const std::runtime_error& error) {
56  // clear any partially loaded but failed chunks (there may be some
57  // fully-loaded chunks as well but they will be cleared conservatively
58  // anyways)
59  for (const auto& [chunk_key, buffer] : required_buffers) {
60  if (auto file_buffer = dynamic_cast<File_Namespace::FileBuffer*>(buffer)) {
61  file_buffer->freeChunkPages();
62  }
63  }
64  for (const auto& [chunk_key, buffer] : optional_buffers) {
65  if (auto file_buffer = dynamic_cast<File_Namespace::FileBuffer*>(buffer)) {
66  file_buffer->freeChunkPages();
67  }
68  }
69 
70  throw ForeignStorageException(error.what());
71  }
72  // All required buffers should be from the same table.
73  auto [db, tb] = get_table_prefix(required_buffers.begin()->first);
74  disk_cache_->checkpoint(db, tb);
75 }
#define CHECK_GT(x, y)
Definition: Logger.h:305
void updateFragmenterMetadata(const ChunkToBufferMap &) const
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
void checkpoint(const int32_t db_id, const int32_t tb_id)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::refreshAppendTableInCache ( const ChunkKey &  table_key,
const std::vector< ChunkKey > &  old_chunk_keys 
)
private

Definition at line 284 of file CachingForeignStorageMgr.cpp.

References foreign_storage::ForeignStorageCache::cacheMetadataVec(), CHECK, disk_cache_, getChunkMetadataVecFromDataWrapper(), getHighestCachedFragId(), is_table_key(), and refreshChunksInCacheByFragment().

Referenced by refreshTableInCache().

286  {
287  CHECK(is_table_key(table_key));
288  int last_frag_id = getHighestCachedFragId(table_key);
289 
290  ChunkMetadataVector storage_metadata;
291  getChunkMetadataVecFromDataWrapper(storage_metadata, table_key);
292  try {
293  disk_cache_->cacheMetadataVec(storage_metadata);
294  refreshChunksInCacheByFragment(old_chunk_keys, last_frag_id);
295  } catch (std::runtime_error& e) {
297  }
298 }
bool is_table_key(const ChunkKey &key)
Definition: types.h:44
void getChunkMetadataVecFromDataWrapper(ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix)
void refreshChunksInCacheByFragment(const std::vector< ChunkKey > &old_chunk_keys, int last_frag_id)
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
void cacheMetadataVec(const ChunkMetadataVector &)
#define CHECK(condition)
Definition: Logger.h:291
int getHighestCachedFragId(const ChunkKey &table_key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::refreshChunksInCacheByFragment ( const std::vector< ChunkKey > &  old_chunk_keys,
int  last_frag_id 
)
private

Definition at line 316 of file CachingForeignStorageMgr.cpp.

References CHECK, CHUNK_KEY_FRAGMENT_IDX, CHUNK_KEY_TABLE_IDX, disk_cache_, foreign_storage::get_column_key_set(), get_table_key(), foreign_storage::ForeignStorageCache::getCachedChunkIfExists(), foreign_storage::ForeignStorageCache::getChunkBuffersForCaching(), foreign_storage::ForeignStorageMgr::getDataWrapper(), foreign_storage::ForeignStorageCache::isMetadataCached(), LOG, foreign_storage::anonymous_namespace{CachingForeignStorageMgr.cpp}::MAX_REFRESH_TIME_IN_SECONDS, populateChunkBuffersSafely(), and logger::WARNING.

Referenced by refreshAppendTableInCache(), and refreshNonAppendTableInCache().

318  {
319  int64_t total_time{0};
320  auto fragment_refresh_start_time = std::chrono::high_resolution_clock::now();
321 
322  if (old_chunk_keys.empty()) {
323  return;
324  }
325 
326  // Iterate through previously cached chunks and re-cache them. Caching is
327  // done one fragment at a time, for all applicable chunks in the fragment.
328  ChunkToBufferMap optional_buffers;
329  std::set<ChunkKey> chunk_keys_to_be_cached;
330  auto fragment_id = old_chunk_keys[0][CHUNK_KEY_FRAGMENT_IDX];
331  const ChunkKey table_key{get_table_key(old_chunk_keys[0])};
332  std::set<ChunkKey> chunk_keys_in_fragment;
333  for (const auto& chunk_key : old_chunk_keys) {
334  CHECK(chunk_key[CHUNK_KEY_TABLE_IDX] == table_key[CHUNK_KEY_TABLE_IDX]);
335  if (chunk_key[CHUNK_KEY_FRAGMENT_IDX] < start_frag_id) {
336  continue;
337  }
338  if (disk_cache_->isMetadataCached(chunk_key)) {
339  if (chunk_key[CHUNK_KEY_FRAGMENT_IDX] != fragment_id) {
340  if (chunk_keys_in_fragment.size() > 0) {
341  auto required_buffers =
342  disk_cache_->getChunkBuffersForCaching(chunk_keys_in_fragment);
343  populateChunkBuffersSafely(
344  *getDataWrapper(table_key), required_buffers, optional_buffers);
345  chunk_keys_in_fragment.clear();
346  }
347  // At this point, cache buffers for refreshable chunks in the last fragment
348  // have been populated. Exit if the max refresh time has been exceeded.
349  // Otherwise, move to the next fragment.
350  auto current_time = std::chrono::high_resolution_clock::now();
351  total_time += std::chrono::duration_cast<std::chrono::seconds>(
352  current_time - fragment_refresh_start_time)
353  .count();
354  if (total_time >= MAX_REFRESH_TIME_IN_SECONDS) {
355  LOG(WARNING) << "Refresh time exceeded for table key: { " << table_key[0]
356  << ", " << table_key[1] << " } after fragment id: " << fragment_id;
357  break;
358  } else {
359  fragment_refresh_start_time = std::chrono::high_resolution_clock::now();
360  }
361  fragment_id = chunk_key[CHUNK_KEY_FRAGMENT_IDX];
362  }
363  // Key may have been cached during scan
364  if (disk_cache_->getCachedChunkIfExists(chunk_key) == nullptr) {
365  auto column_keys = get_column_key_set(chunk_key);
366  chunk_keys_in_fragment.insert(column_keys.begin(), column_keys.end());
367  chunk_keys_to_be_cached.insert(column_keys.begin(), column_keys.end());
368  }
369  }
370  }
371  if (chunk_keys_in_fragment.size() > 0) {
372  auto required_buffers =
373  disk_cache_->getChunkBuffersForCaching(chunk_keys_in_fragment);
374  populateChunkBuffersSafely(
375  *getDataWrapper(table_key), required_buffers, optional_buffers);
376  }
377 }
std::vector< int > ChunkKey
Definition: types.h:36
bool isMetadataCached(const ChunkKey &) const
std::shared_ptr< ForeignDataWrapper > getDataWrapper(const ChunkKey &chunk_key) const
#define LOG(tag)
Definition: Logger.h:285
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
std::map< ChunkKey, AbstractBuffer * > ChunkToBufferMap
std::set< ChunkKey > get_column_key_set(const ChunkKey &destination_chunk_key)
ChunkKey get_table_key(const ChunkKey &key)
Definition: types.h:57
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
void populateChunkBuffersSafely(ForeignDataWrapper &data_wrapper, ChunkToBufferMap &required_buffers, ChunkToBufferMap &optional_buffers)
ChunkToBufferMap getChunkBuffersForCaching(const std::set< ChunkKey > &chunk_keys) const
#define CHECK(condition)
Definition: Logger.h:291
File_Namespace::FileBuffer * getCachedChunkIfExists(const ChunkKey &)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::refreshNonAppendTableInCache ( const ChunkKey &  table_key,
const std::vector< ChunkKey > &  old_chunk_keys 
)
private

Definition at line 300 of file CachingForeignStorageMgr.cpp.

References foreign_storage::ForeignStorageCache::cacheMetadataVec(), CHECK, clearTable(), disk_cache_, getChunkMetadataVecFromDataWrapper(), is_table_key(), and refreshChunksInCacheByFragment().

Referenced by refreshTableInCache().

302  {
303  CHECK(is_table_key(table_key));
304  ChunkMetadataVector storage_metadata;
305  clearTable(table_key);
306  getChunkMetadataVecFromDataWrapper(storage_metadata, table_key);
307 
308  try {
309  disk_cache_->cacheMetadataVec(storage_metadata);
310  refreshChunksInCacheByFragment(old_chunk_keys, 0);
311  } catch (std::runtime_error& e) {
313  }
314 }
bool is_table_key(const ChunkKey &key)
Definition: types.h:44
void getChunkMetadataVecFromDataWrapper(ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix)
void refreshChunksInCacheByFragment(const std::vector< ChunkKey > &old_chunk_keys, int last_frag_id)
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
void cacheMetadataVec(const ChunkMetadataVector &)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::refreshTable ( const ChunkKey &  table_key,
const bool  evict_cached_entries 
)
override virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 217 of file CachingForeignStorageMgr.cpp.

References CHECK, foreign_storage::ForeignStorageMgr::checkIfS3NeedsToBeEnabled(), clearTable(), foreign_storage::ForeignStorageMgr::clearTempChunkBufferMapEntriesForTable(), is_table_key(), and refreshTableInCache().

218  {
219  CHECK(is_table_key(table_key));
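220  ForeignStorageMgr::checkIfS3NeedsToBeEnabled(table_key);
221  clearTempChunkBufferMapEntriesForTable(table_key);
222  if (evict_cached_entries) {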
223  clearTable(table_key);
224  } else {
225  refreshTableInCache(table_key);
226  }
227 }
bool is_table_key(const ChunkKey &key)
Definition: types.h:44
static void checkIfS3NeedsToBeEnabled(const ChunkKey &chunk_key)
void refreshTableInCache(const ChunkKey &table_key)
#define CHECK(condition)
Definition: Logger.h:291
void clearTempChunkBufferMapEntriesForTable(const ChunkKey &table_key)

+ Here is the call graph for this function:

void foreign_storage::CachingForeignStorageMgr::refreshTableInCache ( const ChunkKey &  table_key)
private

Definition at line 233 of file CachingForeignStorageMgr.cpp.

References CHECK, disk_cache_, foreign_storage::fragment_maps_to_leaf(), foreign_storage::ForeignStorageCache::getCachedChunksForKeyPrefix(), foreign_storage::is_append_table_chunk_key(), foreign_storage::is_shardable_key(), is_table_key(), refreshAppendTableInCache(), refreshNonAppendTableInCache(), and show_chunk().

Referenced by refreshTable().

233  {
234  CHECK(is_table_key(table_key));
235 
236  // Preserve the list of which chunks were cached per table to refresh after clear.
237  std::vector<ChunkKey> old_chunk_keys =
238  disk_cache_->getCachedChunksForKeyPrefix(table_key);
239 
240  // Assert all data in cache is mapped to this leaf node in distributed.
241  if (is_shardable_key(table_key)) {
242  for (auto& key : old_chunk_keys) {
243  CHECK(fragment_maps_to_leaf(key)) << show_chunk(key);
244  }
245  }
246 
247  auto append_mode = is_append_table_chunk_key(table_key);
248 
249  append_mode ? refreshAppendTableInCache(table_key, old_chunk_keys)
250  : refreshNonAppendTableInCache(table_key, old_chunk_keys);
251 }
void refreshAppendTableInCache(const ChunkKey &table_key, const std::vector< ChunkKey > &old_chunk_keys)
bool is_table_key(const ChunkKey &key)
Definition: types.h:44
bool is_append_table_chunk_key(const ChunkKey &chunk_key)
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
void refreshNonAppendTableInCache(const ChunkKey &table_key, const std::vector< ChunkKey > &old_chunk_keys)
bool fragment_maps_to_leaf(const ChunkKey &key)
bool is_shardable_key(const ChunkKey &key)
std::vector< ChunkKey > getCachedChunksForKeyPrefix(const ChunkKey &) const
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::removeTableRelatedDS ( const int  db_id,
const int  table_id 
)
override

Definition at line 403 of file CachingForeignStorageMgr.cpp.

References foreign_storage::ForeignStorageCache::clearForTablePrefix(), disk_cache_, eraseDataWrapper(), and foreign_storage::ForeignStorageMgr::removeTableRelatedDS().

403  {
404  disk_cache_->clearForTablePrefix({db_id, table_id});
405  eraseDataWrapper({db_id, table_id});
406  ForeignStorageMgr::removeTableRelatedDS(db_id, table_id);
407 }
void removeTableRelatedDS(const int db_id, const int table_id) override
void eraseDataWrapper(const ChunkKey &key) override

+ Here is the call graph for this function:

Member Data Documentation


The documentation for this class was generated from the following files: