OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::CachingForeignStorageMgr Class Reference

#include <CachingForeignStorageMgr.h>

+ Inheritance diagram for foreign_storage::CachingForeignStorageMgr:
+ Collaboration diagram for foreign_storage::CachingForeignStorageMgr:

Public Member Functions

 CachingForeignStorageMgr (ForeignStorageCache *cache)
 
void fetchBuffer (const ChunkKey &chunk_key, AbstractBuffer *destination_buffer, const size_t num_bytes) override
 
void getChunkMetadataVecForKeyPrefix (ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix) override
 
void getChunkMetadataVecFromDataWrapper (ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix)
 
void removeTableRelatedDS (const int db_id, const int table_id) override
 
void refreshTable (const ChunkKey &table_key, const bool evict_cached_entries) override
 
bool createDataWrapperIfNotExists (const ChunkKey &chunk_key) override
 
bool hasStoredDataWrapper (int32_t db, int32_t tb) const
 
- Public Member Functions inherited from foreign_storage::ForeignStorageMgr
 ForeignStorageMgr ()
 
 ~ForeignStorageMgr () override
 
AbstractBuffer * createBuffer (const ChunkKey &chunk_key, const size_t page_size, const size_t initial_size) override
 
void deleteBuffer (const ChunkKey &chunk_key, const bool purge) override
 
void deleteBuffersWithPrefix (const ChunkKey &chunk_key_prefix, const bool purge) override
 
AbstractBuffer * getBuffer (const ChunkKey &chunk_key, const size_t num_bytes) override
 
void fetchBuffer (const ChunkKey &chunk_key, AbstractBuffer *destination_buffer, const size_t num_bytes) override
 
AbstractBuffer * putBuffer (const ChunkKey &chunk_key, AbstractBuffer *source_buffer, const size_t num_bytes) override
 
void getChunkMetadataVecForKeyPrefix (ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix) override
 
bool isBufferOnDevice (const ChunkKey &chunk_key) override
 
std::string printSlabs () override
 
size_t getMaxSize () override
 
size_t getInUseSize () override
 
size_t getAllocated () override
 
bool isAllocationCapped () override
 
void checkpoint () override
 
void checkpoint (const int db_id, const int tb_id) override
 
AbstractBuffer * alloc (const size_t num_bytes) override
 
void free (AbstractBuffer *buffer) override
 
MgrType getMgrType () override
 
std::string getStringMgrType () override
 
size_t getNumChunks () override
 
void removeTableRelatedDS (const int db_id, const int table_id) override
 
bool hasDataWrapperForChunk (const ChunkKey &chunk_key) const
 
bool isDatawrapperRestored (const ChunkKey &chunk_key)
 
void setDataWrapper (const ChunkKey &table_key, std::shared_ptr< MockForeignDataWrapper > data_wrapper)
 
std::shared_ptr< ForeignDataWrapper > getDataWrapper (const ChunkKey &chunk_key) const
 
void setParallelismHints (const std::map< ChunkKey, std::set< ParallelismHint >> &hints_per_table)
 

Private Member Functions

void refreshTableInCache (const ChunkKey &table_key)
 
int getHighestCachedFragId (const ChunkKey &table_key)
 
void refreshAppendTableInCache (const ChunkKey &table_key, const std::vector< ChunkKey > &old_chunk_keys)
 
void refreshNonAppendTableInCache (const ChunkKey &table_key, const std::vector< ChunkKey > &old_chunk_keys)
 
void refreshChunksInCacheByFragment (const std::vector< ChunkKey > &old_chunk_keys, int last_frag_id)
 
void populateChunkBuffersSafely (ForeignDataWrapper &data_wrapper, ChunkToBufferMap &required_buffers, ChunkToBufferMap &optional_buffers)
 
void eraseDataWrapper (const ChunkKey &key) override
 
void clearTable (const ChunkKey &table_key)
 
size_t maxFetchSize (int32_t db_id) const override
 
bool hasMaxFetchSize () const override
 
std::set< ChunkKey > getOptionalKeysWithinSizeLimit (const ChunkKey &chunk_key, const std::set< ChunkKey, decltype(set_comp)* > &same_fragment_keys, const std::set< ChunkKey, decltype(set_comp)* > &diff_fragment_keys) const override
 
size_t getBufferSize (const ChunkKey &key) const
 
size_t getRequiredBuffersSize (const ChunkKey &chunk_key) const
 

Private Attributes

ForeignStorageCache * disk_cache_
 

Additional Inherited Members

- Public Types inherited from foreign_storage::ForeignStorageMgr
using ParallelismHint = std::pair< int, int >
 
- Protected Member Functions inherited from foreign_storage::ForeignStorageMgr
void updateFragmenterMetadata (const ChunkToBufferMap &) const
 
void createDataWrapperUnlocked (int32_t db, int32_t tb)
 
bool fetchBufferIfTempBufferMapEntryExists (const ChunkKey &chunk_key, AbstractBuffer *destination_buffer, const size_t num_bytes)
 
ChunkToBufferMap allocateTempBuffersForChunks (const std::set< ChunkKey > &chunk_keys)
 
void clearTempChunkBufferMapEntriesForTable (const ChunkKey &table_key)
 
void clearTempChunkBufferMapEntriesForTableUnlocked (const ChunkKey &table_key)
 
std::set< ChunkKey > getOptionalChunkKeySet (const ChunkKey &chunk_key, const std::set< ChunkKey > &required_chunk_keys, const ForeignDataWrapper::ParallelismLevel parallelism_level) const
 
std::pair< std::set< ChunkKey, decltype(set_comp)* >, std::set< ChunkKey, decltype(set_comp)* > > getPrefetchSets (const ChunkKey &chunk_key, const std::set< ChunkKey > &required_chunk_keys, const ForeignDataWrapper::ParallelismLevel parallelism_level) const
 
- Static Protected Member Functions inherited from foreign_storage::ForeignStorageMgr
static void checkIfS3NeedsToBeEnabled (const ChunkKey &chunk_key)
 
- Protected Attributes inherited from foreign_storage::ForeignStorageMgr
std::shared_mutex data_wrapper_mutex_
 
std::map< ChunkKey, std::shared_ptr< ForeignDataWrapper > > data_wrapper_map_
 
std::map< ChunkKey, std::shared_ptr< MockForeignDataWrapper > > mocked_wrapper_map_
 
std::map< ChunkKey, std::unique_ptr< AbstractBuffer > > temp_chunk_buffer_map_
 
std::shared_mutex temp_chunk_buffer_map_mutex_
 
std::shared_mutex parallelism_hints_mutex_
 
std::map< ChunkKey, std::set< ParallelismHint > > parallelism_hints_per_table_
 

Detailed Description

Definition at line 27 of file CachingForeignStorageMgr.h.

Constructor & Destructor Documentation

foreign_storage::CachingForeignStorageMgr::CachingForeignStorageMgr ( ForeignStorageCache * cache)

Definition at line 37 of file CachingForeignStorageMgr.cpp.

References CHECK, and disk_cache_.

38  : ForeignStorageMgr(), disk_cache_(cache) {
39  CHECK(disk_cache_);
40 }
#define CHECK(condition)
Definition: Logger.h:291

Member Function Documentation

void foreign_storage::CachingForeignStorageMgr::clearTable ( const ChunkKey & table_key)
private

Definition at line 253 of file CachingForeignStorageMgr.cpp.

References CHECK, foreign_storage::ForeignStorageCache::clearForTablePrefix(), disk_cache_, foreign_storage::ForeignStorageMgr::eraseDataWrapper(), and foreign_storage::ForeignStorageCache::hasCachedMetadataForKeyPrefix().

Referenced by getChunkMetadataVecFromDataWrapper(), refreshNonAppendTableInCache(), and refreshTable().

253  {
254  disk_cache_->clearForTablePrefix(table_key);
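255  CHECK(!disk_cache_->hasCachedMetadataForKeyPrefix(table_key));
256  ForeignStorageMgr::eraseDataWrapper(table_key);
257 }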
virtual void eraseDataWrapper(const ChunkKey &table_key)
#define CHECK(condition)
Definition: Logger.h:291
bool hasCachedMetadataForKeyPrefix(const ChunkKey &) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::CachingForeignStorageMgr::createDataWrapperIfNotExists ( const ChunkKey & chunk_key)
override virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 375 of file CachingForeignStorageMgr.cpp.

References foreign_storage::ForeignStorageMgr::createDataWrapperUnlocked(), foreign_storage::ForeignStorageMgr::data_wrapper_map_, foreign_storage::ForeignStorageMgr::data_wrapper_mutex_, disk_cache_, get_table_key(), get_table_prefix(), foreign_storage::ForeignStorageCache::getCachedMetadataVecForKeyPrefix(), and foreign_storage::ForeignStorageCache::getSerializedWrapperPath().

Referenced by getChunkMetadataVecForKeyPrefix().

375  {
376  std::lock_guard data_wrapper_lock(data_wrapper_mutex_);
377  ChunkKey table_key = get_table_key(chunk_key);
378  auto data_wrapper_it = data_wrapper_map_.find(table_key);
379  if (data_wrapper_it != data_wrapper_map_.end()) {
380  return false;
381  }
382  auto [db, tb] = get_table_prefix(chunk_key);
383  createDataWrapperUnlocked(db, tb);
384  auto wrapper_file = disk_cache_->getSerializedWrapperPath(db, tb);
385  if (boost::filesystem::exists(wrapper_file)) {
386  ChunkMetadataVector chunk_metadata;
387  disk_cache_->getCachedMetadataVecForKeyPrefix(chunk_metadata, table_key);
388  data_wrapper_map_.at(table_key)->restoreDataWrapperInternals(
389  disk_cache_->getSerializedWrapperPath(db, tb), chunk_metadata);
390  }
391  return true;
392 }
std::lock_guard< T > lock_guard
std::vector< int > ChunkKey
Definition: types.h:36
ChunkKey get_table_key(const ChunkKey &key)
Definition: types.h:57
void getCachedMetadataVecForKeyPrefix(ChunkMetadataVector &, const ChunkKey &) const
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
std::string getSerializedWrapperPath(int32_t db_id, int32_t tb_id) const
std::map< ChunkKey, std::shared_ptr< ForeignDataWrapper > > data_wrapper_map_
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
void createDataWrapperUnlocked(int32_t db, int32_t tb)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::eraseDataWrapper ( const ChunkKey & key)
override private virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 240 of file CachingForeignStorageMgr.cpp.

References CHECK, foreign_storage::ForeignStorageMgr::data_wrapper_map_, foreign_storage::ForeignStorageMgr::data_wrapper_mutex_, disk_cache_, get_table_prefix(), foreign_storage::ForeignStorageCache::getSerializedWrapperPath(), and is_table_key().

Referenced by getChunkMetadataVecForKeyPrefix().

240  {
241  CHECK(is_table_key(key));
242  std::lock_guard data_wrapper_lock(data_wrapper_mutex_);
243  // May not be created yet
244  if (data_wrapper_map_.find(key) != data_wrapper_map_.end()) {
245  auto [db, tb] = get_table_prefix(key);
246  // Need to erase serialized version on disk if it exists so we don't accidentally
247  // recover it after deleting.
248  boost::filesystem::remove_all(disk_cache_->getSerializedWrapperPath(db, tb));
249  data_wrapper_map_.erase(key);
250  }
251 }
std::lock_guard< T > lock_guard
bool is_table_key(const ChunkKey &key)
Definition: types.h:44
std::string getSerializedWrapperPath(int32_t db_id, int32_t tb_id) const
std::map< ChunkKey, std::shared_ptr< ForeignDataWrapper > > data_wrapper_map_
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::fetchBuffer ( const ChunkKey & chunk_key,
AbstractBuffer * destination_buffer,
const size_t  num_bytes 
)
override

Definition at line 82 of file CachingForeignStorageMgr.cpp.

References CHECK, CHUNK_KEY_DB_IDX, Data_Namespace::AbstractBuffer::copyTo(), disk_cache_, foreign_storage::ForeignStorageMgr::fetchBuffer(), foreign_storage::get_column_key_set(), foreign_storage::ForeignStorageCache::getCachedChunkIfExists(), foreign_storage::ForeignStorageCache::getChunkBuffersForCaching(), foreign_storage::ForeignStorageMgr::getDataWrapper(), foreign_storage::ForeignStorageMgr::getOptionalChunkKeySet(), getRequiredBuffersSize(), foreign_storage::anonymous_namespace{CachingForeignStorageMgr.cpp}::is_in_memory_system_table_chunk_key(), Data_Namespace::AbstractBuffer::isDirty(), maxFetchSize(), populateChunkBuffersSafely(), and foreign_storage::ChunkSizeValidator::validateChunkSize().

84  {
85  ChunkSizeValidator chunk_size_validator(chunk_key);
86  if (is_in_memory_system_table_chunk_key(chunk_key)) {
87  ForeignStorageMgr::fetchBuffer(chunk_key, destination_buffer, num_bytes);
88  chunk_size_validator.validateChunkSize(destination_buffer);
89  return;
90  }
91  CHECK(destination_buffer);
92  CHECK(!destination_buffer->isDirty());
93 
94  AbstractBuffer* buffer = disk_cache_->getCachedChunkIfExists(chunk_key);
95  if (buffer) {
96  chunk_size_validator.validateChunkSize(buffer);
97  buffer->copyTo(destination_buffer, num_bytes);
98  return;
99  } else {
100  auto required_size = getRequiredBuffersSize(chunk_key);
101  if (required_size > maxFetchSize(chunk_key[CHUNK_KEY_DB_IDX])) {
102  // If we don't have space in the cache then skip the caching.
103  ForeignStorageMgr::fetchBuffer(chunk_key, destination_buffer, num_bytes);
104  return;
105  }
106 
107  auto column_keys = get_column_key_set(chunk_key);
108 
109  // Use hints to prefetch other chunks in fragment into cache
110  auto& data_wrapper = *getDataWrapper(chunk_key);
111  auto optional_set = getOptionalChunkKeySet(
112  chunk_key, column_keys, data_wrapper.getCachedParallelismLevel());
113 
114  // Remove any chunks that are already cached.
115  // TODO(Misiu): Change to use std::erase_if when we get c++20
116  for (auto it = optional_set.begin(); it != optional_set.end();) {
117  if (disk_cache_->getCachedChunkIfExists(*it) != nullptr) {
118  it = optional_set.erase(it);
119  } else {
120  ++it;
121  }
122  }
123 
124  auto optional_buffers = disk_cache_->getChunkBuffersForCaching(optional_set);
125  auto required_buffers = disk_cache_->getChunkBuffersForCaching(column_keys);
126  CHECK(required_buffers.find(chunk_key) != required_buffers.end());
127  populateChunkBuffersSafely(data_wrapper, required_buffers, optional_buffers);
128 
129  AbstractBuffer* buffer = required_buffers.at(chunk_key);
130  CHECK(buffer);
131  buffer->copyTo(destination_buffer, num_bytes);
132  }
133 }
std::shared_ptr< ForeignDataWrapper > getDataWrapper(const ChunkKey &chunk_key) const
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
size_t maxFetchSize(int32_t db_id) const override
std::set< ChunkKey > get_column_key_set(const ChunkKey &destination_chunk_key)
std::set< ChunkKey > getOptionalChunkKeySet(const ChunkKey &chunk_key, const std::set< ChunkKey > &required_chunk_keys, const ForeignDataWrapper::ParallelismLevel parallelism_level) const
void fetchBuffer(const ChunkKey &chunk_key, AbstractBuffer *destination_buffer, const size_t num_bytes) override
An AbstractBuffer is a unit of data management for a data manager.
void populateChunkBuffersSafely(ForeignDataWrapper &data_wrapper, ChunkToBufferMap &required_buffers, ChunkToBufferMap &optional_buffers)
ChunkToBufferMap getChunkBuffersForCaching(const std::set< ChunkKey > &chunk_keys) const
void copyTo(AbstractBuffer *destination_buffer, const size_t num_bytes=0)
#define CHECK(condition)
Definition: Logger.h:291
File_Namespace::FileBuffer * getCachedChunkIfExists(const ChunkKey &)
size_t getRequiredBuffersSize(const ChunkKey &chunk_key) const

+ Here is the call graph for this function:

size_t foreign_storage::CachingForeignStorageMgr::getBufferSize ( const ChunkKey & key) const
private

Definition at line 443 of file CachingForeignStorageMgr.cpp.

References CHECK_EQ, disk_cache_, get_fragment_key(), foreign_storage::get_max_chunk_size(), foreign_storage::ForeignStorageCache::getCachedMetadataVecForKeyPrefix(), is_varlen_data_key(), is_varlen_key(), and show_chunk().

Referenced by getOptionalKeysWithinSizeLimit(), and getRequiredBuffersSize().

443  {
444  size_t num_bytes = 0;
445  ChunkMetadataVector meta;
446  disk_cache_->getCachedMetadataVecForKeyPrefix(meta, get_fragment_key(key));
447  CHECK_EQ(meta.size(), 1U) << show_chunk(key);
448  auto metadata = meta.begin()->second;
449 
450  if (is_varlen_key(key)) {
451  if (is_varlen_data_key(key)) {
452  num_bytes = get_max_chunk_size(key);
453  } else {
454  num_bytes = (metadata->sqlType.is_string())
455  ? sizeof(StringOffsetT) * (metadata->numElements + 1)
456  : sizeof(ArrayOffsetT) * (metadata->numElements + 1);
457  }
458  } else {
459  num_bytes = metadata->numBytes;
460  }
461  return num_bytes;
462 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
bool is_varlen_data_key(const ChunkKey &key)
Definition: types.h:75
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
int32_t StringOffsetT
Definition: sqltypes.h:1258
void getCachedMetadataVecForKeyPrefix(ChunkMetadataVector &, const ChunkKey &) const
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
size_t get_max_chunk_size(const ChunkKey &key)
int32_t ArrayOffsetT
Definition: sqltypes.h:1259
ChunkKey get_fragment_key(const ChunkKey &key)
Definition: types.h:90
bool is_varlen_key(const ChunkKey &key)
Definition: types.h:71

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecForKeyPrefix ( ChunkMetadataVector & chunk_metadata,
const ChunkKey & chunk_key_prefix
)
override

Definition at line 135 of file CachingForeignStorageMgr.cpp.

References foreign_storage::ForeignStorageCache::cacheMetadataVec(), CHECK, CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, createDataWrapperIfNotExists(), disk_cache_, eraseDataWrapper(), foreign_storage::fragment_maps_to_leaf(), get_table_key(), foreign_storage::ForeignStorageCache::getCachedMetadataVecForKeyPrefix(), foreign_storage::ForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), getChunkMetadataVecFromDataWrapper(), has_table_prefix(), foreign_storage::ForeignStorageCache::hasCachedMetadataForKeyPrefix(), foreign_storage::ForeignStorageCache::hasStoredDataWrapperMetadata(), dist::is_distributed(), foreign_storage::anonymous_namespace{CachingForeignStorageMgr.cpp}::is_in_memory_system_table_chunk_key(), foreign_storage::is_shardable_key(), and show_chunk().

137  {
138  if (is_in_memory_system_table_chunk_key(key_prefix)) {
139  ForeignStorageMgr::getChunkMetadataVecForKeyPrefix(chunk_metadata, key_prefix);
140  return;
141  }
142  CHECK(has_table_prefix(key_prefix));
143  // If the disk has any cached metadata for a prefix then it is guaranteed to have all
144  // metadata for that table, so we can return a complete set. If it has no metadata,
145  // then it may be that the table has no data, or that it's just not cached, so we need
146  // to go to storage to check.
147  if (disk_cache_->hasCachedMetadataForKeyPrefix(key_prefix)) {
148  disk_cache_->getCachedMetadataVecForKeyPrefix(chunk_metadata, key_prefix);
149 
150  // Assert all metadata in cache is mapped to this leaf node in distributed.
151  if (is_shardable_key(key_prefix)) {
152  for (auto& [key, meta] : chunk_metadata) {
153  CHECK(fragment_maps_to_leaf(key)) << show_chunk(key);
154  }
155  }
156 
157  // If the data in cache was restored from disk then it is possible that the wrapper
158  // does not exist yet. In this case the wrapper will be restored from disk if
159  // possible.
160  createDataWrapperIfNotExists(key_prefix);
161  return;
162  } else if (dist::is_distributed() &&
163  disk_cache_->hasStoredDataWrapperMetadata(key_prefix[CHUNK_KEY_DB_IDX],
164  key_prefix[CHUNK_KEY_TABLE_IDX])) {
165  // In distributed mode, it is possible to have all the chunk metadata filtered out for
166  // this node, after previously getting the chunk metadata from the wrapper and caching
167  // the wrapper metadata. In this case, return immediately and avoid doing a redundant
168  // metadata scan.
169  return;
170  }
171 
172  // If we have no cached data then either the data was evicted, was never populated, or
173  // the data for the table is an empty set (no chunks). In case we are hitting the first
174  // two, we should repopulate the data wrapper so just do it in all cases.
175  auto table_key = get_table_key(key_prefix);
176  eraseDataWrapper(table_key);
177  createDataWrapperIfNotExists(table_key);
178 
179  getChunkMetadataVecFromDataWrapper(chunk_metadata, key_prefix);
180  disk_cache_->cacheMetadataVec(chunk_metadata);
181 }
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
ChunkKey get_table_key(const ChunkKey &key)
Definition: types.h:57
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
void eraseDataWrapper(const ChunkKey &key) override
void getChunkMetadataVecFromDataWrapper(ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix)
void getCachedMetadataVecForKeyPrefix(ChunkMetadataVector &, const ChunkKey &) const
bool hasStoredDataWrapperMetadata(int32_t db_id, int32_t table_id) const
bool createDataWrapperIfNotExists(const ChunkKey &chunk_key) override
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
bool has_table_prefix(const ChunkKey &key)
Definition: types.h:48
bool fragment_maps_to_leaf(const ChunkKey &key)
bool is_shardable_key(const ChunkKey &key)
void cacheMetadataVec(const ChunkMetadataVector &)
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix) override
#define CHECK(condition)
Definition: Logger.h:291
bool hasCachedMetadataForKeyPrefix(const ChunkKey &) const
bool is_distributed()
Definition: distributed.cpp:21

+ Here is the call graph for this function:

void foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecFromDataWrapper ( ChunkMetadataVector & chunk_metadata,
const ChunkKey & chunk_key_prefix
)

Definition at line 183 of file CachingForeignStorageMgr.cpp.

References CHECK, foreign_storage::ForeignStorageCache::checkpoint(), clearTable(), disk_cache_, get_table_prefix(), foreign_storage::ForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), foreign_storage::ForeignStorageMgr::getDataWrapper(), has_table_prefix(), foreign_storage::is_table_enabled_on_node(), and foreign_storage::ForeignStorageCache::storeDataWrapper().

Referenced by getChunkMetadataVecForKeyPrefix(), refreshAppendTableInCache(), and refreshNonAppendTableInCache().

185  {
186  CHECK(has_table_prefix(chunk_key_prefix));
187  auto [db_id, tb_id] = get_table_prefix(chunk_key_prefix);
188  try {
189  ForeignStorageMgr::getChunkMetadataVecForKeyPrefix(chunk_metadata, chunk_key_prefix);
190  } catch (...) {
191  clearTable({db_id, tb_id});
192  throw;
193  }
194  // If the table was disabled then we will have no wrapper to serialize.
195  if (is_table_enabled_on_node(chunk_key_prefix)) {
196  auto doc = getDataWrapper(chunk_key_prefix)->getSerializedDataWrapper();
197  disk_cache_->storeDataWrapper(doc, db_id, tb_id);
198 
199  // If the wrapper populated buffers we want that action to be checkpointed.
200  disk_cache_->checkpoint(db_id, tb_id);
201  }
202 }
std::shared_ptr< ForeignDataWrapper > getDataWrapper(const ChunkKey &chunk_key) const
void storeDataWrapper(const std::string &doc, int32_t db_id, int32_t tb_id)
bool is_table_enabled_on_node(const ChunkKey &key)
bool has_table_prefix(const ChunkKey &key)
Definition: types.h:48
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix) override
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
void checkpoint(const int32_t db_id, const int32_t tb_id)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int foreign_storage::CachingForeignStorageMgr::getHighestCachedFragId ( const ChunkKey & table_key)
private

Definition at line 259 of file CachingForeignStorageMgr.cpp.

References CHUNK_KEY_FRAGMENT_IDX, disk_cache_, foreign_storage::ForeignStorageCache::getCachedMetadataVecForKeyPrefix(), and foreign_storage::ForeignStorageCache::hasCachedMetadataForKeyPrefix().

Referenced by refreshAppendTableInCache().

259  {
260  // Determine last fragment ID
261  int last_frag_id = 0;
262  if (disk_cache_->hasCachedMetadataForKeyPrefix(table_key)) {
263  ChunkMetadataVector cached_metadata;
264  disk_cache_->getCachedMetadataVecForKeyPrefix(cached_metadata, table_key);
265  for (const auto& [key, metadata] : cached_metadata) {
266  last_frag_id = std::max(last_frag_id, key[CHUNK_KEY_FRAGMENT_IDX]);
267  }
268  }
269  return last_frag_id;
270 }
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
void getCachedMetadataVecForKeyPrefix(ChunkMetadataVector &, const ChunkKey &) const
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
bool hasCachedMetadataForKeyPrefix(const ChunkKey &) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::set< ChunkKey > foreign_storage::CachingForeignStorageMgr::getOptionalKeysWithinSizeLimit ( const ChunkKey & chunk_key,
const std::set< ChunkKey, decltype(set_comp)* > &  same_fragment_keys,
const std::set< ChunkKey, decltype(set_comp)* > &  diff_fragment_keys 
) const
override private virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 416 of file CachingForeignStorageMgr.cpp.

References CHUNK_KEY_DB_IDX, foreign_storage::get_column_key_set(), getBufferSize(), getRequiredBuffersSize(), and maxFetchSize().

419  {
420  std::set<ChunkKey> optional_keys;
421  auto total_chunk_size = getRequiredBuffersSize(chunk_key);
422  auto max_size = maxFetchSize(chunk_key[CHUNK_KEY_DB_IDX]);
423  // Add keys to the list of optional keys starting with the same fragment. If we run out
424  // of space, then exit early with what we have added so far.
425  for (const auto& keys : {same_fragment_keys, diff_fragment_keys}) {
426  for (const auto& key : keys) {
427  auto column_keys = get_column_key_set(key);
428  for (const auto& column_key : column_keys) {
429  total_chunk_size += getBufferSize(column_key);
430  }
431  // Early exit if we exceed the size limit.
432  if (total_chunk_size > max_size) {
433  return optional_keys;
434  }
435  for (const auto& column_key : column_keys) {
436  optional_keys.emplace(column_key);
437  }
438  }
439  }
440  return optional_keys;
441 }
size_t getBufferSize(const ChunkKey &key) const
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
size_t maxFetchSize(int32_t db_id) const override
std::set< ChunkKey > get_column_key_set(const ChunkKey &destination_chunk_key)
size_t getRequiredBuffersSize(const ChunkKey &chunk_key) const

+ Here is the call graph for this function:

size_t foreign_storage::CachingForeignStorageMgr::getRequiredBuffersSize ( const ChunkKey & chunk_key) const
private

Definition at line 407 of file CachingForeignStorageMgr.cpp.

References foreign_storage::get_column_key_set(), and getBufferSize().

Referenced by fetchBuffer(), and getOptionalKeysWithinSizeLimit().

407  {
408  auto key_set = get_column_key_set(chunk_key);
409  size_t total_size = 0U;
410  for (const auto& key : key_set) {
411  total_size += getBufferSize(key);
412  }
413  return total_size;
414 }
size_t getBufferSize(const ChunkKey &key) const
std::set< ChunkKey > get_column_key_set(const ChunkKey &destination_chunk_key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::CachingForeignStorageMgr::hasMaxFetchSize ( ) const
override private virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 403 of file CachingForeignStorageMgr.cpp.

403  {
404  return true;
405 }
bool foreign_storage::CachingForeignStorageMgr::hasStoredDataWrapper ( int32_t  db,
int32_t  tb 
) const

Definition at line 216 of file CachingForeignStorageMgr.cpp.

References disk_cache_, and foreign_storage::ForeignStorageCache::hasStoredDataWrapperMetadata().

216  {
217  return disk_cache_->hasStoredDataWrapperMetadata(db, tb);
218 }
bool hasStoredDataWrapperMetadata(int32_t db_id, int32_t table_id) const

+ Here is the call graph for this function:

size_t foreign_storage::CachingForeignStorageMgr::maxFetchSize ( int32_t  db_id) const
override private virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 399 of file CachingForeignStorageMgr.cpp.

References disk_cache_, and foreign_storage::ForeignStorageCache::getMaxChunkDataSize().

Referenced by fetchBuffer(), and getOptionalKeysWithinSizeLimit().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::populateChunkBuffersSafely ( ForeignDataWrapper & data_wrapper,
ChunkToBufferMap & required_buffers,
ChunkToBufferMap & optional_buffers
)
private

Definition at line 42 of file CachingForeignStorageMgr.cpp.

References CHECK_GT, foreign_storage::ForeignStorageCache::checkpoint(), disk_cache_, get_table_prefix(), foreign_storage::ForeignDataWrapper::populateChunkBuffers(), and foreign_storage::ForeignStorageMgr::updateFragmenterMetadata().

Referenced by fetchBuffer(), and refreshChunksInCacheByFragment().

45  {
46  CHECK_GT(required_buffers.size(), 0U) << "Must populate at least one buffer";
47  try {
48  ChunkSizeValidator chunk_size_validator(required_buffers.begin()->first);
49  data_wrapper.populateChunkBuffers(required_buffers, optional_buffers);
50  chunk_size_validator.validateChunkSizes(required_buffers);
51  chunk_size_validator.validateChunkSizes(optional_buffers);
52  updateFragmenterMetadata(required_buffers);
53  updateFragmenterMetadata(optional_buffers);
54  } catch (const std::runtime_error& error) {
55  // clear any partially loaded but failed chunks (there may be some
56  // fully-loaded chunks as well but they will be cleared conservatively
57  // anyways)
58  for (const auto& [chunk_key, buffer] : required_buffers) {
59  if (auto file_buffer = dynamic_cast<File_Namespace::FileBuffer*>(buffer)) {
60  file_buffer->freeChunkPages();
61  }
62  }
63  for (const auto& [chunk_key, buffer] : optional_buffers) {
64  if (auto file_buffer = dynamic_cast<File_Namespace::FileBuffer*>(buffer)) {
65  file_buffer->freeChunkPages();
66  }
67  }
68 
69  throw ForeignStorageException(error.what());
70  }
71  // All required buffers should be from the same table.
72  auto [db, tb] = get_table_prefix(required_buffers.begin()->first);
73  disk_cache_->checkpoint(db, tb);
74 }
#define CHECK_GT(x, y)
Definition: Logger.h:305
void updateFragmenterMetadata(const ChunkToBufferMap &) const
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
void checkpoint(const int32_t db_id, const int32_t tb_id)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::refreshAppendTableInCache ( const ChunkKey & table_key,
const std::vector< ChunkKey > &  old_chunk_keys 
)
private

Definition at line 272 of file CachingForeignStorageMgr.cpp.

References foreign_storage::ForeignStorageCache::cacheMetadataVec(), CHECK, disk_cache_, getChunkMetadataVecFromDataWrapper(), getHighestCachedFragId(), is_table_key(), and refreshChunksInCacheByFragment().

Referenced by refreshTableInCache().

274  {
275  CHECK(is_table_key(table_key));
276  int last_frag_id = getHighestCachedFragId(table_key);
277 
278  ChunkMetadataVector storage_metadata;
279  getChunkMetadataVecFromDataWrapper(storage_metadata, table_key);
280  try {
281  disk_cache_->cacheMetadataVec(storage_metadata);
282  refreshChunksInCacheByFragment(old_chunk_keys, last_frag_id);
283  } catch (std::runtime_error& e) {
285  }
286 }
bool is_table_key(const ChunkKey &key)
Definition: types.h:44
void getChunkMetadataVecFromDataWrapper(ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix)
void refreshChunksInCacheByFragment(const std::vector< ChunkKey > &old_chunk_keys, int last_frag_id)
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
void cacheMetadataVec(const ChunkMetadataVector &)
#define CHECK(condition)
Definition: Logger.h:291
int getHighestCachedFragId(const ChunkKey &table_key)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::refreshChunksInCacheByFragment ( const std::vector< ChunkKey > &  old_chunk_keys,
int  last_frag_id 
)
private

Definition at line 304 of file CachingForeignStorageMgr.cpp.

References CHECK, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_FRAGMENT_IDX, CHUNK_KEY_TABLE_IDX, disk_cache_, get_table_key(), foreign_storage::ForeignStorageCache::getCachedChunkIfExists(), foreign_storage::ForeignStorageCache::getChunkBuffersForCaching(), foreign_storage::ForeignStorageMgr::getDataWrapper(), is_varlen_data_key(), is_varlen_key(), foreign_storage::ForeignStorageCache::isMetadataCached(), LOG, foreign_storage::anonymous_namespace{CachingForeignStorageMgr.cpp}::MAX_REFRESH_TIME_IN_SECONDS, populateChunkBuffersSafely(), and logger::WARNING.

Referenced by refreshAppendTableInCache(), and refreshNonAppendTableInCache().

306  {
307  int64_t total_time{0};
308  auto fragment_refresh_start_time = std::chrono::high_resolution_clock::now();
309 
310  if (old_chunk_keys.empty()) {
311  return;
312  }
313  // Iterate through previously cached chunks and re-cache them. Caching is
314  // done one fragment at a time, for all applicable chunks in the fragment.
315  ChunkToBufferMap optional_buffers;
316  std::set<ChunkKey> chunk_keys_to_be_cached;
317  auto fragment_id = old_chunk_keys[0][CHUNK_KEY_FRAGMENT_IDX];
318  const ChunkKey table_key{get_table_key(old_chunk_keys[0])};
319  std::set<ChunkKey> chunk_keys_in_fragment;
320  for (const auto& chunk_key : old_chunk_keys) {
321  CHECK(chunk_key[CHUNK_KEY_TABLE_IDX] == table_key[CHUNK_KEY_TABLE_IDX]);
322  if (chunk_key[CHUNK_KEY_FRAGMENT_IDX] < start_frag_id) {
323  continue;
324  }
325  if (disk_cache_->isMetadataCached(chunk_key)) {
326  if (chunk_key[CHUNK_KEY_FRAGMENT_IDX] != fragment_id) {
327  if (chunk_keys_in_fragment.size() > 0) {
328  auto required_buffers =
329  disk_cache_->getChunkBuffersForCaching(chunk_keys_in_fragment);
330  populateChunkBuffersSafely(
331  *getDataWrapper(table_key), required_buffers, optional_buffers);
332  chunk_keys_in_fragment.clear();
333  }
334  // At this point, cache buffers for refreshable chunks in the last fragment
335  // have been populated. Exit if the max refresh time has been exceeded.
336  // Otherwise, move to the next fragment.
337  auto current_time = std::chrono::high_resolution_clock::now();
338  total_time += std::chrono::duration_cast<std::chrono::seconds>(
339  current_time - fragment_refresh_start_time)
340  .count();
341  if (total_time >= MAX_REFRESH_TIME_IN_SECONDS) {
342  LOG(WARNING) << "Refresh time exceeded for table key: { " << table_key[0]
343  << ", " << table_key[1] << " } after fragment id: " << fragment_id;
344  break;
345  } else {
346  fragment_refresh_start_time = std::chrono::high_resolution_clock::now();
347  }
348  fragment_id = chunk_key[CHUNK_KEY_FRAGMENT_IDX];
349  }
350  // Key may have been cached during scan
351  if (disk_cache_->getCachedChunkIfExists(chunk_key) == nullptr) {
352  if (is_varlen_key(chunk_key)) {
353  CHECK(is_varlen_data_key(chunk_key));
354  ChunkKey index_chunk_key{chunk_key[CHUNK_KEY_DB_IDX],
355  chunk_key[CHUNK_KEY_TABLE_IDX],
356  chunk_key[CHUNK_KEY_COLUMN_IDX],
357  chunk_key[CHUNK_KEY_FRAGMENT_IDX],
358  2};
359  chunk_keys_in_fragment.emplace(index_chunk_key);
360  chunk_keys_to_be_cached.emplace(index_chunk_key);
361  }
362  chunk_keys_in_fragment.emplace(chunk_key);
363  chunk_keys_to_be_cached.emplace(chunk_key);
364  }
365  }
366  }
367  if (chunk_keys_in_fragment.size() > 0) {
368  auto required_buffers =
369  disk_cache_->getChunkBuffersForCaching(chunk_keys_in_fragment);
370  populateChunkBuffersSafely(
371  *getDataWrapper(table_key), required_buffers, optional_buffers);
372  }
373 }
std::vector< int > ChunkKey
Definition: types.h:36
bool isMetadataCached(const ChunkKey &) const
bool is_varlen_data_key(const ChunkKey &key)
Definition: types.h:75
std::shared_ptr< ForeignDataWrapper > getDataWrapper(const ChunkKey &chunk_key) const
#define LOG(tag)
Definition: Logger.h:285
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:41
std::map< ChunkKey, AbstractBuffer * > ChunkToBufferMap
ChunkKey get_table_key(const ChunkKey &key)
Definition: types.h:57
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
void populateChunkBuffersSafely(ForeignDataWrapper &data_wrapper, ChunkToBufferMap &required_buffers, ChunkToBufferMap &optional_buffers)
ChunkToBufferMap getChunkBuffersForCaching(const std::set< ChunkKey > &chunk_keys) const
#define CHECK(condition)
Definition: Logger.h:291
File_Namespace::FileBuffer * getCachedChunkIfExists(const ChunkKey &)
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40
bool is_varlen_key(const ChunkKey &key)
Definition: types.h:71

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::refreshNonAppendTableInCache ( const ChunkKey &  table_key,
const std::vector< ChunkKey > &  old_chunk_keys 
)
private

Definition at line 288 of file CachingForeignStorageMgr.cpp.

References foreign_storage::ForeignStorageCache::cacheMetadataVec(), CHECK, clearTable(), disk_cache_, getChunkMetadataVecFromDataWrapper(), is_table_key(), and refreshChunksInCacheByFragment().

Referenced by refreshTableInCache().

290  {
291  CHECK(is_table_key(table_key));
292  ChunkMetadataVector storage_metadata;
293  clearTable(table_key);
294  getChunkMetadataVecFromDataWrapper(storage_metadata, table_key);
295 
296  try {
297  disk_cache_->cacheMetadataVec(storage_metadata);
298  refreshChunksInCacheByFragment(old_chunk_keys, 0);
299  } catch (std::runtime_error& e) {
301  }
302 }
bool is_table_key(const ChunkKey &key)
Definition: types.h:44
void getChunkMetadataVecFromDataWrapper(ChunkMetadataVector &chunk_metadata, const ChunkKey &chunk_key_prefix)
void refreshChunksInCacheByFragment(const std::vector< ChunkKey > &old_chunk_keys, int last_frag_id)
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
void cacheMetadataVec(const ChunkMetadataVector &)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::refreshTable ( const ChunkKey &  table_key,
const bool  evict_cached_entries 
)
override virtual

Reimplemented from foreign_storage::ForeignStorageMgr.

Definition at line 204 of file CachingForeignStorageMgr.cpp.

References CHECK, foreign_storage::ForeignStorageMgr::checkIfS3NeedsToBeEnabled(), clearTable(), foreign_storage::ForeignStorageMgr::clearTempChunkBufferMapEntriesForTable(), is_table_key(), and refreshTableInCache().

205  {
206  CHECK(is_table_key(table_key));
209  if (evict_cached_entries) {
210  clearTable(table_key);
211  } else {
212  refreshTableInCache(table_key);
213  }
214 }
bool is_table_key(const ChunkKey &key)
Definition: types.h:44
static void checkIfS3NeedsToBeEnabled(const ChunkKey &chunk_key)
void refreshTableInCache(const ChunkKey &table_key)
#define CHECK(condition)
Definition: Logger.h:291
void clearTempChunkBufferMapEntriesForTable(const ChunkKey &table_key)

+ Here is the call graph for this function:

void foreign_storage::CachingForeignStorageMgr::refreshTableInCache ( const ChunkKey &  table_key)
private

Definition at line 220 of file CachingForeignStorageMgr.cpp.

References CHECK, disk_cache_, foreign_storage::fragment_maps_to_leaf(), foreign_storage::ForeignStorageCache::getCachedChunksForKeyPrefix(), foreign_storage::is_append_table_chunk_key(), foreign_storage::is_shardable_key(), is_table_key(), refreshAppendTableInCache(), refreshNonAppendTableInCache(), and show_chunk().

Referenced by refreshTable().

220  {
221  CHECK(is_table_key(table_key));
222 
223  // Preserve the list of which chunks were cached per table to refresh after clear.
224  std::vector<ChunkKey> old_chunk_keys =
225  disk_cache_->getCachedChunksForKeyPrefix(table_key);
226 
227  // Assert all data in cache is mapped to this leaf node in distributed.
228  if (is_shardable_key(table_key)) {
229  for (auto& key : old_chunk_keys) {
230  CHECK(fragment_maps_to_leaf(key)) << show_chunk(key);
231  }
232  }
233 
234  auto append_mode = is_append_table_chunk_key(table_key);
235 
236  append_mode ? refreshAppendTableInCache(table_key, old_chunk_keys)
237  : refreshNonAppendTableInCache(table_key, old_chunk_keys);
238 }
void refreshAppendTableInCache(const ChunkKey &table_key, const std::vector< ChunkKey > &old_chunk_keys)
bool is_table_key(const ChunkKey &key)
Definition: types.h:44
bool is_append_table_chunk_key(const ChunkKey &chunk_key)
std::string show_chunk(const ChunkKey &key)
Definition: types.h:98
void refreshNonAppendTableInCache(const ChunkKey &table_key, const std::vector< ChunkKey > &old_chunk_keys)
bool fragment_maps_to_leaf(const ChunkKey &key)
bool is_shardable_key(const ChunkKey &key)
std::vector< ChunkKey > getCachedChunksForKeyPrefix(const ChunkKey &) const
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::CachingForeignStorageMgr::removeTableRelatedDS ( const int  db_id,
const int  table_id 
)
override

Definition at line 394 of file CachingForeignStorageMgr.cpp.

References foreign_storage::ForeignStorageCache::clearForTablePrefix(), disk_cache_, and foreign_storage::ForeignStorageMgr::removeTableRelatedDS().

394  {
395  disk_cache_->clearForTablePrefix({db_id, table_id});
396  ForeignStorageMgr::removeTableRelatedDS(db_id, table_id);
397 }
void removeTableRelatedDS(const int db_id, const int table_id) override

+ Here is the call graph for this function:

Member Data Documentation


The documentation for this class was generated from the following files: