OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ForeignStorageCache.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  TODO(Misiu): A lot of methods here can be made asynchronous. It may be worth an
19  investigation to determine if it's worth adding async versions of them for performance
20  reasons.
21 */
22 
23 #include "ForeignStorageCache.h"
24 #include "Shared/File.h"
25 #include "Shared/measure.h"
26 
27 namespace foreign_storage {
28 using read_lock = mapd_shared_lock<mapd_shared_mutex>;
29 using write_lock = mapd_unique_lock<mapd_shared_mutex>;
30 
31 namespace {
32 template <typename Func, typename T>
34  T& chunk_collection,
35  const ChunkKey& chunk_prefix) {
36  ChunkKey upper_prefix(chunk_prefix);
37  upper_prefix.push_back(std::numeric_limits<int>::max());
38  auto end_it = chunk_collection.upper_bound(static_cast<const ChunkKey>(upper_prefix));
39  for (auto chunk_it = chunk_collection.lower_bound(chunk_prefix); chunk_it != end_it;
40  ++chunk_it) {
41  func(*chunk_it);
42  }
43 }
44 
46  buffer->initEncoder(meta->sqlType);
47  buffer->setSize(meta->numBytes);
48  buffer->getEncoder()->setNumElems(meta->numElements);
49  buffer->getEncoder()->resetChunkStats(meta->chunkStats);
50  buffer->setUpdated();
51 }
52 } // namespace
53 
55  validatePath(config.path);
56  caching_file_mgr_ = std::make_unique<File_Namespace::CachingFileMgr>(config);
57 }
58 
60  caching_file_mgr_->deleteBufferIfExists(chunk_key);
61 }
62 
64  AbstractBuffer* buf,
65  const size_t num_bytes) {
66  caching_file_mgr_->putBuffer(key, buf, num_bytes);
67  CHECK(!buf->isDirty());
68 }
69 
70 void ForeignStorageCache::checkpoint(const int32_t db_id, const int32_t tb_id) {
71  caching_file_mgr_->checkpoint(db_id, tb_id);
72 }
73 
75  const ChunkKey& chunk_key) {
76  auto buf = caching_file_mgr_->getBufferIfExists(chunk_key);
77 
78  if (buf) {
79  if ((*buf)->hasDataPages()) {
80  // 1. If the buffer has data pages then must be in the cache.
81  return *buf;
82  }
83  if (is_varlen_data_key(chunk_key)) {
84  // 2. If the buffer is a varlen data buffer and the
85  // corresponding chunk contains only nulls, then even
86  // without data pages it will still have been cached
87  // if it has a corresponding index buffer which does
88  // have dataPages
89  // Note the empty buffer proviso that the size be 0,
90  // corresponding to all nulls in the chunk
91  auto index_chunk_key = chunk_key;
92  index_chunk_key[CHUNK_KEY_VARLEN_IDX] = 2;
93  auto index_buffer = caching_file_mgr_->getBufferIfExists(index_chunk_key);
94  if (index_buffer && (*index_buffer)->hasDataPages() && (*buf)->size() == 0) {
95  return *buf;
96  }
97  }
98  }
99  // 3. Otherwise this chunk hasn't been cached.
100  return nullptr;
101 }
102 
103 bool ForeignStorageCache::isMetadataCached(const ChunkKey& chunk_key) const {
104  auto buf = caching_file_mgr_->getBufferIfExists(chunk_key);
105  if (buf) {
106  return (*buf)->hasEncoder();
107  }
108  return false;
109 }
110 
112  auto timer = DEBUG_TIMER(__func__);
113  if (metadata_vec.empty()) {
114  return;
115  }
116  auto first_chunk_key = metadata_vec.begin()->first;
117  for (auto& [chunk_key, metadata] : metadata_vec) {
118  CHECK(in_same_table(chunk_key, first_chunk_key));
119  AbstractBuffer* buf;
120  AbstractBuffer* index_buffer = nullptr;
121  ChunkKey index_chunk_key;
122  if (is_varlen_key(chunk_key)) {
123  // For variable length chunks, metadata is associated with the data chunk.
124  CHECK(is_varlen_data_key(chunk_key));
125  index_chunk_key = {chunk_key[CHUNK_KEY_DB_IDX],
126  chunk_key[CHUNK_KEY_TABLE_IDX],
127  chunk_key[CHUNK_KEY_COLUMN_IDX],
128  chunk_key[CHUNK_KEY_FRAGMENT_IDX],
129  2};
130  }
131  bool chunk_in_cache = false;
132  if (!caching_file_mgr_->isBufferOnDevice(chunk_key)) {
133  buf = caching_file_mgr_->createBuffer(chunk_key);
134 
135  if (!index_chunk_key.empty()) {
136  CHECK(!caching_file_mgr_->isBufferOnDevice(index_chunk_key));
137  index_buffer = caching_file_mgr_->createBuffer(index_chunk_key);
138  CHECK(index_buffer);
139  }
140  } else {
141  buf = caching_file_mgr_->getBuffer(chunk_key);
142 
143  if (!index_chunk_key.empty()) {
144  CHECK(caching_file_mgr_->isBufferOnDevice(index_chunk_key));
145  index_buffer = caching_file_mgr_->getBuffer(index_chunk_key);
146  CHECK(index_buffer);
147  }
148 
149  // We should have already cleared the data unless we are appending
150  // If the buffer metadata has changed, we need to remove this chunk
151  if (buf->getEncoder() != nullptr) {
152  const std::shared_ptr<ChunkMetadata> buf_metadata =
153  std::make_shared<ChunkMetadata>();
154  buf->getEncoder()->getMetadata(buf_metadata);
155  chunk_in_cache = *metadata.get() == *buf_metadata;
156  }
157  }
158 
159  if (!chunk_in_cache) {
160  set_metadata_for_buffer(buf, metadata.get());
161  eraseChunk(chunk_key);
162 
163  if (!index_chunk_key.empty()) {
164  CHECK(index_buffer);
165  index_buffer->setUpdated();
166  eraseChunk(index_chunk_key);
167  }
168  }
169  }
170  caching_file_mgr_->checkpoint(first_chunk_key[CHUNK_KEY_DB_IDX],
171  first_chunk_key[CHUNK_KEY_TABLE_IDX]);
172 }
173 
175  ChunkMetadataVector& metadata_vec,
176  const ChunkKey& chunk_prefix) const {
177  caching_file_mgr_->getChunkMetadataVecForKeyPrefix(metadata_vec, chunk_prefix);
178 }
179 
181  const ChunkKey& chunk_prefix) const {
182  ChunkMetadataVector meta_vec;
183  caching_file_mgr_->getChunkMetadataVecForKeyPrefix(meta_vec, chunk_prefix);
184  return (meta_vec.size() > 0);
185 }
186 
188  CHECK(is_table_key(chunk_prefix));
189  auto timer = DEBUG_TIMER(__func__);
190  caching_file_mgr_->clearForTable(chunk_prefix[CHUNK_KEY_DB_IDX],
191  chunk_prefix[CHUNK_KEY_TABLE_IDX]);
192 }
193 
195  auto timer = DEBUG_TIMER(__func__);
196  // FileMgrs do not clean up after themselves nicely, so we need to close all their disk
197  // resources and then re-create the CachingFileMgr to reset it.
198  caching_file_mgr_->closeRemovePhysical();
199  boost::filesystem::create_directory(caching_file_mgr_->getFileMgrBasePath());
200  caching_file_mgr_ = caching_file_mgr_->reconstruct();
201 }
202 
204  const ChunkKey& chunk_prefix) const {
205  return caching_file_mgr_->getChunkKeysForPrefix(chunk_prefix);
206 }
207 
209  const std::vector<ChunkKey>& chunk_keys) const {
210  ChunkToBufferMap chunk_buffer_map;
211  for (const auto& chunk_key : chunk_keys) {
212  CHECK(caching_file_mgr_->isBufferOnDevice(chunk_key));
213  chunk_buffer_map[chunk_key] = caching_file_mgr_->getBuffer(chunk_key);
214  auto file_buf =
215  dynamic_cast<File_Namespace::FileBuffer*>(chunk_buffer_map[chunk_key]);
216  CHECK(file_buf);
217  CHECK(!file_buf->hasDataPages());
218 
219  // Clear all buffer metadata
220  file_buf->resetToEmpty();
221  }
222  return chunk_buffer_map;
223 }
224 
226  caching_file_mgr_->removeChunkKeepMetadata(chunk_key);
227 }
228 
230  return caching_file_mgr_->dumpKeysWithChunkData();
231 }
232 
234  return caching_file_mgr_->dumpKeysWithMetadata();
235 }
236 
237 void ForeignStorageCache::validatePath(const std::string& base_path) const {
238  // check if base_path already exists, and if not create one
239  boost::filesystem::path path(base_path);
240  if (boost::filesystem::exists(path)) {
241  if (!boost::filesystem::is_directory(path)) {
242  throw std::runtime_error{
243  "cache path \"" + base_path +
244  "\" is not a directory. Please specify a valid directory "
245  "with --disk_cache_path=<path>, or use the default location."};
246  }
247  } else { // data directory does not exist
248  if (!boost::filesystem::create_directory(path)) {
249  throw std::runtime_error{
250  "could not create directory at cache path \"" + base_path +
251  "\". Please specify a valid directory location "
252  "with --disk_cache_path=<path> or use the default location."};
253  }
254  }
255 }
256 
258  const ChunkMetadataVector& metadata_vec,
259  const int frag_id) {
260  // Only re-cache last fragment and above
261  ChunkMetadataVector new_metadata_vec;
262  for (const auto& chunk_metadata : metadata_vec) {
263  if (chunk_metadata.first[CHUNK_KEY_FRAGMENT_IDX] >= frag_id) {
264  new_metadata_vec.push_back(chunk_metadata);
265  }
266  }
267  cacheMetadataVec(new_metadata_vec);
268 }
269 
271  const ChunkKey& chunk_key,
272  bool is_new_buffer) {
273  if (!is_new_buffer) {
274  CHECK(caching_file_mgr_->isBufferOnDevice(chunk_key));
275  return caching_file_mgr_->getBuffer(chunk_key);
276  } else {
277  CHECK(!caching_file_mgr_->isBufferOnDevice(chunk_key));
278  return caching_file_mgr_->createBuffer(chunk_key);
279  }
280 }
281 
282 void ForeignStorageCache::storeDataWrapper(const std::string& doc,
283  int32_t db_id,
284  int32_t tb_id) {
285  caching_file_mgr_->writeWrapperFile(doc, db_id, tb_id);
286 }
287 
288 } // namespace foreign_storage
std::vector< int > ChunkKey
Definition: types.h:37
bool isMetadataCached(const ChunkKey &) const
bool is_table_key(const ChunkKey &key)
Definition: types.h:45
bool is_varlen_data_key(const ChunkKey &key)
Definition: types.h:71
void storeDataWrapper(const std::string &doc, int32_t db_id, int32_t tb_id)
#define CHUNK_KEY_DB_IDX
Definition: types.h:39
#define CHUNK_KEY_FRAGMENT_IDX
Definition: types.h:42
std::map< ChunkKey, AbstractBuffer * > ChunkToBufferMap
Represents/provides access to contiguous data stored in the file system.
Definition: FileBuffer.h:58
void initEncoder(const SQLTypeInfo &tmp_sql_type)
void setNumElems(const size_t num_elems)
Definition: Encoder.h:234
ChunkStats chunkStats
Definition: ChunkMetadata.h:35
virtual bool resetChunkStats(const ChunkStats &)
: Reset chunk level stats (min, max, nulls) using new values from the argument.
Definition: Encoder.h:223
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
void getCachedMetadataVecForKeyPrefix(ChunkMetadataVector &, const ChunkKey &) const
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:40
void iterate_over_matching_prefix(Func func, T &chunk_collection, const ChunkKey &chunk_prefix)
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
An AbstractBuffer is a unit of data management for a data manager.
void cacheMetadataWithFragIdGreaterOrEqualTo(const ChunkMetadataVector &metadata_vec, const int frag_id)
void putBuffer(const ChunkKey &, AbstractBuffer *, const size_t numBytes=0)
void cacheMetadataVec(const ChunkMetadataVector &)
std::set< ChunkKey >::iterator eraseChunk(const std::set< ChunkKey >::iterator &)
std::unique_ptr< File_Namespace::CachingFileMgr > caching_file_mgr_
void deleteBufferIfExists(const ChunkKey &chunk_key)
void set_metadata_for_buffer(AbstractBuffer *buffer, ChunkMetadata *meta)
void validatePath(const std::string &) const
#define CHUNK_KEY_VARLEN_IDX
Definition: types.h:43
std::vector< ChunkKey > getCachedChunksForKeyPrefix(const ChunkKey &) const
AbstractBuffer * getChunkBufferForPrecaching(const ChunkKey &chunk_key, bool is_new_buffer)
void setSize(const size_t size)
mapd_shared_lock< mapd_shared_mutex > read_lock
void checkpoint(const int32_t db_id, const int32_t tb_id)
#define CHECK(condition)
Definition: Logger.h:209
#define DEBUG_TIMER(name)
Definition: Logger.h:352
File_Namespace::FileBuffer * getCachedChunkIfExists(const ChunkKey &)
bool hasCachedMetadataForKeyPrefix(const ChunkKey &) const
ChunkToBufferMap getChunkBuffersForCaching(const std::vector< ChunkKey > &chunk_keys) const
mapd_unique_lock< mapd_shared_mutex > write_lock
ForeignStorageCache(const File_Namespace::DiskCacheConfig &config)
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:41
bool in_same_table(const ChunkKey &left_key, const ChunkKey &right_key)
Definition: types.h:79
SQLTypeInfo sqlType
Definition: ChunkMetadata.h:32
A selection of helper methods for File I/O.
bool is_varlen_key(const ChunkKey &key)
Definition: types.h:67
size_t numElements
Definition: ChunkMetadata.h:34