OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DataMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "AbstractBuffer.h"
26 #include "AbstractBufferMgr.h"
27 #include "BufferMgr/Buffer.h"
28 #include "BufferMgr/BufferMgr.h"
29 #include "MemoryLevel.h"
30 #include "OSDependent/heavyai_fs.h"
34 
35 #include <fstream>
36 #include <iomanip>
37 #include <iostream>
38 #include <map>
39 #include <string>
40 #include <unordered_map>
41 #include <vector>
42 
43 namespace File_Namespace {
44 class FileBuffer;
45 class GlobalFileMgr;
46 } // namespace File_Namespace
47 
48 namespace CudaMgr_Namespace {
49 class CudaMgr;
50 }
51 
52 class DeviceAllocator;
53 
54 namespace Buffer_Namespace {
55 class CpuBufferMgr;
56 class GpuCudaBufferMgr;
57 } // namespace Buffer_Namespace
58 
59 namespace Data_Namespace {
60 
// Plain aggregate bundling slab/page bookkeeping fields with a chunk key.
// NOTE(review): field meanings are only suggested by their names (slab index, first page,
// page count, touch counter, owning chunk) -- confirm against the buffer manager that fills them.
// NOTE(review): this listing jumps from line 66 to line 68; the cross-reference index places
// `Buffer_Namespace::MemStatus memStatus` at DataMgr.h:67, so that member is not shown here.
61 struct MemoryData {
62  size_t slabNum;
63  int32_t startPage;
64  size_t numPages;
65  uint32_t touch;
66  std::vector<int32_t> chunk_key;
68 };
69 
// Aggregate pairing a page size and a page-count limit with a list of MemoryData records.
// DataMgr::getMemoryInfo() and getMemoryInfoUnlocked() return a std::vector of these.
// NOTE(review): lines 73-74 of the original header are absent from this listing, so the
// struct may have additional members that are not visible here.
70 struct MemoryInfo {
71  size_t pageSize;
72  size_t maxNumPages;
75  std::vector<MemoryData> nodeMemoryData;
76 };
77 
// ProcMeminfoParser: parses /proc/meminfo into name -> value pairs held in items_.
// NOTE(review): the class header (line 79) and the constructor signature (line 83) are missing
// from this listing; the statements from line 84 to line 104 are the constructor body.
80  std::unordered_map<std::string, size_t> items_;
81 
82  public:
    // Read the whole of /proc/meminfo into memory before parsing it line by line.
84  std::ifstream f("/proc/meminfo");
85  std::stringstream ss;
86  ss << f.rdbuf();
87 
88  for (const std::string& line : split(ss.str(), "\n")) {
89  if (line.empty()) {
90  continue;
91  }
    // Split at the first ':' only (maxsplit == 1), giving exactly {name, rest-of-line}.
92  const auto nv = split(line, ":", 1);
93  CHECK(nv.size() == 2) << "unexpected line format in /proc/meminfo: " << line;
94  const auto name = strip(nv[0]), value = to_lower(strip(nv[1]));
    // The value is either "<number>" or "<number> <unit>".
    // NOTE(review): split() with no delimiter presumably splits on whitespace -- confirm.
95  auto v = split(value);
96  CHECK(v.size() == 1 || v.size() == 2)
97  << "unexpected line format in /proc/meminfo: " << line;
98  items_[name] = std::atoll(v[0].c_str());
99  if (v.size() == 2) {
    // "kb" (after lower-casing) is the only accepted unit; such values are scaled by 1024.
100  CHECK(v[1] == "kb") << "unexpected unit suffix in /proc/meminfo: " << line;
101  items_[name] *= 1024;
102  }
103  }
104  }
105 
    // Returns the stored value by copy. Because this goes through unordered_map::operator[],
    // looking up a name that was never parsed inserts it with value 0 and returns 0.
106  auto operator[](const std::string& name) { return items_[name]; }
    // Iteration over all parsed (name, value) pairs, in unordered_map order.
107  auto begin() { return items_.begin(); }
108  auto end() { return items_.end(); }
109 };
110 
113  std::string inputText_;
114  std::vector<size_t> orders_;
116 
117  public:
118  ProcBuddyinfoParser(std::string text = {}) {
119  if (text.empty()) {
120  std::ifstream f("/proc/buddyinfo");
121  std::stringstream ss;
122  ss << f.rdbuf();
123  text = ss.str();
124  }
125  inputText_ = text;
126 
127  const size_t skipped_columns = 4;
128  // NOTE(sy): For now this calculation ignores the first four buddyinfo columns,
129  // but in the future we could break out subscores by node and/or by zone.
130  size_t number_of_columns = 0;
131  for (const std::string& line : split(text, "\n")) {
132  if (line.empty()) {
133  continue;
134  }
135  const auto columns = split(line);
136  CHECK_GT(columns.size(), skipped_columns) << "unexpected line format: " << line;
137  if (number_of_columns != 0) {
138  CHECK_EQ(columns.size(), number_of_columns)
139  << "expected line to have " << number_of_columns << " columns: " << line;
140  } else {
141  number_of_columns = columns.size();
142  orders_.resize(number_of_columns - skipped_columns, 0);
143  }
144  for (size_t i = skipped_columns; i < number_of_columns; ++i) {
145  orders_[i - skipped_columns] += strtoull(columns[i].c_str(), NULL, 10);
146  }
147  }
148 #ifdef __linux__
149  const long page_size =
150  sysconf(_SC_PAGE_SIZE); // in case x86-64 is configured to use 2MB pages
151 #else
152  const long page_size = heavyai::get_page_size();
153 #endif
154  size_t scaled = 0;
155  size_t total = 0;
156  for (size_t order = 0; order < orders_.size(); ++order) {
157  const size_t bytes = orders_[order] * (size_t(1) << order) * page_size;
158  scaled += (bytes * (orders_.size() - 1 - order)) / (orders_.size() - 1);
159  total += bytes;
160  }
161 
162  CHECK_GT(total, size_t(0)) << "failed to parse:\n" << text;
163  fragmentationPercent_ = (scaled * 100) / total;
164  }
165 
166  auto operator[](size_t order) {
167  return orders_[order];
168  }
169  auto begin() {
170  return orders_.begin();
171  }
172  auto end() {
173  return orders_.end();
174  }
176  return fragmentationPercent_;
177  }
178  auto getInputText() {
179  return inputText_;
180  }
181 };
182 
// DataMgr: declaration of the data manager class. Only declarations and two inline accessors
// appear here; per the cross-reference index the member functions are defined in DataMgr.cpp.
// NOTE(review): several declaration lines are missing from this listing (195, 197, 201, 226,
// 236, 245, 255, 258, 267, 292), so some statements below appear truncated.
183 class DataMgr {
184  friend class GlobalFileMgr;
185 
186  public:
     // Takes ownership of cudaMgr (passed as std::unique_ptr).
     // reservedGpuMem defaults to (1 << 27) == 134,217,728.
     // NOTE(review): the unit is presumably bytes -- confirm.
     // NOTE(review): line 195 (the default value of cacheConfig and the closing of the
     // parameter list) is missing; the index shows it as File_Namespace::DiskCacheConfig().
187  explicit DataMgr(
188  const std::string& dataDir,
189  const SystemParameters& system_parameters,
190  std::unique_ptr<CudaMgr_Namespace::CudaMgr> cudaMgr,
191  const bool useGpus,
192  const size_t reservedGpuMem = (1 << 27),
193  const size_t numReaderThreads = 0, /* 0 means use default for # of reader threads */
194  const File_Namespace::DiskCacheConfig cacheConfig =
196  ~DataMgr();
     // NOTE(review): line 197 is missing; per the index these are the trailing parameters of
     // AbstractBuffer* createChunkBuffer(const ChunkKey& key, ...).
198  const MemoryLevel memoryLevel,
199  const int deviceId = 0,
200  const size_t page_size = 0);
     // NOTE(review): line 201 is missing; per the index these are the trailing parameters of
     // AbstractBuffer* getChunkBuffer(const ChunkKey& key, ...).
202  const MemoryLevel memoryLevel,
203  const int deviceId = 0,
204  const size_t numBytes = 0);
205  void deleteChunk(const ChunkKey& key, const MemoryLevel mem_level, const int device_id);
206  void deleteChunksWithPrefix(const ChunkKey& keyPrefix);
207  void deleteChunksWithPrefix(const ChunkKey& keyPrefix, const MemoryLevel memLevel);
208  AbstractBuffer* alloc(const MemoryLevel memoryLevel,
209  const int deviceId,
210  const size_t numBytes);
211  void free(AbstractBuffer* buffer);
212  // copies one buffer to another
213  void copy(AbstractBuffer* destBuffer, AbstractBuffer* srcBuffer);
214  bool isBufferOnDevice(const ChunkKey& key,
215  const MemoryLevel memLevel,
216  const int deviceId);
     // NOTE(review): the "Unlocked" variant presumably expects the caller to already hold
     // buffer_access_mutex_ -- confirm in DataMgr.cpp.
217  std::vector<MemoryInfo> getMemoryInfo(const MemoryLevel memLevel) const;
218  std::vector<MemoryInfo> getMemoryInfoUnlocked(const MemoryLevel memLevel) const;
219  std::string dumpLevel(const MemoryLevel memLevel);
220  void clearMemory(const MemoryLevel memLevel);
221 
222  const std::map<ChunkKey, File_Namespace::FileBuffer*>& getChunkMap();
223  void checkpoint(const int db_id,
224  const int tb_id); // checkpoint for individual table of DB
225  void checkpoint(const int db_id, const int table_id, const MemoryLevel memory_level);
     // NOTE(review): line 226 is missing; per the index this is the second parameter of
     // void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector& chunkMetadataVec, ...).
227  const ChunkKey& keyPrefix);
228  inline bool gpusPresent() const { return hasGpus_; }
229  void removeTableRelatedDS(const int db_id, const int tb_id);
230  void removeMutableTableDiskCacheData(const int db_id, const int tb_id) const;
231  void setTableEpoch(const int db_id, const int tb_id, const int start_epoch);
232  size_t getTableEpoch(const int db_id, const int tb_id);
233  void resetTableEpochFloor(const int32_t db_id, const int32_t tb_id);
234 
     // Non-owning pointer to the CUDA manager held by cudaMgr_.
235  CudaMgr_Namespace::CudaMgr* getCudaMgr() const { return cudaMgr_.get(); }
237  std::shared_ptr<ForeignStorageInterface> getForeignStorageInterface() const;
238 
239  // database_id, table_id, column_id, fragment_id
240  std::vector<int> levelSizes_;
241 
242  // std::unique_ptr<DeviceAllocator> createGpuAllocator(int device_id);
243  // NOTE(sy): Revisit how DataMgr should handle Cuda streams if Intel ever needs this.
244 
     // NOTE(review): line 245 (the opening of a nested struct) is missing; the fields below
     // presumably belong to DataMgr::SystemMemoryUsage, the type taken by operator<< declared
     // after this class -- confirm.
246  size_t free; // available CPU RAM memory in bytes
247  size_t total; // total CPU RAM memory in bytes
248  size_t resident; // resident process memory in bytes
249  size_t vtotal; // total process virtual memory in bytes
250  size_t regular; // process bytes non-shared
251  size_t shared; // process bytes shared (file maps + shmem)
252  size_t frag; // fragmentation percent
253  };
254 
256  static size_t getTotalSystemMemory();
257 
259  void resetBufferMgrs(const File_Namespace::DiskCacheConfig& cache_config,
260  const size_t num_reader_threads,
261  const SystemParameters& sys_params);
262 
263  size_t getCpuBufferPoolSize() const;
264  size_t getGpuBufferPoolSize() const;
265 
266  // Used for testing.
     // NOTE(review): line 267 (the declaration this comment describes) is missing; the index
     // lists Buffer_Namespace::CpuBufferMgr* getCpuBufferMgr() const, presumably that one.
268 
269  // Used for testing.
270  Buffer_Namespace::GpuCudaBufferMgr* getGpuBufferMgr(int32_t device_id) const;
271 
272  static void atExitHandler();
273 
274  private:
275  void populateMgrs(const SystemParameters& system_parameters,
276  const size_t userSpecifiedNumReaderThreads,
277  const File_Namespace::DiskCacheConfig& cache_config);
278  void convertDB(const std::string basePath);
279  void checkpoint(); // checkpoint for whole DB, called from convertDB proc only
280  void createTopLevelMetadata() const;
281  void allocateCpuBufferMgr(int32_t device_id,
282  size_t total_cpu_size,
283  size_t minCpuSlabSize,
284  size_t maxCpuSlabSize,
285  size_t page_size,
286  const std::vector<size_t>& cpu_tier_sizes);
287 
     // Buffer managers stored as raw pointers in a two-level vector.
     // NOTE(review): presumably indexed [memory level][device] -- confirm.
288  std::vector<std::vector<AbstractBufferMgr*>> bufferMgrs_;
289  std::unique_ptr<CudaMgr_Namespace::CudaMgr> cudaMgr_;
290  std::string dataDir_;
291  bool hasGpus_;
     // Declared mutable so that const member functions can lock it.
293  mutable std::mutex buffer_access_mutex_;
294 };
295 
296 std::ostream& operator<<(std::ostream& os, const DataMgr::SystemMemoryUsage&);
297 
298 } // namespace Data_Namespace
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:235
auto operator[](const std::string &name)
Definition: DataMgr.h:106
std::string to_lower(const std::string &str)
std::mutex buffer_access_mutex_
Definition: DataMgr.h:293
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< int > ChunkKey
Definition: types.h:36
std::vector< MemoryData > nodeMemoryData
Definition: DataMgr.h:75
Buffer_Namespace::MemStatus memStatus
Definition: DataMgr.h:67
std::unordered_map< std::string, size_t > items_
Definition: DataMgr.h:80
void deleteChunk(const ChunkKey &key, const MemoryLevel mem_level, const int device_id)
Definition: DataMgr.cpp:547
ProcBuddyinfoParser(std::string text={})
Definition: DataMgr.h:118
std::vector< std::vector< AbstractBufferMgr * > > bufferMgrs_
Definition: DataMgr.h:288
std::vector< int > levelSizes_
Definition: DataMgr.h:240
std::string strip(std::string_view str)
trim any whitespace from the left and right ends of a string
std::ostream & operator<<(std::ostream &os, const DataMgr::SystemMemoryUsage &mem_info)
Definition: DataMgr.cpp:662
SystemMemoryUsage getSystemMemoryUsage() const
Definition: DataMgr.cpp:123
PersistentStorageMgr * getPersistentStorageMgr() const
Definition: DataMgr.cpp:677
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:465
std::vector< MemoryInfo > getMemoryInfoUnlocked(const MemoryLevel memLevel) const
Definition: DataMgr.cpp:385
void resetTableEpochFloor(const int32_t db_id, const int32_t tb_id)
Definition: DataMgr.cpp:642
std::string dumpLevel(const MemoryLevel memLevel)
Definition: DataMgr.cpp:449
size_t getCpuBufferPoolSize() const
Definition: DataMgr.cpp:681
void convertDB(const std::string basePath)
Definition: DataMgr.cpp:334
#define CHECK_GT(x, y)
Definition: Logger.h:305
size_t getGpuBufferPoolSize() const
Definition: DataMgr.cpp:686
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
auto operator[](size_t order)
Definition: DataMgr.h:166
static size_t getTotalSystemMemory()
Definition: DataMgr.cpp:179
size_t getTableEpoch(const int db_id, const int tb_id)
Definition: DataMgr.cpp:635
Buffer_Namespace::GpuCudaBufferMgr * getGpuBufferMgr(int32_t device_id) const
Definition: DataMgr.cpp:704
std::shared_ptr< ForeignStorageInterface > getForeignStorageInterface() const
Definition: DataMgr.cpp:657
void createTopLevelMetadata() const
Definition: DataMgr.cpp:364
static void atExitHandler()
Definition: DataMgr.cpp:59
void removeMutableTableDiskCacheData(const int db_id, const int tb_id) const
Definition: DataMgr.cpp:624
std::unique_ptr< CudaMgr_Namespace::CudaMgr > cudaMgr_
Definition: DataMgr.h:289
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:496
void populateMgrs(const SystemParameters &system_parameters, const size_t userSpecifiedNumReaderThreads, const File_Namespace::DiskCacheConfig &cache_config)
Definition: DataMgr.cpp:246
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
const std::map< ChunkKey, File_Namespace::FileBuffer * > & getChunkMap()
An AbstractBuffer is a unit of data management for a data manager.
This file includes the class specification for the buffer manager (BufferMgr), and related data struc...
File_Namespace::GlobalFileMgr * getGlobalFileMgr() const
Definition: DataMgr.cpp:649
Parse /proc/meminfo into key/value pairs.
Definition: DataMgr.h:79
void deleteChunksWithPrefix(const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:522
tuple line
Definition: parse_ast.py:10
bool isBufferOnDevice(const ChunkKey &key, const MemoryLevel memLevel, const int deviceId)
Definition: DataMgr.cpp:489
AbstractBuffer * getChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t numBytes=0)
Definition: DataMgr.cpp:511
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
std::vector< MemoryInfo > getMemoryInfo(const MemoryLevel memLevel) const
Definition: DataMgr.cpp:380
void removeTableRelatedDS(const int db_id, const int tb_id)
Definition: DataMgr.cpp:619
DataMgr(const std::string &dataDir, const SystemParameters &system_parameters, std::unique_ptr< CudaMgr_Namespace::CudaMgr > cudaMgr, const bool useGpus, const size_t reservedGpuMem=(1<< 27), const size_t numReaderThreads=0, const File_Namespace::DiskCacheConfig cacheConfig=File_Namespace::DiskCacheConfig())
Definition: DataMgr.cpp:69
std::vector< size_t > orders_
Definition: DataMgr.h:114
Buffer_Namespace::CpuBufferMgr * getCpuBufferMgr() const
Definition: DataMgr.cpp:698
#define CHECK(condition)
Definition: Logger.h:291
void copy(AbstractBuffer *destBuffer, AbstractBuffer *srcBuffer)
Definition: DataMgr.cpp:570
bool gpusPresent() const
Definition: DataMgr.h:228
void resetBufferMgrs(const File_Namespace::DiskCacheConfig &cache_config, const size_t num_reader_threads, const SystemParameters &sys_params)
Definition: DataMgr.cpp:232
std::vector< int32_t > chunk_key
Definition: DataMgr.h:66
AbstractBuffer * createChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t page_size=0)
Definition: DataMgr.cpp:502
void free(AbstractBuffer *buffer)
Definition: DataMgr.cpp:564
string name
Definition: setup.in.py:72
int get_page_size()
Definition: heavyai_fs.cpp:29
void allocateCpuBufferMgr(int32_t device_id, size_t total_cpu_size, size_t minCpuSlabSize, size_t maxCpuSlabSize, size_t page_size, const std::vector< size_t > &cpu_tier_sizes)
Definition: DataMgr.cpp:202
Parse /proc/buddyinfo into a Fragmentation health score.
Definition: DataMgr.h:112
void setTableEpoch(const int db_id, const int tb_id, const int start_epoch)
Definition: DataMgr.cpp:628
friend class GlobalFileMgr
Definition: DataMgr.h:184
AbstractBuffer * alloc(const MemoryLevel memoryLevel, const int deviceId, const size_t numBytes)
Definition: DataMgr.cpp:555
std::string dataDir_
Definition: DataMgr.h:290