OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DataMgr.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
21 #ifndef DATAMGR_H
22 #define DATAMGR_H
23 
24 #include "../Shared/SystemParameters.h"
25 #include "../Shared/mapd_shared_mutex.h"
26 #include "AbstractBuffer.h"
27 #include "AbstractBufferMgr.h"
28 #include "BufferMgr/Buffer.h"
29 #include "BufferMgr/BufferMgr.h"
30 #include "MemoryLevel.h"
32 
33 #include <iomanip>
34 #include <iostream>
35 #include <map>
36 #include <string>
37 #include <unordered_map>
38 #include <vector>
39 
40 namespace File_Namespace {
41 class FileBuffer;
42 class GlobalFileMgr;
43 } // namespace File_Namespace
44 
45 namespace CudaMgr_Namespace {
46 class CudaMgr;
47 }
48 
49 namespace Data_Namespace {
50 
51 struct MemoryData {  // element type of MemoryInfo::nodeMemoryData; presumably describes one buffer segment — TODO confirm against BufferMgr
52  size_t slabNum;  // presumably the index of the slab that holds this segment — confirm
53  int32_t startPage;  // presumably the first page of the segment within its slab — confirm
54  size_t numPages;  // presumably the segment length in pages — confirm
55  uint32_t touch;  // NOTE(review): looks like a last-use counter — confirm
56  std::vector<int32_t> chunk_key;  // presumably the key of the chunk held in this segment; ChunkKey itself is std::vector<int>
58 };  // NOTE(review): the cross-reference index places a Buffer_Namespace::MemStatus memStatus member at source line 57, which this listing omits
59 
60 struct MemoryInfo {  // element type of the vector returned by DataMgr::getMemoryInfo()
61  size_t pageSize;  // presumably the size of one page in bytes — TODO confirm units
62  size_t maxNumPages;  // presumably the page capacity of the manager being reported — confirm
65  std::vector<MemoryData> nodeMemoryData;  // one MemoryData record per reported entry
66 };  // NOTE(review): source lines 63-64 are absent from this listing, so further members may exist
67 
70  std::unordered_map<std::string, size_t> items_;  // field name -> parsed value; values with a "kb" unit suffix are stored multiplied by 1024
71 
72  public:  // NOTE(review): source lines 69 and 73 (the class header and, presumably, the constructor header) are absent from this listing
74  std::ifstream f("/proc/meminfo");
75  std::stringstream ss;
76  ss << f.rdbuf();  // read the whole file into memory before splitting it into lines
77 
78  for (const std::string& line : split(ss.str(), "\n")) {
79  if (line.empty()) {
80  continue;
81  }
82  const auto nv = split(line, ":", 1);  // maxsplit of 1: cut only at the first ':' so the value part stays intact
83  CHECK(nv.size() == 2) << "unexpected line format in /proc/meminfo: " << line;
84  const auto name = strip(nv[0]), value = to_lower(strip(nv[1]));  // lower-cased so the unit comparison below ignores case
85  auto v = split(value);  // presumably splits on whitespace when no delimiter is given — confirm in the string helpers
86  CHECK(v.size() == 1 || v.size() == 2)
87  << "unexpected line format in /proc/meminfo: " << line;
88  items_[name] = std::atoll(v[0].c_str());  // std::atoll yields 0 when the text does not start with a number
89  if (v.size() == 2) {
90  CHECK(v[1] == "kb") << "unexpected unit suffix in /proc/meminfo: " << line;
91  items_[name] *= 1024;  // "kb" is the only unit accepted; scale the stored value by 1024
92  }
93  }
94  }
95 
96  auto operator[](const std::string& name) { return items_[name]; }  // returns a copy of the value; unordered_map::operator[] inserts a 0 entry for an unknown name
97  auto begin() { return items_.begin(); }  // iterator to the first (name, value) pair of items_
98  auto end() { return items_.end(); }  // past-the-end iterator of items_
99 };
100 
103  std::string inputText_;  // copy of the text that was parsed; returned by getInputText()
104  std::vector<size_t> orders_;  // orders_[k]: sum over all input lines of the k-th column after the skipped ones
106  // NOTE(review): source line 105 is absent from this listing; presumably it declares fragmentationPercent_, which the constructor assigns
107  public:
108  ProcBuddyinfoParser(std::string text = {}) {  // parses buddyinfo-style text and derives fragmentationPercent_; an empty argument means "read /proc/buddyinfo"
109  if (text.empty()) {
110  std::ifstream f("/proc/buddyinfo");
111  std::stringstream ss;
112  ss << f.rdbuf();
113  text = ss.str();
114  }
115  inputText_ = text;
116 
117  const size_t skipped_columns = 4;
118  // NOTE(sy): For now this calculation ignores the first four buddyinfo columns,
119  // but in the future we could break out subscores by node and/or by zone.
120  size_t number_of_columns = 0;
121  for (const std::string& line : split(text, "\n")) {
122  if (line.empty()) {
123  continue;
124  }
125  const auto columns = split(line);
126  CHECK_GT(columns.size(), skipped_columns) << "unexpected line format: " << line;
127  if (number_of_columns != 0) {
128  CHECK_EQ(columns.size(), number_of_columns)
129  << "expected line to have " << number_of_columns << " columns: " << line;
130  } else {
131  number_of_columns = columns.size();  // the first non-empty line fixes the column count every later line must match
132  orders_.resize(number_of_columns - skipped_columns, 0);
133  }
134  for (size_t i = skipped_columns; i < number_of_columns; ++i) {
135  orders_[i - skipped_columns] += strtoull(columns[i].c_str(), NULL, 10);  // accumulate the per-column counts across all lines
136  }
137  }
138 
139  const long page_size =  // NOTE(review): sysconf can return -1 on error; the result is used unchecked below
140  sysconf(_SC_PAGE_SIZE); // in case x86-64 is configured to use 2MB pages
141  size_t scaled = 0;
142  size_t total = 0;
143  for (size_t order = 0; order < orders_.size(); ++order) {  // NOTE(review): the divisor orders_.size() - 1 used below is 0 when there is exactly one order column
144  const size_t bytes = orders_[order] * (size_t(1) << order) * page_size;  // count * 2^order pages * bytes per page
145  scaled += (bytes * (orders_.size() - 1 - order)) / (orders_.size() - 1);  // weight falls linearly from 1 at order 0 to 0 at the highest order
146  total += bytes;
147  }
148 
149  CHECK_GT(total, size_t(0)) << "failed to parse:\n" << text;  // also guards the division on the next line
150  fragmentationPercent_ = (scaled * 100) / total;  // integer percentage: weighted bytes relative to all counted bytes
151  }
152 
153  auto operator[](size_t order) { return orders_[order]; }  // returns a copy of the summed count; the index is not bounds-checked
154  auto begin() { return orders_.begin(); }  // iterator to the first element of orders_
155  auto end() { return orders_.end(); }  // past-the-end iterator of orders_
157  auto getInputText() { return inputText_; }  // returns a copy of the parsed text (plain auto deduces std::string)
158 };
159 
160 class DataMgr {  // data manager: holds a table of AbstractBufferMgr pointers (bufferMgrs_) and owns a CudaMgr through a unique_ptr
161  friend class GlobalFileMgr;  // NOTE(review): the name is unqualified, so this may not refer to File_Namespace::GlobalFileMgr declared above — confirm intent
162 
163  public:
164  explicit DataMgr(
165  const std::string& dataDir,
166  const SystemParameters& system_parameters,
167  std::unique_ptr<CudaMgr_Namespace::CudaMgr> cudaMgr,
168  const bool useGpus,
169  const size_t reservedGpuMem = (1 << 27),
170  const size_t numReaderThreads = 0, /* 0 means use default for # of reader threads */
171  const DiskCacheConfig cacheConfig = DiskCacheConfig());
172  ~DataMgr();
174  const MemoryLevel memoryLevel,  // NOTE(review): source line 173 is absent; per the cross-reference index this declaration is createChunkBuffer(const ChunkKey& key, ...)
175  const int deviceId = 0,
176  const size_t page_size = 0);
178  const MemoryLevel memoryLevel,  // NOTE(review): source line 177 is absent; per the cross-reference index this declaration is getChunkBuffer(const ChunkKey& key, ...)
179  const int deviceId = 0,
180  const size_t numBytes = 0);
181  void deleteChunksWithPrefix(const ChunkKey& keyPrefix);
182  void deleteChunksWithPrefix(const ChunkKey& keyPrefix, const MemoryLevel memLevel);
183  AbstractBuffer* alloc(const MemoryLevel memoryLevel,
184  const int deviceId,
185  const size_t numBytes);
186  void free(AbstractBuffer* buffer);
187  // copies one buffer to another
188  void copy(AbstractBuffer* destBuffer, AbstractBuffer* srcBuffer);
189  bool isBufferOnDevice(const ChunkKey& key,
190  const MemoryLevel memLevel,
191  const int deviceId);
192  std::vector<MemoryInfo> getMemoryInfo(const MemoryLevel memLevel);
193  std::string dumpLevel(const MemoryLevel memLevel);
194  void clearMemory(const MemoryLevel memLevel);
195 
196  const std::map<ChunkKey, File_Namespace::FileBuffer*>& getChunkMap();
197  void checkpoint(const int db_id,
198  const int tb_id); // checkpoint for individual table of DB
200  const ChunkKey& keyPrefix);  // NOTE(review): source line 199 is absent; per the cross-reference index this declaration is getChunkMetadataVecForKeyPrefix(ChunkMetadataVector&, ...)
201  inline bool gpusPresent() { return hasGpus_; }  // reports the hasGpus_ flag
202  void removeTableRelatedDS(const int db_id, const int tb_id);
203  void setTableEpoch(const int db_id, const int tb_id, const int start_epoch);
204  size_t getTableEpoch(const int db_id, const int tb_id);
205 
206  CudaMgr_Namespace::CudaMgr* getCudaMgr() const { return cudaMgr_.get(); }  // non-owning pointer; cudaMgr_ keeps ownership
208 
209  // database_id, table_id, column_id, fragment_id
210  std::vector<int> levelSizes_;
211  // NOTE(review): source line 212 is absent; the fields below presumably belong to the nested SystemMemoryUsage struct
213  size_t free; // available CPU RAM memory in bytes
214  size_t total; // total CPU RAM memory in bytes
215  size_t resident; // resident process memory in bytes
216  size_t vtotal; // total process virtual memory in bytes
217  size_t regular; // process bytes non-shared
218  size_t shared; // process bytes shared (file maps + shmem)
219  size_t frag; // fragmentation percent
220  };
221 
223  static size_t getTotalSystemMemory();
224 
226  void resetPersistentStorage(const DiskCacheConfig& cache_config,
227  const size_t num_reader_threads,
228  const SystemParameters& sys_params);
229 
230  private:
231  void populateMgrs(const SystemParameters& system_parameters,
232  const size_t userSpecifiedNumReaderThreads,
233  const DiskCacheConfig& cache_config);
234  void convertDB(const std::string basePath);
235  void checkpoint(); // checkpoint for whole DB, called from convertDB proc only
236  void createTopLevelMetadata() const;
237 
238  std::vector<std::vector<AbstractBufferMgr*>> bufferMgrs_;  // raw pointers; presumably indexed [memory level][device] — confirm in DataMgr.cpp
239  std::unique_ptr<CudaMgr_Namespace::CudaMgr> cudaMgr_;  // owning handle; exposed as a raw pointer by getCudaMgr()
240  std::string dataDir_;
241  bool hasGpus_;  // value returned by gpusPresent()
244 };  // NOTE(review): source lines 242-243 are absent; the cross-reference index places std::mutex buffer_access_mutex_ at line 243
245 
246 std::ostream& operator<<(std::ostream& os, const DataMgr::SystemMemoryUsage&);  // stream-insertion overload for SystemMemoryUsage; the cross-reference index places its definition in DataMgr.cpp
247 
248 } // namespace Data_Namespace
249 
250 #endif // DATAMGR_H
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:206
auto operator[](const std::string &name)
Definition: DataMgr.h:96
std::string to_lower(const std::string &str)
std::mutex buffer_access_mutex_
Definition: DataMgr.h:243
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< int > ChunkKey
Definition: types.h:37
std::vector< MemoryData > nodeMemoryData
Definition: DataMgr.h:65
Buffer_Namespace::MemStatus memStatus
Definition: DataMgr.h:57
std::unordered_map< std::string, size_t > items_
Definition: DataMgr.h:70
ProcBuddyinfoParser(std::string text={})
Definition: DataMgr.h:108
std::vector< std::vector< AbstractBufferMgr * > > bufferMgrs_
Definition: DataMgr.h:238
tuple line
Definition: parse_ast.py:10
std::vector< int > levelSizes_
Definition: DataMgr.h:210
std::string strip(std::string_view str)
trim any whitespace from the left and right ends of a string
std::ostream & operator<<(std::ostream &os, const DataMgr::SystemMemoryUsage &mem_info)
Definition: DataMgr.cpp:536
SystemMemoryUsage getSystemMemoryUsage() const
Definition: DataMgr.cpp:85
void populateMgrs(const SystemParameters &system_parameters, const size_t userSpecifiedNumReaderThreads, const DiskCacheConfig &cache_config)
Definition: DataMgr.cpp:180
DataMgr(const std::string &dataDir, const SystemParameters &system_parameters, std::unique_ptr< CudaMgr_Namespace::CudaMgr > cudaMgr, const bool useGpus, const size_t reservedGpuMem=(1<< 27), const size_t numReaderThreads=0, const DiskCacheConfig cacheConfig=DiskCacheConfig())
Definition: DataMgr.cpp:44
PersistentStorageMgr * getPersistentStorageMgr() const
Definition: DataMgr.cpp:551
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:385
std::string dumpLevel(const MemoryLevel memLevel)
Definition: DataMgr.cpp:369
void convertDB(const std::string basePath)
Definition: DataMgr.cpp:257
Represents/provides access to contiguous data stored in the file system.
Definition: FileBuffer.h:55
#define CHECK_GT(x, y)
Definition: Logger.h:209
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
auto operator[](size_t order)
Definition: DataMgr.h:153
static size_t getTotalSystemMemory()
Definition: DataMgr.cpp:142
size_t getTableEpoch(const int db_id, const int tb_id)
Definition: DataMgr.cpp:521
void createTopLevelMetadata() const
Definition: DataMgr.cpp:288
std::unique_ptr< CudaMgr_Namespace::CudaMgr > cudaMgr_
Definition: DataMgr.h:239
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:411
std::vector< std::pair< ChunkKey, std::shared_ptr< ChunkMetadata >>> ChunkMetadataVector
const std::map< ChunkKey, File_Namespace::FileBuffer * > & getChunkMap()
An AbstractBuffer is a unit of data management for a data manager.
std::vector< MemoryInfo > getMemoryInfo(const MemoryLevel memLevel)
Definition: DataMgr.cpp:304
File_Namespace::GlobalFileMgr * getGlobalFileMgr() const
Definition: DataMgr.cpp:528
Parse /proc/meminfo into key/value pairs.
Definition: DataMgr.h:69
void deleteChunksWithPrefix(const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:436
bool isBufferOnDevice(const ChunkKey &key, const MemoryLevel memLevel, const int deviceId)
Definition: DataMgr.cpp:404
AbstractBuffer * getChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t numBytes=0)
Definition: DataMgr.cpp:425
void removeTableRelatedDS(const int db_id, const int tb_id)
Definition: DataMgr.cpp:509
std::vector< size_t > orders_
Definition: DataMgr.h:104
#define CHECK(condition)
Definition: Logger.h:197
void resetPersistentStorage(const DiskCacheConfig &cache_config, const size_t num_reader_threads, const SystemParameters &sys_params)
Definition: DataMgr.cpp:166
void copy(AbstractBuffer *destBuffer, AbstractBuffer *srcBuffer)
Definition: DataMgr.cpp:475
std::vector< int32_t > chunk_key
Definition: DataMgr.h:56
AbstractBuffer * createChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t page_size=0)
Definition: DataMgr.cpp:416
void free(AbstractBuffer *buffer)
Definition: DataMgr.cpp:469
string name
Definition: setup.py:35
Parse /proc/buddyinfo into a Fragmentation health score.
Definition: DataMgr.h:102
void setTableEpoch(const int db_id, const int tb_id, const int start_epoch)
Definition: DataMgr.cpp:514
friend class GlobalFileMgr
Definition: DataMgr.h:161
AbstractBuffer * alloc(const MemoryLevel memoryLevel, const int deviceId, const size_t numBytes)
Definition: DataMgr.cpp:460
std::string dataDir_
Definition: DataMgr.h:240