OmniSciDB  eee9fa949c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DataMgr.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include "DataMgr.h"

#include "../CudaMgr/CudaMgr.h"
#include "FileMgr/GlobalFileMgr.h"

#ifdef __APPLE__
#include <sys/sysctl.h>
#include <sys/types.h>
#else
#include <unistd.h>
#endif

#include <boost/filesystem.hpp>

#include <algorithm>
#include <limits>
#include <memory>
40 using namespace std;
41 using namespace Buffer_Namespace;
42 using namespace File_Namespace;
43 
44 namespace Data_Namespace {
45 
46 DataMgr::DataMgr(const string& dataDir,
47  const MapDParameters& mapd_parameters,
48  const bool useGpus,
49  const int numGpus,
50  const int startGpu,
51  const size_t reservedGpuMem,
52  const size_t numReaderThreads)
53  : dataDir_(dataDir) {
54  if (useGpus) {
55  try {
56  cudaMgr_ = std::make_unique<CudaMgr_Namespace::CudaMgr>(numGpus, startGpu);
57  reservedGpuMem_ = reservedGpuMem;
58  hasGpus_ = true;
59  } catch (std::runtime_error& error) {
60  hasGpus_ = false;
61  }
62  } else {
63  hasGpus_ = false;
64  }
65 
66  populateMgrs(mapd_parameters, numReaderThreads);
68 }
69 
71  int numLevels = bufferMgrs_.size();
72  for (int level = numLevels - 1; level >= 0; --level) {
73  for (size_t device = 0; device < bufferMgrs_[level].size(); device++) {
74  delete bufferMgrs_[level][device];
75  }
76  }
77 }
78 
80 #ifdef __APPLE__
81  int mib[2];
82  size_t physical_memory;
83  size_t length;
84  // Get the Physical memory size
85  mib[0] = CTL_HW;
86  mib[1] = HW_MEMSIZE;
87  length = sizeof(size_t);
88  sysctl(mib, 2, &physical_memory, &length, NULL, 0);
89  return physical_memory;
90 
91 #else
92  long pages = sysconf(_SC_PHYS_PAGES);
93  long page_size = sysconf(_SC_PAGE_SIZE);
94  return pages * page_size;
95 #endif
96 }
97 
98 void DataMgr::populateMgrs(const MapDParameters& mapd_parameters,
99  const size_t userSpecifiedNumReaderThreads) {
100  bufferMgrs_.resize(2);
101  bufferMgrs_[0].push_back(new GlobalFileMgr(0, dataDir_, userSpecifiedNumReaderThreads));
102  levelSizes_.push_back(1);
103  size_t cpuBufferSize = mapd_parameters.cpu_buffer_mem_bytes;
104  if (cpuBufferSize == 0) { // if size is not specified
105  const auto total_system_memory = getTotalSystemMemory();
106  VLOG(1) << "Detected " << (float)total_system_memory / (1024 * 1024)
107  << "M of total system memory.";
108  cpuBufferSize = total_system_memory *
109  0.8; // should get free memory instead of this ugly heuristic
110  }
111  size_t cpuSlabSize = std::min(static_cast<size_t>(1L << 32), cpuBufferSize);
112  // cpuSlabSize -= cpuSlabSize % 512 == 0 ? 0 : 512 - (cpuSlabSize % 512);
113  cpuSlabSize = (cpuSlabSize / 512) * 512;
114  LOG(INFO) << "cpuSlabSize is " << (float)cpuSlabSize / (1024 * 1024) << "M";
115  LOG(INFO) << "memory pool for CPU is " << (float)cpuBufferSize / (1024 * 1024) << "M";
116  if (hasGpus_) {
117  LOG(INFO) << "reserved GPU memory is " << (float)reservedGpuMem_ / (1024 * 1024)
118  << "M includes render buffer allocation";
119  bufferMgrs_.resize(3);
120  bufferMgrs_[1].push_back(new CpuBufferMgr(
121  0, cpuBufferSize, cudaMgr_.get(), cpuSlabSize, 512, bufferMgrs_[0][0]));
122  levelSizes_.push_back(1);
123  int numGpus = cudaMgr_->getDeviceCount();
124  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
125  size_t gpuMaxMemSize =
126  mapd_parameters.gpu_buffer_mem_bytes != 0
127  ? mapd_parameters.gpu_buffer_mem_bytes
128  : (cudaMgr_->getDeviceProperties(gpuNum)->globalMem) - (reservedGpuMem_);
129  size_t gpuSlabSize = std::min(static_cast<size_t>(1L << 31), gpuMaxMemSize);
130  gpuSlabSize -= gpuSlabSize % 512 == 0 ? 0 : 512 - (gpuSlabSize % 512);
131  LOG(INFO) << "gpuSlabSize is " << (float)gpuSlabSize / (1024 * 1024) << "M";
132  LOG(INFO) << "memory pool for GPU " << gpuNum << " is "
133  << (float)gpuMaxMemSize / (1024 * 1024) << "M";
134  bufferMgrs_[2].push_back(new GpuCudaBufferMgr(
135  gpuNum, gpuMaxMemSize, cudaMgr_.get(), gpuSlabSize, 512, bufferMgrs_[1][0]));
136  }
137  levelSizes_.push_back(numGpus);
138  } else {
139  bufferMgrs_[1].push_back(new CpuBufferMgr(
140  0, cpuBufferSize, cudaMgr_.get(), cpuSlabSize, 512, bufferMgrs_[0][0]));
141  levelSizes_.push_back(1);
142  }
143 }
144 
145 void DataMgr::convertDB(const std::string basePath) {
146  /* check that "mapd_data" directory exists and it's empty */
147  std::string mapdDataPath(basePath + "/../mapd_data/");
148  boost::filesystem::path path(mapdDataPath);
149  if (boost::filesystem::exists(path)) {
150  if (!boost::filesystem::is_directory(path)) {
151  LOG(FATAL) << "Path to directory mapd_data to convert DB is not a directory.";
152  }
153  } else { // data directory does not exist
154  LOG(FATAL) << "Path to directory mapd_data to convert DB does not exist.";
155  }
156 
157  GlobalFileMgr* gfm = dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0]);
158  size_t defaultPageSize = gfm->getDefaultPageSize();
159  LOG(INFO) << "Database conversion started.";
160  FileMgr* fm_base_db =
161  new FileMgr(gfm,
162  defaultPageSize,
163  basePath); // this call also copies data into new DB structure
164  delete fm_base_db;
165 
166  /* write content of DB into newly created/converted DB structure & location */
167  checkpoint(); // outputs data files as well as metadata files
168  LOG(INFO) << "Database conversion completed.";
169 }
170 
172  const { // create metadata shared by all tables of all DBs
173  ChunkKey chunkKey(2);
174  chunkKey[0] = 0; // top level db_id
175  chunkKey[1] = 0; // top level tb_id
176 
177  GlobalFileMgr* gfm = dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0]);
178  FileMgr* fm_top = gfm->getFileMgr(chunkKey);
179  fm_top->createTopLevelMetadata();
180 }
181 
182 std::vector<MemoryInfo> DataMgr::getMemoryInfo(const MemoryLevel memLevel) {
183  // TODO (vraj) : Reduce the duplicate code
184  std::vector<MemoryInfo> memInfo;
185  if (memLevel == MemoryLevel::CPU_LEVEL) {
186  CpuBufferMgr* cpuBuffer =
187  dynamic_cast<CpuBufferMgr*>(bufferMgrs_[MemoryLevel::CPU_LEVEL][0]);
188  MemoryInfo mi;
189 
190  mi.pageSize = cpuBuffer->getPageSize();
191  mi.maxNumPages = cpuBuffer->getMaxSize() / mi.pageSize;
192  mi.isAllocationCapped = cpuBuffer->isAllocationCapped();
193  mi.numPageAllocated = cpuBuffer->getAllocated() / mi.pageSize;
194 
195  const std::vector<BufferList> slab_segments = cpuBuffer->getSlabSegments();
196  size_t numSlabs = slab_segments.size();
197 
198  for (size_t slabNum = 0; slabNum != numSlabs; ++slabNum) {
199  for (auto segIt : slab_segments[slabNum]) {
200  MemoryData md;
201  md.slabNum = slabNum;
202  md.startPage = segIt.start_page;
203  md.numPages = segIt.num_pages;
204  md.touch = segIt.last_touched;
205  md.memStatus = segIt.mem_status;
206  md.chunk_key.insert(
207  md.chunk_key.end(), segIt.chunk_key.begin(), segIt.chunk_key.end());
208  mi.nodeMemoryData.push_back(md);
209  }
210  }
211  memInfo.push_back(mi);
212  } else if (hasGpus_) {
213  int numGpus = cudaMgr_->getDeviceCount();
214  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
215  GpuCudaBufferMgr* gpuBuffer =
216  dynamic_cast<GpuCudaBufferMgr*>(bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]);
217  MemoryInfo mi;
218 
219  mi.pageSize = gpuBuffer->getPageSize();
220  mi.maxNumPages = gpuBuffer->getMaxSize() / mi.pageSize;
221  mi.isAllocationCapped = gpuBuffer->isAllocationCapped();
222  mi.numPageAllocated = gpuBuffer->getAllocated() / mi.pageSize;
223  const std::vector<BufferList> slab_segments = gpuBuffer->getSlabSegments();
224  size_t numSlabs = slab_segments.size();
225 
226  for (size_t slabNum = 0; slabNum != numSlabs; ++slabNum) {
227  for (auto segIt : slab_segments[slabNum]) {
228  MemoryData md;
229  md.slabNum = slabNum;
230  md.startPage = segIt.start_page;
231  md.numPages = segIt.num_pages;
232  md.touch = segIt.last_touched;
233  md.chunk_key.insert(
234  md.chunk_key.end(), segIt.chunk_key.begin(), segIt.chunk_key.end());
235  md.memStatus = segIt.mem_status;
236  mi.nodeMemoryData.push_back(md);
237  }
238  }
239  memInfo.push_back(mi);
240  }
241  }
242  return memInfo;
243 }
244 
245 /*
246 std::vector<MemoryData> DataMgr::getGpuMemory() {
247  std::vector<MemoryData> memInfo;
248  if (hasGpus_) {
249  int numGpus = cudaMgr_->getDeviceCount();
250  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
251  gpuMemorySummary gms;
252  gms.max = bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->getMaxSize();
253  gms.inUse = bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->getInUseSize();
254  gms.allocated = bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->getAllocated();
255  gms.isAllocationCapped =
256 bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->isAllocationCapped(); memInfo.push_back(gms);
257  }
258  }
259  return memInfo;
260 }
261 
262 */
263 // std::ostringstream tss;
264 // size_t mb = 1024 * 1024;
265 // tss << std::endl;
266 // // tss << "CPU RAM TOTAL AVAILABLE : " std::fixed << setw(9) << setprecision(2) <<
267 // // ((float)bufferMgrs_[MemoryLevel::CPU_LEVEL][0]->getMaxSize() / mb)
268 // // << std::endl;
269 // tss << "CPU RAM IN BUFFER USE : " << std::fixed << setw(9) << setprecision(2)
270 // << ((float)bufferMgrs_[MemoryLevel::CPU_LEVEL][0]->getInUseSize() / mb) << " MB"
271 // << std::endl;
272 // if (hasGpus_) {
273 // int numGpus = cudaMgr_->getDeviceCount();
274 // for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
275 // tss << "GPU" << setfill(' ') << setw(2) << gpuNum << " RAM TOTAL AVAILABLE : " <<
276 // std::fixed << setw(9)
277 // << setprecision(2) <<
278 // ((float)bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->getMaxSize() / mb) << "
279 // MB"
280 // << std::endl;
281 // tss << "GPU" << setfill(' ') << setw(2) << gpuNum << " RAM IN BUFFER USE : " <<
282 // std::fixed << setw(9)
283 // << setprecision(2) <<
284 // ((float)bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->getInUseSize() / mb) << "
285 // MB"
286 // << std::endl;
287 // }
288 // }
289 // return tss.str();
290 //}
291 
292 std::string DataMgr::dumpLevel(const MemoryLevel memLevel) {
293  // if gpu we need to iterate through all the buffermanagers for each card
294  if (memLevel == MemoryLevel::GPU_LEVEL) {
295  int numGpus = cudaMgr_->getDeviceCount();
296  std::ostringstream tss;
297  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
298  tss << bufferMgrs_[memLevel][gpuNum]->printSlabs();
299  }
300  return tss.str();
301  } else {
302  return bufferMgrs_[memLevel][0]->printSlabs();
303  }
304 }
305 
306 void DataMgr::clearMemory(const MemoryLevel memLevel) {
307  // if gpu we need to iterate through all the buffermanagers for each card
308  if (memLevel == MemoryLevel::GPU_LEVEL) {
309  if (cudaMgr_) {
310  int numGpus = cudaMgr_->getDeviceCount();
311  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
312  LOG(INFO) << "clear slabs on gpu " << gpuNum;
313  bufferMgrs_[memLevel][gpuNum]->clearSlabs();
314  }
315  } else {
316  throw std::runtime_error("Unable to clear GPU memory: No GPUs detected");
317  }
318  } else {
319  bufferMgrs_[memLevel][0]->clearSlabs();
320  }
321 }
322 
324  const MemoryLevel memLevel,
325  const int deviceId) {
326  return bufferMgrs_[memLevel][deviceId]->isBufferOnDevice(key);
327 }
328 
330  std::vector<std::pair<ChunkKey, ChunkMetadata>>& chunkMetadataVec) {
331  // Can we always assume this will just be at the disklevel bc we just
332  // started?
333  bufferMgrs_[0][0]->getChunkMetadataVec(chunkMetadataVec);
334 }
335 
337  std::vector<std::pair<ChunkKey, ChunkMetadata>>& chunkMetadataVec,
338  const ChunkKey& keyPrefix) {
339  bufferMgrs_[0][0]->getChunkMetadataVecForKeyPrefix(chunkMetadataVec, keyPrefix);
340 }
341 
343  const MemoryLevel memoryLevel,
344  const int deviceId,
345  const size_t page_size) {
346  int level = static_cast<int>(memoryLevel);
347  return bufferMgrs_[level][deviceId]->createBuffer(key, page_size);
348 }
349 
351  const MemoryLevel memoryLevel,
352  const int deviceId,
353  const size_t numBytes) {
354  const auto level = static_cast<size_t>(memoryLevel);
355  CHECK_LT(level, levelSizes_.size()); // make sure we have a legit buffermgr
356  CHECK_LT(deviceId, levelSizes_[level]); // make sure we have a legit buffermgr
357  return bufferMgrs_[level][deviceId]->getBuffer(key, numBytes);
358 }
359 
361  int numLevels = bufferMgrs_.size();
362  for (int level = numLevels - 1; level >= 0; --level) {
363  for (int device = 0; device < levelSizes_[level]; ++device) {
364  bufferMgrs_[level][device]->deleteBuffersWithPrefix(keyPrefix);
365  }
366  }
367 }
368 
369 // only deletes the chunks at the given memory level
371  const MemoryLevel memLevel) {
372  if (bufferMgrs_.size() <= memLevel) {
373  return;
374  }
375  for (int device = 0; device < levelSizes_[memLevel]; ++device) {
376  bufferMgrs_[memLevel][device]->deleteBuffersWithPrefix(keyPrefix);
377  }
378 }
379 
381  const int deviceId,
382  const size_t numBytes) {
383  const auto level = static_cast<int>(memoryLevel);
384  CHECK_LT(deviceId, levelSizes_[level]);
385  return bufferMgrs_[level][deviceId]->alloc(numBytes);
386 }
387 
389  int level = static_cast<int>(buffer->getType());
390  bufferMgrs_[level][buffer->getDeviceId()]->free(buffer);
391 }
392 
394  ChunkKey keyPrefix = {-1};
395  deleteChunksWithPrefix(keyPrefix);
396 }
397 
398 void DataMgr::copy(AbstractBuffer* destBuffer, AbstractBuffer* srcBuffer) {
399  destBuffer->write(srcBuffer->getMemoryPtr(),
400  srcBuffer->size(),
401  0,
402  srcBuffer->getType(),
403  srcBuffer->getDeviceId());
404 }
405 
406 // could add function below to do arbitrary copies between buffers
407 
408 // void DataMgr::copy(AbstractBuffer *destBuffer, const AbstractBuffer *srcBuffer, const
409 // size_t numBytes, const size_t destOffset, const size_t srcOffset) {
410 //} /
411 
412 void DataMgr::checkpoint(const int db_id, const int tb_id) {
413  for (auto levelIt = bufferMgrs_.rbegin(); levelIt != bufferMgrs_.rend(); ++levelIt) {
414  // use reverse iterator so we start at GPU level, then CPU then DISK
415  for (auto deviceIt = levelIt->begin(); deviceIt != levelIt->end(); ++deviceIt) {
416  (*deviceIt)->checkpoint(db_id, tb_id);
417  }
418  }
419 }
420 
422  for (auto levelIt = bufferMgrs_.rbegin(); levelIt != bufferMgrs_.rend(); ++levelIt) {
423  // use reverse iterator so we start at GPU level, then CPU then DISK
424  for (auto deviceIt = levelIt->begin(); deviceIt != levelIt->end(); ++deviceIt) {
425  (*deviceIt)->checkpoint();
426  }
427  }
428 }
429 
430 void DataMgr::removeTableRelatedDS(const int db_id, const int tb_id) {
431  dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0])->removeTableRelatedDS(db_id, tb_id);
432 }
433 
434 void DataMgr::setTableEpoch(const int db_id, const int tb_id, const int start_epoch) {
435  dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0])
436  ->setTableEpoch(db_id, tb_id, start_epoch);
437 }
438 
439 size_t DataMgr::getTableEpoch(const int db_id, const int tb_id) {
440  return dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0])->getTableEpoch(db_id, tb_id);
441 }
442 
444  auto global_file_mgr = dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0]);
445  CHECK(global_file_mgr);
446  return global_file_mgr;
447 }
448 
449 } // namespace Data_Namespace
size_t getAllocated() override
Definition: BufferMgr.cpp:483
std::vector< int > ChunkKey
Definition: types.h:35
std::vector< MemoryData > nodeMemoryData
Definition: DataMgr.h:63
Buffer_Namespace::MemStatus memStatus
Definition: DataMgr.h:55
size_t getMaxSize() override
Definition: BufferMgr.cpp:478
std::vector< std::vector< AbstractBufferMgr * > > bufferMgrs_
Definition: DataMgr.h:131
std::vector< int > levelSizes_
Definition: DataMgr.h:121
#define LOG(tag)
Definition: Logger.h:188
size_t gpu_buffer_mem_bytes
virtual size_t size() const =0
FileMgr * getFileMgr(const int db_id, const int tb_id)
virtual int8_t * getMemoryPtr()=0
virtual MemoryLevel getType() const =0
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:306
std::string dumpLevel(const MemoryLevel memLevel)
Definition: DataMgr.cpp:292
void convertDB(const std::string basePath)
Definition: DataMgr.cpp:145
size_t getTotalSystemMemory()
Definition: DataMgr.cpp:79
size_t getTableEpoch(const int db_id, const int tb_id)
Definition: DataMgr.cpp:439
void createTopLevelMetadata() const
Definition: DataMgr.cpp:171
CHECK(cgen_state)
bool isAllocationCapped() override
Definition: BufferMgr.cpp:488
std::unique_ptr< CudaMgr_Namespace::CudaMgr > cudaMgr_
Definition: DataMgr.h:132
An AbstractBuffer is a unit of data management for a data manager.
virtual void write(int8_t *src, const size_t num_bytes, const size_t offset=0, const MemoryLevel src_buffer_type=CPU_LEVEL, const int src_device_id=-1)=0
std::vector< MemoryInfo > getMemoryInfo(const MemoryLevel memLevel)
Definition: DataMgr.cpp:182
size_t getDefaultPageSize() const
#define CHECK_LT(x, y)
Definition: Logger.h:207
void deleteChunksWithPrefix(const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:360
const std::vector< BufferList > & getSlabSegments()
Definition: BufferMgr.cpp:878
bool isBufferOnDevice(const ChunkKey &key, const MemoryLevel memLevel, const int deviceId)
Definition: DataMgr.cpp:323
AbstractBuffer * getChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t numBytes=0)
Definition: DataMgr.cpp:350
size_t cpu_buffer_mem_bytes
void getChunkMetadataVecForKeyPrefix(std::vector< std::pair< ChunkKey, ChunkMetadata >> &chunkMetadataVec, const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:336
void removeTableRelatedDS(const int db_id, const int tb_id)
Definition: DataMgr.cpp:430
void copy(AbstractBuffer *destBuffer, AbstractBuffer *srcBuffer)
Definition: DataMgr.cpp:398
void populateMgrs(const MapDParameters &mapd_parameters, const size_t userSpecifiedNumReaderThreads)
Definition: DataMgr.cpp:98
std::vector< int32_t > chunk_key
Definition: DataMgr.h:54
AbstractBuffer * createChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t page_size=0)
Definition: DataMgr.cpp:342
void free(AbstractBuffer *buffer)
Definition: DataMgr.cpp:388
void getChunkMetadataVec(std::vector< std::pair< ChunkKey, ChunkMetadata >> &chunkMetadataVec)
Definition: DataMgr.cpp:329
virtual int getDeviceId() const
#define VLOG(n)
Definition: Logger.h:291
File_Namespace::GlobalFileMgr * getGlobalFileMgr() const
Definition: DataMgr.cpp:443
void setTableEpoch(const int db_id, const int tb_id, const int start_epoch)
Definition: DataMgr.cpp:434
friend class GlobalFileMgr
Definition: DataMgr.h:67
AbstractBuffer * alloc(const MemoryLevel memoryLevel, const int deviceId, const size_t numBytes)
Definition: DataMgr.cpp:380
std::string dataDir_
Definition: DataMgr.h:133