OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
DataMgr.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
22 #include "DataMgr.h"
23 #include "../CudaMgr/CudaMgr.h"
26 #include "FileMgr/GlobalFileMgr.h"
27 
28 #ifdef __APPLE__
29 #include <sys/sysctl.h>
30 #include <sys/types.h>
31 #else
32 #include <unistd.h>
33 #endif
34 
35 #include <boost/filesystem.hpp>
36 
37 #include <algorithm>
38 #include <limits>
39 
40 using namespace std;
41 using namespace Buffer_Namespace;
42 using namespace File_Namespace;
43 
44 namespace Data_Namespace {
45 
46 DataMgr::DataMgr(const string& dataDir,
47  const MapDParameters& mapd_parameters,
48  const bool useGpus,
49  const int numGpus,
50  const int startGpu,
51  const size_t reservedGpuMem,
52  const size_t numReaderThreads)
53  : dataDir_(dataDir) {
54  if (useGpus) {
55  try {
56  cudaMgr_ = std::make_unique<CudaMgr_Namespace::CudaMgr>(numGpus, startGpu);
57  reservedGpuMem_ = reservedGpuMem;
58  hasGpus_ = true;
59  } catch (std::runtime_error& error) {
60  hasGpus_ = false;
61  }
62  } else {
63  hasGpus_ = false;
64  }
65 
66  populateMgrs(mapd_parameters, numReaderThreads);
68 }
69 
71  int numLevels = bufferMgrs_.size();
72  for (int level = numLevels - 1; level >= 0; --level) {
73  for (size_t device = 0; device < bufferMgrs_[level].size(); device++) {
74  delete bufferMgrs_[level][device];
75  }
76  }
77 }
78 
80 #ifdef __APPLE__
81  int mib[2];
82  size_t physical_memory;
83  size_t length;
84  // Get the Physical memory size
85  mib[0] = CTL_HW;
86  mib[1] = HW_MEMSIZE;
87  length = sizeof(size_t);
88  sysctl(mib, 2, &physical_memory, &length, NULL, 0);
89  return physical_memory;
90 
91 #else
92  long pages = sysconf(_SC_PHYS_PAGES);
93  long page_size = sysconf(_SC_PAGE_SIZE);
94  return pages * page_size;
95 #endif
96 }
97 
98 void DataMgr::populateMgrs(const MapDParameters& mapd_parameters,
99  const size_t userSpecifiedNumReaderThreads) {
100  bufferMgrs_.resize(2);
101  bufferMgrs_[0].push_back(new GlobalFileMgr(0, dataDir_, userSpecifiedNumReaderThreads));
102  levelSizes_.push_back(1);
103  size_t cpuBufferSize = mapd_parameters.cpu_buffer_mem_bytes;
104  if (cpuBufferSize == 0) { // if size is not specified
105  cpuBufferSize = getTotalSystemMemory() *
106  0.8; // should get free memory instead of this ugly heuristic
107  }
108  size_t cpuSlabSize = std::min(static_cast<size_t>(1L << 32), cpuBufferSize);
109  // cpuSlabSize -= cpuSlabSize % 512 == 0 ? 0 : 512 - (cpuSlabSize % 512);
110  cpuSlabSize = (cpuSlabSize / 512) * 512;
111  LOG(INFO) << "cpuSlabSize is " << (float)cpuSlabSize / (1024 * 1024) << "M";
112  if (hasGpus_) {
113  LOG(INFO) << "reserved GPU memory is " << (float)reservedGpuMem_ / (1024 * 1024)
114  << "M includes render buffer allocation";
115  bufferMgrs_.resize(3);
116  bufferMgrs_[1].push_back(new CpuBufferMgr(
117  0, cpuBufferSize, cudaMgr_.get(), cpuSlabSize, 512, bufferMgrs_[0][0]));
118  levelSizes_.push_back(1);
119  int numGpus = cudaMgr_->getDeviceCount();
120  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
121  size_t gpuMaxMemSize =
122  mapd_parameters.gpu_buffer_mem_bytes != 0
123  ? mapd_parameters.gpu_buffer_mem_bytes
124  : (cudaMgr_->getDeviceProperties(gpuNum)->globalMem) - (reservedGpuMem_);
125  size_t gpuSlabSize = std::min(static_cast<size_t>(1L << 31), gpuMaxMemSize);
126  gpuSlabSize -= gpuSlabSize % 512 == 0 ? 0 : 512 - (gpuSlabSize % 512);
127  LOG(INFO) << "gpuSlabSize is " << (float)gpuSlabSize / (1024 * 1024) << "M";
128  bufferMgrs_[2].push_back(new GpuCudaBufferMgr(
129  gpuNum, gpuMaxMemSize, cudaMgr_.get(), gpuSlabSize, 512, bufferMgrs_[1][0]));
130  }
131  levelSizes_.push_back(numGpus);
132  } else {
133  bufferMgrs_[1].push_back(new CpuBufferMgr(
134  0, cpuBufferSize, cudaMgr_.get(), cpuSlabSize, 512, bufferMgrs_[0][0]));
135  levelSizes_.push_back(1);
136  }
137 }
138 
139 void DataMgr::convertDB(const std::string basePath) {
140  /* check that "mapd_data" directory exists and it's empty */
141  std::string mapdDataPath(basePath + "/../mapd_data/");
142  boost::filesystem::path path(mapdDataPath);
143  if (boost::filesystem::exists(path)) {
144  if (!boost::filesystem::is_directory(path)) {
145  LOG(FATAL) << "Path to directory mapd_data to convert DB is not a directory.";
146  }
147  } else { // data directory does not exist
148  LOG(FATAL) << "Path to directory mapd_data to convert DB does not exist.";
149  }
150 
151  GlobalFileMgr* gfm = dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0]);
152  size_t defaultPageSize = gfm->getDefaultPageSize();
153  LOG(INFO) << "Database conversion started.";
154  FileMgr* fm_base_db =
155  new FileMgr(gfm,
156  defaultPageSize,
157  basePath); // this call also copies data into new DB structure
158  delete fm_base_db;
159 
160  /* write content of DB into newly created/converted DB structure & location */
161  checkpoint(); // outputs data files as well as metadata files
162  LOG(INFO) << "Database conversion completed.";
163 }
164 
166  const { // create metadata shared by all tables of all DBs
167  ChunkKey chunkKey(2);
168  chunkKey[0] = 0; // top level db_id
169  chunkKey[1] = 0; // top level tb_id
170 
171  GlobalFileMgr* gfm = dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0]);
172  FileMgr* fm_top = gfm->getFileMgr(chunkKey);
173  fm_top->createTopLevelMetadata();
174 }
175 
176 std::vector<MemoryInfo> DataMgr::getMemoryInfo(const MemoryLevel memLevel) {
177  // TODO (vraj) : Reduce the duplicate code
178  std::vector<MemoryInfo> memInfo;
179  if (memLevel == MemoryLevel::CPU_LEVEL) {
180  CpuBufferMgr* cpuBuffer =
181  dynamic_cast<CpuBufferMgr*>(bufferMgrs_[MemoryLevel::CPU_LEVEL][0]);
182  MemoryInfo mi;
183 
184  mi.pageSize = cpuBuffer->getPageSize();
185  mi.maxNumPages = cpuBuffer->getMaxSize() / mi.pageSize;
186  mi.isAllocationCapped = cpuBuffer->isAllocationCapped();
187  mi.numPageAllocated = cpuBuffer->getAllocated() / mi.pageSize;
188 
189  const std::vector<BufferList> slab_segments = cpuBuffer->getSlabSegments();
190  size_t numSlabs = slab_segments.size();
191 
192  for (size_t slabNum = 0; slabNum != numSlabs; ++slabNum) {
193  for (auto segIt : slab_segments[slabNum]) {
194  MemoryData md;
195  md.slabNum = slabNum;
196  md.startPage = segIt.start_page;
197  md.numPages = segIt.num_pages;
198  md.touch = segIt.last_touched;
199  md.isFree = segIt.mem_status;
200  md.chunk_key.insert(
201  md.chunk_key.end(), segIt.chunk_key.begin(), segIt.chunk_key.end());
202  mi.nodeMemoryData.push_back(md);
203  }
204  }
205  memInfo.push_back(mi);
206  } else if (hasGpus_) {
207  int numGpus = cudaMgr_->getDeviceCount();
208  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
209  GpuCudaBufferMgr* gpuBuffer =
210  dynamic_cast<GpuCudaBufferMgr*>(bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]);
211  MemoryInfo mi;
212 
213  mi.pageSize = gpuBuffer->getPageSize();
214  mi.maxNumPages = gpuBuffer->getMaxSize() / mi.pageSize;
215  mi.isAllocationCapped = gpuBuffer->isAllocationCapped();
216  mi.numPageAllocated = gpuBuffer->getAllocated() / mi.pageSize;
217  const std::vector<BufferList> slab_segments = gpuBuffer->getSlabSegments();
218  size_t numSlabs = slab_segments.size();
219 
220  for (size_t slabNum = 0; slabNum != numSlabs; ++slabNum) {
221  for (auto segIt : slab_segments[slabNum]) {
222  MemoryData md;
223  md.slabNum = slabNum;
224  md.startPage = segIt.start_page;
225  md.numPages = segIt.num_pages;
226  md.touch = segIt.last_touched;
227  md.chunk_key.insert(
228  md.chunk_key.end(), segIt.chunk_key.begin(), segIt.chunk_key.end());
229  md.isFree = segIt.mem_status;
230  mi.nodeMemoryData.push_back(md);
231  }
232  }
233  memInfo.push_back(mi);
234  }
235  }
236  return memInfo;
237 }
238 
239 /*
240 std::vector<MemoryData> DataMgr::getGpuMemory() {
241  std::vector<MemoryData> memInfo;
242  if (hasGpus_) {
243  int numGpus = cudaMgr_->getDeviceCount();
244  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
245  gpuMemorySummary gms;
246  gms.max = bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->getMaxSize();
247  gms.inUse = bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->getInUseSize();
248  gms.allocated = bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->getAllocated();
249  gms.isAllocationCapped =
250 bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->isAllocationCapped(); memInfo.push_back(gms);
251  }
252  }
253  return memInfo;
254 }
255 
256 */
257 // std::ostringstream tss;
258 // size_t mb = 1024 * 1024;
259 // tss << std::endl;
260 // // tss << "CPU RAM TOTAL AVAILABLE : " std::fixed << setw(9) << setprecision(2) <<
261 // // ((float)bufferMgrs_[MemoryLevel::CPU_LEVEL][0]->getMaxSize() / mb)
262 // // << std::endl;
263 // tss << "CPU RAM IN BUFFER USE : " << std::fixed << setw(9) << setprecision(2)
264 // << ((float)bufferMgrs_[MemoryLevel::CPU_LEVEL][0]->getInUseSize() / mb) << " MB"
265 // << std::endl;
266 // if (hasGpus_) {
267 // int numGpus = cudaMgr_->getDeviceCount();
268 // for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
269 // tss << "GPU" << setfill(' ') << setw(2) << gpuNum << " RAM TOTAL AVAILABLE : " <<
270 // std::fixed << setw(9)
271 // << setprecision(2) <<
272 // ((float)bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->getMaxSize() / mb) << "
273 // MB"
274 // << std::endl;
275 // tss << "GPU" << setfill(' ') << setw(2) << gpuNum << " RAM IN BUFFER USE : " <<
276 // std::fixed << setw(9)
277 // << setprecision(2) <<
278 // ((float)bufferMgrs_[MemoryLevel::GPU_LEVEL][gpuNum]->getInUseSize() / mb) << "
279 // MB"
280 // << std::endl;
281 // }
282 // }
283 // return tss.str();
284 //}
285 
286 std::string DataMgr::dumpLevel(const MemoryLevel memLevel) {
287  // if gpu we need to iterate through all the buffermanagers for each card
288  if (memLevel == MemoryLevel::GPU_LEVEL) {
289  int numGpus = cudaMgr_->getDeviceCount();
290  std::ostringstream tss;
291  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
292  tss << bufferMgrs_[memLevel][gpuNum]->printSlabs();
293  }
294  return tss.str();
295  } else {
296  return bufferMgrs_[memLevel][0]->printSlabs();
297  }
298 }
299 
300 void DataMgr::clearMemory(const MemoryLevel memLevel) {
301  // if gpu we need to iterate through all the buffermanagers for each card
302  if (memLevel == MemoryLevel::GPU_LEVEL) {
303  if (cudaMgr_) {
304  int numGpus = cudaMgr_->getDeviceCount();
305  for (int gpuNum = 0; gpuNum < numGpus; ++gpuNum) {
306  LOG(INFO) << "clear slabs on gpu " << gpuNum;
307  bufferMgrs_[memLevel][gpuNum]->clearSlabs();
308  }
309  } else {
310  throw std::runtime_error("Unable to clear GPU memory: No GPUs detected");
311  }
312  } else {
313  bufferMgrs_[memLevel][0]->clearSlabs();
314  }
315 }
316 
318  const MemoryLevel memLevel,
319  const int deviceId) {
320  return bufferMgrs_[memLevel][deviceId]->isBufferOnDevice(key);
321 }
322 
324  std::vector<std::pair<ChunkKey, ChunkMetadata>>& chunkMetadataVec) {
325  // Can we always assume this will just be at the disklevel bc we just
326  // started?
327  bufferMgrs_[0][0]->getChunkMetadataVec(chunkMetadataVec);
328 }
329 
331  std::vector<std::pair<ChunkKey, ChunkMetadata>>& chunkMetadataVec,
332  const ChunkKey& keyPrefix) {
333  bufferMgrs_[0][0]->getChunkMetadataVecForKeyPrefix(chunkMetadataVec, keyPrefix);
334 }
335 
337  const MemoryLevel memoryLevel,
338  const int deviceId,
339  const size_t page_size) {
340  int level = static_cast<int>(memoryLevel);
341  return bufferMgrs_[level][deviceId]->createBuffer(key, page_size);
342 }
343 
345  const MemoryLevel memoryLevel,
346  const int deviceId,
347  const size_t numBytes) {
348  const auto level = static_cast<size_t>(memoryLevel);
349  CHECK_LT(level, levelSizes_.size()); // make sure we have a legit buffermgr
350  CHECK_LT(deviceId, levelSizes_[level]); // make sure we have a legit buffermgr
351  return bufferMgrs_[level][deviceId]->getBuffer(key, numBytes);
352 }
353 
355  int numLevels = bufferMgrs_.size();
356  for (int level = numLevels - 1; level >= 0; --level) {
357  for (int device = 0; device < levelSizes_[level]; ++device) {
358  bufferMgrs_[level][device]->deleteBuffersWithPrefix(keyPrefix);
359  }
360  }
361 }
362 
363 // only deletes the chunks at the given memory level
365  const MemoryLevel memLevel) {
366  if (bufferMgrs_.size() <= memLevel) {
367  return;
368  }
369  for (int device = 0; device < levelSizes_[memLevel]; ++device) {
370  bufferMgrs_[memLevel][device]->deleteBuffersWithPrefix(keyPrefix);
371  }
372 }
373 
375  const int deviceId,
376  const size_t numBytes) {
377  const auto level = static_cast<int>(memoryLevel);
378  CHECK_LT(deviceId, levelSizes_[level]);
379  return bufferMgrs_[level][deviceId]->alloc(numBytes);
380 }
381 
383  int level = static_cast<int>(buffer->getType());
384  bufferMgrs_[level][buffer->getDeviceId()]->free(buffer);
385 }
386 
// NOTE(review): the enclosing function's signature line was lost in
// extraction and is not recoverable from the documentation index —
// verify the declaration against DataMgr.h before relying on this.
  // {-1} acts as a wildcard prefix, so this deletes every chunk at every level.
  ChunkKey keyPrefix = {-1};
  deleteChunksWithPrefix(keyPrefix);
}
391 
392 void DataMgr::copy(AbstractBuffer* destBuffer, AbstractBuffer* srcBuffer) {
393  destBuffer->write(srcBuffer->getMemoryPtr(),
394  srcBuffer->size(),
395  0,
396  srcBuffer->getType(),
397  srcBuffer->getDeviceId());
398 }
399 
400 // could add function below to do arbitrary copies between buffers
401 
402 // void DataMgr::copy(AbstractBuffer *destBuffer, const AbstractBuffer *srcBuffer, const
403 // size_t numBytes, const size_t destOffset, const size_t srcOffset) {
404 //} /
405 
406 void DataMgr::checkpoint(const int db_id, const int tb_id) {
407  for (auto levelIt = bufferMgrs_.rbegin(); levelIt != bufferMgrs_.rend(); ++levelIt) {
408  // use reverse iterator so we start at GPU level, then CPU then DISK
409  for (auto deviceIt = levelIt->begin(); deviceIt != levelIt->end(); ++deviceIt) {
410  (*deviceIt)->checkpoint(db_id, tb_id);
411  }
412  }
413 }
414 
416  for (auto levelIt = bufferMgrs_.rbegin(); levelIt != bufferMgrs_.rend(); ++levelIt) {
417  // use reverse iterator so we start at GPU level, then CPU then DISK
418  for (auto deviceIt = levelIt->begin(); deviceIt != levelIt->end(); ++deviceIt) {
419  (*deviceIt)->checkpoint();
420  }
421  }
422 }
423 
424 void DataMgr::removeTableRelatedDS(const int db_id, const int tb_id) {
425  dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0])->removeTableRelatedDS(db_id, tb_id);
426 }
427 
428 void DataMgr::setTableEpoch(const int db_id, const int tb_id, const int start_epoch) {
429  dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0])
430  ->setTableEpoch(db_id, tb_id, start_epoch);
431 }
432 
433 size_t DataMgr::getTableEpoch(const int db_id, const int tb_id) {
434  return dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0])->getTableEpoch(db_id, tb_id);
435 }
436 
438  auto global_file_mgr = dynamic_cast<GlobalFileMgr*>(bufferMgrs_[0][0]);
439  CHECK(global_file_mgr);
440  return global_file_mgr;
441 }
442 
443 } // namespace Data_Namespace
size_t getAllocated() override
Definition: BufferMgr.cpp:483
std::vector< int > ChunkKey
Definition: types.h:35
std::vector< MemoryData > nodeMemoryData
Definition: DataMgr.h:63
size_t getMaxSize() override
Definition: BufferMgr.cpp:478
std::vector< std::vector< AbstractBufferMgr * > > bufferMgrs_
Definition: DataMgr.h:131
std::vector< int > levelSizes_
Definition: DataMgr.h:121
#define LOG(tag)
Definition: Logger.h:185
size_t gpu_buffer_mem_bytes
Buffer_Namespace::MemStatus isFree
Definition: DataMgr.h:55
virtual size_t size() const =0
FileMgr * getFileMgr(const int db_id, const int tb_id)
virtual int8_t * getMemoryPtr()=0
virtual MemoryLevel getType() const =0
void clearMemory(const MemoryLevel memLevel)
Definition: DataMgr.cpp:300
std::string dumpLevel(const MemoryLevel memLevel)
Definition: DataMgr.cpp:286
void convertDB(const std::string basePath)
Definition: DataMgr.cpp:139
size_t getTotalSystemMemory()
Definition: DataMgr.cpp:79
size_t getTableEpoch(const int db_id, const int tb_id)
Definition: DataMgr.cpp:433
void createTopLevelMetadata() const
Definition: DataMgr.cpp:165
CHECK(cgen_state)
bool isAllocationCapped() override
Definition: BufferMgr.cpp:488
std::unique_ptr< CudaMgr_Namespace::CudaMgr > cudaMgr_
Definition: DataMgr.h:132
An AbstractBuffer is a unit of data management for a data manager.
virtual void write(int8_t *src, const size_t num_bytes, const size_t offset=0, const MemoryLevel src_buffer_type=CPU_LEVEL, const int src_device_id=-1)=0
std::vector< MemoryInfo > getMemoryInfo(const MemoryLevel memLevel)
Definition: DataMgr.cpp:176
size_t getDefaultPageSize() const
#define CHECK_LT(x, y)
Definition: Logger.h:200
void deleteChunksWithPrefix(const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:354
const std::vector< BufferList > & getSlabSegments()
Definition: BufferMgr.cpp:878
bool isBufferOnDevice(const ChunkKey &key, const MemoryLevel memLevel, const int deviceId)
Definition: DataMgr.cpp:317
AbstractBuffer * getChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t numBytes=0)
Definition: DataMgr.cpp:344
size_t cpu_buffer_mem_bytes
void getChunkMetadataVecForKeyPrefix(std::vector< std::pair< ChunkKey, ChunkMetadata >> &chunkMetadataVec, const ChunkKey &keyPrefix)
Definition: DataMgr.cpp:330
void removeTableRelatedDS(const int db_id, const int tb_id)
Definition: DataMgr.cpp:424
void copy(AbstractBuffer *destBuffer, AbstractBuffer *srcBuffer)
Definition: DataMgr.cpp:392
void populateMgrs(const MapDParameters &mapd_parameters, const size_t userSpecifiedNumReaderThreads)
Definition: DataMgr.cpp:98
std::vector< int32_t > chunk_key
Definition: DataMgr.h:54
AbstractBuffer * createChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t page_size=0)
Definition: DataMgr.cpp:336
void free(AbstractBuffer *buffer)
Definition: DataMgr.cpp:382
void getChunkMetadataVec(std::vector< std::pair< ChunkKey, ChunkMetadata >> &chunkMetadataVec)
Definition: DataMgr.cpp:323
virtual int getDeviceId() const
File_Namespace::GlobalFileMgr * getGlobalFileMgr() const
Definition: DataMgr.cpp:437
void setTableEpoch(const int db_id, const int tb_id, const int start_epoch)
Definition: DataMgr.cpp:428
friend class GlobalFileMgr
Definition: DataMgr.h:67
AbstractBuffer * alloc(const MemoryLevel memoryLevel, const int deviceId, const size_t numBytes)
Definition: DataMgr.cpp:374
std::string dataDir_
Definition: DataMgr.h:133