OmniSciDB  5ade3759e0
PopulateTableRandom.cpp File Reference

Populate a table with random data. More...

#include <boost/functional/hash.hpp>
#include <cfloat>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include "../Catalog/Catalog.h"
#include "../DataMgr/DataMgr.h"
#include "../Fragmenter/Fragmenter.h"
#include "../Shared/DateConverters.h"
#include "../Shared/measure.h"
#include "../Shared/sqltypes.h"
#include "Shared/Logger.h"
+ Include dependency graph for PopulateTableRandom.cpp:

Go to the source code of this file.

Macros

#define MAX_TEXT_LEN   255
 

Functions

size_t random_fill_int16 (int8_t *buf, size_t num_elems)
 
size_t random_fill_int32 (int8_t *buf, size_t num_elems)
 
size_t random_fill_int64 (int8_t *buf, size_t num_elems, int64_t min, int64_t max)
 
size_t random_fill_int64 (int8_t *buf, size_t num_elems)
 
size_t random_fill_float (int8_t *buf, size_t num_elems)
 
size_t random_fill_double (int8_t *buf, size_t num_elems)
 
size_t random_fill_string (std::vector< std::string > &stringVec, size_t num_elems, int max_len, size_t &data_volumn)
 
size_t random_fill_int8array (std::vector< std::vector< int8_t >> &stringVec, size_t num_elems, int max_len, size_t &data_volumn)
 
size_t random_fill_int16array (std::vector< std::vector< int16_t >> &stringVec, size_t num_elems, int max_len, size_t &data_volumn)
 
size_t random_fill_int32array (std::vector< std::vector< int32_t >> &stringVec, size_t num_elems, int max_len, size_t &data_volumn)
 
size_t random_fill_dates (int8_t *buf, size_t num_elems)
 
size_t random_fill (const ColumnDescriptor *cd, DataBlockPtr p, size_t num_elems, size_t &data_volumn)
 
std::vector< size_t > populate_table_random (const std::string &table_name, const size_t num_rows, const Catalog &cat)
 

Detailed Description

Populate a table with random data.

Author
Wei Hong <wei@map-d.com>. Copyright (c) 2014 MapD Technologies, Inc. All rights reserved.

Definition in file PopulateTableRandom.cpp.

Macro Definition Documentation

◆ MAX_TEXT_LEN

#define MAX_TEXT_LEN   255

Definition at line 213 of file PopulateTableRandom.cpp.

Referenced by random_fill().

Function Documentation

◆ populate_table_random()

std::vector<size_t> populate_table_random ( const std::string &  table_name,
const size_t  num_rows,
const Catalog &  cat 
)

Definition at line 279 of file PopulateTableRandom.cpp.

References CHECK, Fragmenter_Namespace::InsertData::columnIds, Fragmenter_Namespace::InsertData::data, Fragmenter_Namespace::InsertData::databaseId, Catalog_Namespace::DBMetadata::dbId, measure< TimeT >::execution(), TableDescriptor::fragmenter, Catalog_Namespace::Catalog::getAllColumnMetadataForTable(), Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getMetadataForTable(), Fragmenter_Namespace::AbstractFragmenter::insertData(), kENCODING_NONE, num_rows, Fragmenter_Namespace::InsertData::numRows, random_fill(), DataBlockPtr::stringsPtr, TableDescriptor::tableId, and Fragmenter_Namespace::InsertData::tableId.

Referenced by anonymous_namespace{StoragePerfTest.cpp}::load_data_for_thread_test_2(), anonymous_namespace{StoragePerfTest.cpp}::load_data_test(), anonymous_namespace{StorageTest.cpp}::simple_thread_wrapper(), and anonymous_namespace{StorageTest.cpp}::storage_test().

281  {
282  const TableDescriptor* td = cat.getMetadataForTable(table_name);
283  const auto cds = cat.getAllColumnMetadataForTable(td->tableId, false, false, false);
284  InsertData insert_data;
285  insert_data.databaseId = cat.getCurrentDB().dbId;
286  insert_data.tableId = td->tableId;
287  for (const auto& cd : cds) {
288  insert_data.columnIds.push_back(cd->columnId);
289  }
290  insert_data.numRows = num_rows;
291  std::vector<std::vector<int8_t>> numbers_vec;
292  std::vector<std::unique_ptr<std::vector<std::string>>> strings_vec;
293 
294  DataBlockPtr p{0};
295  // now allocate space for insert data
296  for (auto cd : cds) {
297  if (cd->columnType.is_varlen()) {
298  if (cd->columnType.get_compression() == kENCODING_NONE) {
299  strings_vec.push_back(std::make_unique<std::vector<std::string>>(num_rows));
300  p.stringsPtr = strings_vec.back().get();
301  } else {
302  CHECK(false);
303  }
304  } else {
305  numbers_vec.emplace_back(num_rows * cd->columnType.get_logical_size());
306  p.numbersPtr = numbers_vec.back().data();
307  }
308  insert_data.data.push_back(p);
309  }
310 
311  // fill InsertData with random data
312  std::vector<size_t> col_hashs(
313  cds.size()); // compute one hash per column for the generated data
314  int i = 0;
315  size_t data_volumn = 0;
316  for (auto cd : cds) {
317  col_hashs[i] = random_fill(cd, insert_data.data[i], num_rows, data_volumn);
318  i++;
319  }
320 
321  // now load the data into table
322  auto ms = measure<>::execution([&]() { td->fragmenter->insertData(insert_data); });
323  std::cout << "Loaded " << num_rows << " rows " << data_volumn << " bytes in " << ms
324  << " ms. at " << (double)data_volumn / (ms / 1000.0) / 1e6 << " MB/sec."
325  << std::endl;
326 
327  return col_hashs;
328 }
const int8_t const int64_t * num_rows
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:138
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
size_t random_fill(const ColumnDescriptor *cd, DataBlockPtr p, size_t num_elems, size_t &data_volumn)
int tableId
identifies the table into which the data is being inserted
Definition: Fragmenter.h:61
size_t numRows
the number of rows being inserted
Definition: Fragmenter.h:63
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:176
std::vector< DataBlockPtr > data
one data block pointer per column being inserted
Definition: Fragmenter.h:64
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:1579
#define CHECK(condition)
Definition: Logger.h:187
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:59
static TimeT::rep execution(F func, Args &&... args)
Definition: sample.cpp:29
specifies the content in-memory of a row in the table metadata table
Fragmenter_Namespace::AbstractFragmenter * fragmenter
std::vector< int > columnIds
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:62
virtual void insertData(InsertData &insertDataStruct)=0
Given data wrapped in an InsertData struct, inserts it into the correct partitions with locks and che...
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ random_fill()

size_t random_fill ( const ColumnDescriptor *  cd,
DataBlockPtr  p,
size_t  num_elems,
size_t &  data_volumn 
)

Definition at line 215 of file PopulateTableRandom.cpp.

References ColumnDescriptor::columnType, SQLTypeInfoCore< TYPE_FACET_PACK >::get_compression(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_dimension(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_precision(), SQLTypeInfoCore< TYPE_FACET_PACK >::get_type(), kBIGINT, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_NONE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kVARCHAR, MAX_TEXT_LEN, DataBlockPtr::numbersPtr, random_fill_dates(), random_fill_double(), random_fill_float(), random_fill_int16(), random_fill_int32(), random_fill_int64(), random_fill_string(), and DataBlockPtr::stringsPtr.

Referenced by populate_table_random().

218  {
219  size_t hash = 0;
220  switch (cd->columnType.get_type()) {
221  case kSMALLINT:
222  hash = random_fill_int16(p.numbersPtr, num_elems);
223  data_volumn += num_elems * sizeof(int16_t);
224  break;
225  case kINT:
226  hash = random_fill_int32(p.numbersPtr, num_elems);
227  data_volumn += num_elems * sizeof(int32_t);
228  break;
229  case kBIGINT:
230  hash = random_fill_int64(p.numbersPtr, num_elems, INT64_MIN, INT64_MAX);
231  data_volumn += num_elems * sizeof(int64_t);
232  break;
233  case kNUMERIC:
234  case kDECIMAL: {
235  int64_t max = std::pow((double)10, cd->columnType.get_precision());
236  int64_t min = -max;
237  hash = random_fill_int64(p.numbersPtr, num_elems, min, max);
238  data_volumn += num_elems * sizeof(int64_t);
239  break;
240  }
241  case kFLOAT:
242  hash = random_fill_float(p.numbersPtr, num_elems);
243  data_volumn += num_elems * sizeof(float);
244  break;
245  case kDOUBLE:
246  hash = random_fill_double(p.numbersPtr, num_elems);
247  data_volumn += num_elems * sizeof(double);
248  break;
249  case kVARCHAR:
250  case kCHAR:
251  if (cd->columnType.get_compression() == kENCODING_NONE) {
252  {
253  hash = random_fill_string(
254  *p.stringsPtr, num_elems, cd->columnType.get_dimension(), data_volumn);
255  }
256  }
257  break;
258  case kTEXT:
259  if (cd->columnType.get_compression() == kENCODING_NONE) {
260  {
261  hash = random_fill_string(*p.stringsPtr, num_elems, MAX_TEXT_LEN, data_volumn);
262  }
263  }
264  break;
265  case kDATE:
266  case kTIME:
267  case kTIMESTAMP:
268  hash = cd->columnType.get_type() == kDATE
269  ? random_fill_dates(p.numbersPtr, num_elems)
270  : random_fill_int64(p.numbersPtr, num_elems);
271  data_volumn += num_elems * sizeof(int64_t);
272  break;
273  default:
274  assert(false);
275  }
276  return hash;
277 }
int get_precision() const
Definition: sqltypes.h:326
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:325
Definition: sqltypes.h:51
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:138
size_t random_fill_int16(int8_t *buf, size_t num_elems)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:323
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:331
size_t random_fill_int32(int8_t *buf, size_t num_elems)
size_t random_fill_float(int8_t *buf, size_t num_elems)
Definition: sqltypes.h:54
Definition: sqltypes.h:55
size_t random_fill_int64(int8_t *buf, size_t num_elems, int64_t min, int64_t max)
size_t random_fill_double(int8_t *buf, size_t num_elems)
Definition: sqltypes.h:43
size_t random_fill_dates(int8_t *buf, size_t num_elems)
Definition: sqltypes.h:47
SQLTypeInfo columnType
size_t random_fill_string(std::vector< std::string > &stringVec, size_t num_elems, int max_len, size_t &data_volumn)
int8_t * numbersPtr
Definition: sqltypes.h:137
#define MAX_TEXT_LEN
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ random_fill_dates()

size_t random_fill_dates ( int8_t *  buf,
size_t  num_elems 
)

Definition at line 199 of file PopulateTableRandom.cpp.

References DateConverters::get_epoch_days_from_seconds(), and anonymous_namespace{ProfileTest.cpp}::hash_combine().

Referenced by random_fill().

199  {
200  constexpr int64_t kDateMin = -185542587187200;
201  constexpr int64_t kDateMax = 185542587100800;
202  std::default_random_engine gen;
203  std::uniform_int_distribution<int64_t> dist(kDateMin, kDateMax);
204  auto p = reinterpret_cast<int64_t*>(buf);
205  size_t hash = 0;
206  for (size_t i = 0; i < num_elems; i++) {
207  p[i] = dist(gen);
208  boost::hash_combine(hash, DateConverters::get_epoch_days_from_seconds(p[i]));
209  }
210  return hash;
211 }
void hash_combine(std::size_t &seed, T const &v)
int64_t get_epoch_days_from_seconds(const int64_t seconds)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ random_fill_double()

size_t random_fill_double ( int8_t *  buf,
size_t  num_elems 
)

Definition at line 99 of file PopulateTableRandom.cpp.

References anonymous_namespace{ProfileTest.cpp}::hash_combine().

Referenced by random_fill().

99  {
100  std::default_random_engine gen;
101  std::uniform_real_distribution<double> dist(DBL_MIN, DBL_MAX);
102  auto p = reinterpret_cast<double*>(buf);
103  size_t hash = 0;
104  for (size_t i = 0; i < num_elems; i++) {
105  p[i] = dist(gen);
106  boost::hash_combine(hash, p[i]);
107  }
108  return hash;
109 }
void hash_combine(std::size_t &seed, T const &v)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ random_fill_float()

size_t random_fill_float ( int8_t *  buf,
size_t  num_elems 
)

Definition at line 87 of file PopulateTableRandom.cpp.

References anonymous_namespace{ProfileTest.cpp}::hash_combine().

Referenced by random_fill().

87  {
88  std::default_random_engine gen;
89  std::uniform_real_distribution<float> dist(FLT_MIN, FLT_MAX);
90  auto p = reinterpret_cast<float*>(buf);
91  size_t hash = 0;
92  for (size_t i = 0; i < num_elems; i++) {
93  p[i] = dist(gen);
94  boost::hash_combine(hash, p[i]);
95  }
96  return hash;
97 }
void hash_combine(std::size_t &seed, T const &v)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ random_fill_int16()

size_t random_fill_int16 ( int8_t *  buf,
size_t  num_elems 
)

Definition at line 47 of file PopulateTableRandom.cpp.

References anonymous_namespace{ProfileTest.cpp}::hash_combine().

Referenced by random_fill().

47  {
48  std::default_random_engine gen;
49  std::uniform_int_distribution<int16_t> dist(INT16_MIN, INT16_MAX);
50  auto p = reinterpret_cast<int16_t*>(buf);
51  size_t hash = 0;
52  for (size_t i = 0; i < num_elems; i++) {
53  p[i] = dist(gen);
54  boost::hash_combine(hash, p[i]);
55  }
56  return hash;
57 }
void hash_combine(std::size_t &seed, T const &v)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ random_fill_int16array()

size_t random_fill_int16array ( std::vector< std::vector< int16_t >> &  stringVec,
size_t  num_elems,
int  max_len,
size_t &  data_volumn 
)

Definition at line 157 of file PopulateTableRandom.cpp.

References anonymous_namespace{ProfileTest.cpp}::hash_combine().

160  {
161  std::default_random_engine gen;
162  std::uniform_int_distribution<int16_t> dist(INT16_MIN, INT16_MAX);
163  std::uniform_int_distribution<> len_dist(0, max_len / 2);
164  size_t hash = 0;
165  for (size_t n = 0; n < num_elems; n++) {
166  int len = len_dist(gen);
167  std::vector<int16_t> s(len);
168  for (int i = 0; i < len; i++) {
169  s[i] = dist(gen);
170  boost::hash_combine(hash, s[i]);
171  }
172  stringVec[n] = s;
173  data_volumn += len * sizeof(int16_t);
174  }
175  return hash;
176 }
void hash_combine(std::size_t &seed, T const &v)
+ Here is the call graph for this function:

◆ random_fill_int32()

size_t random_fill_int32 ( int8_t *  buf,
size_t  num_elems 
)

Definition at line 59 of file PopulateTableRandom.cpp.

References anonymous_namespace{ProfileTest.cpp}::hash_combine().

Referenced by random_fill().

59  {
60  std::default_random_engine gen;
61  std::uniform_int_distribution<int32_t> dist(INT32_MIN, INT32_MAX);
62  auto p = reinterpret_cast<int32_t*>(buf);
63  size_t hash = 0;
64  for (size_t i = 0; i < num_elems; i++) {
65  p[i] = dist(gen);
66  boost::hash_combine(hash, p[i]);
67  }
68  return hash;
69 }
void hash_combine(std::size_t &seed, T const &v)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ random_fill_int32array()

size_t random_fill_int32array ( std::vector< std::vector< int32_t >> &  stringVec,
size_t  num_elems,
int  max_len,
size_t &  data_volumn 
)

Definition at line 178 of file PopulateTableRandom.cpp.

References anonymous_namespace{ProfileTest.cpp}::hash_combine().

181  {
182  std::default_random_engine gen;
183  std::uniform_int_distribution<int32_t> dist(INT32_MIN, INT32_MAX);
184  std::uniform_int_distribution<> len_dist(0, max_len / 4);
185  size_t hash = 0;
186  for (size_t n = 0; n < num_elems; n++) {
187  int len = len_dist(gen);
188  std::vector<int32_t> s(len);
189  for (int i = 0; i < len; i++) {
190  s[i] = dist(gen);
191  boost::hash_combine(hash, s[i]);
192  }
193  stringVec[n] = s;
194  data_volumn += len * sizeof(int32_t);
195  }
196  return hash;
197 }
void hash_combine(std::size_t &seed, T const &v)
+ Here is the call graph for this function:

◆ random_fill_int64() [1/2]

size_t random_fill_int64 ( int8_t *  buf,
size_t  num_elems,
int64_t  min,
int64_t  max 
)

Definition at line 71 of file PopulateTableRandom.cpp.

References anonymous_namespace{ProfileTest.cpp}::hash_combine().

Referenced by random_fill(), and random_fill_int64().

71  {
72  std::default_random_engine gen;
73  std::uniform_int_distribution<int64_t> dist(min, max);
74  auto p = reinterpret_cast<int64_t*>(buf);
75  size_t hash = 0;
76  for (size_t i = 0; i < num_elems; i++) {
77  p[i] = dist(gen);
78  boost::hash_combine(hash, p[i]);
79  }
80  return hash;
81 }
void hash_combine(std::size_t &seed, T const &v)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ random_fill_int64() [2/2]

size_t random_fill_int64 ( int8_t *  buf,
size_t  num_elems 
)

Definition at line 83 of file PopulateTableRandom.cpp.

References random_fill_int64().

83  {
84  return random_fill_int64(buf, num_elems, INT64_MIN, INT64_MAX);
85 }
size_t random_fill_int64(int8_t *buf, size_t num_elems, int64_t min, int64_t max)
+ Here is the call graph for this function:

◆ random_fill_int8array()

size_t random_fill_int8array ( std::vector< std::vector< int8_t >> &  stringVec,
size_t  num_elems,
int  max_len,
size_t &  data_volumn 
)

Definition at line 136 of file PopulateTableRandom.cpp.

References anonymous_namespace{ProfileTest.cpp}::hash_combine().

139  {
140  std::default_random_engine gen;
141  std::uniform_int_distribution<int8_t> dist(INT8_MIN, INT8_MAX);
142  std::uniform_int_distribution<> len_dist(0, max_len);
143  size_t hash = 0;
144  for (size_t n = 0; n < num_elems; n++) {
145  int len = len_dist(gen);
146  std::vector<int8_t> s(len);
147  for (int i = 0; i < len; i++) {
148  s[i] = dist(gen);
149  boost::hash_combine(hash, s[i]);
150  }
151  stringVec[n] = s;
152  data_volumn += len * sizeof(int8_t);
153  }
154  return hash;
155 }
void hash_combine(std::size_t &seed, T const &v)
+ Here is the call graph for this function:

◆ random_fill_string()

size_t random_fill_string ( std::vector< std::string > &  stringVec,
size_t  num_elems,
int  max_len,
size_t &  data_volumn 
)

Definition at line 111 of file PopulateTableRandom.cpp.

References anonymous_namespace{ProfileTest.cpp}::hash_combine().

Referenced by random_fill().

114  {
115  std::string chars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890");
116  std::default_random_engine gen;
117  std::uniform_int_distribution<> char_dist(0, chars.size() - 1);
118  std::uniform_int_distribution<> len_dist(0, max_len);
119  size_t hash = 0;
120  std::hash<std::string> string_hash;
121  for (size_t n = 0; n < num_elems; n++) {
122  int len = len_dist(gen);
123  std::string s(len, ' ');
124  for (int i = 0; i < len; i++) {
125  {
126  s[i] = chars[char_dist(gen)];
127  }
128  }
129  stringVec[n] = s;
130  boost::hash_combine(hash, string_hash(s));
131  data_volumn += len;
132  }
133  return hash;
134 }
void hash_combine(std::size_t &seed, T const &v)
+ Here is the call graph for this function:
+ Here is the caller graph for this function: