OmniSciDB  5ade3759e0
DataGen.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include <cfloat>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <limits>
#include <random>
#include <string>
41 
42 // include files for Thrift and MapD Thrift Services
43 #include <thrift/protocol/TBinaryProtocol.h>
44 #include <thrift/transport/TBufferTransports.h>
45 #include <thrift/transport/TSocket.h>
46 #include "gen-cpp/MapD.h"
47 
48 using namespace ::apache::thrift;
49 using namespace ::apache::thrift::protocol;
50 using namespace ::apache::thrift::transport;
51 
52 #ifdef HAVE_THRIFT_STD_SHAREDPTR
53 #include <memory>
54 namespace mapd {
55 using std::make_shared;
56 using std::shared_ptr;
57 } // namespace mapd
58 #else
59 #include <boost/make_shared.hpp>
60 namespace mapd {
61 using boost::make_shared;
62 using boost::shared_ptr;
63 } // namespace mapd
64 #endif // HAVE_THRIFT_STD_SHAREDPTR
65 
66 namespace {
67 // anonymous namespace for private functions
// Pseudo-random engine used by every gen_* helper in this file. It is seeded
// a single time, from std::random_device, when the variable is initialized.
std::default_random_engine random_gen = [] {
  std::random_device seed_source;
  return std::default_random_engine(seed_source());
}();
69 
70 // returns a random int as string
71 std::string gen_int() {
72  std::uniform_int_distribution<int> dist(INT_MIN, INT_MAX);
73  return std::to_string(dist(random_gen));
74 }
75 
76 // returns a random float as string
77 std::string gen_real() {
78  std::uniform_real_distribution<float> dist(0.0, 1.0);
79  return std::to_string(dist(random_gen));
80 }
81 
// Upper bound (inclusive) on the length of strings produced by gen_string().
constexpr int max_str_len = 100;
83 
84 // returns a random string of length up to max_str_len
85 std::string gen_string() {
86  std::string chars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890");
87  std::uniform_int_distribution<> char_dist(0, chars.size() - 1);
88  std::uniform_int_distribution<> len_dist(0, max_str_len);
89  int len = len_dist(random_gen);
90  std::string s(len, ' ');
91  for (int i = 0; i < len; i++) {
92  s[i] = chars[char_dist(random_gen)];
93  }
94  return s;
95 }
96 
97 // returns a random boolean as string
98 std::string gen_bool() {
99  std::uniform_int_distribution<int> dist(0, 1);
100  if (dist(random_gen) == 1) {
101  return "t";
102  }
103  return "f";
104 }
105 
106 // returns a random time as string
107 std::string gen_time() {
108  std::uniform_int_distribution<int> dist(0, INT32_MAX);
109  time_t t = dist(random_gen);
110  std::tm* tm_ptr = gmtime(&t);
111  char buf[9];
112  strftime(buf, 9, "%T", tm_ptr);
113  return buf;
114 }
115 
116 // returns a random timestamp as string
117 std::string gen_timestamp() {
118  std::uniform_int_distribution<int> dist(0, INT32_MAX);
119  time_t t = dist(random_gen);
120  std::tm* tm_ptr = gmtime(&t);
121  char buf[20];
122  strftime(buf, 20, "%F %T", tm_ptr);
123  return buf;
124 }
125 
126 // returns a random date as string
127 std::string gen_date() {
128  std::uniform_int_distribution<int> dist(0, INT32_MAX);
129  time_t t = dist(random_gen);
130  std::tm* tm_ptr = gmtime(&t);
131  char buf[11];
132  strftime(buf, 11, "%F", tm_ptr);
133  return buf;
134 }
135 
136 // output to std::cout num_rows number of rows conforming to row_desc.
137 // each column value is separated by delimiter.
138 void data_gen(const TRowDescriptor& row_desc, const char* delimiter, int num_rows) {
139  for (int i = 0; i < num_rows; i++) {
140  bool not_first = false;
141  for (auto p = row_desc.begin(); p != row_desc.end(); ++p) {
142  if (not_first) {
143  std::cout << delimiter;
144  } else {
145  not_first = true;
146  }
147  switch (p->col_type.type) {
148  case TDatumType::SMALLINT:
149  case TDatumType::INT:
150  case TDatumType::BIGINT:
151  std::cout << gen_int();
152  break;
153  case TDatumType::FLOAT:
154  case TDatumType::DOUBLE:
155  case TDatumType::DECIMAL:
156  std::cout << gen_real();
157  break;
158  case TDatumType::STR:
159  std::cout << gen_string();
160  break;
161  case TDatumType::TIME:
162  std::cout << gen_time();
163  break;
164  case TDatumType::TIMESTAMP:
165  case TDatumType::INTERVAL_DAY_TIME:
166  case TDatumType::INTERVAL_YEAR_MONTH:
167  std::cout << gen_timestamp();
168  break;
169  case TDatumType::DATE:
170  std::cout << gen_date();
171  break;
172  case TDatumType::BOOL:
173  std::cout << gen_bool();
174  break;
175  default:
176  std::cout << "???";
177  break;
178  }
179  }
180  std::cout << std::endl;
181  }
182 }
183 } // namespace
184 
185 int main(int argc, char** argv) {
186  std::string server_host("localhost"); // default to localhost
187  int port = 6274; // default port number
188  int num_rows = 1000000; // default number of rows to generate
189  const char* delimiter = "\t"; // only support tab delimiter for now
190 
191  if (argc < 5) {
192  std::cout
193  << "Usage: <table> <database> <user> <password> [<num rows>] [hostname[:port]]"
194  << std::endl;
195  return 1;
196  }
197  std::string table_name(argv[1]);
198  std::string db_name(argv[2]);
199  std::string user_name(argv[3]);
200  std::string passwd(argv[4]);
201 
202  if (argc >= 6) {
203  num_rows = atoi(argv[5]);
204  if (argc >= 7) {
205  char* host = strtok(argv[6], ":");
206  char* portno = strtok(NULL, ":");
207  server_host = host;
208  if (portno != NULL) {
209  port = atoi(portno);
210  }
211  }
212  }
213 
214  mapd::shared_ptr<TTransport> socket(new TSocket(server_host, port));
215  mapd::shared_ptr<TTransport> transport(new TBufferedTransport(socket));
216  mapd::shared_ptr<TProtocol> protocol(new TBinaryProtocol(transport));
217  MapDClient client(protocol);
218  TSessionId session;
219  try {
220  transport->open(); // open transport
221  client.connect(session, user_name, passwd, db_name); // connect to omnisci_server
222  TTableDetails table_details;
223  client.get_table_details(table_details, session, table_name);
224  data_gen(table_details.row_desc, delimiter, num_rows);
225  client.disconnect(session); // disconnect from omnisci_server
226  transport->close(); // close transport
227  } catch (TMapDException& e) {
228  std::cerr << e.error_msg << std::endl;
229  return 1;
230  } catch (TException& te) {
231  std::cerr << "Thrift error: " << te.what() << std::endl;
232  return 1;
233  }
234 
235  return 0;
236 }
int main(int argc, char **argv)
Definition: DataGen.cpp:185
const int8_t const int64_t * num_rows
void data_gen(const TRowDescriptor &row_desc, const char *delimiter, int num_rows)
Definition: DataGen.cpp:138
Definition: DataGen.cpp:60
std::default_random_engine random_gen(std::random_device{}())
std::string to_string(char const *&&v)
mapd::shared_ptr< MapDClient > client
TSessionId session