OmniSciDB  04ee39c94c
StoragePerfTest.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <boost/functional/hash.hpp>
18 #include <csignal>
19 #include <cstdlib>
20 #include <cstring>
21 #include <exception>
22 #include <future>
23 #include <iostream>
24 #include <memory>
25 #include <string>
26 #include <thread>
27 #include "../Analyzer/Analyzer.h"
28 #include "../Catalog/Catalog.h"
29 #include "../DataMgr/DataMgr.h"
30 #include "../Fragmenter/Fragmenter.h"
31 #include "../Parser/ParserNode.h"
32 #include "../Parser/parser.h"
33 #include "../QueryRunner/QueryRunner.h"
34 #include "PopulateTableRandom.h"
35 #include "ScanTable.h"
36 #include "Shared/MapDParameters.h"
37 #include "TestHelpers.h"
38 #include "boost/filesystem.hpp"
39 #include "boost/program_options.hpp"
40 #include "gtest/gtest.h"
41 
42 using namespace std;
43 using namespace Catalog_Namespace;
44 using namespace Analyzer;
45 using namespace Fragmenter_Namespace;
46 
47 #ifndef BASE_PATH
48 #define BASE_PATH "./tmp"
49 #endif
50 
52 
53 namespace {
54 
55 inline void run_ddl_statement(const string& input_str) {
56  QR::get()->runDDLStatement(input_str);
57 }
58 
59 bool load_data_test(string table_name, size_t num_rows) {
60  vector<size_t> insert_col_hashs =
61  populate_table_random(table_name, num_rows, *QR::get()->getCatalog());
62  return true;
63 }
64 
// Row-count presets passed as `num_rows` by the tests below.
65 #define SMALL 10000000 // 10M rows
66 #define LARGE 100000000 // 100M rows
67 
68 static size_t load_data_for_thread_test_2(int num_rows, string table_name) {
69  int initial_num_rows, num_rows_step;
70  initial_num_rows = num_rows_step = SMALL / 2; // insert 5M rows per iteration
71  vector<size_t> insert_col_hashs;
72 
73  if (num_rows <
74  initial_num_rows) { // to handle special case when only few rows should be added
75  insert_col_hashs =
76  populate_table_random(table_name, num_rows, *QR::get()->getCatalog());
77  } else {
78  for (int cur_num_rows = initial_num_rows; cur_num_rows <= num_rows;
79  cur_num_rows += num_rows_step) {
80  if (cur_num_rows == num_rows) {
81  insert_col_hashs =
82  populate_table_random(table_name, num_rows_step, *QR::get()->getCatalog());
83  } else {
84  populate_table_random(table_name, num_rows_step, *QR::get()->getCatalog());
85  }
86  }
87  }
88  return insert_col_hashs.size();
89 }
90 
91 } // namespace
92 
93 TEST(DataLoad, Numbers) {
94  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers;"););
95  ASSERT_NO_THROW(
97  "create table numbers (a smallint, b int, c bigint, d numeric(17,3), e "
98  "double, f float);"););
99  EXPECT_TRUE(load_data_test("numbers", LARGE));
100  ASSERT_NO_THROW(run_ddl_statement("drop table numbers;"););
101 }
102 
103 TEST(DataLoad, Strings) {
104  ASSERT_NO_THROW(run_ddl_statement("drop table if exists strings;"););
105  ASSERT_NO_THROW(run_ddl_statement("create table strings (x varchar(10), y text);"););
106  EXPECT_TRUE(load_data_test("strings", SMALL));
107  ASSERT_NO_THROW(run_ddl_statement("drop table strings;"););
108 }
109 
110 TEST(StorageSmall, AllTypes) {
111  ASSERT_NO_THROW(run_ddl_statement("drop table if exists alltypes;"););
112  ASSERT_NO_THROW(
113  run_ddl_statement("create table alltypes (a smallint, b int, c bigint, d "
114  "numeric(17,3), e double, f float, "
115  "g timestamp(0), h time(0), i date, x varchar(10), y text);"););
116  EXPECT_TRUE(load_data_test("alltypes", SMALL));
117  ASSERT_NO_THROW(run_ddl_statement("drop table alltypes;"););
118 }
119 
120 TEST(DataLoad, Numbers_Parallel_Load) {
121  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_1;"););
122  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_2;"););
123  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_3;"););
124  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_4;"););
125  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_5;"););
126 
127  /* create tables in single thread */
128  ASSERT_NO_THROW(
130  "create table numbers_1 (a smallint, b int, c bigint, d numeric(17,3), e "
131  "double, f float);"););
132  ASSERT_NO_THROW(
134  "create table numbers_2 (a smallint, b int, c bigint, d numeric(17,3), e "
135  "double, f float);"););
136  ASSERT_NO_THROW(
138  "create table numbers_3 (a smallint, b int, c bigint, d numeric(17,3), e "
139  "double, f float);"););
140  ASSERT_NO_THROW(
142  "create table numbers_4 (a smallint, b int, c bigint, d numeric(17,3), e "
143  "double, f float);"););
144  ASSERT_NO_THROW(
146  "create table numbers_5 (a smallint, b int, c bigint, d numeric(17,3), e "
147  "double, f float);"););
148 
149  /* load data into tables using parallel threads */
150  int numThreads = 5;
151  vector<string> db_table;
152  std::vector<std::future<size_t>> threads;
153  string table_name("numbers_");
154 
155  int num_rows = SMALL;
156  for (int i = 1; i <= numThreads; i++) {
157  int num_table_rows = num_rows * (numThreads - i + 1);
158  db_table.push_back(table_name + to_string(i));
159  threads.push_back(std::async(std::launch::async,
161  num_table_rows,
162  db_table[i - 1]));
163  }
164 
165  for (auto& p : threads) {
166  int num_columns_inserted = (int)p.get();
167  ASSERT_EQ(num_columns_inserted, 6); // each table was created with 6 columns
168  }
169 
170  /* delete tables in single thread */
171  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_1;"););
172  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_2;"););
173  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_3;"););
174  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_4;"););
175  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_5;"););
176 }
177 
178 TEST(DataLoad, NumbersTable_Parallel_CreateDropTable) {
179  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_1;"););
180  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_2;"););
181  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_3;"););
182  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_4;"););
183  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_5;"););
184 
185  /* create tables in single thread */
186  ASSERT_NO_THROW(
188  "create table numbers_1 (a smallint, b int, c bigint, d numeric(17,3), e "
189  "double, f float);"););
190  ASSERT_NO_THROW(
192  "create table numbers_2 (a smallint, b int, c bigint, d numeric(17,3), e "
193  "double, f float);"););
194  ASSERT_NO_THROW(
196  "create table numbers_3 (a smallint, b int, c bigint, d numeric(17,3), e "
197  "double, f float);"););
198  ASSERT_NO_THROW(
200  "create table numbers_4 (a smallint, b int, c bigint, d numeric(17,3), e "
201  "double, f float);"););
202  ASSERT_NO_THROW(
204  "create table numbers_5 (a smallint, b int, c bigint, d numeric(17,3), e "
205  "double, f float);"););
206 
207  /* Load table numbers_4 with data in the main thread, so it will be available for sure
208  * when drop_table on it will be executed later. Don't use new thread for loading table
209  * numbers_4 (see commented out), as one can't be sure that this action in the
210  * new/independent thread will be completed before executing drop_table in the main
211  * thread. It's enough to load just 1 row of data in the table numbers_4 to make sure it
212  * exists in the storage layer.
213  *
214  * threads.push_back(std::async(std::launch::async, load_data_for_thread_test_4, 1,
215  * table_name_temp));
216  */
217  string table_name("numbers_");
218  string table_name_temp(table_name + to_string(4));
219  EXPECT_TRUE(load_data_test(table_name_temp, 1));
220 
221  /* load data into tables numbers_1/2/3/5 using parallel threads */
222  int numThreads = 5;
223  vector<string> db_table;
224  std::vector<std::future<size_t>> threads;
225  int num_rows = SMALL;
226  for (int i = 1; i <= numThreads; i++) {
227  int num_table_rows = num_rows * (numThreads - i + 1);
228  db_table.push_back(table_name + to_string(i));
229  if (i == 4) {
230  continue; // table numbers_4 has been loaded already
231  }
232  threads.push_back(std::async(std::launch::async,
234  num_table_rows,
235  db_table[i - 1]));
236  }
237 
238  /* drop table numbers_4 while loading other tables in independent threads */
239  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_4;"););
240 
241  /* create table numbers_6 and load it with data */
242  ASSERT_NO_THROW(
244  "create table numbers_6 (a smallint, b int, c bigint, d numeric(17,3), e "
245  "double, f float);"););
246  int num_table_rows = SMALL;
247  db_table.push_back(table_name + to_string(6));
248  threads.push_back(std::async(
249  std::launch::async, load_data_for_thread_test_2, num_table_rows, db_table[5]));
250 
251  for (auto& p : threads) {
252  int num_columns_inserted = (int)p.get();
253  ASSERT_EQ(num_columns_inserted, 6); // each table was created with 6 columns
254  }
255 
256  /* delete tables in single thread */
257  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_1;"););
258  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_2;"););
259  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_3;"););
260  // ASSERT_NO_THROW(run_ddl_statement("drop table numbers_4;"););
261  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_5;"););
262  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_6;"););
263 }
264 
265 TEST(DataLoad, NumbersTable_Parallel_CreateDropCreateTable_InsertRows) {
266  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_1;"););
267  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_2;"););
268  ASSERT_NO_THROW(run_ddl_statement("drop table if exists numbers_3;"););
269 
270  /* create tables in single thread */
271  ASSERT_NO_THROW(
273  "create table numbers_1 (a smallint, b int, c bigint, d numeric(17,3), e "
274  "double, f float);"););
275  ASSERT_NO_THROW(
277  "create table numbers_2 (a smallint, b int, c bigint, d numeric(17,3), e "
278  "double, f float);"););
279  ASSERT_NO_THROW(
281  "create table numbers_3 (a smallint, b int, c bigint, d numeric(17,3), e "
282  "double, f float);"););
283  ASSERT_NO_THROW(
285  "create table numbers_4 (a smallint, b int, c bigint, d numeric(17,3), e "
286  "double, f float);"););
287  ASSERT_NO_THROW(
289  "create table numbers_5 (a smallint, b int, c bigint, d numeric(17,3), e "
290  "double, f float);"););
291 
292  /* Load table numbers_2 with data in the main thread, so it will be available for sure
293  * when drop_table on it will be executed later. Don't use new thread for loading table
294  * numbers_2 (see commented out), as one can't be sure that this action in the
295  * new/independent thread will be completed before executing drop_table in the main
296  * thread. It's enough to load just 1 row of data in the table numbers_2 to make sure it
297  * exists in the storage layer.
298  *
299  * threads.push_back(std::async(std::launch::async, load_data_for_thread_test_2, 1,
300  * table_name_temp));
301  */
302  string table_name("numbers_");
303  string table_name_temp(table_name + to_string(2));
304  EXPECT_TRUE(load_data_test(table_name_temp, 1));
305 
306  /* load data into tables numbers_1/3/4/5 using parallel threads */
307  int numThreads = 5;
308  vector<string> db_table;
309  std::vector<std::future<size_t>> threads;
310 
311  int num_rows = SMALL;
312  for (int i = 1; i <= numThreads; i++) {
313  int num_table_rows = num_rows * (numThreads - i + 1);
314  db_table.push_back(table_name + to_string(i));
315  if (i == 2) {
316  continue; // table numbers_2 has been loaded already
317  }
318  threads.push_back(std::async(std::launch::async,
320  num_table_rows,
321  db_table[i - 1]));
322  }
323 
324  /* drop table numbers_2 while loading other tables in independent threads */
325  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_2;"););
326 
327  /* create table numbers_6 and load it with data */
328  ASSERT_NO_THROW(
330  "create table numbers_6 (a smallint, b int, c bigint, d numeric(17,3), e "
331  "double, f float);"););
332  int num_table_rows = SMALL;
333  db_table.push_back(table_name + to_string(6));
334  threads.push_back(std::async(
335  std::launch::async, load_data_for_thread_test_2, num_table_rows, db_table[5]));
336 
337  /* recreate table numbers_2, this table will have new tb_id which will be different from
338  * the tb_id of dropped table numbers_2;
339  * this is true when new table's schema is same and/or is different than the one for the
340  * dropped table.
341  */
342  ASSERT_NO_THROW(
343  run_ddl_statement("create table numbers_2 (e "
344  "double, f double, g double, h double, i double, j double);"););
345  /* insert rows in table numbers_2, this table have been dropped and recreated, so data
346  * can be loaded */
347  int num_rows_for_dropped_table = SMALL * 2;
348  threads.push_back(std::async(std::launch::async,
350  num_rows_for_dropped_table,
351  table_name_temp));
352 
353  for (auto& p : threads) {
354  int num_columns_inserted = (int)p.get();
355  ASSERT_EQ(num_columns_inserted, 6); // each table was created with 6 columns
356  }
357 
358  /* delete tables in single thread */
359  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_1;"););
360  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_2;"););
361  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_3;"););
362  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_4;"););
363  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_5;"););
364  ASSERT_NO_THROW(run_ddl_statement("drop table numbers_6;"););
365 }
366 
367 int main(int argc, char* argv[]) {
369  ::testing::InitGoogleTest(&argc, argv);
370 
372 
373  int err{0};
374  try {
375  err = RUN_ALL_TESTS();
376  } catch (const std::exception& e) {
377  LOG(ERROR) << e.what();
378  }
379  QR::reset();
380  return err;
381 }
int main(int argc, char *argv[])
const int8_t const int64_t * num_rows
#define LOG(tag)
Definition: Logger.h:182
Populate a table with random data.
Scan through each column of a table via Chunk iterators.
std::string to_string(char const *&&v)
static QueryRunner * init(const char *db_path, const std::string &udf_filename="", const size_t max_gpu_mem=0, const int reserved_gpu_mem=256<< 20)
Definition: QueryRunner.h:70
#define SMALL
std::vector< size_t > populate_table_random(const std::string &table_name, const size_t num_rows, const Catalog &cat)
virtual void runDDLStatement(const std::string &)
TEST(DataLoad, Numbers)
static QueryRunner * get()
Definition: QueryRunner.h:115
static size_t load_data_for_thread_test_2(int num_rows, string table_name)
#define LARGE
void init_logger_stderr_only(int argc, char const *const *argv)
Definition: TestHelpers.h:194
#define BASE_PATH
void run_ddl_statement(std::string ddl)
bool load_data_test(string table_name, size_t num_rows)