OmniSciDB  c07336695a
ImportTest.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "TestHelpers.h"
18 
19 #include "../Import/Importer.h"
20 
21 #include <algorithm>
22 #include <limits>
23 #include <string>
24 
25 #include <gtest/gtest.h>
26 
27 #include <boost/algorithm/string.hpp>
28 #include <boost/program_options.hpp>
29 #include "../Catalog/Catalog.h"
30 #include "../Parser/parser.h"
31 #include "../QueryEngine/ResultSet.h"
32 #include "../QueryRunner/QueryRunner.h"
33 #include "../Shared/geo_types.h"
34 #include "boost/filesystem.hpp"
35 
36 #ifndef BASE_PATH
37 #define BASE_PATH "./tmp"
38 #endif
39 
40 using namespace std;
41 using namespace TestHelpers;
42 
44 
45 extern size_t g_leaf_count;
46 
47 namespace {
48 
49 bool g_aggregator{false};
50 size_t g_num_leafs{1};
51 
52 #define SKIP_ALL_ON_AGGREGATOR() \
53  if (g_aggregator) { \
54  LOG(ERROR) << "Tests not valid in distributed mode"; \
55  return; \
56  }
57 
58 bool g_hoist_literals{true};
59 
61 
62 inline void run_ddl_statement(const string& input_str) {
63  QR::get()->runDDLStatement(input_str);
64 }
65 
66 std::shared_ptr<ResultSet> run_query(const string& query_str) {
68 }
69 
70 bool compare_agg(const int64_t cnt, const double avg) {
71  std::string query_str = "SELECT COUNT(*), AVG(trip_distance) FROM trips;";
72  auto rows = run_query(query_str);
73  auto crt_row = rows->getNextRow(true, true);
74  CHECK_EQ(size_t(2), crt_row.size());
75  auto r_cnt = v<int64_t>(crt_row[0]);
76  auto r_avg = v<double>(crt_row[1]);
77  if (!(r_cnt == cnt && fabs(r_avg - avg) < 1E-9)) {
78  LOG(ERROR) << "error: " << r_cnt << ":" << cnt << ", " << r_avg << ":" << avg;
79  }
80  return r_cnt == cnt && fabs(r_avg - avg) < 1E-9;
81 }
82 
83 #ifdef ENABLE_IMPORT_PARQUET
84 bool import_test_parquet_with_null(const int64_t cnt) {
85  std::string query_str = "select count(*) from trips where rate_code_id is null;";
86  auto rows = run_query(query_str);
87  auto crt_row = rows->getNextRow(true, true);
88  CHECK_EQ(size_t(1), crt_row.size());
89  auto r_cnt = v<int64_t>(crt_row[0]);
90  return r_cnt == cnt;
91 }
92 #endif
93 
94 bool import_test_common(const string& query_str, const int64_t cnt, const double avg) {
95  run_ddl_statement(query_str);
96  return compare_agg(cnt, avg);
97 }
98 
99 bool import_test_common_geo(const string& query_str,
100  const std::string& table,
101  const int64_t cnt,
102  const double avg) {
103  // TODO(adb): Return ddl from QueryRunner::run_ddl_statement and use that
104  SQLParser parser;
105  std::list<std::unique_ptr<Parser::Stmt>> parse_trees;
106  std::string last_parsed;
107  if (parser.parse(query_str, parse_trees, last_parsed)) {
108  return false;
109  }
110  CHECK_EQ(parse_trees.size(), size_t(1));
111  const auto& stmt = parse_trees.front();
112  Parser::CopyTableStmt* ddl = dynamic_cast<Parser::CopyTableStmt*>(stmt.get());
113  if (!ddl) {
114  return false;
115  }
116  ddl->execute(*QR::get()->getSession());
117 
118  // was it a geo copy from?
119  bool was_geo_copy_from = ddl->was_geo_copy_from();
120  if (!was_geo_copy_from) {
121  return false;
122  }
123 
124  // get the rest of the payload
125  std::string geo_copy_from_table, geo_copy_from_file_name, geo_copy_from_partitions;
126  Importer_NS::CopyParams geo_copy_from_copy_params;
127  ddl->get_geo_copy_from_payload(geo_copy_from_table,
128  geo_copy_from_file_name,
129  geo_copy_from_copy_params,
130  geo_copy_from_partitions);
131 
132  // was it the right table?
133  if (geo_copy_from_table != "geo") {
134  return false;
135  }
136 
137  // @TODO simon.eves
138  // test other stuff
139  // filename
140  // CopyParams contents
141 
142  // success
143  return true;
144 }
145 
146 void import_test_geofile_importer(const std::string& file_str,
147  const std::string& table_name,
148  const bool compression,
149  const bool create_table = true) {
150  Importer_NS::ImportDriver import_driver(
151  QR::get()->getCatalog(),
152  QueryRunner::get_user_metadata(QR::get()->getSession()),
154 
155  auto file_path = boost::filesystem::path("../../Tests/Import/datafiles/" + file_str);
156 
157  ASSERT_TRUE(boost::filesystem::exists(file_path));
158 
159  ASSERT_NO_THROW(import_driver.importGeoTable(
160  file_path.string(), table_name, compression, create_table));
161 }
162 
163 bool import_test_local(const string& filename, const int64_t cnt, const double avg) {
164  return import_test_common(
165  string("COPY trips FROM '") + "../../Tests/Import/datafiles/" + filename +
166  "' WITH (header='true'" +
167  (filename.find(".parquet") != std::string::npos ? ",parquet='true'" : "") +
168  ");",
169  cnt,
170  avg);
171 }
172 
173 bool import_test_local_geo(const string& filename,
174  const string& other_options,
175  const int64_t cnt,
176  const double avg) {
177  return import_test_common_geo(string("COPY geo FROM '") +
178  "../../Tests/Import/datafiles/" + filename +
179  "' WITH (geo='true'" + other_options + ");",
180  "geo",
181  cnt,
182  avg);
183 }
184 
185 #ifdef HAVE_AWS_S3
186 bool import_test_s3(const string& prefix,
187  const string& filename,
188  const int64_t cnt,
189  const double avg) {
190  // unlikely we will expose any credentials in clear text here.
191  // likely credentials will be passed as the "tester"'s env.
192  // though s3 sdk should by default access the env, if any,
193  // we still read them out to test coverage of the code
194  // that passes credentials on per user basis.
195  char* env;
196  std::string s3_region, s3_access_key, s3_secret_key;
197  if (0 != (env = getenv("AWS_REGION"))) {
198  s3_region = env;
199  }
200  if (0 != (env = getenv("AWS_ACCESS_KEY_ID"))) {
201  s3_access_key = env;
202  }
203  if (0 != (env = getenv("AWS_SECRET_ACCESS_KEY"))) {
204  s3_secret_key = env;
205  }
206 
207  return import_test_common(
208  string("COPY trips FROM '") + "s3://mapd-parquet-testdata/" + prefix + "/" +
209  filename + "' WITH (header='true'" +
210  (s3_access_key.size() ? ",s3_access_key='" + s3_access_key + "'" : "") +
211  (s3_secret_key.size() ? ",s3_secret_key='" + s3_secret_key + "'" : "") +
212  (s3_region.size() ? ",s3_region='" + s3_region + "'" : "") +
213  (prefix.find(".parquet") != std::string::npos ||
214  filename.find(".parquet") != std::string::npos
215  ? ",parquet='true'"
216  : "") +
217  ");",
218  cnt,
219  avg);
220 }
221 
222 bool import_test_s3_compressed(const string& filename,
223  const int64_t cnt,
224  const double avg) {
225  return import_test_s3("trip.compressed", filename, cnt, avg);
226 }
227 #endif // HAVE_AWS_S3
228 
229 #ifdef ENABLE_IMPORT_PARQUET
230 bool import_test_local_parquet(const string& prefix,
231  const string& filename,
232  const int64_t cnt,
233  const double avg) {
234  return import_test_local(prefix + "/" + filename, cnt, avg);
235 }
236 #ifdef HAVE_AWS_S3
237 bool import_test_s3_parquet(const string& prefix,
238  const string& filename,
239  const int64_t cnt,
240  const double avg) {
241  return import_test_s3(prefix, filename, cnt, avg);
242 }
243 #endif // HAVE_AWS_S3
244 #endif // ENABLE_IMPORT_PARQUET
245 
246 #ifdef ENABLE_IMPORT_PARQUET
247 bool import_test_local_parquet_with_geo_point(const string& prefix,
248  const string& filename,
249  const int64_t cnt,
250  const double avg) {
251  run_ddl_statement("alter table trips add column pt_dropoff point;");
252  EXPECT_TRUE(import_test_local_parquet(prefix, filename, cnt, avg));
253  std::string query_str =
254  "select count(*) from trips where abs(dropoff_longitude-st_x(pt_dropoff))<0.01 and "
255  "abs(dropoff_latitude-st_y(pt_dropoff))<0.01;";
256  auto rows = run_query(query_str);
257  auto crt_row = rows->getNextRow(true, true);
258  CHECK_EQ(size_t(1), crt_row.size());
259  auto r_cnt = v<int64_t>(crt_row[0]);
260  return r_cnt == cnt;
261 }
262 #endif // ENABLE_IMPORT_PARQUET
263 
264 std::string TypeToString(SQLTypes type) {
265  return SQLTypeInfo(type, false).get_type_name();
266 }
267 
268 void d(const SQLTypes expected_type, const std::string& str) {
269  auto detected_type = Importer_NS::Detector::detect_sqltype(str);
270  EXPECT_EQ(TypeToString(expected_type), TypeToString(detected_type))
271  << "String: " << str;
272 }
273 
274 TEST(Detect, DateTime) {
275  d(kDATE, "2016-01-02");
276  d(kDATE, "02/01/2016");
277  d(kDATE, "01-Feb-16");
278  d(kDATE, "01/Feb/2016");
279  d(kDATE, "01/Feb/16");
280  d(kTIMESTAMP, "2016-01-02T03:04");
281  d(kTIMESTAMP, "2016-01-02T030405");
282  d(kTIMESTAMP, "2016-01-02T03:04:05");
283  d(kTIMESTAMP, "1776-01-02T03:04:05");
284  d(kTIMESTAMP, "9999-01-02T03:04:05");
285  d(kTIME, "03:04");
286  d(kTIME, "03:04:05");
287  d(kTEXT, "33:04");
288 }
289 
290 TEST(Detect, Numeric) {
291  d(kSMALLINT, "1");
292  d(kSMALLINT, "12345");
293  d(kINT, "123456");
294  d(kINT, "1234567890");
295  d(kBIGINT, "12345678901");
296  d(kFLOAT, "1.");
297  d(kFLOAT, "1.2345678");
298  // d(kDOUBLE, "1.2345678901");
299  // d(kDOUBLE, "1.23456789012345678901234567890");
300  d(kTEXT, "1.22.22");
301 }
302 
303 const char* create_table_mini_sort = R"(
304  CREATE TABLE sortab(
305  i int,
306  f float,
307  ia int[2],
308  pt point,
309  sa text[],
310  s2 text encoding dict(16),
311  dt date,
312  d2 date ENCODING FIXED(16),
313  tm timestamp,
314  t4 timestamp ENCODING FIXED(32),
315  va int[])
316  )";
317 
318 class ImportTestMiniSort : public ::testing::Test {
319  protected:
320  void SetUp() override {
321  ASSERT_NO_THROW(run_ddl_statement("drop table if exists sortab;"));
322  ASSERT_NO_THROW(run_ddl_statement("drop table if exists sortctas;"));
323  }
324 
325  void TearDown() override {
326  ASSERT_NO_THROW(run_ddl_statement("drop table if exists sortab;"));
327  ASSERT_NO_THROW(run_ddl_statement("drop table if exists sortctas;"));
328  }
329 };
330 
331 void create_minisort_table_on_column(const std::string& column_name) {
332  ASSERT_NO_THROW(run_ddl_statement(
333  std::string(create_table_mini_sort) +
334  (column_name.size() ? " with (sort_column='" + column_name + "');" : ";")));
335  EXPECT_NO_THROW(
336  run_ddl_statement("copy sortab from '../../Tests/Import/datafiles/mini_sort.txt' "
337  "with (header='false');"));
338 }
339 
340 void check_minisort_on_expects(const std::string& table_name,
341  const std::vector<int>& expects) {
342  auto rows = run_query("SELECT i FROM " + table_name + ";");
343  CHECK_EQ(expects.size(), rows->rowCount());
344  for (auto exp : expects) {
345  auto crt_row = rows->getNextRow(true, true);
346  CHECK_EQ(size_t(1), crt_row.size());
347  CHECK_EQ(int64_t(exp), v<int64_t>(crt_row[0]));
348  }
349 }
350 
351 void test_minisort_on_column(const std::string& column_name,
352  const std::vector<int> expects) {
353  create_minisort_table_on_column(column_name);
354  check_minisort_on_expects("sortab", expects);
355 }
356 
357 void create_minisort_table_on_column_with_ctas(const std::string& column_name) {
358  EXPECT_NO_THROW(
360  "create table sortctas as select * from sortab" +
361  (column_name.size() ? " with (sort_column='" + column_name + "');" : ";")););
362 }
363 
364 void test_minisort_on_column_with_ctas(const std::string& column_name,
365  const std::vector<int> expects) {
368  check_minisort_on_expects("sortctas", expects);
369 }
370 
372  test_minisort_on_column("", {5, 3, 1, 2, 4});
373 }
374 
376  test_minisort_on_column("i", {1, 2, 3, 4, 5});
377 }
378 
380  test_minisort_on_column("f", {1, 2, 3, 4, 5});
381 }
382 
383 TEST_F(ImportTestMiniSort, on_int_array) {
384  test_minisort_on_column("ia", {1, 2, 3, 4, 5});
385 }
386 
387 TEST_F(ImportTestMiniSort, on_string_array) {
388  test_minisort_on_column("sa", {5, 3, 1, 2, 4});
389 }
390 
391 TEST_F(ImportTestMiniSort, on_string_2b) {
392  test_minisort_on_column("s2", {5, 3, 1, 2, 4});
393 }
394 
396  test_minisort_on_column("dt", {1, 2, 3, 4, 5});
397 }
398 
400  test_minisort_on_column("d2", {1, 2, 3, 4, 5});
401 }
402 
404  test_minisort_on_column("tm", {1, 2, 3, 4, 5});
405 }
406 
408  test_minisort_on_column("t4", {1, 2, 3, 4, 5});
409 }
410 
411 TEST_F(ImportTestMiniSort, on_varlen_array) {
412  test_minisort_on_column("va", {1, 2, 3, 4, 5});
413 }
414 
415 TEST_F(ImportTestMiniSort, on_geo_point) {
416  test_minisort_on_column("pt", {2, 3, 4, 5, 1});
417 }
418 
419 TEST_F(ImportTestMiniSort, ctas_on_none) {
421  test_minisort_on_column_with_ctas("", {5, 3, 1, 2, 4});
422 }
423 
424 TEST_F(ImportTestMiniSort, ctas_on_int) {
426  test_minisort_on_column_with_ctas("i", {1, 2, 3, 4, 5});
427 }
428 
429 TEST_F(ImportTestMiniSort, ctas_on_float) {
431  test_minisort_on_column_with_ctas("f", {1, 2, 3, 4, 5});
432 }
433 
434 TEST_F(ImportTestMiniSort, ctas_on_int_array) {
436  test_minisort_on_column_with_ctas("ia", {1, 2, 3, 4, 5});
437 }
438 
439 TEST_F(ImportTestMiniSort, ctas_on_string_array) {
441  test_minisort_on_column_with_ctas("sa", {5, 3, 1, 2, 4});
442 }
443 
444 TEST_F(ImportTestMiniSort, ctas_on_string_2b) {
446  test_minisort_on_column_with_ctas("s2", {5, 3, 1, 2, 4});
447 }
448 
449 TEST_F(ImportTestMiniSort, ctas_on_date) {
451  test_minisort_on_column_with_ctas("dt", {1, 2, 3, 4, 5});
452 }
453 
454 TEST_F(ImportTestMiniSort, ctas_on_date_2b) {
456  test_minisort_on_column_with_ctas("d2", {1, 2, 3, 4, 5});
457 }
458 
459 TEST_F(ImportTestMiniSort, ctas_on_time) {
461  test_minisort_on_column_with_ctas("tm", {1, 2, 3, 4, 5});
462 }
463 
464 TEST_F(ImportTestMiniSort, ctas_on_time_4b) {
466  test_minisort_on_column_with_ctas("t4", {1, 2, 3, 4, 5});
467 }
468 
469 TEST_F(ImportTestMiniSort, ctas_on_varlen_array) {
471  test_minisort_on_column_with_ctas("va", {1, 2, 3, 4, 5});
472 }
473 
474 TEST_F(ImportTestMiniSort, ctas_on_geo_point) {
476  test_minisort_on_column_with_ctas("pt", {2, 3, 4, 5, 1});
477 }
478 
479 const char* create_table_mixed_varlen = R"(
480  CREATE TABLE import_test_mixed_varlen(
481  pt GEOMETRY(POINT),
482  ls GEOMETRY(LINESTRING),
483  faii INTEGER[2],
484  fadc DECIMAL(5,2)[2],
485  fatx TEXT[] ENCODING DICT(32),
486  fatx2 TEXT[2] ENCODING DICT(32)
487  );
488  )";
489 
490 class ImportTestMixedVarlen : public ::testing::Test {
491  protected:
492  void SetUp() override {
493  ASSERT_NO_THROW(run_ddl_statement("drop table if exists import_test_mixed_varlen;"));
494  ASSERT_NO_THROW(run_ddl_statement(create_table_mixed_varlen););
495  }
496 
497  void TearDown() override {
498  ASSERT_NO_THROW(run_ddl_statement("drop table if exists import_test_mixed_varlen;"));
499  }
500 };
501 
502 TEST_F(ImportTestMixedVarlen, Fix_failed_import_arrays_after_geos) {
503  EXPECT_NO_THROW(
504  run_ddl_statement("copy import_test_mixed_varlen from "
505  "'../../Tests/Import/datafiles/mixed_varlen.txt' with "
506  "(header='false');"));
507  std::string query_str = "SELECT COUNT(*) FROM import_test_mixed_varlen;";
508  auto rows = run_query(query_str);
509  auto crt_row = rows->getNextRow(true, true);
510  CHECK_EQ(size_t(1), crt_row.size());
511  CHECK_EQ(int64_t(1), v<int64_t>(crt_row[0]));
512 }
513 
514 const char* create_table_date = R"(
515  CREATE TABLE import_test_date(
516  date_text TEXT ENCODING DICT(32),
517  date_date DATE,
518  date_date_not_null DATE NOT NULL,
519  date_i32 DATE ENCODING FIXED(32),
520  date_i16 DATE ENCODING FIXED(16)
521  );
522 )";
523 
524 class ImportTestDate : public ::testing::Test {
525  protected:
526  void SetUp() override {
527  ASSERT_NO_THROW(run_ddl_statement("drop table if exists import_test_date;"));
528  ASSERT_NO_THROW(run_ddl_statement(create_table_date));
529  }
530 
531  void TearDown() override {
532  ASSERT_NO_THROW(run_ddl_statement("drop table if exists import_test_date;"));
533  }
534 };
535 
// Formats a date given as seconds since the Unix epoch (UTC) as "YYYY-MM-DD".
//   d: epoch seconds, or std::numeric_limits<int64_t>::min() for a NULL date.
// Returns "NULL" for the NULL sentinel, the formatted date otherwise, and
// "INVALID(<d>)" when the value cannot be converted or formatted. Without that
// check the function would read an uninitialized tm struct or buffer.
std::string convert_date_to_string(int64_t d) {
  if (d == std::numeric_limits<int64_t>::min()) {
    return std::string("NULL");
  }
  const auto date = static_cast<time_t>(d);
  std::tm tm_struct{};
  // gmtime_r returns a null pointer when the year does not fit in tm_year.
  if (gmtime_r(&date, &tm_struct) == nullptr) {
    return "INVALID(" + std::to_string(d) + ")";
  }
  // Sized well above the 11 bytes a four-digit year needs, so years beyond 9999
  // still fit instead of making strftime fail.
  char buf[32];
  const size_t written = strftime(buf, sizeof(buf), "%F", &tm_struct);
  if (written == 0) {
    return "INVALID(" + std::to_string(d) + ")";
  }
  return std::string(buf, written);
}
547 
548 inline void run_mixed_dates_test() {
549  ASSERT_NO_THROW(run_ddl_statement(
550  "COPY import_test_date FROM '../../Tests/Import/datafiles/mixed_dates.txt';"));
551 
552  auto rows = run_query("SELECT * FROM import_test_date;");
553  ASSERT_EQ(size_t(11), rows->entryCount());
554  for (size_t i = 0; i < 10; i++) {
555  const auto crt_row = rows->getNextRow(true, true);
556  ASSERT_EQ(size_t(5), crt_row.size());
557  const auto date_truth_str_nullable = v<NullableString>(crt_row[0]);
558  const auto date_truth_str = boost::get<std::string>(&date_truth_str_nullable);
559  CHECK(date_truth_str);
560  for (size_t j = 1; j < crt_row.size(); j++) {
561  const auto date = v<int64_t>(crt_row[j]);
562  const auto date_str = convert_date_to_string(static_cast<int64_t>(date));
563  ASSERT_EQ(*date_truth_str, date_str);
564  }
565  }
566 
567  // Last row is NULL (except for column 2 which is NOT NULL)
568  const auto crt_row = rows->getNextRow(true, true);
569  ASSERT_EQ(size_t(5), crt_row.size());
570  for (size_t j = 1; j < crt_row.size(); j++) {
571  if (j == 2) {
572  continue;
573  }
574  const auto date_null = v<int64_t>(crt_row[j]);
575  ASSERT_EQ(date_null, std::numeric_limits<int64_t>::min());
576  }
577 }
578 
579 TEST_F(ImportTestDate, ImportMixedDates) {
580  SKIP_ALL_ON_AGGREGATOR(); // global variable not available on leaf nodes
582 }
583 
584 class ImportTestLegacyDate : public ::testing::Test {
585  protected:
586  void SetUp() override {
587  ASSERT_NO_THROW(run_ddl_statement("drop table if exists import_test_date;"));
589  ASSERT_NO_THROW(run_ddl_statement(create_table_date));
590  }
591 
592  void TearDown() override {
593  ASSERT_NO_THROW(run_ddl_statement("drop table if exists import_test_date;"));
595  }
596 };
597 
598 TEST_F(ImportTestLegacyDate, ImportMixedDates) {
599  SKIP_ALL_ON_AGGREGATOR(); // global variable not available on leaf nodes
601 }
602 
603 const char* create_table_date_arr = R"(
604  CREATE TABLE import_test_date_arr(
605  date_text TEXT[],
606  date_date DATE[],
607  date_date_fixed DATE[2],
608  date_date_not_null DATE[] NOT NULL
609  );
610 )";
611 
612 class ImportTestDateArray : public ::testing::Test {
613  protected:
614  void SetUp() override {
615  ASSERT_NO_THROW(run_ddl_statement("drop table if exists import_test_date_arr;"));
616  ASSERT_NO_THROW(run_ddl_statement(create_table_date_arr));
617  }
618 
619  void TearDown() override {
620  ASSERT_NO_THROW(run_ddl_statement("drop table if exists import_test_date_arr;"));
621  }
622 };
623 
624 void decode_str_array(const TargetValue& r, std::vector<std::string>& arr) {
625  const auto atv = boost::get<ArrayTargetValue>(&r);
626  CHECK(atv);
627  if (!atv->is_initialized()) {
628  return;
629  }
630  const auto& vec = atv->get();
631  for (const auto& stv : vec) {
632  const auto ns = v<NullableString>(stv);
633  const auto str = boost::get<std::string>(&ns);
634  CHECK(str);
635  arr.push_back(*str);
636  }
637  CHECK_EQ(arr.size(), vec.size());
638 }
639 
// Imports mixed_date_arrays.txt into import_test_date_arr and validates the
// date array columns (columns 1..3) against the expected strings stored in the
// text array column (column 0). The result set must have 10 entries, which are
// checked in three groups: rows 0-2, rows 3-5 and rows 6-9.
640 TEST_F(ImportTestDateArray, ImportMixedDateArrays) {
641  EXPECT_NO_THROW(
642  run_ddl_statement("COPY import_test_date_arr FROM "
643  "'../../Tests/Import/datafiles/mixed_date_arrays.txt';"));
644 
645  auto rows = run_query("SELECT * FROM import_test_date_arr;");
646  ASSERT_EQ(size_t(10), rows->entryCount());
// Rows 0-2: every date array column must be initialized and each element must
// format to the string at the same index of the text array.
647  for (size_t i = 0; i < 3; i++) {
648  const auto crt_row = rows->getNextRow(true, true);
649  ASSERT_EQ(size_t(4), crt_row.size());
650  std::vector<std::string> truth_arr;
651  decode_str_array(crt_row[0], truth_arr);
652  for (size_t j = 1; j < crt_row.size(); j++) {
653  const auto date_arr = boost::get<ArrayTargetValue>(&crt_row[j]);
654  CHECK(date_arr && date_arr->is_initialized());
655  const auto& vec = date_arr->get();
656  for (size_t k = 0; k < vec.size(); k++) {
657  const auto date = v<int64_t>(vec[k]);
658  const auto date_str = convert_date_to_string(static_cast<int64_t>(date));
659  ASSERT_EQ(truth_arr[k], date_str);
660  }
661  }
662  }
663  // Date arrays with NULL dates
// Rows 3-5: same comparison, but the inner loop stops before the last column
// (date_date_not_null), so only columns 1 and 2 are checked.
664  for (size_t i = 3; i < 6; i++) {
665  const auto crt_row = rows->getNextRow(true, true);
666  ASSERT_EQ(size_t(4), crt_row.size());
667  std::vector<std::string> truth_arr;
668  decode_str_array(crt_row[0], truth_arr);
669  for (size_t j = 1; j < crt_row.size() - 1; j++) {
670  const auto date_arr = boost::get<ArrayTargetValue>(&crt_row[j]);
671  CHECK(date_arr && date_arr->is_initialized());
672  const auto& vec = date_arr->get();
673  for (size_t k = 0; k < vec.size(); k++) {
674  const auto date = v<int64_t>(vec[k]);
675  const auto date_str = convert_date_to_string(static_cast<int64_t>(date));
676  ASSERT_EQ(truth_arr[k], date_str);
677  }
678  }
679  }
680  // NULL date arrays, empty date arrays, NULL fixed date arrays
// Rows 6-9: columns 1 and 2 must be uninitialized (NULL) arrays, except in row 9
// where column 1 must be an initialized empty array and column 2 an initialized
// array whose elements all format to "NULL".
681  for (size_t i = 6; i < rows->entryCount(); i++) {
682  const auto crt_row = rows->getNextRow(true, true);
683  ASSERT_EQ(size_t(4), crt_row.size());
684  const auto date_arr1 = boost::get<ArrayTargetValue>(&crt_row[1]);
685  CHECK(date_arr1);
686  if (i == 9) {
687  // Empty date array
688  CHECK(date_arr1->is_initialized());
689  const auto& vec = date_arr1->get();
690  ASSERT_EQ(size_t(0), vec.size());
691  } else {
692  // NULL array
693  CHECK(!date_arr1->is_initialized());
694  }
695  const auto date_arr2 = boost::get<ArrayTargetValue>(&crt_row[2]);
696  CHECK(date_arr2);
697  if (i == 9) {
698  // Fixlen array - not NULL, filled with NULLs
699  CHECK(date_arr2->is_initialized());
700  const auto& vec = date_arr2->get();
701  for (size_t k = 0; k < vec.size(); k++) {
702  const auto date = v<int64_t>(vec[k]);
703  const auto date_str = convert_date_to_string(static_cast<int64_t>(date));
704  ASSERT_EQ("NULL", date_str);
705  }
706  } else {
707  // NULL fixlen array
708  CHECK(!date_arr2->is_initialized());
709  }
710  }
711 }
712 
713 const char* create_table_timestamps = R"(
714  CREATE TABLE import_test_timestamps(
715  ts0_text TEXT ENCODING DICT(32),
716  ts3_text TEXT ENCODING DICT(32),
717  ts6_text TEXT ENCODING DICT(32),
718  ts9_text TEXT ENCODING DICT(32),
719  ts_0 TIMESTAMP(0),
720  ts_0_i32 TIMESTAMP ENCODING FIXED(32),
721  ts_0_not_null TIMESTAMP NOT NULL,
722  ts_3 TIMESTAMP(3),
723  ts_3_not_null TIMESTAMP(3) NOT NULL,
724  ts_6 TIMESTAMP(6),
725  ts_6_not_null TIMESTAMP(6) NOT NULL,
726  ts_9 TIMESTAMP(9),
727  ts_9_not_null TIMESTAMP(9) NOT NULL
728  );
729 )";
730 
731 class ImportTestTimestamps : public ::testing::Test {
732  protected:
733  void SetUp() override {
734  ASSERT_NO_THROW(run_ddl_statement("drop table if exists import_test_timestamps;"));
735  ASSERT_NO_THROW(run_ddl_statement(create_table_timestamps));
736  }
737 
738  void TearDown() override {
739  ASSERT_NO_THROW(run_ddl_statement("drop table if exists import_test_date;"));
740  }
741 };
742 
// Formats an epoch-based timestamp as "YYYY-MM-DD HH:MM:SS[.fraction]" in UTC.
//   timeval: timestamp value counted in units of 10^-dimen seconds since the
//            Unix epoch (plain seconds when dimen <= 0).
//   dimen:   number of fractional digits; values <= 0 produce no fraction.
//            Values above 18 would overflow the 64-bit scale and are not supported.
// Returns the formatted string; the fraction is zero-padded to `dimen` digits.
std::string convert_timestamp_to_string(const time_t timeval, const int dimen) {
  std::tm tm_struct{};
  char buf[32];

  if (dimen <= 0) {
    time_t sec = timeval;
    gmtime_r(&sec, &tm_struct);
    const size_t written = strftime(buf, sizeof(buf), "%F %T", &tm_struct);
    return std::string(buf, written);
  }

  // Integer power of ten; avoids the round trip through floating point.
  int64_t scale = 1;
  for (int digit = 0; digit < dimen; ++digit) {
    scale *= 10;
  }

  const auto raw = static_cast<int64_t>(timeval);
  int64_t whole_seconds = raw / scale;
  int64_t fraction = raw % scale;
  // Integer division truncates toward zero. For every negative value with a
  // non-zero remainder, floor instead, including values in (-scale, 0) whose
  // truncated quotient is 0, so the fraction is always non-negative.
  if (fraction < 0) {
    fraction += scale;
    --whole_seconds;
  }

  time_t sec = static_cast<time_t>(whole_seconds);
  gmtime_r(&sec, &tm_struct);
  const size_t written = strftime(buf, sizeof(buf), "%F %T.", &tm_struct);
  const std::string subsecond = std::to_string(fraction);
  return std::string(buf, written) + std::string(dimen - subsecond.length(), '0') +
         subsecond;
}
763 
765  EXPECT_NO_THROW(
766  run_ddl_statement("COPY import_test_timestamps FROM "
767  "'../../Tests/Import/datafiles/mixed_timestamps.txt';"));
768 
769  auto rows = run_query("SELECT * FROM import_test_timestamps");
770  ASSERT_EQ(size_t(11), rows->entryCount());
771  for (size_t i = 0; i < rows->entryCount() - 1; i++) {
772  const auto crt_row = rows->getNextRow(true, true);
773  ASSERT_EQ(size_t(13), crt_row.size());
774  const auto ts0_str_nullable = v<NullableString>(crt_row[0]);
775  const auto ts0_str = boost::get<std::string>(&ts0_str_nullable);
776  const auto ts3_str_nullable = v<NullableString>(crt_row[1]);
777  const auto ts3_str = boost::get<std::string>(&ts3_str_nullable);
778  const auto ts6_str_nullable = v<NullableString>(crt_row[2]);
779  const auto ts6_str = boost::get<std::string>(&ts6_str_nullable);
780  const auto ts9_str_nullable = v<NullableString>(crt_row[3]);
781  const auto ts9_str = boost::get<std::string>(&ts9_str_nullable);
782  CHECK(ts0_str && ts3_str && ts6_str && ts9_str);
783  for (size_t j = 4; j < crt_row.size(); j++) {
784  const auto timeval = v<int64_t>(crt_row[j]);
785  const auto ti = rows->getColType(j);
786  CHECK(ti.is_timestamp());
787  const auto ts_str = convert_timestamp_to_string(timeval, ti.get_dimension());
788  switch (ti.get_dimension()) {
789  case 0:
790  ASSERT_EQ(*ts0_str, ts_str);
791  break;
792  case 3:
793  ASSERT_EQ(*ts3_str, ts_str);
794  break;
795  case 6:
796  ASSERT_EQ(*ts6_str, ts_str);
797  break;
798  case 9:
799  ASSERT_EQ(*ts9_str, ts_str);
800  break;
801  default:
802  CHECK(false);
803  }
804  }
805  }
806 
807  const auto crt_row = rows->getNextRow(true, true);
808  ASSERT_EQ(size_t(13), crt_row.size());
809  for (size_t j = 4; j < crt_row.size(); j++) {
810  if (j == 6 || j == 8 || j == 10 || j == 12) {
811  continue;
812  }
813  const auto ts_null = v<int64_t>(crt_row[j]);
814  ASSERT_EQ(ts_null, std::numeric_limits<int64_t>::min());
815  }
816 }
817 
818 TEST_F(ImportTestTimestamps, ImportMixedTimestamps) {
820 }
821 
822 const char* create_table_trips = R"(
823  CREATE TABLE trips (
824  medallion TEXT ENCODING DICT,
825  hack_license TEXT ENCODING DICT,
826  vendor_id TEXT ENCODING DICT,
827  rate_code_id SMALLINT,
828  store_and_fwd_flag TEXT ENCODING DICT,
829  pickup_datetime TIMESTAMP,
830  dropoff_datetime TIMESTAMP,
831  passenger_count SMALLINT,
832  trip_time_in_secs INTEGER,
833  trip_distance DECIMAL(14,2),
834  pickup_longitude DECIMAL(14,2),
835  pickup_latitude DECIMAL(14,2),
836  dropoff_longitude DECIMAL(14,2),
837  dropoff_latitude DECIMAL(14,2)
838  ) WITH (FRAGMENT_SIZE=75000000);
839  )";
840 
841 class ImportTest : public ::testing::Test {
842  protected:
843  void SetUp() override {
844  ASSERT_NO_THROW(run_ddl_statement("drop table if exists trips;"););
845  ASSERT_NO_THROW(run_ddl_statement(create_table_trips););
846  }
847 
848  void TearDown() override {
849  ASSERT_NO_THROW(run_ddl_statement("drop table trips;"););
850  ASSERT_NO_THROW(run_ddl_statement("drop table if exists geo;"););
851  }
852 };
853 
854 #ifdef ENABLE_IMPORT_PARQUET
855 // parquet test cases
856 TEST_F(ImportTest, One_parquet_file_1k_rows_in_10_groups) {
857  EXPECT_TRUE(
858  import_test_local_parquet(".", "trip_data_1k_rows_in_10_grps.parquet", 1000, 1.0));
859 }
860 TEST_F(ImportTest, One_parquet_file) {
861  EXPECT_TRUE(import_test_local_parquet(
862  "trip.parquet",
863  "part-00000-027865e6-e4d9-40b9-97ff-83c5c5531154-c000.snappy.parquet",
864  100,
865  1.0));
866  EXPECT_TRUE(import_test_parquet_with_null(100));
867 }
868 TEST_F(ImportTest, One_parquet_file_drop) {
869  EXPECT_TRUE(import_test_local_parquet(
870  "trip+1.parquet",
871  "part-00000-00496d78-a271-4067-b637-cf955cc1cece-c000.snappy.parquet",
872  100,
873  1.0));
874 }
875 TEST_F(ImportTest, All_parquet_file) {
876  EXPECT_TRUE(import_test_local_parquet("trip.parquet", "*.parquet", 1200, 1.0));
877  EXPECT_TRUE(import_test_parquet_with_null(1200));
878 }
879 TEST_F(ImportTest, All_parquet_file_drop) {
880  EXPECT_TRUE(import_test_local_parquet("trip+1.parquet", "*.parquet", 1200, 1.0));
881 }
882 TEST_F(ImportTest, One_parquet_file_with_geo_point) {
883  EXPECT_TRUE(import_test_local_parquet_with_geo_point(
884  "trip_data_with_point.parquet",
885  "part-00000-6dbefb0c-abbd-4c39-93e7-0026e36b7b7c-c000.snappy.parquet",
886  100,
887  1.0));
888 }
889 #ifdef HAVE_AWS_S3
890 // s3 parquet test cases
891 TEST_F(ImportTest, S3_One_parquet_file) {
892  EXPECT_TRUE(import_test_s3_parquet(
893  "trip.parquet",
894  "part-00000-0284f745-1595-4743-b5c4-3aa0262e4de3-c000.snappy.parquet",
895  100,
896  1.0));
897 }
898 TEST_F(ImportTest, S3_One_parquet_file_drop) {
899  EXPECT_TRUE(import_test_s3_parquet(
900  "trip+1.parquet",
901  "part-00000-00496d78-a271-4067-b637-cf955cc1cece-c000.snappy.parquet",
902  100,
903  1.0));
904 }
905 TEST_F(ImportTest, S3_All_parquet_file) {
906  EXPECT_TRUE(import_test_s3_parquet("trip.parquet", "", 1200, 1.0));
907 }
908 TEST_F(ImportTest, S3_All_parquet_file_drop) {
909  EXPECT_TRUE(import_test_s3_parquet("trip+1.parquet", "", 1200, 1.0));
910 }
911 TEST_F(ImportTest, S3_Null_Prefix) {
912  EXPECT_THROW(run_ddl_statement("copy trips from 's3://omnisci_ficticiousbucket/';"),
913  std::runtime_error);
914 }
915 TEST_F(ImportTest, S3_Wildcard_Prefix) {
916  EXPECT_THROW(run_ddl_statement("copy trips from 's3://omnisci_ficticiousbucket/*';"),
917  std::runtime_error);
918 }
919 #endif // HAVE_AWS_S3
920 #endif // ENABLE_IMPORT_PARQUET
921 
922 TEST_F(ImportTest, One_csv_file) {
923  EXPECT_TRUE(import_test_local("trip_data_9.csv", 100, 1.0));
924 }
925 
926 TEST_F(ImportTest, One_csv_file_no_newline) {
927  EXPECT_TRUE(import_test_local("trip_data_no_newline_1.csv", 100, 1.0));
928 }
929 
930 TEST_F(ImportTest, Many_csv_file) {
931  EXPECT_TRUE(import_test_local("trip_data_*.csv", 1200, 1.0));
932 }
933 
934 TEST_F(ImportTest, Many_csv_file_no_newline) {
935  EXPECT_TRUE(import_test_local("trip_data_no_newline_*.csv", 200, 1.0));
936 }
937 
938 TEST_F(ImportTest, One_gz_file) {
939  EXPECT_TRUE(import_test_local("trip_data_9.gz", 100, 1.0));
940 }
941 
942 TEST_F(ImportTest, One_bz2_file) {
943  EXPECT_TRUE(import_test_local("trip_data_9.bz2", 100, 1.0));
944 }
945 
946 TEST_F(ImportTest, One_tar_with_many_csv_files) {
947  EXPECT_TRUE(import_test_local("trip_data.tar", 1000, 1.0));
948 }
949 
950 TEST_F(ImportTest, One_tgz_with_many_csv_files) {
951  EXPECT_TRUE(import_test_local("trip_data.tgz", 100000, 1.0));
952 }
953 
954 TEST_F(ImportTest, One_rar_with_many_csv_files) {
955  EXPECT_TRUE(import_test_local("trip_data.rar", 1000, 1.0));
956 }
957 
958 TEST_F(ImportTest, One_zip_with_many_csv_files) {
959  EXPECT_TRUE(import_test_local("trip_data.zip", 1000, 1.0));
960 }
961 
962 TEST_F(ImportTest, One_7z_with_many_csv_files) {
963  EXPECT_TRUE(import_test_local("trip_data.7z", 1000, 1.0));
964 }
965 
966 TEST_F(ImportTest, One_tgz_with_many_csv_files_no_newline) {
967  EXPECT_TRUE(import_test_local("trip_data_some_with_no_newline.tgz", 500, 1.0));
968 }
969 
970 // Sharding tests
971 const char* create_table_trips_sharded = R"(
972  CREATE TABLE trips (
973  id INTEGER,
974  medallion TEXT ENCODING DICT,
975  hack_license TEXT ENCODING DICT,
976  vendor_id TEXT ENCODING DICT,
977  rate_code_id SMALLINT,
978  store_and_fwd_flag TEXT ENCODING DICT,
979  pickup_date DATE,
980  drop_date DATE ENCODING FIXED(16),
981  pickup_datetime TIMESTAMP,
982  dropoff_datetime TIMESTAMP,
983  passenger_count SMALLINT,
984  trip_time_in_secs INTEGER,
985  trip_distance DECIMAL(14,2),
986  pickup_longitude DECIMAL(14,2),
987  pickup_latitude DECIMAL(14,2),
988  dropoff_longitude DECIMAL(14,2),
989  dropoff_latitude DECIMAL(14,2),
990  shard key (id)
991  ) WITH (FRAGMENT_SIZE=75000000, SHARD_COUNT=2);
992  )";
// Fixture for imports into a `trips` table sharded on `id`.
993 class ImportTestSharded : public ::testing::Test {
994  protected:
// Drops any leftover `trips` table, then creates it from create_table_trips_sharded.
995  void SetUp() override {
996  ASSERT_NO_THROW(run_ddl_statement("drop table if exists trips;"););
997  ASSERT_NO_THROW(run_ddl_statement(create_table_trips_sharded););
998  }
999 
// Drops `trips` unconditionally and `geo` only if it exists. Both drops use a
// fatal assertion, so a throwing first drop skips the second.
1000  void TearDown() override {
1001  ASSERT_NO_THROW(run_ddl_statement("drop table trips;"););
1002  ASSERT_NO_THROW(run_ddl_statement("drop table if exists geo;"););
1003  }
1004 };
1005 
// Imports sharded_trip_data_9.csv into the id-sharded `trips` table via
// import_test_local with cnt=100, avg=1.0.
1006 TEST_F(ImportTestSharded, One_csv_file) {
1007  EXPECT_TRUE(import_test_local("sharded_trip_data_9.csv", 100, 1.0));
1008 }
1009 
1011  CREATE TABLE trips (
1012  id INTEGER,
1013  medallion TEXT ENCODING DICT,
1014  hack_license TEXT ENCODING DICT,
1015  vendor_id TEXT ENCODING DICT,
1016  rate_code_id SMALLINT,
1017  store_and_fwd_flag TEXT ENCODING DICT,
1018  pickup_date DATE,
1019  drop_date DATE ENCODING FIXED(16),
1020  pickup_datetime TIMESTAMP,
1021  dropoff_datetime TIMESTAMP,
1022  passenger_count SMALLINT,
1023  trip_time_in_secs INTEGER,
1024  trip_distance DECIMAL(14,2),
1025  pickup_longitude DECIMAL(14,2),
1026  pickup_latitude DECIMAL(14,2),
1027  dropoff_longitude DECIMAL(14,2),
1028  dropoff_latitude DECIMAL(14,2),
1029  shard key (medallion)
1030  ) WITH (FRAGMENT_SIZE=75000000, SHARD_COUNT=2);
1031  )";
// Fixture that builds `trips` from create_table_trips_dict_sharded_text.
1032 class ImportTestShardedText : public ::testing::Test {
1033  protected:
// Drops any leftover `trips` table before creating the fixture's table.
1034  void SetUp() override {
1035  ASSERT_NO_THROW(run_ddl_statement("drop table if exists trips;"););
1036  ASSERT_NO_THROW(run_ddl_statement(create_table_trips_dict_sharded_text););
1037  }
1038 
// Drops `trips` unconditionally and `geo` only if it exists. Both drops use a
// fatal assertion, so a throwing first drop skips the second.
1039  void TearDown() override {
1040  ASSERT_NO_THROW(run_ddl_statement("drop table trips;"););
1041  ASSERT_NO_THROW(run_ddl_statement("drop table if exists geo;"););
1042  }
1043 };
1044 
1046  EXPECT_TRUE(import_test_local("sharded_trip_data_9.csv", 100, 1.0));
1047 }
1048 
1050  CREATE TABLE trips (
1051  id INTEGER,
1052  medallion TEXT ENCODING DICT (8),
1053  hack_license TEXT ENCODING DICT,
1054  vendor_id TEXT ENCODING DICT,
1055  rate_code_id SMALLINT,
1056  store_and_fwd_flag TEXT ENCODING DICT,
1057  pickup_date DATE,
1058  drop_date DATE ENCODING FIXED(16),
1059  pickup_datetime TIMESTAMP,
1060  dropoff_datetime TIMESTAMP,
1061  passenger_count SMALLINT,
1062  trip_time_in_secs INTEGER,
1063  trip_distance DECIMAL(14,2),
1064  pickup_longitude DECIMAL(14,2),
1065  pickup_latitude DECIMAL(14,2),
1066  dropoff_longitude DECIMAL(14,2),
1067  dropoff_latitude DECIMAL(14,2),
1068  shard key (medallion)
1069  ) WITH (FRAGMENT_SIZE=75000000, SHARD_COUNT=2);
1070  )";
// Fixture that builds `trips` from create_table_trips_dict_sharded_text_8bit.
1071 class ImportTestShardedText8 : public ::testing::Test {
1072  protected:
// Drops any leftover `trips` table before creating the fixture's table.
1073  void SetUp() override {
1074  ASSERT_NO_THROW(run_ddl_statement("drop table if exists trips;"););
1075  ASSERT_NO_THROW(run_ddl_statement(create_table_trips_dict_sharded_text_8bit););
1076  }
1077 
// Drops `trips` unconditionally and `geo` only if it exists. Both drops use a
// fatal assertion, so a throwing first drop skips the second.
1078  void TearDown() override {
1079  ASSERT_NO_THROW(run_ddl_statement("drop table trips;"););
1080  ASSERT_NO_THROW(run_ddl_statement("drop table if exists geo;"););
1081  }
1082 };
1083 
1085  EXPECT_TRUE(import_test_local("sharded_trip_data_9.csv", 100, 1.0));
1086 }
1087 
1088 namespace {
// DDL for the `geospatial` table used by GeoImportTest: four POINT columns, one
// LINESTRING, one POLYGON, one MULTIPOLYGON and a DOUBLE trip_distance.
1089 const char* create_table_geo = R"(
1090  CREATE TABLE geospatial (
1091  p1 POINT,
1092  l LINESTRING,
1093  poly POLYGON,
1094  mpoly MULTIPOLYGON,
1095  p2 POINT,
1096  p3 POINT,
1097  p4 POINT,
1098  trip_distance DOUBLE
1099  ) WITH (FRAGMENT_SIZE=65000000);
1100  )";
1101 
1103  auto rows = run_query(R"(
1104  SELECT p1, l, poly, mpoly, p2, p3, p4, trip_distance
1105  FROM geospatial
1106  WHERE trip_distance = 1.0;
1107  )");
1108  auto crt_row = rows->getNextRow(true, true);
1109  CHECK_EQ(size_t(8), crt_row.size());
1110  const auto p1 = boost::get<std::string>(v<NullableString>(crt_row[0]));
1111  ASSERT_TRUE(Geo_namespace::GeoPoint("POINT (1 1)") == Geo_namespace::GeoPoint(p1));
1112  const auto linestring = boost::get<std::string>(v<NullableString>(crt_row[1]));
1113  ASSERT_TRUE(Geo_namespace::GeoLineString("LINESTRING (1 0,2 2,3 3)") ==
1114  Geo_namespace::GeoLineString(linestring));
1115  const auto poly = boost::get<std::string>(v<NullableString>(crt_row[2]));
1116  ASSERT_TRUE(Geo_namespace::GeoPolygon("POLYGON ((0 0,2 0,0 2,0 0))") ==
1118  const auto mpoly = boost::get<std::string>(v<NullableString>(crt_row[3]));
1119  ASSERT_TRUE(Geo_namespace::GeoMultiPolygon("MULTIPOLYGON (((0 0,2 0,0 2,0 0)))") ==
1121  const auto p2 = boost::get<std::string>(v<NullableString>(crt_row[4]));
1122  ASSERT_TRUE(Geo_namespace::GeoPoint("POINT (1 1)") == Geo_namespace::GeoPoint(p2));
1123  const auto p3 = boost::get<std::string>(v<NullableString>(crt_row[5]));
1124  ASSERT_TRUE(Geo_namespace::GeoPoint("POINT (1 1)") == Geo_namespace::GeoPoint(p3));
1125  const auto p4 = boost::get<std::string>(v<NullableString>(crt_row[6]));
1126  ASSERT_TRUE(Geo_namespace::GeoPoint("POINT (1 1)") == Geo_namespace::GeoPoint(p4));
1127  const auto trip_distance = v<double>(crt_row[7]);
1128  ASSERT_NEAR(1.0, trip_distance, 1e-7);
1129 }
1130 
1132  auto rows = run_query("SELECT omnisci_geo, trip FROM geospatial WHERE trip = 1.0");
1133  auto crt_row = rows->getNextRow(true, true);
1134  CHECK_EQ(size_t(2), crt_row.size());
1135  const auto point = boost::get<std::string>(v<NullableString>(crt_row[0]));
1136  ASSERT_TRUE(Geo_namespace::GeoPoint("POINT (1 1)") == Geo_namespace::GeoPoint(point));
1137  const auto trip_distance = v<double>(crt_row[1]);
1138  ASSERT_NEAR(1.0, trip_distance, 1e-7);
1139 }
1140 
1142  auto rows = run_query("SELECT omnisci_geo, trip FROM geospatial WHERE trip = 1.0");
1143  auto crt_row = rows->getNextRow(true, true);
1144  CHECK_EQ(size_t(2), crt_row.size());
1145  const auto mpoly = boost::get<std::string>(v<NullableString>(crt_row[0]));
1146  ASSERT_TRUE(Geo_namespace::GeoMultiPolygon("MULTIPOLYGON (((0 0,2 0,0 2,0 0)))") ==
1148  const auto trip_distance = v<double>(crt_row[1]);
1149  ASSERT_NEAR(1.0, trip_distance, 1e-7);
1150 }
1151 
1152 void check_geo_num_rows(const std::string& project_columns,
1153  const size_t num_expected_rows) {
1154  auto rows = run_query("SELECT " + project_columns + " FROM geospatial");
1155  ASSERT_TRUE(rows->entryCount() == num_expected_rows);
1156 }
1157 
1159  auto rows = run_query("SELECT omnisci_geo, trip FROM geospatial WHERE trip = 1.0");
1160  rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
1161  auto crt_row = rows->getNextRow(true, true);
1162  compare_geo_target(crt_row[0], GeoPointTargetValue({1.0, 1.0}), 1e-7);
1163  const auto trip_distance = v<double>(crt_row[1]);
1164  ASSERT_NEAR(1.0, trip_distance, 1e-7);
1165 }
1166 
1168  auto rows = run_query("SELECT omnisci_geo, trip FROM geospatial WHERE trip = 1.0");
1169  rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
1170  auto crt_row = rows->getNextRow(true, true);
1171  compare_geo_target(crt_row[0],
1172  GeoMultiPolyTargetValue({0.0, 0.0, 2.0, 0.0, 0.0, 2.0}, {3}, {1}),
1173  1e-7);
1174  const auto trip_distance = v<double>(crt_row[1]);
1175  ASSERT_NEAR(1.0, trip_distance, 1e-7);
1176 }
1177 
1178 } // namespace
1179 
1180 class GeoImportTest : public ::testing::Test {
1181  protected:
1182  void SetUp() override {
1183  ASSERT_NO_THROW(run_ddl_statement("drop table if exists geospatial;"););
1184  ASSERT_NO_THROW(run_ddl_statement(create_table_geo););
1185  }
1186 
1187  void TearDown() override {
1188  ASSERT_NO_THROW(run_ddl_statement("drop table geospatial;"););
1189  ASSERT_NO_THROW(run_ddl_statement("drop table if exists geospatial;"););
1190  }
1191 };
1192 
1193 TEST_F(GeoImportTest, CSV_Import) {
1194  const auto file_path =
1195  boost::filesystem::path("../../Tests/Import/datafiles/geospatial.csv");
1196  run_ddl_statement("COPY geospatial FROM '" + file_path.string() + "';");
1197  check_geo_import();
1198  check_geo_num_rows("p1, l, poly, mpoly, p2, p3, p4, trip_distance", 10);
1199 }
1200 
1201 TEST_F(GeoImportTest, CSV_Import_Empties) {
1202  const auto file_path =
1203  boost::filesystem::path("../../Tests/Import/datafiles/geospatial_empties.csv");
1204  run_ddl_statement("COPY geospatial FROM '" + file_path.string() + "';");
1205  check_geo_import();
1206  check_geo_num_rows("p1, l, poly, mpoly, p2, p3, p4, trip_distance",
1207  6); // we expect it to drop the 4 rows containing 'EMPTY'
1208 }
1209 
1210 TEST_F(GeoImportTest, CSV_Import_Degenerate) {
1211  const auto file_path =
1212  boost::filesystem::path("../../Tests/Import/datafiles/geospatial_degenerate.csv");
1213  run_ddl_statement("COPY geospatial FROM '" + file_path.string() + "';");
1214  check_geo_import();
1215  check_geo_num_rows("p1, l, poly, mpoly, p2, p3, p4, trip_distance",
1216  6); // we expect it to drop the 4 rows containing degenerate polys
1217 }
1218 
1219 // the remaining tests in this group are incomplete but leave them as placeholders
1220 
// Passes geo_coords_type='geometry' as the extra option string to
// import_test_local_geo (cnt=10, avg=4.5) and expects a true result.
1221 TEST_F(GeoImportTest, Geo_CSV_Local_Type_Geometry) {
1222  EXPECT_TRUE(
1223  import_test_local_geo("geospatial.csv", ", geo_coords_type='geometry'", 10, 4.5));
1224 }
1225 
// Expects import_test_local_geo to throw std::runtime_error when the option
// string requests geo_coords_type='geography'.
1226 TEST_F(GeoImportTest, Geo_CSV_Local_Type_Geography) {
1227  EXPECT_THROW(
1228  import_test_local_geo("geospatial.csv", ", geo_coords_type='geography'", 10, 4.5),
1229  std::runtime_error);
1230 }
1231 
// Expects import_test_local_geo to throw std::runtime_error for the
// geo_coords_type value 'other'.
1232 TEST_F(GeoImportTest, Geo_CSV_Local_Type_Other) {
1233  EXPECT_THROW(
1234  import_test_local_geo("geospatial.csv", ", geo_coords_type='other'", 10, 4.5),
1235  std::runtime_error);
1236 }
1237 
// Passes geo_coords_encoding='none' as the extra option string to
// import_test_local_geo (cnt=10, avg=4.5) and expects a true result.
1238 TEST_F(GeoImportTest, Geo_CSV_Local_Encoding_NONE) {
1239  EXPECT_TRUE(
1240  import_test_local_geo("geospatial.csv", ", geo_coords_encoding='none'", 10, 4.5));
1241 }
1242 
// Passes geo_coords_encoding='compressed(32)' as the extra option string to
// import_test_local_geo (cnt=10, avg=4.5) and expects a true result.
1243 TEST_F(GeoImportTest, Geo_CSV_Local_Encoding_GEOINT32) {
1244  EXPECT_TRUE(import_test_local_geo(
1245  "geospatial.csv", ", geo_coords_encoding='compressed(32)'", 10, 4.5));
1246 }
1247 
// Expects import_test_local_geo to throw std::runtime_error for the
// geo_coords_encoding value 'other'.
1248 TEST_F(GeoImportTest, Geo_CSV_Local_Encoding_Other) {
1249  EXPECT_THROW(
1250  import_test_local_geo("geospatial.csv", ", geo_coords_encoding='other'", 10, 4.5),
1251  std::runtime_error);
1252 }
1253 
// Passes geo_coords_srid=4326 as the extra option string to
// import_test_local_geo (cnt=10, avg=4.5) and expects a true result.
1254 TEST_F(GeoImportTest, Geo_CSV_Local_SRID_LonLat) {
1255  EXPECT_TRUE(import_test_local_geo("geospatial.csv", ", geo_coords_srid=4326", 10, 4.5));
1256 }
1257 
// Passes geo_coords_srid=900913 as the extra option string to
// import_test_local_geo (cnt=10, avg=4.5) and expects a true result.
1258 TEST_F(GeoImportTest, Geo_CSV_Local_SRID_Mercator) {
1259  EXPECT_TRUE(
1260  import_test_local_geo("geospatial.csv", ", geo_coords_srid=900913", 10, 4.5));
1261 }
1262 
// Expects import_test_local_geo to throw std::runtime_error for
// geo_coords_srid=12345.
1263 TEST_F(GeoImportTest, Geo_CSV_Local_SRID_Other) {
1264  EXPECT_THROW(
1265  import_test_local_geo("geospatial.csv", ", geo_coords_srid=12345", 10, 4.5),
1266  std::runtime_error);
1267 }
1268 
// Fixture for the geo-file importer tests. It only guarantees that no
// `geospatial` table exists before and after each test; the fixture itself
// issues no CREATE TABLE.
1269 class GeoGDALImportTest : public ::testing::Test {
1270  protected:
1271  void SetUp() override {
1272  ASSERT_NO_THROW(run_ddl_statement("drop table if exists geospatial;"););
1273  }
1274 
1275  void TearDown() override {
1276  ASSERT_NO_THROW(run_ddl_statement("drop table if exists geospatial;"););
1277  }
1278 };
1279 
// Imports the point GeoJSON sample into table "geospatial" via
// import_test_geofile_importer with compression=false (third argument).
// NOTE(review): no verification call is visible after the import in this
// listing -- check the full source for the follow-up assertion.
1280 TEST_F(GeoGDALImportTest, Geojson_Point_Import) {
1282  const auto file_path =
1283  boost::filesystem::path("geospatial_point/geospatial_point.geojson");
1284  import_test_geofile_importer(file_path.string(), "geospatial", false);
1286 }
1287 
// Imports the multipolygon GeoJSON sample into table "geospatial" with
// compression=false, then expects 10 entries for (omnisci_geo, trip).
1288 TEST_F(GeoGDALImportTest, Geojson_MultiPolygon_Import) {
1290  const auto file_path =
1291  boost::filesystem::path("geospatial_mpoly/geospatial_mpoly.geojson");
1292  import_test_geofile_importer(file_path.string(), "geospatial", false);
1294  check_geo_num_rows("omnisci_geo, trip", 10);
1295 }
1296 
// Imports geospatial_mpoly_empties.geojson into table "geospatial" with
// compression=false, then checks the surviving entry count.
1297 TEST_F(GeoGDALImportTest, Geojson_MultiPolygon_Import_Empties) {
1299  const auto file_path =
1300  boost::filesystem::path("geospatial_mpoly/geospatial_mpoly_empties.geojson");
1301  import_test_geofile_importer(file_path.string(), "geospatial", false);
1303  check_geo_num_rows("omnisci_geo, trip", 8); // we expect it to drop 2 of the 10 rows
1304 }
1305 
// Imports geospatial_mpoly_degenerate.geojson into table "geospatial" with
// compression=false, then checks the surviving entry count.
1306 TEST_F(GeoGDALImportTest, Geojson_MultiPolygon_Import_Degenerate) {
1308  const auto file_path =
1309  boost::filesystem::path("geospatial_mpoly/geospatial_mpoly_degenerate.geojson");
1310  import_test_geofile_importer(file_path.string(), "geospatial", false);
1312  check_geo_num_rows("omnisci_geo, trip", 8); // we expect it to drop 2 of the 10 rows
1313 }
1314 
// Imports the point shapefile into table "geospatial" with compression=false.
// NOTE(review): no verification call is visible after the import in this
// listing -- check the full source for the follow-up assertion.
1315 TEST_F(GeoGDALImportTest, Shapefile_Point_Import) {
1317  const auto file_path = boost::filesystem::path("geospatial_point/geospatial_point.shp");
1318  import_test_geofile_importer(file_path.string(), "geospatial", false);
1320 }
1321 
// Imports the multipolygon shapefile into table "geospatial" with
// compression=false.
// NOTE(review): no verification call is visible after the import in this
// listing -- check the full source for the follow-up assertion.
1322 TEST_F(GeoGDALImportTest, Shapefile_MultiPolygon_Import) {
1324  const auto file_path = boost::filesystem::path("geospatial_mpoly/geospatial_mpoly.shp");
1325  import_test_geofile_importer(file_path.string(), "geospatial", false);
1327 }
1328 
// Same point shapefile as Shapefile_Point_Import, but imported with
// compression=true (third argument).
// NOTE(review): no verification call is visible after the import in this
// listing -- check the full source for the follow-up assertion.
1329 TEST_F(GeoGDALImportTest, Shapefile_Point_Import_Compressed) {
1331  const auto file_path = boost::filesystem::path("geospatial_point/geospatial_point.shp");
1332  import_test_geofile_importer(file_path.string(), "geospatial", true);
1334 }
1335 
// Same multipolygon shapefile as Shapefile_MultiPolygon_Import, but imported
// with compression=true (third argument).
// NOTE(review): no verification call is visible after the import in this
// listing -- check the full source for the follow-up assertion.
1336 TEST_F(GeoGDALImportTest, Shapefile_MultiPolygon_Import_Compressed) {
1338  const auto file_path = boost::filesystem::path("geospatial_mpoly/geospatial_mpoly.shp");
1339  import_test_geofile_importer(file_path.string(), "geospatial", true);
1341 }
1342 
// Imports geospatial_point_3857.shp into table "geospatial" with
// compression=false.
// NOTE(review): "3857" presumably names the source SRID (EPSG:3857) -- confirm.
// No verification call is visible after the import in this listing.
1343 TEST_F(GeoGDALImportTest, Shapefile_Point_Import_3857) {
1345  const auto file_path =
1346  boost::filesystem::path("geospatial_point/geospatial_point_3857.shp");
1347  import_test_geofile_importer(file_path.string(), "geospatial", false);
1349 }
1350 
// Imports geospatial_mpoly_3857.shp into table "geospatial" with
// compression=false.
// NOTE(review): "3857" presumably names the source SRID (EPSG:3857) -- confirm.
// No verification call is visible after the import in this listing.
1351 TEST_F(GeoGDALImportTest, Shapefile_MultiPolygon_Import_3857) {
1353  const auto file_path =
1354  boost::filesystem::path("geospatial_mpoly/geospatial_mpoly_3857.shp");
1355  import_test_geofile_importer(file_path.string(), "geospatial", false);
1357 }
1358 
// Imports the multipolygon GeoJSON twice into the same table. The second call
// passes create_table=false (fourth argument), so the entry count is expected
// to grow from 10 to 20.
1359 TEST_F(GeoGDALImportTest, Geojson_MultiPolygon_Append) {
1361  const auto file_path =
1362  boost::filesystem::path("geospatial_mpoly/geospatial_mpoly.geojson");
1363  import_test_geofile_importer(file_path.string(), "geospatial", false);
1364  check_geo_num_rows("omnisci_geo, trip", 10);
1365  ASSERT_NO_THROW(
1366  import_test_geofile_importer(file_path.string(), "geospatial", false, false));
1367  check_geo_num_rows("omnisci_geo, trip", 20);
1368 }
1369 
// Imports a zipped file geodatabase into table "geospatial" with
// compression=false and expects 87 entries for (omnisci_geo, ESTABLISHED).
1370 TEST_F(GeoGDALImportTest, Geodatabase_Simple) {
1372  const auto file_path =
1373  boost::filesystem::path("geodatabase/S_USA.Experimental_Area_Locations.gdb.zip");
1374  import_test_geofile_importer(file_path.string(), "geospatial", false);
1375  check_geo_num_rows("omnisci_geo, ESTABLISHED", 87);
1376 }
1377 
1381  LOG(ERROR) << "Test requires LibKML support in GDAL";
1382  } else {
1383  const auto file_path = boost::filesystem::path("KML/test.kml");
1384  import_test_geofile_importer(file_path.string(), "geospatial", false);
1385  check_geo_num_rows("omnisci_geo, FID", 10);
1386  }
1387 }
1388 
1389 #ifdef HAVE_AWS_S3
1390 // s3 compressed (non-parquet) test cases
// S3 counterpart of the local CSV import: calls import_test_s3_compressed with
// cnt=100, avg=1.0. Compiled only when HAVE_AWS_S3 is defined.
1391 TEST_F(ImportTest, S3_One_csv_file) {
1392  EXPECT_TRUE(import_test_s3_compressed("trip_data_9.csv", 100, 1.0));
1393 }
1394 
// Calls import_test_s3_compressed for a .gz file with cnt=100, avg=1.0.
1395 TEST_F(ImportTest, S3_One_gz_file) {
1396  EXPECT_TRUE(import_test_s3_compressed("trip_data_9.gz", 100, 1.0));
1397 }
1398 
// Calls import_test_s3_compressed for a .bz2 file with cnt=100, avg=1.0.
1399 TEST_F(ImportTest, S3_One_bz2_file) {
1400  EXPECT_TRUE(import_test_s3_compressed("trip_data_9.bz2", 100, 1.0));
1401 }
1402 
// Calls import_test_s3_compressed for a .tar archive with cnt=1000, avg=1.0.
1403 TEST_F(ImportTest, S3_One_tar_with_many_csv_files) {
1404  EXPECT_TRUE(import_test_s3_compressed("trip_data.tar", 1000, 1.0));
1405 }
1406 
// Calls import_test_s3_compressed for a .tgz archive with cnt=100000, avg=1.0
// (the same count as the local One_tgz_with_many_csv_files test).
1407 TEST_F(ImportTest, S3_One_tgz_with_many_csv_files) {
1408  EXPECT_TRUE(import_test_s3_compressed("trip_data.tgz", 100000, 1.0));
1409 }
1410 
// Calls import_test_s3_compressed for a .rar archive with cnt=1000, avg=1.0.
1411 TEST_F(ImportTest, S3_One_rar_with_many_csv_files) {
1412  EXPECT_TRUE(import_test_s3_compressed("trip_data.rar", 1000, 1.0));
1413 }
1414 
// Calls import_test_s3_compressed for a .zip archive with cnt=1000, avg=1.0.
1415 TEST_F(ImportTest, S3_One_zip_with_many_csv_files) {
1416  EXPECT_TRUE(import_test_s3_compressed("trip_data.zip", 1000, 1.0));
1417 }
1418 
// Calls import_test_s3_compressed for a .7z archive with cnt=1000, avg=1.0.
1419 TEST_F(ImportTest, S3_One_7z_with_many_csv_files) {
1420  EXPECT_TRUE(import_test_s3_compressed("trip_data.7z", 1000, 1.0));
1421 }
1422 
// Passes an empty file name to import_test_s3_compressed with cnt=105200.
// NOTE(review): the empty name presumably selects every object under the test
// prefix -- confirm in import_test_s3_compressed.
1423 TEST_F(ImportTest, S3_All_files) {
1424  EXPECT_TRUE(import_test_s3_compressed("", 105200, 1.0));
1425 }
1426 
// Runs a COPY from an s3:// URL with s3_endpoint set to storage.googleapis.com
// through import_test_common, with cnt=100 and avg=1.0.
1427 TEST_F(ImportTest, S3_GCS_One_gz_file) {
1428  EXPECT_TRUE(import_test_common(
1429  std::string(
1430  "COPY trips FROM 's3://omnisci-importtest-data/trip-data/trip_data_9.gz' "
1431  "WITH (header='true', s3_endpoint='storage.googleapis.com');"),
1432  100,
1433  1.0));
1434 }
1435 
// Runs a geo COPY (geo='true') into table `geo` from an s3:// URL with
// s3_endpoint set to storage.googleapis.com, via import_test_common_geo with
// table="geo", cnt=87 and avg=1.0.
1436 TEST_F(ImportTest, S3_GCS_One_geo_file) {
1437  EXPECT_TRUE(
1438  import_test_common_geo("COPY geo FROM "
1439  "'s3://omnisci-importtest-data/geo-data/"
1440  "S_USA.Experimental_Area_Locations.gdb.zip' "
1441  "WITH (geo='true', s3_endpoint='storage.googleapis.com');",
1442  "geo",
1443  87,
1444  1.0));
1445 }
1446 #endif // HAVE_AWS_S3
1447 } // namespace
1448 
1449 int main(int argc, char** argv) {
1450  testing::InitGoogleTest(&argc, argv);
1451 
1452  namespace po = boost::program_options;
1453 
1454  po::options_description desc("Options");
1455 
1456  // these two are here to allow passing correctly google testing parameters
1457  desc.add_options()("gtest_list_tests", "list all tests");
1458  desc.add_options()("gtest_filter", "filters tests, use --help for details");
1459 
1460  desc.add_options()(
1461  "test-help",
1462  "Print all ImportTest specific options (for gtest options use `--help`).");
1463 
1464  logger::LogOptions log_options(argv[0]);
1465  log_options.max_files_ = 0; // stderr only by default
1466  desc.add(log_options.get_options());
1467 
1468  po::variables_map vm;
1469  po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
1470  po::notify(vm);
1471 
1472  if (vm.count("test-help")) {
1473  std::cout << "Usage: ImportTest" << std::endl << std::endl;
1474  std::cout << desc << std::endl;
1475  return 0;
1476  }
1477 
1478  logger::init(log_options);
1479 
1481 
1482  int err{0};
1483  try {
1484  err = RUN_ALL_TESTS();
1485  } catch (const std::exception& e) {
1486  LOG(ERROR) << e.what();
1487  }
1488  QR::reset();
1489  return err;
1490 }
int main(int argc, char **argv)
#define CHECK_EQ(x, y)
Definition: Logger.h:195
std::string convert_date_to_string(int64_t d)
Definition: ImportTest.cpp:536
void compare_geo_target(const TargetValue &r, const T &geo_truth_target, const double tol=-1.)
Definition: TestHelpers.h:127
void d(const SQLTypes expected_type, const std::string &str)
Definition: ImportTest.cpp:268
void import_test_geofile_importer(const std::string &file_str, const std::string &table_name, const bool compression, const bool create_table=true)
Definition: ImportTest.cpp:146
void check_geo_num_rows(const std::string &project_columns, const size_t num_expected_rows)
Definition: sqltypes.h:51
SQLTypes
Definition: sqltypes.h:40
bool import_test_local(const string &filename, const int64_t cnt, const double avg)
Definition: ImportTest.cpp:163
static SQLTypes detect_sqltype(const std::string &str)
Definition: Importer.cpp:2768
#define LOG(tag)
Definition: Logger.h:182
std::shared_ptr< ResultSet > run_query(const string &query_str)
Definition: ImportTest.cpp:66
void check_minisort_on_expects(const std::string &table_name, const std::vector< int > &expects)
Definition: ImportTest.cpp:340
void get_geo_copy_from_payload(std::string &geo_copy_from_table, std::string &geo_copy_from_file_name, Importer_NS::CopyParams &geo_copy_from_copy_params, std::string &geo_copy_from_partitions)
Definition: ParserNode.h:1297
size_t g_leaf_count
Definition: ParserNode.cpp:63
void test_minisort_on_column(const std::string &column_name, const std::vector< int > expects)
Definition: ImportTest.cpp:351
boost::program_options::options_description const & get_options() const
Definition: Logger.cpp:112
TEST_F(GeoGDALImportTest, KML_Simple)
void create_minisort_table_on_column(const std::string &column_name)
Definition: ImportTest.cpp:331
const char * create_table_trips_dict_sharded_text_8bit
void execute(const Catalog_Namespace::SessionInfo &session) override
std::string to_string(char const *&&v)
bool g_aggregator
Definition: ExecuteTest.cpp:46
static QueryRunner * init(const char *db_path, const std::string &udf_filename="", const size_t max_gpu_mem=0, const int reserved_gpu_mem=256<< 20)
Definition: QueryRunner.h:70
virtual std::shared_ptr< ResultSet > runSQL(const std::string &query_str, const ExecutorDeviceType device_type, const bool hoist_literals=true, const bool allow_loop_joins=true)
std::string get_type_name() const
Definition: sqltypes.h:422
void decode_str_array(const TargetValue &r, std::vector< std::string > &arr)
Definition: ImportTest.cpp:624
#define SKIP_ALL_ON_AGGREGATOR()
Definition: ImportTest.cpp:52
static bool hasGDALLibKML()
Definition: Importer.cpp:4013
void importGeoTable(const std::string &file_path, const std::string &table_name, const bool compression=true, const bool create_table=true)
Definition: Importer.cpp:4947
void init(LogOptions const &log_opts)
Definition: Logger.cpp:260
bool import_test_local_geo(const string &filename, const string &other_options, const int64_t cnt, const double avg)
Definition: ImportTest.cpp:173
virtual void runDDLStatement(const std::string &)
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:819
#define BASE_PATH
Definition: ImportTest.cpp:37
static QueryRunner * get()
Definition: QueryRunner.h:115
Definition: sqltypes.h:54
Definition: sqltypes.h:55
bool import_test_common(const string &query_str, const int64_t cnt, const double avg)
Definition: ImportTest.cpp:94
Catalog_Namespace::UserMetadata get_user_metadata(const Catalog_Namespace::SessionInfo *session)
bool compare_agg(const int64_t cnt, const double avg)
Definition: ImportTest.cpp:70
#define CHECK(condition)
Definition: Logger.h:187
bool was_geo_copy_from() const
Definition: ParserNode.h:1295
bool import_test_common_geo(const string &query_str, const std::string &table, const int64_t cnt, const double avg)
Definition: ImportTest.cpp:99
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:167
bool g_use_date_in_days_default_encoding
Definition: ParserNode.cpp:64
size_t max_files_
Definition: Logger.h:117
Definition: sqltypes.h:47
static bool run
std::string TypeToString(SQLTypes type)
Definition: ImportTest.cpp:264
void run_ddl_statement(std::string ddl)
void create_minisort_table_on_column_with_ctas(const std::string &column_name)
Definition: ImportTest.cpp:357
void test_minisort_on_column_with_ctas(const std::string &column_name, const std::vector< int > expects)
Definition: ImportTest.cpp:364
std::string convert_timestamp_to_string(const time_t timeval, const int dimen)
Definition: ImportTest.cpp:743