OmniSciDB  04ee39c94c
ComputeMetadataTest.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "TestHelpers.h"
18 
19 #include "../Catalog/Catalog.h"
20 #include "../QueryEngine/Execute.h"
21 #include "../QueryEngine/TableOptimizer.h"
22 #include "../QueryRunner/QueryRunner.h"
23 
24 #include <gtest/gtest.h>
25 #include <string>
26 #include <utility>
27 
28 #ifndef BASE_PATH
29 #define BASE_PATH "./tmp"
30 #endif
31 
33 
34 namespace {
35 
36 inline void run_ddl_statement(const std::string& stmt) {
37  QR::get()->runDDLStatement(stmt);
38 }
39 
40 std::shared_ptr<ResultSet> run_multiple_agg(const std::string& query_str,
41  const ExecutorDeviceType device_type) {
42  return QR::get()->runSQL(query_str, device_type, false, false);
43 }
44 
45 #define ASSERT_METADATA(type, tag) \
46  template <typename T, bool enabled = std::is_same<T, type>::value> \
47  void assert_metadata(const ChunkStats& chunkStats, \
48  const T min, \
49  const T max, \
50  const bool has_nulls, \
51  const std::enable_if_t<enabled, type>* = 0) { \
52  ASSERT_EQ(chunkStats.min.tag##val, min); \
53  ASSERT_EQ(chunkStats.max.tag##val, max); \
54  ASSERT_EQ(chunkStats.has_nulls, has_nulls); \
55  }
56 
57 ASSERT_METADATA(bool, bool)
58 ASSERT_METADATA(int8_t, tinyint)
59 ASSERT_METADATA(int16_t, smallint)
60 ASSERT_METADATA(int32_t, int)
61 ASSERT_METADATA(int64_t, bigint)
62 ASSERT_METADATA(float, float)
63 ASSERT_METADATA(double, double)
64 
65 template <typename T, typename... Args>
66 void check_column_metadata_impl(const std::map<int, ChunkMetadata>& metadata_map,
67  const int column_idx, // -1 is $deleted
68  const T min,
69  const T max,
70  const bool has_nulls) {
71  auto chunk_metadata_itr = metadata_map.find(column_idx);
72  if (column_idx < 0) {
73  chunk_metadata_itr--;
74  }
75  CHECK(chunk_metadata_itr != metadata_map.end());
76  const auto& chunk_metadata = chunk_metadata_itr->second;
77  assert_metadata<T>(chunk_metadata.chunkStats, min, max, has_nulls);
78 }
79 
80 template <typename T, typename... Args>
81 void check_column_metadata_impl(const std::map<int, ChunkMetadata>& metadata_map,
82  const int column_idx, // -1 is $deleted
83  const T min,
84  const T max,
85  const bool has_nulls,
86  Args&&... args) {
87  check_column_metadata_impl(metadata_map, column_idx, min, max, has_nulls);
88  using T1 = typename std::tuple_element<1, std::tuple<Args...>>::type;
89  check_column_metadata_impl<T1>(metadata_map, std::forward<Args>(args)...);
90 }
91 
92 template <typename... Args>
94  [](const Fragmenter_Namespace::FragmentInfo& fragment, Args&&... args) {
95  const auto metadata_map = fragment.getChunkMetadataMapPhysical();
96  using T = typename std::tuple_element<1, std::tuple<Args...>>::type;
97  check_column_metadata_impl<T>(metadata_map, std::forward<Args>(args)...);
98  };
99 
100 template <typename... Args>
101 auto check_fragment_metadata(Args&&... args) -> auto {
102  static_assert(sizeof...(Args) % 4 == 0,
103  "check_fragment_metadata expects arguments to be a multiple of 4");
104  return std::make_tuple(check_column_metadata<Args...>,
105  std::make_tuple<Args...>(std::move(args)...));
106 }
107 
108 template <typename FUNC, typename... Args>
109 void run_op_per_fragment(const TableDescriptor* td, FUNC f, Args&&... args) {
110  const auto shards = QR::get()->getCatalog()->getPhysicalTablesDescriptors(td);
111  for (const auto shard : shards) {
112  auto* fragmenter = shard->fragmenter;
113  CHECK(fragmenter);
114  const auto table_info = fragmenter->getFragmentsForQuery();
115  for (const auto& fragment : table_info.fragments) {
116  f(fragment, std::forward<Args>(args)...);
117  }
118  }
119 }
120 
121 template <typename FUNC, typename... Args, std::size_t... Is>
123  FUNC f,
124  std::tuple<Args...> tuple,
125  std::index_sequence<Is...>) {
126  run_op_per_fragment(td, f, std::forward<Args>(std::get<Is>(tuple))...);
127 }
128 
129 template <typename FUNC, typename... Args>
131  std::tuple<FUNC, std::tuple<Args...>> tuple) {
133  td, std::get<0>(tuple), std::get<1>(tuple), std::index_sequence_for<Args...>{});
134 }
135 
137  const Catalog_Namespace::Catalog& cat) {
138  auto executor = Executor::getExecutor(cat.getCurrentDB().dbId);
139  TableOptimizer optimizer(td, executor.get(), cat);
140  EXPECT_NO_THROW(optimizer.recomputeMetadata());
141 }
142 
144  const Catalog_Namespace::Catalog& cat) {
145  auto executor = Executor::getExecutor(cat.getCurrentDB().dbId);
146  TableOptimizer optimizer(td, executor.get(), cat);
147  EXPECT_NO_THROW(optimizer.vacuumDeletedRows());
148  EXPECT_NO_THROW(optimizer.recomputeMetadata());
149 }
150 
151 static const std::string g_table_name{"metadata_test"};
152 
153 } // namespace
154 
155 class MultiFragMetadataUpdate : public ::testing::Test {
156  void SetUp() override {
157  EXPECT_NO_THROW(run_ddl_statement("DROP TABLE IF EXISTS " + g_table_name + ";"));
158  EXPECT_NO_THROW(run_ddl_statement(
159  "CREATE TABLE " + g_table_name +
160  " (x INT, y INT NOT NULL, z INT "
161  "ENCODING FIXED(8), a DOUBLE, b FLOAT, d DATE, dd DATE "
162  "ENCODING FIXED(16), c TEXT ENCODING DICT(32)) WITH (FRAGMENT_SIZE=4);"));
163 
165 
166  for (int i = 0; i < 5; i++) {
167  std::string date_str = i % 2 == 0 ? "'1/1/2019'" : "'2/2/2020'";
168  const auto insert_query =
169  gen(i, i, i, i * 1.1, i * 1.2, date_str, date_str, "'foo'");
171  }
172 
173  for (int i = 0; i < 5; i++) {
174  std::string date_str = i % 2 == 0 ? "'5/30/2021'" : "'6/30/2022'";
175  const int multiplier = i % 2 == 0 ? -1 : 1;
176  const auto insert_query = gen(multiplier * i,
177  multiplier * i,
178  multiplier * i,
179  std::to_string(multiplier * i * 1.1),
180  multiplier * i * 1.2,
181  date_str,
182  date_str,
183  "'bar'");
185  }
186 
187  for (size_t i = 6; i < 11; i++) {
188  std::string insert_query;
189  if (i % 2 == 0) {
190  insert_query = gen(i, i, i, i * 1.1, i * 1.2, "null", "null", "'hello'");
191  } else {
192  insert_query = gen("null",
193  std::numeric_limits<int32_t>::min(),
194  "null",
195  "null",
196  "null",
197  "'10/11/1981'",
198  "'10/11/1981'",
199  "'world'");
200  }
201 
203  }
204 
205  for (int i = 0; i < 5; i++) {
206  const auto insert_query = gen("null",
207  std::numeric_limits<int32_t>::max(),
208  "null",
209  "null",
210  "null",
211  "null",
212  "null",
213  "null");
215  }
216  }
217 
218  void TearDown() override {
219  EXPECT_NO_THROW(run_ddl_statement("DROP TABLE IF EXISTS " + g_table_name + ";"));
220  }
221 };
222 
224  std::vector<std::map<int, ChunkMetadata>> metadata_for_fragments;
225  {
226  const auto cat = QR::get()->getCatalog();
227  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
228 
229  // Get chunk metadata before recomputing
230  auto store_original_metadata =
231  [&metadata_for_fragments](const Fragmenter_Namespace::FragmentInfo& fragment) {
232  metadata_for_fragments.push_back(fragment.getChunkMetadataMapPhysical());
233  };
234 
235  run_op_per_fragment(td, store_original_metadata);
236  recompute_metadata(td, *cat);
237  }
238 
239  // Make sure metadata matches after recomputing
240  {
241  const auto cat = QR::get()->getCatalog();
242  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
243 
244  auto* fragmenter = td->fragmenter;
245  CHECK(fragmenter);
246  const auto table_info = fragmenter->getFragmentsForQuery();
247 
248  size_t ctr = 0;
249  auto check_metadata_equality =
250  [&ctr,
251  &metadata_for_fragments](const Fragmenter_Namespace::FragmentInfo& fragment) {
252  ASSERT_LT(ctr, metadata_for_fragments.size());
253  ASSERT_TRUE(metadata_for_fragments[ctr++] ==
254  fragment.getChunkMetadataMapPhysical());
255  };
256  run_op_per_fragment(td, check_metadata_equality);
257  }
258 }
259 
260 template <int NSHARDS>
261 class MetadataUpdate : public ::testing::Test {
262  void SetUp() override {
263  std::string phrase_shard_key = NSHARDS > 1 ? ", SHARD KEY (skey)" : "";
264  std::string phrase_shard_count =
265  NSHARDS > 1 ? ", SHARD_COUNT = " + std::to_string(NSHARDS) : "";
266  EXPECT_NO_THROW(run_ddl_statement("DROP TABLE IF EXISTS " + g_table_name + ";"));
267  EXPECT_NO_THROW(run_ddl_statement(
268  "CREATE TABLE " + g_table_name +
269  " (x INT, y INT NOT NULL, z INT "
270  "ENCODING FIXED(8), a DOUBLE, b FLOAT, d DATE, dd DATE "
271  "ENCODING FIXED(16), c TEXT ENCODING DICT(32), skey int" +
272  phrase_shard_key + ") WITH (FRAGMENT_SIZE=5" + phrase_shard_count + ");"));
273 
275  for (int sh = 0; sh < NSHARDS; ++sh) {
276  run_multiple_agg(gen(1, 1, 1, 1, 1, "'1/1/2010'", "'1/1/2010'", "'foo'", sh),
278  run_multiple_agg(gen(2, 2, 2, 2, 2, "'12/31/2012'", "'12/31/2012'", "'foo'", sh),
281  gen("null", 2, "null", "null", "null", "null", "'1/1/1940'", "'foo'", sh),
283  }
284  }
285 
286  void TearDown() override {
287  EXPECT_NO_THROW(run_ddl_statement("DROP TABLE IF EXISTS " + g_table_name + ";"));
288  }
289 };
290 
293 
// Spells the free function that holds a test body: <fixture>_<name>_body().
// Used both to define the body (`void BODY_F(...) { ... }`) and to call it.
#define BODY_F(fixture, name) fixture##_##name##_body()
// Defines a gtest TEST_F on fixture <fixture>_<variant> that just runs the body.
#define TEST_F1(fixture, name, variant) \
  TEST_F(fixture##_##variant, name) { BODY_F(fixture, name); }
// Registers one body twice: under the Unsharded and under the Sharded fixture.
#define TEST_UNSHARDED_AND_SHARDED(fixture, name) \
  TEST_F1(fixture, name, Unsharded) \
  TEST_F1(fixture, name, Sharded)
300 
// Body of the InitialMetadata test. The argument list below is a sequence of
// (column id, min, max, has_nulls) groups, one per column id 1 through 8,
// giving the chunk metadata expected before any update or recompute.
301 void BODY_F(MetadataUpdate, InitialMetadata) {
302  const auto cat = QR::get()->getCatalog();
303  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
304 
306  td,
308  // Check int col: expected range 1,2 nulls
309  /* id = */ 1,
310  /* min = */ 1,
311  /* max = */ 2,
312  /* has_nulls = */ true,
313 
314  // Check int not null col: expected range 1,2 no nulls
315  2,
316  1,
317  2,
318  false,
319 
320  // Check int encoded col: expected range 1,2 nulls
321  3,
322  1,
323  2,
324  true,
325 
326  // Check double col: expected range 1.0,2.0 nulls
327  4,
328  (double)1.0,
329  2.0,
330  true,
331 
332  // Check float col: expected range 1.0,2.0 nulls
333  5,
334  (float)1.0,
335  2.0,
336  true,
337 
338  // Check date in days 32 col: expected range 1262304000,1356912000 nulls
339  6,
340  1262304000,
341  1356912000,
342  true,
343 
344  // Check date in days 16 col: expected range -946771200,1356912000 no nulls
345  7,
346  -946771200,
347  1356912000,
348  false,
349 
350  // Check col c TEXT ENCODING DICT(32): expected range [0, 0] no nulls
351  8,
352  0,
353  0,
354  false));
355 }
356 
// Body of the IntUpdate test: updates column x (id 1) twice and checks the
// chunk metadata of that column after the first update and again after the
// second update followed by recompute_metadata().
357 void BODY_F(MetadataUpdate, IntUpdate) {
358  const auto cat = QR::get()->getCatalog();
359  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
360 
361  run_multiple_agg("UPDATE " + g_table_name + " SET x = 3 WHERE x = 1;",
363 
364  // Check int col: expected range 1,3 nulls
365  run_op_per_fragment(td, check_fragment_metadata(1, (int32_t)1, 3, true));
366 
367  run_multiple_agg("UPDATE " + g_table_name + " SET x = 0 WHERE x = 3;",
369 
370  recompute_metadata(td, *cat);
371  // Check int col after recompute: expected range 0,2 nulls
372  run_op_per_fragment(td, check_fragment_metadata(1, (int32_t)0, 2, true));
373 }
374 
375 void BODY_F(MetadataUpdate, IntRemoveNull) {
376  const auto cat = QR::get()->getCatalog();
377  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
378 
379  run_multiple_agg("UPDATE " + g_table_name + " SET x = 3;", ExecutorDeviceType::CPU);
380 
381  recompute_metadata(td, *cat);
382  // Check int col: expected range 1,2 nulls
383  run_op_per_fragment(td, check_fragment_metadata(1, (int32_t)3, 3, false));
384 }
385 
// Body of the NotNullInt test: pushes the NOT NULL column y (id 2) down to
// int32 lowest() + 1 for the rows where y = 1, checks the metadata, then sets
// every y back to 1 and checks again after recompute_metadata().
386 void BODY_F(MetadataUpdate, NotNullInt) {
387  const auto cat = QR::get()->getCatalog();
388  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
389 
390  run_multiple_agg("UPDATE " + g_table_name + " SET y = " +
391  std::to_string(std::numeric_limits<int32_t>::lowest() + 1) +
392  " WHERE y = 1;",
394  // Check int not null col: expected range int32 lowest() + 1,2 no nulls
396  td,
397  check_fragment_metadata(2, std::numeric_limits<int32_t>::lowest() + 1, 2, false));
398 
399  run_multiple_agg("UPDATE " + g_table_name + " SET y = 1;", ExecutorDeviceType::CPU);
400 
401  recompute_metadata(td, *cat);
    // Check int not null col after recompute: expected range 1,1 no nulls
402  run_op_per_fragment(td, check_fragment_metadata(2, (int32_t)1, 1, false));
403 }
404 
// Body of the DateNarrowRange test: sets column d (id 6) to the same date in
// every row, recomputes the metadata and expects min == max with no nulls.
405 void BODY_F(MetadataUpdate, DateNarrowRange) {
406  const auto cat = QR::get()->getCatalog();
407  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
408 
409  run_multiple_agg("UPDATE " + g_table_name + " SET d = '1/1/2010';",
411 
412  recompute_metadata(td, *cat);
413  // Check date in days 32 col: expected range 1262304000,1262304000 no nulls
415  check_fragment_metadata(6, (int64_t)1262304000, 1262304000, false));
416 }
417 
418 void BODY_F(MetadataUpdate, SmallDateNarrowMin) {
419  const auto cat = QR::get()->getCatalog();
420  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
421 
423  "UPDATE " + g_table_name + " SET dd = '1/1/2010' WHERE dd = '1/1/1940';",
425 
426  recompute_metadata(td, *cat);
428  check_fragment_metadata(7, (int64_t)1262304000, 1356912000, false));
429 }
430 
431 void BODY_F(MetadataUpdate, SmallDateNarrowMax) {
432  const auto cat = QR::get()->getCatalog();
433  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
434 
436  "UPDATE " + g_table_name + " SET dd = '1/1/2010' WHERE dd = '12/31/2012';",
438 
439  recompute_metadata(td, *cat);
441  check_fragment_metadata(7, (int64_t)-946771200, 1262304000, false));
442 }
443 
444 void BODY_F(MetadataUpdate, DeleteReset) {
445  const auto cat = QR::get()->getCatalog();
446  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
447 
448  run_multiple_agg("DELETE FROM " + g_table_name + " WHERE dd = '12/31/2012';",
450  run_op_per_fragment(td, check_fragment_metadata(-1, false, true, false));
451 
453  run_op_per_fragment(td, check_fragment_metadata(-1, false, false, false));
454 }
455 
456 void BODY_F(MetadataUpdate, EncodedStringNull) {
457  const auto cat = QR::get()->getCatalog();
458  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
459 
461  for (int sh = 0; sh < std::max(1, td->nShards); ++sh) {
462  run_multiple_agg(gen(1, 1, 1, 1, 1, "'1/1/2010'", "'1/1/2010'", "'abc'", sh),
464  }
466  run_op_per_fragment(td, check_fragment_metadata(8, 0, 1, false));
467 
468  for (int sh = 0; sh < std::max(1, td->nShards); ++sh) {
469  run_multiple_agg(gen(1, 1, 1, 1, 1, "'1/1/2010'", "'1/1/2010'", "null", sh),
471  }
473  run_op_per_fragment(td, check_fragment_metadata(8, 0, 1, true));
474 }
475 
476 void BODY_F(MetadataUpdate, AlterAfterOptimize) {
477  const auto cat = QR::get()->getCatalog();
478  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
479  run_op_per_fragment(td, check_fragment_metadata(1, 1, 2, true));
480  run_multiple_agg("DELETE FROM " + g_table_name + " WHERE x IS NULL;",
483  run_op_per_fragment(td, check_fragment_metadata(1, 1, 2, false));
484  // test ADD one column
485  EXPECT_NO_THROW(
486  run_ddl_statement("ALTER TABLE " + g_table_name + " ADD (c99 int default 99);"));
487  run_op_per_fragment(td, check_fragment_metadata(12, 99, 99, false));
488  // test ADD multiple columns
489  EXPECT_NO_THROW(run_ddl_statement("ALTER TABLE " + g_table_name +
490  " ADD (c88 int default 88, cnn int);"));
491  run_op_per_fragment(td, check_fragment_metadata(13, 88, 88, false));
494  std::numeric_limits<int32_t>::max(),
495  std::numeric_limits<int32_t>::lowest(),
496  true));
497 }
498 
499 void BODY_F(MetadataUpdate, AlterAfterEmptied) {
500  const auto cat = QR::get()->getCatalog();
501  const auto td = cat->getMetadataForTable(g_table_name, /*populateFragmenter=*/true);
502  run_multiple_agg("DELETE FROM " + g_table_name + ";", ExecutorDeviceType::CPU);
506  std::numeric_limits<int32_t>::max(),
507  std::numeric_limits<int32_t>::lowest(),
508  false));
509  // test ADD one column to make sure column is added even if no row exists
510  EXPECT_NO_THROW(
511  run_ddl_statement("ALTER TABLE " + g_table_name + " ADD (c99 int default 99);"));
514  std::numeric_limits<int32_t>::max(),
515  std::numeric_limits<int32_t>::lowest(),
516  true));
517  // test ADD multiple columns
518  EXPECT_NO_THROW(run_ddl_statement("ALTER TABLE " + g_table_name +
519  " ADD (c88 int default 88, cnn int);"));
520  run_op_per_fragment(td, check_fragment_metadata(13, 88, 88, false));
523  std::numeric_limits<int32_t>::max(),
524  std::numeric_limits<int32_t>::lowest(),
525  true));
526 }
527 TEST_UNSHARDED_AND_SHARDED(MetadataUpdate, AlterAfterEmptied)
528 TEST_UNSHARDED_AND_SHARDED(MetadataUpdate, AlterAfterOptimize)
534 TEST_UNSHARDED_AND_SHARDED(MetadataUpdate, SmallDateNarrowMin)
535 TEST_UNSHARDED_AND_SHARDED(MetadataUpdate, SmallDateNarrowMax)
537 TEST_UNSHARDED_AND_SHARDED(MetadataUpdate, EncodedStringNull)
538 
539 int main(int argc, char** argv) {
541  testing::InitGoogleTest(&argc, argv);
542 
544 
545  int err{0};
546  try {
547  err = RUN_ALL_TESTS();
548  } catch (const std::exception& e) {
549  LOG(ERROR) << e.what();
550  }
551  QR::reset();
552  return err;
553 }
void TearDown() override
static std::shared_ptr< Executor > getExecutor(const int db_id, const std::string &debug_dir="", const std::string &debug_file="", const MapDParameters mapd_parameters=MapDParameters(), ::QueryRenderer::QueryRenderManager *render_manager=nullptr)
Definition: Execute.cpp:122
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:81
#define TEST_UNSHARDED_AND_SHARDED(test_class, test_name)
ExecutorDeviceType
void check_column_metadata_impl(const std::map< int, ChunkMetadata > &metadata_map, const int column_idx, const T min, const T max, const bool has_nulls, Args &&... args)
#define LOG(tag)
Definition: Logger.h:182
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanu...
void SetUp() override
const std::map< int, ChunkMetadata > & getChunkMetadataMapPhysical() const
Definition: Fragmenter.h:102
std::string to_string(char const *&&v)
static QueryRunner * init(const char *db_path, const std::string &udf_filename="", const size_t max_gpu_mem=0, const int reserved_gpu_mem=256<< 20)
Definition: QueryRunner.h:70
virtual std::shared_ptr< ResultSet > runSQL(const std::string &query_str, const ExecutorDeviceType device_type, const bool hoist_literals=true, const bool allow_loop_joins=true)
#define BODY_F(test_class, test_name)
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:79
virtual void runDDLStatement(const std::string &)
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:176
#define ASSERT_METADATA(type, tag)
#define BASE_PATH
std::shared_ptr< Catalog_Namespace::Catalog > getCatalog() const
int main(int argc, char **argv)
static QueryRunner * get()
Definition: QueryRunner.h:115
void run_op_per_fragment(const TableDescriptor *td, std::tuple< FUNC, std::tuple< Args... >> tuple)
void recompute_metadata(const TableDescriptor *td, const Catalog_Namespace::Catalog &cat)
TEST_F(MultiFragMetadataUpdate, NoChanges)
#define CHECK(condition)
Definition: Logger.h:187
void init_logger_stderr_only(int argc, char const *const *argv)
Definition: TestHelpers.h:194
specifies the content in-memory of a row in the table metadata table
void vacuum_and_recompute_metadata(const TableDescriptor *td, const Catalog_Namespace::Catalog &cat)
void run_ddl_statement(std::string ddl)
auto check_fragment_metadata(Args &&... args) -> auto
TQueryResult run_multiple_agg(std::string sql)