OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Catalog.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "Catalog/Catalog.h"
26 
27 #include <algorithm>
28 #include <boost/algorithm/string/predicate.hpp>
29 #include <boost/filesystem.hpp>
30 #include <boost/range/adaptor/map.hpp>
31 #include <boost/version.hpp>
32 #include <cassert>
33 #include <cerrno>
34 #include <cstdio>
35 #include <cstring>
36 #include <exception>
37 #include <fstream>
38 #include <list>
39 #include <memory>
40 #include <random>
41 #include <regex>
42 #include <sstream>
43 
44 #if BOOST_VERSION >= 106600
45 #include <boost/uuid/detail/sha1.hpp>
46 #else
47 #include <boost/uuid/sha1.hpp>
48 #endif
49 #include <rapidjson/document.h>
50 #include <rapidjson/istreamwrapper.h>
51 #include <rapidjson/ostreamwrapper.h>
52 #include <rapidjson/writer.h>
53 
54 #include "Catalog/SysCatalog.h"
55 
56 #include "QueryEngine/Execute.h"
58 
62 #include "Fragmenter/Fragmenter.h"
64 #include "LockMgr/LockMgr.h"
66 #include "Parser/ParserNode.h"
67 #include "QueryEngine/Execute.h"
69 #include "RefreshTimeCalculator.h"
70 #include "Shared/DateTimeParser.h"
71 #include "Shared/File.h"
72 #include "Shared/StringTransform.h"
73 #include "Shared/measure.h"
75 
76 #include "MapDRelease.h"
77 #include "RWLocks.h"
79 
80 using Chunk_NS::Chunk;
83 using std::list;
84 using std::map;
85 using std::pair;
86 using std::runtime_error;
87 using std::string;
88 using std::vector;
89 
91 bool g_enable_fsi{false};
92 extern bool g_cache_string_hash;
93 
94 // Serialize temp tables to a json file in the Catalogs directory for Calcite parsing
95 // under unit testing.
97 
98 namespace Catalog_Namespace {
99 
100 const int DEFAULT_INITIAL_VERSION = 1; // start at version 1
102  1073741824; // 2^30, give room for over a billion non-temp tables
104  1073741824; // 2^30, give room for over a billion non-temp dictionaries
105 
106 const std::string Catalog::physicalTableNameTag_("_shard_#");
107 std::map<std::string, std::shared_ptr<Catalog>> Catalog::mapd_cat_map_;
108 
109 thread_local bool Catalog::thread_holds_read_lock = false;
110 
115 
116 // Migration is done as a two-step process:
117 // this release creates and uses the new table;
118 // the next release will remove the old table. The old table is kept for now as a
119 // fallback path in case of migration failure.
122  sqliteConnector_.query("BEGIN TRANSACTION");
123  try {
125  "SELECT name FROM sqlite_master WHERE type='table' AND name='mapd_dashboards'");
126  if (sqliteConnector_.getNumRows() != 0) {
127  // already done
128  sqliteConnector_.query("END TRANSACTION");
129  return;
130  }
132  "CREATE TABLE mapd_dashboards (id integer primary key autoincrement, name text , "
133  "userid integer references mapd_users, state text, image_hash text, update_time "
134  "timestamp, "
135  "metadata text, UNIQUE(userid, name) )");
136  // now copy content from old table to new table
138  "insert into mapd_dashboards (id, name , "
139  "userid, state, image_hash, update_time , "
140  "metadata) "
141  "SELECT viewid , name , userid, view_state, image_hash, update_time, "
142  "view_metadata "
143  "from mapd_frontend_views");
144  } catch (const std::exception& e) {
145  sqliteConnector_.query("ROLLBACK TRANSACTION");
146  throw;
147  }
148  sqliteConnector_.query("END TRANSACTION");
149 }
150 
151 namespace {
152 
153 inline auto table_json_filepath(const std::string& base_path,
154  const std::string& db_name) {
155  return boost::filesystem::path(base_path + "/mapd_catalogs/" + db_name +
156  "_temp_tables.json");
157 }
158 
159 } // namespace
160 
161 Catalog::Catalog(const string& basePath,
162  const DBMetadata& curDB,
163  std::shared_ptr<Data_Namespace::DataMgr> dataMgr,
164  const std::vector<LeafHostInfo>& string_dict_hosts,
165  std::shared_ptr<Calcite> calcite,
166  bool is_new_db)
167  : basePath_(basePath)
168  , sqliteConnector_(curDB.dbName, basePath + "/mapd_catalogs/")
169  , currentDB_(curDB)
170  , dataMgr_(dataMgr)
171  , string_dict_hosts_(string_dict_hosts)
172  , calciteMgr_(calcite)
173  , nextTempTableId_(MAPD_TEMP_TABLE_START_ID)
174  , nextTempDictId_(MAPD_TEMP_DICT_START_ID)
175  , sqliteMutex_()
176  , sharedMutex_()
177  , thread_holding_sqlite_lock()
178  , thread_holding_write_lock() {
179  if (!is_new_db) {
181  }
182  buildMaps();
183  if (!is_new_db) {
185  }
187  boost::filesystem::remove(table_json_filepath(basePath_, currentDB_.dbName));
188  }
189 }
190 
193  // must clean up heap-allocated TableDescriptor and ColumnDescriptor structs
194  for (TableDescriptorMap::iterator tableDescIt = tableDescriptorMap_.begin();
195  tableDescIt != tableDescriptorMap_.end();
196  ++tableDescIt) {
197  tableDescIt->second->fragmenter = nullptr;
198  delete tableDescIt->second;
199  }
200 
201  // TableDescriptorMapById points to the same descriptors. No need to delete
202 
203  for (ColumnDescriptorMap::iterator columnDescIt = columnDescriptorMap_.begin();
204  columnDescIt != columnDescriptorMap_.end();
205  ++columnDescIt) {
206  delete columnDescIt->second;
207  }
208 
209  // ColumnDescriptorMapById points to the same descriptors. No need to delete
210 
212  boost::filesystem::remove(table_json_filepath(basePath_, currentDB_.dbName));
213  }
214 }
215 
218  sqliteConnector_.query("BEGIN TRANSACTION");
219  try {
220  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_tables)");
221  std::vector<std::string> cols;
222  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
223  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
224  }
225  if (std::find(cols.begin(), cols.end(), std::string("max_chunk_size")) ==
226  cols.end()) {
227  string queryString("ALTER TABLE mapd_tables ADD max_chunk_size BIGINT DEFAULT " +
229  sqliteConnector_.query(queryString);
230  }
231  if (std::find(cols.begin(), cols.end(), std::string("shard_column_id")) ==
232  cols.end()) {
233  string queryString("ALTER TABLE mapd_tables ADD shard_column_id BIGINT DEFAULT " +
234  std::to_string(0));
235  sqliteConnector_.query(queryString);
236  }
237  if (std::find(cols.begin(), cols.end(), std::string("shard")) == cols.end()) {
238  string queryString("ALTER TABLE mapd_tables ADD shard BIGINT DEFAULT " +
239  std::to_string(-1));
240  sqliteConnector_.query(queryString);
241  }
242  if (std::find(cols.begin(), cols.end(), std::string("num_shards")) == cols.end()) {
243  string queryString("ALTER TABLE mapd_tables ADD num_shards BIGINT DEFAULT " +
244  std::to_string(0));
245  sqliteConnector_.query(queryString);
246  }
247  if (std::find(cols.begin(), cols.end(), std::string("key_metainfo")) == cols.end()) {
248  string queryString("ALTER TABLE mapd_tables ADD key_metainfo TEXT DEFAULT '[]'");
249  sqliteConnector_.query(queryString);
250  }
251  if (std::find(cols.begin(), cols.end(), std::string("userid")) == cols.end()) {
252  string queryString("ALTER TABLE mapd_tables ADD userid integer DEFAULT " +
254  sqliteConnector_.query(queryString);
255  }
256  if (std::find(cols.begin(), cols.end(), std::string("sort_column_id")) ==
257  cols.end()) {
259  "ALTER TABLE mapd_tables ADD sort_column_id INTEGER DEFAULT 0");
260  }
261  if (std::find(cols.begin(), cols.end(), std::string("storage_type")) == cols.end()) {
262  string queryString("ALTER TABLE mapd_tables ADD storage_type TEXT DEFAULT ''");
263  sqliteConnector_.query(queryString);
264  }
265  } catch (std::exception& e) {
266  sqliteConnector_.query("ROLLBACK TRANSACTION");
267  throw;
268  }
269  sqliteConnector_.query("END TRANSACTION");
270 }
271 
274  sqliteConnector_.query("BEGIN TRANSACTION");
275  try {
277  "select name from sqlite_master WHERE type='table' AND "
278  "name='mapd_version_history'");
279  if (sqliteConnector_.getNumRows() == 0) {
281  "CREATE TABLE mapd_version_history(version integer, migration_history text "
282  "unique)");
283  } else {
285  "select * from mapd_version_history where migration_history = "
286  "'notnull_fixlen_arrays'");
287  if (sqliteConnector_.getNumRows() != 0) {
288  // legacy fixlen arrays had migrated
289  // no need for further execution
290  sqliteConnector_.query("END TRANSACTION");
291  return;
292  }
293  }
294  // Insert check for migration
296  "INSERT INTO mapd_version_history(version, migration_history) values(?,?)",
297  std::vector<std::string>{std::to_string(MAPD_VERSION), "notnull_fixlen_arrays"});
298  LOG(INFO) << "Updating mapd_columns, legacy fixlen arrays";
299  // Updating all fixlen array columns
300  string queryString("UPDATE mapd_columns SET is_notnull=1 WHERE coltype=" +
301  std::to_string(kARRAY) + " AND size>0;");
302  sqliteConnector_.query(queryString);
303  } catch (std::exception& e) {
304  sqliteConnector_.query("ROLLBACK TRANSACTION");
305  throw;
306  }
307  sqliteConnector_.query("END TRANSACTION");
308 }
309 
312  sqliteConnector_.query("BEGIN TRANSACTION");
313  try {
315  "select name from sqlite_master WHERE type='table' AND "
316  "name='mapd_version_history'");
317  if (sqliteConnector_.getNumRows() == 0) {
319  "CREATE TABLE mapd_version_history(version integer, migration_history text "
320  "unique)");
321  } else {
323  "select * from mapd_version_history where migration_history = "
324  "'notnull_geo_columns'");
325  if (sqliteConnector_.getNumRows() != 0) {
326  // legacy geo columns had migrated
327  // no need for further execution
328  sqliteConnector_.query("END TRANSACTION");
329  return;
330  }
331  }
332  // Insert check for migration
334  "INSERT INTO mapd_version_history(version, migration_history) values(?,?)",
335  std::vector<std::string>{std::to_string(MAPD_VERSION), "notnull_geo_columns"});
336  LOG(INFO) << "Updating mapd_columns, legacy geo columns";
337  // Updating all geo columns
338  string queryString(
339  "UPDATE mapd_columns SET is_notnull=1 WHERE coltype=" + std::to_string(kPOINT) +
340  " OR coltype=" + std::to_string(kLINESTRING) + " OR coltype=" +
341  std::to_string(kPOLYGON) + " OR coltype=" + std::to_string(kMULTIPOLYGON) + ";");
342  sqliteConnector_.query(queryString);
343  } catch (std::exception& e) {
344  sqliteConnector_.query("ROLLBACK TRANSACTION");
345  throw;
346  }
347  sqliteConnector_.query("END TRANSACTION");
348 }
349 
352  sqliteConnector_.query("BEGIN TRANSACTION");
353  try {
354  // check table still exists
356  "SELECT name FROM sqlite_master WHERE type='table' AND "
357  "name='mapd_frontend_views'");
358  if (sqliteConnector_.getNumRows() == 0) {
359  // table does not exist
360  // no need to migrate
361  sqliteConnector_.query("END TRANSACTION");
362  return;
363  }
364  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_frontend_views)");
365  std::vector<std::string> cols;
366  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
367  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
368  }
369  if (std::find(cols.begin(), cols.end(), std::string("image_hash")) == cols.end()) {
370  sqliteConnector_.query("ALTER TABLE mapd_frontend_views ADD image_hash text");
371  }
372  if (std::find(cols.begin(), cols.end(), std::string("update_time")) == cols.end()) {
373  sqliteConnector_.query("ALTER TABLE mapd_frontend_views ADD update_time timestamp");
374  }
375  if (std::find(cols.begin(), cols.end(), std::string("view_metadata")) == cols.end()) {
376  sqliteConnector_.query("ALTER TABLE mapd_frontend_views ADD view_metadata text");
377  }
378  } catch (std::exception& e) {
379  sqliteConnector_.query("ROLLBACK TRANSACTION");
380  throw;
381  }
382  sqliteConnector_.query("END TRANSACTION");
383 }
384 
387  sqliteConnector_.query("BEGIN TRANSACTION");
388  try {
390  "CREATE TABLE IF NOT EXISTS mapd_links (linkid integer primary key, userid "
391  "integer references mapd_users, "
392  "link text unique, view_state text, update_time timestamp, view_metadata text)");
393  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_links)");
394  std::vector<std::string> cols;
395  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
396  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
397  }
398  if (std::find(cols.begin(), cols.end(), std::string("view_metadata")) == cols.end()) {
399  sqliteConnector_.query("ALTER TABLE mapd_links ADD view_metadata text");
400  }
401  } catch (const std::exception& e) {
402  sqliteConnector_.query("ROLLBACK TRANSACTION");
403  throw;
404  }
405  sqliteConnector_.query("END TRANSACTION");
406 }
407 
410  sqliteConnector_.query("BEGIN TRANSACTION");
411  try {
412  sqliteConnector_.query("UPDATE mapd_links SET userid = 0 WHERE userid IS NULL");
413  // check table still exists
415  "SELECT name FROM sqlite_master WHERE type='table' AND "
416  "name='mapd_frontend_views'");
417  if (sqliteConnector_.getNumRows() == 0) {
418  // table does not exist
419  // no need to migrate
420  sqliteConnector_.query("END TRANSACTION");
421  return;
422  }
424  "UPDATE mapd_frontend_views SET userid = 0 WHERE userid IS NULL");
425  } catch (const std::exception& e) {
426  sqliteConnector_.query("ROLLBACK TRANSACTION");
427  throw;
428  }
429  sqliteConnector_.query("END TRANSACTION");
430 }
431 
432 // Introduce DB version into the tables table.
433 // If the DB does not have a version, reset all page sizes to 2097152 to be compatible
434 // with the old value.
435 
438  if (currentDB_.dbName.length() == 0) {
439  // updateDictionaryNames dbName length is zero nothing to do here
440  return;
441  }
442  sqliteConnector_.query("BEGIN TRANSACTION");
443  try {
444  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_tables)");
445  std::vector<std::string> cols;
446  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
447  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
448  }
449  if (std::find(cols.begin(), cols.end(), std::string("version_num")) == cols.end()) {
450  LOG(INFO) << "Updating mapd_tables updatePageSize";
451  // No version number
452  // need to update the default page size to the old correct value
453  sqliteConnector_.query("UPDATE mapd_tables SET frag_page_size = 2097152 ");
454  // need to add new version info
455  string queryString("ALTER TABLE mapd_tables ADD version_num BIGINT DEFAULT " +
457  sqliteConnector_.query(queryString);
458  }
459  } catch (std::exception& e) {
460  sqliteConnector_.query("ROLLBACK TRANSACTION");
461  throw;
462  }
463  sqliteConnector_.query("END TRANSACTION");
464 }
465 
468  sqliteConnector_.query("BEGIN TRANSACTION");
469  try {
470  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_columns)");
471  std::vector<std::string> cols;
472  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
473  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
474  }
475  if (std::find(cols.begin(), cols.end(), std::string("version_num")) == cols.end()) {
476  LOG(INFO) << "Updating mapd_columns updateDeletedColumnIndicator";
477  // need to add new version info
478  string queryString("ALTER TABLE mapd_columns ADD version_num BIGINT DEFAULT " +
480  sqliteConnector_.query(queryString);
481  // need to add new column to table definition to indicate deleted column, column used
482  // as bitmap for deleted rows.
484  "ALTER TABLE mapd_columns ADD is_deletedcol boolean default 0 ");
485  }
486  } catch (std::exception& e) {
487  sqliteConnector_.query("ROLLBACK TRANSACTION");
488  throw;
489  }
490  sqliteConnector_.query("END TRANSACTION");
491 }
492 
493 // Introduce DB version into the dictionary tables.
494 // If the DB does not have a version, rename all dictionary tables.
495 
498  if (currentDB_.dbName.length() == 0) {
499  // updateDictionaryNames dbName length is zero nothing to do here
500  return;
501  }
502  sqliteConnector_.query("BEGIN TRANSACTION");
503  try {
504  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_dictionaries)");
505  std::vector<std::string> cols;
506  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
507  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
508  }
509  if (std::find(cols.begin(), cols.end(), std::string("version_num")) == cols.end()) {
510  // No version number
511  // need to rename dictionaries
512  string dictQuery("SELECT dictid, name from mapd_dictionaries");
513  sqliteConnector_.query(dictQuery);
514  size_t numRows = sqliteConnector_.getNumRows();
515  for (size_t r = 0; r < numRows; ++r) {
516  int dictId = sqliteConnector_.getData<int>(r, 0);
517  std::string dictName = sqliteConnector_.getData<string>(r, 1);
518 
519  std::string oldName =
520  basePath_ + "/mapd_data/" + currentDB_.dbName + "_" + dictName;
521  std::string newName = basePath_ + "/mapd_data/DB_" +
522  std::to_string(currentDB_.dbId) + "_DICT_" +
523  std::to_string(dictId);
524 
525  int result = rename(oldName.c_str(), newName.c_str());
526 
527  if (result == 0) {
528  LOG(INFO) << "Dictionary upgrade: successfully renamed " << oldName << " to "
529  << newName;
530  } else {
531  LOG(ERROR) << "Failed to rename old dictionary directory " << oldName << " to "
532  << newName + " dbname '" << currentDB_.dbName << "' error code "
533  << std::to_string(result);
534  }
535  }
536  // need to add new version info
537  string queryString("ALTER TABLE mapd_dictionaries ADD version_num BIGINT DEFAULT " +
539  sqliteConnector_.query(queryString);
540  }
541  } catch (std::exception& e) {
542  sqliteConnector_.query("ROLLBACK TRANSACTION");
543  throw;
544  }
545  sqliteConnector_.query("END TRANSACTION");
546 }
547 
550  sqliteConnector_.query("BEGIN TRANSACTION");
551  try {
553  "CREATE TABLE IF NOT EXISTS mapd_logical_to_physical("
554  "logical_table_id integer, physical_table_id integer)");
555  } catch (const std::exception& e) {
556  sqliteConnector_.query("ROLLBACK TRANSACTION");
557  throw;
558  }
559  sqliteConnector_.query("END TRANSACTION");
560 }
561 
562 void Catalog::updateLogicalToPhysicalTableMap(const int32_t logical_tb_id) {
563  /* this proc inserts/updates all pairs of (logical_tb_id, physical_tb_id) in
564  * sqlite mapd_logical_to_physical table for given logical_tb_id as needed
565  */
566 
568  sqliteConnector_.query("BEGIN TRANSACTION");
569  try {
570  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(logical_tb_id);
571  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
572  const auto physicalTables = physicalTableIt->second;
573  CHECK(!physicalTables.empty());
574  for (size_t i = 0; i < physicalTables.size(); i++) {
575  int32_t physical_tb_id = physicalTables[i];
577  "INSERT OR REPLACE INTO mapd_logical_to_physical (logical_table_id, "
578  "physical_table_id) VALUES (?1, ?2)",
579  std::vector<std::string>{std::to_string(logical_tb_id),
580  std::to_string(physical_tb_id)});
581  }
582  }
583  } catch (std::exception& e) {
584  sqliteConnector_.query("ROLLBACK TRANSACTION");
585  throw;
586  }
587  sqliteConnector_.query("END TRANSACTION");
588 }
589 
592  sqliteConnector_.query("BEGIN TRANSACTION");
593  try {
594  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_dictionaries)");
595  std::vector<std::string> cols;
596  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
597  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
598  }
599  if (std::find(cols.begin(), cols.end(), std::string("refcount")) == cols.end()) {
600  sqliteConnector_.query("ALTER TABLE mapd_dictionaries ADD refcount DEFAULT 1");
601  }
602  } catch (std::exception& e) {
603  sqliteConnector_.query("ROLLBACK TRANSACTION");
604  throw;
605  }
606  sqliteConnector_.query("END TRANSACTION");
607 }
608 
611  sqliteConnector_.query("BEGIN TRANSACTION");
612  try {
616  } catch (std::exception& e) {
617  sqliteConnector_.query("ROLLBACK TRANSACTION");
618  throw;
619  }
620  sqliteConnector_.query("END TRANSACTION");
621 }
622 
624  std::vector<foreign_storage::ForeignTable> foreign_tables{};
625  {
627  sqliteConnector_.query("BEGIN TRANSACTION");
628  try {
630  "SELECT name FROM sqlite_master WHERE type='table' AND "
631  "name IN ('omnisci_foreign_servers', 'omnisci_foreign_tables', "
632  "'omnisci_user_mappings')");
633  if (sqliteConnector_.getNumRows() > 0) {
635  "SELECT tableid, name, isview, storage_type FROM mapd_tables "
636  "WHERE storage_type = 'FOREIGN_TABLE'");
637  auto num_rows = sqliteConnector_.getNumRows();
638  for (size_t r = 0; r < num_rows; r++) {
639  foreign_storage::ForeignTable foreign_table{};
640  foreign_table.tableId = sqliteConnector_.getData<int>(r, 0);
641  foreign_table.tableName = sqliteConnector_.getData<std::string>(r, 1);
642  foreign_table.isView = sqliteConnector_.getData<bool>(r, 2);
643  foreign_table.storageType = sqliteConnector_.getData<std::string>(r, 3);
644  foreign_tables.emplace_back(foreign_table);
645  }
646  for (auto& foreign_table : foreign_tables) {
647  tableDescriptorMap_[to_upper(foreign_table.tableName)] = &foreign_table;
648  tableDescriptorMapById_[foreign_table.tableId] = &foreign_table;
649  executeDropTableSqliteQueries(&foreign_table);
650  }
651  sqliteConnector_.query("SELECT COUNT(*) FROM omnisci_foreign_tables");
652  CHECK_EQ(size_t(1), sqliteConnector_.getNumRows());
653  CHECK_EQ(0, sqliteConnector_.getData<int>(0, 0));
654 
655  sqliteConnector_.query("DROP TABLE omnisci_foreign_tables");
656  sqliteConnector_.query("DROP TABLE omnisci_foreign_servers");
657  }
658  } catch (std::exception& e) {
659  sqliteConnector_.query("ROLLBACK TRANSACTION");
660  throw;
661  }
662  sqliteConnector_.query("END TRANSACTION");
663  }
664 
665  for (auto& foreign_table : foreign_tables) {
667  DBObject(foreign_table.tableName, TableDBObjectType), this);
668  tableDescriptorMap_.erase(to_upper(foreign_table.tableName));
669  tableDescriptorMapById_.erase(foreign_table.tableId);
670  }
671 }
672 
673 const std::string Catalog::getForeignServerSchema(bool if_not_exists) {
674  return "CREATE TABLE " + (if_not_exists ? std::string{"IF NOT EXISTS "} : "") +
675  "omnisci_foreign_servers(id integer primary key, name text unique, " +
676  "data_wrapper_type text, owner_user_id integer, creation_time integer, " +
677  "options text)";
678 }
679 
680 const std::string Catalog::getForeignTableSchema(bool if_not_exists) {
681  return "CREATE TABLE " + (if_not_exists ? std::string{"IF NOT EXISTS "} : "") +
682  "omnisci_foreign_tables(table_id integer unique, server_id integer, " +
683  "options text, last_refresh_time integer, next_refresh_time integer, " +
684  "FOREIGN KEY(table_id) REFERENCES mapd_tables(tableid), " +
685  "FOREIGN KEY(server_id) REFERENCES omnisci_foreign_servers(id))";
686 }
687 
690  sqliteConnector_.query("BEGIN TRANSACTION");
691  std::vector<DBObject> objects;
692  try {
694  "SELECT name FROM sqlite_master WHERE type='table' AND "
695  "name='mapd_record_ownership_marker'");
696  // check if mapd catalog - marker exists
697  if (sqliteConnector_.getNumRows() != 0 && currentDB_.dbId == 1) {
698  // already done
699  sqliteConnector_.query("END TRANSACTION");
700  return;
701  }
702  // check if different catalog - marker exists
703  else if (sqliteConnector_.getNumRows() != 0 && currentDB_.dbId != 1) {
704  sqliteConnector_.query("SELECT dummy FROM mapd_record_ownership_marker");
705  // Check if migration is being performed on existing non mapd catalogs
706  // Older non mapd dbs will have table but no record in them
707  if (sqliteConnector_.getNumRows() != 0) {
708  // already done
709  sqliteConnector_.query("END TRANSACTION");
710  return;
711  }
712  }
713  // marker not exists - create one
714  else {
715  sqliteConnector_.query("CREATE TABLE mapd_record_ownership_marker (dummy integer)");
716  }
717 
718  DBMetadata db;
719  CHECK(SysCatalog::instance().getMetadataForDB(currentDB_.dbName, db));
720  // place dbId as a reference for migration being performed
722  "INSERT INTO mapd_record_ownership_marker (dummy) VALUES (?1)",
723  std::vector<std::string>{std::to_string(db.dbOwner)});
724 
725  static const std::map<const DBObjectType, const AccessPrivileges>
726  object_level_all_privs_lookup{
732 
733  // grant owner all permissions on DB
734  DBObjectKey key;
735  key.dbId = currentDB_.dbId;
736  auto _key_place = [&key](auto type) {
737  key.permissionType = type;
738  return key;
739  };
740  for (auto& it : object_level_all_privs_lookup) {
741  objects.emplace_back(_key_place(it.first), it.second, db.dbOwner);
742  objects.back().setName(currentDB_.dbName);
743  }
744 
745  {
746  // other users tables and views
747  string tableQuery(
748  "SELECT tableid, name, userid, isview FROM mapd_tables WHERE userid > 0");
749  sqliteConnector_.query(tableQuery);
750  size_t numRows = sqliteConnector_.getNumRows();
751  for (size_t r = 0; r < numRows; ++r) {
752  int32_t tableid = sqliteConnector_.getData<int>(r, 0);
753  std::string tableName = sqliteConnector_.getData<string>(r, 1);
754  int32_t ownerid = sqliteConnector_.getData<int>(r, 2);
755  bool isview = sqliteConnector_.getData<bool>(r, 3);
756 
759  DBObjectKey key;
760  key.dbId = currentDB_.dbId;
761  key.objectId = tableid;
762  key.permissionType = type;
763 
764  DBObject obj(tableName, type);
765  obj.setObjectKey(key);
766  obj.setOwner(ownerid);
769 
770  objects.push_back(obj);
771  }
772  }
773 
774  {
775  // other users dashboards
776  string tableQuery("SELECT id, name, userid FROM mapd_dashboards WHERE userid > 0");
777  sqliteConnector_.query(tableQuery);
778  size_t numRows = sqliteConnector_.getNumRows();
779  for (size_t r = 0; r < numRows; ++r) {
780  int32_t dashId = sqliteConnector_.getData<int>(r, 0);
781  std::string dashName = sqliteConnector_.getData<string>(r, 1);
782  int32_t ownerid = sqliteConnector_.getData<int>(r, 2);
783 
785  DBObjectKey key;
786  key.dbId = currentDB_.dbId;
787  key.objectId = dashId;
788  key.permissionType = type;
789 
790  DBObject obj(dashName, type);
791  obj.setObjectKey(key);
792  obj.setOwner(ownerid);
794 
795  objects.push_back(obj);
796  }
797  }
798  } catch (const std::exception& e) {
799  sqliteConnector_.query("ROLLBACK TRANSACTION");
800  throw;
801  }
802  sqliteConnector_.query("END TRANSACTION");
803 
804  // now apply the objects to the syscat to track the permissions
805  // moved outside transaction to avoid lock in sqlite
806  try {
808  } catch (const std::exception& e) {
809  LOG(ERROR) << " Issue during migration of DB " << name() << " issue was " << e.what();
810  throw std::runtime_error(" Issue during migration of DB " + name() + " issue was " +
811  e.what());
812  // will need to remove the mapd_record_ownership_marker table and retry
813  }
814 }
815 
820 }
821 
823  std::unordered_map<std::string, std::pair<int, std::string>> dashboards;
824  std::vector<std::string> dashboard_ids;
825  static const std::string migration_name{"dashboard_roles_migration"};
826  {
828  sqliteConnector_.query("BEGIN TRANSACTION");
829  try {
830  // migration_history should be present in all catalogs by now
831  // if not then would be created before this migration
833  "select * from mapd_version_history where migration_history = '" +
834  migration_name + "'");
835  if (sqliteConnector_.getNumRows() != 0) {
836  // no need for further execution
837  sqliteConnector_.query("END TRANSACTION");
838  return;
839  }
840  LOG(INFO) << "Performing dashboard internal roles Migration.";
841  sqliteConnector_.query("select id, userid, metadata from mapd_dashboards");
842  for (size_t i = 0; i < sqliteConnector_.getNumRows(); ++i) {
845  sqliteConnector_.getData<string>(i, 0)))) {
846  // Successfully created roles during previous migration/crash
847  // No need to include them
848  continue;
849  }
850  dashboards[sqliteConnector_.getData<string>(i, 0)] = std::make_pair(
851  sqliteConnector_.getData<int>(i, 1), sqliteConnector_.getData<string>(i, 2));
852  dashboard_ids.push_back(sqliteConnector_.getData<string>(i, 0));
853  }
854  } catch (const std::exception& e) {
855  sqliteConnector_.query("ROLLBACK TRANSACTION");
856  throw;
857  }
858  sqliteConnector_.query("END TRANSACTION");
859  }
860  // All current grantees with shared dashboards.
861  const auto active_grantees =
863 
864  try {
865  // NOTE(wamsi): Transactionally unsafe
866  for (auto dash : dashboards) {
867  createOrUpdateDashboardSystemRole(dash.second.second,
868  dash.second.first,
870  std::to_string(currentDB_.dbId), dash.first));
871  auto result = active_grantees.find(dash.first);
872  if (result != active_grantees.end()) {
875  dash.first)},
876  result->second);
877  }
878  }
880  "INSERT INTO mapd_version_history(version, migration_history) values(?,?)",
881  std::vector<std::string>{std::to_string(MAPD_VERSION), migration_name});
882  } catch (const std::exception& e) {
883  LOG(ERROR) << "Failed to create dashboard system roles during migration: "
884  << e.what();
885  throw;
886  }
887  LOG(INFO) << "Successfully created dashboard system roles during migration.";
888 }
889 
900  updatePageSize();
904 
905  if (g_enable_fsi) {
907  } else {
909  }
910 }
911 
915 }
916 
917 namespace {
918 std::string getUserFromId(const int32_t id) {
919  UserMetadata user;
920  if (SysCatalog::instance().getMetadataForUserById(id, user)) {
921  return user.userName;
922  }
923  // a user could be deleted and a dashboard still exist?
924  return "Unknown";
925 }
926 } // namespace
927 
931 
932  string dictQuery(
933  "SELECT dictid, name, nbits, is_shared, refcount from mapd_dictionaries");
934  sqliteConnector_.query(dictQuery);
935  size_t numRows = sqliteConnector_.getNumRows();
936  for (size_t r = 0; r < numRows; ++r) {
937  int dictId = sqliteConnector_.getData<int>(r, 0);
938  std::string dictName = sqliteConnector_.getData<string>(r, 1);
939  int dictNBits = sqliteConnector_.getData<int>(r, 2);
940  bool is_shared = sqliteConnector_.getData<bool>(r, 3);
941  int refcount = sqliteConnector_.getData<int>(r, 4);
942  std::string fname = basePath_ + "/mapd_data/DB_" + std::to_string(currentDB_.dbId) +
943  "_DICT_" + std::to_string(dictId);
944  DictRef dict_ref(currentDB_.dbId, dictId);
945  DictDescriptor* dd = new DictDescriptor(
946  dict_ref, dictName, dictNBits, is_shared, refcount, fname, false);
947  dictDescriptorMapByRef_[dict_ref].reset(dd);
948  }
949 
950  string tableQuery(
951  "SELECT tableid, name, ncolumns, isview, fragments, frag_type, max_frag_rows, "
952  "max_chunk_size, frag_page_size, "
953  "max_rows, partitions, shard_column_id, shard, num_shards, key_metainfo, userid, "
954  "sort_column_id, storage_type "
955  "from mapd_tables");
956  sqliteConnector_.query(tableQuery);
957  numRows = sqliteConnector_.getNumRows();
958  for (size_t r = 0; r < numRows; ++r) {
959  TableDescriptor* td;
960  const auto& storage_type = sqliteConnector_.getData<string>(r, 17);
961  if (!storage_type.empty() &&
962  (!g_enable_fsi || storage_type != StorageType::FOREIGN_TABLE)) {
963  const auto table_id = sqliteConnector_.getData<int>(r, 0);
964  const auto& table_name = sqliteConnector_.getData<string>(r, 1);
965  LOG(FATAL) << "Unable to read Catalog metadata: storage type is currently not a "
966  "supported table option (table "
967  << table_name << " [" << table_id << "] in database "
968  << currentDB_.dbName << ").";
969  }
970 
971  if (storage_type == StorageType::FOREIGN_TABLE) {
973  } else {
974  td = new TableDescriptor();
975  }
976 
977  td->storageType = storage_type;
978  td->tableId = sqliteConnector_.getData<int>(r, 0);
979  td->tableName = sqliteConnector_.getData<string>(r, 1);
980  td->nColumns = sqliteConnector_.getData<int>(r, 2);
981  td->isView = sqliteConnector_.getData<bool>(r, 3);
982  td->fragments = sqliteConnector_.getData<string>(r, 4);
983  td->fragType =
985  td->maxFragRows = sqliteConnector_.getData<int>(r, 6);
986  td->maxChunkSize = sqliteConnector_.getData<int>(r, 7);
987  td->fragPageSize = sqliteConnector_.getData<int>(r, 8);
988  td->maxRows = sqliteConnector_.getData<int64_t>(r, 9);
989  td->partitions = sqliteConnector_.getData<string>(r, 10);
990  td->shardedColumnId = sqliteConnector_.getData<int>(r, 11);
991  td->shard = sqliteConnector_.getData<int>(r, 12);
992  td->nShards = sqliteConnector_.getData<int>(r, 13);
993  td->keyMetainfo = sqliteConnector_.getData<string>(r, 14);
994  td->userId = sqliteConnector_.getData<int>(r, 15);
995  td->sortedColumnId =
996  sqliteConnector_.isNull(r, 16) ? 0 : sqliteConnector_.getData<int>(r, 16);
997  if (!td->isView) {
998  td->fragmenter = nullptr;
999  }
1000  td->hasDeletedCol = false;
1001 
1003  tableDescriptorMapById_[td->tableId] = td;
1004  }
1005 
1006  if (g_enable_fsi) {
1009  }
1010 
1011  string columnQuery(
1012  "SELECT tableid, columnid, name, coltype, colsubtype, coldim, colscale, "
1013  "is_notnull, compression, comp_param, "
1014  "size, chunks, is_systemcol, is_virtualcol, virtual_expr, is_deletedcol from "
1015  "mapd_columns ORDER BY tableid, "
1016  "columnid");
1017  sqliteConnector_.query(columnQuery);
1018  numRows = sqliteConnector_.getNumRows();
1019  int32_t skip_physical_cols = 0;
1020  for (size_t r = 0; r < numRows; ++r) {
1021  ColumnDescriptor* cd = new ColumnDescriptor();
1022  cd->tableId = sqliteConnector_.getData<int>(r, 0);
1023  cd->columnId = sqliteConnector_.getData<int>(r, 1);
1024  cd->columnName = sqliteConnector_.getData<string>(r, 2);
1028  cd->columnType.set_scale(sqliteConnector_.getData<int>(r, 6));
1032  cd->columnType.set_size(sqliteConnector_.getData<int>(r, 10));
1033  cd->chunks = sqliteConnector_.getData<string>(r, 11);
1034  cd->isSystemCol = sqliteConnector_.getData<bool>(r, 12);
1035  cd->isVirtualCol = sqliteConnector_.getData<bool>(r, 13);
1036  cd->virtualExpr = sqliteConnector_.getData<string>(r, 14);
1037  cd->isDeletedCol = sqliteConnector_.getData<bool>(r, 15);
1038  cd->isGeoPhyCol = skip_physical_cols > 0;
1039  ColumnKey columnKey(cd->tableId, to_upper(cd->columnName));
1040  columnDescriptorMap_[columnKey] = cd;
1041  ColumnIdKey columnIdKey(cd->tableId, cd->columnId);
1042  columnDescriptorMapById_[columnIdKey] = cd;
1043 
1044  if (skip_physical_cols <= 0) {
1045  skip_physical_cols = cd->columnType.get_physical_cols();
1046  }
1047 
1048  auto td_itr = tableDescriptorMapById_.find(cd->tableId);
1049  CHECK(td_itr != tableDescriptorMapById_.end());
1050 
1051  if (cd->isDeletedCol) {
1052  td_itr->second->hasDeletedCol = true;
1053  setDeletedColumnUnlocked(td_itr->second, cd);
1054  } else if (cd->columnType.is_geometry() || skip_physical_cols-- <= 0) {
1055  tableDescriptorMapById_[cd->tableId]->columnIdBySpi_.push_back(cd->columnId);
1056  }
1057  }
1058  // sort columnIdBySpi_ based on columnId
1059  for (auto& tit : tableDescriptorMapById_) {
1060  std::sort(tit.second->columnIdBySpi_.begin(),
1061  tit.second->columnIdBySpi_.end(),
1062  [](const size_t a, const size_t b) -> bool { return a < b; });
1063  }
1064 
1065  string viewQuery("SELECT tableid, sql FROM mapd_views");
1066  sqliteConnector_.query(viewQuery);
1067  numRows = sqliteConnector_.getNumRows();
1068  for (size_t r = 0; r < numRows; ++r) {
1069  int32_t tableId = sqliteConnector_.getData<int>(r, 0);
1070  TableDescriptor* td = tableDescriptorMapById_[tableId];
1071  td->viewSQL = sqliteConnector_.getData<string>(r, 1);
1072  td->fragmenter = nullptr;
1073  }
1074 
1075  string frontendViewQuery(
1076  "SELECT id, state, name, image_hash, strftime('%Y-%m-%dT%H:%M:%SZ', update_time), "
1077  "userid, "
1078  "metadata "
1079  "FROM mapd_dashboards");
1080  sqliteConnector_.query(frontendViewQuery);
1081  numRows = sqliteConnector_.getNumRows();
1082  for (size_t r = 0; r < numRows; ++r) {
1083  std::shared_ptr<DashboardDescriptor> vd = std::make_shared<DashboardDescriptor>();
1084  vd->dashboardId = sqliteConnector_.getData<int>(r, 0);
1085  vd->dashboardState = sqliteConnector_.getData<string>(r, 1);
1086  vd->dashboardName = sqliteConnector_.getData<string>(r, 2);
1087  vd->imageHash = sqliteConnector_.getData<string>(r, 3);
1088  vd->updateTime = sqliteConnector_.getData<string>(r, 4);
1089  vd->userId = sqliteConnector_.getData<int>(r, 5);
1090  vd->dashboardMetadata = sqliteConnector_.getData<string>(r, 6);
1091  vd->user = getUserFromId(vd->userId);
1092  vd->dashboardSystemRoleName = generate_dashboard_system_rolename(
1094  dashboardDescriptorMap_[std::to_string(vd->userId) + ":" + vd->dashboardName] = vd;
1095  }
1096 
1097  string linkQuery(
1098  "SELECT linkid, userid, link, view_state, strftime('%Y-%m-%dT%H:%M:%SZ', "
1099  "update_time), view_metadata "
1100  "FROM mapd_links");
1101  sqliteConnector_.query(linkQuery);
1102  numRows = sqliteConnector_.getNumRows();
1103  for (size_t r = 0; r < numRows; ++r) {
1104  LinkDescriptor* ld = new LinkDescriptor();
1105  ld->linkId = sqliteConnector_.getData<int>(r, 0);
1106  ld->userId = sqliteConnector_.getData<int>(r, 1);
1107  ld->link = sqliteConnector_.getData<string>(r, 2);
1108  ld->viewState = sqliteConnector_.getData<string>(r, 3);
1109  ld->updateTime = sqliteConnector_.getData<string>(r, 4);
1110  ld->viewMetadata = sqliteConnector_.getData<string>(r, 5);
1112  linkDescriptorMapById_[ld->linkId] = ld;
1113  }
1114 
1115  /* rebuild map linking logical tables to corresponding physical ones */
1116  string logicalToPhysicalTableMapQuery(
1117  "SELECT logical_table_id, physical_table_id "
1118  "FROM mapd_logical_to_physical");
1119  sqliteConnector_.query(logicalToPhysicalTableMapQuery);
1120  numRows = sqliteConnector_.getNumRows();
1121  for (size_t r = 0; r < numRows; ++r) {
1122  int32_t logical_tb_id = sqliteConnector_.getData<int>(r, 0);
1123  int32_t physical_tb_id = sqliteConnector_.getData<int>(r, 1);
1124  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(logical_tb_id);
1125  if (physicalTableIt == logicalToPhysicalTableMapById_.end()) {
1126  /* add new entity to the map logicalToPhysicalTableMapById_ */
1127  std::vector<int32_t> physicalTables;
1128  physicalTables.push_back(physical_tb_id);
1129  const auto it_ok =
1130  logicalToPhysicalTableMapById_.emplace(logical_tb_id, physicalTables);
1131  CHECK(it_ok.second);
1132  } else {
1133  /* update map logicalToPhysicalTableMapById_ */
1134  physicalTableIt->second.push_back(physical_tb_id);
1135  }
1136  }
1137 }
1138 
// Registers in-memory copies of a table descriptor (`td`), its columns and its
// dictionaries in the catalog lookup maps, holding a cat_write_lock throughout.
// NOTE(review): the opening line of this definition (return type, name and the `td`
// parameter) is not present in this listing.
1140  const list<ColumnDescriptor>& columns,
1141  const list<DictDescriptor>& dicts) {
1142  cat_write_lock write_lock(this);
1143  TableDescriptor* new_td;
1144 
// Copy through the ForeignTable type when `td` is one, otherwise as a plain
// TableDescriptor.
1145  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(td);
1146  if (foreign_table) {
1147  auto new_foreign_table = new foreign_storage::ForeignTable();
1148  *new_foreign_table = *foreign_table;
1149  new_td = new_foreign_table;
1150  } else {
1151  new_td = new TableDescriptor();
1152  *new_td = *td;
1153  }
1154 
// The copy gets its own freshly allocated mutex; it is then indexed both by
// upper-cased name and by table id.
1155  new_td->mutex_ = std::make_shared<std::mutex>();
1156  tableDescriptorMap_[to_upper(td->tableName)] = new_td;
1157  tableDescriptorMapById_[td->tableId] = new_td;
// Each column is heap-copied and indexed by (tableId, upper-cased name) and by
// (tableId, columnId).
1158  for (auto cd : columns) {
1159  ColumnDescriptor* new_cd = new ColumnDescriptor();
1160  *new_cd = cd;
1161  ColumnKey columnKey(new_cd->tableId, to_upper(new_cd->columnName));
1162  columnDescriptorMap_[columnKey] = new_cd;
1163  ColumnIdKey columnIdKey(new_cd->tableId, new_cd->columnId);
1164  columnDescriptorMapById_[columnIdKey] = new_cd;
1165 
1166  // Add deleted column to the map
1167  if (cd.isDeletedCol) {
1168  CHECK(new_td->hasDeletedCol);
1169  setDeletedColumnUnlocked(new_td, new_cd);
1170  }
1171  }
1172 
// Keep columnIdBySpi_ in ascending order.
1173  std::sort(new_td->columnIdBySpi_.begin(),
1174  new_td->columnIdBySpi_.end(),
1175  [](const size_t a, const size_t b) -> bool { return a < b; });
1176 
// A StringDictionaryClient is only created when string_dict_hosts_ is non-empty.
// dict_ref starts with dictId -1 and is re-pointed at each dictionary in the loop.
1177  std::unique_ptr<StringDictionaryClient> client;
1178  DictRef dict_ref(currentDB_.dbId, -1);
1179  if (!string_dict_hosts_.empty()) {
1180  client.reset(new StringDictionaryClient(string_dict_hosts_.front(), dict_ref, true));
1181  }
1182  for (auto dd : dicts) {
1183  if (!dd.dictRef.dictId) {
1184  // Dummy entry created for a shard of a logical table, nothing to do.
1185  continue;
1186  }
1187  dict_ref.dictId = dd.dictRef.dictId;
1188  if (client) {
1189  client->create(dict_ref, dd.dictIsTemp);
1190  }
1191  DictDescriptor* new_dd = new DictDescriptor(dd);
1192  dictDescriptorMapByRef_[dict_ref].reset(new_dd);
// Only non-temporary dictionaries get a directory created at dictFolderPath.
1193  if (!dd.dictIsTemp) {
1194  boost::filesystem::create_directory(new_dd->dictFolderPath);
1195  }
1196  }
1197 }
1198 
// Removes the table identified by `tableId` from the in-memory catalog maps, deletes
// its TableDescriptor and all of its ColumnDescriptors, and drops the reference each
// dictionary-encoded column holds on its dictionary. Holds a cat_write_lock throughout.
// Throws runtime_error when `tableId` is not in tableDescriptorMapById_.
// `is_on_error`: when true, a dictionary missing from dictDescriptorMapByRef_ is
// tolerated instead of tripping a CHECK.
1199 void Catalog::removeTableFromMap(const string& tableName,
1200  const int tableId,
1201  const bool is_on_error) {
1202  cat_write_lock write_lock(this);
1203  TableDescriptorMapById::iterator tableDescIt = tableDescriptorMapById_.find(tableId);
1204  if (tableDescIt == tableDescriptorMapById_.end()) {
1205  throw runtime_error("Table " + tableName + " does not exist.");
1206  }
1207 
1208  TableDescriptor* td = tableDescIt->second;
1209 
// A table flagged hasDeletedCol must have exactly one deletedColumnPerTable_ entry.
1210  if (td->hasDeletedCol) {
1211  const auto ret = deletedColumnPerTable_.erase(td);
1212  CHECK_EQ(ret, size_t(1));
1213  }
1214 
1215  tableDescriptorMapById_.erase(tableDescIt);
1216  tableDescriptorMap_.erase(to_upper(tableName));
1217  td->fragmenter = nullptr;
1218 
// NOTE(review): `isTemp` (used further down) is not declared in the visible lines;
// presumably it is defined on the line missing from this listing just before
// `delete td;` - confirm against the real source.
1220  delete td;
1221 
// On an aggregator a StringDictionaryClient is created so dropped dictionaries can
// also be dropped through it.
1222  std::unique_ptr<StringDictionaryClient> client;
1223  if (SysCatalog::instance().isAggregator()) {
1224  CHECK(!string_dict_hosts_.empty());
1225  DictRef dict_ref(currentDB_.dbId, -1);
1226  client.reset(new StringDictionaryClient(string_dict_hosts_.front(), dict_ref, true));
1227  }
1228 
1229  // delete all column descriptors for the table
1230  // no more link columnIds to sequential indexes!
1231  for (auto cit = columnDescriptorMapById_.begin();
1232  cit != columnDescriptorMapById_.end();) {
1233  if (tableId != std::get<0>(cit->first)) {
1234  ++cit;
1235  } else {
// The iterator is advanced (post-increment) before the entry is erased, so `cit`
// stays valid; the entry is then looked up again by key to erase it.
1236  int i = std::get<1>(cit++->first);
1237  ColumnIdKey cidKey(tableId, i);
1238  ColumnDescriptorMapById::iterator colDescIt = columnDescriptorMapById_.find(cidKey);
1239  ColumnDescriptor* cd = colDescIt->second;
1240  columnDescriptorMapById_.erase(colDescIt);
1241  ColumnKey cnameKey(tableId, to_upper(cd->columnName));
1242  columnDescriptorMap_.erase(cnameKey);
1243  const int dictId = cd->columnType.get_comp_param();
1244  // Dummy dictionaries created for a shard of a logical table have the id set to
1245  // zero.
1246  if (cd->columnType.get_compression() == kENCODING_DICT && dictId) {
1247  INJECT_TIMER(removingDicts);
1248  DictRef dict_ref(currentDB_.dbId, dictId);
1249  const auto dictIt = dictDescriptorMapByRef_.find(dict_ref);
1250  // If we're removing this table due to an error, it is possible that the string
1251  // dictionary reference was never populated. Don't crash, just continue cleaning
1252  // up the TableDescriptor and ColumnDescriptors
1253  if (!is_on_error) {
1254  CHECK(dictIt != dictDescriptorMapByRef_.end());
1255  } else {
1256  if (dictIt == dictDescriptorMapByRef_.end()) {
// NOTE(review): this `continue` skips the `delete cd;` at the bottom of the loop
// body after `cd` has already been removed from both maps, so `cd` appears to leak
// on this path.
1257  continue;
1258  }
1259  }
// Drop one reference; when the count reaches zero the dictionary is released,
// its folder is renamed for deletion unless `isTemp`, and the map entry is erased.
1260  const auto& dd = dictIt->second;
1261  CHECK_GE(dd->refcount, 1);
1262  --dd->refcount;
1263  if (!dd->refcount) {
1264  dd->stringDict.reset();
1265  if (!isTemp) {
1266  File_Namespace::renameForDelete(dd->dictFolderPath);
1267  }
1268  if (client) {
1269  client->drop(dict_ref);
1270  }
1271  dictDescriptorMapByRef_.erase(dictIt);
1272  }
1273  }
1274 
1275  delete cd;
1276  }
1277  }
1278 }
1279 
// NOTE(review): the signature and the statement following the lock are not present
// in this listing. The visible part constructs a cat_write_lock on this catalog,
// which lives until the function returns.
1281  cat_write_lock write_lock(this);
1283 }
1284 
// NOTE(review): the signature and the left-hand side of the assignment are not
// present in this listing. The visible part constructs a cat_write_lock and builds
// a shared_ptr-owned copy of `vd`; presumably that copy is stored in a dashboard
// map - confirm against the real source.
1286  cat_write_lock write_lock(this);
1288  std::make_shared<DashboardDescriptor>(vd);
1289 }
1290 
// Builds one DBObject per table/view named by parse_underlying_dashboard_objects()
// for the dashboard metadata `view_meta`, owned by `user_id`. Names that
// getMetadataForTable() cannot resolve are logged at INFO and skipped.
// Returns the collected objects (possibly empty).
1291 std::vector<DBObject> Catalog::parseDashboardObjects(const std::string& view_meta,
1292  const int& user_id) {
1293  std::vector<DBObject> objects;
1294  DBObjectKey key;
1295  key.dbId = currentDB_.dbId;
// Helper: stamps the shared `key` with the given type and id, then returns it by
// value, so every DBObject receives its own copy.
1296  auto _key_place = [&key](auto type, auto id) {
1297  key.permissionType = type;
1298  key.objectId = id;
1299  return key;
1300  };
1301  for (auto object_name : parse_underlying_dashboard_objects(view_meta)) {
1302  auto td = getMetadataForTable(object_name);
1303  if (!td) {
1304  // Parsed object source is not present in current database
1305  // LOG the info and ignore
1306  LOG(INFO) << "Ignoring dashboard source Table/View: " << object_name
1307  << " no longer exists in current DB.";
1308  continue;
1309  }
1310  // Dashboard source can be Table or View
1311  const auto object_type = td->isView ? ViewDBObjectType : TableDBObjectType;
// NOTE(review): the second arm of this conditional is on a line missing from this
// listing.
1312  const auto priv = td->isView ? AccessPrivileges::SELECT_FROM_VIEW
1314  objects.emplace_back(_key_place(object_type, td->tableId), priv, user_id);
// Recomputes the same view/table choice already held in `object_type`.
1315  objects.back().setObjectType(td->isView ? ViewDBObjectType : TableDBObjectType);
1316  objects.back().setName(td->tableName);
1317  }
1318  return objects;
1319 }
1320 
// Makes the role `dash_role_name` hold privileges on exactly the tables/views the
// dashboard metadata `view_meta` refers to (as computed by parseDashboardObjects).
// If the role does not exist it is created and granted; otherwise table/view
// objects no longer referenced are revoked and the current set is (re)granted.
// The existing NOTE comments flag each SysCatalog call as not transactional.
1321 void Catalog::createOrUpdateDashboardSystemRole(const std::string& view_meta,
1322  const int32_t& user_id,
1323  const std::string& dash_role_name) {
1324  auto objects = parseDashboardObjects(view_meta, user_id);
1325  Role* rl = SysCatalog::instance().getRoleGrantee(dash_role_name);
1326  if (!rl) {
1327  // Dashboard role does not exist
1328  // create role and grant privileges
1329  // NOTE(wamsi): Transactionally unsafe
1330  SysCatalog::instance().createRole(dash_role_name, false);
1331  SysCatalog::instance().grantDBObjectPrivilegesBatch({dash_role_name}, objects, *this);
1332  } else {
1333  // Dashboard system role already exists
1334  // Add/remove privileges on objects
1335  auto ex_objects = rl->getDbObjects(true);
1336  for (auto key : *ex_objects | boost::adaptors::map_keys) {
// Only table and view keys are candidates for revocation.
1337  if (key.permissionType != TableDBObjectType &&
1338  key.permissionType != ViewDBObjectType) {
1339  continue;
1340  }
// Linear search of the freshly parsed objects for this existing key.
1341  bool found = false;
1342  for (auto obj : objects) {
1343  found = key == obj.getObjectKey() ? true : false;
1344  if (found) {
1345  break;
1346  }
1347  }
1348  if (!found) {
1349  // revoke privs on object since the object is no
1350  // longer used by the dashboard as source
1351  // NOTE(wamsi): Transactionally unsafe
// NOTE(review): the line that starts this call is missing from this listing; only
// its argument list is visible.
1353  dash_role_name, *rl->findDbObject(key, true), *this);
1354  }
1355  }
1356  // Update privileges on remaining objects
1357  // NOTE(wamsi): Transactionally unsafe
1358  SysCatalog::instance().grantDBObjectPrivilegesBatch({dash_role_name}, objects, *this);
1359  }
1360 }
1361 
// NOTE(review): the signature and one interior statement are not present in this
// listing. The visible part takes a cat_write_lock, heap-allocates a copy of `ld`
// and stores the raw pointer in linkDescriptorMapById_ keyed by ld.linkId.
1363  cat_write_lock write_lock(this);
1364  LinkDescriptor* new_ld = new LinkDescriptor();
1365  *new_ld = ld;
1367  linkDescriptorMapById_[ld.linkId] = new_ld;
1368 }
1369 
// Creates td->fragmenter and logs how long that took.
// NOTE(review): the signature and one line inside the lambda are not present in
// this listing.
1371  auto time_ms = measure<>::execution([&]() {
1372  // instantiate table fragmenter upon first use
1373  // assume only insert order fragmenter is supported
1375  vector<Chunk> chunkVec;
1376  list<const ColumnDescriptor*> columnDescs;
// Fetch system columns and physical columns, but not virtual columns.
1377  getAllColumnMetadataForTableImpl(td, columnDescs, true, false, true);
1378  Chunk::translateColumnDescriptorsToChunkVec(columnDescs, chunkVec);
1379  ChunkKey chunkKeyPrefix = {currentDB_.dbId, td->tableId};
// A positive sortedColumnId selects SortedOrderFragmenter; anything else selects
// InsertOrderFragmenter, which additionally receives !td->storageType.empty().
1380  if (td->sortedColumnId > 0) {
1381  td->fragmenter = std::make_shared<SortedOrderFragmenter>(chunkKeyPrefix,
1382  chunkVec,
1383  dataMgr_.get(),
1384  const_cast<Catalog*>(this),
1385  td->tableId,
1386  td->shard,
1387  td->maxFragRows,
1388  td->maxChunkSize,
1389  td->fragPageSize,
1390  td->maxRows,
1391  td->persistenceLevel);
1392  } else {
1393  td->fragmenter = std::make_shared<InsertOrderFragmenter>(chunkKeyPrefix,
1394  chunkVec,
1395  dataMgr_.get(),
1396  const_cast<Catalog*>(this),
1397  td->tableId,
1398  td->shard,
1399  td->maxFragRows,
1400  td->maxChunkSize,
1401  td->fragPageSize,
1402  td->maxRows,
1403  td->persistenceLevel,
1404  !td->storageType.empty());
1405  }
1406  });
1407  LOG(INFO) << "Instantiating Fragmenter for table " << td->tableName << " took "
1408  << time_ms << "ms";
1409 }
1410 
// Looks up `tableName` (case-insensitively, via to_upper) in tableDescriptorMap_ and
// returns it as a ForeignTable. Returns nullptr when the name is unknown or when the
// descriptor is not a ForeignTable (dynamic_cast failure). Takes no lock itself.
// NOTE(review): the opening line of the signature is not present in this listing.
1412  const std::string& tableName) const {
1413  auto tableDescIt = tableDescriptorMap_.find(to_upper(tableName));
1414  if (tableDescIt == tableDescriptorMap_.end()) { // check to make sure table exists
1415  return nullptr;
1416  }
1417  return dynamic_cast<foreign_storage::ForeignTable*>(tableDescIt->second);
1418 };
1419 
// Returns the descriptor for `tableName` (matched case-insensitively via to_upper),
// or nullptr when no such table is registered. Holds a cat_read_lock for the call.
// `populateFragmenter`: when true and the table is not a view and has no fragmenter
// yet, the guarded branch below runs.
1420 const TableDescriptor* Catalog::getMetadataForTable(const string& tableName,
1421  const bool populateFragmenter) const {
1422  // we give option not to populate fragmenter (default true/yes) as it can be heavy for
1423  // pure metadata calls
1424  cat_read_lock read_lock(this);
1425  auto tableDescIt = tableDescriptorMap_.find(to_upper(tableName));
1426  if (tableDescIt == tableDescriptorMap_.end()) { // check to make sure table exists
1427  return nullptr;
1428  }
1429  TableDescriptor* td = tableDescIt->second;
// The per-table mutex is taken unconditionally here, even when populateFragmenter
// is false.
1430  std::unique_lock<std::mutex> td_lock(*td->mutex_.get());
// NOTE(review): the body of this `if` is on a line missing from this listing;
// presumably it instantiates the fragmenter - confirm.
1431  if (populateFragmenter && td->fragmenter == nullptr && !td->isView) {
1433  }
1434  return td; // returns pointer to table descriptor
1435 }
1436 
// Looks up `tableId` in tableDescriptorMapById_ and returns the descriptor, or
// nullptr when absent. No catalog lock is taken in the visible lines; the per-table
// mutex is taken only when `populateFragmenter` is true.
// NOTE(review): the opening line of the signature and the body of the inner `if`
// are not present in this listing.
1438  int tableId,
1439  const bool populateFragmenter) const {
1440  auto tableDescIt = tableDescriptorMapById_.find(tableId);
1441  if (tableDescIt == tableDescriptorMapById_.end()) { // check to make sure table exists
1442  return nullptr;
1443  }
1444  TableDescriptor* td = tableDescIt->second;
1445  if (populateFragmenter) {
1446  std::unique_lock<std::mutex> td_lock(*td->mutex_.get());
1447  if (td->fragmenter == nullptr && !td->isView) {
1449  }
1450  }
1451  return td; // returns pointer to table descriptor
1452 }
1453 
// Locked wrapper: takes a cat_read_lock and forwards to getMetadataForTableImpl().
// NOTE(review): the opening line of the signature is not present in this listing.
1455  bool populateFragmenter) const {
1456  cat_read_lock read_lock(this);
1457  return getMetadataForTableImpl(tableId, populateFragmenter);
1458 }
1459 
// Locked wrapper: takes a cat_read_lock and forwards to getMetadataForDictUnlocked().
// NOTE(review): the opening line of the signature is not present in this listing.
1461  const bool load_dict) const {
1462  cat_read_lock read_lock(this);
1463  return getMetadataForDictUnlocked(dict_id, load_dict);
1464 }
1465 
// Returns the DictDescriptor for `dictId` in the current database, or nullptr when
// it is not in dictDescriptorMapByRef_. When `loadDict` is true and the descriptor
// has no StringDictionary yet, one is created under the descriptor's
// string_dict_mutex and the elapsed time is logged. Takes no catalog lock itself.
// NOTE(review): the opening line of the signature is not present in this listing.
1467  const bool loadDict) const {
1468  const DictRef dictRef(currentDB_.dbId, dictId);
1469  auto dictDescIt = dictDescriptorMapByRef_.find(dictRef);
1470  if (dictDescIt ==
1471  dictDescriptorMapByRef_.end()) { // check to make sure dictionary exists
1472  return nullptr;
1473  }
1474  auto& dd = dictDescIt->second;
1475 
1476  if (loadDict) {
1477  std::lock_guard string_dict_lock(*dd->string_dict_mutex);
1478  if (!dd->stringDict) {
1479  auto time_ms = measure<>::execution([&]() {
// With no dictionary hosts configured the dictionary is built from dictFolderPath;
// the two branches differ only in the second constructor argument, which mirrors
// dd->dictIsTemp. Otherwise it is built from the first host and the DictRef.
1480  if (string_dict_hosts_.empty()) {
1481  if (dd->dictIsTemp) {
1482  dd->stringDict = std::make_shared<StringDictionary>(
1483  dd->dictFolderPath, true, true, g_cache_string_hash);
1484  } else {
1485  dd->stringDict = std::make_shared<StringDictionary>(
1486  dd->dictFolderPath, false, true, g_cache_string_hash);
1487  }
1488  } else {
1489  dd->stringDict =
1490  std::make_shared<StringDictionary>(string_dict_hosts_.front(), dd->dictRef);
1491  }
1492  });
1493  LOG(INFO) << "Time to load Dictionary " << dd->dictRef.dbId << "_"
1494  << dd->dictRef.dictId << " was " << time_ms << "ms";
1495  }
1496  }
1497 
1498  return dd.get();
1499 }
1500 
1501 const std::vector<LeafHostInfo>& Catalog::getStringDictionaryHosts() const {
1502  return string_dict_hosts_;
1503 }
1504 
// Returns the ColumnDescriptor for (`tableId`, `columnName`), matching the name
// case-insensitively via to_upper, or nullptr when no such column is registered.
// Holds a cat_read_lock for the duration of the lookup.
// NOTE(review): the opening line of the signature is not present in this listing.
1506  const string& columnName) const {
1507  cat_read_lock read_lock(this);
1508 
1509  ColumnKey columnKey(tableId, to_upper(columnName));
1510  auto colDescIt = columnDescriptorMap_.find(columnKey);
1511  if (colDescIt ==
1512  columnDescriptorMap_.end()) { // need to check to make sure column exists for table
1513  return nullptr;
1514  }
1515  return colDescIt->second;
1516 }
1517 
1518 const ColumnDescriptor* Catalog::getMetadataForColumn(int table_id, int column_id) const {
1519  cat_read_lock read_lock(this);
1520  return getMetadataForColumnUnlocked(table_id, column_id);
1521 }
1522 
// Returns the ColumnDescriptor stored under (`tableId`, `columnId`) in
// columnDescriptorMapById_, or nullptr when absent. Takes no lock itself.
// NOTE(review): the opening line of the signature is not present in this listing.
1524  int columnId) const {
1525  ColumnIdKey columnIdKey(tableId, columnId);
1526  auto colDescIt = columnDescriptorMapById_.find(columnIdKey);
1527  if (colDescIt == columnDescriptorMapById_
1528  .end()) { // need to check to make sure column exists for table
1529  return nullptr;
1530  }
1531  return colDescIt->second;
1532 }
1533 
1534 const int Catalog::getColumnIdBySpiUnlocked(const int table_id, const size_t spi) const {
1535  const auto tabDescIt = tableDescriptorMapById_.find(table_id);
1536  CHECK(tableDescriptorMapById_.end() != tabDescIt);
1537  const auto& columnIdBySpi = tabDescIt->second->columnIdBySpi_;
1538 
1539  auto spx = spi;
1540  int phi = 0;
1541  if (spx >= SPIMAP_MAGIC1) // see Catalog.h
1542  {
1543  phi = (spx - SPIMAP_MAGIC1) % SPIMAP_MAGIC2;
1544  spx = (spx - SPIMAP_MAGIC1) / SPIMAP_MAGIC2;
1545  }
1546 
1547  CHECK(0 < spx && spx <= columnIdBySpi.size());
1548  return columnIdBySpi[spx - 1] + phi;
1549 }
1550 
1551 const int Catalog::getColumnIdBySpi(const int table_id, const size_t spi) const {
1552  cat_read_lock read_lock(this);
1553  return getColumnIdBySpiUnlocked(table_id, spi);
1554 }
1555 
// Resolves `spi` to a column id via getColumnIdBySpiUnlocked() and returns the
// matching ColumnDescriptor from columnDescriptorMapById_, or nullptr when that id
// has no entry. Holds a cat_read_lock for the duration of the call.
// NOTE(review): the opening line of the signature is not present in this listing.
1557  const size_t spi) const {
1558  cat_read_lock read_lock(this);
1559 
1560  const auto columnId = getColumnIdBySpiUnlocked(tableId, spi);
1561  ColumnIdKey columnIdKey(tableId, columnId);
1562  const auto colDescIt = columnDescriptorMapById_.find(columnIdKey);
1563  return columnDescriptorMapById_.end() == colDescIt ? nullptr : colDescIt->second;
1564 }
1565 
// Deletes the dashboards listed in `dashboard_ids` on behalf of `user`.
// Phase 1 validates every id (it must exist and `user` must pass a
// DELETE_DASHBOARD privilege check) and throws std::runtime_error listing all
// offending ids before anything is modified.
// Phase 2 removes each dashboard from dashboardDescriptorMap_ and from the
// mapd_dashboards table inside one SQLite transaction, under a cat_write_lock.
// NOTE(review): three lines of this definition are missing from this listing (one
// after the BE-5245 comment, one after the write lock, and the start of the DELETE
// statement call).
1566 void Catalog::deleteMetadataForDashboards(const std::vector<int32_t> dashboard_ids,
1567  const UserMetadata& user) {
1568  std::stringstream invalid_ids, restricted_ids;
1569 
// Collect unknown ids and ids the user may not delete as comma-separated lists.
1570  for (int32_t dashboard_id : dashboard_ids) {
1571  if (!getMetadataForDashboard(dashboard_id)) {
1572  invalid_ids << (!invalid_ids.str().empty() ? ", " : "") << dashboard_id;
1573  continue;
1574  }
1575  DBObject object(dashboard_id, DashboardDBObjectType);
1576  object.loadKey(*this);
1577  object.setPrivileges(AccessPrivileges::DELETE_DASHBOARD);
1578  std::vector<DBObject> privs = {object};
1579  if (!SysCatalog::instance().checkPrivileges(user, privs))
1580  restricted_ids << (!restricted_ids.str().empty() ? ", " : "") << dashboard_id;
1581  }
1582 
1583  if (invalid_ids.str().size() > 0 || restricted_ids.str().size() > 0) {
1584  std::stringstream error_message;
1585  error_message << "Delete dashboard(s) failed with error(s):";
1586  if (invalid_ids.str().size() > 0)
1587  error_message << "\nDashboard id: " << invalid_ids.str()
1588  << " - Dashboard id does not exist";
1589  if (restricted_ids.str().size() > 0)
1590  error_message
1591  << "\nDashboard id: " << restricted_ids.str()
1592  << " - User should be either owner of dashboard or super user to delete it";
1593  throw std::runtime_error(error_message.str());
1594  }
1595  std::vector<DBObject> dash_objs;
1596 
1597  for (int32_t dashboard_id : dashboard_ids) {
1598  dash_objs.push_back(DBObject(dashboard_id, DashboardDBObjectType));
1599  }
1600  // BE-5245: Transactionally unsafe (like other combined Catalog/Syscatalog operations)
1602  {
1603  cat_write_lock write_lock(this);
1605 
1606  sqliteConnector_.query("BEGIN TRANSACTION");
1607  try {
1608  for (int32_t dashboard_id : dashboard_ids) {
1609  auto dash = getMetadataForDashboard(dashboard_id);
1610  // Dash should still exist if revokeDBObjectPrivileges passed but throw and
1611  // rollback if already deleted
1612  if (!dash) {
1613  throw std::runtime_error(
1614  std::string("Delete dashboard(s) failed with error(s):\nDashboard id: ") +
1615  std::to_string(dashboard_id) + " - Dashboard id does not exist ");
1616  }
// Copy the key parts out of `dash` before its map entry is erased below.
1617  std::string user_id = std::to_string(dash->userId);
1618  std::string dash_name = dash->dashboardName;
1619  auto viewDescIt = dashboardDescriptorMap_.find(user_id + ":" + dash_name);
1620  dashboardDescriptorMap_.erase(viewDescIt);
1622  "DELETE FROM mapd_dashboards WHERE name = ? and userid = ?",
1623  std::vector<std::string>{dash_name, user_id});
1624  }
1625  } catch (std::exception& e) {
// NOTE(review): the rollback only undoes the SQLite changes; map entries already
// erased in this loop are not restored in the visible code.
1626  sqliteConnector_.query("ROLLBACK TRANSACTION");
1627  throw;
1628  }
1629  sqliteConnector_.query("END TRANSACTION");
1630  }
1631 }
1632 
// Returns the dashboard stored under the key "<userId>:<dashName>" in
// dashboardDescriptorMap_, or nullptr when there is none. The returned raw pointer
// comes from a shared_ptr owned by the map. Holds a cat_read_lock for the lookup.
// NOTE(review): the opening line of the signature is not present in this listing.
1634  const string& userId,
1635  const string& dashName) const {
1636  cat_read_lock read_lock(this);
1637 
1638  auto viewDescIt = dashboardDescriptorMap_.find(userId + ":" + dashName);
1639  if (viewDescIt == dashboardDescriptorMap_.end()) { // check to make sure view exists
1640  return nullptr;
1641  }
1642  return viewDescIt->second.get(); // returns pointer to view descriptor
1643 }
1644 
// Finds a dashboard by numeric `id`: scans dashboardDescriptorMap_ linearly for an
// entry whose dashboardId matches, then re-resolves it through the
// (userId, name) overload. Returns nullptr when no entry matches.
// Holds a cat_read_lock; the delegated overload takes one as well.
// NOTE(review): the signature line is not present in this listing.
1646  cat_read_lock read_lock(this);
1647  std::string userId;
1648  std::string name;
1649  bool found{false};
1650  {
// `auto descp` copies each map entry (key string and shared_ptr) per iteration.
1651  for (auto descp : dashboardDescriptorMap_) {
1652  auto dash = descp.second.get();
1653  if (dash->dashboardId == id) {
1654  userId = std::to_string(dash->userId);
1655  name = dash->dashboardName;
1656  found = true;
1657  break;
1658  }
1659  }
1660  }
1661  if (found) {
1662  return getMetadataForDashboard(userId, name);
1663  }
1664  return nullptr;
1665 }
1666 
1667 const LinkDescriptor* Catalog::getMetadataForLink(const string& link) const {
1668  cat_read_lock read_lock(this);
1669  auto linkDescIt = linkDescriptorMap_.find(link);
1670  if (linkDescIt == linkDescriptorMap_.end()) { // check to make sure view exists
1671  return nullptr;
1672  }
1673  return linkDescIt->second; // returns pointer to view descriptor
1674 }
1675 
// Returns the LinkDescriptor stored under `linkId` in linkDescriptorMapById_, or
// nullptr when absent. Holds a cat_read_lock for the lookup.
// NOTE(review): the signature line is not present in this listing.
1677  cat_read_lock read_lock(this);
1678  auto linkDescIt = linkDescriptorMapById_.find(linkId);
1679  if (linkDescIt == linkDescriptorMapById_.end()) { // check to make sure link exists
1680  return nullptr;
1681  }
1682  return linkDescIt->second;
1683 }
1684 
// Returns the table with id `table_id` as a ForeignTable. CHECK-fails when the
// table does not exist or is not a ForeignTable, so the result is never null.
// The fragmenter is not populated (second argument false). Takes no lock itself.
// NOTE(review): the opening line of the signature is not present in this listing.
1686  int table_id) const {
1687  auto table = getMetadataForTableImpl(table_id, false);
1688  CHECK(table);
1689  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(table);
1690  CHECK(foreign_table);
1691  return foreign_table;
1692 }
1693 
// Locked wrapper: takes a cat_read_lock and forwards to getForeignTableUnlocked().
// NOTE(review): the signature line is not present in this listing.
1695  cat_read_lock read_lock(this);
1696  return getForeignTableUnlocked(table_id);
1697 }
1698 
// Appends to `columnDescriptors` every column of table `td` found while iterating
// columnDescriptorMapById_, filtered by the three flags:
//   fetchSystemColumns   - when false, columns with isSystemCol are skipped
//   fetchVirtualColumns  - when false, columns with isVirtualCol are skipped
//   fetchPhysicalColumns - when false, the get_physical_cols() entries that follow
//                          an accepted column in iteration order are skipped
// Takes no lock itself.
// NOTE(review): the opening line of the signature is not present in this listing.
1700  const TableDescriptor* td,
1701  list<const ColumnDescriptor*>& columnDescriptors,
1702  const bool fetchSystemColumns,
1703  const bool fetchVirtualColumns,
1704  const bool fetchPhysicalColumns) const {
1705  int32_t skip_physical_cols = 0;
1706  for (const auto& columnDescriptor : columnDescriptorMapById_) {
// The pending skip count is consumed before the table id is checked, so this
// relies on a column's physical columns being the very next map entries -
// presumably guaranteed by the (tableId, columnId) key ordering; confirm.
1707  if (!fetchPhysicalColumns && skip_physical_cols > 0) {
1708  --skip_physical_cols;
1709  continue;
1710  }
1711  auto cd = columnDescriptor.second;
1712  if (cd->tableId != td->tableId) {
1713  continue;
1714  }
1715  if (!fetchSystemColumns && cd->isSystemCol) {
1716  continue;
1717  }
1718  if (!fetchVirtualColumns && cd->isVirtualCol) {
1719  continue;
1720  }
1721  if (!fetchPhysicalColumns) {
1722  const auto& col_ti = cd->columnType;
1723  skip_physical_cols = col_ti.get_physical_cols();
1724  }
1725  columnDescriptors.push_back(cd);
1726  }
1727 }
1728 
// Locked variant of the column listing for table `tableId`: takes a cat_read_lock
// and forwards the id and the three fetch flags unchanged.
// NOTE(review): the line naming the callee is missing from this listing; the
// argument list matches getAllColumnMetadataForTableUnlocked - confirm.
1729 std::list<const ColumnDescriptor*> Catalog::getAllColumnMetadataForTable(
1730  const int tableId,
1731  const bool fetchSystemColumns,
1732  const bool fetchVirtualColumns,
1733  const bool fetchPhysicalColumns) const {
1734  cat_read_lock read_lock(this);
1736  tableId, fetchSystemColumns, fetchVirtualColumns, fetchPhysicalColumns);
1737 }
1738 
// Returns the list of column descriptors for table `tableId`, filtered by the three
// fetch flags. Takes no lock itself; the table is looked up without populating its
// fragmenter.
// NOTE(review): the line naming the callee that fills `columnDescriptors` is missing
// from this listing, and `td` is not null-checked in the visible lines.
1739 std::list<const ColumnDescriptor*> Catalog::getAllColumnMetadataForTableUnlocked(
1740  const int tableId,
1741  const bool fetchSystemColumns,
1742  const bool fetchVirtualColumns,
1743  const bool fetchPhysicalColumns) const {
1744  std::list<const ColumnDescriptor*> columnDescriptors;
1745  const TableDescriptor* td =
1746  getMetadataForTableImpl(tableId, false); // dont instantiate fragmenter
1748  columnDescriptors,
1749  fetchSystemColumns,
1750  fetchVirtualColumns,
1751  fetchPhysicalColumns);
1752  return columnDescriptors;
1753 }
1754 
1755 list<const TableDescriptor*> Catalog::getAllTableMetadata() const {
1756  cat_read_lock read_lock(this);
1757  list<const TableDescriptor*> table_list;
1758  for (auto p : tableDescriptorMapById_) {
1759  table_list.push_back(p.second);
1760  }
1761  return table_list;
1762 }
1763 
1764 list<const DashboardDescriptor*> Catalog::getAllDashboardsMetadata() const {
1765  list<const DashboardDescriptor*> view_list;
1766  for (auto p : dashboardDescriptorMap_) {
1767  view_list.push_back(p.second.get());
1768  }
1769  return view_list;
1770 }
1771 
// Creates the dictionary for column `cd`: asks setColumnDictionary() to produce a
// DictDescriptor, optionally creates it through a StringDictionaryClient, registers
// a heap copy in dictDescriptorMapByRef_, and returns its DictRef.
// No lock is taken in the visible lines.
// NOTE(review): the signature line is not present in this listing.
1773  const auto& td = *tableDescriptorMapById_[cd.tableId];
1774  list<DictDescriptor> dds;
1775  setColumnDictionary(cd, dds, td, true);
// The descriptor just produced is taken from the back of `dds`; it must carry a
// non-zero dictId.
1776  auto& dd = dds.back();
1777  CHECK(dd.dictRef.dictId);
1778 
1779  std::unique_ptr<StringDictionaryClient> client;
1780  if (!string_dict_hosts_.empty()) {
1781  client.reset(new StringDictionaryClient(
1782  string_dict_hosts_.front(), DictRef(currentDB_.dbId, -1), true));
1783  }
1784  if (client) {
1785  client->create(dd.dictRef, dd.dictIsTemp);
1786  }
1787 
1788  DictDescriptor* new_dd = new DictDescriptor(dd);
1789  dictDescriptorMapByRef_[dd.dictRef].reset(new_dd);
// Only non-temporary dictionaries get a directory created at dictFolderPath.
1790  if (!dd.dictIsTemp) {
1791  boost::filesystem::create_directory(new_dd->dictFolderPath);
1792  }
1793  return dd.dictRef;
1794 }
1795 
// Releases the reference column `cd` holds on its string dictionary. Does nothing
// unless `cd` is a string or string-array column with kENCODING_DICT compression.
// The refcount in mapd_dictionaries is decremented; when it is no longer positive
// the row is deleted, the on-disk folder is renamed for deletion, the dictionary is
// dropped through a StringDictionaryClient (if hosts are configured) and the entry
// is erased from dictDescriptorMapByRef_. Holds a cat_write_lock throughout.
// NOTE(review): the signature and the lines that start the UPDATE and SELECT calls
// are not present in this listing.
1797  cat_write_lock write_lock(this);
1799  if (!(cd.columnType.is_string() || cd.columnType.is_string_array())) {
1800  return;
1801  }
1802  if (!(cd.columnType.get_compression() == kENCODING_DICT)) {
1803  return;
1804  }
1805  const auto dictId = cd.columnType.get_comp_param();
1806  CHECK_GT(dictId, 0);
1807  // decrement and zero check dict ref count
1808  const auto td = getMetadataForTable(cd.tableId);
1809  CHECK(td);
1811  "UPDATE mapd_dictionaries SET refcount = refcount - 1 WHERE dictid = ?",
1812  std::to_string(dictId));
1814  "SELECT refcount FROM mapd_dictionaries WHERE dictid = ?", std::to_string(dictId));
1815  const auto refcount = sqliteConnector_.getData<int>(0, 0);
// NOTE(review): the message below has no space between the id and "from".
1816  VLOG(3) << "Dictionary " << dictId << "from dropped table has reference count "
1817  << refcount;
// Still referenced elsewhere: keep the dictionary.
1818  if (refcount > 0) {
1819  return;
1820  }
1821  const DictRef dictRef(currentDB_.dbId, dictId);
1822  sqliteConnector_.query_with_text_param("DELETE FROM mapd_dictionaries WHERE dictid = ?",
1823  std::to_string(dictId));
1824  File_Namespace::renameForDelete(basePath_ + "/mapd_data/DB_" +
1825  std::to_string(currentDB_.dbId) + "_DICT_" +
1826  std::to_string(dictId));
1827 
1828  std::unique_ptr<StringDictionaryClient> client;
1829  if (!string_dict_hosts_.empty()) {
1830  client.reset(new StringDictionaryClient(string_dict_hosts_.front(), dictRef, true));
1831  }
1832  if (client) {
1833  client->drop(dictRef);
1834  }
1835 
1836  dictDescriptorMapByRef_.erase(dictRef);
1837 }
1838 
// Stores in `stringDicts`, keyed by column id, a raw pointer to the StringDictionary
// of the column named by `cd`. The lookup uses the ColumnDescriptor registered in
// columnDescriptorMap_ (not `cd` itself) and does nothing unless that column is a
// dictionary-encoded string or string array with a positive comp_param.
// NOTE(review): the opening line of the signature is not present in this listing.
1840  std::map<int, StringDictionary*>& stringDicts) {
1841  // learn 'committed' ColumnDescriptor of this column
1842  auto cit = columnDescriptorMap_.find(ColumnKey(cd.tableId, to_upper(cd.columnName)));
1843  CHECK(cit != columnDescriptorMap_.end());
1844  auto& ccd = *cit->second;
1845 
1846  if (!(ccd.columnType.is_string() || ccd.columnType.is_string_array())) {
1847  return;
1848  }
1849  if (!(ccd.columnType.get_compression() == kENCODING_DICT)) {
1850  return;
1851  }
1852  if (!(ccd.columnType.get_comp_param() > 0)) {
1853  return;
1854  }
1855 
1856  auto dictId = ccd.columnType.get_comp_param();
// Called for its side effect; the return value is discarded. Presumably this loads
// the StringDictionary that the CHECKs below require - confirm the default of the
// load flag in the header.
1857  getMetadataForDict(dictId);
1858 
1859  const DictRef dictRef(currentDB_.dbId, dictId);
1860  auto dit = dictDescriptorMapByRef_.find(dictRef);
1861  CHECK(dit != dictDescriptorMapByRef_.end());
1862  CHECK(dit->second);
1863  CHECK(dit->second.get()->stringDict);
1864  stringDicts[ccd.columnId] = dit->second.get()->stringDict.get();
1865 }
1866 
// Adds column `cd` to table `td`: inserts a row into mapd_columns, bumps ncolumns in
// mapd_tables, reads back the assigned column id into cd.columnId, and records a new
// heap-allocated ColumnDescriptor in columnDescriptorsForRoll. Holds a
// cat_write_lock throughout.
// NOTE(review): many lines of this definition are missing from this listing (the
// signature, the condition guarding addDictionary, the starts of the three SQL
// calls, most INSERT parameters and two lines after `ncd` is created).
1868  cat_write_lock write_lock(this);
1869  // caller must handle sqlite/chunk transaction TOGETHER
1870  cd.tableId = td.tableId;
// When td has shards and is not itself a shard (shard < 0), the column is first
// added, via a per-shard copy of `cd`, to each physical table.
1871  if (td.nShards > 0 && td.shard < 0) {
1872  for (const auto shard : getPhysicalTablesDescriptors(&td)) {
1873  auto shard_cd = cd;
1874  addColumn(*shard, shard_cd);
1875  }
1876  }
1878  addDictionary(cd);
1879  }
1880 
// The new column id is computed inside the INSERT as max(columnid) + 1 for this
// table.
1882  "INSERT INTO mapd_columns (tableid, columnid, name, coltype, colsubtype, coldim, "
1883  "colscale, is_notnull, "
1884  "compression, comp_param, size, chunks, is_systemcol, is_virtualcol, virtual_expr, "
1885  "is_deletedcol) "
1886  "VALUES (?, "
1887  "(SELECT max(columnid) + 1 FROM mapd_columns WHERE tableid = ?), "
1888  "?, ?, ?, "
1889  "?, "
1890  "?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
1891  std::vector<std::string>{std::to_string(td.tableId),
1892  std::to_string(td.tableId),
1893  cd.columnName,
1902  "",
1905  cd.virtualExpr,
1907 
1909  "UPDATE mapd_tables SET ncolumns = ncolumns + 1 WHERE tableid = ?",
1910  std::vector<std::string>{std::to_string(td.tableId)});
1911 
1913  "SELECT columnid FROM mapd_columns WHERE tableid = ? AND name = ?",
1914  std::vector<std::string>{std::to_string(td.tableId), cd.columnName});
1915  cd.columnId = sqliteConnector_.getData<int>(0, 0);
1916 
1917  ++tableDescriptorMapById_[td.tableId]->nColumns;
1918  auto ncd = new ColumnDescriptor(cd);
// Recorded as (old = nullptr, new = ncd), i.e. an addition.
1921  columnDescriptorsForRoll.emplace_back(nullptr, ncd);
1922 }
1923 
1925  cat_write_lock write_lock(this);
1927  // caller must handle sqlite/chunk transaction TOGETHER
1929  "DELETE FROM mapd_columns where tableid = ? and columnid = ?",
1930  std::vector<std::string>{std::to_string(td.tableId), std::to_string(cd.columnId)});
1931 
1933  "UPDATE mapd_tables SET ncolumns = ncolumns - 1 WHERE tableid = ?",
1934  std::vector<std::string>{std::to_string(td.tableId)});
1935 
1936  ColumnDescriptorMap::iterator columnDescIt =
1938  CHECK(columnDescIt != columnDescriptorMap_.end());
1939 
1940  columnDescriptorsForRoll.emplace_back(columnDescIt->second, nullptr);
1941 
1942  columnDescriptorMap_.erase(columnDescIt);
1944  --tableDescriptorMapById_[td.tableId]->nColumns;
1945  // for each shard
1946  if (td.nShards > 0 && td.shard < 0) {
1947  for (const auto shard : getPhysicalTablesDescriptors(&td)) {
1948  const auto shard_cd = getMetadataForColumn(shard->tableId, cd.columnId);
1949  CHECK(shard_cd);
1950  dropColumn(*shard, *shard_cd);
1951  }
1952  }
1953 }
1954 
1955 void Catalog::roll(const bool forward) {
1956  cat_write_lock write_lock(this);
1957  std::set<const TableDescriptor*> tds;
1958 
1959  for (const auto& cdr : columnDescriptorsForRoll) {
1960  auto ocd = cdr.first;
1961  auto ncd = cdr.second;
1962  CHECK(ocd || ncd);
1963  auto tabDescIt = tableDescriptorMapById_.find((ncd ? ncd : ocd)->tableId);
1964  CHECK(tableDescriptorMapById_.end() != tabDescIt);
1965  auto td = tabDescIt->second;
1966  auto& vc = td->columnIdBySpi_;
1967  if (forward) {
1968  if (ocd) {
1969  if (nullptr == ncd ||
1970  ncd->columnType.get_comp_param() != ocd->columnType.get_comp_param()) {
1971  delDictionary(*ocd);
1972  }
1973 
1974  vc.erase(std::remove(vc.begin(), vc.end(), ocd->columnId), vc.end());
1975 
1976  delete ocd;
1977  }
1978  if (ncd) {
1979  // append columnId if its new and not phy geo
1980  if (vc.end() == std::find(vc.begin(), vc.end(), ncd->columnId)) {
1981  if (!ncd->isGeoPhyCol) {
1982  vc.push_back(ncd->columnId);
1983  }
1984  }
1985  }
1986  tds.insert(td);
1987  } else {
1988  if (ocd) {
1989  columnDescriptorMap_[ColumnKey(ocd->tableId, to_upper(ocd->columnName))] = ocd;
1990  columnDescriptorMapById_[ColumnIdKey(ocd->tableId, ocd->columnId)] = ocd;
1991  }
1992  // roll back the dict of new column
1993  if (ncd) {
1994  columnDescriptorMap_.erase(ColumnKey(ncd->tableId, to_upper(ncd->columnName)));
1995  columnDescriptorMapById_.erase(ColumnIdKey(ncd->tableId, ncd->columnId));
1996  if (nullptr == ocd ||
1997  ocd->columnType.get_comp_param() != ncd->columnType.get_comp_param()) {
1998  delDictionary(*ncd);
1999  }
2000  delete ncd;
2001  }
2002  }
2003  }
2004  columnDescriptorsForRoll.clear();
2005 
2006  if (forward) {
2007  for (const auto td : tds) {
2008  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
2009  }
2010  }
2011 }
2012 
2014  list<ColumnDescriptor>& columns) {
  // Purpose: for a geo-typed column `cd`, append the descriptors of its physical
  // backing columns to `columns`; non-geo columns append nothing. The physical columns
  // are named "<column name>_<suffix>" and inherit the NOT NULL flag of `cd`.
  // Each physical descriptor is built with ColumnDescriptor(true); presumably the
  // argument marks it as a physical geo column -- TODO confirm against the constructor.
  // Throws runtime_error for a geo type not handled by the switch below.
  // NOTE(review): the first line of this signature is not visible in this listing.
2015  const auto& col_ti = cd.columnType;
2016  if (IS_GEO(col_ti.get_type())) {
2017  switch (col_ti.get_type()) {
  // POINT: a single fixed-size TINYINT array "_coords".
2018  case kPOINT: {
2019  ColumnDescriptor physical_cd_coords(true);
2020  physical_cd_coords.columnName = cd.columnName + "_coords";
2021  SQLTypeInfo coords_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2022  // Raw data: compressed/uncompressed coords
2023  coords_ti.set_subtype(kTINYINT);
  // 4 bytes per coordinate with 32-bit GEOINT compression, 8 bytes otherwise
  // (any other compression than NONE trips the CHECK).
2024  size_t unit_size;
2025  if (col_ti.get_compression() == kENCODING_GEOINT &&
2026  col_ti.get_comp_param() == 32) {
2027  unit_size = 4 * sizeof(int8_t);
2028  } else {
2029  CHECK(col_ti.get_compression() == kENCODING_NONE);
2030  unit_size = 8 * sizeof(int8_t);
2031  }
  // Two coordinates per point.
2032  coords_ti.set_size(2 * unit_size);
2033  physical_cd_coords.columnType = coords_ti;
2034  columns.push_back(physical_cd_coords);
2035 
2036  // If adding more physical columns - update SQLTypeInfo::get_physical_cols()
2037 
2038  break;
2039  }
  // LINESTRING: TINYINT array "_coords" (no size set here) and a 4-double "_bounds".
2040  case kLINESTRING: {
2041  ColumnDescriptor physical_cd_coords(true);
2042  physical_cd_coords.columnName = cd.columnName + "_coords";
2043  SQLTypeInfo coords_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2044  // Raw data: compressed/uncompressed coords
2045  coords_ti.set_subtype(kTINYINT);
2046  physical_cd_coords.columnType = coords_ti;
2047  columns.push_back(physical_cd_coords);
2048 
2049  ColumnDescriptor physical_cd_bounds(true);
2050  physical_cd_bounds.columnName = cd.columnName + "_bounds";
2051  SQLTypeInfo bounds_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2052  bounds_ti.set_subtype(kDOUBLE);
2053  bounds_ti.set_size(4 * sizeof(double));
2054  physical_cd_bounds.columnType = bounds_ti;
2055  columns.push_back(physical_cd_bounds);
2056 
2057  // If adding more physical columns - update SQLTypeInfo::get_physical_cols()
2058 
2059  break;
2060  }
  // POLYGON: "_coords", INT array "_ring_sizes", 4-double "_bounds" and a scalar INT
  // "_render_group", appended in that order.
2061  case kPOLYGON: {
2062  ColumnDescriptor physical_cd_coords(true);
2063  physical_cd_coords.columnName = cd.columnName + "_coords";
2064  SQLTypeInfo coords_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2065  // Raw data: compressed/uncompressed coords
2066  coords_ti.set_subtype(kTINYINT);
2067  physical_cd_coords.columnType = coords_ti;
2068  columns.push_back(physical_cd_coords);
2069 
2070  ColumnDescriptor physical_cd_ring_sizes(true);
2071  physical_cd_ring_sizes.columnName = cd.columnName + "_ring_sizes";
2072  SQLTypeInfo ring_sizes_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2073  ring_sizes_ti.set_subtype(kINT);
2074  physical_cd_ring_sizes.columnType = ring_sizes_ti;
2075  columns.push_back(physical_cd_ring_sizes);
2076 
2077  ColumnDescriptor physical_cd_bounds(true);
2078  physical_cd_bounds.columnName = cd.columnName + "_bounds";
2079  SQLTypeInfo bounds_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2080  bounds_ti.set_subtype(kDOUBLE);
2081  bounds_ti.set_size(4 * sizeof(double));
2082  physical_cd_bounds.columnType = bounds_ti;
2083  columns.push_back(physical_cd_bounds);
2084 
2085  ColumnDescriptor physical_cd_render_group(true);
2086  physical_cd_render_group.columnName = cd.columnName + "_render_group";
2087  SQLTypeInfo render_group_ti = SQLTypeInfo(kINT, col_ti.get_notnull());
2088  physical_cd_render_group.columnType = render_group_ti;
2089  columns.push_back(physical_cd_render_group);
2090 
2091  // If adding more physical columns - update SQLTypeInfo::get_physical_cols()
2092 
2093  break;
2094  }
  // MULTIPOLYGON: same as POLYGON plus an INT array "_poly_rings" inserted between
  // "_ring_sizes" and "_bounds".
2095  case kMULTIPOLYGON: {
2096  ColumnDescriptor physical_cd_coords(true);
2097  physical_cd_coords.columnName = cd.columnName + "_coords";
2098  SQLTypeInfo coords_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2099  // Raw data: compressed/uncompressed coords
2100  coords_ti.set_subtype(kTINYINT);
2101  physical_cd_coords.columnType = coords_ti;
2102  columns.push_back(physical_cd_coords);
2103 
2104  ColumnDescriptor physical_cd_ring_sizes(true);
2105  physical_cd_ring_sizes.columnName = cd.columnName + "_ring_sizes";
2106  SQLTypeInfo ring_sizes_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2107  ring_sizes_ti.set_subtype(kINT);
2108  physical_cd_ring_sizes.columnType = ring_sizes_ti;
2109  columns.push_back(physical_cd_ring_sizes);
2110 
2111  ColumnDescriptor physical_cd_poly_rings(true);
2112  physical_cd_poly_rings.columnName = cd.columnName + "_poly_rings";
2113  SQLTypeInfo poly_rings_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2114  poly_rings_ti.set_subtype(kINT);
2115  physical_cd_poly_rings.columnType = poly_rings_ti;
2116  columns.push_back(physical_cd_poly_rings);
2117 
2118  ColumnDescriptor physical_cd_bounds(true);
2119  physical_cd_bounds.columnName = cd.columnName + "_bounds";
2120  SQLTypeInfo bounds_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2121  bounds_ti.set_subtype(kDOUBLE);
2122  bounds_ti.set_size(4 * sizeof(double));
2123  physical_cd_bounds.columnType = bounds_ti;
2124  columns.push_back(physical_cd_bounds);
2125 
2126  ColumnDescriptor physical_cd_render_group(true);
2127  physical_cd_render_group.columnName = cd.columnName + "_render_group";
2128  SQLTypeInfo render_group_ti = SQLTypeInfo(kINT, col_ti.get_notnull());
2129  physical_cd_render_group.columnType = render_group_ti;
2130  columns.push_back(physical_cd_render_group);
2131 
2132  // If adding more physical columns - update SQLTypeInfo::get_physical_cols()
2133 
2134  break;
2135  }
  // Any other type that passes IS_GEO is rejected.
2136  default:
2137  throw runtime_error("Unrecognized geometry type.");
2138  break;
2139  }
2140  }
2141 }
2142 
2143 namespace {
2145  auto timing_type_entry =
2147  CHECK(timing_type_entry != foreign_table.options.end());
2148  if (timing_type_entry->second ==
2150  return foreign_storage::get_next_refresh_time(foreign_table.options);
2151  }
2153 }
2154 } // namespace
2155 
2157  TableDescriptor& td,
2158  const list<ColumnDescriptor>& cols,
2159  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs,
2160  bool isLogicalTable) {
2161  cat_write_lock write_lock(this);
2162  list<ColumnDescriptor> cds = cols;
2163  list<DictDescriptor> dds;
2164  std::set<std::string> toplevel_column_names;
2165  list<ColumnDescriptor> columns;
2166 
2167  if (!td.storageType.empty() &&
2170  throw std::runtime_error("Only temporary tables can be backed by foreign storage.");
2171  }
2173  }
2174 
2175  for (auto cd : cds) {
2176  if (cd.columnName == "rowid") {
2177  throw std::runtime_error(
2178  "Cannot create column with name rowid. rowid is a system defined column.");
2179  }
2180  columns.push_back(cd);
2181  toplevel_column_names.insert(cd.columnName);
2182  if (cd.columnType.is_geometry()) {
2183  expandGeoColumn(cd, columns);
2184  }
2185  }
2186  cds.clear();
2187 
2188  ColumnDescriptor cd;
2189  // add row_id column -- Must be last column in the table
2190  cd.columnName = "rowid";
2191  cd.isSystemCol = true;
2192  cd.columnType = SQLTypeInfo(kBIGINT, true);
2193 #ifdef MATERIALIZED_ROWID
2194  cd.isVirtualCol = false;
2195 #else
2196  cd.isVirtualCol = true;
2197  cd.virtualExpr = "MAPD_FRAG_ID * MAPD_ROWS_PER_FRAG + MAPD_FRAG_ROW_ID";
2198 #endif
2199  columns.push_back(cd);
2200  toplevel_column_names.insert(cd.columnName);
2201 
2202  if (td.hasDeletedCol) {
2203  ColumnDescriptor cd_del;
2204  cd_del.columnName = "$deleted$";
2205  cd_del.isSystemCol = true;
2206  cd_del.isVirtualCol = false;
2207  cd_del.columnType = SQLTypeInfo(kBOOLEAN, true);
2208  cd_del.isDeletedCol = true;
2209 
2210  columns.push_back(cd_del);
2211  }
2212 
2213  td.nColumns = columns.size();
2215  sqliteConnector_.query("BEGIN TRANSACTION");
2217  try {
2219  R"(INSERT INTO mapd_tables (name, userid, ncolumns, isview, fragments, frag_type, max_frag_rows, max_chunk_size, frag_page_size, max_rows, partitions, shard_column_id, shard, num_shards, sort_column_id, storage_type, key_metainfo) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?))",
2220  std::vector<std::string>{td.tableName,
2221  std::to_string(td.userId),
2223  std::to_string(td.isView),
2224  "",
2229  std::to_string(td.maxRows),
2230  td.partitions,
2232  std::to_string(td.shard),
2233  std::to_string(td.nShards),
2235  td.storageType,
2236  td.keyMetainfo});
2237 
2238  // now get the auto generated tableid
2240  "SELECT tableid FROM mapd_tables WHERE name = ?", td.tableName);
2241  td.tableId = sqliteConnector_.getData<int>(0, 0);
2242  int colId = 1;
2243  for (auto cd : columns) {
2245  const bool is_foreign_col =
2246  setColumnSharedDictionary(cd, cds, dds, td, shared_dict_defs);
2247  if (!is_foreign_col) {
2248  setColumnDictionary(cd, dds, td, isLogicalTable);
2249  }
2250  }
2251 
2252  if (toplevel_column_names.count(cd.columnName)) {
2253  // make up colId gap for sanity test (begin with 1 bc much code depends on it!)
2254  if (colId > 1) {
2255  colId += g_test_against_columnId_gap;
2256  }
2257  if (!cd.isGeoPhyCol) {
2258  td.columnIdBySpi_.push_back(colId);
2259  }
2260  }
2261 
2263  "INSERT INTO mapd_columns (tableid, columnid, name, coltype, colsubtype, "
2264  "coldim, colscale, is_notnull, "
2265  "compression, comp_param, size, chunks, is_systemcol, is_virtualcol, "
2266  "virtual_expr, is_deletedcol) "
2267  "VALUES (?, ?, ?, ?, ?, "
2268  "?, "
2269  "?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
2270  std::vector<std::string>{std::to_string(td.tableId),
2271  std::to_string(colId),
2272  cd.columnName,
2281  "",
2284  cd.virtualExpr,
2286  cd.tableId = td.tableId;
2287  cd.columnId = colId++;
2288  cds.push_back(cd);
2289  }
2290  if (td.isView) {
2292  "INSERT INTO mapd_views (tableid, sql) VALUES (?,?)",
2293  std::vector<std::string>{std::to_string(td.tableId), td.viewSQL});
2294  }
2296  auto& foreign_table = dynamic_cast<foreign_storage::ForeignTable&>(td);
2297  foreign_table.next_refresh_time = get_next_refresh_time(foreign_table);
2299  "INSERT INTO omnisci_foreign_tables (table_id, server_id, options, "
2300  "last_refresh_time, next_refresh_time) VALUES (?, ?, ?, ?, ?)",
2301  std::vector<std::string>{std::to_string(foreign_table.tableId),
2302  std::to_string(foreign_table.foreign_server->id),
2303  foreign_table.getOptionsAsJsonString(),
2304  std::to_string(foreign_table.last_refresh_time),
2305  std::to_string(foreign_table.next_refresh_time)});
2306  }
2307  } catch (std::exception& e) {
2308  sqliteConnector_.query("ROLLBACK TRANSACTION");
2309  throw;
2310  }
2311  } else { // Temporary table
2312  td.tableId = nextTempTableId_++;
2313  int colId = 1;
2314  for (auto cd : columns) {
2316  const bool is_foreign_col =
2317  setColumnSharedDictionary(cd, cds, dds, td, shared_dict_defs);
2318 
2319  if (!is_foreign_col) {
2320  // Create a new temporary dictionary
2321  std::string fileName("");
2322  std::string folderPath("");
2324  nextTempDictId_++;
2325  DictDescriptor dd(dict_ref,
2326  fileName,
2328  false,
2329  1,
2330  folderPath,
2331  true); // Is dictName (2nd argument) used?
2332  dds.push_back(dd);
2333  if (!cd.columnType.is_array()) {
2335  }
2336  cd.columnType.set_comp_param(dict_ref.dictId);
2337  }
2338  }
2339  if (toplevel_column_names.count(cd.columnName)) {
2340  // make up colId gap for sanity test (begin with 1 bc much code depends on it!)
2341  if (colId > 1) {
2342  colId += g_test_against_columnId_gap;
2343  }
2344  if (!cd.isGeoPhyCol) {
2345  td.columnIdBySpi_.push_back(colId);
2346  }
2347  }
2348  cd.tableId = td.tableId;
2349  cd.columnId = colId++;
2350  cds.push_back(cd);
2351  }
2352 
2354  serializeTableJsonUnlocked(&td, cds);
2355  }
2356  }
2357 
2358  try {
2359  addTableToMap(&td, cds, dds);
2360  calciteMgr_->updateMetadata(currentDB_.dbName, td.tableName);
2361  if (!td.storageType.empty() && td.storageType != StorageType::FOREIGN_TABLE) {
2363  }
2364  } catch (std::exception& e) {
2365  sqliteConnector_.query("ROLLBACK TRANSACTION");
2366  removeTableFromMap(td.tableName, td.tableId, true);
2367  throw;
2368  }
2369  sqliteConnector_.query("END TRANSACTION");
2370 }
2371 
2373  const std::list<ColumnDescriptor>& cds) const {
  // Purpose: add an entry describing table `td` and its columns `cds` to the per-database
  // JSON file located by table_json_filepath(basePath_, db name), creating the document
  // when the file does not exist yet, and rewrite the whole file.
  // The table must not already be present in the file (enforced by CHECK).
  // NOTE(review): the first line of this signature is not visible in this listing.
2374  // relies on the catalog write lock
2375  using namespace rapidjson;
2376 
2377  VLOG(1) << "Serializing temporary table " << td->tableName << " to JSON for Calcite.";
2378 
2379  const auto db_name = currentDB_.dbName;
2380  const auto file_path = table_json_filepath(basePath_, db_name);
2381 
  // Start from the existing document if there is one, otherwise from an empty object.
2382  Document d;
2383  if (boost::filesystem::exists(file_path)) {
2384  // look for an existing file for this database
2385  std::ifstream reader(file_path.string());
2386  CHECK(reader.is_open());
2387  IStreamWrapper json_read_wrapper(reader);
2388  d.ParseStream(json_read_wrapper);
2389  } else {
2390  d.SetObject();
2391  }
2392  CHECK(d.IsObject());
2393  CHECK(!d.HasMember(StringRef(td->tableName.c_str())));
2394 
  // Table entry: { "name": ..., "id": ..., "columns": [ ... ] }
2395  Value table(kObjectType);
2396  table.AddMember(
2397  "name", Value().SetString(StringRef(td->tableName.c_str())), d.GetAllocator());
2398  table.AddMember("id", Value().SetInt(td->tableId), d.GetAllocator());
2399  table.AddMember("columns", Value(kArrayType), d.GetAllocator());
2400 
  // One object per column, carrying the type information and the system / virtual /
  // deleted flags.
2401  for (const auto& cd : cds) {
2402  Value column(kObjectType);
2403  column.AddMember(
2404  "name", Value().SetString(StringRef(cd.columnName)), d.GetAllocator());
2405  column.AddMember("coltype",
2406  Value().SetInt(static_cast<int>(cd.columnType.get_type())),
2407  d.GetAllocator());
2408  column.AddMember("colsubtype",
2409  Value().SetInt(static_cast<int>(cd.columnType.get_subtype())),
2410  d.GetAllocator());
2411  column.AddMember(
2412  "coldim", Value().SetInt(cd.columnType.get_dimension()), d.GetAllocator());
2413  column.AddMember(
2414  "colscale", Value().SetInt(cd.columnType.get_scale()), d.GetAllocator());
2415  column.AddMember(
2416  "is_notnull", Value().SetBool(cd.columnType.get_notnull()), d.GetAllocator());
2417  column.AddMember("is_systemcol", Value().SetBool(cd.isSystemCol), d.GetAllocator());
2418  column.AddMember("is_virtualcol", Value().SetBool(cd.isVirtualCol), d.GetAllocator());
2419  column.AddMember("is_deletedcol", Value().SetBool(cd.isDeletedCol), d.GetAllocator());
2420  table["columns"].PushBack(column, d.GetAllocator());
2421  }
  // The table entry is stored under the table name at the top level of the document.
2422  d.AddMember(StringRef(td->tableName.c_str()), table, d.GetAllocator());
2423 
2424  // Overwrite the existing file
2425  std::ofstream writer(file_path.string(), std::ios::trunc | std::ios::out);
2426  CHECK(writer.is_open());
2427  OStreamWrapper json_wrapper(writer);
2428 
2429  Writer<OStreamWrapper> json_writer(json_wrapper);
2430  d.Accept(json_writer);
2431  writer.close();
2432 }
2433 
2434 void Catalog::dropTableFromJsonUnlocked(const std::string& table_name) const {
2435  // relies on the catalog write lock
2436  using namespace rapidjson;
2437 
2438  VLOG(1) << "Dropping temporary table " << table_name << " to JSON for Calcite.";
2439 
2440  const auto db_name = currentDB_.dbName;
2441  const auto file_path = table_json_filepath(basePath_, db_name);
2442 
2443  CHECK(boost::filesystem::exists(file_path));
2444  Document d;
2445 
2446  std::ifstream reader(file_path.string());
2447  CHECK(reader.is_open());
2448  IStreamWrapper json_read_wrapper(reader);
2449  d.ParseStream(json_read_wrapper);
2450 
2451  CHECK(d.IsObject());
2452  auto table_name_ref = StringRef(table_name.c_str());
2453  CHECK(d.HasMember(table_name_ref));
2454  CHECK(d.RemoveMember(table_name_ref));
2455 
2456  // Overwrite the existing file
2457  std::ofstream writer(file_path.string(), std::ios::trunc | std::ios::out);
2458  CHECK(writer.is_open());
2459  OStreamWrapper json_wrapper(writer);
2460 
2461  Writer<OStreamWrapper> json_writer(json_wrapper);
2462  d.Accept(json_writer);
2463  writer.close();
2464 }
2465 
2467  std::unique_ptr<foreign_storage::ForeignServer> foreign_server,
2468  bool if_not_exists) {
2469  cat_write_lock write_lock(this);
2471  createForeignServerNoLocks(std::move(foreign_server), if_not_exists);
2472 }
2473 
2475  std::unique_ptr<foreign_storage::ForeignServer> foreign_server,
2476  bool if_not_exists) {
2478  "SELECT name from omnisci_foreign_servers where name = ?",
2479  std::vector<std::string>{foreign_server->name});
2480 
2481  if (sqliteConnector_.getNumRows() == 0) {
2482  foreign_server->creation_time = std::time(nullptr);
2484  "INSERT INTO omnisci_foreign_servers (name, data_wrapper_type, owner_user_id, "
2485  "creation_time, "
2486  "options) "
2487  "VALUES (?, ?, ?, ?, ?)",
2488  std::vector<std::string>{foreign_server->name,
2489  foreign_server->data_wrapper_type,
2490  std::to_string(foreign_server->user_id),
2491  std::to_string(foreign_server->creation_time),
2492  foreign_server->getOptionsAsJsonString()});
2494  "SELECT id from omnisci_foreign_servers where name = ?",
2495  std::vector<std::string>{foreign_server->name});
2496  CHECK_EQ(sqliteConnector_.getNumRows(), size_t(1));
2497  foreign_server->id = sqliteConnector_.getData<int32_t>(0, 0);
2498  } else if (!if_not_exists) {
2499  throw std::runtime_error{"A foreign server with name \"" + foreign_server->name +
2500  "\" already exists."};
2501  }
2502 
2503  std::shared_ptr<foreign_storage::ForeignServer> foreign_server_shared =
2504  std::move(foreign_server);
2505  foreignServerMap_[foreign_server_shared->name] = foreign_server_shared;
2506  foreignServerMapById_[foreign_server_shared->id] = foreign_server_shared;
2507 }
2508 
2510  const std::string& server_name) const {
  // Purpose: look up `server_name` in the in-memory foreignServerMap_ while holding the
  // catalog read lock. Returns the raw pointer stored for that name, or nullptr when
  // the name is not in the map.
  // NOTE(review): the first line of this signature is not visible in this listing.
2511  foreign_storage::ForeignServer* foreign_server = nullptr;
2512  cat_read_lock read_lock(this);
2513  if (foreignServerMap_.find(server_name) != foreignServerMap_.end()) {
2514  foreign_server = foreignServerMap_.find(server_name)->second.get();
2515  }
2516  return foreign_server;
2517 }
2518 
2519 const std::unique_ptr<const foreign_storage::ForeignServer>
2520 Catalog::getForeignServerFromStorage(const std::string& server_name) {
2521  std::unique_ptr<foreign_storage::ForeignServer> foreign_server = nullptr;
2524  "SELECT id, name, data_wrapper_type, options, owner_user_id, creation_time "
2525  "FROM omnisci_foreign_servers WHERE name = ?",
2526  std::vector<std::string>{server_name});
2527  if (sqliteConnector_.getNumRows() > 0) {
2528  foreign_server = std::make_unique<foreign_storage::ForeignServer>(
2529  sqliteConnector_.getData<int>(0, 0),
2530  sqliteConnector_.getData<std::string>(0, 1),
2531  sqliteConnector_.getData<std::string>(0, 2),
2532  sqliteConnector_.getData<std::string>(0, 3),
2533  sqliteConnector_.getData<std::int32_t>(0, 4),
2534  sqliteConnector_.getData<std::int32_t>(0, 5));
2535  }
2536  return foreign_server;
2537 }
2538 
2539 const std::unique_ptr<const foreign_storage::ForeignTable>
2541  std::unique_ptr<foreign_storage::ForeignTable> foreign_table = nullptr;
2544  "SELECT table_id, server_id, options, last_refresh_time, next_refresh_time from "
2545  "omnisci_foreign_tables WHERE table_id = ?",
2546  std::vector<std::string>{to_string(table_id)});
2547  auto num_rows = sqliteConnector_.getNumRows();
2548  if (num_rows > 0) {
2549  CHECK_EQ(size_t(1), num_rows);
2550  foreign_table = std::make_unique<foreign_storage::ForeignTable>(
2551  sqliteConnector_.getData<int>(0, 0),
2552  foreignServerMapById_[sqliteConnector_.getData<int32_t>(0, 1)].get(),
2553  sqliteConnector_.getData<std::string>(0, 2),
2554  sqliteConnector_.getData<int>(0, 3),
2555  sqliteConnector_.getData<int>(0, 4));
2556  }
2557  return foreign_table;
2558 }
2559 
2560 void Catalog::changeForeignServerOwner(const std::string& server_name,
2561  const int new_owner_id) {
2562  cat_write_lock write_lock(this);
2563  foreign_storage::ForeignServer* foreign_server =
2564  foreignServerMap_.find(server_name)->second.get();
2565  CHECK(foreign_server);
2566  setForeignServerProperty(server_name, "owner_user_id", std::to_string(new_owner_id));
2567  // update in-memory server
2568  foreign_server->user_id = new_owner_id;
2569 }
2570 
2571 void Catalog::setForeignServerDataWrapper(const std::string& server_name,
2572  const std::string& data_wrapper) {
2573  cat_write_lock write_lock(this);
2574  auto data_wrapper_type = to_upper(data_wrapper);
2575  // update in-memory server
2576  foreign_storage::ForeignServer* foreign_server =
2577  foreignServerMap_.find(server_name)->second.get();
2578  CHECK(foreign_server);
2579  std::string saved_data_wrapper_type = foreign_server->data_wrapper_type;
2580  foreign_server->data_wrapper_type = data_wrapper_type;
2581  try {
2582  foreign_server->validate();
2583  } catch (const std::exception& e) {
2584  // validation did not succeed:
2585  // revert to saved data_wrapper_type & throw exception
2586  foreign_server->data_wrapper_type = saved_data_wrapper_type;
2587  throw;
2588  }
2589  setForeignServerProperty(server_name, "data_wrapper_type", data_wrapper_type);
2590 }
2591 
2592 void Catalog::setForeignServerOptions(const std::string& server_name,
2593  const std::string& options) {
2594  cat_write_lock write_lock(this);
2595  // update in-memory server
2596  foreign_storage::ForeignServer* foreign_server =
2597  foreignServerMap_.find(server_name)->second.get();
2598  CHECK(foreign_server);
2599  auto saved_options = foreign_server->options;
2600  foreign_server->populateOptionsMap(options, true);
2601  try {
2602  foreign_server->validate();
2603  } catch (const std::exception& e) {
2604  // validation did not succeed:
2605  // revert to saved options & throw exception
2606  foreign_server->options = saved_options;
2607  throw;
2608  }
2609  setForeignServerProperty(server_name, "options", options);
2610 }
2611 
2612 void Catalog::renameForeignServer(const std::string& server_name,
2613  const std::string& name) {
2614  cat_write_lock write_lock(this);
2615  auto foreign_server_it = foreignServerMap_.find(server_name);
2616  CHECK(foreign_server_it != foreignServerMap_.end());
2617  setForeignServerProperty(server_name, "name", name);
2618  auto foreign_server_shared = foreign_server_it->second;
2619  foreign_server_shared->name = name;
2620  foreignServerMap_[name] = foreign_server_shared;
2621  foreignServerMap_.erase(foreign_server_it);
2622 }
2623 
2624 void Catalog::dropForeignServer(const std::string& server_name) {
2625  cat_write_lock write_lock(this);
2627 
2629  "SELECT id from omnisci_foreign_servers where name = ?",
2630  std::vector<std::string>{server_name});
2631  auto num_rows = sqliteConnector_.getNumRows();
2632  if (num_rows > 0) {
2633  CHECK_EQ(size_t(1), num_rows);
2634  auto server_id = sqliteConnector_.getData<int32_t>(0, 0);
2636  "SELECT table_id from omnisci_foreign_tables where server_id = ?",
2637  std::to_string(server_id));
2638  if (sqliteConnector_.getNumRows() > 0) {
2639  throw std::runtime_error{"Foreign server \"" + server_name +
2640  "\" is referenced "
2641  "by existing foreign tables and cannot be dropped."};
2642  }
2643  sqliteConnector_.query("BEGIN TRANSACTION");
2644  try {
2646  "DELETE FROM omnisci_foreign_servers WHERE name = ?",
2647  std::vector<std::string>{server_name});
2648  } catch (const std::exception& e) {
2649  sqliteConnector_.query("ROLLBACK TRANSACTION");
2650  throw;
2651  }
2652  sqliteConnector_.query("END TRANSACTION");
2653  foreignServerMap_.erase(server_name);
2654  foreignServerMapById_.erase(server_id);
2655  }
2656 }
2657 
2659  const rapidjson::Value* filters,
2660  const UserMetadata& user,
2661  std::vector<const foreign_storage::ForeignServer*>& results) {
2662  sys_read_lock syscat_read_lock(&SysCatalog::instance());
2663  cat_read_lock read_lock(this);
2665  // Customer facing and internal SQlite names
2666  std::map<std::string, std::string> col_names{{"server_name", "name"},
2667  {"data_wrapper", "data_wrapper_type"},
2668  {"created_at", "creation_time"},
2669  {"options", "options"}};
2670 
2671  // TODO add "owner" when FSI privilege is implemented
2672  std::stringstream filter_string;
2673  std::vector<std::string> arguments;
2674 
2675  if (filters != nullptr) {
2676  // Create SQL WHERE clause for SQLite query
2677  int num_filters = 0;
2678  filter_string << " WHERE";
2679  for (auto& filter_def : filters->GetArray()) {
2680  if (num_filters > 0) {
2681  filter_string << " " << std::string(filter_def["chain"].GetString());
2682  ;
2683  }
2684 
2685  if (col_names.find(std::string(filter_def["attribute"].GetString())) ==
2686  col_names.end()) {
2687  throw std::runtime_error{"Attribute with name \"" +
2688  std::string(filter_def["attribute"].GetString()) +
2689  "\" does not exist."};
2690  }
2691 
2692  filter_string << " " << col_names[std::string(filter_def["attribute"].GetString())];
2693 
2694  bool equals_operator = false;
2695  if (std::strcmp(filter_def["operation"].GetString(), "EQUALS") == 0) {
2696  filter_string << " = ? ";
2697  equals_operator = true;
2698  } else {
2699  filter_string << " LIKE ? ";
2700  }
2701 
2702  bool timestamp_column =
2703  (std::strcmp(filter_def["attribute"].GetString(), "created_at") == 0);
2704 
2705  if (timestamp_column && !equals_operator) {
2706  throw std::runtime_error{"LIKE operator is incompatible with TIMESTAMP data"};
2707  }
2708 
2709  if (timestamp_column && equals_operator) {
2710  arguments.push_back(std::to_string(
2711  dateTimeParse<kTIMESTAMP>(filter_def["value"].GetString(), 0)));
2712  } else {
2713  arguments.push_back(filter_def["value"].GetString());
2714  }
2715 
2716  num_filters++;
2717  }
2718  }
2719  // Create select query for the omnisci_foreign_servers table
2720  std::string query = std::string("SELECT name from omnisci_foreign_servers ");
2721  query += filter_string.str();
2722 
2723  sqliteConnector_.query_with_text_params(query, arguments);
2724  auto num_rows = sqliteConnector_.getNumRows();
2725 
2726  if (sqliteConnector_.getNumRows() == 0)
2727  return;
2728 
2730  // Return pointers to objects
2731  results.reserve(num_rows);
2732  for (size_t row = 0; row < num_rows; ++row) {
2733  const foreign_storage::ForeignServer* foreign_server =
2734  getForeignServer(sqliteConnector_.getData<std::string>(row, 0));
2735 
2736  CHECK(foreign_server != nullptr);
2737 
2738  DBObject dbObject(foreign_server->name, ServerDBObjectType);
2739  dbObject.loadKey(*this);
2740  std::vector<DBObject> privObjects = {dbObject};
2741  if (!SysCatalog::instance().hasAnyPrivileges(user, privObjects)) {
2742  // skip server, as there are no privileges to access it
2743  continue;
2744  }
2745  results.push_back(foreign_server);
2746  }
2747 }
2748 
2749 // returns the table epoch or -1 if there is something wrong with the shared epoch
2750 int32_t Catalog::getTableEpoch(const int32_t db_id, const int32_t table_id) const {
2751  cat_read_lock read_lock(this);
2752  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(table_id);
2753  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
2754  // check all shards have same checkpoint
2755  const auto physicalTables = physicalTableIt->second;
2756  CHECK(!physicalTables.empty());
2757  size_t curr_epoch{0}, first_epoch{0};
2758  int32_t first_table_id{0};
2759  bool are_epochs_inconsistent{false};
2760  for (size_t i = 0; i < physicalTables.size(); i++) {
2761  int32_t physical_tb_id = physicalTables[i];
2762  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id);
2763  CHECK(phys_td);
2764 
2765  curr_epoch = dataMgr_->getTableEpoch(db_id, physical_tb_id);
2766  LOG(INFO) << "Got sharded table epoch for db id: " << db_id
2767  << ", table id: " << physical_tb_id << ", epoch: " << curr_epoch;
2768  if (i == 0) {
2769  first_epoch = curr_epoch;
2770  first_table_id = physical_tb_id;
2771  } else if (first_epoch != curr_epoch) {
2772  are_epochs_inconsistent = true;
2773  LOG(ERROR) << "Epochs on shards do not all agree on table id: " << table_id
2774  << ", db id: " << db_id
2775  << ". First table (table id: " << first_table_id
2776  << ") has epoch: " << first_epoch << ". Table id: " << physical_tb_id
2777  << ", has inconsistent epoch: " << curr_epoch
2778  << ". See previous INFO logs for all epochs and their table ids.";
2779  }
2780  }
2781  if (are_epochs_inconsistent) {
2782  // oh dear the shards do not agree on the epoch for this table
2783  return -1;
2784  }
2785  return curr_epoch;
2786  } else {
2787  auto epoch = dataMgr_->getTableEpoch(db_id, table_id);
2788  LOG(INFO) << "Got table epoch for db id: " << db_id << ", table id: " << table_id
2789  << ", epoch: " << epoch;
2790  return epoch;
2791  }
2792 }
2793 
2794 void Catalog::setTableEpoch(const int db_id, const int table_id, int new_epoch) {
2795  cat_read_lock read_lock(this);
2796  LOG(INFO) << "Set table epoch db:" << db_id << " Table ID " << table_id
2797  << " back to new epoch " << new_epoch;
2798  removeChunks(table_id);
2799  dataMgr_->setTableEpoch(db_id, table_id, new_epoch);
2800 
2801  // check if sharded
2802  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(table_id);
2803  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
2804  const auto physicalTables = physicalTableIt->second;
2805  CHECK(!physicalTables.empty());
2806  for (size_t i = 0; i < physicalTables.size(); i++) {
2807  int32_t physical_tb_id = physicalTables[i];
2808  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id);
2809  CHECK(phys_td);
2810  LOG(INFO) << "Set sharded table epoch db:" << db_id << " Table ID "
2811  << physical_tb_id << " back to new epoch " << new_epoch;
2812  removeChunks(physical_tb_id);
2813  dataMgr_->setTableEpoch(db_id, physical_tb_id, new_epoch);
2814  }
2815  }
2816 }
2817 
2818 std::vector<TableEpochInfo> Catalog::getTableEpochs(const int32_t db_id,
2819  const int32_t table_id) const {
2820  cat_read_lock read_lock(this);
2821  std::vector<TableEpochInfo> table_epochs;
2822  const auto physical_table_it = logicalToPhysicalTableMapById_.find(table_id);
2823  if (physical_table_it != logicalToPhysicalTableMapById_.end()) {
2824  const auto physical_tables = physical_table_it->second;
2825  CHECK(!physical_tables.empty());
2826 
2827  for (const auto physical_tb_id : physical_tables) {
2828  const auto phys_td = getMetadataForTableImpl(physical_tb_id, false);
2829  CHECK(phys_td);
2830 
2831  auto table_id = phys_td->tableId;
2832  auto epoch = dataMgr_->getTableEpoch(db_id, phys_td->tableId);
2833  table_epochs.emplace_back(table_id, epoch);
2834  LOG(INFO) << "Got sharded table epoch for db id: " << db_id
2835  << ", table id: " << table_id << ", epoch: " << epoch;
2836  }
2837  } else {
2838  auto epoch = dataMgr_->getTableEpoch(db_id, table_id);
2839  LOG(INFO) << "Got table epoch for db id: " << db_id << ", table id: " << table_id
2840  << ", epoch: " << epoch;
2841  table_epochs.emplace_back(table_id, epoch);
2842  }
2843  return table_epochs;
2844 }
2845 
2846 void Catalog::setTableEpochs(const int32_t db_id,
2847  const std::vector<TableEpochInfo>& table_epochs) {
2848  cat_read_lock read_lock(this);
2849  for (const auto& table_epoch_info : table_epochs) {
2850  removeChunks(table_epoch_info.table_id);
2851  dataMgr_->setTableEpoch(
2852  db_id, table_epoch_info.table_id, table_epoch_info.table_epoch);
2853  LOG(INFO) << "Set table epoch for db id: " << db_id
2854  << ", table id: " << table_epoch_info.table_id
2855  << ", back to epoch: " << table_epoch_info.table_epoch;
2856  }
2857 }
2858 
2859 namespace {
2860 std::string table_epochs_to_string(const std::vector<TableEpochInfo>& table_epochs) {
2861  std::string table_epochs_str{"["};
2862  bool first_entry{true};
2863  for (const auto& table_epoch : table_epochs) {
2864  if (first_entry) {
2865  first_entry = false;
2866  } else {
2867  table_epochs_str += ", ";
2868  }
2869  table_epochs_str += "(table_id: " + std::to_string(table_epoch.table_id) +
2870  ", epoch: " + std::to_string(table_epoch.table_epoch) + ")";
2871  }
2872  table_epochs_str += "]";
2873  return table_epochs_str;
2874 }
2875 } // namespace
2876 
// Best-effort wrapper around setTableEpochs(): any std::exception raised while applying
// `table_epochs` to database `db_id` is logged at ERROR level (together with the
// attempted epochs) and then swallowed rather than rethrown.
// NOTE(review): the line holding this function's return type and name is missing from
// this listing; the parameters match the setTableEpochsLogExceptions(...) call made in
// Catalog::checkpointWithAutoRollback -- confirm.
2878  const int32_t db_id,
2879  const std::vector<TableEpochInfo>& table_epochs) {
2880  try {
2881  setTableEpochs(db_id, table_epochs);
2882  } catch (std::exception& e) {
// Deliberately not rethrown: the failure is only recorded in the log.
2883  LOG(ERROR) << "An error occurred when attempting to set table epochs. DB id: "
2884  << db_id << ", Table epochs: " << table_epochs_to_string(table_epochs)
2885  << ", Error: " << e.what();
2886  }
2887 }
2888 
// Looks up `td` in deletedColumnPerTable_ under a catalog read lock and returns the
// mapped value, or nullptr when the table has no entry.
// NOTE(review): the signature line is missing from this listing, so the function name
// and the declared type of `td` cannot be confirmed here.
2890  cat_read_lock read_lock(this);
2891  const auto it = deletedColumnPerTable_.find(td);
2892  return it != deletedColumnPerTable_.end() ? it->second : nullptr;
2893 }
2894 
// Reports whether table `td` has any deleted rows, judged from column
// `delete_column_id`.
//  - Temporary tables: delegates to the table fragmenter's hasDeletedRows().
//  - Other tables: scans the chunk metadata under the key prefix
//    {current db id, table id, delete column id} and returns true as soon as a chunk's
//    max statistic (read as tinyint) equals 1.
// NOTE(review): the first line of the signature is missing from this listing; the name
// is presumably checkMetadataForDeletedRecs, which is called with (td, columnId)
// elsewhere in this file -- confirm.
2896  int delete_column_id) const {
2897  // check if there are rows deleted by examining the deletedColumn metadata
2898  CHECK(td);
2899 
2900  if (table_is_temporary(td)) {
2901  auto fragmenter = td->fragmenter;
2902  CHECK(fragmenter);
2903  return fragmenter->hasDeletedRows(delete_column_id);
2904  } else {
2905  ChunkKey chunk_key_prefix = {currentDB_.dbId, td->tableId, delete_column_id};
2906  ChunkMetadataVector chunk_metadata_vec;
2907  dataMgr_->getChunkMetadataVecForKeyPrefix(chunk_metadata_vec, chunk_key_prefix);
2908  int64_t chunk_max{0};
2909 
2910  for (auto chunk_metadata : chunk_metadata_vec) {
2911  chunk_max = chunk_metadata.second->chunkStats.max.tinyintval;
2912  // delete has occurred
2913  if (chunk_max == 1) {
2914  return true;
2915  }
2916  }
2917  return false;
2918  }
2919 }
2920 
// Returns the delete-column descriptor registered for `td` in deletedColumnPerTable_,
// but only if checkMetadataForDeletedRecs() reports deleted rows; otherwise nullptr.
// When `td` maps to physical shard tables, each shard is checked in turn and the
// descriptor is returned on the first shard that reports deletes.
// NOTE(review): the first line of the signature (return type and name) is missing from
// this listing.
2922  const TableDescriptor* td) const {
2923  cat_read_lock read_lock(this);
2924 
2925  const auto it = deletedColumnPerTable_.find(td);
2926  // if not a table that supports delete return nullptr, nothing more to do
2927  if (it == deletedColumnPerTable_.end()) {
2928  return nullptr;
2929  }
2930  const ColumnDescriptor* cd = it->second;
2931 
2932  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(td->tableId);
2933 
2934  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
2935  // check all shards
2936  const auto physicalTables = physicalTableIt->second;
2937  CHECK(!physicalTables.empty());
2938  for (size_t i = 0; i < physicalTables.size(); i++) {
2939  int32_t physical_tb_id = physicalTables[i];
2940  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id);
2941  CHECK(phys_td);
// The logical table's delete column id is used to probe every shard.
2942  if (checkMetadataForDeletedRecs(phys_td, cd->columnId)) {
2943  return cd;
2944  }
2945  }
2946  } else {
2947  if (checkMetadataForDeletedRecs(td, cd->columnId)) {
2948  return cd;
2949  }
2950  }
2951  // no deletes so far recorded in metadata
2952  return nullptr;
2953 }
2954 
// Takes the catalog write lock and forwards (td, cd) to setDeletedColumnUnlocked().
// NOTE(review): the signature line is missing from this listing, so the function name
// and parameter types cannot be confirmed here.
2956  cat_write_lock write_lock(this);
2957  setDeletedColumnUnlocked(td, cd);
2958 }
2959 
// Registers `cd` as the entry for `td` in deletedColumnPerTable_. The CHECK fails if
// `td` already has an entry, because emplace() then reports no insertion.
// Note that this body acquires cat_write_lock itself.
// NOTE(review): the first line of the signature is missing from this listing; the name
// is presumably setDeletedColumnUnlocked -- confirm.
2961  const ColumnDescriptor* cd) {
2962  cat_write_lock write_lock(this);
2963  const auto it_ok = deletedColumnPerTable_.emplace(td, cd);
2964  CHECK(it_ok.second);
2965 }
2966 
2967 namespace {
2968 
// File-local helper: resolves the foreign table named by `shared_dict_def` in catalog
// `cat` (CHECK-failing if it does not exist) and returns the result of looking up the
// definition's foreign column in that table.
// NOTE(review): the line with the return type and function name is missing from this
// listing; the name is presumably get_foreign_col, which is called with
// (*this, shared_dict_def) elsewhere in this file -- confirm.
2970  const Catalog& cat,
2971  const Parser::SharedDictionaryDef& shared_dict_def) {
2972  const auto& table_name = shared_dict_def.get_foreign_table();
2973  const auto td = cat.getMetadataForTable(table_name);
2974  CHECK(td);
2975  const auto& foreign_col_name = shared_dict_def.get_foreign_column();
2976  return cat.getMetadataForColumn(td->tableId, foreign_col_name);
2977 }
2978 
2979 } // namespace
2980 
// Makes `referencing_column` share the dictionary of the foreign column named by
// `shared_dict_def`: the foreign column's type is copied onto the referencing column,
// and the in-memory refcount of the dictionary identified by that type's comp_param is
// incremented. When `persist_reference` is true, the refcount in mapd_dictionaries is
// incremented as well.
// NOTE(review): the first line of the signature and the line that issues the UPDATE
// statement are missing from this listing; the name is presumably
// addReferenceToForeignDict -- confirm.
2982  Parser::SharedDictionaryDef shared_dict_def,
2983  const bool persist_reference) {
2984  cat_write_lock write_lock(this);
2985  const auto foreign_ref_col = get_foreign_col(*this, shared_dict_def);
2986  CHECK(foreign_ref_col);
2987  referencing_column.columnType = foreign_ref_col->columnType;
2988  const int dict_id = referencing_column.columnType.get_comp_param();
2989  const DictRef dict_ref(currentDB_.dbId, dict_id);
2990  const auto dictIt = dictDescriptorMapByRef_.find(dict_ref);
2991  CHECK(dictIt != dictDescriptorMapByRef_.end());
2992  const auto& dd = dictIt->second;
// The dictionary must already have at least one user before another reference is added.
2993  CHECK_GE(dd->refcount, 1);
2994  ++dd->refcount;
2995  if (persist_reference) {
2998  "UPDATE mapd_dictionaries SET refcount = refcount + 1 WHERE dictid = ?",
2999  {std::to_string(dict_id)});
3000  }
3001 }
3002 
// Checks whether column `cd` is named as the referencing column of one of
// `shared_dict_defs` and, if so, wires it to the referenced dictionary.
//  - Returns false when `shared_dict_defs` is empty or none of them names `cd`.
//  - Returns true after handling the first definition whose column matches `cd`.
// Three cases are distinguished for a matching definition:
//  1. The foreign table is the table being created (`td`) and the referenced column's
//     dictionary is in `dds`: its refcount is bumped.
//  2. The foreign table is `td` but no such dictionary is in `dds`: the reference path
//     is compressed and the reference is added to the resulting foreign dictionary.
//  3. The foreign table is another table: the reference is added to that table's
//     dictionary; a non-temporary table may not reference a temporary table.
// NOTE(review): the first line of the signature and several call lines are missing from
// this listing.
3004  ColumnDescriptor& cd,
3005  std::list<ColumnDescriptor>& cdd,
3006  std::list<DictDescriptor>& dds,
3007  const TableDescriptor td,
3008  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs) {
3009  cat_write_lock write_lock(this);
3011 
3012  if (shared_dict_defs.empty()) {
3013  return false;
3014  }
3015  for (const auto& shared_dict_def : shared_dict_defs) {
3016  // check if the current column is a referencing column
3017  const auto& column = shared_dict_def.get_column();
3018  if (cd.columnName == column) {
// compare() returns 0 on equality, so this branch is the "same table" case.
3019  if (!shared_dict_def.get_foreign_table().compare(td.tableName)) {
3020  // Dictionaries are being shared in table to be created
3021  const auto& ref_column = shared_dict_def.get_foreign_column();
3022  auto colIt =
3023  std::find_if(cdd.begin(), cdd.end(), [ref_column](const ColumnDescriptor it) {
3024  return !ref_column.compare(it.columnName);
3025  });
3026  CHECK(colIt != cdd.end());
3027  cd.columnType = colIt->columnType;
3028 
3029  const int dict_id = colIt->columnType.get_comp_param();
3030  CHECK_GE(dict_id, 1);
3031  auto dictIt = std::find_if(
3032  dds.begin(), dds.end(), [this, dict_id](const DictDescriptor it) {
3033  return it.dictRef.dbId == this->currentDB_.dbId &&
3034  it.dictRef.dictId == dict_id;
3035  });
3036  if (dictIt != dds.end()) {
3037  // There exists dictionary definition of a dictionary column
3038  CHECK_GE(dictIt->refcount, 1);
3039  ++dictIt->refcount;
3040  if (!table_is_temporary(&td)) {
3041  // Persist reference count
3043  "UPDATE mapd_dictionaries SET refcount = refcount + 1 WHERE dictid = ?",
3044  {std::to_string(dict_id)});
3045  }
3046  } else {
3047  // The dictionary is referencing a column which is referencing a column in
3048  // different table
3049  auto root_dict_def = compress_reference_path(shared_dict_def, shared_dict_defs);
3050  addReferenceToForeignDict(cd, root_dict_def, !table_is_temporary(&td));
3051  }
3052  } else {
3053  const auto& foreign_table_name = shared_dict_def.get_foreign_table();
3054  const auto foreign_td = getMetadataForTable(foreign_table_name, false);
3055  if (table_is_temporary(foreign_td)) {
3056  if (!table_is_temporary(&td)) {
3057  throw std::runtime_error(
3058  "Only temporary tables can share dictionaries with other temporary "
3059  "tables.");
3060  }
// Temporary-to-temporary sharing: the reference is never persisted.
3061  addReferenceToForeignDict(cd, shared_dict_def, false);
3062  } else {
3063  addReferenceToForeignDict(cd, shared_dict_def, !table_is_temporary(&td));
3064  }
3065  }
3066  return true;
3067  }
3068  }
3069  return false;
3070 }
3071 
// Creates a dictionary descriptor for column `cd` of table `td`, appends it to `dds`
// and stores the resulting dictionary id as the column type's comp_param.
// For a logical table the visible code inserts a row under the placeholder name
// "Initial_key" into mapd_dictionaries, reads back the generated dictid, renames the
// row to "<table>_<column>_dict<id>" and derives the dictionary folder path from
// basePath_, the current db id and the dictionary id. Otherwise dictId stays 0, the
// name stays "Initial_key" and folderPath stays empty.
// NOTE(review): the first line of the signature, the sqlite call lines, the start of
// the `dd` construction and the body of the final is_array() check are missing from
// this listing.
3073  std::list<DictDescriptor>& dds,
3074  const TableDescriptor& td,
3075  const bool isLogicalTable) {
3076  cat_write_lock write_lock(this);
3077 
3078  std::string dictName{"Initial_key"};
3079  int dictId{0};
3080  std::string folderPath;
3081  if (isLogicalTable) {
3083 
3085  "INSERT INTO mapd_dictionaries (name, nbits, is_shared, refcount) VALUES (?, ?, "
3086  "?, 1)",
3087  std::vector<std::string>{
3088  dictName, std::to_string(cd.columnType.get_comp_param()), "0"});
3090  "SELECT dictid FROM mapd_dictionaries WHERE name = ?", dictName);
3091  dictId = sqliteConnector_.getData<int>(0, 0);
3092  dictName = td.tableName + "_" + cd.columnName + "_dict" + std::to_string(dictId);
3094  "UPDATE mapd_dictionaries SET name = ? WHERE name = 'Initial_key'", dictName);
3095  folderPath = basePath_ + "/mapd_data/DB_" + std::to_string(currentDB_.dbId) +
3096  "_DICT_" + std::to_string(dictId);
3097  }
3099  dictId,
3100  dictName,
3102  false,
3103  1,
3104  folderPath,
3105  false);
3106  dds.push_back(dd);
3107  if (!cd.columnType.is_array()) {
3109  }
3110  cd.columnType.set_comp_param(dictId);
3111 }
3112 
// Creates a logical table from `td` and then one physical table per shard
// (td.nShards of them), reusing and mutating the same descriptor `td` for every
// createTable() call. The ids of the physical tables are recorded under the logical
// table id in logicalToPhysicalTableMapById_, and updateLogicalToPhysicalTableMap() is
// called for non-temporary tables.
// NOTE(review): the first line of the signature and one statement inside the shard loop
// are missing from this listing.
3114  TableDescriptor& td,
3115  const list<ColumnDescriptor>& cols,
3116  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs) {
3117  cat_write_lock write_lock(this);
3118 
3119  /* create logical table */
3120  TableDescriptor* tdl = &td;
3121  createTable(*tdl, cols, shared_dict_defs, true); // create logical table
3122  int32_t logical_tb_id = tdl->tableId;
3123 
3124  /* create physical tables and link them to the logical table */
3125  std::vector<int32_t> physicalTables;
3126  for (int32_t i = 1; i <= td.nShards; i++) {
3127  TableDescriptor* tdp = &td;
// Shard numbers stored in the descriptor are zero-based while the loop counts from 1.
3129  tdp->shard = i - 1;
3130  createTable(*tdp, cols, shared_dict_defs, false); // create physical table
3131  int32_t physical_tb_id = tdp->tableId;
3132 
3133  /* add physical table to the vector of physical tables */
3134  physicalTables.push_back(physical_tb_id);
3135  }
3136 
3137  if (!physicalTables.empty()) {
3138  /* add logical to physical tables correspondence to the map */
3139  const auto it_ok =
3140  logicalToPhysicalTableMapById_.emplace(logical_tb_id, physicalTables);
3141  CHECK(it_ok.second);
3142  /* update sqlite mapd_logical_to_physical in sqlite database */
3143  if (!table_is_temporary(&td)) {
3144  updateLogicalToPhysicalTableMap(logical_tb_id);
3145  }
3146  }
3147 }
3148 
// Truncates table `td`. If `td` maps to physical shard tables, every shard is
// truncated first via doTruncateTable(), and then `td` itself is truncated.
// NOTE(review): the signature line is missing from this listing.
3150  cat_write_lock write_lock(this);
3151 
3152  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(td->tableId);
3153  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
3154  // truncate all corresponding physical tables if this is a logical table
3155  const auto physicalTables = physicalTableIt->second;
3156  CHECK(!physicalTables.empty());
3157  for (size_t i = 0; i < physicalTables.size(); i++) {
3158  int32_t physical_tb_id = physicalTables[i];
3159  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id);
3160  CHECK(phys_td);
3161  doTruncateTable(phys_td);
3162  }
3163  }
3164  doTruncateTable(td);
3165 }
3166 
// Truncates a single table `td`:
//  1. resets the table's fragmenter pointer in tableDescriptorMapById_,
//  2. deletes the table's chunks at CPU and GPU memory levels and removes its related
//     data structures from the data manager,
//  3. for every dictionary-encoded column of the table with a non-zero dictionary id,
//     replaces the dictionary descriptor with a fresh one carrying the same metadata;
//     when the table is the dictionary's only user (refcount == 1) the existing
//     dictionary is closed and its folder renamed for deletion first.
// NOTE(review): the signature line and one statement near the end of the column loop
// are missing from this listing; the name is presumably doTruncateTable -- confirm.
3168  cat_write_lock write_lock(this);
3169 
3170  const int tableId = td->tableId;
3171  // must destroy fragmenter before deleteChunks is called.
3172  if (td->fragmenter != nullptr) {
3173  auto tableDescIt = tableDescriptorMapById_.find(tableId);
3174  CHECK(tableDescIt != tableDescriptorMapById_.end());
3175  tableDescIt->second->fragmenter = nullptr;
3176  CHECK(td->fragmenter == nullptr);
3177  }
3178  ChunkKey chunkKeyPrefix = {currentDB_.dbId, tableId};
3179  // assuming deleteChunksWithPrefix is atomic
3180  dataMgr_->deleteChunksWithPrefix(chunkKeyPrefix, MemoryLevel::CPU_LEVEL);
3181  dataMgr_->deleteChunksWithPrefix(chunkKeyPrefix, MemoryLevel::GPU_LEVEL);
3182 
3183  dataMgr_->removeTableRelatedDS(currentDB_.dbId, tableId);
3184 
// A string dictionary client is only created when the system catalog reports that this
// instance is an aggregator; it talks to the first configured dictionary host.
3185  std::unique_ptr<StringDictionaryClient> client;
3186  if (SysCatalog::instance().isAggregator()) {
3187  CHECK(!string_dict_hosts_.empty());
3188  DictRef dict_ref(currentDB_.dbId, -1);
3189  client.reset(new StringDictionaryClient(string_dict_hosts_.front(), dict_ref, true));
3190  }
3191  // clean up any dictionaries
3192  // delete all column descriptors for the table
3193  for (const auto& columnDescriptor : columnDescriptorMapById_) {
3194  auto cd = columnDescriptor.second;
3195  if (cd->tableId != td->tableId) {
3196  continue;
3197  }
3198  const int dict_id = cd->columnType.get_comp_param();
3199  // Dummy dictionaries created for a shard of a logical table have the id set to zero.
3200  if (cd->columnType.get_compression() == kENCODING_DICT && dict_id) {
3201  const DictRef dict_ref(currentDB_.dbId, dict_id);
3202  const auto dictIt = dictDescriptorMapByRef_.find(dict_ref);
3203  CHECK(dictIt != dictDescriptorMapByRef_.end());
3204  const auto& dd = dictIt->second;
3205  CHECK_GE(dd->refcount, 1);
3206  // if this is the only table using this dict reset the dict
3207  if (dd->refcount == 1) {
3208  // close the dictionary
3209  dd->stringDict.reset();
3210  File_Namespace::renameForDelete(dd->dictFolderPath);
3211  if (client) {
3212  client->drop(dd->dictRef);
3213  }
3214  if (!dd->dictIsTemp) {
3215  boost::filesystem::create_directory(dd->dictFolderPath);
3216  }
3217  }
3218 
// The replacement descriptor must be built before the erase below, because `dd` refers
// to the map entry being erased.
3219  DictDescriptor* new_dd = new DictDescriptor(dd->dictRef,
3220  dd->dictName,
3221  dd->dictNBits,
3222  dd->dictIsShared,
3223  dd->refcount,
3224  dd->dictFolderPath,
3225  dd->dictIsTemp);
3226  dictDescriptorMapByRef_.erase(dictIt);
3227  // now create new Dict -- need to figure out what to do here for temp tables
3228  if (client) {
3229  client->create(new_dd->dictRef, new_dd->dictIsTemp);
3230  }
3231  dictDescriptorMapByRef_[new_dd->dictRef].reset(new_dd);
3233  }
3234  }
3235 }
3236 
3237 void Catalog::removeFragmenterForTable(const int table_id) {
3238  cat_write_lock write_lock(this);
3239  auto td = getMetadataForTable(table_id, false);
3240  if (td->fragmenter != nullptr) {
3241  auto tableDescIt = tableDescriptorMapById_.find(table_id);
3242  CHECK(tableDescIt != tableDescriptorMapById_.end());
3243  tableDescIt->second->fragmenter = nullptr;
3244  CHECK(td->fragmenter == nullptr);
3245  }
3246 }
3247 
3248 // used by rollback_table_epoch to clean up in memory artifacts after a rollback
// Resets the fragmenter of table `table_id` (CHECK-failing if the table is unknown)
// and deletes the table's chunks at CPU and GPU memory levels.
// NOTE(review): one statement between the two fragmenter checks is missing from this
// listing; the fragmenter pointer is tested twice around it.
3249 void Catalog::removeChunks(const int table_id) {
3250  auto td = getMetadataForTable(table_id);
3251  CHECK(td);
3252 
3253  if (td->fragmenter != nullptr) {
3255  if (td->fragmenter != nullptr) {
3256  auto tableDescIt = tableDescriptorMapById_.find(table_id);
3257  CHECK(tableDescIt != tableDescriptorMapById_.end());
3258  tableDescIt->second->fragmenter = nullptr;
3259  CHECK(td->fragmenter == nullptr);
3260  }
3261  }
3262 
3263  // remove the chunks from in memory structures
3264  ChunkKey chunkKey = {currentDB_.dbId, table_id};
3265 
3266  dataMgr_->deleteChunksWithPrefix(chunkKey, MemoryLevel::CPU_LEVEL);
3267  dataMgr_->deleteChunksWithPrefix(chunkKey, MemoryLevel::GPU_LEVEL);
3268 }
3269 
// Drops table `td` inside a sqlite transaction. If `td` maps to physical shard tables,
// each shard is dropped via doDropTable() and the logical-to-physical record is deleted
// from mapd_logical_to_physical before `td` itself is dropped. Any std::exception rolls
// the transaction back and is rethrown.
// NOTE(review): the signature and several statements (including the sqlite call that
// issues the DELETE) are missing from this listing.
3273  cat_write_lock write_lock(this);
3275  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(td->tableId);
3276  sqliteConnector_.query("BEGIN TRANSACTION");
3277  try {
3278  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
3279  // remove all corresponding physical tables if this is a logical table
3280  const auto physicalTables = physicalTableIt->second;
3281  CHECK(!physicalTables.empty());
3282  for (size_t i = 0; i < physicalTables.size(); i++) {
3283  int32_t physical_tb_id = physicalTables[i];
3284  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id);
3285  CHECK(phys_td);
3286  doDropTable(phys_td);
3287  }
3288 
3289  // remove corresponding record from the logicalToPhysicalTableMap in sqlite database
3291  "DELETE FROM mapd_logical_to_physical WHERE logical_table_id = ?",
3292  std::to_string(td->tableId));
3294  }
3295  doDropTable(td);
3296  } catch (std::exception& e) {
3297  sqliteConnector_.query("ROLLBACK TRANSACTION");
3298  throw;
3299  }
3300  sqliteConnector_.query("END TRANSACTION");
3301 }
3302 
3307  }
3309 }
3310 
// Issues the sqlite statements that remove table `td` from the catalog tables:
//  - deletes its row from mapd_tables,
//  - collects the comp_param of each of its dictionary-encoded columns and decrements
//    the refcount of each of those dictionaries,
//  - deletes those dictionaries whose refcount reached 0,
//  - deletes its rows from mapd_columns, and from mapd_views when `td` is a view.
// The dictionary statements run before the mapd_columns delete because they select
// from mapd_columns.
// NOTE(review): the signature, several sqlite call lines and the condition guarding the
// omnisci_foreign_tables delete are missing from this listing; the function name cannot
// be confirmed here.
3312  const int tableId = td->tableId;
3313  sqliteConnector_.query_with_text_param("DELETE FROM mapd_tables WHERE tableid = ?",
3314  std::to_string(tableId));
3316  "select comp_param from mapd_columns where compression = ? and tableid = ?",
3317  std::vector<std::string>{std::to_string(kENCODING_DICT), std::to_string(tableId)});
3318  int numRows = sqliteConnector_.getNumRows();
// The ids are copied out first, before further statements are issued on the connector.
3319  std::vector<int> dict_id_list;
3320  for (int r = 0; r < numRows; ++r) {
3321  dict_id_list.push_back(sqliteConnector_.getData<int>(r, 0));
3322  }
3323  for (auto dict_id : dict_id_list) {
3325  "UPDATE mapd_dictionaries SET refcount = refcount - 1 WHERE dictid = ?",
3326  std::vector<std::string>{std::to_string(dict_id)});
3327  }
3329  "DELETE FROM mapd_dictionaries WHERE dictid in (select comp_param from "
3330  "mapd_columns where compression = ? "
3331  "and tableid = ?) and refcount = 0",
3332  std::vector<std::string>{std::to_string(kENCODING_DICT), std::to_string(tableId)});
3333  sqliteConnector_.query_with_text_param("DELETE FROM mapd_columns WHERE tableid = ?",
3334  std::to_string(tableId));
3335  if (td->isView) {
3336  sqliteConnector_.query_with_text_param("DELETE FROM mapd_views WHERE tableid = ?",
3337  std::to_string(tableId));
3338  }
3341  "DELETE FROM omnisci_foreign_tables WHERE table_id = ?", std::to_string(tableId));
3342  }
3343 }
3344 
// Renames a single table `td` to `newTableName`: the name in mapd_tables is updated
// inside a sqlite transaction (rolled back and rethrown on std::exception), then the
// in-memory descriptor is renamed and re-keyed in tableDescriptorMap_ under the
// upper-cased new name. calciteMgr_->updateMetadata() is called with td->tableName both
// before and after the in-memory rename.
// NOTE(review): several lines are missing from this listing, including the sqlite call
// that issues the UPDATE and the lookup that initializes `tableDescIt`.
3345 void Catalog::renamePhysicalTable(const TableDescriptor* td, const string& newTableName) {
3346  cat_write_lock write_lock(this);
3348 
3349  sqliteConnector_.query("BEGIN TRANSACTION");
3350  try {
3352  "UPDATE mapd_tables SET name = ? WHERE tableid = ?",
3353  std::vector<std::string>{newTableName, std::to_string(td->tableId)});
3354  } catch (std::exception& e) {
3355  sqliteConnector_.query("ROLLBACK TRANSACTION");
3356  throw;
3357  }
3358  sqliteConnector_.query("END TRANSACTION");
3359  TableDescriptorMap::iterator tableDescIt =
3361  CHECK(tableDescIt != tableDescriptorMap_.end());
3362  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
3363  // Get table descriptor to change it
3364  TableDescriptor* changeTd = tableDescIt->second;
3365  changeTd->tableName = newTableName;
3366  tableDescriptorMap_.erase(tableDescIt); // erase entry under old name
3367  tableDescriptorMap_[to_upper(newTableName)] = changeTd;
3368  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
3369 }
3370 
// Renames table `td` to `newTableName`.
// First scope (under the catalog write lock): if `td` maps to physical shard tables,
// each shard is renamed to generatePhysicalTableName(newTableName, shard position + 1),
// then `td` itself is renamed.
// Second scope (after the write lock is released): every grantee returned for the table
// object by the system catalog has its DB object renamed to the new table name.
// NOTE(review): one line in each scope is missing from this listing.
3371 void Catalog::renameTable(const TableDescriptor* td, const string& newTableName) {
3372  {
3373  cat_write_lock write_lock(this);
3375  // rename all corresponding physical tables if this is a logical table
3376  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(td->tableId);
3377  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
3378  const auto physicalTables = physicalTableIt->second;
3379  CHECK(!physicalTables.empty());
3380  for (size_t i = 0; i < physicalTables.size(); i++) {
3381  int32_t physical_tb_id = physicalTables[i];
3382  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id);
3383  CHECK(phys_td);
3384  std::string newPhysTableName =
3385  generatePhysicalTableName(newTableName, static_cast<int32_t>(i + 1));
3386  renamePhysicalTable(phys_td, newPhysTableName);
3387  }
3388  }
3389  renamePhysicalTable(td, newTableName);
3390  }
3391  {
3392  DBObject object(newTableName, TableDBObjectType);
3393  // update table name in direct and effective priv map
3394  DBObjectKey key;
3395  key.dbId = currentDB_.dbId;
3396  key.objectId = td->tableId;
3397  key.permissionType = static_cast<int>(DBObjectType::TableDBObjectType);
3398  object.setObjectKey(key);
3399  auto objdescs = SysCatalog::instance().getMetadataForObject(
3400  currentDB_.dbId, static_cast<int>(DBObjectType::TableDBObjectType), td->tableId);
3401  for (auto obj : objdescs) {
3402  Grantee* grnt = SysCatalog::instance().getGrantee(obj->roleName);
// Role names for which no grantee is found are skipped.
3403  if (grnt) {
3404  grnt->renameDbObject(object);
3405  }
3406  }
3408  }
3409 }
3410 
// Renames column `cd` of table `td` to `newColumnName`, together with the
// cd->columnType.get_physical_cols() columns whose ids directly follow cd->columnId.
// For each of those columns the leading cd->columnName.size() characters of its name
// are replaced by `newColumnName`, so any suffix after the old name is kept.
// The names are first updated in mapd_columns inside a sqlite transaction (rolled back
// and rethrown on std::exception); afterwards the in-memory descriptors are renamed and
// re-keyed in columnDescriptorMap_ under (table id, upper-cased new name).
// NOTE(review): the first line of the signature and the sqlite call that issues the
// UPDATE are missing from this listing.
3412  const ColumnDescriptor* cd,
3413  const string& newColumnName) {
3414  cat_write_lock write_lock(this);
3416  sqliteConnector_.query("BEGIN TRANSACTION");
3417  try {
// `<=` is intentional: iteration 0 is the column itself, the rest are its physical columns.
3418  for (int i = 0; i <= cd->columnType.get_physical_cols(); ++i) {
3419  auto cdx = getMetadataForColumn(td->tableId, cd->columnId + i);
3420  CHECK(cdx);
3421  std::string new_column_name = cdx->columnName;
3422  new_column_name.replace(0, cd->columnName.size(), newColumnName);
3424  "UPDATE mapd_columns SET name = ? WHERE tableid = ? AND columnid = ?",
3425  std::vector<std::string>{new_column_name,
3426  std::to_string(td->tableId),
3427  std::to_string(cdx->columnId)});
3428  }
3429  } catch (std::exception& e) {
3430  sqliteConnector_.query("ROLLBACK TRANSACTION");
3431  throw;
3432  }
3433  sqliteConnector_.query("END TRANSACTION");
3434  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
3435  for (int i = 0; i <= cd->columnType.get_physical_cols(); ++i) {
3436  auto cdx = getMetadataForColumn(td->tableId, cd->columnId + i);
3437  CHECK(cdx);
3438  ColumnDescriptorMap::iterator columnDescIt = columnDescriptorMap_.find(
3439  std::make_tuple(td->tableId, to_upper(cdx->columnName)));
3440  CHECK(columnDescIt != columnDescriptorMap_.end());
3441  ColumnDescriptor* changeCd = columnDescIt->second;
3442  changeCd->columnName.replace(0, cd->columnName.size(), newColumnName);
3443  columnDescriptorMap_.erase(columnDescIt); // erase entry under old name
3444  columnDescriptorMap_[std::make_tuple(td->tableId, to_upper(changeCd->columnName))] =
3445  changeCd;
3446  }
3447  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
3448 }
3449 
// Stores dashboard `vd` in mapd_dashboards and returns its dashboard id.
// Inside a sqlite transaction the dashboard is looked up by (name, userid): an existing
// row has its state, image hash, metadata and update time overwritten, otherwise a new
// row is inserted. After the transaction ends, the row is read back to fill in
// vd.dashboardId and vd.updateTime. Both locks are released before the final steps.
// NOTE(review): the signature, the sqlite call lines, the declaration of `sqlite_lock`
// and the statements before and after the unlock calls are missing from this listing.
3451  cat_write_lock write_lock(this);
3453  sqliteConnector_.query("BEGIN TRANSACTION");
3454  try {
3455  // TODO(andrew): this should be an upsert
3457  "SELECT id FROM mapd_dashboards WHERE name = ? and userid = ?",
3458  std::vector<std::string>{vd.dashboardName, std::to_string(vd.userId)});
3459  if (sqliteConnector_.getNumRows() > 0) {
3461  "UPDATE mapd_dashboards SET state = ?, image_hash = ?, metadata = ?, "
3462  "update_time = "
3463  "datetime('now') where name = ? "
3464  "and userid = ?",
3465  std::vector<std::string>{vd.dashboardState,
3466  vd.imageHash,
3467  vd.dashboardMetadata,
3468  vd.dashboardName,
3469  std::to_string(vd.userId)});
3470  } else {
3472  "INSERT INTO mapd_dashboards (name, state, image_hash, metadata, "
3473  "update_time, "
3474  "userid) "
3475  "VALUES "
3476  "(?,?,?,?, "
3477  "datetime('now'), ?)",
3478  std::vector<std::string>{vd.dashboardName,
3479  vd.dashboardState,
3480  vd.imageHash,
3481  vd.dashboardMetadata,
3482  std::to_string(vd.userId)});
3483  }
3484  } catch (std::exception& e) {
3485  sqliteConnector_.query("ROLLBACK TRANSACTION");
3486  throw;
3487  }
3488  sqliteConnector_.query("END TRANSACTION");
3489 
3490  // now get the auto generated dashboardId
// This try/catch only rethrows, so it has no effect beyond letting the exception propagate.
3491  try {
3493  "SELECT id, strftime('%Y-%m-%dT%H:%M:%SZ', update_time) FROM mapd_dashboards "
3494  "WHERE name = ? and userid = ?",
3495  std::vector<std::string>{vd.dashboardName, std::to_string(vd.userId)});
3496  vd.dashboardId = sqliteConnector_.getData<int>(0, 0);
3497  vd.updateTime = sqliteConnector_.getData<std::string>(0, 1);
3498  } catch (std::exception& e) {
3499  throw;
3500  }
3504  sqlite_lock.unlock();
3505  write_lock.unlock();
3506  // NOTE(wamsi): Transactionally unsafe
3509  return vd.dashboardId;
3510 }
3511 
// Replaces the stored dashboard whose id is vd.dashboardId with the contents of `vd`.
//  1. Inside a sqlite transaction the row is updated; if no row with that id exists, an
//     error is logged and a runtime_error is thrown (the transaction is rolled back).
//  2. The existing in-memory entry with the same dashboard id is located in
//     dashboardDescriptorMap_ and erased; a runtime_error is thrown if none is found.
//  3. The update time is re-read from sqlite into vd.updateTime and both locks are
//     released.
// NOTE(review): the signature, the sqlite call lines, the last UPDATE parameter, the
// declaration of `sqlite_lock` and the statements around the unlock calls are missing
// from this listing.
3513  cat_write_lock write_lock(this);
3515 
3516  sqliteConnector_.query("BEGIN TRANSACTION");
3517  try {
3519  "SELECT id FROM mapd_dashboards WHERE id = ?",
3520  std::vector<std::string>{std::to_string(vd.dashboardId)});
3521  if (sqliteConnector_.getNumRows() > 0) {
3523  "UPDATE mapd_dashboards SET name = ?, state = ?, image_hash = ?, metadata = ?, "
3524  "userid = ?, update_time = datetime('now') where id = ? ",
3525  std::vector<std::string>{vd.dashboardName,
3526  vd.dashboardState,
3527  vd.imageHash,
3528  vd.dashboardMetadata,
3529  std::to_string(vd.userId),
3531  } else {
3532  LOG(ERROR) << "Error replacing dashboard id " << vd.dashboardId
3533  << " does not exist in db";
3534  throw runtime_error("Error replacing dashboard id " +
3535  std::to_string(vd.dashboardId) + " does not exist in db");
3536  }
3537  } catch (std::exception& e) {
3538  sqliteConnector_.query("ROLLBACK TRANSACTION");
3539  throw;
3540  }
3541  sqliteConnector_.query("END TRANSACTION");
3542 
3543  bool found{false};
// The map is keyed by "<userid>:<dashboard name>", so the entry is found by scanning for
// the dashboard id and then looked up again under the key built from the stored entry.
3544  for (auto descp : dashboardDescriptorMap_) {
3545  auto dash = descp.second.get();
3546  if (dash->dashboardId == vd.dashboardId) {
3547  found = true;
3548  auto viewDescIt = dashboardDescriptorMap_.find(std::to_string(dash->userId) + ":" +
3549  dash->dashboardName);
3550  if (viewDescIt ==
3551  dashboardDescriptorMap_.end()) { // check to make sure view exists
3552  LOG(ERROR) << "No metadata for dashboard for user " << dash->userId
3553  << " dashboard " << dash->dashboardName << " does not exist in map";
3554  throw runtime_error("No metadata for dashboard for user " +
3555  std::to_string(dash->userId) + " dashboard " +
3556  dash->dashboardName + " does not exist in map");
3557  }
3558  dashboardDescriptorMap_.erase(viewDescIt);
// Leaving the loop right after the erase avoids iterating a map that was just modified.
3559  break;
3560  }
3561  }
3562  if (!found) {
3563  LOG(ERROR) << "Error replacing dashboard id " << vd.dashboardId
3564  << " does not exist in map";
3565  throw runtime_error("Error replacing dashboard id " + std::to_string(vd.dashboardId) +
3566  " does not exist in map");
3567  }
3568 
3569  // now reload the object
3571  "SELECT id, strftime('%Y-%m-%dT%H:%M:%SZ', update_time) FROM "
3572  "mapd_dashboards "
3573  "WHERE id = ?",
3574  std::vector<std::string>{std::to_string(vd.dashboardId)});
3575  vd.updateTime = sqliteConnector_.getData<string>(0, 1);
3579  sqlite_lock.unlock();
3580  write_lock.unlock();
3581  // NOTE(wamsi): Transactionally unsafe
3584 }
3585 
3586 std::string Catalog::calculateSHA1(const std::string& data) {
3587  boost::uuids::detail::sha1 sha1;
3588  unsigned int digest[5];
3589  sha1.process_bytes(data.c_str(), data.length());
3590  sha1.get_digest(digest);
3591  std::stringstream ss;
3592  for (size_t i = 0; i < 5; i++) {
3593  ss << std::hex << digest[i];
3594  }
3595  return ss.str();
3596 }
3597 
// Stores link `ld` in mapd_links inside a sqlite transaction and returns ld.link.
// If a row with the same (link, userid) exists only its update_time is refreshed,
// otherwise a new row is inserted. ld.linkId and ld.updateTime are then read back, and
// after the transaction ends the link is added to the in-memory map via addLinkToMap().
// Any std::exception rolls the transaction back and is rethrown.
// NOTE(review): several lines are missing from this listing, including the statement
// that computes ld.link (only its trailing `.substr(0, 8)` is visible) and the sqlite
// call lines; how `min_length` is used cannot be seen here.
3598 std::string Catalog::createLink(LinkDescriptor& ld, size_t min_length) {
3599  cat_write_lock write_lock(this);
3601  sqliteConnector_.query("BEGIN TRANSACTION");
3602  try {
3604  .substr(0, 8);
3606  "SELECT linkid FROM mapd_links WHERE link = ? and userid = ?",
3607  std::vector<std::string>{ld.link, std::to_string(ld.userId)});
3608  if (sqliteConnector_.getNumRows() > 0) {
3610  "UPDATE mapd_links SET update_time = datetime('now') WHERE userid = ? AND link "
3611  "= ?",
3612  std::vector<std::string>{std::to_string(ld.userId), ld.link});
3613  } else {
3615  "INSERT INTO mapd_links (userid, link, view_state, view_metadata, update_time) "
3616  "VALUES (?,?,?,?, "
3617  "datetime('now'))",
3618  std::vector<std::string>{
3619  std::to_string(ld.userId), ld.link, ld.viewState, ld.viewMetadata});
3620  }
3621  // now get the auto generated dashid
// Unlike the existence check above, this read-back filters on link only, not userid.
3623  "SELECT linkid, strftime('%Y-%m-%dT%H:%M:%SZ', update_time) FROM mapd_links "
3624  "WHERE link = ?",
3625  ld.link);
3626  ld.linkId = sqliteConnector_.getData<int>(0, 0);
3627  ld.updateTime = sqliteConnector_.getData<std::string>(0, 1);
3628  } catch (std::exception& e) {
3629  sqliteConnector_.query("ROLLBACK TRANSACTION");
3630  throw;
3631  }
3632  sqliteConnector_.query("END TRANSACTION");
3633  addLinkToMap(ld);
3634  return ld.link;
3635 }
3636 
// Returns the descriptor of the column on which table `td` is sharded, or nullptr if no
// position matches. The column is identified by its 1-based position within the list
// returned by getAllColumnMetadataForTable(td->tableId, false, true, true), compared
// against td->shardedColumnId.
// NOTE(review): the first line of the signature (return type and name) is missing from
// this listing.
3638  const TableDescriptor* td) const {
3639  cat_read_lock read_lock(this);
3640 
3641  const auto column_descriptors =
3642  getAllColumnMetadataForTable(td->tableId, false, true, true);
3643 
3644  const ColumnDescriptor* shard_cd{nullptr};
// Positions are counted from 1; the loop does not stop early once a match is found.
3645  int i = 1;
3646  for (auto cd_itr = column_descriptors.begin(); cd_itr != column_descriptors.end();
3647  ++cd_itr, ++i) {
3648  if (i == td->shardedColumnId) {
3649  shard_cd = *cd_itr;
3650  }
3651  }
3652  return shard_cd;
3653 }
3654 
3655 std::vector<const TableDescriptor*> Catalog::getPhysicalTablesDescriptors(
3656  const TableDescriptor* logicalTableDesc) const {
3657  cat_read_lock read_lock(this);
3658  const auto physicalTableIt =
3659  logicalToPhysicalTableMapById_.find(logicalTableDesc->tableId);
3660  if (physicalTableIt == logicalToPhysicalTableMapById_.end()) {
3661  return {logicalTableDesc};
3662  }
3663 
3664  const auto physicalTablesIds = physicalTableIt->second;
3665  CHECK(!physicalTablesIds.empty());
3666  std::vector<const TableDescriptor*> physicalTables;
3667  for (size_t i = 0; i < physicalTablesIds.size(); i++) {
3668  physicalTables.push_back(getMetadataForTable(physicalTablesIds[i]));
3669  }
3670 
3671  return physicalTables;
3672 }
3673 
3674 const std::map<int, const ColumnDescriptor*> Catalog::getDictionaryToColumnMapping() {
3675  cat_read_lock read_lock(this);
3676 
3677  std::map<int, const ColumnDescriptor*> mapping;
3678 
3679  const auto tables = getAllTableMetadata();
3680  for (const auto td : tables) {
3681  if (td->shard >= 0) {
3682  // skip shards, they're not standalone tables
3683  continue;
3684  }
3685 
3686  for (auto& cd : getAllColumnMetadataForTable(td->tableId, false, false, true)) {
3687  const auto& ti = cd->columnType;
3688  if (ti.is_string()) {
3689  if (ti.get_compression() == kENCODING_DICT) {
3690  // if foreign reference, get referenced tab.col
3691  const auto dict_id = ti.get_comp_param();
3692 
3693  // ignore temp (negative) dictionaries
3694  if (dict_id > 0 && mapping.end() == mapping.find(dict_id)) {
3695  mapping[dict_id] = cd;
3696  }
3697  }
3698  }
3699  }
3700  }
3701 
3702  return mapping;
3703 }
3704 
// Returns the names of the tables in this catalog that `user_metadata` has any
// privilege on, filtered by `get_tables_type`:
//  - GET_PHYSICAL_TABLES keeps only non-view tables,
//  - GET_VIEWS keeps only views,
//  - any other value applies no view filter.
// Shard tables (shard >= 0) are always skipped. Both the system catalog read lock and
// this catalog's read lock are held for the duration of the call.
// NOTE(review): the line that constructs `dbObject` is missing from this listing.
3705 std::vector<std::string> Catalog::getTableNamesForUser(
3706  const UserMetadata& user_metadata,
3707  const GetTablesType get_tables_type) const {
3708  sys_read_lock syscat_read_lock(&SysCatalog::instance());
3709  cat_read_lock read_lock(this);
3710 
3711  std::vector<std::string> table_names;
3712  const auto tables = getAllTableMetadata();
3713  for (const auto td : tables) {
3714  if (td->shard >= 0) {
3715  // skip shards, they're not standalone tables
3716  continue;
3717  }
// Inside the switch, `continue` skips to the next table of the enclosing for loop.
3718  switch (get_tables_type) {
3719  case GET_PHYSICAL_TABLES: {
3720  if (td->isView) {
3721  continue;
3722  }
3723  break;
3724  }
3725  case GET_VIEWS: {
3726  if (!td->isView) {
3727  continue;
3728  }
// No break here: control falls into `default`, which only breaks out of the switch.
3729  }
3730  default:
3731  break;
3732  }
3734  dbObject.loadKey(*this);
3735  std::vector<DBObject> privObjects = {dbObject};
3736  if (!SysCatalog::instance().hasAnyPrivileges(user_metadata, privObjects)) {
3737  // skip table, as there are no privileges to access it
3738  continue;
3739  }
3740  table_names.push_back(td->tableName);
3741  }
3742  return table_names;
3743 }
3744 
3745 int Catalog::getLogicalTableId(const int physicalTableId) const {
3746  cat_read_lock read_lock(this);
3747  for (const auto& l : logicalToPhysicalTableMapById_) {
3748  if (l.second.end() != std::find_if(l.second.begin(),
3749  l.second.end(),
3750  [&](decltype(*l.second.begin()) tid) -> bool {
3751  return physicalTableId == tid;
3752  })) {
3753  return l.first;
3754  }
3755  }
3756  return physicalTableId;
3757 }
3758 
3759 void Catalog::checkpoint(const int logicalTableId) const {
3760  const auto td = getMetadataForTable(logicalTableId);
3761  const auto shards = getPhysicalTablesDescriptors(td);
3762  for (const auto shard : shards) {
3763  getDataMgr().checkpoint(getCurrentDB().dbId, shard->tableId);
3764  }
3765 }
3766 
3767 void Catalog::checkpointWithAutoRollback(const int logical_table_id) {
3768  auto table_epochs = getTableEpochs(getDatabaseId(), logical_table_id);
3769  try {
3770  checkpoint(logical_table_id);
3771  } catch (...) {
3772  setTableEpochsLogExceptions(getDatabaseId(), table_epochs);
3773  throw;
3774  }
3775 }
3776 
// NOTE(review): the listing's embedded numbering jumps from 3776 to 3778, so
// the signature line of this function is not shown here.
// Visible body: under a catalog write lock, erases the physical data of every
// table returned by getAllTableMetadata(), removes this database's file under
// "<basePath_>/mapd_catalogs/", and calls calciteMgr_->updateMetadata() with
// an empty table name.
3778  cat_write_lock write_lock(this);
3779  // Physically erase all tables and dictionaries from disk and memory
3780  const auto tables = getAllTableMetadata();
3781  for (const auto table : tables) {
3782  eraseTablePhysicalData(table);
3783  }
3784  // Physically erase database metadata
3785  boost::filesystem::remove(basePath_ + "/mapd_catalogs/" + currentDB_.dbName);
3786  calciteMgr_->updateMetadata(currentDB_.dbName, "");
3787 }
3788 
// NOTE(review): the listing's embedded numbering jumps from 3788 to 3790, so
// the signature line of this function is not shown here; `td` is its table
// descriptor argument.
// Visible body: drops the table's fragmenter, deletes its chunks at CPU and
// GPU memory levels, removes table-related data structures for non-views,
// calls calciteMgr_->updateMetadata() for the table, and finally removes the
// table from the catalog's maps.
3790  const int tableId = td->tableId;
3791  // must destroy fragmenter before deleteChunks is called.
3792  if (td->fragmenter != nullptr) {
// the fragmenter is reset through the map entry rather than through `td`
3793  auto tableDescIt = tableDescriptorMapById_.find(tableId);
3794  CHECK(tableDescIt != tableDescriptorMapById_.end());
3795  {
3796  INJECT_TIMER(deleting_fragmenter);
3797  tableDescIt->second->fragmenter = nullptr;
3798  }
3799  }
3800  ChunkKey chunkKeyPrefix = {currentDB_.dbId, tableId};
3801  {
3802  INJECT_TIMER(deleteChunksWithPrefix);
3803  // assuming deleteChunksWithPrefix is atomic
3804  dataMgr_->deleteChunksWithPrefix(chunkKeyPrefix, MemoryLevel::CPU_LEVEL);
3805  dataMgr_->deleteChunksWithPrefix(chunkKeyPrefix, MemoryLevel::GPU_LEVEL);
3806  }
3807  if (!td->isView) {
3808  INJECT_TIMER(Remove_Table);
3809  dataMgr_->removeTableRelatedDS(currentDB_.dbId, tableId);
3810  }
3811  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
3812  {
3813  INJECT_TIMER(removeTableFromMap_);
// done last: td->tableName is still read here
3814  removeTableFromMap(td->tableName, tableId);
3815  }
3816 }
3817 
3818 std::string Catalog::generatePhysicalTableName(const std::string& logicalTableName,
3819  const int32_t& shardNumber) {
3820  std::string physicalTableName =
3821  logicalTableName + physicalTableNameTag_ + std::to_string(shardNumber);
3822  return (physicalTableName);
3823 }
3824 
3825 void Catalog::set(const std::string& dbName, std::shared_ptr<Catalog> cat) {
3826  mapd_cat_map_[dbName] = cat;
3827 }
3828 
3829 std::shared_ptr<Catalog> Catalog::get(const std::string& dbName) {
3830  auto cat_it = mapd_cat_map_.find(dbName);
3831  if (cat_it != mapd_cat_map_.end()) {
3832  return cat_it->second;
3833  }
3834  return nullptr;
3835 }
3836 
3837 std::shared_ptr<Catalog> Catalog::get(const int32_t db_id) {
3838  for (const auto& entry : mapd_cat_map_) {
3839  if (entry.second->currentDB_.dbId == db_id) {
3840  return entry.second;
3841  }
3842  }
3843  return nullptr;
3844 }
3845 
3846 std::shared_ptr<Catalog> Catalog::checkedGet(const int32_t db_id) {
3847  auto catalog = get(db_id);
3848  CHECK(catalog);
3849  return catalog;
3850 }
3851 
3852 std::shared_ptr<Catalog> Catalog::get(const string& basePath,
3853  const DBMetadata& curDB,
3854  std::shared_ptr<Data_Namespace::DataMgr> dataMgr,
3855  const std::vector<LeafHostInfo>& string_dict_hosts,
3856  std::shared_ptr<Calcite> calcite,
3857  bool is_new_db) {
3858  auto cat = Catalog::get(curDB.dbName);
3859 
3860  if (cat) {
3861  return cat;
3862  } else {
3863  cat = std::make_shared<Catalog>(
3864  basePath, curDB, dataMgr, string_dict_hosts, calcite, is_new_db);
3865  Catalog::set(curDB.dbName, cat);
3866  return cat;
3867  }
3868 }
3869 
3870 void Catalog::remove(const std::string& dbName) {
3871  mapd_cat_map_.erase(dbName);
3872 }
3873 
3874 void Catalog::vacuumDeletedRows(const int logicalTableId) const {
3875  // shard here to serve request from TableOptimizer and elsewhere
3876  const auto td = getMetadataForTable(logicalTableId);
3877  const auto shards = getPhysicalTablesDescriptors(td);
3878  for (const auto shard : shards) {
3879  vacuumDeletedRows(shard);
3880  }
3881 }
3882 
// NOTE(review): the listing's embedded numbering jumps from 3882 to 3884, so
// the signature line of this function is not shown here; `td` is its table
// descriptor argument. Line 3899 is also skipped (see below).
// Visible body: for a table that has a deleted column, walks that column's
// chunk metadata and compacts the rows of every chunk whose max stat equals 1.
3884  // "if not a table that supports delete return nullptr, nothing more to do"
3885  const ColumnDescriptor* cd = getDeletedColumn(td);
3886  if (nullptr == cd) {
3887  return;
3888  }
3889  // vacuum chunks which show sign of deleted rows in metadata
3890  ChunkKey chunkKeyPrefix = {currentDB_.dbId, td->tableId, cd->columnId};
3891  ChunkMetadataVector chunkMetadataVec;
3892  dataMgr_->getChunkMetadataVecForKeyPrefix(chunkMetadataVec, chunkKeyPrefix);
// `cm` is taken by value, so each element is copied per iteration
3893  for (auto cm : chunkMetadataVec) {
3894  // "delete has occurred"
3895  if (cm.second->chunkStats.max.tinyintval == 1) {
3896  UpdelRoll updel_roll;
3897  updel_roll.catalog = this;
3898  updel_roll.logicalTableId = getLogicalTableId(td->tableId);
// NOTE(review): the listing skips line 3899 here; updel_roll.memoryLevel is
// read below, so it is presumably assigned on that line - confirm.
// this `cd` shadows the outer `cd`; it is looked up from element [2] of the chunk key
3900  const auto cd = getMetadataForColumn(td->tableId, cm.first[2]);
3901  const auto chunk = Chunk_NS::Chunk::getChunk(cd,
3902  &getDataMgr(),
3903  cm.first,
3904  updel_roll.memoryLevel,
3905  0,
3906  cm.second->numBytes,
3907  cm.second->numElements);
// element [3] of the chunk key is passed through to compactRows()
3908  td->fragmenter->compactRows(this,
3909  td,
3910  cm.first[3],
3911  td->fragmenter->getVacuumOffsets(chunk),
3912  updel_roll.memoryLevel,
3913  updel_roll);
3914  updel_roll.commitUpdate();
3915  }
3916  }
3917 }
3918 
// NOTE(review): the listing's embedded numbering jumps from 3918 to 3921, so
// the signature line and the start of the query call are not shown here; the
// string below is the tail of that call's argument.
// Visible body: for each row of omnisci_foreign_servers, builds a
// ForeignServer and stores the same shared_ptr in foreignServerMap_ (keyed by
// name) and foreignServerMapById_ (keyed by id).
3921  "SELECT id, name, data_wrapper_type, options, owner_user_id, creation_time FROM "
3922  "omnisci_foreign_servers");
3923  auto num_rows = sqliteConnector_.getNumRows();
3924  for (size_t row = 0; row < num_rows; row++) {
// column indices below follow the SELECT list order above
3925  auto foreign_server = std::make_shared<foreign_storage::ForeignServer>(
3926  sqliteConnector_.getData<int>(row, 0),
3927  sqliteConnector_.getData<std::string>(row, 1),
3928  sqliteConnector_.getData<std::string>(row, 2),
3929  sqliteConnector_.getData<std::string>(row, 3),
3930  sqliteConnector_.getData<std::int32_t>(row, 4),
// NOTE(review): creation_time is read as a 32-bit integer - confirm that
// matches how the value is stored.
3931  sqliteConnector_.getData<std::int32_t>(row, 5));
3932  foreignServerMap_[foreign_server->name] = foreign_server;
3933  foreignServerMapById_[foreign_server->id] = foreign_server;
3934  }
3935 }
3936 
// NOTE(review): the listing's embedded numbering jumps from 3936 to 3939, so
// the signature line and the start of the query call are not shown here; the
// string below is the tail of that call's argument.
// Visible body: for each row of omnisci_foreign_tables, finds the in-memory
// table descriptor, CHECKs that it is a ForeignTable, and fills in its
// server pointer, options map and refresh times.
3939  "SELECT table_id, server_id, options, last_refresh_time, next_refresh_time from "
3940  "omnisci_foreign_tables");
3941  auto num_rows = sqliteConnector_.getNumRows();
3942  for (size_t r = 0; r < num_rows; r++) {
3943  const auto table_id = sqliteConnector_.getData<int32_t>(r, 0);
3944  const auto server_id = sqliteConnector_.getData<int32_t>(r, 1);
3945  const auto& options = sqliteConnector_.getData<std::string>(r, 2);
// NOTE(review): refresh times are read as `int` here - confirm this matches
// the width of ForeignTable::last_refresh_time / next_refresh_time.
3946  const auto last_refresh_time = sqliteConnector_.getData<int>(r, 3);
3947  const auto next_refresh_time = sqliteConnector_.getData<int>(r, 4);
3948 
3949  CHECK(tableDescriptorMapById_.find(table_id) != tableDescriptorMapById_.end());
3950  auto foreign_table =
3951  dynamic_cast<foreign_storage::ForeignTable*>(tableDescriptorMapById_[table_id]);
3952  CHECK(foreign_table);
// operator[] is used for the server lookup; an unknown server_id is caught by
// the CHECK on the next line
3953  foreign_table->foreign_server = foreignServerMapById_[server_id].get();
3954  CHECK(foreign_table->foreign_server);
3955  foreign_table->populateOptionsMap(options);
3956  foreign_table->last_refresh_time = last_refresh_time;
3957  foreign_table->next_refresh_time = next_refresh_time;
3958  }
3959 }
3960 
// Updates one column (`property`) of the omnisci_foreign_servers row whose
// name is server_name, setting it to `value`.
// Throws std::runtime_error when no server with that name is found.
// NOTE(review): the listing's embedded numbering skips lines 3964-3965 and
// 3972; the two string/vector argument pairs below are the tails of calls
// whose first lines are not shown.
3961 void Catalog::setForeignServerProperty(const std::string& server_name,
3962  const std::string& property,
3963  const std::string& value) {
3966  "SELECT id from omnisci_foreign_servers where name = ?",
3967  std::vector<std::string>{server_name});
3968  auto num_rows = sqliteConnector_.getNumRows();
3969  if (num_rows > 0) {
3970  CHECK_EQ(size_t(1), num_rows);
3971  auto server_id = sqliteConnector_.getData<int32_t>(0, 0);
// `property` is concatenated into the SQL text as a column name, while
// `value` and the id are passed as parameters.
// NOTE(review): callers presumably pass only fixed column names - confirm.
3973  "UPDATE omnisci_foreign_servers SET " + property + " = ? WHERE id = ?",
3974  std::vector<std::string>{value, std::to_string(server_id)});
3975  } else {
3976  throw std::runtime_error{"Can not change property \"" + property +
3977  "\" for foreign server." + " Foreign server \"" +
3978  server_name + "\" is not found."};
3979  }
3980 }
3981 
// NOTE(review): the listing's embedded numbering jumps from 3981 to 3984, so
// the signature line (and whatever precedes the first statement) is not shown
// here. Lines 3992, 3994, 4000 and 4002 are skipped as well, so the
// make_unique argument lists below are incomplete in this view.
// Visible body: builds one options map (local-file storage type, base path
// set to the platform's preferred path separator) and uses it to create two
// foreign servers, "omnisci_local_csv" and "omnisci_local_parquet", via
// createForeignServerNoLocks(..., true).
3984  std::map<std::string, std::string, std::less<>> options;
3985  options[std::string{ForeignServer::STORAGE_TYPE_KEY}] =
3986  ForeignServer::LOCAL_FILE_STORAGE_TYPE;
3987  options[std::string{ForeignServer::BASE_PATH_KEY}] =
3988  boost::filesystem::path::preferred_separator;
3989 
3990  auto local_csv_server =
3991  std::make_unique<ForeignServer>("omnisci_local_csv",
3993  options,
// each server is validated before ownership is handed over
3995  local_csv_server->validate();
3996  createForeignServerNoLocks(std::move(local_csv_server), true);
3997 
3998  auto local_parquet_server =
3999  std::make_unique<ForeignServer>("omnisci_local_parquet",
4001  options,
4003  local_parquet_server->validate();
4004  createForeignServerNoLocks(std::move(local_parquet_server), true);
4005 }
4006 
4007 // prepare a fresh file reload on next table access
4008 void Catalog::setForReload(const int32_t tableId) {
4009  cat_read_lock read_lock(this);
4010  const auto td = getMetadataForTable(tableId);
4011  for (const auto shard : getPhysicalTablesDescriptors(td)) {
4012  const auto tableEpoch = getTableEpoch(currentDB_.dbId, shard->tableId);
4013  setTableEpoch(currentDB_.dbId, shard->tableId, tableEpoch);
4014  }
4015 }
4016 
4017 // get a table's data dirs
4018 std::vector<std::string> Catalog::getTableDataDirectories(
4019  const TableDescriptor* td) const {
4020  const auto global_file_mgr = getDataMgr().getGlobalFileMgr();
4021  std::vector<std::string> file_paths;
4022  for (auto shard : getPhysicalTablesDescriptors(td)) {
4023  const auto file_mgr = dynamic_cast<File_Namespace::FileMgr*>(
4024  global_file_mgr->getFileMgr(currentDB_.dbId, shard->tableId));
4025  boost::filesystem::path file_path(file_mgr->getFileMgrBasePath());
4026  file_paths.push_back(file_path.filename().string());
4027  }
4028  return file_paths;
4029 }
4030 
4031 // get a column's dict dir basename
// NOTE(review): the listing's embedded numbering skips line 4032 (the
// signature line) and line 4034 (one operand of the condition below); `cd` is
// the column descriptor argument.
// Visible body: for a string or string-array column with a positive
// comp_param, looks the dictionary up in dictDescriptorMapByRef_ and returns
// the last path component of its dictFolderPath; otherwise returns an empty
// string.
4033  if ((cd->columnType.is_string() || cd->columnType.is_string_array()) &&
4035  cd->columnType.get_comp_param() > 0) {
4036  const auto dictId = cd->columnType.get_comp_param();
4037  const DictRef dictRef(currentDB_.dbId, dictId);
4038  const auto dit = dictDescriptorMapByRef_.find(dictRef);
// a missing or null dictionary descriptor is treated as a fatal error
4039  CHECK(dit != dictDescriptorMapByRef_.end());
4040  CHECK(dit->second);
4041  boost::filesystem::path file_path(dit->second->dictFolderPath);
4042  return file_path.filename().string();
4043  }
4044  return std::string();
4045 }
4046 
4047 // get a table's dict dirs
4048 std::vector<std::string> Catalog::getTableDictDirectories(
4049  const TableDescriptor* td) const {
4050  std::vector<std::string> file_paths;
4051  for (auto cd : getAllColumnMetadataForTable(td->tableId, false, false, true)) {
4052  auto file_base = getColumnDictDirectory(cd);
4053  if (!file_base.empty() &&
4054  file_paths.end() == std::find(file_paths.begin(), file_paths.end(), file_base)) {
4055  file_paths.push_back(file_base);
4056  }
4057  }
4058  return file_paths;
4059 }
4060 
4061 // returns table schema in a string
4062 // NOTE(sy): Might be able to replace dumpSchema() later with
4063 // dumpCreateTable() after a deeper review of the TableArchiver code.
4064 std::string Catalog::dumpSchema(const TableDescriptor* td) const {
4065  cat_read_lock read_lock(this);
4066 
4067  std::ostringstream os;
4068  os << "CREATE TABLE @T (";
4069  // gather column defines
4070  const auto cds = getAllColumnMetadataForTable(td->tableId, false, false, false);
4071  std::string comma;
4072  std::vector<std::string> shared_dicts;
4073  std::map<const std::string, const ColumnDescriptor*> dict_root_cds;
4074  for (const auto cd : cds) {
4075  if (!(cd->isSystemCol || cd->isVirtualCol)) {
4076  const auto& ti = cd->columnType;
4077  os << comma << cd->columnName;
4078  // CHAR is perculiar... better dump it as TEXT(32) like \d does
4079  if (ti.get_type() == SQLTypes::kCHAR) {
4080  os << " "
4081  << "TEXT";
4082  } else if (ti.get_subtype() == SQLTypes::kCHAR) {
4083  os << " "
4084  << "TEXT[]";
4085  } else {
4086  os << " " << ti.get_type_name();
4087  }
4088  os << (ti.get_notnull() ? " NOT NULL" : "");
4089  if (ti.is_string() || (ti.is_array() && ti.get_subtype() == kTEXT)) {
4090  auto size = ti.is_array() ? ti.get_logical_size() : ti.get_size();
4091  if (ti.get_compression() == kENCODING_DICT) {
4092  // if foreign reference, get referenced tab.col
4093  const auto dict_id = ti.get_comp_param();
4094  const DictRef dict_ref(currentDB_.dbId, dict_id);
4095  const auto dict_it = dictDescriptorMapByRef_.find(dict_ref);
4096  CHECK(dict_it != dictDescriptorMapByRef_.end());
4097  const auto dict_name = dict_it->second->dictName;
4098  // when migrating a table, any foreign dict ref will be dropped
4099  // and the first cd of a dict will become root of the dict
4100  if (dict_root_cds.end() == dict_root_cds.find(dict_name)) {
4101  dict_root_cds[dict_name] = cd;
4102  os << " ENCODING " << ti.get_compression_name() << "(" << (size * 8) << ")";
4103  } else {
4104  const auto dict_root_cd = dict_root_cds[dict_name];
4105  shared_dicts.push_back("SHARED DICTIONARY (" + cd->columnName +
4106  ") REFERENCES @T(" + dict_root_cd->columnName + ")");
4107  // "... shouldn't specify an encoding, it borrows from the referenced column"
4108  }
4109  } else {
4110  os << " ENCODING NONE";
4111  }
4112  } else if (ti.is_date_in_days() ||
4113  (ti.get_size() > 0 && ti.get_size() != ti.get_logical_size())) {
4114  const auto comp_param = ti.get_comp_param() ? ti.get_comp_param() : 32;
4115  os << " ENCODING " << ti.get_compression_name() << "(" << comp_param << ")";
4116  } else if (ti.is_geometry()) {
4117  if (ti.get_compression() == kENCODING_GEOINT) {
4118  os << " ENCODING " << ti.get_compression_name() << "(" << ti.get_comp_param()
4119  << ")";
4120  } else {
4121  os << " ENCODING NONE";
4122  }
4123  }
4124  comma = ", ";
4125  }
4126  }
4127  // gather SHARED DICTIONARYs
4128  if (shared_dicts.size()) {
4129  os << ", " << boost::algorithm::join(shared_dicts, ", ");
4130  }
4131  // gather WITH options ...
4132  std::vector<std::string> with_options;
4133  with_options.push_back("FRAGMENT_SIZE=" + std::to_string(td->maxFragRows));
4134  with_options.push_back("MAX_CHUNK_SIZE=" + std::to_string(td->maxChunkSize));
4135  with_options.push_back("PAGE_SIZE=" + std::to_string(td->fragPageSize));
4136  with_options.push_back("MAX_ROWS=" + std::to_string(td->maxRows));
4137  with_options.emplace_back(td->hasDeletedCol ? "VACUUM='DELAYED'"
4138  : "VACUUM='IMMEDIATE'");
4139  if (!td->partitions.empty()) {
4140  with_options.push_back("PARTITIONS='" + td->partitions + "'");
4141  }
4142  if (td->nShards > 0) {
4143  const auto shard_cd = getMetadataForColumn(td->tableId, td->shardedColumnId);
4144  CHECK(shard_cd);
4145  os << ", SHARD KEY(" << shard_cd->columnName << ")";
4146  with_options.push_back(
4147  "SHARD_COUNT=" +
4148  std::to_string(td->nShards * std::max(g_leaf_count, static_cast<size_t>(1))));
4149  }
4150  if (td->sortedColumnId > 0) {
4151  const auto sort_cd = getMetadataForColumn(td->tableId, td->sortedColumnId);
4152  CHECK(sort_cd);
4153  with_options.push_back("SORT_COLUMN='" + sort_cd->columnName + "'");
4154  }
4155  os << ") WITH (" + boost::algorithm::join(with_options, ", ") + ");";
4156  return os.str();
4157 }
4158 
4159 namespace {
4160 
4161 void unserialize_key_metainfo(std::vector<std::string>& shared_dicts,
4162  std::set<std::string>& shared_dict_column_names,
4163  const std::string keyMetainfo) {
4164  rapidjson::Document document;
4165  document.Parse(keyMetainfo.c_str());
4166  CHECK(!document.HasParseError());
4167  CHECK(document.IsArray());
4168  for (auto it = document.Begin(); it != document.End(); ++it) {
4169  const auto& key_with_spec_json = *it;
4170  CHECK(key_with_spec_json.IsObject());
4171  const std::string type = key_with_spec_json["type"].GetString();
4172  const std::string name = key_with_spec_json["name"].GetString();
4173  auto key_with_spec = type + " (" + name + ")";
4174  if (type == "SHARED DICTIONARY") {
4175  shared_dict_column_names.insert(name);
4176  key_with_spec += " REFERENCES ";
4177  const std::string foreign_table = key_with_spec_json["foreign_table"].GetString();
4178  const std::string foreign_column = key_with_spec_json["foreign_column"].GetString();
4179  key_with_spec += foreign_table + "(" + foreign_column + ")";
4180  } else {
4181  CHECK(type == "SHARD KEY");
4182  }
4183  shared_dicts.push_back(key_with_spec);
4184  }
4185 }
4186 
4187 } // namespace
4188 
4189 #include "Parser/ReservedKeywords.h"
4190 
// returns true if the string contains at least one character that
// std::isspace classifies as whitespace
inline bool contains_spaces(std::string_view str) {
  // iterate as unsigned char so std::isspace never sees a negative value
  for (const unsigned char ch : str) {
    if (std::isspace(ch)) {
      return true;
    }
  }
  return false;
}
4197 
// NOTE(review): the listing's embedded numbering jumps from 4197 to 4200, so
// the first line of this function's signature is not shown here.
// Visible part: takes the string to inspect plus a set of characters
// (defaulting to the punctuation list below) and returns true when `str`
// contains at least one of them.
4200  std::string_view str,
4201  std::string_view chars = "`~!@#$%^&*()-=+[{]}\\|;:'\",<.>/?") {
4202  return str.find_first_of(chars) != std::string_view::npos;
4203 }
4204 
4206 inline bool is_reserved_sql_keyword(std::string_view str) {
4207  return reserved_keywords.find(to_upper(std::string(str))) != reserved_keywords.end();
4208 }
4209 
4210 // returns a "CREATE TABLE" statement in a string for "SHOW CREATE TABLE"
// NOTE(review): the listing's embedded numbering skips line 4211 (the first
// signature line, which carries the `td` parameter) as well as lines 4223 and
// 4256 inside the body; the gaps are marked where they occur.
// multiline_formatting: put each column, SERVER and WITH on its own line.
// dump_defaults: also emit WITH options that equal their DEFAULT_* value.
// Views return early with "CREATE VIEW <name> AS <viewSQL>".
4212  bool multiline_formatting,
4213  bool dump_defaults) const {
4214  cat_read_lock read_lock(this);
4215 
// non-null only when td is actually a ForeignTable
4216  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(td);
4217  std::ostringstream os;
4218 
4219  if (foreign_table) {
4220  os << "CREATE FOREIGN TABLE " << td->tableName << " (";
4221  } else if (!td->isView) {
4222  os << "CREATE ";
// NOTE(review): line 4223 is skipped by the listing; the condition that
// guards "TEMPORARY " is not visible here.
4224  os << "TEMPORARY ";
4225  }
4226  os << "TABLE " + td->tableName + " (";
4227  } else {
4228  os << "CREATE VIEW " + td->tableName + " AS " << td->viewSQL;
4229  return os.str();
4230  }
4231  // scan column defines
4232  std::vector<std::string> shared_dicts;
4233  std::set<std::string> shared_dict_column_names;
4234  unserialize_key_metainfo(shared_dicts, shared_dict_column_names, td->keyMetainfo);
4235  // gather column defines
4236  const auto cds = getAllColumnMetadataForTable(td->tableId, false, false, false);
// dict_root_cds is declared but not referenced again in the visible lines
4237  std::map<const std::string, const ColumnDescriptor*> dict_root_cds;
4238  bool first = true;
4239  for (const auto cd : cds) {
4240  if (!(cd->isSystemCol || cd->isVirtualCol)) {
4241  const auto& ti = cd->columnType;
4242  if (!first) {
4243  os << ",";
4244  if (!multiline_formatting) {
4245  os << " ";
4246  }
4247  } else {
4248  first = false;
4249  }
4250  if (multiline_formatting) {
4251  os << "\n ";
4252  }
4253  // column name
4254  bool column_name_needs_quotes = is_reserved_sql_keyword(cd->columnName) ||
4255  contains_spaces(cd->columnName) ||
// NOTE(review): line 4256 is skipped by the listing; the last operand of
// this `||` chain is not visible here.
4257  if (!column_name_needs_quotes) {
4258  os << cd->columnName;
4259  } else {
4260  os << "\"" << cd->columnName << "\"";
4261  }
4262  // CHAR is peculiar... better dump it as TEXT(32) like \d does
4263  if (ti.get_type() == SQLTypes::kCHAR) {
4264  os << " "
4265  << "TEXT";
4266  } else if (ti.get_subtype() == SQLTypes::kCHAR) {
4267  os << " "
4268  << "TEXT[]";
4269  } else {
4270  os << " " << ti.get_type_name();
4271  }
4272  os << (ti.get_notnull() ? " NOT NULL" : "");
// encoding is only printed for columns that are not shared-dictionary columns
4273  if (shared_dict_column_names.find(cd->columnName) ==
4274  shared_dict_column_names.end()) {
4275  // avoids "Exception: Column ... shouldn't specify an encoding, it borrows it from
4276  // the referenced column"
4277  if (ti.is_string() || (ti.is_array() && ti.get_subtype() == kTEXT)) {
4278  auto size = ti.is_array() ? ti.get_logical_size() : ti.get_size();
4279  if (ti.get_compression() == kENCODING_DICT) {
// the size is multiplied by 8 before printing
4280  os << " ENCODING " << ti.get_compression_name() << "(" << (size * 8) << ")";
4281  } else {
4282  os << " ENCODING NONE";
4283  }
4284  } else if (ti.is_date_in_days() ||
4285  (ti.get_size() > 0 && ti.get_size() != ti.get_logical_size())) {
// a comp_param of 0 is printed as 32
4286  const auto comp_param = ti.get_comp_param() ? ti.get_comp_param() : 32;
4287  os << " ENCODING " << ti.get_compression_name() << "(" << comp_param << ")";
4288  } else if (ti.is_geometry()) {
4289  if (ti.get_compression() == kENCODING_GEOINT) {
4290  os << " ENCODING " << ti.get_compression_name() << "(" << ti.get_comp_param()
4291  << ")";
4292  } else {
4293  os << " ENCODING NONE";
4294  }
4295  }
4296  }
4297  }
4298  }
4299  // gather SHARED DICTIONARYs
// (shared_dicts also holds any "SHARD KEY (...)" entries produced by
// unserialize_key_metainfo)
4300  if (shared_dicts.size()) {
4301  std::string comma;
4302  if (!multiline_formatting) {
4303  comma = ", ";
4304  } else {
4305  comma = ",\n ";
4306  }
4307  os << comma;
4308  os << boost::algorithm::join(shared_dicts, comma);
4309  }
4310  os << ")";
4311 
4312  std::vector<std::string> with_options;
4313  if (foreign_table) {
4314  if (multiline_formatting) {
4315  os << "\n";
4316  } else {
4317  os << " ";
4318  }
4319  os << "SERVER " << foreign_table->foreign_server->name;
4320 
4321  // gather WITH options ...
4322  for (const auto& [option, value] : foreign_table->options) {
4323  with_options.emplace_back(option + "='" + value + "'");
4324  }
4325  }
4326 
// FRAGMENT_SIZE is the only table option also emitted for foreign tables
4327  if (dump_defaults || td->maxFragRows != DEFAULT_FRAGMENT_ROWS) {
4328  with_options.push_back("FRAGMENT_SIZE=" + std::to_string(td->maxFragRows));
4329  }
4330  if (!foreign_table && (dump_defaults || td->maxChunkSize != DEFAULT_MAX_CHUNK_SIZE)) {
4331  with_options.push_back("MAX_CHUNK_SIZE=" + std::to_string(td->maxChunkSize));
4332  }
4333  if (!foreign_table && (dump_defaults || td->fragPageSize != DEFAULT_PAGE_SIZE)) {
4334  with_options.push_back("PAGE_SIZE=" + std::to_string(td->fragPageSize));
4335  }
4336  if (!foreign_table && (dump_defaults || td->maxRows != DEFAULT_MAX_ROWS)) {
4337  with_options.push_back("MAX_ROWS=" + std::to_string(td->maxRows));
4338  }
// without dump_defaults, VACUUM is only printed when the table has no deleted column
4339  if (!foreign_table && (dump_defaults || !td->hasDeletedCol)) {
4340  with_options.push_back(td->hasDeletedCol ? "VACUUM='DELAYED'" : "VACUUM='IMMEDIATE'");
4341  }
4342  if (!foreign_table && !td->partitions.empty()) {
4343  with_options.push_back("PARTITIONS='" + td->partitions + "'");
4344  }
4345  if (!foreign_table && td->nShards > 0) {
// shard_cd is only CHECKed here; its name is not printed by this block
4346  const auto shard_cd = getMetadataForColumn(td->tableId, td->shardedColumnId);
4347  CHECK(shard_cd);
4348  with_options.push_back(
4349  "SHARD_COUNT=" +
4350  std::to_string(td->nShards * std::max(g_leaf_count, static_cast<size_t>(1))));
4351  }
4352  if (!foreign_table && td->sortedColumnId > 0) {
4353  const auto sort_cd = getMetadataForColumn(td->tableId, td->sortedColumnId);
4354  CHECK(sort_cd);
4355  with_options.push_back("SORT_COLUMN='" + sort_cd->columnName + "'");
4356  }
4357 
// the WITH clause is omitted entirely when no option was collected
4358  if (!with_options.empty()) {
4359  if (!multiline_formatting) {
4360  os << " ";
4361  } else {
4362  os << "\n";
4363  }
4364  os << "WITH (" + boost::algorithm::join(with_options, ", ") + ")";
4365  }
4366  os << ";";
4367  return os.str();
4368 }
4369 
// NOTE(review): the listing's embedded numbering jumps from 4369 to 4371, so
// the first signature line (which carries the `name` parameter) is not shown
// here.
// Returns true when getMetadataForTable(name, false) finds nothing. When
// something is found, returns false if if_not_exists is set and otherwise
// throws std::runtime_error.
4371  const bool if_not_exists) {
4372  if (getMetadataForTable(name, false)) {
4373  if (if_not_exists) {
4374  return false;
4375  }
4376  throw std::runtime_error("Table or View with name \"" + name + "\" already exists.");
4377  }
4378  return true;
4379 }
4380 
// Returns the foreign tables whose refresh timing option has a particular
// value and whose next_refresh_time is not after the current time.
// NOTE(review): the listing's embedded numbering skips lines 4390 and 4396,
// so the option key looked up and the value it is compared against are not
// visible here.
4381 std::vector<const TableDescriptor*> Catalog::getAllForeignTablesForRefresh() const {
4382  cat_read_lock read_lock(this);
4383  std::vector<const TableDescriptor*> tables;
// `entry` is taken by value, so each map element is copied per iteration
4384  for (auto entry : tableDescriptorMapById_) {
4385  auto table_descriptor = entry.second;
4386  if (table_descriptor->storageType == StorageType::FOREIGN_TABLE) {
4387  auto foreign_table = dynamic_cast<foreign_storage::ForeignTable*>(table_descriptor);
4388  CHECK(foreign_table);
4389  auto timing_type_entry = foreign_table->options.find(
// the option is required: a table without it fails the CHECK
4391  CHECK(timing_type_entry != foreign_table->options.end());
// current time in whole seconds since the system_clock epoch
4392  int64_t current_time = std::chrono::duration_cast<std::chrono::seconds>(
4393  std::chrono::system_clock::now().time_since_epoch())
4394  .count();
4395  if (timing_type_entry->second ==
4397  foreign_table->next_refresh_time <= current_time) {
4398  tables.emplace_back(foreign_table);
4399  }
4400  }
4401  }
4402  return tables;
4403 }
4404 
// Sets the foreign table's last_refresh_time to the current time and its
// next_refresh_time to get_next_refresh_time(), both in
// omnisci_foreign_tables and on the in-memory descriptor.
// table_id must refer to an existing ForeignTable; otherwise a CHECK fails.
// NOTE(review): the listing's embedded numbering skips lines 4407 and 4418;
// the UPDATE string below is the tail of a call whose first line is not shown.
4405 void Catalog::updateForeignTableRefreshTimes(const int32_t table_id) {
4406  cat_write_lock write_lock(this);
4408  CHECK(tableDescriptorMapById_.find(table_id) != tableDescriptorMapById_.end());
4409  auto table_descriptor = tableDescriptorMapById_.find(table_id)->second;
4410  CHECK(table_descriptor);
4411  auto foreign_table = dynamic_cast<foreign_storage::ForeignTable*>(table_descriptor);
4412  CHECK(foreign_table);
// current time in whole seconds since the system_clock epoch
4413  int64_t current_time = std::chrono::duration_cast<std::chrono::seconds>(
4414  std::chrono::system_clock::now().time_since_epoch())
4415  .count();
4416  auto last_refresh_time = current_time;
4417  auto next_refresh_time = get_next_refresh_time(*foreign_table);
4419  "UPDATE omnisci_foreign_tables SET last_refresh_time = ?, next_refresh_time = ? "
4420  "WHERE table_id = ?",
4421  std::vector<std::string>{std::to_string(last_refresh_time),
4422  std::to_string(next_refresh_time),
4423  std::to_string(foreign_table->tableId)});
// the in-memory descriptor is updated after the UPDATE statement above
4424  foreign_table->last_refresh_time = last_refresh_time;
4425  foreign_table->next_refresh_time = next_refresh_time;
4426 }
4427 
4428 // TODO(Misiu): This function should be merged with setForeignServerOptions via
4429 // inheritance rather than replicating similar functions.
// Applies options_map to the named foreign table's in-memory options
// (clearing the existing ones first when clear_existing_options is set) and
// validates the result. If validation throws a std::exception, the previous
// options are restored and the exception is rethrown.
// options_map is moved from.
4430 void Catalog::setForeignTableOptions(const std::string& table_name,
4431  foreign_storage::OptionsMap& options_map,
4432  bool clear_existing_options) {
4433  cat_write_lock write_lock(this);
4434  // update in-memory table
4435  auto foreign_table = getForeignTableUnlocked(table_name);
// copy kept so the options can be rolled back on validation failure
4436  auto saved_options = foreign_table->options;
4437  foreign_table->populateOptionsMap(std::move(options_map), clear_existing_options);
4438  try {
4439  foreign_table->validateOptions();
// only exceptions derived from std::exception trigger the rollback; `e` itself is unused
4440  } catch (const std::exception& e) {
4441  // validation did not succeed:
4442  // revert to saved options & throw exception
4443  foreign_table->options = saved_options;
4444  throw;
4445  }
// NOTE(review): the listing's embedded numbering skips line 4446; the line
// below is the argument list of a call whose name is not shown, passing the
// "options" property and the options serialized as a JSON string.
4447  foreign_table, "options", foreign_table->getOptionsAsJsonString());
4448 }
4449 
// NOTE(review): the listing's embedded numbering skips line 4450 (the first
// signature line, which carries the `table` parameter) and lines 4453-4454
// and 4460; the string/vector argument pairs below are the tails of calls
// whose first lines are not shown.
// Visible body: updates one column (`property`) of the
// omnisci_foreign_tables row for table->tableId, setting it to `value`;
// throws std::runtime_error when no such row exists.
4451  const std::string& property,
4452  const std::string& value) {
4455  "SELECT table_id from omnisci_foreign_tables where table_id = ?",
4456  std::vector<std::string>{std::to_string(table->tableId)});
4457  auto num_rows = sqliteConnector_.getNumRows();
4458  if (num_rows > 0) {
4459  CHECK_EQ(size_t(1), num_rows);
// `property` is concatenated into the SQL text as a column name, while
// `value` and the table id are passed as parameters.
// NOTE(review): callers presumably pass only fixed column names - confirm.
4461  "UPDATE omnisci_foreign_tables SET " + property + " = ? WHERE table_id = ?",
4462  std::vector<std::string>{value, std::to_string(table->tableId)});
4463  } else {
4464  throw std::runtime_error{"Can not change property \"" + property +
4465  "\" for foreign table." + " Foreign table \"" +
4466  table->tableName + "\" is not found."};
4467  }
4468 }
4469 } // namespace Catalog_Namespace
std::string dumpCreateTable(const TableDescriptor *td, bool multiline_formatting=true, bool dump_defaults=false) const
Definition: Catalog.cpp:4211
std::list< const ColumnDescriptor * > getAllColumnMetadataForTableUnlocked(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Definition: Catalog.cpp:1739
void serializeTableJsonUnlocked(const TableDescriptor *td, const std::list< ColumnDescriptor > &cds) const
Definition: Catalog.cpp:2372
int64_t get_next_refresh_time(const foreign_storage::ForeignTable &foreign_table)
Definition: Catalog.cpp:2144
void instantiateFragmenter(TableDescriptor *td) const
Definition: Catalog.cpp:1370
const Parser::SharedDictionaryDef compress_reference_path(Parser::SharedDictionaryDef cur_node, const std::vector< Parser::SharedDictionaryDef > &shared_dict_defs)
static std::set< std::string > reserved_keywords
Data_Namespace::MemoryLevel memoryLevel
Definition: UpdelRoll.h:65
std::string virtualExpr
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:331
void set_compression(EncodingType c)
Definition: sqltypes.h:430
void set_size(int s)
Definition: sqltypes.h:428
void deleteMetadataForDashboards(const std::vector< int32_t > ids, const UserMetadata &user)
Definition: Catalog.cpp:1566
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void executeDropTableSqliteQueries(const TableDescriptor *td)
Definition: Catalog.cpp:3311
const int MAPD_TEMP_TABLE_START_ID
Definition: Catalog.cpp:101
std::string partitions
std::vector< int > ChunkKey
Definition: types.h:37
void eraseTablePhysicalData(const TableDescriptor *td)
Definition: Catalog.cpp:3789
std::string dictFolderPath
std::tuple< int, std::string > ColumnKey
Definition: Types.h:36
HOST DEVICE int get_size() const
Definition: sqltypes.h:340
void doDropTable(const TableDescriptor *td)
Definition: Catalog.cpp:3303
~Catalog()
Destructor - deletes all ColumnDescriptor and TableDescriptor structures which were allocated on the ...
Definition: Catalog.cpp:191
void getDictionary(const ColumnDescriptor &cd, std::map< int, StringDictionary * > &stringDicts)
Definition: Catalog.cpp:1839
std::string cat(Ts &&...args)
T getData(const int row, const int col)
void commitUpdate()
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:97
SQLTypes
Definition: sqltypes.h:40
#define SPIMAP_MAGIC1
Definition: Catalog.h:72
std::string tableName
void addReferenceToForeignDict(ColumnDescriptor &referencing_column, Parser::SharedDictionaryDef shared_dict_def, const bool persist_reference)
Definition: Catalog.cpp:2981
const ColumnDescriptor * getDeletedColumn(const TableDescriptor *td) const
Definition: Catalog.cpp:2889
void setDeletedColumn(const TableDescriptor *td, const ColumnDescriptor *cd)
Definition: Catalog.cpp:2955
ColumnDescriptorMap columnDescriptorMap_
Definition: Catalog.h:528
const foreign_storage::ForeignTable * getForeignTableUnlocked(int tableId) const
Definition: Catalog.cpp:1685
static const AccessPrivileges ALL_DATABASE
Definition: DBObject.h:152
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
void createTable(TableDescriptor &td, const std::list< ColumnDescriptor > &columns, const std::vector< Parser::SharedDictionaryDef > &shared_dict_defs, bool isLogicalTable)
Definition: Catalog.cpp:2156
void updateFrontendViewAndLinkUsers()
Definition: Catalog.cpp:408
virtual void query_with_text_params(std::string const &query_only)
void setTableEpochs(const int32_t db_id, const std::vector< TableEpochInfo > &table_epochs)
Definition: Catalog.cpp:2846
DBObjectType
Definition: DBObject.h:42
void createForeignServer(std::unique_ptr< foreign_storage::ForeignServer > foreign_server, bool if_not_exists)
Definition: Catalog.cpp:2466
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:211
void addColumn(const TableDescriptor &td, ColumnDescriptor &cd)
Definition: Catalog.cpp:1867
void checkpoint(const int db_id, const int tb_id)
Definition: DataMgr.cpp:489
void roll(const bool forward)
Definition: Catalog.cpp:1955
bool validateNonExistentTableOrView(const std::string &name, const bool if_not_exists)
Definition: Catalog.cpp:4370
std::vector< const TableDescriptor * > getAllForeignTablesForRefresh() const
Definition: Catalog.cpp:4381
const foreign_storage::ForeignServer * getForeignServer(const std::string &server_name) const
Definition: Catalog.cpp:2509
std::list< const TableDescriptor * > getAllTableMetadata() const
Definition: Catalog.cpp:1755
#define LOG(tag)
Definition: Logger.h:188
SqliteConnector sqliteConnector_
Definition: Catalog.h:537
static std::shared_ptr< Catalog > get(const std::string &dbName)
Definition: Catalog.cpp:3829
const std::vector< LeafHostInfo > string_dict_hosts_
Definition: Catalog.h:541
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logicalTableDesc) const
Definition: Catalog.cpp:3655
DictDescriptorMapById dictDescriptorMapByRef_
Definition: Catalog.h:530
HOST DEVICE int get_scale() const
Definition: sqltypes.h:335
int64_t dateTimeParse< kTIMESTAMP >(std::string_view str, unsigned const dim)
void checkpointWithAutoRollback(const int logical_table_id)
Definition: Catalog.cpp:3767
std::string storageType
void revokeDBObjectPrivilegesFromAll(DBObject object, Catalog *catalog)
void revokeDBObjectPrivileges(const std::string &grantee, const DBObject &object, const Catalog_Namespace::Catalog &catalog)
std::string join(T const &container, std::string const &delim)
Catalog(const std::string &basePath, const DBMetadata &curDB, std::shared_ptr< Data_Namespace::DataMgr > dataMgr, const std::vector< LeafHostInfo > &string_dict_hosts, std::shared_ptr< Calcite > calcite, bool is_new_db)
Constructor - takes basePath to already extant data directory for writing.
Definition: Catalog.cpp:161
#define DEFAULT_MAX_CHUNK_SIZE
std::shared_ptr< Data_Namespace::DataMgr > dataMgr_
Definition: Catalog.h:539
const LinkDescriptor * getMetadataForLink(const std::string &link) const
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:421
const std::string & get_foreign_table() const
Definition: ParserNode.h:903
virtual void query(const std::string &queryString)
#define CHECK_GE(x, y)
Definition: Logger.h:210
void setObjectKey(const DBObjectKey &objectKey)
Definition: DBObject.h:215
void dropTableFromJsonUnlocked(const std::string &table_name) const
Definition: Catalog.cpp:2434
auto table_json_filepath(const std::string &base_path, const std::string &db_name)
Definition: Catalog.cpp:153
Grantee * getGrantee(const std::string &name) const
std::string fragments
void setForReload(const int32_t tableId)
Definition: Catalog.cpp:4008
int32_t objectId
Definition: DBObject.h:57
int32_t getTableEpoch(const int32_t db_id, const int32_t table_id) const
Definition: Catalog.cpp:2750
void replaceDashboard(DashboardDescriptor &vd)
Definition: Catalog.cpp:3512
void unserialize_key_metainfo(std::vector< std::string > &shared_dicts, std::set< std::string > &shared_dict_column_names, const std::string keyMetainfo)
Definition: Catalog.cpp:4161
const int MAPD_TEMP_DICT_START_ID
Definition: Catalog.cpp:103
Definition: Grantee.h:76
std::vector< std::string > getTableDataDirectories(const TableDescriptor *td) const
Definition: Catalog.cpp:4018
std::string generatePhysicalTableName(const std::string &logicalTableName, const int32_t &shardNumber)
Definition: Catalog.cpp:3818
The InsertOrderFragmenter is a child class of AbstractFragmenter, and fragments data in insert order...
void dropColumn(const TableDescriptor &td, const ColumnDescriptor &cd)
Definition: Catalog.cpp:1924
void checkDateInDaysColumnMigration()
Definition: Catalog.cpp:816
void setPrivileges(const AccessPrivileges &privs)
Definition: DBObject.h:217
void addFrontendViewToMap(DashboardDescriptor &vd)
Definition: Catalog.cpp:1280
bool contains_spaces(std::string_view str)
returns true if the string contains one or more spaces
Definition: Catalog.cpp:4192
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:330
static const AccessPrivileges SELECT_FROM_TABLE
Definition: DBObject.h:161
const ColumnDescriptor * getShardColumnMetadataForTable(const TableDescriptor *td) const
Definition: Catalog.cpp:3637
std::vector< int > columnIdBySpi_
#define CHECK_GT(x, y)
Definition: Logger.h:209
void changeForeignServerOwner(const std::string &server_name, const int new_owner_id)
Definition: Catalog.cpp:2560
const foreign_storage::ForeignTable * getForeignTable(int table_id) const
Definition: Catalog.cpp:1694
void renameColumn(const TableDescriptor *td, const ColumnDescriptor *cd, const std::strin