OmniSciDB  94e8789169
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Catalog.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "Catalog/Catalog.h"
26 
27 #include <algorithm>
28 #include <boost/algorithm/string/predicate.hpp>
29 #include <boost/filesystem.hpp>
30 #include <boost/range/adaptor/map.hpp>
31 #include <boost/version.hpp>
32 #include <cassert>
33 #include <cerrno>
34 #include <cstdio>
35 #include <cstring>
36 #include <exception>
37 #include <fstream>
38 #include <list>
39 #include <memory>
40 #include <random>
41 #include <regex>
42 #include <sstream>
43 
44 #if BOOST_VERSION >= 106600
45 #include <boost/uuid/detail/sha1.hpp>
46 #else
47 #include <boost/uuid/sha1.hpp>
48 #endif
49 #include <rapidjson/document.h>
50 #include <rapidjson/istreamwrapper.h>
51 #include <rapidjson/ostreamwrapper.h>
52 #include <rapidjson/writer.h>
53 
54 #include "Catalog/SysCatalog.h"
55 
56 #include "QueryEngine/Execute.h"
58 
62 #include "Fragmenter/Fragmenter.h"
64 #include "LockMgr/LockMgr.h"
66 #include "Parser/ParserNode.h"
67 #include "QueryEngine/Execute.h"
69 #include "RefreshTimeCalculator.h"
70 #include "Shared/DateTimeParser.h"
71 #include "Shared/File.h"
72 #include "Shared/StringTransform.h"
73 #include "Shared/measure.h"
75 
76 #include "MapDRelease.h"
77 #include "RWLocks.h"
79 
80 using Chunk_NS::Chunk;
83 using std::list;
84 using std::map;
85 using std::pair;
86 using std::runtime_error;
87 using std::string;
88 using std::vector;
89 
 // Global flag defined in this file; defaults to false. It is tested further down in this
 // listing to gate conditional steps (their bodies are not visible here).
91 bool g_enable_fsi{false};
 // Flags defined in other translation units.
92 extern bool g_enable_s3_fsi;
93 extern bool g_cache_string_hash;
94 
95 // Serialize temp tables to a json file in the Catalogs directory for Calcite parsing
96 // under unit testing.
98 
99 namespace Catalog_Namespace {
100 
101 const int DEFAULT_INITIAL_VERSION = 1; // start at version 1
 // NOTE(review): the declarations that the next two values initialize are missing from
 // this listing; presumably MAPD_TEMP_TABLE_START_ID and MAPD_TEMP_DICT_START_ID, which
 // the constructor uses to seed nextTempTableId_ / nextTempDictId_ -- confirm.
103  1073741824; // 2^30, give room for over a billion non-temp tables
105  1073741824; // 2^30, give room for over a billion non-temp dictionaries
106 
 // Tag string held in the static member Catalog::physicalTableNameTag_.
107 const std::string Catalog::physicalTableNameTag_("_shard_#");
108 
 // Per-thread flag, initially false.
109 thread_local bool Catalog::thread_holds_read_lock = false;
110 
115 
116 // The migration is done as a two-step process: this release
117 // creates and uses the new table;
118 // the next release will remove the old table. This keeps a fallback path
119 // in case of migration failure.
 // NOTE(review): the signature line of this routine and the call lines that open the
 // multi-line SQL strings below are missing from this listing.
 // Visible behavior: inside a SQLite transaction, look up mapd_dashboards in
 // sqlite_master and stop early if it already exists; otherwise the SQL below creates
 // mapd_dashboards and copies rows into it from mapd_frontend_views. Any std::exception
 // rolls the transaction back and is rethrown.
122  sqliteConnector_.query("BEGIN TRANSACTION");
123  try {
125  "SELECT name FROM sqlite_master WHERE type='table' AND name='mapd_dashboards'");
126  if (sqliteConnector_.getNumRows() != 0) {
127  // already done
128  sqliteConnector_.query("END TRANSACTION");
129  return;
130  }
132  "CREATE TABLE mapd_dashboards (id integer primary key autoincrement, name text , "
133  "userid integer references mapd_users, state text, image_hash text, update_time "
134  "timestamp, "
135  "metadata text, UNIQUE(userid, name) )");
136  // now copy content from old table to new table
138  "insert into mapd_dashboards (id, name , "
139  "userid, state, image_hash, update_time , "
140  "metadata) "
141  "SELECT viewid , name , userid, view_state, image_hash, update_time, "
142  "view_metadata "
143  "from mapd_frontend_views");
144  } catch (const std::exception& e) {
145  sqliteConnector_.query("ROLLBACK TRANSACTION");
146  throw;
147  }
148  sqliteConnector_.query("END TRANSACTION");
149 }
150 
151 namespace {
152 
153 inline auto table_json_filepath(const std::string& base_path,
154  const std::string& db_name) {
155  return boost::filesystem::path(base_path + "/mapd_catalogs/" + db_name +
156  "_temp_tables.json");
157 }
158 
159 } // namespace
160 
 // Constructs the catalog for database `curDB` rooted at `basePath`.
 // The SQLite connector is opened for curDB.dbName under basePath + "/mapd_catalogs/",
 // and the temp table / temp dictionary id counters start at MAPD_TEMP_TABLE_START_ID and
 // MAPD_TEMP_DICT_START_ID.
 // For an existing database (!is_new_db) migrations run both before and after buildMaps().
 // NOTE(review): the line that opens the conditional around the temp-table JSON removal
 // is missing from this listing, which is why a closing brace appears unmatched below.
161 Catalog::Catalog(const string& basePath,
162  const DBMetadata& curDB,
163  std::shared_ptr<Data_Namespace::DataMgr> dataMgr,
164  const std::vector<LeafHostInfo>& string_dict_hosts,
165  std::shared_ptr<Calcite> calcite,
166  bool is_new_db)
167  : basePath_(basePath)
168  , sqliteConnector_(curDB.dbName, basePath + "/mapd_catalogs/")
169  , currentDB_(curDB)
170  , dataMgr_(dataMgr)
171  , string_dict_hosts_(string_dict_hosts)
172  , calciteMgr_(calcite)
173  , nextTempTableId_(MAPD_TEMP_TABLE_START_ID)
174  , nextTempDictId_(MAPD_TEMP_DICT_START_ID)
175  , sqliteMutex_()
176  , sharedMutex_()
177  , thread_holding_sqlite_lock()
178  , thread_holding_write_lock() {
179  if (!is_new_db) {
180  CheckAndExecuteMigrations();
181  }
182  buildMaps();
183  if (!is_new_db) {
184  CheckAndExecuteMigrationsPostBuildMaps();
185  }
 // remove the temp-tables JSON file for this database
187  boost::filesystem::remove(table_json_filepath(basePath_, currentDB_.dbName));
188  }
189  // once all initialized use real object
190  initialized_ = true;
191 }
192 
194 
 // NOTE(review): the signature line of this routine is missing from this listing
 // (presumably the destructor -- confirm), as is the line that opens the conditional
 // around the final file removal.
 // Visible behavior: deletes every descriptor owned through tableDescriptorMap_ and
 // columnDescriptorMap_, then removes the temp-tables JSON file for this database.
197  // must clean up heap-allocated TableDescriptor and ColumnDescriptor structs
198  for (TableDescriptorMap::iterator tableDescIt = tableDescriptorMap_.begin();
199  tableDescIt != tableDescriptorMap_.end();
200  ++tableDescIt) {
 // clear the fragmenter pointer before deleting the descriptor
201  tableDescIt->second->fragmenter = nullptr;
202  delete tableDescIt->second;
203  }
204 
205  // TableDescriptorMapById points to the same descriptors. No need to delete
206 
207  for (ColumnDescriptorMap::iterator columnDescIt = columnDescriptorMap_.begin();
208  columnDescIt != columnDescriptorMap_.end();
209  ++columnDescIt) {
210  delete columnDescIt->second;
211  }
212 
213  // ColumnDescriptorMapById points to the same descriptors. No need to delete
214 
216  boost::filesystem::remove(table_json_filepath(basePath_, currentDB_.dbName));
217  }
218 }
219 
 // NOTE(review): the signature line of this routine is missing from this listing.
 // Returns this catalog once initialized_ is set (the constructor sets it last);
 // before that, returns the dummy catalog obtained from SysCatalog.
221  if (initialized_) {
222  return this;
223  } else {
224  return SysCatalog::instance().getDummyCatalog().get();
225  }
226 }
227 
 // NOTE(review): the signature line of this routine and a few continuation/call lines
 // (the default values for max_chunk_size and userid, the call for sort_column_id) are
 // missing from this listing.
 // Visible behavior: inside a SQLite transaction, read the column names of mapd_tables
 // via PRAGMA TABLE_INFO and issue ALTER TABLE ... ADD for each expected column that is
 // absent. Any std::exception rolls the transaction back and is rethrown.
230  sqliteConnector_.query("BEGIN TRANSACTION");
231  try {
232  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_tables)");
233  std::vector<std::string> cols;
 // column 1 of each PRAGMA TABLE_INFO row is read as the column name
234  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
235  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
236  }
237  if (std::find(cols.begin(), cols.end(), std::string("max_chunk_size")) ==
238  cols.end()) {
239  string queryString("ALTER TABLE mapd_tables ADD max_chunk_size BIGINT DEFAULT " +
241  sqliteConnector_.query(queryString);
242  }
243  if (std::find(cols.begin(), cols.end(), std::string("shard_column_id")) ==
244  cols.end()) {
245  string queryString("ALTER TABLE mapd_tables ADD shard_column_id BIGINT DEFAULT " +
246  std::to_string(0));
247  sqliteConnector_.query(queryString);
248  }
249  if (std::find(cols.begin(), cols.end(), std::string("shard")) == cols.end()) {
250  string queryString("ALTER TABLE mapd_tables ADD shard BIGINT DEFAULT " +
251  std::to_string(-1));
252  sqliteConnector_.query(queryString);
253  }
254  if (std::find(cols.begin(), cols.end(), std::string("num_shards")) == cols.end()) {
255  string queryString("ALTER TABLE mapd_tables ADD num_shards BIGINT DEFAULT " +
256  std::to_string(0));
257  sqliteConnector_.query(queryString);
258  }
259  if (std::find(cols.begin(), cols.end(), std::string("key_metainfo")) == cols.end()) {
260  string queryString("ALTER TABLE mapd_tables ADD key_metainfo TEXT DEFAULT '[]'");
261  sqliteConnector_.query(queryString);
262  }
263  if (std::find(cols.begin(), cols.end(), std::string("userid")) == cols.end()) {
264  string queryString("ALTER TABLE mapd_tables ADD userid integer DEFAULT " +
266  sqliteConnector_.query(queryString);
267  }
268  if (std::find(cols.begin(), cols.end(), std::string("sort_column_id")) ==
269  cols.end()) {
271  "ALTER TABLE mapd_tables ADD sort_column_id INTEGER DEFAULT 0");
272  }
273  if (std::find(cols.begin(), cols.end(), std::string("storage_type")) == cols.end()) {
274  string queryString("ALTER TABLE mapd_tables ADD storage_type TEXT DEFAULT ''");
275  sqliteConnector_.query(queryString);
276  }
277  if (std::find(cols.begin(), cols.end(), std::string("max_rollback_epochs")) ==
278  cols.end()) {
279  string queryString("ALTER TABLE mapd_tables ADD max_rollback_epochs INT DEFAULT " +
280  std::to_string(-1));
281  sqliteConnector_.query(queryString);
282  }
283  } catch (std::exception& e) {
284  sqliteConnector_.query("ROLLBACK TRANSACTION");
285  throw;
286  }
287  sqliteConnector_.query("END TRANSACTION");
288 }
289 
 // NOTE(review): the signature line of this routine and the call lines that open the
 // multi-line SQL strings below are missing from this listing.
 // Visible behavior: inside a SQLite transaction, make sure mapd_version_history exists,
 // stop early if it already holds a 'notnull_fixlen_arrays' entry, otherwise record that
 // entry and set is_notnull=1 on mapd_columns rows with coltype kARRAY and size>0.
 // Any std::exception rolls the transaction back and is rethrown.
292  sqliteConnector_.query("BEGIN TRANSACTION");
293  try {
295  "select name from sqlite_master WHERE type='table' AND "
296  "name='mapd_version_history'");
297  if (sqliteConnector_.getNumRows() == 0) {
299  "CREATE TABLE mapd_version_history(version integer, migration_history text "
300  "unique)");
301  } else {
303  "select * from mapd_version_history where migration_history = "
304  "'notnull_fixlen_arrays'");
305  if (sqliteConnector_.getNumRows() != 0) {
306  // legacy fixlen arrays have already been migrated
307  // no need for further execution
308  sqliteConnector_.query("END TRANSACTION");
309  return;
310  }
311  }
312  // Insert the marker row for this migration
314  "INSERT INTO mapd_version_history(version, migration_history) values(?,?)",
315  std::vector<std::string>{std::to_string(MAPD_VERSION), "notnull_fixlen_arrays"});
316  LOG(INFO) << "Updating mapd_columns, legacy fixlen arrays";
317  // Updating all fixlen array columns
318  string queryString("UPDATE mapd_columns SET is_notnull=1 WHERE coltype=" +
319  std::to_string(kARRAY) + " AND size>0;");
320  sqliteConnector_.query(queryString);
321  } catch (std::exception& e) {
322  sqliteConnector_.query("ROLLBACK TRANSACTION");
323  throw;
324  }
325  sqliteConnector_.query("END TRANSACTION");
326 }
327 
 // NOTE(review): the signature line of this routine and the call lines that open the
 // multi-line SQL strings below are missing from this listing.
 // Visible behavior: inside a SQLite transaction, make sure mapd_version_history exists,
 // stop early if it already holds a 'notnull_geo_columns' entry, otherwise record that
 // entry and set is_notnull=1 on mapd_columns rows whose coltype is kPOINT, kLINESTRING,
 // kPOLYGON or kMULTIPOLYGON. Any std::exception rolls the transaction back and is
 // rethrown.
330  sqliteConnector_.query("BEGIN TRANSACTION");
331  try {
333  "select name from sqlite_master WHERE type='table' AND "
334  "name='mapd_version_history'");
335  if (sqliteConnector_.getNumRows() == 0) {
337  "CREATE TABLE mapd_version_history(version integer, migration_history text "
338  "unique)");
339  } else {
341  "select * from mapd_version_history where migration_history = "
342  "'notnull_geo_columns'");
343  if (sqliteConnector_.getNumRows() != 0) {
344  // legacy geo columns have already been migrated
345  // no need for further execution
346  sqliteConnector_.query("END TRANSACTION");
347  return;
348  }
349  }
350  // Insert the marker row for this migration
352  "INSERT INTO mapd_version_history(version, migration_history) values(?,?)",
353  std::vector<std::string>{std::to_string(MAPD_VERSION), "notnull_geo_columns"});
354  LOG(INFO) << "Updating mapd_columns, legacy geo columns";
355  // Updating all geo columns
356  string queryString(
357  "UPDATE mapd_columns SET is_notnull=1 WHERE coltype=" + std::to_string(kPOINT) +
358  " OR coltype=" + std::to_string(kLINESTRING) + " OR coltype=" +
359  std::to_string(kPOLYGON) + " OR coltype=" + std::to_string(kMULTIPOLYGON) + ";");
360  sqliteConnector_.query(queryString);
361  } catch (std::exception& e) {
362  sqliteConnector_.query("ROLLBACK TRANSACTION");
363  throw;
364  }
365  sqliteConnector_.query("END TRANSACTION");
366 }
367 
 // NOTE(review): the signature line of this routine and the call line that opens the
 // sqlite_master lookup are missing from this listing.
 // Visible behavior: inside a SQLite transaction, stop early if mapd_frontend_views does
 // not exist; otherwise add the image_hash, update_time and view_metadata columns when
 // they are absent. Any std::exception rolls the transaction back and is rethrown.
370  sqliteConnector_.query("BEGIN TRANSACTION");
371  try {
372  // check table still exists
374  "SELECT name FROM sqlite_master WHERE type='table' AND "
375  "name='mapd_frontend_views'");
376  if (sqliteConnector_.getNumRows() == 0) {
377  // table does not exist
378  // no need to migrate
379  sqliteConnector_.query("END TRANSACTION");
380  return;
381  }
382  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_frontend_views)");
383  std::vector<std::string> cols;
384  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
385  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
386  }
387  if (std::find(cols.begin(), cols.end(), std::string("image_hash")) == cols.end()) {
388  sqliteConnector_.query("ALTER TABLE mapd_frontend_views ADD image_hash text");
389  }
390  if (std::find(cols.begin(), cols.end(), std::string("update_time")) == cols.end()) {
391  sqliteConnector_.query("ALTER TABLE mapd_frontend_views ADD update_time timestamp");
392  }
393  if (std::find(cols.begin(), cols.end(), std::string("view_metadata")) == cols.end()) {
394  sqliteConnector_.query("ALTER TABLE mapd_frontend_views ADD view_metadata text");
395  }
396  } catch (std::exception& e) {
397  sqliteConnector_.query("ROLLBACK TRANSACTION");
398  throw;
399  }
400  sqliteConnector_.query("END TRANSACTION");
401 }
402 
 // NOTE(review): the signature line of this routine and the call line that opens the
 // CREATE TABLE statement are missing from this listing.
 // Visible behavior: inside a SQLite transaction, create mapd_links if it does not exist
 // and add the view_metadata column when it is absent. Any std::exception rolls the
 // transaction back and is rethrown.
405  sqliteConnector_.query("BEGIN TRANSACTION");
406  try {
408  "CREATE TABLE IF NOT EXISTS mapd_links (linkid integer primary key, userid "
409  "integer references mapd_users, "
410  "link text unique, view_state text, update_time timestamp, view_metadata text)");
411  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_links)");
412  std::vector<std::string> cols;
413  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
414  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
415  }
416  if (std::find(cols.begin(), cols.end(), std::string("view_metadata")) == cols.end()) {
417  sqliteConnector_.query("ALTER TABLE mapd_links ADD view_metadata text");
418  }
419  } catch (const std::exception& e) {
420  sqliteConnector_.query("ROLLBACK TRANSACTION");
421  throw;
422  }
423  sqliteConnector_.query("END TRANSACTION");
424 }
425 
 // NOTE(review): the signature line of this routine and two call-opening lines are
 // missing from this listing.
 // Visible behavior: inside a SQLite transaction, set userid = 0 on mapd_links rows whose
 // userid is NULL, then do the same for mapd_frontend_views if that table still exists.
 // Any std::exception rolls the transaction back and is rethrown.
428  sqliteConnector_.query("BEGIN TRANSACTION");
429  try {
430  sqliteConnector_.query("UPDATE mapd_links SET userid = 0 WHERE userid IS NULL");
431  // check table still exists
433  "SELECT name FROM sqlite_master WHERE type='table' AND "
434  "name='mapd_frontend_views'");
435  if (sqliteConnector_.getNumRows() == 0) {
436  // table does not exist
437  // no need to migrate
438  sqliteConnector_.query("END TRANSACTION");
439  return;
440  }
442  "UPDATE mapd_frontend_views SET userid = 0 WHERE userid IS NULL");
443  } catch (const std::exception& e) {
444  sqliteConnector_.query("ROLLBACK TRANSACTION");
445  throw;
446  }
447  sqliteConnector_.query("END TRANSACTION");
448 }
449 
450 // introduce DB version into the tables table
451 // if the DB does not have a version reset all pagesizes to 2097152 to be compatible with
452 // old value
453 
 // NOTE(review): the signature line of this routine and the continuation line that
 // supplies the version_num default are missing from this listing.
 // Visible behavior: does nothing for an empty database name. Otherwise, inside a SQLite
 // transaction, if mapd_tables has no version_num column, reset frag_page_size to 2097152
 // on every row and add the version_num column. Any std::exception rolls the transaction
 // back and is rethrown.
456  if (currentDB_.dbName.length() == 0) {
457  // dbName length is zero, nothing to do here
458  return;
459  }
460  sqliteConnector_.query("BEGIN TRANSACTION");
461  try {
462  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_tables)");
463  std::vector<std::string> cols;
464  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
465  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
466  }
467  if (std::find(cols.begin(), cols.end(), std::string("version_num")) == cols.end()) {
468  LOG(INFO) << "Updating mapd_tables updatePageSize";
469  // No version number
470  // need to update the default page size to the old correct value
471  sqliteConnector_.query("UPDATE mapd_tables SET frag_page_size = 2097152 ");
472  // need to add new version info
473  string queryString("ALTER TABLE mapd_tables ADD version_num BIGINT DEFAULT " +
475  sqliteConnector_.query(queryString);
476  }
477  } catch (std::exception& e) {
478  sqliteConnector_.query("ROLLBACK TRANSACTION");
479  throw;
480  }
481  sqliteConnector_.query("END TRANSACTION");
482 }
483 
 // NOTE(review): the signature line of this routine, the continuation line that supplies
 // the version_num default, and the call line for the is_deletedcol statement are missing
 // from this listing.
 // Visible behavior: inside a SQLite transaction, if mapd_columns has no version_num
 // column, add version_num and is_deletedcol. Any std::exception rolls the transaction
 // back and is rethrown.
486  sqliteConnector_.query("BEGIN TRANSACTION");
487  try {
488  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_columns)");
489  std::vector<std::string> cols;
490  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
491  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
492  }
493  if (std::find(cols.begin(), cols.end(), std::string("version_num")) == cols.end()) {
494  LOG(INFO) << "Updating mapd_columns updateDeletedColumnIndicator";
495  // need to add new version info
496  string queryString("ALTER TABLE mapd_columns ADD version_num BIGINT DEFAULT " +
498  sqliteConnector_.query(queryString);
499  // need to add new column to table definition to indicate deleted column, column used
500  // as bitmap for deleted rows.
502  "ALTER TABLE mapd_columns ADD is_deletedcol boolean default 0 ");
503  }
504  } catch (std::exception& e) {
505  sqliteConnector_.query("ROLLBACK TRANSACTION");
506  throw;
507  }
508  sqliteConnector_.query("END TRANSACTION");
509 }
510 
511 // introduce DB version into the dictionary tables
512 // if the DB does not have a version rename all dictionary tables
513 
 // NOTE(review): the signature line of this routine and the continuation line that
 // supplies the version_num default are missing from this listing.
 // Visible behavior: does nothing for an empty database name. Otherwise, inside a SQLite
 // transaction, if mapd_dictionaries has no version_num column, rename each dictionary
 // directory from <basePath>/mapd_data/<dbName>_<dictName> to
 // <basePath>/mapd_data/DB_<dbId>_DICT_<dictId>, then add the version_num column.
 // A failed rename is only logged; it does not abort the migration.
516  if (currentDB_.dbName.length() == 0) {
517  // updateDictionaryNames dbName length is zero nothing to do here
518  return;
519  }
520  sqliteConnector_.query("BEGIN TRANSACTION");
521  try {
522  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_dictionaries)");
523  std::vector<std::string> cols;
524  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
525  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
526  }
527  if (std::find(cols.begin(), cols.end(), std::string("version_num")) == cols.end()) {
528  // No version number
529  // need to rename dictionaries
530  string dictQuery("SELECT dictid, name from mapd_dictionaries");
531  sqliteConnector_.query(dictQuery);
532  size_t numRows = sqliteConnector_.getNumRows();
533  for (size_t r = 0; r < numRows; ++r) {
534  int dictId = sqliteConnector_.getData<int>(r, 0);
535  std::string dictName = sqliteConnector_.getData<string>(r, 1);
536 
537  std::string oldName =
538  basePath_ + "/mapd_data/" + currentDB_.dbName + "_" + dictName;
539  std::string newName = basePath_ + "/mapd_data/DB_" +
540  std::to_string(currentDB_.dbId) + "_DICT_" +
541  std::to_string(dictId);
542 
543  int result = rename(oldName.c_str(), newName.c_str());
544 
545  if (result == 0) {
546  LOG(INFO) << "Dictionary upgrade: successfully renamed " << oldName << " to "
547  << newName;
548  } else {
 // NOTE(review): the "error code" logged here is the return value of rename(), not
 // errno, so it does not identify the cause of the failure.
549  LOG(ERROR) << "Failed to rename old dictionary directory " << oldName << " to "
550  << newName + " dbname '" << currentDB_.dbName << "' error code "
551  << std::to_string(result);
552  }
553  }
554  // need to add new version info
555  string queryString("ALTER TABLE mapd_dictionaries ADD version_num BIGINT DEFAULT " +
557  sqliteConnector_.query(queryString);
558  }
559  } catch (std::exception& e) {
560  sqliteConnector_.query("ROLLBACK TRANSACTION");
561  throw;
562  }
563  sqliteConnector_.query("END TRANSACTION");
564 }
565 
 // NOTE(review): the signature line of this routine and the call line that opens the
 // CREATE TABLE statement are missing from this listing.
 // Visible behavior: inside a SQLite transaction, create mapd_logical_to_physical if it
 // does not exist. Any std::exception rolls the transaction back and is rethrown.
568  sqliteConnector_.query("BEGIN TRANSACTION");
569  try {
571  "CREATE TABLE IF NOT EXISTS mapd_logical_to_physical("
572  "logical_table_id integer, physical_table_id integer)");
573  } catch (const std::exception& e) {
574  sqliteConnector_.query("ROLLBACK TRANSACTION");
575  throw;
576  }
577  sqliteConnector_.query("END TRANSACTION");
578 }
579 
 // NOTE(review): two lines of this body are missing from this listing: one just before
 // BEGIN TRANSACTION and the call line that opens the INSERT statement.
580 void Catalog::updateLogicalToPhysicalTableMap(const int32_t logical_tb_id) {
581  /* this proc inserts/updates all pairs of (logical_tb_id, physical_tb_id) in
582  * sqlite mapd_logical_to_physical table for given logical_tb_id as needed
583  */
584 
586  sqliteConnector_.query("BEGIN TRANSACTION");
587  try {
 // nothing is written when logical_tb_id has no entry in the in-memory map
588  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(logical_tb_id);
589  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
590  const auto physicalTables = physicalTableIt->second;
591  CHECK(!physicalTables.empty());
592  for (size_t i = 0; i < physicalTables.size(); i++) {
593  int32_t physical_tb_id = physicalTables[i];
595  "INSERT OR REPLACE INTO mapd_logical_to_physical (logical_table_id, "
596  "physical_table_id) VALUES (?1, ?2)",
597  std::vector<std::string>{std::to_string(logical_tb_id),
598  std::to_string(physical_tb_id)});
599  }
600  }
601  } catch (std::exception& e) {
 // undo any partial inserts, then let the caller see the original exception
602  sqliteConnector_.query("ROLLBACK TRANSACTION");
603  throw;
604  }
605  sqliteConnector_.query("END TRANSACTION");
606 }
607 
 // NOTE(review): the signature line of this routine is missing from this listing.
 // Visible behavior: inside a SQLite transaction, add a refcount column (default 1) to
 // mapd_dictionaries when it is absent. Any std::exception rolls the transaction back and
 // is rethrown.
610  sqliteConnector_.query("BEGIN TRANSACTION");
611  try {
612  sqliteConnector_.query("PRAGMA TABLE_INFO(mapd_dictionaries)");
613  std::vector<std::string> cols;
614  for (size_t i = 0; i < sqliteConnector_.getNumRows(); i++) {
615  cols.push_back(sqliteConnector_.getData<std::string>(i, 1));
616  }
617  if (std::find(cols.begin(), cols.end(), std::string("refcount")) == cols.end()) {
618  sqliteConnector_.query("ALTER TABLE mapd_dictionaries ADD refcount DEFAULT 1");
619  }
620  } catch (std::exception& e) {
621  sqliteConnector_.query("ROLLBACK TRANSACTION");
622  throw;
623  }
624  sqliteConnector_.query("END TRANSACTION");
625 }
626 
 // NOTE(review): the signature line of this routine and the statements inside the try
 // block are missing from this listing; only the transaction scaffolding is visible.
 // Any std::exception rolls the transaction back and is rethrown.
629  sqliteConnector_.query("BEGIN TRANSACTION");
630  try {
633  } catch (std::exception& e) {
634  sqliteConnector_.query("ROLLBACK TRANSACTION");
635  throw;
636  }
637  sqliteConnector_.query("END TRANSACTION");
638 }
639 
640 const std::string Catalog::getForeignServerSchema(bool if_not_exists) {
641  return "CREATE TABLE " + (if_not_exists ? std::string{"IF NOT EXISTS "} : "") +
642  "omnisci_foreign_servers(id integer primary key, name text unique, " +
643  "data_wrapper_type text, owner_user_id integer, creation_time integer, " +
644  "options text)";
645 }
646 
647 const std::string Catalog::getForeignTableSchema(bool if_not_exists) {
648  return "CREATE TABLE " + (if_not_exists ? std::string{"IF NOT EXISTS "} : "") +
649  "omnisci_foreign_tables(table_id integer unique, server_id integer, " +
650  "options text, last_refresh_time integer, next_refresh_time integer, " +
651  "FOREIGN KEY(table_id) REFERENCES mapd_tables(tableid), " +
652  "FOREIGN KEY(server_id) REFERENCES omnisci_foreign_servers(id))";
653 }
654 
 // NOTE(review): the signature line of this routine and several interior lines are
 // missing from this listing (call-opening lines, the contents of the privileges lookup
 // table, the declarations of `type`, and the call inside the final try block).
 // Visible behavior: inside a SQLite transaction, use the mapd_record_ownership_marker
 // table to decide whether this migration already ran; if not, write a marker row and
 // collect DBObject entries for the database itself and for tables/views and dashboards
 // with userid > 0. After the transaction ends, the collected objects are handed on in a
 // separate try block whose failure is logged and rethrown as std::runtime_error.
657  sqliteConnector_.query("BEGIN TRANSACTION");
658  std::vector<DBObject> objects;
659  try {
661  "SELECT name FROM sqlite_master WHERE type='table' AND "
662  "name='mapd_record_ownership_marker'");
663  // check if mapd catalog - marker exists
664  if (sqliteConnector_.getNumRows() != 0 && currentDB_.dbId == 1) {
665  // already done
666  sqliteConnector_.query("END TRANSACTION");
667  return;
668  }
669  // check if different catalog - marker exists
670  else if (sqliteConnector_.getNumRows() != 0 && currentDB_.dbId != 1) {
671  sqliteConnector_.query("SELECT dummy FROM mapd_record_ownership_marker");
672  // Check if migration is being performed on existing non mapd catalogs
673  // Older non mapd dbs will have table but no record in them
674  if (sqliteConnector_.getNumRows() != 0) {
675  // already done
676  sqliteConnector_.query("END TRANSACTION");
677  return;
678  }
679  }
680  // marker does not exist - create one
681  else {
682  sqliteConnector_.query("CREATE TABLE mapd_record_ownership_marker (dummy integer)");
683  }
684 
685  DBMetadata db;
686  CHECK(SysCatalog::instance().getMetadataForDB(currentDB_.dbName, db));
687  // store the DB owner id as the marker record for the migration being performed
689  "INSERT INTO mapd_record_ownership_marker (dummy) VALUES (?1)",
690  std::vector<std::string>{std::to_string(db.dbOwner)});
691 
692  static const std::map<const DBObjectType, const AccessPrivileges>
693  object_level_all_privs_lookup{
699 
700  // grant owner all permissions on DB
701  DBObjectKey key;
702  key.dbId = currentDB_.dbId;
 // helper: stamps the shared key with the given permission type and returns a copy
703  auto _key_place = [&key](auto type) {
704  key.permissionType = type;
705  return key;
706  };
707  for (auto& it : object_level_all_privs_lookup) {
708  objects.emplace_back(_key_place(it.first), it.second, db.dbOwner);
709  objects.back().setName(currentDB_.dbName);
710  }
711 
712  {
713  // other users tables and views
714  string tableQuery(
715  "SELECT tableid, name, userid, isview FROM mapd_tables WHERE userid > 0");
716  sqliteConnector_.query(tableQuery);
717  size_t numRows = sqliteConnector_.getNumRows();
718  for (size_t r = 0; r < numRows; ++r) {
719  int32_t tableid = sqliteConnector_.getData<int>(r, 0);
720  std::string tableName = sqliteConnector_.getData<string>(r, 1);
721  int32_t ownerid = sqliteConnector_.getData<int>(r, 2);
722  bool isview = sqliteConnector_.getData<bool>(r, 3);
723 
726  DBObjectKey key;
727  key.dbId = currentDB_.dbId;
728  key.objectId = tableid;
729  key.permissionType = type;
730 
731  DBObject obj(tableName, type);
732  obj.setObjectKey(key);
733  obj.setOwner(ownerid);
736 
737  objects.push_back(obj);
738  }
739  }
740 
741  {
742  // other users dashboards
743  string tableQuery("SELECT id, name, userid FROM mapd_dashboards WHERE userid > 0");
744  sqliteConnector_.query(tableQuery);
745  size_t numRows = sqliteConnector_.getNumRows();
746  for (size_t r = 0; r < numRows; ++r) {
747  int32_t dashId = sqliteConnector_.getData<int>(r, 0);
748  std::string dashName = sqliteConnector_.getData<string>(r, 1);
749  int32_t ownerid = sqliteConnector_.getData<int>(r, 2);
750 
752  DBObjectKey key;
753  key.dbId = currentDB_.dbId;
754  key.objectId = dashId;
755  key.permissionType = type;
756 
757  DBObject obj(dashName, type);
758  obj.setObjectKey(key);
759  obj.setOwner(ownerid);
761 
762  objects.push_back(obj);
763  }
764  }
765  } catch (const std::exception& e) {
766  sqliteConnector_.query("ROLLBACK TRANSACTION");
767  throw;
768  }
769  sqliteConnector_.query("END TRANSACTION");
770 
771  // now apply the objects to the syscat to track the permissions
772  // moved outside transaction to avoid lock in sqlite
773  try {
775  } catch (const std::exception& e) {
776  LOG(ERROR) << " Issue during migration of DB " << name() << " issue was " << e.what();
777  throw std::runtime_error(" Issue during migration of DB " + name() + " issue was " +
778  e.what());
779  // will need to remove the mapd_record_ownership_marker table and retry
780  }
781 }
782 
787 }
788 
 // NOTE(review): the signature line of this routine and several interior lines are
 // missing from this listing (call-opening lines, the condition that guards the
 // `continue`, the initializer of active_grantees, and parts of the role-name arguments).
 // Visible behavior: stop early if mapd_version_history already records
 // "dashboard_roles_migration"; otherwise read id, userid and metadata for the rows of
 // mapd_dashboards, call createOrUpdateDashboardSystemRole for each collected dashboard,
 // and finally record the migration name in mapd_version_history.
790  std::unordered_map<std::string, std::pair<int, std::string>> dashboards;
791  std::vector<std::string> dashboard_ids;
792  static const std::string migration_name{"dashboard_roles_migration"};
793  {
795  sqliteConnector_.query("BEGIN TRANSACTION");
796  try {
797  // migration_history should be present in all catalogs by now
798  // if not then would be created before this migration
800  "select * from mapd_version_history where migration_history = '" +
801  migration_name + "'");
802  if (sqliteConnector_.getNumRows() != 0) {
803  // no need for further execution
804  sqliteConnector_.query("END TRANSACTION");
805  return;
806  }
807  LOG(INFO) << "Performing dashboard internal roles Migration.";
808  sqliteConnector_.query("select id, userid, metadata from mapd_dashboards");
809  for (size_t i = 0; i < sqliteConnector_.getNumRows(); ++i) {
812  sqliteConnector_.getData<string>(i, 0)))) {
813  // Successfully created roles during previous migration/crash
814  // No need to include them
815  continue;
816  }
 // keyed by dashboard id (as a string); value is (userid, metadata)
817  dashboards[sqliteConnector_.getData<string>(i, 0)] = std::make_pair(
818  sqliteConnector_.getData<int>(i, 1), sqliteConnector_.getData<string>(i, 2));
819  dashboard_ids.push_back(sqliteConnector_.getData<string>(i, 0));
820  }
821  } catch (const std::exception& e) {
822  sqliteConnector_.query("ROLLBACK TRANSACTION");
823  throw;
824  }
825  sqliteConnector_.query("END TRANSACTION");
826  }
827  // All current grantees with shared dashboards.
828  const auto active_grantees =
830 
831  try {
832  // NOTE(wamsi): Transactionally unsafe
833  for (auto dash : dashboards) {
834  createOrUpdateDashboardSystemRole(dash.second.second,
835  dash.second.first,
837  std::to_string(currentDB_.dbId), dash.first));
838  auto result = active_grantees.find(dash.first);
839  if (result != active_grantees.end()) {
842  dash.first)},
843  result->second);
844  }
845  }
847  // check if this has already been completed
849  "select * from mapd_version_history where migration_history = '" +
850  migration_name + "'");
851  if (sqliteConnector_.getNumRows() != 0) {
852  return;
853  }
855  "INSERT INTO mapd_version_history(version, migration_history) values(?,?)",
856  std::vector<std::string>{std::to_string(MAPD_VERSION), migration_name});
857  } catch (const std::exception& e) {
858  LOG(ERROR) << "Failed to create dashboard system roles during migration: "
859  << e.what();
860  throw;
861  }
862  LOG(INFO) << "Successfully created dashboard system roles during migration.";
863 }
864 
 // NOTE(review): the signature line of this routine and most of its statements are
 // missing from this listing; only the updatePageSize() call and an (empty here)
 // g_enable_fsi branch are visible.
875  updatePageSize();
879  if (g_enable_fsi) {
881  }
882 }
883 
887 }
888 
889 namespace {
890 std::string getUserFromId(const int32_t id) {
891  UserMetadata user;
892  if (SysCatalog::instance().getMetadataForUserById(id, user)) {
893  return user.userName;
894  }
895  // a user could be deleted and a dashboard still exist?
896  return "Unknown";
897 }
898 } // namespace
899 
903 
904  string dictQuery(
905  "SELECT dictid, name, nbits, is_shared, refcount from mapd_dictionaries");
906  sqliteConnector_.query(dictQuery);
907  size_t numRows = sqliteConnector_.getNumRows();
908  for (size_t r = 0; r < numRows; ++r) {
909  int dictId = sqliteConnector_.getData<int>(r, 0);
910  std::string dictName = sqliteConnector_.getData<string>(r, 1);
911  int dictNBits = sqliteConnector_.getData<int>(r, 2);
912  bool is_shared = sqliteConnector_.getData<bool>(r, 3);
913  int refcount = sqliteConnector_.getData<int>(r, 4);
914  std::string fname = basePath_ + "/mapd_data/DB_" + std::to_string(currentDB_.dbId) +
915  "_DICT_" + std::to_string(dictId);
916  DictRef dict_ref(currentDB_.dbId, dictId);
917  DictDescriptor* dd = new DictDescriptor(
918  dict_ref, dictName, dictNBits, is_shared, refcount, fname, false);
919  dictDescriptorMapByRef_[dict_ref].reset(dd);
920  }
921 
922  string tableQuery(
923  "SELECT tableid, name, ncolumns, isview, fragments, frag_type, max_frag_rows, "
924  "max_chunk_size, frag_page_size, "
925  "max_rows, partitions, shard_column_id, shard, num_shards, key_metainfo, userid, "
926  "sort_column_id, storage_type, max_rollback_epochs "
927  "from mapd_tables");
928  sqliteConnector_.query(tableQuery);
929  numRows = sqliteConnector_.getNumRows();
930  for (size_t r = 0; r < numRows; ++r) {
931  TableDescriptor* td;
932  const auto& storage_type = sqliteConnector_.getData<string>(r, 17);
933  if (!storage_type.empty() && storage_type != StorageType::FOREIGN_TABLE) {
934  const auto table_id = sqliteConnector_.getData<int>(r, 0);
935  const auto& table_name = sqliteConnector_.getData<string>(r, 1);
936  LOG(FATAL) << "Unable to read Catalog metadata: storage type is currently not a "
937  "supported table option (table "
938  << table_name << " [" << table_id << "] in database "
939  << currentDB_.dbName << ").";
940  }
941 
942  if (storage_type == StorageType::FOREIGN_TABLE) {
944  } else {
945  td = new TableDescriptor();
946  }
947 
948  td->storageType = storage_type;
949  td->tableId = sqliteConnector_.getData<int>(r, 0);
950  td->tableName = sqliteConnector_.getData<string>(r, 1);
951  td->nColumns = sqliteConnector_.getData<int>(r, 2);
952  td->isView = sqliteConnector_.getData<bool>(r, 3);
953  td->fragments = sqliteConnector_.getData<string>(r, 4);
954  td->fragType =
956  td->maxFragRows = sqliteConnector_.getData<int>(r, 6);
957  td->maxChunkSize = sqliteConnector_.getData<int64_t>(r, 7);
958  td->fragPageSize = sqliteConnector_.getData<int>(r, 8);
959  td->maxRows = sqliteConnector_.getData<int64_t>(r, 9);
960  td->partitions = sqliteConnector_.getData<string>(r, 10);
961  td->shardedColumnId = sqliteConnector_.getData<int>(r, 11);
962  td->shard = sqliteConnector_.getData<int>(r, 12);
963  td->nShards = sqliteConnector_.getData<int>(r, 13);
964  td->keyMetainfo = sqliteConnector_.getData<string>(r, 14);
965  td->userId = sqliteConnector_.getData<int>(r, 15);
966  td->sortedColumnId =
967  sqliteConnector_.isNull(r, 16) ? 0 : sqliteConnector_.getData<int>(r, 16);
968  if (!td->isView) {
969  td->fragmenter = nullptr;
970  }
972  td->hasDeletedCol = false;
973 
976  }
977 
978  if (g_enable_fsi) {
982  }
983 
984  string columnQuery(
985  "SELECT tableid, columnid, name, coltype, colsubtype, coldim, colscale, "
986  "is_notnull, compression, comp_param, "
987  "size, chunks, is_systemcol, is_virtualcol, virtual_expr, is_deletedcol from "
988  "mapd_columns ORDER BY tableid, "
989  "columnid");
990  sqliteConnector_.query(columnQuery);
991  numRows = sqliteConnector_.getNumRows();
992  int32_t skip_physical_cols = 0;
993  for (size_t r = 0; r < numRows; ++r) {
995  cd->tableId = sqliteConnector_.getData<int>(r, 0);
996  cd->columnId = sqliteConnector_.getData<int>(r, 1);
997  cd->columnName = sqliteConnector_.getData<string>(r, 2);
1001  cd->columnType.set_scale(sqliteConnector_.getData<int>(r, 6));
1005  cd->columnType.set_size(sqliteConnector_.getData<int>(r, 10));
1006  cd->chunks = sqliteConnector_.getData<string>(r, 11);
1007  cd->isSystemCol = sqliteConnector_.getData<bool>(r, 12);
1008  cd->isVirtualCol = sqliteConnector_.getData<bool>(r, 13);
1009  cd->virtualExpr = sqliteConnector_.getData<string>(r, 14);
1010  cd->isDeletedCol = sqliteConnector_.getData<bool>(r, 15);
1011  cd->isGeoPhyCol = skip_physical_cols > 0;
1012  ColumnKey columnKey(cd->tableId, to_upper(cd->columnName));
1013  columnDescriptorMap_[columnKey] = cd;
1014  ColumnIdKey columnIdKey(cd->tableId, cd->columnId);
1015  columnDescriptorMapById_[columnIdKey] = cd;
1016 
1017  if (skip_physical_cols <= 0) {
1018  skip_physical_cols = cd->columnType.get_physical_cols();
1019  }
1020 
1021  auto td_itr = tableDescriptorMapById_.find(cd->tableId);
1022  CHECK(td_itr != tableDescriptorMapById_.end());
1023 
1024  if (cd->isDeletedCol) {
1025  td_itr->second->hasDeletedCol = true;
1026  setDeletedColumnUnlocked(td_itr->second, cd);
1027  } else if (cd->columnType.is_geometry() || skip_physical_cols-- <= 0) {
1028  tableDescriptorMapById_[cd->tableId]->columnIdBySpi_.push_back(cd->columnId);
1029  }
1030  }
1031  // sort columnIdBySpi_ based on columnId
1032  for (auto& tit : tableDescriptorMapById_) {
1033  std::sort(tit.second->columnIdBySpi_.begin(),
1034  tit.second->columnIdBySpi_.end(),
1035  [](const size_t a, const size_t b) -> bool { return a < b; });
1036  }
1037 
1038  string viewQuery("SELECT tableid, sql FROM mapd_views");
1039  sqliteConnector_.query(viewQuery);
1040  numRows = sqliteConnector_.getNumRows();
1041  for (size_t r = 0; r < numRows; ++r) {
1042  int32_t tableId = sqliteConnector_.getData<int>(r, 0);
1043  TableDescriptor* td = tableDescriptorMapById_[tableId];
1044  td->viewSQL = sqliteConnector_.getData<string>(r, 1);
1045  td->fragmenter = nullptr;
1046  }
1047 
1048  string frontendViewQuery(
1049  "SELECT id, state, name, image_hash, strftime('%Y-%m-%dT%H:%M:%SZ', update_time), "
1050  "userid, "
1051  "metadata "
1052  "FROM mapd_dashboards");
1053  sqliteConnector_.query(frontendViewQuery);
1054  numRows = sqliteConnector_.getNumRows();
1055  for (size_t r = 0; r < numRows; ++r) {
1056  std::shared_ptr<DashboardDescriptor> vd = std::make_shared<DashboardDescriptor>();
1057  vd->dashboardId = sqliteConnector_.getData<int>(r, 0);
1058  vd->dashboardState = sqliteConnector_.getData<string>(r, 1);
1059  vd->dashboardName = sqliteConnector_.getData<string>(r, 2);
1060  vd->imageHash = sqliteConnector_.getData<string>(r, 3);
1061  vd->updateTime = sqliteConnector_.getData<string>(r, 4);
1062  vd->userId = sqliteConnector_.getData<int>(r, 5);
1063  vd->dashboardMetadata = sqliteConnector_.getData<string>(r, 6);
1064  vd->user = getUserFromId(vd->userId);
1065  vd->dashboardSystemRoleName = generate_dashboard_system_rolename(
1067  dashboardDescriptorMap_[std::to_string(vd->userId) + ":" + vd->dashboardName] = vd;
1068  }
1069 
1070  string linkQuery(
1071  "SELECT linkid, userid, link, view_state, strftime('%Y-%m-%dT%H:%M:%SZ', "
1072  "update_time), view_metadata "
1073  "FROM mapd_links");
1074  sqliteConnector_.query(linkQuery);
1075  numRows = sqliteConnector_.getNumRows();
1076  for (size_t r = 0; r < numRows; ++r) {
1077  LinkDescriptor* ld = new LinkDescriptor();
1078  ld->linkId = sqliteConnector_.getData<int>(r, 0);
1079  ld->userId = sqliteConnector_.getData<int>(r, 1);
1080  ld->link = sqliteConnector_.getData<string>(r, 2);
1081  ld->viewState = sqliteConnector_.getData<string>(r, 3);
1082  ld->updateTime = sqliteConnector_.getData<string>(r, 4);
1083  ld->viewMetadata = sqliteConnector_.getData<string>(r, 5);
1085  linkDescriptorMapById_[ld->linkId] = ld;
1086  }
1087 
1088  /* rebuild map linking logical tables to corresponding physical ones */
1089  string logicalToPhysicalTableMapQuery(
1090  "SELECT logical_table_id, physical_table_id "
1091  "FROM mapd_logical_to_physical");
1092  sqliteConnector_.query(logicalToPhysicalTableMapQuery);
1093  numRows = sqliteConnector_.getNumRows();
1094  for (size_t r = 0; r < numRows; ++r) {
1095  int32_t logical_tb_id = sqliteConnector_.getData<int>(r, 0);
1096  int32_t physical_tb_id = sqliteConnector_.getData<int>(r, 1);
1097  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(logical_tb_id);
1098  if (physicalTableIt == logicalToPhysicalTableMapById_.end()) {
1099  /* add new entity to the map logicalToPhysicalTableMapById_ */
1100  std::vector<int32_t> physicalTables;
1101  physicalTables.push_back(physical_tb_id);
1102  const auto it_ok =
1103  logicalToPhysicalTableMapById_.emplace(logical_tb_id, physicalTables);
1104  CHECK(it_ok.second);
1105  } else {
1106  /* update map logicalToPhysicalTableMapById_ */
1107  physicalTableIt->second.push_back(physical_tb_id);
1108  }
1109  }
1110 }
1111 
1113  const list<ColumnDescriptor>& columns,
1114  const list<DictDescriptor>& dicts) {
1115  cat_write_lock write_lock(this);
1116  TableDescriptor* new_td;
1117 
1118  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(td);
1119  if (foreign_table) {
1120  auto new_foreign_table = new foreign_storage::ForeignTable();
1121  *new_foreign_table = *foreign_table;
1122  new_td = new_foreign_table;
1123  } else {
1124  new_td = new TableDescriptor();
1125  *new_td = *td;
1126  }
1127 
1128  new_td->mutex_ = std::make_shared<std::mutex>();
1129  tableDescriptorMap_[to_upper(td->tableName)] = new_td;
1130  tableDescriptorMapById_[td->tableId] = new_td;
1131  for (auto cd : columns) {
1132  ColumnDescriptor* new_cd = new ColumnDescriptor();
1133  *new_cd = cd;
1134  ColumnKey columnKey(new_cd->tableId, to_upper(new_cd->columnName));
1135  columnDescriptorMap_[columnKey] = new_cd;
1136  ColumnIdKey columnIdKey(new_cd->tableId, new_cd->columnId);
1137  columnDescriptorMapById_[columnIdKey] = new_cd;
1138 
1139  // Add deleted column to the map
1140  if (cd.isDeletedCol) {
1141  CHECK(new_td->hasDeletedCol);
1142  setDeletedColumnUnlocked(new_td, new_cd);
1143  }
1144  }
1145 
1146  std::sort(new_td->columnIdBySpi_.begin(),
1147  new_td->columnIdBySpi_.end(),
1148  [](const size_t a, const size_t b) -> bool { return a < b; });
1149 
1150  std::unique_ptr<StringDictionaryClient> client;
1151  DictRef dict_ref(currentDB_.dbId, -1);
1152  if (!string_dict_hosts_.empty()) {
1153  client.reset(new StringDictionaryClient(string_dict_hosts_.front(), dict_ref, true));
1154  }
1155  for (auto dd : dicts) {
1156  if (!dd.dictRef.dictId) {
1157  // Dummy entry created for a shard of a logical table, nothing to do.
1158  continue;
1159  }
1160  dict_ref.dictId = dd.dictRef.dictId;
1161  if (client) {
1162  client->create(dict_ref, dd.dictIsTemp);
1163  }
1164  DictDescriptor* new_dd = new DictDescriptor(dd);
1165  dictDescriptorMapByRef_[dict_ref].reset(new_dd);
1166  if (!dd.dictIsTemp) {
1167  boost::filesystem::create_directory(new_dd->dictFolderPath);
1168  }
1169  }
1170 }
1171 
// Removes the table identified by tableId from the catalog's in-memory maps,
// together with its column descriptors and any dictionary descriptors whose
// reference count drops to zero. The catalog write lock is held for the call.
//
// tableName   - upper-cased to form the key erased from tableDescriptorMap_;
//               also used in the error text.
// tableId     - key into tableDescriptorMapById_ and the column maps.
// is_on_error - when true, a dictionary-encoded column whose dictionary
//               descriptor is absent is tolerated; when false that situation
//               is a CHECK failure.
// Throws runtime_error when tableId is not present in tableDescriptorMapById_.
1172 void Catalog::removeTableFromMap(const string& tableName,
1173  const int tableId,
1174  const bool is_on_error) {
1175  cat_write_lock write_lock(this);
1176  TableDescriptorMapById::iterator tableDescIt = tableDescriptorMapById_.find(tableId);
1177  if (tableDescIt == tableDescriptorMapById_.end()) {
1178  throw runtime_error("Table " + tableName + " does not exist.");
1179  }
1180 
1181  TableDescriptor* td = tableDescIt->second;
1182 
  // A table flagged with hasDeletedCol must have exactly one entry in
  // deletedColumnPerTable_; drop it before the descriptor is destroyed.
1183  if (td->hasDeletedCol) {
1184  const auto ret = deletedColumnPerTable_.erase(td);
1185  CHECK_EQ(ret, size_t(1));
1186  }
1187 
  // Unregister the descriptor from both lookup maps (by id and by upper-cased name).
1188  tableDescriptorMapById_.erase(tableDescIt);
1189  tableDescriptorMap_.erase(to_upper(tableName));
1190  td->fragmenter = nullptr;
1191 
  // NOTE(review): listing line 1192 is absent from this rendering of the file.
  // `isTemp`, used further down, is not declared anywhere in the visible text;
  // presumably it is defined on that line - confirm against the original source.
1193  delete td;
1194 
  // A dictionary client is only created when SysCatalog reports this instance
  // as an aggregator; it is used below to drop dictionaries remotely.
1195  std::unique_ptr<StringDictionaryClient> client;
1196  if (SysCatalog::instance().isAggregator()) {
1197  CHECK(!string_dict_hosts_.empty());
1198  DictRef dict_ref(currentDB_.dbId, -1);
1199  client.reset(new StringDictionaryClient(string_dict_hosts_.front(), dict_ref, true));
1200  }
1201 
1202  // delete all column descriptors for the table
1203  // no more link columnIds to sequential indexes!
  // The loop advances manually: entries for other tables are stepped over,
  // entries for this table are erased.
1204  for (auto cit = columnDescriptorMapById_.begin();
1205  cit != columnDescriptorMapById_.end();) {
1206  if (tableId != std::get<0>(cit->first)) {
1207  ++cit;
1208  } else {
  // Read the column id and advance the iterator in the same expression, so
  // that `cit` no longer refers to the element erased just below.
1209  int i = std::get<1>(cit++->first);
1210  ColumnIdKey cidKey(tableId, i);
1211  ColumnDescriptorMapById::iterator colDescIt = columnDescriptorMapById_.find(cidKey);
1212  ColumnDescriptor* cd = colDescIt->second;
1213  columnDescriptorMapById_.erase(colDescIt);
1214  ColumnKey cnameKey(tableId, to_upper(cd->columnName));
1215  columnDescriptorMap_.erase(cnameKey);
1216  const int dictId = cd->columnType.get_comp_param();
1217  // Dummy dictionaries created for a shard of a logical table have the id set to
1218  // zero.
1219  if (cd->columnType.get_compression() == kENCODING_DICT && dictId) {
1220  INJECT_TIMER(removingDicts);
1221  DictRef dict_ref(currentDB_.dbId, dictId);
1222  const auto dictIt = dictDescriptorMapByRef_.find(dict_ref);
1223  // If we're removing this table due to an error, it is possible that the string
1224  // dictionary reference was never populated. Don't crash, just continue cleaning
1225  // up the TableDescriptor and ColumnDescriptors
1226  if (!is_on_error) {
1227  CHECK(dictIt != dictDescriptorMapByRef_.end());
1228  } else {
1229  if (dictIt == dictDescriptorMapByRef_.end()) {
  // NOTE(review): this `continue` skips the `delete cd` at the bottom of the
  // branch although cd has already been erased from both column maps above -
  // looks like the descriptor is leaked on this path; confirm.
1230  continue;
1231  }
1232  }
  // Release this column's reference; the dictionary is torn down only when
  // the last reference goes away.
1233  const auto& dd = dictIt->second;
1234  CHECK_GE(dd->refcount, 1);
1235  --dd->refcount;
1236  if (!dd->refcount) {
1237  dd->stringDict.reset();
1238  if (!isTemp) {
1239  File_Namespace::renameForDelete(dd->dictFolderPath);
1240  }
1241  if (client) {
1242  client->drop(dict_ref);
1243  }
1244  dictDescriptorMapByRef_.erase(dictIt);
1245  }
1246  }
1247 
1248  delete cd;
1249  }
1250  }
1251 }
1252 
1254  cat_write_lock write_lock(this);
1256 }
1257 
1259  cat_write_lock write_lock(this);
1261  std::make_shared<DashboardDescriptor>(vd);
1262 }
1263 
// Builds the list of DBObject entries for the tables/views named in a
// dashboard's metadata.
//
// view_meta - dashboard metadata text; it is handed to
//             parse_underlying_dashboard_objects() to obtain the object names.
// user_id   - passed through as the third constructor argument of each DBObject.
// Returns one DBObject per name that resolves through getMetadataForTable();
// names that do not resolve are logged at INFO level and skipped.
1264 std::vector<DBObject> Catalog::parseDashboardObjects(const std::string& view_meta,
1265  const int& user_id) {
1266  std::vector<DBObject> objects;
1267  DBObjectKey key;
1268  key.dbId = currentDB_.dbId;
  // Helper that stamps the shared `key` (captured by reference, dbId already
  // set) with a permission type and object id and returns a copy of it.
1269  auto _key_place = [&key](auto type, auto id) {
1270  key.permissionType = type;
1271  key.objectId = id;
1272  return key;
1273  };
1274  for (auto object_name : parse_underlying_dashboard_objects(view_meta)) {
1275  auto td = getMetadataForTable(object_name);
1276  if (!td) {
1277  // Parsed object source is not present in current database
1278  // LOG the info and ignore
1279  LOG(INFO) << "Ignoring dashboard source Table/View: " << object_name
1280  << " no longer exists in current DB.";
1281  continue;
1282  }
1283  // Dashboard source can be Table or View
1284  const auto object_type = td->isView ? ViewDBObjectType : TableDBObjectType;
  // NOTE(review): listing line 1286, which holds the non-view alternative of
  // this conditional expression, is absent from this rendering - confirm
  // against the original source.
1285  const auto priv = td->isView ? AccessPrivileges::SELECT_FROM_VIEW
1287  objects.emplace_back(_key_place(object_type, td->tableId), priv, user_id);
1288  objects.back().setObjectType(td->isView ? ViewDBObjectType : TableDBObjectType);
1289  objects.back().setName(td->tableName);
1290  }
1291  return objects;
1292 }
1293 
// Creates the dashboard system role named dash_role_name if SysCatalog does not
// know it yet, otherwise reconciles the role's table/view privileges with the
// objects currently referenced by the dashboard metadata.
//
// view_meta      - dashboard metadata, resolved to objects by parseDashboardObjects().
// user_id        - forwarded to parseDashboardObjects().
// dash_role_name - name of the role to create or update.
// The existing comments below mark the SysCatalog calls as transactionally unsafe.
1294 void Catalog::createOrUpdateDashboardSystemRole(const std::string& view_meta,
1295  const int32_t& user_id,
1296  const std::string& dash_role_name) {
1297  auto objects = parseDashboardObjects(view_meta, user_id);
1298  Role* rl = SysCatalog::instance().getRoleGrantee(dash_role_name);
1299  if (!rl) {
1300  // Dashboard role does not exist
1301  // create role and grant privileges
1302  // NOTE(wamsi): Transactionally unsafe
1303  SysCatalog::instance().createRole(dash_role_name, false);
1304  SysCatalog::instance().grantDBObjectPrivilegesBatch({dash_role_name}, objects, *this);
1305  } else {
1306  // Dashboard system role already exists
1307  // Add/remove privileges on objects
1308  std::set<DBObjectKey> revoke_keys;
1309  auto ex_objects = rl->getDbObjects(true);
  // Collect the table/view keys the role currently holds that are no longer
  // among the dashboard's parsed objects; other permission types are ignored.
1310  for (auto key : *ex_objects | boost::adaptors::map_keys) {
1311  if (key.permissionType != TableDBObjectType &&
1312  key.permissionType != ViewDBObjectType) {
1313  continue;
1314  }
1315  bool found = false;
1316  for (auto obj : objects) {
1317  found = key == obj.getObjectKey() ? true : false;
1318  if (found) {
1319  break;
1320  }
1321  }
1322  if (!found) {
1323  revoke_keys.insert(key);
1324  }
1325  }
1326  for (auto& key : revoke_keys) {
1327  // revoke privs on object since the object is no
1328  // longer used by the dashboard as source
1329  // NOTE(wamsi): Transactionally unsafe
  // NOTE(review): listing line 1330, which opens the call whose argument list
  // continues on the next line, is absent from this rendering - confirm
  // against the original source.
1331  dash_role_name, *rl->findDbObject(key, true), *this);
1332  }
1333  // Update privileges on remaining objects
1334  // NOTE(wamsi): Transactionally unsafe
1335  SysCatalog::instance().grantDBObjectPrivilegesBatch({dash_role_name}, objects, *this);
1336  }
1337 }
1338 
1340  cat_write_lock write_lock(this);
1341  LinkDescriptor* new_ld = new LinkDescriptor();
1342  *new_ld = ld;
1344  linkDescriptorMapById_[ld.linkId] = new_ld;
1345 }
1346 
1348  auto time_ms = measure<>::execution([&]() {
1349  // instanciate table fragmenter upon first use
1350  // assume only insert order fragmenter is supported
1352  vector<Chunk> chunkVec;
1353  list<const ColumnDescriptor*> columnDescs;
1354  getAllColumnMetadataForTableImpl(td, columnDescs, true, false, true);
1355  Chunk::translateColumnDescriptorsToChunkVec(columnDescs, chunkVec);
1356  ChunkKey chunkKeyPrefix = {currentDB_.dbId, td->tableId};
1357  if (td->sortedColumnId > 0) {
1358  td->fragmenter = std::make_shared<SortedOrderFragmenter>(chunkKeyPrefix,
1359  chunkVec,
1360  dataMgr_.get(),
1361  const_cast<Catalog*>(this),
1362  td->tableId,
1363  td->shard,
1364  td->maxFragRows,
1365  td->maxChunkSize,
1366  td->fragPageSize,
1367  td->maxRows,
1368  td->persistenceLevel);
1369  } else {
1370  td->fragmenter = std::make_shared<InsertOrderFragmenter>(chunkKeyPrefix,
1371  chunkVec,
1372  dataMgr_.get(),
1373  const_cast<Catalog*>(this),
1374  td->tableId,
1375  td->shard,
1376  td->maxFragRows,
1377  td->maxChunkSize,
1378  td->fragPageSize,
1379  td->maxRows,
1380  td->persistenceLevel,
1381  !td->storageType.empty());
1382  }
1383  });
1384  LOG(INFO) << "Instantiating Fragmenter for table " << td->tableName << " took "
1385  << time_ms << "ms";
1386 }
1387 
1389  const std::string& tableName) const {
1390  auto tableDescIt = tableDescriptorMap_.find(to_upper(tableName));
1391  if (tableDescIt == tableDescriptorMap_.end()) { // check to make sure table exists
1392  return nullptr;
1393  }
1394  return dynamic_cast<foreign_storage::ForeignTable*>(tableDescIt->second);
1395 };
1396 
1398  const std::string& tableName) const {
1399  cat_read_lock read_lock(this);
1400  return getForeignTableUnlocked(tableName);
1401 };
1402 
// Looks up a table descriptor by name (the name is upper-cased before the
// lookup in tableDescriptorMap_).
//
// tableName          - table name to look up.
// populateFragmenter - see the existing comment below; when true and the table
//                      is not a view and has no fragmenter yet, the guarded
//                      branch runs while the descriptor's mutex is held.
// Returns nullptr when no entry exists for the name, otherwise the descriptor.
// Takes the catalog read lock and then the per-descriptor mutex.
1403 const TableDescriptor* Catalog::getMetadataForTable(const string& tableName,
1404  const bool populateFragmenter) const {
1405  // we give option not to populate fragmenter (default true/yes) as it can be heavy for
1406  // pure metadata calls
1407  cat_read_lock read_lock(this);
1408  auto tableDescIt = tableDescriptorMap_.find(to_upper(tableName));
1409  if (tableDescIt == tableDescriptorMap_.end()) { // check to make sure table exists
1410  return nullptr;
1411  }
1412  TableDescriptor* td = tableDescIt->second;
1413  std::unique_lock<std::mutex> td_lock(*td->mutex_.get());
1414  if (populateFragmenter && td->fragmenter == nullptr && !td->isView) {
  // NOTE(review): listing line 1415 (the body of this branch) is absent from
  // this rendering; presumably it creates the fragmenter - confirm against the
  // original source.
1416  }
1417  return td; // returns pointer to table descriptor
1418 }
1419 
1421  int tableId,
1422  const bool populateFragmenter) const {
1423  auto tableDescIt = tableDescriptorMapById_.find(tableId);
1424  if (tableDescIt == tableDescriptorMapById_.end()) { // check to make sure table exists
1425  return nullptr;
1426  }
1427  TableDescriptor* td = tableDescIt->second;
1428  if (populateFragmenter) {
1429  std::unique_lock<std::mutex> td_lock(*td->mutex_.get());
1430  if (td->fragmenter == nullptr && !td->isView) {
1432  }
1433  }
1434  return td; // returns pointer to table descriptor
1435 }
1436 
1438  bool populateFragmenter) const {
1439  cat_read_lock read_lock(this);
1440  return getMetadataForTableImpl(tableId, populateFragmenter);
1441 }
1442 
1444  const bool load_dict) const {
1445  cat_read_lock read_lock(this);
1446  return getMetadataForDictUnlocked(dict_id, load_dict);
1447 }
1448 
1450  auto tableDescIt = tableDescriptorMapById_.find(tableId);
1451  if (tableDescIt == tableDescriptorMapById_.end()) { // check to make sure table exists
1452  return nullptr;
1453  }
1454  return tableDescIt->second;
1455 }
1456 
1458  const bool loadDict) const {
1459  const DictRef dictRef(currentDB_.dbId, dictId);
1460  auto dictDescIt = dictDescriptorMapByRef_.find(dictRef);
1461  if (dictDescIt ==
1462  dictDescriptorMapByRef_.end()) { // check to make sure dictionary exists
1463  return nullptr;
1464  }
1465  auto& dd = dictDescIt->second;
1466 
1467  if (loadDict) {
1468  std::lock_guard string_dict_lock(*dd->string_dict_mutex);
1469  if (!dd->stringDict) {
1470  auto time_ms = measure<>::execution([&]() {
1471  if (string_dict_hosts_.empty()) {
1472  if (dd->dictIsTemp) {
1473  dd->stringDict = std::make_shared<StringDictionary>(
1474  dd->dictFolderPath, true, true, g_cache_string_hash);
1475  } else {
1476  dd->stringDict = std::make_shared<StringDictionary>(
1477  dd->dictFolderPath, false, true, g_cache_string_hash);
1478  }
1479  } else {
1480  dd->stringDict =
1481  std::make_shared<StringDictionary>(string_dict_hosts_.front(), dd->dictRef);
1482  }
1483  });
1484  LOG(INFO) << "Time to load Dictionary " << dd->dictRef.dbId << "_"
1485  << dd->dictRef.dictId << " was " << time_ms << "ms";
1486  }
1487  }
1488 
1489  return dd.get();
1490 }
1491 
1492 const std::vector<LeafHostInfo>& Catalog::getStringDictionaryHosts() const {
1493  return string_dict_hosts_;
1494 }
1495 
1497  const string& columnName) const {
1498  cat_read_lock read_lock(this);
1499 
1500  ColumnKey columnKey(tableId, to_upper(columnName));
1501  auto colDescIt = columnDescriptorMap_.find(columnKey);
1502  if (colDescIt ==
1503  columnDescriptorMap_.end()) { // need to check to make sure column exists for table
1504  return nullptr;
1505  }
1506  return colDescIt->second;
1507 }
1508 
1509 const ColumnDescriptor* Catalog::getMetadataForColumn(int table_id, int column_id) const {
1510  cat_read_lock read_lock(this);
1511  return getMetadataForColumnUnlocked(table_id, column_id);
1512 }
1513 
1515  int columnId) const {
1516  ColumnIdKey columnIdKey(tableId, columnId);
1517  auto colDescIt = columnDescriptorMapById_.find(columnIdKey);
1518  if (colDescIt == columnDescriptorMapById_
1519  .end()) { // need to check to make sure column exists for table
1520  return nullptr;
1521  }
1522  return colDescIt->second;
1523 }
1524 
1525 const int Catalog::getColumnIdBySpiUnlocked(const int table_id, const size_t spi) const {
1526  const auto tabDescIt = tableDescriptorMapById_.find(table_id);
1527  CHECK(tableDescriptorMapById_.end() != tabDescIt);
1528  const auto& columnIdBySpi = tabDescIt->second->columnIdBySpi_;
1529 
1530  auto spx = spi;
1531  int phi = 0;
1532  if (spx >= SPIMAP_MAGIC1) // see Catalog.h
1533  {
1534  phi = (spx - SPIMAP_MAGIC1) % SPIMAP_MAGIC2;
1535  spx = (spx - SPIMAP_MAGIC1) / SPIMAP_MAGIC2;
1536  }
1537 
1538  CHECK(0 < spx && spx <= columnIdBySpi.size());
1539  return columnIdBySpi[spx - 1] + phi;
1540 }
1541 
1542 const int Catalog::getColumnIdBySpi(const int table_id, const size_t spi) const {
1543  cat_read_lock read_lock(this);
1544  return getColumnIdBySpiUnlocked(table_id, spi);
1545 }
1546 
1548  const size_t spi) const {
1549  cat_read_lock read_lock(this);
1550 
1551  const auto columnId = getColumnIdBySpiUnlocked(tableId, spi);
1552  ColumnIdKey columnIdKey(tableId, columnId);
1553  const auto colDescIt = columnDescriptorMapById_.find(columnIdKey);
1554  return columnDescriptorMapById_.end() == colDescIt ? nullptr : colDescIt->second;
1555 }
1556 
// Deletes the given dashboards on behalf of `user`.
//
// Phase 1 validates every id: ids with no dashboard metadata and ids for which
// SysCatalog::checkPrivileges() rejects DELETE_DASHBOARD are collected, and if
// either list is non-empty a std::runtime_error listing them is thrown before
// anything is modified.
// Phase 2 (under the catalog write lock, inside a sqlite transaction) removes
// each dashboard from dashboardDescriptorMap_; on any std::exception the sqlite
// transaction is rolled back and the exception is rethrown.
//
// dashboard_ids - ids of the dashboards to delete.
// user          - user whose privileges are checked.
1557 void Catalog::deleteMetadataForDashboards(const std::vector<int32_t> dashboard_ids,
1558  const UserMetadata& user) {
1559  std::stringstream invalid_ids, restricted_ids;
1560 
1561  for (int32_t dashboard_id : dashboard_ids) {
1562  if (!getMetadataForDashboard(dashboard_id)) {
1563  invalid_ids << (!invalid_ids.str().empty() ? ", " : "") << dashboard_id;
1564  continue;
1565  }
1566  DBObject object(dashboard_id, DashboardDBObjectType);
1567  object.loadKey(*this);
1568  object.setPrivileges(AccessPrivileges::DELETE_DASHBOARD);
1569  std::vector<DBObject> privs = {object};
1570  if (!SysCatalog::instance().checkPrivileges(user, privs))
1571  restricted_ids << (!restricted_ids.str().empty() ? ", " : "") << dashboard_id;
1572  }
1573 
  // Report all invalid and all restricted ids in a single error message.
1574  if (invalid_ids.str().size() > 0 || restricted_ids.str().size() > 0) {
1575  std::stringstream error_message;
1576  error_message << "Delete dashboard(s) failed with error(s):";
1577  if (invalid_ids.str().size() > 0)
1578  error_message << "\nDashboard id: " << invalid_ids.str()
1579  << " - Dashboard id does not exist";
1580  if (restricted_ids.str().size() > 0)
1581  error_message
1582  << "\nDashboard id: " << restricted_ids.str()
1583  << " - User should be either owner of dashboard or super user to delete it";
1584  throw std::runtime_error(error_message.str());
1585  }
1586  std::vector<DBObject> dash_objs;
1587 
1588  for (int32_t dashboard_id : dashboard_ids) {
1589  dash_objs.push_back(DBObject(dashboard_id, DashboardDBObjectType));
1590  }
1591  // BE-5245: Transactionally unsafe (like other combined Catalog/Syscatalog operations)
  // NOTE(review): listing line 1592 is absent from this rendering; the later
  // comment mentioning revokeDBObjectPrivileges suggests dash_objs is consumed
  // there - confirm against the original source.
1593  {
1594  cat_write_lock write_lock(this);
  // NOTE(review): listing line 1595 is absent from this rendering.
1596 
1597  sqliteConnector_.query("BEGIN TRANSACTION");
1598  try {
1599  for (int32_t dashboard_id : dashboard_ids) {
1600  auto dash = getMetadataForDashboard(dashboard_id);
1601  // Dash should still exist if revokeDBObjectPrivileges passed but throw and
1602  // rollback if already deleted
1603  if (!dash) {
1604  throw std::runtime_error(
1605  std::string("Delete dashboard(s) failed with error(s):\nDashboard id: ") +
1606  std::to_string(dashboard_id) + " - Dashboard id does not exist ");
1607  }
  // Copy the key parts out of the descriptor before its map entry is erased.
1608  std::string user_id = std::to_string(dash->userId);
1609  std::string dash_name = dash->dashboardName;
1610  auto viewDescIt = dashboardDescriptorMap_.find(user_id + ":" + dash_name);
  // NOTE(review): this in-memory erase is not undone by the catch block below,
  // which only rolls back the sqlite transaction - confirm that is intended.
1611  dashboardDescriptorMap_.erase(viewDescIt);
  // NOTE(review): listing line 1612, which opens the call that takes the
  // DELETE statement and its parameters below, is absent from this rendering.
1613  "DELETE FROM mapd_dashboards WHERE name = ? and userid = ?",
1614  std::vector<std::string>{dash_name, user_id});
1615  }
1616  } catch (std::exception& e) {
1617  sqliteConnector_.query("ROLLBACK TRANSACTION");
1618  throw;
1619  }
1620  sqliteConnector_.query("END TRANSACTION");
1621  }
1622 }
1623 
1625  const string& userId,
1626  const string& dashName) const {
1627  cat_read_lock read_lock(this);
1628 
1629  auto viewDescIt = dashboardDescriptorMap_.find(userId + ":" + dashName);
1630  if (viewDescIt == dashboardDescriptorMap_.end()) { // check to make sure view exists
1631  return nullptr;
1632  }
1633  return viewDescIt->second.get(); // returns pointer to view descriptor
1634 }
1635 
1637  cat_read_lock read_lock(this);
1638  std::string userId;
1639  std::string name;
1640  bool found{false};
1641  {
1642  for (auto descp : dashboardDescriptorMap_) {
1643  auto dash = descp.second.get();
1644  if (dash->dashboardId == id) {
1645  userId = std::to_string(dash->userId);
1646  name = dash->dashboardName;
1647  found = true;
1648  break;
1649  }
1650  }
1651  }
1652  if (found) {
1653  return getMetadataForDashboard(userId, name);
1654  }
1655  return nullptr;
1656 }
1657 
1658 const LinkDescriptor* Catalog::getMetadataForLink(const string& link) const {
1659  cat_read_lock read_lock(this);
1660  auto linkDescIt = linkDescriptorMap_.find(link);
1661  if (linkDescIt == linkDescriptorMap_.end()) { // check to make sure view exists
1662  return nullptr;
1663  }
1664  return linkDescIt->second; // returns pointer to view descriptor
1665 }
1666 
1668  cat_read_lock read_lock(this);
1669  auto linkDescIt = linkDescriptorMapById_.find(linkId);
1670  if (linkDescIt == linkDescriptorMapById_.end()) { // check to make sure view exists
1671  return nullptr;
1672  }
1673  return linkDescIt->second;
1674 }
1675 
1677  int table_id) const {
1678  auto table = getMetadataForTableImpl(table_id, false);
1679  CHECK(table);
1680  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(table);
1681  CHECK(foreign_table);
1682  return foreign_table;
1683 }
1684 
1686  cat_read_lock read_lock(this);
1687  return getForeignTableUnlocked(table_id);
1688 }
1689 
1691  const TableDescriptor* td,
1692  list<const ColumnDescriptor*>& columnDescriptors,
1693  const bool fetchSystemColumns,
1694  const bool fetchVirtualColumns,
1695  const bool fetchPhysicalColumns) const {
1696  int32_t skip_physical_cols = 0;
1697  for (const auto& columnDescriptor : columnDescriptorMapById_) {
1698  if (!fetchPhysicalColumns && skip_physical_cols > 0) {
1699  --skip_physical_cols;
1700  continue;
1701  }
1702  auto cd = columnDescriptor.second;
1703  if (cd->tableId != td->tableId) {
1704  continue;
1705  }
1706  if (!fetchSystemColumns && cd->isSystemCol) {
1707  continue;
1708  }
1709  if (!fetchVirtualColumns && cd->isVirtualCol) {
1710  continue;
1711  }
1712  if (!fetchPhysicalColumns) {
1713  const auto& col_ti = cd->columnType;
1714  skip_physical_cols = col_ti.get_physical_cols();
1715  }
1716  columnDescriptors.push_back(cd);
1717  }
1718 }
1719 
// Locked variant of the per-table column listing: takes the catalog read lock
// and passes tableId and the three fetch flags on unchanged.
//
// tableId              - id of the table whose columns are requested.
// fetchSystemColumns   - forwarded as-is.
// fetchVirtualColumns  - forwarded as-is.
// fetchPhysicalColumns - forwarded as-is.
1720 std::list<const ColumnDescriptor*> Catalog::getAllColumnMetadataForTable(
1721  const int tableId,
1722  const bool fetchSystemColumns,
1723  const bool fetchVirtualColumns,
1724  const bool fetchPhysicalColumns) const {
1725  cat_read_lock read_lock(this);
  // NOTE(review): listing line 1726, which opens the call receiving the
  // arguments below, is absent from this rendering; presumably it returns the
  // result of getAllColumnMetadataForTableUnlocked - confirm against the
  // original source.
1727  tableId, fetchSystemColumns, fetchVirtualColumns, fetchPhysicalColumns);
1728 }
1729 
// Lock-free variant of the per-table column listing: resolves the table
// descriptor (without instantiating a fragmenter) and returns the list of
// column descriptors filled in for it.
//
// tableId              - id of the table whose columns are requested.
// fetchSystemColumns   - forwarded as-is to the call that fills the list.
// fetchVirtualColumns  - forwarded as-is.
// fetchPhysicalColumns - forwarded as-is.
// This function takes no catalog lock itself.
1730 std::list<const ColumnDescriptor*> Catalog::getAllColumnMetadataForTableUnlocked(
1731  const int tableId,
1732  const bool fetchSystemColumns,
1733  const bool fetchVirtualColumns,
1734  const bool fetchPhysicalColumns) const {
1735  std::list<const ColumnDescriptor*> columnDescriptors;
  // NOTE(review): getMetadataForTableImpl can return nullptr for an unknown id
  // and td is not checked here - verify that the callee handles that.
1736  const TableDescriptor* td =
1737  getMetadataForTableImpl(tableId, false); // dont instantiate fragmenter
  // NOTE(review): listing line 1738, which opens the call receiving the
  // arguments below, is absent from this rendering - confirm against the
  // original source.
1739  columnDescriptors,
1740  fetchSystemColumns,
1741  fetchVirtualColumns,
1742  fetchPhysicalColumns);
1743  return columnDescriptors;
1744 }
1745 
1746 list<const TableDescriptor*> Catalog::getAllTableMetadata() const {
1747  cat_read_lock read_lock(this);
1748  list<const TableDescriptor*> table_list;
1749  for (auto p : tableDescriptorMapById_) {
1750  table_list.push_back(p.second);
1751  }
1752  return table_list;
1753 }
1754 
1755 list<const DashboardDescriptor*> Catalog::getAllDashboardsMetadata() const {
1756  list<const DashboardDescriptor*> view_list;
1757  for (auto p : dashboardDescriptorMap_) {
1758  view_list.push_back(p.second.get());
1759  }
1760  return view_list;
1761 }
1762 
1764  const auto& td = *tableDescriptorMapById_[cd.tableId];
1765  list<DictDescriptor> dds;
1766  setColumnDictionary(cd, dds, td, true);
1767  auto& dd = dds.back();
1768  CHECK(dd.dictRef.dictId);
1769 
1770  std::unique_ptr<StringDictionaryClient> client;
1771  if (!string_dict_hosts_.empty()) {
1772  client.reset(new StringDictionaryClient(
1773  string_dict_hosts_.front(), DictRef(currentDB_.dbId, -1), true));
1774  }
1775  if (client) {
1776  client->create(dd.dictRef, dd.dictIsTemp);
1777  }
1778 
1779  DictDescriptor* new_dd = new DictDescriptor(dd);
1780  dictDescriptorMapByRef_[dd.dictRef].reset(new_dd);
1781  if (!dd.dictIsTemp) {
1782  boost::filesystem::create_directory(new_dd->dictFolderPath);
1783  }
1784  return dd.dictRef;
1785 }
1786 
1788  cat_write_lock write_lock(this);
1790  if (!(cd.columnType.is_string() || cd.columnType.is_string_array())) {
1791  return;
1792  }
1793  if (!(cd.columnType.get_compression() == kENCODING_DICT)) {
1794  return;
1795  }
1796  const auto dictId = cd.columnType.get_comp_param();
1797  CHECK_GT(dictId, 0);
1798  // decrement and zero check dict ref count
1799  const auto td = getMetadataForTable(cd.tableId);
1800  CHECK(td);
1802  "UPDATE mapd_dictionaries SET refcount = refcount - 1 WHERE dictid = ?",
1803  std::to_string(dictId));
1805  "SELECT refcount FROM mapd_dictionaries WHERE dictid = ?", std::to_string(dictId));
1806  const auto refcount = sqliteConnector_.getData<int>(0, 0);
1807  VLOG(3) << "Dictionary " << dictId << "from dropped table has reference count "
1808  << refcount;
1809  if (refcount > 0) {
1810  return;
1811  }
1812  const DictRef dictRef(currentDB_.dbId, dictId);
1813  sqliteConnector_.query_with_text_param("DELETE FROM mapd_dictionaries WHERE dictid = ?",
1814  std::to_string(dictId));
1815  File_Namespace::renameForDelete(basePath_ + "/mapd_data/DB_" +
1816  std::to_string(currentDB_.dbId) + "_DICT_" +
1817  std::to_string(dictId));
1818 
1819  std::unique_ptr<StringDictionaryClient> client;
1820  if (!string_dict_hosts_.empty()) {
1821  client.reset(new StringDictionaryClient(string_dict_hosts_.front(), dictRef, true));
1822  }
1823  if (client) {
1824  client->drop(dictRef);
1825  }
1826 
1827  dictDescriptorMapByRef_.erase(dictRef);
1828 }
1829 
1831  std::map<int, StringDictionary*>& stringDicts) {
1832  // learn 'committed' ColumnDescriptor of this column
1833  auto cit = columnDescriptorMap_.find(ColumnKey(cd.tableId, to_upper(cd.columnName)));
1834  CHECK(cit != columnDescriptorMap_.end());
1835  auto& ccd = *cit->second;
1836 
1837  if (!(ccd.columnType.is_string() || ccd.columnType.is_string_array())) {
1838  return;
1839  }
1840  if (!(ccd.columnType.get_compression() == kENCODING_DICT)) {
1841  return;
1842  }
1843  if (!(ccd.columnType.get_comp_param() > 0)) {
1844  return;
1845  }
1846 
1847  auto dictId = ccd.columnType.get_comp_param();
1848  getMetadataForDict(dictId);
1849 
1850  const DictRef dictRef(currentDB_.dbId, dictId);
1851  auto dit = dictDescriptorMapByRef_.find(dictRef);
1852  CHECK(dit != dictDescriptorMapByRef_.end());
1853  CHECK(dit->second);
1854  CHECK(dit->second.get()->stringDict);
1855  stringDicts[ccd.columnId] = dit->second.get()->stringDict.get();
1856 }
1857 
// NOTE(review): the signature line of this definition (and several statement-opening
// lines further down) are absent from this listing. The recursive call below suggests
// this is the body of addColumn(td, cd) — confirm against the original file.
1859  cat_write_lock write_lock(this);
1860  // caller must handle sqlite/chunk transaction TOGETHER
1861  cd.tableId = td.tableId;
// When td.nShards > 0 and td.shard < 0, the column is first added to every table
// returned by getPhysicalTablesDescriptors(), each one receiving its own copy of cd.
1862  if (td.nShards > 0 && td.shard < 0) {
1863  for (const auto shard : getPhysicalTablesDescriptors(&td)) {
1864  auto shard_cd = cd;
1865  addColumn(*shard, shard_cd);
1866  }
1867  }
1869  addDictionary(cd);
1870  }
1871 
// Persist the column. The columnid is not supplied by the caller: the sub-select makes
// SQLite assign max(columnid) + 1 within this table.
1873  "INSERT INTO mapd_columns (tableid, columnid, name, coltype, colsubtype, coldim, "
1874  "colscale, is_notnull, "
1875  "compression, comp_param, size, chunks, is_systemcol, is_virtualcol, virtual_expr, "
1876  "is_deletedcol) "
1877  "VALUES (?, "
1878  "(SELECT max(columnid) + 1 FROM mapd_columns WHERE tableid = ?), "
1879  "?, ?, ?, "
1880  "?, "
1881  "?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
1882  std::vector<std::string>{std::to_string(td.tableId),
1883  std::to_string(td.tableId),
1884  cd.columnName,
1893  "",
1896  cd.virtualExpr,
1898 
// Keep the persisted column count of the table in step with the insert above.
1900  "UPDATE mapd_tables SET ncolumns = ncolumns + 1 WHERE tableid = ?",
1901  std::vector<std::string>{std::to_string(td.tableId)});
1902 
// Read back the column id generated by the INSERT and store it in the caller's cd.
1904  "SELECT columnid FROM mapd_columns WHERE tableid = ? AND name = ?",
1905  std::vector<std::string>{std::to_string(td.tableId), cd.columnName});
1906  cd.columnId = sqliteConnector_.getData<int>(0, 0);
1907 
// Mirror the change in memory: bump the cached column count and queue a heap copy of
// the descriptor as (old = nullptr, new = ncd) in columnDescriptorsForRoll.
1908  ++tableDescriptorMapById_[td.tableId]->nColumns;
1909  auto ncd = new ColumnDescriptor(cd);
1912  columnDescriptorsForRoll.emplace_back(nullptr, ncd);
1913 }
1914 
// NOTE(review): the signature line of this definition (and several statement-opening
// lines) are absent from this listing. The recursive call at the end suggests this is
// the body of dropColumn(td, cd) — confirm against the original file.
1916  cat_write_lock write_lock(this);
1918  // caller must handle sqlite/chunk transaction TOGETHER
// Remove the column row and decrement the persisted column count of the table.
1920  "DELETE FROM mapd_columns where tableid = ? and columnid = ?",
1921  std::vector<std::string>{std::to_string(td.tableId), std::to_string(cd.columnId)});
1922 
1924  "UPDATE mapd_tables SET ncolumns = ncolumns - 1 WHERE tableid = ?",
1925  std::vector<std::string>{std::to_string(td.tableId)});
1926 
1927  ColumnDescriptorMap::iterator columnDescIt =
1929  CHECK(columnDescIt != columnDescriptorMap_.end());
1930 
// Queue the descriptor as (old = descriptor, new = nullptr) in columnDescriptorsForRoll
// before it is unlinked from the name map; it is not deleted here.
1931  columnDescriptorsForRoll.emplace_back(columnDescIt->second, nullptr);
1932 
1933  columnDescriptorMap_.erase(columnDescIt);
1935  --tableDescriptorMapById_[td.tableId]->nColumns;
1936  // for each shard
1937  if (td.nShards > 0 && td.shard < 0) {
1938  for (const auto shard : getPhysicalTablesDescriptors(&td)) {
// The shard's column is looked up by the same column id as the one being dropped.
1939  const auto shard_cd = getMetadataForColumn(shard->tableId, cd.columnId);
1940  CHECK(shard_cd);
1941  dropColumn(*shard, *shard_cd);
1942  }
1943  }
1944 }
1945 
1946 void Catalog::roll(const bool forward) {
1947  cat_write_lock write_lock(this);
1948  std::set<const TableDescriptor*> tds;
1949 
1950  for (const auto& cdr : columnDescriptorsForRoll) {
1951  auto ocd = cdr.first;
1952  auto ncd = cdr.second;
1953  CHECK(ocd || ncd);
1954  auto tabDescIt = tableDescriptorMapById_.find((ncd ? ncd : ocd)->tableId);
1955  CHECK(tableDescriptorMapById_.end() != tabDescIt);
1956  auto td = tabDescIt->second;
1957  auto& vc = td->columnIdBySpi_;
1958  if (forward) {
1959  if (ocd) {
1960  if (nullptr == ncd ||
1961  ncd->columnType.get_comp_param() != ocd->columnType.get_comp_param()) {
1962  delDictionary(*ocd);
1963  }
1964 
1965  vc.erase(std::remove(vc.begin(), vc.end(), ocd->columnId), vc.end());
1966 
1967  delete ocd;
1968  }
1969  if (ncd) {
1970  // append columnId if its new and not phy geo
1971  if (vc.end() == std::find(vc.begin(), vc.end(), ncd->columnId)) {
1972  if (!ncd->isGeoPhyCol) {
1973  vc.push_back(ncd->columnId);
1974  }
1975  }
1976  }
1977  tds.insert(td);
1978  } else {
1979  if (ocd) {
1980  columnDescriptorMap_[ColumnKey(ocd->tableId, to_upper(ocd->columnName))] = ocd;
1981  columnDescriptorMapById_[ColumnIdKey(ocd->tableId, ocd->columnId)] = ocd;
1982  }
1983  // roll back the dict of new column
1984  if (ncd) {
1985  columnDescriptorMap_.erase(ColumnKey(ncd->tableId, to_upper(ncd->columnName)));
1986  columnDescriptorMapById_.erase(ColumnIdKey(ncd->tableId, ncd->columnId));
1987  if (nullptr == ocd ||
1988  ocd->columnType.get_comp_param() != ncd->columnType.get_comp_param()) {
1989  delDictionary(*ncd);
1990  }
1991  delete ncd;
1992  }
1993  }
1994  }
1995  columnDescriptorsForRoll.clear();
1996 
1997  if (forward) {
1998  for (const auto td : tds) {
1999  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
2000  }
2001  }
2002 }
2003 
// NOTE(review): the first line of this signature is absent from this listing; the call
// site further down suggests this is expandGeoColumn(cd, columns) — confirm.
// For a geo-typed cd, appends the physical columns backing it to `columns`, in the
// order shown per case below. A non-geo cd leaves `columns` untouched. Throws
// runtime_error for a geo type that none of the cases handle.
2005  list<ColumnDescriptor>& columns) {
2006  const auto& col_ti = cd.columnType;
2007  if (IS_GEO(col_ti.get_type())) {
2008  switch (col_ti.get_type()) {
// POINT: a single "<name>_coords" TINYINT array with a fixed size.
2009  case kPOINT: {
// NOTE(review): the `true` constructor argument presumably marks the descriptor as a
// geo physical column (cf. isGeoPhyCol used elsewhere in this file) — confirm.
2010  ColumnDescriptor physical_cd_coords(true);
2011  physical_cd_coords.columnName = cd.columnName + "_coords";
2012  SQLTypeInfo coords_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2013  // Raw data: compressed/uncompressed coords
2014  coords_ti.set_subtype(kTINYINT);
// unit_size is 4 with GEOINT compression at comp_param 32, otherwise 8 (and the
// compression must then be NONE); the array size is two units.
2015  size_t unit_size;
2016  if (col_ti.get_compression() == kENCODING_GEOINT &&
2017  col_ti.get_comp_param() == 32) {
2018  unit_size = 4 * sizeof(int8_t);
2019  } else {
2020  CHECK(col_ti.get_compression() == kENCODING_NONE);
2021  unit_size = 8 * sizeof(int8_t);
2022  }
2023  coords_ti.set_size(2 * unit_size);
2024  physical_cd_coords.columnType = coords_ti;
2025  columns.push_back(physical_cd_coords);
2026 
2027  // If adding more physical columns - update SQLTypeInfo::get_physical_cols()
2028 
2029  break;
2030  }
// LINESTRING: "<name>_coords" (TINYINT array, no fixed size set here) and
// "<name>_bounds" (DOUBLE array sized for 4 doubles).
2031  case kLINESTRING: {
2032  ColumnDescriptor physical_cd_coords(true);
2033  physical_cd_coords.columnName = cd.columnName + "_coords";
2034  SQLTypeInfo coords_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2035  // Raw data: compressed/uncompressed coords
2036  coords_ti.set_subtype(kTINYINT);
2037  physical_cd_coords.columnType = coords_ti;
2038  columns.push_back(physical_cd_coords);
2039 
2040  ColumnDescriptor physical_cd_bounds(true);
2041  physical_cd_bounds.columnName = cd.columnName + "_bounds";
2042  SQLTypeInfo bounds_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2043  bounds_ti.set_subtype(kDOUBLE);
2044  bounds_ti.set_size(4 * sizeof(double));
2045  physical_cd_bounds.columnType = bounds_ti;
2046  columns.push_back(physical_cd_bounds);
2047 
2048  // If adding more physical columns - update SQLTypeInfo::get_physical_cols()
2049 
2050  break;
2051  }
// POLYGON: "<name>_coords", "<name>_ring_sizes" (INT array), "<name>_bounds" and
// "<name>_render_group" (scalar INT).
2052  case kPOLYGON: {
2053  ColumnDescriptor physical_cd_coords(true);
2054  physical_cd_coords.columnName = cd.columnName + "_coords";
2055  SQLTypeInfo coords_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2056  // Raw data: compressed/uncompressed coords
2057  coords_ti.set_subtype(kTINYINT);
2058  physical_cd_coords.columnType = coords_ti;
2059  columns.push_back(physical_cd_coords);
2060 
2061  ColumnDescriptor physical_cd_ring_sizes(true);
2062  physical_cd_ring_sizes.columnName = cd.columnName + "_ring_sizes";
2063  SQLTypeInfo ring_sizes_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2064  ring_sizes_ti.set_subtype(kINT);
2065  physical_cd_ring_sizes.columnType = ring_sizes_ti;
2066  columns.push_back(physical_cd_ring_sizes);
2067 
2068  ColumnDescriptor physical_cd_bounds(true);
2069  physical_cd_bounds.columnName = cd.columnName + "_bounds";
2070  SQLTypeInfo bounds_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2071  bounds_ti.set_subtype(kDOUBLE);
2072  bounds_ti.set_size(4 * sizeof(double));
2073  physical_cd_bounds.columnType = bounds_ti;
2074  columns.push_back(physical_cd_bounds);
2075 
2076  ColumnDescriptor physical_cd_render_group(true);
2077  physical_cd_render_group.columnName = cd.columnName + "_render_group";
2078  SQLTypeInfo render_group_ti = SQLTypeInfo(kINT, col_ti.get_notnull());
2079  physical_cd_render_group.columnType = render_group_ti;
2080  columns.push_back(physical_cd_render_group);
2081 
2082  // If adding more physical columns - update SQLTypeInfo::get_physical_cols()
2083 
2084  break;
2085  }
// MULTIPOLYGON: same columns as POLYGON plus "<name>_poly_rings" (INT array), which is
// inserted after "<name>_ring_sizes".
2086  case kMULTIPOLYGON: {
2087  ColumnDescriptor physical_cd_coords(true);
2088  physical_cd_coords.columnName = cd.columnName + "_coords";
2089  SQLTypeInfo coords_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2090  // Raw data: compressed/uncompressed coords
2091  coords_ti.set_subtype(kTINYINT);
2092  physical_cd_coords.columnType = coords_ti;
2093  columns.push_back(physical_cd_coords);
2094 
2095  ColumnDescriptor physical_cd_ring_sizes(true);
2096  physical_cd_ring_sizes.columnName = cd.columnName + "_ring_sizes";
2097  SQLTypeInfo ring_sizes_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2098  ring_sizes_ti.set_subtype(kINT);
2099  physical_cd_ring_sizes.columnType = ring_sizes_ti;
2100  columns.push_back(physical_cd_ring_sizes);
2101 
2102  ColumnDescriptor physical_cd_poly_rings(true);
2103  physical_cd_poly_rings.columnName = cd.columnName + "_poly_rings";
2104  SQLTypeInfo poly_rings_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2105  poly_rings_ti.set_subtype(kINT);
2106  physical_cd_poly_rings.columnType = poly_rings_ti;
2107  columns.push_back(physical_cd_poly_rings);
2108 
2109  ColumnDescriptor physical_cd_bounds(true);
2110  physical_cd_bounds.columnName = cd.columnName + "_bounds";
2111  SQLTypeInfo bounds_ti = SQLTypeInfo(kARRAY, col_ti.get_notnull());
2112  bounds_ti.set_subtype(kDOUBLE);
2113  bounds_ti.set_size(4 * sizeof(double));
2114  physical_cd_bounds.columnType = bounds_ti;
2115  columns.push_back(physical_cd_bounds);
2116 
2117  ColumnDescriptor physical_cd_render_group(true);
2118  physical_cd_render_group.columnName = cd.columnName + "_render_group";
2119  SQLTypeInfo render_group_ti = SQLTypeInfo(kINT, col_ti.get_notnull());
2120  physical_cd_render_group.columnType = render_group_ti;
2121  columns.push_back(physical_cd_render_group);
2122 
2123  // If adding more physical columns - update SQLTypeInfo::get_physical_cols()
2124 
2125  break;
2126  }
2127  default:
2128  throw runtime_error("Unrecognized geometry type.");
2129  break;
2130  }
2131  }
2132 }
2133 
2134 namespace {
// NOTE(review): the signature and several expression lines of this file-local helper
// are absent from this listing; the call site further down names it
// get_next_refresh_time(foreign_table) — confirm.
// Visible behaviour: a timing-type option must exist in foreign_table.options; for one
// particular value of it the result comes from foreign_storage::get_next_refresh_time().
2136  auto timing_type_entry =
2138  CHECK(timing_type_entry != foreign_table.options.end());
2139  if (timing_type_entry->second ==
2141  return foreign_storage::get_next_refresh_time(foreign_table.options);
2142  }
2144 }
2145 } // namespace
2146 
// NOTE(review): the first signature line and a number of statement-opening lines of
// this definition are absent from this listing (presumably Catalog::createTable —
// confirm). Comments below describe only what the visible lines establish.
//
// Visible flow:
//  1. copy the user columns, rejecting the reserved name "rowid" and expanding geometry
//     columns into their physical columns;
//  2. append the system column "rowid" and, if td.hasDeletedCol, "$deleted$";
//  3. persist table and columns through SQLite inside a transaction, or - on the
//     "Temporary table" branch - assign ids from in-memory counters;
//  4. register the result with addTableToMap() and notify Calcite.
2148  TableDescriptor& td,
2149  const list<ColumnDescriptor>& cols,
2150  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs,
2151  bool isLogicalTable) {
2152  cat_write_lock write_lock(this);
2153  list<ColumnDescriptor> cds = cols;
2154  list<DictDescriptor> dds;
2155  std::set<std::string> toplevel_column_names;
2156  list<ColumnDescriptor> columns;
2157 
2158  if (!td.storageType.empty() &&
2161  throw std::runtime_error("Only temporary tables can be backed by foreign storage.");
2162  }
2164  }
2165 
// Build the full column list: every user column followed by any physical columns that
// expandGeoColumn() adds for it. Only the user-visible names go into
// toplevel_column_names.
2166  for (auto cd : cds) {
2167  if (cd.columnName == "rowid") {
2168  throw std::runtime_error(
2169  "Cannot create column with name rowid. rowid is a system defined column.");
2170  }
2171  columns.push_back(cd);
2172  toplevel_column_names.insert(cd.columnName);
2173  if (cd.columnType.is_geometry()) {
2174  expandGeoColumn(cd, columns);
2175  }
2176  }
// cds is reused below to collect the final descriptors with ids assigned.
2177  cds.clear();
2178 
2179  ColumnDescriptor cd;
2180  // add row_id column -- Must be last column in the table
2181  cd.columnName = "rowid";
2182  cd.isSystemCol = true;
2183  cd.columnType = SQLTypeInfo(kBIGINT, true);
2184 #ifdef MATERIALIZED_ROWID
2185  cd.isVirtualCol = false;
2186 #else
2187  cd.isVirtualCol = true;
2188  cd.virtualExpr = "MAPD_FRAG_ID * MAPD_ROWS_PER_FRAG + MAPD_FRAG_ROW_ID";
2189 #endif
2190  columns.push_back(cd);
2191  toplevel_column_names.insert(cd.columnName);
2192 
// The delete-marker column is appended after rowid and is not added to
// toplevel_column_names.
2193  if (td.hasDeletedCol) {
2194  ColumnDescriptor cd_del;
2195  cd_del.columnName = "$deleted$";
2196  cd_del.isSystemCol = true;
2197  cd_del.isVirtualCol = false;
2198  cd_del.columnType = SQLTypeInfo(kBOOLEAN, true);
2199  cd_del.isDeletedCol = true;
2200 
2201  columns.push_back(cd_del);
2202  }
2203 
2204  td.nColumns = columns.size();
2206  sqliteConnector_.query("BEGIN TRANSACTION");
2208  try {
2210  R"(INSERT INTO mapd_tables (name, userid, ncolumns, isview, fragments, frag_type, max_frag_rows, max_chunk_size, frag_page_size, max_rows, partitions, shard_column_id, shard, num_shards, sort_column_id, storage_type, max_rollback_epochs, key_metainfo) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?))",
2211  std::vector<std::string>{td.tableName,
2212  std::to_string(td.userId),
2214  std::to_string(td.isView),
2215  "",
2220  std::to_string(td.maxRows),
2221  td.partitions,
2223  std::to_string(td.shard),
2224  std::to_string(td.nShards),
2226  td.storageType,
2228  td.keyMetainfo});
2229 
2230  // now get the auto generated tableid
2232  "SELECT tableid FROM mapd_tables WHERE name = ?", td.tableName);
2233  td.tableId = sqliteConnector_.getData<int>(0, 0);
// Column ids start at 1 and are handed out in list order.
2234  int colId = 1;
2235  for (auto cd : columns) {
// A column that setColumnSharedDictionary() reports as sharing a dictionary does not
// get one of its own.
2237  const bool is_foreign_col =
2238  setColumnSharedDictionary(cd, cds, dds, td, shared_dict_defs);
2239  if (!is_foreign_col) {
2240  setColumnDictionary(cd, dds, td, isLogicalTable);
2241  }
2242  }
2243 
2244  if (toplevel_column_names.count(cd.columnName)) {
2245  // make up colId gap for sanity test (begin with 1 bc much code depends on it!)
2246  if (colId > 1) {
2247  colId += g_test_against_columnId_gap;
2248  }
2249  if (!cd.isGeoPhyCol) {
2250  td.columnIdBySpi_.push_back(colId);
2251  }
2252  }
2253 
2255  "INSERT INTO mapd_columns (tableid, columnid, name, coltype, colsubtype, "
2256  "coldim, colscale, is_notnull, "
2257  "compression, comp_param, size, chunks, is_systemcol, is_virtualcol, "
2258  "virtual_expr, is_deletedcol) "
2259  "VALUES (?, ?, ?, ?, ?, "
2260  "?, "
2261  "?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
2262  std::vector<std::string>{std::to_string(td.tableId),
2263  std::to_string(colId),
2264  cd.columnName,
2273  "",
2276  cd.virtualExpr,
2278  cd.tableId = td.tableId;
2279  cd.columnId = colId++;
2280  cds.push_back(cd);
2281  }
2282  if (td.isView) {
2284  "INSERT INTO mapd_views (tableid, sql) VALUES (?,?)",
2285  std::vector<std::string>{std::to_string(td.tableId), td.viewSQL});
2286  }
// dynamic_cast to a reference throws std::bad_cast if td is not a ForeignTable; the
// guarding condition is on a line absent from this listing.
2288  auto& foreign_table = dynamic_cast<foreign_storage::ForeignTable&>(td);
2289  foreign_table.next_refresh_time = get_next_refresh_time(foreign_table);
2291  "INSERT INTO omnisci_foreign_tables (table_id, server_id, options, "
2292  "last_refresh_time, next_refresh_time) VALUES (?, ?, ?, ?, ?)",
2293  std::vector<std::string>{std::to_string(foreign_table.tableId),
2294  std::to_string(foreign_table.foreign_server->id),
2295  foreign_table.getOptionsAsJsonString(),
2296  std::to_string(foreign_table.last_refresh_time),
2297  std::to_string(foreign_table.next_refresh_time)});
2298  }
2299  } catch (std::exception& e) {
2300  sqliteConnector_.query("ROLLBACK TRANSACTION");
2301  throw;
2302  }
2303  } else { // Temporary table
// Temporary tables take their table id and dictionary ids from in-memory counters.
2304  td.tableId = nextTempTableId_++;
2305  int colId = 1;
2306  for (auto cd : columns) {
2308  const bool is_foreign_col =
2309  setColumnSharedDictionary(cd, cds, dds, td, shared_dict_defs);
2310 
2311  if (!is_foreign_col) {
2312  // Create a new temporary dictionary
2313  std::string fileName("");
2314  std::string folderPath("");
2316  nextTempDictId_++;
2317  DictDescriptor dd(dict_ref,
2318  fileName,
2320  false,
2321  1,
2322  folderPath,
2323  true); // Is dictName (2nd argument) used?
2324  dds.push_back(dd);
2325  if (!cd.columnType.is_array()) {
2327  }
2328  cd.columnType.set_comp_param(dict_ref.dictId);
2329  }
2330  }
2331  if (toplevel_column_names.count(cd.columnName)) {
2332  // make up colId gap for sanity test (begin with 1 bc much code depends on it!)
2333  if (colId > 1) {
2334  colId += g_test_against_columnId_gap;
2335  }
2336  if (!cd.isGeoPhyCol) {
2337  td.columnIdBySpi_.push_back(colId);
2338  }
2339  }
2340  cd.tableId = td.tableId;
2341  cd.columnId = colId++;
2342  cds.push_back(cd);
2343  }
2344 
2346  serializeTableJsonUnlocked(&td, cds);
2347  }
2348  }
2349 
// NOTE(review): the ROLLBACK / END TRANSACTION statements below are reached on both
// branches, while the only BEGIN TRANSACTION visible in this listing is on the
// non-temporary branch — confirm this is intended for temporary tables.
2350  try {
2351  addTableToMap(&td, cds, dds);
2352  calciteMgr_->updateMetadata(currentDB_.dbName, td.tableName);
2353  if (!td.storageType.empty() && td.storageType != StorageType::FOREIGN_TABLE) {
2355  }
2356  } catch (std::exception& e) {
2357  sqliteConnector_.query("ROLLBACK TRANSACTION");
2358  removeTableFromMap(td.tableName, td.tableId, true);
2359  throw;
2360  }
2361  sqliteConnector_.query("END TRANSACTION");
2362 }
2363 
// NOTE(review): the first line of this signature is absent from this listing; the call
// site earlier in the file names it serializeTableJsonUnlocked(&td, cds) — confirm.
// Adds an object describing table `td` and its columns `cds` to the per-database JSON
// file located by table_json_filepath(), creating the document if the file does not
// exist, and rewrites the whole file.
2365  const std::list<ColumnDescriptor>& cds) const {
2366  // relies on the catalog write lock
2367  using namespace rapidjson;
2368 
2369  VLOG(1) << "Serializing temporary table " << td->tableName << " to JSON for Calcite.";
2370 
2371  const auto db_name = currentDB_.dbName;
2372  const auto file_path = table_json_filepath(basePath_, db_name);
2373 
2374  Document d;
2375  if (boost::filesystem::exists(file_path)) {
2376  // look for an existing file for this database
2377  std::ifstream reader(file_path.string());
2378  CHECK(reader.is_open());
2379  IStreamWrapper json_read_wrapper(reader);
2380  d.ParseStream(json_read_wrapper);
2381  } else {
2382  d.SetObject();
2383  }
// The document root must be an object and must not already contain this table name.
2384  CHECK(d.IsObject());
2385  CHECK(!d.HasMember(StringRef(td->tableName.c_str())));
2386 
// StringRef does not copy: the referenced table/column name strings must stay alive
// until the document has been written out below.
2387  Value table(kObjectType);
2388  table.AddMember(
2389  "name", Value().SetString(StringRef(td->tableName.c_str())), d.GetAllocator());
2390  table.AddMember("id", Value().SetInt(td->tableId), d.GetAllocator());
2391  table.AddMember("columns", Value(kArrayType), d.GetAllocator());
2392 
// One object per column, appended in the order of cds.
2393  for (const auto& cd : cds) {
2394  Value column(kObjectType);
2395  column.AddMember(
2396  "name", Value().SetString(StringRef(cd.columnName)), d.GetAllocator());
2397  column.AddMember("coltype",
2398  Value().SetInt(static_cast<int>(cd.columnType.get_type())),
2399  d.GetAllocator());
2400  column.AddMember("colsubtype",
2401  Value().SetInt(static_cast<int>(cd.columnType.get_subtype())),
2402  d.GetAllocator());
2403  column.AddMember(
2404  "coldim", Value().SetInt(cd.columnType.get_dimension()), d.GetAllocator());
2405  column.AddMember(
2406  "colscale", Value().SetInt(cd.columnType.get_scale()), d.GetAllocator());
2407  column.AddMember(
2408  "is_notnull", Value().SetBool(cd.columnType.get_notnull()), d.GetAllocator());
2409  column.AddMember("is_systemcol", Value().SetBool(cd.isSystemCol), d.GetAllocator());
2410  column.AddMember("is_virtualcol", Value().SetBool(cd.isVirtualCol), d.GetAllocator());
2411  column.AddMember("is_deletedcol", Value().SetBool(cd.isDeletedCol), d.GetAllocator());
2412  table["columns"].PushBack(column, d.GetAllocator());
2413  }
// The table object is keyed by table name at the document root.
2414  d.AddMember(StringRef(td->tableName.c_str()), table, d.GetAllocator());
2415 
2416  // Overwrite the existing file
2417  std::ofstream writer(file_path.string(), std::ios::trunc | std::ios::out);
2418  CHECK(writer.is_open());
2419  OStreamWrapper json_wrapper(writer);
2420 
2421  Writer<OStreamWrapper> json_writer(json_wrapper);
2422  d.Accept(json_writer);
2423  writer.close();
2424 }
2425 
2426 void Catalog::dropTableFromJsonUnlocked(const std::string& table_name) const {
2427  // relies on the catalog write lock
2428  using namespace rapidjson;
2429 
2430  VLOG(1) << "Dropping temporary table " << table_name << " to JSON for Calcite.";
2431 
2432  const auto db_name = currentDB_.dbName;
2433  const auto file_path = table_json_filepath(basePath_, db_name);
2434 
2435  CHECK(boost::filesystem::exists(file_path));
2436  Document d;
2437 
2438  std::ifstream reader(file_path.string());
2439  CHECK(reader.is_open());
2440  IStreamWrapper json_read_wrapper(reader);
2441  d.ParseStream(json_read_wrapper);
2442 
2443  CHECK(d.IsObject());
2444  auto table_name_ref = StringRef(table_name.c_str());
2445  CHECK(d.HasMember(table_name_ref));
2446  CHECK(d.RemoveMember(table_name_ref));
2447 
2448  // Overwrite the existing file
2449  std::ofstream writer(file_path.string(), std::ios::trunc | std::ios::out);
2450  CHECK(writer.is_open());
2451  OStreamWrapper json_wrapper(writer);
2452 
2453  Writer<OStreamWrapper> json_writer(json_wrapper);
2454  d.Accept(json_writer);
2455  writer.close();
2456 }
2457 
// NOTE(review): the first signature line and one body line are absent from this
// listing (presumably Catalog::createForeignServer — confirm).
// Takes the catalog write lock, then hands ownership of foreign_server to
// createForeignServerNoLocks().
2459  std::unique_ptr<foreign_storage::ForeignServer> foreign_server,
2460  bool if_not_exists) {
2461  cat_write_lock write_lock(this);
2463  createForeignServerNoLocks(std::move(foreign_server), if_not_exists);
2464 }
2465 
// NOTE(review): the first signature line and the statement-opening lines of the three
// SQL calls are absent from this listing (the caller above names this
// createForeignServerNoLocks — confirm).
// Inserts the server into omnisci_foreign_servers and both in-memory maps if no row
// with the same name exists. If one exists, throws std::runtime_error unless
// if_not_exists is true, in which case nothing is changed.
2467  std::unique_ptr<foreign_storage::ForeignServer> foreign_server,
2468  bool if_not_exists) {
2470  "SELECT name from omnisci_foreign_servers where name = ?",
2471  std::vector<std::string>{foreign_server->name});
2472 
2473  if (sqliteConnector_.getNumRows() == 0) {
// creation_time is stamped here, overwriting whatever the caller supplied.
2474  foreign_server->creation_time = std::time(nullptr);
2476  "INSERT INTO omnisci_foreign_servers (name, data_wrapper_type, owner_user_id, "
2477  "creation_time, "
2478  "options) "
2479  "VALUES (?, ?, ?, ?, ?)",
2480  std::vector<std::string>{foreign_server->name,
2481  foreign_server->data_wrapper_type,
2482  std::to_string(foreign_server->user_id),
2483  std::to_string(foreign_server->creation_time),
2484  foreign_server->getOptionsAsJsonString()});
// Read back the id of the row just inserted and store it on the server object.
2486  "SELECT id from omnisci_foreign_servers where name = ?",
2487  std::vector<std::string>{foreign_server->name});
2488  CHECK_EQ(sqliteConnector_.getNumRows(), size_t(1));
2489  foreign_server->id = sqliteConnector_.getData<int32_t>(0, 0);
// Ownership moves from the unique_ptr into a shared_ptr referenced by both maps;
// foreign_server is empty from here on.
2490  std::shared_ptr<foreign_storage::ForeignServer> foreign_server_shared =
2491  std::move(foreign_server);
2492  CHECK(foreignServerMap_.find(foreign_server_shared->name) == foreignServerMap_.end())
2493  << "Attempting to insert a foreign server into foreign server map that already "
2494  "exists.";
2495  foreignServerMap_[foreign_server_shared->name] = foreign_server_shared;
2496  foreignServerMapById_[foreign_server_shared->id] = foreign_server_shared;
2497  } else if (!if_not_exists) {
2498  throw std::runtime_error{"A foreign server with name \"" + foreign_server->name +
2499  "\" already exists."};
2500  }
2501 }
2502 
// NOTE(review): the first signature line is absent from this listing; a caller later in
// the file names this getForeignServer(name) — confirm.
// Looks the server up in foreignServerMap_ under the catalog read lock and returns a
// non-owning pointer to it, or nullptr when no server has that name.
2504  const std::string& server_name) const {
2505  foreign_storage::ForeignServer* foreign_server = nullptr;
2506  cat_read_lock read_lock(this);
2507 
// The map is searched twice: once for the existence test and once to fetch the value.
2508  if (foreignServerMap_.find(server_name) != foreignServerMap_.end()) {
2509  foreign_server = foreignServerMap_.find(server_name)->second.get();
2510  }
2511  return foreign_server;
2512 }
2513 
// Builds a fresh ForeignServer object from the omnisci_foreign_servers row whose name
// equals server_name, without consulting the in-memory maps.
// Returns an empty pointer when no such row exists.
// NOTE(review): two lines after the first statement (including the opening of the SQL
// call) are absent from this listing.
2514 const std::unique_ptr<const foreign_storage::ForeignServer>
2515 Catalog::getForeignServerFromStorage(const std::string& server_name) {
2516  std::unique_ptr<foreign_storage::ForeignServer> foreign_server = nullptr;
2519  "SELECT id, name, data_wrapper_type, options, owner_user_id, creation_time "
2520  "FROM omnisci_foreign_servers WHERE name = ?",
2521  std::vector<std::string>{server_name});
2522  if (sqliteConnector_.getNumRows() > 0) {
// Constructor arguments follow the SELECT column order (id, name, data_wrapper_type,
// options, owner_user_id, creation_time).
// NOTE(review): creation_time is read back as a 32-bit integer although it is written
// from std::time() elsewhere in this file — confirm the intended width.
2523  foreign_server = std::make_unique<foreign_storage::ForeignServer>(
2524  sqliteConnector_.getData<int>(0, 0),
2525  sqliteConnector_.getData<std::string>(0, 1),
2526  sqliteConnector_.getData<std::string>(0, 2),
2527  sqliteConnector_.getData<std::string>(0, 3),
2528  sqliteConnector_.getData<std::int32_t>(0, 4),
2529  sqliteConnector_.getData<std::int32_t>(0, 5));
2530  }
2531  return foreign_server;
2532 }
2533 
// NOTE(review): the line carrying the function name and parameters, and the opening of
// the SQL call, are absent from this listing (presumably
// Catalog::getForeignTableFromStorage(table_id) — confirm).
// Builds a fresh ForeignTable object from the omnisci_foreign_tables row with the given
// table_id. Returns an empty pointer when no such row exists; more than one row trips
// the CHECK_EQ.
2534 const std::unique_ptr<const foreign_storage::ForeignTable>
2536  std::unique_ptr<foreign_storage::ForeignTable> foreign_table = nullptr;
2539  "SELECT table_id, server_id, options, last_refresh_time, next_refresh_time from "
2540  "omnisci_foreign_tables WHERE table_id = ?",
2541  std::vector<std::string>{to_string(table_id)});
2542  auto num_rows = sqliteConnector_.getNumRows();
2543  if (num_rows > 0) {
2544  CHECK_EQ(size_t(1), num_rows);
// The server pointer is taken from the in-memory foreignServerMapById_, not from
// storage.
// NOTE(review): if foreignServerMapById_ is a std::map / std::unordered_map,
// operator[] default-inserts an empty entry for an unknown server_id and a null
// pointer is passed on — confirm.
2545  foreign_table = std::make_unique<foreign_storage::ForeignTable>(
2546  sqliteConnector_.getData<int>(0, 0),
2547  foreignServerMapById_[sqliteConnector_.getData<int32_t>(0, 1)].get(),
2548  sqliteConnector_.getData<std::string>(0, 2),
2549  sqliteConnector_.getData<int>(0, 3),
2550  sqliteConnector_.getData<int>(0, 4));
2551  }
2552  return foreign_table;
2553 }
2554 
2555 void Catalog::changeForeignServerOwner(const std::string& server_name,
2556  const int new_owner_id) {
2557  cat_write_lock write_lock(this);
2558  foreign_storage::ForeignServer* foreign_server =
2559  foreignServerMap_.find(server_name)->second.get();
2560  CHECK(foreign_server);
2561  setForeignServerProperty(server_name, "owner_user_id", std::to_string(new_owner_id));
2562  // update in-memory server
2563  foreign_server->user_id = new_owner_id;
2564 }
2565 
2566 void Catalog::setForeignServerDataWrapper(const std::string& server_name,
2567  const std::string& data_wrapper) {
2568  cat_write_lock write_lock(this);
2569  auto data_wrapper_type = to_upper(data_wrapper);
2570  // update in-memory server
2571  foreign_storage::ForeignServer* foreign_server =
2572  foreignServerMap_.find(server_name)->second.get();
2573  CHECK(foreign_server);
2574  std::string saved_data_wrapper_type = foreign_server->data_wrapper_type;
2575  foreign_server->data_wrapper_type = data_wrapper_type;
2576  try {
2577  foreign_server->validate();
2578  } catch (const std::exception& e) {
2579  // validation did not succeed:
2580  // revert to saved data_wrapper_type & throw exception
2581  foreign_server->data_wrapper_type = saved_data_wrapper_type;
2582  throw;
2583  }
2584  setForeignServerProperty(server_name, "data_wrapper_type", data_wrapper_type);
2585 }
2586 
2587 void Catalog::setForeignServerOptions(const std::string& server_name,
2588  const std::string& options) {
2589  cat_write_lock write_lock(this);
2590  // update in-memory server
2591  foreign_storage::ForeignServer* foreign_server =
2592  foreignServerMap_.find(server_name)->second.get();
2593  CHECK(foreign_server);
2594  auto saved_options = foreign_server->options;
2595  foreign_server->populateOptionsMap(options, true);
2596  try {
2597  foreign_server->validate();
2598  } catch (const std::exception& e) {
2599  // validation did not succeed:
2600  // revert to saved options & throw exception
2601  foreign_server->options = saved_options;
2602  throw;
2603  }
2604  setForeignServerProperty(server_name, "options", options);
2605 }
2606 
2607 void Catalog::renameForeignServer(const std::string& server_name,
2608  const std::string& name) {
2609  cat_write_lock write_lock(this);
2610  auto foreign_server_it = foreignServerMap_.find(server_name);
2611  CHECK(foreign_server_it != foreignServerMap_.end());
2612  setForeignServerProperty(server_name, "name", name);
2613  auto foreign_server_shared = foreign_server_it->second;
2614  foreign_server_shared->name = name;
2615  foreignServerMap_[name] = foreign_server_shared;
2616  foreignServerMap_.erase(foreign_server_it);
2617 }
2618 
// Drops the foreign server named server_name from storage and from both in-memory maps.
// Does nothing if no such server row exists. Throws std::runtime_error if any row in
// omnisci_foreign_tables still references the server.
// NOTE(review): several lines (including the openings of the SQL calls) are absent from
// this listing.
2619 void Catalog::dropForeignServer(const std::string& server_name) {
2620  cat_write_lock write_lock(this);
2622 
2624  "SELECT id from omnisci_foreign_servers where name = ?",
2625  std::vector<std::string>{server_name});
2626  auto num_rows = sqliteConnector_.getNumRows();
2627  if (num_rows > 0) {
2628  CHECK_EQ(size_t(1), num_rows);
2629  auto server_id = sqliteConnector_.getData<int32_t>(0, 0);
// Refuse to drop while foreign tables still point at this server id.
2631  "SELECT table_id from omnisci_foreign_tables where server_id = ?",
2632  std::to_string(server_id));
2633  if (sqliteConnector_.getNumRows() > 0) {
2634  throw std::runtime_error{"Foreign server \"" + server_name +
2635  "\" is referenced "
2636  "by existing foreign tables and cannot be dropped."};
2637  }
2638  sqliteConnector_.query("BEGIN TRANSACTION");
2639  try {
2641  "DELETE FROM omnisci_foreign_servers WHERE name = ?",
2642  std::vector<std::string>{server_name});
2643  } catch (const std::exception& e) {
2644  sqliteConnector_.query("ROLLBACK TRANSACTION");
2645  throw;
2646  }
2647  sqliteConnector_.query("END TRANSACTION");
// The in-memory maps are updated only after the storage transaction has ended.
2648  foreignServerMap_.erase(server_name);
2649  foreignServerMapById_.erase(server_id);
2650  }
2651 }
2652 
// NOTE(review): the first signature line and two body lines are absent from this
// listing (presumably Catalog::getForeignServersForUser — confirm).
// Appends to `results` the in-memory foreign servers that match the optional `filters`
// and on which `user` has any privilege.
//  - filters: JSON array of objects; the keys read are "attribute", "operation",
//    "value" and, from the second element on, "chain". nullptr means no filtering.
//  - throws std::runtime_error for an unknown attribute name or for a non-EQUALS
//    operation on "created_at".
2654  const rapidjson::Value* filters,
2655  const UserMetadata& user,
2656  std::vector<const foreign_storage::ForeignServer*>& results) {
2657  sys_read_lock syscat_read_lock(&SysCatalog::instance());
2658  cat_read_lock read_lock(this);
2660  // Customer facing and internal SQlite names
2661  std::map<std::string, std::string> col_names{{"server_name", "name"},
2662  {"data_wrapper", "data_wrapper_type"},
2663  {"created_at", "creation_time"},
2664  {"options", "options"}};
2665 
2666  // TODO add "owner" when FSI privilege is implemented
2667  std::stringstream filter_string;
2668  std::vector<std::string> arguments;
2669 
2670  if (filters != nullptr) {
2671  // Create SQL WHERE clause for SQLite query
2672  int num_filters = 0;
2673  filter_string << " WHERE";
2674  for (auto& filter_def : filters->GetArray()) {
// NOTE(review): the "chain" value is concatenated into the SQL text as-is, unlike
// attribute names (mapped through col_names) and values (bound as parameters) —
// confirm the caller restricts it to a fixed set such as AND / OR.
2675  if (num_filters > 0) {
2676  filter_string << " " << std::string(filter_def["chain"].GetString());
2677  ;
2678  }
2679 
// Only attribute names listed in col_names are accepted; the SQL text receives the
// mapped internal column name, never the caller's string.
2680  if (col_names.find(std::string(filter_def["attribute"].GetString())) ==
2681  col_names.end()) {
2682  throw std::runtime_error{"Attribute with name \"" +
2683  std::string(filter_def["attribute"].GetString()) +
2684  "\" does not exist."};
2685  }
2686 
2687  filter_string << " " << col_names[std::string(filter_def["attribute"].GetString())];
2688 
// Any operation other than "EQUALS" is treated as LIKE.
2689  bool equals_operator = false;
2690  if (std::strcmp(filter_def["operation"].GetString(), "EQUALS") == 0) {
2691  filter_string << " = ? ";
2692  equals_operator = true;
2693  } else {
2694  filter_string << " LIKE ? ";
2695  }
2696 
2697  bool timestamp_column =
2698  (std::strcmp(filter_def["attribute"].GetString(), "created_at") == 0);
2699 
2700  if (timestamp_column && !equals_operator) {
2701  throw std::runtime_error{"LIKE operator is incompatible with TIMESTAMP data"};
2702  }
2703 
// For created_at the value is parsed with dateTimeParse<kTIMESTAMP> and bound in its
// numeric string form; every other value is bound unchanged.
2704  if (timestamp_column && equals_operator) {
2705  arguments.push_back(std::to_string(
2706  dateTimeParse<kTIMESTAMP>(filter_def["value"].GetString(), 0)));
2707  } else {
2708  arguments.push_back(filter_def["value"].GetString());
2709  }
2710 
2711  num_filters++;
2712  }
2713  }
2714  // Create select query for the omnisci_foreign_servers table
2715  std::string query = std::string("SELECT name from omnisci_foreign_servers ");
2716  query += filter_string.str();
2717 
2718  sqliteConnector_.query_with_text_params(query, arguments);
2719  auto num_rows = sqliteConnector_.getNumRows();
2720 
2721  if (sqliteConnector_.getNumRows() == 0)
2722  return;
2723 
2725  // Return pointers to objects
2726  results.reserve(num_rows);
2727  for (size_t row = 0; row < num_rows; ++row) {
// Every name returned by storage must also exist in the in-memory server map.
2728  const foreign_storage::ForeignServer* foreign_server =
2729  getForeignServer(sqliteConnector_.getData<std::string>(row, 0));
2730 
2731  CHECK(foreign_server != nullptr);
2732 
2733  DBObject dbObject(foreign_server->name, ServerDBObjectType);
2734  dbObject.loadKey(*this);
2735  std::vector<DBObject> privObjects = {dbObject};
2736  if (!SysCatalog::instance().hasAnyPrivileges(user, privObjects)) {
2737  // skip server, as there are no privileges to access it
2738  continue;
2739  }
2740  results.push_back(foreign_server);
2741  }
2742 }
2743 
2744 // returns the table epoch or -1 if there is something wrong with the shared epoch
2745 int32_t Catalog::getTableEpoch(const int32_t db_id, const int32_t table_id) const {
2746  cat_read_lock read_lock(this);
2747  const auto td = getMetadataForTable(table_id, false);
2748  if (!td) {
2749  std::stringstream table_not_found_error_message;
2750  table_not_found_error_message << "Table (" << db_id << "," << table_id
2751  << ") not found";
2752  throw std::runtime_error(table_not_found_error_message.str());
2753  }
2754  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(table_id);
2755  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
2756  // check all shards have same checkpoint
2757  const auto physicalTables = physicalTableIt->second;
2758  CHECK(!physicalTables.empty());
2759  size_t curr_epoch{0}, first_epoch{0};
2760  int32_t first_table_id{0};
2761  bool are_epochs_inconsistent{false};
2762  for (size_t i = 0; i < physicalTables.size(); i++) {
2763  int32_t physical_tb_id = physicalTables[i];
2764  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id, false);
2765  CHECK(phys_td);
2766 
2767  curr_epoch = dataMgr_->getTableEpoch(db_id, physical_tb_id);
2768  LOG(INFO) << "Got sharded table epoch for db id: " << db_id
2769  << ", table id: " << physical_tb_id << ", epoch: " << curr_epoch;
2770  if (i == 0) {
2771  first_epoch = curr_epoch;
2772  first_table_id = physical_tb_id;
2773  } else if (first_epoch != curr_epoch) {
2774  are_epochs_inconsistent = true;
2775  LOG(ERROR) << "Epochs on shards do not all agree on table id: " << table_id
2776  << ", db id: " << db_id
2777  << ". First table (table id: " << first_table_id
2778  << ") has epoch: " << first_epoch << ". Table id: " << physical_tb_id
2779  << ", has inconsistent epoch: " << curr_epoch
2780  << ". See previous INFO logs for all epochs and their table ids.";
2781  }
2782  }
2783  if (are_epochs_inconsistent) {
2784  // oh dear the shards do not agree on the epoch for this table
2785  return -1;
2786  }
2787  return curr_epoch;
2788  } else {
2789  auto epoch = dataMgr_->getTableEpoch(db_id, table_id);
2790  LOG(INFO) << "Got table epoch for db id: " << db_id << ", table id: " << table_id
2791  << ", epoch: " << epoch;
2792  return epoch;
2793  }
2794 }
2795 
// Sets the epoch of a table (and of each of its physical tables when it has an entry in
// logicalToPhysicalTableMapById_) to new_epoch through the global file manager.
// Throws std::runtime_error if the table does not exist or - per the message below -
// if it is a temporary table.
// NOTE(review): the line holding the temporary-table condition is absent from this
// listing.
2796 void Catalog::setTableEpoch(const int db_id, const int table_id, int new_epoch) {
2797  cat_read_lock read_lock(this);
2798  LOG(INFO) << "Set table epoch db:" << db_id << " Table ID " << table_id
2799  << " back to new epoch " << new_epoch;
2800  const auto td = getMetadataForTable(table_id, false);
2801  if (!td) {
2802  std::stringstream table_not_found_error_message;
2803  table_not_found_error_message << "Table (" << db_id << "," << table_id
2804  << ") not found";
2805  throw std::runtime_error(table_not_found_error_message.str());
2806  }
2808  std::stringstream is_temp_table_error_message;
2809  is_temp_table_error_message << "Cannot set epoch on temporary table";
2810  throw std::runtime_error(is_temp_table_error_message.str());
2811  }
// The same parameters (new epoch plus the logical table's maxRollbackEpochs) are
// applied to every table touched below.
2812  File_Namespace::FileMgrParams file_mgr_params;
2813  file_mgr_params.epoch = new_epoch;
2814  file_mgr_params.max_rollback_epochs = td->maxRollbackEpochs;
2815 
2816  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(table_id);
2817  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
2818  const auto physicalTables = physicalTableIt->second;
2819  CHECK(!physicalTables.empty());
2820  for (size_t i = 0; i < physicalTables.size(); i++) {
2821  const int32_t physical_tb_id = physicalTables[i];
2822  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id, false);
2823  CHECK(phys_td);
2824  LOG(INFO) << "Set sharded table epoch db:" << db_id << " Table ID "
2825  << physical_tb_id << " back to new epoch " << new_epoch;
2826  // Should have table lock from caller so safe to do this after, avoids
2827  // having to repopulate data on error
2828  removeChunks(physical_tb_id);
2829  dataMgr_->getGlobalFileMgr()->setFileMgrParams(
2830  db_id, physical_tb_id, file_mgr_params);
2831  }
2832  } else { // not shared
2833  // Should have table lock from caller so safe to do this after, avoids
2834  // having to repopulate data on error
2835  removeChunks(table_id);
2836  dataMgr_->getGlobalFileMgr()->setFileMgrParams(db_id, table_id, file_mgr_params);
2837  }
2838 }
2839 
2841  const TableDescriptor* td,
2842  const TableDescriptorUpdateParams& table_update_params) {
2843  // Only called from parent alterTableParamMetadata, expect already to have catalog and
2844  // sqlite write locks
2845 
2846  // Sqlite transaction should have already been begun in parent alterTableCatalogMetadata
2847 
2849  CHECK(mutable_td);
2850  if (td->maxRollbackEpochs != table_update_params.maxRollbackEpochs) {
2852  "UPDATE mapd_tables SET max_rollback_epochs = ? WHERE tableid = ?",
2853  std::vector<std::string>{std::to_string(table_update_params.maxRollbackEpochs),
2854  std::to_string(td->tableId)});
2855  mutable_td->maxRollbackEpochs = table_update_params.maxRollbackEpochs;
2856  }
2857 }
2858 
2860  const TableDescriptorUpdateParams& table_update_params) {
2861  cat_write_lock write_lock(this);
2863  sqliteConnector_.query("BEGIN TRANSACTION");
2864  try {
2865  const auto physical_table_it = logicalToPhysicalTableMapById_.find(td->tableId);
2866  if (physical_table_it != logicalToPhysicalTableMapById_.end()) {
2867  const auto physical_tables = physical_table_it->second;
2868  CHECK(!physical_tables.empty());
2869  for (size_t i = 0; i < physical_tables.size(); i++) {
2870  int32_t physical_tb_id = physical_tables[i];
2871  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id, false);
2872  CHECK(phys_td);
2873  alterPhysicalTableMetadata(phys_td, table_update_params);
2874  }
2875  }
2876  alterPhysicalTableMetadata(td, table_update_params);
2877  } catch (std::exception& e) {
2878  sqliteConnector_.query("ROLLBACK TRANSACTION");
2879  LOG(FATAL) << "Table '" << td->tableName << "' catalog update failed";
2880  }
2881  sqliteConnector_.query("END TRANSACTION");
2882 }
2883 
// Changes the max_rollback_epochs setting of a table.
//
// Throws std::runtime_error when `max_rollback_epochs` is negative. If the requested
// value equals the one already recorded for the table, the call only logs and returns.
// Otherwise the new value is pushed to the table's file manager parameters (keeping
// the existing epoch) and then recorded through alterTableMetadata().
2884 void Catalog::setMaxRollbackEpochs(const int32_t table_id,
2885  const int32_t max_rollback_epochs) {
2886  // Must be called from AlterTableParamStmt or other method that takes executor and
2887  // TableSchema locks
2888  cat_write_lock write_lock(this); // Consider only taking read lock for potentially
2889  // heavy table storage metadata rolloff operations
2890  if (max_rollback_epochs <= -1) {
2891  throw std::runtime_error("Cannot set max_rollback_epochs < 0.");
2892  }
2893  const auto td = getMetadataForTable(
2894  table_id, false); // Deep copy as there will be gap between read and write locks
2895  CHECK(td); // Existence should have already been checked in
2896  // ParserNode::AlterTableParmStmt
  // Start from the table's current settings and override only max_rollback_epochs, so
  // the equality test below detects a no-op request.
2897  TableDescriptorUpdateParams table_update_params(td);
2898  table_update_params.maxRollbackEpochs = max_rollback_epochs;
2899  if (table_update_params == td) { // Operator is overloaded to test for equality
2900  LOG(INFO) << "Setting max_rollback_epochs for table " << table_id
2901  << " to existing value, skipping operation";
2902  return;
2903  }
2904  File_Namespace::FileMgrParams file_mgr_params;
2905  file_mgr_params.epoch = -1; // Use existing epoch
2906  file_mgr_params.max_rollback_epochs = max_rollback_epochs;
2907  setTableFileMgrParams(table_id, file_mgr_params);
2908  // Unlock as alterTableMetadata will take write lock, and Catalog locks are not
2909  // upgradeable. Should be safe as we have schema lock on this table
  // NOTE(review): source line 2910 is absent from this listing; the comment above
  // implies it releases `write_lock` before the call below - confirm against the
  // original file.
2911  alterTableMetadata(td, table_update_params);
2912 }
2913 
2915  const int table_id,
2916  const File_Namespace::FileMgrParams& file_mgr_params) {
2917  // Expects parent to have write lock
2918  const auto td = getMetadataForTable(table_id, false);
2919  const auto db_id = this->getDatabaseId();
2920  if (!td) {
2921  std::stringstream table_not_found_error_message;
2922  table_not_found_error_message << "Table (" << db_id << "," << table_id
2923  << ") not found";
2924  throw std::runtime_error(table_not_found_error_message.str());
2925  }
2927  std::stringstream is_temp_table_error_message;
2928  is_temp_table_error_message << "Cannot set storage params on temporary table";
2929  throw std::runtime_error(is_temp_table_error_message.str());
2930  }
2931  const auto physical_table_it = logicalToPhysicalTableMapById_.find(table_id);
2932  if (physical_table_it != logicalToPhysicalTableMapById_.end()) {
2933  const auto physical_tables = physical_table_it->second;
2934  CHECK(!physical_tables.empty());
2935  for (size_t i = 0; i < physical_tables.size(); i++) {
2936  const int32_t physical_tb_id = physical_tables[i];
2937  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id);
2938  CHECK(phys_td);
2939  removeChunks(physical_tb_id);
2940  dataMgr_->getGlobalFileMgr()->setFileMgrParams(
2941  db_id, physical_tb_id, file_mgr_params);
2942  }
2943  } else { // not shared
2944  removeChunks(table_id);
2945  dataMgr_->getGlobalFileMgr()->setFileMgrParams(db_id, table_id, file_mgr_params);
2946  }
2947 }
2948 
2949 std::vector<TableEpochInfo> Catalog::getTableEpochs(const int32_t db_id,
2950  const int32_t table_id) const {
2951  cat_read_lock read_lock(this);
2952  std::vector<TableEpochInfo> table_epochs;
2953  const auto physical_table_it = logicalToPhysicalTableMapById_.find(table_id);
2954  if (physical_table_it != logicalToPhysicalTableMapById_.end()) {
2955  const auto physical_tables = physical_table_it->second;
2956  CHECK(!physical_tables.empty());
2957 
2958  for (const auto physical_tb_id : physical_tables) {
2959  const auto phys_td = getMetadataForTableImpl(physical_tb_id, false);
2960  CHECK(phys_td);
2961 
2962  auto table_id = phys_td->tableId;
2963  auto epoch = dataMgr_->getTableEpoch(db_id, phys_td->tableId);
2964  table_epochs.emplace_back(table_id, epoch);
2965  LOG(INFO) << "Got sharded table epoch for db id: " << db_id
2966  << ", table id: " << table_id << ", epoch: " << epoch;
2967  }
2968  } else {
2969  auto epoch = dataMgr_->getTableEpoch(db_id, table_id);
2970  LOG(INFO) << "Got table epoch for db id: " << db_id << ", table id: " << table_id
2971  << ", epoch: " << epoch;
2972  table_epochs.emplace_back(table_id, epoch);
2973  }
2974  return table_epochs;
2975 }
2976 
2977 void Catalog::setTableEpochs(const int32_t db_id,
2978  const std::vector<TableEpochInfo>& table_epochs) {
2979  const auto td = getMetadataForTable(table_epochs[0].table_id, false);
2980  CHECK(td);
2981  File_Namespace::FileMgrParams file_mgr_params;
2982  file_mgr_params.max_rollback_epochs = td->maxRollbackEpochs;
2983 
2984  cat_read_lock read_lock(this);
2985  for (const auto& table_epoch_info : table_epochs) {
2986  removeChunks(table_epoch_info.table_id);
2987  file_mgr_params.epoch = table_epoch_info.table_epoch;
2988  dataMgr_->getGlobalFileMgr()->setFileMgrParams(
2989  db_id, table_epoch_info.table_id, file_mgr_params);
2990  LOG(INFO) << "Set table epoch for db id: " << db_id
2991  << ", table id: " << table_epoch_info.table_id
2992  << ", back to epoch: " << table_epoch_info.table_epoch;
2993  }
2994 }
2995 
2996 namespace {
2997 std::string table_epochs_to_string(const std::vector<TableEpochInfo>& table_epochs) {
2998  std::string table_epochs_str{"["};
2999  bool first_entry{true};
3000  for (const auto& table_epoch : table_epochs) {
3001  if (first_entry) {
3002  first_entry = false;
3003  } else {
3004  table_epochs_str += ", ";
3005  }
3006  table_epochs_str += "(table_id: " + std::to_string(table_epoch.table_id) +
3007  ", epoch: " + std::to_string(table_epoch.table_epoch) + ")";
3008  }
3009  table_epochs_str += "]";
3010  return table_epochs_str;
3011 }
3012 } // namespace
3013 
3015  const int32_t db_id,
3016  const std::vector<TableEpochInfo>& table_epochs) {
3017  try {
3018  setTableEpochs(db_id, table_epochs);
3019  } catch (std::exception& e) {
3020  LOG(ERROR) << "An error occurred when attempting to set table epochs. DB id: "
3021  << db_id << ", Table epochs: " << table_epochs_to_string(table_epochs)
3022  << ", Error: " << e.what();
3023  }
3024 }
3025 
3027  cat_read_lock read_lock(this);
3028  const auto it = deletedColumnPerTable_.find(td);
3029  return it != deletedColumnPerTable_.end() ? it->second : nullptr;
3030 }
3031 
3033  int delete_column_id) const {
3034  // check if there are rows deleted by examining the deletedColumn metadata
3035  CHECK(td);
3036 
3037  if (table_is_temporary(td)) {
3038  auto fragmenter = td->fragmenter;
3039  CHECK(fragmenter);
3040  return fragmenter->hasDeletedRows(delete_column_id);
3041  } else {
3042  ChunkKey chunk_key_prefix = {currentDB_.dbId, td->tableId, delete_column_id};
3043  ChunkMetadataVector chunk_metadata_vec;
3044  dataMgr_->getChunkMetadataVecForKeyPrefix(chunk_metadata_vec, chunk_key_prefix);
3045  int64_t chunk_max{0};
3046 
3047  for (auto chunk_metadata : chunk_metadata_vec) {
3048  chunk_max = chunk_metadata.second->chunkStats.max.tinyintval;
3049  // delete has occurred
3050  if (chunk_max == 1) {
3051  return true;
3052  }
3053  }
3054  return false;
3055  }
3056 }
3057 
3059  const TableDescriptor* td) const {
3060  cat_read_lock read_lock(this);
3061 
3062  const auto it = deletedColumnPerTable_.find(td);
3063  // if not a table that supports delete return nullptr, nothing more to do
3064  if (it == deletedColumnPerTable_.end()) {
3065  return nullptr;
3066  }
3067  const ColumnDescriptor* cd = it->second;
3068 
3069  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(td->tableId);
3070 
3071  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
3072  // check all shards
3073  const auto physicalTables = physicalTableIt->second;
3074  CHECK(!physicalTables.empty());
3075  for (size_t i = 0; i < physicalTables.size(); i++) {
3076  int32_t physical_tb_id = physicalTables[i];
3077  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id);
3078  CHECK(phys_td);
3079  if (checkMetadataForDeletedRecs(phys_td, cd->columnId)) {
3080  return cd;
3081  }
3082  }
3083  } else {
3084  if (checkMetadataForDeletedRecs(td, cd->columnId)) {
3085  return cd;
3086  }
3087  }
3088  // no deletes so far recorded in metadata
3089  return nullptr;
3090 }
3091 
3093  cat_write_lock write_lock(this);
3094  setDeletedColumnUnlocked(td, cd);
3095 }
3096 
3098  const ColumnDescriptor* cd) {
3099  cat_write_lock write_lock(this);
3100  const auto it_ok = deletedColumnPerTable_.emplace(td, cd);
3101  CHECK(it_ok.second);
3102 }
3103 
3104 namespace {
3105 
3107  const Catalog& cat,
3108  const Parser::SharedDictionaryDef& shared_dict_def) {
3109  const auto& table_name = shared_dict_def.get_foreign_table();
3110  const auto td = cat.getMetadataForTable(table_name);
3111  CHECK(td);
3112  const auto& foreign_col_name = shared_dict_def.get_foreign_column();
3113  return cat.getMetadataForColumn(td->tableId, foreign_col_name);
3114 }
3115 
3116 } // namespace
3117 
3119  Parser::SharedDictionaryDef shared_dict_def,
3120  const bool persist_reference) {
3121  cat_write_lock write_lock(this);
3122  const auto foreign_ref_col = get_foreign_col(*this, shared_dict_def);
3123  CHECK(foreign_ref_col);
3124  referencing_column.columnType = foreign_ref_col->columnType;
3125  const int dict_id = referencing_column.columnType.get_comp_param();
3126  const DictRef dict_ref(currentDB_.dbId, dict_id);
3127  const auto dictIt = dictDescriptorMapByRef_.find(dict_ref);
3128  CHECK(dictIt != dictDescriptorMapByRef_.end());
3129  const auto& dd = dictIt->second;
3130  CHECK_GE(dd->refcount, 1);
3131  ++dd->refcount;
3132  if (persist_reference) {
3135  "UPDATE mapd_dictionaries SET refcount = refcount + 1 WHERE dictid = ?",
3136  {std::to_string(dict_id)});
3137  }
3138 }
3139 
3141  ColumnDescriptor& cd,
3142  std::list<ColumnDescriptor>& cdd,
3143  std::list<DictDescriptor>& dds,
3144  const TableDescriptor td,
3145  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs) {
3146  cat_write_lock write_lock(this);
3148 
3149  if (shared_dict_defs.empty()) {
3150  return false;
3151  }
3152  for (const auto& shared_dict_def : shared_dict_defs) {
3153  // check if the current column is a referencing column
3154  const auto& column = shared_dict_def.get_column();
3155  if (cd.columnName == column) {
3156  if (!shared_dict_def.get_foreign_table().compare(td.tableName)) {
3157  // Dictionaries are being shared in table to be created
3158  const auto& ref_column = shared_dict_def.get_foreign_column();
3159  auto colIt =
3160  std::find_if(cdd.begin(), cdd.end(), [ref_column](const ColumnDescriptor it) {
3161  return !ref_column.compare(it.columnName);
3162  });
3163  CHECK(colIt != cdd.end());
3164  cd.columnType = colIt->columnType;
3165 
3166  const int dict_id = colIt->columnType.get_comp_param();
3167  CHECK_GE(dict_id, 1);
3168  auto dictIt = std::find_if(
3169  dds.begin(), dds.end(), [this, dict_id](const DictDescriptor it) {
3170  return it.dictRef.dbId == this->currentDB_.dbId &&
3171  it.dictRef.dictId == dict_id;
3172  });
3173  if (dictIt != dds.end()) {
3174  // There exists dictionary definition of a dictionary column
3175  CHECK_GE(dictIt->refcount, 1);
3176  ++dictIt->refcount;
3177  if (!table_is_temporary(&td)) {
3178  // Persist reference count
3180  "UPDATE mapd_dictionaries SET refcount = refcount + 1 WHERE dictid = ?",
3181  {std::to_string(dict_id)});
3182  }
3183  } else {
3184  // The dictionary is referencing a column which is referencing a column in
3185  // a different table
3186  auto root_dict_def = compress_reference_path(shared_dict_def, shared_dict_defs);
3187  addReferenceToForeignDict(cd, root_dict_def, !table_is_temporary(&td));
3188  }
3189  } else {
3190  const auto& foreign_table_name = shared_dict_def.get_foreign_table();
3191  const auto foreign_td = getMetadataForTable(foreign_table_name, false);
3192  if (table_is_temporary(foreign_td)) {
3193  if (!table_is_temporary(&td)) {
3194  throw std::runtime_error(
3195  "Only temporary tables can share dictionaries with other temporary "
3196  "tables.");
3197  }
3198  addReferenceToForeignDict(cd, shared_dict_def, false);
3199  } else {
3200  addReferenceToForeignDict(cd, shared_dict_def, !table_is_temporary(&td));
3201  }
3202  }
3203  return true;
3204  }
3205  }
3206  return false;
3207 }
3208 
3210  std::list<DictDescriptor>& dds,
3211  const TableDescriptor& td,
3212  const bool isLogicalTable) {
3213  cat_write_lock write_lock(this);
3214 
3215  std::string dictName{"Initial_key"};
3216  int dictId{0};
3217  std::string folderPath;
3218  if (isLogicalTable) {
3220 
3222  "INSERT INTO mapd_dictionaries (name, nbits, is_shared, refcount) VALUES (?, ?, "
3223  "?, 1)",
3224  std::vector<std::string>{
3225  dictName, std::to_string(cd.columnType.get_comp_param()), "0"});
3227  "SELECT dictid FROM mapd_dictionaries WHERE name = ?", dictName);
3228  dictId = sqliteConnector_.getData<int>(0, 0);
3229  dictName = td.tableName + "_" + cd.columnName + "_dict" + std::to_string(dictId);
3231  "UPDATE mapd_dictionaries SET name = ? WHERE name = 'Initial_key'", dictName);
3232  folderPath = basePath_ + "/mapd_data/DB_" + std::to_string(currentDB_.dbId) +
3233  "_DICT_" + std::to_string(dictId);
3234  }
3236  dictId,
3237  dictName,
3239  false,
3240  1,
3241  folderPath,
3242  false);
3243  dds.push_back(dd);
3244  if (!cd.columnType.is_array()) {
3246  }
3247  cd.columnType.set_comp_param(dictId);
3248 }
3249 
3251  TableDescriptor& td,
3252  const list<ColumnDescriptor>& cols,
3253  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs) {
3254  cat_write_lock write_lock(this);
3255 
3256  /* create logical table */
3257  TableDescriptor* tdl = &td;
3258  createTable(*tdl, cols, shared_dict_defs, true); // create logical table
3259  int32_t logical_tb_id = tdl->tableId;
3260  std::string logical_table_name = tdl->tableName;
3261 
3262  /* create physical tables and link them to the logical table */
3263  std::vector<int32_t> physicalTables;
3264  for (int32_t i = 1; i <= td.nShards; i++) {
3265  TableDescriptor* tdp = &td;
3266  tdp->tableName = generatePhysicalTableName(logical_table_name, i);
3267  tdp->shard = i - 1;
3268  createTable(*tdp, cols, shared_dict_defs, false); // create physical table
3269  int32_t physical_tb_id = tdp->tableId;
3270 
3271  /* add physical table to the vector of physical tables */
3272  physicalTables.push_back(physical_tb_id);
3273  }
3274 
3275  if (!physicalTables.empty()) {
3276  /* add logical to physical tables correspondence to the map */
3277  const auto it_ok =
3278  logicalToPhysicalTableMapById_.emplace(logical_tb_id, physicalTables);
3279  CHECK(it_ok.second);
3280  /* update sqlite mapd_logical_to_physical in sqlite database */
3281  if (!table_is_temporary(&td)) {
3282  updateLogicalToPhysicalTableMap(logical_tb_id);
3283  }
3284  }
3285 }
3286 
3288  cat_write_lock write_lock(this);
3289 
3290  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(td->tableId);
3291  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
3292  // truncate all corresponding physical tables if this is a logical table
3293  const auto physicalTables = physicalTableIt->second;
3294  CHECK(!physicalTables.empty());
3295  for (size_t i = 0; i < physicalTables.size(); i++) {
3296  int32_t physical_tb_id = physicalTables[i];
3297  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id);
3298  CHECK(phys_td);
3299  doTruncateTable(phys_td);
3300  }
3301  }
3302  doTruncateTable(td);
3303 }
3304 
3306  cat_write_lock write_lock(this);
3307 
3308  const int tableId = td->tableId;
3309  // must destroy fragmenter before deleteChunks is called.
3310  if (td->fragmenter != nullptr) {
3311  auto tableDescIt = tableDescriptorMapById_.find(tableId);
3312  CHECK(tableDescIt != tableDescriptorMapById_.end());
3313  tableDescIt->second->fragmenter = nullptr;
3314  CHECK(td->fragmenter == nullptr);
3315  }
3316  ChunkKey chunkKeyPrefix = {currentDB_.dbId, tableId};
3317  // assuming deleteChunksWithPrefix is atomic
3318  dataMgr_->deleteChunksWithPrefix(chunkKeyPrefix, MemoryLevel::CPU_LEVEL);
3319  dataMgr_->deleteChunksWithPrefix(chunkKeyPrefix, MemoryLevel::GPU_LEVEL);
3320 
3321  dataMgr_->removeTableRelatedDS(currentDB_.dbId, tableId);
3322 
3323  std::unique_ptr<StringDictionaryClient> client;
3324  if (SysCatalog::instance().isAggregator()) {
3325  CHECK(!string_dict_hosts_.empty());
3326  DictRef dict_ref(currentDB_.dbId, -1);
3327  client.reset(new StringDictionaryClient(string_dict_hosts_.front(), dict_ref, true));
3328  }
3329  // clean up any dictionaries
3330  // delete all column descriptors for the table
3331  for (const auto& columnDescriptor : columnDescriptorMapById_) {
3332  auto cd = columnDescriptor.second;
3333  if (cd->tableId != td->tableId) {
3334  continue;
3335  }
3336  const int dict_id = cd->columnType.get_comp_param();
3337  // Dummy dictionaries created for a shard of a logical table have the id set to zero.
3338  if (cd->columnType.get_compression() == kENCODING_DICT && dict_id) {
3339  const DictRef dict_ref(currentDB_.dbId, dict_id);
3340  const auto dictIt = dictDescriptorMapByRef_.find(dict_ref);
3341  CHECK(dictIt != dictDescriptorMapByRef_.end());
3342  const auto& dd = dictIt->second;
3343  CHECK_GE(dd->refcount, 1);
3344  // if this is the only table using this dict reset the dict
3345  if (dd->refcount == 1) {
3346  // close the dictionary
3347  dd->stringDict.reset();
3348  File_Namespace::renameForDelete(dd->dictFolderPath);
3349  if (client) {
3350  client->drop(dd->dictRef);
3351  }
3352  if (!dd->dictIsTemp) {
3353  boost::filesystem::create_directory(dd->dictFolderPath);
3354  }
3355  }
3356 
3357  DictDescriptor* new_dd = new DictDescriptor(dd->dictRef,
3358  dd->dictName,
3359  dd->dictNBits,
3360  dd->dictIsShared,
3361  dd->refcount,
3362  dd->dictFolderPath,
3363  dd->dictIsTemp);
3364  dictDescriptorMapByRef_.erase(dictIt);
3365  // now create new Dict -- need to figure out what to do here for temp tables
3366  if (client) {
3367  client->create(new_dd->dictRef, new_dd->dictIsTemp);
3368  }
3369  dictDescriptorMapByRef_[new_dd->dictRef].reset(new_dd);
3371  }
3372  }
3373 }
3374 
3375 void Catalog::removeFragmenterForTable(const int table_id) {
3376  cat_write_lock write_lock(this);
3377  auto td = getMetadataForTable(table_id, false);
3378  if (td->fragmenter != nullptr) {
3379  auto tableDescIt = tableDescriptorMapById_.find(table_id);
3380  CHECK(tableDescIt != tableDescriptorMapById_.end());
3381  tableDescIt->second->fragmenter = nullptr;
3382  CHECK(td->fragmenter == nullptr);
3383  }
3384 }
3385 
3386 // used by rollback_table_epoch to clean up in memory artifacts after a rollback
3387 void Catalog::removeChunks(const int table_id) {
3388  auto td = getMetadataForTable(table_id);
3389  CHECK(td);
3390 
3391  if (td->fragmenter != nullptr) {
3392  auto tableDescIt = tableDescriptorMapById_.find(table_id);
3393  CHECK(tableDescIt != tableDescriptorMapById_.end());
3394  tableDescIt->second->fragmenter = nullptr;
3395  CHECK(td->fragmenter == nullptr);
3396  }
3397 
3398  // remove the chunks from in memory structures
3399  ChunkKey chunkKey = {currentDB_.dbId, table_id};
3400 
3401  dataMgr_->deleteChunksWithPrefix(chunkKey, MemoryLevel::CPU_LEVEL);
3402  dataMgr_->deleteChunksWithPrefix(chunkKey, MemoryLevel::GPU_LEVEL);
3403 }
3404 
3408  cat_write_lock write_lock(this);
3410  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(td->tableId);
3411  sqliteConnector_.query("BEGIN TRANSACTION");
3412  try {
3413  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
3414  // remove all corresponding physical tables if this is a logical table
3415  const auto physicalTables = physicalTableIt->second;
3416  CHECK(!physicalTables.empty());
3417  for (size_t i = 0; i < physicalTables.size(); i++) {
3418  int32_t physical_tb_id = physicalTables[i];
3419  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id);
3420  CHECK(phys_td);
3421  doDropTable(phys_td);
3422  }
3423 
3424  // remove corresponding record from the logicalToPhysicalTableMap in sqlite database
3426  "DELETE FROM mapd_logical_to_physical WHERE logical_table_id = ?",
3427  std::to_string(td->tableId));
3429  }
3430  doDropTable(td);
3431  } catch (std::exception& e) {
3432  sqliteConnector_.query("ROLLBACK TRANSACTION");
3433  throw;
3434  }
3435  sqliteConnector_.query("END TRANSACTION");
3436 }
3437 
3442  }
3444 }
3445 
3447  const int tableId = td->tableId;
3448  sqliteConnector_.query_with_text_param("DELETE FROM mapd_tables WHERE tableid = ?",
3449  std::to_string(tableId));
3451  "select comp_param from mapd_columns where compression = ? and tableid = ?",
3452  std::vector<std::string>{std::to_string(kENCODING_DICT), std::to_string(tableId)});
3453  int numRows = sqliteConnector_.getNumRows();
3454  std::vector<int> dict_id_list;
3455  for (int r = 0; r < numRows; ++r) {
3456  dict_id_list.push_back(sqliteConnector_.getData<int>(r, 0));
3457  }
3458  for (auto dict_id : dict_id_list) {
3460  "UPDATE mapd_dictionaries SET refcount = refcount - 1 WHERE dictid = ?",
3461  std::vector<std::string>{std::to_string(dict_id)});
3462  }
3464  "DELETE FROM mapd_dictionaries WHERE dictid in (select comp_param from "
3465  "mapd_columns where compression = ? "
3466  "and tableid = ?) and refcount = 0",
3467  std::vector<std::string>{std::to_string(kENCODING_DICT), std::to_string(tableId)});
3468  sqliteConnector_.query_with_text_param("DELETE FROM mapd_columns WHERE tableid = ?",
3469  std::to_string(tableId));
3470  if (td->isView) {
3471  sqliteConnector_.query_with_text_param("DELETE FROM mapd_views WHERE tableid = ?",
3472  std::to_string(tableId));
3473  }
3476  "DELETE FROM omnisci_foreign_tables WHERE table_id = ?", std::to_string(tableId));
3477  }
3478 }
3479 
// Renames a single table: updates the `mapd_tables` row inside a sqlite transaction,
// then re-keys the table's entry in tableDescriptorMap_ under the upper-cased new name.
//
// Any std::exception raised inside the transaction triggers a ROLLBACK and is
// rethrown. calciteMgr_->updateMetadata() is called once before and once after the
// in-memory descriptor is renamed.
// NOTE(review): source lines 3482, 3486 and 3495 are absent from this listing, so the
// statement that issues the UPDATE and the lookup that initializes `tableDescIt` are
// not visible here - confirm against the original file.
3480 void Catalog::renamePhysicalTable(const TableDescriptor* td, const string& newTableName) {
3481  cat_write_lock write_lock(this);
3483 
3484  sqliteConnector_.query("BEGIN TRANSACTION");
3485  try {
3487  "UPDATE mapd_tables SET name = ? WHERE tableid = ?",
3488  std::vector<std::string>{newTableName, std::to_string(td->tableId)});
3489  } catch (std::exception& e) {
3490  sqliteConnector_.query("ROLLBACK TRANSACTION");
3491  throw;
3492  }
3493  sqliteConnector_.query("END TRANSACTION");
3494  TableDescriptorMap::iterator tableDescIt =
3496  CHECK(tableDescIt != tableDescriptorMap_.end());
3497  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
3498  // Get table descriptor to change it
3499  TableDescriptor* changeTd = tableDescIt->second;
3500  changeTd->tableName = newTableName;
3501  tableDescriptorMap_.erase(tableDescIt); // erase entry under old name
3502  tableDescriptorMap_[to_upper(newTableName)] = changeTd;
  // NOTE(review): presumably `td` points at the same descriptor as `changeTd`, in which
  // case this second call sees the new table name - verify against callers.
3503  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
3504 }
3505 
// Renames a table and, when it has an entry in logicalToPhysicalTableMapById_, every
// physical table listed there, then propagates the new name to grantees that hold the
// table as a DB object.
//
// Work is split into two scopes: the first holds the catalog write lock and performs
// the renames via renamePhysicalTable(); the second runs after that lock is released
// and updates the privilege objects obtained from SysCatalog.
// NOTE(review): source lines 3509 and 3542 are absent from this listing - confirm
// against the original file what they do.
3506 void Catalog::renameTable(const TableDescriptor* td, const string& newTableName) {
3507  {
3508  cat_write_lock write_lock(this);
3510  // rename all corresponding physical tables if this is a logical table
3511  const auto physicalTableIt = logicalToPhysicalTableMapById_.find(td->tableId);
3512  if (physicalTableIt != logicalToPhysicalTableMapById_.end()) {
3513  const auto physicalTables = physicalTableIt->second;
3514  CHECK(!physicalTables.empty());
3515  for (size_t i = 0; i < physicalTables.size(); i++) {
3516  int32_t physical_tb_id = physicalTables[i];
3517  const TableDescriptor* phys_td = getMetadataForTable(physical_tb_id);
3518  CHECK(phys_td);
  // Physical table names are derived from the new logical name and a 1-based index.
3519  std::string newPhysTableName =
3520  generatePhysicalTableName(newTableName, static_cast<int32_t>(i + 1));
3521  renamePhysicalTable(phys_td, newPhysTableName);
3522  }
3523  }
3524  renamePhysicalTable(td, newTableName);
3525  }
3526  {
3527  DBObject object(newTableName, TableDBObjectType);
3528  // update table name in direct and effective priv map
3529  DBObjectKey key;
3530  key.dbId = currentDB_.dbId;
3531  key.objectId = td->tableId;
3532  key.permissionType = static_cast<int>(DBObjectType::TableDBObjectType);
3533  object.setObjectKey(key);
3534  auto objdescs = SysCatalog::instance().getMetadataForObject(
3535  currentDB_.dbId, static_cast<int>(DBObjectType::TableDBObjectType), td->tableId);
  // Each object descriptor names a role; grantees that cannot be resolved are skipped.
3536  for (auto obj : objdescs) {
3537  Grantee* grnt = SysCatalog::instance().getGrantee(obj->roleName);
3538  if (grnt) {
3539  grnt->renameDbObject(object);
3540  }
3541  }
3543  }
3544 }
3545 
3547  const ColumnDescriptor* cd,
3548  const string& newColumnName) {
3549  cat_write_lock write_lock(this);
3551  sqliteConnector_.query("BEGIN TRANSACTION");
3552  try {
3553  for (int i = 0; i <= cd->columnType.get_physical_cols(); ++i) {
3554  auto cdx = getMetadataForColumn(td->tableId, cd->columnId + i);
3555  CHECK(cdx);
3556  std::string new_column_name = cdx->columnName;
3557  new_column_name.replace(0, cd->columnName.size(), newColumnName);
3559  "UPDATE mapd_columns SET name = ? WHERE tableid = ? AND columnid = ?",
3560  std::vector<std::string>{new_column_name,
3561  std::to_string(td->tableId),
3562  std::to_string(cdx->columnId)});
3563  }
3564  } catch (std::exception& e) {
3565  sqliteConnector_.query("ROLLBACK TRANSACTION");
3566  throw;
3567  }
3568  sqliteConnector_.query("END TRANSACTION");
3569  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
3570  for (int i = 0; i <= cd->columnType.get_physical_cols(); ++i) {
3571  auto cdx = getMetadataForColumn(td->tableId, cd->columnId + i);
3572  CHECK(cdx);
3573  ColumnDescriptorMap::iterator columnDescIt = columnDescriptorMap_.find(
3574  std::make_tuple(td->tableId, to_upper(cdx->columnName)));
3575  CHECK(columnDescIt != columnDescriptorMap_.end());
3576  ColumnDescriptor* changeCd = columnDescIt->second;
3577  changeCd->columnName.replace(0, cd->columnName.size(), newColumnName);
3578  columnDescriptorMap_.erase(columnDescIt); // erase entry under old name
3579  columnDescriptorMap_[std::make_tuple(td->tableId, to_upper(changeCd->columnName))] =
3580  changeCd;
3581  }
3582  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
3583 }
3584 
3586  cat_write_lock write_lock(this);
3588  sqliteConnector_.query("BEGIN TRANSACTION");
3589  try {
3590  // TODO(andrew): this should be an upsert
3592  "SELECT id FROM mapd_dashboards WHERE name = ? and userid = ?",
3593  std::vector<std::string>{vd.dashboardName, std::to_string(vd.userId)});
3594  if (sqliteConnector_.getNumRows() > 0) {
3596  "UPDATE mapd_dashboards SET state = ?, image_hash = ?, metadata = ?, "
3597  "update_time = "
3598  "datetime('now') where name = ? "
3599  "and userid = ?",
3600  std::vector<std::string>{vd.dashboardState,
3601  vd.imageHash,
3602  vd.dashboardMetadata,
3603  vd.dashboardName,
3604  std::to_string(vd.userId)});
3605  } else {
3607  "INSERT INTO mapd_dashboards (name, state, image_hash, metadata, "
3608  "update_time, "
3609  "userid) "
3610  "VALUES "
3611  "(?,?,?,?, "
3612  "datetime('now'), ?)",
3613  std::vector<std::string>{vd.dashboardName,
3614  vd.dashboardState,
3615  vd.imageHash,
3616  vd.dashboardMetadata,
3617  std::to_string(vd.userId)});
3618  }
3619  } catch (std::exception& e) {
3620  sqliteConnector_.query("ROLLBACK TRANSACTION");
3621  throw;
3622  }
3623  sqliteConnector_.query("END TRANSACTION");
3624 
3625  // now get the auto generated dashboardId
3626  try {
3628  "SELECT id, strftime('%Y-%m-%dT%H:%M:%SZ', update_time) FROM mapd_dashboards "
3629  "WHERE name = ? and userid = ?",
3630  std::vector<std::string>{vd.dashboardName, std::to_string(vd.userId)});
3631  vd.dashboardId = sqliteConnector_.getData<int>(0, 0);
3632  vd.updateTime = sqliteConnector_.getData<std::string>(0, 1);
3633  } catch (std::exception& e) {
3634  throw;
3635  }
3639  sqlite_lock.unlock();
3640  write_lock.unlock();
3641  // NOTE(wamsi): Transactionally unsafe
3644  return vd.dashboardId;
3645 }
3646 
3648  cat_write_lock write_lock(this);
3650 
3652  sqliteConnector_.query("BEGIN TRANSACTION");
3653  try {
3655  "SELECT id FROM mapd_dashboards WHERE id = ?",
3656  std::vector<std::string>{std::to_string(vd.dashboardId)});
3657  if (sqliteConnector_.getNumRows() > 0) {
3659  "UPDATE mapd_dashboards SET name = ?, state = ?, image_hash = ?, metadata = ?, "
3660  "userid = ?, update_time = datetime('now') where id = ? ",
3661  std::vector<std::string>{vd.dashboardName,
3662  vd.dashboardState,
3663  vd.imageHash,
3664  vd.dashboardMetadata,
3665  std::to_string(vd.userId),
3667  } else {
3668  LOG(ERROR) << "Error replacing dashboard id " << vd.dashboardId
3669  << " does not exist in db";
3670  throw runtime_error("Error replacing dashboard id " +
3671  std::to_string(vd.dashboardId) + " does not exist in db");
3672  }
3673  } catch (std::exception& e) {
3674  sqliteConnector_.query("ROLLBACK TRANSACTION");
3675  throw;
3676  }
3677  sqliteConnector_.query("END TRANSACTION");
3678 
3679  bool found{false};
3680  for (auto descp : dashboardDescriptorMap_) {
3681  auto dash = descp.second.get();
3682  if (dash->dashboardId == vd.dashboardId) {
3683  found = true;
3684  auto viewDescIt = dashboardDescriptorMap_.find(std::to_string(dash->userId) + ":" +
3685  dash->dashboardName);
3686  if (viewDescIt ==
3687  dashboardDescriptorMap_.end()) { // check to make sure view exists
3688  LOG(ERROR) << "No metadata for dashboard for user " << dash->userId
3689  << " dashboard " << dash->dashboardName << " does not exist in map";
3690  throw runtime_error("No metadata for dashboard for user " +
3691  std::to_string(dash->userId) + " dashboard " +
3692  dash->dashboardName + " does not exist in map");
3693  }
3694  dashboardDescriptorMap_.erase(viewDescIt);
3695  break;
3696  }
3697  }
3698  if (!found) {
3699  LOG(ERROR) << "Error replacing dashboard id " << vd.dashboardId
3700  << " does not exist in map";
3701  throw runtime_error("Error replacing dashboard id " + std::to_string(vd.dashboardId) +
3702  " does not exist in map");
3703  }
3704 
3705  // now reload the object
3707  "SELECT id, strftime('%Y-%m-%dT%H:%M:%SZ', update_time) FROM "
3708  "mapd_dashboards "
3709  "WHERE id = ?",
3710  std::vector<std::string>{std::to_string(vd.dashboardId)});
3711  vd.updateTime = sqliteConnector_.getData<string>(0, 1);
3715  sqlite_lock.unlock();
3716  write_lock.unlock();
3717  // NOTE(wamsi): Transactionally unsafe
3720 }
3721 
3722 std::string Catalog::calculateSHA1(const std::string& data) {
3723  boost::uuids::detail::sha1 sha1;
3724  unsigned int digest[5];
3725  sha1.process_bytes(data.c_str(), data.length());
3726  sha1.get_digest(digest);
3727  std::stringstream ss;
3728  for (size_t i = 0; i < 5; i++) {
3729  ss << std::hex << digest[i];
3730  }
3731  return ss.str();
3732 }
3733 
3734 std::string Catalog::createLink(LinkDescriptor& ld, size_t min_length) {
3735  cat_write_lock write_lock(this);
3737  sqliteConnector_.query("BEGIN TRANSACTION");
3738  try {
3740  .substr(0, 8);
3742  "SELECT linkid FROM mapd_links WHERE link = ? and userid = ?",
3743  std::vector<std::string>{ld.link, std::to_string(ld.userId)});
3744  if (sqliteConnector_.getNumRows() > 0) {
3746  "UPDATE mapd_links SET update_time = datetime('now') WHERE userid = ? AND link "
3747  "= ?",
3748  std::vector<std::string>{std::to_string(ld.userId), ld.link});
3749  } else {
3751  "INSERT INTO mapd_links (userid, link, view_state, view_metadata, update_time) "
3752  "VALUES (?,?,?,?, "
3753  "datetime('now'))",
3754  std::vector<std::string>{
3755  std::to_string(ld.userId), ld.link, ld.viewState, ld.viewMetadata});
3756  }
3757  // now get the auto generated dashid
3759  "SELECT linkid, strftime('%Y-%m-%dT%H:%M:%SZ', update_time) FROM mapd_links "
3760  "WHERE link = ?",
3761  ld.link);
3762  ld.linkId = sqliteConnector_.getData<int>(0, 0);
3763  ld.updateTime = sqliteConnector_.getData<std::string>(0, 1);
3764  } catch (std::exception& e) {
3765  sqliteConnector_.query("ROLLBACK TRANSACTION");
3766  throw;
3767  }
3768  sqliteConnector_.query("END TRANSACTION");
3769  addLinkToMap(ld);
3770  return ld.link;
3771 }
3772 
3774  const TableDescriptor* td) const {
3775  cat_read_lock read_lock(this);
3776 
3777  const auto column_descriptors =
3778  getAllColumnMetadataForTable(td->tableId, false, true, true);
3779 
3780  const ColumnDescriptor* shard_cd{nullptr};
3781  int i = 1;
3782  for (auto cd_itr = column_descriptors.begin(); cd_itr != column_descriptors.end();
3783  ++cd_itr, ++i) {
3784  if (i == td->shardedColumnId) {
3785  shard_cd = *cd_itr;
3786  }
3787  }
3788  return shard_cd;
3789 }
3790 
3791 std::vector<const TableDescriptor*> Catalog::getPhysicalTablesDescriptors(
3792  const TableDescriptor* logicalTableDesc) const {
3793  cat_read_lock read_lock(this);
3794  const auto physicalTableIt =
3795  logicalToPhysicalTableMapById_.find(logicalTableDesc->tableId);
3796  if (physicalTableIt == logicalToPhysicalTableMapById_.end()) {
3797  return {logicalTableDesc};
3798  }
3799 
3800  const auto physicalTablesIds = physicalTableIt->second;
3801  CHECK(!physicalTablesIds.empty());
3802  std::vector<const TableDescriptor*> physicalTables;
3803  for (size_t i = 0; i < physicalTablesIds.size(); i++) {
3804  physicalTables.push_back(getMetadataForTable(physicalTablesIds[i]));
3805  }
3806 
3807  return physicalTables;
3808 }
3809 
3810 const std::map<int, const ColumnDescriptor*> Catalog::getDictionaryToColumnMapping() {
3811  cat_read_lock read_lock(this);
3812 
3813  std::map<int, const ColumnDescriptor*> mapping;
3814 
3815  const auto tables = getAllTableMetadata();
3816  for (const auto td : tables) {
3817  if (td->shard >= 0) {
3818  // skip shards, they're not standalone tables
3819  continue;
3820  }
3821 
3822  for (auto& cd : getAllColumnMetadataForTable(td->tableId, false, false, true)) {
3823  const auto& ti = cd->columnType;
3824  if (ti.is_string()) {
3825  if (ti.get_compression() == kENCODING_DICT) {
3826  // if foreign reference, get referenced tab.col
3827  const auto dict_id = ti.get_comp_param();
3828 
3829  // ignore temp (negative) dictionaries
3830  if (dict_id > 0 && mapping.end() == mapping.find(dict_id)) {
3831  mapping[dict_id] = cd;
3832  }
3833  }
3834  }
3835  }
3836  }
3837 
3838  return mapping;
3839 }
3840 
3842  const UserMetadata& user_metadata,
3843  const GetTablesType get_tables_type) const {
3844  if (td->shard >= 0) {
3845  // skip shards, they're not standalone tables
3846  return false;
3847  }
3848  switch (get_tables_type) {
3849  case GET_PHYSICAL_TABLES: {
3850  if (td->isView) {
3851  return false;
3852  }
3853  break;
3854  }
3855  case GET_VIEWS: {
3856  if (!td->isView) {
3857  return false;
3858  }
3859  break;
3860  }
3861  default:
3862  break;
3863  }
3865  dbObject.loadKey(*this);
3866  std::vector<DBObject> privObjects = {dbObject};
3867  if (!SysCatalog::instance().hasAnyPrivileges(user_metadata, privObjects)) {
3868  // skip table, as there are no privileges to access it
3869  return false;
3870  }
3871  return true;
3872 }
3873 
3874 std::vector<std::string> Catalog::getTableNamesForUser(
3875  const UserMetadata& user_metadata,
3876  const GetTablesType get_tables_type) const {
3877  sys_read_lock syscat_read_lock(&SysCatalog::instance());
3878  cat_read_lock read_lock(this);
3879 
3880  std::vector<std::string> table_names;
3881  const auto tables = getAllTableMetadata();
3882  for (const auto td : tables) {
3883  if (filterTableByTypeAndUser(td, user_metadata, get_tables_type)) {
3884  table_names.push_back(td->tableName);
3885  }
3886  }
3887  return table_names;
3888 }
3889 
3890 std::vector<TableMetadata> Catalog::getTablesMetadataForUser(
3891  const UserMetadata& user_metadata,
3892  const GetTablesType get_tables_type,
3893  const std::string& filter_table_name) const {
3894  sys_read_lock syscat_read_lock(&SysCatalog::instance());
3895  cat_read_lock read_lock(this);
3896 
3897  std::vector<TableMetadata> tables_metadata;
3898  const auto tables = getAllTableMetadata();
3899  for (const auto td : tables) {
3900  if (filterTableByTypeAndUser(td, user_metadata, get_tables_type)) {
3901  if (!filter_table_name.empty()) {
3902  if (td->tableName != filter_table_name) {
3903  continue;
3904  }
3905  }
3906  TableMetadata table_metadata(td); // Makes a copy, not safe to access raw table
3907  // descriptor outside catalog lock
3908  tables_metadata.emplace_back(table_metadata);
3909  }
3910  }
3911  return tables_metadata;
3912 }
3913 
3914 int Catalog::getLogicalTableId(const int physicalTableId) const {
3915  cat_read_lock read_lock(this);
3916  for (const auto& l : logicalToPhysicalTableMapById_) {
3917  if (l.second.end() != std::find_if(l.second.begin(),
3918  l.second.end(),
3919  [&](decltype(*l.second.begin()) tid) -> bool {
3920  return physicalTableId == tid;
3921  })) {
3922  return l.first;
3923  }
3924  }
3925  return physicalTableId;
3926 }
3927 
3928 void Catalog::checkpoint(const int logicalTableId) const {
3929  const auto td = getMetadataForTable(logicalTableId);
3930  const auto shards = getPhysicalTablesDescriptors(td);
3931  for (const auto shard : shards) {
3932  getDataMgr().checkpoint(getCurrentDB().dbId, shard->tableId);
3933  }
3934 }
3935 
3936 void Catalog::checkpointWithAutoRollback(const int logical_table_id) {
3937  auto table_epochs = getTableEpochs(getDatabaseId(), logical_table_id);
3938  try {
3939  checkpoint(logical_table_id);
3940  } catch (...) {
3941  setTableEpochsLogExceptions(getDatabaseId(), table_epochs);
3942  throw;
3943  }
3944 }
3945 
3947  cat_write_lock write_lock(this);
3948  // Physically erase all tables and dictionaries from disc and memory
3949  const auto tables = getAllTableMetadata();
3950  for (const auto table : tables) {
3951  eraseTablePhysicalData(table);
3952  }
3953  // Physically erase database metadata
3954  boost::filesystem::remove(basePath_ + "/mapd_catalogs/" + currentDB_.dbName);
3955  calciteMgr_->updateMetadata(currentDB_.dbName, "");
3956 }
3957 
3959  const int tableId = td->tableId;
3960  // must destroy fragmenter before deleteChunks is called.
3961  if (td->fragmenter != nullptr) {
3962  auto tableDescIt = tableDescriptorMapById_.find(tableId);
3963  CHECK(tableDescIt != tableDescriptorMapById_.end());
3964  {
3965  INJECT_TIMER(deleting_fragmenter);
3966  tableDescIt->second->fragmenter = nullptr;
3967  }
3968  }
3969  ChunkKey chunkKeyPrefix = {currentDB_.dbId, tableId};
3970  {
3971  INJECT_TIMER(deleteChunksWithPrefix);
3972  // assuming deleteChunksWithPrefix is atomic
3973  dataMgr_->deleteChunksWithPrefix(chunkKeyPrefix, MemoryLevel::CPU_LEVEL);
3974  dataMgr_->deleteChunksWithPrefix(chunkKeyPrefix, MemoryLevel::GPU_LEVEL);
3975  }
3976  if (!td->isView) {
3977  INJECT_TIMER(Remove_Table);
3978  dataMgr_->removeTableRelatedDS(currentDB_.dbId, tableId);
3979  }
3980  calciteMgr_->updateMetadata(currentDB_.dbName, td->tableName);
3981  {
3982  INJECT_TIMER(removeTableFromMap_);
3983  removeTableFromMap(td->tableName, tableId);
3984  }
3985 }
3986 
3987 std::string Catalog::generatePhysicalTableName(const std::string& logicalTableName,
3988  const int32_t& shardNumber) {
3989  std::string physicalTableName =
3990  logicalTableName + physicalTableNameTag_ + std::to_string(shardNumber);
3991  return (physicalTableName);
3992 }
3993 
3994 void Catalog::vacuumDeletedRows(const int logicalTableId) const {
3995  // shard here to serve request from TableOptimizer and elsewhere
3996  const auto td = getMetadataForTable(logicalTableId);
3997  const auto shards = getPhysicalTablesDescriptors(td);
3998  for (const auto shard : shards) {
3999  vacuumDeletedRows(shard);
4000  }
4001 }
4002 
4004  // "if not a table that supports delete return nullptr, nothing more to do"
4005  const ColumnDescriptor* cd = getDeletedColumn(td);
4006  if (nullptr == cd) {
4007  return;
4008  }
4009  // vacuum chunks which show sign of deleted rows in metadata
4010  ChunkKey chunkKeyPrefix = {currentDB_.dbId, td->tableId, cd->columnId};
4011  ChunkMetadataVector chunkMetadataVec;
4012  dataMgr_->getChunkMetadataVecForKeyPrefix(chunkMetadataVec, chunkKeyPrefix);
4013  for (auto cm : chunkMetadataVec) {
4014  // "delete has occurred"
4015  if (cm.second->chunkStats.max.tinyintval == 1) {
4016  UpdelRoll updel_roll;
4017  updel_roll.catalog = this;
4018  updel_roll.logicalTableId = getLogicalTableId(td->tableId);
4020  const auto cd = getMetadataForColumn(td->tableId, cm.first[2]);
4021  const auto chunk = Chunk_NS::Chunk::getChunk(cd,
4022  &getDataMgr(),
4023  cm.first,
4024  updel_roll.memoryLevel,
4025  0,
4026  cm.second->numBytes,
4027  cm.second->numElements);
4028  td->fragmenter->compactRows(this,
4029  td,
4030  cm.first[3],
4031  td->fragmenter->getVacuumOffsets(chunk),
4032  updel_roll.memoryLevel,
4033  updel_roll);
4034  updel_roll.commitUpdate();
4035  }
4036  }
4037 }
4038 
4041  "SELECT id, name, data_wrapper_type, options, owner_user_id, creation_time FROM "
4042  "omnisci_foreign_servers");
4043  auto num_rows = sqliteConnector_.getNumRows();
4044  for (size_t row = 0; row < num_rows; row++) {
4045  auto foreign_server = std::make_shared<foreign_storage::ForeignServer>(
4046  sqliteConnector_.getData<int>(row, 0),
4047  sqliteConnector_.getData<std::string>(row, 1),
4048  sqliteConnector_.getData<std::string>(row, 2),
4049  sqliteConnector_.getData<std::string>(row, 3),
4050  sqliteConnector_.getData<std::int32_t>(row, 4),
4051  sqliteConnector_.getData<std::int32_t>(row, 5));
4052  foreignServerMap_[foreign_server->name] = foreign_server;
4053  foreignServerMapById_[foreign_server->id] = foreign_server;
4054  }
4055 }
4056 
4059  "SELECT table_id, server_id, options, last_refresh_time, next_refresh_time from "
4060  "omnisci_foreign_tables");
4061  auto num_rows = sqliteConnector_.getNumRows();
4062  for (size_t r = 0; r < num_rows; r++) {
4063  const auto table_id = sqliteConnector_.getData<int32_t>(r, 0);
4064  const auto server_id = sqliteConnector_.getData<int32_t>(r, 1);
4065  const auto& options = sqliteConnector_.getData<std::string>(r, 2);
4066  const auto last_refresh_time = sqliteConnector_.getData<int>(r, 3);
4067  const auto next_refresh_time = sqliteConnector_.getData<int>(r, 4);
4068 
4069  CHECK(tableDescriptorMapById_.find(table_id) != tableDescriptorMapById_.end());
4070  auto foreign_table =
4071  dynamic_cast<foreign_storage::ForeignTable*>(tableDescriptorMapById_[table_id]);
4072  CHECK(foreign_table);
4073  foreign_table->foreign_server = foreignServerMapById_[server_id].get();
4074  CHECK(foreign_table->foreign_server);
4075  foreign_table->populateOptionsMap(options);
4076  foreign_table->last_refresh_time = last_refresh_time;
4077  foreign_table->next_refresh_time = next_refresh_time;
4078  }
4079 }
4080 
4081 void Catalog::setForeignServerProperty(const std::string& server_name,
4082  const std::string& property,
4083  const std::string& value) {
4086  "SELECT id from omnisci_foreign_servers where name = ?",
4087  std::vector<std::string>{server_name});
4088  auto num_rows = sqliteConnector_.getNumRows();
4089  if (num_rows > 0) {
4090  CHECK_EQ(size_t(1), num_rows);
4091  auto server_id = sqliteConnector_.getData<int32_t>(0, 0);
4093  "UPDATE omnisci_foreign_servers SET " + property + " = ? WHERE id = ?",
4094  std::vector<std::string>{value, std::to_string(server_id)});
4095  } else {
4096  throw std::runtime_error{"Can not change property \"" + property +
4097  "\" for foreign server." + " Foreign server \"" +
4098  server_name + "\" is not found."};
4099  }
4100 }
4101 
4104  std::map<std::string, std::string, std::less<>> options;
4105  options[std::string{ForeignServer::STORAGE_TYPE_KEY}] =
4106  ForeignServer::LOCAL_FILE_STORAGE_TYPE;
4107 
4108  auto local_csv_server =
4109  std::make_unique<ForeignServer>("omnisci_local_csv",
4111  options,
4113  local_csv_server->validate();
4114  createForeignServerNoLocks(std::move(local_csv_server), true);
4115 
4116  auto local_parquet_server =
4117  std::make_unique<ForeignServer>("omnisci_local_parquet",
4119  options,
4121  local_parquet_server->validate();
4122  createForeignServerNoLocks(std::move(local_parquet_server), true);
4123 }
4124 
4125 // prepare a fresh file reload on next table access
4126 void Catalog::setForReload(const int32_t tableId) {
4127  cat_read_lock read_lock(this);
4128  const auto td = getMetadataForTable(tableId);
4129  for (const auto shard : getPhysicalTablesDescriptors(td)) {
4130  const auto tableEpoch = getTableEpoch(currentDB_.dbId, shard->tableId);
4131  setTableEpoch(currentDB_.dbId, shard->tableId, tableEpoch);
4132  }
4133 }
4134 
4135 // get a table's data dirs
4136 std::vector<std::string> Catalog::getTableDataDirectories(
4137  const TableDescriptor* td) const {
4138  const auto global_file_mgr = getDataMgr().getGlobalFileMgr();
4139  std::vector<std::string> file_paths;
4140  for (auto shard : getPhysicalTablesDescriptors(td)) {
4141  const auto file_mgr = dynamic_cast<File_Namespace::FileMgr*>(
4142  global_file_mgr->getFileMgr(currentDB_.dbId, shard->tableId));
4143  boost::filesystem::path file_path(file_mgr->getFileMgrBasePath());
4144  file_paths.push_back(file_path.filename().string());
4145  }
4146  return file_paths;
4147 }
4148 
4149 // get a column's dict dir basename
4151  if ((cd->columnType.is_string() || cd->columnType.is_string_array()) &&
4153  cd->columnType.get_comp_param() > 0) {
4154  const auto dictId = cd->columnType.get_comp_param();
4155  const DictRef dictRef(currentDB_.dbId, dictId);
4156  const auto dit = dictDescriptorMapByRef_.find(dictRef);
4157  CHECK(dit != dictDescriptorMapByRef_.end());
4158  CHECK(dit->second);
4159  boost::filesystem::path file_path(dit->second->dictFolderPath);
4160  return file_path.filename().string();
4161  }
4162  return std::string();
4163 }
4164 
4165 // get a table's dict dirs
4166 std::vector<std::string> Catalog::getTableDictDirectories(
4167  const TableDescriptor* td) const {
4168  std::vector<std::string> file_paths;
4169  for (auto cd : getAllColumnMetadataForTable(td->tableId, false, false, true)) {
4170  auto file_base = getColumnDictDirectory(cd);
4171  if (!file_base.empty() &&
4172  file_paths.end() == std::find(file_paths.begin(), file_paths.end(), file_base)) {
4173  file_paths.push_back(file_base);
4174  }
4175  }
4176  return file_paths;
4177 }
4178 
4179 // returns table schema in a string
4180 // NOTE(sy): Might be able to replace dumpSchema() later with
4181 // dumpCreateTable() after a deeper review of the TableArchiver code.
4182 std::string Catalog::dumpSchema(const TableDescriptor* td) const {
4183  cat_read_lock read_lock(this);
4184 
4185  std::ostringstream os;
4186  os << "CREATE TABLE @T (";
4187  // gather column defines
4188  const auto cds = getAllColumnMetadataForTable(td->tableId, false, false, false);
4189  std::string comma;
4190  std::vector<std::string> shared_dicts;
4191  std::map<const std::string, const ColumnDescriptor*> dict_root_cds;
4192  for (const auto cd : cds) {
4193  if (!(cd->isSystemCol || cd->isVirtualCol)) {
4194  const auto& ti = cd->columnType;
4195  os << comma << cd->columnName;
4196  // CHAR is peculiar... better dump it as TEXT(32) like \d does
4197  if (ti.get_type() == SQLTypes::kCHAR) {
4198  os << " "
4199  << "TEXT";
4200  } else if (ti.get_subtype() == SQLTypes::kCHAR) {
4201  os << " "
4202  << "TEXT[]";
4203  } else {
4204  os << " " << ti.get_type_name();
4205  }
4206  os << (ti.get_notnull() ? " NOT NULL" : "");
4207  if (ti.is_string() || (ti.is_array() && ti.get_subtype() == kTEXT)) {
4208  auto size = ti.is_array() ? ti.get_logical_size() : ti.get_size();
4209  if (ti.get_compression() == kENCODING_DICT) {
4210  // if foreign reference, get referenced tab.col
4211  const auto dict_id = ti.get_comp_param();
4212  const DictRef dict_ref(currentDB_.dbId, dict_id);
4213  const auto dict_it = dictDescriptorMapByRef_.find(dict_ref);
4214  CHECK(dict_it != dictDescriptorMapByRef_.end());
4215  const auto dict_name = dict_it->second->dictName;
4216  // when migrating a table, any foreign dict ref will be dropped
4217  // and the first cd of a dict will become root of the dict
4218  if (dict_root_cds.end() == dict_root_cds.find(dict_name)) {
4219  dict_root_cds[dict_name] = cd;
4220  os << " ENCODING " << ti.get_compression_name() << "(" << (size * 8) << ")";
4221  } else {
4222  const auto dict_root_cd = dict_root_cds[dict_name];
4223  shared_dicts.push_back("SHARED DICTIONARY (" + cd->columnName +
4224  ") REFERENCES @T(" + dict_root_cd->columnName + ")");
4225  // "... shouldn't specify an encoding, it borrows from the referenced column"
4226  }
4227  } else {
4228  os << " ENCODING NONE";
4229  }
4230  } else if (ti.is_date_in_days() ||
4231  (ti.get_size() > 0 && ti.get_size() != ti.get_logical_size())) {
4232  const auto comp_param = ti.get_comp_param() ? ti.get_comp_param() : 32;
4233  os << " ENCODING " << ti.get_compression_name() << "(" << comp_param << ")";
4234  } else if (ti.is_geometry()) {
4235  if (ti.get_compression() == kENCODING_GEOINT) {
4236  os << " ENCODING " << ti.get_compression_name() << "(" << ti.get_comp_param()
4237  << ")";
4238  } else {
4239  os << " ENCODING NONE";
4240  }
4241  }
4242  comma = ", ";
4243  }
4244  }
4245  // gather SHARED DICTIONARYs
4246  if (shared_dicts.size()) {
4247  os << ", " << boost::algorithm::join(shared_dicts, ", ");
4248  }
4249  // gather WITH options ...
4250  std::vector<std::string> with_options;
4251  with_options.push_back("FRAGMENT_SIZE=" + std::to_string(td->maxFragRows));
4252  with_options.push_back("MAX_CHUNK_SIZE=" + std::to_string(td->maxChunkSize));
4253  with_options.push_back("PAGE_SIZE=" + std::to_string(td->fragPageSize));
4254  with_options.push_back("MAX_ROWS=" + std::to_string(td->maxRows));
4255  with_options.emplace_back(td->hasDeletedCol ? "VACUUM='DELAYED'"
4256  : "VACUUM='IMMEDIATE'");
4257  if (!td->partitions.empty()) {
4258  with_options.push_back("PARTITIONS='" + td->partitions + "'");
4259  }
4260  if (td->nShards > 0) {
4261  const auto shard_cd = getMetadataForColumn(td->tableId, td->shardedColumnId);
4262  CHECK(shard_cd);
4263  os << ", SHARD KEY(" << shard_cd->columnName << ")";
4264  with_options.push_back(
4265  "SHARD_COUNT=" +
4266  std::to_string(td->nShards * std::max(g_leaf_count, static_cast<size_t>(1))));
4267  }
4268  if (td->sortedColumnId > 0) {
4269  const auto sort_cd = getMetadataForColumn(td->tableId, td->sortedColumnId);
4270  CHECK(sort_cd);
4271  with_options.push_back("SORT_COLUMN='" + sort_cd->columnName + "'");
4272  }
4273  if (td->maxRollbackEpochs >= 0) {
4274  with_options.push_back("MAX_ROLLBACK_EPOCHS=" +
4276  }
4277  os << ") WITH (" + boost::algorithm::join(with_options, ", ") + ");";
4278  return os.str();
4279 }
4280 
4281 namespace {
4282 
4283 void unserialize_key_metainfo(std::vector<std::string>& shared_dicts,
4284  std::set<std::string>& shared_dict_column_names,
4285  const std::string keyMetainfo) {
4286  rapidjson::Document document;
4287  document.Parse(keyMetainfo.c_str());
4288  CHECK(!document.HasParseError());
4289  CHECK(document.IsArray());
4290  for (auto it = document.Begin(); it != document.End(); ++it) {
4291  const auto& key_with_spec_json = *it;
4292  CHECK(key_with_spec_json.IsObject());
4293  const std::string type = key_with_spec_json["type"].GetString();
4294  const std::string name = key_with_spec_json["name"].GetString();
4295  auto key_with_spec = type + " (" + name + ")";
4296  if (type == "SHARED DICTIONARY") {
4297  shared_dict_column_names.insert(name);
4298  key_with_spec += " REFERENCES ";
4299  const std::string foreign_table = key_with_spec_json["foreign_table"].GetString();
4300  const std::string foreign_column = key_with_spec_json["foreign_column"].GetString();
4301  key_with_spec += foreign_table + "(" + foreign_column + ")";
4302  } else {
4303  CHECK(type == "SHARD KEY");
4304  }
4305  shared_dicts.push_back(key_with_spec);
4306  }
4307 }
4308 
4309 } // namespace
4310 
4311 #include "Parser/ReservedKeywords.h"
4312 
// Returns true when `str` contains at least one character for which
// std::isspace() is true; an empty string yields false.
inline bool contains_spaces(std::string_view str) {
  // Characters are passed as unsigned char, as std::isspace requires.
  return std::any_of(str.begin(), str.end(), [](const unsigned char ch) {
    return std::isspace(ch) != 0;
  });
}
4319 
4322  std::string_view str,
4323  std::string_view chars = "`~!@#$%^&*()-=+[{]}\\|;:'\",<.>/?") {
4324  return str.find_first_of(chars) != std::string_view::npos;
4325 }
4326 
4328 inline bool is_reserved_sql_keyword(std::string_view str) {
4329  return reserved_keywords.find(to_upper(std::string(str))) != reserved_keywords.end();
4330 }
4331 
4332 // returns a "CREATE TABLE" statement in a string for "SHOW CREATE TABLE"
4334  bool multiline_formatting,
4335  bool dump_defaults) const {
4336  cat_read_lock read_lock(this);
4337 
4338  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(td);
4339  std::ostringstream os;
4340 
4341  if (foreign_table) {
4342  os << "CREATE FOREIGN TABLE " << td->tableName << " (";
4343  } else if (!td->isView) {
4344  os << "CREATE ";
4346  os << "TEMPORARY ";
4347  }
4348  os << "TABLE " + td->tableName + " (";
4349  } else {
4350  os << "CREATE VIEW " + td->tableName + " AS " << td->viewSQL;
4351  return os.str();
4352  }
4353  // scan column defines
4354  std::vector<std::string> shared_dicts;
4355  std::set<std::string> shared_dict_column_names;
4356  unserialize_key_metainfo(shared_dicts, shared_dict_column_names, td->keyMetainfo);
4357  // gather column defines
4358  const auto cds = getAllColumnMetadataForTable(td->tableId, false, false, false);
4359  std::map<const std::string, const ColumnDescriptor*> dict_root_cds;
4360  bool first = true;
4361  for (const auto cd : cds) {
4362  if (!(cd->isSystemCol || cd->isVirtualCol)) {
4363  const auto& ti = cd->columnType;
4364  if (!first) {
4365  os << ",";
4366  if (!multiline_formatting) {
4367  os << " ";
4368  }
4369  } else {
4370  first = false;
4371  }
4372  if (multiline_formatting) {
4373  os << "\n ";
4374  }
4375  // column name
4376  bool column_name_needs_quotes = is_reserved_sql_keyword(cd->columnName) ||
4377  contains_spaces(cd->columnName) ||
4379  if (!column_name_needs_quotes) {
4380  os << cd->columnName;
4381  } else {
4382  os << "\"" << cd->columnName << "\"";
4383  }
4384  // CHAR is peculiar... better dump it as TEXT(32) like \d does
4385  if (ti.get_type() == SQLTypes::kCHAR) {
4386  os << " "
4387  << "TEXT";
4388  } else if (ti.get_subtype() == SQLTypes::kCHAR) {
4389  os << " "
4390  << "TEXT[]";
4391  } else {
4392  os << " " << ti.get_type_name();
4393  }
4394  os << (ti.get_notnull() ? " NOT NULL" : "");
4395  if (shared_dict_column_names.find(cd->columnName) ==
4396  shared_dict_column_names.end()) {
4397  // avoids "Exception: Column ... shouldn't specify an encoding, it borrows it from
4398  // the referenced column"
4399  if (ti.is_string() || (ti.is_array() && ti.get_subtype() == kTEXT)) {
4400  auto size = ti.is_array() ? ti.get_logical_size() : ti.get_size();
4401  if (ti.get_compression() == kENCODING_DICT) {
4402  os << " ENCODING " << ti.get_compression_name() << "(" << (size * 8) << ")";
4403  } else {
4404  os << " ENCODING NONE";
4405  }
4406  } else if (ti.is_date_in_days() ||
4407  (ti.get_size() > 0 && ti.get_size() != ti.get_logical_size())) {
4408  const auto comp_param = ti.get_comp_param() ? ti.get_comp_param() : 32;
4409  os << " ENCODING " << ti.get_compression_name() << "(" << comp_param << ")";
4410  } else if (ti.is_geometry()) {
4411  if (ti.get_compression() == kENCODING_GEOINT) {
4412  os << " ENCODING " << ti.get_compression_name() << "(" << ti.get_comp_param()
4413  << ")";
4414  } else {
4415  os << " ENCODING NONE";
4416  }
4417  }
4418  }
4419  }
4420  }
4421  // gather SHARED DICTIONARYs
4422  if (shared_dicts.size()) {
4423  std::string comma;
4424  if (!multiline_formatting) {
4425  comma = ", ";
4426  } else {
4427  comma = ",\n ";
4428  }
4429  os << comma;
4430  os << boost::algorithm::join(shared_dicts, comma);
4431  }
4432  os << ")";
4433 
4434  std::vector<std::string> with_options;
4435  if (foreign_table) {
4436  if (multiline_formatting) {
4437  os << "\n";
4438  } else {
4439  os << " ";
4440  }
4441  os << "SERVER " << foreign_table->foreign_server->name;
4442 
4443  // gather WITH options ...
4444  for (const auto& [option, value] : foreign_table->options) {
4445  with_options.emplace_back(option + "='" + value + "'");
4446  }
4447  }
4448 
4449  if (dump_defaults || td->maxFragRows != DEFAULT_FRAGMENT_ROWS) {
4450  with_options.push_back("FRAGMENT_SIZE=" + std::to_string(td->maxFragRows));
4451  }
4452  if (!foreign_table && (dump_defaults || td->maxChunkSize != DEFAULT_MAX_CHUNK_SIZE)) {
4453  with_options.push_back("MAX_CHUNK_SIZE=" + std::to_string(td->maxChunkSize));
4454  }
4455  if (!foreign_table && (dump_defaults || td->fragPageSize != DEFAULT_PAGE_SIZE)) {
4456  with_options.push_back("PAGE_SIZE=" + std::to_string(td->fragPageSize));
4457  }
4458  if (!foreign_table && (dump_defaults || td->maxRows != DEFAULT_MAX_ROWS)) {
4459  with_options.push_back("MAX_ROWS=" + std::to_string(td->maxRows));
4460  }
4461  if (dump_defaults || td->maxRollbackEpochs != DEFAULT_MAX_ROLLBACK_EPOCHS) {
4462  with_options.push_back("MAX_ROLLBACK_EPOCHS=" +
4464  }
4465  if (!foreign_table && (dump_defaults || !td->hasDeletedCol)) {
4466  with_options.push_back(td->hasDeletedCol ? "VACUUM='DELAYED'" : "VACUUM='IMMEDIATE'");
4467  }
4468  if (!foreign_table && !td->partitions.empty()) {
4469  with_options.push_back("PARTITIONS='" + td->partitions + "'");
4470  }
4471  if (!foreign_table && td->nShards > 0) {
4472  const auto shard_cd = getMetadataForColumn(td->tableId, td->shardedColumnId);
4473  CHECK(shard_cd);
4474  with_options.push_back(
4475  "SHARD_COUNT=" +
4476  std::to_string(td->nShards * std::max(g_leaf_count, static_cast<size_t>(1))));
4477  }
4478  if (!foreign_table && td->sortedColumnId > 0) {
4479  const auto sort_cd = getMetadataForColumn(td->tableId, td->sortedColumnId);
4480  CHECK(sort_cd);
4481  with_options.push_back("SORT_COLUMN='" + sort_cd->columnName + "'");
4482  }
4483 
4484  if (!with_options.empty()) {
4485  if (!multiline_formatting) {
4486  os << " ";
4487  } else {
4488  os << "\n";
4489  }
4490  os << "WITH (" + boost::algorithm::join(with_options, ", ") + ")";
4491  }
4492  os << ";";
4493  return os.str();
4494 }
4495 
4497  const bool if_not_exists) {
4498  if (getMetadataForTable(name, false)) {
4499  if (if_not_exists) {
4500  return false;
4501  }
4502  throw std::runtime_error("Table or View with name \"" + name + "\" already exists.");
4503  }
4504  return true;
4505 }
4506 
4507 std::vector<const TableDescriptor*> Catalog::getAllForeignTablesForRefresh() const {
4508  cat_read_lock read_lock(this);
4509  std::vector<const TableDescriptor*> tables;
4510  for (auto entry : tableDescriptorMapById_) {
4511  auto table_descriptor = entry.second;
4512  if (table_descriptor->storageType == StorageType::FOREIGN_TABLE) {
4513  auto foreign_table = dynamic_cast<foreign_storage::ForeignTable*>(table_descriptor);
4514  CHECK(foreign_table);
4515  auto timing_type_entry = foreign_table->options.find(
4517  CHECK(timing_type_entry != foreign_table->options.end());
4518  int64_t current_time = std::chrono::duration_cast<std::chrono::seconds>(
4519  std::chrono::system_clock::now().time_since_epoch())
4520  .count();
4521  if (timing_type_entry->second ==
4523  foreign_table->next_refresh_time <= current_time) {
4524  tables.emplace_back(foreign_table);
4525  }
4526  }
4527  }
4528  return tables;
4529 }
4530 
4531 void Catalog::updateForeignTableRefreshTimes(const int32_t table_id) {
4532  cat_write_lock write_lock(this);
4534  CHECK(tableDescriptorMapById_.find(table_id) != tableDescriptorMapById_.end());
4535  auto table_descriptor = tableDescriptorMapById_.find(table_id)->second;
4536  CHECK(table_descriptor);
4537  auto foreign_table = dynamic_cast<foreign_storage::ForeignTable*>(table_descriptor);
4538  CHECK(foreign_table);
4539  int64_t current_time = std::chrono::duration_cast<std::chrono::seconds>(
4540  std::chrono::system_clock::now().time_since_epoch())
4541  .count();
4542  auto last_refresh_time = current_time;
4543  auto next_refresh_time = get_next_refresh_time(*foreign_table);
4545  "UPDATE omnisci_foreign_tables SET last_refresh_time = ?, next_refresh_time = ? "
4546  "WHERE table_id = ?",
4547  std::vector<std::string>{std::to_string(last_refresh_time),
4548  std::to_string(next_refresh_time),
4549  std::to_string(foreign_table->tableId)});
4550  foreign_table->last_refresh_time = last_refresh_time;
4551  foreign_table->next_refresh_time = next_refresh_time;
4552 }
4553 
4554 // TODO(Misiu): This function should be merged with setForeignServerOptions via
4555 // inheritance rather than by replicating similar functions.
4556 void Catalog::setForeignTableOptions(const std::string& table_name,
4557  foreign_storage::OptionsMap& options_map,
4558  bool clear_existing_options) {
4559  cat_write_lock write_lock(this);
4560  // update in-memory table
4561  auto foreign_table = getForeignTableUnlocked(table_name);
4562  auto saved_options = foreign_table->options;
4563  foreign_table->populateOptionsMap(std::move(options_map), clear_existing_options);
4564  try {
4565  foreign_table->validateOptionValues();
4566  } catch (const std::exception& e) {
4567  // validation did not succeed:
4568  // revert to saved options & throw exception
4569  foreign_table->options = saved_options;
4570  throw;
4571  }
4573  foreign_table, "options", foreign_table->getOptionsAsJsonString());
4574 }
4575 
4577  const std::string& property,
4578  const std::string& value) {
4581  "SELECT table_id from omnisci_foreign_tables where table_id = ?",
4582  std::vector<std::string>{std::to_string(table->tableId)});
4583  auto num_rows = sqliteConnector_.getNumRows();
4584  if (num_rows > 0) {
4585  CHECK_EQ(size_t(1), num_rows);
4587  "UPDATE omnisci_foreign_tables SET " + property + " = ? WHERE table_id = ?",
4588  std::vector<std::string>{value, std::to_string(table->tableId)});
4589  } else {
4590  throw std::runtime_error{"Can not change property \"" + property +
4591  "\" for foreign table." + " Foreign table \"" +
4592  table->tableName + "\" is not found."};
4593  }
4594 }
4595 } // namespace Catalog_Namespace
std::string dumpCreateTable(const TableDescriptor *td, bool multiline_formatting=true, bool dump_defaults=false) const
Definition: Catalog.cpp:4333
std::list< const ColumnDescriptor * > getAllColumnMetadataForTableUnlocked(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Definition: Catalog.cpp:1730
int32_t maxRollbackEpochs
void serializeTableJsonUnlocked(const TableDescriptor *td, const std::list< ColumnDescriptor > &cds) const
Definition: Catalog.cpp:2364
int64_t get_next_refresh_time(const foreign_storage::ForeignTable &foreign_table)
Definition: Catalog.cpp:2135
void instantiateFragmenter(TableDescriptor *td) const
Definition: Catalog.cpp:1347
const Parser::SharedDictionaryDef compress_reference_path(Parser::SharedDictionaryDef cur_node, const std::vector< Parser::SharedDictionaryDef > &shared_dict_defs)
static std::set< std::string > reserved_keywords
const foreign_storage::ForeignTable * getForeignTable(const std::string &tableName) const
Definition: Catalog.cpp:1397
Data_Namespace::MemoryLevel memoryLevel
Definition: UpdelRoll.h:65
std::string virtualExpr
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:312
void set_compression(EncodingType c)
Definition: sqltypes.h:411
void set_size(int s)
Definition: sqltypes.h:409
void deleteMetadataForDashboards(const std::vector< int32_t > ids, const UserMetadata &user)
Definition: Catalog.cpp:1557
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void executeDropTableSqliteQueries(const TableDescriptor *td)
Definition: Catalog.cpp:3446
const int MAPD_TEMP_TABLE_START_ID
Definition: Catalog.cpp:102
std::string partitions
std::vector< int > ChunkKey
Definition: types.h:37