OmniSciDB  8a228a1076
Catalog.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
29 #pragma once
30 
31 #include <atomic>
32 #include <cstdint>
33 #include <ctime>
34 #include <limits>
35 #include <list>
36 #include <map>
37 #include <mutex>
38 #include <string>
39 #include <utility>
40 #include <vector>
41 
42 #include "Calcite/Calcite.h"
45 #include "Catalog/DictDescriptor.h"
46 #include "Catalog/ForeignServer.h"
47 #include "Catalog/ForeignTable.h"
48 #include "Catalog/LinkDescriptor.h"
49 #include "Catalog/SessionInfo.h"
50 #include "Catalog/SysCatalog.h"
52 #include "Catalog/Types.h"
53 #include "DataMgr/DataMgr.h"
54 #include "LockMgr/LockMgrImpl.h"
58 
59 #include "LeafHostInfo.h"
60 
62 
// Namespace block for parser-side types referenced by the catalog API.
// NOTE(review): the body is empty in this listing and the listing skips line 65;
// later declarations use Parser::SharedDictionaryDef, so a forward declaration of
// that type presumably belongs here -- confirm against the original header.
63 namespace Parser {
64 
66 
67 } // namespace Parser
68 
// Forward declaration only; the definition of TableArchiver is not visible in this file.
69 class TableArchiver;
70 
// SPI means Sequential Positional Index, which is equivalent to the input index in a
// RexInput node.

// Base offset added to every value produced by SPIMAP_GEO_PHYSICAL_INPUT: one quarter
// of the largest value representable by `unsigned`.
#define SPIMAP_MAGIC1 (std::numeric_limits<unsigned>::max() / 4)

// Multiplier applied to (c + 1) in SPIMAP_GEO_PHYSICAL_INPUT.
#define SPIMAP_MAGIC2 8

// Computes SPIMAP_MAGIC1 + SPIMAP_MAGIC2 * (c + 1) + i as an `unsigned` value.
// Both arguments are parenthesised in the expansion, so expressions may be passed;
// the inner sum is converted to `unsigned` before the base offset is added.
#define SPIMAP_GEO_PHYSICAL_INPUT(c, i) \
  (SPIMAP_MAGIC1 + static_cast<unsigned>(SPIMAP_MAGIC2 * ((c) + 1) + (i)))
77 
78 namespace Catalog_Namespace {
79 
// Catalog: declares the metadata operations (tables, columns, dashboards, links,
// dictionaries, foreign servers) for a single database. The class is `final`, so it
// cannot be derived from.
// NOTE(review): this listing skips source line numbers in several places (for example
// 461-470 and 472-473), so some data members used by the inline accessors below
// (currentDB_, sqliteConnector_) have no visible declaration here.
86 class Catalog final {
87  public:
 // Constructor. Takes the base path, the metadata of the current database, shared
 // handles to the data manager and to Calcite, the string dictionary hosts and a
 // flag telling whether the database is new.
96  Catalog(const std::string& basePath,
97  const DBMetadata& curDB,
98  std::shared_ptr<Data_Namespace::DataMgr> dataMgr,
99  const std::vector<LeafHostInfo>& string_dict_hosts,
100  std::shared_ptr<Calcite> calcite,
101  bool is_new_db);
102 
109  ~Catalog();
110 
 // Static helper: operates only on the given column descriptor and column list.
111  static void expandGeoColumn(const ColumnDescriptor& cd,
112  std::list<ColumnDescriptor>& columns);
 // Create / replace / drop / truncate / rename entry points for tables, dashboards,
 // links and columns. Descriptors passed by non-const reference may be modified by
 // the callee.
113  void createTable(TableDescriptor& td,
114  const std::list<ColumnDescriptor>& columns,
115  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs,
116  bool isLogicalTable);
117  void createShardedTable(
118  TableDescriptor& td,
119  const std::list<ColumnDescriptor>& columns,
120  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs);
121  int32_t createDashboard(DashboardDescriptor& vd);
122  void replaceDashboard(DashboardDescriptor& vd);
123  std::string createLink(LinkDescriptor& ld, size_t min_length);
124  void dropTable(const TableDescriptor* td);
125  void truncateTable(const TableDescriptor* td);
126  void renameTable(const TableDescriptor* td, const std::string& newTableName);
127  void renameColumn(const TableDescriptor* td,
128  const ColumnDescriptor* cd,
129  const std::string& newColumnName);
130  void addColumn(const TableDescriptor& td, ColumnDescriptor& cd);
131  void dropColumn(const TableDescriptor& td, const ColumnDescriptor& cd);
132  void removeChunks(const int table_id);
133  void removeFragmenterForTable(const int table_id);
134 
 // Returns the map by value.
 // NOTE(review): presumably keyed by dictionary id -- confirm in the implementation.
135  const std::map<int, const ColumnDescriptor*> getDictionaryToColumnMapping();
136 
 // Table metadata lookup by name or by id. The two getMetadataForTable overloads
 // default populateFragmenter to true; getMetadataForTableImpl has no default.
145  const TableDescriptor* getMetadataForTable(const std::string& tableName,
146  const bool populateFragmenter = true) const;
147  const TableDescriptor* getMetadataForTableImpl(int tableId,
148  const bool populateFragmenter) const;
149  const TableDescriptor* getMetadataForTable(int tableId,
150  bool populateFragmenter = true) const;
151 
 // Column metadata lookup by (table id, column name) or (table id, column id).
 // NOTE(review): the *Unlocked variants presumably expect the caller to already hold
 // the relevant catalog lock -- confirm before calling them directly.
152  const ColumnDescriptor* getMetadataForColumn(int tableId,
153  const std::string& colName) const;
154  const ColumnDescriptor* getMetadataForColumn(int tableId, int columnId) const;
155  const ColumnDescriptor* getMetadataForColumnUnlocked(int tableId, int columnId) const;
156 
 // Lookups keyed by SPI (see the SPIMAP_* macros defined before this class).
157  const int getColumnIdBySpi(const int tableId, const size_t spi) const;
158  const ColumnDescriptor* getMetadataForColumnBySpi(const int tableId,
159  const size_t spi) const;
160 
 // Dashboard metadata lookup by (user id, dashboard name) or by dashboard id.
161  const DashboardDescriptor* getMetadataForDashboard(const std::string& userId,
162  const std::string& dashName) const;
163 
164  const DashboardDescriptor* getMetadataForDashboard(const int32_t dashboard_id) const;
 // Note: `ids` is taken by value, so the vector is copied at the call site.
165  void deleteMetadataForDashboards(const std::vector<int32_t> ids,
166  const UserMetadata& user);
167 
 // Link metadata lookup by link string or by link id.
168  const LinkDescriptor* getMetadataForLink(const std::string& link) const;
169  const LinkDescriptor* getMetadataForLink(int linkId) const;
170 
 // Returns the column descriptors of a table as a list; the three flags select
 // whether system, virtual and physical columns are fetched.
179  std::list<const ColumnDescriptor*> getAllColumnMetadataForTable(
180  const int tableId,
181  const bool fetchSystemColumns,
182  const bool fetchVirtualColumns,
183  const bool fetchPhysicalColumns) const;
 // Same parameters and return type as getAllColumnMetadataForTable.
 // NOTE(review): presumably skips locking -- confirm in the implementation.
187  std::list<const ColumnDescriptor*> getAllColumnMetadataForTableUnlocked(
188  const int tableId,
189  const bool fetchSystemColumns,
190  const bool fetchVirtualColumns,
191  const bool fetchPhysicalColumns) const;
192 
193  std::list<const TableDescriptor*> getAllTableMetadata() const;
194  std::list<const DashboardDescriptor*> getAllDashboardsMetadata() const;
 // Inline accessors. getDataMgr() dereferences dataMgr_ without a null check;
 // getCalciteMgr() returns a copy of the shared_ptr.
195  const DBMetadata& getCurrentDB() const { return currentDB_; }
196  Data_Namespace::DataMgr& getDataMgr() const { return *dataMgr_; }
197  std::shared_ptr<Calcite> getCalciteMgr() const { return calciteMgr_; }
198  const std::string& getBasePath() const { return basePath_; }
199 
 // Dictionary metadata lookup; loadDict defaults to true only on the first overload.
200  const DictDescriptor* getMetadataForDict(int dict_ref, bool loadDict = true) const;
201  const DictDescriptor* getMetadataForDictUnlocked(int dict_ref, bool loadDict) const;
202 
203  const std::vector<LeafHostInfo>& getStringDictionaryHosts() const;
204 
205  const ColumnDescriptor* getShardColumnMetadataForTable(const TableDescriptor* td) const;
206 
207  std::vector<const TableDescriptor*> getPhysicalTablesDescriptors(
208  const TableDescriptor* logicalTableDesc) const;
209 
 // Returns table names for the given user, filtered by get_tables_type.
218  std::vector<std::string> getTableNamesForUser(
219  const UserMetadata& user,
220  const GetTablesType get_tables_type) const;
221 
222  int32_t getTableEpoch(const int32_t db_id, const int32_t table_id) const;
223  void setTableEpoch(const int db_id, const int table_id, const int new_epoch);
 // Returns currentDB_.dbId.
224  int getDatabaseId() const { return currentDB_.dbId; }
 // Returns a mutable reference to the catalog's SqliteConnector member.
225  SqliteConnector& getSqliteConnector() { return sqliteConnector_; }
226  void roll(const bool forward);
227  DictRef addDictionary(ColumnDescriptor& cd);
228  void delDictionary(const ColumnDescriptor& cd);
229  void getDictionary(const ColumnDescriptor& cd,
230  std::map<int, StringDictionary*>& stringDicts);
231 
 // Static set/get/remove functions keyed by database name (plus a get by database id).
 // The last get overload takes the same arguments as the constructor.
 // NOTE(review): that overload presumably creates the catalog when it is not yet
 // registered -- confirm in the implementation.
232  static void set(const std::string& dbName, std::shared_ptr<Catalog> cat);
233  static std::shared_ptr<Catalog> get(const std::string& dbName);
234  static std::shared_ptr<Catalog> get(const int32_t db_id);
235  static std::shared_ptr<Catalog> get(const std::string& basePath,
236  const DBMetadata& curDB,
237  std::shared_ptr<Data_Namespace::DataMgr> dataMgr,
238  const std::vector<LeafHostInfo>& string_dict_hosts,
239  std::shared_ptr<Calcite> calcite,
240  bool is_new_db);
241  static void remove(const std::string& dbName);
242 
 // Deleted-column bookkeeping for a table.
243  const bool checkMetadataForDeletedRecs(const TableDescriptor* td, int column_id) const;
244  const ColumnDescriptor* getDeletedColumn(const TableDescriptor* td) const;
245  const ColumnDescriptor* getDeletedColumnIfRowsDeleted(const TableDescriptor* td) const;
246 
247  void setDeletedColumn(const TableDescriptor* td, const ColumnDescriptor* cd);
248  void setDeletedColumnUnlocked(const TableDescriptor* td, const ColumnDescriptor* cd);
249  int getLogicalTableId(const int physicalTableId) const;
250  void checkpoint(const int logicalTableId) const;
 // Returns a copy of the current database's dbName.
251  std::string name() const { return getCurrentDB().dbName; }
252  void eraseDBData();
253  void eraseTablePhysicalData(const TableDescriptor* td);
254  void vacuumDeletedRows(const TableDescriptor* td) const;
255  void vacuumDeletedRows(const int logicalTableId) const;
256  void setForReload(const int32_t tableId);
257 
 // Directory and schema-dump helpers; all return their result as std::string values.
258  std::vector<std::string> getTableDataDirectories(const TableDescriptor* td) const;
259  std::vector<std::string> getTableDictDirectories(const TableDescriptor* td) const;
260  std::string getColumnDictDirectory(const ColumnDescriptor* cd) const;
261  std::string dumpSchema(const TableDescriptor* td) const;
262  std::string dumpCreateTable(const TableDescriptor* td,
263  bool multiline_formatting = true,
264  bool dump_defaults = false) const;
265 
 // Static schema-string getters; if_not_exists defaults to false.
273  static const std::string getForeignTableSchema(bool if_not_exists = false);
274 
282  static const std::string getForeignServerSchema(bool if_not_exists = false);
283 
 // Takes the foreign server as a std::unique_ptr by value, i.e. the caller hands
 // over ownership of the object.
293  void createForeignServer(std::unique_ptr<foreign_storage::ForeignServer> foreign_server,
294  bool if_not_exists);
295 
 // Returns a non-owning pointer to the server with the given name.
303  const foreign_storage::ForeignServer* getForeignServer(
304  const std::string& server_name) const;
305 
 // Returns an owning pointer to a const ForeignServer; unlike getForeignServer this
 // function is not const-qualified.
315  const std::unique_ptr<const foreign_storage::ForeignServer> getForeignServerFromStorage(
316  const std::string& server_name);
317 
324  void changeForeignServerOwner(const std::string& server_name, const int new_owner_id);
325 
332  void setForeignServerDataWrapper(const std::string& server_name,
333  const std::string& data_wrapper);
340  void setForeignServerOptions(const std::string& server_name,
341  const std::string& options);
348  void renameForeignServer(const std::string& server_name, const std::string& name);
349 
355  void dropForeignServer(const std::string& server_name);
356 
 // Results are written to the `results` output parameter; `filters` is passed as a
 // pointer to a rapidjson value.
369  void getForeignServersForUser(
370  const rapidjson::Value* filters,
371  const UserMetadata& user,
372  std::vector<const foreign_storage::ForeignServer*>& results);
373 
377  void createDefaultServersIfNotExists();
378 
389  bool validateNonExistentTableOrView(const std::string& name, const bool if_not_exists);
390 
391  protected:
 // Migration and schema-update helpers.
 // NOTE(review): the call order and the point at which these run are defined in the
 // implementation file, not here -- confirm there before reordering or reusing them.
392  void CheckAndExecuteMigrations();
393  void CheckAndExecuteMigrationsPostBuildMaps();
394  void updateDictionaryNames();
395  void updateTableDescriptorSchema();
396  void updateFixlenArrayColumns();
397  void updateGeoColumns();
398  void updateFrontendViewSchema();
399  void updateLinkSchema();
400  void updateFrontendViewAndLinkUsers();
401  void updateLogicalToPhysicalTableLinkSchema();
402  void updateLogicalToPhysicalTableMap(const int32_t logical_tb_id);
403  void updateDictionarySchema();
404  void updatePageSize();
405  void updateDeletedColumnIndicator();
406  void updateFrontendViewsToDashboards();
407  void createFsiSchemasAndDefaultServers();
408  void dropFsiSchemasAndTables();
409  void recordOwnershipOfObjectsInObjectPermissions();
410  void checkDateInDaysColumnMigration();
411  void createDashboardSystemRoles();
 // Helpers that maintain the in-memory descriptor maps.
412  void buildMaps();
413  void addTableToMap(const TableDescriptor* td,
414  const std::list<ColumnDescriptor>& columns,
415  const std::list<DictDescriptor>& dicts);
 // Note: shared_dict_def is taken by value here.
416  void addReferenceToForeignDict(ColumnDescriptor& referencing_column,
417  Parser::SharedDictionaryDef shared_dict_def,
418  const bool persist_reference);
 // Note: `td` is taken by value (a copy of the TableDescriptor), whereas
 // setColumnDictionary below takes it by const reference.
419  bool setColumnSharedDictionary(
420  ColumnDescriptor& cd,
421  std::list<ColumnDescriptor>& cdd,
422  std::list<DictDescriptor>& dds,
423  const TableDescriptor td,
424  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs);
425  void setColumnDictionary(ColumnDescriptor& cd,
426  std::list<DictDescriptor>& dds,
427  const TableDescriptor& td,
428  const bool isLogicalTable);
429  void addFrontendViewToMap(DashboardDescriptor& vd);
430  void addFrontendViewToMapNoLock(DashboardDescriptor& vd);
431  void addLinkToMap(LinkDescriptor& ld);
 // is_on_error defaults to false.
432  void removeTableFromMap(const std::string& tableName,
433  const int tableId,
434  const bool is_on_error = false);
435  void doDropTable(const TableDescriptor* td);
436  void executeDropTableSqliteQueries(const TableDescriptor* td);
437  void doTruncateTable(const TableDescriptor* td);
438  void renamePhysicalTable(const TableDescriptor* td, const std::string& newTableName);
 // const member function that takes a non-const TableDescriptor*.
439  void instantiateFragmenter(TableDescriptor* td) const;
 // Fills the colDescs output list; the flags mirror getAllColumnMetadataForTable.
440  void getAllColumnMetadataForTableImpl(const TableDescriptor* td,
441  std::list<const ColumnDescriptor*>& colDescs,
442  const bool fetchSystemColumns,
443  const bool fetchVirtualColumns,
444  const bool fetchPhysicalColumns) const;
445  std::string calculateSHA1(const std::string& data);
446  std::string generatePhysicalTableName(const std::string& logicalTableName,
447  const int32_t& shardNumber);
448  std::vector<DBObject> parseDashboardObjects(const std::string& view_meta,
449  const int& user_id);
450  void createOrUpdateDashboardSystemRole(const std::string& view_meta,
451  const int32_t& user_id,
452  const std::string& dash_role_name);
453 
454  const int getColumnIdBySpiUnlocked(const int table_id, const size_t spi) const;
455 
456  void serializeTableJsonUnlocked(const TableDescriptor* td,
457  const std::list<ColumnDescriptor>& cds) const;
458  void dropTableFromJsonUnlocked(const std::string& table_name) const;
459 
 // Data members.
 // NOTE(review): the listing jumps from line 460 to 471 and from 471 to 474, so the
 // member declarations on the skipped lines are not visible in this view.
460  std::string basePath_;
471 
474  std::shared_ptr<Data_Namespace::DataMgr> dataMgr_;
475 
476  const std::vector<LeafHostInfo> string_dict_hosts_;
477  std::shared_ptr<Calcite> calciteMgr_;
478 
480  static const std::string
481  physicalTableNameTag_; // extra component added to the name of each physical table
484 
485  // this tuple is for rolling forw/back once after ALTER ADD/DEL/MODIFY columns
486  // succeeds/fails
487  // get(0) = old ColumnDescriptor*
488  // get(1) = new ColumnDescriptor*
 // NOTE(review): the type below has no name in this listing and the listing skips
 // line 489; the line that introduces the alias (presumably
 // `using ColumnDescriptorsForRoll =`) appears to be missing -- confirm against the
 // original header.
490  std::vector<std::pair<ColumnDescriptor*, ColumnDescriptor*>>;
492 
493  private:
 // Static map from std::string keys to shared Catalog instances.
494  static std::map<std::string, std::shared_ptr<Catalog>> mapd_cat_map_;
 // NOTE(review): std::unordered_map is used below but <unordered_map> is not among
 // the standard headers visibly included by this file; presumably it arrives
 // transitively -- confirm.
496  void adjustAlteredTableFiles(
497  const std::string& temp_data_dir,
498  const std::unordered_map<int, int>& all_column_ids_map) const;
499  void renameTableDirectories(const std::string& temp_data_dir,
500  const std::vector<std::string>& target_paths,
501  const std::string& name_prefix) const;
502  void buildForeignServerMap();
503  void addForeignTableDetails();
504 
505  void setForeignServerProperty(const std::string& server_name,
506  const std::string& property,
507  const std::string& value);
508 
 // Same parameters as the public createForeignServer.
 // NOTE(review): the NoLocks suffix suggests the caller is responsible for locking
 // -- confirm in the implementation.
513  void createForeignServerNoLocks(
514  std::unique_ptr<foreign_storage::ForeignServer> foreign_server,
515  bool if_not_exists);
516 
517  public:
 // Synchronization state. These members are public and declared `mutable`, so they
 // can be modified through const Catalog references.
 // NOTE(review): std::thread::id is used but <thread> is not among the standard
 // headers visibly included by this file; presumably it arrives transitively.
518  mutable std::mutex sqliteMutex_;
520  mutable std::atomic<std::thread::id> thread_holding_sqlite_lock;
521  mutable std::atomic<std::thread::id> thread_holding_write_lock;
522  // assuming that you never call into a catalog from another catalog via the same thread
523  static thread_local bool thread_holds_read_lock;
524 };
525 
526 } // namespace Catalog_Namespace
std::unordered_map< const TableDescriptor *, const ColumnDescriptor * > DeletedColumnPerTableMap
Definition: Types.h:46
mapd_shared_mutex sharedMutex_
Definition: Catalog.h:519
Class for a per-database catalog. Also includes metadata for the current database and the current user.
Definition: Catalog.h:86
ColumnDescriptorMap columnDescriptorMap_
Definition: Catalog.h:463
std::map< ColumnKey, ColumnDescriptor * > ColumnDescriptorMap
Definition: Types.h:37
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:196
SqliteConnector sqliteConnector_
Definition: Catalog.h:472
const std::vector< LeafHostInfo > string_dict_hosts_
Definition: Catalog.h:476
std::shared_ptr< Calcite > getCalciteMgr() const
Definition: Catalog.h:197
DictDescriptorMapById dictDescriptorMapByRef_
Definition: Catalog.h:465
std::map< std::string, std::shared_ptr< foreign_storage::ForeignServer > > ForeignServerMap
Definition: Types.h:48
std::shared_ptr< Data_Namespace::DataMgr > dataMgr_
Definition: Catalog.h:474
std::atomic< std::thread::id > thread_holding_sqlite_lock
Definition: Catalog.h:520
DeletedColumnPerTableMap deletedColumnPerTable_
Definition: Catalog.h:495
ColumnDescriptorMapById columnDescriptorMapById_
Definition: Catalog.h:464
DashboardDescriptorMap dashboardDescriptorMap_
Definition: Catalog.h:466
std::map< int32_t, std::vector< int32_t > > LogicalToPhysicalTableMapById
Definition: Types.h:35
int getDatabaseId() const
Definition: Catalog.h:224
This file contains the class specification and related data structures for SysCatalog.
std::string cat(Ts &&... args)
std::map< ColumnIdKey, ColumnDescriptor * > ColumnDescriptorMapById
Definition: Types.h:39
std::map< int, TableDescriptor * > TableDescriptorMapById
Definition: Types.h:34
std::map< std::string, LinkDescriptor * > LinkDescriptorMap
Definition: Types.h:43
GetTablesType
Definition: Catalog.h:61
std::shared_timed_mutex mapd_shared_mutex
std::vector< std::pair< ColumnDescriptor *, ColumnDescriptor * > > ColumnDescriptorsForRoll
Definition: Catalog.h:490
TableDescriptorMapById tableDescriptorMapById_
Definition: Catalog.h:462
specifies the content in-memory of a row in the column metadata table
std::map< int, std::shared_ptr< foreign_storage::ForeignServer > > ForeignServerMapById
Definition: Types.h:50
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:195
ForeignServerMapById foreignServerMapById_
Definition: Catalog.h:470
ForeignServerMap foreignServerMap_
Definition: Catalog.h:469
std::map< std::string, std::shared_ptr< DashboardDescriptor > > DashboardDescriptorMap
Definition: Types.h:42
std::shared_ptr< Calcite > calciteMgr_
Definition: Catalog.h:477
static const std::string physicalTableNameTag_
Definition: Catalog.h:481
const std::string & getBasePath() const
Definition: Catalog.h:198
std::string name() const
Definition: Catalog.h:251
Definition: Catalog.h:63
std::map< int, LinkDescriptor * > LinkDescriptorMapById
Definition: Types.h:44
Descriptor for a dictionary for a string column.
std::atomic< std::thread::id > thread_holding_write_lock
Definition: Catalog.h:521
SqliteConnector & getSqliteConnector()
Definition: Catalog.h:225
specifies the content in-memory of a row in the table metadata table
LinkDescriptorMapById linkDescriptorMapById_
Definition: Catalog.h:468
static std::map< std::string, std::shared_ptr< Catalog > > mapd_cat_map_
Definition: Catalog.h:494
ColumnDescriptorsForRoll columnDescriptorsForRoll
Definition: Catalog.h:491
LogicalToPhysicalTableMapById logicalToPhysicalTableMapById_
Definition: Catalog.h:479
TableDescriptorMap tableDescriptorMap_
Definition: Catalog.h:461
static thread_local bool thread_holds_read_lock
Definition: Catalog.h:523
std::map< DictRef, std::unique_ptr< DictDescriptor > > DictDescriptorMapById
Definition: Types.h:40
std::map< std::string, TableDescriptor * > TableDescriptorMap
Definition: Types.h:33
LinkDescriptorMap linkDescriptorMap_
Definition: Catalog.h:467