OmniSciDB  04ee39c94c
Catalog.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
29 #ifndef CATALOG_H
30 #define CATALOG_H
31 
32 #include <atomic>
33 #include <cstdint>
34 #include <ctime>
35 #include <limits>
36 #include <list>
37 #include <map>
38 #include <mutex>
39 #include <string>
40 #include <utility>
41 #include <vector>
42 
43 #include "ColumnDescriptor.h"
44 #include "DashboardDescriptor.h"
45 #include "DictDescriptor.h"
46 #include "LinkDescriptor.h"
47 #include "TableDescriptor.h"
48 
49 #include "../DataMgr/DataMgr.h"
50 #include "../QueryEngine/CompilationOptions.h"
51 #include "../SqliteConnector/SqliteConnector.h"
52 #include "LeafHostInfo.h"
53 
54 #include "../Calcite/Calcite.h"
55 #include "../Shared/mapd_shared_mutex.h"
56 
57 #include "SessionInfo.h"
58 #include "SysCatalog.h"
59 
namespace Parser {

// Forward declaration only: the catalog API below takes
// `Parser::SharedDictionaryDef` by (const) reference and by value in
// declarations, so the complete type is not needed in this header.
// NOTE(review): the class-key (`class` vs `struct`) should match the real
// definition in the parser headers — confirm.
class SharedDictionaryDef;

}  // namespace Parser
65 
// SPI means Sequential Positional Index which is equivalent to the input index in a
// RexInput node.
//
// SPIMAP_MAGIC1 is one quarter of the largest `unsigned` value and serves as the
// base offset of the values produced by SPIMAP_GEO_PHYSICAL_INPUT.
#define SPIMAP_MAGIC1 (std::numeric_limits<unsigned>::max() / 4)
// SPIMAP_MAGIC2 is the stride applied to `c + 1` in SPIMAP_GEO_PHYSICAL_INPUT.
#define SPIMAP_MAGIC2 8
// Evaluates to SPIMAP_MAGIC1 + (SPIMAP_MAGIC2 * (c + 1) + i), where the inner sum is
// computed in the arguments' own arithmetic type and then converted to `unsigned`.
// NOTE(review): presumably `c` identifies a geo column and `i` one of its physical
// columns (so `i` is expected to stay below SPIMAP_MAGIC2) — confirm against callers.
#define SPIMAP_GEO_PHYSICAL_INPUT(c, i) \
  (SPIMAP_MAGIC1 + static_cast<unsigned>(SPIMAP_MAGIC2 * ((c) + 1) + (i)))
72 
73 namespace Catalog_Namespace {
74 
81 class Catalog {
82  public:
91  Catalog(const std::string& basePath,
92  const DBMetadata& curDB,
93  std::shared_ptr<Data_Namespace::DataMgr> dataMgr,
94  const std::vector<LeafHostInfo>& string_dict_hosts,
95  std::shared_ptr<Calcite> calcite,
96  bool is_new_db);
97 
104  virtual ~Catalog();
105 
106  static void expandGeoColumn(const ColumnDescriptor& cd,
107  std::list<ColumnDescriptor>& columns);
108  void createTable(TableDescriptor& td,
109  const std::list<ColumnDescriptor>& columns,
110  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs,
111  bool isLogicalTable);
112  void createShardedTable(
113  TableDescriptor& td,
114  const std::list<ColumnDescriptor>& columns,
115  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs);
116  int32_t createDashboard(DashboardDescriptor& vd);
117  void replaceDashboard(DashboardDescriptor& vd);
118  std::string createLink(LinkDescriptor& ld, size_t min_length);
119  void dropTable(const TableDescriptor* td);
120  void truncateTable(const TableDescriptor* td);
121  void renameTable(const TableDescriptor* td, const std::string& newTableName);
122  void renameColumn(const TableDescriptor* td,
123  const ColumnDescriptor* cd,
124  const std::string& newColumnName);
125  void addColumn(const TableDescriptor& td, ColumnDescriptor& cd);
126  void removeChunks(const int table_id);
127 
136  const TableDescriptor* getMetadataForTable(const std::string& tableName,
137  const bool populateFragmenter = true) const;
138  const TableDescriptor* getMetadataForTableImpl(int tableId,
139  const bool populateFragmenter) const;
140  const TableDescriptor* getMetadataForTable(int tableId) const;
141 
142  const ColumnDescriptor* getMetadataForColumn(int tableId,
143  const std::string& colName) const;
144  const ColumnDescriptor* getMetadataForColumn(int tableId, int columnId) const;
145 
146  const int getColumnIdBySpi(const int tableId, const size_t spi) const;
147  const ColumnDescriptor* getMetadataForColumnBySpi(const int tableId,
148  const size_t spi) const;
149 
150  const DashboardDescriptor* getMetadataForDashboard(const std::string& userId,
151  const std::string& dashName) const;
152  void deleteMetadataForDashboard(const std::string& userId, const std::string& dashName);
153 
154  const DashboardDescriptor* getMetadataForDashboard(const int32_t dashboard_id) const;
155  void deleteMetadataForDashboard(const int32_t dashboard_id);
156 
157  const LinkDescriptor* getMetadataForLink(const std::string& link) const;
158  const LinkDescriptor* getMetadataForLink(int linkId) const;
159 
168  std::list<const ColumnDescriptor*> getAllColumnMetadataForTable(
169  const int tableId,
170  const bool fetchSystemColumns,
171  const bool fetchVirtualColumns,
172  const bool fetchPhysicalColumns) const;
173 
174  std::list<const TableDescriptor*> getAllTableMetadata() const;
175  std::list<const DashboardDescriptor*> getAllDashboardsMetadata() const;
176  const DBMetadata& getCurrentDB() const { return currentDB_; }
177  Data_Namespace::DataMgr& getDataMgr() const { return *dataMgr_; }
178  std::shared_ptr<Calcite> getCalciteMgr() const { return calciteMgr_; }
179  const std::string& getBasePath() const { return basePath_; }
180 
181  const DictDescriptor* getMetadataForDict(int dict_ref, bool loadDict = true) const;
182 
183  const std::vector<LeafHostInfo>& getStringDictionaryHosts() const;
184 
185  const ColumnDescriptor* getShardColumnMetadataForTable(const TableDescriptor* td) const;
186 
187  std::vector<const TableDescriptor*> getPhysicalTablesDescriptors(
188  const TableDescriptor* logicalTableDesc) const;
189 
190  int32_t getTableEpoch(const int32_t db_id, const int32_t table_id) const;
191  void setTableEpoch(const int db_id, const int table_id, const int new_epoch);
192  int getDatabaseId() const { return currentDB_.dbId; }
193 
194  SqliteConnector& getSqliteConnector() { return sqliteConnector_; }
195  void roll(const bool forward);
196  DictRef addDictionary(ColumnDescriptor& cd);
197  void delDictionary(const ColumnDescriptor& cd);
198  void getDictionary(const ColumnDescriptor& cd,
199  std::map<int, StringDictionary*>& stringDicts);
200 
201  static void set(const std::string& dbName, std::shared_ptr<Catalog> cat);
202  static std::shared_ptr<Catalog> get(const std::string& dbName);
203  static std::shared_ptr<Catalog> get(const std::string& basePath,
204  const DBMetadata& curDB,
205  std::shared_ptr<Data_Namespace::DataMgr> dataMgr,
206  const std::vector<LeafHostInfo>& string_dict_hosts,
207  std::shared_ptr<Calcite> calcite,
208  bool is_new_db);
209  static void remove(const std::string& dbName);
210 
211  const bool checkMetadataForDeletedRecs(int dbId, int tableId, int columnId) const;
212  const ColumnDescriptor* getDeletedColumn(const TableDescriptor* td) const;
213  const ColumnDescriptor* getDeletedColumnIfRowsDeleted(const TableDescriptor* td) const;
214 
215  void setDeletedColumn(const TableDescriptor* td, const ColumnDescriptor* cd);
216  void setDeletedColumnUnlocked(const TableDescriptor* td, const ColumnDescriptor* cd);
217  int getLogicalTableId(const int physicalTableId) const;
218  void checkpoint(const int logicalTableId) const;
219  std::string name() const { return getCurrentDB().dbName; }
220  void eraseDBData();
221  void eraseTablePhysicalData(const TableDescriptor* td);
222  void vacuumDeletedRows(const TableDescriptor* td) const;
223  void vacuumDeletedRows(const int logicalTableId) const;
224 
225  protected:
226  typedef std::map<std::string, TableDescriptor*> TableDescriptorMap;
227  typedef std::map<int, TableDescriptor*> TableDescriptorMapById;
228  typedef std::map<int32_t, std::vector<int32_t>> LogicalToPhysicalTableMapById;
229  typedef std::tuple<int, std::string> ColumnKey;
230  typedef std::map<ColumnKey, ColumnDescriptor*> ColumnDescriptorMap;
231  typedef std::tuple<int, int> ColumnIdKey;
232  typedef std::map<ColumnIdKey, ColumnDescriptor*> ColumnDescriptorMapById;
233  typedef std::map<DictRef, std::unique_ptr<DictDescriptor>> DictDescriptorMapById;
234  typedef std::map<std::string, std::shared_ptr<DashboardDescriptor>>
236  typedef std::map<std::string, LinkDescriptor*> LinkDescriptorMap;
237  typedef std::map<int, LinkDescriptor*> LinkDescriptorMapById;
238  typedef std::unordered_map<const TableDescriptor*, const ColumnDescriptor*>
240 
241  void CheckAndExecuteMigrations();
242  void CheckAndExecuteMigrationsPostBuildMaps();
243  void updateDictionaryNames();
244  void updateTableDescriptorSchema();
245  void updateFixlenArrayColumns();
246  void updateFrontendViewSchema();
247  void updateLinkSchema();
248  void updateFrontendViewAndLinkUsers();
249  void updateLogicalToPhysicalTableLinkSchema();
250  void updateLogicalToPhysicalTableMap(const int32_t logical_tb_id);
251  void updateDictionarySchema();
252  void updatePageSize();
253  void updateDeletedColumnIndicator();
254  void updateFrontendViewsToDashboards();
255  void recordOwnershipOfObjectsInObjectPermissions();
256  void checkDateInDaysColumnMigration();
257  void createDashboardSystemRoles();
258  void buildMaps();
259  void addTableToMap(TableDescriptor& td,
260  const std::list<ColumnDescriptor>& columns,
261  const std::list<DictDescriptor>& dicts);
262  void addReferenceToForeignDict(ColumnDescriptor& referencing_column,
263  Parser::SharedDictionaryDef shared_dict_def);
264  bool setColumnSharedDictionary(
265  ColumnDescriptor& cd,
266  std::list<ColumnDescriptor>& cdd,
267  std::list<DictDescriptor>& dds,
268  const TableDescriptor td,
269  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs);
270  void setColumnDictionary(ColumnDescriptor& cd,
271  std::list<DictDescriptor>& dds,
272  const TableDescriptor& td,
273  const bool isLogicalTable);
274  void addFrontendViewToMap(DashboardDescriptor& vd);
275  void addFrontendViewToMapNoLock(DashboardDescriptor& vd);
276  void addLinkToMap(LinkDescriptor& ld);
277  void removeTableFromMap(const std::string& tableName,
278  const int tableId,
279  const bool is_on_error = false);
280  void doDropTable(const TableDescriptor* td);
281  void doTruncateTable(const TableDescriptor* td);
282  void renamePhysicalTable(const TableDescriptor* td, const std::string& newTableName);
283  void instantiateFragmenter(TableDescriptor* td) const;
284  void getAllColumnMetadataForTable(const TableDescriptor* td,
285  std::list<const ColumnDescriptor*>& colDescs,
286  const bool fetchSystemColumns,
287  const bool fetchVirtualColumns,
288  const bool fetchPhysicalColumns) const;
289  std::string calculateSHA1(const std::string& data);
290  std::string generatePhysicalTableName(const std::string& logicalTableName,
291  const int32_t& shardNumber);
292  std::vector<DBObject> parseDashboardObjects(const std::string& view_meta,
293  const int& user_id);
294  void createOrUpdateDashboardSystemRole(const std::string& view_meta,
295  const int32_t& user_id,
296  const std::string& dash_role_name);
297 
298  const int getColumnIdBySpiUnlocked(const int table_id, const size_t spi) const;
299 
300  std::string basePath_;
301  TableDescriptorMap tableDescriptorMap_;
302  TableDescriptorMapById tableDescriptorMapById_;
303  ColumnDescriptorMap columnDescriptorMap_;
304  ColumnDescriptorMapById columnDescriptorMapById_;
305  DictDescriptorMapById dictDescriptorMapByRef_;
307  LinkDescriptorMap linkDescriptorMap_;
308  LinkDescriptorMapById linkDescriptorMapById_;
311  std::shared_ptr<Data_Namespace::DataMgr> dataMgr_;
312 
313  const std::vector<LeafHostInfo> string_dict_hosts_;
314  std::shared_ptr<Calcite> calciteMgr_;
315 
316  LogicalToPhysicalTableMapById logicalToPhysicalTableMapById_;
317  static const std::string
318  physicalTableNameTag_; // extra component added to the name of each physical table
321 
322  // this tuple is for rolling forw/back once after ALTER ADD/DEL/MODIFY columns
323  // succeeds/fails
324  // get(0) = old ColumnDescriptor*
325  // get(1) = new ColumnDescriptor*
327  std::vector<std::pair<ColumnDescriptor*, ColumnDescriptor*>>;
329 
330  private:
331  static std::map<std::string, std::shared_ptr<Catalog>> mapd_cat_map_;
333 
334  public:
335  mutable std::mutex sqliteMutex_;
337  mutable std::atomic<std::thread::id> thread_holding_sqlite_lock;
338  mutable std::atomic<std::thread::id> thread_holding_write_lock;
339  // assuming that you never call into a catalog from another catalog via the same thread
340  static thread_local bool thread_holds_read_lock;
341 };
342 
343 } // namespace Catalog_Namespace
344 
345 #endif // CATALOG_H
mapd_shared_mutex sharedMutex_
Definition: Catalog.h:336
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:81
ColumnDescriptorMap columnDescriptorMap_
Definition: Catalog.h:303
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:177
std::map< ColumnKey, ColumnDescriptor * > ColumnDescriptorMap
Definition: Catalog.h:230
std::map< std::string, TableDescriptor * > TableDescriptorMap
Definition: Catalog.h:226
std::unordered_map< const TableDescriptor *, const ColumnDescriptor * > DeletedColumnPerTableMap
Definition: Catalog.h:239
std::map< int32_t, std::vector< int32_t > > LogicalToPhysicalTableMapById
Definition: Catalog.h:228
SqliteConnector sqliteConnector_
Definition: Catalog.h:309
const std::vector< LeafHostInfo > string_dict_hosts_
Definition: Catalog.h:313
std::shared_ptr< Calcite > getCalciteMgr() const
Definition: Catalog.h:178
DictDescriptorMapById dictDescriptorMapByRef_
Definition: Catalog.h:305
std::shared_ptr< Data_Namespace::DataMgr > dataMgr_
Definition: Catalog.h:311
std::atomic< std::thread::id > thread_holding_sqlite_lock
Definition: Catalog.h:337
DeletedColumnPerTableMap deletedColumnPerTable_
Definition: Catalog.h:332
ColumnDescriptorMapById columnDescriptorMapById_
Definition: Catalog.h:304
DashboardDescriptorMap dashboardDescriptorMap_
Definition: Catalog.h:306
std::map< std::string, LinkDescriptor * > LinkDescriptorMap
Definition: Catalog.h:236
int getDatabaseId() const
Definition: Catalog.h:192
std::map< int, LinkDescriptor * > LinkDescriptorMapById
Definition: Catalog.h:237
This file contains the class specification and related data structures for SysCatalog.
std::map< DictRef, std::unique_ptr< DictDescriptor > > DictDescriptorMapById
Definition: Catalog.h:233
std::shared_timed_mutex mapd_shared_mutex
std::map< int, TableDescriptor * > TableDescriptorMapById
Definition: Catalog.h:227
std::tuple< int, int > ColumnIdKey
Definition: Catalog.h:231
std::vector< std::pair< ColumnDescriptor *, ColumnDescriptor * > > ColumnDescriptorsForRoll
Definition: Catalog.h:327
std::tuple< int, std::string > ColumnKey
Definition: Catalog.h:229
TableDescriptorMapById tableDescriptorMapById_
Definition: Catalog.h:302
specifies the content in-memory of a row in the column metadata table
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:176
std::map< ColumnIdKey, ColumnDescriptor * > ColumnDescriptorMapById
Definition: Catalog.h:232
std::shared_ptr< Calcite > calciteMgr_
Definition: Catalog.h:314
static const std::string physicalTableNameTag_
Definition: Catalog.h:318
const std::string & getBasePath() const
Definition: Catalog.h:179
std::string name() const
Definition: Catalog.h:219
Definition: Catalog.h:60
Descriptor for a dictionary for a string column.
std::atomic< std::thread::id > thread_holding_write_lock
Definition: Catalog.h:338
SqliteConnector & getSqliteConnector()
Definition: Catalog.h:194
specifies the content in-memory of a row in the table metadata table
LinkDescriptorMapById linkDescriptorMapById_
Definition: Catalog.h:308
static std::map< std::string, std::shared_ptr< Catalog > > mapd_cat_map_
Definition: Catalog.h:331
ColumnDescriptorsForRoll columnDescriptorsForRoll
Definition: Catalog.h:328
std::map< std::string, std::shared_ptr< DashboardDescriptor > > DashboardDescriptorMap
Definition: Catalog.h:235
LogicalToPhysicalTableMapById logicalToPhysicalTableMapById_
Definition: Catalog.h:316
TableDescriptorMap tableDescriptorMap_
Definition: Catalog.h:301
static thread_local bool thread_holds_read_lock
Definition: Catalog.h:340
LinkDescriptorMap linkDescriptorMap_
Definition: Catalog.h:307