1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
29 #ifndef CATALOG_H
30 #define CATALOG_H
32 #include <atomic>
33 #include <cstdint>
34 #include <ctime>
35 #include <limits>
36 #include <list>
37 #include <map>
38 #include <mutex>
39 #include <string>
40 #include <utility>
41 #include <vector>
43 #include "ColumnDescriptor.h"
44 #include "DashboardDescriptor.h"
45 #include "DictDescriptor.h"
46 #include "LinkDescriptor.h"
47 #include "TableDescriptor.h"
49 #include "../DataMgr/DataMgr.h"
50 #include "../QueryEngine/CompilationOptions.h"
51 #include "../SqliteConnector/SqliteConnector.h"
52 #include "LeafHostInfo.h"
54 #include "../Calcite/Calcite.h"
55 #include "../Shared/mapd_shared_mutex.h"
57 #include "SessionInfo.h"
58 #include "SysCatalog.h"
// Namespace for parser-side types referenced by this header.
// NOTE(review): the body of this namespace is not visible in this listing (it
// appears empty here). Declarations later in this file name
// Parser::SharedDictionaryDef, so a forward declaration of it presumably belongs
// here - TODO confirm against the real header.
60 namespace Parser {
64 } // namespace Parser
// SPI means Sequential Positional Index which is equivalent to the input index in a
// RexInput node.
//
// SPIMAP_MAGIC1 - base offset: one quarter of the largest `unsigned` value.
// SPIMAP_MAGIC2 - multiplier (8) applied to (c + 1) in SPIMAP_GEO_PHYSICAL_INPUT.
// SPIMAP_GEO_PHYSICAL_INPUT(c, i)
//   - evaluates to SPIMAP_MAGIC1 + SPIMAP_MAGIC2 * (c + 1) + i. The inner sum is
//     computed in the operands' own type and converted to `unsigned` before being
//     added to the base, so the result has type `unsigned`.
//     NOTE(review): presumably c is a geo column's index and i the index of one of
//     its physical inputs - confirm against the call sites.
//
// The conversion uses a named cast (static_cast) rather than a C-style cast; for
// arithmetic operands the two are equivalent.
#define SPIMAP_MAGIC1 (std::numeric_limits<unsigned>::max() / 4)
#define SPIMAP_MAGIC2 8
#define SPIMAP_GEO_PHYSICAL_INPUT(c, i) \
  (SPIMAP_MAGIC1 + static_cast<unsigned>(SPIMAP_MAGIC2 * ((c) + 1) + (i)))
73 namespace Catalog_Namespace {
// Catalog: exposes metadata (tables, columns, dashboards, links, string
// dictionaries) through const descriptor pointers, plus operations that create,
// rename, drop, dump and restore tables. It appears to serve a single database:
// the constructor takes one DBMetadata and getCurrentDB()/getDatabaseId() report
// currentDB_. The class is declared `final`, so it cannot be derived from.
//
// NOTE(review): this listing is incomplete. Every line starts with a stray integer
// (the line number of the listing it was copied from), and several declarations
// have lost their first line or their alias name. Those spots are flagged
// individually below and must be restored from the real header before this
// compiles.
// NOTE(review): the class uses std::shared_ptr / std::unique_ptr, std::tuple,
// std::unordered_map and std::thread::id, but <memory>, <tuple>, <unordered_map>
// and <thread> are not among this file's direct standard includes. Presumably they
// arrive transitively through the project headers; consider including them
// directly.
81 class Catalog final {
82  public:
  // Constructor. Receives the base path, the metadata of a database (curDB), a
  // shared DataMgr handle, the list of string-dictionary hosts, a shared Calcite
  // handle and an is_new_db flag. The definition is out of line, so how each
  // argument is used is not visible here (presumably they initialise basePath_,
  // currentDB_, dataMgr_, string_dict_hosts_ and calciteMgr_ - TODO confirm).
91  Catalog(const std::string& basePath,
92  const DBMetadata& curDB,
93  std::shared_ptr<Data_Namespace::DataMgr> dataMgr,
94  const std::vector<LeafHostInfo>& string_dict_hosts,
95  std::shared_ptr<Calcite> calcite,
96  bool is_new_db);
  // Destructor, defined out of line.
104  ~Catalog();
  // Static helper: takes a column descriptor and a mutable list of column
  // descriptors. Presumably appends the columns that back a geo column to
  // `columns` - TODO confirm against the definition.
106  static void expandGeoColumn(const ColumnDescriptor& cd,
107  std::list<ColumnDescriptor>& columns);
  // ---- Table / column / link mutators (all defined out of line) ----
  // createTable and createShardedTable take the table descriptor by non-const
  // reference, so the callee may modify it; createTable additionally takes an
  // isLogicalTable flag.
108  void createTable(TableDescriptor& td,
109  const std::list<ColumnDescriptor>& columns,
110  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs,
111  bool isLogicalTable);
112  void createShardedTable(
113  TableDescriptor& td,
114  const std::list<ColumnDescriptor>& columns,
115  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs);
  // Returns a std::string for the given link descriptor; the meaning of the
  // returned string and of min_length is defined out of line (not visible here).
118  std::string createLink(LinkDescriptor& ld, size_t min_length);
119  void dropTable(const TableDescriptor* td);
120  void truncateTable(const TableDescriptor* td);
121  void renameTable(const TableDescriptor* td, const std::string& newTableName);
122  void renameColumn(const TableDescriptor* td,
123  const ColumnDescriptor* cd,
124  const std::string& newColumnName);
125  void addColumn(const TableDescriptor& td, ColumnDescriptor& cd);
126  void removeChunks(const int table_id);
  // ---- Descriptor lookups ----
  // Each returns a pointer to a const descriptor.
  // NOTE(review): presumably nullptr when nothing matches - confirm against the
  // definitions. Only the by-name table lookup defaults populateFragmenter (to
  // true); getMetadataForTableImpl requires it explicitly.
136  const TableDescriptor* getMetadataForTable(const std::string& tableName,
137  const bool populateFragmenter = true) const;
138  const TableDescriptor* getMetadataForTableImpl(int tableId,
139  const bool populateFragmenter) const;
140  const TableDescriptor* getMetadataForTable(int tableId) const;
  // Column lookup by (table id, column name) or by (table id, column id).
142  const ColumnDescriptor* getMetadataForColumn(int tableId,
143  const std::string& colName) const;
144  const ColumnDescriptor* getMetadataForColumn(int tableId, int columnId) const;
  // Column lookup by SPI (Sequential Positional Index, see the comment next to the
  // SPIMAP_* macros near the top of this file).
  // NOTE(review): the top-level const on the by-value return type (`const int`)
  // has no effect on the value callers receive.
146  const int getColumnIdBySpi(const int tableId, const size_t spi) const;
147  const ColumnDescriptor* getMetadataForColumnBySpi(const int tableId,
148  const size_t spi) const;
  // Dashboard lookup / deletion, addressed either by (userId, dashName) or by a
  // numeric dashboard id.
150  const DashboardDescriptor* getMetadataForDashboard(const std::string& userId,
151  const std::string& dashName) const;
152  void deleteMetadataForDashboard(const std::string& userId, const std::string& dashName);
154  const DashboardDescriptor* getMetadataForDashboard(const int32_t dashboard_id) const;
155  void deleteMetadataForDashboard(const int32_t dashboard_id);
  // Link lookup by link string or by numeric link id.
157  const LinkDescriptor* getMetadataForLink(const std::string& link) const;
158  const LinkDescriptor* getMetadataForLink(int linkId) const;
  // Returns, by value, a list of column-descriptor pointers for one table. The
  // three flags select whether system, virtual and physical columns are included
  // (per their names; the filtering itself is defined out of line).
168  std::list<const ColumnDescriptor*> getAllColumnMetadataForTable(
169  const int tableId,
170  const bool fetchSystemColumns,
171  const bool fetchVirtualColumns,
172  const bool fetchPhysicalColumns) const;
  // Bulk getters returning lists of descriptor pointers by value.
174  std::list<const TableDescriptor*> getAllTableMetadata() const;
175  std::list<const DashboardDescriptor*> getAllDashboardsMetadata() const;
  // Inline accessors: getCurrentDB() returns a const reference to currentDB_,
  // getCalciteMgr() returns a copy of the calciteMgr_ shared_ptr, and
  // getBasePath() returns a const reference to basePath_.
176  const DBMetadata& getCurrentDB() const { return currentDB_; }
178  std::shared_ptr<Calcite> getCalciteMgr() const { return calciteMgr_; }
179  const std::string& getBasePath() const { return basePath_; }
  // Dictionary lookup by reference id; loadDict defaults to true.
181  const DictDescriptor* getMetadataForDict(int dict_ref, bool loadDict = true) const;
  // Returns a const reference to a vector of LeafHostInfo (presumably
  // string_dict_hosts_ - the definition is out of line).
183  const std::vector<LeafHostInfo>& getStringDictionaryHosts() const;
  // Returns descriptor pointers for the physical tables associated with a logical
  // table descriptor (per the names; defined out of line).
187  std::vector<const TableDescriptor*> getPhysicalTablesDescriptors(
188  const TableDescriptor* logicalTableDesc) const;
  // Table epoch getter / setter, addressed by (db id, table id).
190  int32_t getTableEpoch(const int32_t db_id, const int32_t table_id) const;
191  void setTableEpoch(const int db_id, const int table_id, const int new_epoch);
  // Inline: returns currentDB_.dbId.
192  int getDatabaseId() const { return currentDB_.dbId; }
  // NOTE(review): presumably applies (forward == true) or undoes pending
  // column-descriptor changes - see the roll-related comment in the protected
  // section; confirm against the definition.
195  void roll(const bool forward);
  // Dictionary helpers for a column. getDictionary fills / updates the caller's
  // map of int -> StringDictionary* (passed by non-const reference).
197  void delDictionary(const ColumnDescriptor& cd);
198  void getDictionary(const ColumnDescriptor& cd,
199  std::map<int, StringDictionary*>& stringDicts);
  // ---- Static catalog registry, addressed by database name ----
  // NOTE(review): presumably backed by mapd_cat_map_ (private static map from
  // std::string to std::shared_ptr<Catalog>). The long get() overload takes the
  // same arguments as the constructor, plausibly so it can construct a catalog on
  // demand - confirm.
201  static void set(const std::string& dbName, std::shared_ptr<Catalog> cat);
202  static std::shared_ptr<Catalog> get(const std::string& dbName);
203  static std::shared_ptr<Catalog> get(const std::string& basePath,
204  const DBMetadata& curDB,
205  std::shared_ptr<Data_Namespace::DataMgr> dataMgr,
206  const std::vector<LeafHostInfo>& string_dict_hosts,
207  std::shared_ptr<Calcite> calcite,
208  bool is_new_db);
209  static void remove(const std::string& dbName);
  // ---- Deleted-row bookkeeping ----
  // NOTE(review): as with getColumnIdBySpi, the top-level const on the `const bool`
  // return type has no effect on the value callers receive.
211  const bool checkMetadataForDeletedRecs(int dbId, int tableId, int columnId) const;
212  const ColumnDescriptor* getDeletedColumn(const TableDescriptor* td) const;
  // The "Unlocked" suffix presumably means the caller already holds the relevant
  // lock - TODO confirm; no locking is visible in this header.
215  void setDeletedColumn(const TableDescriptor* td, const ColumnDescriptor* cd);
216  void setDeletedColumnUnlocked(const TableDescriptor* td, const ColumnDescriptor* cd);
217  int getLogicalTableId(const int physicalTableId) const;
218  void checkpoint(const int logicalTableId) const;
  // Inline: returns a copy of getCurrentDB().dbName.
219  std::string name() const { return getCurrentDB().dbName; }
220  void eraseDBData();
221  void eraseTablePhysicalData(const TableDescriptor* td);
  // Vacuum overloads: by table descriptor or by logical table id. Both are const
  // member functions.
222  void vacuumDeletedRows(const TableDescriptor* td) const;
223  void vacuumDeletedRows(const int logicalTableId) const;
225  // dump & restore
  // dumpTable takes a table descriptor, a path and a compression string;
  // restoreTable identifies the target either by descriptor or by table name and
  // additionally takes the SessionInfo. Accepted compression values are defined
  // out of line (not visible here).
226  void dumpTable(const TableDescriptor* td,
227  const std::string& path,
228  const std::string& compression) const;
229  void restoreTable(const SessionInfo& session,
230  const TableDescriptor* td,
231  const std::string& file_path,
232  const std::string& compression);
233  void restoreTable(const SessionInfo& session,
234  const std::string& table_name,
235  const std::string& file_path,
236  const std::string& compression);
  // Directory / schema helpers returning strings by value.
237  std::vector<std::string> getTableDataDirectories(const TableDescriptor* td) const;
238  std::vector<std::string> getTableDictDirectories(const TableDescriptor* td) const;
239  std::string getColumnDictDirectory(const ColumnDescriptor* cd) const;
240  std::string dumpSchema(const TableDescriptor* td) const;
242  protected:
  // ---- Container aliases ----
  // Tables are keyed by name and by id; columns by (int, std::string) and by
  // (int, int) tuples - presumably (table id, column name) and
  // (table id, column id); dictionaries by DictRef, with the map owning its
  // DictDescriptor values through std::unique_ptr; links by string and by id.
  // The maps of raw descriptor pointers say nothing about ownership here.
243  typedef std::map<std::string, TableDescriptor*> TableDescriptorMap;
244  typedef std::map<int, TableDescriptor*> TableDescriptorMapById;
245  typedef std::map<int32_t, std::vector<int32_t>> LogicalToPhysicalTableMapById;
246  typedef std::tuple<int, std::string> ColumnKey;
247  typedef std::map<ColumnKey, ColumnDescriptor*> ColumnDescriptorMap;
248  typedef std::tuple<int, int> ColumnIdKey;
249  typedef std::map<ColumnIdKey, ColumnDescriptor*> ColumnDescriptorMapById;
250  typedef std::map<DictRef, std::unique_ptr<DictDescriptor>> DictDescriptorMapById;
  // NOTE(review): the alias name for the next typedef (a map from std::string to
  // std::shared_ptr<DashboardDescriptor>) is missing from this listing.
251  typedef std::map<std::string, std::shared_ptr<DashboardDescriptor>>
253  typedef std::map<std::string, LinkDescriptor*> LinkDescriptorMap;
254  typedef std::map<int, LinkDescriptor*> LinkDescriptorMapById;
  // NOTE(review): the alias name for the next typedef (an unordered_map from
  // const TableDescriptor* to const ColumnDescriptor*) is missing from this listing.
255  typedef std::unordered_map<const TableDescriptor*, const ColumnDescriptor*>
  // ---- Internal update / build steps (defined out of line) ----
  // NOTE(review): the update* names suggest schema-migration steps and buildMaps
  // the population of the maps above - confirm against the definitions.
260  void updateDictionaryNames();
264  void updateLinkSchema();
267  void updateLogicalToPhysicalTableMap(const int32_t logical_tb_id);
268  void updateDictionarySchema();
269  void updatePageSize();
275  void buildMaps();
  // NOTE(review): the next two lines are the tail of a declaration whose first
  // line is missing from this listing.
277  const std::list<ColumnDescriptor>& columns,
278  const std::list<DictDescriptor>& dicts);
  // Takes the referencing column by non-const reference and the shared-dictionary
  // definition by value; persist_reference presumably controls whether the
  // reference is written to persistent storage - TODO confirm.
279  void addReferenceToForeignDict(ColumnDescriptor& referencing_column,
280  Parser::SharedDictionaryDef shared_dict_def,
281  const bool persist_reference);
  // NOTE(review): the next five lines are the tail of a declaration whose first
  // line is missing from this listing. In it, `td` is a const TableDescriptor
  // passed by value (a copy per call).
283  ColumnDescriptor& cd,
284  std::list<ColumnDescriptor>& cdd,
285  std::list<DictDescriptor>& dds,
286  const TableDescriptor td,
287  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs);
  // NOTE(review): the next three lines are the tail of a declaration whose first
  // line is missing from this listing.
289  std::list<DictDescriptor>& dds,
290  const TableDescriptor& td,
291  const bool isLogicalTable);
  // Map maintenance and the implementations behind the public table operations.
  // removeTableFromMap's is_on_error flag defaults to false.
294  void addLinkToMap(LinkDescriptor& ld);
295  void removeTableFromMap(const std::string& tableName,
296  const int tableId,
297  const bool is_on_error = false);
298  void doDropTable(const TableDescriptor* td);
299  void doTruncateTable(const TableDescriptor* td);
300  void renamePhysicalTable(const TableDescriptor* td, const std::string& newTableName);
  // const member function that nevertheless receives a non-const TableDescriptor*.
301  void instantiateFragmenter(TableDescriptor* td) const;
  // NOTE(review): the next four lines are the tail of a declaration whose first
  // line is missing from this listing; it fills `colDescs` (an out-parameter list)
  // and takes the same three fetch flags as the public
  // getAllColumnMetadataForTable.
303  std::list<const ColumnDescriptor*>& colDescs,
304  const bool fetchSystemColumns,
305  const bool fetchVirtualColumns,
306  const bool fetchPhysicalColumns) const;
  // String helpers: a SHA1 helper over a string, and a generator for the name of
  // a physical table from a logical table name and a shard number (presumably
  // using physicalTableNameTag_ - TODO confirm).
307  std::string calculateSHA1(const std::string& data);
308  std::string generatePhysicalTableName(const std::string& logicalTableName,
309  const int32_t& shardNumber);
  // Dashboard permission helpers operating on the dashboard's view_meta string.
310  std::vector<DBObject> parseDashboardObjects(const std::string& view_meta,
311  const int& user_id);
312  void createOrUpdateDashboardSystemRole(const std::string& view_meta,
313  const int32_t& user_id,
314  const std::string& dash_role_name);
  // "Unlocked" variant of getColumnIdBySpi (presumably expects the caller to hold
  // the lock already - TODO confirm).
316  const int getColumnIdBySpiUnlocked(const int table_id, const size_t spi) const;
  // NOTE(review): the next line is the tail of a declaration whose first line is
  // missing from this listing.
319  const std::list<ColumnDescriptor>& cds) const;
320  void dropTableFromJsonUnlocked(const std::string& table_name) const;
  // ---- Data members ----
  // NOTE(review): other members referenced above (e.g. currentDB_) are not
  // visible in this listing.
322  std::string basePath_;
333  std::shared_ptr<Data_Namespace::DataMgr> dataMgr_;
  // Stored by value and const, so it cannot change after construction.
335  const std::vector<LeafHostInfo> string_dict_hosts_;
336  std::shared_ptr<Calcite> calciteMgr_;
339  static const std::string
340  physicalTableNameTag_; // extra component added to the name of each physical table
344  // this tuple is for rolling forw/back once after ALTER ADD/DEL/MODIFY columns
345  // succeeds/fails
346  // get(0) = old ColumnDescriptor*
347  // get(1) = new ColumnDescriptor*
  // NOTE(review): the fragment below is a vector of std::pair, not a tuple, so the
  // old / new descriptors are presumably .first / .second; the alias or member
  // name this type belongs to is missing from this listing.
349  std::vector<std::pair<ColumnDescriptor*, ColumnDescriptor*>>;
352  private:
  // Static map from std::string to shared Catalog instances (presumably what the
  // static set/get/remove functions operate on, keyed by database name).
353  static std::map<std::string, std::shared_ptr<Catalog>> mapd_cat_map_;
  // NOTE(review): the next two lines are the tail of a declaration whose first
  // line is missing from this listing.
356  const std::string& temp_data_dir,
357  const std::unordered_map<int, int>& all_column_ids_map) const;
358  void renameTableDirectories(const std::string& temp_data_dir,
359  const std::vector<std::string>& target_paths,
360  const std::string& name_prefix) const;
362  public:
  // ---- Synchronisation state (public) ----
  // All three instance members are `mutable`, which permits modifying them from
  // const member functions. The two atomics store thread ids; per their names
  // they record which thread currently holds the sqlite lock / the write lock.
363  mutable std::mutex sqliteMutex_;
365  mutable std::atomic<std::thread::id> thread_holding_sqlite_lock;
366  mutable std::atomic<std::thread::id> thread_holding_write_lock;
367  // assuming that you never call into a catalog from another catalog via the same thread
  // One flag per thread, shared by all Catalog instances (static thread_local).
368  static thread_local bool thread_holds_read_lock;
369 };
371 } // namespace Catalog_Namespace
373 #endif // CATALOG_H
