OmniSciDB  c07336695a
SharedDictionaryValidator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #include <memory>
20 
21 #include "Shared/Logger.h"
22 
25  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs) {
26  size_t idx = 0;
27  for (; idx < shared_dict_defs.size(); idx++) {
28  if (!shared_dict_defs[idx].get_column().compare(cur_node.get_column()) &&
29  !shared_dict_defs[idx].get_foreign_table().compare(
30  cur_node.get_foreign_table()) &&
31  !shared_dict_defs[idx].get_foreign_column().compare(
32  cur_node.get_foreign_column())) {
33  break;
34  }
35  }
36  // Make sure we have found the shared dictionary definition
37  CHECK_LT(idx, shared_dict_defs.size());
38 
39  size_t ret_val_idx = idx;
40  for (size_t j = 0; j < shared_dict_defs.size(); j++) {
41  for (size_t i = 0; i < shared_dict_defs.size(); ++i) {
42  if (!shared_dict_defs[i].get_column().compare(
43  shared_dict_defs[ret_val_idx].get_foreign_column())) {
44  ret_val_idx = i;
45  break;
46  }
47  }
48  if (shared_dict_defs[ret_val_idx].get_foreign_table().compare(
49  cur_node.get_foreign_table())) {
50  // found a dictionary share definition which shares the dict outside this table to
51  // be created
52  break;
53  }
54  }
55 
56  return shared_dict_defs[ret_val_idx];
57 }
58 
59 // Make sure the dependency of shared dictionaries does not form a cycle
61  const Parser::CreateTableStmt* stmt,
62  const Parser::SharedDictionaryDef* shared_dict_def,
63  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs,
64  const std::list<ColumnDescriptor>& columns) {
65  std::string reference_col_qualified_name =
66  shared_dict_def->get_foreign_column() + "." + shared_dict_def->get_foreign_table();
67  if (!shared_dict_def->get_column().compare(shared_dict_def->get_foreign_column())) {
68  throw std::runtime_error(
69  "Dictionary cannot be shared with itself. For dictionary : " +
70  reference_col_qualified_name);
71  }
72  auto table_name = stmt->get_table();
73  CHECK(!shared_dict_def->get_foreign_table().compare(*table_name));
74  auto col = std::find_if(
75  columns.rbegin(), columns.rend(), [shared_dict_def](const ColumnDescriptor& elem) {
76  return !elem.columnName.compare(shared_dict_def->get_column());
77  });
78  CHECK(col != columns.rend());
79  auto ref_col =
80  std::find_if(col, columns.rend(), [shared_dict_def](const ColumnDescriptor& elem) {
81  return !elem.columnName.compare(shared_dict_def->get_foreign_column());
82  });
83 
84  if (ref_col == columns.rend()) {
85  throw std::runtime_error("Dictionary dependencies might create a cycle for " +
86  shared_dict_def->get_column() + "referencing " +
87  reference_col_qualified_name);
88  }
89 }
90 
91 namespace {
92 const ColumnDescriptor* lookup_column(const std::string& name,
93  const std::list<ColumnDescriptor>& columns) {
94  for (const auto& cd : columns) {
95  if (cd.columnName == name) {
96  return &cd;
97  }
98  }
99  return nullptr;
100 }
101 
102 const ColumnDescriptor* lookup_column(const std::string& name,
103  const std::list<const ColumnDescriptor*>& columns) {
104  for (const auto& cd : columns) {
105  if (cd->columnName == name) {
106  return cd;
107  }
108  }
109  return nullptr;
110 }
111 
113  const std::string& name,
114  const std::list<std::unique_ptr<Parser::TableElement>>& table_element_list) {
115  for (const auto& e : table_element_list) {
116  const auto col_def = dynamic_cast<Parser::ColumnDef*>(e.get());
117  if (!col_def || *col_def->get_column_name() != name) {
118  continue;
119  }
120  return col_def->get_compression();
121  }
122  UNREACHABLE();
123  return nullptr;
124 }
125 
126 } // namespace
127 
128 // Validate shared dictionary directive against the list of columns seen so far.
130  const Parser::CreateTableStmt* stmt,
131  const Parser::SharedDictionaryDef* shared_dict_def,
132  const std::list<ColumnDescriptor>& columns,
133  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs_so_far,
134  const Catalog_Namespace::Catalog& catalog) {
135  CHECK(shared_dict_def);
136  auto table_name = stmt->get_table();
137  const auto cd_ptr = lookup_column(shared_dict_def->get_column(), columns);
138  const auto col_qualified_name = *table_name + "." + shared_dict_def->get_column();
139  if (!cd_ptr) {
140  throw std::runtime_error("Column " + col_qualified_name + " doesn't exist");
141  }
142  if (!cd_ptr->columnType.is_string() ||
143  cd_ptr->columnType.get_compression() != kENCODING_DICT) {
144  throw std::runtime_error("Column " + col_qualified_name +
145  " must be a dictionary encoded string");
146  }
147  const std::list<std::unique_ptr<Parser::TableElement>>& table_element_list =
148  stmt->get_table_element_list();
149  if (get_compression_for_column(shared_dict_def->get_column(), table_element_list)) {
150  throw std::runtime_error(
151  "Column " + col_qualified_name +
152  " shouldn't specify an encoding, it borrows it from the referenced column");
153  }
154  const auto foreign_td =
155  catalog.getMetadataForTable(shared_dict_def->get_foreign_table());
156  if (!foreign_td && table_name->compare(shared_dict_def->get_foreign_table())) {
157  throw std::runtime_error("Table " + shared_dict_def->get_foreign_table() +
158  " doesn't exist");
159  }
160 
161  if (foreign_td) {
162  const auto reference_columns =
163  catalog.getAllColumnMetadataForTable(foreign_td->tableId, false, false, false);
164  const auto reference_cd_ptr =
165  lookup_column(shared_dict_def->get_foreign_column(), reference_columns);
166  if (!reference_cd_ptr) {
167  throw std::runtime_error("Could not find referenced column " +
168  shared_dict_def->get_foreign_column() + " in table " +
169  foreign_td->tableName);
170  }
171  if (!reference_cd_ptr->columnType.is_string() ||
172  reference_cd_ptr->columnType.get_compression() != kENCODING_DICT) {
173  const auto reference_col_qualified_name =
174  reference_cd_ptr->columnName + "." + shared_dict_def->get_foreign_column();
175  throw std::runtime_error("Referenced column " + reference_col_qualified_name +
176  " must be a dictionary encoded string column");
177  }
178  } else {
179  // The dictionary is to be shared within table
180  const auto reference_col_qualified_name =
181  *table_name + "." + shared_dict_def->get_foreign_column();
182  const auto reference_cd_ptr =
183  lookup_column(shared_dict_def->get_foreign_column(), columns);
184  if (!reference_cd_ptr) {
185  throw std::runtime_error("Column " + reference_col_qualified_name +
186  " doesn't exist");
187  }
188  if (!reference_cd_ptr->columnType.is_string() ||
189  reference_cd_ptr->columnType.get_compression() != kENCODING_DICT) {
190  throw std::runtime_error("Column " + reference_col_qualified_name +
191  " must be a dictionary encoded string");
192  }
194  stmt, shared_dict_def, shared_dict_defs_so_far, columns);
195  }
196  const auto it =
197  std::find_if(shared_dict_defs_so_far.begin(),
198  shared_dict_defs_so_far.end(),
199  [shared_dict_def](const Parser::SharedDictionaryDef& elem) {
200  return elem.get_column() == shared_dict_def->get_column();
201  });
202  if (it != shared_dict_defs_so_far.end()) {
203  throw std::runtime_error("Duplicate shared dictionary hint for column " +
204  *table_name + "." + shared_dict_def->get_column());
205  }
206 }
const Parser::SharedDictionaryDef compress_reference_path(Parser::SharedDictionaryDef cur_node, const std::vector< Parser::SharedDictionaryDef > &shared_dict_defs)
void validate_shared_dictionary_order(const Parser::CreateTableStmt *stmt, const Parser::SharedDictionaryDef *shared_dict_def, const std::vector< Parser::SharedDictionaryDef > &shared_dict_defs, const std::list< ColumnDescriptor > &columns)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:81
const std::string * get_table() const
Definition: ParserNode.h:973
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
#define UNREACHABLE()
Definition: Logger.h:231
const Parser::CompressDef * get_compression_for_column(const std::string &name, const std::list< std::unique_ptr< Parser::TableElement >> &table_element_list)
const std::list< std::unique_ptr< TableElement > > & get_table_element_list() const
Definition: ParserNode.h:974
const std::string & get_foreign_column() const
Definition: ParserNode.h:912
const ColumnDescriptor * lookup_column(const std::string &name, const std::list< const ColumnDescriptor *> &columns)
specifies the content in-memory of a row in the column metadata table
#define CHECK_LT(x, y)
Definition: Logger.h:197
const std::string & get_column() const
Definition: ParserNode.h:908
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:1579
const std::string & get_foreign_table() const
Definition: ParserNode.h:910
#define CHECK(condition)
Definition: Logger.h:187
std::string columnName
const CompressDef * get_compression() const
Definition: ParserNode.h:806
void validate_shared_dictionary(const Parser::CreateTableStmt *stmt, const Parser::SharedDictionaryDef *shared_dict_def, const std::list< ColumnDescriptor > &columns, const std::vector< Parser::SharedDictionaryDef > &shared_dict_defs_so_far, const Catalog_Namespace::Catalog &catalog)