OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParserWrapper.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * File: ParserWrapper.cpp
19  * Author: michael
20  *
21  * Created on Feb 23, 2016, 9:33 AM
22  */
23 
24 #include "ParserWrapper.h"
25 #include "Shared/measure.h"
26 
27 #include <boost/algorithm/string.hpp>
28 
29 using namespace std;
30 
// Leading keywords that mark a statement as DDL. The constructor walks this
// list in order and tests the query against each entry with a case-insensitive
// prefix match (boost::istarts_with); the first entry that matches selects the
// keyword-specific handling.
31 const std::vector<std::string> ParserWrapper::ddl_cmd = {"ARCHIVE",
32  "ALTER",
33  "COPY",
34  "CREATE",
35  "DROP",
36  "DUMP",
37  "GRANT",
38  "KILL",
39  "OPTIMIZE",
40  "REFRESH",
41  "RENAME",
42  "RESTORE",
43  "REVOKE",
44  "SHOW",
45  "TRUNCATE",
46  "REASSIGN",
47  "VALIDATE",
48  "CLEAR"};
49 
// Leading keywords of data-modifying statements. The constructor casts the
// index of the first matching entry to DMLType, so the order of the entries is
// significant.
// NOTE(review): DMLType is declared elsewhere; confirm that its enumerators
// follow this exact order before adding, removing or reordering entries.
50 const std::vector<std::string> ParserWrapper::update_dml_cmd = {
51  "INSERT",
52  "DELETE",
53  "UPDATE",
54  "UPSERT",
55 };
56 
// Statement prefixes, stored in lower case. The constructor compares the four
// explain prefixes case-insensitively (boost::istarts_with) and tests the three
// longer variants before the bare "explain", since all of them begin with it.
// optimize_str and validate_str are not referenced by any code visible in this
// listing.
57 const std::string ParserWrapper::explain_str = {"explain"};
58 const std::string ParserWrapper::calcite_explain_str = {"explain calcite"};
59 const std::string ParserWrapper::optimized_explain_str = {"explain optimized"};
60 const std::string ParserWrapper::plan_explain_str = {"explain plan"};
61 const std::string ParserWrapper::optimize_str = {"optimize"};
62 const std::string ParserWrapper::validate_str = {"validate"};
63 
// Global flags defined in other translation units.
// g_enable_fsi gates the SERVER / FOREIGN TABLE regex checks in the constructor.
// NOTE(review): g_enable_calcite_ddl_parser is not referenced on any line that
// is visible in this listing; several listing lines are missing inside the
// constructor, so its uses are presumably on those lines - confirm against the
// original file.
64 extern bool g_enable_fsi;
65 extern bool g_enable_calcite_ddl_parser;
66 
/**
 * Classifies a SQL statement by inspecting its leading keyword(s) and records
 * the result in the object's members (query_type_, explain_type_, actual_query,
 * dml_type_ and the is_* flags).
 *
 * Checks run in this order, and most of them return as soon as they match:
 *   1. EXPLAIN prefixes ("explain calcite", "explain optimized", "explain plan",
 *      then bare "explain"). The remainder of the statement is trimmed into
 *      actual_query and classified by a nested ParserWrapper; explain_type_
 *      becomes Other when the nested statement is DDL or update DML, otherwise
 *      the explain flavour of the matched prefix. query_type_ stays SchemaRead.
 *   2. DDL keywords from ddl_cmd, each with keyword-specific handling that
 *      decides between is_calcite_ddl_ and is_legacy_ddl_ and sets query_type_.
 *   3. DML keywords from update_dml_cmd, followed by a check for
 *      INSERT INTO ... SELECT (is_itas).
 *
 * All prefix tests are case-insensitive and are applied to query_string exactly
 * as passed in; this function does not strip leading whitespace from it.
 *
 * NOTE(review): the initial values of the flags and of dml_type_ come from the
 * class declaration, which is not part of this listing.
 * NOTE(review): this listing is missing source lines 155, 183, 205, 217, 235
 * and 244, each of which should hold the opening `if (` of the condition that
 * continues on the following line. They presumably test
 * g_enable_calcite_ddl_parser, which is declared above but never referenced in
 * a visible line - confirm against the original file.
 *
 * @param query_string  Statement text to classify.
 */
67 ParserWrapper::ParserWrapper(std::string query_string) {
    // Every EXPLAIN branch below returns early and therefore keeps this value.
68  query_type_ = QueryType::SchemaRead;
    // "explain calcite <stmt>"
69  if (boost::istarts_with(query_string, calcite_explain_str)) {
70  actual_query = boost::trim_copy(query_string.substr(calcite_explain_str.size()));
    // Classify the statement being explained with a nested wrapper.
71  ParserWrapper inner{actual_query};
72  if (inner.is_ddl || inner.is_update_dml) {
73  explain_type_ = ExplainType::Other;
74  return;
75  } else {
76  explain_type_ = ExplainType::Calcite;
77  return;
78  }
79  }
80 
    // "explain optimized <stmt>"
81  if (boost::istarts_with(query_string, optimized_explain_str)) {
82  actual_query = boost::trim_copy(query_string.substr(optimized_explain_str.size()));
83  ParserWrapper inner{actual_query};
84  if (inner.is_ddl || inner.is_update_dml) {
85  explain_type_ = ExplainType::Other;
86  return;
87  } else {
88  explain_type_ = ExplainType::OptimizedIR;
89  return;
90  }
91  }
92 
    // "explain plan <stmt>"
93  if (boost::istarts_with(query_string, plan_explain_str)) {
94  actual_query = boost::trim_copy(query_string.substr(plan_explain_str.size()));
95  ParserWrapper inner{actual_query};
96  if (inner.is_ddl || inner.is_update_dml) {
97  explain_type_ = ExplainType::Other;
98  return;
99  } else {
100  explain_type_ = ExplainType::ExecutionPlan;
101  return;
102  }
103  }
104 
    // Bare "explain <stmt>"; must come after the longer prefixes above because
    // they all start with "explain" as well.
105  if (boost::istarts_with(query_string, explain_str)) {
106  actual_query = boost::trim_copy(query_string.substr(explain_str.size()));
107  ParserWrapper inner{actual_query};
108  if (inner.is_ddl || inner.is_update_dml) {
109  explain_type_ = ExplainType::Other;
110  return;
111  } else {
112  explain_type_ = ExplainType::IR;
113  return;
114  }
115  }
116 
    // Not an EXPLAIN: assume a plain read unless a DDL/DML keyword matches below.
117  query_type_ = QueryType::Read;
    // NOTE(review): `ddl` is declared by value, so every keyword is copied once
    // per iteration; a const reference would avoid that.
118  for (std::string ddl : ddl_cmd) {
    // is_ddl is overwritten on every iteration; it is left false when no
    // keyword matches, and the first match always returns from the constructor.
119  is_ddl = boost::istarts_with(query_string, ddl);
120  if (is_ddl) {
121  query_type_ = QueryType::SchemaWrite;
    // With g_enable_fsi set, CREATE/DROP/ALTER of a SERVER or FOREIGN TABLE and
    // REFRESH FOREIGN TABLES are classified as calcite DDL regardless of which
    // keyword matched. Both regex objects are constructed on each pass here.
122  if (g_enable_fsi) {
123  std::string fsi_regex_pattern{
124  R"((CREATE|DROP|ALTER)\s+(SERVER|FOREIGN\s+TABLE).*)"};
125 
126  boost::regex fsi_regex{fsi_regex_pattern,
127  boost::regex::extended | boost::regex::icase};
128  boost::regex refresh_regex{R"(REFRESH\s+FOREIGN\s+TABLES.*)",
129  boost::regex::extended | boost::regex::icase};
130 
131  if (boost::regex_match(query_string, fsi_regex) ||
132  boost::regex_match(query_string, refresh_regex)) {
133  is_calcite_ddl_ = true;
134  is_legacy_ddl_ = false;
135  return;
136  }
137  }
138  if (ddl == "CREATE") {
    // CREATE [TEMPORARY] TABLE ... AS (SELECT|WITH) ... is flagged as CTAS.
139  boost::regex ctas_regex{
140  R"(CREATE\s+(TEMPORARY\s+|\s*)+TABLE.*(\"|\s)AS(\(|\s)+(SELECT|WITH).*)",
141  boost::regex::extended | boost::regex::icase};
142  if (boost::regex_match(query_string, ctas_regex)) {
143  is_ctas = true;
144  // why is TEMPORARY being processed in legacy still
    // NOTE(review): regex_match only succeeds when the pattern matches the
    // entire string. This pattern has no leading/trailing wildcard and
    // query_string is already known to start with CREATE, so the condition
    // below cannot be true and the legacy branch is dead code. Confirm whether
    // regex_search was intended.
145  boost::regex temp_regex{R"(\s+TEMPORARY\s+)",
146  boost::regex::extended | boost::regex::icase};
147  if (boost::regex_match(query_string, temp_regex)) {
148  is_calcite_ddl_ = false;
149  is_legacy_ddl_ = true;
150  }
151  } else {
    // Non-CTAS CREATE of a database, dataframe, table, role, user or view.
152  boost::regex create_regex{
153  R"(CREATE\s+(DATABASE|DATAFRAME|(TEMPORARY\s+|\s*)+TABLE|ROLE|USER|VIEW).*)",
154  boost::regex::extended | boost::regex::icase};
    // NOTE(review): source line 155 (the start of this condition) is missing
    // from the listing.
156  boost::regex_match(query_string, create_regex)) {
157  is_calcite_ddl_ = true;
158  is_legacy_ddl_ = false;
159  return;
160  }
161  }
162  } else if (ddl == "COPY") {
163  is_copy = true;
164  is_calcite_ddl_ = true;
165  is_legacy_ddl_ = false;
166  // now check if it is COPY TO
    // "COPY ( ... ) TO ..." (first character inside the parentheses must not be
    // '#') only reads data; any other COPY is treated as a write. This branch
    // does not return here; it falls through to the common tail of the loop body.
167  boost::regex copy_to{R"(COPY\s*\(([^#])(.+)\)\s+TO\s+.*)",
168  boost::regex::extended | boost::regex::icase};
169  if (boost::regex_match(query_string, copy_to)) {
170  query_type_ = QueryType::Read;
171  is_copy_to = true;
172  } else {
173  query_type_ = QueryType::Write;
174  }
175  } else if (ddl == "SHOW") {
176  query_type_ = QueryType::SchemaRead;
177  is_calcite_ddl_ = true;
178  is_legacy_ddl_ = false;
179  return;
180  } else if (ddl == "DROP") {
181  boost::regex drop_regex{R"(DROP\s+(TABLE|ROLE|VIEW|DATABASE|USER).*)",
182  boost::regex::extended | boost::regex::icase};
    // NOTE(review): source line 183 (the start of this condition) is missing
    // from the listing.
184  (boost::regex_match(query_string, drop_regex))) {
185  is_calcite_ddl_ = true;
186  is_legacy_ddl_ = false;
187  return;
188  }
189  } else if (ddl == "KILL") {
190  query_type_ = QueryType::Unknown;
191  is_calcite_ddl_ = true;
192  is_legacy_ddl_ = false;
193  return;
194  } else if (ddl == "VALIDATE") {
195  query_type_ = QueryType::Unknown;
196  is_calcite_ddl_ = true;
197  is_legacy_ddl_ = false;
198  // needs to execute in a different context from other DDL
199  is_validate = true;
200  return;
201  } else if (ddl == "RENAME") {
202  query_type_ = QueryType::SchemaWrite;
203  boost::regex rename_regex{R"(RENAME\s+TABLE.*)",
204  boost::regex::extended | boost::regex::icase};
    // NOTE(review): source line 205 (the start of this condition) is missing
    // from the listing.
206  boost::regex_match(query_string, rename_regex)) {
207  is_calcite_ddl_ = true;
208  is_legacy_ddl_ = false;
209  return;
210  }
211  } else if (ddl == "ALTER") {
    // ALTER TABLE/DATABASE/USER is a schema write; ALTER SYSTEM is classified
    // as Unknown. Both are calcite DDL when their condition holds.
212  boost::regex alter_regex{R"(ALTER\s+(TABLE|DATABASE|USER).*)",
213  boost::regex::extended | boost::regex::icase};
214  boost::regex alter_system_regex{R"(ALTER\s+(SYSTEM).*)",
215  boost::regex::extended | boost::regex::icase};
216 
    // NOTE(review): source line 217 (the start of this condition) is missing
    // from the listing.
218  boost::regex_match(query_string, alter_regex)) {
219  query_type_ = QueryType::SchemaWrite;
220  is_calcite_ddl_ = true;
221  is_legacy_ddl_ = false;
222  return;
223  } else {
224  if (boost::regex_match(query_string, alter_system_regex)) {
225  query_type_ = QueryType::Unknown;
226  is_calcite_ddl_ = true;
227  is_legacy_ddl_ = false;
228  return;
229  }
230  }
231 
232  } else if (ddl == "GRANT") {
233  boost::regex grant_regex{R"(GRANT.*)",
234  boost::regex::extended | boost::regex::icase};
    // NOTE(review): source line 235 (the start of this condition) is missing
    // from the listing.
236  boost::regex_match(query_string, grant_regex)) {
237  is_calcite_ddl_ = true;
238  is_legacy_ddl_ = false;
239  return;
240  }
241  } else if (ddl == "REVOKE") {
242  boost::regex revoke_regex{R"(REVOKE.*)",
243  boost::regex::extended | boost::regex::icase};
    // NOTE(review): source line 244 (the start of this condition) is missing
    // from the listing.
245  boost::regex_match(query_string, revoke_regex)) {
246  is_calcite_ddl_ = true;
247  is_legacy_ddl_ = false;
248  return;
249  }
250  } else if (ddl == "REASSIGN") {
251  query_type_ = QueryType::SchemaWrite;
252  is_calcite_ddl_ = true;
253  is_legacy_ddl_ = false;
254  return;
255  } else if (ddl == "ARCHIVE" || ddl == "DUMP" || ddl == "OPTIMIZE" ||
256  ddl == "RESTORE" || ddl == "TRUNCATE") {
    // ARCHIVE and DUMP only read the schema; OPTIMIZE, RESTORE and TRUNCATE
    // are schema writes.
257  if (ddl == "ARCHIVE" || ddl == "DUMP") {
258  query_type_ = QueryType::SchemaRead;
259  } else {
260  query_type_ = QueryType::SchemaWrite;
261  }
262  is_calcite_ddl_ = true;
263  is_legacy_ddl_ = false;
264  return;
265  }
266 
    // Common tail: reached by keywords without a dedicated branch and by
    // branches above that did not return.
267  // ctas may look like ddl, but is neither legacy_ddl nor calcite_ddl
268  if (!is_ctas) {
269  is_legacy_ddl_ = !is_calcite_ddl_;
270  }
271  return;
272  }
273  }
274 
    // Only reached when no DDL keyword matched.
    // NOTE(review): `int i` is compared against the unsigned size() and then
    // converted with a C-style cast; the cast is only correct if DMLType's
    // enumerators follow the order of update_dml_cmd (enum declared elsewhere).
275  for (int i = 0; i < update_dml_cmd.size(); i++) {
    // is_update_dml is overwritten each iteration; the loop stops at the first match.
276  is_update_dml = boost::istarts_with(query_string, ParserWrapper::update_dml_cmd[i]);
277  if (is_update_dml) {
278  query_type_ = QueryType::Write;
279  dml_type_ = (DMLType)(i);
280  break;
281  }
282  }
283 
    // NOTE(review): when no DML keyword matched, dml_type_ still holds its
    // initial value from the class declaration (not shown); confirm that value
    // is not DMLType::Insert.
284  if (dml_type_ == DMLType::Insert) {
    // INSERT INTO ... SELECT ... is flagged as ITAS.
285  boost::regex itas_regex{R"(INSERT\s+INTO\s+.*(\s+|\(|\")SELECT(\s|\(|\").*)",
286  boost::regex::extended | boost::regex::icase};
287  if (boost::regex_match(query_string, itas_regex)) {
288  is_itas = true;
289  return;
290  }
291  }
292 }
293 
295 
// NOTE(review): the listing skips source lines 294 and 296, so the signature
// that opens this body is not visible. The member index at the end of this page
// lists `ExplainInfo getExplainInfo() const`, which is presumably the function
// this body belongs to - confirm against the original file.
// The returned aggregate holds four booleans, in this order: whether
// explain_type_ equals IR, OptimizedIR, ExecutionPlan and Calcite respectively.
297  return {explain_type_ == ExplainType::IR,
298  explain_type_ == ExplainType::OptimizedIR,
299  explain_type_ == ExplainType::ExecutionPlan,
300  explain_type_ == ExplainType::Calcite};
301 }
Classes used to wrap parser calls for calcite redirection.
static const std::string optimize_str
virtual ~ParserWrapper()
static const std::string optimized_explain_str
ExplainInfo getExplainInfo() const
static const std::string calcite_explain_str
static const std::vector< std::string > ddl_cmd
bool g_enable_calcite_ddl_parser
Definition: ParserNode.cpp:76
static const std::string validate_str
static const std::string explain_str
static const std::vector< std::string > update_dml_cmd
static const std::string plan_explain_str
ParserWrapper(std::string query_string)
bool g_enable_fsi
Definition: Catalog.cpp:93