OmniSciDB  91042dcc5b
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExtensionFunctionSignatureParser.java
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.mapd.parser.server;
17 
18 import org.slf4j.Logger;
19 import org.slf4j.LoggerFactory;
20 
21 import java.io.BufferedReader;
22 import java.io.File;
23 import java.io.FileReader;
24 import java.io.IOException;
25 import java.io.StringReader;
26 import java.util.ArrayList;
27 import java.util.Arrays;
28 import java.util.HashMap;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.regex.Matcher;
32 import java.util.regex.Pattern;
38  final static Logger MAPDLOGGER =
39  LoggerFactory.getLogger(ExtensionFunctionSignatureParser.class);
40  // Windows DE supports slightly different types sizes.
41  private static String OS = System.getProperty("os.name").toLowerCase();
42  static private boolean isWindows() {
43  return (OS.indexOf("win") >= 0);
44  }
45 
46  static Map<String, ExtensionFunction> parse(final String file_path) throws IOException {
47  File file = new File(file_path);
48  FileReader fileReader = new FileReader(file);
49  BufferedReader bufferedReader = new BufferedReader(fileReader);
50  String line;
51  Pattern s = Pattern.compile("\\| ([\\` ]|used)+ ([\\w]+) '([\\w<>]+) \\((.*)\\)'");
52  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
53  while ((line = bufferedReader.readLine()) != null) {
54  Matcher m = s.matcher(line);
55  if (m.find()) {
56  final String name = m.group(2);
57  final String ret = m.group(3);
58  final String cs_param_list = m.group(4);
59  sigs.put(name, toSignature(ret, cs_param_list, false));
60  }
61  }
62  return sigs;
63  }
64 
65  static Map<String, ExtensionFunction> parseUdfAst(final String file_path)
66  throws IOException {
67  File file = new File(file_path);
68  FileReader fileReader = new FileReader(file);
69  BufferedReader bufferedReader = new BufferedReader(fileReader);
70  String line;
71  Pattern s = Pattern.compile("([<>:\\w]+) ([:\\w]+)(?:\\(\\))?\\((.*)\\)");
72  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
73  while ((line = bufferedReader.readLine()) != null) {
74  Matcher m = s.matcher(line);
75  if (m.find()) {
76  final String name = m.group(2);
77  final String ret = m.group(1);
78  final String cs_param_list = m.group(3);
79  if (cs_param_list.isEmpty()) {
80  continue;
81  }
82  sigs.put(name, toSignature(ret, cs_param_list, true));
83  }
84  }
85  return sigs;
86  }
87 
88  static Map<String, ExtensionFunction> parseFromString(final String udf_string)
89  throws IOException {
90  return parseFromString(udf_string, true);
91  }
92 
93  static Map<String, ExtensionFunction> parseFromString(
94  final String udf_string, final boolean is_row_func) throws IOException {
95  StringReader stringReader = new StringReader(udf_string);
96  BufferedReader bufferedReader = new BufferedReader(stringReader);
97  String line;
98  Pattern r = Pattern.compile("([\\w]+)\\s+'([\\w]+)\\s*\\((.*)\\)'");
99  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
100  while ((line = bufferedReader.readLine()) != null) {
101  Matcher m = r.matcher(line);
102  if (m.find()) {
103  final String name = m.group(1);
104  final String ret = m.group(2);
105  final String cs_param_list = m.group(3);
106  sigs.put(name, toSignature(ret, cs_param_list, is_row_func));
107  }
108  }
109  return sigs;
110  }
111  static String signaturesToJson(final Map<String, ExtensionFunction> sigs) {
112  List<String> json_sigs = new ArrayList<String>();
113  if (sigs != null) {
114  for (Map.Entry<String, ExtensionFunction> sig : sigs.entrySet()) {
115  if (sig.getValue().isRowUdf()) {
116  json_sigs.add(sig.getValue().toJson(sig.getKey()));
117  }
118  }
119  }
120  return "[" + join(json_sigs, ",") + "]";
121  }
122 
124  final String ret, final String cs_param_list, final boolean has_variable_name) {
125  return toSignature(ret, cs_param_list, has_variable_name, true);
126  }
127 
128  private static ExtensionFunction toSignature(final String ret,
129  final String cs_param_list,
130  final boolean has_variable_name,
131  final boolean is_row_func) {
132  String[] params = cs_param_list.split(",");
133  List<ExtensionFunction.ExtArgumentType> args =
134  new ArrayList<ExtensionFunction.ExtArgumentType>();
135  for (final String param : params) {
136  ExtensionFunction.ExtArgumentType arg_type;
137  if (has_variable_name) {
138  String[] full_param = param.trim().split("\\s+");
139  if (full_param.length > 0) {
140  if (full_param[0].trim().compareTo("const") == 0) {
141  assert full_param.length > 1;
142  arg_type = deserializeType((full_param[1]).trim());
143  } else {
144  arg_type = deserializeType((full_param[0]).trim());
145  }
146  } else {
147  arg_type = deserializeType(full_param[0]);
148  }
149  } else {
150  arg_type = deserializeType(param.trim());
151  }
152  if (arg_type != ExtensionFunction.ExtArgumentType.Void) {
153  args.add(arg_type);
154  }
155  }
156  assert is_row_func;
157  return new ExtensionFunction(args, deserializeType(ret));
158  }
159  private static ExtensionFunction.ExtArgumentType deserializeType(
160  final String type_name) {
161  final String const_prefix = "const ";
162  final String std_namespace_prefix = "std::";
163 
164  if (type_name.startsWith(const_prefix)) {
165  return deserializeType(type_name.substring(const_prefix.length()));
166  }
167  if (type_name.startsWith(std_namespace_prefix)) {
168  return deserializeType(type_name.substring(std_namespace_prefix.length()));
169  }
170 
171  if (type_name.equals("bool") || type_name.equals("_Bool")) {
172  return ExtensionFunction.ExtArgumentType.Bool;
173  }
174  if (type_name.equals("int8_t") || type_name.equals("char")
175  || type_name.equals("int8")) {
176  return ExtensionFunction.ExtArgumentType.Int8;
177  }
178  if (type_name.equals("int16_t") || type_name.equals("short")
179  || type_name.equals("int16")) {
180  return ExtensionFunction.ExtArgumentType.Int16;
181  }
182  if (type_name.equals("int32_t") || type_name.equals("int")
183  || type_name.equals("int32") || (isWindows() && type_name.equals("long"))) {
184  return ExtensionFunction.ExtArgumentType.Int32;
185  }
186  if (type_name.equals("int64_t") || type_name.equals("size_t")
187  || type_name.equals("int64")) {
188  return ExtensionFunction.ExtArgumentType.Int64;
189  }
190  if ((!isWindows() && type_name.equals("long"))
191  || (isWindows() && type_name.equals("long long"))) {
192  return ExtensionFunction.ExtArgumentType.Int64;
193  }
194  if (type_name.equals("float") || type_name.equals("float32")) {
195  return ExtensionFunction.ExtArgumentType.Float;
196  }
197  if (type_name.equals("double") || type_name.equals("float64")) {
199  }
200  if (type_name.isEmpty() || type_name.equals("void")) {
201  return ExtensionFunction.ExtArgumentType.Void;
202  }
203  if (type_name.endsWith(" *")) {
204  return pointerType(deserializeType(type_name.substring(0, type_name.length() - 2)));
205  }
206  if (type_name.endsWith("*")) {
207  return pointerType(deserializeType(type_name.substring(0, type_name.length() - 1)));
208  }
209  if (type_name.equals("Array<bool>")) {
210  return ExtensionFunction.ExtArgumentType.ArrayBool;
211  }
212  if (type_name.equals("Array<int8_t>") || type_name.equals("Array<char>")) {
213  return ExtensionFunction.ExtArgumentType.ArrayInt8;
214  }
215  if (type_name.equals("Array<int16_t>") || type_name.equals("Array<short>")) {
216  return ExtensionFunction.ExtArgumentType.ArrayInt16;
217  }
218  if (type_name.equals("Array<int32_t>") || type_name.equals("Array<int>")) {
219  return ExtensionFunction.ExtArgumentType.ArrayInt32;
220  }
221  if (type_name.equals("Array<int64_t>") || type_name.equals("Array<size_t>")
222  || type_name.equals("Array<long>")) {
223  return ExtensionFunction.ExtArgumentType.ArrayInt64;
224  }
225  if (type_name.equals("Array<float>")) {
226  return ExtensionFunction.ExtArgumentType.ArrayFloat;
227  }
228  if (type_name.equals("Array<double>")) {
229  return ExtensionFunction.ExtArgumentType.ArrayDouble;
230  }
231  if (type_name.equals("Array<bool>")) {
232  return ExtensionFunction.ExtArgumentType.ArrayBool;
233  }
234  if (type_name.equals("Column<int8_t>") || type_name.equals("Column<char>")) {
235  return ExtensionFunction.ExtArgumentType.ColumnInt8;
236  }
237  if (type_name.equals("Column<int16_t>") || type_name.equals("Column<short>")) {
238  return ExtensionFunction.ExtArgumentType.ColumnInt16;
239  }
240  if (type_name.equals("Column<int32_t>") || type_name.equals("Column<int>")) {
241  return ExtensionFunction.ExtArgumentType.ColumnInt32;
242  }
243  if (type_name.equals("Column<int64_t>") || type_name.equals("Column<size_t>")
244  || type_name.equals("Column<long>")) {
245  return ExtensionFunction.ExtArgumentType.ColumnInt64;
246  }
247  if (type_name.equals("Column<float>")) {
248  return ExtensionFunction.ExtArgumentType.ColumnFloat;
249  }
250  if (type_name.equals("Column<double>")) {
251  return ExtensionFunction.ExtArgumentType.ColumnDouble;
252  }
253  if (type_name.equals("Column<TextEncodingDict>")) {
254  return ExtensionFunction.ExtArgumentType.ColumnTextEncodingDict;
255  }
256  if (type_name.equals("Cursor")) {
257  return ExtensionFunction.ExtArgumentType.Cursor;
258  }
259  if (type_name.equals("GeoPoint")) {
260  return ExtensionFunction.ExtArgumentType.GeoPoint;
261  }
262  if (type_name.equals("GeoLineString")) {
263  return ExtensionFunction.ExtArgumentType.GeoLineString;
264  }
265  if (type_name.equals("GeoPolygon")) {
266  return ExtensionFunction.ExtArgumentType.GeoPolygon;
267  }
268  if (type_name.equals("GeoMultiPolygon")) {
269  return ExtensionFunction.ExtArgumentType.GeoMultiPolygon;
270  }
271  if (type_name.equals("ColumnList<int8_t>") || type_name.equals("ColumnList<char>")) {
272  return ExtensionFunction.ExtArgumentType.ColumnListInt8;
273  }
274  if (type_name.equals("ColumnList<int16_t>")
275  || type_name.equals("ColumnList<short>")) {
276  return ExtensionFunction.ExtArgumentType.ColumnListInt16;
277  }
278  if (type_name.equals("ColumnList<int32_t>") || type_name.equals("ColumnList<int>")) {
279  return ExtensionFunction.ExtArgumentType.ColumnListInt32;
280  }
281  if (type_name.equals("ColumnList<int64_t>") || type_name.equals("ColumnList<size_t>")
282  || type_name.equals("ColumnList<long>")) {
283  return ExtensionFunction.ExtArgumentType.ColumnListInt64;
284  }
285  if (type_name.equals("ColumnList<float>")) {
286  return ExtensionFunction.ExtArgumentType.ColumnListFloat;
287  }
288  if (type_name.equals("ColumnList<double>")) {
289  return ExtensionFunction.ExtArgumentType.ColumnListDouble;
290  }
291  if (type_name.equals("ColumnList<TextEncodingDict>")) {
292  return ExtensionFunction.ExtArgumentType.ColumnListTextEncodingDict;
293  }
294  MAPDLOGGER.info(
295  "ExtensionfunctionSignatureParser::deserializeType: unknown type_name=`"
296  + type_name + "`");
297  // TODO: Return void for convenience. Consider sanitizing functions for supported
298  // types before they reach Calcite
299  return ExtensionFunction.ExtArgumentType.Void;
300  }
301 
302  private static ExtensionFunction.ExtArgumentType pointerType(
303  final ExtensionFunction.ExtArgumentType targetType) {
304  switch (targetType) {
305  case Bool:
306  return ExtensionFunction.ExtArgumentType.PBool;
307  case Int8:
308  return ExtensionFunction.ExtArgumentType.PInt8;
309  case Int16:
310  return ExtensionFunction.ExtArgumentType.PInt16;
311  case Int32:
312  return ExtensionFunction.ExtArgumentType.PInt32;
313  case Int64:
314  return ExtensionFunction.ExtArgumentType.PInt64;
315  case Float:
316  return ExtensionFunction.ExtArgumentType.PFloat;
317  case Double:
318  return ExtensionFunction.ExtArgumentType.PDouble;
319  default:
320  assert false;
321  return null;
322  }
323  }
324 
325  static String join(final List<String> strs, final String sep) {
326  StringBuilder sb = new StringBuilder();
327  if (strs.isEmpty()) {
328  return "";
329  }
330  sb.append(strs.get(0));
331  for (int i = 1; i < strs.size(); ++i) {
332  sb.append(sep).append(strs.get(i));
333  }
334  return sb.toString();
335  }
336 }
static Map< String, ExtensionFunction > parseFromString(final String udf_string)
size_t append(FILE *f, const size_t size, const int8_t *buf)
Appends the specified number of bytes to the end of the file f from buf.
Definition: File.cpp:161
static ExtensionFunction.ExtArgumentType pointerType(final ExtensionFunction.ExtArgumentType targetType)
string name
Definition: setup.in.py:72
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
static Map< String, ExtensionFunction > parseFromString(final String udf_string, final boolean is_row_func)
static ExtensionFunction toSignature(final String ret, final String cs_param_list, final boolean has_variable_name, final boolean is_row_func)
static Map< String, ExtensionFunction > parse(final String file_path)
tuple line
Definition: parse_ast.py:10
static String join(final List< String > strs, final String sep)
static ExtensionFunction toSignature(final String ret, final String cs_param_list, final boolean has_variable_name)
static String signaturesToJson(final Map< String, ExtensionFunction > sigs)
static ExtensionFunction.ExtArgumentType deserializeType(final String type_name)
static Map< String, ExtensionFunction > parseUdfAst(final String file_path)
constexpr auto type_name() noexcept