OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExtensionFunctionSignatureParser.java
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.mapd.parser.server;
18 
19 import org.slf4j.Logger;
20 import org.slf4j.LoggerFactory;
21 
22 import java.io.BufferedReader;
23 import java.io.File;
24 import java.io.FileReader;
25 import java.io.IOException;
26 import java.io.StringReader;
27 import java.util.ArrayList;
28 import java.util.Arrays;
29 import java.util.HashMap;
30 import java.util.List;
31 import java.util.Map;
32 import java.util.regex.Matcher;
33 import java.util.regex.Pattern;
34 
36  final static Logger HEAVYDBLOGGER =
37  LoggerFactory.getLogger(ExtensionFunctionSignatureParser.class);
38  // Windows DE supports slightly different types sizes.
39  private static String OS = System.getProperty("os.name").toLowerCase();
40  static private boolean isWindows() {
41  return (OS.indexOf("win") >= 0);
42  }
43 
44  static Map<String, ExtensionFunction> parse(final String file_path) throws IOException {
45  File file = new File(file_path);
46  FileReader fileReader = new FileReader(file);
47  BufferedReader bufferedReader = new BufferedReader(fileReader);
48  String line;
49  Pattern s = Pattern.compile("\\| ([\\` ]|used)+ ([\\w]+) '([\\w<>]+) \\((.*)\\)'");
50  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
51  while ((line = bufferedReader.readLine()) != null) {
52  Matcher m = s.matcher(line);
53  if (m.find()) {
54  final String name = m.group(2);
55  final String ret = m.group(3);
56  final String cs_param_list = m.group(4);
57  sigs.put(name, toSignature(ret, cs_param_list, false));
58  }
59  }
60  return sigs;
61  }
62 
63  static Map<String, ExtensionFunction> parseUdfAst(final String file_path)
64  throws IOException {
65  File file = new File(file_path);
66  FileReader fileReader = new FileReader(file);
67  BufferedReader bufferedReader = new BufferedReader(fileReader);
68  String line;
69  Pattern s = Pattern.compile("([<>:\\w]+) ([:\\w]+)(?:\\(\\))?\\((.*)\\)");
70  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
71  while ((line = bufferedReader.readLine()) != null) {
72  Matcher m = s.matcher(line);
73  if (m.find()) {
74  final String name = m.group(2);
75  final String ret = m.group(1);
76  final String cs_param_list = m.group(3);
77  if (cs_param_list.isEmpty()) {
78  continue;
79  }
80  sigs.put(name, toSignature(ret, cs_param_list, true));
81  }
82  }
83  return sigs;
84  }
85 
86  static Map<String, ExtensionFunction> parseFromString(final String udf_string)
87  throws IOException {
88  return parseFromString(udf_string, true);
89  }
90 
91  static Map<String, ExtensionFunction> parseFromString(
92  final String udf_string, final boolean is_row_func) throws IOException {
93  StringReader stringReader = new StringReader(udf_string);
94  BufferedReader bufferedReader = new BufferedReader(stringReader);
95  String line;
96  Pattern r = Pattern.compile("([\\w]+)\\s+'([\\w]+)\\s*\\((.*)\\)'");
97  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
98  while ((line = bufferedReader.readLine()) != null) {
99  Matcher m = r.matcher(line);
100  if (m.find()) {
101  final String name = m.group(1);
102  final String ret = m.group(2);
103  final String cs_param_list = m.group(3);
104  sigs.put(name, toSignature(ret, cs_param_list, is_row_func));
105  }
106  }
107  return sigs;
108  }
109  static String signaturesToJson(final Map<String, ExtensionFunction> sigs) {
110  List<String> json_sigs = new ArrayList<String>();
111  if (sigs != null) {
112  for (Map.Entry<String, ExtensionFunction> sig : sigs.entrySet()) {
113  if (sig.getValue().isRowUdf()) {
114  json_sigs.add(sig.getValue().toJson(sig.getKey()));
115  }
116  }
117  }
118  return "[" + join(json_sigs, ",") + "]";
119  }
120 
122  final String ret, final String cs_param_list, final boolean has_variable_name) {
123  return toSignature(ret, cs_param_list, has_variable_name, true);
124  }
125 
126  private static ExtensionFunction toSignature(final String ret,
127  final String cs_param_list,
128  final boolean has_variable_name,
129  final boolean is_row_func) {
130  String[] params = cs_param_list.split(",");
131  List<ExtensionFunction.ExtArgumentType> args =
132  new ArrayList<ExtensionFunction.ExtArgumentType>();
133  for (final String param : params) {
134  ExtensionFunction.ExtArgumentType arg_type;
135  if (has_variable_name) {
136  String[] full_param = param.trim().split("\\s+");
137  if (full_param.length > 0) {
138  if (full_param[0].trim().compareTo("const") == 0) {
139  assert full_param.length > 1;
140  arg_type = deserializeType((full_param[1]).trim());
141  } else {
142  arg_type = deserializeType((full_param[0]).trim());
143  }
144  } else {
145  arg_type = deserializeType(full_param[0]);
146  }
147  } else {
148  arg_type = deserializeType(param.trim());
149  }
150  if (arg_type != ExtensionFunction.ExtArgumentType.Void) {
151  args.add(arg_type);
152  }
153  }
154  assert is_row_func;
155  return new ExtensionFunction(args, deserializeType(ret));
156  }
157  private static ExtensionFunction.ExtArgumentType deserializeType(
158  final String type_name) {
159  final String const_prefix = "const ";
160  final String std_namespace_prefix = "std::";
161 
162  if (type_name.startsWith(const_prefix)) {
163  return deserializeType(type_name.substring(const_prefix.length()));
164  }
165  if (type_name.startsWith(std_namespace_prefix)) {
166  return deserializeType(type_name.substring(std_namespace_prefix.length()));
167  }
168 
169  if (type_name.equals("bool") || type_name.equals("_Bool")) {
170  return ExtensionFunction.ExtArgumentType.Bool;
171  }
172  if (type_name.equals("int8_t") || type_name.equals("char")
173  || type_name.equals("int8")) {
174  return ExtensionFunction.ExtArgumentType.Int8;
175  }
176  if (type_name.equals("int16_t") || type_name.equals("short")
177  || type_name.equals("int16")) {
178  return ExtensionFunction.ExtArgumentType.Int16;
179  }
180  if (type_name.equals("int32_t") || type_name.equals("int")
181  || type_name.equals("int32") || (isWindows() && type_name.equals("long"))) {
182  return ExtensionFunction.ExtArgumentType.Int32;
183  }
184  if (type_name.equals("int64_t") || type_name.equals("size_t")
185  || type_name.equals("int64")) {
186  return ExtensionFunction.ExtArgumentType.Int64;
187  }
188  if ((!isWindows() && type_name.equals("long"))
189  || (isWindows() && type_name.equals("long long"))) {
190  return ExtensionFunction.ExtArgumentType.Int64;
191  }
192  if (type_name.equals("float") || type_name.equals("float32")) {
193  return ExtensionFunction.ExtArgumentType.Float;
194  }
195  if (type_name.equals("double") || type_name.equals("float64")) {
197  }
198  if (type_name.isEmpty() || type_name.equals("void")) {
199  return ExtensionFunction.ExtArgumentType.Void;
200  }
201  if (type_name.endsWith(" *")) {
202  return pointerType(deserializeType(type_name.substring(0, type_name.length() - 2)));
203  }
204  if (type_name.endsWith("*")) {
205  return pointerType(deserializeType(type_name.substring(0, type_name.length() - 1)));
206  }
207  if (type_name.equals("Array<bool>")) {
208  return ExtensionFunction.ExtArgumentType.ArrayBool;
209  }
210  if (type_name.equals("Array<int8_t>") || type_name.equals("Array<char>")) {
211  return ExtensionFunction.ExtArgumentType.ArrayInt8;
212  }
213  if (type_name.equals("Array<int16_t>") || type_name.equals("Array<short>")) {
214  return ExtensionFunction.ExtArgumentType.ArrayInt16;
215  }
216  if (type_name.equals("Array<int32_t>") || type_name.equals("Array<int>")) {
217  return ExtensionFunction.ExtArgumentType.ArrayInt32;
218  }
219  if (type_name.equals("Array<int64_t>") || type_name.equals("Array<size_t>")
220  || type_name.equals("Array<long>")) {
221  return ExtensionFunction.ExtArgumentType.ArrayInt64;
222  }
223  if (type_name.equals("Array<float>")) {
224  return ExtensionFunction.ExtArgumentType.ArrayFloat;
225  }
226  if (type_name.equals("Array<double>")) {
227  return ExtensionFunction.ExtArgumentType.ArrayDouble;
228  }
229  if (type_name.equals("Array<bool>")) {
230  return ExtensionFunction.ExtArgumentType.ArrayBool;
231  }
232  if (type_name.equals("Timestamp")) {
233  return ExtensionFunction.ExtArgumentType.Timestamp;
234  }
235  if (type_name.equals("Column<int8_t>") || type_name.equals("Column<char>")) {
236  return ExtensionFunction.ExtArgumentType.ColumnInt8;
237  }
238  if (type_name.equals("Column<int16_t>") || type_name.equals("Column<short>")) {
239  return ExtensionFunction.ExtArgumentType.ColumnInt16;
240  }
241  if (type_name.equals("Column<int32_t>") || type_name.equals("Column<int>")) {
242  return ExtensionFunction.ExtArgumentType.ColumnInt32;
243  }
244  if (type_name.equals("Column<int64_t>") || type_name.equals("Column<size_t>")
245  || type_name.equals("Column<long>")) {
246  return ExtensionFunction.ExtArgumentType.ColumnInt64;
247  }
248  if (type_name.equals("Column<float>")) {
249  return ExtensionFunction.ExtArgumentType.ColumnFloat;
250  }
251  if (type_name.equals("Column<double>")) {
252  return ExtensionFunction.ExtArgumentType.ColumnDouble;
253  }
254  if (type_name.equals("Column<TextEncodingDict>")) {
255  return ExtensionFunction.ExtArgumentType.ColumnTextEncodingDict;
256  }
257  if (type_name.equals("Column<Timestamp>")) {
258  return ExtensionFunction.ExtArgumentType.ColumnTimestamp;
259  }
260  if (type_name.equals("Cursor")) {
261  return ExtensionFunction.ExtArgumentType.Cursor;
262  }
263  if (type_name.equals("GeoPoint")) {
264  return ExtensionFunction.ExtArgumentType.GeoPoint;
265  }
266  if (type_name.equals("GeoLineString")) {
267  return ExtensionFunction.ExtArgumentType.GeoLineString;
268  }
269  if (type_name.equals("GeoPolygon")) {
270  return ExtensionFunction.ExtArgumentType.GeoPolygon;
271  }
272  if (type_name.equals("GeoMultiPolygon")) {
273  return ExtensionFunction.ExtArgumentType.GeoMultiPolygon;
274  }
275  if (type_name.equals("ColumnList<int8_t>") || type_name.equals("ColumnList<char>")) {
276  return ExtensionFunction.ExtArgumentType.ColumnListInt8;
277  }
278  if (type_name.equals("ColumnList<int16_t>")
279  || type_name.equals("ColumnList<short>")) {
280  return ExtensionFunction.ExtArgumentType.ColumnListInt16;
281  }
282  if (type_name.equals("ColumnList<int32_t>") || type_name.equals("ColumnList<int>")) {
283  return ExtensionFunction.ExtArgumentType.ColumnListInt32;
284  }
285  if (type_name.equals("ColumnList<int64_t>") || type_name.equals("ColumnList<size_t>")
286  || type_name.equals("ColumnList<long>")) {
287  return ExtensionFunction.ExtArgumentType.ColumnListInt64;
288  }
289  if (type_name.equals("ColumnList<float>")) {
290  return ExtensionFunction.ExtArgumentType.ColumnListFloat;
291  }
292  if (type_name.equals("ColumnList<double>")) {
293  return ExtensionFunction.ExtArgumentType.ColumnListDouble;
294  }
295  if (type_name.equals("ColumnList<TextEncodingDict>")) {
296  return ExtensionFunction.ExtArgumentType.ColumnListTextEncodingDict;
297  }
298  HEAVYDBLOGGER.info(
299  "ExtensionfunctionSignatureParser::deserializeType: unknown type_name=`"
300  + type_name + "`");
301  // TODO: Return void for convenience. Consider sanitizing functions for supported
302  // types before they reach Calcite
303  return ExtensionFunction.ExtArgumentType.Void;
304  }
305 
306  private static ExtensionFunction.ExtArgumentType pointerType(
307  final ExtensionFunction.ExtArgumentType targetType) {
308  switch (targetType) {
309  case Bool:
310  return ExtensionFunction.ExtArgumentType.PBool;
311  case Int8:
312  return ExtensionFunction.ExtArgumentType.PInt8;
313  case Int16:
314  return ExtensionFunction.ExtArgumentType.PInt16;
315  case Int32:
316  return ExtensionFunction.ExtArgumentType.PInt32;
317  case Int64:
318  return ExtensionFunction.ExtArgumentType.PInt64;
319  case Float:
320  return ExtensionFunction.ExtArgumentType.PFloat;
321  case Double:
322  return ExtensionFunction.ExtArgumentType.PDouble;
323  default:
324  assert false;
325  return null;
326  }
327  }
328 
329  static String join(final List<String> strs, final String sep) {
330  StringBuilder sb = new StringBuilder();
331  if (strs.isEmpty()) {
332  return "";
333  }
334  sb.append(strs.get(0));
335  for (int i = 1; i < strs.size(); ++i) {
336  sb.append(sep).append(strs.get(i));
337  }
338  return sb.toString();
339  }
340 }
static Map< String, ExtensionFunction > parseFromString(final String udf_string)
size_t append(FILE *f, const size_t size, const int8_t *buf)
Appends the specified number of bytes to the end of the file f from buf.
Definition: File.cpp:168
static ExtensionFunction.ExtArgumentType pointerType(final ExtensionFunction.ExtArgumentType targetType)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
static Map< String, ExtensionFunction > parseFromString(final String udf_string, final boolean is_row_func)
static ExtensionFunction toSignature(final String ret, final String cs_param_list, final boolean has_variable_name, final boolean is_row_func)
static Map< String, ExtensionFunction > parse(final String file_path)
tuple line
Definition: parse_ast.py:10
static String join(final List< String > strs, final String sep)
static ExtensionFunction toSignature(final String ret, final String cs_param_list, final boolean has_variable_name)
static String signaturesToJson(final Map< String, ExtensionFunction > sigs)
static ExtensionFunction.ExtArgumentType deserializeType(final String type_name)
static Map< String, ExtensionFunction > parseUdfAst(final String file_path)
string name
Definition: setup.in.py:72
constexpr auto type_name() noexcept