OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExtensionFunctionSignatureParser.java
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.mapd.parser.server;
18 
19 import org.slf4j.Logger;
20 import org.slf4j.LoggerFactory;
21 
22 import java.io.BufferedReader;
23 import java.io.File;
24 import java.io.FileReader;
25 import java.io.IOException;
26 import java.io.StringReader;
27 import java.util.ArrayList;
28 import java.util.Arrays;
29 import java.util.Collections;
30 import java.util.HashMap;
31 import java.util.List;
32 import java.util.Map;
33 import java.util.regex.Matcher;
34 import java.util.regex.Pattern;
35 
37  final static Logger HEAVYDBLOGGER =
38  LoggerFactory.getLogger(ExtensionFunctionSignatureParser.class);
39  // Windows DE supports slightly different types sizes.
40  private static String OS = System.getProperty("os.name").toLowerCase();
41  static private boolean isWindows() {
42  return (OS.indexOf("win") >= 0);
43  }
44 
45  static Map<String, ExtensionFunction> parse(final String file_path) throws IOException {
46  File file = new File(file_path);
47  FileReader fileReader = new FileReader(file);
48  BufferedReader bufferedReader = new BufferedReader(fileReader);
49  String line;
50  Pattern s = Pattern.compile("\\| ([\\` ]|used)+ ([\\w]+) '([\\w<>]+) \\((.*)\\)'");
51  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
52  while ((line = bufferedReader.readLine()) != null) {
53  Matcher m = s.matcher(line);
54  if (m.find()) {
55  final String name = m.group(2);
56  final String ret = m.group(3);
57  final String cs_param_list = m.group(4);
58  sigs.put(name, toSignature(ret, cs_param_list, false));
59  }
60  }
61  return sigs;
62  }
63 
64  static Map<String, ExtensionFunction> parseUdfAst(final String file_path)
65  throws IOException {
66  File file = new File(file_path);
67  FileReader fileReader = new FileReader(file);
68  BufferedReader bufferedReader = new BufferedReader(fileReader);
69  String line;
70  Pattern s = Pattern.compile("([<>:\\w]+) ([:\\w]+)(?:\\(\\))?\\((.*)\\)");
71  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
72  while ((line = bufferedReader.readLine()) != null) {
73  Matcher m = s.matcher(line);
74  if (m.find()) {
75  final String name = m.group(2);
76  final String ret = m.group(1);
77  final String cs_param_list = m.group(3);
78  if (cs_param_list.isEmpty()) {
79  continue;
80  }
81  sigs.put(name, toSignature(ret, cs_param_list, true));
82  }
83  }
84  return sigs;
85  }
86 
87  static Map<String, ExtensionFunction> parseFromString(final String udf_string)
88  throws IOException {
89  return parseFromString(udf_string, true);
90  }
91 
92  static Map<String, ExtensionFunction> parseFromString(
93  final String udf_string, final boolean is_row_func) throws IOException {
94  StringReader stringReader = new StringReader(udf_string);
95  BufferedReader bufferedReader = new BufferedReader(stringReader);
96  String line;
97  Pattern r = Pattern.compile("([\\w]+)\\s+'([\\w]+)\\s*\\((.*)\\)'");
98  Map<String, ExtensionFunction> sigs = new HashMap<String, ExtensionFunction>();
99  while ((line = bufferedReader.readLine()) != null) {
100  Matcher m = r.matcher(line);
101  if (m.find()) {
102  final String name = m.group(1);
103  final String ret = m.group(2);
104  final String cs_param_list = m.group(3);
105  sigs.put(name, toSignature(ret, cs_param_list, is_row_func));
106  }
107  }
108  return sigs;
109  }
110  static String signaturesToJson(final Map<String, ExtensionFunction> sigs) {
111  List<String> json_sigs = new ArrayList<String>();
112  if (sigs != null) {
113  for (Map.Entry<String, ExtensionFunction> sig : sigs.entrySet()) {
114  if (sig.getValue().isRowUdf()) {
115  json_sigs.add(sig.getValue().toJson(sig.getKey()));
116  }
117  }
118  }
119  return "[" + join(json_sigs, ",") + "]";
120  }
121 
123  final String ret, final String cs_param_list, final boolean has_variable_name) {
124  return toSignature(ret, cs_param_list, has_variable_name, true);
125  }
126 
127  private static ExtensionFunction toSignature(final String ret,
128  final String cs_param_list,
129  final boolean has_variable_name,
130  final boolean is_row_func) {
131  String[] params = cs_param_list.split(",");
132  List<ExtensionFunction.ExtArgumentType> args =
133  new ArrayList<ExtensionFunction.ExtArgumentType>();
134  String uses_manager = "false";
135  List<Map<String, String>> annotations = new ArrayList<Map<String, String>>();
136  for (final String param : params) {
137  ExtensionFunction.ExtArgumentType arg_type;
138  if (param.contains("RowFunctionManager")) {
139  uses_manager = "true";
140  continue;
141  }
142  if (has_variable_name) {
143  String[] full_param = param.trim().split("\\s+");
144  if (full_param.length > 0) {
145  if (full_param[0].trim().compareTo("const") == 0) {
146  assert full_param.length > 1;
147  arg_type = deserializeType((full_param[1]).trim());
148  } else {
149  arg_type = deserializeType((full_param[0]).trim());
150  }
151  } else {
152  arg_type = deserializeType(full_param[0]);
153  }
154  } else {
155  arg_type = deserializeType(param.trim());
156  }
157  annotations.add(Collections.EMPTY_MAP);
158  if (arg_type != ExtensionFunction.ExtArgumentType.Void) {
159  args.add(arg_type);
160  }
161  }
162  assert is_row_func;
163  annotations.add(Collections.singletonMap("uses_manager", uses_manager));
164  return new ExtensionFunction(args, deserializeType(ret), annotations);
165  }
166  private static ExtensionFunction.ExtArgumentType deserializeType(
167  final String type_name) {
168  final String const_prefix = "const ";
169  final String std_namespace_prefix = "std::";
170 
171  if (type_name.startsWith(const_prefix)) {
172  return deserializeType(type_name.substring(const_prefix.length()));
173  }
174  if (type_name.startsWith(std_namespace_prefix)) {
175  return deserializeType(type_name.substring(std_namespace_prefix.length()));
176  }
177 
178  if (type_name.equals("bool") || type_name.equals("_Bool")) {
179  return ExtensionFunction.ExtArgumentType.Bool;
180  }
181  if (type_name.equals("int8_t") || type_name.equals("char")
182  || type_name.equals("int8")) {
183  return ExtensionFunction.ExtArgumentType.Int8;
184  }
185  if (type_name.equals("int16_t") || type_name.equals("short")
186  || type_name.equals("int16")) {
187  return ExtensionFunction.ExtArgumentType.Int16;
188  }
189  if (type_name.equals("int32_t") || type_name.equals("int")
190  || type_name.equals("int32") || (isWindows() && type_name.equals("long"))) {
191  return ExtensionFunction.ExtArgumentType.Int32;
192  }
193  if (type_name.equals("int64_t") || type_name.equals("size_t")
194  || type_name.equals("int64")) {
195  return ExtensionFunction.ExtArgumentType.Int64;
196  }
197  if ((!isWindows() && type_name.equals("long"))
198  || (isWindows() && type_name.equals("long long"))) {
199  return ExtensionFunction.ExtArgumentType.Int64;
200  }
201  if (type_name.equals("float") || type_name.equals("float32")) {
202  return ExtensionFunction.ExtArgumentType.Float;
203  }
204  if (type_name.equals("double") || type_name.equals("float64")) {
206  }
207  if (type_name.isEmpty() || type_name.equals("void")) {
208  return ExtensionFunction.ExtArgumentType.Void;
209  }
210  if (type_name.endsWith(" *")) {
211  return pointerType(deserializeType(type_name.substring(0, type_name.length() - 2)));
212  }
213  if (type_name.endsWith("*")) {
214  return pointerType(deserializeType(type_name.substring(0, type_name.length() - 1)));
215  }
216  if (type_name.endsWith("&")) {
217  return deserializeType(type_name.substring(0, type_name.length() - 1).trim());
218  }
219  if (type_name.equals("Array<bool>")) {
220  return ExtensionFunction.ExtArgumentType.ArrayBool;
221  }
222  if (type_name.equals("Array<int8_t>") || type_name.equals("Array<char>")) {
223  return ExtensionFunction.ExtArgumentType.ArrayInt8;
224  }
225  if (type_name.equals("Array<int16_t>") || type_name.equals("Array<short>")) {
226  return ExtensionFunction.ExtArgumentType.ArrayInt16;
227  }
228  if (type_name.equals("Array<int32_t>") || type_name.equals("Array<int>")) {
229  return ExtensionFunction.ExtArgumentType.ArrayInt32;
230  }
231  if (type_name.equals("Array<int64_t>") || type_name.equals("Array<size_t>")
232  || type_name.equals("Array<long>")) {
233  return ExtensionFunction.ExtArgumentType.ArrayInt64;
234  }
235  if (type_name.equals("Array<float>")) {
236  return ExtensionFunction.ExtArgumentType.ArrayFloat;
237  }
238  if (type_name.equals("Array<double>")) {
239  return ExtensionFunction.ExtArgumentType.ArrayDouble;
240  }
241  if (type_name.equals("Array<bool>")) {
242  return ExtensionFunction.ExtArgumentType.ArrayBool;
243  }
244  if (type_name.equals("Array<TextEncodingDict>")) {
245  return ExtensionFunction.ExtArgumentType.ArrayTextEncodingDict;
246  }
247  if (type_name.equals("TextEncodingDict")) {
248  return ExtensionFunction.ExtArgumentType.TextEncodingDict;
249  }
250  if (type_name.equals("RowFunctionManager")) {
251  // RowFunctionManager is not actually materialized in udfs
252  // return void as a convenience here to not generate a warning
253  // in the line below
254  return ExtensionFunction.ExtArgumentType.Void;
255  }
256  if (type_name.equals("Timestamp")) {
257  return ExtensionFunction.ExtArgumentType.Timestamp;
258  }
259  if (type_name.equals("TextEncodingNone")) {
260  return ExtensionFunction.ExtArgumentType.TextEncodingNone;
261  }
262  if (type_name.equals("Column<int8_t>") || type_name.equals("Column<char>")) {
263  return ExtensionFunction.ExtArgumentType.ColumnInt8;
264  }
265  if (type_name.equals("Column<int16_t>") || type_name.equals("Column<short>")) {
266  return ExtensionFunction.ExtArgumentType.ColumnInt16;
267  }
268  if (type_name.equals("Column<int32_t>") || type_name.equals("Column<int>")) {
269  return ExtensionFunction.ExtArgumentType.ColumnInt32;
270  }
271  if (type_name.equals("Column<int64_t>") || type_name.equals("Column<size_t>")
272  || type_name.equals("Column<long>")) {
273  return ExtensionFunction.ExtArgumentType.ColumnInt64;
274  }
275  if (type_name.equals("Column<float>")) {
276  return ExtensionFunction.ExtArgumentType.ColumnFloat;
277  }
278  if (type_name.equals("Column<double>")) {
279  return ExtensionFunction.ExtArgumentType.ColumnDouble;
280  }
281  if (type_name.equals("Column<TextEncodingDict>")) {
282  return ExtensionFunction.ExtArgumentType.ColumnTextEncodingDict;
283  }
284  if (type_name.equals("Column<Timestamp>")) {
285  return ExtensionFunction.ExtArgumentType.ColumnTimestamp;
286  }
287  if (type_name.equals("Cursor")) {
288  return ExtensionFunction.ExtArgumentType.Cursor;
289  }
290  if (type_name.equals("GeoPoint")) {
291  return ExtensionFunction.ExtArgumentType.GeoPoint;
292  }
293  if (type_name.equals("GeoMultiPoint")) {
294  return ExtensionFunction.ExtArgumentType.GeoMultiPoint;
295  }
296  if (type_name.equals("GeoLineString")) {
297  return ExtensionFunction.ExtArgumentType.GeoLineString;
298  }
299  if (type_name.equals("GeoMultiLineString")) {
300  return ExtensionFunction.ExtArgumentType.GeoMultiLineString;
301  }
302  if (type_name.equals("GeoPolygon")) {
303  return ExtensionFunction.ExtArgumentType.GeoPolygon;
304  }
305  if (type_name.equals("GeoMultiPolygon")) {
306  return ExtensionFunction.ExtArgumentType.GeoMultiPolygon;
307  }
308  if (type_name.equals("ColumnList<int8_t>") || type_name.equals("ColumnList<char>")) {
309  return ExtensionFunction.ExtArgumentType.ColumnListInt8;
310  }
311  if (type_name.equals("ColumnList<int16_t>")
312  || type_name.equals("ColumnList<short>")) {
313  return ExtensionFunction.ExtArgumentType.ColumnListInt16;
314  }
315  if (type_name.equals("ColumnList<int32_t>") || type_name.equals("ColumnList<int>")) {
316  return ExtensionFunction.ExtArgumentType.ColumnListInt32;
317  }
318  if (type_name.equals("ColumnList<int64_t>") || type_name.equals("ColumnList<size_t>")
319  || type_name.equals("ColumnList<long>")) {
320  return ExtensionFunction.ExtArgumentType.ColumnListInt64;
321  }
322  if (type_name.equals("ColumnList<float>")) {
323  return ExtensionFunction.ExtArgumentType.ColumnListFloat;
324  }
325  if (type_name.equals("ColumnList<double>")) {
326  return ExtensionFunction.ExtArgumentType.ColumnListDouble;
327  }
328  if (type_name.equals("ColumnList<TextEncodingDict>")) {
329  return ExtensionFunction.ExtArgumentType.ColumnListTextEncodingDict;
330  }
331  HEAVYDBLOGGER.info(
332  "ExtensionfunctionSignatureParser::deserializeType: unknown type_name=`"
333  + type_name + "`");
334  // TODO: Return void for convenience. Consider sanitizing functions for supported
335  // types before they reach Calcite
336  return ExtensionFunction.ExtArgumentType.Void;
337  }
338 
339  private static ExtensionFunction.ExtArgumentType pointerType(
340  final ExtensionFunction.ExtArgumentType targetType) {
341  switch (targetType) {
342  case Bool:
343  return ExtensionFunction.ExtArgumentType.PBool;
344  case Int8:
345  return ExtensionFunction.ExtArgumentType.PInt8;
346  case Int16:
347  return ExtensionFunction.ExtArgumentType.PInt16;
348  case Int32:
349  return ExtensionFunction.ExtArgumentType.PInt32;
350  case Int64:
351  return ExtensionFunction.ExtArgumentType.PInt64;
352  case Float:
353  return ExtensionFunction.ExtArgumentType.PFloat;
354  case Double:
355  return ExtensionFunction.ExtArgumentType.PDouble;
356  default:
357  assert false;
358  return null;
359  }
360  }
361 
362  static String join(final List<String> strs, final String sep) {
363  StringBuilder sb = new StringBuilder();
364  if (strs.isEmpty()) {
365  return "";
366  }
367  sb.append(strs.get(0));
368  for (int i = 1; i < strs.size(); ++i) {
369  sb.append(sep).append(strs.get(i));
370  }
371  return sb.toString();
372  }
373 }
static Map< String, ExtensionFunction > parseFromString(final String udf_string)
size_t append(FILE *f, const size_t size, const int8_t *buf)
Appends the specified number of bytes to the end of the file f from buf.
Definition: File.cpp:168
static ExtensionFunction.ExtArgumentType pointerType(final ExtensionFunction.ExtArgumentType targetType)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
static Map< String, ExtensionFunction > parseFromString(final String udf_string, final boolean is_row_func)
static ExtensionFunction toSignature(final String ret, final String cs_param_list, final boolean has_variable_name, final boolean is_row_func)
static Map< String, ExtensionFunction > parse(final String file_path)
tuple line
Definition: parse_ast.py:10
static String join(final List< String > strs, final String sep)
static ExtensionFunction toSignature(final String ret, final String cs_param_list, final boolean has_variable_name)
static String signaturesToJson(final Map< String, ExtensionFunction > sigs)
static ExtensionFunction.ExtArgumentType deserializeType(final String type_name)
static Map< String, ExtensionFunction > parseUdfAst(final String file_path)
string name
Definition: setup.in.py:72
constexpr auto type_name() noexcept