OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
generate_TableFunctionsFactory_init.py
Go to the documentation of this file.
1 """Given a list of input files, scan for lines containing UDTF
2 specification statements in the following form:
3 
4  UDTF: function_name(<arguments>) -> <output column types>
5 
6 where <arguments> is a comma-separated list of argument types. The
7 argument types specifications are:
8 
9 - scalar types:
10  Int8, Int16, Int32, Int64, Float, Double, Bool, etc
11 - column types:
12  ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat, ColumnDouble, ColumnBool, etc
13 - column list types:
14  ColumnListInt8, ColumnListInt16, ColumnListInt32, ColumnListInt64, ColumnListFloat, ColumnListDouble, ColumnListBool, etc
15 - cursor type:
16  Cursor<t0, t1, ...>
17  where t0, t1 are column or column list types
18 - output buffer size parameter type:
19  RowMultiplier<i>, ConstantParameter<i>, Constant<i>
20  where i is literal integer
21 
22 The output column types is a comma-separated list of column types, see above.
23 
24 In addition, the following equivalents are supported:
25  Column<T> == ColumnT
26  ColumnList<T> == ColumnListT
27  Cursor<T, V, ...> == Cursor<ColumnT, ColumnV, ...>
28  int8 == int8_t == Int8, etc
29  float == Float, double == Double, bool == Bool
30  T == ColumnT for output column types
31  RowMultiplier == RowMultiplier<i> where i is the one-based position of the sizer argument
32  when no sizer argument is provided, Constant<1> is assumed
33 """
34 # Author: Pearu Peterson
35 # Created: January 2021
36 
37 import os
38 import re
39 import sys
40 
# Names of the supported argument types. Each name is stripped individually
# because the literal spans several lines: removing only spaces would leave a
# leading newline on the first name of every continuation line, so that e.g.
# 'ColumnInt16' would never be found in the list.
ExtArgumentTypes = [name.strip() for name in '''
Int8, Int16, Int32, Int64, Float, Double, Void, PInt8, PInt16,
PInt32, PInt64, PFloat, PDouble, PBool, Bool, ArrayInt8, ArrayInt16,
ArrayInt32, ArrayInt64, ArrayFloat, ArrayDouble, ArrayBool, GeoPoint,
GeoLineString, Cursor, GeoPolygon, GeoMultiPolygon, ColumnInt8,
ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat, ColumnDouble,
ColumnBool, TextEncodingNone, TextEncodingDict8, TextEncodingDict16,
TextEncodingDict32, ColumnListInt8, ColumnListInt16, ColumnListInt32, ColumnListInt64,
ColumnListFloat, ColumnListDouble, ColumnListBool
'''.split(',')]

# Names of the supported output buffer sizer kinds.
OutputBufferSizeTypes = [name.strip() for name in '''
kConstant, kUserSpecifiedConstantParameter, kUserSpecifiedRowMultiplier
'''.split(',')]

# Maps the aliases accepted in UDTF specifications to canonical names.
translate_map = dict(
    Constant='kConstant',
    ConstantParameter='kUserSpecifiedConstantParameter',
    RowMultiplier='kUserSpecifiedRowMultiplier',
    UserSpecifiedConstantParameter='kUserSpecifiedConstantParameter',
    UserSpecifiedRowMultiplier='kUserSpecifiedRowMultiplier',
    short='Int16',
    int='Int32',
    long='Int64',
)
for t in ['Int8', 'Int16', 'Int32', 'Int64', 'Float', 'Double', 'Bool']:
    # int8 == Int8, float == Float, ...
    translate_map[t.lower()] = t
    if t.startswith('Int'):
        # int8_t == Int8, ...
        translate_map[t.lower() + '_t'] = t


# Truthy only when the whole string consists of decimal digits. The pattern
# is anchored with \Z because `match` alone only anchors the start and would
# accept inputs such as '1x'.
_is_int = re.compile(r'\d+\Z').match
71 
def type_parse(a):
    """Parse a single UDTF type specification.

    `a` is a type specification with all spaces removed, for example
    ``int32_t``, ``Column<double>``, ``Cursor<int,float>`` or
    ``RowMultiplier<2>``.

    Returns one of:
      - a string: a name from ExtArgumentTypes, or 'Column'/'ColumnList'
        followed by the parsed element type;
      - a pair ``(sizer, value)``: `sizer` is a name from
        OutputBufferSizeTypes and `value` is the text between the angle
        brackets, or None when the brackets are omitted;
      - a pair ``('Cursor', types)``: `types` is a tuple of type names.

    Raises ValueError when the specification cannot be parsed. Explicit
    exceptions are used instead of assert statements so that validation is
    not disabled when Python runs with -O.
    """
    i = a.find('<')
    if i >= 0:
        if not a.endswith('>'):
            raise ValueError('Expected `%s` to end with `>`' % (a,))
        n = a[:i]
        n = translate_map.get(n, n)
        inner = a[i+1:-1]
        if n in OutputBufferSizeTypes:
            if not _is_int(inner):
                raise ValueError('Expected literal integer in `%s`, got `%s`' % (a, inner))
            return n, inner
        if n == 'Cursor':
            lst = []
            # map Cursor<T> to Cursor<ColumnT>
            for t in map(type_parse, inner.split(',')):
                if not isinstance(t, str):
                    # sizers and cursors parse to tuples and cannot be
                    # used as cursor columns
                    raise ValueError('Unsupported cursor item in `%s`' % (a,))
                if 'Column' + t in ExtArgumentTypes:
                    lst.append('Column' + t)
                else:
                    lst.append(t)
            return n, tuple(lst)
        if n in ('Column', 'ColumnList'):
            t = type_parse(inner)
            if not isinstance(t, str):
                raise ValueError('Unsupported element type in `%s`' % (a,))
            return n + t
    else:
        a = translate_map.get(a, a)
        if a in ExtArgumentTypes:
            return a
        if a in OutputBufferSizeTypes:
            return a, None
    # NOTE(review): the original indentation of this statement was not
    # recoverable; raising here also rejects unknown `Name<...>` forms
    # instead of returning None.
    raise ValueError('Cannot parse `%s` to ExtArgumentTypes or OutputBufferSizeTypes' % (a,))
102 
103 
def find_comma(line):
    """Return the index of the first comma in `line` that is not enclosed
    in any of the bracket pairs <>, (), [], {}.

    Returns -1 when `line` contains no such comma.
    """
    depth = 0
    for i, c in enumerate(line):
        if c in '<([{':
            depth += 1
        elif c in '>)]}':
            # '}' closes a brace group; listing '{' here would leave the
            # nesting depth of a '{...}' group unbalanced forever
            depth -= 1
        elif depth == 0 and c == ',':
            return i
    return -1
114 
# Collects one `TableFunctionsFactory::add(...)` C++ statement per UDTF
# specification found in the input files.
add_stmts = []

# All command line arguments but the last one are input files.
for input_file in sys.argv[1:-1]:

    # Holds an unfinished specification (one that ended with a comma) until
    # the following line completes it.
    last_line = None
    with open(input_file) as input_stream:
        for line in input_stream:
            line = line.replace(' ', '').strip()
            if last_line is not None:
                # Join the continuation before testing for the `UDTF:`
                # prefix: continuation lines do not carry the prefix and
                # would otherwise be dropped.
                line = last_line + line
                last_line = None
            if not line.startswith('UDTF:'):
                continue
            if line.endswith(','):
                last_line = line
                continue
            line = line[5:]  # drop the `UDTF:` prefix
            i = line.find('(')
            j = line.find(')')
            if i == -1 or j == -1:
                sys.stderr.write('Invalid UDTF specification: `%s`. Skipping.\n' % (line))
                continue
            name = line[:i]
            args_line = line[i+1:j]
            outputs = line[j+1:]
            if outputs.startswith('->'):
                outputs = outputs[2:]
            outputs = outputs.split(',')

            # Split the argument list at commas that are not nested in
            # brackets.
            args = []
            while args_line:
                i = find_comma(args_line)
                if i == -1:
                    args.append(args_line)
                    break
                j = args_line.find('<', 0, i)
                k = args_line.rfind('>', 0, i)
                if j == -1 or i < j:
                    args.append(args_line[:i])
                    args_line = args_line[i+1:]
                else:
                    assert k != -1
                    args.append(args_line[:k+1])
                    args_line = args_line[k+1:].lstrip(',')

            input_types = []
            output_types = []
            sql_types = []
            sizer = None
            consumed_nargs = 0
            for a in args:
                try:
                    r = type_parse(a)
                except (ValueError, AssertionError) as msg:
                    raise ValueError('`%s`: %s' % (line, msg))
                if isinstance(r, str) and r.startswith('Column'):
                    # a bare column argument is treated as a
                    # single-column cursor
                    r = 'Cursor', (r,)
                if isinstance(r, str):
                    assert r in ExtArgumentTypes, r
                    r = 'ExtArgumentType::' + r
                    input_types.append(r)
                    sql_types.append(r)
                    consumed_nargs += 1
                else:
                    n, t = r
                    if n in OutputBufferSizeTypes:
                        if n != 'kConstant':
                            input_types.append('ExtArgumentType::Int32')
                            sql_types.append('ExtArgumentType::Int32')
                        if n == 'kUserSpecifiedRowMultiplier':
                            if not t:
                                t = str(consumed_nargs + 1)
                            assert t == str(consumed_nargs+1), 'Expected %s<%s> got %s<%s> from %s' % (n, consumed_nargs+1, n, t, a)
                        assert sizer is None  # exactly one sizer argument is allowed
                        sizer = 'TableFunctionOutputRowSizer{OutputBufferSizeType::%s, %s}' % (n, t)
                    else:
                        assert n == 'Cursor', (a, r)
                        for t_ in t:
                            input_types.append('ExtArgumentType::%s' % (t_))
                            # NOTE(review): assumes every cursor column
                            # counts as one consumed argument; the original
                            # nesting of this statement was not recoverable
                            # -- confirm.
                            consumed_nargs += 1
                        sql_types.append('ExtArgumentType::%s' % (n))

            for a in outputs:
                try:
                    r = type_parse(a)
                except (ValueError, AssertionError) as msg:
                    raise ValueError('`%s`: %s' % (line, msg))
                assert isinstance(r, str), (a, r)
                # map T to ColumnT
                if 'Column' + r in ExtArgumentTypes:
                    r = 'Column' + r
                output_types.append('ExtArgumentType::%s' % (r))

            if sizer is None:
                sizer = 'TableFunctionOutputRowSizer{OutputBufferSizeType::kConstant, 1}'

            input_types = 'std::vector<ExtArgumentType>{%s}' % (', '.join(input_types))
            output_types = 'std::vector<ExtArgumentType>{%s}' % (', '.join(output_types))
            sql_types = 'std::vector<ExtArgumentType>{%s}' % (', '.join(sql_types))
            add = 'TableFunctionsFactory::add("%s", %s, %s, %s, %s);' % (name, sizer, input_types, output_types, sql_types)
            add_stmts.append(add)

    if last_line is not None:
        # the file ended in the middle of a specification
        sys.stderr.write('Incomplete UDTF specification: `%s`. Skipping.\n' % (last_line))
215 
# C++ source of the generated file: the first placeholder receives the name
# of this script, the second one the collected registration statements.
content = '''
/*
  This file is generated by %s. Do no edit!
*/

#include "QueryEngine/TableFunctions/TableFunctionsFactory.h"

extern bool g_enable_table_functions;

namespace table_functions {

std::once_flag init_flag;

void TableFunctionsFactory::init() {
  if (!g_enable_table_functions) {
    return;
  }
  std::call_once(init_flag, []() {
    %s
  });
}

} // namespace table_functions
''' % (sys.argv[0], '\n    '.join(add_stmts))

# The last command line argument is the path of the file to generate.
output_filename = sys.argv[-1]
dirname = os.path.dirname(output_filename)
# dirname is empty when the output goes to the current directory;
# os.makedirs('') would raise in that case.
if dirname and not os.path.exists(dirname):
    os.makedirs(dirname)

# The context manager closes the file even when writing fails.
with open(output_filename, 'w') as f:
    f.write(content)
int open(const char *path, int flags, int mode)
Definition: omnisci_fs.cpp:64
std::string strip(std::string_view str)
trim any whitespace from the left and right ends of a string
std::string join(T const &container, std::string const &delim)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings