1 """Given a list of input files, scan for lines containing UDTF
2 specification statements in the following form:
4 UDTF: function_name(<arguments>) -> <output column types>
6 where <arguments> is a comma-separated list of argument types. The
7 argument type specifications are:
10 Int8, Int16, Int32, Int64, Float, Double, Bool, etc
12 ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat, ColumnDouble, ColumnBool, etc
15 where t0, t1 are column types
16 - output buffer size parameter type:
17 RowMultiplier<i>, ConstantParameter<i>, Constant<i>
18 where i is a literal integer
20 The output column types are given as a comma-separated list of column types (see above).
22 In addition, the following equivalents are supported:
24 Cursor<T, V, ...> == Cursor<ColumnT, ColumnV, ...>
25 int8 == int8_t == Int8, etc
26 float == Float, double == Double, bool == Bool
27 T == ColumnT for output column types
28 RowMultiplier == RowMultiplier<i> where i is the one-based position of the sizer argument
29 when no sizer argument is provided, Constant<1> is assumed
38 ExtArgumentTypes =
'''
39 Int8, Int16, Int32, Int64, Float, Double, Void, PInt8, PInt16, PInt32,
40 PInt64, PFloat, PDouble, PBool, Bool, ArrayInt8, ArrayInt16,
41 ArrayInt32, ArrayInt64, ArrayFloat, ArrayDouble, ArrayBool, GeoPoint,
42 GeoLineString, Cursor, GeoPolygon, GeoMultiPolygon, ColumnInt8,
43 ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat, ColumnDouble,
44 ColumnBool, TextEncodingNone, TextEncodingDict8, TextEncodingDict16,
48 OutputBufferSizeTypes =
'''
49 kConstant, kUserSpecifiedConstantParameter, kUserSpecifiedRowMultiplier
53 Constant =
'kConstant',
54 ConstantParameter =
'kUserSpecifiedConstantParameter',
55 RowMultiplier =
'kUserSpecifiedRowMultiplier',
56 UserSpecifiedConstantParameter =
'kUserSpecifiedConstantParameter',
57 UserSpecifiedRowMultiplier =
'kUserSpecifiedRowMultiplier',
62 for t
in [
'Int8',
'Int16',
'Int32',
'Int64',
'Float',
'Double',
'Bool']:
63 translate_map[t.lower()] = t
64 if t.startswith(
'Int'):
65 translate_map[t.lower() +
'_t'] = t
68 _is_int = re.compile(
r'\d+').match
73 assert a.endswith(
'>')
75 n = translate_map.get(n, n)
76 if n
in OutputBufferSizeTypes:
82 for t
in map(type_parse, a[i+1:-1].
split(
',')):
83 if 'Column' + t
in ExtArgumentTypes:
84 lst.append(
'Column' + t)
91 a = translate_map.get(a, a)
92 if a
in ExtArgumentTypes:
94 if a
in OutputBufferSizeTypes:
96 raise ValueError(
'Cannot parse `%s` to ExtArgumentTypes or OutputBufferSizeTypes' % (a,))
101 for input_file
in sys.argv[1:-1]:
102 for line
in open(input_file).readlines():
103 line = line.replace(
' ',
'').
strip()
104 if not line.startswith(
'UDTF:'):
109 if i == -1
or j == -1:
110 sys.stderr.write(
'Invalid UDTF specification: `%s`. Skipping.\n' % (line))
113 args_line = line[i+1:j]
115 if outputs.startswith(
'->'):
116 outputs = outputs[2:]
117 outputs = outputs.split(
',')
121 i = args_line.find(
',')
123 args.append(args_line)
125 j = args_line.find(
'<')
126 k = args_line.find(
'>')
128 args.append(args_line[:i])
129 args_line = args_line[i+1:]
132 args.append(args_line[:k+1])
133 args_line = args_line[k+1:].lstrip(
',')
139 for i, a
in enumerate(args):
142 except ValueError
as msg:
143 raise ValueError(
'`%s`: %s' % (line, msg))
144 if isinstance(r, str)
and r.startswith(
'Column'):
146 if isinstance(r, str):
147 input_types.append(r)
151 if n
in OutputBufferSizeTypes:
153 input_types.append(
'ExtArgumentType::Int32')
154 sql_types.append(
'ExtArgumentType::Int32')
155 if n ==
'kUserSpecifiedRowMultiplier':
158 assert t == str(i+1),
'Expected %s<%s> got %s<%s> from %s' % (n, i+1, n, t, a)
160 sizer =
'TableFunctionOutputRowSizer{OutputBufferSizeType::%s, %s}' % (n, t)
162 assert n ==
'Cursor', (a, r)
164 input_types.append(
'ExtArgumentType::%s' % (t_))
165 sql_types.append(
'ExtArgumentType::%s' % (n))
170 except ValueError
as msg:
171 raise ValueError(
'`%s`: %s' % (line, msg))
172 assert isinstance(r, str), (a, r)
173 if 'Column' + r
in ExtArgumentTypes:
175 output_types.append(
'ExtArgumentType::%s' % (r))
178 sizer =
'TableFunctionOutputRowSizer{OutputBufferSizeType::kConstant, 1}'
180 input_types =
'std::vector<ExtArgumentType>{%s}' % (
', '.
join(input_types))
181 output_types =
'std::vector<ExtArgumentType>{%s}' % (
', '.
join(output_types))
182 sql_types =
'std::vector<ExtArgumentType>{%s}' % (
', '.
join(sql_types))
183 add =
'TableFunctionsFactory::add("%s", %s, %s, %s, %s);' % (name, sizer, input_types, output_types, sql_types)
184 add_stmts.append(add)
189 This file is generated by %s. Do no edit!
192 #include "QueryEngine/TableFunctions/TableFunctionsFactory.h"
194 extern bool g_enable_table_functions;
196 namespace table_functions {
198 std::once_flag init_flag;
200 void TableFunctionsFactory::init() {
201 if (!g_enable_table_functions) {
204 std::call_once(init_flag, []() {
209 } // namespace table_functions
210 ''' % (sys.argv[0],
'\n '.
join(add_stmts))
212 output_filename = sys.argv[-1]
213 dirname = os.path.dirname(output_filename)
214 if not os.path.exists(dirname):
int open(const char *path, int flags, int mode)