OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableFunctionsFactory.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <string>
20 #include <vector>
21 
24 #include "Shared/toString.h"
26 
27 #define DEFAULT_ROW_MULTIPLIER_SUFFIX "__default_RowMultiplier_"
28 #define DEFAULT_ROW_MULTIPLIER_VALUE 1
29 #define REQUIRE_CHECK_SUFFIX "__require_check"
30 
31 /*
32 
33  TableFunction represents a User-Defined Table Function (UDTF) and it
34  holds the following information:
35 
36  - the name of a table function that corresponds to its
37  implementation. The name must match the following pattern:
38 
39  \w[\w\d_]*([_][_](gpu_|cpu_|)\d*|)
40 
41  where the first part, to the left of the double underscore, is the
42  so-called SQL name of the table function that is used in SQL query
43  context, and the right part determines a particular implementation
44  of the table function. One can define many implementations for the
45  same SQL table function with specializations to
46 
47  + different argument types (overloading support)
48 
49  + different execution context, CPU or GPU. When gpu or cpu is not
50  present, the implementation is assumed to be valid for both CPU
51  and GPU contexts.
52 
53  - the output sizer parameter <sizer> that determines the allocated
54  size of the output columns:
55 
56  + UserSpecifiedRowMultiplier - the allocated column size will be
57 
58  <sizer value> * <size of the input columns>
59 
60  where <sizer value> is a user-specified integer value as specified
61  in the <sizer> argument position of the table function call.
62 
63  + UserSpecifiedConstantParameter - the allocated column size will
64  be the user-specified integer value as specified in the <sizer>
65  argument position of the table function call.
66 
67  + Constant - the allocated output column size will be <sizer>.
68 
69  + TableFunctionSpecifiedParameter - The table function
70  implementation must call resize to allocate output column
71  buffers. The <sizer> value is not used.
72 
73  The actual size of the output column is returned by the table
74  function implementation and must be equal to or smaller than the
75  allocated output column size.
76 
77  - the list of input argument types. The input argument type can be a
78  scalar or a column type (that is `Column<scalar>`). Supported
79  scalar types are int8, ..., int64, double, float, bool.
80 
81  - the list of output argument types. The output type of a table
82  function is always some column type. Hence, the output argument
83  types are stored as scalar types that correspond to the data type
84  of the output columns.
85 
86  - a boolean flag specifying whether the table function is a load-time
87  or run-time function. Run-time functions can be overwritten or removed
88  by users. Load-time functions cannot be redefined at run-time.
89 
90  Future notes:
91 
92  - introduce a list of output column names. Currently, the names of
93  output columns match the pattern
94 
95  out\d+
96 
97  but for better UX it would be nice to enable user-defined names
98  for output columns.
99 
100  */
101 
102 namespace table_functions {
103 
106  const size_t val{0};
107 
108  public:
109  std::string toString() const {
110  switch (type) {
112  return "kUserSpecifiedConstantParameter[" + std::to_string(val) + "]";
114  return "kUserSpecifiedRowMultiplier[" + std::to_string(val) + "]";
116  return "kConstant[" + std::to_string(val) + "]";
118  return "kTableFunctionSpecifiedParameter[" + std::to_string(val) + "]";
119  }
120  return "";
121  }
122 };
123 
125  public:
126  TableFunction(const std::string& name,
127  const TableFunctionOutputRowSizer output_sizer,
128  const std::vector<ExtArgumentType>& input_args,
129  const std::vector<ExtArgumentType>& output_args,
130  const std::vector<ExtArgumentType>& sql_args,
131  const std::vector<std::map<std::string, std::string>>& annotations,
132  bool is_runtime,
133  bool uses_manager)
134  : name_(name)
135  , output_sizer_(output_sizer)
136  , input_args_(input_args)
137  , output_args_(output_args)
138  , sql_args_(sql_args)
139  , annotations_(annotations)
140  , is_runtime_(is_runtime)
141  , uses_manager_(uses_manager) {}
142 
143  std::vector<ExtArgumentType> getArgs(const bool ensure_column = false) const {
144  std::vector<ExtArgumentType> args;
145  args.insert(args.end(), input_args_.begin(), input_args_.end());
146  if (ensure_column) {
147  // map row dtype to column type
148  std::for_each(output_args_.begin(), output_args_.end(), [&args](auto t) {
149  args.push_back(ext_arg_type_ensure_column(t));
150  });
151  } else {
152  args.insert(args.end(), output_args_.begin(), output_args_.end());
153  }
154  return args;
155  }
156  const std::vector<ExtArgumentType>& getInputArgs() const { return input_args_; }
157  const std::vector<ExtArgumentType>& getOutputArgs() const { return output_args_; }
158  const std::vector<ExtArgumentType>& getSqlArgs() const { return sql_args_; }
159  const std::vector<std::map<std::string, std::string>>& getAnnotations() const {
160  return annotations_;
161  }
163 
164  SQLTypeInfo getInputSQLType(const size_t idx) const;
165  SQLTypeInfo getOutputSQLType(const size_t idx) const;
166 
167  int32_t countScalarArgs() const;
168 
169  auto getInputsSize() const { return input_args_.size(); }
170  auto getOutputsSize() const { return output_args_.size(); }
171 
172  std::string getName(const bool drop_suffix = false, const bool lower = false) const;
173 
174  auto getSignature() const {
175  return getName(/*drop_suffix=*/true, /*lower=*/true) + "(" +
177  }
178 
181  }
182 
185  }
186 
187  bool hasConstantOutputSize() const {
190  }
191 
194  }
195 
199  }
200 
204  }
205 
210  }
211 
216  }
217 
220  }
221 
223 
224  size_t getOutputRowSizeParameter() const { return output_sizer_.val; }
225 
226  bool requireInAnnotations() const;
227  std::string getRequireCheckName() const;
228 
229  const std::map<std::string, std::string>& getAnnotation(const size_t idx) const;
230  const std::map<std::string, std::string>& getInputAnnotation(
231  const size_t input_arg_idx) const;
232  const std::map<std::string, std::string>& getOutputAnnotation(
233  const size_t output_arg_idx) const;
234  const std::map<std::string, std::string>& getFunctionAnnotation() const;
235 
236  std::pair<int32_t, int32_t> getInputID(const size_t idx) const;
237 
238  size_t getSqlOutputRowSizeParameter() const;
239 
240  size_t getOutputRowSizeParameter(const std::vector<SQLTypeInfo>& variant) const {
241  auto val = output_sizer_.val;
243  size_t col_index = 0;
244  size_t func_arg_index = 0;
245  for (const auto& ti : variant) {
246  func_arg_index++;
247  if (ti.is_column_list()) {
248  col_index += ti.get_dimension();
249  } else {
250  col_index++;
251  }
252  if (func_arg_index == val) {
253  val = col_index;
254  break;
255  }
256  }
257  }
258  return val;
259  }
260 
261  bool isRuntime() const { return is_runtime_; }
262 
263  bool usesManager() const { return uses_manager_; }
264 
265  inline bool isGPU() const {
266  return !usesManager() && (name_.find("_cpu_", name_.find("__")) == std::string::npos);
267  }
268 
269  inline bool isCPU() const {
270  return usesManager() || (name_.find("_gpu_", name_.find("__")) == std::string::npos);
271  }
272 
273  inline bool useDefaultSizer() const {
274  // Functions that use a default sizer value have one less argument
275  return (name_.find("_default_", name_.find("__")) != std::string::npos);
276  }
277 
278  std::string toString() const {
279  auto result = "TableFunction(" + name_ + ", input_args=[";
281  result += "], output_args=[";
283  result += "], sql_args=[";
285  result += "], is_runtime=" + std::string((is_runtime_ ? "true" : "false"));
286  result += ", uses_manager=" + std::string((uses_manager_ ? "true" : "false"));
287  result += ", sizer=" + ::toString(output_sizer_);
288  result += ", annotations=[";
289  for (auto annotation : annotations_) {
290  if (annotation.empty()) {
291  result += "{}, ";
292  } else {
293  result += "{";
294  for (auto it : annotation) {
295  result += ::toString(it.first) + ": " + ::toString(it.second);
296  }
297  result += "}, ";
298  }
299  }
300  result += "])";
301  return result;
302  }
303 
304  std::string toStringSQL() const {
305  auto result = name_ + "(";
307  result += ") -> (";
309  result += ")";
310  return result;
311  }
312 
313  private:
314  const std::string name_;
316  const std::vector<ExtArgumentType> input_args_;
317  const std::vector<ExtArgumentType> output_args_;
318  const std::vector<ExtArgumentType> sql_args_;
319  const std::vector<std::map<std::string, std::string>> annotations_;
320  const bool is_runtime_;
321  const bool uses_manager_;
322 };
323 
325  public:
326  static void add(const std::string& name,
327  const TableFunctionOutputRowSizer sizer,
328  const std::vector<ExtArgumentType>& input_args,
329  const std::vector<ExtArgumentType>& output_args,
330  const std::vector<ExtArgumentType>& sql_args,
331  const std::vector<std::map<std::string, std::string>>& annotations,
332  bool is_runtime = false,
333  bool uses_manager = false);
334 
335  static std::vector<TableFunction> get_table_funcs(const std::string& name,
336  const bool is_gpu);
337  static std::vector<TableFunction> get_table_funcs(const bool is_runtime = false);
338  static void init();
339  static void reset();
340 
341  private:
342  static std::unordered_map<std::string, TableFunction> functions_;
343 
344  friend class ::ExtensionFunctionsWhitelist;
345 };
346 
347 } // namespace table_functions
SQLTypeInfo getOutputSQLType(const size_t idx) const
const TableFunctionOutputRowSizer output_sizer_
string name
Definition: setup.in.py:72
static void add(const std::string &name, const TableFunctionOutputRowSizer sizer, const std::vector< ExtArgumentType > &input_args, const std::vector< ExtArgumentType > &output_args, const std::vector< ExtArgumentType > &sql_args, const std::vector< std::map< std::string, std::string >> &annotations, bool is_runtime=false, bool uses_manager=false)
const std::map< std::string, std::string > & getInputAnnotation(const size_t input_arg_idx) const
TableFunction(const std::string &name, const TableFunctionOutputRowSizer output_sizer, const std::vector< ExtArgumentType > &input_args, const std::vector< ExtArgumentType > &output_args, const std::vector< ExtArgumentType > &sql_args, const std::vector< std::map< std::string, std::string >> &annotations, bool is_runtime, bool uses_manager)
const std::vector< std::map< std::string, std::string > > annotations_
const std::vector< ExtArgumentType > output_args_
std::pair< int32_t, int32_t > getInputID(const size_t idx) const
const std::map< std::string, std::string > & getFunctionAnnotation() const
std::string to_string(char const *&&v)
ExtArgumentType ext_arg_type_ensure_column(const ExtArgumentType ext_arg_type)
const std::vector< ExtArgumentType > sql_args_
SQLTypeInfo getInputSQLType(const size_t idx) const
std::vector< ExtArgumentType > getArgs(const bool ensure_column=false) const
const std::vector< ExtArgumentType > & getOutputArgs() const
const std::map< std::string, std::string > & getOutputAnnotation(const size_t output_arg_idx) const
std::string getName(const bool drop_suffix=false, const bool lower=false) const
const std::map< std::string, std::string > & getAnnotation(const size_t idx) const
const std::vector< ExtArgumentType > & getInputArgs() const
static std::string toString(const std::vector< ExtensionFunction > &ext_funcs, std::string tab="")
const std::vector< ExtArgumentType > & getSqlArgs() const
char * t
static std::vector< TableFunction > get_table_funcs(const std::string &name, const bool is_gpu)
static std::unordered_map< std::string, TableFunction > functions_
const std::vector< ExtArgumentType > input_args_
size_t getOutputRowSizeParameter(const std::vector< SQLTypeInfo > &variant) const
OutputBufferSizeType getOutputRowSizeType() const
const ExtArgumentType getRet() const
static std::string toStringSQL(const std::vector< ExtArgumentType > &sig_types)
const std::vector< std::map< std::string, std::string > > & getAnnotations() const