OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TableFunctionsFactory.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <string>
20 #include <vector>
21 
24 #include "Shared/toString.h"
26 
// String literal used as a name suffix for default-row-multiplier variants.
// NOTE(review): where this suffix is appended is not visible in this header;
// useDefaultSizer() only checks for "_default_" after the first "__" — confirm.
27 #define DEFAULT_ROW_MULTIPLIER_SUFFIX "__default_RowMultiplier_"
// NOTE(review): presumably the multiplier applied when the caller supplies
// none — confirm against the code that uses it.
28 #define DEFAULT_ROW_MULTIPLIER_VALUE 1
// NOTE(review): presumably the name suffix of a pre-flight variant of a table
// function (see containsPreFlightFn() / getPreFlightFnName()) — confirm.
29 #define PREFLIGHT_SUFFIX "__preflight"
30 
31 /*
32 
33  TableFunction represents a User-Defined Table Function (UDTF) and it
34  holds the following information:
35 
36  - the name of a table function that corresponds to its
37  implementation. The name must match the following pattern:
38 
39  \w[\w\d_]*([_][_](gpu_|cpu_|)\d*|)
40 
41  where the part to the left of the double underscore is the
42  so-called SQL name of the table function that is used in the SQL
43  query context, and the right part determines a particular implementation
44  of the table function. One can define many implementations for the
45  same SQL table function with specializations to
46 
47  + different argument types (overloading support)
48 
49  + different execution context, CPU or GPU. When gpu or cpu is not
50  present, the implementation is assumed to be valid for both CPU
51  and GPU contexts.
52 
53  - the output sizer parameter <sizer> that determines the allocated
54  size of the output columns:
55 
56  + UserSpecifiedRowMultiplier - the allocated column size will be
57 
58  <sizer value> * <size of the input columns>
59 
60  where <sizer value> is the user-specified integer value given
61  in the <sizer> argument position of the table function call.
62 
63  + UserSpecifiedConstantParameter - the allocated column size will
64  be the user-specified integer value given in the <sizer>
65  argument position of the table function call.
66 
67  + Constant - the allocated output column size will be <sizer>.
68 
69  + TableFunctionSpecifiedParameter - The table function
70  implementation must call resize to allocate output column
71  buffers. The <sizer> value is not used.
72 
73  The actual size of the output column is returned by the table
74  function implementation and must be less than or equal to the
75  allocated output column size.
76 
77  - the list of input argument types. The input argument type can be a
78  scalar or a column type (that is `Column<scalar>`). Supported
79  scalar types are int8, ..., int64, double, float, bool.
80 
81  - the list of output argument types. The output type of a table
82  function is always some column type. Hence, the output argument
83  types are stored as scalar types that correspond to the data type
84  of the output columns.
85 
86  - a boolean flag specifying whether the table function is a load-time
87  or run-time function. Run-time functions can be overwritten or removed
88  by users. Load-time functions cannot be redefined at run-time.
89 
90  Future notes:
91 
92  - introduce a list of output column names. Currently, the names of
93  output columns match the pattern
94 
95  out\d+
96 
97  but for better UX it would be nice to enable user-defined names
98  for output columns.
99 
100  */
101 
102 namespace table_functions {
103 
106  const size_t val{0};
107 
108  public:
109  std::string toString() const {
110  switch (type) {
112  return "kUserSpecifiedConstantParameter[" + std::to_string(val) + "]";
114  return "kUserSpecifiedRowMultiplier[" + std::to_string(val) + "]";
116  return "kConstant[" + std::to_string(val) + "]";
118  return "kTableFunctionSpecifiedParameter[" + std::to_string(val) + "]";
120  return "kPreFlightParameter[" + std::to_string(val) + "]";
121  }
122  return "";
123  }
124 };
125 
127  public:
// Builds a table function descriptor. Each argument is copied into the member
// of the same name (with a trailing underscore); no validation happens here.
//   name         - implementation name of the table function
//   output_sizer - output row sizer, stored in output_sizer_
//   input_args   - input argument types
//   output_args  - output argument types
//   sql_args     - argument types stored in sql_args_
//   annotations  - list of key/value annotation maps
//   is_runtime   - value later reported by isRuntime()
//   uses_manager - value later reported by usesManager()
128  TableFunction(const std::string& name,
129  const TableFunctionOutputRowSizer output_sizer,
130  const std::vector<ExtArgumentType>& input_args,
131  const std::vector<ExtArgumentType>& output_args,
132  const std::vector<ExtArgumentType>& sql_args,
133  const std::vector<std::map<std::string, std::string>>& annotations,
134  bool is_runtime,
135  bool uses_manager)
136  : name_(name)
137  , output_sizer_(output_sizer)
138  , input_args_(input_args)
139  , output_args_(output_args)
140  , sql_args_(sql_args)
141  , annotations_(annotations)
142  , is_runtime_(is_runtime)
143  , uses_manager_(uses_manager) {}
144 
145  std::vector<ExtArgumentType> getArgs(const bool ensure_column = false) const {
146  std::vector<ExtArgumentType> args;
147  args.insert(args.end(), input_args_.begin(), input_args_.end());
148  if (ensure_column) {
149  // map row dtype to column type
150  std::for_each(output_args_.begin(), output_args_.end(), [&args](auto t) {
151  args.push_back(ext_arg_type_ensure_column(t));
152  });
153  } else {
154  args.insert(args.end(), output_args_.begin(), output_args_.end());
155  }
156  return args;
157  }
// Read-only accessors: each returns a const reference to the stored member,
// so the reference is only valid while this TableFunction object is alive.
158  const std::vector<ExtArgumentType>& getInputArgs() const { return input_args_; }
159  const std::vector<ExtArgumentType>& getOutputArgs() const { return output_args_; }
160  const std::vector<ExtArgumentType>& getSqlArgs() const { return sql_args_; }
// Returns the complete list of annotation maps (annotations_).
161  const std::vector<std::map<std::string, std::string>>& getAnnotations() const {
162  return annotations_;
163  }
165 
// Declared here and defined out of line.
// NOTE(review): presumably these translate the ExtArgumentType at position idx
// of input_args_ / output_args_ into an SQLTypeInfo — confirm against the
// definitions, including what happens for an out-of-range idx.
166  SQLTypeInfo getInputSQLType(const size_t idx) const;
167  SQLTypeInfo getOutputSQLType(const size_t idx) const;
168 
// Declared here and defined out of line.
// NOTE(review): presumably counts the scalar (non-column) arguments — confirm
// which argument list is scanned.
169  int32_t countScalarArgs() const;
170 
// Number of input argument types held by this table function.
171  size_t getInputsSize() const { return input_args_.size(); }
// Number of output argument types held by this table function.
172  size_t getOutputsSize() const { return output_args_.size(); }
173 
// Declared here and defined out of line. Both flags default to false.
// NOTE(review): going by the naming pattern described at the top of this file,
// drop_suffix presumably strips the implementation part after "__" and lower
// presumably lower-cases the result — confirm against the definition.
174  std::string getName(const bool drop_suffix = false, const bool lower = false) const;
175 
// Declared here and defined out of line.
// NOTE(review): presumably renders the argument types as a signature string,
// optionally including the function name and the output types — confirm the
// exact format against the definition.
176  std::string getSignature(const bool include_name, const bool include_output) const;
177 
180  }
181 
182  bool hasPreFlightOutputSizer() const {
184  }
185 
188  }
189 
190  bool hasConstantOutputSize() const {
193  }
194 
197  }
198 
202  }
203 
208  }
209 
215  }
216 
221  }
222 
225  }
226 
228  for (auto& input : input_args_) {
229  if (input == ExtArgumentType::Timestamp ||
231  return true;
232  }
233  }
234  return false;
235  }
236 
238 
// Returns the raw value stored in the output sizer (output_sizer_.val).
239  size_t getOutputRowSizeParameter() const { return output_sizer_.val; }
240 
// Declared here and defined out of line.
// NOTE(review): presumably related to PREFLIGHT_SUFFIX ("__preflight") defined
// near the top of this file — confirm how the pre-flight name is derived.
241  bool containsPreFlightFn() const;
242  std::string getPreFlightFnName() const;
243 
// Annotation lookups. All are declared here and defined out of line, and all
// return their result by value.
// NOTE(review): getAnnotations(idx) presumably indexes annotations_ directly,
// while the input/output variants take an index relative to the input or
// output argument list — confirm against the definitions.
// NOTE(review): default_ is presumably the value returned when key is absent —
// confirm.
244  const std::map<std::string, std::string> getAnnotations(const size_t idx) const;
245  const std::map<std::string, std::string> getInputAnnotations(
246  const size_t input_arg_idx) const;
247  const std::string getInputAnnotation(const size_t input_arg_idx,
248  const std::string& key,
249  const std::string& default_) const;
250  const std::map<std::string, std::string> getOutputAnnotations(
251  const size_t output_arg_idx) const;
252  const std::string getOutputAnnotation(const size_t output_arg_idx,
253  const std::string& key,
254  const std::string& default_) const;
255  const std::string getFunctionAnnotation(const std::string& key,
256  const std::string& default_) const;
257  const std::map<std::string, std::string> getFunctionAnnotations() const;
258 
// Declared here and defined out of line.
// NOTE(review): use_input_args presumably selects between the input and the
// output argument list — confirm against the definitions.
// NOTE(review): the meaning of the two values returned by getInputID() is not
// visible in this header — confirm before relying on it.
259  const std::vector<std::string> getCursorFields(const size_t sql_idx) const;
260  const std::string getArgTypes(const bool use_input_args) const;
261  const std::string getArgNames(const bool use_input_args) const;
262  std::pair<int32_t, int32_t> getInputID(const size_t idx) const;
263 
// Declared here and defined out of line.
// NOTE(review): presumably the sizer parameter expressed as a position among
// the SQL-level arguments — confirm against the definition.
264  size_t getSqlOutputRowSizeParameter() const;
265 
266  size_t getOutputRowSizeParameter(const std::vector<SQLTypeInfo>& variant) const {
267  auto val = output_sizer_.val;
269  size_t col_index = 0;
270  size_t func_arg_index = 0;
271  for (const auto& ti : variant) {
272  func_arg_index++;
273  if (ti.is_column_list()) {
274  col_index += ti.get_dimension();
275  } else {
276  col_index++;
277  }
278  if (func_arg_index == val) {
279  val = col_index;
280  break;
281  }
282  }
283  }
284  return val;
285  }
286 
// Returns the is_runtime flag supplied at construction.
287  inline bool isRuntime() const { return is_runtime_; }
288 
// Returns the uses_manager flag supplied at construction.
289  inline bool usesManager() const { return uses_manager_; }
290 
291  inline bool isGPU() const {
292  return !usesManager() && (name_.find("_cpu_", name_.find("__")) == std::string::npos);
293  }
294 
295  inline bool isCPU() const {
296  return usesManager() || (name_.find("_gpu_", name_.find("__")) == std::string::npos);
297  }
298 
299  inline bool useDefaultSizer() const {
300  // Functions that use a default sizer value have one less argument
301  return (name_.find("_default_", name_.find("__")) != std::string::npos);
302  }
303 
304  std::string toString() const {
305  auto result = "TableFunction(" + name_ + ", input_args=[";
307  result += "], output_args=[";
309  result += "], sql_args=[";
311  result += "], is_runtime=" + std::string((is_runtime_ ? "true" : "false"));
312  result += ", uses_manager=" + std::string((uses_manager_ ? "true" : "false"));
313  result += ", sizer=" + ::toString(output_sizer_);
314  result += ", annotations=[";
315  for (auto annotation : annotations_) {
316  if (annotation.empty()) {
317  result += "{}, ";
318  } else {
319  result += "{";
320  for (auto it : annotation) {
321  result += ::toString(it.first) + ": " + ::toString(it.second);
322  }
323  result += "}, ";
324  }
325  }
326  result += "])";
327  return result;
328  }
329 
330  std::string toStringSQL() const {
331  auto result = name_ + "(";
333  result += ") -> (";
335  result += ")";
336  return result;
337  }
338 
339  private:
340  const std::string name_;
342  const std::vector<ExtArgumentType> input_args_;
343  const std::vector<ExtArgumentType> output_args_;
344  const std::vector<ExtArgumentType> sql_args_;
345  const std::vector<std::map<std::string, std::string>> annotations_;
346  const bool is_runtime_;
347  const bool uses_manager_;
348 };
349 
351  public:
// Declared here and defined out of line. The parameters mirror the
// TableFunction constructor except that there is no uses_manager argument;
// is_runtime defaults to false.
// NOTE(review): presumably builds a TableFunction and stores it in
// functions_ — confirm against the definition, including how uses_manager is
// determined and what happens when the name already exists.
352  static void add(const std::string& name,
353  const TableFunctionOutputRowSizer sizer,
354  const std::vector<ExtArgumentType>& input_args,
355  const std::vector<ExtArgumentType>& output_args,
356  const std::vector<ExtArgumentType>& sql_args,
357  const std::vector<std::map<std::string, std::string>>& annotations,
358  bool is_runtime = false);
359 
// Lookup overloads, all declared here and defined out of line. Each returns a
// vector of TableFunction objects by value.
// NOTE(review): presumably the overloads filter the registered functions by
// name, by name plus GPU capability, by the runtime flag, or not at all —
// confirm the matching rules (e.g. case sensitivity, suffix handling) against
// the definitions.
360  static std::vector<TableFunction> get_table_funcs(const std::string& name,
361  const bool is_gpu);
362  static std::vector<TableFunction> get_table_funcs(const std::string& name);
363  static std::vector<TableFunction> get_table_funcs(const bool is_runtime);
364  static std::vector<TableFunction> get_table_funcs();
// Declared here and defined out of line.
// NOTE(review): presumably init() populates the registry and reset() clears
// some or all of it — confirm against the definitions which entries survive.
365  static void init();
366  static void reset();
367 
368  private:
369  static std::unordered_map<std::string, TableFunction> functions_;
370 
371  friend class ::ExtensionFunctionsWhitelist;
372 };
373 
374 } // namespace table_functions
SQLTypeInfo getOutputSQLType(const size_t idx) const
const std::string getOutputAnnotation(const size_t output_arg_idx, const std::string &key, const std::string &default_) const
static std::vector< TableFunction > get_table_funcs()
static void add(const std::string &name, const TableFunctionOutputRowSizer sizer, const std::vector< ExtArgumentType > &input_args, const std::vector< ExtArgumentType > &output_args, const std::vector< ExtArgumentType > &sql_args, const std::vector< std::map< std::string, std::string >> &annotations, bool is_runtime=false)
const TableFunctionOutputRowSizer output_sizer_
const std::map< std::string, std::string > getFunctionAnnotations() const
TableFunction(const std::string &name, const TableFunctionOutputRowSizer output_sizer, const std::vector< ExtArgumentType > &input_args, const std::vector< ExtArgumentType > &output_args, const std::vector< ExtArgumentType > &sql_args, const std::vector< std::map< std::string, std::string >> &annotations, bool is_runtime, bool uses_manager)
const std::vector< std::map< std::string, std::string > > annotations_
const std::vector< ExtArgumentType > output_args_
std::pair< int32_t, int32_t > getInputID(const size_t idx) const
const std::string getFunctionAnnotation(const std::string &key, const std::string &default_) const
std::string to_string(char const *&&v)
ExtArgumentType ext_arg_type_ensure_column(const ExtArgumentType ext_arg_type)
std::string getSignature(const bool include_name, const bool include_output) const
const std::vector< ExtArgumentType > sql_args_
Supported runtime functions management and retrieval.
SQLTypeInfo getInputSQLType(const size_t idx) const
const std::string getArgNames(const bool use_input_args) const
std::vector< ExtArgumentType > getArgs(const bool ensure_column=false) const
const std::map< std::string, std::string > getOutputAnnotations(const size_t output_arg_idx) const
const std::vector< ExtArgumentType > & getOutputArgs() const
const std::string getInputAnnotation(const size_t input_arg_idx, const std::string &key, const std::string &default_) const
std::string getName(const bool drop_suffix=false, const bool lower=false) const
const std::string getArgTypes(const bool use_input_args) const
const std::vector< std::string > getCursorFields(const size_t sql_idx) const
const std::vector< ExtArgumentType > & getInputArgs() const
const std::map< std::string, std::string > getInputAnnotations(const size_t input_arg_idx) const
static std::string toString(const std::vector< ExtensionFunction > &ext_funcs, std::string tab="")
const std::vector< ExtArgumentType > & getSqlArgs() const
static std::unordered_map< std::string, TableFunction > functions_
const std::vector< ExtArgumentType > input_args_
string name
Definition: setup.in.py:72
size_t getOutputRowSizeParameter(const std::vector< SQLTypeInfo > &variant) const
OutputBufferSizeType getOutputRowSizeType() const
const ExtArgumentType getRet() const
static std::string toStringSQL(const std::vector< ExtArgumentType > &sig_types)
const std::vector< std::map< std::string, std::string > > & getAnnotations() const