OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TableFunctionsFactory.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <string>
20 #include <vector>
21 
24 #include "Shared/toString.h"
26 
27 #define DEFAULT_ROW_MULTIPLIER_SUFFIX "__default_RowMultiplier_"
28 #define DEFAULT_ROW_MULTIPLIER_VALUE 1
29 
30 /*
31 
32  TableFunction represents a User-Defined Table Function (UDTF) and it
33  holds the following information:
34 
35  - the name of a table function that corresponds to its
36  implementation. The name must match the following pattern:
37 
38  \w[\w\d_]*([_][_](gpu_|cpu_|)\d*|)
39 
40  where the part to the left of the double underscore is the
41  so-called SQL name of the table function that is used in SQL query
42  context, and the right part determines a particular implementation
43  of the table function. One can define many implementations for the
44  same SQL table function with specializations to
45 
46  + different argument types (overloading support)
47 
48  + different execution context, CPU or GPU. When gpu or cpu is not
49  present, the implementation is assumed to be valid for both CPU
50  and GPU contexts.
51 
52  - the output sizer parameter <sizer> that determines the allocated
53  size of the output columns:
54 
55  + UserSpecifiedRowMultiplier - the allocated column size will be
56 
57  <sizer value> * <size of the input columns>
58 
59  where <sizer value> is the user-specified integer value given
60  in the <sizer> argument position of the table function call.
61 
62  + UserSpecifiedConstantParameter - the allocated column size will
63  be the user-specified integer value given in the <sizer>
64  argument position of the table function call.
65 
66  + Constant - the allocated output column size will be <sizer>.
67 
68 
69  The actual size of the output column is returned by the table
70  function implementation and must be less than or equal to the
71  allocated output column size.
72 
73  - the list of input argument types. The input argument type can be a
74  scalar or a column type (that is `Column<scalar>`). Supported
75  scalar types are int8, ..., int64, double, float, bool.
76 
77  - the list of output argument types. The output type of a table
78  function is always some column type. Hence, the output argument
79  types are stored as scalar types that correspond to the data type
80  of the output columns.
81 
82  - a boolean flag specifying whether the table function is a load-time
83  or run-time function. Run-time functions can be overwritten or removed
84  by users. Load-time functions cannot be redefined at run-time.
85 
86  Future notes:
87 
88  - introduce a list of output column names. Currently, the names of
89  output columns match the pattern
90 
91  out\d+
92 
93  but for better UX it would be nice to enable user-defined names
94  for output columns.
95 
96  */
97 
98 namespace table_functions {
99 
102  const size_t val{0};
103 
104  public:
105  std::string toString() const {
106  switch (type) {
108  return "kUserSpecifiedConstantParameter[" + std::to_string(val) + "]";
110  return "kUserSpecifiedRowMultiplier[" + std::to_string(val) + "]";
112  return "kConstant[" + std::to_string(val) + "]";
113  }
114  return "";
115  }
116 };
117 
119  public:
// Constructs a table function descriptor by copying each argument into the
// matching const member (name -> name_, output_sizer -> output_sizer_,
// input_args -> input_args_, output_args -> output_args_,
// sql_args -> sql_args_, is_runtime -> is_runtime_).
// The constructor body is empty: no validation of the name or of the
// argument lists happens here.
120  TableFunction(const std::string& name,
121  const TableFunctionOutputRowSizer output_sizer,
122  const std::vector<ExtArgumentType>& input_args,
123  const std::vector<ExtArgumentType>& output_args,
124  const std::vector<ExtArgumentType>& sql_args,
125  bool is_runtime)
126  : name_(name)
127  , output_sizer_(output_sizer)
128  , input_args_(input_args)
129  , output_args_(output_args)
130  , sql_args_(sql_args)
131  , is_runtime_(is_runtime) {}
132 
// Returns a new vector holding the input argument types followed by the
// output argument types.
//
// ensure_column: when true, every output type is passed through
// ext_arg_type_ensure_column() before being appended; when false (the
// default) the output types are appended unchanged. Input types are never
// converted.
133  std::vector<ExtArgumentType> getArgs(const bool ensure_column = false) const {
134  std::vector<ExtArgumentType> args;
135  args.insert(args.end(), input_args_.begin(), input_args_.end());
136  if (ensure_column) {
137  // map row dtype to column type
138  std::for_each(output_args_.begin(), output_args_.end(), [&args](auto t) {
139  args.push_back(ext_arg_type_ensure_column(t));
140  });
141  } else {
142  args.insert(args.end(), output_args_.begin(), output_args_.end());
143  }
144  return args;
145  }
// Read-only accessors for the three stored argument type lists. Each returns
// a const reference to the member itself (no copy), so the reference is only
// usable while this TableFunction object is alive.
146  const std::vector<ExtArgumentType>& getInputArgs() const { return input_args_; }
147  const std::vector<ExtArgumentType>& getOutputArgs() const { return output_args_; }
148  const std::vector<ExtArgumentType>& getSqlArgs() const { return sql_args_; }
150 
151  SQLTypeInfo getInputSQLType(const size_t idx) const;
152  SQLTypeInfo getOutputSQLType(const size_t idx) const;
153 
154  int32_t countScalarArgs() const;
155 
// Number of entries in the input and output argument type lists,
// respectively. The return type is deduced from std::vector::size().
156  auto getInputsSize() const { return input_args_.size(); }
157  auto getOutputsSize() const { return output_args_.size(); }
158 
159  std::string getName(const bool drop_suffix = false, const bool lower = false) const;
160 
161  auto getSignature() const {
162  return getName(/*drop_suffix=*/true, /*lower=*/true) + "(" +
164  }
165 
168  }
169 
172  }
173 
176  }
177 
179 
// Returns the sizer value exactly as stored in output_sizer_.val, with no
// adjustment applied.
180  size_t getOutputRowSizeParameter() const { return output_sizer_.val; }
181 
182  size_t getSqlOutputRowSizeParameter() const;
183 
184  size_t getOutputRowSizeParameter(const std::vector<SQLTypeInfo>& variant) const {
185  auto val = output_sizer_.val;
187  size_t col_index = 0;
188  size_t func_arg_index = 0;
189  for (const auto& ti : variant) {
190  func_arg_index++;
191  if (ti.is_column_list()) {
192  col_index += ti.get_dimension();
193  } else {
194  col_index++;
195  }
196  if (func_arg_index == val) {
197  val = col_index;
198  break;
199  }
200  }
201  }
202  return val;
203  }
204 
// Returns the is_runtime flag that was passed to the constructor.
205  bool isRuntime() const { return is_runtime_; }
206 
// Returns true unless the text "_cpu_" occurs in name_ at or after the first
// "__". The search starts at the position of the first "__"; if name_ has no
// "__" the start position is npos, nothing is found, and the result is true.
// A name with neither marker therefore reports true from both isGPU() and
// isCPU().
207  inline bool isGPU() const {
208  return (name_.find("_cpu_", name_.find("__")) == std::string::npos);
209  }
210 
// Returns true unless the text "_gpu_" occurs in name_ at or after the first
// "__". The search starts at the position of the first "__"; if name_ has no
// "__" the start position is npos, nothing is found, and the result is true.
211  inline bool isCPU() const {
212  return (name_.find("_gpu_", name_.find("__")) == std::string::npos);
213  }
214 
// Returns true when the text "_default_" occurs in name_ at or after the
// first "__". If name_ has no "__" the search starts at npos and the result
// is false.
215  inline bool useDefaultSizer() const {
216  // Functions that use a default sizer value have one fewer argument
217  return (name_.find("_default_", name_.find("__")) != std::string::npos);
218  }
219 
220  std::string toString() const {
221  auto result = "TableFunction(" + name_ + ", input_args=[";
223  result += "], output_args=[";
225  result += "], sql_args=[";
227  result += "], is_runtime=" + std::string((is_runtime_ ? "true" : "false"));
228  result += ", sizer=" + ::toString(output_sizer_);
229  result += ")";
230  return result;
231  }
232 
233  std::string toStringSQL() const {
234  auto result = name_ + "(";
236  result += ") -> (";
238  result += ")";
239  return result;
240  }
241 
242  private:
243  const std::string name_;
245  const std::vector<ExtArgumentType> input_args_;
246  const std::vector<ExtArgumentType> output_args_;
247  const std::vector<ExtArgumentType> sql_args_;
248  const bool is_runtime_;
249 };
250 
252  public:
253  static void add(const std::string& name,
255  const std::vector<ExtArgumentType>& input_args,
256  const std::vector<ExtArgumentType>& output_args,
257  const std::vector<ExtArgumentType>& sql_args,
258  bool is_runtime = false);
259 
260  static std::vector<TableFunction> get_table_funcs(const std::string& name,
261  const bool is_gpu);
262  static std::vector<TableFunction> get_table_funcs(const bool is_runtime = false);
263  static void init();
264  static void reset();
265 
266  private:
267  static std::unordered_map<std::string, TableFunction> functions_;
268 
269  friend class ::ExtensionFunctionsWhitelist;
270 };
271 
272 } // namespace table_functions
SQLTypeInfo getOutputSQLType(const size_t idx) const
const TableFunctionOutputRowSizer output_sizer_
string name
Definition: setup.in.py:72
const std::vector< ExtArgumentType > output_args_
std::string to_string(char const *&&v)
ExtArgumentType ext_arg_type_ensure_column(const ExtArgumentType ext_arg_type)
const std::vector< ExtArgumentType > sql_args_
SQLTypeInfo getInputSQLType(const size_t idx) const
std::vector< ExtArgumentType > getArgs(const bool ensure_column=false) const
const std::vector< ExtArgumentType > & getOutputArgs() const
std::string getName(const bool drop_suffix=false, const bool lower=false) const
TableFunction(const std::string &name, const TableFunctionOutputRowSizer output_sizer, const std::vector< ExtArgumentType > &input_args, const std::vector< ExtArgumentType > &output_args, const std::vector< ExtArgumentType > &sql_args, bool is_runtime)
static void add(const std::string &name, const TableFunctionOutputRowSizer sizer, const std::vector< ExtArgumentType > &input_args, const std::vector< ExtArgumentType > &output_args, const std::vector< ExtArgumentType > &sql_args, bool is_runtime=false)
const std::vector< ExtArgumentType > & getInputArgs() const
static std::string toString(const std::vector< ExtensionFunction > &ext_funcs, std::string tab="")
const std::vector< ExtArgumentType > & getSqlArgs() const
char * t
static std::vector< TableFunction > get_table_funcs(const std::string &name, const bool is_gpu)
static std::unordered_map< std::string, TableFunction > functions_
const std::vector< ExtArgumentType > input_args_
size_t getOutputRowSizeParameter(const std::vector< SQLTypeInfo > &variant) const
OutputBufferSizeType getOutputRowSizeType() const
const ExtArgumentType getRet() const
static std::string toStringSQL(const std::vector< ExtArgumentType > &sig_types)