OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{ExtensionFunctionsBinding.cpp} Namespace Reference

Functions

ExtArgumentType get_column_arg_elem_type (const ExtArgumentType ext_arg_column_type)
 
ExtArgumentType get_column_list_arg_elem_type (const ExtArgumentType ext_arg_column_list_type)
 
ExtArgumentType get_array_arg_elem_type (const ExtArgumentType ext_arg_array_type)
 
static int match_numeric_argument (const SQLTypeInfo &arg_type_info, const bool is_arg_literal, const ExtArgumentType &sig_ext_arg_type, int32_t &penalty_score)
 
static int match_arguments (const SQLTypeInfo &arg_type, const bool is_arg_literal, int sig_pos, const std::vector< ExtArgumentType > &sig_types, int &penalty_score)
 
bool is_valid_identifier (std::string str)
 

Function Documentation

ExtArgumentType anonymous_namespace{ExtensionFunctionsBinding.cpp}::get_array_arg_elem_type ( const ExtArgumentType  ext_arg_array_type)

Definition at line 85 of file ExtensionFunctionsBinding.cpp.

References ArrayBool, ArrayDouble, ArrayFloat, ArrayInt16, ArrayInt32, ArrayInt64, ArrayInt8, Bool, Double, Float, Int16, Int32, Int64, Int8, and UNREACHABLE.

Referenced by match_arguments().

+ Here is the caller graph for this function:

ExtArgumentType anonymous_namespace{ExtensionFunctionsBinding.cpp}::get_column_arg_elem_type ( const ExtArgumentType  ext_arg_column_type)

Definition at line 36 of file ExtensionFunctionsBinding.cpp.

References Bool, ColumnBool, ColumnDouble, ColumnFloat, ColumnInt16, ColumnInt32, ColumnInt64, ColumnInt8, ColumnTextEncodingDict, Double, Float, Int16, Int32, Int64, Int8, TextEncodingDict, and UNREACHABLE.

Referenced by match_arguments().

+ Here is the caller graph for this function:

ExtArgumentType anonymous_namespace{ExtensionFunctionsBinding.cpp}::get_column_list_arg_elem_type ( const ExtArgumentType  ext_arg_column_list_type)

Definition at line 60 of file ExtensionFunctionsBinding.cpp.

References Bool, ColumnListBool, ColumnListDouble, ColumnListFloat, ColumnListInt16, ColumnListInt32, ColumnListInt64, ColumnListInt8, ColumnListTextEncodingDict, Double, Float, Int16, Int32, Int64, Int8, TextEncodingDict, and UNREACHABLE.

Referenced by match_arguments().

+ Here is the caller graph for this function:

bool anonymous_namespace{ExtensionFunctionsBinding.cpp}::is_valid_identifier ( std::string  str)

Definition at line 402 of file ExtensionFunctionsBinding.cpp.

References i.

Referenced by bind_function().

/**
 * Tells whether `str` has the shape of a C-style identifier: a non-empty
 * string whose first character is a letter or an underscore and whose
 * remaining characters are letters, digits or underscores.
 *
 * Character classification is done with the <cctype> functions, hence it
 * follows the currently installed C locale.
 *
 * @param str candidate identifier
 * @return true if `str` is a valid identifier, false otherwise (including
 *         for the empty string)
 */
bool is_valid_identifier(std::string str) {
  if (str.empty()) {
    return false;
  }

  // The <cctype> classifiers take an int that must be representable as
  // unsigned char (or be EOF). Passing a plain char is undefined behaviour
  // for bytes >= 0x80 on platforms where char is signed, so every character
  // is converted to unsigned char first.
  const auto is_head_char = [](const char c) {
    return c == '_' || std::isalpha(static_cast<unsigned char>(c)) != 0;
  };
  const auto is_tail_char = [](const char c) {
    return c == '_' || std::isalnum(static_cast<unsigned char>(c)) != 0;
  };

  if (!is_head_char(str[0])) {
    return false;
  }

  for (size_t i = 1; i < str.size(); ++i) {
    if (!is_tail_char(str[i])) {
      return false;
    }
  }

  return true;
}

+ Here is the caller graph for this function:

static int anonymous_namespace{ExtensionFunctionsBinding.cpp}::match_arguments ( const SQLTypeInfo arg_type,
const bool  is_arg_literal,
int  sig_pos,
const std::vector< ExtArgumentType > &  sig_types,
int &  penalty_score 
)
static

Definition at line 199 of file ExtensionFunctionsBinding.cpp.

References CHECK, ext_arg_type_to_type_info(), GeoLineString, GeoMultiPolygon, GeoPoint, GeoPolygon, get_array_arg_elem_type(), get_column_arg_elem_type(), get_column_list_arg_elem_type(), SQLTypeInfo::get_compression(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_type(), SQLTypeInfo::get_type_name(), Int64, SQLTypeInfo::is_array(), is_ext_arg_type_array(), is_ext_arg_type_column(), is_ext_arg_type_column_list(), kARRAY, kBIGINT, kBOOLEAN, kCOLUMN, kCOLUMN_LIST, kDECIMAL, kDOUBLE, kENCODING_DICT, kENCODING_NONE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNULLT, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTINYINT, kVARCHAR, match_numeric_argument(), PBool, PDouble, PFloat, PInt16, PInt32, PInt64, PInt8, TextEncodingNone, to_string(), ExtensionFunctionsWhitelist::toString(), and UNREACHABLE.

Referenced by bind_function().

203  {
204  /*
205  Returns non-negative integer `offset` if `arg_type` and
206  `sig_types[sig_pos:sig_pos + offset]` match.
207 
208  The `offset` value can be interpreted as the number of extension
209  function arguments that is consumed by the given `arg_type`. For
210  instance, for scalar types the offset is always 1, for array
211  types the offset is 2: one argument for array pointer value and
212  one argument for the array size value, etc.
213 
214  Returns -1 when the types of an argument and the corresponding
215  extension function argument(s) mismatch, or when downcasting would
216  be effective.
217 
218  In case of non-negative `offset` result, the function updates
219  penalty_score argument as follows:
220 
221  add 1000 if arg_type is non-scalar, otherwise:
222  add 1000 * sizeof(sig_type) / sizeof(arg_type)
223  add 1000000 if type kinds differ (integer vs double, for instance)
224 
225  */
226  int max_pos = sig_types.size() - 1;
227  if (sig_pos > max_pos) {
228  return -1;
229  }
230  auto sig_type = sig_types[sig_pos];
231  switch (arg_type.get_type()) {
232  case kBOOLEAN:
233  case kTINYINT:
234  case kSMALLINT:
235  case kINT:
236  case kBIGINT:
237  case kFLOAT:
238  case kDOUBLE:
239  case kDECIMAL:
240  case kNUMERIC:
241  return match_numeric_argument(arg_type, is_arg_literal, sig_type, penalty_score);
242  case kPOINT:
243  case kLINESTRING:
244  if ((sig_type == ExtArgumentType::PInt8 || sig_type == ExtArgumentType::PInt16 ||
245  sig_type == ExtArgumentType::PInt32 || sig_type == ExtArgumentType::PInt64 ||
246  sig_type == ExtArgumentType::PFloat || sig_type == ExtArgumentType::PDouble) &&
247  sig_pos < max_pos && sig_types[sig_pos + 1] == ExtArgumentType::Int64) {
248  penalty_score += 1000;
249  return 2;
250  } else if (sig_type == ExtArgumentType::GeoPoint ||
251  sig_type == ExtArgumentType::GeoLineString) {
252  penalty_score += 1000;
253  return 1;
254  }
255  return -1;
256  case kARRAY:
257  if ((sig_type == ExtArgumentType::PInt8 || sig_type == ExtArgumentType::PInt16 ||
258  sig_type == ExtArgumentType::PInt32 || sig_type == ExtArgumentType::PInt64 ||
259  sig_type == ExtArgumentType::PFloat || sig_type == ExtArgumentType::PDouble ||
260  sig_type == ExtArgumentType::PBool) &&
261  sig_pos < max_pos && sig_types[sig_pos + 1] == ExtArgumentType::Int64) {
262  penalty_score += 1000;
263  return 2;
264  } else if (is_ext_arg_type_array(sig_type)) {
265  // array arguments must match exactly
266  CHECK(arg_type.is_array());
267  const auto sig_type_ti =
269  if (arg_type.get_elem_type() == kBOOLEAN && sig_type_ti.get_type() == kTINYINT) {
270  /* Boolean array has the same low-level structure as Int8 array. */
271  penalty_score += 1000;
272  return 1;
273  } else if (arg_type.get_elem_type().get_type() == sig_type_ti.get_type()) {
274  penalty_score += 1000;
275  return 1;
276  } else {
277  return -1;
278  }
279  }
280  break;
281  case kPOLYGON:
282  if (sig_type == ExtArgumentType::PInt8 && sig_pos + 3 < max_pos &&
283  sig_types[sig_pos + 1] == ExtArgumentType::Int64 &&
284  sig_types[sig_pos + 2] == ExtArgumentType::PInt32 &&
285  sig_types[sig_pos + 3] == ExtArgumentType::Int64) {
286  penalty_score += 1000;
287  return 4;
288  } else if (sig_type == ExtArgumentType::GeoPolygon) {
289  penalty_score += 1000;
290  return 1;
291  }
292  break;
293  case kMULTIPOLYGON:
294  if (sig_type == ExtArgumentType::PInt8 && sig_pos + 5 < max_pos &&
295  sig_types[sig_pos + 1] == ExtArgumentType::Int64 &&
296  sig_types[sig_pos + 2] == ExtArgumentType::PInt32 &&
297  sig_types[sig_pos + 3] == ExtArgumentType::Int64 &&
298  sig_types[sig_pos + 4] == ExtArgumentType::PInt32 &&
299  sig_types[sig_pos + 5] == ExtArgumentType::Int64) {
300  penalty_score += 1000;
301  return 6;
302  } else if (sig_type == ExtArgumentType::GeoMultiPolygon) {
303  penalty_score += 1000;
304  return 1;
305  }
306  break;
307  case kNULLT: // NULL maps to a pointer and size argument
308  if ((sig_type == ExtArgumentType::PInt8 || sig_type == ExtArgumentType::PInt16 ||
309  sig_type == ExtArgumentType::PInt32 || sig_type == ExtArgumentType::PInt64 ||
310  sig_type == ExtArgumentType::PFloat || sig_type == ExtArgumentType::PDouble ||
311  sig_type == ExtArgumentType::PBool) &&
312  sig_pos < max_pos && sig_types[sig_pos + 1] == ExtArgumentType::Int64) {
313  penalty_score += 1000;
314  return 2;
315  }
316  break;
317  case kCOLUMN:
318  if (is_ext_arg_type_column(sig_type)) {
319  // column arguments must match exactly
320  const auto sig_type_ti =
322  if (arg_type.get_elem_type() == kBOOLEAN && sig_type_ti.get_type() == kTINYINT) {
323  /* Boolean column has the same low-level structure as Int8 column. */
324  penalty_score += 1000;
325  return 1;
326  } else if (arg_type.get_elem_type().get_type() == sig_type_ti.get_type()) {
327  penalty_score += 1000;
328  return 1;
329  } else {
330  return -1;
331  }
332  }
333  break;
334  case kCOLUMN_LIST:
335  if (is_ext_arg_type_column_list(sig_type)) {
336  // column_list arguments must match exactly
337  const auto sig_type_ti =
339  if (arg_type.get_elem_type() == kBOOLEAN && sig_type_ti.get_type() == kTINYINT) {
340  /* Boolean column_list has the same low-level structure as Int8 column_list. */
341  penalty_score += 10000;
342  return 1;
343  } else if (arg_type.get_elem_type().get_type() == sig_type_ti.get_type()) {
344  penalty_score += 10000;
345  return 1;
346  } else {
347  return -1;
348  }
349  }
350  break;
351  case kVARCHAR:
352  if (sig_type != ExtArgumentType::TextEncodingNone) {
353  return -1;
354  }
355  switch (arg_type.get_compression()) {
356  case kENCODING_NONE:
357  penalty_score += 1000;
358  return 1;
359  case kENCODING_DICT:
360  return -1;
361  // Todo (todd): Evaluate when and where we can tranlate to dictionary-encoded
362  default:
363  UNREACHABLE();
364  }
365  case kTEXT:
366  if (sig_type != ExtArgumentType::TextEncodingNone) {
367  return -1;
368  }
369  switch (arg_type.get_compression()) {
370  case kENCODING_NONE:
371  penalty_score += 1000;
372  return 1;
373  case kENCODING_DICT:
374  return -1;
375  default:
376  UNREACHABLE();
377  }
378  /* Not implemented types:
379  kCHAR
380  kTIME
381  kTIMESTAMP
382  kDATE
383  kINTERVAL_DAY_TIME
384  kINTERVAL_YEAR_MONTH
385  kGEOMETRY
386  kGEOGRAPHY
387  kEVAL_CONTEXT_TYPE
388  kVOID
389  kCURSOR
390  */
391  default:
392  throw std::runtime_error(std::string(__FILE__) + "#" + std::to_string(__LINE__) +
393  ": support for " + arg_type.get_type_name() +
394  "(type=" + std::to_string(arg_type.get_type()) + ")" +
395  +" not implemented: \n pos=" + std::to_string(sig_pos) +
396  " max_pos=" + std::to_string(max_pos) + "\n sig_types=(" +
397  ExtensionFunctionsWhitelist::toString(sig_types) + ")");
398  }
399  return -1;
400 }
ExtArgumentType get_array_arg_elem_type(const ExtArgumentType ext_arg_array_type)
bool is_ext_arg_type_column(const ExtArgumentType ext_arg_type)
#define UNREACHABLE()
Definition: Logger.h:253
ExtArgumentType get_column_list_arg_elem_type(const ExtArgumentType ext_arg_column_list_type)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::string to_string(char const *&&v)
bool is_ext_arg_type_column_list(const ExtArgumentType ext_arg_type)
bool is_ext_arg_type_array(const ExtArgumentType ext_arg_type)
ExtArgumentType get_column_arg_elem_type(const ExtArgumentType ext_arg_column_type)
static int match_numeric_argument(const SQLTypeInfo &arg_type_info, const bool is_arg_literal, const ExtArgumentType &sig_ext_arg_type, int32_t &penalty_score)
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::string get_type_name() const
Definition: sqltypes.h:432
static std::string toString(const std::vector< ExtensionFunction > &ext_funcs, std::string tab="")
#define CHECK(condition)
Definition: Logger.h:209
Definition: sqltypes.h:45
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:850
bool is_array() const
Definition: sqltypes.h:517
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static int anonymous_namespace{ExtensionFunctionsBinding.cpp}::match_numeric_argument ( const SQLTypeInfo arg_type_info,
const bool  is_arg_literal,
const ExtArgumentType sig_ext_arg_type,
int32_t &  penalty_score 
)
static

Definition at line 107 of file ExtensionFunctionsBinding.cpp.

References CHECK, CHECK_GE, CHECK_GT, CHECK_LE, ext_arg_type_to_type_info(), SQLTypeInfo::get_numeric_scalar_scale(), SQLTypeInfo::get_type(), SQLTypeInfo::is_numeric_scalar_auto_castable(), kBIGINT, kBOOLEAN, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, and kTINYINT.

Referenced by match_arguments().

110  {
111  const auto arg_type = arg_type_info.get_type();
112  CHECK(arg_type == kBOOLEAN || arg_type == kTINYINT || arg_type == kSMALLINT ||
113  arg_type == kINT || arg_type == kBIGINT || arg_type == kFLOAT ||
114  arg_type == kDOUBLE || arg_type == kDECIMAL || arg_type == kNUMERIC);
115  // Todo (todd): Add support for timestamp, date, and time types
116  const auto sig_type_info = ext_arg_type_to_type_info(sig_ext_arg_type);
117  const auto sig_type = sig_type_info.get_type();
118 
119  // If we can't legally auto-cast to sig_type, abort
120  if (!arg_type_info.is_numeric_scalar_auto_castable(sig_type_info)) {
121  return -1;
122  }
123 
124  // We now compare a measure of the scale of the sig_type with the
125  // arg_type, which provides a basis for scoring the match between
126  // the two. Note that get_numeric_scalar_scale for the most part
127  // returns the logical byte width of the type, with a few caveats
128  // for decimals and timestamps described in more depth in comments
129  // in the function itself. Also even though for example float and
130  // int types return 4 (as in 4 bytes), and double and bigint types
131  // return 8, a fp32 type cannot express every 32-bit integer (even
132  // if it can cover a larger absolute range), and an fp64 type
133  // likewise cannot express every 64-bit integer. With the aim to
134  // minimize the precision loss from casting (always precise) integer
135  // value to (imprecise) floating point value, in the case of integer
136  // inputs, we'll penalize wider floating point argument types least
137  // by a specific scale transformation (see the implementation
138  // below). For instance, casting tinyint to fp64 is prefered over
139  // casting it to fp32 to minimize precision loss.
140  const bool is_integer_to_fp_cast = (arg_type == kTINYINT || arg_type == kSMALLINT ||
141  arg_type == kINT || arg_type == kBIGINT) &&
142  (sig_type == kFLOAT || sig_type == kDOUBLE);
143 
144  const auto arg_type_relative_scale = arg_type_info.get_numeric_scalar_scale();
145  CHECK_GE(arg_type_relative_scale, 1);
146  CHECK_LE(arg_type_relative_scale, 8);
147  auto sig_type_relative_scale = sig_type_info.get_numeric_scalar_scale();
148  CHECK_GE(sig_type_relative_scale, 1);
149  CHECK_LE(sig_type_relative_scale, 8);
150 
151  if (is_integer_to_fp_cast) {
152  // transform fp scale: 4 becomes 16, 8 remains 8
153  sig_type_relative_scale = (3 - (sig_type_relative_scale >> 2)) << 3;
154  }
155 
156  // We do not allow auto-casting to types with less scale/precision
157  // within the same type family.
158  CHECK_GE(sig_type_relative_scale, arg_type_relative_scale);
159 
160  // Calculate the ratio of the sig_type by the arg_type, per the above check will be >= 1
161  const auto sig_type_scale_gain_ratio =
162  sig_type_relative_scale / arg_type_relative_scale;
163  CHECK_GE(sig_type_scale_gain_ratio, 1);
164 
165  // Following the old bespoke scoring logic this function replaces, we heavily penalize
166  // any casts that move ints to floats/doubles for the precision-loss reasons above
167  // Arguably all integers in the tinyint and smallint can be fully specified with both
168  // float and double types, but we treat them the same as int and bigint types here.
169  const int32_t type_family_cast_penalty_score = is_integer_to_fp_cast ? 1001000 : 1000;
170 
171  int32_t scale_cast_penalty_score;
172 
173  // The following logic is new. Basically there are strong reasons to
174  // prefer the promotion of constant literals to the most precise type possible, as
175  // rather than the type being inherent in the data - that is a column or columns where
176  // a user specified a type (and with any expressions on those columns following our
177  // standard sql casting logic), literal types are given to us by Calcite and do not
178  // necessarily convey any semantic intent (i.e. 10 will be an int, but 10.0 a decimal)
179  // Hence it is better to promote these types to the most precise sig_type available,
180  // while at the same time keeping column expressions as close as possible to the input
181  // types (mainly for performance, we have many float versions of various functions
182  // to allow for greater performance when the underlying data is not of double precision,
183  // and hence there is little benefit of the extra cost of computing double precision
184  // operators on this data)
185  if (is_arg_literal) {
186  scale_cast_penalty_score =
187  (8000 / arg_type_relative_scale) - (1000 * sig_type_scale_gain_ratio);
188  } else {
189  scale_cast_penalty_score = (1000 * sig_type_scale_gain_ratio);
190  }
191 
192  const auto cast_penalty_score =
193  type_family_cast_penalty_score + scale_cast_penalty_score;
194  CHECK_GT(cast_penalty_score, 0);
195  penalty_score += cast_penalty_score;
196  return 1;
197 }
#define CHECK_GE(x, y)
Definition: Logger.h:222
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
#define CHECK_GT(x, y)
Definition: Logger.h:221
bool is_numeric_scalar_auto_castable(const SQLTypeInfo &new_type_info) const
returns true if the sql_type can be cast to the type specified by new_type_info with no loss of preci...
Definition: sqltypes.h:637
#define CHECK_LE(x, y)
Definition: Logger.h:220
int32_t get_numeric_scalar_scale() const
returns integer between 1 and 8 indicating what is roughly equivalent to the logical byte size of a s...
Definition: sqltypes.h:698
#define CHECK(condition)
Definition: Logger.h:209
Definition: sqltypes.h:45
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)

+ Here is the call graph for this function:

+ Here is the caller graph for this function: