OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{ExtensionFunctionsBinding.cpp} Namespace Reference

Functions

ExtArgumentType get_column_arg_elem_type (const ExtArgumentType ext_arg_column_type)
 
ExtArgumentType get_column_list_arg_elem_type (const ExtArgumentType ext_arg_column_list_type)
 
ExtArgumentType get_array_arg_elem_type (const ExtArgumentType ext_arg_array_type)
 
static int match_numeric_argument (const SQLTypeInfo &arg_type_info, const bool is_arg_literal, const ExtArgumentType &sig_ext_arg_type, int32_t &penalty_score)
 
static int match_arguments (const SQLTypeInfo &arg_type, const bool is_arg_literal, int sig_pos, const std::vector< ExtArgumentType > &sig_types, int &penalty_score)
 
bool is_valid_identifier (std::string str)
 

Function Documentation

ExtArgumentType anonymous_namespace{ExtensionFunctionsBinding.cpp}::get_array_arg_elem_type ( const ExtArgumentType  ext_arg_array_type)

Definition at line 87 of file ExtensionFunctionsBinding.cpp.

References ArrayBool, ArrayDouble, ArrayFloat, ArrayInt16, ArrayInt32, ArrayInt64, ArrayInt8, Bool, Double, Float, Int16, Int32, Int64, Int8, and UNREACHABLE.

Referenced by match_arguments().

+ Here is the caller graph for this function:

ExtArgumentType anonymous_namespace{ExtensionFunctionsBinding.cpp}::get_column_arg_elem_type ( const ExtArgumentType  ext_arg_column_type)

Definition at line 36 of file ExtensionFunctionsBinding.cpp.

References Bool, ColumnBool, ColumnDouble, ColumnFloat, ColumnInt16, ColumnInt32, ColumnInt64, ColumnInt8, ColumnTextEncodingDict, ColumnTimestamp, Double, Float, Int16, Int32, Int64, Int8, TextEncodingDict, Timestamp, and UNREACHABLE.

Referenced by match_arguments().

+ Here is the caller graph for this function:

ExtArgumentType anonymous_namespace{ExtensionFunctionsBinding.cpp}::get_column_list_arg_elem_type ( const ExtArgumentType  ext_arg_column_list_type)

Definition at line 62 of file ExtensionFunctionsBinding.cpp.

References Bool, ColumnListBool, ColumnListDouble, ColumnListFloat, ColumnListInt16, ColumnListInt32, ColumnListInt64, ColumnListInt8, ColumnListTextEncodingDict, Double, Float, Int16, Int32, Int64, Int8, TextEncodingDict, and UNREACHABLE.

Referenced by match_arguments().

+ Here is the caller graph for this function:

bool anonymous_namespace{ExtensionFunctionsBinding.cpp}::is_valid_identifier ( std::string  str)

Definition at line 412 of file ExtensionFunctionsBinding.cpp.

Referenced by bind_function().

412  {
413  if (!str.size()) {
414  return false;
415  }
416 
417  if (!(std::isalpha(str[0]) || str[0] == '_')) {
418  return false;
419  }
420 
421  for (size_t i = 1; i < str.size(); i++) {
422  if (!(std::isalnum(str[i]) || str[i] == '_')) {
423  return false;
424  }
425  }
426 
427  return true;
428 }

+ Here is the caller graph for this function:

static int anonymous_namespace{ExtensionFunctionsBinding.cpp}::match_arguments ( const SQLTypeInfo &  arg_type,
const bool  is_arg_literal,
int  sig_pos,
const std::vector< ExtArgumentType > &  sig_types,
int &  penalty_score 
)
static

Definition at line 201 of file ExtensionFunctionsBinding.cpp.

References CHECK, ext_arg_type_to_type_info(), GeoLineString, GeoMultiPolygon, GeoPoint, GeoPolygon, get_array_arg_elem_type(), get_column_arg_elem_type(), get_column_list_arg_elem_type(), SQLTypeInfo::get_compression(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_precision(), SQLTypeInfo::get_type(), SQLTypeInfo::get_type_name(), Int64, SQLTypeInfo::is_array(), is_ext_arg_type_array(), is_ext_arg_type_column(), is_ext_arg_type_column_list(), SQLTypeInfo::is_timestamp(), kARRAY, kBIGINT, kBOOLEAN, kCOLUMN, kCOLUMN_LIST, kDECIMAL, kDOUBLE, kENCODING_DICT, kENCODING_NONE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNULLT, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIMESTAMP, kTINYINT, kVARCHAR, match_numeric_argument(), PBool, PDouble, PFloat, PInt16, PInt32, PInt64, PInt8, TextEncodingNone, to_string(), ExtensionFunctionsWhitelist::toString(), and UNREACHABLE.

Referenced by bind_function().

205  {
206  /*
207  Returns non-negative integer `offset` if `arg_type` and
208  `sig_types[sig_pos:sig_pos + offset]` match.
209 
210  The `offset` value can be interpreted as the number of extension
211  function arguments that are consumed by the given `arg_type`. For
212  instance, for scalar types the offset is always 1, for array
213  types the offset is 2: one argument for array pointer value and
214  one argument for the array size value, etc.
215 
216  Returns -1 when the types of an argument and the corresponding
217  extension function argument(s) mismatch, or when downcasting would
218  be effective.
219 
220  In case of non-negative `offset` result, the function updates
221  penalty_score argument as follows:
222 
223  add 1000 if arg_type is non-scalar, otherwise:
224  add 1000 * sizeof(sig_type) / sizeof(arg_type)
225  add 1000000 if type kinds differ (integer vs double, for instance)
226 
227  */
228  int max_pos = sig_types.size() - 1;
229  if (sig_pos > max_pos) {
230  return -1;
231  }
232  auto sig_type = sig_types[sig_pos];
233  switch (arg_type.get_type()) {
234  case kBOOLEAN:
235  case kTINYINT:
236  case kSMALLINT:
237  case kINT:
238  case kBIGINT:
239  case kFLOAT:
240  case kDOUBLE:
241  case kDECIMAL:
242  case kNUMERIC:
243  return match_numeric_argument(arg_type, is_arg_literal, sig_type, penalty_score);
244  case kPOINT:
245  case kLINESTRING:
246  if ((sig_type == ExtArgumentType::PInt8 || sig_type == ExtArgumentType::PInt16 ||
247  sig_type == ExtArgumentType::PInt32 || sig_type == ExtArgumentType::PInt64 ||
248  sig_type == ExtArgumentType::PFloat || sig_type == ExtArgumentType::PDouble) &&
249  sig_pos < max_pos && sig_types[sig_pos + 1] == ExtArgumentType::Int64) {
250  penalty_score += 1000;
251  return 2;
252  } else if (sig_type == ExtArgumentType::GeoPoint ||
253  sig_type == ExtArgumentType::GeoLineString) {
254  penalty_score += 1000;
255  return 1;
256  }
257  return -1;
258  case kARRAY:
259  if ((sig_type == ExtArgumentType::PInt8 || sig_type == ExtArgumentType::PInt16 ||
260  sig_type == ExtArgumentType::PInt32 || sig_type == ExtArgumentType::PInt64 ||
261  sig_type == ExtArgumentType::PFloat || sig_type == ExtArgumentType::PDouble ||
262  sig_type == ExtArgumentType::PBool) &&
263  sig_pos < max_pos && sig_types[sig_pos + 1] == ExtArgumentType::Int64) {
264  penalty_score += 1000;
265  return 2;
266  } else if (is_ext_arg_type_array(sig_type)) {
267  // array arguments must match exactly
268  CHECK(arg_type.is_array());
269  const auto sig_type_ti =
270  ext_arg_type_to_type_info(get_array_arg_elem_type(sig_type));
271  if (arg_type.get_elem_type() == kBOOLEAN && sig_type_ti.get_type() == kTINYINT) {
272  /* Boolean array has the same low-level structure as Int8 array. */
273  penalty_score += 1000;
274  return 1;
275  } else if (arg_type.get_elem_type().get_type() == sig_type_ti.get_type()) {
276  penalty_score += 1000;
277  return 1;
278  } else {
279  return -1;
280  }
281  }
282  break;
283  case kPOLYGON:
284  if (sig_type == ExtArgumentType::PInt8 && sig_pos + 3 < max_pos &&
285  sig_types[sig_pos + 1] == ExtArgumentType::Int64 &&
286  sig_types[sig_pos + 2] == ExtArgumentType::PInt32 &&
287  sig_types[sig_pos + 3] == ExtArgumentType::Int64) {
288  penalty_score += 1000;
289  return 4;
290  } else if (sig_type == ExtArgumentType::GeoPolygon) {
291  penalty_score += 1000;
292  return 1;
293  }
294  break;
295  case kMULTIPOLYGON:
296  if (sig_type == ExtArgumentType::PInt8 && sig_pos + 5 < max_pos &&
297  sig_types[sig_pos + 1] == ExtArgumentType::Int64 &&
298  sig_types[sig_pos + 2] == ExtArgumentType::PInt32 &&
299  sig_types[sig_pos + 3] == ExtArgumentType::Int64 &&
300  sig_types[sig_pos + 4] == ExtArgumentType::PInt32 &&
301  sig_types[sig_pos + 5] == ExtArgumentType::Int64) {
302  penalty_score += 1000;
303  return 6;
304  } else if (sig_type == ExtArgumentType::GeoMultiPolygon) {
305  penalty_score += 1000;
306  return 1;
307  }
308  break;
309  case kNULLT: // NULL maps to a pointer and size argument
310  if ((sig_type == ExtArgumentType::PInt8 || sig_type == ExtArgumentType::PInt16 ||
311  sig_type == ExtArgumentType::PInt32 || sig_type == ExtArgumentType::PInt64 ||
312  sig_type == ExtArgumentType::PFloat || sig_type == ExtArgumentType::PDouble ||
313  sig_type == ExtArgumentType::PBool) &&
314  sig_pos < max_pos && sig_types[sig_pos + 1] == ExtArgumentType::Int64) {
315  penalty_score += 1000;
316  return 2;
317  }
318  break;
319  case kCOLUMN:
320  if (is_ext_arg_type_column(sig_type)) {
321  // column arguments must match exactly
322  const auto sig_type_ti =
323  ext_arg_type_to_type_info(get_column_arg_elem_type(sig_type));
324  if (arg_type.get_elem_type() == kBOOLEAN && sig_type_ti.get_type() == kTINYINT) {
325  /* Boolean column has the same low-level structure as Int8 column. */
326  penalty_score += 1000;
327  return 1;
328  } else if (arg_type.get_elem_type().get_type() == sig_type_ti.get_type()) {
329  penalty_score += 1000;
330  return 1;
331  } else {
332  return -1;
333  }
334  }
335  break;
336  case kCOLUMN_LIST:
337  if (is_ext_arg_type_column_list(sig_type)) {
338  // column_list arguments must match exactly
339  const auto sig_type_ti =
340  ext_arg_type_to_type_info(get_column_list_arg_elem_type(sig_type));
341  if (arg_type.get_elem_type() == kBOOLEAN && sig_type_ti.get_type() == kTINYINT) {
342  /* Boolean column_list has the same low-level structure as Int8 column_list. */
343  penalty_score += 10000;
344  return 1;
345  } else if (arg_type.get_elem_type().get_type() == sig_type_ti.get_type()) {
346  penalty_score += 10000;
347  return 1;
348  } else {
349  return -1;
350  }
351  }
352  break;
353  case kVARCHAR:
354  if (sig_type != ExtArgumentType::TextEncodingNone) {
355  return -1;
356  }
357  switch (arg_type.get_compression()) {
358  case kENCODING_NONE:
359  penalty_score += 1000;
360  return 1;
361  case kENCODING_DICT:
362  return -1;
363  // Todo (todd): Evaluate when and where we can translate to dictionary-encoded
364  default:
365  UNREACHABLE();
366  }
367  case kTEXT:
368  if (sig_type != ExtArgumentType::TextEncodingNone) {
369  return -1;
370  }
371  switch (arg_type.get_compression()) {
372  case kENCODING_NONE:
373  penalty_score += 1000;
374  return 1;
375  case kENCODING_DICT:
376  return -1;
377  default:
378  UNREACHABLE();
379  }
380  case kTIMESTAMP:
381  if (arg_type.is_timestamp()) {
382  if (arg_type.get_precision() != 9) {
383  return -1;
384  }
385  penalty_score += 1000;
386  return 1;
387  }
388  break;
389  /* Not implemented types:
390  kCHAR
391  kTIME
392  kDATE
393  kINTERVAL_DAY_TIME
394  kINTERVAL_YEAR_MONTH
395  kGEOMETRY
396  kGEOGRAPHY
397  kEVAL_CONTEXT_TYPE
398  kVOID
399  kCURSOR
400  */
401  default:
402  throw std::runtime_error(std::string(__FILE__) + "#" + std::to_string(__LINE__) +
403  ": support for " + arg_type.get_type_name() +
404  "(type=" + std::to_string(arg_type.get_type()) + ")" +
405  +" not implemented: \n pos=" + std::to_string(sig_pos) +
406  " max_pos=" + std::to_string(max_pos) + "\n sig_types=(" +
407  ExtensionFunctionsWhitelist::toString(sig_types) + ")");
408  }
409  return -1;
410 }
ExtArgumentType get_array_arg_elem_type(const ExtArgumentType ext_arg_array_type)
bool is_timestamp() const
Definition: sqltypes.h:895
bool is_ext_arg_type_column(const ExtArgumentType ext_arg_type)
#define UNREACHABLE()
Definition: Logger.h:267
ExtArgumentType get_column_list_arg_elem_type(const ExtArgumentType ext_arg_column_list_type)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::string to_string(char const *&&v)
bool is_ext_arg_type_column_list(const ExtArgumentType ext_arg_type)
bool is_ext_arg_type_array(const ExtArgumentType ext_arg_type)
ExtArgumentType get_column_arg_elem_type(const ExtArgumentType ext_arg_column_type)
int get_precision() const
Definition: sqltypes.h:332
static int match_numeric_argument(const SQLTypeInfo &arg_type_info, const bool is_arg_literal, const ExtArgumentType &sig_ext_arg_type, int32_t &penalty_score)
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::string get_type_name() const
Definition: sqltypes.h:443
static std::string toString(const std::vector< ExtensionFunction > &ext_funcs, std::string tab="")
#define CHECK(condition)
Definition: Logger.h:223
Definition: sqltypes.h:45
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:865
bool is_array() const
Definition: sqltypes.h:518
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static int anonymous_namespace{ExtensionFunctionsBinding.cpp}::match_numeric_argument ( const SQLTypeInfo &  arg_type_info,
const bool  is_arg_literal,
const ExtArgumentType &  sig_ext_arg_type,
int32_t &  penalty_score 
)
static

Definition at line 109 of file ExtensionFunctionsBinding.cpp.

References CHECK, CHECK_GE, CHECK_GT, CHECK_LE, ext_arg_type_to_type_info(), SQLTypeInfo::get_numeric_scalar_scale(), SQLTypeInfo::get_type(), SQLTypeInfo::is_numeric_scalar_auto_castable(), kBIGINT, kBOOLEAN, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, and kTINYINT.

Referenced by match_arguments().

112  {
113  const auto arg_type = arg_type_info.get_type();
114  CHECK(arg_type == kBOOLEAN || arg_type == kTINYINT || arg_type == kSMALLINT ||
115  arg_type == kINT || arg_type == kBIGINT || arg_type == kFLOAT ||
116  arg_type == kDOUBLE || arg_type == kDECIMAL || arg_type == kNUMERIC);
117  // Todo (todd): Add support for timestamp, date, and time types
118  const auto sig_type_info = ext_arg_type_to_type_info(sig_ext_arg_type);
119  const auto sig_type = sig_type_info.get_type();
120 
121  // If we can't legally auto-cast to sig_type, abort
122  if (!arg_type_info.is_numeric_scalar_auto_castable(sig_type_info)) {
123  return -1;
124  }
125 
126  // We now compare a measure of the scale of the sig_type with the
127  // arg_type, which provides a basis for scoring the match between
128  // the two. Note that get_numeric_scalar_scale for the most part
129  // returns the logical byte width of the type, with a few caveats
130  // for decimals and timestamps described in more depth in comments
131  // in the function itself. Also even though for example float and
132  // int types return 4 (as in 4 bytes), and double and bigint types
133  // return 8, a fp32 type cannot express every 32-bit integer (even
134  // if it can cover a larger absolute range), and an fp64 type
135  // likewise cannot express every 64-bit integer. With the aim to
136  // minimize the precision loss from casting (always precise) integer
137  // value to (imprecise) floating point value, in the case of integer
138  // inputs, we'll penalize wider floating point argument types least
139  // by a specific scale transformation (see the implementation
140  below). For instance, casting tinyint to fp64 is preferred over
141  // casting it to fp32 to minimize precision loss.
142  const bool is_integer_to_fp_cast = (arg_type == kTINYINT || arg_type == kSMALLINT ||
143  arg_type == kINT || arg_type == kBIGINT) &&
144  (sig_type == kFLOAT || sig_type == kDOUBLE);
145 
146  const auto arg_type_relative_scale = arg_type_info.get_numeric_scalar_scale();
147  CHECK_GE(arg_type_relative_scale, 1);
148  CHECK_LE(arg_type_relative_scale, 8);
149  auto sig_type_relative_scale = sig_type_info.get_numeric_scalar_scale();
150  CHECK_GE(sig_type_relative_scale, 1);
151  CHECK_LE(sig_type_relative_scale, 8);
152 
153  if (is_integer_to_fp_cast) {
154  // transform fp scale: 4 becomes 16, 8 remains 8
155  sig_type_relative_scale = (3 - (sig_type_relative_scale >> 2)) << 3;
156  }
157 
158  // We do not allow auto-casting to types with less scale/precision
159  // within the same type family.
160  CHECK_GE(sig_type_relative_scale, arg_type_relative_scale);
161 
162  // Calculate the ratio of the sig_type scale to the arg_type scale; per the above check it will be >= 1
163  const auto sig_type_scale_gain_ratio =
164  sig_type_relative_scale / arg_type_relative_scale;
165  CHECK_GE(sig_type_scale_gain_ratio, 1);
166 
167  // Following the old bespoke scoring logic this function replaces, we heavily penalize
168  // any casts that move ints to floats/doubles for the precision-loss reasons above
169  // Arguably all integers in the tinyint and smallint can be fully specified with both
170  // float and double types, but we treat them the same as int and bigint types here.
171  const int32_t type_family_cast_penalty_score = is_integer_to_fp_cast ? 1001000 : 1000;
172 
173  int32_t scale_cast_penalty_score;
174 
175  // The following logic is new. Basically there are strong reasons to
176  // prefer the promotion of constant literals to the most precise type possible, as
177  // rather than the type being inherent in the data - that is a column or columns where
178  // a user specified a type (and with any expressions on those columns following our
179  // standard sql casting logic), literal types are given to us by Calcite and do not
180  // necessarily convey any semantic intent (i.e. 10 will be an int, but 10.0 a decimal)
181  // Hence it is better to promote these types to the most precise sig_type available,
182  // while at the same time keeping column expressions as close as possible to the input
183  // types (mainly for performance, we have many float versions of various functions
184  // to allow for greater performance when the underlying data is not of double precision,
185  // and hence there is little benefit of the extra cost of computing double precision
186  // operators on this data)
187  if (is_arg_literal) {
188  scale_cast_penalty_score =
189  (8000 / arg_type_relative_scale) - (1000 * sig_type_scale_gain_ratio);
190  } else {
191  scale_cast_penalty_score = (1000 * sig_type_scale_gain_ratio);
192  }
193 
194  const auto cast_penalty_score =
195  type_family_cast_penalty_score + scale_cast_penalty_score;
196  CHECK_GT(cast_penalty_score, 0);
197  penalty_score += cast_penalty_score;
198  return 1;
199 }
#define CHECK_GE(x, y)
Definition: Logger.h:236
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
#define CHECK_GT(x, y)
Definition: Logger.h:235
bool is_numeric_scalar_auto_castable(const SQLTypeInfo &new_type_info) const
returns true if the sql_type can be cast to the type specified by new_type_info with no loss of preci...
Definition: sqltypes.h:652
#define CHECK_LE(x, y)
Definition: Logger.h:234
int32_t get_numeric_scalar_scale() const
returns integer between 1 and 8 indicating what is roughly equivalent to the logical byte size of a s...
Definition: sqltypes.h:713
#define CHECK(condition)
Definition: Logger.h:223
Definition: sqltypes.h:45
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)

+ Here is the call graph for this function:

+ Here is the caller graph for this function: