OmniSciDB  ca0c39ec8f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExtensionFunctionsBinding.cpp File Reference
#include "ExtensionFunctionsBinding.h"
#include <algorithm>
#include "ExternalExecutor.h"
+ Include dependency graph for ExtensionFunctionsBinding.cpp:

Go to the source code of this file.

Namespaces

 anonymous_namespace{ExtensionFunctionsBinding.cpp}
 

Functions

ExtArgumentType anonymous_namespace{ExtensionFunctionsBinding.cpp}::get_column_arg_elem_type (const ExtArgumentType ext_arg_column_type)
 
ExtArgumentType anonymous_namespace{ExtensionFunctionsBinding.cpp}::get_column_list_arg_elem_type (const ExtArgumentType ext_arg_column_list_type)
 
ExtArgumentType anonymous_namespace{ExtensionFunctionsBinding.cpp}::get_array_arg_elem_type (const ExtArgumentType ext_arg_array_type)
 
static int anonymous_namespace{ExtensionFunctionsBinding.cpp}::match_numeric_argument (const SQLTypeInfo &arg_type_info, const bool is_arg_literal, const ExtArgumentType &sig_ext_arg_type, int32_t &penalty_score)
 
static int anonymous_namespace{ExtensionFunctionsBinding.cpp}::match_arguments (const SQLTypeInfo &arg_type, const bool is_arg_literal, int sig_pos, const std::vector< ExtArgumentType > &sig_types, int &penalty_score)
 
bool anonymous_namespace{ExtensionFunctionsBinding.cpp}::is_valid_identifier (std::string str)
 
template<typename T >
std::tuple< T, std::vector
< SQLTypeInfo > > 
bind_function (std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
 
const std::tuple
< table_functions::TableFunction,
std::vector< SQLTypeInfo > > 
bind_table_function (std::string name, Analyzer::ExpressionPtrVector input_args, const std::vector< table_functions::TableFunction > &table_funcs, const bool is_gpu)
 
ExtensionFunction bind_function (std::string name, Analyzer::ExpressionPtrVector func_args)
 
ExtensionFunction bind_function (std::string name, Analyzer::ExpressionPtrVector func_args, const bool is_gpu)
 
ExtensionFunction bind_function (const Analyzer::FunctionOper *function_oper, const bool is_gpu)
 
const std::tuple
< table_functions::TableFunction,
std::vector< SQLTypeInfo > > 
bind_table_function (std::string name, Analyzer::ExpressionPtrVector input_args, const bool is_gpu)
 

Function Documentation

template<typename T >
std::tuple<T, std::vector<SQLTypeInfo> > bind_function ( std::string  name,
Analyzer::ExpressionPtrVector  func_args,
const std::vector< T > &  ext_funcs,
const std::string  processor 
)

Definition at line 502 of file ExtensionFunctionsBinding.cpp.

References CHECK, CHECK_EQ, CHECK_LE, DEFAULT_ROW_MULTIPLIER_SUFFIX, ext_arg_type_to_type_info(), logger::FATAL, generate_column_list_type(), generate_column_type(), SQLTypeInfo::get_type(), anonymous_namespace{ExtensionFunctionsBinding.cpp}::is_valid_identifier(), kCOLUMN_LIST, kINT, kNULLT, kTEXT, LOG, anonymous_namespace{ExtensionFunctionsBinding.cpp}::match_arguments(), setup::name, heavydb.dtypes::T, to_string(), SQLTypeInfo::to_string(), ExtensionFunctionsWhitelist::toString(), and UNREACHABLE.

Referenced by bind_function(), CodeGenerator::codegenFunctionOper(), and RelAlgTranslator::translateFunction().

506  {
507  /* Worker function: picks, among the candidates in ext_funcs, the one whose
508 
509  signature best matches the types of func_args, and returns it together with
510 
511  the chosen argument type variant. Template type T must implement
512  std::vector<ExtArgumentType> getInputArgs(); getRet() and toStringSQL() are also called on T below. */
513  /*
514  Return extension function/table function that has the following
515  properties
516 
517  1. each argument type in `func_args` matches with extension
518  function argument types.
519 
520  For scalar types, the matching means that the types are either
521  equal or the argument type is smaller than the corresponding
522  extension function argument type. This ensures that no
523  information is lost when casting of argument values is
524  required.
525 
526  For array and geo types, the matching means that the argument
527  type matches exactly with a group of extension function
528  argument types. See `match_arguments`.
529 
530  2. has minimal penalty score among all implementations of the
531  extension function with given `name`. The score is accumulated by
532  `match_arguments`, plus the logical size of the return type (see below).
533 
534  It is assumed that the function being bound (`name`) and the extension
535  functions in ext_funcs have the same name.
536  */
537  if (!is_valid_identifier(name)) {  // reject names that fail is_valid_identifier before any matching
538  throw NativeExecutionError(
539  "Cannot bind function with invalid UDF/UDTF function name: " + name);
540  }
541 
542  int minimal_score = std::numeric_limits<int>::max();  // best (lowest) penalty seen so far
543  int index = -1;  // running position within ext_funcs
544  int optimal = -1;  // index of the best candidate; stays -1 when nothing matches
545  int optimal_variant = -1;  // index into type_infos_variants of the best variant
546 
547  std::vector<SQLTypeInfo> type_infos_input;  // one type per call argument
548  std::vector<bool> args_are_constants;  // parallel to type_infos_input
549  for (auto atype : func_args) {
550  if constexpr (std::is_same_v<T, table_functions::TableFunction>) {
551  if (dynamic_cast<const Analyzer::ColumnVar*>(atype.get())) {  // table function fed by a column
552  SQLTypeInfo type_info = atype->get_type_info();
553  auto ti = generate_column_type(type_info);
554  if (ti.get_subtype() == kNULLT) {  // kNULLT subtype is treated as: no column type for this element type
555  throw std::runtime_error(std::string(__FILE__) + "#" +
556  std::to_string(__LINE__) +
557  ": column support for type info " +
558  type_info.to_string() + " is not implemented");
559  }
560  type_infos_input.push_back(ti);
561  args_are_constants.push_back(type_info.get_type() != kTEXT);  // column arguments are flagged true unless their type is kTEXT
562  continue;
563  }
564  }
565  type_infos_input.push_back(atype->get_type_info());
566  if (dynamic_cast<const Analyzer::Constant*>(atype.get())) {  // flag is true only for Analyzer::Constant arguments
567  args_are_constants.push_back(true);
568  } else {
569  args_are_constants.push_back(false);
570  }
571  }
572  CHECK_EQ(type_infos_input.size(), args_are_constants.size());
573 
574  if (type_infos_input.size() == 0 && ext_funcs.size() > 0) {  // call without arguments
575  CHECK_EQ(ext_funcs.size(), static_cast<size_t>(1));  // exactly one candidate is expected
576  CHECK_EQ(ext_funcs[0].getInputArgs().size(), static_cast<size_t>(0));  // and it must take no inputs
577  if constexpr (std::is_same_v<T, table_functions::TableFunction>) {
578  CHECK(ext_funcs[0].hasNonUserSpecifiedOutputSize());
579  }
580  std::vector<SQLTypeInfo> empty_type_info_variant(0);
581  return {ext_funcs[0], empty_type_info_variant};  // returned directly, no scoring needed
582  }
583 
584  // clang-format off
585  /*
586  Table functions may have arguments such as ColumnList that collect
587  neighboring columns with the same data type into a single object.
588  Here we compute all possible combinations of mapping a subset of
589  columns into column lists. For example, if the types of function
590  arguments are (as given in func_args argument)
591 
592  (Column<int>, Column<int>, Column<int>, int)
593 
594  then the computed variants will be
595 
596  (Column<int>, Column<int>, Column<int>, int)
597  (Column<int>, Column<int>, ColumnList[1]<int>, int)
598  (Column<int>, ColumnList[1]<int>, Column<int>, int)
599  (Column<int>, ColumnList[2]<int>, int)
600  (ColumnList[1]<int>, Column<int>, Column<int>, int)
601  (ColumnList[1]<int>, Column<int>, ColumnList[1]<int>, int)
602  (ColumnList[2]<int>, Column<int>, int)
603  (ColumnList[3]<int>, int)
604 
605  where the integers in [..] indicate the number of collected
606  columns. In the SQLTypeInfo instance, this number is stored in the
607  SQLTypeInfo dimension attribute.
608 
609  As an example, let us consider a SQL query containing the
610  following expression calling a UDTF foo:
611 
612  table(foo(cursor(select a, b, c from tableofints), 1))
613 
614  Here follows a list of table functions and the corresponding
615  optimal argument type variants that are computed for the given
616  query expression:
617 
618  UDTF: foo(ColumnList<int>, RowMultiplier) -> Column<int>
619  (ColumnList[3]<int>, int) # a, b, c are all collected to column_list
620 
621  UDTF: foo(Column<int>, ColumnList<int>, RowMultiplier) -> Column<int>
622  (Column<int>, ColumnList[2]<int>, int) # b and c are collected to column_list
623 
624  UDTF: foo(Column<int>, Column<int>, Column<int>, RowMultiplier) -> Column<int>
625  (Column<int>, Column<int>, Column<int>, int)
626  */
627  // clang-format on
628  std::vector<std::vector<SQLTypeInfo>> type_infos_variants;
629  for (auto ti : type_infos_input) {
630  if (type_infos_variants.begin() == type_infos_variants.end()) {  // first argument: seed the variant list
631  type_infos_variants.push_back({ti});
632  if constexpr (std::is_same_v<T, table_functions::TableFunction>) {
633  if (ti.is_column()) {
634  auto mti = generate_column_list_type(ti);
635  if (mti.get_subtype() == kNULLT) {
636  continue; // skip unsupported element type.
637  }
638  mti.set_dimension(1);  // a column list holding one column
639  type_infos_variants.push_back({mti});
640  }
641  }
642  continue;
643  }
644  std::vector<std::vector<SQLTypeInfo>> new_type_infos_variants;  // variants where ti joins or starts a column list
645  for (auto& type_infos : type_infos_variants) {
646  if constexpr (std::is_same_v<T, table_functions::TableFunction>) {
647  if (ti.is_column()) {
648  auto new_type_infos = type_infos; // makes a copy
649  const auto& last = type_infos.back();
650  if (last.is_column_list() && last.has_same_itemtype(ti)) {
651  // last column_list consumes column argument if item types match
652  new_type_infos.back().set_dimension(last.get_dimension() + 1);
653  } else {
654  // add column as column_list argument
655  auto mti = generate_column_list_type(ti);
656  if (mti.get_subtype() == kNULLT) {
657  // skip unsupported element type
658  type_infos.push_back(ti);  // only the plain-column extension is kept for this variant
659  continue;
660  }
661  mti.set_dimension(1);
662  new_type_infos.push_back(mti);
663  }
664  new_type_infos_variants.push_back(new_type_infos);
665  }
666  }
667  type_infos.push_back(ti);  // existing variant extended with ti as a plain argument
668  }
669  type_infos_variants.insert(type_infos_variants.end(),  // append the column-list variants after the plain ones
670  new_type_infos_variants.begin(),
671  new_type_infos_variants.end());
672  }
673 
674  // Find extension function that gives the best match on the set of
675  // argument type variants:
676  for (auto ext_func : ext_funcs) {
677  index++;  // position of ext_func within ext_funcs
678 
679  auto ext_func_args = ext_func.getInputArgs();
680  int index_variant = -1;
681  for (const auto& type_infos : type_infos_variants) {
682  index_variant++;
683  int penalty_score = 0;
684  int pos = 0;  // current position within the signature ext_func_args
685  int original_input_idx = 0;  // index into args_are_constants (one entry per original argument)
686  CHECK_LE(type_infos.size(), args_are_constants.size());
687  for (const auto& ti : type_infos) {
688  int offset = match_arguments(ti,
689  args_are_constants[original_input_idx],
690  pos,
691  ext_func_args,
692  penalty_score);
693  if (offset < 0) {
694  // ti does not match with ext_func argument
695  pos = -1;
696  break;
697  }
698  if (ti.get_type() == kCOLUMN_LIST) {
699  original_input_idx += ti.get_dimension();  // a column list covers get_dimension() original arguments
700  } else {
701  original_input_idx++;
702  }
703  pos += offset;  // advance within the signature by the offset match_arguments returned
704  }
705 
706  if ((size_t)pos == ext_func_args.size()) {  // pos is -1 after a mismatch; a match must consume the whole signature
707  CHECK_EQ(args_are_constants.size(), original_input_idx);
708  // prefer smaller return types
709  penalty_score += ext_arg_type_to_type_info(ext_func.getRet()).get_logical_size();
710  if (penalty_score < minimal_score) {  // strict comparison keeps the earliest candidate/variant on ties
711  optimal = index;
712  minimal_score = penalty_score;
713  optimal_variant = index_variant;
714  }
715  }
716  }
717  }
718 
719  if (optimal == -1) {
720  /* no extension function found whose argument types would match
721  the types of `func_args` */
722  auto sarg_types = ExtensionFunctionsWhitelist::toString(type_infos_input);
723  std::string message;
724  if (!ext_funcs.size()) {  // no candidates at all for this name
725  message = "Function " + name + "(" + sarg_types + ") not supported.";
726  throw ExtensionFunctionBindingError(message);
727  } else {
728  if constexpr (std::is_same_v<T, table_functions::TableFunction>) {
729  message = "Could not bind " + name + "(" + sarg_types + ") to any " + processor +
730  " UDTF implementation.";
731  } else if constexpr (std::is_same_v<T, ExtensionFunction>) {
732  message = "Could not bind " + name + "(" + sarg_types + ") to any " + processor +
733  " UDF implementation.";
734  } else {
735  LOG(FATAL) << "bind_function: unknown extension function type "
736  << typeid(T).name();
737  }
738  message += "\n Existing extension function implementations:";
739  for (const auto& ext_func : ext_funcs) {
740  // Do not show functions missing the sizer argument
741  if constexpr (std::is_same_v<T, table_functions::TableFunction>)
742  if (ext_func.useDefaultSizer())
743  continue;
744  message += "\n " + ext_func.toStringSQL();
745  }
746  }
747  throw ExtensionFunctionBindingError(message);
748  }
749 
750  // Functions with "_default_" suffix only exist for calcite
751  if constexpr (std::is_same_v<T, table_functions::TableFunction>) {
752  if (ext_funcs[optimal].hasUserSpecifiedOutputSizeMultiplier() &&
753  ext_funcs[optimal].useDefaultSizer()) {
754  std::string name = ext_funcs[optimal].getName();  // local name shadows the function parameter of the same name
755  name.erase(name.find(DEFAULT_ROW_MULTIPLIER_SUFFIX),  // NOTE(review): listing line 756 (the rest of this call) is absent from this extract
757  for (size_t i = 0; i < ext_funcs.size(); i++) {  // look up the candidate registered under the shortened name
758  if (ext_funcs[i].getName() == name) {
759  optimal = i;
760  std::vector<SQLTypeInfo> type_info = type_infos_variants[optimal_variant];
761  size_t sizer = ext_funcs[optimal].getOutputRowSizeParameter();
762  type_info.insert(type_info.begin() + sizer - 1, SQLTypeInfo(kINT, true));  // adds a kINT entry at position sizer - 1; presumably sizer is 1-based - TODO confirm
763  return {ext_funcs[optimal], type_info};
764  }
765  }
766  UNREACHABLE();  // a candidate with the shortened name is expected to exist
767  }
768  }
769 
770  return {ext_funcs[optimal], type_infos_variants[optimal_variant]};
771 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
#define LOG(tag)
Definition: Logger.h:216
static int match_arguments(const SQLTypeInfo &arg_type, const bool is_arg_literal, int sig_pos, const std::vector< ExtArgumentType > &sig_types, int &penalty_score)
#define UNREACHABLE()
Definition: Logger.h:266
#define DEFAULT_ROW_MULTIPLIER_SUFFIX
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:379
std::string to_string(char const *&&v)
std::string to_string() const
Definition: sqltypes.h:543
auto generate_column_type(const SQLTypeInfo &elem_ti)
Definition: sqltypes.h:1353
Definition: sqltypes.h:67
#define CHECK_LE(x, y)
Definition: Logger.h:233
auto generate_column_list_type(const SQLTypeInfo &elem_ti)
Definition: sqltypes.h:1401
static std::string toString(const std::vector< ExtensionFunction > &ext_funcs, std::string tab="")
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:60
string name
Definition: setup.in.py:72
constexpr double n
Definition: Utm.h:38
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ExtensionFunction bind_function ( std::string  name,
Analyzer::ExpressionPtrVector  func_args 
)

Definition at line 783 of file ExtensionFunctionsBinding.cpp.

References ExtensionFunctionsWhitelist::get_ext_funcs(), and setup::name.

784  {
785  // used in RelAlgTranslator.cpp, first try GPU UDFs, then fall back
786  // to CPU UDFs. Returns only the bound function (element 0 of the worker result).
787  bool is_gpu = true;
788  std::string processor = "GPU";  // label passed to the worker, which uses it in its error messages
789  auto ext_funcs = ExtensionFunctionsWhitelist::get_ext_funcs(name, is_gpu);
790  if (!ext_funcs.size()) {  // no GPU candidates registered under this name
791  is_gpu = false;
792  processor = "CPU";  // NOTE(review): listing line 793 is absent from this extract; presumably it reloads ext_funcs for CPU - confirm against the source file
794  }
795  try {
796  return std::get<0>(
797  bind_function<ExtensionFunction>(name, func_args, ext_funcs, processor));
798  } catch (ExtensionFunctionBindingError& e) {
799  if (is_gpu) {  // the failed attempt was the GPU one: retry once
800  is_gpu = false;
801  processor = "GPU|CPU";  // NOTE(review): listing line 802 is absent from this extract; presumably it reloads ext_funcs for CPU - confirm against the source file
803  return std::get<0>(
804  bind_function<ExtensionFunction>(name, func_args, ext_funcs, processor));
805  } else {
806  throw;  // the CPU attempt already failed: propagate the binding error unchanged
807  }
808  }
809 }
static std::vector< ExtensionFunction > get_ext_funcs(const std::string &name)
string name
Definition: setup.in.py:72

+ Here is the call graph for this function:

ExtensionFunction bind_function ( std::string  name,
Analyzer::ExpressionPtrVector  func_args,
const bool  is_gpu 
)

Definition at line 811 of file ExtensionFunctionsBinding.cpp.

References ExtensionFunctionsWhitelist::get_ext_funcs(), and setup::name.

813  {
814  // used below; binds against the candidates for the processor selected by is_gpu
815  std::vector<ExtensionFunction> ext_funcs =  // NOTE(review): listing line 816 (the initializer) is absent from this extract; the page lists get_ext_funcs() as a reference of this function
817  std::string processor = (is_gpu ? "GPU" : "CPU");  // label passed to the worker, which uses it in its error messages
818  return std::get<0>(  // only the bound function is returned; the type variant is dropped
819  bind_function<ExtensionFunction>(name, func_args, ext_funcs, processor));
820 }
static std::vector< ExtensionFunction > get_ext_funcs(const std::string &name)
string name
Definition: setup.in.py:72

+ Here is the call graph for this function:

ExtensionFunction bind_function ( const Analyzer::FunctionOper function_oper,
const bool  is_gpu 
)

Definition at line 822 of file ExtensionFunctionsBinding.cpp.

References bind_function(), Analyzer::FunctionOper::getArity(), Analyzer::FunctionOper::getName(), Analyzer::FunctionOper::getOwnArg(), and setup::name.

823  {
824  // used in ExtensionsIR.cpp; unpacks a FunctionOper into (name, arguments) and delegates
825  auto name = function_oper->getName();
826  Analyzer::ExpressionPtrVector func_args = {};
827  for (size_t i = 0; i < function_oper->getArity(); ++i) {  // collect all getArity() arguments in order
828  func_args.push_back(function_oper->getOwnArg(i));
829  }
830  return bind_function(name, func_args, is_gpu);  // forwards to the (name, func_args, is_gpu) overload
831 }
size_t getArity() const
Definition: Analyzer.h:2308
std::shared_ptr< Analyzer::Expr > getOwnArg(const size_t i) const
Definition: Analyzer.h:2315
std::tuple< T, std::vector< SQLTypeInfo > > bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
std::vector< ExpressionPtr > ExpressionPtrVector
Definition: Analyzer.h:189
std::string getName() const
Definition: Analyzer.h:2306
string name
Definition: setup.in.py:72

+ Here is the call graph for this function:

const std::tuple<table_functions::TableFunction, std::vector<SQLTypeInfo> > bind_table_function ( std::string  name,
Analyzer::ExpressionPtrVector  input_args,
const std::vector< table_functions::TableFunction > &  table_funcs,
const bool  is_gpu 
)

Definition at line 774 of file ExtensionFunctionsBinding.cpp.

References setup::name.

Referenced by bind_table_function(), and RelAlgExecutor::createTableFunctionWorkUnit().

777  {
778  std::string processor = (is_gpu ? "GPU" : "CPU");  // label passed to the worker, which uses it in its error messages
779  return bind_function<table_functions::TableFunction>(  // returns both the table function and the chosen type variant
780  name, input_args, table_funcs, processor);
781 }
string name
Definition: setup.in.py:72

+ Here is the caller graph for this function:

const std::tuple<table_functions::TableFunction, std::vector<SQLTypeInfo> > bind_table_function ( std::string  name,
Analyzer::ExpressionPtrVector  input_args,
const bool  is_gpu 
)

Definition at line 834 of file ExtensionFunctionsBinding.cpp.

References bind_table_function(), and table_functions::TableFunctionsFactory::get_table_funcs().

836  {
837  // used in RelAlgExecutor.cpp
838  std::vector<table_functions::TableFunction> table_funcs =  // NOTE(review): listing line 839 (the initializer) is absent from this extract; the page lists TableFunctionsFactory::get_table_funcs() as a reference of this function
840  return bind_table_function(name, input_args, table_funcs, is_gpu);  // delegates to the overload taking an explicit candidate list
841 }
static std::vector< TableFunction > get_table_funcs()
const std::tuple< table_functions::TableFunction, std::vector< SQLTypeInfo > > bind_table_function(std::string name, Analyzer::ExpressionPtrVector input_args, const std::vector< table_functions::TableFunction > &table_funcs, const bool is_gpu)
string name
Definition: setup.in.py:72

+ Here is the call graph for this function: