19 #include <boost/algorithm/string/join.hpp>
29 const std::string&
name) {
38 const std::string&
name) {
48 const bool is_runtime) {
49 std::unordered_set<std::string> names;
51 for (
auto funcs : collections) {
52 for (
auto& pair : *funcs) {
55 names.insert(udf.
getName(
false));
63 const std::string&
name) {
64 std::vector<ExtensionFunction> ext_funcs = {};
67 for (
auto funcs : collections) {
68 const auto it = funcs->find(uname);
69 if (it == funcs->end()) {
72 auto ext_func_sigs = it->second;
73 std::copy(ext_func_sigs.begin(), ext_func_sigs.end(), std::back_inserter(ext_funcs));
79 const std::string&
name,
81 std::vector<ExtensionFunction> ext_funcs = {};
84 for (
auto funcs : collections) {
85 const auto it = funcs->find(uname);
86 if (it == funcs->end()) {
89 auto ext_func_sigs = it->second;
90 std::copy_if(ext_func_sigs.begin(),
92 std::back_inserter(ext_funcs),
93 [is_gpu](
auto sig) {
return (is_gpu ? sig.isGPU() : sig.isCPU()); });
99 const std::string&
name,
101 std::vector<ExtensionFunction> ext_funcs = {};
104 for (
auto funcs : collections) {
105 const auto it = funcs->find(uname);
106 if (it == funcs->end()) {
109 auto ext_func_sigs = it->second;
110 std::copy_if(ext_func_sigs.begin(),
112 std::back_inserter(ext_funcs),
113 [arity](
auto sig) {
return arity == sig.getInputArgs().size(); });
119 const std::string&
name,
122 std::vector<ExtensionFunction> ext_funcs = {};
125 for (
auto funcs : collections) {
126 const auto it = funcs->find(uname);
127 if (it == funcs->end()) {
130 auto ext_func_sigs = it->second;
131 std::copy_if(ext_func_sigs.begin(),
133 std::back_inserter(ext_funcs),
134 [arity, rtype](
auto sig) {
141 if (arity > sig.getInputArgs().size()) {
157 bool declare =
false) {
190 return (declare ?
"{i8*, i64, i8}*" :
"Array<i8>");
192 return (declare ?
"{i16*, i64, i8}*" :
"Array<i16>");
194 return (declare ?
"{i32*, i64, i8}*" :
"Array<i32>");
196 return (declare ?
"{i64*, i64, i8}*" :
"Array<i64>");
198 return (declare ?
"{float*, i64, i8}*" :
"Array<float>");
200 return (declare ?
"{double*, i64, i8}*" :
"Array<double>");
202 return (declare ?
"{i1*, i64, i8}*" :
"Array<i1>");
204 return (declare ?
"{i32*, i64, i8}*" :
"Array<TextEncodingDict>");
208 return "geo_multi_point";
210 return "geo_linestring";
212 return "geo_multi_linestring";
214 return "geo_polygon";
216 return "geo_multi_polygon";
220 return (declare ? (byval ?
"{i8*, i64}" :
"i8*") :
"Column<i8>");
222 return (declare ? (byval ?
"{i16*, i64}" :
"i8*") :
"Column<i16>");
224 return (declare ? (byval ?
"{i32*, i64}" :
"i8*") :
"Column<i32>");
226 return (declare ? (byval ?
"{i64*, i64}" :
"i8*") :
"Column<i64>");
228 return (declare ? (byval ?
"{float*, i64}" :
"i8*") :
"Column<float>");
230 return (declare ? (byval ?
"{double*, i64}" :
"i8*") :
"Column<double>");
232 return (declare ? (byval ?
"{i8*, i64}" :
"i8*") :
"Column<bool>");
234 return (declare ? (byval ?
"{i32*, i64}" :
"i8*") :
"Column<TextEncodingDict>");
236 return (declare ? (byval ?
"{i64*, i64}" :
"i8*") :
"Column<Timestamp>");
238 return (declare ? (byval ?
"{i8*, i64}" :
"i8*") :
"TextEncodingNone");
240 return (declare ?
"{ i32 }" :
"TextEncodingDict");
242 return (declare ?
"{ i64 }" :
"Timestamp");
244 return (declare ?
"{i8**, i64, i64}*" :
"ColumnList<i8>");
246 return (declare ?
"{i8**, i64, i64}*" :
"ColumnList<i16>");
248 return (declare ?
"{i8**, i64, i64}*" :
"ColumnList<i32>");
250 return (declare ?
"{i8**, i64, i64}*" :
"ColumnList<i64>");
252 return (declare ?
"{i8**, i64, i64}*" :
"ColumnList<float>");
254 return (declare ?
"{i8**, i64, i64}*" :
"ColumnList<double>");
256 return (declare ?
"{i8**, i64, i64}*" :
"ColumnList<bool>");
258 return (declare ?
"{i8**, i64, i64}*" :
"ColumnList<TextEncodingDict>");
260 return (declare ?
"{i8*, i64}*" :
"Column<Array<i8>>");
262 return (declare ?
"{i8*, i64}*" :
"Column<Array<i16>>");
264 return (declare ?
"{i8*, i64}*" :
"Column<Array<i32>>");
266 return (declare ?
"{i8*, i64}*" :
"Column<Array<i64>>");
268 return (declare ?
"{i8*, i64}*" :
"Column<Array<float>>");
270 return (declare ?
"{i8*, i64}*" :
"Column<Array<double>>");
272 return (declare ?
"{i8*, i64}*" :
"Column<Array<bool>>");
274 return (declare ?
"{i8*, i64}" :
"Column<Array<TextEncodingDict>>");
276 return (declare ?
"{i8**, i64, i64}*" :
"ColumnListArray<i8>");
278 return (declare ?
"{i8**, i64, i64}*" :
"ColumnListArray<i16>");
280 return (declare ?
"{i8**, i64, i64}*" :
"ColumnListArray<i32>");
282 return (declare ?
"{i8**, i64, i64}*" :
"ColumnListArray<i64>");
284 return (declare ?
"{i8**, i64, i64}*" :
"ColumnListArray<float>");
286 return (declare ?
"{i8**, i64, i64}*" :
"ColumnListArray<double>");
288 return (declare ?
"{i8**, i64, i64}*" :
"ColumnListArray<bool>");
290 return (declare ?
"{i8**, i64, i64}" :
"ColumnList<Array<TextEncodingDict>>");
292 return (declare ?
"{ i64 }" :
"DayTimeInterval");
294 return (declare ?
"{ i64 }" :
"YearMonthTimeInterval");
303 const auto idx = str.find(
"__");
304 if (idx == std::string::npos) {
307 CHECK_GT(idx, std::string::size_type(0));
308 return str.substr(0, idx);
322 #define EXTARGTYPECASE(EXTARGTYPE, ELEMTYPE, ENCODING, ARRAYENCODING) \
323 case ExtArgumentType::EXTARGTYPE: \
325 c = kENCODING_##ENCODING; \
327 case ExtArgumentType::Array##EXTARGTYPE: \
329 c = kENCODING_##ENCODING; \
330 subtype = ELEMTYPE; \
332 case ExtArgumentType::Column##EXTARGTYPE: \
334 c = kENCODING_##ENCODING; \
335 subtype = ELEMTYPE; \
337 case ExtArgumentType::ColumnList##EXTARGTYPE: \
338 type = kCOLUMN_LIST; \
339 c = kENCODING_##ENCODING; \
340 subtype = ELEMTYPE; \
342 case ExtArgumentType::ColumnArray##EXTARGTYPE: \
344 subtype = ELEMTYPE; \
345 c = kENCODING_##ARRAYENCODING; \
347 case ExtArgumentType::ColumnListArray##EXTARGTYPE: \
348 type = kCOLUMN_LIST; \
349 subtype = ELEMTYPE; \
350 c = kENCODING_##ARRAYENCODING; \
353 switch (ext_arg_type) {
383 <<
"` cannot be converted to SQLTypes.";
390 const std::vector<ExtensionFunction>& ext_funcs,
393 for (
auto sig : ext_funcs) {
394 r += tab + sig.toString() +
"\n";
400 const std::vector<SQLTypeInfo>& arg_types) {
402 for (
auto sig = arg_types.begin(); sig != arg_types.end();) {
403 r += sig->get_type_name();
405 if (sig != arg_types.end()) {
413 const std::vector<ExtArgumentType>& sig_types) {
415 for (
auto t = sig_types.begin(); t != sig_types.end();) {
418 if (t != sig_types.end()) {
426 const std::vector<ExtArgumentType>& sig_types) {
428 for (
auto t = sig_types.begin(); t != sig_types.end();) {
431 if (t != sig_types.end()) {
473 return "ARRAY<TINYINT>";
475 return "ARRAY<SMALLINT>";
479 return "ARRAY<BIGINT>";
481 return "ARRAY<FLOAT>";
483 return "ARRAY<DOUBLE>";
485 return "ARRAY<BOOLEAN>";
487 return "ARRAY<TEXT ENCODING DICT>";
489 return "COLUMN<TINYINT>";
491 return "COLUMN<SMALLINT>";
493 return "COLUMN<INT>";
495 return "COLUMN<BIGINT>";
497 return "COLUMN<FLOAT>";
499 return "COLUMN<DOUBLE>";
501 return "COLUMN<BOOLEAN>";
503 return "COLUMN<TEXT ENCODING DICT>";
505 return "COLUMN<TIMESTAMP(9)>";
515 return "MULTILINESTRING";
519 return "MULTIPOLYGON";
523 return "TEXT ENCODING NONE";
525 return "TEXT ENCODING DICT";
527 return "TIMESTAMP(9)";
529 return "COLUMNLIST<TINYINT>";
531 return "COLUMNLIST<SMALLINT>";
533 return "COLUMNLIST<INT>";
535 return "COLUMNLIST<BIGINT>";
537 return "COLUMNLIST<FLOAT>";
539 return "COLUMNLIST<DOUBLE>";
541 return "COLUMNLIST<BOOLEAN>";
543 return "COLUMNLIST<TEXT ENCODING DICT>";
545 return "COLUMN<ARRAY<TINYINT>>";
547 return "COLUMN<ARRAY<SMALLINT>>";
549 return "COLUMN<ARRAY<INT>>";
551 return "COLUMN<ARRAY<BIGINT>>";
553 return "COLUMN<ARRAY<FLOAT>>";
555 return "COLUMN<ARRAY<DOUBLE>>";
557 return "COLUMN<ARRAY<BOOLEAN>>";
559 return "COLUMN<ARRAY<TEXT ENCODING DICT>>";
561 return "COLUMNLIST<ARRAY<TINYINT>>";
563 return "COLUMNLIST<ARRAY<SMALLINT>>";
565 return "COLUMNLIST<ARRAY<INT>>";
567 return "COLUMNLIST<ARRAY<BIGINT>>";
569 return "COLUMNLIST<ARRAY<FLOAT>>";
571 return "COLUMNLIST<ARRAY<DOUBLE>>";
573 return "COLUMNLIST<ARRAY<BOOLEAN>>";
575 return "COLUMNLIST<ARRAY<TEXT ENCODING DICT>>";
577 return "DAY TIME INTERVAL";
579 return "YEAR MONTH INTERVAL";
592 auto mgr_annotation = func_annotations.find(
"uses_manager");
593 if (mgr_annotation != func_annotations.end()) {
594 return boost::algorithm::to_lower_copy(mgr_annotation->second) ==
"true";
622 const std::unordered_set<std::string>& udf_decls,
624 std::vector<std::string> declarations;
626 const std::vector<ExtensionFunction>& ext_funcs = kv.second;
627 CHECK(!ext_funcs.empty());
628 for (
const auto& ext_func : ext_funcs) {
631 if (!udf_decls.empty() && udf_decls.find(ext_func.getName()) != udf_decls.end()) {
635 std::string decl_prefix;
636 std::vector<std::string> arg_strs;
639 decl_prefix =
"declare void @" + ext_func.getName();
640 arg_strs.emplace_back(
646 " @" + ext_func.getName();
651 if (ext_func.usesManager()) {
652 arg_strs.emplace_back(
"i8*");
655 for (
const auto arg : ext_func.getInputArgs()) {
658 declarations.emplace_back(decl_prefix +
"(" +
664 if (kv.second.isRuntime() || kv.second.useDefaultSizer()) {
669 if (!((is_gpu && kv.second.isGPU()) || (!is_gpu && kv.second.isCPU()))) {
672 std::string decl_prefix{
676 std::vector<std::string> arg_strs;
677 for (
const auto arg : kv.second.getArgs(
true)) {
690 if (type_name ==
"bool" || type_name ==
"i1") {
693 if (type_name ==
"i8") {
696 if (type_name ==
"i16") {
699 if (type_name ==
"i32") {
702 if (type_name ==
"i64") {
705 if (type_name ==
"float") {
708 if (type_name ==
"double") {
711 if (type_name ==
"void") {
714 if (type_name ==
"i8*") {
717 if (type_name ==
"i16*") {
720 if (type_name ==
"i32*") {
723 if (type_name ==
"i64*") {
726 if (type_name ==
"float*") {
729 if (type_name ==
"double*") {
732 if (type_name ==
"i1*" || type_name ==
"bool*") {
735 if (type_name ==
"Array<i8>") {
738 if (type_name ==
"Array<i16>") {
741 if (type_name ==
"Array<i32>") {
744 if (type_name ==
"Array<i64>") {
747 if (type_name ==
"Array<float>") {
750 if (type_name ==
"Array<double>") {
753 if (type_name ==
"Array<bool>" || type_name ==
"Array<i1>") {
756 if (type_name ==
"Array<TextEncodingDict>") {
759 if (type_name ==
"geo_point") {
762 if (type_name ==
"geo_multi_point") {
765 if (type_name ==
"geo_linestring") {
768 if (type_name ==
"geo_multi_linestring") {
771 if (type_name ==
"geo_polygon") {
774 if (type_name ==
"geo_multi_polygon") {
777 if (type_name ==
"cursor") {
780 if (type_name ==
"Column<i8>") {
783 if (type_name ==
"Column<i16>") {
786 if (type_name ==
"Column<i32>") {
789 if (type_name ==
"Column<i64>") {
792 if (type_name ==
"Column<float>") {
795 if (type_name ==
"Column<double>") {
798 if (type_name ==
"Column<bool>") {
801 if (type_name ==
"Column<TextEncodingDict>") {
804 if (type_name ==
"Column<Timestamp>") {
807 if (type_name ==
"TextEncodingNone") {
810 if (type_name ==
"TextEncodingDict") {
813 if (type_name ==
"timestamp") {
816 if (type_name ==
"ColumnList<i8>") {
819 if (type_name ==
"ColumnList<i16>") {
822 if (type_name ==
"ColumnList<i32>") {
825 if (type_name ==
"ColumnList<i64>") {
828 if (type_name ==
"ColumnList<float>") {
831 if (type_name ==
"ColumnList<double>") {
834 if (type_name ==
"ColumnList<bool>") {
837 if (type_name ==
"ColumnList<TextEncodingDict>") {
840 if (type_name ==
"Column<Array<i8>>") {
843 if (type_name ==
"Column<Array<i16>>") {
846 if (type_name ==
"Column<Array<i32>>") {
849 if (type_name ==
"Column<Array<i64>>") {
852 if (type_name ==
"Column<Array<float>>") {
855 if (type_name ==
"Column<Array<double>>") {
858 if (type_name ==
"Column<Array<bool>>") {
861 if (type_name ==
"Column<Array<TextEncodingDict>>") {
864 if (type_name ==
"ColumnList<Array<i8>>") {
867 if (type_name ==
"ColumnList<Array<i16>>") {
870 if (type_name ==
"ColumnList<Array<i32>>") {
873 if (type_name ==
"ColumnList<Array<i64>>") {
876 if (type_name ==
"ColumnList<Array<float>>") {
879 if (type_name ==
"ColumnList<Array<double>>") {
882 if (type_name ==
"ColumnList<Array<bool>>") {
885 if (type_name ==
"ColumnList<Array<TextEncodingDict>>") {
888 if (type_name ==
"DayTimeInterval") {
891 if (type_name ==
"YearMonthTimeInterval") {
900 using SignatureMap = std::unordered_map<std::string, std::vector<ExtensionFunction>>;
903 const std::string& json_func_sigs,
904 const bool is_runtime) {
905 rapidjson::Document func_sigs;
906 func_sigs.Parse(json_func_sigs.c_str());
907 CHECK(func_sigs.IsArray());
908 for (
auto func_sigs_it = func_sigs.Begin(); func_sigs_it != func_sigs.End();
910 CHECK(func_sigs_it->IsObject());
913 std::vector<ExtArgumentType>
args;
914 const auto& args_serialized =
field(*func_sigs_it,
"args");
915 CHECK(args_serialized.IsArray());
916 for (
auto args_serialized_it = args_serialized.Begin();
917 args_serialized_it != args_serialized.End();
918 ++args_serialized_it) {
922 std::vector<std::map<std::string, std::string>> annotations;
923 const auto& anns =
field(*func_sigs_it,
"annotations");
924 CHECK(anns.IsArray());
925 static const std::map<std::string, std::string> map_empty = {};
926 for (
auto obj = anns.Begin(); obj != anns.End(); ++obj) {
927 CHECK(obj->IsObject());
928 if (obj->ObjectEmpty()) {
929 annotations.push_back(map_empty);
931 std::map<std::string, std::string> m;
932 for (
auto kv = obj->MemberBegin(); kv != obj->MemberEnd(); ++kv) {
933 m[kv->name.GetString()] = kv->value.GetString();
935 annotations.push_back(m);
939 name, args, ret, annotations, is_runtime);
964 if (!json_func_sigs.empty()) {
974 if (!json_func_sigs.empty()) {
979 std::unordered_map<std::string, std::vector<ExtensionFunction>>
982 std::unordered_map<std::string, std::vector<ExtensionFunction>>
985 std::unordered_map<std::string, std::vector<ExtensionFunction>>
static void addUdfs(const std::string &json_func_sigs)
static void addCommon(std::unordered_map< std::string, std::vector< ExtensionFunction >> &sigs, const std::string &json_func_sigs, const bool is_runtime)
std::string drop_suffix(const std::string &str)
static std::vector< ExtensionFunction > get_ext_funcs(const std::string &name)
static std::unordered_map< std::string, std::vector< ExtensionFunction > > udf_functions_
static std::vector< ExtensionFunction > * get(const std::string &name)
static void clearRTUdfs()
std::string toSignature() const
const std::string json_str(const rapidjson::Value &obj) noexcept
static std::unordered_map< std::string, std::vector< ExtensionFunction > > rt_udf_functions_
const std::vector< ExtArgumentType > args_
static void add(const std::string &json_func_sigs)
std::unordered_map< std::string, std::vector< ExtensionFunction >> SignatureMap
const std::string getName(bool keep_suffix=true) const
HOST DEVICE SQLTypes get_type() const
std::string toStringSQL() const
std::string toString() const
static std::unordered_map< std::string, std::vector< ExtensionFunction > > functions_
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Supported runtime functions management and retrieval.
static std::vector< ExtensionFunction > * get_udf(const std::string &name)
#define EXTARGTYPECASE(EXTARGTYPE, ELEMTYPE, ENCODING, ARRAYENCODING)
DEVICE auto copy(ARGS &&...args)
ExtArgumentType deserialize_type(const std::string &type_name)
bool is_ext_arg_type_array(const ExtArgumentType ext_arg_type)
static std::unordered_set< std::string > get_udfs_name(const bool is_runtime)
Checked json field retrieval.
std::string toString(const ExecutorDeviceType &device_type)
Argument type based extension function binding.
std::string serialize_type(const ExtArgumentType type, bool byval=true, bool declare=false)
const std::vector< std::map< std::string, std::string > > annotations_
const ExtArgumentType ret_
static std::string toString(const std::vector< ExtensionFunction > &ext_funcs, std::string tab="")
static std::unordered_map< std::string, TableFunction > functions_
constexpr auto type_name() noexcept
static std::vector< std::string > getLLVMDeclarations(const std::unordered_set< std::string > &udf_decls, const bool is_gpu=false)
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
static std::string toStringSQL(const std::vector< ExtArgumentType > &sig_types)
static void addRTUdfs(const std::string &json_func_sigs)