// NOTE(review): the leading numbers are line numbers of the original file; lines
// between them are not part of this excerpt.
// Closing parameter of a signature whose beginning is outside this excerpt.
46 const int precision) {
// Fragment: derives an (rhs, sql_qual) pair from `rex_scalar`.
59 std::shared_ptr<Analyzer::Expr> rhs;
61 const auto rex_operator =
dynamic_cast<const RexOperator*
>(rex_scalar);
// Early exit; the guarding condition is on a line not shown here.
63 return std::make_pair(rhs, sql_qual);
// An empty name is used when `rex_function` is null, so neither quantifier matches.
66 const auto qual_str = rex_function ? rex_function->
getName() :
"";
// PG_ANY / PG_ALL must wrap exactly one operand; they select kANY / kALL.
67 if (qual_str ==
"PG_ANY"sv || qual_str ==
"PG_ALL"sv) {
68 CHECK_EQ(
size_t(1), rex_function->size());
70 sql_qual = (qual_str ==
"PG_ANY"sv) ?
kANY :
kALL;
// A single-operand CAST is examined only while `rhs` is still unset.
72 if (!rhs && rex_operator->getOperator() ==
kCAST) {
73 CHECK_EQ(
size_t(1), rex_operator->size());
76 return std::make_pair(rhs, sql_qual);
// Fragment: converts `scalar_tv` into a Datum `d` according to `ti.get_type()` and
// returns it together with a null flag. Case labels and the null tests that set
// `is_null_const` are on lines not shown in this excerpt.
84 bool is_null_const{
false};
85 switch (ti.get_type()) {
// Integer-backed branches all read the value as int64_t and narrow on assignment.
87 const auto ival = boost::get<int64_t>(scalar_tv);
97 const auto ival = boost::get<int64_t>(scalar_tv);
100 is_null_const =
true;
102 d.tinyintval = *ival;
107 const auto ival = boost::get<int64_t>(scalar_tv);
110 is_null_const =
true;
112 d.smallintval = *ival;
117 const auto ival = boost::get<int64_t>(scalar_tv);
120 is_null_const =
true;
132 const auto ival = boost::get<int64_t>(scalar_tv);
135 is_null_const =
true;
// Floating-point branches.
142 const auto dval = boost::get<double>(scalar_tv);
145 is_null_const =
true;
152 const auto fval = boost::get<float>(scalar_tv);
155 is_null_const =
true;
// String branch: a NullableString holding void* is treated as null.
164 auto nullable_sptr = boost::get<NullableString>(scalar_tv);
165 CHECK(nullable_sptr);
166 if (boost::get<void*>(nullable_sptr)) {
167 is_null_const =
true;
169 auto sptr = boost::get<std::string>(nullable_sptr);
// NOTE(review): raw `new`; who releases d.stringval is not visible here — confirm.
170 d.stringval =
new std::string(*sptr);
// Any type without a dedicated branch is a hard failure.
175 CHECK(
false) <<
"Unhandled type: " << ti.get_type_name();
177 return {d, is_null_const};
// Fragment: for every type in the pack Ts, produces an IndexedHandler that pairs
// std::type_index(typeid(Ts)) with &RelAlgTranslator::translateRexScalar<Ts>.
// The function signature line is not shown in this excerpt.
184 template <
typename... Ts>
186 return {IndexedHandler{std::type_index(
typeid(Ts)),
187 &RelAlgTranslator::translateRexScalar<Ts>}...};
// Predicate fragment: stores a type_index and matches handlers whose `first` equals it.
193 : type_index_(std::type_index(type_info)) {}
194 bool operator()(IndexedHandler
const& pair)
const {
return pair.first == type_index_; }
// translateRexScalar<T> specializations: each one static_casts `rex` to the concrete
// Rex type T and forwards to the matching translate* routine. The parameter lines and
// closing braces are not shown in this excerpt.
// RexInput -> translateInput
200 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexInput>(
202 return translateInput(static_cast<RexInput const*>(rex));
// RexLiteral -> translateLiteral
205 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexLiteral>(
207 return translateLiteral(static_cast<RexLiteral const*>(rex));
// RexWindowFunctionOperator -> translateWindowFunction
210 std::shared_ptr<Analyzer::Expr>
211 RelAlgTranslator::translateRexScalar<RexWindowFunctionOperator>(
213 return translateWindowFunction(static_cast<RexWindowFunctionOperator const*>(rex));
// RexFunctionOperator -> translateFunction
216 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexFunctionOperator>(
218 return translateFunction(static_cast<RexFunctionOperator const*>(rex));
// RexOperator -> translateOper
221 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexOperator>(
223 return translateOper(static_cast<RexOperator const*>(rex));
// RexCase -> translateCase
226 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexCase>(
228 return translateCase(static_cast<RexCase const*>(rex));
// RexSubQuery -> translateScalarSubquery
231 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexSubQuery>(
233 return translateScalarSubquery(static_cast<RexSubQuery const*>(rex));
// Fragment: memoized dispatch. Looks `rex` up in cache_; on a miss, selects the handler
// whose type_index equals typeid(*rex), invokes it through the member pointer, and
// stores the result. Returns the cached expression.
238 auto cache_itr =
cache_.find(rex);
239 if (cache_itr ==
cache_.end()) {
// `handlers` is declared on a line not shown; its type must be trivially destructible.
248 static_assert(std::is_trivially_destructible_v<decltype(handlers)>);
249 auto it = std::find_if(handlers.cbegin(), handlers.cend(), ByTypeIndex{
typeid(*rex)});
250 CHECK(it != handlers.cend()) <<
"Unhandled type: " <<
typeid(*rex).name();
// The translation runs before insertion; emplace is expected to succeed because the
// lookup above just missed.
252 auto cached =
cache_.emplace(rex, (this->*it->second)(rex));
253 CHECK(cached.second) <<
"Failed to emplace rex of type " <<
typeid(*rex).name();
254 cache_itr = cached.first;
256 return cache_itr->second;
// Tail of a boolean expression: true when agg_kind is none of AVG, MIN, MAX, SUM,
// APPROX_QUANTILE, MODE. The start of the expression is not shown in this excerpt.
269 !shared::is_any<kAVG, kMIN, kMAX, kSUM, kAPPROX_QUANTILE, kMODE>(agg_kind);
// True when agg_kind is one of MIN, MAX, COUNT, APPROX_COUNT_DISTINCT.
273 return shared::is_any<kMIN, kMAX, kCOUNT, kAPPROX_COUNT_DISTINCT>(agg_kind);
// Fragment: builds an Analyzer::AggExpr from `rex` and the already translated
// `scalar_sources`. Unsupported argument combinations raise std::runtime_error.
// The switch/if lines selecting each check are mostly not shown in this excerpt.
280 const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
284 std::shared_ptr<Analyzer::Expr> arg_expr;
285 std::shared_ptr<Analyzer::Expr> arg1;
// The operand index must address an existing scalar source.
288 CHECK_LT(operand, scalar_sources.size());
290 arg_expr = scalar_sources[operand];
// Two-operand form: the second operand must be a constant of type kINT in [1, 100].
// NOTE(review): the message says SMALLINT while the check compares against kINT —
// confirm the intended wording.
293 if (rex->
size() == 2) {
296 if (!const_arg1 || const_arg1->get_type_info().get_type() !=
kINT ||
297 const_arg1->get_constval().intval < 1 ||
298 const_arg1->get_constval().intval > 100) {
299 throw std::runtime_error(
300 "APPROX_COUNT_DISTINCT's second parameter must be a SMALLINT literal "
301 "between 1 and 100");
308 throw std::runtime_error(
309 "APPROX_PERCENTILE/MEDIAN is not supported in distributed mode at this "
313 if (rex->
size() == 2) {
// `median` is defined on a line not shown here.
325 arg1 = std::make_shared<Analyzer::Constant>(
kDOUBLE,
false, median);
330 throw std::runtime_error(
331 "MODE is not supported in distributed mode at this time.");
335 if (arg_expr->get_type_info().is_geometry()) {
336 throw std::runtime_error(
337 "COUNT_IF does not currently support geospatial types.");
// The conditional argument has to be boolean.
342 if (arg1->get_type_info().get_type() !=
kBOOLEAN) {
343 throw std::runtime_error(
"Conditional argument must be a boolean expression.");
349 const auto& arg_ti = arg_expr->get_type_info();
351 throw std::runtime_error(
"Aggregate on " + arg_ti.get_type_name() +
352 " is not supported yet.");
355 throw std::runtime_error(
toString(agg_kind) +
356 " does not currently support the DISTINCT qualifier.");
// Result type comes from get_agg_type(); arg1 carries the optional second argument.
359 const auto agg_ti =
get_agg_type(agg_kind, arg_expr.get());
360 return makeExpr<Analyzer::AggExpr>(agg_ti, agg_kind, arg_expr,
is_distinct, arg1);
// Fragment: turns a RexLiteral into an Analyzer expression, switching on
// rex_literal->getType(). Case labels are not shown in this excerpt; `lit_ti` and
// `target_ti` are built from the literal's own and target scale/precision.
364 const RexLiteral* rex_literal) {
366 rex_literal->getType(), rex_literal->getScale(), rex_literal->getPrecision());
368 rex_literal->getTargetScale(),
369 rex_literal->getTargetPrecision());
370 switch (rex_literal->getType()) {
// Value stored as a 64-bit integer constant of the literal's own type.
374 d.
bigintval = rex_literal->getVal<int64_t>();
375 return makeExpr<Analyzer::Constant>(rex_literal->getType(),
false, d);
378 const auto val = rex_literal->getVal<int64_t>();
379 const int precision = rex_literal->getPrecision();
380 const int scale = rex_literal->getScale();
381 if (target_ti.is_fp() && !scale) {
// A cast is added only when literal and target types differ.
386 return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
// Boolean literal.
394 d.
boolval = rex_literal->getVal<
bool>();
395 return makeExpr<Analyzer::Constant>(
kBOOLEAN,
false, d);
// Double literal.
399 d.
doubleval = rex_literal->getVal<
double>();
401 makeExpr<Analyzer::Constant>(
SQLTypeInfo(rex_literal->getType(),
402 rex_literal->getPrecision(),
403 rex_literal->getScale(),
407 return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
412 d.
bigintval = rex_literal->getVal<int64_t>();
413 return makeExpr<Analyzer::Constant>(rex_literal->getType(),
false, d);
// kTIMESTAMP with precision > 0 keeps the raw value; every other case divides by 1000.
// NOTE(review): presumably a milliseconds-to-seconds conversion — confirm.
419 rex_literal->getType() ==
kTIMESTAMP && rex_literal->getPrecision() > 0
420 ? rex_literal->getVal<int64_t>()
421 : rex_literal->getVal<int64_t>() / 1000;
422 return makeExpr<Analyzer::Constant>(
423 SQLTypeInfo(rex_literal->getType(), rex_literal->getPrecision(), 0,
false),
// Value scaled by 24 * 3600. NOTE(review): looks like days to seconds — confirm.
429 d.
bigintval = rex_literal->getVal<int64_t>() * 24 * 3600;
430 return makeExpr<Analyzer::Constant>(rex_literal->getType(),
false, d);
// Null literal handling: an array target yields an ArrayExpr, otherwise a null Constant.
433 if (target_ti.is_array()) {
437 return makeExpr<Analyzer::ArrayExpr>(target_ti,
args,
true);
441 return makeExpr<Analyzer::Constant>(
kNULLT,
true,
Datum{0});
443 return makeExpr<Analyzer::Constant>(rex_literal->getTargetType(),
true,
Datum{0});
// Any other literal type is fatal.
446 LOG(
FATAL) <<
"Unexpected literal type " << lit_ti.get_type_name();
// Fragment: folds an already executed scalar sub-query into an Analyzer::Constant.
// Throws std::runtime_error for more than one row, for an empty result that is not a
// validation-only result, and for the other conditions whose tests are not shown here.
453 const RexSubQuery* rex_subquery)
const {
455 throw std::runtime_error(
"EXPLAIN is not supported with sub-queries");
458 auto result = rex_subquery->getExecutionResult();
459 auto row_set =
result->getRows();
460 const size_t row_count = row_set->rowCount();
// A scalar sub-query may yield at most one row.
461 if (row_count >
size_t(1)) {
462 throw std::runtime_error(
"Scalar sub-query returned multiple rows");
464 auto ti = rex_subquery->getType();
466 throw std::runtime_error(
467 "Scalar sub-queries which return strings not supported in distributed mode");
// Empty result: only a validation-only result set produces a constant.
469 if (row_count ==
size_t(0)) {
470 if (row_set->isValidationOnlyRes()) {
472 if (ti.is_string()) {
477 if (ti.is_dict_encoded_string()) {
481 return makeExpr<Analyzer::Constant>(ti,
false, d);
483 throw std::runtime_error(
"Scalar sub-query returned no results");
// Exactly one row: read it from the start and expect a single column.
486 row_set->moveToBegin();
487 auto const first_row = row_set->getNextRow(ti.is_dict_encoded_string(),
false);
488 CHECK_EQ(first_row.size(), size_t(1));
490 bool is_null_const{
false};
491 auto scalar_tv = boost::get<ScalarTargetValue>(&first_row[0]);
493 if (ti.is_dict_encoded_string()) {
497 return makeExpr<Analyzer::Constant>(ti, is_null_const, d);
// Fragment: resolves a RexInput to an Analyzer::ColumnVar. Two paths are visible: one
// for a RelScan source (column metadata from the catalog) and one that reads the
// column type from `in_metainfo`. The branch lines between them are not shown.
505 <<
"Not found in input_to_nest_level_, source="
507 const int rte_idx = it_rte_idx->second;
508 const auto scan_source =
dynamic_cast<const RelScan*
>(source);
// Scan path: no input metadata is expected.
513 CHECK(in_metainfo.empty());
514 const auto table_desc = scan_source->getTableDescriptor();
515 const auto& catalog = scan_source->getCatalog();
// The lookup uses the input index plus one.
517 catalog.getMetadataForColumnBySpi(table_desc->tableId, rex_input->
getIndex() + 1);
519 auto col_ti = cd->columnType;
// String columns are reported with type kTEXT.
520 if (col_ti.is_string()) {
521 col_ti.set_type(
kTEXT);
523 if (cd->isVirtualCol) {
// Nullability is relaxed under a condition on a line not shown here.
531 col_ti.set_notnull(
false);
533 return std::make_shared<Analyzer::ColumnVar>(
// Non-scan path: the column type comes from in_metainfo, indexed by the input index.
538 CHECK(!in_metainfo.empty()) <<
"for "
541 const int32_t col_id = rex_input->
getIndex();
542 CHECK_LT(col_id, in_metainfo.size());
543 auto col_ti = in_metainfo[col_id].get_type_info();
548 col_ti.set_notnull(
false);
552 return std::make_shared<Analyzer::ColumnVar>(
// Fragment: unary operator translation. Each group below belongs to a different
// operator branch; the case labels are not shown in this excerpt.
// Cast branch: target type comes from the operator, operand type from the operand.
563 const auto& target_ti = rex_operator->
getType();
565 const auto& operand_ti = operand_expr->get_type_info();
566 if (operand_ti.is_string() && target_ti.is_string()) {
569 if (target_ti.is_time() ||
573 return target_ti.is_date_in_days()
575 : operand_expr->add_cast(target_ti);
// Non-string to string goes through add_cast.
577 if (!operand_ti.is_string() && target_ti.is_string()) {
578 return operand_expr->add_cast(target_ti);
580 return std::make_shared<Analyzer::UOper>(target_ti,
false, sql_op, operand_expr);
// Branch that requires a string operand (see the ENCODE_TEXT message below).
583 const auto& target_ti = rex_operator->
getType();
585 const auto& operand_ti = operand_expr->get_type_info();
586 CHECK(operand_ti.is_string());
587 if (operand_ti.is_dict_encoded_string()) {
591 if (operand_expr->get_num_column_vars(
true) == 0UL) {
595 throw std::runtime_error(
596 "ENCODE_TEXT is not currently supported in distributed mode at this time.");
604 return makeExpr<Analyzer::UOper>(
605 casted_target_ti, operand_expr->get_contains_agg(),
kCAST, operand_expr);
// Boolean-valued unary operator.
609 return std::make_shared<Analyzer::UOper>(
kBOOLEAN, sql_op, operand_expr);
// Unary minus keeps the operand's type.
616 const auto& ti = operand_expr->get_type_info();
617 return std::make_shared<Analyzer::UOper>(ti,
false,
kUMINUS, operand_expr);
// UNNEST requires an array operand and yields its element type.
620 const auto& ti = operand_expr->get_type_info();
621 CHECK(ti.is_array());
622 return makeExpr<Analyzer::UOper>(ti.get_elem_type(),
false,
kUNNEST, operand_expr);
// Fragment: builds an Analyzer::InValues from the rows of `val_set`. Entries are split
// into up to `fetcher_count` contiguous slices; each slice fills its own list in
// expr_set, and the lists are concatenated in slice order at the end.
// `fetcher_count` and the launch call are on lines not shown in this excerpt.
633 const ResultSet& val_set) {
638 throw std::runtime_error(
639 "Unable to handle 'expr IN (subquery)', subquery returned 5M+ rows.");
641 std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
643 std::vector<std::list<std::shared_ptr<Analyzer::Expr>>> expr_set(
644 fetcher_count, std::list<std::shared_ptr<Analyzer::Expr>>());
645 std::vector<std::future<void>> fetcher_threads;
646 const auto& ti = arg->get_type_info();
647 const auto entry_count = val_set.entryCount();
// Slice length is the ceiling of entry_count / fetcher_count.
650 stride = (entry_count + fetcher_count - 1) / fetcher_count;
651 i < fetcher_count && start_entry < entry_count;
652 ++i, start_entry += stride) {
// The last slice is clamped to entry_count.
653 const auto end_entry = std::min(start_entry + stride, entry_count);
// The worker captures by reference and receives its own output list.
656 [&](std::list<std::shared_ptr<Analyzer::Expr>>& in_vals,
659 for (
auto index = start; index < end; ++index) {
660 auto row = val_set.getRowAt(index);
664 auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
666 bool is_null_const{
false};
// Visible here: a Constant is wrapped in a kCAST UOper before being appended; the
// condition choosing this path over the plain Constant below is not shown.
669 auto ti_none_encoded = ti;
671 auto none_encoded_string =
672 makeExpr<Analyzer::Constant>(ti, is_null_const, d);
673 auto dict_encoded_string = std::make_shared<Analyzer::UOper>(
674 ti,
false,
kCAST, none_encoded_string);
675 in_vals.push_back(dict_encoded_string);
677 in_vals.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
681 std::ref(expr_set[i]),
685 for (
auto& child : fetcher_threads) {
689 val_set.moveToBegin();
// splice moves the nodes of each per-slice list instead of copying them.
690 for (
auto& exprs : expr_set) {
691 value_exprs.splice(value_exprs.end(), exprs);
693 return makeExpr<Analyzer::InValues>(arg, value_exprs);
// Fragment: translates `lhs IN (subquery)`. Both sides must have the same SQL type.
// Result sets with more than 10000 entries take a separate path; otherwise rows are
// read one at a time into an Analyzer::InValues.
706 throw std::runtime_error(
"EXPLAIN is not supported with sub-queries");
711 const auto rex_subquery =
dynamic_cast<const RexSubQuery*
>(rhs);
713 auto ti = lhs->get_type_info();
714 auto result = rex_subquery->getExecutionResult();
716 auto& row_set =
result->getRows();
// The sub-query must produce exactly one column.
717 CHECK_EQ(
size_t(1), row_set->colCount());
718 const auto& rhs_ti = row_set->getColType(0);
719 if (rhs_ti.get_type() != ti.get_type()) {
720 throw std::runtime_error(
721 "The two sides of the IN operator must have the same type; found " +
722 ti.get_type_name() +
" and " + rhs_ti.get_type_name());
724 row_set->moveToBegin();
// Large results: the nested condition tests for integer or dictionary-encoded string
// types with non-columnar output.
725 if (row_set->entryCount() > 10000) {
726 std::shared_ptr<Analyzer::Expr> expr;
727 if ((ti.is_integer() || (ti.is_string() && ti.get_compression() ==
kENCODING_DICT)) &&
728 !row_set->getQueryMemDesc().didOutputColumnar()) {
733 if (expr && std::static_pointer_cast<Analyzer::InIntegerSet>(expr)
// Row-at-a-time path.
745 std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
747 auto row = row_set->getNextRow(
true,
false);
752 throw std::runtime_error(
753 "Unable to handle 'expr IN (subquery)', subquery returned 10000+ rows.");
755 auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
757 bool is_null_const{
false};
760 auto ti_none_encoded = ti;
762 auto none_encoded_string = makeExpr<Analyzer::Constant>(ti, is_null_const, d);
763 auto dict_encoded_string =
764 std::make_shared<Analyzer::UOper>(ti,
false,
kCAST, none_encoded_string);
765 value_exprs.push_back(dict_encoded_string);
767 value_exprs.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
770 return makeExpr<Analyzer::InValues>(lhs, value_exprs);
// Fragment: fills `in_vals` (which must start empty) from the rows of
// `values_rowset` in [values_rowset_slice.first, values_rowset_slice.second).
// When the source and destination dictionaries are the same object, row values are
// appended as-is; otherwise a `string_id` computed on lines not shown is appended.
778 std::vector<int64_t>& in_vals,
779 std::atomic<size_t>& total_in_vals_count,
780 const ResultSet* values_rowset,
781 const std::pair<int64_t, int64_t> values_rowset_slice,
784 const int64_t needle_null_val) {
785 CHECK(in_vals.empty());
// Pointer/handle equality, evaluated once outside the loop.
786 bool dicts_are_equal = source_dict == dest_dict;
787 for (
auto index = values_rowset_slice.first; index < values_rowset_slice.second;
789 const auto row = values_rowset->getOneColRow(index);
793 if (dicts_are_equal) {
794 in_vals.push_back(row.value);
796 const int string_id =
797 row.value == needle_null_val
801 in_vals.push_back(string_id);
// The size test guarding this throw is not shown in this excerpt.
806 throw std::runtime_error(
807 "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
// Fragment: appends row.value for every row of `values_rowset` in the half-open
// slice to `in_vals`, which must start empty. The test guarding the throw is not shown.
813 std::atomic<size_t>& total_in_vals_count,
814 const ResultSet* values_rowset,
815 const std::pair<int64_t, int64_t> values_rowset_slice) {
816 CHECK(in_vals.empty());
817 for (
auto index = values_rowset_slice.first; index < values_rowset_slice.second;
819 const auto row = values_rowset->getOneColRow(index);
821 in_vals.push_back(row.value);
824 throw std::runtime_error(
825 "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
// Fragment: variant of the in-vals filler that also takes `leaf_hosts` and dictionary
// references. With identical source/destination references, non-null row values are
// copied directly; otherwise non-null ids are collected in `source_ids`, mapped to
// `dest_ids` on lines not shown, and then appended to `in_vals`.
839 std::vector<int64_t>& in_vals,
840 std::atomic<size_t>& total_in_vals_count,
841 const ResultSet* values_rowset,
842 const std::pair<int64_t, int64_t> values_rowset_slice,
843 const std::vector<LeafHostInfo>& leaf_hosts,
846 const int32_t dest_generation,
847 const int64_t needle_null_val) {
848 CHECK(in_vals.empty());
849 std::vector<int32_t> source_ids;
// NOTE(review): reserves for the whole row set although only the slice is iterated —
// confirm this is intended.
850 source_ids.reserve(values_rowset->entryCount());
851 bool has_nulls =
false;
852 if (source_dict_ref == dest_dict_ref) {
853 in_vals.reserve(values_rowset_slice.second - values_rowset_slice.first +
855 for (
auto index = values_rowset_slice.first; index < values_rowset_slice.second;
857 const auto row = values_rowset->getOneColRow(index);
861 if (row.value != needle_null_val) {
862 in_vals.push_back(row.value);
865 throw std::runtime_error(
866 "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
// Differing dictionaries: gather non-null source ids first.
880 for (
auto index = values_rowset_slice.first; index < values_rowset_slice.second;
882 const auto row = values_rowset->getOneColRow(index);
884 if (row.value != needle_null_val) {
// row.value is stored into a vector<int32_t> here.
885 source_ids.push_back(row.value);
891 std::vector<int32_t> dest_ids;
// The mapping is expected to be one-to-one in count.
898 CHECK_EQ(dest_ids.size(), source_ids.size());
// One extra slot is reserved when a null was seen.
899 in_vals.reserve(dest_ids.size() + (has_nulls ? 1 : 0));
901 in_vals.push_back(needle_null_val);
903 for (
const int32_t dest_id : dest_ids) {
905 in_vals.push_back(dest_id);
908 throw std::runtime_error(
909 "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
// Fragment: builds an Analyzer::InIntegerSet for `arg` from the single column of
// `val_set`. Entries are split into up to `fetcher_count` slices, each filling its own
// vector in expr_set; the vectors are concatenated in slice order. String arguments use
// dictionary proxies; otherwise the argument must be an integer type.
// `fetcher_count` and the launch calls are on lines not shown in this excerpt.
924 std::shared_ptr<Analyzer::Expr> arg,
925 const ResultSet& val_set)
const {
929 std::vector<int64_t> value_exprs;
931 std::vector<std::vector<int64_t>> expr_set(fetcher_count);
932 std::vector<std::future<void>> fetcher_threads;
933 const auto& arg_type = arg->get_type_info();
934 const auto entry_count = val_set.entryCount();
935 CHECK_EQ(
size_t(1), val_set.colCount());
936 const auto& col_type = val_set.getColType(0);
938 (col_type.get_comp_param() <= 0 || arg_type.get_comp_param() <= 0)) {
// Counter passed by reference to every worker.
942 std::atomic<size_t> total_in_vals_count{0};
// Slice length is the ceiling of entry_count / fetcher_count.
945 stride = (entry_count + fetcher_count - 1) / fetcher_count;
946 i < fetcher_count && start_entry < entry_count;
947 ++i, start_entry += stride) {
948 expr_set[i].reserve(entry_count / fetcher_count);
949 const auto end_entry = std::min(start_entry + stride, entry_count);
950 if (arg_type.is_string()) {
// `dd` is the proxy for the argument's dictionary, `sd` for the result column's.
954 const auto& dest_dict_key = arg_type.getStringDictKey();
955 const auto& source_dict_key = col_type.getStringDictKey();
956 const auto dd =
executor_->getStringDictionaryProxy(
957 arg_type.getStringDictKey(), val_set.getRowSetMemOwner(),
true);
958 const auto sd =
executor_->getStringDictionaryProxy(
959 col_type.getStringDictKey(), val_set.getRowSetMemOwner(),
true);
963 col_expr->getColumnKey().db_id);
968 &total_in_vals_count,
974 catalog](std::vector<int64_t>& in_vals,
const size_t start,
const size_t end) {
982 catalog->getStringDictionaryHosts(),
983 {source_dict_key.db_id, source_dict_key.dict_id},
984 {dest_dict_key.db_id, dest_dict_key.dict_id},
997 std::ref(expr_set[i]),
// Non-string arguments must be integers.
1001 CHECK(arg_type.is_integer());
1004 [&val_set, &total_in_vals_count](
1005 std::vector<int64_t>& in_vals,
const size_t start,
const size_t end) {
1008 std::ref(expr_set[i]),
1013 for (
auto& child : fetcher_threads) {
1017 val_set.moveToBegin();
1018 value_exprs.reserve(entry_count);
1019 for (
auto& exprs : expr_set) {
1020 value_exprs.insert(value_exprs.end(), exprs.begin(), exprs.end());
// The last argument is true only when both the argument and the column are NOT NULL.
1022 return makeExpr<Analyzer::InIntegerSet>(
1023 arg, value_exprs, arg_type.get_notnull() && col_type.get_notnull());
// Fragment: operator translation. Visible steps: a single-operand special case, a kIN
// special case, an early return when `date_plus_minus` is non-null, then a loop over
// operands 1..size()-1. The bodies are on lines not shown in this excerpt.
1029 if (rex_operator->
size() == 1) {
1033 if (sql_op ==
kIN) {
1038 if (date_plus_minus) {
1039 return date_plus_minus;
// Starts at operand 1; operand 0 is handled on lines not shown here.
1051 for (
size_t i = 1; i < rex_operator->
size(); ++i) {
1052 std::shared_ptr<Analyzer::Expr> rhs;
1054 const auto rhs_op = rex_operator->
getOperand(i);
// Fragment: a geometry check on the left-hand side; the throw below is reached on a
// path whose condition is not shown in this excerpt.
1074 const auto lhs_ti = lhs->get_type_info();
1075 if (lhs_ti.is_geometry()) {
1078 throw std::runtime_error(
1079 "Bounding Box Intersection equivalence is currently only supported for "
1080 "geospatial types");
// Fragment: CASE translation. Collects one (when, then) expression pair per branch of
// `rex_case`; `when_expr` / `then_expr` are produced on lines not shown here.
1085 const RexCase* rex_case)
const {
1086 std::shared_ptr<Analyzer::Expr> else_expr;
1087 std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1089 for (
size_t i = 0; i < rex_case->
branchCount(); ++i) {
1092 expr_list.emplace_back(when_expr, then_expr);
// Fragment: builds an Analyzer::MLPredictExpr. Operands 1..num_operands-1 become
// regressor values; `model_value` is produced on a line not shown here.
1102 const auto num_operands = rex_function->
size();
1105 std::vector<std::shared_ptr<Analyzer::Expr>> regressor_values;
1106 for (
size_t regressor_idx = 1; regressor_idx < num_operands; ++regressor_idx) {
1107 regressor_values.emplace_back(
1110 return makeExpr<Analyzer::MLPredictExpr>(model_value, regressor_values);
// Fragment: builds an Analyzer::PCAProjectExpr. Operands 1..num_operands-2 become
// feature values; the loop stops one short of the end, and `pc_dimension_value` is
// assigned separately on a line not fully shown here.
1115 const auto num_operands = rex_function->
size();
1118 std::vector<std::shared_ptr<Analyzer::Expr>> feature_values;
// NOTE(review): num_operands is unsigned; `num_operands - 1` assumes at least one operand.
1119 for (
size_t feature_idx = 1; feature_idx < num_operands - 1; ++feature_idx) {
1120 feature_values.emplace_back(
1123 auto pc_dimension_value =
1125 return makeExpr<Analyzer::PCAProjectExpr>(
1126 model_value, feature_values, pc_dimension_value);
// Fragment: width_bucket translation. Validates argument types (throwing
// std::runtime_error on violation), casts the three value arguments to double when
// their type is not kDOUBLE, and builds an Analyzer::WidthBucketExpr.
1136 if (!partition_count->get_type_info().is_integer()) {
1137 throw std::runtime_error(
1138 "PARTITION_COUNT expression of width_bucket function expects an integer type.");
// Validator: kNULLT is rejected unless allow_null_type is set; non-numeric types are
// rejected. Lines between the two checks are not shown here.
1140 auto check_numeric_type =
1141 [](
const std::string& col_name,
const Analyzer::Expr* expr,
bool allow_null_type) {
1142 if (expr->get_type_info().get_type() ==
kNULLT) {
1143 if (!allow_null_type) {
1144 throw std::runtime_error(
1145 col_name +
" expression of width_bucket function expects non-null type.");
1149 if (!expr->get_type_info().is_number()) {
1150 throw std::runtime_error(
1151 col_name +
" expression of width_bucket function expects a numeric type.");
// Only the target value may have the null type.
1155 check_numeric_type(
"TARGET_VALUE", target_value.get(),
true);
1156 check_numeric_type(
"LOWER_BOUND", lower_bound.get(),
false);
1157 check_numeric_type(
"UPPER_BOUND", upper_bound.get(),
false);
// `double_ti` is declared on a line not shown here.
1159 auto cast_to_double_if_necessary = [](std::shared_ptr<Analyzer::Expr> arg) {
1160 const auto& arg_ti = arg->get_type_info();
1161 if (arg_ti.get_type() !=
kDOUBLE) {
1163 return arg->add_cast(double_ti);
1167 target_value = cast_to_double_if_necessary(target_value);
1168 lower_bound = cast_to_double_if_necessary(lower_bound);
1169 upper_bound = cast_to_double_if_necessary(upper_bound);
1170 return makeExpr<Analyzer::WidthBucketExpr>(
// Fragment: LIKE translation. Accepts two or three operands, requires the pattern to
// be an Analyzer::Constant, and takes the escape from the third operand when present.
1176 CHECK(rex_function->
size() == 2 || rex_function->
size() == 3);
1179 if (!std::dynamic_pointer_cast<const Analyzer::Constant>(like)) {
1180 throw std::runtime_error(
"The matching pattern must be a literal.");
1182 const auto escape = (rex_function->
size() == 3)
// The case-insensitive variant is recognized by the function name PG_ILIKE.
1185 const bool is_ilike = rex_function->
getName() ==
"PG_ILIKE"sv;
// Fragment: pattern-matching translation with the same shape as the LIKE case: two or
// three operands, a constant pattern, and an optional escape as the third operand.
1191 CHECK(rex_function->
size() == 2 || rex_function->
size() == 3);
1194 if (!std::dynamic_pointer_cast<const Analyzer::Constant>(pattern)) {
1195 throw std::runtime_error(
"The matching pattern must be a literal.");
1197 const auto escape = (rex_function->
size() == 3)
// Two separate returns wrapping `arg` in a LikelihoodExpr with fixed likelihoods:
// 0.9375 (= 15/16) and 0.0625 (= 1/16).
1207 return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.9375);
1214 return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.0625);
// Fragment: rejects a missing or null literal with std::runtime_error.
1220 const std::shared_ptr<Analyzer::Constant> literal_expr) {
1221 if (!literal_expr || literal_expr->get_is_null()) {
1222 throw std::runtime_error(
"The 'DatePart' argument must be a not 'null' literal.");
// Fragment: the function name PG_DATE_TRUNC selects the date-trunc path; the other
// path is on lines not shown in this excerpt.
1235 const bool is_date_trunc = rex_function->
getName() ==
"PG_DATE_TRUNC"sv;
1236 if (is_date_trunc) {
// Fragment: stores `val` in the Datum member matching the target type (the selecting
// case labels are not shown here) and returns a non-null Constant of type `ti`.
1251 datum.tinyintval = val;
1255 datum.smallintval = val;
1263 datum.bigintval = val;
1272 datum.floatval = val;
1276 datum.doubleval = val;
1282 return makeExpr<Analyzer::Constant>(ti,
false, datum);
// Fragment: DATEADD translation. A constant null interval and a kTIME operand are both
// rejected with std::runtime_error before the DateaddExpr is built.
1294 const auto number_units_const =
// A non-constant interval (null pointer here) passes this check.
1296 if (number_units_const && number_units_const->get_is_null()) {
1297 throw std::runtime_error(
"The 'Interval' argument literal must not be 'null'.");
1301 const auto& datetime_ti = datetime->get_type_info();
1302 if (datetime_ti.get_type() ==
kTIME) {
1303 throw std::runtime_error(
"DateAdd operation not supported for TIME.");
1306 const int dim = datetime_ti.get_dimension();
1307 return makeExpr<Analyzer::DateaddExpr>(
// Returns the name "DATETIME_PLUS" as a std::string (`s` literal suffix); the enclosing
// definition is not shown in this excerpt.
1315 return "DATETIME_PLUS"s;
// Fragment: date/time plus-minus translation. Applies only to two-operand operators
// whose first operand is a timestamp or date. Visible outcomes: a DatediffExpr-based
// result, a FunctionOper over {datetime, rhs}, or a DateaddExpr in seconds or months.
// The conditions choosing between them are mostly on lines not shown here.
1322 if (rex_operator->
size() != 2) {
1326 const auto datetime_ti = datetime->get_type_info();
1327 if (!datetime_ti.is_timestamp() && !datetime_ti.is_date()) {
// kTIME is rejected explicitly.
1328 if (datetime_ti.get_type() ==
kTIME) {
1329 throw std::runtime_error(
"DateTime addition/subtraction not supported for TIME.");
1334 const auto rhs_ti = rhs->get_type_info();
// High-precision timestamps on either side are rejected for the difference path.
1336 if (datetime_ti.is_high_precision_timestamp() ||
1337 rhs_ti.is_high_precision_timestamp()) {
1338 throw std::runtime_error(
1339 "High Precision timestamps are not supported for TIMESTAMPDIFF operation. "
1344 const auto& rex_operator_ti = rex_operator->
getType();
1345 const auto datediff_field =
// Note the operand order: rhs first, then datetime.
1348 makeExpr<Analyzer::DatediffExpr>(bigint_ti, datediff_field, rhs, datetime);
1351 return makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1362 std::vector<std::shared_ptr<Analyzer::Expr>>
args = {datetime, rhs};
1363 auto dt_plus = makeExpr<Analyzer::FunctionOper>(
// The interval is constant-folded before its type is inspected.
1370 const auto interval =
fold_expr(rhs.get());
1371 auto interval_ti = interval->get_type_info();
1375 std::shared_ptr<Analyzer::Expr> interval_sec;
// For kMINUS the literal interval is negated before the division that follows.
1379 (op ==
kMINUS ? -interval_lit->get_constval().bigintval
1380 : interval_lit->get_constval().bigintval) /
1383 interval_sec = makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1390 std::make_shared<Analyzer::UOper>(bigint_ti,
false,
kUMINUS, interval_sec);
1393 return makeExpr<Analyzer::DateaddExpr>(datetime_ti,
daSECOND, interval_sec, datetime);
// Month intervals: negated through a kUMINUS UOper when subtracting.
1396 const auto interval_months = op ==
kMINUS ? std::make_shared<Analyzer::UOper>(
1397 bigint_ti,
false,
kUMINUS, interval)
1399 return makeExpr<Analyzer::DateaddExpr>(datetime_ti,
daMONTH, interval_months, datetime);
// Builds a CharLengthExpr over the decompressed string argument; the boolean is true
// exactly when the function name is CHAR_LENGTH.
1429 return makeExpr<Analyzer::CharLengthExpr>(str_arg->decompress(),
1430 rex_function->
getName() ==
"CHAR_LENGTH"sv);
// Fragment: requires a non-null, non-varlen string expression, otherwise throws
// std::runtime_error naming the function. A second throw (condition not shown) rejects
// unnest input. Builds a KeyForStringExpr over args[0].
1438 if (
nullptr == expr || !expr->get_type_info().is_string() ||
1439 expr->get_type_info().is_varlen()) {
1440 throw std::runtime_error(rex_function->
getName() +
1441 " expects a dictionary encoded text column.");
1445 throw std::runtime_error(
1447 " does not support unnest operator as its input expression.");
1449 return makeExpr<Analyzer::KeyForStringExpr>(
args[0]);
// Fragment: casts the argument to double when its type is not kDOUBLE (`double_ti` is
// declared on a line not shown here) and wraps it in a SampleRatioExpr.
1456 const auto& arg_ti = arg->get_type_info();
1457 if (arg_ti.get_type() !=
kDOUBLE) {
1459 arg = arg->add_cast(double_ti);
1461 return makeExpr<Analyzer::SampleRatioExpr>(arg);
// Fragment: the user name defaults to "SESSIONLESS_USER" and is replaced by the
// session's current user name under a condition on a line not shown here.
1466 std::string user{
"SESSIONLESS_USER"};
1468 user =
query_state_->getConstSessionInfo()->get_currentUser().userName;
// Fragment: string operator dispatch. Switches on `string_op_kind` and builds the
// corresponding Analyzer::*StringOper from `args`. The case labels are not shown in
// this excerpt; unsupported kinds throw std::runtime_error.
1475 const auto func_name = rex_function->
getName();
// Error path for a function name that is not supported (the test is not shown here).
1477 std::ostringstream oss;
1478 oss <<
"Function " << func_name <<
" not supported.";
1479 throw std::runtime_error(oss.str());
1484 switch (string_op_kind) {
1486 return makeExpr<Analyzer::LowerStringOper>(
args);
1488 return makeExpr<Analyzer::UpperStringOper>(
args);
1490 return makeExpr<Analyzer::InitCapStringOper>(
args);
1492 return makeExpr<Analyzer::ReverseStringOper>(
args);
1494 return makeExpr<Analyzer::RepeatStringOper>(
args);
1496 return makeExpr<Analyzer::ConcatStringOper>(
args);
// Pad and trim operators additionally receive the operator kind.
1499 return makeExpr<Analyzer::PadStringOper>(string_op_kind,
args);
1504 return makeExpr<Analyzer::TrimStringOper>(string_op_kind,
args);
1507 return makeExpr<Analyzer::SubstringStringOper>(
args);
1509 return makeExpr<Analyzer::OverlayStringOper>(
args);
1511 return makeExpr<Analyzer::ReplaceStringOper>(
args);
1513 return makeExpr<Analyzer::SplitPartStringOper>(
args);
1515 return makeExpr<Analyzer::RegexpReplaceStringOper>(
args);
1517 return makeExpr<Analyzer::RegexpSubstrStringOper>(
args);
1519 return makeExpr<Analyzer::JsonValueStringOper>(
args);
1521 return makeExpr<Analyzer::Base64EncodeStringOper>(
args);
1523 return makeExpr<Analyzer::Base64DecodeStringOper>(
args);
// The try-cast operator also needs the function's result type.
1525 return makeExpr<Analyzer::TryStringCastOper>(rex_function->
getType(),
args);
1527 return makeExpr<Analyzer::PositionStringOper>(
args);
1529 return makeExpr<Analyzer::JarowinklerSimilarityStringOper>(
args);
1531 return makeExpr<Analyzer::LevenshteinDistanceStringOper>(
args);
1533 throw std::runtime_error(
"Unsupported string function.");
// Fragment: cardinality translation. The argument must be an array whose subtype is
// not kARRAY; violations throw std::runtime_error prefixed with the function name.
// For arrays with a positive size the element size must be positive too. The visible
// return builds a CardinalityExpr; other returns may exist on lines not shown here.
1540 const auto ret_ti = rex_function->
getType();
1542 const auto arg_ti = arg->get_type_info();
1543 if (!arg_ti.is_array()) {
1544 throw std::runtime_error(rex_function->
getName() +
" expects an array expression.");
1546 if (arg_ti.get_subtype() ==
kARRAY) {
1547 throw std::runtime_error(rex_function->
getName() +
1548 " expects one-dimension array expression.");
1550 const auto array_size = arg_ti.get_size();
1551 const auto array_elem_size = arg_ti.get_elem_type().get_array_context_logical_size();
1553 if (array_size > 0) {
1554 if (array_elem_size <= 0) {
1555 throw std::runtime_error(rex_function->
getName() +
1556 ": unexpected array element type.");
1562 return makeExpr<Analyzer::CardinalityExpr>(arg);
// Array indexing: a kARRAY_AT BinOper over (base, index) typed as the base's element type.
1570 return makeExpr<Analyzer::BinOper>(
1571 base->get_type_info().get_elem_type(),
false,
kARRAY_AT,
kONE, base, index);
// Fragment: returns a non-null kDATE Constant; `datum` is filled on lines not shown here.
1575 constexpr
bool is_null =
false;
1578 return makeExpr<Analyzer::Constant>(
kDATE,
is_null, datum);
// Fragment: returns a non-null kTIME Constant; `datum` is filled on lines not shown here.
1582 constexpr
bool is_null =
false;
1585 return makeExpr<Analyzer::Constant>(
kTIME,
is_null, datum);
// Fragment: only a non-null string literal equal to "NOW" is accepted; anything else
// throws std::runtime_error carrying `datetime_err`.
1597 const std::string datetime_err{R
"(Only DATETIME('NOW') supported for now.)"};
1598 if (!arg_lit || arg_lit->get_is_null()) {
1599 throw std::runtime_error(datetime_err);
1601 CHECK(arg_lit->get_type_info().is_string());
// The comparison is exact (no case folding is visible here).
1602 if (*arg_lit->get_constval().stringval !=
"NOW"sv) {
1603 throw std::runtime_error(datetime_err);
// Fragment: absolute value as a CASE expression — when operand < zero the result is
// -operand, otherwise the operand itself (the CaseExpr's last argument).
// `zero` is built on a line not shown here.
1610 std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1614 const auto& operand_ti = operand->get_type_info();
1615 CHECK(operand_ti.is_number());
1617 const auto lt_zero = makeExpr<Analyzer::BinOper>(
kBOOLEAN,
kLT,
kONE, operand, zero);
1618 const auto uminus_operand =
1619 makeExpr<Analyzer::UOper>(operand_ti.get_type(),
kUMINUS, operand);
1620 expr_list.emplace_back(lt_zero, uminus_operand);
1621 return makeExpr<Analyzer::CaseExpr>(operand_ti,
false, expr_list, operand);
// Fragment: sign as a CASE expression over three comparisons with `zero` (<, =, >).
// The branch values are on lines not shown here; the visible final argument is a null
// Constant of the operand's type.
1626 std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1630 const auto& operand_ti = operand->get_type_info();
1631 CHECK(operand_ti.is_number());
1633 const auto lt_zero = makeExpr<Analyzer::BinOper>(
kBOOLEAN,
kLT,
kONE, operand, zero);
1635 const auto eq_zero = makeExpr<Analyzer::BinOper>(
kBOOLEAN,
kEQ,
kONE, operand, zero);
1637 const auto gt_zero = makeExpr<Analyzer::BinOper>(
kBOOLEAN,
kGT,
kONE, operand, zero);
1639 return makeExpr<Analyzer::CaseExpr>(
1643 makeExpr<Analyzer::Constant>(operand_ti,
true,
Datum{0}));
// Returns a new Analyzer::OffsetInFragment expression built with no arguments.
1647 return makeExpr<Analyzer::OffsetInFragment>();
// Fragment: array construction. With at least one element, every element's logical type
// is compared with the first element's; a mismatch throws std::runtime_error naming the
// offending position. The array's subtype (and, for strings, dictionary parameters) is
// then taken from the first element. Some branch lines are not shown in this excerpt.
1653 auto sql_type = rex_function->
getType();
1658 if (translated_function_args.size() > 0) {
1659 const auto first_element_logical_type =
// Finds the first element whose logical type differs from the first element's.
1662 auto diff_elem_itr =
1663 std::find_if(translated_function_args.begin(),
1664 translated_function_args.end(),
1665 [first_element_logical_type](
const auto expr) {
1666 const auto element_logical_type =
1668 if (first_element_logical_type != element_logical_type) {
// Two none-encoded strings get special treatment on lines not shown here.
1669 if (first_element_logical_type.is_none_encoded_string() &&
1670 element_logical_type.is_none_encoded_string()) {
1677 if (diff_elem_itr != translated_function_args.end()) {
1678 throw std::runtime_error(
1680 std::to_string(diff_elem_itr - translated_function_args.begin()) +
1681 " is not of the same type as other elements of the array. Consider casting "
1682 "to force this condition.\nElement Type: " +
1685 "\nArray type: " + first_element_logical_type.to_string());
// String elements: the subtype is kTEXT; dictionary-encoded elements also propagate
// their comp_param and dictionary key.
1688 if (first_element_logical_type.is_string()) {
1689 sql_type.set_subtype(
kTEXT);
1691 if (first_element_logical_type.is_none_encoded_string()) {
1695 CHECK(first_element_logical_type.is_dict_encoded_string());
1696 sql_type.set_comp_param(first_element_logical_type.get_comp_param());
1697 sql_type.setStringDictKey(first_element_logical_type.getStringDictKey());
1699 }
else if (first_element_logical_type.is_dict_encoded_string()) {
1700 sql_type.set_subtype(
kTEXT);
1702 sql_type.set_comp_param(first_element_logical_type.get_comp_param());
1703 sql_type.setStringDictKey(first_element_logical_type.getStringDictKey());
// Other element types: subtype, scale and precision are copied from the first element.
1705 sql_type.set_subtype(first_element_logical_type.get_type());
1706 sql_type.set_scale(first_element_logical_type.get_scale());
1707 sql_type.set_precision(first_element_logical_type.get_precision());
1710 return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1714 return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1717 return makeExpr<Analyzer::ArrayExpr>(rex_function->
getType(),
1727 if (rex_function->
getName() ==
"REGEXP_LIKE"sv) {
1730 if (rex_function->
getName() ==
"LIKELY"sv) {
1733 if (rex_function->
getName() ==
"UNLIKELY"sv) {
1739 if (rex_function->
getName() ==
"DATEADD"sv) {
1742 if (rex_function->
getName() ==
"DATEDIFF"sv) {
1745 if (rex_function->
getName() ==
"DATEPART"sv) {
1751 if (rex_function->
getName() ==
"KEY_FOR_STRING"sv) {
1754 if (rex_function->
getName() ==
"WIDTH_BUCKET"sv) {
1757 if (rex_function->
getName() ==
"SAMPLE_RATIO"sv) {
1760 if (rex_function->
getName() ==
"CURRENT_USER"sv) {
1763 if (rex_function->
getName() ==
"ML_PREDICT"sv) {
1766 if (rex_function->
getName() ==
"PCA_PROJECT"sv) {
1793 "JAROWINKLER_SIMILARITY"sv,
1794 "LEVENSHTEIN_DISTANCE"sv)) {
1800 if (rex_function->
getName() ==
"ITEM"sv) {
1803 if (rex_function->
getName() ==
"CURRENT_DATE"sv) {
1806 if (rex_function->
getName() ==
"CURRENT_TIME"sv) {
1809 if (rex_function->
getName() ==
"CURRENT_TIMESTAMP"sv) {
1812 if (rex_function->
getName() ==
"NOW"sv) {
1815 if (rex_function->
getName() ==
"DATETIME"sv) {
1821 if (rex_function->
getName() ==
"ABS"sv) {
1824 if (rex_function->
getName() ==
"SIGN"sv) {
1828 return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1832 }
else if (rex_function->
getName() ==
"ROUND"sv) {
1833 std::vector<std::shared_ptr<Analyzer::Expr>>
args =
1836 if (rex_function->
size() == 1) {
1844 args.push_back(makeExpr<Analyzer::Constant>(t,
false, d));
1848 CHECK(args.size() == 2);
1850 if (!args[0]->get_type_info().is_number()) {
1851 throw std::runtime_error(
"Only numeric 1st operands are supported");
1856 if (!args[1]->get_type_info().is_integer()) {
1857 throw std::runtime_error(
"Only integer 2nd operands are supported");
1864 ? args[0]->get_type_info()
1867 return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1870 if (rex_function->
getName() ==
"DATETIME_PLUS"sv) {
1871 auto dt_plus = makeExpr<Analyzer::FunctionOper>(rex_function->
getType(),
1880 if (rex_function->
getName() ==
"/INT"sv) {
1887 if (rex_function->
getName() ==
"Reinterpret"sv) {
1899 "ST_NumGeometries"sv,
1905 "HeavyDB_Geo_PolyBoundsPtr"sv)) {
1915 "convert_meters_to_pixel_width"sv,
1916 "convert_meters_to_pixel_height"sv,
1917 "is_point_in_view"sv,
1918 "is_point_size_in_view"sv)) {
1927 "ST_IntersectsBox"sv,
1928 "ST_Approx_Overlaps"sv,
1937 if (rex_function->
getName() ==
"OFFSET_IN_FRAGMENT"sv) {
1941 if (rex_function->
getName() ==
"ARRAY"sv) {
1946 "ST_GeomFromText"sv,
1947 "ST_GeogFromText"sv,
1954 "ST_Transform"sv)) {
1959 "ST_Intersection"sv,
1963 "ST_ConcaveHull"sv)) {
1977 return distance_check;
1984 if (rex_function->
getName() == std::string(
"||") ||
1985 rex_function->
getName() == std::string(
"SUBSTRING")) {
1987 return makeExpr<Analyzer::FunctionOper>(
1988 ret_ti, rex_function->
getName(), arg_expr_list);
1999 auto ext_func_args = ext_func_sig.getInputArgs();
2000 CHECK_LE(arg_expr_list.size(), ext_func_args.size());
2001 for (
size_t i = 0, di = 0; i < arg_expr_list.size(); i++) {
2002 CHECK_LT(i + di, ext_func_args.size());
2003 auto ext_func_arg = ext_func_args[i + di];
2017 std::dynamic_pointer_cast<Analyzer::Constant>(arg_expr_list[i])) {
2019 if (ext_func_arg_ti != arg_expr_list[i]->get_type_info()) {
2020 arg_expr_list[i] = constant->add_cast(ext_func_arg_ti);
2027 LOG(
WARNING) <<
"RelAlgTranslator::translateFunction: " << e.what();
2033 bool arguments_not_null =
true;
2034 for (
const auto& arg_expr : arg_expr_list) {
2035 if (!arg_expr->get_type_info().get_notnull()) {
2036 arguments_not_null =
false;
2042 return makeExpr<Analyzer::FunctionOper>(ret_ti, rex_function->
getName(), arg_expr_list);
2048 const std::vector<SortField>& sort_fields) {
2049 std::vector<Analyzer::OrderEntry> collation;
2050 for (
size_t i = 0; i < sort_fields.size(); ++i) {
2051 const auto& sort_field = sort_fields[i];
2052 collation.emplace_back(i,
2090 if (time_unit_val == 1) {
2092 }
else if (time_unit_val == 12) {
2103 std::vector<std::shared_ptr<Analyzer::Expr>>
args;
2104 for (
size_t i = 0; i < rex_window_function->
size(); ++i) {
2107 std::vector<std::shared_ptr<Analyzer::Expr>> partition_keys;
2108 for (
const auto& partition_key : rex_window_function->
getPartitionKeys()) {
2111 std::vector<std::shared_ptr<Analyzer::Expr>> order_keys;
2112 for (
const auto& order_key : rex_window_function->
getOrderKeys()) {
2115 std::vector<Analyzer::OrderEntry> collation =
2118 auto ti = rex_window_function->
getType();
2119 auto window_func_kind = rex_window_function->
getKind();
2122 ti = args.front()->get_type_info();
2124 auto determine_frame_bound_type =
2126 if (bound.unbounded) {
2127 CHECK(!bound.bound_expr && !bound.is_current_row);
2128 if (bound.following) {
2130 }
else if (bound.preceding) {
2134 if (bound.is_current_row) {
2135 CHECK(!bound.unbounded && !bound.bound_expr);
2138 CHECK(!bound.unbounded && bound.bound_expr);
2139 if (bound.following) {
2141 }
else if (bound.preceding) {
2148 auto is_negative_framing_bound =
2149 [](
const SQLTypes t,
const Datum& d,
bool is_time_unit =
false) {
2152 return d.tinyintval < 0;
2154 return d.smallintval < 0;
2156 return d.intval < 0;
2161 CHECK(is_time_unit);
2162 return d.doubleval < 0;
2167 return d.bigintval < 0;
2169 throw std::runtime_error(
2170 "We currently only support integer-type literal expression as a window "
2171 "frame bound expression");
2176 bool negative_constant =
false;
2177 bool detect_invalid_frame_start_bound_expr =
false;
2178 bool detect_invalid_frame_end_bound_expr =
false;
2181 bool has_end_bound_frame_expr =
false;
2182 std::shared_ptr<Analyzer::Expr> frame_start_bound_expr;
2184 determine_frame_bound_type(frame_start_bound);
2185 std::shared_ptr<Analyzer::Expr> frame_end_bound_expr;
2187 determine_frame_bound_type(frame_end_bound);
2188 bool has_framing_clause =
2190 auto frame_mode = rex_window_function->
isRows()
2193 if (order_keys.empty()) {
2200 has_framing_clause =
false;
2203 auto translate_frame_bound_expr = [&](
const RexScalar* bound_expr) {
2204 std::shared_ptr<Analyzer::Expr> translated_expr;
2205 const auto rex_oper =
dynamic_cast<const RexOperator*
>(bound_expr);
2206 if (rex_oper && rex_oper->getType().is_timeinterval()) {
2208 const auto bin_oper =
2210 auto time_literal_expr =
2212 CHECK(time_literal_expr);
2214 is_negative_framing_bound(time_literal_expr->get_type_info().get_type(),
2215 time_literal_expr->get_constval(),
2217 return std::make_pair(
false, translated_expr);
2219 if (dynamic_cast<const RexLiteral*>(bound_expr)) {
2221 if (
auto literal_expr =
2222 dynamic_cast<const Analyzer::Constant*>(translated_expr.get())) {
2223 negative_constant = is_negative_framing_bound(
2224 literal_expr->get_type_info().get_type(), literal_expr->get_constval());
2225 return std::make_pair(
false, translated_expr);
2228 return std::make_pair(
true, translated_expr);
2231 if (frame_start_bound.bound_expr) {
2232 std::tie(detect_invalid_frame_start_bound_expr, frame_start_bound_expr) =
2233 translate_frame_bound_expr(frame_start_bound.bound_expr.get());
2236 if (frame_end_bound.bound_expr) {
2237 std::tie(detect_invalid_frame_end_bound_expr, frame_end_bound_expr) =
2238 translate_frame_bound_expr(frame_end_bound.bound_expr.get());
2242 if (detect_invalid_frame_start_bound_expr || detect_invalid_frame_end_bound_expr) {
2243 throw std::runtime_error(
2244 "We currently only support literal expression as a window frame bound "
2250 if (negative_constant) {
2251 throw std::runtime_error(
2252 "A constant expression for window framing should have nonnegative value.");
2255 auto handle_time_interval_expr_if_necessary = [&](
const Analyzer::Expr* bound_expr,
2257 bool for_start_bound) {
2265 if (for_start_bound) {
2266 frame_start_bound_expr = translated_expr;
2268 frame_end_bound_expr = translated_expr;
2272 handle_time_interval_expr_if_necessary(
2273 frame_start_bound_expr.get(), frame_start_bound_type,
true);
2274 handle_time_interval_expr_if_necessary(
2275 frame_end_bound_expr.get(), frame_end_bound_type,
false);
2278 if (frame_start_bound.following) {
2279 if (frame_end_bound.is_current_row) {
2280 throw std::runtime_error(
2281 "Window framing starting from following row cannot end with current row.");
2282 }
else if (has_end_bound_frame_expr && frame_end_bound.preceding) {
2283 throw std::runtime_error(
2284 "Window framing starting from following row cannot have preceding rows.");
2287 if (frame_start_bound.is_current_row && frame_end_bound.preceding &&
2288 !frame_end_bound.unbounded && has_end_bound_frame_expr) {
2289 throw std::runtime_error(
2290 "Window framing starting from current row cannot have preceding rows.");
2292 if (has_framing_clause) {
2294 if (order_keys.size() != 1) {
2295 throw std::runtime_error(
2296 "Window framing with range mode requires a single order-by column");
2298 if (!frame_start_bound_expr &&
2300 !frame_end_bound_expr &&
2302 has_framing_clause =
false;
2303 VLOG(1) <<
"Ignore range framing mode with a frame bound between "
2304 "UNBOUNDED_PRECEDING and CURRENT_ROW";
2307 bool (*)(
const Analyzer::ColumnVar*,
const Analyzer::ColumnVar*)>
2310 for (
auto cv : colvar_set) {
2311 if (!(cv->get_type_info().is_integer() || cv->get_type_info().is_fp() ||
2312 cv->get_type_info().is_time())) {
2313 has_framing_clause =
false;
2314 VLOG(1) <<
"Range framing mode with non-number type ordering column is not "
2315 "supported yet, skip window framing";
2320 auto const func_name =
::toString(window_func_kind);
2321 auto const num_args = args.size();
2322 bool need_order_by_clause =
false;
2323 bool need_frame_def =
false;
2324 switch (window_func_kind) {
2327 need_order_by_clause =
true;
2328 need_frame_def =
true;
2329 if (num_args != 2) {
2330 throw std::runtime_error(func_name +
" has an invalid number of input arguments");
2334 args.push_back(makeExpr<Analyzer::Constant>(
kINT,
false, d));
2335 const auto target_expr_cv =
2337 if (!target_expr_cv) {
2338 throw std::runtime_error(
"Currently, " + func_name +
2339 " only allows a column reference as its first argument");
2341 const auto target_ti = target_expr_cv->get_type_info();
2342 if (target_ti.is_dict_encoded_string()) {
2346 ti.set_comp_param(target_expr_cv->get_type_info().get_comp_param());
2347 ti.setStringDictKey(target_expr_cv->get_type_info().getStringDictKey());
2348 ti.set_fixed_size();
2350 const auto target_offset_cv =
2352 if (!target_expr_cv ||
2353 is_negative_framing_bound(target_offset_cv->get_type_info().get_type(),
2354 target_offset_cv->get_constval())) {
2355 throw std::runtime_error(
2356 "Currently, " + func_name +
2357 " only allows non-negative constant as its second argument");
2363 if (num_args != 1) {
2364 throw std::runtime_error(func_name +
" has an invalid number of input arguments");
2366 need_order_by_clause =
true;
2367 need_frame_def =
true;
2371 if (has_framing_clause) {
2372 throw std::runtime_error(func_name +
" does not support window framing clause");
2374 auto const input_expr_ti = args.front()->get_type_info();
2375 if (input_expr_ti.is_string()) {
2376 throw std::runtime_error(func_name +
" not supported on " +
2377 input_expr_ti.get_type_name() +
" type yet");
2379 need_order_by_clause =
true;
2380 std::string
const arg_str{args.front()->toString()};
2381 bool needs_inject_input_arg_ordering =
2384 [&arg_str](std::shared_ptr<Analyzer::Expr>
const& expr) {
2385 return boost::equals(arg_str, expr->toString());
2387 if (needs_inject_input_arg_ordering) {
2388 VLOG(1) <<
"Inject " << args.front()->toString() <<
" as ordering column of the "
2389 << func_name <<
" function";
2390 order_keys.push_back(args.front());
2393 collation.emplace_back(collation.size() + 1,
2402 if (num_args != 2) {
2403 throw std::runtime_error(func_name +
" has an invalid number of input arguments");
2407 ti.set_notnull(
false);
2409 need_order_by_clause =
true;
2410 need_frame_def =
true;
2413 throw std::runtime_error(func_name +
2414 " must have a positional argument expression.");
2416 bool has_valid_arg =
false;
2417 if (args[1]->get_type_info().is_integer()) {
2418 if (
auto* n_value_ptr = dynamic_cast<Analyzer::Constant*>(args[1].
get())) {
2419 if (0 < n_value_ptr->get_constval().intval) {
2422 auto d = n_value_ptr->get_constval();
2424 n_value_ptr->set_constval(d);
2425 has_valid_arg =
true;
2429 if (!has_valid_arg) {
2430 throw std::runtime_error(
"The positional argument of the " + func_name +
2431 " must be a positive integer constant.");
2436 if (order_keys.empty()) {
2437 throw std::runtime_error(
2439 " requires an ORDER BY sub-clause within the window clause");
2441 if (has_framing_clause) {
2444 <<
" must use a pre-defined window frame range (e.g., ROWS BETWEEN "
2445 "UNBOUNDED PRECEDING AND CURRENT ROW). "
2446 "Thus, we skip the user-defined window frame for this window function";
2448 has_framing_clause =
true;
2455 if (need_order_by_clause && order_keys.empty()) {
2456 throw std::runtime_error(func_name +
" requires an ORDER BY clause");
2458 if (need_frame_def && !has_framing_clause) {
2459 throw std::runtime_error(func_name +
" requires window frame definition");
2461 if (!has_framing_clause) {
2464 frame_start_bound_expr =
nullptr;
2465 frame_end_bound_expr =
nullptr;
2471 return makeExpr<Analyzer::WindowFunction>(
2473 rex_window_function->
getKind(),
2478 makeExpr<Analyzer::WindowFrame>(frame_start_bound_type, frame_start_bound_expr),
2479 makeExpr<Analyzer::WindowFrame>(frame_end_bound_type, frame_end_bound_expr),
2484 std::shared_ptr<Analyzer::Expr> order_key,
2485 bool for_preceding_bound,
2490 const auto order_key_ti = order_key->get_type_info();
2491 const auto frame_bound_ti = frame_bound_expr->
get_type_info();
2492 const auto time_val_expr =
2494 const auto time_unit_val_expr =
2498 bool invalid_time_unit_type =
false;
2499 bool invalid_frame_bound_expr_type =
false;
2501 auto prepare_time_value_datum = [&d,
2502 &invalid_frame_bound_expr_type,
2504 &for_preceding_bound](
bool is_timestamp_second) {
2511 switch (time_val_expr->get_type_info().get_type()) {
2513 d.
bigintval = time_val_expr->get_constval().tinyintval;
2517 d.
bigintval = time_val_expr->get_constval().smallintval;
2521 d.
bigintval = time_val_expr->get_constval().intval;
2525 d.
bigintval = time_val_expr->get_constval().bigintval;
2530 if (!is_timestamp_second) {
2532 invalid_frame_bound_expr_type =
true;
2535 d.
bigintval = time_val_expr->get_constval().bigintval;
2539 if (!is_timestamp_second) {
2541 invalid_frame_bound_expr_type =
true;
2544 d.
bigintval = time_val_expr->get_constval().doubleval *
2545 pow(10, time_val_expr->get_type_info().get_scale());
2549 invalid_frame_bound_expr_type =
true;
2553 if (for_preceding_bound) {
2558 switch (order_key_ti.get_type()) {
2560 if (time_val_expr->get_type_info().is_integer()) {
2563 frame_bound_ti.get_type(), time_unit_val_expr);
2564 switch (time_val_expr->get_type_info().get_type()) {
2566 d.
bigintval = time_val_expr->get_constval().tinyintval * time_multiplier;
2570 d.
bigintval = time_val_expr->get_constval().smallintval * time_multiplier;
2574 d.
bigintval = time_val_expr->get_constval().intval * time_multiplier;
2578 d.
bigintval = time_val_expr->get_constval().bigintval * time_multiplier;
2587 invalid_frame_bound_expr_type =
true;
2590 invalid_time_unit_type =
true;
2592 if (invalid_frame_bound_expr_type) {
2593 throw std::runtime_error(
2594 "Invalid time unit is used to define window frame bound expression for " +
2595 order_key_ti.get_type_name() +
" type");
2596 }
else if (invalid_time_unit_type) {
2597 throw std::runtime_error(
2598 "Window frame bound expression has an invalid type for " +
2599 order_key_ti.get_type_name() +
" type");
2601 return std::make_shared<Analyzer::Constant>(
kBIGINT,
false, d);
2605 if (time_val_expr->get_type_info().is_integer()) {
2606 switch (time_unit) {
2620 invalid_frame_bound_expr_type =
true;
2625 invalid_time_unit_type =
true;
2627 if (invalid_frame_bound_expr_type) {
2628 throw std::runtime_error(
2629 "Invalid time unit is used to define window frame bound expression for " +
2630 order_key_ti.get_type_name() +
" type");
2631 }
else if (invalid_time_unit_type) {
2632 throw std::runtime_error(
2633 "Window frame bound expression has an invalid type for " +
2634 order_key_ti.get_type_name() +
" type");
2637 prepare_time_value_datum(
false);
2638 const auto cast_number_units = makeExpr<Analyzer::Constant>(
kBIGINT,
false, d);
2639 const int dim = order_key_ti.get_dimension();
2640 return makeExpr<Analyzer::DateaddExpr>(
2645 switch (time_unit) {
2647 switch (time_val_expr->get_type_info().get_scale()) {
2668 prepare_time_value_datum(
true);
2673 prepare_time_value_datum(
false);
2678 prepare_time_value_datum(
false);
2683 prepare_time_value_datum(
false);
2688 prepare_time_value_datum(
false);
2693 prepare_time_value_datum(
false);
2697 invalid_time_unit_type =
true;
2701 if (!invalid_time_unit_type) {
2703 const auto cast_number_units = makeExpr<Analyzer::Constant>(
kBIGINT,
false, d);
2704 const int dim = order_key_ti.get_dimension();
2717 if (invalid_frame_bound_expr_type) {
2718 throw std::runtime_error(
2719 "Invalid time unit is used to define window frame bound expression for " +
2720 order_key_ti.get_type_name() +
" type");
2721 }
else if (invalid_time_unit_type) {
2722 throw std::runtime_error(
"Window frame bound expression has an invalid type for " +
2723 order_key_ti.get_type_name() +
" type");
2730 std::vector<std::shared_ptr<Analyzer::Expr>>
args;
2731 for (
size_t i = 0; i < rex_function->
size(); ++i) {
2738 const std::shared_ptr<Analyzer::Expr> qual_expr) {
2742 const auto rewritten_qual_expr =
rewrite_expr(qual_expr.get());
2743 return {{}, {rewritten_qual_expr ? rewritten_qual_expr : qual_expr}};
2746 if (bin_oper->get_optype() ==
kAND) {
2750 simple_quals.insert(
2751 simple_quals.end(), rhs_cf.simple_quals.begin(), rhs_cf.simple_quals.end());
2752 auto quals = lhs_cf.quals;
2753 quals.insert(quals.end(), rhs_cf.quals.begin(), rhs_cf.quals.end());
2754 return {simple_quals, quals};
2757 const auto simple_qual = bin_oper->normalize_simple_predicate(rte_idx);
2763 const std::shared_ptr<Analyzer::Expr>& qual_expr) {
2765 const auto bin_oper = std::dynamic_pointer_cast<
const Analyzer::BinOper>(qual_expr);
2767 const auto rewritten_qual_expr =
rewrite_expr(qual_expr.get());
2768 return {rewritten_qual_expr ? rewritten_qual_expr : qual_expr};
2770 if (bin_oper->get_optype() ==
kOR) {
2773 auto quals = lhs_df;
2774 quals.insert(quals.end(), rhs_df.begin(), rhs_df.end());
2790 const auto& operand_ti = operand->get_type_info();
2791 const auto& target_ti = rex_function->
getType();
2792 if (!operand_ti.is_string()) {
2793 throw std::runtime_error(
2794 "High precision timestamp cast argument must be a string. Input type is: " +
2795 operand_ti.get_type_name());
2796 }
else if (!target_ti.is_high_precision_timestamp()) {
2797 throw std::runtime_error(
2798 "Cast target type should be high precision timestamp. Input type is: " +
2799 target_ti.get_type_name());
2800 }
else if (target_ti.get_dimension() != 6 && target_ti.get_dimension() != 9) {
2801 throw std::runtime_error(
2802 "Cast target type should be TIMESTAMP(6|9). Input type is: TIMESTAMP(" +
2805 return operand->add_cast(target_ti);
DEVICE auto upper_bound(ARGS &&...args)
Defines data structures for the semantic analysis phase of query processing.
const RexScalar * getThen(const size_t idx) const
const std::vector< JoinType > join_types_
HOST DEVICE SQLTypes get_subtype() const
void set_compression(EncodingType c)
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >, const Executor *executor=nullptr)
std::shared_ptr< Analyzer::Expr > translateOffsetInFragment() const
SqlStringOpKind name_to_string_op_kind(const std::string &func_name)
static std::shared_ptr< Analyzer::Expr > get(const std::string &)
std::shared_ptr< Analyzer::Expr > translateCurrentTimestamp() const
std::shared_ptr< Analyzer::Expr > translateBinaryGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
std::shared_ptr< Analyzer::Expr > translateRegexp(const RexFunctionOperator *) const
static bool colvar_comp(const ColumnVar *l, const ColumnVar *r)
const size_t g_max_integer_set_size
size_t getOperand(size_t idx) const
const Executor * executor_
std::shared_ptr< Analyzer::Expr > translateUnlikely(const RexFunctionOperator *) const
const RexScalar * getElse() const
void collect_column_var(std::set< const ColumnVar *, bool(*)(const ColumnVar *, const ColumnVar *)> &colvar_set, bool include_agg) const override
std::shared_ptr< Analyzer::Expr >(RelAlgTranslator::*)(RexScalar const *) const Handler
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
std::shared_ptr< Analyzer::Expr > translateFunction(const RexFunctionOperator *) const
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
std::shared_ptr< Analyzer::Expr > translateScalarRex(const RexScalar *rex) const
const SQLTypeInfo & getType() const
const RexScalar * getOperand(const size_t idx) const
std::shared_ptr< Analyzer::Expr > translateUoper(const RexOperator *) const
HOST DEVICE int get_scale() const
const Expr * get_right_operand() const
const std::vector< SortField > & getCollation() const
std::shared_ptr< Analyzer::Expr > translateDateadd(const RexFunctionOperator *) const
static bool isFramingAvailableWindowFunc(SqlWindowFunctionKind kind)
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr, const Executor *executor=nullptr)
std::shared_ptr< Analyzer::Expr > translateIntervalExprForWindowFraming(std::shared_ptr< Analyzer::Expr > order_key, bool for_preceding_bound, const Analyzer::BinOper *frame_bound_expr) const
std::shared_ptr< Analyzer::Expr > translateAbs(const RexFunctionOperator *) const
const RexScalar * getWhen(const size_t idx) const
std::shared_ptr< Analyzer::Expr > ExpressionPtr
std::string getString(int32_t string_id) const
std::shared_ptr< Analyzer::Expr > getInIntegerSetExpr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set) const
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::shared_ptr< Analyzer::Expr > translateItem(const RexFunctionOperator *) const
std::string toString(const QueryDescriptionType &type)
std::shared_ptr< Analyzer::Constant > makeNumericConstant(const SQLTypeInfo &ti, const long val)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
std::type_index const type_index_
HOST DEVICE SQLTypes get_type() const
bool operator()(IndexedHandler const &pair) const
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
#define TRANSIENT_DICT_ID
bool is_agg_supported_for_type(const SQLAgg &agg_kind, const SQLTypeInfo &arg_ti)
std::shared_ptr< Analyzer::Expr > translateGeoProjection(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
std::shared_ptr< Analyzer::Expr > translateOper(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateDatediff(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInput(const RexInput *) const
std::shared_ptr< Analyzer::Expr > translateSign(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoFunction(const RexFunctionOperator *) const
bool g_enable_string_functions
std::shared_ptr< Analyzer::Expr > translateBoundingBoxIntersectOper(const RexOperator *) const
ExtractField to_datepart_field(const std::string &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
robin_hood::unordered_map< RexScalar const *, std::shared_ptr< Analyzer::Expr > > cache_
Supported runtime functions management and retrieval.
future< Result > async(Fn &&fn, Args &&...args)
static SysCatalog & instance()
static std::shared_ptr< Analyzer::Expr > translateLiteral(const RexLiteral *)
SQLOps getOperator() const
bool window_function_is_value(const SqlWindowFunctionKind kind)
static constexpr int32_t INVALID_STR_ID
CONSTEXPR DEVICE bool is_null(const T &value)
Classes representing a parse tree.
std::shared_ptr< Analyzer::Expr > translateDatetime(const RexFunctionOperator *) const
void fill_dictionary_encoded_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice, const StringDictionaryProxy *source_dict, const StringDictionaryProxy *dest_dict, const int64_t needle_null_val)
std::shared_ptr< Analyzer::Expr > translateStringOper(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > pattern_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_not)
size_t determineTimeValMultiplierForTimeType(const SQLTypes &window_frame_bound_type, const Analyzer::Constant *const_expr)
bool is_timeinterval() const
std::pair< std::shared_ptr< Analyzer::Expr >, SQLQualifier > getQuantifiedRhs(const RexScalar *) const
std::vector< Analyzer::OrderEntry > translate_collation(const std::vector< SortField > &sort_fields)
size_t branchCount() const
std::shared_ptr< Analyzer::Expr > translateCurrentTime() const
bool is_distinct_supported(SQLAgg const agg_kind)
SQLTypeInfo build_type_info(const SQLTypes sql_type, const int scale, const int precision)
DatetruncField to_datediff_field(const std::string &field)
std::array< IndexedHandler, sizeof...(Ts)> makeHandlers()
const RexWindowBound & getFrameEndBound() const
std::shared_ptr< Analyzer::Expr > translate(const RexScalar *rex) const
std::tuple< T, std::vector< SQLTypeInfo > > bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
Argument type based extension function binding.
const SQLTypeInfo & get_type_info() const
ByTypeIndex(std::type_info const &type_info)
std::shared_ptr< Analyzer::Expr > translatePCAProject(const RexFunctionOperator *) const
const std::unordered_map< const RelAlgNode *, int > input_to_nest_level_
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const shared::StringDictKey &dest_dict_key, const std::vector< int32_t > &source_ids, const shared::StringDictKey &source_dict_key, const int32_t dest_generation)
void set_comp_param(int p)
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
Analyzer::ExpressionPtrVector translateFunctionArgs(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
const ConstRexScalarPtrVector & getPartitionKeys() const
static std::shared_ptr< Analyzer::Expr > analyzeValue(const std::string &stringval, const bool is_null)
DEVICE auto lower_bound(ARGS &&...args)
const RexWindowBound & getFrameStartBound() const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
std::shared_ptr< Analyzer::Expr > translateArrayFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > like_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_ilike, const bool is_not)
std::pair< std::type_index, Handler > IndexedHandler
void setStringDictKey(const shared::StringDictKey &dict_key)
static RelRexToStringConfig defaults()
Datum get_constval() const
std::shared_ptr< Analyzer::Expr > translateMLPredict(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateCurrentUser(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateSampleRatio(const RexFunctionOperator *) const
SqlWindowFunctionKind getKind() const
std::shared_ptr< Analyzer::Expr > translateLike(const RexFunctionOperator *) const
bool takes_arg(const TargetInfo &target_info)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t numericval, const int scale, const int precision)
std::shared_ptr< Analyzer::Expr > translateLikely(const RexFunctionOperator *) const
static const StringDictKey kTransientDictKey
std::shared_ptr< Analyzer::Expr > get_in_values_expr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set)
static std::shared_ptr< Analyzer::Expr > get(const int64_t)
std::shared_ptr< Analyzer::Expr > translateTernaryGeoFunction(const RexFunctionOperator *) const
const ConstRexScalarPtrVector & getOrderKeys() const
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
std::shared_ptr< Analyzer::Expr > translateBinaryGeoFunction(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > make_fp_constant(const int64_t val, const SQLTypeInfo &ti)
std::pair< Datum, bool > datum_from_scalar_tv(const ScalarTargetValue *scalar_tv, const SQLTypeInfo &ti) noexcept
std::shared_ptr< Analyzer::Expr > translateWidthBucket(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInOper(const RexOperator *) const
uint64_t exp_to_scale(const unsigned exp)
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::vector< ExpressionPtr > ExpressionPtrVector
const Expr * get_left_operand() const
std::shared_ptr< Analyzer::Expr > translateCase(const RexCase *) const
std::shared_ptr< Analyzer::Expr > translateFunctionWithGeoArg(const RexFunctionOperator *) const
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
std::shared_ptr< const query_state::QueryState > query_state_
const std::string & getName() const
std::shared_ptr< Analyzer::Expr > translateCurrentDate() const
std::string get_datetimeplus_rewrite_funcname(const SQLOps &op)
void validate_datetime_datepart_argument(const std::shared_ptr< Analyzer::Constant > literal_expr)
std::shared_ptr< Analyzer::Expr > translateCardinality(const RexFunctionOperator *) const
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
std::shared_ptr< Analyzer::Expr > translateGeoComparison(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateDatePlusMinus(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateHPTLiteral(const RexFunctionOperator *) const
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
int32_t getIdOfString(const std::string &str) const
std::shared_ptr< Analyzer::Expr > translateDatepart(const RexFunctionOperator *) const
bool can_use_parallel_algorithms(const ResultSet &rows)
std::shared_ptr< Analyzer::Expr > translateBinaryGeoConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
std::shared_ptr< Analyzer::Expr > rewrite_to_date_trunc(const Analyzer::FunctionOper *dt_plus)
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
DateaddField to_dateadd_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
void set_precision(int d)
std::shared_ptr< Analyzer::Expr > translateGeoBoundingBoxIntersectOper(const RexOperator *) const
void fill_integer_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice)
std::shared_ptr< Analyzer::Expr > translateKeyForString(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
std::shared_ptr< Analyzer::Expr > translateWindowFunction(const RexWindowFunctionOperator *) const
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateScalarSubquery(const RexSubQuery *) const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
std::shared_ptr< Analyzer::Expr > translateLength(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateExtract(const RexFunctionOperator *) const
ExtractField determineTimeUnit(const SQLTypes &window_frame_bound_type, const Analyzer::Constant *const_expr)
HOST DEVICE void set_type(SQLTypes t)