27 #include <boost/algorithm/string.hpp>
28 #include <boost/core/null_deleter.hpp>
29 #include <boost/filesystem.hpp>
30 #include <boost/function.hpp>
32 #include <rapidjson/document.h>
33 #include <rapidjson/stringbuffer.h>
34 #include <rapidjson/writer.h>
42 #include <type_traits>
69 #include "gen-cpp/CalciteServer.h"
80 using namespace std::string_literals;
83 const NameValueAssign*,
84 const std::list<ColumnDescriptor>& columns)>;
88 const NameValueAssign*,
89 const std::list<ColumnDescriptor>& columns)>;
// Analyzes a NULL literal: returns an Analyzer::Constant built from kNULLT and
// `true` (presumably the is-null flag of the Constant constructor — TODO confirm).
// NOTE(review): the parameter list and closing brace are not part of this listing.
92 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
96 return makeExpr<Analyzer::Constant>(
kNULLT,
true);
// Analyzes a string literal by forwarding the dereferenced `stringval` member to
// analyzeValue().
99 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
103 return analyzeValue(*stringval);
// Builds a non-null Analyzer::Constant from a copy of `stringval`.
// `ti` and `d` are declared on lines that are not visible here.
106 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(
107 const std::string& stringval) {
// NOTE(review): raw `new`; ownership of the heap string presumably passes to the
// Constant that receives `d` — confirm it is released there.
110 d.
stringval =
new std::string(stringval);
111 return makeExpr<Analyzer::Constant>(ti,
false, d);
// Analyzes an integer literal by forwarding the `intval` member to analyzeValue().
114 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
118 return analyzeValue(intval);
// Builds a non-null Analyzer::Constant for `intval`, choosing the type `t` by range.
// The assignments to `t` and the declarations of `t` and `d` are not visible here.
121 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(
const int64_t intval) {
// Range test against the 16-bit limits first, then against the 32-bit limits.
124 if (intval >= INT16_MIN && intval <= INT16_MAX) {
127 }
else if (intval >= INT32_MIN && intval <= INT32_MAX) {
// The value is known to fit in 32 bits on this branch, so the cast does not truncate.
129 d.
intval = (int32_t)intval;
134 return makeExpr<Analyzer::Constant>(
t,
false, d);
// Analyzes a fixed-point literal and returns a non-null Analyzer::Constant of type `ti`
// holding `d`; how `ti` and `d` are computed is not visible here.
137 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
143 return makeExpr<Analyzer::Constant>(ti,
false, d);
// Builds a non-null Analyzer::Constant from `numericval` and `precision`.
// Further parameters, if any, and the construction of `ti` and `d` are not visible here.
146 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(
const int64_t numericval,
148 const int precision) {
154 return makeExpr<Analyzer::Constant>(ti,
false, d);
// Analyzes a float literal: returns a non-null kFLOAT Analyzer::Constant holding `d`
// (`d` is populated on lines not visible here).
157 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
163 return makeExpr<Analyzer::Constant>(
kFLOAT,
false, d);
// Analyzes a double literal: returns a non-null kDOUBLE Analyzer::Constant holding `d`
// (`d` is populated on lines not visible here).
166 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
172 return makeExpr<Analyzer::Constant>(
kDOUBLE,
false, d);
// Analyzes a timestamp literal by forwarding the `timestampval_` member to get().
175 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
179 return get(timestampval_);
// Returns a non-null kTIMESTAMP Analyzer::Constant for `timestampval`
// (`d` is populated on lines not visible here).
182 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(
const int64_t timestampval) {
185 return makeExpr<Analyzer::Constant>(
kTIMESTAMP,
false, d);
// Analyzes a user literal: returns a non-null kTEXT Analyzer::Constant holding `d`
// (how `d` is filled in is not visible here).
188 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
193 return makeExpr<Analyzer::Constant>(
kTEXT,
false, d);
// Returns a non-null kTEXT Analyzer::Constant for the given `user` string
// (`d` is populated on lines not visible here).
196 std::shared_ptr<Analyzer::Expr> UserLiteral::get(
const std::string& user) {
199 return makeExpr<Analyzer::Constant>(
kTEXT,
false, d);
// Analyzes every element of `value_list`, collects the results in `value_exprs`, and
// wraps them in a non-null Analyzer::Constant of type `ti`.
// NOTE(review): the declarations of `ti` and `c` and the body of the `set_subtype`
// branch are not visible in this listing.
202 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
207 bool set_subtype =
true;
208 std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
209 for (
auto& p : value_list) {
210 auto e = p->analyze(catalog, query, allow_tlist_ref);
// A null constant element is appended as-is.
213 if (c !=
nullptr && c->get_is_null()) {
214 value_exprs.push_back(c);
220 }
else if (set_subtype) {
224 value_exprs.push_back(e);
226 std::shared_ptr<Analyzer::Expr>
result =
227 makeExpr<Analyzer::Constant>(ti,
false, value_exprs);
232 std::string str =
"{";
233 bool notfirst =
false;
234 for (
auto& p : value_list) {
240 str += p->to_string();
// Analyzes an operator expression.
//  - With no right operand, returns a UOper over the decompressed left operand, typed
//    like the left operand.
//  - On the array-subscript path, returns a kARRAY_AT BinOper typed as the array's
//    element type.
//  - Otherwise analyzes the right operand and defers to normalize().
// Throws std::runtime_error when the subscripted operand is not an array or the
// subscript is not an integer.
// NOTE(review): the condition that selects the array-subscript path is not visible here.
246 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
250 auto left_expr = left->analyze(catalog, query, allow_tlist_ref);
251 const auto& left_type = left_expr->get_type_info();
252 if (right ==
nullptr) {
253 return makeExpr<Analyzer::UOper>(
254 left_type, left_expr->get_contains_agg(), optype, left_expr->decompress());
257 if (left_type.get_type() !=
kARRAY) {
258 throw std::runtime_error(left->to_string() +
" is not of array type.");
260 auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
261 const auto& right_type = right_expr->get_type_info();
262 if (!right_type.is_integer()) {
263 throw std::runtime_error(right->to_string() +
" is not of integer type.");
265 return makeExpr<Analyzer::BinOper>(
266 left_type.get_elem_type(),
false,
kARRAY_AT,
kONE, left_expr, right_expr);
268 auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
269 return normalize(optype, opqualifier, left_expr, right_expr);
// Reconciles the operand types of a binary operator and returns the resulting BinOper.
// Casts are added to an operand when its type differs from the computed
// new_left_type / new_right_type, and operands are decompressed on several paths.
// Throws std::runtime_error for a qualifier applied to a non-array right operand and
// for comparisons on geospatial types.
// NOTE(review): the first two parameters, the qualifier test, the call that produces
// `result_type`, and the conditions guarding most casts are not visible in this listing.
272 std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
275 std::shared_ptr<Analyzer::Expr> left_expr,
276 std::shared_ptr<Analyzer::Expr> right_expr) {
// Decompress both operands when either side is a date-in-days type.
277 if (left_expr->get_type_info().is_date_in_days() ||
278 right_expr->get_type_info().is_date_in_days()) {
280 left_expr = left_expr->decompress();
281 right_expr = right_expr->decompress();
283 const auto& left_type = left_expr->get_type_info();
284 auto right_type = right_expr->get_type_info();
// Qualified path: the right operand must not be a Subquery and must be an array;
// type analysis then proceeds on its element type.
287 CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
288 if (right_type.get_type() !=
kARRAY) {
289 throw std::runtime_error(
290 "Existential or universal qualifiers can only be used in front of a subquery "
292 "expression of array type.");
294 right_type = right_type.get_elem_type();
299 optype, left_type, right_type, &new_left_type, &new_right_type);
// Time-interval results are returned right away, without any casting of the operands.
300 if (result_type.is_timeinterval()) {
301 return makeExpr<Analyzer::BinOper>(
302 result_type,
false, optype, qual, left_expr, right_expr);
304 if (left_type != new_left_type) {
305 left_expr = left_expr->add_cast(new_left_type);
307 if (right_type != new_right_type) {
309 right_expr = right_expr->add_cast(new_right_type);
311 right_expr = right_expr->add_cast(new_right_type.
get_array_type());
318 throw std::runtime_error(
319 "Comparison operators are not yet supported for geospatial types.");
332 right_expr = right_expr->add_cast(ti);
339 left_expr = left_expr->add_cast(ti);
341 left_expr = left_expr->decompress();
342 right_expr = right_expr->decompress();
345 left_expr = left_expr->decompress();
346 right_expr = right_expr->decompress();
// The result contains an aggregate if either operand does.
348 bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
349 return makeExpr<Analyzer::BinOper>(
350 result_type, has_agg, optype, qual, left_expr, right_expr);
// Throws std::runtime_error reporting that subqueries are not supported.
353 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
357 throw std::runtime_error(
"Subqueries are not supported yet.");
// Analyzes the IS [NOT] NULL operand; only the analysis of `arg` is visible here, the
// construction of the result is not.
361 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
365 auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
// Throws std::runtime_error reporting that subqueries are not supported.
373 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
377 throw std::runtime_error(
"Subqueries are not supported yet.");
// Analyzes `arg IN (value_list)`: analyzes the argument and each listed value,
// reconciles their types, and returns an Analyzer::InValues expression.
// Throws std::runtime_error when a value's type is neither string-compatible nor
// number-compatible with `ti`.
// NOTE(review): the declaration of `ti`, the bodies of the string/number branches, and
// the condition guarding the final re-cast pass are not visible in this listing.
381 std::shared_ptr<Analyzer::Expr> InValues::analyze(
385 auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
388 std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
389 for (
auto& p : value_list) {
390 auto e = p->analyze(catalog, query, allow_tlist_ref);
391 if (ti != e->get_type_info()) {
392 if (ti.
is_string() && e->get_type_info().is_string()) {
394 }
else if (ti.
is_number() && e->get_type_info().is_number()) {
397 throw std::runtime_error(
"IN expressions must contain compatible types.");
401 value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
403 value_exprs.push_back(e);
// Re-cast the argument and every collected value to the common type `ti`.
407 arg_expr = arg_expr->decompress();
408 arg_expr = arg_expr->add_cast(ti);
409 std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
410 for (
auto p : value_exprs) {
411 cast_vals.push_back(p->add_cast(ti));
413 value_exprs.swap(cast_vals);
415 std::shared_ptr<Analyzer::Expr>
result =
416 makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
// Analyzes `arg BETWEEN lower AND upper` by building two boolean BinOper predicates:
// one over (arg, lower) and one over (a deep copy of arg, upper). Each operand is cast
// to the computed new_left_type / new_right_type and then decompressed.
// NOTE(review): the calls that compute new_left_type / new_right_type and the
// expression that combines the two predicates are not visible in this listing.
423 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
427 auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
428 auto lower_expr = lower->analyze(catalog, query, allow_tlist_ref);
429 auto upper_expr = upper->analyze(catalog, query, allow_tlist_ref);
432 arg_expr->get_type_info(),
433 lower_expr->get_type_info(),
437 makeExpr<Analyzer::BinOper>(
kBOOLEAN,
440 arg_expr->add_cast(new_left_type)->decompress(),
441 lower_expr->add_cast(new_right_type)->decompress());
// NOTE(review): the arguments below precede the upper-bound predicate yet pass
// lower_expr's type again; this looks like it should be upper_expr's type — confirm.
443 arg_expr->get_type_info(),
444 lower_expr->get_type_info(),
447 auto upper_pred = makeExpr<Analyzer::BinOper>(
451 arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
452 upper_expr->add_cast(new_right_type)->decompress());
453 std::shared_ptr<Analyzer::Expr>
result =
// Analyzes a character-length expression: the argument must be of a string type,
// otherwise std::runtime_error is thrown. The result wraps the decompressed argument
// together with `calc_encoded_length`.
461 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
465 auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
466 if (!arg_expr->get_type_info().is_string()) {
467 throw std::runtime_error(
468 "expression in char_length clause must be of a string type.");
470 std::shared_ptr<Analyzer::Expr>
result =
471 makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length);
// Analyzes a CARDINALITY expression: the argument must be of an array type, otherwise
// std::runtime_error is thrown. The result wraps the decompressed argument.
475 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
479 auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
480 if (!arg_expr->get_type_info().is_array()) {
481 throw std::runtime_error(
482 "expression in cardinality clause must be of an array type.");
484 std::shared_ptr<Analyzer::Expr>
result =
485 makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
489 void LikeExpr::check_like_expr(
const std::string& like_str,
char escape_char) {
490 if (like_str.back() == escape_char) {
491 throw std::runtime_error(
"LIKE pattern must not end with escape character.");
495 bool LikeExpr::test_is_simple_expr(
const std::string& like_str,
char escape_char) {
497 if (like_str.size() < 2 || like_str[0] !=
'%' || like_str[like_str.size() - 1] !=
'%') {
501 if (like_str[like_str.size() - 2] == escape_char &&
502 like_str[like_str.size() - 3] != escape_char) {
505 for (
size_t i = 1;
i < like_str.size() - 1;
i++) {
506 if (like_str[
i] ==
'%' || like_str[
i] ==
'_' || like_str[
i] ==
'[' ||
507 like_str[
i] ==
']') {
508 if (like_str[
i - 1] != escape_char) {
// Walks `like_str` and appends characters to `new_str`, tracking the previous
// character in `prev_char`. A '%' or escape character that is not itself preceded by
// the escape character updates `prev_char` here.
// NOTE(review): the declaration of `new_str`, the statement that follows the
// `prev_char` update, and the write-back to `like_str` are not visible in this listing.
516 void LikeExpr::erase_cntl_chars(std::string& like_str,
char escape_char) {
517 char prev_char =
'\0';
522 for (
char& cur_char : like_str) {
523 if (cur_char ==
'%' || cur_char == escape_char) {
524 if (prev_char != escape_char) {
525 prev_char = cur_char;
529 new_str.push_back(cur_char);
530 prev_char = cur_char;
// Analyzes the argument, the pattern, and the optional ESCAPE operand (only analyzed
// when `escape_string` is non-null), then delegates to LikeExpr::get().
// NOTE(review): the value used when `escape_string` is null is not visible here.
535 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
539 auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
540 auto like_expr = like_string->analyze(catalog, query, allow_tlist_ref);
541 auto escape_expr = escape_string ==
nullptr
543 : escape_string->analyze(catalog, query, allow_tlist_ref);
544 return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike, is_not);
// Builds an Analyzer::LikeExpr from already-analyzed operands.
// Throws std::runtime_error when the argument, pattern, or ESCAPE operand is not of a
// string type, or when a constant ESCAPE string is longer than one character.
// The escape character defaults to a backslash.
// NOTE(review): the trailing parameters, the declarations of `c` and `pattern`, the
// conditions around the pattern helpers, and the final return are not visible here.
547 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
548 std::shared_ptr<Analyzer::Expr> like_expr,
549 std::shared_ptr<Analyzer::Expr> escape_expr,
552 if (!arg_expr->get_type_info().is_string()) {
553 throw std::runtime_error(
"expression before LIKE must be of a string type.");
555 if (!like_expr->get_type_info().is_string()) {
556 throw std::runtime_error(
"expression after LIKE must be of a string type.");
558 char escape_char =
'\\';
559 if (escape_expr !=
nullptr) {
// NOTE(review): the same string-type check appears twice in a row; the second
// occurrence looks redundant.
560 if (!escape_expr->get_type_info().is_string()) {
561 throw std::runtime_error(
"expression after ESCAPE must be of a string type.");
563 if (!escape_expr->get_type_info().is_string()) {
564 throw std::runtime_error(
"expression after ESCAPE must be of a string type.");
// NOTE(review): `c` is tested against nullptr here but dereferenced by the
// escape_char assignment below; if `c` can be null at that point this is a
// null dereference — confirm against the lines not shown.
567 if (c !=
nullptr && c->get_constval().stringval->length() > 1) {
568 throw std::runtime_error(
"String after ESCAPE must have a single character.");
570 escape_char = (*c->get_constval().stringval)[0];
573 bool is_simple =
false;
// Lower-case the pattern in place (presumably only for ILIKE — TODO confirm).
577 std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
579 check_like_expr(pattern, escape_char);
580 is_simple = test_is_simple_expr(pattern, escape_char);
582 erase_cntl_chars(pattern, escape_char);
585 std::shared_ptr<Analyzer::Expr>
result = makeExpr<Analyzer::LikeExpr>(
586 arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
593 void RegexpExpr::check_pattern_expr(
const std::string& pattern_str,
char escape_char) {
594 if (pattern_str.back() == escape_char) {
595 throw std::runtime_error(
"REGEXP pattern must not end with escape character.");
// Scans `pattern_str` character by character and accumulates an equivalent LIKE
// pattern in `like_str`, which replaces `pattern_str` at the end of the visible code.
// A '.' copied through the first branch becomes '_'; an unescaped '.' followed by '*'
// or '+' contributes a '%'.
// NOTE(review): the return statements and the rest of the first condition are not
// visible in this listing, so what the bool result means cannot be stated from here.
599 bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str,
char escape_char) {
600 char prev_char =
'\0';
601 char prev_prev_char =
'\0';
602 std::string like_str;
603 for (
char& cur_char : pattern_str) {
// NOTE(review): isalnum() receives a plain `char`; for negative values (non-ASCII
// bytes on platforms where char is signed) the behavior is undefined.
604 if (prev_char == escape_char || isalnum(cur_char) || cur_char ==
' ' ||
606 like_str.push_back((cur_char ==
'.') ?
'_' : cur_char);
607 prev_prev_char = prev_char;
608 prev_char = cur_char;
// A quantifier is only examined when it follows an unescaped '.'.
611 if (prev_char ==
'.' && prev_prev_char != escape_char) {
612 if (cur_char ==
'*' || cur_char ==
'+') {
613 if (cur_char ==
'*') {
618 like_str.push_back(
'%');
619 prev_prev_char = prev_char;
620 prev_char = cur_char;
626 pattern_str = like_str;
// Analyzes the argument, the pattern, and the optional ESCAPE operand (only analyzed
// when `escape_string` is non-null), then delegates to RegexpExpr::get().
// NOTE(review): the value used when `escape_string` is null is not visible here.
630 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
634 auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
635 auto pattern_expr = pattern_string->analyze(catalog, query, allow_tlist_ref);
636 auto escape_expr = escape_string ==
nullptr
638 : escape_string->analyze(catalog, query, allow_tlist_ref);
639 return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not);
// Builds a REGEXP expression from already-analyzed operands. When the pattern can be
// translated by translate_to_like_pattern(), a case-sensitive LikeExpr is returned
// instead; otherwise an Analyzer::RegexpExpr over the decompressed argument is built.
// Throws std::runtime_error when an operand is not of a string type, when a constant
// ESCAPE string is longer than one character, or when the escape character is not a
// backslash.
// NOTE(review): the trailing parameter, the declarations of `c` and `pattern`, and the
// final return are not visible in this listing.
642 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
643 std::shared_ptr<Analyzer::Expr> arg_expr,
644 std::shared_ptr<Analyzer::Expr> pattern_expr,
645 std::shared_ptr<Analyzer::Expr> escape_expr,
647 if (!arg_expr->get_type_info().is_string()) {
648 throw std::runtime_error(
"expression before REGEXP must be of a string type.");
650 if (!pattern_expr->get_type_info().is_string()) {
651 throw std::runtime_error(
"expression after REGEXP must be of a string type.");
653 char escape_char =
'\\';
654 if (escape_expr !=
nullptr) {
// NOTE(review): the same string-type check appears twice in a row; the second
// occurrence looks redundant.
655 if (!escape_expr->get_type_info().is_string()) {
656 throw std::runtime_error(
"expression after ESCAPE must be of a string type.");
658 if (!escape_expr->get_type_info().is_string()) {
659 throw std::runtime_error(
"expression after ESCAPE must be of a string type.");
// NOTE(review): `c` is tested against nullptr here but dereferenced by the
// escape_char assignment below; if `c` can be null at that point this is a
// null dereference — confirm against the lines not shown.
662 if (c !=
nullptr && c->get_constval().stringval->length() > 1) {
663 throw std::runtime_error(
"String after ESCAPE must have a single character.");
665 escape_char = (*c->get_constval().stringval)[0];
666 if (escape_char !=
'\\') {
667 throw std::runtime_error(
"Only supporting '\\' escape character.");
673 if (translate_to_like_pattern(pattern, escape_char)) {
674 return LikeExpr::get(arg_expr, pattern_expr, escape_expr,
false, is_not);
677 std::shared_ptr<Analyzer::Expr>
result =
678 makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
// Analyzes the argument and delegates to LikelihoodExpr::get() with the `likelihood`
// and `is_not` members.
685 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
689 auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
690 return LikelihoodExpr::get(arg_expr, likelihood, is_not);
// Builds an Analyzer::LikelihoodExpr over the decompressed argument.
// Throws std::runtime_error when the argument is not of boolean type.
// When `is_not` is set, the stored likelihood is `1 - likelihood`.
693 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
694 std::shared_ptr<Analyzer::Expr> arg_expr,
697 if (!arg_expr->get_type_info().is_boolean()) {
698 throw std::runtime_error(
"likelihood expression expects boolean type.");
700 std::shared_ptr<Analyzer::Expr>
result = makeExpr<Analyzer::LikelihoodExpr>(
701 arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
// Throws std::runtime_error reporting that subqueries are not supported.
705 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
709 throw std::runtime_error(
"Subqueries are not supported yet.");
// Resolves a column reference to an Analyzer::ColumnVar, or to a target-list entry
// when the name matches an output alias and `allow_tlist_ref` permits it.
// Throws std::runtime_error for a bare `*`, an unknown range variable, table, or
// column, and for ambiguous column names or output aliases.
// NOTE(review): the loops over range-table and target-list entries, and the
// declarations of `cd`, `rte`, `found`, `table_id` and `rte_idx`, are not visible here.
713 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
720 if (column ==
nullptr) {
721 throw std::runtime_error(
"invalid column name *.");
723 if (table !=
nullptr) {
726 throw std::runtime_error(
"range variable or table name " + *table +
732 throw std::runtime_error(
"Column name " + *column +
" does not exist.");
// Unqualified name: the first match is recorded; a second match is an ambiguity error.
739 cd = rte->get_column_desc(catalog, *column);
740 if (cd !=
nullptr && !found) {
743 table_id = rte->get_table_id();
744 }
else if (cd !=
nullptr && found) {
745 throw std::runtime_error(
"Column name " + *column +
" is ambiguous.");
// No column matched: fall back to output aliases when target-list references are allowed.
749 if (cd ==
nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
754 std::shared_ptr<Analyzer::TargetEntry> tle;
756 if (*column == p->get_resname() && !found) {
760 }
else if (*column == p->get_resname() && found) {
761 throw std::runtime_error(
"Output alias " + *column +
" is ambiguous.");
766 if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
// TLIST_COPY returns a deep copy of the aliased expression rather than a Var reference.
772 if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
773 return tle->get_expr()->deep_copy();
775 return makeExpr<Analyzer::Var>(
781 throw std::runtime_error(
"Column name " + *column +
" does not exist.");
784 return makeExpr<Analyzer::ColumnVar>(cd->columnType, table_id, cd->columnId, rte_idx);
// Analyzes an aggregate-style function call and returns an Analyzer::AggExpr.
// The function name is matched case-insensitively against count, min, max, avg, sum
// and unnest; any other name raises std::runtime_error("invalid function name: ...").
// Further errors are raised for `*` arguments outside COUNT, for non-numeric AVG/SUM
// arguments, for a non-array UNNEST argument, and for string/array arguments to
// aggregates other than COUNT(DISTINCT).
// NOTE(review): the declarations of `result_type`, `agg_type` and `is_distinct`, the
// assignments to `agg_type`, and several branch conditions are not visible here.
787 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
793 std::shared_ptr<Analyzer::Expr> arg_expr;
795 if (boost::iequals(*
name,
"count")) {
799 arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
802 throw std::runtime_error(
803 "Strings must be dictionary-encoded in COUNT(DISTINCT).");
806 throw std::runtime_error(
"Only COUNT(DISTINCT) is supported on arrays.");
809 is_distinct = distinct;
812 throw std::runtime_error(
"Cannot compute " + *
name +
" with argument '*'.");
// MIN and MAX keep the (decompressed) argument's own type as the result type.
814 if (boost::iequals(*
name,
"min")) {
816 arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
817 arg_expr = arg_expr->decompress();
818 result_type = arg_expr->get_type_info();
819 }
else if (boost::iequals(*
name,
"max")) {
821 arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
822 arg_expr = arg_expr->decompress();
823 result_type = arg_expr->get_type_info();
824 }
else if (boost::iequals(*
name,
"avg")) {
826 arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
827 if (!arg_expr->get_type_info().is_number()) {
828 throw std::runtime_error(
"Cannot compute AVG on non-number-type arguments.");
830 arg_expr = arg_expr->decompress();
832 }
else if (boost::iequals(*
name,
"sum")) {
834 arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
835 if (!arg_expr->get_type_info().is_number()) {
836 throw std::runtime_error(
"Cannot compute SUM on non-number-type arguments.");
838 arg_expr = arg_expr->decompress();
840 : arg_expr->get_type_info();
841 }
else if (boost::iequals(*
name,
"unnest")) {
842 arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
843 const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
845 throw std::runtime_error(arg->to_string() +
" is not of array type.");
849 throw std::runtime_error(
"invalid function name: " + *
name);
851 if (arg_expr->get_type_info().is_string() ||
852 arg_expr->get_type_info().get_type() ==
kARRAY) {
853 throw std::runtime_error(
854 "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
859 return makeExpr<Analyzer::AggExpr>(
860 result_type, agg_type, arg_expr,
is_distinct,
nullptr);
// Analyzes a CAST: validates the target type, analyzes the argument, builds the target
// type info `ti` from the target's parameters and the argument's not-null flag, and
// returns the argument with a cast to `ti` added.
// NOTE(review): the start of the `ti` construction and the second half of the
// condition are not visible in this listing.
863 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
867 target_type->check_type();
868 auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
870 target_type->get_param1(),
871 target_type->get_param2(),
872 arg_expr->get_type_info().get_notnull());
873 if (arg_expr->get_type_info().get_type() != target_type->get_type() &&
// NOTE(review): the result of decompress() is discarded here, whereas other analyze()
// functions in this file assign it back (`x = x->decompress()`); if decompress()
// returns a new expression this statement has no effect — confirm.
875 arg_expr->decompress();
877 return arg_expr->add_cast(ti);
// Analyzes every WHEN/THEN pair and the optional ELSE expression, then defers to
// normalize() to reconcile the result types.
// Throws std::runtime_error when a WHEN expression is not boolean.
// NOTE(review): the declarator of `expr_pair_list` sits on a line not visible here.
880 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
885 std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
887 for (
auto& p : when_then_list) {
888 auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
889 if (e1->get_type_info().get_type() !=
kBOOLEAN) {
890 throw std::runtime_error(
"Only boolean expressions can be used after WHEN.");
892 auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
893 expr_pair_list.emplace_back(e1, e2);
// A missing ELSE is passed on as nullptr.
895 auto else_e = else_expr ? else_expr->analyze(catalog, query, allow_tlist_ref) :
nullptr;
896 return normalize(expr_pair_list, else_e);
911 if (*s ==
"t" || *s ==
"true" || *s ==
"T" || *s ==
"True") {
913 }
else if (*s ==
"f" || *s ==
"false" || *s ==
"F" || *s ==
"False") {
916 throw std::runtime_error(
"Invalid string for boolean " + *s);
// Reconciles the THEN/ELSE branch types of a CASE expression into one type `ti`, casts
// every branch to it, and returns an Analyzer::CaseExpr.
// Throws std::runtime_error when THEN or ELSE branch types are incompatible, or when
// no type can be deduced because all branches are null.
// Dictionary ids of dict-encoded string branches are collected in `dictionary_ids`.
// NOTE(review): the declaration of `ti`, the bodies of the string/number/boolean
// compatibility branches, and the final return are not visible in this listing.
922 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
923 const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
924 std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
925 const std::shared_ptr<Analyzer::Expr> else_e_in) {
927 bool has_agg =
false;
928 std::set<int> dictionary_ids;
930 for (
auto& p : expr_pair_list) {
932 CHECK(e1->get_type_info().is_boolean());
934 if (e2->get_type_info().is_dict_encoded_string()) {
935 dictionary_ids.insert(e2->get_type_info().get_comp_param());
938 ti = e2->get_type_info();
939 }
else if (e2->get_type_info().get_type() ==
kNULLT) {
// An untyped NULL branch takes on the type deduced so far.
941 e2->set_type_info(ti);
942 }
else if (ti != e2->get_type_info()) {
943 if (ti.
is_string() && e2->get_type_info().is_string()) {
945 }
else if (ti.
is_number() && e2->get_type_info().is_number()) {
947 }
else if (ti.
is_boolean() && e2->get_type_info().is_boolean()) {
950 throw std::runtime_error(
951 "expressions in THEN clause must be of the same or compatible types.");
954 if (e2->get_contains_agg()) {
// Work on a local handle, since the parameter is a const shared_ptr.
958 auto else_e = else_e_in;
960 if (else_e->get_contains_agg()) {
965 else_e->set_type_info(ti);
966 }
else if (ti != else_e->get_type_info()) {
967 if (else_e->get_type_info().is_dict_encoded_string()) {
968 dictionary_ids.insert(else_e->get_type_info().get_comp_param());
971 if (ti.
is_string() && else_e->get_type_info().is_string()) {
973 }
else if (ti.
is_number() && else_e->get_type_info().is_number()) {
975 }
else if (ti.
is_boolean() && else_e->get_type_info().is_boolean()) {
979 throw std::runtime_error(
982 "expressions in ELSE clause must be of the same or compatible types as those "
983 "in the THEN clauses.");
// Second pass: cast every THEN expression (and the ELSE, if present) to `ti`.
987 std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
989 for (
auto p : expr_pair_list) {
991 cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
993 if (else_e !=
nullptr) {
994 else_e = else_e->add_cast(ti);
999 else_e = makeExpr<Analyzer::Constant>(ti,
true, d);
1002 throw std::runtime_error(
1003 "Can't deduce the type for case expressions, all branches null");
1006 auto case_expr = makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
// Only when the result is not dict-encoded and exactly one positive dictionary id
// was collected from the branches.
1007 if (ti.get_compression() !=
kENCODING_DICT && dictionary_ids.size() == 1 &&
1008 *(dictionary_ids.begin()) > 0) {
1015 ti.set_comp_param(*dictionary_ids.begin());
// NOTE(review): the return value of add_cast() is discarded here, unlike the other
// add_cast() calls in this function — confirm this call acts in place on case_expr.
1016 case_expr->add_cast(ti);
1022 std::string str(
"CASE ");
1023 for (
auto& p : when_then_list) {
1024 str +=
"WHEN " + p->get_expr1()->to_string() +
" THEN " +
1025 p->get_expr2()->to_string() +
" ";
1027 if (else_expr !=
nullptr) {
1028 str +=
"ELSE " + else_expr->to_string();
1036 left->analyze(catalog, query);
1038 right->analyze(catalog, *right_query);
1045 std::shared_ptr<Analyzer::Expr> p;
1046 if (having_clause !=
nullptr) {
1047 p = having_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1048 if (p->get_type_info().get_type() !=
kBOOLEAN) {
1049 throw std::runtime_error(
"Only boolean expressions can be in HAVING clause.");
1058 std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1059 if (!groupby_clause.empty()) {
1061 std::shared_ptr<Analyzer::Expr> gexpr;
1062 const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1064 for (
auto& c : groupby_clause) {
1066 if (dynamic_cast<Literal*>(c.get())) {
1069 throw std::runtime_error(
"Invalid literal in GROUP BY clause.");
1072 if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1073 throw std::runtime_error(
"Invalid ordinal number in GROUP BY clause.");
1075 if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1076 throw std::runtime_error(
1077 "Ordinal number in GROUP BY cannot reference an expression containing "
1081 gexpr = makeExpr<Analyzer::Var>(
1084 gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1087 bool set_new_type =
false;
1090 set_new_type =
true;
1095 std::shared_ptr<Analyzer::Var> v;
1096 if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
1099 gexpr = tlist[n - 1]->get_own_expr();
1101 if (cv !=
nullptr) {
1106 v->set_varno(gexpr_no);
1107 tlist[n - 1]->set_expr(v);
1111 groupby.push_back(new_e);
1113 v->set_type_info(new_e->get_type_info());
1116 groupby.push_back(gexpr);
1123 auto e =
t->get_expr();
1124 e->check_group_by(groupby);
1132 if (where_clause ==
nullptr) {
1136 auto p = where_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1137 if (p->get_type_info().get_type() !=
kBOOLEAN) {
1138 throw std::runtime_error(
"Only boolean expressions can be in WHERE clause.");
1145 std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1147 if (select_clause.empty()) {
1151 rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1154 for (
auto& p : select_clause) {
1155 const Parser::Expr* select_expr = p->get_select_expr();
1157 if (
typeid(*select_expr) ==
typeid(
ColumnRef) &&
1158 dynamic_cast<const ColumnRef*>(select_expr)->get_column() ==
nullptr) {
1159 const std::string* range_var_name =
1160 dynamic_cast<const ColumnRef*
>(select_expr)->get_table();
1163 throw std::runtime_error(
"invalid range variable name: " + *range_var_name);
1168 auto e = select_expr->
analyze(catalog, query);
1169 std::string resname;
1171 if (p->get_alias() !=
nullptr) {
1172 resname = *p->get_alias();
1173 }
else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1174 !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1177 colvar->get_table_id(), colvar->get_column_id());
1180 if (e->get_type_info().get_type() ==
kNULLT) {
1181 throw std::runtime_error(
1182 "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1185 bool unnest = (o !=
nullptr && o->get_optype() ==
kUNNEST);
1186 auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1187 tlist.push_back(tle);
1196 for (
auto& p : from_clause) {
1199 if (table_desc ==
nullptr) {
1200 throw std::runtime_error(
"Table " + *p->get_table_name() +
" does not exist.");
1202 std::string range_var;
1203 if (p->get_range_var() ==
nullptr) {
1204 range_var = *p->get_table_name();
1206 range_var = *p->get_range_var();
1216 analyze_from_clause(catalog, query);
1217 analyze_select_clause(catalog, query);
1218 analyze_where_clause(catalog, query);
1219 analyze_group_by(catalog, query);
1220 analyze_having_clause(catalog, query);
1228 throw std::runtime_error(
"OFFSET cannot be negative.");
1231 query_expr->analyze(catalog, query);
1236 const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1238 std::list<Analyzer::OrderEntry>* order_by =
new std::list<Analyzer::OrderEntry>();
1239 if (!orderby_clause.empty()) {
1240 for (
auto& p : orderby_clause) {
1241 int tle_no = p->get_colno();
1245 const std::string*
name = p->get_column()->get_column();
1248 for (
auto tle : tlist) {
1249 if (tle->get_resname() == *
name) {
1256 throw std::runtime_error(
"invalid name in order by: " + *name);
1259 order_by->push_back(
1265 for (
int i = 1; i <= static_cast<int>(tlist.size());
i++) {
1266 bool in_orderby =
false;
1267 std::for_each(order_by->begin(),
1270 in_orderby = in_orderby || (
i == oe.tle_no);
1281 std::string str = select_expr->to_string();
1282 if (alias !=
nullptr) {
1283 str +=
" AS " + *alias;
1289 std::string str = *table_name;
1290 if (range_var !=
nullptr) {
1291 str +=
" " + *range_var;
1298 if (table ==
nullptr) {
1300 }
else if (column ==
nullptr) {
1301 str = *table +
".*";
1303 str = *table +
"." + *column;
1309 std::string op_str[] = {
1310 "=",
"===",
"<>",
"<",
">",
"<=",
">=",
" AND ",
" OR ",
"NOT",
"-",
"+",
"*",
"/"};
1313 str =
"-(" + left->to_string() +
")";
1314 }
else if (optype ==
kNOT) {
1315 str =
"NOT (" + left->to_string() +
")";
1317 str = left->to_string() +
"[" + right->to_string() +
"]";
1318 }
else if (optype ==
kUNNEST) {
1319 str =
"UNNEST(" + left->to_string() +
")";
1320 }
else if (optype ==
kIN) {
1321 str =
"(" + left->to_string() +
" IN " + right->to_string() +
")";
1323 str =
"(" + left->to_string() + op_str[optype] + right->to_string() +
")";
1329 std::string str = arg->to_string();
1339 return "EXISTS (" + query->to_string() +
")";
1345 str += query->to_string();
1351 std::string str = arg->to_string();
1353 str +=
" IS NOT NULL";
1362 str += subquery->to_string();
1368 bool notfirst =
false;
1369 for (
auto& p : value_list) {
1375 str += p->to_string();
1382 std::string str = arg->to_string();
1384 str +=
" NOT BETWEEN ";
1388 str += lower->to_string() +
" AND " + upper->to_string();
1394 if (calc_encoded_length) {
1395 str =
"CHAR_LENGTH (" + arg->to_string() +
")";
1397 str =
"LENGTH (" + arg->to_string() +
")";
1403 std::string str =
"CARDINALITY(" + arg->to_string() +
")";
1408 std::string str = arg->to_string();
1410 str +=
" NOT LIKE ";
1414 str += like_string->to_string();
1415 if (escape_string !=
nullptr) {
1416 str +=
" ESCAPE " + escape_string->to_string();
1422 std::string str = arg->to_string();
1424 str +=
" NOT REGEXP ";
1428 str += pattern_string->to_string();
1429 if (escape_string !=
nullptr) {
1430 str +=
" ESCAPE " + escape_string->to_string();
1436 std::string str =
" LIKELIHOOD ";
1437 str += arg->to_string();
1439 str += boost::lexical_cast<std::string>(is_not ? 1.0 - likelihood : likelihood);
1444 std::string str = *
name +
"(";
1448 if (arg ==
nullptr) {
1451 str += arg->to_string() +
")";
1457 std::string query_str =
"SELECT ";
1459 query_str +=
"DISTINCT ";
1461 if (select_clause.empty()) {
1464 bool notfirst =
false;
1465 for (
auto& p : select_clause) {
1471 query_str += p->to_string();
1474 query_str +=
" FROM ";
1475 bool notfirst =
false;
1476 for (
auto& p : from_clause) {
1482 query_str += p->to_string();
1485 query_str +=
" WHERE " + where_clause->to_string();
1487 if (!groupby_clause.empty()) {
1488 query_str +=
" GROUP BY ";
1489 bool notfirst =
false;
1490 for (
auto& p : groupby_clause) {
1496 query_str += p->to_string();
1499 if (having_clause) {
1500 query_str +=
" HAVING " + having_clause->to_string();
1510 if (td ==
nullptr) {
1511 throw std::runtime_error(
"Table " + *table +
" does not exist.");
1514 throw std::runtime_error(
"Insert to views is not supported yet.");
1518 std::list<int> result_col_list;
1519 if (column_list.empty()) {
1520 const std::list<const ColumnDescriptor*> all_cols =
1522 for (
auto cd : all_cols) {
1523 result_col_list.push_back(cd->columnId);
1526 for (
auto& c : column_list) {
1528 if (cd ==
nullptr) {
1529 throw std::runtime_error(
"Column " + *c +
" does not exist.");
1531 result_col_list.push_back(cd->
columnId);
1533 if (col_ti.get_physical_cols() > 0) {
1535 for (
auto i = 1;
i <= col_ti.get_physical_cols();
i++) {
1538 if (pcd ==
nullptr) {
1539 throw std::runtime_error(
"Column " + *c +
"'s metadata is incomplete.");
1541 result_col_list.push_back(pcd->
columnId);
1546 result_col_list.size()) {
1547 throw std::runtime_error(
"Insert into a subset of columns is not supported yet.");
1556 if (td ==
nullptr) {
1557 throw std::runtime_error(
"Table " + *table +
" does not exist.");
1560 throw std::runtime_error(
"Insert to views is not supported yet.");
1564 throw std::runtime_error(
"Cannot determine leaf on replicated table.");
1568 std::random_device rd;
1569 std::mt19937_64 gen(rd());
1570 std::uniform_int_distribution<size_t> dis;
1571 const auto leaf_idx = dis(gen) % num_leafs;
1575 size_t indexOfShardColumn = 0;
1579 if (column_list.empty()) {
1582 auto iter = std::find(all_cols.begin(), all_cols.end(), shardColumn);
1583 CHECK(iter != all_cols.end());
1584 indexOfShardColumn = std::distance(all_cols.begin(), iter);
1586 for (
auto& c : column_list) {
1590 indexOfShardColumn++;
1593 if (indexOfShardColumn == column_list.size()) {
1594 throw std::runtime_error(
"No value defined for shard column.");
1598 if (indexOfShardColumn >= value_list.size()) {
1599 throw std::runtime_error(
"No value defined for shard column.");
1602 auto& shardColumnValueExpr = *(std::next(value_list.begin(), indexOfShardColumn));
1605 auto e = shardColumnValueExpr->analyze(catalog, query);
1618 auto shard_count = td->
nShards * num_leafs;
1619 int64_t shardId = 0;
1628 bool invalid =
false;
1631 invalid = str_id > max_valid_int_value<int32_t>();
1633 invalid = str_id > max_valid_int_value<uint16_t>();
1635 invalid = str_id > max_valid_int_value<uint8_t>();
1638 if (invalid || str_id == inline_int_null_value<int32_t>()) {
// Fragment of the VALUES-insert analysis routine (signature and several
// interior lines are not present in this listing). Visible behavior:
//  * delegates to InsertStmt::analyze first,
//  * requires one value expression per non-physical target column,
//  * analyzes each value expression and rejects NULL constants for some columns
//    (the guarding condition is not visible here),
//  * for columns that own physical columns (geo types), parses the supplied
//    geometry string and emits constant array expressions for the physical
//    columns (coords, ring sizes, poly rings, bounds, render group).
1666 InsertStmt::analyze(catalog, query);
1667 std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1670 const std::list<const ColumnDescriptor*> non_phys_cols =
// NOTE(review): the test is `!=`, so this also fires when there are FEWER
// target columns than expressions, although the message says "more".
1672 if (non_phys_cols.size() != value_list.size()) {
1673 throw std::runtime_error(
"Insert has more target columns than expressions.");
1676 for (
auto& v : value_list) {
1677 auto e = v->analyze(catalog, query);
1683 if (c !=
nullptr && c->get_is_null()) {
1684 throw std::runtime_error(
"Cannot insert NULL into column " + cd->
columnName);
// Columns with physical sub-columns need their value expanded below.
1692 if (col_ti.get_physical_cols() > 0) {
1697 if (uoper && uoper->get_optype() ==
kCAST) {
1702 std::string* geo_string{
nullptr};
1704 is_null = c->get_is_null();
1706 geo_string = c->get_constval().stringval;
// A non-null geo value must have been supplied as a string constant.
1709 if (!is_null && !geo_string) {
1710 throw std::runtime_error(
"Expecting a WKT or WKB hex string for column " +
// Output buffers filled by the geometry parser call whose tail is on 1722.
1713 std::vector<double> coords;
1714 std::vector<double> bounds;
1715 std::vector<int> ring_sizes;
1716 std::vector<int> poly_rings;
1722 *geo_string, import_ti, coords, bounds, ring_sizes, poly_rings)) {
1723 throw std::runtime_error(
"Cannot read geometry to insert into column " +
1726 if (coords.empty()) {
1734 throw std::runtime_error(
1735 "Imported geometry doesn't match the type of column " + cd->
columnName);
1741 if (!coords.empty()) {
1742 throw std::runtime_error(
"NULL POINT with unexpected coordinates in column " +
// Offset used to locate the physical columns that follow the geo column
// (presumably relative to the logical column id - confirm in full source).
1753 int nextColumnOffset = 1;
// Coordinates: one kTINYINT constant per compressed coordinate byte.
1760 std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1763 for (
auto cc : compressed_coords) {
1766 auto e = makeExpr<Analyzer::Constant>(
kTINYINT,
false, d);
1767 value_exprs.push_back(e);
1772 makeExpr<Analyzer::Constant>(cd_coords->
columnType, is_null, value_exprs),
// Ring sizes: one kINT constant per ring.
1782 CHECK(cd_ring_sizes);
1785 std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1787 for (
auto c : ring_sizes) {
1790 auto e = makeExpr<Analyzer::Constant>(
kINT,
false, d);
1791 value_exprs.push_back(e);
1796 makeExpr<Analyzer::Constant>(cd_ring_sizes->
columnType, is_null, value_exprs),
// Poly rings: one kINT constant per polygon.
1805 CHECK(cd_poly_rings);
1808 std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1810 for (
auto c : poly_rings) {
1813 auto e = makeExpr<Analyzer::Constant>(
kINT,
false, d);
1814 value_exprs.push_back(e);
1819 makeExpr<Analyzer::Constant>(
1820 cd_poly_rings->
columnType, is_null, value_exprs),
// Bounds: one kDOUBLE constant per bound value.
1835 std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1837 for (
auto b : bounds) {
1840 auto e = makeExpr<Analyzer::Constant>(
kDOUBLE,
false, d);
1841 value_exprs.push_back(e);
1846 makeExpr<Analyzer::Constant>(cd_bounds->
columnType, is_null, value_exprs),
// Render group: a single scalar constant.
1857 CHECK(cd_render_group);
1863 makeExpr<Analyzer::Constant>(cd_render_group->
columnType, is_null, d),
// Fragment of the VALUES-insert execution routine (signature not visible).
// Visible steps: reject callers lacking insert privileges, take an execute
// write lock, run analyze(), resolve the result table under a lock container,
// and refuse singleton inserts into views.
1877 throw std::runtime_error(
"User has no insert privileges on " + *table +
".");
1880 auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
1885 analyze(catalog, query);
1890 auto result_table_id = query.get_result_table_id();
1891 const auto td_with_lock =
1893 catalog, result_table_id);
// td_with_lock is callable and yields the table descriptor.
1894 auto td = td_with_lock();
1898 throw std::runtime_error(
"Singleton inserts on views is not supported.");
// Stub body: the UPDATE path visible here only throws (enclosing signature is
// not part of this listing).
1910 throw std::runtime_error(
"UPDATE statement not supported yet.");
// Stub body: the DELETE path visible here only throws (enclosing signature is
// not part of this listing).
1915 throw std::runtime_error(
"DELETE statement not supported yet.");
// Fragment of the shard-column type check (signature not visible). Integer
// columns and dictionary-encoded string columns are accepted by the visible
// condition (a further alternative is cut off after the trailing `||`);
// anything else is reported with its type and encoding name.
1922 if (col_ti.is_integer() ||
1923 (col_ti.is_string() && col_ti.get_compression() ==
kENCODING_DICT) ||
1927 throw std::runtime_error(
"Cannot shard on type " + col_ti.get_type_name() +
1928 ", encoding " + col_ti.get_compression_name());
// Fragment of a column-index lookup by exact (case-sensitive) name. While
// scanning, geometry columns advance the running index by their number of
// physical columns. Return statements and the index initialisation are not
// visible in this listing.
1932 const std::list<ColumnDescriptor>& columns) {
1934 for (
const auto& cd : columns) {
1935 if (cd.columnName == name) {
// Skip over the physical columns that follow a geometry column.
1940 if (cd.columnType.is_geometry()) {
1941 index += cd.columnType.get_physical_cols();
// Fragment of a column-index lookup that compares the UPPER-CASED column name
// against `name` (so `name` is presumably already upper case - confirm at the
// call site). Geometry columns advance the running index by their physical
// column count. Return statements are not visible in this listing.
1949 const std::list<ColumnDescriptor>& columns) {
1951 for (
const auto& cd : columns) {
1952 if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
1956 if (cd.columnType.is_geometry()) {
1957 index += cd.columnType.get_physical_cols();
// Adds a string member `field_name: field_value` to the JSON value `obj`
// (the first parameter line declaring `obj` is not part of this listing;
// presumably this is the `set_string_field` helper called further below).
// Both strings are copied into storage owned by `document`'s allocator, so
// the inputs need not outlive the document.
1965 const std::string& field_name,
1966 const std::string& field_value,
1967 rapidjson::Document& document) {
1968 rapidjson::Value field_name_json_str;
// SetString with an allocator makes a copy of the characters.
1969 field_name_json_str.SetString(
1970 field_name.c_str(), field_name.size(), document.GetAllocator());
1971 rapidjson::Value field_value_json_str;
1972 field_value_json_str.SetString(
1973 field_value.c_str(), field_value.size(), document.GetAllocator());
1974 obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
// Fragment that serialises key metadata to a JSON string: builds a JSON array
// holding an optional shard-key object followed by one object per shared
// dictionary definition (fields "name", "foreign_table", and the foreign
// column), then writes it through a rapidjson Writer into a StringBuffer and
// returns the buffer contents. The lines that fill the shard-key object and
// invoke the writer are not visible in this listing.
1979 const std::vector<SharedDictionaryDef>& shared_dict_defs) {
1980 rapidjson::Document document;
1981 auto& allocator = document.GetAllocator();
1982 rapidjson::Value arr(rapidjson::kArrayType);
1983 if (shard_key_def) {
1984 rapidjson::Value shard_key_obj(rapidjson::kObjectType);
1987 arr.PushBack(shard_key_obj, allocator);
1989 for (
const auto& shared_dict_def : shared_dict_defs) {
1990 rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
1992 set_string_field(shared_dict_obj,
"name", shared_dict_def.get_column(), document);
1994 shared_dict_obj,
"foreign_table", shared_dict_def.get_foreign_table(), document);
1997 shared_dict_def.get_foreign_column(),
1999 arr.PushBack(shared_dict_obj, allocator);
2001 rapidjson::StringBuffer buffer;
2002 rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
2004 return buffer.GetString();
// Fragment of the generic option-reading template. Template parameters
// include the expected literal type, the assignment callable type and a
// validator type; the validator is default-constructed when not supplied and
// is applied to the option `p` to obtain the value. Remaining parameters and
// the rest of the body are not visible in this listing.
2007 template <
typename LITERAL_TYPE,
2008 typename ASSIGNMENT,
2012 VALIDATE validate = VALIDATE()) {
2013 const auto val = validate(p);
// Option handler fragment: stores a string option into td.storageType, using
// the case-sensitive validator (so the value is not case-normalised here).
2020 auto assignment = [&td](
const auto val) { td.storageType = val; };
2021 return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
// Option handler fragment: stores an integer option into td.maxFragRows.
2028 return get_property_value<IntLiteral>(p,
2029 [&td](
const auto val) { td.maxFragRows = val; });
// Option handler fragment (dataframe variant): stores an integer option into
// df_td.maxFragRows.
2035 return get_property_value<IntLiteral>(
2036 p, [&df_td](
const auto val) { df_td.maxFragRows = val; });
// Option handler fragment: stores an integer option into td.maxChunkSize.
2042 return get_property_value<IntLiteral>(p,
2043 [&td](
const auto val) { td.maxChunkSize = val; });
// Option handler fragment (dataframe variant): stores an integer option into
// df_td.maxChunkSize.
2050 return get_property_value<IntLiteral>(
2051 p, [&df_td](
const auto val) { df_td.maxChunkSize = val; });
// Option handler fragment: accepts a string option that must be exactly one
// character long and stores it in df_td.delimiter; otherwise throws.
2057 return get_property_value<StringLiteral>(p, [&df_td](
const auto val) {
2058 if (val.size() != 1) {
2059 throw std::runtime_error(
"Length of DELIMITER must be equal to 1.");
2061 df_td.delimiter = val;
// Option handler fragment: maps the string option "FALSE"/"TRUE" onto
// df_td.hasHeader and throws for any other value. The comparison is against
// upper-case literals, so the value is presumably upper-cased by the default
// validator before reaching this lambda - confirm in get_property_value.
2068 return get_property_value<StringLiteral>(p, [&df_td](
const auto val) {
2069 if (val ==
"FALSE") {
2070 df_td.hasHeader =
false;
2071 }
else if (val ==
"TRUE") {
2072 df_td.hasHeader =
true;
2074 throw std::runtime_error(
"Option HEADER support only 'true' or 'false' values.");
// Option handler fragment: stores an integer option into td.fragPageSize.
2082 return get_property_value<IntLiteral>(p,
2083 [&td](
const auto val) { td.fragPageSize = val; });
// Option handler fragment: stores an integer option into td.maxRows.
2088 return get_property_value<IntLiteral>(p, [&td](
const auto val) { td.maxRows = val; });
// Option handler fragment (dataframe variant): stores an integer option into
// df_td.skipRows.
2094 return get_property_value<IntLiteral>(
2095 p, [&df_td](
const auto val) { df_td.skipRows = val; });
// Option handler fragment for PARTITIONS: the value must be "SHARDED" or
// "REPLICATED"; "REPLICATED" is rejected when the table already has a shard
// column (td.shardedColumnId != 0). On success the value is stored in
// td.partitions.
2101 return get_property_value<StringLiteral>(p, [&td](
const auto partitions_uc) {
2102 if (partitions_uc !=
"SHARDED" && partitions_uc !=
"REPLICATED") {
2103 throw std::runtime_error(
"PARTITIONS must be SHARDED or REPLICATED");
2105 if (td.shardedColumnId != 0 && partitions_uc ==
"REPLICATED") {
2106 throw std::runtime_error(
2107 "A table cannot be sharded and replicated at the same time");
2109 td.partitions = partitions_uc;
// Option handler fragment for SHARD_COUNT: requires a shard key to be set on
// the table, then converts the requested total shard count into a per-leaf
// count (divides by g_leaf_count when that is non-zero) and stores it in
// td.nShards. The condition guarding the "multiple of the number of leaves"
// error is not visible in this listing.
2115 if (!td.shardedColumnId) {
2116 throw std::runtime_error(
"SHARD KEY must be defined.");
2118 return get_property_value<IntLiteral>(p, [&td](
const auto shard_count) {
2120 throw std::runtime_error(
2121 "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2123 td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
// NOTE(review): if the check on 2115 is unconditional, `!td.shardedColumnId`
// can never hold here and this branch would be unreachable - confirm.
2124 if (!td.shardedColumnId && !td.nShards) {
2125 throw std::runtime_error(
2126 "Must specify the number of shards through the SHARD_COUNT option");
// Option handler fragment for VACUUM: the value must be "IMMEDIATE" or
// "DELAYED". td.hasDeletedCol becomes false for IMMEDIATE and true for
// DELAYED.
2134 return get_property_value<StringLiteral>(p, [&td](
const auto vacuum_uc) {
2135 if (vacuum_uc !=
"IMMEDIATE" && vacuum_uc !=
"DELAYED") {
2136 throw std::runtime_error(
"VACUUM must be IMMEDIATE or DELAYED");
2138 td.hasDeletedCol = boost::iequals(vacuum_uc,
"IMMEDIATE") ?
false :
true;
// Option handler fragment for the sort column: after td.sortedColumnId has
// been resolved (assignment line not visible), a zero id is treated as
// "column not found" and reported with the requested name.
2145 return get_property_value<StringLiteral>(p, [&td, &columns](
const auto sort_upper) {
2147 if (!td.sortedColumnId) {
2148 throw std::runtime_error(
"Specified sort column " + sort_upper +
" doesn't exist");
// Option handler fragment: assigns an integer option to td.maxRollbackEpochs
// (right-hand side not visible), validated with PositiveOrZeroValidate.
2156 auto assignment = [&td](
const auto val) {
2157 td.maxRollbackEpochs =
2162 return get_property_value<IntLiteral, decltype(assignment), PositiveOrZeroValidate>(
// Dispatches one CREATE TABLE option to its handler. The option name is
// lower-cased and looked up in tableDefFuncMap; unknown names raise an error
// listing the supported options, otherwise the handler is invoked with the
// table descriptor, the raw option pointer and the column list.
// (The first parameter line declaring `td` is not part of this listing.)
2179 const std::unique_ptr<NameValueAssign>& p,
2180 const std::list<ColumnDescriptor>& columns) {
2181 const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2182 if (it == tableDefFuncMap.end()) {
2183 throw std::runtime_error(
2184 "Invalid CREATE TABLE option " + *p->get_name() +
2185 ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2187 "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE.");
2189 return it->second(td, p.get(), columns);
// CREATE TABLE AS variant of the option dispatcher. It uses the same
// tableDefFuncMap lookup as the plain CREATE TABLE dispatcher; only the error
// text differs (it additionally mentions USE_SHARED_DICTIONARIES).
// NOTE(review): USE_SHARED_DICTIONARIES is advertised in the message but would
// only be accepted here if tableDefFuncMap contains it - verify.
2193 const std::unique_ptr<NameValueAssign>& p,
2194 const std::list<ColumnDescriptor>& columns) {
2195 const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2196 if (it == tableDefFuncMap.end()) {
2197 throw std::runtime_error(
2198 "Invalid CREATE TABLE AS option " + *p->get_name() +
2199 ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2201 "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE or "
2202 "USE_SHARED_DICTIONARIES.");
2204 return it->second(td, p.get(), columns);
// Dispatches one CREATE DATAFRAME option to its handler via
// dataframeDefFuncMap (keyed by lower-cased option name). Unknown options
// raise an error listing the supported names.
// (The first parameter line declaring `df_td` is not part of this listing.)
2215 const std::unique_ptr<NameValueAssign>& p,
2216 const std::list<ColumnDescriptor>& columns) {
2218 dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2219 if (it == dataframeDefFuncMap.end()) {
2220 throw std::runtime_error(
2221 "Invalid CREATE DATAFRAME option " + *p->get_name() +
2222 ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
2224 return it->second(df_td, p.get(), columns);
// Builds a CreateTableStmt from a JSON payload.
//
// Expected payload members seen in the visible code:
//   "name"        - table name (required)
//   "elements"    - array of column declarations / column constraints (required)
//   "ifNotExists" - optional boolean
//   "options"     - required member; either an object of name -> literal, or null
//
// Structural violations are enforced with CHECK / LOG(FATAL); a few
// user-facing conditions throw std::runtime_error instead. Several interior
// lines are missing from this listing, so comments describe only what is
// visible.
2229 CreateTableStmt::CreateTableStmt(
const rapidjson::Value& payload) {
2230 CHECK(payload.HasMember(
"name"));
2231 table_ = std::make_unique<std::string>(
json_str(payload[
"name"]));
2232 CHECK(payload.HasMember(
"elements"));
2233 CHECK(payload[
"elements"].IsArray());
2236 is_temporary_ =
false;
2238 if_not_exists_ =
false;
2239 if (payload.HasMember(
"ifNotExists")) {
2240 if_not_exists_ =
json_bool(payload[
"ifNotExists"]);
2243 const auto elements = payload[
"elements"].GetArray();
2244 for (
const auto& element : elements) {
2245 CHECK(element.IsObject());
2246 CHECK(element.HasMember(
"type"));
// --- Column declaration -------------------------------------------------
2247 if (
json_str(element[
"type"]) ==
"SQL_COLUMN_DECLARATION") {
2248 CHECK(element.HasMember(
"name"));
2249 auto col_name = std::make_unique<std::string>(
json_str(element[
"name"]));
2250 CHECK(element.HasMember(
"sqltype"));
2256 if (element.HasMember(
"precision")) {
2257 precision =
json_i64(element[
"precision"]);
2259 if (element.HasMember(
"scale")) {
2260 scale =
json_i64(element[
"scale"]);
// Present only for array columns; absence selects the non-array form below.
2263 std::optional<int64_t> array_size;
2264 if (element.HasMember(
"arraySize")) {
2266 array_size =
json_i64(element[
"arraySize"]);
// Pick the SQLType constructor: geo subtype, (precision, scale), precision
// only, or neither.
2268 std::unique_ptr<SQLType> sql_type;
2269 if (element.HasMember(
"subtype")) {
2270 CHECK(element.HasMember(
"coordinateSystem"));
2272 sql_type = std::make_unique<SQLType>(
2275 static_cast<int>(
json_i64(element[
"coordinateSystem"])),
2277 }
else if (precision > 0 && scale > 0) {
2278 sql_type = std::make_unique<SQLType>(
sql_types,
2281 array_size.has_value(),
2282 array_size ? *array_size : -1);
2283 }
else if (precision > 0) {
2284 sql_type = std::make_unique<SQLType>(
sql_types,
2287 array_size.has_value(),
2288 array_size ? *array_size : -1);
2290 sql_type = std::make_unique<SQLType>(
sql_types,
2291 array_size.has_value(),
2292 array_size ? *array_size : -1);
2296 CHECK(element.HasMember(
"nullable"));
2297 const auto nullable =
json_bool(element[
"nullable"]);
2298 std::unique_ptr<ColumnConstraintDef> constraint_def;
2300 constraint_def = std::make_unique<ColumnConstraintDef>(
true,
// Optional compression: requires "encodingSize" whenever a non-null
// "encodingType" is given.
2305 std::unique_ptr<CompressDef> compress_def;
2306 if (element.HasMember(
"encodingType") && !element[
"encodingType"].IsNull()) {
// NOTE(review): `encoding_type` is not used in the visible lines; the same
// JSON field is read again into `encoding_name` below.
2307 std::string encoding_type =
json_str(element[
"encodingType"]);
2308 CHECK(element.HasMember(
"encodingSize"));
2309 auto encoding_name =
2310 std::make_unique<std::string>(
json_str(element[
"encodingType"]));
// release(): ownership of the raw string is handed to CompressDef.
2311 compress_def = std::make_unique<CompressDef>(encoding_name.release(),
2312 json_i64(element[
"encodingSize"]));
2314 auto col_def = std::make_unique<ColumnDef>(
2317 compress_def ? compress_def.release() :
nullptr,
2318 constraint_def ? constraint_def.release() :
nullptr);
2319 table_element_list_.emplace_back(std::move(col_def));
// --- Column constraint (SHARD_KEY / SHARED_DICT) ------------------------
2320 }
else if (
json_str(element[
"type"]) ==
"SQL_COLUMN_CONSTRAINT") {
2321 CHECK(element.HasMember(
"name"));
2322 if (
json_str(element[
"name"]) ==
"SHARD_KEY") {
2323 CHECK(element.HasMember(
"columns"));
2324 CHECK(element[
"columns"].IsArray());
2325 const auto& columns = element[
"columns"].GetArray();
2326 if (columns.Size() != size_t(1)) {
2327 throw std::runtime_error(
"Only one shard column is currently supported.");
2329 auto shard_key_def = std::make_unique<ShardKeyDef>(
json_str(columns[0]));
2330 table_element_list_.emplace_back(std::move(shard_key_def));
2331 }
else if (
json_str(element[
"name"]) ==
"SHARED_DICT") {
2332 CHECK(element.HasMember(
"columns"));
2333 CHECK(element[
"columns"].IsArray());
2334 const auto& columns = element[
"columns"].GetArray();
2335 if (columns.Size() != size_t(1)) {
2336 throw std::runtime_error(
2337 R
"(Only one column per shared dictionary entry is currently supported. Use multiple SHARED DICT statements to share dictionaries from multiple columns.)");
2339 CHECK(element.HasMember("references") && element[
"references"].IsObject());
2340 const auto& references = element[
"references"].GetObject();
// When no foreign table is named, the reference points at this same table.
2341 std::string references_table_name;
2342 if (references.HasMember(
"table")) {
2343 references_table_name =
json_str(references[
"table"]);
2345 references_table_name = *table_;
2347 CHECK(references.HasMember(
"column"));
2349 auto shared_dict_def = std::make_unique<SharedDictionaryDef>(
2350 json_str(columns[0]), references_table_name,
json_str(references[
"column"]));
2351 table_element_list_.emplace_back(std::move(shared_dict_def));
2354 LOG(
FATAL) <<
"Unsupported type for SQL_COLUMN_CONSTRAINT: "
2358 LOG(
FATAL) <<
"Unsupported element type for CREATE TABLE: "
2359 << element[
"type"].GetString();
// --- Storage options ------------------------------------------------------
// Each option value is wrapped in a String/Int/Null literal; other JSON value
// kinds are rejected.
2363 CHECK(payload.HasMember(
"options"));
2364 if (payload[
"options"].IsObject()) {
2365 for (
const auto& option : payload[
"options"].GetObject()) {
2366 auto option_name = std::make_unique<std::string>(
json_str(option.name));
2367 std::unique_ptr<Literal> literal_value;
2368 if (option.value.IsString()) {
2369 auto literal_string = std::make_unique<std::string>(
json_str(option.value));
2370 literal_value = std::make_unique<StringLiteral>(literal_string.release());
2371 }
else if (option.value.IsInt() || option.value.IsInt64()) {
2372 literal_value = std::make_unique<IntLiteral>(
json_i64(option.value));
2373 }
else if (option.value.IsNull()) {
2374 literal_value = std::make_unique<NullLiteral>();
2376 throw std::runtime_error(
"Unable to handle literal for " + *option_name);
2378 CHECK(literal_value);
// release(): NameValueAssign takes the raw pointers.
2380 storage_options_.emplace_back(std::make_unique<NameValueAssign>(
2381 option_name.release(), literal_value.release()));
// A non-object "options" member must be JSON null.
2384 CHECK(payload[
"options"].IsNull());
// Fragment of the CREATE TABLE dry-run routine (leading parameters not
// visible). It walks table_element_list_ and, by dynamic type:
//   * SharedDictionaryDef -> validated against the columns so far and appended
//     to shared_dict_defs,
//   * ShardKeyDef         -> remembered; a second one is an error,
//   * ColumnDef           -> converted to a ColumnDescriptor and appended,
//   * anything else       -> rejected as an unsupported table constraint.
// Afterwards it resolves the shard column, handles temporary tables and
// applies storage options (details largely not visible in this listing).
2390 std::list<ColumnDescriptor>& columns,
2391 std::vector<SharedDictionaryDef>& shared_dict_defs) {
2392 std::unordered_set<std::string> uc_col_names;
2395 for (
auto& e : table_element_list_) {
2396 if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2399 this, shared_dict_def, columns, shared_dict_defs, catalog);
2400 shared_dict_defs.push_back(*shared_dict_def);
2403 if (dynamic_cast<ShardKeyDef*>(e.get())) {
2404 if (shard_key_def) {
2405 throw std::runtime_error(
"Specified more than one shard key");
2407 shard_key_def =
static_cast<const ShardKeyDef*
>(e.get());
2410 if (!dynamic_cast<ColumnDef*>(e.get())) {
2411 throw std::runtime_error(
"Table constraints are not supported yet.");
2417 setColumnDescriptor(cd, coldef);
2418 columns.push_back(cd);
2423 if (shard_key_def) {
2426 throw std::runtime_error(
"Specified shard column " + shard_key_def->get_column() +
2430 if (is_temporary_) {
2435 if (!storage_options_.empty()) {
2436 for (
auto& p : storage_options_) {
2441 throw std::runtime_error(
"SHARD_COUNT needs to be specified with SHARD_KEY.");
// Fragment of the CREATE TABLE execution routine (signature not visible).
// Visible steps: take the execute write lock, reject users without create
// privileges, validate that no table/view of that name exists (honouring
// if_not_exists_), run the dry run to populate the descriptor, columns and
// shared dictionary definitions, create the (possibly sharded) table in the
// catalog, and register the new object with SysCatalog.
2449 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2456 throw std::runtime_error(
"Table " + *table_ +
2457 " will not be created. User has no create privileges.");
2460 if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
2465 std::list<ColumnDescriptor> columns;
2466 std::vector<SharedDictionaryDef> shared_dict_defs;
2468 executeDryRun(session, td, columns, shared_dict_defs);
2471 catalog.createShardedTable(td, columns, shared_dict_defs);
2474 SysCatalog::instance().createDBObject(
// Fragment of the CREATE DATAFRAME execution routine (signature not visible).
// Visible steps: take the execute write lock, check create privileges, fail
// if the table already exists, build column descriptors from the element list
// (rejecting duplicate column names case-insensitively and unsupported
// constraints), apply storage options, create the table in the catalog and
// register it with SysCatalog.
2481 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2488 throw std::runtime_error(
"Table " + *table_ +
2489 " will not be created. User has no create privileges.");
2492 if (catalog.getMetadataForTable(*table_) !=
nullptr) {
2493 throw std::runtime_error(
"Table " + *table_ +
" already exists.");
2496 std::list<ColumnDescriptor> columns;
2497 std::vector<SharedDictionaryDef> shared_dict_defs;
// Upper-cased names seen so far; used to detect duplicates.
2499 std::unordered_set<std::string> uc_col_names;
2500 for (
auto& e : table_element_list_) {
2501 if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2504 this, shared_dict_def, columns, shared_dict_defs, catalog);
2505 shared_dict_defs.push_back(*shared_dict_def);
2508 if (!dynamic_cast<ColumnDef*>(e.get())) {
2509 throw std::runtime_error(
"Table constraints are not supported yet.");
2514 const auto uc_col_name = boost::to_upper_copy<std::string>(cd.
columnName);
// insert().second is false when the name was already present.
2515 const auto it_ok = uc_col_names.insert(uc_col_name);
2516 if (!it_ok.second) {
2517 throw std::runtime_error(
"Column '" + cd.
columnName +
"' defined more than once");
2520 setColumnDescriptor(cd, coldef);
2521 columns.push_back(cd);
2534 if (!storage_options_.empty()) {
2535 for (
auto& p : storage_options_) {
2543 catalog.createShardedTable(df_td, columns, shared_dict_defs);
2546 SysCatalog::instance().createDBObject(
// Fragment of a query-execution helper (leading parameters not visible;
// presumably a LocalConnector::query overload). It takes the SELECT text,
// fills `targets` with the result's target metadata, and accepts optional
// `validate_only` and `outer_fragment_indices` arguments. Visible steps: fetch
// catalog and device type from the session, run the pg_shim'ed statement
// through the Calcite manager, inspect parsed query hints (cpu_mode), execute
// the relational-algebra query and copy out the targets metadata.
2551 const std::string select_stmt,
2552 std::vector<TargetMetaInfo>& targets,
2553 bool validate_only =
false,
2554 std::vector<size_t> outer_fragment_indices = {}) {
2556 auto& catalog = session->getCatalog();
2560 const auto device_type = session->get_executor_device_type();
2564 auto calcite_mgr = catalog.getCalciteMgr();
2568 const auto query_ra =
2570 ->process(query_state_proxy,
pg_shim(select_stmt), {},
true,
false,
false,
true)
2574 const auto& query_hints = ra_executor.getParsedQueryHints();
2575 if (query_hints.cpu_mode) {
2596 outer_fragment_indices};
2605 result = ra_executor.executeRelAlgQuery(co, eo,
false,
nullptr);
2606 targets =
result.getTargetsMeta();
// Fragment of the helper that asks the relational-algebra executor how many
// outer fragments a query has (leading parameters not visible). The query is
// pg_shim'ed, processed by the Calcite manager, and the count is returned from
// ra_executor.getOuterFragmentCount(co, eo). The long positional initializer
// on 2640 belongs to an options struct whose field names are not visible here.
2612 std::string& sql_query_string) {
2614 auto& catalog = session->getCatalog();
2618 const auto device_type = session->get_executor_device_type();
2622 auto calcite_mgr = catalog.getCalciteMgr();
2626 const auto query_ra =
2629 query_state_proxy,
pg_shim(sql_query_string), {},
true,
false,
false,
true)
2640 false,
true,
false,
true,
false,
false,
false,
false, 10000,
false,
false, 0.9};
2641 return ra_executor.getOuterFragmentCount(co, eo);
// Fragment of a query overload taking the SQL text, the allowed outer
// fragment indices and a validate-only flag (leading parameter not visible).
// It computes a pg_shim'ed copy of the query and a local target-metadata
// vector, then forwards to another call whose final argument is
// outer_frag_indices.
2645 std::string& sql_query_string,
2646 std::vector<size_t> outer_frag_indices,
2647 bool validate_only) {
2650 std::string pg_shimmed_select_query =
pg_shim(sql_query_string);
2652 std::vector<TargetMetaInfo> target_metainfos;
2657 outer_frag_indices);
// Convenience overload: runs the query for real by delegating to the
// four-argument overload with validate_only = false. Returns a vector of
// AggregatedResult (the conversion of `res` into that vector is not visible in
// this listing).
2662 std::vector<AggregatedResult> LocalConnector::query(
2664 std::string& sql_query_string,
2665 std::vector<size_t> outer_frag_indices) {
2666 auto res = query(query_state_proxy, sql_query_string, outer_frag_indices,
false);
// Fragment of the leaf-insert routine. The local connector only supports leaf
// index 0 (enforced with CHECK). It builds a chunk key from the current
// database id and the target table id, then appends the data through the
// table's fragmenter without checkpointing.
2671 const size_t leaf_idx,
2673 CHECK(leaf_idx == 0);
2676 ChunkKey chunkKey = {catalog.getCurrentDB().dbId, created_td->
tableId};
2679 created_td->fragmenter->insertDataNoCheckpoint(insert_data);
// Fragment: reads the table's current epochs from the catalog and writes them
// back via setTableEpochs (presumably to discard un-checkpointed data - the
// enclosing function is not visible, confirm intent in the full source).
2692 auto table_epochs = catalog.getTableEpochs(db_id, table_id);
2693 catalog.setTableEpochs(db_id, table_epochs);
// Fragment that derives column descriptors from a query result's target
// metadata. Two parallel lists are built per target: one describing the
// result columns as-is and one intended for table creation; which list is
// returned depends on a condition not visible in this listing. Column name
// comes from the target's result name and column type from its physical type
// info.
2698 std::list<ColumnDescriptor> column_descriptors;
2699 std::list<ColumnDescriptor> column_descriptors_for_create;
// Counter presumably used to disambiguate a reserved "rowid" column name -
// its use is not visible here, confirm.
2701 int rowid_suffix = 0;
2702 for (
const auto& target_metainfo : result.
targets_meta) {
2704 cd.
columnName = target_metainfo.get_resname();
2708 cd.
columnType = target_metainfo.get_physical_type_info();
2728 column_descriptors_for_create.push_back(cd_for_create);
2729 column_descriptors.push_back(cd);
2733 return column_descriptors_for_create;
2736 return column_descriptors;
// Fragment of the CTAS/ITAS data-population routine (leading parameters not
// visible). Outline of the visible code:
//   1. Choose a connector (an injected leafs_connector_ or a local one) and
//      decide whether the table should actually be populated.
//   2. If validate_table is set: check the target exists, is not a view, the
//      user may insert, and that source and target columns are compatible.
//   3. Run the SELECT once per outer fragment, convert each result set to
//      columnar insert blocks (optionally in parallel) and load them in
//      batches.
//   4. On failure attempt a rollback; on success log timings and checkpoint
//      non-temporary tables.
// Many lines are missing from this listing; comments cover visible code only.
2741 bool validate_table) {
2743 auto& catalog = session->getCatalog();
2747 bool populate_table =
false;
2749 if (leafs_connector_) {
2750 populate_table =
true;
2752 leafs_connector_ = &local_connector;
2754 populate_table =
true;
// Resolves the target columns: every column of the table when no explicit
// column list was given, otherwise the named columns (unknown name -> throw).
2758 auto get_target_column_descriptors = [
this, &catalog](
const TableDescriptor* td) {
2759 std::vector<const ColumnDescriptor*> target_column_descriptors;
2760 if (column_list_.empty()) {
2761 auto list = catalog.getAllColumnMetadataForTable(td->
tableId,
false,
false,
false);
2762 target_column_descriptors = {std::begin(list), std::end(list)};
2765 for (
auto& c : column_list_) {
2767 if (cd ==
nullptr) {
2768 throw std::runtime_error(
"Column " + *c +
" does not exist.");
2770 target_column_descriptors.push_back(cd);
2774 return target_column_descriptors;
// ---- Validation of target table and column compatibility ------------------
2779 if (validate_table) {
2782 throw std::runtime_error(
"Table " + table_name_ +
" does not exist.");
2785 throw std::runtime_error(
"Insert to views is not supported yet.");
2791 throw std::runtime_error(
"User has no insert privileges on " + table_name_ +
".");
// Validate-only run of the SELECT (last argument true) to learn the source
// column types without executing the query.
2796 auto result = local_connector.
query(query_state_proxy, select_query_, {},
true);
2799 std::vector<const ColumnDescriptor*> target_column_descriptors =
2800 get_target_column_descriptors(td);
2801 if (catalog.getAllColumnMetadataForTable(td->
tableId,
false,
false,
false).size() !=
2802 target_column_descriptors.size()) {
2803 throw std::runtime_error(
"Insert into a subset of columns is not supported yet.");
2806 if (source_column_descriptors.size() != target_column_descriptors.size()) {
2807 throw std::runtime_error(
"The number of source and target columns does not match.");
// NOTE(review): `int i` is compared against an unsigned size(), and the
// std::next(begin(), i) lookup below is a linear walk on every iteration.
2810 for (
int i = 0;
i < source_column_descriptors.size();
i++) {
2812 &(*std::next(source_column_descriptors.begin(),
i));
// Types for which no implicit cast between source and target is attempted.
2816 auto type_cannot_be_cast = [](
const auto& col_type) {
2817 return (col_type.is_time() || col_type.is_geometry() || col_type.is_array() ||
2818 col_type.is_boolean());
2821 if (type_cannot_be_cast(source_cd->
columnType) ||
2822 type_cannot_be_cast(target_cd->
columnType)) {
2823 throw std::runtime_error(
"Source '" + source_cd->
columnName +
" " +
2825 "' and target '" + target_cd->
columnName +
" " +
2827 "' column types do not match.");
2832 throw std::runtime_error(
"Source '" + source_cd->
columnName +
" " +
2834 "' and target '" + target_cd->
columnName +
" " +
2836 "' array column element types do not match.");
2851 throw std::runtime_error(
"Source '" + source_cd->
columnName +
" " +
2853 "' and target '" + target_cd->
columnName +
" " +
2855 "' decimal columns scales do not match.");
2861 throw std::runtime_error(
"Source '" + source_cd->
columnName +
" " +
2863 "' and target '" + target_cd->
columnName +
" " +
2865 "' column types do not match.");
2869 throw std::runtime_error(
"Source '" + source_cd->
columnName +
" " +
2871 "' and target '" + target_cd->
columnName +
" " +
2873 "' columns string encodings do not match.");
2880 throw std::runtime_error(
"Source '" + source_cd->
columnName +
" " +
2882 "' and target '" + target_cd->
columnName +
" " +
2884 "' timestamp column precisions do not match.");
2893 throw std::runtime_error(
"Source '" + source_cd->
columnName +
" " +
2895 "' and target '" + target_cd->
columnName +
" " +
2897 "' column encoding sizes do not match.");
2902 if (!populate_table) {
// ---- Data load -------------------------------------------------------------
// Accumulated statistics reported in the VLOG at the end.
2906 int64_t total_row_count = 0;
2907 int64_t total_source_query_time_ms = 0;
2908 int64_t total_target_value_translate_time_ms = 0;
2909 int64_t total_data_load_time_ms = 0;
2912 auto target_column_descriptors = get_target_column_descriptors(td);
2914 auto outer_frag_count =
2915 leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
// A count of 0 still runs the query once, with no fragment restriction.
2917 size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
2920 for (
size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
2921 std::vector<size_t> allowed_outer_fragment_indices;
2923 if (outer_frag_count) {
2924 allowed_outer_fragment_indices.push_back(outer_frag_idx);
2928 std::vector<AggregatedResult> query_results = leafs_connector_->query(
2929 query_state_proxy, select_query_, allowed_outer_fragment_indices);
2930 total_source_query_time_ms +=
timer_stop(query_clock_begin);
2932 for (
auto&
res : query_results) {
2933 auto result_rows =
res.rs;
2935 const auto num_rows = result_rows->rowCount();
2937 if (0 == num_rows) {
2941 total_row_count += num_rows;
2943 size_t leaf_count = leafs_connector_->leafCount();
// Batch size: an even share per leaf, capped at 64K rows.
// NOTE(review): num_rows / leaf_count is 0 when num_rows < leaf_count; the
// first batch is clamped to >= 1 on 2952, but the recomputation on 3125-3126
// uses this value again - confirm it cannot stall the loop.
2945 size_t max_number_of_rows_per_package =
2946 std::min(num_rows / leaf_count, 64UL * 1024UL);
2948 size_t start_row = 0;
2949 size_t num_rows_to_process = std::min(num_rows, max_number_of_rows_per_package);
2952 num_rows_to_process = std::max(num_rows_to_process, 1UL);
2954 std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
// NOTE(review): std::thread::hardware_concurrency() may return 0, which would
// make the two index vectors below empty while element 0 is written on
// 3035-3036 - confirm a guard exists in the omitted lines.
2958 const int num_worker_threads = std::thread::hardware_concurrency();
2960 std::vector<size_t> thread_start_idx(num_worker_threads),
2961 thread_end_idx(num_worker_threads);
// Parallel conversion only for non-truncated results with > 20000 rows/batch.
2962 bool can_go_parallel = !result_rows->isTruncated() && num_rows_to_process > 20000;
// Shared cursor handing out target row slots to the converter workers.
2964 std::atomic<size_t> row_idx{0};
// Worker used in parallel mode: random access by entry index, skipping empty
// entries; each non-empty row claims the next target slot.
2966 auto convert_function = [&result_rows,
2969 &num_rows_to_process,
2972 const int num_cols = value_converters.size();
2973 const size_t start = thread_start_idx[
thread_id];
2974 const size_t end = thread_end_idx[
thread_id];
2976 for (idx = start; idx < end; ++idx) {
2977 const auto result_row = result_rows->getRowAtNoTranslations(idx);
2978 if (!result_row.empty()) {
2979 size_t target_row = row_idx.fetch_add(1);
2981 if (target_row >= num_rows_to_process) {
2985 for (
unsigned int col = 0; col < num_cols; col++) {
2986 const auto& mapd_variant = result_row[col];
2987 value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
// Worker used in serial mode: consumes rows sequentially via getNextRow and
// expects every fetched row to be non-empty.
2995 auto single_threaded_convert_function = [&result_rows,
2998 &num_rows_to_process,
3001 const int num_cols = value_converters.size();
3002 const size_t start = thread_start_idx[
thread_id];
3003 const size_t end = thread_end_idx[
thread_id];
3005 for (idx = start; idx < end; ++idx) {
3006 size_t target_row = row_idx.fetch_add(1);
3008 if (target_row >= num_rows_to_process) {
3011 const auto result_row = result_rows->getNextRow(
false,
false);
3012 CHECK(!result_row.empty());
3013 for (
unsigned int col = 0; col < num_cols; col++) {
3014 const auto& mapd_variant = result_row[col];
3015 value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
// Partition the entry range into contiguous strides, one per worker
// (ceil-divided so the whole range is covered); serial mode uses slot 0 for
// the full range.
3022 if (can_go_parallel) {
3023 const size_t entryCount = result_rows->entryCount();
3026 stride = (entryCount + num_worker_threads - 1) / num_worker_threads;
3027 i < num_worker_threads && start_entry < entryCount;
3028 ++
i, start_entry += stride) {
3029 const auto end_entry = std::min(start_entry + stride, entryCount);
3030 thread_start_idx[
i] = start_entry;
3031 thread_end_idx[
i] = end_entry;
3035 thread_start_idx[0] = 0;
3036 thread_end_idx[0] = result_rows->entryCount();
3039 std::shared_ptr<Executor> executor;
// Batch loop: rebuild converters, convert, finalize, insert, advance.
3045 while (start_row < num_rows) {
3046 value_converters.clear();
3049 for (
const auto targetDescriptor : target_column_descriptors) {
3050 auto sourceDataMetaInfo =
res.targets_meta[colNum++];
3053 num_rows_to_process,
3057 targetDescriptor->columnType,
3058 !targetDescriptor->columnType.get_notnull(),
3059 result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
3061 ? executor->getStringDictionaryProxy(
3062 sourceDataMetaInfo.get_type_info().get_comp_param(),
3063 result_rows->getRowSetMemOwner(),
3066 auto converter = factory.
create(param);
3067 value_converters.push_back(std::move(converter));
3071 if (can_go_parallel) {
3072 std::vector<std::future<void>> worker_threads;
3073 for (
int i = 0;
i < num_worker_threads; ++
i) {
3074 worker_threads.push_back(
3075 std::async(std::launch::async, convert_function,
i));
// Two passes over the futures (bodies not visible; presumably wait() then
// get() so exceptions surface only after all workers finished - confirm).
3078 for (
auto& child : worker_threads) {
3081 for (
auto& child : worker_threads) {
3086 single_threaded_convert_function(0);
// Finalize every converter's data blocks concurrently, one task per column.
3091 auto finalizer_func =
3092 [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
3093 targetValueConverter->finalizeDataBlocksForInsertData();
3095 std::vector<std::future<void>> worker_threads;
3096 for (
auto& converterPtr : value_converters) {
3097 worker_threads.push_back(
3098 std::async(std::launch::async, finalizer_func, converterPtr.get()));
3101 for (
auto& child : worker_threads) {
3104 for (
auto& child : worker_threads) {
3110 insert_data.
databaseId = catalog.getCurrentDB().dbId;
3113 insert_data.
numRows = num_rows_to_process;
3115 for (
int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
3116 value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
3118 total_target_value_translate_time_ms +=
timer_stop(translate_clock_begin);
3121 insertDataLoader.
insertData(*session, insert_data);
3122 total_data_load_time_ms +=
timer_stop(data_load_clock_begin);
3124 start_row += num_rows_to_process;
3125 num_rows_to_process =
3126 std::min(num_rows - start_row, max_number_of_rows_per_package);
// Failure path: best-effort rollback; a failing rollback is only logged.
3132 leafs_connector_->rollback(*session, td->
tableId);
3133 }
catch (std::exception& e) {
3134 LOG(
ERROR) <<
"An error occurred during ITAS rollback attempt. Table id: "
3135 << td->
tableId <<
", Error: " << e.what();
3140 int64_t total_time_ms = total_source_query_time_ms +
3141 total_target_value_translate_time_ms + total_data_load_time_ms;
3143 VLOG(1) <<
"CTAS/ITAS " << total_row_count <<
" rows loaded in " << total_time_ms
3144 <<
"ms (outer_frag_count=" << outer_frag_count
3145 <<
", query_time=" << total_source_query_time_ms
3146 <<
"ms, translation_time=" << total_target_value_translate_time_ms
3147 <<
"ms, data_load_time=" << total_data_load_time_ms
3148 <<
"ms)\nquery: " << select_query_;
// Temporary tables are not checkpointed.
3150 if (!is_temporary) {
3151 leafs_connector_->checkpoint(*session, td->
tableId);
// Fragment of the INSERT ... SELECT execution routine (signature not
// visible). Visible steps: wrap a copy of the session in a non-owning
// shared_ptr, take the execute read lock, ask Calcite which tables the SELECT
// reads, add the target table, order the names by table id and de-duplicate
// them, acquire per-table locks in that order, then populate the table with
// validation enabled.
3156 auto session_copy = session;
// null_deleter: the shared_ptr must not free the stack-allocated copy.
3157 auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3158 &session_copy, boost::null_deleter());
3160 auto stdlog =
STDLOG(query_state);
3162 auto& catalog = session_ptr->getCatalog();
3164 const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3169 std::vector<std::string>
tables;
3172 auto calcite_mgr = catalog.getCalciteMgr();
3176 const auto result = calcite_mgr->process(query_state->createQueryStateProxy(),
3184 tables.insert(tables.end(),
3185 result.resolved_accessed_objects.tables_selected_from.begin(),
3186 result.resolved_accessed_objects.tables_selected_from.end());
3187 tables.emplace_back(table_name_);
// Comparator ordering table names by table id (presumably passed to a sort so
// locks are always taken in a consistent order - the call is not visible).
// NOTE(review): getMetadataForTable's result is dereferenced unchecked.
3193 [&catalog](
const std::string& a,
const std::string& b) {
3194 return catalog.getMetadataForTable(a,
false)->tableId <
3195 catalog.getMetadataForTable(b,
false)->tableId;
3198 tables.erase(unique(tables.begin(), tables.end()), tables.end());
3199 for (
const auto& table : tables) {
// The target table is handled separately from the tables only read from
// (lock types are on lines not visible here).
3204 if (table == table_name_) {
3210 catalog.getDatabaseId(), (*locks.back())())));
3215 catalog.getDatabaseId(), (*locks.back())())));
3221 populateData(query_state->createQueryStateProxy(), td,
true);
// Fragment of the CREATE TABLE AS SELECT execution routine (signature not
// visible). Visible phases:
//   1. Under the execute write lock: check create privileges, handle an
//      already-existing table, discover the SELECT's source tables via
//      Calcite, validate the query to obtain column descriptors, build the
//      table descriptor (including optional shared dictionaries) and create
//      the table.
//   2. Under the execute read lock: lock source and target tables in table-id
//      order and populate the new table without re-validation.
//   3. If population fails, drop the freshly created table.
3225 auto session_copy = session;
// null_deleter: the shared_ptr must not free the stack-allocated copy.
3226 auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3227 &session_copy, boost::null_deleter());
3229 auto stdlog =
STDLOG(query_state);
3235 std::set<std::string> select_tables;
3237 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3244 throw std::runtime_error(
"CTAS failed. Table " + table_name_ +
3245 " will not be created. User has no create privileges.");
3248 if (catalog.getMetadataForTable(table_name_) !=
nullptr) {
3249 if (if_not_exists_) {
3252 throw std::runtime_error(
"Table " + table_name_ +
3253 " already exists and no data was loaded.");
3257 auto calcite_mgr = catalog.getCalciteMgr();
3261 const auto result = calcite_mgr->process(query_state->createQueryStateProxy(),
3269 select_tables.insert(
result.resolved_accessed_objects.tables_selected_from.begin(),
3270 result.resolved_accessed_objects.tables_selected_from.end());
// Validate-only run (last argument true) to derive the result column types.
3275 auto validate_result = local_connector.
query(
3276 query_state->createQueryStateProxy(), select_query_, {},
true);
3278 const auto column_descriptors_for_create =
3282 for (
auto& cd : column_descriptors_for_create) {
3283 if (cd.columnType.is_decimal() && cd.columnType.get_precision() > 18) {
3284 throw std::runtime_error(cd.columnName +
": Precision too high, max 18.");
3291 td.
nColumns = column_descriptors_for_create.size();
3300 if (is_temporary_) {
// Shared dictionaries are on by default; the USE_SHARED_DICTIONARIES option
// (string literal "true"/"1"/"t", case-insensitive) can turn them off.
3306 bool use_shared_dictionaries =
true;
3308 if (!storage_options_.empty()) {
3309 for (
auto& p : storage_options_) {
3310 if (boost::to_lower_copy<std::string>(*p->get_name()) ==
3311 "use_shared_dictionaries") {
3314 if (
nullptr == literal) {
3315 throw std::runtime_error(
3316 "USE_SHARED_DICTIONARIES must be a string parameter");
3318 std::string val = boost::to_lower_copy<std::string>(*literal->
get_stringval());
3319 use_shared_dictionaries = val ==
"true" || val ==
"1" || val ==
"t";
3326 std::vector<SharedDictionaryDef> sharedDictionaryRefs;
// For each string source column whose dictionary id maps to an existing
// column, reference that column's dictionary instead of creating a new one.
3328 if (use_shared_dictionaries) {
3329 const auto source_column_descriptors =
3331 const auto mapping = catalog.getDictionaryToColumnMapping();
3333 for (
auto& source_cd : source_column_descriptors) {
3334 const auto& ti = source_cd.columnType;
3335 if (ti.is_string()) {
3337 int dict_id = ti.get_comp_param();
3338 auto it = mapping.find(dict_id);
3339 if (mapping.end() != it) {
3340 const auto targetColumn = it->second;
3342 catalog.getMetadataForTable(targetColumn->tableId,
false);
3344 LOG(
INFO) <<
"CTAS: sharing text dictionary on column "
3345 << source_cd.columnName <<
" with " << targetTable->tableName
3346 <<
"." << targetColumn->columnName;
3347 sharedDictionaryRefs.push_back(
3349 targetTable->tableName,
3350 targetColumn->columnName));
3360 catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs,
true);
3363 SysCatalog::instance().createDBObject(
// ---- Phase 2: populate under read lock ------------------------------------
3369 const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3374 std::vector<std::string>
tables;
3375 tables.insert(tables.end(), select_tables.begin(), select_tables.end());
3376 CHECK_EQ(tables.size(), select_tables.size());
3377 tables.emplace_back(table_name_);
// Comparator ordering table names by table id (presumably for a sort that
// fixes the lock acquisition order - the call is not visible).
3382 [&catalog](
const std::string& a,
const std::string& b) {
3383 return catalog.getMetadataForTable(a,
false)->tableId <
3384 catalog.getMetadataForTable(b,
false)->tableId;
3386 tables.erase(unique(tables.begin(), tables.end()), tables.end());
3387 for (
const auto& table : tables) {
3392 if (table == table_name_) {
3398 catalog.getDatabaseId(), (*locks.back())())));
3403 catalog.getDatabaseId(), (*locks.back())())));
// validate_table = false: the table was just created from the same query.
3410 populateData(query_state->createQueryStateProxy(), td,
false);
// Cleanup path: remove the table created above when population failed
// (the surrounding try/catch is not visible in this listing).
3413 const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
3415 catalog.dropTable(created_td);
// Constructs a DROP TABLE statement from a JSON payload.
//
// The payload must have a "tableName" member (enforced with CHECK); the value
// returned by json_str() for it is stored in a newly allocated std::string
// owned by `table`. When the payload has an "ifExists" member, `if_exists` is
// set from json_bool() of that member.
// NOTE(review): this excerpt does not show what `if_exists` holds when
// "ifExists" is absent -- confirm the default against the full source.
3422 DropTableStmt::DropTableStmt(
const rapidjson::Value& payload) {
3423 CHECK(payload.HasMember(
"tableName"));
3424 table = std::make_unique<std::string>(
json_str(payload[
"tableName"]));
// "ifExists" is optional: it is only read when present.
3427 if (payload.HasMember(
"ifExists")) {
3428 if_exists =
json_bool(payload[
"ifExists"]);
3436 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3441 std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
3445 std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
3447 catalog, *table,
false));
3448 td = (*td_with_lock)();
3449 }
catch (
const std::runtime_error& e) {
3458 CHECK(td_with_lock);
3463 throw std::runtime_error(
"Table " + *table +
3464 " will not be dropped. User has no proper privileges.");
3469 auto table_data_write_lock =
3471 catalog.dropTable(td);
3482 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3486 const auto td_with_lock =
3488 catalog, *table,
true);
3489 const auto td = td_with_lock();
3491 throw std::runtime_error(
"Table " + *table +
" does not exist.");
3495 std::vector<DBObject> privObjects;
3499 privObjects.push_back(dbObject);
3500 if (!SysCatalog::instance().checkPrivileges(session.
get_currentUser(), privObjects)) {
3501 throw std::runtime_error(
"Table " + *table +
" will not be truncated. User " +
3503 " has no proper privileges.");
3507 throw std::runtime_error(*table +
" is a view. Cannot Truncate.");
3510 auto table_data_write_lock =
3512 catalog.truncateTable(td);
3524 std::vector<DBObject> privObjects;
3528 privObjects.push_back(dbObject);
3529 if (!SysCatalog::instance().checkPrivileges(session.
get_currentUser(), privObjects)) {
3530 throw std::runtime_error(
"Current user does not have the privilege to alter table: " +
3537 throw std::runtime_error(
"Only a super user can rename users.");
3541 if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
3542 throw std::runtime_error(
"User " + *username_ +
" does not exist.");
3545 SysCatalog::instance().renameUser(*username_, *new_username_);
3552 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3556 if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
3557 throw std::runtime_error(
"Database " + *database_name_ +
" does not exist.");
3562 throw std::runtime_error(
"Only a super user or the owner can rename the database.");
3565 SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
3572 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3576 const auto td_with_lock =
3578 catalog, *table,
false);
3579 const auto td = td_with_lock();
3584 if (catalog.getMetadataForTable(*new_table_name) !=
nullptr) {
3585 throw std::runtime_error(
"Table or View " + *new_table_name +
" already exists.");
3587 catalog.renameTable(td, *new_table_name);
3593 if (cc ==
nullptr) {
3609 throw std::runtime_error(
"Table " + *table +
" does not exist.");
3612 throw std::runtime_error(
"Adding columns to a view is not supported.");
3616 throw std::runtime_error(
3617 "Adding columns to temporary tables is not yet supported.");
3623 if (0 == coldefs.size()) {
3624 coldefs.push_back(std::move(coldef));
3627 for (
const auto& coldef : coldefs) {
3628 auto& new_column_name = *coldef->get_column_name();
3629 if (catalog.getMetadataForColumn(td->
tableId, new_column_name) !=
nullptr) {
3630 throw std::runtime_error(
"Column " + new_column_name +
" already exists.");
3639 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3643 const auto td_with_lock =
3645 catalog, *table,
true);
3646 const auto td = td_with_lock();
3648 check_executable(session, td);
3651 if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
3653 throw std::runtime_error(
3654 "Adding columns to a table is not supported when using the \"sort_column\" "
3662 catalog.getSqliteConnector().query(
"BEGIN TRANSACTION");
3664 std::map<const std::string, const ColumnDescriptor> cds;
3665 std::map<const int, const ColumnDef*> cid_coldefs;
3666 for (
const auto& coldef : coldefs) {
3668 setColumnDescriptor(cd, coldef.get());
3669 catalog.addColumn(*td, cd);
3670 cds.emplace(*coldef->get_column_name(), cd);
3671 cid_coldefs.emplace(cd.
columnId, coldef.get());
3675 std::list<ColumnDescriptor> phy_geo_columns;
3676 catalog.expandGeoColumn(cd, phy_geo_columns);
3677 for (
auto& cd : phy_geo_columns) {
3678 catalog.addColumn(*td, cd);
3680 cid_coldefs.emplace(cd.
columnId,
nullptr);
3687 loader->setReplicating(
true);
3691 import_buffers.end(),
3692 [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
3693 return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
3699 const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
3701 std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](
const auto& td) {
3706 int skip_physical_cols = 0;
3707 for (
const auto cit : cid_coldefs) {
3708 const auto cd = catalog.getMetadataForColumn(td->
tableId, cit.first);
3709 const auto coldef = cit.second;
3710 const auto column_constraint = coldef ? coldef->get_column_constraint() :
nullptr;
3711 std::string defaultval =
"";
3712 if (column_constraint) {
3713 auto defaultlp = column_constraint->get_defaultval();
3714 auto defaultsp =
dynamic_cast<const StringLiteral*
>(defaultlp);
3716 : defaultlp ? defaultlp->to_string() :
"";
3718 bool isnull = column_constraint ? (0 == defaultval.size()) :
true;
3719 if (boost::to_upper_copy<std::string>(defaultval) ==
"NULL") {
3724 if (column_constraint && column_constraint->get_notnull()) {
3725 throw std::runtime_error(
"Default value required for column " +
3726 cd->columnName +
" (NULL value not supported)");
3730 for (
auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
3731 auto& import_buffer = *it;
3732 if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
3733 if (coldef !=
nullptr ||
3734 skip_physical_cols-- <= 0) {
3735 import_buffer->add_value(
3737 if (cd->columnType.is_geometry()) {
3738 std::vector<double> coords, bounds;
3739 std::vector<int> ring_sizes, poly_rings;
3740 int render_group = 0;
3749 throw std::runtime_error(
"Bad geometry data: '" + defaultval +
"'");
3751 size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
3763 skip_physical_cols = cd->columnType.get_physical_cols();
3772 if (!loader->loadNoCheckpoint(import_buffers, nrows)) {
3773 throw std::runtime_error(
"loadNoCheckpoint failed!");
3776 loader->checkpoint();
3777 catalog.getSqliteConnector().query(
"END TRANSACTION");
3779 catalog.roll(
false);
3780 catalog.getSqliteConnector().query(
"ROLLBACK TRANSACTION");
3789 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3793 const auto td_with_lock =
3795 catalog, *table,
true);
3796 const auto td = td_with_lock();
3798 throw std::runtime_error(
"Table " + *table +
" does not exist.");
3802 throw std::runtime_error(
"Dropping a column from a view is not supported.");
3805 throw std::runtime_error(
3806 "Dropping a column from a temporary table is not yet supported.");
3811 for (
const auto& column : columns) {
3812 if (
nullptr == catalog.getMetadataForColumn(td->
tableId, *column)) {
3813 throw std::runtime_error(
"Column " + *column +
" does not exist.");
3818 throw std::runtime_error(
"Table " + *table +
" has only one column.");
3821 catalog.getSqliteConnector().query(
"BEGIN TRANSACTION");
3823 std::vector<int> columnIds;
3824 for (
const auto& column : columns) {
3827 throw std::runtime_error(
"Dropping sharding column " + cd.
columnName +
3828 " is not supported.");
3830 catalog.dropColumn(*td, cd);
3833 const auto pcd = catalog.getMetadataForColumn(td->
tableId, cd.
columnId +
i + 1);
3835 catalog.dropColumn(*td, *pcd);
3836 columnIds.push_back(cd.
columnId +
i + 1);
3840 for (
auto shard : catalog.getPhysicalTablesDescriptors(td)) {
3841 shard->fragmenter->dropColumns(columnIds);
3845 throw std::runtime_error(
"lol!");
3849 catalog.checkpoint(td->
tableId);
3851 catalog.getSqliteConnector().query(
"END TRANSACTION");
3853 catalog.setForReload(td->
tableId);
3854 catalog.roll(
false);
3855 catalog.getSqliteConnector().query(
"ROLLBACK TRANSACTION");
3866 const auto td_with_lock =
3868 catalog, *table,
false);
3869 const auto td = td_with_lock();
3875 if (cd ==
nullptr) {
3876 throw std::runtime_error(
"Column " + *column +
" does not exist.");
3878 if (catalog.getMetadataForColumn(td->
tableId, *new_column_name) !=
nullptr) {
3879 throw std::runtime_error(
"Column " + *new_column_name +
" already exists.");
3881 catalog.renameColumn(td, cd, *new_column_name);
// Table parameters that this statement can set, and the lookup table from the
// (lower-case) parameter name to its enumerator. The statement's parameter
// name is looked up in `param_map`; a name that is not a key is rejected with
// "<name> is not a settable table parameter.", and the mapped enumerator
// selects the catalog call in the switch that follows.
// The enum is unscoped on purpose: the switch labels use the bare enumerator
// names.
3885 enum TableParamType { MaxRollbackEpochs,
Epoch };
3886 static const std::unordered_map<std::string, TableParamType> param_map = {
3887 {
"max_rollback_epochs", TableParamType::MaxRollbackEpochs},
3888 {
"epoch", TableParamType::Epoch}};
3891 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3895 const auto td_with_lock =
3897 catalog, *table,
false);
3898 const auto td = td_with_lock();
3900 throw std::runtime_error(
"Table " + *table +
" does not exist.");
3903 throw std::runtime_error(
"Setting parameters for a view is not supported.");
3906 throw std::runtime_error(
3907 "Setting parameters for a temporary table is not yet supported.");
3912 std::string param_name(*param->get_name());
3915 if (val_int_literal ==
nullptr) {
3916 throw std::runtime_error(
"Table parameters should be integers.");
3918 const int32_t param_val = val_int_literal->
get_intval();
3920 const auto param_it = param_map.find(param_name);
3921 if (param_it == param_map.end()) {
3922 throw std::runtime_error(param_name +
" is not a settable table parameter.");
3924 switch (param_it->second) {
3925 case MaxRollbackEpochs: {
3926 catalog.setMaxRollbackEpochs(td->
tableId, param_val);
3930 catalog.setTableEpoch(catalog.getDatabaseId(), td->
tableId, param_val);
3938 const std::string& file_path,
3940 return std::make_unique<import_export::Importer>(catalog, td, file_path, copy_params);
3942 return execute(session, importer_factory);
3946 const std::function<std::unique_ptr<import_export::Importer>(
3951 boost::regex non_local_file_regex{R
"(^\s*(s3|http|https)://.+)",
3952 boost::regex::extended | boost::regex::icase};
3953 if (!boost::regex_match(*file_pattern, non_local_file_regex)) {
3958 size_t rows_completed = 0;
3960 size_t total_time = 0;
3961 bool load_truncated =
false;
3964 const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3969 std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
3970 std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
3975 td_with_lock = std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
3978 td = (*td_with_lock)();
3979 insert_data_lock = std::make_unique<lockmgr::WriteLock>(
3981 }
catch (
const std::runtime_error& e) {
3989 std::vector<DBObject> privObjects;
3993 privObjects.push_back(dbObject);
3994 if (!SysCatalog::instance().checkPrivileges(session.
get_currentUser(), privObjects)) {
3995 throw std::runtime_error(
"Violation of access privileges: user " +
3997 " has no insert privileges for table " + *table +
".");
4005 std::string file_path = *file_pattern;
4007 if (!options.empty()) {
4008 for (
auto& p : options) {
4009 if (boost::iequals(*p->get_name(),
"max_reject")) {
4011 if (int_literal ==
nullptr) {
4012 throw std::runtime_error(
"max_reject option must be an integer.");
4015 }
else if (boost::iequals(*p->get_name(),
"buffer_size")) {
4017 if (int_literal ==
nullptr) {
4018 throw std::runtime_error(
"buffer_size option must be an integer.");
4021 }
else if (boost::iequals(*p->get_name(),
"threads")) {
4023 if (int_literal ==
nullptr) {
4024 throw std::runtime_error(
"Threads option must be an integer.");
4027 }
else if (boost::iequals(*p->get_name(),
"delimiter")) {
4030 if (str_literal ==
nullptr) {
4031 throw std::runtime_error(
"Delimiter option must be a string.");
4033 throw std::runtime_error(
"Delimiter must be a single character string.");
4036 }
else if (boost::iequals(*p->get_name(),
"nulls")) {
4039 if (str_literal ==
nullptr) {
4040 throw std::runtime_error(
"Nulls option must be a string.");
4043 }
else if (boost::iequals(*p->get_name(),
"header")) {
4046 if (str_literal ==
nullptr) {
4047 throw std::runtime_error(
"Header option must be a boolean.");
4052 #ifdef ENABLE_IMPORT_PARQUET
4053 }
else if (boost::iequals(*p->get_name(),
"parquet")) {
4056 if (str_literal ==
nullptr) {
4057 throw std::runtime_error(
"Parquet option must be a boolean.");
4062 copy_params.
file_type = import_export::FileType::PARQUET;
4064 #endif // ENABLE_IMPORT_PARQUET
4065 }
else if (boost::iequals(*p->get_name(),
"s3_access_key")) {
4068 if (str_literal ==
nullptr) {
4069 throw std::runtime_error(
"Option s3_access_key must be a string.");
4072 }
else if (boost::iequals(*p->get_name(),
"s3_secret_key")) {
4075 if (str_literal ==
nullptr) {
4076 throw std::runtime_error(
"Option s3_secret_key must be a string.");
4079 }
else if (boost::iequals(*p->get_name(),
"s3_region")) {
4082 if (str_literal ==
nullptr) {
4083 throw std::runtime_error(
"Option s3_region must be a string.");
4086 }
else if (boost::iequals(*p->get_name(),
"s3_endpoint")) {
4089 if (str_literal ==
nullptr) {
4090 throw std::runtime_error(
"Option s3_endpoint must be a string.");
4093 }
else if (boost::iequals(*p->get_name(),
"quote")) {
4096 if (str_literal ==
nullptr) {
4097 throw std::runtime_error(
"Quote option must be a string.");
4099 throw std::runtime_error(
"Quote must be a single character string.");
4102 }
else if (boost::iequals(*p->get_name(),
"escape")) {
4105 if (str_literal ==
nullptr) {
4106 throw std::runtime_error(
"Escape option must be a string.");
4108 throw std::runtime_error(
"Escape must be a single character string.");
4111 }
else if (boost::iequals(*p->get_name(),
"line_delimiter")) {
4114 if (str_literal ==
nullptr) {
4115 throw std::runtime_error(
"Line_delimiter option must be a string.");
4117 throw std::runtime_error(
"Line_delimiter must be a single character string.");
4120 }
else if (boost::iequals(*p->get_name(),
"quoted")) {
4123 if (str_literal ==
nullptr) {
4124 throw std::runtime_error(
"Quoted option must be a boolean.");
4127 }
else if (boost::iequals(*p->get_name(),
"plain_text")) {
4130 if (str_literal ==
nullptr) {
4131 throw std::runtime_error(
"plain_text option must be a boolean.");
4134 }
else if (boost::iequals(*p->get_name(),
"array_marker")) {
4137 if (str_literal ==
nullptr) {
4138 throw std::runtime_error(
"Array Marker option must be a string.");
4140 throw std::runtime_error(
4141 "Array Marker option must be exactly two characters. Default is {}.");
4145 }
else if (boost::iequals(*p->get_name(),
"array_delimiter")) {
4148 if (str_literal ==
nullptr) {
4149 throw std::runtime_error(
"Array Delimiter option must be a string.");
4151 throw std::runtime_error(
"Array Delimiter must be a single character string.");
4154 }
else if (boost::iequals(*p->get_name(),
"lonlat")) {
4157 if (str_literal ==
nullptr) {
4158 throw std::runtime_error(
"Lonlat option must be a boolean.");
4161 }
else if (boost::iequals(*p->get_name(),
"geo")) {
4164 if (str_literal ==
nullptr) {
4165 throw std::runtime_error(
"Geo option must be a boolean.");
4170 }
else if (boost::iequals(*p->get_name(),
"geo_coords_type")) {
4173 if (str_literal ==
nullptr) {
4174 throw std::runtime_error(
"'geo_coords_type' option must be a string");
4177 if (boost::iequals(*s,
"geography")) {
4178 throw std::runtime_error(
4179 "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
4181 }
else if (boost::iequals(*s,
"geometry")) {
4184 throw std::runtime_error(
4185 "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
4189 }
else if (boost::iequals(*p->get_name(),
"geo_coords_encoding")) {
4192 if (str_literal ==
nullptr) {
4193 throw std::runtime_error(
"'geo_coords_encoding' option must be a string");
4196 if (boost::iequals(*s,
"none")) {
4199 }
else if (boost::iequals(*s,
"compressed(32)")) {
4203 throw std::runtime_error(
4204 "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
4205 "'COMPRESSED(32)'): " +
4208 }
else if (boost::iequals(*p->get_name(),
"geo_coords_srid")) {
4210 if (int_literal ==
nullptr) {
4211 throw std::runtime_error(
"'geo_coords_srid' option must be an integer");
4214 if (srid == 4326 || srid == 3857 || srid == 900913) {
4217 throw std::runtime_error(
4218 "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
4222 }
else if (boost::iequals(*p->get_name(),
"geo_layer_name")) {
4225 if (str_literal ==
nullptr) {
4226 throw std::runtime_error(
"'geo_layer_name' option must be a string");
4228 const std::string* layer_name = str_literal->
get_stringval();
4232 throw std::runtime_error(
"Invalid value for 'geo_layer_name' option");
4234 }
else if (boost::iequals(*p->get_name(),
"partitions")) {
4236 const auto partitions =
4237 static_cast<const StringLiteral*
>(p->get_value())->get_stringval();
4239 const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
4240 if (partitions_uc !=
"REPLICATED") {
4241 throw std::runtime_error(
"PARTITIONS must be REPLICATED for geo COPY");
4243 _geo_copy_from_partitions = partitions_uc;
4245 throw std::runtime_error(
"PARTITIONS option not supported for non-geo COPY: " +
4248 }
else if (boost::iequals(*p->get_name(),
"geo_assign_render_groups")) {
4251 if (str_literal ==
nullptr) {
4252 throw std::runtime_error(
"geo_assign_render_groups option must be a boolean.");
4255 }
else if (boost::iequals(*p->get_name(),
"geo_explode_collections")) {
4258 if (str_literal ==
nullptr) {
4259 throw std::runtime_error(
"geo_explode_collections option must be a boolean.");
4262 }
else if (boost::iequals(*p->get_name(),
"source_srid")) {
4264 if (int_literal ==
nullptr) {
4265 throw std::runtime_error(
"'source_srid' option must be an integer");
4271 throw std::runtime_error(
4272 "'source_srid' option can only be used on csv/tsv files");
4275 throw std::runtime_error(
"Invalid option for COPY: " + *p->get_name());
4285 _geo_copy_from_file_name = file_path;
4286 _geo_copy_from_copy_params = copy_params;
4287 _was_geo_copy_from =
true;
4293 tr = std::string(
"Appending geo to table '") + *table + std::string(
"'...");
4295 tr = std::string(
"Creating table '") + *table +
4296 std::string(
"' and importing geo...");
4300 CHECK(td_with_lock);
4303 auto importer = importer_factory(catalog, td, file_path, copy_params);
4305 auto res = importer->import();
4306 rows_completed +=
res.rows_completed;
4307 rows_rejected +=
res.rows_rejected;
4308 load_truncated =
res.load_truncated;
4313 if (load_truncated || rows_rejected > copy_params.
max_reject) {
4314 LOG(
ERROR) <<
"COPY exited early due to reject records count during multi file "
4317 load_truncated =
true;
4320 if (!load_truncated) {
4326 tr = std::string(
"Loader truncated due to reject count. Processed : " +
4332 throw std::runtime_error(
"Table '" + *table +
"' must exist before COPY FROM");
4336 return_message.reset(
new std::string(tr));
4343 if (!currentUser.isSuper) {
4344 throw std::runtime_error(
"CREATE ROLE " + get_role() +
4345 " failed. It can only be executed by super user.");
4347 SysCatalog::instance().createRole(get_role());
4353 if (!currentUser.isSuper) {
4354 throw std::runtime_error(
"DROP ROLE " + get_role() +
4355 " failed. It can only be executed by super user.");
4357 auto* rl = SysCatalog::instance().getRoleGrantee(get_role());
4359 throw std::runtime_error(
"DROP ROLE " + get_role() +
4360 " failed because role with this name does not exist.");
4362 SysCatalog::instance().dropRole(get_role());
4366 std::vector<std::string> componentNames;
4367 boost::split(componentNames, hierName, boost::is_any_of(
"."));
4368 return componentNames;
4372 const std::string& objectType,
4374 std::string objectName;
4376 if (objectType.compare(
"DATABASE") == 0) {
4377 if (componentNames.size() == 1) {
4378 objectName = componentNames[0];
4380 throw std::runtime_error(
"DB object name is not correct " + objectHierName);
4383 if (objectType.compare(
"TABLE") == 0 || objectType.compare(
"DASHBOARD") == 0 ||
4384 objectType.compare(
"VIEW") == 0 || objectType.compare(
"SERVER") == 0) {
4385 switch (componentNames.size()) {
4387 objectName = componentNames[0];
4391 objectName = componentNames[1];
4395 throw std::runtime_error(
"DB object name is not correct " + objectHierName);
4399 throw std::runtime_error(
"DB object type " + objectType +
" is not supported.");
4406 const std::string& privs,
4408 const std::string& object_name) {
4409 static const std::map<std::pair<const std::string, const DBObjectType>,
4410 std::pair<const AccessPrivileges, const DBObjectType>>
4496 auto result = privileges_lookup.find(std::make_pair(privs, objectType));
4497 if (
result == privileges_lookup.end()) {
4498 throw std::runtime_error(
"Privileges " + privs +
" on DB object " + object_name +
4499 " are not correct.");
4506 int32_t dashboard_id = -1;
4507 if (!objectName.empty()) {
4509 dashboard_id = stoi(objectName);
4510 }
catch (
const std::exception&) {
4511 throw std::runtime_error(
4512 "Privileges on dashboards should be changed via integer dashboard ID");
4515 return DBObject(dashboard_id, objectType);
4517 return DBObject(objectName, objectType);
4525 const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4526 const auto objectName =
4530 throw std::runtime_error(
"GRANT failed. SERVER object unrecognized.");
4534 if (!currentUser.isSuper) {
4535 if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4536 throw std::runtime_error(
4537 "GRANT failed. It can only be executed by super user or owner of the "
4542 std::vector<DBObject> objects(get_privs().size(), dbObject);
4543 for (
size_t i = 0;
i < get_privs().size(); ++
i) {
4545 boost::to_upper_copy<std::string>(get_privs()[
i]), objectType, get_object());
4546 objects[
i].setPrivileges(priv.first);
4547 objects[
i].setPermissionType(priv.second);
4549 throw std::runtime_error(
"GRANT failed. SERVER object unrecognized.");
4552 SysCatalog::instance().grantDBObjectPrivilegesBatch(grantees, objects, catalog);
4559 const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4560 const auto objectName =
4564 throw std::runtime_error(
"REVOKE failed. SERVER object unrecognized.");
4568 if (!currentUser.isSuper) {
4569 if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4570 throw std::runtime_error(
4571 "REVOKE failed. It can only be executed by super user or owner of the "
4576 std::vector<DBObject> objects(get_privs().size(), dbObject);
4577 for (
size_t i = 0;
i < get_privs().size(); ++
i) {
4579 boost::to_upper_copy<std::string>(get_privs()[
i]), objectType, get_object());
4580 objects[
i].setPrivileges(priv.first);
4581 objects[
i].setPermissionType(priv.second);
4583 throw std::runtime_error(
"REVOKE failed. SERVER object unrecognized.");
4586 SysCatalog::instance().revokeDBObjectPrivilegesBatch(grantees, objects, catalog);
4594 const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4595 const auto objectName =
4600 if (!currentUser.isSuper) {
4601 if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4602 throw std::runtime_error(
4603 "SHOW ON " + get_object() +
" FOR " + get_role() +
4604 " failed. It can only be executed by super user or owner of the object.");
4608 SysCatalog::instance().getDBObjectPrivileges(get_role(), dbObject, catalog);
4610 printf(
"\nPRIVILEGES ON %s FOR %s ARE SET AS FOLLOWING: ",
4611 get_object().c_str(),
4612 get_role().c_str());
4641 printf(
" TRUNCATE");
4685 if (!currentUser.isSuper) {
4686 throw std::runtime_error(
4687 "GRANT failed, because it can only be executed by super user.");
4689 if (std::find(get_grantees().begin(), get_grantees().end(),
OMNISCI_ROOT_USER) !=
4690 get_grantees().end()) {
4691 throw std::runtime_error(
4692 "Request to grant role failed because mapd root user has all privileges by "
4695 SysCatalog::instance().grantRoleBatch(get_roles(), get_grantees());
4701 if (!currentUser.isSuper) {
4702 throw std::runtime_error(
4703 "REVOKE failed, because it can only be executed by super user.");
4705 if (std::find(get_grantees().begin(), get_grantees().end(),
OMNISCI_ROOT_USER) !=
4706 get_grantees().end()) {
4707 throw std::runtime_error(
4708 "Request to revoke role failed because privileges can not be revoked from mapd "
4711 SysCatalog::instance().revokeRoleBatch(get_roles(), get_grantees());
4715 using namespace Catalog_Namespace;
4717 const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4722 const TableDescriptor* td = catalog.getMetadataForTable(*table_,
false);
4724 throw std::runtime_error(
"Table/View " + *table_ +
" does not exist.");
4729 std::vector<DBObject> privObjects = {dbObject};
4731 if (!SysCatalog::instance().hasAnyPrivileges(session.
get_currentUser(), privObjects)) {
4732 throw std::runtime_error(
"Table/View " + *table_ +
" does not exist.");
4738 throw std::runtime_error(
"SHOW CREATE TABLE not yet supported for views");
4741 create_stmt_ = catalog.dumpCreateTable(td);
4745 auto session_copy = session;
4746 auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4747 &session_copy, boost::null_deleter());
4749 auto stdlog =
STDLOG(query_state);
4750 auto query_state_proxy = query_state->createQueryStateProxy();
4756 if (!leafs_connector_) {
4757 leafs_connector_ = &local_connector;
4764 std::string layer_name;
4770 parseOptions(copy_params, file_type, layer_name, file_compression, array_null_handling);
4772 if (file_path->empty()) {
4773 throw std::runtime_error(
"Invalid file path for COPY TO");
4774 }
else if (!boost::filesystem::path(*file_path).is_absolute()) {
4775 std::string file_name = boost::filesystem::path(*file_path).filename().string();
4776 std::string file_dir =
4777 catalog.getBasePath() +
"/mapd_export/" + session.
get_session_id() +
"/";
4778 if (!boost::filesystem::exists(file_dir)) {
4779 if (!boost::filesystem::create_directories(file_dir)) {
4780 throw std::runtime_error(
"Directory " + file_dir +
" cannot be created.");
4783 *file_path = file_dir + file_name;
4792 auto column_info_result =
4793 local_connector.
query(query_state_proxy, *select_stmt, {},
true);
4799 if (layer_name.size() == 0) {
4800 layer_name = boost::filesystem::path(*file_path).stem().string();
4804 query_exporter->beginExport(*file_path,
4807 column_info_result.targets_meta,
4809 array_null_handling);
4812 size_t outer_frag_count =
4813 leafs_connector_->getOuterFragmentCount(query_state_proxy, *select_stmt);
4814 size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
4817 for (
size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
4819 std::vector<size_t> allowed_outer_fragment_indices;
4820 if (outer_frag_count) {
4821 allowed_outer_fragment_indices.push_back(outer_frag_idx);
4825 std::vector<AggregatedResult> query_results = leafs_connector_->query(
4826 query_state_proxy, *select_stmt, allowed_outer_fragment_indices);
4829 query_exporter->exportResults(query_results);
4833 query_exporter->endExport();
// Walks the statement's `options` list and validates each option.
//
// Option names are matched case-insensitively (boost::iequals). Recognised
// names are: delimiter, nulls, header, quote, escape, line_delimiter, quoted,
// file_type, layer_name, file_compression and array_null_handling. Any other
// name throws std::runtime_error("Invalid option for COPY: <name>").
//
// Every recognised option throws std::runtime_error when `str_literal` is
// nullptr. NOTE(review): the declaration of `str_literal` is not visible in
// this excerpt; presumably it is the option value cast to a string literal --
// confirm against the full source.
//
// Value checks visible here (the value is lower-cased before comparison):
//   file_type            csv | geojson | geojsonl | shapefile
//   file_compression     none | gzip | zip
//   array_null_handling  abort | raw | zero | nullfield
// delimiter, quote, escape and line_delimiter additionally have a
// "must be a single character string" error path.
4836 void ExportQueryStmt::parseOptions(
4839 std::string& layer_name,
// Nothing to do when the statement carries no options.
4847 if (!options.empty()) {
4848 for (
auto& p : options) {
4849 if (boost::iequals(*p->get_name(),
"delimiter")) {
4852 if (str_literal ==
nullptr) {
4853 throw std::runtime_error(
"Delimiter option must be a string.");
4855 throw std::runtime_error(
"Delimiter must be a single character string.");
4858 }
else if (boost::iequals(*p->get_name(),
"nulls")) {
4861 if (str_literal ==
nullptr) {
4862 throw std::runtime_error(
"Nulls option must be a string.");
4865 }
else if (boost::iequals(*p->get_name(),
"header")) {
4868 if (str_literal ==
nullptr) {
4869 throw std::runtime_error(
"Header option must be a boolean.");
4874 }
else if (boost::iequals(*p->get_name(),
"quote")) {
4877 if (str_literal ==
nullptr) {
4878 throw std::runtime_error(
"Quote option must be a string.");
4880 throw std::runtime_error(
"Quote must be a single character string.");
4883 }
else if (boost::iequals(*p->get_name(),
"escape")) {
4886 if (str_literal ==
nullptr) {
4887 throw std::runtime_error(
"Escape option must be a string.");
4889 throw std::runtime_error(
"Escape must be a single character string.");
4892 }
else if (boost::iequals(*p->get_name(),
"line_delimiter")) {
4895 if (str_literal ==
nullptr) {
4896 throw std::runtime_error(
"Line_delimiter option must be a string.");
4898 throw std::runtime_error(
"Line_delimiter must be a single character string.");
4901 }
else if (boost::iequals(*p->get_name(),
"quoted")) {
4904 if (str_literal ==
nullptr) {
4905 throw std::runtime_error(
"Quoted option must be a boolean.");
4908 }
// file_type: the value is lower-cased, then compared against the accepted
// spellings.
else if (boost::iequals(*p->get_name(),
"file_type")) {
4911 if (str_literal ==
nullptr) {
4912 throw std::runtime_error(
"File Type option must be a string.");
4914 auto file_type_str =
4915 boost::algorithm::to_lower_copy(*str_literal->
get_stringval());
4916 if (file_type_str ==
"csv") {
4918 }
else if (file_type_str ==
"geojson") {
4920 }
else if (file_type_str ==
"geojsonl") {
4922 }
else if (file_type_str ==
"shapefile") {
4925 throw std::runtime_error(
4926 "File Type option must be 'CSV', 'GeoJSON', 'GeoJSONL' or 'Shapefile'");
4928 }
else if (boost::iequals(*p->get_name(),
"layer_name")) {
4931 if (str_literal ==
nullptr) {
4932 throw std::runtime_error(
"Layer Name option must be a string.");
4935 }
// file_compression: same lower-case-then-compare scheme as file_type.
else if (boost::iequals(*p->get_name(),
"file_compression")) {
4938 if (str_literal ==
nullptr) {
4939 throw std::runtime_error(
"File Compression option must be a string.");
4941 auto file_compression_str =
4942 boost::algorithm::to_lower_copy(*str_literal->
get_stringval());
4943 if (file_compression_str ==
"none") {
4945 }
else if (file_compression_str ==
"gzip") {
4947 }
else if (file_compression_str ==
"zip") {
4950 throw std::runtime_error(
4951 "File Compression option must be 'None', 'GZip', or 'Zip'");
4953 }
// array_null_handling: each accepted spelling assigns the
// `array_null_handling` output (the assigned values are not visible in this
// excerpt).
else if (boost::iequals(*p->get_name(),
"array_null_handling")) {
4956 if (str_literal ==
nullptr) {
4957 throw std::runtime_error(
"Array Null Handling option must be a string.");
4959 auto array_null_handling_str =
4960 boost::algorithm::to_lower_copy(*str_literal->
get_stringval());
4961 if (array_null_handling_str ==
"abort") {
4962 array_null_handling =
4964 }
else if (array_null_handling_str ==
"raw") {
4965 array_null_handling =
4967 }
else if (array_null_handling_str ==
"zero") {
4968 array_null_handling =
4970 }
else if (array_null_handling_str ==
"nullfield") {
4971 array_null_handling =
4974 throw std::runtime_error(
4975 "Array Null Handling option must be 'Abort', 'Raw', 'Zero', or "
4979 throw std::runtime_error(
"Invalid option for COPY: " + *p->get_name());
4985 CreateViewStmt::CreateViewStmt(
const rapidjson::Value& payload) {
4986 CHECK(payload.HasMember(
"name"));
4987 view_name_ =
json_str(payload[
"name"]);
4989 if_not_exists_ =
false;
4990 if (payload.HasMember(
"ifNotExists")) {
4991 if_not_exists_ =
json_bool(payload[
"ifNotExists"]);
4994 CHECK(payload.HasMember(
"query"));
4995 select_query_ =
json_str(payload[
"query"]);
4996 std::regex newline_re(
"\\n");
4997 select_query_ = std::regex_replace(select_query_, newline_re,
" ");
4999 if (select_query_.back() !=
';') {
5000 select_query_.push_back(
';');
// Fragment of the routine that creates a view from view_name_, if_not_exists_
// and select_query_. Its signature and a number of statements are missing from
// this listing -- presumably CreateViewStmt::execute; TODO confirm.
//
// Copy the session and wrap the copy in a shared_ptr whose deleter is
// boost::null_deleter, so the pointer never frees the stack object.
5005 auto session_copy = session;
5006 auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
5007 &session_copy, boost::null_deleter());
5009 auto stdlog =
STDLOG(query_state);
// Consult the catalog about an existing table/view of the same name, taking
// if_not_exists_ into account (the branch body is not visible here).
5012 if (!catalog.validateNonExistentTableOrView(view_name_, if_not_exists_)) {
// Error for a missing create-view privilege; the guarding condition is not
// visible in this listing.
5017 throw std::runtime_error(
"View " + view_name_ +
5018 " will not be created. User has no create view privileges.");
// Pass the stored SELECT text through pg_shim before handing it to Calcite.
5021 const auto query_after_shim =
pg_shim(select_query_);
5024 catalog.getCalciteMgr()->process(query_state->createQueryStateProxy(),
5033 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
// The shimmed query becomes the view's SQL; the descriptor gets no fragmenter.
5042 td.viewSQL = query_after_shim;
5043 td.fragmenter =
nullptr;
// Register the descriptor with empty second and third arguments.
5051 catalog.createTable(td, {}, {},
true);
5055 SysCatalog::instance().createDBObject(
// Builds a DROP VIEW statement from a JSON payload.
//   "viewName" - required (CHECK-enforced); copied into a newly allocated
//                std::string owned by view_name.
//   "ifExists" - optional; when present its boolean value is stored in
//                if_exists.
// NOTE(review): source line 5063 is absent from this listing; it may hold the
// default assignment for if_exists -- confirm against the original file.
5059 DropViewStmt::DropViewStmt(
const rapidjson::Value& payload) {
5060 CHECK(payload.HasMember(
"viewName"));
5061 view_name = std::make_unique<std::string>(
json_str(payload[
"viewName"]));
5064 if (payload.HasMember(
"ifExists")) {
5065 if_exists =
json_bool(payload[
"ifExists"]);
// Fragment of the routine that drops the view named by *view_name; the
// signature and several statements are missing from this listing --
// presumably DropViewStmt::execute; TODO confirm.
//
// Holder for the schema write lock on the view; filled inside the try block.
5073 std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
5077 std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
5079 catalog, *view_name,
false));
// Invoking the lock container yields the value stored in td.
5080 td = (*td_with_lock)();
// A std::runtime_error from the acquisition above is handled here (the handler
// body is not visible in this listing).
5081 }
catch (
const std::runtime_error& e) {
// From this point on the lock container must have been created.
5090 CHECK(td_with_lock);
// Error for a missing drop-view privilege; the guarding condition is not
// visible in this listing.
5094 throw std::runtime_error(
"View " + *view_name +
5095 " will not be dropped. User has no drop view privileges.");
5099 catalog.dropTable(td);
// Tail of a helper that validates an option value: it throws
// std::runtime_error unless p's value is a StringLiteral, naming the offending
// option via option_name. The first line of the signature is missing from
// this listing -- presumably checkStringLiteral(option_name, p); TODO confirm.
5103 const std::unique_ptr<NameValueAssign>& p) {
5105 if (!dynamic_cast<const StringLiteral*>(p->get_value())) {
5106 throw std::runtime_error(option_name +
" option must be a string literal.");
// Fragment of the CREATE DATABASE handler; the signature and several
// statements are missing from this listing -- presumably
// CreateDBStmt::execute; TODO confirm.
//
// Error for callers that are not super users (guarding condition not visible).
5112 throw std::runtime_error(
5113 "CREATE DATABASE command can only be executed by super user.");
5116 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
// Special case: the database already exists and IF NOT EXISTS was given
// (branch body not visible here).
5121 if (SysCatalog::instance().getMetadataForDB(*db_name, db_meta) && if_not_exists_) {
// Walk the option list; the only option name matched here is "owner"
// (compared case-insensitively).
5125 if (!name_value_list.empty()) {
5126 for (
auto& p : name_value_list) {
5127 if (boost::iequals(*p->get_name(),
"owner")) {
// The value is accessed as a StringLiteral via static_cast, so it must
// already be known to be one at this point.
5129 const std::string* str =
5130 static_cast<const StringLiteral*
>(p->get_value())->get_stringval();
// The named owner must be a known user.
5132 if (!SysCatalog::instance().getMetadataForUser(*str, user)) {
5133 throw std::runtime_error(
"User " + *str +
" does not exist.");
// Any other option name is rejected.
5137 throw std::runtime_error(
"Invalid CREATE DATABASE option " + *p->get_name() +
5138 ". Only OWNER supported.");
5142 SysCatalog::instance().createDatabase(*db_name, ownerId);
// Fragment of the DROP DATABASE handler; the signature and several statements
// are missing from this listing -- presumably DropDBStmt::execute; TODO
// confirm.
//
// Error for callers that are not super users (guarding condition not visible).
5147 throw std::runtime_error(
"DROP DATABASE command can only be executed by super user.");
5150 const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
// Look up the database's metadata; the branch handles an unknown name.
5155 if (!SysCatalog::instance().getMetadataForDB(*db_name, db)) {
5159 throw std::runtime_error(
"Database " + *db_name +
" does not exist.");
// Ownership/privilege failure (guarding condition not visible here).
5164 throw std::runtime_error(
"Only the super user or the owner can drop database.");
5167 SysCatalog::instance().dropDatabase(db);
// Tail of a helper that interprets an option's string value as a boolean:
// "true" and "false" are accepted case-insensitively, anything else throws
// std::runtime_error naming the option. The helper's name, the first line of
// its signature and its return statements are missing from this listing.
5171 const std::unique_ptr<NameValueAssign>& p) {
// The value is accessed as a StringLiteral via static_cast without a check in
// the visible lines.
5173 const std::string* str =
5174 static_cast<const StringLiteral*
>(p->get_value())->get_stringval();
5175 if (boost::iequals(*str,
"true")) {
5177 }
else if (boost::iequals(*str,
"false")) {
5180 throw std::runtime_error(
"Value to " + option_name +
" must be TRUE or FALSE.");
// Fragment of the CREATE USER handler; the signature and several statements
// are missing from this listing -- presumably CreateUserStmt::execute; TODO
// confirm.
//
// Defaults used when the corresponding option is not supplied.
5186 bool is_super =
false;
5187 std::string default_db;
5188 bool can_login =
true;
// Option names are matched case-insensitively: password, is_super, default_db
// and can_login.
5189 for (
auto& p : name_value_list) {
5190 if (boost::iequals(*p->get_name(),
"password")) {
5192 passwd = *
static_cast<const StringLiteral*
>(p->get_value())->get_stringval();
5193 }
else if (boost::iequals(*p->get_name(),
"is_super")) {
5196 }
else if (boost::iequals(*p->get_name(),
"default_db")) {
5198 default_db = *
static_cast<const StringLiteral*
>(p->get_value())->get_stringval();
5199 }
else if (boost::iequals(*p->get_name(),
"can_login")) {
// Any other option name is rejected.
5203 throw std::runtime_error(
"Invalid CREATE USER option " + *p->get_name() +
5204 ". Should be PASSWORD, IS_SUPER, CAN_LOGIN"
// Error for callers that are not super users (guarding condition not visible).
5209 throw std::runtime_error(
"Only super user can create new users.");
5211 SysCatalog::instance().createUser(*user_name, passwd, is_super, default_db, can_login);
// Fragment of the ALTER USER handler; the signature and several statements
// are missing from this listing -- presumably AlterUserStmt::execute; TODO
// confirm.
//
// Each attribute is tracked through a pointer that stays nullptr unless the
// statement supplies that option; the pointers are what alterUser receives.
5216 const std::string* passwd =
nullptr;
5217 bool is_super =
false;
5218 bool* is_superp =
nullptr;
5219 const std::string* default_db =
nullptr;
5220 bool can_login =
true;
5221 bool* can_loginp =
nullptr;
// Option names are matched case-insensitively.
5222 for (
auto& p : name_value_list) {
5223 if (boost::iequals(*p->get_name(),
"password")) {
5225 passwd =
static_cast<const StringLiteral*
>(p->get_value())->get_stringval();
5226 }
else if (boost::iequals(*p->get_name(),
"is_super")) {
5229 is_superp = &is_super;
5230 }
else if (boost::iequals(*p->get_name(),
"default_db")) {
// DEFAULT_DB accepts a string literal or a NULL literal.
5231 if (dynamic_cast<const StringLiteral*>(p->get_value())) {
5232 default_db =
static_cast<const StringLiteral*
>(p->get_value())->get_stringval();
5233 }
else if (dynamic_cast<const NullLiteral*>(p->get_value())) {
// NOTE(review): the glyph after "default_db =" below looks like an HTML-entity
// decoding of "&blank;" (the address of the static string declared on the
// previous line) -- confirm against the original file.
5234 static std::string blank;
5235 default_db = ␣
5237 throw std::runtime_error(
5238 "DEFAULT_DB option must be either a string literal or a NULL literal.");
5240 }
else if (boost::iequals(*p->get_name(),
"can_login")) {
5243 can_loginp = &can_login;
// Any other option name is rejected.
5245 throw std::runtime_error(
"Invalid ALTER USER option " + *p->get_name() +
5246 ". Should be PASSWORD, DEFAULT_DB, CAN_LOGIN"
// The target user must exist.
5253 if (!SysCatalog::instance().getMetadataForUser(*user_name, user)) {
5254 throw std::runtime_error(
"User " + *user_name +
" does not exist.");
// Privilege rules; the first guarding condition is not visible. The visible
// branch rejects a request that sets IS_SUPER or CAN_LOGIN.
5258 throw std::runtime_error(
"Only super user can change another user's attributes.");
5259 }
else if (is_superp || can_loginp) {
5260 throw std::runtime_error(
5261 "A user can only update their own password or default database.");
// Call alterUser only when at least one attribute was supplied.
5265 if (passwd || is_superp || default_db || can_loginp) {
5266 SysCatalog::instance().alterUser(
5267 user.
userId, passwd, is_superp, default_db, can_loginp);
// Fragment of the DROP USER handler (signature and guarding condition are
// missing from this listing -- presumably DropUserStmt::execute; TODO
// confirm): a privilege error, followed by the removal of the user named by
// *user_name.
5273 throw std::runtime_error(
"Only super user can drop users.");
5275 SysCatalog::instance().dropUser(*user_name);
// Fragment of the table-dump handler; the signature and the guarding
// conditions are missing from this listing -- presumably
// DumpTableStmt::execute; TODO confirm.
//
// Two privilege errors are visible: one for select, one for create.
5282 throw std::runtime_error(
"Table " + *table +
5283 " will not be dumped. User has no select privileges.");
5287 throw std::runtime_error(
"Table " + *table +
5288 " will not be dumped. User has no create privileges.");
// Hand the table descriptor, destination path and compression setting to the
// archiver.
5293 table_archiver.
dumpTable(td, *path, compression);
// Fragment of the table-restore handler; the signature and the guarding
// conditions are missing from this listing -- presumably
// RestoreTableStmt::execute; TODO confirm.
//
// Look up any existing table with the target name.
5298 const TableDescriptor* td = catalog.getMetadataForTable(*table,
false);
// Error reported when the target table already exists (condition not visible).
5303 throw std::runtime_error(
"Table " + *table +
" exists.");
// Error for a missing create privilege (condition not visible).
5308 throw std::runtime_error(
"Table " + *table +
5309 " will not be restored. User has no create privileges.");
// Hand the session, table name, source path and compression setting to the
// archiver.
5312 table_archiver.
restoreTable(session, *table, *path, compression);
// Parses a JSON DDL statement and dispatches it to the matching statement
// object. (The first line of the signature is missing from this listing; the
// declaration listed at the end of the file gives
// `void execute_calcite_ddl(ddl_statement, session_ptr)`.)
//
// ddl_statement - non-empty JSON text (CHECK-enforced) whose top-level object
//                 has an object member "payload" containing a string member
//                 "command".
// session_ptr   - session that is dereferenced and passed to the statement's
//                 execute().
// Throws std::runtime_error("Unsupported DDL command") for any command other
// than CREATE_TABLE, DROP_TABLE, CREATE_VIEW or DROP_VIEW. Structural problems
// in the JSON are CHECK failures rather than exceptions.
5317 const std::string& ddl_statement,
5318 std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr) {
5319 CHECK(!ddl_statement.empty());
5320 VLOG(2) <<
"Parsing JSON DDL from Calcite: " << ddl_statement;
5321 rapidjson::Document ddl_query;
5322 ddl_query.Parse(ddl_statement);
// Validate the document shape before touching its members.
5323 CHECK(ddl_query.IsObject());
5324 CHECK(ddl_query.HasMember(
"payload"));
5325 CHECK(ddl_query[
"payload"].IsObject());
5326 const auto& payload = ddl_query[
"payload"].GetObject();
5327 CHECK(payload.HasMember(
"command"));
5328 CHECK(payload[
"command"].IsString());
// Dispatch on the command name. The lines that construct each statement object
// are missing from this listing.
5330 const auto& ddl_command = std::string_view(payload[
"command"].GetString());
5331 if (ddl_command ==
"CREATE_TABLE") {
5333 create_table_stmt.execute(*session_ptr);
5334 }
else if (ddl_command ==
"DROP_TABLE") {
5336 drop_table_stmt.execute(*session_ptr);
5337 }
else if (ddl_command ==
"CREATE_VIEW") {
5339 create_view_stmt.execute(*session_ptr);
5340 }
else if (ddl_command ==
"DROP_VIEW") {
5342 drop_view_stmt.execute(*session_ptr);
5344 throw std::runtime_error(
"Unsupported DDL command");
SQLTypes to_sql_type(const std::string &type_name)
const ColumnConstraintDef * get_column_constraint() const
decltype(auto) get_max_rows_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
int32_t maxRollbackEpochs
void execute_calcite_ddl(const std::string &ddl_statement, std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)
void validate_non_foreign_table_write(const TableDescriptor *table_descriptor)
static const AccessPrivileges VIEW_SQL_EDITOR
decltype(auto) get_max_chunk_size_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
static const std::map< const std::string, const TableDefFuncPtr > tableDefFuncMap
HOST DEVICE SQLTypes get_subtype() const
void set_compression(EncodingType c)
std::string s3_secret_key
size_t shard_column_index(const std::string &name, const std::list< ColumnDescriptor > &columns)
const std::string & get_column() const
std::vector< int > ChunkKey
std::vector< std::unique_ptr< lockmgr::AbstractLockContainer< const TableDescriptor * >>> LockedTableDescriptors
static const AccessPrivileges VIEW_DASHBOARD
static const int32_t DROP_VIEW
HOST DEVICE int get_size() const
SQLType * get_column_type() const
static const AccessPrivileges DROP_SERVER
static const int32_t SELECT_FROM_VIEW
class for a per-database catalog. also includes metadata for the current database and the current use...
static void checkStringLiteral(const std::string &option_name, const std::unique_ptr< NameValueAssign > &p)
static const int32_t UPDATE_IN_VIEW
const std::vector< TargetMetaInfo > targets_meta