OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ParserNode.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "ParserNode.h"
26 
27 #include <boost/algorithm/string.hpp>
28 #include <boost/core/null_deleter.hpp>
29 #include <boost/filesystem.hpp>
30 #include <boost/function.hpp>
31 
32 #include <rapidjson/document.h>
33 #include <rapidjson/stringbuffer.h>
34 #include <rapidjson/writer.h>
35 
36 #include <cassert>
37 #include <cmath>
38 #include <limits>
39 #include <random>
40 #include <regex>
41 #include <stdexcept>
42 #include <type_traits>
43 #include <typeinfo>
44 
46 #include "Catalog/Catalog.h"
52 #include "Geospatial/Compression.h"
53 #include "Geospatial/Types.h"
54 #include "ImportExport/Importer.h"
55 #include "LockMgr/LockMgr.h"
58 #include "QueryEngine/Execute.h"
62 #include "ReservedKeywords.h"
63 #include "Shared/StringTransform.h"
64 #include "Shared/measure.h"
65 #include "Shared/shard_key.h"
67 #include "Utils/FsiUtils.h"
68 
69 #include "gen-cpp/CalciteServer.h"
70 #include "parser.h"
71 
73 
74 size_t g_leaf_count{0};
77 extern bool g_enable_fsi;
78 
80 using namespace std::string_literals;
81 
82 using TableDefFuncPtr = boost::function<void(TableDescriptor&,
83  const NameValueAssign*,
84  const std::list<ColumnDescriptor>& columns)>;
85 
86 using DataframeDefFuncPtr =
87  boost::function<void(DataframeTableDescriptor&,
88  const NameValueAssign*,
89  const std::list<ColumnDescriptor>& columns)>;
90 
91 namespace Parser {
92 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
93  const Catalog_Namespace::Catalog& catalog,
94  Analyzer::Query& query,
95  TlistRefType allow_tlist_ref) const {
96  return makeExpr<Analyzer::Constant>(kNULLT, true);
97 }
98 
99 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
100  const Catalog_Namespace::Catalog& catalog,
101  Analyzer::Query& query,
102  TlistRefType allow_tlist_ref) const {
103  return analyzeValue(*stringval);
104 }
105 
106 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(
107  const std::string& stringval) {
108  SQLTypeInfo ti(kVARCHAR, stringval.length(), 0, true);
109  Datum d;
110  d.stringval = new std::string(stringval);
111  return makeExpr<Analyzer::Constant>(ti, false, d);
112 }
113 
114 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
115  const Catalog_Namespace::Catalog& catalog,
116  Analyzer::Query& query,
117  TlistRefType allow_tlist_ref) const {
118  return analyzeValue(intval);
119 }
120 
121 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(const int64_t intval) {
122  SQLTypes t;
123  Datum d;
124  if (intval >= INT16_MIN && intval <= INT16_MAX) {
125  t = kSMALLINT;
126  d.smallintval = (int16_t)intval;
127  } else if (intval >= INT32_MIN && intval <= INT32_MAX) {
128  t = kINT;
129  d.intval = (int32_t)intval;
130  } else {
131  t = kBIGINT;
132  d.bigintval = intval;
133  }
134  return makeExpr<Analyzer::Constant>(t, false, d);
135 }
136 
137 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
138  const Catalog_Namespace::Catalog& catalog,
139  Analyzer::Query& query,
140  TlistRefType allow_tlist_ref) const {
141  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
142  Datum d = StringToDatum(*fixedptval, ti);
143  return makeExpr<Analyzer::Constant>(ti, false, d);
144 }
145 
146 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(const int64_t numericval,
147  const int scale,
148  const int precision) {
149  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
150  ti.set_scale(scale);
151  ti.set_precision(precision);
152  Datum d;
153  d.bigintval = numericval;
154  return makeExpr<Analyzer::Constant>(ti, false, d);
155 }
156 
157 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
158  const Catalog_Namespace::Catalog& catalog,
159  Analyzer::Query& query,
160  TlistRefType allow_tlist_ref) const {
161  Datum d;
162  d.floatval = floatval;
163  return makeExpr<Analyzer::Constant>(kFLOAT, false, d);
164 }
165 
166 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
167  const Catalog_Namespace::Catalog& catalog,
168  Analyzer::Query& query,
169  TlistRefType allow_tlist_ref) const {
170  Datum d;
171  d.doubleval = doubleval;
172  return makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
173 }
174 
175 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
176  const Catalog_Namespace::Catalog& catalog,
177  Analyzer::Query& query,
178  TlistRefType allow_tlist_ref) const {
179  return get(timestampval_);
180 }
181 
182 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(const int64_t timestampval) {
183  Datum d;
184  d.bigintval = timestampval;
185  return makeExpr<Analyzer::Constant>(kTIMESTAMP, false, d);
186 }
187 
188 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
189  const Catalog_Namespace::Catalog& catalog,
190  Analyzer::Query& query,
191  TlistRefType allow_tlist_ref) const {
192  Datum d;
193  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
194 }
195 
196 std::shared_ptr<Analyzer::Expr> UserLiteral::get(const std::string& user) {
197  Datum d;
198  d.stringval = new std::string(user);
199  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
200 }
201 
202 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
203  const Catalog_Namespace::Catalog& catalog,
204  Analyzer::Query& query,
205  TlistRefType allow_tlist_ref) const {
206  SQLTypeInfo ti = SQLTypeInfo(kARRAY, true);
207  bool set_subtype = true;
208  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
209  for (auto& p : value_list) {
210  auto e = p->analyze(catalog, query, allow_tlist_ref);
211  CHECK(e);
212  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
213  if (c != nullptr && c->get_is_null()) {
214  value_exprs.push_back(c);
215  continue;
216  }
217  auto subtype = e->get_type_info().get_type();
218  if (subtype == kNULLT) {
219  // NULL element
220  } else if (set_subtype) {
221  ti.set_subtype(subtype);
222  set_subtype = false;
223  } else {
224  if (ti.get_subtype() != subtype) {
225  throw std::runtime_error("ARRAY element literals should be of the same type.");
226  }
227  }
228  value_exprs.push_back(e);
229  }
230  std::shared_ptr<Analyzer::Expr> result =
231  makeExpr<Analyzer::Constant>(ti, false, value_exprs);
232  return result;
233 }
234 
235 std::string ArrayLiteral::to_string() const {
236  std::string str = "{";
237  bool notfirst = false;
238  for (auto& p : value_list) {
239  if (notfirst) {
240  str += ", ";
241  } else {
242  notfirst = true;
243  }
244  str += p->to_string();
245  }
246  str += "}";
247  return str;
248 }
249 
250 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
251  const Catalog_Namespace::Catalog& catalog,
252  Analyzer::Query& query,
253  TlistRefType allow_tlist_ref) const {
254  auto left_expr = left->analyze(catalog, query, allow_tlist_ref);
255  const auto& left_type = left_expr->get_type_info();
256  if (right == nullptr) {
257  return makeExpr<Analyzer::UOper>(
258  left_type, left_expr->get_contains_agg(), optype, left_expr->decompress());
259  }
260  if (optype == kARRAY_AT) {
261  if (left_type.get_type() != kARRAY) {
262  throw std::runtime_error(left->to_string() + " is not of array type.");
263  }
264  auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
265  const auto& right_type = right_expr->get_type_info();
266  if (!right_type.is_integer()) {
267  throw std::runtime_error(right->to_string() + " is not of integer type.");
268  }
269  return makeExpr<Analyzer::BinOper>(
270  left_type.get_elem_type(), false, kARRAY_AT, kONE, left_expr, right_expr);
271  }
272  auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
273  return normalize(optype, opqualifier, left_expr, right_expr);
274 }
275 
276 std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
277  const SQLOps optype,
278  const SQLQualifier qual,
279  std::shared_ptr<Analyzer::Expr> left_expr,
280  std::shared_ptr<Analyzer::Expr> right_expr) {
281  if (left_expr->get_type_info().is_date_in_days() ||
282  right_expr->get_type_info().is_date_in_days()) {
283  // Do not propogate encoding
284  left_expr = left_expr->decompress();
285  right_expr = right_expr->decompress();
286  }
287  const auto& left_type = left_expr->get_type_info();
288  auto right_type = right_expr->get_type_info();
289  if (qual != kONE) {
290  // subquery not supported yet.
291  CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
292  if (right_type.get_type() != kARRAY) {
293  throw std::runtime_error(
294  "Existential or universal qualifiers can only be used in front of a subquery "
295  "or an "
296  "expression of array type.");
297  }
298  right_type = right_type.get_elem_type();
299  }
300  SQLTypeInfo new_left_type;
301  SQLTypeInfo new_right_type;
302  auto result_type = Analyzer::BinOper::analyze_type_info(
303  optype, left_type, right_type, &new_left_type, &new_right_type);
304  if (result_type.is_timeinterval()) {
305  return makeExpr<Analyzer::BinOper>(
306  result_type, false, optype, qual, left_expr, right_expr);
307  }
308  if (left_type != new_left_type) {
309  left_expr = left_expr->add_cast(new_left_type);
310  }
311  if (right_type != new_right_type) {
312  if (qual == kONE) {
313  right_expr = right_expr->add_cast(new_right_type);
314  } else {
315  right_expr = right_expr->add_cast(new_right_type.get_array_type());
316  }
317  }
318 
319  if (IS_COMPARISON(optype)) {
320  if (optype != kOVERLAPS && new_left_type.is_geometry() &&
321  new_right_type.is_geometry()) {
322  throw std::runtime_error(
323  "Comparison operators are not yet supported for geospatial types.");
324  }
325 
326  if (new_left_type.get_compression() == kENCODING_DICT &&
327  new_right_type.get_compression() == kENCODING_DICT &&
328  new_left_type.get_comp_param() == new_right_type.get_comp_param()) {
329  // do nothing
330  } else if (new_left_type.get_compression() == kENCODING_DICT &&
331  new_right_type.get_compression() == kENCODING_NONE) {
332  SQLTypeInfo ti(new_right_type);
333  ti.set_compression(new_left_type.get_compression());
334  ti.set_comp_param(new_left_type.get_comp_param());
335  ti.set_fixed_size();
336  right_expr = right_expr->add_cast(ti);
337  } else if (new_right_type.get_compression() == kENCODING_DICT &&
338  new_left_type.get_compression() == kENCODING_NONE) {
339  SQLTypeInfo ti(new_left_type);
340  ti.set_compression(new_right_type.get_compression());
341  ti.set_comp_param(new_right_type.get_comp_param());
342  ti.set_fixed_size();
343  left_expr = left_expr->add_cast(ti);
344  } else {
345  left_expr = left_expr->decompress();
346  right_expr = right_expr->decompress();
347  }
348  } else {
349  left_expr = left_expr->decompress();
350  right_expr = right_expr->decompress();
351  }
352  bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
353  return makeExpr<Analyzer::BinOper>(
354  result_type, has_agg, optype, qual, left_expr, right_expr);
355 }
356 
357 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
358  const Catalog_Namespace::Catalog& catalog,
359  Analyzer::Query& query,
360  TlistRefType allow_tlist_ref) const {
361  throw std::runtime_error("Subqueries are not supported yet.");
362  return nullptr;
363 }
364 
365 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
366  const Catalog_Namespace::Catalog& catalog,
367  Analyzer::Query& query,
368  TlistRefType allow_tlist_ref) const {
369  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
370  auto result = makeExpr<Analyzer::UOper>(kBOOLEAN, kISNULL, arg_expr);
371  if (is_not) {
372  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
373  }
374  return result;
375 }
376 
377 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
378  const Catalog_Namespace::Catalog& catalog,
379  Analyzer::Query& query,
380  TlistRefType allow_tlist_ref) const {
381  throw std::runtime_error("Subqueries are not supported yet.");
382  return nullptr;
383 }
384 
385 std::shared_ptr<Analyzer::Expr> InValues::analyze(
386  const Catalog_Namespace::Catalog& catalog,
387  Analyzer::Query& query,
388  TlistRefType allow_tlist_ref) const {
389  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
390  SQLTypeInfo ti = arg_expr->get_type_info();
391  bool dict_comp = ti.get_compression() == kENCODING_DICT;
392  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
393  for (auto& p : value_list) {
394  auto e = p->analyze(catalog, query, allow_tlist_ref);
395  if (ti != e->get_type_info()) {
396  if (ti.is_string() && e->get_type_info().is_string()) {
397  ti = Analyzer::BinOper::common_string_type(ti, e->get_type_info());
398  } else if (ti.is_number() && e->get_type_info().is_number()) {
399  ti = Analyzer::BinOper::common_numeric_type(ti, e->get_type_info());
400  } else {
401  throw std::runtime_error("IN expressions must contain compatible types.");
402  }
403  }
404  if (dict_comp) {
405  value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
406  } else {
407  value_exprs.push_back(e);
408  }
409  }
410  if (!dict_comp) {
411  arg_expr = arg_expr->decompress();
412  arg_expr = arg_expr->add_cast(ti);
413  std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
414  for (auto p : value_exprs) {
415  cast_vals.push_back(p->add_cast(ti));
416  }
417  value_exprs.swap(cast_vals);
418  }
419  std::shared_ptr<Analyzer::Expr> result =
420  makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
421  if (is_not) {
422  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
423  }
424  return result;
425 }
426 
427 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
428  const Catalog_Namespace::Catalog& catalog,
429  Analyzer::Query& query,
430  TlistRefType allow_tlist_ref) const {
431  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
432  auto lower_expr = lower->analyze(catalog, query, allow_tlist_ref);
433  auto upper_expr = upper->analyze(catalog, query, allow_tlist_ref);
434  SQLTypeInfo new_left_type, new_right_type;
436  arg_expr->get_type_info(),
437  lower_expr->get_type_info(),
438  &new_left_type,
439  &new_right_type);
440  auto lower_pred =
441  makeExpr<Analyzer::BinOper>(kBOOLEAN,
442  kGE,
443  kONE,
444  arg_expr->add_cast(new_left_type)->decompress(),
445  lower_expr->add_cast(new_right_type)->decompress());
447  arg_expr->get_type_info(),
448  lower_expr->get_type_info(),
449  &new_left_type,
450  &new_right_type);
451  auto upper_pred = makeExpr<Analyzer::BinOper>(
452  kBOOLEAN,
453  kLE,
454  kONE,
455  arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
456  upper_expr->add_cast(new_right_type)->decompress());
457  std::shared_ptr<Analyzer::Expr> result =
458  makeExpr<Analyzer::BinOper>(kBOOLEAN, kAND, kONE, lower_pred, upper_pred);
459  if (is_not) {
460  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
461  }
462  return result;
463 }
464 
465 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
466  const Catalog_Namespace::Catalog& catalog,
467  Analyzer::Query& query,
468  TlistRefType allow_tlist_ref) const {
469  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
470  if (!arg_expr->get_type_info().is_string()) {
471  throw std::runtime_error(
472  "expression in char_length clause must be of a string type.");
473  }
474  std::shared_ptr<Analyzer::Expr> result =
475  makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length);
476  return result;
477 }
478 
479 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
480  const Catalog_Namespace::Catalog& catalog,
481  Analyzer::Query& query,
482  TlistRefType allow_tlist_ref) const {
483  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
484  if (!arg_expr->get_type_info().is_array()) {
485  throw std::runtime_error(
486  "expression in cardinality clause must be of an array type.");
487  }
488  std::shared_ptr<Analyzer::Expr> result =
489  makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
490  return result;
491 }
492 
493 void LikeExpr::check_like_expr(const std::string& like_str, char escape_char) {
494  if (like_str.back() == escape_char) {
495  throw std::runtime_error("LIKE pattern must not end with escape character.");
496  }
497 }
498 
499 bool LikeExpr::test_is_simple_expr(const std::string& like_str, char escape_char) {
500  // if not bounded by '%' then not a simple string
501  if (like_str.size() < 2 || like_str[0] != '%' || like_str[like_str.size() - 1] != '%') {
502  return false;
503  }
504  // if the last '%' is escaped then not a simple string
505  if (like_str[like_str.size() - 2] == escape_char &&
506  like_str[like_str.size() - 3] != escape_char) {
507  return false;
508  }
509  for (size_t i = 1; i < like_str.size() - 1; i++) {
510  if (like_str[i] == '%' || like_str[i] == '_' || like_str[i] == '[' ||
511  like_str[i] == ']') {
512  if (like_str[i - 1] != escape_char) {
513  return false;
514  }
515  }
516  }
517  return true;
518 }
519 
520 void LikeExpr::erase_cntl_chars(std::string& like_str, char escape_char) {
521  char prev_char = '\0';
522  // easier to create new string of allowable chars
523  // rather than erase chars from
524  // existing string
525  std::string new_str;
526  for (char& cur_char : like_str) {
527  if (cur_char == '%' || cur_char == escape_char) {
528  if (prev_char != escape_char) {
529  prev_char = cur_char;
530  continue;
531  }
532  }
533  new_str.push_back(cur_char);
534  prev_char = cur_char;
535  }
536  like_str = new_str;
537 }
538 
539 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
540  const Catalog_Namespace::Catalog& catalog,
541  Analyzer::Query& query,
542  TlistRefType allow_tlist_ref) const {
543  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
544  auto like_expr = like_string->analyze(catalog, query, allow_tlist_ref);
545  auto escape_expr = escape_string == nullptr
546  ? nullptr
547  : escape_string->analyze(catalog, query, allow_tlist_ref);
548  return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike, is_not);
549 }
550 
551 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
552  std::shared_ptr<Analyzer::Expr> like_expr,
553  std::shared_ptr<Analyzer::Expr> escape_expr,
554  const bool is_ilike,
555  const bool is_not) {
556  if (!arg_expr->get_type_info().is_string()) {
557  throw std::runtime_error("expression before LIKE must be of a string type.");
558  }
559  if (!like_expr->get_type_info().is_string()) {
560  throw std::runtime_error("expression after LIKE must be of a string type.");
561  }
562  char escape_char = '\\';
563  if (escape_expr != nullptr) {
564  if (!escape_expr->get_type_info().is_string()) {
565  throw std::runtime_error("expression after ESCAPE must be of a string type.");
566  }
567  if (!escape_expr->get_type_info().is_string()) {
568  throw std::runtime_error("expression after ESCAPE must be of a string type.");
569  }
570  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
571  if (c != nullptr && c->get_constval().stringval->length() > 1) {
572  throw std::runtime_error("String after ESCAPE must have a single character.");
573  }
574  escape_char = (*c->get_constval().stringval)[0];
575  }
576  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(like_expr);
577  bool is_simple = false;
578  if (c != nullptr) {
579  std::string& pattern = *c->get_constval().stringval;
580  if (is_ilike) {
581  std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
582  }
583  check_like_expr(pattern, escape_char);
584  is_simple = test_is_simple_expr(pattern, escape_char);
585  if (is_simple) {
586  erase_cntl_chars(pattern, escape_char);
587  }
588  }
589  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikeExpr>(
590  arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
591  if (is_not) {
592  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
593  }
594  return result;
595 }
596 
597 void RegexpExpr::check_pattern_expr(const std::string& pattern_str, char escape_char) {
598  if (pattern_str.back() == escape_char) {
599  throw std::runtime_error("REGEXP pattern must not end with escape character.");
600  }
601 }
602 
603 bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str, char escape_char) {
604  char prev_char = '\0';
605  char prev_prev_char = '\0';
606  std::string like_str;
607  for (char& cur_char : pattern_str) {
608  if (prev_char == escape_char || isalnum(cur_char) || cur_char == ' ' ||
609  cur_char == '.') {
610  like_str.push_back((cur_char == '.') ? '_' : cur_char);
611  prev_prev_char = prev_char;
612  prev_char = cur_char;
613  continue;
614  }
615  if (prev_char == '.' && prev_prev_char != escape_char) {
616  if (cur_char == '*' || cur_char == '+') {
617  if (cur_char == '*') {
618  like_str.pop_back();
619  }
620  // .* --> %
621  // .+ --> _%
622  like_str.push_back('%');
623  prev_prev_char = prev_char;
624  prev_char = cur_char;
625  continue;
626  }
627  }
628  return false;
629  }
630  pattern_str = like_str;
631  return true;
632 }
633 
634 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
635  const Catalog_Namespace::Catalog& catalog,
636  Analyzer::Query& query,
637  TlistRefType allow_tlist_ref) const {
638  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
639  auto pattern_expr = pattern_string->analyze(catalog, query, allow_tlist_ref);
640  auto escape_expr = escape_string == nullptr
641  ? nullptr
642  : escape_string->analyze(catalog, query, allow_tlist_ref);
643  return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not);
644 }
645 
646 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
647  std::shared_ptr<Analyzer::Expr> arg_expr,
648  std::shared_ptr<Analyzer::Expr> pattern_expr,
649  std::shared_ptr<Analyzer::Expr> escape_expr,
650  const bool is_not) {
651  if (!arg_expr->get_type_info().is_string()) {
652  throw std::runtime_error("expression before REGEXP must be of a string type.");
653  }
654  if (!pattern_expr->get_type_info().is_string()) {
655  throw std::runtime_error("expression after REGEXP must be of a string type.");
656  }
657  char escape_char = '\\';
658  if (escape_expr != nullptr) {
659  if (!escape_expr->get_type_info().is_string()) {
660  throw std::runtime_error("expression after ESCAPE must be of a string type.");
661  }
662  if (!escape_expr->get_type_info().is_string()) {
663  throw std::runtime_error("expression after ESCAPE must be of a string type.");
664  }
665  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
666  if (c != nullptr && c->get_constval().stringval->length() > 1) {
667  throw std::runtime_error("String after ESCAPE must have a single character.");
668  }
669  escape_char = (*c->get_constval().stringval)[0];
670  if (escape_char != '\\') {
671  throw std::runtime_error("Only supporting '\\' escape character.");
672  }
673  }
674  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(pattern_expr);
675  if (c != nullptr) {
676  std::string& pattern = *c->get_constval().stringval;
677  if (translate_to_like_pattern(pattern, escape_char)) {
678  return LikeExpr::get(arg_expr, pattern_expr, escape_expr, false, is_not);
679  }
680  }
681  std::shared_ptr<Analyzer::Expr> result =
682  makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
683  if (is_not) {
684  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
685  }
686  return result;
687 }
688 
689 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
690  const Catalog_Namespace::Catalog& catalog,
691  Analyzer::Query& query,
692  TlistRefType allow_tlist_ref) const {
693  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
694  return LikelihoodExpr::get(arg_expr, likelihood, is_not);
695 }
696 
697 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
698  std::shared_ptr<Analyzer::Expr> arg_expr,
699  float likelihood,
700  const bool is_not) {
701  if (!arg_expr->get_type_info().is_boolean()) {
702  throw std::runtime_error("likelihood expression expects boolean type.");
703  }
704  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikelihoodExpr>(
705  arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
706  return result;
707 }
708 
709 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
710  const Catalog_Namespace::Catalog& catalog,
711  Analyzer::Query& query,
712  TlistRefType allow_tlist_ref) const {
713  throw std::runtime_error("Subqueries are not supported yet.");
714  return nullptr;
715 }
716 
717 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
718  const Catalog_Namespace::Catalog& catalog,
719  Analyzer::Query& query,
720  TlistRefType allow_tlist_ref) const {
721  int table_id{0};
722  int rte_idx{0};
723  const ColumnDescriptor* cd{nullptr};
724  if (column == nullptr) {
725  throw std::runtime_error("invalid column name *.");
726  }
727  if (table != nullptr) {
728  rte_idx = query.get_rte_idx(*table);
729  if (rte_idx < 0) {
730  throw std::runtime_error("range variable or table name " + *table +
731  " does not exist.");
732  }
733  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
734  cd = rte->get_column_desc(catalog, *column);
735  if (cd == nullptr) {
736  throw std::runtime_error("Column name " + *column + " does not exist.");
737  }
738  table_id = rte->get_table_id();
739  } else {
740  bool found = false;
741  int i = 0;
742  for (auto rte : query.get_rangetable()) {
743  cd = rte->get_column_desc(catalog, *column);
744  if (cd != nullptr && !found) {
745  found = true;
746  rte_idx = i;
747  table_id = rte->get_table_id();
748  } else if (cd != nullptr && found) {
749  throw std::runtime_error("Column name " + *column + " is ambiguous.");
750  }
751  i++;
752  }
753  if (cd == nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
754  // check if this is a reference to a targetlist entry
755  bool found = false;
756  int varno = -1;
757  int i = 1;
758  std::shared_ptr<Analyzer::TargetEntry> tle;
759  for (auto p : query.get_targetlist()) {
760  if (*column == p->get_resname() && !found) {
761  found = true;
762  varno = i;
763  tle = p;
764  } else if (*column == p->get_resname() && found) {
765  throw std::runtime_error("Output alias " + *column + " is ambiguous.");
766  }
767  i++;
768  }
769  if (found) {
770  if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
771  Analyzer::Var* v = static_cast<Analyzer::Var*>(tle->get_expr());
773  return v->deep_copy();
774  }
775  }
776  if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
777  return tle->get_expr()->deep_copy();
778  } else {
779  return makeExpr<Analyzer::Var>(
780  tle->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
781  }
782  }
783  }
784  if (cd == nullptr) {
785  throw std::runtime_error("Column name " + *column + " does not exist.");
786  }
787  }
788  return makeExpr<Analyzer::ColumnVar>(cd->columnType, table_id, cd->columnId, rte_idx);
789 }
790 
791 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
792  const Catalog_Namespace::Catalog& catalog,
793  Analyzer::Query& query,
794  TlistRefType allow_tlist_ref) const {
795  SQLTypeInfo result_type;
796  SQLAgg agg_type;
797  std::shared_ptr<Analyzer::Expr> arg_expr;
798  bool is_distinct = false;
799  if (boost::iequals(*name, "count")) {
800  result_type = SQLTypeInfo(kBIGINT, false);
801  agg_type = kCOUNT;
802  if (arg) {
803  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
804  const SQLTypeInfo& ti = arg_expr->get_type_info();
805  if (ti.is_string() && (ti.get_compression() != kENCODING_DICT || !distinct)) {
806  throw std::runtime_error(
807  "Strings must be dictionary-encoded in COUNT(DISTINCT).");
808  }
809  if (ti.get_type() == kARRAY && !distinct) {
810  throw std::runtime_error("Only COUNT(DISTINCT) is supported on arrays.");
811  }
812  }
813  is_distinct = distinct;
814  } else {
815  if (!arg) {
816  throw std::runtime_error("Cannot compute " + *name + " with argument '*'.");
817  }
818  if (boost::iequals(*name, "min")) {
819  agg_type = kMIN;
820  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
821  arg_expr = arg_expr->decompress();
822  result_type = arg_expr->get_type_info();
823  } else if (boost::iequals(*name, "max")) {
824  agg_type = kMAX;
825  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
826  arg_expr = arg_expr->decompress();
827  result_type = arg_expr->get_type_info();
828  } else if (boost::iequals(*name, "avg")) {
829  agg_type = kAVG;
830  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
831  if (!arg_expr->get_type_info().is_number()) {
832  throw std::runtime_error("Cannot compute AVG on non-number-type arguments.");
833  }
834  arg_expr = arg_expr->decompress();
835  result_type = SQLTypeInfo(kDOUBLE, false);
836  } else if (boost::iequals(*name, "sum")) {
837  agg_type = kSUM;
838  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
839  if (!arg_expr->get_type_info().is_number()) {
840  throw std::runtime_error("Cannot compute SUM on non-number-type arguments.");
841  }
842  arg_expr = arg_expr->decompress();
843  result_type = arg_expr->get_type_info().is_integer() ? SQLTypeInfo(kBIGINT, false)
844  : arg_expr->get_type_info();
845  } else if (boost::iequals(*name, "unnest")) {
846  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
847  const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
848  if (arg_ti.get_type() != kARRAY) {
849  throw std::runtime_error(arg->to_string() + " is not of array type.");
850  }
851  return makeExpr<Analyzer::UOper>(arg_ti.get_elem_type(), false, kUNNEST, arg_expr);
852  } else {
853  throw std::runtime_error("invalid function name: " + *name);
854  }
855  if (arg_expr->get_type_info().is_string() ||
856  arg_expr->get_type_info().get_type() == kARRAY) {
857  throw std::runtime_error(
858  "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
859  }
860  }
861  int naggs = query.get_num_aggs();
862  query.set_num_aggs(naggs + 1);
863  return makeExpr<Analyzer::AggExpr>(
864  result_type, agg_type, arg_expr, is_distinct, nullptr);
865 }
866 
867 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
868  const Catalog_Namespace::Catalog& catalog,
869  Analyzer::Query& query,
870  TlistRefType allow_tlist_ref) const {
871  target_type->check_type();
872  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
873  SQLTypeInfo ti(target_type->get_type(),
874  target_type->get_param1(),
875  target_type->get_param2(),
876  arg_expr->get_type_info().get_notnull());
877  if (arg_expr->get_type_info().get_type() != target_type->get_type() &&
878  arg_expr->get_type_info().get_compression() != kENCODING_NONE) {
879  arg_expr->decompress();
880  }
881  return arg_expr->add_cast(ti);
882 }
883 
884 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
885  const Catalog_Namespace::Catalog& catalog,
886  Analyzer::Query& query,
887  TlistRefType allow_tlist_ref) const {
888  SQLTypeInfo ti;
889  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
890  expr_pair_list;
891  for (auto& p : when_then_list) {
892  auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
893  if (e1->get_type_info().get_type() != kBOOLEAN) {
894  throw std::runtime_error("Only boolean expressions can be used after WHEN.");
895  }
896  auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
897  expr_pair_list.emplace_back(e1, e2);
898  }
899  auto else_e = else_expr ? else_expr->analyze(catalog, query, allow_tlist_ref) : nullptr;
900  return normalize(expr_pair_list, else_e);
901 }
902 
903 namespace {
904 
905 bool expr_is_null(const Analyzer::Expr* expr) {
906  if (expr->get_type_info().get_type() == kNULLT) {
907  return true;
908  }
909  const auto const_expr = dynamic_cast<const Analyzer::Constant*>(expr);
910  return const_expr && const_expr->get_is_null();
911 }
912 
// NOTE(review): the signature line of this helper (listing line 913) is not present
// in this listing; only the body is visible. It reads its input through `str_literal`.
// Converts the literal's text to a bool: "t", "true", "T" and "True" yield true;
// "f", "false", "F" and "False" yield false. Any other text throws
// std::runtime_error. Matching is exact, so e.g. "TRUE" is rejected.
914  const std::string* s = str_literal->get_stringval();
915  if (*s == "t" || *s == "true" || *s == "T" || *s == "True") {
916  return true;
917  } else if (*s == "f" || *s == "false" || *s == "F" || *s == "False") {
918  return false;
919  } else {
920  throw std::runtime_error("Invalid string for boolean " + *s);
921  }
922 }
923 
924 } // namespace
925 
// Builds the Analyzer::CaseExpr for a list of already-analyzed (WHEN, THEN) pairs
// and an optional ELSE expression (`else_e_in` may be null).
// A common result type `ti` is deduced from the THEN expressions and the ELSE
// expression, every branch is cast to that type, and the result records whether
// any branch contains an aggregate.
// Throws std::runtime_error when branch types are incompatible or when no type
// can be deduced because `ti` is still kNULLT after all branches were inspected.
926 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
927  const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
928  std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
929  const std::shared_ptr<Analyzer::Expr> else_e_in) {
930  SQLTypeInfo ti;
931  bool has_agg = false;
 // Pass 1: walk the THEN expressions in order and fold their types into `ti`.
932  for (auto& p : expr_pair_list) {
933  auto e1 = p.first;
934  CHECK(e1->get_type_info().is_boolean());
935  auto e2 = p.second;
936  if (ti.get_type() == kNULLT) {
 // No type seen so far: this branch seeds the result type.
937  ti = e2->get_type_info();
938  } else if (e2->get_type_info().get_type() == kNULLT) {
 // Untyped branch: the result becomes nullable and the branch adopts the
 // type deduced so far.
939  ti.set_notnull(false);
940  e2->set_type_info(ti);
941  } else if (ti != e2->get_type_info()) {
942  if (ti.is_string() && e2->get_type_info().is_string()) {
943  ti = Analyzer::BinOper::common_string_type(ti, e2->get_type_info());
944  } else if (ti.is_number() && e2->get_type_info().is_number()) {
945  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
946  } else if (ti.is_boolean() && e2->get_type_info().is_boolean()) {
 // NOTE(review): boolean branches are merged through common_numeric_type;
 // confirm that helper is intended to be used for booleans.
947  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
948  } else {
949  throw std::runtime_error(
950  "expressions in THEN clause must be of the same or compatible types.");
951  }
952  }
953  if (e2->get_contains_agg()) {
954  has_agg = true;
955  }
956  }
 // Fold the ELSE expression (if any) into the deduced type.
957  auto else_e = else_e_in;
958  if (else_e) {
959  if (else_e->get_contains_agg()) {
960  has_agg = true;
961  }
962  if (expr_is_null(else_e.get())) {
 // NULL in ELSE: result is nullable and the ELSE adopts the deduced type.
963  ti.set_notnull(false);
964  else_e->set_type_info(ti);
965  } else if (ti != else_e->get_type_info()) {
966  ti.set_notnull(false);
967  if (ti.is_string() && else_e->get_type_info().is_string()) {
968  ti = Analyzer::BinOper::common_string_type(ti, else_e->get_type_info());
969  } else if (ti.is_number() && else_e->get_type_info().is_number()) {
970  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
971  } else if (ti.is_boolean() && else_e->get_type_info().is_boolean()) {
972  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
973  } else if (get_logical_type_info(ti) !=
974  get_logical_type_info(else_e->get_type_info())) {
 // Unlike the THEN pass, a mismatch is only an error here when the
 // logical types differ as well.
975  throw std::runtime_error(
976  // types differing by encoding will be resolved at decode
977 
978  "expressions in ELSE clause must be of the same or compatible types as those "
979  "in the THEN clauses.");
980  }
981  }
982  }
 // Pass 2: cast every THEN expression to the (nullable) common type.
983  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
984  cast_expr_pair_list;
985  for (auto p : expr_pair_list) {
986  ti.set_notnull(false);
987  cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
988  }
989  if (else_e != nullptr) {
990  else_e = else_e->add_cast(ti);
991  } else {
 // `d` is left uninitialized; presumably unused because the second constructor
 // argument (`true`) marks the constant as NULL -- TODO confirm.
992  Datum d;
993  // always create an else expr so that executor doesn't need to worry about it
994  ti.set_notnull(false);
995  else_e = makeExpr<Analyzer::Constant>(ti, true, d);
996  }
 // Checked last: `ti` stays kNULLT only if no branch contributed a type.
997  if (ti.get_type() == kNULLT) {
998  throw std::runtime_error(
999  "Can't deduce the type for case expressions, all branches null");
1000  }
1001  return makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
1002 }
1003 
1004 std::string CaseExpr::to_string() const {
1005  std::string str("CASE ");
1006  for (auto& p : when_then_list) {
1007  str += "WHEN " + p->get_expr1()->to_string() + " THEN " +
1008  p->get_expr2()->to_string() + " ";
1009  }
1010  if (else_expr != nullptr) {
1011  str += "ELSE " + else_expr->to_string();
1012  }
1013  str += " END";
1014  return str;
1015 }
1016 
1017 void UnionQuery::analyze(const Catalog_Namespace::Catalog& catalog,
1018  Analyzer::Query& query) const {
1019  left->analyze(catalog, query);
1020  Analyzer::Query* right_query = new Analyzer::Query();
1021  right->analyze(catalog, *right_query);
1022  query.set_next_query(right_query);
1023  query.set_is_unionall(is_unionall);
1024 }
1025 
1026 void QuerySpec::analyze_having_clause(const Catalog_Namespace::Catalog& catalog,
1027  Analyzer::Query& query) const {
1028  std::shared_ptr<Analyzer::Expr> p;
1029  if (having_clause != nullptr) {
1030  p = having_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1031  if (p->get_type_info().get_type() != kBOOLEAN) {
1032  throw std::runtime_error("Only boolean expressions can be in HAVING clause.");
1033  }
1034  p->check_group_by(query.get_group_by());
1035  }
1036  query.set_having_predicate(p);
1037 }
1038 
// Analyzes the GROUP BY clause, builds the list of group-by expressions and
// stores it on `query`. When the query has aggregates or a non-empty group-by
// list, every target-list expression is validated against that list via
// check_group_by().
// Throws std::runtime_error for a non-integer literal, an out-of-range ordinal,
// or an ordinal that refers to a target expression containing an aggregate.
1039 void QuerySpec::analyze_group_by(const Catalog_Namespace::Catalog& catalog,
1040  Analyzer::Query& query) const {
1041  std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1042  if (!groupby_clause.empty()) {
 // 1-based position of the current entry within the GROUP BY list.
1043  int gexpr_no = 1;
1044  std::shared_ptr<Analyzer::Expr> gexpr;
1045  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1046  query.get_targetlist();
1047  for (auto& c : groupby_clause) {
1048  // special-case ordinal numbers in GROUP BY
1049  if (dynamic_cast<Literal*>(c.get())) {
 // Only integer literals are accepted; they are 1-based indexes into the
 // target list.
1050  IntLiteral* i = dynamic_cast<IntLiteral*>(c.get());
1051  if (!i) {
1052  throw std::runtime_error("Invalid literal in GROUP BY clause.");
1053  }
1054  int varno = (int)i->get_intval();
1055  if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1056  throw std::runtime_error("Invalid ordinal number in GROUP BY clause.");
1057  }
1058  if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1059  throw std::runtime_error(
1060  "Ordinal number in GROUP BY cannot reference an expression containing "
1061  "aggregate "
1062  "functions.");
1063  }
1064  gexpr = makeExpr<Analyzer::Var>(
1065  tlist[varno - 1]->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
1066  } else {
1067  gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1068  }
1069  const SQLTypeInfo gti = gexpr->get_type_info();
1070  bool set_new_type = false;
1071  SQLTypeInfo ti(gti);
 // Strings without compression are cast to a modified type `ti` below.
1072  if (gti.is_string() && gti.get_compression() == kENCODING_NONE) {
1073  set_new_type = true;
 // NOTE(review): listing lines 1074-1075 are absent here; statements that
 // further adjust `ti` may be missing from this listing.
1076  ti.set_fixed_size();
1077  }
1078  std::shared_ptr<Analyzer::Var> v;
1079  if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
 // The group-by entry refers to a target-list entry: group by that entry's
 // own expression, and make the target entry point at the Var, re-marked as
 // a reference to group-by position `gexpr_no`.
1080  v = std::static_pointer_cast<Analyzer::Var>(gexpr);
1081  int n = v->get_varno();
1082  gexpr = tlist[n - 1]->get_own_expr();
1083  auto cv = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gexpr);
1084  if (cv != nullptr) {
1085  // inherit all ColumnVar info for lineage.
1086  *std::static_pointer_cast<Analyzer::ColumnVar>(v) = *cv;
1087  }
1088  v->set_which_row(Analyzer::Var::kGROUPBY);
1089  v->set_varno(gexpr_no);
1090  tlist[n - 1]->set_expr(v);
1091  }
1092  if (set_new_type) {
 // Group by the cast expression and keep the Var's type in sync with it.
1093  auto new_e = gexpr->add_cast(ti);
1094  groupby.push_back(new_e);
1095  if (v != nullptr) {
1096  v->set_type_info(new_e->get_type_info());
1097  }
1098  } else {
1099  groupby.push_back(gexpr);
1100  }
1101  gexpr_no++;
1102  }
1103  }
1104  if (query.get_num_aggs() > 0 || !groupby.empty()) {
1105  for (auto t : query.get_targetlist()) {
1106  auto e = t->get_expr();
1107  e->check_group_by(groupby);
1108  }
1109  }
1110  query.set_group_by(groupby);
1111 }
1112 
1113 void QuerySpec::analyze_where_clause(const Catalog_Namespace::Catalog& catalog,
1114  Analyzer::Query& query) const {
1115  if (where_clause == nullptr) {
1116  query.set_where_predicate(nullptr);
1117  return;
1118  }
1119  auto p = where_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1120  if (p->get_type_info().get_type() != kBOOLEAN) {
1121  throw std::runtime_error("Only boolean expressions can be in WHERE clause.");
1122  }
1123  query.set_where_predicate(p);
1124 }
1125 
1126 void QuerySpec::analyze_select_clause(const Catalog_Namespace::Catalog& catalog,
1127  Analyzer::Query& query) const {
1128  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1129  query.get_targetlist_nonconst();
1130  if (select_clause.empty()) {
1131  // this means SELECT *
1132  int rte_idx = 0;
1133  for (auto rte : query.get_rangetable()) {
1134  rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1135  }
1136  } else {
1137  for (auto& p : select_clause) {
1138  const Parser::Expr* select_expr = p->get_select_expr();
1139  // look for the case of range_var.*
1140  if (typeid(*select_expr) == typeid(ColumnRef) &&
1141  dynamic_cast<const ColumnRef*>(select_expr)->get_column() == nullptr) {
1142  const std::string* range_var_name =
1143  dynamic_cast<const ColumnRef*>(select_expr)->get_table();
1144  int rte_idx = query.get_rte_idx(*range_var_name);
1145  if (rte_idx < 0) {
1146  throw std::runtime_error("invalid range variable name: " + *range_var_name);
1147  }
1148  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
1149  rte->expand_star_in_targetlist(catalog, tlist, rte_idx);
1150  } else {
1151  auto e = select_expr->analyze(catalog, query);
1152  std::string resname;
1153 
1154  if (p->get_alias() != nullptr) {
1155  resname = *p->get_alias();
1156  } else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1157  !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1158  auto colvar = std::static_pointer_cast<Analyzer::ColumnVar>(e);
1159  const ColumnDescriptor* col_desc = catalog.getMetadataForColumn(
1160  colvar->get_table_id(), colvar->get_column_id());
1161  resname = col_desc->columnName;
1162  }
1163  if (e->get_type_info().get_type() == kNULLT) {
1164  throw std::runtime_error(
1165  "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1166  }
1167  auto o = std::static_pointer_cast<Analyzer::UOper>(e);
1168  bool unnest = (o != nullptr && o->get_optype() == kUNNEST);
1169  auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1170  tlist.push_back(tle);
1171  }
1172  }
1173  }
1174 }
1175 
// Resolves every table in the FROM clause against the catalog and adds one
// range-table entry per table to `query`. The entry is named after the range
// variable when one is given, otherwise after the table itself.
// Throws std::runtime_error if a table does not exist.
1176 void QuerySpec::analyze_from_clause(const Catalog_Namespace::Catalog& catalog,
1177  Analyzer::Query& query) const {
 // NOTE(review): listing line 1178 is absent; the declaration of `rte`, which is
 // assigned below, is not visible in this listing.
1179  for (auto& p : from_clause) {
1180  const TableDescriptor* table_desc;
1181  table_desc = catalog.getMetadataForTable(*p->get_table_name());
1182  if (table_desc == nullptr) {
1183  throw std::runtime_error("Table " + *p->get_table_name() + " does not exist.");
1184  }
1185  std::string range_var;
1186  if (p->get_range_var() == nullptr) {
1187  range_var = *p->get_table_name();
1188  } else {
1189  range_var = *p->get_range_var();
1190  }
 // The entry is allocated with `new` and handed to the query as a raw pointer;
 // presumably the query takes ownership -- TODO confirm.
1191  rte = new Analyzer::RangeTableEntry(range_var, table_desc, nullptr);
1192  query.add_rte(rte);
1193  }
1194 }
1195 
// Analyzes a query specification by running the per-clause analyzers in a
// fixed order: FROM, SELECT, WHERE, GROUP BY, HAVING. The order matters to the
// visible code: the SELECT step reads the range table filled by the FROM step,
// GROUP BY reads the target list, and HAVING reads the group-by list.
1196 void QuerySpec::analyze(const Catalog_Namespace::Catalog& catalog,
1197  Analyzer::Query& query) const {
 // NOTE(review): listing line 1198 is absent; a statement preceding the clause
 // analyzers may be missing from this listing.
1199  analyze_from_clause(catalog, query);
1200  analyze_select_clause(catalog, query);
1201  analyze_where_clause(catalog, query);
1202  analyze_group_by(catalog, query);
1203  analyze_having_clause(catalog, query);
1204 }
1205 
1206 void SelectStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1207  Analyzer::Query& query) const {
1208  query.set_stmt_type(kSELECT);
1209  query.set_limit(limit);
1210  if (offset < 0) {
1211  throw std::runtime_error("OFFSET cannot be negative.");
1212  }
1213  query.set_offset(offset);
1214  query_expr->analyze(catalog, query);
1215  if (orderby_clause.empty() && !query.get_is_distinct()) {
1216  query.set_order_by(nullptr);
1217  return;
1218  }
1219  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1220  query.get_targetlist();
1221  std::list<Analyzer::OrderEntry>* order_by = new std::list<Analyzer::OrderEntry>();
1222  if (!orderby_clause.empty()) {
1223  for (auto& p : orderby_clause) {
1224  int tle_no = p->get_colno();
1225  if (tle_no == 0) {
1226  // use column name
1227  // search through targetlist for matching name
1228  const std::string* name = p->get_column()->get_column();
1229  tle_no = 1;
1230  bool found = false;
1231  for (auto tle : tlist) {
1232  if (tle->get_resname() == *name) {
1233  found = true;
1234  break;
1235  }
1236  tle_no++;
1237  }
1238  if (!found) {
1239  throw std::runtime_error("invalid name in order by: " + *name);
1240  }
1241  }
1242  order_by->push_back(
1243  Analyzer::OrderEntry(tle_no, p->get_is_desc(), p->get_nulls_first()));
1244  }
1245  }
1246  if (query.get_is_distinct()) {
1247  // extend order_by to include all targetlist entries.
1248  for (int i = 1; i <= static_cast<int>(tlist.size()); i++) {
1249  bool in_orderby = false;
1250  std::for_each(order_by->begin(),
1251  order_by->end(),
1252  [&in_orderby, i](const Analyzer::OrderEntry& oe) {
1253  in_orderby = in_orderby || (i == oe.tle_no);
1254  });
1255  if (!in_orderby) {
1256  order_by->push_back(Analyzer::OrderEntry(i, false, false));
1257  }
1258  }
1259  }
1260  query.set_order_by(order_by);
1261 }
1262 
1263 std::string SelectEntry::to_string() const {
1264  std::string str = select_expr->to_string();
1265  if (alias != nullptr) {
1266  str += " AS " + *alias;
1267  }
1268  return str;
1269 }
1270 
1271 std::string TableRef::to_string() const {
1272  std::string str = *table_name;
1273  if (range_var != nullptr) {
1274  str += " " + *range_var;
1275  }
1276  return str;
1277 }
1278 
1279 std::string ColumnRef::to_string() const {
1280  std::string str;
1281  if (table == nullptr) {
1282  str = *column;
1283  } else if (column == nullptr) {
1284  str = *table + ".*";
1285  } else {
1286  str = *table + "." + *column;
1287  }
1288  return str;
1289 }
1290 
1291 std::string OperExpr::to_string() const {
1292  std::string op_str[] = {
1293  "=", "===", "<>", "<", ">", "<=", ">=", " AND ", " OR ", "NOT", "-", "+", "*", "/"};
1294  std::string str;
1295  if (optype == kUMINUS) {
1296  str = "-(" + left->to_string() + ")";
1297  } else if (optype == kNOT) {
1298  str = "NOT (" + left->to_string() + ")";
1299  } else if (optype == kARRAY_AT) {
1300  str = left->to_string() + "[" + right->to_string() + "]";
1301  } else if (optype == kUNNEST) {
1302  str = "UNNEST(" + left->to_string() + ")";
1303  } else if (optype == kIN) {
1304  str = "(" + left->to_string() + " IN " + right->to_string() + ")";
1305  } else {
1306  str = "(" + left->to_string() + op_str[optype] + right->to_string() + ")";
1307  }
1308  return str;
1309 }
1310 
1311 std::string InExpr::to_string() const {
1312  std::string str = arg->to_string();
1313  if (is_not) {
1314  str += " NOT IN ";
1315  } else {
1316  str += " IN ";
1317  }
1318  return str;
1319 }
1320 
1321 std::string ExistsExpr::to_string() const {
1322  return "EXISTS (" + query->to_string() + ")";
1323 }
1324 
1325 std::string SubqueryExpr::to_string() const {
1326  std::string str;
1327  str = "(";
1328  str += query->to_string();
1329  str += ")";
1330  return str;
1331 }
1332 
1333 std::string IsNullExpr::to_string() const {
1334  std::string str = arg->to_string();
1335  if (is_not) {
1336  str += " IS NOT NULL";
1337  } else {
1338  str += " IS NULL";
1339  }
1340  return str;
1341 }
1342 
1343 std::string InSubquery::to_string() const {
1344  std::string str = InExpr::to_string();
1345  str += subquery->to_string();
1346  return str;
1347 }
1348 
1349 std::string InValues::to_string() const {
1350  std::string str = InExpr::to_string() + "(";
1351  bool notfirst = false;
1352  for (auto& p : value_list) {
1353  if (notfirst) {
1354  str += ", ";
1355  } else {
1356  notfirst = true;
1357  }
1358  str += p->to_string();
1359  }
1360  str += ")";
1361  return str;
1362 }
1363 
1364 std::string BetweenExpr::to_string() const {
1365  std::string str = arg->to_string();
1366  if (is_not) {
1367  str += " NOT BETWEEN ";
1368  } else {
1369  str += " BETWEEN ";
1370  }
1371  str += lower->to_string() + " AND " + upper->to_string();
1372  return str;
1373 }
1374 
1375 std::string CharLengthExpr::to_string() const {
1376  std::string str;
1377  if (calc_encoded_length) {
1378  str = "CHAR_LENGTH (" + arg->to_string() + ")";
1379  } else {
1380  str = "LENGTH (" + arg->to_string() + ")";
1381  }
1382  return str;
1383 }
1384 
1385 std::string CardinalityExpr::to_string() const {
1386  std::string str = "CARDINALITY(" + arg->to_string() + ")";
1387  return str;
1388 }
1389 
1390 std::string LikeExpr::to_string() const {
1391  std::string str = arg->to_string();
1392  if (is_not) {
1393  str += " NOT LIKE ";
1394  } else {
1395  str += " LIKE ";
1396  }
1397  str += like_string->to_string();
1398  if (escape_string != nullptr) {
1399  str += " ESCAPE " + escape_string->to_string();
1400  }
1401  return str;
1402 }
1403 
1404 std::string RegexpExpr::to_string() const {
1405  std::string str = arg->to_string();
1406  if (is_not) {
1407  str += " NOT REGEXP ";
1408  } else {
1409  str += " REGEXP ";
1410  }
1411  str += pattern_string->to_string();
1412  if (escape_string != nullptr) {
1413  str += " ESCAPE " + escape_string->to_string();
1414  }
1415  return str;
1416 }
1417 
1418 std::string LikelihoodExpr::to_string() const {
1419  std::string str = " LIKELIHOOD ";
1420  str += arg->to_string();
1421  str += " ";
1422  str += boost::lexical_cast<std::string>(is_not ? 1.0 - likelihood : likelihood);
1423  return str;
1424 }
1425 
1426 std::string FunctionRef::to_string() const {
1427  std::string str = *name + "(";
1428  if (distinct) {
1429  str += "DISTINCT ";
1430  }
1431  if (arg == nullptr) {
1432  str += "*)";
1433  } else {
1434  str += arg->to_string() + ")";
1435  }
1436  return str;
1437 }
1438 
1439 std::string QuerySpec::to_string() const {
1440  std::string query_str = "SELECT ";
1441  if (is_distinct) {
1442  query_str += "DISTINCT ";
1443  }
1444  if (select_clause.empty()) {
1445  query_str += "* ";
1446  } else {
1447  bool notfirst = false;
1448  for (auto& p : select_clause) {
1449  if (notfirst) {
1450  query_str += ", ";
1451  } else {
1452  notfirst = true;
1453  }
1454  query_str += p->to_string();
1455  }
1456  }
1457  query_str += " FROM ";
1458  bool notfirst = false;
1459  for (auto& p : from_clause) {
1460  if (notfirst) {
1461  query_str += ", ";
1462  } else {
1463  notfirst = true;
1464  }
1465  query_str += p->to_string();
1466  }
1467  if (where_clause) {
1468  query_str += " WHERE " + where_clause->to_string();
1469  }
1470  if (!groupby_clause.empty()) {
1471  query_str += " GROUP BY ";
1472  bool notfirst = false;
1473  for (auto& p : groupby_clause) {
1474  if (notfirst) {
1475  query_str += ", ";
1476  } else {
1477  notfirst = true;
1478  }
1479  query_str += p->to_string();
1480  }
1481  }
1482  if (having_clause) {
1483  query_str += " HAVING " + having_clause->to_string();
1484  }
1485  query_str += ";";
1486  return query_str;
1487 }
1488 
// Common analysis for INSERT statements: marks `query` as an INSERT, resolves
// the target table and records the list of target column ids on `query`.
// Without an explicit column list, the ids come from
// getAllColumnMetadataForTable(). With an explicit list, each named column is
// resolved and, for columns with physical columns, the ids of those physical
// columns are appended right after it.
// Throws std::runtime_error if the table or a column does not exist, the table
// is a view, a physical column's metadata is missing, or the explicit column
// list does not cover all columns.
1489 void InsertStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1490  Analyzer::Query& query) const {
1491  query.set_stmt_type(kINSERT);
1492  const TableDescriptor* td = catalog.getMetadataForTable(*table);
1493  if (td == nullptr) {
1494  throw std::runtime_error("Table " + *table + " does not exist.");
1495  }
1496  if (td->isView) {
1497  throw std::runtime_error("Insert to views is not supported yet.");
1498  }
 // NOTE(review): listing line 1499 is absent; a statement may be missing here.
1500  query.set_result_table_id(td->tableId);
1501  std::list<int> result_col_list;
1502  if (column_list.empty()) {
1503  const std::list<const ColumnDescriptor*> all_cols =
1504  catalog.getAllColumnMetadataForTable(td->tableId, false, false, true);
1505  for (auto cd : all_cols) {
1506  result_col_list.push_back(cd->columnId);
1507  }
1508  } else {
1509  for (auto& c : column_list) {
1510  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
1511  if (cd == nullptr) {
1512  throw std::runtime_error("Column " + *c + " does not exist.");
1513  }
1514  result_col_list.push_back(cd->columnId);
1515  const auto& col_ti = cd->columnType;
1516  if (col_ti.get_physical_cols() > 0) {
1517  CHECK(cd->columnType.is_geometry());
 // Physical columns are looked up at the column ids directly following the
 // logical column's id.
1518  for (auto i = 1; i <= col_ti.get_physical_cols(); i++) {
1519  const ColumnDescriptor* pcd =
1520  catalog.getMetadataForColumn(td->tableId, cd->columnId + i);
1521  if (pcd == nullptr) {
1522  throw std::runtime_error("Column " + *c + "'s metadata is incomplete.");
1523  }
1524  result_col_list.push_back(pcd->columnId);
1525  }
1526  }
1527  }
 // The explicit list (plus expanded physical columns) must match the full
 // column set in size; inserting into a subset is rejected.
1528  if (catalog.getAllColumnMetadataForTable(td->tableId, false, false, true).size() !=
1529  result_col_list.size()) {
1530  throw std::runtime_error("Insert into a subset of columns is not supported yet.");
1531  }
1532  }
1533  query.set_result_col_list(result_col_list);
1534 }
1535 
// Determines the index of the leaf that should receive this INSERT ... VALUES row.
//  - Table without shards: a leaf is picked at random.
//  - Sharded table: the value supplied for the shard column is located in the
//    value list, analyzed and cast to the shard column type; a shard id is
//    computed from it over `nShards * num_leafs` shards and the leaf index is
//    `shardId / nShards`.
// Throws std::runtime_error if the table does not exist, is a view, is
// replicated, or no value is supplied for the shard column.
1536 size_t InsertValuesStmt::determineLeafIndex(const Catalog_Namespace::Catalog& catalog,
1537  size_t num_leafs) {
1538  const TableDescriptor* td = catalog.getMetadataForTable(*table);
1539  if (td == nullptr) {
1540  throw std::runtime_error("Table " + *table + " does not exist.");
1541  }
1542  if (td->isView) {
1543  throw std::runtime_error("Insert to views is not supported yet.");
1544  }
1545 
1546  if (td->partitions == "REPLICATED") {
1547  throw std::runtime_error("Cannot determine leaf on replicated table.");
1548  }
1549 
1550  if (0 == td->nShards) {
 // NOTE(review): `% num_leafs` divides by zero when num_leafs == 0 -- confirm
 // callers never pass 0.
1551  std::random_device rd;
1552  std::mt19937_64 gen(rd());
1553  std::uniform_int_distribution<size_t> dis;
1554  const auto leaf_idx = dis(gen) % num_leafs;
1555  return leaf_idx;
1556  }
1557 
 // Position of the shard column's value within `value_list`.
1558  size_t indexOfShardColumn = 0;
1559  const ColumnDescriptor* shardColumn = catalog.getShardColumnMetadataForTable(td);
1560  CHECK(shardColumn);
1561 
1562  if (column_list.empty()) {
 // No explicit column list: the position is the shard column's position in the
 // table's column list.
1563  auto all_cols =
1564  catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
1565  auto iter = std::find(all_cols.begin(), all_cols.end(), shardColumn);
1566  CHECK(iter != all_cols.end());
1567  indexOfShardColumn = std::distance(all_cols.begin(), iter);
1568  } else {
 // Explicit column list: search it by name.
1569  for (auto& c : column_list) {
1570  if (*c == shardColumn->columnName) {
1571  break;
1572  }
1573  indexOfShardColumn++;
1574  }
1575 
1576  if (indexOfShardColumn == column_list.size()) {
1577  throw std::runtime_error("No value defined for shard column.");
1578  }
1579  }
1580 
1581  if (indexOfShardColumn >= value_list.size()) {
1582  throw std::runtime_error("No value defined for shard column.");
1583  }
1584 
1585  auto& shardColumnValueExpr = *(std::next(value_list.begin(), indexOfShardColumn));
1586 
 // Analyze the value with a scratch query and cast it to the shard column type.
1587  Analyzer::Query query;
1588  auto e = shardColumnValueExpr->analyze(catalog, query);
1589  e = e->add_cast(shardColumn->columnType);
 // The value must be a constant, either directly or as the operand of a cast.
1590  const Analyzer::Constant* con = dynamic_cast<Analyzer::Constant*>(e.get());
1591  if (!con) {
1592  auto col_cast = dynamic_cast<const Analyzer::UOper*>(e.get());
1593  CHECK(col_cast);
1594  CHECK_EQ(kCAST, col_cast->get_optype());
1595  con = dynamic_cast<const Analyzer::Constant*>(col_cast->get_operand());
1596  }
1597  CHECK(con);
1598 
1599  Datum d = con->get_constval();
1600 
1601  auto shard_count = td->nShards * num_leafs;
1602  int64_t shardId = 0;
1603 
1604  if (con->get_is_null()) {
 // NOTE(review): listing line 1605 is absent; only the tail of the statement
 // handling a NULL shard key is visible below.
1606  shard_count);
1607  } else if (shardColumn->columnType.is_string()) {
 // String shard key: the dictionary id of the string (added to the dictionary
 // if not yet present) is used as the key.
1608  auto dictDesc =
1609  catalog.getMetadataForDict(shardColumn->columnType.get_comp_param(), true);
1610  auto str_id = dictDesc->stringDict->getOrAdd(*d.stringval);
1611  bool invalid = false;
1612 
 // Ids that exceed the maximum valid value for the column's size are replaced
 // by the column's null sentinel below.
 // NOTE(review): the 4-byte case checks against int32_t while the 2- and 1-byte
 // cases use unsigned types -- confirm this asymmetry is intended.
1613  if (4 == shardColumn->columnType.get_size()) {
1614  invalid = str_id > max_valid_int_value<int32_t>();
1615  } else if (2 == shardColumn->columnType.get_size()) {
1616  invalid = str_id > max_valid_int_value<uint16_t>();
1617  } else if (1 == shardColumn->columnType.get_size()) {
1618  invalid = str_id > max_valid_int_value<uint8_t>();
1619  }
1620 
1621  if (invalid || str_id == inline_int_null_value<int32_t>()) {
1622  str_id = inline_fixed_encoding_null_val(shardColumn->columnType);
1623  }
1624  shardId = SHARD_FOR_KEY(str_id, shard_count);
1625  } else {
 // Other shard keys: read the Datum member matching the column's logical size.
1626  switch (shardColumn->columnType.get_logical_size()) {
1627  case 8:
1628  shardId = SHARD_FOR_KEY(d.bigintval, shard_count);
1629  break;
1630  case 4:
1631  shardId = SHARD_FOR_KEY(d.intval, shard_count);
1632  break;
1633  case 2:
1634  shardId = SHARD_FOR_KEY(d.smallintval, shard_count);
1635  break;
1636  case 1:
1637  shardId = SHARD_FOR_KEY(d.tinyintval, shard_count);
1638  break;
1639  default:
1640  CHECK(false);
1641  }
1642  }
1643 
1644  return shardId / td->nShards;
1645 }
1646 
// Analyzes INSERT ... VALUES: runs the common InsertStmt analysis, then turns
// each value expression into a target entry cast to its column type. The
// iterator `it` walks the result column id list in step with the target entries.
// For columns with physical columns (checked to be geometry), the value must be
// a constant (optionally under a cast); it is parsed into coordinates, bounds,
// ring sizes and polygon ring counts, and one extra target entry is appended
// per physical column.
// Throws std::runtime_error when the number of values does not match the number
// of non-physical columns, NULL is inserted into a NOT NULL column, or a
// geometry value is missing, unreadable or of the wrong type.
1647 void InsertValuesStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1648  Analyzer::Query& query) const {
1649  InsertStmt::analyze(catalog, query);
1650  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1651  query.get_targetlist_nonconst();
1652  const auto tableId = query.get_result_table_id();
1653  const std::list<const ColumnDescriptor*> non_phys_cols =
1654  catalog.getAllColumnMetadataForTable(tableId, false, false, false);
 // NOTE(review): this fires for any size mismatch, including more expressions
 // than columns, although the message only names one direction.
1655  if (non_phys_cols.size() != value_list.size()) {
1656  throw std::runtime_error("Insert has more target columns than expressions.");
1657  }
1658  std::list<int>::const_iterator it = query.get_result_col_list().begin();
1659  for (auto& v : value_list) {
1660  auto e = v->analyze(catalog, query);
1661  const ColumnDescriptor* cd =
1662  catalog.getMetadataForColumn(query.get_result_table_id(), *it);
1663  CHECK(cd);
 // Only a constant NULL is rejected here for NOT NULL columns.
1664  if (cd->columnType.get_notnull()) {
1665  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
1666  if (c != nullptr && c->get_is_null()) {
1667  throw std::runtime_error("Cannot insert NULL into column " + cd->columnName);
1668  }
1669  }
1670  e = e->add_cast(cd->columnType);
1671  tlist.emplace_back(new Analyzer::TargetEntry("", e, false));
1672  ++it;
1673 
1674  const auto& col_ti = cd->columnType;
1675  if (col_ti.get_physical_cols() > 0) {
1676  CHECK(cd->columnType.is_geometry());
 // Find the constant holding the geometry text, looking through one cast.
1677  auto c = dynamic_cast<const Analyzer::Constant*>(e.get());
1678  if (!c) {
1679  auto uoper = std::dynamic_pointer_cast<Analyzer::UOper>(e);
1680  if (uoper && uoper->get_optype() == kCAST) {
1681  c = dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
1682  }
1683  }
1684  bool is_null = false;
1685  std::string* geo_string{nullptr};
1686  if (c) {
1687  is_null = c->get_is_null();
1688  if (!is_null) {
1689  geo_string = c->get_constval().stringval;
1690  }
1691  }
1692  if (!is_null && !geo_string) {
1693  throw std::runtime_error("Expecting a WKT or WKB hex string for column " +
1694  cd->columnName);
1695  }
1696  std::vector<double> coords;
1697  std::vector<double> bounds;
1698  std::vector<int> ring_sizes;
1699  std::vector<int> poly_rings;
1700  int render_group =
1701  0; // @TODO simon.eves where to get render_group from in this context?!
1702  SQLTypeInfo import_ti{cd->columnType};
1703  if (!is_null) {
 // NOTE(review): listing line 1704 is absent; the opening of the call that
 // parses `*geo_string` into the vectors above is not visible.
1705  *geo_string, import_ti, coords, bounds, ring_sizes, poly_rings)) {
1706  throw std::runtime_error("Cannot read geometry to insert into column " +
1707  cd->columnName);
1708  }
1709  if (cd->columnType.get_type() != import_ti.get_type()) {
1710  // allow POLYGON to be inserted into MULTIPOLYGON column
1711  if (!(import_ti.get_type() == SQLTypes::kPOLYGON &&
 // NOTE(review): listing line 1712 is absent; the second half of this
 // condition is not visible.
1713  throw std::runtime_error(
1714  "Imported geometry doesn't match the type of column " + cd->columnName);
1715  }
1716  }
1717  } else {
1718  // Special case for NULL POINT, push NULL representation to coords
1719  if (cd->columnType.get_type() == kPOINT) {
1720  if (!coords.empty()) {
1721  throw std::runtime_error("NULL POINT with unexpected coordinates in column " +
1722  cd->columnName);
1723  }
1724  coords.push_back(NULL_ARRAY_DOUBLE);
1725  coords.push_back(NULL_DOUBLE);
1726  }
1727  }
1728 
1729  // TODO: check if import SRID matches columns SRID, may need to transform before
1730  // inserting
1731 
 // Offset of the next physical column relative to the logical column's id.
1732  int nextColumnOffset = 1;
1733 
 // Physical column 1: compressed coordinates as an array of TINYINT.
1734  const ColumnDescriptor* cd_coords = catalog.getMetadataForColumn(
1735  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1736  CHECK(cd_coords);
1737  CHECK_EQ(cd_coords->columnType.get_type(), kARRAY);
1738  CHECK_EQ(cd_coords->columnType.get_subtype(), kTINYINT);
1739  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
 // A NULL POINT still emits its (NULL-valued) coordinates.
1740  if (!is_null || cd->columnType.get_type() == kPOINT) {
1741  auto compressed_coords = Geospatial::compress_coords(coords, col_ti);
1742  for (auto cc : compressed_coords) {
1743  Datum d;
1744  d.tinyintval = cc;
1745  auto e = makeExpr<Analyzer::Constant>(kTINYINT, false, d);
1746  value_exprs.push_back(e);
1747  }
1748  }
1749  tlist.emplace_back(new Analyzer::TargetEntry(
1750  "",
1751  makeExpr<Analyzer::Constant>(cd_coords->columnType, is_null, value_exprs),
1752  false));
1753  ++it;
1754  nextColumnOffset++;
1755 
1756  if (cd->columnType.get_type() == kPOLYGON ||
1757  cd->columnType.get_type() == kMULTIPOLYGON) {
1758  // Put ring sizes array into separate physical column
1759  const ColumnDescriptor* cd_ring_sizes = catalog.getMetadataForColumn(
1760  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1761  CHECK(cd_ring_sizes);
1762  CHECK_EQ(cd_ring_sizes->columnType.get_type(), kARRAY);
1763  CHECK_EQ(cd_ring_sizes->columnType.get_subtype(), kINT);
1764  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1765  if (!is_null) {
1766  for (auto c : ring_sizes) {
1767  Datum d;
1768  d.intval = c;
1769  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1770  value_exprs.push_back(e);
1771  }
1772  }
1773  tlist.emplace_back(new Analyzer::TargetEntry(
1774  "",
1775  makeExpr<Analyzer::Constant>(cd_ring_sizes->columnType, is_null, value_exprs),
1776  false));
1777  ++it;
1778  nextColumnOffset++;
1779 
1780  if (cd->columnType.get_type() == kMULTIPOLYGON) {
1781  // Put poly_rings array into separate physical column
1782  const ColumnDescriptor* cd_poly_rings = catalog.getMetadataForColumn(
1783  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1784  CHECK(cd_poly_rings);
1785  CHECK_EQ(cd_poly_rings->columnType.get_type(), kARRAY);
1786  CHECK_EQ(cd_poly_rings->columnType.get_subtype(), kINT);
1787  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1788  if (!is_null) {
1789  for (auto c : poly_rings) {
1790  Datum d;
1791  d.intval = c;
1792  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1793  value_exprs.push_back(e);
1794  }
1795  }
1796  tlist.emplace_back(new Analyzer::TargetEntry(
1797  "",
1798  makeExpr<Analyzer::Constant>(
1799  cd_poly_rings->columnType, is_null, value_exprs),
1800  false));
1801  ++it;
1802  nextColumnOffset++;
1803  }
1804  }
1805 
 // Bounds array (DOUBLE) for every geometry type handled here except POINT.
1806  if (cd->columnType.get_type() == kLINESTRING ||
1807  cd->columnType.get_type() == kPOLYGON ||
1808  cd->columnType.get_type() == kMULTIPOLYGON) {
1809  const ColumnDescriptor* cd_bounds = catalog.getMetadataForColumn(
1810  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1811  CHECK(cd_bounds);
1812  CHECK_EQ(cd_bounds->columnType.get_type(), kARRAY);
1813  CHECK_EQ(cd_bounds->columnType.get_subtype(), kDOUBLE);
1814  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1815  if (!is_null) {
1816  for (auto b : bounds) {
1817  Datum d;
1818  d.doubleval = b;
1819  auto e = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
1820  value_exprs.push_back(e);
1821  }
1822  }
1823  tlist.emplace_back(new Analyzer::TargetEntry(
1824  "",
1825  makeExpr<Analyzer::Constant>(cd_bounds->columnType, is_null, value_exprs),
1826  false));
1827  ++it;
1828  nextColumnOffset++;
1829  }
1830 
1831  if (cd->columnType.get_type() == kPOLYGON ||
1832  cd->columnType.get_type() == kMULTIPOLYGON) {
1833  // Put render group into separate physical column
1834  const ColumnDescriptor* cd_render_group = catalog.getMetadataForColumn(
1835  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1836  CHECK(cd_render_group);
1837  CHECK_EQ(cd_render_group->columnType.get_type(), kINT);
1838  Datum d;
1839  d.intval = render_group;
1840  tlist.emplace_back(new Analyzer::TargetEntry(
1841  "",
1842  makeExpr<Analyzer::Constant>(cd_render_group->columnType, is_null, d),
1843  false));
1844  ++it;
1845  nextColumnOffset++;
1846  }
1847  }
1848  }
1849 }
1850 
// Executes a singleton INSERT ... VALUES statement on behalf of `session`.
// Visible steps: privilege check, analysis of the statement into an Analyzer::Query,
// acquisition of a lock-guarded table descriptor for the result table, rejection of
// views, and finally RelAlgExecutor::executeSimpleInsert.
// Throws std::runtime_error when the privilege check fails or the target is a view.
// NOTE(review): some continuation lines (privilege-check arguments, lock constructor
// arguments, the declaration of `executor`) are not present in this listing.
1851 void InsertValuesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
1852  auto& catalog = session.getCatalog();
1853 
1854  if (!session.checkDBAccessPrivileges(
1856  throw std::runtime_error("User has no insert privileges on " + *table + ".");
1857  }
1858 
1859  auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
1862 
// Analysis fills `query`, including the result table id used below.
1863  Analyzer::Query query;
1864  analyze(catalog, query);
1865 
1866  // Acquire schema write lock -- leave data lock so the fragmenter can checkpoint. For
1867  // singleton inserts we just take a write lock on the schema, which prevents concurrent
1868  // inserts.
1869  auto result_table_id = query.get_result_table_id();
1870  const auto td_with_lock =
1872  catalog, result_table_id);
1873  auto td = td_with_lock();
1874  CHECK(td);
1875 
1876  if (td->isView) {
1877  throw std::runtime_error("Singleton inserts on views is not supported.");
1878  }
1879 
1881  RelAlgExecutor ra_executor(executor.get(), catalog);
1882 
1883  ra_executor.executeSimpleInsert(query);
1884 }
1885 
1886 void UpdateStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1887  Analyzer::Query& query) const {
1888  throw std::runtime_error("UPDATE statement not supported yet.");
1889 }
1890 
1891 void DeleteStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1892  Analyzer::Query& query) const {
1893  throw std::runtime_error("DELETE statement not supported yet.");
1894 }
1895 
1896 namespace {
1897 
// Shard-key type check for column descriptor `cd`: returns normally when the column
// type is an integer, a dictionary-encoded string, or a time type; otherwise throws
// std::runtime_error naming the offending type and encoding.
// NOTE(review): the signature line of this function is not present in this listing.
1899  const auto& col_ti = cd.columnType;
1900  if (col_ti.is_integer() ||
1901  (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) ||
1902  col_ti.is_time()) {
1903  return;
1904  }
1905  throw std::runtime_error("Cannot shard on type " + col_ti.get_type_name() +
1906  ", encoding " + col_ti.get_compression_name());
1907 }
1908 
// Returns the 1-based index of the column whose name equals `name` exactly
// (case-sensitive comparison), or 0 when no such column exists in `columns`.
// Each geometry column advances the index by its physical column count in addition to
// the column itself.
// NOTE(review): one statement inside the match branch (listing line 1914) is not
// present in this listing.
1909 size_t shard_column_index(const std::string& name,
1910  const std::list<ColumnDescriptor>& columns) {
1911  size_t index = 1;
1912  for (const auto& cd : columns) {
1913  if (cd.columnName == name) {
1915  return index;
1916  }
1917  ++index;
// Skip over the extra physical columns that accompany a geometry column.
1918  if (cd.columnType.is_geometry()) {
1919  index += cd.columnType.get_physical_cols();
1920  }
1921  }
1922  // Not found, return 0
1923  return 0;
1924 }
1925 
1926 size_t sort_column_index(const std::string& name,
1927  const std::list<ColumnDescriptor>& columns) {
1928  size_t index = 1;
1929  for (const auto& cd : columns) {
1930  if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
1931  return index;
1932  }
1933  ++index;
1934  if (cd.columnType.is_geometry()) {
1935  index += cd.columnType.get_physical_cols();
1936  }
1937  }
1938  // Not found, return 0
1939  return 0;
1940 }
1941 
1942 void set_string_field(rapidjson::Value& obj,
1943  const std::string& field_name,
1944  const std::string& field_value,
1945  rapidjson::Document& document) {
1946  rapidjson::Value field_name_json_str;
1947  field_name_json_str.SetString(
1948  field_name.c_str(), field_name.size(), document.GetAllocator());
1949  rapidjson::Value field_value_json_str;
1950  field_value_json_str.SetString(
1951  field_value.c_str(), field_value.size(), document.GetAllocator());
1952  obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
1953 }
1954 
// Serializes key metadata into a JSON array string.
// The array holds an optional {"type": "SHARD KEY", "name": ...} object (only when
// `shard_key_def` is non-null) followed by one "SHARED DICTIONARY" object per entry of
// `shared_dict_defs`, carrying its "name", "foreign_table" and "foreign_column".
// Returns the text produced by a rapidjson::Writer over that array.
// NOTE(review): the signature line and the opening of the "foreign_table"
// set_string_field call (listing line 1971) are not present in this listing.
1956  const ShardKeyDef* shard_key_def,
1957  const std::vector<SharedDictionaryDef>& shared_dict_defs) {
// `document` is used only as the owner of the allocator; the array itself is `arr`.
1958  rapidjson::Document document;
1959  auto& allocator = document.GetAllocator();
1960  rapidjson::Value arr(rapidjson::kArrayType);
1961  if (shard_key_def) {
1962  rapidjson::Value shard_key_obj(rapidjson::kObjectType);
1963  set_string_field(shard_key_obj, "type", "SHARD KEY", document);
1964  set_string_field(shard_key_obj, "name", shard_key_def->get_column(), document);
1965  arr.PushBack(shard_key_obj, allocator);
1966  }
1967  for (const auto& shared_dict_def : shared_dict_defs) {
1968  rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
1969  set_string_field(shared_dict_obj, "type", "SHARED DICTIONARY", document);
1970  set_string_field(shared_dict_obj, "name", shared_dict_def.get_column(), document);
1972  shared_dict_obj, "foreign_table", shared_dict_def.get_foreign_table(), document);
1973  set_string_field(shared_dict_obj,
1974  "foreign_column",
1975  shared_dict_def.get_foreign_column(),
1976  document);
1977  arr.PushBack(shared_dict_obj, allocator);
1978  }
// Write the array out as text.
1979  rapidjson::StringBuffer buffer;
1980  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
1981  arr.Accept(writer);
1982  return buffer.GetString();
1983 }
1984 
1985 template <typename LITERAL_TYPE,
1986  typename ASSIGNMENT,
1987  typename VALIDATE = DefaultValidate<LITERAL_TYPE>>
1988 decltype(auto) get_property_value(const NameValueAssign* p,
1989  ASSIGNMENT op,
1990  VALIDATE validate = VALIDATE()) {
1991  const auto val = validate(p);
1992  return op(val);
1993 }
1994 
// Option handler: stores the validated string value of `p` in td.storageType, using
// CaseSensitiveValidate as the validator. `columns` is not used.
// NOTE(review): the signature line is not present in this listing.
1996  const NameValueAssign* p,
1997  const std::list<ColumnDescriptor>& columns) {
1998  auto assignment = [&td](const auto val) { td.storageType = val; };
1999  return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
2000  p, assignment);
2001 }
2002 
// Option handler: stores the validated integer value of `p` in td.maxFragRows.
// `columns` is not used.
// NOTE(review): the signature line is not present in this listing.
2004  const NameValueAssign* p,
2005  const std::list<ColumnDescriptor>& columns) {
2006  return get_property_value<IntLiteral>(p,
2007  [&td](const auto val) { td.maxFragRows = val; });
2008 }
2009 
// Dataframe option handler: stores the validated integer value of `p` in
// df_td.maxFragRows. `columns` is not used.
// NOTE(review): the signature line is not present in this listing.
2011  const NameValueAssign* p,
2012  const std::list<ColumnDescriptor>& columns) {
2013  return get_property_value<IntLiteral>(
2014  p, [&df_td](const auto val) { df_td.maxFragRows = val; });
2015 }
2016 
// Option handler: stores the validated integer value of `p` in td.maxChunkSize.
// `columns` is not used.
// NOTE(review): the signature line is not present in this listing.
2018  const NameValueAssign* p,
2019  const std::list<ColumnDescriptor>& columns) {
2020  return get_property_value<IntLiteral>(p,
2021  [&td](const auto val) { td.maxChunkSize = val; });
2022 }
2023 
// Dataframe option handler: stores the validated integer value of `p` in
// df_td.maxChunkSize. `columns` is not used.
// NOTE(review): the signature line is not present in this listing.
2025  DataframeTableDescriptor& df_td,
2026  const NameValueAssign* p,
2027  const std::list<ColumnDescriptor>& columns) {
2028  return get_property_value<IntLiteral>(
2029  p, [&df_td](const auto val) { df_td.maxChunkSize = val; });
2030 }
2031 
// Dataframe option handler: stores the validated string value of `p` in
// df_td.delimiter. Throws std::runtime_error unless the value is exactly one character
// long. `columns` is not used.
// NOTE(review): the signature line is not present in this listing.
2033  const NameValueAssign* p,
2034  const std::list<ColumnDescriptor>& columns) {
2035  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2036  if (val.size() != 1) {
2037  throw std::runtime_error("Length of DELIMITER must be equal to 1.");
2038  }
2039  df_td.delimiter = val;
2040  });
2041 }
2042 
// Dataframe option handler: sets df_td.hasHeader from the validated string value of
// `p`. "TRUE" maps to true, "FALSE" to false; anything else throws std::runtime_error.
// `columns` is not used.
// NOTE(review): the comparison is against upper-case literals while the error message
// quotes lower-case ones; presumably the default validator upper-cases the value --
// confirm against DefaultValidate. The signature line is not present in this listing.
2044  const NameValueAssign* p,
2045  const std::list<ColumnDescriptor>& columns) {
2046  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2047  if (val == "FALSE") {
2048  df_td.hasHeader = false;
2049  } else if (val == "TRUE") {
2050  df_td.hasHeader = true;
2051  } else {
2052  throw std::runtime_error("Option HEADER support only 'true' or 'false' values.");
2053  }
2054  });
2055 }
2056 
// Option handler: stores the validated integer value of `p` in td.fragPageSize.
// `columns` is not used.
// NOTE(review): the signature line is not present in this listing.
2058  const NameValueAssign* p,
2059  const std::list<ColumnDescriptor>& columns) {
2060  return get_property_value<IntLiteral>(p,
2061  [&td](const auto val) { td.fragPageSize = val; });
2062 }
// Option handler: stores the validated integer value of `p` in td.maxRows.
// `columns` is not used.
// NOTE(review): the signature line is not present in this listing.
2064  const NameValueAssign* p,
2065  const std::list<ColumnDescriptor>& columns) {
2066  return get_property_value<IntLiteral>(p, [&td](const auto val) { td.maxRows = val; });
2067 }
2068 
// Dataframe option handler: stores the validated integer value of `p` in
// df_td.skipRows. `columns` is not used.
// NOTE(review): the signature line is not present in this listing.
2070  const NameValueAssign* p,
2071  const std::list<ColumnDescriptor>& columns) {
2072  return get_property_value<IntLiteral>(
2073  p, [&df_td](const auto val) { df_td.skipRows = val; });
2074 }
2075 
// Option handler for PARTITIONS: accepts only "SHARDED" or "REPLICATED" and stores the
// value in td.partitions. Throws std::runtime_error for any other value, and when
// "REPLICATED" is requested for a table that already has a shard column
// (td.shardedColumnId != 0). `columns` is not used.
// NOTE(review): the signature line is not present in this listing.
2077  const NameValueAssign* p,
2078  const std::list<ColumnDescriptor>& columns) {
2079  return get_property_value<StringLiteral>(p, [&td](const auto partitions_uc) {
2080  if (partitions_uc != "SHARDED" && partitions_uc != "REPLICATED") {
2081  throw std::runtime_error("PARTITIONS must be SHARDED or REPLICATED");
2082  }
2083  if (td.shardedColumnId != 0 && partitions_uc == "REPLICATED") {
2084  throw std::runtime_error(
2085  "A table cannot be sharded and replicated at the same time");
2086  };
2087  td.partitions = partitions_uc;
2088  });
2089 }
// Option handler for SHARD_COUNT. Requires a shard column to be set already
// (td.shardedColumnId non-zero), otherwise throws std::runtime_error.
// When g_leaf_count is non-zero the requested count must be a multiple of it, and
// td.nShards receives shard_count / g_leaf_count; when g_leaf_count is zero, td.nShards
// receives shard_count unchanged. `columns` is not used.
// NOTE(review): the signature line is not present in this listing.
2091  const NameValueAssign* p,
2092  const std::list<ColumnDescriptor>& columns) {
2093  if (!td.shardedColumnId) {
2094  throw std::runtime_error("SHARD KEY must be defined.");
2095  }
2096  return get_property_value<IntLiteral>(p, [&td](const auto shard_count) {
2097  if (g_leaf_count && shard_count % g_leaf_count) {
2098  throw std::runtime_error(
2099  "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2100  }
2101  td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
// NOTE(review): td.shardedColumnId was verified non-zero above and is not modified in
// this function, so this condition looks unreachable -- confirm the intended check.
2102  if (!td.shardedColumnId && !td.nShards) {
2103  throw std::runtime_error(
2104  "Must specify the number of shards through the SHARD_COUNT option");
2105  };
2106  });
2107 }
2108 
2109 decltype(auto) get_vacuum_def(TableDescriptor& td,
2110  const NameValueAssign* p,
2111  const std::list<ColumnDescriptor>& columns) {
2112  return get_property_value<StringLiteral>(p, [&td](const auto vacuum_uc) {
2113  if (vacuum_uc != "IMMEDIATE" && vacuum_uc != "DELAYED") {
2114  throw std::runtime_error("VACUUM must be IMMEDIATE or DELAYED");
2115  }
2116  td.hasDeletedCol = boost::iequals(vacuum_uc, "IMMEDIATE") ? false : true;
2117  });
2118 }
2119 
// Option handler for SORT_COLUMN: resolves the validated string value of `p` to a
// column index via sort_column_index(...) and stores it in td.sortedColumnId.
// Throws std::runtime_error when the lookup returns 0 (column not found).
// NOTE(review): the signature line is not present in this listing.
2121  const NameValueAssign* p,
2122  const std::list<ColumnDescriptor>& columns) {
2123  return get_property_value<StringLiteral>(p, [&td, &columns](const auto sort_upper) {
2124  td.sortedColumnId = sort_column_index(sort_upper, columns);
2125  if (!td.sortedColumnId) {
2126  throw std::runtime_error("Specified sort column " + sort_upper + " doesn't exist");
2127  }
2128  });
2129 }
2130 
2131 static const std::map<const std::string, const TableDefFuncPtr> tableDefFuncMap = {
2132  {"fragment_size"s, get_frag_size_def},
2133  {"max_chunk_size"s, get_max_chunk_size_def},
2134  {"page_size"s, get_page_size_def},
2135  {"max_rows"s, get_max_rows_def},
2136  {"partitions"s, get_partions_def},
2137  {"shard_count"s, get_shard_count_def},
2138  {"vacuum"s, get_vacuum_def},
2139  {"sort_column"s, get_sort_column_def},
2140  {"storage_type"s, get_storage_type}};
2141 
// Applies one CREATE TABLE option to `td`: the option name is lower-cased, looked up in
// tableDefFuncMap, and the matching handler is invoked with (td, p.get(), columns).
// Throws std::runtime_error listing the accepted option names when the name is unknown.
// NOTE(review): the signature line is not present in this listing.
2143  const std::unique_ptr<NameValueAssign>& p,
2144  const std::list<ColumnDescriptor>& columns) {
2145  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2146  if (it == tableDefFuncMap.end()) {
2147  throw std::runtime_error(
2148  "Invalid CREATE TABLE option " + *p->get_name() +
2149  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROWS, "
2150  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, or STORAGE_TYPE.");
2151  }
2152  return it->second(td, p.get(), columns);
2153 }
2154 
// Applies one CREATE TABLE AS option to `td`, using the same tableDefFuncMap lookup
// (lower-cased option name) as plain CREATE TABLE. Throws std::runtime_error when the
// name is unknown.
// NOTE(review): the error text mentions USE_SHARED_DICTIONARIES, but tableDefFuncMap
// has no such key; presumably callers handle that option before reaching this
// function -- confirm. The signature line is not present in this listing.
2156  const std::unique_ptr<NameValueAssign>& p,
2157  const std::list<ColumnDescriptor>& columns) {
2158  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2159  if (it == tableDefFuncMap.end()) {
2160  throw std::runtime_error(
2161  "Invalid CREATE TABLE AS option " + *p->get_name() +
2162  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROWS, "
2163  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE or "
2164  "USE_SHARED_DICTIONARIES.");
2165  }
2166  return it->second(td, p.get(), columns);
2167 }
2168 
2169 static const std::map<const std::string, const DataframeDefFuncPtr> dataframeDefFuncMap =
2170  {{"fragment_size"s, get_frag_size_dataframe_def},
2171  {"max_chunk_size"s, get_max_chunk_size_dataframe_def},
2172  {"skip_rows"s, get_skip_rows_def},
2173  {"delimiter"s, get_delimiter_def},
2174  {"header"s, get_header_def}};
2175 
// Applies one CREATE DATAFRAME option to `df_td`: the option name is lower-cased,
// looked up in dataframeDefFuncMap, and the matching handler is invoked with
// (df_td, p.get(), columns). Throws std::runtime_error when the name is unknown.
// NOTE(review): the signature line is not present in this listing.
2177  const std::unique_ptr<NameValueAssign>& p,
2178  const std::list<ColumnDescriptor>& columns) {
2179  const auto it =
2180  dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2181  if (it == dataframeDefFuncMap.end()) {
2182  throw std::runtime_error(
2183  "Invalid CREATE DATAFRAME option " + *p->get_name() +
2184  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
2185  }
2186  return it->second(df_td, p.get(), columns);
2187 }
2188 
2189 } // namespace
2190 
2191 CreateTableStmt::CreateTableStmt(const rapidjson::Value& payload) {
2192  CHECK(payload.HasMember("name"));
2193  table_ = std::make_unique<std::string>(json_str(payload["name"]));
2194  CHECK(payload.HasMember("elements"));
2195  CHECK(payload["elements"].IsArray());
2196 
2197  // TODO: support temporary tables
2198  is_temporary_ = false;
2199 
2200  if_not_exists_ = false;
2201  if (payload.HasMember("ifNotExists")) {
2202  if_not_exists_ = json_bool(payload["ifNotExists"]);
2203  }
2204 
2205  const auto elements = payload["elements"].GetArray();
2206  for (const auto& element : elements) {
2207  CHECK(element.IsObject());
2208  CHECK(element.HasMember("type"));
2209  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
2210  CHECK(element.HasMember("name"));
2211  auto col_name = std::make_unique<std::string>(json_str(element["name"]));
2212  CHECK(element.HasMember("sqltype"));
2213  const auto sql_types = to_sql_type(json_str(element["sqltype"]));
2214 
2215  // decimal / numeric precision / scale
2216  int precision = -1;
2217  int scale = -1;
2218  if (element.HasMember("precision")) {
2219  precision = json_i64(element["precision"]);
2220  }
2221  if (element.HasMember("scale")) {
2222  scale = json_i64(element["scale"]);
2223  }
2224 
2225  std::optional<int64_t> array_size;
2226  if (element.HasMember("arraySize")) {
2227  // We do not yet support geo arrays
2228  array_size = json_i64(element["arraySize"]);
2229  }
2230  std::unique_ptr<SQLType> sql_type;
2231  if (element.HasMember("subtype")) {
2232  CHECK(element.HasMember("coordinateSystem"));
2233  const auto subtype_sql_types = to_sql_type(json_str(element["subtype"]));
2234  sql_type = std::make_unique<SQLType>(
2235  subtype_sql_types,
2236  static_cast<int>(sql_types),
2237  static_cast<int>(json_i64(element["coordinateSystem"])),
2238  false);
2239  } else if (precision > 0 && scale > 0) {
2240  sql_type = std::make_unique<SQLType>(sql_types,
2241  precision,
2242  scale,
2243  /*is_array=*/array_size.has_value(),
2244  array_size ? *array_size : -1);
2245  } else if (precision > 0) {
2246  sql_type = std::make_unique<SQLType>(sql_types,
2247  precision,
2248  0,
2249  /*is_array=*/array_size.has_value(),
2250  array_size ? *array_size : -1);
2251  } else {
2252  sql_type = std::make_unique<SQLType>(sql_types,
2253  /*is_array=*/array_size.has_value(),
2254  array_size ? *array_size : -1);
2255  }
2256  CHECK(sql_type);
2257 
2258  CHECK(element.HasMember("nullable"));
2259  const auto nullable = json_bool(element["nullable"]);
2260  std::unique_ptr<ColumnConstraintDef> constraint_def;
2261  if (!nullable) {
2262  constraint_def = std::make_unique<ColumnConstraintDef>(/*notnull=*/true,
2263  /*unique=*/false,
2264  /*primarykey=*/false,
2265  /*defaultval=*/nullptr);
2266  }
2267  std::unique_ptr<CompressDef> compress_def;
2268  if (element.HasMember("encodingType") && !element["encodingType"].IsNull()) {
2269  std::string encoding_type = json_str(element["encodingType"]);
2270  CHECK(element.HasMember("encodingSize"));
2271  auto encoding_name =
2272  std::make_unique<std::string>(json_str(element["encodingType"]));
2273  compress_def = std::make_unique<CompressDef>(encoding_name.release(),
2274  json_i64(element["encodingSize"]));
2275  }
2276  auto col_def = std::make_unique<ColumnDef>(
2277  col_name.release(),
2278  sql_type.release(),
2279  compress_def ? compress_def.release() : nullptr,
2280  constraint_def ? constraint_def.release() : nullptr);
2281  table_element_list_.emplace_back(std::move(col_def));
2282  } else if (json_str(element["type"]) == "SQL_COLUMN_CONSTRAINT") {
2283  CHECK(element.HasMember("name"));
2284  if (json_str(element["name"]) == "SHARD_KEY") {
2285  CHECK(element.HasMember("columns"));
2286  CHECK(element["columns"].IsArray());
2287  const auto& columns = element["columns"].GetArray();
2288  if (columns.Size() != size_t(1)) {
2289  throw std::runtime_error("Only one shard column is currently supported.");
2290  }
2291  auto shard_key_def = std::make_unique<ShardKeyDef>(json_str(columns[0]));
2292  table_element_list_.emplace_back(std::move(shard_key_def));
2293  } else if (json_str(element["name"]) == "SHARED_DICT") {
2294  CHECK(element.HasMember("columns"));
2295  CHECK(element["columns"].IsArray());
2296  const auto& columns = element["columns"].GetArray();
2297  if (columns.Size() != size_t(1)) {
2298  throw std::runtime_error(
2299  R"(Only one column per shared dictionary entry is currently supported. Use multiple SHARED DICT statements to share dictionaries from multiple columns.)");
2300  }
2301  CHECK(element.HasMember("references") && element["references"].IsObject());
2302  const auto& references = element["references"].GetObject();
2303  std::string references_table_name;
2304  if (references.HasMember("table")) {
2305  references_table_name = json_str(references["table"]);
2306  } else {
2307  references_table_name = *table_;
2308  }
2309  CHECK(references.HasMember("column"));
2310 
2311  auto shared_dict_def = std::make_unique<SharedDictionaryDef>(
2312  json_str(columns[0]), references_table_name, json_str(references["column"]));
2313  table_element_list_.emplace_back(std::move(shared_dict_def));
2314 
2315  } else {
2316  LOG(FATAL) << "Unsupported type for SQL_COLUMN_CONSTRAINT: "
2317  << json_str(element["name"]);
2318  }
2319  } else {
2320  LOG(FATAL) << "Unsupported element type for CREATE TABLE: "
2321  << element["type"].GetString();
2322  }
2323  }
2324 
2325  CHECK(payload.HasMember("options"));
2326  if (payload["options"].IsObject()) {
2327  for (const auto& option : payload["options"].GetObject()) {
2328  auto option_name = std::make_unique<std::string>(json_str(option.name));
2329  std::unique_ptr<Literal> literal_value;
2330  if (option.value.IsString()) {
2331  auto literal_string = std::make_unique<std::string>(json_str(option.value));
2332  literal_value = std::make_unique<StringLiteral>(literal_string.release());
2333  } else if (option.value.IsInt() || option.value.IsInt64()) {
2334  literal_value = std::make_unique<IntLiteral>(json_i64(option.value));
2335  } else if (option.value.IsNull()) {
2336  literal_value = std::make_unique<NullLiteral>();
2337  } else {
2338  throw std::runtime_error("Unable to handle literal for " + *option_name);
2339  }
2340  CHECK(literal_value);
2341 
2342  storage_options_.emplace_back(std::make_unique<NameValueAssign>(
2343  option_name.release(), literal_value.release()));
2344  }
2345  } else {
2346  CHECK(payload["options"].IsNull());
2347  }
2348 }
2349 
// Populates `td`, `columns` and `shared_dict_defs` from this statement.
// Walks table_element_list_: SharedDictionaryDef entries are appended to
// `shared_dict_defs`, at most one ShardKeyDef is accepted, and every ColumnDef becomes a
// ColumnDescriptor in `columns`. Afterwards default table attributes are set, the shard
// column is resolved, storage options are applied, and td.keyMetainfo is serialized.
// Throws std::runtime_error for a second shard key, an unsupported table element, an
// unknown shard column, or a shard key without SHARD_COUNT.
// NOTE(review): several lines are not present in this listing (the call that receives
// the shared-dictionary arguments, a statement after the column name assignment, and
// the bodies of the is_temporary_ branches).
2350 void CreateTableStmt::executeDryRun(const Catalog_Namespace::SessionInfo& session,
2351  TableDescriptor& td,
2352  std::list<ColumnDescriptor>& columns,
2353  std::vector<SharedDictionaryDef>& shared_dict_defs) {
2354  std::unordered_set<std::string> uc_col_names;
2355  const auto& catalog = session.getCatalog();
2356  const ShardKeyDef* shard_key_def{nullptr};
2357  for (auto& e : table_element_list_) {
2358  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2359  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2361  this, shared_dict_def, columns, shared_dict_defs, catalog);
2362  shared_dict_defs.push_back(*shared_dict_def);
2363  continue;
2364  }
2365  if (dynamic_cast<ShardKeyDef*>(e.get())) {
2366  if (shard_key_def) {
2367  throw std::runtime_error("Specified more than one shard key");
2368  }
2369  shard_key_def = static_cast<const ShardKeyDef*>(e.get());
2370  continue;
2371  }
// Anything that is neither a shared dictionary, a shard key nor a column is rejected.
2372  if (!dynamic_cast<ColumnDef*>(e.get())) {
2373  throw std::runtime_error("Table constraints are not supported yet.");
2374  }
2375  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2376  ColumnDescriptor cd;
2377  cd.columnName = *coldef->get_column_name();
2379  setColumnDescriptor(cd, coldef);
2380  columns.push_back(cd);
2381  }
2382 
2383  ddl_utils::set_default_table_attributes(*table_, td, columns.size());
2384 
2385  if (shard_key_def) {
// shard_column_index returns 0 when the column is not found.
2386  td.shardedColumnId = shard_column_index(shard_key_def->get_column(), columns);
2387  if (!td.shardedColumnId) {
2388  throw std::runtime_error("Specified shard column " + shard_key_def->get_column() +
2389  " doesn't exist");
2390  }
2391  }
2392  if (is_temporary_) {
2394  } else {
2396  }
2397  if (!storage_options_.empty()) {
2398  for (auto& p : storage_options_) {
2399  get_table_definitions(td, p, columns);
2400  }
2401  }
// A shard key is only valid when a storage option also set a non-zero shard count.
2402  if (td.shardedColumnId && !td.nShards) {
2403  throw std::runtime_error("SHARD_COUNT needs to be specified with SHARD_KEY.");
2404  }
2405  td.keyMetainfo = serialize_key_metainfo(shard_key_def, shared_dict_defs);
2406 }
2407 
// Creates the table described by this statement in the session's catalog.
// Visible steps: take an execute write lock, check create privileges, return early
// when validateNonExistentTableOrView(...) reports false, build the descriptors via
// executeDryRun, record the creating user, create the table, and register the new
// table as a DB object for the current user.
// Throws std::runtime_error when the user lacks create privileges.
// NOTE(review): the lock constructor arguments and the privilege-check condition are
// not present in this listing.
2408 void CreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2409  auto& catalog = session.getCatalog();
2410 
2411  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2414 
2415  // check access privileges
2418  throw std::runtime_error("Table " + *table_ +
2419  " will not be created. User has no create privileges.");
2420  }
2421 
2422  if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
2423  return;
2424  }
2425 
2426  TableDescriptor td;
2427  std::list<ColumnDescriptor> columns;
2428  std::vector<SharedDictionaryDef> shared_dict_defs;
2429 
2430  executeDryRun(session, td, columns, shared_dict_defs);
2431  td.userId = session.get_currentUser().userId;
2432 
2433  catalog.createShardedTable(td, columns, shared_dict_defs);
2434  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2435  // privileges
2436  SysCatalog::instance().createDBObject(
2437  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
2438 }
2439 
// Creates a dataframe-backed table in the session's catalog.
// Visible steps: take an execute write lock, check create privileges, reject an
// already existing table name, translate table_element_list_ into column descriptors
// (rejecting duplicate column names case-insensitively), fill `df_td` with defaults and
// storage options, then create the table and register it as a DB object.
// Throws std::runtime_error for missing privileges, an existing table, unsupported
// table elements, or duplicate columns.
// NOTE(review): several lines are not present in this listing (lock arguments, the
// privilege-check condition, the declaration of `df_td`, the call receiving the
// shared-dictionary arguments, and some default assignments on `df_td`).
2440 void CreateDataframeStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2441  auto& catalog = session.getCatalog();
2442 
2443  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2446 
2447  // check access privileges
2450  throw std::runtime_error("Table " + *table_ +
2451  " will not be created. User has no create privileges.");
2452  }
2453 
2454  if (catalog.getMetadataForTable(*table_) != nullptr) {
2455  throw std::runtime_error("Table " + *table_ + " already exists.");
2456  }
2458  std::list<ColumnDescriptor> columns;
2459  std::vector<SharedDictionaryDef> shared_dict_defs;
2460 
2461  std::unordered_set<std::string> uc_col_names;
2462  for (auto& e : table_element_list_) {
2463  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2464  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2466  this, shared_dict_def, columns, shared_dict_defs, catalog);
2467  shared_dict_defs.push_back(*shared_dict_def);
2468  continue;
2469  }
2470  if (!dynamic_cast<ColumnDef*>(e.get())) {
2471  throw std::runtime_error("Table constraints are not supported yet.");
2472  }
2473  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2474  ColumnDescriptor cd;
2475  cd.columnName = *coldef->get_column_name();
// Column names are compared in upper case, so duplicates differing only in case are
// rejected as well.
2476  const auto uc_col_name = boost::to_upper_copy<std::string>(cd.columnName);
2477  const auto it_ok = uc_col_names.insert(uc_col_name);
2478  if (!it_ok.second) {
2479  throw std::runtime_error("Column '" + cd.columnName + "' defined more than once");
2480  }
2481 
2482  setColumnDescriptor(cd, coldef);
2483  columns.push_back(cd);
2484  }
2485 
2486  df_td.tableName = *table_;
2487  df_td.nColumns = columns.size();
2488  df_td.isView = false;
2489  df_td.fragmenter = nullptr;
2494  df_td.maxRows = DEFAULT_MAX_ROWS;
2496  if (!storage_options_.empty()) {
2497  for (auto& p : storage_options_) {
2498  get_dataframe_definitions(df_td, p, columns);
2499  }
2500  }
// No shard key is passed for dataframes; only shared dictionaries are serialized.
2501  df_td.keyMetainfo = serialize_key_metainfo(nullptr, shared_dict_defs);
2502  df_td.userId = session.get_currentUser().userId;
2503  df_td.storageType = *filename_;
2504 
2505  catalog.createShardedTable(df_td, columns, shared_dict_defs);
2506  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2507  // privileges
2508  SysCatalog::instance().createDBObject(
2509  session.get_currentUser(), df_td.tableName, TableDBObjectType, catalog);
2510 }
2511 
// Runs `select_stmt` for the session behind `query_state_proxy` and returns its rows.
// The statement is passed through pg_shim(...) and the Calcite manager to obtain a
// relational-algebra plan, which a RelAlgExecutor then executes. On return, `targets`
// holds the target metadata of the result.
// `validate_only` and `outer_fragment_indices` are forwarded inside the
// ExecutionOptions initializer. The device type is forced to CPU when HAVE_CUDA is not
// defined or when the parsed query hints request cpu_mode.
// NOTE(review): the declarations of `executor` and `co`, and some initializer
// arguments, are not present in this listing.
2512 std::shared_ptr<ResultSet> getResultSet(QueryStateProxy query_state_proxy,
2513  const std::string select_stmt,
2514  std::vector<TargetMetaInfo>& targets,
2515  bool validate_only = false,
2516  std::vector<size_t> outer_fragment_indices = {}) {
2517  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2518  auto& catalog = session->getCatalog();
2519 
2521 #ifdef HAVE_CUDA
2522  const auto device_type = session->get_executor_device_type();
2523 #else
2524  const auto device_type = ExecutorDeviceType::CPU;
2525 #endif // HAVE_CUDA
2526  auto calcite_mgr = catalog.getCalciteMgr();
2527 
2528  // TODO MAT this should actually get the global or the session parameter for
2529  // view optimization
2530  const auto query_ra =
2531  calcite_mgr
2532  ->process(query_state_proxy, pg_shim(select_stmt), {}, true, false, false, true)
2533  .plan_result;
2534  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
2536  const auto& query_hints = ra_executor.getParsedQueryHints();
2537  if (query_hints.cpu_mode) {
2538  co.device_type = ExecutorDeviceType::CPU;
2539  }
2541  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2542  // struct
2543  ExecutionOptions eo = {false,
2544  true,
2545  false,
2546  true,
2547  false,
2548  false,
2549  validate_only,
2550  false,
2551  10000,
2552  false,
2553  false,
2554  0.9,
2555  false,
2556  1000,
2558  outer_fragment_indices};
// `result` starts out as an empty placeholder and is overwritten by the query result.
2559  ExecutionResult result{std::make_shared<ResultSet>(std::vector<TargetInfo>{},
2562  nullptr,
2563  nullptr),
2564  {}};
2565  result = ra_executor.executeRelAlgQuery(co, eo, false, nullptr);
2566  targets = result.getTargetsMeta();
2567 
2568  return result.getRows();
2569 }
2570 
// Plans `sql_query_string` (after pg_shim) through the Calcite manager and returns
// RelAlgExecutor::getOuterFragmentCount(co, eo) for the resulting plan.
// The device type is CPU when HAVE_CUDA is not defined or when the parsed query hints
// request cpu_mode; otherwise it is the session's executor device type.
// NOTE(review): the declaration of `executor` and one CompilationOptions initializer
// argument are not present in this listing.
2571 size_t LocalConnector::getOuterFragmentCount(QueryStateProxy query_state_proxy,
2572  std::string& sql_query_string) {
2573  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2574  auto& catalog = session->getCatalog();
2575 
2577 #ifdef HAVE_CUDA
2578  const auto device_type = session->get_executor_device_type();
2579 #else
2580  const auto device_type = ExecutorDeviceType::CPU;
2581 #endif // HAVE_CUDA
2582  auto calcite_mgr = catalog.getCalciteMgr();
2583 
2584  // TODO MAT this should actually get the global or the session parameter for
2585  // view optimization
2586  const auto query_ra =
2587  calcite_mgr
2588  ->process(
2589  query_state_proxy, pg_shim(sql_query_string), {}, true, false, false, true)
2590  .plan_result;
2591  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
2592  const auto& query_hints = ra_executor.getParsedQueryHints();
2593  CompilationOptions co = {query_hints.cpu_mode ? ExecutorDeviceType::CPU : device_type,
2594  true,
2596  false};
2597  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2598  // struct
2599  ExecutionOptions eo = {
2600  false, true, false, true, false, false, false, false, 10000, false, false, 0.9};
2601  return ra_executor.getOuterFragmentCount(co, eo);
2602 }
2603 
2604 AggregatedResult LocalConnector::query(QueryStateProxy query_state_proxy,
2605  std::string& sql_query_string,
2606  std::vector<size_t> outer_frag_indices,
2607  bool validate_only) {
2608  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2609  // TODO(PS): Should we be using the shimmed query in getResultSet?
2610  std::string pg_shimmed_select_query = pg_shim(sql_query_string);
2611 
2612  std::vector<TargetMetaInfo> target_metainfos;
2613  auto result_rows = getResultSet(query_state_proxy,
2614  sql_query_string,
2615  target_metainfos,
2616  validate_only,
2617  outer_frag_indices);
2618  AggregatedResult res = {result_rows, target_metainfos};
2619  return res;
2620 }
2621 
2622 std::vector<AggregatedResult> LocalConnector::query(
2623  QueryStateProxy query_state_proxy,
2624  std::string& sql_query_string,
2625  std::vector<size_t> outer_frag_indices) {
2626  auto res = query(query_state_proxy, sql_query_string, outer_frag_indices, false);
2627  return {res};
2628 }
2629 
2630 void LocalConnector::insertDataToLeaf(const Catalog_Namespace::SessionInfo& session,
2631  const size_t leaf_idx,
2632  Fragmenter_Namespace::InsertData& insert_data) {
2633  CHECK(leaf_idx == 0);
2634  auto& catalog = session.getCatalog();
2635  auto created_td = catalog.getMetadataForTable(insert_data.tableId);
2636  ChunkKey chunkKey = {catalog.getCurrentDB().dbId, created_td->tableId};
2637  // TODO(adb): Ensure that we have previously obtained a write lock for this table's
2638  // data.
2639  created_td->fragmenter->insertDataNoCheckpoint(insert_data);
2640 }
2641 
2642 void LocalConnector::checkpoint(const Catalog_Namespace::SessionInfo& session,
2643  int table_id) {
2644  auto& catalog = session.getCatalog();
2645  catalog.checkpointWithAutoRollback(table_id);
2646 }
2647 
2648 void LocalConnector::rollback(const Catalog_Namespace::SessionInfo& session,
2649  int table_id) {
2650  auto& catalog = session.getCatalog();
2651  auto db_id = catalog.getDatabaseId();
2652  auto table_epochs = catalog.getTableEpochs(db_id, table_id);
2653  catalog.setTableEpochs(db_id, table_epochs);
2654 }
2655 
// Builds column descriptors from the target metadata in `result`.
// Two parallel lists are produced: one mirroring the result's physical types, and a
// "for create" variant with adjusted compression parameters. `for_create` selects which
// list is returned.
// A target named exactly "rowid" gets a numeric suffix appended (0, 1, ...).
// NOTE(review): the condition guarding the comp-param reset and one statement in the
// date branch are not present in this listing; per the surviving comments they concern
// dictionary-backed columns and kENCODING_DATE_IN_DAYS respectively -- confirm.
2656 std::list<ColumnDescriptor> LocalConnector::getColumnDescriptors(AggregatedResult& result,
2657  bool for_create) {
2658  std::list<ColumnDescriptor> column_descriptors;
2659  std::list<ColumnDescriptor> column_descriptors_for_create;
2660 
2661  int rowid_suffix = 0;
2662  for (const auto& target_metainfo : result.targets_meta) {
2663  ColumnDescriptor cd;
2664  cd.columnName = target_metainfo.get_resname();
2665  if (cd.columnName == "rowid") {
2666  cd.columnName += std::to_string(rowid_suffix++);
2667  }
2668  cd.columnType = target_metainfo.get_physical_type_info();
2669 
// The "for create" copy starts identical and is adjusted below.
2670  ColumnDescriptor cd_for_create = cd;
2671 
2673  // we need to reset the comp param (as this points to the actual dictionary)
2674  if (cd.columnType.is_array()) {
2675  // for dict encoded arrays, it is always 4 bytes
2676  cd_for_create.columnType.set_comp_param(32);
2677  } else {
2678  cd_for_create.columnType.set_comp_param(cd.columnType.get_size() * 8);
2679  }
2680  }
2681 
2682  if (cd.columnType.is_date() && !cd.columnType.is_date_in_days()) {
2683  // default to kENCODING_DATE_IN_DAYS encoding
2685  cd_for_create.columnType.set_comp_param(0);
2686  }
2687 
2688  column_descriptors_for_create.push_back(cd_for_create);
2689  column_descriptors.push_back(cd);
2690  }
2691 
2692  if (for_create) {
2693  return column_descriptors_for_create;
2694  }
2695 
2696  return column_descriptors;
2697 }
2698 
// Loads the result of select_query_ into the table named table_name_.
//
// When validate_table is true, the target table, the caller's privileges and the
// compatibility of the source (select) and target column types are checked first;
// every violation is reported as std::runtime_error.
// The data itself is then fetched through leafs_connector_, converted to columnar
// form in batches and handed to an InsertDataLoader. Any exception during loading
// triggers a rollback attempt on the connector and is rethrown.
//
// NOTE(review): this listing lacks a few source lines (see the gaps in the embedded
// numbering, e.g. the initializers of td_with_lock and insert_data_lock); consult the
// complete file before changing the affected statements.
2699 void InsertIntoTableAsSelectStmt::populateData(QueryStateProxy query_state_proxy,
2700  bool validate_table) {
2701  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2702  auto& catalog = session->getCatalog();
2703  const auto td_with_lock =
2705  catalog, table_name_);
2706  const auto td = td_with_lock();
2708 
  // If no connector was injected, fall back to a function-local LocalConnector. In
  // that case the table is only populated when not running in cluster mode.
  // NOTE(review): leafs_connector_ is a member and is left pointing at this local
  // object; nothing in this function resets it before returning — confirm that it is
  // not dereferenced afterwards.
2709  LocalConnector local_connector;
2710  bool populate_table = false;
2711 
2712  if (leafs_connector_) {
2713  populate_table = true;
2714  } else {
2715  leafs_connector_ = &local_connector;
2716  if (!g_cluster) {
2717  populate_table = true;
2718  }
2719  }
2720 
  // Resolves the target columns: every column returned by
  // getAllColumnMetadataForTable() when no explicit column list was given, otherwise
  // each listed column (throws if one of them does not exist).
2721  auto get_target_column_descriptors = [this, &catalog](const TableDescriptor* td) {
2722  std::vector<const ColumnDescriptor*> target_column_descriptors;
2723  if (column_list_.empty()) {
2724  auto list = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
2725  target_column_descriptors = {std::begin(list), std::end(list)};
2726 
2727  } else {
2728  for (auto& c : column_list_) {
2729  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
2730  if (cd == nullptr) {
2731  throw std::runtime_error("Column " + *c + " does not exist.");
2732  }
2733  target_column_descriptors.push_back(cd);
2734  }
2735  }
2736 
2737  return target_column_descriptors;
2738  };
2739 
  // NOTE(review): td is passed to table_is_temporary() before the null check in the
  // validation branch below — confirm that a missing table cannot reach this point
  // with td == nullptr.
2740  bool is_temporary = table_is_temporary(td);
2741 
2742  // Don't allow simultaneous inserts
2743  const auto insert_data_lock =
2745 
2746  if (validate_table) {
2747  // check access privileges
2748  if (!td) {
2749  throw std::runtime_error("Table " + table_name_ + " does not exist.");
2750  }
2751  if (td->isView) {
2752  throw std::runtime_error("Insert to views is not supported yet.");
2753  }
2754 
2755  if (!session->checkDBAccessPrivileges(DBObjectType::TableDBObjectType,
2757  table_name_)) {
2758  throw std::runtime_error("User has no insert privileges on " + table_name_ + ".");
2759  }
2760 
2761  // only validate the select query so we get the target types
2762  // correctly, but do not populate the result set
2763  auto result = local_connector.query(query_state_proxy, select_query_, {}, true);
2764  auto source_column_descriptors = local_connector.getColumnDescriptors(result, false);
2765 
  // the explicit column list (if any) has to cover every column of the table
2766  std::vector<const ColumnDescriptor*> target_column_descriptors =
2767  get_target_column_descriptors(td);
2768  if (catalog.getAllColumnMetadataForTable(td->tableId, false, false, false).size() !=
2769  target_column_descriptors.size()) {
2770  throw std::runtime_error("Insert into a subset of columns is not supported yet.");
2771  }
2772 
2773  if (source_column_descriptors.size() != target_column_descriptors.size()) {
2774  throw std::runtime_error("The number of source and target columns does not match.");
2775  }
2776 
  // compare source and target columns pairwise, by position
2777  for (int i = 0; i < source_column_descriptors.size(); i++) {
2778  const ColumnDescriptor* source_cd =
2779  &(*std::next(source_column_descriptors.begin(), i));
2780  const ColumnDescriptor* target_cd = target_column_descriptors.at(i);
2781 
  // differing SQL types are tolerated unless either side is a time, geometry,
  // array or boolean type
2782  if (source_cd->columnType.get_type() != target_cd->columnType.get_type()) {
2783  auto type_cannot_be_cast = [](const auto& col_type) {
2784  return (col_type.is_time() || col_type.is_geometry() || col_type.is_array() ||
2785  col_type.is_boolean());
2786  };
2787 
2788  if (type_cannot_be_cast(source_cd->columnType) ||
2789  type_cannot_be_cast(target_cd->columnType)) {
2790  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2791  source_cd->columnType.get_type_name() +
2792  "' and target '" + target_cd->columnName + " " +
2793  target_cd->columnType.get_type_name() +
2794  "' column types do not match.");
2795  }
2796  }
  // arrays: the element types have to be identical
2797  if (source_cd->columnType.is_array()) {
2798  if (source_cd->columnType.get_subtype() != target_cd->columnType.get_subtype()) {
2799  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2800  source_cd->columnType.get_type_name() +
2801  "' and target '" + target_cd->columnName + " " +
2802  target_cd->columnType.get_type_name() +
2803  "' array column element types do not match.");
2804  }
2805  }
2806 
  // decimals (scalar or array element): the scales have to be identical
2807  if (source_cd->columnType.is_decimal() ||
2808  source_cd->columnType.get_elem_type().is_decimal()) {
2809  SQLTypeInfo sourceType = source_cd->columnType;
2810  SQLTypeInfo targetType = target_cd->columnType;
2811 
2812  if (source_cd->columnType.is_array()) {
2813  sourceType = source_cd->columnType.get_elem_type();
2814  targetType = target_cd->columnType.get_elem_type();
2815  }
2816 
2817  if (sourceType.get_scale() != targetType.get_scale()) {
2818  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2819  source_cd->columnType.get_type_name() +
2820  "' and target '" + target_cd->columnName + " " +
2821  target_cd->columnType.get_type_name() +
2822  "' decimal columns scales do not match.");
2823  }
2824  }
2825 
  // strings: the target has to be a string with the same compression
2826  if (source_cd->columnType.is_string()) {
2827  if (!target_cd->columnType.is_string()) {
2828  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2829  source_cd->columnType.get_type_name() +
2830  "' and target '" + target_cd->columnName + " " +
2831  target_cd->columnType.get_type_name() +
2832  "' column types do not match.");
2833  }
2834  if (source_cd->columnType.get_compression() !=
2835  target_cd->columnType.get_compression()) {
2836  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2837  source_cd->columnType.get_type_name() +
2838  "' and target '" + target_cd->columnName + " " +
2839  target_cd->columnType.get_type_name() +
2840  "' columns string encodings do not match.");
2841  }
2842  }
2843 
  // timestamps: the dimensions (reported to the user as "precisions") have to match
2844  if (source_cd->columnType.is_timestamp() && target_cd->columnType.is_timestamp()) {
2845  if (source_cd->columnType.get_dimension() !=
2846  target_cd->columnType.get_dimension()) {
2847  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2848  source_cd->columnType.get_type_name() +
2849  "' and target '" + target_cd->columnName + " " +
2850  target_cd->columnType.get_type_name() +
2851  "' timestamp column precisions do not match.");
2852  }
2853  }
2854 
  // all types not excluded here must not be wider in the source than in the target
2855  if (!source_cd->columnType.is_string() && !source_cd->columnType.is_geometry() &&
2856  !source_cd->columnType.is_integer() && !source_cd->columnType.is_decimal() &&
2857  !source_cd->columnType.is_date() && !source_cd->columnType.is_time() &&
2858  !source_cd->columnType.is_timestamp() &&
2859  source_cd->columnType.get_size() > target_cd->columnType.get_size()) {
2860  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2861  source_cd->columnType.get_type_name() +
2862  "' and target '" + target_cd->columnName + " " +
2863  target_cd->columnType.get_type_name() +
2864  "' column encoding sizes do not match.");
2865  }
2866  }
2867  }
2868 
2869  if (!populate_table) {
2870  return;
2871  }
2872 
  // counters that are only used for the VLOG summary at the end
2873  int64_t total_row_count = 0;
2874  int64_t total_source_query_time_ms = 0;
2875  int64_t total_target_value_translate_time_ms = 0;
2876  int64_t total_data_load_time_ms = 0;
2877 
2878  Fragmenter_Namespace::InsertDataLoader insertDataLoader(*leafs_connector_);
2879  auto target_column_descriptors = get_target_column_descriptors(td);
2880 
2881  auto outer_frag_count =
2882  leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
2883 
  // Run the loop at least once: with an outer fragment count of zero the query is
  // issued a single time with an empty list of allowed fragment indices.
2884  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
2885 
2886  try {
2887  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
2888  std::vector<size_t> allowed_outer_fragment_indices;
2889 
2890  if (outer_frag_count) {
2891  allowed_outer_fragment_indices.push_back(outer_frag_idx);
2892  }
2893 
2894  const auto query_clock_begin = timer_start();
2895  std::vector<AggregatedResult> query_results = leafs_connector_->query(
2896  query_state_proxy, select_query_, allowed_outer_fragment_indices);
2897  total_source_query_time_ms += timer_stop(query_clock_begin);
2898 
2899  for (auto& res : query_results) {
2900  auto result_rows = res.rs;
2901  result_rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
2902  const auto num_rows = result_rows->rowCount();
2903 
2904  if (0 == num_rows) {
2905  continue;
2906  }
2907 
2908  total_row_count += num_rows;
2909 
2910  size_t leaf_count = leafs_connector_->leafCount();
2911 
  // Batch size: num_rows / leaf_count, capped at 64K rows.
  // NOTE(review): divides by leaf_count — confirm leafCount() never returns 0.
2912  size_t max_number_of_rows_per_package =
2913  std::min(num_rows / leaf_count, 64UL * 1024UL);
2914 
2915  size_t start_row = 0;
2916  size_t num_rows_to_process = std::min(num_rows, max_number_of_rows_per_package);
2917 
2918  // ensure that at least one row is being processed
2919  num_rows_to_process = std::max(num_rows_to_process, 1UL);
2920 
2921  std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
2922 
2924 
  // NOTE(review): std::thread::hardware_concurrency() is allowed to return 0; the
  // vectors below would then be empty and the stride computation in the parallel
  // branch would divide by zero — confirm this cannot happen on supported hosts.
2925  const int num_worker_threads = std::thread::hardware_concurrency();
2926 
2927  std::vector<size_t> thread_start_idx(num_worker_threads),
2928  thread_end_idx(num_worker_threads);
  // parallel conversion is only used for non-truncated results with more than
  // 20000 rows in the first batch
2929  bool can_go_parallel = !result_rows->isTruncated() && num_rows_to_process > 20000;
2930 
  // next free row slot in the current batch; shared by all converter threads
2931  std::atomic<size_t> row_idx{0};
2932 
  // Parallel worker: scans its own [start, end) range of result-set entries, skips
  // empty rows and claims an output slot via row_idx for each non-empty one. It
  // stops once the batch is full and stores the entry index it reached back into
  // thread_start_idx[thread_id], which is the start index read on the next call.
2933  auto convert_function = [&result_rows,
2934  &value_converters,
2935  &row_idx,
2936  &num_rows_to_process,
2937  &thread_start_idx,
2938  &thread_end_idx](const int thread_id) {
2939  const int num_cols = value_converters.size();
2940  const size_t start = thread_start_idx[thread_id];
2941  const size_t end = thread_end_idx[thread_id];
2942  size_t idx = 0;
2943  for (idx = start; idx < end; ++idx) {
2944  const auto result_row = result_rows->getRowAtNoTranslations(idx);
2945  if (!result_row.empty()) {
2946  size_t target_row = row_idx.fetch_add(1);
2947 
2948  if (target_row >= num_rows_to_process) {
2949  break;
2950  }
2951 
2952  for (unsigned int col = 0; col < num_cols; col++) {
2953  const auto& mapd_variant = result_row[col];
2954  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
2955  }
2956  }
2957  }
2958 
2959  thread_start_idx[thread_id] = idx;
2960  };
2961 
  // Sequential variant: fetches rows with getNextRow() and requires every fetched
  // row to be non-empty (CHECK).
2962  auto single_threaded_convert_function = [&result_rows,
2963  &value_converters,
2964  &row_idx,
2965  &num_rows_to_process,
2966  &thread_start_idx,
2967  &thread_end_idx](const int thread_id) {
2968  const int num_cols = value_converters.size();
2969  const size_t start = thread_start_idx[thread_id];
2970  const size_t end = thread_end_idx[thread_id];
2971  size_t idx = 0;
2972  for (idx = start; idx < end; ++idx) {
2973  size_t target_row = row_idx.fetch_add(1);
2974 
2975  if (target_row >= num_rows_to_process) {
2976  break;
2977  }
2978  const auto result_row = result_rows->getNextRow(false, false);
2979  CHECK(!result_row.empty());
2980  for (unsigned int col = 0; col < num_cols; col++) {
2981  const auto& mapd_variant = result_row[col];
2982  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
2983  }
2984  }
2985 
2986  thread_start_idx[thread_id] = idx;
2987  };
2988 
  // Assign entry ranges: equally sized strides (rounded up) per worker thread in
  // the parallel case, the whole entry range to slot 0 otherwise.
2989  if (can_go_parallel) {
2990  const size_t entryCount = result_rows->entryCount();
2991  for (size_t i = 0,
2992  start_entry = 0,
2993  stride = (entryCount + num_worker_threads - 1) / num_worker_threads;
2994  i < num_worker_threads && start_entry < entryCount;
2995  ++i, start_entry += stride) {
2996  const auto end_entry = std::min(start_entry + stride, entryCount);
2997  thread_start_idx[i] = start_entry;
2998  thread_end_idx[i] = end_entry;
2999  }
3000 
3001  } else {
3002  thread_start_idx[0] = 0;
3003  thread_end_idx[0] = result_rows->entryCount();
3004  }
3005 
3006  std::shared_ptr<Executor> executor;
3007 
3010  }
3011 
  // one iteration per insert batch; the converters are rebuilt for every batch
3012  while (start_row < num_rows) {
3013  value_converters.clear();
3014  row_idx = 0;
3015  int colNum = 0;
3016  for (const auto targetDescriptor : target_column_descriptors) {
3017  auto sourceDataMetaInfo = res.targets_meta[colNum++];
3018 
3020  num_rows_to_process,
3021  catalog,
3022  sourceDataMetaInfo,
3023  targetDescriptor,
3024  targetDescriptor->columnType,
3025  !targetDescriptor->columnType.get_notnull(),
3026  result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
3028  ? executor->getStringDictionaryProxy(
3029  sourceDataMetaInfo.get_type_info().get_comp_param(),
3030  result_rows->getRowSetMemOwner(),
3031  true)
3032  : nullptr};
3033  auto converter = factory.create(param);
3034  value_converters.push_back(std::move(converter));
3035  }
3036 
3037  const auto translate_clock_begin = timer_start();
3038  if (can_go_parallel) {
3039  std::vector<std::future<void>> worker_threads;
3040  for (int i = 0; i < num_worker_threads; ++i) {
3041  worker_threads.push_back(
3042  std::async(std::launch::async, convert_function, i));
3043  }
3044 
  // wait for every worker first, then call get() so that an exception thrown by
  // a worker is rethrown here only after all of them have finished
3045  for (auto& child : worker_threads) {
3046  child.wait();
3047  }
3048  for (auto& child : worker_threads) {
3049  child.get();
3050  }
3051 
3052  } else {
3053  single_threaded_convert_function(0);
3054  }
3055 
3056  // finalize the insert data
3057  {
3058  auto finalizer_func =
3059  [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
3060  targetValueConverter->finalizeDataBlocksForInsertData();
3061  };
  // one asynchronous finalizer task per column converter
3062  std::vector<std::future<void>> worker_threads;
3063  for (auto& converterPtr : value_converters) {
3064  worker_threads.push_back(
3065  std::async(std::launch::async, finalizer_func, converterPtr.get()));
3066  }
3067 
3068  for (auto& child : worker_threads) {
3069  child.wait();
3070  }
3071  for (auto& child : worker_threads) {
3072  child.get();
3073  }
3074  }
3075 
3077  insert_data.databaseId = catalog.getCurrentDB().dbId;
3078  CHECK(td);
3079  insert_data.tableId = td->tableId;
3080  insert_data.numRows = num_rows_to_process;
3081 
3082  for (int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
3083  value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
3084  }
3085  total_target_value_translate_time_ms += timer_stop(translate_clock_begin);
3086 
3087  const auto data_load_clock_begin = timer_start();
3088  insertDataLoader.insertData(*session, insert_data);
3089  total_data_load_time_ms += timer_stop(data_load_clock_begin);
3090 
  // advance to the next batch; the last one may be smaller
3091  start_row += num_rows_to_process;
3092  num_rows_to_process =
3093  std::min(num_rows - start_row, max_number_of_rows_per_package);
3094  }
3095  }
3096  }
3097  } catch (...) {
  // Roll back on any failure. A std::exception thrown by the rollback itself is
  // only logged, so the original exception is the one that is rethrown.
3098  try {
3099  leafs_connector_->rollback(*session, td->tableId);
3100  } catch (std::exception& e) {
3101  LOG(ERROR) << "An error occurred during ITAS rollback attempt. Table id: "
3102  << td->tableId << ", Error: " << e.what();
3103  }
3104  throw;
3105  }
3106 
3107  int64_t total_time_ms = total_source_query_time_ms +
3108  total_target_value_translate_time_ms + total_data_load_time_ms;
3109 
3110  VLOG(1) << "CTAS/ITAS " << total_row_count << " rows loaded in " << total_time_ms
3111  << "ms (outer_frag_count=" << outer_frag_count
3112  << ", query_time=" << total_source_query_time_ms
3113  << "ms, translation_time=" << total_target_value_translate_time_ms
3114  << "ms, data_load_time=" << total_data_load_time_ms
3115  << "ms)\nquery: " << select_query_;
3116 
  // temporary tables are not checkpointed
3117  if (!is_temporary) {
3118  leafs_connector_->checkpoint(*session, td->tableId);
3119  }
3120 }
3121 
3122 void InsertIntoTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3123  auto session_copy = session;
3124  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3125  &session_copy, boost::null_deleter());
3126  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
3127  auto stdlog = STDLOG(query_state);
3128  populateData(query_state->createQueryStateProxy(), true);
3129 }
3130 
// Executes CREATE TABLE ... AS SELECT.
//
// If no leafs connector has been injected, the table is created here: the select
// query is run in validation-only mode to obtain the result column descriptors,
// storage options are applied and the table is registered in the catalog.
// Afterwards the data is loaded through populateData() (without table validation).
// If loading throws and the server is not in cluster mode, a table named table_name_
// is dropped again before the exception is rethrown.
//
// NOTE(review): several source lines are absent from this listing (privilege check
// condition, part of the TableDescriptor setup); see the gaps in the numbering.
3131 void CreateTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3132  auto session_copy = session;
  // non-owning shared_ptr: session_copy lives on this stack frame
3133  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3134  &session_copy, boost::null_deleter());
3135  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
3136  auto stdlog = STDLOG(query_state);
3137 
3138  LocalConnector local_connector;
3139  auto& catalog = session.getCatalog();
  // the table is only created here when no leafs connector was set
3140  bool create_table = nullptr == leafs_connector_;
3141 
3142  if (create_table) {
3143  // check access privileges
3146  throw std::runtime_error("CTAS failed. Table " + table_name_ +
3147  " will not be created. User has no create privileges.");
3148  }
3149 
  // an existing table is silently accepted with IF NOT EXISTS, otherwise an error
3150  if (catalog.getMetadataForTable(table_name_) != nullptr) {
3151  if (if_not_exists_) {
3152  return;
3153  }
3154  throw std::runtime_error("Table " + table_name_ +
3155  " already exists and no data was loaded.");
3156  }
3157 
3158  // only validate the select query so we get the target types
3159  // correctly, but do not populate the result set
3160  auto result = local_connector.query(
3161  query_state->createQueryStateProxy(), select_query_, {}, true);
3162  const auto column_descriptors_for_create =
3163  local_connector.getColumnDescriptors(result, true);
3164 
3165  // some validation as the QE might return some out of range column types
3166  for (auto& cd : column_descriptors_for_create) {
3167  if (cd.columnType.is_decimal() && cd.columnType.get_precision() > 18) {
3168  throw std::runtime_error(cd.columnName + ": Precision too high, max 18.");
3169  }
3170  }
3171 
3172  TableDescriptor td;
3173  td.tableName = table_name_;
3174  td.userId = session.get_currentUser().userId;
3175  td.nColumns = column_descriptors_for_create.size();
3176  td.isView = false;
3177  td.fragmenter = nullptr;
3183  if (is_temporary_) {
3185  } else {
3187  }
3188 
  // Dictionary sharing is on by default. The "use_shared_dictionaries" storage
  // option (matched case-insensitively) must be a string literal; "true", "1" and
  // "t" enable it, any other value disables it. All other options are passed to
  // get_table_definitions_for_ctas().
3189  bool use_shared_dictionaries = true;
3190 
3191  if (!storage_options_.empty()) {
3192  for (auto& p : storage_options_) {
3193  if (boost::to_lower_copy<std::string>(*p->get_name()) ==
3194  "use_shared_dictionaries") {
3195  const StringLiteral* literal =
3196  dynamic_cast<const StringLiteral*>(p->get_value());
3197  if (nullptr == literal) {
3198  throw std::runtime_error(
3199  "USE_SHARED_DICTIONARIES must be a string parameter");
3200  }
3201  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
3202  use_shared_dictionaries = val == "true" || val == "1" || val == "t";
3203  } else {
3204  get_table_definitions_for_ctas(td, p, column_descriptors_for_create);
3205  }
3206  }
3207  }
3208 
3209  std::vector<SharedDictionaryDef> sharedDictionaryRefs;
3210 
  // For every dictionary-encoded string column of the select result whose
  // dictionary id is known to the catalog's dictionary-to-column mapping, let the
  // new column share the dictionary of the mapped column.
3211  if (use_shared_dictionaries) {
3212  const auto source_column_descriptors =
3213  local_connector.getColumnDescriptors(result, false);
3214  const auto mapping = catalog.getDictionaryToColumnMapping();
3215 
3216  for (auto& source_cd : source_column_descriptors) {
3217  const auto& ti = source_cd.columnType;
3218  if (ti.is_string()) {
3219  if (ti.get_compression() == kENCODING_DICT) {
3220  int dict_id = ti.get_comp_param();
3221  auto it = mapping.find(dict_id);
3222  if (mapping.end() != it) {
3223  const auto targetColumn = it->second;
3224  auto targetTable =
3225  catalog.getMetadataForTable(targetColumn->tableId, false);
3226  CHECK(targetTable);
3227  LOG(INFO) << "CTAS: sharing text dictionary on column "
3228  << source_cd.columnName << " with " << targetTable->tableName
3229  << "." << targetColumn->columnName;
3230  sharedDictionaryRefs.push_back(
3231  SharedDictionaryDef(source_cd.columnName,
3232  targetTable->tableName,
3233  targetColumn->columnName));
3234  }
3235  }
3236  }
3237  }
3238  }
3239 
3240  // currently no means of defining sharding in CTAS
3241  td.keyMetainfo = serialize_key_metainfo(nullptr, sharedDictionaryRefs);
3242 
3243  catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs, true);
3244  // TODO (max): It's transactionally unsafe, should be fixed: we may create object
3245  // w/o privileges
3246  SysCatalog::instance().createDBObject(
3247  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
3248  }
3249 
3250  try {
3251  populateData(query_state->createQueryStateProxy(), false);
3252  } catch (...) {
  // outside cluster mode: remove the table again when the load failed
3253  if (!g_cluster) {
3254  const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
3255  if (created_td) {
3256  catalog.dropTable(created_td);
3257  }
3258  }
3259  throw;
3260  }
3261 }
3262 
// Executes DROP TABLE: acquires the table descriptor under a schema write lock,
// checks the caller's privileges and drops the table from the catalog.
// When the lock/descriptor acquisition throws std::runtime_error, the statement
// returns silently if `if_exists` is set and propagates the error otherwise.
//
// NOTE(review): a few source lines are missing from this listing (lock expressions,
// privilege arguments, cache invalidation call); see the numbering gaps.
3263 void DropTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3264  auto& catalog = session.getCatalog();
3265 
3266  // TODO(adb): the catalog should be handling this locking.
3267  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3270 
3271  const TableDescriptor* td{nullptr};
3272  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
3273 
3274  try {
3275  td_with_lock =
3276  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
3278  catalog, *table, false));
3279  td = (*td_with_lock)();
3280  } catch (const std::runtime_error& e) {
3281  if (if_exists) {
3282  return;
3283  } else {
  // NOTE(review): `throw e;` throws a copy typed as std::runtime_error, so a more
  // derived exception type would be sliced; a bare `throw;` would rethrow the
  // original object.
3284  throw e;
3285  }
3286  }
3287 
3288  CHECK(td);
3289  CHECK(td_with_lock);
3290 
3291  // check access privileges
3292  if (!session.checkDBAccessPrivileges(
3294  throw std::runtime_error("Table " + *table +
3295  " will not be dropped. User has no proper privileges.");
3296  }
3297 
3299 
3300  auto table_data_write_lock =
3302  catalog.dropTable(td);
3303 
3304  // invalidate cached hashtable
3306 }
3307 
// Executes TRUNCATE TABLE: looks up the table, verifies the caller's privileges via
// SysCatalog::checkPrivileges(), rejects views and then truncates the table through
// the catalog. All failures are reported as std::runtime_error.
//
// NOTE(review): a few source lines are missing from this listing (lock expressions,
// the privilege set on dbObject, cache invalidation call); see the numbering gaps.
3308 void TruncateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3309  auto& catalog = session.getCatalog();
3310 
3311  // TODO: Removal of the FileMgr is not thread safe. Take a global system write lock
3312  // when truncating a table
3313  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3316 
3317  const auto td_with_lock =
3319  catalog, *table, true);
3320  const auto td = td_with_lock();
3321  if (!td) {
3322  throw std::runtime_error("Table " + *table + " does not exist.");
3323  }
3324 
3325  // check access privileges
3326  std::vector<DBObject> privObjects;
3327  DBObject dbObject(*table, TableDBObjectType);
3328  dbObject.loadKey(catalog);
3330  privObjects.push_back(dbObject);
3331  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3332  throw std::runtime_error("Table " + *table + " will not be truncated. User " +
3333  session.get_currentUser().userName +
3334  " has no proper privileges.");
3335  }
3336 
  // the view check deliberately comes after the privilege check in this code
3337  if (td->isView) {
3338  throw std::runtime_error(*table + " is a view. Cannot Truncate.");
3339  }
3341  auto table_data_write_lock =
3343  catalog.truncateTable(td);
3344 
3345  // invalidate cached hashtable
3347 }
3348 
3350  const TableDescriptor* td) {
  // Throws std::runtime_error unless the current user may alter table `td`.
  // Super users and the owner of the table (userId match) pass immediately; everybody
  // else needs the privileges that SysCatalog::checkPrivileges() is asked about.
  // NOTE(review): the first line of the signature and the line that sets the required
  // privilege on dbObject are missing from this listing; the function appears to be
  // check_alter_table_privilege(session, td), judging by the call sites in this file.
3351  if (session.get_currentUser().isSuper ||
3352  session.get_currentUser().userId == td->userId) {
3353  return;
3354  }
3355  std::vector<DBObject> privObjects;
3356  DBObject dbObject(td->tableName, TableDBObjectType);
3357  dbObject.loadKey(session.getCatalog());
3359  privObjects.push_back(dbObject);
3360  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3361  throw std::runtime_error("Current user does not have the privilege to alter table: " +
3362  td->tableName);
3363  }
3364 }
3365 
// Executes ALTER USER ... RENAME TO: only super users may rename, and the user to be
// renamed has to exist; both violations raise std::runtime_error.
// NOTE(review): the declaration of `user` is missing from this listing.
3366 void RenameUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3367  if (!session.get_currentUser().isSuper) {
3368  throw std::runtime_error("Only a super user can rename users.");
3369  }
3370 
  // `user` only receives the lookup result; it is not read afterwards
3372  if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
3373  throw std::runtime_error("User " + *username_ + " does not exist.");
3374  }
3375 
3376  SysCatalog::instance().renameUser(*username_, *new_username_);
3377 }
3378 
// Executes ALTER DATABASE ... RENAME TO: the database has to exist and the caller
// has to be a super user or the database owner; otherwise std::runtime_error is
// thrown.
// NOTE(review): the declaration of `db` and the lock arguments are missing from
// this listing.
3379 void RenameDatabaseStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3381 
3382  // TODO: use database lock instead
3383  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3386 
3387  if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
3388  throw std::runtime_error("Database " + *database_name_ + " does not exist.");
3389  }
3390 
  // db.dbOwner is compared against the id of the current user
3391  if (!session.get_currentUser().isSuper &&
3392  session.get_currentUser().userId != db.dbOwner) {
3393  throw std::runtime_error("Only a super user or the owner can rename the database.");
3394  }
3395 
3396  SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
3397 }
3398 
// Executes ALTER TABLE ... RENAME TO: checks the alter privilege on the table,
// refuses the rename if the new name is already known to the catalog, then renames.
// NOTE(review): the lock expressions are incomplete in this listing.
3399 void RenameTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3400  auto& catalog = session.getCatalog();
3401 
3402  // TODO(adb): the catalog should be handling this locking (see AddColumStmt)
3403  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3406 
3407  const auto td_with_lock =
3409  catalog, *table, false);
3410  const auto td = td_with_lock();
  // a null descriptor is treated as a programming error here, not as a user error
3411  CHECK(td);
3412 
3413  check_alter_table_privilege(session, td);
3414  if (catalog.getMetadataForTable(*new_table_name) != nullptr) {
3415  throw std::runtime_error("Table or View " + *new_table_name + " already exists.");
3416  }
3417  catalog.renameTable(td, *new_table_name);
3418 }
3419 
// Fills `cd` from the parsed column definition `coldef`: the column type, the
// compression and the NOT NULL flag are forwarded to a helper.
// NOTE(review): the line naming that helper (and its first argument) is missing
// from this listing.
3420 void DDLStmt::setColumnDescriptor(ColumnDescriptor& cd, const ColumnDef* coldef) {
  // a column without any constraint is nullable
3421  bool not_null;
3422  const ColumnConstraintDef* cc = coldef->get_column_constraint();
3423  if (cc == nullptr) {
3424  not_null = false;
3425  } else {
3426  not_null = cc->get_notnull();
3427  }
3429  cd,
3430  coldef->get_column_type(),
3431  not_null,
3432  coldef->get_compression());
3433 }
3434 
3435 void AddColumnStmt::check_executable(const Catalog_Namespace::SessionInfo& session,
3436  const TableDescriptor* td) {
3437  auto& catalog = session.getCatalog();
3438  if (!td) {
3439  throw std::runtime_error("Table " + *table + " does not exist.");
3440  } else {
3441  if (td->isView) {
3442  throw std::runtime_error("Adding columns to a view is not supported.");
3443  }
3444  if (table_is_temporary(td)) {
3445  throw std::runtime_error(
3446  "Adding columns to temporary tables is not yet supported.");
3447  }
3448  };
3449 
3450  check_alter_table_privilege(session, td);
3451 
3452  if (0 == coldefs.size()) {
3453  coldefs.push_back(std::move(coldef));
3454  }
3455 
3456  for (const auto& coldef : coldefs) {
3457  auto& new_column_name = *coldef->get_column_name();
3458  if (catalog.getMetadataForColumn(td->tableId, new_column_name) != nullptr) {
3459  throw std::runtime_error("Column " + new_column_name + " already exists.");
3460  }
3461  }
3462 }
3463 
// Executes ALTER TABLE ... ADD COLUMN.
//
// After check_executable() has passed, the new columns (plus the physical columns
// of geo columns) are added to the catalog inside a SQLite transaction. If the table
// already holds rows, every new column is back-filled with its default value (or
// NULL) through import buffers, which are then loaded without checkpoint, followed
// by catalog.roll(true) and an explicit checkpoint. Any exception rolls the catalog
// and the SQLite transaction back and is rethrown.
//
// NOTE(review): a few source lines are missing from this listing (lock expressions
// and the names of the two geo helper calls); see the numbering gaps.
3464 void AddColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3465  auto& catalog = session.getCatalog();
3466 
3467  // TODO(adb): the catalog should be handling this locking.
3468  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3471 
3472  const auto td_with_lock =
3474  catalog, *table, true);
3475  const auto td = td_with_lock();
3476 
3477  check_executable(session, td);
3478 
3479  CHECK(td->fragmenter);
3480  if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
3481  td->fragmenter)) {
3482  throw std::runtime_error(
3483  "Adding columns to a table is not supported when using the \"sort_column\" "
3484  "option.");
3485  }
3486 
3487  // Do not take a data write lock, as the fragmenter may call `deleteFragments` during
3488  // a cap operation. Note that the schema write lock will prevent concurrent inserts
3489  // along with all other queries.
3490 
3491  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
3492  try {
  // cid_coldefs maps every added column id to its parsed definition; physical geo
  // columns are registered with a nullptr definition.
  // NOTE(review): `cds` is filled but not read in the code visible here.
3493  std::map<const std::string, const ColumnDescriptor> cds;
3494  std::map<const int, const ColumnDef*> cid_coldefs;
3495  for (const auto& coldef : coldefs) {
3496  ColumnDescriptor cd;
3497  setColumnDescriptor(cd, coldef.get());
3498  catalog.addColumn(*td, cd);
3499  cds.emplace(*coldef->get_column_name(), cd);
3500  cid_coldefs.emplace(cd.columnId, coldef.get());
3501 
3502  // expand geo column to phy columns
3503  if (cd.columnType.is_geometry()) {
3504  std::list<ColumnDescriptor> phy_geo_columns;
3505  catalog.expandGeoColumn(cd, phy_geo_columns);
  // note: this loop variable shadows the outer `cd`
3506  for (auto& cd : phy_geo_columns) {
3507  catalog.addColumn(*td, cd);
3508  cds.emplace(cd.columnName, cd);
3509  cid_coldefs.emplace(cd.columnId, nullptr);
3510  }
3511  }
3512  }
3513 
3514  std::unique_ptr<import_export::Loader> loader(new import_export::Loader(catalog, td));
3515  auto import_buffers = import_export::setup_column_loaders(td, loader.get());
3516  loader->setReplicating(true);
3517 
3518  // set_geo_physical_import_buffer below needs a sorted import_buffers
3519  std::sort(import_buffers.begin(),
3520  import_buffers.end(),
3521  [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
3522  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
3523  });
3524 
3525  size_t nrows = td->fragmenter->getNumRows();
3526  // if sharded, get total nrows from all sharded tables
3527  if (td->nShards > 0) {
3528  const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
3529  nrows = 0;
3530  std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](const auto& td) {
3531  nrows += td->fragmenter->getNumRows();
3532  });
3533  }
  // existing rows have to be back-filled for every added column; cid_coldefs is a
  // std::map, so the columns are visited in ascending column id order
3534  if (nrows > 0) {
3535  int skip_physical_cols = 0;
3536  for (const auto cit : cid_coldefs) {
3537  const auto cd = catalog.getMetadataForColumn(td->tableId, cit.first);
3538  const auto coldef = cit.second;
3539  const auto column_constraint = coldef ? coldef->get_column_constraint() : nullptr;
  // Default value: the string literal's text if the default is a string literal,
  // otherwise the literal's to_string(), otherwise empty.
3540  std::string defaultval = "";
3541  if (column_constraint) {
3542  auto defaultlp = column_constraint->get_defaultval();
3543  auto defaultsp = dynamic_cast<const StringLiteral*>(defaultlp);
3544  defaultval = defaultsp ? *defaultsp->get_stringval()
3545  : defaultlp ? defaultlp->to_string() : "";
3546  }
  // NULL is used when there is no constraint, when the default is empty, or when
  // it spells "NULL" (case-insensitive)
3547  bool isnull = column_constraint ? (0 == defaultval.size()) : true;
3548  if (boost::to_upper_copy<std::string>(defaultval) == "NULL") {
3549  isnull = true;
3550  }
3551 
3552  if (isnull) {
3553  if (column_constraint && column_constraint->get_notnull()) {
3554  throw std::runtime_error("Default value required for column " +
3555  cd->columnName + " (NULL value not supported)");
3556  }
3557  }
3558 
  // locate the import buffer that belongs to this column
3559  for (auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
3560  auto& import_buffer = *it;
3561  if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
  // Columns with a definition are always filled. Columns without one (physical
  // geo columns) are skipped while skip_physical_cols is still positive, i.e.
  // directly after a geo column whose physical buffers were filled below.
3562  if (coldef != nullptr ||
3563  skip_physical_cols-- <= 0) { // skip non-null phy col
3564  import_buffer->add_value(
3565  cd, defaultval, isnull, import_export::CopyParams(), nrows);
3566  if (cd->columnType.is_geometry()) {
3567  std::vector<double> coords, bounds;
3568  std::vector<int> ring_sizes, poly_rings;
3569  int render_group = 0;
3570  SQLTypeInfo tinfo{cd->columnType};
3572  tinfo,
3573  coords,
3574  bounds,
3575  ring_sizes,
3576  poly_rings,
3577  false)) {
3578  throw std::runtime_error("Bad geometry data: '" + defaultval + "'");
3579  }
  // index of the buffer right after the geo column's own buffer
3580  size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
3582  cd,
3583  import_buffers,
3584  col_idx,
3585  coords,
3586  bounds,
3587  ring_sizes,
3588  poly_rings,
3589  render_group,
3590  nrows);
3591  // skip following phy cols
3592  skip_physical_cols = cd->columnType.get_physical_cols();
3593  }
3594  }
3595  break;
3596  }
3597  }
3598  }
3599  }
3600 
3601  if (!loader->loadNoCheckpoint(import_buffers, nrows)) {
3602  throw std::runtime_error("loadNoCheckpoint failed!");
3603  }
3604  catalog.roll(true);
3605  loader->checkpoint();
3606  catalog.getSqliteConnector().query("END TRANSACTION");
3607  } catch (...) {
  // undo the catalog changes and the SQLite transaction, then propagate
3608  catalog.roll(false);
3609  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
3610  throw;
3611  }
3612 }
3613 
// Executes ALTER TABLE ... DROP COLUMN.
//
// Rejects missing tables, views, temporary tables, unknown columns, the sharding
// column and requests against a table that is reported as having only one column.
// The columns (plus the physical columns following a dropped column) are removed from
// the catalog inside a SQLite transaction and from the fragmenter of every table
// returned by getPhysicalTablesDescriptors(). On any exception the table is marked
// for reload, the catalog and the transaction are rolled back, and the exception is
// rethrown.
//
// NOTE(review): a few source lines are missing from this listing (lock expressions,
// the condition of the forced-rollback test hook, cache invalidation call).
3614 void DropColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3615  auto& catalog = session.getCatalog();
3616 
3617  // TODO(adb): the catalog should be handling this locking.
3618  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3621 
3622  const auto td_with_lock =
3624  catalog, *table, true);
3625  const auto td = td_with_lock();
3626  if (!td) {
3627  throw std::runtime_error("Table " + *table + " does not exist.");
3628  }
3629  if (td->isView) {
3630  throw std::runtime_error("Dropping a column from a view is not supported.");
3631  }
3632  if (table_is_temporary(td)) {
3633  throw std::runtime_error(
3634  "Dropping a column from a temporary table is not yet supported.");
3635  }
3636 
3637  check_alter_table_privilege(session, td);
3638 
  // all columns have to exist before anything is modified
3639  for (const auto& column : columns) {
3640  if (nullptr == catalog.getMetadataForColumn(td->tableId, *column)) {
3641  throw std::runtime_error("Column " + *column + " does not exist.");
3642  }
3643  }
3644 
  // NOTE(review): the thresholds 2 / 3 presumably account for system columns that
  // are included in nColumns (one more when the table has a deleted column) —
  // confirm against the catalog.
3645  if (td->nColumns <= (td->hasDeletedCol ? 3 : 2)) {
3646  throw std::runtime_error("Table " + *table + " has only one column.");
3647  }
3648 
3649  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
3650  try {
3651  std::vector<int> columnIds;
3652  for (const auto& column : columns) {
3653  ColumnDescriptor cd = *catalog.getMetadataForColumn(td->tableId, *column);
3654  if (td->nShards > 0 && td->shardedColumnId == cd.columnId) {
3655  throw std::runtime_error("Dropping sharding column " + cd.columnName +
3656  " is not supported.");
3657  }
3658  catalog.dropColumn(*td, cd);
3659  columnIds.push_back(cd.columnId);
  // the physical columns are looked up at the ids directly following the column
3660  for (int i = 0; i < cd.columnType.get_physical_cols(); i++) {
3661  const auto pcd = catalog.getMetadataForColumn(td->tableId, cd.columnId + i + 1);
3662  CHECK(pcd);
3663  catalog.dropColumn(*td, *pcd);
3664  columnIds.push_back(cd.columnId + i + 1);
3665  }
3666  }
3667 
3668  for (auto shard : catalog.getPhysicalTablesDescriptors(td)) {
3669  shard->fragmenter->dropColumns(columnIds);
3670  }
3671  // if test forces to rollback
3673  throw std::runtime_error("lol!");
3674  }
3675  catalog.roll(true);
  // only disk-persisted tables are checkpointed
3676  if (td->persistenceLevel == Data_Namespace::MemoryLevel::DISK_LEVEL) {
3677  catalog.checkpoint(td->tableId);
3678  }
3679  catalog.getSqliteConnector().query("END TRANSACTION");
3680  } catch (...) {
3681  catalog.setForReload(td->tableId);
3682  catalog.roll(false);
3683  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
3684  throw;
3685  }
3686 
3687  // invalidate cached hashtable
3689 }
3690 
// Renames column `*column` of table `*table` to `*new_column_name`.
//
// A missing table trips CHECK(td) rather than throwing. Throws
// std::runtime_error when the source column does not exist or when a column
// with the new name already exists.
// NOTE(review): listing line 3695 (the expression that builds td_with_lock)
// is missing from this listing.
3691 void RenameColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3692  auto& catalog = session.getCatalog();
3693 
3694  const auto td_with_lock =
3696  catalog, *table, false);
3697  const auto td = td_with_lock();
3698  CHECK(td);
3699 
3700  check_alter_table_privilege(session, td);
3701  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *column);
3702  if (cd == nullptr) {
3703  throw std::runtime_error("Column " + *column + " does not exist.");
3704  }
// Refuse to rename onto an existing column name.
3705  if (catalog.getMetadataForColumn(td->tableId, *new_column_name) != nullptr) {
3706  throw std::runtime_error("Column " + *new_column_name + " already exists.");
3707  }
3708  catalog.renameColumn(td, cd, *new_column_name);
3709 }
3710 
3711 void CopyTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3712  auto importer_factory = [](Catalog_Namespace::Catalog& catalog,
3713  const TableDescriptor* td,
3714  const std::string& file_path,
3715  const import_export::CopyParams& copy_params) {
3716  return std::make_unique<import_export::Importer>(catalog, td, file_path, copy_params);
3717  };
3718  return execute(session, importer_factory);
3719 }
3720 
// Executes a COPY from `*file_pattern` into table `*table`, using
// `importer_factory` to build the importer for the non-geo path.
//
// Steps visible in this listing:
//  1. Paths that do not match the s3/http/https URL regex go through an extra
//     call taking (*file_pattern, DataTransferType::IMPORT, true).
//  2. Table locks are taken inside a try block. A std::runtime_error there is
//     swallowed and leaves `td` null, which is treated as "table does not exist".
//  3. If the table exists, the current user's privileges on it are checked.
//  4. Each entry of `options` is validated and stored into `copy_params`;
//     option names are compared case-insensitively and an unknown option throws.
//  5. With file_type POLYGON the parameters are only stashed in the
//     _geo_copy_from_* members; otherwise the importer runs immediately and the
//     table must already exist.
//  6. A summary string is stored in `return_message` and logged.
//
// Throws std::runtime_error for invalid options, missing privileges, or a
// missing table on the non-geo path.
// NOTE(review): the embedded listing numbers skip at 3722->3724, 3729->3731,
// 3740->3743, 3751->3753, 3755->3757, 3766->3769, 3825->3828 and 3943->3946;
// the statements on those lines are not visible in this listing.
3721 void CopyTableStmt::execute(const Catalog_Namespace::SessionInfo& session,
3722  const std::function<std::unique_ptr<import_export::Importer>(
3724  const TableDescriptor*,
3725  const std::string&,
3726  const import_export::CopyParams&)>& importer_factory) {
// Only patterns that are not s3/http/https URLs take the branch below.
3727  boost::regex non_local_file_regex{R"(^\s*(s3|http|https)://.+)",
3728  boost::regex::extended | boost::regex::icase};
3729  if (!boost::regex_match(*file_pattern, non_local_file_regex)) {
3731  *file_pattern, ddl_utils::DataTransferType::IMPORT, true);
3732  }
3733 
3734  size_t rows_completed = 0;
3735  size_t rows_rejected = 0;
3736  size_t total_time = 0;
3737  bool load_truncated = false;
3738 
3739  // Prevent simultaneous import / truncate (see TruncateTableStmt::execute)
3740  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3743 
3744  const TableDescriptor* td{nullptr};
3745  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
3746  std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
3747 
3748  auto& catalog = session.getCatalog();
3749 
// A failure here is deliberately ignored: td stays nullptr and the code below
// treats that as "table does not exist".
3750  try {
3751  td_with_lock = std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
3753  catalog, *table));
3754  td = (*td_with_lock)();
3755  insert_data_lock = std::make_unique<lockmgr::WriteLock>(
3757  } catch (const std::runtime_error& e) {
3758  // noop
3759  // TODO(adb): We're really only interested in whether the table exists or not.
3760  // Create a more refined exception.
3761  }
3762 
3763  // if the table already exists, it's locked, so check access privileges
3764  if (td) {
3765  std::vector<DBObject> privObjects;
3766  DBObject dbObject(*table, TableDBObjectType);
3767  dbObject.loadKey(catalog);
3769  privObjects.push_back(dbObject);
3770  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3771  throw std::runtime_error("Violation of access privileges: user " +
3772  session.get_currentUser().userName +
3773  " has no insert privileges for table " + *table + ".");
3774  }
3775  }
3776 
3777  // since we'll have not only posix file names but also s3/hdfs/... url
3778  // we do not expand wildcard or check file existence here.
3779  // from here on, file_path contains something which may be a url
3780  // or a wildcard of file names;
3781  std::string file_path = *file_pattern;
3782  import_export::CopyParams copy_params;
// Translate each WITH option into the matching copy_params field. Every branch
// checks the literal's type with dynamic_cast before reading it.
3783  if (!options.empty()) {
3784  for (auto& p : options) {
3785  if (boost::iequals(*p->get_name(), "max_reject")) {
3786  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3787  if (int_literal == nullptr) {
3788  throw std::runtime_error("max_reject option must be an integer.");
3789  }
3790  copy_params.max_reject = int_literal->get_intval();
3791  } else if (boost::iequals(*p->get_name(), "buffer_size")) {
3792  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3793  if (int_literal == nullptr) {
3794  throw std::runtime_error("buffer_size option must be an integer.");
3795  }
3796  copy_params.buffer_size = int_literal->get_intval();
3797  } else if (boost::iequals(*p->get_name(), "threads")) {
3798  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3799  if (int_literal == nullptr) {
3800  throw std::runtime_error("Threads option must be an integer.");
3801  }
3802  copy_params.threads = int_literal->get_intval();
3803  } else if (boost::iequals(*p->get_name(), "delimiter")) {
3804  const StringLiteral* str_literal =
3805  dynamic_cast<const StringLiteral*>(p->get_value());
3806  if (str_literal == nullptr) {
3807  throw std::runtime_error("Delimiter option must be a string.");
3808  } else if (str_literal->get_stringval()->length() != 1) {
3809  throw std::runtime_error("Delimiter must be a single character string.");
3810  }
3811  copy_params.delimiter = (*str_literal->get_stringval())[0];
3812  } else if (boost::iequals(*p->get_name(), "nulls")) {
3813  const StringLiteral* str_literal =
3814  dynamic_cast<const StringLiteral*>(p->get_value());
3815  if (str_literal == nullptr) {
3816  throw std::runtime_error("Nulls option must be a string.");
3817  }
3818  copy_params.null_str = *str_literal->get_stringval();
3819  } else if (boost::iequals(*p->get_name(), "header")) {
3820  const StringLiteral* str_literal =
3821  dynamic_cast<const StringLiteral*>(p->get_value());
3822  if (str_literal == nullptr) {
3823  throw std::runtime_error("Header option must be a boolean.");
3824  }
// NOTE(review): the remainder of this assignment (listing lines 3826-3827) is
// missing from this listing.
3825  copy_params.has_header = bool_from_string_literal(str_literal)
3828 #ifdef ENABLE_IMPORT_PARQUET
3829  } else if (boost::iequals(*p->get_name(), "parquet")) {
3830  const StringLiteral* str_literal =
3831  dynamic_cast<const StringLiteral*>(p->get_value());
3832  if (str_literal == nullptr) {
3833  throw std::runtime_error("Parquet option must be a boolean.");
3834  }
3835  if (bool_from_string_literal(str_literal)) {
3836  // not sure a parquet "table" type is proper, but to make code
3837  // look consistent in some places, let's set "table" type too
3838  copy_params.file_type = import_export::FileType::PARQUET;
3839  }
3840 #endif // ENABLE_IMPORT_PARQUET
3841  } else if (boost::iequals(*p->get_name(), "s3_access_key")) {
3842  const StringLiteral* str_literal =
3843  dynamic_cast<const StringLiteral*>(p->get_value());
3844  if (str_literal == nullptr) {
3845  throw std::runtime_error("Option s3_access_key must be a string.");
3846  }
3847  copy_params.s3_access_key = *str_literal->get_stringval();
3848  } else if (boost::iequals(*p->get_name(), "s3_secret_key")) {
3849  const StringLiteral* str_literal =
3850  dynamic_cast<const StringLiteral*>(p->get_value());
3851  if (str_literal == nullptr) {
3852  throw std::runtime_error("Option s3_secret_key must be a string.");
3853  }
3854  copy_params.s3_secret_key = *str_literal->get_stringval();
3855  } else if (boost::iequals(*p->get_name(), "s3_region")) {
3856  const StringLiteral* str_literal =
3857  dynamic_cast<const StringLiteral*>(p->get_value());
3858  if (str_literal == nullptr) {
3859  throw std::runtime_error("Option s3_region must be a string.");
3860  }
3861  copy_params.s3_region = *str_literal->get_stringval();
3862  } else if (boost::iequals(*p->get_name(), "s3_endpoint")) {
3863  const StringLiteral* str_literal =
3864  dynamic_cast<const StringLiteral*>(p->get_value());
3865  if (str_literal == nullptr) {
3866  throw std::runtime_error("Option s3_endpoint must be a string.");
3867  }
3868  copy_params.s3_endpoint = *str_literal->get_stringval();
3869  } else if (boost::iequals(*p->get_name(), "quote")) {
3870  const StringLiteral* str_literal =
3871  dynamic_cast<const StringLiteral*>(p->get_value());
3872  if (str_literal == nullptr) {
3873  throw std::runtime_error("Quote option must be a string.");
3874  } else if (str_literal->get_stringval()->length() != 1) {
3875  throw std::runtime_error("Quote must be a single character string.");
3876  }
3877  copy_params.quote = (*str_literal->get_stringval())[0];
3878  } else if (boost::iequals(*p->get_name(), "escape")) {
3879  const StringLiteral* str_literal =
3880  dynamic_cast<const StringLiteral*>(p->get_value());
3881  if (str_literal == nullptr) {
3882  throw std::runtime_error("Escape option must be a string.");
3883  } else if (str_literal->get_stringval()->length() != 1) {
3884  throw std::runtime_error("Escape must be a single character string.");
3885  }
3886  copy_params.escape = (*str_literal->get_stringval())[0];
3887  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
3888  const StringLiteral* str_literal =
3889  dynamic_cast<const StringLiteral*>(p->get_value());
3890  if (str_literal == nullptr) {
3891  throw std::runtime_error("Line_delimiter option must be a string.");
3892  } else if (str_literal->get_stringval()->length() != 1) {
3893  throw std::runtime_error("Line_delimiter must be a single character string.");
3894  }
3895  copy_params.line_delim = (*str_literal->get_stringval())[0];
3896  } else if (boost::iequals(*p->get_name(), "quoted")) {
3897  const StringLiteral* str_literal =
3898  dynamic_cast<const StringLiteral*>(p->get_value());
3899  if (str_literal == nullptr) {
3900  throw std::runtime_error("Quoted option must be a boolean.");
3901  }
3902  copy_params.quoted = bool_from_string_literal(str_literal);
3903  } else if (boost::iequals(*p->get_name(), "plain_text")) {
3904  const StringLiteral* str_literal =
3905  dynamic_cast<const StringLiteral*>(p->get_value());
3906  if (str_literal == nullptr) {
3907  throw std::runtime_error("plain_text option must be a boolean.");
3908  }
3909  copy_params.plain_text = bool_from_string_literal(str_literal);
3910  } else if (boost::iequals(*p->get_name(), "array_marker")) {
3911  const StringLiteral* str_literal =
3912  dynamic_cast<const StringLiteral*>(p->get_value());
3913  if (str_literal == nullptr) {
3914  throw std::runtime_error("Array Marker option must be a string.");
3915  } else if (str_literal->get_stringval()->length() != 2) {
3916  throw std::runtime_error(
3917  "Array Marker option must be exactly two characters. Default is {}.");
3918  }
// First character opens an array, second character closes it.
3919  copy_params.array_begin = (*str_literal->get_stringval())[0];
3920  copy_params.array_end = (*str_literal->get_stringval())[1];
3921  } else if (boost::iequals(*p->get_name(), "array_delimiter")) {
3922  const StringLiteral* str_literal =
3923  dynamic_cast<const StringLiteral*>(p->get_value());
3924  if (str_literal == nullptr) {
3925  throw std::runtime_error("Array Delimiter option must be a string.");
3926  } else if (str_literal->get_stringval()->length() != 1) {
3927  throw std::runtime_error("Array Delimiter must be a single character string.");
3928  }
3929  copy_params.array_delim = (*str_literal->get_stringval())[0];
3930  } else if (boost::iequals(*p->get_name(), "lonlat")) {
3931  const StringLiteral* str_literal =
3932  dynamic_cast<const StringLiteral*>(p->get_value());
3933  if (str_literal == nullptr) {
3934  throw std::runtime_error("Lonlat option must be a boolean.");
3935  }
3936  copy_params.lonlat = bool_from_string_literal(str_literal);
3937  } else if (boost::iequals(*p->get_name(), "geo")) {
3938  const StringLiteral* str_literal =
3939  dynamic_cast<const StringLiteral*>(p->get_value());
3940  if (str_literal == nullptr) {
3941  throw std::runtime_error("Geo option must be a boolean.");
3942  }
// NOTE(review): the remainder of this assignment (listing lines 3944-3945) is
// missing from this listing.
3943  copy_params.file_type = bool_from_string_literal(str_literal)
3946  } else if (boost::iequals(*p->get_name(), "geo_coords_type")) {
3947  const StringLiteral* str_literal =
3948  dynamic_cast<const StringLiteral*>(p->get_value());
3949  if (str_literal == nullptr) {
3950  throw std::runtime_error("'geo_coords_type' option must be a string");
3951  }
3952  const std::string* s = str_literal->get_stringval();
3953  if (boost::iequals(*s, "geography")) {
3954  throw std::runtime_error(
3955  "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
3956  // copy_params.geo_coords_type = kGEOGRAPHY;
3957  } else if (boost::iequals(*s, "geometry")) {
3958  copy_params.geo_coords_type = kGEOMETRY;
3959  } else {
3960  throw std::runtime_error(
3961  "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
3962  "'GEOMETRY'): " +
3963  *s);
3964  }
3965  } else if (boost::iequals(*p->get_name(), "geo_coords_encoding")) {
3966  const StringLiteral* str_literal =
3967  dynamic_cast<const StringLiteral*>(p->get_value());
3968  if (str_literal == nullptr) {
3969  throw std::runtime_error("'geo_coords_encoding' option must be a string");
3970  }
3971  const std::string* s = str_literal->get_stringval();
3972  if (boost::iequals(*s, "none")) {
3973  copy_params.geo_coords_encoding = kENCODING_NONE;
3974  copy_params.geo_coords_comp_param = 0;
3975  } else if (boost::iequals(*s, "compressed(32)")) {
3976  copy_params.geo_coords_encoding = kENCODING_GEOINT;
3977  copy_params.geo_coords_comp_param = 32;
3978  } else {
3979  throw std::runtime_error(
3980  "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
3981  "'COMPRESSED(32)'): " +
3982  *s);
3983  }
3984  } else if (boost::iequals(*p->get_name(), "geo_coords_srid")) {
3985  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3986  if (int_literal == nullptr) {
3987  throw std::runtime_error("'geo_coords_srid' option must be an integer");
3988  }
3989  const int srid = int_literal->get_intval();
3990  if (srid == 4326 || srid == 3857 || srid == 900913) {
3991  copy_params.geo_coords_srid = srid;
3992  } else {
3993  throw std::runtime_error(
3994  "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
3995  "900913): " +
3996  std::to_string(srid));
3997  }
3998  } else if (boost::iequals(*p->get_name(), "geo_layer_name")) {
3999  const StringLiteral* str_literal =
4000  dynamic_cast<const StringLiteral*>(p->get_value());
4001  if (str_literal == nullptr) {
4002  throw std::runtime_error("'geo_layer_name' option must be a string");
4003  }
4004  const std::string* layer_name = str_literal->get_stringval();
4005  if (layer_name) {
4006  copy_params.geo_layer_name = *layer_name;
4007  } else {
4008  throw std::runtime_error("Invalid value for 'geo_layer_name' option");
4009  }
4010  } else if (boost::iequals(*p->get_name(), "partitions")) {
// This branch tests copy_params.file_type as set so far, so it depends on the
// order in which the options are listed.
// NOTE(review): unlike the other branches this one uses static_cast with no
// type check on the literal -- confirm the parser guarantees a StringLiteral.
4011  if (copy_params.file_type == import_export::FileType::POLYGON) {
4012  const auto partitions =
4013  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4014  CHECK(partitions);
4015  const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
4016  if (partitions_uc != "REPLICATED") {
4017  throw std::runtime_error("PARTITIONS must be REPLICATED for geo COPY");
4018  }
4019  _geo_copy_from_partitions = partitions_uc;
4020  } else {
4021  throw std::runtime_error("PARTITIONS option not supported for non-geo COPY: " +
4022  *p->get_name());
4023  }
4024  } else if (boost::iequals(*p->get_name(), "geo_assign_render_groups")) {
4025  const StringLiteral* str_literal =
4026  dynamic_cast<const StringLiteral*>(p->get_value());
4027  if (str_literal == nullptr) {
4028  throw std::runtime_error("geo_assign_render_groups option must be a boolean.");
4029  }
4030  copy_params.geo_assign_render_groups = bool_from_string_literal(str_literal);
4031  } else if (boost::iequals(*p->get_name(), "geo_explode_collections")) {
4032  const StringLiteral* str_literal =
4033  dynamic_cast<const StringLiteral*>(p->get_value());
4034  if (str_literal == nullptr) {
4035  throw std::runtime_error("geo_explode_collections option must be a boolean.");
4036  }
4037  copy_params.geo_explode_collections = bool_from_string_literal(str_literal);
4038  } else {
4039  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
4040  }
4041  }
4042  }
4043 
// `tr` collects the human-readable result that ends up in return_message.
4044  std::string tr;
4045  if (copy_params.file_type == import_export::FileType::POLYGON) {
4046  // geo import
4047  // we do nothing here, except stash the parameters so we can
4048  // do the import when we unwind to the top of the handler
4049  _geo_copy_from_file_name = file_path;
4050  _geo_copy_from_copy_params = copy_params;
4051  _was_geo_copy_from = true;
4052 
4053  // the result string
4054  // @TODO simon.eves put something more useful in here
4055  // except we really can't because we haven't done the import yet!
4056  if (td) {
4057  tr = std::string("Appending geo to table '") + *table + std::string("'...");
4058  } else {
4059  tr = std::string("Creating table '") + *table +
4060  std::string("' and importing geo...");
4061  }
4062  } else {
4063  if (td) {
4064  CHECK(td_with_lock);
4065 
4066  // regular import
4067  auto importer = importer_factory(catalog, td, file_path, copy_params);
// The value returned by measure<>::execution is added to total_time, which is
// divided by 1000.0 and reported as "secs" in the messages below.
4068  auto ms = measure<>::execution([&]() {
4069  auto res = importer->import();
4070  rows_completed += res.rows_completed;
4071  rows_rejected += res.rows_rejected;
4072  load_truncated = res.load_truncated;
4073  });
4074  total_time += ms;
4075 
4076  // results
4077  if (load_truncated || rows_rejected > copy_params.max_reject) {
4078  LOG(ERROR) << "COPY exited early due to reject records count during multi file "
4079  "processing ";
4080  // if we have crossed the truncated load threshold
4081  load_truncated = true;
// `success` is not declared in this function; presumably a member of the
// statement -- TODO confirm.
4082  success = false;
4083  }
4084  if (!load_truncated) {
4085  tr = std::string("Loaded: " + std::to_string(rows_completed) +
4086  " recs, Rejected: " + std::to_string(rows_rejected) +
4087  " recs in " + std::to_string((double)total_time / 1000.0) +
4088  " secs");
4089  } else {
4090  tr = std::string("Loader truncated due to reject count. Processed : " +
4091  std::to_string(rows_completed) + " recs, Rejected: " +
4092  std::to_string(rows_rejected) + " recs in " +
4093  std::to_string((double)total_time / 1000.0) + " secs");
4094  }
4095  } else {
4096  throw std::runtime_error("Table '" + *table + "' must exist before COPY FROM");
4097  }
4098  }
4099 
4100  return_message.reset(new std::string(tr));
4101  LOG(INFO) << tr;
4102 }
4104 // CREATE ROLE payroll_dept_role;
4105 void CreateRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4106  const auto& currentUser = session.get_currentUser();
4107  if (!currentUser.isSuper) {
4108  throw std::runtime_error("CREATE ROLE " + get_role() +
4109  " failed. It can only be executed by super user.");
4110  }
4111  SysCatalog::instance().createRole(get_role());
4112 }
4113 
4114 // DROP ROLE payroll_dept_role;
4115 void DropRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4116  const auto& currentUser = session.get_currentUser();
4117  if (!currentUser.isSuper) {
4118  throw std::runtime_error("DROP ROLE " + get_role() +
4119  " failed. It can only be executed by super user.");
4120  }
4121  auto* rl = SysCatalog::instance().getRoleGrantee(get_role());
4122  if (!rl) {
4123  throw std::runtime_error("DROP ROLE " + get_role() +
4124  " failed because role with this name does not exist.");
4125  }
4126  SysCatalog::instance().dropRole(get_role());
4127 }
4128 
4129 std::vector<std::string> splitObjectHierName(const std::string& hierName) {
4130  std::vector<std::string> componentNames;
4131  boost::split(componentNames, hierName, boost::is_any_of("."));
4132  return componentNames;
4133 }
4134 
// Extracts the bare object name from a dot-separated hierarchical name.
//
// objectHierName: name as written in the statement, split on '.'.
// objectType: compared with exact, case-sensitive matches against "DATABASE",
//   "TABLE", "DASHBOARD", "VIEW" and "SERVER".
// Returns the only component for DATABASE, and the last of one or two
// components for the other supported types.
// Throws std::runtime_error when the component count is not allowed for the
// type or when the type is not one of the supported strings.
// NOTE(review): listing line 4137 (the remaining parameter list and the
// opening brace) is missing from this listing; callers in this file pass a
// catalog as the third argument.
4135 std::string extractObjectNameFromHierName(const std::string& objectHierName,
4136  const std::string& objectType,
4138  std::string objectName;
4139  std::vector<std::string> componentNames = splitObjectHierName(objectHierName);
4140  if (objectType.compare("DATABASE") == 0) {
// A database name must not be qualified.
4141  if (componentNames.size() == 1) {
4142  objectName = componentNames[0];
4143  } else {
4144  throw std::runtime_error("DB object name is not correct " + objectHierName);
4145  }
4146  } else {
4147  if (objectType.compare("TABLE") == 0 || objectType.compare("DASHBOARD") == 0 ||
4148  objectType.compare("VIEW") == 0 || objectType.compare("SERVER") == 0) {
// One component: the name itself. Two components: the second one is the name
// and the first one is dropped.
4149  switch (componentNames.size()) {
4150  case (1): {
4151  objectName = componentNames[0];
4152  break;
4153  }
4154  case (2): {
4155  objectName = componentNames[1];
4156  break;
4157  }
4158  default: {
4159  throw std::runtime_error("DB object name is not correct " + objectHierName);
4160  }
4161  }
4162  } else {
4163  throw std::runtime_error("DB object type " + objectType + " is not supported.");
4164  }
4165  }
4166  return objectName;
4167 }
4168 
// Looks up the (AccessPrivileges, DBObjectType) pair registered for a
// privilege string on a given object type.
//
// privs: privilege name; the lookup is an exact match and every key in the
//   table is upper case.
// objectType: type of the object named in the statement.
// object_name: used only in the error message.
// Returns the mapped pair. Throws std::runtime_error when the combination of
// `privs` and `objectType` is not in the table.
// NOTE(review): the mapped values of all table entries are missing from this
// listing (the embedded numbers skip after each key), so only the keys can be
// read here.
4169 static std::pair<AccessPrivileges, DBObjectType> parseStringPrivs(
4170  const std::string& privs,
4171  const DBObjectType& objectType,
4172  const std::string& object_name) {
// The table is a function-local static, so it is built once and reused.
4173  static const std::map<std::pair<const std::string, const DBObjectType>,
4174  std::pair<const AccessPrivileges, const DBObjectType>>
4175  privileges_lookup{
4176  {{"ALL"s, DatabaseDBObjectType},
4179  {{"ALL"s, DashboardDBObjectType},
4182  {{"ALL"s, ServerDBObjectType},
4184 
4185  {{"CREATE TABLE"s, DatabaseDBObjectType},
4187  {{"CREATE"s, DatabaseDBObjectType},
4189  {{"SELECT"s, DatabaseDBObjectType},
4191  {{"INSERT"s, DatabaseDBObjectType},
4193  {{"TRUNCATE"s, DatabaseDBObjectType},
4195  {{"UPDATE"s, DatabaseDBObjectType},
4197  {{"DELETE"s, DatabaseDBObjectType},
4199  {{"DROP"s, DatabaseDBObjectType},
4201  {{"ALTER"s, DatabaseDBObjectType},
4203 
4204  {{"SELECT"s, TableDBObjectType},
4206  {{"INSERT"s, TableDBObjectType},
4208  {{"TRUNCATE"s, TableDBObjectType},
4210  {{"UPDATE"s, TableDBObjectType},
4212  {{"DELETE"s, TableDBObjectType},
4214  {{"DROP"s, TableDBObjectType},
4216  {{"ALTER"s, TableDBObjectType},
4218 
4219  {{"CREATE VIEW"s, DatabaseDBObjectType},
4221  {{"SELECT VIEW"s, DatabaseDBObjectType},
4223  {{"DROP VIEW"s, DatabaseDBObjectType},
4225  {{"SELECT"s, ViewDBObjectType},
4228 
4229  {{"CREATE DASHBOARD"s, DatabaseDBObjectType},
4231  {{"EDIT DASHBOARD"s, DatabaseDBObjectType},
4233  {{"VIEW DASHBOARD"s, DatabaseDBObjectType},
4235  {{"DELETE DASHBOARD"s, DatabaseDBObjectType},
4237  {{"VIEW"s, DashboardDBObjectType},
4239  {{"EDIT"s, DashboardDBObjectType},
4241  {{"DELETE"s, DashboardDBObjectType},
4243 
4244  {{"CREATE SERVER"s, DatabaseDBObjectType},
4246  {{"DROP SERVER"s, DatabaseDBObjectType},
4248  {{"DROP"s, ServerDBObjectType},
4250  {{"ALTER SERVER"s, DatabaseDBObjectType},
4252  {{"ALTER"s, ServerDBObjectType},
4254 
4255  {{"VIEW SQL EDITOR"s, DatabaseDBObjectType},
4257  {{"ACCESS"s, DatabaseDBObjectType},
4259 
4260  auto result = privileges_lookup.find(std::make_pair(privs, objectType));
4261  if (result == privileges_lookup.end()) {
4262  throw std::runtime_error("Privileges " + privs + " on DB object " + object_name +
4263  " are not correct.");
4264  }
4265  return result->second;
4266 }
4267 
4268 static DBObject createObject(const std::string& objectName, DBObjectType objectType) {
4269  if (objectType == DashboardDBObjectType) {
4270  int32_t dashboard_id = -1;
4271  if (!objectName.empty()) {
4272  try {
4273  dashboard_id = stoi(objectName);
4274  } catch (const std::exception&) {
4275  throw std::runtime_error(
4276  "Privileges on dashboards should be changed via integer dashboard ID");
4277  }
4278  }
4279  return DBObject(dashboard_id, objectType);
4280  } else {
4281  return DBObject(objectName, objectType);
4282  }
4283 }
4284 
4285 // GRANT SELECT/INSERT/CREATE ON TABLE payroll_table TO payroll_dept_role;
4286 void GrantPrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4287  auto& catalog = session.getCatalog();
4288  const auto& currentUser = session.get_currentUser();
4289  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4290  const auto objectName =
4291  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4292  auto objectType = DBObjectTypeFromString(parserObjectType);
4293  if (objectType == ServerDBObjectType && !g_enable_fsi) {
4294  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
4295  }
4296  DBObject dbObject = createObject(objectName, objectType);
4297  /* verify object ownership if not suser */
4298  if (!currentUser.isSuper) {
4299  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4300  throw std::runtime_error(
4301  "GRANT failed. It can only be executed by super user or owner of the "
4302  "object.");
4303  }
4304  }
4305  /* set proper values of privileges & grant them to the object */
4306  std::vector<DBObject> objects(get_privs().size(), dbObject);
4307  for (size_t i = 0; i < get_privs().size(); ++i) {
4308  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
4309  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
4310  objects[i].setPrivileges(priv.first);
4311  objects[i].setPermissionType(priv.second);
4312  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
4313  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
4314  }
4315  }
4316  SysCatalog::instance().grantDBObjectPrivilegesBatch(grantees, objects, catalog);
4317 }
4318 
4319 // REVOKE SELECT/INSERT/CREATE ON TABLE payroll_table FROM payroll_dept_role;
4320 void RevokePrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4321  auto& catalog = session.getCatalog();
4322  const auto& currentUser = session.get_currentUser();
4323  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4324  const auto objectName =
4325  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4326  auto objectType = DBObjectTypeFromString(parserObjectType);
4327  if (objectType == ServerDBObjectType && !g_enable_fsi) {
4328  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
4329  }
4330  DBObject dbObject = createObject(objectName, objectType);
4331  /* verify object ownership if not suser */
4332  if (!currentUser.isSuper) {
4333  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4334  throw std::runtime_error(
4335  "REVOKE failed. It can only be executed by super user or owner of the "
4336  "object.");
4337  }
4338  }
4339  /* set proper values of privileges & grant them to the object */
4340  std::vector<DBObject> objects(get_privs().size(), dbObject);
4341  for (size_t i = 0; i < get_privs().size(); ++i) {
4342  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
4343  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
4344  objects[i].setPrivileges(priv.first);
4345  objects[i].setPermissionType(priv.second);
4346  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
4347  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
4348  }
4349  }
4350  SysCatalog::instance().revokeDBObjectPrivilegesBatch(grantees, objects, catalog);
4351 }
4352 
// Prints to stdout (via printf) the privileges that role get_role() holds on
// the object named by get_object().
// Throws std::runtime_error when the caller is neither a super user nor the
// owner of the object, and for anything rejected by the name parsing helper.
// NOTE(review): every `if` line that tests an individual privilege is missing
// from this listing (the embedded numbers skip before each printf), so which
// test guards which printf cannot be read here.
4353 // NOTE: not used currently, will we ever use it?
4354 // SHOW ON TABLE payroll_table FOR payroll_dept_role;
4355 void ShowPrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4356  auto& catalog = session.getCatalog();
4357  const auto& currentUser = session.get_currentUser();
4358  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4359  const auto objectName =
4360  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4361  auto objectType = DBObjectTypeFromString(parserObjectType);
4362  DBObject dbObject = createObject(objectName, objectType);
4363  /* verify object ownership if not suser */
4364  if (!currentUser.isSuper) {
4365  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4366  throw std::runtime_error(
4367  "SHOW ON " + get_object() + " FOR " + get_role() +
4368  " failed. It can only be executed by super user or owner of the object.");
4369  }
4370  }
4371  /* get values of privileges for the object and report them */
4372  SysCatalog::instance().getDBObjectPrivileges(get_role(), dbObject, catalog);
4373  AccessPrivileges privs = dbObject.getPrivileges();
4374  printf("\nPRIVILEGES ON %s FOR %s ARE SET AS FOLLOWING: ",
4375  get_object().c_str(),
4376  get_role().c_str());
4377 
// One section per object type; each section prints the privilege names that
// apply to that type.
4378  if (objectType == DBObjectType::DatabaseDBObjectType) {
4380  printf(" CREATE");
4381  }
4383  printf(" DROP");
4384  }
4385  } else if (objectType == DBObjectType::TableDBObjectType) {
4387  printf(" CREATE");
4388  }
4390  printf(" DROP");
4391  }
4393  printf(" SELECT");
4394  }
4396  printf(" INSERT");
4397  }
4399  printf(" UPDATE");
4400  }
4402  printf(" DELETE");
4403  }
4405  printf(" TRUNCATE");
4406  }
4408  printf(" ALTER");
4409  }
4410  } else if (objectType == DBObjectType::DashboardDBObjectType) {
4412  printf(" CREATE");
4413  }
4415  printf(" DELETE");
4416  }
4418  printf(" VIEW");
4419  }
4421  printf(" EDIT");
4422  }
4423  } else if (objectType == DBObjectType::ViewDBObjectType) {
4425  printf(" CREATE");
4426  }
4428  printf(" DROP");
4429  }
4431  printf(" SELECT");
4432  }
4434  printf(" INSERT");
4435  }
4437  printf(" UPDATE");
4438  }
4440  printf(" DELETE");
4441  }
4442  }
4443  printf(".\n");
4444 }
4445 
4446 // GRANT payroll_dept_role TO joe;
4447 void GrantRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4448  const auto& currentUser = session.get_currentUser();
4449  if (!currentUser.isSuper) {
4450  throw std::runtime_error(
4451  "GRANT failed, because it can only be executed by super user.");
4452  }
4453  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
4454  get_grantees().end()) {
4455  throw std::runtime_error(
4456  "Request to grant role failed because mapd root user has all privileges by "
4457  "default.");
4458  }
4459  SysCatalog::instance().grantRoleBatch(get_roles(), get_grantees());
4460 }
4461 
4462 // REVOKE payroll_dept_role FROM joe;get_users
4463 void RevokeRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4464  const auto& currentUser = session.get_currentUser();
4465  if (!currentUser.isSuper) {
4466  throw std::runtime_error(
4467  "REVOKE failed, because it can only be executed by super user.");
4468  }
4469  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
4470  get_grantees().end()) {
4471  throw std::runtime_error(
4472  "Request to revoke role failed because privileges can not be revoked from mapd "
4473  "root user.");
4474  }
4475  SysCatalog::instance().revokeRoleBatch(get_roles(), get_grantees());
4476 }
4477 
// Stores the catalog's dumpCreateTable output for table/view `*table_` in
// `create_stmt_`.
//
// Throws std::runtime_error when the table/view is not found, when the current
// user has no privileges on it, or when the target is a view and the user is
// not a super user.
// NOTE(review): listing lines 4482-4483 (the expression that builds
// execute_read_lock) are missing from this listing.
4478 void ShowCreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4479  using namespace Catalog_Namespace;
4480 
4481  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4484 
4485  auto& catalog = session.getCatalog();
4486  const TableDescriptor* td = catalog.getMetadataForTable(*table_, false);
4487  if (!td) {
4488  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
4489  }
4490 
4491  DBObject dbObject(td->tableName, td->isView ? ViewDBObjectType : TableDBObjectType);
4492  dbObject.loadKey(catalog);
4493  std::vector<DBObject> privObjects = {dbObject};
4494 
// Same message as the "not found" case above, so a user without privileges
// gets the same error as for a table that does not exist.
4495  if (!SysCatalog::instance().hasAnyPrivileges(session.get_currentUser(), privObjects)) {
4496  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
4497  }
4498  if (td->isView && !session.get_currentUser().isSuper) {
4499  // TODO: we need to run a validate query to ensure the user has access to the
4500  // underlying table, but we do not have any of the machinery in here. Disable for
4501  // now, unless the current user is a super user.
4502  throw std::runtime_error("SHOW CREATE TABLE not yet supported for views");
4503  }
4504 
4505  create_stmt_ = catalog.dumpCreateTable(td);
4506 }
4507 
// Implements COPY (<select>) TO '<file>': runs *select_stmt and writes the results to
// *file_path through an import_export::QueryExporter.
//
// Steps visible in this function:
//  1. Build a QueryState from a copy of the session.
//  2. Parse the statement options via parseOptions().
//  3. Resolve the output path: a relative path is reduced to its file name and placed
//     under <catalog base path>/mapd_export/<session id>/ (created on demand).
//  4. Query once through the local connector to obtain the target metadata, call
//     beginExport(), export one outer fragment at a time, then call endExport().
//
// Throws std::runtime_error for an empty file path or when the export directory
// cannot be created.
4508 void ExportQueryStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4509  auto session_copy = session;
      // Non-owning shared_ptr: session_copy lives on this stack frame, so the deleter
      // (boost::null_deleter) must not free it.
4510  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4511  &session_copy, boost::null_deleter());
4512  auto query_state = query_state::QueryState::create(session_ptr, *select_stmt);
4513  auto stdlog = STDLOG(query_state);
4514  auto query_state_proxy = query_state->createQueryStateProxy();
4515 
4516  auto& catalog = session.getCatalog();
4517 
4518  LocalConnector local_connector;
4519 
      // No connector supplied: fall back to the local one.
      // NOTE(review): leafs_connector_ then holds the address of a stack object and is
      // not reset before returning; if it outlives this call it dangles -- confirm.
4520  if (!leafs_connector_) {
4521  leafs_connector_ = &local_connector;
4522  }
4523 
4524  import_export::CopyParams copy_params;
4525  // @TODO(se) move rest to CopyParams when we have a Thrift endpoint
4528  std::string layer_name;
4533 
4534  parseOptions(copy_params, file_type, layer_name, file_compression, array_null_handling);
4535 
4536  if (file_path->empty()) {
4537  throw std::runtime_error("Invalid file path for COPY TO");
4538  } else if (!boost::filesystem::path(*file_path).is_absolute()) {
      // Relative path: only the file name component is kept; any directory part of
      // the user-supplied path is dropped.
4539  std::string file_name = boost::filesystem::path(*file_path).filename().string();
4540  std::string file_dir =
4541  catalog.getBasePath() + "/mapd_export/" + session.get_session_id() + "/";
4542  if (!boost::filesystem::exists(file_dir)) {
4543  if (!boost::filesystem::create_directories(file_dir)) {
4544  throw std::runtime_error("Directory " + file_dir + " cannot be created.");
4545  }
4546  }
4547  *file_path = file_dir + file_name;
4548  } else {
4549  // Above branch will create a new file in the mapd_export directory. If that path is
4550  // not exercised, go through applicable file path validations.
4553  }
4554 
4555  // get column info
      // This query always goes through local_connector (not leafs_connector_); only
      // targets_meta of its result is used below.
4556  auto column_info_result =
4557  local_connector.query(query_state_proxy, *select_stmt, {}, true);
4558 
4559  // create exporter for requested file type
4560  auto query_exporter = import_export::QueryExporter::create(file_type);
4561 
4562  // default layer name to file path stem if it wasn't specified
4563  if (layer_name.size() == 0) {
4564  layer_name = boost::filesystem::path(*file_path).stem().string();
4565  }
4566 
4567  // begin export
4568  query_exporter->beginExport(*file_path,
4569  layer_name,
4570  copy_params,
4571  column_info_result.targets_meta,
4572  file_compression,
4573  array_null_handling);
4574 
4575  // how many fragments?
4576  size_t outer_frag_count =
4577  leafs_connector_->getOuterFragmentCount(query_state_proxy, *select_stmt);
      // A count of 0 still runs the loop once, with no fragment restriction.
4578  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
4579 
4580  // loop fragments
4581  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
4582  // limit the query to just this fragment
4583  std::vector<size_t> allowed_outer_fragment_indices;
4584  if (outer_frag_count) {
4585  allowed_outer_fragment_indices.push_back(outer_frag_idx);
4586  }
4587 
4588  // run the query
4589  std::vector<AggregatedResult> query_results = leafs_connector_->query(
4590  query_state_proxy, *select_stmt, allowed_outer_fragment_indices);
4591 
4592  // export the results
4593  query_exporter->exportResults(query_results);
4594  }
4595 
4596  // end export
4597  query_exporter->endExport();
4598 }
4599 
// Translates the statement's options into export settings.
//
// Recognized option names (matched case-insensitively): delimiter, nulls, header,
// quote, escape, line_delimiter, quoted, file_type, layer_name, file_compression,
// array_null_handling. Every option value must be a string literal.
//
// Results are written to copy_params and to the output reference parameters.
// Throws std::runtime_error for an unknown option name, a value that is not a string
// literal, a delimiter/quote/escape/line_delimiter that is not exactly one character,
// or an unsupported file_type / file_compression / array_null_handling value.
4600 void ExportQueryStmt::parseOptions(
4601  import_export::CopyParams& copy_params,
4603  std::string& layer_name,
4605  import_export::QueryExporter::ArrayNullHandling& array_null_handling) {
4606  // defaults for non-CopyParams values
4608  layer_name.clear();
4610 
4611  if (!options.empty()) {
4612  for (auto& p : options) {
4613  if (boost::iequals(*p->get_name(), "delimiter")) {
4614  const StringLiteral* str_literal =
4615  dynamic_cast<const StringLiteral*>(p->get_value());
4616  if (str_literal == nullptr) {
4617  throw std::runtime_error("Delimiter option must be a string.");
4618  } else if (str_literal->get_stringval()->length() != 1) {
4619  throw std::runtime_error("Delimiter must be a single character string.");
4620  }
4621  copy_params.delimiter = (*str_literal->get_stringval())[0];
4622  } else if (boost::iequals(*p->get_name(), "nulls")) {
4623  const StringLiteral* str_literal =
4624  dynamic_cast<const StringLiteral*>(p->get_value());
4625  if (str_literal == nullptr) {
4626  throw std::runtime_error("Nulls option must be a string.");
4627  }
4628  copy_params.null_str = *str_literal->get_stringval();
4629  } else if (boost::iequals(*p->get_name(), "header")) {
      // The value is checked to be a string literal and then handed to
      // bool_from_string_literal(), even though the message says "boolean".
4630  const StringLiteral* str_literal =
4631  dynamic_cast<const StringLiteral*>(p->get_value());
4632  if (str_literal == nullptr) {
4633  throw std::runtime_error("Header option must be a boolean.");
4634  }
4635  copy_params.has_header = bool_from_string_literal(str_literal)
4638  } else if (boost::iequals(*p->get_name(), "quote")) {
4639  const StringLiteral* str_literal =
4640  dynamic_cast<const StringLiteral*>(p->get_value());
4641  if (str_literal == nullptr) {
4642  throw std::runtime_error("Quote option must be a string.");
4643  } else if (str_literal->get_stringval()->length() != 1) {
4644  throw std::runtime_error("Quote must be a single character string.");
4645  }
4646  copy_params.quote = (*str_literal->get_stringval())[0];
4647  } else if (boost::iequals(*p->get_name(), "escape")) {
4648  const StringLiteral* str_literal =
4649  dynamic_cast<const StringLiteral*>(p->get_value());
4650  if (str_literal == nullptr) {
4651  throw std::runtime_error("Escape option must be a string.");
4652  } else if (str_literal->get_stringval()->length() != 1) {
4653  throw std::runtime_error("Escape must be a single character string.");
4654  }
4655  copy_params.escape = (*str_literal->get_stringval())[0];
4656  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
4657  const StringLiteral* str_literal =
4658  dynamic_cast<const StringLiteral*>(p->get_value());
4659  if (str_literal == nullptr) {
4660  throw std::runtime_error("Line_delimiter option must be a string.");
4661  } else if (str_literal->get_stringval()->length() != 1) {
4662  throw std::runtime_error("Line_delimiter must be a single character string.");
4663  }
4664  copy_params.line_delim = (*str_literal->get_stringval())[0];
4665  } else if (boost::iequals(*p->get_name(), "quoted")) {
4666  const StringLiteral* str_literal =
4667  dynamic_cast<const StringLiteral*>(p->get_value());
4668  if (str_literal == nullptr) {
4669  throw std::runtime_error("Quoted option must be a boolean.");
4670  }
4671  copy_params.quoted = bool_from_string_literal(str_literal);
4672  } else if (boost::iequals(*p->get_name(), "file_type")) {
4673  const StringLiteral* str_literal =
4674  dynamic_cast<const StringLiteral*>(p->get_value());
4675  if (str_literal == nullptr) {
4676  throw std::runtime_error("File Type option must be a string.");
4677  }
      // Compared after lower-casing, so the value is case-insensitive.
4678  auto file_type_str =
4679  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
4680  if (file_type_str == "csv") {
4682  } else if (file_type_str == "geojson") {
4684  } else if (file_type_str == "geojsonl") {
4686  } else if (file_type_str == "shapefile") {
4688  } else {
4689  throw std::runtime_error(
4690  "File Type option must be 'CSV', 'GeoJSON', 'GeoJSONL' or 'Shapefile'");
4691  }
4692  } else if (boost::iequals(*p->get_name(), "layer_name")) {
4693  const StringLiteral* str_literal =
4694  dynamic_cast<const StringLiteral*>(p->get_value());
4695  if (str_literal == nullptr) {
4696  throw std::runtime_error("Layer Name option must be a string.");
4697  }
4698  layer_name = *str_literal->get_stringval();
4699  } else if (boost::iequals(*p->get_name(), "file_compression")) {
4700  const StringLiteral* str_literal =
4701  dynamic_cast<const StringLiteral*>(p->get_value());
4702  if (str_literal == nullptr) {
4703  throw std::runtime_error("File Compression option must be a string.");
4704  }
      // Compared after lower-casing, so the value is case-insensitive.
4705  auto file_compression_str =
4706  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
4707  if (file_compression_str == "none") {
4709  } else if (file_compression_str == "gzip") {
4711  } else if (file_compression_str == "zip") {
4713  } else {
4714  throw std::runtime_error(
4715  "File Compression option must be 'None', 'GZip', or 'Zip'");
4716  }
4717  } else if (boost::iequals(*p->get_name(), "array_null_handling")) {
4718  const StringLiteral* str_literal =
4719  dynamic_cast<const StringLiteral*>(p->get_value());
4720  if (str_literal == nullptr) {
4721  throw std::runtime_error("Array Null Handling option must be a string.");
4722  }
      // Compared after lower-casing, so the value is case-insensitive.
4723  auto array_null_handling_str =
4724  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
4725  if (array_null_handling_str == "abort") {
4726  array_null_handling =
4728  } else if (array_null_handling_str == "raw") {
4729  array_null_handling =
4731  } else if (array_null_handling_str == "zero") {
4732  array_null_handling =
4734  } else if (array_null_handling_str == "nullfield") {
4735  array_null_handling =
4737  } else {
4738  throw std::runtime_error(
4739  "Array Null Handling option must be 'Abort', 'Raw', 'Zero', or "
4740  "'NullField'");
4741  }
4742  } else {
4743  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
4744  }
4745  }
4746  }
4747 }
4748 
4749 CreateViewStmt::CreateViewStmt(const rapidjson::Value& payload) {
4750  CHECK(payload.HasMember("name"));
4751  view_name_ = json_str(payload["name"]);
4752 
4753  if_not_exists_ = false;
4754  if (payload.HasMember("ifNotExists")) {
4755  if_not_exists_ = json_bool(payload["ifNotExists"]);
4756  }
4757 
4758  CHECK(payload.HasMember("query"));
4759  select_query_ = json_str(payload["query"]);
4760  std::regex newline_re("\\n");
4761  select_query_ = std::regex_replace(select_query_, newline_re, " ");
4762  // ensure a trailing semicolon is present on the select query
4763  if (select_query_.back() != ';') {
4764  select_query_.push_back(';');
4765  }
4766 }
4767 
// Implements CREATE VIEW: has the view's SELECT processed by the catalog's Calcite
// manager and then registers the view in the catalog as a TableDescriptor with isView
// set.
//
// Returns without creating anything when catalog.validateNonExistentTableOrView()
// returns false. Throws std::runtime_error when the user lacks the privilege to create
// views.
4768 void CreateViewStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4769  auto session_copy = session;
      // Non-owning shared_ptr: session_copy lives on this stack frame, so the deleter
      // (boost::null_deleter) must not free it.
4770  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4771  &session_copy, boost::null_deleter());
4772  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
4773  auto stdlog = STDLOG(query_state);
4774  auto& catalog = session.getCatalog();
4775 
4776  if (!catalog.validateNonExistentTableOrView(view_name_, if_not_exists_)) {
4777  return;
4778  }
4781  throw std::runtime_error("View " + view_name_ +
4782  " will not be created. User has no create view privileges.");
4783  }
4784 
4785  const auto query_after_shim = pg_shim(select_query_);
4786 
4787  // this now also ensures that access permissions are checked
4788  catalog.getCalciteMgr()->process(query_state->createQueryStateProxy(),
4789  query_after_shim,
4790  {},
4791  true,
4792  false,
4793  false,
4794  true);
4795 
4796  // Take write lock after the query is processed to ensure no deadlocks
4797  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4800 
4801  TableDescriptor td;
4802  td.tableName = view_name_;
4803  td.userId = session.get_currentUser().userId;
4804  td.nColumns = 0;
4805  td.isView = true;
      // The descriptor stores the shimmed query text (output of pg_shim), not the raw
      // select_query_.
4806  td.viewSQL = query_after_shim;
4807  td.fragmenter = nullptr;
4809  td.maxFragRows =
4810  DEFAULT_FRAGMENT_ROWS; // @todo this stuff should not be InsertOrderFragmenter
4811  td.maxChunkSize =
4812  DEFAULT_MAX_CHUNK_SIZE; // @todo this stuff should not be InsertOrderFragmenter
4813  td.fragPageSize = DEFAULT_PAGE_SIZE;
4814  td.maxRows = DEFAULT_MAX_ROWS;
4815  catalog.createTable(td, {}, {}, true);
4816 
4817  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
4818  // privileges
4819  SysCatalog::instance().createDBObject(
4820  session.get_currentUser(), view_name_, ViewDBObjectType, catalog);
4821 }
4822 
// Implements DROP VIEW: obtains the view's descriptor together with a schema write
// lock, checks the caller's privilege and drops the object via catalog.dropTable().
//
// When acquiring the descriptor throws std::runtime_error and if_exists is set, the
// function returns silently. Throws std::runtime_error when the user lacks the
// privilege to drop views.
4823 void DropViewStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4824  auto& catalog = session.getCatalog();
4825 
4826  const TableDescriptor* td{nullptr};
4827  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
4828 
4829  try {
4830  td_with_lock =
4831  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
4833  catalog, *view_name, false));
4834  td = (*td_with_lock)();
4835  } catch (const std::runtime_error& e) {
      // With if_exists set, every std::runtime_error raised above is swallowed, not
      // only a "does not exist" failure.
4836  if (if_exists) {
4837  return;
4838  } else {
      // NOTE(review): `throw e;` throws a copy typed as std::runtime_error, so a more
      // derived exception type is sliced; `throw;` would rethrow the original object.
4839  throw e;
4840  }
4841  }
4842 
4843  CHECK(td);
4844  CHECK(td_with_lock);
4845 
4846  if (!session.checkDBAccessPrivileges(
4848  throw std::runtime_error("View " + *view_name +
4849  " will not be dropped. User has no drop view privileges.");
4850  }
4851 
4853  catalog.dropTable(td);
4854 }
4855 
4856 static void checkStringLiteral(const std::string& option_name,
4857  const std::unique_ptr<NameValueAssign>& p) {
4858  CHECK(p);
4859  if (!dynamic_cast<const StringLiteral*>(p->get_value())) {
4860  throw std::runtime_error(option_name + " option must be a string literal.");
4861  }
4862 }
4863 
// Implements CREATE DATABASE. Only super users may run it.
//
// The owner defaults to the current user and can be overridden with the OWNER option
// (a string literal naming an existing user); no other option is accepted. Returns
// silently when the database already exists and if_not_exists_ is set.
//
// Throws std::runtime_error when the caller is not a super user, when an option other
// than OWNER is given, when the OWNER value is not a string literal, or when the named
// owner does not exist.
4864 void CreateDBStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4865  if (!session.get_currentUser().isSuper) {
4866  throw std::runtime_error(
4867  "CREATE DATABASE command can only be executed by super user.");
4868  }
4869 
4870  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4873 
      // When the database exists but IF NOT EXISTS was not given, control falls through
      // to createDatabase() below.
4875  if (SysCatalog::instance().getMetadataForDB(*db_name, db_meta) && if_not_exists_) {
4876  return;
4877  }
4878  int ownerId = session.get_currentUser().userId;
4879  if (!name_value_list.empty()) {
4880  for (auto& p : name_value_list) {
4881  if (boost::iequals(*p->get_name(), "owner")) {
      // checkStringLiteral() throws for non-string values, which is what makes the
      // static_cast below safe.
4882  checkStringLiteral("Owner name", p);
4883  const std::string* str =
4884  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4886  if (!SysCatalog::instance().getMetadataForUser(*str, user)) {
4887  throw std::runtime_error("User " + *str + " does not exist.");
4888  }
4889  ownerId = user.userId;
4890  } else {
4891  throw std::runtime_error("Invalid CREATE DATABASE option " + *p->get_name() +
4892  ". Only OWNER supported.");
4893  }
4894  }
4895  }
4896  SysCatalog::instance().createDatabase(*db_name, ownerId);
4897 }
4898 
// Implements DROP DATABASE. Only super users may run it.
//
// Returns silently when the database is not found and if_exists_ is set. Throws
// std::runtime_error when the caller is not a super user or when the database is not
// found and if_exists_ is not set.
4899 void DropDBStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4900  if (!session.get_currentUser().isSuper) {
4901  throw std::runtime_error("DROP DATABASE command can only be executed by super user.");
4902  }
4903 
4904  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4907 
4909  if (!SysCatalog::instance().getMetadataForDB(*db_name, db)) {
4910  if (if_exists_) {
4911  return;
4912  }
4913  throw std::runtime_error("Database " + *db_name + " does not exist.");
4914  }
4915 
      // NOTE(review): a non-super caller already threw at the top of this function, so
      // `!isSuper` is always false here and this owner check can never fire.
4916  if (!session.get_currentUser().isSuper &&
4917  session.get_currentUser().userId != db.dbOwner) {
4918  throw std::runtime_error("Only the super user or the owner can drop database.");
4919  }
4920 
4921  SysCatalog::instance().dropDatabase(db);
4922 }
4923 
4924 static bool readBooleanLiteral(const std::string& option_name,
4925  const std::unique_ptr<NameValueAssign>& p) {
4926  CHECK(p);
4927  const std::string* str =
4928  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4929  if (boost::iequals(*str, "true")) {
4930  return true;
4931  } else if (boost::iequals(*str, "false")) {
4932  return false;
4933  } else {
4934  throw std::runtime_error("Value to " + option_name + " must be TRUE or FALSE.");
4935  }
4936 }
4937 
4938 void CreateUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4939  std::string passwd;
4940  bool is_super = false;
4941  std::string default_db;
4942  bool can_login = true;
4943  for (auto& p : name_value_list) {
4944  if (boost::iequals(*p->get_name(), "password")) {
4945  checkStringLiteral("Password", p);
4946  passwd = *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4947  } else if (boost::iequals(*p->get_name(), "is_super")) {
4948  checkStringLiteral("IS_SUPER", p);
4949  is_super = readBooleanLiteral("IS_SUPER", p);
4950  } else if (boost::iequals(*p->get_name(), "default_db")) {
4951  checkStringLiteral("DEFAULT_DB", p);
4952  default_db = *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4953  } else if (boost::iequals(*p->get_name(), "can_login")) {
4954  checkStringLiteral("CAN_LOGIN", p);
4955  can_login = readBooleanLiteral("can_login", p);
4956  } else {
4957  throw std::runtime_error("Invalid CREATE USER option " + *p->get_name() +
4958  ". Should be PASSWORD, IS_SUPER, CAN_LOGIN"
4959  " or DEFAULT_DB.");
4960  }
4961  }
4962  if (!session.get_currentUser().isSuper) {
4963  throw std::runtime_error("Only super user can create new users.");
4964  }
4965  SysCatalog::instance().createUser(*user_name, passwd, is_super, default_db, can_login);
4966 }
4967 
// Implements ALTER USER.
//
// Accepted options (names matched case-insensitively): PASSWORD, IS_SUPER, DEFAULT_DB
// and CAN_LOGIN. DEFAULT_DB also accepts a NULL literal, which is passed on as an empty
// string.
//
// Authorization: a super user may change any user; any other user may change only
// their own password and default database.
//
// Throws std::runtime_error for an unknown option, a malformed option value, a target
// user that does not exist, or an unauthorized change.
4968 void AlterUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4969  // Parse the statement
      // Each pointer below stays nullptr unless its option appears in the statement;
      // the pointers, not the plain values, are what is handed to alterUser().
4970  const std::string* passwd = nullptr;
4971  bool is_super = false;
4972  bool* is_superp = nullptr;
4973  const std::string* default_db = nullptr;
4974  bool can_login = true;
4975  bool* can_loginp = nullptr;
4976  for (auto& p : name_value_list) {
4977  if (boost::iequals(*p->get_name(), "password")) {
4978  checkStringLiteral("Password", p);
4979  passwd = static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4980  } else if (boost::iequals(*p->get_name(), "is_super")) {
4981  checkStringLiteral("IS_SUPER", p);
4982  is_super = readBooleanLiteral("IS_SUPER", p);
4983  is_superp = &is_super;
4984  } else if (boost::iequals(*p->get_name(), "default_db")) {
4985  if (dynamic_cast<const StringLiteral*>(p->get_value())) {
4986  default_db = static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4987  } else if (dynamic_cast<const NullLiteral*>(p->get_value())) {
      // NULL is represented by a pointer to a function-local static empty string.
4988  static std::string blank;
4989  default_db = &blank;
4990  } else {
4991  throw std::runtime_error(
4992  "DEFAULT_DB option must be either a string literal or a NULL literal.");
4993  }
4994  } else if (boost::iequals(*p->get_name(), "can_login")) {
4995  checkStringLiteral("CAN_LOGIN", p);
4996  can_login = readBooleanLiteral("CAN_LOGIN", p);
4997  can_loginp = &can_login;
4998  } else {
4999  throw std::runtime_error("Invalid ALTER USER option " + *p->get_name() +
5000  ". Should be PASSWORD, DEFAULT_DB, CAN_LOGIN"
5001  " or IS_SUPER.");
5002  }
5003  }
5004 
5005  // Check if the user is authorized to execute ALTER USER statement
5007  if (!SysCatalog::instance().getMetadataForUser(*user_name, user)) {
5008  throw std::runtime_error("User " + *user_name + " does not exist.");
5009  }
5010  if (!session.get_currentUser().isSuper) {
5011  if (session.get_currentUser().userId != user.userId) {
5012  throw std::runtime_error("Only super user can change another user's attributes.");
5013  } else if (is_superp || can_loginp) {
5014  throw std::runtime_error(
5015  "A user can only update their own password or default database.");
5016  }
5017  }
5018 
      // Nothing is sent to the system catalog when the statement carried no options.
5019  if (passwd || is_superp || default_db || can_loginp) {
5020  SysCatalog::instance().alterUser(
5021  user.userId, passwd, is_superp, default_db, can_loginp);
5022  }
5023 }
5024 
5025 void DropUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5026  if (!session.get_currentUser().isSuper) {
5027  throw std::runtime_error("Only super user can drop users.");
5028  }
5029  SysCatalog::instance().dropUser(*user_name);
5030 }
5031 
// Implements DUMP TABLE: archives *table to *path through TableArchiver::dumpTable(),
// passing `compression` along.
//
// Throws std::runtime_error when the user lacks select privileges or create
// privileges, as reported by the two messages below.
5032 void DumpTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5033  // check access privileges
5034  if (!session.checkDBAccessPrivileges(
5036  throw std::runtime_error("Table " + *table +
5037  " will not be dumped. User has no select privileges.");
5038  }
5041  throw std::runtime_error("Table " + *table +
5042  " will not be dumped. User has no create privileges.");
5043  }
5044  auto& catalog = session.getCatalog();
      // NOTE(review): td is passed on without a null check; confirm that
      // getMetadataForTable() cannot return nullptr here or that dumpTable() copes.
5045  const TableDescriptor* td = catalog.getMetadataForTable(*table);
5046  TableArchiver table_archiver(&catalog);
5047  table_archiver.dumpTable(td, *path, compression);
5048 }
5049 
// Implements RESTORE TABLE: restores *table from the archive at *path through
// TableArchiver::restoreTable(), passing `compression` along.
//
// Restoring over an existing table is refused. Throws std::runtime_error when the
// table already exists or when the user lacks create privileges.
5050 void RestoreTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5051  auto& catalog = session.getCatalog();
5052  const TableDescriptor* td = catalog.getMetadataForTable(*table, false);
5053  if (td) {
5054  // TODO: v1.0 simply throws to avoid accidentally overwrite target table.
5055  // Will add a REPLACE TABLE to explictly replace target table.
5056  // catalog.restoreTable(session, td, *path, compression);
5057  throw std::runtime_error("Table " + *table + " exists.");
5058  } else {
5059  // check access privileges
5062  throw std::runtime_error("Table " + *table +
5063  " will not be restored. User has no create privileges.");
5064  }
5065  TableArchiver table_archiver(&catalog);
5066  table_archiver.restoreTable(session, *table, *path, compression);
5067  }
5068 }
5069 
5071  const std::string& ddl_statement,
5072  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr) {
      // Executes a DDL command described by the JSON text in ddl_statement.
      //
      // Expected shape, enforced with CHECK: a top-level object whose "payload" member
      // is an object holding a string member "command". Supported commands are
      // CREATE_TABLE and CREATE_VIEW; any other command throws std::runtime_error.
      // session_ptr is dereferenced without a null check.
5073  CHECK(!ddl_statement.empty());
5074  VLOG(2) << "Parsing JSON DDL from Calcite: " << ddl_statement;
5075  rapidjson::Document ddl_query;
5076  ddl_query.Parse(ddl_statement);
      // There is no explicit parse-error test; a failed parse is expected to trip the
      // IsObject() check below -- presumably, confirm against rapidjson.
5077  CHECK(ddl_query.IsObject());
5078  CHECK(ddl_query.HasMember("payload"));
5079  CHECK(ddl_query["payload"].IsObject());
5080  const auto& payload = ddl_query["payload"].GetObject();
5081  CHECK(payload.HasMember("command"));
5082  CHECK(payload["command"].IsString());
5083 
5084  const auto& ddl_command = std::string_view(payload["command"].GetString());
      // The command name is compared exactly (case-sensitive).
5085  if (ddl_command == "CREATE_TABLE") {
5086  auto create_table_stmt = Parser::CreateTableStmt(payload);
5087  create_table_stmt.execute(*session_ptr);
5088  } else if (ddl_command == "CREATE_VIEW") {
5089  auto create_view_stmt = Parser::CreateViewStmt(payload);
5090  create_view_stmt.execute(*session_ptr);
5091  } else {
5092  throw std::runtime_error("Unsupported DDL command");
5093  }
5094 }
5095 
5096 } // namespace Parser
int8_t tinyintval
Definition: sqltypes.h:206
SQLTypes to_sql_type(const std::string &type_name)
const ColumnConstraintDef * get_column_constraint() const
Definition: ParserNode.h:800
decltype(auto) get_max_rows_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
void execute_calcite_ddl(const std::string &ddl_statement, std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)
void validate_non_foreign_table_write(const TableDescriptor *table_descriptor)
Definition: FsiUtils.h:22
static const AccessPrivileges VIEW_SQL_EDITOR
Definition: DBObject.h:153
decltype(auto) get_max_chunk_size_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
static const std::map< const std::string, const TableDefFuncPtr > tableDefFuncMap
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:331
void set_compression(EncodingType c)
Definition: sqltypes.h:430
SQLAgg
Definition: sqldefs.h:71
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string s3_secret_key
Definition: CopyParams.h:63
size_t shard_column_index(const std::string &name, const std::list< ColumnDescriptor > &columns)
const std::string & get_column() const
Definition: ParserNode.h:921
std::string partitions
std::vector< int > ChunkKey
Definition: types.h:37
static const AccessPrivileges VIEW_DASHBOARD
Definition: DBObject.h:172
#define NULL_DOUBLE
Definition: sqltypes.h:257
static const int32_t DROP_VIEW
Definition: DBObject.h:115
HOST DEVICE int get_size() const
Definition: sqltypes.h:340
SQLType * get_column_type() const
Definition: ParserNode.h:798
static const AccessPrivileges DROP_SERVER
Definition: DBObject.h:190
std::string cat(Ts &&...args)
static const int32_t SELECT_FROM_VIEW
Definition: DBObject.h:116
Definition: Analyzer.h:1567
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:97
SQLTypes
Definition: sqltypes.h:40
static void checkStringLiteral(const std::string &option_name, const std::unique_ptr< NameValueAssign > &p)
static const int32_t UPDATE_IN_VIEW
Definition: DBObject.h:118
std::string tableName
const std::vector< TargetMetaInfo > targets_meta
void add_rte(RangeTableEntry *rte)
Definition: Analyzer.cpp:1347
bool is_timestamp() const
Definition: sqltypes.h:733
static const AccessPrivileges ALL_DATABASE
Definition: DBObject.h:152
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
decltype(auto) get_vacuum_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
static const AccessPrivileges ALTER_TABLE
Definition: DBObject.h:166
DBObjectType
Definition: DBObject.h:42
static const int32_t CREATE_VIEW
Definition: DBObject.h:114
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
void expand_star_in_targetlist(const Catalog_Namespace::Catalog &catalog, std::vector< std::shared_ptr< TargetEntry >> &tlist, int rte_idx)
static const AccessPrivileges TRUNCATE_TABLE
Definition: DBObject.h:165
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:265
SQLQualifier
Definition: sqldefs.h:69
#define LOG(tag)
Definition: Logger.h:188
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:155
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:49
decltype(auto) get_skip_rows_def(DataframeTableDescriptor &df_td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
HOST DEVICE int get_scale() const
Definition: sqltypes.h:335
SQLOps
Definition: sqldefs.h:29
void checkpointWithAutoRollback(const int logical_table_id)
Definition: Catalog.cpp:3767
Definition: sqldefs.h:35
std::string storageType
static const AccessPrivileges INSERT_INTO_TABLE
Definition: DBObject.h:162
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:44
std::shared_ptr< Analyzer::Expr > deep_copy() const override
Definition: Analyzer.cpp:78
Definition: sqldefs.h:36
static const AccessPrivileges CREATE_DASHBOARD
Definition: DBObject.h:171
static std::shared_ptr< QueryState > create(ARGS &&...args)
Definition: QueryState.h:140
bool get_is_null() const
Definition: Analyzer.h:334
#define DEFAULT_MAX_CHUNK_SIZE
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders(const TableDescriptor *td, Loader *loader)
Definition: Importer.cpp:5152
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:421
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:899
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
Definition: sqldefs.h:49
void set_order_by(std::list< OrderEntry > *o)
Definition: Analyzer.h:1634
std::string extractObjectNameFromHierName(const std::string &objectHierName, const std::string &objectType, const Catalog_Namespace::Catalog &cat)
static const int32_t ALTER_TABLE
Definition: DBObject.h:95
void set_result_col_list(const std::list< int > &col_list)
Definition: Analyzer.h:1628
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters system_parameters=SystemParameters())
Definition: Execute.cpp:150
int get_num_aggs() const
Definition: Analyzer.h:1609
void setPrivileges(const AccessPrivileges &privs)
Definition: DBObject.h:217
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:330
static void invalidateCaches()
void set_offset(int64_t o)
Definition: Analyzer.h:1646
std::vector< std::string > splitObjectHierName(const std::string &hierName)
static const AccessPrivileges SELECT_FROM_TABLE
Definition: DBObject.h:161
static std::unique_ptr< QueryExporter > create(const FileType file_type)
bool is_number() const
Definition: sqltypes.h:492
const ColumnDescriptor * getShardColumnMetadataForTable(const TableDescriptor *td) const
Definition: Catalog.cpp:3637
bool bool_from_string_literal(const Parser::StringLiteral *str_literal)
Definition: ParserNode.cpp:913
void get_table_definitions_for_ctas(TableDescriptor &td, const std::unique_ptr< NameValueAssign > &p, const std::list< ColumnDescriptor > &columns)
int32_t intval
Definition: sqltypes.h:208
bool is_time() const
Definition: sqltypes.h:493
int32_t get_table_id() const
void restoreTable(const Catalog_Namespace::SessionInfo &session, const TableDescriptor *td, const std::string &archive_path, const std::string &compression)
std::string to_string(char const *&&v)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
int get_rte_idx(const std::string &range_var_name) const
Definition: Analyzer.cpp:1336
static const AccessPrivileges ALL_VIEW
Definition: DBObject.h:178
void validate_non_duplicate_column(const std::string &column_name, std::unordered_set< std::string > &upper_column_names)
Definition: DdlUtils.cpp:521
std::shared_ptr< StringDictionary > stringDict
int tableId
identifies the database into which the data is being inserted
Definition: Fragmenter.h:61
DBObjectType DBObjectTypeFromString(const std::string &type)
Definition: DBObject.cpp:99
Definition: sqldefs.h:73
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const int64_t replicate_count=0)
Definition: Importer.cpp:1420
static const AccessPrivileges ALTER_SERVER
Definition: DBObject.h:191
size_t numRows
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:63
This file contains the class specification and related data structures for Catalog.
float floatval
Definition: sqltypes.h:210
ImportHeaderRow has_header
Definition: CopyParams.h:48
boost::function< void(DataframeTableDescriptor &, const NameValueAssign *, const std::list< ColumnDescriptor > &columns)> DataframeDefFuncPtr
Definition: ParserNode.cpp:89
static SQLTypeInfo common_string_type(const SQLTypeInfo &type1, const SQLTypeInfo &type2)
Definition: Analyzer.cpp:396
#define DEFAULT_MAX_ROWS
const QueryHint getParsedQueryHints() const
int get_physical_cols() const
Definition: sqltypes.h:351
const std::string * get_stringval() const
Definition: ParserNode.h:136
Classes representing a parse tree.
void set_fixed_size()
Definition: sqltypes.h:429
int get_logical_size() const
Definition: sqltypes.h:341
RangeTableEntry * get_rte(int rte_idx) const
Definition: Analyzer.h:1640
void get_dataframe_definitions(DataframeTableDescriptor &df_td, const std::unique_ptr< NameValueAssign > &p, const std::list< ColumnDescriptor > &columns)
QueryState & getQueryState()
Definition: QueryState.h:172
static const int32_t DROP_DATABASE
Definition: DBObject.h:81
bool is_integer() const
Definition: sqltypes.h:489
static const AccessPrivileges DROP_TABLE