OmniSciDB  8a228a1076
ParserNode.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "ParserNode.h"
26 
27 #include <boost/algorithm/string.hpp>
28 #include <boost/core/null_deleter.hpp>
29 #include <boost/filesystem.hpp>
30 #include <boost/function.hpp>
31 
32 #include <rapidjson/document.h>
33 #include <rapidjson/stringbuffer.h>
34 #include <rapidjson/writer.h>
35 
36 #include <cassert>
37 #include <cmath>
38 #include <limits>
39 #include <random>
40 #include <stdexcept>
41 #include <type_traits>
42 #include <typeinfo>
43 
45 #include "Catalog/Catalog.h"
51 #include "ImportExport/Importer.h"
52 #include "LockMgr/LockMgr.h"
54 #include "QueryEngine/Execute.h"
57 #include "ReservedKeywords.h"
58 #include "Shared/StringTransform.h"
59 #include "Shared/TimeGM.h"
60 #include "Shared/geo_compression.h"
61 #include "Shared/geo_types.h"
62 #include "Shared/mapd_glob.h"
63 #include "Shared/measure.h"
64 #include "Shared/shard_key.h"
66 #include "Utils/FsiUtils.h"
67 #include "gen-cpp/CalciteServer.h"
68 #include "parser.h"
69 
// Global leaf counter, defined in this translation unit and initialised to 0.
// NOTE(review): presumably the number of leaf nodes in a distributed
// deployment - confirm against the code that sets it.
size_t g_leaf_count{0};
// Defined in another translation unit.
// NOTE(review): presumably toggles the foreign storage interface - confirm.
extern bool g_enable_fsi;

// Enables the `"..."s` std::string literal suffix for the rest of this file.
using namespace std::string_literals;

// Callback signature taking the table descriptor to fill in, one name/value
// option, and the table's column list.
using TableDefFuncPtr = boost::function<void(TableDescriptor&,
                                             const NameValueAssign*,
                                             const std::list<ColumnDescriptor>& columns)>;

// Same callback shape as TableDefFuncPtr, but operating on a
// DataframeTableDescriptor.
using DataframeDefFuncPtr =
    boost::function<void(DataframeTableDescriptor&,
                         const NameValueAssign*,
                         const std::list<ColumnDescriptor>& columns)>;
86 
87 namespace Parser {
88 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
89  const Catalog_Namespace::Catalog& catalog,
90  Analyzer::Query& query,
91  TlistRefType allow_tlist_ref) const {
92  return makeExpr<Analyzer::Constant>(kNULLT, true);
93 }
94 
95 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
96  const Catalog_Namespace::Catalog& catalog,
97  Analyzer::Query& query,
98  TlistRefType allow_tlist_ref) const {
99  return analyzeValue(*stringval);
100 }
101 
102 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(
103  const std::string& stringval) {
104  SQLTypeInfo ti(kVARCHAR, stringval.length(), 0, true);
105  Datum d;
106  d.stringval = new std::string(stringval);
107  return makeExpr<Analyzer::Constant>(ti, false, d);
108 }
109 
110 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
111  const Catalog_Namespace::Catalog& catalog,
112  Analyzer::Query& query,
113  TlistRefType allow_tlist_ref) const {
114  return analyzeValue(intval);
115 }
116 
117 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(const int64_t intval) {
118  SQLTypes t;
119  Datum d;
120  if (intval >= INT16_MIN && intval <= INT16_MAX) {
121  t = kSMALLINT;
122  d.smallintval = (int16_t)intval;
123  } else if (intval >= INT32_MIN && intval <= INT32_MAX) {
124  t = kINT;
125  d.intval = (int32_t)intval;
126  } else {
127  t = kBIGINT;
128  d.bigintval = intval;
129  }
130  return makeExpr<Analyzer::Constant>(t, false, d);
131 }
132 
133 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
134  const Catalog_Namespace::Catalog& catalog,
135  Analyzer::Query& query,
136  TlistRefType allow_tlist_ref) const {
137  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
138  Datum d = StringToDatum(*fixedptval, ti);
139  return makeExpr<Analyzer::Constant>(ti, false, d);
140 }
141 
142 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(const int64_t numericval,
143  const int scale,
144  const int precision) {
145  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
146  ti.set_scale(scale);
147  ti.set_precision(precision);
148  Datum d;
149  d.bigintval = numericval;
150  return makeExpr<Analyzer::Constant>(ti, false, d);
151 }
152 
153 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
154  const Catalog_Namespace::Catalog& catalog,
155  Analyzer::Query& query,
156  TlistRefType allow_tlist_ref) const {
157  Datum d;
158  d.floatval = floatval;
159  return makeExpr<Analyzer::Constant>(kFLOAT, false, d);
160 }
161 
162 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
163  const Catalog_Namespace::Catalog& catalog,
164  Analyzer::Query& query,
165  TlistRefType allow_tlist_ref) const {
166  Datum d;
167  d.doubleval = doubleval;
168  return makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
169 }
170 
171 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
172  const Catalog_Namespace::Catalog& catalog,
173  Analyzer::Query& query,
174  TlistRefType allow_tlist_ref) const {
175  return get(timestampval_);
176 }
177 
178 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(const int64_t timestampval) {
179  Datum d;
180  d.bigintval = timestampval;
181  return makeExpr<Analyzer::Constant>(kTIMESTAMP, false, d);
182 }
183 
184 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
185  const Catalog_Namespace::Catalog& catalog,
186  Analyzer::Query& query,
187  TlistRefType allow_tlist_ref) const {
188  Datum d;
189  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
190 }
191 
192 std::shared_ptr<Analyzer::Expr> UserLiteral::get(const std::string& user) {
193  Datum d;
194  d.stringval = new std::string(user);
195  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
196 }
197 
198 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
199  const Catalog_Namespace::Catalog& catalog,
200  Analyzer::Query& query,
201  TlistRefType allow_tlist_ref) const {
202  SQLTypeInfo ti = SQLTypeInfo(kARRAY, true);
203  bool set_subtype = true;
204  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
205  for (auto& p : value_list) {
206  auto e = p->analyze(catalog, query, allow_tlist_ref);
207  CHECK(e);
208  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
209  if (c != nullptr && c->get_is_null()) {
210  value_exprs.push_back(c);
211  continue;
212  }
213  auto subtype = e->get_type_info().get_type();
214  if (subtype == kNULLT) {
215  // NULL element
216  } else if (set_subtype) {
217  ti.set_subtype(subtype);
218  set_subtype = false;
219  } else {
220  if (ti.get_subtype() != subtype) {
221  throw std::runtime_error("ARRAY element literals should be of the same type.");
222  }
223  }
224  value_exprs.push_back(e);
225  }
226  std::shared_ptr<Analyzer::Expr> result =
227  makeExpr<Analyzer::Constant>(ti, false, value_exprs);
228  return result;
229 }
230 
231 std::string ArrayLiteral::to_string() const {
232  std::string str = "{";
233  bool notfirst = false;
234  for (auto& p : value_list) {
235  if (notfirst) {
236  str += ", ";
237  } else {
238  notfirst = true;
239  }
240  str += p->to_string();
241  }
242  str += "}";
243  return str;
244 }
245 
246 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
247  const Catalog_Namespace::Catalog& catalog,
248  Analyzer::Query& query,
249  TlistRefType allow_tlist_ref) const {
250  auto left_expr = left->analyze(catalog, query, allow_tlist_ref);
251  const auto& left_type = left_expr->get_type_info();
252  if (right == nullptr) {
253  return makeExpr<Analyzer::UOper>(
254  left_type, left_expr->get_contains_agg(), optype, left_expr->decompress());
255  }
256  if (optype == kARRAY_AT) {
257  if (left_type.get_type() != kARRAY) {
258  throw std::runtime_error(left->to_string() + " is not of array type.");
259  }
260  auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
261  const auto& right_type = right_expr->get_type_info();
262  if (!right_type.is_integer()) {
263  throw std::runtime_error(right->to_string() + " is not of integer type.");
264  }
265  return makeExpr<Analyzer::BinOper>(
266  left_type.get_elem_type(), false, kARRAY_AT, kONE, left_expr, right_expr);
267  }
268  auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
269  return normalize(optype, opqualifier, left_expr, right_expr);
270 }
271 
// Builds a BinOper for `optype` over two already-analyzed operands, inserting
// the casts and decompressions the operand types require.
//
// optype     - the binary operator.
// qual       - operator qualifier; anything other than kONE requires the
//              right operand to be an array and compares against its
//              element type.
// left_expr  - analyzed left operand (taken by value and re-bound locally).
// right_expr - analyzed right operand (taken by value and re-bound locally).
//
// Throws std::runtime_error when a qualifier is used with a non-array right
// operand, or when a comparison other than kOVERLAPS is applied to two
// geometry operands.
std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
    const SQLOps optype,
    const SQLQualifier qual,
    std::shared_ptr<Analyzer::Expr> left_expr,
    std::shared_ptr<Analyzer::Expr> right_expr) {
  if (left_expr->get_type_info().is_date_in_days() ||
      right_expr->get_type_info().is_date_in_days()) {
    // Do not propagate encoding
    left_expr = left_expr->decompress();
    right_expr = right_expr->decompress();
  }
  const auto& left_type = left_expr->get_type_info();
  auto right_type = right_expr->get_type_info();
  if (qual != kONE) {
    // subquery not supported yet.
    CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
    if (right_type.get_type() != kARRAY) {
      throw std::runtime_error(
          "Existential or universal qualifiers can only be used in front of a subquery "
          "or an "
          "expression of array type.");
    }
    // With a qualifier, type analysis is done against the array's element type.
    right_type = right_type.get_elem_type();
  }
  SQLTypeInfo new_left_type;
  SQLTypeInfo new_right_type;
  // Yields the result type and, through the out-parameters, the types each
  // operand is to be cast to.
  auto result_type = Analyzer::BinOper::analyze_type_info(
      optype, left_type, right_type, &new_left_type, &new_right_type);
  if (result_type.is_timeinterval()) {
    // Time-interval results are returned without operand casts, and with the
    // contains-aggregate flag passed as false.
    return makeExpr<Analyzer::BinOper>(
        result_type, false, optype, qual, left_expr, right_expr);
  }
  if (left_type != new_left_type) {
    left_expr = left_expr->add_cast(new_left_type);
  }
  if (right_type != new_right_type) {
    if (qual == kONE) {
      right_expr = right_expr->add_cast(new_right_type);
    } else {
      // Qualified operand is an array: cast to an array of the new element type.
      right_expr = right_expr->add_cast(new_right_type.get_array_type());
    }
  }

  if (IS_COMPARISON(optype)) {
    if (optype != kOVERLAPS && new_left_type.is_geometry() &&
        new_right_type.is_geometry()) {
      throw std::runtime_error(
          "Comparison operators are not yet supported for geospatial types.");
    }

    if (new_left_type.get_compression() == kENCODING_DICT &&
        new_right_type.get_compression() == kENCODING_DICT &&
        new_left_type.get_comp_param() == new_right_type.get_comp_param()) {
      // do nothing
    } else if (new_left_type.get_compression() == kENCODING_DICT &&
               new_right_type.get_compression() == kENCODING_NONE) {
      // Left is dictionary-encoded, right is unencoded: cast the right
      // operand to the left operand's encoding and comp param.
      SQLTypeInfo ti(new_right_type);
      ti.set_compression(new_left_type.get_compression());
      ti.set_comp_param(new_left_type.get_comp_param());
      ti.set_fixed_size();
      right_expr = right_expr->add_cast(ti);
    } else if (new_right_type.get_compression() == kENCODING_DICT &&
               new_left_type.get_compression() == kENCODING_NONE) {
      // Mirror case: cast the left operand to the right operand's encoding.
      SQLTypeInfo ti(new_left_type);
      ti.set_compression(new_right_type.get_compression());
      ti.set_comp_param(new_right_type.get_comp_param());
      ti.set_fixed_size();
      left_expr = left_expr->add_cast(ti);
    } else {
      left_expr = left_expr->decompress();
      right_expr = right_expr->decompress();
    }
  } else {
    // Non-comparison operators always work on decompressed operands.
    left_expr = left_expr->decompress();
    right_expr = right_expr->decompress();
  }
  bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
  return makeExpr<Analyzer::BinOper>(
      result_type, has_agg, optype, qual, left_expr, right_expr);
}
352 
353 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
354  const Catalog_Namespace::Catalog& catalog,
355  Analyzer::Query& query,
356  TlistRefType allow_tlist_ref) const {
357  throw std::runtime_error("Subqueries are not supported yet.");
358  return nullptr;
359 }
360 
361 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
362  const Catalog_Namespace::Catalog& catalog,
363  Analyzer::Query& query,
364  TlistRefType allow_tlist_ref) const {
365  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
366  auto result = makeExpr<Analyzer::UOper>(kBOOLEAN, kISNULL, arg_expr);
367  if (is_not) {
368  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
369  }
370  return result;
371 }
372 
373 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
374  const Catalog_Namespace::Catalog& catalog,
375  Analyzer::Query& query,
376  TlistRefType allow_tlist_ref) const {
377  throw std::runtime_error("Subqueries are not supported yet.");
378  return nullptr;
379 }
380 
381 std::shared_ptr<Analyzer::Expr> InValues::analyze(
382  const Catalog_Namespace::Catalog& catalog,
383  Analyzer::Query& query,
384  TlistRefType allow_tlist_ref) const {
385  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
386  SQLTypeInfo ti = arg_expr->get_type_info();
387  bool dict_comp = ti.get_compression() == kENCODING_DICT;
388  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
389  for (auto& p : value_list) {
390  auto e = p->analyze(catalog, query, allow_tlist_ref);
391  if (ti != e->get_type_info()) {
392  if (ti.is_string() && e->get_type_info().is_string()) {
393  ti = Analyzer::BinOper::common_string_type(ti, e->get_type_info());
394  } else if (ti.is_number() && e->get_type_info().is_number()) {
395  ti = Analyzer::BinOper::common_numeric_type(ti, e->get_type_info());
396  } else {
397  throw std::runtime_error("IN expressions must contain compatible types.");
398  }
399  }
400  if (dict_comp) {
401  value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
402  } else {
403  value_exprs.push_back(e);
404  }
405  }
406  if (!dict_comp) {
407  arg_expr = arg_expr->decompress();
408  arg_expr = arg_expr->add_cast(ti);
409  std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
410  for (auto p : value_exprs) {
411  cast_vals.push_back(p->add_cast(ti));
412  }
413  value_exprs.swap(cast_vals);
414  }
415  std::shared_ptr<Analyzer::Expr> result =
416  makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
417  if (is_not) {
418  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
419  }
420  return result;
421 }
422 
423 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
424  const Catalog_Namespace::Catalog& catalog,
425  Analyzer::Query& query,
426  TlistRefType allow_tlist_ref) const {
427  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
428  auto lower_expr = lower->analyze(catalog, query, allow_tlist_ref);
429  auto upper_expr = upper->analyze(catalog, query, allow_tlist_ref);
430  SQLTypeInfo new_left_type, new_right_type;
432  arg_expr->get_type_info(),
433  lower_expr->get_type_info(),
434  &new_left_type,
435  &new_right_type);
436  auto lower_pred =
437  makeExpr<Analyzer::BinOper>(kBOOLEAN,
438  kGE,
439  kONE,
440  arg_expr->add_cast(new_left_type)->decompress(),
441  lower_expr->add_cast(new_right_type)->decompress());
443  arg_expr->get_type_info(),
444  lower_expr->get_type_info(),
445  &new_left_type,
446  &new_right_type);
447  auto upper_pred = makeExpr<Analyzer::BinOper>(
448  kBOOLEAN,
449  kLE,
450  kONE,
451  arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
452  upper_expr->add_cast(new_right_type)->decompress());
453  std::shared_ptr<Analyzer::Expr> result =
454  makeExpr<Analyzer::BinOper>(kBOOLEAN, kAND, kONE, lower_pred, upper_pred);
455  if (is_not) {
456  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
457  }
458  return result;
459 }
460 
461 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
462  const Catalog_Namespace::Catalog& catalog,
463  Analyzer::Query& query,
464  TlistRefType allow_tlist_ref) const {
465  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
466  if (!arg_expr->get_type_info().is_string()) {
467  throw std::runtime_error(
468  "expression in char_length clause must be of a string type.");
469  }
470  std::shared_ptr<Analyzer::Expr> result =
471  makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length);
472  return result;
473 }
474 
475 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
476  const Catalog_Namespace::Catalog& catalog,
477  Analyzer::Query& query,
478  TlistRefType allow_tlist_ref) const {
479  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
480  if (!arg_expr->get_type_info().is_array()) {
481  throw std::runtime_error(
482  "expression in cardinality clause must be of an array type.");
483  }
484  std::shared_ptr<Analyzer::Expr> result =
485  makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
486  return result;
487 }
488 
489 void LikeExpr::check_like_expr(const std::string& like_str, char escape_char) {
490  if (like_str.back() == escape_char) {
491  throw std::runtime_error("LIKE pattern must not end with escape character.");
492  }
493 }
494 
495 bool LikeExpr::test_is_simple_expr(const std::string& like_str, char escape_char) {
496  // if not bounded by '%' then not a simple string
497  if (like_str.size() < 2 || like_str[0] != '%' || like_str[like_str.size() - 1] != '%') {
498  return false;
499  }
500  // if the last '%' is escaped then not a simple string
501  if (like_str[like_str.size() - 2] == escape_char &&
502  like_str[like_str.size() - 3] != escape_char) {
503  return false;
504  }
505  for (size_t i = 1; i < like_str.size() - 1; i++) {
506  if (like_str[i] == '%' || like_str[i] == '_' || like_str[i] == '[' ||
507  like_str[i] == ']') {
508  if (like_str[i - 1] != escape_char) {
509  return false;
510  }
511  }
512  }
513  return true;
514 }
515 
516 void LikeExpr::erase_cntl_chars(std::string& like_str, char escape_char) {
517  char prev_char = '\0';
518  // easier to create new string of allowable chars
519  // rather than erase chars from
520  // existing string
521  std::string new_str;
522  for (char& cur_char : like_str) {
523  if (cur_char == '%' || cur_char == escape_char) {
524  if (prev_char != escape_char) {
525  prev_char = cur_char;
526  continue;
527  }
528  }
529  new_str.push_back(cur_char);
530  prev_char = cur_char;
531  }
532  like_str = new_str;
533 }
534 
535 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
536  const Catalog_Namespace::Catalog& catalog,
537  Analyzer::Query& query,
538  TlistRefType allow_tlist_ref) const {
539  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
540  auto like_expr = like_string->analyze(catalog, query, allow_tlist_ref);
541  auto escape_expr = escape_string == nullptr
542  ? nullptr
543  : escape_string->analyze(catalog, query, allow_tlist_ref);
544  return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike, is_not);
545 }
546 
547 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
548  std::shared_ptr<Analyzer::Expr> like_expr,
549  std::shared_ptr<Analyzer::Expr> escape_expr,
550  const bool is_ilike,
551  const bool is_not) {
552  if (!arg_expr->get_type_info().is_string()) {
553  throw std::runtime_error("expression before LIKE must be of a string type.");
554  }
555  if (!like_expr->get_type_info().is_string()) {
556  throw std::runtime_error("expression after LIKE must be of a string type.");
557  }
558  char escape_char = '\\';
559  if (escape_expr != nullptr) {
560  if (!escape_expr->get_type_info().is_string()) {
561  throw std::runtime_error("expression after ESCAPE must be of a string type.");
562  }
563  if (!escape_expr->get_type_info().is_string()) {
564  throw std::runtime_error("expression after ESCAPE must be of a string type.");
565  }
566  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
567  if (c != nullptr && c->get_constval().stringval->length() > 1) {
568  throw std::runtime_error("String after ESCAPE must have a single character.");
569  }
570  escape_char = (*c->get_constval().stringval)[0];
571  }
572  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(like_expr);
573  bool is_simple = false;
574  if (c != nullptr) {
575  std::string& pattern = *c->get_constval().stringval;
576  if (is_ilike) {
577  std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
578  }
579  check_like_expr(pattern, escape_char);
580  is_simple = test_is_simple_expr(pattern, escape_char);
581  if (is_simple) {
582  erase_cntl_chars(pattern, escape_char);
583  }
584  }
585  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikeExpr>(
586  arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
587  if (is_not) {
588  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
589  }
590  return result;
591 }
592 
593 void RegexpExpr::check_pattern_expr(const std::string& pattern_str, char escape_char) {
594  if (pattern_str.back() == escape_char) {
595  throw std::runtime_error("REGEXP pattern must not end with escape character.");
596  }
597 }
598 
599 bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str, char escape_char) {
600  char prev_char = '\0';
601  char prev_prev_char = '\0';
602  std::string like_str;
603  for (char& cur_char : pattern_str) {
604  if (prev_char == escape_char || isalnum(cur_char) || cur_char == ' ' ||
605  cur_char == '.') {
606  like_str.push_back((cur_char == '.') ? '_' : cur_char);
607  prev_prev_char = prev_char;
608  prev_char = cur_char;
609  continue;
610  }
611  if (prev_char == '.' && prev_prev_char != escape_char) {
612  if (cur_char == '*' || cur_char == '+') {
613  if (cur_char == '*') {
614  like_str.pop_back();
615  }
616  // .* --> %
617  // .+ --> _%
618  like_str.push_back('%');
619  prev_prev_char = prev_char;
620  prev_char = cur_char;
621  continue;
622  }
623  }
624  return false;
625  }
626  pattern_str = like_str;
627  return true;
628 }
629 
630 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
631  const Catalog_Namespace::Catalog& catalog,
632  Analyzer::Query& query,
633  TlistRefType allow_tlist_ref) const {
634  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
635  auto pattern_expr = pattern_string->analyze(catalog, query, allow_tlist_ref);
636  auto escape_expr = escape_string == nullptr
637  ? nullptr
638  : escape_string->analyze(catalog, query, allow_tlist_ref);
639  return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not);
640 }
641 
642 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
643  std::shared_ptr<Analyzer::Expr> arg_expr,
644  std::shared_ptr<Analyzer::Expr> pattern_expr,
645  std::shared_ptr<Analyzer::Expr> escape_expr,
646  const bool is_not) {
647  if (!arg_expr->get_type_info().is_string()) {
648  throw std::runtime_error("expression before REGEXP must be of a string type.");
649  }
650  if (!pattern_expr->get_type_info().is_string()) {
651  throw std::runtime_error("expression after REGEXP must be of a string type.");
652  }
653  char escape_char = '\\';
654  if (escape_expr != nullptr) {
655  if (!escape_expr->get_type_info().is_string()) {
656  throw std::runtime_error("expression after ESCAPE must be of a string type.");
657  }
658  if (!escape_expr->get_type_info().is_string()) {
659  throw std::runtime_error("expression after ESCAPE must be of a string type.");
660  }
661  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
662  if (c != nullptr && c->get_constval().stringval->length() > 1) {
663  throw std::runtime_error("String after ESCAPE must have a single character.");
664  }
665  escape_char = (*c->get_constval().stringval)[0];
666  if (escape_char != '\\') {
667  throw std::runtime_error("Only supporting '\\' escape character.");
668  }
669  }
670  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(pattern_expr);
671  if (c != nullptr) {
672  std::string& pattern = *c->get_constval().stringval;
673  if (translate_to_like_pattern(pattern, escape_char)) {
674  return LikeExpr::get(arg_expr, pattern_expr, escape_expr, false, is_not);
675  }
676  }
677  std::shared_ptr<Analyzer::Expr> result =
678  makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
679  if (is_not) {
680  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
681  }
682  return result;
683 }
684 
685 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
686  const Catalog_Namespace::Catalog& catalog,
687  Analyzer::Query& query,
688  TlistRefType allow_tlist_ref) const {
689  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
690  return LikelihoodExpr::get(arg_expr, likelihood, is_not);
691 }
692 
693 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
694  std::shared_ptr<Analyzer::Expr> arg_expr,
695  float likelihood,
696  const bool is_not) {
697  if (!arg_expr->get_type_info().is_boolean()) {
698  throw std::runtime_error("likelihood expression expects boolean type.");
699  }
700  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikelihoodExpr>(
701  arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
702  return result;
703 }
704 
705 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
706  const Catalog_Namespace::Catalog& catalog,
707  Analyzer::Query& query,
708  TlistRefType allow_tlist_ref) const {
709  throw std::runtime_error("Subqueries are not supported yet.");
710  return nullptr;
711 }
712 
713 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
714  const Catalog_Namespace::Catalog& catalog,
715  Analyzer::Query& query,
716  TlistRefType allow_tlist_ref) const {
717  int table_id{0};
718  int rte_idx{0};
719  const ColumnDescriptor* cd{nullptr};
720  if (column == nullptr) {
721  throw std::runtime_error("invalid column name *.");
722  }
723  if (table != nullptr) {
724  rte_idx = query.get_rte_idx(*table);
725  if (rte_idx < 0) {
726  throw std::runtime_error("range variable or table name " + *table +
727  " does not exist.");
728  }
729  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
730  cd = rte->get_column_desc(catalog, *column);
731  if (cd == nullptr) {
732  throw std::runtime_error("Column name " + *column + " does not exist.");
733  }
734  table_id = rte->get_table_id();
735  } else {
736  bool found = false;
737  int i = 0;
738  for (auto rte : query.get_rangetable()) {
739  cd = rte->get_column_desc(catalog, *column);
740  if (cd != nullptr && !found) {
741  found = true;
742  rte_idx = i;
743  table_id = rte->get_table_id();
744  } else if (cd != nullptr && found) {
745  throw std::runtime_error("Column name " + *column + " is ambiguous.");
746  }
747  i++;
748  }
749  if (cd == nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
750  // check if this is a reference to a targetlist entry
751  bool found = false;
752  int varno = -1;
753  int i = 1;
754  std::shared_ptr<Analyzer::TargetEntry> tle;
755  for (auto p : query.get_targetlist()) {
756  if (*column == p->get_resname() && !found) {
757  found = true;
758  varno = i;
759  tle = p;
760  } else if (*column == p->get_resname() && found) {
761  throw std::runtime_error("Output alias " + *column + " is ambiguous.");
762  }
763  i++;
764  }
765  if (found) {
766  if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
767  Analyzer::Var* v = static_cast<Analyzer::Var*>(tle->get_expr());
769  return v->deep_copy();
770  }
771  }
772  if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
773  return tle->get_expr()->deep_copy();
774  } else {
775  return makeExpr<Analyzer::Var>(
776  tle->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
777  }
778  }
779  }
780  if (cd == nullptr) {
781  throw std::runtime_error("Column name " + *column + " does not exist.");
782  }
783  }
784  return makeExpr<Analyzer::ColumnVar>(cd->columnType, table_id, cd->columnId, rte_idx);
785 }
786 
787 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
788  const Catalog_Namespace::Catalog& catalog,
789  Analyzer::Query& query,
790  TlistRefType allow_tlist_ref) const {
791  SQLTypeInfo result_type;
792  SQLAgg agg_type;
793  std::shared_ptr<Analyzer::Expr> arg_expr;
794  bool is_distinct = false;
795  if (boost::iequals(*name, "count")) {
796  result_type = SQLTypeInfo(kBIGINT, false);
797  agg_type = kCOUNT;
798  if (arg) {
799  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
800  const SQLTypeInfo& ti = arg_expr->get_type_info();
801  if (ti.is_string() && (ti.get_compression() != kENCODING_DICT || !distinct)) {
802  throw std::runtime_error(
803  "Strings must be dictionary-encoded in COUNT(DISTINCT).");
804  }
805  if (ti.get_type() == kARRAY && !distinct) {
806  throw std::runtime_error("Only COUNT(DISTINCT) is supported on arrays.");
807  }
808  }
809  is_distinct = distinct;
810  } else {
811  if (!arg) {
812  throw std::runtime_error("Cannot compute " + *name + " with argument '*'.");
813  }
814  if (boost::iequals(*name, "min")) {
815  agg_type = kMIN;
816  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
817  arg_expr = arg_expr->decompress();
818  result_type = arg_expr->get_type_info();
819  } else if (boost::iequals(*name, "max")) {
820  agg_type = kMAX;
821  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
822  arg_expr = arg_expr->decompress();
823  result_type = arg_expr->get_type_info();
824  } else if (boost::iequals(*name, "avg")) {
825  agg_type = kAVG;
826  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
827  if (!arg_expr->get_type_info().is_number()) {
828  throw std::runtime_error("Cannot compute AVG on non-number-type arguments.");
829  }
830  arg_expr = arg_expr->decompress();
831  result_type = SQLTypeInfo(kDOUBLE, false);
832  } else if (boost::iequals(*name, "sum")) {
833  agg_type = kSUM;
834  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
835  if (!arg_expr->get_type_info().is_number()) {
836  throw std::runtime_error("Cannot compute SUM on non-number-type arguments.");
837  }
838  arg_expr = arg_expr->decompress();
839  result_type = arg_expr->get_type_info().is_integer() ? SQLTypeInfo(kBIGINT, false)
840  : arg_expr->get_type_info();
841  } else if (boost::iequals(*name, "unnest")) {
842  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
843  const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
844  if (arg_ti.get_type() != kARRAY) {
845  throw std::runtime_error(arg->to_string() + " is not of array type.");
846  }
847  return makeExpr<Analyzer::UOper>(arg_ti.get_elem_type(), false, kUNNEST, arg_expr);
848  } else {
849  throw std::runtime_error("invalid function name: " + *name);
850  }
851  if (arg_expr->get_type_info().is_string() ||
852  arg_expr->get_type_info().get_type() == kARRAY) {
853  throw std::runtime_error(
854  "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
855  }
856  }
857  int naggs = query.get_num_aggs();
858  query.set_num_aggs(naggs + 1);
859  return makeExpr<Analyzer::AggExpr>(
860  result_type, agg_type, arg_expr, is_distinct, nullptr);
861 }
862 
863 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
864  const Catalog_Namespace::Catalog& catalog,
865  Analyzer::Query& query,
866  TlistRefType allow_tlist_ref) const {
867  target_type->check_type();
868  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
869  SQLTypeInfo ti(target_type->get_type(),
870  target_type->get_param1(),
871  target_type->get_param2(),
872  arg_expr->get_type_info().get_notnull());
873  if (arg_expr->get_type_info().get_type() != target_type->get_type() &&
874  arg_expr->get_type_info().get_compression() != kENCODING_NONE) {
875  arg_expr->decompress();
876  }
877  return arg_expr->add_cast(ti);
878 }
879 
880 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
881  const Catalog_Namespace::Catalog& catalog,
882  Analyzer::Query& query,
883  TlistRefType allow_tlist_ref) const {
884  SQLTypeInfo ti;
885  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
886  expr_pair_list;
887  for (auto& p : when_then_list) {
888  auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
889  if (e1->get_type_info().get_type() != kBOOLEAN) {
890  throw std::runtime_error("Only boolean expressions can be used after WHEN.");
891  }
892  auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
893  expr_pair_list.emplace_back(e1, e2);
894  }
895  auto else_e = else_expr ? else_expr->analyze(catalog, query, allow_tlist_ref) : nullptr;
896  return normalize(expr_pair_list, else_e);
897 }
898 
899 namespace {
900 
901 bool expr_is_null(const Analyzer::Expr* expr) {
902  if (expr->get_type_info().get_type() == kNULLT) {
903  return true;
904  }
905  const auto const_expr = dynamic_cast<const Analyzer::Constant*>(expr);
906  return const_expr && const_expr->get_is_null();
907 }
908 
// NOTE(review): the signature line of this helper (listing line 909) is not present
// in this extract; the body reads a parameter named `str_literal`.
// Maps the literal's text to a bool: "t", "true", "T" and "True" give true; "f",
// "false", "F" and "False" give false. Any other text raises std::runtime_error.
// The comparison is exact, so other spellings such as "TRUE" are rejected.
910  const std::string* s = str_literal->get_stringval();
911  if (*s == "t" || *s == "true" || *s == "T" || *s == "True") {
912  return true;
913  } else if (*s == "f" || *s == "false" || *s == "F" || *s == "False") {
914  return false;
915  } else {
916  throw std::runtime_error("Invalid string for boolean " + *s);
917  }
918 }
919 
920 } // namespace
921 
922 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
923  const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
924  std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
925  const std::shared_ptr<Analyzer::Expr> else_e_in) {
926  SQLTypeInfo ti;
927  bool has_agg = false;
928  for (auto& p : expr_pair_list) {
929  auto e1 = p.first;
930  CHECK(e1->get_type_info().is_boolean());
931  auto e2 = p.second;
932  if (ti.get_type() == kNULLT) {
933  ti = e2->get_type_info();
934  } else if (e2->get_type_info().get_type() == kNULLT) {
935  ti.set_notnull(false);
936  e2->set_type_info(ti);
937  } else if (ti != e2->get_type_info()) {
938  if (ti.is_string() && e2->get_type_info().is_string()) {
939  ti = Analyzer::BinOper::common_string_type(ti, e2->get_type_info());
940  } else if (ti.is_number() && e2->get_type_info().is_number()) {
941  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
942  } else if (ti.is_boolean() && e2->get_type_info().is_boolean()) {
943  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
944  } else {
945  throw std::runtime_error(
946  "expressions in THEN clause must be of the same or compatible types.");
947  }
948  }
949  if (e2->get_contains_agg()) {
950  has_agg = true;
951  }
952  }
953  auto else_e = else_e_in;
954  if (else_e) {
955  if (else_e->get_contains_agg()) {
956  has_agg = true;
957  }
958  if (expr_is_null(else_e.get())) {
959  ti.set_notnull(false);
960  else_e->set_type_info(ti);
961  } else if (ti != else_e->get_type_info()) {
962  ti.set_notnull(false);
963  if (ti.is_string() && else_e->get_type_info().is_string()) {
964  ti = Analyzer::BinOper::common_string_type(ti, else_e->get_type_info());
965  } else if (ti.is_number() && else_e->get_type_info().is_number()) {
966  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
967  } else if (ti.is_boolean() && else_e->get_type_info().is_boolean()) {
968  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
969  } else if (get_logical_type_info(ti) !=
970  get_logical_type_info(else_e->get_type_info())) {
971  throw std::runtime_error(
972  // types differing by encoding will be resolved at decode
973 
974  "expressions in ELSE clause must be of the same or compatible types as those "
975  "in the THEN clauses.");
976  }
977  }
978  }
979  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
980  cast_expr_pair_list;
981  for (auto p : expr_pair_list) {
982  ti.set_notnull(false);
983  cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
984  }
985  if (else_e != nullptr) {
986  else_e = else_e->add_cast(ti);
987  } else {
988  Datum d;
989  // always create an else expr so that executor doesn't need to worry about it
990  ti.set_notnull(false);
991  else_e = makeExpr<Analyzer::Constant>(ti, true, d);
992  }
993  if (ti.get_type() == kNULLT) {
994  throw std::runtime_error(
995  "Can't deduce the type for case expressions, all branches null");
996  }
997  return makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
998 }
999 
1000 std::string CaseExpr::to_string() const {
1001  std::string str("CASE ");
1002  for (auto& p : when_then_list) {
1003  str += "WHEN " + p->get_expr1()->to_string() + " THEN " +
1004  p->get_expr2()->to_string() + " ";
1005  }
1006  if (else_expr != nullptr) {
1007  str += "ELSE " + else_expr->to_string();
1008  }
1009  str += " END";
1010  return str;
1011 }
1012 
1013 void UnionQuery::analyze(const Catalog_Namespace::Catalog& catalog,
1014  Analyzer::Query& query) const {
1015  left->analyze(catalog, query);
1016  Analyzer::Query* right_query = new Analyzer::Query();
1017  right->analyze(catalog, *right_query);
1018  query.set_next_query(right_query);
1019  query.set_is_unionall(is_unionall);
1020 }
1021 
1022 void QuerySpec::analyze_having_clause(const Catalog_Namespace::Catalog& catalog,
1023  Analyzer::Query& query) const {
1024  std::shared_ptr<Analyzer::Expr> p;
1025  if (having_clause != nullptr) {
1026  p = having_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1027  if (p->get_type_info().get_type() != kBOOLEAN) {
1028  throw std::runtime_error("Only boolean expressions can be in HAVING clause.");
1029  }
1030  p->check_group_by(query.get_group_by());
1031  }
1032  query.set_having_predicate(p);
1033 }
1034 
// Resolves the GROUP BY clause into analyzed expressions and stores them on `query`.
//
// Each GROUP BY item is either an integer literal, treated as a 1-based ordinal into
// the target list, or an expression analyzed with TLIST_REF. Throws
// std::runtime_error for a non-integer literal, an out-of-range ordinal, or an
// ordinal that refers to a target containing an aggregate. Afterwards, when the
// query has aggregates or a non-empty GROUP BY, every target expression is checked
// against the group-by list.
1035 void QuerySpec::analyze_group_by(const Catalog_Namespace::Catalog& catalog,
1036  Analyzer::Query& query) const {
1037  std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1038  if (!groupby_clause.empty()) {
// 1-based position of the current item within the GROUP BY list.
1039  int gexpr_no = 1;
1040  std::shared_ptr<Analyzer::Expr> gexpr;
1041  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1042  query.get_targetlist();
1043  for (auto& c : groupby_clause) {
1044  // special-case ordinal numbers in GROUP BY
1045  if (dynamic_cast<Literal*>(c.get())) {
1046  IntLiteral* i = dynamic_cast<IntLiteral*>(c.get());
1047  if (!i) {
1048  throw std::runtime_error("Invalid literal in GROUP BY clause.");
1049  }
1050  int varno = (int)i->get_intval();
1051  if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1052  throw std::runtime_error("Invalid ordinal number in GROUP BY clause.");
1053  }
1054  if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1055  throw std::runtime_error(
1056  "Ordinal number in GROUP BY cannot reference an expression containing "
1057  "aggregate "
1058  "functions.");
1059  }
// The ordinal becomes an output Var that refers to the target-list entry.
1060  gexpr = makeExpr<Analyzer::Var>(
1061  tlist[varno - 1]->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
1062  } else {
1063  gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1064  }
1065  const SQLTypeInfo gti = gexpr->get_type_info();
1066  bool set_new_type = false;
1067  SQLTypeInfo ti(gti);
// Strings without an encoding are cast to a modified type `ti` before grouping.
1068  if (gti.is_string() && gti.get_compression() == kENCODING_NONE) {
1069  set_new_type = true;
// NOTE(review): listing lines 1070-1071 are absent from this extract; further
// adjustments to `ti` may belong here.
1072  ti.set_fixed_size();
1073  }
1074  std::shared_ptr<Analyzer::Var> v;
// When the group-by expression is a Var, group on the target entry's own expression
// and repoint the target entry at a group-by Var numbered gexpr_no.
1075  if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
1076  v = std::static_pointer_cast<Analyzer::Var>(gexpr);
1077  int n = v->get_varno();
1078  gexpr = tlist[n - 1]->get_own_expr();
1079  auto cv = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gexpr);
1080  if (cv != nullptr) {
1081  // inherit all ColumnVar info for lineage.
1082  *std::static_pointer_cast<Analyzer::ColumnVar>(v) = *cv;
1083  }
1084  v->set_which_row(Analyzer::Var::kGROUPBY);
1085  v->set_varno(gexpr_no);
1086  tlist[n - 1]->set_expr(v);
1087  }
1088  if (set_new_type) {
1089  auto new_e = gexpr->add_cast(ti);
1090  groupby.push_back(new_e);
// Keep the Var's type in sync with the cast expression it now stands for.
1091  if (v != nullptr) {
1092  v->set_type_info(new_e->get_type_info());
1093  }
1094  } else {
1095  groupby.push_back(gexpr);
1096  }
1097  gexpr_no++;
1098  }
1099  }
1100  if (query.get_num_aggs() > 0 || !groupby.empty()) {
1101  for (auto t : query.get_targetlist()) {
1102  auto e = t->get_expr();
1103  e->check_group_by(groupby);
1104  }
1105  }
1106  query.set_group_by(groupby);
1107 }
1108 
1109 void QuerySpec::analyze_where_clause(const Catalog_Namespace::Catalog& catalog,
1110  Analyzer::Query& query) const {
1111  if (where_clause == nullptr) {
1112  query.set_where_predicate(nullptr);
1113  return;
1114  }
1115  auto p = where_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1116  if (p->get_type_info().get_type() != kBOOLEAN) {
1117  throw std::runtime_error("Only boolean expressions can be in WHERE clause.");
1118  }
1119  query.set_where_predicate(p);
1120 }
1121 
1122 void QuerySpec::analyze_select_clause(const Catalog_Namespace::Catalog& catalog,
1123  Analyzer::Query& query) const {
1124  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1125  query.get_targetlist_nonconst();
1126  if (select_clause.empty()) {
1127  // this means SELECT *
1128  int rte_idx = 0;
1129  for (auto rte : query.get_rangetable()) {
1130  rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1131  }
1132  } else {
1133  for (auto& p : select_clause) {
1134  const Parser::Expr* select_expr = p->get_select_expr();
1135  // look for the case of range_var.*
1136  if (typeid(*select_expr) == typeid(ColumnRef) &&
1137  dynamic_cast<const ColumnRef*>(select_expr)->get_column() == nullptr) {
1138  const std::string* range_var_name =
1139  dynamic_cast<const ColumnRef*>(select_expr)->get_table();
1140  int rte_idx = query.get_rte_idx(*range_var_name);
1141  if (rte_idx < 0) {
1142  throw std::runtime_error("invalid range variable name: " + *range_var_name);
1143  }
1144  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
1145  rte->expand_star_in_targetlist(catalog, tlist, rte_idx);
1146  } else {
1147  auto e = select_expr->analyze(catalog, query);
1148  std::string resname;
1149 
1150  if (p->get_alias() != nullptr) {
1151  resname = *p->get_alias();
1152  } else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1153  !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1154  auto colvar = std::static_pointer_cast<Analyzer::ColumnVar>(e);
1155  const ColumnDescriptor* col_desc = catalog.getMetadataForColumn(
1156  colvar->get_table_id(), colvar->get_column_id());
1157  resname = col_desc->columnName;
1158  }
1159  if (e->get_type_info().get_type() == kNULLT) {
1160  throw std::runtime_error(
1161  "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1162  }
1163  auto o = std::static_pointer_cast<Analyzer::UOper>(e);
1164  bool unnest = (o != nullptr && o->get_optype() == kUNNEST);
1165  auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1166  tlist.push_back(tle);
1167  }
1168  }
1169  }
1170 }
1171 
// Adds one range table entry to `query` for every table in the FROM clause.
// Throws std::runtime_error if a referenced table is not found in the catalog.
1172 void QuerySpec::analyze_from_clause(const Catalog_Namespace::Catalog& catalog,
1173  Analyzer::Query& query) const {
// NOTE(review): listing line 1174 is absent from this extract; `rte`, assigned
// below, has no visible declaration and is presumably declared there.
1175  for (auto& p : from_clause) {
1176  const TableDescriptor* table_desc;
1177  table_desc = catalog.getMetadataForTable(*p->get_table_name());
1178  if (table_desc == nullptr) {
1179  throw std::runtime_error("Table " + *p->get_table_name() + " does not exist.");
1180  }
// The range variable defaults to the table name when none was given.
1181  std::string range_var;
1182  if (p->get_range_var() == nullptr) {
1183  range_var = *p->get_table_name();
1184  } else {
1185  range_var = *p->get_range_var();
1186  }
1187  rte = new Analyzer::RangeTableEntry(range_var, table_desc, nullptr);
1188  query.add_rte(rte);
1189  }
1190 }
1191 
// Analyzes a query specification clause by clause, in the order FROM, SELECT, WHERE,
// GROUP BY, HAVING. FROM runs first so that the range table exists before the other
// clauses are analyzed.
1192 void QuerySpec::analyze(const Catalog_Namespace::Catalog& catalog,
1193  Analyzer::Query& query) const {
// NOTE(review): listing line 1194 is absent from this extract; a statement may be
// missing here.
1195  analyze_from_clause(catalog, query);
1196  analyze_select_clause(catalog, query);
1197  analyze_where_clause(catalog, query);
1198  analyze_group_by(catalog, query);
1199  analyze_having_clause(catalog, query);
1200 }
1201 
1202 void SelectStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1203  Analyzer::Query& query) const {
1204  query.set_stmt_type(kSELECT);
1205  query.set_limit(limit);
1206  if (offset < 0) {
1207  throw std::runtime_error("OFFSET cannot be negative.");
1208  }
1209  query.set_offset(offset);
1210  query_expr->analyze(catalog, query);
1211  if (orderby_clause.empty() && !query.get_is_distinct()) {
1212  query.set_order_by(nullptr);
1213  return;
1214  }
1215  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1216  query.get_targetlist();
1217  std::list<Analyzer::OrderEntry>* order_by = new std::list<Analyzer::OrderEntry>();
1218  if (!orderby_clause.empty()) {
1219  for (auto& p : orderby_clause) {
1220  int tle_no = p->get_colno();
1221  if (tle_no == 0) {
1222  // use column name
1223  // search through targetlist for matching name
1224  const std::string* name = p->get_column()->get_column();
1225  tle_no = 1;
1226  bool found = false;
1227  for (auto tle : tlist) {
1228  if (tle->get_resname() == *name) {
1229  found = true;
1230  break;
1231  }
1232  tle_no++;
1233  }
1234  if (!found) {
1235  throw std::runtime_error("invalid name in order by: " + *name);
1236  }
1237  }
1238  order_by->push_back(
1239  Analyzer::OrderEntry(tle_no, p->get_is_desc(), p->get_nulls_first()));
1240  }
1241  }
1242  if (query.get_is_distinct()) {
1243  // extend order_by to include all targetlist entries.
1244  for (int i = 1; i <= static_cast<int>(tlist.size()); i++) {
1245  bool in_orderby = false;
1246  std::for_each(order_by->begin(),
1247  order_by->end(),
1248  [&in_orderby, i](const Analyzer::OrderEntry& oe) {
1249  in_orderby = in_orderby || (i == oe.tle_no);
1250  });
1251  if (!in_orderby) {
1252  order_by->push_back(Analyzer::OrderEntry(i, false, false));
1253  }
1254  }
1255  }
1256  query.set_order_by(order_by);
1257 }
1258 
1259 std::string SelectEntry::to_string() const {
1260  std::string str = select_expr->to_string();
1261  if (alias != nullptr) {
1262  str += " AS " + *alias;
1263  }
1264  return str;
1265 }
1266 
1267 std::string TableRef::to_string() const {
1268  std::string str = *table_name;
1269  if (range_var != nullptr) {
1270  str += " " + *range_var;
1271  }
1272  return str;
1273 }
1274 
1275 std::string ColumnRef::to_string() const {
1276  std::string str;
1277  if (table == nullptr) {
1278  str = *column;
1279  } else if (column == nullptr) {
1280  str = *table + ".*";
1281  } else {
1282  str = *table + "." + *column;
1283  }
1284  return str;
1285 }
1286 
1287 std::string OperExpr::to_string() const {
1288  std::string op_str[] = {
1289  "=", "===", "<>", "<", ">", "<=", ">=", " AND ", " OR ", "NOT", "-", "+", "*", "/"};
1290  std::string str;
1291  if (optype == kUMINUS) {
1292  str = "-(" + left->to_string() + ")";
1293  } else if (optype == kNOT) {
1294  str = "NOT (" + left->to_string() + ")";
1295  } else if (optype == kARRAY_AT) {
1296  str = left->to_string() + "[" + right->to_string() + "]";
1297  } else if (optype == kUNNEST) {
1298  str = "UNNEST(" + left->to_string() + ")";
1299  } else if (optype == kIN) {
1300  str = "(" + left->to_string() + " IN " + right->to_string() + ")";
1301  } else {
1302  str = "(" + left->to_string() + op_str[optype] + right->to_string() + ")";
1303  }
1304  return str;
1305 }
1306 
1307 std::string InExpr::to_string() const {
1308  std::string str = arg->to_string();
1309  if (is_not) {
1310  str += " NOT IN ";
1311  } else {
1312  str += " IN ";
1313  }
1314  return str;
1315 }
1316 
1317 std::string ExistsExpr::to_string() const {
1318  return "EXISTS (" + query->to_string() + ")";
1319 }
1320 
1321 std::string SubqueryExpr::to_string() const {
1322  std::string str;
1323  str = "(";
1324  str += query->to_string();
1325  str += ")";
1326  return str;
1327 }
1328 
1329 std::string IsNullExpr::to_string() const {
1330  std::string str = arg->to_string();
1331  if (is_not) {
1332  str += " IS NOT NULL";
1333  } else {
1334  str += " IS NULL";
1335  }
1336  return str;
1337 }
1338 
1339 std::string InSubquery::to_string() const {
1340  std::string str = InExpr::to_string();
1341  str += subquery->to_string();
1342  return str;
1343 }
1344 
1345 std::string InValues::to_string() const {
1346  std::string str = InExpr::to_string() + "(";
1347  bool notfirst = false;
1348  for (auto& p : value_list) {
1349  if (notfirst) {
1350  str += ", ";
1351  } else {
1352  notfirst = true;
1353  }
1354  str += p->to_string();
1355  }
1356  str += ")";
1357  return str;
1358 }
1359 
1360 std::string BetweenExpr::to_string() const {
1361  std::string str = arg->to_string();
1362  if (is_not) {
1363  str += " NOT BETWEEN ";
1364  } else {
1365  str += " BETWEEN ";
1366  }
1367  str += lower->to_string() + " AND " + upper->to_string();
1368  return str;
1369 }
1370 
1371 std::string CharLengthExpr::to_string() const {
1372  std::string str;
1373  if (calc_encoded_length) {
1374  str = "CHAR_LENGTH (" + arg->to_string() + ")";
1375  } else {
1376  str = "LENGTH (" + arg->to_string() + ")";
1377  }
1378  return str;
1379 }
1380 
1381 std::string CardinalityExpr::to_string() const {
1382  std::string str = "CARDINALITY(" + arg->to_string() + ")";
1383  return str;
1384 }
1385 
1386 std::string LikeExpr::to_string() const {
1387  std::string str = arg->to_string();
1388  if (is_not) {
1389  str += " NOT LIKE ";
1390  } else {
1391  str += " LIKE ";
1392  }
1393  str += like_string->to_string();
1394  if (escape_string != nullptr) {
1395  str += " ESCAPE " + escape_string->to_string();
1396  }
1397  return str;
1398 }
1399 
1400 std::string RegexpExpr::to_string() const {
1401  std::string str = arg->to_string();
1402  if (is_not) {
1403  str += " NOT REGEXP ";
1404  } else {
1405  str += " REGEXP ";
1406  }
1407  str += pattern_string->to_string();
1408  if (escape_string != nullptr) {
1409  str += " ESCAPE " + escape_string->to_string();
1410  }
1411  return str;
1412 }
1413 
1414 std::string LikelihoodExpr::to_string() const {
1415  std::string str = " LIKELIHOOD ";
1416  str += arg->to_string();
1417  str += " ";
1418  str += boost::lexical_cast<std::string>(is_not ? 1.0 - likelihood : likelihood);
1419  return str;
1420 }
1421 
1422 std::string FunctionRef::to_string() const {
1423  std::string str = *name + "(";
1424  if (distinct) {
1425  str += "DISTINCT ";
1426  }
1427  if (arg == nullptr) {
1428  str += "*)";
1429  } else {
1430  str += arg->to_string() + ")";
1431  }
1432  return str;
1433 }
1434 
1435 std::string QuerySpec::to_string() const {
1436  std::string query_str = "SELECT ";
1437  if (is_distinct) {
1438  query_str += "DISTINCT ";
1439  }
1440  if (select_clause.empty()) {
1441  query_str += "* ";
1442  } else {
1443  bool notfirst = false;
1444  for (auto& p : select_clause) {
1445  if (notfirst) {
1446  query_str += ", ";
1447  } else {
1448  notfirst = true;
1449  }
1450  query_str += p->to_string();
1451  }
1452  }
1453  query_str += " FROM ";
1454  bool notfirst = false;
1455  for (auto& p : from_clause) {
1456  if (notfirst) {
1457  query_str += ", ";
1458  } else {
1459  notfirst = true;
1460  }
1461  query_str += p->to_string();
1462  }
1463  if (where_clause) {
1464  query_str += " WHERE " + where_clause->to_string();
1465  }
1466  if (!groupby_clause.empty()) {
1467  query_str += " GROUP BY ";
1468  bool notfirst = false;
1469  for (auto& p : groupby_clause) {
1470  if (notfirst) {
1471  query_str += ", ";
1472  } else {
1473  notfirst = true;
1474  }
1475  query_str += p->to_string();
1476  }
1477  }
1478  if (having_clause) {
1479  query_str += " HAVING " + having_clause->to_string();
1480  }
1481  query_str += ";";
1482  return query_str;
1483 }
1484 
// Common analysis for INSERT statements: marks the query as an insert, resolves the
// target table and builds the list of result column ids.
//
// With no explicit column list, the ids come from getAllColumnMetadataForTable().
// With an explicit list, each named column is resolved and, for columns that have
// physical columns (checked to be geometry), the ids of those physical columns
// follow it. Throws std::runtime_error when the table or a column does not exist,
// when the table is a view, or when the explicit list does not cover all columns.
1485 void InsertStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1486  Analyzer::Query& query) const {
1487  query.set_stmt_type(kINSERT);
1488  const TableDescriptor* td = catalog.getMetadataForTable(*table);
1489  if (td == nullptr) {
1490  throw std::runtime_error("Table " + *table + " does not exist.");
1491  }
1492  if (td->isView) {
1493  throw std::runtime_error("Insert to views is not supported yet.");
1494  }
// NOTE(review): listing line 1495 is absent from this extract; a statement may be
// missing here.
1496  query.set_result_table_id(td->tableId);
1497  std::list<int> result_col_list;
1498  if (column_list.empty()) {
1499  const std::list<const ColumnDescriptor*> all_cols =
1500  catalog.getAllColumnMetadataForTable(td->tableId, false, false, true);
1501  for (auto cd : all_cols) {
1502  result_col_list.push_back(cd->columnId);
1503  }
1504  } else {
1505  for (auto& c : column_list) {
1506  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
1507  if (cd == nullptr) {
1508  throw std::runtime_error("Column " + *c + " does not exist.");
1509  }
1510  result_col_list.push_back(cd->columnId);
1511  const auto& col_ti = cd->columnType;
// Physical columns are looked up at the ids directly after the logical column's id.
1512  if (col_ti.get_physical_cols() > 0) {
1513  CHECK(cd->columnType.is_geometry());
1514  for (auto i = 1; i <= col_ti.get_physical_cols(); i++) {
1515  const ColumnDescriptor* pcd =
1516  catalog.getMetadataForColumn(td->tableId, cd->columnId + i);
1517  if (pcd == nullptr) {
1518  throw std::runtime_error("Column " + *c + "'s metadata is incomplete.");
1519  }
1520  result_col_list.push_back(pcd->columnId);
1521  }
1522  }
1523  }
// Partial column lists are rejected: the collected ids must match the table's full
// column count.
1524  if (catalog.getAllColumnMetadataForTable(td->tableId, false, false, true).size() !=
1525  result_col_list.size()) {
1526  throw std::runtime_error("Insert into a subset of columns is not supported yet.");
1527  }
1528  }
1529  query.set_result_col_list(result_col_list);
1530 }
1531 
// Chooses the index of the leaf that should receive this INSERT ... VALUES row.
//
// For a table with no shards a leaf is picked at random (random value modulo
// num_leafs). For a sharded table the value supplied for the shard column is
// analyzed, cast to the shard column's type and mapped to a shard id with
// SHARD_FOR_KEY over nShards * num_leafs shards; the result is shardId / nShards.
//
// Throws std::runtime_error when the table does not exist, is a view, is
// replicated, or when no value is supplied for the shard column.
// NOTE(review): num_leafs is used as a modulus without a visible zero check --
// confirm callers never pass 0.
1532 size_t InsertValuesStmt::determineLeafIndex(const Catalog_Namespace::Catalog& catalog,
1533  size_t num_leafs) {
1534  const TableDescriptor* td = catalog.getMetadataForTable(*table);
1535  if (td == nullptr) {
1536  throw std::runtime_error("Table " + *table + " does not exist.");
1537  }
1538  if (td->isView) {
1539  throw std::runtime_error("Insert to views is not supported yet.");
1540  }
1541 
1542  if (td->partitions == "REPLICATED") {
1543  throw std::runtime_error("Cannot determine leaf on replicated table.");
1544  }
1545 
// Unsharded table: any leaf will do, so pick one at random.
1546  if (0 == td->nShards) {
1547  std::random_device rd;
1548  std::mt19937_64 gen(rd());
1549  std::uniform_int_distribution<size_t> dis;
1550  const auto leaf_idx = dis(gen) % num_leafs;
1551  return leaf_idx;
1552  }
1553 
1554  size_t indexOfShardColumn = 0;
1555  const ColumnDescriptor* shardColumn = catalog.getShardColumnMetadataForTable(td);
1556  CHECK(shardColumn);
1557 
// Find the position of the shard column: among the table's columns when no column
// list was given, otherwise within the explicit column list.
1558  if (column_list.empty()) {
1559  auto all_cols =
1560  catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
1561  auto iter = std::find(all_cols.begin(), all_cols.end(), shardColumn);
1562  CHECK(iter != all_cols.end());
1563  indexOfShardColumn = std::distance(all_cols.begin(), iter);
1564  } else {
1565  for (auto& c : column_list) {
1566  if (*c == shardColumn->columnName) {
1567  break;
1568  }
1569  indexOfShardColumn++;
1570  }
1571 
1572  if (indexOfShardColumn == column_list.size()) {
1573  throw std::runtime_error("No value defined for shard column.");
1574  }
1575  }
1576 
1577  if (indexOfShardColumn >= value_list.size()) {
1578  throw std::runtime_error("No value defined for shard column.");
1579  }
1580 
1581  auto& shardColumnValueExpr = *(std::next(value_list.begin(), indexOfShardColumn));
1582 
// Analyze the value in a scratch query and reduce it to a constant, looking through
// a single cast if necessary.
1583  Analyzer::Query query;
1584  auto e = shardColumnValueExpr->analyze(catalog, query);
1585  e = e->add_cast(shardColumn->columnType);
1586  const Analyzer::Constant* con = dynamic_cast<Analyzer::Constant*>(e.get());
1587  if (!con) {
1588  auto col_cast = dynamic_cast<const Analyzer::UOper*>(e.get());
1589  CHECK(col_cast);
1590  CHECK_EQ(kCAST, col_cast->get_optype());
1591  con = dynamic_cast<const Analyzer::Constant*>(col_cast->get_operand());
1592  }
1593  CHECK(con);
1594 
1595  Datum d = con->get_constval();
1596 
// Shards are counted across all leaves.
1597  auto shard_count = td->nShards * num_leafs;
1598  int64_t shardId = 0;
1599 
1600  if (con->get_is_null()) {
// NOTE(review): listing line 1601 is absent from this extract; the start of the
// statement that ends on the next line is not visible.
1602  shard_count);
1603  } else if (shardColumn->columnType.is_string()) {
// String shard keys are sharded by the id obtained from the column's dictionary.
1604  auto dictDesc =
1605  catalog.getMetadataForDict(shardColumn->columnType.get_comp_param(), true);
1606  auto str_id = dictDesc->stringDict->getOrAdd(*d.stringval);
1607  bool invalid = false;
1608 
1609  if (4 == shardColumn->columnType.get_size()) {
1610  invalid = str_id > max_valid_int_value<int32_t>();
1611  } else if (2 == shardColumn->columnType.get_size()) {
1612  invalid = str_id > max_valid_int_value<uint16_t>();
1613  } else if (1 == shardColumn->columnType.get_size()) {
1614  invalid = str_id > max_valid_int_value<uint8_t>();
1615  }
1616 
// Ids that do not fit the column's size are replaced by the column's null value.
1617  if (invalid || str_id == inline_int_null_value<int32_t>()) {
1618  str_id = inline_fixed_encoding_null_val(shardColumn->columnType);
1619  }
1620  shardId = SHARD_FOR_KEY(str_id, shard_count);
1621  } else {
// Other shard keys: read the Datum member that matches the column's logical size.
1622  switch (shardColumn->columnType.get_logical_size()) {
1623  case 8:
1624  shardId = SHARD_FOR_KEY(d.bigintval, shard_count);
1625  break;
1626  case 4:
1627  shardId = SHARD_FOR_KEY(d.intval, shard_count);
1628  break;
1629  case 2:
1630  shardId = SHARD_FOR_KEY(d.smallintval, shard_count);
1631  break;
1632  case 1:
1633  shardId = SHARD_FOR_KEY(d.tinyintval, shard_count);
1634  break;
1635  default:
1636  CHECK(false);
1637  }
1638  }
1639 
1640  return shardId / td->nShards;
1641 }
1642 
// Analyzes INSERT ... VALUES: after the common InsertStmt analysis, one target
// entry is appended per supplied value, cast to the type of its destination column.
//
// For a column with physical columns (checked to be geometry), the value must be a
// NULL constant or a constant string. Target entries for the physical columns are
// then appended in this order: coords, then ring sizes (POLYGON/MULTIPOLYGON), poly
// rings (MULTIPOLYGON), bounds (LINESTRING/POLYGON/MULTIPOLYGON) and render group
// (POLYGON/MULTIPOLYGON).
//
// Throws std::runtime_error when the number of values differs from the number of
// columns, when NULL is inserted into a NOT NULL column, or when a geometry value
// cannot be used.
1643 void InsertValuesStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1644  Analyzer::Query& query) const {
1645  InsertStmt::analyze(catalog, query);
1646  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1647  query.get_targetlist_nonconst();
1648  const auto tableId = query.get_result_table_id();
1649  const std::list<const ColumnDescriptor*> non_phys_cols =
1650  catalog.getAllColumnMetadataForTable(tableId, false, false, false);
// NOTE(review): this fires for any count mismatch, including more expressions than
// columns, although the message only mentions the opposite case.
1651  if (non_phys_cols.size() != value_list.size()) {
1652  throw std::runtime_error("Insert has more target columns than expressions.");
1653  }
// `it` walks the result column ids in step with the values; it is advanced once per
// target entry appended, including entries for physical columns.
1654  std::list<int>::const_iterator it = query.get_result_col_list().begin();
1655  for (auto& v : value_list) {
1656  auto e = v->analyze(catalog, query);
1657  const ColumnDescriptor* cd =
1658  catalog.getMetadataForColumn(query.get_result_table_id(), *it);
1659  CHECK(cd);
1660  if (cd->columnType.get_notnull()) {
1661  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
1662  if (c != nullptr && c->get_is_null()) {
1663  throw std::runtime_error("Cannot insert NULL into column " + cd->columnName);
1664  }
1665  }
1666  e = e->add_cast(cd->columnType);
1667  tlist.emplace_back(new Analyzer::TargetEntry("", e, false));
1668  ++it;
1669 
1670  const auto& col_ti = cd->columnType;
1671  if (col_ti.get_physical_cols() > 0) {
1672  CHECK(cd->columnType.is_geometry());
// Recover the constant behind the value, looking through a single cast if needed.
1673  auto c = dynamic_cast<const Analyzer::Constant*>(e.get());
1674  if (!c) {
1675  auto uoper = std::dynamic_pointer_cast<Analyzer::UOper>(e);
1676  if (uoper && uoper->get_optype() == kCAST) {
1677  c = dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
1678  }
1679  }
1680  bool is_null = false;
1681  std::string* geo_string{nullptr};
1682  if (c) {
1683  is_null = c->get_is_null();
1684  if (!is_null) {
1685  geo_string = c->get_constval().stringval;
1686  }
1687  }
1688  if (!is_null && !geo_string) {
1689  throw std::runtime_error("Expecting a WKT or WKB hex string for column " +
1690  cd->columnName);
1691  }
1692  std::vector<double> coords;
1693  std::vector<double> bounds;
1694  std::vector<int> ring_sizes;
1695  std::vector<int> poly_rings;
1696  int render_group =
1697  0; // @TODO simon.eves where to get render_group from in this context?!
1698  SQLTypeInfo import_ti{cd->columnType};
1699  if (!is_null) {
// NOTE(review): listing line 1700 is absent from this extract; the start of the
// call whose arguments follow is not visible.
1701  *geo_string, import_ti, coords, bounds, ring_sizes, poly_rings)) {
1702  throw std::runtime_error("Cannot read geometry to insert into column " +
1703  cd->columnName);
1704  }
1705  if (cd->columnType.get_type() != import_ti.get_type()) {
1706  // allow POLYGON to be inserted into MULTIPOLYGON column
1707  if (!(import_ti.get_type() == SQLTypes::kPOLYGON &&
// NOTE(review): listing line 1708 is absent from this extract; the second half of
// this condition is not visible.
1709  throw std::runtime_error(
1710  "Imported geometry doesn't match the type of column " + cd->columnName);
1711  }
1712  }
1713  } else {
1714  // Special case for NULL POINT, push NULL representation to coords
1715  if (cd->columnType.get_type() == kPOINT) {
1716  if (!coords.empty()) {
1717  throw std::runtime_error("NULL POINT with unexpected coordinates in column " +
1718  cd->columnName);
1719  }
1720  coords.push_back(NULL_ARRAY_DOUBLE);
1721  coords.push_back(NULL_DOUBLE);
1722  }
1723  }
1724 
1725  // TODO: check if import SRID matches columns SRID, may need to transform before
1726  // inserting
1727 
// Offset from the geometry column's id to the physical column being filled next.
1728  int nextColumnOffset = 1;
1729 
// Coords physical column: a TINYINT array holding the compressed coordinates.
1730  const ColumnDescriptor* cd_coords = catalog.getMetadataForColumn(
1731  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1732  CHECK(cd_coords);
1733  CHECK_EQ(cd_coords->columnType.get_type(), kARRAY);
1734  CHECK_EQ(cd_coords->columnType.get_subtype(), kTINYINT);
1735  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1736  if (!is_null || cd->columnType.get_type() == kPOINT) {
1737  auto compressed_coords = geospatial::compress_coords(coords, col_ti);
1738  for (auto cc : compressed_coords) {
1739  Datum d;
1740  d.tinyintval = cc;
1741  auto e = makeExpr<Analyzer::Constant>(kTINYINT, false, d);
1742  value_exprs.push_back(e);
1743  }
1744  }
1745  tlist.emplace_back(new Analyzer::TargetEntry(
1746  "",
1747  makeExpr<Analyzer::Constant>(cd_coords->columnType, is_null, value_exprs),
1748  false));
1749  ++it;
1750  nextColumnOffset++;
1751 
1752  if (cd->columnType.get_type() == kPOLYGON ||
1753  cd->columnType.get_type() == kMULTIPOLYGON) {
1754  // Put ring sizes array into separate physical column
1755  const ColumnDescriptor* cd_ring_sizes = catalog.getMetadataForColumn(
1756  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1757  CHECK(cd_ring_sizes);
1758  CHECK_EQ(cd_ring_sizes->columnType.get_type(), kARRAY);
1759  CHECK_EQ(cd_ring_sizes->columnType.get_subtype(), kINT);
1760  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1761  if (!is_null) {
1762  for (auto c : ring_sizes) {
1763  Datum d;
1764  d.intval = c;
1765  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1766  value_exprs.push_back(e);
1767  }
1768  }
1769  tlist.emplace_back(new Analyzer::TargetEntry(
1770  "",
1771  makeExpr<Analyzer::Constant>(cd_ring_sizes->columnType, is_null, value_exprs),
1772  false));
1773  ++it;
1774  nextColumnOffset++;
1775 
1776  if (cd->columnType.get_type() == kMULTIPOLYGON) {
1777  // Put poly_rings array into separate physical column
1778  const ColumnDescriptor* cd_poly_rings = catalog.getMetadataForColumn(
1779  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1780  CHECK(cd_poly_rings);
1781  CHECK_EQ(cd_poly_rings->columnType.get_type(), kARRAY);
1782  CHECK_EQ(cd_poly_rings->columnType.get_subtype(), kINT);
1783  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1784  if (!is_null) {
1785  for (auto c : poly_rings) {
1786  Datum d;
1787  d.intval = c;
1788  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1789  value_exprs.push_back(e);
1790  }
1791  }
1792  tlist.emplace_back(new Analyzer::TargetEntry(
1793  "",
1794  makeExpr<Analyzer::Constant>(
1795  cd_poly_rings->columnType, is_null, value_exprs),
1796  false));
1797  ++it;
1798  nextColumnOffset++;
1799  }
1800  }
1801 
// Bounds physical column: a DOUBLE array, present for every geometry type handled
// here except POINT.
1802  if (cd->columnType.get_type() == kLINESTRING ||
1803  cd->columnType.get_type() == kPOLYGON ||
1804  cd->columnType.get_type() == kMULTIPOLYGON) {
1805  const ColumnDescriptor* cd_bounds = catalog.getMetadataForColumn(
1806  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1807  CHECK(cd_bounds);
1808  CHECK_EQ(cd_bounds->columnType.get_type(), kARRAY);
1809  CHECK_EQ(cd_bounds->columnType.get_subtype(), kDOUBLE);
1810  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1811  if (!is_null) {
1812  for (auto b : bounds) {
1813  Datum d;
1814  d.doubleval = b;
1815  auto e = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
1816  value_exprs.push_back(e);
1817  }
1818  }
1819  tlist.emplace_back(new Analyzer::TargetEntry(
1820  "",
1821  makeExpr<Analyzer::Constant>(cd_bounds->columnType, is_null, value_exprs),
1822  false));
1823  ++it;
1824  nextColumnOffset++;
1825  }
1826 
1827  if (cd->columnType.get_type() == kPOLYGON ||
1828  cd->columnType.get_type() == kMULTIPOLYGON) {
1829  // Put render group into separate physical column
1830  const ColumnDescriptor* cd_render_group = catalog.getMetadataForColumn(
1831  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1832  CHECK(cd_render_group);
1833  CHECK_EQ(cd_render_group->columnType.get_type(), kINT);
1834  Datum d;
1835  d.intval = render_group;
1836  tlist.emplace_back(new Analyzer::TargetEntry(
1837  "",
1838  makeExpr<Analyzer::Constant>(cd_render_group->columnType, is_null, d),
1839  false));
1840  ++it;
1841  nextColumnOffset++;
1842  }
1843  }
1844  }
1845 }
1846 
// Executes a single-row INSERT ... VALUES statement: checks privileges, analyzes the
// statement into an Analyzer::Query, rejects views, and hands the query to
// RelAlgExecutor::executeSimpleInsert().
// Throws std::runtime_error when the privilege check fails or the target is a view.
// NOTE(review): the embedded listing numbers skip 1851, 1856-1857, 1867 and 1876, so
// the privilege-check arguments, the mutex being locked, the schema-lock helper and
// the definition of `executor` are not visible here -- confirm against the original.
1847 void InsertValuesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
1848  auto& catalog = session.getCatalog();
1849 
1850  if (!session.checkDBAccessPrivileges(
1852  throw std::runtime_error("User has no insert privileges on " + *table + ".");
1853  }
1854 
1855  auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
1858 
// analyze() fills `query`; the result table id read below comes from it.
1859  Analyzer::Query query;
1860  analyze(catalog, query);
1861 
1862  // Acquire schema write lock -- leave data lock so the fragmenter can checkpoint. For
1863  // singleton inserts we just take a write lock on the schema, which prevents concurrent
1864  // inserts.
1865  auto result_table_id = query.get_result_table_id();
1866  const auto td_with_lock =
1868  catalog, result_table_id);
// Invoking the lock holder yields the table descriptor pointer.
1869  auto td = td_with_lock();
1870  CHECK(td);
1871 
1872  if (td->isView) {
1873  throw std::runtime_error("Singleton inserts on views is not supported.");
1874  }
1875 
1877  RelAlgExecutor ra_executor(executor.get(), catalog);
1878 
1879  ra_executor.executeSimpleInsert(query);
1880 }
1881 
1882 void UpdateStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1883  Analyzer::Query& query) const {
1884  throw std::runtime_error("UPDATE statement not supported yet.");
1885 }
1886 
1887 void DeleteStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1888  Analyzer::Query& query) const {
1889  throw std::runtime_error("DELETE statement not supported yet.");
1890 }
1891 
1892 namespace {
1893 
// Shard-key type check for the column descriptor `cd`.
// Returns normally for integer, dictionary-encoded string and time column types;
// for any other type throws std::runtime_error naming the type and its encoding.
// NOTE(review): the signature line (listing number 1894) is absent from this
// listing -- confirm the function name and parameter list against the original.
1895  const auto& col_ti = cd.columnType;
1896  if (col_ti.is_integer() ||
1897  (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) ||
1898  col_ti.is_time()) {
1899  return;
1900  }
1901  throw std::runtime_error("Cannot shard on type " + col_ti.get_type_name() +
1902  ", encoding " + col_ti.get_compression_name());
1903 }
1904 
// Looks up the column called `name` (exact, case-sensitive match) in `columns`.
// Returns its 1-based position, where every geometry column that precedes it also
// accounts for its physical columns; returns 0 when no column matches.
// NOTE(review): listing number 1910 (a statement inside the match branch, just
// before the return) is absent from this listing -- confirm against the original.
1905 size_t shard_column_index(const std::string& name,
1906  const std::list<ColumnDescriptor>& columns) {
1907  size_t index = 1;
1908  for (const auto& cd : columns) {
1909  if (cd.columnName == name) {
1911  return index;
1912  }
// Advance past this column and, for geometry, past its physical columns as well.
1913  ++index;
1914  if (cd.columnType.is_geometry()) {
1915  index += cd.columnType.get_physical_cols();
1916  }
1917  }
1918  // Not found, return 0
1919  return 0;
1920 }
1921 
1922 size_t sort_column_index(const std::string& name,
1923  const std::list<ColumnDescriptor>& columns) {
1924  size_t index = 1;
1925  for (const auto& cd : columns) {
1926  if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
1927  return index;
1928  }
1929  ++index;
1930  if (cd.columnType.is_geometry()) {
1931  index += cd.columnType.get_physical_cols();
1932  }
1933  }
1934  // Not found, return 0
1935  return 0;
1936 }
1937 
1938 void set_string_field(rapidjson::Value& obj,
1939  const std::string& field_name,
1940  const std::string& field_value,
1941  rapidjson::Document& document) {
1942  rapidjson::Value field_name_json_str;
1943  field_name_json_str.SetString(
1944  field_name.c_str(), field_name.size(), document.GetAllocator());
1945  rapidjson::Value field_value_json_str;
1946  field_value_json_str.SetString(
1947  field_value.c_str(), field_value.size(), document.GetAllocator());
1948  obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
1949 }
1950 
// Serializes the key metadata of a table to a JSON array string.
// The array holds one {"type": "SHARD KEY", "name": ...} object when
// `shard_key_def` is non-null, followed by one "SHARED DICTIONARY" object per entry
// of `shared_dict_defs` carrying name, foreign_table and foreign_column.
// NOTE(review): listing numbers 1951 (first signature line) and 1967 (start of the
// "foreign_table" call) are absent from this listing -- confirm against the original.
1952  const ShardKeyDef* shard_key_def,
1953  const std::vector<SharedDictionaryDef>& shared_dict_defs) {
// `document` is used only for its allocator; the payload lives in `arr`.
1954  rapidjson::Document document;
1955  auto& allocator = document.GetAllocator();
1956  rapidjson::Value arr(rapidjson::kArrayType);
1957  if (shard_key_def) {
1958  rapidjson::Value shard_key_obj(rapidjson::kObjectType);
1959  set_string_field(shard_key_obj, "type", "SHARD KEY", document);
1960  set_string_field(shard_key_obj, "name", shard_key_def->get_column(), document);
1961  arr.PushBack(shard_key_obj, allocator);
1962  }
1963  for (const auto& shared_dict_def : shared_dict_defs) {
1964  rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
1965  set_string_field(shared_dict_obj, "type", "SHARED DICTIONARY", document);
1966  set_string_field(shared_dict_obj, "name", shared_dict_def.get_column(), document);
1968  shared_dict_obj, "foreign_table", shared_dict_def.get_foreign_table(), document);
1969  set_string_field(shared_dict_obj,
1970  "foreign_column",
1971  shared_dict_def.get_foreign_column(),
1972  document);
1973  arr.PushBack(shared_dict_obj, allocator);
1974  }
// Write the array out as JSON text and return it as a string.
1975  rapidjson::StringBuffer buffer;
1976  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
1977  arr.Accept(writer);
1978  return buffer.GetString();
1979 }
1980 
1981 template <typename LITERAL_TYPE,
1982  typename ASSIGNMENT,
1983  typename VALIDATE = DefaultValidate<LITERAL_TYPE>>
1984 decltype(auto) get_property_value(const NameValueAssign* p,
1985  ASSIGNMENT op,
1986  VALIDATE validate = VALIDATE()) {
1987  const auto val = validate(p);
1988  return op(val);
1989 }
1990 
// Table-option handler: validates `p` as a string literal with
// CaseSensitiveValidate and stores the value in td.storageType. `columns` is unused.
// NOTE(review): the first signature line (listing number 1991) is absent from this
// listing; presumably this is get_storage_type, as referenced by tableDefFuncMap.
1992  const NameValueAssign* p,
1993  const std::list<ColumnDescriptor>& columns) {
1994  auto assignment = [&td](const auto val) { td.storageType = val; };
1995  return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
1996  p, assignment);
1997 }
1998 
// Table-option handler: validates `p` as an integer literal and stores it in
// td.maxFragRows. `columns` is unused.
// NOTE(review): the first signature line (listing number 1999) is absent from this
// listing; presumably this is get_frag_size_def, as referenced by tableDefFuncMap.
2000  const NameValueAssign* p,
2001  const std::list<ColumnDescriptor>& columns) {
2002  return get_property_value<IntLiteral>(p,
2003  [&td](const auto val) { td.maxFragRows = val; });
2004 }
2005 
// Dataframe-option handler: validates `p` as an integer literal and stores it in
// df_td.maxFragRows. `columns` is unused.
// NOTE(review): the first signature line (listing number 2006) is absent from this
// listing; presumably this is get_frag_size_dataframe_def, as referenced by
// dataframeDefFuncMap.
2007  const NameValueAssign* p,
2008  const std::list<ColumnDescriptor>& columns) {
2009  return get_property_value<IntLiteral>(
2010  p, [&df_td](const auto val) { df_td.maxFragRows = val; });
2011 }
2012 
// Table-option handler: validates `p` as an integer literal and stores it in
// td.maxChunkSize. `columns` is unused.
// NOTE(review): the first signature line (listing number 2013) is absent from this
// listing; presumably this is get_max_chunk_size_def, as referenced by
// tableDefFuncMap.
2014  const NameValueAssign* p,
2015  const std::list<ColumnDescriptor>& columns) {
2016  return get_property_value<IntLiteral>(p,
2017  [&td](const auto val) { td.maxChunkSize = val; });
2018 }
2019 
// Dataframe-option handler: validates `p` as an integer literal and stores it in
// df_td.maxChunkSize. `columns` is unused.
// NOTE(review): the first signature line (listing number 2020) is absent from this
// listing; presumably this is get_max_chunk_size_dataframe_def, as referenced by
// dataframeDefFuncMap.
2021  DataframeTableDescriptor& df_td,
2022  const NameValueAssign* p,
2023  const std::list<ColumnDescriptor>& columns) {
2024  return get_property_value<IntLiteral>(
2025  p, [&df_td](const auto val) { df_td.maxChunkSize = val; });
2026 }
2027 
// Dataframe-option handler: validates `p` as a string literal and stores it in
// df_td.delimiter. Throws std::runtime_error unless the value is exactly one
// character long. `columns` is unused.
// NOTE(review): the first signature line (listing number 2028) is absent from this
// listing; presumably this is get_delimiter_def, as referenced by
// dataframeDefFuncMap.
2029  const NameValueAssign* p,
2030  const std::list<ColumnDescriptor>& columns) {
2031  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2032  if (val.size() != 1) {
2033  throw std::runtime_error("Length of DELIMITER must be equal to 1.");
2034  }
2035  df_td.delimiter = val;
2036  });
2037 }
2038 
// Dataframe-option handler: validates `p` as a string literal and sets
// df_td.hasHeader to false for "FALSE" and true for "TRUE"; any other value throws
// std::runtime_error. `columns` is unused.
// NOTE(review): the comparison is against upper-case text while the error message
// quotes lower-case values -- presumably the default validator upper-cases the
// literal; confirm.
// NOTE(review): the first signature line (listing number 2039) is absent from this
// listing; presumably this is get_header_def, as referenced by dataframeDefFuncMap.
2040  const NameValueAssign* p,
2041  const std::list<ColumnDescriptor>& columns) {
2042  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2043  if (val == "FALSE") {
2044  df_td.hasHeader = false;
2045  } else if (val == "TRUE") {
2046  df_td.hasHeader = true;
2047  } else {
2048  throw std::runtime_error("Option HEADER support only 'true' or 'false' values.");
2049  }
2050  });
2051 }
2052 
// Table-option handler: validates `p` as an integer literal and stores it in
// td.fragPageSize. `columns` is unused.
// NOTE(review): the first signature line (listing number 2053) is absent from this
// listing; presumably this is get_page_size_def, as referenced by tableDefFuncMap.
2054  const NameValueAssign* p,
2055  const std::list<ColumnDescriptor>& columns) {
2056  return get_property_value<IntLiteral>(p,
2057  [&td](const auto val) { td.fragPageSize = val; });
2058 }
// Table-option handler: validates `p` as an integer literal and stores it in
// td.maxRows. `columns` is unused.
// NOTE(review): the first signature line (listing number 2059) is absent from this
// listing; presumably this is get_max_rows_def, as referenced by tableDefFuncMap.
2060  const NameValueAssign* p,
2061  const std::list<ColumnDescriptor>& columns) {
2062  return get_property_value<IntLiteral>(p, [&td](const auto val) { td.maxRows = val; });
2063 }
2064 
// Dataframe-option handler: validates `p` as an integer literal and stores it in
// df_td.skipRows. `columns` is unused.
// NOTE(review): the first signature line (listing number 2065) is absent from this
// listing; presumably this is get_skip_rows_def, as referenced by
// dataframeDefFuncMap.
2066  const NameValueAssign* p,
2067  const std::list<ColumnDescriptor>& columns) {
2068  return get_property_value<IntLiteral>(
2069  p, [&df_td](const auto val) { df_td.skipRows = val; });
2070 }
2071 
// Table-option handler: validates `p` as a string literal and stores it in
// td.partitions. Throws std::runtime_error when the value is neither "SHARDED" nor
// "REPLICATED", or when "REPLICATED" is requested for a table that already has a
// shard column (td.shardedColumnId != 0). `columns` is unused.
// NOTE(review): the first signature line (listing number 2072) is absent from this
// listing; presumably this is get_partions_def, as referenced by tableDefFuncMap.
2073  const NameValueAssign* p,
2074  const std::list<ColumnDescriptor>& columns) {
2075  return get_property_value<StringLiteral>(p, [&td](const auto partitions_uc) {
2076  if (partitions_uc != "SHARDED" && partitions_uc != "REPLICATED") {
2077  throw std::runtime_error("PARTITIONS must be SHARDED or REPLICATED");
2078  }
2079  if (td.shardedColumnId != 0 && partitions_uc == "REPLICATED") {
2080  throw std::runtime_error(
2081  "A table cannot be sharded and replicated at the same time");
2082  };
2083  td.partitions = partitions_uc;
2084  });
2085 }
// Table-option handler: validates `p` as an integer literal and derives td.nShards
// from it. Throws std::runtime_error when no shard column has been set on `td`, or
// when g_leaf_count is non-zero and the requested count is not a multiple of it.
// `columns` is unused.
// NOTE(review): the first signature line (listing number 2086) is absent from this
// listing; presumably this is get_shard_count_def, as referenced by tableDefFuncMap.
2087  const NameValueAssign* p,
2088  const std::list<ColumnDescriptor>& columns) {
2089  if (!td.shardedColumnId) {
2090  throw std::runtime_error("SHARD KEY must be defined.");
2091  }
2092  return get_property_value<IntLiteral>(p, [&td](const auto shard_count) {
2093  if (g_leaf_count && shard_count % g_leaf_count) {
2094  throw std::runtime_error(
2095  "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2096  }
// With a non-zero leaf count the stored value is the per-leaf share of the count.
2097  td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
// NOTE(review): td.shardedColumnId was already required to be non-zero above, so
// this condition looks unreachable -- confirm the intended check.
2098  if (!td.shardedColumnId && !td.nShards) {
2099  throw std::runtime_error(
2100  "Must specify the number of shards through the SHARD_COUNT option");
2101  };
2102  });
2103 }
2104 
2105 decltype(auto) get_vacuum_def(TableDescriptor& td,
2106  const NameValueAssign* p,
2107  const std::list<ColumnDescriptor>& columns) {
2108  return get_property_value<StringLiteral>(p, [&td](const auto vacuum_uc) {
2109  if (vacuum_uc != "IMMEDIATE" && vacuum_uc != "DELAYED") {
2110  throw std::runtime_error("VACUUM must be IMMEDIATE or DELAYED");
2111  }
2112  td.hasDeletedCol = boost::iequals(vacuum_uc, "IMMEDIATE") ? false : true;
2113  });
2114 }
2115 
// Table-option handler: validates `p` as a string literal, resolves it to a column
// position with sort_column_index() and stores the result in td.sortedColumnId.
// Throws std::runtime_error when the column is not found (position 0).
// NOTE(review): sort_column_index() upper-cases column names before comparing, so
// the validated value is presumably upper-cased already -- confirm.
// NOTE(review): the first signature line (listing number 2116) is absent from this
// listing; presumably this is get_sort_column_def, as referenced by tableDefFuncMap.
2117  const NameValueAssign* p,
2118  const std::list<ColumnDescriptor>& columns) {
2119  return get_property_value<StringLiteral>(p, [&td, &columns](const auto sort_upper) {
2120  td.sortedColumnId = sort_column_index(sort_upper, columns);
2121  if (!td.sortedColumnId) {
2122  throw std::runtime_error("Specified sort column " + sort_upper + " doesn't exist");
2123  }
2124  });
2125 }
2126 
// Dispatch table for table options: maps the lower-case option name to the handler
// that validates the option and applies it to a TableDescriptor. The lookups in
// this file lower-case the user-supplied option name before searching this map.
2127 static const std::map<const std::string, const TableDefFuncPtr> tableDefFuncMap = {
2128  {"fragment_size"s, get_frag_size_def},
2129  {"max_chunk_size"s, get_max_chunk_size_def},
2130  {"page_size"s, get_page_size_def},
2131  {"max_rows"s, get_max_rows_def},
2132  {"partitions"s, get_partions_def},
2133  {"shard_count"s, get_shard_count_def},
2134  {"vacuum"s, get_vacuum_def},
2135  {"sort_column"s, get_sort_column_def},
2136  {"storage_type"s, get_storage_type}};
2137 
// Applies one CREATE TABLE option to `td`: lower-cases the option name, looks up
// its handler in tableDefFuncMap and invokes it with (td, p.get(), columns).
// Throws std::runtime_error listing the accepted options when the name is unknown.
// NOTE(review): the first signature line (listing number 2138) is absent from this
// listing; presumably this is get_table_definitions, as called from
// CreateTableStmt::executeDryRun.
2139  const std::unique_ptr<NameValueAssign>& p,
2140  const std::list<ColumnDescriptor>& columns) {
2141  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2142  if (it == tableDefFuncMap.end()) {
2143  throw std::runtime_error(
2144  "Invalid CREATE TABLE option " + *p->get_name() +
2145  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROWS, "
2146  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, or STORAGE_TYPE.");
2147  }
2148  return it->second(td, p.get(), columns);
2149 }
2150 
// Applies one CREATE TABLE AS option to `td`: lower-cases the option name, looks up
// its handler in tableDefFuncMap and invokes it with (td, p.get(), columns).
// Throws std::runtime_error listing the accepted options when the name is unknown.
// NOTE(review): the message mentions USE_SHARED_DICTIONARIES, which has no entry in
// tableDefFuncMap -- presumably the caller handles that option before calling this.
// NOTE(review): the first signature line (listing number 2151) is absent from this
// listing, so the function name is not visible here -- confirm against the original.
2152  const std::unique_ptr<NameValueAssign>& p,
2153  const std::list<ColumnDescriptor>& columns) {
2154  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2155  if (it == tableDefFuncMap.end()) {
2156  throw std::runtime_error(
2157  "Invalid CREATE TABLE AS option " + *p->get_name() +
2158  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROWS, "
2159  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE or "
2160  "USE_SHARED_DICTIONARIES.");
2161  }
2162  return it->second(td, p.get(), columns);
2163 }
2164 
// Dispatch table for dataframe options: maps the lower-case option name to the
// handler that validates the option and applies it to a DataframeTableDescriptor.
2165 static const std::map<const std::string, const DataframeDefFuncPtr> dataframeDefFuncMap =
2166  {{"fragment_size"s, get_frag_size_dataframe_def},
2167  {"max_chunk_size"s, get_max_chunk_size_dataframe_def},
2168  {"skip_rows"s, get_skip_rows_def},
2169  {"delimiter"s, get_delimiter_def},
2170  {"header"s, get_header_def}};
2171 
// Applies one CREATE DATAFRAME option to `df_td`: lower-cases the option name, looks
// up its handler in dataframeDefFuncMap and invokes it with
// (df_td, p.get(), columns).
// Throws std::runtime_error listing the accepted options when the name is unknown.
// NOTE(review): the first signature line (listing number 2172) is absent from this
// listing; presumably this is get_dataframe_definitions, as called from
// CreateDataframeStmt::execute.
2173  const std::unique_ptr<NameValueAssign>& p,
2174  const std::list<ColumnDescriptor>& columns) {
2175  const auto it =
2176  dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2177  if (it == dataframeDefFuncMap.end()) {
2178  throw std::runtime_error(
2179  "Invalid CREATE DATAFRAME option " + *p->get_name() +
2180  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
2181  }
2182  return it->second(df_td, p.get(), columns);
2183 }
2184 
2185 } // namespace
2186 
// Builds the table descriptor `td`, the column list `columns` and the shared
// dictionary list `shared_dict_defs` for this CREATE TABLE statement; nothing in the
// visible code creates the table itself.
// Throws std::runtime_error for: more than one shard key, a table element that is
// neither a column, shard key nor shared dictionary definition, an unknown shard
// column, an invalid storage option, or a shard key without a shard count.
// NOTE(review): listing numbers 2197, 2215, 2230 and 2232 are absent from this
// listing (the call that receives the shared dictionary arguments, a statement
// between reading the column name and setColumnDescriptor, and the bodies of both
// is_temporary_ branches) -- confirm against the original.
2187 void CreateTableStmt::executeDryRun(const Catalog_Namespace::SessionInfo& session,
2188  TableDescriptor& td,
2189  std::list<ColumnDescriptor>& columns,
2190  std::vector<SharedDictionaryDef>& shared_dict_defs) {
2191  std::unordered_set<std::string> uc_col_names;
2192  const auto& catalog = session.getCatalog();
2193  const ShardKeyDef* shard_key_def{nullptr};
// Classify each table element: shared dictionary, shard key, or column definition.
2194  for (auto& e : table_element_list_) {
2195  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2196  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2198  this, shared_dict_def, columns, shared_dict_defs, catalog);
2199  shared_dict_defs.push_back(*shared_dict_def);
2200  continue;
2201  }
2202  if (dynamic_cast<ShardKeyDef*>(e.get())) {
2203  if (shard_key_def) {
2204  throw std::runtime_error("Specified more than one shard key");
2205  }
2206  shard_key_def = static_cast<const ShardKeyDef*>(e.get());
2207  continue;
2208  }
2209  if (!dynamic_cast<ColumnDef*>(e.get())) {
2210  throw std::runtime_error("Table constraints are not supported yet.");
2211  }
2212  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2213  ColumnDescriptor cd;
2214  cd.columnName = *coldef->get_column_name();
2216  setColumnDescriptor(cd, coldef);
2217  columns.push_back(cd);
2218  }
2219 
2220  ddl_utils::set_default_table_attributes(*table_, td, columns.size());
2221 
// Resolve the shard key column to its position; 0 means it was not found.
2222  if (shard_key_def) {
2223  td.shardedColumnId = shard_column_index(shard_key_def->get_column(), columns);
2224  if (!td.shardedColumnId) {
2225  throw std::runtime_error("Specified shard column " + shard_key_def->get_column() +
2226  " doesn't exist");
2227  }
2228  }
2229  if (is_temporary_) {
2231  } else {
2233  }
// Storage options are applied after the shard column is set; the shard-count
// handler requires td.shardedColumnId to be non-zero.
2234  if (!storage_options_.empty()) {
2235  for (auto& p : storage_options_) {
2236  get_table_definitions(td, p, columns);
2237  }
2238  }
2239  if (td.shardedColumnId && !td.nShards) {
2240  throw std::runtime_error("SHARD_COUNT needs to be specified with SHARD_KEY.");
2241  }
2242  td.keyMetainfo = serialize_key_metainfo(shard_key_def, shared_dict_defs);
2243 }
2244 
// Executes CREATE TABLE: builds the descriptors via executeDryRun(), creates the
// table through Catalog::createShardedTable() and then registers the table object
// for the current user through SysCatalog::createDBObject().
// Returns without creating anything when validateNonExistentTableOrView() returns
// false; throws std::runtime_error when the privilege check fails.
// NOTE(review): listing numbers 2249-2250 (the mutex being locked) and 2253-2254
// (the privilege-check condition) are absent from this listing -- confirm against
// the original.
2245 void CreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2246  auto& catalog = session.getCatalog();
2247 
2248  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2251 
2252  // check access privileges
2255  throw std::runtime_error("Table " + *table_ +
2256  " will not be created. User has no create privileges.");
2257  }
2258 
2259  if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
2260  return;
2261  }
2262 
2263  TableDescriptor td;
2264  std::list<ColumnDescriptor> columns;
2265  std::vector<SharedDictionaryDef> shared_dict_defs;
2266 
2267  executeDryRun(session, td, columns, shared_dict_defs);
2268  td.userId = session.get_currentUser().userId;
2269 
2270  catalog.createShardedTable(td, columns, shared_dict_defs);
2271  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2272  // privileges
2273  SysCatalog::instance().createDBObject(
2274  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
2275 }
2276 
// Executes CREATE DATAFRAME: collects column and shared dictionary definitions,
// fills a dataframe table descriptor (`df_td`), applies the storage options, creates
// the table through Catalog::createShardedTable() and registers the table object
// for the current user.
// Throws std::runtime_error when the privilege check fails, the table already
// exists, a table element is neither a column nor a shared dictionary definition, a
// column name is repeated (compared case-insensitively), or an option is invalid.
// NOTE(review): listing numbers 2281-2282, 2285-2286, 2294, 2302, 2327-2330 and 2332
// are absent from this listing, including the declaration of `df_td` and several of
// its default assignments -- confirm against the original.
2277 void CreateDataframeStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2278  auto& catalog = session.getCatalog();
2279 
2280  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2283 
2284  // check access privileges
2287  throw std::runtime_error("Table " + *table_ +
2288  " will not be created. User has no create privileges.");
2289  }
2290 
2291  if (catalog.getMetadataForTable(*table_) != nullptr) {
2292  throw std::runtime_error("Table " + *table_ + " already exists.");
2293  }
2295  std::list<ColumnDescriptor> columns;
2296  std::vector<SharedDictionaryDef> shared_dict_defs;
2297 
// Upper-cased names seen so far, used to reject duplicate columns.
2298  std::unordered_set<std::string> uc_col_names;
2299  for (auto& e : table_element_list_) {
2300  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2301  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2303  this, shared_dict_def, columns, shared_dict_defs, catalog);
2304  shared_dict_defs.push_back(*shared_dict_def);
2305  continue;
2306  }
2307  if (!dynamic_cast<ColumnDef*>(e.get())) {
2308  throw std::runtime_error("Table constraints are not supported yet.");
2309  }
2310  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2311  ColumnDescriptor cd;
2312  cd.columnName = *coldef->get_column_name();
2313  const auto uc_col_name = boost::to_upper_copy<std::string>(cd.columnName);
2314  const auto it_ok = uc_col_names.insert(uc_col_name);
2315  if (!it_ok.second) {
2316  throw std::runtime_error("Column '" + cd.columnName + "' defined more than once");
2317  }
2318 
2319  setColumnDescriptor(cd, coldef);
2320  columns.push_back(cd);
2321  }
2322 
2323  df_td.tableName = *table_;
2324  df_td.nColumns = columns.size();
2325  df_td.isView = false;
2326  df_td.fragmenter = nullptr;
2331  df_td.maxRows = DEFAULT_MAX_ROWS;
2333  if (!storage_options_.empty()) {
2334  for (auto& p : storage_options_) {
2335  get_dataframe_definitions(df_td, p, columns);
2336  }
2337  }
// No shard key is passed for dataframes; only shared dictionaries are serialized.
2338  df_td.keyMetainfo = serialize_key_metainfo(nullptr, shared_dict_defs);
2339  df_td.userId = session.get_currentUser().userId;
// The statement's file name is stored in the storageType field.
2340  df_td.storageType = *filename_;
2341 
2342  catalog.createShardedTable(df_td, columns, shared_dict_defs);
2343  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2344  // privileges
2345  SysCatalog::instance().createDBObject(
2346  session.get_currentUser(), df_td.tableName, TableDBObjectType, catalog);
2347 }
2348 
// Runs `select_stmt` and returns its rows.
// The statement is passed through pg_shim(), planned by the catalog's Calcite
// manager and executed with RelAlgExecutor::executeRelAlgQuery().
//   targets                 out-parameter, overwritten with the result's target
//                           metadata
//   validate_only           forwarded into the ExecutionOptions initializer
//   outer_fragment_indices  forwarded as the last ExecutionOptions initializer
// The device type is taken from the session when built with HAVE_CUDA and is CPU
// otherwise; a cpu_mode query hint switches the compilation options to CPU.
// NOTE(review): listing numbers 2357, 2372, 2377, 2394 and 2397-2398 are absent from
// this listing, including the definitions of `executor` and `co` -- confirm against
// the original.
2349 std::shared_ptr<ResultSet> getResultSet(QueryStateProxy query_state_proxy,
2350  const std::string select_stmt,
2351  std::vector<TargetMetaInfo>& targets,
2352  bool validate_only = false,
2353  std::vector<size_t> outer_fragment_indices = {}) {
2354  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2355  auto& catalog = session->getCatalog();
2356 
2358 #ifdef HAVE_CUDA
2359  const auto device_type = session->get_executor_device_type();
2360 #else
2361  const auto device_type = ExecutorDeviceType::CPU;
2362 #endif // HAVE_CUDA
2363  auto calcite_mgr = catalog.getCalciteMgr();
2364 
2365  // TODO MAT this should actually get the global or the session parameter for
2366  // view optimization
2367  const auto query_ra =
2368  calcite_mgr
2369  ->process(query_state_proxy, pg_shim(select_stmt), {}, true, false, false, true)
2370  .plan_result;
2371  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
2373  const auto& query_hints = ra_executor.getParsedQueryHints();
2374  if (query_hints.cpu_mode) {
2375  co.device_type = ExecutorDeviceType::CPU;
2376  }
2378  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2379  // struct
2380  ExecutionOptions eo = {false,
2381  true,
2382  false,
2383  true,
2384  false,
2385  false,
2386  validate_only,
2387  false,
2388  10000,
2389  false,
2390  false,
2391  0.9,
2392  false,
2393  1000,
2395  outer_fragment_indices};
// `result` starts as a placeholder and is replaced by the executed query's result.
2396  ExecutionResult result{std::make_shared<ResultSet>(std::vector<TargetInfo>{},
2399  nullptr,
2400  nullptr),
2401  {}};
2402  result = ra_executor.executeRelAlgQuery(co, eo, false, nullptr);
2403  targets = result.getTargetsMeta();
2404 
2405  return result.getRows();
2406 }
2407 
// Plans `sql_query_string` (after pg_shim()) with the catalog's Calcite manager and
// returns RelAlgExecutor::getOuterFragmentCount() for the resulting plan.
// The device type is taken from the session when built with HAVE_CUDA and is CPU
// otherwise; a cpu_mode query hint forces CPU.
// NOTE(review): listing numbers 2413 (presumably the definition of `executor`) and
// 2432 (one CompilationOptions initializer) are absent from this listing -- confirm
// against the original.
2408 size_t LocalConnector::getOuterFragmentCount(QueryStateProxy query_state_proxy,
2409  std::string& sql_query_string) {
2410  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2411  auto& catalog = session->getCatalog();
2412 
2414 #ifdef HAVE_CUDA
2415  const auto device_type = session->get_executor_device_type();
2416 #else
2417  const auto device_type = ExecutorDeviceType::CPU;
2418 #endif // HAVE_CUDA
2419  auto calcite_mgr = catalog.getCalciteMgr();
2420 
2421  // TODO MAT this should actually get the global or the session parameter for
2422  // view optimization
2423  const auto query_ra =
2424  calcite_mgr
2425  ->process(
2426  query_state_proxy, pg_shim(sql_query_string), {}, true, false, false, true)
2427  .plan_result;
2428  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
2429  const auto& query_hints = ra_executor.getParsedQueryHints();
2430  CompilationOptions co = {query_hints.cpu_mode ? ExecutorDeviceType::CPU : device_type,
2431  true,
2433  false};
2434  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2435  // struct
2436  ExecutionOptions eo = {
2437  false, true, false, true, false, false, false, false, 10000, false, false, 0.9};
2438  return ra_executor.getOuterFragmentCount(co, eo);
2439 }
2440 
2441 AggregatedResult LocalConnector::query(QueryStateProxy query_state_proxy,
2442  std::string& sql_query_string,
2443  std::vector<size_t> outer_frag_indices,
2444  bool validate_only) {
2445  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2446  // TODO(PS): Should we be using the shimmed query in getResultSet?
2447  std::string pg_shimmed_select_query = pg_shim(sql_query_string);
2448 
2449  std::vector<TargetMetaInfo> target_metainfos;
2450  auto result_rows = getResultSet(query_state_proxy,
2451  sql_query_string,
2452  target_metainfos,
2453  validate_only,
2454  outer_frag_indices);
2455  AggregatedResult res = {result_rows, target_metainfos};
2456  return res;
2457 }
2458 
2459 std::vector<AggregatedResult> LocalConnector::query(
2460  QueryStateProxy query_state_proxy,
2461  std::string& sql_query_string,
2462  std::vector<size_t> outer_frag_indices) {
2463  auto res = query(query_state_proxy, sql_query_string, outer_frag_indices, false);
2464  return {res};
2465 }
2466 
2467 void LocalConnector::insertDataToLeaf(const Catalog_Namespace::SessionInfo& session,
2468  const size_t leaf_idx,
2469  Fragmenter_Namespace::InsertData& insert_data) {
2470  CHECK(leaf_idx == 0);
2471  auto& catalog = session.getCatalog();
2472  auto created_td = catalog.getMetadataForTable(insert_data.tableId);
2473  ChunkKey chunkKey = {catalog.getCurrentDB().dbId, created_td->tableId};
2474  // TODO(adb): Ensure that we have previously obtained a write lock for this table's
2475  // data.
2476  created_td->fragmenter->insertDataNoCheckpoint(insert_data);
2477 }
2478 
2479 void LocalConnector::checkpoint(const Catalog_Namespace::SessionInfo& session,
2480  int tableId) {
2481  auto& catalog = session.getCatalog();
2482  auto dbId = catalog.getCurrentDB().dbId;
2483  catalog.getDataMgr().checkpoint(dbId, tableId);
2484 }
2485 
2486 void LocalConnector::rollback(const Catalog_Namespace::SessionInfo& session,
2487  int tableId) {
2488  auto& catalog = session.getCatalog();
2489  auto dbId = catalog.getCurrentDB().dbId;
2490  catalog.getDataMgr().checkpoint(dbId, tableId);
2491 }
2492 
// Builds one ColumnDescriptor per target of `result`, named after the target and
// typed with the target's physical type info.
// Two lists are built in parallel: a plain one, and a "for create" variant whose
// column types have their comp param adjusted. `for_create` selects which list is
// returned.
// A target named exactly "rowid" is renamed by appending a running counter
// ("rowid0", "rowid1", ...).
// NOTE(review): listing numbers 2509 (the condition guarding the comp-param reset)
// and 2521 (a statement in the date branch) are absent from this listing -- confirm
// against the original.
2493 std::list<ColumnDescriptor> LocalConnector::getColumnDescriptors(AggregatedResult& result,
2494  bool for_create) {
2495  std::list<ColumnDescriptor> column_descriptors;
2496  std::list<ColumnDescriptor> column_descriptors_for_create;
2497 
2498  int rowid_suffix = 0;
2499  for (const auto& target_metainfo : result.targets_meta) {
2500  ColumnDescriptor cd;
2501  cd.columnName = target_metainfo.get_resname();
2502  if (cd.columnName == "rowid") {
2503  cd.columnName += std::to_string(rowid_suffix++);
2504  }
2505  cd.columnType = target_metainfo.get_physical_type_info();
2506 
// Start the "for create" variant as a copy; only it is modified below.
2507  ColumnDescriptor cd_for_create = cd;
2508 
2510  // we need to reset the comp param (as this points to the actual dictionary)
2511  if (cd.columnType.is_array()) {
2512  // for dict encoded arrays, it is always 4 bytes
2513  cd_for_create.columnType.set_comp_param(32);
2514  } else {
2515  cd_for_create.columnType.set_comp_param(cd.columnType.get_size() * 8);
2516  }
2517  }
2518 
2519  if (cd.columnType.is_date() && !cd.columnType.is_date_in_days()) {
2520  // default to kENCODING_DATE_IN_DAYS encoding
2522  cd_for_create.columnType.set_comp_param(0);
2523  }
2524 
2525  column_descriptors_for_create.push_back(cd_for_create);
2526  column_descriptors.push_back(cd);
2527  }
2528 
2529  if (for_create) {
2530  return column_descriptors_for_create;
2531  }
2532 
2533  return column_descriptors;
2534 }
2535 
2536 void InsertIntoTableAsSelectStmt::populateData(QueryStateProxy query_state_proxy,
2537  bool validate_table) {
2538  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2539  auto& catalog = session->getCatalog();
2540  const auto td_with_lock =
2542  catalog, table_name_);
2543  const auto td = td_with_lock();
2545 
2546  LocalConnector local_connector;
2547  bool populate_table = false;
2548 
2549  if (leafs_connector_) {
2550  populate_table = true;
2551  } else {
2552  leafs_connector_ = &local_connector;
2553  if (!g_cluster) {
2554  populate_table = true;
2555  }
2556  }
2557 
2558  auto get_target_column_descriptors = [this, &catalog](const TableDescriptor* td) {
2559  std::vector<const ColumnDescriptor*> target_column_descriptors;
2560  if (column_list_.empty()) {
2561  auto list = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
2562  target_column_descriptors = {std::begin(list), std::end(list)};
2563 
2564  } else {
2565  for (auto& c : column_list_) {
2566  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
2567  if (cd == nullptr) {
2568  throw std::runtime_error("Column " + *c + " does not exist.");
2569  }
2570  target_column_descriptors.push_back(cd);
2571  }
2572  }
2573 
2574  return target_column_descriptors;
2575  };
2576 
2577  bool is_temporary = table_is_temporary(td);
2578 
2579  // Don't allow simultaneous inserts
2580  const auto insert_data_lock =
2582 
2583  if (validate_table) {
2584  // check access privileges
2585  if (!td) {
2586  throw std::runtime_error("Table " + table_name_ + " does not exist.");
2587  }
2588  if (td->isView) {
2589  throw std::runtime_error("Insert to views is not supported yet.");
2590  }
2591 
2592  if (!session->checkDBAccessPrivileges(DBObjectType::TableDBObjectType,
2594  table_name_)) {
2595  throw std::runtime_error("User has no insert privileges on " + table_name_ + ".");
2596  }
2597 
2598  // only validate the select query so we get the target types
2599  // correctly, but do not populate the result set
2600  auto result = local_connector.query(query_state_proxy, select_query_, {}, true);
2601  auto source_column_descriptors = local_connector.getColumnDescriptors(result, false);
2602 
2603  std::vector<const ColumnDescriptor*> target_column_descriptors =
2604  get_target_column_descriptors(td);
2605  if (catalog.getAllColumnMetadataForTable(td->tableId, false, false, false).size() !=
2606  target_column_descriptors.size()) {
2607  throw std::runtime_error("Insert into a subset of columns is not supported yet.");
2608  }
2609 
2610  if (source_column_descriptors.size() != target_column_descriptors.size()) {
2611  throw std::runtime_error("The number of source and target columns does not match.");
2612  }
2613 
2614  for (int i = 0; i < source_column_descriptors.size(); i++) {
2615  const ColumnDescriptor* source_cd =
2616  &(*std::next(source_column_descriptors.begin(), i));
2617  const ColumnDescriptor* target_cd = target_column_descriptors.at(i);
2618 
2619  if (source_cd->columnType.get_type() != target_cd->columnType.get_type()) {
2620  auto type_cannot_be_cast = [](const auto& col_type) {
2621  return (col_type.is_time() || col_type.is_geometry() || col_type.is_array() ||
2622  col_type.is_boolean());
2623  };
2624 
2625  if (type_cannot_be_cast(source_cd->columnType) ||
2626  type_cannot_be_cast(target_cd->columnType)) {
2627  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2628  source_cd->columnType.get_type_name() +
2629  "' and target '" + target_cd->columnName + " " +
2630  target_cd->columnType.get_type_name() +
2631  "' column types do not match.");
2632  }
2633  }
2634  if (source_cd->columnType.is_array()) {
2635  if (source_cd->columnType.get_subtype() != target_cd->columnType.get_subtype()) {
2636  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2637  source_cd->columnType.get_type_name() +
2638  "' and target '" + target_cd->columnName + " " +
2639  target_cd->columnType.get_type_name() +
2640  "' array column element types do not match.");
2641  }
2642  }
2643 
2644  if (source_cd->columnType.is_decimal() ||
2645  source_cd->columnType.get_elem_type().is_decimal()) {
2646  SQLTypeInfo sourceType = source_cd->columnType;
2647  SQLTypeInfo targetType = target_cd->columnType;
2648 
2649  if (source_cd->columnType.is_array()) {
2650  sourceType = source_cd->columnType.get_elem_type();
2651  targetType = target_cd->columnType.get_elem_type();
2652  }
2653 
2654  if (sourceType.get_scale() != targetType.get_scale()) {
2655  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2656  source_cd->columnType.get_type_name() +
2657  "' and target '" + target_cd->columnName + " " +
2658  target_cd->columnType.get_type_name() +
2659  "' decimal columns scales do not match.");
2660  }
2661  }
2662 
2663  if (source_cd->columnType.is_string()) {
2664  if (!target_cd->columnType.is_string()) {
2665  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2666  source_cd->columnType.get_type_name() +
2667  "' and target '" + target_cd->columnName + " " +
2668  target_cd->columnType.get_type_name() +
2669  "' column types do not match.");
2670  }
2671  if (source_cd->columnType.get_compression() !=
2672  target_cd->columnType.get_compression()) {
2673  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2674  source_cd->columnType.get_type_name() +
2675  "' and target '" + target_cd->columnName + " " +
2676  target_cd->columnType.get_type_name() +
2677  "' columns string encodings do not match.");
2678  }
2679  }
2680 
2681  if (source_cd->columnType.is_timestamp() && target_cd->columnType.is_timestamp()) {
2682  if (source_cd->columnType.get_dimension() !=
2683  target_cd->columnType.get_dimension()) {
2684  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2685  source_cd->columnType.get_type_name() +
2686  "' and target '" + target_cd->columnName + " " +
2687  target_cd->columnType.get_type_name() +
2688  "' timestamp column precisions do not match.");
2689  }
2690  }
2691 
2692  if (!source_cd->columnType.is_string() && !source_cd->columnType.is_geometry() &&
2693  !source_cd->columnType.is_integer() && !source_cd->columnType.is_decimal() &&
2694  !source_cd->columnType.is_date() && !source_cd->columnType.is_time() &&
2695  !source_cd->columnType.is_timestamp() &&
2696  source_cd->columnType.get_size() > target_cd->columnType.get_size()) {
2697  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2698  source_cd->columnType.get_type_name() +
2699  "' and target '" + target_cd->columnName + " " +
2700  target_cd->columnType.get_type_name() +
2701  "' column encoding sizes do not match.");
2702  }
2703  }
2704  }
2705 
2706  if (!populate_table) {
2707  return;
2708  }
2709 
2710  int64_t total_row_count = 0;
2711  int64_t total_source_query_time_ms = 0;
2712  int64_t total_target_value_translate_time_ms = 0;
2713  int64_t total_data_load_time_ms = 0;
2714 
2715  Fragmenter_Namespace::InsertDataLoader insertDataLoader(*leafs_connector_);
2716  auto target_column_descriptors = get_target_column_descriptors(td);
2717 
2718  auto outer_frag_count =
2719  leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
2720 
2721  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
2722 
2723  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
2724  std::vector<size_t> allowed_outer_fragment_indices;
2725 
2726  if (outer_frag_count) {
2727  allowed_outer_fragment_indices.push_back(outer_frag_idx);
2728  }
2729 
2730  const auto query_clock_begin = timer_start();
2731  std::vector<AggregatedResult> query_results = leafs_connector_->query(
2732  query_state_proxy, select_query_, allowed_outer_fragment_indices);
2733  total_source_query_time_ms += timer_stop(query_clock_begin);
2734 
2735  for (auto& res : query_results) {
2736  auto result_rows = res.rs;
2737  result_rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
2738  const auto num_rows = result_rows->rowCount();
2739 
2740  if (0 == num_rows) {
2741  continue;
2742  }
2743 
2744  total_row_count += num_rows;
2745 
2746  size_t leaf_count = leafs_connector_->leafCount();
2747 
2748  size_t max_number_of_rows_per_package =
2749  std::min(num_rows / leaf_count, 64UL * 1024UL);
2750 
2751  size_t start_row = 0;
2752  size_t num_rows_to_process = std::min(num_rows, max_number_of_rows_per_package);
2753 
2754  // ensure that at least one row is being processed
2755  num_rows_to_process = std::max(num_rows_to_process, 1UL);
2756 
2757  std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
2758 
2760 
2761  const int num_worker_threads = std::thread::hardware_concurrency();
2762 
2763  std::vector<size_t> thread_start_idx(num_worker_threads),
2764  thread_end_idx(num_worker_threads);
2765  bool can_go_parallel = !result_rows->isTruncated() && num_rows_to_process > 20000;
2766 
2767  std::atomic<size_t> row_idx{0};
2768 
2769  auto convert_function = [&result_rows,
2770  &value_converters,
2771  &row_idx,
2772  &num_rows_to_process,
2773  &thread_start_idx,
2774  &thread_end_idx](const int thread_id) {
2775  const int num_cols = value_converters.size();
2776  const size_t start = thread_start_idx[thread_id];
2777  const size_t end = thread_end_idx[thread_id];
2778  size_t idx = 0;
2779  for (idx = start; idx < end; ++idx) {
2780  const auto result_row = result_rows->getRowAtNoTranslations(idx);
2781  if (!result_row.empty()) {
2782  size_t target_row = row_idx.fetch_add(1);
2783 
2784  if (target_row >= num_rows_to_process) {
2785  break;
2786  }
2787 
2788  for (unsigned int col = 0; col < num_cols; col++) {
2789  const auto& mapd_variant = result_row[col];
2790  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
2791  }
2792  }
2793  }
2794 
2795  thread_start_idx[thread_id] = idx;
2796  };
2797 
2798  auto single_threaded_convert_function = [&result_rows,
2799  &value_converters,
2800  &row_idx,
2801  &num_rows_to_process,
2802  &thread_start_idx,
2803  &thread_end_idx](const int thread_id) {
2804  const int num_cols = value_converters.size();
2805  const size_t start = thread_start_idx[thread_id];
2806  const size_t end = thread_end_idx[thread_id];
2807  size_t idx = 0;
2808  for (idx = start; idx < end; ++idx) {
2809  size_t target_row = row_idx.fetch_add(1);
2810 
2811  if (target_row >= num_rows_to_process) {
2812  break;
2813  }
2814  const auto result_row = result_rows->getNextRow(false, false);
2815  CHECK(!result_row.empty());
2816  for (unsigned int col = 0; col < num_cols; col++) {
2817  const auto& mapd_variant = result_row[col];
2818  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
2819  }
2820  }
2821 
2822  thread_start_idx[thread_id] = idx;
2823  };
2824 
2825  if (can_go_parallel) {
2826  const size_t entryCount = result_rows->entryCount();
2827  for (size_t i = 0,
2828  start_entry = 0,
2829  stride = (entryCount + num_worker_threads - 1) / num_worker_threads;
2830  i < num_worker_threads && start_entry < entryCount;
2831  ++i, start_entry += stride) {
2832  const auto end_entry = std::min(start_entry + stride, entryCount);
2833  thread_start_idx[i] = start_entry;
2834  thread_end_idx[i] = end_entry;
2835  }
2836 
2837  } else {
2838  thread_start_idx[0] = 0;
2839  thread_end_idx[0] = result_rows->entryCount();
2840  }
2841 
2842  std::shared_ptr<Executor> executor;
2843 
2846  }
2847 
2848  while (start_row < num_rows) {
2849  try {
2850  value_converters.clear();
2851  row_idx = 0;
2852  int colNum = 0;
2853  for (const auto targetDescriptor : target_column_descriptors) {
2854  auto sourceDataMetaInfo = res.targets_meta[colNum++];
2855 
2857  num_rows_to_process,
2858  catalog,
2859  sourceDataMetaInfo,
2860  targetDescriptor,
2861  targetDescriptor->columnType,
2862  !targetDescriptor->columnType.get_notnull(),
2863  result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
2865  ? executor->getStringDictionaryProxy(
2866  sourceDataMetaInfo.get_type_info().get_comp_param(),
2867  result_rows->getRowSetMemOwner(),
2868  true)
2869  : nullptr};
2870  auto converter = factory.create(param);
2871  value_converters.push_back(std::move(converter));
2872  }
2873 
2874  const auto translate_clock_begin = timer_start();
2875  if (can_go_parallel) {
2876  std::vector<std::future<void>> worker_threads;
2877  for (int i = 0; i < num_worker_threads; ++i) {
2878  worker_threads.push_back(
2879  std::async(std::launch::async, convert_function, i));
2880  }
2881 
2882  for (auto& child : worker_threads) {
2883  child.wait();
2884  }
2885  for (auto& child : worker_threads) {
2886  child.get();
2887  }
2888 
2889  } else {
2890  single_threaded_convert_function(0);
2891  }
2892 
2893  // finalize the insert data
2894  {
2895  auto finalizer_func =
2896  [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
2897  targetValueConverter->finalizeDataBlocksForInsertData();
2898  };
2899  std::vector<std::future<void>> worker_threads;
2900  for (auto& converterPtr : value_converters) {
2901  worker_threads.push_back(
2902  std::async(std::launch::async, finalizer_func, converterPtr.get()));
2903  }
2904 
2905  for (auto& child : worker_threads) {
2906  child.wait();
2907  }
2908  for (auto& child : worker_threads) {
2909  child.get();
2910  }
2911  }
2912 
2914  insert_data.databaseId = catalog.getCurrentDB().dbId;
2915  CHECK(td);
2916  insert_data.tableId = td->tableId;
2917  insert_data.numRows = num_rows_to_process;
2918 
2919  for (int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
2920  value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
2921  }
2922  total_target_value_translate_time_ms += timer_stop(translate_clock_begin);
2923 
2924  const auto data_load_clock_begin = timer_start();
2925  insertDataLoader.insertData(*session, insert_data);
2926  total_data_load_time_ms += timer_stop(data_load_clock_begin);
2927 
2928  } catch (...) {
2929  try {
2930  if (td->nShards) {
2931  const auto shard_tables = catalog.getPhysicalTablesDescriptors(td);
2932  for (const auto ptd : shard_tables) {
2933  leafs_connector_->rollback(*session, ptd->tableId);
2934  }
2935  }
2936  leafs_connector_->rollback(*session, td->tableId);
2937  } catch (...) {
2938  // eat it
2939  }
2940  throw;
2941  }
2942  start_row += num_rows_to_process;
2943  num_rows_to_process =
2944  std::min(num_rows - start_row, max_number_of_rows_per_package);
2945  }
2946  }
2947  }
2948 
2949  int64_t total_time_ms = total_source_query_time_ms +
2950  total_target_value_translate_time_ms + total_data_load_time_ms;
2951 
2952  VLOG(1) << "CTAS/ITAS " << total_row_count << " rows loaded in " << total_time_ms
2953  << "ms (outer_frag_count=" << outer_frag_count
2954  << ", query_time=" << total_source_query_time_ms
2955  << "ms, translation_time=" << total_target_value_translate_time_ms
2956  << "ms, data_load_time=" << total_data_load_time_ms
2957  << "ms)\nquery: " << select_query_;
2958 
2959  if (!is_temporary) {
2960  if (td->nShards) {
2961  const auto shard_tables = catalog.getPhysicalTablesDescriptors(td);
2962  for (const auto ptd : shard_tables) {
2963  leafs_connector_->checkpoint(*session, ptd->tableId);
2964  }
2965  }
2966  leafs_connector_->checkpoint(*session, td->tableId);
2967  }
2968 }
2969 
2970 void InsertIntoTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2971  auto session_copy = session;
2972  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
2973  &session_copy, boost::null_deleter());
2974  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
2975  auto stdlog = STDLOG(query_state);
2976  populateData(query_state->createQueryStateProxy(), true);
2977 }
2978 
// Executes CREATE TABLE ... AS SELECT for `session`.
//
// When no leafs connector is set (leafs_connector_ == nullptr) this call first creates
// the target table: it validates the select query to learn the result column types,
// fills a TableDescriptor, applies the storage options, optionally shares string
// dictionaries with the source columns, and registers the table and its DB object.
// In every case it then calls populateData() with `false` as the second argument.
//
// Throws std::runtime_error when the table already exists (unless if_not_exists_ is
// set, in which case the call returns without doing anything), when a decimal result
// column has precision above 18, or when USE_SHARED_DICTIONARIES is not a string
// literal. Exceptions from populateData() are rethrown after the cleanup at the end.
2979 void CreateTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
      // session_copy lives on this stack frame; the null deleter stops the shared_ptr
      // from trying to free it.
2980  auto session_copy = session;
2981  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
2982  &session_copy, boost::null_deleter());
2983  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
2984  auto stdlog = STDLOG(query_state);
2985 
2986  LocalConnector local_connector;
2987  auto& catalog = session.getCatalog();
      // The table is only created here when no leafs connector has been provided.
2988  bool create_table = nullptr == leafs_connector_;
2989 
2990  if (create_table) {
2991  // check access privileges
2994  throw std::runtime_error("CTAS failed. Table " + table_name_ +
2995  " will not be created. User has no create privileges.");
2996  }
2997 
      // An existing table is silently accepted only with IF NOT EXISTS.
2998  if (catalog.getMetadataForTable(table_name_) != nullptr) {
2999  if (if_not_exists_) {
3000  return;
3001  }
3002  throw std::runtime_error("Table " + table_name_ +
3003  " already exists and no data was loaded.");
3004  }
3005 
3006  // only validate the select query so we get the target types
3007  // correctly, but do not populate the result set
3008  auto result = local_connector.query(
3009  query_state->createQueryStateProxy(), select_query_, {}, true);
3010  const auto column_descriptors_for_create =
3011  local_connector.getColumnDescriptors(result, true);
3012 
3013  // some validation as the QE might return some out of range column types
3014  for (auto& cd : column_descriptors_for_create) {
3015  if (cd.columnType.is_decimal() && cd.columnType.get_precision() > 18) {
3016  throw std::runtime_error(cd.columnName + ": Precision too high, max 18.");
3017  }
3018  }
3019 
3020  TableDescriptor td;
3021  td.tableName = table_name_;
3022  td.userId = session.get_currentUser().userId;
3023  td.nColumns = column_descriptors_for_create.size();
3024  td.isView = false;
3025  td.fragmenter = nullptr;
3031  if (is_temporary_) {
3033  } else {
3035  }
3036 
      // Dictionary sharing is on by default; the USE_SHARED_DICTIONARIES storage
      // option can switch it off.
3037  bool use_shared_dictionaries = true;
3038 
3039  if (!storage_options_.empty()) {
3040  for (auto& p : storage_options_) {
3041  if (boost::to_lower_copy<std::string>(*p->get_name()) ==
3042  "use_shared_dictionaries") {
3043  const StringLiteral* literal =
3044  dynamic_cast<const StringLiteral*>(p->get_value());
3045  if (nullptr == literal) {
3046  throw std::runtime_error(
3047  "USE_SHARED_DICTIONARIES must be a string parameter");
3048  }
      // Only "true", "1" and "t" (case-insensitive) enable sharing; any other
      // string disables it.
3049  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
3050  use_shared_dictionaries = val == "true" || val == "1" || val == "t";
3051  } else {
      // Every other option is handed to the CTAS table-definition handlers.
3052  get_table_definitions_for_ctas(td, p, column_descriptors_for_create);
3053  }
3054  }
3055  }
3056 
3057  std::vector<SharedDictionaryDef> sharedDictionaryRefs;
3058 
3059  if (use_shared_dictionaries) {
3060  const auto source_column_descriptors =
3061  local_connector.getColumnDescriptors(result, false);
3062  const auto mapping = catalog.getDictionaryToColumnMapping();
3063 
      // For each dictionary-encoded string column in the select result, look up the
      // existing column registered for that dictionary id and reference it instead of
      // building a new dictionary.
3064  for (auto& source_cd : source_column_descriptors) {
3065  const auto& ti = source_cd.columnType;
3066  if (ti.is_string()) {
3067  if (ti.get_compression() == kENCODING_DICT) {
3068  int dict_id = ti.get_comp_param();
3069  auto it = mapping.find(dict_id);
3070  if (mapping.end() != it) {
3071  const auto targetColumn = it->second;
3072  auto targetTable =
3073  catalog.getMetadataForTable(targetColumn->tableId, false);
3074  CHECK(targetTable);
3075  LOG(INFO) << "CTAS: sharing text dictionary on column "
3076  << source_cd.columnName << " with " << targetTable->tableName
3077  << "." << targetColumn->columnName;
3078  sharedDictionaryRefs.push_back(
3079  SharedDictionaryDef(source_cd.columnName,
3080  targetTable->tableName,
3081  targetColumn->columnName));
3082  }
3083  }
3084  }
3085  }
3086  }
3087 
3088  // currently no means of defining sharding in CTAS
3089  td.keyMetainfo = serialize_key_metainfo(nullptr, sharedDictionaryRefs);
3090 
3091  catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs, true);
3092  // TODO (max): It's transactionally unsafe, should be fixed: we may create object
3093  // w/o privileges
3094  SysCatalog::instance().createDBObject(
3095  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
3096  }
3097 
3098  try {
3099  populateData(query_state->createQueryStateProxy(), false);
3100  } catch (...) {
      // Cleanup on failure: outside cluster mode the target table is dropped before
      // the exception is rethrown.
      // NOTE(review): this drops table_name_ whenever it exists, without checking
      // create_table -- confirm that a pre-existing table can never reach this path.
3101  if (!g_cluster) {
3102  const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
3103  if (created_td) {
3104  catalog.dropTable(created_td);
3105  }
3106  }
3107  throw;
3108  }
3109 }
3110 
// Executes DROP TABLE for `session`.
//
// Resolves *table through a schema write-lock container, checks the caller's
// privileges, and calls catalog.dropTable(). If resolving the table throws a
// std::runtime_error, the statement returns quietly when if_exists is set and
// rethrows otherwise. Throws std::runtime_error when the privilege check fails.
3111 void DropTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3112  auto& catalog = session.getCatalog();
3113 
3114  // TODO(adb): the catalog should be handling this locking.
3115  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3118 
      // td and its lock container are declared outside the try block so they stay
      // alive for the rest of the function.
3119  const TableDescriptor* td{nullptr};
3120  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
3121 
3122  try {
3123  td_with_lock =
3124  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
3126  catalog, *table, false));
3127  td = (*td_with_lock)();
3128  } catch (const std::runtime_error& e) {
      // With IF EXISTS a failure to resolve the table is not an error.
3129  if (if_exists) {
3130  return;
3131  } else {
      // NOTE(review): `throw e;` throws a copy typed as std::runtime_error, so a
      // more derived exception type would be sliced; a bare `throw;` would
      // preserve the original exception object.
3132  throw e;
3133  }
3134  }
3135 
3136  CHECK(td);
3137  CHECK(td_with_lock);
3138 
3139  // check access privileges
3140  if (!session.checkDBAccessPrivileges(
3142  throw std::runtime_error("Table " + *table +
3143  " will not be dropped. User has no proper privileges.");
3144  }
3145 
3147 
3148  auto table_data_write_lock =
3150  catalog.dropTable(td);
3151 
3152  // invalidate cached hashtable
3154 }
3155 
// Executes TRUNCATE TABLE for `session`.
//
// Resolves *table, verifies through SysCatalog::checkPrivileges() that the current
// user may truncate it, rejects views, and calls catalog.truncateTable().
// Throws std::runtime_error when the table does not exist, when the privilege check
// fails, or when the target is a view.
3156 void TruncateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3157  auto& catalog = session.getCatalog();
3158 
3159  // TODO: Removal of the FileMgr is not thread safe. Take a global system write lock when
3160  // truncating a table
3161  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3164 
3165  const auto td_with_lock =
3167  catalog, *table, true);
3168  const auto td = td_with_lock();
3169  if (!td) {
3170  throw std::runtime_error("Table " + *table + " does not exist.");
3171  }
3172 
3173  // check access privileges
3174  std::vector<DBObject> privObjects;
3175  DBObject dbObject(*table, TableDBObjectType);
3176  dbObject.loadKey(catalog);
3178  privObjects.push_back(dbObject);
3179  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3180  throw std::runtime_error("Table " + *table + " will not be truncated. User " +
3181  session.get_currentUser().userName +
3182  " has no proper privileges.");
3183  }
3184 
      // Views are rejected only after the privilege check has passed.
3185  if (td->isView) {
3186  throw std::runtime_error(*table + " is a view. Cannot Truncate.");
3187  }
3188  auto table_data_write_lock =
3190  catalog.truncateTable(td);
3191 
3192  // invalidate cached hashtable
3194 }
3195 
3197  const TableDescriptor* td) {
      // Verifies that the session's current user may alter the table described by
      // `td`; throws std::runtime_error naming the table when the check fails.
      // `td` is dereferenced unconditionally, so callers must pass a non-null pointer.

      // Super users and the table's owner pass without a catalog privilege lookup.
3198  if (session.get_currentUser().isSuper ||
3199  session.get_currentUser().userId == td->userId) {
3200  return;
3201  }
      // Everyone else is checked through SysCatalog against a table-level DBObject.
3202  std::vector<DBObject> privObjects;
3203  DBObject dbObject(td->tableName, TableDBObjectType);
3204  dbObject.loadKey(session.getCatalog());
3206  privObjects.push_back(dbObject);
3207  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3208  throw std::runtime_error("Current user does not have the privilege to alter table: " +
3209  td->tableName);
3210  }
3211 }
3212 
// Executes the user rename for `session`.
//
// Only a super user may rename users, and the user named by *username_ must exist;
// both violations throw std::runtime_error. On success the rename is delegated to
// SysCatalog::renameUser().
3213 void RenameUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3214  if (!session.get_currentUser().isSuper) {
3215  throw std::runtime_error("Only a super user can rename users.");
3216  }
3217 
      // The lookup only verifies existence; `user` is not read afterwards in this
      // function.
3219  if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
3220  throw std::runtime_error("User " + *username_ + " does not exist.");
3221  }
3222 
3223  SysCatalog::instance().renameUser(*username_, *new_username_);
3224 }
3225 
// Executes the database rename for `session`.
//
// The database named by *database_name_ must exist, and the current user must be a
// super user or the database owner; both violations throw std::runtime_error.
// On success the rename is delegated to SysCatalog::renameDatabase().
3226 void RenameDatabaseStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3228 
3229  // TODO: use database lock instead
3230  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3233 
      // getMetadataForDB() fills `db`; its dbOwner field drives the ownership check.
3234  if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
3235  throw std::runtime_error("Database " + *database_name_ + " does not exist.");
3236  }
3237 
3238  if (!session.get_currentUser().isSuper &&
3239  session.get_currentUser().userId != db.dbOwner) {
3240  throw std::runtime_error("Only a super user or the owner can rename the database.");
3241  }
3242 
3243  SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
3244 }
3245 
// Executes the table rename for `session`.
//
// Resolves *table, verifies the caller's alter privilege through
// check_alter_table_privilege(), refuses to rename onto an existing table or view
// name (std::runtime_error), and then calls catalog.renameTable().
3246 void RenameTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3247  auto& catalog = session.getCatalog();
3248 
3249  // TODO(adb): the catalog should be handling this locking (see AddColumnStmt)
3250  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3253 
3254  const auto td_with_lock =
3256  catalog, *table, false);
3257  const auto td = td_with_lock();
      // A missing table is treated as an invariant violation here (CHECK), not as a
      // user-facing error.
3258  CHECK(td);
3259 
3260  check_alter_table_privilege(session, td);
3261  if (catalog.getMetadataForTable(*new_table_name) != nullptr) {
3262  throw std::runtime_error("Table or View " + *new_table_name + " already exists.");
3263  }
3264  catalog.renameTable(td, *new_table_name);
3265 }
3266 
// Populates `cd` from the parsed column definition `coldef`.
//
// Derives the NOT NULL flag from the column constraint (false when the column has no
// constraint) and passes it, together with the column's type and compression
// definition, to the helper invoked at the end of the function.
3267 void DDLStmt::setColumnDescriptor(ColumnDescriptor& cd, const ColumnDef* coldef) {
3268  bool not_null;
3269  const ColumnConstraintDef* cc = coldef->get_column_constraint();
3270  if (cc == nullptr) {
      // No constraint clause at all: the column is nullable.
3271  not_null = false;
3272  } else {
3273  not_null = cc->get_notnull();
3274  }
3276  cd,
3277  coldef->get_column_type(),
3278  not_null,
3279  coldef->get_compression());
3280 }
3281 
3282 void AddColumnStmt::check_executable(const Catalog_Namespace::SessionInfo& session,
3283  const TableDescriptor* td) {
3284  auto& catalog = session.getCatalog();
3285  if (!td) {
3286  throw std::runtime_error("Table " + *table + " does not exist.");
3287  } else {
3288  if (td->isView) {
3289  throw std::runtime_error("Adding columns to a view is not supported.");
3290  }
3291  if (table_is_temporary(td)) {
3292  throw std::runtime_error(
3293  "Adding columns to temporary tables is not yet supported.");
3294  }
3295  };
3296 
3297  check_alter_table_privilege(session, td);
3298 
3299  if (0 == coldefs.size()) {
3300  coldefs.push_back(std::move(coldef));
3301  }
3302 
3303  for (const auto& coldef : coldefs) {
3304  auto& new_column_name = *coldef->get_column_name();
3305  if (catalog.getMetadataForColumn(td->tableId, new_column_name) != nullptr) {
3306  throw std::runtime_error("Column " + new_column_name + " already exists.");
3307  }
3308  }
3309 }
3310 
// Executes ALTER TABLE ... ADD COLUMN for `session`.
//
// After check_executable() has validated the request, every column in `coldefs`
// (plus the physical columns of any geometry column) is added to the catalog inside
// a SQLite transaction. If the table already holds rows, each new column is
// backfilled for all `nrows` rows with its default value or NULL through import
// buffers, and the result is loaded and checkpointed.
//
// Throws std::runtime_error when the table uses a SortedOrderFragmenter, when a
// NOT NULL column has no usable default, when a geometry default cannot be parsed,
// or when loading fails. On any exception the catalog changes and the SQLite
// transaction are rolled back and the exception is rethrown.
3311 void AddColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3312  auto& catalog = session.getCatalog();
3313 
3314  // TODO(adb): the catalog should be handling this locking.
3315  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3318 
3319  const auto td_with_lock =
3321  catalog, *table, true);
3322  const auto td = td_with_lock();
3323 
      // Throws if td is null, so td can be dereferenced below.
3324  check_executable(session, td);
3325 
3326  CHECK(td->fragmenter);
3327  if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
3328  td->fragmenter)) {
3329  throw std::runtime_error(
3330  "Adding columns to a table is not supported when using the \"sort_column\" "
3331  "option.");
3332  }
3333 
3334  // Do not take a data write lock, as the fragmenter may call `deleteFragments` during
3335  // a cap operation. Note that the schema write lock will prevent concurrent inserts
3336  // along with all other queries.
3337 
3338  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
3339  try {
      // cds: new column descriptors keyed by column name.
      // cid_coldefs: column id -> parsed definition; physical geo columns are stored
      // with a nullptr definition. Being a std::map, it iterates in column-id order.
3340  std::map<const std::string, const ColumnDescriptor> cds;
3341  std::map<const int, const ColumnDef*> cid_coldefs;
3342  for (const auto& coldef : coldefs) {
3343  ColumnDescriptor cd;
3344  setColumnDescriptor(cd, coldef.get());
3345  catalog.addColumn(*td, cd);
3346  cds.emplace(*coldef->get_column_name(), cd);
3347  cid_coldefs.emplace(cd.columnId, coldef.get());
3348 
3349  // expand geo column to phy columns
3350  if (cd.columnType.is_geometry()) {
3351  std::list<ColumnDescriptor> phy_geo_columns;
3352  catalog.expandGeoColumn(cd, phy_geo_columns);
      // Note: this loop variable shadows the outer `cd`.
3353  for (auto& cd : phy_geo_columns) {
3354  catalog.addColumn(*td, cd);
3355  cds.emplace(cd.columnName, cd);
3356  cid_coldefs.emplace(cd.columnId, nullptr);
3357  }
3358  }
3359  }
3360 
3361  std::unique_ptr<import_export::Loader> loader(new import_export::Loader(catalog, td));
3362  auto import_buffers = import_export::setup_column_loaders(td, loader.get());
3363  loader->setReplicating(true);
3364 
3365  // set_geo_physical_import_buffer below needs a sorted import_buffers
3366  std::sort(import_buffers.begin(),
3367  import_buffers.end(),
3368  [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
3369  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
3370  });
3371 
3372  size_t nrows = td->fragmenter->getNumRows();
3373  // if sharded, get total nrows from all sharded tables
3374  if (td->nShards > 0) {
3375  const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
3376  nrows = 0;
      // The lambda parameter `td` shadows the outer table descriptor.
3377  std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](const auto& td) {
3378  nrows += td->fragmenter->getNumRows();
3379  });
3380  }
      // Backfill is only needed when the table already contains rows.
3381  if (nrows > 0) {
      // Number of physical geo columns still to be skipped because the geo branch
      // below already handled their buffers.
3382  int skip_physical_cols = 0;
3383  for (const auto cit : cid_coldefs) {
3384  const auto cd = catalog.getMetadataForColumn(td->tableId, cit.first);
3385  const auto coldef = cit.second;
3386  const auto column_constraint = coldef ? coldef->get_column_constraint() : nullptr;
3387  std::string defaultval = "";
3388  if (column_constraint) {
      // A string-literal default is used verbatim; any other literal is
      // stringified; no default leaves defaultval empty.
3389  auto defaultlp = column_constraint->get_defaultval();
3390  auto defaultsp = dynamic_cast<const StringLiteral*>(defaultlp);
3391  defaultval = defaultsp ? *defaultsp->get_stringval()
3392  : defaultlp ? defaultlp->to_string() : "";
3393  }
      // The fill value is NULL when there is no constraint, when the default is
      // empty, or when the default is the text "NULL" in any letter case.
3394  bool isnull = column_constraint ? (0 == defaultval.size()) : true;
3395  if (boost::to_upper_copy<std::string>(defaultval) == "NULL") {
3396  isnull = true;
3397  }
3398 
3399  if (isnull) {
3400  if (column_constraint && column_constraint->get_notnull()) {
3401  throw std::runtime_error("Default value required for column " +
3402  cd->columnName + " (NULL value not supported)");
3403  }
3404  }
3405 
      // Find the import buffer that belongs to this column and fill it.
3406  for (auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
3407  auto& import_buffer = *it;
3408  if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
      // Columns with a parsed definition are always filled. Entries without one
      // (physical geo columns) are filled only once the skip counter has run out.
3409  if (coldef != nullptr ||
3410  skip_physical_cols-- <= 0) { // skip non-null phy col
3411  import_buffer->add_value(
3412  cd, defaultval, isnull, import_export::CopyParams(), nrows);
3413  if (cd->columnType.is_geometry()) {
3414  std::vector<double> coords, bounds;
3415  std::vector<int> ring_sizes, poly_rings;
3416  int render_group = 0;
3417  SQLTypeInfo tinfo{cd->columnType};
3419  tinfo,
3420  coords,
3421  bounds,
3422  ring_sizes,
3423  poly_rings,
3424  false)) {
3425  throw std::runtime_error("Bad geometry data: '" + defaultval + "'");
3426  }
      // Index of the buffer immediately after the geo column's own buffer.
3427  size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
3429  cd,
3430  import_buffers,
3431  col_idx,
3432  coords,
3433  bounds,
3434  ring_sizes,
3435  poly_rings,
3436  render_group,
3437  nrows);
3438  // skip following phy cols
3439  skip_physical_cols = cd->columnType.get_physical_cols();
3440  }
3441  }
      // Stop scanning once the matching buffer has been handled.
3442  break;
3443  }
3444  }
3445  }
3446  }
3447 
3448  if (!loader->loadNoCheckpoint(import_buffers, nrows)) {
3449  throw std::runtime_error("loadNoCheckpoint failed!");
3450  }
      // Success path: roll(true), checkpoint the loaded data, then end the SQLite
      // transaction.
3451  catalog.roll(true);
3452  loader->checkpoint();
3453  catalog.getSqliteConnector().query("END TRANSACTION");
3454  } catch (...) {
      // Failure path: roll(false) and roll back the SQLite transaction before
      // rethrowing the original exception.
3455  catalog.roll(false);
3456  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
3457  throw;
3458  }
3459 }
3460 
// Executes ALTER TABLE ... DROP COLUMN for `session`.
//
// Validates the target (must exist, must not be a view or a temporary table, every
// named column must exist, and the nColumns check below must pass), verifies the
// caller's alter privilege, then drops each column and its physical columns from the
// catalog and from every physical table's fragmenter inside a SQLite transaction.
//
// Throws std::runtime_error for each failed validation and when the sharding column
// is targeted. On any exception inside the transaction the table is marked for
// reload, the catalog and SQLite transaction are rolled back, and the exception is
// rethrown.
3461 void DropColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3462  auto& catalog = session.getCatalog();
3463 
3464  // TODO(adb): the catalog should be handling this locking.
3465  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3468 
3469  const auto td_with_lock =
3471  catalog, *table, true);
3472  const auto td = td_with_lock();
3473  if (!td) {
3474  throw std::runtime_error("Table " + *table + " does not exist.");
3475  }
3476  if (td->isView) {
3477  throw std::runtime_error("Dropping a column from a view is not supported.");
3478  }
3479  if (table_is_temporary(td)) {
3480  throw std::runtime_error(
3481  "Dropping a column from a temporary table is not yet supported.");
3482  }
3483 
3484  check_alter_table_privilege(session, td);
3485 
      // Validate all column names up front, before anything is modified.
3486  for (const auto& column : columns) {
3487  if (nullptr == catalog.getMetadataForColumn(td->tableId, *column)) {
3488  throw std::runtime_error("Column " + *column + " does not exist.");
3489  }
3490  }
3491 
      // NOTE(review): the thresholds 2 and 3 presumably account for system columns
      // included in nColumns (one more when hasDeletedCol is set) -- confirm.
3492  if (td->nColumns <= (td->hasDeletedCol ? 3 : 2)) {
3493  throw std::runtime_error("Table " + *table + " has only one column.");
3494  }
3495 
3496  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
3497  try {
      // Ids of every dropped column, later passed to each fragmenter.
3498  std::vector<int> columnIds;
3499  for (const auto& column : columns) {
      // Work on a copy of the descriptor rather than the pointer returned by the
      // catalog.
3500  ColumnDescriptor cd = *catalog.getMetadataForColumn(td->tableId, *column);
3501  if (td->nShards > 0 && td->shardedColumnId == cd.columnId) {
3502  throw std::runtime_error("Dropping sharding column " + cd.columnName +
3503  " is not supported.");
3504  }
3505  catalog.dropColumn(*td, cd);
3506  columnIds.push_back(cd.columnId);
      // Physical columns are looked up at the ids directly following cd.columnId.
3507  for (int i = 0; i < cd.columnType.get_physical_cols(); i++) {
3508  const auto pcd = catalog.getMetadataForColumn(td->tableId, cd.columnId + i + 1);
3509  CHECK(pcd);
3510  catalog.dropColumn(*td, *pcd);
3511  columnIds.push_back(cd.columnId + i + 1);
3512  }
3513  }
3514 
3515  for (auto shard : catalog.getPhysicalTablesDescriptors(td)) {
3516  shard->fragmenter->dropColumns(columnIds);
3517  }
3518  // if test forces to rollback
3520  throw std::runtime_error("lol!");
3521  }
3522  catalog.roll(true);
      // Only disk-backed tables are checkpointed.
3523  if (td->persistenceLevel == Data_Namespace::MemoryLevel::DISK_LEVEL) {
3524  catalog.checkpoint(td->tableId);
3525  }
3526  catalog.getSqliteConnector().query("END TRANSACTION");
3527  } catch (...) {
      // Failure path: mark the table for reload, undo the catalog changes and the
      // SQLite transaction, then rethrow the original exception.
3528  catalog.setForReload(td->tableId);
3529  catalog.roll(false);
3530  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
3531  throw;
3532  }
3533 
3534  // invalidate cached hashtable
3536 }
3537 
// Executes ALTER TABLE ... RENAME COLUMN for `session`.
//
// Resolves *table, verifies the caller's alter privilege, requires that *column
// exists and that *new_column_name is not already used in the table (both
// violations throw std::runtime_error), then calls catalog.renameColumn().
3538 void RenameColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3539  auto& catalog = session.getCatalog();
3540 
3541  const auto td_with_lock =
3543  catalog, *table, false);
3544  const auto td = td_with_lock();
      // A missing table is treated as an invariant violation here (CHECK), not as a
      // user-facing error.
3545  CHECK(td);
3546 
3547  check_alter_table_privilege(session, td);
3548  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *column);
3549  if (cd == nullptr) {
3550  throw std::runtime_error("Column " + *column + " does not exist.");
3551  }
3552  if (catalog.getMetadataForColumn(td->tableId, *new_column_name) != nullptr) {
3553  throw std::runtime_error("Column " + *new_column_name + " already exists.");
3554  }
3555  catalog.renameColumn(td, cd, *new_column_name);
3556 }
3557 
3558 void CopyTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3559  auto importer_factory = [](Catalog_Namespace::Catalog& catalog,
3560  const TableDescriptor* td,
3561  const std::string& file_path,
3562  const import_export::CopyParams& copy_params) {
3563  return std::make_unique<import_export::Importer>(catalog, td, file_path, copy_params);
3564  };
3565  return execute(session, importer_factory);
3566 }
3567 
3568 void CopyTableStmt::execute(const Catalog_Namespace::SessionInfo& session,
3569  const std::function<std::unique_ptr<import_export::Importer>(
3571  const TableDescriptor*,
3572  const std::string&,
3573  const import_export::CopyParams&)>& importer_factory) {
3574  boost::regex non_local_file_regex{R"(^\s*(s3|http|https)://.+)",
3575  boost::regex::extended | boost::regex::icase};
3576  if (!boost::regex_match(*file_pattern, non_local_file_regex)) {
3578  *file_pattern, ddl_utils::DataTransferType::IMPORT, true);
3579  }
3580 
3581  size_t rows_completed = 0;
3582  size_t rows_rejected = 0;
3583  size_t total_time = 0;
3584  bool load_truncated = false;
3585 
3586  // Prevent simultaneous import / truncate (see TruncateTableStmt::execute)
3587  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3590 
3591  const TableDescriptor* td{nullptr};
3592  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
3593  std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
3594 
3595  auto& catalog = session.getCatalog();
3596 
3597  try {
3598  td_with_lock = std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
3600  catalog, *table));
3601  td = (*td_with_lock)();
3602  insert_data_lock = std::make_unique<lockmgr::WriteLock>(
3604  } catch (const std::runtime_error& e) {
3605  // noop
3606  // TODO(adb): We're really only interested in whether the table exists or not.
3607  // Create a more refined exception.
3608  }
3609 
3610  // if the table already exists, it's locked, so check access privileges
3611  if (td) {
3612  std::vector<DBObject> privObjects;
3613  DBObject dbObject(*table, TableDBObjectType);
3614  dbObject.loadKey(catalog);
3616  privObjects.push_back(dbObject);
3617  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3618  throw std::runtime_error("Violation of access privileges: user " +
3619  session.get_currentUser().userName +
3620  " has no insert privileges for table " + *table + ".");
3621  }
3622  }
3623 
3624  // since we'll have not only posix file names but also s3/hdfs/... url
3625  // we do not expand wildcard or check file existence here.
3626  // from here on, file_path contains something which may be a url
3627  // or a wildcard of file names;
3628  std::string file_path = *file_pattern;
3629  import_export::CopyParams copy_params;
3630  if (!options.empty()) {
3631  for (auto& p : options) {
3632  if (boost::iequals(*p->get_name(), "max_reject")) {
3633  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3634  if (int_literal == nullptr) {
3635  throw std::runtime_error("max_reject option must be an integer.");
3636  }
3637  copy_params.max_reject = int_literal->get_intval();
3638  } else if (boost::iequals(*p->get_name(), "buffer_size")) {
3639  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3640  if (int_literal == nullptr) {
3641  throw std::runtime_error("buffer_size option must be an integer.");
3642  }
3643  copy_params.buffer_size = int_literal->get_intval();
3644  } else if (boost::iequals(*p->get_name(), "threads")) {
3645  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3646  if (int_literal == nullptr) {
3647  throw std::runtime_error("Threads option must be an integer.");
3648  }
3649  copy_params.threads = int_literal->get_intval();
3650  } else if (boost::iequals(*p->get_name(), "delimiter")) {
3651  const StringLiteral* str_literal =
3652  dynamic_cast<const StringLiteral*>(p->get_value());
3653  if (str_literal == nullptr) {
3654  throw std::runtime_error("Delimiter option must be a string.");
3655  } else if (str_literal->get_stringval()->length() != 1) {
3656  throw std::runtime_error("Delimiter must be a single character string.");
3657  }
3658  copy_params.delimiter = (*str_literal->get_stringval())[0];
3659  } else if (boost::iequals(*p->get_name(), "nulls")) {
3660  const StringLiteral* str_literal =
3661  dynamic_cast<const StringLiteral*>(p->get_value());
3662  if (str_literal == nullptr) {
3663  throw std::runtime_error("Nulls option must be a string.");
3664  }
3665  copy_params.null_str = *str_literal->get_stringval();
3666  } else if (boost::iequals(*p->get_name(), "header")) {
3667  const StringLiteral* str_literal =
3668  dynamic_cast<const StringLiteral*>(p->get_value());
3669  if (str_literal == nullptr) {
3670  throw std::runtime_error("Header option must be a boolean.");
3671  }
3672  copy_params.has_header = bool_from_string_literal(str_literal)
3675 #ifdef ENABLE_IMPORT_PARQUET
3676  } else if (boost::iequals(*p->get_name(), "parquet")) {
3677  const StringLiteral* str_literal =
3678  dynamic_cast<const StringLiteral*>(p->get_value());
3679  if (str_literal == nullptr) {
3680  throw std::runtime_error("Parquet option must be a boolean.");
3681  }
3682  if (bool_from_string_literal(str_literal)) {
3683  // not sure a parquet "table" type is proper, but to make code
3684  // look consistent in some places, let's set "table" type too
3685  copy_params.file_type = import_export::FileType::PARQUET;
3686  }
3687 #endif // ENABLE_IMPORT_PARQUET
3688  } else if (boost::iequals(*p->get_name(), "s3_access_key")) {
3689  const StringLiteral* str_literal =
3690  dynamic_cast<const StringLiteral*>(p->get_value());
3691  if (str_literal == nullptr) {
3692  throw std::runtime_error("Option s3_access_key must be a string.");
3693  }
3694  copy_params.s3_access_key = *str_literal->get_stringval();
3695  } else if (boost::iequals(*p->get_name(), "s3_secret_key")) {
3696  const StringLiteral* str_literal =
3697  dynamic_cast<const StringLiteral*>(p->get_value());
3698  if (str_literal == nullptr) {
3699  throw std::runtime_error("Option s3_secret_key must be a string.");
3700  }
3701  copy_params.s3_secret_key = *str_literal->get_stringval();
3702  } else if (boost::iequals(*p->get_name(), "s3_region")) {
3703  const StringLiteral* str_literal =
3704  dynamic_cast<const StringLiteral*>(p->get_value());
3705  if (str_literal == nullptr) {
3706  throw std::runtime_error("Option s3_region must be a string.");
3707  }
3708  copy_params.s3_region = *str_literal->get_stringval();
3709  } else if (boost::iequals(*p->get_name(), "s3_endpoint")) {
3710  const StringLiteral* str_literal =
3711  dynamic_cast<const StringLiteral*>(p->get_value());
3712  if (str_literal == nullptr) {
3713  throw std::runtime_error("Option s3_endpoint must be a string.");
3714  }
3715  copy_params.s3_endpoint = *str_literal->get_stringval();
3716  } else if (boost::iequals(*p->get_name(), "quote")) {
3717  const StringLiteral* str_literal =
3718  dynamic_cast<const StringLiteral*>(p->get_value());
3719  if (str_literal == nullptr) {
3720  throw std::runtime_error("Quote option must be a string.");
3721  } else if (str_literal->get_stringval()->length() != 1) {
3722  throw std::runtime_error("Quote must be a single character string.");
3723  }
3724  copy_params.quote = (*str_literal->get_stringval())[0];
3725  } else if (boost::iequals(*p->get_name(), "escape")) {
3726  const StringLiteral* str_literal =
3727  dynamic_cast<const StringLiteral*>(p->get_value());
3728  if (str_literal == nullptr) {
3729  throw std::runtime_error("Escape option must be a string.");
3730  } else if (str_literal->get_stringval()->length() != 1) {
3731  throw std::runtime_error("Escape must be a single character string.");
3732  }
3733  copy_params.escape = (*str_literal->get_stringval())[0];
3734  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
3735  const StringLiteral* str_literal =
3736  dynamic_cast<const StringLiteral*>(p->get_value());
3737  if (str_literal == nullptr) {
3738  throw std::runtime_error("Line_delimiter option must be a string.");
3739  } else if (str_literal->get_stringval()->length() != 1) {
3740  throw std::runtime_error("Line_delimiter must be a single character string.");
3741  }
3742  copy_params.line_delim = (*str_literal->get_stringval())[0];
3743  } else if (boost::iequals(*p->get_name(), "quoted")) {
3744  const StringLiteral* str_literal =
3745  dynamic_cast<const StringLiteral*>(p->get_value());
3746  if (str_literal == nullptr) {
3747  throw std::runtime_error("Quoted option must be a boolean.");
3748  }
3749  copy_params.quoted = bool_from_string_literal(str_literal);
3750  } else if (boost::iequals(*p->get_name(), "plain_text")) {
3751  const StringLiteral* str_literal =
3752  dynamic_cast<const StringLiteral*>(p->get_value());
3753  if (str_literal == nullptr) {
3754  throw std::runtime_error("plain_text option must be a boolean.");
3755  }
3756  copy_params.plain_text = bool_from_string_literal(str_literal);
3757  } else if (boost::iequals(*p->get_name(), "array_marker")) {
3758  const StringLiteral* str_literal =
3759  dynamic_cast<const StringLiteral*>(p->get_value());
3760  if (str_literal == nullptr) {
3761  throw std::runtime_error("Array Marker option must be a string.");
3762  } else if (str_literal->get_stringval()->length() != 2) {
3763  throw std::runtime_error(
3764  "Array Marker option must be exactly two characters. Default is {}.");
3765  }
3766  copy_params.array_begin = (*str_literal->get_stringval())[0];
3767  copy_params.array_end = (*str_literal->get_stringval())[1];
3768  } else if (boost::iequals(*p->get_name(), "array_delimiter")) {
3769  const StringLiteral* str_literal =
3770  dynamic_cast<const StringLiteral*>(p->get_value());
3771  if (str_literal == nullptr) {
3772  throw std::runtime_error("Array Delimiter option must be a string.");
3773  } else if (str_literal->get_stringval()->length() != 1) {
3774  throw std::runtime_error("Array Delimiter must be a single character string.");
3775  }
3776  copy_params.array_delim = (*str_literal->get_stringval())[0];
3777  } else if (boost::iequals(*p->get_name(), "lonlat")) {
3778  const StringLiteral* str_literal =
3779  dynamic_cast<const StringLiteral*>(p->get_value());
3780  if (str_literal == nullptr) {
3781  throw std::runtime_error("Lonlat option must be a boolean.");
3782  }
3783  copy_params.lonlat = bool_from_string_literal(str_literal);
3784  } else if (boost::iequals(*p->get_name(), "geo")) {
3785  const StringLiteral* str_literal =
3786  dynamic_cast<const StringLiteral*>(p->get_value());
3787  if (str_literal == nullptr) {
3788  throw std::runtime_error("Geo option must be a boolean.");
3789  }
3790  copy_params.file_type = bool_from_string_literal(str_literal)
3793  } else if (boost::iequals(*p->get_name(), "geo_coords_type")) {
3794  const StringLiteral* str_literal =
3795  dynamic_cast<const StringLiteral*>(p->get_value());
3796  if (str_literal == nullptr) {
3797  throw std::runtime_error("'geo_coords_type' option must be a string");
3798  }
3799  const std::string* s = str_literal->get_stringval();
3800  if (boost::iequals(*s, "geography")) {
3801  throw std::runtime_error(
3802  "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
3803  // copy_params.geo_coords_type = kGEOGRAPHY;
3804  } else if (boost::iequals(*s, "geometry")) {
3805  copy_params.geo_coords_type = kGEOMETRY;
3806  } else {
3807  throw std::runtime_error(
3808  "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
3809  "'GEOMETRY'): " +
3810  *s);
3811  }
3812  } else if (boost::iequals(*p->get_name(), "geo_coords_encoding")) {
3813  const StringLiteral* str_literal =
3814  dynamic_cast<const StringLiteral*>(p->get_value());
3815  if (str_literal == nullptr) {
3816  throw std::runtime_error("'geo_coords_encoding' option must be a string");
3817  }
3818  const std::string* s = str_literal->get_stringval();
3819  if (boost::iequals(*s, "none")) {
3820  copy_params.geo_coords_encoding = kENCODING_NONE;
3821  copy_params.geo_coords_comp_param = 0;
3822  } else if (boost::iequals(*s, "compressed(32)")) {
3823  copy_params.geo_coords_encoding = kENCODING_GEOINT;
3824  copy_params.geo_coords_comp_param = 32;
3825  } else {
3826  throw std::runtime_error(
3827  "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
3828  "'COMPRESSED(32)'): " +
3829  *s);
3830  }
3831  } else if (boost::iequals(*p->get_name(), "geo_coords_srid")) {
3832  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3833  if (int_literal == nullptr) {
3834  throw std::runtime_error("'geo_coords_srid' option must be an integer");
3835  }
3836  const int srid = int_literal->get_intval();
3837  if (srid == 4326 || srid == 3857 || srid == 900913) {
3838  copy_params.geo_coords_srid = srid;
3839  } else {
3840  throw std::runtime_error(
3841  "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
3842  "900913): " +
3843  std::to_string(srid));
3844  }
3845  } else if (boost::iequals(*p->get_name(), "geo_layer_name")) {
3846  const StringLiteral* str_literal =
3847  dynamic_cast<const StringLiteral*>(p->get_value());
3848  if (str_literal == nullptr) {
3849  throw std::runtime_error("'geo_layer_name' option must be a string");
3850  }
3851  const std::string* layer_name = str_literal->get_stringval();
3852  if (layer_name) {
3853  copy_params.geo_layer_name = *layer_name;
3854  } else {
3855  throw std::runtime_error("Invalid value for 'geo_layer_name' option");
3856  }
3857  } else if (boost::iequals(*p->get_name(), "partitions")) {
3858  if (copy_params.file_type == import_export::FileType::POLYGON) {
3859  const auto partitions =
3860  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
3861  CHECK(partitions);
3862  const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
3863  if (partitions_uc != "REPLICATED") {
3864  throw std::runtime_error("PARTITIONS must be REPLICATED for geo COPY");
3865  }
3866  _geo_copy_from_partitions = partitions_uc;
3867  } else {
3868  throw std::runtime_error("PARTITIONS option not supported for non-geo COPY: " +
3869  *p->get_name());
3870  }
3871  } else if (boost::iequals(*p->get_name(), "geo_assign_render_groups")) {
3872  const StringLiteral* str_literal =
3873  dynamic_cast<const StringLiteral*>(p->get_value());
3874  if (str_literal == nullptr) {
3875  throw std::runtime_error("geo_assign_render_groups option must be a boolean.");
3876  }
3877  copy_params.geo_assign_render_groups = bool_from_string_literal(str_literal);
3878  } else if (boost::iequals(*p->get_name(), "geo_explode_collections")) {
3879  const StringLiteral* str_literal =
3880  dynamic_cast<const StringLiteral*>(p->get_value());
3881  if (str_literal == nullptr) {
3882  throw std::runtime_error("geo_explode_collections option must be a boolean.");
3883  }
3884  copy_params.geo_explode_collections = bool_from_string_literal(str_literal);
3885  } else {
3886  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
3887  }
3888  }
3889  }
3890 
3891  std::string tr;
3892  if (copy_params.file_type == import_export::FileType::POLYGON) {
3893  // geo import
3894  // we do nothing here, except stash the parameters so we can
3895  // do the import when we unwind to the top of the handler
3896  _geo_copy_from_file_name = file_path;
3897  _geo_copy_from_copy_params = copy_params;
3898  _was_geo_copy_from = true;
3899 
3900  // the result string
3901  // @TODO simon.eves put something more useful in here
3902  // except we really can't because we haven't done the import yet!
3903  if (td) {
3904  tr = std::string("Appending geo to table '") + *table + std::string("'...");
3905  } else {
3906  tr = std::string("Creating table '") + *table +
3907  std::string("' and importing geo...");
3908  }
3909  } else {
3910  if (td) {
3911  CHECK(td_with_lock);
3912 
3913  // regular import
3914  auto importer = importer_factory(catalog, td, file_path, copy_params);
3915  auto ms = measure<>::execution([&]() {
3916  auto res = importer->import();
3917  rows_completed += res.rows_completed;
3918  rows_rejected += res.rows_rejected;
3919  load_truncated = res.load_truncated;
3920  });
3921  total_time += ms;
3922 
3923  // results
3924  if (load_truncated || rows_rejected > copy_params.max_reject) {
3925  LOG(ERROR) << "COPY exited early due to reject records count during multi file "
3926  "processing ";
3927  // if we have crossed the truncated load threshold
3928  load_truncated = true;
3929  success = false;
3930  }
3931  if (!load_truncated) {
3932  tr = std::string("Loaded: " + std::to_string(rows_completed) +
3933  " recs, Rejected: " + std::to_string(rows_rejected) +
3934  " recs in " + std::to_string((double)total_time / 1000.0) +
3935  " secs");
3936  } else {
3937  tr = std::string("Loader truncated due to reject count. Processed : " +
3938  std::to_string(rows_completed) + " recs, Rejected: " +
3939  std::to_string(rows_rejected) + " recs in " +
3940  std::to_string((double)total_time / 1000.0) + " secs");
3941  }
3942  } else {
3943  throw std::runtime_error("Table '" + *table + "' must exist before COPY FROM");
3944  }
3945  }
3946 
3947  return_message.reset(new std::string(tr));
3948  LOG(INFO) << tr;
3949 }
3950 
3951 // CREATE ROLE payroll_dept_role;
3952 void CreateRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3953  const auto& currentUser = session.get_currentUser();
3954  if (!currentUser.isSuper) {
3955  throw std::runtime_error("CREATE ROLE " + get_role() +
3956  " failed. It can only be executed by super user.");
3957  }
3958  SysCatalog::instance().createRole(get_role());
3959 }
3960 
3961 // DROP ROLE payroll_dept_role;
3962 void DropRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3963  const auto& currentUser = session.get_currentUser();
3964  if (!currentUser.isSuper) {
3965  throw std::runtime_error("DROP ROLE " + get_role() +
3966  " failed. It can only be executed by super user.");
3967  }
3968  auto* rl = SysCatalog::instance().getRoleGrantee(get_role());
3969  if (!rl) {
3970  throw std::runtime_error("DROP ROLE " + get_role() +
3971  " failed because role with this name does not exist.");
3972  }
3973  SysCatalog::instance().dropRole(get_role());
3974 }
3975 
3976 std::vector<std::string> splitObjectHierName(const std::string& hierName) {
3977  std::vector<std::string> componentNames;
3978  boost::split(componentNames, hierName, boost::is_any_of("."));
3979  return componentNames;
3980 }
3981 
// Extracts the plain object name from a possibly dot-qualified name.
//
// objectHierName: the name as written in the statement; it is split on '.'
//                 by splitObjectHierName().
// objectType:     object type keyword, compared case-sensitively against
//                 "DATABASE", "TABLE", "DASHBOARD", "VIEW" and "SERVER".
//
// Returns the only component for a one-part name, or the second component of
// a two-part name (non-DATABASE types only).
// Throws std::runtime_error when the number of components is not allowed for
// the type, or when the type is not one of the keywords above.
//
// NOTE(review): the line holding the third parameter and the opening brace is
// absent from this listing (callers in this file pass a catalog as the third
// argument) -- restore it from the original source.
3982 std::string extractObjectNameFromHierName(const std::string& objectHierName,
3983  const std::string& objectType,
3985  std::string objectName;
3986  std::vector<std::string> componentNames = splitObjectHierName(objectHierName);
  // A database name must be a single, unqualified component.
3987  if (objectType.compare("DATABASE") == 0) {
3988  if (componentNames.size() == 1) {
3989  objectName = componentNames[0];
3990  } else {
3991  throw std::runtime_error("DB object name is not correct " + objectHierName);
3992  }
3993  } else {
3994  if (objectType.compare("TABLE") == 0 || objectType.compare("DASHBOARD") == 0 ||
3995  objectType.compare("VIEW") == 0 || objectType.compare("SERVER") == 0) {
  // One component: the name itself. Two components: the first is a qualifier
  // (presumably the database name -- confirm) and is discarded.
3996  switch (componentNames.size()) {
3997  case (1): {
3998  objectName = componentNames[0];
3999  break;
4000  }
4001  case (2): {
4002  objectName = componentNames[1];
4003  break;
4004  }
4005  default: {
4006  throw std::runtime_error("DB object name is not correct " + objectHierName);
4007  }
4008  }
4009  } else {
4010  throw std::runtime_error("DB object type " + objectType + " is not supported.");
4011  }
4012  }
4013  return objectName;
4014 }
4015 
4016 static std::pair<AccessPrivileges, DBObjectType> parseStringPrivs(
4017  const std::string& privs,
4018  const DBObjectType& objectType,
4019  const std::string& object_name) {
4020  static const std::map<std::pair<const std::string, const DBObjectType>,
4021  std::pair<const AccessPrivileges, const DBObjectType>>
4022  privileges_lookup{
4023  {{"ALL"s, DatabaseDBObjectType},
4024  {AccessPrivileges::ALL_DATABASE, DatabaseDBObjectType}},
4025  {{"ALL"s, TableDBObjectType}, {AccessPrivileges::ALL_TABLE, TableDBObjectType}},
4026  {{"ALL"s, DashboardDBObjectType},
4027  {AccessPrivileges::ALL_DASHBOARD, DashboardDBObjectType}},
4028  {{"ALL"s, ViewDBObjectType}, {AccessPrivileges::ALL_VIEW, ViewDBObjectType}},
4029  {{"ALL"s, ServerDBObjectType},
4030  {AccessPrivileges::ALL_SERVER, ServerDBObjectType}},
4031 
4032  {{"CREATE TABLE"s, DatabaseDBObjectType},
4033  {AccessPrivileges::CREATE_TABLE, TableDBObjectType}},
4034  {{"CREATE"s, DatabaseDBObjectType},
4035  {AccessPrivileges::CREATE_TABLE, TableDBObjectType}},
4036  {{"SELECT"s, DatabaseDBObjectType},
4037  {AccessPrivileges::SELECT_FROM_TABLE, TableDBObjectType}},
4038  {{"INSERT"s, DatabaseDBObjectType},
4039  {AccessPrivileges::INSERT_INTO_TABLE, TableDBObjectType}},
4040  {{"TRUNCATE"s, DatabaseDBObjectType},
4041  {AccessPrivileges::TRUNCATE_TABLE, TableDBObjectType}},
4042  {{"UPDATE"s, DatabaseDBObjectType},
4043  {AccessPrivileges::UPDATE_IN_TABLE, TableDBObjectType}},
4044  {{"DELETE"s, DatabaseDBObjectType},
4045  {AccessPrivileges::DELETE_FROM_TABLE, TableDBObjectType}},
4046  {{"DROP"s, DatabaseDBObjectType},
4047  {AccessPrivileges::DROP_TABLE, TableDBObjectType}},
4048  {{"ALTER"s, DatabaseDBObjectType},
4049  {AccessPrivileges::ALTER_TABLE, TableDBObjectType}},
4050 
4051  {{"SELECT"s, TableDBObjectType},
4052  {AccessPrivileges::SELECT_FROM_TABLE, TableDBObjectType}},
4053  {{"INSERT"s, TableDBObjectType},
4054  {AccessPrivileges::INSERT_INTO_TABLE, TableDBObjectType}},
4055  {{"TRUNCATE"s, TableDBObjectType},
4056  {AccessPrivileges::TRUNCATE_TABLE, TableDBObjectType}},
4057  {{"UPDATE"s, TableDBObjectType},
4058  {AccessPrivileges::UPDATE_IN_TABLE, TableDBObjectType}},
4059  {{"DELETE"s, TableDBObjectType},
4060  {AccessPrivileges::DELETE_FROM_TABLE, TableDBObjectType}},
4061  {{"DROP"s, TableDBObjectType},
4062  {AccessPrivileges::DROP_TABLE, TableDBObjectType}},
4063  {{"ALTER"s, TableDBObjectType},
4064  {AccessPrivileges::ALTER_TABLE, TableDBObjectType}},
4065 
4066  {{"CREATE VIEW"s, DatabaseDBObjectType},
4067  {AccessPrivileges::CREATE_VIEW, ViewDBObjectType}},
4068  {{"SELECT VIEW"s, DatabaseDBObjectType},
4069  {AccessPrivileges::SELECT_FROM_VIEW, ViewDBObjectType}},
4070  {{"DROP VIEW"s, DatabaseDBObjectType},
4071  {AccessPrivileges::DROP_VIEW, ViewDBObjectType}},
4072  {{"SELECT"s, ViewDBObjectType},
4073  {AccessPrivileges::SELECT_FROM_VIEW, ViewDBObjectType}},
4074  {{"DROP"s, ViewDBObjectType}, {AccessPrivileges::DROP_VIEW, ViewDBObjectType}},
4075 
4076  {{"CREATE DASHBOARD"s, DatabaseDBObjectType},
4077  {AccessPrivileges::CREATE_DASHBOARD, DashboardDBObjectType}},
4078  {{"EDIT DASHBOARD"s, DatabaseDBObjectType},
4079  {AccessPrivileges::EDIT_DASHBOARD, DashboardDBObjectType}},
4080  {{"VIEW DASHBOARD"s, DatabaseDBObjectType},
4081  {AccessPrivileges::VIEW_DASHBOARD, DashboardDBObjectType}},
4082  {{"DELETE DASHBOARD"s, DatabaseDBObjectType},
4083  {AccessPrivileges::DELETE_DASHBOARD, DashboardDBObjectType}},
4084  {{"VIEW"s, DashboardDBObjectType},
4085  {AccessPrivileges::VIEW_DASHBOARD, DashboardDBObjectType}},
4086  {{"EDIT"s, DashboardDBObjectType},
4087  {AccessPrivileges::EDIT_DASHBOARD, DashboardDBObjectType}},
4088  {{"DELETE"s, DashboardDBObjectType},
4089  {AccessPrivileges::DELETE_DASHBOARD, DashboardDBObjectType}},
4090 
4091  {{"CREATE SERVER"s, DatabaseDBObjectType},
4092  {AccessPrivileges::CREATE_SERVER, ServerDBObjectType}},
4093  {{"DROP SERVER"s, DatabaseDBObjectType},
4094  {AccessPrivileges::DROP_SERVER, ServerDBObjectType}},
4095  {{"DROP"s, ServerDBObjectType},
4096  {AccessPrivileges::DROP_SERVER, ServerDBObjectType}},
4097 
4098  {{"VIEW SQL EDITOR"s, DatabaseDBObjectType},
4099  {AccessPrivileges::VIEW_SQL_EDITOR, DatabaseDBObjectType}},
4100  {{"ACCESS"s, DatabaseDBObjectType},
4101  {AccessPrivileges::ACCESS, DatabaseDBObjectType}}};
4102 
4103  auto result = privileges_lookup.find(std::make_pair(privs, objectType));
4104  if (result == privileges_lookup.end()) {
4105  throw std::runtime_error("Privileges " + privs + " on DB object " + object_name +
4106  " are not correct.");
4107  }
4108  return result->second;
4109 }
4110 
4111 static DBObject createObject(const std::string& objectName, DBObjectType objectType) {
4112  if (objectType == DashboardDBObjectType) {
4113  int32_t dashboard_id = -1;
4114  if (!objectName.empty()) {
4115  try {
4116  dashboard_id = stoi(objectName);
4117  } catch (const std::exception&) {
4118  throw std::runtime_error(
4119  "Privileges on dashboards should be changed via integer dashboard ID");
4120  }
4121  }
4122  return DBObject(dashboard_id, objectType);
4123  } else {
4124  return DBObject(objectName, objectType);
4125  }
4126 }
4127 
4128 // GRANT SELECT/INSERT/CREATE ON TABLE payroll_table TO payroll_dept_role;
4129 void GrantPrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4130  auto& catalog = session.getCatalog();
4131  const auto& currentUser = session.get_currentUser();
4132  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4133  const auto objectName =
4134  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4135  auto objectType = DBObjectTypeFromString(parserObjectType);
4136  if (objectType == ServerDBObjectType && !g_enable_fsi) {
4137  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
4138  }
4139  DBObject dbObject = createObject(objectName, objectType);
4140  /* verify object ownership if not suser */
4141  if (!currentUser.isSuper) {
4142  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4143  throw std::runtime_error(
4144  "GRANT failed. It can only be executed by super user or owner of the "
4145  "object.");
4146  }
4147  }
4148  /* set proper values of privileges & grant them to the object */
4149  std::vector<DBObject> objects(get_privs().size(), dbObject);
4150  for (size_t i = 0; i < get_privs().size(); ++i) {
4151  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
4152  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
4153  objects[i].setPrivileges(priv.first);
4154  objects[i].setPermissionType(priv.second);
4155  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
4156  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
4157  }
4158  }
4159  SysCatalog::instance().grantDBObjectPrivilegesBatch(grantees, objects, catalog);
4160 }
4161 
4162 // REVOKE SELECT/INSERT/CREATE ON TABLE payroll_table FROM payroll_dept_role;
4163 void RevokePrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4164  auto& catalog = session.getCatalog();
4165  const auto& currentUser = session.get_currentUser();
4166  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4167  const auto objectName =
4168  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4169  auto objectType = DBObjectTypeFromString(parserObjectType);
4170  if (objectType == ServerDBObjectType && !g_enable_fsi) {
4171  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
4172  }
4173  DBObject dbObject = createObject(objectName, objectType);
4174  /* verify object ownership if not suser */
4175  if (!currentUser.isSuper) {
4176  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4177  throw std::runtime_error(
4178  "REVOKE failed. It can only be executed by super user or owner of the "
4179  "object.");
4180  }
4181  }
4182  /* set proper values of privileges & grant them to the object */
4183  std::vector<DBObject> objects(get_privs().size(), dbObject);
4184  for (size_t i = 0; i < get_privs().size(); ++i) {
4185  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
4186  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
4187  objects[i].setPrivileges(priv.first);
4188  objects[i].setPermissionType(priv.second);
4189  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
4190  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
4191  }
4192  }
4193  SysCatalog::instance().revokeDBObjectPrivilegesBatch(grantees, objects, catalog);
4194 }
4195 
4196 // NOTE: not used currently, will we ever use it?
4197 // SHOW ON TABLE payroll_table FOR payroll_dept_role;
//
// Prints, with printf, the privileges that the role get_role() holds on the
// object named by get_object().
// Throws std::runtime_error when the object name/type is rejected by
// extractObjectNameFromHierName(), or when the current user is neither a super
// user nor the owner of the object.
//
// NOTE(review): in this listing the condition guarding each printf below is
// missing (the embedded line numbers skip one line before every printf), so
// the braces shown here are unbalanced. Restore the conditions from the
// original source; presumably each one tests `privs` for a single permission.
4198 void ShowPrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4199  auto& catalog = session.getCatalog();
4200  const auto& currentUser = session.get_currentUser();
  // Resolve which object the statement refers to.
4201  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4202  const auto objectName =
4203  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4204  auto objectType = DBObjectTypeFromString(parserObjectType);
4205  DBObject dbObject = createObject(objectName, objectType);
4206  /* verify object ownership if not suser */
4207  if (!currentUser.isSuper) {
4208  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4209  throw std::runtime_error(
4210  "SHOW ON " + get_object() + " FOR " + get_role() +
4211  " failed. It can only be executed by super user or owner of the object.");
4212  }
4213  }
4214  /* get values of privileges for the object and report them */
  // getDBObjectPrivileges() is handed dbObject and the privileges are read
  // back from it right afterwards.
4215  SysCatalog::instance().getDBObjectPrivileges(get_role(), dbObject, catalog);
4216  AccessPrivileges privs = dbObject.getPrivileges();
4217  printf("\nPRIVILEGES ON %s FOR %s ARE SET AS FOLLOWING: ",
4218  get_object().c_str(),
4219  get_role().c_str());
4220 
  // One keyword is printed per privilege; the set of keywords depends on the
  // object type.
4221  if (objectType == DBObjectType::DatabaseDBObjectType) {
4223  printf(" CREATE");
4224  }
4226  printf(" DROP");
4227  }
4228  } else if (objectType == DBObjectType::TableDBObjectType) {
4230  printf(" CREATE");
4231  }
4233  printf(" DROP");
4234  }
4236  printf(" SELECT");
4237  }
4239  printf(" INSERT");
4240  }
4242  printf(" UPDATE");
4243  }
4245  printf(" DELETE");
4246  }
4248  printf(" TRUNCATE");
4249  }
4251  printf(" ALTER");
4252  }
4253  } else if (objectType == DBObjectType::DashboardDBObjectType) {
4255  printf(" CREATE");
4256  }
4258  printf(" DELETE");
4259  }
4261  printf(" VIEW");
4262  }
4264  printf(" EDIT");
4265  }
4266  } else if (objectType == DBObjectType::ViewDBObjectType) {
4268  printf(" CREATE");
4269  }
4271  printf(" DROP");
4272  }
4274  printf(" SELECT");
4275  }
4277  printf(" INSERT");
4278  }
4280  printf(" UPDATE");
4281  }
4283  printf(" DELETE");
4284  }
4285  }
  // No branch exists for other object types: only the header and the closing
  // "." are printed for them.
4286  printf(".\n");
4287 }
4288 
4289 // GRANT payroll_dept_role TO joe;
4290 void GrantRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4291  const auto& currentUser = session.get_currentUser();
4292  if (!currentUser.isSuper) {
4293  throw std::runtime_error(
4294  "GRANT failed, because it can only be executed by super user.");
4295  }
4296  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
4297  get_grantees().end()) {
4298  throw std::runtime_error(
4299  "Request to grant role failed because mapd root user has all privileges by "
4300  "default.");
4301  }
4302  SysCatalog::instance().grantRoleBatch(get_roles(), get_grantees());
4303 }
4304 
4305 // REVOKE payroll_dept_role FROM joe;get_users
4306 void RevokeRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4307  const auto& currentUser = session.get_currentUser();
4308  if (!currentUser.isSuper) {
4309  throw std::runtime_error(
4310  "REVOKE failed, because it can only be executed by super user.");
4311  }
4312  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
4313  get_grantees().end()) {
4314  throw std::runtime_error(
4315  "Request to revoke role failed because privileges can not be revoked from mapd "
4316  "root user.");
4317  }
4318  SysCatalog::instance().revokeRoleBatch(get_roles(), get_grantees());
4319 }
4320 
// Stores in create_stmt_ the text that catalog.dumpCreateTable() produces for
// the table named by *table_.
// Throws std::runtime_error when the table is not found, when the current user
// has no privileges on it, or when it is a view and the user is not a super
// user.
//
// NOTE(review): two pieces are missing from this listing (the embedded line
// numbers skip them): the arguments of the shared-lock constructor and the
// declaration of `dbObject`. Restore both from the original source.
4321 void ShowCreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4322  using namespace Catalog_Namespace;
4323 
  // Shared lock held until the end of the function.
4324  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4327 
4328  auto& catalog = session.getCatalog();
4329  const TableDescriptor* td = catalog.getMetadataForTable(*table_);
4330  if (!td) {
4331  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
4332  }
4333 
4335  dbObject.loadKey(catalog);
4336  std::vector<DBObject> privObjects = {dbObject};
4337 
  // A user without any privilege on the object gets the same message as for a
  // missing table (presumably so that the object's existence is not revealed
  // -- confirm).
4338  if (!SysCatalog::instance().hasAnyPrivileges(session.get_currentUser(), privObjects)) {
4339  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
4340  }
4341  if (td->isView && !session.get_currentUser().isSuper) {
4342  // TODO: we need to run a validate query to ensure the user has access to the
4343  // underlying table, but we do not have any of the machinery in here. Disable for now,
4344  // unless the current user is a super user.
4345  throw std::runtime_error("SHOW CREATE TABLE not yet supported for views");
4346  }
4347 
4348  create_stmt_ = catalog.dumpCreateTable(td);
4349 }
4350 
// Runs the statement's SELECT (*select_stmt) and writes the results to
// *file_path through an import_export::QueryExporter, one outer fragment at a
// time.
// Throws std::runtime_error when *file_path is empty or when the export
// directory for a relative path cannot be created.
//
// NOTE(review): several lines are missing from this listing (the embedded line
// numbers skip them): the declarations of file_type, file_compression and
// array_null_handling, and the body of the absolute-path branch. Restore them
// from the original source.
4351 void ExportQueryStmt::execute(const Catalog_Namespace::SessionInfo& session) {
  // The query state needs a shared_ptr to a session; it is pointed at a local
  // copy with a null deleter so the shared_ptr never frees the stack object.
4352  auto session_copy = session;
4353  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4354  &session_copy, boost::null_deleter());
4355  auto query_state = query_state::QueryState::create(session_ptr, *select_stmt);
4356  auto stdlog = STDLOG(query_state);
4357  auto query_state_proxy = query_state->createQueryStateProxy();
4358 
4359  auto& catalog = session.getCatalog();
4360 
4361  LocalConnector local_connector;
4362 
  // Fall back to the local connector when no leaf connector was supplied.
  // NOTE(review): leafs_connector_ is not reset anywhere in this function, so
  // after the fallback it keeps the address of this stack object once
  // execute() returns -- confirm it is never used again afterwards.
4363  if (!leafs_connector_) {
4364  leafs_connector_ = &local_connector;
4365  }
4366 
4367  import_export::CopyParams copy_params;
4368  // @TODO(se) move rest to CopyParams when we have a Thrift endpoint
4371  std::string layer_name;
4376 
4377  parseOptions(copy_params, file_type, layer_name, file_compression, array_null_handling);
4378 
  // A relative path is redirected to
  // <catalog base path>/mapd_export/<session id>/<file name>; only the file
  // name component of the given path is kept.
4379  if (file_path->empty()) {
4380  throw std::runtime_error("Invalid file path for COPY TO");
4381  } else if (!boost::filesystem::path(*file_path).is_absolute()) {
4382  std::string file_name = boost::filesystem::path(*file_path).filename().string();
4383  std::string file_dir =
4384  catalog.getBasePath() + "/mapd_export/" + session.get_session_id() + "/";
4385  if (!boost::filesystem::exists(file_dir)) {
4386  if (!boost::filesystem::create_directories(file_dir)) {
4387  throw std::runtime_error("Directory " + file_dir + " cannot be created.");
4388  }
4389  }
4390  *file_path = file_dir + file_name;
4391  } else {
4392  // Above branch will create a new file in the mapd_export directory. If that path is
4393  // not exercised, go through applicable file path validations.
4396  }
4397 
4398  // get column info
  // This query always goes through the local connector; only targets_meta of
  // its result is used below. The trailing `true` presumably requests
  // validation only -- confirm against LocalConnector::query.
4399  auto column_info_result =
4400  local_connector.query(query_state_proxy, *select_stmt, {}, true);
4401 
4402  // create exporter for requested file type
4403  auto query_exporter = import_export::QueryExporter::create(file_type);
4404 
4405  // default layer name to file path stem if it wasn't specified
4406  if (layer_name.size() == 0) {
4407  layer_name = boost::filesystem::path(*file_path).stem().string();
4408  }
4409 
4410  // begin export
4411  query_exporter->beginExport(*file_path,
4412  layer_name,
4413  copy_params,
4414  column_info_result.targets_meta,
4415  file_compression,
4416  array_null_handling);
4417 
4418  // how many fragments?
  // A count of zero still runs the loop once, with no fragment restriction.
4419  size_t outer_frag_count =
4420  leafs_connector_->getOuterFragmentCount(query_state_proxy, *select_stmt);
4421  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
4422 
4423  // loop fragments
4424  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
4425  // limit the query to just this fragment
4426  std::vector<size_t> allowed_outer_fragment_indices;
4427  if (outer_frag_count) {
4428  allowed_outer_fragment_indices.push_back(outer_frag_idx);
4429  }
4430 
4431  // run the query
4432  std::vector<AggregatedResult> query_results = leafs_connector_->query(
4433  query_state_proxy, *select_stmt, allowed_outer_fragment_indices);
4434 
4435  // export the results
4436  query_exporter->exportResults(query_results);
4437  }
4438 
4439  // end export
4440  query_exporter->endExport();
4441 }
4442 
// Parses the statement's `options` name/value list into the export settings that are
// passed in by reference. Option names are compared case-insensitively; the names
// handled here are delimiter, nulls, header, quote, escape, line_delimiter, quoted,
// file_type, layer_name, file_compression and array_null_handling.
// Throws std::runtime_error when an option value is not a string literal, when a
// single-character option is not exactly one character long, when file_type,
// file_compression or array_null_handling has an unsupported value, or when the
// option name is not one of the above.
// NOTE(review): this listing skips some source lines (the embedded numbering jumps,
// e.g. 4444 -> 4446 in the parameter list and after every file_type /
// file_compression / array_null_handling comparison), so part of the signature, the
// default assignments and several right-hand sides are not visible here.
4443 void ExportQueryStmt::parseOptions(
4444  import_export::CopyParams& copy_params,
4446  std::string& layer_name,
4448  import_export::QueryExporter::ArrayNullHandling& array_null_handling) {
4449  // defaults for non-CopyParams values
4451  layer_name.clear();
4453 
4454  if (!options.empty()) {
4455  for (auto& p : options) {
      // Each branch checks that the value is a StringLiteral before reading it.
4456  if (boost::iequals(*p->get_name(), "delimiter")) {
4457  const StringLiteral* str_literal =
4458  dynamic_cast<const StringLiteral*>(p->get_value());
4459  if (str_literal == nullptr) {
4460  throw std::runtime_error("Delimiter option must be a string.");
4461  } else if (str_literal->get_stringval()->length() != 1) {
4462  throw std::runtime_error("Delimiter must be a single character string.");
4463  }
4464  copy_params.delimiter = (*str_literal->get_stringval())[0];
4465  } else if (boost::iequals(*p->get_name(), "nulls")) {
4466  const StringLiteral* str_literal =
4467  dynamic_cast<const StringLiteral*>(p->get_value());
4468  if (str_literal == nullptr) {
4469  throw std::runtime_error("Nulls option must be a string.");
4470  }
4471  copy_params.null_str = *str_literal->get_stringval();
4472  } else if (boost::iequals(*p->get_name(), "header")) {
4473  const StringLiteral* str_literal =
4474  dynamic_cast<const StringLiteral*>(p->get_value());
4475  if (str_literal == nullptr) {
4476  throw std::runtime_error("Header option must be a boolean.");
4477  }
      // NOTE(review): the rest of this assignment is on lines not shown in this listing.
4478  copy_params.has_header = bool_from_string_literal(str_literal)
4481  } else if (boost::iequals(*p->get_name(), "quote")) {
4482  const StringLiteral* str_literal =
4483  dynamic_cast<const StringLiteral*>(p->get_value());
4484  if (str_literal == nullptr) {
4485  throw std::runtime_error("Quote option must be a string.");
4486  } else if (str_literal->get_stringval()->length() != 1) {
4487  throw std::runtime_error("Quote must be a single character string.");
4488  }
4489  copy_params.quote = (*str_literal->get_stringval())[0];
4490  } else if (boost::iequals(*p->get_name(), "escape")) {
4491  const StringLiteral* str_literal =
4492  dynamic_cast<const StringLiteral*>(p->get_value());
4493  if (str_literal == nullptr) {
4494  throw std::runtime_error("Escape option must be a string.");
4495  } else if (str_literal->get_stringval()->length() != 1) {
4496  throw std::runtime_error("Escape must be a single character string.");
4497  }
4498  copy_params.escape = (*str_literal->get_stringval())[0];
4499  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
4500  const StringLiteral* str_literal =
4501  dynamic_cast<const StringLiteral*>(p->get_value());
4502  if (str_literal == nullptr) {
4503  throw std::runtime_error("Line_delimiter option must be a string.");
4504  } else if (str_literal->get_stringval()->length() != 1) {
4505  throw std::runtime_error("Line_delimiter must be a single character string.");
4506  }
4507  copy_params.line_delim = (*str_literal->get_stringval())[0];
4508  } else if (boost::iequals(*p->get_name(), "quoted")) {
4509  const StringLiteral* str_literal =
4510  dynamic_cast<const StringLiteral*>(p->get_value());
4511  if (str_literal == nullptr) {
4512  throw std::runtime_error("Quoted option must be a boolean.");
4513  }
4514  copy_params.quoted = bool_from_string_literal(str_literal);
4515  } else if (boost::iequals(*p->get_name(), "file_type")) {
4516  const StringLiteral* str_literal =
4517  dynamic_cast<const StringLiteral*>(p->get_value());
4518  if (str_literal == nullptr) {
4519  throw std::runtime_error("File Type option must be a string.");
4520  }
      // The value is lower-cased first, so the comparisons below accept any casing.
4521  auto file_type_str =
4522  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
      // NOTE(review): the statement inside each matching branch is not shown in this
      // listing.
4523  if (file_type_str == "csv") {
4525  } else if (file_type_str == "geojson") {
4527  } else if (file_type_str == "geojsonl") {
4529  } else if (file_type_str == "shapefile") {
4531  } else {
4532  throw std::runtime_error(
4533  "File Type option must be 'CSV', 'GeoJSON', 'GeoJSONL' or 'Shapefile'");
4534  }
4535  } else if (boost::iequals(*p->get_name(), "layer_name")) {
4536  const StringLiteral* str_literal =
4537  dynamic_cast<const StringLiteral*>(p->get_value());
4538  if (str_literal == nullptr) {
4539  throw std::runtime_error("Layer Name option must be a string.");
4540  }
4541  layer_name = *str_literal->get_stringval();
4542  } else if (boost::iequals(*p->get_name(), "file_compression")) {
4543  const StringLiteral* str_literal =
4544  dynamic_cast<const StringLiteral*>(p->get_value());
4545  if (str_literal == nullptr) {
4546  throw std::runtime_error("File Compression option must be a string.");
4547  }
4548  auto file_compression_str =
4549  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
      // NOTE(review): the statement inside each matching branch is not shown in this
      // listing.
4550  if (file_compression_str == "none") {
4552  } else if (file_compression_str == "gzip") {
4554  } else if (file_compression_str == "zip") {
4556  } else {
4557  throw std::runtime_error(
4558  "File Compression option must be 'None', 'GZip', or 'Zip'");
4559  }
4560  } else if (boost::iequals(*p->get_name(), "array_null_handling")) {
4561  const StringLiteral* str_literal =
4562  dynamic_cast<const StringLiteral*>(p->get_value());
4563  if (str_literal == nullptr) {
4564  throw std::runtime_error("Array Null Handling option must be a string.");
4565  }
4566  auto array_null_handling_str =
4567  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
      // NOTE(review): the right-hand side of each assignment below is not shown in this
      // listing.
4568  if (array_null_handling_str == "abort") {
4569  array_null_handling =
4571  } else if (array_null_handling_str == "raw") {
4572  array_null_handling =
4574  } else if (array_null_handling_str == "zero") {
4575  array_null_handling =
4577  } else if (array_null_handling_str == "nullfield") {
4578  array_null_handling =
4580  } else {
4581  throw std::runtime_error(
4582  "Array Null Handling option must be 'Abort', 'Raw', 'Zero', or "
4583  "'NullField'");
4584  }
4585  } else {
      // Any option name not handled above is rejected.
4586  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
4587  }
4588  }
4589  }
4590 }
4591 
// Creates a view named view_name_ defined by select_query_.
// Returns without doing anything when
// catalog.validateNonExistentTableOrView(view_name_, if_not_exists_) is false.
// Otherwise the shimmed query is passed to the Calcite manager, a TableDescriptor
// with isView = true and viewSQL set to the shimmed query is handed to
// catalog.createTable, and a ViewDBObjectType object is registered with SysCatalog
// for the current user.
// Throws std::runtime_error with a "no create view privileges" message from the guard
// at listing lines 4603-4607.
// NOTE(review): this listing skips some source lines (4603-4604, 4622-4623, 4632), so
// the condition of that guard, the arguments of the write lock and one
// TableDescriptor assignment are not visible here.
4592 void CreateViewStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4593  auto session_copy = session;
      // Non-owning shared_ptr to the local copy: boost::null_deleter means the pointee is
      // not deleted when the shared_ptr is released.
4594  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4595  &session_copy, boost::null_deleter());
4596  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
4597  auto stdlog = STDLOG(query_state);
4598  auto& catalog = session.getCatalog();
4599 
4600  if (!catalog.validateNonExistentTableOrView(view_name_, if_not_exists_)) {
4601  return;
4602  }
      // NOTE(review): the `if (...) {` that opens the block below is not shown in this
      // listing.
4605  throw std::runtime_error("View " + view_name_ +
4606  " will not be created. User has no create view privileges.");
4607  }
4608 
4609  const auto query_after_shim = pg_shim(select_query_);
4610 
4611  // this now also ensures that access permissions are checked
4612  catalog.getCalciteMgr()->process(query_state->createQueryStateProxy(),
4613  query_after_shim,
4614  {},
4615  true,
4616  false,
4617  false,
4618  true);
4619 
4620  // Take write lock after the query is processed to ensure no deadlocks
4621  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4624 
      // Describe the view as a table with no columns of its own; the storage-related
      // fields are filled with the DEFAULT_* values.
4625  TableDescriptor td;
4626  td.tableName = view_name_;
4627  td.userId = session.get_currentUser().userId;
4628  td.nColumns = 0;
4629  td.isView = true;
4630  td.viewSQL = query_after_shim;
4631  td.fragmenter = nullptr;
4633  td.maxFragRows =
4634  DEFAULT_FRAGMENT_ROWS; // @todo this stuff should not be InsertOrderFragmenter
4635  td.maxChunkSize =
4636  DEFAULT_MAX_CHUNK_SIZE; // @todo this stuff should not be InsertOrderFragmenter
4637  td.fragPageSize = DEFAULT_PAGE_SIZE;
4638  td.maxRows = DEFAULT_MAX_ROWS;
4639  catalog.createTable(td, {}, {}, true);
4640 
4641  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
4642  // privileges
4643  SysCatalog::instance().createDBObject(
4644  session.get_currentUser(), view_name_, ViewDBObjectType, catalog);
4645 }
4646 
// Drops the view named *view_name.
// A TableSchemaLockContainer<WriteLock> is acquired for the view and its
// TableDescriptor is read from it. If acquiring it throws std::runtime_error and
// if_exists is set, the function returns silently; otherwise the error propagates.
// Throws std::runtime_error with a "no drop view privileges" message when
// session.checkDBAccessPrivileges(...) returns false.
// NOTE(review): this listing skips some source lines (4656, 4671, 4676), so the
// callee that produces the lock container, the arguments of the privilege check and
// one statement before catalog.dropTable are not visible here.
4647 void DropViewStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4648  auto& catalog = session.getCatalog();
4649 
4650  const TableDescriptor* td{nullptr};
4651  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
4652 
4653  try {
4654  td_with_lock =
4655  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
4657  catalog, *view_name, false));
4658  td = (*td_with_lock)();
4659  } catch (const std::runtime_error& e) {
4660  if (if_exists) {
4661  return;
4662  } else {
      // NOTE(review): `throw e;` throws a copy whose type is std::runtime_error, so a
      // more derived exception type would be sliced; a bare `throw;` would rethrow the
      // original object. Consider switching.
4663  throw e;
4664  }
4665  }
4666 
      // Both must be set once the try block has completed without throwing.
4667  CHECK(td);
4668  CHECK(td_with_lock);
4669 
4670  if (!session.checkDBAccessPrivileges(
4672  throw std::runtime_error("View " + *view_name +
4673  " will not be dropped. User has no drop view privileges.");
4674  }
4675 
4677  catalog.dropTable(td);
4678 }
4679 
4680 static void checkStringLiteral(const std::string& option_name,
4681  const std::unique_ptr<NameValueAssign>& p) {
4682  CHECK(p);
4683  if (!dynamic_cast<const StringLiteral*>(p->get_value())) {
4684  throw std::runtime_error(option_name + " option must be a string literal.");
4685  }
4686 }
4687 
// Executes CREATE DATABASE for *db_name.
// Only a super user may run it. When SysCatalog already has metadata for *db_name
// and if_not_exists_ is set, the function returns without doing anything. The owner
// defaults to the current user's id and can be overridden with the OWNER option
// (matched case-insensitively), whose value must be a string literal naming an
// existing user.
// Throws std::runtime_error when the caller is not a super user, when the OWNER user
// does not exist, or when any option other than OWNER is supplied.
// NOTE(review): this listing skips some source lines (4695-4696, 4698, 4709), so the
// arguments of the write lock and the declarations of `db_meta` and `user` are not
// visible here.
4688 void CreateDBStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4689  if (!session.get_currentUser().isSuper) {
4690  throw std::runtime_error(
4691  "CREATE DATABASE command can only be executed by super user.");
4692  }
4693 
4694  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4697 
      // An existing database is only skipped silently when IF NOT EXISTS was given;
      // otherwise execution continues to createDatabase below.
4699  if (SysCatalog::instance().getMetadataForDB(*db_name, db_meta) && if_not_exists_) {
4700  return;
4701  }
4702  int ownerId = session.get_currentUser().userId;
4703  if (!name_value_list.empty()) {
4704  for (auto& p : name_value_list) {
4705  if (boost::iequals(*p->get_name(), "owner")) {
      // checkStringLiteral throws for non-string values, which makes the static_cast
      // below safe.
4706  checkStringLiteral("Owner name", p);
4707  const std::string* str =
4708  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4710  if (!SysCatalog::instance().getMetadataForUser(*str, user)) {
4711  throw std::runtime_error("User " + *str + " does not exist.");
4712  }
4713  ownerId = user.userId;
4714  } else {
4715  throw std::runtime_error("Invalid CREATE DATABASE option " + *p->get_name() +
4716  ". Only OWNER supported.");
4717  }
4718  }
4719  }
4720  SysCatalog::instance().createDatabase(*db_name, ownerId);
4721 }
4722 
// Executes DROP DATABASE for *db_name.
// Only a super user may run it. When SysCatalog has no metadata for *db_name the
// function returns silently if if_exists_ is set and throws otherwise. On success the
// database metadata is passed to SysCatalog::dropDatabase.
// Throws std::runtime_error when the caller is not a super user or the database does
// not exist (and IF EXISTS was not given).
// NOTE(review): this listing skips some source lines (4729-4730, 4732), so the
// arguments of the write lock and the declaration of `db` are not visible here.
4723 void DropDBStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4724  if (!session.get_currentUser().isSuper) {
4725  throw std::runtime_error("DROP DATABASE command can only be executed by super user.");
4726  }
4727 
4728  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4731 
4733  if (!SysCatalog::instance().getMetadataForDB(*db_name, db)) {
4734  if (if_exists_) {
4735  return;
4736  }
4737  throw std::runtime_error("Database " + *db_name + " does not exist.");
4738  }
4739 
      // NOTE(review): the first guard in this function already throws for every
      // non-super user, so `!isSuper` is always false here and this owner check can
      // never fire. Confirm whether owners were meant to be allowed to drop.
4740  if (!session.get_currentUser().isSuper &&
4741  session.get_currentUser().userId != db.dbOwner) {
4742  throw std::runtime_error("Only the super user or the owner can drop database.");
4743  }
4744 
4745  SysCatalog::instance().dropDatabase(db);
4746 }
4747 
4748 static bool readBooleanLiteral(const std::string& option_name,
4749  const std::unique_ptr<NameValueAssign>& p) {
4750  CHECK(p);
4751  const std::string* str =
4752  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4753  if (boost::iequals(*str, "true")) {
4754  return true;
4755  } else if (boost::iequals(*str, "false")) {
4756  return false;
4757  } else {
4758  throw std::runtime_error("Value to " + option_name + " must be TRUE or FALSE.");
4759  }
4760 }
4761 
4762 void CreateUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4763  std::string passwd;
4764  bool is_super = false;
4765  std::string default_db;
4766  bool can_login = true;
4767  for (auto& p : name_value_list) {
4768  if (boost::iequals(*p->get_name(), "password")) {
4769  checkStringLiteral("Password", p);
4770  passwd = *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4771  } else if (boost::iequals(*p->get_name(), "is_super")) {
4772  checkStringLiteral("IS_SUPER", p);
4773  is_super = readBooleanLiteral("IS_SUPER", p);
4774  } else if (boost::iequals(*p->get_name(), "default_db")) {
4775  checkStringLiteral("DEFAULT_DB", p);
4776  default_db = *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4777  } else if (boost::iequals(*p->get_name(), "can_login")) {
4778  checkStringLiteral("CAN_LOGIN", p);
4779  can_login = readBooleanLiteral("can_login", p);
4780  } else {
4781  throw std::runtime_error("Invalid CREATE USER option " + *p->get_name() +
4782  ". Should be PASSWORD, IS_SUPER, CAN_LOGIN"
4783  " or DEFAULT_DB.");
4784  }
4785  }
4786  if (!session.get_currentUser().isSuper) {
4787  throw std::runtime_error("Only super user can create new users.");
4788  }
4789  SysCatalog::instance().createUser(*user_name, passwd, is_super, default_db, can_login);
4790 }
4791 
// Executes ALTER USER for *user_name.
// Options are matched case-insensitively: PASSWORD, IS_SUPER, DEFAULT_DB and
// CAN_LOGIN. DEFAULT_DB accepts either a string literal or a NULL literal; NULL is
// mapped to an empty string. The pointers passwd, is_superp, default_db and
// can_loginp stay null for options that were not supplied, and
// SysCatalog::alterUser is only called when at least one of them is set.
// A non-super user may only alter their own account and may not set IS_SUPER or
// CAN_LOGIN.
// Throws std::runtime_error for an unknown option, an invalid option value, a
// non-existent user, or insufficient privileges.
// NOTE(review): this listing skips source line 4830, so the declaration of `user` is
// not visible here.
4792 void AlterUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4793  // Parse the statement
4794  const std::string* passwd = nullptr;
4795  bool is_super = false;
4796  bool* is_superp = nullptr;
4797  const std::string* default_db = nullptr;
4798  bool can_login = true;
4799  bool* can_loginp = nullptr;
4800  for (auto& p : name_value_list) {
4801  if (boost::iequals(*p->get_name(), "password")) {
4802  checkStringLiteral("Password", p);
4803  passwd = static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4804  } else if (boost::iequals(*p->get_name(), "is_super")) {
4805  checkStringLiteral("IS_SUPER", p);
4806  is_super = readBooleanLiteral("IS_SUPER", p);
      // A non-null pointer records that the option was supplied.
4807  is_superp = &is_super;
4808  } else if (boost::iequals(*p->get_name(), "default_db")) {
4809  if (dynamic_cast<const StringLiteral*>(p->get_value())) {
4810  default_db = static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4811  } else if (dynamic_cast<const NullLiteral*>(p->get_value())) {
      // Function-local static, so the pointer stored in default_db stays valid after
      // this scope ends.
4812  static std::string blank;
4813  default_db = &blank;
4814  } else {
4815  throw std::runtime_error(
4816  "DEFAULT_DB option must be either a string literal or a NULL literal.");
4817  }
4818  } else if (boost::iequals(*p->get_name(), "can_login")) {
4819  checkStringLiteral("CAN_LOGIN", p);
4820  can_login = readBooleanLiteral("CAN_LOGIN", p);
4821  can_loginp = &can_login;
4822  } else {
4823  throw std::runtime_error("Invalid ALTER USER option " + *p->get_name() +
4824  ". Should be PASSWORD, DEFAULT_DB, CAN_LOGIN"
4825  " or IS_SUPER.");
4826  }
4827  }
4828 
4829  // Check if the user is authorized to execute ALTER USER statement
4831  if (!SysCatalog::instance().getMetadataForUser(*user_name, user)) {
4832  throw std::runtime_error("User " + *user_name + " does not exist.");
4833  }
4834  if (!session.get_currentUser().isSuper) {
4835  if (session.get_currentUser().userId != user.userId) {
4836  throw std::runtime_error("Only super user can change another user's attributes.");
4837  } else if (is_superp || can_loginp) {
4838  throw std::runtime_error(
4839  "A user can only update their own password or default database.");
4840  }
4841  }
4842 
      // Nothing to do when no option was supplied.
4843  if (passwd || is_superp || default_db || can_loginp) {
4844  SysCatalog::instance().alterUser(
4845  user.userId, passwd, is_superp, default_db, can_loginp);
4846  }
4847 }
4848 
4849 void DropUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4850  if (!session.get_currentUser().isSuper) {
4851  throw std::runtime_error("Only super user can drop users.");
4852  }
4853  SysCatalog::instance().dropUser(*user_name);
4854 }
4855 
// Executes DUMP TABLE: looks up the descriptor of *table in the session's catalog
// and passes it, together with *path and `compression`, to TableArchiver::dumpTable.
// Throws std::runtime_error with a "no select privileges" or "no create privileges"
// message when the corresponding privilege check fails.
// NOTE(review): this listing skips some source lines (4859, 4863-4864), so the
// arguments of the first privilege check and the whole condition of the second one
// are not visible here.
4856 void DumpTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4857  // check access privileges
4858  if (!session.checkDBAccessPrivileges(
4860  throw std::runtime_error("Table " + *table +
4861  " will not be dumped. User has no select privileges.");
4862  }
4865  throw std::runtime_error("Table " + *table +
4866  " will not be dumped. User has no create privileges.");
4867  }
4868  auto& catalog = session.getCatalog();
      // NOTE(review): td is passed on without a null check here - confirm how
      // dumpTable behaves when the table does not exist.
4869  const TableDescriptor* td = catalog.getMetadataForTable(*table);
4870  TableArchiver table_archiver(&catalog);
4871  table_archiver.dumpTable(td, *path, compression);
4872 }
4873 
// Executes RESTORE TABLE: restores *table from the archive at *path via
// TableArchiver::restoreTable.
// Throws std::runtime_error("Table <name> exists.") when the catalog already has a
// table with that name, and a "no create privileges" error from the guard at listing
// lines 4884-4888.
// NOTE(review): this listing skips source lines 4884-4885, so the condition of that
// privilege guard is not visible here.
4874 void RestoreTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4875  auto& catalog = session.getCatalog();
      // Second argument `false` is the populateFragmenter flag of getMetadataForTable.
4876  const TableDescriptor* td = catalog.getMetadataForTable(*table, false);
4877  if (td) {
4878  // TODO: v1.0 simply throws to avoid accidentally overwriting the target table.
4879  // Will add a REPLACE TABLE to explicitly replace the target table.
4880  // catalog.restoreTable(session, td, *path, compression);
4881  throw std::runtime_error("Table " + *table + " exists.");
4882  } else {
4883  // check access privileges
4886  throw std::runtime_error("Table " + *table +
4887  " will not be restored. User has no create privileges.");
4888  }
4889  TableArchiver table_archiver(&catalog);
4890  table_archiver.restoreTable(session, *table, *path, compression);
4891  }
4892 }
4893 
4894 } // namespace Parser
int8_t tinyintval
Definition: sqltypes.h:135
int32_t get_table_id() const
decltype(auto) get_max_rows_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
void validate_non_foreign_table_write(const TableDescriptor *table_descriptor)
Definition: FsiUtils.h:22
static const AccessPrivileges VIEW_SQL_EDITOR
Definition: DBObject.h:153
decltype(auto) get_max_chunk_size_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
static const std::map< const std::string, const TableDefFuncPtr > tableDefFuncMap
void set_compression(EncodingType c)
Definition: sqltypes.h:359
SQLAgg
Definition: sqldefs.h:71
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string s3_secret_key
Definition: CopyParams.h:63
size_t shard_column_index(const std::string &name, const std::list< ColumnDescriptor > &columns)
std::string partitions
static const AccessPrivileges VIEW_DASHBOARD
Definition: DBObject.h:172
#define NULL_DOUBLE
Definition: sqltypes.h:186
static const int32_t DROP_VIEW
Definition: DBObject.h:115
bool is_time() const
Definition: sqltypes.h:422
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:700
int64_t get_intval() const
Definition: ParserNode.h:151
bool is_array() const
Definition: sqltypes.h:424
static const AccessPrivileges DROP_SERVER
Definition: DBObject.h:190
bool is_string() const
Definition: sqltypes.h:416
bool is_boolean() const
Definition: sqltypes.h:423
static const int32_t SELECT_FROM_VIEW
Definition: DBObject.h:116
const std::vector< std::shared_ptr< TargetEntry > > & get_targetlist() const
Definition: Analyzer.h:1611
const int8_t const int64_t * num_rows
Definition: Analyzer.h:1568
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:86
SQLTypes
Definition: sqltypes.h:40
static void checkStringLiteral(const std::string &option_name, const std::unique_ptr< NameValueAssign > &p)
static const int32_t UPDATE_IN_VIEW
Definition: DBObject.h:118
std::string tableName
const std::vector< TargetMetaInfo > targets_meta
void add_rte(RangeTableEntry *rte)
Definition: Analyzer.cpp:1349
static const AccessPrivileges ALL_DATABASE
Definition: DBObject.h:152
SQLType * get_column_type() const
Definition: ParserNode.h:794
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
bool is_integer() const
Definition: sqltypes.h:418
decltype(auto) get_vacuum_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
static const AccessPrivileges ALTER_TABLE
Definition: DBObject.h:166
DBObjectType
Definition: DBObject.h:42
static const int32_t CREATE_VIEW
Definition: DBObject.h:114
const std::list< std::shared_ptr< Analyzer::Expr > > & get_group_by() const
Definition: Analyzer.h:1619
void expand_star_in_targetlist(const Catalog_Namespace::Catalog &catalog, std::vector< std::shared_ptr< TargetEntry >> &tlist, int rte_idx)
static const AccessPrivileges TRUNCATE_TABLE
Definition: DBObject.h:165
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:194
SQLQualifier
Definition: sqldefs.h:69
#define LOG(tag)
Definition: Logger.h:188
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:155
decltype(auto) get_skip_rows_def(DataframeTableDescriptor &df_td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
SQLOps
Definition: sqldefs.h:29
Definition: sqldefs.h:35
std::string storageType
static const AccessPrivileges INSERT_INTO_TABLE
Definition: DBObject.h:162
std::shared_ptr< Analyzer::Expr > deep_copy() const override
Definition: Analyzer.cpp:80
Definition: sqldefs.h:36
static const AccessPrivileges CREATE_DASHBOARD
Definition: DBObject.h:171
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:268
int get_result_table_id() const
Definition: Analyzer.h:1627
#define DEFAULT_MAX_CHUNK_SIZE
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders(const TableDescriptor *td, Loader *loader)
Definition: Importer.cpp:5148
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:350
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:819
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:46
bool g_enable_fsi
Definition: Catalog.cpp:90
bool is_date() const
Definition: sqltypes.h:641
Definition: sqldefs.h:49
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
const UserMetadata & get_currentUser() const
Definition: SessionInfo.h:68
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
void set_order_by(std::list< OrderEntry > *o)
Definition: Analyzer.h:1635
std::string extractObjectNameFromHierName(const std::string &objectHierName, const std::string &objectType, const Catalog_Namespace::Catalog &cat)
static const int32_t ALTER_TABLE
Definition: DBObject.h:95
void set_result_col_list(const std::list< int > &col_list)
Definition: Analyzer.h:1629
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters system_parameters=SystemParameters())
Definition: Execute.cpp:148
void setPrivileges(const AccessPrivileges &privs)
Definition: DBObject.h:217
static void invalidateCaches()
void set_offset(int64_t o)
Definition: Analyzer.h:1647
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:267
std::vector< std::string > splitObjectHierName(const std::string &hierName)
WhichRow get_which_row() const
Definition: Analyzer.h:274
std::string get_session_id() const
Definition: SessionInfo.h:73
static const AccessPrivileges SELECT_FROM_TABLE
Definition: DBObject.h:161
static std::unique_ptr< QueryExporter > create(const FileType file_type)
bool bool_from_string_literal(const Parser::StringLiteral *str_literal)
Definition: ParserNode.cpp:909
void get_table_definitions_for_ctas(TableDescriptor &td, const std::unique_ptr< NameValueAssign > &p, const std::list< ColumnDescriptor > &columns)
bool is_decimal() const
Definition: sqltypes.h:419
int32_t intval
Definition: sqltypes.h:137
HOST DEVICE int get_scale() const
Definition: sqltypes.h:264
void restoreTable(const Catalog_Namespace::SessionInfo &session, const TableDescriptor *td, const std::string &archive_path, const std::string &compression)
std::string to_string(char const *&&v)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
static const AccessPrivileges ALL_VIEW
Definition: DBObject.h:178
void validate_non_duplicate_column(const std::string &column_name, std::unordered_set< std::string > &upper_column_names)
Definition: DdlUtils.cpp:520
std::shared_ptr< StringDictionary > stringDict
int tableId
identifies the database into which the data is being inserted
Definition: Fragmenter.h:61
DBObjectType DBObjectTypeFromString(const std::string &type)
Definition: DBObject.cpp:99
int get_logical_size() const
Definition: sqltypes.h:270
Definition: sqldefs.h:73
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const int64_t replicate_count=0)
Definition: Importer.cpp:1422
const ColumnDescriptor * getShardColumnMetadataForTable(const TableDescriptor *td) const
Definition: Catalog.cpp:3489
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:266
size_t numRows
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:63
This file contains the class specification and related data structures for Catalog.
float floatval
Definition: sqltypes.h:139
Datum get_constval() const
Definition: Analyzer.h:336
ImportHeaderRow has_header
Definition: CopyParams.h:48
boost::function< void(DataframeTableDescriptor &, const NameValueAssign *, const std::list< ColumnDescriptor > &columns)> DataframeDefFuncPtr
Definition: ParserNode.cpp:85
static SQLTypeInfo common_string_type(const SQLTypeInfo &type1, const SQLTypeInfo &type2)
Definition: Analyzer.cpp:398
#define DEFAULT_MAX_ROWS
RangeTableEntry * get_rte(int rte_idx) const
Definition: Analyzer.h:1641
std::string cat(Ts &&... args)
Classes representing a parse tree.
void set_fixed_size()
Definition: sqltypes.h:358
const std::string * get_column_name() const
Definition: ParserNode.h:793
void get_dataframe_definitions(DataframeTableDescriptor &df_td, const std::unique_ptr< NameValueAssign > &p, const std::list< ColumnDescriptor > &columns)
QueryState & getQueryState()
Definition: QueryState.h:172
static const int32_t DROP_DATABASE
Definition: DBObject.h:81
const std::list< int > & get_result_col_list() const
Definition: Analyzer.h:1628
static const AccessPrivileges DROP_TABLE
Definition: DBObject.h:160
const QueryHint getParsedQueryHints() const
decltype(auto) get_shard_count_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
Catalog & getCatalog() const
Definition: SessionInfo.h:65
boost::function< void(TableDescriptor &, const NameValueAssign *, const std::list< ColumnDescriptor > &columns)> TableDefFuncPtr
Definition: ParserNode.cpp:80
void set_scale(int s)
Definition: sqltypes.h:354
int64_t bigintval
Definition: sqltypes.h:138
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:260
static const int32_t DELETE_FROM_TABLE
Definition: DBObject.h:93
static std::shared_ptr< QueryState > create(ARGS &&... args)
Definition: QueryState.h:140
static SQLTypeInfo common_numeric_type(const SQLTypeInfo &type1, const SQLTypeInfo &type2)
Definition: Analyzer.cpp:438
Definition: sqldefs.h:37
Definition: sqldefs.h:75
static const int32_t TRUNCATE_TABLE
Definition: DBObject.h:94
static const AccessPrivileges ALL_SERVER
Definition: DBObject.h:188
int16_t smallintval
Definition: sqltypes.h:136
const ColumnConstraintDef * get_column_constraint() const
Definition: ParserNode.h:796
static const AccessPrivileges CREATE_SERVER
Definition: DBObject.h:189
static const AccessPrivileges DELETE_FROM_TABLE
Definition: DBObject.h:164
int get_physical_cols() const
Definition: sqltypes.h:280
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:124
specifies the content in-memory of a row in the column metadata table
bool expr_is_null(const Analyzer::Expr *expr)
Definition: ParserNode.cpp:901
static const std::map< const std::string, const DataframeDefFuncPtr > dataframeDefFuncMap
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:195
static const int32_t EDIT_DASHBOARD
Definition: DBObject.h:106
static const int32_t DELETE_DASHBOARD
Definition: DBObject.h:104
static const AccessPrivileges CREATE_TABLE
Definition: DBObject.h:159
static const int32_t INSERT_INTO_TABLE
Definition: DBObject.h:91
std::string keyMetainfo
void set_num_aggs(int a)
Definition: Analyzer.h:1639
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1449
void set_group_by(std::list< std::shared_ptr< Analyzer::Expr >> &g)
Definition: Analyzer.h:1633
void set_default_table_attributes(const std::string &table_name, TableDescriptor &td, const int32_t column_count)
Definition: DdlUtils.cpp:506
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
void set_next_query(Query *q)
Definition: Analyzer.h:1636
std::string * stringval
Definition: sqltypes.h:143
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:611
AggregatedResult query(QueryStateProxy, std::string &sql_query_string, std::vector< size_t > outer_frag_indices, bool validate_only)
#define DEFAULT_PAGE_SIZE
bool g_enable_experimental_string_functions
void set_having_predicate(std::shared_ptr< Analyzer::Expr > p)
Definition: Analyzer.h:1634
void set_comp_param(int p)
Definition: sqltypes.h:360
void set_column_descriptor(const std::string &column_name, ColumnDescriptor &cd, SqlType *column_type, const bool not_null, const Encoding *encoding)
Definition: DdlUtils.cpp:492
void set_result_table_id(int id)
Definition: Analyzer.h:1630
std::string geo_layer_name
Definition: CopyParams.h:78
void loadKey()
Definition: DBObject.cpp:179
bool get_is_null() const
Definition: Analyzer.h:335
void validate_shard_column_type(const ColumnDescriptor &cd)
const AccessPrivileges & getPrivileges() const
Definition: DBObject.h:216
Definition: sqltypes.h:54
bool is_geometry() const
Definition: sqltypes.h:428
bool g_test_drop_column_rollback
Definition: ParserNode.cpp:71
void check_alter_table_privilege(const Catalog_Namespace::SessionInfo &session, const TableDescriptor *td)
Definition: sqldefs.h:69
ExecutionResult executeSimpleInsert(const Analyzer::Query &insert_query)
#define TRANSIENT_DICT_ID
Definition: sqltypes.h:198
const std::string OMNISCI_ROOT_USER
Definition: SysCatalog.h:59
std::shared_ptr< Catalog_Namespace::SessionInfo const > getConstSessionInfo() const
Definition: QueryState.cpp:77
void set_is_distinct(bool d)
Definition: Analyzer.h:1631
std::string get_type_name() const
Definition: sqltypes.h:362
static const AccessPrivileges SELECT_FROM_VIEW
Definition: DBObject.h:181
bool table_is_temporary(const TableDescriptor *const td)
bool is_null(const T &v, const SQLTypeInfo &t)
void validate_drop_table_type(const TableDescriptor *td, const TableType expected_table_type)
Definition: DdlUtils.cpp:537
std::vector< std::shared_ptr< TargetEntry > > & get_targetlist_nonconst()
Definition: Analyzer.h:1614
#define DEFAULT_FRAGMENT_ROWS
bool get_is_distinct() const
Definition: Analyzer.h:1609
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:261
Fragmenter_Namespace::FragmenterType fragType
Definition: sqldefs.h:76
Data_Namespace::MemoryLevel persistenceLevel
int get_rte_idx(const std::string &range_var_name) const
Definition: Analyzer.cpp:1338
void set_is_unionall(bool u)
Definition: Analyzer.h:1637
static const int32_t CREATE_DATABASE
Definition: DBObject.h:80
Definition: Catalog.h:63
void insertData(const Catalog_Namespace::SessionInfo &session_info, InsertData &insert_data)
decltype(auto) get_sort_column_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
static const AccessPrivileges ALL_DASHBOARD
Definition: DBObject.h:170
static const AccessPrivileges ACCESS
Definition: DBObject.h:154
static const int32_t VIEW_DASHBOARD
Definition: DBObject.h:105
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
static std::pair< AccessPrivileges, DBObjectType > parseStringPrivs(const std::string &privs, const DBObjectType &objectType, const std::string &object_name)
bool checkDBAccessPrivileges(const DBObjectType &permissionType, const AccessPrivileges &privs, const std::string &objectName="") const
Definition: SessionInfo.cpp:24
virtual std::shared_ptr< Analyzer::Expr > add_cast(const SQLTypeInfo &new_type_info)
Definition: Analyzer.cpp:689
static const AccessPrivileges ALL_TABLE
Definition: DBObject.h:158