OmniSciDB  0b528656ed
ParserNode.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "ParserNode.h"
26 
27 #include <boost/algorithm/string.hpp>
28 #include <boost/core/null_deleter.hpp>
29 #include <boost/filesystem.hpp>
30 #include <boost/function.hpp>
31 
32 #include <rapidjson/document.h>
33 #include <rapidjson/stringbuffer.h>
34 #include <rapidjson/writer.h>
35 
36 #include <cassert>
37 #include <cmath>
38 #include <limits>
39 #include <random>
40 #include <stdexcept>
41 #include <type_traits>
42 #include <typeinfo>
43 
45 #include "Catalog/Catalog.h"
51 #include "ImportExport/Importer.h"
52 #include "LockMgr/LockMgr.h"
54 #include "QueryEngine/Execute.h"
57 #include "ReservedKeywords.h"
58 #include "Shared/StringTransform.h"
59 #include "Shared/TimeGM.h"
60 #include "Shared/geo_compression.h"
61 #include "Shared/geo_types.h"
62 #include "Shared/mapd_glob.h"
63 #include "Shared/measure.h"
64 #include "Shared/shard_key.h"
66 #include "Utils/FsiUtils.h"
67 #include "gen-cpp/CalciteServer.h"
68 #include "parser.h"
69 
// File-scope counter defined in this translation unit and initialized to zero.
// NOTE(review): presumably the number of leaf nodes in a distributed setup — confirm
// against the code that assigns it.
70 size_t g_leaf_count{0};
// Declared here only; the definition lives in another translation unit.
// NOTE(review): presumably gates foreign-storage-interface (FSI) features — confirm.
73 extern bool g_enable_fsi;
74 
76 using namespace std::string_literals;
77 
// Callable signature: takes a mutable TableDescriptor, a NameValueAssign pointer and
// the list of column descriptors, and returns nothing.
// NOTE(review): presumably used as a per-option handler when processing table
// definitions — the call sites are outside this part of the file; confirm.
78 using TableDefFuncPtr = boost::function<void(TableDescriptor&,
79  const NameValueAssign*,
80  const std::list<ColumnDescriptor>& columns)>;
81 
// Same callable shape as TableDefFuncPtr, but operating on a DataframeTableDescriptor.
// NOTE(review): call sites are outside this part of the file; confirm intended use.
82 using DataframeDefFuncPtr =
83  boost::function<void(DataframeTableDescriptor&,
84  const NameValueAssign*,
85  const std::list<ColumnDescriptor>& columns)>;
86 
87 namespace Parser {
88 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
89  const Catalog_Namespace::Catalog& catalog,
90  Analyzer::Query& query,
91  TlistRefType allow_tlist_ref) const {
92  return makeExpr<Analyzer::Constant>(kNULLT, true);
93 }
94 
95 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
96  const Catalog_Namespace::Catalog& catalog,
97  Analyzer::Query& query,
98  TlistRefType allow_tlist_ref) const {
99  return analyzeValue(*stringval);
100 }
101 
102 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(
103  const std::string& stringval) {
104  SQLTypeInfo ti(kVARCHAR, stringval.length(), 0, true);
105  Datum d;
106  d.stringval = new std::string(stringval);
107  return makeExpr<Analyzer::Constant>(ti, false, d);
108 }
109 
110 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
111  const Catalog_Namespace::Catalog& catalog,
112  Analyzer::Query& query,
113  TlistRefType allow_tlist_ref) const {
114  return analyzeValue(intval);
115 }
116 
117 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(const int64_t intval) {
118  SQLTypes t;
119  Datum d;
120  if (intval >= INT16_MIN && intval <= INT16_MAX) {
121  t = kSMALLINT;
122  d.smallintval = (int16_t)intval;
123  } else if (intval >= INT32_MIN && intval <= INT32_MAX) {
124  t = kINT;
125  d.intval = (int32_t)intval;
126  } else {
127  t = kBIGINT;
128  d.bigintval = intval;
129  }
130  return makeExpr<Analyzer::Constant>(t, false, d);
131 }
132 
133 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
134  const Catalog_Namespace::Catalog& catalog,
135  Analyzer::Query& query,
136  TlistRefType allow_tlist_ref) const {
137  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
138  Datum d = StringToDatum(*fixedptval, ti);
139  return makeExpr<Analyzer::Constant>(ti, false, d);
140 }
141 
142 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(const int64_t numericval,
143  const int scale,
144  const int precision) {
145  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
146  ti.set_scale(scale);
147  ti.set_precision(precision);
148  Datum d;
149  d.bigintval = numericval;
150  return makeExpr<Analyzer::Constant>(ti, false, d);
151 }
152 
153 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
154  const Catalog_Namespace::Catalog& catalog,
155  Analyzer::Query& query,
156  TlistRefType allow_tlist_ref) const {
157  Datum d;
158  d.floatval = floatval;
159  return makeExpr<Analyzer::Constant>(kFLOAT, false, d);
160 }
161 
162 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
163  const Catalog_Namespace::Catalog& catalog,
164  Analyzer::Query& query,
165  TlistRefType allow_tlist_ref) const {
166  Datum d;
167  d.doubleval = doubleval;
168  return makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
169 }
170 
171 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
172  const Catalog_Namespace::Catalog& catalog,
173  Analyzer::Query& query,
174  TlistRefType allow_tlist_ref) const {
175  return get(timestampval_);
176 }
177 
178 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(const int64_t timestampval) {
179  Datum d;
180  d.bigintval = timestampval;
181  return makeExpr<Analyzer::Constant>(kTIMESTAMP, false, d);
182 }
183 
184 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
185  const Catalog_Namespace::Catalog& catalog,
186  Analyzer::Query& query,
187  TlistRefType allow_tlist_ref) const {
188  Datum d;
189  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
190 }
191 
192 std::shared_ptr<Analyzer::Expr> UserLiteral::get(const std::string& user) {
193  Datum d;
194  d.stringval = new std::string(user);
195  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
196 }
197 
198 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
199  const Catalog_Namespace::Catalog& catalog,
200  Analyzer::Query& query,
201  TlistRefType allow_tlist_ref) const {
202  SQLTypeInfo ti = SQLTypeInfo(kARRAY, true);
203  bool set_subtype = true;
204  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
205  for (auto& p : value_list) {
206  auto e = p->analyze(catalog, query, allow_tlist_ref);
207  CHECK(e);
208  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
209  if (c != nullptr && c->get_is_null()) {
210  value_exprs.push_back(c);
211  continue;
212  }
213  auto subtype = e->get_type_info().get_type();
214  if (subtype == kNULLT) {
215  // NULL element
216  } else if (set_subtype) {
217  ti.set_subtype(subtype);
218  set_subtype = false;
219  } else {
220  if (ti.get_subtype() != subtype) {
221  throw std::runtime_error("ARRAY element literals should be of the same type.");
222  }
223  }
224  value_exprs.push_back(e);
225  }
226  std::shared_ptr<Analyzer::Expr> result =
227  makeExpr<Analyzer::Constant>(ti, false, value_exprs);
228  return result;
229 }
230 
231 std::string ArrayLiteral::to_string() const {
232  std::string str = "{";
233  bool notfirst = false;
234  for (auto& p : value_list) {
235  if (notfirst) {
236  str += ", ";
237  } else {
238  notfirst = true;
239  }
240  str += p->to_string();
241  }
242  str += "}";
243  return str;
244 }
245 
246 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
247  const Catalog_Namespace::Catalog& catalog,
248  Analyzer::Query& query,
249  TlistRefType allow_tlist_ref) const {
250  auto left_expr = left->analyze(catalog, query, allow_tlist_ref);
251  const auto& left_type = left_expr->get_type_info();
252  if (right == nullptr) {
253  return makeExpr<Analyzer::UOper>(
254  left_type, left_expr->get_contains_agg(), optype, left_expr->decompress());
255  }
256  if (optype == kARRAY_AT) {
257  if (left_type.get_type() != kARRAY) {
258  throw std::runtime_error(left->to_string() + " is not of array type.");
259  }
260  auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
261  const auto& right_type = right_expr->get_type_info();
262  if (!right_type.is_integer()) {
263  throw std::runtime_error(right->to_string() + " is not of integer type.");
264  }
265  return makeExpr<Analyzer::BinOper>(
266  left_type.get_elem_type(), false, kARRAY_AT, kONE, left_expr, right_expr);
267  }
268  auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
269  return normalize(optype, opqualifier, left_expr, right_expr);
270 }
271 
// Builds the analyzer BinOper for `left_expr <optype> right_expr`, adding whatever
// casts and decompression steps are needed so both operands have the types computed
// by Analyzer::BinOper::analyze_type_info().
//
// optype      operator being applied
// qual        qualifier; any value other than kONE requires an array-typed right operand
// left_expr   analyzed left operand (taken by value; may be replaced by a cast)
// right_expr  analyzed right operand (taken by value; may be replaced by a cast)
//
// Throws std::runtime_error when a qualifier is used with a non-array right operand,
// or when a comparison other than kOVERLAPS is applied to two geometry operands.
272 std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
273  const SQLOps optype,
274  const SQLQualifier qual,
275  std::shared_ptr<Analyzer::Expr> left_expr,
276  std::shared_ptr<Analyzer::Expr> right_expr) {
277  if (left_expr->get_type_info().is_date_in_days() ||
278  right_expr->get_type_info().is_date_in_days()) {
279  // Do not propagate encoding
280  left_expr = left_expr->decompress();
281  right_expr = right_expr->decompress();
282  }
// left_type is bound after the optional decompression above, so it describes the
// operand that is actually used from here on.
283  const auto& left_type = left_expr->get_type_info();
284  auto right_type = right_expr->get_type_info();
285  if (qual != kONE) {
286  // subquery not supported yet.
287  CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
288  if (right_type.get_type() != kARRAY) {
289  throw std::runtime_error(
290  "Existential or universal qualifiers can only be used in front of a subquery "
291  "or an "
292  "expression of array type.");
293  }
// With a qualifier, type analysis runs against the array's element type.
294  right_type = right_type.get_elem_type();
295  }
296  SQLTypeInfo new_left_type;
297  SQLTypeInfo new_right_type;
298  auto result_type = Analyzer::BinOper::analyze_type_info(
299  optype, left_type, right_type, &new_left_type, &new_right_type);
// Time-interval results are returned as-is: no casts are added and the
// contains-aggregate argument is passed as false.
300  if (result_type.is_timeinterval()) {
301  return makeExpr<Analyzer::BinOper>(
302  result_type, false, optype, qual, left_expr, right_expr);
303  }
304  if (left_type != new_left_type) {
305  left_expr = left_expr->add_cast(new_left_type);
306  }
307  if (right_type != new_right_type) {
308  if (qual == kONE) {
309  right_expr = right_expr->add_cast(new_right_type);
310  } else {
// The right operand is still an array here, so cast to an array of the new type.
311  right_expr = right_expr->add_cast(new_right_type.get_array_type());
312  }
313  }
314 
315  if (IS_COMPARISON(optype)) {
316  if (optype != kOVERLAPS && new_left_type.is_geometry() &&
317  new_right_type.is_geometry()) {
318  throw std::runtime_error(
319  "Comparison operators are not yet supported for geospatial types.");
320  }
321 
// Dictionary-encoding cases for comparisons:
//  1. both dictionary-encoded with the same comp_param: leave both untouched;
//  2. exactly one side dictionary-encoded and the other unencoded: cast the
//     unencoded side to the encoded side's compression and comp_param;
//  3. anything else: decompress both sides.
322  if (new_left_type.get_compression() == kENCODING_DICT &&
323  new_right_type.get_compression() == kENCODING_DICT &&
324  new_left_type.get_comp_param() == new_right_type.get_comp_param()) {
325  // do nothing
326  } else if (new_left_type.get_compression() == kENCODING_DICT &&
327  new_right_type.get_compression() == kENCODING_NONE) {
328  SQLTypeInfo ti(new_right_type);
329  ti.set_compression(new_left_type.get_compression());
330  ti.set_comp_param(new_left_type.get_comp_param());
331  ti.set_fixed_size();
332  right_expr = right_expr->add_cast(ti);
333  } else if (new_right_type.get_compression() == kENCODING_DICT &&
334  new_left_type.get_compression() == kENCODING_NONE) {
335  SQLTypeInfo ti(new_left_type);
336  ti.set_compression(new_right_type.get_compression());
337  ti.set_comp_param(new_right_type.get_comp_param());
338  ti.set_fixed_size();
339  left_expr = left_expr->add_cast(ti);
340  } else {
341  left_expr = left_expr->decompress();
342  right_expr = right_expr->decompress();
343  }
344  } else {
// Non-comparison operators always work on decompressed operands.
345  left_expr = left_expr->decompress();
346  right_expr = right_expr->decompress();
347  }
// The result contains an aggregate if either final operand does.
348  bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
349  return makeExpr<Analyzer::BinOper>(
350  result_type, has_agg, optype, qual, left_expr, right_expr);
351 }
352 
353 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
354  const Catalog_Namespace::Catalog& catalog,
355  Analyzer::Query& query,
356  TlistRefType allow_tlist_ref) const {
357  throw std::runtime_error("Subqueries are not supported yet.");
358  return nullptr;
359 }
360 
361 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
362  const Catalog_Namespace::Catalog& catalog,
363  Analyzer::Query& query,
364  TlistRefType allow_tlist_ref) const {
365  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
366  auto result = makeExpr<Analyzer::UOper>(kBOOLEAN, kISNULL, arg_expr);
367  if (is_not) {
368  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
369  }
370  return result;
371 }
372 
373 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
374  const Catalog_Namespace::Catalog& catalog,
375  Analyzer::Query& query,
376  TlistRefType allow_tlist_ref) const {
377  throw std::runtime_error("Subqueries are not supported yet.");
378  return nullptr;
379 }
380 
381 std::shared_ptr<Analyzer::Expr> InValues::analyze(
382  const Catalog_Namespace::Catalog& catalog,
383  Analyzer::Query& query,
384  TlistRefType allow_tlist_ref) const {
385  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
386  SQLTypeInfo ti = arg_expr->get_type_info();
387  bool dict_comp = ti.get_compression() == kENCODING_DICT;
388  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
389  for (auto& p : value_list) {
390  auto e = p->analyze(catalog, query, allow_tlist_ref);
391  if (ti != e->get_type_info()) {
392  if (ti.is_string() && e->get_type_info().is_string()) {
393  ti = Analyzer::BinOper::common_string_type(ti, e->get_type_info());
394  } else if (ti.is_number() && e->get_type_info().is_number()) {
395  ti = Analyzer::BinOper::common_numeric_type(ti, e->get_type_info());
396  } else {
397  throw std::runtime_error("IN expressions must contain compatible types.");
398  }
399  }
400  if (dict_comp) {
401  value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
402  } else {
403  value_exprs.push_back(e);
404  }
405  }
406  if (!dict_comp) {
407  arg_expr = arg_expr->decompress();
408  arg_expr = arg_expr->add_cast(ti);
409  std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
410  for (auto p : value_exprs) {
411  cast_vals.push_back(p->add_cast(ti));
412  }
413  value_exprs.swap(cast_vals);
414  }
415  std::shared_ptr<Analyzer::Expr> result =
416  makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
417  if (is_not) {
418  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
419  }
420  return result;
421 }
422 
423 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
424  const Catalog_Namespace::Catalog& catalog,
425  Analyzer::Query& query,
426  TlistRefType allow_tlist_ref) const {
427  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
428  auto lower_expr = lower->analyze(catalog, query, allow_tlist_ref);
429  auto upper_expr = upper->analyze(catalog, query, allow_tlist_ref);
430  SQLTypeInfo new_left_type, new_right_type;
432  arg_expr->get_type_info(),
433  lower_expr->get_type_info(),
434  &new_left_type,
435  &new_right_type);
436  auto lower_pred =
437  makeExpr<Analyzer::BinOper>(kBOOLEAN,
438  kGE,
439  kONE,
440  arg_expr->add_cast(new_left_type)->decompress(),
441  lower_expr->add_cast(new_right_type)->decompress());
443  arg_expr->get_type_info(),
444  lower_expr->get_type_info(),
445  &new_left_type,
446  &new_right_type);
447  auto upper_pred = makeExpr<Analyzer::BinOper>(
448  kBOOLEAN,
449  kLE,
450  kONE,
451  arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
452  upper_expr->add_cast(new_right_type)->decompress());
453  std::shared_ptr<Analyzer::Expr> result =
454  makeExpr<Analyzer::BinOper>(kBOOLEAN, kAND, kONE, lower_pred, upper_pred);
455  if (is_not) {
456  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
457  }
458  return result;
459 }
460 
461 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
462  const Catalog_Namespace::Catalog& catalog,
463  Analyzer::Query& query,
464  TlistRefType allow_tlist_ref) const {
465  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
466  if (!arg_expr->get_type_info().is_string()) {
467  throw std::runtime_error(
468  "expression in char_length clause must be of a string type.");
469  }
470  std::shared_ptr<Analyzer::Expr> result =
471  makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length);
472  return result;
473 }
474 
475 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
476  const Catalog_Namespace::Catalog& catalog,
477  Analyzer::Query& query,
478  TlistRefType allow_tlist_ref) const {
479  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
480  if (!arg_expr->get_type_info().is_array()) {
481  throw std::runtime_error(
482  "expression in cardinality clause must be of an array type.");
483  }
484  std::shared_ptr<Analyzer::Expr> result =
485  makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
486  return result;
487 }
488 
// Rejects LIKE patterns whose last character is the escape character.
// An empty pattern has no last character and is accepted; calling back() on it would
// be undefined behavior.
// Throws std::runtime_error on a trailing escape character.
void LikeExpr::check_like_expr(const std::string& like_str, char escape_char) {
  if (!like_str.empty() && like_str.back() == escape_char) {
    throw std::runtime_error("LIKE pattern must not end with escape character.");
  }
}
494 
// Reports whether `like_str` has the form %literal%, i.e. it starts and ends with an
// unescaped '%' and contains no other unescaped wildcard or bracket character in
// between.
//
// like_str     LIKE pattern to inspect
// escape_char  character that escapes the one following it
// Returns true when the pattern is such a "simple" containment pattern.
bool LikeExpr::test_is_simple_expr(const std::string& like_str, char escape_char) {
  const size_t len = like_str.size();
  // if not bounded by '%' then not a simple string
  if (len < 2 || like_str[0] != '%' || like_str[len - 1] != '%') {
    return false;
  }
  // if the last '%' is escaped then not a simple string. The character two positions
  // before it only exists for patterns of length >= 3; without that guard a two
  // character pattern would read before the start of the string.
  if (like_str[len - 2] == escape_char &&
      (len < 3 || like_str[len - 3] != escape_char)) {
    return false;
  }
  for (size_t i = 1; i + 1 < len; i++) {
    const char c = like_str[i];
    const bool is_special = c == '%' || c == '_' || c == '[' || c == ']';
    if (is_special && like_str[i - 1] != escape_char) {
      return false;
    }
  }
  return true;
}
515 
// Replaces `like_str` with its literal text: unescaped '%' characters and the escape
// characters themselves are dropped, while any character that follows an escape
// character is kept verbatim.
//
// An escape only applies to the single character after it. In particular an escaped
// escape character (e.g. "\\\\" with '\\' as escape) yields one literal escape
// character and does not escape whatever comes next.
void LikeExpr::erase_cntl_chars(std::string& like_str, char escape_char) {
  // easier to create new string of allowable chars rather than erase chars from the
  // existing string
  std::string literal_text;
  literal_text.reserve(like_str.size());
  bool escaped = false;  // true when the previous character was an active escape
  for (const char cur_char : like_str) {
    if (!escaped && (cur_char == '%' || cur_char == escape_char)) {
      escaped = (cur_char == escape_char);
      continue;
    }
    literal_text.push_back(cur_char);
    escaped = false;
  }
  like_str.swap(literal_text);
}
534 
535 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
536  const Catalog_Namespace::Catalog& catalog,
537  Analyzer::Query& query,
538  TlistRefType allow_tlist_ref) const {
539  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
540  auto like_expr = like_string->analyze(catalog, query, allow_tlist_ref);
541  auto escape_expr = escape_string == nullptr
542  ? nullptr
543  : escape_string->analyze(catalog, query, allow_tlist_ref);
544  return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike, is_not);
545 }
546 
547 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
548  std::shared_ptr<Analyzer::Expr> like_expr,
549  std::shared_ptr<Analyzer::Expr> escape_expr,
550  const bool is_ilike,
551  const bool is_not) {
552  if (!arg_expr->get_type_info().is_string()) {
553  throw std::runtime_error("expression before LIKE must be of a string type.");
554  }
555  if (!like_expr->get_type_info().is_string()) {
556  throw std::runtime_error("expression after LIKE must be of a string type.");
557  }
558  char escape_char = '\\';
559  if (escape_expr != nullptr) {
560  if (!escape_expr->get_type_info().is_string()) {
561  throw std::runtime_error("expression after ESCAPE must be of a string type.");
562  }
563  if (!escape_expr->get_type_info().is_string()) {
564  throw std::runtime_error("expression after ESCAPE must be of a string type.");
565  }
566  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
567  if (c != nullptr && c->get_constval().stringval->length() > 1) {
568  throw std::runtime_error("String after ESCAPE must have a single character.");
569  }
570  escape_char = (*c->get_constval().stringval)[0];
571  }
572  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(like_expr);
573  bool is_simple = false;
574  if (c != nullptr) {
575  std::string& pattern = *c->get_constval().stringval;
576  if (is_ilike) {
577  std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
578  }
579  check_like_expr(pattern, escape_char);
580  is_simple = test_is_simple_expr(pattern, escape_char);
581  if (is_simple) {
582  erase_cntl_chars(pattern, escape_char);
583  }
584  }
585  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikeExpr>(
586  arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
587  if (is_not) {
588  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
589  }
590  return result;
591 }
592 
// Rejects REGEXP patterns whose last character is the escape character.
// An empty pattern has no last character and is accepted; calling back() on it would
// be undefined behavior.
// Throws std::runtime_error on a trailing escape character.
void RegexpExpr::check_pattern_expr(const std::string& pattern_str, char escape_char) {
  if (!pattern_str.empty() && pattern_str.back() == escape_char) {
    throw std::runtime_error("REGEXP pattern must not end with escape character.");
  }
}
598 
// Tries to rewrite a regular expression as an equivalent LIKE pattern.
// Only alphanumerics, spaces, '.', ".*" and ".+" (plus any character directly after
// an escape character) are understood:
//   .   -->  _
//   .*  -->  %
//   .+  -->  _%
//
// pattern_str  regular expression; overwritten with the LIKE pattern on success and
//              left untouched on failure
// escape_char  escape character of the regular expression
// Returns true when the whole pattern could be translated.
bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str, char escape_char) {
  char prev_char = '\0';
  char prev_prev_char = '\0';
  std::string like_str;
  for (const char cur_char : pattern_str) {
    // isalnum() is only defined for values representable as unsigned char, so bytes
    // of non-ASCII text must not be passed as (possibly negative) plain char.
    const bool is_alnum = isalnum(static_cast<unsigned char>(cur_char)) != 0;
    if (prev_char == escape_char || is_alnum || cur_char == ' ' || cur_char == '.') {
      like_str.push_back((cur_char == '.') ? '_' : cur_char);
      prev_prev_char = prev_char;
      prev_char = cur_char;
      continue;
    }
    if (prev_char == '.' && prev_prev_char != escape_char) {
      if (cur_char == '*' || cur_char == '+') {
        if (cur_char == '*') {
          // drop the '_' emitted for the '.', since ".*" may match nothing
          like_str.pop_back();
        }
        // .* --> %
        // .+ --> _%
        like_str.push_back('%');
        prev_prev_char = prev_char;
        prev_char = cur_char;
        continue;
      }
    }
    return false;
  }
  pattern_str = like_str;
  return true;
}
629 
630 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
631  const Catalog_Namespace::Catalog& catalog,
632  Analyzer::Query& query,
633  TlistRefType allow_tlist_ref) const {
634  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
635  auto pattern_expr = pattern_string->analyze(catalog, query, allow_tlist_ref);
636  auto escape_expr = escape_string == nullptr
637  ? nullptr
638  : escape_string->analyze(catalog, query, allow_tlist_ref);
639  return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not);
640 }
641 
642 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
643  std::shared_ptr<Analyzer::Expr> arg_expr,
644  std::shared_ptr<Analyzer::Expr> pattern_expr,
645  std::shared_ptr<Analyzer::Expr> escape_expr,
646  const bool is_not) {
647  if (!arg_expr->get_type_info().is_string()) {
648  throw std::runtime_error("expression before REGEXP must be of a string type.");
649  }
650  if (!pattern_expr->get_type_info().is_string()) {
651  throw std::runtime_error("expression after REGEXP must be of a string type.");
652  }
653  char escape_char = '\\';
654  if (escape_expr != nullptr) {
655  if (!escape_expr->get_type_info().is_string()) {
656  throw std::runtime_error("expression after ESCAPE must be of a string type.");
657  }
658  if (!escape_expr->get_type_info().is_string()) {
659  throw std::runtime_error("expression after ESCAPE must be of a string type.");
660  }
661  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
662  if (c != nullptr && c->get_constval().stringval->length() > 1) {
663  throw std::runtime_error("String after ESCAPE must have a single character.");
664  }
665  escape_char = (*c->get_constval().stringval)[0];
666  if (escape_char != '\\') {
667  throw std::runtime_error("Only supporting '\\' escape character.");
668  }
669  }
670  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(pattern_expr);
671  if (c != nullptr) {
672  std::string& pattern = *c->get_constval().stringval;
673  if (translate_to_like_pattern(pattern, escape_char)) {
674  return LikeExpr::get(arg_expr, pattern_expr, escape_expr, false, is_not);
675  }
676  }
677  std::shared_ptr<Analyzer::Expr> result =
678  makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
679  if (is_not) {
680  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
681  }
682  return result;
683 }
684 
685 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
686  const Catalog_Namespace::Catalog& catalog,
687  Analyzer::Query& query,
688  TlistRefType allow_tlist_ref) const {
689  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
690  return LikelihoodExpr::get(arg_expr, likelihood, is_not);
691 }
692 
693 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
694  std::shared_ptr<Analyzer::Expr> arg_expr,
695  float likelihood,
696  const bool is_not) {
697  if (!arg_expr->get_type_info().is_boolean()) {
698  throw std::runtime_error("likelihood expression expects boolean type.");
699  }
700  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikelihoodExpr>(
701  arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
702  return result;
703 }
704 
705 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
706  const Catalog_Namespace::Catalog& catalog,
707  Analyzer::Query& query,
708  TlistRefType allow_tlist_ref) const {
709  throw std::runtime_error("Subqueries are not supported yet.");
710  return nullptr;
711 }
712 
713 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
714  const Catalog_Namespace::Catalog& catalog,
715  Analyzer::Query& query,
716  TlistRefType allow_tlist_ref) const {
717  int table_id{0};
718  int rte_idx{0};
719  const ColumnDescriptor* cd{nullptr};
720  if (column == nullptr) {
721  throw std::runtime_error("invalid column name *.");
722  }
723  if (table != nullptr) {
724  rte_idx = query.get_rte_idx(*table);
725  if (rte_idx < 0) {
726  throw std::runtime_error("range variable or table name " + *table +
727  " does not exist.");
728  }
729  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
730  cd = rte->get_column_desc(catalog, *column);
731  if (cd == nullptr) {
732  throw std::runtime_error("Column name " + *column + " does not exist.");
733  }
734  table_id = rte->get_table_id();
735  } else {
736  bool found = false;
737  int i = 0;
738  for (auto rte : query.get_rangetable()) {
739  cd = rte->get_column_desc(catalog, *column);
740  if (cd != nullptr && !found) {
741  found = true;
742  rte_idx = i;
743  table_id = rte->get_table_id();
744  } else if (cd != nullptr && found) {
745  throw std::runtime_error("Column name " + *column + " is ambiguous.");
746  }
747  i++;
748  }
749  if (cd == nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
750  // check if this is a reference to a targetlist entry
751  bool found = false;
752  int varno = -1;
753  int i = 1;
754  std::shared_ptr<Analyzer::TargetEntry> tle;
755  for (auto p : query.get_targetlist()) {
756  if (*column == p->get_resname() && !found) {
757  found = true;
758  varno = i;
759  tle = p;
760  } else if (*column == p->get_resname() && found) {
761  throw std::runtime_error("Output alias " + *column + " is ambiguous.");
762  }
763  i++;
764  }
765  if (found) {
766  if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
767  Analyzer::Var* v = static_cast<Analyzer::Var*>(tle->get_expr());
769  return v->deep_copy();
770  }
771  }
772  if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
773  return tle->get_expr()->deep_copy();
774  } else {
775  return makeExpr<Analyzer::Var>(
776  tle->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
777  }
778  }
779  }
780  if (cd == nullptr) {
781  throw std::runtime_error("Column name " + *column + " does not exist.");
782  }
783  }
784  return makeExpr<Analyzer::ColumnVar>(cd->columnType, table_id, cd->columnId, rte_idx);
785 }
786 
787 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
788  const Catalog_Namespace::Catalog& catalog,
789  Analyzer::Query& query,
790  TlistRefType allow_tlist_ref) const {
791  SQLTypeInfo result_type;
792  SQLAgg agg_type;
793  std::shared_ptr<Analyzer::Expr> arg_expr;
794  bool is_distinct = false;
795  if (boost::iequals(*name, "count")) {
796  result_type = SQLTypeInfo(kBIGINT, false);
797  agg_type = kCOUNT;
798  if (arg) {
799  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
800  const SQLTypeInfo& ti = arg_expr->get_type_info();
801  if (ti.is_string() && (ti.get_compression() != kENCODING_DICT || !distinct)) {
802  throw std::runtime_error(
803  "Strings must be dictionary-encoded in COUNT(DISTINCT).");
804  }
805  if (ti.get_type() == kARRAY && !distinct) {
806  throw std::runtime_error("Only COUNT(DISTINCT) is supported on arrays.");
807  }
808  }
809  is_distinct = distinct;
810  } else {
811  if (!arg) {
812  throw std::runtime_error("Cannot compute " + *name + " with argument '*'.");
813  }
814  if (boost::iequals(*name, "min")) {
815  agg_type = kMIN;
816  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
817  arg_expr = arg_expr->decompress();
818  result_type = arg_expr->get_type_info();
819  } else if (boost::iequals(*name, "max")) {
820  agg_type = kMAX;
821  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
822  arg_expr = arg_expr->decompress();
823  result_type = arg_expr->get_type_info();
824  } else if (boost::iequals(*name, "avg")) {
825  agg_type = kAVG;
826  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
827  if (!arg_expr->get_type_info().is_number()) {
828  throw std::runtime_error("Cannot compute AVG on non-number-type arguments.");
829  }
830  arg_expr = arg_expr->decompress();
831  result_type = SQLTypeInfo(kDOUBLE, false);
832  } else if (boost::iequals(*name, "sum")) {
833  agg_type = kSUM;
834  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
835  if (!arg_expr->get_type_info().is_number()) {
836  throw std::runtime_error("Cannot compute SUM on non-number-type arguments.");
837  }
838  arg_expr = arg_expr->decompress();
839  result_type = arg_expr->get_type_info().is_integer() ? SQLTypeInfo(kBIGINT, false)
840  : arg_expr->get_type_info();
841  } else if (boost::iequals(*name, "unnest")) {
842  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
843  const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
844  if (arg_ti.get_type() != kARRAY) {
845  throw std::runtime_error(arg->to_string() + " is not of array type.");
846  }
847  return makeExpr<Analyzer::UOper>(arg_ti.get_elem_type(), false, kUNNEST, arg_expr);
848  } else {
849  throw std::runtime_error("invalid function name: " + *name);
850  }
851  if (arg_expr->get_type_info().is_string() ||
852  arg_expr->get_type_info().get_type() == kARRAY) {
853  throw std::runtime_error(
854  "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
855  }
856  }
857  int naggs = query.get_num_aggs();
858  query.set_num_aggs(naggs + 1);
859  return makeExpr<Analyzer::AggExpr>(
860  result_type, agg_type, arg_expr, is_distinct, nullptr);
861 }
862 
863 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
864  const Catalog_Namespace::Catalog& catalog,
865  Analyzer::Query& query,
866  TlistRefType allow_tlist_ref) const {
867  target_type->check_type();
868  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
869  SQLTypeInfo ti(target_type->get_type(),
870  target_type->get_param1(),
871  target_type->get_param2(),
872  arg_expr->get_type_info().get_notnull());
873  if (arg_expr->get_type_info().get_type() != target_type->get_type() &&
874  arg_expr->get_type_info().get_compression() != kENCODING_NONE) {
875  arg_expr->decompress();
876  }
877  return arg_expr->add_cast(ti);
878 }
879 
880 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
881  const Catalog_Namespace::Catalog& catalog,
882  Analyzer::Query& query,
883  TlistRefType allow_tlist_ref) const {
884  SQLTypeInfo ti;
885  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
886  expr_pair_list;
887  for (auto& p : when_then_list) {
888  auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
889  if (e1->get_type_info().get_type() != kBOOLEAN) {
890  throw std::runtime_error("Only boolean expressions can be used after WHEN.");
891  }
892  auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
893  expr_pair_list.emplace_back(e1, e2);
894  }
895  auto else_e = else_expr ? else_expr->analyze(catalog, query, allow_tlist_ref) : nullptr;
896  return normalize(expr_pair_list, else_e);
897 }
898 
899 namespace {
900 
901 bool expr_is_null(const Analyzer::Expr* expr) {
902  if (expr->get_type_info().get_type() == kNULLT) {
903  return true;
904  }
905  const auto const_expr = dynamic_cast<const Analyzer::Constant*>(expr);
906  return const_expr && const_expr->get_is_null();
907 }
908 
// NOTE(review): the function signature (listing line 909) is not present in this
// listing; the body below reads a string literal through `str_literal`.
// Maps the literal's text to a boolean: "t", "true", "T", "True" yield true and
// "f", "false", "F", "False" yield false. Any other spelling (including other
// capitalisations such as "TRUE") raises std::runtime_error.
910  const std::string* s = str_literal->get_stringval();
911  if (*s == "t" || *s == "true" || *s == "T" || *s == "True") {
912  return true;
913  } else if (*s == "f" || *s == "false" || *s == "F" || *s == "False") {
914  return false;
915  } else {
916  throw std::runtime_error("Invalid string for boolean " + *s);
917  }
918 }
919 
920 } // namespace
921 
922 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
923  const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
924  std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
925  const std::shared_ptr<Analyzer::Expr> else_e_in) {
926  SQLTypeInfo ti;
927  bool has_agg = false;
928  for (auto& p : expr_pair_list) {
929  auto e1 = p.first;
930  CHECK(e1->get_type_info().is_boolean());
931  auto e2 = p.second;
932  if (ti.get_type() == kNULLT) {
933  ti = e2->get_type_info();
934  } else if (e2->get_type_info().get_type() == kNULLT) {
935  ti.set_notnull(false);
936  e2->set_type_info(ti);
937  } else if (ti != e2->get_type_info()) {
938  if (ti.is_string() && e2->get_type_info().is_string()) {
939  ti = Analyzer::BinOper::common_string_type(ti, e2->get_type_info());
940  } else if (ti.is_number() && e2->get_type_info().is_number()) {
941  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
942  } else if (ti.is_boolean() && e2->get_type_info().is_boolean()) {
943  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
944  } else {
945  throw std::runtime_error(
946  "expressions in THEN clause must be of the same or compatible types.");
947  }
948  }
949  if (e2->get_contains_agg()) {
950  has_agg = true;
951  }
952  }
953  auto else_e = else_e_in;
954  if (else_e) {
955  if (else_e->get_contains_agg()) {
956  has_agg = true;
957  }
958  if (expr_is_null(else_e.get())) {
959  ti.set_notnull(false);
960  else_e->set_type_info(ti);
961  } else if (ti != else_e->get_type_info()) {
962  ti.set_notnull(false);
963  if (ti.is_string() && else_e->get_type_info().is_string()) {
964  ti = Analyzer::BinOper::common_string_type(ti, else_e->get_type_info());
965  } else if (ti.is_number() && else_e->get_type_info().is_number()) {
966  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
967  } else if (ti.is_boolean() && else_e->get_type_info().is_boolean()) {
968  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
969  } else if (get_logical_type_info(ti) !=
970  get_logical_type_info(else_e->get_type_info())) {
971  throw std::runtime_error(
972  // types differing by encoding will be resolved at decode
973 
974  "expressions in ELSE clause must be of the same or compatible types as those "
975  "in the THEN clauses.");
976  }
977  }
978  }
979  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
980  cast_expr_pair_list;
981  for (auto p : expr_pair_list) {
982  ti.set_notnull(false);
983  cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
984  }
985  if (else_e != nullptr) {
986  else_e = else_e->add_cast(ti);
987  } else {
988  Datum d;
989  // always create an else expr so that executor doesn't need to worry about it
990  ti.set_notnull(false);
991  else_e = makeExpr<Analyzer::Constant>(ti, true, d);
992  }
993  if (ti.get_type() == kNULLT) {
994  throw std::runtime_error(
995  "Can't deduce the type for case expressions, all branches null");
996  }
997  return makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
998 }
999 
1000 std::string CaseExpr::to_string() const {
1001  std::string str("CASE ");
1002  for (auto& p : when_then_list) {
1003  str += "WHEN " + p->get_expr1()->to_string() + " THEN " +
1004  p->get_expr2()->to_string() + " ";
1005  }
1006  if (else_expr != nullptr) {
1007  str += "ELSE " + else_expr->to_string();
1008  }
1009  str += " END";
1010  return str;
1011 }
1012 
1013 void UnionQuery::analyze(const Catalog_Namespace::Catalog& catalog,
1014  Analyzer::Query& query) const {
1015  left->analyze(catalog, query);
1016  Analyzer::Query* right_query = new Analyzer::Query();
1017  right->analyze(catalog, *right_query);
1018  query.set_next_query(right_query);
1019  query.set_is_unionall(is_unionall);
1020 }
1021 
1022 void QuerySpec::analyze_having_clause(const Catalog_Namespace::Catalog& catalog,
1023  Analyzer::Query& query) const {
1024  std::shared_ptr<Analyzer::Expr> p;
1025  if (having_clause != nullptr) {
1026  p = having_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1027  if (p->get_type_info().get_type() != kBOOLEAN) {
1028  throw std::runtime_error("Only boolean expressions can be in HAVING clause.");
1029  }
1030  p->check_group_by(query.get_group_by());
1031  }
1032  query.set_having_predicate(p);
1033 }
1034 
// Analyzes the GROUP BY clause and stores the resulting expression list on `query`
// via set_group_by().
//
// Each GROUP BY item is either an integer literal, treated as a 1-based ordinal into
// the target list, or a general expression analyzed with TLIST_REF. After the list is
// built, every target-list expression is validated with check_group_by() whenever the
// query has aggregates or a non-empty GROUP BY.
//
// Throws std::runtime_error for a non-integer literal, an out-of-range ordinal, or an
// ordinal that refers to a target containing an aggregate.
1035 void QuerySpec::analyze_group_by(const Catalog_Namespace::Catalog& catalog,
1036  Analyzer::Query& query) const {
1037  std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1038  if (!groupby_clause.empty()) {
// gexpr_no is the 1-based position of the current item within the GROUP BY list.
1039  int gexpr_no = 1;
1040  std::shared_ptr<Analyzer::Expr> gexpr;
1041  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1042  query.get_targetlist();
1043  for (auto& c : groupby_clause) {
1044  // special-case ordinal numbers in GROUP BY
1045  if (dynamic_cast<Literal*>(c.get())) {
// Only integer literals are accepted; any other literal kind is an error.
1046  IntLiteral* i = dynamic_cast<IntLiteral*>(c.get());
1047  if (!i) {
1048  throw std::runtime_error("Invalid literal in GROUP BY clause.");
1049  }
1050  int varno = (int)i->get_intval();
1051  if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1052  throw std::runtime_error("Invalid ordinal number in GROUP BY clause.");
1053  }
1054  if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1055  throw std::runtime_error(
1056  "Ordinal number in GROUP BY cannot reference an expression containing "
1057  "aggregate "
1058  "functions.");
1059  }
// The ordinal becomes a Var pointing at the referenced target-list entry.
1060  gexpr = makeExpr<Analyzer::Var>(
1061  tlist[varno - 1]->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
1062  } else {
1063  gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1064  }
1065  const SQLTypeInfo gti = gexpr->get_type_info();
1066  bool set_new_type = false;
1067  SQLTypeInfo ti(gti);
// Strings without encoding are grouped through a cast to the adjusted type `ti`.
// NOTE(review): listing lines 1070-1071 are absent here; they presumably adjust `ti`
// further before set_fixed_size() -- confirm against the full source.
1068  if (gti.is_string() && gti.get_compression() == kENCODING_NONE) {
1069  set_new_type = true;
1072  ti.set_fixed_size();
1073  }
1074  std::shared_ptr<Analyzer::Var> v;
1075  if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
// The item refers to a target-list entry: group by that entry's own expression and
// replace the entry's expression with a Var that points at this GROUP BY position.
1076  v = std::static_pointer_cast<Analyzer::Var>(gexpr);
1077  int n = v->get_varno();
1078  gexpr = tlist[n - 1]->get_own_expr();
1079  auto cv = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gexpr);
1080  if (cv != nullptr) {
1081  // inherit all ColumnVar info for lineage.
1082  *std::static_pointer_cast<Analyzer::ColumnVar>(v) = *cv;
1083  }
1084  v->set_which_row(Analyzer::Var::kGROUPBY);
1085  v->set_varno(gexpr_no);
1086  tlist[n - 1]->set_expr(v);
1087  }
1088  if (set_new_type) {
// Keep the Var's type in sync with the cast expression that is actually grouped on.
1089  auto new_e = gexpr->add_cast(ti);
1090  groupby.push_back(new_e);
1091  if (v != nullptr) {
1092  v->set_type_info(new_e->get_type_info());
1093  }
1094  } else {
1095  groupby.push_back(gexpr);
1096  }
1097  gexpr_no++;
1098  }
1099  }
// Validate every target against the GROUP BY list when grouping or aggregating.
1100  if (query.get_num_aggs() > 0 || !groupby.empty()) {
1101  for (auto t : query.get_targetlist()) {
1102  auto e = t->get_expr();
1103  e->check_group_by(groupby);
1104  }
1105  }
1106  query.set_group_by(groupby);
1107 }
1108 
1109 void QuerySpec::analyze_where_clause(const Catalog_Namespace::Catalog& catalog,
1110  Analyzer::Query& query) const {
1111  if (where_clause == nullptr) {
1112  query.set_where_predicate(nullptr);
1113  return;
1114  }
1115  auto p = where_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1116  if (p->get_type_info().get_type() != kBOOLEAN) {
1117  throw std::runtime_error("Only boolean expressions can be in WHERE clause.");
1118  }
1119  query.set_where_predicate(p);
1120 }
1121 
1122 void QuerySpec::analyze_select_clause(const Catalog_Namespace::Catalog& catalog,
1123  Analyzer::Query& query) const {
1124  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1125  query.get_targetlist_nonconst();
1126  if (select_clause.empty()) {
1127  // this means SELECT *
1128  int rte_idx = 0;
1129  for (auto rte : query.get_rangetable()) {
1130  rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1131  }
1132  } else {
1133  for (auto& p : select_clause) {
1134  const Parser::Expr* select_expr = p->get_select_expr();
1135  // look for the case of range_var.*
1136  if (typeid(*select_expr) == typeid(ColumnRef) &&
1137  dynamic_cast<const ColumnRef*>(select_expr)->get_column() == nullptr) {
1138  const std::string* range_var_name =
1139  dynamic_cast<const ColumnRef*>(select_expr)->get_table();
1140  int rte_idx = query.get_rte_idx(*range_var_name);
1141  if (rte_idx < 0) {
1142  throw std::runtime_error("invalid range variable name: " + *range_var_name);
1143  }
1144  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
1145  rte->expand_star_in_targetlist(catalog, tlist, rte_idx);
1146  } else {
1147  auto e = select_expr->analyze(catalog, query);
1148  std::string resname;
1149 
1150  if (p->get_alias() != nullptr) {
1151  resname = *p->get_alias();
1152  } else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1153  !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1154  auto colvar = std::static_pointer_cast<Analyzer::ColumnVar>(e);
1155  const ColumnDescriptor* col_desc = catalog.getMetadataForColumn(
1156  colvar->get_table_id(), colvar->get_column_id());
1157  resname = col_desc->columnName;
1158  }
1159  if (e->get_type_info().get_type() == kNULLT) {
1160  throw std::runtime_error(
1161  "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1162  }
1163  auto o = std::static_pointer_cast<Analyzer::UOper>(e);
1164  bool unnest = (o != nullptr && o->get_optype() == kUNNEST);
1165  auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1166  tlist.push_back(tle);
1167  }
1168  }
1169  }
1170 }
1171 
// Analyzes the FROM clause: every referenced table is looked up in the catalog and
// added to `query` as a range table entry. The entry is named after the explicit
// range variable when one is given, otherwise after the table itself.
//
// Throws std::runtime_error if a referenced table does not exist.
1172 void QuerySpec::analyze_from_clause(const Catalog_Namespace::Catalog& catalog,
1173  Analyzer::Query& query) const {
// NOTE(review): listing line 1174 is absent here; it presumably declares `rte`, which
// is assigned further down -- confirm against the full source.
1175  for (auto& p : from_clause) {
1176  const TableDescriptor* table_desc;
1177  table_desc = catalog.getMetadataForTable(*p->get_table_name());
1178  if (table_desc == nullptr) {
1179  throw std::runtime_error("Table " + *p->get_table_name() + " does not exist.");
1180  }
1181  std::string range_var;
1182  if (p->get_range_var() == nullptr) {
1183  range_var = *p->get_table_name();
1184  } else {
1185  range_var = *p->get_range_var();
1186  }
// The raw pointer is handed to the query via add_rte().
// NOTE(review): presumably the query takes ownership -- confirm.
1187  rte = new Analyzer::RangeTableEntry(range_var, table_desc, nullptr);
1188  query.add_rte(rte);
1189  }
1190 }
1191 
// Analyzes a query specification by running the per-clause analyses in dependency
// order: FROM fills the range table that SELECT expansion reads, SELECT fills the
// target list that GROUP BY reads, and HAVING is checked against the stored GROUP BY.
1192 void QuerySpec::analyze(const Catalog_Namespace::Catalog& catalog,
1193  Analyzer::Query& query) const {
// NOTE(review): listing line 1194 is absent here -- confirm against the full source.
1195  analyze_from_clause(catalog, query);
1196  analyze_select_clause(catalog, query);
1197  analyze_where_clause(catalog, query);
1198  analyze_group_by(catalog, query);
1199  analyze_having_clause(catalog, query);
1200 }
1201 
1202 void SelectStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1203  Analyzer::Query& query) const {
1204  query.set_stmt_type(kSELECT);
1205  query.set_limit(limit);
1206  if (offset < 0) {
1207  throw std::runtime_error("OFFSET cannot be negative.");
1208  }
1209  query.set_offset(offset);
1210  query_expr->analyze(catalog, query);
1211  if (orderby_clause.empty() && !query.get_is_distinct()) {
1212  query.set_order_by(nullptr);
1213  return;
1214  }
1215  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1216  query.get_targetlist();
1217  std::list<Analyzer::OrderEntry>* order_by = new std::list<Analyzer::OrderEntry>();
1218  if (!orderby_clause.empty()) {
1219  for (auto& p : orderby_clause) {
1220  int tle_no = p->get_colno();
1221  if (tle_no == 0) {
1222  // use column name
1223  // search through targetlist for matching name
1224  const std::string* name = p->get_column()->get_column();
1225  tle_no = 1;
1226  bool found = false;
1227  for (auto tle : tlist) {
1228  if (tle->get_resname() == *name) {
1229  found = true;
1230  break;
1231  }
1232  tle_no++;
1233  }
1234  if (!found) {
1235  throw std::runtime_error("invalid name in order by: " + *name);
1236  }
1237  }
1238  order_by->push_back(
1239  Analyzer::OrderEntry(tle_no, p->get_is_desc(), p->get_nulls_first()));
1240  }
1241  }
1242  if (query.get_is_distinct()) {
1243  // extend order_by to include all targetlist entries.
1244  for (int i = 1; i <= static_cast<int>(tlist.size()); i++) {
1245  bool in_orderby = false;
1246  std::for_each(order_by->begin(),
1247  order_by->end(),
1248  [&in_orderby, i](const Analyzer::OrderEntry& oe) {
1249  in_orderby = in_orderby || (i == oe.tle_no);
1250  });
1251  if (!in_orderby) {
1252  order_by->push_back(Analyzer::OrderEntry(i, false, false));
1253  }
1254  }
1255  }
1256  query.set_order_by(order_by);
1257 }
1258 
1259 std::string SelectEntry::to_string() const {
1260  std::string str = select_expr->to_string();
1261  if (alias != nullptr) {
1262  str += " AS " + *alias;
1263  }
1264  return str;
1265 }
1266 
1267 std::string TableRef::to_string() const {
1268  std::string str = *table_name;
1269  if (range_var != nullptr) {
1270  str += " " + *range_var;
1271  }
1272  return str;
1273 }
1274 
1275 std::string ColumnRef::to_string() const {
1276  std::string str;
1277  if (table == nullptr) {
1278  str = *column;
1279  } else if (column == nullptr) {
1280  str = *table + ".*";
1281  } else {
1282  str = *table + "." + *column;
1283  }
1284  return str;
1285 }
1286 
1287 std::string OperExpr::to_string() const {
1288  std::string op_str[] = {
1289  "=", "===", "<>", "<", ">", "<=", ">=", " AND ", " OR ", "NOT", "-", "+", "*", "/"};
1290  std::string str;
1291  if (optype == kUMINUS) {
1292  str = "-(" + left->to_string() + ")";
1293  } else if (optype == kNOT) {
1294  str = "NOT (" + left->to_string() + ")";
1295  } else if (optype == kARRAY_AT) {
1296  str = left->to_string() + "[" + right->to_string() + "]";
1297  } else if (optype == kUNNEST) {
1298  str = "UNNEST(" + left->to_string() + ")";
1299  } else if (optype == kIN) {
1300  str = "(" + left->to_string() + " IN " + right->to_string() + ")";
1301  } else {
1302  str = "(" + left->to_string() + op_str[optype] + right->to_string() + ")";
1303  }
1304  return str;
1305 }
1306 
1307 std::string InExpr::to_string() const {
1308  std::string str = arg->to_string();
1309  if (is_not) {
1310  str += " NOT IN ";
1311  } else {
1312  str += " IN ";
1313  }
1314  return str;
1315 }
1316 
1317 std::string ExistsExpr::to_string() const {
1318  return "EXISTS (" + query->to_string() + ")";
1319 }
1320 
1321 std::string SubqueryExpr::to_string() const {
1322  std::string str;
1323  str = "(";
1324  str += query->to_string();
1325  str += ")";
1326  return str;
1327 }
1328 
1329 std::string IsNullExpr::to_string() const {
1330  std::string str = arg->to_string();
1331  if (is_not) {
1332  str += " IS NOT NULL";
1333  } else {
1334  str += " IS NULL";
1335  }
1336  return str;
1337 }
1338 
1339 std::string InSubquery::to_string() const {
1340  std::string str = InExpr::to_string();
1341  str += subquery->to_string();
1342  return str;
1343 }
1344 
1345 std::string InValues::to_string() const {
1346  std::string str = InExpr::to_string() + "(";
1347  bool notfirst = false;
1348  for (auto& p : value_list) {
1349  if (notfirst) {
1350  str += ", ";
1351  } else {
1352  notfirst = true;
1353  }
1354  str += p->to_string();
1355  }
1356  str += ")";
1357  return str;
1358 }
1359 
1360 std::string BetweenExpr::to_string() const {
1361  std::string str = arg->to_string();
1362  if (is_not) {
1363  str += " NOT BETWEEN ";
1364  } else {
1365  str += " BETWEEN ";
1366  }
1367  str += lower->to_string() + " AND " + upper->to_string();
1368  return str;
1369 }
1370 
1371 std::string CharLengthExpr::to_string() const {
1372  std::string str;
1373  if (calc_encoded_length) {
1374  str = "CHAR_LENGTH (" + arg->to_string() + ")";
1375  } else {
1376  str = "LENGTH (" + arg->to_string() + ")";
1377  }
1378  return str;
1379 }
1380 
1381 std::string CardinalityExpr::to_string() const {
1382  std::string str = "CARDINALITY(" + arg->to_string() + ")";
1383  return str;
1384 }
1385 
1386 std::string LikeExpr::to_string() const {
1387  std::string str = arg->to_string();
1388  if (is_not) {
1389  str += " NOT LIKE ";
1390  } else {
1391  str += " LIKE ";
1392  }
1393  str += like_string->to_string();
1394  if (escape_string != nullptr) {
1395  str += " ESCAPE " + escape_string->to_string();
1396  }
1397  return str;
1398 }
1399 
1400 std::string RegexpExpr::to_string() const {
1401  std::string str = arg->to_string();
1402  if (is_not) {
1403  str += " NOT REGEXP ";
1404  } else {
1405  str += " REGEXP ";
1406  }
1407  str += pattern_string->to_string();
1408  if (escape_string != nullptr) {
1409  str += " ESCAPE " + escape_string->to_string();
1410  }
1411  return str;
1412 }
1413 
1414 std::string LikelihoodExpr::to_string() const {
1415  std::string str = " LIKELIHOOD ";
1416  str += arg->to_string();
1417  str += " ";
1418  str += boost::lexical_cast<std::string>(is_not ? 1.0 - likelihood : likelihood);
1419  return str;
1420 }
1421 
1422 std::string FunctionRef::to_string() const {
1423  std::string str = *name + "(";
1424  if (distinct) {
1425  str += "DISTINCT ";
1426  }
1427  if (arg == nullptr) {
1428  str += "*)";
1429  } else {
1430  str += arg->to_string() + ")";
1431  }
1432  return str;
1433 }
1434 
1435 std::string QuerySpec::to_string() const {
1436  std::string query_str = "SELECT ";
1437  if (is_distinct) {
1438  query_str += "DISTINCT ";
1439  }
1440  if (select_clause.empty()) {
1441  query_str += "* ";
1442  } else {
1443  bool notfirst = false;
1444  for (auto& p : select_clause) {
1445  if (notfirst) {
1446  query_str += ", ";
1447  } else {
1448  notfirst = true;
1449  }
1450  query_str += p->to_string();
1451  }
1452  }
1453  query_str += " FROM ";
1454  bool notfirst = false;
1455  for (auto& p : from_clause) {
1456  if (notfirst) {
1457  query_str += ", ";
1458  } else {
1459  notfirst = true;
1460  }
1461  query_str += p->to_string();
1462  }
1463  if (where_clause) {
1464  query_str += " WHERE " + where_clause->to_string();
1465  }
1466  if (!groupby_clause.empty()) {
1467  query_str += " GROUP BY ";
1468  bool notfirst = false;
1469  for (auto& p : groupby_clause) {
1470  if (notfirst) {
1471  query_str += ", ";
1472  } else {
1473  notfirst = true;
1474  }
1475  query_str += p->to_string();
1476  }
1477  }
1478  if (having_clause) {
1479  query_str += " HAVING " + having_clause->to_string();
1480  }
1481  query_str += ";";
1482  return query_str;
1483 }
1484 
// Common analysis for INSERT statements: marks the query as kINSERT, resolves the
// target table, and stores the list of target column ids on the query.
//
// Without an explicit column list, the ids come from
// getAllColumnMetadataForTable(tableId, false, false, true). With one, each named
// column is resolved and, for columns that report physical columns, the ids of the
// columns immediately following it are appended as well.
//
// Throws std::runtime_error if the table or a named column does not exist, if the
// table is a view, if a physical column's metadata is missing, or if the explicit
// column list does not cover every column of the table.
1485 void InsertStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1486  Analyzer::Query& query) const {
1487  query.set_stmt_type(kINSERT);
1488  const TableDescriptor* td = catalog.getMetadataForTable(*table);
1489  if (td == nullptr) {
1490  throw std::runtime_error("Table " + *table + " does not exist.");
1491  }
1492  if (td->isView) {
1493  throw std::runtime_error("Insert to views is not supported yet.");
1494  }
// NOTE(review): listing line 1495 is absent here -- confirm against the full source.
1496  query.set_result_table_id(td->tableId);
1497  std::list<int> result_col_list;
1498  if (column_list.empty()) {
1499  const std::list<const ColumnDescriptor*> all_cols =
1500  catalog.getAllColumnMetadataForTable(td->tableId, false, false, true);
1501  for (auto cd : all_cols) {
1502  result_col_list.push_back(cd->columnId);
1503  }
1504  } else {
1505  for (auto& c : column_list) {
1506  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
1507  if (cd == nullptr) {
1508  throw std::runtime_error("Column " + *c + " does not exist.");
1509  }
1510  result_col_list.push_back(cd->columnId);
1511  const auto& col_ti = cd->columnType;
1512  if (col_ti.get_physical_cols() > 0) {
// Physical columns are looked up at the ids directly after the logical column's id.
1513  CHECK(cd->columnType.is_geometry());
1514  for (auto i = 1; i <= col_ti.get_physical_cols(); i++) {
1515  const ColumnDescriptor* pcd =
1516  catalog.getMetadataForColumn(td->tableId, cd->columnId + i);
1517  if (pcd == nullptr) {
1518  throw std::runtime_error("Column " + *c + "'s metadata is incomplete.");
1519  }
1520  result_col_list.push_back(pcd->columnId);
1521  }
1522  }
1523  }
// An explicit column list must end up covering exactly as many columns as the table
// reports; partial inserts are rejected.
1524  if (catalog.getAllColumnMetadataForTable(td->tableId, false, false, true).size() !=
1525  result_col_list.size()) {
1526  throw std::runtime_error("Insert into a subset of columns is not supported yet.");
1527  }
1528  }
1529  query.set_result_col_list(result_col_list);
1530 }
1531 
// Chooses the leaf that should receive this single-row insert.
//
// For an unsharded table a leaf is picked at random. For a sharded table the value
// supplied for the shard column is analyzed, cast to the shard column type, mapped to
// a shard id over (nShards * num_leafs) shards, and the leaf index is that shard id
// divided by nShards.
//
// Throws std::runtime_error if the table does not exist, is a view, is replicated, or
// if no value is supplied for the shard column.
// NOTE(review): num_leafs == 0 would make the `% num_leafs` below divide by zero;
// callers presumably never pass 0 -- confirm.
1532 size_t InsertValuesStmt::determineLeafIndex(const Catalog_Namespace::Catalog& catalog,
1533  size_t num_leafs) {
1534  const TableDescriptor* td = catalog.getMetadataForTable(*table);
1535  if (td == nullptr) {
1536  throw std::runtime_error("Table " + *table + " does not exist.");
1537  }
1538  if (td->isView) {
1539  throw std::runtime_error("Insert to views is not supported yet.");
1540  }
1541 
1542  if (td->partitions == "REPLICATED") {
1543  throw std::runtime_error("Cannot determine leaf on replicated table.");
1544  }
1545 
// Unsharded table: any leaf will do, so pick one at random.
1546  if (0 == td->nShards) {
1547  std::random_device rd;
1548  std::mt19937_64 gen(rd());
1549  std::uniform_int_distribution<size_t> dis;
1550  const auto leaf_idx = dis(gen) % num_leafs;
1551  return leaf_idx;
1552  }
1553 
// Sharded table: find the position of the shard column's value in value_list.
1554  size_t indexOfShardColumn = 0;
1555  const ColumnDescriptor* shardColumn = catalog.getShardColumnMetadataForTable(td);
1556  CHECK(shardColumn);
1557 
1558  if (column_list.empty()) {
// No explicit column list: the position is the shard column's position among the
// columns returned by getAllColumnMetadataForTable(tableId, false, false, false).
1559  auto all_cols =
1560  catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
1561  auto iter = std::find(all_cols.begin(), all_cols.end(), shardColumn);
1562  CHECK(iter != all_cols.end());
1563  indexOfShardColumn = std::distance(all_cols.begin(), iter);
1564  } else {
// Explicit column list: the position is the index of the shard column's name in it.
1565  for (auto& c : column_list) {
1566  if (*c == shardColumn->columnName) {
1567  break;
1568  }
1569  indexOfShardColumn++;
1570  }
1571 
1572  if (indexOfShardColumn == column_list.size()) {
1573  throw std::runtime_error("No value defined for shard column.");
1574  }
1575  }
1576 
1577  if (indexOfShardColumn >= value_list.size()) {
1578  throw std::runtime_error("No value defined for shard column.");
1579  }
1580 
1581  auto& shardColumnValueExpr = *(std::next(value_list.begin(), indexOfShardColumn));
1582 
// Analyze the value with a scratch query and cast it to the shard column's type; the
// result must be a constant, possibly wrapped in a single cast.
1583  Analyzer::Query query;
1584  auto e = shardColumnValueExpr->analyze(catalog, query);
1585  e = e->add_cast(shardColumn->columnType);
1586  const Analyzer::Constant* con = dynamic_cast<Analyzer::Constant*>(e.get());
1587  if (!con) {
1588  auto col_cast = dynamic_cast<const Analyzer::UOper*>(e.get());
1589  CHECK(col_cast);
1590  CHECK_EQ(kCAST, col_cast->get_optype());
1591  con = dynamic_cast<const Analyzer::Constant*>(col_cast->get_operand());
1592  }
1593  CHECK(con);
1594 
1595  Datum d = con->get_constval();
1596 
1597  auto shard_count = td->nShards * num_leafs;
1598  int64_t shardId = 0;
1599 
// NOTE(review): listing line 1601 is absent here; it presumably starts the statement
// that computes shardId for a NULL shard key -- confirm against the full source.
1600  if (con->get_is_null()) {
1602  shard_count);
1603  } else if (shardColumn->columnType.is_string()) {
// String shard key: shard on the string's dictionary id, adding the string to the
// dictionary if it is not there yet.
1604  auto dictDesc =
1605  catalog.getMetadataForDict(shardColumn->columnType.get_comp_param(), true);
1606  auto str_id = dictDesc->stringDict->getOrAdd(*d.stringval);
1607  bool invalid = false;
1608 
// The id is checked against a maximum that depends on the column's storage size.
1609  if (4 == shardColumn->columnType.get_size()) {
1610  invalid = str_id > max_valid_int_value<int32_t>();
1611  } else if (2 == shardColumn->columnType.get_size()) {
1612  invalid = str_id > max_valid_int_value<uint16_t>();
1613  } else if (1 == shardColumn->columnType.get_size()) {
1614  invalid = str_id > max_valid_int_value<uint8_t>();
1615  }
1616 
// Ids that fail the check (or equal the int32 null sentinel) are sharded like NULL.
1617  if (invalid || str_id == inline_int_null_value<int32_t>()) {
1618  str_id = inline_fixed_encoding_null_val(shardColumn->columnType);
1619  }
1620  shardId = SHARD_FOR_KEY(str_id, shard_count);
1621  } else {
// Non-string shard key: read the Datum member matching the column's logical size.
1622  switch (shardColumn->columnType.get_logical_size()) {
1623  case 8:
1624  shardId = SHARD_FOR_KEY(d.bigintval, shard_count);
1625  break;
1626  case 4:
1627  shardId = SHARD_FOR_KEY(d.intval, shard_count);
1628  break;
1629  case 2:
1630  shardId = SHARD_FOR_KEY(d.smallintval, shard_count);
1631  break;
1632  case 1:
1633  shardId = SHARD_FOR_KEY(d.tinyintval, shard_count);
1634  break;
1635  default:
1636  CHECK(false);
1637  }
1638  }
1639 
// The leaf index is the shard id divided by the table's shard count.
1640  return shardId / td->nShards;
1641 }
1642 
// Analyzes INSERT ... VALUES: after the common InsertStmt analysis, every value
// expression is analyzed, cast to its target column type, and appended to the query's
// target list.
//
// For columns that report physical columns (geometry), the value must be a WKT string
// constant or NULL. It is parsed into coordinates, bounds, ring sizes and poly rings,
// and one extra target entry is appended per physical column, in the order
// coords, [ring sizes, [poly rings]], [bounds], [render group].
//
// Throws std::runtime_error if the number of values differs from the number of
// non-physical columns, if NULL is inserted into a NOT NULL column, or if a geometry
// value is missing, unreadable, or of a type that does not fit the column.
1643 void InsertValuesStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1644  Analyzer::Query& query) const {
1645  InsertStmt::analyze(catalog, query);
1646  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1647  query.get_targetlist_nonconst();
1648  const auto tableId = query.get_result_table_id();
1649  const std::list<const ColumnDescriptor*> non_phys_cols =
1650  catalog.getAllColumnMetadataForTable(tableId, false, false, false);
// NOTE(review): this fires for any size mismatch, including more expressions than
// columns, although the message only mentions the opposite case.
1651  if (non_phys_cols.size() != value_list.size()) {
1652  throw std::runtime_error("Insert has more target columns than expressions.");
1653  }
// `it` walks the result column ids (logical and physical) in step with the target
// entries appended below.
1654  std::list<int>::const_iterator it = query.get_result_col_list().begin();
1655  for (auto& v : value_list) {
1656  auto e = v->analyze(catalog, query);
1657  const ColumnDescriptor* cd =
1658  catalog.getMetadataForColumn(query.get_result_table_id(), *it);
1659  CHECK(cd);
1660  if (cd->columnType.get_notnull()) {
1661  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
1662  if (c != nullptr && c->get_is_null()) {
1663  throw std::runtime_error("Cannot insert NULL into column " + cd->columnName);
1664  }
1665  }
1666  e = e->add_cast(cd->columnType);
1667  tlist.emplace_back(new Analyzer::TargetEntry("", e, false));
1668  ++it;
1669 
1670  const auto& col_ti = cd->columnType;
1671  if (col_ti.get_physical_cols() > 0) {
1672  CHECK(cd->columnType.is_geometry());
// Find the underlying constant, looking through a single cast if necessary.
1673  auto c = dynamic_cast<const Analyzer::Constant*>(e.get());
1674  if (!c) {
1675  auto uoper = std::dynamic_pointer_cast<Analyzer::UOper>(e);
1676  if (uoper && uoper->get_optype() == kCAST) {
1677  c = dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
1678  }
1679  }
1680  bool is_null = false;
1681  std::string* wkt{nullptr};
1682  if (c) {
1683  is_null = c->get_is_null();
1684  if (!is_null) {
1685  wkt = c->get_constval().stringval;
1686  }
1687  }
1688  if (!is_null && !wkt) {
1689  throw std::runtime_error("Expecting a WKT string for column " + cd->columnName);
1690  }
1691  std::vector<double> coords;
1692  std::vector<double> bounds;
1693  std::vector<int> ring_sizes;
1694  std::vector<int> poly_rings;
1695  int render_group =
1696  0; // @TODO simon.eves where to get render_group from in this context?!
1697  SQLTypeInfo import_ti{cd->columnType};
// NOTE(review): listing lines 1699 and 1707 are absent below; they presumably hold
// the call that parses the WKT into the vectors above and the second half of the
// POLYGON/MULTIPOLYGON compatibility test -- confirm against the full source.
1698  if (!is_null) {
1700  *wkt, import_ti, coords, bounds, ring_sizes, poly_rings)) {
1701  throw std::runtime_error("Cannot read geometry to insert into column " +
1702  cd->columnName);
1703  }
1704  if (cd->columnType.get_type() != import_ti.get_type()) {
1705  // allow POLYGON to be inserted into MULTIPOLYGON column
1706  if (!(import_ti.get_type() == SQLTypes::kPOLYGON &&
1708  throw std::runtime_error(
1709  "Imported geometry doesn't match the type of column " + cd->columnName);
1710  }
1711  }
1712  } else {
1713  // Special case for NULL POINT, push NULL representation to coords
1714  if (cd->columnType.get_type() == kPOINT) {
1715  if (!coords.empty()) {
1716  throw std::runtime_error("NULL POINT with unexpected coordinates in column " +
1717  cd->columnName);
1718  }
1719  coords.push_back(NULL_ARRAY_DOUBLE);
1720  coords.push_back(NULL_DOUBLE);
1721  }
1722  }
1723 
1724  // TODO: check if import SRID matches columns SRID, may need to transform before
1725  // inserting
1726 
// Physical columns sit at consecutive ids after the geometry column;
// nextColumnOffset tracks the next one to fill.
1727  int nextColumnOffset = 1;
1728 
// Physical column: compressed coordinates, stored as an array of TINYINT.
1729  const ColumnDescriptor* cd_coords = catalog.getMetadataForColumn(
1730  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1731  CHECK(cd_coords);
1732  CHECK_EQ(cd_coords->columnType.get_type(), kARRAY);
1733  CHECK_EQ(cd_coords->columnType.get_subtype(), kTINYINT);
1734  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
// A NULL POINT still gets coordinate values (the null markers pushed above).
1735  if (!is_null || cd->columnType.get_type() == kPOINT) {
1736  auto compressed_coords = geospatial::compress_coords(coords, col_ti);
1737  for (auto cc : compressed_coords) {
1738  Datum d;
1739  d.tinyintval = cc;
1740  auto e = makeExpr<Analyzer::Constant>(kTINYINT, false, d);
1741  value_exprs.push_back(e);
1742  }
1743  }
1744  tlist.emplace_back(new Analyzer::TargetEntry(
1745  "",
1746  makeExpr<Analyzer::Constant>(cd_coords->columnType, is_null, value_exprs),
1747  false));
1748  ++it;
1749  nextColumnOffset++;
1750 
1751  if (cd->columnType.get_type() == kPOLYGON ||
1752  cd->columnType.get_type() == kMULTIPOLYGON) {
1753  // Put ring sizes array into separate physical column
1754  const ColumnDescriptor* cd_ring_sizes = catalog.getMetadataForColumn(
1755  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1756  CHECK(cd_ring_sizes);
1757  CHECK_EQ(cd_ring_sizes->columnType.get_type(), kARRAY);
1758  CHECK_EQ(cd_ring_sizes->columnType.get_subtype(), kINT);
1759  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1760  if (!is_null) {
1761  for (auto c : ring_sizes) {
1762  Datum d;
1763  d.intval = c;
1764  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1765  value_exprs.push_back(e);
1766  }
1767  }
1768  tlist.emplace_back(new Analyzer::TargetEntry(
1769  "",
1770  makeExpr<Analyzer::Constant>(cd_ring_sizes->columnType, is_null, value_exprs),
1771  false));
1772  ++it;
1773  nextColumnOffset++;
1774 
1775  if (cd->columnType.get_type() == kMULTIPOLYGON) {
1776  // Put poly_rings array into separate physical column
1777  const ColumnDescriptor* cd_poly_rings = catalog.getMetadataForColumn(
1778  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1779  CHECK(cd_poly_rings);
1780  CHECK_EQ(cd_poly_rings->columnType.get_type(), kARRAY);
1781  CHECK_EQ(cd_poly_rings->columnType.get_subtype(), kINT);
1782  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1783  if (!is_null) {
1784  for (auto c : poly_rings) {
1785  Datum d;
1786  d.intval = c;
1787  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1788  value_exprs.push_back(e);
1789  }
1790  }
1791  tlist.emplace_back(new Analyzer::TargetEntry(
1792  "",
1793  makeExpr<Analyzer::Constant>(
1794  cd_poly_rings->columnType, is_null, value_exprs),
1795  false));
1796  ++it;
1797  nextColumnOffset++;
1798  }
1799  }
1800 
// Physical column: bounds, stored as an array of DOUBLE (not present for POINT).
1801  if (cd->columnType.get_type() == kLINESTRING ||
1802  cd->columnType.get_type() == kPOLYGON ||
1803  cd->columnType.get_type() == kMULTIPOLYGON) {
1804  const ColumnDescriptor* cd_bounds = catalog.getMetadataForColumn(
1805  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1806  CHECK(cd_bounds);
1807  CHECK_EQ(cd_bounds->columnType.get_type(), kARRAY);
1808  CHECK_EQ(cd_bounds->columnType.get_subtype(), kDOUBLE);
1809  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1810  if (!is_null) {
1811  for (auto b : bounds) {
1812  Datum d;
1813  d.doubleval = b;
1814  auto e = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
1815  value_exprs.push_back(e);
1816  }
1817  }
1818  tlist.emplace_back(new Analyzer::TargetEntry(
1819  "",
1820  makeExpr<Analyzer::Constant>(cd_bounds->columnType, is_null, value_exprs),
1821  false));
1822  ++it;
1823  nextColumnOffset++;
1824  }
1825 
1826  if (cd->columnType.get_type() == kPOLYGON ||
1827  cd->columnType.get_type() == kMULTIPOLYGON) {
1828  // Put render group into separate physical column
1829  const ColumnDescriptor* cd_render_group = catalog.getMetadataForColumn(
1830  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1831  CHECK(cd_render_group);
1832  CHECK_EQ(cd_render_group->columnType.get_type(), kINT);
1833  Datum d;
1834  d.intval = render_group;
1835  tlist.emplace_back(new Analyzer::TargetEntry(
1836  "",
1837  makeExpr<Analyzer::Constant>(cd_render_group->columnType, is_null, d),
1838  false));
1839  ++it;
1840  nextColumnOffset++;
1841  }
1842  }
1843  }
1844 }
1845 
// Executes a single-row INSERT ... VALUES statement for `session`.
// Visible steps: check the session's privileges (throwing std::runtime_error on
// failure), analyze the statement into an Analyzer::Query, look up the target table
// descriptor, reject views, and hand the query to RelAlgExecutor::executeSimpleInsert().
// NOTE(review): several source lines (the privilege-check arguments, the lock
// arguments and the executor acquisition) are absent from this listing.
1846 void InsertValuesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
1847  auto& catalog = session.getCatalog();
1848 
1849  if (!session.checkDBAccessPrivileges(
1851  throw std::runtime_error("User has no insert privileges on " + *table + ".");
1852  }
1853 
1854  auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
1857 
1858  Analyzer::Query query;
1859  analyze(catalog, query);
1860 
1861  // Acquire schema write lock -- leave data lock so the fragmenter can checkpoint. For
1862  // singleton inserts we just take a write lock on the schema, which prevents concurrent
1863  // inserts.
1864  auto result_table_id = query.get_result_table_id();
1865  const auto td_with_lock =
1867  catalog, result_table_id);
1868  auto td = td_with_lock();
1869  CHECK(td);
1870 
// Views cannot be the target of a singleton insert.
1871  if (td->isView) {
1872  throw std::runtime_error("Singleton inserts on views is not supported.");
1873  }
1874 
1876  RelAlgExecutor ra_executor(executor.get(), catalog);
1877 
1878  ra_executor.executeSimpleInsert(query);
1879 }
1880 
1881 void UpdateStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1882  Analyzer::Query& query) const {
1883  throw std::runtime_error("UPDATE statement not supported yet.");
1884 }
1885 
1886 void DeleteStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1887  Analyzer::Query& query) const {
1888  throw std::runtime_error("DELETE statement not supported yet.");
1889 }
1890 
1891 namespace {
1892 
// Shard-key type check: returns normally for integer, dictionary-encoded string and
// time columns; every other column type is rejected with a std::runtime_error that
// names the type and its encoding.
// NOTE(review): the signature line of this function is absent from this listing;
// `cd` is presumably its column-descriptor parameter -- confirm against the full source.
1894  const auto& col_ti = cd.columnType;
1895  if (col_ti.is_integer() ||
1896  (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) ||
1897  col_ti.is_time()) {
1898  return;
1899  }
1900  throw std::runtime_error("Cannot shard on type " + col_ti.get_type_name() +
1901  ", encoding " + col_ti.get_compression_name());
1902 }
1903 
// Returns the 1-based index of the column whose name equals `name` exactly, or 0 when
// no such column exists. A geometry column advances the index by its physical column
// count in addition to its own slot.
// NOTE(review): one source line between the name comparison and `return index;` is
// absent from this listing.
1904 size_t shard_column_index(const std::string& name,
1905  const std::list<ColumnDescriptor>& columns) {
1906  size_t index = 1;
1907  for (const auto& cd : columns) {
1908  if (cd.columnName == name) {
1910  return index;
1911  }
1912  ++index;
// Skip over the physical columns that accompany a geometry column.
1913  if (cd.columnType.is_geometry()) {
1914  index += cd.columnType.get_physical_cols();
1915  }
1916  }
1917  // Not found, return 0
1918  return 0;
1919 }
1920 
1921 size_t sort_column_index(const std::string& name,
1922  const std::list<ColumnDescriptor>& columns) {
1923  size_t index = 1;
1924  for (const auto& cd : columns) {
1925  if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
1926  return index;
1927  }
1928  ++index;
1929  if (cd.columnType.is_geometry()) {
1930  index += cd.columnType.get_physical_cols();
1931  }
1932  }
1933  // Not found, return 0
1934  return 0;
1935 }
1936 
1937 void set_string_field(rapidjson::Value& obj,
1938  const std::string& field_name,
1939  const std::string& field_value,
1940  rapidjson::Document& document) {
1941  rapidjson::Value field_name_json_str;
1942  field_name_json_str.SetString(
1943  field_name.c_str(), field_name.size(), document.GetAllocator());
1944  rapidjson::Value field_value_json_str;
1945  field_value_json_str.SetString(
1946  field_value.c_str(), field_value.size(), document.GetAllocator());
1947  obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
1948 }
1949 
1951  const ShardKeyDef* shard_key_def,
1952  const std::vector<SharedDictionaryDef>& shared_dict_defs) {
// Serializes key metadata into a JSON array string: one "SHARD KEY" object when
// shard_key_def is non-null, followed by one "SHARED DICTIONARY" object per entry of
// shared_dict_defs.
// NOTE(review): the signature line and one line inside the shared-dictionary loop are
// absent from this listing.
1953  rapidjson::Document document;
1954  auto& allocator = document.GetAllocator();
1955  rapidjson::Value arr(rapidjson::kArrayType);
1956  if (shard_key_def) {
1957  rapidjson::Value shard_key_obj(rapidjson::kObjectType);
1958  set_string_field(shard_key_obj, "type", "SHARD KEY", document);
1959  set_string_field(shard_key_obj, "name", shard_key_def->get_column(), document);
1960  arr.PushBack(shard_key_obj, allocator);
1961  }
1962  for (const auto& shared_dict_def : shared_dict_defs) {
1963  rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
1964  set_string_field(shared_dict_obj, "type", "SHARED DICTIONARY", document);
1965  set_string_field(shared_dict_obj, "name", shared_dict_def.get_column(), document);
1967  shared_dict_obj, "foreign_table", shared_dict_def.get_foreign_table(), document);
1968  set_string_field(shared_dict_obj,
1969  "foreign_column",
1970  shared_dict_def.get_foreign_column(),
1971  document);
1972  arr.PushBack(shared_dict_obj, allocator);
1973  }
// Write the array out through a rapidjson Writer and return the buffer contents.
1974  rapidjson::StringBuffer buffer;
1975  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
1976  arr.Accept(writer);
1977  return buffer.GetString();
1978 }
1979 
1980 template <typename LITERAL_TYPE,
1981  typename ASSIGNMENT,
1982  typename VALIDATE = DefaultValidate<LITERAL_TYPE>>
1983 decltype(auto) get_property_value(const NameValueAssign* p,
1984  ASSIGNMENT op,
1985  VALIDATE validate = VALIDATE()) {
1986  const auto val = validate(p);
1987  return op(val);
1988 }
1989 
1991  const NameValueAssign* p,
1992  const std::list<ColumnDescriptor>& columns) {
// Option setter: stores the validated string value of `p` in td.storageType. The value
// is validated with CaseSensitiveValidate instead of the default validator; `columns`
// is not used.
// NOTE(review): the line carrying this function's name and first parameter is absent
// from this listing.
1993  auto assignment = [&td](const auto val) { td.storageType = val; };
1994  return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
1995  p, assignment);
1996 }
1997 
1999  const NameValueAssign* p,
2000  const std::list<ColumnDescriptor>& columns) {
// Option setter: stores the validated integer value of `p` in td.maxFragRows;
// `columns` is not used.
2001  return get_property_value<IntLiteral>(p,
2002  [&td](const auto val) { td.maxFragRows = val; });
2003 }
2004 
2006  const NameValueAssign* p,
2007  const std::list<ColumnDescriptor>& columns) {
// Option setter: stores the validated integer value of `p` in df_td.maxFragRows;
// `columns` is not used.
2008  return get_property_value<IntLiteral>(
2009  p, [&df_td](const auto val) { df_td.maxFragRows = val; });
2010 }
2011 
2013  const NameValueAssign* p,
2014  const std::list<ColumnDescriptor>& columns) {
// Option setter: stores the validated integer value of `p` in td.maxChunkSize;
// `columns` is not used.
2015  return get_property_value<IntLiteral>(p,
2016  [&td](const auto val) { td.maxChunkSize = val; });
2017 }
2018 
2020  DataframeTableDescriptor& df_td,
2021  const NameValueAssign* p,
2022  const std::list<ColumnDescriptor>& columns) {
// Option setter: stores the validated integer value of `p` in df_td.maxChunkSize;
// `columns` is not used.
2023  return get_property_value<IntLiteral>(
2024  p, [&df_td](const auto val) { df_td.maxChunkSize = val; });
2025 }
2026 
2028  const NameValueAssign* p,
2029  const std::list<ColumnDescriptor>& columns) {
// Option setter: stores the validated string value of `p` in df_td.delimiter. The value
// must have size 1, otherwise std::runtime_error is thrown; `columns` is not used.
2030  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2031  if (val.size() != 1) {
2032  throw std::runtime_error("Length of DELIMITER must be equal to 1.");
2033  }
2034  df_td.delimiter = val;
2035  });
2036 }
2037 
2039  const NameValueAssign* p,
2040  const std::list<ColumnDescriptor>& columns) {
// Option setter: sets df_td.hasHeader to false for the validated value "FALSE" and to
// true for "TRUE"; any other value throws std::runtime_error. `columns` is not used.
// NOTE(review): the comparisons are against upper-case text, so this presumably relies
// on the validator upper-casing the literal -- confirm.
2041  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2042  if (val == "FALSE") {
2043  df_td.hasHeader = false;
2044  } else if (val == "TRUE") {
2045  df_td.hasHeader = true;
2046  } else {
2047  throw std::runtime_error("Option HEADER support only 'true' or 'false' values.");
2048  }
2049  });
2050 }
2051 
2053  const NameValueAssign* p,
2054  const std::list<ColumnDescriptor>& columns) {
// Option setter: stores the validated integer value of `p` in td.fragPageSize;
// `columns` is not used.
2055  return get_property_value<IntLiteral>(p,
2056  [&td](const auto val) { td.fragPageSize = val; });
2057 }
2059  const NameValueAssign* p,
2060  const std::list<ColumnDescriptor>& columns) {
// Option setter: stores the validated integer value of `p` in td.maxRows; `columns` is
// not used.
2061  return get_property_value<IntLiteral>(p, [&td](const auto val) { td.maxRows = val; });
2062 }
2063 
2065  const NameValueAssign* p,
2066  const std::list<ColumnDescriptor>& columns) {
// Option setter: stores the validated integer value of `p` in df_td.skipRows;
// `columns` is not used.
2067  return get_property_value<IntLiteral>(
2068  p, [&df_td](const auto val) { df_td.skipRows = val; });
2069 }
2070 
2072  const NameValueAssign* p,
2073  const std::list<ColumnDescriptor>& columns) {
// Option setter: stores the validated string value of `p` in td.partitions. Only
// "SHARDED" and "REPLICATED" are accepted, and "REPLICATED" is rejected when
// td.shardedColumnId is already non-zero; both failures throw std::runtime_error.
// `columns` is not used.
2074  return get_property_value<StringLiteral>(p, [&td](const auto partitions_uc) {
2075  if (partitions_uc != "SHARDED" && partitions_uc != "REPLICATED") {
2076  throw std::runtime_error("PARTITIONS must be SHARDED or REPLICATED");
2077  }
2078  if (td.shardedColumnId != 0 && partitions_uc == "REPLICATED") {
2079  throw std::runtime_error(
2080  "A table cannot be sharded and replicated at the same time");
2081  };
2082  td.partitions = partitions_uc;
2083  });
2084 }
2086  const NameValueAssign* p,
2087  const std::list<ColumnDescriptor>& columns) {
// Option setter for the shard count. Requires td.shardedColumnId to be set already
// (otherwise throws). When g_leaf_count is non-zero the requested count must be a
// multiple of it and td.nShards receives the per-leaf quotient; otherwise td.nShards
// receives the requested count unchanged. `columns` is not used.
2088  if (!td.shardedColumnId) {
2089  throw std::runtime_error("SHARD KEY must be defined.");
2090  }
2091  return get_property_value<IntLiteral>(p, [&td](const auto shard_count) {
2092  if (g_leaf_count && shard_count % g_leaf_count) {
2093  throw std::runtime_error(
2094  "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2095  }
2096  td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
// NOTE(review): td.shardedColumnId was already verified non-zero above, so this
// condition looks unreachable -- confirm whether `td.shardedColumnId && !td.nShards`
// was intended.
2097  if (!td.shardedColumnId && !td.nShards) {
2098  throw std::runtime_error(
2099  "Must specify the number of shards through the SHARD_COUNT option");
2100  };
2101  });
2102 }
2103 
2104 decltype(auto) get_vacuum_def(TableDescriptor& td,
2105  const NameValueAssign* p,
2106  const std::list<ColumnDescriptor>& columns) {
2107  return get_property_value<StringLiteral>(p, [&td](const auto vacuum_uc) {
2108  if (vacuum_uc != "IMMEDIATE" && vacuum_uc != "DELAYED") {
2109  throw std::runtime_error("VACUUM must be IMMEDIATE or DELAYED");
2110  }
2111  td.hasDeletedCol = boost::iequals(vacuum_uc, "IMMEDIATE") ? false : true;
2112  });
2113 }
2114 
2116  const NameValueAssign* p,
2117  const std::list<ColumnDescriptor>& columns) {
// Option setter: resolves the validated column name of `p` through sort_column_index()
// and stores the resulting index in td.sortedColumnId. An index of 0 (no matching
// column) throws std::runtime_error.
2118  return get_property_value<StringLiteral>(p, [&td, &columns](const auto sort_upper) {
2119  td.sortedColumnId = sort_column_index(sort_upper, columns);
2120  if (!td.sortedColumnId) {
2121  throw std::runtime_error("Specified sort column " + sort_upper + " doesn't exist");
2122  }
2123  });
2124 }
2125 
// Dispatch table from lower-case table option name to the setter that applies the
// option to a TableDescriptor.
2126 static const std::map<const std::string, const TableDefFuncPtr> tableDefFuncMap = {
2127  {"fragment_size"s, get_frag_size_def},
2128  {"max_chunk_size"s, get_max_chunk_size_def},
2129  {"page_size"s, get_page_size_def},
2130  {"max_rows"s, get_max_rows_def},
2131  {"partitions"s, get_partions_def},
2132  {"shard_count"s, get_shard_count_def},
2133  {"vacuum"s, get_vacuum_def},
2134  {"sort_column"s, get_sort_column_def},
2135  {"storage_type"s, get_storage_type}};
2136 
2138  const std::unique_ptr<NameValueAssign>& p,
2139  const std::list<ColumnDescriptor>& columns) {
// Looks up the lower-cased option name of `p` in tableDefFuncMap and applies the
// matching setter to `td`; an unknown option name throws std::runtime_error.
2140  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2141  if (it == tableDefFuncMap.end()) {
2142  throw std::runtime_error(
2143  "Invalid CREATE TABLE option " + *p->get_name() +
2144  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROWS, "
2145  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, or STORAGE_TYPE.");
2146  }
2147  return it->second(td, p.get(), columns);
2148 }
2149 
2151  const std::unique_ptr<NameValueAssign>& p,
2152  const std::list<ColumnDescriptor>& columns) {
// Same lookup-and-apply dispatch through tableDefFuncMap, with an error message worded
// for CREATE TABLE AS.
// NOTE(review): the message lists USE_SHARED_DICTIONARIES, which is not a key of
// tableDefFuncMap; presumably the caller handles that option before calling this --
// confirm.
2153  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2154  if (it == tableDefFuncMap.end()) {
2155  throw std::runtime_error(
2156  "Invalid CREATE TABLE AS option " + *p->get_name() +
2157  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROWS, "
2158  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE or "
2159  "USE_SHARED_DICTIONARIES.");
2160  }
2161  return it->second(td, p.get(), columns);
2162 }
2163 
// Dispatch table from lower-case dataframe option name to the setter that applies the
// option to a DataframeTableDescriptor.
2164 static const std::map<const std::string, const DataframeDefFuncPtr> dataframeDefFuncMap =
2165  {{"fragment_size"s, get_frag_size_dataframe_def},
2166  {"max_chunk_size"s, get_max_chunk_size_dataframe_def},
2167  {"skip_rows"s, get_skip_rows_def},
2168  {"delimiter"s, get_delimiter_def},
2169  {"header"s, get_header_def}};
2170 
2172  const std::unique_ptr<NameValueAssign>& p,
2173  const std::list<ColumnDescriptor>& columns) {
// Looks up the lower-cased option name of `p` in dataframeDefFuncMap and applies the
// matching setter to `df_td`; an unknown option name throws std::runtime_error.
2174  const auto it =
2175  dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2176  if (it == dataframeDefFuncMap.end()) {
2177  throw std::runtime_error(
2178  "Invalid CREATE DATAFRAME option " + *p->get_name() +
2179  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
2180  }
2181  return it->second(df_td, p.get(), columns);
2182 }
2183 
2184 } // namespace
2185 
// Fills `td`, `columns` and `shared_dict_defs` from this statement's table elements and
// storage options. Throws std::runtime_error for: more than one shard key, a table
// element that is neither a shared dictionary, shard key nor column definition, a shard
// key naming an unknown column, and a shard key without a shard count.
// NOTE(review): some source lines are absent from this listing (the call made for each
// shared dictionary definition, a line before setColumnDescriptor(), and the bodies of
// the is_temporary_ branches).
2186 void CreateTableStmt::executeDryRun(const Catalog_Namespace::SessionInfo& session,
2187  TableDescriptor& td,
2188  std::list<ColumnDescriptor>& columns,
2189  std::vector<SharedDictionaryDef>& shared_dict_defs) {
2190  std::unordered_set<std::string> uc_col_names;
2191  const auto& catalog = session.getCatalog();
2192  const ShardKeyDef* shard_key_def{nullptr};
// Classify each table element: shared dictionary, shard key, or column definition.
2193  for (auto& e : table_element_list_) {
2194  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2195  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2197  this, shared_dict_def, columns, shared_dict_defs, catalog);
2198  shared_dict_defs.push_back(*shared_dict_def);
2199  continue;
2200  }
2201  if (dynamic_cast<ShardKeyDef*>(e.get())) {
2202  if (shard_key_def) {
2203  throw std::runtime_error("Specified more than one shard key");
2204  }
2205  shard_key_def = static_cast<const ShardKeyDef*>(e.get());
2206  continue;
2207  }
2208  if (!dynamic_cast<ColumnDef*>(e.get())) {
2209  throw std::runtime_error("Table constraints are not supported yet.");
2210  }
2211  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2212  ColumnDescriptor cd;
2213  cd.columnName = *coldef->get_column_name();
2215  setColumnDescriptor(cd, coldef);
2216  columns.push_back(cd);
2217  }
2218 
2219  ddl_utils::set_default_table_attributes(*table_, td, columns.size());
2220 
// Resolve the shard key to a column index; 0 means the named column was not found.
2221  if (shard_key_def) {
2222  td.shardedColumnId = shard_column_index(shard_key_def->get_column(), columns);
2223  if (!td.shardedColumnId) {
2224  throw std::runtime_error("Specified shard column " + shard_key_def->get_column() +
2225  " doesn't exist");
2226  }
2227  }
2228  if (is_temporary_) {
2230  } else {
2232  }
// Apply the storage options after the shard column is known.
2233  if (!storage_options_.empty()) {
2234  for (auto& p : storage_options_) {
2235  get_table_definitions(td, p, columns);
2236  }
2237  }
2238  if (td.shardedColumnId && !td.nShards) {
2239  throw std::runtime_error("SHARD_COUNT needs to be specified with SHARD_KEY.");
2240  }
2241  td.keyMetainfo = serialize_key_metainfo(shard_key_def, shared_dict_defs);
2242 }
2243 
// Creates the table described by this statement in the session's catalog.
// Returns early, creating nothing, when validateNonExistentTableOrView() returns false.
// Otherwise builds the descriptors via executeDryRun(), records the current user as
// owner, creates the table and then registers the table DB object.
// NOTE(review): the lock arguments and the privilege-check condition are absent from
// this listing.
2244 void CreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2245  auto& catalog = session.getCatalog();
2246 
2247  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2250 
2251  // check access privileges
2254  throw std::runtime_error("Table " + *table_ +
2255  " will not be created. User has no create privileges.");
2256  }
2257 
2258  if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
2259  return;
2260  }
2261 
2262  TableDescriptor td;
2263  std::list<ColumnDescriptor> columns;
2264  std::vector<SharedDictionaryDef> shared_dict_defs;
2265 
2266  executeDryRun(session, td, columns, shared_dict_defs);
2267  td.userId = session.get_currentUser().userId;
2268 
2269  catalog.createShardedTable(td, columns, shared_dict_defs);
2270  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2271  // privileges
2272  SysCatalog::instance().createDBObject(
2273  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
2274 }
2275 
// Creates a dataframe-backed table in the session's catalog.
// Throws std::runtime_error when a table of that name already exists, when a table
// element is neither a shared dictionary nor a column definition, or when two columns
// share a name after upper-casing. The descriptor's storageType is set to *filename_.
// NOTE(review): the lock arguments, the privilege-check condition, the declaration of
// df_td and some default assignments are absent from this listing.
2276 void CreateDataframeStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2277  auto& catalog = session.getCatalog();
2278 
2279  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2282 
2283  // check access privileges
2286  throw std::runtime_error("Table " + *table_ +
2287  " will not be created. User has no create privileges.");
2288  }
2289 
2290  if (catalog.getMetadataForTable(*table_) != nullptr) {
2291  throw std::runtime_error("Table " + *table_ + " already exists.");
2292  }
2294  std::list<ColumnDescriptor> columns;
2295  std::vector<SharedDictionaryDef> shared_dict_defs;
2296 
// Upper-cased names seen so far, used to reject duplicate column definitions.
2297  std::unordered_set<std::string> uc_col_names;
2298  for (auto& e : table_element_list_) {
2299  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2300  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2302  this, shared_dict_def, columns, shared_dict_defs, catalog);
2303  shared_dict_defs.push_back(*shared_dict_def);
2304  continue;
2305  }
2306  if (!dynamic_cast<ColumnDef*>(e.get())) {
2307  throw std::runtime_error("Table constraints are not supported yet.");
2308  }
2309  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2310  ColumnDescriptor cd;
2311  cd.columnName = *coldef->get_column_name();
2312  const auto uc_col_name = boost::to_upper_copy<std::string>(cd.columnName);
2313  const auto it_ok = uc_col_names.insert(uc_col_name);
2314  if (!it_ok.second) {
2315  throw std::runtime_error("Column '" + cd.columnName + "' defined more than once");
2316  }
2317 
2318  setColumnDescriptor(cd, coldef);
2319  columns.push_back(cd);
2320  }
2321 
2322  df_td.tableName = *table_;
2323  df_td.nColumns = columns.size();
2324  df_td.isView = false;
2325  df_td.fragmenter = nullptr;
2330  df_td.maxRows = DEFAULT_MAX_ROWS;
// Storage options override the defaults assigned above.
2332  if (!storage_options_.empty()) {
2333  for (auto& p : storage_options_) {
2334  get_dataframe_definitions(df_td, p, columns);
2335  }
2336  }
2337  df_td.keyMetainfo = serialize_key_metainfo(nullptr, shared_dict_defs);
2338  df_td.userId = session.get_currentUser().userId;
2339  df_td.storageType = *filename_;
2340 
2341  catalog.createShardedTable(df_td, columns, shared_dict_defs);
2342  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2343  // privileges
2344  SysCatalog::instance().createDBObject(
2345  session.get_currentUser(), df_td.tableName, TableDBObjectType, catalog);
2346 }
2347 
// Plans `select_stmt` (after pg_shim()) through the catalog's Calcite manager, executes
// the plan with a RelAlgExecutor, stores the result's target metadata in `targets` and
// returns the result rows. `validate_only` and `outer_fragment_indices` are forwarded
// into the ExecutionOptions. When the parsed query hints request cpu_mode, the
// compilation device type is forced to CPU.
// NOTE(review): the executor acquisition, the declaration of `co` and parts of the
// ExecutionOptions / ResultSet initializers are absent from this listing.
2348 std::shared_ptr<ResultSet> getResultSet(QueryStateProxy query_state_proxy,
2349  const std::string select_stmt,
2350  std::vector<TargetMetaInfo>& targets,
2351  bool validate_only = false,
2352  std::vector<size_t> outer_fragment_indices = {}) {
2353  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2354  auto& catalog = session->getCatalog();
2355 
// Without CUDA support the device type is always CPU.
2357 #ifdef HAVE_CUDA
2358  const auto device_type = session->get_executor_device_type();
2359 #else
2360  const auto device_type = ExecutorDeviceType::CPU;
2361 #endif // HAVE_CUDA
2362  auto calcite_mgr = catalog.getCalciteMgr();
2363 
2364  // TODO MAT this should actually get the global or the session parameter for
2365  // view optimization
2366  const auto query_ra =
2367  calcite_mgr
2368  ->process(query_state_proxy, pg_shim(select_stmt), {}, true, false, false, true)
2369  .plan_result;
2370  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
2372  const auto& query_hints = ra_executor.getParsedQueryHints();
2373  if (query_hints.cpu_mode) {
2374  co.device_type = ExecutorDeviceType::CPU;
2375  }
2377  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2378  // struct
2379  ExecutionOptions eo = {false,
2380  true,
2381  false,
2382  true,
2383  false,
2384  false,
2385  validate_only,
2386  false,
2387  10000,
2388  false,
2389  false,
2390  0.9,
2391  false,
2392  1000,
2394  outer_fragment_indices};
2395  ExecutionResult result{std::make_shared<ResultSet>(std::vector<TargetInfo>{},
2398  nullptr,
2399  nullptr),
2400  {}};
2401  result = ra_executor.executeRelAlgQuery(co, eo, false, nullptr);
2402  targets = result.getTargetsMeta();
2403 
2404  return result.getRows();
2405 }
2406 
// Plans `sql_query_string` (after pg_shim()) through the catalog's Calcite manager and
// returns RelAlgExecutor::getOuterFragmentCount() for that plan. The compilation device
// type is CPU when the parsed query hints request cpu_mode, otherwise the session's
// device type (always CPU without CUDA support).
// NOTE(review): the executor acquisition and one CompilationOptions initializer line
// are absent from this listing.
2407 size_t LocalConnector::getOuterFragmentCount(QueryStateProxy query_state_proxy,
2408  std::string& sql_query_string) {
2409  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2410  auto& catalog = session->getCatalog();
2411 
2413 #ifdef HAVE_CUDA
2414  const auto device_type = session->get_executor_device_type();
2415 #else
2416  const auto device_type = ExecutorDeviceType::CPU;
2417 #endif // HAVE_CUDA
2418  auto calcite_mgr = catalog.getCalciteMgr();
2419 
2420  // TODO MAT this should actually get the global or the session parameter for
2421  // view optimization
2422  const auto query_ra =
2423  calcite_mgr
2424  ->process(
2425  query_state_proxy, pg_shim(sql_query_string), {}, true, false, false, true)
2426  .plan_result;
2427  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
2428  const auto& query_hints = ra_executor.getParsedQueryHints();
2429  CompilationOptions co = {query_hints.cpu_mode ? ExecutorDeviceType::CPU : device_type,
2430  true,
2432  false};
2433  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2434  // struct
2435  ExecutionOptions eo = {
2436  false, true, false, true, false, false, false, false, 10000, false, false, 0.9};
2437  return ra_executor.getOuterFragmentCount(co, eo);
2438 }
2439 
2440 AggregatedResult LocalConnector::query(QueryStateProxy query_state_proxy,
2441  std::string& sql_query_string,
2442  std::vector<size_t> outer_frag_indices,
2443  bool validate_only) {
2444  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2445  // TODO(PS): Should we be using the shimmed query in getResultSet?
2446  std::string pg_shimmed_select_query = pg_shim(sql_query_string);
2447 
2448  std::vector<TargetMetaInfo> target_metainfos;
2449  auto result_rows = getResultSet(query_state_proxy,
2450  sql_query_string,
2451  target_metainfos,
2452  validate_only,
2453  outer_frag_indices);
2454  AggregatedResult res = {result_rows, target_metainfos};
2455  return res;
2456 }
2457 
2458 std::vector<AggregatedResult> LocalConnector::query(
2459  QueryStateProxy query_state_proxy,
2460  std::string& sql_query_string,
2461  std::vector<size_t> outer_frag_indices) {
2462  auto res = query(query_state_proxy, sql_query_string, outer_frag_indices, false);
2463  return {res};
2464 }
2465 
2466 void LocalConnector::insertDataToLeaf(const Catalog_Namespace::SessionInfo& session,
2467  const size_t leaf_idx,
2468  Fragmenter_Namespace::InsertData& insert_data) {
2469  CHECK(leaf_idx == 0);
2470  auto& catalog = session.getCatalog();
2471  auto created_td = catalog.getMetadataForTable(insert_data.tableId);
2472  ChunkKey chunkKey = {catalog.getCurrentDB().dbId, created_td->tableId};
2473  // TODO(adb): Ensure that we have previously obtained a write lock for this table's
2474  // data.
2475  created_td->fragmenter->insertDataNoCheckpoint(insert_data);
2476 }
2477 
2478 void LocalConnector::checkpoint(const Catalog_Namespace::SessionInfo& session,
2479  int tableId) {
2480  auto& catalog = session.getCatalog();
2481  auto dbId = catalog.getCurrentDB().dbId;
2482  catalog.getDataMgr().checkpoint(dbId, tableId);
2483 }
2484 
2485 void LocalConnector::rollback(const Catalog_Namespace::SessionInfo& session,
2486  int tableId) {
2487  auto& catalog = session.getCatalog();
2488  auto dbId = catalog.getCurrentDB().dbId;
2489  catalog.getDataMgr().checkpoint(dbId, tableId);
2490 }
2491 
// Builds one ColumnDescriptor per target of `result`, named after the target's result
// name and typed with its physical type info. Two parallel lists are kept: the plain
// descriptors and a "for create" variant whose comp_param may be reset; `for_create`
// selects which list is returned.
// NOTE(review): the condition guarding the comp_param reset is absent from this
// listing; judging by the comment below it presumably tests for dictionary-encoded
// columns -- confirm against the full source.
2492 std::list<ColumnDescriptor> LocalConnector::getColumnDescriptors(AggregatedResult& result,
2493  bool for_create) {
2494  std::list<ColumnDescriptor> column_descriptors;
2495  std::list<ColumnDescriptor> column_descriptors_for_create;
2496 
2497  int rowid_suffix = 0;
2498  for (const auto& target_metainfo : result.targets_meta) {
2499  ColumnDescriptor cd;
2500  cd.columnName = target_metainfo.get_resname();
// A target named exactly "rowid" gets an increasing numeric suffix appended.
2501  if (cd.columnName == "rowid") {
2502  cd.columnName += std::to_string(rowid_suffix++);
2503  }
2504  cd.columnType = target_metainfo.get_physical_type_info();
2505 
2506  ColumnDescriptor cd_for_create = cd;
2507 
2509  // we need to reset the comp param (as this points to the actual dictionary)
2510  if (cd.columnType.is_array()) {
2511  // for dict encoded arrays, it is always 4 bytes
2512  cd_for_create.columnType.set_comp_param(32);
2513  } else {
2514  cd_for_create.columnType.set_comp_param(cd.columnType.get_size() * 8);
2515  }
2516  }
2517 
2518  column_descriptors_for_create.push_back(cd_for_create);
2519  column_descriptors.push_back(cd);
2520  }
2521 
2522  if (for_create) {
2523  return column_descriptors_for_create;
2524  }
2525 
2526  return column_descriptors;
2527 }
2528 
2529 void InsertIntoTableAsSelectStmt::populateData(QueryStateProxy query_state_proxy,
2530  bool validate_table) {
2531  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2532  auto& catalog = session->getCatalog();
2533  const auto td_with_lock =
2535  catalog, table_name_);
2536  const auto td = td_with_lock();
2538 
2539  LocalConnector local_connector;
2540  bool populate_table = false;
2541 
2542  if (leafs_connector_) {
2543  populate_table = true;
2544  } else {
2545  leafs_connector_ = &local_connector;
2546  if (!g_cluster) {
2547  populate_table = true;
2548  }
2549  }
2550 
2551  auto get_target_column_descriptors = [this, &catalog](const TableDescriptor* td) {
2552  std::vector<const ColumnDescriptor*> target_column_descriptors;
2553  if (column_list_.empty()) {
2554  auto list = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
2555  target_column_descriptors = {std::begin(list), std::end(list)};
2556 
2557  } else {
2558  for (auto& c : column_list_) {
2559  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
2560  if (cd == nullptr) {
2561  throw std::runtime_error("Column " + *c + " does not exist.");
2562  }
2563  target_column_descriptors.push_back(cd);
2564  }
2565  }
2566 
2567  return target_column_descriptors;
2568  };
2569 
2570  bool is_temporary = table_is_temporary(td);
2571 
2572  // Don't allow simultaneous inserts
2573  const auto insert_data_lock =
2575 
2576  if (validate_table) {
2577  // check access privileges
2578  if (!td) {
2579  throw std::runtime_error("Table " + table_name_ + " does not exist.");
2580  }
2581  if (td->isView) {
2582  throw std::runtime_error("Insert to views is not supported yet.");
2583  }
2584 
2585  if (!session->checkDBAccessPrivileges(DBObjectType::TableDBObjectType,
2587  table_name_)) {
2588  throw std::runtime_error("User has no insert privileges on " + table_name_ + ".");
2589  }
2590 
2591  // only validate the select query so we get the target types
2592  // correctly, but do not populate the result set
2593  auto result = local_connector.query(query_state_proxy, select_query_, {}, true);
2594  auto source_column_descriptors = local_connector.getColumnDescriptors(result, false);
2595 
2596  std::vector<const ColumnDescriptor*> target_column_descriptors =
2597  get_target_column_descriptors(td);
2598  if (catalog.getAllColumnMetadataForTable(td->tableId, false, false, false).size() !=
2599  target_column_descriptors.size()) {
2600  throw std::runtime_error("Insert into a subset of columns is not supported yet.");
2601  }
2602 
2603  if (source_column_descriptors.size() != target_column_descriptors.size()) {
2604  throw std::runtime_error("The number of source and target columns does not match.");
2605  }
2606 
2607  for (int i = 0; i < source_column_descriptors.size(); i++) {
2608  const ColumnDescriptor* source_cd =
2609  &(*std::next(source_column_descriptors.begin(), i));
2610  const ColumnDescriptor* target_cd = target_column_descriptors.at(i);
2611 
2612  if ((!source_cd->columnType.is_string() && !target_cd->columnType.is_string()) &&
2613  (source_cd->columnType.get_type() != target_cd->columnType.get_type())) {
2614  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2615  source_cd->columnType.get_type_name() +
2616  "' and target '" + target_cd->columnName + " " +
2617  target_cd->columnType.get_type_name() +
2618  "' column types do not match.");
2619  }
2620  if (source_cd->columnType.is_array()) {
2621  if (source_cd->columnType.get_subtype() != target_cd->columnType.get_subtype()) {
2622  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2623  source_cd->columnType.get_type_name() +
2624  "' and target '" + target_cd->columnName + " " +
2625  target_cd->columnType.get_type_name() +
2626  "' array column element types do not match.");
2627  }
2628  }
2629 
2630  if (source_cd->columnType.is_decimal() ||
2631  source_cd->columnType.get_elem_type().is_decimal()) {
2632  SQLTypeInfo sourceType = source_cd->columnType;
2633  SQLTypeInfo targetType = target_cd->columnType;
2634 
2635  if (source_cd->columnType.is_array()) {
2636  sourceType = source_cd->columnType.get_elem_type();
2637  targetType = target_cd->columnType.get_elem_type();
2638  }
2639 
2640  if ((sourceType.get_dimension() != targetType.get_dimension()) ||
2641  (sourceType.get_scale() != targetType.get_scale())) {
2642  throw std::runtime_error(
2643  "Source '" + source_cd->columnName + " " +
2644  source_cd->columnType.get_type_name() + "' and target '" +
2645  target_cd->columnName + " " + target_cd->columnType.get_type_name() +
2646  "' decimal columns dimension and or scale do not match.");
2647  }
2648  }
2649 
2650  if (source_cd->columnType.is_string()) {
2651  if (source_cd->columnType.get_compression() !=
2652  target_cd->columnType.get_compression()) {
2653  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2654  source_cd->columnType.get_type_name() +
2655  "' and target '" + target_cd->columnName + " " +
2656  target_cd->columnType.get_type_name() +
2657  "' columns string encodings do not match.");
2658  }
2659  }
2660 
2661  if (source_cd->columnType.is_timestamp() && target_cd->columnType.is_timestamp()) {
2662  if (source_cd->columnType.get_dimension() !=
2663  target_cd->columnType.get_dimension()) {
2664  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2665  source_cd->columnType.get_type_name() +
2666  "' and target '" + target_cd->columnName + " " +
2667  target_cd->columnType.get_type_name() +
2668  "' timestamp column precisions do not match.");
2669  }
2670  }
2671 
2672  if (!source_cd->columnType.is_string() && !source_cd->columnType.is_geometry() &&
2673  source_cd->columnType.get_size() > target_cd->columnType.get_size()) {
2674  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2675  source_cd->columnType.get_type_name() +
2676  "' and target '" + target_cd->columnName + " " +
2677  target_cd->columnType.get_type_name() +
2678  "' column encoding sizes do not match.");
2679  }
2680  }
2681  }
2682 
2683  if (!populate_table) {
2684  return;
2685  }
2686 
2687  Fragmenter_Namespace::InsertDataLoader insertDataLoader(*leafs_connector_);
2688  auto target_column_descriptors = get_target_column_descriptors(td);
2689 
2690  auto outer_frag_count =
2691  leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
2692 
2693  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
2694 
2695  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
2696  std::vector<size_t> allowed_outer_fragment_indices;
2697 
2698  if (outer_frag_count) {
2699  allowed_outer_fragment_indices.push_back(outer_frag_idx);
2700  }
2701 
2702  std::vector<AggregatedResult> query_results = leafs_connector_->query(
2703  query_state_proxy, select_query_, allowed_outer_fragment_indices);
2704 
2705  for (auto& res : query_results) {
2706  auto result_rows = res.rs;
2707  result_rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
2708  const auto num_rows = result_rows->rowCount();
2709 
2710  if (0 == num_rows) {
2711  continue;
2712  }
2713 
2714  size_t leaf_count = leafs_connector_->leafCount();
2715 
2716  size_t max_number_of_rows_per_package =
2717  std::min(num_rows / leaf_count, 64UL * 1024UL);
2718 
2719  size_t start_row = 0;
2720  size_t num_rows_to_process = std::min(num_rows, max_number_of_rows_per_package);
2721 
2722  // ensure that at least one row is being processed
2723  num_rows_to_process = std::max(num_rows_to_process, 1UL);
2724 
2725  std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
2726 
2728 
2729  const int num_worker_threads = std::thread::hardware_concurrency();
2730 
2731  std::vector<size_t> thread_start_idx(num_worker_threads),
2732  thread_end_idx(num_worker_threads);
2733  bool can_go_parallel = !result_rows->isTruncated() && num_rows_to_process > 20000;
2734 
2735  std::atomic<size_t> row_idx{0};
2736 
2737  auto convert_function = [&result_rows,
2738  &value_converters,
2739  &row_idx,
2740  &num_rows_to_process,
2741  &thread_start_idx,
2742  &thread_end_idx](const int thread_id) {
2743  const int num_cols = value_converters.size();
2744  const size_t start = thread_start_idx[thread_id];
2745  const size_t end = thread_end_idx[thread_id];
2746  size_t idx = 0;
2747  for (idx = start; idx < end; ++idx) {
2748  const auto result_row = result_rows->getRowAtNoTranslations(idx);
2749  if (!result_row.empty()) {
2750  size_t target_row = row_idx.fetch_add(1);
2751 
2752  if (target_row >= num_rows_to_process) {
2753  break;
2754  }
2755 
2756  for (unsigned int col = 0; col < num_cols; col++) {
2757  const auto& mapd_variant = result_row[col];
2758  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
2759  }
2760  }
2761  }
2762 
2763  thread_start_idx[thread_id] = idx;
2764  };
2765 
2766  auto single_threaded_convert_function = [&result_rows,
2767  &value_converters,
2768  &row_idx,
2769  &num_rows_to_process,
2770  &thread_start_idx,
2771  &thread_end_idx](const int thread_id) {
2772  const int num_cols = value_converters.size();
2773  const size_t start = thread_start_idx[thread_id];
2774  const size_t end = thread_end_idx[thread_id];
2775  size_t idx = 0;
2776  for (idx = start; idx < end; ++idx) {
2777  size_t target_row = row_idx.fetch_add(1);
2778 
2779  if (target_row >= num_rows_to_process) {
2780  break;
2781  }
2782  const auto result_row = result_rows->getNextRow(false, false);
2783  CHECK(!result_row.empty());
2784  for (unsigned int col = 0; col < num_cols; col++) {
2785  const auto& mapd_variant = result_row[col];
2786  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
2787  }
2788  }
2789 
2790  thread_start_idx[thread_id] = idx;
2791  };
2792 
2793  if (can_go_parallel) {
2794  const size_t entryCount = result_rows->entryCount();
2795  for (size_t i = 0,
2796  start_entry = 0,
2797  stride = (entryCount + num_worker_threads - 1) / num_worker_threads;
2798  i < num_worker_threads && start_entry < entryCount;
2799  ++i, start_entry += stride) {
2800  const auto end_entry = std::min(start_entry + stride, entryCount);
2801  thread_start_idx[i] = start_entry;
2802  thread_end_idx[i] = end_entry;
2803  }
2804 
2805  } else {
2806  thread_start_idx[0] = 0;
2807  thread_end_idx[0] = result_rows->entryCount();
2808  }
2809 
2810  std::shared_ptr<Executor> executor;
2811 
2814  }
2815 
2816  while (start_row < num_rows) {
2817  try {
2818  value_converters.clear();
2819  row_idx = 0;
2820  int colNum = 0;
2821  for (const auto targetDescriptor : target_column_descriptors) {
2822  auto sourceDataMetaInfo = res.targets_meta[colNum++];
2823 
2825  num_rows_to_process,
2826  catalog,
2827  sourceDataMetaInfo,
2828  targetDescriptor,
2829  targetDescriptor->columnType,
2830  !targetDescriptor->columnType.get_notnull(),
2831  result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
2833  ? executor->getStringDictionaryProxy(
2834  sourceDataMetaInfo.get_type_info().get_comp_param(),
2835  result_rows->getRowSetMemOwner(),
2836  true)
2837  : nullptr};
2838  auto converter = factory.create(param);
2839  value_converters.push_back(std::move(converter));
2840  }
2841 
2842  if (can_go_parallel) {
2843  std::vector<std::future<void>> worker_threads;
2844  for (int i = 0; i < num_worker_threads; ++i) {
2845  worker_threads.push_back(
2846  std::async(std::launch::async, convert_function, i));
2847  }
2848 
2849  for (auto& child : worker_threads) {
2850  child.wait();
2851  }
2852  for (auto& child : worker_threads) {
2853  child.get();
2854  }
2855 
2856  } else {
2857  single_threaded_convert_function(0);
2858  }
2859 
2860  // finalize the insert data
2861  {
2862  auto finalizer_func =
2863  [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
2864  targetValueConverter->finalizeDataBlocksForInsertData();
2865  };
2866  std::vector<std::future<void>> worker_threads;
2867  for (auto& converterPtr : value_converters) {
2868  worker_threads.push_back(
2869  std::async(std::launch::async, finalizer_func, converterPtr.get()));
2870  }
2871 
2872  for (auto& child : worker_threads) {
2873  child.wait();
2874  }
2875  for (auto& child : worker_threads) {
2876  child.get();
2877  }
2878  }
2879 
2881  insert_data.databaseId = catalog.getCurrentDB().dbId;
2882  CHECK(td);
2883  insert_data.tableId = td->tableId;
2884  insert_data.numRows = num_rows_to_process;
2885 
2886  for (int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
2887  value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
2888  }
2889 
2890  insertDataLoader.insertData(*session, insert_data);
2891  } catch (...) {
2892  try {
2893  if (td->nShards) {
2894  const auto shard_tables = catalog.getPhysicalTablesDescriptors(td);
2895  for (const auto ptd : shard_tables) {
2896  leafs_connector_->rollback(*session, ptd->tableId);
2897  }
2898  }
2899  leafs_connector_->rollback(*session, td->tableId);
2900  } catch (...) {
2901  // eat it
2902  }
2903  throw;
2904  }
2905  start_row += num_rows_to_process;
2906  num_rows_to_process =
2907  std::min(num_rows - start_row, max_number_of_rows_per_package);
2908  }
2909  }
2910  }
2911 
2912  if (!is_temporary) {
2913  if (td->nShards) {
2914  const auto shard_tables = catalog.getPhysicalTablesDescriptors(td);
2915  for (const auto ptd : shard_tables) {
2916  leafs_connector_->checkpoint(*session, ptd->tableId);
2917  }
2918  }
2919  leafs_connector_->checkpoint(*session, td->tableId);
2920  }
2921 }
2922 
2923 void InsertIntoTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2924  auto session_copy = session;
2925  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
2926  &session_copy, boost::null_deleter());
2927  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
2928  auto stdlog = STDLOG(query_state);
2929  populateData(query_state->createQueryStateProxy(), true);
2930 }
2931 
// Executes CREATE TABLE ... AS SELECT for the given session.
//
// When leafs_connector_ is null, the target table is created here first:
// the SELECT is run through a LocalConnector to obtain the column
// descriptors, the storage options are applied to a fresh TableDescriptor,
// shared-dictionary references are collected (unless disabled), and the table
// plus its DB object are registered. In all cases populateData() is then
// called; if it throws and g_cluster is false, the table named table_name_ is
// dropped (when it exists) before the exception is rethrown.
//
// Throws std::runtime_error on failed privilege check, on an already existing
// table (unless if_not_exists_ is set, in which case it returns silently), on
// a decimal column with precision > 18, and on a non-string
// USE_SHARED_DICTIONARIES option.
//
// NOTE(review): this extract is missing listing lines 2945-2946, 2979-2983,
// 2985 and 2987, so the privilege-check condition and part of the
// TableDescriptor setup are not visible here.
2932 void CreateTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
      // Non-owning shared_ptr to a stack copy of the session (null_deleter).
2933  auto session_copy = session;
2934  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
2935  &session_copy, boost::null_deleter());
2936  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
2937  auto stdlog = STDLOG(query_state);
2938 
2939  LocalConnector local_connector;
2940  auto& catalog = session.getCatalog();
      // The table is only created here when no leafs connector has been set.
2941  bool create_table = nullptr == leafs_connector_;
2942 
2943  if (create_table) {
2944  // check access privileges
      // (the condition guarding this throw is on lines absent from this extract)
2947  throw std::runtime_error("CTAS failed. Table " + table_name_ +
2948  " will not be created. User has no create privileges.");
2949  }
2950 
      // An existing table is an error unless IF NOT EXISTS was requested, in
      // which case nothing is created and no data is loaded.
2951  if (catalog.getMetadataForTable(table_name_) != nullptr) {
2952  if (if_not_exists_) {
2953  return;
2954  }
2955  throw std::runtime_error("Table " + table_name_ +
2956  " already exists and no data was loaded.");
2957  }
2958 
2959  // only validate the select query so we get the target types
2960  // correctly, but do not populate the result set
2961  auto result = local_connector.query(
2962  query_state->createQueryStateProxy(), select_query_, {}, true);
2963  const auto column_descriptors_for_create =
2964  local_connector.getColumnDescriptors(result, true);
2965 
2966  // some validation as the QE might return some out of range column types
2967  for (auto& cd : column_descriptors_for_create) {
2968  if (cd.columnType.is_decimal() && cd.columnType.get_precision() > 18) {
2969  throw std::runtime_error(cd.columnName + ": Precision too high, max 18.");
2970  }
2971  }
2972 
      // Descriptor of the table to create; further fields are set on lines
      // that are absent from this extract.
2973  TableDescriptor td;
2974  td.tableName = table_name_;
2975  td.userId = session.get_currentUser().userId;
2976  td.nColumns = column_descriptors_for_create.size();
2977  td.isView = false;
2978  td.fragmenter = nullptr;
2984  if (is_temporary_) {
2986  } else {
2988  }
2989 
2990  bool use_shared_dictionaries = true;
2991 
      // USE_SHARED_DICTIONARIES is handled locally (case-insensitive name,
      // value "true"/"1"/"t" enables it); every other option is forwarded to
      // get_table_definitions_for_ctas.
2992  if (!storage_options_.empty()) {
2993  for (auto& p : storage_options_) {
2994  if (boost::to_lower_copy<std::string>(*p->get_name()) ==
2995  "use_shared_dictionaries") {
2996  const StringLiteral* literal =
2997  dynamic_cast<const StringLiteral*>(p->get_value());
2998  if (nullptr == literal) {
2999  throw std::runtime_error(
3000  "USE_SHARED_DICTIONARIES must be a string parameter");
3001  }
3002  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
3003  use_shared_dictionaries = val == "true" || val == "1" || val == "t";
3004  } else {
3005  get_table_definitions_for_ctas(td, p, column_descriptors_for_create);
3006  }
3007  }
3008  }
3009 
3010  std::vector<SharedDictionaryDef> sharedDictionaryRefs;
3011 
      // For every dictionary-encoded string column of the SELECT whose
      // dictionary id maps to a known column, record a shared-dictionary
      // reference to that column.
3012  if (use_shared_dictionaries) {
3013  const auto source_column_descriptors =
3014  local_connector.getColumnDescriptors(result, false);
3015  const auto mapping = catalog.getDictionaryToColumnMapping();
3016 
3017  for (auto& source_cd : source_column_descriptors) {
3018  const auto& ti = source_cd.columnType;
3019  if (ti.is_string()) {
3020  if (ti.get_compression() == kENCODING_DICT) {
3021  int dict_id = ti.get_comp_param();
3022  auto it = mapping.find(dict_id);
3023  if (mapping.end() != it) {
3024  const auto targetColumn = it->second;
3025  auto targetTable =
3026  catalog.getMetadataForTable(targetColumn->tableId, false);
3027  CHECK(targetTable);
3028  LOG(INFO) << "CTAS: sharing text dictionary on column "
3029  << source_cd.columnName << " with " << targetTable->tableName
3030  << "." << targetColumn->columnName;
3031  sharedDictionaryRefs.push_back(
3032  SharedDictionaryDef(source_cd.columnName,
3033  targetTable->tableName,
3034  targetColumn->columnName));
3035  }
3036  }
3037  }
3038  }
3039  }
3040 
3041  // currently no means of defining sharding in CTAS
3042  td.keyMetainfo = serialize_key_metainfo(nullptr, sharedDictionaryRefs);
3043 
3044  catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs, true);
3045  // TODO (max): It's transactionally unsafe, should be fixed: we may create object
3046  // w/o privileges
3047  SysCatalog::instance().createDBObject(
3048  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
3049  }
3050 
      // Load the data. On failure outside cluster mode, drop the table named
      // table_name_ (if present) and rethrow the original exception.
3051  try {
3052  populateData(query_state->createQueryStateProxy(), false);
3053  } catch (...) {
3054  if (!g_cluster) {
3055  const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
3056  if (created_td) {
3057  catalog.dropTable(created_td);
3058  }
3059  }
3060  throw;
3061  }
3062 }
3063 
// Executes DROP TABLE for the given session.
//
// Acquires a write lock on the table schema; if that throws
// std::runtime_error and if_exists is set, the statement is a silent no-op,
// otherwise the error propagates. After the privilege check the table is
// dropped through the catalog.
//
// NOTE(review): this extract is missing listing lines 3069-3070, 3078, 3094,
// 3099, 3102 and 3106, so the lock arguments, the privilege arguments and the
// cache-invalidation call are not visible here.
3064 void DropTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3065  auto& catalog = session.getCatalog();
3066 
3067  // TODO(adb): the catalog should be handling this locking.
3068  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3071 
3072  const TableDescriptor* td{nullptr};
3073  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
3074 
3075  try {
3076  td_with_lock =
3077  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
3079  catalog, *table, false));
3080  td = (*td_with_lock)();
3081  } catch (const std::runtime_error& e) {
3082  if (if_exists) {
3083  return;
3084  } else {
      // NOTE(review): `throw e;` throws a copy typed std::runtime_error, which
      // slices any derived exception type; a bare `throw;` would rethrow the
      // original object unchanged.
3085  throw e;
3086  }
3087  }
3088 
3089  CHECK(td);
3090  CHECK(td_with_lock);
3091 
3092  // check access privileges
3093  if (!session.checkDBAccessPrivileges(
3095  throw std::runtime_error("Table " + *table +
3096  " will not be dropped. User has no proper privileges.");
3097  }
3098 
3100 
3101  auto table_data_write_lock =
3103  catalog.dropTable(td);
3104 
3105  // invalidate cached hashtable
3107 }
3108 
// Executes TRUNCATE TABLE for the given session.
//
// Looks the table up under a schema lock, verifies the current user's
// privileges through SysCatalog::checkPrivileges, refuses to truncate views,
// and then calls catalog.truncateTable().
//
// Throws std::runtime_error when the table does not exist, when the user
// lacks privileges, or when the target is a view.
//
// NOTE(review): this extract is missing listing lines 3115-3116, 3119, 3130,
// 3142 and 3146, so the lock types/arguments, the privilege being set on
// dbObject and the cache-invalidation call are not visible here.
3109 void TruncateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3110  auto& catalog = session.getCatalog();
3111 
3112  // TODO: Removal of the FileMgr is not thread safe. Take a global system write lock when
3113  // truncating a table
3114  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3117 
3118  const auto td_with_lock =
3120  catalog, *table, true);
3121  const auto td = td_with_lock();
3122  if (!td) {
3123  throw std::runtime_error("Table " + *table + " does not exist.");
3124  }
3125 
3126  // check access privileges
3127  std::vector<DBObject> privObjects;
3128  DBObject dbObject(*table, TableDBObjectType);
3129  dbObject.loadKey(catalog);
3131  privObjects.push_back(dbObject);
3132  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3133  throw std::runtime_error("Table " + *table + " will not be truncated. User " +
3134  session.get_currentUser().userName +
3135  " has no proper privileges.");
3136  }
3137 
3138  if (td->isView) {
3139  throw std::runtime_error(*table + " is a view. Cannot Truncate.");
3140  }
3141  auto table_data_write_lock =
3143  catalog.truncateTable(td);
3144 
3145  // invalidate cached hashtable
3147 }
3148 
// Verifies that the session's current user may alter the table described by td.
//
// Super users and the table owner (userId match) pass immediately; any other
// user must pass SysCatalog::checkPrivileges for the table's DB object,
// otherwise std::runtime_error is thrown.
//
// NOTE(review): the first signature line (listing line 3149) and line 3158
// are absent from this extract. The function is presumably
// check_alter_table_privilege(session, td), judging by its call sites and the
// error message; the privilege set on dbObject is not visible here.
3150  const TableDescriptor* td) {
3151  if (session.get_currentUser().isSuper ||
3152  session.get_currentUser().userId == td->userId) {
3153  return;
3154  }
3155  std::vector<DBObject> privObjects;
3156  DBObject dbObject(td->tableName, TableDBObjectType);
3157  dbObject.loadKey(session.getCatalog());
3159  privObjects.push_back(dbObject);
3160  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3161  throw std::runtime_error("Current user does not have the privilege to alter table: " +
3162  td->tableName);
3163  }
3164 }
3165 
// Executes ALTER USER ... RENAME TO for the given session.
//
// Only a super user may rename users. The user named *username_ must exist;
// it is then renamed to *new_username_ through SysCatalog.
//
// Throws std::runtime_error when the caller is not a super user or the user
// does not exist.
//
// NOTE(review): listing line 3171, which declares `user`, is absent from this
// extract.
3166 void RenameUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3167  if (!session.get_currentUser().isSuper) {
3168  throw std::runtime_error("Only a super user can rename users.");
3169  }
3170 
3172  if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
3173  throw std::runtime_error("User " + *username_ + " does not exist.");
3174  }
3175 
3176  SysCatalog::instance().renameUser(*username_, *new_username_);
3177 }
3178 
// Executes ALTER DATABASE ... RENAME TO for the given session.
//
// The database named *database_name_ must exist, and the caller must be a
// super user or the database owner (db.dbOwner); the rename itself is
// delegated to SysCatalog::renameDatabase.
//
// Throws std::runtime_error when the database does not exist or the caller is
// neither a super user nor the owner.
//
// NOTE(review): listing lines 3180 (declaration of `db`) and 3184-3185 (lock
// arguments) are absent from this extract.
3179 void RenameDatabaseStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3181 
3182  // TODO: use database lock instead
3183  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3186 
3187  if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
3188  throw std::runtime_error("Database " + *database_name_ + " does not exist.");
3189  }
3190 
3191  if (!session.get_currentUser().isSuper &&
3192  session.get_currentUser().userId != db.dbOwner) {
3193  throw std::runtime_error("Only a super user or the owner can rename the database.");
3194  }
3195 
3196  SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
3197 }
3198 
// Executes ALTER TABLE ... RENAME TO for the given session.
//
// Resolves the table under a schema lock, checks the alter-table privilege,
// rejects the rename when a table or view named *new_table_name already
// exists, and otherwise delegates to catalog.renameTable().
//
// Throws std::runtime_error when the privilege check fails or the new name is
// already taken.
//
// NOTE(review): listing lines 3204-3205 and 3208 (lock arguments and the
// lock-container type) are absent from this extract.
3199 void RenameTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3200  auto& catalog = session.getCatalog();
3201 
3202  // TODO(adb): the catalog should be handling this locking (see AddColumnStmt)
3203  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3206 
3207  const auto td_with_lock =
3209  catalog, *table, false);
3210  const auto td = td_with_lock();
3211  CHECK(td);
3212 
3213  check_alter_table_privilege(session, td);
3214  if (catalog.getMetadataForTable(*new_table_name) != nullptr) {
3215  throw std::runtime_error("Table or View " + *new_table_name + " already exists.");
3216  }
3217  catalog.renameTable(td, *new_table_name);
3218 }
3219 
// Fills the column descriptor `cd` from the parsed column definition `coldef`.
//
// The NOT NULL flag is taken from the column constraint when one is present
// and defaults to false otherwise; it is passed on together with cd, the
// column type and the compression definition.
//
// NOTE(review): listing line 3228, which names the callee receiving these
// arguments, is absent from this extract.
3220 void DDLStmt::setColumnDescriptor(ColumnDescriptor& cd, const ColumnDef* coldef) {
3221  bool not_null;
3222  const ColumnConstraintDef* cc = coldef->get_column_constraint();
3223  if (cc == nullptr) {
3224  not_null = false;
3225  } else {
3226  not_null = cc->get_notnull();
3227  }
3229  cd,
3230  coldef->get_column_type(),
3231  not_null,
3232  coldef->get_compression());
3233 }
3234 
3235 void AddColumnStmt::check_executable(const Catalog_Namespace::SessionInfo& session,
3236  const TableDescriptor* td) {
3237  auto& catalog = session.getCatalog();
3238  if (!td) {
3239  throw std::runtime_error("Table " + *table + " does not exist.");
3240  } else {
3241  if (td->isView) {
3242  throw std::runtime_error("Adding columns to a view is not supported.");
3243  }
3244  if (table_is_temporary(td)) {
3245  throw std::runtime_error(
3246  "Adding columns to temporary tables is not yet supported.");
3247  }
3248  };
3249 
3250  check_alter_table_privilege(session, td);
3251 
3252  if (0 == coldefs.size()) {
3253  coldefs.push_back(std::move(coldef));
3254  }
3255 
3256  for (const auto& coldef : coldefs) {
3257  auto& new_column_name = *coldef->get_column_name();
3258  if (catalog.getMetadataForColumn(td->tableId, new_column_name) != nullptr) {
3259  throw std::runtime_error("Column " + new_column_name + " already exists.");
3260  }
3261  }
3262 }
3263 
// Executes ALTER TABLE ... ADD COLUMN for the given session.
//
// After check_executable() passes and the table is confirmed not to use a
// SortedOrderFragmenter, the work runs inside a SQLite transaction on the
// catalog connector:
//   1. every column definition (plus the physical columns of geometry
//      columns) is added to the catalog;
//   2. if the table already holds rows, each new column's import buffer is
//      filled for nrows rows with the column's default value (or null);
//   3. the buffers are loaded without checkpoint, the catalog is rolled
//      forward, the loader checkpoints and the transaction ends.
// Any exception rolls the catalog back, issues ROLLBACK TRANSACTION and is
// rethrown.
//
// NOTE(review): this extract is missing listing lines 3269-3270, 3273, 3371
// and 3381, so the lock arguments and the names of the two geometry helper
// calls are not visible here.
3264 void AddColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3265  auto& catalog = session.getCatalog();
3266 
3267  // TODO(adb): the catalog should be handling this locking.
3268  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3271 
3272  const auto td_with_lock =
3274  catalog, *table, true);
3275  const auto td = td_with_lock();
3276 
3277  check_executable(session, td);
3278 
3279  CHECK(td->fragmenter);
3280  if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
3281  td->fragmenter)) {
3282  throw std::runtime_error(
3283  "Adding columns to a table is not supported when using the \"sort_column\" "
3284  "option.");
3285  }
3286 
3287  // Do not take a data write lock, as the fragmenter may call `deleteFragments` during
3288  // a cap operation. Note that the schema write lock will prevent concurrent inserts
3289  // along with all other queries.
3290 
3291  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
3292  try {
      // cds: new descriptors keyed by column name (only written to in the
      // visible lines). cid_coldefs: column id -> parsed definition, with
      // nullptr for the physical columns generated from a geometry column.
3293  std::map<const std::string, const ColumnDescriptor> cds;
3294  std::map<const int, const ColumnDef*> cid_coldefs;
3295  for (const auto& coldef : coldefs) {
3296  ColumnDescriptor cd;
3297  setColumnDescriptor(cd, coldef.get());
3298  catalog.addColumn(*td, cd);
3299  cds.emplace(*coldef->get_column_name(), cd);
3300  cid_coldefs.emplace(cd.columnId, coldef.get());
3301 
3302  // expand geo column to phy columns
3303  if (cd.columnType.is_geometry()) {
3304  std::list<ColumnDescriptor> phy_geo_columns;
3305  catalog.expandGeoColumn(cd, phy_geo_columns);
3306  for (auto& cd : phy_geo_columns) {
3307  catalog.addColumn(*td, cd);
3308  cds.emplace(cd.columnName, cd);
3309  cid_coldefs.emplace(cd.columnId, nullptr);
3310  }
3311  }
3312  }
3313 
3314  std::unique_ptr<import_export::Loader> loader(new import_export::Loader(catalog, td));
3315  auto import_buffers = import_export::setup_column_loaders(td, loader.get());
3316  loader->setReplicating(true);
3317 
3318  // set_geo_physical_import_buffer below needs a sorted import_buffers
3319  std::sort(import_buffers.begin(),
3320  import_buffers.end(),
3321  [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
3322  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
3323  });
3324 
3325  size_t nrows = td->fragmenter->getNumRows();
3326  // if sharded, get total nrows from all sharded tables
3327  if (td->nShards > 0) {
3328  const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
3329  nrows = 0;
3330  std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](const auto& td) {
3331  nrows += td->fragmenter->getNumRows();
3332  });
3333  }
      // Existing rows need a value for every new column.
3334  if (nrows > 0) {
      // Number of physical geo columns still to be skipped after a geometry
      // column has populated them itself.
3335  int skip_physical_cols = 0;
3336  for (const auto cit : cid_coldefs) {
3337  const auto cd = catalog.getMetadataForColumn(td->tableId, cit.first);
3338  const auto coldef = cit.second;
3339  const auto column_constraint = coldef ? coldef->get_column_constraint() : nullptr;
      // Default value as text: the string literal's content when the default
      // is a string literal, otherwise its to_string() form, otherwise empty.
3340  std::string defaultval = "";
3341  if (column_constraint) {
3342  auto defaultlp = column_constraint->get_defaultval();
3343  auto defaultsp = dynamic_cast<const StringLiteral*>(defaultlp);
3344  defaultval = defaultsp ? *defaultsp->get_stringval()
3345  : defaultlp ? defaultlp->to_string() : "";
3346  }
      // Null when there is no constraint, the default is empty, or the
      // default is the literal text NULL (case-insensitive).
3347  bool isnull = column_constraint ? (0 == defaultval.size()) : true;
3348  if (boost::to_upper_copy<std::string>(defaultval) == "NULL") {
3349  isnull = true;
3350  }
3351 
3352  if (isnull) {
3353  if (column_constraint && column_constraint->get_notnull()) {
3354  throw std::runtime_error("Default value required for column " +
3355  cd->columnName + " (NULL value not supported)");
3356  }
3357  }
3358 
      // Locate this column's import buffer; the search stops at the first
      // match whether or not a value was added.
3359  for (auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
3360  auto& import_buffer = *it;
3361  if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
3362  if (coldef != nullptr ||
3363  skip_physical_cols-- <= 0) { // skip non-null phy col
3364  import_buffer->add_value(
3365  cd, defaultval, isnull, import_export::CopyParams(), nrows);
3366  if (cd->columnType.is_geometry()) {
3367  std::vector<double> coords, bounds;
3368  std::vector<int> ring_sizes, poly_rings;
3369  int render_group = 0;
3370  SQLTypeInfo tinfo{cd->columnType};
3372  tinfo,
3373  coords,
3374  bounds,
3375  ring_sizes,
3376  poly_rings,
3377  false)) {
3378  throw std::runtime_error("Bad geometry data: '" + defaultval + "'");
3379  }
      // Index of the buffer immediately after the geometry column's own.
3380  size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
3382  cd,
3383  import_buffers,
3384  col_idx,
3385  coords,
3386  bounds,
3387  ring_sizes,
3388  poly_rings,
3389  render_group,
3390  nrows);
3391  // skip following phy cols
3392  skip_physical_cols = cd->columnType.get_physical_cols();
3393  }
3394  }
3395  break;
3396  }
3397  }
3398  }
3399  }
3400 
      // Commit path: load, roll the catalog forward, checkpoint, end the
      // SQLite transaction.
3401  if (!loader->loadNoCheckpoint(import_buffers, nrows)) {
3402  throw std::runtime_error("loadNoCheckpoint failed!");
3403  }
3404  catalog.roll(true);
3405  loader->checkpoint();
3406  catalog.getSqliteConnector().query("END TRANSACTION");
3407  } catch (...) {
      // Failure path: undo catalog changes, roll back SQLite, rethrow.
3408  catalog.roll(false);
3409  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
3410  throw;
3411  }
3412 }
3413 
// Executes ALTER TABLE ... DROP COLUMN for the given session.
//
// Validates the target (table exists, is neither a view nor temporary, the
// user may alter it, every named column exists, and the nColumns threshold
// check passes), then inside a SQLite transaction drops each column and its
// physical columns from the catalog and from the fragmenter of every
// physical table. On success the catalog is rolled forward and, for
// disk-level tables, checkpointed. On any exception the table is marked for
// reload, the catalog and the SQLite transaction are rolled back, and the
// exception is rethrown.
//
// NOTE(review): this extract is missing listing lines 3419-3420, 3423, 3472
// and 3488, so the lock arguments, the condition of the test-only forced
// rollback and the cache-invalidation call are not visible here.
3414 void DropColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3415  auto& catalog = session.getCatalog();
3416 
3417  // TODO(adb): the catalog should be handling this locking.
3418  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3421 
3422  const auto td_with_lock =
3424  catalog, *table, true);
3425  const auto td = td_with_lock();
3426  if (!td) {
3427  throw std::runtime_error("Table " + *table + " does not exist.");
3428  }
3429  if (td->isView) {
3430  throw std::runtime_error("Dropping a column from a view is not supported.");
3431  }
3432  if (table_is_temporary(td)) {
3433  throw std::runtime_error(
3434  "Dropping a column from a temporary table is not yet supported.");
3435  }
3436 
3437  check_alter_table_privilege(session, td);
3438 
3439  for (const auto& column : columns) {
3440  if (nullptr == catalog.getMetadataForColumn(td->tableId, *column)) {
3441  throw std::runtime_error("Column " + *column + " does not exist.");
3442  }
3443  }
3444 
      // Refuse to drop from a table at or below the threshold: 3 columns when
      // hasDeletedCol is set, 2 otherwise. Presumably nColumns also counts
      // system columns, so this means a single user column - TODO confirm.
3445  if (td->nColumns <= (td->hasDeletedCol ? 3 : 2)) {
3446  throw std::runtime_error("Table " + *table + " has only one column.");
3447  }
3448 
3449  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
3450  try {
      // Ids of every dropped column, logical and physical, handed to the
      // fragmenters below.
3451  std::vector<int> columnIds;
3452  for (const auto& column : columns) {
3453  ColumnDescriptor cd = *catalog.getMetadataForColumn(td->tableId, *column);
3454  if (td->nShards > 0 && td->shardedColumnId == cd.columnId) {
3455  throw std::runtime_error("Dropping sharding column " + cd.columnName +
3456  " is not supported.");
3457  }
3458  catalog.dropColumn(*td, cd);
3459  columnIds.push_back(cd.columnId);
      // Physical columns are looked up at the ids directly following the
      // logical column's id.
3460  for (int i = 0; i < cd.columnType.get_physical_cols(); i++) {
3461  const auto pcd = catalog.getMetadataForColumn(td->tableId, cd.columnId + i + 1);
3462  CHECK(pcd);
3463  catalog.dropColumn(*td, *pcd);
3464  columnIds.push_back(cd.columnId + i + 1);
3465  }
3466  }
3467 
3468  for (auto shard : catalog.getPhysicalTablesDescriptors(td)) {
3469  shard->fragmenter->dropColumns(columnIds);
3470  }
3471  // if test forces to rollback
3473  throw std::runtime_error("lol!");
3474  }
3475  catalog.roll(true);
3476  if (td->persistenceLevel == Data_Namespace::MemoryLevel::DISK_LEVEL) {
3477  catalog.checkpoint(td->tableId);
3478  }
3479  catalog.getSqliteConnector().query("END TRANSACTION");
3480  } catch (...) {
3481  catalog.setForReload(td->tableId);
3482  catalog.roll(false);
3483  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
3484  throw;
3485  }
3486 
3487  // invalidate cached hashtable
3489 }
3490 
// Executes ALTER TABLE ... RENAME COLUMN for the given session.
//
// Resolves the table under a schema lock, checks the alter-table privilege,
// requires that *column exists and that *new_column_name is not already used
// in the table, and then delegates to catalog.renameColumn().
//
// Throws std::runtime_error when the privilege check fails, the column does
// not exist, or the new name is already taken.
//
// NOTE(review): listing line 3495 (the lock-container type) is absent from
// this extract.
3491 void RenameColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3492  auto& catalog = session.getCatalog();
3493 
3494  const auto td_with_lock =
3496  catalog, *table, false);
3497  const auto td = td_with_lock();
3498  CHECK(td);
3499 
3500  check_alter_table_privilege(session, td);
3501  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *column);
3502  if (cd == nullptr) {
3503  throw std::runtime_error("Column " + *column + " does not exist.");
3504  }
3505  if (catalog.getMetadataForColumn(td->tableId, *new_column_name) != nullptr) {
3506  throw std::runtime_error("Column " + *new_column_name + " already exists.");
3507  }
3508  catalog.renameColumn(td, cd, *new_column_name);
3509 }
3510 
3511 void CopyTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3512  auto importer_factory = [](Catalog_Namespace::Catalog& catalog,
3513  const TableDescriptor* td,
3514  const std::string& file_path,
3515  const import_export::CopyParams& copy_params) {
3516  return std::make_unique<import_export::Importer>(catalog, td, file_path, copy_params);
3517  };
3518  return execute(session, importer_factory);
3519 }
3520 
3521 void CopyTableStmt::execute(const Catalog_Namespace::SessionInfo& session,
3522  const std::function<std::unique_ptr<import_export::Importer>(
3524  const TableDescriptor*,
3525  const std::string&,
3526  const import_export::CopyParams&)>& importer_factory) {
3527  boost::regex non_local_file_regex{R"(^\s*(s3|http|https)://.+)",
3528  boost::regex::extended | boost::regex::icase};
3529  if (!boost::regex_match(*file_pattern, non_local_file_regex)) {
3532  }
3533 
3534  size_t rows_completed = 0;
3535  size_t rows_rejected = 0;
3536  size_t total_time = 0;
3537  bool load_truncated = false;
3538 
3539  // Prevent simultaneous import / truncate (see TruncateTableStmt::execute)
3540  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3543 
3544  const TableDescriptor* td{nullptr};
3545  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
3546  std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
3547 
3548  auto& catalog = session.getCatalog();
3549 
3550  try {
3551  td_with_lock = std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
3553  catalog, *table));
3554  td = (*td_with_lock)();
3555  insert_data_lock = std::make_unique<lockmgr::WriteLock>(
3557  } catch (const std::runtime_error& e) {
3558  // noop
3559  // TODO(adb): We're really only interested in whether the table exists or not.
3560  // Create a more refined exception.
3561  }
3562 
3563  // if the table already exists, it's locked, so check access privileges
3564  if (td) {
3565  std::vector<DBObject> privObjects;
3566  DBObject dbObject(*table, TableDBObjectType);
3567  dbObject.loadKey(catalog);
3569  privObjects.push_back(dbObject);
3570  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3571  throw std::runtime_error("Violation of access privileges: user " +
3572  session.get_currentUser().userName +
3573  " has no insert privileges for table " + *table + ".");
3574  }
3575  }
3576 
3577  // since we'll have not only posix file names but also s3/hdfs/... url
3578  // we do not expand wildcard or check file existence here.
3579  // from here on, file_path contains something which may be a url
3580  // or a wildcard of file names;
3581  std::string file_path = *file_pattern;
3582  import_export::CopyParams copy_params;
3583  if (!options.empty()) {
3584  for (auto& p : options) {
3585  if (boost::iequals(*p->get_name(), "max_reject")) {
3586  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3587  if (int_literal == nullptr) {
3588  throw std::runtime_error("max_reject option must be an integer.");
3589  }
3590  copy_params.max_reject = int_literal->get_intval();
3591  } else if (boost::iequals(*p->get_name(), "buffer_size")) {
3592  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3593  if (int_literal == nullptr) {
3594  throw std::runtime_error("buffer_size option must be an integer.");
3595  }
3596  copy_params.buffer_size = std::max<size_t>(1 << 20, int_literal->get_intval());
3597  } else if (boost::iequals(*p->get_name(), "threads")) {
3598  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3599  if (int_literal == nullptr) {
3600  throw std::runtime_error("Threads option must be an integer.");
3601  }
3602  copy_params.threads = int_literal->get_intval();
3603  } else if (boost::iequals(*p->get_name(), "delimiter")) {
3604  const StringLiteral* str_literal =
3605  dynamic_cast<const StringLiteral*>(p->get_value());
3606  if (str_literal == nullptr) {
3607  throw std::runtime_error("Delimiter option must be a string.");
3608  } else if (str_literal->get_stringval()->length() != 1) {
3609  throw std::runtime_error("Delimiter must be a single character string.");
3610  }
3611  copy_params.delimiter = (*str_literal->get_stringval())[0];
3612  } else if (boost::iequals(*p->get_name(), "nulls")) {
3613  const StringLiteral* str_literal =
3614  dynamic_cast<const StringLiteral*>(p->get_value());
3615  if (str_literal == nullptr) {
3616  throw std::runtime_error("Nulls option must be a string.");
3617  }
3618  copy_params.null_str = *str_literal->get_stringval();
3619  } else if (boost::iequals(*p->get_name(), "header")) {
3620  const StringLiteral* str_literal =
3621  dynamic_cast<const StringLiteral*>(p->get_value());
3622  if (str_literal == nullptr) {
3623  throw std::runtime_error("Header option must be a boolean.");
3624  }
3625  copy_params.has_header = bool_from_string_literal(str_literal)
3628 #ifdef ENABLE_IMPORT_PARQUET
3629  } else if (boost::iequals(*p->get_name(), "parquet")) {
3630  const StringLiteral* str_literal =
3631  dynamic_cast<const StringLiteral*>(p->get_value());
3632  if (str_literal == nullptr) {
3633  throw std::runtime_error("Parquet option must be a boolean.");
3634  }
3635  if (bool_from_string_literal(str_literal)) {
3636  // not sure a parquet "table" type is proper, but to make code
3637  // look consistent in some places, let's set "table" type too
3638  copy_params.file_type = import_export::FileType::PARQUET;
3639  }
3640 #endif // ENABLE_IMPORT_PARQUET
3641  } else if (boost::iequals(*p->get_name(), "s3_access_key")) {
3642  const StringLiteral* str_literal =
3643  dynamic_cast<const StringLiteral*>(p->get_value());
3644  if (str_literal == nullptr) {
3645  throw std::runtime_error("Option s3_access_key must be a string.");
3646  }
3647  copy_params.s3_access_key = *str_literal->get_stringval();
3648  } else if (boost::iequals(*p->get_name(), "s3_secret_key")) {
3649  const StringLiteral* str_literal =
3650  dynamic_cast<const StringLiteral*>(p->get_value());
3651  if (str_literal == nullptr) {
3652  throw std::runtime_error("Option s3_secret_key must be a string.");
3653  }
3654  copy_params.s3_secret_key = *str_literal->get_stringval();
3655  } else if (boost::iequals(*p->get_name(), "s3_region")) {
3656  const StringLiteral* str_literal =
3657  dynamic_cast<const StringLiteral*>(p->get_value());
3658  if (str_literal == nullptr) {
3659  throw std::runtime_error("Option s3_region must be a string.");
3660  }
3661  copy_params.s3_region = *str_literal->get_stringval();
3662  } else if (boost::iequals(*p->get_name(), "s3_endpoint")) {
3663  const StringLiteral* str_literal =
3664  dynamic_cast<const StringLiteral*>(p->get_value());
3665  if (str_literal == nullptr) {
3666  throw std::runtime_error("Option s3_endpoint must be a string.");
3667  }
3668  copy_params.s3_endpoint = *str_literal->get_stringval();
3669  } else if (boost::iequals(*p->get_name(), "quote")) {
3670  const StringLiteral* str_literal =
3671  dynamic_cast<const StringLiteral*>(p->get_value());
3672  if (str_literal == nullptr) {
3673  throw std::runtime_error("Quote option must be a string.");
3674  } else if (str_literal->get_stringval()->length() != 1) {
3675  throw std::runtime_error("Quote must be a single character string.");
3676  }
3677  copy_params.quote = (*str_literal->get_stringval())[0];
3678  } else if (boost::iequals(*p->get_name(), "escape")) {
3679  const StringLiteral* str_literal =
3680  dynamic_cast<const StringLiteral*>(p->get_value());
3681  if (str_literal == nullptr) {
3682  throw std::runtime_error("Escape option must be a string.");
3683  } else if (str_literal->get_stringval()->length() != 1) {
3684  throw std::runtime_error("Escape must be a single character string.");
3685  }
3686  copy_params.escape = (*str_literal->get_stringval())[0];
3687  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
3688  const StringLiteral* str_literal =
3689  dynamic_cast<const StringLiteral*>(p->get_value());
3690  if (str_literal == nullptr) {
3691  throw std::runtime_error("Line_delimiter option must be a string.");
3692  } else if (str_literal->get_stringval()->length() != 1) {
3693  throw std::runtime_error("Line_delimiter must be a single character string.");
3694  }
3695  copy_params.line_delim = (*str_literal->get_stringval())[0];
3696  } else if (boost::iequals(*p->get_name(), "quoted")) {
3697  const StringLiteral* str_literal =
3698  dynamic_cast<const StringLiteral*>(p->get_value());
3699  if (str_literal == nullptr) {
3700  throw std::runtime_error("Quoted option must be a boolean.");
3701  }
3702  copy_params.quoted = bool_from_string_literal(str_literal);
3703  } else if (boost::iequals(*p->get_name(), "plain_text")) {
3704  const StringLiteral* str_literal =
3705  dynamic_cast<const StringLiteral*>(p->get_value());
3706  if (str_literal == nullptr) {
3707  throw std::runtime_error("plain_text option must be a boolean.");
3708  }
3709  copy_params.plain_text = bool_from_string_literal(str_literal);
3710  } else if (boost::iequals(*p->get_name(), "array_marker")) {
3711  const StringLiteral* str_literal =
3712  dynamic_cast<const StringLiteral*>(p->get_value());
3713  if (str_literal == nullptr) {
3714  throw std::runtime_error("Array Marker option must be a string.");
3715  } else if (str_literal->get_stringval()->length() != 2) {
3716  throw std::runtime_error(
3717  "Array Marker option must be exactly two characters. Default is {}.");
3718  }
3719  copy_params.array_begin = (*str_literal->get_stringval())[0];
3720  copy_params.array_end = (*str_literal->get_stringval())[1];
3721  } else if (boost::iequals(*p->get_name(), "array_delimiter")) {
3722  const StringLiteral* str_literal =
3723  dynamic_cast<const StringLiteral*>(p->get_value());
3724  if (str_literal == nullptr) {
3725  throw std::runtime_error("Array Delimiter option must be a string.");
3726  } else if (str_literal->get_stringval()->length() != 1) {
3727  throw std::runtime_error("Array Delimiter must be a single character string.");
3728  }
3729  copy_params.array_delim = (*str_literal->get_stringval())[0];
3730  } else if (boost::iequals(*p->get_name(), "lonlat")) {
3731  const StringLiteral* str_literal =
3732  dynamic_cast<const StringLiteral*>(p->get_value());
3733  if (str_literal == nullptr) {
3734  throw std::runtime_error("Lonlat option must be a boolean.");
3735  }
3736  copy_params.lonlat = bool_from_string_literal(str_literal);
3737  } else if (boost::iequals(*p->get_name(), "geo")) {
3738  const StringLiteral* str_literal =
3739  dynamic_cast<const StringLiteral*>(p->get_value());
3740  if (str_literal == nullptr) {
3741  throw std::runtime_error("Geo option must be a boolean.");
3742  }
3743  copy_params.file_type = bool_from_string_literal(str_literal)
3746  } else if (boost::iequals(*p->get_name(), "geo_coords_type")) {
3747  const StringLiteral* str_literal =
3748  dynamic_cast<const StringLiteral*>(p->get_value());
3749  if (str_literal == nullptr) {
3750  throw std::runtime_error("'geo_coords_type' option must be a string");
3751  }
3752  const std::string* s = str_literal->get_stringval();
3753  if (boost::iequals(*s, "geography")) {
3754  throw std::runtime_error(
3755  "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
3756  // copy_params.geo_coords_type = kGEOGRAPHY;
3757  } else if (boost::iequals(*s, "geometry")) {
3758  copy_params.geo_coords_type = kGEOMETRY;
3759  } else {
3760  throw std::runtime_error(
3761  "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
3762  "'GEOMETRY'): " +
3763  *s);
3764  }
3765  } else if (boost::iequals(*p->get_name(), "geo_coords_encoding")) {
3766  const StringLiteral* str_literal =
3767  dynamic_cast<const StringLiteral*>(p->get_value());
3768  if (str_literal == nullptr) {
3769  throw std::runtime_error("'geo_coords_encoding' option must be a string");
3770  }
3771  const std::string* s = str_literal->get_stringval();
3772  if (boost::iequals(*s, "none")) {
3773  copy_params.geo_coords_encoding = kENCODING_NONE;
3774  copy_params.geo_coords_comp_param = 0;
3775  } else if (boost::iequals(*s, "compressed(32)")) {
3776  copy_params.geo_coords_encoding = kENCODING_GEOINT;
3777  copy_params.geo_coords_comp_param = 32;
3778  } else {
3779  throw std::runtime_error(
3780  "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
3781  "'COMPRESSED(32)'): " +
3782  *s);
3783  }
3784  } else if (boost::iequals(*p->get_name(), "geo_coords_srid")) {
3785  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3786  if (int_literal == nullptr) {
3787  throw std::runtime_error("'geo_coords_srid' option must be an integer");
3788  }
3789  const int srid = int_literal->get_intval();
3790  if (srid == 4326 || srid == 3857 || srid == 900913) {
3791  copy_params.geo_coords_srid = srid;
3792  } else {
3793  throw std::runtime_error(
3794  "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
3795  "900913): " +
3796  std::to_string(srid));
3797  }
3798  } else if (boost::iequals(*p->get_name(), "geo_layer_name")) {
3799  const StringLiteral* str_literal =
3800  dynamic_cast<const StringLiteral*>(p->get_value());
3801  if (str_literal == nullptr) {
3802  throw std::runtime_error("'geo_layer_name' option must be a string");
3803  }
3804  const std::string* layer_name = str_literal->get_stringval();
3805  if (layer_name) {
3806  copy_params.geo_layer_name = *layer_name;
3807  } else {
3808  throw std::runtime_error("Invalid value for 'geo_layer_name' option");
3809  }
3810  } else if (boost::iequals(*p->get_name(), "partitions")) {
3811  if (copy_params.file_type == import_export::FileType::POLYGON) {
3812  const auto partitions =
3813  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
3814  CHECK(partitions);
3815  const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
3816  if (partitions_uc != "REPLICATED") {
3817  throw std::runtime_error("PARTITIONS must be REPLICATED for geo COPY");
3818  }
3819  _geo_copy_from_partitions = partitions_uc;
3820  } else {
3821  throw std::runtime_error("PARTITIONS option not supported for non-geo COPY: " +
3822  *p->get_name());
3823  }
3824  } else if (boost::iequals(*p->get_name(), "geo_assign_render_groups")) {
3825  const StringLiteral* str_literal =
3826  dynamic_cast<const StringLiteral*>(p->get_value());
3827  if (str_literal == nullptr) {
3828  throw std::runtime_error("geo_assign_render_groups option must be a boolean.");
3829  }
3830  copy_params.geo_assign_render_groups = bool_from_string_literal(str_literal);
3831  } else if (boost::iequals(*p->get_name(), "geo_explode_collections")) {
3832  const StringLiteral* str_literal =
3833  dynamic_cast<const StringLiteral*>(p->get_value());
3834  if (str_literal == nullptr) {
3835  throw std::runtime_error("geo_explode_collections option must be a boolean.");
3836  }
3837  copy_params.geo_explode_collections = bool_from_string_literal(str_literal);
3838  } else {
3839  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
3840  }
3841  }
3842  }
3843 
3844  std::string tr;
3845  if (copy_params.file_type == import_export::FileType::POLYGON) {
3846  // geo import
3847  // we do nothing here, except stash the parameters so we can
3848  // do the import when we unwind to the top of the handler
3849  _geo_copy_from_file_name = file_path;
3850  _geo_copy_from_copy_params = copy_params;
3851  _was_geo_copy_from = true;
3852 
3853  // the result string
3854  // @TODO simon.eves put something more useful in here
3855  // except we really can't because we haven't done the import yet!
3856  if (td) {
3857  tr = std::string("Appending geo to table '") + *table + std::string("'...");
3858  } else {
3859  tr = std::string("Creating table '") + *table +
3860  std::string("' and importing geo...");
3861  }
3862  } else {
3863  if (td) {
3864  CHECK(td_with_lock);
3865 
3866  // regular import
3867  auto importer = importer_factory(catalog, td, file_path, copy_params);
3868  auto ms = measure<>::execution([&]() {
3869  auto res = importer->import();
3870  rows_completed += res.rows_completed;
3871  rows_rejected += res.rows_rejected;
3872  load_truncated = res.load_truncated;
3873  });
3874  total_time += ms;
3875 
3876  // results
3877  if (load_truncated || rows_rejected > copy_params.max_reject) {
3878  LOG(ERROR) << "COPY exited early due to reject records count during multi file "
3879  "processing ";
3880  // if we have crossed the truncated load threshold
3881  load_truncated = true;
3882  success = false;
3883  }
3884  if (!load_truncated) {
3885  tr = std::string("Loaded: " + std::to_string(rows_completed) +
3886  " recs, Rejected: " + std::to_string(rows_rejected) +
3887  " recs in " + std::to_string((double)total_time / 1000.0) +
3888  " secs");
3889  } else {
3890  tr = std::string("Loader truncated due to reject count. Processed : " +
3891  std::to_string(rows_completed) + " recs, Rejected: " +
3892  std::to_string(rows_rejected) + " recs in " +
3893  std::to_string((double)total_time / 1000.0) + " secs");
3894  }
3895  } else {
3896  throw std::runtime_error("Table '" + *table + "' must exist before COPY FROM");
3897  }
3898  }
3899 
3900  return_message.reset(new std::string(tr));
3901  LOG(INFO) << tr;
3902 }
3903 
3904 // CREATE ROLE payroll_dept_role;
3905 void CreateRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3906  const auto& currentUser = session.get_currentUser();
3907  if (!currentUser.isSuper) {
3908  throw std::runtime_error("CREATE ROLE " + get_role() +
3909  " failed. It can only be executed by super user.");
3910  }
3911  SysCatalog::instance().createRole(get_role());
3912 }
3913 
3914 // DROP ROLE payroll_dept_role;
3915 void DropRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3916  const auto& currentUser = session.get_currentUser();
3917  if (!currentUser.isSuper) {
3918  throw std::runtime_error("DROP ROLE " + get_role() +
3919  " failed. It can only be executed by super user.");
3920  }
3921  auto* rl = SysCatalog::instance().getRoleGrantee(get_role());
3922  if (!rl) {
3923  throw std::runtime_error("DROP ROLE " + get_role() +
3924  " failed because role with this name does not exist.");
3925  }
3926  SysCatalog::instance().dropRole(get_role());
3927 }
3928 
3929 std::vector<std::string> splitObjectHierName(const std::string& hierName) {
3930  std::vector<std::string> componentNames;
3931  boost::split(componentNames, hierName, boost::is_any_of("."));
3932  return componentNames;
3933 }
3934 
// Extracts the bare object name from a possibly dot-qualified name.
//
// objectHierName is split on '.' via splitObjectHierName(). objectType is
// compared case-sensitively against upper-case literals:
//   - "DATABASE": exactly one component is accepted and returned.
//   - "TABLE", "DASHBOARD", "VIEW", "SERVER": one or two components are
//     accepted; the last component is returned.
// Any other component count or object type raises std::runtime_error.
//
// NOTE(review): callers pass a third (catalog) argument, but the line declaring
// that parameter and closing the signature is not present in this listing.
3935 std::string extractObjectNameFromHierName(const std::string& objectHierName,
3936  const std::string& objectType,
3938  std::string objectName;
3939  std::vector<std::string> componentNames = splitObjectHierName(objectHierName);
3940  if (objectType.compare("DATABASE") == 0) {
  // A database name must not be qualified.
3941  if (componentNames.size() == 1) {
3942  objectName = componentNames[0];
3943  } else {
3944  throw std::runtime_error("DB object name is not correct " + objectHierName);
3945  }
3946  } else {
3947  if (objectType.compare("TABLE") == 0 || objectType.compare("DASHBOARD") == 0 ||
3948  objectType.compare("VIEW") == 0 || objectType.compare("SERVER") == 0) {
3949  switch (componentNames.size()) {
3950  case (1): {
  // Unqualified name: use it as is.
3951  objectName = componentNames[0];
3952  break;
3953  }
3954  case (2): {
  // Qualified name: the first component is ignored, the second is the object.
3955  objectName = componentNames[1];
3956  break;
3957  }
3958  default: {
3959  throw std::runtime_error("DB object name is not correct " + objectHierName);
3960  }
3961  }
3962  } else {
3963  throw std::runtime_error("DB object type " + objectType + " is not supported.");
3964  }
3965  }
3966  return objectName;
3967 }
3968 
3969 static std::pair<AccessPrivileges, DBObjectType> parseStringPrivs(
3970  const std::string& privs,
3971  const DBObjectType& objectType,
3972  const std::string& object_name) {
3973  static const std::map<std::pair<const std::string, const DBObjectType>,
3974  std::pair<const AccessPrivileges, const DBObjectType>>
3975  privileges_lookup{
3976  {{"ALL"s, DatabaseDBObjectType},
3977  {AccessPrivileges::ALL_DATABASE, DatabaseDBObjectType}},
3978  {{"ALL"s, TableDBObjectType}, {AccessPrivileges::ALL_TABLE, TableDBObjectType}},
3979  {{"ALL"s, DashboardDBObjectType},
3980  {AccessPrivileges::ALL_DASHBOARD, DashboardDBObjectType}},
3981  {{"ALL"s, ViewDBObjectType}, {AccessPrivileges::ALL_VIEW, ViewDBObjectType}},
3982  {{"ALL"s, ServerDBObjectType},
3983  {AccessPrivileges::ALL_SERVER, ServerDBObjectType}},
3984 
3985  {{"CREATE TABLE"s, DatabaseDBObjectType},
3986  {AccessPrivileges::CREATE_TABLE, TableDBObjectType}},
3987  {{"CREATE"s, DatabaseDBObjectType},
3988  {AccessPrivileges::CREATE_TABLE, TableDBObjectType}},
3989  {{"SELECT"s, DatabaseDBObjectType},
3990  {AccessPrivileges::SELECT_FROM_TABLE, TableDBObjectType}},
3991  {{"INSERT"s, DatabaseDBObjectType},
3992  {AccessPrivileges::INSERT_INTO_TABLE, TableDBObjectType}},
3993  {{"TRUNCATE"s, DatabaseDBObjectType},
3994  {AccessPrivileges::TRUNCATE_TABLE, TableDBObjectType}},
3995  {{"UPDATE"s, DatabaseDBObjectType},
3996  {AccessPrivileges::UPDATE_IN_TABLE, TableDBObjectType}},
3997  {{"DELETE"s, DatabaseDBObjectType},
3998  {AccessPrivileges::DELETE_FROM_TABLE, TableDBObjectType}},
3999  {{"DROP"s, DatabaseDBObjectType},
4000  {AccessPrivileges::DROP_TABLE, TableDBObjectType}},
4001  {{"ALTER"s, DatabaseDBObjectType},
4002  {AccessPrivileges::ALTER_TABLE, TableDBObjectType}},
4003 
4004  {{"SELECT"s, TableDBObjectType},
4005  {AccessPrivileges::SELECT_FROM_TABLE, TableDBObjectType}},
4006  {{"INSERT"s, TableDBObjectType},
4007  {AccessPrivileges::INSERT_INTO_TABLE, TableDBObjectType}},
4008  {{"TRUNCATE"s, TableDBObjectType},
4009  {AccessPrivileges::TRUNCATE_TABLE, TableDBObjectType}},
4010  {{"UPDATE"s, TableDBObjectType},
4011  {AccessPrivileges::UPDATE_IN_TABLE, TableDBObjectType}},
4012  {{"DELETE"s, TableDBObjectType},
4013  {AccessPrivileges::DELETE_FROM_TABLE, TableDBObjectType}},
4014  {{"DROP"s, TableDBObjectType},
4015  {AccessPrivileges::DROP_TABLE, TableDBObjectType}},
4016  {{"ALTER"s, TableDBObjectType},
4017  {AccessPrivileges::ALTER_TABLE, TableDBObjectType}},
4018 
4019  {{"CREATE VIEW"s, DatabaseDBObjectType},
4020  {AccessPrivileges::CREATE_VIEW, ViewDBObjectType}},
4021  {{"SELECT VIEW"s, DatabaseDBObjectType},
4022  {AccessPrivileges::SELECT_FROM_VIEW, ViewDBObjectType}},
4023  {{"DROP VIEW"s, DatabaseDBObjectType},
4024  {AccessPrivileges::DROP_VIEW, ViewDBObjectType}},
4025  {{"SELECT"s, ViewDBObjectType},
4026  {AccessPrivileges::SELECT_FROM_VIEW, ViewDBObjectType}},
4027  {{"DROP"s, ViewDBObjectType}, {AccessPrivileges::DROP_VIEW, ViewDBObjectType}},
4028 
4029  {{"CREATE DASHBOARD"s, DatabaseDBObjectType},
4030  {AccessPrivileges::CREATE_DASHBOARD, DashboardDBObjectType}},
4031  {{"EDIT DASHBOARD"s, DatabaseDBObjectType},
4032  {AccessPrivileges::EDIT_DASHBOARD, DashboardDBObjectType}},
4033  {{"VIEW DASHBOARD"s, DatabaseDBObjectType},
4034  {AccessPrivileges::VIEW_DASHBOARD, DashboardDBObjectType}},
4035  {{"DELETE DASHBOARD"s, DatabaseDBObjectType},
4036  {AccessPrivileges::DELETE_DASHBOARD, DashboardDBObjectType}},
4037  {{"VIEW"s, DashboardDBObjectType},
4038  {AccessPrivileges::VIEW_DASHBOARD, DashboardDBObjectType}},
4039  {{"EDIT"s, DashboardDBObjectType},
4040  {AccessPrivileges::EDIT_DASHBOARD, DashboardDBObjectType}},
4041  {{"DELETE"s, DashboardDBObjectType},
4042  {AccessPrivileges::DELETE_DASHBOARD, DashboardDBObjectType}},
4043 
4044  {{"CREATE SERVER"s, DatabaseDBObjectType},
4045  {AccessPrivileges::CREATE_SERVER, ServerDBObjectType}},
4046  {{"DROP SERVER"s, DatabaseDBObjectType},
4047  {AccessPrivileges::DROP_SERVER, ServerDBObjectType}},
4048  {{"DROP"s, ServerDBObjectType},
4049  {AccessPrivileges::DROP_SERVER, ServerDBObjectType}},
4050 
4051  {{"VIEW SQL EDITOR"s, DatabaseDBObjectType},
4052  {AccessPrivileges::VIEW_SQL_EDITOR, DatabaseDBObjectType}},
4053  {{"ACCESS"s, DatabaseDBObjectType},
4054  {AccessPrivileges::ACCESS, DatabaseDBObjectType}}};
4055 
4056  auto result = privileges_lookup.find(std::make_pair(privs, objectType));
4057  if (result == privileges_lookup.end()) {
4058  throw std::runtime_error("Privileges " + privs + " on DB object " + object_name +
4059  " are not correct.");
4060  }
4061  return result->second;
4062 }
4063 
4064 static DBObject createObject(const std::string& objectName, DBObjectType objectType) {
4065  if (objectType == DashboardDBObjectType) {
4066  int32_t dashboard_id = -1;
4067  if (!objectName.empty()) {
4068  try {
4069  dashboard_id = stoi(objectName);
4070  } catch (const std::exception&) {
4071  throw std::runtime_error(
4072  "Privileges on dashboards should be changed via integer dashboard ID");
4073  }
4074  }
4075  return DBObject(dashboard_id, objectType);
4076  } else {
4077  return DBObject(objectName, objectType);
4078  }
4079 }
4080 
4081 // GRANT SELECT/INSERT/CREATE ON TABLE payroll_table TO payroll_dept_role;
4082 void GrantPrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4083  auto& catalog = session.getCatalog();
4084  const auto& currentUser = session.get_currentUser();
4085  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4086  const auto objectName =
4087  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4088  auto objectType = DBObjectTypeFromString(parserObjectType);
4089  if (objectType == ServerDBObjectType && !g_enable_fsi) {
4090  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
4091  }
4092  DBObject dbObject = createObject(objectName, objectType);
4093  /* verify object ownership if not suser */
4094  if (!currentUser.isSuper) {
4095  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4096  throw std::runtime_error(
4097  "GRANT failed. It can only be executed by super user or owner of the "
4098  "object.");
4099  }
4100  }
4101  /* set proper values of privileges & grant them to the object */
4102  std::vector<DBObject> objects(get_privs().size(), dbObject);
4103  for (size_t i = 0; i < get_privs().size(); ++i) {
4104  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
4105  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
4106  objects[i].setPrivileges(priv.first);
4107  objects[i].setPermissionType(priv.second);
4108  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
4109  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
4110  }
4111  }
4112  SysCatalog::instance().grantDBObjectPrivilegesBatch(grantees, objects, catalog);
4113 }
4114 
4115 // REVOKE SELECT/INSERT/CREATE ON TABLE payroll_table FROM payroll_dept_role;
4116 void RevokePrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4117  auto& catalog = session.getCatalog();
4118  const auto& currentUser = session.get_currentUser();
4119  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4120  const auto objectName =
4121  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4122  auto objectType = DBObjectTypeFromString(parserObjectType);
4123  if (objectType == ServerDBObjectType && !g_enable_fsi) {
4124  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
4125  }
4126  DBObject dbObject = createObject(objectName, objectType);
4127  /* verify object ownership if not suser */
4128  if (!currentUser.isSuper) {
4129  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4130  throw std::runtime_error(
4131  "REVOKE failed. It can only be executed by super user or owner of the "
4132  "object.");
4133  }
4134  }
4135  /* set proper values of privileges & grant them to the object */
4136  std::vector<DBObject> objects(get_privs().size(), dbObject);
4137  for (size_t i = 0; i < get_privs().size(); ++i) {
4138  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
4139  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
4140  objects[i].setPrivileges(priv.first);
4141  objects[i].setPermissionType(priv.second);
4142  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
4143  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
4144  }
4145  }
4146  SysCatalog::instance().revokeDBObjectPrivilegesBatch(grantees, objects, catalog);
4147 }
4148 
4149 // NOTE: not used currently, will we ever use it?
4150 // SHOW ON TABLE payroll_table FOR payroll_dept_role;
//
// Prints to stdout the privileges that role get_role() holds on the object named
// by get_object(). A non-super user must own the object, otherwise
// std::runtime_error is thrown.
//
// NOTE(review): in this listing the `if (...)` line that guards each printf
// below is missing (the embedded numbering skips one line before every printf),
// so the exact privilege test behind each keyword cannot be read from here.
4151 void ShowPrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4152  auto& catalog = session.getCatalog();
4153  const auto& currentUser = session.get_currentUser();
4154  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4155  const auto objectName =
4156  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4157  auto objectType = DBObjectTypeFromString(parserObjectType);
4158  DBObject dbObject = createObject(objectName, objectType);
4159  /* verify object ownership if not suser */
4160  if (!currentUser.isSuper) {
4161  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4162  throw std::runtime_error(
4163  "SHOW ON " + get_object() + " FOR " + get_role() +
4164  " failed. It can only be executed by super user or owner of the object.");
4165  }
4166  }
4167  /* get values of privileges for the object and report them */
  // dbObject is passed in and its privileges are read back right after the call.
4168  SysCatalog::instance().getDBObjectPrivileges(get_role(), dbObject, catalog);
4169  AccessPrivileges privs = dbObject.getPrivileges();
4170  printf("\nPRIVILEGES ON %s FOR %s ARE SET AS FOLLOWING: ",
4171  get_object().c_str(),
4172  get_role().c_str());
4173 
  // One branch per object type; each prints the keywords relevant to that type.
4174  if (objectType == DBObjectType::DatabaseDBObjectType) {
4176  printf(" CREATE");
4177  }
4179  printf(" DROP");
4180  }
4181  } else if (objectType == DBObjectType::TableDBObjectType) {
4183  printf(" CREATE");
4184  }
4186  printf(" DROP");
4187  }
4189  printf(" SELECT");
4190  }
4192  printf(" INSERT");
4193  }
4195  printf(" UPDATE");
4196  }
4198  printf(" DELETE");
4199  }
4201  printf(" TRUNCATE");
4202  }
4204  printf(" ALTER");
4205  }
4206  } else if (objectType == DBObjectType::DashboardDBObjectType) {
4208  printf(" CREATE");
4209  }
4211  printf(" DELETE");
4212  }
4214  printf(" VIEW");
4215  }
4217  printf(" EDIT");
4218  }
4219  } else if (objectType == DBObjectType::ViewDBObjectType) {
4221  printf(" CREATE");
4222  }
4224  printf(" DROP");
4225  }
4227  printf(" SELECT");
4228  }
4230  printf(" INSERT");
4231  }
4233  printf(" UPDATE");
4234  }
4236  printf(" DELETE");
4237  }
4238  }
4239  printf(".\n");
4240 }
4241 
4242 // GRANT payroll_dept_role TO joe;
4243 void GrantRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4244  const auto& currentUser = session.get_currentUser();
4245  if (!currentUser.isSuper) {
4246  throw std::runtime_error(
4247  "GRANT failed, because it can only be executed by super user.");
4248  }
4249  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
4250  get_grantees().end()) {
4251  throw std::runtime_error(
4252  "Request to grant role failed because mapd root user has all privileges by "
4253  "default.");
4254  }
4255  SysCatalog::instance().grantRoleBatch(get_roles(), get_grantees());
4256 }
4257 
4258 // REVOKE payroll_dept_role FROM joe;get_users
4259 void RevokeRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4260  const auto& currentUser = session.get_currentUser();
4261  if (!currentUser.isSuper) {
4262  throw std::runtime_error(
4263  "REVOKE failed, because it can only be executed by super user.");
4264  }
4265  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
4266  get_grantees().end()) {
4267  throw std::runtime_error(
4268  "Request to revoke role failed because privileges can not be revoked from mapd "
4269  "root user.");
4270  }
4271  SysCatalog::instance().revokeRoleBatch(get_roles(), get_grantees());
4272 }
4273 
// Stores in create_stmt_ the text produced by catalog.dumpCreateTable() for the
// table named by *table_.
//
// Throws std::runtime_error when the table is not found, when the session user
// holds no privileges on it, or when the object is a view and the user is not a
// super user.
//
// NOTE(review): the arguments of the shared-lock constructor and the declaration
// of `dbObject` are missing from this listing (embedded lines 4278-4279 and 4287
// are absent).
4274 void ShowCreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4275  using namespace Catalog_Namespace;
4276 
4277  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4280 
4281  auto& catalog = session.getCatalog();
4282  const TableDescriptor* td = catalog.getMetadataForTable(*table_);
4283  if (!td) {
4284  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
4285  }
4286 
4288  dbObject.loadKey(catalog);
4289  std::vector<DBObject> privObjects = {dbObject};
4290 
  // Same message as the "not found" case above; presumably so that a user
  // without privileges cannot tell whether the table exists - confirm intent.
4291  if (!SysCatalog::instance().hasAnyPrivileges(session.get_currentUser(), privObjects)) {
4292  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
4293  }
4294  if (td->isView && !session.get_currentUser().isSuper) {
4295  // TODO: we need to run a validate query to ensure the user has access to the
4296  // underlying table, but we do not have any of the machinery in here. Disable for now,
4297  // unless the current user is a super user.
4298  throw std::runtime_error("SHOW CREATE TABLE not yet supported for views");
4299  }
4300 
4301  create_stmt_ = catalog.dumpCreateTable(td);
4302 }
4303 
// Runs *select_stmt and writes its results to *file_path through a
// QueryExporter created for the requested file type.
//
// Steps, as visible below: build a query state for a copy of the session, parse
// the statement options, resolve the output path, fetch the result column
// metadata, then run the query one outer fragment at a time and hand each result
// set to the exporter.
//
// Throws std::runtime_error for an empty file path or when the export directory
// cannot be created.
//
// NOTE(review): several lines are missing from this listing - the declarations
// of `file_type`, `file_compression` and `array_null_handling` (embedded lines
// 4322-4328) and the body of the absolute-path branch (4347-4348).
4304 void ExportQueryStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4305  auto session_copy = session;
  // null_deleter: the shared_ptr must not delete session_copy, which is a local.
4306  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4307  &session_copy, boost::null_deleter());
4308  auto query_state = query_state::QueryState::create(session_ptr, *select_stmt);
4309  auto stdlog = STDLOG(query_state);
4310  auto query_state_proxy = query_state->createQueryStateProxy();
4311 
4312  auto& catalog = session.getCatalog();
4313 
4314  LocalConnector local_connector;
4315 
  // Fall back to the local connector when no leafs connector was supplied.
4316  if (!leafs_connector_) {
4317  leafs_connector_ = &local_connector;
4318  }
4319 
4320  import_export::CopyParams copy_params;
4321  // @TODO(se) move rest to CopyParams when we have a Thrift endpoint
4324  std::string layer_name;
4329 
4330  parseOptions(copy_params, file_type, layer_name, file_compression, array_null_handling);
4331 
4332  if (file_path->empty()) {
4333  throw std::runtime_error("Invalid file path for COPY TO");
4334  } else if (!boost::filesystem::path(*file_path).is_absolute()) {
  // Relative path: keep only the file name and place it under
  // <catalog base path>/mapd_export/<session id>/, creating the directory if needed.
4335  std::string file_name = boost::filesystem::path(*file_path).filename().string();
4336  std::string file_dir =
4337  catalog.getBasePath() + "/mapd_export/" + session.get_session_id() + "/";
4338  if (!boost::filesystem::exists(file_dir)) {
4339  if (!boost::filesystem::create_directories(file_dir)) {
4340  throw std::runtime_error("Directory " + file_dir + " cannot be created.");
4341  }
4342  }
4343  *file_path = file_dir + file_name;
4344  } else {
4345  // Above branch will create a new file in the mapd_export directory. If that path is
4346  // not exercised, go through applicable file path validations.
4349  }
4350 
4351  // get column info
  // Always issued through local_connector, even when leafs_connector_ is set.
4352  auto column_info_result =
4353  local_connector.query(query_state_proxy, *select_stmt, {}, true);
4354 
4355  // create exporter for requested file type
4356  auto query_exporter = import_export::QueryExporter::create(file_type);
4357 
4358  // default layer name to file path stem if it wasn't specified
4359  if (layer_name.size() == 0) {
4360  layer_name = boost::filesystem::path(*file_path).stem().string();
4361  }
4362 
4363  // begin export
4364  query_exporter->beginExport(*file_path,
4365  layer_name,
4366  copy_params,
4367  column_info_result.targets_meta,
4368  file_compression,
4369  array_null_handling);
4370 
4371  // how many fragments?
4372  size_t outer_frag_count =
4373  leafs_connector_->getOuterFragmentCount(query_state_proxy, *select_stmt);
  // A count of 0 still runs the loop once, with no fragment restriction.
4374  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
4375 
4376  // loop fragments
4377  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
4378  // limit the query to just this fragment
4379  std::vector<size_t> allowed_outer_fragment_indices;
4380  if (outer_frag_count) {
4381  allowed_outer_fragment_indices.push_back(outer_frag_idx);
4382  }
4383 
4384  // run the query
4385  std::vector<AggregatedResult> query_results = leafs_connector_->query(
4386  query_state_proxy, *select_stmt, allowed_outer_fragment_indices);
4387 
4388  // export the results
4389  query_exporter->exportResults(query_results);
4390  }
4391 
4392  // end export
4393  query_exporter->endExport();
4394 }
4395 
// Applies the statement's WITH-style options to the export settings.
//
// Walks `options` and, for each entry whose name matches a known option
// (compared case-insensitively via boost::iequals), validates the value and
// stores it in `copy_params`, `layer_name` or `array_null_handling`.
// `layer_name` is cleared up front so it is empty unless the option is given.
//
// Every option value must be a StringLiteral (boolean options are string
// literals converted with bool_from_string_literal). A std::runtime_error is
// thrown for a value of the wrong kind, for a delimiter/quote/escape/
// line_delimiter that is not exactly one character, for an unrecognised
// enumeration string, and for an unknown option name.
//
// NOTE(review): this listing appears to have lost lines (gaps in the embedded
// numbering): some parameters, some default assignments, the bodies of the
// file_type / file_compression branches and the right-hand sides of the
// array_null_handling assignments are not visible here. Confirm against the
// real source before relying on this text.
4396 void ExportQueryStmt::parseOptions(
4397  import_export::CopyParams& copy_params,
4399  std::string& layer_name,
4401  import_export::QueryExporter::ArrayNullHandling& array_null_handling) {
4402  // defaults for non-CopyParams values
4404  layer_name.clear();
4406 
4407  if (!options.empty()) {
4408  for (auto& p : options) {
4409  if (boost::iequals(*p->get_name(), "delimiter")) {
4410  const StringLiteral* str_literal =
4411  dynamic_cast<const StringLiteral*>(p->get_value());
4412  if (str_literal == nullptr) {
4413  throw std::runtime_error("Delimiter option must be a string.");
4414  } else if (str_literal->get_stringval()->length() != 1) {
4415  throw std::runtime_error("Delimiter must be a single character string.");
4416  }
4417  copy_params.delimiter = (*str_literal->get_stringval())[0];
4418  } else if (boost::iequals(*p->get_name(), "nulls")) {
4419  const StringLiteral* str_literal =
4420  dynamic_cast<const StringLiteral*>(p->get_value());
4421  if (str_literal == nullptr) {
4422  throw std::runtime_error("Nulls option must be a string.");
4423  }
4424  copy_params.null_str = *str_literal->get_stringval();
4425  } else if (boost::iequals(*p->get_name(), "header")) {
4426  const StringLiteral* str_literal =
4427  dynamic_cast<const StringLiteral*>(p->get_value());
4428  if (str_literal == nullptr) {
4429  throw std::runtime_error("Header option must be a boolean.");
4430  }
4431  copy_params.has_header = bool_from_string_literal(str_literal)
  // NOTE(review): the statement above is unterminated in this listing; its
  // continuation lines are presumably among the missing ones.
4434  } else if (boost::iequals(*p->get_name(), "quote")) {
4435  const StringLiteral* str_literal =
4436  dynamic_cast<const StringLiteral*>(p->get_value());
4437  if (str_literal == nullptr) {
4438  throw std::runtime_error("Quote option must be a string.");
4439  } else if (str_literal->get_stringval()->length() != 1) {
4440  throw std::runtime_error("Quote must be a single character string.");
4441  }
4442  copy_params.quote = (*str_literal->get_stringval())[0];
4443  } else if (boost::iequals(*p->get_name(), "escape")) {
4444  const StringLiteral* str_literal =
4445  dynamic_cast<const StringLiteral*>(p->get_value());
4446  if (str_literal == nullptr) {
4447  throw std::runtime_error("Escape option must be a string.");
4448  } else if (str_literal->get_stringval()->length() != 1) {
4449  throw std::runtime_error("Escape must be a single character string.");
4450  }
4451  copy_params.escape = (*str_literal->get_stringval())[0];
4452  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
4453  const StringLiteral* str_literal =
4454  dynamic_cast<const StringLiteral*>(p->get_value());
4455  if (str_literal == nullptr) {
4456  throw std::runtime_error("Line_delimiter option must be a string.");
4457  } else if (str_literal->get_stringval()->length() != 1) {
4458  throw std::runtime_error("Line_delimiter must be a single character string.");
4459  }
4460  copy_params.line_delim = (*str_literal->get_stringval())[0];
4461  } else if (boost::iequals(*p->get_name(), "quoted")) {
4462  const StringLiteral* str_literal =
4463  dynamic_cast<const StringLiteral*>(p->get_value());
4464  if (str_literal == nullptr) {
4465  throw std::runtime_error("Quoted option must be a boolean.");
4466  }
4467  copy_params.quoted = bool_from_string_literal(str_literal);
4468  } else if (boost::iequals(*p->get_name(), "file_type")) {
4469  const StringLiteral* str_literal =
4470  dynamic_cast<const StringLiteral*>(p->get_value());
4471  if (str_literal == nullptr) {
4472  throw std::runtime_error("File Type option must be a string.");
4473  }
  // The option text is lower-cased first, so the comparisons below accept any
  // capitalisation of the file type.
4474  auto file_type_str =
4475  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
4476  if (file_type_str == "csv") {
4478  } else if (file_type_str == "geojson") {
4480  } else if (file_type_str == "geojsonl") {
4482  } else if (file_type_str == "shapefile") {
4484  } else {
4485  throw std::runtime_error(
4486  "File Type option must be 'CSV', 'GeoJSON', 'GeoJSONL' or 'Shapefile'");
4487  }
4488  } else if (boost::iequals(*p->get_name(), "layer_name")) {
4489  const StringLiteral* str_literal =
4490  dynamic_cast<const StringLiteral*>(p->get_value());
4491  if (str_literal == nullptr) {
4492  throw std::runtime_error("Layer Name option must be a string.");
4493  }
4494  layer_name = *str_literal->get_stringval();
4495  } else if (boost::iequals(*p->get_name(), "file_compression")) {
4496  const StringLiteral* str_literal =
4497  dynamic_cast<const StringLiteral*>(p->get_value());
4498  if (str_literal == nullptr) {
4499  throw std::runtime_error("File Compression option must be a string.");
4500  }
4501  auto file_compression_str =
4502  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
4503  if (file_compression_str == "none") {
4505  } else if (file_compression_str == "gzip") {
4507  } else if (file_compression_str == "zip") {
4509  } else {
4510  throw std::runtime_error(
4511  "File Compression option must be 'None', 'GZip', or 'Zip'");
4512  }
4513  } else if (boost::iequals(*p->get_name(), "array_null_handling")) {
4514  const StringLiteral* str_literal =
4515  dynamic_cast<const StringLiteral*>(p->get_value());
4516  if (str_literal == nullptr) {
4517  throw std::runtime_error("Array Null Handling option must be a string.");
4518  }
4519  auto array_null_handling_str =
4520  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
4521  if (array_null_handling_str == "abort") {
4522  array_null_handling =
4524  } else if (array_null_handling_str == "raw") {
4525  array_null_handling =
4527  } else if (array_null_handling_str == "zero") {
4528  array_null_handling =
4530  } else if (array_null_handling_str == "nullfield") {
4531  array_null_handling =
4533  } else {
4534  throw std::runtime_error(
4535  "Array Null Handling option must be 'Abort', 'Raw', 'Zero', or "
4536  "'NullField'");
4537  }
4538  } else {
  // Any option name not handled above is rejected rather than ignored.
4539  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
4540  }
4541  }
4542  }
4543 }
4544 
// Executes CREATE VIEW: validates the view name, runs the defining query
// through Calcite, then registers the view in the catalog and creates its
// DB object for the current user.
//
// Returns without doing anything when validateNonExistentTableOrView()
// returns false. Throws std::runtime_error when the (not fully visible)
// privilege check fails.
//
// NOTE(review): this listing has lost lines (gaps in the embedded numbering):
// the condition guarding the "no create view privileges" throw, the argument
// of the write lock and one TableDescriptor assignment are not visible here.
4545 void CreateViewStmt::execute(const Catalog_Namespace::SessionInfo& session) {
  // The QueryState wants a shared_ptr; boost::null_deleter makes the pointer
  // non-owning so the stack-allocated copy is never deleted through it.
4546  auto session_copy = session;
4547  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4548  &session_copy, boost::null_deleter());
4549  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
4550  auto stdlog = STDLOG(query_state);
4551  auto& catalog = session.getCatalog();
4552 
4553  if (!catalog.validateNonExistentTableOrView(view_name_, if_not_exists_)) {
4554  return;
4555  }
4558  throw std::runtime_error("View " + view_name_ +
4559  " will not be created. User has no create view privileges.");
4560  }
4561 
4562  const auto query_after_shim = pg_shim(select_query_);
4563 
4564  // this now also ensures that access permissions are checked
4565  catalog.getCalciteMgr()->process(query_state->createQueryStateProxy(),
4566  query_after_shim,
4567  {},
4568  true,
4569  false,
4570  false,
4571  true);
4572 
4573  // Take write lock after the query is processed to ensure no deadlocks
4574  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4577 
  // A view is stored as a TableDescriptor with isView set, the shimmed SQL as
  // viewSQL, zero columns and no fragmenter.
4578  TableDescriptor td;
4579  td.tableName = view_name_;
4580  td.userId = session.get_currentUser().userId;
4581  td.nColumns = 0;
4582  td.isView = true;
4583  td.viewSQL = query_after_shim;
4584  td.fragmenter = nullptr;
4586  td.maxFragRows =
4587  DEFAULT_FRAGMENT_ROWS; // @todo this stuff should not be InsertOrderFragmenter
4588  td.maxChunkSize =
4589  DEFAULT_MAX_CHUNK_SIZE; // @todo this stuff should not be InsertOrderFragmenter
4590  td.fragPageSize = DEFAULT_PAGE_SIZE;
4591  td.maxRows = DEFAULT_MAX_ROWS;
4592  catalog.createTable(td, {}, {}, true);
4593 
4594  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
4595  // privileges
4596  SysCatalog::instance().createDBObject(
4597  session.get_currentUser(), view_name_, ViewDBObjectType, catalog);
4598 }
4599 
// Executes DROP VIEW: acquires the view's descriptor under a schema write
// lock, checks the caller's privileges and drops it from the catalog.
//
// If acquiring the descriptor throws std::runtime_error, the statement is a
// silent no-op when `if_exists` is set; otherwise the error propagates.
// Throws std::runtime_error when the privilege check fails.
//
// NOTE(review): this listing has lost lines (gaps in the embedded numbering):
// the expression that builds the lock container, the arguments of
// checkDBAccessPrivileges and one statement before dropTable are not visible.
4600 void DropViewStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4601  auto& catalog = session.getCatalog();
4602 
4603  const TableDescriptor* td{nullptr};
4604  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
4605 
4606  try {
4607  td_with_lock =
4608  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
4610  catalog, *view_name, false));
4611  td = (*td_with_lock)();
4612  } catch (const std::runtime_error& e) {
4613  if (if_exists) {
4614  return;
4615  } else {
  // NOTE(review): `throw e;` throws a new copy whose type is exactly
  // std::runtime_error, so a more derived exception would be sliced; a bare
  // `throw;` would rethrow the original object unchanged.
4616  throw e;
4617  }
4618  }
4619 
  // Both must be set if the try block completed without throwing.
4620  CHECK(td);
4621  CHECK(td_with_lock);
4622 
4623  if (!session.checkDBAccessPrivileges(
4625  throw std::runtime_error("View " + *view_name +
4626  " will not be dropped. User has no drop view privileges.");
4627  }
4628 
4630  catalog.dropTable(td);
4631 }
4632 
4633 static void checkStringLiteral(const std::string& option_name,
4634  const std::unique_ptr<NameValueAssign>& p) {
4635  CHECK(p);
4636  if (!dynamic_cast<const StringLiteral*>(p->get_value())) {
4637  throw std::runtime_error(option_name + " option must be a string literal.");
4638  }
4639 }
4640 
// Executes CREATE DATABASE. Only a super user may run it.
//
// Returns silently when the database already exists and `if_not_exists_` is
// set. The only supported option is OWNER (matched case-insensitively), whose
// value must be a string literal naming an existing user; without it the
// current user becomes the owner.
//
// Throws std::runtime_error when the caller is not a super user, the OWNER
// value is not a string literal, the named user does not exist, or an
// unsupported option is given.
//
// NOTE(review): this listing has lost lines (gaps in the embedded numbering):
// the write-lock argument and the declarations of `db_meta` and `user` are
// not visible here.
4641 void CreateDBStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4642  if (!session.get_currentUser().isSuper) {
4643  throw std::runtime_error(
4644  "CREATE DATABASE command can only be executed by super user.");
4645  }
4646 
4647  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4650 
  // When the database exists but IF NOT EXISTS was not given, execution falls
  // through to createDatabase() below; how that call reacts is not visible here.
4652  if (SysCatalog::instance().getMetadataForDB(*db_name, db_meta) && if_not_exists_) {
4653  return;
4654  }
4655  int ownerId = session.get_currentUser().userId;
4656  if (!name_value_list.empty()) {
4657  for (auto& p : name_value_list) {
4658  if (boost::iequals(*p->get_name(), "owner")) {
  // checkStringLiteral throws unless the value is a StringLiteral, which is
  // what makes the static_cast below safe.
4659  checkStringLiteral("Owner name", p);
4660  const std::string* str =
4661  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4663  if (!SysCatalog::instance().getMetadataForUser(*str, user)) {
4664  throw std::runtime_error("User " + *str + " does not exist.");
4665  }
4666  ownerId = user.userId;
4667  } else {
4668  throw std::runtime_error("Invalid CREATE DATABASE option " + *p->get_name() +
4669  ". Only OWNER supported.");
4670  }
4671  }
4672  }
4673  SysCatalog::instance().createDatabase(*db_name, ownerId);
4674 }
4675 
// Executes DROP DATABASE. Only a super user may run it.
//
// Returns silently when the database does not exist and `if_exists_` is set.
// Throws std::runtime_error when the caller is not a super user or when the
// database does not exist and `if_exists_` is not set.
//
// NOTE(review): this listing has lost lines (gaps in the embedded numbering):
// the write-lock argument and the declaration of `db` are not visible here.
4676 void DropDBStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4677  if (!session.get_currentUser().isSuper) {
4678  throw std::runtime_error("DROP DATABASE command can only be executed by super user.");
4679  }
4680 
4681  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4684 
4686  if (!SysCatalog::instance().getMetadataForDB(*db_name, db)) {
4687  if (if_exists_) {
4688  return;
4689  }
4690  throw std::runtime_error("Database " + *db_name + " does not exist.");
4691  }
4692 
  // NOTE(review): a non-super caller has already been rejected at the top of
  // this function, so (assuming isSuper does not change between the two reads)
  // this condition is always false and the owner is never allowed through.
  // Confirm which of the two checks reflects the intended policy.
4693  if (!session.get_currentUser().isSuper &&
4694  session.get_currentUser().userId != db.dbOwner) {
4695  throw std::runtime_error("Only the super user or the owner can drop database.");
4696  }
4697 
4698  SysCatalog::instance().dropDatabase(db);
4699 }
4700 
4701 static bool readBooleanLiteral(const std::string& option_name,
4702  const std::unique_ptr<NameValueAssign>& p) {
4703  CHECK(p);
4704  const std::string* str =
4705  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4706  if (boost::iequals(*str, "true")) {
4707  return true;
4708  } else if (boost::iequals(*str, "false")) {
4709  return false;
4710  } else {
4711  throw std::runtime_error("Value to " + option_name + " must be TRUE or FALSE.");
4712  }
4713 }
4714 
4715 void CreateUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4716  std::string passwd;
4717  bool is_super = false;
4718  std::string default_db;
4719  bool can_login = true;
4720  for (auto& p : name_value_list) {
4721  if (boost::iequals(*p->get_name(), "password")) {
4722  checkStringLiteral("Password", p);
4723  passwd = *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4724  } else if (boost::iequals(*p->get_name(), "is_super")) {
4725  checkStringLiteral("IS_SUPER", p);
4726  is_super = readBooleanLiteral("IS_SUPER", p);
4727  } else if (boost::iequals(*p->get_name(), "default_db")) {
4728  checkStringLiteral("DEFAULT_DB", p);
4729  default_db = *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4730  } else if (boost::iequals(*p->get_name(), "can_login")) {
4731  checkStringLiteral("CAN_LOGIN", p);
4732  can_login = readBooleanLiteral("can_login", p);
4733  } else {
4734  throw std::runtime_error("Invalid CREATE USER option " + *p->get_name() +
4735  ". Should be PASSWORD, IS_SUPER, CAN_LOGIN"
4736  " or DEFAULT_DB.");
4737  }
4738  }
4739  if (!session.get_currentUser().isSuper) {
4740  throw std::runtime_error("Only super user can create new users.");
4741  }
4742  SysCatalog::instance().createUser(*user_name, passwd, is_super, default_db, can_login);
4743 }
4744 
// Executes ALTER USER: parses the options, checks that the caller may change
// the target user, then forwards the requested changes to the system catalog.
//
// Recognised options (names matched case-insensitively): PASSWORD, IS_SUPER,
// DEFAULT_DB and CAN_LOGIN. Each option is tracked through a pointer that
// stays null unless the option was supplied, so only requested attributes are
// passed on as non-null.
//
// Authorization: a super user may change anything; any other user may only
// alter their own account, and then only the password or default database.
//
// Throws std::runtime_error for an invalid option or option value, an unknown
// target user, or an unauthorized change.
//
// NOTE(review): this listing has lost a line (gap in the embedded numbering):
// the declaration of `user` is not visible here.
4745 void AlterUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4746  // Parse the statement
4747  const std::string* passwd = nullptr;
4748  bool is_super = false;
4749  bool* is_superp = nullptr;
4750  const std::string* default_db = nullptr;
4751  bool can_login = true;
4752  bool* can_loginp = nullptr;
4753  for (auto& p : name_value_list) {
4754  if (boost::iequals(*p->get_name(), "password")) {
4755  checkStringLiteral("Password", p);
4756  passwd = static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4757  } else if (boost::iequals(*p->get_name(), "is_super")) {
4758  checkStringLiteral("IS_SUPER", p);
4759  is_super = readBooleanLiteral("IS_SUPER", p);
4760  is_superp = &is_super;
4761  } else if (boost::iequals(*p->get_name(), "default_db")) {
  // DEFAULT_DB accepts either a string literal or NULL; NULL is passed on as
  // a pointer to an empty string.
4762  if (dynamic_cast<const StringLiteral*>(p->get_value())) {
4763  default_db = static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4764  } else if (dynamic_cast<const NullLiteral*>(p->get_value())) {
  // static so the pointed-to string is still valid at the alterUser() call below.
4765  static std::string blank;
4766  default_db = &blank;
4767  } else {
4768  throw std::runtime_error(
4769  "DEFAULT_DB option must be either a string literal or a NULL literal.");
4770  }
4771  } else if (boost::iequals(*p->get_name(), "can_login")) {
4772  checkStringLiteral("CAN_LOGIN", p);
4773  can_login = readBooleanLiteral("CAN_LOGIN", p);
4774  can_loginp = &can_login;
4775  } else {
4776  throw std::runtime_error("Invalid ALTER USER option " + *p->get_name() +
4777  ". Should be PASSWORD, DEFAULT_DB, CAN_LOGIN"
4778  " or IS_SUPER.");
4779  }
4780  }
4781 
4782  // Check if the user is authorized to execute ALTER USER statement
4784  if (!SysCatalog::instance().getMetadataForUser(*user_name, user)) {
4785  throw std::runtime_error("User " + *user_name + " does not exist.");
4786  }
4787  if (!session.get_currentUser().isSuper) {
4788  if (session.get_currentUser().userId != user.userId) {
4789  throw std::runtime_error("Only super user can change another user's attributes.");
4790  } else if (is_superp || can_loginp) {
4791  throw std::runtime_error(
4792  "A user can only update their own password or default database.");
4793  }
4794  }
4795 
  // Nothing is sent to the catalog when the statement carried no options.
4796  if (passwd || is_superp || default_db || can_loginp) {
4797  SysCatalog::instance().alterUser(
4798  user.userId, passwd, is_superp, default_db, can_loginp);
4799  }
4800 }
4801 
4802 void DropUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4803  if (!session.get_currentUser().isSuper) {
4804  throw std::runtime_error("Only super user can drop users.");
4805  }
4806  SysCatalog::instance().dropUser(*user_name);
4807 }
4808 
// Executes DUMP TABLE: after the privilege checks pass, looks the table up in
// the session's catalog and has a TableArchiver dump it to `path` using
// `compression`.
//
// Throws std::runtime_error when either privilege check fails; the messages
// name the select and create privileges respectively.
//
// NOTE(review): this listing has lost lines (gaps in the embedded numbering):
// the arguments of the first privilege check and the whole condition of the
// second are not visible here.
4809 void DumpTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4810  // check access privileges
4811  if (!session.checkDBAccessPrivileges(
4813  throw std::runtime_error("Table " + *table +
4814  " will not be dumped. User has no select privileges.");
4815  }
4818  throw std::runtime_error("Table " + *table +
4819  " will not be dumped. User has no create privileges.");
4820  }
4821  auto& catalog = session.getCatalog();
  // NOTE(review): `td` is handed to dumpTable() without a null check in the
  // visible lines; confirm how a missing table is handled.
4822  const TableDescriptor* td = catalog.getMetadataForTable(*table);
4823  TableArchiver table_archiver(&catalog);
4824  table_archiver.dumpTable(td, *path, compression);
4825 }
4826 
// Executes RESTORE TABLE: restores `table` from the archive at `path` using
// `compression`, but only when no table of that name exists yet.
//
// Throws std::runtime_error when the target table already exists or when the
// privilege check fails.
//
// NOTE(review): this listing has lost lines (gap in the embedded numbering):
// the condition of the privilege check is not visible here.
4827 void RestoreTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4828  auto& catalog = session.getCatalog();
4829  const TableDescriptor* td = catalog.getMetadataForTable(*table, false);
4830  if (td) {
4831  // TODO: v1.0 simply throws to avoid accidentally overwriting the target table.
4832  // Will add a REPLACE TABLE to explicitly replace the target table.
4833  // catalog.restoreTable(session, td, *path, compression);
4834  throw std::runtime_error("Table " + *table + " exists.");
4835  } else {
4836  // check access privileges
4839  throw std::runtime_error("Table " + *table +
4840  " will not be restored. User has no create privileges.");
4841  }
4842  TableArchiver table_archiver(&catalog);
4843  table_archiver.restoreTable(session, *table, *path, compression);
4844  }
4845 }
4846 
4847 } // namespace Parser
int8_t tinyintval
Definition: sqltypes.h:133
int32_t get_table_id() const
decltype(auto) get_max_rows_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
void validate_non_foreign_table_write(const TableDescriptor *table_descriptor)
Definition: FsiUtils.h:22
static const AccessPrivileges VIEW_SQL_EDITOR
Definition: DBObject.h:153
decltype(auto) get_max_chunk_size_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
static const std::map< const std::string, const TableDefFuncPtr > tableDefFuncMap
void set_compression(EncodingType c)
Definition: sqltypes.h:357
SQLAgg
Definition: sqldefs.h:71
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string s3_secret_key
Definition: CopyParams.h:63
size_t shard_column_index(const std::string &name, const std::list< ColumnDescriptor > &columns)
std::string partitions
static const AccessPrivileges VIEW_DASHBOARD
Definition: DBObject.h:172
#define NULL_DOUBLE
Definition: sqltypes.h:184
static const int32_t DROP_VIEW
Definition: DBObject.h:115
int64_t get_intval() const
Definition: ParserNode.h:151
bool is_array() const
Definition: sqltypes.h:416
static const AccessPrivileges DROP_SERVER
Definition: DBObject.h:190
bool is_string() const
Definition: sqltypes.h:408
bool is_boolean() const
Definition: sqltypes.h:415
static const int32_t SELECT_FROM_VIEW
Definition: DBObject.h:116
const std::vector< std::shared_ptr< TargetEntry > > & get_targetlist() const
Definition: Analyzer.h:1566
const int8_t const int64_t * num_rows
Definition: Analyzer.h:1523
Class for a per-database catalog. Also includes metadata for the current database and the current user.
Definition: Catalog.h:86
SQLTypes
Definition: sqltypes.h:39
static void checkStringLiteral(const std::string &option_name, const std::unique_ptr< NameValueAssign > &p)
static const int32_t UPDATE_IN_VIEW
Definition: DBObject.h:118
std::string tableName
const std::vector< TargetMetaInfo > targets_meta
void add_rte(RangeTableEntry *rte)
Definition: Analyzer.cpp:1345
static const AccessPrivileges ALL_DATABASE
Definition: DBObject.h:152
SQLType * get_column_type() const
Definition: ParserNode.h:794
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
bool is_integer() const
Definition: sqltypes.h:410
decltype(auto) get_vacuum_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
static const AccessPrivileges ALTER_TABLE
Definition: DBObject.h:166
DBObjectType
Definition: DBObject.h:42
static const int32_t CREATE_VIEW
Definition: DBObject.h:114
const std::list< std::shared_ptr< Analyzer::Expr > > & get_group_by() const
Definition: Analyzer.h:1574
void expand_star_in_targetlist(const Catalog_Namespace::Catalog &catalog, std::vector< std::shared_ptr< TargetEntry >> &tlist, int rte_idx)
static const AccessPrivileges TRUNCATE_TABLE
Definition: DBObject.h:165
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:192
SQLQualifier
Definition: sqldefs.h:69
#define LOG(tag)
Definition: Logger.h:188
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:155
decltype(auto) get_skip_rows_def(DataframeTableDescriptor &df_td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
SQLOps
Definition: sqldefs.h:29
Definition: sqldefs.h:35
std::string storageType
static const AccessPrivileges INSERT_INTO_TABLE
Definition: DBObject.h:162
std::shared_ptr< Analyzer::Expr > deep_copy() const override
Definition: Analyzer.cpp:80
Definition: sqldefs.h:36
static const AccessPrivileges CREATE_DASHBOARD
Definition: DBObject.h:171
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:266
int get_result_table_id() const
Definition: Analyzer.h:1582
#define DEFAULT_MAX_CHUNK_SIZE
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders(const TableDescriptor *td, Loader *loader)
Definition: Importer.cpp:5123
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:348
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:807
bool g_enable_fsi
Definition: Catalog.cpp:90
Definition: sqldefs.h:49
HOST DEVICE int get_size() const
Definition: sqltypes.h:267
const UserMetadata & get_currentUser() const
Definition: SessionInfo.h:68
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
void set_order_by(std::list< OrderEntry > *o)
Definition: Analyzer.h:1590
std::string extractObjectNameFromHierName(const std::string &objectHierName, const std::string &objectType, const Catalog_Namespace::Catalog &cat)
static const int32_t ALTER_TABLE
Definition: DBObject.h:95
void set_result_col_list(const std::list< int > &col_list)
Definition: Analyzer.h:1584
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters system_parameters=SystemParameters())
Definition: Execute.cpp:150
void setPrivileges(const AccessPrivileges &privs)
Definition: DBObject.h:217
static void invalidateCaches()
void set_offset(int64_t o)
Definition: Analyzer.h:1602
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:265
std::vector< std::string > splitObjectHierName(const std::string &hierName)
WhichRow get_which_row() const
Definition: Analyzer.h:274
std::string get_session_id() const
Definition: SessionInfo.h:73
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type)
Definition: DdlUtils.cpp:611
static const AccessPrivileges SELECT_FROM_TABLE
Definition: DBObject.h:161
static std::unique_ptr< QueryExporter > create(const FileType file_type)
bool bool_from_string_literal(const Parser::StringLiteral *str_literal)
Definition: ParserNode.cpp:909
void get_table_definitions_for_ctas(TableDescriptor &td, const std::unique_ptr< NameValueAssign > &p, const std::list< ColumnDescriptor > &columns)
bool is_decimal() const
Definition: sqltypes.h:411
int32_t intval
Definition: sqltypes.h:135
HOST DEVICE int get_scale() const
Definition: sqltypes.h:262
void restoreTable(const Catalog_Namespace::SessionInfo &session, const TableDescriptor *td, const std::string &archive_path, const std::string &compression)
std::string to_string(char const *&&v)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
static const AccessPrivileges ALL_VIEW
Definition: DBObject.h:178
void validate_non_duplicate_column(const std::string &column_name, std::unordered_set< std::string > &upper_column_names)
Definition: DdlUtils.cpp:520
std::shared_ptr< StringDictionary > stringDict
int tableId
identifies the database into which the data is being inserted
Definition: Fragmenter.h:61
DBObjectType DBObjectTypeFromString(const std::string &type)
Definition: DBObject.cpp:99
int get_logical_size() const
Definition: sqltypes.h:268
Definition: sqldefs.h:73
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const int64_t replicate_count=0)
Definition: Importer.cpp:1421
const ColumnDescriptor * getShardColumnMetadataForTable(const TableDescriptor *td) const
Definition: Catalog.cpp:3460
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:264
size_t numRows
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:63
This file contains the class specification and related data structures for Catalog.
float floatval
Definition: sqltypes.h:137
Datum get_constval() const
Definition: Analyzer.h:336
ImportHeaderRow has_header
Definition: CopyParams.h:48
boost::function< void(DataframeTableDescriptor &, const NameValueAssign *, const std::list< ColumnDescriptor > &columns)> DataframeDefFuncPtr
Definition: ParserNode.cpp:85
static SQLTypeInfo common_string_type(const SQLTypeInfo &type1, const SQLTypeInfo &type2)
Definition: Analyzer.cpp:394
#define DEFAULT_MAX_ROWS
RangeTableEntry * get_rte(int rte_idx) const
Definition: Analyzer.h:1596
std::string cat(Ts &&... args)
Classes representing a parse tree.
void set_fixed_size()
Definition: sqltypes.h:356
const std::string * get_column_name() const
Definition: ParserNode.h:793
void get_dataframe_definitions(DataframeTableDescriptor &df_td, const std::unique_ptr< NameValueAssign > &p, const std::list< ColumnDescriptor > &columns)
QueryState & getQueryState()
Definition: QueryState.h:172
static const int32_t DROP_DATABASE
Definition: DBObject.h:81
const std::list< int > & get_result_col_list() const
Definition: Analyzer.h:1583
static const AccessPrivileges DROP_TABLE
Definition: DBObject.h:160
const QueryHint getParsedQueryHints() const
decltype(auto) get_shard_count_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
Catalog & getCatalog() const
Definition: SessionInfo.h:65
boost::function< void(TableDescriptor &, const NameValueAssign *, const std::list< ColumnDescriptor > &columns)> TableDefFuncPtr
Definition: ParserNode.cpp:80
void set_scale(int s)
Definition: sqltypes.h:352
int64_t bigintval
Definition: sqltypes.h:136
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:258
static const int32_t DELETE_FROM_TABLE
Definition: DBObject.h:93
static std::shared_ptr< QueryState > create(ARGS &&... args)
Definition: QueryState.h:140
static SQLTypeInfo common_numeric_type(const SQLTypeInfo &type1, const SQLTypeInfo &type2)
Definition: Analyzer.cpp:434
Definition: sqldefs.h:37
Definition: sqldefs.h:75
static const int32_t TRUNCATE_TABLE
Definition: DBObject.h:94
static const AccessPrivileges ALL_SERVER
Definition: DBObject.h:188
int16_t smallintval
Definition: sqltypes.h:134
const ColumnConstraintDef * get_column_constraint() const
Definition: ParserNode.h:796
static const AccessPrivileges CREATE_SERVER
Definition: DBObject.h:189
static const AccessPrivileges DELETE_FROM_TABLE
Definition: DBObject.h:164
int get_physical_cols() const
Definition: sqltypes.h:278
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:122
specifies the content in-memory of a row in the column metadata table
bool expr_is_null(const Analyzer::Expr *expr)
Definition: ParserNode.cpp:901
static const std::map< const std::string, const DataframeDefFuncPtr > dataframeDefFuncMap
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:194
static const int32_t EDIT_DASHBOARD
Definition: DBObject.h:106
static const int32_t DELETE_DASHBOARD
Definition: DBObject.h:104
static const AccessPrivileges CREATE_TABLE
Definition: DBObject.h:159
static const int32_t INSERT_INTO_TABLE
Definition: DBObject.h:91
std::string keyMetainfo
void set_num_aggs(int a)
Definition: Analyzer.h:1594
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1449
void set_group_by(std::list< std::shared_ptr< Analyzer::Expr >> &g)
Definition: Analyzer.h:1588
void set_default_table_attributes(const std::string &table_name, TableDescriptor &td, const int32_t column_count)
Definition: DdlUtils.cpp:506
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
void set_next_query(Query *q)
Definition: Analyzer.h:1591
std::string * stringval
Definition: sqltypes.h:141
AggregatedResult query(QueryStateProxy, std::string &sql_query_string, std::vector< size_t > outer_frag_indices, bool validate_only)
#define DEFAULT_PAGE_SIZE
bool g_enable_experimental_string_functions
void set_having_predicate(std::shared_ptr< Analyzer::Expr > p)
Definition: Analyzer.h:1589
void set_comp_param(int p)
Definition: sqltypes.h:358
void set_column_descriptor(const std::string &column_name, ColumnDescriptor &cd, SqlType *column_type, const bool not_null, const Encoding *encoding)
Definition: DdlUtils.cpp:492
void set_result_table_id(int id)
Definition: Analyzer.h:1585
std::string geo_layer_name
Definition: CopyParams.h:78
void loadKey()
Definition: DBObject.cpp:179
bool get_is_null() const
Definition: Analyzer.h:335
void validate_shard_column_type(const ColumnDescriptor &cd)
const AccessPrivileges & getPrivileges() const
Definition: DBObject.h:216
Definition: sqltypes.h:53
bool is_geometry() const
Definition: sqltypes.h:420
bool g_test_drop_column_rollback
Definition: ParserNode.cpp:71
void check_alter_table_privilege(const Catalog_Namespace::SessionInfo &session, const TableDescriptor *td)
Definition: sqldefs.h:69
ExecutionResult executeSimpleInsert(const Analyzer::Query &insert_query)
#define TRANSIENT_DICT_ID
Definition: sqltypes.h:196
const std::string OMNISCI_ROOT_USER
Definition: SysCatalog.h:59
std::shared_ptr< Catalog_Namespace::SessionInfo const > getConstSessionInfo() const
Definition: QueryState.cpp:77
void set_is_distinct(bool d)
Definition: Analyzer.h:1586
std::string get_type_name() const
Definition: sqltypes.h:360
static const AccessPrivileges SELECT_FROM_VIEW
Definition: DBObject.h:181
bool table_is_temporary(const TableDescriptor *const td)
bool is_null(const T &v, const SQLTypeInfo &t)
void validate_drop_table_type(const TableDescriptor *td, const TableType expected_table_type)
Definition: DdlUtils.cpp:537
std::vector< std::shared_ptr< TargetEntry > > & get_targetlist_nonconst()
Definition: Analyzer.h:1569
#define DEFAULT_FRAGMENT_ROWS
bool get_is_distinct() const
Definition: Analyzer.h:1564
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:259
Fragmenter_Namespace::FragmenterType fragType
Definition: sqldefs.h:76
Data_Namespace::MemoryLevel persistenceLevel
int get_rte_idx(const std::string &range_var_name) const
Definition: Analyzer.cpp:1334
void set_is_unionall(bool u)
Definition: Analyzer.h:1592
static const int32_t CREATE_DATABASE
Definition: DBObject.h:80
Definition: Catalog.h:63
void insertData(const Catalog_Namespace::SessionInfo &session_info, InsertData &insert_data)
decltype(auto) get_sort_column_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
static const AccessPrivileges ALL_DASHBOARD
Definition: DBObject.h:170
static bool getGeoColumns(const std::string &wkt, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:637
static const AccessPrivileges ACCESS
Definition: DBObject.h:154
static const int32_t VIEW_DASHBOARD
Definition: DBObject.h:105
std::vector< uint8_t > compress_coords(std::vector< double > &coords, const SQLTypeInfo &ti)
static std::pair< AccessPrivileges, DBObjectType > parseStringPrivs(const std::string &privs, const DBObjectType &objectType, const std::string &object_name)
bool checkDBAccessPrivileges(const DBObjectType &permissionType, const AccessPrivileges &privs, const std::string &objectName="") const
Definition: SessionInfo.cpp:24
virtual std::shared_ptr< Analyzer::Expr > add_cast(const SQLTypeInfo &new_type_info)
Definition: Analyzer.cpp:685
static const AccessPrivileges ALL_TABLE
Definition: DBObject.h:158
decltype(auto) get_frag_size_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
decltype(auto) get_header_def(DataframeTableDescriptor &df_td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
void set_where_predicate(std::shared_ptr< Analyzer::Expr > p)
Definition: Analyzer.h:1587
void set_stmt_type(SQLStmtType t)
Definition: Analyzer.h:1593
const ColumnDescriptor * get_column_desc(const Catalog_Namespace::Catalog &catalog, const std::string &name)
Definition: sqldefs.h:53
static CompilationOptions defaults(const ExecutorDeviceType device_type=ExecutorDeviceType::GPU)
static const int32_t DROP_TABLE
Definition: DBObject.h:89
ThreadId thread_id()
Definition: Logger.cpp:715
int get_varno() const
Definition: Analyzer.h:276
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
static std::shared_ptr< MutexType > getMutex(const LockType lockType, const KeyType &key)
Definition: LegacyLockMgr.h:51
int get_num_aggs() const
Definition: Analyzer.h:1565
const std::vector< RangeTableEntry * > & get_rangetable() const
Definition: Analyzer.h:1572
static const int32_t INSERT_INTO_VIEW
Definition: DBObject.h:117
virtual std::shared_ptr< Analyzer::Expr > analyze(const Catalog_Namespace::Catalog &catalog, Analyzer::Query &query, TlistRefType allow_tlist_ref=TLIST_NONE) const =0
static const AccessPrivileges DROP_VIEW
Definition: DBObject.h:180
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:612
void set_notnull(bool n)
Definition: sqltypes.h:354
static bool readBooleanLiteral(const std::string &option_name, const std::unique_ptr< NameValueAssign > &p)
#define CHECK(condition)
Definition:<