OmniSciDB  94e8789169
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ParserNode.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "ParserNode.h"
26 
27 #include <boost/algorithm/string.hpp>
28 #include <boost/core/null_deleter.hpp>
29 #include <boost/filesystem.hpp>
30 #include <boost/function.hpp>
31 
32 #include <rapidjson/document.h>
33 #include <rapidjson/stringbuffer.h>
34 #include <rapidjson/writer.h>
35 
36 #include <cassert>
37 #include <cmath>
38 #include <limits>
39 #include <random>
40 #include <regex>
41 #include <stdexcept>
42 #include <type_traits>
43 #include <typeinfo>
44 
46 #include "Catalog/Catalog.h"
52 #include "Geospatial/Compression.h"
53 #include "Geospatial/Types.h"
54 #include "ImportExport/Importer.h"
55 #include "LockMgr/LockMgr.h"
58 #include "QueryEngine/Execute.h"
62 #include "ReservedKeywords.h"
63 #include "Shared/StringTransform.h"
64 #include "Shared/measure.h"
65 #include "Shared/shard_key.h"
67 #include "Utils/FsiUtils.h"
68 
69 #include "gen-cpp/CalciteServer.h"
70 #include "parser.h"
71 
73 
// Process-wide counter defined in this translation unit and initialized to zero.
// NOTE(review): presumably the number of leaf nodes in a distributed deployment —
// confirm against the code that writes it.
74 size_t g_leaf_count{0};
// Declared here, defined in another translation unit.
// NOTE(review): looks like a runtime feature switch — confirm where it is set.
77 extern bool g_enable_fsi;
78 
80 using namespace std::string_literals;
81 
// Callable type taking a mutable TableDescriptor, one NameValueAssign and the list of
// column descriptors; returns nothing.
82 using TableDefFuncPtr = boost::function<void(TableDescriptor&,
83  const NameValueAssign*,
84  const std::list<ColumnDescriptor>& columns)>;
85 
// Same callable shape as TableDefFuncPtr, but the first argument is a
// DataframeTableDescriptor.
86 using DataframeDefFuncPtr =
87  boost::function<void(DataframeTableDescriptor&,
88  const NameValueAssign*,
89  const std::list<ColumnDescriptor>& columns)>;
90 
91 namespace Parser {
92 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
93  const Catalog_Namespace::Catalog& catalog,
94  Analyzer::Query& query,
95  TlistRefType allow_tlist_ref) const {
96  return makeExpr<Analyzer::Constant>(kNULLT, true);
97 }
98 
99 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
100  const Catalog_Namespace::Catalog& catalog,
101  Analyzer::Query& query,
102  TlistRefType allow_tlist_ref) const {
103  return analyzeValue(*stringval);
104 }
105 
106 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(
107  const std::string& stringval) {
108  SQLTypeInfo ti(kVARCHAR, stringval.length(), 0, true);
109  Datum d;
110  d.stringval = new std::string(stringval);
111  return makeExpr<Analyzer::Constant>(ti, false, d);
112 }
113 
114 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
115  const Catalog_Namespace::Catalog& catalog,
116  Analyzer::Query& query,
117  TlistRefType allow_tlist_ref) const {
118  return analyzeValue(intval);
119 }
120 
121 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(const int64_t intval) {
122  SQLTypes t;
123  Datum d;
124  if (intval >= INT16_MIN && intval <= INT16_MAX) {
125  t = kSMALLINT;
126  d.smallintval = (int16_t)intval;
127  } else if (intval >= INT32_MIN && intval <= INT32_MAX) {
128  t = kINT;
129  d.intval = (int32_t)intval;
130  } else {
131  t = kBIGINT;
132  d.bigintval = intval;
133  }
134  return makeExpr<Analyzer::Constant>(t, false, d);
135 }
136 
137 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
138  const Catalog_Namespace::Catalog& catalog,
139  Analyzer::Query& query,
140  TlistRefType allow_tlist_ref) const {
141  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
142  Datum d = StringToDatum(*fixedptval, ti);
143  return makeExpr<Analyzer::Constant>(ti, false, d);
144 }
145 
146 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(const int64_t numericval,
147  const int scale,
148  const int precision) {
149  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
150  ti.set_scale(scale);
151  ti.set_precision(precision);
152  Datum d;
153  d.bigintval = numericval;
154  return makeExpr<Analyzer::Constant>(ti, false, d);
155 }
156 
157 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
158  const Catalog_Namespace::Catalog& catalog,
159  Analyzer::Query& query,
160  TlistRefType allow_tlist_ref) const {
161  Datum d;
162  d.floatval = floatval;
163  return makeExpr<Analyzer::Constant>(kFLOAT, false, d);
164 }
165 
166 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
167  const Catalog_Namespace::Catalog& catalog,
168  Analyzer::Query& query,
169  TlistRefType allow_tlist_ref) const {
170  Datum d;
171  d.doubleval = doubleval;
172  return makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
173 }
174 
175 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
176  const Catalog_Namespace::Catalog& catalog,
177  Analyzer::Query& query,
178  TlistRefType allow_tlist_ref) const {
179  return get(timestampval_);
180 }
181 
182 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(const int64_t timestampval) {
183  Datum d;
184  d.bigintval = timestampval;
185  return makeExpr<Analyzer::Constant>(kTIMESTAMP, false, d);
186 }
187 
188 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
189  const Catalog_Namespace::Catalog& catalog,
190  Analyzer::Query& query,
191  TlistRefType allow_tlist_ref) const {
192  Datum d;
193  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
194 }
195 
196 std::shared_ptr<Analyzer::Expr> UserLiteral::get(const std::string& user) {
197  Datum d;
198  d.stringval = new std::string(user);
199  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
200 }
201 
202 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
203  const Catalog_Namespace::Catalog& catalog,
204  Analyzer::Query& query,
205  TlistRefType allow_tlist_ref) const {
206  SQLTypeInfo ti = SQLTypeInfo(kARRAY, true);
207  bool set_subtype = true;
208  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
209  for (auto& p : value_list) {
210  auto e = p->analyze(catalog, query, allow_tlist_ref);
211  CHECK(e);
212  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
213  if (c != nullptr && c->get_is_null()) {
214  value_exprs.push_back(c);
215  continue;
216  }
217  auto subtype = e->get_type_info().get_type();
218  if (subtype == kNULLT) {
219  // NULL element
220  } else if (set_subtype) {
221  ti.set_subtype(subtype);
222  set_subtype = false;
223  }
224  value_exprs.push_back(e);
225  }
226  std::shared_ptr<Analyzer::Expr> result =
227  makeExpr<Analyzer::Constant>(ti, false, value_exprs);
228  return result;
229 }
230 
231 std::string ArrayLiteral::to_string() const {
232  std::string str = "{";
233  bool notfirst = false;
234  for (auto& p : value_list) {
235  if (notfirst) {
236  str += ", ";
237  } else {
238  notfirst = true;
239  }
240  str += p->to_string();
241  }
242  str += "}";
243  return str;
244 }
245 
246 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
247  const Catalog_Namespace::Catalog& catalog,
248  Analyzer::Query& query,
249  TlistRefType allow_tlist_ref) const {
250  auto left_expr = left->analyze(catalog, query, allow_tlist_ref);
251  const auto& left_type = left_expr->get_type_info();
252  if (right == nullptr) {
253  return makeExpr<Analyzer::UOper>(
254  left_type, left_expr->get_contains_agg(), optype, left_expr->decompress());
255  }
256  if (optype == kARRAY_AT) {
257  if (left_type.get_type() != kARRAY) {
258  throw std::runtime_error(left->to_string() + " is not of array type.");
259  }
260  auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
261  const auto& right_type = right_expr->get_type_info();
262  if (!right_type.is_integer()) {
263  throw std::runtime_error(right->to_string() + " is not of integer type.");
264  }
265  return makeExpr<Analyzer::BinOper>(
266  left_type.get_elem_type(), false, kARRAY_AT, kONE, left_expr, right_expr);
267  }
268  auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
269  return normalize(optype, opqualifier, left_expr, right_expr);
270 }
271 
272 std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
273  const SQLOps optype,
274  const SQLQualifier qual,
275  std::shared_ptr<Analyzer::Expr> left_expr,
276  std::shared_ptr<Analyzer::Expr> right_expr) {
277  if (left_expr->get_type_info().is_date_in_days() ||
278  right_expr->get_type_info().is_date_in_days()) {
279  // Do not propogate encoding
280  left_expr = left_expr->decompress();
281  right_expr = right_expr->decompress();
282  }
283  const auto& left_type = left_expr->get_type_info();
284  auto right_type = right_expr->get_type_info();
285  if (qual != kONE) {
286  // subquery not supported yet.
287  CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
288  if (right_type.get_type() != kARRAY) {
289  throw std::runtime_error(
290  "Existential or universal qualifiers can only be used in front of a subquery "
291  "or an "
292  "expression of array type.");
293  }
294  right_type = right_type.get_elem_type();
295  }
296  SQLTypeInfo new_left_type;
297  SQLTypeInfo new_right_type;
298  auto result_type = Analyzer::BinOper::analyze_type_info(
299  optype, left_type, right_type, &new_left_type, &new_right_type);
300  if (result_type.is_timeinterval()) {
301  return makeExpr<Analyzer::BinOper>(
302  result_type, false, optype, qual, left_expr, right_expr);
303  }
304  if (left_type != new_left_type) {
305  left_expr = left_expr->add_cast(new_left_type);
306  }
307  if (right_type != new_right_type) {
308  if (qual == kONE) {
309  right_expr = right_expr->add_cast(new_right_type);
310  } else {
311  right_expr = right_expr->add_cast(new_right_type.get_array_type());
312  }
313  }
314 
315  if (IS_COMPARISON(optype)) {
316  if (optype != kOVERLAPS && new_left_type.is_geometry() &&
317  new_right_type.is_geometry()) {
318  throw std::runtime_error(
319  "Comparison operators are not yet supported for geospatial types.");
320  }
321 
322  if (new_left_type.get_compression() == kENCODING_DICT &&
323  new_right_type.get_compression() == kENCODING_DICT &&
324  new_left_type.get_comp_param() == new_right_type.get_comp_param()) {
325  // do nothing
326  } else if (new_left_type.get_compression() == kENCODING_DICT &&
327  new_right_type.get_compression() == kENCODING_NONE) {
328  SQLTypeInfo ti(new_right_type);
329  ti.set_compression(new_left_type.get_compression());
330  ti.set_comp_param(new_left_type.get_comp_param());
331  ti.set_fixed_size();
332  right_expr = right_expr->add_cast(ti);
333  } else if (new_right_type.get_compression() == kENCODING_DICT &&
334  new_left_type.get_compression() == kENCODING_NONE) {
335  SQLTypeInfo ti(new_left_type);
336  ti.set_compression(new_right_type.get_compression());
337  ti.set_comp_param(new_right_type.get_comp_param());
338  ti.set_fixed_size();
339  left_expr = left_expr->add_cast(ti);
340  } else {
341  left_expr = left_expr->decompress();
342  right_expr = right_expr->decompress();
343  }
344  } else {
345  left_expr = left_expr->decompress();
346  right_expr = right_expr->decompress();
347  }
348  bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
349  return makeExpr<Analyzer::BinOper>(
350  result_type, has_agg, optype, qual, left_expr, right_expr);
351 }
352 
353 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
354  const Catalog_Namespace::Catalog& catalog,
355  Analyzer::Query& query,
356  TlistRefType allow_tlist_ref) const {
357  throw std::runtime_error("Subqueries are not supported yet.");
358  return nullptr;
359 }
360 
361 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
362  const Catalog_Namespace::Catalog& catalog,
363  Analyzer::Query& query,
364  TlistRefType allow_tlist_ref) const {
365  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
366  auto result = makeExpr<Analyzer::UOper>(kBOOLEAN, kISNULL, arg_expr);
367  if (is_not) {
368  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
369  }
370  return result;
371 }
372 
373 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
374  const Catalog_Namespace::Catalog& catalog,
375  Analyzer::Query& query,
376  TlistRefType allow_tlist_ref) const {
377  throw std::runtime_error("Subqueries are not supported yet.");
378  return nullptr;
379 }
380 
381 std::shared_ptr<Analyzer::Expr> InValues::analyze(
382  const Catalog_Namespace::Catalog& catalog,
383  Analyzer::Query& query,
384  TlistRefType allow_tlist_ref) const {
385  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
386  SQLTypeInfo ti = arg_expr->get_type_info();
387  bool dict_comp = ti.get_compression() == kENCODING_DICT;
388  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
389  for (auto& p : value_list) {
390  auto e = p->analyze(catalog, query, allow_tlist_ref);
391  if (ti != e->get_type_info()) {
392  if (ti.is_string() && e->get_type_info().is_string()) {
393  ti = Analyzer::BinOper::common_string_type(ti, e->get_type_info());
394  } else if (ti.is_number() && e->get_type_info().is_number()) {
395  ti = Analyzer::BinOper::common_numeric_type(ti, e->get_type_info());
396  } else {
397  throw std::runtime_error("IN expressions must contain compatible types.");
398  }
399  }
400  if (dict_comp) {
401  value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
402  } else {
403  value_exprs.push_back(e);
404  }
405  }
406  if (!dict_comp) {
407  arg_expr = arg_expr->decompress();
408  arg_expr = arg_expr->add_cast(ti);
409  std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
410  for (auto p : value_exprs) {
411  cast_vals.push_back(p->add_cast(ti));
412  }
413  value_exprs.swap(cast_vals);
414  }
415  std::shared_ptr<Analyzer::Expr> result =
416  makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
417  if (is_not) {
418  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
419  }
420  return result;
421 }
422 
423 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
424  const Catalog_Namespace::Catalog& catalog,
425  Analyzer::Query& query,
426  TlistRefType allow_tlist_ref) const {
427  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
428  auto lower_expr = lower->analyze(catalog, query, allow_tlist_ref);
429  auto upper_expr = upper->analyze(catalog, query, allow_tlist_ref);
430  SQLTypeInfo new_left_type, new_right_type;
432  arg_expr->get_type_info(),
433  lower_expr->get_type_info(),
434  &new_left_type,
435  &new_right_type);
436  auto lower_pred =
437  makeExpr<Analyzer::BinOper>(kBOOLEAN,
438  kGE,
439  kONE,
440  arg_expr->add_cast(new_left_type)->decompress(),
441  lower_expr->add_cast(new_right_type)->decompress());
443  arg_expr->get_type_info(),
444  lower_expr->get_type_info(),
445  &new_left_type,
446  &new_right_type);
447  auto upper_pred = makeExpr<Analyzer::BinOper>(
448  kBOOLEAN,
449  kLE,
450  kONE,
451  arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
452  upper_expr->add_cast(new_right_type)->decompress());
453  std::shared_ptr<Analyzer::Expr> result =
454  makeExpr<Analyzer::BinOper>(kBOOLEAN, kAND, kONE, lower_pred, upper_pred);
455  if (is_not) {
456  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
457  }
458  return result;
459 }
460 
461 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
462  const Catalog_Namespace::Catalog& catalog,
463  Analyzer::Query& query,
464  TlistRefType allow_tlist_ref) const {
465  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
466  if (!arg_expr->get_type_info().is_string()) {
467  throw std::runtime_error(
468  "expression in char_length clause must be of a string type.");
469  }
470  std::shared_ptr<Analyzer::Expr> result =
471  makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length);
472  return result;
473 }
474 
475 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
476  const Catalog_Namespace::Catalog& catalog,
477  Analyzer::Query& query,
478  TlistRefType allow_tlist_ref) const {
479  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
480  if (!arg_expr->get_type_info().is_array()) {
481  throw std::runtime_error(
482  "expression in cardinality clause must be of an array type.");
483  }
484  std::shared_ptr<Analyzer::Expr> result =
485  makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
486  return result;
487 }
488 
489 void LikeExpr::check_like_expr(const std::string& like_str, char escape_char) {
490  if (like_str.back() == escape_char) {
491  throw std::runtime_error("LIKE pattern must not end with escape character.");
492  }
493 }
494 
495 bool LikeExpr::test_is_simple_expr(const std::string& like_str, char escape_char) {
496  // if not bounded by '%' then not a simple string
497  if (like_str.size() < 2 || like_str[0] != '%' || like_str[like_str.size() - 1] != '%') {
498  return false;
499  }
500  // if the last '%' is escaped then not a simple string
501  if (like_str[like_str.size() - 2] == escape_char &&
502  like_str[like_str.size() - 3] != escape_char) {
503  return false;
504  }
505  for (size_t i = 1; i < like_str.size() - 1; i++) {
506  if (like_str[i] == '%' || like_str[i] == '_' || like_str[i] == '[' ||
507  like_str[i] == ']') {
508  if (like_str[i - 1] != escape_char) {
509  return false;
510  }
511  }
512  }
513  return true;
514 }
515 
516 void LikeExpr::erase_cntl_chars(std::string& like_str, char escape_char) {
517  char prev_char = '\0';
518  // easier to create new string of allowable chars
519  // rather than erase chars from
520  // existing string
521  std::string new_str;
522  for (char& cur_char : like_str) {
523  if (cur_char == '%' || cur_char == escape_char) {
524  if (prev_char != escape_char) {
525  prev_char = cur_char;
526  continue;
527  }
528  }
529  new_str.push_back(cur_char);
530  prev_char = cur_char;
531  }
532  like_str = new_str;
533 }
534 
535 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
536  const Catalog_Namespace::Catalog& catalog,
537  Analyzer::Query& query,
538  TlistRefType allow_tlist_ref) const {
539  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
540  auto like_expr = like_string->analyze(catalog, query, allow_tlist_ref);
541  auto escape_expr = escape_string == nullptr
542  ? nullptr
543  : escape_string->analyze(catalog, query, allow_tlist_ref);
544  return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike, is_not);
545 }
546 
547 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
548  std::shared_ptr<Analyzer::Expr> like_expr,
549  std::shared_ptr<Analyzer::Expr> escape_expr,
550  const bool is_ilike,
551  const bool is_not) {
552  if (!arg_expr->get_type_info().is_string()) {
553  throw std::runtime_error("expression before LIKE must be of a string type.");
554  }
555  if (!like_expr->get_type_info().is_string()) {
556  throw std::runtime_error("expression after LIKE must be of a string type.");
557  }
558  char escape_char = '\\';
559  if (escape_expr != nullptr) {
560  if (!escape_expr->get_type_info().is_string()) {
561  throw std::runtime_error("expression after ESCAPE must be of a string type.");
562  }
563  if (!escape_expr->get_type_info().is_string()) {
564  throw std::runtime_error("expression after ESCAPE must be of a string type.");
565  }
566  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
567  if (c != nullptr && c->get_constval().stringval->length() > 1) {
568  throw std::runtime_error("String after ESCAPE must have a single character.");
569  }
570  escape_char = (*c->get_constval().stringval)[0];
571  }
572  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(like_expr);
573  bool is_simple = false;
574  if (c != nullptr) {
575  std::string& pattern = *c->get_constval().stringval;
576  if (is_ilike) {
577  std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
578  }
579  check_like_expr(pattern, escape_char);
580  is_simple = test_is_simple_expr(pattern, escape_char);
581  if (is_simple) {
582  erase_cntl_chars(pattern, escape_char);
583  }
584  }
585  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikeExpr>(
586  arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
587  if (is_not) {
588  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
589  }
590  return result;
591 }
592 
593 void RegexpExpr::check_pattern_expr(const std::string& pattern_str, char escape_char) {
594  if (pattern_str.back() == escape_char) {
595  throw std::runtime_error("REGEXP pattern must not end with escape character.");
596  }
597 }
598 
599 bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str, char escape_char) {
600  char prev_char = '\0';
601  char prev_prev_char = '\0';
602  std::string like_str;
603  for (char& cur_char : pattern_str) {
604  if (prev_char == escape_char || isalnum(cur_char) || cur_char == ' ' ||
605  cur_char == '.') {
606  like_str.push_back((cur_char == '.') ? '_' : cur_char);
607  prev_prev_char = prev_char;
608  prev_char = cur_char;
609  continue;
610  }
611  if (prev_char == '.' && prev_prev_char != escape_char) {
612  if (cur_char == '*' || cur_char == '+') {
613  if (cur_char == '*') {
614  like_str.pop_back();
615  }
616  // .* --> %
617  // .+ --> _%
618  like_str.push_back('%');
619  prev_prev_char = prev_char;
620  prev_char = cur_char;
621  continue;
622  }
623  }
624  return false;
625  }
626  pattern_str = like_str;
627  return true;
628 }
629 
630 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
631  const Catalog_Namespace::Catalog& catalog,
632  Analyzer::Query& query,
633  TlistRefType allow_tlist_ref) const {
634  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
635  auto pattern_expr = pattern_string->analyze(catalog, query, allow_tlist_ref);
636  auto escape_expr = escape_string == nullptr
637  ? nullptr
638  : escape_string->analyze(catalog, query, allow_tlist_ref);
639  return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not);
640 }
641 
642 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
643  std::shared_ptr<Analyzer::Expr> arg_expr,
644  std::shared_ptr<Analyzer::Expr> pattern_expr,
645  std::shared_ptr<Analyzer::Expr> escape_expr,
646  const bool is_not) {
647  if (!arg_expr->get_type_info().is_string()) {
648  throw std::runtime_error("expression before REGEXP must be of a string type.");
649  }
650  if (!pattern_expr->get_type_info().is_string()) {
651  throw std::runtime_error("expression after REGEXP must be of a string type.");
652  }
653  char escape_char = '\\';
654  if (escape_expr != nullptr) {
655  if (!escape_expr->get_type_info().is_string()) {
656  throw std::runtime_error("expression after ESCAPE must be of a string type.");
657  }
658  if (!escape_expr->get_type_info().is_string()) {
659  throw std::runtime_error("expression after ESCAPE must be of a string type.");
660  }
661  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
662  if (c != nullptr && c->get_constval().stringval->length() > 1) {
663  throw std::runtime_error("String after ESCAPE must have a single character.");
664  }
665  escape_char = (*c->get_constval().stringval)[0];
666  if (escape_char != '\\') {
667  throw std::runtime_error("Only supporting '\\' escape character.");
668  }
669  }
670  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(pattern_expr);
671  if (c != nullptr) {
672  std::string& pattern = *c->get_constval().stringval;
673  if (translate_to_like_pattern(pattern, escape_char)) {
674  return LikeExpr::get(arg_expr, pattern_expr, escape_expr, false, is_not);
675  }
676  }
677  std::shared_ptr<Analyzer::Expr> result =
678  makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
679  if (is_not) {
680  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
681  }
682  return result;
683 }
684 
685 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
686  const Catalog_Namespace::Catalog& catalog,
687  Analyzer::Query& query,
688  TlistRefType allow_tlist_ref) const {
689  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
690  return LikelihoodExpr::get(arg_expr, likelihood, is_not);
691 }
692 
693 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
694  std::shared_ptr<Analyzer::Expr> arg_expr,
695  float likelihood,
696  const bool is_not) {
697  if (!arg_expr->get_type_info().is_boolean()) {
698  throw std::runtime_error("likelihood expression expects boolean type.");
699  }
700  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikelihoodExpr>(
701  arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
702  return result;
703 }
704 
705 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
706  const Catalog_Namespace::Catalog& catalog,
707  Analyzer::Query& query,
708  TlistRefType allow_tlist_ref) const {
709  throw std::runtime_error("Subqueries are not supported yet.");
710  return nullptr;
711 }
712 
// Resolves a column reference to an analyzer expression.
//
// With a table qualifier, the column is looked up in that range table entry only.
// Without one, every range table entry of the query is searched; when that yields
// nothing and allow_tlist_ref is not TLIST_NONE, the name is matched against the
// result names of the query's target list instead.
// Returns a ColumnVar for a table column, or a copy of / an output Var for a matching
// target list entry.
// Throws std::runtime_error for "*", an unknown table or column, and ambiguous names.
//
// NOTE(review): listing line 768 is absent from this copy. The braces below only
// balance if that line opens a block (an extra condition guarding the
// `return v->deep_copy();`) — confirm against the original file before editing.
713 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
714  const Catalog_Namespace::Catalog& catalog,
715  Analyzer::Query& query,
716  TlistRefType allow_tlist_ref) const {
717  int table_id{0};
718  int rte_idx{0};
719  const ColumnDescriptor* cd{nullptr};
 // A null column name stands for "*", which is not a valid single-column reference.
720  if (column == nullptr) {
721  throw std::runtime_error("invalid column name *.");
722  }
723  if (table != nullptr) {
 // Qualified reference: resolve the range table entry by name first.
724  rte_idx = query.get_rte_idx(*table);
725  if (rte_idx < 0) {
726  throw std::runtime_error("range variable or table name " + *table +
727  " does not exist.");
728  }
729  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
730  cd = rte->get_column_desc(catalog, *column);
731  if (cd == nullptr) {
732  throw std::runtime_error("Column name " + *column + " does not exist.");
733  }
734  table_id = rte->get_table_id();
735  } else {
 // Unqualified reference: the name must match in exactly one range table entry.
736  bool found = false;
737  int i = 0;
738  for (auto rte : query.get_rangetable()) {
 // NOTE(review): cd is overwritten on every iteration, so after the loop it holds
 // the lookup result of the LAST entry rather than of the matching one. A column
 // found only in an earlier entry appears to leave cd null here — confirm.
739  cd = rte->get_column_desc(catalog, *column);
740  if (cd != nullptr && !found) {
741  found = true;
742  rte_idx = i;
743  table_id = rte->get_table_id();
744  } else if (cd != nullptr && found) {
745  throw std::runtime_error("Column name " + *column + " is ambiguous.");
746  }
747  i++;
748  }
749  if (cd == nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
750  // check if this is a reference to a targetlist entry
751  bool found = false;
752  int varno = -1;
 // Target list positions are counted from 1.
753  int i = 1;
754  std::shared_ptr<Analyzer::TargetEntry> tle;
755  for (auto p : query.get_targetlist()) {
756  if (*column == p->get_resname() && !found) {
757  found = true;
758  varno = i;
759  tle = p;
760  } else if (*column == p->get_resname() && found) {
761  throw std::runtime_error("Output alias " + *column + " is ambiguous.");
762  }
763  i++;
764  }
765  if (found) {
766  if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
767  Analyzer::Var* v = static_cast<Analyzer::Var*>(tle->get_expr());
769  return v->deep_copy();
770  }
771  }
 // TLIST_COPY duplicates the aliased expression; any other mode refers to it by
 // its output position.
772  if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
773  return tle->get_expr()->deep_copy();
774  } else {
775  return makeExpr<Analyzer::Var>(
776  tle->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
777  }
778  }
779  }
780  if (cd == nullptr) {
781  throw std::runtime_error("Column name " + *column + " does not exist.");
782  }
783  }
784  return makeExpr<Analyzer::ColumnVar>(cd->columnType, table_id, cd->columnId, rte_idx);
785 }
786 
787 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
788  const Catalog_Namespace::Catalog& catalog,
789  Analyzer::Query& query,
790  TlistRefType allow_tlist_ref) const {
791  SQLTypeInfo result_type;
792  SQLAgg agg_type;
793  std::shared_ptr<Analyzer::Expr> arg_expr;
794  bool is_distinct = false;
795  if (boost::iequals(*name, "count")) {
796  result_type = SQLTypeInfo(kBIGINT, false);
797  agg_type = kCOUNT;
798  if (arg) {
799  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
800  const SQLTypeInfo& ti = arg_expr->get_type_info();
801  if (ti.is_string() && (ti.get_compression() != kENCODING_DICT || !distinct)) {
802  throw std::runtime_error(
803  "Strings must be dictionary-encoded in COUNT(DISTINCT).");
804  }
805  if (ti.get_type() == kARRAY && !distinct) {
806  throw std::runtime_error("Only COUNT(DISTINCT) is supported on arrays.");
807  }
808  }
809  is_distinct = distinct;
810  } else {
811  if (!arg) {
812  throw std::runtime_error("Cannot compute " + *name + " with argument '*'.");
813  }
814  if (boost::iequals(*name, "min")) {
815  agg_type = kMIN;
816  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
817  arg_expr = arg_expr->decompress();
818  result_type = arg_expr->get_type_info();
819  } else if (boost::iequals(*name, "max")) {
820  agg_type = kMAX;
821  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
822  arg_expr = arg_expr->decompress();
823  result_type = arg_expr->get_type_info();
824  } else if (boost::iequals(*name, "avg")) {
825  agg_type = kAVG;
826  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
827  if (!arg_expr->get_type_info().is_number()) {
828  throw std::runtime_error("Cannot compute AVG on non-number-type arguments.");
829  }
830  arg_expr = arg_expr->decompress();
831  result_type = SQLTypeInfo(kDOUBLE, false);
832  } else if (boost::iequals(*name, "sum")) {
833  agg_type = kSUM;
834  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
835  if (!arg_expr->get_type_info().is_number()) {
836  throw std::runtime_error("Cannot compute SUM on non-number-type arguments.");
837  }
838  arg_expr = arg_expr->decompress();
839  result_type = arg_expr->get_type_info().is_integer() ? SQLTypeInfo(kBIGINT, false)
840  : arg_expr->get_type_info();
841  } else if (boost::iequals(*name, "unnest")) {
842  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
843  const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
844  if (arg_ti.get_type() != kARRAY) {
845  throw std::runtime_error(arg->to_string() + " is not of array type.");
846  }
847  return makeExpr<Analyzer::UOper>(arg_ti.get_elem_type(), false, kUNNEST, arg_expr);
848  } else {
849  throw std::runtime_error("invalid function name: " + *name);
850  }
851  if (arg_expr->get_type_info().is_string() ||
852  arg_expr->get_type_info().get_type() == kARRAY) {
853  throw std::runtime_error(
854  "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
855  }
856  }
857  int naggs = query.get_num_aggs();
858  query.set_num_aggs(naggs + 1);
859  return makeExpr<Analyzer::AggExpr>(
860  result_type, agg_type, arg_expr, is_distinct, nullptr);
861 }
862 
863 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
864  const Catalog_Namespace::Catalog& catalog,
865  Analyzer::Query& query,
866  TlistRefType allow_tlist_ref) const {
867  target_type->check_type();
868  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
869  SQLTypeInfo ti(target_type->get_type(),
870  target_type->get_param1(),
871  target_type->get_param2(),
872  arg_expr->get_type_info().get_notnull());
873  if (arg_expr->get_type_info().get_type() != target_type->get_type() &&
874  arg_expr->get_type_info().get_compression() != kENCODING_NONE) {
875  arg_expr->decompress();
876  }
877  return arg_expr->add_cast(ti);
878 }
879 
880 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
881  const Catalog_Namespace::Catalog& catalog,
882  Analyzer::Query& query,
883  TlistRefType allow_tlist_ref) const {
884  SQLTypeInfo ti;
885  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
886  expr_pair_list;
887  for (auto& p : when_then_list) {
888  auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
889  if (e1->get_type_info().get_type() != kBOOLEAN) {
890  throw std::runtime_error("Only boolean expressions can be used after WHEN.");
891  }
892  auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
893  expr_pair_list.emplace_back(e1, e2);
894  }
895  auto else_e = else_expr ? else_expr->analyze(catalog, query, allow_tlist_ref) : nullptr;
896  return normalize(expr_pair_list, else_e);
897 }
898 
899 namespace {
900 
901 bool expr_is_null(const Analyzer::Expr* expr) {
902  if (expr->get_type_info().get_type() == kNULLT) {
903  return true;
904  }
905  const auto const_expr = dynamic_cast<const Analyzer::Constant*>(expr);
906  return const_expr && const_expr->get_is_null();
907 }
908 
// Maps the literal's text to a boolean. The comparison is exact: only
// "t"/"true"/"T"/"True" yield true and "f"/"false"/"F"/"False" yield false.
// Any other spelling (for example "TRUE") raises std::runtime_error.
910  const std::string* s = str_literal->get_stringval();
911  if (*s == "t" || *s == "true" || *s == "T" || *s == "True") {
912  return true;
913  } else if (*s == "f" || *s == "false" || *s == "F" || *s == "False") {
914  return false;
915  } else {
916  throw std::runtime_error("Invalid string for boolean " + *s);
917  }
918 }
919 
920 } // namespace
921 
922 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
923  const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
924  std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
925  const std::shared_ptr<Analyzer::Expr> else_e_in) {
926  SQLTypeInfo ti;
927  bool has_agg = false;
928  std::set<int> dictionary_ids;
929 
930  for (auto& p : expr_pair_list) {
931  auto e1 = p.first;
932  CHECK(e1->get_type_info().is_boolean());
933  auto e2 = p.second;
934  if (e2->get_type_info().is_dict_encoded_string()) {
935  dictionary_ids.insert(e2->get_type_info().get_comp_param());
936  }
937  if (ti.get_type() == kNULLT) {
938  ti = e2->get_type_info();
939  } else if (e2->get_type_info().get_type() == kNULLT) {
940  ti.set_notnull(false);
941  e2->set_type_info(ti);
942  } else if (ti != e2->get_type_info()) {
943  if (ti.is_string() && e2->get_type_info().is_string()) {
944  ti = Analyzer::BinOper::common_string_type(ti, e2->get_type_info());
945  } else if (ti.is_number() && e2->get_type_info().is_number()) {
946  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
947  } else if (ti.is_boolean() && e2->get_type_info().is_boolean()) {
948  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
949  } else {
950  throw std::runtime_error(
951  "expressions in THEN clause must be of the same or compatible types.");
952  }
953  }
954  if (e2->get_contains_agg()) {
955  has_agg = true;
956  }
957  }
958  auto else_e = else_e_in;
959  if (else_e) {
960  if (else_e->get_contains_agg()) {
961  has_agg = true;
962  }
963  if (expr_is_null(else_e.get())) {
964  ti.set_notnull(false);
965  else_e->set_type_info(ti);
966  } else if (ti != else_e->get_type_info()) {
967  if (else_e->get_type_info().is_dict_encoded_string()) {
968  dictionary_ids.insert(else_e->get_type_info().get_comp_param());
969  }
970  ti.set_notnull(false);
971  if (ti.is_string() && else_e->get_type_info().is_string()) {
972  ti = Analyzer::BinOper::common_string_type(ti, else_e->get_type_info());
973  } else if (ti.is_number() && else_e->get_type_info().is_number()) {
974  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
975  } else if (ti.is_boolean() && else_e->get_type_info().is_boolean()) {
976  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
977  } else if (get_logical_type_info(ti) !=
978  get_logical_type_info(else_e->get_type_info())) {
979  throw std::runtime_error(
980  // types differing by encoding will be resolved at decode
981 
982  "expressions in ELSE clause must be of the same or compatible types as those "
983  "in the THEN clauses.");
984  }
985  }
986  }
987  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
988  cast_expr_pair_list;
989  for (auto p : expr_pair_list) {
990  ti.set_notnull(false);
991  cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
992  }
993  if (else_e != nullptr) {
994  else_e = else_e->add_cast(ti);
995  } else {
996  Datum d;
997  // always create an else expr so that executor doesn't need to worry about it
998  ti.set_notnull(false);
999  else_e = makeExpr<Analyzer::Constant>(ti, true, d);
1000  }
1001  if (ti.get_type() == kNULLT) {
1002  throw std::runtime_error(
1003  "Can't deduce the type for case expressions, all branches null");
1004  }
1005 
1006  auto case_expr = makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
1007  if (ti.get_compression() != kENCODING_DICT && dictionary_ids.size() == 1 &&
1008  *(dictionary_ids.begin()) > 0) {
1009  // the above logic makes two assumptions when strings are present. 1) that all types
1010  // in the case statement are either null or strings, and 2) that none-encoded strings
1011  // will always win out over dict encoding. If we only have one dictionary, and that
1012  // dictionary is not a transient dictionary, we can cast the entire case to be dict
1013  // encoded and use transient dictionaries for any literals
1014  ti.set_compression(kENCODING_DICT);
1015  ti.set_comp_param(*dictionary_ids.begin());
1016  case_expr->add_cast(ti);
1017  }
1018  return case_expr;
1019 }
1020 
1021 std::string CaseExpr::to_string() const {
1022  std::string str("CASE ");
1023  for (auto& p : when_then_list) {
1024  str += "WHEN " + p->get_expr1()->to_string() + " THEN " +
1025  p->get_expr2()->to_string() + " ";
1026  }
1027  if (else_expr != nullptr) {
1028  str += "ELSE " + else_expr->to_string();
1029  }
1030  str += " END";
1031  return str;
1032 }
1033 
1034 void UnionQuery::analyze(const Catalog_Namespace::Catalog& catalog,
1035  Analyzer::Query& query) const {
1036  left->analyze(catalog, query);
1037  Analyzer::Query* right_query = new Analyzer::Query();
1038  right->analyze(catalog, *right_query);
1039  query.set_next_query(right_query);
1040  query.set_is_unionall(is_unionall);
1041 }
1042 
1043 void QuerySpec::analyze_having_clause(const Catalog_Namespace::Catalog& catalog,
1044  Analyzer::Query& query) const {
1045  std::shared_ptr<Analyzer::Expr> p;
1046  if (having_clause != nullptr) {
1047  p = having_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1048  if (p->get_type_info().get_type() != kBOOLEAN) {
1049  throw std::runtime_error("Only boolean expressions can be in HAVING clause.");
1050  }
1051  p->check_group_by(query.get_group_by());
1052  }
1053  query.set_having_predicate(p);
1054 }
1055 
// Builds the GROUP BY expression list from groupby_clause and stores it with
// query.set_group_by(). Each GROUP BY item is either an integer ordinal into the
// target list or an expression analyzed with TLIST_REF.
// Throws std::runtime_error for a non-integer literal, an out-of-range ordinal, or an
// ordinal that refers to a target containing an aggregate.
1056 void QuerySpec::analyze_group_by(const Catalog_Namespace::Catalog& catalog,
1057  Analyzer::Query& query) const {
1058  std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1059  if (!groupby_clause.empty()) {
  // 1-based position of the current item within the GROUP BY list
1060  int gexpr_no = 1;
1061  std::shared_ptr<Analyzer::Expr> gexpr;
1062  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1063  query.get_targetlist();
1064  for (auto& c : groupby_clause) {
1065  // special-case ordinal numbers in GROUP BY
1066  if (dynamic_cast<Literal*>(c.get())) {
1067  IntLiteral* i = dynamic_cast<IntLiteral*>(c.get());
1068  if (!i) {
1069  throw std::runtime_error("Invalid literal in GROUP BY clause.");
1070  }
1071  int varno = (int)i->get_intval();
1072  if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1073  throw std::runtime_error("Invalid ordinal number in GROUP BY clause.");
1074  }
1075  if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1076  throw std::runtime_error(
1077  "Ordinal number in GROUP BY cannot reference an expression containing "
1078  "aggregate "
1079  "functions.");
1080  }
  // the ordinal becomes a Var pointing at the referenced target-list entry
1081  gexpr = makeExpr<Analyzer::Var>(
1082  tlist[varno - 1]->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
1083  } else {
1084  gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1085  }
1086  const SQLTypeInfo gti = gexpr->get_type_info();
1087  bool set_new_type = false;
1088  SQLTypeInfo ti(gti);
  // none-encoded strings are grouped through a cast to the adjusted type `ti`
1089  if (gti.is_string() && gti.get_compression() == kENCODING_NONE) {
1090  set_new_type = true;
1093  ti.set_fixed_size();
1094  }
1095  std::shared_ptr<Analyzer::Var> v;
  // When the item is a Var referring to a target-list entry, the GROUP BY list
  // receives that target's own expression and the target entry is redirected to
  // the Var, which is re-labelled as GROUP BY slot `gexpr_no`.
1096  if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
1097  v = std::static_pointer_cast<Analyzer::Var>(gexpr);
1098  int n = v->get_varno();
1099  gexpr = tlist[n - 1]->get_own_expr();
1100  auto cv = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gexpr);
1101  if (cv != nullptr) {
1102  // inherit all ColumnVar info for lineage.
1103  *std::static_pointer_cast<Analyzer::ColumnVar>(v) = *cv;
1104  }
1105  v->set_which_row(Analyzer::Var::kGROUPBY);
1106  v->set_varno(gexpr_no);
1107  tlist[n - 1]->set_expr(v);
1108  }
1109  if (set_new_type) {
1110  auto new_e = gexpr->add_cast(ti);
1111  groupby.push_back(new_e);
  // keep the redirected Var's type in step with the cast group-by expression
1112  if (v != nullptr) {
1113  v->set_type_info(new_e->get_type_info());
1114  }
1115  } else {
1116  groupby.push_back(gexpr);
1117  }
1118  gexpr_no++;
1119  }
1120  }
  // with aggregates or a GROUP BY present, every target expression is validated
  // against the GROUP BY list
1121  if (query.get_num_aggs() > 0 || !groupby.empty()) {
1122  for (auto t : query.get_targetlist()) {
1123  auto e = t->get_expr();
1124  e->check_group_by(groupby);
1125  }
1126  }
1127  query.set_group_by(groupby);
1128 }
1129 
1130 void QuerySpec::analyze_where_clause(const Catalog_Namespace::Catalog& catalog,
1131  Analyzer::Query& query) const {
1132  if (where_clause == nullptr) {
1133  query.set_where_predicate(nullptr);
1134  return;
1135  }
1136  auto p = where_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1137  if (p->get_type_info().get_type() != kBOOLEAN) {
1138  throw std::runtime_error("Only boolean expressions can be in WHERE clause.");
1139  }
1140  query.set_where_predicate(p);
1141 }
1142 
1143 void QuerySpec::analyze_select_clause(const Catalog_Namespace::Catalog& catalog,
1144  Analyzer::Query& query) const {
1145  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1146  query.get_targetlist_nonconst();
1147  if (select_clause.empty()) {
1148  // this means SELECT *
1149  int rte_idx = 0;
1150  for (auto rte : query.get_rangetable()) {
1151  rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1152  }
1153  } else {
1154  for (auto& p : select_clause) {
1155  const Parser::Expr* select_expr = p->get_select_expr();
1156  // look for the case of range_var.*
1157  if (typeid(*select_expr) == typeid(ColumnRef) &&
1158  dynamic_cast<const ColumnRef*>(select_expr)->get_column() == nullptr) {
1159  const std::string* range_var_name =
1160  dynamic_cast<const ColumnRef*>(select_expr)->get_table();
1161  int rte_idx = query.get_rte_idx(*range_var_name);
1162  if (rte_idx < 0) {
1163  throw std::runtime_error("invalid range variable name: " + *range_var_name);
1164  }
1165  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
1166  rte->expand_star_in_targetlist(catalog, tlist, rte_idx);
1167  } else {
1168  auto e = select_expr->analyze(catalog, query);
1169  std::string resname;
1170 
1171  if (p->get_alias() != nullptr) {
1172  resname = *p->get_alias();
1173  } else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1174  !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1175  auto colvar = std::static_pointer_cast<Analyzer::ColumnVar>(e);
1176  const ColumnDescriptor* col_desc = catalog.getMetadataForColumn(
1177  colvar->get_table_id(), colvar->get_column_id());
1178  resname = col_desc->columnName;
1179  }
1180  if (e->get_type_info().get_type() == kNULLT) {
1181  throw std::runtime_error(
1182  "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1183  }
1184  auto o = std::static_pointer_cast<Analyzer::UOper>(e);
1185  bool unnest = (o != nullptr && o->get_optype() == kUNNEST);
1186  auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1187  tlist.push_back(tle);
1188  }
1189  }
1190  }
1191 }
1192 
// Adds one range table entry to `query` for every table named in the FROM clause.
// The entry is labelled with the table's range variable (alias) when one was given,
// otherwise with the table name itself.
// Throws std::runtime_error when a table is not found in the catalog.
1193 void QuerySpec::analyze_from_clause(const Catalog_Namespace::Catalog& catalog,
1194  Analyzer::Query& query) const {
1196  for (auto& p : from_clause) {
1197  const TableDescriptor* table_desc;
1198  table_desc = catalog.getMetadataForTable(*p->get_table_name());
1199  if (table_desc == nullptr) {
1200  throw std::runtime_error("Table " + *p->get_table_name() + " does not exist.");
1201  }
1202  std::string range_var;
1203  if (p->get_range_var() == nullptr) {
1204  range_var = *p->get_table_name();
1205  } else {
1206  range_var = *p->get_range_var();
1207  }
  // NOTE(review): the entry is allocated with `new` and handed to add_rte();
  // presumably the query takes ownership -- confirm against Analyzer::Query.
1208  rte = new Analyzer::RangeTableEntry(range_var, table_desc, nullptr);
1209  query.add_rte(rte);
1210  }
1211 }
1212 
// Runs the per-clause analyzers in dependency order: FROM fills the range table that
// SELECT expands stars against, SELECT fills the target list that GROUP BY reads, and
// HAVING is checked against the GROUP BY list produced just before it.
1213 void QuerySpec::analyze(const Catalog_Namespace::Catalog& catalog,
1214  Analyzer::Query& query) const {
1216  analyze_from_clause(catalog, query);
1217  analyze_select_clause(catalog, query);
1218  analyze_where_clause(catalog, query);
1219  analyze_group_by(catalog, query);
1220  analyze_having_clause(catalog, query);
1221 }
1222 
1223 void SelectStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1224  Analyzer::Query& query) const {
1225  query.set_stmt_type(kSELECT);
1226  query.set_limit(limit);
1227  if (offset < 0) {
1228  throw std::runtime_error("OFFSET cannot be negative.");
1229  }
1230  query.set_offset(offset);
1231  query_expr->analyze(catalog, query);
1232  if (orderby_clause.empty() && !query.get_is_distinct()) {
1233  query.set_order_by(nullptr);
1234  return;
1235  }
1236  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1237  query.get_targetlist();
1238  std::list<Analyzer::OrderEntry>* order_by = new std::list<Analyzer::OrderEntry>();
1239  if (!orderby_clause.empty()) {
1240  for (auto& p : orderby_clause) {
1241  int tle_no = p->get_colno();
1242  if (tle_no == 0) {
1243  // use column name
1244  // search through targetlist for matching name
1245  const std::string* name = p->get_column()->get_column();
1246  tle_no = 1;
1247  bool found = false;
1248  for (auto tle : tlist) {
1249  if (tle->get_resname() == *name) {
1250  found = true;
1251  break;
1252  }
1253  tle_no++;
1254  }
1255  if (!found) {
1256  throw std::runtime_error("invalid name in order by: " + *name);
1257  }
1258  }
1259  order_by->push_back(
1260  Analyzer::OrderEntry(tle_no, p->get_is_desc(), p->get_nulls_first()));
1261  }
1262  }
1263  if (query.get_is_distinct()) {
1264  // extend order_by to include all targetlist entries.
1265  for (int i = 1; i <= static_cast<int>(tlist.size()); i++) {
1266  bool in_orderby = false;
1267  std::for_each(order_by->begin(),
1268  order_by->end(),
1269  [&in_orderby, i](const Analyzer::OrderEntry& oe) {
1270  in_orderby = in_orderby || (i == oe.tle_no);
1271  });
1272  if (!in_orderby) {
1273  order_by->push_back(Analyzer::OrderEntry(i, false, false));
1274  }
1275  }
1276  }
1277  query.set_order_by(order_by);
1278 }
1279 
1280 std::string SelectEntry::to_string() const {
1281  std::string str = select_expr->to_string();
1282  if (alias != nullptr) {
1283  str += " AS " + *alias;
1284  }
1285  return str;
1286 }
1287 
1288 std::string TableRef::to_string() const {
1289  std::string str = *table_name;
1290  if (range_var != nullptr) {
1291  str += " " + *range_var;
1292  }
1293  return str;
1294 }
1295 
1296 std::string ColumnRef::to_string() const {
1297  std::string str;
1298  if (table == nullptr) {
1299  str = *column;
1300  } else if (column == nullptr) {
1301  str = *table + ".*";
1302  } else {
1303  str = *table + "." + *column;
1304  }
1305  return str;
1306 }
1307 
1308 std::string OperExpr::to_string() const {
1309  std::string op_str[] = {
1310  "=", "===", "<>", "<", ">", "<=", ">=", " AND ", " OR ", "NOT", "-", "+", "*", "/"};
1311  std::string str;
1312  if (optype == kUMINUS) {
1313  str = "-(" + left->to_string() + ")";
1314  } else if (optype == kNOT) {
1315  str = "NOT (" + left->to_string() + ")";
1316  } else if (optype == kARRAY_AT) {
1317  str = left->to_string() + "[" + right->to_string() + "]";
1318  } else if (optype == kUNNEST) {
1319  str = "UNNEST(" + left->to_string() + ")";
1320  } else if (optype == kIN) {
1321  str = "(" + left->to_string() + " IN " + right->to_string() + ")";
1322  } else {
1323  str = "(" + left->to_string() + op_str[optype] + right->to_string() + ")";
1324  }
1325  return str;
1326 }
1327 
1328 std::string InExpr::to_string() const {
1329  std::string str = arg->to_string();
1330  if (is_not) {
1331  str += " NOT IN ";
1332  } else {
1333  str += " IN ";
1334  }
1335  return str;
1336 }
1337 
1338 std::string ExistsExpr::to_string() const {
1339  return "EXISTS (" + query->to_string() + ")";
1340 }
1341 
1342 std::string SubqueryExpr::to_string() const {
1343  std::string str;
1344  str = "(";
1345  str += query->to_string();
1346  str += ")";
1347  return str;
1348 }
1349 
1350 std::string IsNullExpr::to_string() const {
1351  std::string str = arg->to_string();
1352  if (is_not) {
1353  str += " IS NOT NULL";
1354  } else {
1355  str += " IS NULL";
1356  }
1357  return str;
1358 }
1359 
1360 std::string InSubquery::to_string() const {
1361  std::string str = InExpr::to_string();
1362  str += subquery->to_string();
1363  return str;
1364 }
1365 
1366 std::string InValues::to_string() const {
1367  std::string str = InExpr::to_string() + "(";
1368  bool notfirst = false;
1369  for (auto& p : value_list) {
1370  if (notfirst) {
1371  str += ", ";
1372  } else {
1373  notfirst = true;
1374  }
1375  str += p->to_string();
1376  }
1377  str += ")";
1378  return str;
1379 }
1380 
1381 std::string BetweenExpr::to_string() const {
1382  std::string str = arg->to_string();
1383  if (is_not) {
1384  str += " NOT BETWEEN ";
1385  } else {
1386  str += " BETWEEN ";
1387  }
1388  str += lower->to_string() + " AND " + upper->to_string();
1389  return str;
1390 }
1391 
1392 std::string CharLengthExpr::to_string() const {
1393  std::string str;
1394  if (calc_encoded_length) {
1395  str = "CHAR_LENGTH (" + arg->to_string() + ")";
1396  } else {
1397  str = "LENGTH (" + arg->to_string() + ")";
1398  }
1399  return str;
1400 }
1401 
1402 std::string CardinalityExpr::to_string() const {
1403  std::string str = "CARDINALITY(" + arg->to_string() + ")";
1404  return str;
1405 }
1406 
1407 std::string LikeExpr::to_string() const {
1408  std::string str = arg->to_string();
1409  if (is_not) {
1410  str += " NOT LIKE ";
1411  } else {
1412  str += " LIKE ";
1413  }
1414  str += like_string->to_string();
1415  if (escape_string != nullptr) {
1416  str += " ESCAPE " + escape_string->to_string();
1417  }
1418  return str;
1419 }
1420 
1421 std::string RegexpExpr::to_string() const {
1422  std::string str = arg->to_string();
1423  if (is_not) {
1424  str += " NOT REGEXP ";
1425  } else {
1426  str += " REGEXP ";
1427  }
1428  str += pattern_string->to_string();
1429  if (escape_string != nullptr) {
1430  str += " ESCAPE " + escape_string->to_string();
1431  }
1432  return str;
1433 }
1434 
1435 std::string LikelihoodExpr::to_string() const {
1436  std::string str = " LIKELIHOOD ";
1437  str += arg->to_string();
1438  str += " ";
1439  str += boost::lexical_cast<std::string>(is_not ? 1.0 - likelihood : likelihood);
1440  return str;
1441 }
1442 
1443 std::string FunctionRef::to_string() const {
1444  std::string str = *name + "(";
1445  if (distinct) {
1446  str += "DISTINCT ";
1447  }
1448  if (arg == nullptr) {
1449  str += "*)";
1450  } else {
1451  str += arg->to_string() + ")";
1452  }
1453  return str;
1454 }
1455 
1456 std::string QuerySpec::to_string() const {
1457  std::string query_str = "SELECT ";
1458  if (is_distinct) {
1459  query_str += "DISTINCT ";
1460  }
1461  if (select_clause.empty()) {
1462  query_str += "* ";
1463  } else {
1464  bool notfirst = false;
1465  for (auto& p : select_clause) {
1466  if (notfirst) {
1467  query_str += ", ";
1468  } else {
1469  notfirst = true;
1470  }
1471  query_str += p->to_string();
1472  }
1473  }
1474  query_str += " FROM ";
1475  bool notfirst = false;
1476  for (auto& p : from_clause) {
1477  if (notfirst) {
1478  query_str += ", ";
1479  } else {
1480  notfirst = true;
1481  }
1482  query_str += p->to_string();
1483  }
1484  if (where_clause) {
1485  query_str += " WHERE " + where_clause->to_string();
1486  }
1487  if (!groupby_clause.empty()) {
1488  query_str += " GROUP BY ";
1489  bool notfirst = false;
1490  for (auto& p : groupby_clause) {
1491  if (notfirst) {
1492  query_str += ", ";
1493  } else {
1494  notfirst = true;
1495  }
1496  query_str += p->to_string();
1497  }
1498  }
1499  if (having_clause) {
1500  query_str += " HAVING " + having_clause->to_string();
1501  }
1502  query_str += ";";
1503  return query_str;
1504 }
1505 
// Common analysis for INSERT statements: marks the query as kINSERT, resolves the
// target table and records the ids of the columns being written.
// Throws std::runtime_error when the table does not exist, is a view, a named column
// does not exist, a geo column's physical columns are missing, or the explicit column
// list does not cover every column of the table.
1506 void InsertStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1507  Analyzer::Query& query) const {
1508  query.set_stmt_type(kINSERT);
1509  const TableDescriptor* td = catalog.getMetadataForTable(*table);
1510  if (td == nullptr) {
1511  throw std::runtime_error("Table " + *table + " does not exist.");
1512  }
1513  if (td->isView) {
1514  throw std::runtime_error("Insert to views is not supported yet.");
1515  }
1517  query.set_result_table_id(td->tableId);
1518  std::list<int> result_col_list;
1519  if (column_list.empty()) {
  // no explicit column list: target every column returned by the catalog
  // NOTE(review): the three boolean flags presumably select system / virtual /
  // physical columns -- confirm against Catalog::getAllColumnMetadataForTable.
1520  const std::list<const ColumnDescriptor*> all_cols =
1521  catalog.getAllColumnMetadataForTable(td->tableId, false, false, true);
1522  for (auto cd : all_cols) {
1523  result_col_list.push_back(cd->columnId);
1524  }
1525  } else {
1526  for (auto& c : column_list) {
1527  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
1528  if (cd == nullptr) {
1529  throw std::runtime_error("Column " + *c + " does not exist.");
1530  }
1531  result_col_list.push_back(cd->columnId);
1532  const auto& col_ti = cd->columnType;
  // a column with physical columns (geometry) also contributes the ids that
  // directly follow its own column id
1533  if (col_ti.get_physical_cols() > 0) {
1534  CHECK(cd->columnType.is_geometry());
1535  for (auto i = 1; i <= col_ti.get_physical_cols(); i++) {
1536  const ColumnDescriptor* pcd =
1537  catalog.getMetadataForColumn(td->tableId, cd->columnId + i);
1538  if (pcd == nullptr) {
1539  throw std::runtime_error("Column " + *c + "'s metadata is incomplete.");
1540  }
1541  result_col_list.push_back(pcd->columnId);
1542  }
1543  }
1544  }
  // the explicit list must account for exactly as many columns as the table has
1545  if (catalog.getAllColumnMetadataForTable(td->tableId, false, false, true).size() !=
1546  result_col_list.size()) {
1547  throw std::runtime_error("Insert into a subset of columns is not supported yet.");
1548  }
1549  }
1550  query.set_result_col_list(result_col_list);
1551 }
1552 
// Picks the leaf (out of `num_leafs`) that should receive this single-row insert.
//  - Unsharded tables: a leaf is chosen at random.
//  - Sharded tables: the value supplied for the shard column is analyzed, reduced to
//    a shard id over (nShards * num_leafs) shards, and the leaf is shardId / nShards.
// Throws std::runtime_error when the table does not exist, is a view, is replicated,
// or no value is supplied for the shard column.
1553 size_t InsertValuesStmt::determineLeafIndex(const Catalog_Namespace::Catalog& catalog,
1554  size_t num_leafs) {
1555  const TableDescriptor* td = catalog.getMetadataForTable(*table);
1556  if (td == nullptr) {
1557  throw std::runtime_error("Table " + *table + " does not exist.");
1558  }
1559  if (td->isView) {
1560  throw std::runtime_error("Insert to views is not supported yet.");
1561  }
1563  if (td->partitions == "REPLICATED") {
1564  throw std::runtime_error("Cannot determine leaf on replicated table.");
1565  }
1566 
  // unsharded table: any leaf will do, so choose one at random
1567  if (0 == td->nShards) {
1568  std::random_device rd;
1569  std::mt19937_64 gen(rd());
1570  std::uniform_int_distribution<size_t> dis;
1571  const auto leaf_idx = dis(gen) % num_leafs;
1572  return leaf_idx;
1573  }
1574 
  // position of the shard column's value within value_list
1575  size_t indexOfShardColumn = 0;
1576  const ColumnDescriptor* shardColumn = catalog.getShardColumnMetadataForTable(td);
1577  CHECK(shardColumn);
1578 
1579  if (column_list.empty()) {
  // no explicit column list: values follow the order of the catalog's column list
1580  auto all_cols =
1581  catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
1582  auto iter = std::find(all_cols.begin(), all_cols.end(), shardColumn);
1583  CHECK(iter != all_cols.end());
1584  indexOfShardColumn = std::distance(all_cols.begin(), iter);
1585  } else {
  // explicit column list: find the shard column by name
1586  for (auto& c : column_list) {
1587  if (*c == shardColumn->columnName) {
1588  break;
1589  }
1590  indexOfShardColumn++;
1591  }
1592 
1593  if (indexOfShardColumn == column_list.size()) {
1594  throw std::runtime_error("No value defined for shard column.");
1595  }
1596  }
1597 
1598  if (indexOfShardColumn >= value_list.size()) {
1599  throw std::runtime_error("No value defined for shard column.");
1600  }
1601 
1602  auto& shardColumnValueExpr = *(std::next(value_list.begin(), indexOfShardColumn));
1603 
  // analyze the value against a scratch query and coerce it to the shard column type
1604  Analyzer::Query query;
1605  auto e = shardColumnValueExpr->analyze(catalog, query);
1606  e = e->add_cast(shardColumn->columnType);
1607  const Analyzer::Constant* con = dynamic_cast<Analyzer::Constant*>(e.get());
1608  if (!con) {
  // the cast may have wrapped the constant in a kCAST UOper; look through it
1609  auto col_cast = dynamic_cast<const Analyzer::UOper*>(e.get());
1610  CHECK(col_cast);
1611  CHECK_EQ(kCAST, col_cast->get_optype());
1612  con = dynamic_cast<const Analyzer::Constant*>(col_cast->get_operand());
1613  }
1614  CHECK(con);
1615 
1616  Datum d = con->get_constval();
1617 
  // total number of shards across all leaves
1618  auto shard_count = td->nShards * num_leafs;
1619  int64_t shardId = 0;
1620 
1621  if (con->get_is_null()) {
1623  shard_count);
1624  } else if (shardColumn->columnType.is_string()) {
  // string shard keys are sharded by their dictionary id
1625  auto dictDesc =
1626  catalog.getMetadataForDict(shardColumn->columnType.get_comp_param(), true);
1627  auto str_id = dictDesc->stringDict->getOrAdd(*d.stringval);
1628  bool invalid = false;
1629 
  // an id that does not fit the column's storage width is treated as invalid
1630  if (4 == shardColumn->columnType.get_size()) {
1631  invalid = str_id > max_valid_int_value<int32_t>();
1632  } else if (2 == shardColumn->columnType.get_size()) {
1633  invalid = str_id > max_valid_int_value<uint16_t>();
1634  } else if (1 == shardColumn->columnType.get_size()) {
1635  invalid = str_id > max_valid_int_value<uint8_t>();
1636  }
1637 
  // invalid or null ids are replaced by the column's fixed-encoding null value
1638  if (invalid || str_id == inline_int_null_value<int32_t>()) {
1639  str_id = inline_fixed_encoding_null_val(shardColumn->columnType);
1640  }
1641  shardId = SHARD_FOR_KEY(str_id, shard_count);
1642  } else {
  // integer shard keys: read the Datum member matching the column's logical size
1643  switch (shardColumn->columnType.get_logical_size()) {
1644  case 8:
1645  shardId = SHARD_FOR_KEY(d.bigintval, shard_count);
1646  break;
1647  case 4:
1648  shardId = SHARD_FOR_KEY(d.intval, shard_count);
1649  break;
1650  case 2:
1651  shardId = SHARD_FOR_KEY(d.smallintval, shard_count);
1652  break;
1653  case 1:
1654  shardId = SHARD_FOR_KEY(d.tinyintval, shard_count);
1655  break;
1656  default:
1657  CHECK(false);
1658  }
1659  }
1660 
  // each leaf holds nShards consecutive shard ids
1661  return shardId / td->nShards;
1662 }
1663 
// Analyzes INSERT ... VALUES: after the common InsertStmt analysis, every value
// expression is analyzed, cast to its column type and appended to the target list.
// A geometry column additionally contributes one target entry per physical column
// (coords, then ring sizes / poly rings / bounds / render group as the type requires).
// Throws std::runtime_error when the value count differs from the column count, NULL
// is inserted into a NOT NULL column, or a geometry value is missing, unreadable or of
// the wrong type.
1664 void InsertValuesStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1665  Analyzer::Query& query) const {
1666  InsertStmt::analyze(catalog, query);
1667  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1668  query.get_targetlist_nonconst();
1669  const auto tableId = query.get_result_table_id();
1670  const std::list<const ColumnDescriptor*> non_phys_cols =
1671  catalog.getAllColumnMetadataForTable(tableId, false, false, false);
  // NOTE(review): the test is `!=`, so this message is also raised when there are
  // more expressions than target columns.
1672  if (non_phys_cols.size() != value_list.size()) {
1673  throw std::runtime_error("Insert has more target columns than expressions.");
1674  }
  // `it` walks the result column ids in step with the values (and with the
  // physical columns emitted for geometry values)
1675  std::list<int>::const_iterator it = query.get_result_col_list().begin();
1676  for (auto& v : value_list) {
1677  auto e = v->analyze(catalog, query);
1678  const ColumnDescriptor* cd =
1679  catalog.getMetadataForColumn(query.get_result_table_id(), *it);
1680  CHECK(cd);
1681  if (cd->columnType.get_notnull()) {
1682  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
1683  if (c != nullptr && c->get_is_null()) {
1684  throw std::runtime_error("Cannot insert NULL into column " + cd->columnName);
1685  }
1686  }
1687  e = e->add_cast(cd->columnType);
1688  tlist.emplace_back(new Analyzer::TargetEntry("", e, false));
1689  ++it;
1690 
1691  const auto& col_ti = cd->columnType;
1692  if (col_ti.get_physical_cols() > 0) {
1693  CHECK(cd->columnType.is_geometry());
  // recover the constant behind the value, looking through a kCAST if present
1694  auto c = dynamic_cast<const Analyzer::Constant*>(e.get());
1695  if (!c) {
1696  auto uoper = std::dynamic_pointer_cast<Analyzer::UOper>(e);
1697  if (uoper && uoper->get_optype() == kCAST) {
1698  c = dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
1699  }
1700  }
1701  bool is_null = false;
1702  std::string* geo_string{nullptr};
1703  if (c) {
1704  is_null = c->get_is_null();
1705  if (!is_null) {
1706  geo_string = c->get_constval().stringval;
1707  }
1708  }
1709  if (!is_null && !geo_string) {
1710  throw std::runtime_error("Expecting a WKT or WKB hex string for column " +
1711  cd->columnName);
1712  }
  // outputs of the geometry import, later written to the physical columns
1713  std::vector<double> coords;
1714  std::vector<double> bounds;
1715  std::vector<int> ring_sizes;
1716  std::vector<int> poly_rings;
1717  int render_group =
1718  0; // @TODO simon.eves where to get render_group from in this context?!
1719  SQLTypeInfo import_ti{cd->columnType};
1720  if (!is_null) {
1722  *geo_string, import_ti, coords, bounds, ring_sizes, poly_rings)) {
1723  throw std::runtime_error("Cannot read geometry to insert into column " +
1724  cd->columnName);
1725  }
1726  if (coords.empty()) {
1727  // Importing from geo_string WKT resulted in empty coords: dealing with a NULL
1728  is_null = true;
1729  }
1730  if (cd->columnType.get_type() != import_ti.get_type()) {
1731  // allow POLYGON to be inserted into MULTIPOLYGON column
1732  if (!(import_ti.get_type() == SQLTypes::kPOLYGON &&
1734  throw std::runtime_error(
1735  "Imported geometry doesn't match the type of column " + cd->columnName);
1736  }
1737  }
1738  } else {
1739  // Special case for NULL POINT, push NULL representation to coords
1740  if (cd->columnType.get_type() == kPOINT) {
1741  if (!coords.empty()) {
1742  throw std::runtime_error("NULL POINT with unexpected coordinates in column " +
1743  cd->columnName);
1744  }
1745  coords.push_back(NULL_ARRAY_DOUBLE);
1746  coords.push_back(NULL_DOUBLE);
1747  }
1748  }
1749 
1750  // TODO: check if import SRID matches columns SRID, may need to transform before
1751  // inserting
1752 
  // offset from the geo column's id to the next physical column to fill
1753  int nextColumnOffset = 1;
1754 
  // physical column 1: compressed coordinates as a TINYINT array
1755  const ColumnDescriptor* cd_coords = catalog.getMetadataForColumn(
1756  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1757  CHECK(cd_coords);
1758  CHECK_EQ(cd_coords->columnType.get_type(), kARRAY);
1759  CHECK_EQ(cd_coords->columnType.get_subtype(), kTINYINT);
1760  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
  // a NULL POINT still writes its coords (the NULL representation pushed above)
1761  if (!is_null || cd->columnType.get_type() == kPOINT) {
1762  auto compressed_coords = Geospatial::compress_coords(coords, col_ti);
1763  for (auto cc : compressed_coords) {
1764  Datum d;
1765  d.tinyintval = cc;
1766  auto e = makeExpr<Analyzer::Constant>(kTINYINT, false, d);
1767  value_exprs.push_back(e);
1768  }
1769  }
1770  tlist.emplace_back(new Analyzer::TargetEntry(
1771  "",
1772  makeExpr<Analyzer::Constant>(cd_coords->columnType, is_null, value_exprs),
1773  false));
1774  ++it;
1775  nextColumnOffset++;
1776 
1777  if (cd->columnType.get_type() == kPOLYGON ||
1778  cd->columnType.get_type() == kMULTIPOLYGON) {
1779  // Put ring sizes array into separate physical column
1780  const ColumnDescriptor* cd_ring_sizes = catalog.getMetadataForColumn(
1781  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1782  CHECK(cd_ring_sizes);
1783  CHECK_EQ(cd_ring_sizes->columnType.get_type(), kARRAY);
1784  CHECK_EQ(cd_ring_sizes->columnType.get_subtype(), kINT);
1785  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1786  if (!is_null) {
1787  for (auto c : ring_sizes) {
1788  Datum d;
1789  d.intval = c;
1790  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1791  value_exprs.push_back(e);
1792  }
1793  }
1794  tlist.emplace_back(new Analyzer::TargetEntry(
1795  "",
1796  makeExpr<Analyzer::Constant>(cd_ring_sizes->columnType, is_null, value_exprs),
1797  false));
1798  ++it;
1799  nextColumnOffset++;
1800 
1801  if (cd->columnType.get_type() == kMULTIPOLYGON) {
1802  // Put poly_rings array into separate physical column
1803  const ColumnDescriptor* cd_poly_rings = catalog.getMetadataForColumn(
1804  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1805  CHECK(cd_poly_rings);
1806  CHECK_EQ(cd_poly_rings->columnType.get_type(), kARRAY);
1807  CHECK_EQ(cd_poly_rings->columnType.get_subtype(), kINT);
1808  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1809  if (!is_null) {
1810  for (auto c : poly_rings) {
1811  Datum d;
1812  d.intval = c;
1813  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1814  value_exprs.push_back(e);
1815  }
1816  }
1817  tlist.emplace_back(new Analyzer::TargetEntry(
1818  "",
1819  makeExpr<Analyzer::Constant>(
1820  cd_poly_rings->columnType, is_null, value_exprs),
1821  false));
1822  ++it;
1823  nextColumnOffset++;
1824  }
1825  }
1826 
  // bounds array (DOUBLE) for every geo type except POINT
1827  if (cd->columnType.get_type() == kLINESTRING ||
1828  cd->columnType.get_type() == kPOLYGON ||
1829  cd->columnType.get_type() == kMULTIPOLYGON) {
1830  const ColumnDescriptor* cd_bounds = catalog.getMetadataForColumn(
1831  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1832  CHECK(cd_bounds);
1833  CHECK_EQ(cd_bounds->columnType.get_type(), kARRAY);
1834  CHECK_EQ(cd_bounds->columnType.get_subtype(), kDOUBLE);
1835  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1836  if (!is_null) {
1837  for (auto b : bounds) {
1838  Datum d;
1839  d.doubleval = b;
1840  auto e = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
1841  value_exprs.push_back(e);
1842  }
1843  }
1844  tlist.emplace_back(new Analyzer::TargetEntry(
1845  "",
1846  makeExpr<Analyzer::Constant>(cd_bounds->columnType, is_null, value_exprs),
1847  false));
1848  ++it;
1849  nextColumnOffset++;
1850  }
1851 
1852  if (cd->columnType.get_type() == kPOLYGON ||
1853  cd->columnType.get_type() == kMULTIPOLYGON) {
1854  // Put render group into separate physical column
1855  const ColumnDescriptor* cd_render_group = catalog.getMetadataForColumn(
1856  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1857  CHECK(cd_render_group);
1858  CHECK_EQ(cd_render_group->columnType.get_type(), kINT);
1859  Datum d;
1860  d.intval = render_group;
1861  tlist.emplace_back(new Analyzer::TargetEntry(
1862  "",
1863  makeExpr<Analyzer::Constant>(cd_render_group->columnType, is_null, d),
1864  false));
1865  ++it;
1866  nextColumnOffset++;
1867  }
1868  }
1869  }
1870 }
1871 
// Executes a single-row INSERT ... VALUES statement on behalf of `session`.
//
// Visible flow: privilege check (throws std::runtime_error on failure), acquisition of
// a mapd_unique_lock named execute_write_lock, analyze() of the statement into an
// Analyzer::Query, locking of the result table, rejection of views, and finally
// RelAlgExecutor::executeSimpleInsert().
//
// NOTE(review): this listing is missing source lines 1876, 1881-1882, 1892, 1900 and
// 1902, so the privilege-check arguments, the lock operands and the declaration of
// `executor` are not visible here -- confirm against the real file.
1872 void InsertValuesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
1873  auto& catalog = session.getCatalog();
1874 
// Refuse the insert when the session's privilege check fails.
1875  if (!session.checkDBAccessPrivileges(
1877  throw std::runtime_error("User has no insert privileges on " + *table + ".");
1878  }
1879 
1880  auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
1883 
// analyze() fills `query`; its result table id is used for the lock lookup below.
1884  Analyzer::Query query;
1885  analyze(catalog, query);
1886 
1887  // Acquire schema write lock -- leave data lock so the fragmenter can checkpoint. For
1888  // singleton inserts we just take a write lock on the schema, which prevents concurrent
1889  // inserts.
1890  auto result_table_id = query.get_result_table_id();
1891  const auto td_with_lock =
1893  catalog, result_table_id);
1894  auto td = td_with_lock();
1895  CHECK(td);
1896 
// Views cannot be the target of a singleton insert.
1897  if (td->isView) {
1898  throw std::runtime_error("Singleton inserts on views is not supported.");
1899  }
1901 
1903  RelAlgExecutor ra_executor(executor.get(), catalog);
1904 
1905  ra_executor.executeSimpleInsert(query);
1906 }
1907 
1908 void UpdateStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1909  Analyzer::Query& query) const {
1910  throw std::runtime_error("UPDATE statement not supported yet.");
1911 }
1912 
1913 void DeleteStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1914  Analyzer::Query& query) const {
1915  throw std::runtime_error("DELETE statement not supported yet.");
1916 }
1917 
1918 namespace {
1919 
// Shard-key column type check. Returns normally when the column type is an integer,
// a string with kENCODING_DICT compression, or a time type; any other type makes it
// throw std::runtime_error naming the type and its encoding.
// NOTE(review): the signature line (listing line 1920) is missing from this extract;
// the body only shows that it reads `cd.columnType`.
1921  const auto& col_ti = cd.columnType;
1922  if (col_ti.is_integer() ||
1923  (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) ||
1924  col_ti.is_time()) {
// Accepted type: nothing to report.
1925  return;
1926  }
1927  throw std::runtime_error("Cannot shard on type " + col_ti.get_type_name() +
1928  ", encoding " + col_ti.get_compression_name());
1929 }
1930 
// Returns the 1-based position of the column whose columnName equals `name` exactly
// (case-sensitive) within `columns`, or 0 when no column matches.
// The position counter advances by one per column, plus get_physical_cols() extra
// slots after every geometry column.
// NOTE(review): listing line 1936 (inside the match branch, before `return index`) is
// missing from this extract -- presumably a shard-type validation call; confirm.
1931 size_t shard_column_index(const std::string& name,
1932  const std::list<ColumnDescriptor>& columns) {
// Positions start at 1 so that 0 can signal "not found".
1933  size_t index = 1;
1934  for (const auto& cd : columns) {
1935  if (cd.columnName == name) {
1937  return index;
1938  }
1939  ++index;
// Skip over the slots taken by a geometry column's physical columns.
1940  if (cd.columnType.is_geometry()) {
1941  index += cd.columnType.get_physical_cols();
1942  }
1943  }
1944  // Not found, return 0
1945  return 0;
1946 }
1947 
1948 size_t sort_column_index(const std::string& name,
1949  const std::list<ColumnDescriptor>& columns) {
1950  size_t index = 1;
1951  for (const auto& cd : columns) {
1952  if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
1953  return index;
1954  }
1955  ++index;
1956  if (cd.columnType.is_geometry()) {
1957  index += cd.columnType.get_physical_cols();
1958  }
1959  }
1960  // Not found, return 0
1961  return 0;
1962 }
1963 
1964 void set_string_field(rapidjson::Value& obj,
1965  const std::string& field_name,
1966  const std::string& field_value,
1967  rapidjson::Document& document) {
1968  rapidjson::Value field_name_json_str;
1969  field_name_json_str.SetString(
1970  field_name.c_str(), field_name.size(), document.GetAllocator());
1971  rapidjson::Value field_value_json_str;
1972  field_value_json_str.SetString(
1973  field_value.c_str(), field_value.size(), document.GetAllocator());
1974  obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
1975 }
1976 
// Serializes key metadata to a JSON array string.
// When `shard_key_def` is non-null the array starts with an object
// {"type": "SHARD KEY", "name": <column>}; it is followed by one
// "SHARED DICTIONARY" object per entry of `shared_dict_defs`, carrying the column,
// the foreign table and the foreign column. The array is written with a
// rapidjson::Writer and the resulting buffer text is returned.
// NOTE(review): the first signature line (listing line 1977) and listing line 1993
// (presumably the opening of a set_string_field call) are missing from this extract.
1978  const ShardKeyDef* shard_key_def,
1979  const std::vector<SharedDictionaryDef>& shared_dict_defs) {
1980  rapidjson::Document document;
1981  auto& allocator = document.GetAllocator();
1982  rapidjson::Value arr(rapidjson::kArrayType);
// The shard key entry is optional.
1983  if (shard_key_def) {
1984  rapidjson::Value shard_key_obj(rapidjson::kObjectType);
1985  set_string_field(shard_key_obj, "type", "SHARD KEY", document);
1986  set_string_field(shard_key_obj, "name", shard_key_def->get_column(), document);
1987  arr.PushBack(shard_key_obj, allocator);
1988  }
// One object per shared dictionary definition, in input order.
1989  for (const auto& shared_dict_def : shared_dict_defs) {
1990  rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
1991  set_string_field(shared_dict_obj, "type", "SHARED DICTIONARY", document);
1992  set_string_field(shared_dict_obj, "name", shared_dict_def.get_column(), document);
1994  shared_dict_obj, "foreign_table", shared_dict_def.get_foreign_table(), document);
1995  set_string_field(shared_dict_obj,
1996  "foreign_column",
1997  shared_dict_def.get_foreign_column(),
1998  document);
1999  arr.PushBack(shared_dict_obj, allocator);
2000  }
// Render the array to text.
2001  rapidjson::StringBuffer buffer;
2002  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
2003  arr.Accept(writer);
2004  return buffer.GetString();
2005 }
2006 
2007 template <typename LITERAL_TYPE,
2008  typename ASSIGNMENT,
2009  typename VALIDATE = DefaultValidate<LITERAL_TYPE>>
2010 decltype(auto) get_property_value(const NameValueAssign* p,
2011  ASSIGNMENT op,
2012  VALIDATE validate = VALIDATE()) {
2013  const auto val = validate(p);
2014  return op(val);
2015 }
2016 
// Table-option setter: stores the validated option value in td.storageType.
// The value is extracted with CaseSensitiveValidate instead of the default validator.
// `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2017) is missing from this
// extract; tableDefFuncMap registers get_storage_type under "storage_type", which is
// presumably this function.
2018  const NameValueAssign* p,
2019  const std::list<ColumnDescriptor>& columns) {
2020  auto assignment = [&td](const auto val) { td.storageType = val; };
2021  return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
2022  p, assignment);
2023 }
2024 
// Table-option setter: reads an integer literal from `p` and stores it in
// td.maxFragRows. `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2025) is missing from this
// extract; presumably this is get_frag_size_def, registered under "fragment_size".
2026  const NameValueAssign* p,
2027  const std::list<ColumnDescriptor>& columns) {
2028  return get_property_value<IntLiteral>(p,
2029  [&td](const auto val) { td.maxFragRows = val; });
2030 }
2031 
// Dataframe-option setter: reads an integer literal from `p` and stores it in
// df_td.maxFragRows. `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2032) is missing from this
// extract; presumably this is get_frag_size_dataframe_def, registered under
// "fragment_size" in dataframeDefFuncMap.
2033  const NameValueAssign* p,
2034  const std::list<ColumnDescriptor>& columns) {
2035  return get_property_value<IntLiteral>(
2036  p, [&df_td](const auto val) { df_td.maxFragRows = val; });
2037 }
2038 
// Table-option setter: reads an integer literal from `p` and stores it in
// td.maxChunkSize. `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2039) is missing from this
// extract; presumably this is get_max_chunk_size_def, registered under
// "max_chunk_size".
2040  const NameValueAssign* p,
2041  const std::list<ColumnDescriptor>& columns) {
2042  return get_property_value<IntLiteral>(p,
2043  [&td](const auto val) { td.maxChunkSize = val; });
2044 }
2045 
// Dataframe-option setter: reads an integer literal from `p` and stores it in
// df_td.maxChunkSize. `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2046) is missing from this
// extract; presumably this is get_max_chunk_size_dataframe_def, registered under
// "max_chunk_size" in dataframeDefFuncMap.
2047  DataframeTableDescriptor& df_td,
2048  const NameValueAssign* p,
2049  const std::list<ColumnDescriptor>& columns) {
2050  return get_property_value<IntLiteral>(
2051  p, [&df_td](const auto val) { df_td.maxChunkSize = val; });
2052 }
2053 
// Dataframe-option setter: reads a string literal from `p` and stores it in
// df_td.delimiter. The value must be exactly one character long, otherwise
// std::runtime_error is thrown. `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2054) is missing from this
// extract; presumably this is get_delimiter_def, registered under "delimiter".
2055  const NameValueAssign* p,
2056  const std::list<ColumnDescriptor>& columns) {
2057  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2058  if (val.size() != 1) {
2059  throw std::runtime_error("Length of DELIMITER must be equal to 1.");
2060  }
2061  df_td.delimiter = val;
2062  });
2063 }
2064 
// Dataframe-option setter: maps the validated string value "FALSE"/"TRUE" onto
// df_td.hasHeader and throws std::runtime_error for any other value.
// `columns` is not used in the visible body.
// NOTE(review): the comparison is against upper-case text while the error message
// quotes lower-case values -- presumably the default string validator upper-cases
// its input; confirm. The first signature line (listing line 2065) is missing from
// this extract; presumably this is get_header_def, registered under "header".
2066  const NameValueAssign* p,
2067  const std::list<ColumnDescriptor>& columns) {
2068  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2069  if (val == "FALSE") {
2070  df_td.hasHeader = false;
2071  } else if (val == "TRUE") {
2072  df_td.hasHeader = true;
2073  } else {
2074  throw std::runtime_error("Option HEADER support only 'true' or 'false' values.");
2075  }
2076  });
2077 }
2078 
// Table-option setter: reads an integer literal from `p` and stores it in
// td.fragPageSize. `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2079) is missing from this
// extract; presumably this is get_page_size_def, registered under "page_size".
2080  const NameValueAssign* p,
2081  const std::list<ColumnDescriptor>& columns) {
2082  return get_property_value<IntLiteral>(p,
2083  [&td](const auto val) { td.fragPageSize = val; });
2084 }
// Table-option setter: reads an integer literal from `p` and stores it in td.maxRows.
// `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2085) is missing from this
// extract; presumably this is get_max_rows_def, registered under "max_rows".
2086  const NameValueAssign* p,
2087  const std::list<ColumnDescriptor>& columns) {
2088  return get_property_value<IntLiteral>(p, [&td](const auto val) { td.maxRows = val; });
2089 }
2090 
// Dataframe-option setter: reads an integer literal from `p` and stores it in
// df_td.skipRows. `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2091) is missing from this
// extract; presumably this is get_skip_rows_def, registered under "skip_rows".
2092  const NameValueAssign* p,
2093  const std::list<ColumnDescriptor>& columns) {
2094  return get_property_value<IntLiteral>(
2095  p, [&df_td](const auto val) { df_td.skipRows = val; });
2096 }
2097 
// Table-option setter for PARTITIONS: accepts only "SHARDED" or "REPLICATED" and
// stores the value in td.partitions.
// Throws std::runtime_error for any other value, and also when "REPLICATED" is
// requested for a table whose td.shardedColumnId is already non-zero.
// `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2098) is missing from this
// extract; presumably this is get_partions_def, registered under "partitions".
2099  const NameValueAssign* p,
2100  const std::list<ColumnDescriptor>& columns) {
2101  return get_property_value<StringLiteral>(p, [&td](const auto partitions_uc) {
2102  if (partitions_uc != "SHARDED" && partitions_uc != "REPLICATED") {
2103  throw std::runtime_error("PARTITIONS must be SHARDED or REPLICATED");
2104  }
// A shard key has already been recorded, so replication is contradictory.
2105  if (td.shardedColumnId != 0 && partitions_uc == "REPLICATED") {
2106  throw std::runtime_error(
2107  "A table cannot be sharded and replicated at the same time");
2108  };
2109  td.partitions = partitions_uc;
2110  });
2111 }
// Table-option setter for SHARD_COUNT.
// Requires td.shardedColumnId to be non-zero already (throws std::runtime_error
// otherwise). When g_leaf_count is non-zero the requested count must be a multiple
// of it and td.nShards receives shard_count / g_leaf_count; with g_leaf_count == 0
// td.nShards receives shard_count unchanged.
// `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2112) is missing from this
// extract; presumably this is get_shard_count_def, registered under "shard_count".
2113  const NameValueAssign* p,
2114  const std::list<ColumnDescriptor>& columns) {
2115  if (!td.shardedColumnId) {
2116  throw std::runtime_error("SHARD KEY must be defined.");
2117  }
2118  return get_property_value<IntLiteral>(p, [&td](const auto shard_count) {
2119  if (g_leaf_count && shard_count % g_leaf_count) {
2120  throw std::runtime_error(
2121  "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2122  }
2123  td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
// NOTE(review): td.shardedColumnId was required to be non-zero above and is not
// modified in between, so this condition looks unreachable as written.
2124  if (!td.shardedColumnId && !td.nShards) {
2125  throw std::runtime_error(
2126  "Must specify the number of shards through the SHARD_COUNT option");
2127  };
2128  });
2129 }
2130 
2131 decltype(auto) get_vacuum_def(TableDescriptor& td,
2132  const NameValueAssign* p,
2133  const std::list<ColumnDescriptor>& columns) {
2134  return get_property_value<StringLiteral>(p, [&td](const auto vacuum_uc) {
2135  if (vacuum_uc != "IMMEDIATE" && vacuum_uc != "DELAYED") {
2136  throw std::runtime_error("VACUUM must be IMMEDIATE or DELAYED");
2137  }
2138  td.hasDeletedCol = boost::iequals(vacuum_uc, "IMMEDIATE") ? false : true;
2139  });
2140 }
2141 
// Table-option setter for SORT_COLUMN: resolves the validated column name through
// sort_column_index() and stores the resulting 1-based position in td.sortedColumnId.
// Throws std::runtime_error when sort_column_index() returns 0 (no such column).
// NOTE(review): the first signature line (listing line 2142) is missing from this
// extract; presumably this is get_sort_column_def, registered under "sort_column".
2143  const NameValueAssign* p,
2144  const std::list<ColumnDescriptor>& columns) {
2145  return get_property_value<StringLiteral>(p, [&td, &columns](const auto sort_upper) {
2146  td.sortedColumnId = sort_column_index(sort_upper, columns);
2147  if (!td.sortedColumnId) {
2148  throw std::runtime_error("Specified sort column " + sort_upper + " doesn't exist");
2149  }
2150  });
2151 }
2152 
// Table-option setter for MAX_ROLLBACK_EPOCHS: stores the validated integer in
// td.maxRollbackEpochs, collapsing every negative value to -1.
// The value is extracted with PositiveOrZeroValidate instead of the default validator.
// `columns` is not used in the visible body.
// NOTE(review): the first signature line (listing line 2153) is missing from this
// extract; presumably this is get_max_rollback_epochs_def, registered under
// "max_rollback_epochs".
2154  const NameValueAssign* p,
2155  const std::list<ColumnDescriptor>& columns) {
2156  auto assignment = [&td](const auto val) {
2157  td.maxRollbackEpochs =
2158  val < 0 ? -1 : val; // Anything < 0 means unlimited rollbacks. Note that 0
2159  // still means keeping a shadow copy of data/metadata
2160  // between epochs so bad writes can be rolled back
2161  };
2162  return get_property_value<IntLiteral, decltype(assignment), PositiveOrZeroValidate>(
2163  p, assignment);
2164 }
2165 
2166 static const std::map<const std::string, const TableDefFuncPtr> tableDefFuncMap = {
2167  {"fragment_size"s, get_frag_size_def},
2168  {"max_chunk_size"s, get_max_chunk_size_def},
2169  {"page_size"s, get_page_size_def},
2170  {"max_rows"s, get_max_rows_def},
2171  {"partitions"s, get_partions_def},
2172  {"shard_count"s, get_shard_count_def},
2173  {"vacuum"s, get_vacuum_def},
2174  {"sort_column"s, get_sort_column_def},
2175  {"storage_type"s, get_storage_type},
2176  {"max_rollback_epochs", get_max_rollback_epochs_def}};
2177 
// Applies one CREATE TABLE option to `td`.
// The option name is lower-cased and looked up in tableDefFuncMap; the matching
// setter is invoked with (td, p.get(), columns) and its result is returned.
// Throws std::runtime_error listing the accepted options when the name is unknown.
// NOTE(review): the first signature line (listing line 2178) is missing from this
// extract; callers in this file refer to it as get_table_definitions.
2179  const std::unique_ptr<NameValueAssign>& p,
2180  const std::list<ColumnDescriptor>& columns) {
2181  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2182  if (it == tableDefFuncMap.end()) {
2183  throw std::runtime_error(
2184  "Invalid CREATE TABLE option " + *p->get_name() +
2185  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2186  "MAX_ROWS, "
2187  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE.");
2188  }
2189  return it->second(td, p.get(), columns);
2190 }
2191 
// Applies one CREATE TABLE AS option to `td`.
// Same lookup as for plain CREATE TABLE: the lower-cased option name selects a
// setter from tableDefFuncMap, which is invoked with (td, p.get(), columns).
// Throws std::runtime_error listing the accepted options when the name is unknown.
// NOTE(review): the message advertises USE_SHARED_DICTIONARIES, which is not a key
// of tableDefFuncMap -- presumably that option is consumed by the caller before this
// function is reached; confirm. The first signature line (listing line 2192) is
// missing from this extract.
2193  const std::unique_ptr<NameValueAssign>& p,
2194  const std::list<ColumnDescriptor>& columns) {
2195  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2196  if (it == tableDefFuncMap.end()) {
2197  throw std::runtime_error(
2198  "Invalid CREATE TABLE AS option " + *p->get_name() +
2199  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2200  "MAX_ROWS, "
2201  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE or "
2202  "USE_SHARED_DICTIONARIES.");
2203  }
2204  return it->second(td, p.get(), columns);
2205 }
2206 
2207 static const std::map<const std::string, const DataframeDefFuncPtr> dataframeDefFuncMap =
2208  {{"fragment_size"s, get_frag_size_dataframe_def},
2209  {"max_chunk_size"s, get_max_chunk_size_dataframe_def},
2210  {"skip_rows"s, get_skip_rows_def},
2211  {"delimiter"s, get_delimiter_def},
2212  {"header"s, get_header_def}};
2213 
// Applies one CREATE DATAFRAME option to `df_td`.
// The option name is lower-cased and looked up in dataframeDefFuncMap; the matching
// setter is invoked with (df_td, p.get(), columns) and its result is returned.
// Throws std::runtime_error listing the accepted options when the name is unknown.
// NOTE(review): the first signature line (listing line 2214) is missing from this
// extract; callers in this file refer to it as get_dataframe_definitions.
2215  const std::unique_ptr<NameValueAssign>& p,
2216  const std::list<ColumnDescriptor>& columns) {
2217  const auto it =
2218  dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2219  if (it == dataframeDefFuncMap.end()) {
2220  throw std::runtime_error(
2221  "Invalid CREATE DATAFRAME option " + *p->get_name() +
2222  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
2223  }
2224  return it->second(df_td, p.get(), columns);
2225 }
2226 
2227 } // namespace
2228 
2229 CreateTableStmt::CreateTableStmt(const rapidjson::Value& payload) {
2230  CHECK(payload.HasMember("name"));
2231  table_ = std::make_unique<std::string>(json_str(payload["name"]));
2232  CHECK(payload.HasMember("elements"));
2233  CHECK(payload["elements"].IsArray());
2234 
2235  // TODO: support temporary tables
2236  is_temporary_ = false;
2237 
2238  if_not_exists_ = false;
2239  if (payload.HasMember("ifNotExists")) {
2240  if_not_exists_ = json_bool(payload["ifNotExists"]);
2241  }
2242 
2243  const auto elements = payload["elements"].GetArray();
2244  for (const auto& element : elements) {
2245  CHECK(element.IsObject());
2246  CHECK(element.HasMember("type"));
2247  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
2248  CHECK(element.HasMember("name"));
2249  auto col_name = std::make_unique<std::string>(json_str(element["name"]));
2250  CHECK(element.HasMember("sqltype"));
2251  const auto sql_types = to_sql_type(json_str(element["sqltype"]));
2252 
2253  // decimal / numeric precision / scale
2254  int precision = -1;
2255  int scale = -1;
2256  if (element.HasMember("precision")) {
2257  precision = json_i64(element["precision"]);
2258  }
2259  if (element.HasMember("scale")) {
2260  scale = json_i64(element["scale"]);
2261  }
2262 
2263  std::optional<int64_t> array_size;
2264  if (element.HasMember("arraySize")) {
2265  // We do not yet support geo arrays
2266  array_size = json_i64(element["arraySize"]);
2267  }
2268  std::unique_ptr<SQLType> sql_type;
2269  if (element.HasMember("subtype")) {
2270  CHECK(element.HasMember("coordinateSystem"));
2271  const auto subtype_sql_types = to_sql_type(json_str(element["subtype"]));
2272  sql_type = std::make_unique<SQLType>(
2273  subtype_sql_types,
2274  static_cast<int>(sql_types),
2275  static_cast<int>(json_i64(element["coordinateSystem"])),
2276  false);
2277  } else if (precision > 0 && scale > 0) {
2278  sql_type = std::make_unique<SQLType>(sql_types,
2279  precision,
2280  scale,
2281  /*is_array=*/array_size.has_value(),
2282  array_size ? *array_size : -1);
2283  } else if (precision > 0) {
2284  sql_type = std::make_unique<SQLType>(sql_types,
2285  precision,
2286  0,
2287  /*is_array=*/array_size.has_value(),
2288  array_size ? *array_size : -1);
2289  } else {
2290  sql_type = std::make_unique<SQLType>(sql_types,
2291  /*is_array=*/array_size.has_value(),
2292  array_size ? *array_size : -1);
2293  }
2294  CHECK(sql_type);
2295 
2296  CHECK(element.HasMember("nullable"));
2297  const auto nullable = json_bool(element["nullable"]);
2298  std::unique_ptr<ColumnConstraintDef> constraint_def;
2299  if (!nullable) {
2300  constraint_def = std::make_unique<ColumnConstraintDef>(/*notnull=*/true,
2301  /*unique=*/false,
2302  /*primarykey=*/false,
2303  /*defaultval=*/nullptr);
2304  }
2305  std::unique_ptr<CompressDef> compress_def;
2306  if (element.HasMember("encodingType") && !element["encodingType"].IsNull()) {
2307  std::string encoding_type = json_str(element["encodingType"]);
2308  CHECK(element.HasMember("encodingSize"));
2309  auto encoding_name =
2310  std::make_unique<std::string>(json_str(element["encodingType"]));
2311  compress_def = std::make_unique<CompressDef>(encoding_name.release(),
2312  json_i64(element["encodingSize"]));
2313  }
2314  auto col_def = std::make_unique<ColumnDef>(
2315  col_name.release(),
2316  sql_type.release(),
2317  compress_def ? compress_def.release() : nullptr,
2318  constraint_def ? constraint_def.release() : nullptr);
2319  table_element_list_.emplace_back(std::move(col_def));
2320  } else if (json_str(element["type"]) == "SQL_COLUMN_CONSTRAINT") {
2321  CHECK(element.HasMember("name"));
2322  if (json_str(element["name"]) == "SHARD_KEY") {
2323  CHECK(element.HasMember("columns"));
2324  CHECK(element["columns"].IsArray());
2325  const auto& columns = element["columns"].GetArray();
2326  if (columns.Size() != size_t(1)) {
2327  throw std::runtime_error("Only one shard column is currently supported.");
2328  }
2329  auto shard_key_def = std::make_unique<ShardKeyDef>(json_str(columns[0]));
2330  table_element_list_.emplace_back(std::move(shard_key_def));
2331  } else if (json_str(element["name"]) == "SHARED_DICT") {
2332  CHECK(element.HasMember("columns"));
2333  CHECK(element["columns"].IsArray());
2334  const auto& columns = element["columns"].GetArray();
2335  if (columns.Size() != size_t(1)) {
2336  throw std::runtime_error(
2337  R"(Only one column per shared dictionary entry is currently supported. Use multiple SHARED DICT statements to share dictionaries from multiple columns.)");
2338  }
2339  CHECK(element.HasMember("references") && element["references"].IsObject());
2340  const auto& references = element["references"].GetObject();
2341  std::string references_table_name;
2342  if (references.HasMember("table")) {
2343  references_table_name = json_str(references["table"]);
2344  } else {
2345  references_table_name = *table_;
2346  }
2347  CHECK(references.HasMember("column"));
2348 
2349  auto shared_dict_def = std::make_unique<SharedDictionaryDef>(
2350  json_str(columns[0]), references_table_name, json_str(references["column"]));
2351  table_element_list_.emplace_back(std::move(shared_dict_def));
2352 
2353  } else {
2354  LOG(FATAL) << "Unsupported type for SQL_COLUMN_CONSTRAINT: "
2355  << json_str(element["name"]);
2356  }
2357  } else {
2358  LOG(FATAL) << "Unsupported element type for CREATE TABLE: "
2359  << element["type"].GetString();
2360  }
2361  }
2362 
2363  CHECK(payload.HasMember("options"));
2364  if (payload["options"].IsObject()) {
2365  for (const auto& option : payload["options"].GetObject()) {
2366  auto option_name = std::make_unique<std::string>(json_str(option.name));
2367  std::unique_ptr<Literal> literal_value;
2368  if (option.value.IsString()) {
2369  auto literal_string = std::make_unique<std::string>(json_str(option.value));
2370  literal_value = std::make_unique<StringLiteral>(literal_string.release());
2371  } else if (option.value.IsInt() || option.value.IsInt64()) {
2372  literal_value = std::make_unique<IntLiteral>(json_i64(option.value));
2373  } else if (option.value.IsNull()) {
2374  literal_value = std::make_unique<NullLiteral>();
2375  } else {
2376  throw std::runtime_error("Unable to handle literal for " + *option_name);
2377  }
2378  CHECK(literal_value);
2379 
2380  storage_options_.emplace_back(std::make_unique<NameValueAssign>(
2381  option_name.release(), literal_value.release()));
2382  }
2383  } else {
2384  CHECK(payload["options"].IsNull());
2385  }
2386 }
2387 
// Translates the parsed CREATE TABLE statement into catalog structures without
// creating anything: fills `td`, appends one ColumnDescriptor per column definition
// to `columns`, and appends every shared dictionary definition to `shared_dict_defs`.
//
// Throws std::runtime_error when more than one shard key is given, when an element is
// neither a column, a shard key nor a shared dictionary definition, when the shard
// column does not exist, or when a shard key is present but td.nShards ends up zero.
// Errors raised by the individual option setters propagate as well.
//
// NOTE(review): listing lines 2398, 2416, 2431 and 2433 are missing from this
// extract, so the shared-dictionary validation call, whatever precedes
// setColumnDescriptor(), and both bodies of the is_temporary_ branch are not visible.
2388 void CreateTableStmt::executeDryRun(const Catalog_Namespace::SessionInfo& session,
2389  TableDescriptor& td,
2390  std::list<ColumnDescriptor>& columns,
2391  std::vector<SharedDictionaryDef>& shared_dict_defs) {
2392  std::unordered_set<std::string> uc_col_names;
2393  const auto& catalog = session.getCatalog();
2394  const ShardKeyDef* shard_key_def{nullptr};
// Classify each table element by its dynamic type.
2395  for (auto& e : table_element_list_) {
2396  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2397  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2399  this, shared_dict_def, columns, shared_dict_defs, catalog);
2400  shared_dict_defs.push_back(*shared_dict_def);
2401  continue;
2402  }
2403  if (dynamic_cast<ShardKeyDef*>(e.get())) {
// At most one shard key definition is accepted.
2404  if (shard_key_def) {
2405  throw std::runtime_error("Specified more than one shard key");
2406  }
2407  shard_key_def = static_cast<const ShardKeyDef*>(e.get());
2408  continue;
2409  }
// Anything left must be a plain column definition.
2410  if (!dynamic_cast<ColumnDef*>(e.get())) {
2411  throw std::runtime_error("Table constraints are not supported yet.");
2412  }
2413  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2414  ColumnDescriptor cd;
2415  cd.columnName = *coldef->get_column_name();
2417  setColumnDescriptor(cd, coldef);
2418  columns.push_back(cd);
2419  }
2420 
2421  ddl_utils::set_default_table_attributes(*table_, td, columns.size());
2422 
// Resolve the shard key column to its position among `columns`.
2423  if (shard_key_def) {
2424  td.shardedColumnId = shard_column_index(shard_key_def->get_column(), columns);
2425  if (!td.shardedColumnId) {
2426  throw std::runtime_error("Specified shard column " + shard_key_def->get_column() +
2427  " doesn't exist");
2428  }
2429  }
2430  if (is_temporary_) {
2432  } else {
2434  }
// Apply the options after the shard key so that option setters can see it.
2435  if (!storage_options_.empty()) {
2436  for (auto& p : storage_options_) {
2437  get_table_definitions(td, p, columns);
2438  }
2439  }
2440  if (td.shardedColumnId && !td.nShards) {
2441  throw std::runtime_error("SHARD_COUNT needs to be specified with SHARD_KEY.");
2442  }
2443  td.keyMetainfo = serialize_key_metainfo(shard_key_def, shared_dict_defs);
2444 }
2445 
// Creates the table described by this statement for `session`.
//
// Visible flow: acquire a mapd_unique_lock named execute_write_lock, check create
// privileges (throws std::runtime_error on failure), return early when
// validateNonExistentTableOrView() reports false, build the descriptors through
// executeDryRun(), record the creating user, create the table via
// catalog.createShardedTable() and register the object with SysCatalog.
//
// NOTE(review): listing lines 2450-2451 and 2454-2455 are missing from this extract,
// so the lock operands and the privilege-check condition are not visible.
2446 void CreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2447  auto& catalog = session.getCatalog();
2448 
2449  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2452 
2453  // check access privileges
2456  throw std::runtime_error("Table " + *table_ +
2457  " will not be created. User has no create privileges.");
2458  }
2459 
// Nothing to do when the validator reports false; if_not_exists_ is forwarded to it.
2460  if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
2461  return;
2462  }
2463 
2464  TableDescriptor td;
2465  std::list<ColumnDescriptor> columns;
2466  std::vector<SharedDictionaryDef> shared_dict_defs;
2467 
2468  executeDryRun(session, td, columns, shared_dict_defs);
2469  td.userId = session.get_currentUser().userId;
2470 
2471  catalog.createShardedTable(td, columns, shared_dict_defs);
2472  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2473  // privileges
2474  SysCatalog::instance().createDBObject(
2475  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
2476 }
2477 
// Creates the dataframe-backed table described by this statement for `session`.
//
// Visible flow: acquire a mapd_unique_lock named execute_write_lock, check create
// privileges, refuse an already existing table name, convert the table elements into
// ColumnDescriptors (rejecting column names that collide case-insensitively), fill
// the descriptor `df_td`, apply the storage options, then create the table via
// catalog.createShardedTable() and register the object with SysCatalog.
//
// Throws std::runtime_error for missing privileges, an existing table, an
// unsupported table element and a duplicated column name; errors raised by the
// option setters propagate as well.
//
// NOTE(review): listing lines 2482-2483, 2486-2487, 2495, 2503, 2528-2531 and 2533
// are missing from this extract, so the lock operands, the privilege condition, the
// declaration of `df_td`, the shared-dictionary validation call and several
// descriptor defaults are not visible.
2478 void CreateDataframeStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2479  auto& catalog = session.getCatalog();
2480 
2481  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2484 
2485  // check access privileges
2488  throw std::runtime_error("Table " + *table_ +
2489  " will not be created. User has no create privileges.");
2490  }
2491 
2492  if (catalog.getMetadataForTable(*table_) != nullptr) {
2493  throw std::runtime_error("Table " + *table_ + " already exists.");
2494  }
2496  std::list<ColumnDescriptor> columns;
2497  std::vector<SharedDictionaryDef> shared_dict_defs;
2498 
// Upper-cased names seen so far, used to detect duplicates regardless of case.
2499  std::unordered_set<std::string> uc_col_names;
2500  for (auto& e : table_element_list_) {
2501  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2502  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2504  this, shared_dict_def, columns, shared_dict_defs, catalog);
2505  shared_dict_defs.push_back(*shared_dict_def);
2506  continue;
2507  }
2508  if (!dynamic_cast<ColumnDef*>(e.get())) {
2509  throw std::runtime_error("Table constraints are not supported yet.");
2510  }
2511  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2512  ColumnDescriptor cd;
2513  cd.columnName = *coldef->get_column_name();
2514  const auto uc_col_name = boost::to_upper_copy<std::string>(cd.columnName);
2515  const auto it_ok = uc_col_names.insert(uc_col_name);
// insert() reports false in .second when the name was already present.
2516  if (!it_ok.second) {
2517  throw std::runtime_error("Column '" + cd.columnName + "' defined more than once");
2518  }
2519 
2520  setColumnDescriptor(cd, coldef);
2521  columns.push_back(cd);
2522  }
2523 
2524  df_td.tableName = *table_;
2525  df_td.nColumns = columns.size();
2526  df_td.isView = false;
2527  df_td.fragmenter = nullptr;
2532  df_td.maxRows = DEFAULT_MAX_ROWS;
2534  if (!storage_options_.empty()) {
2535  for (auto& p : storage_options_) {
2536  get_dataframe_definitions(df_td, p, columns);
2537  }
2538  }
// No shard key is passed for dataframes; only shared dictionaries are serialized.
2539  df_td.keyMetainfo = serialize_key_metainfo(nullptr, shared_dict_defs);
2540  df_td.userId = session.get_currentUser().userId;
2541  df_td.storageType = *filename_;
2542 
2543  catalog.createShardedTable(df_td, columns, shared_dict_defs);
2544  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2545  // privileges
2546  SysCatalog::instance().createDBObject(
2547  session.get_currentUser(), df_td.tableName, TableDBObjectType, catalog);
2548 }
2549 
// Runs `select_stmt` for the session behind `query_state_proxy` and returns the
// resulting rows.
//
// The statement is passed through pg_shim() and processed by the catalog's Calcite
// manager; the resulting plan is executed with a RelAlgExecutor.
// `targets` is overwritten with the metadata of the result targets.
// `validate_only` and `outer_fragment_indices` are forwarded into the
// ExecutionOptions aggregate.
// The device type comes from the session when HAVE_CUDA is defined and is CPU
// otherwise; a cpu_mode query hint forces co.device_type to CPU.
//
// NOTE(review): listing lines 2558, 2573, 2578, 2595 and 2598-2599 are missing from
// this extract, so the declarations of `executor` and `co`, one ExecutionOptions
// field and part of the placeholder ResultSet construction are not visible. The
// ExecutionOptions fields are positional; their meanings cannot be confirmed here.
2550 std::shared_ptr<ResultSet> getResultSet(QueryStateProxy query_state_proxy,
2551  const std::string select_stmt,
2552  std::vector<TargetMetaInfo>& targets,
2553  bool validate_only = false,
2554  std::vector<size_t> outer_fragment_indices = {}) {
2555  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2556  auto& catalog = session->getCatalog();
2557 
2559 #ifdef HAVE_CUDA
2560  const auto device_type = session->get_executor_device_type();
2561 #else
2562  const auto device_type = ExecutorDeviceType::CPU;
2563 #endif // HAVE_CUDA
2564  auto calcite_mgr = catalog.getCalciteMgr();
2565 
2566  // TODO MAT this should actually get the global or the session parameter for
2567  // view optimization
2568  const auto query_ra =
2569  calcite_mgr
2570  ->process(query_state_proxy, pg_shim(select_stmt), {}, true, false, false, true)
2571  .plan_result;
2572  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
2574  const auto& query_hints = ra_executor.getParsedQueryHints();
2575  if (query_hints.cpu_mode) {
2576  co.device_type = ExecutorDeviceType::CPU;
2577  }
2579  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2580  // struct
2581  ExecutionOptions eo = {false,
2582  true,
2583  false,
2584  true,
2585  false,
2586  false,
2587  validate_only,
2588  false,
2589  10000,
2590  false,
2591  false,
2592  0.9,
2593  false,
2594  1000,
2596  outer_fragment_indices};
// Placeholder result; it is replaced by the executor's result right below.
2597  ExecutionResult result{std::make_shared<ResultSet>(std::vector<TargetInfo>{},
2600  nullptr,
2601  nullptr,
2602  0,
2603  0),
2604  {}};
2605  result = ra_executor.executeRelAlgQuery(co, eo, false, nullptr);
2606  targets = result.getTargetsMeta();
2607 
2608  return result.getRows();
2609 }
2610 
// Plans `sql_query_string` for the session behind `query_state_proxy` and returns
// RelAlgExecutor::getOuterFragmentCount() for that plan.
//
// The statement is passed through pg_shim() and processed by the catalog's Calcite
// manager. The device type comes from the session when HAVE_CUDA is defined and is
// CPU otherwise; a cpu_mode query hint forces CPU in the CompilationOptions.
//
// NOTE(review): listing lines 2616 and 2635 are missing from this extract, so the
// declaration of `executor` and one CompilationOptions field are not visible. The
// ExecutionOptions fields are positional; their meanings cannot be confirmed here.
2611 size_t LocalConnector::getOuterFragmentCount(QueryStateProxy query_state_proxy,
2612  std::string& sql_query_string) {
2613  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2614  auto& catalog = session->getCatalog();
2615 
2617 #ifdef HAVE_CUDA
2618  const auto device_type = session->get_executor_device_type();
2619 #else
2620  const auto device_type = ExecutorDeviceType::CPU;
2621 #endif // HAVE_CUDA
2622  auto calcite_mgr = catalog.getCalciteMgr();
2623 
2624  // TODO MAT this should actually get the global or the session parameter for
2625  // view optimization
2626  const auto query_ra =
2627  calcite_mgr
2628  ->process(
2629  query_state_proxy, pg_shim(sql_query_string), {}, true, false, false, true)
2630  .plan_result;
2631  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
2632  const auto& query_hints = ra_executor.getParsedQueryHints();
2633  CompilationOptions co = {query_hints.cpu_mode ? ExecutorDeviceType::CPU : device_type,
2634  true,
2636  false};
2637  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2638  // struct
2639  ExecutionOptions eo = {
2640  false, true, false, true, false, false, false, false, 10000, false, false, 0.9};
2641  return ra_executor.getOuterFragmentCount(co, eo);
2642 }
2643 
2644 AggregatedResult LocalConnector::query(QueryStateProxy query_state_proxy,
2645  std::string& sql_query_string,
2646  std::vector<size_t> outer_frag_indices,
2647  bool validate_only) {
2648  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2649  // TODO(PS): Should we be using the shimmed query in getResultSet?
2650  std::string pg_shimmed_select_query = pg_shim(sql_query_string);
2651 
2652  std::vector<TargetMetaInfo> target_metainfos;
2653  auto result_rows = getResultSet(query_state_proxy,
2654  sql_query_string,
2655  target_metainfos,
2656  validate_only,
2657  outer_frag_indices);
2658  AggregatedResult res = {result_rows, target_metainfos};
2659  return res;
2660 }
2661 
2662 std::vector<AggregatedResult> LocalConnector::query(
2663  QueryStateProxy query_state_proxy,
2664  std::string& sql_query_string,
2665  std::vector<size_t> outer_frag_indices) {
2666  auto res = query(query_state_proxy, sql_query_string, outer_frag_indices, false);
2667  return {res};
2668 }
2669 
2670 void LocalConnector::insertDataToLeaf(const Catalog_Namespace::SessionInfo& session,
2671  const size_t leaf_idx,
2672  Fragmenter_Namespace::InsertData& insert_data) {
2673  CHECK(leaf_idx == 0);
2674  auto& catalog = session.getCatalog();
2675  auto created_td = catalog.getMetadataForTable(insert_data.tableId);
2676  ChunkKey chunkKey = {catalog.getCurrentDB().dbId, created_td->tableId};
2677  // TODO(adb): Ensure that we have previously obtained a write lock for this table's
2678  // data.
2679  created_td->fragmenter->insertDataNoCheckpoint(insert_data);
2680 }
2681 
2682 void LocalConnector::checkpoint(const Catalog_Namespace::SessionInfo& session,
2683  int table_id) {
2684  auto& catalog = session.getCatalog();
2685  catalog.checkpointWithAutoRollback(table_id);
2686 }
2687 
2688 void LocalConnector::rollback(const Catalog_Namespace::SessionInfo& session,
2689  int table_id) {
2690  auto& catalog = session.getCatalog();
2691  auto db_id = catalog.getDatabaseId();
2692  auto table_epochs = catalog.getTableEpochs(db_id, table_id);
2693  catalog.setTableEpochs(db_id, table_epochs);
2694 }
2695 
// Builds one ColumnDescriptor per entry of result.targets_meta.
//
// Two parallel lists are produced: the descriptors exactly as derived from the target
// metadata, and a "for create" variant whose comp_param is adjusted in the visible
// branches below. `for_create` selects which list is returned.
// A target named exactly "rowid" gets a running numeric suffix appended
// ("rowid0", "rowid1", ...).
//
// NOTE(review): listing lines 2712 and 2724 are missing from this extract, so the
// condition guarding the comp_param reset and the statement that presumably switches
// the date encoding are not visible.
2696 std::list<ColumnDescriptor> LocalConnector::getColumnDescriptors(AggregatedResult& result,
2697  bool for_create) {
2698  std::list<ColumnDescriptor> column_descriptors;
2699  std::list<ColumnDescriptor> column_descriptors_for_create;
2700 
2701  int rowid_suffix = 0;
2702  for (const auto& target_metainfo : result.targets_meta) {
2703  ColumnDescriptor cd;
2704  cd.columnName = target_metainfo.get_resname();
// Rename result columns called "rowid" by appending a running counter.
2705  if (cd.columnName == "rowid") {
2706  cd.columnName += std::to_string(rowid_suffix++);
2707  }
2708  cd.columnType = target_metainfo.get_physical_type_info();
2709 
// Start the create-time variant as a copy; only comp_param is adjusted below.
2710  ColumnDescriptor cd_for_create = cd;
2711 
2713  // we need to reset the comp param (as this points to the actual dictionary)
2714  if (cd.columnType.is_array()) {
2715  // for dict encoded arrays, it is always 4 bytes
2716  cd_for_create.columnType.set_comp_param(32);
2717  } else {
2718  cd_for_create.columnType.set_comp_param(cd.columnType.get_size() * 8);
2719  }
2720  }
2721 
2722  if (cd.columnType.is_date() && !cd.columnType.is_date_in_days()) {
2723  // default to kENCODING_DATE_IN_DAYS encoding
2725  cd_for_create.columnType.set_comp_param(0);
2726  }
2727 
2728  column_descriptors_for_create.push_back(cd_for_create);
2729  column_descriptors.push_back(cd);
2730  }
2731 
2732  if (for_create) {
2733  return column_descriptors_for_create;
2734  }
2735 
2736  return column_descriptors;
2737 }
2738 
// Runs select_query_ and loads its result rows into the target table.
//
// Steps, in order:
//   1. pick the connector: an already set leafs_connector_ is used as is, otherwise
//      a function-local LocalConnector is installed;
//   2. when validate_table is true, check that the table exists, is not a view, that
//      the privilege check passes and that source and target columns are compatible;
//   3. unless populate_table is false, execute the query (once per outer fragment
//      when the connector reports any), convert the rows to columnar form in
//      packages and hand each package to the InsertDataLoader;
//   4. checkpoint the table unless it is temporary.
//
// query_state_proxy - proxy used for the session lookup and for running the query
// td                - descriptor of the target table
// validate_table    - enables the checks of step 2
//
// Throws std::runtime_error for every failed validation. Any exception raised while
// loading triggers leafs_connector_->rollback() and is then rethrown.
//
// NOTE(review): the embedded listing numbers skip 2744, 2789, 2956, 3041-3042, 3052,
// 3060 and 3109, so a few statements (among them the declarations of `factory`,
// `param` and `insert_data`) are not visible in this extract.
// NOTE(review): leafs_connector_ is assigned the address of the local
// `local_connector` and is not reset in the visible code - confirm it is not used
// after this function returns.
2739  void InsertIntoTableAsSelectStmt::populateData(QueryStateProxy query_state_proxy,
2740  const TableDescriptor* td,
2741  bool validate_table) {
2742  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2743  auto& catalog = session->getCatalog();
2745 
2746  LocalConnector local_connector;
2747  bool populate_table = false;
2748 
// with a preset connector the table is always populated; with the local fallback it
// is populated only when g_cluster is false
2749  if (leafs_connector_) {
2750  populate_table = true;
2751  } else {
2752  leafs_connector_ = &local_connector;
2753  if (!g_cluster) {
2754  populate_table = true;
2755  }
2756  }
2757 
// resolves the insert targets: all columns returned by the catalog when no column
// list was given, otherwise the listed columns (unknown names throw)
2758  auto get_target_column_descriptors = [this, &catalog](const TableDescriptor* td) {
2759  std::vector<const ColumnDescriptor*> target_column_descriptors;
2760  if (column_list_.empty()) {
2761  auto list = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
2762  target_column_descriptors = {std::begin(list), std::end(list)};
2763 
2764  } else {
2765  for (auto& c : column_list_) {
2766  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
2767  if (cd == nullptr) {
2768  throw std::runtime_error("Column " + *c + " does not exist.");
2769  }
2770  target_column_descriptors.push_back(cd);
2771  }
2772  }
2773 
2774  return target_column_descriptors;
2775  };
2776 
// NOTE(review): td is passed to table_is_temporary() before the `!td` check below -
// confirm that helper tolerates a null descriptor.
2777  bool is_temporary = table_is_temporary(td);
2778 
2779  if (validate_table) {
2780  // check access privileges
2781  if (!td) {
2782  throw std::runtime_error("Table " + table_name_ + " does not exist.");
2783  }
2784  if (td->isView) {
2785  throw std::runtime_error("Insert to views is not supported yet.");
2786  }
2787 
2788  if (!session->checkDBAccessPrivileges(DBObjectType::TableDBObjectType,
2790  table_name_)) {
2791  throw std::runtime_error("User has no insert privileges on " + table_name_ + ".");
2792  }
2793 
2794  // only validate the select query so we get the target types
2795  // correctly, but do not populate the result set
2796  auto result = local_connector.query(query_state_proxy, select_query_, {}, true);
2797  auto source_column_descriptors = local_connector.getColumnDescriptors(result, false);
2798 
2799  std::vector<const ColumnDescriptor*> target_column_descriptors =
2800  get_target_column_descriptors(td);
2801  if (catalog.getAllColumnMetadataForTable(td->tableId, false, false, false).size() !=
2802  target_column_descriptors.size()) {
2803  throw std::runtime_error("Insert into a subset of columns is not supported yet.");
2804  }
2805 
2806  if (source_column_descriptors.size() != target_column_descriptors.size()) {
2807  throw std::runtime_error("The number of source and target columns does not match.");
2808  }
2809 
// compare source and target column by position; source_column_descriptors is a
// std::list, hence the std::next() walk to reach element i
2810  for (int i = 0; i < source_column_descriptors.size(); i++) {
2811  const ColumnDescriptor* source_cd =
2812  &(*std::next(source_column_descriptors.begin(), i));
2813  const ColumnDescriptor* target_cd = target_column_descriptors.at(i);
2814 
// differing types are rejected only when either side is a time, geometry, array or
// boolean type
2815  if (source_cd->columnType.get_type() != target_cd->columnType.get_type()) {
2816  auto type_cannot_be_cast = [](const auto& col_type) {
2817  return (col_type.is_time() || col_type.is_geometry() || col_type.is_array() ||
2818  col_type.is_boolean());
2819  };
2820 
2821  if (type_cannot_be_cast(source_cd->columnType) ||
2822  type_cannot_be_cast(target_cd->columnType)) {
2823  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2824  source_cd->columnType.get_type_name() +
2825  "' and target '" + target_cd->columnName + " " +
2826  target_cd->columnType.get_type_name() +
2827  "' column types do not match.");
2828  }
2829  }
// arrays must agree on their element type
2830  if (source_cd->columnType.is_array()) {
2831  if (source_cd->columnType.get_subtype() != target_cd->columnType.get_subtype()) {
2832  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2833  source_cd->columnType.get_type_name() +
2834  "' and target '" + target_cd->columnName + " " +
2835  target_cd->columnType.get_type_name() +
2836  "' array column element types do not match.");
2837  }
2838  }
2839 
// decimals (plain or as array elements) must have the same scale
2840  if (source_cd->columnType.is_decimal() ||
2841  source_cd->columnType.get_elem_type().is_decimal()) {
2842  SQLTypeInfo sourceType = source_cd->columnType;
2843  SQLTypeInfo targetType = target_cd->columnType;
2844 
2845  if (source_cd->columnType.is_array()) {
2846  sourceType = source_cd->columnType.get_elem_type();
2847  targetType = target_cd->columnType.get_elem_type();
2848  }
2849 
2850  if (sourceType.get_scale() != targetType.get_scale()) {
2851  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2852  source_cd->columnType.get_type_name() +
2853  "' and target '" + target_cd->columnName + " " +
2854  target_cd->columnType.get_type_name() +
2855  "' decimal columns scales do not match.");
2856  }
2857  }
2858 
// strings may only go into string columns that use the same compression
2859  if (source_cd->columnType.is_string()) {
2860  if (!target_cd->columnType.is_string()) {
2861  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2862  source_cd->columnType.get_type_name() +
2863  "' and target '" + target_cd->columnName + " " +
2864  target_cd->columnType.get_type_name() +
2865  "' column types do not match.");
2866  }
2867  if (source_cd->columnType.get_compression() !=
2868  target_cd->columnType.get_compression()) {
2869  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2870  source_cd->columnType.get_type_name() +
2871  "' and target '" + target_cd->columnName + " " +
2872  target_cd->columnType.get_type_name() +
2873  "' columns string encodings do not match.");
2874  }
2875  }
2876 
// timestamps must have the same dimension
2877  if (source_cd->columnType.is_timestamp() && target_cd->columnType.is_timestamp()) {
2878  if (source_cd->columnType.get_dimension() !=
2879  target_cd->columnType.get_dimension()) {
2880  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2881  source_cd->columnType.get_type_name() +
2882  "' and target '" + target_cd->columnName + " " +
2883  target_cd->columnType.get_type_name() +
2884  "' timestamp column precisions do not match.");
2885  }
2886  }
2887 
// for the remaining types (not string, geometry, integer, decimal, date, time or
// timestamp) the source must not be wider than the target
2888  if (!source_cd->columnType.is_string() && !source_cd->columnType.is_geometry() &&
2889  !source_cd->columnType.is_integer() && !source_cd->columnType.is_decimal() &&
2890  !source_cd->columnType.is_date() && !source_cd->columnType.is_time() &&
2891  !source_cd->columnType.is_timestamp() &&
2892  source_cd->columnType.get_size() > target_cd->columnType.get_size()) {
2893  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2894  source_cd->columnType.get_type_name() +
2895  "' and target '" + target_cd->columnName + " " +
2896  target_cd->columnType.get_type_name() +
2897  "' column encoding sizes do not match.");
2898  }
2899  }
2900  }
2901 
2902  if (!populate_table) {
2903  return;
2904  }
2905 
// counters that feed the VLOG summary at the end
2906  int64_t total_row_count = 0;
2907  int64_t total_source_query_time_ms = 0;
2908  int64_t total_target_value_translate_time_ms = 0;
2909  int64_t total_data_load_time_ms = 0;
2910 
2911  Fragmenter_Namespace::InsertDataLoader insertDataLoader(*leafs_connector_);
2912  auto target_column_descriptors = get_target_column_descriptors(td);
2913 
2914  auto outer_frag_count =
2915  leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
2916 
// a count of 0 still runs the query once, with an empty fragment filter
2917  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
2918 
2919  try {
2920  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
2921  std::vector<size_t> allowed_outer_fragment_indices;
2922 
2923  if (outer_frag_count) {
2924  allowed_outer_fragment_indices.push_back(outer_frag_idx);
2925  }
2926 
2927  const auto query_clock_begin = timer_start();
2928  std::vector<AggregatedResult> query_results = leafs_connector_->query(
2929  query_state_proxy, select_query_, allowed_outer_fragment_indices);
2930  total_source_query_time_ms += timer_stop(query_clock_begin);
2931 
2932  for (auto& res : query_results) {
2933  auto result_rows = res.rs;
2934  result_rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
2935  const auto num_rows = result_rows->rowCount();
2936 
2937  if (0 == num_rows) {
2938  continue;
2939  }
2940 
2941  total_row_count += num_rows;
2942 
2943  size_t leaf_count = leafs_connector_->leafCount();
2944 
// a package holds at most num_rows / leaf_count rows, capped at 64 * 1024
2945  size_t max_number_of_rows_per_package =
2946  std::min(num_rows / leaf_count, 64UL * 1024UL);
2947 
2948  size_t start_row = 0;
2949  size_t num_rows_to_process = std::min(num_rows, max_number_of_rows_per_package);
2950 
2951  // ensure that at least one row is being processed
2952  num_rows_to_process = std::max(num_rows_to_process, 1UL);
2953 
2954  std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
2955 
2957 
2958  const int num_worker_threads = std::thread::hardware_concurrency();
2959 
// per-worker [start, end) entry ranges of the result set
2960  std::vector<size_t> thread_start_idx(num_worker_threads),
2961  thread_end_idx(num_worker_threads);
// parallel conversion requires a non-truncated result and more than 20000 rows in
// the first package
2962  bool can_go_parallel = !result_rows->isTruncated() && num_rows_to_process > 20000;
2963 
// next free row slot of the current package, shared by all workers
2964  std::atomic<size_t> row_idx{0};
2965 
// parallel worker: walks its entry range by index, skips empty entries, claims a
// target slot per non-empty row and stops once the package is full
2966  auto convert_function = [&result_rows,
2967  &value_converters,
2968  &row_idx,
2969  &num_rows_to_process,
2970  &thread_start_idx,
2971  &thread_end_idx](const int thread_id) {
2972  const int num_cols = value_converters.size();
2973  const size_t start = thread_start_idx[thread_id];
2974  const size_t end = thread_end_idx[thread_id];
2975  size_t idx = 0;
2976  for (idx = start; idx < end; ++idx) {
2977  const auto result_row = result_rows->getRowAtNoTranslations(idx);
2978  if (!result_row.empty()) {
2979  size_t target_row = row_idx.fetch_add(1);
2980 
2981  if (target_row >= num_rows_to_process) {
2982  break;
2983  }
2984 
2985  for (unsigned int col = 0; col < num_cols; col++) {
2986  const auto& mapd_variant = result_row[col];
2987  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
2988  }
2989  }
2990  }
2991 
// remember where this worker stopped; the next package starts from here
2992  thread_start_idx[thread_id] = idx;
2993  };
2994 
// sequential variant: claims the slot first, then pulls rows with getNextRow() and
// expects every fetched row to be non-empty
2995  auto single_threaded_convert_function = [&result_rows,
2996  &value_converters,
2997  &row_idx,
2998  &num_rows_to_process,
2999  &thread_start_idx,
3000  &thread_end_idx](const int thread_id) {
3001  const int num_cols = value_converters.size();
3002  const size_t start = thread_start_idx[thread_id];
3003  const size_t end = thread_end_idx[thread_id];
3004  size_t idx = 0;
3005  for (idx = start; idx < end; ++idx) {
3006  size_t target_row = row_idx.fetch_add(1);
3007 
3008  if (target_row >= num_rows_to_process) {
3009  break;
3010  }
3011  const auto result_row = result_rows->getNextRow(false, false);
3012  CHECK(!result_row.empty());
3013  for (unsigned int col = 0; col < num_cols; col++) {
3014  const auto& mapd_variant = result_row[col];
3015  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3016  }
3017  }
3018 
3019  thread_start_idx[thread_id] = idx;
3020  };
3021 
// split the entry range into equally sized strides (rounded up), one per worker;
// in the sequential case worker 0 owns the whole range
3022  if (can_go_parallel) {
3023  const size_t entryCount = result_rows->entryCount();
3024  for (size_t i = 0,
3025  start_entry = 0,
3026  stride = (entryCount + num_worker_threads - 1) / num_worker_threads;
3027  i < num_worker_threads && start_entry < entryCount;
3028  ++i, start_entry += stride) {
3029  const auto end_entry = std::min(start_entry + stride, entryCount);
3030  thread_start_idx[i] = start_entry;
3031  thread_end_idx[i] = end_entry;
3032  }
3033 
3034  } else {
3035  thread_start_idx[0] = 0;
3036  thread_end_idx[0] = result_rows->entryCount();
3037  }
3038 
3039  std::shared_ptr<Executor> executor;
3040 
3043  }
3044 
// one iteration per package: build fresh converters, convert, finalize, insert
3045  while (start_row < num_rows) {
3046  value_converters.clear();
3047  row_idx = 0;
3048  int colNum = 0;
3049  for (const auto targetDescriptor : target_column_descriptors) {
3050  auto sourceDataMetaInfo = res.targets_meta[colNum++];
3051 
3053  num_rows_to_process,
3054  catalog,
3055  sourceDataMetaInfo,
3056  targetDescriptor,
3057  targetDescriptor->columnType,
3058  !targetDescriptor->columnType.get_notnull(),
3059  result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
3061  ? executor->getStringDictionaryProxy(
3062  sourceDataMetaInfo.get_type_info().get_comp_param(),
3063  result_rows->getRowSetMemOwner(),
3064  true)
3065  : nullptr};
3066  auto converter = factory.create(param);
3067  value_converters.push_back(std::move(converter));
3068  }
3069 
3070  const auto translate_clock_begin = timer_start();
3071  if (can_go_parallel) {
3072  std::vector<std::future<void>> worker_threads;
3073  for (int i = 0; i < num_worker_threads; ++i) {
3074  worker_threads.push_back(
3075  std::async(std::launch::async, convert_function, i));
3076  }
3077 
// wait for every worker first, then call get() so a stored exception is rethrown
3078  for (auto& child : worker_threads) {
3079  child.wait();
3080  }
3081  for (auto& child : worker_threads) {
3082  child.get();
3083  }
3084 
3085  } else {
3086  single_threaded_convert_function(0);
3087  }
3088 
3089  // finalize the insert data
3090  {
// one async task per converter
3091  auto finalizer_func =
3092  [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
3093  targetValueConverter->finalizeDataBlocksForInsertData();
3094  };
3095  std::vector<std::future<void>> worker_threads;
3096  for (auto& converterPtr : value_converters) {
3097  worker_threads.push_back(
3098  std::async(std::launch::async, finalizer_func, converterPtr.get()));
3099  }
3100 
3101  for (auto& child : worker_threads) {
3102  child.wait();
3103  }
3104  for (auto& child : worker_threads) {
3105  child.get();
3106  }
3107  }
3108 
3110  insert_data.databaseId = catalog.getCurrentDB().dbId;
3111  CHECK(td);
3112  insert_data.tableId = td->tableId;
3113  insert_data.numRows = num_rows_to_process;
3114 
3115  for (int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
3116  value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
3117  }
3118  total_target_value_translate_time_ms += timer_stop(translate_clock_begin);
3119 
3120  const auto data_load_clock_begin = timer_start();
3121  insertDataLoader.insertData(*session, insert_data);
3122  total_data_load_time_ms += timer_stop(data_load_clock_begin);
3123 
// advance to the next package; the last one may be shorter
3124  start_row += num_rows_to_process;
3125  num_rows_to_process =
3126  std::min(num_rows - start_row, max_number_of_rows_per_package);
3127  }
3128  }
3129  }
3130  } catch (...) {
// best-effort rollback: a failure of the rollback itself is only logged, the
// original exception is rethrown either way
3131  try {
3132  leafs_connector_->rollback(*session, td->tableId);
3133  } catch (std::exception& e) {
3134  LOG(ERROR) << "An error occurred during ITAS rollback attempt. Table id: "
3135  << td->tableId << ", Error: " << e.what();
3136  }
3137  throw;
3138  }
3139 
3140  int64_t total_time_ms = total_source_query_time_ms +
3141  total_target_value_translate_time_ms + total_data_load_time_ms;
3142 
3143  VLOG(1) << "CTAS/ITAS " << total_row_count << " rows loaded in " << total_time_ms
3144  << "ms (outer_frag_count=" << outer_frag_count
3145  << ", query_time=" << total_source_query_time_ms
3146  << "ms, translation_time=" << total_target_value_translate_time_ms
3147  << "ms, data_load_time=" << total_data_load_time_ms
3148  << "ms)\nquery: " << select_query_;
3149 
// temporary tables are not checkpointed
3150  if (!is_temporary) {
3151  leafs_connector_->checkpoint(*session, td->tableId);
3152  }
3153  }
3154 
// Executes INSERT INTO ... SELECT for the given session.
//
// A copy of the session is wrapped in a non-owning shared_ptr (null_deleter) to
// create the query state. The select query is sent to Calcite to learn which tables
// it reads; those tables plus the target table are locked in table-id order, and the
// load itself is delegated to populateData() with validation enabled.
//
// NOTE(review): the embedded listing numbers skip 3165-3166, 3168, 3201-3202,
// 3208-3209 and 3213-3214; the declaration of `locks` and the lock types being
// constructed are therefore not visible in this extract.
3155  void InsertIntoTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3156  auto session_copy = session;
3157  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3158  &session_copy, boost::null_deleter());
3159  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
3160  auto stdlog = STDLOG(query_state);
3161 
3162  auto& catalog = session_ptr->getCatalog();
3163 
3164  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3167 
3169  std::vector<std::string> tables;
3170 
3171  // get the table info
3172  auto calcite_mgr = catalog.getCalciteMgr();
3173 
3174  // TODO MAT this should actually get the global or the session parameter for
3175  // view optimization
3176  const auto result = calcite_mgr->process(query_state->createQueryStateProxy(),
3177  pg_shim(select_query_),
3178  {},
3179  true,
3180  false,
3181  false,
3182  true);
3183 
3184  tables.insert(tables.end(),
3185  result.resolved_accessed_objects.tables_selected_from.begin(),
3186  result.resolved_accessed_objects.tables_selected_from.end());
3187  tables.emplace_back(table_name_);
3188 
3189  // force sort into tableid order in case of name change to guarantee fixed order of
3190  // mutex access
// NOTE(review): the comparator dereferences getMetadataForTable() without a null
// check, and the "table does not exist" test only happens later in populateData() -
// confirm an unknown table name cannot reach this point.
3191  std::sort(tables.begin(),
3192  tables.end(),
3193  [&catalog](const std::string& a, const std::string& b) {
3194  return catalog.getMetadataForTable(a, false)->tableId <
3195  catalog.getMetadataForTable(b, false)->tableId;
3196  });
3197 
// drop adjacent duplicate names (e.g. the target table also being selected from)
3198  tables.erase(unique(tables.begin(), tables.end()), tables.end());
3199  for (const auto& table : tables) {
3200  locks.emplace_back(
3203  catalog, table)));
3204  if (table == table_name_) {
3205  // Acquire an insert data lock for updates/deletes, consistent w/ insert. The
3206  // table data lock will be acquired in the fragmenter during checkpoint.
3207  locks.emplace_back(
3210  catalog.getDatabaseId(), (*locks.back())())));
3211  } else {
3212  locks.emplace_back(
3215  catalog.getDatabaseId(), (*locks.back())())));
3216  }
3217  }
3218 
3219  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
3220 
3221  populateData(query_state->createQueryStateProxy(), td, true);
3222  }
3223 
// Executes CREATE TABLE ... AS SELECT for the given session.
//
// Phase 1 (only when leafs_connector_ is null): under a write lock, check the
// privilege, handle an already existing table (silent return with IF NOT EXISTS,
// otherwise an error), validate the select query to obtain the result column types,
// apply the storage options, optionally set up shared dictionaries and create the
// table in the catalog.
// Phase 2: lock the selected tables and the new table in table-id order and load the
// data through populateData() without re-validation. If loading throws and g_cluster
// is false, the table is dropped again before the exception is rethrown.
//
// NOTE(review): the embedded listing numbers skip 3238-3239, 3242-3243, 3294-3299,
// 3301, 3303, 3370-3371, 3373, 3389-3390, 3396-3397 and 3401-3402; the privilege
// condition, several TableDescriptor assignments, the declaration of `locks` and the
// lock types are therefore not visible in this extract.
3224  void CreateTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3225  auto session_copy = session;
3226  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3227  &session_copy, boost::null_deleter());
3228  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
3229  auto stdlog = STDLOG(query_state);
3230 
3231  LocalConnector local_connector;
3232  auto& catalog = session.getCatalog();
// the table is created here only when no connector has been set
3233  bool create_table = nullptr == leafs_connector_;
3234 
3235  std::set<std::string> select_tables;
3236  if (create_table) {
3237  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3240 
3241  // check access privileges
3244  throw std::runtime_error("CTAS failed. Table " + table_name_ +
3245  " will not be created. User has no create privileges.");
3246  }
3247 
3248  if (catalog.getMetadataForTable(table_name_) != nullptr) {
3249  if (if_not_exists_) {
3250  return;
3251  }
3252  throw std::runtime_error("Table " + table_name_ +
3253  " already exists and no data was loaded.");
3254  }
3255 
3256  // get the table info
3257  auto calcite_mgr = catalog.getCalciteMgr();
3258 
3259  // TODO MAT this should actually get the global or the session parameter for
3260  // view optimization
3261  const auto result = calcite_mgr->process(query_state->createQueryStateProxy(),
3262  pg_shim(select_query_),
3263  {},
3264  true,
3265  false,
3266  false,
3267  true);
3268 
3269  select_tables.insert(result.resolved_accessed_objects.tables_selected_from.begin(),
3270  result.resolved_accessed_objects.tables_selected_from.end());
3271 
3272  // only validate the select query so we get the target types
3273  // correctly, but do not populate the result set
3274  // we currently have exclusive access to the system so this is safe
3275  auto validate_result = local_connector.query(
3276  query_state->createQueryStateProxy(), select_query_, {}, true);
3277 
3278  const auto column_descriptors_for_create =
3279  local_connector.getColumnDescriptors(validate_result, true);
3280 
3281  // some validation as the QE might return some out of range column types
3282  for (auto& cd : column_descriptors_for_create) {
3283  if (cd.columnType.is_decimal() && cd.columnType.get_precision() > 18) {
3284  throw std::runtime_error(cd.columnName + ": Precision too high, max 18.");
3285  }
3286  }
3287 
3288  TableDescriptor td;
3289  td.tableName = table_name_;
3290  td.userId = session.get_currentUser().userId;
3291  td.nColumns = column_descriptors_for_create.size();
3292  td.isView = false;
3293  td.fragmenter = nullptr;
3300  if (is_temporary_) {
3302  } else {
3304  }
3305 
// shared dictionaries are on by default; the USE_SHARED_DICTIONARIES storage option
// can switch them off, every other option goes to get_table_definitions_for_ctas()
3306  bool use_shared_dictionaries = true;
3307 
3308  if (!storage_options_.empty()) {
3309  for (auto& p : storage_options_) {
3310  if (boost::to_lower_copy<std::string>(*p->get_name()) ==
3311  "use_shared_dictionaries") {
3312  const StringLiteral* literal =
3313  dynamic_cast<const StringLiteral*>(p->get_value());
3314  if (nullptr == literal) {
3315  throw std::runtime_error(
3316  "USE_SHARED_DICTIONARIES must be a string parameter");
3317  }
3318  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
// accepted "true" spellings (case-insensitive): true, 1, t
3319  use_shared_dictionaries = val == "true" || val == "1" || val == "t";
3320  } else {
3321  get_table_definitions_for_ctas(td, p, column_descriptors_for_create);
3322  }
3323  }
3324  }
3325 
3326  std::vector<SharedDictionaryDef> sharedDictionaryRefs;
3327 
// for every dictionary-encoded string column of the select result whose dictionary
// id is known to the catalog mapping, reference that existing column's dictionary
3328  if (use_shared_dictionaries) {
3329  const auto source_column_descriptors =
3330  local_connector.getColumnDescriptors(validate_result, false);
3331  const auto mapping = catalog.getDictionaryToColumnMapping();
3332 
3333  for (auto& source_cd : source_column_descriptors) {
3334  const auto& ti = source_cd.columnType;
3335  if (ti.is_string()) {
3336  if (ti.get_compression() == kENCODING_DICT) {
3337  int dict_id = ti.get_comp_param();
3338  auto it = mapping.find(dict_id);
3339  if (mapping.end() != it) {
3340  const auto targetColumn = it->second;
3341  auto targetTable =
3342  catalog.getMetadataForTable(targetColumn->tableId, false);
3343  CHECK(targetTable);
3344  LOG(INFO) << "CTAS: sharing text dictionary on column "
3345  << source_cd.columnName << " with " << targetTable->tableName
3346  << "." << targetColumn->columnName;
3347  sharedDictionaryRefs.push_back(
3348  SharedDictionaryDef(source_cd.columnName,
3349  targetTable->tableName,
3350  targetColumn->columnName));
3351  }
3352  }
3353  }
3354  }
3355  }
3356 
3357  // currently no means of defining sharding in CTAS
3358  td.keyMetainfo = serialize_key_metainfo(nullptr, sharedDictionaryRefs);
3359 
3360  catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs, true);
3361  // TODO (max): It's transactionally unsafe, should be fixed: we may create object
3362  // w/o privileges
3363  SysCatalog::instance().createDBObject(
3364  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
3365  }
3366 
3367  // note there is a time where we do not have any executor outer lock here. someone could
3368  // come along and mess with the data or other tables.
3369  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3372 
3374  std::vector<std::string> tables;
3375  tables.insert(tables.end(), select_tables.begin(), select_tables.end());
3376  CHECK_EQ(tables.size(), select_tables.size());
3377  tables.emplace_back(table_name_);
3378  // force sort into tableid order in case of name change to guarantee fixed order of
3379  // mutex access
3380  std::sort(tables.begin(),
3381  tables.end(),
3382  [&catalog](const std::string& a, const std::string& b) {
3383  return catalog.getMetadataForTable(a, false)->tableId <
3384  catalog.getMetadataForTable(b, false)->tableId;
3385  });
3386  tables.erase(unique(tables.begin(), tables.end()), tables.end());
3387  for (const auto& table : tables) {
3388  locks.emplace_back(
3391  catalog, table)));
3392  if (table == table_name_) {
3393  // Acquire an insert data lock for updates/deletes, consistent w/ insert. The
3394  // table data lock will be acquired in the fragmenter during checkpoint.
3395  locks.emplace_back(
3398  catalog.getDatabaseId(), (*locks.back())())));
3399  } else {
3400  locks.emplace_back(
3403  catalog.getDatabaseId(), (*locks.back())())));
3404  }
3405  }
3406 
3407  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
3408 
3409  try {
3410  populateData(query_state->createQueryStateProxy(), td, false);
3411  } catch (...) {
// undo the table creation when the load failed (skipped when g_cluster is set)
3412  if (!g_cluster) {
3413  const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
3414  if (created_td) {
3415  catalog.dropTable(created_td);
3416  }
3417  }
3418  throw;
3419  }
3420  }
3421 
3422 DropTableStmt::DropTableStmt(const rapidjson::Value& payload) {
3423  CHECK(payload.HasMember("tableName"));
3424  table = std::make_unique<std::string>(json_str(payload["tableName"]));
3425 
3426  if_exists = false;
3427  if (payload.HasMember("ifExists")) {
3428  if_exists = json_bool(payload["ifExists"]);
3429  }
3430 }
3431 
// Executes DROP TABLE for the given session.
//
// Acquires a schema write lock on the table; if that throws std::runtime_error the
// statement returns silently when IF EXISTS was given and propagates the error
// otherwise. After the privilege check the table is dropped through the catalog.
//
// Throws std::runtime_error when the privilege check fails.
//
// NOTE(review): the embedded listing numbers skip 3437-3438, 3446, 3462, 3467, 3470
// and 3474; the lock expressions, the privilege arguments and the cache invalidation
// call are therefore not visible in this extract.
3432  void DropTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3433  auto& catalog = session.getCatalog();
3434 
3435  // TODO(adb): the catalog should be handling this locking.
3436  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3439 
3440  const TableDescriptor* td{nullptr};
3441  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
3442 
3443  try {
3444  td_with_lock =
3445  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
3447  catalog, *table, false));
3448  td = (*td_with_lock)();
3449  } catch (const std::runtime_error& e) {
3450  if (if_exists) {
3451  return;
3452  } else {
// NOTE(review): `throw e;` throws a copy typed std::runtime_error, so a more derived
// exception type would be sliced; a bare `throw;` would rethrow the original object.
3453  throw e;
3454  }
3455  }
3456 
3457  CHECK(td);
3458  CHECK(td_with_lock);
3459 
3460  // check access privileges
3461  if (!session.checkDBAccessPrivileges(
3463  throw std::runtime_error("Table " + *table +
3464  " will not be dropped. User has no proper privileges.");
3465  }
3466 
3468 
3469  auto table_data_write_lock =
3471  catalog.dropTable(td);
3472 
3473  // invalidate cached hashtable
3475  }
3476 
// Executes TRUNCATE TABLE for the given session.
//
// Looks the table up under a schema lock, verifies that it exists, that
// SysCatalog::checkPrivileges() accepts the current user for the table object and
// that the table is not a view, then truncates it through the catalog.
//
// Throws std::runtime_error when the table does not exist, the privilege check
// fails, or the name refers to a view.
//
// NOTE(review): the embedded listing numbers skip 3483-3484, 3487, 3498, 3509, 3511
// and 3515; the lock expressions, the privilege being set on dbObject and the cache
// invalidation call are therefore not visible in this extract.
3477  void TruncateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3478  auto& catalog = session.getCatalog();
3479 
3480  // TODO: Removal of the FileMgr is not thread safe. Take a global system write lock
3481  // when truncating a table
3482  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3485 
3486  const auto td_with_lock =
3488  catalog, *table, true);
3489  const auto td = td_with_lock();
3490  if (!td) {
3491  throw std::runtime_error("Table " + *table + " does not exist.");
3492  }
3493 
3494  // check access privileges
3495  std::vector<DBObject> privObjects;
3496  DBObject dbObject(*table, TableDBObjectType);
3497  dbObject.loadKey(catalog);
3499  privObjects.push_back(dbObject);
3500  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3501  throw std::runtime_error("Table " + *table + " will not be truncated. User " +
3502  session.get_currentUser().userLoggable() +
3503  " has no proper privileges.");
3504  }
3505 
3506  if (td->isView) {
3507  throw std::runtime_error(*table + " is a view. Cannot Truncate.");
3508  }
3510  auto table_data_write_lock =
3512  catalog.truncateTable(td);
3513 
3514  // invalidate cached hashtable
3516  }
3517 
// NOTE(review): the first line of this signature (listing number 3518) is missing
// from this extract; presumably this is check_alter_table_privilege(session, td),
// which other statements in this file call with these arguments - confirm.
//
// Returns immediately when the current user is a super user or owns the table.
// Otherwise the table object is checked through SysCatalog::checkPrivileges() and a
// std::runtime_error is thrown when that check fails.
//
// NOTE(review): listing number 3527 is also missing; presumably it sets the
// privilege to test on dbObject.
3519  const TableDescriptor* td) {
3520  if (session.get_currentUser().isSuper ||
3521  session.get_currentUser().userId == td->userId) {
3522  return;
3523  }
3524  std::vector<DBObject> privObjects;
3525  DBObject dbObject(td->tableName, TableDBObjectType);
3526  dbObject.loadKey(session.getCatalog());
3528  privObjects.push_back(dbObject);
3529  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3530  throw std::runtime_error("Current user does not have the privilege to alter table: " +
3531  td->tableName);
3532  }
3533  }
3534 
// Renames a user. Only super users may do this, and the user being renamed must
// exist; both violations raise std::runtime_error.
//
// NOTE(review): listing number 3540 is missing from this extract; presumably it
// declares the `user` object filled by getMetadataForUser().
3535  void RenameUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3536  if (!session.get_currentUser().isSuper) {
3537  throw std::runtime_error("Only a super user can rename users.");
3538  }
3539 
3541  if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
3542  throw std::runtime_error("User " + *username_ + " does not exist.");
3543  }
3544 
3545  SysCatalog::instance().renameUser(*username_, *new_username_);
3546  }
3547 
// Renames a database. The database must exist, and the current user must be a super
// user or the database owner; both violations raise std::runtime_error.
//
// NOTE(review): listing numbers 3549 and 3553-3554 are missing from this extract;
// presumably they declare `db` and complete the write-lock expression.
3548  void RenameDatabaseStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3550 
3551  // TODO: use database lock instead
3552  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3555 
3556  if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
3557  throw std::runtime_error("Database " + *database_name_ + " does not exist.");
3558  }
3559 
3560  if (!session.get_currentUser().isSuper &&
3561  session.get_currentUser().userId != db.dbOwner) {
3562  throw std::runtime_error("Only a super user or the owner can rename the database.");
3563  }
3564 
3565  SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
3566  }
3567 
// Renames a table. After the alter-table privilege check, the new name must not
// already be known to the catalog (std::runtime_error otherwise); the rename itself
// is performed by the catalog.
//
// NOTE(review): listing numbers 3573-3574, 3577 and 3581 are missing from this
// extract; the lock expressions and one statement after CHECK(td) are not visible.
3568  void RenameTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3569  auto& catalog = session.getCatalog();
3570 
3571  // TODO(adb): the catalog should be handling this locking (see AddColumStmt)
3572  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3575 
3576  const auto td_with_lock =
3578  catalog, *table, false);
3579  const auto td = td_with_lock();
3580  CHECK(td);
3582 
3583  check_alter_table_privilege(session, td);
3584  if (catalog.getMetadataForTable(*new_table_name) != nullptr) {
3585  throw std::runtime_error("Table or View " + *new_table_name + " already exists.");
3586  }
3587  catalog.renameTable(td, *new_table_name);
3588  }
3589 
// Fills a ColumnDescriptor from a parsed column definition.
//
// The NOT NULL flag is taken from the column constraint and is false when the
// definition carries no constraint. The descriptor, column type, flag and
// compression are then passed to a helper call.
//
// NOTE(review): listing number 3598 is missing from this extract, so the name of
// the helper being called is not visible.
3590  void DDLStmt::setColumnDescriptor(ColumnDescriptor& cd, const ColumnDef* coldef) {
3591  bool not_null;
3592  const ColumnConstraintDef* cc = coldef->get_column_constraint();
3593  if (cc == nullptr) {
3594  not_null = false;
3595  } else {
3596  not_null = cc->get_notnull();
3597  }
3599  cd,
3600  coldef->get_column_type(),
3601  not_null,
3602  coldef->get_compression());
3603  }
3604 
// Verifies that ALTER TABLE ... ADD COLUMN may run against the given table.
//
// Throws std::runtime_error when the table does not exist, is a view, is temporary,
// or already has a column with one of the new names. The alter-table privilege is
// checked through check_alter_table_privilege().
//
// Side effect: when `coldefs` is empty, the single `coldef` is moved into it so the
// rest of the statement can treat both forms alike.
//
// NOTE(review): listing number 3614 is missing from this extract; one statement
// between the view check and the temporary-table check is not visible.
3605  void AddColumnStmt::check_executable(const Catalog_Namespace::SessionInfo& session,
3606  const TableDescriptor* td) {
3607  auto& catalog = session.getCatalog();
3608  if (!td) {
3609  throw std::runtime_error("Table " + *table + " does not exist.");
3610  } else {
3611  if (td->isView) {
3612  throw std::runtime_error("Adding columns to a view is not supported.");
3613  }
3615  if (table_is_temporary(td)) {
3616  throw std::runtime_error(
3617  "Adding columns to temporary tables is not yet supported.");
3618  }
3619  };
3620 
3621  check_alter_table_privilege(session, td);
3622 
3623  if (0 == coldefs.size()) {
3624  coldefs.push_back(std::move(coldef));
3625  }
3626 
3627  for (const auto& coldef : coldefs) {
3628  auto& new_column_name = *coldef->get_column_name();
3629  if (catalog.getMetadataForColumn(td->tableId, new_column_name) != nullptr) {
3630  throw std::runtime_error("Column " + new_column_name + " already exists.");
3631  }
3632  }
3633  }
3634 
// Executes ALTER TABLE ... ADD COLUMN for the table named by `*table`.
//
// Visible flow:
//  1. Take the locks, resolve the table descriptor and run check_executable().
//  2. Refuse tables whose fragmenter is a SortedOrderFragmenter.
//  3. Inside a SQLite transaction, register every new column with the catalog; geo
//     columns are additionally expanded into their physical columns.
//  4. When the table already holds rows, fill the new columns' import buffers with
//     the column default (or NULL) for `nrows` rows.
//  5. Load the buffers, commit the catalog changes and checkpoint. Any exception
//     rolls the catalog and the SQLite transaction back and is rethrown.
//
// NOTE(review): several listing lines are absent from this excerpt (gaps in the
// embedded numbering); each gap is flagged where it occurs.
3635 void AddColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3636  auto& catalog = session.getCatalog();
3637 
3638  // TODO(adb): the catalog should be handling this locking.
3639  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
      // NOTE(review): listing lines 3640-3641, which complete the lock expression above,
      // are missing from this excerpt.
3642 
3643  const auto td_with_lock =
      // NOTE(review): listing line 3644 (the callee that receives the arguments below)
      // is missing from this excerpt.
3645  catalog, *table, true);
3646  const auto td = td_with_lock();
3647 
      // Throws if the table is missing, a view, temporary, or has a clashing column.
3648  check_executable(session, td);
3649 
3650  CHECK(td->fragmenter);
3651  if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
3652  td->fragmenter)) {
3653  throw std::runtime_error(
3654  "Adding columns to a table is not supported when using the \"sort_column\" "
3655  "option.");
3656  }
3657 
3658  // Do not take a data write lock, as the fragmenter may call `deleteFragments` during
3659  // a cap operation. Note that the schema write lock will prevent concurrent inserts
3660  // along with all other queries.
3661 
3662  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
3663  try {
      // cds: descriptors of all added columns keyed by column name (not read again in
      // the visible code). cid_coldefs: column id -> user-supplied ColumnDef, with
      // nullptr for the physical columns produced by geo expansion. Being a std::map,
      // it is iterated in ascending column-id order below.
3664  std::map<const std::string, const ColumnDescriptor> cds;
3665  std::map<const int, const ColumnDef*> cid_coldefs;
3666  for (const auto& coldef : coldefs) {
3667  ColumnDescriptor cd;
3668  setColumnDescriptor(cd, coldef.get());
3669  catalog.addColumn(*td, cd);
3670  cds.emplace(*coldef->get_column_name(), cd);
3671  cid_coldefs.emplace(cd.columnId, coldef.get());
3672 
3673  // expand geo column to phy columns
3674  if (cd.columnType.is_geometry()) {
3675  std::list<ColumnDescriptor> phy_geo_columns;
3676  catalog.expandGeoColumn(cd, phy_geo_columns);
      // Note: this `cd` shadows the outer descriptor within the loop.
3677  for (auto& cd : phy_geo_columns) {
3678  catalog.addColumn(*td, cd);
3679  cds.emplace(cd.columnName, cd);
3680  cid_coldefs.emplace(cd.columnId, nullptr);
3681  }
3682  }
3683  }
3684 
3685  std::unique_ptr<import_export::Loader> loader(new import_export::Loader(catalog, td));
3686  auto import_buffers = import_export::setup_column_loaders(td, loader.get());
3687  loader->setReplicating(true);
3688 
3689  // set_geo_physical_import_buffer below needs a sorted import_buffers
3690  std::sort(import_buffers.begin(),
3691  import_buffers.end(),
3692  [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
3693  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
3694  });
3695 
3696  size_t nrows = td->fragmenter->getNumRows();
3697  // if sharded, get total nrows from all sharded tables
3698  if (td->nShards > 0) {
3699  const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
3700  nrows = 0;
3701  std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](const auto& td) {
3702  nrows += td->fragmenter->getNumRows();
3703  });
3704  }
      // Backfill is only needed when rows already exist.
3705  if (nrows > 0) {
      // Number of physical columns still to be skipped because the preceding geo
      // column handled them (set at the end of the geometry branch below).
3706  int skip_physical_cols = 0;
      // `cit` is a by-value copy of each (column id, ColumnDef*) entry.
3707  for (const auto cit : cid_coldefs) {
3708  const auto cd = catalog.getMetadataForColumn(td->tableId, cit.first);
3709  const auto coldef = cit.second;
3710  const auto column_constraint = coldef ? coldef->get_column_constraint() : nullptr;
      // Default value as text: string literals contribute their raw string value,
      // other literals their to_string() form; no default yields "".
3711  std::string defaultval = "";
3712  if (column_constraint) {
3713  auto defaultlp = column_constraint->get_defaultval();
3714  auto defaultsp = dynamic_cast<const StringLiteral*>(defaultlp);
3715  defaultval = defaultsp ? *defaultsp->get_stringval()
3716  : defaultlp ? defaultlp->to_string() : "";
3717  }
      // The fill value is NULL when there is no constraint, when the default is
      // empty, or when the default text equals "NULL" (case-insensitive).
3718  bool isnull = column_constraint ? (0 == defaultval.size()) : true;
3719  if (boost::to_upper_copy<std::string>(defaultval) == "NULL") {
3720  isnull = true;
3721  }
3722 
      // A NOT NULL column cannot be backfilled with NULL for the existing rows.
3723  if (isnull) {
3724  if (column_constraint && column_constraint->get_notnull()) {
3725  throw std::runtime_error("Default value required for column " +
3726  cd->columnName + " (NULL value not supported)");
3727  }
3728  }
3729 
      // Locate the import buffer for this column id; the loop stops at the first
      // match (see the `break` below).
3730  for (auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
3731  auto& import_buffer = *it;
3732  if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
      // User-declared columns (coldef != nullptr) are always filled. For physical
      // columns (coldef == nullptr) the counter is post-decremented on every
      // evaluation and the column is filled only once it has dropped to <= 0.
3733  if (coldef != nullptr ||
3734  skip_physical_cols-- <= 0) { // skip non-null phy col
3735  import_buffer->add_value(
3736  cd, defaultval, isnull, import_export::CopyParams(), nrows);
3737  if (cd->columnType.is_geometry()) {
3738  std::vector<double> coords, bounds;
3739  std::vector<int> ring_sizes, poly_rings;
3740  int render_group = 0;
3741  SQLTypeInfo tinfo{cd->columnType};
      // NOTE(review): listing line 3742 (the start of the condition whose
      // arguments follow) is missing from this excerpt. From the error message
      // it presumably parses `defaultval` as geometry - TODO confirm.
3743  tinfo,
3744  coords,
3745  bounds,
3746  ring_sizes,
3747  poly_rings,
3748  false)) {
3749  throw std::runtime_error("Bad geometry data: '" + defaultval + "'");
3750  }
      // Index of the buffer that immediately follows the geo column's own buffer.
3751  size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
      // NOTE(review): listing line 3752 (the callee that receives the arguments
      // below) is missing from this excerpt; presumably the
      // set_geo_physical_import_buffer call mentioned above - TODO confirm.
3753  cd,
3754  import_buffers,
3755  col_idx,
3756  coords,
3757  bounds,
3758  ring_sizes,
3759  poly_rings,
3760  render_group,
3761  nrows);
3762  // skip following phy cols
3763  skip_physical_cols = cd->columnType.get_physical_cols();
3764  }
3765  }
3766  break;
3767  }
3768  }
3769  }
3770  }
3771 
      // The load is issued even when nrows == 0 (the guard above only covers the
      // backfill of values).
3772  if (!loader->loadNoCheckpoint(import_buffers, nrows)) {
3773  throw std::runtime_error("loadNoCheckpoint failed!");
3774  }
3775  catalog.roll(true);
3776  loader->checkpoint();
3777  catalog.getSqliteConnector().query("END TRANSACTION");
3778  } catch (...) {
      // Undo the catalog changes and the SQLite transaction, then propagate the error.
3779  catalog.roll(false);
3780  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
3781  throw;
3782  }
3783 }
3784 
// Executes ALTER TABLE ... DROP COLUMN for the table named by `*table`.
//
// Throws std::runtime_error when the table does not exist, is a view, is temporary,
// when a named column does not exist, when the column-count check below fails, or
// when a named column is the shard key of a sharded table. Column removal runs
// inside a SQLite transaction; on any exception the table is marked for reload, the
// catalog and the transaction are rolled back, and the exception is rethrown.
//
// NOTE(review): several listing lines are absent from this excerpt (gaps in the
// embedded numbering); each gap is flagged where it occurs.
3785 void DropColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3786  auto& catalog = session.getCatalog();
3787 
3788  // TODO(adb): the catalog should be handling this locking.
3789  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
      // NOTE(review): listing lines 3790-3791, which complete the lock expression above,
      // are missing from this excerpt.
3792 
3793  const auto td_with_lock =
      // NOTE(review): listing line 3794 (the callee that receives the arguments below)
      // is missing from this excerpt.
3795  catalog, *table, true);
3796  const auto td = td_with_lock();
3797  if (!td) {
3798  throw std::runtime_error("Table " + *table + " does not exist.");
3799  }
      // NOTE(review): listing line 3800 is missing from this excerpt.
3801  if (td->isView) {
3802  throw std::runtime_error("Dropping a column from a view is not supported.");
3803  }
3804  if (table_is_temporary(td)) {
3805  throw std::runtime_error(
3806  "Dropping a column from a temporary table is not yet supported.");
3807  }
3808 
3809  check_alter_table_privilege(session, td);
3810 
      // Validate every named column before anything is modified.
3811  for (const auto& column : columns) {
3812  if (nullptr == catalog.getMetadataForColumn(td->tableId, *column)) {
3813  throw std::runtime_error("Column " + *column + " does not exist.");
3814  }
3815  }
3816 
      // Refuse to drop from a table that "has only one column". The thresholds (2, or
      // 3 with hasDeletedCol) presumably account for system columns counted in
      // nColumns - TODO confirm. Note the check does not consider how many columns
      // this statement is about to drop.
3817  if (td->nColumns <= (td->hasDeletedCol ? 3 : 2)) {
3818  throw std::runtime_error("Table " + *table + " has only one column.");
3819  }
3820 
3821  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
3822  try {
      // Ids of all dropped columns, including the physical columns of geo columns;
      // passed to each fragmenter below.
3823  std::vector<int> columnIds;
3824  for (const auto& column : columns) {
      // Copy the descriptor by value before the column is dropped from the catalog.
3825  ColumnDescriptor cd = *catalog.getMetadataForColumn(td->tableId, *column);
3826  if (td->nShards > 0 && td->shardedColumnId == cd.columnId) {
3827  throw std::runtime_error("Dropping sharding column " + cd.columnName +
3828  " is not supported.");
3829  }
3830  catalog.dropColumn(*td, cd);
3831  columnIds.push_back(cd.columnId);
      // Physical columns are looked up at the ids immediately following the
      // logical column's id.
3832  for (int i = 0; i < cd.columnType.get_physical_cols(); i++) {
3833  const auto pcd = catalog.getMetadataForColumn(td->tableId, cd.columnId + i + 1);
3834  CHECK(pcd);
3835  catalog.dropColumn(*td, *pcd);
3836  columnIds.push_back(cd.columnId + i + 1);
3837  }
3838  }
3839 
3840  for (auto shard : catalog.getPhysicalTablesDescriptors(td)) {
3841  shard->fragmenter->dropColumns(columnIds);
3842  }
3843  // if test forces to rollback
      // NOTE(review): listing line 3844 (the condition guarding the throw below) is
      // missing from this excerpt.
3845  throw std::runtime_error("lol!");
3846  }
3847  catalog.roll(true);
      // NOTE(review): listing line 3848 (the condition guarding the checkpoint below)
      // is missing from this excerpt.
3849  catalog.checkpoint(td->tableId);
3850  }
3851  catalog.getSqliteConnector().query("END TRANSACTION");
3852  } catch (...) {
3853  catalog.setForReload(td->tableId);
3854  catalog.roll(false);
3855  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
3856  throw;
3857  }
3858 
3859  // invalidate cached hashtable
      // NOTE(review): listing line 3860 (the invalidation call itself) is missing from
      // this excerpt.
3861 }
3862 
// Executes ALTER TABLE ... RENAME COLUMN: renames `*column` of table `*table` to
// `*new_column_name` through the catalog.
//
// Throws std::runtime_error when the source column does not exist or the new name is
// already taken. Unlike the ADD/DROP COLUMN statements in this file, a missing table
// is handled with CHECK(td) rather than a runtime_error.
3863 void RenameColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3864  auto& catalog = session.getCatalog();
3865 
3866  const auto td_with_lock =
      // NOTE(review): listing line 3867 (the callee that receives the arguments below)
      // is missing from this excerpt.
3868  catalog, *table, false);
3869  const auto td = td_with_lock();
3870  CHECK(td);
      // NOTE(review): listing line 3871 is missing from this excerpt.
3872 
3873  check_alter_table_privilege(session, td);
3874  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *column);
3875  if (cd == nullptr) {
3876  throw std::runtime_error("Column " + *column + " does not exist.");
3877  }
3878  if (catalog.getMetadataForColumn(td->tableId, *new_column_name) != nullptr) {
3879  throw std::runtime_error("Column " + *new_column_name + " already exists.");
3880  }
3881  catalog.renameColumn(td, cd, *new_column_name);
3882 }
3883 
// Executes ALTER TABLE ... SET <param> = <value> for the table named by `*table`.
//
// The parameter name is matched case-insensitively against `param_map`
// ("max_rollback_epochs", "epoch") and the value must be an integer literal.
// Throws std::runtime_error when the table does not exist, is a view, is temporary,
// when the value is not an integer literal, or when the name is not in `param_map`.
3884 void AlterTableParamStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3885  enum TableParamType { MaxRollbackEpochs, Epoch };
3886  static const std::unordered_map<std::string, TableParamType> param_map = {
3887  {"max_rollback_epochs", TableParamType::MaxRollbackEpochs},
3888  {"epoch", TableParamType::Epoch}};
3889  // Below is to ensure that executor is not currently executing on table when we might be
3890  // changing its storage. Question: will/should catalog write lock take care of this?
3891  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
      // NOTE(review): listing lines 3892-3893, which complete the lock expression above,
      // are missing from this excerpt.
3894  auto& catalog = session.getCatalog();
3895  const auto td_with_lock =
      // NOTE(review): listing line 3896 (the callee that receives the arguments below)
      // is missing from this excerpt.
3897  catalog, *table, false);
3898  const auto td = td_with_lock();
3899  if (!td) {
3900  throw std::runtime_error("Table " + *table + " does not exist.");
3901  }
3902  if (td->isView) {
3903  throw std::runtime_error("Setting parameters for a view is not supported.");
3904  }
3905  if (table_is_temporary(td)) {
3906  throw std::runtime_error(
3907  "Setting parameters for a temporary table is not yet supported.");
3908  }
3909  check_alter_table_privilege(session, td);
3910 
3911  // Settable parameters are the keys of param_map; names are compared in lower case.
3912  std::string param_name(*param->get_name());
3913  boost::algorithm::to_lower(param_name);
3914  const IntLiteral* val_int_literal = dynamic_cast<const IntLiteral*>(param->get_value());
3915  if (val_int_literal == nullptr) {
3916  throw std::runtime_error("Table parameters should be integers.");
3917  }
      // NOTE(review): the literal's value is stored as int32_t; if get_intval() returns
      // a wider type this narrows silently - TODO confirm.
3918  const int32_t param_val = val_int_literal->get_intval();
3919 
      // The name is validated only after the value, so a non-integer value is reported
      // even when the parameter name is unknown.
3920  const auto param_it = param_map.find(param_name);
3921  if (param_it == param_map.end()) {
3922  throw std::runtime_error(param_name + " is not a settable table parameter.");
3923  }
3924  switch (param_it->second) {
3925  case MaxRollbackEpochs: {
3926  catalog.setMaxRollbackEpochs(td->tableId, param_val);
3927  break;
3928  }
3929  case Epoch: {
3930  catalog.setTableEpoch(catalog.getDatabaseId(), td->tableId, param_val);
3931  }
3932  }
3933 }
3934 
3935 void CopyTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3936  auto importer_factory = [](Catalog_Namespace::Catalog& catalog,
3937  const TableDescriptor* td,
3938  const std::string& file_path,
3939  const import_export::CopyParams& copy_params) {
3940  return std::make_unique<import_export::Importer>(catalog, td, file_path, copy_params);
3941  };
3942  return execute(session, importer_factory);
3943 }
3944 
// Executes COPY <table> FROM '<file_pattern>' WITH (<options>), using
// `importer_factory` to construct the importer for a regular (non-geo) import.
//
// Visible flow:
//  1. For patterns that are not s3/http/https URLs, run a path check (callee not
//     visible, see note below).
//  2. Try to lock the target table; a std::runtime_error during locking is swallowed
//     and leaves `td` null ("table does not exist").
//  3. If the table exists, verify the user's privileges on it.
//  4. Translate the WITH options into import_export::CopyParams, throwing
//     std::runtime_error on a wrongly typed value or an unknown option.
//  5. Geo import (file_type == POLYGON): only stash the parameters in the
//     _geo_copy_from_* members; the import itself is not performed here.
//     Regular import: requires an existing table, runs importer->import() and
//     builds the result text.
//  6. Store the result text in `return_message` and log it.
//
// Option handling is order-dependent: "partitions" and "source_srid" inspect
// copy_params.file_type as it stands when they are processed, so an option that
// changes file_type (e.g. "geo") must appear before them to be taken into account.
//
// NOTE(review): several listing lines are absent from this excerpt (gaps in the
// embedded numbering); each gap is flagged where it occurs.
3945 void CopyTableStmt::execute(const Catalog_Namespace::SessionInfo& session,
3946  const std::function<std::unique_ptr<import_export::Importer>(
      // NOTE(review): listing line 3947 (first parameter type of the factory signature)
      // is missing; the one-argument overload passes a lambda whose first parameter is
      // Catalog_Namespace::Catalog& - TODO confirm.
3948  const TableDescriptor*,
3949  const std::string&,
3950  const import_export::CopyParams&)>& importer_factory) {
3951  boost::regex non_local_file_regex{R"(^\s*(s3|http|https)://.+)",
3952  boost::regex::extended | boost::regex::icase};
3953  if (!boost::regex_match(*file_pattern, non_local_file_regex)) {
      // NOTE(review): listing line 3954 (the callee that receives the arguments below)
      // is missing from this excerpt.
3955  *file_pattern, ddl_utils::DataTransferType::IMPORT, true);
3956  }
3957 
3958  size_t rows_completed = 0;
3959  size_t rows_rejected = 0;
3960  size_t total_time = 0;
3961  bool load_truncated = false;
3962 
3963  // Prevent simultaneous import / truncate (see TruncateTableStmt::execute)
3964  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
      // NOTE(review): listing lines 3965-3966, which complete the lock expression above,
      // are missing from this excerpt.
3967 
3968  const TableDescriptor* td{nullptr};
3969  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
3970  std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
3971 
3972  auto& catalog = session.getCatalog();
3973 
3974  try {
3975  td_with_lock = std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
      // NOTE(review): listing line 3976 (the callee that receives the arguments below)
      // is missing from this excerpt.
3977  catalog, *table));
3978  td = (*td_with_lock)();
3979  insert_data_lock = std::make_unique<lockmgr::WriteLock>(
      // NOTE(review): listing line 3980 (the argument of the make_unique call above) is
      // missing from this excerpt.
3981  } catch (const std::runtime_error& e) {
3982  // noop
3983  // TODO(adb): We're really only interested in whether the table exists or not.
3984  // Create a more refined exception.
3985  }
3986 
3987  // if the table already exists, it's locked, so check access privileges
3988  if (td) {
3989  std::vector<DBObject> privObjects;
3990  DBObject dbObject(*table, TableDBObjectType);
3991  dbObject.loadKey(catalog);
      // NOTE(review): listing line 3992 is missing; given the error message below it
      // presumably selects the insert privilege on dbObject - TODO confirm.
3993  privObjects.push_back(dbObject);
3994  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3995  throw std::runtime_error("Violation of access privileges: user " +
3996  session.get_currentUser().userLoggable() +
3997  " has no insert privileges for table " + *table + ".");
3998  }
3999  }
4000 
4001  // since we'll have not only posix file names but also s3/hdfs/... url
4002  // we do not expand wildcard or check file existence here.
4003  // from here on, file_path contains something which may be a url
4004  // or a wildcard of file names;
4005  std::string file_path = *file_pattern;
4006  import_export::CopyParams copy_params;
      // Option names are matched case-insensitively (boost::iequals). Each branch
      // checks the literal type of the value with dynamic_cast and throws when it does
      // not match; boolean options arrive as string literals and are converted with
      // bool_from_string_literal().
4007  if (!options.empty()) {
4008  for (auto& p : options) {
4009  if (boost::iequals(*p->get_name(), "max_reject")) {
4010  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4011  if (int_literal == nullptr) {
4012  throw std::runtime_error("max_reject option must be an integer.");
4013  }
4014  copy_params.max_reject = int_literal->get_intval();
4015  } else if (boost::iequals(*p->get_name(), "buffer_size")) {
4016  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4017  if (int_literal == nullptr) {
4018  throw std::runtime_error("buffer_size option must be an integer.");
4019  }
4020  copy_params.buffer_size = int_literal->get_intval();
4021  } else if (boost::iequals(*p->get_name(), "threads")) {
4022  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4023  if (int_literal == nullptr) {
4024  throw std::runtime_error("Threads option must be an integer.");
4025  }
4026  copy_params.threads = int_literal->get_intval();
4027  } else if (boost::iequals(*p->get_name(), "delimiter")) {
4028  const StringLiteral* str_literal =
4029  dynamic_cast<const StringLiteral*>(p->get_value());
4030  if (str_literal == nullptr) {
4031  throw std::runtime_error("Delimiter option must be a string.");
4032  } else if (str_literal->get_stringval()->length() != 1) {
4033  throw std::runtime_error("Delimiter must be a single character string.");
4034  }
4035  copy_params.delimiter = (*str_literal->get_stringval())[0];
4036  } else if (boost::iequals(*p->get_name(), "nulls")) {
4037  const StringLiteral* str_literal =
4038  dynamic_cast<const StringLiteral*>(p->get_value());
4039  if (str_literal == nullptr) {
4040  throw std::runtime_error("Nulls option must be a string.");
4041  }
4042  copy_params.null_str = *str_literal->get_stringval();
4043  } else if (boost::iequals(*p->get_name(), "header")) {
4044  const StringLiteral* str_literal =
4045  dynamic_cast<const StringLiteral*>(p->get_value());
4046  if (str_literal == nullptr) {
4047  throw std::runtime_error("Header option must be a boolean.");
4048  }
4049  copy_params.has_header = bool_from_string_literal(str_literal)
      // NOTE(review): listing lines 4050-4051 (the remainder of the expression above)
      // are missing from this excerpt.
4052 #ifdef ENABLE_IMPORT_PARQUET
4053  } else if (boost::iequals(*p->get_name(), "parquet")) {
4054  const StringLiteral* str_literal =
4055  dynamic_cast<const StringLiteral*>(p->get_value());
4056  if (str_literal == nullptr) {
4057  throw std::runtime_error("Parquet option must be a boolean.");
4058  }
4059  if (bool_from_string_literal(str_literal)) {
4060  // not sure a parquet "table" type is proper, but to make code
4061  // look consistent in some places, let's set "table" type too
4062  copy_params.file_type = import_export::FileType::PARQUET;
4063  }
4064 #endif // ENABLE_IMPORT_PARQUET
4065  } else if (boost::iequals(*p->get_name(), "s3_access_key")) {
4066  const StringLiteral* str_literal =
4067  dynamic_cast<const StringLiteral*>(p->get_value());
4068  if (str_literal == nullptr) {
4069  throw std::runtime_error("Option s3_access_key must be a string.");
4070  }
4071  copy_params.s3_access_key = *str_literal->get_stringval();
4072  } else if (boost::iequals(*p->get_name(), "s3_secret_key")) {
4073  const StringLiteral* str_literal =
4074  dynamic_cast<const StringLiteral*>(p->get_value());
4075  if (str_literal == nullptr) {
4076  throw std::runtime_error("Option s3_secret_key must be a string.");
4077  }
4078  copy_params.s3_secret_key = *str_literal->get_stringval();
4079  } else if (boost::iequals(*p->get_name(), "s3_region")) {
4080  const StringLiteral* str_literal =
4081  dynamic_cast<const StringLiteral*>(p->get_value());
4082  if (str_literal == nullptr) {
4083  throw std::runtime_error("Option s3_region must be a string.");
4084  }
4085  copy_params.s3_region = *str_literal->get_stringval();
4086  } else if (boost::iequals(*p->get_name(), "s3_endpoint")) {
4087  const StringLiteral* str_literal =
4088  dynamic_cast<const StringLiteral*>(p->get_value());
4089  if (str_literal == nullptr) {
4090  throw std::runtime_error("Option s3_endpoint must be a string.");
4091  }
4092  copy_params.s3_endpoint = *str_literal->get_stringval();
4093  } else if (boost::iequals(*p->get_name(), "quote")) {
4094  const StringLiteral* str_literal =
4095  dynamic_cast<const StringLiteral*>(p->get_value());
4096  if (str_literal == nullptr) {
4097  throw std::runtime_error("Quote option must be a string.");
4098  } else if (str_literal->get_stringval()->length() != 1) {
4099  throw std::runtime_error("Quote must be a single character string.");
4100  }
4101  copy_params.quote = (*str_literal->get_stringval())[0];
4102  } else if (boost::iequals(*p->get_name(), "escape")) {
4103  const StringLiteral* str_literal =
4104  dynamic_cast<const StringLiteral*>(p->get_value());
4105  if (str_literal == nullptr) {
4106  throw std::runtime_error("Escape option must be a string.");
4107  } else if (str_literal->get_stringval()->length() != 1) {
4108  throw std::runtime_error("Escape must be a single character string.");
4109  }
4110  copy_params.escape = (*str_literal->get_stringval())[0];
4111  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
4112  const StringLiteral* str_literal =
4113  dynamic_cast<const StringLiteral*>(p->get_value());
4114  if (str_literal == nullptr) {
4115  throw std::runtime_error("Line_delimiter option must be a string.");
4116  } else if (str_literal->get_stringval()->length() != 1) {
4117  throw std::runtime_error("Line_delimiter must be a single character string.");
4118  }
4119  copy_params.line_delim = (*str_literal->get_stringval())[0];
4120  } else if (boost::iequals(*p->get_name(), "quoted")) {
4121  const StringLiteral* str_literal =
4122  dynamic_cast<const StringLiteral*>(p->get_value());
4123  if (str_literal == nullptr) {
4124  throw std::runtime_error("Quoted option must be a boolean.");
4125  }
4126  copy_params.quoted = bool_from_string_literal(str_literal);
4127  } else if (boost::iequals(*p->get_name(), "plain_text")) {
4128  const StringLiteral* str_literal =
4129  dynamic_cast<const StringLiteral*>(p->get_value());
4130  if (str_literal == nullptr) {
4131  throw std::runtime_error("plain_text option must be a boolean.");
4132  }
4133  copy_params.plain_text = bool_from_string_literal(str_literal);
4134  } else if (boost::iequals(*p->get_name(), "array_marker")) {
4135  const StringLiteral* str_literal =
4136  dynamic_cast<const StringLiteral*>(p->get_value());
4137  if (str_literal == nullptr) {
4138  throw std::runtime_error("Array Marker option must be a string.");
4139  } else if (str_literal->get_stringval()->length() != 2) {
4140  throw std::runtime_error(
4141  "Array Marker option must be exactly two characters. Default is {}.");
4142  }
      // First character opens an array, second character closes it.
4143  copy_params.array_begin = (*str_literal->get_stringval())[0];
4144  copy_params.array_end = (*str_literal->get_stringval())[1];
4145  } else if (boost::iequals(*p->get_name(), "array_delimiter")) {
4146  const StringLiteral* str_literal =
4147  dynamic_cast<const StringLiteral*>(p->get_value());
4148  if (str_literal == nullptr) {
4149  throw std::runtime_error("Array Delimiter option must be a string.");
4150  } else if (str_literal->get_stringval()->length() != 1) {
4151  throw std::runtime_error("Array Delimiter must be a single character string.");
4152  }
4153  copy_params.array_delim = (*str_literal->get_stringval())[0];
4154  } else if (boost::iequals(*p->get_name(), "lonlat")) {
4155  const StringLiteral* str_literal =
4156  dynamic_cast<const StringLiteral*>(p->get_value());
4157  if (str_literal == nullptr) {
4158  throw std::runtime_error("Lonlat option must be a boolean.");
4159  }
4160  copy_params.lonlat = bool_from_string_literal(str_literal);
4161  } else if (boost::iequals(*p->get_name(), "geo")) {
4162  const StringLiteral* str_literal =
4163  dynamic_cast<const StringLiteral*>(p->get_value());
4164  if (str_literal == nullptr) {
4165  throw std::runtime_error("Geo option must be a boolean.");
4166  }
4167  copy_params.file_type = bool_from_string_literal(str_literal)
      // NOTE(review): listing lines 4168-4169 (the remainder of the expression above)
      // are missing from this excerpt.
4170  } else if (boost::iequals(*p->get_name(), "geo_coords_type")) {
4171  const StringLiteral* str_literal =
4172  dynamic_cast<const StringLiteral*>(p->get_value());
4173  if (str_literal == nullptr) {
4174  throw std::runtime_error("'geo_coords_type' option must be a string");
4175  }
4176  const std::string* s = str_literal->get_stringval();
4177  if (boost::iequals(*s, "geography")) {
4178  throw std::runtime_error(
4179  "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
4180  // copy_params.geo_coords_type = kGEOGRAPHY;
4181  } else if (boost::iequals(*s, "geometry")) {
4182  copy_params.geo_coords_type = kGEOMETRY;
4183  } else {
4184  throw std::runtime_error(
4185  "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
4186  "'GEOMETRY'): " +
4187  *s);
4188  }
4189  } else if (boost::iequals(*p->get_name(), "geo_coords_encoding")) {
4190  const StringLiteral* str_literal =
4191  dynamic_cast<const StringLiteral*>(p->get_value());
4192  if (str_literal == nullptr) {
4193  throw std::runtime_error("'geo_coords_encoding' option must be a string");
4194  }
4195  const std::string* s = str_literal->get_stringval();
4196  if (boost::iequals(*s, "none")) {
4197  copy_params.geo_coords_encoding = kENCODING_NONE;
4198  copy_params.geo_coords_comp_param = 0;
4199  } else if (boost::iequals(*s, "compressed(32)")) {
4200  copy_params.geo_coords_encoding = kENCODING_GEOINT;
4201  copy_params.geo_coords_comp_param = 32;
4202  } else {
4203  throw std::runtime_error(
4204  "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
4205  "'COMPRESSED(32)'): " +
4206  *s);
4207  }
4208  } else if (boost::iequals(*p->get_name(), "geo_coords_srid")) {
4209  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4210  if (int_literal == nullptr) {
4211  throw std::runtime_error("'geo_coords_srid' option must be an integer");
4212  }
4213  const int srid = int_literal->get_intval();
4214  if (srid == 4326 || srid == 3857 || srid == 900913) {
4215  copy_params.geo_coords_srid = srid;
4216  } else {
4217  throw std::runtime_error(
4218  "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
4219  "900913): " +
4220  std::to_string(srid));
4221  }
4222  } else if (boost::iequals(*p->get_name(), "geo_layer_name")) {
4223  const StringLiteral* str_literal =
4224  dynamic_cast<const StringLiteral*>(p->get_value());
4225  if (str_literal == nullptr) {
4226  throw std::runtime_error("'geo_layer_name' option must be a string");
4227  }
4228  const std::string* layer_name = str_literal->get_stringval();
4229  if (layer_name) {
4230  copy_params.geo_layer_name = *layer_name;
4231  } else {
4232  throw std::runtime_error("Invalid value for 'geo_layer_name' option");
4233  }
4234  } else if (boost::iequals(*p->get_name(), "partitions")) {
      // Only accepted when file_type is already POLYGON at this point (see the
      // order-dependence remark in the function header).
4235  if (copy_params.file_type == import_export::FileType::POLYGON) {
      // NOTE(review): unlike the other branches this uses static_cast without
      // verifying that the value really is a StringLiteral; a non-string value
      // is not rejected here.
4236  const auto partitions =
4237  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4238  CHECK(partitions);
4239  const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
4240  if (partitions_uc != "REPLICATED") {
4241  throw std::runtime_error("PARTITIONS must be REPLICATED for geo COPY");
4242  }
4243  _geo_copy_from_partitions = partitions_uc;
4244  } else {
4245  throw std::runtime_error("PARTITIONS option not supported for non-geo COPY: " +
4246  *p->get_name());
4247  }
4248  } else if (boost::iequals(*p->get_name(), "geo_assign_render_groups")) {
4249  const StringLiteral* str_literal =
4250  dynamic_cast<const StringLiteral*>(p->get_value());
4251  if (str_literal == nullptr) {
4252  throw std::runtime_error("geo_assign_render_groups option must be a boolean.");
4253  }
4254  copy_params.geo_assign_render_groups = bool_from_string_literal(str_literal);
4255  } else if (boost::iequals(*p->get_name(), "geo_explode_collections")) {
4256  const StringLiteral* str_literal =
4257  dynamic_cast<const StringLiteral*>(p->get_value());
4258  if (str_literal == nullptr) {
4259  throw std::runtime_error("geo_explode_collections option must be a boolean.");
4260  }
4261  copy_params.geo_explode_collections = bool_from_string_literal(str_literal);
4262  } else if (boost::iequals(*p->get_name(), "source_srid")) {
4263  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4264  if (int_literal == nullptr) {
4265  throw std::runtime_error("'source_srid' option must be an integer");
4266  }
4267  const int srid = int_literal->get_intval();
      // Only accepted while file_type is DELIMITED at this point.
4268  if (copy_params.file_type == import_export::FileType::DELIMITED) {
4269  copy_params.source_srid = srid;
4270  } else {
4271  throw std::runtime_error(
4272  "'source_srid' option can only be used on csv/tsv files");
4273  }
4274  } else {
4275  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
4276  }
4277  }
4278  }
4279 
      // `tr` accumulates the human-readable result text.
4280  std::string tr;
4281  if (copy_params.file_type == import_export::FileType::POLYGON) {
4282  // geo import
4283  // we do nothing here, except stash the parameters so we can
4284  // do the import when we unwind to the top of the handler
4285  _geo_copy_from_file_name = file_path;
4286  _geo_copy_from_copy_params = copy_params;
4287  _was_geo_copy_from = true;
4288 
4289  // the result string
4290  // @TODO simon.eves put something more useful in here
4291  // except we really can't because we haven't done the import yet!
4292  if (td) {
4293  tr = std::string("Appending geo to table '") + *table + std::string("'...");
4294  } else {
4295  tr = std::string("Creating table '") + *table +
4296  std::string("' and importing geo...");
4297  }
4298  } else {
4299  if (td) {
4300  CHECK(td_with_lock);
4301 
4302  // regular import
4303  auto importer = importer_factory(catalog, td, file_path, copy_params);
      // The lambda's execution time is returned as `ms`; the result text below
      // divides total_time by 1000 to report seconds.
4304  auto ms = measure<>::execution([&]() {
4305  auto res = importer->import();
4306  rows_completed += res.rows_completed;
4307  rows_rejected += res.rows_rejected;
4308  load_truncated = res.load_truncated;
4309  });
4310  total_time += ms;
4311 
4312  // results
4313  if (load_truncated || rows_rejected > copy_params.max_reject) {
4314  LOG(ERROR) << "COPY exited early due to reject records count during multi file "
4315  "processing ";
4316  // if we have crossed the truncated load threshold
4317  load_truncated = true;
      // `success` is not declared in this function; presumably a member of the
      // statement object - TODO confirm.
4318  success = false;
4319  }
4320  if (!load_truncated) {
4321  tr = std::string("Loaded: " + std::to_string(rows_completed) +
4322  " recs, Rejected: " + std::to_string(rows_rejected) +
4323  " recs in " + std::to_string((double)total_time / 1000.0) +
4324  " secs");
4325  } else {
4326  tr = std::string("Loader truncated due to reject count. Processed : " +
4327  std::to_string(rows_completed) + " recs, Rejected: " +
4328  std::to_string(rows_rejected) + " recs in " +
4329  std::to_string((double)total_time / 1000.0) + " secs");
4330  }
4331  } else {
      // A regular (non-geo) COPY never creates the table.
4332  throw std::runtime_error("Table '" + *table + "' must exist before COPY FROM");
4333  }
4334  }
4335 
4336  return_message.reset(new std::string(tr));
4337  LOG(INFO) << tr;
4338 }
4339 
4340 // CREATE ROLE payroll_dept_role;
4341 void CreateRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4342  const auto& currentUser = session.get_currentUser();
4343  if (!currentUser.isSuper) {
4344  throw std::runtime_error("CREATE ROLE " + get_role() +
4345  " failed. It can only be executed by super user.");
4346  }
4347  SysCatalog::instance().createRole(get_role());
4348 }
4349 
4350 // DROP ROLE payroll_dept_role;
4351 void DropRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4352  const auto& currentUser = session.get_currentUser();
4353  if (!currentUser.isSuper) {
4354  throw std::runtime_error("DROP ROLE " + get_role() +
4355  " failed. It can only be executed by super user.");
4356  }
4357  auto* rl = SysCatalog::instance().getRoleGrantee(get_role());
4358  if (!rl) {
4359  throw std::runtime_error("DROP ROLE " + get_role() +
4360  " failed because role with this name does not exist.");
4361  }
4362  SysCatalog::instance().dropRole(get_role());
4363 }
4364 
// Splits a dot-separated hierarchical object name (e.g. "db.table") into its
// components. Empty components are kept: "a..b" yields {"a", "", "b"}, a trailing dot
// yields a trailing empty component, and an empty input yields one empty component.
std::vector<std::string> splitObjectHierName(const std::string& hierName) {
  std::vector<std::string> parts;
  std::string::size_type token_begin = 0;
  for (;;) {
    const std::string::size_type dot_pos = hierName.find('.', token_begin);
    if (dot_pos == std::string::npos) {
      // Last (or only) component: everything from token_begin to the end.
      parts.push_back(hierName.substr(token_begin));
      break;
    }
    parts.push_back(hierName.substr(token_begin, dot_pos - token_begin));
    token_begin = dot_pos + 1;
  }
  return parts;
}
4370 
// Extracts the plain object name from a possibly qualified, dot-separated name.
//
// objectHierName: name as written in the statement, e.g. "table" or "db.table".
// objectType:     upper-case object type; "DATABASE", "TABLE", "DASHBOARD", "VIEW"
//                 and "SERVER" are accepted (exact, case-sensitive comparison).
//
// Returns the single component for DATABASE; for the other types, the only component
// or the second of two (the first component is discarded without validation).
// Throws std::runtime_error for an unsupported type or a wrong number of components.
4371 std::string extractObjectNameFromHierName(const std::string& objectHierName,
4372  const std::string& objectType,
      // NOTE(review): listing line 4373 (the rest of the parameter list and the opening
      // brace) is missing from this excerpt. A caller in this file passes a catalog as
      // third argument; the visible body does not reference a third parameter.
4374  std::string objectName;
4375  std::vector<std::string> componentNames = splitObjectHierName(objectHierName);
4376  if (objectType.compare("DATABASE") == 0) {
      // A database name must be unqualified.
4377  if (componentNames.size() == 1) {
4378  objectName = componentNames[0];
4379  } else {
4380  throw std::runtime_error("DB object name is not correct " + objectHierName);
4381  }
4382  } else {
4383  if (objectType.compare("TABLE") == 0 || objectType.compare("DASHBOARD") == 0 ||
4384  objectType.compare("VIEW") == 0 || objectType.compare("SERVER") == 0) {
4385  switch (componentNames.size()) {
4386  case (1): {
4387  objectName = componentNames[0];
4388  break;
4389  }
4390  case (2): {
      // Qualified form: keep only the part after the dot.
4391  objectName = componentNames[1];
4392  break;
4393  }
4394  default: {
4395  throw std::runtime_error("DB object name is not correct " + objectHierName);
4396  }
4397  }
4398  } else {
4399  throw std::runtime_error("DB object type " + objectType + " is not supported.");
4400  }
4401  }
4402  return objectName;
4403 }
4404 
// Maps a privilege string plus the type of the object it is applied to onto a pair
// (AccessPrivileges, DBObjectType), using a static lookup table.
//
// privs:       privilege text; looked up by exact match, so it must equal one of the
//              upper-case keys below (e.g. "SELECT", "CREATE TABLE").
// objectType:  type of the object named in the statement.
// object_name: used only in the error message.
//
// Throws std::runtime_error when the (privs, objectType) combination is not in the
// table.
//
// NOTE(review): in this excerpt the value half of every table entry (the listing line
// following each key line) is missing, as is the line closing the initializer, so the
// privilege each key maps to cannot be read from here.
4405 static std::pair<AccessPrivileges, DBObjectType> parseStringPrivs(
4406  const std::string& privs,
4407  const DBObjectType& objectType,
4408  const std::string& object_name) {
4409  static const std::map<std::pair<const std::string, const DBObjectType>,
4410  std::pair<const AccessPrivileges, const DBObjectType>>
4411  privileges_lookup{
4412  {{"ALL"s, DatabaseDBObjectType},
4415  {{"ALL"s, DashboardDBObjectType},
4418  {{"ALL"s, ServerDBObjectType},
4420 
4421  {{"CREATE TABLE"s, DatabaseDBObjectType},
4423  {{"CREATE"s, DatabaseDBObjectType},
4425  {{"SELECT"s, DatabaseDBObjectType},
4427  {{"INSERT"s, DatabaseDBObjectType},
4429  {{"TRUNCATE"s, DatabaseDBObjectType},
4431  {{"UPDATE"s, DatabaseDBObjectType},
4433  {{"DELETE"s, DatabaseDBObjectType},
4435  {{"DROP"s, DatabaseDBObjectType},
4437  {{"ALTER"s, DatabaseDBObjectType},
4439 
4440  {{"SELECT"s, TableDBObjectType},
4442  {{"INSERT"s, TableDBObjectType},
4444  {{"TRUNCATE"s, TableDBObjectType},
4446  {{"UPDATE"s, TableDBObjectType},
4448  {{"DELETE"s, TableDBObjectType},
4450  {{"DROP"s, TableDBObjectType},
4452  {{"ALTER"s, TableDBObjectType},
4454 
4455  {{"CREATE VIEW"s, DatabaseDBObjectType},
4457  {{"SELECT VIEW"s, DatabaseDBObjectType},
4459  {{"DROP VIEW"s, DatabaseDBObjectType},
4461  {{"SELECT"s, ViewDBObjectType},
4464 
4465  {{"CREATE DASHBOARD"s, DatabaseDBObjectType},
4467  {{"EDIT DASHBOARD"s, DatabaseDBObjectType},
4469  {{"VIEW DASHBOARD"s, DatabaseDBObjectType},
4471  {{"DELETE DASHBOARD"s, DatabaseDBObjectType},
4473  {{"VIEW"s, DashboardDBObjectType},
4475  {{"EDIT"s, DashboardDBObjectType},
4477  {{"DELETE"s, DashboardDBObjectType},
4479 
4480  {{"CREATE SERVER"s, DatabaseDBObjectType},
4482  {{"DROP SERVER"s, DatabaseDBObjectType},
4484  {{"DROP"s, ServerDBObjectType},
4486  {{"ALTER SERVER"s, DatabaseDBObjectType},
4488  {{"ALTER"s, ServerDBObjectType},
4490 
4491  {{"VIEW SQL EDITOR"s, DatabaseDBObjectType},
4493  {{"ACCESS"s, DatabaseDBObjectType},
4495 
      // Exact-match lookup on the (privilege text, object type) pair.
4496  auto result = privileges_lookup.find(std::make_pair(privs, objectType));
4497  if (result == privileges_lookup.end()) {
4498  throw std::runtime_error("Privileges " + privs + " on DB object " + object_name +
4499  " are not correct.");
4500  }
4501  return result->second;
4502 }
4503 
4504 static DBObject createObject(const std::string& objectName, DBObjectType objectType) {
4505  if (objectType == DashboardDBObjectType) {
4506  int32_t dashboard_id = -1;
4507  if (!objectName.empty()) {
4508  try {
4509  dashboard_id = stoi(objectName);
4510  } catch (const std::exception&) {
4511  throw std::runtime_error(
4512  "Privileges on dashboards should be changed via integer dashboard ID");
4513  }
4514  }
4515  return DBObject(dashboard_id, objectType);
4516  } else {
4517  return DBObject(objectName, objectType);
4518  }
4519 }
4520 
4521 // GRANT SELECT/INSERT/CREATE ON TABLE payroll_table TO payroll_dept_role;
4522 void GrantPrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4523  auto& catalog = session.getCatalog();
4524  const auto& currentUser = session.get_currentUser();
4525  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4526  const auto objectName =
4527  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4528  auto objectType = DBObjectTypeFromString(parserObjectType);
4529  if (objectType == ServerDBObjectType && !g_enable_fsi) {
4530  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
4531  }
4532  DBObject dbObject = createObject(objectName, objectType);
4533  /* verify object ownership if not suser */
4534  if (!currentUser.isSuper) {
4535  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4536  throw std::runtime_error(
4537  "GRANT failed. It can only be executed by super user or owner of the "
4538  "object.");
4539  }
4540  }
4541  /* set proper values of privileges & grant them to the object */
4542  std::vector<DBObject> objects(get_privs().size(), dbObject);
4543  for (size_t i = 0; i < get_privs().size(); ++i) {
4544  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
4545  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
4546  objects[i].setPrivileges(priv.first);
4547  objects[i].setPermissionType(priv.second);
4548  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
4549  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
4550  }
4551  }
4552  SysCatalog::instance().grantDBObjectPrivilegesBatch(grantees, objects, catalog);
4553 }
4554 
4555 // REVOKE SELECT/INSERT/CREATE ON TABLE payroll_table FROM payroll_dept_role;
4556 void RevokePrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4557  auto& catalog = session.getCatalog();
4558  const auto& currentUser = session.get_currentUser();
4559  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4560  const auto objectName =
4561  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4562  auto objectType = DBObjectTypeFromString(parserObjectType);
4563  if (objectType == ServerDBObjectType && !g_enable_fsi) {
4564  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
4565  }
4566  DBObject dbObject = createObject(objectName, objectType);
4567  /* verify object ownership if not suser */
4568  if (!currentUser.isSuper) {
4569  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4570  throw std::runtime_error(
4571  "REVOKE failed. It can only be executed by super user or owner of the "
4572  "object.");
4573  }
4574  }
4575  /* set proper values of privileges & grant them to the object */
4576  std::vector<DBObject> objects(get_privs().size(), dbObject);
4577  for (size_t i = 0; i < get_privs().size(); ++i) {
4578  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
4579  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
4580  objects[i].setPrivileges(priv.first);
4581  objects[i].setPermissionType(priv.second);
4582  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
4583  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
4584  }
4585  }
4586  SysCatalog::instance().revokeDBObjectPrivilegesBatch(grantees, objects, catalog);
4587 }
4588 
4589 // NOTE: not used currently, will we ever use it?
4590 // SHOW ON TABLE payroll_table FOR payroll_dept_role;
// Prints (via printf, to stdout) the privileges a role holds on one object.
// Throws std::runtime_error unless the session user is a super user or passes
// verifyDBObjectOwnership for the object.
// NOTE(review): the condition guarding each printf below is not visible in this
// listing; only the privilege names that can be printed per object type are.
4591 void ShowPrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4592  auto& catalog = session.getCatalog();
4593  const auto& currentUser = session.get_currentUser();
4594  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
4595  const auto objectName =
4596  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
4597  auto objectType = DBObjectTypeFromString(parserObjectType);
4598  DBObject dbObject = createObject(objectName, objectType);
4599  /* verify object ownership if not suser */
4600  if (!currentUser.isSuper) {
4601  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
4602  throw std::runtime_error(
4603  "SHOW ON " + get_object() + " FOR " + get_role() +
4604  " failed. It can only be executed by super user or owner of the object.");
4605  }
4606  }
4607  /* get values of privileges for the object and report them */
// The privileges are read back from dbObject after this call.
4608  SysCatalog::instance().getDBObjectPrivileges(get_role(), dbObject, catalog);
4609  AccessPrivileges privs = dbObject.getPrivileges();
4610  printf("\nPRIVILEGES ON %s FOR %s ARE SET AS FOLLOWING: ",
4611  get_object().c_str(),
4612  get_role().c_str());
4613 
4614  if (objectType == DBObjectType::DatabaseDBObjectType) {
4616  printf(" CREATE");
4617  }
4619  printf(" DROP");
4620  }
4621  } else if (objectType == DBObjectType::TableDBObjectType) {
4623  printf(" CREATE");
4624  }
4626  printf(" DROP");
4627  }
4629  printf(" SELECT");
4630  }
4632  printf(" INSERT");
4633  }
4635  printf(" UPDATE");
4636  }
4638  printf(" DELETE");
4639  }
4641  printf(" TRUNCATE");
4642  }
4644  printf(" ALTER");
4645  }
4646  } else if (objectType == DBObjectType::DashboardDBObjectType) {
4648  printf(" CREATE");
4649  }
4651  printf(" DELETE");
4652  }
4654  printf(" VIEW");
4655  }
4657  printf(" EDIT");
4658  }
4659  } else if (objectType == DBObjectType::ViewDBObjectType) {
4661  printf(" CREATE");
4662  }
4664  printf(" DROP");
4665  }
4667  printf(" SELECT");
4668  }
4670  printf(" INSERT");
4671  }
4673  printf(" UPDATE");
4674  }
4676  printf(" DELETE");
4677  }
4678  }
4679  printf(".\n");
4680 }
4681 
4682 // GRANT payroll_dept_role TO joe;
4683 void GrantRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4684  const auto& currentUser = session.get_currentUser();
4685  if (!currentUser.isSuper) {
4686  throw std::runtime_error(
4687  "GRANT failed, because it can only be executed by super user.");
4688  }
4689  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
4690  get_grantees().end()) {
4691  throw std::runtime_error(
4692  "Request to grant role failed because mapd root user has all privileges by "
4693  "default.");
4694  }
4695  SysCatalog::instance().grantRoleBatch(get_roles(), get_grantees());
4696 }
4697 
4698 // REVOKE payroll_dept_role FROM joe;get_users
4699 void RevokeRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4700  const auto& currentUser = session.get_currentUser();
4701  if (!currentUser.isSuper) {
4702  throw std::runtime_error(
4703  "REVOKE failed, because it can only be executed by super user.");
4704  }
4705  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
4706  get_grantees().end()) {
4707  throw std::runtime_error(
4708  "Request to revoke role failed because privileges can not be revoked from mapd "
4709  "root user.");
4710  }
4711  SysCatalog::instance().revokeRoleBatch(get_roles(), get_grantees());
4712 }
4713 
// Stores the CREATE statement dumped by the catalog for *table_ in create_stmt_.
// Throws std::runtime_error when the table/view is unknown, when the session user
// has no privileges on it, or when it is a view and the user is not a super user.
4714 void ShowCreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4715  using namespace Catalog_Namespace;
4716 
4717  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4720 
4721  auto& catalog = session.getCatalog();
4722  const TableDescriptor* td = catalog.getMetadataForTable(*table_, false);
4723  if (!td) {
4724  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
4725  }
4726 
// NOTE(review): the declaration of dbObject is not visible in this listing.
4728  dbObject.loadKey(catalog);
4729  std::vector<DBObject> privObjects = {dbObject};
4730 
// Same message as for a missing table; presumably so that a user without
// privileges cannot tell whether the object exists - confirm intent.
4731  if (!SysCatalog::instance().hasAnyPrivileges(session.get_currentUser(), privObjects)) {
4732  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
4733  }
4734  if (td->isView && !session.get_currentUser().isSuper) {
4735  // TODO: we need to run a validate query to ensure the user has access to the
4736  // underlying table, but we do not have any of the machinery in here. Disable for
4737  // now, unless the current user is a super user.
4738  throw std::runtime_error("SHOW CREATE TABLE not yet supported for views");
4739  }
4740 
4741  create_stmt_ = catalog.dumpCreateTable(td);
4742 }
4743 
// Runs *select_stmt and streams its results to *file_path through a QueryExporter.
// The query is executed once per outer fragment (or once in total when the fragment
// count is zero) and each batch of results is handed to the exporter.
// Throws std::runtime_error for an empty file path or when the export directory for
// a relative path cannot be created.
// NOTE(review): several declarations (file_type, file_compression,
// array_null_handling) and the absolute-path validation are not visible in this
// listing.
4744 void ExportQueryStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4745  auto session_copy = session;
// null_deleter: the shared_ptr does not own session_copy, which lives on this stack
// frame.
4746  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4747  &session_copy, boost::null_deleter());
4748  auto query_state = query_state::QueryState::create(session_ptr, *select_stmt);
4749  auto stdlog = STDLOG(query_state);
4750  auto query_state_proxy = query_state->createQueryStateProxy();
4751 
4752  auto& catalog = session.getCatalog();
4753 
4754  LocalConnector local_connector;
4755 
// NOTE(review): when leafs_connector_ was null it is pointed at a stack-local object
// here and no reset is visible before return - confirm it is not used afterwards.
4756  if (!leafs_connector_) {
4757  leafs_connector_ = &local_connector;
4758  }
4759 
4760  import_export::CopyParams copy_params;
4761  // @TODO(se) move rest to CopyParams when we have a Thrift endpoint
4764  std::string layer_name;
4769 
4770  parseOptions(copy_params, file_type, layer_name, file_compression, array_null_handling);
4771 
4772  if (file_path->empty()) {
4773  throw std::runtime_error("Invalid file path for COPY TO");
4774  } else if (!boost::filesystem::path(*file_path).is_absolute()) {
// Relative paths keep only their file name and are redirected into a per-session
// directory under the catalog base path.
4775  std::string file_name = boost::filesystem::path(*file_path).filename().string();
4776  std::string file_dir =
4777  catalog.getBasePath() + "/mapd_export/" + session.get_session_id() + "/";
4778  if (!boost::filesystem::exists(file_dir)) {
4779  if (!boost::filesystem::create_directories(file_dir)) {
4780  throw std::runtime_error("Directory " + file_dir + " cannot be created.");
4781  }
4782  }
4783  *file_path = file_dir + file_name;
4784  } else {
4785  // Above branch will create a new file in the mapd_export directory. If that path is
4786  // not exercised, go through applicable file path validations.
4789  }
4790 
4791  // get column info
// Always uses the local connector, even when leafs_connector_ was supplied.
4792  auto column_info_result =
4793  local_connector.query(query_state_proxy, *select_stmt, {}, true);
4794 
4795  // create exporter for requested file type
4796  auto query_exporter = import_export::QueryExporter::create(file_type);
4797 
4798  // default layer name to file path stem if it wasn't specified
4799  if (layer_name.size() == 0) {
4800  layer_name = boost::filesystem::path(*file_path).stem().string();
4801  }
4802 
4803  // begin export
4804  query_exporter->beginExport(*file_path,
4805  layer_name,
4806  copy_params,
4807  column_info_result.targets_meta,
4808  file_compression,
4809  array_null_handling);
4810 
4811  // how many fragments?
4812  size_t outer_frag_count =
4813  leafs_connector_->getOuterFragmentCount(query_state_proxy, *select_stmt);
// A count of zero still runs the loop once, with no fragment restriction.
4814  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
4815 
4816  // loop fragments
4817  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
4818  // limit the query to just this fragment
4819  std::vector<size_t> allowed_outer_fragment_indices;
4820  if (outer_frag_count) {
4821  allowed_outer_fragment_indices.push_back(outer_frag_idx);
4822  }
4823 
4824  // run the query
4825  std::vector<AggregatedResult> query_results = leafs_connector_->query(
4826  query_state_proxy, *select_stmt, allowed_outer_fragment_indices);
4827 
4828  // export the results
4829  query_exporter->exportResults(query_results);
4830  }
4831 
4832  // end export
4833  query_exporter->endExport();
4834 }
4835 
// Translates the statement's WITH options into export settings.
//
// Option names are matched case-insensitively. Recognised names: delimiter, nulls,
// header, quote, escape, line_delimiter, quoted, file_type, layer_name,
// file_compression and array_null_handling. Every option value must be a string
// literal; delimiter, quote, escape and line_delimiter must be exactly one
// character long.
//
// Throws std::runtime_error for an unknown option name, a non-string value, or a
// value outside the accepted set.
// NOTE(review): the file_type and file_compression parameters, their defaults and
// the assignments made in their branches are not visible in this listing.
4836 void ExportQueryStmt::parseOptions(
4837  import_export::CopyParams& copy_params,
4839  std::string& layer_name,
4841  import_export::QueryExporter::ArrayNullHandling& array_null_handling) {
4842  // defaults for non-CopyParams values
4844  layer_name.clear();
4846 
4847  if (!options.empty()) {
4848  for (auto& p : options) {
4849  if (boost::iequals(*p->get_name(), "delimiter")) {
4850  const StringLiteral* str_literal =
4851  dynamic_cast<const StringLiteral*>(p->get_value());
4852  if (str_literal == nullptr) {
4853  throw std::runtime_error("Delimiter option must be a string.");
4854  } else if (str_literal->get_stringval()->length() != 1) {
4855  throw std::runtime_error("Delimiter must be a single character string.");
4856  }
4857  copy_params.delimiter = (*str_literal->get_stringval())[0];
4858  } else if (boost::iequals(*p->get_name(), "nulls")) {
4859  const StringLiteral* str_literal =
4860  dynamic_cast<const StringLiteral*>(p->get_value());
4861  if (str_literal == nullptr) {
4862  throw std::runtime_error("Nulls option must be a string.");
4863  }
4864  copy_params.null_str = *str_literal->get_stringval();
4865  } else if (boost::iequals(*p->get_name(), "header")) {
4866  const StringLiteral* str_literal =
4867  dynamic_cast<const StringLiteral*>(p->get_value());
4868  if (str_literal == nullptr) {
4869  throw std::runtime_error("Header option must be a boolean.");
4870  }
// NOTE(review): the remainder of this expression is not visible in this listing.
4871  copy_params.has_header = bool_from_string_literal(str_literal)
4874  } else if (boost::iequals(*p->get_name(), "quote")) {
4875  const StringLiteral* str_literal =
4876  dynamic_cast<const StringLiteral*>(p->get_value());
4877  if (str_literal == nullptr) {
4878  throw std::runtime_error("Quote option must be a string.");
4879  } else if (str_literal->get_stringval()->length() != 1) {
4880  throw std::runtime_error("Quote must be a single character string.");
4881  }
4882  copy_params.quote = (*str_literal->get_stringval())[0];
4883  } else if (boost::iequals(*p->get_name(), "escape")) {
4884  const StringLiteral* str_literal =
4885  dynamic_cast<const StringLiteral*>(p->get_value());
4886  if (str_literal == nullptr) {
4887  throw std::runtime_error("Escape option must be a string.");
4888  } else if (str_literal->get_stringval()->length() != 1) {
4889  throw std::runtime_error("Escape must be a single character string.");
4890  }
4891  copy_params.escape = (*str_literal->get_stringval())[0];
4892  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
4893  const StringLiteral* str_literal =
4894  dynamic_cast<const StringLiteral*>(p->get_value());
4895  if (str_literal == nullptr) {
4896  throw std::runtime_error("Line_delimiter option must be a string.");
4897  } else if (str_literal->get_stringval()->length() != 1) {
4898  throw std::runtime_error("Line_delimiter must be a single character string.");
4899  }
4900  copy_params.line_delim = (*str_literal->get_stringval())[0];
4901  } else if (boost::iequals(*p->get_name(), "quoted")) {
4902  const StringLiteral* str_literal =
4903  dynamic_cast<const StringLiteral*>(p->get_value());
4904  if (str_literal == nullptr) {
4905  throw std::runtime_error("Quoted option must be a boolean.");
4906  }
4907  copy_params.quoted = bool_from_string_literal(str_literal);
4908  } else if (boost::iequals(*p->get_name(), "file_type")) {
4909  const StringLiteral* str_literal =
4910  dynamic_cast<const StringLiteral*>(p->get_value());
4911  if (str_literal == nullptr) {
4912  throw std::runtime_error("File Type option must be a string.");
4913  }
// The value is lower-cased before comparison, so it is case-insensitive.
4914  auto file_type_str =
4915  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
4916  if (file_type_str == "csv") {
4918  } else if (file_type_str == "geojson") {
4920  } else if (file_type_str == "geojsonl") {
4922  } else if (file_type_str == "shapefile") {
4924  } else {
4925  throw std::runtime_error(
4926  "File Type option must be 'CSV', 'GeoJSON', 'GeoJSONL' or 'Shapefile'");
4927  }
4928  } else if (boost::iequals(*p->get_name(), "layer_name")) {
4929  const StringLiteral* str_literal =
4930  dynamic_cast<const StringLiteral*>(p->get_value());
4931  if (str_literal == nullptr) {
4932  throw std::runtime_error("Layer Name option must be a string.");
4933  }
4934  layer_name = *str_literal->get_stringval();
4935  } else if (boost::iequals(*p->get_name(), "file_compression")) {
4936  const StringLiteral* str_literal =
4937  dynamic_cast<const StringLiteral*>(p->get_value());
4938  if (str_literal == nullptr) {
4939  throw std::runtime_error("File Compression option must be a string.");
4940  }
4941  auto file_compression_str =
4942  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
4943  if (file_compression_str == "none") {
4945  } else if (file_compression_str == "gzip") {
4947  } else if (file_compression_str == "zip") {
4949  } else {
4950  throw std::runtime_error(
4951  "File Compression option must be 'None', 'GZip', or 'Zip'");
4952  }
4953  } else if (boost::iequals(*p->get_name(), "array_null_handling")) {
4954  const StringLiteral* str_literal =
4955  dynamic_cast<const StringLiteral*>(p->get_value());
4956  if (str_literal == nullptr) {
4957  throw std::runtime_error("Array Null Handling option must be a string.");
4958  }
4959  auto array_null_handling_str =
4960  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
4961  if (array_null_handling_str == "abort") {
4962  array_null_handling =
4964  } else if (array_null_handling_str == "raw") {
4965  array_null_handling =
4967  } else if (array_null_handling_str == "zero") {
4968  array_null_handling =
4970  } else if (array_null_handling_str == "nullfield") {
4971  array_null_handling =
4973  } else {
4974  throw std::runtime_error(
4975  "Array Null Handling option must be 'Abort', 'Raw', 'Zero', or "
4976  "'NullField'");
4977  }
4978  } else {
4979  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
4980  }
4981  }
4982  }
4983 }
4984 
4985 CreateViewStmt::CreateViewStmt(const rapidjson::Value& payload) {
4986  CHECK(payload.HasMember("name"));
4987  view_name_ = json_str(payload["name"]);
4988 
4989  if_not_exists_ = false;
4990  if (payload.HasMember("ifNotExists")) {
4991  if_not_exists_ = json_bool(payload["ifNotExists"]);
4992  }
4993 
4994  CHECK(payload.HasMember("query"));
4995  select_query_ = json_str(payload["query"]);
4996  std::regex newline_re("\\n");
4997  select_query_ = std::regex_replace(select_query_, newline_re, " ");
4998  // ensure a trailing semicolon is present on the select query
4999  if (select_query_.back() != ';') {
5000  select_query_.push_back(';');
5001  }
5002 }
5003 
// Creates the view view_name_ over select_query_ in the session's catalog.
// Returns without doing anything when validateNonExistentTableOrView() reports
// false. The query is passed through pg_shim and processed by Calcite before the
// view's TableDescriptor is stored, then the view is registered as a DB object for
// the session user.
// NOTE(review): the privilege check condition, the lock argument and one
// TableDescriptor assignment are not visible in this listing.
5004 void CreateViewStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5005  auto session_copy = session;
// null_deleter: the shared_ptr does not own session_copy, which lives on this stack
// frame.
5006  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
5007  &session_copy, boost::null_deleter());
5008  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
5009  auto stdlog = STDLOG(query_state);
5010  auto& catalog = session.getCatalog();
5011 
5012  if (!catalog.validateNonExistentTableOrView(view_name_, if_not_exists_)) {
5013  return;
5014  }
5017  throw std::runtime_error("View " + view_name_ +
5018  " will not be created. User has no create view privileges.");
5019  }
5020 
5021  const auto query_after_shim = pg_shim(select_query_);
5022 
5023  // this now also ensures that access permissions are checked
5024  catalog.getCalciteMgr()->process(query_state->createQueryStateProxy(),
5025  query_after_shim,
5026  {},
5027  true,
5028  false,
5029  false,
5030  true);
5031 
5032  // Take write lock after the query is processed to ensure no deadlocks
5033  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
5036 
// A view is stored as a TableDescriptor with isView set, no columns and the shimmed
// SQL text.
5037  TableDescriptor td;
5038  td.tableName = view_name_;
5039  td.userId = session.get_currentUser().userId;
5040  td.nColumns = 0;
5041  td.isView = true;
5042  td.viewSQL = query_after_shim;
5043  td.fragmenter = nullptr;
5045  td.maxFragRows =
5046  DEFAULT_FRAGMENT_ROWS; // @todo this stuff should not be InsertOrderFragmenter
5047  td.maxChunkSize =
5048  DEFAULT_MAX_CHUNK_SIZE; // @todo this stuff should not be InsertOrderFragmenter
5049  td.fragPageSize = DEFAULT_PAGE_SIZE;
5050  td.maxRows = DEFAULT_MAX_ROWS;
5051  catalog.createTable(td, {}, {}, true);
5052 
5053  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
5054  // privileges
5055  SysCatalog::instance().createDBObject(
5056  session.get_currentUser(), view_name_, ViewDBObjectType, catalog);
5057 }
5058 
5059 DropViewStmt::DropViewStmt(const rapidjson::Value& payload) {
5060  CHECK(payload.HasMember("viewName"));
5061  view_name = std::make_unique<std::string>(json_str(payload["viewName"]));
5062 
5063  if_exists = false;
5064  if (payload.HasMember("ifExists")) {
5065  if_exists = json_bool(payload["ifExists"]);
5066  }
5067 }
5068 
// Drops the view *view_name from the session's catalog while holding a table schema
// write lock on it.
// When acquiring the lock/descriptor throws std::runtime_error, the error is
// swallowed if if_exists is set and rethrown otherwise.
// Throws std::runtime_error when the privilege check fails.
// NOTE(review): the lock acquisition call, the privilege arguments and one statement
// before dropTable are not visible in this listing.
5069 void DropViewStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5070  auto& catalog = session.getCatalog();
5071 
5072  const TableDescriptor* td{nullptr};
5073  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
5074 
5075  try {
5076  td_with_lock =
5077  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
5079  catalog, *view_name, false));
5080  td = (*td_with_lock)();
5081  } catch (const std::runtime_error& e) {
5082  if (if_exists) {
5083  return;
5084  } else {
// NOTE(review): `throw e;` throws a copy typed std::runtime_error, so an exception of
// a derived type is sliced; a bare `throw;` would rethrow the original object.
5085  throw e;
5086  }
5087  }
5088 
5089  CHECK(td);
5090  CHECK(td_with_lock);
5091 
5092  if (!session.checkDBAccessPrivileges(
5094  throw std::runtime_error("View " + *view_name +
5095  " will not be dropped. User has no drop view privileges.");
5096  }
5097 
5099  catalog.dropTable(td);
5100 }
5101 
5102 static void checkStringLiteral(const std::string& option_name,
5103  const std::unique_ptr<NameValueAssign>& p) {
5104  CHECK(p);
5105  if (!dynamic_cast<const StringLiteral*>(p->get_value())) {
5106  throw std::runtime_error(option_name + " option must be a string literal.");
5107  }
5108 }
5109 
// Creates database *db_name. Super user only.
// Returns silently when the database already exists and if_not_exists_ is set.
// The only supported option is OWNER (a string literal naming an existing user);
// without it the session user becomes the owner.
// Throws std::runtime_error for a non-super user, an unknown owner or any other
// option name.
// NOTE(review): the lock argument and the declarations of db_meta and user are not
// visible in this listing.
5110 void CreateDBStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5111  if (!session.get_currentUser().isSuper) {
5112  throw std::runtime_error(
5113  "CREATE DATABASE command can only be executed by super user.");
5114  }
5115 
5116  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
5119 
// When the database exists but if_not_exists_ is false, execution continues to
// createDatabase() below.
5121  if (SysCatalog::instance().getMetadataForDB(*db_name, db_meta) && if_not_exists_) {
5122  return;
5123  }
5124  int ownerId = session.get_currentUser().userId;
5125  if (!name_value_list.empty()) {
5126  for (auto& p : name_value_list) {
5127  if (boost::iequals(*p->get_name(), "owner")) {
// checkStringLiteral has validated the type, so the static_cast below is safe.
5128  checkStringLiteral("Owner name", p);
5129  const std::string* str =
5130  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
5132  if (!SysCatalog::instance().getMetadataForUser(*str, user)) {
5133  throw std::runtime_error("User " + *str + " does not exist.");
5134  }
5135  ownerId = user.userId;
5136  } else {
5137  throw std::runtime_error("Invalid CREATE DATABASE option " + *p->get_name() +
5138  ". Only OWNER supported.");
5139  }
5140  }
5141  }
5142  SysCatalog::instance().createDatabase(*db_name, ownerId);
5143 }
5144 
// Drops database *db_name. Super user only.
// Returns silently when the database does not exist and if_exists_ is set;
// otherwise a missing database raises std::runtime_error.
// NOTE(review): the lock argument and the declaration of db are not visible in this
// listing.
5145 void DropDBStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5146  if (!session.get_currentUser().isSuper) {
5147  throw std::runtime_error("DROP DATABASE command can only be executed by super user.");
5148  }
5149 
5150  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
5153 
5155  if (!SysCatalog::instance().getMetadataForDB(*db_name, db)) {
5156  if (if_exists_) {
5157  return;
5158  }
5159  throw std::runtime_error("Database " + *db_name + " does not exist.");
5160  }
5161 
// NOTE(review): this branch cannot be taken - non-super users were already rejected
// at the top of the function, so the owner is never allowed to drop the database.
5162  if (!session.get_currentUser().isSuper &&
5163  session.get_currentUser().userId != db.dbOwner) {
5164  throw std::runtime_error("Only the super user or the owner can drop database.");
5165  }
5166 
5167  SysCatalog::instance().dropDatabase(db);
5168 }
5169 
5170 static bool readBooleanLiteral(const std::string& option_name,
5171  const std::unique_ptr<NameValueAssign>& p) {
5172  CHECK(p);
5173  const std::string* str =
5174  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
5175  if (boost::iequals(*str, "true")) {
5176  return true;
5177  } else if (boost::iequals(*str, "false")) {
5178  return false;
5179  } else {
5180  throw std::runtime_error("Value to " + option_name + " must be TRUE or FALSE.");
5181  }
5182 }
5183 
5184 void CreateUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5185  std::string passwd;
5186  bool is_super = false;
5187  std::string default_db;
5188  bool can_login = true;
5189  for (auto& p : name_value_list) {
5190  if (boost::iequals(*p->get_name(), "password")) {
5191  checkStringLiteral("Password", p);
5192  passwd = *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
5193  } else if (boost::iequals(*p->get_name(), "is_super")) {
5194  checkStringLiteral("IS_SUPER", p);
5195  is_super = readBooleanLiteral("IS_SUPER", p);
5196  } else if (boost::iequals(*p->get_name(), "default_db")) {
5197  checkStringLiteral("DEFAULT_DB", p);
5198  default_db = *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
5199  } else if (boost::iequals(*p->get_name(), "can_login")) {
5200  checkStringLiteral("CAN_LOGIN", p);
5201  can_login = readBooleanLiteral("can_login", p);
5202  } else {
5203  throw std::runtime_error("Invalid CREATE USER option " + *p->get_name() +
5204  ". Should be PASSWORD, IS_SUPER, CAN_LOGIN"
5205  " or DEFAULT_DB.");
5206  }
5207  }
5208  if (!session.get_currentUser().isSuper) {
5209  throw std::runtime_error("Only super user can create new users.");
5210  }
5211  SysCatalog::instance().createUser(*user_name, passwd, is_super, default_db, can_login);
5212 }
5213 
// Alters attributes of user *user_name.
// Recognised options (case-insensitive): PASSWORD, IS_SUPER, DEFAULT_DB, CAN_LOGIN.
// A null pointer passed to alterUser() means "option not given"; nothing is changed
// when no option was supplied.
// A super user may alter anyone; other users may only change their own password or
// default database.
// Throws std::runtime_error for an unknown or malformed option, an unknown user, or
// insufficient rights.
// NOTE(review): the declaration of `user` is not visible in this listing.
5214 void AlterUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5215  // Parse the statement
5216  const std::string* passwd = nullptr;
5217  bool is_super = false;
5218  bool* is_superp = nullptr;
5219  const std::string* default_db = nullptr;
5220  bool can_login = true;
5221  bool* can_loginp = nullptr;
5222  for (auto& p : name_value_list) {
5223  if (boost::iequals(*p->get_name(), "password")) {
5224  checkStringLiteral("Password", p);
5225  passwd = static_cast<const StringLiteral*>(p->get_value())->get_stringval();
5226  } else if (boost::iequals(*p->get_name(), "is_super")) {
5227  checkStringLiteral("IS_SUPER", p);
5228  is_super = readBooleanLiteral("IS_SUPER", p);
5229  is_superp = &is_super;
5230  } else if (boost::iequals(*p->get_name(), "default_db")) {
5231  if (dynamic_cast<const StringLiteral*>(p->get_value())) {
5232  default_db = static_cast<const StringLiteral*>(p->get_value())->get_stringval();
5233  } else if (dynamic_cast<const NullLiteral*>(p->get_value())) {
// A NULL literal is passed on as an empty string; the static gives the pointer a
// target that outlives this loop iteration.
5234  static std::string blank;
5235  default_db = &blank;
5236  } else {
5237  throw std::runtime_error(
5238  "DEFAULT_DB option must be either a string literal or a NULL literal.");
5239  }
5240  } else if (boost::iequals(*p->get_name(), "can_login")) {
5241  checkStringLiteral("CAN_LOGIN", p);
5242  can_login = readBooleanLiteral("CAN_LOGIN", p);
5243  can_loginp = &can_login;
5244  } else {
5245  throw std::runtime_error("Invalid ALTER USER option " + *p->get_name() +
5246  ". Should be PASSWORD, DEFAULT_DB, CAN_LOGIN"
5247  " or IS_SUPER.");
5248  }
5249  }
5250 
5251  // Check if the user is authorized to execute ALTER USER statement
5253  if (!SysCatalog::instance().getMetadataForUser(*user_name, user)) {
5254  throw std::runtime_error("User " + *user_name + " does not exist.");
5255  }
5256  if (!session.get_currentUser().isSuper) {
5257  if (session.get_currentUser().userId != user.userId) {
5258  throw std::runtime_error("Only super user can change another user's attributes.");
5259  } else if (is_superp || can_loginp) {
5260  throw std::runtime_error(
5261  "A user can only update their own password or default database.");
5262  }
5263  }
5264 
5265  if (passwd || is_superp || default_db || can_loginp) {
5266  SysCatalog::instance().alterUser(
5267  user.userId, passwd, is_superp, default_db, can_loginp);
5268  }
5269 }
5270 
5271 void DropUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5272  if (!session.get_currentUser().isSuper) {
5273  throw std::runtime_error("Only super user can drop users.");
5274  }
5275  SysCatalog::instance().dropUser(*user_name);
5276 }
5277 
// Dumps table *table to *path through a TableArchiver, after two privilege checks
// (select and create, per the error messages).
// Throws std::runtime_error when either check fails.
// NOTE(review): the privilege arguments and the second check's condition are not
// visible in this listing.
5278 void DumpTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5279  // check access privileges
5280  if (!session.checkDBAccessPrivileges(
5282  throw std::runtime_error("Table " + *table +
5283  " will not be dumped. User has no select privileges.");
5284  }
5287  throw std::runtime_error("Table " + *table +
5288  " will not be dumped. User has no create privileges.");
5289  }
5290  auto& catalog = session.getCatalog();
// NOTE(review): td is handed to dumpTable() without a visible null check - confirm
// how a missing table is handled.
5291  const TableDescriptor* td = catalog.getMetadataForTable(*table);
5292  TableArchiver table_archiver(&catalog);
5293  table_archiver.dumpTable(td, *path, compression);
5294 }
5295 
// Restores table *table from the archive at *path through a TableArchiver.
// Throws std::runtime_error when a table of that name already exists (an existing
// table is never overwritten) or when the create-privilege check fails.
// NOTE(review): the privilege check condition is not visible in this listing.
5296 void RestoreTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5297  auto& catalog = session.getCatalog();
5298  const TableDescriptor* td = catalog.getMetadataForTable(*table, false);
5299  if (td) {
5300  // TODO: v1.0 simply throws to avoid accidentally overwrite target table.
5301  // Will add a REPLACE TABLE to explictly replace target table.
5302  // catalog.restoreTable(session, td, *path, compression);
5303  throw std::runtime_error("Table " + *table + " exists.");
5304  } else {
5305  // check access privileges
5308  throw std::runtime_error("Table " + *table +
5309  " will not be restored. User has no create privileges.");
5310  }
5311  TableArchiver table_archiver(&catalog);
5312  table_archiver.restoreTable(session, *table, *path, compression);
5313  }
5314 }
5315 
5317  const std::string& ddl_statement,
5318  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr) {
// Parses a JSON DDL statement and executes the matching Parser statement against
// *session_ptr.
// The JSON must be an object with an object member "payload" that carries a string
// member "command"; these shape requirements are enforced with CHECK.
// Supported commands: CREATE_TABLE, DROP_TABLE, CREATE_VIEW, DROP_VIEW.
// Throws std::runtime_error("Unsupported DDL command") for any other command.
5319  CHECK(!ddl_statement.empty());
5320  VLOG(2) << "Parsing JSON DDL from Calcite: " << ddl_statement;
5321  rapidjson::Document ddl_query;
5322  ddl_query.Parse(ddl_statement);
5323  CHECK(ddl_query.IsObject());
5324  CHECK(ddl_query.HasMember("payload"));
5325  CHECK(ddl_query["payload"].IsObject());
5326  const auto& payload = ddl_query["payload"].GetObject();
5327  CHECK(payload.HasMember("command"));
5328  CHECK(payload["command"].IsString());
5329 
// The command string is compared exactly (case-sensitive).
5330  const auto& ddl_command = std::string_view(payload["command"].GetString());
5331  if (ddl_command == "CREATE_TABLE") {
5332  auto create_table_stmt = Parser::CreateTableStmt(payload);
5333  create_table_stmt.execute(*session_ptr);
5334  } else if (ddl_command == "DROP_TABLE") {
5335  auto drop_table_stmt = Parser::DropTableStmt(payload);
5336  drop_table_stmt.execute(*session_ptr);
5337  } else if (ddl_command == "CREATE_VIEW") {
5338  auto create_view_stmt = Parser::CreateViewStmt(payload);
5339  create_view_stmt.execute(*session_ptr);
5340  } else if (ddl_command == "DROP_VIEW") {
5341  auto drop_view_stmt = Parser::DropViewStmt(payload);
5342  drop_view_stmt.execute(*session_ptr);
5343  } else {
5344  throw std::runtime_error("Unsupported DDL command");
5345  }
5346 }
5347 
5348 } // namespace Parser
int8_t tinyintval
Definition: sqltypes.h:203
SQLTypes to_sql_type(const std::string &type_name)
const ColumnConstraintDef * get_column_constraint() const
Definition: ParserNode.h:800
decltype(auto) get_max_rows_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
int32_t maxRollbackEpochs
void execute_calcite_ddl(const std::string &ddl_statement, std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)
void validate_non_foreign_table_write(const TableDescriptor *table_descriptor)
Definition: FsiUtils.h:22
static const AccessPrivileges VIEW_SQL_EDITOR
Definition: DBObject.h:153
decltype(auto) get_max_chunk_size_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
std::string to_lower(const std::string &str)
static const std::map< const std::string, const TableDefFuncPtr > tableDefFuncMap
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:312
void set_compression(EncodingType c)
Definition: sqltypes.h:411
SQLAgg
Definition: sqldefs.h:71
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::string s3_secret_key
Definition: CopyParams.h:63
size_t shard_column_index(const std::string &name, const std::list< ColumnDescriptor > &columns)
const std::string & get_column() const
Definition: ParserNode.h:921
std::string partitions
std::vector< int > ChunkKey
Definition: types.h:37
std::vector< std::unique_ptr< lockmgr::AbstractLockContainer< const TableDescriptor * >>> LockedTableDescriptors
Definition: LockMgr.h:270
static const AccessPrivileges VIEW_DASHBOARD
Definition: DBObject.h:172
static const int32_t DROP_VIEW
Definition: DBObject.h:115
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:321
SQLType * get_column_type() const
Definition: ParserNode.h:798
static const AccessPrivileges DROP_SERVER
Definition: DBObject.h:190
std::string cat(Ts &&...args)
static const int32_t SELECT_FROM_VIEW
Definition: DBObject.h:116
Definition: Analyzer.h:1567
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:101
SQLTypes
Definition: sqltypes.h:37
static void checkStringLiteral(const std::string &option_name, const std::unique_ptr< NameValueAssign > &p)
static const int32_t UPDATE_IN_VIEW
Definition: DBObject.h:118
std::string tableName
const std::vector< TargetMetaInfo > targets_meta