OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ParserNode.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "ParserNode.h"
26 
27 #include <boost/algorithm/string.hpp>
28 #include <boost/core/null_deleter.hpp>
29 #include <boost/filesystem.hpp>
30 #include <boost/function.hpp>
31 
32 #include <rapidjson/document.h>
33 #include <rapidjson/stringbuffer.h>
34 #include <rapidjson/writer.h>
35 
36 #include <cassert>
37 #include <cmath>
38 #include <limits>
39 #include <random>
40 #include <regex>
41 #include <stdexcept>
42 #include <string>
43 #include <type_traits>
44 #include <typeinfo>
45 
47 #include "Catalog/Catalog.h"
53 #include "Geospatial/Compression.h"
54 #include "Geospatial/Types.h"
55 #include "ImportExport/Importer.h"
56 #include "LockMgr/LockMgr.h"
60 #include "QueryEngine/Execute.h"
64 #include "ReservedKeywords.h"
65 #include "Shared/StringTransform.h"
66 #include "Shared/measure.h"
67 #include "Shared/shard_key.h"
69 #include "Utils/FsiUtils.h"
70 
71 #include "gen-cpp/CalciteServer.h"
72 #include "parser.h"
73 
75 
// Global leaf counter defined in this translation unit; starts at zero.
// NOTE(review): presumably the number of leaf nodes in a distributed setup — confirm.
76 size_t g_leaf_count{0};
// Declared here, defined in another translation unit.
79 extern bool g_enable_fsi;
80 
// Brings the `""s` std::string literal suffix into scope for this file.
82 using namespace std::string_literals;
83 
// Callback type that fills in a TableDescriptor given the table's column list.
// NOTE(review): the embedded listing numbers skip 85, so one parameter line
// appears to be missing from this extract — confirm against the repository.
84 using TableDefFuncPtr = boost::function<void(TableDescriptor&,
86  const std::list<ColumnDescriptor>& columns)>;
87 
// Same callback shape for DataframeTableDescriptor.
// NOTE(review): embedded line 90 is likewise absent from this extract.
88 using DataframeDefFuncPtr =
89  boost::function<void(DataframeTableDescriptor&,
91  const std::list<ColumnDescriptor>& columns)>;
92 
93 namespace Parser {
// Reports whether the given query session has been marked as interrupted.
// Takes a shared lock on the executor's session lock and forwards the question
// to Executor::checkIsQuerySessionInterrupted.
// NOTE(review): the embedded listing numbers jump from 94 to 96 and the closing
// brace on embedded line 100 has no visible opener, so a guarding `if (...) {`
// appears to be missing from this extract; `return false` is presumably the
// path taken when that guard does not hold — confirm against the repository.
94 bool checkInterrupt(const QuerySessionId& query_session, Executor* executor) {
96  mapd_shared_lock<mapd_shared_mutex> session_read_lock(executor->getSessionLock());
97  const auto isInterrupted =
98  executor->checkIsQuerySessionInterrupted(query_session, session_read_lock);
99  return isInterrupted;
100  }
101  return false;
102 }
103 
104 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
105  const Catalog_Namespace::Catalog& catalog,
106  Analyzer::Query& query,
107  TlistRefType allow_tlist_ref) const {
108  return makeExpr<Analyzer::Constant>(kNULLT, true);
109 }
110 
111 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
112  const Catalog_Namespace::Catalog& catalog,
113  Analyzer::Query& query,
114  TlistRefType allow_tlist_ref) const {
115  return analyzeValue(*stringval);
116 }
117 
118 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(
119  const std::string& stringval) {
120  SQLTypeInfo ti(kVARCHAR, stringval.length(), 0, true);
121  Datum d;
122  d.stringval = new std::string(stringval);
123  return makeExpr<Analyzer::Constant>(ti, false, d);
124 }
125 
126 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
127  const Catalog_Namespace::Catalog& catalog,
128  Analyzer::Query& query,
129  TlistRefType allow_tlist_ref) const {
130  return analyzeValue(intval);
131 }
132 
133 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(const int64_t intval) {
134  SQLTypes t;
135  Datum d;
136  if (intval >= INT16_MIN && intval <= INT16_MAX) {
137  t = kSMALLINT;
138  d.smallintval = (int16_t)intval;
139  } else if (intval >= INT32_MIN && intval <= INT32_MAX) {
140  t = kINT;
141  d.intval = (int32_t)intval;
142  } else {
143  t = kBIGINT;
144  d.bigintval = intval;
145  }
146  return makeExpr<Analyzer::Constant>(t, false, d);
147 }
148 
149 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
150  const Catalog_Namespace::Catalog& catalog,
151  Analyzer::Query& query,
152  TlistRefType allow_tlist_ref) const {
153  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
154  Datum d = StringToDatum(*fixedptval, ti);
155  return makeExpr<Analyzer::Constant>(ti, false, d);
156 }
157 
158 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(const int64_t numericval,
159  const int scale,
160  const int precision) {
161  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
162  ti.set_scale(scale);
163  ti.set_precision(precision);
164  Datum d;
165  d.bigintval = numericval;
166  return makeExpr<Analyzer::Constant>(ti, false, d);
167 }
168 
169 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
170  const Catalog_Namespace::Catalog& catalog,
171  Analyzer::Query& query,
172  TlistRefType allow_tlist_ref) const {
173  Datum d;
174  d.floatval = floatval;
175  return makeExpr<Analyzer::Constant>(kFLOAT, false, d);
176 }
177 
178 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
179  const Catalog_Namespace::Catalog& catalog,
180  Analyzer::Query& query,
181  TlistRefType allow_tlist_ref) const {
182  Datum d;
183  d.doubleval = doubleval;
184  return makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
185 }
186 
187 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
188  const Catalog_Namespace::Catalog& catalog,
189  Analyzer::Query& query,
190  TlistRefType allow_tlist_ref) const {
191  return get(timestampval_);
192 }
193 
194 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(const int64_t timestampval) {
195  Datum d;
196  d.bigintval = timestampval;
197  return makeExpr<Analyzer::Constant>(kTIMESTAMP, false, d);
198 }
199 
200 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
201  const Catalog_Namespace::Catalog& catalog,
202  Analyzer::Query& query,
203  TlistRefType allow_tlist_ref) const {
204  Datum d;
205  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
206 }
207 
208 std::shared_ptr<Analyzer::Expr> UserLiteral::get(const std::string& user) {
209  Datum d;
210  d.stringval = new std::string(user);
211  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
212 }
213 
214 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
215  const Catalog_Namespace::Catalog& catalog,
216  Analyzer::Query& query,
217  TlistRefType allow_tlist_ref) const {
218  SQLTypeInfo ti = SQLTypeInfo(kARRAY, true);
219  bool set_subtype = true;
220  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
221  for (auto& p : value_list) {
222  auto e = p->analyze(catalog, query, allow_tlist_ref);
223  CHECK(e);
224  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
225  if (c != nullptr && c->get_is_null()) {
226  value_exprs.push_back(c);
227  continue;
228  }
229  auto subtype = e->get_type_info().get_type();
230  if (subtype == kNULLT) {
231  // NULL element
232  } else if (set_subtype) {
233  ti.set_subtype(subtype);
234  set_subtype = false;
235  }
236  value_exprs.push_back(e);
237  }
238  std::shared_ptr<Analyzer::Expr> result =
239  makeExpr<Analyzer::Constant>(ti, false, value_exprs);
240  return result;
241 }
242 
243 std::string ArrayLiteral::to_string() const {
244  std::string str = "{";
245  bool notfirst = false;
246  for (auto& p : value_list) {
247  if (notfirst) {
248  str += ", ";
249  } else {
250  notfirst = true;
251  }
252  str += p->to_string();
253  }
254  str += "}";
255  return str;
256 }
257 
258 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
259  const Catalog_Namespace::Catalog& catalog,
260  Analyzer::Query& query,
261  TlistRefType allow_tlist_ref) const {
262  auto left_expr = left->analyze(catalog, query, allow_tlist_ref);
263  const auto& left_type = left_expr->get_type_info();
264  if (right == nullptr) {
265  return makeExpr<Analyzer::UOper>(
266  left_type, left_expr->get_contains_agg(), optype, left_expr->decompress());
267  }
268  if (optype == kARRAY_AT) {
269  if (left_type.get_type() != kARRAY) {
270  throw std::runtime_error(left->to_string() + " is not of array type.");
271  }
272  auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
273  const auto& right_type = right_expr->get_type_info();
274  if (!right_type.is_integer()) {
275  throw std::runtime_error(right->to_string() + " is not of integer type.");
276  }
277  return makeExpr<Analyzer::BinOper>(
278  left_type.get_elem_type(), false, kARRAY_AT, kONE, left_expr, right_expr);
279  }
280  auto right_expr = right->analyze(catalog, query, allow_tlist_ref);
281  return normalize(optype, opqualifier, left_expr, right_expr);
282 }
283 
284 std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
285  const SQLOps optype,
286  const SQLQualifier qual,
287  std::shared_ptr<Analyzer::Expr> left_expr,
288  std::shared_ptr<Analyzer::Expr> right_expr) {
289  if (left_expr->get_type_info().is_date_in_days() ||
290  right_expr->get_type_info().is_date_in_days()) {
291  // Do not propogate encoding
292  left_expr = left_expr->decompress();
293  right_expr = right_expr->decompress();
294  }
295  const auto& left_type = left_expr->get_type_info();
296  auto right_type = right_expr->get_type_info();
297  if (qual != kONE) {
298  // subquery not supported yet.
299  CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
300  if (right_type.get_type() != kARRAY) {
301  throw std::runtime_error(
302  "Existential or universal qualifiers can only be used in front of a subquery "
303  "or an "
304  "expression of array type.");
305  }
306  right_type = right_type.get_elem_type();
307  }
308  SQLTypeInfo new_left_type;
309  SQLTypeInfo new_right_type;
310  auto result_type = Analyzer::BinOper::analyze_type_info(
311  optype, left_type, right_type, &new_left_type, &new_right_type);
312  if (result_type.is_timeinterval()) {
313  return makeExpr<Analyzer::BinOper>(
314  result_type, false, optype, qual, left_expr, right_expr);
315  }
316  if (left_type != new_left_type) {
317  left_expr = left_expr->add_cast(new_left_type);
318  }
319  if (right_type != new_right_type) {
320  if (qual == kONE) {
321  right_expr = right_expr->add_cast(new_right_type);
322  } else {
323  right_expr = right_expr->add_cast(new_right_type.get_array_type());
324  }
325  }
326 
327  if (IS_COMPARISON(optype)) {
328  if (optype != kOVERLAPS && new_left_type.is_geometry() &&
329  new_right_type.is_geometry()) {
330  throw std::runtime_error(
331  "Comparison operators are not yet supported for geospatial types.");
332  }
333 
334  if (new_left_type.get_compression() == kENCODING_DICT &&
335  new_right_type.get_compression() == kENCODING_DICT &&
336  new_left_type.get_comp_param() == new_right_type.get_comp_param()) {
337  // do nothing
338  } else if (new_left_type.get_compression() == kENCODING_DICT &&
339  new_right_type.get_compression() == kENCODING_NONE) {
340  SQLTypeInfo ti(new_right_type);
341  ti.set_compression(new_left_type.get_compression());
342  ti.set_comp_param(new_left_type.get_comp_param());
343  ti.set_fixed_size();
344  right_expr = right_expr->add_cast(ti);
345  } else if (new_right_type.get_compression() == kENCODING_DICT &&
346  new_left_type.get_compression() == kENCODING_NONE) {
347  SQLTypeInfo ti(new_left_type);
348  ti.set_compression(new_right_type.get_compression());
349  ti.set_comp_param(new_right_type.get_comp_param());
350  ti.set_fixed_size();
351  left_expr = left_expr->add_cast(ti);
352  } else {
353  left_expr = left_expr->decompress();
354  right_expr = right_expr->decompress();
355  }
356  } else {
357  left_expr = left_expr->decompress();
358  right_expr = right_expr->decompress();
359  }
360  bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
361  return makeExpr<Analyzer::BinOper>(
362  result_type, has_agg, optype, qual, left_expr, right_expr);
363 }
364 
365 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
366  const Catalog_Namespace::Catalog& catalog,
367  Analyzer::Query& query,
368  TlistRefType allow_tlist_ref) const {
369  throw std::runtime_error("Subqueries are not supported yet.");
370  return nullptr;
371 }
372 
373 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
374  const Catalog_Namespace::Catalog& catalog,
375  Analyzer::Query& query,
376  TlistRefType allow_tlist_ref) const {
377  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
378  auto result = makeExpr<Analyzer::UOper>(kBOOLEAN, kISNULL, arg_expr);
379  if (is_not) {
380  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
381  }
382  return result;
383 }
384 
385 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
386  const Catalog_Namespace::Catalog& catalog,
387  Analyzer::Query& query,
388  TlistRefType allow_tlist_ref) const {
389  throw std::runtime_error("Subqueries are not supported yet.");
390  return nullptr;
391 }
392 
393 std::shared_ptr<Analyzer::Expr> InValues::analyze(
394  const Catalog_Namespace::Catalog& catalog,
395  Analyzer::Query& query,
396  TlistRefType allow_tlist_ref) const {
397  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
398  SQLTypeInfo ti = arg_expr->get_type_info();
399  bool dict_comp = ti.get_compression() == kENCODING_DICT;
400  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
401  for (auto& p : value_list) {
402  auto e = p->analyze(catalog, query, allow_tlist_ref);
403  if (ti != e->get_type_info()) {
404  if (ti.is_string() && e->get_type_info().is_string()) {
405  ti = Analyzer::BinOper::common_string_type(ti, e->get_type_info());
406  } else if (ti.is_number() && e->get_type_info().is_number()) {
407  ti = Analyzer::BinOper::common_numeric_type(ti, e->get_type_info());
408  } else {
409  throw std::runtime_error("IN expressions must contain compatible types.");
410  }
411  }
412  if (dict_comp) {
413  value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
414  } else {
415  value_exprs.push_back(e);
416  }
417  }
418  if (!dict_comp) {
419  arg_expr = arg_expr->decompress();
420  arg_expr = arg_expr->add_cast(ti);
421  std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
422  for (auto p : value_exprs) {
423  cast_vals.push_back(p->add_cast(ti));
424  }
425  value_exprs.swap(cast_vals);
426  }
427  std::shared_ptr<Analyzer::Expr> result =
428  makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
429  if (is_not) {
430  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
431  }
432  return result;
433 }
434 
// [NOT] BETWEEN: builds (arg >= lower) AND (arg <= upper) from the three
// analyzed operands, wrapping the conjunction in a kNOT UOper when negated.
// Each comparison casts its operands to new_left_type / new_right_type and
// decompresses them; the upper-bound comparison works on a deep copy of the
// argument expression.
// NOTE(review): the embedded listing numbers skip 443 and 454, so the heads of
// the two calls that fill new_left_type / new_right_type are missing from this
// extract; the dangling argument lists below belong to those calls.
435 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
436  const Catalog_Namespace::Catalog& catalog,
437  Analyzer::Query& query,
438  TlistRefType allow_tlist_ref) const {
439  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
440  auto lower_expr = lower->analyze(catalog, query, allow_tlist_ref);
441  auto upper_expr = upper->analyze(catalog, query, allow_tlist_ref);
442  SQLTypeInfo new_left_type, new_right_type;
// Type resolution for the lower-bound comparison (call head not in extract).
444  arg_expr->get_type_info(),
445  lower_expr->get_type_info(),
446  &new_left_type,
447  &new_right_type);
448  auto lower_pred =
449  makeExpr<Analyzer::BinOper>(kBOOLEAN,
450  kGE,
451  kONE,
452  arg_expr->add_cast(new_left_type)->decompress(),
453  lower_expr->add_cast(new_right_type)->decompress());
// Type resolution for the upper-bound comparison (call head not in extract).
// NOTE(review): this passes lower_expr's type again although the result is
// used to cast upper_expr below — looks like upper_expr->get_type_info() was
// intended; confirm before changing.
455  arg_expr->get_type_info(),
456  lower_expr->get_type_info(),
457  &new_left_type,
458  &new_right_type);
459  auto upper_pred = makeExpr<Analyzer::BinOper>(
460  kBOOLEAN,
461  kLE,
462  kONE,
463  arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
464  upper_expr->add_cast(new_right_type)->decompress());
465  std::shared_ptr<Analyzer::Expr> result =
466  makeExpr<Analyzer::BinOper>(kBOOLEAN, kAND, kONE, lower_pred, upper_pred);
467  if (is_not) {
468  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
469  }
470  return result;
471 }
472 
473 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
474  const Catalog_Namespace::Catalog& catalog,
475  Analyzer::Query& query,
476  TlistRefType allow_tlist_ref) const {
477  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
478  if (!arg_expr->get_type_info().is_string()) {
479  throw std::runtime_error(
480  "expression in char_length clause must be of a string type.");
481  }
482  std::shared_ptr<Analyzer::Expr> result =
483  makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length);
484  return result;
485 }
486 
487 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
488  const Catalog_Namespace::Catalog& catalog,
489  Analyzer::Query& query,
490  TlistRefType allow_tlist_ref) const {
491  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
492  if (!arg_expr->get_type_info().is_array()) {
493  throw std::runtime_error(
494  "expression in cardinality clause must be of an array type.");
495  }
496  std::shared_ptr<Analyzer::Expr> result =
497  makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
498  return result;
499 }
500 
501 void LikeExpr::check_like_expr(const std::string& like_str, char escape_char) {
502  if (like_str.back() == escape_char) {
503  throw std::runtime_error("LIKE pattern must not end with escape character.");
504  }
505 }
506 
507 bool LikeExpr::test_is_simple_expr(const std::string& like_str, char escape_char) {
508  // if not bounded by '%' then not a simple string
509  if (like_str.size() < 2 || like_str[0] != '%' || like_str[like_str.size() - 1] != '%') {
510  return false;
511  }
512  // if the last '%' is escaped then not a simple string
513  if (like_str[like_str.size() - 2] == escape_char &&
514  like_str[like_str.size() - 3] != escape_char) {
515  return false;
516  }
517  for (size_t i = 1; i < like_str.size() - 1; i++) {
518  if (like_str[i] == '%' || like_str[i] == '_' || like_str[i] == '[' ||
519  like_str[i] == ']') {
520  if (like_str[i - 1] != escape_char) {
521  return false;
522  }
523  }
524  }
525  return true;
526 }
527 
528 void LikeExpr::erase_cntl_chars(std::string& like_str, char escape_char) {
529  char prev_char = '\0';
530  // easier to create new string of allowable chars
531  // rather than erase chars from
532  // existing string
533  std::string new_str;
534  for (char& cur_char : like_str) {
535  if (cur_char == '%' || cur_char == escape_char) {
536  if (prev_char != escape_char) {
537  prev_char = cur_char;
538  continue;
539  }
540  }
541  new_str.push_back(cur_char);
542  prev_char = cur_char;
543  }
544  like_str = new_str;
545 }
546 
547 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
548  const Catalog_Namespace::Catalog& catalog,
549  Analyzer::Query& query,
550  TlistRefType allow_tlist_ref) const {
551  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
552  auto like_expr = like_string->analyze(catalog, query, allow_tlist_ref);
553  auto escape_expr = escape_string == nullptr
554  ? nullptr
555  : escape_string->analyze(catalog, query, allow_tlist_ref);
556  return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike, is_not);
557 }
558 
559 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
560  std::shared_ptr<Analyzer::Expr> like_expr,
561  std::shared_ptr<Analyzer::Expr> escape_expr,
562  const bool is_ilike,
563  const bool is_not) {
564  if (!arg_expr->get_type_info().is_string()) {
565  throw std::runtime_error("expression before LIKE must be of a string type.");
566  }
567  if (!like_expr->get_type_info().is_string()) {
568  throw std::runtime_error("expression after LIKE must be of a string type.");
569  }
570  char escape_char = '\\';
571  if (escape_expr != nullptr) {
572  if (!escape_expr->get_type_info().is_string()) {
573  throw std::runtime_error("expression after ESCAPE must be of a string type.");
574  }
575  if (!escape_expr->get_type_info().is_string()) {
576  throw std::runtime_error("expression after ESCAPE must be of a string type.");
577  }
578  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
579  if (c != nullptr && c->get_constval().stringval->length() > 1) {
580  throw std::runtime_error("String after ESCAPE must have a single character.");
581  }
582  escape_char = (*c->get_constval().stringval)[0];
583  }
584  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(like_expr);
585  bool is_simple = false;
586  if (c != nullptr) {
587  std::string& pattern = *c->get_constval().stringval;
588  if (is_ilike) {
589  std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
590  }
591  check_like_expr(pattern, escape_char);
592  is_simple = test_is_simple_expr(pattern, escape_char);
593  if (is_simple) {
594  erase_cntl_chars(pattern, escape_char);
595  }
596  }
597  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikeExpr>(
598  arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
599  if (is_not) {
600  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
601  }
602  return result;
603 }
604 
605 void RegexpExpr::check_pattern_expr(const std::string& pattern_str, char escape_char) {
606  if (pattern_str.back() == escape_char) {
607  throw std::runtime_error("REGEXP pattern must not end with escape character.");
608  }
609 }
610 
611 bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str, char escape_char) {
612  char prev_char = '\0';
613  char prev_prev_char = '\0';
614  std::string like_str;
615  for (char& cur_char : pattern_str) {
616  if (prev_char == escape_char || isalnum(cur_char) || cur_char == ' ' ||
617  cur_char == '.') {
618  like_str.push_back((cur_char == '.') ? '_' : cur_char);
619  prev_prev_char = prev_char;
620  prev_char = cur_char;
621  continue;
622  }
623  if (prev_char == '.' && prev_prev_char != escape_char) {
624  if (cur_char == '*' || cur_char == '+') {
625  if (cur_char == '*') {
626  like_str.pop_back();
627  }
628  // .* --> %
629  // .+ --> _%
630  like_str.push_back('%');
631  prev_prev_char = prev_char;
632  prev_char = cur_char;
633  continue;
634  }
635  }
636  return false;
637  }
638  pattern_str = like_str;
639  return true;
640 }
641 
642 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
643  const Catalog_Namespace::Catalog& catalog,
644  Analyzer::Query& query,
645  TlistRefType allow_tlist_ref) const {
646  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
647  auto pattern_expr = pattern_string->analyze(catalog, query, allow_tlist_ref);
648  auto escape_expr = escape_string == nullptr
649  ? nullptr
650  : escape_string->analyze(catalog, query, allow_tlist_ref);
651  return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not);
652 }
653 
654 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
655  std::shared_ptr<Analyzer::Expr> arg_expr,
656  std::shared_ptr<Analyzer::Expr> pattern_expr,
657  std::shared_ptr<Analyzer::Expr> escape_expr,
658  const bool is_not) {
659  if (!arg_expr->get_type_info().is_string()) {
660  throw std::runtime_error("expression before REGEXP must be of a string type.");
661  }
662  if (!pattern_expr->get_type_info().is_string()) {
663  throw std::runtime_error("expression after REGEXP must be of a string type.");
664  }
665  char escape_char = '\\';
666  if (escape_expr != nullptr) {
667  if (!escape_expr->get_type_info().is_string()) {
668  throw std::runtime_error("expression after ESCAPE must be of a string type.");
669  }
670  if (!escape_expr->get_type_info().is_string()) {
671  throw std::runtime_error("expression after ESCAPE must be of a string type.");
672  }
673  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
674  if (c != nullptr && c->get_constval().stringval->length() > 1) {
675  throw std::runtime_error("String after ESCAPE must have a single character.");
676  }
677  escape_char = (*c->get_constval().stringval)[0];
678  if (escape_char != '\\') {
679  throw std::runtime_error("Only supporting '\\' escape character.");
680  }
681  }
682  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(pattern_expr);
683  if (c != nullptr) {
684  std::string& pattern = *c->get_constval().stringval;
685  if (translate_to_like_pattern(pattern, escape_char)) {
686  return LikeExpr::get(arg_expr, pattern_expr, escape_expr, false, is_not);
687  }
688  }
689  std::shared_ptr<Analyzer::Expr> result =
690  makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
691  if (is_not) {
692  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
693  }
694  return result;
695 }
696 
697 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
698  const Catalog_Namespace::Catalog& catalog,
699  Analyzer::Query& query,
700  TlistRefType allow_tlist_ref) const {
701  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
702  return LikelihoodExpr::get(arg_expr, likelihood, is_not);
703 }
704 
705 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
706  std::shared_ptr<Analyzer::Expr> arg_expr,
707  float likelihood,
708  const bool is_not) {
709  if (!arg_expr->get_type_info().is_boolean()) {
710  throw std::runtime_error("likelihood expression expects boolean type.");
711  }
712  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikelihoodExpr>(
713  arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
714  return result;
715 }
716 
717 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
718  const Catalog_Namespace::Catalog& catalog,
719  Analyzer::Query& query,
720  TlistRefType allow_tlist_ref) const {
721  throw std::runtime_error("Subqueries are not supported yet.");
722  return nullptr;
723 }
724 
// Resolves a column reference.
//  - "*" (no column name) is rejected.
//  - With a table qualifier, the column is looked up in that range-table entry.
//  - Without one, every range-table entry is searched; more than one hit is
//    reported as ambiguous. If nothing is found and target-list references are
//    allowed, the name is matched against the output aliases of the query's
//    target list instead.
// Returns a ColumnVar for a table column, or a copy / kOUTPUT Var for a
// target-list reference. Throws std::runtime_error when the table, column or
// alias cannot be resolved unambiguously.
725 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
726  const Catalog_Namespace::Catalog& catalog,
727  Analyzer::Query& query,
728  TlistRefType allow_tlist_ref) const {
729  int table_id{0};
730  int rte_idx{0};
731  const ColumnDescriptor* cd{nullptr};
732  if (column == nullptr) {
733  throw std::runtime_error("invalid column name *.");
734  }
735  if (table != nullptr) {
// Qualified reference: resolve the table first, then the column within it.
736  rte_idx = query.get_rte_idx(*table);
737  if (rte_idx < 0) {
738  throw std::runtime_error("range variable or table name " + *table +
739  " does not exist.");
740  }
741  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
742  cd = rte->get_column_desc(catalog, *column);
743  if (cd == nullptr) {
744  throw std::runtime_error("Column name " + *column + " does not exist.");
745  }
746  table_id = rte->get_table_id();
747  } else {
// Unqualified reference: scan all range-table entries for the column name.
// NOTE(review): cd is overwritten on every iteration, so if an earlier entry
// has the column and a later one does not, cd ends up nullptr even though
// `found` is true — looks unintended; confirm before relying on multi-table
// lookups here.
748  bool found = false;
749  int i = 0;
750  for (auto rte : query.get_rangetable()) {
751  cd = rte->get_column_desc(catalog, *column);
752  if (cd != nullptr && !found) {
753  found = true;
754  rte_idx = i;
755  table_id = rte->get_table_id();
756  } else if (cd != nullptr && found) {
757  throw std::runtime_error("Column name " + *column + " is ambiguous.");
758  }
759  i++;
760  }
761  if (cd == nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
762  // check if this is a reference to a targetlist entry
763  bool found = false;
764  int varno = -1;
// Target-list positions are counted from 1.
765  int i = 1;
766  std::shared_ptr<Analyzer::TargetEntry> tle;
767  for (auto p : query.get_targetlist()) {
768  if (*column == p->get_resname() && !found) {
769  found = true;
770  varno = i;
771  tle = p;
772  } else if (*column == p->get_resname() && found) {
773  throw std::runtime_error("Output alias " + *column + " is ambiguous.");
774  }
775  i++;
776  }
777  if (found) {
778  if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
779  Analyzer::Var* v = static_cast<Analyzer::Var*>(tle->get_expr());
// NOTE(review): the embedded listing numbers skip 780, so a line (presumably
// a condition opening the block closed on embedded line 782) is missing from
// this extract — confirm against the repository.
781  return v->deep_copy();
782  }
783  }
// TLIST_COPY duplicates the aliased expression; otherwise the alias is
// referenced by position through a kOUTPUT Var.
784  if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
785  return tle->get_expr()->deep_copy();
786  } else {
787  return makeExpr<Analyzer::Var>(
788  tle->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
789  }
790  }
791  }
792  if (cd == nullptr) {
793  throw std::runtime_error("Column name " + *column + " does not exist.");
794  }
795  }
796  return makeExpr<Analyzer::ColumnVar>(cd->columnType, table_id, cd->columnId, rte_idx);
797 }
798 
799 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
800  const Catalog_Namespace::Catalog& catalog,
801  Analyzer::Query& query,
802  TlistRefType allow_tlist_ref) const {
803  SQLTypeInfo result_type;
804  SQLAgg agg_type;
805  std::shared_ptr<Analyzer::Expr> arg_expr;
806  bool is_distinct = false;
807  if (boost::iequals(*name, "count")) {
808  result_type = SQLTypeInfo(kBIGINT, false);
809  agg_type = kCOUNT;
810  if (arg) {
811  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
812  const SQLTypeInfo& ti = arg_expr->get_type_info();
813  if (ti.is_string() && (ti.get_compression() != kENCODING_DICT || !distinct)) {
814  throw std::runtime_error(
815  "Strings must be dictionary-encoded in COUNT(DISTINCT).");
816  }
817  if (ti.get_type() == kARRAY && !distinct) {
818  throw std::runtime_error("Only COUNT(DISTINCT) is supported on arrays.");
819  }
820  }
821  is_distinct = distinct;
822  } else {
823  if (!arg) {
824  throw std::runtime_error("Cannot compute " + *name + " with argument '*'.");
825  }
826  if (boost::iequals(*name, "min")) {
827  agg_type = kMIN;
828  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
829  arg_expr = arg_expr->decompress();
830  result_type = arg_expr->get_type_info();
831  } else if (boost::iequals(*name, "max")) {
832  agg_type = kMAX;
833  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
834  arg_expr = arg_expr->decompress();
835  result_type = arg_expr->get_type_info();
836  } else if (boost::iequals(*name, "avg")) {
837  agg_type = kAVG;
838  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
839  if (!arg_expr->get_type_info().is_number()) {
840  throw std::runtime_error("Cannot compute AVG on non-number-type arguments.");
841  }
842  arg_expr = arg_expr->decompress();
843  result_type = SQLTypeInfo(kDOUBLE, false);
844  } else if (boost::iequals(*name, "sum")) {
845  agg_type = kSUM;
846  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
847  if (!arg_expr->get_type_info().is_number()) {
848  throw std::runtime_error("Cannot compute SUM on non-number-type arguments.");
849  }
850  arg_expr = arg_expr->decompress();
851  result_type = arg_expr->get_type_info().is_integer() ? SQLTypeInfo(kBIGINT, false)
852  : arg_expr->get_type_info();
853  } else if (boost::iequals(*name, "unnest")) {
854  arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
855  const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
856  if (arg_ti.get_type() != kARRAY) {
857  throw std::runtime_error(arg->to_string() + " is not of array type.");
858  }
859  return makeExpr<Analyzer::UOper>(arg_ti.get_elem_type(), false, kUNNEST, arg_expr);
860  } else {
861  throw std::runtime_error("invalid function name: " + *name);
862  }
863  if (arg_expr->get_type_info().is_string() ||
864  arg_expr->get_type_info().get_type() == kARRAY) {
865  throw std::runtime_error(
866  "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
867  }
868  }
869  int naggs = query.get_num_aggs();
870  query.set_num_aggs(naggs + 1);
871  return makeExpr<Analyzer::AggExpr>(
872  result_type, agg_type, arg_expr, is_distinct, nullptr);
873 }
874 
875 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
876  const Catalog_Namespace::Catalog& catalog,
877  Analyzer::Query& query,
878  TlistRefType allow_tlist_ref) const {
879  target_type->check_type();
880  auto arg_expr = arg->analyze(catalog, query, allow_tlist_ref);
881  SQLTypeInfo ti(target_type->get_type(),
882  target_type->get_param1(),
883  target_type->get_param2(),
884  arg_expr->get_type_info().get_notnull());
885  if (arg_expr->get_type_info().get_type() != target_type->get_type() &&
886  arg_expr->get_type_info().get_compression() != kENCODING_NONE) {
887  arg_expr->decompress();
888  }
889  return arg_expr->add_cast(ti);
890 }
891 
892 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
893  const Catalog_Namespace::Catalog& catalog,
894  Analyzer::Query& query,
895  TlistRefType allow_tlist_ref) const {
896  SQLTypeInfo ti;
897  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
898  expr_pair_list;
899  for (auto& p : when_then_list) {
900  auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
901  if (e1->get_type_info().get_type() != kBOOLEAN) {
902  throw std::runtime_error("Only boolean expressions can be used after WHEN.");
903  }
904  auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
905  expr_pair_list.emplace_back(e1, e2);
906  }
907  auto else_e = else_expr ? else_expr->analyze(catalog, query, allow_tlist_ref) : nullptr;
908  return normalize(expr_pair_list, else_e);
909 }
910 
911 namespace {
912 
913 bool expr_is_null(const Analyzer::Expr* expr) {
914  if (expr->get_type_info().get_type() == kNULLT) {
915  return true;
916  }
917  const auto const_expr = dynamic_cast<const Analyzer::Constant*>(expr);
918  return const_expr && const_expr->get_is_null();
919 }
920 
// NOTE(review): the signature line of this helper (listing line 921) is not present
// in this view; only the body is visible. The body reads the string held by
// `str_literal` and maps it to a bool:
//   "t", "true", "T", "True"   -> true
//   "f", "false", "F", "False" -> false
// Any other spelling throws std::runtime_error.
922  const std::string* s = str_literal->get_stringval();
923  if (*s == "t" || *s == "true" || *s == "T" || *s == "True") {
924  return true;
925  } else if (*s == "f" || *s == "false" || *s == "F" || *s == "False") {
926  return false;
927  } else {
928  throw std::runtime_error("Invalid string for boolean " + *s);
929  }
930 }
931 
932 } // namespace
933 
// Builds an Analyzer::CaseExpr from already-analyzed WHEN/THEN pairs and an optional
// ELSE expression.
//  - Derives a common result type `ti` from the THEN expressions and, if present, the
//    ELSE expression; throws std::runtime_error when branch types are incompatible.
//  - Casts every THEN expression and the ELSE expression to `ti` (made nullable). When
//    no ELSE is supplied, a NULL constant of type `ti` is created instead.
//  - Throws std::runtime_error when the type is still kNULLT after all branches.
//  - If exactly one positive dictionary id was seen and `ti` is not dict-encoded, the
//    case expression is additionally cast to that dictionary encoding.
934 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
935  const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
936  std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
937  const std::shared_ptr<Analyzer::Expr> else_e_in) {
938  SQLTypeInfo ti;
939  bool has_agg = false;
940  std::set<int> dictionary_ids;
941 
// Fold the THEN result types into the common type `ti`, remembering the dictionary
// ids of dict-encoded string results and whether any result contains an aggregate.
942  for (auto& p : expr_pair_list) {
943  auto e1 = p.first;
944  CHECK(e1->get_type_info().is_boolean());
945  auto e2 = p.second;
946  if (e2->get_type_info().is_dict_encoded_string()) {
947  dictionary_ids.insert(e2->get_type_info().get_comp_param());
948  }
949  if (ti.get_type() == kNULLT) {
950  ti = e2->get_type_info();
951  } else if (e2->get_type_info().get_type() == kNULLT) {
// An untyped THEN result adopts the type found so far, which becomes nullable.
952  ti.set_notnull(false);
953  e2->set_type_info(ti);
954  } else if (ti != e2->get_type_info()) {
955  if (ti.is_string() && e2->get_type_info().is_string()) {
956  ti = Analyzer::BinOper::common_string_type(ti, e2->get_type_info());
957  } else if (ti.is_number() && e2->get_type_info().is_number()) {
958  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
959  } else if (ti.is_boolean() && e2->get_type_info().is_boolean()) {
960  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
961  } else {
962  throw std::runtime_error(
963  "expressions in THEN clause must be of the same or compatible types.");
964  }
965  }
966  if (e2->get_contains_agg()) {
967  has_agg = true;
968  }
969  }
// Fold the ELSE expression, if any, into `ti`.
970  auto else_e = else_e_in;
971  if (else_e) {
972  if (else_e->get_contains_agg()) {
973  has_agg = true;
974  }
975  if (expr_is_null(else_e.get())) {
976  ti.set_notnull(false);
977  else_e->set_type_info(ti);
978  } else if (ti != else_e->get_type_info()) {
979  if (else_e->get_type_info().is_dict_encoded_string()) {
980  dictionary_ids.insert(else_e->get_type_info().get_comp_param());
981  }
982  ti.set_notnull(false);
983  if (ti.is_string() && else_e->get_type_info().is_string()) {
984  ti = Analyzer::BinOper::common_string_type(ti, else_e->get_type_info());
985  } else if (ti.is_number() && else_e->get_type_info().is_number()) {
986  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
987  } else if (ti.is_boolean() && else_e->get_type_info().is_boolean()) {
988  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
989  } else if (get_logical_type_info(ti) !=
990  get_logical_type_info(else_e->get_type_info())) {
991  throw std::runtime_error(
992  // types differing by encoding will be resolved at decode
993 
994  "expressions in ELSE clause must be of the same or compatible types as those "
995  "in the THEN clauses.");
996  }
997  }
998  }
// Cast every THEN result to the common, nullable type.
999  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1000  cast_expr_pair_list;
1001  for (auto p : expr_pair_list) {
1002  ti.set_notnull(false);
1003  cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
1004  }
1005  if (else_e != nullptr) {
1006  else_e = else_e->add_cast(ti);
1007  } else {
1008  Datum d;
1009  // always create an else expr so that executor doesn't need to worry about it
1010  ti.set_notnull(false);
1011  else_e = makeExpr<Analyzer::Constant>(ti, true, d);
1012  }
1013  if (ti.get_type() == kNULLT) {
1014  throw std::runtime_error(
1015  "Can't deduce the type for case expressions, all branches null");
1016  }
1017 
1018  auto case_expr = makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
1019  if (ti.get_compression() != kENCODING_DICT && dictionary_ids.size() == 1 &&
1020  *(dictionary_ids.begin()) > 0) {
1021  // the above logic makes two assumptions when strings are present. 1) that all types
1022  // in the case statement are either null or strings, and 2) that none-encoded strings
1023  // will always win out over dict encoding. If we only have one dictionary, and that
1024  // dictionary is not a transient dictionary, we can cast the entire case to be dict
1025  // encoded and use transient dictionaries for any literals
1026  ti.set_compression(kENCODING_DICT);
1027  ti.set_comp_param(*dictionary_ids.begin());
// NOTE(review): the value returned by add_cast() is not used here; presumably the
// cast is applied to `case_expr` in place -- confirm against Analyzer::CaseExpr.
1028  case_expr->add_cast(ti);
1029  }
1030  return case_expr;
1031 }
1032 
1033 std::string CaseExpr::to_string() const {
1034  std::string str("CASE ");
1035  for (auto& p : when_then_list) {
1036  str += "WHEN " + p->get_expr1()->to_string() + " THEN " +
1037  p->get_expr2()->to_string() + " ";
1038  }
1039  if (else_expr != nullptr) {
1040  str += "ELSE " + else_expr->to_string();
1041  }
1042  str += " END";
1043  return str;
1044 }
1045 
1046 void UnionQuery::analyze(const Catalog_Namespace::Catalog& catalog,
1047  Analyzer::Query& query) const {
1048  left->analyze(catalog, query);
1049  Analyzer::Query* right_query = new Analyzer::Query();
1050  right->analyze(catalog, *right_query);
1051  query.set_next_query(right_query);
1052  query.set_is_unionall(is_unionall);
1053 }
1054 
1055 void QuerySpec::analyze_having_clause(const Catalog_Namespace::Catalog& catalog,
1056  Analyzer::Query& query) const {
1057  std::shared_ptr<Analyzer::Expr> p;
1058  if (having_clause != nullptr) {
1059  p = having_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1060  if (p->get_type_info().get_type() != kBOOLEAN) {
1061  throw std::runtime_error("Only boolean expressions can be in HAVING clause.");
1062  }
1063  p->check_group_by(query.get_group_by());
1064  }
1065  query.set_having_predicate(p);
1066 }
1067 
// Analyzes the GROUP BY clause and stores the resulting expression list on `query`.
//  - An integer literal is treated as a 1-based ordinal into the target list; other
//    literals, out-of-range ordinals and ordinals that reference an aggregate
//    expression throw std::runtime_error.
//  - Any other expression is analyzed with TLIST_REF.
//  - When the analyzed expression is an Analyzer::Var, the referenced target-list entry
//    is rewritten to a kGROUPBY Var whose varno is the position in the GROUP BY list.
//  - Finally, if the query has aggregates or a non-empty GROUP BY, every target-list
//    expression is checked against the GROUP BY list.
1068 void QuerySpec::analyze_group_by(const Catalog_Namespace::Catalog& catalog,
1069  Analyzer::Query& query) const {
1070  std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1071  if (!groupby_clause.empty()) {
1072  int gexpr_no = 1;
1073  std::shared_ptr<Analyzer::Expr> gexpr;
1074  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1075  query.get_targetlist();
1076  for (auto& c : groupby_clause) {
1077  // special-case ordinal numbers in GROUP BY
1078  if (dynamic_cast<Literal*>(c.get())) {
1079  IntLiteral* i = dynamic_cast<IntLiteral*>(c.get());
1080  if (!i) {
1081  throw std::runtime_error("Invalid literal in GROUP BY clause.");
1082  }
1083  int varno = (int)i->get_intval();
1084  if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1085  throw std::runtime_error("Invalid ordinal number in GROUP BY clause.");
1086  }
1087  if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1088  throw std::runtime_error(
1089  "Ordinal number in GROUP BY cannot reference an expression containing "
1090  "aggregate "
1091  "functions.");
1092  }
1093  gexpr = makeExpr<Analyzer::Var>(
1094  tlist[varno - 1]->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
1095  } else {
1096  gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1097  }
1098  const SQLTypeInfo gti = gexpr->get_type_info();
1099  bool set_new_type = false;
1100  SQLTypeInfo ti(gti);
// Unencoded strings are grouped on a modified type `ti` (cast applied further below).
1101  if (gti.is_string() && gti.get_compression() == kENCODING_NONE) {
1102  set_new_type = true;
// NOTE(review): listing lines 1103-1104 are absent from this view; presumably they
// adjust the encoding of `ti` before set_fixed_size() -- confirm against the full file.
1105  ti.set_fixed_size();
1106  }
1107  std::shared_ptr<Analyzer::Var> v;
1108  if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
1109  v = std::static_pointer_cast<Analyzer::Var>(gexpr);
1110  int n = v->get_varno();
1111  gexpr = tlist[n - 1]->get_own_expr();
1112  auto cv = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gexpr);
1113  if (cv != nullptr) {
1114  // inherit all ColumnVar info for lineage.
1115  *std::static_pointer_cast<Analyzer::ColumnVar>(v) = *cv;
1116  }
1117  v->set_which_row(Analyzer::Var::kGROUPBY);
1118  v->set_varno(gexpr_no);
1119  tlist[n - 1]->set_expr(v);
1120  }
1121  if (set_new_type) {
1122  auto new_e = gexpr->add_cast(ti);
1123  groupby.push_back(new_e);
1124  if (v != nullptr) {
1125  v->set_type_info(new_e->get_type_info());
1126  }
1127  } else {
1128  groupby.push_back(gexpr);
1129  }
1130  gexpr_no++;
1131  }
1132  }
1133  if (query.get_num_aggs() > 0 || !groupby.empty()) {
1134  for (auto t : query.get_targetlist()) {
1135  auto e = t->get_expr();
1136  e->check_group_by(groupby);
1137  }
1138  }
1139  query.set_group_by(groupby);
1140 }
1141 
1142 void QuerySpec::analyze_where_clause(const Catalog_Namespace::Catalog& catalog,
1143  Analyzer::Query& query) const {
1144  if (where_clause == nullptr) {
1145  query.set_where_predicate(nullptr);
1146  return;
1147  }
1148  auto p = where_clause->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1149  if (p->get_type_info().get_type() != kBOOLEAN) {
1150  throw std::runtime_error("Only boolean expressions can be in WHERE clause.");
1151  }
1152  query.set_where_predicate(p);
1153 }
1154 
1155 void QuerySpec::analyze_select_clause(const Catalog_Namespace::Catalog& catalog,
1156  Analyzer::Query& query) const {
1157  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1158  query.get_targetlist_nonconst();
1159  if (select_clause.empty()) {
1160  // this means SELECT *
1161  int rte_idx = 0;
1162  for (auto rte : query.get_rangetable()) {
1163  rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1164  }
1165  } else {
1166  for (auto& p : select_clause) {
1167  const Parser::Expr* select_expr = p->get_select_expr();
1168  // look for the case of range_var.*
1169  if (typeid(*select_expr) == typeid(ColumnRef) &&
1170  dynamic_cast<const ColumnRef*>(select_expr)->get_column() == nullptr) {
1171  const std::string* range_var_name =
1172  dynamic_cast<const ColumnRef*>(select_expr)->get_table();
1173  int rte_idx = query.get_rte_idx(*range_var_name);
1174  if (rte_idx < 0) {
1175  throw std::runtime_error("invalid range variable name: " + *range_var_name);
1176  }
1177  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
1178  rte->expand_star_in_targetlist(catalog, tlist, rte_idx);
1179  } else {
1180  auto e = select_expr->analyze(catalog, query);
1181  std::string resname;
1182 
1183  if (p->get_alias() != nullptr) {
1184  resname = *p->get_alias();
1185  } else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1186  !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1187  auto colvar = std::static_pointer_cast<Analyzer::ColumnVar>(e);
1188  const ColumnDescriptor* col_desc = catalog.getMetadataForColumn(
1189  colvar->get_table_id(), colvar->get_column_id());
1190  resname = col_desc->columnName;
1191  }
1192  if (e->get_type_info().get_type() == kNULLT) {
1193  throw std::runtime_error(
1194  "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1195  }
1196  auto o = std::static_pointer_cast<Analyzer::UOper>(e);
1197  bool unnest = (o != nullptr && o->get_optype() == kUNNEST);
1198  auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1199  tlist.push_back(tle);
1200  }
1201  }
1202  }
1203 }
1204 
1205 void QuerySpec::analyze_from_clause(const Catalog_Namespace::Catalog& catalog,
1206  Analyzer::Query& query) const {
1208  for (auto& p : from_clause) {
1209  const TableDescriptor* table_desc;
1210  table_desc = catalog.getMetadataForTable(*p->get_table_name());
1211  if (table_desc == nullptr) {
1212  throw std::runtime_error("Table " + *p->get_table_name() + " does not exist.");
1213  }
1214  std::string range_var;
1215  if (p->get_range_var() == nullptr) {
1216  range_var = *p->get_table_name();
1217  } else {
1218  range_var = *p->get_range_var();
1219  }
1220  rte = new Analyzer::RangeTableEntry(range_var, table_desc, nullptr);
1221  query.add_rte(rte);
1222  }
1223 }
1224 
// Analyzes a query specification by running the per-clause analyses in this order:
// FROM, SELECT, WHERE, GROUP BY, HAVING. The order matters in the visible code:
// analyze_having_clause() reads the GROUP BY list stored by analyze_group_by(), and
// analyze_group_by() reads the target list filled by analyze_select_clause().
// NOTE(review): listing line 1227 is absent from this view, so one statement may
// precede the FROM analysis -- confirm against the full file.
1225 void QuerySpec::analyze(const Catalog_Namespace::Catalog& catalog,
1226  Analyzer::Query& query) const {
1228  analyze_from_clause(catalog, query);
1229  analyze_select_clause(catalog, query);
1230  analyze_where_clause(catalog, query);
1231  analyze_group_by(catalog, query);
1232  analyze_having_clause(catalog, query);
1233 }
1234 
1235 void SelectStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1236  Analyzer::Query& query) const {
1237  query.set_stmt_type(kSELECT);
1238  query.set_limit(limit);
1239  if (offset < 0) {
1240  throw std::runtime_error("OFFSET cannot be negative.");
1241  }
1242  query.set_offset(offset);
1243  query_expr->analyze(catalog, query);
1244  if (orderby_clause.empty() && !query.get_is_distinct()) {
1245  query.set_order_by(nullptr);
1246  return;
1247  }
1248  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1249  query.get_targetlist();
1250  std::list<Analyzer::OrderEntry>* order_by = new std::list<Analyzer::OrderEntry>();
1251  if (!orderby_clause.empty()) {
1252  for (auto& p : orderby_clause) {
1253  int tle_no = p->get_colno();
1254  if (tle_no == 0) {
1255  // use column name
1256  // search through targetlist for matching name
1257  const std::string* name = p->get_column()->get_column();
1258  tle_no = 1;
1259  bool found = false;
1260  for (auto tle : tlist) {
1261  if (tle->get_resname() == *name) {
1262  found = true;
1263  break;
1264  }
1265  tle_no++;
1266  }
1267  if (!found) {
1268  throw std::runtime_error("invalid name in order by: " + *name);
1269  }
1270  }
1271  order_by->push_back(
1272  Analyzer::OrderEntry(tle_no, p->get_is_desc(), p->get_nulls_first()));
1273  }
1274  }
1275  if (query.get_is_distinct()) {
1276  // extend order_by to include all targetlist entries.
1277  for (int i = 1; i <= static_cast<int>(tlist.size()); i++) {
1278  bool in_orderby = false;
1279  std::for_each(order_by->begin(),
1280  order_by->end(),
1281  [&in_orderby, i](const Analyzer::OrderEntry& oe) {
1282  in_orderby = in_orderby || (i == oe.tle_no);
1283  });
1284  if (!in_orderby) {
1285  order_by->push_back(Analyzer::OrderEntry(i, false, false));
1286  }
1287  }
1288  }
1289  query.set_order_by(order_by);
1290 }
1291 
1292 std::string SelectEntry::to_string() const {
1293  std::string str = select_expr->to_string();
1294  if (alias != nullptr) {
1295  str += " AS " + *alias;
1296  }
1297  return str;
1298 }
1299 
1300 std::string TableRef::to_string() const {
1301  std::string str = *table_name;
1302  if (range_var != nullptr) {
1303  str += " " + *range_var;
1304  }
1305  return str;
1306 }
1307 
1308 std::string ColumnRef::to_string() const {
1309  std::string str;
1310  if (table == nullptr) {
1311  str = *column;
1312  } else if (column == nullptr) {
1313  str = *table + ".*";
1314  } else {
1315  str = *table + "." + *column;
1316  }
1317  return str;
1318 }
1319 
1320 std::string OperExpr::to_string() const {
1321  std::string op_str[] = {
1322  "=", "===", "<>", "<", ">", "<=", ">=", " AND ", " OR ", "NOT", "-", "+", "*", "/"};
1323  std::string str;
1324  if (optype == kUMINUS) {
1325  str = "-(" + left->to_string() + ")";
1326  } else if (optype == kNOT) {
1327  str = "NOT (" + left->to_string() + ")";
1328  } else if (optype == kARRAY_AT) {
1329  str = left->to_string() + "[" + right->to_string() + "]";
1330  } else if (optype == kUNNEST) {
1331  str = "UNNEST(" + left->to_string() + ")";
1332  } else if (optype == kIN) {
1333  str = "(" + left->to_string() + " IN " + right->to_string() + ")";
1334  } else {
1335  str = "(" + left->to_string() + op_str[optype] + right->to_string() + ")";
1336  }
1337  return str;
1338 }
1339 
1340 std::string InExpr::to_string() const {
1341  std::string str = arg->to_string();
1342  if (is_not) {
1343  str += " NOT IN ";
1344  } else {
1345  str += " IN ";
1346  }
1347  return str;
1348 }
1349 
1350 std::string ExistsExpr::to_string() const {
1351  return "EXISTS (" + query->to_string() + ")";
1352 }
1353 
1354 std::string SubqueryExpr::to_string() const {
1355  std::string str;
1356  str = "(";
1357  str += query->to_string();
1358  str += ")";
1359  return str;
1360 }
1361 
1362 std::string IsNullExpr::to_string() const {
1363  std::string str = arg->to_string();
1364  if (is_not) {
1365  str += " IS NOT NULL";
1366  } else {
1367  str += " IS NULL";
1368  }
1369  return str;
1370 }
1371 
1372 std::string InSubquery::to_string() const {
1373  std::string str = InExpr::to_string();
1374  str += subquery->to_string();
1375  return str;
1376 }
1377 
1378 std::string InValues::to_string() const {
1379  std::string str = InExpr::to_string() + "(";
1380  bool notfirst = false;
1381  for (auto& p : value_list) {
1382  if (notfirst) {
1383  str += ", ";
1384  } else {
1385  notfirst = true;
1386  }
1387  str += p->to_string();
1388  }
1389  str += ")";
1390  return str;
1391 }
1392 
1393 std::string BetweenExpr::to_string() const {
1394  std::string str = arg->to_string();
1395  if (is_not) {
1396  str += " NOT BETWEEN ";
1397  } else {
1398  str += " BETWEEN ";
1399  }
1400  str += lower->to_string() + " AND " + upper->to_string();
1401  return str;
1402 }
1403 
1404 std::string CharLengthExpr::to_string() const {
1405  std::string str;
1406  if (calc_encoded_length) {
1407  str = "CHAR_LENGTH (" + arg->to_string() + ")";
1408  } else {
1409  str = "LENGTH (" + arg->to_string() + ")";
1410  }
1411  return str;
1412 }
1413 
1414 std::string CardinalityExpr::to_string() const {
1415  std::string str = "CARDINALITY(" + arg->to_string() + ")";
1416  return str;
1417 }
1418 
1419 std::string LikeExpr::to_string() const {
1420  std::string str = arg->to_string();
1421  if (is_not) {
1422  str += " NOT LIKE ";
1423  } else {
1424  str += " LIKE ";
1425  }
1426  str += like_string->to_string();
1427  if (escape_string != nullptr) {
1428  str += " ESCAPE " + escape_string->to_string();
1429  }
1430  return str;
1431 }
1432 
1433 std::string RegexpExpr::to_string() const {
1434  std::string str = arg->to_string();
1435  if (is_not) {
1436  str += " NOT REGEXP ";
1437  } else {
1438  str += " REGEXP ";
1439  }
1440  str += pattern_string->to_string();
1441  if (escape_string != nullptr) {
1442  str += " ESCAPE " + escape_string->to_string();
1443  }
1444  return str;
1445 }
1446 
1447 std::string LikelihoodExpr::to_string() const {
1448  std::string str = " LIKELIHOOD ";
1449  str += arg->to_string();
1450  str += " ";
1451  str += boost::lexical_cast<std::string>(is_not ? 1.0 - likelihood : likelihood);
1452  return str;
1453 }
1454 
1455 std::string FunctionRef::to_string() const {
1456  std::string str = *name + "(";
1457  if (distinct) {
1458  str += "DISTINCT ";
1459  }
1460  if (arg == nullptr) {
1461  str += "*)";
1462  } else {
1463  str += arg->to_string() + ")";
1464  }
1465  return str;
1466 }
1467 
1468 std::string QuerySpec::to_string() const {
1469  std::string query_str = "SELECT ";
1470  if (is_distinct) {
1471  query_str += "DISTINCT ";
1472  }
1473  if (select_clause.empty()) {
1474  query_str += "* ";
1475  } else {
1476  bool notfirst = false;
1477  for (auto& p : select_clause) {
1478  if (notfirst) {
1479  query_str += ", ";
1480  } else {
1481  notfirst = true;
1482  }
1483  query_str += p->to_string();
1484  }
1485  }
1486  query_str += " FROM ";
1487  bool notfirst = false;
1488  for (auto& p : from_clause) {
1489  if (notfirst) {
1490  query_str += ", ";
1491  } else {
1492  notfirst = true;
1493  }
1494  query_str += p->to_string();
1495  }
1496  if (where_clause) {
1497  query_str += " WHERE " + where_clause->to_string();
1498  }
1499  if (!groupby_clause.empty()) {
1500  query_str += " GROUP BY ";
1501  bool notfirst = false;
1502  for (auto& p : groupby_clause) {
1503  if (notfirst) {
1504  query_str += ", ";
1505  } else {
1506  notfirst = true;
1507  }
1508  query_str += p->to_string();
1509  }
1510  }
1511  if (having_clause) {
1512  query_str += " HAVING " + having_clause->to_string();
1513  }
1514  query_str += ";";
1515  return query_str;
1516 }
1517 
// Common analysis for INSERT statements: marks the query as kINSERT, resolves the
// target table and records the list of result column ids on `query`.
//  - Throws std::runtime_error when the table does not exist or is a view.
//  - Without an explicit column list, the ids of the columns returned by
//    getAllColumnMetadataForTable(tableId, false, false, true) are used.
//  - With an explicit column list, each named column must exist; for a column with
//    physical columns (checked to be a geometry column) the ids of the following
//    `get_physical_cols()` columns are appended right after it.
1518 void InsertStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1519  Analyzer::Query& query) const {
1520  query.set_stmt_type(kINSERT);
1521  const TableDescriptor* td = catalog.getMetadataForTable(*table);
1522  if (td == nullptr) {
1523  throw std::runtime_error("Table " + *table + " does not exist.");
1524  }
1525  if (td->isView) {
1526  throw std::runtime_error("Insert to views is not supported yet.");
1527  }
// NOTE(review): listing line 1528 is absent from this view; a further statement
// (possibly another table validation) may sit here -- confirm against the full file.
1529  query.set_result_table_id(td->tableId);
1530  std::list<int> result_col_list;
1531  if (column_list.empty()) {
1532  const std::list<const ColumnDescriptor*> all_cols =
1533  catalog.getAllColumnMetadataForTable(td->tableId, false, false, true);
1534  for (auto cd : all_cols) {
1535  result_col_list.push_back(cd->columnId);
1536  }
1537  } else {
1538  for (auto& c : column_list) {
1539  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
1540  if (cd == nullptr) {
1541  throw std::runtime_error("Column " + *c + " does not exist.");
1542  }
1543  result_col_list.push_back(cd->columnId);
1544  const auto& col_ti = cd->columnType;
1545  if (col_ti.get_physical_cols() > 0) {
1546  CHECK(cd->columnType.is_geometry());
// Physical columns are looked up by consecutive ids following the logical column.
1547  for (auto i = 1; i <= col_ti.get_physical_cols(); i++) {
1548  const ColumnDescriptor* pcd =
1549  catalog.getMetadataForColumn(td->tableId, cd->columnId + i);
1550  if (pcd == nullptr) {
1551  throw std::runtime_error("Column " + *c + "'s metadata is incomplete.");
1552  }
1553  result_col_list.push_back(pcd->columnId);
1554  }
1555  }
1556  }
1557  }
1558  query.set_result_col_list(result_col_list);
1559 }
1560 
// Determines the index of the leaf that should receive this INSERT ... VALUES row.
//  - Throws std::runtime_error when the table does not exist, is a view, or is
//    partitioned as "REPLICATED".
//  - For a table without shards a leaf is picked at random (dis(gen) % num_leafs).
//  - For a sharded table the value supplied for the shard column is analyzed, cast to
//    the shard column type and mapped to a shard id via SHARD_FOR_KEY over
//    `td->nShards * num_leafs` shards; the result is shardId / td->nShards.
//  - Throws std::runtime_error when no value is supplied for the shard column.
1561 size_t InsertValuesStmt::determineLeafIndex(const Catalog_Namespace::Catalog& catalog,
1562  size_t num_leafs) {
1563  const TableDescriptor* td = catalog.getMetadataForTable(*table);
1564  if (td == nullptr) {
1565  throw std::runtime_error("Table " + *table + " does not exist.");
1566  }
1567  if (td->isView) {
1568  throw std::runtime_error("Insert to views is not supported yet.");
1569  }
// NOTE(review): listing line 1570 is absent from this view; a further statement may
// sit here -- confirm against the full file.
1571  if (td->partitions == "REPLICATED") {
1572  throw std::runtime_error("Cannot determine leaf on replicated table.");
1573  }
1574 
1575  if (0 == td->nShards) {
1576  std::random_device rd;
1577  std::mt19937_64 gen(rd());
1578  std::uniform_int_distribution<size_t> dis;
1579  const auto leaf_idx = dis(gen) % num_leafs;
1580  return leaf_idx;
1581  }
1582 
1583  size_t indexOfShardColumn = 0;
1584  const ColumnDescriptor* shardColumn = catalog.getShardColumnMetadataForTable(td);
1585  CHECK(shardColumn);
1586  auto shard_count = td->nShards * num_leafs;
1587  int64_t shardId = 0;
1588 
// Locate the position of the shard column's value within value_list.
1589  if (column_list.empty()) {
1590  auto all_cols =
1591  catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
1592  auto iter = std::find(all_cols.begin(), all_cols.end(), shardColumn);
1593  CHECK(iter != all_cols.end());
1594  indexOfShardColumn = std::distance(all_cols.begin(), iter);
1595  } else {
1596  for (auto& c : column_list) {
1597  if (*c == shardColumn->columnName) {
1598  break;
1599  }
1600  indexOfShardColumn++;
1601  }
1602 
// The shard column was not named in the explicit column list.
1603  if (indexOfShardColumn == column_list.size()) {
// NOTE(review): listing line 1604 is absent from this view; only its continuation
// `shard_count);` is visible. Presumably it assigns shardId for an absent shard-column
// value -- confirm against the full file.
1605  shard_count);
1606  return shardId / td->nShards;
1607  }
1608  }
1609 
1610  if (indexOfShardColumn >= value_list.size()) {
1611  throw std::runtime_error("No value defined for shard column.");
1612  }
1613 
1614  auto& shardColumnValueExpr = *(std::next(value_list.begin(), indexOfShardColumn));
1615 
1616  Analyzer::Query query;
1617  auto e = shardColumnValueExpr->analyze(catalog, query);
1618  e = e->add_cast(shardColumn->columnType);
// The cast result is either the constant itself or a kCAST UOper wrapping a constant.
1619  const Analyzer::Constant* con = dynamic_cast<Analyzer::Constant*>(e.get());
1620  if (!con) {
1621  auto col_cast = dynamic_cast<const Analyzer::UOper*>(e.get());
1622  CHECK(col_cast);
1623  CHECK_EQ(kCAST, col_cast->get_optype());
1624  con = dynamic_cast<const Analyzer::Constant*>(col_cast->get_operand());
1625  }
1626  CHECK(con);
1627 
1628  Datum d = con->get_constval();
1629  if (con->get_is_null()) {
// NOTE(review): listing line 1630 is absent from this view; only its continuation
// `shard_count);` is visible. Presumably it assigns shardId for a NULL shard-column
// value -- confirm against the full file.
1631  shard_count);
1632  } else if (shardColumn->columnType.is_string()) {
1633  auto dictDesc =
1634  catalog.getMetadataForDict(shardColumn->columnType.get_comp_param(), true);
1635  auto str_id = dictDesc->stringDict->getOrAdd(*d.stringval);
1636  bool invalid = false;
1637 
// Check the string id against the maximum valid value for the column's storage size.
1638  if (4 == shardColumn->columnType.get_size()) {
1639  invalid = str_id > max_valid_int_value<int32_t>();
1640  } else if (2 == shardColumn->columnType.get_size()) {
1641  invalid = str_id > max_valid_int_value<uint16_t>();
1642  } else if (1 == shardColumn->columnType.get_size()) {
1643  invalid = str_id > max_valid_int_value<uint8_t>();
1644  }
1645 
1646  if (invalid || str_id == inline_int_null_value<int32_t>()) {
1647  str_id = inline_fixed_encoding_null_val(shardColumn->columnType);
1648  }
1649  shardId = SHARD_FOR_KEY(str_id, shard_count);
1650  } else {
// Pick the Datum member that matches the column's logical size in bytes.
1651  switch (shardColumn->columnType.get_logical_size()) {
1652  case 8:
1653  shardId = SHARD_FOR_KEY(d.bigintval, shard_count);
1654  break;
1655  case 4:
1656  shardId = SHARD_FOR_KEY(d.intval, shard_count);
1657  break;
1658  case 2:
1659  shardId = SHARD_FOR_KEY(d.smallintval, shard_count);
1660  break;
1661  case 1:
1662  shardId = SHARD_FOR_KEY(d.tinyintval, shard_count);
1663  break;
1664  default:
1665  CHECK(false);
1666  }
1667  }
1668 
1669  return shardId / td->nShards;
1670 }
1671 
// Analyzes an INSERT ... VALUES statement and fills the query's target list.
//  - Runs InsertStmt::analyze() first to resolve the table and result column list.
//  - Throws std::runtime_error when the number of values does not match the explicit
//    column list, or (without a column list) the columns returned by
//    getAllColumnMetadataForTable(tableId, false, false, false).
//  - Each value is analyzed, rejected if it is a NULL constant for a NOT NULL column,
//    cast to the column type and appended as a TargetEntry.
//  - For columns with physical columns (checked to be geometry), the value must be a
//    string constant or NULL; the geometry is read and one additional TargetEntry is
//    appended per physical column: coords always, ring sizes for POLYGON and
//    MULTIPOLYGON, poly rings for MULTIPOLYGON, bounds for LINESTRING, POLYGON and
//    MULTIPOLYGON, and render group for POLYGON and MULTIPOLYGON.
//  `it` walks query.get_result_col_list() in step with the appended target entries.
1672 void InsertValuesStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1673  Analyzer::Query& query) const {
1674  InsertStmt::analyze(catalog, query);
1675  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1676  query.get_targetlist_nonconst();
1677  const auto tableId = query.get_result_table_id();
1678  if (!column_list.empty()) {
1679  if (value_list.size() != column_list.size()) {
1680  throw std::runtime_error(
1681  "Numbers of columns and values don't match for the "
1682  "insert.");
1683  }
1684  } else {
1685  const std::list<const ColumnDescriptor*> non_phys_cols =
1686  catalog.getAllColumnMetadataForTable(tableId, false, false, false);
1687  if (non_phys_cols.size() != value_list.size()) {
1688  throw std::runtime_error(
1689  "Number of columns in table does not match the list of values given in the "
1690  "insert.");
1691  }
1692  }
1693  std::list<int>::const_iterator it = query.get_result_col_list().begin();
1694  for (auto& v : value_list) {
1695  auto e = v->analyze(catalog, query);
1696  const ColumnDescriptor* cd =
1697  catalog.getMetadataForColumn(query.get_result_table_id(), *it);
1698  CHECK(cd);
1699  if (cd->columnType.get_notnull()) {
1700  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
1701  if (c != nullptr && c->get_is_null()) {
1702  throw std::runtime_error("Cannot insert NULL into column " + cd->columnName);
1703  }
1704  }
1705  e = e->add_cast(cd->columnType);
1706  tlist.emplace_back(new Analyzer::TargetEntry("", e, false));
1707  ++it;
1708 
1709  const auto& col_ti = cd->columnType;
1710  if (col_ti.get_physical_cols() > 0) {
1711  CHECK(cd->columnType.is_geometry());
// The cast value is either a constant or a kCAST UOper wrapping a constant.
1712  auto c = dynamic_cast<const Analyzer::Constant*>(e.get());
1713  if (!c) {
1714  auto uoper = std::dynamic_pointer_cast<Analyzer::UOper>(e);
1715  if (uoper && uoper->get_optype() == kCAST) {
1716  c = dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
1717  }
1718  }
1719  bool is_null = false;
1720  std::string* geo_string{nullptr};
1721  if (c) {
1722  is_null = c->get_is_null();
1723  if (!is_null) {
1724  geo_string = c->get_constval().stringval;
1725  }
1726  }
1727  if (!is_null && !geo_string) {
1728  throw std::runtime_error("Expecting a WKT or WKB hex string for column " +
1729  cd->columnName);
1730  }
1731  std::vector<double> coords;
1732  std::vector<double> bounds;
1733  std::vector<int> ring_sizes;
1734  std::vector<int> poly_rings;
1735  int render_group =
1736  0; // @TODO simon.eves where to get render_group from in this context?!
1737  SQLTypeInfo import_ti{cd->columnType};
1738  if (!is_null) {
// NOTE(review): listing line 1739 is absent from this view; only the argument list of
// the call is visible. Presumably it is the opening of an `if` around a geometry
// reader that fills the vectors above from `*geo_string` -- confirm.
1740  *geo_string, import_ti, coords, bounds, ring_sizes, poly_rings)) {
1741  throw std::runtime_error("Cannot read geometry to insert into column " +
1742  cd->columnName);
1743  }
1744  if (coords.empty()) {
1745  // Importing from geo_string WKT resulted in empty coords: dealing with a NULL
1746  is_null = true;
1747  }
1748  if (cd->columnType.get_type() != import_ti.get_type()) {
1749  // allow POLYGON to be inserted into MULTIPOLYGON column
1750  if (!(import_ti.get_type() == SQLTypes::kPOLYGON &&
// NOTE(review): listing line 1751 is absent from this view; presumably it completes
// the condition with a check that the column type is MULTIPOLYGON -- confirm.
1752  throw std::runtime_error(
1753  "Imported geometry doesn't match the type of column " + cd->columnName);
1754  }
1755  }
1756  } else {
1757  // Special case for NULL POINT, push NULL representation to coords
1758  if (cd->columnType.get_type() == kPOINT) {
1759  if (!coords.empty()) {
1760  throw std::runtime_error("NULL POINT with unexpected coordinates in column " +
1761  cd->columnName);
1762  }
1763  coords.push_back(NULL_ARRAY_DOUBLE);
1764  coords.push_back(NULL_DOUBLE);
1765  }
1766  }
1767 
1768  // TODO: check if import SRID matches columns SRID, may need to transform before
1769  // inserting
1770 
// Offset of the next physical column relative to the logical geometry column id.
1771  int nextColumnOffset = 1;
1772 
1773  const ColumnDescriptor* cd_coords = catalog.getMetadataForColumn(
1774  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1775  CHECK(cd_coords);
1776  CHECK_EQ(cd_coords->columnType.get_type(), kARRAY);
1777  CHECK_EQ(cd_coords->columnType.get_subtype(), kTINYINT);
1778  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
// Coordinates are compressed and stored as an array of TINYINT constants.
1779  if (!is_null || cd->columnType.get_type() == kPOINT) {
1780  auto compressed_coords = Geospatial::compress_coords(coords, col_ti);
1781  for (auto cc : compressed_coords) {
1782  Datum d;
1783  d.tinyintval = cc;
1784  auto e = makeExpr<Analyzer::Constant>(kTINYINT, false, d);
1785  value_exprs.push_back(e);
1786  }
1787  }
1788  tlist.emplace_back(new Analyzer::TargetEntry(
1789  "",
1790  makeExpr<Analyzer::Constant>(cd_coords->columnType, is_null, value_exprs),
1791  false));
1792  ++it;
1793  nextColumnOffset++;
1794 
1795  if (cd->columnType.get_type() == kPOLYGON ||
1796  cd->columnType.get_type() == kMULTIPOLYGON) {
1797  // Put ring sizes array into separate physical column
1798  const ColumnDescriptor* cd_ring_sizes = catalog.getMetadataForColumn(
1799  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1800  CHECK(cd_ring_sizes);
1801  CHECK_EQ(cd_ring_sizes->columnType.get_type(), kARRAY);
1802  CHECK_EQ(cd_ring_sizes->columnType.get_subtype(), kINT);
1803  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1804  if (!is_null) {
1805  for (auto c : ring_sizes) {
1806  Datum d;
1807  d.intval = c;
1808  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1809  value_exprs.push_back(e);
1810  }
1811  }
1812  tlist.emplace_back(new Analyzer::TargetEntry(
1813  "",
1814  makeExpr<Analyzer::Constant>(cd_ring_sizes->columnType, is_null, value_exprs),
1815  false));
1816  ++it;
1817  nextColumnOffset++;
1818 
1819  if (cd->columnType.get_type() == kMULTIPOLYGON) {
1820  // Put poly_rings array into separate physical column
1821  const ColumnDescriptor* cd_poly_rings = catalog.getMetadataForColumn(
1822  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1823  CHECK(cd_poly_rings);
1824  CHECK_EQ(cd_poly_rings->columnType.get_type(), kARRAY);
1825  CHECK_EQ(cd_poly_rings->columnType.get_subtype(), kINT);
1826  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1827  if (!is_null) {
1828  for (auto c : poly_rings) {
1829  Datum d;
1830  d.intval = c;
1831  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1832  value_exprs.push_back(e);
1833  }
1834  }
1835  tlist.emplace_back(new Analyzer::TargetEntry(
1836  "",
1837  makeExpr<Analyzer::Constant>(
1838  cd_poly_rings->columnType, is_null, value_exprs),
1839  false));
1840  ++it;
1841  nextColumnOffset++;
1842  }
1843  }
1844 
// Bounds are stored as an array of DOUBLE constants in their own physical column.
1845  if (cd->columnType.get_type() == kLINESTRING ||
1846  cd->columnType.get_type() == kPOLYGON ||
1847  cd->columnType.get_type() == kMULTIPOLYGON) {
1848  const ColumnDescriptor* cd_bounds = catalog.getMetadataForColumn(
1849  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1850  CHECK(cd_bounds);
1851  CHECK_EQ(cd_bounds->columnType.get_type(), kARRAY);
1852  CHECK_EQ(cd_bounds->columnType.get_subtype(), kDOUBLE);
1853  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1854  if (!is_null) {
1855  for (auto b : bounds) {
1856  Datum d;
1857  d.doubleval = b;
1858  auto e = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
1859  value_exprs.push_back(e);
1860  }
1861  }
1862  tlist.emplace_back(new Analyzer::TargetEntry(
1863  "",
1864  makeExpr<Analyzer::Constant>(cd_bounds->columnType, is_null, value_exprs),
1865  false));
1866  ++it;
1867  nextColumnOffset++;
1868  }
1869 
1870  if (cd->columnType.get_type() == kPOLYGON ||
1871  cd->columnType.get_type() == kMULTIPOLYGON) {
1872  // Put render group into separate physical column
1873  const ColumnDescriptor* cd_render_group = catalog.getMetadataForColumn(
1874  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1875  CHECK(cd_render_group);
1876  CHECK_EQ(cd_render_group->columnType.get_type(), kINT);
1877  Datum d;
1878  d.intval = render_group;
1879  tlist.emplace_back(new Analyzer::TargetEntry(
1880  "",
1881  makeExpr<Analyzer::Constant>(cd_render_group->columnType, is_null, d),
1882  false));
1883  ++it;
1884  nextColumnOffset++;
1885  }
1886  }
1887  }
1888 }
1889 
// Executes a single-row INSERT ... VALUES statement for the given session.
// Visible steps: check the session's privileges (std::runtime_error when the check
// fails), analyze the statement into an Analyzer::Query, look up the target table
// descriptor, reject views, and hand the query to RelAlgExecutor::executeSimpleInsert.
// NOTE(review): several continuation lines of this listing (the arguments of
// checkDBAccessPrivileges, of the lock constructor, and the definition of
// `executor`) are not visible here.
1890 void InsertValuesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
1891  auto& catalog = session.getCatalog();
1892 
1893  if (!session.checkDBAccessPrivileges(
1895  throw std::runtime_error("User has no insert privileges on " + *table + ".");
1896  }
1897 
1898  auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
1901 
// Fill `query` from this statement before any table descriptor is looked up.
1902  Analyzer::Query query;
1903  analyze(catalog, query);
1904 
1905  // Acquire schema write lock -- leave data lock so the fragmenter can checkpoint. For
1906  // singleton inserts we just take a write lock on the schema, which prevents concurrent
1907  // inserts.
1908  auto result_table_id = query.get_result_table_id();
1909  const auto td_with_lock =
1911  catalog, result_table_id);
1912  // NOTE(max): we do the same checks as below just a few calls earlier in analyze().
1913  // Do we keep those intentionally to make sure nothing changed in between w/o
1914  // catalog locks or is it just a duplicate work?
1915  auto td = td_with_lock();
1916  CHECK(td);
1917  if (td->isView) {
1918  throw std::runtime_error("Singleton inserts on views is not supported.");
1919  }
1921 
1923  RelAlgExecutor ra_executor(executor.get(), catalog);
1924 
1925  ra_executor.executeSimpleInsert(query);
1926 }
1927 
1928 void UpdateStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1929  Analyzer::Query& query) const {
1930  throw std::runtime_error("UPDATE statement not supported yet.");
1931 }
1932 
1933 void DeleteStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1934  Analyzer::Query& query) const {
1935  throw std::runtime_error("DELETE statement not supported yet.");
1936 }
1937 
1938 namespace {
1939 
// Accepts a column type as a shard key when it is an integer type, a
// dictionary-encoded string, or a time type; any other type raises
// std::runtime_error naming the type and its encoding.
// NOTE(review): the signature line of this function is not visible in this
// listing; `cd` is presumably its column-descriptor parameter.
1941  const auto& col_ti = cd.columnType;
1942  if (col_ti.is_integer() ||
1943  (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) ||
1944  col_ti.is_time()) {
1945  return;
1946  }
1947  throw std::runtime_error("Cannot shard on type " + col_ti.get_type_name() +
1948  ", encoding " + col_ti.get_compression_name());
1949 }
1950 
// Returns the 1-based position of the column whose name equals `name` (exact,
// case-sensitive comparison), or 0 when no column matches. Every geometry column
// that precedes the match advances the position by an extra get_physical_cols().
// NOTE(review): one line inside the match branch is not visible in this listing.
1951 size_t shard_column_index(const std::string& name,
1952  const std::list<ColumnDescriptor>& columns) {
1953  size_t index = 1;
1954  for (const auto& cd : columns) {
1955  if (cd.columnName == name) {
1957  return index;
1958  }
1959  ++index;
// Skip over the physical columns that follow a geometry column.
1960  if (cd.columnType.is_geometry()) {
1961  index += cd.columnType.get_physical_cols();
1962  }
1963  }
1964  // Not found, return 0
1965  return 0;
1966 }
1967 
1968 size_t sort_column_index(const std::string& name,
1969  const std::list<ColumnDescriptor>& columns) {
1970  size_t index = 1;
1971  for (const auto& cd : columns) {
1972  if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
1973  return index;
1974  }
1975  ++index;
1976  if (cd.columnType.is_geometry()) {
1977  index += cd.columnType.get_physical_cols();
1978  }
1979  }
1980  // Not found, return 0
1981  return 0;
1982 }
1983 
1984 void set_string_field(rapidjson::Value& obj,
1985  const std::string& field_name,
1986  const std::string& field_value,
1987  rapidjson::Document& document) {
1988  rapidjson::Value field_name_json_str;
1989  field_name_json_str.SetString(
1990  field_name.c_str(), field_name.size(), document.GetAllocator());
1991  rapidjson::Value field_value_json_str;
1992  field_value_json_str.SetString(
1993  field_value.c_str(), field_value.size(), document.GetAllocator());
1994  obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
1995 }
1996 
1998  const ShardKeyDef* shard_key_def,
1999  const std::vector<SharedDictionaryDef>& shared_dict_defs) {
// Serializes the key definitions of a table into a JSON array string:
//  - one {"type": "SHARD KEY", "name": <column>} object when shard_key_def is
//    non-null, followed by
//  - one {"type": "SHARED DICTIONARY", "name", "foreign_table", "foreign_column"}
//    object per entry of shared_dict_defs.
// NOTE(review): the first signature line and the opening line of the
// "foreign_table" call are not visible in this listing.
2000  rapidjson::Document document;
2001  auto& allocator = document.GetAllocator();
2002  rapidjson::Value arr(rapidjson::kArrayType);
2003  if (shard_key_def) {
2004  rapidjson::Value shard_key_obj(rapidjson::kObjectType);
2005  set_string_field(shard_key_obj, "type", "SHARD KEY", document);
2006  set_string_field(shard_key_obj, "name", shard_key_def->get_column(), document);
2007  arr.PushBack(shard_key_obj, allocator);
2008  }
2009  for (const auto& shared_dict_def : shared_dict_defs) {
2010  rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
2011  set_string_field(shared_dict_obj, "type", "SHARED DICTIONARY", document);
2012  set_string_field(shared_dict_obj, "name", shared_dict_def.get_column(), document);
2014  shared_dict_obj, "foreign_table", shared_dict_def.get_foreign_table(), document);
2015  set_string_field(shared_dict_obj,
2016  "foreign_column",
2017  shared_dict_def.get_foreign_column(),
2018  document);
2019  arr.PushBack(shared_dict_obj, allocator);
2020  }
// Write the array (not the otherwise empty document) into a string buffer.
2021  rapidjson::StringBuffer buffer;
2022  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
2023  arr.Accept(writer);
2024  return buffer.GetString();
2025 }
2026 
// Generic option handler: runs `validate` on the option `p` and passes the value
// it returns to `op`, returning whatever `op` returns. `validate` defaults to a
// default-constructed VALIDATE.
// NOTE(review): the remaining template parameter lines and the first function
// parameter line(s) are not visible in this listing.
2027 template <typename LITERAL_TYPE,
2028  typename ASSIGNMENT,
2031  ASSIGNMENT op,
2032  VALIDATE validate = VALIDATE()) {
2033  const auto val = validate(p);
2034  return op(val);
2035 }
2036 
2039  const std::list<ColumnDescriptor>& columns) {
// Stores the option's string value in td.storageType. Validation goes through
// CaseSensitiveValidate instead of the default validator.
// NOTE(review): the leading signature lines are not visible in this listing.
2040  auto assignment = [&td](const auto val) { td.storageType = val; };
2041  return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
2042  p, assignment);
2043 }
2044 
2047  const std::list<ColumnDescriptor>& columns) {
// Stores the option's integer value in td.maxFragRows.
// NOTE(review): the leading signature lines are not visible in this listing.
2048  return get_property_value<IntLiteral>(p,
2049  [&td](const auto val) { td.maxFragRows = val; });
2050 }
2051 
2054  const std::list<ColumnDescriptor>& columns) {
// Stores the option's integer value in df_td.maxFragRows.
// NOTE(review): the leading signature lines are not visible in this listing.
2055  return get_property_value<IntLiteral>(
2056  p, [&df_td](const auto val) { df_td.maxFragRows = val; });
2057 }
2058 
2061  const std::list<ColumnDescriptor>& columns) {
// Stores the option's integer value in td.maxChunkSize.
// NOTE(review): the leading signature lines are not visible in this listing.
2062  return get_property_value<IntLiteral>(p,
2063  [&td](const auto val) { td.maxChunkSize = val; });
2064 }
2065 
2067  DataframeTableDescriptor& df_td,
2069  const std::list<ColumnDescriptor>& columns) {
2070  return get_property_value<IntLiteral>(
2071  p, [&df_td](const auto val) { df_td.maxChunkSize = val; });
2072 }
2073 
2076  const std::list<ColumnDescriptor>& columns) {
// Stores the option's string value in df_td.delimiter; a value whose length is
// not exactly 1 raises std::runtime_error.
// NOTE(review): the leading signature lines are not visible in this listing.
2077  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2078  if (val.size() != 1) {
2079  throw std::runtime_error("Length of DELIMITER must be equal to 1.");
2080  }
2081  df_td.delimiter = val;
2082  });
2083 }
2084 
2087  const std::list<ColumnDescriptor>& columns) {
// Sets df_td.hasHeader from the option value: "FALSE" -> false, "TRUE" -> true,
// anything else raises std::runtime_error. The comparison is against upper-case
// text, so the validator presumably upper-cases the literal first (TODO confirm).
// NOTE(review): the leading signature lines are not visible in this listing.
2088  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2089  if (val == "FALSE") {
2090  df_td.hasHeader = false;
2091  } else if (val == "TRUE") {
2092  df_td.hasHeader = true;
2093  } else {
2094  throw std::runtime_error("Option HEADER support only 'true' or 'false' values.");
2095  }
2096  });
2097 }
2098 
2101  const std::list<ColumnDescriptor>& columns) {
// Stores the option's integer value in td.fragPageSize.
// NOTE(review): the leading signature lines are not visible in this listing.
2102  return get_property_value<IntLiteral>(p,
2103  [&td](const auto val) { td.fragPageSize = val; });
2104 }
2107  const std::list<ColumnDescriptor>& columns) {
// Stores the option's integer value in td.maxRows.
// NOTE(review): the leading signature lines are not visible in this listing.
2108  return get_property_value<IntLiteral>(p, [&td](const auto val) { td.maxRows = val; });
2109 }
2110 
2113  const std::list<ColumnDescriptor>& columns) {
// Stores the option's integer value in df_td.skipRows.
// NOTE(review): the leading signature lines are not visible in this listing.
2114  return get_property_value<IntLiteral>(
2115  p, [&df_td](const auto val) { df_td.skipRows = val; });
2116 }
2117 
2120  const std::list<ColumnDescriptor>& columns) {
// Stores the option value in td.partitions. Only "SHARDED" and "REPLICATED" are
// accepted, and "REPLICATED" is rejected when td.shardedColumnId is already
// non-zero; both failures raise std::runtime_error.
// NOTE(review): the leading signature lines are not visible in this listing.
2121  return get_property_value<StringLiteral>(p, [&td](const auto partitions_uc) {
2122  if (partitions_uc != "SHARDED" && partitions_uc != "REPLICATED") {
2123  throw std::runtime_error("PARTITIONS must be SHARDED or REPLICATED");
2124  }
2125  if (td.shardedColumnId != 0 && partitions_uc == "REPLICATED") {
2126  throw std::runtime_error(
2127  "A table cannot be sharded and replicated at the same time");
2128  };
2129  td.partitions = partitions_uc;
2130  });
2131 }
2134  const std::list<ColumnDescriptor>& columns) {
// Derives td.nShards from the SHARD_COUNT option. Requires td.shardedColumnId to
// be set already (std::runtime_error otherwise). When g_leaf_count is non-zero the
// count must be a multiple of it and the quotient is stored; otherwise the count
// is stored as given.
// NOTE(review): the leading signature lines are not visible in this listing.
2135  if (!td.shardedColumnId) {
2136  throw std::runtime_error("SHARD KEY must be defined.");
2137  }
2138  return get_property_value<IntLiteral>(p, [&td](const auto shard_count) {
2139  if (g_leaf_count && shard_count % g_leaf_count) {
2140  throw std::runtime_error(
2141  "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2142  }
2143  td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
// NOTE(review): td.shardedColumnId was already checked to be non-zero above, so
// this condition looks like it can never hold — confirm whether `&&` is intended.
2144  if (!td.shardedColumnId && !td.nShards) {
2145  throw std::runtime_error(
2146  "Must specify the number of shards through the SHARD_COUNT option");
2147  };
2148  });
2149 }
2150 
// Applies the VACUUM option: accepts only "IMMEDIATE" or "DELAYED"
// (std::runtime_error otherwise) and sets td.hasDeletedCol to false for
// "IMMEDIATE" and to true for "DELAYED".
// NOTE(review): the option parameter line of the signature is not visible in this
// listing.
2151 decltype(auto) get_vacuum_def(TableDescriptor& td,
2153  const std::list<ColumnDescriptor>& columns) {
2154  return get_property_value<StringLiteral>(p, [&td](const auto vacuum_uc) {
2155  if (vacuum_uc != "IMMEDIATE" && vacuum_uc != "DELAYED") {
2156  throw std::runtime_error("VACUUM must be IMMEDIATE or DELAYED");
2157  }
2158  td.hasDeletedCol = boost::iequals(vacuum_uc, "IMMEDIATE") ? false : true;
2159  });
2160 }
2161 
2164  const std::list<ColumnDescriptor>& columns) {
// Resolves the SORT_COLUMN option to a column position via sort_column_index and
// stores it in td.sortedColumnId; a result of 0 (column not found) raises
// std::runtime_error.
// NOTE(review): the leading signature lines are not visible in this listing.
2165  return get_property_value<StringLiteral>(p, [&td, &columns](const auto sort_upper) {
2166  td.sortedColumnId = sort_column_index(sort_upper, columns);
2167  if (!td.sortedColumnId) {
2168  throw std::runtime_error("Specified sort column " + sort_upper + " doesn't exist");
2169  }
2170  });
2171 }
2172 
2175  const std::list<ColumnDescriptor>& columns) {
// Stores the option's integer value in td.maxRollbackEpochs, mapping any negative
// value to -1. Validation goes through PositiveOrZeroValidate.
// NOTE(review): PositiveOrZeroValidate presumably rejects negative input, which
// would make the `val < 0` branch unreachable — confirm.
// NOTE(review): the leading signature lines are not visible in this listing.
2176  auto assignment = [&td](const auto val) {
2177  td.maxRollbackEpochs =
2178  val < 0 ? -1 : val; // Anything < 0 means unlimited rollbacks. Note that 0
2179  // still means keeping a shadow copy of data/metadata
2180  // between epochs so bad writes can be rolled back
2181  };
2182  return get_property_value<IntLiteral, decltype(assignment), PositiveOrZeroValidate>(
2183  p, assignment);
2184 }
2185 
2186 static const std::map<const std::string, const TableDefFuncPtr> tableDefFuncMap = {
2187  {"fragment_size"s, get_frag_size_def},
2188  {"max_chunk_size"s, get_max_chunk_size_def},
2189  {"page_size"s, get_page_size_def},
2190  {"max_rows"s, get_max_rows_def},
2191  {"partitions"s, get_partions_def},
2192  {"shard_count"s, get_shard_count_def},
2193  {"vacuum"s, get_vacuum_def},
2194  {"sort_column"s, get_sort_column_def},
2195  {"storage_type"s, get_storage_type},
2196  {"max_rollback_epochs", get_max_rollback_epochs_def}};
2197 
2199  const std::unique_ptr<NameValueAssign>& p,
2200  const std::list<ColumnDescriptor>& columns) {
// Looks up the handler for option `p` in tableDefFuncMap by its lower-cased name
// and applies it to `td`; an unknown option name raises std::runtime_error listing
// the accepted CREATE TABLE options.
// NOTE(review): the first signature line is not visible in this listing.
2201  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2202  if (it == tableDefFuncMap.end()) {
2203  throw std::runtime_error(
2204  "Invalid CREATE TABLE option " + *p->get_name() +
2205  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2206  "MAX_ROWS, "
2207  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE.");
2208  }
2209  return it->second(td, p.get(), columns);
2210 }
2211 
2213  const std::unique_ptr<NameValueAssign>& p,
2214  const std::list<ColumnDescriptor>& columns) {
// CREATE TABLE AS variant of the option dispatcher: same tableDefFuncMap lookup by
// lower-cased option name; only the error text differs.
// NOTE(review): the message mentions USE_SHARED_DICTIONARIES, which has no entry
// in tableDefFuncMap — presumably the caller handles it before reaching here.
// NOTE(review): the first signature line is not visible in this listing.
2215  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2216  if (it == tableDefFuncMap.end()) {
2217  throw std::runtime_error(
2218  "Invalid CREATE TABLE AS option " + *p->get_name() +
2219  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2220  "MAX_ROWS, "
2221  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE or "
2222  "USE_SHARED_DICTIONARIES.");
2223  }
2224  return it->second(td, p.get(), columns);
2225 }
2226 
2227 static const std::map<const std::string, const DataframeDefFuncPtr> dataframeDefFuncMap =
2228  {{"fragment_size"s, get_frag_size_dataframe_def},
2229  {"max_chunk_size"s, get_max_chunk_size_dataframe_def},
2230  {"skip_rows"s, get_skip_rows_def},
2231  {"delimiter"s, get_delimiter_def},
2232  {"header"s, get_header_def}};
2233 
2235  const std::unique_ptr<NameValueAssign>& p,
2236  const std::list<ColumnDescriptor>& columns) {
// Looks up the handler for option `p` in dataframeDefFuncMap by its lower-cased
// name and applies it to `df_td`; an unknown option name raises
// std::runtime_error listing the accepted CREATE DATAFRAME options.
// NOTE(review): the first signature line is not visible in this listing.
2237  const auto it =
2238  dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2239  if (it == dataframeDefFuncMap.end()) {
2240  throw std::runtime_error(
2241  "Invalid CREATE DATAFRAME option " + *p->get_name() +
2242  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
2243  }
2244  return it->second(df_td, p.get(), columns);
2245 }
2246 
2247 } // namespace
2248 
2249 CreateTableStmt::CreateTableStmt(const rapidjson::Value& payload) {
2250  CHECK(payload.HasMember("name"));
2251  table_ = std::make_unique<std::string>(json_str(payload["name"]));
2252  CHECK(payload.HasMember("elements"));
2253  CHECK(payload["elements"].IsArray());
2254 
2255  // TODO: support temporary tables
2256  is_temporary_ = false;
2257 
2258  if_not_exists_ = false;
2259  if (payload.HasMember("ifNotExists")) {
2260  if_not_exists_ = json_bool(payload["ifNotExists"]);
2261  }
2262 
2263  const auto elements = payload["elements"].GetArray();
2264  for (const auto& element : elements) {
2265  CHECK(element.IsObject());
2266  CHECK(element.HasMember("type"));
2267  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
2268  CHECK(element.HasMember("name"));
2269  auto col_name = std::make_unique<std::string>(json_str(element["name"]));
2270  CHECK(element.HasMember("sqltype"));
2271  const auto sql_types = to_sql_type(json_str(element["sqltype"]));
2272 
2273  // decimal / numeric precision / scale
2274  int precision = -1;
2275  int scale = -1;
2276  if (element.HasMember("precision")) {
2277  precision = json_i64(element["precision"]);
2278  }
2279  if (element.HasMember("scale")) {
2280  scale = json_i64(element["scale"]);
2281  }
2282 
2283  std::optional<int64_t> array_size;
2284  if (element.HasMember("arraySize")) {
2285  // We do not yet support geo arrays
2286  array_size = json_i64(element["arraySize"]);
2287  }
2288  std::unique_ptr<SQLType> sql_type;
2289  if (element.HasMember("subtype")) {
2290  CHECK(element.HasMember("coordinateSystem"));
2291  const auto subtype_sql_types = to_sql_type(json_str(element["subtype"]));
2292  sql_type = std::make_unique<SQLType>(
2293  subtype_sql_types,
2294  static_cast<int>(sql_types),
2295  static_cast<int>(json_i64(element["coordinateSystem"])),
2296  false);
2297  } else if (precision > 0 && scale > 0) {
2298  sql_type = std::make_unique<SQLType>(sql_types,
2299  precision,
2300  scale,
2301  /*is_array=*/array_size.has_value(),
2302  array_size ? *array_size : -1);
2303  } else if (precision > 0) {
2304  sql_type = std::make_unique<SQLType>(sql_types,
2305  precision,
2306  0,
2307  /*is_array=*/array_size.has_value(),
2308  array_size ? *array_size : -1);
2309  } else {
2310  sql_type = std::make_unique<SQLType>(sql_types,
2311  /*is_array=*/array_size.has_value(),
2312  array_size ? *array_size : -1);
2313  }
2314  CHECK(sql_type);
2315 
2316  CHECK(element.HasMember("nullable"));
2317  const auto nullable = json_bool(element["nullable"]);
2318  std::unique_ptr<ColumnConstraintDef> constraint_def;
2319  if (!nullable) {
2320  constraint_def = std::make_unique<ColumnConstraintDef>(/*notnull=*/true,
2321  /*unique=*/false,
2322  /*primarykey=*/false,
2323  /*defaultval=*/nullptr);
2324  }
2325  std::unique_ptr<CompressDef> compress_def;
2326  if (element.HasMember("encodingType") && !element["encodingType"].IsNull()) {
2327  std::string encoding_type = json_str(element["encodingType"]);
2328  CHECK(element.HasMember("encodingSize"));
2329  auto encoding_name =
2330  std::make_unique<std::string>(json_str(element["encodingType"]));
2331  compress_def = std::make_unique<CompressDef>(encoding_name.release(),
2332  json_i64(element["encodingSize"]));
2333  }
2334  auto col_def = std::make_unique<ColumnDef>(
2335  col_name.release(),
2336  sql_type.release(),
2337  compress_def ? compress_def.release() : nullptr,
2338  constraint_def ? constraint_def.release() : nullptr);
2339  table_element_list_.emplace_back(std::move(col_def));
2340  } else if (json_str(element["type"]) == "SQL_COLUMN_CONSTRAINT") {
2341  CHECK(element.HasMember("name"));
2342  if (json_str(element["name"]) == "SHARD_KEY") {
2343  CHECK(element.HasMember("columns"));
2344  CHECK(element["columns"].IsArray());
2345  const auto& columns = element["columns"].GetArray();
2346  if (columns.Size() != size_t(1)) {
2347  throw std::runtime_error("Only one shard column is currently supported.");
2348  }
2349  auto shard_key_def = std::make_unique<ShardKeyDef>(json_str(columns[0]));
2350  table_element_list_.emplace_back(std::move(shard_key_def));
2351  } else if (json_str(element["name"]) == "SHARED_DICT") {
2352  CHECK(element.HasMember("columns"));
2353  CHECK(element["columns"].IsArray());
2354  const auto& columns = element["columns"].GetArray();
2355  if (columns.Size() != size_t(1)) {
2356  throw std::runtime_error(
2357  R"(Only one column per shared dictionary entry is currently supported. Use multiple SHARED DICT statements to share dictionaries from multiple columns.)");
2358  }
2359  CHECK(element.HasMember("references") && element["references"].IsObject());
2360  const auto& references = element["references"].GetObject();
2361  std::string references_table_name;
2362  if (references.HasMember("table")) {
2363  references_table_name = json_str(references["table"]);
2364  } else {
2365  references_table_name = *table_;
2366  }
2367  CHECK(references.HasMember("column"));
2368 
2369  auto shared_dict_def = std::make_unique<SharedDictionaryDef>(
2370  json_str(columns[0]), references_table_name, json_str(references["column"]));
2371  table_element_list_.emplace_back(std::move(shared_dict_def));
2372 
2373  } else {
2374  LOG(FATAL) << "Unsupported type for SQL_COLUMN_CONSTRAINT: "
2375  << json_str(element["name"]);
2376  }
2377  } else {
2378  LOG(FATAL) << "Unsupported element type for CREATE TABLE: "
2379  << element["type"].GetString();
2380  }
2381  }
2382 
2383  CHECK(payload.HasMember("options"));
2384  if (payload["options"].IsObject()) {
2385  for (const auto& option : payload["options"].GetObject()) {
2386  auto option_name = std::make_unique<std::string>(json_str(option.name));
2387  std::unique_ptr<Literal> literal_value;
2388  if (option.value.IsString()) {
2389  auto literal_string = std::make_unique<std::string>(json_str(option.value));
2390  literal_value = std::make_unique<StringLiteral>(literal_string.release());
2391  } else if (option.value.IsInt() || option.value.IsInt64()) {
2392  literal_value = std::make_unique<IntLiteral>(json_i64(option.value));
2393  } else if (option.value.IsNull()) {
2394  literal_value = std::make_unique<NullLiteral>();
2395  } else {
2396  throw std::runtime_error("Unable to handle literal for " + *option_name);
2397  }
2398  CHECK(literal_value);
2399 
2400  storage_options_.emplace_back(std::make_unique<NameValueAssign>(
2401  option_name.release(), literal_value.release()));
2402  }
2403  } else {
2404  CHECK(payload["options"].IsNull());
2405  }
2406 }
2407 
// Turns the parsed table elements and storage options into catalog inputs without
// creating anything: fills `columns` with one ColumnDescriptor per column
// definition, appends each shared-dictionary definition to `shared_dict_defs`, and
// populates `td` (default attributes, shard column, storage options, key
// metainfo).
// Throws std::runtime_error when more than one shard key is given, when an element
// is not a column / shard key / shared dictionary definition, when the shard
// column does not exist, or when a shard key is given without SHARD_COUNT.
// NOTE(review): several lines of this listing are not visible (the call that takes
// `this, shared_dict_def, ...`, one line before setColumnDescriptor, and the
// bodies of the is_temporary_ branches).
2408 void CreateTableStmt::executeDryRun(const Catalog_Namespace::SessionInfo& session,
2409  TableDescriptor& td,
2410  std::list<ColumnDescriptor>& columns,
2411  std::vector<SharedDictionaryDef>& shared_dict_defs) {
2412  std::unordered_set<std::string> uc_col_names;
2413  const auto& catalog = session.getCatalog();
2414  const ShardKeyDef* shard_key_def{nullptr};
2415  for (auto& e : table_element_list_) {
2416  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2417  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2419  this, shared_dict_def, columns, shared_dict_defs, catalog);
2420  shared_dict_defs.push_back(*shared_dict_def);
2421  continue;
2422  }
2423  if (dynamic_cast<ShardKeyDef*>(e.get())) {
2424  if (shard_key_def) {
2425  throw std::runtime_error("Specified more than one shard key");
2426  }
2427  shard_key_def = static_cast<const ShardKeyDef*>(e.get());
2428  continue;
2429  }
2430  if (!dynamic_cast<ColumnDef*>(e.get())) {
2431  throw std::runtime_error("Table constraints are not supported yet.");
2432  }
2433  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2434  ColumnDescriptor cd;
2435  cd.columnName = *coldef->get_column_name();
2437  setColumnDescriptor(cd, coldef);
2438  columns.push_back(cd);
2439  }
2440 
2441  ddl_utils::set_default_table_attributes(*table_, td, columns.size());
2442 
// The shard column must be resolved before the storage options run, because the
// SHARD_COUNT handler requires td.shardedColumnId to be set.
2443  if (shard_key_def) {
2444  td.shardedColumnId = shard_column_index(shard_key_def->get_column(), columns);
2445  if (!td.shardedColumnId) {
2446  throw std::runtime_error("Specified shard column " + shard_key_def->get_column() +
2447  " doesn't exist");
2448  }
2449  }
2450  if (is_temporary_) {
2452  } else {
2454  }
2455  if (!storage_options_.empty()) {
2456  for (auto& p : storage_options_) {
2457  get_table_definitions(td, p, columns);
2458  }
2459  }
2460  if (td.shardedColumnId && !td.nShards) {
2461  throw std::runtime_error("SHARD_COUNT needs to be specified with SHARD_KEY.");
2462  }
2463  td.keyMetainfo = serialize_key_metainfo(shard_key_def, shared_dict_defs);
2464 }
2465 
// Creates the table described by this statement in the session's catalog.
// Returns without doing anything when validateNonExistentTableOrView reports false
// for the table name. Otherwise it builds the descriptors via executeDryRun,
// records the current user as owner, creates the table, and then registers the
// table as a DB object for that user.
// Throws std::runtime_error when the privilege check fails.
// NOTE(review): the arguments of the lock constructor and the privilege-check
// condition are not visible in this listing.
2466 void CreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2467  auto& catalog = session.getCatalog();
2468 
2469  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2472 
2473  // check access privileges
2476  throw std::runtime_error("Table " + *table_ +
2477  " will not be created. User has no create privileges.");
2478  }
2479 
2480  if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
2481  return;
2482  }
2483 
2484  TableDescriptor td;
2485  std::list<ColumnDescriptor> columns;
2486  std::vector<SharedDictionaryDef> shared_dict_defs;
2487 
2488  executeDryRun(session, td, columns, shared_dict_defs);
2489  td.userId = session.get_currentUser().userId;
2490 
2491  catalog.createShardedTable(td, columns, shared_dict_defs);
2492  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2493  // privileges
2494  SysCatalog::instance().createDBObject(
2495  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
2496 }
2497 
// Creates a dataframe-backed table in the session's catalog.
// Visible steps: reject an existing table of the same name, build one
// ColumnDescriptor per column definition (column names must be unique ignoring
// case), fill the dataframe table descriptor, apply the CREATE DATAFRAME options,
// create the table, and register it as a DB object for the current user.
// Throws std::runtime_error when the privilege check fails, the table already
// exists, an element is not a column or shared-dictionary definition, or a column
// name repeats.
// NOTE(review): several lines of this listing are not visible (lock arguments, the
// privilege-check condition, the declaration of `df_td`, the call that takes
// `this, shared_dict_def, ...`, and some descriptor assignments).
2498 void CreateDataframeStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2499  auto& catalog = session.getCatalog();
2500 
2501  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2504 
2505  // check access privileges
2508  throw std::runtime_error("Table " + *table_ +
2509  " will not be created. User has no create privileges.");
2510  }
2511 
2512  if (catalog.getMetadataForTable(*table_) != nullptr) {
2513  throw std::runtime_error("Table " + *table_ + " already exists.");
2514  }
2516  std::list<ColumnDescriptor> columns;
2517  std::vector<SharedDictionaryDef> shared_dict_defs;
2518 
// Upper-cased names seen so far, used to detect duplicate columns.
2519  std::unordered_set<std::string> uc_col_names;
2520  for (auto& e : table_element_list_) {
2521  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2522  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2524  this, shared_dict_def, columns, shared_dict_defs, catalog);
2525  shared_dict_defs.push_back(*shared_dict_def);
2526  continue;
2527  }
2528  if (!dynamic_cast<ColumnDef*>(e.get())) {
2529  throw std::runtime_error("Table constraints are not supported yet.");
2530  }
2531  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2532  ColumnDescriptor cd;
2533  cd.columnName = *coldef->get_column_name();
2534  const auto uc_col_name = boost::to_upper_copy<std::string>(cd.columnName);
2535  const auto it_ok = uc_col_names.insert(uc_col_name);
2536  if (!it_ok.second) {
2537  throw std::runtime_error("Column '" + cd.columnName + "' defined more than once");
2538  }
2539 
2540  setColumnDescriptor(cd, coldef);
2541  columns.push_back(cd);
2542  }
2543 
2544  df_td.tableName = *table_;
2545  df_td.nColumns = columns.size();
2546  df_td.isView = false;
2547  df_td.fragmenter = nullptr;
2552  df_td.maxRows = DEFAULT_MAX_ROWS;
2554  if (!storage_options_.empty()) {
2555  for (auto& p : storage_options_) {
2556  get_dataframe_definitions(df_td, p, columns);
2557  }
2558  }
// No shard key is passed here; only shared dictionaries are serialized.
2559  df_td.keyMetainfo = serialize_key_metainfo(nullptr, shared_dict_defs);
2560  df_td.userId = session.get_currentUser().userId;
2561  df_td.storageType = *filename_;
2562 
2563  catalog.createShardedTable(df_td, columns, shared_dict_defs);
2564  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2565  // privileges
2566  SysCatalog::instance().createDBObject(
2567  session.get_currentUser(), df_td.tableName, TableDBObjectType, catalog);
2568 }
2569 
// Runs `select_stmt` for the session behind `query_state_proxy` and returns its
// rows. The statement is passed through pg_shim, planned by the catalog's Calcite
// manager, and executed with RelAlgExecutor::executeRelAlgQuery.
//   targets                - out: metadata of the result targets
//   validate_only          - forwarded into the ExecutionOptions initializer
//   outer_fragment_indices - forwarded into the ExecutionOptions initializer
//   allow_interrupt        - forwarded into the ExecutionOptions initializer
// When the kCpuMode query hint is registered the device type is forced to CPU.
// NOTE(review): several lines of this listing are not visible (the definitions of
// `executor` and `co`, and some initializer entries).
2570 std::shared_ptr<ResultSet> getResultSet(QueryStateProxy query_state_proxy,
2571  const std::string select_stmt,
2572  std::vector<TargetMetaInfo>& targets,
2573  bool validate_only = false,
2574  std::vector<size_t> outer_fragment_indices = {},
2575  bool allow_interrupt = false) {
2576  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2577  auto& catalog = session->getCatalog();
2578 
// Builds without CUDA always use the CPU device type.
2580 #ifdef HAVE_CUDA
2581  const auto device_type = session->get_executor_device_type();
2582 #else
2583  const auto device_type = ExecutorDeviceType::CPU;
2584 #endif // HAVE_CUDA
2585  auto calcite_mgr = catalog.getCalciteMgr();
2586 
2587  // TODO MAT this should actually get the global or the session parameter for
2588  // view optimization
2589  const auto query_ra =
2590  calcite_mgr
2591  ->process(query_state_proxy, pg_shim(select_stmt), {}, true, false, false, true)
2592  .plan_result;
2593  RelAlgExecutor ra_executor(executor.get(),
2594  catalog,
2595  query_ra,
2596  query_state_proxy.getQueryState().shared_from_this());
2598  const auto& query_hints = ra_executor.getParsedQueryHints();
2599  const auto cpu_mode_enabled = query_hints.isHintRegistered(QueryHint::kCpuMode);
2600  if (cpu_mode_enabled) {
2601  co.device_type = ExecutorDeviceType::CPU;
2602  }
2604  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2605  // struct
2606  ExecutionOptions eo = {false,
2607  true,
2608  false,
2609  true,
2610  false,
2611  false,
2612  validate_only,
2613  false,
2614  10000,
2615  false,
2616  false,
// NOTE(review): this position holds 1000 here but 0.9 in
// LocalConnector::getOuterFragmentCount — confirm which value is intended.
2617  1000,
2618  allow_interrupt,
2622  outer_fragment_indices};
// Placeholder result; it is overwritten by the executeRelAlgQuery call below.
2623  ExecutionResult result{std::make_shared<ResultSet>(std::vector<TargetInfo>{},
2626  nullptr,
2627  nullptr,
2628  0,
2629  0),
2630  {}};
2631  result = ra_executor.executeRelAlgQuery(co, eo, false, nullptr);
2632  targets = result.getTargetsMeta();
2633 
2634  return result.getRows();
2635 }
2636 
// Plans `sql_query_string` (after pg_shim) through the catalog's Calcite manager
// and returns RelAlgExecutor::getOuterFragmentCount for that plan. The compilation
// device type is CPU when the kCpuMode hint is registered, otherwise the
// session's device type (always CPU in builds without CUDA).
// NOTE(review): the definition of `executor` and one CompilationOptions
// initializer entry are not visible in this listing.
2637 size_t LocalConnector::getOuterFragmentCount(QueryStateProxy query_state_proxy,
2638  std::string& sql_query_string) {
2639  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2640  auto& catalog = session->getCatalog();
2641 
2643 #ifdef HAVE_CUDA
2644  const auto device_type = session->get_executor_device_type();
2645 #else
2646  const auto device_type = ExecutorDeviceType::CPU;
2647 #endif // HAVE_CUDA
2648  auto calcite_mgr = catalog.getCalciteMgr();
2649 
2650  // TODO MAT this should actually get the global or the session parameter for
2651  // view optimization
2652  const auto query_ra =
2653  calcite_mgr
2654  ->process(
2655  query_state_proxy, pg_shim(sql_query_string), {}, true, false, false, true)
2656  .plan_result;
2657  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
2658  const auto& query_hints = ra_executor.getParsedQueryHints();
2659  const bool cpu_mode_enabled = query_hints.isHintRegistered(QueryHint::kCpuMode);
2660  CompilationOptions co = {cpu_mode_enabled ? ExecutorDeviceType::CPU : device_type,
2661  true,
2663  false};
2664  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2665  // struct
2666  ExecutionOptions eo = {false,
2667  true,
2668  false,
2669  true,
2670  false,
2671  false,
2672  false,
2673  false,
2674  10000,
2675  false,
2676  false,
2677  0.9,
2678  false};
2679  return ra_executor.getOuterFragmentCount(co, eo);
2680 }
2681 
// Runs `sql_query_string` locally through getResultSet and returns the rows
// together with their target metadata. When interrupts are allowed, the call is
// not validate-only, and the session id is non-empty, the query session is first
// enrolled with the executor in PENDING_EXECUTOR state.
// NOTE(review): the definition of `executor` and one enrollQuerySession argument
// are not visible in this listing.
2682 AggregatedResult LocalConnector::query(QueryStateProxy query_state_proxy,
2683  std::string& sql_query_string,
2684  std::vector<size_t> outer_frag_indices,
2685  bool validate_only,
2686  bool allow_interrupt) {
2687  // TODO(PS): Should we be using the shimmed query in getResultSet?
// NOTE(review): pg_shimmed_select_query is not used by any visible line below;
// getResultSet receives the original sql_query_string.
2688  std::string pg_shimmed_select_query = pg_shim(sql_query_string);
2689 
2690  std::vector<TargetMetaInfo> target_metainfos;
2692  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2693  auto query_session = session ? session->get_session_id() : "";
2694  auto query_submitted_time = query_state_proxy.getQueryState().getQuerySubmittedTime();
2695  if (allow_interrupt && !validate_only && !query_session.empty()) {
2696  executor->enrollQuerySession(query_session,
2697  sql_query_string,
2698  query_submitted_time,
2700  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR);
2701  }
2702  auto result_rows = getResultSet(query_state_proxy,
2703  sql_query_string,
2704  target_metainfos,
2705  validate_only,
2706  outer_frag_indices,
2707  allow_interrupt);
2708  AggregatedResult res = {result_rows, target_metainfos};
2709  return res;
2710 }
2711 
2712 std::vector<AggregatedResult> LocalConnector::query(
2713  QueryStateProxy query_state_proxy,
2714  std::string& sql_query_string,
2715  std::vector<size_t> outer_frag_indices,
2716  bool allow_interrupt) {
2717  auto res = query(
2718  query_state_proxy, sql_query_string, outer_frag_indices, false, allow_interrupt);
2719  return {res};
2720 }
2721 
2722 void LocalConnector::insertDataToLeaf(const Catalog_Namespace::SessionInfo& session,
2723  const size_t leaf_idx,
2724  Fragmenter_Namespace::InsertData& insert_data) {
2725  CHECK(leaf_idx == 0);
2726  auto& catalog = session.getCatalog();
2727  auto created_td = catalog.getMetadataForTable(insert_data.tableId);
2728  ChunkKey chunkKey = {catalog.getCurrentDB().dbId, created_td->tableId};
2729  // TODO(adb): Ensure that we have previously obtained a write lock for this table's
2730  // data.
2731  created_td->fragmenter->insertDataNoCheckpoint(insert_data);
2732 }
2733 
2734 void LocalConnector::checkpoint(const Catalog_Namespace::SessionInfo& session,
2735  int table_id) {
2736  auto& catalog = session.getCatalog();
2737  catalog.checkpointWithAutoRollback(table_id);
2738 }
2739 
2740 void LocalConnector::rollback(const Catalog_Namespace::SessionInfo& session,
2741  int table_id) {
2742  auto& catalog = session.getCatalog();
2743  auto db_id = catalog.getDatabaseId();
2744  auto table_epochs = catalog.getTableEpochs(db_id, table_id);
2745  catalog.setTableEpochs(db_id, table_epochs);
2746 }
2747 
2748 std::list<ColumnDescriptor> LocalConnector::getColumnDescriptors(AggregatedResult& result,
2749  bool for_create) {
2750  std::list<ColumnDescriptor> column_descriptors;
2751  std::list<ColumnDescriptor> column_descriptors_for_create;
2752 
2753  int rowid_suffix = 0;
2754  for (const auto& target_metainfo : result.targets_meta) {
2755  ColumnDescriptor cd;
2756  cd.columnName = target_metainfo.get_resname();
2757  if (cd.columnName == "rowid") {
2758  cd.columnName += std::to_string(rowid_suffix++);
2759  }
2760  cd.columnType = target_metainfo.get_physical_type_info();
2761 
2762  ColumnDescriptor cd_for_create = cd;
2763 
2765  // we need to reset the comp param (as this points to the actual dictionary)
2766  if (cd.columnType.is_array()) {
2767  // for dict encoded arrays, it is always 4 bytes
2768  cd_for_create.columnType.set_comp_param(32);
2769  } else {
2770  cd_for_create.columnType.set_comp_param(cd.columnType.get_size() * 8);
2771  }
2772  }
2773 
2774  if (cd.columnType.is_date() && !cd.columnType.is_date_in_days()) {
2775  // default to kENCODING_DATE_IN_DAYS encoding
2777  cd_for_create.columnType.set_comp_param(0);
2778  }
2779 
2780  column_descriptors_for_create.push_back(cd_for_create);
2781  column_descriptors.push_back(cd);
2782  }
2783 
2784  if (for_create) {
2785  return column_descriptors_for_create;
2786  }
2787 
2788  return column_descriptors;
2789 }
2790 
// Runs the SELECT of an INSERT INTO ... SELECT / CREATE TABLE ... AS SELECT statement
// and loads the resulting rows into the target table `td`.
//
// Parameters:
//   query_state_proxy  query state supplying the session and used to run the SELECT
//   td                 descriptor of the target table
//   validate_table     when true, first check that the table exists, is not a view,
//                      that the user may insert into it, and that source and target
//                      columns are compatible
//   for_CTAS           in the visible code this only selects the "CTAS" / "ITAS"
//                      label used in messages
//
// Throws std::runtime_error when validation fails or when an interrupt is detected.
// Any exception raised while loading triggers a rollback attempt on the target table
// before it is rethrown. On success, non-temporary tables are checkpointed.
//
// NOTE(review): the embedded line numbers of this listing skip in several places
// (2797, 2841, 2966, 2981, 2986, 2993, 3000, 3033, 3075, 3132, 3180, 3188, 3236,
// 3243). The statements on those lines are not visible here; consult the original
// source before changing this function.
2791 void InsertIntoTableAsSelectStmt::populateData(QueryStateProxy query_state_proxy,
2792  const TableDescriptor* td,
2793  bool validate_table,
2794  bool for_CTAS) {
2795  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2796  auto& catalog = session->getCatalog();
// NOTE(review): `executor`, used further down, is presumably declared on the missing
// line 2797 — confirm.
2798 
2799  LocalConnector local_connector;
2800  bool populate_table = false;
2801 
// With a pre-set leafs_connector_ data is always populated. Otherwise fall back to
// the local connector and populate only when g_cluster is false.
// NOTE(review): leafs_connector_ is left pointing at the stack-local
// `local_connector`; if it is a member that outlives this call it would dangle
// afterwards — confirm.
2802  if (leafs_connector_) {
2803  populate_table = true;
2804  } else {
2805  leafs_connector_ = &local_connector;
2806  if (!g_cluster) {
2807  populate_table = true;
2808  }
2809  }
2810 
// Resolves the target columns: every column returned by
// getAllColumnMetadataForTable() when no explicit column list was given, otherwise
// the named columns in the order they were listed. Throws if a named column does
// not exist.
2811  auto get_target_column_descriptors = [this, &catalog](const TableDescriptor* td) {
2812  std::vector<const ColumnDescriptor*> target_column_descriptors;
2813  if (column_list_.empty()) {
2814  auto list = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
2815  target_column_descriptors = {std::begin(list), std::end(list)};
2816  } else {
2817  for (auto& c : column_list_) {
2818  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
2819  if (cd == nullptr) {
2820  throw std::runtime_error("Column " + *c + " does not exist.");
2821  }
2822  target_column_descriptors.push_back(cd);
2823  }
2824  }
2825 
2826  return target_column_descriptors;
2827  };
2828 
// NOTE(review): td is handed to table_is_temporary() before the null check below;
// verify that helper tolerates a null descriptor.
2829  bool is_temporary = table_is_temporary(td);
2830 
// Validation phase (ITAS only; the CTAS caller passes validate_table == false).
2831  if (validate_table) {
2832  // check access privileges
2833  if (!td) {
2834  throw std::runtime_error("Table " + table_name_ + " does not exist.");
2835  }
2836  if (td->isView) {
2837  throw std::runtime_error("Insert to views is not supported yet.");
2838  }
2839 
// NOTE(review): line 2841 (an argument of this call, presumably the requested
// privilege) is missing from the listing.
2840  if (!session->checkDBAccessPrivileges(DBObjectType::TableDBObjectType,
2842  table_name_)) {
2843  throw std::runtime_error("User has no insert privileges on " + table_name_ + ".");
2844  }
2845 
2846  // only validate the select query so we get the target types
2847  // correctly, but do not populate the result set
2848  auto result = local_connector.query(query_state_proxy, select_query_, {}, true, true);
2849  auto source_column_descriptors = local_connector.getColumnDescriptors(result, false);
2850 
2851  std::vector<const ColumnDescriptor*> target_column_descriptors =
2852  get_target_column_descriptors(td);
2853 
2854  if (source_column_descriptors.size() != target_column_descriptors.size()) {
2855  throw std::runtime_error("The number of source and target columns does not match.");
2856  }
2857 
// Compare source and target columns pairwise by position.
// source_column_descriptors is a std::list, so std::next(begin, i) walks i nodes on
// every iteration.
2858  for (int i = 0; i < source_column_descriptors.size(); i++) {
2859  const ColumnDescriptor* source_cd =
2860  &(*std::next(source_column_descriptors.begin(), i));
2861  const ColumnDescriptor* target_cd = target_column_descriptors.at(i);
2862 
// Differing base types are tolerated unless either side is a time, geometry, array
// or boolean type.
2863  if (source_cd->columnType.get_type() != target_cd->columnType.get_type()) {
2864  auto type_cannot_be_cast = [](const auto& col_type) {
2865  return (col_type.is_time() || col_type.is_geometry() || col_type.is_array() ||
2866  col_type.is_boolean());
2867  };
2868 
2869  if (type_cannot_be_cast(source_cd->columnType) ||
2870  type_cannot_be_cast(target_cd->columnType)) {
2871  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2872  source_cd->columnType.get_type_name() +
2873  "' and target '" + target_cd->columnName + " " +
2874  target_cd->columnType.get_type_name() +
2875  "' column types do not match.");
2876  }
2877  }
// Array columns additionally need identical element subtypes.
2878  if (source_cd->columnType.is_array()) {
2879  if (source_cd->columnType.get_subtype() != target_cd->columnType.get_subtype()) {
2880  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2881  source_cd->columnType.get_type_name() +
2882  "' and target '" + target_cd->columnName + " " +
2883  target_cd->columnType.get_type_name() +
2884  "' array column element types do not match.");
2885  }
2886  }
2887 
// Decimals (scalar, or as array element type) must have the same scale on both
// sides.
2888  if (source_cd->columnType.is_decimal() ||
2889  source_cd->columnType.get_elem_type().is_decimal()) {
2890  SQLTypeInfo sourceType = source_cd->columnType;
2891  SQLTypeInfo targetType = target_cd->columnType;
2892 
2893  if (source_cd->columnType.is_array()) {
2894  sourceType = source_cd->columnType.get_elem_type();
2895  targetType = target_cd->columnType.get_elem_type();
2896  }
2897 
2898  if (sourceType.get_scale() != targetType.get_scale()) {
2899  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2900  source_cd->columnType.get_type_name() +
2901  "' and target '" + target_cd->columnName + " " +
2902  target_cd->columnType.get_type_name() +
2903  "' decimal columns scales do not match.");
2904  }
2905  }
2906 
// A string source requires a string target with the same compression setting.
2907  if (source_cd->columnType.is_string()) {
2908  if (!target_cd->columnType.is_string()) {
2909  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2910  source_cd->columnType.get_type_name() +
2911  "' and target '" + target_cd->columnName + " " +
2912  target_cd->columnType.get_type_name() +
2913  "' column types do not match.");
2914  }
2915  if (source_cd->columnType.get_compression() !=
2916  target_cd->columnType.get_compression()) {
2917  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2918  source_cd->columnType.get_type_name() +
2919  "' and target '" + target_cd->columnName + " " +
2920  target_cd->columnType.get_type_name() +
2921  "' columns string encodings do not match.");
2922  }
2923  }
2924 
// When both sides are timestamps their dimensions (precisions) must be equal.
2925  if (source_cd->columnType.is_timestamp() && target_cd->columnType.is_timestamp()) {
2926  if (source_cd->columnType.get_dimension() !=
2927  target_cd->columnType.get_dimension()) {
2928  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2929  source_cd->columnType.get_type_name() +
2930  "' and target '" + target_cd->columnName + " " +
2931  target_cd->columnType.get_type_name() +
2932  "' timestamp column precisions do not match.");
2933  }
2934  }
2935 
// For every remaining type (not string, geometry, integer, decimal, date, time or
// timestamp) the source must not be wider than the target.
2936  if (!source_cd->columnType.is_string() && !source_cd->columnType.is_geometry() &&
2937  !source_cd->columnType.is_integer() && !source_cd->columnType.is_decimal() &&
2938  !source_cd->columnType.is_date() && !source_cd->columnType.is_time() &&
2939  !source_cd->columnType.is_timestamp() &&
2940  source_cd->columnType.get_size() > target_cd->columnType.get_size()) {
2941  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2942  source_cd->columnType.get_type_name() +
2943  "' and target '" + target_cd->columnName + " " +
2944  target_cd->columnType.get_type_name() +
2945  "' column encoding sizes do not match.");
2946  }
2947  }
2948  }
2949 
// Nothing to load in this mode (see populate_table above).
2950  if (!populate_table) {
2951  return;
2952  }
2953 
// Statistics accumulated over all passes; reported through VLOG at the end.
2954  int64_t total_row_count = 0;
2955  int64_t total_source_query_time_ms = 0;
2956  int64_t total_target_value_translate_time_ms = 0;
2957  int64_t total_data_load_time_ms = 0;
2958 
2959  Fragmenter_Namespace::InsertDataLoader insertDataLoader(*leafs_connector_);
2960  auto target_column_descriptors = get_target_column_descriptors(td);
2961  auto outer_frag_count =
2962  leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
2963 
// Run at least one pass even when the connector reports zero outer fragments.
2964  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
2965  auto query_session = session ? session->get_session_id() : "";
// NOTE(review): line 2966 is missing from the listing.
2967  std::string work_type_str = for_CTAS ? "CTAS" : "ITAS";
2968  try {
2969  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
2970  std::vector<size_t> allowed_outer_fragment_indices;
2971 
// Restrict the SELECT to one outer fragment per pass; the list stays empty when
// outer_frag_count is 0.
2972  if (outer_frag_count) {
2973  allowed_outer_fragment_indices.push_back(outer_frag_idx);
2974  }
2975 
2976  const auto query_clock_begin = timer_start();
// NOTE(review): line 2981 (the remaining argument(s) and close of this call) is
// missing from the listing.
2977  std::vector<AggregatedResult> query_results =
2978  leafs_connector_->query(query_state_proxy,
2979  select_query_,
2980  allowed_outer_fragment_indices,
2982  total_source_query_time_ms += timer_stop(query_clock_begin);
2983 
2984  auto start_time = query_state_proxy.getQueryState().getQuerySubmittedTime();
2985  auto query_str = "INSERT_DATA for " + work_type_str;
// NOTE(review): line 2986 is missing; the closing brace at 2995 suggests it opens a
// conditional around the re-enrollment below. Line 2993 (one call argument) is
// missing as well.
2987  // In the clean-up phase of the query execution for collecting aggregated result
2988  // of SELECT query, we remove its query session info, so we need to enroll the
2989  // session info again
2990  executor->enrollQuerySession(query_session,
2991  query_str,
2992  start_time,
2994  QuerySessionStatus::QueryStatus::RUNNING);
2995  }
2996 
// Scope guard that clears the query session status when this pass ends.
// NOTE(review): line 3000 is missing; the brace at 3002 suggests the clear is
// conditional.
2997  ScopeGuard clearInterruptStatus = [executor, &query_session, &start_time] {
2998  // this data population is non-kernel operation, so we manually cleanup
2999  // the query session info in the cleanup phase
3001  executor->clearQuerySessionStatus(query_session, start_time, false);
3002  }
3003  };
3004 
// Load each result of this pass.
3005  for (auto& res : query_results) {
3006  if (UNLIKELY(checkInterrupt(query_session, executor))) {
3007  throw std::runtime_error(
3008  "Query execution has been interrupted while performing " + work_type_str);
3009  }
3010  auto result_rows = res.rs;
3011  result_rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
3012  const auto num_rows = result_rows->rowCount();
3013 
3014  if (0 == num_rows) {
3015  continue;
3016  }
3017 
3018  total_row_count += num_rows;
3019 
3020  size_t leaf_count = leafs_connector_->leafCount();
3021 
// Batch size: rows divided by the leaf count, capped at 64K rows.
// NOTE(review): if leaf_count exceeds num_rows this evaluates to 0; the first batch
// is forced to at least one row below, but later batches are recomputed from this
// value at the end of the while loop — verify that case cannot occur.
3022  size_t max_number_of_rows_per_package =
3023  std::min(num_rows / leaf_count, size_t(64 * 1024));
3024 
3025  size_t start_row = 0;
3026  size_t num_rows_to_process = std::min(num_rows, max_number_of_rows_per_package);
3027 
3028  // ensure that at least one row is being processed
3029  num_rows_to_process = std::max(num_rows_to_process, size_t(1));
3030 
3031  std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
3032 
// NOTE(review): line 3033 is missing; `factory`, used when creating converters
// below, is presumably declared there.
3034 
3035  const int num_worker_threads = std::thread::hardware_concurrency();
3036 
3037  std::vector<size_t> thread_start_idx(num_worker_threads),
3038  thread_end_idx(num_worker_threads);
// Parallel conversion is used only for non-truncated result sets whose first batch
// has more than 20000 rows.
3039  bool can_go_parallel = !result_rows->isTruncated() && num_rows_to_process > 20000;
3040 
// Next free output row of the current batch; shared by all worker threads.
3041  std::atomic<size_t> row_idx{0};
3042 
// Converts result-set entry `idx`, if it is non-empty, into the next free output
// row. Sets stop_convert (without converting) once the current batch is full.
3043  auto do_work = [&result_rows, &num_rows_to_process, &value_converters, &row_idx](
3044  const size_t idx,
3045  const size_t end,
3046  const size_t num_cols,
3047  const size_t thread_id,
3048  bool& stop_convert) {
3049  const auto result_row = result_rows->getRowAtNoTranslations(idx);
3050  if (!result_row.empty()) {
3051  size_t target_row = row_idx.fetch_add(1);
3052  if (target_row >= num_rows_to_process) {
3053  stop_convert = true;
3054  return;
3055  }
3056  for (unsigned int col = 0; col < num_cols; col++) {
3057  const auto& mapd_variant = result_row[col];
3058  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3059  }
3060  }
3061  };
3062 
// Worker body for the parallel path: walks this thread's [start, end) entry range
// and records where it stopped in thread_start_idx so the next batch resumes there.
// The first loop also polls for an interrupt every 0x10000 entries.
// NOTE(review): line 3075 is missing; the `} else {` at 3089 shows it opens the
// conditional that selects between the two loops.
3063  auto convert_function = [&thread_start_idx,
3064  &thread_end_idx,
3065  &value_converters,
3066  &executor,
3067  &query_session,
3068  &work_type_str,
3069  &do_work](const int thread_id) {
3070  const int num_cols = value_converters.size();
3071  const size_t start = thread_start_idx[thread_id];
3072  const size_t end = thread_end_idx[thread_id];
3073  size_t idx = 0;
3074  bool stop_convert = false;
3076  size_t local_idx = 0;
3077  for (idx = start; idx < end; ++idx, ++local_idx) {
3078  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3079  checkInterrupt(query_session, executor))) {
3080  throw std::runtime_error(
3081  "Query execution has been interrupted while performing " +
3082  work_type_str);
3083  }
3084  do_work(idx, end, num_cols, thread_id, stop_convert);
3085  if (stop_convert) {
3086  break;
3087  }
3088  }
3089  } else {
3090  for (idx = start; idx < end; ++idx) {
3091  do_work(idx, end, num_cols, thread_id, stop_convert);
3092  if (stop_convert) {
3093  break;
3094  }
3095  }
3096  }
3097  thread_start_idx[thread_id] = idx;
3098  };
3099 
// Sequential counterpart of do_work: reserves the output row first and only then
// pulls the next row from the result set, so no row is consumed once the batch is
// full.
3100  auto single_threaded_value_converter =
3101  [&row_idx, &num_rows_to_process, &value_converters, &result_rows](
3102  const size_t idx,
3103  const size_t end,
3104  const size_t num_cols,
3105  bool& stop_convert) {
3106  size_t target_row = row_idx.fetch_add(1);
3107  if (target_row >= num_rows_to_process) {
3108  stop_convert = true;
3109  return;
3110  }
3111  const auto result_row = result_rows->getNextRow(false, false);
3112  CHECK(!result_row.empty());
3113  for (unsigned int col = 0; col < num_cols; col++) {
3114  const auto& mapd_variant = result_row[col];
3115  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3116  }
3117  };
3118 
// Sequential counterpart of convert_function, with the same two-loop structure.
// NOTE(review): line 3132 is missing; it opens the conditional closed by the
// `} else {` at 3146.
3119  auto single_threaded_convert_function =
3120  [&value_converters,
3121  &thread_start_idx,
3122  &thread_end_idx,
3123  &executor,
3124  &query_session,
3125  &work_type_str,
3126  &single_threaded_value_converter](const int thread_id) {
3127  const int num_cols = value_converters.size();
3128  const size_t start = thread_start_idx[thread_id];
3129  const size_t end = thread_end_idx[thread_id];
3130  size_t idx = 0;
3131  bool stop_convert = false;
3133  size_t local_idx = 0;
3134  for (idx = start; idx < end; ++idx, ++local_idx) {
3135  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3136  checkInterrupt(query_session, executor))) {
3137  throw std::runtime_error(
3138  "Query execution has been interrupted while performing " +
3139  work_type_str);
3140  }
3141  single_threaded_value_converter(idx, end, num_cols, stop_convert);
3142  if (stop_convert) {
3143  break;
3144  }
3145  }
3146  } else {
3147  for (idx = start; idx < end; ++idx) {
3148  single_threaded_value_converter(idx, end, num_cols, stop_convert);
3149  if (stop_convert) {
3150  break;
3151  }
3152  }
3153  }
3154  thread_start_idx[thread_id] = idx;
3155  };
3156 
// Parallel: give each worker one contiguous stride of the entry range.
// Sequential: slot 0 covers all entries.
3157  if (can_go_parallel) {
3158  const size_t entryCount = result_rows->entryCount();
3159  for (size_t i = 0,
3160  start_entry = 0,
3161  stride = (entryCount + num_worker_threads - 1) / num_worker_threads;
3162  i < num_worker_threads && start_entry < entryCount;
3163  ++i, start_entry += stride) {
3164  const auto end_entry = std::min(start_entry + stride, entryCount);
3165  thread_start_idx[i] = start_entry;
3166  thread_end_idx[i] = end_entry;
3167  }
3168  } else {
3169  thread_start_idx[0] = 0;
3170  thread_end_idx[0] = result_rows->entryCount();
3171  }
3172 
// Convert and insert one batch of num_rows_to_process rows per iteration.
3173  while (start_row < num_rows) {
3174  value_converters.clear();
3175  row_idx = 0;
3176  int colNum = 0;
// Build a fresh converter for every target column.
// NOTE(review): lines 3180 (start of the `param` initializer) and 3188 (the
// condition of the conditional expression below) are missing from the listing.
3177  for (const auto targetDescriptor : target_column_descriptors) {
3178  auto sourceDataMetaInfo = res.targets_meta[colNum++];
3179 
3181  num_rows_to_process,
3182  catalog,
3183  sourceDataMetaInfo,
3184  targetDescriptor,
3185  targetDescriptor->columnType,
3186  !targetDescriptor->columnType.get_notnull(),
3187  result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
3189  ? executor->getStringDictionaryProxy(
3190  sourceDataMetaInfo.get_type_info().get_comp_param(),
3191  result_rows->getRowSetMemOwner(),
3192  true)
3193  : nullptr};
3194  auto converter = factory.create(param);
3195  value_converters.push_back(std::move(converter));
3196  }
3197 
3198  const auto translate_clock_begin = timer_start();
// Parallel path: one async task per worker; wait for all of them, then get() each
// future so that an exception thrown by a worker is rethrown here.
3199  if (can_go_parallel) {
3200  std::vector<std::future<void>> worker_threads;
3201  for (int i = 0; i < num_worker_threads; ++i) {
3202  worker_threads.push_back(
3203  std::async(std::launch::async, convert_function, i));
3204  }
3205 
3206  for (auto& child : worker_threads) {
3207  child.wait();
3208  }
3209  for (auto& child : worker_threads) {
3210  child.get();
3211  }
3212 
3213  } else {
3214  single_threaded_convert_function(0);
3215  }
3216 
3217  // finalize the insert data
3218  auto finalizer_func =
3219  [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
3220  targetValueConverter->finalizeDataBlocksForInsertData();
3221  };
3222 
// Finalize all converters concurrently, one async task per column.
3223  std::vector<std::future<void>> worker_threads;
3224  for (auto& converterPtr : value_converters) {
3225  worker_threads.push_back(
3226  std::async(std::launch::async, finalizer_func, converterPtr.get()));
3227  }
3228 
3229  for (auto& child : worker_threads) {
3230  child.wait();
3231  }
3232  for (auto& child : worker_threads) {
3233  child.get();
3234  }
3235 
// NOTE(review): line 3236 is missing; `insert_data` is presumably declared there.
3237  insert_data.databaseId = catalog.getCurrentDB().dbId;
3238  CHECK(td);
3239  insert_data.tableId = td->tableId;
3240  insert_data.numRows = num_rows_to_process;
3241 
// Hand every column's data blocks to insert_data, checking for an interrupt per
// column.
// NOTE(review): line 3243 (start of the interrupt condition) is missing.
3242  for (int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
3244  checkInterrupt(query_session, executor))) {
3245  throw std::runtime_error(
3246  "Query execution has been interrupted while performing " +
3247  work_type_str);
3248  }
3249  value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
3250  }
3251  total_target_value_translate_time_ms += timer_stop(translate_clock_begin);
3252 
3253  const auto data_load_clock_begin = timer_start();
// presumably data_memory_holder owns buffers that insert_data refers to and must
// stay alive until insertData() returns — confirm against fill_missing_columns().
3254  auto data_memory_holder =
3255  import_export::fill_missing_columns(&catalog, insert_data);
3256  insertDataLoader.insertData(*session, insert_data);
3257  total_data_load_time_ms += timer_stop(data_load_clock_begin);
3258 
// Advance to the next batch.
3259  start_row += num_rows_to_process;
3260  num_rows_to_process =
3261  std::min(num_rows - start_row, max_number_of_rows_per_package);
3262  }
3263  }
3264  }
// Any failure: attempt a rollback of the target table, log (and swallow) a
// std::exception thrown by the rollback itself, then rethrow the original exception.
3265  } catch (...) {
3266  try {
3267  leafs_connector_->rollback(*session, td->tableId);
3268  } catch (std::exception& e) {
3269  LOG(ERROR) << "An error occurred during ITAS rollback attempt. Table id: "
3270  << td->tableId << ", Error: " << e.what();
3271  }
3272  throw;
3273  }
3274 
3275  int64_t total_time_ms = total_source_query_time_ms +
3276  total_target_value_translate_time_ms + total_data_load_time_ms;
3277 
3278  VLOG(1) << "CTAS/ITAS " << total_row_count << " rows loaded in " << total_time_ms
3279  << "ms (outer_frag_count=" << outer_frag_count
3280  << ", query_time=" << total_source_query_time_ms
3281  << "ms, translation_time=" << total_target_value_translate_time_ms
3282  << "ms, data_load_time=" << total_data_load_time_ms
3283  << "ms)\nquery: " << select_query_;
3284 
// Temporary tables are not checkpointed.
3285  if (!is_temporary) {
3286  leafs_connector_->checkpoint(*session, td->tableId);
3287  }
3288 }
3289 
// Executes INSERT INTO <table> SELECT ...
//
// Builds a query state around a copy of the session, verifies that the target table
// exists, asks Calcite which tables the SELECT reads, takes locks on those tables
// plus the target in table-id order, and then validates and loads the data through
// populateData().
//
// Throws std::runtime_error if the target table does not exist; exceptions from
// populateData() propagate unchanged.
//
// NOTE(review): the embedded line numbers of this listing skip at 3299-3300, 3306,
// 3339-3340, 3346-3347 and 3351-3352 (lock construction and the declaration of
// `locks`); consult the original source before editing.
3290 void InsertIntoTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
// session_ptr wraps the stack copy without owning it (null_deleter), so it must not
// be used after this function returns.
3291  auto session_copy = session;
3292  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3293  &session_copy, boost::null_deleter());
3294  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
3295  auto stdlog = STDLOG(query_state);
3296  auto& catalog = session_ptr->getCatalog();
3297 
// Shared lock held for the rest of the function (its constructor arguments are on
// the missing lines).
3298  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3301 
3302  if (catalog.getMetadataForTable(table_name_) == nullptr) {
3303  throw std::runtime_error("ITAS failed: table " + table_name_ + " does not exist.");
3304  }
3305 
3307  std::vector<std::string> tables;
3308 
3309  // get the table info
3310  auto calcite_mgr = catalog.getCalciteMgr();
3311 
3312  // TODO MAT this should actually get the global or the session parameter for
3313  // view optimization
3314  const auto result = calcite_mgr->process(query_state->createQueryStateProxy(),
3315  pg_shim(select_query_),
3316  {},
3317  true,
3318  false,
3319  false,
3320  true);
3321 
// Collect every table the SELECT reads (first component of each entry), plus the
// insert target.
3322  for (auto& tab : result.resolved_accessed_objects.tables_selected_from) {
3323  tables.emplace_back(tab[0]);
3324  }
3325  tables.emplace_back(table_name_);
3326 
3327  // force sort into tableid order in case of name change to guarantee fixed order of
3328  // mutex access
// NOTE(review): the comparator dereferences getMetadataForTable() without a null
// check.
3329  std::sort(tables.begin(),
3330  tables.end(),
3331  [&catalog](const std::string& a, const std::string& b) {
3332  return catalog.getMetadataForTable(a, false)->tableId <
3333  catalog.getMetadataForTable(b, false)->tableId;
3334  });
3335 
// Drop adjacent duplicates, e.g. when the target is also read by the SELECT.
3336  tables.erase(unique(tables.begin(), tables.end()), tables.end());
// Two locks are appended per table: one built from (catalog, table), then a second
// one built from the descriptor held by the first. Which second lock is taken
// depends on whether the table is the insert target.
3337  for (const auto& table : tables) {
3338  locks.emplace_back(
3341  catalog, table)));
3342  if (table == table_name_) {
3343  // Acquire an insert data lock for updates/deletes, consistent w/ insert. The
3344  // table data lock will be acquired in the fragmenter during checkpoint.
3345  locks.emplace_back(
3348  catalog.getDatabaseId(), (*locks.back())())));
3349  } else {
3350  locks.emplace_back(
3353  catalog.getDatabaseId(), (*locks.back())())));
3354  }
3355  }
3356 
3357  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
// Validate the table and load the data (validate_table = true, for_CTAS = false).
// The try/catch only rethrows and has no other effect.
3358  try {
3359  populateData(query_state->createQueryStateProxy(), td, true, false);
3360  } catch (...) {
3361  throw;
3362  }
3363 }
3364 
// Executes CREATE TABLE <table> AS SELECT ...
//
// Phase 1 (only when no leafs connector is set): check privileges, validate the
// SELECT to learn the output column types, build a TableDescriptor from them and the
// WITH options, and create the table in the catalog.
// Phase 2: lock the tables read by the SELECT plus the new table in table-id order
// and load the data through populateData().
//
// Returns silently if the table already exists and IF NOT EXISTS was given; throws
// std::runtime_error on privilege, naming or validation failures. If loading fails
// and g_cluster is false, the table is dropped again before the exception is
// rethrown.
//
// NOTE(review): the embedded line numbers of this listing skip at 3378-3379,
// 3382-3383, 3437-3442, 3444, 3446, 3513-3514, 3516, 3532-3533, 3539-3540 and
// 3544-3545; the statements on those lines are not visible here.
3365 void CreateTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
// session_ptr wraps the stack copy without owning it (null_deleter).
3366  auto session_copy = session;
3367  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3368  &session_copy, boost::null_deleter());
3369  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
3370  auto stdlog = STDLOG(query_state);
3371  LocalConnector local_connector;
3372  auto& catalog = session.getCatalog();
// The table is only created here when no leafs connector has been set.
3373  bool create_table = nullptr == leafs_connector_;
3374 
3375  std::set<std::string> select_tables;
3376  if (create_table) {
// Unique (exclusive) lock for the duration of table creation; its constructor
// arguments are on the missing lines.
3377  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3380 
// NOTE(review): the privilege condition itself (lines 3382-3383) is missing.
3381  // check access privileges
3384  throw std::runtime_error("CTAS failed. Table " + table_name_ +
3385  " will not be created. User has no create privileges.");
3386  }
3387 
3388  if (catalog.getMetadataForTable(table_name_) != nullptr) {
3389  if (if_not_exists_) {
3390  return;
3391  }
3392  throw std::runtime_error("Table " + table_name_ +
3393  " already exists and no data was loaded.");
3394  }
3395 
3396  // get the table info
3397  auto calcite_mgr = catalog.getCalciteMgr();
3398 
3399  // TODO MAT this should actually get the global or the session parameter for
3400  // view optimization
3401  const auto result = calcite_mgr->process(query_state->createQueryStateProxy(),
3402  pg_shim(select_query_),
3403  {},
3404  true,
3405  false,
3406  false,
3407  true);
3408 
3409  // TODO 12 Apr 2021 MAT schema change need to keep schema in future
3410  // just keeping it moving for now
3411  for (auto& tab : result.resolved_accessed_objects.tables_selected_from) {
3412  select_tables.insert(tab[0]);
3413  }
3414 
3415  // only validate the select query so we get the target types
3416  // correctly, but do not populate the result set
3417  // we currently have exclusive access to the system so this is safe
3418  auto validate_result = local_connector.query(
3419  query_state->createQueryStateProxy(), select_query_, {}, true, false);
3420 
3421  const auto column_descriptors_for_create =
3422  local_connector.getColumnDescriptors(validate_result, true);
3423 
3424  // some validation as the QE might return some out of range column types
3425  for (auto& cd : column_descriptors_for_create) {
3426  if (cd.columnType.is_decimal() && cd.columnType.get_precision() > 18) {
3427  throw std::runtime_error(cd.columnName + ": Precision too high, max 18.");
3428  }
3429  }
3430 
// Describe the new table. Further field assignments (lines 3437-3442) and the
// bodies of the is_temporary_ branches (3444, 3446) are missing from the listing.
3431  TableDescriptor td;
3432  td.tableName = table_name_;
3433  td.userId = session.get_currentUser().userId;
3434  td.nColumns = column_descriptors_for_create.size();
3435  td.isView = false;
3436  td.fragmenter = nullptr;
3443  if (is_temporary_) {
3445  } else {
3447  }
3448 
3449  bool use_shared_dictionaries = true;
3450 
// Apply the WITH options. USE_SHARED_DICTIONARIES is handled here (string literal;
// "true", "1" or "t" enable it, case-insensitively); every other option is passed to
// get_table_definitions_for_ctas().
3451  if (!storage_options_.empty()) {
3452  for (auto& p : storage_options_) {
3453  if (boost::to_lower_copy<std::string>(*p->get_name()) ==
3454  "use_shared_dictionaries") {
3455  const StringLiteral* literal =
3456  dynamic_cast<const StringLiteral*>(p->get_value());
3457  if (nullptr == literal) {
3458  throw std::runtime_error(
3459  "USE_SHARED_DICTIONARIES must be a string parameter");
3460  }
3461  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
3462  use_shared_dictionaries = val == "true" || val == "1" || val == "t";
3463  } else {
3464  get_table_definitions_for_ctas(td, p, column_descriptors_for_create);
3465  }
3466  }
3467  }
3468 
3469  std::vector<SharedDictionaryDef> sharedDictionaryRefs;
3470 
// For every dictionary-encoded string column of the SELECT whose dictionary id maps
// to an existing column, make the new column share that column's dictionary.
3471  if (use_shared_dictionaries) {
3472  const auto source_column_descriptors =
3473  local_connector.getColumnDescriptors(validate_result, false);
3474  const auto mapping = catalog.getDictionaryToColumnMapping();
3475 
3476  for (auto& source_cd : source_column_descriptors) {
3477  const auto& ti = source_cd.columnType;
3478  if (ti.is_string()) {
3479  if (ti.get_compression() == kENCODING_DICT) {
3480  int dict_id = ti.get_comp_param();
3481  auto it = mapping.find(dict_id);
3482  if (mapping.end() != it) {
3483  const auto targetColumn = it->second;
3484  auto targetTable =
3485  catalog.getMetadataForTable(targetColumn->tableId, false);
3486  CHECK(targetTable);
3487  LOG(INFO) << "CTAS: sharing text dictionary on column "
3488  << source_cd.columnName << " with " << targetTable->tableName
3489  << "." << targetColumn->columnName;
3490  sharedDictionaryRefs.push_back(
3491  SharedDictionaryDef(source_cd.columnName,
3492  targetTable->tableName,
3493  targetColumn->columnName));
3494  }
3495  }
3496  }
3497  }
3498  }
3499 
3500  // currently no means of defining sharding in CTAS
3501  td.keyMetainfo = serialize_key_metainfo(nullptr, sharedDictionaryRefs);
3502 
3503  catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs, true);
3504  // TODO (max): It's transactionally unsafe, should be fixed: we may create object
3505  // w/o privileges
3506  SysCatalog::instance().createDBObject(
3507  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
3508  }
3509 
3510  // note there is a time where we do not have any executor outer lock here. someone could
3511  // come along and mess with the data or other tables.
3512  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3515 
// Tables to lock: everything the SELECT reads plus the newly created table.
// NOTE(review): select_tables is only filled inside the create_table branch above,
// so it is empty when a leafs connector was supplied.
3517  std::vector<std::string> tables;
3518  tables.insert(tables.end(), select_tables.begin(), select_tables.end());
3519  CHECK_EQ(tables.size(), select_tables.size());
3520  tables.emplace_back(table_name_);
3521  // force sort into tableid order in case of name change to guarantee fixed order of
3522  // mutex access
3523  std::sort(tables.begin(),
3524  tables.end(),
3525  [&catalog](const std::string& a, const std::string& b) {
3526  return catalog.getMetadataForTable(a, false)->tableId <
3527  catalog.getMetadataForTable(b, false)->tableId;
3528  });
3529  tables.erase(unique(tables.begin(), tables.end()), tables.end());
// Two locks are appended per table: one built from (catalog, table), then a second
// one built from the descriptor held by the first. Which second lock is taken
// depends on whether the table is the CTAS target.
3530  for (const auto& table : tables) {
3531  locks.emplace_back(
3534  catalog, table)));
3535  if (table == table_name_) {
3536  // Acquire an insert data lock for updates/deletes, consistent w/ insert. The
3537  // table data lock will be acquired in the fragmenter during checkpoint.
3538  locks.emplace_back(
3541  catalog.getDatabaseId(), (*locks.back())())));
3542  } else {
3543  locks.emplace_back(
3546  catalog.getDatabaseId(), (*locks.back())())));
3547  }
3548  }
3549 
3550  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
3551 
// Load the data without re-validating the table (validate_table = false,
// for_CTAS = true). On failure drop the table again, unless g_cluster is set, and
// rethrow.
3552  try {
3553  populateData(query_state->createQueryStateProxy(), td, false, true);
3554  } catch (...) {
3555  if (!g_cluster) {
3556  const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
3557  if (created_td) {
3558  catalog.dropTable(created_td);
3559  }
3560  }
3561  throw;
3562  }
3563 }
3564 
3565 DropTableStmt::DropTableStmt(const rapidjson::Value& payload) {
3566  CHECK(payload.HasMember("tableName"));
3567  table = std::make_unique<std::string>(json_str(payload["tableName"]));
3568 
3569  if_exists = false;
3570  if (payload.HasMember("ifExists")) {
3571  if_exists = json_bool(payload["ifExists"]);
3572  }
3573 }
3574 
// Executes DROP TABLE [IF EXISTS] <table>.
//
// Takes a schema write lock on the table, checks the user's privileges and drops the
// table from the catalog. If acquiring the lock / resolving the table throws
// std::runtime_error, the statement returns silently when IF EXISTS was given and
// rethrows otherwise. Throws std::runtime_error when the privilege check fails.
//
// NOTE(review): the embedded line numbers of this listing skip at 3580-3581, 3589,
// 3605, 3610, 3613 and 3617; the statements on those lines are not visible here.
3575 void DropTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3576  auto& catalog = session.getCatalog();
3577 
3578  // TODO(adb): the catalog should be handling this locking.
3579  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3582 
3583  const TableDescriptor* td{nullptr};
3584  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
3585 
// Resolve the table and take its schema write lock in one step; the descriptor is
// read back from the lock container.
3586  try {
3587  td_with_lock =
3588  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
3590  catalog, *table, false));
3591  td = (*td_with_lock)();
3592  } catch (const std::runtime_error& e) {
3593  if (if_exists) {
3594  return;
3595  } else {
// NOTE(review): `throw e;` throws a copy typed as std::runtime_error, so a more
// derived exception type would be sliced; a bare `throw;` would preserve it.
3596  throw e;
3597  }
3598  }
3599 
3600  CHECK(td);
3601  CHECK(td_with_lock);
3602 
3603  // check access privileges
3604  if (!session.checkDBAccessPrivileges(
3606  throw std::runtime_error("Table " + *table +
3607  " will not be dropped. User has no proper privileges.");
3608  }
3609 
3611 
// The data write lock's initializer is on a missing line; it is declared before the
// drop below.
3612  auto table_data_write_lock =
3614  catalog.dropTable(td);
3615 
3616  // invalidate cached hashtable
3617 }
3619 
// No-op: the body is empty and `session` is unused. ALTER TABLE payloads are
// dispatched by AlterTableStmt::delegateExecute(), defined directly below.
3620 void AlterTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {}
3621 
3622 void AlterTableStmt::delegateExecute(const rapidjson::Value& payload,
3623  const Catalog_Namespace::SessionInfo& session) {
3624  CHECK(payload.HasMember("tableName"));
3625  auto tableName = json_str(payload["tableName"]);
3626 
3627  CHECK(payload.HasMember("alterType"));
3628  auto type = json_str(payload["alterType"]);
3629 
3630  if (type == "RENAME_TABLE") {
3631  CHECK(payload.HasMember("newTableName"));
3632  auto newTableName = json_str(payload["newTableName"]);
3633  Parser::RenameTableStmt(new std::string(tableName), new std::string(newTableName))
3634  .execute(session);
3635 
3636  } else if (type == "RENAME_COLUMN") {
3637  CHECK(payload.HasMember("columnName"));
3638  auto columnName = json_str(payload["columnName"]);
3639  CHECK(payload.HasMember("newColumnName"));
3640  auto newColumnName = json_str(payload["newColumnName"]);
3641  Parser::RenameColumnStmt(new std::string(tableName),
3642  new std::string(columnName),
3643  new std::string(newColumnName))
3644  .execute(session);
3645 
3646  } else if (type == "ADD_COLUMN") {
3647  CHECK(payload.HasMember("columnData"));
3648  CHECK(payload["columnData"].IsArray());
3649 
3650  // TODO : Shares alot of code below with CREATE_TABLE -> try to merge these two ?
3651  // unique_ptr vs regular
3652 
3653  // New Columns go into this list
3654  std::list<ColumnDef*>* table_element_list_ = new std::list<ColumnDef*>;
3655 
3656  const auto elements = payload["columnData"].GetArray();
3657  for (const auto& element : elements) {
3658  CHECK(element.IsObject());
3659  CHECK(element.HasMember("type"));
3660  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
3661  CHECK(element.HasMember("name"));
3662  auto col_name = std::make_unique<std::string>(json_str(element["name"]));
3663  CHECK(element.HasMember("sqltype"));
3664  const auto sql_types = to_sql_type(json_str(element["sqltype"]));
3665 
3666  // decimal / numeric precision / scale
3667  int precision = -1;
3668  int scale = -1;
3669  if (element.HasMember("precision")) {
3670  precision = json_i64(element["precision"]);
3671  }
3672  if (element.HasMember("scale")) {
3673  scale = json_i64(element["scale"]);
3674  }
3675 
3676  std::optional<int64_t> array_size;
3677  if (element.HasMember("arraySize")) {
3678  // We do not yet support geo arrays
3679  array_size = json_i64(element["arraySize"]);
3680  }
3681  std::unique_ptr<SQLType> sql_type;
3682  if (element.HasMember("subtype")) {
3683  CHECK(element.HasMember("coordinateSystem"));
3684  const auto subtype_sql_types = to_sql_type(json_str(element["subtype"]));
3685  sql_type = std::make_unique<SQLType>(
3686  subtype_sql_types,
3687  static_cast<int>(sql_types),
3688  static_cast<int>(json_i64(element["coordinateSystem"])),
3689  false);
3690  } else if (precision > 0 && scale > 0) {
3691  sql_type = std::make_unique<SQLType>(sql_types,
3692  precision,
3693  scale,
3694  /*is_array=*/array_size.has_value(),
3695  array_size ? *array_size : -1);
3696  } else if (precision > 0) {
3697  sql_type = std::make_unique<SQLType>(sql_types,
3698  precision,
3699  0,
3700  /*is_array=*/array_size.has_value(),
3701  array_size ? *array_size : -1);
3702  } else {
3703  sql_type = std::make_unique<SQLType>(sql_types,
3704  /*is_array=*/array_size.has_value(),
3705  array_size ? *array_size : -1);
3706  }
3707  CHECK(sql_type);
3708 
3709  CHECK(element.HasMember("nullable"));
3710  const auto nullable = json_bool(element["nullable"]);
3711  std::unique_ptr<ColumnConstraintDef> constraint_def;
3712  if (!nullable) {
3713  StringLiteral* str_literal = nullptr;
3714  if (element.HasMember("expression") && !element["expression"].IsNull()) {
3715  std::string* defaultval = new std::string(json_str(element["expression"]));
3716  boost::algorithm::trim_if(*defaultval, boost::is_any_of(" \"'`"));
3717  str_literal = new StringLiteral(defaultval);
3718  }
3719 
3720  constraint_def =
3721  std::make_unique<ColumnConstraintDef>(/*notnull=*/true,
3722  /*unique=*/false,
3723  /*primarykey=*/false,
3724  /*defaultval=*/str_literal);
3725  }
3726  std::unique_ptr<CompressDef> compress_def;
3727  if (element.HasMember("encodingType") && !element["encodingType"].IsNull()) {
3728  std::string encoding_type = json_str(element["encodingType"]);
3729  CHECK(element.HasMember("encodingSize"));
3730  auto encoding_name =
3731  std::make_unique<std::string>(json_str(element["encodingType"]));
3732  compress_def = std::make_unique<CompressDef>(encoding_name.release(),
3733  json_i64(element["encodingSize"]));
3734  }
3735  auto col_def = new ColumnDef(col_name.release(),
3736  sql_type.release(),
3737  compress_def ? compress_def.release() : nullptr,
3738  constraint_def ? constraint_def.release() : nullptr);
3739  table_element_list_->emplace_back(col_def);
3740 
3741  } else {
3742  LOG(FATAL) << "Unsupported element type for ALTER TABLE: "
3743  << element["type"].GetString();
3744  }
3745  }
3746 
3747  Parser::AddColumnStmt(new std::string(tableName), table_element_list_)
3748  .execute(session);
3749 
3750  } else if (type == "DROP_COLUMN") {
3751  CHECK(payload.HasMember("columnData"));
3752  auto columnData = json_str(payload["columnData"]);
3753  // Convert columnData to std::list<std::string*>*
3754  // allocate std::list<> as DropColumnStmt will delete it;
3755  std::list<std::string*>* cols = new std::list<std::string*>;
3756  std::vector<std::string> cols1;
3757  boost::split(cols1, columnData, boost::is_any_of(","));
3758  for (auto s : cols1) {
3759  // strip leading/trailing spaces/quotes/single quotes
3760  boost::algorithm::trim_if(s, boost::is_any_of(" \"'`"));
3761  std::string* str = new std::string(s);
3762  cols->emplace_back(str);
3763  }
3764 
3765  Parser::DropColumnStmt(new std::string(tableName), cols).execute(session);
3766 
3767  } else if (type == "ALTER_OPTIONS") {
3768  CHECK(payload.HasMember("options"));
3769 
3770  if (payload["options"].IsObject()) {
3771  for (const auto& option : payload["options"].GetObject()) {
3772  std::string* option_name = new std::string(json_str(option.name));
3773  Literal* literal_value;
3774  if (option.value.IsString()) {
3775  std::string literal_string = json_str(option.value);
3776 
3777  // iff this string can be converted to INT
3778  // ... do so because it is necessary for AlterTableParamStmt
3779  std::size_t sz;
3780  int iVal = std::stoi(literal_string, &sz);
3781  if (sz == literal_string.size()) {
3782  literal_value = new IntLiteral(iVal);
3783  } else {
3784  literal_value = new StringLiteral(&literal_string);
3785  }
3786  } else if (option.value.IsInt() || option.value.IsInt64()) {
3787  literal_value = new IntLiteral(json_i64(option.value));
3788  } else if (option.value.IsNull()) {
3789  literal_value = new NullLiteral();
3790  } else {
3791  throw std::runtime_error("Unable to handle literal for " + *option_name);
3792  }
3793  CHECK(literal_value);
3794 
3795  NameValueAssign* p1 = new NameValueAssign(option_name, literal_value);
3796  Parser::AlterTableParamStmt(new std::string(tableName), p1).execute(session);
3797  }
3798 
3799  } else {
3800  CHECK(payload["options"].IsNull());
3801  }
3802  }
3803 }
3804 
// Truncates the table named by `table` in the session's catalog.
// Throws std::runtime_error when the table does not exist, when the privilege
// check for the current user fails, or when the target is a view.
3805 void TruncateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3806  auto& catalog = session.getCatalog();
3807 
3808  // TODO: Removal of the FileMgr is not thread safe. Take a global system write lock
3809  // when truncating a table
3810  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3813 
3814  const auto td_with_lock =
3816  catalog, *table, true);
3817  const auto td = td_with_lock();
3818  if (!td) {
3819  throw std::runtime_error("Table " + *table + " does not exist.");
3820  }
3821 
3822  // check access privileges
3823  std::vector<DBObject> privObjects;
3824  DBObject dbObject(*table, TableDBObjectType);
3825  dbObject.loadKey(catalog);
3827  privObjects.push_back(dbObject);
3828  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3829  throw std::runtime_error("Table " + *table + " will not be truncated. User " +
3830  session.get_currentUser().userLoggable() +
3831  " has no proper privileges.");
3832  }
3833 
// Views are rejected before any data is touched.
3834  if (td->isView) {
3835  throw std::runtime_error(*table + " is a view. Cannot Truncate.");
3836  }
3838  auto table_data_write_lock =
3840  catalog.truncateTable(td);
3841 
3842  // invalidate cached hashtable
3844 }
3845 
// Privilege gate used by the ALTER TABLE style statements in this file
// (NOTE(review): presumably check_alter_table_privilege(session, td), judging
// by its callers -- confirm against the declaration).
// Returns immediately when the current user is a super user or when its userId
// equals td->userId. Otherwise builds a table DBObject for td->tableName and
// throws std::runtime_error unless SysCatalog::checkPrivileges accepts it.
3847  const TableDescriptor* td) {
3848  if (session.get_currentUser().isSuper ||
3849  session.get_currentUser().userId == td->userId) {
3850  return;
3851  }
3852  std::vector<DBObject> privObjects;
3853  DBObject dbObject(td->tableName, TableDBObjectType);
3854  dbObject.loadKey(session.getCatalog());
3856  privObjects.push_back(dbObject);
3857  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3858  throw std::runtime_error("Current user does not have the privilege to alter table: " +
3859  td->tableName);
3860  }
3861 }
3862 
// Renames the user `*username_` to `*new_username_` through SysCatalog.
// Throws std::runtime_error when the caller is not a super user or when no
// metadata can be found for `*username_`.
3863 void RenameUserStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3864  if (!session.get_currentUser().isSuper) {
3865  throw std::runtime_error("Only a super user can rename users.");
3866  }
3867 
3869  if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
3870  throw std::runtime_error("User " + *username_ + " does not exist.");
3871  }
3872 
3873  SysCatalog::instance().renameUser(*username_, *new_username_);
3874 }
3875 
// Renames the database `*database_name_` to `*new_database_name_` through
// SysCatalog. Throws std::runtime_error when the database metadata lookup
// fails, or when the caller is neither a super user nor the database owner.
3876 void RenameDatabaseStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3878 
3879  // TODO: use database lock instead
3880  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3883 
3884  if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
3885  throw std::runtime_error("Database " + *database_name_ + " does not exist.");
3886  }
3887 
// Ownership is decided by comparing the caller's userId with db.dbOwner.
3888  if (!session.get_currentUser().isSuper &&
3889  session.get_currentUser().userId != db.dbOwner) {
3890  throw std::runtime_error("Only a super user or the owner can rename the database.");
3891  }
3892 
3893  SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
3894 }
3895 
3896 RenameTableStmt::RenameTableStmt(const rapidjson::Value& payload) {
3897  CHECK(payload.HasMember("command"));
3898 
3899  CHECK(payload.HasMember("tableNames"));
3900  CHECK(payload["tableNames"].IsArray());
3901  const auto elements = payload["tableNames"].GetArray();
3902  for (const auto& element : elements) {
3903  CHECK(element.HasMember("name"));
3904  CHECK(element.HasMember("newName"));
3905  tablesToRename.emplace_back(new std::string(json_str(element["name"])),
3906  new std::string(json_str(element["newName"])));
3907  }
3908 }
3909 
3910 RenameTableStmt::RenameTableStmt(std::string* tab_name, std::string* new_tab_name) {
3911  tablesToRename.emplace_back(tab_name, new_tab_name);
3912 }
3913 
3914 RenameTableStmt::RenameTableStmt(
3915  std::list<std::pair<std::string, std::string>> tableNames) {
3916  for (auto item : tableNames) {
3917  tablesToRename.emplace_back(new std::string(item.first),
3918  new std::string(item.second));
3919  }
3920 }
3921 
3922 using SubstituteMap = std::map<std::string, std::string>;
3923 
3924 // Namespace fns used to track a left-to-right execution of RENAME TABLE
3925 // and verify that the command should be (entirely/mostly) valid
3926 //
3927 namespace {
3928 
3929 static constexpr char const* EMPTY_NAME{""};
3930 
// Derives a temporary table name from `name` by appending "_tmp" followed by
// the current calendar time (std::time) rendered as a decimal number.
std::string generateUniqueTableName(std::string name) {
  // TODO - is there a "better" way to create a tmp name for the table
  const std::time_t now = std::time(nullptr);
  name += "_tmp";
  name += std::to_string(now);
  return name;
}
3936 
3937 void recordRename(SubstituteMap& sMap, std::string oldName, std::string newName) {
3938  sMap[oldName] = newName;
3939 }
3940 
// Resolves `tableName` through the substitution map (NOTE(review): presumably
// loadTable(catalog, sMap, tableName), judging by its callers -- confirm).
//  - Name already in the map and mapped to EMPTY_NAME: returns tableName.
//  - Name already in the map otherwise: returns the mapped name.
//  - Name not in the map: records it as mapping to itself when the catalog has
//    metadata for it, or to EMPTY_NAME when it has not, then returns tableName.
3942  SubstituteMap& sMap,
3943  std::string tableName) {
3944  if (sMap.find(tableName) != sMap.end()) {
3945  if (sMap[tableName] == EMPTY_NAME)
3946  return tableName;
3947  return sMap[tableName];
3948  } else {
3949  // lookup table in src catalog
3950  const TableDescriptor* td = catalog.getMetadataForTable(tableName);
3951  if (td) {
3952  sMap[tableName] = tableName;
3953  } else {
3954  sMap[tableName] = EMPTY_NAME;
3955  }
3956  }
3957  return tableName;
3958 }
3959 
3960 bool hasData(SubstituteMap& sMap, std::string tableName) {
3961  // assumes loadTable has been previously called
3962  return (sMap[tableName] != EMPTY_NAME);
3963 }
3964 
// Final consistency check over the substitution map: throws std::runtime_error
// for the first entry whose value is neither EMPTY_NAME nor equal to its key.
3966  // Substitution map should be clean at end of rename:
3967  // all items in map must (map to self) or (map to EMPTY_NAME) by end
3968 
3969  for (auto it : sMap) {
3970  if ((it.second) != EMPTY_NAME && (it.first) != (it.second)) {
3971  throw std::runtime_error(
3972  "Error: Attempted to overwrite and lose data in table: \'" + (it.first) + "\'");
3973  }
3974  }
3975 }
3976 } // namespace
3977 
// Executes all queued renames in `tablesToRename`, left to right.
// The loop does not touch the catalog's table names; it simulates the renames
// in a substitution map and accumulates the concrete (old, new) steps in
// `names`, inserting a temporary name whenever the destination is still
// occupied. After the map passes checkNameSubstition the whole list is handed
// to catalog.renameTable in one call.
// Throws std::runtime_error when a source table does not exist, or when the
// command would overwrite a table that still holds data.
3978 void RenameTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
3979  auto& catalog = session.getCatalog();
3980 
3981  // TODO(adb): the catalog should be handling this locking (see AddColumStmt)
3982  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3985 
3986  // accumulated vector of table names: oldName->newName
3987  std::vector<std::pair<std::string, std::string>> names;
3988 
3989  SubstituteMap tableSubtituteMap;
3990 
3991  for (auto& item : tablesToRename) {
3992  std::string curTableName = *(item.first);
3993  std::string newTableName = *(item.second);
3994 
3995  // Note: if rename (a->b, b->a)
3996  // requires a tmp name change (b->tmp, a->b, tmp->a),
3997  // inject that here because
3998  // catalog.renameTable() assumes cleanliness else will fail
3999 
4000  std::string altCurTableName = loadTable(catalog, tableSubtituteMap, curTableName);
4001  std::string altNewTableName = loadTable(catalog, tableSubtituteMap, newTableName);
4002 
4003  if (altCurTableName != curTableName && altCurTableName != EMPTY_NAME) {
4004  // rename is a one-shot deal, reset the mapping once used
4005  recordRename(tableSubtituteMap, curTableName, curTableName);
4006  }
4007 
4008  // Check to see if the command (as-entered) will likely execute cleanly (logic-wise)
4009  // src tables exist before copying from
4010  // destination table collisions
4011  // handled (a->b, b->a)
4012  // or flagged (pre-existing a,b ... "RENAME TABLE a->c, b->c" )
4013  // handle multiple chained renames, tmp names (b->tmp, a->b, tmp->a)
4014  // etc.
4015  //
4016  if (hasData(tableSubtituteMap, altCurTableName)) {
4017  const TableDescriptor* td = catalog.getMetadataForTable(altCurTableName);
4018  if (td) {
4019  // any table that pre-exists must pass these tests
4021  check_alter_table_privilege(session, td);
4022  }
4023 
4024  if (hasData(tableSubtituteMap, altNewTableName)) {
4025  std::string tmpNewTableName = generateUniqueTableName(altNewTableName);
4026  // rename: newTableName to tmpNewTableName to get it out of the way
4027  // because it was full
4028  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4029  recordRename(tableSubtituteMap, altNewTableName, tmpNewTableName);
4030  recordRename(tableSubtituteMap, tmpNewTableName, tmpNewTableName);
4031  names.push_back(
4032  std::pair<std::string, std::string>(altNewTableName, tmpNewTableName));
4033  names.push_back(
4034  std::pair<std::string, std::string>(altCurTableName, altNewTableName));
4035  } else {
4036  // rename: curNewTableName to newTableName
4037  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4038  recordRename(tableSubtituteMap, altNewTableName, altNewTableName);
4039  names.push_back(
4040  std::pair<std::string, std::string>(altCurTableName, altNewTableName));
4041  }
4042  } else {
4043  throw std::runtime_error("Source table \'" + curTableName + "\' does not exist.");
4044  }
4045  }
4046  checkNameSubstition(tableSubtituteMap);
4047 
4048  catalog.renameTable(names);
4049 
4050  // just to be explicit, clean out the list, the unique_ptr will delete
4051  while (!tablesToRename.empty()) {
4052  tablesToRename.pop_front();
4053  }
4054 } // RenameTableStmt::execute
4055 
// Fills the column descriptor `cd` from the parsed column definition `coldef`.
// The NOT NULL flag comes from the column constraint when one is present and
// defaults to false otherwise; the column type and compression are forwarded
// from `coldef` together with `cd`.
4056 void DDLStmt::setColumnDescriptor(ColumnDescriptor& cd, const ColumnDef* coldef) {
4057  bool not_null;
4058  const ColumnConstraintDef* cc = coldef->get_column_constraint();
4059  if (cc == nullptr) {
4060  not_null = false;
4061  } else {
4062  not_null = cc->get_notnull();
4063  }
4065  cd,
4066  coldef->get_column_type(),
4067  not_null,
4068  coldef->get_compression());
4069 }
4070 
// Validates that the ADD COLUMN statement can run against `td`.
// Throws std::runtime_error when the table does not exist, is a view, is a
// temporary table, fails the alter-table privilege check, or already has a
// column with one of the new names.
// Side effect: when `coldefs` is empty, the single `coldef` member is moved
// into it so the rest of the statement can work on the list only.
4071 void AddColumnStmt::check_executable(const Catalog_Namespace::SessionInfo& session,
4072  const TableDescriptor* td) {
4073  auto& catalog = session.getCatalog();
4074  if (!td) {
4075  throw std::runtime_error("Table " + *table + " does not exist.");
4076  } else {
4077  if (td->isView) {
4078  throw std::runtime_error("Adding columns to a view is not supported.");
4079  }
4081  if (table_is_temporary(td)) {
4082  throw std::runtime_error(
4083  "Adding columns to temporary tables is not yet supported.");
4084  }
4085  };
4086 
4087  check_alter_table_privilege(session, td);
4088 
4089  if (0 == coldefs.size()) {
4090  coldefs.push_back(std::move(coldef));
4091  }
4092 
4093  for (const auto& coldef : coldefs) {
4094  auto& new_column_name = *coldef->get_column_name();
4095  if (catalog.getMetadataForColumn(td->tableId, new_column_name) != nullptr) {
4096  throw std::runtime_error("Column " + new_column_name + " already exists.");
4097  }
4098  }
4099 }
4100 
// Adds the columns described by `coldefs` to the table named by `table`.
// Steps, all inside one SQLite transaction:
//  1. register every new column in the catalog (geometry columns are expanded
//     into their physical columns, which are registered as well);
//  2. create one import buffer per new column, sorted by column id;
//  3. when the table already has rows, put one default value (or NULL) into
//     each buffer and load it for `nrows` rows;
//  4. roll the catalog forward and checkpoint.
// Any exception rolls the catalog and the SQLite transaction back and is
// rethrown. Throws std::runtime_error when the table uses a
// SortedOrderFragmenter, when a NOT NULL column has no usable default while
// rows exist, when a geometry default cannot be parsed, or when the load fails.
4101 void AddColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4102  auto& catalog = session.getCatalog();
4103 
4104  // TODO(adb): the catalog should be handling this locking.
4105  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4108 
4109  const auto td_with_lock =
4111  catalog, *table, true);
4112  const auto td = td_with_lock();
4113 
4114  check_executable(session, td);
4115 
4116  CHECK(td->fragmenter);
4117  if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
4118  td->fragmenter)) {
4119  throw std::runtime_error(
4120  "Adding columns to a table is not supported when using the \"sort_column\" "
4121  "option.");
4122  }
4123 
4124  // Do not take a data write lock, as the fragmenter may call `deleteFragments`
4125  // during a cap operation. Note that the schema write lock will prevent concurrent
4126  // inserts along with all other queries.
4127 
4128  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
4129  try {
// cds: new column descriptors keyed by column name.
// cid_coldefs: column id -> parsed definition; physical geo columns have no
// definition of their own and are stored with nullptr.
4130  std::map<const std::string, const ColumnDescriptor> cds;
4131  std::map<const int, const ColumnDef*> cid_coldefs;
4132  for (const auto& coldef : coldefs) {
4133  ColumnDescriptor cd;
4134  setColumnDescriptor(cd, coldef.get());
4135  catalog.addColumn(*td, cd);
4136  cds.emplace(*coldef->get_column_name(), cd);
4137  cid_coldefs.emplace(cd.columnId, coldef.get());
4138 
4139  // expand geo column to phy columns
4140  if (cd.columnType.is_geometry()) {
4141  std::list<ColumnDescriptor> phy_geo_columns;
4142  catalog.expandGeoColumn(cd, phy_geo_columns);
4143  for (auto& cd : phy_geo_columns) {
4144  catalog.addColumn(*td, cd);
4145  cds.emplace(cd.columnName, cd);
4146  cid_coldefs.emplace(cd.columnId, nullptr);
4147  }
4148  }
4149  }
4150 
4151  std::unique_ptr<import_export::Loader> loader(new import_export::Loader(catalog, td));
4152  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
4153  for (const auto& cd : cds) {
4154  import_buffers.emplace_back(std::make_unique<import_export::TypedImportBuffer>(
4155  &cd.second, loader->getStringDict(&cd.second)));
4156  }
4157  loader->setAddingColumns(true);
4158 
4159  // set_geo_physical_import_buffer below needs a sorted import_buffers
4160  std::sort(import_buffers.begin(),
4161  import_buffers.end(),
4162  [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
4163  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
4164  });
4165 
4166  size_t nrows = td->fragmenter->getNumRows();
4167  // if sharded, get total nrows from all sharded tables
4168  if (td->nShards > 0) {
4169  const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
4170  nrows = 0;
4171  std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](const auto& td) {
4172  nrows += td->fragmenter->getNumRows();
4173  });
4174  }
// Backfill values are only prepared when there are existing rows.
4175  if (nrows > 0) {
4176  int skip_physical_cols = 0;
4177  for (const auto cit : cid_coldefs) {
4178  const auto cd = catalog.getMetadataForColumn(td->tableId, cit.first);
4179  const auto coldef = cit.second;
4180  const auto column_constraint = coldef ? coldef->get_column_constraint() : nullptr;
// Default value as text: taken verbatim from a string literal, otherwise from
// the literal's to_string(); empty when there is no default.
4181  std::string defaultval = "";
4182  if (column_constraint) {
4183  auto defaultlp = column_constraint->get_defaultval();
4184  auto defaultsp = dynamic_cast<const StringLiteral*>(defaultlp);
4185  defaultval = defaultsp ? *defaultsp->get_stringval()
4186  : defaultlp ? defaultlp->to_string() : "";
4187  }
// A column without constraint, with an empty default, or with the literal
// text NULL (any letter case) is backfilled with NULL.
4188  bool isnull = column_constraint ? (0 == defaultval.size()) : true;
4189  if (boost::to_upper_copy<std::string>(defaultval) == "NULL") {
4190  isnull = true;
4191  }
4192 
4193  if (isnull) {
4194  if (column_constraint && column_constraint->get_notnull()) {
4195  throw std::runtime_error("Default value required for column " +
4196  cd->columnName + " (NULL value not supported)");
4197  }
4198  }
4199 
// Find the import buffer that belongs to this column and fill it.
4200  for (auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
4201  auto& import_buffer = *it;
4202  if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
4203  if (coldef != nullptr ||
4204  skip_physical_cols-- <= 0) { // skip non-null phy col
4205  import_buffer->add_value(
4206  cd, defaultval, isnull, import_export::CopyParams());
4207  if (cd->columnType.is_geometry()) {
4208  std::vector<double> coords, bounds;
4209  std::vector<int> ring_sizes, poly_rings;
4210  int render_group = 0;
4211  SQLTypeInfo tinfo{cd->columnType};
4213  tinfo,
4214  coords,
4215  bounds,
4216  ring_sizes,
4217  poly_rings,
4218  false)) {
4219  throw std::runtime_error("Bad geometry data: '" + defaultval + "'");
4220  }
4221  size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
4223  cd,
4224  import_buffers,
4225  col_idx,
4226  coords,
4227  bounds,
4228  ring_sizes,
4229  poly_rings,
4230  render_group);
4231  // skip following phy cols
4232  skip_physical_cols = cd->columnType.get_physical_cols();
4233  }
4234  }
4235  break;
4236  }
4237  }
4238  }
4239  }
4240 
4241  if (!loader->loadNoCheckpoint(import_buffers, nrows, &session)) {
4242  throw std::runtime_error("loadNoCheckpoint failed!");
4243  }
4244  catalog.roll(true);
4245  loader->checkpoint();
4246  catalog.getSqliteConnector().query("END TRANSACTION");
4247  } catch (...) {
// Undo both the in-memory catalog changes and the SQLite transaction, then
// let the original exception continue to the caller.
4248  catalog.roll(false);
4249  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
4250  throw;
4251  }
4252 }
4253 
// Drops the columns listed in `columns` from the table named by `table`.
// Validation (table exists, is not a view, is not temporary, caller passes the
// alter-table privilege check, every column exists, the column-count check
// passes) happens before the SQLite transaction is opened. Inside the
// transaction each column and its physical columns are removed from the
// catalog, then from the fragmenter of every physical table. Any exception
// marks the table for reload, rolls back and is rethrown.
4254 void DropColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4255  auto& catalog = session.getCatalog();
4256 
4257  // TODO(adb): the catalog should be handling this locking.
4258  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4261 
4262  const auto td_with_lock =
4264  catalog, *table, true);
4265  const auto td = td_with_lock();
4266  if (!td) {
4267  throw std::runtime_error("Table " + *table + " does not exist.");
4268  }
4270  if (td->isView) {
4271  throw std::runtime_error("Dropping a column from a view is not supported.");
4272  }
4273  if (table_is_temporary(td)) {
4274  throw std::runtime_error(
4275  "Dropping a column from a temporary table is not yet supported.");
4276  }
4277 
4278  check_alter_table_privilege(session, td);
4279 
4280  for (const auto& column : columns) {
4281  if (nullptr == catalog.getMetadataForColumn(td->tableId, *column)) {
4282  throw std::runtime_error("Column " + *column + " does not exist.");
4283  }
4284  }
4285 
// NOTE(review): the thresholds 2 / 3 presumably account for system columns
// included in nColumns (one more when hasDeletedCol is set) -- confirm.
4286  if (td->nColumns <= (td->hasDeletedCol ? 3 : 2)) {
4287  throw std::runtime_error("Table " + *table + " has only one column.");
4288  }
4289 
4290  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
4291  try {
4292  std::vector<int> columnIds;
4293  for (const auto& column : columns) {
4294  ColumnDescriptor cd = *catalog.getMetadataForColumn(td->tableId, *column);
4295  if (td->nShards > 0 && td->shardedColumnId == cd.columnId) {
4296  throw std::runtime_error("Dropping sharding column " + cd.columnName +
4297  " is not supported.");
4298  }
4299  catalog.dropColumn(*td, cd);
4300  columnIds.push_back(cd.columnId);
// Physical columns are looked up at the ids directly following the logical
// column (columnId + 1 .. columnId + get_physical_cols()).
4301  for (int i = 0; i < cd.columnType.get_physical_cols(); i++) {
4302  const auto pcd = catalog.getMetadataForColumn(td->tableId, cd.columnId + i + 1);
4303  CHECK(pcd);
4304  catalog.dropColumn(*td, *pcd);
4305  columnIds.push_back(cd.columnId + i + 1);
4306  }
4307  }
4308 
4309  for (auto shard : catalog.getPhysicalTablesDescriptors(td)) {
4310  shard->fragmenter->dropColumns(columnIds);
4311  }
4312  // if test forces to rollback
4314  throw std::runtime_error("lol!");
4315  }
4316  catalog.roll(true);
4318  catalog.checkpoint(td->tableId);
4319  }
4320  catalog.getSqliteConnector().query("END TRANSACTION");
4321  } catch (...) {
4322  catalog.setForReload(td->tableId);
4323  catalog.roll(false);
4324  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
4325  throw;
4326  }
4327 
4328  // invalidate cached hashtable
4330 }
4331 
// Renames column `*column` of table `*table` to `*new_column_name`.
// The table descriptor is required to exist (CHECK). Throws std::runtime_error
// when the alter-table privilege check fails, when the source column does not
// exist, or when a column with the new name is already present.
4332 void RenameColumnStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4333  auto& catalog = session.getCatalog();
4334 
4335  const auto td_with_lock =
4337  catalog, *table, false);
4338  const auto td = td_with_lock();
4339  CHECK(td);
4341 
4342  check_alter_table_privilege(session, td);
4343  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *column);
4344  if (cd == nullptr) {
4345  throw std::runtime_error("Column " + *column + " does not exist.");
4346  }
4347  if (catalog.getMetadataForColumn(td->tableId, *new_column_name) != nullptr) {
4348  throw std::runtime_error("Column " + *new_column_name + " already exists.");
4349  }
4350  catalog.renameColumn(td, cd, *new_column_name);
4351 }
4352 
// Applies a single "name = value" table parameter to the table named by
// `table`. Supported names (matched case-insensitively) are
// max_rollback_epochs, epoch and max_rows; the value must be an IntLiteral.
// Throws std::runtime_error when the table does not exist, is a view or a
// temporary table, when the privilege check fails, when the value is not an
// integer literal, or when the parameter name is not settable.
4353 void AlterTableParamStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4354  enum TableParamType { MaxRollbackEpochs, Epoch, MaxRows };
4355  static const std::unordered_map<std::string, TableParamType> param_map = {
4356  {"max_rollback_epochs", TableParamType::MaxRollbackEpochs},
4357  {"epoch", TableParamType::Epoch},
4358  {"max_rows", TableParamType::MaxRows}};
4359  // Below is to ensure that executor is not currently executing on table when we might be
4360  // changing its storage. Question: will/should catalog write lock take care of this?
4361  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4364  auto& catalog = session.getCatalog();
4365  const auto td_with_lock =
4367  catalog, *table, false);
4368  const auto td = td_with_lock();
4369  if (!td) {
4370  throw std::runtime_error("Table " + *table + " does not exist.");
4371  }
4372  if (td->isView) {
4373  throw std::runtime_error("Setting parameters for a view is not supported.");
4374  }
4375  if (table_is_temporary(td)) {
4376  throw std::runtime_error(
4377  "Setting parameters for a temporary table is not yet supported.");
4378  }
4379  check_alter_table_privilege(session, td);
4380 
// The parameter name is lower-cased before the lookup in param_map.
4381  std::string param_name(*param->get_name());
4382  boost::algorithm::to_lower(param_name);
4383  const IntLiteral* val_int_literal = dynamic_cast<const IntLiteral*>(param->get_value());
4384  if (val_int_literal == nullptr) {
4385  throw std::runtime_error("Table parameters should be integers.");
4386  }
4387  const int64_t param_val = val_int_literal->get_intval();
4388 
4389  const auto param_it = param_map.find(param_name);
4390  if (param_it == param_map.end()) {
4391  throw std::runtime_error(param_name + " is not a settable table parameter.");
4392  }
4393  switch (param_it->second) {
4394  case MaxRollbackEpochs: {
4395  catalog.setMaxRollbackEpochs(td->tableId, param_val);
4396  break;
4397  }
4398  case Epoch: {
4399  catalog.setTableEpoch(catalog.getDatabaseId(), td->tableId, param_val);
4400  break;
4401  }
4402  case MaxRows: {
4403  catalog.setMaxRows(td->tableId, param_val);
4404  break;
4405  }
4406  default: {
4407  UNREACHABLE() << "Unexpected TableParamType value: " << param_it->second
4408  << ", key: " << param_it->first;
4409  }
4410  }
4411 }
4412 
4413 void CopyTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4414  auto importer_factory = [](Catalog_Namespace::Catalog& catalog,
4415  const TableDescriptor* td,
4416  const std::string& file_path,
4417  const import_export::CopyParams& copy_params) {
4418  return std::make_unique<import_export::Importer>(catalog, td, file_path, copy_params);
4419  };
4420  return execute(session, importer_factory);
4421 }
4422 
4423 void CopyTableStmt::execute(const Catalog_Namespace::SessionInfo& session,
4424  const std::function<std::unique_ptr<import_export::Importer>(
4426  const TableDescriptor*,
4427  const std::string&,
4428  const import_export::CopyParams&)>& importer_factory) {
4429  boost::regex non_local_file_regex{R"(^\s*(s3|http|https)://.+)",
4430  boost::regex::extended | boost::regex::icase};
4431  if (!boost::regex_match(*file_pattern, non_local_file_regex)) {
4433  *file_pattern, ddl_utils::DataTransferType::IMPORT, true);
4434  }
4435 
4436  size_t total_time = 0;
4437 
4438  // Prevent simultaneous import / truncate (see TruncateTableStmt::execute)
4439  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4442 
4443  const TableDescriptor* td{nullptr};
4444  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
4445  std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
4446 
4447  auto& catalog = session.getCatalog();
4448 
4449  try {
4450  td_with_lock = std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
4452  catalog, *table));
4453  td = (*td_with_lock)();
4454  insert_data_lock = std::make_unique<lockmgr::WriteLock>(
4456  } catch (const std::runtime_error& e) {
4457  // noop
4458  // TODO(adb): We're really only interested in whether the table exists or not.
4459  // Create a more refined exception.
4460  }
4461 
4462  // if the table already exists, it's locked, so check access privileges
4463  if (td) {
4464  std::vector<DBObject> privObjects;
4465  DBObject dbObject(*table, TableDBObjectType);
4466  dbObject.loadKey(catalog);
4468  privObjects.push_back(dbObject);
4469  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
4470  throw std::runtime_error("Violation of access privileges: user " +
4471  session.get_currentUser().userLoggable() +
4472  " has no insert privileges for table " + *table + ".");
4473  }
4474  }
4475 
4476  // since we'll have not only posix file names but also s3/hdfs/... url
4477  // we do not expand wildcard or check file existence here.
4478  // from here on, file_path contains something which may be a url
4479  // or a wildcard of file names;
4480  std::string file_path = *file_pattern;
4481  import_export::CopyParams copy_params;
4482  if (!options.empty()) {
4483  for (auto& p : options) {
4484  if (boost::iequals(*p->get_name(), "max_reject")) {
4485  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4486  if (int_literal == nullptr) {
4487  throw std::runtime_error("max_reject option must be an integer.");
4488  }
4489  copy_params.max_reject = int_literal->get_intval();
4490  } else if (boost::iequals(*p->get_name(), "buffer_size")) {
4491  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4492  if (int_literal == nullptr) {
4493  throw std::runtime_error("buffer_size option must be an integer.");
4494  }
4495  copy_params.buffer_size = int_literal->get_intval();
4496  } else if (boost::iequals(*p->get_name(), "threads")) {
4497  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4498  if (int_literal == nullptr) {
4499  throw std::runtime_error("Threads option must be an integer.");
4500  }
4501  copy_params.threads = int_literal->get_intval();
4502  } else if (boost::iequals(*p->get_name(), "delimiter")) {
4503  const StringLiteral* str_literal =
4504  dynamic_cast<const StringLiteral*>(p->get_value());
4505  if (str_literal == nullptr) {
4506  throw std::runtime_error("Delimiter option must be a string.");
4507  } else if (str_literal->get_stringval()->length() != 1) {
4508  throw std::runtime_error("Delimiter must be a single character string.");
4509  }
4510  copy_params.delimiter = (*str_literal->get_stringval())[0];
4511  } else if (boost::iequals(*p->get_name(), "nulls")) {
4512  const StringLiteral* str_literal =
4513  dynamic_cast<const StringLiteral*>(p->get_value());
4514  if (str_literal == nullptr) {
4515  throw std::runtime_error("Nulls option must be a string.");
4516  }
4517  copy_params.null_str = *str_literal->get_stringval();
4518  } else if (boost::iequals(*p->get_name(), "header")) {
4519  const StringLiteral* str_literal =
4520  dynamic_cast<const StringLiteral*>(p->get_value());
4521  if (str_literal == nullptr) {
4522  throw std::runtime_error("Header option must be a boolean.");
4523  }
4524  copy_params.has_header = bool_from_string_literal(str_literal)
4527 #ifdef ENABLE_IMPORT_PARQUET
4528  } else if (boost::iequals(*p->get_name(), "parquet")) {
4529  const StringLiteral* str_literal =
4530  dynamic_cast<const StringLiteral*>(p->get_value());
4531  if (str_literal == nullptr) {
4532  throw std::runtime_error("Parquet option must be a boolean.");
4533  }
4534  if (bool_from_string_literal(str_literal)) {
4535  // not sure a parquet "table" type is proper, but to make code
4536  // look consistent in some places, let's set "table" type too
4537  copy_params.file_type = import_export::FileType::PARQUET;
4538  }
4539 #endif // ENABLE_IMPORT_PARQUET
4540  } else if (boost::iequals(*p->get_name(), "s3_access_key")) {
4541  const StringLiteral* str_literal =
4542  dynamic_cast<const StringLiteral*>(p->get_value());
4543  if (str_literal == nullptr) {
4544  throw std::runtime_error("Option s3_access_key must be a string.");
4545  }
4546  copy_params.s3_access_key = *str_literal->get_stringval();
4547  } else if (boost::iequals(*p->get_name(), "s3_secret_key")) {
4548  const StringLiteral* str_literal =
4549  dynamic_cast<const StringLiteral*>(p->get_value());
4550  if (str_literal == nullptr) {
4551  throw std::runtime_error("Option s3_secret_key must be a string.");
4552  }
4553  copy_params.s3_secret_key = *str_literal->get_stringval();
4554  } else if (boost::iequals(*p->get_name(), "s3_session_token")) {
4555  const StringLiteral* str_literal =
4556  dynamic_cast<const StringLiteral*>(p->get_value());
4557  if (str_literal == nullptr) {
4558  throw std::runtime_error("Option s3_session_token must be a string.");
4559  }
4560  copy_params.s3_session_token = *str_literal->get_stringval();
4561  } else if (boost::iequals(*p->get_name(), "s3_region")) {
4562  const StringLiteral* str_literal =
4563  dynamic_cast<const StringLiteral*>(p->get_value());
4564  if (str_literal == nullptr) {
4565  throw std::runtime_error("Option s3_region must be a string.");
4566  }
4567  copy_params.s3_region = *str_literal->get_stringval();
4568  } else if (boost::iequals(*p->get_name(), "s3_endpoint")) {
4569  const StringLiteral* str_literal =
4570  dynamic_cast<const StringLiteral*>(p->get_value());
4571  if (str_literal == nullptr) {
4572  throw std::runtime_error("Option s3_endpoint must be a string.");
4573  }
4574  copy_params.s3_endpoint = *str_literal->get_stringval();
4575  } else if (boost::iequals(*p->get_name(), "quote")) {
4576  const StringLiteral* str_literal =
4577  dynamic_cast<const StringLiteral*>(p->get_value());
4578  if (str_literal == nullptr) {
4579  throw std::runtime_error("Quote option must be a string.");
4580  } else if (str_literal->get_stringval()->length() != 1) {
4581  throw std::runtime_error("Quote must be a single character string.");
4582  }
4583  copy_params.quote = (*str_literal->get_stringval())[0];
4584  } else if (boost::iequals(*p->get_name(), "escape")) {
4585  const StringLiteral* str_literal =
4586  dynamic_cast<const StringLiteral*>(p->get_value());
4587  if (str_literal == nullptr) {
4588  throw std::runtime_error("Escape option must be a string.");
4589  } else if (str_literal->get_stringval()->length() != 1) {
4590  throw std::runtime_error("Escape must be a single character string.");
4591  }
4592  copy_params.escape = (*str_literal->get_stringval())[0];
4593  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
4594  const StringLiteral* str_literal =
4595  dynamic_cast<const StringLiteral*>(p->get_value());
4596  if (str_literal == nullptr) {
4597  throw std::runtime_error("Line_delimiter option must be a string.");
4598  } else if (str_literal->get_stringval()->length() != 1) {
4599  throw std::runtime_error("Line_delimiter must be a single character string.");
4600  }
4601  copy_params.line_delim = (*str_literal->get_stringval())[0];
4602  } else if (boost::iequals(*p->get_name(), "quoted")) {
4603  const StringLiteral* str_literal =
4604  dynamic_cast<const StringLiteral*>(p->get_value());
4605  if (str_literal == nullptr) {
4606  throw std::runtime_error("Quoted option must be a boolean.");
4607  }
4608  copy_params.quoted = bool_from_string_literal(str_literal);
4609  } else if (boost::iequals(*p->get_name(), "plain_text")) {
4610  const StringLiteral* str_literal =
4611  dynamic_cast<const StringLiteral*>(p->get_value());
4612  if (str_literal == nullptr) {
4613  throw std::runtime_error("plain_text option must be a boolean.");
4614  }
4615  copy_params.plain_text = bool_from_string_literal(str_literal);
4616  } else if (boost::iequals(*p->get_name(), "array_marker")) {
4617  const StringLiteral* str_literal =
4618  dynamic_cast<const StringLiteral*>(p->get_value());
4619  if (str_literal == nullptr) {
4620  throw std::runtime_error("Array Marker option must be a string.");
4621  } else if (str_literal->get_stringval()->length() != 2) {
4622  throw std::runtime_error(
4623  "Array Marker option must be exactly two characters. Default is {}.");
4624  }
4625  copy_params.array_begin = (*str_literal->get_stringval())[0];
4626  copy_params.array_end = (*str_literal->get_stringval())[1];
4627  } else if (boost::iequals(*p->get_name(), "array_delimiter")) {
4628  const StringLiteral* str_literal =
4629  dynamic_cast<const StringLiteral*>(p->get_value());
4630  if (str_literal == nullptr) {
4631  throw std::runtime_error("Array Delimiter option must be a string.");
4632  } else if (str_literal->get_stringval()->length() != 1) {
4633  throw std::runtime_error("Array Delimiter must be a single character string.");
4634  }
4635  copy_params.array_delim = (*str_literal->get_stringval())[0];
4636  } else if (boost::iequals(*p->get_name(), "lonlat")) {
4637  const StringLiteral* str_literal =
4638  dynamic_cast<const StringLiteral*>(p->get_value());
4639  if (str_literal == nullptr) {
4640  throw std::runtime_error("Lonlat option must be a boolean.");
4641  }
4642  copy_params.lonlat = bool_from_string_literal(str_literal);
4643  } else if (boost::iequals(*p->get_name(), "geo")) {
4644  const StringLiteral* str_literal =
4645  dynamic_cast<const StringLiteral*>(p->get_value());
4646  if (str_literal == nullptr) {
4647  throw std::runtime_error("Geo option must be a boolean.");
4648  }
4649  copy_params.file_type = bool_from_string_literal(str_literal)
4652  } else if (boost::iequals(*p->get_name(), "geo_coords_type")) {
4653  const StringLiteral* str_literal =
4654  dynamic_cast<const StringLiteral*>(p->get_value());
4655  if (str_literal == nullptr) {
4656  throw std::runtime_error("'geo_coords_type' option must be a string");
4657  }
4658  const std::string* s = str_literal->get_stringval();
4659  if (boost::iequals(*s, "geography")) {
4660  throw std::runtime_error(
4661  "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
4662  // copy_params.geo_coords_type = kGEOGRAPHY;
4663  } else if (boost::iequals(*s, "geometry")) {
4664  copy_params.geo_coords_type = kGEOMETRY;
4665  } else {
4666  throw std::runtime_error(
4667  "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
4668  "'GEOMETRY'): " +
4669  *s);
4670  }
4671  } else if (boost::iequals(*p->get_name(), "geo_coords_encoding")) {
4672  const StringLiteral* str_literal =
4673  dynamic_cast<const StringLiteral*>(p->get_value());
4674  if (str_literal == nullptr) {
4675  throw std::runtime_error("'geo_coords_encoding' option must be a string");
4676  }
4677  const std::string* s = str_literal->get_stringval();
4678  if (boost::iequals(*s, "none")) {
4679  copy_params.geo_coords_encoding = kENCODING_NONE;
4680  copy_params.geo_coords_comp_param = 0;
4681  } else if (boost::iequals(*s, "compressed(32)")) {
4682  copy_params.geo_coords_encoding = kENCODING_GEOINT;
4683  copy_params.geo_coords_comp_param = 32;
4684  } else {
4685  throw std::runtime_error(
4686  "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
4687  "'COMPRESSED(32)'): " +
4688  *s);
4689  }
4690  } else if (boost::iequals(*p->get_name(), "geo_coords_srid")) {
4691  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4692  if (int_literal == nullptr) {
4693  throw std::runtime_error("'geo_coords_srid' option must be an integer");
4694  }
4695  const int srid = int_literal->get_intval();
4696  if (srid == 4326 || srid == 3857 || srid == 900913) {
4697  copy_params.geo_coords_srid = srid;
4698  } else {
4699  throw std::runtime_error(
4700  "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
4701  "900913): " +
4702  std::to_string(srid));
4703  }
4704  } else if (boost::iequals(*p->get_name(), "geo_layer_name")) {
4705  const StringLiteral* str_literal =
4706  dynamic_cast<const StringLiteral*>(p->get_value());
4707  if (str_literal == nullptr) {
4708  throw std::runtime_error("'geo_layer_name' option must be a string");
4709  }
4710  const std::string* layer_name = str_literal->get_stringval();
4711  if (layer_name) {
4712  copy_params.geo_layer_name = *layer_name;
4713  } else {
4714  throw std::runtime_error("Invalid value for 'geo_layer_name' option");
4715  }
4716  } else if (boost::iequals(*p->get_name(), "partitions")) {
4717  if (copy_params.file_type == import_export::FileType::POLYGON) {
4718  const auto partitions =
4719  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4720  CHECK(partitions);
4721  const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
4722  if (partitions_uc != "REPLICATED") {
4723  throw std::runtime_error("PARTITIONS must be REPLICATED for geo COPY");
4724  }
4725  _geo_copy_from_partitions = partitions_uc;
4726  } else {
4727  throw std::runtime_error("PARTITIONS option not supported for non-geo COPY: " +
4728  *p->get_name());
4729  }
4730  } else if (boost::iequals(*p->get_name(), "geo_assign_render_groups")) {
4731  const StringLiteral* str_literal =
4732  dynamic_cast<const StringLiteral*>(p->get_value());
4733  if (str_literal == nullptr) {
4734  throw std::runtime_error("geo_assign_render_groups option must be a boolean.");
4735  }
4736  copy_params.geo_assign_render_groups = bool_from_string_literal(str_literal);
4737  } else if (boost::iequals(*p->get_name(), "geo_explode_collections")) {
4738  const StringLiteral* str_literal =
4739  dynamic_cast<const StringLiteral*>(p->get_value());
4740  if (str_literal == nullptr) {
4741  throw std::runtime_error("geo_explode_collections option must be a boolean.");
4742  }
4743  copy_params.geo_explode_collections = bool_from_string_literal(str_literal);
4744  } else if (boost::iequals(*p->get_name(), "source_srid")) {
4745  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4746  if (int_literal == nullptr) {
4747  throw std::runtime_error("'source_srid' option must be an integer");
4748  }
4749  const int srid = int_literal->get_intval();
4750  if (copy_params.file_type == import_export::FileType::DELIMITED) {
4751  copy_params.source_srid = srid;
4752  } else {
4753  throw std::runtime_error(
4754  "'source_srid' option can only be used on csv/tsv files");
4755  }
4756  } else {
4757  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
4758  }
4759  }
4760  }
4761 
4762  std::string tr;
4763  if (copy_params.file_type == import_export::FileType::POLYGON) {
4764  // geo import
4765  // we do nothing here, except stash the parameters so we can
4766  // do the import when we unwind to the top of the handler
4767  _geo_copy_from_file_name = file_path;
4768  _geo_copy_from_copy_params = copy_params;
4769  _was_geo_copy_from = true;
4770 
4771  // the result string
4772  // @TODO simon.eves put something more useful in here
4773  // except we really can't because we haven't done the import yet!
4774  if (td) {
4775  tr = std::string("Appending geo to table '") + *table + std::string("'...");
4776  } else {
4777  tr = std::string("Creating table '") + *table +
4778  std::string("' and importing geo...");
4779  }
4780  } else {
4781  if (td) {
4782  CHECK(td_with_lock);
4783 
4784  // regular import
4785  auto importer = importer_factory(catalog, td, file_path, copy_params);
4786  auto start_time = ::toString(std::chrono::system_clock::now());
4787  auto prev_table_epoch = importer->getLoader()->getTableEpochs();
4789  auto query_session = session.get_session_id();
4790  auto query_str = "COPYING " + td->tableName;
4792  executor->enrollQuerySession(query_session,
4793  query_str,
4794  start_time,
4796  QuerySessionStatus::QueryStatus::RUNNING);
4797  }
4798 
4799  ScopeGuard clearInterruptStatus =
4800  [executor, &query_str, &query_session, &start_time, &importer] {
4801  // reset the runtime query interrupt status
4803  executor->clearQuerySessionStatus(query_session, start_time, false);
4804  }
4805  };
4806  import_export::ImportStatus import_result;
4807  auto ms =
4808  measure<>::execution([&]() { import_result = importer->import(&session); });
4809  total_time += ms;
4810  // results
4811  if (!import_result.load_failed &&
4812  import_result.rows_rejected > copy_params.max_reject) {
4813  LOG(ERROR) << "COPY exited early due to reject records count during multi file "
4814  "processing ";
4815  // if we have crossed the truncated load threshold
4816  import_result.load_failed = true;
4817  import_result.load_msg =
4818  "COPY exited early due to reject records count during multi file "
4819  "processing ";
4820  success = false;
4821  }
4822  if (!import_result.load_failed) {
4823  tr = std::string(
4824  "Loaded: " + std::to_string(import_result.rows_completed) +
4825  " recs, Rejected: " + std::to_string(import_result.rows_rejected) +
4826  " recs in " + std::to_string((double)total_time / 1000.0) + " secs");
4827  } else {
4828  tr = std::string("Loader Failed due to : " + import_result.load_msg + " in " +
4829  std::to_string((double)total_time / 1000.0) + " secs");
4830  }
4831  } else {
4832  throw std::runtime_error("Table '" + *table + "' must exist before COPY FROM");
4833  }
4834  }
4835 
4836  return_message.reset(new std::string(tr));
4837  LOG(INFO) << tr;
4838 } // namespace Parser
4839 
4840 // CREATE ROLE payroll_dept_role;
4841 void CreateRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4842  const auto& currentUser = session.get_currentUser();
4843  if (!currentUser.isSuper) {
4844  throw std::runtime_error("CREATE ROLE " + get_role() +
4845  " failed. It can only be executed by super user.");
4846  }
4847  SysCatalog::instance().createRole(get_role());
4848 }
4849 
4850 // DROP ROLE payroll_dept_role;
4851 void DropRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
4852  const auto& currentUser = session.get_currentUser();
4853  if (!currentUser.isSuper) {
4854  throw std::runtime_error("DROP ROLE " + get_role() +
4855  " failed. It can only be executed by super user.");
4856  }
4857  auto* rl = SysCatalog::instance().getRoleGrantee(get_role());
4858  if (!rl) {
4859  throw std::runtime_error("DROP ROLE " + get_role() +
4860  " failed because role with this name does not exist.");
4861  }
4862  SysCatalog::instance().dropRole(get_role());
4863 }
4864 
4865 std::vector<std::string> splitObjectHierName(const std::string& hierName) {
4866  std::vector<std::string> componentNames;
4867  boost::split(componentNames, hierName, boost::is_any_of("."));
4868  return componentNames;
4869 }
4870 
// Returns the unqualified object name contained in a dot-separated hierarchical
// name.
//
//   objectHierName  name as written in the statement, split on '.' by
//                   splitObjectHierName()
//   objectType      object type string; compared verbatim against "DATABASE",
//                   "TABLE", "DASHBOARD", "VIEW" and "SERVER"
//
// For "DATABASE" the name must consist of exactly one component. For the other
// supported types it may have one component (returned as is) or two components
// (the second one is returned).
//
// Throws std::runtime_error when the component count is not allowed for the
// type, or when the type is none of the strings listed above.
//
// NOTE(review): source line 4873 (the remainder of the parameter list) is not
// present in this listing; callers in this file pass a catalog as third argument.
4871 std::string extractObjectNameFromHierName(const std::string& objectHierName,
4872  const std::string& objectType,
4874  std::string objectName;
4875  std::vector<std::string> componentNames = splitObjectHierName(objectHierName);
4876  if (objectType.compare("DATABASE") == 0) {
4877  if (componentNames.size() == 1) {
4878  objectName = componentNames[0];
4879  } else {
4880  throw std::runtime_error("DB object name is not correct " + objectHierName);
4881  }
4882  } else {
4883  if (objectType.compare("TABLE") == 0 || objectType.compare("DASHBOARD") == 0 ||
4884  objectType.compare("VIEW") == 0 || objectType.compare("SERVER") == 0) {
4885  switch (componentNames.size()) {
4886  case (1): {
4887  objectName = componentNames[0];
4888  break;
4889  }
4890  case (2): {
// Two components: the first is a qualifier and is dropped.
4891  objectName = componentNames[1];
4892  break;
4893  }
4894  default: {
4895  throw std::runtime_error("DB object name is not correct " + objectHierName);
4896  }
4897  }
4898  } else {
4899  throw std::runtime_error("DB object type " + objectType + " is not supported.");
4900  }
4901  }
4902  return objectName;
4903 }
4904 
// Looks up the (AccessPrivileges, DBObjectType) pair registered for a privilege
// name applied to a given object type.
//
//   privs        privilege name; matched exactly against the table keys (the
//                callers in this file upper-case it before passing it in)
//   objectType   type of the object the privilege is requested on
//   object_name  only used to build the error message
//
// Returns the table entry for (privs, objectType).
// Throws std::runtime_error when the combination is not in the table.
//
// NOTE(review): in this listing the value half of every table entry and the
// closing of the initializer are missing; only the keys are visible below.
4905 static std::pair<AccessPrivileges, DBObjectType> parseStringPrivs(
4906  const std::string& privs,
4907  const DBObjectType& objectType,
4908  const std::string& object_name) {
// Built once on first call (function-local static) and never modified.
4909  static const std::map<std::pair<const std::string, const DBObjectType>,
4910  std::pair<const AccessPrivileges, const DBObjectType>>
4911  privileges_lookup{
4912  {{"ALL"s, DatabaseDBObjectType},
4915  {{"ALL"s, DashboardDBObjectType},
4918  {{"ALL"s, ServerDBObjectType},
4920 
4921  {{"CREATE TABLE"s, DatabaseDBObjectType},
4923  {{"CREATE"s, DatabaseDBObjectType},
4925  {{"SELECT"s, DatabaseDBObjectType},
4927  {{"INSERT"s, DatabaseDBObjectType},
4929  {{"TRUNCATE"s, DatabaseDBObjectType},
4931  {{"UPDATE"s, DatabaseDBObjectType},
4933  {{"DELETE"s, DatabaseDBObjectType},
4935  {{"DROP"s, DatabaseDBObjectType},
4937  {{"ALTER"s, DatabaseDBObjectType},
4939 
4940  {{"SELECT"s, TableDBObjectType},
4942  {{"INSERT"s, TableDBObjectType},
4944  {{"TRUNCATE"s, TableDBObjectType},
4946  {{"UPDATE"s, TableDBObjectType},
4948  {{"DELETE"s, TableDBObjectType},
4950  {{"DROP"s, TableDBObjectType},
4952  {{"ALTER"s, TableDBObjectType},
4954 
4955  {{"CREATE VIEW"s, DatabaseDBObjectType},
4957  {{"SELECT VIEW"s, DatabaseDBObjectType},
4959  {{"DROP VIEW"s, DatabaseDBObjectType},
4961  {{"SELECT"s, ViewDBObjectType},
4964 
4965  {{"CREATE DASHBOARD"s, DatabaseDBObjectType},
4967  {{"EDIT DASHBOARD"s, DatabaseDBObjectType},
4969  {{"VIEW DASHBOARD"s, DatabaseDBObjectType},
4971  {{"DELETE DASHBOARD"s, DatabaseDBObjectType},
4973  {{"VIEW"s, DashboardDBObjectType},
4975  {{"EDIT"s, DashboardDBObjectType},
4977  {{"DELETE"s, DashboardDBObjectType},
4979 
4980  {{"CREATE SERVER"s, DatabaseDBObjectType},
4982  {{"DROP SERVER"s, DatabaseDBObjectType},
4984  {{"DROP"s, ServerDBObjectType},
4986  {{"ALTER SERVER"s, DatabaseDBObjectType},
4988  {{"ALTER"s, ServerDBObjectType},
4990  {{"USAGE"s, ServerDBObjectType},
4992  {{"SERVER USAGE"s, DatabaseDBObjectType},
4994 
4995  {{"VIEW SQL EDITOR"s, DatabaseDBObjectType},
4997  {{"ACCESS"s, DatabaseDBObjectType},
4999 
// The key is the exact (privilege string, object type) pair; no case folding
// happens here.
5000  auto result = privileges_lookup.find(std::make_pair(privs, objectType));
5001  if (result == privileges_lookup.end()) {
5002  throw std::runtime_error("Privileges " + privs + " on DB object " + object_name +
5003  " are not correct.");
5004  }
5005  return result->second;
5006 }
5007 
5008 static DBObject createObject(const std::string& objectName, DBObjectType objectType) {
5009  if (objectType == DashboardDBObjectType) {
5010  int32_t dashboard_id = -1;
5011  if (!objectName.empty()) {
5012  try {
5013  dashboard_id = stoi(objectName);
5014  } catch (const std::exception&) {
5015  throw std::runtime_error(
5016  "Privileges on dashboards should be changed via integer dashboard ID");
5017  }
5018  }
5019  return DBObject(dashboard_id, objectType);
5020  } else {
5021  return DBObject(objectName, objectType);
5022  }
5023 }
5024 
5025 // GRANT SELECT/INSERT/CREATE ON TABLE payroll_table TO payroll_dept_role;
5026 void GrantPrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5027  auto& catalog = session.getCatalog();
5028  const auto& currentUser = session.get_currentUser();
5029  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
5030  const auto objectName =
5031  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
5032  auto objectType = DBObjectTypeFromString(parserObjectType);
5033  if (objectType == ServerDBObjectType && !g_enable_fsi) {
5034  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
5035  }
5036  DBObject dbObject = createObject(objectName, objectType);
5037  /* verify object ownership if not suser */
5038  if (!currentUser.isSuper) {
5039  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
5040  throw std::runtime_error(
5041  "GRANT failed. It can only be executed by super user or owner of the "
5042  "object.");
5043  }
5044  }
5045  /* set proper values of privileges & grant them to the object */
5046  std::vector<DBObject> objects(get_privs().size(), dbObject);
5047  for (size_t i = 0; i < get_privs().size(); ++i) {
5048  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
5049  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
5050  objects[i].setPrivileges(priv.first);
5051  objects[i].setPermissionType(priv.second);
5052  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
5053  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
5054  }
5055  }
5056  SysCatalog::instance().grantDBObjectPrivilegesBatch(grantees, objects, catalog);
5057 }
5058 
5059 // REVOKE SELECT/INSERT/CREATE ON TABLE payroll_table FROM payroll_dept_role;
5060 void RevokePrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5061  auto& catalog = session.getCatalog();
5062  const auto& currentUser = session.get_currentUser();
5063  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
5064  const auto objectName =
5065  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
5066  auto objectType = DBObjectTypeFromString(parserObjectType);
5067  if (objectType == ServerDBObjectType && !g_enable_fsi) {
5068  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
5069  }
5070  DBObject dbObject = createObject(objectName, objectType);
5071  /* verify object ownership if not suser */
5072  if (!currentUser.isSuper) {
5073  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
5074  throw std::runtime_error(
5075  "REVOKE failed. It can only be executed by super user or owner of the "
5076  "object.");
5077  }
5078  }
5079  /* set proper values of privileges & grant them to the object */
5080  std::vector<DBObject> objects(get_privs().size(), dbObject);
5081  for (size_t i = 0; i < get_privs().size(); ++i) {
5082  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
5083  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
5084  objects[i].setPrivileges(priv.first);
5085  objects[i].setPermissionType(priv.second);
5086  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
5087  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
5088  }
5089  }
5090  SysCatalog::instance().revokeDBObjectPrivilegesBatch(grantees, objects, catalog);
5091 }
5092 
5093 // NOTE: not used currently, will we ever use it?
5094 // SHOW ON TABLE payroll_table FOR payroll_dept_role;
// Prints to stdout (via printf) the privileges that the role returned by
// get_role() holds on the object named by get_object().
//
//   session  session of the user issuing the statement; supplies the catalog
//            and the current user
//
// Throws std::runtime_error when the current user is not a super user and
// verifyDBObjectOwnership() fails for the object; name/type helpers may throw
// as well.
//
// NOTE(review): the `if (...) {` lines that guard each printf below are not
// present in this listing, so which privilege bit selects which label cannot be
// confirmed from here.
5095 void ShowPrivilegesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5096  auto& catalog = session.getCatalog();
5097  const auto& currentUser = session.get_currentUser();
5098  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
5099  const auto objectName =
5100  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
5101  auto objectType = DBObjectTypeFromString(parserObjectType);
5102  DBObject dbObject = createObject(objectName, objectType);
5103  /* verify object ownership if not suser */
5104  if (!currentUser.isSuper) {
5105  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
5106  throw std::runtime_error(
5107  "SHOW ON " + get_object() + " FOR " + get_role() +
5108  " failed. It can only be executed by super user or owner of the object.");
5109  }
5110  }
5111  /* get values of privileges for the object and report them */
// dbObject is passed in and then read back via getPrivileges(), so the call is
// expected to fill it in place.
5112  SysCatalog::instance().getDBObjectPrivileges(get_role(), dbObject, catalog);
5113  AccessPrivileges privs = dbObject.getPrivileges();
5114  printf("\nPRIVILEGES ON %s FOR %s ARE SET AS FOLLOWING: ",
5115  get_object().c_str(),
5116  get_role().c_str());
5117 
// Each object type reports its own set of privilege labels.
5118  if (objectType == DBObjectType::DatabaseDBObjectType) {
5120  printf(" CREATE");
5121  }
5123  printf(" DROP");
5124  }
5125  } else if (objectType == DBObjectType::TableDBObjectType) {
5127  printf(" CREATE");
5128  }
5130  printf(" DROP");
5131  }
5133  printf(" SELECT");
5134  }
5136  printf(" INSERT");
5137  }
5139  printf(" UPDATE");
5140  }
5142  printf(" DELETE");
5143  }
5145  printf(" TRUNCATE");
5146  }
5148  printf(" ALTER");
5149  }
5150  } else if (objectType == DBObjectType::DashboardDBObjectType) {
5152  printf(" CREATE");
5153  }
5155  printf(" DELETE");
5156  }
5158  printf(" VIEW");
5159  }
5161  printf(" EDIT");
5162  }
5163  } else if (objectType == DBObjectType::ViewDBObjectType) {
5165  printf(" CREATE");
5166  }
5168  printf(" DROP");
5169  }
5171  printf(" SELECT");
5172  }
5174  printf(" INSERT");
5175  }
5177  printf(" UPDATE");
5178  }
5180  printf(" DELETE");
5181  }
5182  }
5183  printf(".\n");
5184 }
5185 
5186 // GRANT payroll_dept_role TO joe;
5187 void GrantRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5188  const auto& currentUser = session.get_currentUser();
5189  if (!currentUser.isSuper) {
5190  throw std::runtime_error(
5191  "GRANT failed, because it can only be executed by super user.");
5192  }
5193  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
5194  get_grantees().end()) {
5195  throw std::runtime_error(
5196  "Request to grant role failed because mapd root user has all privileges by "
5197  "default.");
5198  }
5199  SysCatalog::instance().grantRoleBatch(get_roles(), get_grantees());
5200 }
5201 
5202 // REVOKE payroll_dept_role FROM joe;get_users
5203 void RevokeRoleStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5204  const auto& currentUser = session.get_currentUser();
5205  if (!currentUser.isSuper) {
5206  throw std::runtime_error(
5207  "REVOKE failed, because it can only be executed by super user.");
5208  }
5209  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
5210  get_grantees().end()) {
5211  throw std::runtime_error(
5212  "Request to revoke role failed because privileges can not be revoked from "
5213  "mapd "
5214  "root user.");
5215  }
5216  SysCatalog::instance().revokeRoleBatch(get_roles(), get_grantees());
5217 }
5218 
// Produces the CREATE statement text for the table named by table_ and stores
// it in create_stmt_.
//
//   session  session of the user issuing the statement; supplies the catalog
//            and the current user
//
// Throws std::runtime_error when
//   - the catalog has no metadata for the table,
//   - the current user has no privileges at all on it (reported with the same
//     "does not exist" message as a missing table), or
//   - the table is a view and the current user is not a super user.
//
// NOTE(review): source lines 5223-5224 (arguments of the read lock) and 5232
// (construction of dbObject) are not present in this listing.
5219 void ShowCreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
5220  using namespace Catalog_Namespace;
5221 
5222  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
5225 
5226  auto& catalog = session.getCatalog();
5227  const TableDescriptor* td = catalog.getMetadataForTable(*table_, false);
5228  if (!td) {
5229  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
5230  }
5231 
5233  dbObject.loadKey(catalog);
5234  std::vector<DBObject> privObjects = {dbObject};
5235 
// Same message as for a missing table - presumably so that users without
// access cannot probe for table names (TODO confirm intent).
5236  if (!SysCatalog::instance().hasAnyPrivileges(session.get_currentUser(), privObjects)) {
5237  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
5238  }
5239  if (td->isView && !session.get_currentUser().isSuper) {
5240  // TODO: we need to run a validate query to ensure the user has access to the
5241  // underlying table, but we do not have any of the machinery in here. Disable for
5242  // now, unless the current user is a super user.
5243  throw std::runtime_error("SHOW CREATE TABLE not yet supported for views");
5244  }
5245 
5246  create_stmt_ = catalog.dumpCreateTable(td);
5247 }
5248 
// Runs *select_stmt and writes its results to *file_path through a
// QueryExporter created for the file type obtained from parseOptions().
//
//   session  session of the user issuing the statement; a copy of it backs the
//            query state, and its session id names the export directory used
//            for relative paths
//
// Side effects visible here: may create the export directory, rewrites
// *file_path for relative paths, and may assign leafs_connector_.
//
// Throws std::runtime_error when the file path is empty or the export directory
// cannot be created; calls made from here may throw as well.
//
// NOTE(review): several source lines are not present in this listing
// (5265-5266, 5268-5271: declarations of file_type, file_compression and
// array_null_handling; 5289-5290: validation of absolute paths).
5249 void ExportQueryStmt::execute(const Catalog_Namespace::SessionInfo& session) {
// session_ptr does not own session_copy (null_deleter); it only wraps the local
// copy so it can be handed to QueryState::create().
5250  auto session_copy = session;
5251  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
5252  &session_copy, boost::null_deleter());
5253  auto query_state = query_state::QueryState::create(session_ptr, *select_stmt);
5254  auto stdlog = STDLOG(query_state);
5255  auto query_state_proxy = query_state->createQueryStateProxy();
5256 
5257  LocalConnector local_connector;
5258 
// NOTE(review): this stores the address of a stack object in leafs_connector_,
// which looks like a member; if so the pointer dangles once execute() returns
// and would be reused (non-null) by a second call - confirm the statement is
// only ever executed once.
5259  if (!leafs_connector_) {
5260  leafs_connector_ = &local_connector;
5261  }
5262 
5263  import_export::CopyParams copy_params;
5264  // @TODO(se) move rest to CopyParams when we have a Thrift endpoint
5267  std::string layer_name;
5272 
5273  parseOptions(copy_params, file_type, layer_name, file_compression, array_null_handling);
5274 
5275  if (file_path->empty()) {
5276  throw std::runtime_error("Invalid file path for COPY TO");
5277  } else if (!boost::filesystem::path(*file_path).is_absolute()) {
// Relative path: keep only the file name component and place it under
// <g_base_path>/mapd_export/<session id>/, creating that directory if needed.
5278  std::string file_name = boost::filesystem::path(*file_path).filename().string();
5279  std::string file_dir = g_base_path + "/mapd_export/" + session.get_session_id() + "/";
5280  if (!boost::filesystem::exists(file_dir)) {
5281  if (!boost::filesystem::create_directories(file_dir)) {
5282  throw std::runtime_error("Directory " + file_dir + " cannot be created.");
5283  }
5284  }
5285  *file_path = file_dir + file_name;
5286  } else {
5287  // Above branch will create a new file in the mapd_export directory. If that path
5288  // is not exercised, go through applicable file path validations.
5291  }
5292 
5293  // get column info
// Always issued through the local connector; only targets_meta of the result is
// used below.
5294  auto column_info_result =
5295  local_connector.query(query_state_proxy, *select_stmt, {}, true, false);
5296 
5297  // create exporter for requested file type
5298  auto query_exporter = import_export::QueryExporter::create(file_type);
5299 
5300  // default layer name to file path stem if it wasn't specified
5301  if (layer_name.size() == 0) {
5302  layer_name = boost::filesystem::path(*file_path).stem().string();
5303  }
5304 
5305  // begin export
5306  query_exporter->beginExport(*file_path,
5307  layer_name,
5308  copy_params,
5309  column_info_result.targets_meta,
5310  file_compression,
5311  array_null_handling);
5312 
5313  // how many fragments?
// A reported count of 0 still runs the loop once, with no fragment restriction.
5314  size_t outer_frag_count =
5315  leafs_connector_->getOuterFragmentCount(query_state_proxy, *select_stmt);
5316  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
5317 
5318  // loop fragments
5319  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
5320  // limit the query to just this fragment
5321  std::vector<size_t> allowed_outer_fragment_indices;
5322  if (outer_frag_count) {
5323  allowed_outer_fragment_indices.push_back(outer_frag_idx);
5324  }
5325 
5326  // run the query
5327  std::vector<AggregatedResult> query_results = leafs_connector_->query(
5328  query_state_proxy, *select_stmt, allowed_outer_fragment_indices, false);
5329 
5330  // export the results
5331  query_exporter->exportResults(query_results);
5332  }
5333 
5334  // end export
5335  query_exporter->endExport();
5336 }
5337 
5338 void ExportQueryStmt::parseOptions(
5339  import_export::CopyParams& copy_params,
5341  std::string& layer_name,
5343  import_export::QueryExporter::ArrayNullHandling& array_null_handling) {
5344  // defaults for non-CopyParams values
5346  layer_name.clear();
5348 
5349  if (!options.empty()) {
5350  for (auto& p : options) {
5351  if (boost::iequals(*p->get_name(), "delimiter")) {
5352  const StringLiteral* str_literal =
5353  dynamic_cast<const StringLiteral*>(p->get_value());
5354  if (str_literal == nullptr) {
5355  throw std::runtime_error("Delimiter option must be a string.");
5356  } else if (str_literal->get_stringval()->length() != 1) {
5357  throw std::runtime_error("Delimiter must be a single character string.");
5358  }
5359  copy_params.delimiter = (*str_literal->get_stringval())[0];
5360  } else if (boost::iequals(*p->get_name(), "nulls")) {
5361  const StringLiteral* str_literal =
5362  dynamic_cast<const StringLiteral*>(p->get_value());
5363  if (str_literal == nullptr) {
5364  throw std::runtime_error("Nulls option must be a string.");
5365  }
5366  copy_params.null_str = *str_literal->get_stringval();
5367  } else if (boost::iequals(*p->get_name(), "header")) {
5368  const StringLiteral* str_literal =
5369  dynamic_cast<const StringLiteral*>(p->get_value());
5370  if (str_literal == nullptr) {
5371  throw std::runtime_error("Header option must be a boolean.");
5372  }
5373  copy_params.has_header = bool_from_string_literal(str_literal)
5376  } else if (boost::iequals(*p->get_name(), "quote")) {
5377  const StringLiteral* str_literal =
5378  dynamic_cast<const StringLiteral*>(p->get_value());
5379  if (str_literal == nullptr) {
5380  throw std::runtime_error("Quote option must be a string.");
5381  } else if (str_literal->get_stringval()->length() != 1) {
5382  throw std::runtime_error("Quote must be a single character string.");
5383  }
5384  copy_params.quote = (*str_literal->get_stringval())[0];
5385  } else if (boost::iequals(*p->get_name(), "escape")) {
5386  const StringLiteral* str_literal =
5387  dynamic_cast<const StringLiteral*>(p->get_value());
5388  if (str_literal == nullptr) {
5389  throw std::runtime_error("Escape option must be a string.");
5390  } else if (str_literal->get_stringval()->length() != 1) {
5391  throw std::runtime_error("Escape must be a single character string.");
5392  }
5393  copy_params.escape = (*str_literal->get_stringval())[0];
5394  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
5395  const StringLiteral* str_literal =
5396  dynamic_cast<const StringLiteral*>(p->get_value());
5397  if (str_literal == nullptr) {
5398  throw std::runtime_error("Line_delimiter option must be a string.");
5399  } else if (str_literal->get_stringval()->length() != 1) {
5400  throw std::runtime_error("Line_delimiter must be a single character string.");
5401  }
5402  copy_params.line_delim = (*str_literal->get_stringval())[0];
5403  } else if (boost::iequals(*p->get_name(), "quoted")) {
5404  const StringLiteral* str_literal =
5405  dynamic_cast<const StringLiteral*>(p->get_value());
5406  if (str_literal == nullptr) {
5407  throw std::runtime_error("Quoted option must be a boolean.");
5408  }
5409  copy_params.quoted = bool_from_string_literal(str_literal);
5410  } else if (boost::iequals(*p->get_name(), "file_type")) {
5411  const StringLiteral* str_literal =
5412  dynamic_cast<const StringLiteral*>(p->get_value());
5413  if (str_literal == nullptr) {
5414  throw std::runtime_error("File Type option must be a string."