OmniSciDB  21ac014ffc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParserNode.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "ParserNode.h"
26 
27 #include <boost/algorithm/string.hpp>
28 #include <boost/core/null_deleter.hpp>
29 #include <boost/filesystem.hpp>
30 #include <boost/function.hpp>
31 
32 #include <rapidjson/document.h>
33 #include <rapidjson/stringbuffer.h>
34 #include <rapidjson/writer.h>
35 
36 #include <cassert>
37 #include <cmath>
38 #include <limits>
39 #include <random>
40 #include <regex>
41 #include <stdexcept>
42 #include <string>
43 #include <type_traits>
44 #include <typeinfo>
45 
47 #include "Catalog/Catalog.h"
53 #include "Geospatial/Compression.h"
54 #include "Geospatial/Types.h"
55 #include "ImportExport/Importer.h"
56 #include "LockMgr/LockMgr.h"
60 #include "QueryEngine/Execute.h"
64 #include "ReservedKeywords.h"
65 #include "Shared/StringTransform.h"
66 #include "Shared/measure.h"
67 #include "Shared/shard_key.h"
69 #include "Utils/FsiUtils.h"
70 
71 #include "gen-cpp/CalciteServer.h"
72 #include "parser.h"
73 
75 
76 size_t g_leaf_count{0};
79 extern bool g_enable_fsi;
80 
82 using namespace std::string_literals;
83 
84 using TableDefFuncPtr = boost::function<void(TableDescriptor&,
86  const std::list<ColumnDescriptor>& columns)>;
87 
88 using DataframeDefFuncPtr =
89  boost::function<void(DataframeTableDescriptor&,
91  const std::list<ColumnDescriptor>& columns)>;
92 
93 namespace Parser {
// Asks the executor whether the given query session has been marked as
// interrupted. The check is made while holding a shared lock on the
// executor's session lock.
//
// query_session: identifier of the session to look up.
// executor: executor that owns the session lock and the interrupt state.
// Returns the value reported by checkIsQuerySessionInterrupted(), or false
// on the fall-through path.
//
// NOTE(review): the braces in this listing are unbalanced (the closing brace
// before `return false;` has no visible opener), so a line between the
// signature and the lock acquisition appears to be missing. Restore it from
// the upstream file before relying on this text.
94 bool checkInterrupt(const QuerySessionId& query_session, Executor* executor) {
96  mapd_shared_lock<mapd_shared_mutex> session_read_lock(executor->getSessionLock());
97  const auto isInterrupted =
98  executor->checkIsQuerySessionInterrupted(query_session, session_read_lock);
99  return isInterrupted;
100  }
101  return false;
102 }
103 
104 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
105  const Catalog_Namespace::Catalog& catalog,
106  Analyzer::Query& query,
107  TlistRefType allow_tlist_ref) const {
108  return makeExpr<Analyzer::Constant>(kNULLT, true);
109 }
110 
111 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
112  const Catalog_Namespace::Catalog& catalog,
113  Analyzer::Query& query,
114  TlistRefType allow_tlist_ref) const {
115  return analyzeValue(*stringval_);
116 }
117 
118 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(
119  const std::string& stringval) {
120  SQLTypeInfo ti(kVARCHAR, stringval.length(), 0, true);
121  Datum d;
122  d.stringval = new std::string(stringval);
123  return makeExpr<Analyzer::Constant>(ti, false, d);
124 }
125 
126 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
127  const Catalog_Namespace::Catalog& catalog,
128  Analyzer::Query& query,
129  TlistRefType allow_tlist_ref) const {
130  return analyzeValue(intval_);
131 }
132 
133 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(const int64_t intval) {
134  SQLTypes t;
135  Datum d;
136  if (intval >= INT16_MIN && intval <= INT16_MAX) {
137  t = kSMALLINT;
138  d.smallintval = (int16_t)intval;
139  } else if (intval >= INT32_MIN && intval <= INT32_MAX) {
140  t = kINT;
141  d.intval = (int32_t)intval;
142  } else {
143  t = kBIGINT;
144  d.bigintval = intval;
145  }
146  return makeExpr<Analyzer::Constant>(t, false, d);
147 }
148 
149 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
150  const Catalog_Namespace::Catalog& catalog,
151  Analyzer::Query& query,
152  TlistRefType allow_tlist_ref) const {
153  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
154  Datum d = StringToDatum(*fixedptval_, ti);
155  return makeExpr<Analyzer::Constant>(ti, false, d);
156 }
157 
158 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(const int64_t numericval,
159  const int scale,
160  const int precision) {
161  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
162  ti.set_scale(scale);
163  ti.set_precision(precision);
164  Datum d;
165  d.bigintval = numericval;
166  return makeExpr<Analyzer::Constant>(ti, false, d);
167 }
168 
169 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
170  const Catalog_Namespace::Catalog& catalog,
171  Analyzer::Query& query,
172  TlistRefType allow_tlist_ref) const {
173  Datum d;
174  d.floatval = floatval_;
175  return makeExpr<Analyzer::Constant>(kFLOAT, false, d);
176 }
177 
178 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
179  const Catalog_Namespace::Catalog& catalog,
180  Analyzer::Query& query,
181  TlistRefType allow_tlist_ref) const {
182  Datum d;
183  d.doubleval = doubleval_;
184  return makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
185 }
186 
187 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
188  const Catalog_Namespace::Catalog& catalog,
189  Analyzer::Query& query,
190  TlistRefType allow_tlist_ref) const {
191  return get(timestampval_);
192 }
193 
194 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(const int64_t timestampval) {
195  Datum d;
196  d.bigintval = timestampval;
197  return makeExpr<Analyzer::Constant>(kTIMESTAMP, false, d);
198 }
199 
200 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
201  const Catalog_Namespace::Catalog& catalog,
202  Analyzer::Query& query,
203  TlistRefType allow_tlist_ref) const {
204  Datum d;
205  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
206 }
207 
208 std::shared_ptr<Analyzer::Expr> UserLiteral::get(const std::string& user) {
209  Datum d;
210  d.stringval = new std::string(user);
211  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
212 }
213 
214 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
215  const Catalog_Namespace::Catalog& catalog,
216  Analyzer::Query& query,
217  TlistRefType allow_tlist_ref) const {
218  SQLTypeInfo ti = SQLTypeInfo(kARRAY, true);
219  bool set_subtype = true;
220  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
221  for (auto& p : value_list_) {
222  auto e = p->analyze(catalog, query, allow_tlist_ref);
223  CHECK(e);
224  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
225  if (c != nullptr && c->get_is_null()) {
226  value_exprs.push_back(c);
227  continue;
228  }
229  auto subtype = e->get_type_info().get_type();
230  if (subtype == kNULLT) {
231  // NULL element
232  } else if (set_subtype) {
233  ti.set_subtype(subtype);
234  set_subtype = false;
235  }
236  value_exprs.push_back(e);
237  }
238  std::shared_ptr<Analyzer::Expr> result =
239  makeExpr<Analyzer::Constant>(ti, false, value_exprs);
240  return result;
241 }
242 
243 std::string ArrayLiteral::to_string() const {
244  std::string str = "{";
245  bool notfirst = false;
246  for (auto& p : value_list_) {
247  if (notfirst) {
248  str += ", ";
249  } else {
250  notfirst = true;
251  }
252  str += p->to_string();
253  }
254  str += "}";
255  return str;
256 }
257 
258 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
259  const Catalog_Namespace::Catalog& catalog,
260  Analyzer::Query& query,
261  TlistRefType allow_tlist_ref) const {
262  auto left_expr = left_->analyze(catalog, query, allow_tlist_ref);
263  const auto& left_type = left_expr->get_type_info();
264  if (right_ == nullptr) {
265  return makeExpr<Analyzer::UOper>(
266  left_type, left_expr->get_contains_agg(), optype_, left_expr->decompress());
267  }
268  if (optype_ == kARRAY_AT) {
269  if (left_type.get_type() != kARRAY) {
270  throw std::runtime_error(left_->to_string() + " is not of array type.");
271  }
272  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
273  const auto& right_type = right_expr->get_type_info();
274  if (!right_type.is_integer()) {
275  throw std::runtime_error(right_->to_string() + " is not of integer type.");
276  }
277  return makeExpr<Analyzer::BinOper>(
278  left_type.get_elem_type(), false, kARRAY_AT, kONE, left_expr, right_expr);
279  }
280  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
281  return normalize(optype_, opqualifier_, left_expr, right_expr);
282 }
283 
284 std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
285  const SQLOps optype,
286  const SQLQualifier qual,
287  std::shared_ptr<Analyzer::Expr> left_expr,
288  std::shared_ptr<Analyzer::Expr> right_expr) {
289  if (left_expr->get_type_info().is_date_in_days() ||
290  right_expr->get_type_info().is_date_in_days()) {
291  // Do not propogate encoding
292  left_expr = left_expr->decompress();
293  right_expr = right_expr->decompress();
294  }
295  const auto& left_type = left_expr->get_type_info();
296  auto right_type = right_expr->get_type_info();
297  if (qual != kONE) {
298  // subquery not supported yet.
299  CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
300  if (right_type.get_type() != kARRAY) {
301  throw std::runtime_error(
302  "Existential or universal qualifiers can only be used in front of a subquery "
303  "or an "
304  "expression of array type.");
305  }
306  right_type = right_type.get_elem_type();
307  }
308  SQLTypeInfo new_left_type;
309  SQLTypeInfo new_right_type;
310  auto result_type = Analyzer::BinOper::analyze_type_info(
311  optype, left_type, right_type, &new_left_type, &new_right_type);
312  if (result_type.is_timeinterval()) {
313  return makeExpr<Analyzer::BinOper>(
314  result_type, false, optype, qual, left_expr, right_expr);
315  }
316  if (left_type != new_left_type) {
317  left_expr = left_expr->add_cast(new_left_type);
318  }
319  if (right_type != new_right_type) {
320  if (qual == kONE) {
321  right_expr = right_expr->add_cast(new_right_type);
322  } else {
323  right_expr = right_expr->add_cast(new_right_type.get_array_type());
324  }
325  }
326 
327  if (IS_COMPARISON(optype)) {
328  if (optype != kOVERLAPS && new_left_type.is_geometry() &&
329  new_right_type.is_geometry()) {
330  throw std::runtime_error(
331  "Comparison operators are not yet supported for geospatial types.");
332  }
333 
334  if (new_left_type.get_compression() == kENCODING_DICT &&
335  new_right_type.get_compression() == kENCODING_DICT &&
336  new_left_type.get_comp_param() == new_right_type.get_comp_param()) {
337  // do nothing
338  } else if (new_left_type.get_compression() == kENCODING_DICT &&
339  new_right_type.get_compression() == kENCODING_NONE) {
340  SQLTypeInfo ti(new_right_type);
341  ti.set_compression(new_left_type.get_compression());
342  ti.set_comp_param(new_left_type.get_comp_param());
343  ti.set_fixed_size();
344  right_expr = right_expr->add_cast(ti);
345  } else if (new_right_type.get_compression() == kENCODING_DICT &&
346  new_left_type.get_compression() == kENCODING_NONE) {
347  SQLTypeInfo ti(new_left_type);
348  ti.set_compression(new_right_type.get_compression());
349  ti.set_comp_param(new_right_type.get_comp_param());
350  ti.set_fixed_size();
351  left_expr = left_expr->add_cast(ti);
352  } else {
353  left_expr = left_expr->decompress();
354  right_expr = right_expr->decompress();
355  }
356  } else {
357  left_expr = left_expr->decompress();
358  right_expr = right_expr->decompress();
359  }
360  bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
361  return makeExpr<Analyzer::BinOper>(
362  result_type, has_agg, optype, qual, left_expr, right_expr);
363 }
364 
365 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
366  const Catalog_Namespace::Catalog& catalog,
367  Analyzer::Query& query,
368  TlistRefType allow_tlist_ref) const {
369  throw std::runtime_error("Subqueries are not supported yet.");
370  return nullptr;
371 }
372 
373 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
374  const Catalog_Namespace::Catalog& catalog,
375  Analyzer::Query& query,
376  TlistRefType allow_tlist_ref) const {
377  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
378  auto result = makeExpr<Analyzer::UOper>(kBOOLEAN, kISNULL, arg_expr);
379  if (is_not_) {
380  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
381  }
382  return result;
383 }
384 
385 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
386  const Catalog_Namespace::Catalog& catalog,
387  Analyzer::Query& query,
388  TlistRefType allow_tlist_ref) const {
389  throw std::runtime_error("Subqueries are not supported yet.");
390  return nullptr;
391 }
392 
393 std::shared_ptr<Analyzer::Expr> InValues::analyze(
394  const Catalog_Namespace::Catalog& catalog,
395  Analyzer::Query& query,
396  TlistRefType allow_tlist_ref) const {
397  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
398  SQLTypeInfo ti = arg_expr->get_type_info();
399  bool dict_comp = ti.get_compression() == kENCODING_DICT;
400  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
401  for (auto& p : value_list_) {
402  auto e = p->analyze(catalog, query, allow_tlist_ref);
403  if (ti != e->get_type_info()) {
404  if (ti.is_string() && e->get_type_info().is_string()) {
405  ti = Analyzer::BinOper::common_string_type(ti, e->get_type_info());
406  } else if (ti.is_number() && e->get_type_info().is_number()) {
407  ti = Analyzer::BinOper::common_numeric_type(ti, e->get_type_info());
408  } else {
409  throw std::runtime_error("IN expressions must contain compatible types.");
410  }
411  }
412  if (dict_comp) {
413  value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
414  } else {
415  value_exprs.push_back(e);
416  }
417  }
418  if (!dict_comp) {
419  arg_expr = arg_expr->decompress();
420  arg_expr = arg_expr->add_cast(ti);
421  std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
422  for (auto p : value_exprs) {
423  cast_vals.push_back(p->add_cast(ti));
424  }
425  value_exprs.swap(cast_vals);
426  }
427  std::shared_ptr<Analyzer::Expr> result =
428  makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
429  if (is_not_) {
430  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
431  }
432  return result;
433 }
434 
435 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
436  const Catalog_Namespace::Catalog& catalog,
437  Analyzer::Query& query,
438  TlistRefType allow_tlist_ref) const {
439  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
440  auto lower_expr = lower_->analyze(catalog, query, allow_tlist_ref);
441  auto upper_expr = upper_->analyze(catalog, query, allow_tlist_ref);
442  SQLTypeInfo new_left_type, new_right_type;
444  arg_expr->get_type_info(),
445  lower_expr->get_type_info(),
446  &new_left_type,
447  &new_right_type);
448  auto lower_pred =
449  makeExpr<Analyzer::BinOper>(kBOOLEAN,
450  kGE,
451  kONE,
452  arg_expr->add_cast(new_left_type)->decompress(),
453  lower_expr->add_cast(new_right_type)->decompress());
455  arg_expr->get_type_info(),
456  lower_expr->get_type_info(),
457  &new_left_type,
458  &new_right_type);
459  auto upper_pred = makeExpr<Analyzer::BinOper>(
460  kBOOLEAN,
461  kLE,
462  kONE,
463  arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
464  upper_expr->add_cast(new_right_type)->decompress());
465  std::shared_ptr<Analyzer::Expr> result =
466  makeExpr<Analyzer::BinOper>(kBOOLEAN, kAND, kONE, lower_pred, upper_pred);
467  if (is_not_) {
468  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
469  }
470  return result;
471 }
472 
473 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
474  const Catalog_Namespace::Catalog& catalog,
475  Analyzer::Query& query,
476  TlistRefType allow_tlist_ref) const {
477  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
478  if (!arg_expr->get_type_info().is_string()) {
479  throw std::runtime_error(
480  "expression in char_length clause must be of a string type.");
481  }
482  std::shared_ptr<Analyzer::Expr> result =
483  makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length_);
484  return result;
485 }
486 
487 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
488  const Catalog_Namespace::Catalog& catalog,
489  Analyzer::Query& query,
490  TlistRefType allow_tlist_ref) const {
491  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
492  if (!arg_expr->get_type_info().is_array()) {
493  throw std::runtime_error(
494  "expression in cardinality clause must be of an array type.");
495  }
496  std::shared_ptr<Analyzer::Expr> result =
497  makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
498  return result;
499 }
500 
501 void LikeExpr::check_like_expr(const std::string& like_str, char escape_char) {
502  if (like_str.back() == escape_char) {
503  throw std::runtime_error("LIKE pattern must not end with escape character.");
504  }
505 }
506 
507 bool LikeExpr::test_is_simple_expr(const std::string& like_str, char escape_char) {
508  // if not bounded by '%' then not a simple string
509  if (like_str.size() < 2 || like_str[0] != '%' || like_str[like_str.size() - 1] != '%') {
510  return false;
511  }
512  // if the last '%' is escaped then not a simple string
513  if (like_str[like_str.size() - 2] == escape_char &&
514  like_str[like_str.size() - 3] != escape_char) {
515  return false;
516  }
517  for (size_t i = 1; i < like_str.size() - 1; i++) {
518  if (like_str[i] == '%' || like_str[i] == '_' || like_str[i] == '[' ||
519  like_str[i] == ']') {
520  if (like_str[i - 1] != escape_char) {
521  return false;
522  }
523  }
524  }
525  return true;
526 }
527 
528 void LikeExpr::erase_cntl_chars(std::string& like_str, char escape_char) {
529  char prev_char = '\0';
530  // easier to create new string of allowable chars
531  // rather than erase chars from
532  // existing string
533  std::string new_str;
534  for (char& cur_char : like_str) {
535  if (cur_char == '%' || cur_char == escape_char) {
536  if (prev_char != escape_char) {
537  prev_char = cur_char;
538  continue;
539  }
540  }
541  new_str.push_back(cur_char);
542  prev_char = cur_char;
543  }
544  like_str = new_str;
545 }
546 
547 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
548  const Catalog_Namespace::Catalog& catalog,
549  Analyzer::Query& query,
550  TlistRefType allow_tlist_ref) const {
551  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
552  auto like_expr = like_string_->analyze(catalog, query, allow_tlist_ref);
553  auto escape_expr = escape_string_ == nullptr
554  ? nullptr
555  : escape_string_->analyze(catalog, query, allow_tlist_ref);
556  return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike_, is_not_);
557 }
558 
559 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
560  std::shared_ptr<Analyzer::Expr> like_expr,
561  std::shared_ptr<Analyzer::Expr> escape_expr,
562  const bool is_ilike,
563  const bool is_not) {
564  if (!arg_expr->get_type_info().is_string()) {
565  throw std::runtime_error("expression before LIKE must be of a string type.");
566  }
567  if (!like_expr->get_type_info().is_string()) {
568  throw std::runtime_error("expression after LIKE must be of a string type.");
569  }
570  char escape_char = '\\';
571  if (escape_expr != nullptr) {
572  if (!escape_expr->get_type_info().is_string()) {
573  throw std::runtime_error("expression after ESCAPE must be of a string type.");
574  }
575  if (!escape_expr->get_type_info().is_string()) {
576  throw std::runtime_error("expression after ESCAPE must be of a string type.");
577  }
578  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
579  if (c != nullptr && c->get_constval().stringval->length() > 1) {
580  throw std::runtime_error("String after ESCAPE must have a single character.");
581  }
582  escape_char = (*c->get_constval().stringval)[0];
583  }
584  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(like_expr);
585  bool is_simple = false;
586  if (c != nullptr) {
587  std::string& pattern = *c->get_constval().stringval;
588  if (is_ilike) {
589  std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
590  }
591  check_like_expr(pattern, escape_char);
592  is_simple = test_is_simple_expr(pattern, escape_char);
593  if (is_simple) {
594  erase_cntl_chars(pattern, escape_char);
595  }
596  }
597  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikeExpr>(
598  arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
599  if (is_not) {
600  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
601  }
602  return result;
603 }
604 
605 void RegexpExpr::check_pattern_expr(const std::string& pattern_str, char escape_char) {
606  if (pattern_str.back() == escape_char) {
607  throw std::runtime_error("REGEXP pattern must not end with escape character.");
608  }
609 }
610 
611 bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str, char escape_char) {
612  char prev_char = '\0';
613  char prev_prev_char = '\0';
614  std::string like_str;
615  for (char& cur_char : pattern_str) {
616  if (prev_char == escape_char || isalnum(cur_char) || cur_char == ' ' ||
617  cur_char == '.') {
618  like_str.push_back((cur_char == '.') ? '_' : cur_char);
619  prev_prev_char = prev_char;
620  prev_char = cur_char;
621  continue;
622  }
623  if (prev_char == '.' && prev_prev_char != escape_char) {
624  if (cur_char == '*' || cur_char == '+') {
625  if (cur_char == '*') {
626  like_str.pop_back();
627  }
628  // .* --> %
629  // .+ --> _%
630  like_str.push_back('%');
631  prev_prev_char = prev_char;
632  prev_char = cur_char;
633  continue;
634  }
635  }
636  return false;
637  }
638  pattern_str = like_str;
639  return true;
640 }
641 
642 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
643  const Catalog_Namespace::Catalog& catalog,
644  Analyzer::Query& query,
645  TlistRefType allow_tlist_ref) const {
646  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
647  auto pattern_expr = pattern_string_->analyze(catalog, query, allow_tlist_ref);
648  auto escape_expr = escape_string_ == nullptr
649  ? nullptr
650  : escape_string_->analyze(catalog, query, allow_tlist_ref);
651  return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not_);
652 }
653 
654 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
655  std::shared_ptr<Analyzer::Expr> arg_expr,
656  std::shared_ptr<Analyzer::Expr> pattern_expr,
657  std::shared_ptr<Analyzer::Expr> escape_expr,
658  const bool is_not) {
659  if (!arg_expr->get_type_info().is_string()) {
660  throw std::runtime_error("expression before REGEXP must be of a string type.");
661  }
662  if (!pattern_expr->get_type_info().is_string()) {
663  throw std::runtime_error("expression after REGEXP must be of a string type.");
664  }
665  char escape_char = '\\';
666  if (escape_expr != nullptr) {
667  if (!escape_expr->get_type_info().is_string()) {
668  throw std::runtime_error("expression after ESCAPE must be of a string type.");
669  }
670  if (!escape_expr->get_type_info().is_string()) {
671  throw std::runtime_error("expression after ESCAPE must be of a string type.");
672  }
673  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
674  if (c != nullptr && c->get_constval().stringval->length() > 1) {
675  throw std::runtime_error("String after ESCAPE must have a single character.");
676  }
677  escape_char = (*c->get_constval().stringval)[0];
678  if (escape_char != '\\') {
679  throw std::runtime_error("Only supporting '\\' escape character.");
680  }
681  }
682  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(pattern_expr);
683  if (c != nullptr) {
684  std::string& pattern = *c->get_constval().stringval;
685  if (translate_to_like_pattern(pattern, escape_char)) {
686  return LikeExpr::get(arg_expr, pattern_expr, escape_expr, false, is_not);
687  }
688  }
689  std::shared_ptr<Analyzer::Expr> result =
690  makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
691  if (is_not) {
692  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
693  }
694  return result;
695 }
696 
697 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
698  const Catalog_Namespace::Catalog& catalog,
699  Analyzer::Query& query,
700  TlistRefType allow_tlist_ref) const {
701  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
702  return LikelihoodExpr::get(arg_expr, likelihood_, is_not_);
703 }
704 
705 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
706  std::shared_ptr<Analyzer::Expr> arg_expr,
707  float likelihood,
708  const bool is_not) {
709  if (!arg_expr->get_type_info().is_boolean()) {
710  throw std::runtime_error("likelihood expression expects boolean type.");
711  }
712  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikelihoodExpr>(
713  arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
714  return result;
715 }
716 
717 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
718  const Catalog_Namespace::Catalog& catalog,
719  Analyzer::Query& query,
720  TlistRefType allow_tlist_ref) const {
721  throw std::runtime_error("Subqueries are not supported yet.");
722  return nullptr;
723 }
724 
// Resolves a column reference to an analyzer expression.
//
// With a table qualifier, the column is looked up in that range table entry
// only. Without one, every range table entry is searched; if nothing is found
// and allow_tlist_ref is not TLIST_NONE, the name is matched against the
// result names of the query's target list.
//
// Returns a ColumnVar for a table column, or (for a target-list match) either
// a deep copy of the target expression or a Var of kind kOUTPUT.
// Throws std::runtime_error for "*", for an unknown table or column, and for
// an ambiguous column name or output alias.
//
// NOTE(review): the braces in this listing are unbalanced, so a line appears
// to be missing between the `Analyzer::Var* v = ...` line and
// `return v->deep_copy();`. Restore it from the upstream file before relying
// on this text.
725 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
726  const Catalog_Namespace::Catalog& catalog,
727  Analyzer::Query& query,
728  TlistRefType allow_tlist_ref) const {
729  int table_id{0};
730  int rte_idx{0};
731  const ColumnDescriptor* cd{nullptr};
732  if (column_ == nullptr) {
733  throw std::runtime_error("invalid column name *.");
734  }
735  if (table_ != nullptr) {
// Qualified reference: resolve the table first, then the column within it.
736  rte_idx = query.get_rte_idx(*table_);
737  if (rte_idx < 0) {
738  throw std::runtime_error("range variable or table name " + *table_ +
739  " does not exist.");
740  }
741  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
742  cd = rte->get_column_desc(catalog, *column_);
743  if (cd == nullptr) {
744  throw std::runtime_error("Column name " + *column_ + " does not exist.");
745  }
746  table_id = rte->get_table_id();
747  } else {
// Unqualified reference: scan every range table entry; a second match is an
// ambiguity error.
// NOTE(review): cd is overwritten on every iteration, so after the loop it
// holds the result for the LAST entry. A column found in an earlier entry but
// absent from the last one leaves cd == nullptr and is then treated as not
// found — confirm whether that is intended.
748  bool found = false;
749  int i = 0;
750  for (auto rte : query.get_rangetable()) {
751  cd = rte->get_column_desc(catalog, *column_);
752  if (cd != nullptr && !found) {
753  found = true;
754  rte_idx = i;
755  table_id = rte->get_table_id();
756  } else if (cd != nullptr && found) {
757  throw std::runtime_error("Column name " + *column_ + " is ambiguous.");
758  }
759  i++;
760  }
761  if (cd == nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
762  // check if this is a reference to a targetlist entry
763  bool found = false;
764  int varno = -1;
// Target list positions are counted from 1.
765  int i = 1;
766  std::shared_ptr<Analyzer::TargetEntry> tle;
767  for (auto p : query.get_targetlist()) {
768  if (*column_ == p->get_resname() && !found) {
769  found = true;
770  varno = i;
771  tle = p;
772  } else if (*column_ == p->get_resname() && found) {
773  throw std::runtime_error("Output alias " + *column_ + " is ambiguous.");
774  }
775  i++;
776  }
777  if (found) {
778  if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
779  Analyzer::Var* v = static_cast<Analyzer::Var*>(tle->get_expr());
781  return v->deep_copy();
782  }
783  }
// TLIST_COPY duplicates the target expression; any other mode refers to it by
// its position in the target list.
784  if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
785  return tle->get_expr()->deep_copy();
786  } else {
787  return makeExpr<Analyzer::Var>(
788  tle->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
789  }
790  }
791  }
792  if (cd == nullptr) {
793  throw std::runtime_error("Column name " + *column_ + " does not exist.");
794  }
795  }
796  return makeExpr<Analyzer::ColumnVar>(cd->columnType, table_id, cd->columnId, rte_idx);
797 }
798 
799 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
800  const Catalog_Namespace::Catalog& catalog,
801  Analyzer::Query& query,
802  TlistRefType allow_tlist_ref) const {
803  SQLTypeInfo result_type;
804  SQLAgg agg_type;
805  std::shared_ptr<Analyzer::Expr> arg_expr;
806  bool is_distinct = false;
807  if (boost::iequals(*name_, "count")) {
808  result_type = SQLTypeInfo(kBIGINT, false);
809  agg_type = kCOUNT;
810  if (arg_) {
811  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
812  const SQLTypeInfo& ti = arg_expr->get_type_info();
813  if (ti.is_string() && (ti.get_compression() != kENCODING_DICT || !distinct_)) {
814  throw std::runtime_error(
815  "Strings must be dictionary-encoded in COUNT(DISTINCT).");
816  }
817  if (ti.get_type() == kARRAY && !distinct_) {
818  throw std::runtime_error("Only COUNT(DISTINCT) is supported on arrays.");
819  }
820  }
821  is_distinct = distinct_;
822  } else {
823  if (!arg_) {
824  throw std::runtime_error("Cannot compute " + *name_ + " with argument '*'.");
825  }
826  if (boost::iequals(*name_, "min")) {
827  agg_type = kMIN;
828  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
829  arg_expr = arg_expr->decompress();
830  result_type = arg_expr->get_type_info();
831  } else if (boost::iequals(*name_, "max")) {
832  agg_type = kMAX;
833  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
834  arg_expr = arg_expr->decompress();
835  result_type = arg_expr->get_type_info();
836  } else if (boost::iequals(*name_, "avg")) {
837  agg_type = kAVG;
838  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
839  if (!arg_expr->get_type_info().is_number()) {
840  throw std::runtime_error("Cannot compute AVG on non-number-type arguments.");
841  }
842  arg_expr = arg_expr->decompress();
843  result_type = SQLTypeInfo(kDOUBLE, false);
844  } else if (boost::iequals(*name_, "sum")) {
845  agg_type = kSUM;
846  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
847  if (!arg_expr->get_type_info().is_number()) {
848  throw std::runtime_error("Cannot compute SUM on non-number-type arguments.");
849  }
850  arg_expr = arg_expr->decompress();
851  result_type = arg_expr->get_type_info().is_integer() ? SQLTypeInfo(kBIGINT, false)
852  : arg_expr->get_type_info();
853  } else if (boost::iequals(*name_, "unnest")) {
854  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
855  const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
856  if (arg_ti.get_type() != kARRAY) {
857  throw std::runtime_error(arg_->to_string() + " is not of array type.");
858  }
859  return makeExpr<Analyzer::UOper>(arg_ti.get_elem_type(), false, kUNNEST, arg_expr);
860  } else {
861  throw std::runtime_error("invalid function name: " + *name_);
862  }
863  if (arg_expr->get_type_info().is_string() ||
864  arg_expr->get_type_info().get_type() == kARRAY) {
865  throw std::runtime_error(
866  "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
867  }
868  }
869  int naggs = query.get_num_aggs();
870  query.set_num_aggs(naggs + 1);
871  return makeExpr<Analyzer::AggExpr>(
872  result_type, agg_type, arg_expr, is_distinct, nullptr);
873 }
874 
875 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
876  const Catalog_Namespace::Catalog& catalog,
877  Analyzer::Query& query,
878  TlistRefType allow_tlist_ref) const {
879  target_type_->check_type();
880  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
881  SQLTypeInfo ti(target_type_->get_type(),
882  target_type_->get_param1(),
883  target_type_->get_param2(),
884  arg_expr->get_type_info().get_notnull());
885  if (arg_expr->get_type_info().get_type() != target_type_->get_type() &&
886  arg_expr->get_type_info().get_compression() != kENCODING_NONE) {
887  arg_expr->decompress();
888  }
889  return arg_expr->add_cast(ti);
890 }
891 
892 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
893  const Catalog_Namespace::Catalog& catalog,
894  Analyzer::Query& query,
895  TlistRefType allow_tlist_ref) const {
896  SQLTypeInfo ti;
897  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
898  expr_pair_list;
899  for (auto& p : when_then_list_) {
900  auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
901  if (e1->get_type_info().get_type() != kBOOLEAN) {
902  throw std::runtime_error("Only boolean expressions can be used after WHEN.");
903  }
904  auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
905  expr_pair_list.emplace_back(e1, e2);
906  }
907  auto else_e =
908  else_expr_ ? else_expr_->analyze(catalog, query, allow_tlist_ref) : nullptr;
909  return normalize(expr_pair_list, else_e);
910 }
911 
912 namespace {
913 
914 bool expr_is_null(const Analyzer::Expr* expr) {
915  if (expr->get_type_info().get_type() == kNULLT) {
916  return true;
917  }
918  const auto const_expr = dynamic_cast<const Analyzer::Constant*>(expr);
919  return const_expr && const_expr->get_is_null();
920 }
921 
// NOTE(review): the line holding this function's signature (listing line 922) is
// not visible in this view; the body reads a parameter named `str_literal`.
// Restore the signature from the original file before relying on this block.
//
// Maps the text of a string literal to a boolean:
//   "t", "true", "T", "True"   -> true
//   "f", "false", "F", "False" -> false
// Any other spelling (including e.g. "TRUE") raises std::runtime_error.
923  const std::string* s = str_literal->get_stringval();
924  if (*s == "t" || *s == "true" || *s == "T" || *s == "True") {
925  return true;
926  } else if (*s == "f" || *s == "false" || *s == "F" || *s == "False") {
927  return false;
928  } else {
// The offending text is appended to the message without a separator.
929  throw std::runtime_error("Invalid string for boolean " + *s);
930  }
931 }
932 
933 } // namespace
934 
// Builds an Analyzer::CaseExpr from already-analyzed WHEN/THEN pairs and an
// optional ELSE expression.
//
// The THEN branches (and then the ELSE branch) are folded into one common result
// type `ti`; every branch is cast to that type; a NULL constant ELSE is
// synthesized when none was supplied.
//
// expr_pair_list: (condition, result) pairs; each condition must be boolean
//                 (enforced with CHECK).
// else_e_in:      ELSE expression, or null when the CASE has no ELSE.
//
// Throws std::runtime_error when branch types are incompatible or when no branch
// provides a type (all branches are NULL).
935 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
936  const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
937  std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
938  const std::shared_ptr<Analyzer::Expr> else_e_in) {
// Common result type; stays kNULLT until a typed branch is seen.
939  SQLTypeInfo ti;
940  bool has_agg = false;
// Dictionary ids (comp_param) of every dict-encoded string branch.
941  std::set<int> dictionary_ids;
// Set when a non-dict-encoded string branch is a column reference.
942  bool has_none_encoded_str_projection = false;
943 
944  for (auto& p : expr_pair_list) {
945  auto e1 = p.first;
946  CHECK(e1->get_type_info().is_boolean());
947  auto e2 = p.second;
948  if (e2->get_type_info().is_string()) {
949  if (e2->get_type_info().is_dict_encoded_string()) {
950  dictionary_ids.insert(e2->get_type_info().get_comp_param());
951  // allow literals to potentially fall down the transient path
952  } else if (std::dynamic_pointer_cast<const Analyzer::ColumnVar>(e2)) {
953  has_none_encoded_str_projection = true;
954  }
955  }
956 
// Fold this THEN branch into the common type: the first branch seeds `ti`, an
// untyped NULL branch adopts the current `ti` (and makes the result nullable),
// and a differing type is merged with the string/numeric common-type helpers.
957  if (ti.get_type() == kNULLT) {
958  ti = e2->get_type_info();
959  } else if (e2->get_type_info().get_type() == kNULLT) {
960  ti.set_notnull(false);
961  e2->set_type_info(ti);
962  } else if (ti != e2->get_type_info()) {
963  if (ti.is_string() && e2->get_type_info().is_string()) {
964  ti = Analyzer::BinOper::common_string_type(ti, e2->get_type_info());
965  } else if (ti.is_number() && e2->get_type_info().is_number()) {
966  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
967  } else if (ti.is_boolean() && e2->get_type_info().is_boolean()) {
// NOTE(review): boolean branches are merged through common_numeric_type;
// confirm that helper is intended to accept boolean type infos.
968  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
969  } else {
970  throw std::runtime_error(
971  "expressions in THEN clause must be of the same or compatible types.");
972  }
973  }
974  if (e2->get_contains_agg()) {
975  has_agg = true;
976  }
977  }
// Fold the ELSE branch, if any, into the common type. Unlike the THEN loop, the
// NULL test here uses expr_is_null(), which also recognizes null constants.
978  auto else_e = else_e_in;
979  if (else_e) {
980  if (else_e->get_contains_agg()) {
981  has_agg = true;
982  }
983  if (expr_is_null(else_e.get())) {
984  ti.set_notnull(false);
985  else_e->set_type_info(ti);
986  } else if (ti != else_e->get_type_info()) {
// The string bookkeeping for ELSE only runs when its type differs from `ti`.
987  if (else_e->get_type_info().is_string()) {
988  if (else_e->get_type_info().is_dict_encoded_string()) {
989  dictionary_ids.insert(else_e->get_type_info().get_comp_param());
990  // allow literals to potentially fall down the transient path
991  } else if (std::dynamic_pointer_cast<const Analyzer::ColumnVar>(else_e)) {
992  has_none_encoded_str_projection = true;
993  }
994  }
995  ti.set_notnull(false);
996  if (ti.is_string() && else_e->get_type_info().is_string()) {
997  ti = Analyzer::BinOper::common_string_type(ti, else_e->get_type_info());
998  } else if (ti.is_number() && else_e->get_type_info().is_number()) {
999  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
1000  } else if (ti.is_boolean() && else_e->get_type_info().is_boolean()) {
1001  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
1002  } else if (get_logical_type_info(ti) !=
1003  get_logical_type_info(else_e->get_type_info())) {
1004  throw std::runtime_error(
1005  // types differing by encoding will be resolved at decode
1006 
1007  "expressions in ELSE clause must be of the same or compatible types as those "
1008  "in the THEN clauses.");
1009  }
1010  }
1011  }
// Cast every THEN result to the (nullable) common type.
1012  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1013  cast_expr_pair_list;
1014  for (auto p : expr_pair_list) {
1015  ti.set_notnull(false);
1016  cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
1017  }
1018  if (else_e != nullptr) {
1019  else_e = else_e->add_cast(ti);
1020  } else {
// `d` is left uninitialized; the constant below is created with its null flag set.
1021  Datum d;
1022  // always create an else expr so that executor doesn't need to worry about it
1023  ti.set_notnull(false);
1024  else_e = makeExpr<Analyzer::Constant>(ti, true, d);
1025  }
// This check runs only after the casts above have been built with `ti`.
1026  if (ti.get_type() == kNULLT) {
1027  throw std::runtime_error(
1028  "Can't deduce the type for case expressions, all branches null");
1029  }
1030 
1031  auto case_expr = makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
1032  if (ti.get_compression() != kENCODING_DICT && dictionary_ids.size() == 1 &&
1033  *(dictionary_ids.begin()) > 0 && !has_none_encoded_str_projection) {
1034  // the above logic makes two assumptions when strings are present. 1) that all types
1035  // in the case statement are either null or strings, and 2) that none-encoded strings
1036  // will always win out over dict encoding. If we only have one dictionary, and that
1037  // dictionary is not a transient dictionary, we can cast the entire case to be dict
1038  // encoded and use transient dictionaries for any literals
1039  ti.set_compression(kENCODING_DICT);
1040  ti.set_comp_param(*dictionary_ids.begin());
// NOTE(review): the value returned by add_cast is discarded here; presumably
// add_cast on a CaseExpr updates the expression in place - confirm.
1041  case_expr->add_cast(ti);
1042  }
1043  return case_expr;
1044 }
1045 
1046 std::string CaseExpr::to_string() const {
1047  std::string str("CASE ");
1048  for (auto& p : when_then_list_) {
1049  str += "WHEN " + p->get_expr1()->to_string() + " THEN " +
1050  p->get_expr2()->to_string() + " ";
1051  }
1052  if (else_expr_ != nullptr) {
1053  str += "ELSE " + else_expr_->to_string();
1054  }
1055  str += " END";
1056  return str;
1057 }
1058 
1059 void UnionQuery::analyze(const Catalog_Namespace::Catalog& catalog,
1060  Analyzer::Query& query) const {
1061  left_->analyze(catalog, query);
1062  Analyzer::Query* right_query = new Analyzer::Query();
1063  right_->analyze(catalog, *right_query);
1064  query.set_next_query(right_query);
1065  query.set_is_unionall(is_unionall_);
1066 }
1067 
1068 void QuerySpec::analyze_having_clause(const Catalog_Namespace::Catalog& catalog,
1069  Analyzer::Query& query) const {
1070  std::shared_ptr<Analyzer::Expr> p;
1071  if (having_clause_ != nullptr) {
1072  p = having_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1073  if (p->get_type_info().get_type() != kBOOLEAN) {
1074  throw std::runtime_error("Only boolean expressions can be in HAVING clause.");
1075  }
1076  p->check_group_by(query.get_group_by());
1077  }
1078  query.set_having_predicate(p);
1079 }
1080 
// Analyzes the GROUP BY clause and stores the resulting expression list on
// `query`.
//
// Each GROUP BY item is either an integer ordinal referring to a target-list
// entry (1-based) or an arbitrary expression analyzed with TLIST_REF. After the
// list is built, every target-list expression is checked against it whenever the
// query has aggregates or a non-empty GROUP BY.
//
// Throws std::runtime_error for a non-integer literal, an out-of-range ordinal,
// or an ordinal that refers to an expression containing an aggregate.
1081 void QuerySpec::analyze_group_by(const Catalog_Namespace::Catalog& catalog,
1082  Analyzer::Query& query) const {
1083  std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1084  if (!groupby_clause_.empty()) {
// 1-based position of the current item within the GROUP BY list.
1085  int gexpr_no = 1;
1086  std::shared_ptr<Analyzer::Expr> gexpr;
1087  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1088  query.get_targetlist();
1089  for (auto& c : groupby_clause_) {
1090  // special-case ordinal numbers in GROUP BY
1091  if (dynamic_cast<Literal*>(c.get())) {
1092  IntLiteral* i = dynamic_cast<IntLiteral*>(c.get());
1093  if (!i) {
1094  throw std::runtime_error("Invalid literal in GROUP BY clause.");
1095  }
1096  int varno = (int)i->get_intval();
1097  if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1098  throw std::runtime_error("Invalid ordinal number in GROUP BY clause.");
1099  }
1100  if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1101  throw std::runtime_error(
1102  "Ordinal number in GROUP BY cannot reference an expression containing "
1103  "aggregate "
1104  "functions.");
1105  }
// An ordinal becomes a Var that refers to the target-list entry by number.
1106  gexpr = makeExpr<Analyzer::Var>(
1107  tlist[varno - 1]->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
1108  } else {
1109  gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1110  }
1111  const SQLTypeInfo gti = gexpr->get_type_info();
1112  bool set_new_type = false;
1113  SQLTypeInfo ti(gti);
// Non-encoded string keys get a rewritten type `ti` and are cast to it below.
1114  if (gti.is_string() && gti.get_compression() == kENCODING_NONE) {
1115  set_new_type = true;
// NOTE(review): listing lines 1116-1117 are missing from this view; they
// presumably adjust `ti` further before set_fixed_size() - restore them from
// the original file.
1118  ti.set_fixed_size();
1119  }
1120  std::shared_ptr<Analyzer::Var> v;
// When the key is a Var referring to a target-list entry, the entry's own
// expression becomes the group-by key and the entry itself is replaced by the
// Var, re-pointed at this GROUP BY position.
1121  if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
1122  v = std::static_pointer_cast<Analyzer::Var>(gexpr);
1123  int n = v->get_varno();
1124  gexpr = tlist[n - 1]->get_own_expr();
1125  auto cv = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gexpr);
1126  if (cv != nullptr) {
1127  // inherit all ColumnVar info for lineage.
1128  *std::static_pointer_cast<Analyzer::ColumnVar>(v) = *cv;
1129  }
1130  v->set_which_row(Analyzer::Var::kGROUPBY);
1131  v->set_varno(gexpr_no);
1132  tlist[n - 1]->set_expr(v);
1133  }
1134  if (set_new_type) {
1135  auto new_e = gexpr->add_cast(ti);
1136  groupby.push_back(new_e);
// Keep the Var's type in sync with the cast key.
1137  if (v != nullptr) {
1138  v->set_type_info(new_e->get_type_info());
1139  }
1140  } else {
1141  groupby.push_back(gexpr);
1142  }
1143  gexpr_no++;
1144  }
1145  }
// Validate every target expression against the GROUP BY list.
1146  if (query.get_num_aggs() > 0 || !groupby.empty()) {
1147  for (auto t : query.get_targetlist()) {
1148  auto e = t->get_expr();
1149  e->check_group_by(groupby);
1150  }
1151  }
1152  query.set_group_by(groupby);
1153 }
1154 
1155 void QuerySpec::analyze_where_clause(const Catalog_Namespace::Catalog& catalog,
1156  Analyzer::Query& query) const {
1157  if (where_clause_ == nullptr) {
1158  query.set_where_predicate(nullptr);
1159  return;
1160  }
1161  auto p = where_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1162  if (p->get_type_info().get_type() != kBOOLEAN) {
1163  throw std::runtime_error("Only boolean expressions can be in WHERE clause.");
1164  }
1165  query.set_where_predicate(p);
1166 }
1167 
1168 void QuerySpec::analyze_select_clause(const Catalog_Namespace::Catalog& catalog,
1169  Analyzer::Query& query) const {
1170  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1171  query.get_targetlist_nonconst();
1172  if (select_clause_.empty()) {
1173  // this means SELECT *
1174  int rte_idx = 0;
1175  for (auto rte : query.get_rangetable()) {
1176  rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1177  }
1178  } else {
1179  for (auto& p : select_clause_) {
1180  const Parser::Expr* select_expr = p->get_select_expr();
1181  // look for the case of range_var.*
1182  if (typeid(*select_expr) == typeid(ColumnRef) &&
1183  dynamic_cast<const ColumnRef*>(select_expr)->get_column() == nullptr) {
1184  const std::string* range_var_name =
1185  dynamic_cast<const ColumnRef*>(select_expr)->get_table();
1186  int rte_idx = query.get_rte_idx(*range_var_name);
1187  if (rte_idx < 0) {
1188  throw std::runtime_error("invalid range variable name: " + *range_var_name);
1189  }
1190  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
1191  rte->expand_star_in_targetlist(catalog, tlist, rte_idx);
1192  } else {
1193  auto e = select_expr->analyze(catalog, query);
1194  std::string resname;
1195 
1196  if (p->get_alias() != nullptr) {
1197  resname = *p->get_alias();
1198  } else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1199  !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1200  auto colvar = std::static_pointer_cast<Analyzer::ColumnVar>(e);
1201  const ColumnDescriptor* col_desc = catalog.getMetadataForColumn(
1202  colvar->get_table_id(), colvar->get_column_id());
1203  resname = col_desc->columnName;
1204  }
1205  if (e->get_type_info().get_type() == kNULLT) {
1206  throw std::runtime_error(
1207  "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1208  }
1209  auto o = std::static_pointer_cast<Analyzer::UOper>(e);
1210  bool unnest = (o != nullptr && o->get_optype() == kUNNEST);
1211  auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1212  tlist.push_back(tle);
1213  }
1214  }
1215  }
1216 }
1217 
1218 void QuerySpec::analyze_from_clause(const Catalog_Namespace::Catalog& catalog,
1219  Analyzer::Query& query) const {
1221  for (auto& p : from_clause_) {
1222  const TableDescriptor* table_desc;
1223  table_desc = catalog.getMetadataForTable(*p->get_table_name());
1224  if (table_desc == nullptr) {
1225  throw std::runtime_error("Table " + *p->get_table_name() + " does not exist.");
1226  }
1227  std::string range_var;
1228  if (p->get_range_var() == nullptr) {
1229  range_var = *p->get_table_name();
1230  } else {
1231  range_var = *p->get_range_var();
1232  }
1233  rte = new Analyzer::RangeTableEntry(range_var, table_desc, nullptr);
1234  query.add_rte(rte);
1235  }
1236 }
1237 
1238 void QuerySpec::analyze(const Catalog_Namespace::Catalog& catalog,
1239  Analyzer::Query& query) const {
1240  query.set_is_distinct(is_distinct_);
1241  analyze_from_clause(catalog, query);
1242  analyze_select_clause(catalog, query);
1243  analyze_where_clause(catalog, query);
1244  analyze_group_by(catalog, query);
1245  analyze_having_clause(catalog, query);
1246 }
1247 
1248 void SelectStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1249  Analyzer::Query& query) const {
1250  query.set_stmt_type(kSELECT);
1251  query.set_limit(limit_);
1252  if (offset_ < 0) {
1253  throw std::runtime_error("OFFSET cannot be negative.");
1254  }
1255  query.set_offset(offset_);
1256  query_expr_->analyze(catalog, query);
1257  if (orderby_clause_.empty() && !query.get_is_distinct()) {
1258  query.set_order_by(nullptr);
1259  return;
1260  }
1261  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1262  query.get_targetlist();
1263  std::list<Analyzer::OrderEntry>* order_by = new std::list<Analyzer::OrderEntry>();
1264  if (!orderby_clause_.empty()) {
1265  for (auto& p : orderby_clause_) {
1266  int tle_no = p->get_colno();
1267  if (tle_no == 0) {
1268  // use column name
1269  // search through targetlist for matching name
1270  const std::string* name = p->get_column()->get_column();
1271  tle_no = 1;
1272  bool found = false;
1273  for (auto tle : tlist) {
1274  if (tle->get_resname() == *name) {
1275  found = true;
1276  break;
1277  }
1278  tle_no++;
1279  }
1280  if (!found) {
1281  throw std::runtime_error("invalid name in order by: " + *name);
1282  }
1283  }
1284  order_by->push_back(
1285  Analyzer::OrderEntry(tle_no, p->get_is_desc(), p->get_nulls_first()));
1286  }
1287  }
1288  if (query.get_is_distinct()) {
1289  // extend order_by to include all targetlist entries.
1290  for (int i = 1; i <= static_cast<int>(tlist.size()); i++) {
1291  bool in_orderby = false;
1292  std::for_each(order_by->begin(),
1293  order_by->end(),
1294  [&in_orderby, i](const Analyzer::OrderEntry& oe) {
1295  in_orderby = in_orderby || (i == oe.tle_no);
1296  });
1297  if (!in_orderby) {
1298  order_by->push_back(Analyzer::OrderEntry(i, false, false));
1299  }
1300  }
1301  }
1302  query.set_order_by(order_by);
1303 }
1304 
1305 std::string SelectEntry::to_string() const {
1306  std::string str = select_expr_->to_string();
1307  if (alias_ != nullptr) {
1308  str += " AS " + *alias_;
1309  }
1310  return str;
1311 }
1312 
1313 std::string TableRef::to_string() const {
1314  std::string str = *table_name_;
1315  if (range_var_ != nullptr) {
1316  str += " " + *range_var_;
1317  }
1318  return str;
1319 }
1320 
1321 std::string ColumnRef::to_string() const {
1322  std::string str;
1323  if (table_ == nullptr) {
1324  str = *column_;
1325  } else if (column_ == nullptr) {
1326  str = *table_ + ".*";
1327  } else {
1328  str = *table_ + "." + *column_;
1329  }
1330  return str;
1331 }
1332 
1333 std::string OperExpr::to_string() const {
1334  std::string op_str[] = {
1335  "=", "===", "<>", "<", ">", "<=", ">=", " AND ", " OR ", "NOT", "-", "+", "*", "/"};
1336  std::string str;
1337  if (optype_ == kUMINUS) {
1338  str = "-(" + left_->to_string() + ")";
1339  } else if (optype_ == kNOT) {
1340  str = "NOT (" + left_->to_string() + ")";
1341  } else if (optype_ == kARRAY_AT) {
1342  str = left_->to_string() + "[" + right_->to_string() + "]";
1343  } else if (optype_ == kUNNEST) {
1344  str = "UNNEST(" + left_->to_string() + ")";
1345  } else if (optype_ == kIN) {
1346  str = "(" + left_->to_string() + " IN " + right_->to_string() + ")";
1347  } else {
1348  str = "(" + left_->to_string() + op_str[optype_] + right_->to_string() + ")";
1349  }
1350  return str;
1351 }
1352 
1353 std::string InExpr::to_string() const {
1354  std::string str = arg_->to_string();
1355  if (is_not_) {
1356  str += " NOT IN ";
1357  } else {
1358  str += " IN ";
1359  }
1360  return str;
1361 }
1362 
1363 std::string ExistsExpr::to_string() const {
1364  return "EXISTS (" + query_->to_string() + ")";
1365 }
1366 
1367 std::string SubqueryExpr::to_string() const {
1368  std::string str;
1369  str = "(";
1370  str += query_->to_string();
1371  str += ")";
1372  return str;
1373 }
1374 
1375 std::string IsNullExpr::to_string() const {
1376  std::string str = arg_->to_string();
1377  if (is_not_) {
1378  str += " IS NOT NULL";
1379  } else {
1380  str += " IS NULL";
1381  }
1382  return str;
1383 }
1384 
1385 std::string InSubquery::to_string() const {
1386  std::string str = InExpr::to_string();
1387  str += subquery_->to_string();
1388  return str;
1389 }
1390 
1391 std::string InValues::to_string() const {
1392  std::string str = InExpr::to_string() + "(";
1393  bool notfirst = false;
1394  for (auto& p : value_list_) {
1395  if (notfirst) {
1396  str += ", ";
1397  } else {
1398  notfirst = true;
1399  }
1400  str += p->to_string();
1401  }
1402  str += ")";
1403  return str;
1404 }
1405 
1406 std::string BetweenExpr::to_string() const {
1407  std::string str = arg_->to_string();
1408  if (is_not_) {
1409  str += " NOT BETWEEN ";
1410  } else {
1411  str += " BETWEEN ";
1412  }
1413  str += lower_->to_string() + " AND " + upper_->to_string();
1414  return str;
1415 }
1416 
1417 std::string CharLengthExpr::to_string() const {
1418  std::string str;
1419  if (calc_encoded_length_) {
1420  str = "CHAR_LENGTH (" + arg_->to_string() + ")";
1421  } else {
1422  str = "LENGTH (" + arg_->to_string() + ")";
1423  }
1424  return str;
1425 }
1426 
1427 std::string CardinalityExpr::to_string() const {
1428  std::string str = "CARDINALITY(" + arg_->to_string() + ")";
1429  return str;
1430 }
1431 
1432 std::string LikeExpr::to_string() const {
1433  std::string str = arg_->to_string();
1434  if (is_not_) {
1435  str += " NOT LIKE ";
1436  } else {
1437  str += " LIKE ";
1438  }
1439  str += like_string_->to_string();
1440  if (escape_string_ != nullptr) {
1441  str += " ESCAPE " + escape_string_->to_string();
1442  }
1443  return str;
1444 }
1445 
1446 std::string RegexpExpr::to_string() const {
1447  std::string str = arg_->to_string();
1448  if (is_not_) {
1449  str += " NOT REGEXP ";
1450  } else {
1451  str += " REGEXP ";
1452  }
1453  str += pattern_string_->to_string();
1454  if (escape_string_ != nullptr) {
1455  str += " ESCAPE " + escape_string_->to_string();
1456  }
1457  return str;
1458 }
1459 
1460 std::string LikelihoodExpr::to_string() const {
1461  std::string str = " LIKELIHOOD ";
1462  str += arg_->to_string();
1463  str += " ";
1464  str += boost::lexical_cast<std::string>(is_not_ ? 1.0 - likelihood_ : likelihood_);
1465  return str;
1466 }
1467 
1468 std::string FunctionRef::to_string() const {
1469  std::string str = *name_ + "(";
1470  if (distinct_) {
1471  str += "DISTINCT ";
1472  }
1473  if (arg_ == nullptr) {
1474  str += "*)";
1475  } else {
1476  str += arg_->to_string() + ")";
1477  }
1478  return str;
1479 }
1480 
1481 std::string QuerySpec::to_string() const {
1482  std::string query_str = "SELECT ";
1483  if (is_distinct_) {
1484  query_str += "DISTINCT ";
1485  }
1486  if (select_clause_.empty()) {
1487  query_str += "* ";
1488  } else {
1489  bool notfirst = false;
1490  for (auto& p : select_clause_) {
1491  if (notfirst) {
1492  query_str += ", ";
1493  } else {
1494  notfirst = true;
1495  }
1496  query_str += p->to_string();
1497  }
1498  }
1499  query_str += " FROM ";
1500  bool notfirst = false;
1501  for (auto& p : from_clause_) {
1502  if (notfirst) {
1503  query_str += ", ";
1504  } else {
1505  notfirst = true;
1506  }
1507  query_str += p->to_string();
1508  }
1509  if (where_clause_) {
1510  query_str += " WHERE " + where_clause_->to_string();
1511  }
1512  if (!groupby_clause_.empty()) {
1513  query_str += " GROUP BY ";
1514  bool notfirst = false;
1515  for (auto& p : groupby_clause_) {
1516  if (notfirst) {
1517  query_str += ", ";
1518  } else {
1519  notfirst = true;
1520  }
1521  query_str += p->to_string();
1522  }
1523  }
1524  if (having_clause_) {
1525  query_str += " HAVING " + having_clause_->to_string();
1526  }
1527  query_str += ";";
1528  return query_str;
1529 }
1530 
// Common analysis for INSERT statements: marks `query` as an INSERT, resolves
// the target table, and records the result table id and the list of result
// column ids on `query`.
//
// With no explicit column list, all columns returned by
// getAllColumnMetadataForTable(tableId, false, false, true) are used. With an
// explicit list, each named column is added and, for columns that report
// physical columns, the ids of those following physical columns as well.
//
// Throws std::runtime_error when the table or a named column does not exist,
// when the target is a view, or when a physical column's metadata is missing.
1531 void InsertStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1532  Analyzer::Query& query) const {
1533  query.set_stmt_type(kINSERT);
1534  const TableDescriptor* td = catalog.getMetadataForTable(*table_);
1535  if (td == nullptr) {
1536  throw std::runtime_error("Table " + *table_ + " does not exist.");
1537  }
1538  if (td->isView) {
1539  throw std::runtime_error("Insert to views is not supported yet.");
1540  }
// NOTE(review): listing line 1541 is missing from this view; restore it from the
// original file.
1542  query.set_result_table_id(td->tableId);
1543  std::list<int> result_col_list;
1544  if (column_list_.empty()) {
1545  const std::list<const ColumnDescriptor*> all_cols =
1546  catalog.getAllColumnMetadataForTable(td->tableId, false, false, true);
1547  for (auto cd : all_cols) {
1548  result_col_list.push_back(cd->columnId);
1549  }
1550  } else {
1551  for (auto& c : column_list_) {
1552  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
1553  if (cd == nullptr) {
1554  throw std::runtime_error("Column " + *c + " does not exist.");
1555  }
1556  result_col_list.push_back(cd->columnId);
1557  const auto& col_ti = cd->columnType;
// Physical columns are looked up by the ids directly after the logical column's
// id (columnId + 1 .. columnId + get_physical_cols()).
1558  if (col_ti.get_physical_cols() > 0) {
1559  CHECK(cd->columnType.is_geometry());
1560  for (auto i = 1; i <= col_ti.get_physical_cols(); i++) {
1561  const ColumnDescriptor* pcd =
1562  catalog.getMetadataForColumn(td->tableId, cd->columnId + i);
1563  if (pcd == nullptr) {
1564  throw std::runtime_error("Column " + *c + "'s metadata is incomplete.");
1565  }
1566  result_col_list.push_back(pcd->columnId);
1567  }
1568  }
1569  }
1570  }
1571  query.set_result_col_list(result_col_list);
1572 }
1573 
// Chooses the leaf index for an INSERT ... VALUES row.
//
// For an unsharded table a leaf is picked at random. For a sharded table the
// value supplied for the shard column is evaluated to a constant, mapped to a
// shard id with SHARD_FOR_KEY over (td->nShards * num_leafs) shards, and the
// returned index is shardId / td->nShards.
//
// catalog:   catalog used to resolve table, column and dictionary metadata.
// num_leafs: number of leaves; used as a modulus and as a shard-count factor.
//            NOTE(review): presumably callers guarantee num_leafs > 0 - confirm.
//
// Throws std::runtime_error when the table does not exist, is a view, is
// replicated, or when no value is supplied for the shard column.
1574 size_t InsertValuesStmt::determineLeafIndex(const Catalog_Namespace::Catalog& catalog,
1575  size_t num_leafs) {
1576  const TableDescriptor* td = catalog.getMetadataForTable(*table_);
1577  if (td == nullptr) {
1578  throw std::runtime_error("Table " + *table_ + " does not exist.");
1579  }
1580  if (td->isView) {
1581  throw std::runtime_error("Insert to views is not supported yet.");
1582  }
// NOTE(review): listing line 1583 is missing from this view; restore it from the
// original file.
1584  if (td->partitions == "REPLICATED") {
1585  throw std::runtime_error("Cannot determine leaf on replicated table.");
1586  }
1587 
// Unsharded table: pick a leaf at random.
1588  if (0 == td->nShards) {
1589  std::random_device rd;
1590  std::mt19937_64 gen(rd());
1591  std::uniform_int_distribution<size_t> dis;
1592  const auto leaf_idx = dis(gen) % num_leafs;
1593  return leaf_idx;
1594  }
1595 
// Sharded table: find the position of the shard column's value in the row.
1596  size_t indexOfShardColumn = 0;
1597  const ColumnDescriptor* shardColumn = catalog.getShardColumnMetadataForTable(td);
1598  CHECK(shardColumn);
1599  auto shard_count = td->nShards * num_leafs;
1600  int64_t shardId = 0;
1601 
1602  if (column_list_.empty()) {
1603  auto all_cols =
1604  catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
1605  auto iter = std::find(all_cols.begin(), all_cols.end(), shardColumn);
1606  CHECK(iter != all_cols.end());
1607  indexOfShardColumn = std::distance(all_cols.begin(), iter);
1608  } else {
1609  for (auto& c : column_list_) {
1610  if (*c == shardColumn->columnName) {
1611  break;
1612  }
1613  indexOfShardColumn++;
1614  }
1615 
// The shard column was not named in the explicit column list.
1616  if (indexOfShardColumn == column_list_.size()) {
// NOTE(review): listing line 1617 (the start of the statement that ends with
// "shard_count);") is missing from this view; restore it from the original file.
1618  shard_count);
1619  return shardId / td->nShards;
1620  }
1621  }
1622 
1623  if (indexOfShardColumn >= value_list_.size()) {
1624  throw std::runtime_error("No value defined for shard column.");
1625  }
1626 
1627  auto& shardColumnValueExpr = *(std::next(value_list_.begin(), indexOfShardColumn));
1628 
// Analyze the value against a scratch query and cast it to the shard column's
// type so its constant can be read in that type.
1629  Analyzer::Query query;
1630  auto e = shardColumnValueExpr->analyze(catalog, query);
1631  e = e->add_cast(shardColumn->columnType);
1632  const Analyzer::Constant* con = dynamic_cast<Analyzer::Constant*>(e.get());
// If the cast left a CAST operator on top, take the constant from its operand.
1633  if (!con) {
1634  auto col_cast = dynamic_cast<const Analyzer::UOper*>(e.get());
1635  CHECK(col_cast);
1636  CHECK_EQ(kCAST, col_cast->get_optype());
1637  con = dynamic_cast<const Analyzer::Constant*>(col_cast->get_operand());
1638  }
1639  CHECK(con);
1640 
1641  Datum d = con->get_constval();
1642  if (con->get_is_null()) {
// NOTE(review): listing line 1643 (the start of the statement that ends with
// "shard_count);") is missing from this view; restore it from the original file.
1644  shard_count);
1645  } else if (shardColumn->columnType.is_string()) {
// String shard key: shard on the dictionary id of the string (added to the
// dictionary if not already present).
1646  auto dictDesc =
1647  catalog.getMetadataForDict(shardColumn->columnType.get_comp_param(), true);
1648  auto str_id = dictDesc->stringDict->getOrAdd(*d.stringval);
1649  bool invalid = false;
1650 
// An id that exceeds the maximum for the column's size is treated as invalid.
1651  if (4 == shardColumn->columnType.get_size()) {
1652  invalid = str_id > max_valid_int_value<int32_t>();
1653  } else if (2 == shardColumn->columnType.get_size()) {
1654  invalid = str_id > max_valid_int_value<uint16_t>();
1655  } else if (1 == shardColumn->columnType.get_size()) {
1656  invalid = str_id > max_valid_int_value<uint8_t>();
1657  }
1658 
// Invalid or null ids are replaced by the column's fixed-encoding null value.
1659  if (invalid || str_id == inline_int_null_value<int32_t>()) {
1660  str_id = inline_fixed_encoding_null_val(shardColumn->columnType);
1661  }
1662  shardId = SHARD_FOR_KEY(str_id, shard_count);
1663  } else {
// Non-string shard key: read the Datum member matching the logical size.
1664  switch (shardColumn->columnType.get_logical_size()) {
1665  case 8:
1666  shardId = SHARD_FOR_KEY(d.bigintval, shard_count);
1667  break;
1668  case 4:
1669  shardId = SHARD_FOR_KEY(d.intval, shard_count);
1670  break;
1671  case 2:
1672  shardId = SHARD_FOR_KEY(d.smallintval, shard_count);
1673  break;
1674  case 1:
1675  shardId = SHARD_FOR_KEY(d.tinyintval, shard_count);
1676  break;
1677  default:
1678  CHECK(false);
1679  }
1680  }
1681 
1682  return shardId / td->nShards;
1683 }
1684 
1685 void InsertValuesStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1686  Analyzer::Query& query) const {
1687  InsertStmt::analyze(catalog, query);
1688  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1689  query.get_targetlist_nonconst();
1690  const auto tableId = query.get_result_table_id();
1691  if (!column_list_.empty()) {
1692  if (value_list_.size() != column_list_.size()) {
1693  throw std::runtime_error(
1694  "Numbers of columns and values don't match for the "
1695  "insert.");
1696  }
1697  } else {
1698  const std::list<const ColumnDescriptor*> non_phys_cols =
1699  catalog.getAllColumnMetadataForTable(tableId, false, false, false);
1700  if (non_phys_cols.size() != value_list_.size()) {
1701  throw std::runtime_error(
1702  "Number of columns in table does not match the list of values given in the "
1703  "insert.");
1704  }
1705  }
1706  std::list<int>::const_iterator it = query.get_result_col_list().begin();
1707  for (auto& v : value_list_) {
1708  auto e = v->analyze(catalog, query);
1709  const ColumnDescriptor* cd =
1710  catalog.getMetadataForColumn(query.get_result_table_id(), *it);
1711  CHECK(cd);
1712  if (cd->columnType.get_notnull()) {
1713  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
1714  if (c != nullptr && c->get_is_null()) {
1715  throw std::runtime_error("Cannot insert NULL into column " + cd->columnName);
1716  }
1717  }
1718  e = e->add_cast(cd->columnType);
1719  tlist.emplace_back(new Analyzer::TargetEntry("", e, false));
1720  ++it;
1721 
1722  const auto& col_ti = cd->columnType;
1723  if (col_ti.get_physical_cols() > 0) {
1724  CHECK(cd->columnType.is_geometry());
1725  auto c = dynamic_cast<const Analyzer::Constant*>(e.get());
1726  if (!c) {
1727  auto uoper = std::dynamic_pointer_cast<Analyzer::UOper>(e);
1728  if (uoper && uoper->get_optype() == kCAST) {
1729  c = dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
1730  }
1731  }
1732  bool is_null = false;
1733  std::string* geo_string{nullptr};
1734  if (c) {
1735  is_null = c->get_is_null();
1736  if (!is_null) {
1737  geo_string = c->get_constval().stringval;
1738  }
1739  }
1740  if (!is_null && !geo_string) {
1741  throw std::runtime_error("Expecting a WKT or WKB hex string for column " +
1742  cd->columnName);
1743  }
1744  std::vector<double> coords;
1745  std::vector<double> bounds;
1746  std::vector<int> ring_sizes;
1747  std::vector<int> poly_rings;
1748  int render_group =
1749  0; // @TODO simon.eves where to get render_group from in this context?!
1750  SQLTypeInfo import_ti{cd->columnType};
1751  if (!is_null) {
1753  *geo_string, import_ti, coords, bounds, ring_sizes, poly_rings)) {
1754  throw std::runtime_error("Cannot read geometry to insert into column " +
1755  cd->columnName);
1756  }
1757  if (coords.empty()) {
1758  // Importing from geo_string WKT resulted in empty coords: dealing with a NULL
1759  is_null = true;
1760  }
1761  if (cd->columnType.get_type() != import_ti.get_type()) {
1762  // allow POLYGON to be inserted into MULTIPOLYGON column
1763  if (!(import_ti.get_type() == SQLTypes::kPOLYGON &&
1765  throw std::runtime_error(
1766  "Imported geometry doesn't match the type of column " + cd->columnName);
1767  }
1768  }
1769  } else {
1770  // Special case for NULL POINT, push NULL representation to coords
1771  if (cd->columnType.get_type() == kPOINT) {
1772  if (!coords.empty()) {
1773  throw std::runtime_error("NULL POINT with unexpected coordinates in column " +
1774  cd->columnName);
1775  }
1776  coords.push_back(NULL_ARRAY_DOUBLE);
1777  coords.push_back(NULL_DOUBLE);
1778  }
1779  }
1780 
1781  // TODO: check if import SRID matches columns SRID, may need to transform before
1782  // inserting
1783 
1784  int nextColumnOffset = 1;
1785 
1786  const ColumnDescriptor* cd_coords = catalog.getMetadataForColumn(
1787  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1788  CHECK(cd_coords);
1789  CHECK_EQ(cd_coords->columnType.get_type(), kARRAY);
1790  CHECK_EQ(cd_coords->columnType.get_subtype(), kTINYINT);
1791  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1792  if (!is_null || cd->columnType.get_type() == kPOINT) {
1793  auto compressed_coords = Geospatial::compress_coords(coords, col_ti);
1794  for (auto cc : compressed_coords) {
1795  Datum d;
1796  d.tinyintval = cc;
1797  auto e = makeExpr<Analyzer::Constant>(kTINYINT, false, d);
1798  value_exprs.push_back(e);
1799  }
1800  }
1801  tlist.emplace_back(new Analyzer::TargetEntry(
1802  "",
1803  makeExpr<Analyzer::Constant>(cd_coords->columnType, is_null, value_exprs),
1804  false));
1805  ++it;
1806  nextColumnOffset++;
1807 
1808  if (cd->columnType.get_type() == kPOLYGON ||
1809  cd->columnType.get_type() == kMULTIPOLYGON) {
1810  // Put ring sizes array into separate physical column
1811  const ColumnDescriptor* cd_ring_sizes = catalog.getMetadataForColumn(
1812  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1813  CHECK(cd_ring_sizes);
1814  CHECK_EQ(cd_ring_sizes->columnType.get_type(), kARRAY);
1815  CHECK_EQ(cd_ring_sizes->columnType.get_subtype(), kINT);
1816  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1817  if (!is_null) {
1818  for (auto c : ring_sizes) {
1819  Datum d;
1820  d.intval = c;
1821  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1822  value_exprs.push_back(e);
1823  }
1824  }
1825  tlist.emplace_back(new Analyzer::TargetEntry(
1826  "",
1827  makeExpr<Analyzer::Constant>(cd_ring_sizes->columnType, is_null, value_exprs),
1828  false));
1829  ++it;
1830  nextColumnOffset++;
1831 
1832  if (cd->columnType.get_type() == kMULTIPOLYGON) {
1833  // Put poly_rings array into separate physical column
1834  const ColumnDescriptor* cd_poly_rings = catalog.getMetadataForColumn(
1835  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1836  CHECK(cd_poly_rings);
1837  CHECK_EQ(cd_poly_rings->columnType.get_type(), kARRAY);
1838  CHECK_EQ(cd_poly_rings->columnType.get_subtype(), kINT);
1839  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1840  if (!is_null) {
1841  for (auto c : poly_rings) {
1842  Datum d;
1843  d.intval = c;
1844  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1845  value_exprs.push_back(e);
1846  }
1847  }
1848  tlist.emplace_back(new Analyzer::TargetEntry(
1849  "",
1850  makeExpr<Analyzer::Constant>(
1851  cd_poly_rings->columnType, is_null, value_exprs),
1852  false));
1853  ++it;
1854  nextColumnOffset++;
1855  }
1856  }
1857 
1858  if (cd->columnType.get_type() == kLINESTRING ||
1859  cd->columnType.get_type() == kPOLYGON ||
1860  cd->columnType.get_type() == kMULTIPOLYGON) {
1861  const ColumnDescriptor* cd_bounds = catalog.getMetadataForColumn(
1862  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1863  CHECK(cd_bounds);
1864  CHECK_EQ(cd_bounds->columnType.get_type(), kARRAY);
1865  CHECK_EQ(cd_bounds->columnType.get_subtype(), kDOUBLE);
1866  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1867  if (!is_null) {
1868  for (auto b : bounds) {
1869  Datum d;
1870  d.doubleval = b;
1871  auto e = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
1872  value_exprs.push_back(e);
1873  }
1874  }
1875  tlist.emplace_back(new Analyzer::TargetEntry(
1876  "",
1877  makeExpr<Analyzer::Constant>(cd_bounds->columnType, is_null, value_exprs),
1878  false));
1879  ++it;
1880  nextColumnOffset++;
1881  }
1882 
1883  if (cd->columnType.get_type() == kPOLYGON ||
1884  cd->columnType.get_type() == kMULTIPOLYGON) {
1885  // Put render group into separate physical column
1886  const ColumnDescriptor* cd_render_group = catalog.getMetadataForColumn(
1887  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1888  CHECK(cd_render_group);
1889  CHECK_EQ(cd_render_group->columnType.get_type(), kINT);
1890  Datum d;
1891  d.intval = render_group;
1892  tlist.emplace_back(new Analyzer::TargetEntry(
1893  "",
1894  makeExpr<Analyzer::Constant>(cd_render_group->columnType, is_null, d),
1895  false));
1896  ++it;
1897  nextColumnOffset++;
1898  }
1899  }
1900  }
1901 }
1902 
// Executes a singleton INSERT ... VALUES statement on behalf of `session`.
//
// NOTE: this listing is missing several lines of the function (the embedded numbering
// skips 1906-1907, 1913-1914, 1924, 1934 and 1936). The comments below describe only
// what the visible lines establish.
//
// Throws std::runtime_error when the privilege check fails or when the target is a view.
1903 void InsertValuesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
1904  auto& catalog = session.getCatalog();
1905 
// Privilege check involving *table_ (the start of the condition is not shown here).
1908  *table_)) {
1909  throw std::runtime_error("User has no insert privileges on " + *table_ + ".");
1910  }
1911 
// Exclusive lock held for the rest of the function (the locked mutex is on an elided line).
1912  auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
1915 
// Semantic analysis fills `query`, including the id of the result table used below.
1916  Analyzer::Query query;
1917  analyze(catalog, query);
1918 
1919  // Acquire schema write lock -- leave data lock so the fragmenter can checkpoint. For
1920  // singleton inserts we just take a write lock on the schema, which prevents concurrent
1921  // inserts.
1922  auto result_table_id = query.get_result_table_id();
1923  const auto td_with_lock =
1925  catalog, result_table_id);
1926  // NOTE(max): we do the same checks as below just a few calls earlier in analyze().
1927  // Do we keep those intentionally to make sure nothing changed in between w/o
1928  // catalog locks or is it just a duplicate work?
// Calling td_with_lock yields the table descriptor that must exist and must not be a view.
1929  auto td = td_with_lock();
1930  CHECK(td);
1931  if (td->isView) {
1932  throw std::runtime_error("Singleton inserts on views is not supported.");
1933  }
1935 
// `executor` is defined on a line not present in this listing.
1937  RelAlgExecutor ra_executor(executor.get(), catalog);
1938 
1939  ra_executor.executeSimpleInsert(query);
1940 }
1941 
1942 void UpdateStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1943  Analyzer::Query& query) const {
1944  throw std::runtime_error("UPDATE statement not supported yet.");
1945 }
1946 
1947 void DeleteStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1948  Analyzer::Query& query) const {
1949  throw std::runtime_error("DELETE statement not supported yet.");
1950 }
1951 
1952 namespace {
1953 
// Body of a shard-column validation helper; its signature line is missing from this
// listing, but the body operates on a column descriptor named `cd`.
//
// Accepted column types: integers, dictionary-encoded strings, and time types. Any other
// type is rejected with a runtime_error naming the type and its encoding. An accepted
// column is still rejected if it carries a default value.
1955  const auto& col_ti = cd.columnType;
1956  if (col_ti.is_integer() ||
1957  (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) ||
1958  col_ti.is_time()) {
1959  if (cd.default_value.has_value()) {
1960  throw std::runtime_error("Default values for shard keys are not supported yet.");
1961  }
1962  } else {
1963  throw std::runtime_error("Cannot shard on type " + col_ti.get_type_name() +
1964  ", encoding " + col_ti.get_compression_name());
1965  }
1966 }
1967 
// Returns the 1-based index of the column whose name equals `name` exactly
// (case-sensitive comparison), or 0 when no column matches.
//
// The index counts every preceding column, plus the physical columns reported by
// get_physical_cols() for each preceding geometry column.
//
// NOTE: listing line 1973 (inside the match branch, before the return) is missing here.
1968 size_t shard_column_index(const std::string& name,
1969  const std::list<ColumnDescriptor>& columns) {
1970  size_t index = 1;
1971  for (const auto& cd : columns) {
1972  if (cd.columnName == name) {
1974  return index;
1975  }
1976  ++index;
// Geometry columns occupy additional physical slots; skip over them.
1977  if (cd.columnType.is_geometry()) {
1978  index += cd.columnType.get_physical_cols();
1979  }
1980  }
1981  // Not found, return 0
1982  return 0;
1983 }
1984 
1985 size_t sort_column_index(const std::string& name,
1986  const std::list<ColumnDescriptor>& columns) {
1987  size_t index = 1;
1988  for (const auto& cd : columns) {
1989  if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
1990  return index;
1991  }
1992  ++index;
1993  if (cd.columnType.is_geometry()) {
1994  index += cd.columnType.get_physical_cols();
1995  }
1996  }
1997  // Not found, return 0
1998  return 0;
1999 }
2000 
2001 void set_string_field(rapidjson::Value& obj,
2002  const std::string& field_name,
2003  const std::string& field_value,
2004  rapidjson::Document& document) {
2005  rapidjson::Value field_name_json_str;
2006  field_name_json_str.SetString(
2007  field_name.c_str(), field_name.size(), document.GetAllocator());
2008  rapidjson::Value field_value_json_str;
2009  field_value_json_str.SetString(
2010  field_value.c_str(), field_value.size(), document.GetAllocator());
2011  obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
2012 }
2013 
// Serializes key metadata into a JSON array string (the first signature line and listing
// line 2030 are missing from this listing).
//
// The array contains, in order:
//   - one {"type": "SHARD KEY", "name": <column>} object when `shard_key_def` is non-null;
//   - one {"type": "SHARED DICTIONARY", "name", "foreign_table", "foreign_column"} object
//     per entry of `shared_dict_defs`.
// Returns the array written through a rapidjson Writer.
2015  const ShardKeyDef* shard_key_def,
2016  const std::vector<SharedDictionaryDef>& shared_dict_defs) {
// `document` is used only as the allocator owner; the array itself is what gets written.
2017  rapidjson::Document document;
2018  auto& allocator = document.GetAllocator();
2019  rapidjson::Value arr(rapidjson::kArrayType);
2020  if (shard_key_def) {
2021  rapidjson::Value shard_key_obj(rapidjson::kObjectType);
2022  set_string_field(shard_key_obj, "type", "SHARD KEY", document);
2023  set_string_field(shard_key_obj, "name", shard_key_def->get_column(), document);
2024  arr.PushBack(shard_key_obj, allocator);
2025  }
2026  for (const auto& shared_dict_def : shared_dict_defs) {
2027  rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
2028  set_string_field(shared_dict_obj, "type", "SHARED DICTIONARY", document);
2029  set_string_field(shared_dict_obj, "name", shared_dict_def.get_column(), document);
// The opening of the next call is on the elided listing line 2030.
2031  shared_dict_obj, "foreign_table", shared_dict_def.get_foreign_table(), document);
2032  set_string_field(shared_dict_obj,
2033  "foreign_column",
2034  shared_dict_def.get_foreign_column(),
2035  document);
2036  arr.PushBack(shared_dict_obj, allocator);
2037  }
2038  rapidjson::StringBuffer buffer;
2039  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
2040  arr.Accept(writer);
2041  return buffer.GetString();
2042 }
2043 
// Generic option handler: runs `validate` on the option `p` and hands the validated
// value to `op`, returning whatever `op` returns.
//
// NOTE: the remaining template parameters and the start of the function signature
// (listing lines 2046-2047) are missing from this listing; the default VALIDATE type is
// therefore not visible here.
2044 template <typename LITERAL_TYPE,
2045  typename ASSIGNMENT,
2048  ASSIGNMENT op,
2049  VALIDATE validate = VALIDATE()) {
2050  const auto val = validate(p);
2051  return op(val);
2052 }
2053 
// Option handler that stores the validated string option in td.storageType, using
// CaseSensitiveValidate as the validator. The header lines are missing from this
// listing; presumably this is get_storage_type, the handler registered for "storage_type".
2056  const std::list<ColumnDescriptor>& columns) {
2057  auto assignment = [&td](const auto val) { td.storageType = val; };
2058  return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
2059  p, assignment);
2060 }
2061 
// Option handler that stores the validated integer option in td.maxFragRows. The header
// lines are missing from this listing; presumably this is get_frag_size_def, the handler
// registered for "fragment_size" on regular tables.
2064  const std::list<ColumnDescriptor>& columns) {
2065  return get_property_value<IntLiteral>(p,
2066  [&td](const auto val) { td.maxFragRows = val; });
2067 }
2068 
// Dataframe counterpart of the fragment-size handler: stores the validated integer
// option in df_td.maxFragRows. The header lines are missing from this listing; presumably
// this is get_frag_size_dataframe_def.
2071  const std::list<ColumnDescriptor>& columns) {
2072  return get_property_value<IntLiteral>(
2073  p, [&df_td](const auto val) { df_td.maxFragRows = val; });
2074 }
2075 
// Option handler that stores the validated integer option in td.maxChunkSize. The header
// lines are missing from this listing; presumably this is get_max_chunk_size_def, the
// handler registered for "max_chunk_size" on regular tables.
2078  const std::list<ColumnDescriptor>& columns) {
2079  return get_property_value<IntLiteral>(p,
2080  [&td](const auto val) { td.maxChunkSize = val; });
2081 }
2082 
// Dataframe counterpart of the max-chunk-size handler: stores the validated integer
// option in df_td.maxChunkSize. Parts of the signature (listing lines 2083 and 2085) are
// missing from this listing; presumably this is get_max_chunk_size_dataframe_def.
2084  DataframeTableDescriptor& df_td,
2086  const std::list<ColumnDescriptor>& columns) {
2087  return get_property_value<IntLiteral>(
2088  p, [&df_td](const auto val) { df_td.maxChunkSize = val; });
2089 }
2090 
// Option handler for the dataframe DELIMITER option: the validated string must be exactly
// one character long, otherwise a runtime_error is thrown; on success it is stored in
// df_td.delimiter. The header lines are missing from this listing; presumably this is
// get_delimiter_def.
2093  const std::list<ColumnDescriptor>& columns) {
2094  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2095  if (val.size() != 1) {
2096  throw std::runtime_error("Length of DELIMITER must be equal to 1.");
2097  }
2098  df_td.delimiter = val;
2099  });
2100 }
2101 
// Option handler for the dataframe HEADER option: maps the validated string "TRUE" /
// "FALSE" to df_td.hasHeader and throws a runtime_error for anything else. The header
// lines are missing from this listing; presumably this is get_header_def.
// NOTE(review): the comparison is against upper-case literals, so the default validator
// presumably upper-cases the value -- confirm against get_property_value's VALIDATE type.
2104  const std::list<ColumnDescriptor>& columns) {
2105  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2106  if (val == "FALSE") {
2107  df_td.hasHeader = false;
2108  } else if (val == "TRUE") {
2109  df_td.hasHeader = true;
2110  } else {
2111  throw std::runtime_error("Option HEADER support only 'true' or 'false' values.");
2112  }
2113  });
2114 }
2115 
// Option handler that stores the validated integer option in td.fragPageSize. The header
// lines are missing from this listing; presumably this is get_page_size_def, the handler
// registered for "page_size".
2118  const std::list<ColumnDescriptor>& columns) {
2119  return get_property_value<IntLiteral>(p,
2120  [&td](const auto val) { td.fragPageSize = val; });
2121 }
// Option handler that stores the validated integer option in td.maxRows. The header
// lines are missing from this listing; presumably this is get_max_rows_def, the handler
// registered for "max_rows".
2124  const std::list<ColumnDescriptor>& columns) {
2125  return get_property_value<IntLiteral>(p, [&td](const auto val) { td.maxRows = val; });
2126 }
2127 
// Option handler that stores the validated integer option in df_td.skipRows. The header
// lines are missing from this listing; presumably this is get_skip_rows_def, the handler
// registered for the dataframe "skip_rows" option.
2130  const std::list<ColumnDescriptor>& columns) {
2131  return get_property_value<IntLiteral>(
2132  p, [&df_td](const auto val) { df_td.skipRows = val; });
2133 }
2134 
// Option handler for PARTITIONS: the validated string must be "SHARDED" or "REPLICATED",
// and "REPLICATED" is rejected when a shard column is already set on `td`
// (td.shardedColumnId != 0). On success the value is stored in td.partitions. Both
// failures are reported as runtime_error. The header lines are missing from this listing;
// presumably this is get_partions_def, the handler registered for "partitions".
2137  const std::list<ColumnDescriptor>& columns) {
2138  return get_property_value<StringLiteral>(p, [&td](const auto partitions_uc) {
2139  if (partitions_uc != "SHARDED" && partitions_uc != "REPLICATED") {
2140  throw std::runtime_error("PARTITIONS must be SHARDED or REPLICATED");
2141  }
2142  if (td.shardedColumnId != 0 && partitions_uc == "REPLICATED") {
2143  throw std::runtime_error(
2144  "A table cannot be sharded and replicated at the same time");
2145  };
2146  td.partitions = partitions_uc;
2147  });
2148 }
// Option handler for SHARD_COUNT. The header lines are missing from this listing;
// presumably this is get_shard_count_def, the handler registered for "shard_count".
//
// Requires a shard column to be set on `td` already. When g_leaf_count is non-zero the
// requested count must be a multiple of it, and td.nShards receives the per-leaf share
// (shard_count / g_leaf_count); otherwise td.nShards receives shard_count unchanged.
// Violations are reported as runtime_error.
2151  const std::list<ColumnDescriptor>& columns) {
2152  if (!td.shardedColumnId) {
2153  throw std::runtime_error("SHARD KEY must be defined.");
2154  }
2155  return get_property_value<IntLiteral>(p, [&td](const auto shard_count) {
2156  if (g_leaf_count && shard_count % g_leaf_count) {
2157  throw std::runtime_error(
2158  "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2159  }
2160  td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
// NOTE(review): the guard at the top of this function already guarantees
// td.shardedColumnId is non-zero, so the condition below cannot be true on this path.
2161  if (!td.shardedColumnId && !td.nShards) {
2162  throw std::runtime_error(
2163  "Must specify the number of shards through the SHARD_COUNT option");
2164  };
2165  });
2166 }
2167 
// Option handler for VACUUM: accepts "IMMEDIATE" or "DELAYED" (anything else throws a
// runtime_error) and sets td.hasDeletedCol to false for IMMEDIATE, true for DELAYED.
// NOTE: the second parameter line (listing line 2169) is missing from this listing.
2168 decltype(auto) get_vacuum_def(TableDescriptor& td,
2170  const std::list<ColumnDescriptor>& columns) {
2171  return get_property_value<StringLiteral>(p, [&td](const auto vacuum_uc) {
2172  if (vacuum_uc != "IMMEDIATE" && vacuum_uc != "DELAYED") {
2173  throw std::runtime_error("VACUUM must be IMMEDIATE or DELAYED");
2174  }
// The value has already been matched exactly above; the case-insensitive comparison
// here only distinguishes between the two accepted spellings.
2175  td.hasDeletedCol = boost::iequals(vacuum_uc, "IMMEDIATE") ? false : true;
2176  });
2177 }
2178 
// Option handler for SORT_COLUMN: resolves the validated column name through
// sort_column_index() and stores the result in td.sortedColumnId; a result of 0 (column
// not found) is reported as a runtime_error. The header lines are missing from this
// listing; presumably this is get_sort_column_def, the handler registered for
// "sort_column".
2181  const std::list<ColumnDescriptor>& columns) {
2182  return get_property_value<StringLiteral>(p, [&td, &columns](const auto sort_upper) {
2183  td.sortedColumnId = sort_column_index(sort_upper, columns);
2184  if (!td.sortedColumnId) {
2185  throw std::runtime_error("Specified sort column " + sort_upper + " doesn't exist");
2186  }
2187  });
2188 }
2189 
// Option handler for MAX_ROLLBACK_EPOCHS: stores the validated integer in
// td.maxRollbackEpochs, mapping any negative value to -1. The header lines are missing
// from this listing; presumably this is get_max_rollback_epochs_def.
// NOTE(review): the validator is PositiveOrZeroValidate; if it rejects negative values
// the `val < 0` branch is never taken -- confirm against the validator's definition.
2192  const std::list<ColumnDescriptor>& columns) {
2193  auto assignment = [&td](const auto val) {
2194  td.maxRollbackEpochs =
2195  val < 0 ? -1 : val; // Anything < 0 means unlimited rollbacks. Note that 0
2196  // still means keeping a shadow copy of data/metadata
2197  // between epochs so bad writes can be rolled back
2198  };
2199  return get_property_value<IntLiteral, decltype(assignment), PositiveOrZeroValidate>(
2200  p, assignment);
2201 }
2202 
2203 static const std::map<const std::string, const TableDefFuncPtr> tableDefFuncMap = {
2204  {"fragment_size"s, get_frag_size_def},
2205  {"max_chunk_size"s, get_max_chunk_size_def},
2206  {"page_size"s, get_page_size_def},
2207  {"max_rows"s, get_max_rows_def},
2208  {"partitions"s, get_partions_def},
2209  {"shard_count"s, get_shard_count_def},
2210  {"vacuum"s, get_vacuum_def},
2211  {"sort_column"s, get_sort_column_def},
2212  {"storage_type"s, get_storage_type},
2213  {"max_rollback_epochs", get_max_rollback_epochs_def}};
2214 
// Applies one CREATE TABLE option to `td`: the option name is lower-cased, looked up in
// tableDefFuncMap, and the matching handler is invoked with (td, p.get(), columns).
// An unknown option name is reported as a runtime_error listing the accepted options.
// NOTE: the first signature line (listing line 2215) is missing from this listing.
2216  const std::unique_ptr<NameValueAssign>& p,
2217  const std::list<ColumnDescriptor>& columns) {
2218  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2219  if (it == tableDefFuncMap.end()) {
2220  throw std::runtime_error(
2221  "Invalid CREATE TABLE option " + *p->get_name() +
2222  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2223  "MAX_ROWS, "
2224  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE.");
2225  }
2226  return it->second(td, p.get(), columns);
2227 }
2228 
// CREATE TABLE AS variant of the option dispatcher: same lookup in tableDefFuncMap and
// same handler invocation, but with an error message specific to CREATE TABLE AS.
// NOTE: the first signature line (listing line 2229) is missing from this listing.
// NOTE(review): the error text mentions USE_SHARED_DICTIONARIES, which is not a key of
// tableDefFuncMap; presumably the caller handles that option before dispatching here --
// confirm.
2230  const std::unique_ptr<NameValueAssign>& p,
2231  const std::list<ColumnDescriptor>& columns) {
2232  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2233  if (it == tableDefFuncMap.end()) {
2234  throw std::runtime_error(
2235  "Invalid CREATE TABLE AS option " + *p->get_name() +
2236  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2237  "MAX_ROWS, "
2238  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE or "
2239  "USE_SHARED_DICTIONARIES.");
2240  }
2241  return it->second(td, p.get(), columns);
2242 }
2243 
2244 static const std::map<const std::string, const DataframeDefFuncPtr> dataframeDefFuncMap =
2245  {{"fragment_size"s, get_frag_size_dataframe_def},
2246  {"max_chunk_size"s, get_max_chunk_size_dataframe_def},
2247  {"skip_rows"s, get_skip_rows_def},
2248  {"delimiter"s, get_delimiter_def},
2249  {"header"s, get_header_def}};
2250 
// Applies one CREATE DATAFRAME option to `df_td`: the option name is lower-cased, looked
// up in dataframeDefFuncMap, and the matching handler is invoked with
// (df_td, p.get(), columns). An unknown option name is reported as a runtime_error.
// NOTE: the first signature line (listing line 2251) is missing from this listing.
2252  const std::unique_ptr<NameValueAssign>& p,
2253  const std::list<ColumnDescriptor>& columns) {
2254  const auto it =
2255  dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2256  if (it == dataframeDefFuncMap.end()) {
2257  throw std::runtime_error(
2258  "Invalid CREATE DATAFRAME option " + *p->get_name() +
2259  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
2260  }
2261  return it->second(df_td, p.get(), columns);
2262 }
2263 
2264 std::unique_ptr<ColumnDef> column_from_json(const rapidjson::Value& element) {
2265  CHECK(element.HasMember("name"));
2266  auto col_name = std::make_unique<std::string>(json_str(element["name"]));
2267  CHECK(element.HasMember("sqltype"));
2268  const auto sql_types = to_sql_type(json_str(element["sqltype"]));
2269 
2270  // decimal / numeric precision / scale
2271  int precision = -1;
2272  int scale = -1;
2273  if (element.HasMember("precision")) {
2274  precision = json_i64(element["precision"]);
2275  }
2276  if (element.HasMember("scale")) {
2277  scale = json_i64(element["scale"]);
2278  }
2279 
2280  std::optional<int64_t> array_size;
2281  if (element.HasMember("arraySize")) {
2282  // We do not yet support geo arrays
2283  array_size = json_i64(element["arraySize"]);
2284  }
2285  std::unique_ptr<SQLType> sql_type;
2286  if (element.HasMember("subtype")) {
2287  CHECK(element.HasMember("coordinateSystem"));
2288  const auto subtype_sql_types = to_sql_type(json_str(element["subtype"]));
2289  sql_type =
2290  std::make_unique<SQLType>(subtype_sql_types,
2291  static_cast<int>(sql_types),
2292  static_cast<int>(json_i64(element["coordinateSystem"])),
2293  false);
2294  } else if (precision > 0 && scale > 0) {
2295  sql_type = std::make_unique<SQLType>(sql_types,
2296  precision,
2297  scale,
2298  /*is_array=*/array_size.has_value(),
2299  array_size ? *array_size : -1);
2300  } else if (precision > 0) {
2301  sql_type = std::make_unique<SQLType>(sql_types,
2302  precision,
2303  0,
2304  /*is_array=*/array_size.has_value(),
2305  array_size ? *array_size : -1);
2306  } else {
2307  sql_type = std::make_unique<SQLType>(sql_types,
2308  /*is_array=*/array_size.has_value(),
2309  array_size ? *array_size : -1);
2310  }
2311  CHECK(sql_type);
2312 
2313  CHECK(element.HasMember("nullable"));
2314  const auto nullable = json_bool(element["nullable"]);
2315  std::unique_ptr<ColumnConstraintDef> constraint_def;
2316  StringLiteral* str_literal = nullptr;
2317  if (element.HasMember("default") && !element["default"].IsNull()) {
2318  std::string* defaultval = new std::string(json_str(element["default"]));
2319  boost::algorithm::trim_if(*defaultval, boost::is_any_of(" \"'`"));
2320  str_literal = new StringLiteral(defaultval);
2321  }
2322 
2323  constraint_def = std::make_unique<ColumnConstraintDef>(/*notnull=*/!nullable,
2324  /*unique=*/false,
2325  /*primarykey=*/false,
2326  /*defaultval=*/str_literal);
2327  std::unique_ptr<CompressDef> compress_def;
2328  if (element.HasMember("encodingType") && !element["encodingType"].IsNull()) {
2329  std::string encoding_type = json_str(element["encodingType"]);
2330  CHECK(element.HasMember("encodingSize"));
2331  auto encoding_name = std::make_unique<std::string>(json_str(element["encodingType"]));
2332  compress_def = std::make_unique<CompressDef>(encoding_name.release(),
2333  json_i64(element["encodingSize"]));
2334  }
2335  return std::make_unique<ColumnDef>(col_name.release(),
2336  sql_type.release(),
2337  compress_def ? compress_def.release() : nullptr,
2338  constraint_def ? constraint_def.release() : nullptr);
2339 }
2340 
2341 } // namespace
2342 
2343 CreateTableStmt::CreateTableStmt(const rapidjson::Value& payload) {
2344  CHECK(payload.HasMember("name"));
2345  table_ = std::make_unique<std::string>(json_str(payload["name"]));
2346  CHECK(payload.HasMember("elements"));
2347  CHECK(payload["elements"].IsArray());
2348 
2349  // TODO: support temporary tables
2350  is_temporary_ = false;
2351 
2352  if_not_exists_ = false;
2353  if (payload.HasMember("ifNotExists")) {
2354  if_not_exists_ = json_bool(payload["ifNotExists"]);
2355  }
2356 
2357  const auto elements = payload["elements"].GetArray();
2358  for (const auto& element : elements) {
2359  CHECK(element.IsObject());
2360  CHECK(element.HasMember("type"));
2361  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
2362  auto col_def = column_from_json(element);
2363  table_element_list_.emplace_back(std::move(col_def));
2364  } else if (json_str(element["type"]) == "SQL_COLUMN_CONSTRAINT") {
2365  CHECK(element.HasMember("name"));
2366  if (json_str(element["name"]) == "SHARD_KEY") {
2367  CHECK(element.HasMember("columns"));
2368  CHECK(element["columns"].IsArray());
2369  const auto& columns = element["columns"].GetArray();
2370  if (columns.Size() != size_t(1)) {
2371  throw std::runtime_error("Only one shard column is currently supported.");
2372  }
2373  auto shard_key_def = std::make_unique<ShardKeyDef>(json_str(columns[0]));
2374  table_element_list_.emplace_back(std::move(shard_key_def));
2375  } else if (json_str(element["name"]) == "SHARED_DICT") {
2376  CHECK(element.HasMember("columns"));
2377  CHECK(element["columns"].IsArray());
2378  const auto& columns = element["columns"].GetArray();
2379  if (columns.Size() != size_t(1)) {
2380  throw std::runtime_error(
2381  R"(Only one column per shared dictionary entry is currently supported. Use multiple SHARED DICT statements to share dictionaries from multiple columns.)");
2382  }
2383  CHECK(element.HasMember("references") && element["references"].IsObject());
2384  const auto& references = element["references"].GetObject();
2385  std::string references_table_name;
2386  if (references.HasMember("table")) {
2387  references_table_name = json_str(references["table"]);
2388  } else {
2389  references_table_name = *table_;
2390  }
2391  CHECK(references.HasMember("column"));
2392 
2393  auto shared_dict_def = std::make_unique<SharedDictionaryDef>(
2394  json_str(columns[0]), references_table_name, json_str(references["column"]));
2395  table_element_list_.emplace_back(std::move(shared_dict_def));
2396 
2397  } else {
2398  LOG(FATAL) << "Unsupported type for SQL_COLUMN_CONSTRAINT: "
2399  << json_str(element["name"]);
2400  }
2401  } else {
2402  LOG(FATAL) << "Unsupported element type for CREATE TABLE: "
2403  << element["type"].GetString();
2404  }
2405  }
2406 
2407  CHECK(payload.HasMember("options"));
2408  if (payload["options"].IsObject()) {
2409  for (const auto& option : payload["options"].GetObject()) {
2410  auto option_name = std::make_unique<std::string>(json_str(option.name));
2411  std::unique_ptr<Literal> literal_value;
2412  if (option.value.IsString()) {
2413  auto literal_string = std::make_unique<std::string>(json_str(option.value));
2414  literal_value = std::make_unique<StringLiteral>(literal_string.release());
2415  } else if (option.value.IsInt() || option.value.IsInt64()) {
2416  literal_value = std::make_unique<IntLiteral>(json_i64(option.value));
2417  } else if (option.value.IsNull()) {
2418  literal_value = std::make_unique<NullLiteral>();
2419  } else {
2420  throw std::runtime_error("Unable to handle literal for " + *option_name);
2421  }
2422  CHECK(literal_value);
2423 
2424  storage_options_.emplace_back(std::make_unique<NameValueAssign>(
2425  option_name.release(), literal_value.release()));
2426  }
2427  } else {
2428  CHECK(payload["options"].IsNull());
2429  }
2430 }
2431 
// Translates the parsed table elements and storage options into `td`, `columns` and
// `shared_dict_defs` without creating anything in the catalog within the visible lines.
//
// Throws std::runtime_error for: more than one shard key, unsupported table constraints,
// a shard column that does not exist, and a shard key without a resulting shard count.
//
// NOTE: listing lines 2442, 2460, 2475 and 2477 are missing from this listing.
2432 void CreateTableStmt::executeDryRun(const Catalog_Namespace::SessionInfo& session,
2433  TableDescriptor& td,
2434  std::list<ColumnDescriptor>& columns,
2435  std::vector<SharedDictionaryDef>& shared_dict_defs) {
// NOTE(review): uc_col_names is not referenced in any visible line; its use is presumably
// on the elided line 2460 -- confirm.
2436  std::unordered_set<std::string> uc_col_names;
2437  const auto& catalog = session.getCatalog();
2438  const ShardKeyDef* shard_key_def{nullptr};
2439  for (auto& e : table_element_list_) {
// Shared dictionary definitions are collected into shared_dict_defs (the call whose
// arguments appear on line 2443 starts on an elided line).
2440  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2441  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2443  this, shared_dict_def, columns, shared_dict_defs, catalog);
2444  shared_dict_defs.push_back(*shared_dict_def);
2445  continue;
2446  }
// At most one shard key definition is allowed per statement.
2447  if (dynamic_cast<ShardKeyDef*>(e.get())) {
2448  if (shard_key_def) {
2449  throw std::runtime_error("Specified more than one shard key");
2450  }
2451  shard_key_def = static_cast<const ShardKeyDef*>(e.get());
2452  continue;
2453  }
// Everything that remains must be a column definition.
2454  if (!dynamic_cast<ColumnDef*>(e.get())) {
2455  throw std::runtime_error("Table constraints are not supported yet.");
2456  }
2457  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2458  ColumnDescriptor cd;
2459  cd.columnName = *coldef->get_column_name();
2461  setColumnDescriptor(cd, coldef);
2462  columns.push_back(cd);
2463  }
2464 
2465  ddl_utils::set_default_table_attributes(*table_, td, columns.size());
2466 
// Resolve the shard key column to its index; 0 means the column was not found.
2467  if (shard_key_def) {
2468  td.shardedColumnId = shard_column_index(shard_key_def->get_column(), columns);
2469  if (!td.shardedColumnId) {
2470  throw std::runtime_error("Specified shard column " + shard_key_def->get_column() +
2471  " doesn't exist");
2472  }
2473  }
// Both branch bodies (listing lines 2475 and 2477) are missing from this listing.
2474  if (is_temporary_) {
2476  } else {
2478  }
// Storage options are applied after defaults and the shard column, so handlers can see
// td.shardedColumnId.
2479  if (!storage_options_.empty()) {
2480  for (auto& p : storage_options_) {
2481  get_table_definitions(td, p, columns);
2482  }
2483  }
2484  if (td.shardedColumnId && !td.nShards) {
2485  throw std::runtime_error("SHARD_COUNT needs to be specified with SHARD_KEY.");
2486  }
2487  td.keyMetainfo = serialize_key_metainfo(shard_key_def, shared_dict_defs);
2488 }
2489 
// Creates the table described by this statement in the session's catalog.
//
// Visible steps: take an exclusive lock, check create privileges (condition elided),
// return early when validateNonExistentTableOrView() reports false, build the
// descriptors via executeDryRun(), create the table, then register the DB object for the
// current user.
//
// NOTE: listing lines 2494-2495 and 2498-2499 are missing from this listing.
2490 void CreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2491  auto& catalog = session.getCatalog();
2492 
// The mutex being locked is named on an elided line.
2493  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2496 
2497  // check access privileges
2500  throw std::runtime_error("Table " + *table_ +
2501  " will not be created. User has no create privileges.");
2502  }
2503 
2504  if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
2505  return;
2506  }
2507 
2508  TableDescriptor td;
2509  std::list<ColumnDescriptor> columns;
2510  std::vector<SharedDictionaryDef> shared_dict_defs;
2511 
2512  executeDryRun(session, td, columns, shared_dict_defs);
2513  td.userId = session.get_currentUser().userId;
2514 
2515  catalog.createShardedTable(td, columns, shared_dict_defs);
2516  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2517  // privileges
2518  SysCatalog::instance().createDBObject(
2519  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
2520 }
2521 
// Creates a dataframe table described by this statement in the session's catalog.
//
// Visible steps: take an exclusive lock, check create privileges (condition elided),
// fail if a table with the same name exists, translate table elements into column
// descriptors (rejecting duplicate column names case-insensitively), fill the dataframe
// descriptor, apply storage options, create the table and register the DB object.
//
// Throws std::runtime_error for missing privileges, an existing table, unsupported table
// constraints and duplicate column names.
//
// NOTE: listing lines 2526-2527, 2530-2531, 2539, 2547, 2571-2574 and 2576 are missing
// from this listing; `df_td` is declared on one of them.
2522 void CreateDataframeStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2523  auto& catalog = session.getCatalog();
2524 
2525  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2528 
2529  // check access privileges
2532  throw std::runtime_error("Table " + *table_ +
2533  " will not be created. User has no create privileges.");
2534  }
2535 
2536  if (catalog.getMetadataForTable(*table_) != nullptr) {
2537  throw std::runtime_error("Table " + *table_ + " already exists.");
2538  }
2540  std::list<ColumnDescriptor> columns;
2541  std::vector<SharedDictionaryDef> shared_dict_defs;
2542 
// Upper-cased names seen so far, used to detect duplicate columns regardless of case.
2543  std::unordered_set<std::string> uc_col_names;
2544  for (auto& e : table_element_list_) {
2545  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2546  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2548  this, shared_dict_def, columns, shared_dict_defs, catalog);
2549  shared_dict_defs.push_back(*shared_dict_def);
2550  continue;
2551  }
2552  if (!dynamic_cast<ColumnDef*>(e.get())) {
2553  throw std::runtime_error("Table constraints are not supported yet.");
2554  }
2555  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2556  ColumnDescriptor cd;
2557  cd.columnName = *coldef->get_column_name();
2558  const auto uc_col_name = boost::to_upper_copy<std::string>(cd.columnName);
2559  const auto it_ok = uc_col_names.insert(uc_col_name);
// insert() reports false in .second when the name was already present.
2560  if (!it_ok.second) {
2561  throw std::runtime_error("Column '" + cd.columnName + "' defined more than once");
2562  }
2563  setColumnDescriptor(cd, coldef);
2564  columns.push_back(cd);
2565  }
2566 
// Descriptor defaults; further assignments are on lines missing from this listing.
2567  df_td.tableName = *table_;
2568  df_td.nColumns = columns.size();
2569  df_td.isView = false;
2570  df_td.fragmenter = nullptr;
2575  df_td.maxRows = DEFAULT_MAX_ROWS;
2577  if (!storage_options_.empty()) {
2578  for (auto& p : storage_options_) {
2579  get_dataframe_definitions(df_td, p, columns);
2580  }
2581  }
// Dataframes have no shard key, hence the nullptr.
2582  df_td.keyMetainfo = serialize_key_metainfo(nullptr, shared_dict_defs);
2583  df_td.userId = session.get_currentUser().userId;
// The storage type field is set from *filename_.
2584  df_td.storageType = *filename_;
2585 
2586  catalog.createShardedTable(df_td, columns, shared_dict_defs);
2587  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2588  // privileges
2589  SysCatalog::instance().createDBObject(
2590  session.get_currentUser(), df_td.tableName, TableDBObjectType, catalog);
2591 }
2592 
// Runs `select_stmt` through Calcite and the relational algebra executor and returns the
// resulting rows; the target metadata of the result is written to `targets`.
//
// Parameters:
//   query_state_proxy      - gives access to the query state and, through it, the session.
//   select_stmt            - SQL text; passed through pg_shim() before planning.
//   targets                - output: overwritten with the result's target metadata.
//   validate_only          - forwarded into the ExecutionOptions initializer.
//   outer_fragment_indices - forwarded into the ExecutionOptions initializer.
//   allow_interrupt        - forwarded into the ExecutionOptions initializer.
//
// NOTE: listing lines 2602, 2620, 2626, 2642-2644 and 2647-2648 are missing from this
// listing; `executor` and `co` are defined on some of them.
2593 std::shared_ptr<ResultSet> getResultSet(QueryStateProxy query_state_proxy,
2594  const std::string select_stmt,
2595  std::vector<TargetMetaInfo>& targets,
2596  bool validate_only = false,
2597  std::vector<size_t> outer_fragment_indices = {},
2598  bool allow_interrupt = false) {
2599  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2600  auto& catalog = session->getCatalog();
2601 
// Without CUDA support the device type is always CPU; otherwise the session decides.
2603 #ifdef HAVE_CUDA
2604  const auto device_type = session->get_executor_device_type();
2605 #else
2606  const auto device_type = ExecutorDeviceType::CPU;
2607 #endif // HAVE_CUDA
2608  auto calcite_mgr = catalog.getCalciteMgr();
2609 
2610  // TODO MAT this should actually get the global or the session parameter for
2611  // view optimization
2612  const auto query_ra =
2613  calcite_mgr
2614  ->process(query_state_proxy, pg_shim(select_stmt), {}, true, false, false, true)
2615  .plan_result;
2616  RelAlgExecutor ra_executor(executor.get(),
2617  catalog,
2618  query_ra,
2619  query_state_proxy.getQueryState().shared_from_this());
// A registered kCpuMode query hint forces CPU execution regardless of device_type.
2621  const auto& query_hints = ra_executor.getParsedQueryHints();
2622  const auto cpu_mode_enabled = query_hints.isHintRegistered(QueryHint::kCpuMode);
2623  if (cpu_mode_enabled) {
2624  co.device_type = ExecutorDeviceType::CPU;
2625  }
2627  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2628  // struct
// Positional aggregate initialization: the meaning of each constant depends on the
// field order of ExecutionOptions, which is not visible in this file section.
2629  ExecutionOptions eo = {false,
2630  true,
2631  false,
2632  true,
2633  false,
2634  false,
2635  validate_only,
2636  false,
2637  10000,
2638  false,
2639  false,
2640  1000,
2641  allow_interrupt,
2645  outer_fragment_indices};
// `result` starts out holding an empty ResultSet and is replaced by the query result.
2646  ExecutionResult result{std::make_shared<ResultSet>(std::vector<TargetInfo>{},
2649  nullptr,
2650  nullptr,
2651  0,
2652  0),
2653  {}};
2654  result = ra_executor.executeRelAlgQuery(co, eo, false, nullptr);
2655  targets = result.getTargetsMeta();
2656 
2657  return result.getRows();
2658 }
2659 
// Plans `sql_query_string` through Calcite and returns the value of
// RelAlgExecutor::getOuterFragmentCount() for the resulting plan.
//
// The query text is passed through pg_shim() before planning. A registered kCpuMode
// query hint forces the CPU device type in the compilation options.
//
// NOTE: listing lines 2665 and 2685 are missing from this listing; `executor` is defined
// on the first of them.
2660 size_t LocalConnector::getOuterFragmentCount(QueryStateProxy query_state_proxy,
2661  std::string& sql_query_string) {
2662  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2663  auto& catalog = session->getCatalog();
2664 
// Without CUDA support the device type is always CPU; otherwise the session decides.
2666 #ifdef HAVE_CUDA
2667  const auto device_type = session->get_executor_device_type();
2668 #else
2669  const auto device_type = ExecutorDeviceType::CPU;
2670 #endif // HAVE_CUDA
2671  auto calcite_mgr = catalog.getCalciteMgr();
2672 
2673  // TODO MAT this should actually get the global or the session parameter for
2674  // view optimization
2675  const auto query_ra =
2676  calcite_mgr
2677  ->process(
2678  query_state_proxy, pg_shim(sql_query_string), {}, true, false, false, true)
2679  .plan_result;
2680  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
2681  const auto& query_hints = ra_executor.getParsedQueryHints();
2682  const bool cpu_mode_enabled = query_hints.isHintRegistered(QueryHint::kCpuMode);
// One initializer of CompilationOptions (listing line 2685) is missing from this listing.
2683  CompilationOptions co = {cpu_mode_enabled ? ExecutorDeviceType::CPU : device_type,
2684  true,
2686  false};
2687  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2688  // struct
// Positional aggregate initialization: the meaning of each constant depends on the
// field order of ExecutionOptions, which is not visible in this file section.
2689  ExecutionOptions eo = {false,
2690  true,
2691  false,
2692  true,
2693  false,
2694  false,
2695  false,
2696  false,
2697  10000,
2698  false,
2699  false,
2700  0.9,
2701  false};
2702  return ra_executor.getOuterFragmentCount(co, eo);
2703 }
2704 
// Executes `sql_query_string` locally and returns the rows together with their target
// metadata.
//
// When `allow_interrupt` is set, `validate_only` is not set and the session id is
// non-empty, the query session is enrolled with the executor in PENDING_EXECUTOR state
// before execution. Execution itself is delegated to getResultSet().
//
// NOTE: listing lines 2714 and 2722 are missing from this listing; `executor` is defined
// on the first of them.
2705 AggregatedResult LocalConnector::query(QueryStateProxy query_state_proxy,
2706  std::string& sql_query_string,
2707  std::vector<size_t> outer_frag_indices,
2708  bool validate_only,
2709  bool allow_interrupt) {
2710  // TODO(PS): Should we be using the shimmed query in getResultSet?
// NOTE(review): pg_shimmed_select_query is not used in any visible line; getResultSet()
// receives the original string and applies pg_shim() itself.
2711  std::string pg_shimmed_select_query = pg_shim(sql_query_string);
2712 
2713  std::vector<TargetMetaInfo> target_metainfos;
2715  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
// An absent session yields an empty session id, which disables enrollment below.
2716  auto query_session = session ? session->get_session_id() : "";
2717  auto query_submitted_time = query_state_proxy.getQueryState().getQuerySubmittedTime();
2718  if (allow_interrupt && !validate_only && !query_session.empty()) {
2719  executor->enrollQuerySession(query_session,
2720  sql_query_string,
2721  query_submitted_time,
2723  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR);
2724  }
2725  auto result_rows = getResultSet(query_state_proxy,
2726  sql_query_string,
2727  target_metainfos,
2728  validate_only,
2729  outer_frag_indices,
2730  allow_interrupt);
2731  AggregatedResult res = {result_rows, target_metainfos};
2732  return res;
2733 }
2734 
2735 std::vector<AggregatedResult> LocalConnector::query(
2736  QueryStateProxy query_state_proxy,
2737  std::string& sql_query_string,
2738  std::vector<size_t> outer_frag_indices,
2739  bool allow_interrupt) {
2740  auto res = query(
2741  query_state_proxy, sql_query_string, outer_frag_indices, false, allow_interrupt);
2742  return {res};
2743 }
2744 
2745 void LocalConnector::insertDataToLeaf(const Catalog_Namespace::SessionInfo& session,
2746  const size_t leaf_idx,
2747  Fragmenter_Namespace::InsertData& insert_data) {
2748  CHECK(leaf_idx == 0);
2749  auto& catalog = session.getCatalog();
2750  auto created_td = catalog.getMetadataForTable(insert_data.tableId);
2751  ChunkKey chunkKey = {catalog.getCurrentDB().dbId, created_td->tableId};
2752  // TODO(adb): Ensure that we have previously obtained a write lock for this table's
2753  // data.
2754  created_td->fragmenter->insertDataNoCheckpoint(insert_data);
2755 }
2756 
2757 void LocalConnector::checkpoint(const Catalog_Namespace::SessionInfo& session,
2758  int table_id) {
2759  auto& catalog = session.getCatalog();
2760  catalog.checkpointWithAutoRollback(table_id);
2761 }
2762 
2763 void LocalConnector::rollback(const Catalog_Namespace::SessionInfo& session,
2764  int table_id) {
2765  auto& catalog = session.getCatalog();
2766  auto db_id = catalog.getDatabaseId();
2767  auto table_epochs = catalog.getTableEpochs(db_id, table_id);
2768  catalog.setTableEpochs(db_id, table_epochs);
2769 }
2770 
// Derives column descriptors from the target metadata of a query result.
//
// Two parallel lists are built: one mirroring the result's physical types as-is, and
// one adjusted for table creation (comp param / date handling below). `for_create`
// selects which list is returned.
//
// NOTE(review): this listing omits some original lines (inner lines 2787 and 2799);
// line 2787 is evidently the `if (...) {` that opens the comp-param block below.
2771 std::list<ColumnDescriptor> LocalConnector::getColumnDescriptors(AggregatedResult& result,
2772  bool for_create) {
2773  std::list<ColumnDescriptor> column_descriptors;
2774  std::list<ColumnDescriptor> column_descriptors_for_create;
2775 
2776  int rowid_suffix = 0;
2777  for (const auto& target_metainfo : result.targets_meta) {
2778  ColumnDescriptor cd;
2779  cd.columnName = target_metainfo.get_resname();
// A result column named exactly "rowid" gets a numeric suffix appended
// ("rowid0", "rowid1", ...).
2780  if (cd.columnName == "rowid") {
2781  cd.columnName += std::to_string(rowid_suffix++);
2782  }
2783  cd.columnType = target_metainfo.get_physical_type_info();
2784 
// Start the create-variant as a copy; only the fields touched below differ.
2785  ColumnDescriptor cd_for_create = cd;
2786 
2788  // we need to reset the comp param (as this points to the actual dictionary)
2789  if (cd.columnType.is_array()) {
2790  // for dict encoded arrays, it is always 4 bytes
2791  cd_for_create.columnType.set_comp_param(32);
2792  } else {
// Comp param becomes the column's size in bits.
2793  cd_for_create.columnType.set_comp_param(cd.columnType.get_size() * 8);
2794  }
2795  }
2796 
2797  if (cd.columnType.is_date() && !cd.columnType.is_date_in_days()) {
2798  // default to kENCODING_DATE_IN_DAYS encoding
2800  cd_for_create.columnType.set_comp_param(0);
2801  }
2802 
2803  column_descriptors_for_create.push_back(cd_for_create);
2804  column_descriptors.push_back(cd);
2805  }
2806 
2807  if (for_create) {
2808  return column_descriptors_for_create;
2809  }
2810 
2811  return column_descriptors;
2812 }
2813 
2814 InsertIntoTableAsSelectStmt::InsertIntoTableAsSelectStmt(
2815  const rapidjson::Value& payload) {
2816  CHECK(payload.HasMember("name"));
2817  table_name_ = json_str(payload["name"]);
2818 
2819  CHECK(payload.HasMember("query"));
2820  select_query_ = json_str(payload["query"]);
2821 
2822  boost::replace_all(select_query_, "\n", " ");
2823  select_query_ = "(" + select_query_ + ")";
2824 
2825  if (payload.HasMember("columns")) {
2826  CHECK(payload["columns"].IsArray());
2827  for (auto& column : payload["columns"].GetArray()) {
2828  std::string s = json_str(column);
2829  column_list_.emplace_back(std::unique_ptr<std::string>(new std::string(s)));
2830  }
2831  }
2832 }
2833 
// Optionally validates the target table against the SELECT, then runs the SELECT and
// loads its rows into the target table. Shared by INSERT INTO ... SELECT and CREATE
// TABLE AS SELECT.
//
//   query_state_proxy - gives access to the session and the query submission time.
//   td                - descriptor of the target table.
//   validate_table    - when true, checks that the table exists, is not a view, that
//                       the privilege check passes, and that source and target
//                       columns are compatible; throws std::runtime_error otherwise.
//   for_CTAS          - only selects the "CTAS" / "ITAS" label used in messages.
//
// If anything throws while loading, a rollback is attempted through leafs_connector_
// and the exception is rethrown. On success, non-temporary tables are checkpointed.
//
// NOTE(review): this listing omits a number of original lines (for example the
// declarations of `executor`, `factory`, `param` and `insert_data`, and several `if`
// headers), so some statements below appear without their opening line.
2834 void InsertIntoTableAsSelectStmt::populateData(QueryStateProxy query_state_proxy,
2835  const TableDescriptor* td,
2836  bool validate_table,
2837  bool for_CTAS) {
2838  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2839  auto& catalog = session->getCatalog();
2841 
2842  LocalConnector local_connector;
2843  bool populate_table = false;
2844 
// With an injected connector we always populate. Otherwise fall back to the local
// connector and populate only when not running as a cluster.
// NOTE(review): leafs_connector_ is pointed at a stack local here and no reset is
// visible in this listing, so it would dangle after return — confirm.
2845  if (leafs_connector_) {
2846  populate_table = true;
2847  } else {
2848  leafs_connector_ = &local_connector;
2849  if (!g_cluster) {
2850  populate_table = true;
2851  }
2852  }
2853 
// Resolves the target columns: every column returned by the catalog lookup when no
// explicit column list was given, otherwise the named columns in the order listed.
2854  auto get_target_column_descriptors = [this, &catalog](const TableDescriptor* td) {
2855  std::vector<const ColumnDescriptor*> target_column_descriptors;
2856  if (column_list_.empty()) {
2857  auto list = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
2858  target_column_descriptors = {std::begin(list), std::end(list)};
2859  } else {
2860  for (auto& c : column_list_) {
2861  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
2862  if (cd == nullptr) {
2863  throw std::runtime_error("Column " + *c + " does not exist.");
2864  }
2865  target_column_descriptors.push_back(cd);
2866  }
2867  }
2868 
2869  return target_column_descriptors;
2870  };
2871 
// NOTE(review): evaluated before the null check on `td` below; presumably
// table_is_temporary tolerates a null descriptor — confirm.
2872  bool is_temporary = table_is_temporary(td);
2873 
2874  if (validate_table) {
2875  // check access privileges
2876  if (!td) {
2877  throw std::runtime_error("Table " + table_name_ + " does not exist.");
2878  }
2879  if (td->isView) {
2880  throw std::runtime_error("Insert to views is not supported yet.");
2881  }
2882 
2883  if (!session->checkDBAccessPrivileges(DBObjectType::TableDBObjectType,
2885  table_name_)) {
2886  throw std::runtime_error("User has no insert privileges on " + table_name_ + ".");
2887  }
2888 
2889  // only validate the select query so we get the target types
2890  // correctly, but do not populate the result set
2891  auto result = local_connector.query(query_state_proxy, select_query_, {}, true, true);
2892  auto source_column_descriptors = local_connector.getColumnDescriptors(result, false);
2893 
2894  std::vector<const ColumnDescriptor*> target_column_descriptors =
2895  get_target_column_descriptors(td);
2896 
2897  if (source_column_descriptors.size() != target_column_descriptors.size()) {
2898  throw std::runtime_error("The number of source and target columns does not match.");
2899  }
2900 
// Pairwise compatibility checks between the i-th source and target column.
// The source descriptors are a std::list, hence the std::next walk per index.
2901  for (int i = 0; i < source_column_descriptors.size(); i++) {
2902  const ColumnDescriptor* source_cd =
2903  &(*std::next(source_column_descriptors.begin(), i));
2904  const ColumnDescriptor* target_cd = target_column_descriptors.at(i);
2905 
// Differing base types are rejected only when either side is a time, geometry,
// array or boolean type.
2906  if (source_cd->columnType.get_type() != target_cd->columnType.get_type()) {
2907  auto type_cannot_be_cast = [](const auto& col_type) {
2908  return (col_type.is_time() || col_type.is_geometry() || col_type.is_array() ||
2909  col_type.is_boolean());
2910  };
2911 
2912  if (type_cannot_be_cast(source_cd->columnType) ||
2913  type_cannot_be_cast(target_cd->columnType)) {
2914  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2915  source_cd->columnType.get_type_name() +
2916  "' and target '" + target_cd->columnName + " " +
2917  target_cd->columnType.get_type_name() +
2918  "' column types do not match.");
2919  }
2920  }
// Arrays must agree on element subtype.
2921  if (source_cd->columnType.is_array()) {
2922  if (source_cd->columnType.get_subtype() != target_cd->columnType.get_subtype()) {
2923  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2924  source_cd->columnType.get_type_name() +
2925  "' and target '" + target_cd->columnName + " " +
2926  target_cd->columnType.get_type_name() +
2927  "' array column element types do not match.");
2928  }
2929  }
2930 
// Decimals (scalar or as array elements) must agree on scale.
2931  if (source_cd->columnType.is_decimal() ||
2932  source_cd->columnType.get_elem_type().is_decimal()) {
2933  SQLTypeInfo sourceType = source_cd->columnType;
2934  SQLTypeInfo targetType = target_cd->columnType;
2935 
2936  if (source_cd->columnType.is_array()) {
2937  sourceType = source_cd->columnType.get_elem_type();
2938  targetType = target_cd->columnType.get_elem_type();
2939  }
2940 
2941  if (sourceType.get_scale() != targetType.get_scale()) {
2942  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2943  source_cd->columnType.get_type_name() +
2944  "' and target '" + target_cd->columnName + " " +
2945  target_cd->columnType.get_type_name() +
2946  "' decimal columns scales do not match.");
2947  }
2948  }
2949 
// Strings require a string target with the same compression (encoding).
2950  if (source_cd->columnType.is_string()) {
2951  if (!target_cd->columnType.is_string()) {
2952  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2953  source_cd->columnType.get_type_name() +
2954  "' and target '" + target_cd->columnName + " " +
2955  target_cd->columnType.get_type_name() +
2956  "' column types do not match.");
2957  }
2958  if (source_cd->columnType.get_compression() !=
2959  target_cd->columnType.get_compression()) {
2960  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2961  source_cd->columnType.get_type_name() +
2962  "' and target '" + target_cd->columnName + " " +
2963  target_cd->columnType.get_type_name() +
2964  "' columns string encodings do not match.");
2965  }
2966  }
2967 
// Timestamps must agree on dimension (precision).
2968  if (source_cd->columnType.is_timestamp() && target_cd->columnType.is_timestamp()) {
2969  if (source_cd->columnType.get_dimension() !=
2970  target_cd->columnType.get_dimension()) {
2971  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2972  source_cd->columnType.get_type_name() +
2973  "' and target '" + target_cd->columnName + " " +
2974  target_cd->columnType.get_type_name() +
2975  "' timestamp column precisions do not match.");
2976  }
2977  }
2978 
// For the remaining types (not string, geometry, integer, decimal, date, time or
// timestamp) the source must not be wider than the target.
2979  if (!source_cd->columnType.is_string() && !source_cd->columnType.is_geometry() &&
2980  !source_cd->columnType.is_integer() && !source_cd->columnType.is_decimal() &&
2981  !source_cd->columnType.is_date() && !source_cd->columnType.is_time() &&
2982  !source_cd->columnType.is_timestamp() &&
2983  source_cd->columnType.get_size() > target_cd->columnType.get_size()) {
2984  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2985  source_cd->columnType.get_type_name() +
2986  "' and target '" + target_cd->columnName + " " +
2987  target_cd->columnType.get_type_name() +
2988  "' column encoding sizes do not match.");
2989  }
2990  }
2991  }
2992 
2993  if (!populate_table) {
2994  return;
2995  }
2996 
// Counters feeding the VLOG summary at the end.
2997  int64_t total_row_count = 0;
2998  int64_t total_source_query_time_ms = 0;
2999  int64_t total_target_value_translate_time_ms = 0;
3000  int64_t total_data_load_time_ms = 0;
3001 
3002  Fragmenter_Namespace::InsertDataLoader insertDataLoader(*leafs_connector_);
3003  auto target_column_descriptors = get_target_column_descriptors(td);
3004  auto outer_frag_count =
3005  leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
3006 
// A fragment count of 0 still runs the loop once, with no fragment restriction.
3007  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
3008  auto query_session = session ? session->get_session_id() : "";
3010  std::string work_type_str = for_CTAS ? "CTAS" : "ITAS";
3011  try {
3012  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
3013  std::vector<size_t> allowed_outer_fragment_indices;
3014 
3015  if (outer_frag_count) {
3016  allowed_outer_fragment_indices.push_back(outer_frag_idx);
3017  }
3018 
3019  const auto query_clock_begin = timer_start();
3020  std::vector<AggregatedResult> query_results =
3021  leafs_connector_->query(query_state_proxy,
3022  select_query_,
3023  allowed_outer_fragment_indices,
3025  total_source_query_time_ms += timer_stop(query_clock_begin);
3026 
3027  auto start_time = query_state_proxy.getQueryState().getQuerySubmittedTime();
3028  auto query_str = "INSERT_DATA for " + work_type_str;
3030  // In the clean-up phase of the query execution for collecting aggregated result
3031  // of SELECT query, we remove its query session info, so we need to enroll the
3032  // session info again
3033  executor->enrollQuerySession(query_session,
3034  query_str,
3035  start_time,
3037  QuerySessionStatus::QueryStatus::RUNNING);
3038  }
3039 
// Scope guard: clears the session status again when this loop iteration exits,
// including on exceptions.
3040  ScopeGuard clearInterruptStatus = [executor, &query_session, &start_time] {
3041  // this data population is non-kernel operation, so we manually cleanup
3042  // the query session info in the cleanup phase
3044  executor->clearQuerySessionStatus(query_session, start_time, false);
3045  }
3046  };
3047 
3048  for (auto& res : query_results) {
3049  if (UNLIKELY(checkInterrupt(query_session, executor))) {
3050  throw std::runtime_error(
3051  "Query execution has been interrupted while performing " + work_type_str);
3052  }
3053  auto& result_rows = res.rs;
3054  result_rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
3055  const auto num_rows = result_rows->rowCount();
3056 
3057  if (0 == num_rows) {
3058  continue;
3059  }
3060 
3061  total_row_count += num_rows;
3062 
3063  size_t leaf_count = leafs_connector_->leafCount();
3064 
3065  // ensure that at least 1 row is processed per block up to a maximum of 65536 rows
3066  const size_t rows_per_block =
3067  std::max(std::min(num_rows / leaf_count, size_t(64 * 1024)), size_t(1));
3068 
// One converter per target column; rebuilt for every block further below.
3069  std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
3070 
3072 
3073  const int num_worker_threads = std::thread::hardware_concurrency();
3074 
// Per-thread [start, end) ranges over result-set entries. The start index is
// advanced by the convert functions so the next block resumes where this one
// stopped.
3075  std::vector<size_t> thread_start_idx(num_worker_threads),
3076  thread_end_idx(num_worker_threads);
3077  bool can_go_parallel = !result_rows->isTruncated() && rows_per_block > 20000;
3078 
// Next free row slot within the current block; shared by all workers.
3079  std::atomic<size_t> crt_row_idx{0};
3080 
// Parallel path: converts the result-set entry at `idx`. Empty entries are
// skipped without consuming a row slot; running out of slots sets stop_convert.
3081  auto do_work = [&result_rows, &value_converters, &crt_row_idx](
3082  const size_t idx,
3083  const size_t block_end,
3084  const size_t num_cols,
3085  const size_t thread_id,
3086  bool& stop_convert) {
3087  const auto result_row = result_rows->getRowAtNoTranslations(idx);
3088  if (!result_row.empty()) {
3089  size_t target_row = crt_row_idx.fetch_add(1);
3090  if (target_row >= block_end) {
3091  stop_convert = true;
3092  return;
3093  }
3094  for (unsigned int col = 0; col < num_cols; col++) {
3095  const auto& mapd_variant = result_row[col];
3096  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3097  }
3098  }
3099  };
3100 
// Worker body for the parallel path: walks this thread's entry range, optionally
// polling for interrupts every 0x10000 entries, and records where it stopped.
// NOTE(review): the `if` that selects between the two loops (inner line 3113)
// is missing from this listing.
3101  auto convert_function = [&thread_start_idx,
3102  &thread_end_idx,
3103  &value_converters,
3104  &executor,
3105  &query_session,
3106  &work_type_str,
3107  &do_work](const int thread_id, const size_t block_end) {
3108  const int num_cols = value_converters.size();
3109  const size_t start = thread_start_idx[thread_id];
3110  const size_t end = thread_end_idx[thread_id];
3111  size_t idx = 0;
3112  bool stop_convert = false;
3114  size_t local_idx = 0;
3115  for (idx = start; idx < end; ++idx, ++local_idx) {
3116  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3117  checkInterrupt(query_session, executor))) {
3118  throw std::runtime_error(
3119  "Query execution has been interrupted while performing " +
3120  work_type_str);
3121  }
3122  do_work(idx, block_end, num_cols, thread_id, stop_convert);
3123  if (stop_convert) {
3124  break;
3125  }
3126  }
3127  } else {
3128  for (idx = start; idx < end; ++idx) {
3129  do_work(idx, block_end, num_cols, thread_id, stop_convert);
3130  if (stop_convert) {
3131  break;
3132  }
3133  }
3134  }
3135  thread_start_idx[thread_id] = idx;
3136  };
3137 
// Sequential path: reserves a row slot first, then pulls the next row from the
// result set's iterator (`idx` is not used to address the row).
3138  auto single_threaded_value_converter =
3139  [&crt_row_idx, &value_converters, &result_rows](const size_t idx,
3140  const size_t block_end,
3141  const size_t num_cols,
3142  bool& stop_convert) {
3143  size_t target_row = crt_row_idx.fetch_add(1);
3144  if (target_row >= block_end) {
3145  stop_convert = true;
3146  return;
3147  }
3148  const auto result_row = result_rows->getNextRow(false, false);
3149  CHECK(!result_row.empty());
3150  for (unsigned int col = 0; col < num_cols; col++) {
3151  const auto& mapd_variant = result_row[col];
3152  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3153  }
3154  };
3155 
// Driver for the sequential path; same shape as convert_function above.
// NOTE(review): the selecting `if` (inner line 3170) is missing from this listing.
3156  auto single_threaded_convert_function = [&value_converters,
3157  &thread_start_idx,
3158  &thread_end_idx,
3159  &executor,
3160  &query_session,
3161  &work_type_str,
3162  &single_threaded_value_converter](
3163  const int thread_id,
3164  const size_t block_end) {
3165  const int num_cols = value_converters.size();
3166  const size_t start = thread_start_idx[thread_id];
3167  const size_t end = thread_end_idx[thread_id];
3168  size_t idx = 0;
3169  bool stop_convert = false;
3171  size_t local_idx = 0;
3172  for (idx = start; idx < end; ++idx, ++local_idx) {
3173  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3174  checkInterrupt(query_session, executor))) {
3175  throw std::runtime_error(
3176  "Query execution has been interrupted while performing " +
3177  work_type_str);
3178  }
3179  single_threaded_value_converter(idx, block_end, num_cols, stop_convert);
3180  if (stop_convert) {
3181  break;
3182  }
3183  }
3184  } else {
3185  for (idx = start; idx < end; ++idx) {
// NOTE(review): this branch passes `end` where the branch above passes
// `block_end` — confirm this is intended.
3186  single_threaded_value_converter(idx, end, num_cols, stop_convert);
3187  if (stop_convert) {
3188  break;
3189  }
3190  }
3191  }
3192  thread_start_idx[thread_id] = idx;
3193  };
3194 
// Parallel: split the entry range into equal strides (rounded up), one per
// worker. Sequential: slot 0 covers every entry.
3195  if (can_go_parallel) {
3196  const size_t entry_count = result_rows->entryCount();
3197  for (size_t
3198  i = 0,
3199  start_entry = 0,
3200  stride = (entry_count + num_worker_threads - 1) / num_worker_threads;
3201  i < num_worker_threads && start_entry < entry_count;
3202  ++i, start_entry += stride) {
3203  const auto end_entry = std::min(start_entry + stride, entry_count);
3204  thread_start_idx[i] = start_entry;
3205  thread_end_idx[i] = end_entry;
3206  }
3207  } else {
3208  thread_start_idx[0] = 0;
3209  thread_end_idx[0] = result_rows->entryCount();
3210  }
3211 
// Convert and insert the result in blocks of at most rows_per_block rows.
3212  for (size_t block_start = 0; block_start < num_rows;
3213  block_start += rows_per_block) {
3214  const auto num_rows_this_itr = block_start + rows_per_block < num_rows
3215  ? rows_per_block
3216  : num_rows - block_start;
3217  crt_row_idx = 0; // reset block tracker
3218  value_converters.clear();
3219  int colNum = 0;
// Build a fresh converter for each target column, pairing it positionally with
// the source column's metadata.
// NOTE(review): the lines declaring `factory` / `param` (inner lines 3223,
// 3231) are missing from this listing.
3220  for (const auto targetDescriptor : target_column_descriptors) {
3221  auto sourceDataMetaInfo = res.targets_meta[colNum++];
3222 
3224  num_rows_this_itr,
3225  catalog,
3226  sourceDataMetaInfo,
3227  targetDescriptor,
3228  targetDescriptor->columnType,
3229  !targetDescriptor->columnType.get_notnull(),
3230  result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
3232  ? executor->getStringDictionaryProxy(
3233  sourceDataMetaInfo.get_type_info().get_comp_param(),
3234  result_rows->getRowSetMemOwner(),
3235  true)
3236  : nullptr};
3237  auto converter = factory.create(param);
3238  value_converters.push_back(std::move(converter));
3239  }
3240 
3241  const auto translate_clock_begin = timer_start();
3242  if (can_go_parallel) {
3243  std::vector<std::future<void>> worker_threads;
3244  for (int i = 0; i < num_worker_threads; ++i) {
3245  worker_threads.push_back(
3246  std::async(std::launch::async, convert_function, i, num_rows_this_itr));
3247  }
3248 
// Wait for every worker first, then get() each future so a worker's exception
// is rethrown here.
3249  for (auto& child : worker_threads) {
3250  child.wait();
3251  }
3252  for (auto& child : worker_threads) {
3253  child.get();
3254  }
3255 
3256  } else {
3257  single_threaded_convert_function(0, num_rows_this_itr);
3258  }
3259 
3260  // finalize the insert data
3261  auto finalizer_func =
3262  [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
3263  targetValueConverter->finalizeDataBlocksForInsertData();
3264  };
3265 
// Finalize all converters concurrently, one async task per column.
3266  std::vector<std::future<void>> worker_threads;
3267  for (auto& converterPtr : value_converters) {
3268  worker_threads.push_back(
3269  std::async(std::launch::async, finalizer_func, converterPtr.get()));
3270  }
3271 
3272  for (auto& child : worker_threads) {
3273  child.wait();
3274  }
3275  for (auto& child : worker_threads) {
3276  child.get();
3277  }
3278 
// NOTE(review): the declaration of `insert_data` (inner line 3279) is missing
// from this listing.
3280  insert_data.databaseId = catalog.getCurrentDB().dbId;
3281  CHECK(td);
3282  insert_data.tableId = td->tableId;
3283  insert_data.numRows = num_rows_this_itr;
3284 
3285  for (int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
3287  checkInterrupt(query_session, executor))) {
3288  throw std::runtime_error(
3289  "Query execution has been interrupted while performing " +
3290  work_type_str);
3291  }
3292  value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
3293  }
3294  total_target_value_translate_time_ms += timer_stop(translate_clock_begin);
3295 
3296  const auto data_load_clock_begin = timer_start();
// The returned holder is kept in scope across insertData below.
3297  auto data_memory_holder =
3298  import_export::fill_missing_columns(&catalog, insert_data);
3299  insertDataLoader.insertData(*session, insert_data);
3300  total_data_load_time_ms += timer_stop(data_load_clock_begin);
3301  }
3302  }
3303  }
3304  } catch (...) {
// Best-effort rollback: a failure of the rollback itself is logged and the
// original exception is still rethrown.
3305  try {
3306  leafs_connector_->rollback(*session, td->tableId);
3307  } catch (std::exception& e) {
3308  LOG(ERROR) << "An error occurred during ITAS rollback attempt. Table id: "
3309  << td->tableId << ", Error: " << e.what();
3310  }
3311  throw;
3312  }
3313 
3314  int64_t total_time_ms = total_source_query_time_ms +
3315  total_target_value_translate_time_ms + total_data_load_time_ms;
3316 
3317  VLOG(1) << "CTAS/ITAS " << total_row_count << " rows loaded in " << total_time_ms
3318  << "ms (outer_frag_count=" << outer_frag_count
3319  << ", query_time=" << total_source_query_time_ms
3320  << "ms, translation_time=" << total_target_value_translate_time_ms
3321  << "ms, data_load_time=" << total_data_load_time_ms
3322  << "ms)\nquery: " << select_query_;
3323 
// Temporary tables are not checkpointed.
3324  if (!is_temporary) {
3325  leafs_connector_->checkpoint(*session, td->tableId);
3326  }
3327 }
3328 
// Executes INSERT INTO <table> SELECT ...: verifies the target table exists, takes
// locks on the target and on every table the SELECT reads, then delegates to
// populateData() with validation enabled.
//
// Throws std::runtime_error if the target table does not exist; anything thrown by
// populateData() propagates unchanged.
//
// NOTE(review): this listing omits some original lines (e.g. the rest of the
// execute-lock statement, the declaration of `locks`, and the lock-type lines inside
// each emplace_back), so several statements below appear truncated.
3329 void InsertIntoTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
// The shared_ptr wraps a stack copy of the session; null_deleter keeps it from
// being deleted.
3330  auto session_copy = session;
3331  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3332  &session_copy, boost::null_deleter());
3333  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
3334  auto stdlog = STDLOG(query_state);
3335  auto& catalog = session_ptr->getCatalog();
3336 
3337  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3340 
3341  if (catalog.getMetadataForTable(table_name_) == nullptr) {
3342  throw std::runtime_error("ITAS failed: table " + table_name_ + " does not exist.");
3343  }
3344 
3346  std::vector<std::string> tables;
3347 
3348  // get the table info
3349  auto calcite_mgr = catalog.getCalciteMgr();
3350 
3351  // TODO MAT this should actually get the global or the session parameter for
3352  // view optimization
3353  const auto result = calcite_mgr->process(query_state->createQueryStateProxy(),
3354  pg_shim(select_query_),
3355  {},
3356  true,
3357  false,
3358  false,
3359  true);
3360 
// Collect the tables the SELECT reads from (first element of each resolved entry),
// plus the insert target.
3361  for (auto& tab : result.resolved_accessed_objects.tables_selected_from) {
3362  tables.emplace_back(tab[0]);
3363  }
3364  tables.emplace_back(table_name_);
3365 
3366  // force sort into tableid order in case of name change to guarantee fixed order of
3367  // mutex access
3368  std::sort(tables.begin(),
3369  tables.end(),
3370  [&catalog](const std::string& a, const std::string& b) {
3371  return catalog.getMetadataForTable(a, false)->tableId <
3372  catalog.getMetadataForTable(b, false)->tableId;
3373  });
3374 
// Drop duplicates; equal names are adjacent after the sort by table id.
3375  tables.erase(unique(tables.begin(), tables.end()), tables.end());
3376  for (const auto& table : tables) {
3377  locks.emplace_back(
3380  catalog, table)));
3381  if (table == table_name_) {
3382  // Acquire an insert data lock for updates/deletes, consistent w/ insert. The
3383  // table data lock will be acquired in the fragmenter during checkpoint.
3384  locks.emplace_back(
3387  catalog.getDatabaseId(), (*locks.back())())));
3388  } else {
3389  locks.emplace_back(
3392  catalog.getDatabaseId(), (*locks.back())())));
3393  }
3394  }
3395 
3396  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
// The try/catch only rethrows; it does no cleanup of its own.
3397  try {
3398  populateData(query_state->createQueryStateProxy(), td, true, false);
3399  } catch (...) {
3400  throw;
3401  }
3402 }
3403 
3404 CreateTableAsSelectStmt::CreateTableAsSelectStmt(const rapidjson::Value& payload)
3405  : InsertIntoTableAsSelectStmt(payload) {
3406  if (payload.HasMember("temporary")) {
3407  is_temporary_ = json_bool(payload["temporary"]);
3408  } else {
3409  is_temporary_ = false;
3410  }
3411 
3412  if (payload.HasMember("ifNotExists")) {
3413  if_not_exists_ = json_bool(payload["ifNotExists"]);
3414  } else {
3415  if_not_exists_ = false;
3416  }
3417 
3418  CHECK(payload.HasMember("options"));
3419  if (payload["options"].IsObject()) {
3420  for (const auto& option : payload["options"].GetObject()) {
3421  auto option_name = std::make_unique<std::string>(json_str(option.name));
3422  std::unique_ptr<Literal> literal_value;
3423  if (option.value.IsString()) {
3424  auto literal_string = std::make_unique<std::string>(json_str(option.value));
3425  literal_value = std::make_unique<StringLiteral>(literal_string.release());
3426  } else if (option.value.IsInt() || option.value.IsInt64()) {
3427  literal_value = std::make_unique<IntLiteral>(json_i64(option.value));
3428  } else if (option.value.IsNull()) {
3429  literal_value = std::make_unique<NullLiteral>();
3430  } else {
3431  throw std::runtime_error("Unable to handle literal for " + *option_name);
3432  }
3433  CHECK(literal_value);
3434 
3435  storage_options_.emplace_back(std::make_unique<NameValueAssign>(
3436  option_name.release(), literal_value.release()));
3437  }
3438  } else {
3439  CHECK(payload["options"].IsNull());
3440  }
3441 }
3442 
3444  auto session_copy = session;
3445  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3446  &session_copy, boost::null_deleter());
3447  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
3448  auto stdlog = STDLOG(query_state);
3449  LocalConnector local_connector;
3450  auto& catalog = session.getCatalog();
3451  bool create_table = nullptr == leafs_connector_;
3452 
3453  std::set<std::string> select_tables;
3454  if (create_table) {
3455  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3458 
3459  // check access privileges
3462  throw std::runtime_error("CTAS failed. Table " + table_name_ +
3463  " will not be created. User has no create privileges.");
3464  }
3465 
3466  if (catalog.getMetadataForTable(table_name_) != nullptr) {
3467  if (if_not_exists_) {
3468  return;
3469  }
3470  throw std::runtime_error("Table " + table_name_ +
3471  " already exists and no data was loaded.");
3472  }
3473 
3474  // get the table info
3475  auto calcite_mgr = catalog.getCalciteMgr();
3476 
3477  // TODO MAT this should actually get the global or the session parameter for
3478  // view optimization
3479  const auto result = calcite_mgr->process(query_state->createQueryStateProxy(),
3481  {},
3482  true,
3483  false,
3484  false,
3485  true);
3486 
3487  // TODO 12 Apr 2021 MAT schema change need to keep schema in future
3488  // just keeping it moving for now
3489  for (auto& tab : result.resolved_accessed_objects.tables_selected_from) {
3490  select_tables.insert(tab[0]);
3491  }
3492 
3493  // only validate the select query so we get the target types
3494  // correctly, but do not populate the result set
3495  // we currently have exclusive access to the system so this is safe
3496  auto validate_result = local_connector.query(
3497  query_state->createQueryStateProxy(), select_query_, {}, true, false);
3498 
3499  const auto column_descriptors_for_create =
3500  local_connector.getColumnDescriptors(validate_result, true);
3501 
3502  // some validation as the QE might return some out of range column types
3503  for (auto& cd : column_descriptors_for_create) {
3504  if (cd.columnType.is_decimal() && cd.columnType.get_precision() > 18) {
3505  throw std::runtime_error(cd.columnName + ": Precision too high, max 18.");
3506  }
3507  }
3508 
3509  TableDescriptor td;
3510  td.tableName = table_name_;
3511  td.userId = session.get_currentUser().userId;
3512  td.nColumns = column_descriptors_for_create.size();
3513  td.isView = false;
3514  td.fragmenter = nullptr;
3521  if (is_temporary_) {
3523  } else {
3525  }
3526 
3527  bool use_shared_dictionaries = true;
3528 
3529  if (!storage_options_.empty()) {
3530  for (auto& p : storage_options_) {
3531  if (boost::to_lower_copy<std::string>(*p->get_name()) ==
3532  "use_shared_dictionaries") {
3533  const StringLiteral* literal =
3534  dynamic_cast<const StringLiteral*>(p->get_value());
3535  if (nullptr == literal) {
3536  throw std::runtime_error(
3537  "USE_SHARED_DICTIONARIES must be a string parameter");
3538  }
3539  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
3540  use_shared_dictionaries = val == "true" || val == "1" || val == "t";
3541  } else {
3542  get_table_definitions_for_ctas(td, p, column_descriptors_for_create);
3543  }
3544  }
3545  }
3546 
3547  std::vector<SharedDictionaryDef> sharedDictionaryRefs;
3548 
3549  if (use_shared_dictionaries) {
3550  const auto source_column_descriptors =
3551  local_connector.getColumnDescriptors(validate_result, false);
3552  const auto mapping = catalog.getDictionaryToColumnMapping();
3553 
3554  for (auto& source_cd : source_column_descriptors) {
3555  const auto& ti = source_cd.columnType;
3556  if (ti.is_string()) {
3557  if (ti.get_compression() == kENCODING_DICT) {
3558  int dict_id = ti.get_comp_param();
3559  auto it = mapping.find(dict_id);
3560  if (mapping.end() != it) {
3561  const auto targetColumn = it->second;
3562  auto targetTable =
3563  catalog.getMetadataForTable(targetColumn->tableId, false);
3564  CHECK(targetTable);
3565  LOG(INFO) << "CTAS: sharing text dictionary on column "
3566  << source_cd.columnName << " with " << targetTable->tableName
3567  << "." << targetColumn->columnName;
3568  sharedDictionaryRefs.push_back(
3569  SharedDictionaryDef(source_cd.columnName,
3570  targetTable->tableName,
3571  targetColumn->columnName));
3572  }
3573  }
3574  }
3575  }
3576  }
3577 
3578  // currently no means of defining sharding in CTAS
3579  td.keyMetainfo = serialize_key_metainfo(nullptr, sharedDictionaryRefs);
3580 
3581  catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs, true);
3582  // TODO (max): It's transactionally unsafe, should be fixed: we may create object
3583  // w/o privileges
3584  SysCatalog::instance().createDBObject(
3585  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
3586  }
3587 
3588  // note there is a time where we do not have any executor outer lock here. someone could
3589  // come along and mess with the data or other tables.
3590  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3593 
3595  std::vector<std::string> tables;
3596  tables.insert(tables.end(), select_tables.begin(), select_tables.end());
3597  CHECK_EQ(tables.size(), select_tables.size());
3598  tables.emplace_back(table_name_);
3599  // force sort into tableid order in case of name change to guarantee fixed order of
3600  // mutex access
3601  std::sort(tables.begin(),
3602  tables.end(),
3603  [&catalog](const std::string& a, const std::string& b) {
3604  return catalog.getMetadataForTable(a, false)->tableId <
3605  catalog.getMetadataForTable(b, false)->tableId;
3606  });
3607  tables.erase(unique(tables.begin(), tables.end()), tables.end());
3608  for (const auto& table : tables) {
3609  locks.emplace_back(
3612  catalog, table)));
3613  if (table == table_name_) {
3614  // Aquire an insert data lock for updates/deletes, consistent w/ insert. The
3615  // table data lock will be aquired in the fragmenter during checkpoint.
3616  locks.emplace_back(
3619  catalog.getDatabaseId(), (*locks.back())())));
3620  } else {
3621  locks.emplace_back(
3624  catalog.getDatabaseId(), (*locks.back())())));
3625  }
3626  }
3627 
3628  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
3629 
3630  try {
3631  populateData(query_state->createQueryStateProxy(), td, false, true);
3632  } catch (...) {
3633  if (!g_cluster) {
3634  const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
3635  if (created_td) {
3636  catalog.dropTable(created_td);
3637  }
3638  }
3639  throw;
3640  }
3641 }
3642 
3643 DropTableStmt::DropTableStmt(const rapidjson::Value& payload) {
3644  CHECK(payload.HasMember("tableName"));
3645  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
3646 
3647  if_exists_ = false;
3648  if (payload.HasMember("ifExists")) {
3649  if_exists_ = json_bool(payload["ifExists"]);
3650  }
3651 }
3652 
// DROP TABLE execution body (the signature line is missing from this listing).
// Resolves the table under a schema write lock, checks the session's privileges,
// and then drops the table through the catalog.
3654  auto& catalog = session.getCatalog();
3655 
3656  // TODO(adb): the catalog should be handling this locking.
3657  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3660 
3661  const TableDescriptor* td{nullptr};
3662  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
3663 
// Building the lock container also serves as the table lookup: a std::runtime_error
// raised here is swallowed when if_exists_ is set and re-raised otherwise.
3664  try {
3665  td_with_lock =
3666  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
3668  catalog, *table_, false));
3669  td = (*td_with_lock)();
3670  } catch (const std::runtime_error& e) {
3671  if (if_exists_) {
3672  return;
3673  } else {
// NOTE(review): `throw e;` throws a copy of the caught std::runtime_error, so an
// exception of a more-derived type would be sliced; a bare `throw;` would keep it.
3674  throw e;
3675  }
3676  }
3677 
// Past this point both the descriptor and its lock container must be set.
3678  CHECK(td);
3679  CHECK(td_with_lock);
3680 
3681  // check access privileges
3682  if (!session.checkDBAccessPrivileges(
3684  throw std::runtime_error("Table " + *table_ +
3685  " will not be dropped. User has no proper privileges.");
3686  }
3687 
3689 
// The data write lock is held in a local so it stays alive across dropTable().
3690  auto table_data_write_lock =
3692  catalog.dropTable(td);
3693 
3694  // invalidate cached hashtable
3696 }
3697 
3699 
3700 void AlterTableStmt::delegateExecute(const rapidjson::Value& payload,
3701  const Catalog_Namespace::SessionInfo& session) {
3702  CHECK(payload.HasMember("tableName"));
3703  auto tableName = json_str(payload["tableName"]);
3704 
3705  CHECK(payload.HasMember("alterType"));
3706  auto type = json_str(payload["alterType"]);
3707 
3708  if (type == "RENAME_TABLE") {
3709  CHECK(payload.HasMember("newTableName"));
3710  auto newTableName = json_str(payload["newTableName"]);
3711  Parser::RenameTableStmt(new std::string(tableName), new std::string(newTableName))
3712  .execute(session);
3713 
3714  } else if (type == "RENAME_COLUMN") {
3715  CHECK(payload.HasMember("columnName"));
3716  auto columnName = json_str(payload["columnName"]);
3717  CHECK(payload.HasMember("newColumnName"));
3718  auto newColumnName = json_str(payload["newColumnName"]);
3719  Parser::RenameColumnStmt(new std::string(tableName),
3720  new std::string(columnName),
3721  new std::string(newColumnName))
3722  .execute(session);
3723 
3724  } else if (type == "ADD_COLUMN") {
3725  CHECK(payload.HasMember("columnData"));
3726  CHECK(payload["columnData"].IsArray());
3727 
3728  // New Columns go into this list
3729  std::list<ColumnDef*>* table_element_list_ = new std::list<ColumnDef*>;
3730 
3731  const auto elements = payload["columnData"].GetArray();
3732  for (const auto& element : elements) {
3733  CHECK(element.IsObject());
3734  CHECK(element.HasMember("type"));
3735  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
3736  auto col_def = column_from_json(element);
3737  table_element_list_->emplace_back(col_def.release());
3738  } else {
3739  LOG(FATAL) << "Unsupported element type for ALTER TABLE: "
3740  << element["type"].GetString();
3741  }
3742  }
3743 
3744  Parser::AddColumnStmt(new std::string(tableName), table_element_list_)
3745  .execute(session);
3746 
3747  } else if (type == "DROP_COLUMN") {
3748  CHECK(payload.HasMember("columnData"));
3749  auto columnData = json_str(payload["columnData"]);
3750  // Convert columnData to std::list<std::string*>*
3751  // allocate std::list<> as DropColumnStmt will delete it;
3752  std::list<std::string*>* cols = new std::list<std::string*>;
3753  std::vector<std::string> cols1;
3754  boost::split(cols1, columnData, boost::is_any_of(","));
3755  for (auto s : cols1) {
3756  // strip leading/trailing spaces/quotes/single quotes
3757  boost::algorithm::trim_if(s, boost::is_any_of(" \"'`"));
3758  std::string* str = new std::string(s);
3759  cols->emplace_back(str);
3760  }
3761 
3762  Parser::DropColumnStmt(new std::string(tableName), cols).execute(session);
3763 
3764  } else if (type == "ALTER_OPTIONS") {
3765  CHECK(payload.HasMember("options"));
3766 
3767  if (payload["options"].IsObject()) {
3768  for (const auto& option : payload["options"].GetObject()) {
3769  std::string* option_name = new std::string(json_str(option.name));
3770  Literal* literal_value;
3771  if (option.value.IsString()) {
3772  std::string literal_string = json_str(option.value);
3773 
3774  // iff this string can be converted to INT
3775  // ... do so because it is necessary for AlterTableParamStmt
3776  std::size_t sz;
3777  int iVal = std::stoi(literal_string, &sz);
3778  if (sz == literal_string.size()) {
3779  literal_value = new IntLiteral(iVal);
3780  } else {
3781  literal_value = new StringLiteral(&literal_string);
3782  }
3783  } else if (option.value.IsInt() || option.value.IsInt64()) {
3784  literal_value = new IntLiteral(json_i64(option.value));
3785  } else if (option.value.IsNull()) {
3786  literal_value = new NullLiteral();
3787  } else {
3788  throw std::runtime_error("Unable to handle literal for " + *option_name);
3789  }
3790  CHECK(literal_value);
3791 
3792  NameValueAssign* p1 = new NameValueAssign(option_name, literal_value);
3793  Parser::AlterTableParamStmt(new std::string(tableName), p1).execute(session);
3794  }
3795 
3796  } else {
3797  CHECK(payload["options"].IsNull());
3798  }
3799  }
3800 }
3801 
// TRUNCATE TABLE execution body (the signature line is missing from this listing).
// Looks up the table, verifies the current user's privileges, rejects views, and then
// truncates the table through the catalog.
// Throws std::runtime_error if the table does not exist, the privilege check fails,
// or the target is a view.
3803  auto& catalog = session.getCatalog();
3804 
3805  // TODO: Removal of the FileMgr is not thread safe. Take a global system write lock
3806  // when truncating a table
3807  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3810 
3811  const auto td_with_lock =
3813  catalog, *table_, true);
3814  const auto td = td_with_lock();
3815  if (!td) {
3816  throw std::runtime_error("Table " + *table_ + " does not exist.");
3817  }
3818 
3819  // check access privileges
// The specific privilege requested on dbObject is set on a line not shown here.
3820  std::vector<DBObject> privObjects;
3821  DBObject dbObject(*table_, TableDBObjectType);
3822  dbObject.loadKey(catalog);
3824  privObjects.push_back(dbObject);
3825  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3826  throw std::runtime_error("Table " + *table_ + " will not be truncated. User " +
3827  session.get_currentUser().userLoggable() +
3828  " has no proper privileges.");
3829  }
3830 
3831  if (td->isView) {
3832  throw std::runtime_error(*table_ + " is a view. Cannot Truncate.");
3833  }
// The data write lock is held in a local so it stays alive across truncateTable().
3835  auto table_data_write_lock =
3837  catalog.truncateTable(td);
3838 
3839  // invalidate cached hashtable
3841 }
3842 
3844  const TableDescriptor* td) {
// Verifies that the session's current user may alter table `td`.
// Super users and the user whose id matches td->userId pass immediately; everyone
// else must pass SysCatalog::checkPrivileges for a table DBObject (the privilege
// requested on it is set on a line not shown in this listing).
// Throws std::runtime_error when the check fails.
3845  if (session.get_currentUser().isSuper ||
3846  session.get_currentUser().userId == td->userId) {
3847  return;
3848  }
3849  std::vector<DBObject> privObjects;
3850  DBObject dbObject(td->tableName, TableDBObjectType);
3851  dbObject.loadKey(session.getCatalog());
3853  privObjects.push_back(dbObject);
3854  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3855  throw std::runtime_error("Current user does not have the privilege to alter table: " +
3856  td->tableName);
3857  }
3858 }
3859 
3860 RenameUserStmt::RenameUserStmt(const rapidjson::Value& payload) {
3861  CHECK(payload.HasMember("name"));
3862  username_ = std::make_unique<std::string>(json_str(payload["name"]));
3863  CHECK(payload.HasMember("newName"));
3864  new_username_ = std::make_unique<std::string>(json_str(payload["newName"]));
3865 }
3866 
// Rename-user execution body (the signature line and the declaration of `user` are
// missing from this listing).
// Only super users may rename; the source user must exist in the system catalog.
// Throws std::runtime_error on either failure.
3868  if (!session.get_currentUser().isSuper) {
3869  throw std::runtime_error("Only a super user can rename users.");
3870  }
3871 
// The lookup result is used only as an existence check here.
3873  if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
3874  throw std::runtime_error("User " + *username_ + " does not exist.");
3875  }
3876 
3877  SysCatalog::instance().renameUser(*username_, *new_username_);
3878 }
3879 
3880 RenameDBStmt::RenameDBStmt(const rapidjson::Value& payload) {
3881  CHECK(payload.HasMember("name"));
3882  database_name_ = std::make_unique<std::string>(json_str(payload["name"]));
3883  CHECK(payload.HasMember("newName"));
3884  new_database_name_ = std::make_unique<std::string>(json_str(payload["newName"]));
3885 }
3886 
3889 
// Rename-database execution body (the signature line and the declaration of `db` are
// missing from this listing).
// The database must exist, and the caller must be a super user or the database
// owner; std::runtime_error is thrown otherwise.
3890  // TODO: use database lock instead
3891  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3894 
3895  if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
3896  throw std::runtime_error("Database " + *database_name_ + " does not exist.");
3897  }
3898 
// Ownership is decided by comparing the caller's userId with db.dbOwner.
3899  if (!session.get_currentUser().isSuper &&
3900  session.get_currentUser().userId != db.dbOwner) {
3901  throw std::runtime_error("Only a super user or the owner can rename the database.");
3902  }
3903 
3904  SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
3905 }
3906 
3907 RenameTableStmt::RenameTableStmt(const rapidjson::Value& payload) {
3908  CHECK(payload.HasMember("tableNames"));
3909  CHECK(payload["tableNames"].IsArray());
3910  const auto elements = payload["tableNames"].GetArray();
3911  for (const auto& element : elements) {
3912  CHECK(element.HasMember("name"));
3913  CHECK(element.HasMember("newName"));
3914  tablesToRename_.emplace_back(new std::string(json_str(element["name"])),
3915  new std::string(json_str(element["newName"])));
3916  }
3917 }
3918 
3919 RenameTableStmt::RenameTableStmt(std::string* tab_name, std::string* new_tab_name) {
3920  tablesToRename_.emplace_back(tab_name, new_tab_name);
3921 }
3922 
3924  std::list<std::pair<std::string, std::string>> tableNames) {
// Queues one rename per (old name, new name) pair, in list order, storing a
// heap-allocated copy of each name.
// NOTE(review): `auto item` copies every pair per iteration; `const auto&` would
// avoid the extra copy.
3925  for (auto item : tableNames) {
3926  tablesToRename_.emplace_back(new std::string(item.first),
3927  new std::string(item.second));
3928  }
3929 }
3930 
3931 using SubstituteMap = std::map<std::string, std::string>;
3932 
3933 // Namespace fns used to track a left-to-right execution of RENAME TABLE
3934 // and verify that the command should be (entirely/mostly) valid
3935 //
3936 namespace {
3937 
3938 static constexpr char const* EMPTY_NAME{""};
3939 
// Returns `name` followed by "_tmp" and the current std::time() value in decimal.
// The result only changes when the clock value changes, so two calls with the same
// `name` and the same clock value produce the same string.
std::string generateUniqueTableName(std::string name) {
  // TODO - is there a "better" way to create a tmp name for the table
  const std::time_t now = std::time(nullptr);
  name += "_tmp";
  name += std::to_string(now);
  return name;
}
3945 
3946 void recordRename(SubstituteMap& sMap, std::string oldName, std::string newName) {
3947  sMap[oldName] = newName;
3948 }
3949 
3951  SubstituteMap& sMap,
3952  std::string tableName) {
// Resolves `tableName` through the substitution map and returns the name to use.
//  - Already tracked and mapped to EMPTY_NAME: returns tableName itself.
//  - Already tracked otherwise: returns the mapped name.
//  - Not tracked yet: seeds the map from the catalog (maps to itself when the table
//    exists, to EMPTY_NAME when it does not) and returns tableName.
3953  if (sMap.find(tableName) != sMap.end()) {
3954  if (sMap[tableName] == EMPTY_NAME)
3955  return tableName;
3956  return sMap[tableName];
3957  } else {
3958  // lookup table in src catalog
3959  const TableDescriptor* td = catalog.getMetadataForTable(tableName);
3960  if (td) {
3961  sMap[tableName] = tableName;
3962  } else {
3963  sMap[tableName] = EMPTY_NAME;
3964  }
3965  }
3966  return tableName;
3967 }
3968 
3969 bool hasData(SubstituteMap& sMap, std::string tableName) {
3970  // assumes loadTable has been previously called
3971  return (sMap[tableName] != EMPTY_NAME);
3972 }
3973 
// Validates the substitution map after all renames were planned; throws
// std::runtime_error for the first entry that still maps to a different, non-empty
// name.
// NOTE(review): `auto it` copies each map entry; `const auto&` would avoid that.
3975  // Substitution map should be clean at end of rename:
3976  // all items in map must (map to self) or (map to EMPTY_NAME) by end
3977 
3978  for (auto it : sMap) {
3979  if ((it.second) != EMPTY_NAME && (it.first) != (it.second)) {
3980  throw std::runtime_error(
3981  "Error: Attempted to overwrite and lose data in table: \'" + (it.first) + "\'");
3982  }
3983  }
3984 }
3985 } // namespace
3986 
// RENAME TABLE execution body (the signature line is missing from this listing).
// Walks tablesToRename_ left to right, tracking in a substitution map where each
// table's data would live after every step, and accumulates the concrete
// (old name -> new name) renames in `names`. When a destination name is still
// occupied, the occupant is first moved to a generated temporary name. The whole
// list is handed to catalog.renameTable() in a single call.
// Throws std::runtime_error when a source table has no data, or when the plan would
// leave a table's data under a name other than its own (checkNameSubstition).
3988  auto& catalog = session.getCatalog();
3989 
3990  // TODO(adb): the catalog should be handling this locking (see AddColumnStmt)
3991  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3994 
3995  // accumulated vector of table names: oldName->newName
3996  std::vector<std::pair<std::string, std::string>> names;
3997 
3998  SubstituteMap tableSubtituteMap;
3999 
4000  for (auto& item : tablesToRename_) {
4001  std::string curTableName = *(item.first);
4002  std::string newTableName = *(item.second);
4003 
4004  // Note: if rename (a->b, b->a)
4005  // requires a tmp name change (a->tmp, b->a, tmp->a),
4006  // inject that here because
4007  // catalog.renameTable() assumes cleanliness else will fail
4008 
4009  std::string altCurTableName = loadTable(catalog, tableSubtituteMap, curTableName);
4010  std::string altNewTableName = loadTable(catalog, tableSubtituteMap, newTableName);
4011 
4012  if (altCurTableName != curTableName && altCurTableName != EMPTY_NAME) {
4013  // rename is a one-shot deal, reset the mapping once used
4014  recordRename(tableSubtituteMap, curTableName, curTableName);
4015  }
4016 
4017  // Check to see if the command (as-entered) will likely execute cleanly (logic-wise)
4018  // src tables exist before copying from
4019  // destination table collisions
4020  // handled (a->b, b->a)
4021  // or flagged (pre-existing a,b ... "RENAME TABLE a->c, b->c" )
4022  // handle multiple chained renames, tmp names (a->tmp, b->a, tmp->a)
4023  // etc.
4024  //
4025  if (hasData(tableSubtituteMap, altCurTableName)) {
// td is null when the resolved source is a temporary name that does not exist in the
// catalog yet; the checks below then do not apply.
4026  const TableDescriptor* td = catalog.getMetadataForTable(altCurTableName);
4027  if (td) {
4028  // any table that pre-exists must pass these tests
4030  check_alter_table_privilege(session, td);
4031  }
4032 
4033  if (hasData(tableSubtituteMap, altNewTableName)) {
4034  std::string tmpNewTableName = generateUniqueTableName(altNewTableName);
4035  // rename: newTableName to tmpNewTableName to get it out of the way
4036  // because it was full
4037  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4038  recordRename(tableSubtituteMap, altNewTableName, tmpNewTableName);
4039  recordRename(tableSubtituteMap, tmpNewTableName, tmpNewTableName);
4040  names.push_back(
4041  std::pair<std::string, std::string>(altNewTableName, tmpNewTableName));
4042  names.push_back(
4043  std::pair<std::string, std::string>(altCurTableName, altNewTableName));
4044  } else {
4045  // rename: curNewTableName to newTableName
4046  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4047  recordRename(tableSubtituteMap, altNewTableName, altNewTableName);
4048  names.push_back(
4049  std::pair<std::string, std::string>(altCurTableName, altNewTableName));
4050  }
4051  } else {
4052  throw std::runtime_error("Source table \'" + curTableName + "\' does not exist.");
4053  }
4054  }
4055  checkNameSubstition(tableSubtituteMap);
4056 
4057  catalog.renameTable(names);
4058 
4059  // just to be explicit, clean out the list, the unique_ptr will delete
4060  while (!tablesToRename_.empty()) {
4061  tablesToRename_.pop_front();
4062  }
4063 } // end of the function body (by brace matching this is not a namespace close)
4064 
// Body that fills column descriptor `cd` from column definition `coldef` (the
// signature line and the first line of the final call are missing from this listing).
// Derives the NOT NULL flag and an optional default-value string from the column
// constraint, then forwards them with the column type and compression.
4066  bool not_null;
4067  const ColumnConstraintDef* cc = coldef->get_column_constraint();
4068  if (cc == nullptr) {
4069  not_null = false;
4070  } else {
4071  not_null = cc->get_notnull();
4072  }
// default_value_ptr stays null when there is no constraint, no default literal, or
// the default is the text NULL (compared case-insensitively).
4073  std::string default_value;
4074  const std::string* default_value_ptr = nullptr;
4075  if (cc) {
4076  if (auto def_val_literal = cc->get_defaultval()) {
// String literals contribute their raw string value; other literals their to_string().
4077  auto defaultsp = dynamic_cast<const StringLiteral*>(def_val_literal);
4078  default_value =
4079  defaultsp ? *defaultsp->get_stringval() : def_val_literal->to_string();
4080  // The preprocessing below is needed because:
4081  // a) TypedImportBuffer expects arrays in the {...} format
4082  // b) TypedImportBuffer expects string literals inside arrays w/o any quotes
4083  if (coldef->get_column_type()->get_is_array()) {
// Rewrites ARRAY[...] (case-insensitive) to {...}, then drops every single quote.
4084  std::regex array_re(R"(^ARRAY\s*\[(.*)\]$)", std::regex_constants::icase);
4085  default_value = std::regex_replace(default_value, array_re, "{$1}");
4086  boost::erase_all(default_value, "\'");
4087  }
4088  // NULL for default value means no default value
4089  if (!boost::iequals(default_value, "NULL")) {
4090  default_value_ptr = &default_value;
4091  }
4092  }
4093  }
4095  cd,
4096  coldef->get_column_type(),
4097  not_null,
4098  coldef->get_compression(),
4099  default_value_ptr);
4100 }
4101 
4103  const TableDescriptor* td) {
// Pre-flight validation for adding columns to table `td`.
// Throws std::runtime_error when the table does not exist, is a view, is temporary,
// the user lacks the alter privilege, or any requested column name already exists.
// Side effect: when coldefs_ is empty, the single coldef_ is moved into it so the
// rest of the code can iterate coldefs_ uniformly.
4104  auto& catalog = session.getCatalog();
4105  if (!td) {
4106  throw std::runtime_error("Table " + *table_ + " does not exist.");
4107  } else {
4108  if (td->isView) {
4109  throw std::runtime_error("Adding columns to a view is not supported.");
4110  }
4112  if (table_is_temporary(td)) {
4113  throw std::runtime_error(
4114  "Adding columns to temporary tables is not yet supported.");
4115  }
// The `;` after the closing brace below is an empty statement.
4116  };
4117 
4118  check_alter_table_privilege(session, td);
4119 
4120  if (0 == coldefs_.size()) {
4121  coldefs_.push_back(std::move(coldef_));
4122  }
4123 
4124  for (const auto& coldef : coldefs_) {
4125  auto& new_column_name = *coldef->get_column_name();
4126  if (catalog.getMetadataForColumn(td->tableId, new_column_name) != nullptr) {
4127  throw std::runtime_error("Column " + new_column_name + " already exists.");
4128  }
4129  }
4130 }
4131 
// ALTER TABLE ... ADD COLUMN execution body (the signature line is missing from this
// listing).
// After validation, the new columns are registered in the catalog and, when the table
// already has rows, a default value is added to each new column's import buffer
// before the buffers are loaded without a checkpoint. Everything runs inside one
// SQLite transaction; any exception rolls the catalog and the transaction back and
// is rethrown.
4133  auto& catalog = session.getCatalog();
4134 
4135  // TODO(adb): the catalog should be handling this locking.
4136  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4139 
4140  const auto td_with_lock =
4142  catalog, *table_, true);
4143  const auto td = td_with_lock();
4144 
4145  check_executable(session, td);
4146 
4147  CHECK(td->fragmenter);
4148  if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
4149  td->fragmenter)) {
4150  throw std::runtime_error(
4151  "Adding columns to a table is not supported when using the \"sort_column\" "
4152  "option.");
4153  }
4154 
4155  // Do not take a data write lock, as the fragmenter may call `deleteFragments`
4156  // during a cap operation. Note that the schema write lock will prevent concurrent
4157  // inserts along with all other queries.
4158 
4159  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
4160  try {
// cds: new column descriptors keyed by column name.
// cid_coldefs: column id -> originating ColumnDef, or nullptr for the physical
// columns produced by expanding a geo column.
4161  std::map<const std::string, const ColumnDescriptor> cds;
4162  std::map<const int, const ColumnDef*> cid_coldefs;
4163  for (const auto& coldef : coldefs_) {
4164  ColumnDescriptor cd;
4165  setColumnDescriptor(cd, coldef.get());
4166  catalog.addColumn(*td, cd);
4167  cds.emplace(*coldef->get_column_name(), cd);
4168  cid_coldefs.emplace(cd.columnId, coldef.get());
4169 
4170  // expand geo column to phy columns
4171  if (cd.columnType.is_geometry()) {
4172  std::list<ColumnDescriptor> phy_geo_columns;
4173  catalog.expandGeoColumn(cd, phy_geo_columns);
// NOTE: this loop variable `cd` shadows the outer `cd` declared above.
4174  for (auto& cd : phy_geo_columns) {
4175  catalog.addColumn(*td, cd);
4176  cds.emplace(cd.columnName, cd);
4177  cid_coldefs.emplace(cd.columnId, nullptr);
4178  }
4179  }
4180  }
4181 
// One import buffer per new column (logical and physical).
4182  std::unique_ptr<import_export::Loader> loader(new import_export::Loader(catalog, td));
4183  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
4184  for (const auto& cd : cds) {
4185  import_buffers.emplace_back(std::make_unique<import_export::TypedImportBuffer>(
4186  &cd.second, loader->getStringDict(&cd.second)));
4187  }
4188  loader->setAddingColumns(true);
4189 
4190  // set_geo_physical_import_buffer below needs a sorted import_buffers
4191  std::sort(import_buffers.begin(),
4192  import_buffers.end(),
4193  [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
4194  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
4195  });
4196 
4197  size_t nrows = td->fragmenter->getNumRows();
4198  // if sharded, get total nrows from all sharded tables
4199  if (td->nShards > 0) {
4200  const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
4201  nrows = 0;
4202  std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](const auto& td) {
4203  nrows += td->fragmenter->getNumRows();
4204  });
4205  }
// Default values only need to be materialised when the table already has rows.
4206  if (nrows > 0) {
4207  int skip_physical_cols = 0;
// NOTE(review): `const auto cit` copies each map entry per iteration.
4208  for (const auto cit : cid_coldefs) {
4209  const auto cd = catalog.getMetadataForColumn(td->tableId, cit.first);
4210  const auto coldef = cit.second;
4211  const bool is_null = !cd->default_value.has_value();
4212 
4213  if (cd->columnType.get_notnull() && is_null) {
4214  throw std::runtime_error("Default value required for column " + cd->columnName +
4215  " because of NOT NULL constraint");
4216  }
4217 
// Find the import buffer that belongs to this column id; at most one is processed
// because of the `break` below.
4218  for (auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
4219  auto& import_buffer = *it;
4220  if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
// Logical columns (coldef set) always get a value; a physical column gets one only
// once the skip counter set by a preceding geo column has run out.
4221  if (coldef != nullptr ||
4222  skip_physical_cols-- <= 0) { // skip non-null phy col
4223  import_buffer->add_value(cd,
4224  cd->default_value.value_or("NULL"),
4225  is_null,
4227  if (cd->columnType.is_geometry()) {
4228  std::vector<double> coords, bounds;
4229  std::vector<int> ring_sizes, poly_rings;
4230  int render_group = 0;
4231  SQLTypeInfo tinfo{cd->columnType};
4233  cd->default_value.value_or("NULL"),
4234  tinfo,
4235  coords,
4236  bounds,
4237  ring_sizes,
4238  poly_rings,
4239  false)) {
4240  throw std::runtime_error("Bad geometry data: '" +
4241  cd->default_value.value_or("NULL") + "'");
4242  }
// col_idx is the position right after this geo column in the sorted buffer list.
4243  size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
4245  cd,
4246  import_buffers,
4247  col_idx,
4248  coords,
4249  bounds,
4250  ring_sizes,
4251  poly_rings,
4252  render_group);
4253  // skip following phy cols
4254  skip_physical_cols = cd->columnType.get_physical_cols();
4255  }
4256  }
4257  break;
4258  }
4259  }
4260  }
4261  }
4262 
4263  if (!loader->loadNoCheckpoint(import_buffers, nrows, &session)) {
4264  throw std::runtime_error("loadNoCheckpoint failed!");
4265  }
4266  catalog.roll(true);
4267  loader->checkpoint();
4268  catalog.getSqliteConnector().query("END TRANSACTION");
4269  } catch (...) {
// Undo the catalog changes and the SQLite transaction, then propagate the error.
4270  catalog.roll(false);
4271  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
4272  throw;
4273  }
4274 }
4275 
// ALTER TABLE ... DROP COLUMN execution body (the signature line is missing from this
// listing).
// Validates the request, then drops each named column together with its physical
// columns inside one SQLite transaction. On any exception the table is marked for
// reload, the catalog and the transaction are rolled back, and the exception is
// rethrown.
// Throws std::runtime_error for a missing table, a view, a temporary table, a missing
// column, a table that would be left without columns, or a sharding column.
4277  auto& catalog = session.getCatalog();
4278 
4279  // TODO(adb): the catalog should be handling this locking.
4280  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4283 
4284  const auto td_with_lock =
4286  catalog, *table_, true);
4287  const auto td = td_with_lock();
4288  if (!td) {
4289  throw std::runtime_error("Table " + *table_ + " does not exist.");
4290  }
4292  if (td->isView) {
4293  throw std::runtime_error("Dropping a column from a view is not supported.");
4294  }
4295  if (table_is_temporary(td)) {
4296  throw std::runtime_error(
4297  "Dropping a column from a temporary table is not yet supported.");
4298  }
4299 
4300  check_alter_table_privilege(session, td);
4301 
4302  for (const auto& column : columns_) {
4303  if (nullptr == catalog.getMetadataForColumn(td->tableId, *column)) {
4304  throw std::runtime_error("Column " + *column + " does not exist.");
4305  }
4306  }
4307 
// NOTE(review): the thresholds 2 and 3 presumably account for hidden system columns
// counted in nColumns (one more when a deleted-flag column exists) - confirm.
4308  if (td->nColumns <= (td->hasDeletedCol ? 3 : 2)) {
4309  throw std::runtime_error("Table " + *table_ + " has only one column.");
4310  }
4311 
4312  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
4313  try {
// Ids of every dropped column (logical and physical), passed to the fragmenters below.
4314  std::vector<int> columnIds;
4315  for (const auto& column : columns_) {
4316  ColumnDescriptor cd = *catalog.getMetadataForColumn(td->tableId, *column);
4317  if (td->nShards > 0 && td->shardedColumnId == cd.columnId) {
4318  throw std::runtime_error("Dropping sharding column " + cd.columnName +
4319  " is not supported.");
4320  }
4321  catalog.dropColumn(*td, cd);
4322  columnIds.push_back(cd.columnId);
// The physical columns are looked up at the ids immediately following the logical
// column's id.
4323  for (int i = 0; i < cd.columnType.get_physical_cols(); i++) {
4324  const auto pcd = catalog.getMetadataForColumn(td->tableId, cd.columnId + i + 1);
4325  CHECK(pcd);
4326  catalog.dropColumn(*td, *pcd);
4327  columnIds.push_back(cd.columnId + i + 1);
4328  }
4329  }
4330 
4331  for (auto shard : catalog.getPhysicalTablesDescriptors(td)) {
4332  shard->fragmenter->dropColumns(columnIds);
4333  }
4334  // if test forces to rollback
// The condition guarding this throw is on a line not shown in this listing.
4336  throw std::runtime_error("lol!");
4337  }
4338  catalog.roll(true);
4340  catalog.checkpoint(td->tableId);
4341  }
4342  catalog.getSqliteConnector().query("END TRANSACTION");
4343  } catch (...) {
// Mark the table for reload, undo catalog and SQLite changes, then propagate.
4344  catalog.setForReload(td->tableId);
4345  catalog.roll(false);
4346  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
4347  throw;
4348  }
4349 
4350  // invalidate cached hashtable
4352 }
4353 
// ALTER TABLE ... RENAME COLUMN execution body (the signature line is missing from
// this listing).
// Requires the alter-table privilege, an existing source column and an unused target
// name; throws std::runtime_error otherwise. The rename itself is done by the catalog.
4355  auto& catalog = session.getCatalog();
4356 
4357  const auto td_with_lock =
4359  catalog, *table_, false);
4360  const auto td = td_with_lock();
// A null descriptor is treated as a fatal internal error here, not a user error.
4361  CHECK(td);
4363 
4364  check_alter_table_privilege(session, td);
4365  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *column_);
4366  if (cd == nullptr) {
4367  throw std::runtime_error("Column " + *column_ + " does not exist.");
4368  }
4369  if (catalog.getMetadataForColumn(td->tableId, *new_column_name_) != nullptr) {
4370  throw std::runtime_error("Column " + *new_column_name_ + " already exists.");
4371  }
4372  catalog.renameColumn(td, cd, *new_column_name_);
4373 }
4374 
// ALTER TABLE ... SET <param> = <value> execution body (the signature line is missing
// from this listing).
// Supported parameters (matched case-insensitively): max_rollback_epochs, epoch and
// max_rows. The value must be an integer literal.
// Throws std::runtime_error for a missing table, a view, a temporary table, a
// non-integer value, or an unknown parameter name.
4376  enum TableParamType { MaxRollbackEpochs, Epoch, MaxRows };
4377  static const std::unordered_map<std::string, TableParamType> param_map = {
4378  {"max_rollback_epochs", TableParamType::MaxRollbackEpochs},
4379  {"epoch", TableParamType::Epoch},
4380  {"max_rows", TableParamType::MaxRows}};
4381  // Below is to ensure that executor is not currently executing on table when we might be
4382  // changing its storage. Question: will/should catalog write lock take care of this?
4383  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4386  auto& catalog = session.getCatalog();
4387  const auto td_with_lock =
4389  catalog, *table_, false);
4390  const auto td = td_with_lock();
4391  if (!td) {
4392  throw std::runtime_error("Table " + *table_ + " does not exist.");
4393  }
4394  if (td->isView) {
4395  throw std::runtime_error("Setting parameters for a view is not supported.");
4396  }
4397  if (table_is_temporary(td)) {
4398  throw std::runtime_error(
4399  "Setting parameters for a temporary table is not yet supported.");
4400  }
4401  check_alter_table_privilege(session, td);
4402 
// The parameter name is lower-cased before lookup; the value type is validated before
// the name, so a non-integer value is reported even for an unknown parameter.
4403  std::string param_name(*param_->get_name());
4404  boost::algorithm::to_lower(param_name);
4405  const IntLiteral* val_int_literal =
4406  dynamic_cast<const IntLiteral*>(param_->get_value());
4407  if (val_int_literal == nullptr) {
4408  throw std::runtime_error("Table parameters should be integers.");
4409  }
4410  const int64_t param_val = val_int_literal->get_intval();
4411 
4412  const auto param_it = param_map.find(param_name);
4413  if (param_it == param_map.end()) {
4414  throw std::runtime_error(param_name + " is not a settable table parameter.");
4415  }
4416  switch (param_it->second) {
4417  case MaxRollbackEpochs: {
4418  catalog.setMaxRollbackEpochs(td->tableId, param_val);
4419  break;
4420  }
4421  case Epoch: {
4422  catalog.setTableEpoch(catalog.getDatabaseId(), td->tableId, param_val);
4423  break;
4424  }
4425  case MaxRows: {
4426  catalog.setMaxRows(td->tableId, param_val);
4427  break;
4428  }
4429  default: {
4430  UNREACHABLE() << "Unexpected TableParamType value: " << param_it->second
4431  << ", key: " << param_it->first;
4432  }
4433  }
4434 }
4435 
// COPY ... FROM execution body, single-argument overload (the signature line is
// missing from this listing).
// Supplies a factory that builds a plain import_export::Importer and delegates to the
// overload that takes an importer factory.
4437  auto importer_factory = [](Catalog_Namespace::Catalog& catalog,
4438  const TableDescriptor* td,
4439  const std::string& file_path,
4440  const import_export::CopyParams& copy_params) {
4441  return std::make_unique<import_export::Importer>(catalog, td, file_path, copy_params);
4442  };
4443  return execute(session, importer_factory);
4444 }
4445 
4447  const std::function<std::unique_ptr<import_export::Importer>(
4449  const TableDescriptor*,
4450  const std::string&,
4451  const import_export::CopyParams&)>& importer_factory) {
4452  boost::regex non_local_file_regex{R"(^\s*(s3|http|https)://.+)",
4453  boost::regex::extended | boost::regex::icase};
4454  if (!boost::regex_match(*file_pattern_, non_local_file_regex)) {
4457  }
4458 
4459  size_t total_time = 0;
4460 
4461  // Prevent simultaneous import / truncate (see TruncateTableStmt::execute)
4462  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4465 
4466  const TableDescriptor* td{nullptr};
4467  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
4468  std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
4469 
4470  auto& catalog = session.getCatalog();
4471 
4472  try {
4473  td_with_lock = std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
4475  catalog, *table_));
4476  td = (*td_with_lock)();
4477  insert_data_lock = std::make_unique<lockmgr::WriteLock>(
4479  } catch (const std::runtime_error& e) {
4480  // noop
4481  // TODO(adb): We're really only interested in whether the table exists or not.
4482  // Create a more refined exception.
4483  }
4484 
4485  // if the table already exists, it's locked, so check access privileges
4486  if (td) {
4487  std::vector<DBObject> privObjects;
4488  DBObject dbObject(*table_, TableDBObjectType);
4489  dbObject.loadKey(catalog);
4491  privObjects.push_back(dbObject);
4492  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
4493  throw std::runtime_error("Violation of access privileges: user " +
4494  session.get_currentUser().userLoggable() +
4495  " has no insert privileges for table " + *table_ + ".");
4496  }
4497  }
4498 
4499  // since we'll have not only posix file names but also s3/hdfs/... url
4500  // we do not expand wildcard or check file existence here.
4501  // from here on, file_path contains something which may be a url
4502  // or a wildcard of file names;
4503  std::string file_path = *file_pattern_;
4504  import_export::CopyParams copy_params;
4505  if (!options_.empty()) {
4506  for (auto& p : options_) {
4507  if (boost::iequals(*p->get_name(), "max_reject")) {
4508  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4509  if (int_literal == nullptr) {
4510  throw std::runtime_error("max_reject option must be an integer.");
4511  }
4512  copy_params.max_reject = int_literal->get_intval();
4513  } else if (boost::iequals(*p->get_name(), "buffer_size")) {
4514  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4515  if (int_literal == nullptr) {
4516  throw std::runtime_error("buffer_size option must be an integer.");
4517  }
4518  copy_params.buffer_size = int_literal->get_intval();
4519  } else if (boost::iequals(*p->get_name(), "threads")) {
4520  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4521  if (int_literal == nullptr) {
4522  throw std::runtime_error("Threads option must be an integer.");
4523  }
4524  copy_params.threads = int_literal->get_intval();
4525  } else if (boost::iequals(*p->get_name(), "delimiter")) {
4526  const StringLiteral* str_literal =
4527  dynamic_cast<const StringLiteral*>(p->get_value());
4528  if (str_literal == nullptr) {
4529  throw std::runtime_error("Delimiter option must be a string.");
4530  } else if (str_literal->get_stringval()->length() != 1) {
4531  throw std::runtime_error("Delimiter must be a single character string.");
4532  }
4533  copy_params.delimiter = (*str_literal->get_stringval())[0];
4534  } else if (boost::iequals(*p->get_name(), "nulls")) {
4535  const StringLiteral* str_literal =
4536  dynamic_cast<const StringLiteral*>(p->get_value());
4537  if (str_literal == nullptr) {
4538  throw std::runtime_error("Nulls option must be a string.");
4539  }
4540  copy_params.null_str = *str_literal->get_stringval();
4541  } else if (boost::iequals(*p->get_name(), "header")) {
4542  const StringLiteral* str_literal =
4543  dynamic_cast<const StringLiteral*>(p->get_value());
4544  if (str_literal == nullptr) {
4545  throw std::runtime_error("Header option must be a boolean.");
4546  }
4547  copy_params.has_header = bool_from_string_literal(str_literal)
4550 #ifdef ENABLE_IMPORT_PARQUET
4551  } else if (boost::iequals(*p->get_name(), "parquet")) {
4552  const StringLiteral* str_literal =
4553  dynamic_cast<const StringLiteral*>(p->get_value());
4554  if (str_literal == nullptr) {
4555  throw std::runtime_error("Parquet option must be a boolean.");
4556  }
4557  if (bool_from_string_literal(str_literal)) {
4558  // not sure a parquet "table" type is proper, but to make code
4559  // look consistent in some places, let's set "table" type too
4560  copy_params.file_type = import_export::FileType::PARQUET;
4561  }
4562 #endif // ENABLE_IMPORT_PARQUET
4563  } else if (boost::iequals(*p->get_name(), "s3_access_key")) {
4564  const StringLiteral* str_literal =
4565  dynamic_cast<const StringLiteral*>(p->get_value());
4566  if (str_literal == nullptr) {
4567  throw std::runtime_error("Option s3_access_key must be a string.");
4568  }
4569  copy_params.s3_access_key = *str_literal->get_stringval();
4570  } else if (boost::iequals(*p->get_name(), "s3_secret_key")) {
4571  const StringLiteral* str_literal =
4572  dynamic_cast<const StringLiteral*>(p->get_value());
4573  if (str_literal == nullptr) {
4574  throw std::runtime_error("Option s3_secret_key must be a string.");
4575  }
4576  copy_params.s3_secret_key = *str_literal->get_stringval();
4577  } else if (boost::iequals(*p->get_name(), "s3_session_token")) {
4578  const StringLiteral* str_literal =
4579  dynamic_cast<const StringLiteral*>(p->get_value());
4580  if (str_literal == nullptr) {
4581  throw std::runtime_error("Option s3_session_token must be a string.");
4582  }
4583  copy_params.s3_session_token = *str_literal->get_stringval();
4584  } else if (boost::iequals(*p->get_name(), "s3_region")) {
4585  const StringLiteral* str_literal =
4586  dynamic_cast<const StringLiteral*>(p->get_value());
4587  if (str_literal == nullptr) {
4588  throw std::runtime_error("Option s3_region must be a string.");
4589  }
4590  copy_params.s3_region = *str_literal->get_stringval();
4591  } else if (boost::iequals(*p->get_name(), "s3_endpoint")) {
4592  const StringLiteral* str_literal =
4593  dynamic_cast<const StringLiteral*>(p->get_value());
4594  if (str_literal == nullptr) {
4595  throw std::runtime_error("Option s3_endpoint must be a string.");
4596  }
4597  copy_params.s3_endpoint = *str_literal->get_stringval();
4598  } else if (boost::iequals(*p->get_name(), "quote")) {
4599  const StringLiteral* str_literal =
4600  dynamic_cast<const StringLiteral*>(p->get_value());
4601  if (str_literal == nullptr) {
4602  throw std::runtime_error("Quote option must be a string.");
4603  } else if (str_literal->get_stringval()->length() != 1) {
4604  throw std::runtime_error("Quote must be a single character string.");
4605  }
4606  copy_params.quote = (*str_literal->get_stringval())[0];
4607  } else if (boost::iequals(*p->get_name(), "escape")) {
4608  const StringLiteral* str_literal =
4609  dynamic_cast<const StringLiteral*>(p->get_value());
4610  if (str_literal == nullptr) {
4611  throw std::runtime_error("Escape option must be a string.");
4612  } else if (str_literal->get_stringval()->length() != 1) {
4613  throw std::runtime_error("Escape must be a single character string.");
4614  }
4615  copy_params.escape = (*str_literal->get_stringval())[0];
4616  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
4617  const StringLiteral* str_literal =
4618  dynamic_cast<const StringLiteral*>(p->get_value());
4619  if (str_literal == nullptr) {
4620  throw std::runtime_error("Line_delimiter option must be a string.");
4621  } else if (str_literal->get_stringval()->length() != 1) {
4622  throw std::runtime_error("Line_delimiter must be a single character string.");
4623  }
4624  copy_params.line_delim = (*str_literal->get_stringval())[0];
4625  } else if (boost::iequals(*p->get_name(), "quoted")) {
4626  const StringLiteral* str_literal =
4627  dynamic_cast<const StringLiteral*>(p->get_value());
4628  if (str_literal == nullptr) {
4629  throw std::runtime_error("Quoted option must be a boolean.");
4630  }
4631  copy_params.quoted = bool_from_string_literal(str_literal);
4632  } else if (boost::iequals(*p->get_name(), "plain_text")) {
4633  const StringLiteral* str_literal =
4634  dynamic_cast<const StringLiteral*>(p->get_value());
4635  if (str_literal == nullptr) {
4636  throw std::runtime_error("plain_text option must be a boolean.");
4637  }
4638  copy_params.plain_text = bool_from_string_literal(str_literal);
4639  } else if (boost::iequals(*p->get_name(), "array_marker")) {
4640  const StringLiteral* str_literal =
4641  dynamic_cast<const StringLiteral*>(p->get_value());
4642  if (str_literal == nullptr) {
4643  throw std::runtime_error("Array Marker option must be a string.");
4644  } else if (str_literal->get_stringval()->length() != 2) {
4645  throw std::runtime_error(
4646  "Array Marker option must be exactly two characters. Default is {}.");
4647  }
4648  copy_params.array_begin = (*str_literal->get_stringval())[0];
4649  copy_params.array_end = (*str_literal->get_stringval())[1];
4650  } else if (boost::iequals(*p->get_name(), "array_delimiter")) {
4651  const StringLiteral* str_literal =
4652  dynamic_cast<const StringLiteral*>(p->get_value());
4653  if (str_literal == nullptr) {
4654  throw std::runtime_error("Array Delimiter option must be a string.");
4655  } else if (str_literal->get_stringval()->length() != 1) {
4656  throw std::runtime_error("Array Delimiter must be a single character string.");
4657  }
4658  copy_params.array_delim = (*str_literal->get_stringval())[0];
4659  } else if (boost::iequals(*p->get_name(), "lonlat")) {
4660  const StringLiteral* str_literal =
4661  dynamic_cast<const StringLiteral*>(p->get_value());
4662  if (str_literal == nullptr) {
4663  throw std::runtime_error("Lonlat option must be a boolean.");
4664  }
4665  copy_params.lonlat = bool_from_string_literal(str_literal);
4666  } else if (boost::iequals(*p->get_name(), "geo")) {
4667  const StringLiteral* str_literal =
4668  dynamic_cast<const StringLiteral*>(p->get_value());
4669  if (str_literal == nullptr) {
4670  throw std::runtime_error("Geo option must be a boolean.");
4671  }
4672  copy_params.file_type = bool_from_string_literal(str_literal)
4675  } else if (boost::iequals(*p->get_name(), "geo_coords_type")) {
4676  const StringLiteral* str_literal =
4677  dynamic_cast<const StringLiteral*>(p->get_value());
4678  if (str_literal == nullptr) {
4679  throw std::runtime_error("'geo_coords_type' option must be a string");
4680  }
4681  const std::string* s = str_literal->get_stringval();
4682  if (boost::iequals(*s, "geography")) {
4683  throw std::runtime_error(
4684  "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
4685  // copy_params.geo_coords_type = kGEOGRAPHY;
4686  } else if (boost::iequals(*s, "geometry")) {
4687  copy_params.geo_coords_type = kGEOMETRY;
4688  } else {
4689  throw std::runtime_error(
4690  "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
4691  "'GEOMETRY'): " +
4692  *s);
4693  }
4694  } else if (boost::iequals(*p->get_name(), "geo_coords_encoding")) {
4695  const StringLiteral* str_literal =
4696  dynamic_cast<const StringLiteral*>(p->get_value());
4697  if (str_literal == nullptr) {
4698  throw std::runtime_error("'geo_coords_encoding' option must be a string");
4699  }
4700  const std::string* s = str_literal->get_stringval();
4701  if (boost::iequals(*s, "none")) {
4702  copy_params.geo_coords_encoding = kENCODING_NONE;
4703  copy_params.geo_coords_comp_param = 0;
4704  } else if (boost::iequals(*s, "compressed(32)")) {
4705  copy_params.geo_coords_encoding = kENCODING_GEOINT;
4706  copy_params.geo_coords_comp_param = 32;
4707  } else {
4708  throw std::runtime_error(
4709  "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
4710  "'COMPRESSED(32)'): " +
4711  *s);
4712  }
4713  } else if (boost::iequals(*p->get_name(), "geo_coords_srid")) {
4714  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4715  if (int_literal == nullptr) {
4716  throw std::runtime_error("'geo_coords_srid' option must be an integer");
4717  }
4718  const int srid = int_literal->get_intval();
4719  if (srid == 4326 || srid == 3857 || srid == 900913) {
4720  copy_params.geo_coords_srid = srid;
4721  } else {
4722  throw std::runtime_error(
4723  "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
4724  "900913): " +
4725  std::to_string(srid));
4726  }
4727  } else if (boost::iequals(*p->get_name(), "geo_layer_name")) {
4728  const StringLiteral* str_literal =
4729  dynamic_cast<const StringLiteral*>(p->get_value());
4730  if (str_literal == nullptr) {
4731  throw std::runtime_error("'geo_layer_name' option must be a string");
4732  }
4733  const std::string* layer_name = str_literal->get_stringval();
4734  if (layer_name) {
4735  copy_params.geo_layer_name = *layer_name;
4736  } else {
4737  throw std::runtime_error("Invalid value for 'geo_layer_name' option");
4738  }
4739  } else if (boost::iequals(*p->get_name(), "partitions")) {
4740  if (copy_params.file_type == import_export::FileType::POLYGON) {
4741  const auto partitions =
4742  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4743  CHECK(partitions);
4744  const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
4745  if (partitions_uc != "REPLICATED") {
4746  throw std::runtime_error("PARTITIONS must be REPLICATED for geo COPY");
4747  }
4748  geo_copy_from_partitions_ = partitions_uc;
4749  } else {
4750  throw std::runtime_error("PARTITIONS option not supported for non-geo COPY: " +
4751  *p->get_name());
4752  }
4753  } else if (boost::iequals(*p->get_name(), "geo_assign_render_groups")) {
4754  const StringLiteral* str_literal =
4755  dynamic_cast<const StringLiteral*>(p->get_value());
4756  if (str_literal == nullptr) {
4757  throw std::runtime_error("geo_assign_render_groups option must be a boolean.");
4758  }
4759  copy_params.geo_assign_render_groups = bool_from_string_literal(str_literal);
4760  } else if (boost::iequals(*p->get_name(), "geo_explode_collections")) {
4761  const StringLiteral* str_literal =
4762  dynamic_cast<const StringLiteral*>(p->get_value());
4763  if (str_literal == nullptr) {
4764  throw std::runtime_error("geo_explode_collections option must be a boolean.");
4765  }
4766  copy_params.geo_explode_collections = bool_from_string_literal(str_literal);
4767  } else if (boost::iequals(*p->get_name(), "source_srid")) {
4768  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4769  if (int_literal == nullptr) {
4770  throw std::runtime_error("'source_srid' option must be an integer");
4771  }
4772  const int srid = int_literal->get_intval();
4773  if (copy_params.file_type == import_export::FileType::DELIMITED) {
4774  copy_params.source_srid = srid;
4775  } else {
4776  throw std::runtime_error(
4777  "'source_srid' option can only be used on csv/tsv files");
4778  }
4779  } else {
4780  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
4781  }
4782  }
4783  }
4784 
4785  std::string tr;
4786  if (copy_params.file_type == import_export::FileType::POLYGON) {
4787  // geo import
4788  // we do nothing here, except stash the parameters so we can
4789  // do the import when we unwind to the top of the handler
4790  geo_copy_from_file_name_ = file_path;
4791  geo_copy_from_copy_params_ = copy_params;
4792  was_geo_copy_from_ = true;
4793 
4794  // the result string
4795  // @TODO simon.eves put something more useful in here
4796  // except we really can't because we haven't done the import yet!
4797  if (td) {
4798  tr = std::string("Appending geo to table '") + *table_ + std::string("'...");
4799  } else {
4800  tr = std::string("Creating table '") + *table_ +
4801  std::string("' and importing geo...");
4802  }
4803  } else {
4804  if (td) {
4805  CHECK(td_with_lock);
4806 
4807  // regular import
4808  auto importer = importer_factory(catalog, td, file_path, copy_params);
4809  auto start_time = ::toString(std::chrono::system_clock::now());
4810  auto prev_table_epoch = importer->getLoader()->getTableEpochs();
4812  auto query_session = session.get_session_id();
4813  auto query_str = "COPYING " + td->tableName;
4815  executor->enrollQuerySession(query_session,
4816  query_str,
4817  start_time,
4819  QuerySessionStatus::QueryStatus::RUNNING);
4820  }
4821 
4822  ScopeGuard clearInterruptStatus =
4823  [executor, &query_str, &query_session, &start_time, &importer] {
4824  // reset the runtime query interrupt status
4826  executor->clearQuerySessionStatus(query_session, start_time, false);
4827  }
4828  };
4829  import_export::ImportStatus import_result;
4830  auto ms =
4831  measure<>::execution([&]() { import_result = importer->import(&session); });
4832  total_time += ms;
4833  // results
4834  if (!import_result.load_failed &&
4835  import_result.rows_rejected > copy_params.max_reject) {
4836  LOG(ERROR) << "COPY exited early due to reject records count during multi file "
4837  "processing ";
4838  // if we have crossed the truncated load threshold
4839  import_result.load_failed = true;
4840  import_result.load_msg =
4841  "COPY exited early due to reject records count during multi file "
4842  "processing ";
4843  success_ = false;
4844  }
4845  if (!import_result.load_failed) {
4846  tr = std::string(
4847  "Loaded: " + std::to_string(import_result.rows_completed) +
4848  " recs, Rejected: " + std::to_string(import_result.rows_rejected) +
4849  " recs in " + std::to_string((double)total_time / 1000.0) + " secs");
4850  } else {
4851  tr = std::string("Loader Failed due to : " + import_result.load_msg + " in " +
4852  std::to_string((double)total_time / 1000.0) + " secs");
4853  }
4854  } else {
4855  throw std::runtime_error("Table '" + *table_ + "' must exist before COPY FROM");
4856  }
4857  }
4858 
4859  return_message.reset(new std::string(tr));
4860  LOG(INFO) << tr;
4861 } // namespace Parser
4862 
4863 // CREATE ROLE payroll_dept_role;
// Creates the role named by get_role() through SysCatalog::createRole().
// Throws std::runtime_error when the session's current user is not a super user.
// NOTE(review): the listing line that carried this function's signature (4864)
// is absent here; `session` is presumably its parameter -- confirm against the
// real source.
4865  const auto& currentUser = session.get_currentUser();
4866  if (!currentUser.isSuper) {
4867  throw std::runtime_error("CREATE ROLE " + get_role() +
4868  " failed. It can only be executed by super user.");
4869  }
4870  SysCatalog::instance().createRole(get_role());
4871 }
4872 
4873 // DROP ROLE payroll_dept_role;
// Drops the role named by get_role() through SysCatalog::dropRole().
// Throws std::runtime_error when the current user is not a super user, or when
// SysCatalog::getRoleGrantee() returns null for that role name.
// NOTE(review): the listing line that carried this function's signature (4874)
// is absent here; `session` is presumably its parameter -- confirm.
4875  const auto& currentUser = session.get_currentUser();
4876  if (!currentUser.isSuper) {
4877  throw std::runtime_error("DROP ROLE " + get_role() +
4878  " failed. It can only be executed by super user.");
4879  }
// Look the role up first so a missing role produces a specific error message.
4880  auto* rl = SysCatalog::instance().getRoleGrantee(get_role());
4881  if (!rl) {
4882  throw std::runtime_error("DROP ROLE " + get_role() +
4883  " failed because role with this name does not exist.");
4884  }
4885  SysCatalog::instance().dropRole(get_role());
4886 }
4887 
// Splits a hierarchical object name into its '.'-separated components.
// Separators are never merged: consecutive dots yield empty components, a
// trailing dot yields a trailing empty component, and an empty input yields a
// single empty component.
std::vector<std::string> splitObjectHierName(const std::string& hierName) {
  std::vector<std::string> parts;
  std::string::size_type segment_begin = 0;
  for (;;) {
    const std::string::size_type dot_pos = hierName.find('.', segment_begin);
    if (dot_pos == std::string::npos) {
      // Last (or only) component: everything after the final dot.
      parts.push_back(hierName.substr(segment_begin));
      break;
    }
    parts.push_back(hierName.substr(segment_begin, dot_pos - segment_begin));
    segment_begin = dot_pos + 1;
  }
  return parts;
}
4893 
// Extracts the object's own name from a possibly dot-qualified name.
//
// objectHierName is split on '.' by splitObjectHierName(). objectType is
// compared exactly (case-sensitive) against the literals below:
//  - "DATABASE": the name must have exactly one component, which is returned.
//  - "TABLE", "DASHBOARD", "VIEW", "SERVER": one component is returned as-is;
//    with two components the second one is returned.
// Throws std::runtime_error for any other component count or object type.
// NOTE(review): the listing line that carried the remaining parameter(s) and the
// opening brace (4896) is absent here; callers in this file pass a catalog as a
// third argument, and the visible body never uses it -- confirm.
4894 std::string extractObjectNameFromHierName(const std::string& objectHierName,
4895  const std::string& objectType,
4897  std::string objectName;
4898  std::vector<std::string> componentNames = splitObjectHierName(objectHierName);
4899  if (objectType.compare("DATABASE") == 0) {
4900  if (componentNames.size() == 1) {
4901  objectName = componentNames[0];
4902  } else {
4903  throw std::runtime_error("DB object name is not correct " + objectHierName);
4904  }
4905  } else {
4906  if (objectType.compare("TABLE") == 0 || objectType.compare("DASHBOARD") == 0 ||
4907  objectType.compare("VIEW") == 0 || objectType.compare("SERVER") == 0) {
4908  switch (componentNames.size()) {
4909  case (1): {
4910  objectName = componentNames[0];
4911  break;
4912  }
// Two components: the first one is discarded and the second is the object name.
4913  case (2): {
4914  objectName = componentNames[1];
4915  break;
4916  }
4917  default: {
4918  throw std::runtime_error("DB object name is not correct " + objectHierName);
4919  }
4920  }
4921  } else {
4922  throw std::runtime_error("DB object type " + objectType + " is not supported.");
4923  }
4924  }
4925  return objectName;
4926 }
4927 
// Maps a privilege keyword plus the object type it was requested on to the
// (AccessPrivileges, DBObjectType) pair stored in a static lookup table.
//
// privs        privilege keyword; matched exactly against the table keys, which
//              are all upper case (callers in this file upper-case it first)
// objectType   object type named in the statement
// object_name  used only to build the error message
// Returns the mapped pair; throws std::runtime_error when the
// (privs, objectType) combination has no table entry.
// NOTE(review): only the key of each table entry survives in this listing; the
// lines carrying the mapped values and the closing of the initializer are
// absent.
4928 static std::pair<AccessPrivileges, DBObjectType> parseStringPrivs(
4929  const std::string& privs,
4930  const DBObjectType& objectType,
4931  const std::string& object_name) {
4932  static const std::map<std::pair<const std::string, const DBObjectType>,
4933  std::pair<const AccessPrivileges, const DBObjectType>>
4934  privileges_lookup{
4935  {{"ALL"s, DatabaseDBObjectType},
4938  {{"ALL"s, DashboardDBObjectType},
4941  {{"ALL"s, ServerDBObjectType},
4943 
4944  {{"CREATE TABLE"s, DatabaseDBObjectType},
4946  {{"CREATE"s, DatabaseDBObjectType},
4948  {{"SELECT"s, DatabaseDBObjectType},
4950  {{"INSERT"s, DatabaseDBObjectType},
4952  {{"TRUNCATE"s, DatabaseDBObjectType},
4954  {{"UPDATE"s, DatabaseDBObjectType},
4956  {{"DELETE"s, DatabaseDBObjectType},
4958  {{"DROP"s, DatabaseDBObjectType},
4960  {{"ALTER"s, DatabaseDBObjectType},
4962 
4963  {{"SELECT"s, TableDBObjectType},
4965  {{"INSERT"s, TableDBObjectType},
4967  {{"TRUNCATE"s, TableDBObjectType},
4969  {{"UPDATE"s, TableDBObjectType},
4971  {{"DELETE"s, TableDBObjectType},
4973  {{"DROP"s, TableDBObjectType},
4975  {{"ALTER"s, TableDBObjectType},
4977 
4978  {{"CREATE VIEW"s, DatabaseDBObjectType},
4980  {{"SELECT VIEW"s, DatabaseDBObjectType},
4982  {{"DROP VIEW"s, DatabaseDBObjectType},
4984  {{"SELECT"s, ViewDBObjectType},
4987 
4988  {{"CREATE DASHBOARD"s, DatabaseDBObjectType},
4990  {{"EDIT DASHBOARD"s, DatabaseDBObjectType},
4992  {{"VIEW DASHBOARD"s, DatabaseDBObjectType},
4994  {{"DELETE DASHBOARD"s, DatabaseDBObjectType},
4996  {{"VIEW"s, DashboardDBObjectType},
4998  {{"EDIT"s, DashboardDBObjectType},
5000  {{"DELETE"s, DashboardDBObjectType},
5002 
5003  {{"CREATE SERVER"s, DatabaseDBObjectType},
5005  {{"DROP SERVER"s, DatabaseDBObjectType},
5007  {{"DROP"s, ServerDBObjectType},
5009  {{"ALTER SERVER"s, DatabaseDBObjectType},
5011  {{"ALTER"s, ServerDBObjectType},
5013  {{"USAGE"s, ServerDBObjectType},
5015  {{"SERVER USAGE"s, DatabaseDBObjectType},
5017 
5018  {{"VIEW SQL EDITOR"s, DatabaseDBObjectType},
5020  {{"ACCESS"s, DatabaseDBObjectType},
5022 
// Exact-match lookup on the (keyword, object type) pair.
5023  auto result = privileges_lookup.find(std::make_pair(privs, objectType));
5024  if (result == privileges_lookup.end()) {
5025  throw std::runtime_error("Privileges " + privs + " on DB object " + object_name +
5026  " are not correct.");
5027  }
5028  return result->second;
5029 }
5030 
5031 static DBObject createObject(const std::string& objectName, DBObjectType objectType) {
5032  if (objectType == DashboardDBObjectType) {
5033  int32_t dashboard_id = -1;
5034  if (!objectName.empty()) {
5035  try {
5036  dashboard_id = stoi(objectName);
5037  } catch (const std::exception&) {
5038  throw std::runtime_error(
5039  "Privileges on dashboards should be changed via integer dashboard ID");
5040  }
5041  }
5042  return DBObject(dashboard_id, objectType);
5043  } else {
5044  return DBObject(objectName, objectType);
5045  }
5046 }
5047 
5048 // GRANT SELECT/INSERT/CREATE ON TABLE payroll_table TO payroll_dept_role;
// Resolves the target object from get_object()/get_object_type(), checks that
// the caller may grant on it, translates every entry of get_privs() with
// parseStringPrivs(), and hands the result to
// SysCatalog::grantDBObjectPrivilegesBatch() for grantees_.
// Throws std::runtime_error when a SERVER object is involved while g_enable_fsi
// is false, or when a non-super user fails verifyDBObjectOwnership(); errors
// from the helper functions propagate.
// NOTE(review): the listing line that carried this function's signature (5049)
// is absent here; `session` is presumably its parameter -- confirm.
5050  auto& catalog = session.getCatalog();
5051  const auto& currentUser = session.get_currentUser();
// The object type is upper-cased because the helpers below compare it exactly.
5052  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
5053  const auto objectName =
5054  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
5055  auto objectType = DBObjectTypeFromString(parserObjectType);
5056  if (objectType == ServerDBObjectType && !g_enable_fsi) {
5057  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
5058  }
5059  DBObject dbObject = createObject(objectName, objectType);
5060  /* verify object ownership if not a super user */
5061  if (!currentUser.isSuper) {
5062  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
5063  throw std::runtime_error(
5064  "GRANT failed. It can only be executed by super user or owner of the "
5065  "object.");
5066  }
5067  }
5068  /* set proper values of privileges & grant them to the object */
// One copy of dbObject per requested privilege; each copy gets its own
// privileges and permission type.
5069  std::vector<DBObject> objects(get_privs().size(), dbObject);
5070  for (size_t i = 0; i < get_privs().size(); ++i) {
5071  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
5072  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
5073  objects[i].setPrivileges(priv.first);
5074  objects[i].setPermissionType(priv.second);
// The mapped permission type can be SERVER even when the statement's object
// type is not, so the FSI check is repeated per privilege.
5075  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
5076  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
5077  }
5078  }
5079  SysCatalog::instance().grantDBObjectPrivilegesBatch(grantees_, objects, catalog);
5080 }
5081 
5082 // REVOKE SELECT/INSERT/CREATE ON TABLE payroll_table FROM payroll_dept_role;
// Resolves the target object from get_object()/get_object_type(), checks that
// the caller may revoke on it, translates every entry of get_privs() with
// parseStringPrivs(), and hands the result to
// SysCatalog::revokeDBObjectPrivilegesBatch() for grantees_.
// Throws std::runtime_error when a SERVER object is involved while g_enable_fsi
// is false, or when a non-super user fails verifyDBObjectOwnership(); errors
// from the helper functions propagate.
// NOTE(review): the listing line that carried this function's signature (5083)
// is absent here; `session` is presumably its parameter -- confirm.
5084  auto& catalog = session.getCatalog();
5085  const auto& currentUser = session.get_currentUser();
// The object type is upper-cased because the helpers below compare it exactly.
5086  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
5087  const auto objectName =
5088  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
5089  auto objectType = DBObjectTypeFromString(parserObjectType);
5090  if (objectType == ServerDBObjectType && !g_enable_fsi) {
5091  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
5092  }
5093  DBObject dbObject = createObject(objectName, objectType);
5094  /* verify object ownership if not a super user */
5095  if (!currentUser.isSuper) {
5096  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
5097  throw std::runtime_error(
5098  "REVOKE failed. It can only be executed by super user or owner of the "
5099  "object.");
5100  }
5101  }
5102  /* set proper values of privileges & revoke them from the object */
// One copy of dbObject per requested privilege; each copy gets its own
// privileges and permission type.
5103  std::vector<DBObject> objects(get_privs().size(), dbObject);
5104  for (size_t i = 0; i < get_privs().size(); ++i) {
5105  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
5106  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
5107  objects[i].setPrivileges(priv.first);
5108  objects[i].setPermissionType(priv.second);
// The mapped permission type can be SERVER even when the statement's object
// type is not, so the FSI check is repeated per privilege.
5109  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
5110  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
5111  }
5112  }
5113  SysCatalog::instance().revokeDBObjectPrivilegesBatch(grantees_, objects, catalog);
5114 }
5115 
5116 // NOTE: not used currently, will we ever use it?
5117 // SHOW ON TABLE payroll_table FOR payroll_dept_role;
// Loads the privileges that get_role() holds on the object named by
// get_object() (via SysCatalog::getDBObjectPrivileges) and prints them to
// stdout with printf, grouped by object type.
// Throws std::runtime_error when a non-super user fails
// verifyDBObjectOwnership(); errors from the helper functions propagate.
// NOTE(review): the listing line that carried this function's signature (5118)
// and every `if (...) {` line guarding an individual printf below are absent
// here, so the condition under which each keyword is printed cannot be read
// from this listing.
5119  auto& catalog = session.getCatalog();
5120  const auto& currentUser = session.get_currentUser();
5121  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
5122  const auto objectName =
5123  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
5124  auto objectType = DBObjectTypeFromString(parserObjectType);
5125  DBObject dbObject = createObject(objectName, objectType);
5126  /* verify object ownership if not a super user */
5127  if (!currentUser.isSuper) {
5128  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
5129  throw std::runtime_error(
5130  "SHOW ON " + get_object() + " FOR " + get_role() +
5131  " failed. It can only be executed by super user or owner of the object.");
5132  }
5133  }
5134  /* get values of privileges for the object and report them */
5135  SysCatalog::instance().getDBObjectPrivileges(get_role(), dbObject, catalog);
5136  AccessPrivileges privs = dbObject.getPrivileges();
5137  printf("\nPRIVILEGES ON %s FOR %s ARE SET AS FOLLOWING: ",
5138  get_object().c_str(),
5139  get_role().c_str());
5140 
5141  if (objectType == DBObjectType::DatabaseDBObjectType) {
5143  printf(" CREATE");
5144  }
5146  printf(" DROP");
5147  }
5148  } else if (objectType == DBObjectType::TableDBObjectType) {
5150  printf(" CREATE");
5151  }
5153  printf(" DROP");
5154  }
5156  printf(" SELECT");
5157  }
5159  printf(" INSERT");
5160  }
5162  printf(" UPDATE");
5163  }
5165  printf(" DELETE");
5166  }
5168  printf(" TRUNCATE");
5169  }
5171  printf(" ALTER");
5172  }
5173  } else if (objectType == DBObjectType::DashboardDBObjectType) {
5175  printf(" CREATE");
5176  }
5178  printf(" DELETE");
5179  }
5181  printf(" VIEW");
5182  }
5184  printf(" EDIT");
5185  }
5186  } else if (objectType == DBObjectType::ViewDBObjectType) {
5188  printf(" CREATE");
5189  }
5191  printf(" DROP");
5192  }
5194  printf(" SELECT");
5195  }
5197  printf(" INSERT");
5198  }
5200  printf(" UPDATE");
5201  }
5203  printf(" DELETE");
5204  }
5205  }
5206  printf(".\n");
5207 }
5208 
5209 // GRANT payroll_dept_role TO joe;
// Grants every role in get_roles() to every grantee in get_grantees() through
// SysCatalog::grantRoleBatch().
// Throws std::runtime_error when the current user is not a super user, or when
// OMNISCI_ROOT_USER appears among the grantees.
// NOTE(review): the listing line that carried this function's signature (5210)
// is absent here; `session` is presumably its parameter -- confirm.
5211  const auto& currentUser = session.get_currentUser();
5212  if (!currentUser.isSuper) {
5213  throw std::runtime_error(
5214  "GRANT failed, because it can only be executed by super user.");
5215  }
// Refuse the whole request if the root user is listed as a grantee.
5216  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
5217  get_grantees().end()) {
5218  throw std::runtime_error(
5219  "Request to grant role failed because mapd root user has all privileges by "
5220  "default.");
5221  }
5222  SysCatalog::instance().grantRoleBatch(get_roles(), get_grantees());
5223 }
5224 
5225 // REVOKE payroll_dept_role FROM joe;
// Revokes every role in get_roles() from every grantee in get_grantees()
// through SysCatalog::revokeRoleBatch().
// Throws std::runtime_error when the current user is not a super user, or when
// OMNISCI_ROOT_USER appears among the grantees.
// NOTE(review): the listing line that carried this function's signature (5226)
// is absent here; `session` is presumably its parameter -- confirm.
5227  const auto& currentUser = session.get_currentUser();
5228  if (!currentUser.isSuper) {
5229  throw std::runtime_error(
5230  "REVOKE failed, because it can only be executed by super user.");
5231  }
// Refuse the whole request if the root user is listed as a grantee.
5232  if (std::find(get_grantees().begin(), get_grantees().end(), OMNISCI_ROOT_USER) !=
5233  get_grantees().end()) {
5234  throw std::runtime_error(
5235  "Request to revoke role failed because privileges can not be revoked from "
5236  "mapd root user.");
5237  }
5238  SysCatalog::instance().revokeRoleBatch(get_roles(), get_grantees());
5239 }
5240 
// Stores the CREATE statement for *table_ (as produced by
// catalog.dumpCreateTable) in create_stmt_.
// Throws std::runtime_error when the table/view has no metadata, when the
// current user has no privileges on it (reported with the same "does not exist"
// message as a missing table), or when it is a view and the user is not a
// super user.
// NOTE(review): several lines are absent from this listing -- the function
// signature (5241), the arguments of the shared-lock construction (5245-5246)
// and the declaration of `dbObject` (5254).
5242  using namespace Catalog_Namespace;
5243 
5244  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
5247 
5248  auto& catalog = session.getCatalog();
5249  const TableDescriptor* td = catalog.getMetadataForTable(*table_, false);
5250  if (!td) {
5251  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
5252  }
5253 
5255  dbObject.loadKey(catalog);
5256  std::vector<DBObject> privObjects = {dbObject};
5257 
// Same message as the missing-table case above.
5258  if (!SysCatalog::instance().hasAnyPrivileges(session.get_currentUser(), privObjects)) {
5259  throw std::runtime_error("Table/View " + *table_ + " does not exist.");
5260  }
5261  if (td->isView && !session.get_currentUser().isSuper) {
5262  // TODO: we need to run a validate query to ensure the user has access to the
5263  // underlying table, but we do not have any of the machinery in here. Disable
5264  // for now, unless the current user is a super user.
5265  throw std::runtime_error("SHOW CREATE TABLE not yet supported for views");
5266  }
5267 
5268  create_stmt_ = catalog.dumpCreateTable(td);
5269 }
5270 
// Runs *select_stmt_ and writes its results to *file_path_ through an
// import_export::QueryExporter chosen by the parsed file type.
//
// Visible steps: build a query state around a copy of the session, parse the
// statement options, normalise the output path, fetch column metadata, begin
// the export, then run and export the query (one outer fragment at a time when
// fragments are reported) and end the export.
// Throws std::runtime_error for an empty file path or when the export
// directory cannot be created; errors from the called components propagate.
// NOTE(review): several lines are absent from this listing -- the function
// signature (5271), the declarations of file_type, file_compression and
// array_null_handling (5287-5293) and the validation performed for absolute
// paths (5311-5312).
5272  auto session_copy = session;
// Non-owning shared_ptr to the stack copy above (null_deleter: nothing is
// deleted when the pointer is released).
5273  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
5274  &session_copy, boost::null_deleter());
5275  auto query_state = query_state::QueryState::create(session_ptr, *select_stmt_);
5276  auto stdlog = STDLOG(query_state);
5277  auto query_state_proxy = query_state->createQueryStateProxy();
5278 
5279  LocalConnector local_connector;
5280 
// Fall back to the local connector when no leaf connector has been set.
5281  if (!leafs_connector_) {
5282  leafs_connector_ = &local_connector;
5283  }
5284 
5285  import_export::CopyParams copy_params;
5286  // @TODO(se) move rest to CopyParams when we have a Thrift endpoint
5289  std::string layer_name;
5294 
5295  parseOptions(copy_params, file_type, layer_name, file_compression, array_null_handling);
5296 
5297  if (file_path_->empty()) {
5298  throw std::runtime_error("Invalid file path for COPY TO");
5299  } else if (!boost::filesystem::path(*file_path_).is_absolute()) {
// Relative path: keep only the file name and place it under
// <g_base_path>/mapd_export/<session id>/, creating that directory if needed.
5300  std::string file_name = boost::filesystem::path(*file_path_).filename().string();
5301  std::string file_dir = g_base_path + "/mapd_export/" + session.get_session_id() + "/";
5302  if (!boost::filesystem::exists(file_dir)) {
5303  if (!boost::filesystem::create_directories(file_dir)) {
5304  throw std::runtime_error("Directory " + file_dir + " cannot be created.");
5305  }
5306  }
5307  *file_path_ = file_dir + file_name;
5308  } else {
5309  // Above branch will create a new file in the mapd_export directory. If that
5310  // path is not exercised, go through applicable file path validations.
5313  }
5314 
5315  // get column info
5316  auto column_info_result =
5317  local_connector.query(query_state_proxy, *select_stmt_, {}, true, false);
5318 
5319  // create exporter for requested file type
5320  auto query_exporter = import_export::QueryExporter::create(file_type);
5321 
5322  // default layer name to file path stem if it wasn't specified
5323  if (layer_name.size() == 0) {
5324  layer_name = boost::filesystem::path(*file_path_).stem().string();
5325  }
5326 
5327  // begin export
5328  query_exporter->beginExport(*file_path_,
5329  layer_name,
5330  copy_params,
5331  column_info_result.targets_meta,
5332  file_compression,
5333  array_null_handling);
5334 
5335  // how many fragments?
5336  size_t outer_frag_count =
5337  leafs_connector_->getOuterFragmentCount(query_state_proxy, *select_stmt_);
// A count of zero still runs the loop once, with no fragment restriction.
5338  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
5339 
5340  // loop fragments
5341  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
5342  // limit the query to just this fragment
5343  std::vector<size_t> allowed_outer_fragment_indices;
5344  if (outer_frag_count) {
5345  allowed_outer_fragment_indices.push_back(outer_frag_idx);
5346  }
5347 
5348  // run the query
5349  std::vector<AggregatedResult> query_results = leafs_connector_->query(
5350  query_state_proxy, *select_stmt_, allowed_outer_fragment_indices, false);
5351 
5352  // export the results
5353  query_exporter->exportResults(query_results);
5354  }
5355 
5356  // end export
5357  query_exporter->endExport();
5358 }
5359 
5361  import_export::CopyParams& copy_params,
5363  std::string& layer_name,
5365  import_export::QueryExporter::ArrayNullHandling& array_null_handling) {