OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParserNode.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "ParserNode.h"
26 
27 #include <boost/algorithm/string.hpp>
28 #include <boost/core/null_deleter.hpp>
29 #include <boost/filesystem.hpp>
30 #include <boost/function.hpp>
31 
32 #include <rapidjson/document.h>
33 #include <rapidjson/stringbuffer.h>
34 #include <rapidjson/writer.h>
35 
36 #include <cassert>
37 #include <cmath>
38 #include <limits>
39 #include <random>
40 #include <regex>
41 #include <stdexcept>
42 #include <string>
43 #include <type_traits>
44 #include <typeinfo>
45 
47 #include "Catalog/Catalog.h"
53 #include "Geospatial/Compression.h"
54 #include "Geospatial/Types.h"
56 #include "ImportExport/Importer.h"
57 #include "LockMgr/LockMgr.h"
61 #include "QueryEngine/Execute.h"
66 #include "ReservedKeywords.h"
67 #include "Shared/StringTransform.h"
68 #include "Shared/measure.h"
69 #include "Shared/shard_key.h"
71 #include "Utils/FsiUtils.h"
72 
73 #include "gen-cpp/CalciteServer.h"
74 #include "parser.h"
75 
77 
// Global leaf counter, zero-initialized at this definition.
// NOTE(review): presumably the number of leaf nodes in a distributed setup — confirm.
78 size_t g_leaf_count{0};
// Declared here only; the definition lives in another translation unit.
// NOTE(review): presumably gates foreign-storage-interface features — confirm.
81 extern bool g_enable_fsi;
82 
83 #ifdef ENABLE_IMPORT_PARQUET
// Only defined when ENABLE_IMPORT_PARQUET is set at build time; defaults to false.
84 bool g_enable_parquet_import_fsi{false};
85 #endif
86 
88 using namespace std::string_literals;
89 
// Callback type taking a mutable TableDescriptor and a const list of column
// descriptors.
// NOTE(review): listing line 91 is absent from this extract, so the callback
// likely takes a further parameter between the two shown — confirm against the
// full source.
90 using TableDefFuncPtr = boost::function<void(TableDescriptor&,
92  const std::list<ColumnDescriptor>& columns)>;
93 
// Same callback shape for a DataframeTableDescriptor.
// NOTE(review): listing line 96 is likewise absent — confirm the full signature.
94 using DataframeDefFuncPtr =
95  boost::function<void(DataframeTableDescriptor&,
97  const std::list<ColumnDescriptor>& columns)>;
98 
99 namespace Parser {
// Reports whether the query session `query_session` is flagged as interrupted,
// by asking `executor` while holding a shared lock obtained from
// executor->getSessionLock(). Falls back to `false` otherwise.
// NOTE(review): listing line 104 is absent from this extract; it presumably
// opens the conditional that the brace on listing line 107 closes — confirm
// against the full source before editing this function.
100 bool check_session_interrupted(const QuerySessionId& query_session, Executor* executor) {
101  // we call this function with unitary executor but is okay since
102  // we know the exact session info from a global session map object
103  // in the executor
105  mapd_shared_lock<mapd_shared_mutex> session_read_lock(executor->getSessionLock());
106  return executor->checkIsQuerySessionInterrupted(query_session, session_read_lock);
107  }
108  return false;
109 }
110 
111 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
112  const Catalog_Namespace::Catalog& catalog,
113  Analyzer::Query& query,
114  TlistRefType allow_tlist_ref) const {
115  return makeExpr<Analyzer::Constant>(kNULLT, true);
116 }
117 
118 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
119  const Catalog_Namespace::Catalog& catalog,
120  Analyzer::Query& query,
121  TlistRefType allow_tlist_ref) const {
122  return analyzeValue(*stringval_);
123 }
124 
125 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(
126  const std::string& stringval) {
127  SQLTypeInfo ti(kVARCHAR, stringval.length(), 0, true);
128  Datum d;
129  d.stringval = new std::string(stringval);
130  return makeExpr<Analyzer::Constant>(ti, false, d);
131 }
132 
133 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
134  const Catalog_Namespace::Catalog& catalog,
135  Analyzer::Query& query,
136  TlistRefType allow_tlist_ref) const {
137  return analyzeValue(intval_);
138 }
139 
140 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(const int64_t intval) {
141  SQLTypes t;
142  Datum d;
143  if (intval >= INT16_MIN && intval <= INT16_MAX) {
144  t = kSMALLINT;
145  d.smallintval = (int16_t)intval;
146  } else if (intval >= INT32_MIN && intval <= INT32_MAX) {
147  t = kINT;
148  d.intval = (int32_t)intval;
149  } else {
150  t = kBIGINT;
151  d.bigintval = intval;
152  }
153  return makeExpr<Analyzer::Constant>(t, false, d);
154 }
155 
156 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
157  const Catalog_Namespace::Catalog& catalog,
158  Analyzer::Query& query,
159  TlistRefType allow_tlist_ref) const {
160  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
161  Datum d = StringToDatum(*fixedptval_, ti);
162  return makeExpr<Analyzer::Constant>(ti, false, d);
163 }
164 
165 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(const int64_t numericval,
166  const int scale,
167  const int precision) {
168  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
169  ti.set_scale(scale);
170  ti.set_precision(precision);
171  Datum d;
172  d.bigintval = numericval;
173  return makeExpr<Analyzer::Constant>(ti, false, d);
174 }
175 
176 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
177  const Catalog_Namespace::Catalog& catalog,
178  Analyzer::Query& query,
179  TlistRefType allow_tlist_ref) const {
180  Datum d;
181  d.floatval = floatval_;
182  return makeExpr<Analyzer::Constant>(kFLOAT, false, d);
183 }
184 
185 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
186  const Catalog_Namespace::Catalog& catalog,
187  Analyzer::Query& query,
188  TlistRefType allow_tlist_ref) const {
189  Datum d;
190  d.doubleval = doubleval_;
191  return makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
192 }
193 
194 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
195  const Catalog_Namespace::Catalog& catalog,
196  Analyzer::Query& query,
197  TlistRefType allow_tlist_ref) const {
198  return get(timestampval_);
199 }
200 
201 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(const int64_t timestampval) {
202  Datum d;
203  d.bigintval = timestampval;
204  return makeExpr<Analyzer::Constant>(kTIMESTAMP, false, d);
205 }
206 
207 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
208  const Catalog_Namespace::Catalog& catalog,
209  Analyzer::Query& query,
210  TlistRefType allow_tlist_ref) const {
211  Datum d;
212  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
213 }
214 
215 std::shared_ptr<Analyzer::Expr> UserLiteral::get(const std::string& user) {
216  Datum d;
217  d.stringval = new std::string(user);
218  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
219 }
220 
221 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
222  const Catalog_Namespace::Catalog& catalog,
223  Analyzer::Query& query,
224  TlistRefType allow_tlist_ref) const {
225  SQLTypeInfo ti = SQLTypeInfo(kARRAY, true);
226  bool set_subtype = true;
227  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
228  for (auto& p : value_list_) {
229  auto e = p->analyze(catalog, query, allow_tlist_ref);
230  CHECK(e);
231  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
232  if (c != nullptr && c->get_is_null()) {
233  value_exprs.push_back(c);
234  continue;
235  }
236  auto subtype = e->get_type_info().get_type();
237  if (subtype == kNULLT) {
238  // NULL element
239  } else if (set_subtype) {
240  ti.set_subtype(subtype);
241  set_subtype = false;
242  }
243  value_exprs.push_back(e);
244  }
245  std::shared_ptr<Analyzer::Expr> result =
246  makeExpr<Analyzer::Constant>(ti, false, value_exprs);
247  return result;
248 }
249 
250 std::string ArrayLiteral::to_string() const {
251  std::string str = "{";
252  bool notfirst = false;
253  for (auto& p : value_list_) {
254  if (notfirst) {
255  str += ", ";
256  } else {
257  notfirst = true;
258  }
259  str += p->to_string();
260  }
261  str += "}";
262  return str;
263 }
264 
265 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
266  const Catalog_Namespace::Catalog& catalog,
267  Analyzer::Query& query,
268  TlistRefType allow_tlist_ref) const {
269  auto left_expr = left_->analyze(catalog, query, allow_tlist_ref);
270  const auto& left_type = left_expr->get_type_info();
271  if (right_ == nullptr) {
272  return makeExpr<Analyzer::UOper>(
273  left_type, left_expr->get_contains_agg(), optype_, left_expr->decompress());
274  }
275  if (optype_ == kARRAY_AT) {
276  if (left_type.get_type() != kARRAY) {
277  throw std::runtime_error(left_->to_string() + " is not of array type.");
278  }
279  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
280  const auto& right_type = right_expr->get_type_info();
281  if (!right_type.is_integer()) {
282  throw std::runtime_error(right_->to_string() + " is not of integer type.");
283  }
284  return makeExpr<Analyzer::BinOper>(
285  left_type.get_elem_type(), false, kARRAY_AT, kONE, left_expr, right_expr);
286  }
287  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
288  return normalize(optype_, opqualifier_, left_expr, right_expr);
289 }
290 
// Builds a binary operator expression from two analyzed operands, inserting
// the casts and decompressions needed to make the operand types agree.
//   optype     - the operator being applied.
//   qual       - kONE for a plain operator; any other value requires the right
//                operand to be of array type.
//   left_expr  - analyzed left operand (taken by value and rebound locally).
//   right_expr - analyzed right operand (taken by value and rebound locally).
// Returns the resulting Analyzer::BinOper.
// Throws std::runtime_error for a qualifier on a non-array right operand and
// for non-OVERLAPS comparisons between two geometry operands.
291 std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
292  const SQLOps optype,
293  const SQLQualifier qual,
294  std::shared_ptr<Analyzer::Expr> left_expr,
295  std::shared_ptr<Analyzer::Expr> right_expr) {
296  if (left_expr->get_type_info().is_date_in_days() ||
297  right_expr->get_type_info().is_date_in_days()) {
298  // Do not propagate encoding
299  left_expr = left_expr->decompress();
300  right_expr = right_expr->decompress();
301  }
// left_type is a reference into the current left_expr; right_type is a copy
// because it may be replaced by the array's element type below.
302  const auto& left_type = left_expr->get_type_info();
303  auto right_type = right_expr->get_type_info();
304  if (qual != kONE) {
305  // subquery not supported yet.
306  CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
307  if (right_type.get_type() != kARRAY) {
308  throw std::runtime_error(
309  "Existential or universal qualifiers can only be used in front of a subquery "
310  "or an "
311  "expression of array type.");
312  }
// With a qualifier, type analysis works on the array's element type.
313  right_type = right_type.get_elem_type();
314  }
// analyze_type_info returns the result type and fills in the operand types
// each side should be cast to.
315  SQLTypeInfo new_left_type;
316  SQLTypeInfo new_right_type;
317  auto result_type = Analyzer::BinOper::analyze_type_info(
318  optype, left_type, right_type, &new_left_type, &new_right_type);
// Time-interval results are returned immediately, without operand casts and
// with the aggregate flag fixed to false.
319  if (result_type.is_timeinterval()) {
320  return makeExpr<Analyzer::BinOper>(
321  result_type, false, optype, qual, left_expr, right_expr);
322  }
323  if (left_type != new_left_type) {
324  left_expr = left_expr->add_cast(new_left_type);
325  }
326  if (right_type != new_right_type) {
327  if (qual == kONE) {
328  right_expr = right_expr->add_cast(new_right_type);
329  } else {
// The qualified right operand is still an array, so cast to the array form of
// the new element type.
330  right_expr = right_expr->add_cast(new_right_type.get_array_type());
331  }
332  }
333 
334  if (IS_COMPARISON(optype)) {
335  if (optype != kOVERLAPS && new_left_type.is_geometry() &&
336  new_right_type.is_geometry()) {
337  throw std::runtime_error(
338  "Comparison operators are not yet supported for geospatial types.");
339  }
340 
// Dictionary-encoding handling for comparisons:
//  - both dict-encoded with the same comp_param: leave both operands alone;
//  - exactly one side dict-encoded and the other unencoded: cast the unencoded
//    side to the encoded side's compression and comp_param;
//  - any other combination: decompress both operands.
341  if (new_left_type.get_compression() == kENCODING_DICT &&
342  new_right_type.get_compression() == kENCODING_DICT &&
343  new_left_type.get_comp_param() == new_right_type.get_comp_param()) {
344  // do nothing
345  } else if (new_left_type.get_compression() == kENCODING_DICT &&
346  new_right_type.get_compression() == kENCODING_NONE) {
347  SQLTypeInfo ti(new_right_type);
348  ti.set_compression(new_left_type.get_compression());
349  ti.set_comp_param(new_left_type.get_comp_param());
350  ti.set_fixed_size();
351  right_expr = right_expr->add_cast(ti);
352  } else if (new_right_type.get_compression() == kENCODING_DICT &&
353  new_left_type.get_compression() == kENCODING_NONE) {
354  SQLTypeInfo ti(new_left_type);
355  ti.set_compression(new_right_type.get_compression());
356  ti.set_comp_param(new_right_type.get_comp_param());
357  ti.set_fixed_size();
358  left_expr = left_expr->add_cast(ti);
359  } else {
360  left_expr = left_expr->decompress();
361  right_expr = right_expr->decompress();
362  }
363  } else {
// Non-comparison operators always work on decompressed operands.
364  left_expr = left_expr->decompress();
365  right_expr = right_expr->decompress();
366  }
// The result contains an aggregate if either operand does.
367  bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
368  return makeExpr<Analyzer::BinOper>(
369  result_type, has_agg, optype, qual, left_expr, right_expr);
370 }
371 
372 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
373  const Catalog_Namespace::Catalog& catalog,
374  Analyzer::Query& query,
375  TlistRefType allow_tlist_ref) const {
376  throw std::runtime_error("Subqueries are not supported yet.");
377  return nullptr;
378 }
379 
380 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
381  const Catalog_Namespace::Catalog& catalog,
382  Analyzer::Query& query,
383  TlistRefType allow_tlist_ref) const {
384  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
385  auto result = makeExpr<Analyzer::UOper>(kBOOLEAN, kISNULL, arg_expr);
386  if (is_not_) {
387  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
388  }
389  return result;
390 }
391 
392 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
393  const Catalog_Namespace::Catalog& catalog,
394  Analyzer::Query& query,
395  TlistRefType allow_tlist_ref) const {
396  throw std::runtime_error("Subqueries are not supported yet.");
397  return nullptr;
398 }
399 
400 std::shared_ptr<Analyzer::Expr> InValues::analyze(
401  const Catalog_Namespace::Catalog& catalog,
402  Analyzer::Query& query,
403  TlistRefType allow_tlist_ref) const {
404  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
405  SQLTypeInfo ti = arg_expr->get_type_info();
406  bool dict_comp = ti.get_compression() == kENCODING_DICT;
407  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
408  for (auto& p : value_list_) {
409  auto e = p->analyze(catalog, query, allow_tlist_ref);
410  if (ti != e->get_type_info()) {
411  if (ti.is_string() && e->get_type_info().is_string()) {
412  ti = Analyzer::BinOper::common_string_type(ti, e->get_type_info());
413  } else if (ti.is_number() && e->get_type_info().is_number()) {
414  ti = Analyzer::BinOper::common_numeric_type(ti, e->get_type_info());
415  } else {
416  throw std::runtime_error("IN expressions must contain compatible types.");
417  }
418  }
419  if (dict_comp) {
420  value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
421  } else {
422  value_exprs.push_back(e);
423  }
424  }
425  if (!dict_comp) {
426  arg_expr = arg_expr->decompress();
427  arg_expr = arg_expr->add_cast(ti);
428  std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
429  for (auto p : value_exprs) {
430  cast_vals.push_back(p->add_cast(ti));
431  }
432  value_exprs.swap(cast_vals);
433  }
434  std::shared_ptr<Analyzer::Expr> result =
435  makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
436  if (is_not_) {
437  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
438  }
439  return result;
440 }
441 
// Analyzes `arg [NOT] BETWEEN lower AND upper` into
// (arg >= lower) AND (arg <= upper), wrapped in kNOT for the NOT form.
// Operands are cast to new_left_type / new_right_type and decompressed before
// each comparison is built.
// NOTE(review): listing lines 450 and 461 are absent from this extract; each
// presumably begins the call whose argument list continues on the following
// lines and fills new_left_type / new_right_type — confirm against the full
// source before editing.
442 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
443  const Catalog_Namespace::Catalog& catalog,
444  Analyzer::Query& query,
445  TlistRefType allow_tlist_ref) const {
446  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
447  auto lower_expr = lower_->analyze(catalog, query, allow_tlist_ref);
448  auto upper_expr = upper_->analyze(catalog, query, allow_tlist_ref);
449  SQLTypeInfo new_left_type, new_right_type;
451  arg_expr->get_type_info(),
452  lower_expr->get_type_info(),
453  &new_left_type,
454  &new_right_type);
// Lower-bound predicate: arg >= lower.
455  auto lower_pred =
456  makeExpr<Analyzer::BinOper>(kBOOLEAN,
457  kGE,
458  kONE,
459  arg_expr->add_cast(new_left_type)->decompress(),
460  lower_expr->add_cast(new_right_type)->decompress());
// NOTE(review): this second argument group passes lower_expr's type again,
// yet the types it produces are used to cast upper_expr below. It looks like
// upper_expr->get_type_info() was intended — confirm.
462  arg_expr->get_type_info(),
463  lower_expr->get_type_info(),
464  &new_left_type,
465  &new_right_type);
// Upper-bound predicate: arg <= upper, built on a deep copy of the argument.
466  auto upper_pred = makeExpr<Analyzer::BinOper>(
467  kBOOLEAN,
468  kLE,
469  kONE,
470  arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
471  upper_expr->add_cast(new_right_type)->decompress());
472  std::shared_ptr<Analyzer::Expr> result =
473  makeExpr<Analyzer::BinOper>(kBOOLEAN, kAND, kONE, lower_pred, upper_pred);
474  if (is_not_) {
475  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
476  }
477  return result;
478 }
479 
480 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
481  const Catalog_Namespace::Catalog& catalog,
482  Analyzer::Query& query,
483  TlistRefType allow_tlist_ref) const {
484  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
485  if (!arg_expr->get_type_info().is_string()) {
486  throw std::runtime_error(
487  "expression in char_length clause must be of a string type.");
488  }
489  std::shared_ptr<Analyzer::Expr> result =
490  makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length_);
491  return result;
492 }
493 
494 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
495  const Catalog_Namespace::Catalog& catalog,
496  Analyzer::Query& query,
497  TlistRefType allow_tlist_ref) const {
498  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
499  if (!arg_expr->get_type_info().is_array()) {
500  throw std::runtime_error(
501  "expression in cardinality clause must be of an array type.");
502  }
503  std::shared_ptr<Analyzer::Expr> result =
504  makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
505  return result;
506 }
507 
508 void LikeExpr::check_like_expr(const std::string& like_str, char escape_char) {
509  if (like_str.back() == escape_char) {
510  throw std::runtime_error("LIKE pattern must not end with escape character.");
511  }
512 }
513 
514 bool LikeExpr::test_is_simple_expr(const std::string& like_str, char escape_char) {
515  // if not bounded by '%' then not a simple string
516  if (like_str.size() < 2 || like_str[0] != '%' || like_str[like_str.size() - 1] != '%') {
517  return false;
518  }
519  // if the last '%' is escaped then not a simple string
520  if (like_str[like_str.size() - 2] == escape_char &&
521  like_str[like_str.size() - 3] != escape_char) {
522  return false;
523  }
524  for (size_t i = 1; i < like_str.size() - 1; i++) {
525  if (like_str[i] == '%' || like_str[i] == '_' || like_str[i] == '[' ||
526  like_str[i] == ']') {
527  if (like_str[i - 1] != escape_char) {
528  return false;
529  }
530  }
531  }
532  return true;
533 }
534 
535 void LikeExpr::erase_cntl_chars(std::string& like_str, char escape_char) {
536  char prev_char = '\0';
537  // easier to create new string of allowable chars
538  // rather than erase chars from
539  // existing string
540  std::string new_str;
541  for (char& cur_char : like_str) {
542  if (cur_char == '%' || cur_char == escape_char) {
543  if (prev_char != escape_char) {
544  prev_char = cur_char;
545  continue;
546  }
547  }
548  new_str.push_back(cur_char);
549  prev_char = cur_char;
550  }
551  like_str = new_str;
552 }
553 
554 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
555  const Catalog_Namespace::Catalog& catalog,
556  Analyzer::Query& query,
557  TlistRefType allow_tlist_ref) const {
558  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
559  auto like_expr = like_string_->analyze(catalog, query, allow_tlist_ref);
560  auto escape_expr = escape_string_ == nullptr
561  ? nullptr
562  : escape_string_->analyze(catalog, query, allow_tlist_ref);
563  return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike_, is_not_);
564 }
565 
566 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
567  std::shared_ptr<Analyzer::Expr> like_expr,
568  std::shared_ptr<Analyzer::Expr> escape_expr,
569  const bool is_ilike,
570  const bool is_not) {
571  if (!arg_expr->get_type_info().is_string()) {
572  throw std::runtime_error("expression before LIKE must be of a string type.");
573  }
574  if (!like_expr->get_type_info().is_string()) {
575  throw std::runtime_error("expression after LIKE must be of a string type.");
576  }
577  char escape_char = '\\';
578  if (escape_expr != nullptr) {
579  if (!escape_expr->get_type_info().is_string()) {
580  throw std::runtime_error("expression after ESCAPE must be of a string type.");
581  }
582  if (!escape_expr->get_type_info().is_string()) {
583  throw std::runtime_error("expression after ESCAPE must be of a string type.");
584  }
585  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
586  if (c != nullptr && c->get_constval().stringval->length() > 1) {
587  throw std::runtime_error("String after ESCAPE must have a single character.");
588  }
589  escape_char = (*c->get_constval().stringval)[0];
590  }
591  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(like_expr);
592  bool is_simple = false;
593  if (c != nullptr) {
594  std::string& pattern = *c->get_constval().stringval;
595  if (is_ilike) {
596  std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
597  }
598  check_like_expr(pattern, escape_char);
599  is_simple = test_is_simple_expr(pattern, escape_char);
600  if (is_simple) {
601  erase_cntl_chars(pattern, escape_char);
602  }
603  }
604  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikeExpr>(
605  arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
606  if (is_not) {
607  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
608  }
609  return result;
610 }
611 
612 void RegexpExpr::check_pattern_expr(const std::string& pattern_str, char escape_char) {
613  if (pattern_str.back() == escape_char) {
614  throw std::runtime_error("REGEXP pattern must not end with escape character.");
615  }
616 }
617 
618 bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str, char escape_char) {
619  char prev_char = '\0';
620  char prev_prev_char = '\0';
621  std::string like_str;
622  for (char& cur_char : pattern_str) {
623  if (prev_char == escape_char || isalnum(cur_char) || cur_char == ' ' ||
624  cur_char == '.') {
625  like_str.push_back((cur_char == '.') ? '_' : cur_char);
626  prev_prev_char = prev_char;
627  prev_char = cur_char;
628  continue;
629  }
630  if (prev_char == '.' && prev_prev_char != escape_char) {
631  if (cur_char == '*' || cur_char == '+') {
632  if (cur_char == '*') {
633  like_str.pop_back();
634  }
635  // .* --> %
636  // .+ --> _%
637  like_str.push_back('%');
638  prev_prev_char = prev_char;
639  prev_char = cur_char;
640  continue;
641  }
642  }
643  return false;
644  }
645  pattern_str = like_str;
646  return true;
647 }
648 
649 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
650  const Catalog_Namespace::Catalog& catalog,
651  Analyzer::Query& query,
652  TlistRefType allow_tlist_ref) const {
653  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
654  auto pattern_expr = pattern_string_->analyze(catalog, query, allow_tlist_ref);
655  auto escape_expr = escape_string_ == nullptr
656  ? nullptr
657  : escape_string_->analyze(catalog, query, allow_tlist_ref);
658  return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not_);
659 }
660 
661 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
662  std::shared_ptr<Analyzer::Expr> arg_expr,
663  std::shared_ptr<Analyzer::Expr> pattern_expr,
664  std::shared_ptr<Analyzer::Expr> escape_expr,
665  const bool is_not) {
666  if (!arg_expr->get_type_info().is_string()) {
667  throw std::runtime_error("expression before REGEXP must be of a string type.");
668  }
669  if (!pattern_expr->get_type_info().is_string()) {
670  throw std::runtime_error("expression after REGEXP must be of a string type.");
671  }
672  char escape_char = '\\';
673  if (escape_expr != nullptr) {
674  if (!escape_expr->get_type_info().is_string()) {
675  throw std::runtime_error("expression after ESCAPE must be of a string type.");
676  }
677  if (!escape_expr->get_type_info().is_string()) {
678  throw std::runtime_error("expression after ESCAPE must be of a string type.");
679  }
680  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
681  if (c != nullptr && c->get_constval().stringval->length() > 1) {
682  throw std::runtime_error("String after ESCAPE must have a single character.");
683  }
684  escape_char = (*c->get_constval().stringval)[0];
685  if (escape_char != '\\') {
686  throw std::runtime_error("Only supporting '\\' escape character.");
687  }
688  }
689  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(pattern_expr);
690  if (c != nullptr) {
691  std::string& pattern = *c->get_constval().stringval;
692  if (translate_to_like_pattern(pattern, escape_char)) {
693  return LikeExpr::get(arg_expr, pattern_expr, escape_expr, false, is_not);
694  }
695  }
696  std::shared_ptr<Analyzer::Expr> result =
697  makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
698  if (is_not) {
699  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
700  }
701  return result;
702 }
703 
704 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
705  const Catalog_Namespace::Catalog& catalog,
706  Analyzer::Query& query,
707  TlistRefType allow_tlist_ref) const {
708  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
709  return LikelihoodExpr::get(arg_expr, likelihood_, is_not_);
710 }
711 
712 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
713  std::shared_ptr<Analyzer::Expr> arg_expr,
714  float likelihood,
715  const bool is_not) {
716  if (!arg_expr->get_type_info().is_boolean()) {
717  throw std::runtime_error("likelihood expression expects boolean type.");
718  }
719  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikelihoodExpr>(
720  arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
721  return result;
722 }
723 
724 std::shared_ptr<Analyzer::Expr> WidthBucketExpr::analyze(
725  const Catalog_Namespace::Catalog& catalog,
726  Analyzer::Query& query,
727  TlistRefType allow_tlist_ref) const {
728  auto target_value = target_value_->analyze(catalog, query, allow_tlist_ref);
729  auto lower_bound = lower_bound_->analyze(catalog, query, allow_tlist_ref);
730  auto upper_bound = upper_bound_->analyze(catalog, query, allow_tlist_ref);
731  auto partition_count = partition_count_->analyze(catalog, query, allow_tlist_ref);
732  return WidthBucketExpr::get(target_value, lower_bound, upper_bound, partition_count);
733 }
734 
735 std::shared_ptr<Analyzer::Expr> WidthBucketExpr::get(
736  std::shared_ptr<Analyzer::Expr> target_value,
737  std::shared_ptr<Analyzer::Expr> lower_bound,
738  std::shared_ptr<Analyzer::Expr> upper_bound,
739  std::shared_ptr<Analyzer::Expr> partition_count) {
740  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::WidthBucketExpr>(
741  target_value, lower_bound, upper_bound, partition_count);
742  return result;
743 }
744 
745 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
746  const Catalog_Namespace::Catalog& catalog,
747  Analyzer::Query& query,
748  TlistRefType allow_tlist_ref) const {
749  throw std::runtime_error("Subqueries are not supported yet.");
750  return nullptr;
751 }
752 
// Resolves a column reference to an analyzer expression.
//  - Qualified (table_ set): looks the table up in the query's range table and
//    the column in that entry.
//  - Unqualified: searches every range table entry; if no column descriptor
//    is held afterwards and target-list references are allowed, tries to match
//    an output alias in the query's target list.
// Returns a ColumnVar for a resolved column, or a copy of / Var reference to
// the matching target-list entry.
// Throws std::runtime_error for '*', unknown tables, unknown or ambiguous
// columns, and ambiguous output aliases.
// NOTE(review): listing line 808 is absent from this extract, which leaves the
// braces below unbalanced as shown; it presumably opens a condition around the
// `return v->deep_copy();` on listing line 809 — confirm against the full source.
753 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
754  const Catalog_Namespace::Catalog& catalog,
755  Analyzer::Query& query,
756  TlistRefType allow_tlist_ref) const {
757  int table_id{0};
758  int rte_idx{0};
759  const ColumnDescriptor* cd{nullptr};
760  if (column_ == nullptr) {
761  throw std::runtime_error("invalid column name *.");
762  }
763  if (table_ != nullptr) {
// Qualified reference: the table must be in the range table and must contain
// the column.
764  rte_idx = query.get_rte_idx(*table_);
765  if (rte_idx < 0) {
766  throw std::runtime_error("range variable or table name " + *table_ +
767  " does not exist.");
768  }
769  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
770  cd = rte->get_column_desc(catalog, *column_);
771  if (cd == nullptr) {
772  throw std::runtime_error("Column name " + *column_ + " does not exist.");
773  }
774  table_id = rte->get_table_id();
775  } else {
// Unqualified reference: scan every range table entry; a second match is an
// ambiguity error.
// NOTE(review): `cd` is overwritten on every iteration, so after the loop it
// holds the result for the LAST entry only. A column found in an earlier
// entry but not in the last one leaves cd == nullptr even though `found` is
// true — looks unintended; confirm.
776  bool found = false;
777  int i = 0;
778  for (auto rte : query.get_rangetable()) {
779  cd = rte->get_column_desc(catalog, *column_);
780  if (cd != nullptr && !found) {
781  found = true;
782  rte_idx = i;
783  table_id = rte->get_table_id();
784  } else if (cd != nullptr && found) {
785  throw std::runtime_error("Column name " + *column_ + " is ambiguous.");
786  }
787  i++;
788  }
789  if (cd == nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
790  // check if this is a reference to a targetlist entry
// These locals shadow the outer `found` and `i`; varno is the 1-based
// position of the matching target-list entry.
791  bool found = false;
792  int varno = -1;
793  int i = 1;
794  std::shared_ptr<Analyzer::TargetEntry> tle;
795  for (auto p : query.get_targetlist()) {
796  if (*column_ == p->get_resname() && !found) {
797  found = true;
798  varno = i;
799  tle = p;
800  } else if (*column_ == p->get_resname() && found) {
801  throw std::runtime_error("Output alias " + *column_ + " is ambiguous.");
802  }
803  i++;
804  }
805  if (found) {
806  if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
807  Analyzer::Var* v = static_cast<Analyzer::Var*>(tle->get_expr());
809  return v->deep_copy();
810  }
811  }
// TLIST_COPY returns a deep copy of the aliased expression; any other allowed
// mode returns an output Var that refers to the entry by position.
812  if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
813  return tle->get_expr()->deep_copy();
814  } else {
815  return makeExpr<Analyzer::Var>(
816  tle->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
817  }
818  }
819  }
820  if (cd == nullptr) {
821  throw std::runtime_error("Column name " + *column_ + " does not exist.");
822  }
823  }
824  return makeExpr<Analyzer::ColumnVar>(cd->columnType, table_id, cd->columnId, rte_idx);
825 }
826 
827 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
828  const Catalog_Namespace::Catalog& catalog,
829  Analyzer::Query& query,
830  TlistRefType allow_tlist_ref) const {
831  SQLTypeInfo result_type;
832  SQLAgg agg_type;
833  std::shared_ptr<Analyzer::Expr> arg_expr;
834  bool is_distinct = false;
835  if (boost::iequals(*name_, "count")) {
836  result_type = SQLTypeInfo(kBIGINT, false);
837  agg_type = kCOUNT;
838  if (arg_) {
839  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
840  const SQLTypeInfo& ti = arg_expr->get_type_info();
841  if (ti.is_string() && (ti.get_compression() != kENCODING_DICT || !distinct_)) {
842  throw std::runtime_error(
843  "Strings must be dictionary-encoded in COUNT(DISTINCT).");
844  }
845  if (ti.get_type() == kARRAY && !distinct_) {
846  throw std::runtime_error("Only COUNT(DISTINCT) is supported on arrays.");
847  }
848  }
849  is_distinct = distinct_;
850  } else {
851  if (!arg_) {
852  throw std::runtime_error("Cannot compute " + *name_ + " with argument '*'.");
853  }
854  if (boost::iequals(*name_, "min")) {
855  agg_type = kMIN;
856  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
857  arg_expr = arg_expr->decompress();
858  result_type = arg_expr->get_type_info();
859  } else if (boost::iequals(*name_, "max")) {
860  agg_type = kMAX;
861  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
862  arg_expr = arg_expr->decompress();
863  result_type = arg_expr->get_type_info();
864  } else if (boost::iequals(*name_, "avg")) {
865  agg_type = kAVG;
866  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
867  if (!arg_expr->get_type_info().is_number()) {
868  throw std::runtime_error("Cannot compute AVG on non-number-type arguments.");
869  }
870  arg_expr = arg_expr->decompress();
871  result_type = SQLTypeInfo(kDOUBLE, false);
872  } else if (boost::iequals(*name_, "sum")) {
873  agg_type = kSUM;
874  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
875  if (!arg_expr->get_type_info().is_number()) {
876  throw std::runtime_error("Cannot compute SUM on non-number-type arguments.");
877  }
878  arg_expr = arg_expr->decompress();
879  result_type = arg_expr->get_type_info().is_integer() ? SQLTypeInfo(kBIGINT, false)
880  : arg_expr->get_type_info();
881  } else if (boost::iequals(*name_, "unnest")) {
882  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
883  const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
884  if (arg_ti.get_type() != kARRAY) {
885  throw std::runtime_error(arg_->to_string() + " is not of array type.");
886  }
887  return makeExpr<Analyzer::UOper>(arg_ti.get_elem_type(), false, kUNNEST, arg_expr);
888  } else {
889  throw std::runtime_error("invalid function name: " + *name_);
890  }
891  if (arg_expr->get_type_info().is_string() ||
892  arg_expr->get_type_info().get_type() == kARRAY) {
893  throw std::runtime_error(
894  "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
895  }
896  }
897  int naggs = query.get_num_aggs();
898  query.set_num_aggs(naggs + 1);
899  return makeExpr<Analyzer::AggExpr>(
900  result_type, agg_type, arg_expr, is_distinct, nullptr);
901 }
902 
903 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
904  const Catalog_Namespace::Catalog& catalog,
905  Analyzer::Query& query,
906  TlistRefType allow_tlist_ref) const {
907  target_type_->check_type();
908  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
909  SQLTypeInfo ti(target_type_->get_type(),
910  target_type_->get_param1(),
911  target_type_->get_param2(),
912  arg_expr->get_type_info().get_notnull());
913  if (arg_expr->get_type_info().get_type() != target_type_->get_type() &&
914  arg_expr->get_type_info().get_compression() != kENCODING_NONE) {
915  arg_expr->decompress();
916  }
917  return arg_expr->add_cast(ti);
918 }
919 
920 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
921  const Catalog_Namespace::Catalog& catalog,
922  Analyzer::Query& query,
923  TlistRefType allow_tlist_ref) const {
924  SQLTypeInfo ti;
925  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
926  expr_pair_list;
927  for (auto& p : when_then_list_) {
928  auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
929  if (e1->get_type_info().get_type() != kBOOLEAN) {
930  throw std::runtime_error("Only boolean expressions can be used after WHEN.");
931  }
932  auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
933  expr_pair_list.emplace_back(e1, e2);
934  }
935  auto else_e =
936  else_expr_ ? else_expr_->analyze(catalog, query, allow_tlist_ref) : nullptr;
937  return normalize(expr_pair_list, else_e);
938 }
939 
940 namespace {
941 
942 bool expr_is_null(const Analyzer::Expr* expr) {
943  if (expr->get_type_info().get_type() == kNULLT) {
944  return true;
945  }
946  const auto const_expr = dynamic_cast<const Analyzer::Constant*>(expr);
947  return const_expr && const_expr->get_is_null();
948 }
949 
// NOTE(review): the line that opens this definition (listing line 950) is absent from
// this extract, so the function's name, return type and parameter list cannot be
// confirmed here. The body reads a parameter named `str_literal`.
//
// Converts the literal's string value to a bool:
//   "t", "true", "T", "True"   -> true
//   "f", "false", "F", "False" -> false
// The comparison is exact, so other spellings (e.g. "TRUE") are rejected.
// Throws std::runtime_error for any other string.
951  const std::string* s = str_literal->get_stringval();
952  if (*s == "t" || *s == "true" || *s == "T" || *s == "True") {
953  return true;
954  } else if (*s == "f" || *s == "false" || *s == "F" || *s == "False") {
955  return false;
956  } else {
957  throw std::runtime_error("Invalid string for boolean " + *s);
958  }
959 }
960 
961 } // namespace
962 
963 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
964  const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
965  std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
966  const std::shared_ptr<Analyzer::Expr> else_e_in) {
967  SQLTypeInfo ti;
968  bool has_agg = false;
969  std::set<int> dictionary_ids;
970  bool has_none_encoded_str_projection = false;
971 
972  for (auto& p : expr_pair_list) {
973  auto e1 = p.first;
974  CHECK(e1->get_type_info().is_boolean());
975  auto e2 = p.second;
976  if (e2->get_type_info().is_string()) {
977  if (e2->get_type_info().is_dict_encoded_string()) {
978  dictionary_ids.insert(e2->get_type_info().get_comp_param());
979  // allow literals to potentially fall down the transient path
980  } else if (std::dynamic_pointer_cast<const Analyzer::ColumnVar>(e2)) {
981  has_none_encoded_str_projection = true;
982  }
983  }
984 
985  if (ti.get_type() == kNULLT) {
986  ti = e2->get_type_info();
987  } else if (e2->get_type_info().get_type() == kNULLT) {
988  ti.set_notnull(false);
989  e2->set_type_info(ti);
990  } else if (ti != e2->get_type_info()) {
991  if (ti.is_string() && e2->get_type_info().is_string()) {
992  ti = Analyzer::BinOper::common_string_type(ti, e2->get_type_info());
993  } else if (ti.is_number() && e2->get_type_info().is_number()) {
994  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
995  } else if (ti.is_boolean() && e2->get_type_info().is_boolean()) {
996  ti = Analyzer::BinOper::common_numeric_type(ti, e2->get_type_info());
997  } else {
998  throw std::runtime_error(
999  "expressions in THEN clause must be of the same or compatible types.");
1000  }
1001  }
1002  if (e2->get_contains_agg()) {
1003  has_agg = true;
1004  }
1005  }
1006  auto else_e = else_e_in;
1007  if (else_e) {
1008  if (else_e->get_contains_agg()) {
1009  has_agg = true;
1010  }
1011  if (expr_is_null(else_e.get())) {
1012  ti.set_notnull(false);
1013  else_e->set_type_info(ti);
1014  } else if (ti != else_e->get_type_info()) {
1015  if (else_e->get_type_info().is_string()) {
1016  if (else_e->get_type_info().is_dict_encoded_string()) {
1017  dictionary_ids.insert(else_e->get_type_info().get_comp_param());
1018  // allow literals to potentially fall down the transient path
1019  } else if (std::dynamic_pointer_cast<const Analyzer::ColumnVar>(else_e)) {
1020  has_none_encoded_str_projection = true;
1021  }
1022  }
1023  ti.set_notnull(false);
1024  if (ti.is_string() && else_e->get_type_info().is_string()) {
1025  ti = Analyzer::BinOper::common_string_type(ti, else_e->get_type_info());
1026  } else if (ti.is_number() && else_e->get_type_info().is_number()) {
1027  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
1028  } else if (ti.is_boolean() && else_e->get_type_info().is_boolean()) {
1029  ti = Analyzer::BinOper::common_numeric_type(ti, else_e->get_type_info());
1030  } else if (get_logical_type_info(ti) !=
1031  get_logical_type_info(else_e->get_type_info())) {
1032  throw std::runtime_error(
1033  // types differing by encoding will be resolved at decode
1034 
1035  "expressions in ELSE clause must be of the same or compatible types as those "
1036  "in the THEN clauses.");
1037  }
1038  }
1039  }
1040  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1041  cast_expr_pair_list;
1042  for (auto p : expr_pair_list) {
1043  ti.set_notnull(false);
1044  cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
1045  }
1046  if (else_e != nullptr) {
1047  else_e = else_e->add_cast(ti);
1048  } else {
1049  Datum d;
1050  // always create an else expr so that executor doesn't need to worry about it
1051  ti.set_notnull(false);
1052  else_e = makeExpr<Analyzer::Constant>(ti, true, d);
1053  }
1054  if (ti.get_type() == kNULLT) {
1055  throw std::runtime_error(
1056  "Can't deduce the type for case expressions, all branches null");
1057  }
1058 
1059  auto case_expr = makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
1060  if (ti.get_compression() != kENCODING_DICT && dictionary_ids.size() == 1 &&
1061  *(dictionary_ids.begin()) > 0 && !has_none_encoded_str_projection) {
1062  // the above logic makes two assumptions when strings are present. 1) that all types
1063  // in the case statement are either null or strings, and 2) that none-encoded strings
1064  // will always win out over dict encoding. If we only have one dictionary, and that
1065  // dictionary is not a transient dictionary, we can cast the entire case to be dict
1066  // encoded and use transient dictionaries for any literals
1067  ti.set_compression(kENCODING_DICT);
1068  ti.set_comp_param(*dictionary_ids.begin());
1069  case_expr->add_cast(ti);
1070  }
1071  return case_expr;
1072 }
1073 
1074 std::string CaseExpr::to_string() const {
1075  std::string str("CASE ");
1076  for (auto& p : when_then_list_) {
1077  str += "WHEN " + p->get_expr1()->to_string() + " THEN " +
1078  p->get_expr2()->to_string() + " ";
1079  }
1080  if (else_expr_ != nullptr) {
1081  str += "ELSE " + else_expr_->to_string();
1082  }
1083  str += " END";
1084  return str;
1085 }
1086 
1087 void UnionQuery::analyze(const Catalog_Namespace::Catalog& catalog,
1088  Analyzer::Query& query) const {
1089  left_->analyze(catalog, query);
1090  Analyzer::Query* right_query = new Analyzer::Query();
1091  right_->analyze(catalog, *right_query);
1092  query.set_next_query(right_query);
1093  query.set_is_unionall(is_unionall_);
1094 }
1095 
1096 void QuerySpec::analyze_having_clause(const Catalog_Namespace::Catalog& catalog,
1097  Analyzer::Query& query) const {
1098  std::shared_ptr<Analyzer::Expr> p;
1099  if (having_clause_ != nullptr) {
1100  p = having_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1101  if (p->get_type_info().get_type() != kBOOLEAN) {
1102  throw std::runtime_error("Only boolean expressions can be in HAVING clause.");
1103  }
1104  p->check_group_by(query.get_group_by());
1105  }
1106  query.set_having_predicate(p);
1107 }
1108 
// Builds the query's GROUP BY expression list from groupby_clause_ and stores it with
// query.set_group_by(). When the query has aggregates or a non-empty GROUP BY list,
// every target-list expression is validated against that list via check_group_by().
//
// Throws std::runtime_error for a non-integer literal in GROUP BY, for an ordinal
// outside 1..tlist.size(), and for an ordinal that refers to a target expression
// containing an aggregate.
1109 void QuerySpec::analyze_group_by(const Catalog_Namespace::Catalog& catalog,
1110  Analyzer::Query& query) const {
1111  std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1112  if (!groupby_clause_.empty()) {
// 1-based position of the current entry within the GROUP BY list
1113  int gexpr_no = 1;
1114  std::shared_ptr<Analyzer::Expr> gexpr;
1115  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1116  query.get_targetlist();
1117  for (auto& c : groupby_clause_) {
1118  // special-case ordinal numbers in GROUP BY
1119  if (dynamic_cast<Literal*>(c.get())) {
// only integer literals are accepted; they are ordinals into the target list
1120  IntLiteral* i = dynamic_cast<IntLiteral*>(c.get());
1121  if (!i) {
1122  throw std::runtime_error("Invalid literal in GROUP BY clause.");
1123  }
1124  int varno = (int)i->get_intval();
1125  if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1126  throw std::runtime_error("Invalid ordinal number in GROUP BY clause.");
1127  }
1128  if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1129  throw std::runtime_error(
1130  "Ordinal number in GROUP BY cannot reference an expression containing "
1131  "aggregate "
1132  "functions.");
1133  }
// represent the ordinal as a Var that points at target-list entry `varno`
1134  gexpr = makeExpr<Analyzer::Var>(
1135  tlist[varno - 1]->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
1136  } else {
1137  gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1138  }
1139  const SQLTypeInfo gti = gexpr->get_type_info();
1140  bool set_new_type = false;
1141  SQLTypeInfo ti(gti);
// strings without compression get a replacement type `ti` and are cast to it below
1142  if (gti.is_string() && gti.get_compression() == kENCODING_NONE) {
1143  set_new_type = true;
// NOTE(review): listing lines 1144-1145 are absent from this extract; they presumably
// adjust `ti` further before set_fixed_size() -- confirm against the full source.
1146  ti.set_fixed_size();
1147  }
1148  std::shared_ptr<Analyzer::Var> v;
1149  if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
// The entry refers to a target-list item: group by that item's own expression and
// replace the target-list entry with a Var that points at GROUP BY slot `gexpr_no`.
1150  v = std::static_pointer_cast<Analyzer::Var>(gexpr);
1151  int n = v->get_varno();
1152  gexpr = tlist[n - 1]->get_own_expr();
1153  auto cv = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gexpr);
1154  if (cv != nullptr) {
1155  // inherit all ColumnVar info for lineage.
1156  *std::static_pointer_cast<Analyzer::ColumnVar>(v) = *cv;
1157  }
1158  v->set_which_row(Analyzer::Var::kGROUPBY);
1159  v->set_varno(gexpr_no);
1160  tlist[n - 1]->set_expr(v);
1161  }
1162  if (set_new_type) {
// group by the cast expression and keep the referring Var's type in sync with it
1163  auto new_e = gexpr->add_cast(ti);
1164  groupby.push_back(new_e);
1165  if (v != nullptr) {
1166  v->set_type_info(new_e->get_type_info());
1167  }
1168  } else {
1169  groupby.push_back(gexpr);
1170  }
1171  gexpr_no++;
1172  }
1173  }
// validate the target list whenever grouping or aggregation is in effect
1174  if (query.get_num_aggs() > 0 || !groupby.empty()) {
1175  for (auto t : query.get_targetlist()) {
1176  auto e = t->get_expr();
1177  e->check_group_by(groupby);
1178  }
1179  }
1180  query.set_group_by(groupby);
1181 }
1182 
1183 void QuerySpec::analyze_where_clause(const Catalog_Namespace::Catalog& catalog,
1184  Analyzer::Query& query) const {
1185  if (where_clause_ == nullptr) {
1186  query.set_where_predicate(nullptr);
1187  return;
1188  }
1189  auto p = where_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1190  if (p->get_type_info().get_type() != kBOOLEAN) {
1191  throw std::runtime_error("Only boolean expressions can be in WHERE clause.");
1192  }
1193  query.set_where_predicate(p);
1194 }
1195 
1196 void QuerySpec::analyze_select_clause(const Catalog_Namespace::Catalog& catalog,
1197  Analyzer::Query& query) const {
1198  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1199  query.get_targetlist_nonconst();
1200  if (select_clause_.empty()) {
1201  // this means SELECT *
1202  int rte_idx = 0;
1203  for (auto rte : query.get_rangetable()) {
1204  rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1205  }
1206  } else {
1207  for (auto& p : select_clause_) {
1208  const Parser::Expr* select_expr = p->get_select_expr();
1209  // look for the case of range_var.*
1210  if (typeid(*select_expr) == typeid(ColumnRef) &&
1211  dynamic_cast<const ColumnRef*>(select_expr)->get_column() == nullptr) {
1212  const std::string* range_var_name =
1213  dynamic_cast<const ColumnRef*>(select_expr)->get_table();
1214  int rte_idx = query.get_rte_idx(*range_var_name);
1215  if (rte_idx < 0) {
1216  throw std::runtime_error("invalid range variable name: " + *range_var_name);
1217  }
1218  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
1219  rte->expand_star_in_targetlist(catalog, tlist, rte_idx);
1220  } else {
1221  auto e = select_expr->analyze(catalog, query);
1222  std::string resname;
1223 
1224  if (p->get_alias() != nullptr) {
1225  resname = *p->get_alias();
1226  } else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1227  !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1228  auto colvar = std::static_pointer_cast<Analyzer::ColumnVar>(e);
1229  const ColumnDescriptor* col_desc = catalog.getMetadataForColumn(
1230  colvar->get_table_id(), colvar->get_column_id());
1231  resname = col_desc->columnName;
1232  }
1233  if (e->get_type_info().get_type() == kNULLT) {
1234  throw std::runtime_error(
1235  "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1236  }
1237  auto o = std::static_pointer_cast<Analyzer::UOper>(e);
1238  bool unnest = (o != nullptr && o->get_optype() == kUNNEST);
1239  auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1240  tlist.push_back(tle);
1241  }
1242  }
1243  }
1244 }
1245 
// Registers one range table entry on `query` for every table in the FROM clause.
// Each table is looked up in the catalog by name; the entry's range variable is the
// table's alias when one was given and the table name otherwise.
//
// Throws std::runtime_error when a table is not found in the catalog.
1246 void QuerySpec::analyze_from_clause(const Catalog_Namespace::Catalog& catalog,
1247  Analyzer::Query& query) const {
// NOTE(review): listing line 1248 is absent from this extract; it presumably declares
// `rte`, which is assigned below -- confirm against the full source.
1249  for (auto& p : from_clause_) {
1250  const TableDescriptor* table_desc;
1251  table_desc = catalog.getMetadataForTable(*p->get_table_name());
1252  if (table_desc == nullptr) {
1253  throw std::runtime_error("Table " + *p->get_table_name() + " does not exist.");
1254  }
1255  std::string range_var;
1256  if (p->get_range_var() == nullptr) {
1257  range_var = *p->get_table_name();
1258  } else {
1259  range_var = *p->get_range_var();
1260  }
// NOTE(review): the raw pointer is handed to query.add_rte(); presumably the query
// takes ownership of it -- confirm in Analyzer::Query.
1261  rte = new Analyzer::RangeTableEntry(range_var, table_desc, nullptr);
1262  query.add_rte(rte);
1263  }
1264 }
1265 
1266 void QuerySpec::analyze(const Catalog_Namespace::Catalog& catalog,
1267  Analyzer::Query& query) const {
1268  query.set_is_distinct(is_distinct_);
1269  analyze_from_clause(catalog, query);
1270  analyze_select_clause(catalog, query);
1271  analyze_where_clause(catalog, query);
1272  analyze_group_by(catalog, query);
1273  analyze_having_clause(catalog, query);
1274 }
1275 
1276 namespace {
1277 
// Turns the escaped spellings of tab and newline back into the real control
// characters without doing a full JSON parse. Both the doubly escaped form
// (two backslashes followed by 't' or 'n') and the singly escaped form
// (one backslash followed by 't' or 'n') are handled, the longer form first.
std::string unescape(std::string s) {
  // Replaces every non-overlapping occurrence of `from`, scanning left to right and
  // never re-examining text that was just substituted.
  const auto replace_every = [&s](const std::string& from, const std::string& to) {
    for (std::string::size_type at = s.find(from); at != std::string::npos;
         at = s.find(from, at + to.size())) {
      s.replace(at, from.size(), to);
    }
  };
  replace_every("\\\\t", "\t");
  replace_every("\\t", "\t");
  replace_every("\\\\n", "\n");
  replace_every("\\n", "\n");
  return s;
}
1286 
1287 void parse_options(const rapidjson::Value& payload,
1288  std::list<std::unique_ptr<NameValueAssign>>& nameValueList,
1289  bool stringToNull = false,
1290  bool stringToInteger = false) {
1291  if (payload.HasMember("options") && payload["options"].IsObject()) {
1292  for (const auto& option : payload["options"].GetObject()) {
1293  auto option_name = std::make_unique<std::string>(option.name.GetString());
1294  std::unique_ptr<Literal> literal_value;
1295  if (option.value.IsString()) {
1296  std::string str = option.value.GetString();
1297  if (stringToNull && str == "") {
1298  literal_value = std::make_unique<NullLiteral>();
1299  } else if (stringToInteger && std::all_of(str.begin(), str.end(), ::isdigit)) {
1300  int iVal = std::stoi(str);
1301  literal_value = std::make_unique<IntLiteral>(iVal);
1302  } else {
1303  // Rapidjson will deliberately provide escape'd strings when accessed
1304  // ... but the literal should have a copy of the raw unescaped string
1305  auto unique_literal_string = std::make_unique<std::string>(unescape(str));
1306  literal_value =
1307  std::make_unique<StringLiteral>(unique_literal_string.release());
1308  }
1309  } else if (option.value.IsInt() || option.value.IsInt64()) {
1310  literal_value = std::make_unique<IntLiteral>(json_i64(option.value));
1311  } else if (option.value.IsNull()) {
1312  literal_value = std::make_unique<NullLiteral>();
1313  } else {
1314  throw std::runtime_error("Unable to handle literal for " + *option_name);
1315  }
1316  CHECK(literal_value);
1317 
1318  nameValueList.emplace_back(std::make_unique<NameValueAssign>(
1319  option_name.release(), literal_value.release()));
1320  }
1321  }
1322 }
1323 } // namespace
1324 
1325 void SelectStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1326  Analyzer::Query& query) const {
1327  query.set_stmt_type(kSELECT);
1328  query.set_limit(limit_);
1329  if (offset_ < 0) {
1330  throw std::runtime_error("OFFSET cannot be negative.");
1331  }
1332  query.set_offset(offset_);
1333  query_expr_->analyze(catalog, query);
1334  if (orderby_clause_.empty() && !query.get_is_distinct()) {
1335  query.set_order_by(nullptr);
1336  return;
1337  }
1338  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1339  query.get_targetlist();
1340  std::list<Analyzer::OrderEntry>* order_by = new std::list<Analyzer::OrderEntry>();
1341  if (!orderby_clause_.empty()) {
1342  for (auto& p : orderby_clause_) {
1343  int tle_no = p->get_colno();
1344  if (tle_no == 0) {
1345  // use column name
1346  // search through targetlist for matching name
1347  const std::string* name = p->get_column()->get_column();
1348  tle_no = 1;
1349  bool found = false;
1350  for (auto tle : tlist) {
1351  if (tle->get_resname() == *name) {
1352  found = true;
1353  break;
1354  }
1355  tle_no++;
1356  }
1357  if (!found) {
1358  throw std::runtime_error("invalid name in order by: " + *name);
1359  }
1360  }
1361  order_by->push_back(
1362  Analyzer::OrderEntry(tle_no, p->get_is_desc(), p->get_nulls_first()));
1363  }
1364  }
1365  if (query.get_is_distinct()) {
1366  // extend order_by to include all targetlist entries.
1367  for (int i = 1; i <= static_cast<int>(tlist.size()); i++) {
1368  bool in_orderby = false;
1369  std::for_each(order_by->begin(),
1370  order_by->end(),
1371  [&in_orderby, i](const Analyzer::OrderEntry& oe) {
1372  in_orderby = in_orderby || (i == oe.tle_no);
1373  });
1374  if (!in_orderby) {
1375  order_by->push_back(Analyzer::OrderEntry(i, false, false));
1376  }
1377  }
1378  }
1379  query.set_order_by(order_by);
1380 }
1381 
1382 std::string SelectEntry::to_string() const {
1383  std::string str = select_expr_->to_string();
1384  if (alias_ != nullptr) {
1385  str += " AS " + *alias_;
1386  }
1387  return str;
1388 }
1389 
1390 std::string TableRef::to_string() const {
1391  std::string str = *table_name_;
1392  if (range_var_ != nullptr) {
1393  str += " " + *range_var_;
1394  }
1395  return str;
1396 }
1397 
1398 std::string ColumnRef::to_string() const {
1399  std::string str;
1400  if (table_ == nullptr) {
1401  str = *column_;
1402  } else if (column_ == nullptr) {
1403  str = *table_ + ".*";
1404  } else {
1405  str = *table_ + "." + *column_;
1406  }
1407  return str;
1408 }
1409 
1410 std::string OperExpr::to_string() const {
1411  std::string op_str[] = {
1412  "=", "===", "<>", "<", ">", "<=", ">=", " AND ", " OR ", "NOT", "-", "+", "*", "/"};
1413  std::string str;
1414  if (optype_ == kUMINUS) {
1415  str = "-(" + left_->to_string() + ")";
1416  } else if (optype_ == kNOT) {
1417  str = "NOT (" + left_->to_string() + ")";
1418  } else if (optype_ == kARRAY_AT) {
1419  str = left_->to_string() + "[" + right_->to_string() + "]";
1420  } else if (optype_ == kUNNEST) {
1421  str = "UNNEST(" + left_->to_string() + ")";
1422  } else if (optype_ == kIN) {
1423  str = "(" + left_->to_string() + " IN " + right_->to_string() + ")";
1424  } else {
1425  str = "(" + left_->to_string() + op_str[optype_] + right_->to_string() + ")";
1426  }
1427  return str;
1428 }
1429 
1430 std::string InExpr::to_string() const {
1431  std::string str = arg_->to_string();
1432  if (is_not_) {
1433  str += " NOT IN ";
1434  } else {
1435  str += " IN ";
1436  }
1437  return str;
1438 }
1439 
1440 std::string ExistsExpr::to_string() const {
1441  return "EXISTS (" + query_->to_string() + ")";
1442 }
1443 
1444 std::string SubqueryExpr::to_string() const {
1445  std::string str;
1446  str = "(";
1447  str += query_->to_string();
1448  str += ")";
1449  return str;
1450 }
1451 
1452 std::string IsNullExpr::to_string() const {
1453  std::string str = arg_->to_string();
1454  if (is_not_) {
1455  str += " IS NOT NULL";
1456  } else {
1457  str += " IS NULL";
1458  }
1459  return str;
1460 }
1461 
1462 std::string InSubquery::to_string() const {
1463  std::string str = InExpr::to_string();
1464  str += subquery_->to_string();
1465  return str;
1466 }
1467 
1468 std::string InValues::to_string() const {
1469  std::string str = InExpr::to_string() + "(";
1470  bool notfirst = false;
1471  for (auto& p : value_list_) {
1472  if (notfirst) {
1473  str += ", ";
1474  } else {
1475  notfirst = true;
1476  }
1477  str += p->to_string();
1478  }
1479  str += ")";
1480  return str;
1481 }
1482 
1483 std::string BetweenExpr::to_string() const {
1484  std::string str = arg_->to_string();
1485  if (is_not_) {
1486  str += " NOT BETWEEN ";
1487  } else {
1488  str += " BETWEEN ";
1489  }
1490  str += lower_->to_string() + " AND " + upper_->to_string();
1491  return str;
1492 }
1493 
1494 std::string CharLengthExpr::to_string() const {
1495  std::string str;
1496  if (calc_encoded_length_) {
1497  str = "CHAR_LENGTH (" + arg_->to_string() + ")";
1498  } else {
1499  str = "LENGTH (" + arg_->to_string() + ")";
1500  }
1501  return str;
1502 }
1503 
1504 std::string CardinalityExpr::to_string() const {
1505  std::string str = "CARDINALITY(" + arg_->to_string() + ")";
1506  return str;
1507 }
1508 
1509 std::string LikeExpr::to_string() const {
1510  std::string str = arg_->to_string();
1511  if (is_not_) {
1512  str += " NOT LIKE ";
1513  } else {
1514  str += " LIKE ";
1515  }
1516  str += like_string_->to_string();
1517  if (escape_string_ != nullptr) {
1518  str += " ESCAPE " + escape_string_->to_string();
1519  }
1520  return str;
1521 }
1522 
1523 std::string RegexpExpr::to_string() const {
1524  std::string str = arg_->to_string();
1525  if (is_not_) {
1526  str += " NOT REGEXP ";
1527  } else {
1528  str += " REGEXP ";
1529  }
1530  str += pattern_string_->to_string();
1531  if (escape_string_ != nullptr) {
1532  str += " ESCAPE " + escape_string_->to_string();
1533  }
1534  return str;
1535 }
1536 
1537 std::string WidthBucketExpr::to_string() const {
1538  std::string str = " WIDTH_BUCKET ";
1539  str += target_value_->to_string();
1540  str += " ";
1541  str += lower_bound_->to_string();
1542  str += " ";
1543  str += upper_bound_->to_string();
1544  str += " ";
1545  str += partition_count_->to_string();
1546  str += " ";
1547  return str;
1548 }
1549 
1550 std::string LikelihoodExpr::to_string() const {
1551  std::string str = " LIKELIHOOD ";
1552  str += arg_->to_string();
1553  str += " ";
1554  str += boost::lexical_cast<std::string>(is_not_ ? 1.0 - likelihood_ : likelihood_);
1555  return str;
1556 }
1557 
1558 std::string FunctionRef::to_string() const {
1559  std::string str = *name_ + "(";
1560  if (distinct_) {
1561  str += "DISTINCT ";
1562  }
1563  if (arg_ == nullptr) {
1564  str += "*)";
1565  } else {
1566  str += arg_->to_string() + ")";
1567  }
1568  return str;
1569 }
1570 
1571 std::string QuerySpec::to_string() const {
1572  std::string query_str = "SELECT ";
1573  if (is_distinct_) {
1574  query_str += "DISTINCT ";
1575  }
1576  if (select_clause_.empty()) {
1577  query_str += "* ";
1578  } else {
1579  bool notfirst = false;
1580  for (auto& p : select_clause_) {
1581  if (notfirst) {
1582  query_str += ", ";
1583  } else {
1584  notfirst = true;
1585  }
1586  query_str += p->to_string();
1587  }
1588  }
1589  query_str += " FROM ";
1590  bool notfirst = false;
1591  for (auto& p : from_clause_) {
1592  if (notfirst) {
1593  query_str += ", ";
1594  } else {
1595  notfirst = true;
1596  }
1597  query_str += p->to_string();
1598  }
1599  if (where_clause_) {
1600  query_str += " WHERE " + where_clause_->to_string();
1601  }
1602  if (!groupby_clause_.empty()) {
1603  query_str += " GROUP BY ";
1604  bool notfirst = false;
1605  for (auto& p : groupby_clause_) {
1606  if (notfirst) {
1607  query_str += ", ";
1608  } else {
1609  notfirst = true;
1610  }
1611  query_str += p->to_string();
1612  }
1613  }
1614  if (having_clause_) {
1615  query_str += " HAVING " + having_clause_->to_string();
1616  }
1617  query_str += ";";
1618  return query_str;
1619 }
1620 
// Common analysis for INSERT statements: marks the query as kINSERT, resolves the
// target table and records the table id and the list of column ids that will receive
// values.
//
// Throws std::runtime_error when the table does not exist, when it is a view, when a
// named column does not exist, or when a physical column belonging to a named column
// cannot be found.
1621 void InsertStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1622  Analyzer::Query& query) const {
1623  query.set_stmt_type(kINSERT);
1624  const TableDescriptor* td = catalog.getMetadataForTable(*table_);
1625  if (td == nullptr) {
1626  throw std::runtime_error("Table " + *table_ + " does not exist.");
1627  }
1628  if (td->isView) {
1629  throw std::runtime_error("Insert to views is not supported yet.");
1630  }
// NOTE(review): listing line 1631 is absent from this extract -- confirm against the
// full source what happens between the view check and set_result_table_id().
1632  query.set_result_table_id(td->tableId);
1633  std::list<int> result_col_list;
1634  if (column_list_.empty()) {
// No explicit column list: take every column returned by the catalog.
// NOTE(review): the meaning of the (false, false, true) flags is not visible here.
1635  const std::list<const ColumnDescriptor*> all_cols =
1636  catalog.getAllColumnMetadataForTable(td->tableId, false, false, true);
1637  for (auto cd : all_cols) {
1638  result_col_list.push_back(cd->columnId);
1639  }
1640  } else {
1641  for (auto& c : column_list_) {
1642  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
1643  if (cd == nullptr) {
1644  throw std::runtime_error("Column " + *c + " does not exist.");
1645  }
1646  result_col_list.push_back(cd->columnId);
1647  const auto& col_ti = cd->columnType;
// Columns that report physical columns (checked to be geometry) also contribute the
// ids columnId + 1 .. columnId + get_physical_cols().
1648  if (col_ti.get_physical_cols() > 0) {
1649  CHECK(cd->columnType.is_geometry());
1650  for (auto i = 1; i <= col_ti.get_physical_cols(); i++) {
1651  const ColumnDescriptor* pcd =
1652  catalog.getMetadataForColumn(td->tableId, cd->columnId + i);
1653  if (pcd == nullptr) {
1654  throw std::runtime_error("Column " + *c + "'s metadata is incomplete.");
1655  }
1656  result_col_list.push_back(pcd->columnId);
1657  }
1658  }
1659  }
1660  }
1661  query.set_result_col_list(result_col_list);
1662 }
1663 
// Computes the index of the leaf that should receive the row of this
// INSERT ... VALUES statement.
//
// - Table without shards (nShards == 0): a leaf is picked at random.
// - Sharded table: the value supplied for the shard column is turned into a shard id
//   via SHARD_FOR_KEY over nShards * num_leafs shards, and the result is
//   shardId / nShards.
//
// Throws std::runtime_error when the table does not exist, is a view, has partitions
// set to "REPLICATED", or when the value list has no entry for the shard column.
1664 size_t InsertValuesStmt::determineLeafIndex(const Catalog_Namespace::Catalog& catalog,
1665  size_t num_leafs) {
1666  const TableDescriptor* td = catalog.getMetadataForTable(*table_);
1667  if (td == nullptr) {
1668  throw std::runtime_error("Table " + *table_ + " does not exist.");
1669  }
1670  if (td->isView) {
1671  throw std::runtime_error("Insert to views is not supported yet.");
1672  }
// NOTE(review): listing line 1673 is absent from this extract -- confirm against the
// full source.
1674  if (td->partitions == "REPLICATED") {
1675  throw std::runtime_error("Cannot determine leaf on replicated table.");
1676  }
1677 
1678  if (0 == td->nShards) {
// Unsharded table: draw a random number and reduce it modulo the leaf count.
// NOTE(review): num_leafs == 0 would be a modulo by zero; presumably callers always
// pass a positive count -- confirm.
1679  std::random_device rd;
1680  std::mt19937_64 gen(rd());
1681  std::uniform_int_distribution<size_t> dis;
1682  const auto leaf_idx = dis(gen) % num_leafs;
1683  return leaf_idx;
1684  }
1685 
1686  size_t indexOfShardColumn = 0;
1687  const ColumnDescriptor* shardColumn = catalog.getShardColumnMetadataForTable(td);
1688  CHECK(shardColumn);
1689  auto shard_count = td->nShards * num_leafs;
1690  int64_t shardId = 0;
1691 
// Find the position of the shard column's value within value_list_.
1692  if (column_list_.empty()) {
// no explicit column list: position within the catalog's column list for the table
1693  auto all_cols =
1694  catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
1695  auto iter = std::find(all_cols.begin(), all_cols.end(), shardColumn);
1696  CHECK(iter != all_cols.end());
1697  indexOfShardColumn = std::distance(all_cols.begin(), iter);
1698  } else {
// explicit column list: position of the shard column's name within that list
1699  for (auto& c : column_list_) {
1700  if (*c == shardColumn->columnName) {
1701  break;
1702  }
1703  indexOfShardColumn++;
1704  }
1705 
// shard column not named in the column list
1706  if (indexOfShardColumn == column_list_.size()) {
// NOTE(review): listing line 1707 is absent from this extract; only the tail of the
// statement is visible below. Presumably shardId is computed here for a missing
// (NULL) shard key -- confirm against the full source.
1708  shard_count);
1709  return shardId / td->nShards;
1710  }
1711  }
1712 
1713  if (indexOfShardColumn >= value_list_.size()) {
1714  throw std::runtime_error("No value defined for shard column.");
1715  }
1716 
1717  auto& shardColumnValueExpr = *(std::next(value_list_.begin(), indexOfShardColumn));
1718 
// Analyze the value in a scratch query and cast it to the shard column's type.
1719  Analyzer::Query query;
1720  auto e = shardColumnValueExpr->analyze(catalog, query);
1721  e = e->add_cast(shardColumn->columnType);
1722  const Analyzer::Constant* con = dynamic_cast<Analyzer::Constant*>(e.get());
1723  if (!con) {
// not a constant itself: it must be a kCAST whose operand is the constant
1724  auto col_cast = dynamic_cast<const Analyzer::UOper*>(e.get());
1725  CHECK(col_cast);
1726  CHECK_EQ(kCAST, col_cast->get_optype());
1727  con = dynamic_cast<const Analyzer::Constant*>(col_cast->get_operand());
1728  }
1729  CHECK(con);
1730 
1731  Datum d = con->get_constval();
1732  if (con->get_is_null()) {
// NOTE(review): listing line 1733 is absent from this extract; only the tail of the
// statement is visible below. Presumably shardId is computed for a NULL key here --
// confirm against the full source.
1734  shard_count);
1735  } else if (shardColumn->columnType.is_string()) {
// String shard key: the shard is derived from the string's id in the column's
// dictionary; getOrAdd is called on the dictionary to obtain that id.
1736  auto dictDesc =
1737  catalog.getMetadataForDict(shardColumn->columnType.get_comp_param(), true);
1738  auto str_id = dictDesc->stringDict->getOrAdd(*d.stringval);
1739  bool invalid = false;
1740 
// the id must fit the column's storage size (4, 2 or 1 bytes)
1741  if (4 == shardColumn->columnType.get_size()) {
1742  invalid = str_id > max_valid_int_value<int32_t>();
1743  } else if (2 == shardColumn->columnType.get_size()) {
1744  invalid = str_id > max_valid_int_value<uint16_t>();
1745  } else if (1 == shardColumn->columnType.get_size()) {
1746  invalid = str_id > max_valid_int_value<uint8_t>();
1747  }
1748 
// ids that do not fit, or that equal the int32 null sentinel, are replaced by the
// column type's null value before sharding
1749  if (invalid || str_id == inline_int_null_value<int32_t>()) {
1750  str_id = inline_fixed_encoding_null_val(shardColumn->columnType);
1751  }
1752  shardId = SHARD_FOR_KEY(str_id, shard_count);
1753  } else {
// Non-string shard key: read the Datum member that matches the column's logical size.
1754  switch (shardColumn->columnType.get_logical_size()) {
1755  case 8:
1756  shardId = SHARD_FOR_KEY(d.bigintval, shard_count);
1757  break;
1758  case 4:
1759  shardId = SHARD_FOR_KEY(d.intval, shard_count);
1760  break;
1761  case 2:
1762  shardId = SHARD_FOR_KEY(d.smallintval, shard_count);
1763  break;
1764  case 1:
1765  shardId = SHARD_FOR_KEY(d.tinyintval, shard_count);
1766  break;
1767  default:
1768  CHECK(false);
1769  }
1770  }
1771 
// shard ids range over nShards * num_leafs; dividing by nShards yields the leaf index
1772  return shardId / td->nShards;
1773 }
1774 
// Builds the target list for an INSERT ... VALUES statement.
//
// After the base InsertStmt::analyze() has run, this checks that the number of
// values matches either the explicit column list or the table's columns as
// returned by getAllColumnMetadataForTable(tableId, false, false, false), then
// appends one TargetEntry per value (cast to the destination column type).
// For columns that report physical columns (checked to be geometry), extra
// TargetEntry objects are appended for each physical column, in the order
// coords, ring sizes, poly rings, bounds, render group, as applicable.
//
// Throws std::runtime_error on count mismatch, on a NULL constant aimed at a
// NOT NULL column, and on geometry values that cannot be used.
1775 void InsertValuesStmt::analyze(const Catalog_Namespace::Catalog& catalog,
1776  Analyzer::Query& query) const {
1777  InsertStmt::analyze(catalog, query);
1778  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1779  query.get_targetlist_nonconst();
1780  const auto tableId = query.get_result_table_id();
1781  if (!column_list_.empty()) {
1782  if (value_list_.size() != column_list_.size()) {
1783  throw std::runtime_error(
1784  "Numbers of columns and values don't match for the "
1785  "insert.");
1786  }
1787  } else {
1788  const std::list<const ColumnDescriptor*> non_phys_cols =
1789  catalog.getAllColumnMetadataForTable(tableId, false, false, false);
1790  if (non_phys_cols.size() != value_list_.size()) {
1791  throw std::runtime_error(
1792  "Number of columns in table does not match the list of values given in the "
1793  "insert.");
1794  }
1795  }
// `it` walks the query's result column id list in step with the values; it is
// advanced once per value and once more per physical column emitted below.
1796  std::list<int>::const_iterator it = query.get_result_col_list().begin();
1797  for (auto& v : value_list_) {
1798  auto e = v->analyze(catalog, query);
1799  const ColumnDescriptor* cd =
1800  catalog.getMetadataForColumn(query.get_result_table_id(), *it);
1801  CHECK(cd);
1802  if (cd->columnType.get_notnull()) {
// Only a literal NULL constant is rejected here; non-constant expressions pass.
1803  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
1804  if (c != nullptr && c->get_is_null()) {
1805  throw std::runtime_error("Cannot insert NULL into column " + cd->columnName);
1806  }
1807  }
1808  e = e->add_cast(cd->columnType);
1809  tlist.emplace_back(new Analyzer::TargetEntry("", e, false));
1810  ++it;
1811 
1812  const auto& col_ti = cd->columnType;
1813  if (col_ti.get_physical_cols() > 0) {
1814  CHECK(cd->columnType.is_geometry());
// The value must be a constant, either directly or as the operand of a kCAST
// unary operator (add_cast above may have wrapped it).
1815  auto c = dynamic_cast<const Analyzer::Constant*>(e.get());
1816  if (!c) {
1817  auto uoper = std::dynamic_pointer_cast<Analyzer::UOper>(e);
1818  if (uoper && uoper->get_optype() == kCAST) {
1819  c = dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
1820  }
1821  }
1822  bool is_null = false;
1823  std::string* geo_string{nullptr};
1824  if (c) {
1825  is_null = c->get_is_null();
1826  if (!is_null) {
1827  geo_string = c->get_constval().stringval;
1828  }
1829  }
1830  if (!is_null && !geo_string) {
1831  throw std::runtime_error("Expecting a WKT or WKB hex string for column " +
1832  cd->columnName);
1833  }
1834  std::vector<double> coords;
1835  std::vector<double> bounds;
1836  std::vector<int> ring_sizes;
1837  std::vector<int> poly_rings;
1838  int render_group =
1839  0; // @TODO simon.eves where to get render_group from in this context?!
1840  SQLTypeInfo import_ti{cd->columnType};
1841  if (!is_null) {
1843  *geo_string, import_ti, coords, bounds, ring_sizes, poly_rings)) {
1844  throw std::runtime_error("Cannot read geometry to insert into column " +
1845  cd->columnName);
1846  }
1847  if (coords.empty()) {
1848  // Importing from geo_string WKT resulted in empty coords: dealing with a NULL
1849  is_null = true;
1850  }
1851  if (cd->columnType.get_type() != import_ti.get_type()) {
1852  // allow POLYGON to be inserted into MULTIPOLYGON column
1853  if (!(import_ti.get_type() == SQLTypes::kPOLYGON &&
1855  throw std::runtime_error(
1856  "Imported geometry doesn't match the type of column " + cd->columnName);
1857  }
1858  }
1859  } else {
1860  // Special case for NULL POINT, push NULL representation to coords
1861  if (cd->columnType.get_type() == kPOINT) {
1862  if (!coords.empty()) {
1863  throw std::runtime_error("NULL POINT with unexpected coordinates in column " +
1864  cd->columnName);
1865  }
1866  coords.push_back(NULL_ARRAY_DOUBLE);
1867  coords.push_back(NULL_DOUBLE);
1868  }
1869  }
1870 
1871  // TODO: check if import SRID matches columns SRID, may need to transform before
1872  // inserting
1873 
// Physical columns are looked up by id as cd->columnId + nextColumnOffset,
// i.e. they are expected to directly follow the geometry column.
1874  int nextColumnOffset = 1;
1875 
1876  const ColumnDescriptor* cd_coords = catalog.getMetadataForColumn(
1877  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1878  CHECK(cd_coords);
1879  CHECK_EQ(cd_coords->columnType.get_type(), kARRAY);
1880  CHECK_EQ(cd_coords->columnType.get_subtype(), kTINYINT);
1881  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
// Coordinates are emitted as individual TINYINT constants; a NULL POINT still
// emits the sentinel coordinates pushed above.
1882  if (!is_null || cd->columnType.get_type() == kPOINT) {
1883  auto compressed_coords = Geospatial::compress_coords(coords, col_ti);
1884  for (auto cc : compressed_coords) {
1885  Datum d;
1886  d.tinyintval = cc;
1887  auto e = makeExpr<Analyzer::Constant>(kTINYINT, false, d);
1888  value_exprs.push_back(e);
1889  }
1890  }
1891  tlist.emplace_back(new Analyzer::TargetEntry(
1892  "",
1893  makeExpr<Analyzer::Constant>(cd_coords->columnType, is_null, value_exprs),
1894  false));
1895  ++it;
1896  nextColumnOffset++;
1897 
1898  if (cd->columnType.get_type() == kPOLYGON ||
1899  cd->columnType.get_type() == kMULTIPOLYGON) {
1900  // Put ring sizes array into separate physical column
1901  const ColumnDescriptor* cd_ring_sizes = catalog.getMetadataForColumn(
1902  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1903  CHECK(cd_ring_sizes);
1904  CHECK_EQ(cd_ring_sizes->columnType.get_type(), kARRAY);
1905  CHECK_EQ(cd_ring_sizes->columnType.get_subtype(), kINT);
1906  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1907  if (!is_null) {
1908  for (auto c : ring_sizes) {
1909  Datum d;
1910  d.intval = c;
1911  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1912  value_exprs.push_back(e);
1913  }
1914  }
1915  tlist.emplace_back(new Analyzer::TargetEntry(
1916  "",
1917  makeExpr<Analyzer::Constant>(cd_ring_sizes->columnType, is_null, value_exprs),
1918  false));
1919  ++it;
1920  nextColumnOffset++;
1921 
1922  if (cd->columnType.get_type() == kMULTIPOLYGON) {
1923  // Put poly_rings array into separate physical column
1924  const ColumnDescriptor* cd_poly_rings = catalog.getMetadataForColumn(
1925  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1926  CHECK(cd_poly_rings);
1927  CHECK_EQ(cd_poly_rings->columnType.get_type(), kARRAY);
1928  CHECK_EQ(cd_poly_rings->columnType.get_subtype(), kINT);
1929  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1930  if (!is_null) {
1931  for (auto c : poly_rings) {
1932  Datum d;
1933  d.intval = c;
1934  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
1935  value_exprs.push_back(e);
1936  }
1937  }
1938  tlist.emplace_back(new Analyzer::TargetEntry(
1939  "",
1940  makeExpr<Analyzer::Constant>(
1941  cd_poly_rings->columnType, is_null, value_exprs),
1942  false));
1943  ++it;
1944  nextColumnOffset++;
1945  }
1946  }
1947 
// Bounds are emitted for every geometry type handled here except POINT.
1948  if (cd->columnType.get_type() == kLINESTRING ||
1949  cd->columnType.get_type() == kPOLYGON ||
1950  cd->columnType.get_type() == kMULTIPOLYGON) {
1951  const ColumnDescriptor* cd_bounds = catalog.getMetadataForColumn(
1952  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1953  CHECK(cd_bounds);
1954  CHECK_EQ(cd_bounds->columnType.get_type(), kARRAY);
1955  CHECK_EQ(cd_bounds->columnType.get_subtype(), kDOUBLE);
1956  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
1957  if (!is_null) {
1958  for (auto b : bounds) {
1959  Datum d;
1960  d.doubleval = b;
1961  auto e = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
1962  value_exprs.push_back(e);
1963  }
1964  }
1965  tlist.emplace_back(new Analyzer::TargetEntry(
1966  "",
1967  makeExpr<Analyzer::Constant>(cd_bounds->columnType, is_null, value_exprs),
1968  false));
1969  ++it;
1970  nextColumnOffset++;
1971  }
1972 
1973  if (cd->columnType.get_type() == kPOLYGON ||
1974  cd->columnType.get_type() == kMULTIPOLYGON) {
1975  // Put render group into separate physical column
1976  const ColumnDescriptor* cd_render_group = catalog.getMetadataForColumn(
1977  query.get_result_table_id(), cd->columnId + nextColumnOffset);
1978  CHECK(cd_render_group);
1979  CHECK_EQ(cd_render_group->columnType.get_type(), kINT);
1980  Datum d;
// render_group is always 0 in this code path (see the TODO at its declaration).
1981  d.intval = render_group;
1982  tlist.emplace_back(new Analyzer::TargetEntry(
1983  "",
1984  makeExpr<Analyzer::Constant>(cd_render_group->columnType, is_null, d),
1985  false));
1986  ++it;
1987  nextColumnOffset++;
1988  }
1989  }
1990  }
1991 }
1992 
// Executes a singleton INSERT ... VALUES for the given session.
//
// Steps visible here: fail with std::runtime_error when the user lacks insert
// privileges on *table_, analyze the statement into an Analyzer::Query, take
// the table descriptor under a schema lock, reject views, and finally hand the
// query to RelAlgExecutor::executeSimpleInsert().
1993 void InsertValuesStmt::execute(const Catalog_Namespace::SessionInfo& session) {
1994  auto& catalog = session.getCatalog();
1995 
1998  *table_)) {
1999  throw std::runtime_error("User has no insert privileges on " + *table_ + ".");
2000  }
2001 
2002  auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2005 
2006  Analyzer::Query query;
2007  analyze(catalog, query);
2008 
2009  // Acquire schema write lock -- leave data lock so the fragmenter can checkpoint. For
2010  // singleton inserts we just take a write lock on the schema, which prevents concurrent
2011  // inserts.
2012  auto result_table_id = query.get_result_table_id();
2013  const auto td_with_lock =
2015  catalog, result_table_id);
2016  // NOTE(max): we do the same checks as below just a few calls earlier in analyze().
2017  // Do we keep those intentionally to make sure nothing changed in between w/o
2018  // catalog locks or is it just a duplicate work?
2019  auto td = td_with_lock();
2020  CHECK(td);
2021  if (td->isView) {
2022  throw std::runtime_error("Singleton inserts on views is not supported.");
2023  }
2025 
2027  RelAlgExecutor ra_executor(executor.get(), catalog);
2028 
2029  ra_executor.executeSimpleInsert(query);
2030 }
2031 
2032 void UpdateStmt::analyze(const Catalog_Namespace::Catalog& catalog,
2033  Analyzer::Query& query) const {
2034  throw std::runtime_error("UPDATE statement not supported yet.");
2035 }
2036 
2037 void DeleteStmt::analyze(const Catalog_Namespace::Catalog& catalog,
2038  Analyzer::Query& query) const {
2039  throw std::runtime_error("DELETE statement not supported yet.");
2040 }
2041 
2042 namespace {
2043 
// Shard-key eligibility check for column descriptor `cd`:
//  - integer, dictionary-encoded string and time columns are accepted, unless
//    the column carries a default value, which throws;
//  - every other type/encoding throws, naming the type and encoding.
2045  const auto& col_ti = cd.columnType;
2046  if (col_ti.is_integer() ||
2047  (col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT) ||
2048  col_ti.is_time()) {
2049  if (cd.default_value.has_value()) {
2050  throw std::runtime_error("Default values for shard keys are not supported yet.");
2051  }
2052  } else {
2053  throw std::runtime_error("Cannot shard on type " + col_ti.get_type_name() +
2054  ", encoding " + col_ti.get_compression_name());
2055  }
2056 }
2057 
// Returns the 1-based position of the column called `name` within `columns`,
// or 0 when no column matches. The comparison is an exact (case-sensitive)
// string match. Geometry columns occupy 1 + get_physical_cols() positions, so
// columns that follow them are numbered past their physical columns.
2058 size_t shard_column_index(const std::string& name,
2059  const std::list<ColumnDescriptor>& columns) {
2060  size_t index = 1;
2061  for (const auto& cd : columns) {
2062  if (cd.columnName == name) {
2064  return index;
2065  }
2066  ++index;
2067  if (cd.columnType.is_geometry()) {
2068  index += cd.columnType.get_physical_cols();
2069  }
2070  }
2071  // Not found, return 0
2072  return 0;
2073 }
2074 
2075 size_t sort_column_index(const std::string& name,
2076  const std::list<ColumnDescriptor>& columns) {
2077  size_t index = 1;
2078  for (const auto& cd : columns) {
2079  if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
2080  return index;
2081  }
2082  ++index;
2083  if (cd.columnType.is_geometry()) {
2084  index += cd.columnType.get_physical_cols();
2085  }
2086  }
2087  // Not found, return 0
2088  return 0;
2089 }
2090 
2091 void set_string_field(rapidjson::Value& obj,
2092  const std::string& field_name,
2093  const std::string& field_value,
2094  rapidjson::Document& document) {
2095  rapidjson::Value field_name_json_str;
2096  field_name_json_str.SetString(
2097  field_name.c_str(), field_name.size(), document.GetAllocator());
2098  rapidjson::Value field_value_json_str;
2099  field_value_json_str.SetString(
2100  field_value.c_str(), field_value.size(), document.GetAllocator());
2101  obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
2102 }
2103 
2105  const ShardKeyDef* shard_key_def,
2106  const std::vector<SharedDictionaryDef>& shared_dict_defs) {
// Serializes key metadata as a JSON array string. When `shard_key_def` is
// non-null the first element is {"type": "SHARD KEY", "name": <column>}; one
// "SHARED DICTIONARY" object follows per entry of `shared_dict_defs`.
// With no shard key and no shared dictionaries the result is an empty array.
2107  rapidjson::Document document;
2108  auto& allocator = document.GetAllocator();
2109  rapidjson::Value arr(rapidjson::kArrayType);
2110  if (shard_key_def) {
2111  rapidjson::Value shard_key_obj(rapidjson::kObjectType);
2112  set_string_field(shard_key_obj, "type", "SHARD KEY", document);
2113  set_string_field(shard_key_obj, "name", shard_key_def->get_column(), document);
2114  arr.PushBack(shard_key_obj, allocator);
2115  }
2116  for (const auto& shared_dict_def : shared_dict_defs) {
2117  rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
2118  set_string_field(shared_dict_obj, "type", "SHARED DICTIONARY", document);
2119  set_string_field(shared_dict_obj, "name", shared_dict_def.get_column(), document);
2121  shared_dict_obj, "foreign_table", shared_dict_def.get_foreign_table(), document);
2122  set_string_field(shared_dict_obj,
2123  "foreign_column",
2124  shared_dict_def.get_foreign_column(),
2125  document);
2126  arr.PushBack(shared_dict_obj, allocator);
2127  }
// The array (not `document`) is what gets written out; `document` only
// supplies the allocator.
2128  rapidjson::StringBuffer buffer;
2129  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
2130  arr.Accept(writer);
2131  return buffer.GetString();
2132 }
2133 
// Generic option helper: runs `validate` on the option `p` and passes the
// validated value to `op`, returning whatever `op` returns. `validate`
// defaults to a default-constructed VALIDATE.
2134 template <typename LITERAL_TYPE,
2135  typename ASSIGNMENT,
2138  ASSIGNMENT op,
2139  VALIDATE validate = VALIDATE()) {
2140  const auto val = validate(p);
2141  return op(val);
2142 }
2143 
2146  const std::list<ColumnDescriptor>& columns) {
// Stores the string option in td.storageType. CaseSensitiveValidate is passed
// explicitly instead of the default validator; `columns` is not used.
2147  auto assignment = [&td](const auto val) { td.storageType = val; };
2148  return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
2149  p, assignment);
2150 }
2151 
2154  const std::list<ColumnDescriptor>& columns) {
// Stores the integer option in td.maxFragRows; `columns` is not used.
2155  return get_property_value<IntLiteral>(p,
2156  [&td](const auto val) { td.maxFragRows = val; });
2157 }
2158 
2161  const std::list<ColumnDescriptor>& columns) {
// Dataframe variant: stores the integer option in df_td.maxFragRows;
// `columns` is not used.
2162  return get_property_value<IntLiteral>(
2163  p, [&df_td](const auto val) { df_td.maxFragRows = val; });
2164 }
2165 
2168  const std::list<ColumnDescriptor>& columns) {
// Stores the integer option in td.maxChunkSize; `columns` is not used.
2169  return get_property_value<IntLiteral>(p,
2170  [&td](const auto val) { td.maxChunkSize = val; });
2171 }
2172 
2174  DataframeTableDescriptor& df_td,
2176  const std::list<ColumnDescriptor>& columns) {
// Dataframe variant: stores the integer option in df_td.maxChunkSize;
// `columns` is not used.
2177  return get_property_value<IntLiteral>(
2178  p, [&df_td](const auto val) { df_td.maxChunkSize = val; });
2179 }
2180 
2183  const std::list<ColumnDescriptor>& columns) {
// Stores the string option in df_td.delimiter. The value must be exactly one
// character long, otherwise std::runtime_error is thrown.
2184  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2185  if (val.size() != 1) {
2186  throw std::runtime_error("Length of DELIMITER must be equal to 1.");
2187  }
2188  df_td.delimiter = val;
2189  });
2190 }
2191 
2194  const std::list<ColumnDescriptor>& columns) {
// Sets df_td.hasHeader from the string option. Only the exact values "TRUE"
// and "FALSE" are matched here; anything else throws.
// NOTE(review): presumably the default validator upper-cases the literal
// before this lambda runs, which is why the error text says 'true'/'false' —
// confirm against the validator.
2195  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2196  if (val == "FALSE") {
2197  df_td.hasHeader = false;
2198  } else if (val == "TRUE") {
2199  df_td.hasHeader = true;
2200  } else {
2201  throw std::runtime_error("Option HEADER support only 'true' or 'false' values.");
2202  }
2203  });
2204 }
2205 
2208  const std::list<ColumnDescriptor>& columns) {
// Stores the integer option in td.fragPageSize; `columns` is not used.
2209  return get_property_value<IntLiteral>(p,
2210  [&td](const auto val) { td.fragPageSize = val; });
2211 }
2214  const std::list<ColumnDescriptor>& columns) {
// Stores the integer option in td.maxRows; `columns` is not used.
2215  return get_property_value<IntLiteral>(p, [&td](const auto val) { td.maxRows = val; });
2216 }
2217 
2220  const std::list<ColumnDescriptor>& columns) {
// Stores the integer option in df_td.skipRows; `columns` is not used.
2221  return get_property_value<IntLiteral>(
2222  p, [&df_td](const auto val) { df_td.skipRows = val; });
2223 }
2224 
2227  const std::list<ColumnDescriptor>& columns) {
// Stores the string option in td.partitions. Only "SHARDED" and "REPLICATED"
// are accepted, and "REPLICATED" is rejected when a shard column is already
// set on `td` (td.shardedColumnId != 0). Violations throw std::runtime_error.
2228  return get_property_value<StringLiteral>(p, [&td](const auto partitions_uc) {
2229  if (partitions_uc != "SHARDED" && partitions_uc != "REPLICATED") {
2230  throw std::runtime_error("PARTITIONS must be SHARDED or REPLICATED");
2231  }
2232  if (td.shardedColumnId != 0 && partitions_uc == "REPLICATED") {
2233  throw std::runtime_error(
2234  "A table cannot be sharded and replicated at the same time");
2235  };
2236  td.partitions = partitions_uc;
2237  });
2238 }
2241  const std::list<ColumnDescriptor>& columns) {
// Sets td.nShards from the integer option. A shard column must already be set
// on `td`. When g_leaf_count is non-zero the option must be a multiple of it
// and td.nShards receives the per-leaf share; otherwise the value is stored
// unchanged.
2242  if (!td.shardedColumnId) {
2243  throw std::runtime_error("SHARD KEY must be defined.");
2244  }
2245  return get_property_value<IntLiteral>(p, [&td](const auto shard_count) {
2246  if (g_leaf_count && shard_count % g_leaf_count) {
2247  throw std::runtime_error(
2248  "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2249  }
2250  td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
// NOTE(review): this check cannot fire. td.shardedColumnId was verified to be
// non-zero before the lambda was created, so a resulting nShards of 0 is not
// rejected here.
2251  if (!td.shardedColumnId && !td.nShards) {
2252  throw std::runtime_error(
2253  "Must specify the number of shards through the SHARD_COUNT option");
2254  };
2255  });
2256 }
2257 
// Handles the VACUUM option: the value must be "IMMEDIATE" or "DELAYED".
// td.hasDeletedCol becomes false for IMMEDIATE and true for DELAYED.
// `columns` is not used.
2258 decltype(auto) get_vacuum_def(TableDescriptor& td,
2260  const std::list<ColumnDescriptor>& columns) {
2261  return get_property_value<StringLiteral>(p, [&td](const auto vacuum_uc) {
2262  if (vacuum_uc != "IMMEDIATE" && vacuum_uc != "DELAYED") {
2263  throw std::runtime_error("VACUUM must be IMMEDIATE or DELAYED");
2264  }
2265  td.hasDeletedCol = boost::iequals(vacuum_uc, "IMMEDIATE") ? false : true;
2266  });
2267 }
2268 
2271  const std::list<ColumnDescriptor>& columns) {
// Resolves the named sort column to its index via sort_column_index() and
// stores it in td.sortedColumnId; an index of 0 (not found) throws.
2272  return get_property_value<StringLiteral>(p, [&td, &columns](const auto sort_upper) {
2273  td.sortedColumnId = sort_column_index(sort_upper, columns);
2274  if (!td.sortedColumnId) {
2275  throw std::runtime_error("Specified sort column " + sort_upper + " doesn't exist");
2276  }
2277  });
2278 }
2279 
2282  const std::list<ColumnDescriptor>& columns) {
// Stores the integer option in td.maxRollbackEpochs, clamping any negative
// value to -1. PositiveOrZeroValidate is used instead of the default validator.
// NOTE(review): whether a negative value can get past PositiveOrZeroValidate
// cannot be determined from this block — confirm.
2283  auto assignment = [&td](const auto val) {
2284  td.maxRollbackEpochs =
2285  val < 0 ? -1 : val; // Anything < 0 means unlimited rollbacks. Note that 0
2286  // still means keeping a shadow copy of data/metadata
2287  // between epochs so bad writes can be rolled back
2288  };
2289  return get_property_value<IntLiteral, decltype(assignment), PositiveOrZeroValidate>(
2290  p, assignment);
2291 }
2292 
2293 static const std::map<const std::string, const TableDefFuncPtr> tableDefFuncMap = {
2294  {"fragment_size"s, get_frag_size_def},
2295  {"max_chunk_size"s, get_max_chunk_size_def},
2296  {"page_size"s, get_page_size_def},
2297  {"max_rows"s, get_max_rows_def},
2298  {"partitions"s, get_partions_def},
2299  {"shard_count"s, get_shard_count_def},
2300  {"vacuum"s, get_vacuum_def},
2301  {"sort_column"s, get_sort_column_def},
2302  {"storage_type"s, get_storage_type},
2303  {"max_rollback_epochs", get_max_rollback_epochs_def}};
2304 
2306  const std::unique_ptr<NameValueAssign>& p,
2307  const std::list<ColumnDescriptor>& columns) {
// Applies one CREATE TABLE option to `td`: the option name is lower-cased and
// looked up in tableDefFuncMap; unknown names throw std::runtime_error,
// otherwise the matching handler is invoked.
2308  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2309  if (it == tableDefFuncMap.end()) {
2310  throw std::runtime_error(
2311  "Invalid CREATE TABLE option " + *p->get_name() +
2312  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2313  "MAX_ROWS, "
2314  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE.");
2315  }
2316  return it->second(td, p.get(), columns);
2317 }
2318 
2320  const std::unique_ptr<NameValueAssign>& p,
2321  const std::list<ColumnDescriptor>& columns) {
// CREATE TABLE AS variant of the option dispatcher: same lookup in
// tableDefFuncMap, different error text.
// NOTE(review): the message lists USE_SHARED_DICTIONARIES, but the
// tableDefFuncMap initializer has no such key — presumably that option is
// consumed by the caller before reaching this function; confirm.
2322  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2323  if (it == tableDefFuncMap.end()) {
2324  throw std::runtime_error(
2325  "Invalid CREATE TABLE AS option " + *p->get_name() +
2326  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2327  "MAX_ROWS, "
2328  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE or "
2329  "USE_SHARED_DICTIONARIES.");
2330  }
2331  return it->second(td, p.get(), columns);
2332 }
2333 
2334 static const std::map<const std::string, const DataframeDefFuncPtr> dataframeDefFuncMap =
2335  {{"fragment_size"s, get_frag_size_dataframe_def},
2336  {"max_chunk_size"s, get_max_chunk_size_dataframe_def},
2337  {"skip_rows"s, get_skip_rows_def},
2338  {"delimiter"s, get_delimiter_def},
2339  {"header"s, get_header_def}};
2340 
2342  const std::unique_ptr<NameValueAssign>& p,
2343  const std::list<ColumnDescriptor>& columns) {
// Applies one CREATE DATAFRAME option to `df_td`: the option name is
// lower-cased and looked up in dataframeDefFuncMap; unknown names throw
// std::runtime_error, otherwise the matching handler is invoked.
2344  const auto it =
2345  dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2346  if (it == dataframeDefFuncMap.end()) {
2347  throw std::runtime_error(
2348  "Invalid CREATE DATAFRAME option " + *p->get_name() +
2349  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
2350  }
2351  return it->second(df_td, p.get(), columns);
2352 }
2353 
2354 std::unique_ptr<ColumnDef> column_from_json(const rapidjson::Value& element) {
2355  CHECK(element.HasMember("name"));
2356  auto col_name = std::make_unique<std::string>(json_str(element["name"]));
2357  CHECK(element.HasMember("sqltype"));
2358  const auto sql_types = to_sql_type(json_str(element["sqltype"]));
2359 
2360  // decimal / numeric precision / scale
2361  int precision = -1;
2362  int scale = -1;
2363  if (element.HasMember("precision")) {
2364  precision = json_i64(element["precision"]);
2365  }
2366  if (element.HasMember("scale")) {
2367  scale = json_i64(element["scale"]);
2368  }
2369 
2370  std::optional<int64_t> array_size;
2371  if (element.HasMember("arraySize")) {
2372  // We do not yet support geo arrays
2373  array_size = json_i64(element["arraySize"]);
2374  }
2375  std::unique_ptr<SQLType> sql_type;
2376  if (element.HasMember("subtype")) {
2377  CHECK(element.HasMember("coordinateSystem"));
2378  const auto subtype_sql_types = to_sql_type(json_str(element["subtype"]));
2379  sql_type =
2380  std::make_unique<SQLType>(subtype_sql_types,
2381  static_cast<int>(sql_types),
2382  static_cast<int>(json_i64(element["coordinateSystem"])),
2383  false);
2384  } else if (precision > 0 && scale > 0) {
2385  sql_type = std::make_unique<SQLType>(sql_types,
2386  precision,
2387  scale,
2388  /*is_array=*/array_size.has_value(),
2389  array_size ? *array_size : -1);
2390  } else if (precision > 0) {
2391  sql_type = std::make_unique<SQLType>(sql_types,
2392  precision,
2393  0,
2394  /*is_array=*/array_size.has_value(),
2395  array_size ? *array_size : -1);
2396  } else {
2397  sql_type = std::make_unique<SQLType>(sql_types,
2398  /*is_array=*/array_size.has_value(),
2399  array_size ? *array_size : -1);
2400  }
2401  CHECK(sql_type);
2402 
2403  CHECK(element.HasMember("nullable"));
2404  const auto nullable = json_bool(element["nullable"]);
2405  std::unique_ptr<ColumnConstraintDef> constraint_def;
2406  StringLiteral* str_literal = nullptr;
2407  if (element.HasMember("default") && !element["default"].IsNull()) {
2408  std::string* defaultval = new std::string(json_str(element["default"]));
2409  boost::algorithm::trim_if(*defaultval, boost::is_any_of(" \"'`"));
2410  str_literal = new StringLiteral(defaultval);
2411  }
2412 
2413  constraint_def = std::make_unique<ColumnConstraintDef>(/*notnull=*/!nullable,
2414  /*unique=*/false,
2415  /*primarykey=*/false,
2416  /*defaultval=*/str_literal);
2417  std::unique_ptr<CompressDef> compress_def;
2418  if (element.HasMember("encodingType") && !element["encodingType"].IsNull()) {
2419  std::string encoding_type = json_str(element["encodingType"]);
2420  CHECK(element.HasMember("encodingSize"));
2421  auto encoding_name = std::make_unique<std::string>(json_str(element["encodingType"]));
2422  compress_def = std::make_unique<CompressDef>(encoding_name.release(),
2423  json_i64(element["encodingSize"]));
2424  }
2425  return std::make_unique<ColumnDef>(col_name.release(),
2426  sql_type.release(),
2427  compress_def ? compress_def.release() : nullptr,
2428  constraint_def ? constraint_def.release() : nullptr);
2429 }
2430 
2431 void parse_elements(const rapidjson::Value& payload,
2432  std::string element_name,
2433  std::string& table_name,
2434  std::list<std::unique_ptr<TableElement>>& table_element_list) {
2435  const auto elements = payload[element_name].GetArray();
2436  for (const auto& element : elements) {
2437  CHECK(element.IsObject());
2438  CHECK(element.HasMember("type"));
2439  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
2440  auto col_def = column_from_json(element);
2441  table_element_list.emplace_back(std::move(col_def));
2442  } else if (json_str(element["type"]) == "SQL_COLUMN_CONSTRAINT") {
2443  CHECK(element.HasMember("name"));
2444  if (json_str(element["name"]) == "SHARD_KEY") {
2445  CHECK(element.HasMember("columns"));
2446  CHECK(element["columns"].IsArray());
2447  const auto& columns = element["columns"].GetArray();
2448  if (columns.Size() != size_t(1)) {
2449  throw std::runtime_error("Only one shard column is currently supported.");
2450  }
2451  auto shard_key_def = std::make_unique<ShardKeyDef>(json_str(columns[0]));
2452  table_element_list.emplace_back(std::move(shard_key_def));
2453  } else if (json_str(element["name"]) == "SHARED_DICT") {
2454  CHECK(element.HasMember("columns"));
2455  CHECK(element["columns"].IsArray());
2456  const auto& columns = element["columns"].GetArray();
2457  if (columns.Size() != size_t(1)) {
2458  throw std::runtime_error(
2459  R"(Only one column per shared dictionary entry is currently supported. Use multiple SHARED DICT statements to share dictionaries from multiple columns.)");
2460  }
2461  CHECK(element.HasMember("references") && element["references"].IsObject());
2462  const auto& references = element["references"].GetObject();
2463  std::string references_table_name;
2464  if (references.HasMember("table")) {
2465  references_table_name = json_str(references["table"]);
2466  } else {
2467  references_table_name = table_name;
2468  }
2469  CHECK(references.HasMember("column"));
2470 
2471  auto shared_dict_def = std::make_unique<SharedDictionaryDef>(
2472  json_str(columns[0]), references_table_name, json_str(references["column"]));
2473  table_element_list.emplace_back(std::move(shared_dict_def));
2474 
2475  } else {
2476  LOG(FATAL) << "Unsupported type for SQL_COLUMN_CONSTRAINT: "
2477  << json_str(element["name"]);
2478  }
2479  } else {
2480  LOG(FATAL) << "Unsupported element type for CREATE TABLE: "
2481  << element["type"].GetString();
2482  }
2483  }
2484 }
2485 } // namespace
2486 
2487 CreateTableStmt::CreateTableStmt(const rapidjson::Value& payload) {
2488  CHECK(payload.HasMember("name"));
2489  table_ = std::make_unique<std::string>(json_str(payload["name"]));
2490  CHECK(payload.HasMember("elements"));
2491  CHECK(payload["elements"].IsArray());
2492 
2493  is_temporary_ = false;
2494  if (payload.HasMember("temporary")) {
2495  is_temporary_ = json_bool(payload["temporary"]);
2496  }
2497 
2498  if_not_exists_ = false;
2499  if (payload.HasMember("ifNotExists")) {
2500  if_not_exists_ = json_bool(payload["ifNotExists"]);
2501  }
2502 
2503  parse_elements(payload, "elements", *table_, table_element_list_);
2504 
2505  parse_options(payload, storage_options_);
2506 }
2507 
// Translates the parsed table elements and storage options into `td`,
// `columns` and `shared_dict_defs` without creating anything in the catalog.
//
// Shared-dictionary definitions are collected, at most one shard key is
// accepted, and every remaining element must be a column definition.
// Throws std::runtime_error for a second shard key, a non-column table
// element, an unknown shard column, or a shard key without a shard count.
2508 void CreateTableStmt::executeDryRun(const Catalog_Namespace::SessionInfo& session,
2509  TableDescriptor& td,
2510  std::list<ColumnDescriptor>& columns,
2511  std::vector<SharedDictionaryDef>& shared_dict_defs) {
2512  std::unordered_set<std::string> uc_col_names;
2513  const auto& catalog = session.getCatalog();
2514  const ShardKeyDef* shard_key_def{nullptr};
2515  for (auto& e : table_element_list_) {
2516  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2517  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2519  this, shared_dict_def, columns, shared_dict_defs, catalog);
2520  shared_dict_defs.push_back(*shared_dict_def);
2521  continue;
2522  }
2523  if (dynamic_cast<ShardKeyDef*>(e.get())) {
2524  if (shard_key_def) {
2525  throw std::runtime_error("Specified more than one shard key");
2526  }
2527  shard_key_def = static_cast<const ShardKeyDef*>(e.get());
2528  continue;
2529  }
2530  if (!dynamic_cast<ColumnDef*>(e.get())) {
2531  throw std::runtime_error("Table constraints are not supported yet.");
2532  }
2533  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2534  ColumnDescriptor cd;
2535  cd.columnName = *coldef->get_column_name();
2537  setColumnDescriptor(cd, coldef);
2538  columns.push_back(cd);
2539  }
2540 
2541  ddl_utils::set_default_table_attributes(*table_, td, columns.size());
2542 
// The shard column must be resolved before the storage options are applied:
// get_shard_count_def() refuses SHARD_COUNT when td.shardedColumnId is unset.
2543  if (shard_key_def) {
2544  td.shardedColumnId = shard_column_index(shard_key_def->get_column(), columns);
2545  if (!td.shardedColumnId) {
2546  throw std::runtime_error("Specified shard column " + shard_key_def->get_column() +
2547  " doesn't exist");
2548  }
2549  }
2550  if (is_temporary_) {
2552  } else {
2554  }
2555  if (!storage_options_.empty()) {
2556  for (auto& p : storage_options_) {
2557  get_table_definitions(td, p, columns);
2558  }
2559  }
2560  if (td.shardedColumnId && !td.nShards) {
2561  throw std::runtime_error("SHARD_COUNT needs to be specified with SHARD_KEY.");
2562  }
2563  td.keyMetainfo = serialize_key_metainfo(shard_key_def, shared_dict_defs);
2564 }
2565 
// Creates the table described by this statement in the session's catalog.
//
// Throws std::runtime_error when the user lacks create privileges. Returns
// without doing anything when validateNonExistentTableOrView() returns false.
// Otherwise the descriptors are built with executeDryRun(), the table is
// created via createShardedTable(), and a DB object is registered for the
// current user.
2566 void CreateTableStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2567  auto& catalog = session.getCatalog();
2568 
2569  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2572 
2573  // check access privileges
2576  throw std::runtime_error("Table " + *table_ +
2577  " will not be created. User has no create privileges.");
2578  }
2579 
2580  if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
2581  return;
2582  }
2583 
2584  TableDescriptor td;
2585  std::list<ColumnDescriptor> columns;
2586  std::vector<SharedDictionaryDef> shared_dict_defs;
2587 
2588  executeDryRun(session, td, columns, shared_dict_defs);
2589  td.userId = session.get_currentUser().userId;
2590 
2591  catalog.createShardedTable(td, columns, shared_dict_defs);
2592  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2593  // privileges
2594  SysCatalog::instance().createDBObject(
2595  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
2596 }
2597 
2598 CreateDataframeStmt::CreateDataframeStmt(const rapidjson::Value& payload) {
2599  CHECK(payload.HasMember("name"));
2600  table_ = std::make_unique<std::string>(json_str(payload["name"]));
2601 
2602  CHECK(payload.HasMember("elementList"));
2603  parse_elements(payload, "elementList", *table_, table_element_list_);
2604 
2605  CHECK(payload.HasMember("filePath"));
2606  std::string fs = json_str(payload["filePath"]);
2607  // strip leading/trailing spaces/quotes/single quotes
2608  boost::algorithm::trim_if(fs, boost::is_any_of(" \"'`"));
2609  filename_ = std::make_unique<std::string>(fs);
2610 
2611  parse_options(payload, storage_options_);
2612 }
2613 
// Creates the dataframe table described by this statement.
//
// Throws std::runtime_error when the user lacks create privileges, when the
// table already exists, when a column name repeats (compared upper-cased), or
// when a table element is neither a shared dictionary nor a column definition.
// The descriptor's storageType is set to the statement's file name.
2614 void CreateDataframeStmt::execute(const Catalog_Namespace::SessionInfo& session) {
2615  auto& catalog = session.getCatalog();
2616 
2617  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
2620 
2621  // check access privileges
2624  throw std::runtime_error("Table " + *table_ +
2625  " will not be created. User has no create privileges.");
2626  }
2627 
2628  if (catalog.getMetadataForTable(*table_) != nullptr) {
2629  throw std::runtime_error("Table " + *table_ + " already exists.");
2630  }
2632  std::list<ColumnDescriptor> columns;
2633  std::vector<SharedDictionaryDef> shared_dict_defs;
2634 
2635  std::unordered_set<std::string> uc_col_names;
2636  for (auto& e : table_element_list_) {
2637  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
2638  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
2640  this, shared_dict_def, columns, shared_dict_defs, catalog);
2641  shared_dict_defs.push_back(*shared_dict_def);
2642  continue;
2643  }
2644  if (!dynamic_cast<ColumnDef*>(e.get())) {
2645  throw std::runtime_error("Table constraints are not supported yet.");
2646  }
2647  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
2648  ColumnDescriptor cd;
2649  cd.columnName = *coldef->get_column_name();
// Duplicate detection is case-insensitive: names are upper-cased first.
2650  const auto uc_col_name = boost::to_upper_copy<std::string>(cd.columnName);
2651  const auto it_ok = uc_col_names.insert(uc_col_name);
2652  if (!it_ok.second) {
2653  throw std::runtime_error("Column '" + cd.columnName + "' defined more than once");
2654  }
2655  setColumnDescriptor(cd, coldef);
2656  columns.push_back(cd);
2657  }
2658 
2659  df_td.tableName = *table_;
2660  df_td.nColumns = columns.size();
2661  df_td.isView = false;
2662  df_td.fragmenter = nullptr;
2667  df_td.maxRows = DEFAULT_MAX_ROWS;
2669  if (!storage_options_.empty()) {
2670  for (auto& p : storage_options_) {
2671  get_dataframe_definitions(df_td, p, columns);
2672  }
2673  }
// No shard key is passed for dataframes; only shared dictionaries are
// serialized.
2674  df_td.keyMetainfo = serialize_key_metainfo(nullptr, shared_dict_defs);
2675  df_td.userId = session.get_currentUser().userId;
2676  df_td.storageType = *filename_;
2677 
2678  catalog.createShardedTable(df_td, columns, shared_dict_defs);
2679  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
2680  // privileges
2681  SysCatalog::instance().createDBObject(
2682  session.get_currentUser(), df_td.tableName, TableDBObjectType, catalog);
2683 }
2684 
// Plans `select_stmt` through the catalog's Calcite manager, executes the plan
// with RelAlgExecutor, stores the result's target metadata in `targets` and
// returns the result rows.
//
// `validate_only`, `allow_interrupt` and `outer_fragment_indices` are
// forwarded into the ExecutionOptions initializer below. Without HAVE_CUDA the
// device type is always CPU; with it, the session's executor device type is
// used.
2685 std::shared_ptr<ResultSet> getResultSet(QueryStateProxy query_state_proxy,
2686  const std::string select_stmt,
2687  std::vector<TargetMetaInfo>& targets,
2688  bool validate_only = false,
2689  std::vector<size_t> outer_fragment_indices = {},
2690  bool allow_interrupt = false) {
2691  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2692  auto& catalog = session->getCatalog();
2693 
2695 #ifdef HAVE_CUDA
2696  const auto device_type = session->get_executor_device_type();
2697 #else
2698  const auto device_type = ExecutorDeviceType::CPU;
2699 #endif // HAVE_CUDA
2700  auto calcite_mgr = catalog.getCalciteMgr();
2701 
2702  // TODO MAT this should actually get the global or the session parameter for
2703  // view optimization
2704  const auto query_ra =
2705  calcite_mgr
2706  ->process(query_state_proxy, pg_shim(select_stmt), {}, true, false, false, true)
2707  .plan_result;
2708  RelAlgExecutor ra_executor(executor.get(),
2709  catalog,
2710  query_ra,
2711  query_state_proxy.getQueryState().shared_from_this());
2714  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2715  // struct
// NOTE(review): the initializer is positional, so each literal's meaning
// depends on the field order of ExecutionOptions, which is declared elsewhere.
2716  ExecutionOptions eo = {false,
2717  true,
2718  false,
2719  true,
2720  false,
2721  false,
2722  validate_only,
2723  false,
2724  10000,
2725  false,
2726  false,
2727  1000,
2728  allow_interrupt,
2732  outer_fragment_indices};
2733 
// `result` starts as a placeholder and is overwritten by the query result.
2734  ExecutionResult result{std::make_shared<ResultSet>(std::vector<TargetInfo>{},
2737  nullptr,
2738  nullptr,
2739  0,
2740  0),
2741  {}};
2742  result = ra_executor.executeRelAlgQuery(co, eo, false, nullptr);
2743  targets = result.getTargetsMeta();
2744 
2745  return result.getRows();
2746 }
2747 
// Returns the outer fragment count that the relational algebra executor reports for
// `sql_query_string`.
//
// The query is shimmed with pg_shim(), planned by Calcite, and the resulting plan is
// handed to RelAlgExecutor::getOuterFragmentCount().
//
// @param query_state_proxy proxy giving access to the query state / session
// @param sql_query_string  SQL text of the query to inspect
// @return the value returned by RelAlgExecutor::getOuterFragmentCount(co, eo)
//
// NOTE(review): `executor` is used below but its declaration is not part of the visible
// lines of this listing -- confirm against the full source.
2748 size_t LocalConnector::getOuterFragmentCount(QueryStateProxy query_state_proxy,
2749  std::string& sql_query_string) {
2750  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2751  auto& catalog = session->getCatalog();
2752 
// Without CUDA support the device type is pinned to CPU; otherwise the session decides.
2754 #ifdef HAVE_CUDA
2755  const auto device_type = session->get_executor_device_type();
2756 #else
2757  const auto device_type = ExecutorDeviceType::CPU;
2758 #endif // HAVE_CUDA
2759  auto calcite_mgr = catalog.getCalciteMgr();
2760 
2761  // TODO MAT this should actually get the global or the session parameter for
2762  // view optimization
2763  const auto query_ra =
2764  calcite_mgr
2765  ->process(
2766  query_state_proxy, pg_shim(sql_query_string), {}, true, false, false, true)
2767  .plan_result;
2768  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
2769  CompilationOptions co = {
2770  device_type, true, ExecutorOptLevel::LoopStrengthReduction, false};
2771  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
2772  // struct
// NOTE(review): the 12th initializer here is 0.9, whereas getResultSet() passes 1000 in
// the same position -- confirm which value is intended.
2773  ExecutionOptions eo = {false,
2774  true,
2775  false,
2776  true,
2777  false,
2778  false,
2779  false,
2780  false,
2781  10000,
2782  false,
2783  false,
2784  0.9,
2785  false};
2786  return ra_executor.getOuterFragmentCount(co, eo);
2787 }
2788 
// Executes (or only validates) `sql_query_string` locally and returns its rows together
// with the target metadata.
//
// @param query_state_proxy  proxy giving access to the query state / session
// @param sql_query_string   SQL text of the query
// @param outer_frag_indices forwarded to getResultSet()
// @param validate_only      forwarded to getResultSet(); also suppresses session
//                           enrollment
// @param allow_interrupt    forwarded to getResultSet(); when true (and not validating,
//                           and the session id is non-empty) the query session is
//                           enrolled with the executor first
// @return AggregatedResult built from the result rows and the target metadata
//
// NOTE(review): `executor` is used below but its declaration is not part of the visible
// lines of this listing -- confirm against the full source.
2789 AggregatedResult LocalConnector::query(QueryStateProxy query_state_proxy,
2790  std::string& sql_query_string,
2791  std::vector<size_t> outer_frag_indices,
2792  bool validate_only,
2793  bool allow_interrupt) {
2794  // TODO(PS): Should we be using the shimmed query in getResultSet?
// NOTE(review): in the visible lines pg_shimmed_select_query is computed but never read;
// getResultSet() below receives the unshimmed sql_query_string and shims it itself.
2795  std::string pg_shimmed_select_query = pg_shim(sql_query_string);
2796 
2797  std::vector<TargetMetaInfo> target_metainfos;
2799  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
// An absent session yields an empty session id, which disables enrollment below.
2800  auto query_session = session ? session->get_session_id() : "";
2801  auto query_submitted_time = query_state_proxy.getQueryState().getQuerySubmittedTime();
2802  if (allow_interrupt && !validate_only && !query_session.empty()) {
2803  executor->enrollQuerySession(query_session,
2804  sql_query_string,
2805  query_submitted_time,
2807  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR);
2808  }
2809  auto result_rows = getResultSet(query_state_proxy,
2810  sql_query_string,
2811  target_metainfos,
2812  validate_only,
2813  outer_frag_indices,
2814  allow_interrupt);
2815  AggregatedResult res = {result_rows, target_metainfos};
2816  return res;
2817 }
2818 
2819 std::vector<AggregatedResult> LocalConnector::query(
2820  QueryStateProxy query_state_proxy,
2821  std::string& sql_query_string,
2822  std::vector<size_t> outer_frag_indices,
2823  bool allow_interrupt) {
2824  auto res = query(
2825  query_state_proxy, sql_query_string, outer_frag_indices, false, allow_interrupt);
2826  return {res};
2827 }
2828 
2829 void LocalConnector::insertDataToLeaf(const Catalog_Namespace::SessionInfo& session,
2830  const size_t leaf_idx,
2831  Fragmenter_Namespace::InsertData& insert_data) {
2832  CHECK(leaf_idx == 0);
2833  auto& catalog = session.getCatalog();
2834  auto created_td = catalog.getMetadataForTable(insert_data.tableId);
2835  ChunkKey chunkKey = {catalog.getCurrentDB().dbId, created_td->tableId};
2836  // TODO(adb): Ensure that we have previously obtained a write lock for this table's
2837  // data.
2838  created_td->fragmenter->insertDataNoCheckpoint(insert_data);
2839 }
2840 
2841 void LocalConnector::checkpoint(const Catalog_Namespace::SessionInfo& session,
2842  int table_id) {
2843  auto& catalog = session.getCatalog();
2844  catalog.checkpointWithAutoRollback(table_id);
2845 }
2846 
2847 void LocalConnector::rollback(const Catalog_Namespace::SessionInfo& session,
2848  int table_id) {
2849  auto& catalog = session.getCatalog();
2850  auto db_id = catalog.getDatabaseId();
2851  auto table_epochs = catalog.getTableEpochs(db_id, table_id);
2852  catalog.setTableEpochs(db_id, table_epochs);
2853 }
2854 
// Builds one ColumnDescriptor per target of `result`.
//
// Two parallel lists are produced: descriptors that mirror the result's physical types
// as-is, and descriptors adjusted for use when creating a new table.
//
// @param result     aggregated result whose targets_meta is iterated
// @param for_create selects which list is returned: the adjusted one when true, the
//                   unmodified one when false
// @return list of column descriptors, in target order
//
// NOTE(review): two statements are absent from the visible lines of this listing (the
// opening `if` of the comp-param block and one statement in the date block).
2855 std::list<ColumnDescriptor> LocalConnector::getColumnDescriptors(AggregatedResult& result,
2856  bool for_create) {
2857  std::list<ColumnDescriptor> column_descriptors;
2858  std::list<ColumnDescriptor> column_descriptors_for_create;
2859 
2860  int rowid_suffix = 0;
2861  for (const auto& target_metainfo : result.targets_meta) {
2862  ColumnDescriptor cd;
2863  cd.columnName = target_metainfo.get_resname();
// A target named exactly "rowid" gets a running numeric suffix appended
// ("rowid0", "rowid1", ...).
2864  if (cd.columnName == "rowid") {
2865  cd.columnName += std::to_string(rowid_suffix++);
2866  }
2867  cd.columnType = target_metainfo.get_physical_type_info();
2868 
2869  ColumnDescriptor cd_for_create = cd;
2870 
// NOTE(review): the condition guarding this block is not visible; judging by the comment
// below it presumably tests for dictionary encoding -- confirm.
2872  // we need to reset the comp param (as this points to the actual dictionary)
2873  if (cd.columnType.is_array()) {
2874  // for dict encoded arrays, it is always 4 bytes
2875  cd_for_create.columnType.set_comp_param(32);
2876  } else {
// comp param becomes the column's size expressed in bits
2877  cd_for_create.columnType.set_comp_param(cd.columnType.get_size() * 8);
2878  }
2879  }
2880 
2881  if (cd.columnType.is_date() && !cd.columnType.is_date_in_days()) {
2882  // default to kENCODING_DATE_IN_DAYS encoding
2884  cd_for_create.columnType.set_comp_param(0);
2885  }
2886 
2887  column_descriptors_for_create.push_back(cd_for_create);
2888  column_descriptors.push_back(cd);
2889  }
2890 
2891  if (for_create) {
2892  return column_descriptors_for_create;
2893  }
2894 
2895  return column_descriptors;
2896 }
2897 
2898 InsertIntoTableAsSelectStmt::InsertIntoTableAsSelectStmt(
2899  const rapidjson::Value& payload) {
2900  CHECK(payload.HasMember("name"));
2901  table_name_ = json_str(payload["name"]);
2902 
2903  CHECK(payload.HasMember("query"));
2904  select_query_ = json_str(payload["query"]);
2905 
2906  boost::replace_all(select_query_, "\n", " ");
2907  select_query_ = "(" + select_query_ + ")";
2908 
2909  if (payload.HasMember("columns")) {
2910  CHECK(payload["columns"].IsArray());
2911  for (auto& column : payload["columns"].GetArray()) {
2912  std::string s = json_str(column);
2913  column_list_.emplace_back(std::unique_ptr<std::string>(new std::string(s)));
2914  }
2915  }
2916 }
2917 
// Optionally validates the target table against select_query_ and then loads the rows
// produced by select_query_ into table `td`. Used for both INSERT INTO ... SELECT and
// CREATE TABLE AS SELECT (see `for_CTAS`).
//
// @param query_state_proxy proxy for the query state / session of this statement
// @param td                descriptor of the target table
// @param validate_table    when true, check table existence, view-ness, insert
//                          privileges and source/target column compatibility first
// @param for_CTAS          only selects the "CTAS" vs "ITAS" label used in messages
// @throws std::runtime_error on validation failure or when the session is interrupted;
//         any exception raised while loading triggers a rollback attempt and is
//         rethrown
//
// NOTE(review): several statements are absent from the visible lines of this listing
// (e.g. the declarations of `executor`, `factory`, `param` and `insert_data`, and the
// opening `if` of some guarded regions). Comments below describe only what is visible.
2918 void InsertIntoTableAsSelectStmt::populateData(QueryStateProxy query_state_proxy,
2919  const TableDescriptor* td,
2920  bool validate_table,
2921  bool for_CTAS) {
2922  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
2923  auto& catalog = session->getCatalog();
2925 
2926  LocalConnector local_connector;
2927  bool populate_table = false;
2928 
// If a leafs connector is already set, always populate. Otherwise fall back to the
// function-local connector and populate only when g_cluster is false.
// NOTE(review): leafs_connector_ keeps pointing at `local_connector` after this function
// returns -- confirm it is reset or never reused afterwards.
2929  if (leafs_connector_) {
2930  populate_table = true;
2931  } else {
2932  leafs_connector_ = &local_connector;
2933  if (!g_cluster) {
2934  populate_table = true;
2935  }
2936  }
2937 
// Resolves the target columns: every column returned by getAllColumnMetadataForTable()
// when no explicit column list was given, otherwise the named columns in the order
// given. Throws if a named column does not exist.
2938  auto get_target_column_descriptors = [this, &catalog](const TableDescriptor* td) {
2939  std::vector<const ColumnDescriptor*> target_column_descriptors;
2940  if (column_list_.empty()) {
2941  auto list = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
2942  target_column_descriptors = {std::begin(list), std::end(list)};
2943  } else {
2944  for (auto& c : column_list_) {
2945  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
2946  if (cd == nullptr) {
2947  throw std::runtime_error("Column " + *c + " does not exist.");
2948  }
2949  target_column_descriptors.push_back(cd);
2950  }
2951  }
2952 
2953  return target_column_descriptors;
2954  };
2955 
// NOTE(review): evaluated before the `!td` check below -- confirm table_is_temporary()
// tolerates a null descriptor.
2956  bool is_temporary = table_is_temporary(td);
2957 
2958  if (validate_table) {
2959  // check access privileges
2960  if (!td) {
2961  throw std::runtime_error("Table " + table_name_ + " does not exist.");
2962  }
2963  if (td->isView) {
2964  throw std::runtime_error("Insert to views is not supported yet.");
2965  }
2966 
2967  if (!session->checkDBAccessPrivileges(DBObjectType::TableDBObjectType,
2969  table_name_)) {
2970  throw std::runtime_error("User has no insert privileges on " + table_name_ + ".");
2971  }
2972 
2973  // only validate the select query so we get the target types
2974  // correctly, but do not populate the result set
2975  auto result = local_connector.query(query_state_proxy, select_query_, {}, true, true);
2976  auto source_column_descriptors = local_connector.getColumnDescriptors(result, false);
2977 
2978  std::vector<const ColumnDescriptor*> target_column_descriptors =
2979  get_target_column_descriptors(td);
2980 
2981  if (source_column_descriptors.size() != target_column_descriptors.size()) {
2982  throw std::runtime_error("The number of source and target columns does not match.");
2983  }
2984 
// Pairwise compatibility checks between the i-th source and i-th target column.
// NOTE(review): source_column_descriptors is a std::list, so std::next(begin, i) makes
// this loop quadratic in the number of columns.
2985  for (int i = 0; i < source_column_descriptors.size(); i++) {
2986  const ColumnDescriptor* source_cd =
2987  &(*std::next(source_column_descriptors.begin(), i));
2988  const ColumnDescriptor* target_cd = target_column_descriptors.at(i);
2989 
// Differing types are rejected only if either side is time, geometry, array or boolean.
2990  if (source_cd->columnType.get_type() != target_cd->columnType.get_type()) {
2991  auto type_cannot_be_cast = [](const auto& col_type) {
2992  return (col_type.is_time() || col_type.is_geometry() || col_type.is_array() ||
2993  col_type.is_boolean());
2994  };
2995 
2996  if (type_cannot_be_cast(source_cd->columnType) ||
2997  type_cannot_be_cast(target_cd->columnType)) {
2998  throw std::runtime_error("Source '" + source_cd->columnName + " " +
2999  source_cd->columnType.get_type_name() +
3000  "' and target '" + target_cd->columnName + " " +
3001  target_cd->columnType.get_type_name() +
3002  "' column types do not match.");
3003  }
3004  }
// Array columns must agree on their element subtype.
3005  if (source_cd->columnType.is_array()) {
3006  if (source_cd->columnType.get_subtype() != target_cd->columnType.get_subtype()) {
3007  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3008  source_cd->columnType.get_type_name() +
3009  "' and target '" + target_cd->columnName + " " +
3010  target_cd->columnType.get_type_name() +
3011  "' array column element types do not match.");
3012  }
3013  }
3014 
// Decimal columns (or arrays of decimals) must agree on scale.
3015  if (source_cd->columnType.is_decimal() ||
3016  source_cd->columnType.get_elem_type().is_decimal()) {
3017  SQLTypeInfo sourceType = source_cd->columnType;
3018  SQLTypeInfo targetType = target_cd->columnType;
3019 
3020  if (source_cd->columnType.is_array()) {
3021  sourceType = source_cd->columnType.get_elem_type();
3022  targetType = target_cd->columnType.get_elem_type();
3023  }
3024 
3025  if (sourceType.get_scale() != targetType.get_scale()) {
3026  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3027  source_cd->columnType.get_type_name() +
3028  "' and target '" + target_cd->columnName + " " +
3029  target_cd->columnType.get_type_name() +
3030  "' decimal columns scales do not match.");
3031  }
3032  }
3033 
// A string source requires a string target with the same compression.
3034  if (source_cd->columnType.is_string()) {
3035  if (!target_cd->columnType.is_string()) {
3036  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3037  source_cd->columnType.get_type_name() +
3038  "' and target '" + target_cd->columnName + " " +
3039  target_cd->columnType.get_type_name() +
3040  "' column types do not match.");
3041  }
3042  if (source_cd->columnType.get_compression() !=
3043  target_cd->columnType.get_compression()) {
3044  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3045  source_cd->columnType.get_type_name() +
3046  "' and target '" + target_cd->columnName + " " +
3047  target_cd->columnType.get_type_name() +
3048  "' columns string encodings do not match.");
3049  }
3050  }
3051 
// Timestamp columns must agree on dimension (reported as "precision" in the message).
3052  if (source_cd->columnType.is_timestamp() && target_cd->columnType.is_timestamp()) {
3053  if (source_cd->columnType.get_dimension() !=
3054  target_cd->columnType.get_dimension()) {
3055  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3056  source_cd->columnType.get_type_name() +
3057  "' and target '" + target_cd->columnName + " " +
3058  target_cd->columnType.get_type_name() +
3059  "' timestamp column precisions do not match.");
3060  }
3061  }
3062 
// For all remaining types (not string, geometry, integer, decimal, date, time or
// timestamp) the source must not be wider than the target.
3063  if (!source_cd->columnType.is_string() && !source_cd->columnType.is_geometry() &&
3064  !source_cd->columnType.is_integer() && !source_cd->columnType.is_decimal() &&
3065  !source_cd->columnType.is_date() && !source_cd->columnType.is_time() &&
3066  !source_cd->columnType.is_timestamp() &&
3067  source_cd->columnType.get_size() > target_cd->columnType.get_size()) {
3068  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3069  source_cd->columnType.get_type_name() +
3070  "' and target '" + target_cd->columnName + " " +
3071  target_cd->columnType.get_type_name() +
3072  "' column encoding sizes do not match.");
3073  }
3074  }
3075  }
3076 
3077  if (!populate_table) {
3078  return;
3079  }
3080 
// Accumulators for the summary log line emitted at the end.
3081  int64_t total_row_count = 0;
3082  int64_t total_source_query_time_ms = 0;
3083  int64_t total_target_value_translate_time_ms = 0;
3084  int64_t total_data_load_time_ms = 0;
3085 
3086  Fragmenter_Namespace::InsertDataLoader insertDataLoader(*leafs_connector_);
3087  auto target_column_descriptors = get_target_column_descriptors(td);
3088  auto outer_frag_count =
3089  leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
3090 
// With an outer fragment count of 0 the loop below still runs once, with no fragment
// restriction.
3091  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
3092  auto query_session = session ? session->get_session_id() : "";
3094  std::string work_type_str = for_CTAS ? "CTAS" : "ITAS";
3095  try {
3096  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
3097  std::vector<size_t> allowed_outer_fragment_indices;
3098 
3099  if (outer_frag_count) {
3100  allowed_outer_fragment_indices.push_back(outer_frag_idx);
3101  }
3102 
// Run the source query (restricted to the current outer fragment, if any) and time it.
3103  const auto query_clock_begin = timer_start();
3104  std::vector<AggregatedResult> query_results =
3105  leafs_connector_->query(query_state_proxy,
3106  select_query_,
3107  allowed_outer_fragment_indices,
3109  total_source_query_time_ms += timer_stop(query_clock_begin);
3110 
3111  auto start_time = query_state_proxy.getQueryState().getQuerySubmittedTime();
3112  auto query_str = "INSERT_DATA for " + work_type_str;
3114  // In the clean-up phase of the query execution for collecting aggregated result
3115  // of SELECT query, we remove its query session info, so we need to enroll the
3116  // session info again
3117  executor->enrollQuerySession(query_session,
3118  query_str,
3119  start_time,
3121  QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
3122  }
3123 
3124  ScopeGuard clearInterruptStatus = [executor, &query_session, &start_time] {
3125  // this data population is non-kernel operation, so we manually cleanup
3126  // the query session info in the cleanup phase
3128  executor->clearQuerySessionStatus(query_session, start_time);
3129  }
3130  };
3131 
3132  for (auto& res : query_results) {
3133  if (UNLIKELY(check_session_interrupted(query_session, executor))) {
3134  throw std::runtime_error(
3135  "Query execution has been interrupted while performing " + work_type_str);
3136  }
3137  auto& result_rows = res.rs;
3138  result_rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
3139  const auto num_rows = result_rows->rowCount();
3140 
3141  if (0 == num_rows) {
3142  continue;
3143  }
3144 
3145  total_row_count += num_rows;
3146 
3147  size_t leaf_count = leafs_connector_->leafCount();
3148 
3149  // ensure that at least 1 row is processed per block up to a maximum of 65536 rows
3150  const size_t rows_per_block =
3151  std::max(std::min(num_rows / leaf_count, size_t(64 * 1024)), size_t(1));
3152 
3153  std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
3154 
3156 
3157  const int num_worker_threads = std::thread::hardware_concurrency();
3158 
// Per-thread [start, end) ranges over result set entries; start is advanced as blocks
// are consumed.
3159  std::vector<size_t> thread_start_idx(num_worker_threads),
3160  thread_end_idx(num_worker_threads);
// Parallel conversion is used only for non-truncated results with large blocks.
3161  bool can_go_parallel = !result_rows->isTruncated() && rows_per_block > 20000;
3162 
// Next free output row within the current block; shared by all worker threads.
3163  std::atomic<size_t> crt_row_idx{0};
3164 
// Converts the result entry at `idx` (if non-empty) into the next free output row.
// Sets stop_convert once the current block is full; in that case entry `idx` has not
// been converted.
3165  auto do_work = [&result_rows, &value_converters, &crt_row_idx](
3166  const size_t idx,
3167  const size_t block_end,
3168  const size_t num_cols,
3169  const size_t thread_id,
3170  bool& stop_convert) {
3171  const auto result_row = result_rows->getRowAtNoTranslations(idx);
3172  if (!result_row.empty()) {
3173  size_t target_row = crt_row_idx.fetch_add(1);
3174  if (target_row >= block_end) {
3175  stop_convert = true;
3176  return;
3177  }
3178  for (unsigned int col = 0; col < num_cols; col++) {
3179  const auto& mapd_variant = result_row[col];
3180  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3181  }
3182  }
3183  };
3184 
// Worker body for the parallel path: walks this thread's entry range until the block is
// full, then stores the resume position so the next block continues from there. One of
// the two loops additionally polls for session interruption every 0x10000 iterations
// (the selecting condition is not visible in this listing).
3185  auto convert_function = [&thread_start_idx,
3186  &thread_end_idx,
3187  &value_converters,
3188  &executor,
3189  &query_session,
3190  &work_type_str,
3191  &do_work](const int thread_id, const size_t block_end) {
3192  const int num_cols = value_converters.size();
3193  const size_t start = thread_start_idx[thread_id];
3194  const size_t end = thread_end_idx[thread_id];
3195  size_t idx = 0;
3196  bool stop_convert = false;
3198  size_t local_idx = 0;
3199  for (idx = start; idx < end; ++idx, ++local_idx) {
3200  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3201  check_session_interrupted(query_session, executor))) {
3202  throw std::runtime_error(
3203  "Query execution has been interrupted while performing " +
3204  work_type_str);
3205  }
3206  do_work(idx, block_end, num_cols, thread_id, stop_convert);
3207  if (stop_convert) {
3208  break;
3209  }
3210  }
3211  } else {
3212  for (idx = start; idx < end; ++idx) {
3213  do_work(idx, block_end, num_cols, thread_id, stop_convert);
3214  if (stop_convert) {
3215  break;
3216  }
3217  }
3218  }
3219  thread_start_idx[thread_id] = idx;
3220  };
3221 
// Sequential counterpart of do_work: pulls rows with getNextRow() instead of addressing
// entries by index, and reserves the output slot before fetching the row.
3222  auto single_threaded_value_converter =
3223  [&crt_row_idx, &value_converters, &result_rows](const size_t idx,
3224  const size_t block_end,
3225  const size_t num_cols,
3226  bool& stop_convert) {
3227  size_t target_row = crt_row_idx.fetch_add(1);
3228  if (target_row >= block_end) {
3229  stop_convert = true;
3230  return;
3231  }
3232  const auto result_row = result_rows->getNextRow(false, false);
3233  CHECK(!result_row.empty());
3234  for (unsigned int col = 0; col < num_cols; col++) {
3235  const auto& mapd_variant = result_row[col];
3236  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3237  }
3238  };
3239 
// Sequential counterpart of convert_function.
3240  auto single_threaded_convert_function = [&value_converters,
3241  &thread_start_idx,
3242  &thread_end_idx,
3243  &executor,
3244  &query_session,
3245  &work_type_str,
3246  &single_threaded_value_converter](
3247  const int thread_id,
3248  const size_t block_end) {
3249  const int num_cols = value_converters.size();
3250  const size_t start = thread_start_idx[thread_id];
3251  const size_t end = thread_end_idx[thread_id];
3252  size_t idx = 0;
3253  bool stop_convert = false;
3255  size_t local_idx = 0;
3256  for (idx = start; idx < end; ++idx, ++local_idx) {
3257  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3258  check_session_interrupted(query_session, executor))) {
3259  throw std::runtime_error(
3260  "Query execution has been interrupted while performing " +
3261  work_type_str);
3262  }
3263  single_threaded_value_converter(idx, block_end, num_cols, stop_convert);
3264  if (stop_convert) {
3265  break;
3266  }
3267  }
3268  } else {
3269  for (idx = start; idx < end; ++idx) {
// NOTE(review): this branch passes `end` as the block limit, whereas the branch above
// (and both branches of convert_function) pass `block_end` -- confirm this is intended.
3270  single_threaded_value_converter(idx, end, num_cols, stop_convert);
3271  if (stop_convert) {
3272  break;
3273  }
3274  }
3275  }
3276  thread_start_idx[thread_id] = idx;
3277  };
3278 
// Partition the entry range: contiguous strides of ceil(entry_count / threads) entries
// per worker in the parallel case, a single range covering everything otherwise.
3279  if (can_go_parallel) {
3280  const size_t entry_count = result_rows->entryCount();
3281  for (size_t
3282  i = 0,
3283  start_entry = 0,
3284  stride = (entry_count + num_worker_threads - 1) / num_worker_threads;
3285  i < num_worker_threads && start_entry < entry_count;
3286  ++i, start_entry += stride) {
3287  const auto end_entry = std::min(start_entry + stride, entry_count);
3288  thread_start_idx[i] = start_entry;
3289  thread_end_idx[i] = end_entry;
3290  }
3291  } else {
3292  thread_start_idx[0] = 0;
3293  thread_end_idx[0] = result_rows->entryCount();
3294  }
3295 
// Convert and load the result in blocks of at most rows_per_block rows.
3296  for (size_t block_start = 0; block_start < num_rows;
3297  block_start += rows_per_block) {
3298  const auto num_rows_this_itr = block_start + rows_per_block < num_rows
3299  ? rows_per_block
3300  : num_rows - block_start;
3301  crt_row_idx = 0; // reset block tracker
// Fresh converters, one per target column, are created for every block.
3302  value_converters.clear();
3303  int colNum = 0;
3304  for (const auto targetDescriptor : target_column_descriptors) {
3305  auto sourceDataMetaInfo = res.targets_meta[colNum++];
3306 
3308  num_rows_this_itr,
3309  catalog,
3310  sourceDataMetaInfo,
3311  targetDescriptor,
3312  targetDescriptor->columnType,
3313  !targetDescriptor->columnType.get_notnull(),
3314  result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
3316  ? executor->getStringDictionaryProxy(
3317  sourceDataMetaInfo.get_type_info().get_comp_param(),
3318  result_rows->getRowSetMemOwner(),
3319  true)
3320  : nullptr};
3321  auto converter = factory.create(param);
3322  value_converters.push_back(std::move(converter));
3323  }
3324 
// Translate result rows into columnar form, in parallel if permitted. wait() on every
// future first, then get() to propagate any exception thrown by a worker.
3325  const auto translate_clock_begin = timer_start();
3326  if (can_go_parallel) {
3327  std::vector<std::future<void>> worker_threads;
3328  for (int i = 0; i < num_worker_threads; ++i) {
3329  worker_threads.push_back(
3330  std::async(std::launch::async, convert_function, i, num_rows_this_itr));
3331  }
3332 
3333  for (auto& child : worker_threads) {
3334  child.wait();
3335  }
3336  for (auto& child : worker_threads) {
3337  child.get();
3338  }
3339 
3340  } else {
3341  single_threaded_convert_function(0, num_rows_this_itr);
3342  }
3343 
3344  // finalize the insert data
3345  auto finalizer_func =
3346  [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
3347  targetValueConverter->finalizeDataBlocksForInsertData();
3348  };
3349 
// One async task per converter.
3350  std::vector<std::future<void>> worker_threads;
3351  for (auto& converterPtr : value_converters) {
3352  worker_threads.push_back(
3353  std::async(std::launch::async, finalizer_func, converterPtr.get()));
3354  }
3355 
3356  for (auto& child : worker_threads) {
3357  child.wait();
3358  }
3359  for (auto& child : worker_threads) {
3360  child.get();
3361  }
3362 
// Assemble the insert payload for this block.
3364  insert_data.databaseId = catalog.getCurrentDB().dbId;
3365  CHECK(td);
3366  insert_data.tableId = td->tableId;
3367  insert_data.numRows = num_rows_this_itr;
3368 
3369  for (int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
3371  check_session_interrupted(query_session, executor))) {
3372  throw std::runtime_error(
3373  "Query execution has been interrupted while performing " +
3374  work_type_str);
3375  }
3376  value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
3377  }
3378  total_target_value_translate_time_ms += timer_stop(translate_clock_begin);
3379 
// Fill in columns not covered by the insert and hand the block to the loader.
// data_memory_holder is not read again in the visible code; presumably it only keeps
// the filled-in buffers alive until insertData() returns -- confirm.
3380  const auto data_load_clock_begin = timer_start();
3381  auto data_memory_holder =
3382  import_export::fill_missing_columns(&catalog, insert_data);
3383  insertDataLoader.insertData(*session, insert_data);
3384  total_data_load_time_ms += timer_stop(data_load_clock_begin);
3385  }
3386  }
3387  }
3388  } catch (...) {
// Any failure during loading: attempt a rollback of the target table, log (but do not
// propagate) a std::exception from the rollback itself, then rethrow the original error.
3389  try {
3390  leafs_connector_->rollback(*session, td->tableId);
3391  } catch (std::exception& e) {
3392  LOG(ERROR) << "An error occurred during ITAS rollback attempt. Table id: "
3393  << td->tableId << ", Error: " << e.what();
3394  }
3395  throw;
3396  }
3397 
3398  int64_t total_time_ms = total_source_query_time_ms +
3399  total_target_value_translate_time_ms + total_data_load_time_ms;
3400 
3401  VLOG(1) << "CTAS/ITAS " << total_row_count << " rows loaded in " << total_time_ms
3402  << "ms (outer_frag_count=" << outer_frag_count
3403  << ", query_time=" << total_source_query_time_ms
3404  << "ms, translation_time=" << total_target_value_translate_time_ms
3405  << "ms, data_load_time=" << total_data_load_time_ms
3406  << "ms)\nquery: " << select_query_;
3407 
// Temporary tables are not checkpointed.
3408  if (!is_temporary) {
3409  leafs_connector_->checkpoint(*session, td->tableId);
3410  }
3411 }
3412 
// Executes INSERT INTO <table> SELECT ...: verifies the target table exists, collects
// the tables touched by the statement, acquires per-table locks in table-id order and
// then delegates to populateData() with validation enabled.
//
// @param session session the statement runs in (a local copy is used internally)
// @throws std::runtime_error if the target table does not exist, plus anything
//         populateData() throws
//
// NOTE(review): the lock-acquisition expressions are only partially present in the
// visible lines of this listing (including the declaration of `locks`); comments below
// describe only what is visible.
3413 void InsertIntoTableAsSelectStmt::execute(const Catalog_Namespace::SessionInfo& session) {
// Work on a stack copy of the session; the shared_ptr uses a null deleter so it never
// frees the copy.
3414  auto session_copy = session;
3415  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3416  &session_copy, boost::null_deleter());
3417  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
3418  auto stdlog = STDLOG(query_state);
3419  auto& catalog = session_ptr->getCatalog();
3420 
3421  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3424 
3425  if (catalog.getMetadataForTable(table_name_) == nullptr) {
3426  throw std::runtime_error("ITAS failed: table " + table_name_ + " does not exist.");
3427  }
3428 
3430  std::vector<std::string> tables;
3431 
3432  // get the table info
3433  auto calcite_mgr = catalog.getCalciteMgr();
3434 
3435  // TODO MAT this should actually get the global or the session parameter for
3436  // view optimization
3437  const auto result = calcite_mgr->process(query_state->createQueryStateProxy(),
3438  pg_shim(select_query_),
3439  {},
3440  true,
3441  false,
3442  false,
3443  true);
3444 
// Tables read by the SELECT (first element of each resolved entry) plus the target table.
3445  for (auto& tab : result.resolved_accessed_objects.tables_selected_from) {
3446  tables.emplace_back(tab[0]);
3447  }
3448  tables.emplace_back(table_name_);
3449 
3450  // force sort into tableid order in case of name change to guarantee fixed order of
3451  // mutex access
// NOTE(review): the comparator dereferences getMetadataForTable() without a null check;
// it relies on every collected name resolving to an existing table.
3452  std::sort(tables.begin(),
3453  tables.end(),
3454  [&catalog](const std::string& a, const std::string& b) {
3455  return catalog.getMetadataForTable(a, false)->tableId <
3456  catalog.getMetadataForTable(b, false)->tableId;
3457  });
3458 
// Drop adjacent duplicate names left after sorting.
3459  tables.erase(unique(tables.begin(), tables.end()), tables.end());
3460  for (const auto& table : tables) {
3461  locks.emplace_back(
3464  catalog, table)));
3465  if (table == table_name_) {
3466  // Acquire an insert data lock for updates/deletes, consistent w/ insert. The
3467  // table data lock will be acquired in the fragmenter during checkpoint.
3468  locks.emplace_back(
3471  catalog.getDatabaseId(), (*locks.back())())));
3472  } else {
3473  locks.emplace_back(
3476  catalog.getDatabaseId(), (*locks.back())())));
3477  }
3478  }
3479 
3480  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
// validate_table = true, for_CTAS = false. The try/catch only rethrows and adds no
// handling of its own.
3481  try {
3482  populateData(query_state->createQueryStateProxy(), td, true, false);
3483  } catch (...) {
3484  throw;
3485  }
3486 }
3487 
3488 CreateTableAsSelectStmt::CreateTableAsSelectStmt(const rapidjson::Value& payload)
3489  : InsertIntoTableAsSelectStmt(payload) {
3490  if (payload.HasMember("temporary")) {
3491  is_temporary_ = json_bool(payload["temporary"]);
3492  } else {
3493  is_temporary_ = false;
3494  }
3495 
3496  if (payload.HasMember("ifNotExists")) {
3497  if_not_exists_ = json_bool(payload["ifNotExists"]);
3498  } else {
3499  if_not_exists_ = false;
3500  }
3501 
3502  parse_options(payload, storage_options_);
3503 }
3504 
3506  auto session_copy = session;
3507  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3508  &session_copy, boost::null_deleter());
3509  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
3510  auto stdlog = STDLOG(query_state);
3511  LocalConnector local_connector;
3512  auto& catalog = session.getCatalog();
3513  bool create_table = nullptr == leafs_connector_;
3514 
3515  std::set<std::string> select_tables;
3516  if (create_table) {
3517  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3520 
3521  // check access privileges
3524  throw std::runtime_error("CTAS failed. Table " + table_name_ +
3525  " will not be created. User has no create privileges.");
3526  }
3527 
3528  if (catalog.getMetadataForTable(table_name_) != nullptr) {
3529  if (if_not_exists_) {
3530  return;
3531  }
3532  throw std::runtime_error("Table " + table_name_ +
3533  " already exists and no data was loaded.");
3534  }
3535 
3536  // get the table info
3537  auto calcite_mgr = catalog.getCalciteMgr();
3538 
3539  // TODO MAT this should actually get the global or the session parameter for
3540  // view optimization
3541  const auto result = calcite_mgr->process(query_state->createQueryStateProxy(),
3543  {},
3544  true,
3545  false,
3546  false,
3547  true);
3548 
3549  // TODO 12 Apr 2021 MAT schema change need to keep schema in future
3550  // just keeping it moving for now
3551  for (auto& tab : result.resolved_accessed_objects.tables_selected_from) {
3552  select_tables.insert(tab[0]);
3553  }
3554 
3555  // only validate the select query so we get the target types
3556  // correctly, but do not populate the result set
3557  // we currently have exclusive access to the system so this is safe
3558  auto validate_result = local_connector.query(
3559  query_state->createQueryStateProxy(), select_query_, {}, true, false);
3560 
3561  const auto column_descriptors_for_create =
3562  local_connector.getColumnDescriptors(validate_result, true);
3563 
3564  // some validation as the QE might return some out of range column types
3565  for (auto& cd : column_descriptors_for_create) {
3566  if (cd.columnType.is_decimal() && cd.columnType.get_precision() > 18) {
3567  throw std::runtime_error(cd.columnName + ": Precision too high, max 18.");
3568  }
3569  }
3570 
3571  TableDescriptor td;
3572  td.tableName = table_name_;
3573  td.userId = session.get_currentUser().userId;
3574  td.nColumns = column_descriptors_for_create.size();
3575  td.isView = false;
3576  td.fragmenter = nullptr;
3583  if (is_temporary_) {
3585  } else {
3587  }
3588 
3589  bool use_shared_dictionaries = true;
3590 
3591  if (!storage_options_.empty()) {
3592  for (auto& p : storage_options_) {
3593  if (boost::to_lower_copy<std::string>(*p->get_name()) ==
3594  "use_shared_dictionaries") {
3595  const StringLiteral* literal =
3596  dynamic_cast<const StringLiteral*>(p->get_value());
3597  if (nullptr == literal) {
3598  throw std::runtime_error(
3599  "USE_SHARED_DICTIONARIES must be a string parameter");
3600  }
3601  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
3602  use_shared_dictionaries = val == "true" || val == "1" || val == "t";
3603  } else {
3604  get_table_definitions_for_ctas(td, p, column_descriptors_for_create);
3605  }
3606  }
3607  }
3608 
3609  std::vector<SharedDictionaryDef> sharedDictionaryRefs;
3610 
3611  if (use_shared_dictionaries) {
3612  const auto source_column_descriptors =
3613  local_connector.getColumnDescriptors(validate_result, false);
3614  const auto mapping = catalog.getDictionaryToColumnMapping();
3615 
3616  for (auto& source_cd : source_column_descriptors) {
3617  const auto& ti = source_cd.columnType;
3618  if (ti.is_string()) {
3619  if (ti.get_compression() == kENCODING_DICT) {
3620  int dict_id = ti.get_comp_param();
3621  auto it = mapping.find(dict_id);
3622  if (mapping.end() != it) {
3623  const auto targetColumn = it->second;
3624  auto targetTable =
3625  catalog.getMetadataForTable(targetColumn->tableId, false);
3626  CHECK(targetTable);
3627  LOG(INFO) << "CTAS: sharing text dictionary on column "
3628  << source_cd.columnName << " with " << targetTable->tableName
3629  << "." << targetColumn->columnName;
3630  sharedDictionaryRefs.emplace_back(
3631  source_cd.columnName, targetTable->tableName, targetColumn->columnName);
3632  }
3633  }
3634  }
3635  }
3636  }
3637 
3638  // currently no means of defining sharding in CTAS
3639  td.keyMetainfo = serialize_key_metainfo(nullptr, sharedDictionaryRefs);
3640 
3641  catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs, true);
3642  // TODO (max): It's transactionally unsafe, should be fixed: we may create object
3643  // w/o privileges
3644  SysCatalog::instance().createDBObject(
3645  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
3646  }
3647 
3648  // note there is a time where we do not have any executor outer lock here. someone could
3649  // come along and mess with the data or other tables.
3650  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3653 
3655  std::vector<std::string> tables;
3656  tables.insert(tables.end(), select_tables.begin(), select_tables.end());
3657  CHECK_EQ(tables.size(), select_tables.size());
3658  tables.emplace_back(table_name_);
3659  // force sort into tableid order in case of name change to guarantee fixed order of
3660  // mutex access
3661  std::sort(tables.begin(),
3662  tables.end(),
3663  [&catalog](const std::string& a, const std::string& b) {
3664  return catalog.getMetadataForTable(a, false)->tableId <
3665  catalog.getMetadataForTable(b, false)->tableId;
3666  });
3667  tables.erase(unique(tables.begin(), tables.end()), tables.end());
3668  for (const auto& table : tables) {
3669  locks.emplace_back(
3672  catalog, table)));
3673  if (table == table_name_) {
3674  // Acquire an insert data lock for updates/deletes, consistent w/ insert. The
3675  // table data lock will be acquired in the fragmenter during checkpoint.
3676  locks.emplace_back(
3679  catalog.getDatabaseId(), (*locks.back())())));
3680  } else {
3681  locks.emplace_back(
3684  catalog.getDatabaseId(), (*locks.back())())));
3685  }
3686  }
3687 
3688  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
3689 
3690  try {
3691  populateData(query_state->createQueryStateProxy(), td, false, true);
3692  } catch (...) {
3693  if (!g_cluster) {
3694  const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
3695  if (created_td) {
3696  catalog.dropTable(created_td);
3697  }
3698  }
3699  throw;
3700  }
3701 }
3702 
3703 DropTableStmt::DropTableStmt(const rapidjson::Value& payload) {
3704  CHECK(payload.HasMember("tableName"));
3705  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
3706 
3707  if_exists_ = false;
3708  if (payload.HasMember("ifExists")) {
3709  if_exists_ = json_bool(payload["ifExists"]);
3710  }
3711 }
3712 
3714  auto& catalog = session.getCatalog();
3715 
3716  // TODO(adb): the catalog should be handling this locking.
3717  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3720 
3721  const TableDescriptor* td{nullptr};
3722  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
3723 
3724  try {
3725  td_with_lock =
3726  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
3728  catalog, *table_, false));
3729  td = (*td_with_lock)();
3730  } catch (const std::runtime_error& e) {
3731  if (if_exists_) {
3732  return;
3733  } else {
3734  throw e;
3735  }
3736  }
3737 
3738  CHECK(td);
3739  CHECK(td_with_lock);
3740 
3741  // check access privileges
3742  if (!session.checkDBAccessPrivileges(
3744  throw std::runtime_error("Table " + *table_ +
3745  " will not be dropped. User has no proper privileges.");
3746  }
3747 
3749 
3750  auto table_data_write_lock =
3752  catalog.dropTable(td);
3753 
3754  // invalidate cached hashtable
3756 }
3757 
3759 
3760 std::unique_ptr<DDLStmt> AlterTableStmt::delegate(const rapidjson::Value& payload) {
3761  CHECK(payload.HasMember("tableName"));
3762  auto tableName = json_str(payload["tableName"]);
3763 
3764  CHECK(payload.HasMember("alterType"));
3765  auto type = json_str(payload["alterType"]);
3766 
3767  if (type == "RENAME_TABLE") {
3768  CHECK(payload.HasMember("newTableName"));
3769  auto newTableName = json_str(payload["newTableName"]);
3770  return std::unique_ptr<DDLStmt>(new Parser::RenameTableStmt(
3771  new std::string(tableName), new std::string(newTableName)));
3772 
3773  } else if (type == "RENAME_COLUMN") {
3774  CHECK(payload.HasMember("columnName"));
3775  auto columnName = json_str(payload["columnName"]);
3776  CHECK(payload.HasMember("newColumnName"));
3777  auto newColumnName = json_str(payload["newColumnName"]);
3778  return std::unique_ptr<DDLStmt>(
3779  new Parser::RenameColumnStmt(new std::string(tableName),
3780  new std::string(columnName),
3781  new std::string(newColumnName)));
3782 
3783  } else if (type == "ADD_COLUMN") {
3784  CHECK(payload.HasMember("columnData"));
3785  CHECK(payload["columnData"].IsArray());
3786 
3787  // New Columns go into this list
3788  std::list<ColumnDef*>* table_element_list_ = new std::list<ColumnDef*>;
3789 
3790  const auto elements = payload["columnData"].GetArray();
3791  for (const auto& element : elements) {
3792  CHECK(element.IsObject());
3793  CHECK(element.HasMember("type"));
3794  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
3795  auto col_def = column_from_json(element);
3796  table_element_list_->emplace_back(col_def.release());
3797  } else {
3798  LOG(FATAL) << "Unsupported element type for ALTER TABLE: "
3799  << element["type"].GetString();
3800  }
3801  }
3802 
3803  return std::unique_ptr<DDLStmt>(
3804  new Parser::AddColumnStmt(new std::string(tableName), table_element_list_));
3805 
3806  } else if (type == "DROP_COLUMN") {
3807  CHECK(payload.HasMember("columnData"));
3808  auto columnData = json_str(payload["columnData"]);
3809  // Convert columnData to std::list<std::string*>*
3810  // allocate std::list<> as DropColumnStmt will delete it;
3811  std::list<std::string*>* cols = new std::list<std::string*>;
3812  std::vector<std::string> cols1;
3813  boost::split(cols1, columnData, boost::is_any_of(","));
3814  for (auto s : cols1) {
3815  // strip leading/trailing spaces/quotes/single quotes
3816  boost::algorithm::trim_if(s, boost::is_any_of(" \"'`"));
3817  std::string* str = new std::string(s);
3818  cols->emplace_back(str);
3819  }
3820 
3821  return std::unique_ptr<DDLStmt>(
3822  new Parser::DropColumnStmt(new std::string(tableName), cols));
3823 
3824  } else if (type == "ALTER_OPTIONS") {
3825  CHECK(payload.HasMember("options"));
3826 
3827  if (payload["options"].IsObject()) {
3828  for (const auto& option : payload["options"].GetObject()) {
3829  std::string* option_name = new std::string(json_str(option.name));
3830  Literal* literal_value;
3831  if (option.value.IsString()) {
3832  std::string literal_string = json_str(option.value);
3833 
3834  // iff this string can be converted to INT
3835  // ... do so because it is necessary for AlterTableParamStmt
3836  std::size_t sz;
3837  int iVal = std::stoi(literal_string, &sz);
3838  if (sz == literal_string.size()) {
3839  literal_value = new IntLiteral(iVal);
3840  } else {
3841  literal_value = new StringLiteral(&literal_string);
3842  }
3843  } else if (option.value.IsInt() || option.value.IsInt64()) {
3844  literal_value = new IntLiteral(json_i64(option.value));
3845  } else if (option.value.IsNull()) {
3846  literal_value = new NullLiteral();
3847  } else {
3848  throw std::runtime_error("Unable to handle literal for " + *option_name);
3849  }
3850  CHECK(literal_value);
3851 
3852  NameValueAssign* nv = new NameValueAssign(option_name, literal_value);
3853  return std::unique_ptr<DDLStmt>(
3854  new Parser::AlterTableParamStmt(new std::string(tableName), nv));
3855  }
3856 
3857  } else {
3858  CHECK(payload["options"].IsNull());
3859  }
3860  }
3861  return nullptr;
3862 }
3863 
3864 TruncateTableStmt::TruncateTableStmt(const rapidjson::Value& payload) {
3865  CHECK(payload.HasMember("tableName"));
3866  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
3867 }
3868 
3870  auto& catalog = session.getCatalog();
3871 
3872  // TODO: Removal of the FileMgr is not thread safe. Take a global system write lock
3873  // when truncating a table
3874  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3877 
3878  const auto td_with_lock =
3880  catalog, *table_, true);
3881  const auto td = td_with_lock();
3882  if (!td) {
3883  throw std::runtime_error("Table " + *table_ + " does not exist.");
3884  }
3885 
3886  // check access privileges
3887  std::vector<DBObject> privObjects;
3888  DBObject dbObject(*table_, TableDBObjectType);
3889  dbObject.loadKey(catalog);
3891  privObjects.push_back(dbObject);
3892  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
3893  throw std::runtime_error("Table " + *table_ + " will not be truncated. User " +
3894  session.get_currentUser().userLoggable() +
3895  " has no proper privileges.");
3896  }
3897 
3898  if (td->isView) {
3899  throw std::runtime_error(*table_ + " is a view. Cannot Truncate.");
3900  }
3902  auto table_data_write_lock =
3904  catalog.truncateTable(td);
3905 
3906  // invalidate cached hashtable
3908 }
3909 
3910 OptimizeTableStmt::OptimizeTableStmt(const rapidjson::Value& payload) {
3911  CHECK(payload.HasMember("tableName"));
3912  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
3913  parse_options(payload, options_);
3914 }
3915 
3916 namespace {
3918  const TableDescriptor* td,
3919  const AccessPrivileges access_priv) {
3920  CHECK(td);
3921  auto& cat = session_info.getCatalog();
3922  std::vector<DBObject> privObjects;
3923  DBObject dbObject(td->tableName, TableDBObjectType);
3924  dbObject.loadKey(cat);
3925  dbObject.setPrivileges(access_priv);
3926  privObjects.push_back(dbObject);
3927  return SysCatalog::instance().checkPrivileges(session_info.get_currentUser(),
3928  privObjects);
3929 };
3930 } // namespace
3931 
3933  auto& catalog = session.getCatalog();
3934 
3935  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
3938 
3939  const auto td_with_lock =
3941  catalog, *table_);
3942  const auto td = td_with_lock();
3943 
3944  if (!td || !user_can_access_table(session, td, AccessPrivileges::DELETE_FROM_TABLE)) {
3945  throw std::runtime_error("Table " + *table_ + " does not exist.");
3946  }
3947 
3948  if (td->isView) {
3949  throw std::runtime_error("OPTIMIZE TABLE command is not supported on views.");
3950  }
3951 
3953  const TableOptimizer optimizer(td, executor, catalog);
3954  if (shouldVacuumDeletedRows()) {
3955  optimizer.vacuumDeletedRows();
3956  }
3957  optimizer.recomputeMetadata();
3958 }
3959 
3960 bool repair_type(std::list<std::unique_ptr<NameValueAssign>>& options) {
3961  for (const auto& opt : options) {
3962  if (boost::iequals(*opt->get_name(), "REPAIR_TYPE")) {
3963  const auto repair_type =
3964  static_cast<const StringLiteral*>(opt->get_value())->get_stringval();
3965  CHECK(repair_type);
3966  if (boost::iequals(*repair_type, "REMOVE")) {
3967  return true;
3968  } else {
3969  throw std::runtime_error("REPAIR_TYPE must be REMOVE.");
3970  }
3971  } else {
3972  throw std::runtime_error("The only VALIDATE WITH options is REPAIR_TYPE.");
3973  }
3974  }
3975  return false;
3976 }
3977 
3978 ValidateStmt::ValidateStmt(std::string* type, std::list<NameValueAssign*>* with_opts)
3979  : type_(type) {
3980  if (!type) {
3981  throw std::runtime_error("Validation Type is required for VALIDATE command.");
3982  }
3983  std::list<std::unique_ptr<NameValueAssign>> options;
3984  if (with_opts) {
3985  for (const auto e : *with_opts) {
3986  options.emplace_back(e);
3987  }
3988  delete with_opts;
3989 
3990  isRepairTypeRemove_ = repair_type(options);
3991  }
3992 }
3993 
3994 ValidateStmt::ValidateStmt(const rapidjson::Value& payload) {
3995  CHECK(payload.HasMember("type"));
3996  type_ = std::make_unique<std::string>(json_str(payload["type"]));
3997 
3998  std::list<std::unique_ptr<NameValueAssign>> options;
3999  parse_options(payload, options);
4000 
4001  isRepairTypeRemove_ = repair_type(options);
4002 }
4003 
4005  const TableDescriptor* td) {
4006  if (session.get_currentUser().isSuper ||
4007  session.get_currentUser().userId == td->userId) {
4008  return;
4009  }
4010  std::vector<DBObject> privObjects;
4011  DBObject dbObject(td->tableName, TableDBObjectType);
4012  dbObject.loadKey(session.getCatalog());
4014  privObjects.push_back(dbObject);
4015  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
4016  throw std::runtime_error("Current user does not have the privilege to alter table: " +
4017  td->tableName);
4018  }
4019 }
4020 
4021 RenameUserStmt::RenameUserStmt(const rapidjson::Value& payload) {
4022  CHECK(payload.HasMember("name"));
4023  username_ = std::make_unique<std::string>(json_str(payload["name"]));
4024  CHECK(payload.HasMember("newName"));
4025  new_username_ = std::make_unique<std::string>(json_str(payload["newName"]));
4026 }
4027 
4029  if (!session.get_currentUser().isSuper) {
4030  throw std::runtime_error("Only a super user can rename users.");
4031  }
4032 
4034  if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
4035  throw std::runtime_error("User " + *username_ + " does not exist.");
4036  }
4037 
4038  SysCatalog::instance().renameUser(*username_, *new_username_);
4039 }
4040 
4041 RenameDBStmt::RenameDBStmt(const rapidjson::Value& payload) {
4042  CHECK(payload.HasMember("name"));
4043  database_name_ = std::make_unique<std::string>(json_str(payload["name"]));
4044  CHECK(payload.HasMember("newName"));
4045  new_database_name_ = std::make_unique<std::string>(json_str(payload["newName"]));
4046 }
4047 
4050 
4051  // TODO: use database lock instead
4052  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4055 
4056  if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
4057  throw std::runtime_error("Database " + *database_name_ + " does not exist.");
4058  }
4059 
4060  if (!session.get_currentUser().isSuper &&
4061  session.get_currentUser().userId != db.dbOwner) {
4062  throw std::runtime_error("Only a super user or the owner can rename the database.");
4063  }
4064 
4065  SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
4066 }
4067 
4068 RenameTableStmt::RenameTableStmt(const rapidjson::Value& payload) {
4069  CHECK(payload.HasMember("tableNames"));
4070  CHECK(payload["tableNames"].IsArray());
4071  const auto elements = payload["tableNames"].GetArray();
4072  for (const auto& element : elements) {
4073  CHECK(element.HasMember("name"));
4074  CHECK(element.HasMember("newName"));
4075  tablesToRename_.emplace_back(new std::string(json_str(element["name"])),
4076  new std::string(json_str(element["newName"])));
4077  }
4078 }
4079 
4080 RenameTableStmt::RenameTableStmt(std::string* tab_name, std::string* new_tab_name) {
4081  tablesToRename_.emplace_back(tab_name, new_tab_name);
4082 }
4083 
4085  std::list<std::pair<std::string, std::string>> tableNames) {
4086  for (auto item : tableNames) {
4087  tablesToRename_.emplace_back(new std::string(item.first),
4088  new std::string(item.second));
4089  }
4090 }
4091 
4092 using SubstituteMap = std::map<std::string, std::string>;
4093 
4094 // Namespace fns used to track a left-to-right execution of RENAME TABLE
4095 // and verify that the command should be (entirely/mostly) valid
4096 //
4097 namespace {
4098 
4099 static constexpr char const* EMPTY_NAME{""};
4100 
// Derives a temporary table name: <name> + "_tmp" + the current std::time()
// value in decimal.
// TODO - is there a "better" way to create a tmp name for the table
std::string generateUniqueTableName(std::string name) {
  const std::time_t now = std::time(nullptr);
  name += "_tmp";
  name += std::to_string(now);
  return name;
}
4106 
4107 void recordRename(SubstituteMap& sMap, std::string oldName, std::string newName) {
4108  sMap[oldName] = newName;
4109 }
4110 
4112  SubstituteMap& sMap,
4113  std::string tableName) {
4114  if (sMap.find(tableName) != sMap.end()) {
4115  if (sMap[tableName] == EMPTY_NAME) {
4116  return tableName;
4117  }
4118  return sMap[tableName];
4119  } else {
4120  // lookup table in src catalog
4121  const TableDescriptor* td = catalog.getMetadataForTable(tableName);
4122  if (td) {
4123  sMap[tableName] = tableName;
4124  } else {
4125  sMap[tableName] = EMPTY_NAME;
4126  }
4127  }
4128  return tableName;
4129 }
4130 
4131 bool hasData(SubstituteMap& sMap, std::string tableName) {
4132  // assumes loadTable has been previously called
4133  return (sMap[tableName] != EMPTY_NAME);
4134 }
4135 
4137  // Substitution map should be clean at end of rename:
4138  // all items in map must (map to self) or (map to EMPTY_NAME) by end
4139 
4140  for (auto it : sMap) {
4141  if ((it.second) != EMPTY_NAME && (it.first) != (it.second)) {
4142  throw std::runtime_error(
4143  "Error: Attempted to overwrite and lose data in table: \'" + (it.first) + "\'");
4144  }
4145  }
4146 }
4147 } // namespace
4148 
4149 namespace {
4152  throw std::runtime_error(td->tableName + " is a foreign table. " +
4153  "Use ALTER FOREIGN TABLE.");
4154  }
4155 }
4156 } // namespace
4157 
4159  auto& catalog = session.getCatalog();
4160 
4161  // TODO(adb): the catalog should be handling this locking (see AddColumnStmt)
4162  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4165 
4166  // accumulated vector of table names: oldName->newName
4167  std::vector<std::pair<std::string, std::string>> names;
4168 
4169  SubstituteMap tableSubtituteMap;
4170 
4171  for (auto& item : tablesToRename_) {
4172  std::string curTableName = *(item.first);
4173  std::string newTableName = *(item.second);
4174 
4175  // Note: if rename (a->b, b->a)
4176  // requires a tmp name change (a->tmp, b->a, tmp->b),
4177  // inject that here because
4178  // catalog.renameTable() assumes cleanliness else will fail
4179 
4180  std::string altCurTableName = loadTable(catalog, tableSubtituteMap, curTableName);
4181  std::string altNewTableName = loadTable(catalog, tableSubtituteMap, newTableName);
4182 
4183  if (altCurTableName != curTableName && altCurTableName != EMPTY_NAME) {
4184  // rename is a one-shot deal, reset the mapping once used
4185  recordRename(tableSubtituteMap, curTableName, curTableName);
4186  }
4187 
4188  // Check to see if the command (as-entered) will likely execute cleanly (logic-wise)
4189  // src tables exist before copying from
4190  // destination table collisions
4191  // handled (a->b, b->a)
4192  // or flagged (pre-existing a,b ... "RENAME TABLE a->c, b->c" )
4193  // handle multiple chained renames, tmp names (a->tmp, b->a, tmp->b)
4194  // etc.
4195  //
4196  if (hasData(tableSubtituteMap, altCurTableName)) {
4197  const TableDescriptor* td = catalog.getMetadataForTable(altCurTableName);
4198  if (td) {
4199  // Tables *and* views may be renamed here, foreign tables not
4200  // -> block only foreign tables
4202  check_alter_table_privilege(session, td);
4203  }
4204 
4205  if (hasData(tableSubtituteMap, altNewTableName)) {
4206  std::string tmpNewTableName = generateUniqueTableName(altNewTableName);
4207  // rename: newTableName to tmpNewTableName to get it out of the way
4208  // because it was full
4209  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4210  recordRename(tableSubtituteMap, altNewTableName, tmpNewTableName);
4211  recordRename(tableSubtituteMap, tmpNewTableName, tmpNewTableName);
4212  names.emplace_back(altNewTableName, tmpNewTableName);
4213  names.emplace_back(altCurTableName, altNewTableName);
4214  } else {
4215  // rename: curNewTableName to newTableName
4216  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4217  recordRename(tableSubtituteMap, altNewTableName, altNewTableName);
4218  names.emplace_back(altCurTableName, altNewTableName);
4219  }
4220  } else {
4221  throw std::runtime_error("Source table \'" + curTableName + "\' does not exist.");
4222  }
4223  }
4224  checkNameSubstition(tableSubtituteMap);
4225 
4226  catalog.renameTable(names);
4227 
4228  // just to be explicit, clean out the list, the unique_ptr will delete
4229  while (!tablesToRename_.empty()) {
4230  tablesToRename_.pop_front();
4231  }
4232 } // namespace Parser
4233 
4235  bool not_null;
4236  const ColumnConstraintDef* cc = coldef->get_column_constraint();
4237  if (cc == nullptr) {
4238  not_null = false;
4239  } else {
4240  not_null = cc->get_notnull();
4241  }
4242  std::string default_value;
4243  const std::string* default_value_ptr = nullptr;
4244  if (cc) {
4245  if (auto def_val_literal = cc->get_defaultval()) {
4246  auto defaultsp = dynamic_cast<const StringLiteral*>(def_val_literal);
4247  default_value =
4248  defaultsp ? *defaultsp->get_stringval() : def_val_literal->to_string();
4249  // The preprocessing below is needed because:
4250  // a) TypedImportBuffer expects arrays in the {...} format
4251  // b) TypedImportBuffer expects string literals inside arrays w/o any quotes
4252  if (coldef->get_column_type()->get_is_array()) {
4253  std::regex array_re(R"(^ARRAY\s*\[(.*)\]$)", std::regex_constants::icase);
4254  default_value = std::regex_replace(default_value, array_re, "{$1}");
4255  boost::erase_all(default_value, "\'");
4256  }
4257  default_value_ptr = &default_value;
4258  }
4259  }
4261  cd,
4262  coldef->get_column_type(),
4263  not_null,
4264  coldef->get_compression(),
4265  default_value_ptr);
4266 }
4267 
4269  const TableDescriptor* td) {
4270  auto& catalog = session.getCatalog();
4271  if (!td) {
4272  throw std::runtime_error("Table " + *table_ + " does not exist.");
4273  } else {
4274  if (td->isView) {
4275  throw std::runtime_error("Adding columns to a view is not supported.");
4276  }
4278  if (table_is_temporary(td)) {
4279  throw std::runtime_error(
4280  "Adding columns to temporary tables is not yet supported.");
4281  }
4282  };
4283 
4284  check_alter_table_privilege(session, td);
4285 
4286  if (0 == coldefs_.size()) {
4287  coldefs_.push_back(std::move(coldef_));
4288  }
4289 
4290  for (const auto& coldef : coldefs_) {
4291  auto& new_column_name = *coldef->get_column_name();
4292  if (catalog.getMetadataForColumn(td->tableId, new_column_name) != nullptr) {
4293  throw std::runtime_error("Column " + new_column_name + " already exists.");
4294  }
4295  }
4296 }
4297 
4299  auto& catalog = session.getCatalog();
4300 
4301  // TODO(adb): the catalog should be handling this locking.
4302  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4305 
4306  const auto td_with_lock =
4308  catalog, *table_, true);
4309  const auto td = td_with_lock();
4310 
4311  check_executable(session, td);
4312 
4313  CHECK(td->fragmenter);
4314  if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
4315  td->fragmenter)) {
4316  throw std::runtime_error(
4317  "Adding columns to a table is not supported when using the \"sort_column\" "
4318  "option.");
4319  }
4320 
4321  // Do not take a data write lock, as the fragmenter may call `deleteFragments`
4322  // during a cap operation. Note that the schema write lock will prevent concurrent
4323  // inserts along with all other queries.
4324 
4325  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
4326  try {
4327  std::map<const std::string, const ColumnDescriptor> cds;
4328  std::map<const int, const ColumnDef*> cid_coldefs;
4329  for (const auto& coldef : coldefs_) {
4330  ColumnDescriptor cd;
4331  setColumnDescriptor(cd, coldef.get());
4332  catalog.addColumn(*td, cd);
4333  cds.emplace(*coldef->get_column_name(), cd);
4334  cid_coldefs.emplace(cd.columnId, coldef.get());
4335 
4336  // expand geo column to phy columns
4337  if (cd.columnType.is_geometry()) {
4338  std::list<ColumnDescriptor> phy_geo_columns;
4339  catalog.expandGeoColumn(cd, phy_geo_columns);
4340  for (auto& cd : phy_geo_columns) {
4341  catalog.addColumn(*td, cd);
4342  cds.emplace(cd.columnName, cd);
4343  cid_coldefs.emplace(cd.columnId, nullptr);
4344  }
4345  }
4346  }
4347 
4348  std::unique_ptr<import_export::Loader> loader(new import_export::Loader(catalog, td));
4349  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
4350  for (const auto& cd : cds) {
4351  import_buffers.emplace_back(std::make_unique<import_export::TypedImportBuffer>(
4352  &cd.second, loader->getStringDict(&cd.second)));
4353  }
4354  loader->setAddingColumns(true);
4355 
4356  // set_geo_physical_import_buffer below needs a sorted import_buffers
4357  std::sort(import_buffers.begin(),
4358  import_buffers.end(),
4359  [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
4360  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
4361  });
4362 
4363  size_t nrows = td->fragmenter->getNumRows();
4364  // if sharded, get total nrows from all sharded tables
4365  if (td->nShards > 0) {
4366  const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
4367  nrows = 0;
4368  std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](const auto& td) {
4369  nrows += td->fragmenter->getNumRows();
4370  });
4371  }
4372  if (nrows > 0) {
4373  int skip_physical_cols = 0;
4374  for (const auto cit : cid_coldefs) {
4375  const auto cd = catalog.getMetadataForColumn(td->tableId, cit.first);
4376  const auto coldef = cit.second;
4377  const bool is_null = !cd->default_value.has_value();
4378 
4379  if (cd->columnType.get_notnull() && is_null) {
4380  throw std::runtime_error("Default value required for column " + cd->columnName +
4381  " because of NOT NULL constraint");
4382  }
4383 
4384  for (auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
4385  auto& import_buffer = *it;
4386  if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
4387  if (coldef != nullptr ||
4388  skip_physical_cols-- <= 0) { // skip non-null phy col
4389  import_buffer->add_value(cd,
4390  cd->default_value.value_or("NULL"),
4391  is_null,
4393  if (cd->columnType.is_geometry()) {
4394  std::vector<double> coords, bounds;
4395  std::vector<int> ring_sizes, poly_rings;
4396  int render_group = 0;
4397  SQLTypeInfo tinfo{cd->columnType};
4399  cd->default_value.value_or("NULL"),
4400  tinfo,
4401  coords,
4402  bounds,
4403  ring_sizes,
4404  poly_rings,
4405  false)) {
4406  throw std::runtime_error("Bad geometry data: '" +
4407  cd->default_value.value_or("NULL") + "'");
4408  }
4409  size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
4411  cd,
4412  import_buffers,
4413  col_idx,
4414  coords,
4415  bounds,
4416  ring_sizes,
4417  poly_rings,
4418  render_group);
4419  // skip following phy cols
4420  skip_physical_cols = cd->columnType.get_physical_cols();
4421  }
4422  }
4423  break;
4424  }
4425  }
4426  }
4427  }
4428 
4429  if (!loader->loadNoCheckpoint(import_buffers, nrows, &session)) {
4430  throw std::runtime_error("loadNoCheckpoint failed!");
4431  }
4432  catalog.roll(true);
4433  loader->checkpoint();
4434  catalog.getSqliteConnector().query("END TRANSACTION");
4435  } catch (...) {
4436  catalog.roll(false);
4437  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
4438  throw;
4439  }
4440 }
4441 
4443  auto& catalog = session.getCatalog();
4444 
4445  // TODO(adb): the catalog should be handling this locking.
4446  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4449 
4450  const auto td_with_lock =
4452  catalog, *table_, true);
4453  const auto td = td_with_lock();
4454  if (!td) {
4455  throw std::runtime_error("Table " + *table_ + " does not exist.");
4456  }
4458  if (td->isView) {
4459  throw std::runtime_error("Dropping a column from a view is not supported.");
4460  }
4461  if (table_is_temporary(td)) {
4462  throw std::runtime_error(
4463  "Dropping a column from a temporary table is not yet supported.");
4464  }
4465 
4466  check_alter_table_privilege(session, td);
4467 
4468  for (const auto& column : columns_) {
4469  if (nullptr == catalog.getMetadataForColumn(td->tableId, *column)) {
4470  throw std::runtime_error("Column " + *column + " does not exist.");
4471  }
4472  }
4473 
4474  if (td->nColumns <= (td->hasDeletedCol ? 3 : 2)) {
4475  throw std::runtime_error("Table " + *table_ + " has only one column.");
4476  }
4477 
4478  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
4479  try {
4480  std::vector<int> columnIds;
4481  for (const auto& column : columns_) {
4482  ColumnDescriptor cd = *catalog.getMetadataForColumn(td->tableId, *column);
4483  if (td->nShards > 0 && td->shardedColumnId == cd.columnId) {
4484  throw std::runtime_error("Dropping sharding column " + cd.columnName +
4485  " is not supported.");
4486  }
4487  catalog.dropColumn(*td, cd);
4488  columnIds.push_back(cd.columnId);
4489  for (int i = 0; i < cd.columnType.get_physical_cols(); i++) {
4490  const auto pcd = catalog.getMetadataForColumn(td->tableId, cd.columnId + i + 1);
4491  CHECK(pcd);
4492  catalog.dropColumn(*td, *pcd);
4493  columnIds.push_back(cd.columnId + i + 1);
4494  }
4495  }
4496 
4497  for (auto shard : catalog.getPhysicalTablesDescriptors(td)) {
4498  shard->fragmenter->dropColumns(columnIds);
4499  }
4500  // if test forces to rollback
4502  throw std::runtime_error("lol!");
4503  }
4504  catalog.roll(true);
4506  catalog.checkpoint(td->tableId);
4507  }
4508  catalog.getSqliteConnector().query("END TRANSACTION");
4509  } catch (...) {
4510  catalog.setForReload(td->tableId);
4511  catalog.roll(false);
4512  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
4513  throw;
4514  }
4515 
4516  // invalidate cached hashtable
4518 }
4519 
4521  auto& catalog = session.getCatalog();
4522 
4523  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4526 
4527  const auto td_with_lock =
4529  catalog, *table_, false);
4530  const auto td = td_with_lock();
4531  CHECK(td);
4533 
4534  check_alter_table_privilege(session, td);
4535  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *column_);
4536  if (cd == nullptr) {
4537  throw std::runtime_error("Column " + *column_ + " does not exist.");
4538  }
4539  if (catalog.getMetadataForColumn(td->tableId, *new_column_name_) != nullptr) {
4540  throw std::runtime_error("Column " + *new_column_name_ + " already exists.");
4541  }
4542  catalog.renameColumn(td, cd, *new_column_name_);
4543 }
4544 
4546  enum TableParamType { MaxRollbackEpochs, Epoch, MaxRows };
4547  static const std::unordered_map<std::string, TableParamType> param_map = {
4548  {"max_rollback_epochs", TableParamType::MaxRollbackEpochs},
4549  {"epoch", TableParamType::Epoch},
4550  {"max_rows", TableParamType::MaxRows}};
4551  // Below is to ensure that executor is not currently executing on table when we might be
4552  // changing its storage. Question: will/should catalog write lock take care of this?
4553  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4556  auto& catalog = session.getCatalog();
4557  const auto td_with_lock =
4559  catalog, *table_, false);
4560  const auto td = td_with_lock();
4561  if (!td) {
4562  throw std::runtime_error("Table " + *table_ + " does not exist.");
4563  }
4564  if (td->isView) {
4565  throw std::runtime_error("Setting parameters for a view is not supported.");
4566  }
4567  if (table_is_temporary(td)) {
4568  throw std::runtime_error(
4569  "Setting parameters for a temporary table is not yet supported.");
4570  }
4571  check_alter_table_privilege(session, td);
4572 
4573  std::string param_name(*param_->get_name());
4574  boost::algorithm::to_lower(param_name);
4575  const IntLiteral* val_int_literal =
4576  dynamic_cast<const IntLiteral*>(param_->get_value());
4577  if (val_int_literal == nullptr) {
4578  throw std::runtime_error("Table parameters should be integers.");
4579  }
4580  const int64_t param_val = val_int_literal->get_intval();
4581 
4582  const auto param_it = param_map.find(param_name);
4583  if (param_it == param_map.end()) {
4584  throw std::runtime_error(param_name + " is not a settable table parameter.");
4585  }
4586  switch (param_it->second) {
4587  case MaxRollbackEpochs: {
4588  catalog.setMaxRollbackEpochs(td->tableId, param_val);
4589  break;
4590  }
4591  case Epoch: {
4592  catalog.setTableEpoch(catalog.getDatabaseId(), td->tableId, param_val);
4593  break;
4594  }
4595  case MaxRows: {
4596  catalog.setMaxRows(td->tableId, param_val);
4597  break;
4598  }
4599  default: {
4600  UNREACHABLE() << "Unexpected TableParamType value: " << param_it->second
4601  << ", key: " << param_it->first;
4602  }
4603  }
4604 }
4605 
// Remainder of the pointer-based CopyTableStmt constructor (its first signature line,
// which declares `t`, is not part of this listing).
// t and f initialise table_ and file_pattern_; success_ starts out true.
// o may be null. When present, each NameValueAssign* it holds is appended to options_
// and the list object itself is deleted here, so the caller must not reuse it.
// NOTE(review): table_, file_pattern_ and options_ presumably hold owning smart
// pointers (their declarations are in the header) — confirm.
4607  std::string* f,
4608  std::list<NameValueAssign*>* o)
4609  : table_(t), file_pattern_(f), success_(true) {
4610  if (o) {
4611  for (const auto e : *o) {
4612  options_.emplace_back(e);
4613  }
// Only the list container is freed; its elements were handed to options_ above.
4614  delete o;
4615  }
4616 }
4617 
4618 CopyTableStmt::CopyTableStmt(const rapidjson::Value& payload) : success_(true) {
4619  CHECK(payload.HasMember("table"));
4620  table_ = std::make_unique<std::string>(json_str(payload["table"]));
4621 
4622  CHECK(payload.HasMember("filePath"));
4623  std::string fs = json_str(payload["filePath"]);
4624  // strip leading/trailing spaces/quotes/single quotes
4625  boost::algorithm::trim_if(fs, boost::is_any_of(" \"'`"));
4626  file_pattern_ = std::make_unique<std::string>(fs);
4627 
4628  parse_options(payload, options_);
4629 }
4630 
// Body of the convenience overload (signature line not part of this listing): builds
// the default importer factory and delegates to the overload that accepts one.
// The factory returns a ForeignDataImporter only when the build defines
// ENABLE_IMPORT_PARQUET, the file type is PARQUET and g_enable_parquet_import_fsi is
// set; in every other case it returns a regular import_export::Importer.
4632  auto importer_factory = [](Catalog_Namespace::Catalog& catalog,
4633  const TableDescriptor* td,
4634  const std::string& file_path,
4635  const import_export::CopyParams& copy_params)
4636  -> std::unique_ptr<import_export::AbstractImporter> {
4637 #ifdef ENABLE_IMPORT_PARQUET
4638  if (copy_params.file_type == import_export::FileType::PARQUET &&
4639  g_enable_parquet_import_fsi) {
4640  return std::make_unique<import_export::ForeignDataImporter>(
4641  file_path, copy_params, td);
4642  }
4643 #endif
4644  return std::make_unique<import_export::Importer>(catalog, td, file_path, copy_params);
4645  };
4646  return execute(session, importer_factory);
4647 }
4648 
// COPY ... FROM implementation that receives the importer factory from its caller
// (the first signature line is not part of this listing).
// Visible steps, in order:
//   1. classify file_pattern_ as a non-local (s3/http/https) URL or not;
//   2. take the executor read lock, then try to take the table schema read lock and an
//      insert-data write lock (a std::runtime_error here is swallowed, leaving td null);
//   3. if the table exists, check the user's privileges on it;
//   4. translate every WITH option into import_export::CopyParams, throwing
//      std::runtime_error on a wrong value type, a bad value or an unknown option;
//   5. geo (POLYGON) copy: only stash the request for the caller to run later;
//      regular copy: require the table, run the importer and build a result string;
//   6. publish the result string through return_message and the INFO log.
4650  const Catalog_Namespace::SessionInfo& session,
4651  const std::function<std::unique_ptr<import_export::AbstractImporter>(
4653  const TableDescriptor*,
4654  const std::string&,
4655  const import_export::CopyParams&)>& importer_factory) {
// Case-insensitive match for s3://, http:// and https:// sources.
4656  boost::regex non_local_file_regex{R"(^\s*(s3|http|https)://.+)",
4657  boost::regex::extended | boost::regex::icase};
// NOTE(review): the statements executed for local paths are not part of this listing.
4658  if (!boost::regex_match(*file_pattern_, non_local_file_regex)) {
4661  }
4662
// Accumulated import time in the unit returned by measure<>::execution (reported below
// as seconds after dividing by 1000).
4663  size_t total_time = 0;
4664
4665  // Prevent simultaneous import / truncate (see TruncateTableStmt::execute)
4666  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4669
4670  const TableDescriptor* td{nullptr};
4671  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
4672  std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
4673
4674  auto& catalog = session.getCatalog();
4675
// Lock acquisition doubles as the "does the table exist" probe: on failure td stays
// null and the decision is deferred to the geo / non-geo branches further down.
4676  try {
4677  td_with_lock = std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
4679  catalog, *table_));
4680  td = (*td_with_lock)();
4681  insert_data_lock = std::make_unique<lockmgr::WriteLock>(
4683  } catch (const std::runtime_error& e) {
4684  // noop
4685  // TODO(adb): We're really only interested in whether the table exists or not.
4686  // Create a more refined exception.
4687  }
4688
4689  // if the table already exists, it's locked, so check access privileges
4690  if (td) {
4691  std::vector<DBObject> privObjects;
4692  DBObject dbObject(*table_, TableDBObjectType);
4693  dbObject.loadKey(catalog);
4695  privObjects.push_back(dbObject);
4696  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
4697  throw std::runtime_error("Violation of access privileges: user " +
4698  session.get_currentUser().userLoggable() +
4699  " has no insert privileges for table " + *table_ + ".");
4700  }
4701  }
4702
4703  // since we'll have not only posix file names but also s3/hdfs/... url
4704  // we do not expand wildcard or check file existence here.
4705  // from here on, file_path contains something which may be a url
4706  // or a wildcard of file names;
4707  std::string file_path = *file_pattern_;
4708  import_export::CopyParams copy_params;
// Option names are compared case-insensitively. Each branch first verifies the literal
// type (IntLiteral or StringLiteral) and throws std::runtime_error on a mismatch.
// Boolean options arrive as string literals and go through bool_from_string_literal.
4709  if (!options_.empty()) {
4710  for (auto& p : options_) {
4711  if (boost::iequals(*p->get_name(), "max_reject")) {
4712  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4713  if (int_literal == nullptr) {
4714  throw std::runtime_error("max_reject option must be an integer.");
4715  }
4716  copy_params.max_reject = int_literal->get_intval();
4717  } else if (boost::iequals(*p->get_name(), "buffer_size")) {
4718  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4719  if (int_literal == nullptr) {
4720  throw std::runtime_error("buffer_size option must be an integer.");
4721  }
4722  copy_params.buffer_size = int_literal->get_intval();
4723  } else if (boost::iequals(*p->get_name(), "threads")) {
4724  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4725  if (int_literal == nullptr) {
4726  throw std::runtime_error("Threads option must be an integer.");
4727  }
4728  copy_params.threads = int_literal->get_intval();
4729  } else if (boost::iequals(*p->get_name(), "delimiter")) {
4730  const StringLiteral* str_literal =
4731  dynamic_cast<const StringLiteral*>(p->get_value());
4732  if (str_literal == nullptr) {
4733  throw std::runtime_error("Delimiter option must be a string.");
4734  } else if (str_literal->get_stringval()->length() != 1) {
4735  throw std::runtime_error("Delimiter must be a single character string.");
4736  }
4737  copy_params.delimiter = (*str_literal->get_stringval())[0];
4738  } else if (boost::iequals(*p->get_name(), "nulls")) {
4739  const StringLiteral* str_literal =
4740  dynamic_cast<const StringLiteral*>(p->get_value());
4741  if (str_literal == nullptr) {
4742  throw std::runtime_error("Nulls option must be a string.");
4743  }
4744  copy_params.null_str = *str_literal->get_stringval();
4745  } else if (boost::iequals(*p->get_name(), "header")) {
4746  const StringLiteral* str_literal =
4747  dynamic_cast<const StringLiteral*>(p->get_value());
4748  if (str_literal == nullptr) {
4749  throw std::runtime_error("Header option must be a boolean.");
4750  }
4751  copy_params.has_header = bool_from_string_literal(str_literal)
4754 #ifdef ENABLE_IMPORT_PARQUET
4755  } else if (boost::iequals(*p->get_name(), "parquet")) {
4756  const StringLiteral* str_literal =
4757  dynamic_cast<const StringLiteral*>(p->get_value());
4758  if (str_literal == nullptr) {
4759  throw std::runtime_error("Parquet option must be a boolean.");
4760  }
4761  if (bool_from_string_literal(str_literal)) {
4762  // not sure a parquet "table" type is proper, but to make code
4763  // look consistent in some places, let's set "table" type too
4764  copy_params.file_type = import_export::FileType::PARQUET;
4765  }
4766 #endif // ENABLE_IMPORT_PARQUET
4767  } else if (boost::iequals(*p->get_name(), "s3_access_key")) {
4768  const StringLiteral* str_literal =
4769  dynamic_cast<const StringLiteral*>(p->get_value());
4770  if (str_literal == nullptr) {
4771  throw std::runtime_error("Option s3_access_key must be a string.");
4772  }
4773  copy_params.s3_access_key = *str_literal->get_stringval();
4774  } else if (boost::iequals(*p->get_name(), "s3_secret_key")) {
4775  const StringLiteral* str_literal =
4776  dynamic_cast<const StringLiteral*>(p->get_value());
4777  if (str_literal == nullptr) {
4778  throw std::runtime_error("Option s3_secret_key must be a string.");
4779  }
4780  copy_params.s3_secret_key = *str_literal->get_stringval();
4781  } else if (boost::iequals(*p->get_name(), "s3_session_token")) {
4782  const StringLiteral* str_literal =
4783  dynamic_cast<const StringLiteral*>(p->get_value());
4784  if (str_literal == nullptr) {
4785  throw std::runtime_error("Option s3_session_token must be a string.");
4786  }
4787  copy_params.s3_session_token = *str_literal->get_stringval();
4788  } else if (boost::iequals(*p->get_name(), "s3_region")) {
4789  const StringLiteral* str_literal =
4790  dynamic_cast<const StringLiteral*>(p->get_value());
4791  if (str_literal == nullptr) {
4792  throw std::runtime_error("Option s3_region must be a string.");
4793  }
4794  copy_params.s3_region = *str_literal->get_stringval();
4795  } else if (boost::iequals(*p->get_name(), "s3_endpoint")) {
4796  const StringLiteral* str_literal =
4797  dynamic_cast<const StringLiteral*>(p->get_value());
4798  if (str_literal == nullptr) {
4799  throw std::runtime_error("Option s3_endpoint must be a string.");
4800  }
4801  copy_params.s3_endpoint = *str_literal->get_stringval();
4802  } else if (boost::iequals(*p->get_name(), "quote")) {
4803  const StringLiteral* str_literal =
4804  dynamic_cast<const StringLiteral*>(p->get_value());
4805  if (str_literal == nullptr) {
4806  throw std::runtime_error("Quote option must be a string.");
4807  } else if (str_literal->get_stringval()->length() != 1) {
4808  throw std::runtime_error("Quote must be a single character string.");
4809  }
4810  copy_params.quote = (*str_literal->get_stringval())[0];
4811  } else if (boost::iequals(*p->get_name(), "escape")) {
4812  const StringLiteral* str_literal =
4813  dynamic_cast<const StringLiteral*>(p->get_value());
4814  if (str_literal == nullptr) {
4815  throw std::runtime_error("Escape option must be a string.");
4816  } else if (str_literal->get_stringval()->length() != 1) {
4817  throw std::runtime_error("Escape must be a single character string.");
4818  }
4819  copy_params.escape = (*str_literal->get_stringval())[0];
4820  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
4821  const StringLiteral* str_literal =
4822  dynamic_cast<const StringLiteral*>(p->get_value());
4823  if (str_literal == nullptr) {
4824  throw std::runtime_error("Line_delimiter option must be a string.");
4825  } else if (str_literal->get_stringval()->length() != 1) {
4826  throw std::runtime_error("Line_delimiter must be a single character string.");
4827  }
4828  copy_params.line_delim = (*str_literal->get_stringval())[0];
4829  } else if (boost::iequals(*p->get_name(), "quoted")) {
4830  const StringLiteral* str_literal =
4831  dynamic_cast<const StringLiteral*>(p->get_value());
4832  if (str_literal == nullptr) {
4833  throw std::runtime_error("Quoted option must be a boolean.");
4834  }
4835  copy_params.quoted = bool_from_string_literal(str_literal);
4836  } else if (boost::iequals(*p->get_name(), "plain_text")) {
4837  const StringLiteral* str_literal =
4838  dynamic_cast<const StringLiteral*>(p->get_value());
4839  if (str_literal == nullptr) {
4840  throw std::runtime_error("plain_text option must be a boolean.");
4841  }
4842  copy_params.plain_text = bool_from_string_literal(str_literal);
4843  } else if (boost::iequals(*p->get_name(), "array_marker")) {
4844  const StringLiteral* str_literal =
4845  dynamic_cast<const StringLiteral*>(p->get_value());
4846  if (str_literal == nullptr) {
4847  throw std::runtime_error("Array Marker option must be a string.");
4848  } else if (str_literal->get_stringval()->length() != 2) {
4849  throw std::runtime_error(
4850  "Array Marker option must be exactly two characters. Default is {}.");
4851  }
// First character opens an array, second character closes it.
4852  copy_params.array_begin = (*str_literal->get_stringval())[0];
4853  copy_params.array_end = (*str_literal->get_stringval())[1];
4854  } else if (boost::iequals(*p->get_name(), "array_delimiter")) {
4855  const StringLiteral* str_literal =
4856  dynamic_cast<const StringLiteral*>(p->get_value());
4857  if (str_literal == nullptr) {
4858  throw std::runtime_error("Array Delimiter option must be a string.");
4859  } else if (str_literal->get_stringval()->length() != 1) {
4860  throw std::runtime_error("Array Delimiter must be a single character string.");
4861  }
4862  copy_params.array_delim = (*str_literal->get_stringval())[0];
4863  } else if (boost::iequals(*p->get_name(), "lonlat")) {
4864  const StringLiteral* str_literal =
4865  dynamic_cast<const StringLiteral*>(p->get_value());
4866  if (str_literal == nullptr) {
4867  throw std::runtime_error("Lonlat option must be a boolean.");
4868  }
4869  copy_params.lonlat = bool_from_string_literal(str_literal);
4870  } else if (boost::iequals(*p->get_name(), "geo")) {
4871  const StringLiteral* str_literal =
4872  dynamic_cast<const StringLiteral*>(p->get_value());
4873  if (str_literal == nullptr) {
4874  throw std::runtime_error("Geo option must be a boolean.");
4875  }
4876  copy_params.file_type = bool_from_string_literal(str_literal)
4879  } else if (boost::iequals(*p->get_name(), "geo_coords_type")) {
4880  const StringLiteral* str_literal =
4881  dynamic_cast<const StringLiteral*>(p->get_value());
4882  if (str_literal == nullptr) {
4883  throw std::runtime_error("'geo_coords_type' option must be a string");
4884  }
4885  const std::string* s = str_literal->get_stringval();
4886  if (boost::iequals(*s, "geography")) {
4887  throw std::runtime_error(
4888  "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
4889  // copy_params.geo_coords_type = kGEOGRAPHY;
4890  } else if (boost::iequals(*s, "geometry")) {
4891  copy_params.geo_coords_type = kGEOMETRY;
4892  } else {
4893  throw std::runtime_error(
4894  "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
4895  "'GEOMETRY'): " +
4896  *s);
4897  }
4898  } else if (boost::iequals(*p->get_name(), "geo_coords_encoding")) {
4899  const StringLiteral* str_literal =
4900  dynamic_cast<const StringLiteral*>(p->get_value());
4901  if (str_literal == nullptr) {
4902  throw std::runtime_error("'geo_coords_encoding' option must be a string");
4903  }
4904  const std::string* s = str_literal->get_stringval();
4905  if (boost::iequals(*s, "none")) {
4906  copy_params.geo_coords_encoding = kENCODING_NONE;
4907  copy_params.geo_coords_comp_param = 0;
4908  } else if (boost::iequals(*s, "compressed(32)")) {
4909  copy_params.geo_coords_encoding = kENCODING_GEOINT;
4910  copy_params.geo_coords_comp_param = 32;
4911  } else {
4912  throw std::runtime_error(
4913  "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
4914  "'COMPRESSED(32)'): " +
4915  *s);
4916  }
4917  } else if (boost::iequals(*p->get_name(), "geo_coords_srid")) {
4918  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4919  if (int_literal == nullptr) {
4920  throw std::runtime_error("'geo_coords_srid' option must be an integer");
4921  }
4922  const int srid = int_literal->get_intval();
4923  if (srid == 4326 || srid == 3857 || srid == 900913) {
4924  copy_params.geo_coords_srid = srid;
4925  } else {
4926  throw std::runtime_error(
4927  "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
4928  "900913): " +
4929  std::to_string(srid));
4930  }
4931  } else if (boost::iequals(*p->get_name(), "geo_layer_name")) {
4932  const StringLiteral* str_literal =
4933  dynamic_cast<const StringLiteral*>(p->get_value());
4934  if (str_literal == nullptr) {
4935  throw std::runtime_error("'geo_layer_name' option must be a string");
4936  }
4937  const std::string* layer_name = str_literal->get_stringval();
4938  if (layer_name) {
4939  copy_params.geo_layer_name = *layer_name;
4940  } else {
4941  throw std::runtime_error("Invalid value for 'geo_layer_name' option");
4942  }
4943  } else if (boost::iequals(*p->get_name(), "partitions")) {
// Depends on option order: file_type must already be POLYGON (set by an earlier
// "geo" option) when this option is processed.
4944  if (copy_params.file_type == import_export::FileType::POLYGON) {
// NOTE(review): unlike every other option this uses static_cast without a type check,
// so a non-string value is not rejected here — consider dynamic_cast plus a nullptr
// test, as done in the other branches.
4945  const auto partitions =
4946  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
4947  CHECK(partitions);
4948  const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
4949  if (partitions_uc != "REPLICATED") {
4950  throw std::runtime_error("PARTITIONS must be REPLICATED for geo COPY");
4951  }
4952  geo_copy_from_partitions_ = partitions_uc;
4953  } else {
4954  throw std::runtime_error("PARTITIONS option not supported for non-geo COPY: " +
4955  *p->get_name());
4956  }
4957  } else if (boost::iequals(*p->get_name(), "geo_assign_render_groups")) {
4958  const StringLiteral* str_literal =
4959  dynamic_cast<const StringLiteral*>(p->get_value());
4960  if (str_literal == nullptr) {
4961  throw std::runtime_error("geo_assign_render_groups option must be a boolean.");
4962  }
4963  copy_params.geo_assign_render_groups = bool_from_string_literal(str_literal);
4964  } else if (boost::iequals(*p->get_name(), "geo_explode_collections")) {
4965  const StringLiteral* str_literal =
4966  dynamic_cast<const StringLiteral*>(p->get_value());
4967  if (str_literal == nullptr) {
4968  throw std::runtime_error("geo_explode_collections option must be a boolean.");
4969  }
4970  copy_params.geo_explode_collections = bool_from_string_literal(str_literal);
4971  } else if (boost::iequals(*p->get_name(), "source_srid")) {
4972  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
4973  if (int_literal == nullptr) {
4974  throw std::runtime_error("'source_srid' option must be an integer");
4975  }
4976  const int srid = int_literal->get_intval();
// Also order-dependent: accepted only while file_type is still DELIMITED.
4977  if (copy_params.file_type == import_export::FileType::DELIMITED) {
4978  copy_params.source_srid = srid;
4979  } else {
4980  throw std::runtime_error(
4981  "'source_srid' option can only be used on csv/tsv files");
4982  }
4983  } else if (boost::iequals(*p->get_name(), "regex_path_filter")) {
4984  const StringLiteral* str_literal =
4985  dynamic_cast<const StringLiteral*>(p->get_value());
4986  if (str_literal == nullptr) {
4987  throw std::runtime_error("Option regex_path_filter must be a string.");
4988  }
// An empty string is stored as "no value" (std::nullopt); same for the two options below.
4989  const auto string_val = *str_literal->get_stringval();
4990  copy_params.regex_path_filter =
4991  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
4992  } else if (boost::iequals(*p->get_name(), "file_sort_order_by")) {
4993  const StringLiteral* str_literal =
4994  dynamic_cast<const StringLiteral*>(p->get_value());
4995  if (str_literal == nullptr) {
4996  throw std::runtime_error("Option file_sort_order_by must be a string.");
4997  }
4998  const auto string_val = *str_literal->get_stringval();
4999  copy_params.file_sort_order_by =
5000  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
5001  } else if (boost::iequals(*p->get_name(), "file_sort_regex")) {
5002  const StringLiteral* str_literal =
5003  dynamic_cast<const StringLiteral*>(p->get_value());
5004  if (str_literal == nullptr) {
5005  throw std::runtime_error("Option file_sort_regex must be a string.");
5006  }
5007  const auto string_val = *str_literal->get_stringval();
5008  copy_params.file_sort_regex =
5009  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
5010  } else {
5011  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
5012  }
5013  }
5014  }
5015
// Human-readable outcome; becomes return_message and is logged at the end.
5016  std::string tr;
5017  if (copy_params.file_type == import_export::FileType::POLYGON) {
5018  // geo import
5019  // we do nothing here, except stash the parameters so we can
5020  // do the import when we unwind to the top of the handler
5021  geo_copy_from_file_name_ = file_path;
5022  geo_copy_from_copy_params_ = copy_params;
5023  was_geo_copy_from_ = true;
5024
5025  // the result string
5026  // @TODO simon.eves put something more useful in here
5027  // except we really can't because we haven't done the import yet!
5028  if (td) {
5029  tr = std::string("Appending geo to table '") + *table_ + std::string("'...");
5030  } else {
5031  tr = std::string("Creating table '") + *table_ +
5032  std::string("' and importing geo...");
5033  }
5034  } else {
// Non-geo copies never create the table: a missing table is an error (see the else
// branch at the bottom of this block).
5035  if (td) {
5036  CHECK(td_with_lock);
5037
5038  // regular import
5039  auto importer = importer_factory(catalog, td, file_path, copy_params);
5040  auto start_time = ::toString(std::chrono::system_clock::now());
5042  auto query_session = session.get_session_id();
5043  auto query_str = "COPYING " + td->tableName;
5045  executor->enrollQuerySession(query_session,
5046  query_str,
5047  start_time,
5049  QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
5050  }
5051
// Scope guard: its body runs when clearInterruptStatus goes out of scope, undoing the
// session enrollment above on both normal and exceptional exit.
5052  ScopeGuard clearInterruptStatus =
5053  [executor, &query_str, &query_session, &start_time, &importer] {
5054  // reset the runtime query interrupt status
5056  executor->clearQuerySessionStatus(query_session, start_time);
5057  }
5058  };
5059  import_export::ImportStatus import_result;
5060  auto ms =
5061  measure<>::execution([&]() { import_result = importer->import(&session); });
5062  total_time += ms;
5063  // results
// A load that did not fail outright is still turned into a failure when more rows were
// rejected than max_reject allows; success_ records that for the caller.
5064  if (!import_result.load_failed &&
5065  import_result.rows_rejected > copy_params.max_reject) {
5066  LOG(ERROR) << "COPY exited early due to reject records count during multi file "
5067  "processing ";
5068  // if we have crossed the truncated load threshold
5069  import_result.load_failed = true;
5070  import_result.load_msg =
5071  "COPY exited early due to reject records count during multi file "
5072  "processing ";
5073  success_ = false;
5074  }
5075  if (!import_result.load_failed) {
5076  tr = std::string(
5077  "Loaded: " + std::to_string(import_result.rows_completed) +
5078  " recs, Rejected: " + std::to_string(import_result.rows_rejected) +
5079  " recs in " + std::to_string((double)total_time / 1000.0) + " secs");
5080  } else {
5081  tr = std::string("Loader Failed due to : " + import_result.load_msg + " in " +
5082  std::to_string((double)total_time / 1000.0) + " secs");
5083  }
5084  } else {
5085  throw std::runtime_error("Table '" + *table_ + "' must exist before COPY FROM");
5086  }
5087  }
5088
5089  return_message.reset(new std::string(tr));
5090  LOG(INFO) << tr;
5091 } // NOTE(review): by brace count this closes the function body, not namespace Parser — confirm
5092 
5093 // CREATE ROLE payroll_dept_role;
5094 CreateRoleStmt::CreateRoleStmt(const rapidjson::Value& payload) {
5095  CHECK(payload.HasMember("role"));
5096  role_ = std::make_unique<std::string>(json_str(payload["role"]));
5097 }
5098 
// Body of the CREATE ROLE handler (signature line not part of this listing).
// Only a super user may create a role; anyone else gets a std::runtime_error.
// The role is created through SysCatalog as a regular role: neither a user-private
// role nor a temporary one.
5100  const auto& currentUser = session.get_currentUser();
5101  if (!currentUser.isSuper) {
5102  throw std::runtime_error("CREATE ROLE " + get_role() +
5103  " failed. It can only be executed by super user.");
5104  }
5105  SysCatalog::instance().createRole(
5106  get_role(), /*user_private_role=*/false, /*is_temporary=*/false);
5107 }
5108 
5109 // DROP ROLE payroll_dept_role;
5110 DropRoleStmt::DropRoleStmt(const rapidjson::Value& payload) {
5111  CHECK(payload.HasMember("role"));
5112  role_ = std::make_unique<std::string>(json_str(payload["role"]));
5113 }
5114 
// Body of the DROP ROLE handler (signature line not part of this listing).
// Throws std::runtime_error when the caller is not a super user or when no role with
// the requested name is known to SysCatalog; otherwise drops the (non-temporary) role.
5116  const auto& currentUser = session.get_currentUser();
5117  if (!currentUser.isSuper) {
5118  throw std::runtime_error("DROP ROLE " + get_role() +
5119  " failed. It can only be executed by super user.");
5120  }
// The lookup is used purely as an existence check; rl is not used afterwards.
5121  auto* rl = SysCatalog::instance().getRoleGrantee(get_role());
5122  if (!rl) {
5123  throw std::runtime_error("DROP ROLE " + get_role() +
5124  " failed because role with this name does not exist.");
5125  }
5126  SysCatalog::instance().dropRole(get_role(), /*is_temporary=*/false);
5127 }
5128 
5129 std::vector<std::string> splitObjectHierName(const std::string& hierName) {
5130  std::vector<std::string> componentNames;
5131  boost::split(componentNames, hierName, boost::is_any_of("."));
5132  return componentNames;
5133 }
5134 
// Extracts the plain object name from a possibly qualified, dot-separated name.
//   objectHierName  name as written in the statement, e.g. "table" or "db.table"
//   objectType      upper-case object kind; compared against "DATABASE", "TABLE",
//                   "DASHBOARD", "VIEW" and "SERVER"
// Returns the only component for a DATABASE, and the last component of a one- or
// two-part name for the other supported kinds.
// Throws std::runtime_error when the number of components is not allowed for the kind
// or when the kind is not supported.
// NOTE(review): the line declaring the third parameter is not part of this listing;
// nothing in the visible body reads it.
5135 std::string extractObjectNameFromHierName(const std::string& objectHierName,
5136  const std::string& objectType,
5138  std::string objectName;
5139  std::vector<std::string> componentNames = splitObjectHierName(objectHierName);
// Database names may not be qualified: exactly one component is required.
5140  if (objectType.compare("DATABASE") == 0) {
5141  if (componentNames.size() == 1) {
5142  objectName = componentNames[0];
5143  } else {
5144  throw std::runtime_error("DB object name is not correct " + objectHierName);
5145  }
5146  } else {
5147  if (objectType.compare("TABLE") == 0 || objectType.compare("DASHBOARD") == 0 ||
5148  objectType.compare("VIEW") == 0 || objectType.compare("SERVER") == 0) {
// "name" or "qualifier.name": the qualifier, when present, is discarded.
5149  switch (componentNames.size()) {
5150  case (1): {
5151  objectName = componentNames[0];
5152  break;
5153  }
5154  case (2): {
5155  objectName = componentNames[1];
5156  break;
5157  }
5158  default: {
5159  throw std::runtime_error("DB object name is not correct " + objectHierName);
5160  }
5161  }
5162  } else {
5163  throw std::runtime_error("DB object type " + objectType + " is not supported.");
5164  }
5165  }
5166  return objectName;
5167 }
5168 
// Translates a privilege keyword used in GRANT/REVOKE into the AccessPrivileges value
// and the DBObjectType it is recorded under.
//   privs        privilege keyword; looked up verbatim, so callers are expected to pass
//                it already upper-cased
//   objectType   type of the object named in the statement
//   object_name  used only to build the error message
// Returns the mapped pair; throws std::runtime_error when the (keyword, type)
// combination is not in the table.
// NOTE(review): the mapped values of the table entries are not part of this listing,
// only the keys are visible.
5169 static std::pair<AccessPrivileges, DBObjectType> parseStringPrivs(
5170  const std::string& privs,
5171  const DBObjectType& objectType,
5172  const std::string& object_name) {
// Built once (function-local static) and keyed by (keyword, object type).
5173  static const std::map<std::pair<const std::string, const DBObjectType>,
5174  std::pair<const AccessPrivileges, const DBObjectType>>
5175  privileges_lookup{
5176  {{"ALL"s, DatabaseDBObjectType},
5179  {{"ALL"s, DashboardDBObjectType},
5182  {{"ALL"s, ServerDBObjectType},
5184
5185  {{"CREATE TABLE"s, DatabaseDBObjectType},
5187  {{"CREATE"s, DatabaseDBObjectType},
5189  {{"SELECT"s, DatabaseDBObjectType},
5191  {{"INSERT"s, DatabaseDBObjectType},
5193  {{"TRUNCATE"s, DatabaseDBObjectType},
5195  {{"UPDATE"s, DatabaseDBObjectType},
5197  {{"DELETE"s, DatabaseDBObjectType},
5199  {{"DROP"s, DatabaseDBObjectType},
5201  {{"ALTER"s, DatabaseDBObjectType},
5203
5204  {{"SELECT"s, TableDBObjectType},
5206  {{"INSERT"s, TableDBObjectType},
5208  {{"TRUNCATE"s, TableDBObjectType},
5210  {{"UPDATE"s, TableDBObjectType},
5212  {{"DELETE"s, TableDBObjectType},
5214  {{"DROP"s, TableDBObjectType},
5216  {{"ALTER"s, TableDBObjectType},
5218
5219  {{"CREATE VIEW"s, DatabaseDBObjectType},
5221  {{"SELECT VIEW"s, DatabaseDBObjectType},
5223  {{"DROP VIEW"s, DatabaseDBObjectType},
5225  {{"SELECT"s, ViewDBObjectType},
5228
5229  {{"CREATE DASHBOARD"s, DatabaseDBObjectType},
5231  {{"EDIT DASHBOARD"s, DatabaseDBObjectType},
5233  {{"VIEW DASHBOARD"s, DatabaseDBObjectType},
5235  {{"DELETE DASHBOARD"s, DatabaseDBObjectType},
5237  {{"VIEW"s, DashboardDBObjectType},
5239  {{"EDIT"s, DashboardDBObjectType},
5241  {{"DELETE"s, DashboardDBObjectType},
5243
5244  {{"CREATE SERVER"s, DatabaseDBObjectType},
5246  {{"DROP SERVER"s, DatabaseDBObjectType},
5248  {{"DROP"s, ServerDBObjectType},
5250  {{"ALTER SERVER"s, DatabaseDBObjectType},
5252  {{"ALTER"s, ServerDBObjectType},
5254  {{"USAGE"s, ServerDBObjectType},
5256  {{"SERVER USAGE"s, DatabaseDBObjectType},
5258
5259  {{"VIEW SQL EDITOR"s, DatabaseDBObjectType},
5261  {{"ACCESS"s, DatabaseDBObjectType},
5263
5264  auto result = privileges_lookup.find(std::make_pair(privs, objectType));
5265  if (result == privileges_lookup.end()) {
5266  throw std::runtime_error("Privileges " + privs + " on DB object " + object_name +
5267  " are not correct.");
5268  }
5269  return result->second;
5270 }
5271 
5272 static DBObject createObject(const std::string& objectName, DBObjectType objectType) {
5273  if (objectType == DashboardDBObjectType) {
5274  int32_t dashboard_id = -1;
5275  if (!objectName.empty()) {
5276  try {
5277  dashboard_id = stoi(objectName);
5278  } catch (const std::exception&) {
5279  throw std::runtime_error(
5280  "Privileges on dashboards should be changed via integer dashboard ID");
5281  }
5282  }
5283  return DBObject(dashboard_id, objectType);
5284  } else {
5285  return DBObject(objectName, objectType);
5286  }
5287 }
5288 
5289 // Pre-execution PRIVILEGE failure conditions that cannot be detected elsewhere
5290 // For types: Table, View, Database, Server, Dashboard
5291 static void verifyObject(Catalog_Namespace::Catalog& sessionCatalog,
5292  const std::string& objectName,
5293  DBObjectType objectType,
5294  const std::string& command) {
5295  if (objectType == TableDBObjectType) {
5296  auto td = sessionCatalog.getMetadataForTable(objectName, false);
5297  if (!td || td->isView) {
5298  // expected TABLE, found VIEW
5299  throw std::runtime_error(command + " failed. Object '" + objectName + "' of type " +
5300  DBObjectTypeToString(objectType) + " not found.");
5301  }
5302 
5303  } else if (objectType == ViewDBObjectType) {
5304  auto td = sessionCatalog.getMetadataForTable(objectName, false);
5305  if (!td || !td->isView) {
5306  // expected VIEW, found TABLE
5307  throw std::runtime_error(command + " failed. Object '" + objectName + "' of type " +
5308  DBObjectTypeToString(objectType) + " not found.");
5309  }
5310  }
5311 }
5312 
5313 // GRANT SELECT/INSERT/CREATE ON TABLE payroll_table TO payroll_dept_role;
5314 GrantPrivilegesStmt::GrantPrivilegesStmt(const rapidjson::Value& payload) {
5315  CHECK(payload.HasMember("type"));
5316  type_ = std::make_unique<std::string>(json_str(payload["type"]));
5317 
5318  CHECK(payload.HasMember("target"));
5319  target_ = std::make_unique<std::string>(json_str(payload["target"]));
5320 
5321  if (payload.HasMember("privileges")) {
5322  CHECK(payload["privileges"].IsArray());
5323  for (auto& privilege : payload["privileges"].GetArray()) {
5324  auto r = json_str(privilege);
5325  // privilege was a StringLiteral
5326  // and is wrapped with quotes which need to get removed
5327  boost::algorithm::trim_if(r, boost::is_any_of(" \"'`"));
5328  privileges_.emplace_back(r);
5329  }
5330  }
5331  if (payload.HasMember("grantees")) {
5332  CHECK(payload["grantees"].IsArray());
5333  for (auto& grantee : payload["grantees"].GetArray()) {
5334  std::string g = json_str(grantee);
5335  grantees_.emplace_back(g);
5336  }
5337  }
5338 }
5339 
// Body of the GRANT handler (signature line not part of this listing).
// Resolves the target object, verifies that it exists and that the caller may grant on
// it, converts each privilege keyword into an AccessPrivileges value and finally
// grants the whole batch to every grantee.
// Throws std::runtime_error when SERVER objects are addressed while g_enable_fsi is
// off, when the object cannot be verified, when the caller is neither super user nor
// owner of the object, or when a privilege keyword is not valid for the object type.
5341  auto& catalog = session.getCatalog();
5342  const auto& currentUser = session.get_currentUser();
// The object type is upper-cased before the name is extracted and the type is parsed.
5343  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
5344  const auto objectName =
5345  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
5346  auto objectType = DBObjectTypeFromString(parserObjectType);
5347  if (objectType == ServerDBObjectType && !g_enable_fsi) {
5348  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
5349  }
5350  /* verify object exists and is of proper type *before* trying to execute */
5351  verifyObject(catalog, objectName, objectType, "GRANT");
5352
5353  DBObject dbObject = createObject(objectName, objectType);
5354  /* verify object ownership if not suser */
5355  if (!currentUser.isSuper) {
5356  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
5357  throw std::runtime_error(
5358  "GRANT failed. It can only be executed by super user or owner of the "
5359  "object.");
5360  }
5361  }
5362  /* set proper values of privileges & grant them to the object */
// One copy of dbObject per requested privilege; each copy receives its own privilege
// set and permission type in the loop below.
5363  std::vector<DBObject> objects(get_privs().size(), dbObject);
5364  for (size_t i = 0; i < get_privs().size(); ++i) {
5365  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
5366  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
5367  objects[i].setPrivileges(priv.first);
5368  objects[i].setPermissionType(priv.second);
// A privilege can map to the SERVER permission type even when the target object is not
// a server, hence this second g_enable_fsi check.
5369  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
5370  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
5371  }
5372  }
5373  SysCatalog::instance().grantDBObjectPrivilegesBatch(grantees_, objects, catalog);
5374 }
5375 
5376 // REVOKE SELECT/INSERT/CREATE ON TABLE payroll_table FROM payroll_dept_role;