OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParserNode.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "ParserNode.h"
24 #include "Shared/base64.h"
25 
26 #include <boost/algorithm/string.hpp>
27 #include <boost/core/null_deleter.hpp>
28 #include <boost/filesystem.hpp>
29 #include <boost/function.hpp>
30 
31 #include <rapidjson/document.h>
32 #include <rapidjson/stringbuffer.h>
33 #include <rapidjson/writer.h>
34 
35 #include <cassert>
36 #include <cmath>
37 #include <limits>
38 #include <random>
39 #include <regex>
40 #include <stdexcept>
41 #include <string>
42 #include <type_traits>
43 #include <typeinfo>
44 
46 #include "Catalog/Catalog.h"
52 #include "Geospatial/Compression.h"
54 #include "Geospatial/Types.h"
56 #include "ImportExport/Importer.h"
57 #include "LockMgr/LockMgr.h"
61 #include "QueryEngine/Execute.h"
67 #include "ReservedKeywords.h"
68 #include "Shared/DbObjectKeys.h"
69 #include "Shared/StringTransform.h"
70 #include "Shared/SysDefinitions.h"
71 #include "Shared/measure.h"
72 #include "Shared/shard_key.h"
74 #include "Utils/FsiUtils.h"
75 
76 #include "gen-cpp/CalciteServer.h"
77 
// File-scope flags and callback aliases used by the parser nodes below.
// NOTE(review): the embedded listing numbers skip 79, 83, 87, 89, 93, 97 and
// 102, so some declarations (and one parameter in each alias below) appear to
// be missing from this extract -- confirm against the original file.

// Defined here and zero-initialised.
78 size_t g_leaf_count{0};
// Flags defined in other translation units.
80 extern bool g_enable_string_functions;
81 extern bool g_enable_fsi;
82 
84 #ifdef ENABLE_IMPORT_PARQUET
// Only compiled in when ENABLE_IMPORT_PARQUET is defined; defaults to false.
85 bool g_enable_legacy_parquet_import{false};
86 #endif
88 
90 
91 extern bool g_enable_ml_functions;
92 
// Brings the ""s std::string literal suffix into scope for this file.
94 using namespace std::string_literals;
95 
// Callback taking a TableDescriptor and the table's column list.
96 using TableDefFuncPtr = boost::function<void(TableDescriptor&,
98  const std::list<ColumnDescriptor>& columns)>;
99 
// Same shape of callback for a DataframeTableDescriptor.
100 using DataframeDefFuncPtr =
101  boost::function<void(DataframeTableDescriptor&,
103  const std::list<ColumnDescriptor>& columns)>;
104 
105 namespace Parser {
// Reports whether the query session `query_session` has been interrupted by
// asking executor->checkIsQuerySessionInterrupted(); the fall-through path
// returns false.
// NOTE(review): this extract looks truncated -- the embedded listing numbers
// skip 110-111, `session_read_lock` is used without a visible declaration and
// the braces below do not balance. Confirm the guarding condition and the
// lock acquisition against the original file before editing this function.
106 bool check_session_interrupted(const QuerySessionId& query_session, Executor* executor) {
107  // we call this function with unitary executor but is okay since
108  // we know the exact session info from a global session map object
109  // in the executor
112  executor->getSessionLock());
113  return executor->checkIsQuerySessionInterrupted(query_session, session_read_lock);
114  }
115  return false;
116 }
117 
118 std::vector<int> getTableChunkKey(const TableDescriptor* td,
119  Catalog_Namespace::Catalog& catalog) {
120  std::vector<int> table_chunk_key_prefix;
121  if (td) {
122  if (td->fragmenter) {
123  table_chunk_key_prefix = td->fragmenter->getFragmentsForQuery().chunkKeyPrefix;
124  } else {
125  table_chunk_key_prefix.push_back(catalog.getCurrentDB().dbId);
126  table_chunk_key_prefix.push_back(td->tableId);
127  }
128  }
129  return table_chunk_key_prefix;
130 }
131 
132 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
133  const Catalog_Namespace::Catalog& catalog,
134  Analyzer::Query& query,
135  TlistRefType allow_tlist_ref) const {
136  return makeExpr<Analyzer::Constant>(kNULLT, true);
137 }
138 
139 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
140  const Catalog_Namespace::Catalog& catalog,
141  Analyzer::Query& query,
142  TlistRefType allow_tlist_ref) const {
143  return analyzeValue(*stringval_, false);
144 }
145 
146 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(const std::string& stringval,
147  const bool is_null) {
148  if (!is_null) {
149  const SQLTypeInfo ti(kVARCHAR, stringval.length(), 0, true);
150  Datum d;
151  d.stringval = new std::string(stringval);
152  return makeExpr<Analyzer::Constant>(ti, false, d);
153  }
154  // Null value
155  return makeExpr<Analyzer::Constant>(kVARCHAR, true);
156 }
157 
158 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
159  const Catalog_Namespace::Catalog& catalog,
160  Analyzer::Query& query,
161  TlistRefType allow_tlist_ref) const {
162  return analyzeValue(intval_);
163 }
164 
165 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(const int64_t intval) {
166  SQLTypes t;
167  Datum d;
168  if (intval >= INT16_MIN && intval <= INT16_MAX) {
169  t = kSMALLINT;
170  d.smallintval = (int16_t)intval;
171  } else if (intval >= INT32_MIN && intval <= INT32_MAX) {
172  t = kINT;
173  d.intval = (int32_t)intval;
174  } else {
175  t = kBIGINT;
176  d.bigintval = intval;
177  }
178  return makeExpr<Analyzer::Constant>(t, false, d);
179 }
180 
181 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
182  const Catalog_Namespace::Catalog& catalog,
183  Analyzer::Query& query,
184  TlistRefType allow_tlist_ref) const {
185  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
186  Datum d = StringToDatum(*fixedptval_, ti);
187  return makeExpr<Analyzer::Constant>(ti, false, d);
188 }
189 
190 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(const int64_t numericval,
191  const int scale,
192  const int precision) {
193  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
194  ti.set_scale(scale);
195  ti.set_precision(precision);
196  Datum d;
197  d.bigintval = numericval;
198  return makeExpr<Analyzer::Constant>(ti, false, d);
199 }
200 
201 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
202  const Catalog_Namespace::Catalog& catalog,
203  Analyzer::Query& query,
204  TlistRefType allow_tlist_ref) const {
205  Datum d;
206  d.floatval = floatval_;
207  return makeExpr<Analyzer::Constant>(kFLOAT, false, d);
208 }
209 
210 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
211  const Catalog_Namespace::Catalog& catalog,
212  Analyzer::Query& query,
213  TlistRefType allow_tlist_ref) const {
214  Datum d;
215  d.doubleval = doubleval_;
216  return makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
217 }
218 
219 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
220  const Catalog_Namespace::Catalog& catalog,
221  Analyzer::Query& query,
222  TlistRefType allow_tlist_ref) const {
223  return get(timestampval_);
224 }
225 
226 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(const int64_t timestampval) {
227  Datum d;
228  d.bigintval = timestampval;
229  return makeExpr<Analyzer::Constant>(kTIMESTAMP, false, d);
230 }
231 
232 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
233  const Catalog_Namespace::Catalog& catalog,
234  Analyzer::Query& query,
235  TlistRefType allow_tlist_ref) const {
236  Datum d;
237  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
238 }
239 
240 std::shared_ptr<Analyzer::Expr> UserLiteral::get(const std::string& user) {
241  Datum d;
242  d.stringval = new std::string(user);
243  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
244 }
245 
246 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
247  const Catalog_Namespace::Catalog& catalog,
248  Analyzer::Query& query,
249  TlistRefType allow_tlist_ref) const {
250  SQLTypeInfo ti = SQLTypeInfo(kARRAY, true);
251  bool set_subtype = true;
252  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
253  for (auto& p : value_list_) {
254  auto e = p->analyze(catalog, query, allow_tlist_ref);
255  CHECK(e);
256  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
257  if (c != nullptr && c->get_is_null()) {
258  value_exprs.push_back(c);
259  continue;
260  }
261  auto subtype = e->get_type_info().get_type();
262  if (subtype == kNULLT) {
263  // NULL element
264  } else if (set_subtype) {
265  ti.set_subtype(subtype);
266  set_subtype = false;
267  }
268  value_exprs.push_back(e);
269  }
270  std::shared_ptr<Analyzer::Expr> result =
271  makeExpr<Analyzer::Constant>(ti, false, value_exprs);
272  return result;
273 }
274 
275 std::string ArrayLiteral::to_string() const {
276  std::string str = "{";
277  bool notfirst = false;
278  for (auto& p : value_list_) {
279  if (notfirst) {
280  str += ", ";
281  } else {
282  notfirst = true;
283  }
284  str += p->to_string();
285  }
286  str += "}";
287  return str;
288 }
289 
290 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
291  const Catalog_Namespace::Catalog& catalog,
292  Analyzer::Query& query,
293  TlistRefType allow_tlist_ref) const {
294  auto left_expr = left_->analyze(catalog, query, allow_tlist_ref);
295  const auto& left_type = left_expr->get_type_info();
296  if (right_ == nullptr) {
297  return makeExpr<Analyzer::UOper>(
298  left_type, left_expr->get_contains_agg(), optype_, left_expr->decompress());
299  }
300  if (optype_ == kARRAY_AT) {
301  if (left_type.get_type() != kARRAY) {
302  throw std::runtime_error(left_->to_string() + " is not of array type.");
303  }
304  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
305  const auto& right_type = right_expr->get_type_info();
306  if (!right_type.is_integer()) {
307  throw std::runtime_error(right_->to_string() + " is not of integer type.");
308  }
309  return makeExpr<Analyzer::BinOper>(
310  left_type.get_elem_type(), false, kARRAY_AT, kONE, left_expr, right_expr);
311  }
312  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
313  return normalize(optype_, opqualifier_, left_expr, right_expr);
314 }
315 
316 bool should_translate_strings(const std::shared_ptr<Analyzer::Expr>& lhs_expr,
317  const std::shared_ptr<Analyzer::Expr>& rhs_expr) {
318  if (dynamic_cast<Analyzer::Constant*>(rhs_expr.get())) {
319  // we must translate rhs string literal against lhs str dictionary
320  return true;
321  }
322  std::set<int> lhs_rte_idx;
323  lhs_expr->collect_rte_idx(lhs_rte_idx);
324  CHECK(!lhs_rte_idx.empty());
325  std::set<int> rhs_rte_idx;
326  rhs_expr->collect_rte_idx(rhs_rte_idx);
327  CHECK(!rhs_rte_idx.empty());
328  return lhs_rte_idx.size() == 1UL && lhs_rte_idx == rhs_rte_idx;
329 }
330 
331 SQLTypeInfo const& get_str_dict_cast_type(const SQLTypeInfo& lhs_type_info,
332  const SQLTypeInfo& rhs_type_info,
333  const Executor* executor) {
334  CHECK(lhs_type_info.is_string());
335  CHECK(lhs_type_info.get_compression() == kENCODING_DICT);
336  CHECK(rhs_type_info.is_string());
337  CHECK(rhs_type_info.get_compression() == kENCODING_DICT);
338  const auto& lhs_dict_key = lhs_type_info.getStringDictKey();
339  const auto& rhs_dict_key = rhs_type_info.getStringDictKey();
340  CHECK_NE(lhs_dict_key, rhs_dict_key);
341  if (lhs_dict_key.isTransientDict()) {
342  return rhs_type_info;
343  }
344  if (rhs_dict_key.isTransientDict()) {
345  return lhs_type_info;
346  }
347  // If here then neither lhs or rhs type was transient, we should see which
348  // type has the largest dictionary and make that the destination type
349  const auto lhs_sdp = executor->getStringDictionaryProxy(lhs_dict_key, true);
350  const auto rhs_sdp = executor->getStringDictionaryProxy(rhs_dict_key, true);
351  return lhs_sdp->entryCount() >= rhs_sdp->entryCount() ? lhs_type_info : rhs_type_info;
352 }
353 
// Picks the string type two string operands should share.
// NOTE(review): the first line of this function's signature (listing line
// 354: return type, name and the `lhs_type_info` parameter) is missing from
// this extract -- confirm it against the original file.
//
// Both inputs must be string types (CHECKed).
//  - Both dictionary-encoded: if the dictionary keys are equal, or they share
//    a db_id and the lhs dict_id equals TRANSIENT_DICT(rhs dict_id), the type
//    with the smaller dict_id is returned (lhs on a tie); otherwise the
//    choice is delegated to get_str_dict_cast_type().
//  - Otherwise at least one side must be none-encoded (CHECKed): the lhs
//    type is chosen when rhs is none-encoded, else the rhs type. If the
//    chosen type is itself none-encoded, its dimension is widened to the
//    larger of the two input dimensions.
355  const SQLTypeInfo& rhs_type_info,
356  const Executor* executor) {
357  CHECK(lhs_type_info.is_string());
358  CHECK(rhs_type_info.is_string());
359  if (lhs_type_info.is_dict_encoded_string() && rhs_type_info.is_dict_encoded_string()) {
360  const auto& lhs_dict_key = lhs_type_info.getStringDictKey();
361  const auto& rhs_dict_key = rhs_type_info.getStringDictKey();
362  if (lhs_dict_key == rhs_dict_key ||
363  (lhs_dict_key.db_id == rhs_dict_key.db_id &&
364  lhs_dict_key.dict_id == TRANSIENT_DICT(rhs_dict_key.dict_id))) {
365  return lhs_dict_key.dict_id <= rhs_dict_key.dict_id ? lhs_type_info : rhs_type_info;
366  }
367  return get_str_dict_cast_type(lhs_type_info, rhs_type_info, executor);
368  }
369  CHECK(lhs_type_info.is_none_encoded_string() || rhs_type_info.is_none_encoded_string());
370  SQLTypeInfo ret_ti =
371  rhs_type_info.is_none_encoded_string() ? lhs_type_info : rhs_type_info;
372  if (ret_ti.is_none_encoded_string()) {
373  ret_ti.set_dimension(
374  std::max(lhs_type_info.get_dimension(), rhs_type_info.get_dimension()));
375  }
376  return ret_ti;
377 }
378 
// Builds the Analyzer::BinOper for `left_expr <optype> right_expr`, adding
// the casts and decompressions the operand types call for.
//
//  1. If either operand is a date stored in days, both are decompressed.
//  2. When `qual` is not kONE the right operand must be an array
//     (std::runtime_error otherwise; a Subquery operand is CHECKed against)
//     and its element type is used for the type analysis.
//  3. Analyzer::BinOper::analyze_type_info() supplies the result type and the
//     types each operand should be cast to. A time-interval result is
//     returned immediately, before any cast is applied.
//  4. Operands whose type differs from the requested one are cast; with a
//     qualifier the right operand is cast to the array form of the new type.
//  5. Comparison operators: two geometry operands are rejected unless the
//     operator is kBBOX_INTERSECT; dictionary-encoded operands are either
//     translated, left as-is (same dictionary with = / <>), or decompressed
//     as described inline. Non-comparison operators decompress both sides.
//
// `executor` is only required (CHECKed) on the dictionary-translation path.
// The resulting BinOper is flagged as containing an aggregate when either
// final operand does.
379 std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
380  const SQLOps optype,
381  const SQLQualifier qual,
382  std::shared_ptr<Analyzer::Expr> left_expr,
383  std::shared_ptr<Analyzer::Expr> right_expr,
384  const Executor* executor) {
385  if (left_expr->get_type_info().is_date_in_days() ||
386  right_expr->get_type_info().is_date_in_days()) {
387  // Do not propagate encoding
388  left_expr = left_expr->decompress();
389  right_expr = right_expr->decompress();
390  }
391  const auto& left_type = left_expr->get_type_info();
392  auto right_type = right_expr->get_type_info();
393  if (qual != kONE) {
394  // subquery not supported yet.
395  CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
396  if (right_type.get_type() != kARRAY) {
397  throw std::runtime_error(
398  "Existential or universal qualifiers can only be used in front of a subquery "
399  "or an "
400  "expression of array type.");
401  }
402  right_type = right_type.get_elem_type();
403  }
404  SQLTypeInfo new_left_type;
405  SQLTypeInfo new_right_type;
406  auto result_type = Analyzer::BinOper::analyze_type_info(
407  optype, left_type, right_type, &new_left_type, &new_right_type);
408  if (result_type.is_timeinterval()) {
409  return makeExpr<Analyzer::BinOper>(
410  result_type, false, optype, qual, left_expr, right_expr);
411  }
412  if (left_type != new_left_type) {
413  left_expr = left_expr->add_cast(new_left_type);
414  }
415  if (right_type != new_right_type) {
416  if (qual == kONE) {
417  right_expr = right_expr->add_cast(new_right_type);
418  } else {
419  right_expr = right_expr->add_cast(new_right_type.get_array_type());
420  }
421  }
422 
423  if (IS_COMPARISON(optype)) {
424  if (optype != kBBOX_INTERSECT && new_left_type.is_geometry() &&
425  new_right_type.is_geometry()) {
426  throw std::runtime_error(
427  "Comparison operators are not yet supported for geospatial types.");
428  }
429 
430  if (new_left_type.get_compression() == kENCODING_DICT &&
431  new_right_type.get_compression() == kENCODING_DICT) {
432  if (new_left_type.getStringDictKey() != new_right_type.getStringDictKey()) {
433  if (optype == kEQ || optype == kNE) {
434  // Join framework does its own string dictionary translation
435  // (at least partly since the rhs table projection does not use
436  // the normal runtime execution framework), so if we detect
437  // that the rte idxs of the two tables are different, bail
438  // on translating
439  if (should_translate_strings(left_expr, right_expr)) {
440  CHECK(executor);
441  // Make the type we're casting to the transient dictionary, if it exists,
442  // otherwise the largest dictionary in terms of number of entries
443  SQLTypeInfo ti(
444  get_str_dict_cast_type(new_left_type, new_right_type, executor));
// The operand whose type was NOT chosen as the target is the one cast.
445  auto& expr_to_cast = ti == new_left_type ? right_expr : left_expr;
446  ti.set_fixed_size();
// NOTE(review): the embedded listing numbers skip 447 here, so a statement
// may be missing from this extract -- confirm against the original file.
448  expr_to_cast = expr_to_cast->add_cast(ti);
449  } else { // Translation declined for this = / <> comparison
450  // should_translate_strings() said not to translate, so both sides
451  // are decompressed instead, even though the operator is kEQ or
452  // kNE
453  left_expr = left_expr->decompress();
454  right_expr = right_expr->decompress();
455  }
456  } else { // Ordered comparison operator
457  // We do not currently support ordered (i.e. >, <=) comparisons between
458  // dictionary-encoded columns, so both sides are decompressed
459  // before comparing
460  left_expr = left_expr->decompress();
461  right_expr = right_expr->decompress();
462  }
463  } else { // Strings shared comp param
464  if (!(optype == kEQ || optype == kNE)) {
465  // We do not currently support ordered (i.e. >, <=) comparisons between
466  // encoded columns, so try to decode (will only succeed with watchdog off)
467  left_expr = left_expr->decompress();
468  right_expr = right_expr->decompress();
469  } else {
470  // do nothing, can directly support equals/non-equals comparisons between two
471  // dictionary encoded columns sharing the same dictionary as these are
472  // effectively integer comparisons in the same dictionary space
473  }
474  }
475  } else if (new_left_type.get_compression() == kENCODING_DICT &&
476  new_right_type.get_compression() == kENCODING_NONE) {
// Only the left side is dictionary-encoded: cast the right side into the
// left side's dictionary encoding.
477  SQLTypeInfo ti(new_right_type);
478  ti.set_compression(new_left_type.get_compression());
479  ti.set_comp_param(new_left_type.get_comp_param());
480  ti.setStringDictKey(new_left_type.getStringDictKey());
481  ti.set_fixed_size();
482  right_expr = right_expr->add_cast(ti);
483  } else if (new_right_type.get_compression() == kENCODING_DICT &&
484  new_left_type.get_compression() == kENCODING_NONE) {
// Mirror image: only the right side is dictionary-encoded.
485  SQLTypeInfo ti(new_left_type);
486  ti.set_compression(new_right_type.get_compression());
487  ti.set_comp_param(new_right_type.get_comp_param());
488  ti.setStringDictKey(new_right_type.getStringDictKey());
489  ti.set_fixed_size();
490  left_expr = left_expr->add_cast(ti);
491  } else {
492  left_expr = left_expr->decompress();
493  right_expr = right_expr->decompress();
494  }
495  } else {
496  // Is this now a no-op just for pairs of none-encoded string columns
497  left_expr = left_expr->decompress();
498  right_expr = right_expr->decompress();
499  }
500  bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
501  return makeExpr<Analyzer::BinOper>(
502  result_type, has_agg, optype, qual, left_expr, right_expr);
503 }
504 
505 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
506  const Catalog_Namespace::Catalog& catalog,
507  Analyzer::Query& query,
508  TlistRefType allow_tlist_ref) const {
509  throw std::runtime_error("Subqueries are not supported yet.");
510  return nullptr;
511 }
512 
513 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
514  const Catalog_Namespace::Catalog& catalog,
515  Analyzer::Query& query,
516  TlistRefType allow_tlist_ref) const {
517  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
518  auto result = makeExpr<Analyzer::UOper>(kBOOLEAN, kISNULL, arg_expr);
519  if (is_not_) {
520  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
521  }
522  return result;
523 }
524 
525 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
526  const Catalog_Namespace::Catalog& catalog,
527  Analyzer::Query& query,
528  TlistRefType allow_tlist_ref) const {
529  throw std::runtime_error("Subqueries are not supported yet.");
530  return nullptr;
531 }
532 
533 std::shared_ptr<Analyzer::Expr> InValues::analyze(
534  const Catalog_Namespace::Catalog& catalog,
535  Analyzer::Query& query,
536  TlistRefType allow_tlist_ref) const {
537  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
538  SQLTypeInfo ti = arg_expr->get_type_info();
539  bool dict_comp = ti.get_compression() == kENCODING_DICT;
540  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
541  for (auto& p : value_list_) {
542  auto e = p->analyze(catalog, query, allow_tlist_ref);
543  if (ti != e->get_type_info()) {
544  if (ti.is_string() && e->get_type_info().is_string()) {
545  // Todo(todd): Can we have this leverage the cast framework as well
546  ti = Analyzer::BinOper::common_string_type(ti, e->get_type_info());
547  } else if (ti.is_number() && e->get_type_info().is_number()) {
548  ti = Analyzer::BinOper::common_numeric_type(ti, e->get_type_info());
549  } else {
550  throw std::runtime_error("IN expressions must contain compatible types.");
551  }
552  }
553  if (dict_comp) {
554  value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
555  } else {
556  value_exprs.push_back(e);
557  }
558  }
559  if (!dict_comp) {
560  arg_expr = arg_expr->decompress();
561  arg_expr = arg_expr->add_cast(ti);
562  std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
563  for (auto p : value_exprs) {
564  cast_vals.push_back(p->add_cast(ti));
565  }
566  value_exprs.swap(cast_vals);
567  }
568  std::shared_ptr<Analyzer::Expr> result =
569  makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
570  if (is_not_) {
571  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
572  }
573  return result;
574 }
575 
576 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
577  const Catalog_Namespace::Catalog& catalog,
578  Analyzer::Query& query,
579  TlistRefType allow_tlist_ref) const {
580  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
581  auto lower_expr = lower_->analyze(catalog, query, allow_tlist_ref);
582  auto upper_expr = upper_->analyze(catalog, query, allow_tlist_ref);
583  SQLTypeInfo new_left_type, new_right_type;
585  arg_expr->get_type_info(),
586  lower_expr->get_type_info(),
587  &new_left_type,
588  &new_right_type);
589  auto lower_pred =
590  makeExpr<Analyzer::BinOper>(kBOOLEAN,
591  kGE,
592  kONE,
593  arg_expr->add_cast(new_left_type)->decompress(),
594  lower_expr->add_cast(new_right_type)->decompress());
596  arg_expr->get_type_info(),
597  lower_expr->get_type_info(),
598  &new_left_type,
599  &new_right_type);
600  auto upper_pred = makeExpr<Analyzer::BinOper>(
601  kBOOLEAN,
602  kLE,
603  kONE,
604  arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
605  upper_expr->add_cast(new_right_type)->decompress());
606  std::shared_ptr<Analyzer::Expr> result =
607  makeExpr<Analyzer::BinOper>(kBOOLEAN, kAND, kONE, lower_pred, upper_pred);
608  if (is_not_) {
609  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
610  }
611  return result;
612 }
613 
614 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
615  const Catalog_Namespace::Catalog& catalog,
616  Analyzer::Query& query,
617  TlistRefType allow_tlist_ref) const {
618  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
619  if (!arg_expr->get_type_info().is_string()) {
620  throw std::runtime_error(
621  "expression in char_length clause must be of a string type.");
622  }
623  std::shared_ptr<Analyzer::Expr> result =
624  makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length_);
625  return result;
626 }
627 
628 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
629  const Catalog_Namespace::Catalog& catalog,
630  Analyzer::Query& query,
631  TlistRefType allow_tlist_ref) const {
632  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
633  if (!arg_expr->get_type_info().is_array()) {
634  throw std::runtime_error(
635  "expression in cardinality clause must be of an array type.");
636  }
637  std::shared_ptr<Analyzer::Expr> result =
638  makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
639  return result;
640 }
641 
642 void LikeExpr::check_like_expr(const std::string& like_str, char escape_char) {
643  if (like_str.back() == escape_char) {
644  throw std::runtime_error("LIKE pattern must not end with escape character.");
645  }
646 }
647 
648 bool LikeExpr::test_is_simple_expr(const std::string& like_str, char escape_char) {
649  // if not bounded by '%' then not a simple string
650  if (like_str.size() < 2 || like_str[0] != '%' || like_str[like_str.size() - 1] != '%') {
651  return false;
652  }
653  // if the last '%' is escaped then not a simple string
654  if (like_str[like_str.size() - 2] == escape_char &&
655  like_str[like_str.size() - 3] != escape_char) {
656  return false;
657  }
658  for (size_t i = 1; i < like_str.size() - 1; i++) {
659  if (like_str[i] == '%' || like_str[i] == '_' || like_str[i] == '[' ||
660  like_str[i] == ']') {
661  if (like_str[i - 1] != escape_char) {
662  return false;
663  }
664  }
665  }
666  return true;
667 }
668 
669 void LikeExpr::erase_cntl_chars(std::string& like_str, char escape_char) {
670  char prev_char = '\0';
671  // easier to create new string of allowable chars
672  // rather than erase chars from
673  // existing string
674  std::string new_str;
675  for (char& cur_char : like_str) {
676  if (cur_char == '%' || cur_char == escape_char) {
677  if (prev_char != escape_char) {
678  prev_char = cur_char;
679  continue;
680  }
681  }
682  new_str.push_back(cur_char);
683  prev_char = cur_char;
684  }
685  like_str = new_str;
686 }
687 
688 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
689  const Catalog_Namespace::Catalog& catalog,
690  Analyzer::Query& query,
691  TlistRefType allow_tlist_ref) const {
692  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
693  auto like_expr = like_string_->analyze(catalog, query, allow_tlist_ref);
694  auto escape_expr = escape_string_ == nullptr
695  ? nullptr
696  : escape_string_->analyze(catalog, query, allow_tlist_ref);
697  return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike_, is_not_);
698 }
699 
700 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
701  std::shared_ptr<Analyzer::Expr> like_expr,
702  std::shared_ptr<Analyzer::Expr> escape_expr,
703  const bool is_ilike,
704  const bool is_not) {
705  if (!arg_expr->get_type_info().is_string()) {
706  throw std::runtime_error("expression before LIKE must be of a string type.");
707  }
708  if (!like_expr->get_type_info().is_string()) {
709  throw std::runtime_error("expression after LIKE must be of a string type.");
710  }
711  char escape_char = '\\';
712  if (escape_expr != nullptr) {
713  if (!escape_expr->get_type_info().is_string()) {
714  throw std::runtime_error("expression after ESCAPE must be of a string type.");
715  }
716  if (!escape_expr->get_type_info().is_string()) {
717  throw std::runtime_error("expression after ESCAPE must be of a string type.");
718  }
719  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
720  if (c != nullptr && c->get_constval().stringval->length() > 1) {
721  throw std::runtime_error("String after ESCAPE must have a single character.");
722  }
723  escape_char = (*c->get_constval().stringval)[0];
724  }
725  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(like_expr);
726  bool is_simple = false;
727  if (c != nullptr) {
728  std::string& pattern = *c->get_constval().stringval;
729  if (is_ilike) {
730  std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
731  }
732  check_like_expr(pattern, escape_char);
733  is_simple = test_is_simple_expr(pattern, escape_char);
734  if (is_simple) {
735  erase_cntl_chars(pattern, escape_char);
736  }
737  }
738  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikeExpr>(
739  arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
740  if (is_not) {
741  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
742  }
743  return result;
744 }
745 
746 void RegexpExpr::check_pattern_expr(const std::string& pattern_str, char escape_char) {
747  if (pattern_str.back() == escape_char) {
748  throw std::runtime_error("REGEXP pattern must not end with escape character.");
749  }
750 }
751 
752 bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str, char escape_char) {
753  char prev_char = '\0';
754  char prev_prev_char = '\0';
755  std::string like_str;
756  for (char& cur_char : pattern_str) {
757  if (prev_char == escape_char || isalnum(cur_char) || cur_char == ' ' ||
758  cur_char == '.') {
759  like_str.push_back((cur_char == '.') ? '_' : cur_char);
760  prev_prev_char = prev_char;
761  prev_char = cur_char;
762  continue;
763  }
764  if (prev_char == '.' && prev_prev_char != escape_char) {
765  if (cur_char == '*' || cur_char == '+') {
766  if (cur_char == '*') {
767  like_str.pop_back();
768  }
769  // .* --> %
770  // .+ --> _%
771  like_str.push_back('%');
772  prev_prev_char = prev_char;
773  prev_char = cur_char;
774  continue;
775  }
776  }
777  return false;
778  }
779  pattern_str = like_str;
780  return true;
781 }
782 
783 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
784  const Catalog_Namespace::Catalog& catalog,
785  Analyzer::Query& query,
786  TlistRefType allow_tlist_ref) const {
787  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
788  auto pattern_expr = pattern_string_->analyze(catalog, query, allow_tlist_ref);
789  auto escape_expr = escape_string_ == nullptr
790  ? nullptr
791  : escape_string_->analyze(catalog, query, allow_tlist_ref);
792  return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not_);
793 }
794 
795 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
796  std::shared_ptr<Analyzer::Expr> arg_expr,
797  std::shared_ptr<Analyzer::Expr> pattern_expr,
798  std::shared_ptr<Analyzer::Expr> escape_expr,
799  const bool is_not) {
800  if (!arg_expr->get_type_info().is_string()) {
801  throw std::runtime_error("expression before REGEXP must be of a string type.");
802  }
803  if (!pattern_expr->get_type_info().is_string()) {
804  throw std::runtime_error("expression after REGEXP must be of a string type.");
805  }
806  char escape_char = '\\';
807  if (escape_expr != nullptr) {
808  if (!escape_expr->get_type_info().is_string()) {
809  throw std::runtime_error("expression after ESCAPE must be of a string type.");
810  }
811  if (!escape_expr->get_type_info().is_string()) {
812  throw std::runtime_error("expression after ESCAPE must be of a string type.");
813  }
814  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
815  if (c != nullptr && c->get_constval().stringval->length() > 1) {
816  throw std::runtime_error("String after ESCAPE must have a single character.");
817  }
818  escape_char = (*c->get_constval().stringval)[0];
819  if (escape_char != '\\') {
820  throw std::runtime_error("Only supporting '\\' escape character.");
821  }
822  }
823  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(pattern_expr);
824  if (c != nullptr) {
825  std::string& pattern = *c->get_constval().stringval;
826  if (translate_to_like_pattern(pattern, escape_char)) {
827  return LikeExpr::get(arg_expr, pattern_expr, escape_expr, false, is_not);
828  }
829  }
830  std::shared_ptr<Analyzer::Expr> result =
831  makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
832  if (is_not) {
833  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
834  }
835  return result;
836 }
837 
838 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
839  const Catalog_Namespace::Catalog& catalog,
840  Analyzer::Query& query,
841  TlistRefType allow_tlist_ref) const {
842  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
843  return LikelihoodExpr::get(arg_expr, likelihood_, is_not_);
844 }
845 
846 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
847  std::shared_ptr<Analyzer::Expr> arg_expr,
848  float likelihood,
849  const bool is_not) {
850  if (!arg_expr->get_type_info().is_boolean()) {
851  throw std::runtime_error("likelihood expression expects boolean type.");
852  }
853  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikelihoodExpr>(
854  arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
855  return result;
856 }
857 
858 std::shared_ptr<Analyzer::Expr> WidthBucketExpr::analyze(
859  const Catalog_Namespace::Catalog& catalog,
860  Analyzer::Query& query,
861  TlistRefType allow_tlist_ref) const {
862  auto target_value = target_value_->analyze(catalog, query, allow_tlist_ref);
863  auto lower_bound = lower_bound_->analyze(catalog, query, allow_tlist_ref);
864  auto upper_bound = upper_bound_->analyze(catalog, query, allow_tlist_ref);
865  auto partition_count = partition_count_->analyze(catalog, query, allow_tlist_ref);
866  return WidthBucketExpr::get(target_value, lower_bound, upper_bound, partition_count);
867 }
868 
869 std::shared_ptr<Analyzer::Expr> WidthBucketExpr::get(
870  std::shared_ptr<Analyzer::Expr> target_value,
871  std::shared_ptr<Analyzer::Expr> lower_bound,
872  std::shared_ptr<Analyzer::Expr> upper_bound,
873  std::shared_ptr<Analyzer::Expr> partition_count) {
874  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::WidthBucketExpr>(
875  target_value, lower_bound, upper_bound, partition_count);
876  return result;
877 }
878 
879 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
880  const Catalog_Namespace::Catalog& catalog,
881  Analyzer::Query& query,
882  TlistRefType allow_tlist_ref) const {
883  throw std::runtime_error("Subqueries are not supported yet.");
884  return nullptr;
885 }
886 
// Resolves this column reference to an analyzer expression.
//
// With a table qualifier, the column is looked up in that range table entry
// only. Without one, every range table entry of the query is searched and, if
// allow_tlist_ref is not TLIST_NONE, the query's target list is consulted for
// an output alias with the same name.
//
// Throws std::runtime_error when the reference is `*`, when the qualifier does
// not name a range table entry, when the column cannot be found, or when the
// column name / output alias matches more than once.
887 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
888  const Catalog_Namespace::Catalog& catalog,
889  Analyzer::Query& query,
890  TlistRefType allow_tlist_ref) const {
891  int table_id{0};
892  int rte_idx{0};
893  const ColumnDescriptor* cd{nullptr};
// A null column_ stands for `*`, which cannot be analyzed as a single column.
894  if (column_ == nullptr) {
895  throw std::runtime_error("invalid column name *.");
896  }
897  if (table_ != nullptr) {
// Qualified reference: only the named range table entry is considered.
898  rte_idx = query.get_rte_idx(*table_);
899  if (rte_idx < 0) {
900  throw std::runtime_error("range variable or table name " + *table_ +
901  " does not exist.");
902  }
903  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
904  cd = rte->get_column_desc(catalog, *column_);
905  if (cd == nullptr) {
906  throw std::runtime_error("Column name " + *column_ + " does not exist.");
907  }
908  table_id = rte->get_table_id();
909  } else {
// Unqualified reference: scan all range table entries; a second match is an
// ambiguity error.
910  bool found = false;
911  int i = 0;
912  for (auto rte : query.get_rangetable()) {
// NOTE(review): cd is reassigned on every iteration, so a match in an earlier
// entry is replaced by nullptr when a later entry lacks the column, and the
// checks after the loop then see cd == nullptr although found is true.
// Confirm whether this is intended.
913  cd = rte->get_column_desc(catalog, *column_);
914  if (cd != nullptr && !found) {
915  found = true;
916  rte_idx = i;
917  table_id = rte->get_table_id();
918  } else if (cd != nullptr && found) {
919  throw std::runtime_error("Column name " + *column_ + " is ambiguous.");
920  }
921  i++;
922  }
923  if (cd == nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
924  // check if this is a reference to a targetlist entry
925  bool found = false;
926  int varno = -1;
// Target list positions are counted from 1 here; the matching position is
// used as the varno of the output Var created below.
927  int i = 1;
928  std::shared_ptr<Analyzer::TargetEntry> tle;
929  for (auto p : query.get_targetlist()) {
930  if (*column_ == p->get_resname() && !found) {
931  found = true;
932  varno = i;
933  tle = p;
934  } else if (*column_ == p->get_resname() && found) {
935  throw std::runtime_error("Output alias " + *column_ + " is ambiguous.");
936  }
937  i++;
938  }
939  if (found) {
940  if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
941  Analyzer::Var* v = static_cast<Analyzer::Var*>(tle->get_expr());
// NOTE(review): the braces below do not balance in this listing; a line
// (presumably a condition guarding the return) appears to be missing after
// the static_cast above. Verify against the repository source.
943  return v->deep_copy();
944  }
945  }
// TLIST_COPY duplicates the aliased expression; any other mode refers to it
// by position through an Analyzer::Var of kind kOUTPUT.
946  if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
947  return tle->get_expr()->deep_copy();
948  } else {
949  return makeExpr<Analyzer::Var>(
950  tle->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
951  }
952  }
953  }
954  if (cd == nullptr) {
955  throw std::runtime_error("Column name " + *column_ + " does not exist.");
956  }
957  }
// A physical column was resolved: reference it by database, table and column id.
958  return makeExpr<Analyzer::ColumnVar>(
959  cd->columnType,
960  shared::ColumnKey{catalog.getDatabaseId(), table_id, cd->columnId},
961  rte_idx);
962 }
963 
964 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
965  const Catalog_Namespace::Catalog& catalog,
966  Analyzer::Query& query,
967  TlistRefType allow_tlist_ref) const {
968  SQLTypeInfo result_type;
969  SQLAgg agg_type;
970  std::shared_ptr<Analyzer::Expr> arg_expr;
971  bool is_distinct = false;
972  if (boost::iequals(*name_, "count")) {
973  result_type = SQLTypeInfo(kBIGINT, false);
974  agg_type = kCOUNT;
975  if (arg_) {
976  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
977  const SQLTypeInfo& ti = arg_expr->get_type_info();
978  if (ti.is_string() && (ti.get_compression() != kENCODING_DICT || !distinct_)) {
979  throw std::runtime_error(
980  "Strings must be dictionary-encoded in COUNT(DISTINCT).");
981  }
982  if (ti.get_type() == kARRAY && !distinct_) {
983  throw std::runtime_error("Only COUNT(DISTINCT) is supported on arrays.");
984  }
985  }
986  is_distinct = distinct_;
987  } else {
988  if (!arg_) {
989  throw std::runtime_error("Cannot compute " + *name_ + " with argument '*'.");
990  }
991  if (boost::iequals(*name_, "min")) {
992  agg_type = kMIN;
993  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
994  arg_expr = arg_expr->decompress();
995  result_type = arg_expr->get_type_info();
996  } else if (boost::iequals(*name_, "max")) {
997  agg_type = kMAX;
998  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
999  arg_expr = arg_expr->decompress();
1000  result_type = arg_expr->get_type_info();
1001  } else if (boost::iequals(*name_, "avg")) {
1002  agg_type = kAVG;
1003  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1004  if (!arg_expr->get_type_info().is_number()) {
1005  throw std::runtime_error("Cannot compute AVG on non-number-type arguments.");
1006  }
1007  arg_expr = arg_expr->decompress();
1008  result_type = SQLTypeInfo(kDOUBLE, false);
1009  } else if (boost::iequals(*name_, "sum")) {
1010  agg_type = kSUM;
1011  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1012  if (!arg_expr->get_type_info().is_number()) {
1013  throw std::runtime_error("Cannot compute SUM on non-number-type arguments.");
1014  }
1015  arg_expr = arg_expr->decompress();
1016  result_type = arg_expr->get_type_info().is_integer() ? SQLTypeInfo(kBIGINT, false)
1017  : arg_expr->get_type_info();
1018  } else if (boost::iequals(*name_, "unnest")) {
1019  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1020  const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
1021  if (arg_ti.get_type() != kARRAY) {
1022  throw std::runtime_error(arg_->to_string() + " is not of array type.");
1023  }
1024  return makeExpr<Analyzer::UOper>(arg_ti.get_elem_type(), false, kUNNEST, arg_expr);
1025  } else {
1026  throw std::runtime_error("invalid function name: " + *name_);
1027  }
1028  if (arg_expr->get_type_info().is_string() ||
1029  arg_expr->get_type_info().get_type() == kARRAY) {
1030  throw std::runtime_error(
1031  "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
1032  }
1033  }
1034  int naggs = query.get_num_aggs();
1035  query.set_num_aggs(naggs + 1);
1036  return makeExpr<Analyzer::AggExpr>(
1037  result_type, agg_type, arg_expr, is_distinct, nullptr);
1038 }
1039 
1040 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
1041  const Catalog_Namespace::Catalog& catalog,
1042  Analyzer::Query& query,
1043  TlistRefType allow_tlist_ref) const {
1044  target_type_->check_type();
1045  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1046  SQLTypeInfo ti(target_type_->get_type(),
1047  target_type_->get_param1(),
1048  target_type_->get_param2(),
1049  arg_expr->get_type_info().get_notnull());
1050  if (arg_expr->get_type_info().get_type() != target_type_->get_type() &&
1051  arg_expr->get_type_info().get_compression() != kENCODING_NONE) {
1052  arg_expr->decompress();
1053  }
1054  return arg_expr->add_cast(ti);
1055 }
1056 
1057 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
1058  const Catalog_Namespace::Catalog& catalog,
1059  Analyzer::Query& query,
1060  TlistRefType allow_tlist_ref) const {
1061  SQLTypeInfo ti;
1062  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1063  expr_pair_list;
1064  for (auto& p : when_then_list_) {
1065  auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
1066  if (e1->get_type_info().get_type() != kBOOLEAN) {
1067  throw std::runtime_error("Only boolean expressions can be used after WHEN.");
1068  }
1069  auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
1070  expr_pair_list.emplace_back(e1, e2);
1071  }
1072  auto else_e =
1073  else_expr_ ? else_expr_->analyze(catalog, query, allow_tlist_ref) : nullptr;
1074  return normalize(expr_pair_list, else_e);
1075 }
1076 
1077 namespace {
1078 
// NOTE(review): the function header is absent from this listing; presumably
// this is the body of bool_from_string_literal(), which the COPY option parsing
// below calls. Verify against the repository source.
//
// Maps the literal's text to a bool: "t", "true", "T" and "True" give true;
// "f", "false", "F" and "False" give false. The comparison is exact, so other
// spellings (e.g. "TRUE") are rejected with std::runtime_error.
1080  const std::string* s = str_literal->get_stringval();
1081  if (*s == "t" || *s == "true" || *s == "T" || *s == "True") {
1082  return true;
1083  } else if (*s == "f" || *s == "false" || *s == "F" || *s == "False") {
1084  return false;
1085  } else {
1086  throw std::runtime_error("Invalid string for boolean " + *s);
1087  }
1088 }
1089 
// Translates the WITH (...) options of a COPY statement into copy_params.
//
// Each option is matched by name (case-insensitively) and its value is checked
// for the expected literal kind (integer or string) and, where applicable, for
// an allowed value before being stored.
//
// options_                        name/value pairs to process
// copy_params                     receives the parsed settings
// warnings                        receives deprecation messages for the legacy
//                                 'parquet' and 'geo' options
// deferred_copy_from_partitions_  receives the upper-cased 'partitions' value
//
// Throws std::runtime_error for an unknown option name or an invalid value.
//
// NOTE(review): several statements appear to be missing from this listing (a
// number of branches below have empty bodies, and some expressions are cut
// short). Verify against the repository source before relying on any branch
// that looks incomplete.
1090 void parse_copy_params(const std::list<std::unique_ptr<NameValueAssign>>& options_,
1091  import_export::CopyParams& copy_params,
1092  std::vector<std::string>& warnings,
1093  std::string& deferred_copy_from_partitions_) {
1094  if (!options_.empty()) {
1095  for (auto& p : options_) {
1096  if (boost::iequals(*p->get_name(), "max_reject")) {
1097  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1098  if (int_literal == nullptr) {
1099  throw std::runtime_error("max_reject option must be an integer.");
1100  }
1101  copy_params.max_reject = int_literal->get_intval();
1102  } else if (boost::iequals(*p->get_name(), "max_import_batch_row_count")) {
1103  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1104  if (int_literal == nullptr) {
1105  throw std::runtime_error(
1106  "max_import_batch_row_count option must be an integer.");
1107  }
1108  if (int_literal->get_intval() <= 0) {
1109  throw std::runtime_error(
1110  "max_import_batch_row_count option must be a positive integer (greater "
1111  "than 0).");
1112  }
1113  copy_params.max_import_batch_row_count = int_literal->get_intval();
1114  } else if (boost::iequals(*p->get_name(), "buffer_size")) {
1115  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1116  if (int_literal == nullptr) {
1117  throw std::runtime_error("buffer_size option must be an integer.");
1118  }
1119  copy_params.buffer_size = int_literal->get_intval();
1120  } else if (boost::iequals(*p->get_name(), "threads")) {
1121  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1122  if (int_literal == nullptr) {
1123  throw std::runtime_error("Threads option must be an integer.");
1124  }
1125  copy_params.threads = int_literal->get_intval();
1126  } else if (boost::iequals(*p->get_name(), "delimiter")) {
1127  const StringLiteral* str_literal =
1128  dynamic_cast<const StringLiteral*>(p->get_value());
1129  if (str_literal == nullptr) {
1130  throw std::runtime_error("Delimiter option must be a string.");
1131  } else if (str_literal->get_stringval()->length() != 1) {
1132  throw std::runtime_error("Delimiter must be a single character string.");
1133  }
1134  copy_params.delimiter = (*str_literal->get_stringval())[0];
1135  } else if (boost::iequals(*p->get_name(), "nulls")) {
1136  const StringLiteral* str_literal =
1137  dynamic_cast<const StringLiteral*>(p->get_value());
1138  if (str_literal == nullptr) {
1139  throw std::runtime_error("Nulls option must be a string.");
1140  }
1141  copy_params.null_str = *str_literal->get_stringval();
1142  } else if (boost::iequals(*p->get_name(), "header")) {
1143  const StringLiteral* str_literal =
1144  dynamic_cast<const StringLiteral*>(p->get_value());
1145  if (str_literal == nullptr) {
1146  throw std::runtime_error("Header option must be a boolean.");
1147  }
1148  copy_params.has_header = bool_from_string_literal(str_literal)
1151 #ifdef ENABLE_IMPORT_PARQUET
1152  } else if (boost::iequals(*p->get_name(), "parquet")) {
1153  warnings.emplace_back(
1154  "Deprecation Warning: COPY FROM WITH (parquet='true') is deprecated. Use "
1155  "WITH (source_type='parquet_file') instead.");
1156  const StringLiteral* str_literal =
1157  dynamic_cast<const StringLiteral*>(p->get_value());
1158  if (str_literal == nullptr) {
1159  throw std::runtime_error("'parquet' option must be a boolean.");
1160  }
1161  if (bool_from_string_literal(str_literal)) {
1162  // not sure a parquet "table" type is proper, but to make code
1163  // look consistent in some places, let's set "table" type too
1165  }
1166 #endif // ENABLE_IMPORT_PARQUET
1167  } else if (boost::iequals(*p->get_name(), "s3_access_key")) {
1168  const StringLiteral* str_literal =
1169  dynamic_cast<const StringLiteral*>(p->get_value());
1170  if (str_literal == nullptr) {
1171  throw std::runtime_error("Option s3_access_key must be a string.");
1172  }
1173  copy_params.s3_access_key = *str_literal->get_stringval();
1174  } else if (boost::iequals(*p->get_name(), "s3_secret_key")) {
1175  const StringLiteral* str_literal =
1176  dynamic_cast<const StringLiteral*>(p->get_value());
1177  if (str_literal == nullptr) {
1178  throw std::runtime_error("Option s3_secret_key must be a string.");
1179  }
1180  copy_params.s3_secret_key = *str_literal->get_stringval();
1181  } else if (boost::iequals(*p->get_name(), "s3_session_token")) {
1182  const StringLiteral* str_literal =
1183  dynamic_cast<const StringLiteral*>(p->get_value());
1184  if (str_literal == nullptr) {
1185  throw std::runtime_error("Option s3_session_token must be a string.");
1186  }
1187  copy_params.s3_session_token = *str_literal->get_stringval();
1188  } else if (boost::iequals(*p->get_name(), "s3_region")) {
1189  const StringLiteral* str_literal =
1190  dynamic_cast<const StringLiteral*>(p->get_value());
1191  if (str_literal == nullptr) {
1192  throw std::runtime_error("Option s3_region must be a string.");
1193  }
1194  copy_params.s3_region = *str_literal->get_stringval();
1195  } else if (boost::iequals(*p->get_name(), "s3_endpoint")) {
1196  const StringLiteral* str_literal =
1197  dynamic_cast<const StringLiteral*>(p->get_value());
1198  if (str_literal == nullptr) {
1199  throw std::runtime_error("Option s3_endpoint must be a string.");
1200  }
1201  copy_params.s3_endpoint = *str_literal->get_stringval();
1202  } else if (boost::iequals(*p->get_name(), "s3_max_concurrent_downloads")) {
1203  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1204  if (int_literal == nullptr) {
1205  throw std::runtime_error(
1206  "'s3_max_concurrent_downloads' option must be an integer");
1207  }
1208  const int s3_max_concurrent_downloads = int_literal->get_intval();
1209  if (s3_max_concurrent_downloads > 0) {
1210  copy_params.s3_max_concurrent_downloads = s3_max_concurrent_downloads;
1211  } else {
1212  throw std::runtime_error(
1213  "Invalid value for 's3_max_concurrent_downloads' option (must be > 0): " +
1214  std::to_string(s3_max_concurrent_downloads));
1215  }
1216  } else if (boost::iequals(*p->get_name(), "quote")) {
1217  const StringLiteral* str_literal =
1218  dynamic_cast<const StringLiteral*>(p->get_value());
1219  if (str_literal == nullptr) {
1220  throw std::runtime_error("Quote option must be a string.");
1221  } else if (str_literal->get_stringval()->length() != 1) {
1222  throw std::runtime_error("Quote must be a single character string.");
1223  }
1224  copy_params.quote = (*str_literal->get_stringval())[0];
1225  } else if (boost::iequals(*p->get_name(), "escape")) {
1226  const StringLiteral* str_literal =
1227  dynamic_cast<const StringLiteral*>(p->get_value());
1228  if (str_literal == nullptr) {
1229  throw std::runtime_error("Escape option must be a string.");
1230  } else if (str_literal->get_stringval()->length() != 1) {
1231  throw std::runtime_error("Escape must be a single character string.");
1232  }
1233  copy_params.escape = (*str_literal->get_stringval())[0];
1234  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
1235  const StringLiteral* str_literal =
1236  dynamic_cast<const StringLiteral*>(p->get_value());
1237  if (str_literal == nullptr) {
1238  throw std::runtime_error("Line_delimiter option must be a string.");
1239  } else if (str_literal->get_stringval()->length() != 1) {
1240  throw std::runtime_error("Line_delimiter must be a single character string.");
1241  }
1242  copy_params.line_delim = (*str_literal->get_stringval())[0];
1243  } else if (boost::iequals(*p->get_name(), "quoted")) {
1244  const StringLiteral* str_literal =
1245  dynamic_cast<const StringLiteral*>(p->get_value());
1246  if (str_literal == nullptr) {
1247  throw std::runtime_error("Quoted option must be a boolean.");
1248  }
1249  copy_params.quoted = bool_from_string_literal(str_literal);
1250  } else if (boost::iequals(*p->get_name(), "plain_text")) {
1251  const StringLiteral* str_literal =
1252  dynamic_cast<const StringLiteral*>(p->get_value());
1253  if (str_literal == nullptr) {
1254  throw std::runtime_error("plain_text option must be a boolean.");
1255  }
1256  copy_params.plain_text = bool_from_string_literal(str_literal);
1257  } else if (boost::iequals(*p->get_name(), "trim_spaces")) {
1258  const StringLiteral* str_literal =
1259  dynamic_cast<const StringLiteral*>(p->get_value());
1260  if (str_literal == nullptr) {
1261  throw std::runtime_error("trim_spaces option must be a boolean.");
1262  }
1263  copy_params.trim_spaces = bool_from_string_literal(str_literal);
1264  } else if (boost::iequals(*p->get_name(), "array_marker")) {
1265  const StringLiteral* str_literal =
1266  dynamic_cast<const StringLiteral*>(p->get_value());
1267  if (str_literal == nullptr) {
1268  throw std::runtime_error("Array Marker option must be a string.");
1269  } else if (str_literal->get_stringval()->length() != 2) {
1270  throw std::runtime_error(
1271  "Array Marker option must be exactly two characters. Default is {}.");
1272  }
// The first character opens an array, the second closes it.
1273  copy_params.array_begin = (*str_literal->get_stringval())[0];
1274  copy_params.array_end = (*str_literal->get_stringval())[1];
1275  } else if (boost::iequals(*p->get_name(), "array_delimiter")) {
1276  const StringLiteral* str_literal =
1277  dynamic_cast<const StringLiteral*>(p->get_value());
1278  if (str_literal == nullptr) {
1279  throw std::runtime_error("Array Delimiter option must be a string.");
1280  } else if (str_literal->get_stringval()->length() != 1) {
1281  throw std::runtime_error("Array Delimiter must be a single character string.");
1282  }
1283  copy_params.array_delim = (*str_literal->get_stringval())[0];
1284  } else if (boost::iequals(*p->get_name(), "lonlat")) {
1285  const StringLiteral* str_literal =
1286  dynamic_cast<const StringLiteral*>(p->get_value());
1287  if (str_literal == nullptr) {
1288  throw std::runtime_error("Lonlat option must be a boolean.");
1289  }
1290  copy_params.lonlat = bool_from_string_literal(str_literal);
1291  } else if (boost::iequals(*p->get_name(), "geo")) {
1292  warnings.emplace_back(
1293  "Deprecation Warning: COPY FROM WITH (geo='true') is deprecated. Use WITH "
1294  "(source_type='geo_file') instead.");
1295  const StringLiteral* str_literal =
1296  dynamic_cast<const StringLiteral*>(p->get_value());
1297  if (str_literal == nullptr) {
1298  throw std::runtime_error("'geo' option must be a boolean.");
1299  }
1300  if (bool_from_string_literal(str_literal)) {
1302  }
1303  } else if (boost::iequals(*p->get_name(), "source_type")) {
1304  const StringLiteral* str_literal =
1305  dynamic_cast<const StringLiteral*>(p->get_value());
1306  if (str_literal == nullptr) {
1307  throw std::runtime_error("'source_type' option must be a string.");
1308  }
1309  const std::string* s = str_literal->get_stringval();
1310  if (boost::iequals(*s, "delimited_file")) {
1312  } else if (boost::iequals(*s, "geo_file")) {
1314 #if ENABLE_IMPORT_PARQUET
1315  } else if (boost::iequals(*s, "parquet_file")) {
1317 #endif
1318  } else if (boost::iequals(*s, "raster_file")) {
1320  } else if (boost::iequals(*s, "regex_parsed_file")) {
1322  } else {
1323  throw std::runtime_error(
1324  "Invalid string for 'source_type' option (must be 'GEO_FILE', 'RASTER_FILE'"
1325 #if ENABLE_IMPORT_PARQUET
1326  ", 'PARQUET_FILE'"
1327 #endif
1328  ", 'REGEX_PARSED_FILE'"
1329  " or 'DELIMITED_FILE'): " +
1330  *s);
1331  }
1332  } else if (boost::iequals(*p->get_name(), "geo_coords_type")) {
1333  const StringLiteral* str_literal =
1334  dynamic_cast<const StringLiteral*>(p->get_value());
1335  if (str_literal == nullptr) {
1336  throw std::runtime_error("'geo_coords_type' option must be a string");
1337  }
1338  const std::string* s = str_literal->get_stringval();
1339  if (boost::iequals(*s, "geography")) {
1340  throw std::runtime_error(
1341  "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
1342  // copy_params.geo_coords_type = kGEOGRAPHY;
1343  } else if (boost::iequals(*s, "geometry")) {
1344  copy_params.geo_coords_type = kGEOMETRY;
1345  } else {
1346  throw std::runtime_error(
1347  "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
1348  "'GEOMETRY'): " +
1349  *s);
1350  }
1351  } else if (boost::iequals(*p->get_name(), "raster_point_type")) {
1352  const StringLiteral* str_literal =
1353  dynamic_cast<const StringLiteral*>(p->get_value());
1354  if (str_literal == nullptr) {
1355  throw std::runtime_error("'raster_point_type' option must be a string");
1356  }
1357  const std::string* s = str_literal->get_stringval();
1358  if (boost::iequals(*s, "none")) {
1360  } else if (boost::iequals(*s, "auto")) {
1362  } else if (boost::iequals(*s, "smallint")) {
1364  } else if (boost::iequals(*s, "int")) {
1366  } else if (boost::iequals(*s, "float")) {
1368  } else if (boost::iequals(*s, "double")) {
1370  } else if (boost::iequals(*s, "point")) {
1372  } else {
1373  throw std::runtime_error(
1374  "Invalid string for 'raster_point_type' option (must be 'NONE', 'AUTO', "
1375  "'SMALLINT', 'INT', 'FLOAT', 'DOUBLE' or 'POINT'): " +
1376  *s);
1377  }
1378  } else if (boost::iequals(*p->get_name(), "raster_point_transform")) {
1379  const StringLiteral* str_literal =
1380  dynamic_cast<const StringLiteral*>(p->get_value());
1381  if (str_literal == nullptr) {
1382  throw std::runtime_error("'raster_point_transform' option must be a string");
1383  }
1384  const std::string* s = str_literal->get_stringval();
1385  if (boost::iequals(*s, "none")) {
1387  } else if (boost::iequals(*s, "auto")) {
1389  } else if (boost::iequals(*s, "file")) {
1391  } else if (boost::iequals(*s, "world")) {
1392  copy_params.raster_point_transform =
1394  } else {
1395  throw std::runtime_error(
1396  "Invalid string for 'raster_point_transform' option (must be 'NONE', "
1397  "'AUTO', 'FILE' or 'WORLD'): " +
1398  *s);
1399  }
1400  } else if (boost::iequals(*p->get_name(), "raster_import_bands")) {
1401  const StringLiteral* str_literal =
1402  dynamic_cast<const StringLiteral*>(p->get_value());
1403  if (str_literal == nullptr) {
1404  throw std::runtime_error("'raster_import_bands' option must be a string");
1405  }
1406  const std::string* raster_import_bands = str_literal->get_stringval();
1407  if (raster_import_bands) {
1408  copy_params.raster_import_bands = *raster_import_bands;
1409  } else {
1410  throw std::runtime_error("Invalid value for 'raster_import_bands' option");
1411  }
1412  } else if (boost::iequals(*p->get_name(), "raster_import_dimensions")) {
1413  const StringLiteral* str_literal =
1414  dynamic_cast<const StringLiteral*>(p->get_value());
1415  if (str_literal == nullptr) {
1416  throw std::runtime_error("'raster_import_dimensions' option must be a string");
1417  }
1418  const std::string* raster_import_dimensions = str_literal->get_stringval();
1419  if (raster_import_dimensions) {
1420  copy_params.raster_import_dimensions = *raster_import_dimensions;
1421  } else {
1422  throw std::runtime_error("Invalid value for 'raster_import_dimensions' option");
1423  }
1424  } else if (boost::iequals(*p->get_name(), "geo_coords_encoding")) {
1425  const StringLiteral* str_literal =
1426  dynamic_cast<const StringLiteral*>(p->get_value());
1427  if (str_literal == nullptr) {
1428  throw std::runtime_error("'geo_coords_encoding' option must be a string");
1429  }
1430  const std::string* s = str_literal->get_stringval();
// The encoding and its compression parameter are always set together.
1431  if (boost::iequals(*s, "none")) {
1432  copy_params.geo_coords_encoding = kENCODING_NONE;
1433  copy_params.geo_coords_comp_param = 0;
1434  } else if (boost::iequals(*s, "compressed(32)")) {
1435  copy_params.geo_coords_encoding = kENCODING_GEOINT;
1436  copy_params.geo_coords_comp_param = 32;
1437  } else {
1438  throw std::runtime_error(
1439  "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
1440  "'COMPRESSED(32)'): " +
1441  *s);
1442  }
1443  } else if (boost::iequals(*p->get_name(), "raster_scanlines_per_thread")) {
1444  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1445  if (int_literal == nullptr) {
1446  throw std::runtime_error(
1447  "'raster_scanlines_per_thread' option must be an integer");
1448  }
1449  const int raster_scanlines_per_thread = int_literal->get_intval();
1450  if (raster_scanlines_per_thread < 0) {
1451  throw std::runtime_error(
1452  "'raster_scanlines_per_thread' option must be >= 0, with 0 denoting auto "
1453  "sizing");
1454  }
1455  copy_params.raster_scanlines_per_thread = raster_scanlines_per_thread;
1456  } else if (boost::iequals(*p->get_name(), "geo_coords_srid")) {
1457  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1458  if (int_literal == nullptr) {
1459  throw std::runtime_error("'geo_coords_srid' option must be an integer");
1460  }
1461  const int srid = int_literal->get_intval();
1462  if (srid == 4326 || srid == 3857 || srid == 900913) {
1463  copy_params.geo_coords_srid = srid;
1464  } else {
1465  throw std::runtime_error(
1466  "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
1467  "900913): " +
1468  std::to_string(srid));
1469  }
1470  } else if (boost::iequals(*p->get_name(), "geo_layer_name")) {
1471  const StringLiteral* str_literal =
1472  dynamic_cast<const StringLiteral*>(p->get_value());
1473  if (str_literal == nullptr) {
1474  throw std::runtime_error("'geo_layer_name' option must be a string");
1475  }
1476  const std::string* layer_name = str_literal->get_stringval();
1477  if (layer_name) {
1478  copy_params.geo_layer_name = *layer_name;
1479  } else {
1480  throw std::runtime_error("Invalid value for 'geo_layer_name' option");
1481  }
1482  } else if (boost::iequals(*p->get_name(), "partitions")) {
// NOTE(review): unlike the other string options, the value is converted with
// an unchecked static_cast instead of dynamic_cast plus a null test, so a
// non-string literal is not rejected with an error message here. Confirm
// whether the grammar guarantees a string literal for this option.
1483  const auto partitions =
1484  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
1485  CHECK(partitions);
1486  const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
1487  if (partitions_uc != "REPLICATED") {
1488  throw std::runtime_error(
1489  "Invalid value for 'partitions' option. Must be 'REPLICATED'.");
1490  }
1491  deferred_copy_from_partitions_ = partitions_uc;
1492  } else if (boost::iequals(*p->get_name(), "geo_explode_collections")) {
1493  const StringLiteral* str_literal =
1494  dynamic_cast<const StringLiteral*>(p->get_value());
1495  if (str_literal == nullptr) {
1496  throw std::runtime_error("geo_explode_collections option must be a boolean.");
1497  }
1498  copy_params.geo_explode_collections = bool_from_string_literal(str_literal);
1499  } else if (boost::iequals(*p->get_name(), "geo_validate_geometry")) {
1500  const StringLiteral* str_literal =
1501  dynamic_cast<const StringLiteral*>(p->get_value());
1502  if (str_literal == nullptr) {
1503  throw std::runtime_error("geo_validate_geometry option must be a boolean.");
1504  }
1505  copy_params.geo_validate_geometry = false;
1506  auto const value = bool_from_string_literal(str_literal);
1507  if (value) {
1509  copy_params.geo_validate_geometry = true;
1510  } else {
1511  throw std::runtime_error("GEOS geometry validation is not available.");
1512  }
1513  }
1514  } else if (boost::iequals(*p->get_name(), "source_srid")) {
1515  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1516  if (int_literal == nullptr) {
1517  throw std::runtime_error("'source_srid' option must be an integer");
1518  }
1519  const int srid = int_literal->get_intval();
1521  copy_params.source_srid = srid;
1522  } else {
1523  throw std::runtime_error(
1524  "'source_srid' option can only be used on csv/tsv files");
1525  }
1526  } else if (boost::iequals(*p->get_name(), "regex_path_filter")) {
1527  const StringLiteral* str_literal =
1528  dynamic_cast<const StringLiteral*>(p->get_value());
1529  if (str_literal == nullptr) {
1530  throw std::runtime_error("Option regex_path_filter must be a string.");
1531  }
1532  const auto string_val = *str_literal->get_stringval();
// An empty string is stored as std::nullopt rather than as an empty value;
// the two file_sort_* options below follow the same rule.
1533  copy_params.regex_path_filter =
1534  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1535  } else if (boost::iequals(*p->get_name(), "file_sort_order_by")) {
1536  const StringLiteral* str_literal =
1537  dynamic_cast<const StringLiteral*>(p->get_value());
1538  if (str_literal == nullptr) {
1539  throw std::runtime_error("Option file_sort_order_by must be a string.");
1540  }
1541  const auto string_val = *str_literal->get_stringval();
1542  copy_params.file_sort_order_by =
1543  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1544  } else if (boost::iequals(*p->get_name(), "file_sort_regex")) {
1545  const StringLiteral* str_literal =
1546  dynamic_cast<const StringLiteral*>(p->get_value());
1547  if (str_literal == nullptr) {
1548  throw std::runtime_error("Option file_sort_regex must be a string.");
1549  }
1550  const auto string_val = *str_literal->get_stringval();
1551  copy_params.file_sort_regex =
1552  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1553  } else if (boost::iequals(*p->get_name(), "raster_point_compute_angle")) {
1554  const StringLiteral* str_literal =
1555  dynamic_cast<const StringLiteral*>(p->get_value());
1556  if (str_literal == nullptr) {
1557  throw std::runtime_error(
1558  "'raster_point_compute_angle' option must be a boolean.");
1559  }
// Only a true value is written; a false value leaves the field untouched.
1560  if (bool_from_string_literal(str_literal)) {
1561  copy_params.raster_point_compute_angle = true;
1562  }
1563  } else if (boost::iequals(*p->get_name(), "sql_order_by")) {
1564  if (auto str_literal = dynamic_cast<const StringLiteral*>(p->get_value())) {
1565  copy_params.sql_order_by = *str_literal->get_stringval();
1566  } else {
1567  throw std::runtime_error("Option sql_order_by must be a string.");
1568  }
1569  } else if (boost::iequals(*p->get_name(), "username")) {
1570  const StringLiteral* str_literal =
1571  dynamic_cast<const StringLiteral*>(p->get_value());
1572  if (str_literal == nullptr) {
1573  throw std::runtime_error("Option username must be a string.");
1574  }
1575  const auto string_val = *str_literal->get_stringval();
1576  copy_params.username = string_val;
1577  } else if (boost::iequals(*p->get_name(), "password")) {
1578  const StringLiteral* str_literal =
1579  dynamic_cast<const StringLiteral*>(p->get_value());
1580  if (str_literal == nullptr) {
1581  throw std::runtime_error("Option password must be a string.");
1582  }
1583  const auto string_val = *str_literal->get_stringval();
1584  copy_params.password = string_val;
1585  } else if (boost::iequals(*p->get_name(), "credential_string")) {
1586  const StringLiteral* str_literal =
1587  dynamic_cast<const StringLiteral*>(p->get_value());
1588  if (str_literal == nullptr) {
1589  throw std::runtime_error("Option credential_string must be a string.");
1590  }
1591  const auto string_val = *str_literal->get_stringval();
1592  copy_params.credential_string = string_val;
1593  } else if (boost::iequals(*p->get_name(), "data_source_name")) {
1594  const StringLiteral* str_literal =
1595  dynamic_cast<const StringLiteral*>(p->get_value());
1596  if (str_literal == nullptr) {
1597  throw std::runtime_error("Option data_source_name must be a string.");
1598  }
1599  const auto string_val = *str_literal->get_stringval();
1600  copy_params.dsn = string_val;
1601  } else if (boost::iequals(*p->get_name(), "connection_string")) {
1602  const StringLiteral* str_literal =
1603  dynamic_cast<const StringLiteral*>(p->get_value());
1604  if (str_literal == nullptr) {
1605  throw std::runtime_error("Option connection_string must be a string.");
1606  }
1607  const auto string_val = *str_literal->get_stringval();
1608  copy_params.connection_string = string_val;
1609  } else if (boost::iequals(*p->get_name(), "line_start_regex")) {
1610  const StringLiteral* str_literal =
1611  dynamic_cast<const StringLiteral*>(p->get_value());
1612  if (str_literal == nullptr) {
1613  throw std::runtime_error("Option line_start_regex must be a string.");
1614  }
1615  const auto string_val = *str_literal->get_stringval();
1616  copy_params.line_start_regex = string_val;
1617  } else if (boost::iequals(*p->get_name(), "line_regex")) {
1618  const StringLiteral* str_literal =
1619  dynamic_cast<const StringLiteral*>(p->get_value());
1620  if (str_literal == nullptr) {
1621  throw std::runtime_error("Option line_regex must be a string.");
1622  }
1623  const auto string_val = *str_literal->get_stringval();
1624  copy_params.line_regex = string_val;
1625  } else if (boost::iequals(*p->get_name(), "add_metadata_columns") &&
1627  const StringLiteral* str_literal =
1628  dynamic_cast<const StringLiteral*>(p->get_value());
1629  if (str_literal == nullptr) {
1630  throw std::runtime_error("'add_metadata_columns' option must be a string.");
1631  }
1632  copy_params.add_metadata_columns = *str_literal->get_stringval();
1633  } else {
// Any name that matched none of the branches above is rejected.
1634  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
1635  }
1636  }
1637  }
1638 }
1639 
1640 bool expr_is_null(const Analyzer::Expr* expr) {
1641  if (expr->get_type_info().get_type() == kNULLT) {
1642  return true;
1643  }
1644  auto array_expr = dynamic_cast<const Analyzer::ArrayExpr*>(expr);
1645  if (array_expr && array_expr->isNull()) {
1646  return true;
1647  }
1648  const auto const_expr = dynamic_cast<const Analyzer::Constant*>(expr);
1649  return const_expr && const_expr->get_is_null();
1650 }
1651 
1652 } // namespace
1653 
// Builds one Analyzer::CaseExpr from already-analyzed WHEN/THEN pairs and an
// optional ELSE expression. A common result type `ti` is deduced across all
// branches, every THEN/ELSE expression is cast to it via add_cast(), and an
// ELSE constant of that type is created when none was supplied.
//
// Parameters:
//   expr_pair_list - (condition, result) pairs; each condition is CHECKed to
//                    have a boolean type.
//   else_e_in      - the ELSE expression (see the review note further down
//                    regarding null values).
//   executor       - forwarded to common_string_type() when two string types
//                    have to be merged.
// Throws std::runtime_error when THEN/ELSE types are incompatible, or when no
// type could be deduced at all.
//
// NOTE(review): this listing appears to have lost statements during
// extraction: the bodies of the is_number()/is_boolean() branches in the THEN
// loop and the assignment inside the "transient-encoded dictionary" block are
// empty here. Confirm against the upstream file before editing the logic.
1654 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
1655  const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
1656  std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
1657  const std::shared_ptr<Analyzer::Expr> else_e_in,
1658  const Executor* executor) {
1659  SQLTypeInfo ti;
1660  bool has_agg = false;
1661  // We need to keep track of whether there was at
1662  // least one none-encoded string literal expression
1663  // type among any of the case sub-expressions separately
1664  // from rest of type determination logic, as it will
1665  // be casted to the output dictionary type if all output
1666  // types are either dictionary encoded or none-encoded
1667  // literals, or a transient encoded dictionary if all
1668  // types are none-encoded (column or literal)
1669  SQLTypeInfo none_encoded_literal_ti;
1670 
// Pass 1: fold the type of every THEN expression into `ti` (or into
// `none_encoded_literal_ti` for none-encoded strings that are not ColumnVars).
1671  for (auto& p : expr_pair_list) {
1672  auto e1 = p.first;
1673  CHECK(e1->get_type_info().is_boolean());
1674  auto e2 = p.second;
1675  if (e2->get_contains_agg()) {
1676  has_agg = true;
1677  }
1678  const auto& e2_ti = e2->get_type_info();
1679  const auto col_var = std::dynamic_pointer_cast<const Analyzer::ColumnVar>(e2);
1680  if (e2_ti.is_string() && !e2_ti.is_dict_encoded_string() && !col_var) {
1681  CHECK(e2_ti.is_none_encoded_string());
1682  none_encoded_literal_ti =
1683  none_encoded_literal_ti.get_type() == kNULLT
1684  ? e2_ti
1685  : common_string_type(none_encoded_literal_ti, e2_ti, executor);
1686  continue;
1687  }
// The first non-null THEN expression seeds `ti`; null expressions only make
// the result nullable and adopt the type deduced so far.
1688  if (ti.get_type() == kNULLT) {
1689  if (!expr_is_null(e2.get())) {
1690  ti = e2_ti;
1691  }
1692  } else if (expr_is_null(e2.get())) {
1693  ti.set_notnull(false);
1694  e2->set_type_info(ti);
1695  } else if (ti != e2_ti) {
1696  if (ti.is_string() && e2_ti.is_string()) {
1697  // Executor is needed to determine which dictionary is the largest
1698  // in case of two dictionary types with different encodings
1699  ti = common_string_type(ti, e2_ti, executor);
1700  } else if (ti.is_number() && e2_ti.is_number()) {
1702  } else if (ti.is_boolean() && e2_ti.is_boolean()) {
1704  } else {
1705  throw std::runtime_error(
1706  "Expressions in THEN clause must be of the same or compatible types.");
1707  }
1708  }
1709  }
// Pass 2: fold the ELSE expression into the same type deduction.
// NOTE(review): else_e is dereferenced on the next-but-one line before the
// `if (else_e)` null check, although later code handles else_e == nullptr.
// A null else_e_in would be undefined behavior here - confirm callers never
// pass null, or move the get_type_info() call inside the check.
1710  auto else_e = else_e_in;
1711  const auto& else_ti = else_e->get_type_info();
1712  if (else_e) {
1713  const auto col_var = std::dynamic_pointer_cast<const Analyzer::ColumnVar>(else_e);
1714  if (else_e->get_contains_agg()) {
1715  has_agg = true;
1716  }
1717  if (else_ti.is_string() && !else_ti.is_dict_encoded_string() && !col_var) {
1718  CHECK(else_ti.is_none_encoded_string());
1719  none_encoded_literal_ti =
1720  none_encoded_literal_ti.get_type() == kNULLT
1721  ? else_ti
1722  : common_string_type(none_encoded_literal_ti, else_ti, executor);
1723  } else {
1724  if (ti.get_type() == kNULLT) {
1725  ti = else_ti;
1726  } else if (expr_is_null(else_e.get())) {
1727  ti.set_notnull(false);
1728  else_e->set_type_info(ti);
1729  } else if (ti != else_ti) {
1730  ti.set_notnull(false);
1731  if (ti.is_string() && else_ti.is_string()) {
1732  // Executor is needed to determine which dictionary is the largest
1733  // in case of two dictionary types with different encodings
1734  ti = common_string_type(ti, else_ti, executor);
1735  } else if (ti.is_number() && else_ti.is_number()) {
1736  ti = Analyzer::BinOper::common_numeric_type(ti, else_ti);
1737  } else if (ti.is_boolean() && else_ti.is_boolean()) {
1738  ti = Analyzer::BinOper::common_numeric_type(ti, else_ti);
1739  } else if (get_logical_type_info(ti) != get_logical_type_info(else_ti)) {
1740  throw std::runtime_error(
1741  // types differing by encoding will be resolved at decode
1742  "Expressions in ELSE clause must be of the same or compatible types as "
1743  "those in the THEN clauses.");
1744  }
1745  }
1746  }
1747  }
1748 
1749  if (ti.get_type() == kNULLT && none_encoded_literal_ti.get_type() != kNULLT) {
1750  // If we haven't set a type so far it's because
1751  // every case sub-expression has a none-encoded
1752  // literal output. Output a transient-encoded dictionary
1753  // so we can use the output downstream
1755  }
1756 
// Pass 3: cast every THEN expression (and the ELSE expression) to the deduced
// type; the result type is always made nullable.
1757  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1758  cast_expr_pair_list;
1759  for (auto p : expr_pair_list) {
1760  ti.set_notnull(false);
1761  cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
1762  }
1763  if (else_e != nullptr) {
1764  else_e = else_e->add_cast(ti);
1765  } else {
1766  Datum d;
1767  // always create an else expr so that executor doesn't need to worry about it
1768  ti.set_notnull(false);
1769  else_e = makeExpr<Analyzer::Constant>(ti, true, d);
1770  }
// Still kNULLT at this point means no branch contributed a concrete type.
1771  if (ti.get_type() == kNULLT) {
1772  throw std::runtime_error(
1773  "Cannot deduce the type for case expressions, all branches null");
1774  }
1775 
1776  auto case_expr = makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
1777  return case_expr;
1778 }
1779 
1780 std::string CaseExpr::to_string() const {
1781  std::string str("CASE ");
1782  for (auto& p : when_then_list_) {
1783  str += "WHEN " + p->get_expr1()->to_string() + " THEN " +
1784  p->get_expr2()->to_string() + " ";
1785  }
1786  if (else_expr_ != nullptr) {
1787  str += "ELSE " + else_expr_->to_string();
1788  }
1789  str += " END";
1790  return str;
1791 }
1792 
1793 void UnionQuery::analyze(const Catalog_Namespace::Catalog& catalog,
1794  Analyzer::Query& query) const {
1795  left_->analyze(catalog, query);
1796  Analyzer::Query* right_query = new Analyzer::Query();
1797  right_->analyze(catalog, *right_query);
1798  query.set_next_query(right_query);
1799  query.set_is_unionall(is_unionall_);
1800 }
1801 
1802 void QuerySpec::analyze_having_clause(const Catalog_Namespace::Catalog& catalog,
1803  Analyzer::Query& query) const {
1804  std::shared_ptr<Analyzer::Expr> p;
1805  if (having_clause_ != nullptr) {
1806  p = having_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1807  if (p->get_type_info().get_type() != kBOOLEAN) {
1808  throw std::runtime_error("Only boolean expressions can be in HAVING clause.");
1809  }
1810  p->check_group_by(query.get_group_by());
1811  }
1812  query.set_having_predicate(p);
1813 }
1814 
// Analyzes the GROUP BY clause and stores the resulting expression list on
// `query` via set_group_by().
//
// Each GROUP BY item is either an integer literal (treated as a 1-based ordinal
// into the target list) or an arbitrary expression analyzed with TLIST_REF.
// When an item resolves to an Analyzer::Var, the referenced target-list entry is
// rewritten to a kGROUPBY Var numbered by its position in the GROUP BY list.
// After all items are processed, every target-list expression is validated with
// check_group_by() if the query has aggregates or a non-empty GROUP BY.
//
// Throws std::runtime_error for non-integer literals, out-of-range ordinals, and
// ordinals that reference an expression containing aggregates.
//
// NOTE(review): this listing appears to have lost lines between
// `set_new_type = true;` and `ti.set_fixed_size();` (the statements that change
// `ti` for none-encoded strings) - confirm against the upstream file.
1815 void QuerySpec::analyze_group_by(const Catalog_Namespace::Catalog& catalog,
1816  Analyzer::Query& query) const {
1817  std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1818  if (!groupby_clause_.empty()) {
// gexpr_no is the 1-based position of the current item in the GROUP BY list.
1819  int gexpr_no = 1;
1820  std::shared_ptr<Analyzer::Expr> gexpr;
1821  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1822  query.get_targetlist();
1823  for (auto& c : groupby_clause_) {
1824  // special-case ordinal numbers in GROUP BY
1825  if (dynamic_cast<Literal*>(c.get())) {
1826  IntLiteral* i = dynamic_cast<IntLiteral*>(c.get());
1827  if (!i) {
1828  throw std::runtime_error("Invalid literal in GROUP BY clause.");
1829  }
1830  int varno = (int)i->get_intval();
1831  if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1832  throw std::runtime_error("Invalid ordinal number in GROUP BY clause.");
1833  }
1834  if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1835  throw std::runtime_error(
1836  "Ordinal number in GROUP BY cannot reference an expression containing "
1837  "aggregate "
1838  "functions.");
1839  }
1840  gexpr = makeExpr<Analyzer::Var>(
1841  tlist[varno - 1]->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
1842  } else {
1843  gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1844  }
// None-encoded strings are grouped on a re-typed expression (see add_cast below).
1845  const SQLTypeInfo gti = gexpr->get_type_info();
1846  bool set_new_type = false;
1847  SQLTypeInfo ti(gti);
1848  if (gti.is_string() && gti.get_compression() == kENCODING_NONE) {
1849  set_new_type = true;
1853  ti.set_fixed_size();
1854  }
// If the item is a Var, swap roles: the target-list entry's own expression
// becomes the GROUP BY expression and the entry now points at the Var.
1855  std::shared_ptr<Analyzer::Var> v;
1856  if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
1857  v = std::static_pointer_cast<Analyzer::Var>(gexpr);
1858  int n = v->get_varno();
1859  gexpr = tlist[n - 1]->get_own_expr();
1860  auto cv = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gexpr);
1861  if (cv != nullptr) {
1862  // inherit all ColumnVar info for lineage.
1863  *std::static_pointer_cast<Analyzer::ColumnVar>(v) = *cv;
1864  }
1865  v->set_which_row(Analyzer::Var::kGROUPBY);
1866  v->set_varno(gexpr_no);
1867  tlist[n - 1]->set_expr(v);
1868  }
1869  if (set_new_type) {
1870  auto new_e = gexpr->add_cast(ti);
1871  groupby.push_back(new_e);
1872  if (v != nullptr) {
1873  v->set_type_info(new_e->get_type_info());
1874  }
1875  } else {
1876  groupby.push_back(gexpr);
1877  }
1878  gexpr_no++;
1879  }
1880  }
// Validate every target-list expression against the collected GROUP BY list.
1881  if (query.get_num_aggs() > 0 || !groupby.empty()) {
1882  for (auto t : query.get_targetlist()) {
1883  auto e = t->get_expr();
1884  e->check_group_by(groupby);
1885  }
1886  }
1887  query.set_group_by(groupby);
1888 }
1889 
1890 void QuerySpec::analyze_where_clause(const Catalog_Namespace::Catalog& catalog,
1891  Analyzer::Query& query) const {
1892  if (where_clause_ == nullptr) {
1893  query.set_where_predicate(nullptr);
1894  return;
1895  }
1896  auto p = where_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1897  if (p->get_type_info().get_type() != kBOOLEAN) {
1898  throw std::runtime_error("Only boolean expressions can be in WHERE clause.");
1899  }
1900  query.set_where_predicate(p);
1901 }
1902 
1903 void QuerySpec::analyze_select_clause(const Catalog_Namespace::Catalog& catalog,
1904  Analyzer::Query& query) const {
1905  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1906  query.get_targetlist_nonconst();
1907  if (select_clause_.empty()) {
1908  // this means SELECT *
1909  int rte_idx = 0;
1910  for (auto rte : query.get_rangetable()) {
1911  rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1912  }
1913  } else {
1914  for (auto& p : select_clause_) {
1915  const Parser::Expr* select_expr = p->get_select_expr();
1916  // look for the case of range_var.*
1917  if (typeid(*select_expr) == typeid(ColumnRef) &&
1918  dynamic_cast<const ColumnRef*>(select_expr)->get_column() == nullptr) {
1919  const std::string* range_var_name =
1920  dynamic_cast<const ColumnRef*>(select_expr)->get_table();
1921  int rte_idx = query.get_rte_idx(*range_var_name);
1922  if (rte_idx < 0) {
1923  throw std::runtime_error("invalid range variable name: " + *range_var_name);
1924  }
1925  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
1926  rte->expand_star_in_targetlist(catalog, tlist, rte_idx);
1927  } else {
1928  auto e = select_expr->analyze(catalog, query);
1929  std::string resname;
1930 
1931  if (p->get_alias() != nullptr) {
1932  resname = *p->get_alias();
1933  } else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1934  !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1935  auto colvar = std::static_pointer_cast<Analyzer::ColumnVar>(e);
1936  const auto& column_key = colvar->getColumnKey();
1937  const ColumnDescriptor* col_desc =
1938  catalog.getMetadataForColumn(column_key.table_id, column_key.column_id);
1939  resname = col_desc->columnName;
1940  }
1941  if (e->get_type_info().get_type() == kNULLT) {
1942  throw std::runtime_error(
1943  "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1944  }
1945  auto o = std::static_pointer_cast<Analyzer::UOper>(e);
1946  bool unnest = (o != nullptr && o->get_optype() == kUNNEST);
1947  auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1948  tlist.push_back(tle);
1949  }
1950  }
1951  }
1952 }
1953 
1954 void QuerySpec::analyze_from_clause(const Catalog_Namespace::Catalog& catalog,
1955  Analyzer::Query& query) const {
1957  for (auto& p : from_clause_) {
1958  const TableDescriptor* table_desc;
1959  table_desc = catalog.getMetadataForTable(*p->get_table_name());
1960  if (table_desc == nullptr) {
1961  throw std::runtime_error("Table " + *p->get_table_name() + " does not exist.");
1962  }
1963  std::string range_var;
1964  if (p->get_range_var() == nullptr) {
1965  range_var = *p->get_table_name();
1966  } else {
1967  range_var = *p->get_range_var();
1968  }
1969  rte = new Analyzer::RangeTableEntry(range_var, table_desc, nullptr);
1970  query.add_rte(rte);
1971  }
1972 }
1973 
1974 void QuerySpec::analyze(const Catalog_Namespace::Catalog& catalog,
1975  Analyzer::Query& query) const {
1976  query.set_is_distinct(is_distinct_);
1977  analyze_from_clause(catalog, query);
1978  analyze_select_clause(catalog, query);
1979  analyze_where_clause(catalog, query);
1980  analyze_group_by(catalog, query);
1981  analyze_having_clause(catalog, query);
1982 }
1983 
1984 namespace {
1985 
1986 // clean known escape'd chars without having to do a full json parse
1987 std::string unescape(std::string s) {
1988  boost::replace_all(s, "\\\\t", "\t");
1989  boost::replace_all(s, "\\t", "\t");
1990  boost::replace_all(s, "\\\\n", "\n");
1991  boost::replace_all(s, "\\n", "\n");
1992 
1993  // handle numerics
1994  std::smatch m;
1995 
1996  // "\x00"
1997  std::regex e1("(\\\\x[0-9A-Fa-f][0-9A-Fa-f])");
1998  while (std::regex_search(s, m, e1)) {
1999  std::string original(m[0].first, m[0].second);
2000  std::string replacement;
2001  long val = strtol(original.substr(2, 2).c_str(), NULL, 16);
2002  replacement.push_back(val);
2003  boost::replace_all(s, original, replacement);
2004  }
2005 
2006  // "\u0000"
2007  std::regex e2("(\\\\u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])");
2008  while (std::regex_search(s, m, e2)) {
2009  std::string original(m[0].first, m[0].second);
2010  std::string replacement;
2011  long val = strtol(original.substr(2, 4).c_str(), NULL, 16);
2012  replacement.push_back(val);
2013  boost::replace_all(s, original, replacement);
2014  }
2015 
2016  return s;
2017 }
2018 
2019 void parse_options(const rapidjson::Value& payload,
2020  std::list<std::unique_ptr<NameValueAssign>>& nameValueList,
2021  bool stringToNull = false,
2022  bool stringToInteger = false) {
2023  if (payload.HasMember("options") && payload["options"].IsObject()) {
2024  const auto& options = payload["options"];
2025  for (auto itr = options.MemberBegin(); itr != options.MemberEnd(); ++itr) {
2026  auto option_name = std::make_unique<std::string>(itr->name.GetString());
2027  std::unique_ptr<Literal> literal_value;
2028  if (itr->value.IsString()) {
2029  std::string str = itr->value.GetString();
2030  if (stringToNull && str == "") {
2031  literal_value = std::make_unique<NullLiteral>();
2032  } else if (stringToInteger && std::all_of(str.begin(), str.end(), ::isdigit)) {
2033  int iVal = std::stoi(str);
2034  literal_value = std::make_unique<IntLiteral>(iVal);
2035  } else {
2036  // Rapidjson will deliberately provide escape'd strings when accessed
2037  // ... but the literal should have a copy of the raw unescaped string
2038  auto unique_literal_string = std::make_unique<std::string>(unescape(str));
2039  literal_value =
2040  std::make_unique<StringLiteral>(unique_literal_string.release());
2041  }
2042  } else if (itr->value.IsInt() || itr->value.IsInt64()) {
2043  literal_value = std::make_unique<IntLiteral>(json_i64(itr->value));
2044  } else if (itr->value.IsDouble()) {
2045  literal_value = std::make_unique<DoubleLiteral>(json_double(itr->value));
2046  } else if (itr->value.IsNull()) {
2047  literal_value = std::make_unique<NullLiteral>();
2048  } else {
2049  throw std::runtime_error("Unable to handle literal for " + *option_name);
2050  }
2051  CHECK(literal_value);
2052 
2053  nameValueList.emplace_back(std::make_unique<NameValueAssign>(
2054  option_name.release(), literal_value.release()));
2055  }
2056  }
2057 }
2058 } // namespace
2059 
2060 void SelectStmt::analyze(const Catalog_Namespace::Catalog& catalog,
2061  Analyzer::Query& query) const {
2062  query.set_stmt_type(kSELECT);
2063  query.set_limit(limit_);
2064  if (offset_ < 0) {
2065  throw std::runtime_error("OFFSET cannot be negative.");
2066  }
2067  query.set_offset(offset_);
2068  query_expr_->analyze(catalog, query);
2069  if (orderby_clause_.empty() && !query.get_is_distinct()) {
2070  query.set_order_by(nullptr);
2071  return;
2072  }
2073  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
2074  query.get_targetlist();
2075  std::list<Analyzer::OrderEntry>* order_by = new std::list<Analyzer::OrderEntry>();
2076  if (!orderby_clause_.empty()) {
2077  for (auto& p : orderby_clause_) {
2078  int tle_no = p->get_colno();
2079  if (tle_no == 0) {
2080  // use column name
2081  // search through targetlist for matching name
2082  const std::string* name = p->get_column()->get_column();
2083  tle_no = 1;
2084  bool found = false;
2085  for (auto tle : tlist) {
2086  if (tle->get_resname() == *name) {
2087  found = true;
2088  break;
2089  }
2090  tle_no++;
2091  }
2092  if (!found) {
2093  throw std::runtime_error("invalid name in order by: " + *name);
2094  }
2095  }
2096  order_by->push_back(
2097  Analyzer::OrderEntry(tle_no, p->get_is_desc(), p->get_nulls_first()));
2098  }
2099  }
2100  if (query.get_is_distinct()) {
2101  // extend order_by to include all targetlist entries.
2102  for (int i = 1; i <= static_cast<int>(tlist.size()); i++) {
2103  bool in_orderby = false;
2104  std::for_each(order_by->begin(),
2105  order_by->end(),
2106  [&in_orderby, i](const Analyzer::OrderEntry& oe) {
2107  in_orderby = in_orderby || (i == oe.tle_no);
2108  });
2109  if (!in_orderby) {
2110  order_by->push_back(Analyzer::OrderEntry(i, false, false));
2111  }
2112  }
2113  }
2114  query.set_order_by(order_by);
2115 }
2116 
2117 std::string SelectEntry::to_string() const {
2118  std::string str = select_expr_->to_string();
2119  if (alias_ != nullptr) {
2120  str += " AS " + *alias_;
2121  }
2122  return str;
2123 }
2124 
2125 std::string TableRef::to_string() const {
2126  std::string str = *table_name_;
2127  if (range_var_ != nullptr) {
2128  str += " " + *range_var_;
2129  }
2130  return str;
2131 }
2132 
2133 std::string ColumnRef::to_string() const {
2134  std::string str;
2135  if (table_ == nullptr) {
2136  str = *column_;
2137  } else if (column_ == nullptr) {
2138  str = *table_ + ".*";
2139  } else {
2140  str = *table_ + "." + *column_;
2141  }
2142  return str;
2143 }
2144 
2145 std::string OperExpr::to_string() const {
2146  std::string op_str[] = {
2147  "=", "===", "<>", "<", ">", "<=", ">=", " AND ", " OR ", "NOT", "-", "+", "*", "/"};
2148  std::string str;
2149  if (optype_ == kUMINUS) {
2150  str = "-(" + left_->to_string() + ")";
2151  } else if (optype_ == kNOT) {
2152  str = "NOT (" + left_->to_string() + ")";
2153  } else if (optype_ == kARRAY_AT) {
2154  str = left_->to_string() + "[" + right_->to_string() + "]";
2155  } else if (optype_ == kUNNEST) {
2156  str = "UNNEST(" + left_->to_string() + ")";
2157  } else if (optype_ == kIN) {
2158  str = "(" + left_->to_string() + " IN " + right_->to_string() + ")";
2159  } else {
2160  str = "(" + left_->to_string() + op_str[optype_] + right_->to_string() + ")";
2161  }
2162  return str;
2163 }
2164 
2165 std::string InExpr::to_string() const {
2166  std::string str = arg_->to_string();
2167  if (is_not_) {
2168  str += " NOT IN ";
2169  } else {
2170  str += " IN ";
2171  }
2172  return str;
2173 }
2174 
2175 std::string ExistsExpr::to_string() const {
2176  return "EXISTS (" + query_->to_string() + ")";
2177 }
2178 
2179 std::string SubqueryExpr::to_string() const {
2180  std::string str;
2181  str = "(";
2182  str += query_->to_string();
2183  str += ")";
2184  return str;
2185 }
2186 
2187 std::string IsNullExpr::to_string() const {
2188  std::string str = arg_->to_string();
2189  if (is_not_) {
2190  str += " IS NOT NULL";
2191  } else {
2192  str += " IS NULL";
2193  }
2194  return str;
2195 }
2196 
2197 std::string InSubquery::to_string() const {
2198  std::string str = InExpr::to_string();
2199  str += subquery_->to_string();
2200  return str;
2201 }
2202 
2203 std::string InValues::to_string() const {
2204  std::string str = InExpr::to_string() + "(";
2205  bool notfirst = false;
2206  for (auto& p : value_list_) {
2207  if (notfirst) {
2208  str += ", ";
2209  } else {
2210  notfirst = true;
2211  }
2212  str += p->to_string();
2213  }
2214  str += ")";
2215  return str;
2216 }
2217 
2218 std::string BetweenExpr::to_string() const {
2219  std::string str = arg_->to_string();
2220  if (is_not_) {
2221  str += " NOT BETWEEN ";
2222  } else {
2223  str += " BETWEEN ";
2224  }
2225  str += lower_->to_string() + " AND " + upper_->to_string();
2226  return str;
2227 }
2228 
2229 std::string CharLengthExpr::to_string() const {
2230  std::string str;
2231  if (calc_encoded_length_) {
2232  str = "CHAR_LENGTH (" + arg_->to_string() + ")";
2233  } else {
2234  str = "LENGTH (" + arg_->to_string() + ")";
2235  }
2236  return str;
2237 }
2238 
2239 std::string CardinalityExpr::to_string() const {
2240  std::string str = "CARDINALITY(" + arg_->to_string() + ")";
2241  return str;
2242 }
2243 
2244 std::string LikeExpr::to_string() const {
2245  std::string str = arg_->to_string();
2246  if (is_not_) {
2247  str += " NOT LIKE ";
2248  } else {
2249  str += " LIKE ";
2250  }
2251  str += like_string_->to_string();
2252  if (escape_string_ != nullptr) {
2253  str += " ESCAPE " + escape_string_->to_string();
2254  }
2255  return str;
2256 }
2257 
2258 std::string RegexpExpr::to_string() const {
2259  std::string str = arg_->to_string();
2260  if (is_not_) {
2261  str += " NOT REGEXP ";
2262  } else {
2263  str += " REGEXP ";
2264  }
2265  str += pattern_string_->to_string();
2266  if (escape_string_ != nullptr) {
2267  str += " ESCAPE " + escape_string_->to_string();
2268  }
2269  return str;
2270 }
2271 
2272 std::string WidthBucketExpr::to_string() const {
2273  std::string str = " WIDTH_BUCKET ";
2274  str += target_value_->to_string();
2275  str += " ";
2276  str += lower_bound_->to_string();
2277  str += " ";
2278  str += upper_bound_->to_string();
2279  str += " ";
2280  str += partition_count_->to_string();
2281  str += " ";
2282  return str;
2283 }
2284 
2285 std::string LikelihoodExpr::to_string() const {
2286  std::string str = " LIKELIHOOD ";
2287  str += arg_->to_string();
2288  str += " ";
2289  str += boost::lexical_cast<std::string>(is_not_ ? 1.0 - likelihood_ : likelihood_);
2290  return str;
2291 }
2292 
2293 std::string FunctionRef::to_string() const {
2294  std::string str = *name_ + "(";
2295  if (distinct_) {
2296  str += "DISTINCT ";
2297  }
2298  if (arg_ == nullptr) {
2299  str += "*)";
2300  } else {
2301  str += arg_->to_string() + ")";
2302  }
2303  return str;
2304 }
2305 
2306 std::string QuerySpec::to_string() const {
2307  std::string query_str = "SELECT ";
2308  if (is_distinct_) {
2309  query_str += "DISTINCT ";
2310  }
2311  if (select_clause_.empty()) {
2312  query_str += "* ";
2313  } else {
2314  bool notfirst = false;
2315  for (auto& p : select_clause_) {
2316  if (notfirst) {
2317  query_str += ", ";
2318  } else {
2319  notfirst = true;
2320  }
2321  query_str += p->to_string();
2322  }
2323  }
2324  query_str += " FROM ";
2325  bool notfirst = false;
2326  for (auto& p : from_clause_) {
2327  if (notfirst) {
2328  query_str += ", ";
2329  } else {
2330  notfirst = true;
2331  }
2332  query_str += p->to_string();
2333  }
2334  if (where_clause_) {
2335  query_str += " WHERE " + where_clause_->to_string();
2336  }
2337  if (!groupby_clause_.empty()) {
2338  query_str += " GROUP BY ";
2339  bool notfirst = false;
2340  for (auto& p : groupby_clause_) {
2341  if (notfirst) {
2342  query_str += ", ";
2343  } else {
2344  notfirst = true;
2345  }
2346  query_str += p->to_string();
2347  }
2348  }
2349  if (having_clause_) {
2350  query_str += " HAVING " + having_clause_->to_string();
2351  }
2352  query_str += ";";
2353  return query_str;
2354 }
2355 
// Common analysis for INSERT statements: validates the target table and
// records on `query` the statement type, the result table id and the list of
// target column ids.
//
// Without an explicit column list, the ids of the columns returned by
// getAllColumnMetadataForTable() are used. With an explicit list, each named
// column is looked up; for a column whose type reports physical columns (CHECKed
// to be a geometry type) the ids columnId + 1 .. columnId + N are appended right
// after it.
//
// Throws std::runtime_error when the table does not exist, is a view, a named
// column does not exist, or a physical column's metadata is missing.
//
// NOTE(review): one line of this listing appears to be missing between the
// view check and set_result_table_id() - confirm against the upstream file.
2356 void InsertStmt::analyze(const Catalog_Namespace::Catalog& catalog,
2357  Analyzer::Query& query) const {
2358  query.set_stmt_type(kINSERT);
2359  const TableDescriptor* td = catalog.getMetadataForTable(*table_);
2360  if (td == nullptr) {
2361  throw std::runtime_error("Table " + *table_ + " does not exist.");
2362  }
2363  if (td->isView) {
2364  throw std::runtime_error("Insert to views is not supported yet.");
2365  }
2367  query.set_result_table_id(td->tableId);
2368  std::list<int> result_col_list;
2369  if (column_list_.empty()) {
// No explicit column list: target every column the catalog returns here.
2370  const std::list<const ColumnDescriptor*> all_cols =
2371  catalog.getAllColumnMetadataForTable(td->tableId, false, false, true);
2372  for (auto cd : all_cols) {
2373  result_col_list.push_back(cd->columnId);
2374  }
2375  } else {
2376  for (auto& c : column_list_) {
2377  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
2378  if (cd == nullptr) {
2379  throw std::runtime_error("Column " + *c + " does not exist.");
2380  }
2381  result_col_list.push_back(cd->columnId);
2382  const auto& col_ti = cd->columnType;
// Physical columns are addressed by the ids directly following the
// logical column's id.
2383  if (col_ti.get_physical_cols() > 0) {
2384  CHECK(cd->columnType.is_geometry());
2385  for (auto i = 1; i <= col_ti.get_physical_cols(); i++) {
2386  const ColumnDescriptor* pcd =
2387  catalog.getMetadataForColumn(td->tableId, cd->columnId + i);
2388  if (pcd == nullptr) {
2389  throw std::runtime_error("Column " + *c + "'s metadata is incomplete.");
2390  }
2391  result_col_list.push_back(pcd->columnId);
2392  }
2393  }
2394  }
2395  }
2396  query.set_result_col_list(result_col_list);
2397 }
2398 
2399 namespace {
2400 Literal* parse_insert_literal(const rapidjson::Value& literal) {
2401  CHECK(literal.IsObject());
2402  CHECK(literal.HasMember("literal"));
2403  CHECK(literal.HasMember("type"));
2404  auto type = json_str(literal["type"]);
2405  if (type == "NULL") {
2406  return new NullLiteral();
2407  } else if (type == "CHAR" || type == "BOOLEAN") {
2408  auto* val = new std::string(json_str(literal["literal"]));
2409  return new StringLiteral(val);
2410  } else if (type == "DECIMAL") {
2411  CHECK(literal.HasMember("scale"));
2412  CHECK(literal.HasMember("precision"));
2413  auto scale = json_i64(literal["scale"]);
2414  auto precision = json_i64(literal["precision"]);
2415  if (scale == 0) {
2416  auto int_val = std::stol(json_str(literal["literal"]));
2417  return new IntLiteral(int_val);
2418  } else if (precision > sql_constants::kMaxNumericPrecision) {
2419  auto dbl_val = std::stod(json_str(literal["literal"]));
2420  return new DoubleLiteral(dbl_val);
2421  } else {
2422  auto* val = new std::string(json_str(literal["literal"]));
2423  return new FixedPtLiteral(val);
2424  }
2425  } else if (type == "DOUBLE") {
2426  auto dbl_val = std::stod(json_str(literal["literal"]));
2427  return new DoubleLiteral(dbl_val);
2428  } else {
2429  CHECK(false) << "Unexpected calcite data type: " << type;
2430  }
2431  return nullptr;
2432 }
2433 
2434 ArrayLiteral* parse_insert_array_literal(const rapidjson::Value& array) {
2435  CHECK(array.IsArray());
2436  auto json_elements = array.GetArray();
2437  auto* elements = new std::list<Expr*>();
2438  for (const auto& e : json_elements) {
2439  elements->push_back(parse_insert_literal(e));
2440  }
2441  return new ArrayLiteral(elements);
2442 }
2443 } // namespace
2444 
2445 InsertValuesStmt::InsertValuesStmt(const Catalog_Namespace::Catalog& catalog,
2446  const rapidjson::Value& payload)
2447  : InsertStmt(nullptr, nullptr) {
2448  CHECK(payload.HasMember("name"));
2449  table_ = std::make_unique<std::string>(json_str(payload["name"]));
2450 
2451  if (payload.HasMember("columns")) {
2452  CHECK(payload["columns"].IsArray());
2453  for (auto& column : payload["columns"].GetArray()) {
2454  std::string s = json_str(column);
2455  column_list_.emplace_back(std::make_unique<std::string>(s));
2456  }
2457  }
2458 
2459  CHECK(payload.HasMember("values") && payload["values"].IsArray());
2460  auto tuples = payload["values"].GetArray();
2461  if (tuples.Empty()) {
2462  throw std::runtime_error("Values statement cannot be empty");
2463  }
2464  values_lists_.reserve(tuples.Size());
2465  int column_offset = 0;
2466  try {
2467  for (const auto& json_tuple : tuples) {
2468  auto values_list = std::make_unique<ValuesList>();
2469  CHECK(json_tuple.IsArray());
2470  auto tuple = json_tuple.GetArray();
2471  column_offset = 0;
2472  for (const auto& value : tuple) {
2473  CHECK(value.IsObject());
2474  if (value.HasMember("array")) {
2475  values_list->push_back(parse_insert_array_literal(value["array"]));
2476  } else {
2477  values_list->push_back(parse_insert_literal(value));
2478  }
2479  ++column_offset;
2480  }
2481  values_lists_.push_back(std::move(values_list));
2482  }
2483  } catch (std::out_of_range const& e) {
2484  auto* td = catalog.getMetadataForTable(*table_, false);
2485  CHECK(td);
2486  auto cds = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
2487  auto target_col_iter = cds.begin();
2488  std::advance(target_col_iter, column_offset);
2489  auto* cd = *target_col_iter;
2490  CHECK(cd);
2491  auto const col_identifier = td->tableName + "." + cd->columnName;
2492  throw std::runtime_error(
2493  "Detected an out-of-range exception when inserting a value into column \"" +
2494  col_identifier + "\"");
2495  }
2496 }
2497 
2499  Analyzer::Query& query) const {
2500  InsertStmt::analyze(catalog, query);
2501  size_t list_size = values_lists_[0]->get_value_list().size();
2502  if (!column_list_.empty()) {
2503  if (list_size != column_list_.size()) {
2504  throw std::runtime_error(
2505  "Numbers of columns and values don't match for the "
2506  "insert.");
2507  }
2508  } else {
2509  const auto tableId = query.get_result_table_id();
2510  const std::list<const ColumnDescriptor*> non_phys_cols =
2511  catalog.getAllColumnMetadataForTable(tableId, false, false, false);
2512  if (non_phys_cols.size() != list_size) {
2513  throw std::runtime_error(
2514  "Number of columns in table does not match the list of values given in the "
2515  "insert.");
2516  }
2517  }
2518  std::vector<const ColumnDescriptor*> cds;
2519  cds.reserve(query.get_result_col_list().size());
2520  for (auto id : query.get_result_col_list()) {
2521  const auto* cd = catalog.getMetadataForColumn(query.get_result_table_id(), id);
2522  CHECK(cd);
2523  cds.push_back(cd);
2524  }
2525  auto& query_values_lists = query.get_values_lists();
2526  query_values_lists.resize(values_lists_.size());
2527  for (size_t i = 0; i < values_lists_.size(); ++i) {
2528  const auto& values_list = values_lists_[i]->get_value_list();
2529  if (values_list.size() != list_size) {
2530  throw std::runtime_error(
2531  "Insert values lists should be of the same size. Expected: " +
2532  std::to_string(list_size) + ", Got: " + std::to_string(values_list.size()));
2533  }
2534  auto& query_values_list = query_values_lists[i];
2535  size_t cds_id = 0;
2536  for (auto& v : values_list) {
2537  auto e = v->analyze(catalog, query);
2538  const auto* cd = cds[cds_id];
2539  const auto& col_ti = cd->columnType;
2540  if (col_ti.get_notnull()) {
2541  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
2542  if (c != nullptr && c->get_is_null()) {
2543  throw std::runtime_error("Cannot insert NULL into column " + cd->columnName);
2544  }
2545  }
2546  e = e->add_cast(col_ti);
2547  query_values_list.emplace_back(new Analyzer::TargetEntry("", e, false));
2548  ++cds_id;
2549 
2550  if (col_ti.get_physical_cols() > 0) {
2551  CHECK(cd->columnType.is_geometry());
2552  auto c = dynamic_cast<const Analyzer::Constant*>(e.get());
2553  if (!c) {
2554  auto uoper = std::dynamic_pointer_cast<Analyzer::UOper>(e);
2555  if (uoper && uoper->get_optype() == kCAST) {
2556  c = dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
2557  }
2558  }
2559  bool is_null = false;
2560  std::string* geo_string{nullptr};
2561  if (c) {
2562  is_null = c->get_is_null();
2563  if (!is_null) {
2564  geo_string = c->get_constval().stringval;
2565  }
2566  }
2567  if (!is_null && !geo_string) {
2568  throw std::runtime_error("Expecting a WKT or WKB hex string for column " +
2569  cd->columnName);
2570  }
2571  std::vector<double> coords;
2572  std::vector<double> bounds;
2573  std::vector<int> ring_sizes;
2574  std::vector<int> poly_rings;
2575  SQLTypeInfo import_ti{cd->columnType};
2576  if (!is_null) {
2577  const bool validate_with_geos_if_available = false;
2579  *geo_string,
2580  import_ti,
2581  coords,
2582  bounds,
2583  ring_sizes,
2584  poly_rings,
2585  validate_with_geos_if_available)) {
2586  throw std::runtime_error("Cannot read geometry to insert into column " +
2587  cd->columnName);
2588  }
2589  if (coords.empty()) {
2590  // Importing from geo_string WKT resulted in empty coords: dealing with a NULL
2591  is_null = true;
2592  }
2593  if (!geo_promoted_type_match(import_ti.get_type(), cd->columnType.get_type())) {
2594  throw std::runtime_error(
2595  "Imported geometry doesn't match the type of column " + cd->columnName);
2596  }
2597  } else {
2598  // Special case for NULL POINT, push NULL representation to coords
2599  if (cd->columnType.get_type() == kPOINT) {
2600  if (!coords.empty()) {
2601  throw std::runtime_error(
2602  "NULL POINT with unexpected coordinates in column " + cd->columnName);
2603  }
2604  coords.push_back(NULL_ARRAY_DOUBLE);
2605  coords.push_back(NULL_DOUBLE);
2606  }
2607  }
2608 
2609  // TODO: check if import SRID matches columns SRID, may need to transform before
2610  // inserting
2611 
2612  const auto* cd_coords = cds[cds_id];
2613  CHECK_EQ(cd_coords->columnType.get_type(), kARRAY);
2614  CHECK_EQ(cd_coords->columnType.get_subtype(), kTINYINT);
2615  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2616  if (!is_null || cd->columnType.get_type() == kPOINT) {
2617  auto compressed_coords = Geospatial::compress_coords(coords, col_ti);
2618  for (auto cc : compressed_coords) {
2619  Datum d;
2620  d.tinyintval = cc;
2621  auto e = makeExpr<Analyzer::Constant>(kTINYINT, false, d);
2622  value_exprs.push_back(e);
2623  }
2624  }
2625  query_values_list.emplace_back(new Analyzer::TargetEntry(
2626  "",
2627  makeExpr<Analyzer::Constant>(cd_coords->columnType, is_null, value_exprs),
2628  false));
2629  ++cds_id;
2630 
2631  if (cd->columnType.get_type() == kMULTILINESTRING ||
2632  cd->columnType.get_type() == kPOLYGON ||
2633  cd->columnType.get_type() == kMULTIPOLYGON) {
2634  // Put [linest]ring sizes array into separate physical column
2635  const auto* cd_ring_sizes = cds[cds_id];
2636  CHECK(cd_ring_sizes);
2637  CHECK_EQ(cd_ring_sizes->columnType.get_type(), kARRAY);
2638  CHECK_EQ(cd_ring_sizes->columnType.get_subtype(), kINT);
2639  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2640  if (!is_null) {
2641  for (auto c : ring_sizes) {
2642  Datum d;
2643  d.intval = c;
2644  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
2645  value_exprs.push_back(e);
2646  }
2647  }
2648  query_values_list.emplace_back(new Analyzer::TargetEntry(
2649  "",
2650  makeExpr<Analyzer::Constant>(
2651  cd_ring_sizes->columnType, is_null, value_exprs),
2652  false));
2653  ++cds_id;
2654 
2655  if (cd->columnType.get_type() == kMULTIPOLYGON) {
2656  // Put poly_rings array into separate physical column
2657  const auto* cd_poly_rings = cds[cds_id];
2658  CHECK(cd_poly_rings);
2659  CHECK_EQ(cd_poly_rings->columnType.get_type(), kARRAY);
2660  CHECK_EQ(cd_poly_rings->columnType.get_subtype(), kINT);
2661  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2662  if (!is_null) {
2663  for (auto c : poly_rings) {
2664  Datum d;
2665  d.intval = c;
2666  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
2667  value_exprs.push_back(e);
2668  }
2669  }
2670  query_values_list.emplace_back(new Analyzer::TargetEntry(
2671  "",
2672  makeExpr<Analyzer::Constant>(
2673  cd_poly_rings->columnType, is_null, value_exprs),
2674  false));
2675  ++cds_id;
2676  }
2677  }
2678 
2679  if (cd->columnType.get_type() == kMULTIPOINT ||
2680  cd->columnType.get_type() == kLINESTRING ||
2681  cd->columnType.get_type() == kMULTILINESTRING ||
2682  cd->columnType.get_type() == kPOLYGON ||
2683  cd->columnType.get_type() == kMULTIPOLYGON) {
2684  const auto* cd_bounds = cds[cds_id];
2685  CHECK(cd_bounds);
2686  CHECK_EQ(cd_bounds->columnType.get_type(), kARRAY);
2687  CHECK_EQ(cd_bounds->columnType.get_subtype(), kDOUBLE);
2688  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2689  if (!is_null) {
2690  for (auto b : bounds) {
2691  Datum d;
2692  d.doubleval = b;
2693  auto e = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
2694  value_exprs.push_back(e);
2695  }
2696  }
2697  query_values_list.emplace_back(new Analyzer::TargetEntry(
2698  "",
2699  makeExpr<Analyzer::Constant>(cd_bounds->columnType, is_null, value_exprs),
2700  false));
2701  ++cds_id;
2702  }
2703  }
2704  }
2705  }
2706 }
2707 
2709  bool read_only_mode) {
2710  if (read_only_mode) {
2711  throw std::runtime_error("INSERT values invalid in read only mode.");
2712  }
2713  auto execute_read_lock = legacylockmgr::getExecuteReadLock();
2714  auto& catalog = session.getCatalog();
2715  const auto td_with_lock =
2717  catalog, *table_);
2720  *table_)) {
2721  throw std::runtime_error("User has no insert privileges on " + *table_ + ".");
2722  }
2723  Analyzer::Query query;
2724  analyze(catalog, query);
2725 
2726  // Take an insert data write lock, which prevents concurrent inserts.
2727  const auto insert_data_lock =
2729 
2730  // NOTE(max): we do the same checks as below just a few calls earlier in analyze().
2731  // Do we keep those intentionally to make sure nothing changed in between w/o
2732  // catalog locks or is it just a duplicate work?
2733  auto td = td_with_lock();
2734  CHECK(td);
2735  if (td->isView) {
2736  throw std::runtime_error("Singleton inserts on views is not supported.");
2737  }
2739 
2741  RelAlgExecutor ra_executor(executor.get());
2742 
2743  if (!leafs_connector_) {
2744  leafs_connector_ = std::make_unique<Fragmenter_Namespace::LocalInsertConnector>();
2745  }
2747  try {
2748  ra_executor.executeSimpleInsert(query, insert_data_loader, session);
2749  } catch (...) {
2750  try {
2751  leafs_connector_->rollback(session, td->tableId);
2752  } catch (std::exception& e) {
2753  LOG(ERROR) << "An error occurred during insert rollback attempt. Table id: "
2754  << td->tableId << ", Error: " << e.what();
2755  }
2756  throw;
2757  }
2758  if (!td->isTemporaryTable()) {
2759  leafs_connector_->checkpoint(session, td->tableId);
2760  }
2761 }
2762 
2764  Analyzer::Query& query) const {
2765  throw std::runtime_error("UPDATE statement not supported yet.");
2766 }
2767 
2769  Analyzer::Query& query) const {
2770  throw std::runtime_error("DELETE statement not supported yet.");
2771 }
2772 
2773 namespace {
2774 
2776  const auto& col_ti = cd.columnType;
2777  if (!col_ti.is_integer() && !col_ti.is_time() &&
2778  !(col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT)) {
2779  throw std::runtime_error("Cannot shard on type " + col_ti.get_type_name() +
2780  ", encoding " + col_ti.get_compression_name());
2781  }
2782 }
2783 
2784 size_t shard_column_index(const std::string& name,
2785  const std::list<ColumnDescriptor>& columns) {
2786  size_t index = 1;
2787  for (const auto& cd : columns) {
2788  if (cd.columnName == name) {
2790  return index;
2791  }
2792  ++index;
2793  if (cd.columnType.is_geometry()) {
2794  index += cd.columnType.get_physical_cols();
2795  }
2796  }
2797  // Not found, return 0
2798  return 0;
2799 }
2800 
2801 size_t sort_column_index(const std::string& name,
2802  const std::list<ColumnDescriptor>& columns) {
2803  size_t index = 1;
2804  for (const auto& cd : columns) {
2805  if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
2806  return index;
2807  }
2808  ++index;
2809  if (cd.columnType.is_geometry()) {
2810  index += cd.columnType.get_physical_cols();
2811  }
2812  }
2813  // Not found, return 0
2814  return 0;
2815 }
2816 
2817 void set_string_field(rapidjson::Value& obj,
2818  const std::string& field_name,
2819  const std::string& field_value,
2820  rapidjson::Document& document) {
2821  rapidjson::Value field_name_json_str;
2822  field_name_json_str.SetString(
2823  field_name.c_str(), field_name.size(), document.GetAllocator());
2824  rapidjson::Value field_value_json_str;
2825  field_value_json_str.SetString(
2826  field_value.c_str(), field_value.size(), document.GetAllocator());
2827  obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
2828 }
2829 
2831  const ShardKeyDef* shard_key_def,
2832  const std::vector<SharedDictionaryDef>& shared_dict_defs) {
2833  rapidjson::Document document;
2834  auto& allocator = document.GetAllocator();
2835  rapidjson::Value arr(rapidjson::kArrayType);
2836  if (shard_key_def) {
2837  rapidjson::Value shard_key_obj(rapidjson::kObjectType);
2838  set_string_field(shard_key_obj, "type", "SHARD KEY", document);
2839  set_string_field(shard_key_obj, "name", shard_key_def->get_column(), document);
2840  arr.PushBack(shard_key_obj, allocator);
2841  }
2842  for (const auto& shared_dict_def : shared_dict_defs) {
2843  rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
2844  set_string_field(shared_dict_obj, "type", "SHARED DICTIONARY", document);
2845  set_string_field(shared_dict_obj, "name", shared_dict_def.get_column(), document);
2847  shared_dict_obj, "foreign_table", shared_dict_def.get_foreign_table(), document);
2848  set_string_field(shared_dict_obj,
2849  "foreign_column",
2850  shared_dict_def.get_foreign_column(),
2851  document);
2852  arr.PushBack(shared_dict_obj, allocator);
2853  }
2854  rapidjson::StringBuffer buffer;
2855  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
2856  arr.Accept(writer);
2857  return buffer.GetString();
2858 }
2859 
2860 template <typename LITERAL_TYPE,
2861  typename ASSIGNMENT,
2862  typename VALIDATE = DefaultValidate<LITERAL_TYPE>>
2863 decltype(auto) get_property_value(const NameValueAssign* p,
2864  ASSIGNMENT op,
2865  VALIDATE validate = VALIDATE()) {
2866  const auto val = validate(p);
2867  return op(val);
2868 }
2869 
2871  const NameValueAssign* p,
2872  const std::list<ColumnDescriptor>& columns) {
2873  auto assignment = [&td](const auto val) { td.storageType = val; };
2874  return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
2875  p, assignment);
2876 }
2877 
2879  const NameValueAssign* p,
2880  const std::list<ColumnDescriptor>& columns) {
2881  return get_property_value<IntLiteral>(p, [&td](const auto val) {
2882  td.maxFragRows = validate_and_get_fragment_size(std::to_string(val));
2883  });
2884 }
2885 
2887  const NameValueAssign* p,
2888  const std::list<ColumnDescriptor>& columns) {
2889  return get_property_value<IntLiteral>(
2890  p, [&df_td](const auto val) { df_td.maxFragRows = val; });
2891 }
2892 
2894  const NameValueAssign* p,
2895  const std::list<ColumnDescriptor>& columns) {
2896  return get_property_value<IntLiteral>(p,
2897  [&td](const auto val) { td.maxChunkSize = val; });
2898 }
2899 
2901  DataframeTableDescriptor& df_td,
2902  const NameValueAssign* p,
2903  const std::list<ColumnDescriptor>& columns) {
2904  return get_property_value<IntLiteral>(
2905  p, [&df_td](const auto val) { df_td.maxChunkSize = val; });
2906 }
2907 
2909  const NameValueAssign* p,
2910  const std::list<ColumnDescriptor>& columns) {
2911  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2912  if (val.size() != 1) {
2913  throw std::runtime_error("Length of DELIMITER must be equal to 1.");
2914  }
2915  df_td.delimiter = val;
2916  });
2917 }
2918 
2920  const NameValueAssign* p,
2921  const std::list<ColumnDescriptor>& columns) {
2922  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2923  if (val == "FALSE") {
2924  df_td.hasHeader = false;
2925  } else if (val == "TRUE") {
2926  df_td.hasHeader = true;
2927  } else {
2928  throw std::runtime_error("Option HEADER support only 'true' or 'false' values.");
2929  }
2930  });
2931 }
2932 
2934  const NameValueAssign* p,
2935  const std::list<ColumnDescriptor>& columns) {
2936  return get_property_value<IntLiteral>(p,
2937  [&td](const auto val) { td.fragPageSize = val; });
2938 }
2940  const NameValueAssign* p,
2941  const std::list<ColumnDescriptor>& columns) {
2942  return get_property_value<IntLiteral>(p, [&td](const auto val) { td.maxRows = val; });
2943 }
2944 
2946  const NameValueAssign* p,
2947  const std::list<ColumnDescriptor>& columns) {
2948  return get_property_value<IntLiteral>(
2949  p, [&df_td](const auto val) { df_td.skipRows = val; });
2950 }
2951 
2953  const NameValueAssign* p,
2954  const std::list<ColumnDescriptor>& columns) {
2955  return get_property_value<StringLiteral>(p, [&td](const auto partitions_uc) {
2956  if (partitions_uc != "SHARDED" && partitions_uc != "REPLICATED") {
2957  throw std::runtime_error("PARTITIONS must be SHARDED or REPLICATED");
2958  }
2959  if (td.shardedColumnId != 0 && partitions_uc == "REPLICATED") {
2960  throw std::runtime_error(
2961  "A table cannot be sharded and replicated at the same time");
2962  };
2963  td.partitions = partitions_uc;
2964  });
2965 }
2967  const NameValueAssign* p,
2968  const std::list<ColumnDescriptor>& columns) {
2969  if (!td.shardedColumnId) {
2970  throw std::runtime_error("SHARD KEY must be defined.");
2971  }
2972  return get_property_value<IntLiteral>(p, [&td](const auto shard_count) {
2973  if (g_leaf_count && shard_count % g_leaf_count) {
2974  throw std::runtime_error(
2975  "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2976  }
2977  td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
2978  if (!td.shardedColumnId && !td.nShards) {
2979  throw std::runtime_error(
2980  "Must specify the number of shards through the SHARD_COUNT option");
2981  };
2982  });
2983 }
2984 
2985 decltype(auto) get_vacuum_def(TableDescriptor& td,
2986  const NameValueAssign* p,
2987  const std::list<ColumnDescriptor>& columns) {
2988  return get_property_value<StringLiteral>(p, [&td](const auto vacuum_uc) {
2989  if (vacuum_uc != "IMMEDIATE" && vacuum_uc != "DELAYED") {
2990  throw std::runtime_error("VACUUM must be IMMEDIATE or DELAYED");
2991  }
2992  td.hasDeletedCol = boost::iequals(vacuum_uc, "IMMEDIATE") ? false : true;
2993  });
2994 }
2995 
2997  const NameValueAssign* p,
2998  const std::list<ColumnDescriptor>& columns) {
2999  return get_property_value<StringLiteral>(p, [&td, &columns](const auto sort_upper) {
3000  td.sortedColumnId = sort_column_index(sort_upper, columns);
3001  if (!td.sortedColumnId) {
3002  throw std::runtime_error("Specified sort column " + sort_upper + " doesn't exist");
3003  }
3004  });
3005 }
3006 
3008  const NameValueAssign* p,
3009  const std::list<ColumnDescriptor>& columns) {
3010  auto assignment = [&td](const auto val) {
3011  td.maxRollbackEpochs =
3012  val < 0 ? -1 : val; // Anything < 0 means unlimited rollbacks. Note that 0
3013  // still means keeping a shadow copy of data/metadata
3014  // between epochs so bad writes can be rolled back
3015  };
3016  return get_property_value<IntLiteral, decltype(assignment), PositiveOrZeroValidate>(
3017  p, assignment);
3018 }
3019 
3020 static const std::map<const std::string, const TableDefFuncPtr> tableDefFuncMap = {
3021  {"fragment_size"s, get_frag_size_def},
3022  {"max_chunk_size"s, get_max_chunk_size_def},
3023  {"page_size"s, get_page_size_def},
3024  {"max_rows"s, get_max_rows_def},
3025  {"partitions"s, get_partions_def},
3026  {"shard_count"s, get_shard_count_def},
3027  {"vacuum"s, get_vacuum_def},
3028  {"sort_column"s, get_sort_column_def},
3029  {"storage_type"s, get_storage_type},
3030  {"max_rollback_epochs", get_max_rollback_epochs_def}};
3031 
3033  const std::unique_ptr<NameValueAssign>& p,
3034  const std::list<ColumnDescriptor>& columns) {
3035  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
3036  if (it == tableDefFuncMap.end()) {
3037  throw std::runtime_error(
3038  "Invalid CREATE TABLE option " + *p->get_name() +
3039  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
3040  "MAX_ROWS, "
3041  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE.");
3042  }
3043  return it->second(td, p.get(), columns);
3044 }
3045 
3047  const std::unique_ptr<NameValueAssign>& p,
3048  const std::list<ColumnDescriptor>& columns) {
3049  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
3050  if (it == tableDefFuncMap.end()) {
3051  throw std::runtime_error(
3052  "Invalid CREATE TABLE AS option " + *p->get_name() +
3053  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
3054  "MAX_ROWS, "
3055  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE, "
3056  "USE_SHARED_DICTIONARIES or FORCE_GEO_COMPRESSION.");
3057  }
3058  return it->second(td, p.get(), columns);
3059 }
3060 
3061 static const std::map<const std::string, const DataframeDefFuncPtr> dataframeDefFuncMap =
3062  {{"fragment_size"s, get_frag_size_dataframe_def},
3063  {"max_chunk_size"s, get_max_chunk_size_dataframe_def},
3064  {"skip_rows"s, get_skip_rows_def},
3065  {"delimiter"s, get_delimiter_def},
3066  {"header"s, get_header_def}};
3067 
3069  const std::unique_ptr<NameValueAssign>& p,
3070  const std::list<ColumnDescriptor>& columns) {
3071  const auto it =
3072  dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
3073  if (it == dataframeDefFuncMap.end()) {
3074  throw std::runtime_error(
3075  "Invalid CREATE DATAFRAME option " + *p->get_name() +
3076  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
3077  }
3078  return it->second(df_td, p.get(), columns);
3079 }
3080 
3081 void parse_elements(const rapidjson::Value& payload,
3082  std::string element_name,
3083  std::string& table_name,
3084  std::list<std::unique_ptr<TableElement>>& table_element_list) {
3085  const auto elements = payload[element_name].GetArray();
3086  for (const auto& element : elements) {
3087  CHECK(element.IsObject());
3088  CHECK(element.HasMember("type"));
3089  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
3090  auto col_def = column_from_json(element);
3091  table_element_list.emplace_back(std::move(col_def));
3092  } else if (json_str(element["type"]) == "SQL_COLUMN_CONSTRAINT") {
3093  CHECK(element.HasMember("name"));
3094  if (json_str(element["name"]) == "SHARD_KEY") {
3095  CHECK(element.HasMember("columns"));
3096  CHECK(element["columns"].IsArray());
3097  const auto& columns = element["columns"].GetArray();
3098  if (columns.Size() != size_t(1)) {
3099  throw std::runtime_error("Only one shard column is currently supported.");
3100  }
3101  auto shard_key_def = std::make_unique<ShardKeyDef>(json_str(columns[0]));
3102  table_element_list.emplace_back(std::move(shard_key_def));
3103  } else if (json_str(element["name"]) == "SHARED_DICT") {
3104  CHECK(element.HasMember("columns"));
3105  CHECK(element["columns"].IsArray());
3106  const auto& columns = element["columns"].GetArray();
3107  if (columns.Size() != size_t(1)) {
3108  throw std::runtime_error(
3109  R"(Only one column per shared dictionary entry is currently supported. Use multiple SHARED DICT statements to share dictionaries from multiple columns.)");
3110  }
3111  CHECK(element.HasMember("references") && element["references"].IsObject());
3112  const auto& references = element["references"].GetObject();
3113  std::string references_table_name;
3114  if (references.HasMember("table")) {
3115  references_table_name = json_str(references["table"]);
3116  } else {
3117  references_table_name = table_name;
3118  }
3119  CHECK(references.HasMember("column"));
3120 
3121  auto shared_dict_def = std::make_unique<SharedDictionaryDef>(
3122  json_str(columns[0]), references_table_name, json_str(references["column"]));
3123  table_element_list.emplace_back(std::move(shared_dict_def));
3124 
3125  } else {
3126  LOG(FATAL) << "Unsupported type for SQL_COLUMN_CONSTRAINT: "
3127  << json_str(element["name"]);
3128  }
3129  } else {
3130  LOG(FATAL) << "Unsupported element type for CREATE TABLE: "
3131  << element["type"].GetString();
3132  }
3133  }
3134 }
3135 } // namespace
3136 
3137 std::unique_ptr<ColumnDef> column_from_json(const rapidjson::Value& element) {
3138  CHECK(element.HasMember("name"));
3139  auto col_name = std::make_unique<std::string>(json_str(element["name"]));
3140  CHECK(element.HasMember("sqltype"));
3141  const auto sql_types = to_sql_type(json_str(element["sqltype"]));
3142 
3143  // decimal / numeric precision / scale
3144  int precision = -1;
3145  int scale = -1;
3146  if (element.HasMember("precision")) {
3147  precision = json_i64(element["precision"]);
3148  }
3149  if (element.HasMember("scale")) {
3150  scale = json_i64(element["scale"]);
3151  }
3152 
3153  std::optional<int64_t> array_size;
3154  if (element.HasMember("arraySize")) {
3155  // We do not yet support geo arrays
3156  array_size = json_i64(element["arraySize"]);
3157  }
3158  std::unique_ptr<SQLType> sql_type;
3159  if (element.HasMember("subtype")) {
3160  CHECK(element.HasMember("coordinateSystem"));
3161  const auto subtype_sql_types = to_sql_type(json_str(element["subtype"]));
3162  sql_type =
3163  std::make_unique<SQLType>(subtype_sql_types,
3164  static_cast<int>(sql_types),
3165  static_cast<int>(json_i64(element["coordinateSystem"])),
3166  false);
3167  } else if (precision > 0 && scale > 0) {
3168  sql_type = std::make_unique<SQLType>(sql_types,
3169  precision,
3170  scale,
3171  /*is_array=*/array_size.has_value(),
3172  array_size ? *array_size : -1);
3173  } else if (precision > 0) {
3174  sql_type = std::make_unique<SQLType>(sql_types,
3175  precision,
3176  0,
3177  /*is_array=*/array_size.has_value(),
3178  array_size ? *array_size : -1);
3179  } else {
3180  sql_type = std::make_unique<SQLType>(sql_types,
3181  /*is_array=*/array_size.has_value(),
3182  array_size ? *array_size : -1);
3183  }
3184  CHECK(sql_type);
3185 
3186  CHECK(element.HasMember("nullable"));
3187  const auto nullable = json_bool(element["nullable"]);
3188  std::unique_ptr<ColumnConstraintDef> constraint_def;
3189  StringLiteral* str_literal = nullptr;
3190  if (element.HasMember("default") && !element["default"].IsNull()) {
3191  std::string* defaultval = new std::string(json_str(element["default"]));
3192  boost::algorithm::trim_if(*defaultval, boost::is_any_of(" \"'`"));
3193  str_literal = new StringLiteral(defaultval);
3194  }
3195 
3196  constraint_def = std::make_unique<ColumnConstraintDef>(/*notnull=*/!nullable,
3197  /*unique=*/false,
3198  /*primarykey=*/false,
3199  /*defaultval=*/str_literal);
3200  std::unique_ptr<CompressDef> compress_def;
3201  if (element.HasMember("encodingType") && !element["encodingType"].IsNull()) {
3202  std::string encoding_type = json_str(element["encodingType"]);
3203  CHECK(element.HasMember("encodingSize"));
3204  auto encoding_name = std::make_unique<std::string>(json_str(element["encodingType"]));
3205  compress_def = std::make_unique<CompressDef>(encoding_name.release(),
3206  json_i64(element["encodingSize"]));
3207  }
3208  return std::make_unique<ColumnDef>(col_name.release(),
3209  sql_type.release(),
3210  compress_def ? compress_def.release() : nullptr,
3211  constraint_def ? constraint_def.release() : nullptr);
3212 }
3213 
3214 std::list<ColumnDef> get_columns_from_json_payload(const std::string& payload_key,
3215  const rapidjson::Value& payload) {
3216  std::list<ColumnDef> table_element_list;
3217  CHECK(payload[payload_key].IsArray());
3218 
3219  const auto elements = payload[payload_key].GetArray();
3220  for (const auto& element : elements) {
3221  CHECK(element.IsObject());
3222  CHECK(element.HasMember("type"));
3223  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
3224  auto col_def = column_from_json(element);
3225  table_element_list.emplace_back(std::move(*col_def));
3226  } else {
3227  LOG(FATAL) << "Unsupported element type for ALTER TABLE: "
3228  << element["type"].GetString();
3229  }
3230  }
3231  return table_element_list;
3232 }
3233 
3234 CreateTableStmt::CreateTableStmt(const rapidjson::Value& payload) {
3235  CHECK(payload.HasMember("name"));
3236  table_ = std::make_unique<std::string>(json_str(payload["name"]));
3237  CHECK(payload.HasMember("elements"));
3238  CHECK(payload["elements"].IsArray());
3239 
3240  is_temporary_ = false;
3241  if (payload.HasMember("temporary")) {
3242  is_temporary_ = json_bool(payload["temporary"]);
3243  }
3244 
3245  if_not_exists_ = false;
3246  if (payload.HasMember("ifNotExists")) {
3247  if_not_exists_ = json_bool(payload["ifNotExists"]);
3248  }
3249 
3250  parse_elements(payload, "elements", *table_, table_element_list_);
3251 
3252  parse_options(payload, storage_options_);
3253 }
3254 
3256  TableDescriptor& td,
3257  std::list<ColumnDescriptor>& columns,
3258  std::vector<SharedDictionaryDef>& shared_dict_defs) {
3259  std::unordered_set<std::string> uc_col_names;
3260  const auto& catalog = session.getCatalog();
3261  const ShardKeyDef* shard_key_def{nullptr};
3262  for (auto& e : table_element_list_) {
3263  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
3264  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
3266  this, shared_dict_def, columns, shared_dict_defs, catalog);
3267  shared_dict_defs.push_back(*shared_dict_def);
3268  continue;
3269  }
3270  if (dynamic_cast<ShardKeyDef*>(e.get())) {
3271  if (shard_key_def) {
3272  throw std::runtime_error("Specified more than one shard key");
3273  }
3274  shard_key_def = static_cast<const ShardKeyDef*>(e.get());
3275  continue;
3276  }
3277  if (!dynamic_cast<ColumnDef*>(e.get())) {
3278  throw std::runtime_error("Table constraints are not supported yet.");
3279  }
3280  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
3281  ColumnDescriptor cd;
3282  cd.columnName = *coldef->get_column_name();
3284  setColumnDescriptor(cd, coldef);
3285  columns.push_back(cd);
3286  }
3287 
3288  ddl_utils::set_default_table_attributes(*table_, td, columns.size());
3289 
3290  if (shard_key_def) {
3291  td.shardedColumnId = shard_column_index(shard_key_def->get_column(), columns);
3292  if (!td.shardedColumnId) {
3293  throw std::runtime_error("Specified shard column " + shard_key_def->get_column() +
3294  " doesn't exist");
3295  }
3296  }
3297  if (is_temporary_) {
3299  } else {
3301  }
3302  if (!storage_options_.empty()) {
3303  for (auto& p : storage_options_) {
3304  get_table_definitions(td, p, columns);
3305  }
3306  }
3307  if (td.shardedColumnId && !td.nShards) {
3308  throw std::runtime_error("SHARD_COUNT needs to be specified with SHARD_KEY.");
3309  }
3310  td.keyMetainfo = serialize_key_metainfo(shard_key_def, shared_dict_defs);
3311 }
3312 
3314  bool read_only_mode) {
3315  if (read_only_mode) {
3316  throw std::runtime_error("CREATE TABLE invalid in read only mode.");
3317  }
3318  auto& catalog = session.getCatalog();
3319 
3320  // Until we create the table we don't have a table descriptor to lock and guarantee
3321  // exclusive use of. Because of that we need a global write lock to make sure we have
3322  // exclusive access to the system for now.
3323  const auto execute_write_lock = legacylockmgr::getExecuteWriteLock();
3324 
3325  // check access privileges
3328  throw std::runtime_error("Table " + *table_ +
3329  " will not be created. User has no create privileges.");
3330  }
3331 
3332  if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
3333  return;
3334  }
3335 
3336  TableDescriptor td;
3337  std::list<ColumnDescriptor> columns;
3338  std::vector<SharedDictionaryDef> shared_dict_defs;
3339 
3340  executeDryRun(session, td, columns, shared_dict_defs);
3341  td.userId = session.get_currentUser().userId;
3342 
3343  catalog.createShardedTable(td, columns, shared_dict_defs);
3344  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
3345  // privileges
3346  SysCatalog::instance().createDBObject(
3347  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
3348 }
3349 
3350 CreateDataframeStmt::CreateDataframeStmt(const rapidjson::Value& payload) {
3351  CHECK(payload.HasMember("name"));
3352  table_ = std::make_unique<std::string>(json_str(payload["name"]));
3353 
3354  CHECK(payload.HasMember("elementList"));
3355  parse_elements(payload, "elementList", *table_, table_element_list_);
3356 
3357  CHECK(payload.HasMember("filePath"));
3358  std::string fs = json_str(payload["filePath"]);
3359  // strip leading/trailing spaces/quotes/single quotes
3360  boost::algorithm::trim_if(fs, boost::is_any_of(" \"'`"));
3361  filename_ = std::make_unique<std::string>(fs);
3362 
3363  parse_options(payload, storage_options_);
3364 }
3365 
// Executes a CREATE DATAFRAME statement: validates the request, builds column
// descriptors from the parsed table elements, fills a table descriptor (df_td) and
// registers the table plus its DB object.
// NOTE(review): the first line of this signature, the declaration of df_td and the
// condition of the privilege check are not present in this listing (see the gaps in
// the embedded line numbers) -- presumably CreateDataframeStmt::execute; confirm
// against the real source.
// Throws std::runtime_error in read-only mode, when the privilege check fails, when
// the table already exists, on unsupported table constraints and on duplicate
// column names.
3367  bool read_only_mode) {
3368  if (read_only_mode) {
3369  throw std::runtime_error("CREATE DATAFRAME invalid in read only mode.");
3370  }
3371  auto& catalog = session.getCatalog();
3372 
3373  const auto execute_write_lock = legacylockmgr::getExecuteWriteLock();
3374 
3375  // check access privileges
3378  throw std::runtime_error("Table " + *table_ +
3379  " will not be created. User has no create privileges.");
3380  }
3381 
3382  if (catalog.getMetadataForTable(*table_) != nullptr) {
3383  throw std::runtime_error("Table " + *table_ + " already exists.");
3384  }
3386  std::list<ColumnDescriptor> columns;
3387  std::vector<SharedDictionaryDef> shared_dict_defs;
3388 
// Upper-cased names seen so far; used to reject duplicate columns
// case-insensitively.
3389  std::unordered_set<std::string> uc_col_names;
3390  for (auto& e : table_element_list_) {
// Shared dictionary definitions are collected separately and do not produce a
// column descriptor of their own.
3391  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
3392  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
3394  this, shared_dict_def, columns, shared_dict_defs, catalog);
3395  shared_dict_defs.push_back(*shared_dict_def);
3396  continue;
3397  }
// Anything that is neither a shared dictionary nor a column definition is rejected.
3398  if (!dynamic_cast<ColumnDef*>(e.get())) {
3399  throw std::runtime_error("Table constraints are not supported yet.");
3400  }
3401  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
3402  ColumnDescriptor cd;
3403  cd.columnName = *coldef->get_column_name();
3404  const auto uc_col_name = boost::to_upper_copy<std::string>(cd.columnName);
3405  const auto it_ok = uc_col_names.insert(uc_col_name);
3406  if (!it_ok.second) {
3407  throw std::runtime_error("Column '" + cd.columnName + "' defined more than once");
3408  }
3409  setColumnDescriptor(cd, coldef);
3410  columns.push_back(cd);
3411  }
3412 
3413  df_td.tableName = *table_;
3414  df_td.nColumns = columns.size();
3415  df_td.isView = false;
3416  df_td.fragmenter = nullptr;
3421  df_td.maxRows = DEFAULT_MAX_ROWS;
// Each storage option (WITH clause entry) is applied to the descriptor in turn.
3423  if (!storage_options_.empty()) {
3424  for (auto& p : storage_options_) {
3425  get_dataframe_definitions(df_td, p, columns);
3426  }
3427  }
3428  df_td.keyMetainfo = serialize_key_metainfo(nullptr, shared_dict_defs);
3429  df_td.userId = session.get_currentUser().userId;
// The trimmed file path parsed by the constructor is stored in storageType.
3430  df_td.storageType = *filename_;
3431 
3432  catalog.createShardedTable(df_td, columns, shared_dict_defs);
3433  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
3434  // privileges
3435  SysCatalog::instance().createDBObject(
3436  session.get_currentUser(), df_td.tableName, TableDBObjectType, catalog);
3437 }
3438 
3439 CreateModelStmt::CreateModelStmt(const rapidjson::Value& payload) {
3440  if (!g_enable_ml_functions) {
3441  throw std::runtime_error("Cannot create model. ML functions are disabled.");
3442  }
3443  CHECK(payload.HasMember("name"));
3444  const std::string model_type_str = json_str(payload["type"]);
3445  model_type_ = get_ml_model_type_from_str(model_type_str);
3446  model_name_ = json_str(payload["name"]);
3447  replace_ = false;
3448  if (payload.HasMember("replace")) {
3449  replace_ = json_bool(payload["replace"]);
3450  }
3451 
3452  if_not_exists_ = false;
3453  if (payload.HasMember("ifNotExists")) {
3454  if_not_exists_ = json_bool(payload["ifNotExists"]);
3455  }
3456 
3457  CHECK(payload.HasMember("query"));
3458  select_query_ = json_str(payload["query"]);
3459  std::regex newline_re("\\n");
3460  std::regex backtick_re("`");
3461  select_query_ = std::regex_replace(select_query_, newline_re, " ");
3462  select_query_ = std::regex_replace(select_query_, backtick_re, "");
3463 
3464  // No need to ensure trailing semicolon as we will wrap this select statement
3465  // in a CURSOR as input to the train model table function
3466  parse_options(payload, model_options_);
3467 }
3468 
3469 std::string write_model_params_to_json(const std::string& predicted,
3470  const std::vector<std::string>& features,
3471  const std::string& training_query,
3472  const double data_split_train_fraction,
3473  const double data_split_eval_fraction,
3474  const std::vector<int64_t>& feature_permutations) {
3475  // Create a RapidJSON document
3476  rapidjson::Document doc;
3477  doc.SetObject();
3478 
3479  // Add the fields to the document
3480  rapidjson::Value predicted_value;
3481  predicted_value.SetString(predicted.c_str(), predicted.length(), doc.GetAllocator());
3482  doc.AddMember("predicted", predicted_value, doc.GetAllocator());
3483 
3484  rapidjson::Value features_array(rapidjson::kArrayType);
3485  for (const auto& feature : features) {
3486  rapidjson::Value feature_value;
3487  feature_value.SetString(feature.c_str(), feature.length(), doc.GetAllocator());
3488  features_array.PushBack(feature_value, doc.GetAllocator());
3489  }
3490  doc.AddMember("features", features_array, doc.GetAllocator());
3491 
3492  rapidjson::Value training_query_value;
3493  training_query_value.SetString(
3494  training_query.c_str(), training_query.length(), doc.GetAllocator());
3495  doc.AddMember("training_query", training_query_value, doc.GetAllocator());
3496 
3497  rapidjson::Value data_split_train_fraction_key("data_split_train_fraction",
3498  doc.GetAllocator());
3499 
3500  rapidjson::Value data_split_train_fraction_value(data_split_train_fraction);
3501 
3502  doc.AddMember(
3503  data_split_train_fraction_key, data_split_train_fraction_value, doc.GetAllocator());
3504 
3505  rapidjson::Value data_split_eval_fraction_key("data_split_eval_fraction",
3506  doc.GetAllocator());
3507 
3508  rapidjson::Value data_split_eval_fraction_value(data_split_eval_fraction);
3509 
3510  doc.AddMember(
3511  data_split_eval_fraction_key, data_split_eval_fraction_value, doc.GetAllocator());
3512 
3513  rapidjson::Value feature_permutations_array(rapidjson::kArrayType);
3514  for (const auto& feature_permutation : feature_permutations) {
3515  rapidjson::Value feature_permutation_value;
3516  feature_permutation_value.SetInt64(feature_permutation);
3517  feature_permutations_array.PushBack(feature_permutation_value, doc.GetAllocator());
3518  }
3519  doc.AddMember("feature_permutations", feature_permutations_array, doc.GetAllocator());
3520 
3521  // Convert the document to a JSON string
3522  rapidjson::StringBuffer buffer;
3523  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
3524  doc.Accept(writer);
3525 
3526  return buffer.GetString();
3527 }
3528 
// Decides how CREATE MODEL proceeds when the model name is already taken.
// NOTE(review): the signature and the enclosing "model exists" condition are not
// present in this listing -- presumably CreateModelStmt::check_model_exists();
// confirm against the real source.
// Returns true when the caller should return silently (if_not_exists_ set), false
// when creation may proceed; throws std::runtime_error when neither if_not_exists_
// nor replace_ is set.
3531  if (if_not_exists_) {
3532  // Returning true tells the caller we should just return early and silently (without
3533  // error)
3534  return true;
3535  }
3536  if (!replace_) {
3537  std::ostringstream error_oss;
3538  error_oss << "Model " << get_model_name() << " already exists.";
3539  throw std::runtime_error(error_oss.str());
3540  }
3541  }
3542  // Returning false tells the caller all is clear to proceed with the create model,
3543  // whether that means creating a new one or overwriting an existing model
3544  return false;
3545 }
3546 
// Walks model_options_ and splits them into two groups:
//  * the train/eval data split fractions, stored in data_split_train_fraction_ and
//    data_split_eval_fraction_ after validation;
//  * every other option, appended to options_oss_ as a comma-separated list of
//    "key => value" entries (num_options_ counts them).
// NOTE(review): the signature and the bodies of the final fraction adjustments are
// not present in this listing -- presumably CreateModelStmt::parse_model_options();
// confirm against the real source.
// Throws std::runtime_error on duplicate, non-floating-point or out-of-range
// fractions and on option values that are not string, integer or double literals.
3548  bool train_fraction_specified = false;
3549  bool eval_fraction_specified = false;
3550  for (auto& p : model_options_) {
// Option names are matched case-insensitively.
3551  const auto key = boost::to_lower_copy<std::string>(*p->get_name());
3552  if (key == "train_fraction" || key == "data_split_train_fraction") {
3553  if (train_fraction_specified) {
3554  throw std::runtime_error(
3555  "Error parsing DATA_SPLIT_TRAIN_FRACTION value. "
3556  "Expected only one value.");
3557  }
3558  const DoubleLiteral* fp_literal =
3559  dynamic_cast<const DoubleLiteral*>(p->get_value());
3560  if (fp_literal != nullptr) {
3561  data_split_train_fraction_ = fp_literal->get_doubleval();
// Accepted range for the train fraction is (0.0, 1.0].
3562  if (data_split_train_fraction_ <= 0.0 || data_split_train_fraction_ > 1.0) {
3563  throw std::runtime_error(
3564  "Error parsing DATA_SPLIT_TRAIN_FRACTION value. "
3565  "Expected value between 0.0 and 1.0.");
3566  }
3567  } else {
3568  throw std::runtime_error(
3569  "Error parsing DATA_SPLIT_TRAIN_FRACTION value. "
3570  "Expected floating point value betwen 0.0 and 1.0.");
3571  }
3572  train_fraction_specified = true;
3573  continue;
3574  }
3575  if (key == "eval_fraction" || key == "data_split_eval_fraction") {
3576  if (eval_fraction_specified) {
3577  throw std::runtime_error(
3578  "Error parsing DATA_SPLIT_EVAL_FRACTION value. "
3579  "Expected only one value.");
3580  }
3581  const DoubleLiteral* fp_literal =
3582  dynamic_cast<const DoubleLiteral*>(p->get_value());
3583  if (fp_literal != nullptr) {
3584  data_split_eval_fraction_ = fp_literal->get_doubleval();
// Accepted range for the eval fraction is [0.0, 1.0).
3585  if (data_split_eval_fraction_ < 0.0 || data_split_eval_fraction_ >= 1.0) {
3586  throw std::runtime_error(
3587  "Error parsing DATA_SPLIT_EVAL_FRACTION value. "
3588  "Expected value between 0.0 and 1.0.");
3589  }
3590  } else {
3591  throw std::runtime_error(
3592  "Error parsing DATA_SPLIT_EVAL_FRACTION value. "
3593  "Expected floating point value betwen 0.0 and 1.0.");
3594  }
3595  eval_fraction_specified = true;
3596  continue;
3597  }
// Any other option is forwarded verbatim: emit a separator before every entry
// except the first, then "key => value".
3598  if (num_options_) {
3599  options_oss_ << ", ";
3600  }
3601  num_options_++;
3602  options_oss_ << key << " => ";
// String values are lower-cased and wrapped in single quotes.
3603  const StringLiteral* str_literal = dynamic_cast<const StringLiteral*>(p->get_value());
3604  if (str_literal != nullptr) {
3605  options_oss_ << "'"
3606  << boost::to_lower_copy<std::string>(*str_literal->get_stringval())
3607  << "'";
3608  continue;
3609  }
3610  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
3611  if (int_literal != nullptr) {
3612  options_oss_ << int_literal->get_intval();
3613  continue;
3614  }
3615  const DoubleLiteral* fp_literal = dynamic_cast<const DoubleLiteral*>(p->get_value());
3616  if (fp_literal != nullptr) {
3617  options_oss_ << fp_literal->get_doubleval();
3618  continue;
3619  }
// NOTE(review): at this point "key => " has already been written to options_oss_
// and num_options_ incremented; the exception leaves that partial entry behind.
3620  throw std::runtime_error("Error parsing value.");
3621  }
3622 
3623  // First handle case where data_split_train_fraction was left to default value
3624  // and data_split_eval_fraction was specified. We shouldn't error here,
3625  // but rather set data_split_train_fraction to 1.0 - data_split_eval_fraction
3626  // Likewise if data_split_eval_fraction was left to default value and we have
3627  // a specified data_split_train_fraction, we should set data_split_eval_fraction
3628  // to 1.0 - data_split_train_fraction
3631  } else if (data_split_eval_fraction_ == 0.0 && data_split_train_fraction_ < 1.0) {
3633  }
3634 
3635  // If data_split_train_fraction was specified, and data_split_train_fraction +
3636  // data_split_eval_fraction > 1.0, then we should error
3638  throw std::runtime_error(
3639  "Error parsing DATA_SPLIT_TRAIN_FRACTION and DATA_SPLIT_EVAL_FRACTION values. "
3640  "Expected sum of values to be less than or equal to 1.0.");
3641  }
3642 }
// Derives the query that is actually fed to model training from select_query_.
// NOTE(review): the first line of the signature is not present in this listing --
// train_model() calls this as build_model_query(session_ptr); confirm the exact
// declaration against the real source.
// Steps:
//  1. Runs select_query_ in validate-only mode to obtain its projected columns.
//  2. Records the predicted variable (first column, when the model type is a
//     regression model) and the feature variables in model_feature_vars_.
//  3. If a TEXT feature follows a numeric one, wraps the query in an outer SELECT
//     that lists categorical features before numeric ones, recording the
//     reordering in feature_permutations_.
//  4. If data_split_train_fraction_ < 1.0, appends a SAMPLE_RATIO filter.
// Returns the resulting query text. Throws std::runtime_error for un-aliased
// expressions, a non-numeric predicted column, or features that are neither
// numeric nor string.
3644  const std::shared_ptr<Catalog_Namespace::SessionInfo> session_ptr) {
3645  auto validate_query_state = query_state::QueryState::create(session_ptr, select_query_);
3646 
3647  LocalQueryConnector local_connector;
3648 
// Arguments after the query text: no fragment restriction, validate_only = true,
// allow_interrupt = false.
3649  auto validate_result = local_connector.query(
3650  validate_query_state->createQueryStateProxy(), select_query_, {}, true, false);
3651 
3652  auto column_descriptors_for_model_create =
3653  local_connector.getColumnDescriptors(validate_result, true);
3654 
3655  std::vector<size_t> categorical_feature_idxs;
3656  std::vector<size_t> numeric_feature_idxs;
3657  bool numeric_feature_seen = false;
3658  bool all_categorical_features_placed_first = true;
3659  bool model_has_predicted_var = is_regression_model(model_type_);
3660  model_feature_vars_.reserve(column_descriptors_for_model_create.size() -
3661  (model_has_predicted_var ? 1 : 0));
// is_predicted is true only while looking at the first projected column of a model
// that has a predicted variable.
3662  bool is_predicted = model_has_predicted_var ? true : false;
3663  size_t feature_idx = 0;
3664  for (auto& cd : column_descriptors_for_model_create) {
3665  // Check to see if the projected column is an expression without a user-provided
3666  // alias, as we don't allow this.
3667  if (cd.columnName.rfind("EXPR$", 0) == 0) {
3668  throw std::runtime_error(
3669  "All projected expressions (i.e. col * 2) that are not column references (i.e. "
3670  "col) must be aliased.");
3671  }
3672  if (is_predicted) {
3673  model_predicted_var_ = cd.columnName;
3674  if (!cd.columnType.is_number()) {
3675  throw std::runtime_error(
3676  "Numeric predicted column expression should be first argument to CREATE "
3677  "MODEL.");
3678  }
3679  is_predicted = false;
3680  } else {
// feature_idx counts features only (the predicted column is excluded), so the
// stored indices address model_feature_vars_.
3681  if (cd.columnType.is_number()) {
3682  numeric_feature_idxs.emplace_back(feature_idx);
3683  numeric_feature_seen = true;
3684  } else if (cd.columnType.is_string()) {
3685  categorical_feature_idxs.emplace_back(feature_idx);
3686  if (numeric_feature_seen) {
3687  all_categorical_features_placed_first = false;
3688  }
3689  } else {
3690  throw std::runtime_error("Feature column expression should be numeric or TEXT.");
3691  }
3692  model_feature_vars_.emplace_back(cd.columnName);
3693  feature_idx++;
3694  }
3695  }
3696  auto modified_select_query = select_query_;
// Reordering is only reachable when at least one numeric feature precedes a
// categorical one, so numeric_feature_idxs is non-empty inside this branch.
3697  if (!all_categorical_features_placed_first) {
3698  std::ostringstream modified_query_oss;
3699  modified_query_oss << "SELECT ";
3700  if (model_has_predicted_var) {
3701  modified_query_oss << model_predicted_var_ << ", ";
3702  }
3703  for (auto categorical_feature_idx : categorical_feature_idxs) {
3704  modified_query_oss << model_feature_vars_[categorical_feature_idx] << ", ";
3705  feature_permutations_.emplace_back(static_cast<int64_t>(categorical_feature_idx));
3706  }
3707  for (auto numeric_feature_idx : numeric_feature_idxs) {
3708  modified_query_oss << model_feature_vars_[numeric_feature_idx];
3709  feature_permutations_.emplace_back(static_cast<int64_t>(numeric_feature_idx));
// Indices are unique, so comparing against back() detects the last element.
3710  if (numeric_feature_idx != numeric_feature_idxs.back()) {
3711  modified_query_oss << ", ";
3712  }
3713  }
3714  modified_query_oss << " FROM (" << modified_select_query << ")";
3715  modified_select_query = modified_query_oss.str();
3716  }
3717 
3718  if (data_split_train_fraction_ < 1.0) {
3719  std::ostringstream modified_query_oss;
// A reordered query already ends in "FROM (...)", so the WHERE clause can be
// appended directly; otherwise the original query is wrapped first.
3720  if (all_categorical_features_placed_first) {
3721  modified_query_oss << "SELECT * FROM (" << modified_select_query << ")";
3722  } else {
3723  modified_query_oss << modified_select_query;
3724  }
3725  modified_query_oss << " WHERE SAMPLE_RATIO(" << data_split_train_fraction_ << ")";
3726  modified_select_query = modified_query_oss.str();
3727  }
3728  return modified_select_query;
3729 }
3730 
// Trains the model by composing and running a
// "SELECT * FROM TABLE(<TYPE>_FIT(model_name=>..., data=>CURSOR(...), <options>))"
// query through a LocalQueryConnector.
// NOTE(review): the signature, one statement after the early return and most of the
// model_metadata initializer are not present in this listing -- execute() calls
// this as train_model(session); confirm the details against the real source.
3732  if (check_model_exists()) {
3733  // Will return true if model exists and if_not_exists_ is true, in this
3734  // case we should return only
3735  return;
3736  }
3737 
3739 
// The shared_ptr is built with a null deleter, so it does not own session_copy; the
// copy is a local and must outlive every use of session_ptr below.
3740  auto session_copy = session;
3741  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
3742  &session_copy, boost::null_deleter());
3743 
3744  // We need to do various manipulations on the raw select query, such
3745  // as adding in any sampling or feature permutation logic. All of this
3746  // work is encapsulated in build_model_query
3747 
3748  const auto modified_select_query = build_model_query(session_ptr);
3749 
3750  // We have to base64 encode the model metadata because depending on the query,
3751  // the training data can have single quotes that trips up the parsing of the combined
3752  // select query with this metadata embedded.
3753 
3754  // This is just a temporary workaround until we store this info in the Catalog
3755  // rather than in the stored model pointer itself (and have to pass the metadata
3756  // down through the table function call)
3757  const auto model_metadata =
3760  select_query_,
3764  if (num_options_) {
3765  // The options string does not have a trailing comma,
3766  // so add it
3767  options_oss_ << ", ";
3768  }
3769  options_oss_ << "model_metadata => '" << model_metadata << "'";
3770 
3771  const std::string options_str = options_oss_.str();
3772 
// The table function name is the model type string followed by "_FIT".
3773  const std::string model_train_func = get_ml_model_type_str(model_type_) + "_FIT";
3774 
3775  std::ostringstream model_query_oss;
3776  model_query_oss << "SELECT * FROM TABLE(" << model_train_func << "(model_name=>'"
3777  << get_model_name() << "', data=>CURSOR(" << modified_select_query
3778  << ")";
3779  model_query_oss << ", " << options_str;
3780  model_query_oss << "))";
3781 
3782  std::string wrapped_model_query = model_query_oss.str();
3783  auto query_state = query_state::QueryState::create(session_ptr, wrapped_model_query);
3784  // Don't need result back from query, as the query will create the model
3785  LocalQueryConnector local_connector;
3786  local_connector.query(
3787  query_state->createQueryStateProxy(), wrapped_model_query, {}, false);
3788 }
3789 
// Executes CREATE MODEL: rejects read-only mode, then trains the model and rewraps
// any failure as "Could not create model <name>. <reason>".
// NOTE(review): the first line of the signature is not present in this listing --
// presumably CreateModelStmt::execute(session, read_only_mode); confirm against the
// real source.
// Throws std::runtime_error in read-only mode or when training fails.
3791  bool read_only_mode) {
3792  if (read_only_mode) {
3793  throw std::runtime_error("CREATE MODEL invalid in read only mode.");
3794  }
3795 
3796  try {
3797  train_model(session);
3798  } catch (std::exception& e) {
3799  std::ostringstream error_oss;
3800  // Error messages from table functions come back like this:
3801  // Error executing table function: MLTableFunctions.hpp:269 linear_reg_fit_impl: No
3802  // rows exist in training input. Training input must at least contain 1 row.
3803 
3804  // We want to take everything after the function name, so we will search for the
3805  // third colon.
3806  // Todo(todd): Look at making this less hacky by setting a mode for the table
3807  // function that will return only the core error string and not the prepended
3808  // metadata
3809 
// Returns the text following the third ':' (skipping the colon and the character
// after it), or the whole message when there are fewer than three colons or
// nothing follows the third one.
3810  auto get_error_substring = [](const std::string& message) -> std::string {
// npos + 1 wraps around to 0, so the first search starts at the beginning.
3811  size_t colon_position = std::string::npos;
3812  for (int i = 0; i < 3; ++i) {
3813  colon_position = message.find(':', colon_position + 1);
3814  if (colon_position == std::string::npos) {
3815  return message;
3816  }
3817  }
3818 
3819  if (colon_position + 2 >= message.length()) {
3820  return message;
3821  }
3822  return message.substr(colon_position + 2);
3823  };
3824 
3825  const auto error_substr = get_error_substring(e.what());
3826 
3827  error_oss << "Could not create model " << model_name_ << ". " << error_substr;
3828  throw std::runtime_error(error_oss.str());
3829  }
3830 }
3831 
3832 DropModelStmt::DropModelStmt(const rapidjson::Value& payload) {
3833  CHECK(payload.HasMember("modelName"));
3834  model_name_ = json_str(payload["modelName"]);
3835 
3836  if_exists_ = false;
3837  if (payload.HasMember("ifExists")) {
3838  if_exists_ = json_bool(payload["ifExists"]);
3839  }
3840 }
3841 
// Executes DROP MODEL: rejects read-only mode, then attempts the drop and
// suppresses std::runtime_error failures when IF EXISTS was given.
// NOTE(review): the first line of the signature and the statement inside the try
// block are not present in this listing -- presumably DropModelStmt::execute;
// confirm against the real source.
3843  bool read_only_mode) {
3844  if (read_only_mode) {
3845  throw std::runtime_error("DROP MODEL invalid in read only mode.");
3846  }
3847  try {
3849  } catch (std::runtime_error& e) {
3850  if (!if_exists_) {
// NOTE(review): `throw e;` throws a copy typed as std::runtime_error, so a more
// derived exception type would be sliced; a bare `throw;` would rethrow the
// original object unchanged.
3851  throw e;
3852  }
3853  // If IF EXISTS is set, ignore the error
3854  }
3855 }
3856 
// Plans select_stmt through Calcite, executes it with a RelAlgExecutor and returns
// the resulting rows; the target metadata of the result is written to `targets`.
// Parameters visible here:
//   query_state_proxy       query state / session used for planning and execution
//   select_stmt             SQL text (passed through pg_shim before planning)
//   targets                 output: target metadata of the executed query
//   validate_only           forwarded into the ExecutionOptions initializer
//   outer_fragment_indices  forwarded as the last ExecutionOptions initializer
//   allow_interrupt         forwarded into the ExecutionOptions initializer
// NOTE(review): several lines are not present in this listing, including the
// definitions of `executor` and `co` and some initializer entries, so the meaning
// of the individual positional ExecutionOptions values cannot be confirmed here.
3857 std::shared_ptr<ResultSet> getResultSet(QueryStateProxy query_state_proxy,
3858  const std::string select_stmt,
3859  std::vector<TargetMetaInfo>& targets,
3860  bool validate_only = false,
3861  std::vector<size_t> outer_fragment_indices = {},
3862  bool allow_interrupt = false) {
3863  auto const session = query_state_proxy->getConstSessionInfo();
3864  auto& catalog = session->getCatalog();
3865 
// Without CUDA support compiled in, execution is pinned to the CPU.
3867 #ifdef HAVE_CUDA
3868  const auto device_type = session->get_executor_device_type();
3869 #else
3870  const auto device_type = ExecutorDeviceType::CPU;
3871 #endif // HAVE_CUDA
3872  auto calcite_mgr = catalog.getCalciteMgr();
3873 
3874  // TODO MAT this should actually get the global or the session parameter for
3875  // view optimization
3876  const auto calciteQueryParsingOption =
3877  calcite_mgr->getCalciteQueryParsingOption(true, false, true, false);
3878  const auto calciteOptimizationOption = calcite_mgr->getCalciteOptimizationOption(
3879  false,
3881  {},
3883  const auto query_ra = calcite_mgr
3884  ->process(query_state_proxy,
3885  pg_shim(select_stmt),
3886  calciteQueryParsingOption,
3887  calciteOptimizationOption)
3888  .plan_result;
3889  RelAlgExecutor ra_executor(
3890  executor.get(), query_ra, query_state_proxy->shared_from_this());
3892  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
3893  // struct
3894  ExecutionOptions eo = {false,
3895  false,
3896  true,
3897  false,
3898  true,
3899  false,
3900  false,
3901  validate_only,
3902  false,
3903  10000,
3904  false,
3905  false,
3906  1000,
3907  allow_interrupt,
3911  false,
3912  std::numeric_limits<size_t>::max(),
3914  outer_fragment_indices};
3915 
// `result` starts as a placeholder and is overwritten by the executed query's
// result on the next statement.
3916  ExecutionResult result{std::make_shared<ResultSet>(std::vector<TargetInfo>{},
3919  nullptr,
3920  0,
3921  0),
3922  {}};
3923  result = ra_executor.executeRelAlgQuery(co, eo, false, false, nullptr);
3924  targets = result.getTargetsMeta();
3925 
3926  return result.getRows();
3927 }
3928 
// Plans sql_query_string through Calcite and returns
// RelAlgExecutor::getOuterFragmentCount() for the resulting plan.
// NOTE(review): the first line of the signature, the definition of `executor` and
// some option arguments are not present in this listing -- callers invoke this as
// leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
// confirm the exact declaration against the real source.
3930  std::string& sql_query_string) {
3931  auto const session = query_state_proxy->getConstSessionInfo();
3932  auto& catalog = session->getCatalog();
3933 
// Without CUDA support compiled in, the device type is pinned to the CPU.
3935 #ifdef HAVE_CUDA
3936  const auto device_type = session->get_executor_device_type();
3937 #else
3938  const auto device_type = ExecutorDeviceType::CPU;
3939 #endif // HAVE_CUDA
3940  auto calcite_mgr = catalog.getCalciteMgr();
3941 
3942  // TODO MAT this should actually get the global or the session parameter for
3943  // view optimization
3944  const auto calciteQueryParsingOption =
3945  calcite_mgr->getCalciteQueryParsingOption(true, false, true, false);
3946  const auto calciteOptimizationOption = calcite_mgr->getCalciteOptimizationOption(
3947  false,
3949  {},
3951  const auto query_ra = calcite_mgr
3952  ->process(query_state_proxy,
3953  pg_shim(sql_query_string),
3954  calciteQueryParsingOption,
3955  calciteOptimizationOption)
3956  .plan_result;
3957  RelAlgExecutor ra_executor(executor.get(), query_ra);
3958  CompilationOptions co = {device_type, true, ExecutorOptLevel::Default, false};
3959  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
3960  // struct
3961  ExecutionOptions eo = {false,
3962  false,
3963  true,
3964  false,
3965  true,
3966  false,
3967  false,
3968  false,
3969  false,
3970  10000,
3971  false,
3972  false,
3973  0.9,
3974  false,
3975  false};
3976  return ra_executor.getOuterFragmentCount(co, eo);
3977 }
3978 
// Runs sql_query_string via getResultSet() and packages the rows together with
// their target metadata into an AggregatedResult.
// When allow_interrupt is set, validate_only is not, and the session id is
// non-empty, the query session is first enrolled with the executor in
// PENDING_EXECUTOR state.
// NOTE(review): the first line of the signature, the definition of `executor` and
// one enrollQuerySession argument are not present in this listing -- presumably the
// five-argument LocalQueryConnector::query overload; confirm against the real
// source.
3980  std::string& sql_query_string,
3981  std::vector<size_t> outer_frag_indices,
3982  bool validate_only,
3983  bool allow_interrupt) {
3984  // TODO(PS): Should we be using the shimmed query in getResultSet?
// NOTE(review): pg_shimmed_select_query is not referenced by any line visible here;
// getResultSet() below receives the un-shimmed text and applies pg_shim itself.
3985  std::string pg_shimmed_select_query = pg_shim(sql_query_string);
3986 
3987  std::vector<TargetMetaInfo> target_metainfos;
3989  auto const session = query_state_proxy->getConstSessionInfo();
3990  auto query_session = session ? session->get_session_id() : "";
3991  auto query_submitted_time = query_state_proxy->getQuerySubmittedTime();
3992  if (allow_interrupt && !validate_only && !query_session.empty()) {
3993  executor->enrollQuerySession(query_session,
3994  sql_query_string,
3995  query_submitted_time,
3997  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR);
3998  }
3999  auto result_rows = getResultSet(query_state_proxy,
4000  sql_query_string,
4001  target_metainfos,
4002  validate_only,
4003  outer_frag_indices,
4004  allow_interrupt);
4005  AggregatedResult res = {result_rows, target_metainfos};
4006  return res;
4007 }
4008 
4009 std::vector<AggregatedResult> LocalQueryConnector::query(
4010  QueryStateProxy query_state_proxy,
4011  std::string& sql_query_string,
4012  std::vector<size_t> outer_frag_indices,
4013  bool allow_interrupt) {
4014  auto res = query(
4015  query_state_proxy, sql_query_string, outer_frag_indices, false, allow_interrupt);
4016  return {res};
4017 }
4018 
// Builds one ColumnDescriptor per target of `result`, in target order. Two parallel
// lists are maintained: descriptors as reported by the query, and a variant
// adjusted for use when creating a table. `for_create` selects which list is
// returned.
// NOTE(review): the line declaring the `result` parameter, the condition guarding
// the comp-param reset and one statement in the date branch are not present in this
// listing; confirm those details against the real source.
4019 std::list<ColumnDescriptor> LocalQueryConnector::getColumnDescriptors(
4021  bool for_create) {
4022  std::list<ColumnDescriptor> column_descriptors;
4023  std::list<ColumnDescriptor> column_descriptors_for_create;
4024 
4025  int rowid_suffix = 0;
4026  for (const auto& target_metainfo : result.targets_meta) {
4027  ColumnDescriptor cd;
4028  cd.columnName = target_metainfo.get_resname();
// A projected column literally named "rowid" gets a numeric suffix appended
// (rowid0, rowid1, ...).
4029  if (cd.columnName == "rowid") {
4030  cd.columnName += std::to_string(rowid_suffix++);
4031  }
4032  cd.columnType = target_metainfo.get_physical_type_info();
4033 
4034  ColumnDescriptor cd_for_create = cd;
4035 
4037  // we need to reset the comp param (as this points to the actual dictionary)
4038  if (cd.columnType.is_array()) {
4039  // for dict encoded arrays, it is always 4 bytes
4040  cd_for_create.columnType.set_comp_param(32);
4041  } else {
// Comp param is set to the column's size expressed in bits.
4042  cd_for_create.columnType.set_comp_param(cd.columnType.get_size() * 8);
4043  }
4044  }
4045 
4046  if (cd.columnType.is_date() && !cd.columnType.is_date_in_days()) {
4047  // default to kENCODING_DATE_IN_DAYS encoding
4049  cd_for_create.columnType.set_comp_param(0);
4050  }
4051 
4052  column_descriptors_for_create.push_back(cd_for_create);
4053  column_descriptors.push_back(cd);
4054  }
4055 
4056  if (for_create) {
4057  return column_descriptors_for_create;
4058  }
4059 
4060  return column_descriptors;
4061 }
4062 
// Builds the statement from its JSON payload: reads the target table name ("name"),
// the source query ("query") and an optional list of target column names
// ("columns").
// NOTE(review): the first line of the signature is not present in this listing --
// presumably the InsertIntoTableAsSelectStmt constructor; confirm against the real
// source.
4064  const rapidjson::Value& payload) {
4065  CHECK(payload.HasMember("name"));
4066  table_name_ = json_str(payload["name"]);
4067 
4068  CHECK(payload.HasMember("query"));
4069  select_query_ = json_str(payload["query"]);
4070 
// Flatten the query onto one line and wrap it in parentheses.
4071  boost::replace_all(select_query_, "\n", " ");
4072  select_query_ = "(" + select_query_ + ")";
4073 
4074  if (payload.HasMember("columns")) {
4075  CHECK(payload["columns"].IsArray());
4076  for (auto& column : payload["columns"].GetArray()) {
4077  std::string s = json_str(column);
4078  column_list_.emplace_back(std::unique_ptr<std::string>(new std::string(s)));
4079  }
4080  }
4081 }
4082 
4084  const TableDescriptor* td,
4085  bool validate_table,
4086  bool for_CTAS) {
4087  auto const session = query_state_proxy->getConstSessionInfo();
4088  auto& catalog = session->getCatalog();
4090  bool populate_table = false;
4091 
4092  if (leafs_connector_) {
4093  populate_table = true;
4094  } else {
4095  leafs_connector_ = std::make_unique<LocalQueryConnector>();
4096  if (!g_cluster) {
4097  populate_table = true;
4098  }
4099  }
4100 
4101  auto get_target_column_descriptors = [this, &catalog](const TableDescriptor* td) {
4102  std::vector<const ColumnDescriptor*> target_column_descriptors;
4103  if (column_list_.empty()) {
4104  auto list = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
4105  target_column_descriptors = {std::begin(list), std::end(list)};
4106  } else {
4107  for (auto& c : column_list_) {
4108  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
4109  if (cd == nullptr) {
4110  throw std::runtime_error("Column " + *c + " does not exist.");
4111  }
4112  target_column_descriptors.push_back(cd);
4113  }
4114  }
4115 
4116  return target_column_descriptors;
4117  };
4118 
4119  bool is_temporary = table_is_temporary(td);
4120 
4121  if (validate_table) {
4122  // check access privileges
4123  if (!td) {
4124  throw std::runtime_error("Table " + table_name_ + " does not exist.");
4125  }
4126  if (td->isView) {
4127  throw std::runtime_error("Insert to views is not supported yet.");
4128  }
4129 
4130  if (!session->checkDBAccessPrivileges(DBObjectType::TableDBObjectType,
4132  table_name_)) {
4133  throw std::runtime_error("User has no insert privileges on " + table_name_ + ".");
4134  }
4135 
4136  // only validate the select query so we get the target types
4137  // correctly, but do not populate the result set
4138  LocalQueryConnector local_connector;
4139  auto result = local_connector.query(query_state_proxy, select_query_, {}, true, true);
4140  auto source_column_descriptors = local_connector.getColumnDescriptors(result, false);
4141 
4142  std::vector<const ColumnDescriptor*> target_column_descriptors =
4143  get_target_column_descriptors(td);
4144 
4145  if (source_column_descriptors.size() != target_column_descriptors.size()) {
4146  throw std::runtime_error("The number of source and target columns does not match.");
4147  }
4148 
4149  for (int i = 0; i < source_column_descriptors.size(); i++) {
4150  const ColumnDescriptor* source_cd =
4151  &(*std::next(source_column_descriptors.begin(), i));
4152  const ColumnDescriptor* target_cd = target_column_descriptors.at(i);
4153 
4154  if (source_cd->columnType.get_type() != target_cd->columnType.get_type()) {
4155  auto type_cannot_be_cast = [](const auto& col_type) {
4156  return (col_type.is_time() || col_type.is_geometry() || col_type.is_array() ||
4157  col_type.is_boolean());
4158  };
4159 
4160  if (type_cannot_be_cast(source_cd->columnType) ||
4161  type_cannot_be_cast(target_cd->columnType)) {
4162  throw std::runtime_error("Source '" + source_cd->columnName + " " +
4163  source_cd->columnType.get_type_name() +
4164  "' and target '" + target_cd->columnName + " " +
4165  target_cd->columnType.get_type_name() +
4166  "' column types do not match.");
4167  }
4168  }
4169  if (source_cd->columnType.is_array()) {
4170  if (source_cd->columnType.get_subtype() != target_cd->columnType.get_subtype()) {
4171  throw std::runtime_error("Source '" + source_cd->columnName + " " +
4172  source_cd->columnType.get_type_name() +
4173  "' and target '" + target_cd->columnName + " " +
4174  target_cd->columnType.get_type_name() +
4175  "' array column element types do not match.");
4176  }
4177  }
4178 
4179  if (target_cd->columnType.is_string() && !source_cd->columnType.is_string()) {
4180  throw std::runtime_error("Source '" + source_cd->columnName + " " +
4181  source_cd->columnType.get_type_name() +
4182  "' and target '" + target_cd->columnName + " " +
4183  target_cd->columnType.get_type_name() +
4184  "' column types do not match.");
4185  }
4186 
4187  if (source_cd->columnType.is_decimal() ||
4188  source_cd->columnType.get_elem_type().is_decimal()) {
4189  SQLTypeInfo sourceType = source_cd->columnType;
4190  SQLTypeInfo targetType = target_cd->columnType;
4191 
4192  if (source_cd->columnType.is_array()) {
4193  sourceType = source_cd->columnType.get_elem_type();
4194  targetType = target_cd->columnType.get_elem_type();
4195  }
4196 
4197  if (sourceType.get_scale() != targetType.get_scale()) {
4198  throw std::runtime_error("Source '" + source_cd->columnName + " " +
4199  source_cd->columnType.get_type_name() +
4200  "' and target '" + target_cd->columnName + " " +
4201  target_cd->columnType.get_type_name() +
4202  "' decimal columns scales do not match.");
4203  }
4204  }
4205 
4206  if (source_cd->columnType.is_string()) {
4207  if (!target_cd->columnType.is_string()) {
4208  throw std::runtime_error("Source '" + source_cd->columnName + " " +
4209  source_cd->columnType.get_type_name() +
4210  "' and target '" + target_cd->columnName + " " +
4211  target_cd->columnType.get_type_name() +
4212  "' column types do not match.");
4213  }
4214  if (source_cd->columnType.get_compression() !=
4215  target_cd->columnType.get_compression()) {
4216  throw std::runtime_error("Source '" + source_cd->columnName + " " +
4217  source_cd->columnType.get_type_name() +
4218  "' and target '" + target_cd->columnName + " " +
4219  target_cd->columnType.get_type_name() +
4220  "' columns string encodings do not match.");
4221  }
4222  }
4223 
4224  if (source_cd->columnType.is_timestamp() && target_cd->columnType.is_timestamp()) {
4225  if (source_cd->columnType.get_dimension() !=
4226  target_cd->columnType.get_dimension()) {
4227  throw std::runtime_error("Source '" + source_cd->columnName + " " +
4228  source_cd->columnType.get_type_name() +
4229  "' and target '" + target_cd->columnName + " " +
4230  target_cd->columnType.get_type_name() +
4231  "' timestamp column precisions do not match.");
4232  }
4233  }
4234 
4235  if (!source_cd->columnType.is_string() && !source_cd->columnType.is_geometry() &&
4236  !source_cd->columnType.is_integer() && !source_cd->columnType.is_decimal() &&
4237  !source_cd->columnType.is_date() && !source_cd->columnType.is_time() &&
4238  !source_cd->columnType.is_timestamp() &&
4239  source_cd->columnType.get_size() > target_cd->columnType.get_size()) {
4240  throw std::runtime_error("Source '" + source_cd->columnName + " " +
4241  source_cd->columnType.get_type_name() +
4242  "' and target '" + target_cd->columnName + " " +
4243  target_cd->columnType.get_type_name() +
4244  "' column encoding sizes do not match.");
4245  }
4246  }
4247  }
4248 
4249  if (!populate_table) {
4250  return;
4251  }
4252 
4253  int64_t total_row_count = 0;
4254  int64_t total_source_query_time_ms = 0;
4255  int64_t total_target_value_translate_time_ms = 0;
4256  int64_t total_data_load_time_ms = 0;
4257 
4259  auto target_column_descriptors = get_target_column_descriptors(td);
4260  auto outer_frag_count =
4261  leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
4262 
4263  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
4264  auto query_session = session ? session->get_session_id() : "";
4266  std::string work_type_str = for_CTAS ? "CTAS" : "ITAS";
4267  try {
4268  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
4269  std::vector<size_t> allowed_outer_fragment_indices;
4270 
4271  if (outer_frag_count) {
4272  allowed_outer_fragment_indices.push_back(outer_frag_idx);
4273  }
4274 
4275  const auto query_clock_begin = timer_start();
4276  std::vector<AggregatedResult> query_results =
4277  leafs_connector_->query(query_state_proxy,
4278  select_query_,
4279  allowed_outer_fragment_indices,
4281  total_source_query_time_ms += timer_stop(query_clock_begin);
4282 
4283  auto start_time = query_state_proxy->getQuerySubmittedTime();
4284  auto query_str = "INSERT_DATA for " + work_type_str;
4286  // In the clean-up phase of the query execution for collecting aggregated result
4287  // of SELECT query, we remove its query session info, so we need to enroll the
4288  // session info again
4289  executor->enrollQuerySession(query_session,
4290  query_str,
4291  start_time,
4293  QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
4294  }
4295 
4296  ScopeGuard clearInterruptStatus = [executor, &query_session, &start_time] {
4297  // this data population is non-kernel operation, so we manually cleanup
4298  // the query session info in the cleanup phase
4300  executor->clearQuerySessionStatus(query_session, start_time);
4301  }
4302  };
4303 
4304  for (auto& res : query_results) {
4305  if (UNLIKELY(check_session_interrupted(query_session, executor))) {
4306  throw std::runtime_error(
4307  "Query execution has been interrupted while performing " + work_type_str);
4308  }
4309  auto& result_rows = res.rs;
4310  result_rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
4311  const auto num_rows = result_rows->rowCount();
4312 
4313  if (0 == num_rows) {
4314  continue;
4315  }
4316 
4317  total_row_count += num_rows;
4318 
4319  size_t leaf_count = leafs_connector_->leafCount();
4320 
4321  // ensure that at least 1 row is processed per block up to a maximum of 65536 rows
4322  const size_t rows_per_block =
4323  std::max(std::min(num_rows / leaf_count, size_t(64 * 1024)), size_t(1));
4324 
4325  std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
4326 
4328 
4329  const int num_worker_threads = std::thread::hardware_concurrency();
4330 
4331  std::vector<size_t> thread_start_idx(num_worker_threads),
4332  thread_end_idx(num_worker_threads);
4333  bool can_go_parallel = !result_rows->isTruncated() && rows_per_block > 20000;
4334 
4335  std::atomic<size_t> crt_row_idx{0};
4336 
4337  auto do_work = [&result_rows, &value_converters, &crt_row_idx](
4338  const size_t idx,
4339  const size_t block_end,
4340  const size_t num_cols,
4341  const size_t thread_id,
4342  bool& stop_convert) {
4343  const auto result_row = result_rows->getRowAtNoTranslations(idx);
4344  if (!result_row.empty()) {
4345  size_t target_row = crt_row_idx.fetch_add(1);
4346  if (target_row >= block_end) {
4347  stop_convert = true;
4348  return;
4349  }
4350  for (unsigned int col = 0; col < num_cols; col++) {
4351  const auto& mapd_variant = result_row[col];
4352  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
4353  }
4354  }
4355  };
4356 
4357  auto convert_function = [&thread_start_idx,
4358  &thread_end_idx,
4359  &value_converters,
4360  &executor,
4361  &query_session,
4362  &work_type_str,
4363  &do_work](const int thread_id, const size_t block_end) {
4364  const int num_cols = value_converters.size();
4365  const size_t start = thread_start_idx[thread_id];
4366  const size_t end = thread_end_idx[thread_id];
4367  size_t idx = 0;
4368  bool stop_convert = false;
4370  size_t local_idx = 0;
4371  for (idx = start; idx < end; ++idx, ++local_idx) {
4372  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
4373  check_session_interrupted(query_session, executor))) {
4374  throw std::runtime_error(
4375  "Query execution has been interrupted while performing " +
4376  work_type_str);
4377  }
4378  do_work(idx, block_end, num_cols, thread_id, stop_convert);
4379  if (stop_convert) {
4380  break;
4381  }
4382  }
4383  } else {
4384  for (idx = start; idx < end; ++idx) {
4385  do_work(idx, block_end, num_cols, thread_id, stop_convert);
4386  if (stop_convert) {
4387  break;
4388  }
4389  }
4390  }
4391  thread_start_idx[thread_id] = idx;
4392  };
4393 
4394  auto single_threaded_value_converter =
4395  [&crt_row_idx, &value_converters, &result_rows](const size_t idx,
4396  const size_t block_end,
4397  const size_t num_cols,
4398  bool& stop_convert) {
4399  size_t target_row = crt_row_idx.fetch_add(1);
4400  if (target_row >= block_end) {
4401  stop_convert = true;
4402  return;
4403  }
4404  const auto result_row = result_rows->getNextRow(false, false);
4405  CHECK(!result_row.empty());
4406  for (unsigned int col = 0; col < num_cols; col++) {
4407  const auto& mapd_variant = result_row[col];
4408  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
4409  }
4410  };
4411 
4412  auto single_threaded_convert_function = [&value_converters,
4413  &thread_start_idx,
4414  &thread_end_idx,
4415  &executor,
4416  &query_session,
4417  &work_type_str,
4418  &single_threaded_value_converter](
4419  const int thread_id,
4420  const size_t block_end) {
4421  const int num_cols = value_converters.size();
4422  const size_t start = thread_start_idx[thread_id];
4423  const size_t end = thread_end_idx[thread_id];
4424  size_t idx = 0;
4425  bool stop_convert = false;
4427  size_t local_idx = 0;
4428  for (idx = start; idx < end; ++idx, ++local_idx) {
4429  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
4430  check_session_interrupted(query_session, executor))) {
4431  throw std::runtime_error(
4432  "Query execution has been interrupted while performing " +
4433  work_type_str);
4434  }
4435  single_threaded_value_converter(idx, block_end, num_cols, stop_convert);
4436  if (stop_convert) {
4437  break;
4438  }
4439  }
4440  } else {
4441  for (idx = start; idx < end; ++idx) {
4442  single_threaded_value_converter(idx, end, num_cols, stop_convert);
4443  if (stop_convert) {
4444  break;
4445  }
4446  }
4447  }
4448  thread_start_idx[thread_id] = idx;
4449  };
4450 
4451  if (can_go_parallel) {
4452  const size_t entry_count = result_rows->entryCount();
4453  for (size_t
4454  i = 0,
4455  start_entry = 0,
4456  stride = (entry_count + num_worker_threads - 1) / num_worker_threads;
4457  i < num_worker_threads && start_entry < entry_count;
4458  ++i, start_entry += stride) {
4459  const auto end_entry = std::min(start_entry + stride, entry_count);
4460  thread_start_idx[i] = start_entry;
4461  thread_end_idx[i] = end_entry;
4462  }
4463  } else {
4464  thread_start_idx[0] = 0;
4465  thread_end_idx[0] = result_rows->entryCount();
4466  }
4467 
4468  for (size_t block_start = 0; block_start < num_rows;
4469  block_start += rows_per_block) {
4470  const auto num_rows_this_itr = block_start + rows_per_block < num_rows
4471  ? rows_per_block
4472  : num_rows - block_start;
4473  crt_row_idx = 0; // reset block tracker
4474  value_converters.clear();
4475  int colNum = 0;
4476  for (const auto targetDescriptor : target_column_descriptors) {
4477  auto sourceDataMetaInfo = res.targets_meta[colNum++];
4479  num_rows_this_itr,
4480  sourceDataMetaInfo,
4481  targetDescriptor,
4482  catalog,
4483  targetDescriptor->columnType,
4484  !targetDescriptor->columnType.get_notnull(),
4485  result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
4487  sourceDataMetaInfo.get_type_info().is_dict_encoded_string()
4488  ? executor->getStringDictionaryProxy(
4489  sourceDataMetaInfo.get_type_info().getStringDictKey(),
4490  result_rows->getRowSetMemOwner(),
4491  true)
4492  : nullptr};
4493  auto converter = factory.create(param);
4494  value_converters.push_back(std::move(converter));
4495  }
4496 
4497  const auto translate_clock_begin = timer_start();
4498  if (can_go_parallel) {
4499  std::vector<std::future<void>> worker_threads;
4500  for (int i = 0; i < num_worker_threads; ++i) {
4501  worker_threads.push_back(
4502  std::async(std::launch::async, convert_function, i, num_rows_this_itr));
4503  }
4504 
4505  for (auto& child : worker_threads) {
4506  child.wait();
4507  }
4508  for (auto& child : worker_threads) {
4509  child.get();
4510  }
4511 
4512  } else {
4513  single_threaded_convert_function(0, num_rows_this_itr);
4514  }
4515 
4516  // finalize the insert data
4517  auto finalizer_func =
4518  [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
4519  targetValueConverter->finalizeDataBlocksForInsertData();
4520  };
4521 
4522  std::vector<std::future<void>> worker_threads;
4523  for (auto& converterPtr : value_converters) {
4524  worker_threads.push_back(
4525  std::async(std::launch::async, finalizer_func, converterPtr.get()));
4526  }
4527 
4528  for (auto& child : worker_threads) {
4529  child.wait();
4530  }
4531  for (auto& child : worker_threads) {
4532  child.get();
4533  }
4534 
4536  insert_data.databaseId = catalog.getCurrentDB().dbId;
4537  CHECK(td);
4538  insert_data.tableId = td->tableId;
4539  insert_data.numRows = num_rows_this_itr;
4540 
4541  for (int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
4543  check_session_interrupted(query_session, executor))) {
4544  throw std::runtime_error(
4545  "Query execution has been interrupted while performing " +
4546  work_type_str);
4547  }
4548  value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
4549  }
4550  total_target_value_translate_time_ms += timer_stop(translate_clock_begin);
4551 
4552  const auto data_load_clock_begin = timer_start();
4553  auto data_memory_holder =
4554  import_export::fill_missing_columns(&catalog, insert_data);
4555  insertDataLoader.insertData(*session, insert_data);
4556  total_data_load_time_ms += timer_stop(data_load_clock_begin);
4557  }
4558  }
4559  }
4560  } catch (...) {
4561  try {
4562  leafs_connector_->rollback(*session, td->tableId);
4563  } catch (std::exception& e) {
4564  LOG(ERROR) << "An error occurred during ITAS rollback attempt. Table id: "
4565  << td->tableId << ", Error: " << e.what();
4566  }
4567  throw;
4568  }
4569 
4570  int64_t total_time_ms = total_source_query_time_ms +
4571  total_target_value_translate_time_ms + total_data_load_time_ms;
4572 
4573  VLOG(1) << "CTAS/ITAS " << total_row_count << " rows loaded in " << total_time_ms
4574  << "ms (outer_frag_count=" << outer_frag_count
4575  << ", query_time=" << total_source_query_time_ms
4576  << "ms, translation_time=" << total_target_value_translate_time_ms
4577  << "ms, data_load_time=" << total_data_load_time_ms
4578  << "ms)\nquery: " << select_query_;
4579 
4580  if (!is_temporary) {
4581  leafs_connector_->checkpoint(*session, td->tableId);
4582  }
4583 }
4584 
4585 namespace {
4586 shared::TableKey get_table_key(const std::vector<std::string>& table) {
4587  const auto catalog = SysCatalog::instance().getCatalog(table[1]);
4588  CHECK(catalog);
4589  const auto table_id = catalog->getTableId(table[0]);
4590  if (!table_id.has_value()) {
4591  throw std::runtime_error{"Table \"" + table[0] +
4592  "\" does not exist in catalog: " + table[1] + "."};
4593  }
4594  return {catalog->getDatabaseId(), table_id.value()};
4595 }
4596 
4598  const std::string& insert_table_db_name,
4599  const std::string& query_str,
4600  const QueryStateProxy& query_state_proxy,
4601  const std::optional<std::string>& insert_table_name = {}) {
4602  auto& sys_catalog = SysCatalog::instance();
4603  auto& calcite_mgr = sys_catalog.getCalciteMgr();
4604  const auto calciteQueryParsingOption =
4605  calcite_mgr.getCalciteQueryParsingOption(true, false, true, false);
4606  const auto calciteOptimizationOption = calcite_mgr.getCalciteOptimizationOption(
4607  false, g_enable_watchdog, {}, sys_catalog.isAggregator());
4608  const auto result = calcite_mgr.process(query_state_proxy,
4609  pg_shim(query_str),
4610  calciteQueryParsingOption,
4611  calciteOptimizationOption);
4612  // force sort into tableid order in case of name change to guarantee fixed order of
4613  // mutex access
4614  auto comparator = [](const std::vector<std::string>& table_1,
4615  const std::vector<std::string>& table_2) {
4616  return get_table_key(table_1) < get_table_key(table_2);
4617  };
4618  std::set<std::vector<std::string>, decltype(comparator)> tables(comparator);
4619  for (auto& tab : result.resolved_accessed_objects.tables_selected_from) {
4620  tables.emplace(tab);
4621  }
4622  if (insert_table_name.has_value()) {
4623  tables.emplace(
4624  std::vector<std::string>{insert_table_name.value(), insert_table_db_name});
4625  }
4627  for (const auto& table : tables) {
4628  const auto catalog = sys_catalog.getCatalog(table[1]);
4629  CHECK(catalog);
4630  locks.emplace_back(
4633  *catalog, table[0])));
4634  if (insert_table_name.has_value() && table[0] == insert_table_name.value() &&
4635  table[1] == insert_table_db_name) {
4636  locks.emplace_back(
4639  catalog->getDatabaseId(), (*locks.back())())));
4640  } else {
4641  locks.emplace_back(
4644  catalog->getDatabaseId(), (*locks.back())())));
4645  }
4646  }
4647  return locks;
4648 }
4649 } // namespace
4650 
4652  bool read_only_mode) {
4653  if (read_only_mode) {
4654  throw std::runtime_error("INSERT INTO TABLE invalid in read only mode.");
4655  }
4656  auto session_copy = session;
4657  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4658  &session_copy, boost::null_deleter());
4659  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
4660  auto stdlog = STDLOG(query_state);
4661  auto& catalog = session_ptr->getCatalog();
4662 
4663  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
4664 
4665  if (catalog.getMetadataForTable(table_name_) == nullptr) {
4666  throw std::runtime_error("ITAS failed: table " + table_name_ + " does not exist.");
4667  }
4668 
4669  auto locks = acquire_query_table_locks(
4670  catalog.name(), select_query_, query_state->createQueryStateProxy(), table_name_);
4671  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
4672 
4673  Executor::clearExternalCaches(true, td, catalog.getCurrentDB().dbId);
4674 
4675  try {
4676  populateData(query_state->createQueryStateProxy(), td, true, false);
4677  } catch (...) {
4678  throw;
4679  }
4680 }
4681 
4683  : InsertIntoTableAsSelectStmt(payload) {
4684  if (payload.HasMember("temporary")) {
4685  is_temporary_ = json_bool(payload["temporary"]);
4686  } else {
4687  is_temporary_ = false;
4688  }
4689 
4690  if (payload.HasMember("ifNotExists")) {
4691  if_not_exists_ = json_bool(payload["ifNotExists"]);
4692  } else {
4693  if_not_exists_ = false;
4694  }
4695 
4696  parse_options(payload, storage_options_);
4697 }
4698 
4700  bool read_only_mode) {
4701  if (read_only_mode) {
4702  throw std::runtime_error("CREATE TABLE invalid in read only mode.");
4703  }
4704  auto session_copy = session;
4705  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4706  &session_copy, boost::null_deleter());
4707  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
4708  auto stdlog = STDLOG(query_state);
4709  LocalQueryConnector local_connector;
4710  auto& catalog = session.getCatalog();
4711  bool create_table = nullptr == leafs_connector_;
4712 
4713  std::set<std::string> select_tables;
4714  if (create_table) {
4715  const auto execute_write_lock = legacylockmgr::getExecuteWriteLock();
4716 
4717  // check access privileges
4720  throw std::runtime_error("CTAS failed. Table " + table_name_ +
4721  " will not be created. User has no create privileges.");
4722  }
4723 
4724  if (catalog.getMetadataForTable(table_name_) != nullptr) {
4725  if (if_not_exists_) {
4726  return;
4727  }
4728  throw std::runtime_error("Table " + table_name_ +
4729  " already exists and no data was loaded.");
4730  }
4731 
4732  // only validate the select query so we get the target types
4733  // correctly, but do not populate the result set
4734  // we currently have exclusive access to the system so this is safe
4735  auto validate_result = local_connector.query(
4736  query_state->createQueryStateProxy(), select_query_, {}, true, false);
4737 
4738  auto column_descriptors_for_create =
4739  local_connector.getColumnDescriptors(validate_result, true);
4740 
4741  // some validation as the QE might return some out of range column types
4742  for (auto& cd : column_descriptors_for_create) {
4743  if (cd.columnType.is_decimal() &&
4744  cd.columnType.get_precision() > sql_constants::kMaxNumericPrecision) {
4745  throw std::runtime_error(cd.columnName + ": Precision too high, max " +
4747  ".");
4748  }
4749  }
4750 
4751  TableDescriptor td;
4752  td.tableName = table_name_;
4753  td.userId = session.get_currentUser().userId;
4754  td.nColumns = column_descriptors_for_create.size();
4755  td.isView = false;
4756  td.fragmenter = nullptr;
4763  if (is_temporary_) {
4765  } else {
4767  }
4768 
4769  bool use_shared_dictionaries = true;
4770  bool force_geo_compression = true;
4771 
4772  if (!storage_options_.empty()) {
4773  for (auto& p : storage_options_) {
4774  if (boost::to_lower_copy<std::string>(*p->get_name()) ==
4775  "use_shared_dictionaries") {
4776  const StringLiteral* literal =
4777  dynamic_cast<const StringLiteral*>(p->get_value());
4778  if (nullptr == literal) {
4779  throw std::runtime_error(
4780  "USE_SHARED_DICTIONARIES must be a string parameter");
4781  }
4782  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
4783  use_shared_dictionaries = val == "true" || val == "1" || val == "t";
4784  } else if (boost::to_lower_copy<std::string>(*p->get_name()) ==
4785  "force_geo_compression") {
4786  const StringLiteral* literal =
4787  dynamic_cast<const StringLiteral*>(p->get_value());
4788  if (nullptr == literal) {
4789  throw std::runtime_error("FORCE_GEO_COMPRESSION must be a string parameter");
4790  }
4791  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
4792  force_geo_compression = val == "true" || val == "1" || val == "t";
4793  } else {
4794  get_table_definitions_for_ctas(td, p, column_descriptors_for_create);
4795  }
4796  }
4797  }
4798 
4799  std::vector<SharedDictionaryDef> sharedDictionaryRefs;
4800 
4801  if (use_shared_dictionaries) {
4802  const auto source_column_descriptors =
4803  local_connector.getColumnDescriptors(validate_result, false);
4804  const auto mapping = catalog.getDictionaryToColumnMapping();
4805 
4806  for (auto& source_cd : source_column_descriptors) {
4807  const auto& ti = source_cd.columnType;
4808  if (ti.is_string()) {
4809  if (ti.get_compression() == kENCODING_DICT) {
4810  int dict_id = ti.get_comp_param();
4811  auto it = mapping.find(dict_id);
4812  if (mapping.end() != it) {
4813  const auto targetColumn = it->second;
4814  auto targetTable =
4815  catalog.getMetadataForTable(targetColumn->tableId, false);
4816  CHECK(targetTable);
4817  LOG(INFO) << "CTAS: sharing text dictionary on column "
4818  << source_cd.columnName << " with " << targetTable->tableName
4819  << "." << targetColumn->columnName;
4820  sharedDictionaryRefs.emplace_back(
4821  source_cd.columnName, targetTable->tableName, targetColumn->columnName);
4822  }
4823  }
4824  }
4825  }
4826  }
4827 
4828  if (force_geo_compression) {
4829  for (auto& cd_for_create : column_descriptors_for_create) {
4830  auto& ti = cd_for_create.columnType;
4831  if (ti.is_geometry() && ti.get_output_srid() == 4326) {
4832  // turn on GEOINT32 compression
4833  ti.set_compression(kENCODING_GEOINT);
4834  ti.set_comp_param(32);
4835  }
4836  }
4837  }
4838 
4839  // currently no means of defining sharding in CTAS
4840  td.keyMetainfo = serialize_key_metainfo(nullptr, sharedDictionaryRefs);
4841 
4842  catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs, true);
4843  // TODO (max): It's transactionally unsafe, should be fixed: we may create object
4844  // w/o privileges
4845  SysCatalog::instance().createDBObject(
4846  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
4847  }
4848 
4849  // note there is a time where we do not have any executor outer lock here. someone could
4850  // come along and mess with the data or other tables.
4851  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
4852 
4853  auto locks = acquire_query_table_locks(
4854  catalog.name(), select_query_, query_state->createQueryStateProxy(), table_name_);
4855  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
4856  try {
4857  populateData(query_state->createQueryStateProxy(), td, false, true);
4858  } catch (...) {
4859  if (!g_cluster) {
4860  const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
4861  if (created_td) {
4862  catalog.dropTable(created_td);
4863  }
4864  }
4865  throw;
4866  }
4867 }
4868 
4869 DropTableStmt::DropTableStmt(const rapidjson::Value& payload) {
4870  CHECK(payload.HasMember("tableName"));
4871  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4872 
4873  if_exists_ = false;
4874  if (payload.HasMember("ifExists")) {
4875  if_exists_ = json_bool(payload["ifExists"]);
4876  }
4877 }
4878 
4880  bool read_only_mode) {
4881  if (read_only_mode) {
4882  throw std::runtime_error("DROP TABLE invalid in read only mode.");
4883  }
4884  // Because we are able to acquire a unique_lock on the table descriptor to be dropped we
4885  // can get away with only using a shared_lock on the executor, as anything that will
4886  // touch the table being dropped will block, but other transactions are ok.
4887  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
4888  auto& catalog = session.getCatalog();
4889  const TableDescriptor* td{nullptr};
4890  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
4891  try {
4892  td_with_lock =
4893  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
4895  catalog, *table_, false));
4896  td = (*td_with_lock)();
4897  } catch (const std::runtime_error& e) {
4898  if (if_exists_) {
4899  return;
4900  } else {
4901  throw e;
4902  }
4903  }
4904 
4905  CHECK(td);
4906  CHECK(td_with_lock);
4907 
4908  // check access privileges
4909  if (!session.checkDBAccessPrivileges(
4911  throw std::runtime_error("Table " + *table_ +
4912  " will not be dropped. User has no proper privileges.");
4913  }
4914 
4916 
4917  {
4918  auto table_data_read_lock =
4920  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
4921  }
4922 
4923  auto table_data_write_lock =
4925  catalog.dropTable(td);
4926 }
4927 
4928 TruncateTableStmt::TruncateTableStmt(const rapidjson::Value& payload) {
4929  CHECK(payload.HasMember("tableName"));
4930  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4931 }
4932 
4934  bool read_only_mode) {
4935  if (read_only_mode) {
4936  throw std::runtime_error("TRUNCATE TABLE invalid in read only mode.");
4937  }
4938  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
4939  auto& catalog = session.getCatalog();
4940  const auto td_with_lock =
4942  catalog, *table_, true);
4943  const auto td = td_with_lock();
4944  if (!td) {
4945  throw std::runtime_error("Table " + *table_ + " does not exist.");
4946  }
4947 
4948  // check access privileges
4949  std::vector<DBObject> privObjects;
4950  DBObject dbObject(*table_, TableDBObjectType);
4951  dbObject.loadKey(catalog);
4953  privObjects.push_back(dbObject);
4954  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
4955  throw std::runtime_error("Table " + *table_ + " will not be truncated. User " +
4956  session.get_currentUser().userLoggable() +
4957  " has no proper privileges.");
4958  }
4959 
4960  if (td->isView) {
4961  throw std::runtime_error(*table_ + " is a view. Cannot Truncate.");
4962  }
4964 
4965  // invalidate cached item
4966  {
4967  auto table_data_read_lock =
4969  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
4970  }
4971 
4972  auto table_data_write_lock =
4974  catalog.truncateTable(td);
4975 }
4976 
4977 OptimizeTableStmt::OptimizeTableStmt(const rapidjson::Value& payload) {
4978  CHECK(payload.HasMember("tableName"));
4979  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4980  parse_options(payload, options_);
4981 }
4982 
4983 namespace {
4985  const TableDescriptor* td,
4986  const AccessPrivileges access_priv) {
4987  CHECK(td);
4988  auto& cat = session_info.getCatalog();
4989  std::vector<DBObject> privObjects;
4990  DBObject dbObject(td->tableName, TableDBObjectType);
4991  dbObject.loadKey(cat);
4992  dbObject.setPrivileges(access_priv);
4993  privObjects.push_back(dbObject);
4994  return SysCatalog::instance().checkPrivileges(session_info.get_currentUser(),
4995  privObjects);
4996 };
4997 } // namespace
4998 
5000  bool read_only_mode) {
5001  if (read_only_mode) {
5002  throw std::runtime_error("OPTIMIZE TABLE invalid in read only mode.");
5003  }
5004  auto& catalog = session.getCatalog();
5005 
5006  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
5007 
5008  const auto td_with_lock =
5010  catalog, *table_);
5011  const auto td = td_with_lock();
5012 
5013  if (!td || !user_can_access_table(session, td, AccessPrivileges::DELETE_FROM_TABLE)) {
5014  throw std::runtime_error("Table " + *table_ + " does not exist.");
5015  }
5016 
5017  if (td->isView) {
5018  throw std::runtime_error("OPTIMIZE TABLE command is not supported on views.");
5019  }
5020 
5021  // invalidate cached item
5022  Executor::clearExternalCaches(true, td, catalog.getDatabaseId());
5023 
5025  const TableOptimizer optimizer(td, executor, catalog);
5026  if (shouldVacuumDeletedRows()) {
5027  optimizer.vacuumDeletedRows();
5028  }
5029  optimizer.recomputeMetadata();
5030 }
5031 
5032 bool repair_type(std::list<std::unique_ptr<NameValueAssign>>& options) {
5033  for (const auto& opt : options) {
5034  if (boost::iequals(*opt->get_name(), "REPAIR_TYPE")) {
5035  const auto repair_type =
5036  static_cast<const StringLiteral*>(opt->get_value())->get_stringval();
5037  CHECK(repair_type);
5038  if (boost::iequals(*repair_type, "REMOVE")) {
5039  return true;
5040  } else {
5041  throw std::runtime_error("REPAIR_TYPE must be REMOVE.");
5042  }
5043  } else {
5044  throw std::runtime_error("The only VALIDATE WITH options is REPAIR_TYPE.");
5045  }
5046  }
5047  return false;
5048 }
5049 
5050 ValidateStmt::ValidateStmt(std::string* type, std::list<NameValueAssign*>* with_opts)
5051  : type_(type) {
5052  if (!type) {
5053  throw std::runtime_error("Validation Type is required for VALIDATE command.");
5054  }
5055  std::list<std::unique_ptr<NameValueAssign>> options;
5056  if (with_opts) {
5057  for (const auto e : *with_opts) {
5058  options.emplace_back(e);
5059  }
5060  delete with_opts;
5061 
5062  isRepairTypeRemove_ = repair_type(options);
5063  }
5064 }
5065 
5066 ValidateStmt::ValidateStmt(const rapidjson::Value& payload) {
5067  CHECK(payload.HasMember("type"));
5068  type_ = std::make_unique<std::string>(json_str(payload["type"]));
5069 
5070  std::list<std::unique_ptr<NameValueAssign>> options;
5071  parse_options(payload, options);
5072 
5073  isRepairTypeRemove_ = repair_type(options);
5074 }
5075 
5077  const TableDescriptor* td) {
5078  if (session.get_currentUser().isSuper ||
5079  session.get_currentUser().userId == td->userId) {
5080  return;
5081  }
5082  std::vector<DBObject> privObjects;
5083  DBObject dbObject(td->tableName, TableDBObjectType);
5084  dbObject.loadKey(session.getCatalog());
5086  privObjects.push_back(dbObject);
5087  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
5088  throw std::runtime_error("Current user does not have the privilege to alter table: " +
5089  td->tableName);
5090  }
5091 }
5092 
5093 RenameUserStmt::RenameUserStmt(const rapidjson::Value& payload) {
5094  CHECK(payload.HasMember("name"));
5095  username_ = std::make_unique<std::string>(json_str(payload["name"]));
5096  CHECK(payload.HasMember("newName"));
5097  new_username_ = std::make_unique<std::string>(json_str(payload["newName"]));
5098 }
5099 
5101  bool read_only_mode) {
5102  if (read_only_mode) {
5103  throw std::runtime_error("RENAME TABLE invalid in read only mode.");
5104  }
5105  if (!session.get_currentUser().isSuper) {
5106  throw std::runtime_error("Only a super user can rename users.");
5107  }
5108 
5110  if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
5111  throw std::runtime_error("User " + *username_ + " does not exist.");
5112  }
5113 
5114  SysCatalog::instance().renameUser(*username_, *new_username_);
5115 }
5116 
5117 RenameDBStmt::RenameDBStmt(const rapidjson::Value& payload) {
5118  CHECK(payload.HasMember("name"));
5119  database_name_ = std::make_unique<std::string>(json_str(payload["name"]));
5120  CHECK(payload.HasMember("newName"));
5121  new_database_name_ = std::make_unique<std::string>(json_str(payload["newName"]));
5122 }
5123 
5125  bool read_only_mode) {
5126  if (read_only_mode) {
5127  throw std::runtime_error("RENAME DATABASE invalid in read only mode.");
5128  }
5130 
5131  // TODO: use database lock instead
5132  const auto execute_write_lock = legacylockmgr::getExecuteWriteLock();
5133 
5134  if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
5135  throw std::runtime_error("Database " + *database_name_ + " does not exist.");
5136  }
5137 
5138  if (!session.get_currentUser().isSuper &&
5139  session.get_currentUser().userId != db.dbOwner) {
5140  throw std::runtime_error("Only a super user or the owner can rename the database.");
5141  }
5142 
5143  SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
5144 }
5145 
5146 RenameTableStmt::RenameTableStmt(const rapidjson::Value& payload) {
5147  CHECK(payload.HasMember("tableNames"));
5148  CHECK(payload["tableNames"].IsArray());
5149  const auto elements = payload["tableNames"].GetArray();
5150  for (const auto& element : elements) {
5151  CHECK(element.HasMember("name"));
5152  CHECK(element.HasMember("newName"));
5153  tablesToRename_.emplace_back(new std::string(json_str(element["name"])),
5154  new std::string(json_str(element["newName"])));
5155  }
5156 }
5157 
5158 RenameTableStmt::RenameTableStmt(std::string* tab_name, std::string* new_tab_name) {
5159  tablesToRename_.emplace_back(tab_name, new_tab_name);
5160 }
5161 
5163  std::list<std::pair<std::string, std::string>> tableNames) {
5164  for (auto item : tableNames) {
5165  tablesToRename_.emplace_back(new std::string(item.first),
5166  new std::string(item.second));
5167  }
5168 }
5169 
5170 using SubstituteMap = std::map<std::string, std::string>;
5171 
5172 // Namespace fns used to track a left-to-right execution of RENAME TABLE
5173 // and verify that the command should be (entirely/mostly) valid
5174 //
5175 namespace {
5176 
5177 static constexpr char const* EMPTY_NAME{""};
5178 
// Returns `name` followed by "_tmp" and the current std::time() value in
// decimal, for use as a temporary table name.
std::string generateUniqueTableName(std::string name) {
  // TODO - is there a "better" way to create a tmp name for the table
  const std::time_t now = std::time(nullptr);
  name += "_tmp";
  name += std::to_string(now);
  return name;
}
5184 
5185 void recordRename(SubstituteMap& sMap, std::string oldName, std::string newName) {
5186  sMap[oldName] = newName;
5187 }
5188 
5190  SubstituteMap& sMap,
5191  std::string tableName) {
5192  if (sMap.find(tableName) != sMap.end()) {
5193  if (sMap[tableName] == EMPTY_NAME) {
5194  return tableName;
5195  }
5196  return sMap[tableName];
5197  } else {
5198  // lookup table in src catalog
5199  const TableDescriptor* td = catalog.getMetadataForTable(tableName);
5200  if (td) {
5201  sMap[tableName] = tableName;
5202  } else {
5203  sMap[tableName] = EMPTY_NAME;
5204  }
5205  }
5206  return tableName;
5207 }
5208 
5209 bool hasData(SubstituteMap& sMap, std::string tableName) {
5210  // assumes loadTable has been previously called
5211  return (sMap[tableName] != EMPTY_NAME);
5212 }
5213 
5215  // Substitution map should be clean at end of rename:
5216  // all items in map must (map to self) or (map to EMPTY_NAME) by end
5217 
5218  for (auto it : sMap) {
5219  if ((it.second) != EMPTY_NAME && (it.first) != (it.second)) {
5220  throw std::runtime_error(
5221  "Error: Attempted to overwrite and lose data in table: \'" + (it.first) + "\'");
5222  }
5223  }
5224 }
5225 } // namespace
5226 
// File-local helper that rejects foreign tables.
// NOTE(review): the helper's signature and its condition line are missing from
// this listing; only the throw for a foreign table descriptor `td` is visible.
5227 namespace {
5230  throw std::runtime_error(td->tableName + " is a foreign table. " +
5231  "Use ALTER FOREIGN TABLE.");
5232  }
5233 }
5234 } // namespace
5235 
// NOTE(review): the first signature line is missing from this listing; judging by
// the error text below this is the RENAME TABLE statement's execute() - confirm.
// Walks tablesToRename_ left to right, simulating each rename in a SubstituteMap
// so that swaps get a temporary name injected and missing sources are reported,
// then passes the accumulated (old, new) pairs to catalog.renameTables().
// Throws std::runtime_error in read-only mode, when a source table does not exist,
// or when the simulation ends with data left under a substitute name.
5237  bool read_only_mode) {
5238  if (read_only_mode) {
5239  throw std::runtime_error("RENAME TABLE invalid in read only mode.");
5240  }
5241  auto& catalog = session.getCatalog();
5242 
5243  // TODO(adb): the catalog should be handling this locking (see AddColumStmt)
5244  const auto execute_write_lock = legacylockmgr::getExecuteWriteLock();
5245 
5246  // accumulated vector of table names: oldName->newName
5247  std::vector<std::pair<std::string, std::string>> names;
5248 
5249  SubstituteMap tableSubtituteMap;
5250 
5251  for (auto& item : tablesToRename_) {
5252  std::string curTableName = *(item.first);
5253  std::string newTableName = *(item.second);
5254 
5255  // Note: if rename (a->b, b->a)
5256  // requires a tmp name change (b->tmp, a->b, tmp->a),
5257  // inject that here because
5258  // catalog.renameTable() assumes cleanliness else will fail
5259 
5260  std::string altCurTableName = loadTable(catalog, tableSubtituteMap, curTableName);
5261  std::string altNewTableName = loadTable(catalog, tableSubtituteMap, newTableName);
5262 
5263  if (altCurTableName != curTableName && altCurTableName != EMPTY_NAME) {
5264  // rename is a one-shot deal, reset the mapping once used
5265  recordRename(tableSubtituteMap, curTableName, curTableName);
5266  }
5267 
5268  // Check to see if the command (as-entered) will likely execute cleanly (logic-wise)
5269  // src tables exist before copying from
5270  // destination table collisions
5271  // handled (a->b, b->a)
5272  // or flagged (pre-existing a,b ... "RENAME TABLE a->c, b->c" )
5273  // handle multiple chained renames, tmp names (b->tmp, a->b, tmp->a)
5274  // etc.
5275  //
5276  if (hasData(tableSubtituteMap, altCurTableName)) {
5277  const TableDescriptor* td = catalog.getMetadataForTable(altCurTableName);
5278  if (td) {
5279  // Tables *and* views may be renamed here, foreign tables not
5280  // -> just block foreign tables
5282  check_alter_table_privilege(session, td);
5283  }
5284 
5285  if (hasData(tableSubtituteMap, altNewTableName)) {
5286  std::string tmpNewTableName = generateUniqueTableName(altNewTableName);
5287  // rename: newTableName to tmpNewTableName to get it out of the way
5288  // because it was full
5289  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
5290  recordRename(tableSubtituteMap, altNewTableName, tmpNewTableName);
5291  recordRename(tableSubtituteMap, tmpNewTableName, tmpNewTableName);
5292  names.emplace_back(altNewTableName, tmpNewTableName);
5293  names.emplace_back(altCurTableName, altNewTableName);
5294  } else {
5295  // rename: altCurTableName to altNewTableName
5296  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
5297  recordRename(tableSubtituteMap, altNewTableName, altNewTableName);
5298  names.emplace_back(altCurTableName, altNewTableName);
5299  }
5300  } else {
5301  throw std::runtime_error("Source table \'" + curTableName + "\' does not exist.");
5302  }
5303  }
5304  checkNameSubstition(tableSubtituteMap);
5305 
5306  catalog.renameTables(names);
5307 
5308  // just to be explicit, clean out the list, the unique_ptr will delete
5309  while (!tablesToRename_.empty()) {
5310  tablesToRename_.pop_front();
5311  }
5312 } // end of function body (this brace does not close a namespace)
5313 
// NOTE(review): the signature line and the name of the final callee are missing
// from this listing; the body works on a ColumnDescriptor `cd` and a column
// definition `coldef`.
// Derives the NOT NULL flag and the optional default value text from the column
// constraint, then forwards them together with the column type and compression.
5315  bool not_null;
5316  const ColumnConstraintDef* cc = coldef->get_column_constraint();
5317  if (cc == nullptr) {
5318  not_null = false;
5319  } else {
5320  not_null = cc->get_notnull();
5321  }
5322  std::string default_value;
// Stays null when the column has no constraint or no DEFAULT literal.
5323  const std::string* default_value_ptr = nullptr;
5324  if (cc) {
5325  if (auto def_val_literal = cc->get_defaultval()) {
// String literals contribute their raw string value, other literals their to_string() form.
5326  auto defaultsp = dynamic_cast<const StringLiteral*>(def_val_literal);
5327  default_value =
5328  defaultsp ? *defaultsp->get_stringval() : def_val_literal->to_string();
5329  // The preprocessing below is needed because:
5330  // a) TypedImportBuffer expects arrays in the {...} format
5331  // b) TypedImportBuffer expects string literals inside arrays w/o any quotes
5332  if (coldef->get_column_type()->get_is_array()) {
5333  std::regex array_re(R"(^ARRAY\s*\[(.*)\]$)", std::regex_constants::icase);
5334  default_value = std::regex_replace(default_value, array_re, "{$1}");
5335  boost::erase_all(default_value, "\'");
5336  }
5337  default_value_ptr = &default_value;
5338  }
5339  }
5341  cd,
5342  coldef->get_column_type(),
5343  not_null,
5344  coldef->get_compression(),
5345  default_value_ptr);
5346 }
5347 
// NOTE(review): the signature line is missing from this listing.
// Thin wrapper that forwards `cd` and `coldef` to set_column_descriptor.
5349  set_column_descriptor(cd, coldef);
5350 }
5351 
// NOTE(review): the first signature line and one interior line are missing from
// this listing; the caller invokes this as check_executable(session, td).
// Validates that columns can be added to `td`: the table must exist, must not be a
// view or a temporary table, the session must pass check_alter_table_privilege,
// and none of the new column names may already exist. Throws std::runtime_error
// on the visible failure paths.
5353  const TableDescriptor* td) {
5354  auto& catalog = session.getCatalog();
5355  if (!td) {
5356  throw std::runtime_error("Table " + *table_ + " does not exist.");
5357  } else {
5358  if (td->isView) {
5359  throw std::runtime_error("Adding columns to a view is not supported.");
5360  }
5362  if (table_is_temporary(td)) {
5363  throw std::runtime_error(
5364  "Adding columns to temporary tables is not yet supported.");
5365  }
5366  }
5367 
5368  check_alter_table_privilege(session, td);
5369 
// The single-column form keeps its definition in coldef_; fold it into coldefs_
// so the loop below handles both forms.
5370  if (0 == coldefs_.size()) {
5371  coldefs_.push_back(std::move(coldef_));
5372  }
5373 
5374  for (const auto& coldef : coldefs_) {
5375  auto& new_column_name = *coldef->get_column_name();
5376  if (catalog.getMetadataForColumn(td->tableId, new_column_name) != nullptr) {
5377  throw std::runtime_error("Column " + new_column_name + " already exists.");
5378  }
5379  }
5380 }
5381 
// NOTE(review): the first signature line and several interior lines are missing
// from this listing; judging by the error text this is the ADD COLUMN execute().
// Visible flow: take the execute write lock, obtain the table descriptor, run
// check_executable, reject tables using a SortedOrderFragmenter, clear external
// caches, then inside a SQLite transaction add every new column (plus the physical
// columns of geo columns), fill existing rows with the column defaults through
// import buffers, load without checkpoint and finally checkpoint. Any exception
// rolls the transaction back and is rethrown.
5383  bool read_only_mode) {
5384  if (read_only_mode) {
5385  throw std::runtime_error("ADD COLUMN invalid in read only mode.");
5386  }
5387  // TODO: Review add and drop column implementation
5388  const auto execute_write_lock = legacylockmgr::getExecuteWriteLock();
5389  auto& catalog = session.getCatalog();
5390  const auto td_with_lock =
5392  catalog, *table_, true);
5393  const auto td = td_with_lock();
5394 
5395  check_executable(session, td);
5396 
5397  CHECK(td->fragmenter);
5398  if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
5399  td->fragmenter)) {
5400  throw std::runtime_error(
5401  "Adding columns to a table is not supported when using the \"sort_column\" "
5402  "option.");
5403  }
5404 
5405  // invalidate cached item
5406  Executor::clearExternalCaches(true, td, catalog.getDatabaseId());
5407 
5408  // Do not take a data write lock, as the fragmenter may call `deleteFragments`
5409  // during a cap operation. Note that the schema write lock will prevent concurrent
5410  // inserts along with all other queries.
5411 
5412  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
5413  try {
// cds: new column descriptors keyed by column name.
// cid_coldefs: column id -> originating ColumnDef (nullptr for physical geo columns).
5414  std::map<const std::string, const ColumnDescriptor> cds;
5415  std::map<const int, const ColumnDef*> cid_coldefs;
5416  for (const auto& coldef : coldefs_) {
5417  ColumnDescriptor cd;
5418  setColumnDescriptor(cd, coldef.get());
5419  catalog.addColumn(*td, cd);
5420  cds.emplace(*coldef->get_column_name(), cd);
5421  cid_coldefs.emplace(cd.columnId, coldef.get());
5422 
5423  // expand geo column to phy columns
5424  if (cd.columnType.is_geometry()) {
5425  std::list<ColumnDescriptor> phy_geo_columns;
5426  catalog.expandGeoColumn(cd, phy_geo_columns);
5427  for (auto& cd : phy_geo_columns) {
5428  catalog.addColumn(*td, cd);
5429  cds.emplace(cd.columnName, cd);
5430  cid_coldefs.emplace(cd.columnId, nullptr);
5431  }
5432  }
5433  }
5434 
5435  std::unique_ptr<import_export::Loader> loader(new import_export::Loader(catalog, td));
5436  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
5437  for (const auto& cd : cds) {
5438  import_buffers.emplace_back(std::make_unique<import_export::TypedImportBuffer>(
5439  &cd.second, loader->getStringDict(&cd.second)));
5440  }
5441  loader->setAddingColumns(true);
5442 
5443  // set_geo_physical_import_buffer below needs a sorted import_buffers
5444  std::sort(import_buffers.begin(),
5445  import_buffers.end(),
5446  [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
5447  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
5448  });
5449 
5450  size_t nrows = td->fragmenter->getNumRows();
5451  // if sharded, get total nrows from all sharded tables
5452  if (td->nShards > 0) {
5453  const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
5454  nrows = 0;
5455  std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](const auto& td) {
5456  nrows += td->fragmenter->getNumRows();
5457  });
5458  }
// Only tables that already contain rows need default values written for the new columns.
5459  if (nrows > 0) {
5460  int skip_physical_cols = 0;
5461  for (const auto cit : cid_coldefs) {
5462  const auto cd = catalog.getMetadataForColumn(td->tableId, cit.first);
5463  const auto coldef = cit.second;
5464  const bool is_null = !cd->default_value.has_value();
5465 
5466  if (cd->columnType.get_notnull() && is_null) {
5467  throw std::runtime_error("Default value required for column " + cd->columnName +
5468  " because of NOT NULL constraint");
5469  }
5470 
// Locate the import buffer that belongs to this column id.
5471  for (auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
5472  auto& import_buffer = *it;
5473  if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
5474  if (coldef != nullptr ||
5475  skip_physical_cols-- <= 0) { // skip non-null phy col
5476  import_buffer->add_value(cd,
5477  cd->default_value.value_or("NULL"),
5478  is_null,
5480  if (cd->columnType.is_geometry()) {
5481  std::vector<double> coords, bounds;
5482  std::vector<int> ring_sizes, poly_rings;
5483  SQLTypeInfo tinfo{cd->columnType};
5484  const bool validate_with_geos_if_available = false;
5486  cd->default_value.value_or("NULL"),
5487  tinfo,
5488  coords,
5489  bounds,
5490  ring_sizes,
5491  poly_rings,
5492  validate_with_geos_if_available)) {
5493  throw std::runtime_error("Bad geometry data: '" +
5494  cd->default_value.value_or("NULL") + "'");
5495  }
5496  size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
5498  cd,
5499  import_buffers,
5500  col_idx,
5501  coords,
5502  bounds,
5503  ring_sizes,
5504  poly_rings);
5505  // skip following phy cols
5506  skip_physical_cols = cd->columnType.get_physical_cols();
5507  }
5508  }
5509  break;
5510  }
5511  }
5512  }
5513  }
5514 
5515  if (!loader->loadNoCheckpoint(import_buffers, nrows, &session)) {
5516  throw std::runtime_error("loadNoCheckpoint failed!");
5517  }
5518  catalog.rollLegacy(true);
5519  catalog.resetTableEpochFloor(td->tableId);
5520  loader->checkpoint();
5521  catalog.getSqliteConnector().query("END TRANSACTION");
5522  } catch (...) {
// Undo the catalog changes made inside the transaction, then propagate the error.
5523  catalog.rollLegacy(false);
5524  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
5525  throw;
5526  }
5527 }
5528 
// NOTE(review): the first signature line and several interior lines are missing
// from this listing; judging by the error text this is the DROP COLUMN execute().
// Visible flow: validate the table (exists, not a view, not temporary), check the
// alter privilege, make sure every named column exists and that the table keeps at
// least one column, clear external caches, then inside a SQLite transaction drop
// each column (and its physical columns) from the catalog and from every shard's
// fragmenter. Any exception marks the table for reload, rolls back and rethrows.
5530  bool read_only_mode) {
5531  if (read_only_mode) {
5532  throw std::runtime_error("DROP COLUMN invalid in read only mode.");
5533  }
5534  // TODO: Review add and drop column implementation
5535  const auto execute_write_lock = legacylockmgr::getExecuteWriteLock();
5536  auto& catalog = session.getCatalog();
5537  const auto td_with_lock =
5539  catalog, *table_, true);
5540  const auto td = td_with_lock();
5541  if (!td) {
5542  throw std::runtime_error("Table " + *table_ + " does not exist.");
5543  }
5545  if (td->isView) {
5546  throw std::runtime_error("Dropping a column from a view is not supported.");
5547  }
5548  if (table_is_temporary(td)) {
5549  throw std::runtime_error(
5550  "Dropping a column from a temporary table is not yet supported.");
5551  }
5552 
5553  check_alter_table_privilege(session, td);
5554 
5555  for (const auto& column : columns_) {
5556  if (nullptr == catalog.getMetadataForColumn(td->tableId, *column)) {
5557  throw std::runtime_error("Column " + *column + " does not exist.");
5558  }
5559  }
5560 
// NOTE(review): the thresholds 2 / 3 presumably account for hidden system columns
// (one more when the table has a deleted column) - confirm.
5561  if (td->nColumns <= (td->hasDeletedCol ? 3 : 2)) {
5562  throw std::runtime_error("Table " + *table_ + " has only one column.");
5563  }
5564 
5565  // invalidate cached item
5566  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
5567 
5568  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
5569  try {
5570  std::vector<int> columnIds;
5571  for (const auto& column : columns_) {
5572  ColumnDescriptor cd = *catalog.getMetadataForColumn(td->tableId, *column);
5573  if (td->nShards > 0 && td->shardedColumnId == cd.columnId) {
5574  throw std::runtime_error("Dropping sharding column " + cd.columnName +
5575  " is not supported.");
5576  }
5577  catalog.dropColumn(*td, cd);
5578  columnIds.push_back(cd.columnId);
// Physical columns are looked up at the ids directly following the logical column.
5579  for (int i = 0; i < cd.columnType.get_physical_cols(); i++) {
5580  const auto pcd = catalog.getMetadataForColumn(td->tableId, cd.columnId + i + 1);
5581  CHECK(pcd);
5582  catalog.dropColumn(*td, *pcd);
5583  columnIds.push_back(cd.columnId + i + 1);
5584  }
5585  }
5586 
5587  for (auto shard : catalog.getPhysicalTablesDescriptors(td)) {
5588  shard->fragmenter->dropColumns(columnIds);
5589  }
5590  // if test forces to rollback
5592  throw std::runtime_error("lol!");
5593  }
5594  catalog.rollLegacy(true);
5596  catalog.resetTableEpochFloor(td->tableId);
5597  catalog.checkpoint(td->tableId);
5598  }
5599  catalog.getSqliteConnector().query("END TRANSACTION");
5600  } catch (...) {
5601  catalog.setForReload(td->tableId);
5602  catalog.rollLegacy(false);
5603  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
5604  throw;
5605  }
5606 }
5607 
// NOTE(review): the first signature line and two interior lines are missing from
// this listing; judging by the error text this is the RENAME COLUMN execute().
// Renames column_ of table_ to new_column_name_ after checking the alter
// privilege. Throws std::runtime_error in read-only mode, when the column does not
// exist, or when the new name is already taken.
5609  bool read_only_mode) {
5610  if (read_only_mode) {
5611  throw std::runtime_error("RENAME COLUMN invalid in read only mode.");
5612  }
5613  auto& catalog = session.getCatalog();
5614 
5615  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
5616 
5617  const auto td_with_lock =
5619  catalog, *table_, false);
5620  const auto td = td_with_lock();
5621  CHECK(td);
5623 
5624  check_alter_table_privilege(session, td);
5625  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *column_);
5626  if (cd == nullptr) {
5627  throw std::runtime_error("Column " + *column_ + " does not exist.");
5628  }
5629  if (catalog.getMetadataForColumn(td->tableId, *new_column_name_) != nullptr) {
5630  throw std::runtime_error("Column " + *new_column_name_ + " already exists.");
5631  }
5632  catalog.renameColumn(td, cd, *new_column_name_);
5633 }
5634 
// NOTE(review): the first signature line and one interior line are missing from
// this listing; judging by the error text this is an ALTER TABLE ... SET
// <param> execute().
// Sets one integer table parameter (max_rollback_epochs, epoch or max_rows; the
// name is matched case-insensitively) on table_. Throws std::runtime_error in
// read-only mode, for a missing table, a view, a temporary table, a non-integer
// value, or an unknown parameter name.
5636  bool read_only_mode) {
5637  if (read_only_mode) {
5638  throw std::runtime_error("ALTER TABLE invalid in read only mode.");
5639  }
5640  enum TableParamType { MaxRollbackEpochs, Epoch, MaxRows };
// Lower-case parameter name -> parameter kind.
5641  static const std::unordered_map<std::string, TableParamType> param_map = {
5642  {"max_rollback_epochs", TableParamType::MaxRollbackEpochs},
5643  {"epoch", TableParamType::Epoch},
5644  {"max_rows", TableParamType::MaxRows}};
5645  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
5646  auto& catalog = session.getCatalog();
5647  const auto td_with_lock =
5649  catalog, *table_, false);
5650  const auto td = td_with_lock();
5651  if (!td) {
5652  throw std::runtime_error("Table " + *table_ + " does not exist.");
5653  }
5654  if (td->isView) {
5655  throw std::runtime_error("Setting parameters for a view is not supported.");
5656  }
5657  if (table_is_temporary(td)) {
5658  throw std::runtime_error(
5659  "Setting parameters for a temporary table is not yet supported.");
5660  }
5661  check_alter_table_privilege(session, td);
5662 
5663  // invalidate cached item
5664  Executor::clearExternalCaches(true, td, catalog.getDatabaseId());
5665 
5666  std::string param_name(*param_->get_name());
5667  boost::algorithm::to_lower(param_name);
5668  const IntLiteral* val_int_literal =
5669  dynamic_cast<const IntLiteral*>(param_->get_value());
5670  if (val_int_literal == nullptr) {
5671  throw std::runtime_error("Table parameters should be integers.");
5672  }
5673  const int64_t param_val = val_int_literal->get_intval();
5674 
5675  const auto param_it = param_map.find(param_name);
5676  if (param_it == param_map.end()) {
5677  throw std::runtime_error(param_name + " is not a settable table parameter.");
5678  }
5679  switch (param_it->second) {
5680  case MaxRollbackEpochs: {
5681  catalog.setMaxRollbackEpochs(td->tableId, param_val);
5682  break;
5683  }
5684  case Epoch: {
5685  catalog.setTableEpoch(catalog.getDatabaseId(), td->tableId, param_val);
5686  break;
5687  }
5688  case MaxRows: {
5689  catalog.setMaxRows(td->tableId, param_val);
5690  break;
5691  }
5692  default: {
5693  UNREACHABLE() << "Unexpected TableParamType value: " << param_it->second
5694  << ", key: " << param_it->first;
5695  }
5696  }
5697 }
5698 
// NOTE(review): the first line of this constructor is missing from this listing;
// `t` initialises table_ and `f` initialises copy_from_source_pattern_.
// Stores every entry of the option list `o` in options_ and then deletes the list
// object itself, so the caller must not use `o` afterwards.
5700  std::string* f,
5701  std::list<NameValueAssign*>* o)
5702  : table_(t), copy_from_source_pattern_(f), success_(true) {
5703  if (o) {
5704  for (const auto e : *o) {
5705  options_.emplace_back(e);
5706  }
5707  delete o;
5708  }
5709 }
5710 
5711 CopyTableStmt::CopyTableStmt(const rapidjson::Value& payload) : success_(true) {
5712  CHECK(payload.HasMember("table"));
5713  table_ = std::make_unique<std::string>(json_str(payload["table"]));
5714 
5715  CHECK(payload.HasMember("filePath"));
5716  std::string fs = json_str(payload["filePath"]);
5717  // strip leading/trailing spaces/quotes/single quotes
5718  boost::algorithm::trim_if(fs, boost::is_any_of(" \"'`"));
5719  copy_from_source_pattern_ = std::make_unique<std::string>(fs);
5720 
5721  parse_options(payload, options_);
5722 }
5723 
// NOTE(review): the first signature line is missing from this listing.
// Convenience overload: rejects read-only mode, then delegates to the execute
// overload that takes an importer factory, supplying one that calls
// import_export::create_importer.
5725  bool read_only_mode) {
5726  if (read_only_mode) {
5727  throw std::runtime_error("IMPORT invalid in read only mode.");
5728  }
5729  auto importer_factory = [](Catalog_Namespace::Catalog& catalog,
5730  const TableDescriptor* td,
5731  const std::string& copy_from_source,
5732  const import_export::CopyParams& copy_params)
5733  -> std::unique_ptr<import_export::AbstractImporter> {
5734  return import_export::create_importer(catalog, td, copy_from_source, copy_params);
5735  };
5736  return execute(session, read_only_mode, importer_factory);
5737 }
5738 
// NOTE(review): the first signature line and many interior lines are missing from
// this listing; comments below describe only what the visible lines do.
// COPY FROM implementation parameterised by `importer_factory`. Visible flow:
// lock the target table if it exists and check the user's privileges on it, build
// the copy parameters, then either defer a geo import (only stashing the
// parameters) or run a regular import through the factory-created importer. The
// human-readable outcome is stored in return_message and logged.
5740  const Catalog_Namespace::SessionInfo& session,
5741  bool read_only_mode,
5742  const std::function<std::unique_ptr<import_export::AbstractImporter>(
5744  const TableDescriptor*,
5745  const std::string&,
5746  const import_export::CopyParams&)>& importer_factory) {
5747  if (read_only_mode) {
5748  throw std::runtime_error("COPY FROM invalid in read only mode.");
5749  }
5750 
5751  size_t total_time = 0;
5752 
5753  // Prevent simultaneous import / truncate (see TruncateTableStmt::execute)
5754  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
5755 
5756  const TableDescriptor* td{nullptr};
5757  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
5758  std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
5759 
5760  auto& catalog = session.getCatalog();
5761 
// A std::runtime_error while acquiring the locks is swallowed on purpose; `td`
// then stays null and the code below treats the table as not existing.
5762  try {
5763  td_with_lock = std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
5765  catalog, *table_));
5766  td = (*td_with_lock)();
5767  insert_data_lock = std::make_unique<lockmgr::WriteLock>(
5769  } catch (const std::runtime_error& e) {
5770  // noop
5771  // TODO(adb): We're really only interested in whether the table exists or not.
5772  // Create a more refined exception.
5773  }
5774 
5775  // if the table already exists, it's locked, so check access privileges
5776  if (td) {
5777  std::vector<DBObject> privObjects;
5778  DBObject dbObject(*table_, TableDBObjectType);
5779  dbObject.loadKey(catalog);
5781  privObjects.push_back(dbObject);
5782  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
5783  throw std::runtime_error("Violation of access privileges: user " +
5784  session.get_currentUser().userLoggable() +
5785  " has no insert privileges for table " + *table_ + ".");
5786  }
5787 
5788  // invalidate cached item
5789  Executor::clearExternalCaches(true, td, catalog.getCurrentDB().dbId);
5790  }
5791 
5792  import_export::CopyParams copy_params;
5793  std::vector<std::string> warnings;
5795 
// Matches sources that start with an s3://, http:// or https:// scheme (case-insensitive).
5796  boost::regex non_local_file_regex{R"(^\s*(s3|http|https)://.+)",
5797  boost::regex::extended | boost::regex::icase};
5798  if (!boost::regex_match(*copy_from_source_pattern_, non_local_file_regex) &&
5802  }
5803  // since we'll have not only posix file names but also s3/hdfs/... url
5804  // we do not expand wildcard or check file existence here.
5805  // from here on, copy_from_source contains something which may be a url
5806  // a wildcard of file names, or a sql select statement;
5807  std::string copy_from_source = *copy_from_source_pattern_;
5808 
5809  if (copy_params.source_type == import_export::SourceType::kOdbc) {
5810  copy_params.sql_select = copy_from_source;
5811  if (copy_params.sql_order_by.empty()) {
5812  throw std::runtime_error(
5813  "Option \"SQL ORDER BY\" must be specified when copying from an ODBC source.");
5814  }
5815  }
5816 
// `tr` accumulates the result text: warnings first, then the import outcome.
5817  std::string tr;
5818 
5819  for (auto const& warning : warnings) {
5820  tr += warning + "\n";
5821  }
5822 
5823  if (copy_params.source_type == import_export::SourceType::kGeoFile ||
5825  // geo import
5826  // we do nothing here, except stash the parameters so we can
5827  // do the import when we unwind to the top of the handler
5828  deferred_copy_from_file_name_ = copy_from_source;
5829  deferred_copy_from_copy_params_ = copy_params;
5830  was_deferred_copy_from_ = true;
5831 
5832  // the result string
5833  // @TODO simon.eves put something more useful in here
5834  // except we really can't because we haven't done the import yet!
5835  if (td) {
5836  tr += std::string("Appending geo to table '") + *table_ + std::string("'...");
5837  } else {
5838  tr += std::string("Creating table '") + *table_ +
5839  std::string("' and importing geo...");
5840  }
5841  } else {
5842  if (td) {
5843  CHECK(td_with_lock);
5844 
5845  // regular import
5846  auto importer = importer_factory(catalog, td, copy_from_source, copy_params);
5847  auto start_time = ::toString(std::chrono::system_clock::now());
5849  auto query_session = session.get_session_id();
5850  auto query_str = "COPYING " + td->tableName;
5852  executor->enrollQuerySession(query_session,
5853  query_str,
5854  start_time,
5856  QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
5857  }
5858 
5859  ScopeGuard clearInterruptStatus =
5860  [executor, &query_str, &query_session, &start_time, &importer] {
5861  // reset the runtime query interrupt status
5863  executor->clearQuerySessionStatus(query_session, start_time);
5864  }
5865  };
5866  import_export::ImportStatus import_result;
5867  auto ms =
5868  measure<>::execution([&]() { import_result = importer->import(&session); });
5869  total_time += ms;
5870  // results
5871  if (!import_result.load_failed &&
5872  import_result.rows_rejected > copy_params.max_reject) {
5873  LOG(ERROR) << "COPY exited early due to reject records count during multi file "
5874  "processing ";
5875  // if we have crossed the truncated load threshold
5876  import_result.load_failed = true;
5877  import_result.load_msg =
5878  "COPY exited early due to reject records count during multi file "
5879  "processing ";
5880  success_ = false;
5881  }
5882  if (!import_result.load_failed) {
5883  tr += std::string(
5884  "Loaded: " + std::to_string(import_result.rows_completed) +
5885  " recs, Rejected: " + std::to_string(import_result.rows_rejected) +
5886  " recs in " + std::to_string((double)total_time / 1000.0) + " secs");
5887  } else {
5888  tr += std::string("Loader Failed due to : " + import_result.load_msg + " in " +
5889  std::to_string((double)total_time / 1000.0) + " secs");
5890  }
5891  } else {
5892  throw std::runtime_error("Table '" + *table_ + "' must exist before COPY FROM");
5893  }
5894  }
5895  return_message.reset(new std::string(tr));
5896  LOG(INFO) << tr;
5897 }
5898 
5899 // CREATE ROLE payroll_dept_role;
5900 CreateRoleStmt::CreateRoleStmt(const rapidjson::Value& payload) {
5901  CHECK(payload.HasMember("role"));
5902  role_ = std::make_unique<std::string>(json_str(payload["role"]));
5903 }
5904 
// NOTE(review): the first signature line is missing from this listing.
// Creates the role named by get_role() through SysCatalog. Throws
// std::runtime_error in read-only mode or when the current user is not a super
// user.
5906  bool read_only_mode) {
5907  if (read_only_mode) {
5908  throw std::runtime_error("CREATE ROLE invalid in read only mode.");
5909  }
5910  const auto& currentUser = session.get_currentUser();
5911  if (!currentUser.isSuper) {
5912  throw std::runtime_error("CREATE ROLE " + get_role() +
5913  " failed. It can only be executed by super user.");
5914  }
5915  SysCatalog::instance().createRole(
5916  get_role(), /*user_private_role=*/false, /*is_temporary=*/false);
5917 }
5918 
5919 // DROP ROLE payroll_dept_role;
5920 DropRoleStmt::DropRoleStmt(const rapidjson::Value& payload) {
5921  CHECK(payload.HasMember("role"));
5922  role_ = std::make_unique<std::string>(json_str(payload["role"]));
5923 
5924  if_exists_ = false;
5925  if (payload.HasMember("ifExists")) {
5926  if_exists_ = json_bool(payload["ifExists"]);
5927  }
5928 }
5929 
// NOTE(review): the first signature line is missing from this listing.
// Drops the role named by get_role() when SysCatalog knows it. A missing role is
// an error unless if_exists_ is set. Throws std::runtime_error in read-only mode
// or when the current user is not a super user.
5931  bool read_only_mode) {
5932  if (read_only_mode) {
5933  throw std::runtime_error("DROP ROLE invalid in read only mode.");
5934  }
5935  const auto& currentUser = session.get_currentUser();
5936  if (!currentUser.isSuper) {
5937  throw std::runtime_error("DROP ROLE " + get_role() +
5938  " failed. It can only be executed by super user.");
5939  }
5940  auto* rl = SysCatalog::instance().getRoleGrantee(get_role());
5941  if (rl) {
5942  SysCatalog::instance().dropRole(get_role(), /*is_temporary=*/false);
5943  } else if (!if_exists_) {
5944  throw std::runtime_error("DROP ROLE " + get_role() +
5945  " failed because role with this name does not exist.");
5946  }
5947 }
5948 
// Splits a hierarchical object name such as "db.table" into its components.
// Every '.' acts as a separator and empty components are kept, so "a..b" yields
// {"a", "", "b"} and an empty input yields one empty component.
std::vector<std::string> splitObjectHierName(const std::string& hierName) {
  std::vector<std::string> componentNames;
  std::string::size_type start = 0;
  for (;;) {
    const auto dot = hierName.find('.', start);
    if (dot == std::string::npos) {
      // Last (or only) component: everything from `start` to the end.
      componentNames.emplace_back(hierName, start);
      break;
    }
    componentNames.emplace_back(hierName, start, dot - start);
    start = dot + 1;
  }
  return componentNames;
}
5954 
// Extracts the plain object name from a possibly dot-qualified name.
// For objectType "DATABASE" the name must have exactly one component. For "TABLE",
// "DASHBOARD", "VIEW" and "SERVER" it may have one component (used as is) or two
// (the second one is used). Any other shape or type throws std::runtime_error.
// NOTE(review): the third parameter line is missing from this listing; callers
// pass a catalog there, which the visible body does not use.
5955 std::string extractObjectNameFromHierName(const std::string& objectHierName,
5956  const std::string& objectType,
5958  std::string objectName;
5959  std::vector<std::string> componentNames = splitObjectHierName(objectHierName);
5960  if (objectType.compare("DATABASE") == 0) {
5961  if (componentNames.size() == 1) {
5962  objectName = componentNames[0];
5963  } else {
5964  throw std::runtime_error("DB object name is not correct " + objectHierName);
5965  }
5966  } else {
5967  if (objectType.compare("TABLE") == 0 || objectType.compare("DASHBOARD") == 0 ||
5968  objectType.compare("VIEW") == 0 || objectType.compare("SERVER") == 0) {
5969  switch (componentNames.size()) {
5970  case (1): {
5971  objectName = componentNames[0];
5972  break;
5973  }
5974  case (2): {
5975  objectName = componentNames[1];
5976  break;
5977  }
5978  default: {
5979  throw std::runtime_error("DB object name is not correct " + objectHierName);
5980  }
5981  }
5982  } else {
5983  throw std::runtime_error("DB object type " + objectType + " is not supported.");
5984  }
5985  }
5986  return objectName;
5987 }
5988 
// Translates a privilege keyword plus the type of the object it is granted on into
// the matching (AccessPrivileges, DBObjectType) pair via a static lookup table.
// The lookup is an exact match on `privs`, so callers are expected to pass it
// already upper-cased. Throws std::runtime_error, naming `object_name`, when the
// combination is not in the table.
// NOTE(review): the value half of every table entry is missing from this listing;
// only the keys are visible.
5989 static std::pair<AccessPrivileges, DBObjectType> parseStringPrivs(
5990  const std::string& privs,
5991  const DBObjectType& objectType,
5992  const std::string& object_name) {
5993  static const std::map<std::pair<const std::string, const DBObjectType>,
5994  std::pair<const AccessPrivileges, const DBObjectType>>
5995  privileges_lookup{
5996  {{"ALL"s, DatabaseDBObjectType},
5999  {{"ALL"s, DashboardDBObjectType},
6002  {{"ALL"s, ServerDBObjectType},
6004 
6005  {{"CREATE TABLE"s, DatabaseDBObjectType},
6007  {{"CREATE"s, DatabaseDBObjectType},
6009  {{"SELECT"s, DatabaseDBObjectType},
6011  {{"INSERT"s, DatabaseDBObjectType},
6013  {{"TRUNCATE"s, DatabaseDBObjectType},
6015  {{"UPDATE"s, DatabaseDBObjectType},
6017  {{"DELETE"s, DatabaseDBObjectType},
6019  {{"DROP"s, DatabaseDBObjectType},
6021  {{"ALTER"s, DatabaseDBObjectType},
6023 
6024  {{"SELECT"s, TableDBObjectType},
6026  {{"INSERT"s, TableDBObjectType},
6028  {{"TRUNCATE"s, TableDBObjectType},
6030  {{"UPDATE"s, TableDBObjectType},
6032  {{"DELETE"s, TableDBObjectType},
6034  {{"DROP"s, TableDBObjectType},
6036  {{"ALTER"s, TableDBObjectType},
6038 
6039  {{"CREATE VIEW"s, DatabaseDBObjectType},
6041  {{"SELECT VIEW"s, DatabaseDBObjectType},
6043  {{"DROP VIEW"s, DatabaseDBObjectType},
6045  {{"SELECT"s, ViewDBObjectType},
6048 
6049  {{"CREATE DASHBOARD"s, DatabaseDBObjectType},
6051  {{"EDIT DASHBOARD"s, DatabaseDBObjectType},
6053  {{"VIEW DASHBOARD"s, DatabaseDBObjectType},
6055  {{"DELETE DASHBOARD"s, DatabaseDBObjectType},
6057  {{"VIEW"s, DashboardDBObjectType},
6059  {{"EDIT"s, DashboardDBObjectType},
6061  {{"DELETE"s, DashboardDBObjectType},
6063 
6064  {{"CREATE SERVER"s, DatabaseDBObjectType},
6066  {{"DROP SERVER"s, DatabaseDBObjectType},
6068  {{"DROP"s, ServerDBObjectType},
6070  {{"ALTER SERVER"s, DatabaseDBObjectType},
6072  {{"ALTER"s, ServerDBObjectType},
6074  {{"USAGE"s, ServerDBObjectType},
6076  {{"SERVER USAGE"s, DatabaseDBObjectType},
6078 
6079  {{"VIEW SQL EDITOR"s, DatabaseDBObjectType},
6081  {{"ACCESS"s, DatabaseDBObjectType},
6083 
6084  auto result = privileges_lookup.find(std::make_pair(privs, objectType));
6085  if (result == privileges_lookup.end()) {
6086  throw std::runtime_error("Privileges " + privs + " on DB object " + object_name +
6087  " are not correct.");
6088  }
6089  return result->second;
6090 }
6091 
6092 static DBObject createObject(const std::string& objectName, DBObjectType objectType) {
6093  if (objectType == DashboardDBObjectType) {
6094  int32_t dashboard_id = -1;
6095  if (!objectName.empty()) {
6096  try {
6097  dashboard_id = stoi(objectName);
6098  } catch (const std::exception&) {
6099  throw std::runtime_error(
6100  "Privileges on dashboards should be changed via integer dashboard ID");
6101  }
6102  }
6103  return DBObject(dashboard_id, objectType);
6104  } else {
6105  return DBObject(objectName, objectType);
6106  }
6107 }
6108 
6109 // Pre-execution PRIVILEGE failure conditions that cannot be detected elsewhere
6110 // For types: Table, View, Database, Server, Dashboard
6111 static void verifyObject(Catalog_Namespace::Catalog& sessionCatalog,
6112  const std::string& objectName,
6113  DBObjectType objectType,
6114  const std::string& command) {
6115  if (objectType == TableDBObjectType) {
6116  auto td = sessionCatalog.getMetadataForTable(objectName, false);
6117  if (!td || td->isView) {
6118  // expected TABLE, found VIEW
6119  throw std::runtime_error(command + " failed. Object '" + objectName + "' of type " +
6120  DBObjectTypeToString(objectType) + " not found.");
6121  }
6122 
6123  } else if (objectType == ViewDBObjectType) {
6124  auto td = sessionCatalog.getMetadataForTable(objectName, false);
6125  if (!td || !td->isView) {
6126  // expected VIEW, found TABLE
6127  throw std::runtime_error(command + " failed. Object '" + objectName + "' of type " +
6128  DBObjectTypeToString(objectType) + " not found.");
6129  }
6130  }
6131 }
6132 
6133 // GRANT SELECT/INSERT/CREATE ON TABLE payroll_table TO payroll_dept_role;
6134 GrantPrivilegesStmt::GrantPrivilegesStmt(const rapidjson::Value& payload) {
6135  CHECK(payload.HasMember("type"));
6136  type_ = std::make_unique<std::string>(json_str(payload["type"]));
6137 
6138  CHECK(payload.HasMember("target"));
6139  target_ = std::make_unique<std::string>(json_str(payload["target"]));
6140 
6141  if (payload.HasMember("privileges")) {
6142  CHECK(payload["privileges"].IsArray());
6143  for (auto& privilege : payload["privileges"].GetArray()) {
6144  auto r = json_str(privilege);
6145  // privilege was a StringLiteral
6146  // and is wrapped with quotes which need to get removed
6147  boost::algorithm::trim_if(r, boost::is_any_of(" \"'`"));
6148  privileges_.emplace_back(r);
6149  }
6150  }
6151  if (payload.HasMember("grantees")) {
6152  CHECK(payload["grantees"].IsArray());
6153  for (auto& grantee : payload["grantees"].GetArray()) {
6154  std::string g = json_str(grantee);
6155  grantees_.emplace_back(g);
6156  }
6157  }
6158 }
6159 
// NOTE(review): the first signature line is missing from this listing.
// Grants every privilege in get_privs() on the target object to grantees_.
// Throws std::runtime_error in read-only mode, when a SERVER object is targeted
// while g_enable_fsi is off, when the object fails verifyObject, when a non-super
// user does not own the object, or (via parseStringPrivs) when a privilege does
// not apply to the object type.
6161  bool read_only_mode) {
6162  if (read_only_mode) {
6163  throw std::runtime_error("GRANT invalid in read only mode.");
6164  }
6165  auto& catalog = session.getCatalog();
6166  const auto& currentUser = session.get_currentUser();
6167  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
6168  const auto objectName =
6169  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
6170  auto objectType = DBObjectTypeFromString(parserObjectType);
6171  if (objectType == ServerDBObjectType && !g_enable_fsi) {
6172  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
6173  }
6174  /* verify object exists and is of proper type *before* trying to execute */
6175  verifyObject(catalog, objectName, objectType, "GRANT");
6176 
6177  DBObject dbObject = createObject(objectName, objectType);
6178  /* verify object ownership if not suser */
6179  if (!currentUser.isSuper) {
6180  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
6181  throw std::runtime_error(
6182  "GRANT failed. It can only be executed by super user or owner of the "
6183  "object.");
6184  }
6185  }
6186  /* set proper values of privileges & grant them to the object */
// One copy of dbObject per requested privilege; each copy receives its own
// privilege set and permission type below.
6187  std::vector<DBObject> objects(get_privs().size(), dbObject);
6188  for (size_t i = 0; i < get_privs().size(); ++i) {
6189  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
6190  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
6191  objects[i].setPrivileges(priv.first);
6192  objects[i].setPermissionType(priv.second);
6193  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
6194  throw std::runtime_error("GRANT failed. SERVER object unrecognized.");
6195  }
6196  }
6197  SysCatalog::instance().grantDBObjectPrivilegesBatch(grantees_, objects, catalog);
6198 }
6199 
6200 // REVOKE SELECT/INSERT/CREATE ON TABLE payroll_table FROM payroll_dept_role;
6201 RevokePrivilegesStmt::RevokePrivilegesStmt(const rapidjson::Value& payload) {
6202  CHECK(payload.HasMember("type"));
6203  type_ = std::make_unique<std::string>(json_str(payload["type"]));
6204 
6205  CHECK(payload.HasMember("target"));
6206  target_ = std::make_unique<std::string>(json_str(payload["target"]));
6207 
6208  if (payload.HasMember("privileges")) {
6209  CHECK(payload["privileges"].IsArray());
6210  for (auto& privilege : payload["privileges"].GetArray()) {
6211  auto r = json_str(privilege);
6212  // privilege was a StringLiteral
6213  // and is wrapped with quotes which need to get removed
6214  boost::algorithm::trim_if(r, boost::is_any_of(" \"'`"));
6215  privileges_.emplace_back(r);
6216  }
6217  }
6218  if (payload.HasMember("grantees")) {
6219  CHECK(payload["grantees"].IsArray());
6220  for (auto& grantee : payload["grantees"].GetArray()) {
6221  std::string g = json_str(grantee);
6222  grantees_.emplace_back(g);
6223  }
6224  }
6225 }
6226 
// NOTE(review): the line that opens this definition (doxygen line 6227) is missing
// from this listing; judging by the "REVOKE" messages below it is presumably the
// start of RevokePrivilegesStmt::execute(session, ...) - confirm against the repo.
6228  bool read_only_mode) {
// Revokes the parsed privileges on one object from every grantee:
//   1. refuses to run in read-only mode,
//   2. resolves and verifies the target object,
//   3. requires the caller to be a super user or the object's owner,
//   4. builds one DBObject per privilege and revokes them in a single batch.
// Every failure is reported as std::runtime_error.
6229  if (read_only_mode) {
6230  throw std::runtime_error("REVOKE invalid in read only mode.");
6231  }
6232  auto& catalog = session.getCatalog();
6233  const auto& currentUser = session.get_currentUser();
// The object type keyword is upper-cased before it is mapped to a DBObjectType.
6234  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
6235  const auto objectName =
6236  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
6237  auto objectType = DBObjectTypeFromString(parserObjectType);
// SERVER objects are only accepted when the g_enable_fsi flag is set.
6238  if (objectType == ServerDBObjectType && !g_enable_fsi) {
6239  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
6240  }
6241  /* verify object exists and is of proper type *before* trying to execute */
6242  verifyObject(catalog, objectName, objectType, "REVOKE");
6243 
6244  DBObject dbObject = createObject(objectName, objectType);
6245  /* verify object ownership if not suser */
6246  if (!currentUser.isSuper) {
6247  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
6248  throw std::runtime_error(
6249  "REVOKE failed. It can only be executed by super user or owner of the "
6250  "object.");
6251  }
6252  }
6253  /* set proper values of privileges & revoke them from the object */
// One copy of dbObject per requested privilege; each copy is specialised below.
6254  std::vector<DBObject> objects(get_privs().size(), dbObject);
6255  for (size_t i = 0; i < get_privs().size(); ++i) {
6256  std::pair<AccessPrivileges, DBObjectType> priv = parseStringPrivs(
6257  boost::to_upper_copy<std::string>(get_privs()[i]), objectType, get_object());
6258  objects[i].setPrivileges(priv.first);
6259  objects[i].setPermissionType(priv.second);
// The permission type returned by parseStringPrivs is checked separately from
// objectType, so the SERVER/FSI gate is applied again per privilege.
6260  if (priv.second == ServerDBObjectType && !g_enable_fsi) {
6261  throw std::runtime_error("REVOKE failed. SERVER object unrecognized.");
6262  }
6263  }
6264  SysCatalog::instance().revokeDBObjectPrivilegesBatch(grantees_, objects, catalog);
6265 }
6266 
6267 // NOTE: not used currently, will we ever use it?
6268 // SHOW ON TABLE payroll_table FOR payroll_dept_role;
// NOTE(review): the line that opens this definition (doxygen line 6269) is missing
// from this listing; per the preceding "SHOW ON TABLE ... FOR ..." comment this is
// presumably ShowPrivilegesStmt::execute(session, ...) - confirm against the repo.
6270  bool read_only_mode) {
// Looks up the privileges a role holds on one object and prints them to stdout
// with printf. The caller must be a super user or the owner of the object.
6271  // valid in read_only_mode
6272 
6273  auto& catalog = session.getCatalog();
6274  const auto& currentUser = session.get_currentUser();
6275  const auto parserObjectType = boost::to_upper_copy<std::string>(get_object_type());
6276  const auto objectName =
6277  extractObjectNameFromHierName(get_object(), parserObjectType, catalog);
6278  auto objectType = DBObjectTypeFromString(parserObjectType);
6279  /* verify object exists and is of proper type *before* trying to execute */
6280  verifyObject(catalog, objectName, objectType, "SHOW");
6281 
6282  DBObject dbObject = createObject(objectName, objectType);
6283  /* verify object ownership if not suser */
6284  if (!currentUser.isSuper) {
6285  if (!SysCatalog::instance().verifyDBObjectOwnership(currentUser, dbObject, catalog)) {
6286  throw std::runtime_error(
6287  "SHOW ON " + get_object() + " FOR " + get_role() +
6288  " failed. It can only be executed by super user or owner of the object.");
6289  }
6290  }
6291  /* get values of privileges for the object and report them */
// getDBObjectPrivileges is handed dbObject and the privileges are read back from
// it on the next line, so the call is expected to populate dbObject.
6292  SysCatalog::instance().getDBObjectPrivileges(get_role(), dbObject, catalog);
6293  AccessPrivileges privs = dbObject.getPrivileges();
6294  printf("\nPRIVILEGES ON %s FOR %s ARE SET AS FOLLOWING: ",
6295  get_object().c_str(),
6296  get_role().c_str());
6297 
// NOTE(review): in every branch below, the `if (...) {` header guarding each printf
// (doxygen lines 6299, 6302, 6306, ...) is missing from this listing. Presumably
// each one tests `privs` for the privilege being printed - confirm against the repo.
6298  if (objectType == DBObjectType::DatabaseDBObjectType) {
6300  printf(" CREATE");
6301  }
6303  printf(" DROP");
6304  }
6305  } else if (objectType == DBObjectType::TableDBObjectType) {
6307  printf(" CREATE");
6308  }
6310  printf(" DROP");
6311  }
6313  printf(" SELECT");
6314  }
6316  printf(" INSERT");
6317  }
6319  printf(" UPDATE");
6320  }
6322  printf(" DELETE");
6323  }
6325  printf(" TRUNCATE");
6326  }
6328  printf(" ALTER");
6329  }
6330  } else if (objectType == DBObjectType::DashboardDBObjectType) {
6332  printf(" CREATE");
6333  }
6335  printf(" DELETE");
6336  }
6338  printf(" VIEW");
6339  }
6341  printf(" EDIT");
6342  }
6343  } else if (objectType == DBObjectType::ViewDBObjectType) {
6345  printf(" CREATE");
6346  }
6348  printf(" DROP");
6349  }
6351  printf(" SELECT");
6352  }
6354  printf(" INSERT");
6355  }
6357  printf(" UPDATE");
6358  }
6360  printf(" DELETE");
6361  }
6362  }
// Other object types fall through all branches and print only the terminator.
6363  printf(".\n");
6364 }
6365 
6366 // GRANT payroll_dept_role TO joe;
6367 GrantRoleStmt::GrantRoleStmt(const rapidjson::Value& payload) {
6368  if (payload.HasMember("roles")) {
6369  CHECK(payload["roles"].IsArray());
6370  for (auto& role : payload["roles"].GetArray()) {
6371  std::string r = json_str(role);
6372  roles_.emplace_back(r);
6373  }
6374  }
6375  if (payload.HasMember("grantees")) {
6376  CHECK(payload["grantees"].IsArray());
6377  for (auto& grantee : payload["grantees"].GetArray()) {
6378  std::string g = json_str(grantee);
6379  grantees_.emplace_back(g);
6380  }
6381  }
6382 }
6383 
// NOTE(review): the line that opens this definition (doxygen line 6384) is missing
// from this listing; presumably GrantRoleStmt::execute(session, ...) - confirm.
6385  bool read_only_mode) {
// Grants every role in get_roles() to every grantee in get_grantees().
// Throws std::runtime_error in read-only mode, when the caller is not a super
// user, or when the root user appears among the grantees.
6386  if (read_only_mode) {
6387  throw std::runtime_error("GRANT ROLE invalid in read only mode.");
6388  }
6389  const auto& currentUser = session.get_currentUser();
6390  if (!currentUser.isSuper) {
6391  throw std::runtime_error(
6392  "GRANT failed, because it can only be executed by super user.");
6393  }
// Exact (case-sensitive) match of shared::kRootUsername against the grantee list.
6394  if (std::find(get_grantees().begin(), get_grantees().end(), shared::kRootUsername) !=
6395  get_grantees().end()) {
6396  throw std::runtime_error(
6397  "Request to grant role failed because mapd root user has all privileges by "
6398  "default.");
6399  }
6400  SysCatalog::instance().grantRoleBatch(get_roles(), get_grantees());
6401 }
6402 
6403 // REVOKE payroll_dept_role FROM joe;
6404 RevokeRoleStmt::RevokeRoleStmt(const rapidjson::Value& payload) {
6405  if (payload.HasMember("roles")) {
6406  CHECK(payload["roles"].IsArray());
6407  for (auto& role : payload["roles"].GetArray()) {
6408  std::string r = json_str(role);
6409  roles_.emplace_back(r);
6410  }
6411  }
6412  if (payload.HasMember("grantees")) {
6413  CHECK(payload["grantees"].IsArray());
6414  for (auto& grantee : payload["grantees"].GetArray()) {
6415  std::string g = json_str(grantee);
6416  grantees_.emplace_back(g);
6417  }
6418  }
6419 }
6420 
// NOTE(review): the line that opens this definition (doxygen line 6421) is missing
// from this listing; presumably RevokeRoleStmt::execute(session, ...) - confirm.
6422  bool read_only_mode) {
// Revokes every role in get_roles() from every grantee in get_grantees().
// Throws std::runtime_error in read-only mode, when the caller is not a super
// user, or when the root user appears among the grantees.
6423  if (read_only_mode) {
6424  throw std::runtime_error("REVOKE ROLE invalid in read only mode.");
6425  }
6426  const auto& currentUser = session.get_currentUser();
6427  if (!currentUser.isSuper) {
6428  throw std::runtime_error(
6429  "REVOKE failed, because it can only be executed by super user.");
6430  }
// Exact (case-sensitive) match of shared::kRootUsername against the grantee list.
6431  if (std::find(get_grantees().begin(), get_grantees().end(), shared::kRootUsername) !=
6432  get_grantees().end()) {
6433  throw std::runtime_error(
6434  "Request to revoke role failed because privileges can not be revoked from "
6435  "mapd root user.");
6436  }
6437  SysCatalog::instance().revokeRoleBatch(get_roles(), get_grantees());
6438 }
6439 
6440 ExportQueryStmt::ExportQueryStmt(const rapidjson::Value& payload) {
6441  CHECK(payload.HasMember("filePath"));
6442  file_path_ = std::make_unique<std::string>(json_str(payload["filePath"]));
6443 
6444  CHECK(payload.HasMember("query"));
6445  select_stmt_ = std::make_unique<std::string>(json_str(payload["query"]));
6446 
6447  if ((*select_stmt_).back() != ';') {
6448  (*select_stmt_).push_back(';');
6449  }
6450  // Export wrapped everything with ` quotes which need cleanup
6451  boost::replace_all((*select_stmt_), "`", "");
6452 
6453  parse_options(payload, options_);
6454 }
6455 
// NOTE(review): the line that opens this definition (doxygen line 6456) is missing
// from this listing; presumably ExportQueryStmt::execute(session, ...) - confirm.
6457  bool read_only_mode) {
// Runs the stored SELECT and streams its results into a file through a
// QueryExporter, one outer fragment at a time.
6458  // valid in read_only_mode
// The shared_ptr wraps a stack copy of the session, so it is given a
// null_deleter to make sure it never tries to delete that object.
6459  auto session_copy = session;
6460  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
6461  &session_copy, boost::null_deleter());
6462  auto query_state = query_state::QueryState::create(session_ptr, *select_stmt_);
6463  auto stdlog = STDLOG(query_state);
6464  auto query_state_proxy = query_state->createQueryStateProxy();
6465 
// Fall back to a local connector when none has been set.
6466  if (!leafs_connector_) {
6467  leafs_connector_ = std::make_unique<LocalQueryConnector>();
6468  }
6469 
6470  import_export::CopyParams copy_params;
6471  // @TODO(se) move rest to CopyParams when we have a Thrift endpoint
// NOTE(review): the declarations of file_type, file_compression and
// array_null_handling (doxygen lines 6472-6478) are missing from this listing.
6474  std::string layer_name;
6479 
6480  parseOptions(copy_params, file_type, layer_name, file_compression, array_null_handling);
6481 
6482  if (file_path_->empty()) {
6483  throw std::runtime_error("Invalid file path for COPY TO");
6484  } else if (!boost::filesystem::path(*file_path_).is_absolute()) {
// Relative paths keep only their file name and are redirected into a
// per-session directory under <g_base_path>/<export dir>/<session id>/.
6485  std::string file_name = boost::filesystem::path(*file_path_).filename().string();
6486  std::string file_dir = g_base_path + "/" + shared::kDefaultExportDirName + "/" +
6487  session.get_session_id() + "/";
6488  if (!boost::filesystem::exists(file_dir)) {
6489  if (!boost::filesystem::create_directories(file_dir)) {
6490  throw std::runtime_error("Directory " + file_dir + " cannot be created.");
6491  }
6492  }
6493  *file_path_ = file_dir + file_name;
6494  } else {
6495  // Above branch will create a new file in the export directory. If that
6496  // path is not exercised, go through applicable file path validations.
// NOTE(review): the validation call itself (doxygen lines 6497-6498) is missing
// from this listing.
6499  }
6500 
6501  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
6502  auto locks = acquire_query_table_locks(
6503  session_ptr->getCatalog().name(), *select_stmt_, query_state_proxy);
6504 
6505  // get column info
// This first query always goes through a fresh LocalQueryConnector rather than
// leafs_connector_; only its targets_meta is used below.
6506  LocalQueryConnector local_connector;
6507  auto column_info_result =
6508  local_connector.query(query_state_proxy, *select_stmt_, {}, true, false);
6509 
6510  // create exporter for requested file type
6511  auto query_exporter = import_export::QueryExporter::create(file_type);
6512 
6513  // default layer name to file path stem if it wasn't specified
6514  if (layer_name.size() == 0) {
6515  layer_name = boost::filesystem::path(*file_path_).stem().string();
6516  }
6517 
6518  // begin export
6519  query_exporter->beginExport(*file_path_,
6520  layer_name,
6521  copy_params,
6522  column_info_result.targets_meta,
6523  file_compression,
6524  array_null_handling);
6525 
6526  // how many fragments?
6527  size_t outer_frag_count =
6528  leafs_connector_->getOuterFragmentCount(query_state_proxy, *select_stmt_);
// A count of zero still runs the loop once, with no fragment restriction.
6529  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
6530 
6531  // loop fragments
6532  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
6533  // limit the query to just this fragment
6534  std::vector<size_t> allowed_outer_fragment_indices;
6535  if (outer_frag_count) {
6536  allowed_outer_fragment_indices.push_back(outer_frag_idx);
6537  }
6538 
6539  // run the query
6540  std::vector<AggregatedResult> query_results = leafs_connector_->query(
6541  query_state_proxy, *select_stmt_, allowed_outer_fragment_indices, false);
6542 
6543  // export the results
6544  query_exporter->exportResults(query_results);
6545  }
6546 
6547  // end export
6548  query_exporter->endExport();
6549 }
6550 
// NOTE(review): several lines of this definition are missing from this listing:
// the opening line (doxygen 6551), the file_type and file_compression parameters
// (6553, 6555), the default assignments (6558, 6560) and every enum assignment in
// the file_type / file_compression / array_null_handling branches. Presumably this
// is ExportQueryStmt::parseOptions - confirm against the repository.
//
// Translates the WITH (...) options of COPY TO into copy_params and the other
// output arguments. Option names are matched case-insensitively; an unknown
// option name or a value of the wrong kind raises std::runtime_error.
6552  import_export::CopyParams& copy_params,
6554  std::string& layer_name,
6556  import_export::QueryExporter::ArrayNullHandling& array_null_handling) {
6557  // defaults for non-CopyParams values
6559  layer_name.clear();
6561 
6562  if (!options_.empty()) {
6563  for (auto& p : options_) {
// Single-character options: delimiter, quote, escape and line_delimiter each
// require a string literal of length exactly 1.
6564  if (boost::iequals(*p->get_name(), "delimiter")) {
6565  const StringLiteral* str_literal =
6566  dynamic_cast<const StringLiteral*>(p->get_value());
6567  if (str_literal == nullptr) {
6568  throw std::runtime_error("Delimiter option must be a string.");
6569  } else if (str_literal->get_stringval()->length() != 1) {
6570  throw std::runtime_error("Delimiter must be a single character string.");
6571  }
6572  copy_params.delimiter = (*str_literal->get_stringval())[0];
6573  } else if (boost::iequals(*p->get_name(), "nulls")) {
6574  const StringLiteral* str_literal =
6575  dynamic_cast<const StringLiteral*>(p->get_value());
6576  if (str_literal == nullptr) {
6577  throw std::runtime_error("Nulls option must be a string.");
6578  }
6579  copy_params.null_str = *str_literal->get_stringval();
6580  } else if (boost::iequals(*p->get_name(), "header")) {
6581  const StringLiteral* str_literal =
6582  dynamic_cast<const StringLiteral*>(p->get_value());
6583  if (str_literal == nullptr) {
6584  throw std::runtime_error("Header option must be a boolean.");
6585  }
// NOTE(review): the rest of this statement (doxygen lines 6587-6588) is missing
// from this listing.
6586  copy_params.has_header = bool_from_string_literal(str_literal)
6589  } else if (boost::iequals(*p->get_name(), "quote")) {
6590  const StringLiteral* str_literal =
6591  dynamic_cast<const StringLiteral*>(p->get_value());
6592  if (str_literal == nullptr) {
6593  throw std::runtime_error("Quote option must be a string.");
6594  } else if (str_literal->get_stringval()->length() != 1) {
6595  throw std::runtime_error("Quote must be a single character string.");
6596  }
6597  copy_params.quote = (*str_literal->get_stringval())[0];
6598  } else if (boost::iequals(*p->get_name(), "escape")) {
6599  const StringLiteral* str_literal =
6600  dynamic_cast<const StringLiteral*>(p->get_value());
6601  if (str_literal == nullptr) {
6602  throw std::runtime_error("Escape option must be a string.");
6603  } else if (str_literal->get_stringval()->length() != 1) {
6604  throw std::runtime_error("Escape must be a single character string.");
6605  }
6606  copy_params.escape = (*str_literal->get_stringval())[0];
6607  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
6608  const StringLiteral* str_literal =
6609  dynamic_cast<const StringLiteral*>(p->get_value());
6610  if (str_literal == nullptr) {
6611  throw std::runtime_error("Line_delimiter option must be a string.");
6612  } else if (str_literal->get_stringval()->length() != 1) {
6613  throw std::runtime_error("Line_delimiter must be a single character string.");
6614  }
6615  copy_params.line_delim = (*str_literal->get_stringval())[0];
6616  } else if (boost::iequals(*p->get_name(), "quoted")) {
6617  const StringLiteral* str_literal =
6618  dynamic_cast<const StringLiteral*>(p->get_value());
6619  if (str_literal == nullptr) {
6620  throw std::runtime_error("Quoted option must be a boolean.");
6621  }
6622  copy_params.quoted = bool_from_string_literal(str_literal);
6623  } else if (boost::iequals(*p->get_name(), "file_type")) {
6624  const StringLiteral* str_literal =
6625  dynamic_cast<const StringLiteral*>(p->get_value());
6626  if (str_literal == nullptr) {
6627  throw std::runtime_error("File Type option must be a string.");
6628  }
// Enumerated values are lower-cased first, so they match case-insensitively.
6629  auto file_type_str =
6630  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
6631  if (file_type_str == "csv") {
6633  } else if (file_type_str == "geojson") {
6635  } else if (file_type_str == "geojsonl") {
6637  } else if (file_type_str == "shapefile") {
6639  } else if (file_type_str == "flatgeobuf") {
6641  } else {
6642  throw std::runtime_error(
6643  "File Type option must be 'CSV', 'GeoJSON', 'GeoJSONL', "
6644  "'Shapefile', or 'FlatGeobuf'");
6645  }
6646  } else if (boost::iequals(*p->get_name(), "layer_name")) {
6647  const StringLiteral* str_literal =
6648  dynamic_cast<const StringLiteral*>(p->get_value());
6649  if (str_literal == nullptr) {
6650  throw std::runtime_error("Layer Name option must be a string.");
6651  }
6652  layer_name = *str_literal->get_stringval();
6653  } else if (boost::iequals(*p->get_name(), "file_compression")) {
6654  const StringLiteral* str_literal =
6655  dynamic_cast<const StringLiteral*>(p->get_value());
6656  if (str_literal == nullptr) {
6657  throw std::runtime_error("File Compression option must be a string.");
6658  }
6659  auto file_compression_str =
6660  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
6661  if (file_compression_str == "none") {
6663  } else if (file_compression_str == "gzip") {
6665  } else if (file_compression_str == "zip") {
6667  } else {
6668  throw std::runtime_error(
6669  "File Compression option must be 'None', 'GZip', or 'Zip'");
6670  }
6671  } else if (boost::iequals(*p->get_name(), "array_null_handling")) {
6672  const StringLiteral* str_literal =
6673  dynamic_cast<const StringLiteral*>(p->get_value());
6674  if (str_literal == nullptr) {
6675  throw std::runtime_error("Array Null Handling option must be a string.");
6676  }
6677  auto array_null_handling_str =
6678  boost::algorithm::to_lower_copy(*str_literal->get_stringval());
6679  if (array_null_handling_str == "abort") {
6680  array_null_handling =
6682  } else if (array_null_handling_str == "raw") {
6683  array_null_handling =
6685  } else if (array_null_handling_str == "zero") {
6686  array_null_handling =
6688  } else if (array_null_handling_str == "nullfield") {
6689  array_null_handling =
6691  } else {
6692  throw std::runtime_error(
6693  "Array Null Handling option must be 'Abort', 'Raw', 'Zero', or "
6694  "'NullField'");
6695  }
6696  } else {
// Any option name not handled above is rejected.
6697  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
6698  }
6699  }
6700  }
6701 }
6702 
6703 CreateViewStmt::CreateViewStmt(const rapidjson::Value& payload) {
6704  CHECK(payload.HasMember("name"));
6705  view_name_ = json_str(payload["name"]);
6706 
6707  if_not_exists_ = false;
6708  if (payload.HasMember("ifNotExists")) {
6709  if_not_exists_ = json_bool(payload["ifNotExists"]);
6710  }
6711 
6712  CHECK(payload.HasMember("query"));
6713  select_query_ = json_str(payload["query"]);
6714  std::regex newline_re("\\n");
6715  select_query_ = std::regex_replace(select_query_, newline_re, " ");
6716  // ensure a trailing semicolon is present on the select query
6717  if (select_query_.back() != ';') {
6718  select_query_.push_back(';');
6719  }
6720 }
6721 
// NOTE(review): the line that opens this definition (doxygen line 6722) is missing
// from this listing; presumably CreateViewStmt::execute(session, ...) - confirm.
6723  bool read_only_mode) {
// Validates the view's SELECT through Calcite and then registers the view in
// the catalog as a TableDescriptor with isView set.
6724  if (read_only_mode) {
6725  throw std::runtime_error("CREATE VIEW invalid in read only mode.");
6726  }
// The shared_ptr wraps a stack copy of the session, so it is given a
// null_deleter to make sure it never tries to delete that object.
6727  auto session_copy = session;
6728  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
6729  &session_copy, boost::null_deleter());
6730  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
6731  auto stdlog = STDLOG(query_state);
6732  auto& catalog = session.getCatalog();
6733 
// A false result makes the statement a silent no-op; presumably that is the
// IF NOT EXISTS case - confirm in validateNonExistentTableOrView.
6734  if (!catalog.validateNonExistentTableOrView(view_name_, if_not_exists_)) {
6735  return;
6736  }
// NOTE(review): the condition guarding the following throw (doxygen lines
// 6737-6738) is missing from this listing.
6739  throw std::runtime_error("View " + view_name_ +
6740  " will not be created. User has no create view privileges.");
6741  }
6742 
6743  const auto query_after_shim = pg_shim(select_query_);
6744  auto calcite_mgr = catalog.getCalciteMgr();
6745 
6746  // this now also ensures that access permissions are checked
6747  const auto calciteQueryParsingOption =
6748  calcite_mgr->getCalciteQueryParsingOption(true, false, true, false);
6749  const auto calciteOptimizationOption = calcite_mgr->getCalciteOptimizationOption(
6750  false, g_enable_watchdog, {}, SysCatalog::instance().isAggregator());
// The result of process() is discarded; the call is made for validation only.
6751  calcite_mgr->process(query_state->createQueryStateProxy(),
6752  query_after_shim,
6753  calciteQueryParsingOption,
6754  calciteOptimizationOption);
6755 
6756  // Take write lock after the query is processed to ensure no deadlocks
6757  const auto execute_write_lock = legacylockmgr::getExecuteWriteLock();
6758 
6759  TableDescriptor td;
6760  td.tableName = view_name_;
6761  td.userId = session.get_currentUser().userId;
6762  td.nColumns = 0;
6763  td.isView = true;
// The view stores the shimmed query text, not the text the user submitted.
6764  td.viewSQL = query_after_shim;
6765  td.fragmenter = nullptr;
// NOTE(review): doxygen lines 6766 and 6771-6772 (further td assignments) are
// missing from this listing.
6767  td.maxFragRows = DEFAULT_FRAGMENT_ROWS; // @todo this stuff should not be
6768  // InsertOrderFragmenter
6769  td.maxChunkSize = DEFAULT_MAX_CHUNK_SIZE; // @todo this stuff should not be
6770  // InsertOrderFragmenter
6773  catalog.createTable(td, {}, {}, true);
6774 
6775  // TODO (max): It's transactionally unsafe, should be fixed: we may create
6776  // object w/o privileges
6777  SysCatalog::instance().createDBObject(
6778  session.get_currentUser(), view_name_, ViewDBObjectType, catalog);
6779 }
6780 
6781 DropViewStmt::DropViewStmt(const rapidjson::Value& payload) {
6782  CHECK(payload.HasMember("viewName"));
6783  view_name_ = std::make_unique<std::string>(json_str(payload["viewName"]));
6784 
6785  if_exists_ = false;
6786  if (payload.HasMember("ifExists")) {
6787  if_exists_ = json_bool(payload["ifExists"]);
6788  }
6789 }
6790 
// NOTE(review): the line that opens this definition (doxygen line 6791) is missing
// from this listing; presumably DropViewStmt::execute(session, ...) - confirm.
6792  bool read_only_mode) {
// Drops the named view after acquiring a schema write lock on it and checking
// that the session has the privilege to drop it.
6793  if (read_only_mode) {
6794  throw std::runtime_error("DROP VIEW invalid in read only mode.");
6795  }
6796  auto& catalog = session.getCatalog();
6797 
6798  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
6799 
6800  const TableDescriptor* td{nullptr};
6801  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
6802 
6803  try {
// NOTE(review): the call that produces the lock container (doxygen line 6806) is
// missing from this listing; only its argument list survives on the line below.
6804  td_with_lock =
6805  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
6807  catalog, *view_name_, false));
6808  td = (*td_with_lock)();
6809  } catch (const std::runtime_error& e) {
// With IF EXISTS, any runtime_error raised while locking is swallowed and the
// statement becomes a no-op.
6810  if (if_exists_) {
6811  return;
6812  } else {
// NOTE(review): `throw e;` throws a copy typed as std::runtime_error, so a more
// derived exception type is sliced; a bare `throw;` would rethrow the original.
6813  throw e;
6814  }
6815  }
6816 
6817  CHECK(td);
6818  CHECK(td_with_lock);
6819 
// NOTE(review): the arguments of checkDBAccessPrivileges (doxygen line 6821) are
// missing from this listing.
6820  if (!session.checkDBAccessPrivileges(
6822  throw std::runtime_error("View " + *view_name_ +
6823  " will not be dropped. User has no drop view privileges.");
6824  }
6825 
// NOTE(review): doxygen line 6826 is missing from this listing.
6827  catalog.dropTable(td);
6828 }
6829 
6830 static void checkStringLiteral(const std::string& option_name,
6831  const std::unique_ptr<NameValueAssign>& p) {
6832  CHECK(p);
6833  if (!dynamic_cast<const StringLiteral*>(p->get_value())) {
6834  throw std::runtime_error(option_name + " option must be a string literal.");
6835  }
6836 }
6837 
6838 CreateDBStmt::CreateDBStmt(const rapidjson::Value& payload) {
6839  CHECK(payload.HasMember("name"));
6840  db_name_ = std::make_unique<std::string>(json_str(payload["name"]));
6841 
6842  if_not_exists_ = false;
6843  if (payload.HasMember("ifNotExists")) {
6844  if_not_exists_ = json_bool(payload["ifNotExists"]);
6845  }
6846 
6847  parse_options(payload, options_);
6848 }
6849 
// NOTE(review): the line that opens this definition (doxygen line 6850) is missing
// from this listing; presumably CreateDBStmt::execute(session, ...) - confirm.
6851  bool read_only_mode) {
// Creates a database. Only a super user may run this; the owner defaults to the
// current user and can be overridden with the OWNER option.
6852  if (read_only_mode) {
6853  throw std::runtime_error("CREATE DATABASE invalid in read only mode.");
6854  }
6855  if (!session.get_currentUser().isSuper) {
6856  throw std::runtime_error(
6857  "CREATE DATABASE command can only be executed by super user.");
6858  }
6859 
6860  const auto execute_write_lock = legacylockmgr::getExecuteWriteLock();
6861 
// NOTE(review): the declaration of db_meta (doxygen line 6862) is missing from
// this listing.
// An existing database is a silent no-op only with IF NOT EXISTS; without it,
// execution continues to createDatabase() below.
6863  if (SysCatalog::instance().getMetadataForDB(*db_name_, db_meta) && if_not_exists_) {
6864  return;
6865  }
6866  int ownerId = session.get_currentUser().userId;
6867  if (!options_.empty()) {
6868  for (auto& p : options_) {
6869  if (boost::iequals(*p->get_name(), "owner")) {
// checkStringLiteral throws unless the value is a StringLiteral, so the
// static_cast below is only reached for a StringLiteral.
6870  checkStringLiteral("Owner name", p);
6871  const std::string* str =
6872  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
// NOTE(review): the declaration of user (doxygen line 6873) is missing from this
// listing.
6874  if (!SysCatalog::instance().getMetadataForUser(*str, user)) {
6875  throw std::runtime_error("User " + *str + " does not exist.");
6876  }
6877  ownerId = user.userId;
6878  } else {
6879  throw std::runtime_error("Invalid CREATE DATABASE option " + *p->get_name() +
6880  ". Only OWNER supported.");
6881  }
6882  }
6883  }
6884  SysCatalog::instance().createDatabase(*db_name_, ownerId);
6885 }
6886 
6887 DropDBStmt::DropDBStmt(const rapidjson::Value& payload) {
6888  CHECK(payload.HasMember("name"));
6889  db_name_ = std::make_unique<std::string>(json_str(payload["name"]));
6890 
6891  if_exists_ = false;
6892  if (payload.HasMember("ifExists")) {
6893  if_exists_ = json_bool(payload["ifExists"]);
6894  }
6895 }
6896 
// NOTE(review): the line that opens this definition (doxygen line 6897) is missing
// from this listing; presumably DropDBStmt::execute(session, ...) - confirm.
6898  bool read_only_mode) {
// Drops a database. Allowed for super users and for the database owner.
6899  if (read_only_mode) {
6900  throw std::runtime_error("DROP DATABASE invalid in read only mode.");
6901  }
6902  const auto execute_write_lock = legacylockmgr::getExecuteWriteLock();
6903 
// NOTE(review): the declaration of db (doxygen line 6904) is missing from this
// listing.
6905  if (!SysCatalog::instance().getMetadataForDB(*db_name_, db)) {
// A missing database is an error unless IF EXISTS was given.
6906  if (if_exists_) {
6907  return;
6908  }
6909  throw std::runtime_error("Database " + *db_name_ + " does not exist.");
6910  }
6911 
6912  if (!session.get_currentUser().isSuper &&
6913  session.get_currentUser().userId != db.dbOwner) {
6914  throw std::runtime_error(
6915  "DROP DATABASE command can only be executed by the owner or by a super "
6916  "user.");
6917  }
6918 
6919  SysCatalog::instance().dropDatabase(db);
6920 }
6921 
6922 static bool readBooleanLiteral(const std::string& option_name,
6923  const std::unique_ptr<NameValueAssign>& p) {
6924  CHECK(p);
6925  const std::string* str =
6926  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
6927  if (boost::iequals(*str, "true")) {
6928  return true;
6929  } else if (boost::iequals(*str, "false")) {
6930  return false;
6931  } else {
6932  throw std::runtime_error("Value to " + option_name + " must be TRUE or FALSE.");
6933  }
6934 }
6935 
6936 CreateUserStmt::CreateUserStmt(const rapidjson::Value& payload) {
6937  CHECK(payload.HasMember("name"));
6938  user_name_ = std::make_unique<std::string>(json_str(payload["name"]));
6939 
6940  parse_options(payload, options_);
6941 }
6942 
// NOTE(review): the line that opens this definition (doxygen line 6943) is missing
// from this listing; presumably CreateUserStmt::execute(session, ...) - confirm.
6944  bool read_only_mode) {
// Creates a user from the PASSWORD, IS_SUPER, DEFAULT_DB and CAN_LOGIN options.
// Only a super user may run this.
6945  if (read_only_mode) {
6946  throw std::runtime_error("CREATE USER invalid in read only mode.");
6947  }
// NOTE(review): the declaration of alts (doxygen line 6948) is missing from this
// listing.
6949  for (auto& p : options_) {
// Every option value must be a string literal; checkStringLiteral throws
// otherwise, which is what makes the static_casts below safe.
6950  if (boost::iequals(*p->get_name(), "password")) {
6951  checkStringLiteral("Password", p);
6952  alts.passwd = *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
6953  } else if (boost::iequals(*p->get_name(), "is_super")) {
6954  checkStringLiteral("IS_SUPER", p);
6955  alts.is_super = readBooleanLiteral("IS_SUPER", p);
6956  } else if (boost::iequals(*p->get_name(), "default_db")) {
6957  checkStringLiteral("DEFAULT_DB", p);
6958  alts.default_db =
6959  *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
6960  } else if (boost::iequals(*p->get_name(), "can_login")) {
6961  checkStringLiteral("CAN_LOGIN", p);
6962  alts.can_login = readBooleanLiteral("can_login", p);
6963  } else {
6964  throw std::runtime_error("Invalid CREATE USER option " + *p->get_name() +
6965  ". Should be PASSWORD, IS_SUPER, CAN_LOGIN"
6966  " or DEFAULT_DB.");
6967  }
6968  }
// The super-user check runs after option parsing, so a non-super caller with a
// malformed option sees the option error rather than this one.
6969  if (!session.get_currentUser().isSuper) {
6970  throw std::runtime_error("Only super user can create new users.");
6971  }
6972  SysCatalog::instance().createUser(*user_name_, alts, /*is_temporary=*/false);
6973 }
6974 
6975 AlterUserStmt::AlterUserStmt(const rapidjson::Value& payload) {
6976  CHECK(payload.HasMember("name"));
6977  user_name_ = std::make_unique<std::string>(json_str(payload["name"]));
6978 
6979  parse_options(payload, options_, true, false);
6980 }
6981 
// NOTE(review): the line that opens this definition (doxygen line 6982) is missing
// from this listing; presumably AlterUserStmt::execute(session, ...) - confirm.
6983  bool read_only_mode) {
// Applies PASSWORD, IS_SUPER, DEFAULT_DB and CAN_LOGIN changes to an existing
// user. A super user may alter anyone; other users may alter only themselves,
// and not their IS_SUPER or CAN_LOGIN settings.
6984  if (read_only_mode) {
6985  throw std::runtime_error("ALTER USER invalid in read only mode.");
6986  }
6987  // Parse the statement
// NOTE(review): the declaration of alts (doxygen line 6988) is missing from this
// listing.
6989  for (auto& p : options_) {
6990  if (boost::iequals(*p->get_name(), "password")) {
6991  checkStringLiteral("Password", p);
6992  alts.passwd = *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
6993  } else if (boost::iequals(*p->get_name(), "is_super")) {
6994  checkStringLiteral("IS_SUPER", p);
6995  alts.is_super = readBooleanLiteral("IS_SUPER", p);
6996  } else if (boost::iequals(*p->get_name(), "default_db")) {
// DEFAULT_DB accepts a string literal or NULL; NULL is stored as "".
6997  if (dynamic_cast<const StringLiteral*>(p->get_value())) {
6998  alts.default_db =
6999  *static_cast<const StringLiteral*>(p->get_value())->get_stringval();
7000  } else if (dynamic_cast<const NullLiteral*>(p->get_value())) {
7001  alts.default_db = "";
7002  } else {
7003  throw std::runtime_error(
7004  "DEFAULT_DB option must be either a string literal or a NULL "
7005  "literal.");
7006  }
7007  } else if (boost::iequals(*p->get_name(), "can_login")) {
// NOTE(review): unlike the IS_SUPER branch, no checkStringLiteral call precedes
// readBooleanLiteral here.
7008  alts.can_login = readBooleanLiteral("CAN_LOGIN", p);
7009  } else {
7010  throw std::runtime_error("Invalid ALTER USER option " + *p->get_name() +
7011  ". Should be PASSWORD, DEFAULT_DB, CAN_LOGIN"
7012  " or IS_SUPER.");
7013  }
7014  }
7015 
7016  // Check if the user is authorized to execute ALTER USER statement
// NOTE(review): the declaration of user (doxygen line 7017) is missing from this
// listing.
7018  if (!SysCatalog::instance().getMetadataForUser(*user_name_, user)) {
7019  throw std::runtime_error("User " + *user_name_ + " does not exist.");
7020  }
7021  if (!session.get_currentUser().isSuper) {
7022  if (session.get_currentUser().userId != user.userId) {
7023  throw std::runtime_error("Only super user can change another user's attributes.");
// NOTE(review): presumably is_super / can_login are optional-like fields, so this
// tests whether the option was supplied at all rather than its value - confirm
// against the declaration of the alterations struct.
7024  } else if (alts.is_super || alts.can_login) {
7025  throw std::runtime_error(
7026  "A user can only update their own password or default database.");
7027  }
7028  }
7029 
7030  SysCatalog::instance().alterUser(*user_name_, alts);
7031 }
7032 
7033 DropUserStmt::DropUserStmt(const rapidjson::Value& payload) {
7034  CHECK(payload.HasMember("name"));
7035  user_name_ = std::make_unique<std::string>(json_str(payload["name"]));
7036 
7037  if_exists_ = false;
7038  if (payload.HasMember("ifExists")) {
7039  if_exists_ = json_bool(payload["ifExists"]);
7040  }
7041 }
7042 
// NOTE(review): the line that opens this definition (doxygen line 7043) is missing
// from this listing; presumably DropUserStmt::execute(session, ...) - confirm.
7044  bool read_only_mode) {
// Drops the named user. Throws std::runtime_error in read-only mode or when the
// caller is not a super user; if_exists_ is forwarded to SysCatalog::dropUser.
7045  if (read_only_mode) {
7046  throw std::runtime_error("DROP USER invalid in read only mode.");
7047  }
7048 
7049  if (!session.get_currentUser().isSuper) {
7050  throw std::runtime_error("Only super user can drop users.");
7051  }
7052 
7053  SysCatalog::instance().dropUser(*user_name_, if_exists_);
7054 }
7055 
// String constants naming compression / decompression programs, plus "none".
namespace Compress {
const std::string sGZIP{"gzip"};
const std::string sUNGZIP{"gunzip"};
const std::string sLZ4{"lz4"};
const std::string sUNLZ4{"unlz4"};
const std::string sNONE{"none"};
}  // namespace Compress
7063 
7064 namespace {
7065 std::optional<std::string> get_string_option(const NameValueAssign* option,
7066  const std::string& option_name) {
7067  CHECK(option);
7068  if (boost::iequals(*option->get_name(), option_name)) {
7069  if (const auto str_literal =
7070  dynamic_cast<const StringLiteral*>(option->get_value())) {
7071  return *str_literal->get_stringval();
7072  } else {
7073  throw std::runtime_error("\"" + option_name + "\" option must be a string.");
7074  }
7075  }
7076  return {};
7077 }
7078 } // namespace
7079 
7081  const bool is_restore) {
// Common construction for DUMP TABLE / RESTORE TABLE statements: reads the mandatory
// "tableName" and "filePath" payload members, picks a default compression, applies
// the WITH options, and finally verifies that the required external programs exist.
// NOTE(review): the first line of this signature is not visible in this listing.
7082  CHECK(payload.HasMember("tableName"));
7083  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
7084 
7085  CHECK(payload.HasMember("filePath"));
7086  path_ = std::make_unique<std::string>(json_str(payload["filePath"]));
7087 
// Start from the default; an explicit "compression" option below overrides it.
7088  compression_ = defaultCompression(is_restore);
7089 
7090  std::list<std::unique_ptr<NameValueAssign>> options;
7091  parse_options(payload, options);
7092 
7093  if (!options.empty()) {
7094  for (const auto& option : options) {
// get_string_option() yields an empty optional when the option name does not
// match, so each option falls through the chain until a name matches.
7095  if (auto compression = get_string_option(option.get(), "compression");
7096  compression.has_value()) {
7097  compression_ = validateCompression(compression.value(), is_restore);
// The S3 options are only recognized in builds with HAVE_AWS_S3 defined; without
// it they end up in the "Invalid WITH option" branch.
7098 #ifdef HAVE_AWS_S3
7099  } else if (auto s3_access_key = get_string_option(option.get(), "s3_access_key");
7100  s3_access_key.has_value()) {
7101  s3_options_.s3_access_key = s3_access_key.value();
7102  } else if (auto s3_secret_key = get_string_option(option.get(), "s3_secret_key");
7103  s3_secret_key.has_value()) {
7104  s3_options_.s3_secret_key = s3_secret_key.value();
7105  } else if (auto s3_session_token =
7106  get_string_option(option.get(), "s3_session_token");
7107  s3_session_token.has_value()) {
7108  s3_options_.s3_session_token = s3_session_token.value();
7109  } else if (auto s3_region = get_string_option(option.get(), "s3_region");
7110  s3_region.has_value()) {
7111  s3_options_.s3_region = s3_region.value();
7112  } else if (auto s3_endpoint = get_string_option(option.get(), "s3_endpoint");
7113  s3_endpoint.has_value()) {
7114  s3_options_.s3_endpoint = s3_endpoint.value();
7115 #endif
7116  } else {
7117  throw std::runtime_error("Invalid WITH option: " + *option->get_name());
7118  }
7119  }
7120  }
7121 
// Fail at construction time if any of these external programs cannot be located
// by boost::process::search_path.
7122  for (const auto& program : {"tar", "rm", "mkdir", "mv", "cat"}) {
7123  if (boost::process::search_path(program).empty()) {
7124  throw std::runtime_error{"Required program \"" + std::string{program} +
7125  "\" was not found."};
7126  }
7127  }
7128 }
7129 
7130 // select default compression type based upon available executables
7132  const bool is_restore) {
// Preference order: gzip, then lz4, otherwise no compression. For a restore the
// decompression program (gunzip / unlz4) is looked up instead of the compressor.
7133  if (boost::process::search_path(is_restore ? Compress::sUNGZIP : Compress::sGZIP)
7134  .string()
7135  .size()) {
7136  return CompressionType::kGZIP;
7137  } else if (boost::process::search_path(is_restore ? Compress::sUNLZ4 : Compress::sLZ4)
7138  .string()
7139  .size()) {
7140  return CompressionType::kLZ4;
7141  }
// Neither program was found.
7142  return CompressionType::kNONE;
7143 }
7144 
7146  const std::string& compression_type,
7147  const bool is_restore) {
// Maps a user-supplied compression name to a CompressionType.
// Throws std::runtime_error when the name is not gzip / lz4 / none, or when the
// program needed for the chosen compression cannot be found by search_path.
7148  // only allow ('gzip', 'lz4', 'none') compression types
// The lower-cased copy is also the text reported in the "not supported" error.
7149  const std::string compression = boost::algorithm::to_lower_copy(compression_type);
7150 
7151  // verify correct compression executable is available
7152  if (boost::iequals(compression, Compress::sGZIP)) {
// Restore needs the decompressor, dump needs the compressor.
7153  const auto prog_name = is_restore ? Compress::sUNGZIP : Compress::sGZIP;
7154  const auto prog_path = boost::process::search_path(prog_name);
7155  if (prog_path.string().empty()) {
7156  throw std::runtime_error("Compression program " + prog_name + " is not found.");
7157  }
7158  return CompressionType::kGZIP;
7159 
7160  } else if (boost::iequals(compression, Compress::sLZ4)) {
7161  const auto prog_name = is_restore ? Compress::sUNLZ4 : Compress::sLZ4;
7162  const auto prog_path = boost::process::search_path(prog_name);
7163  if (prog_path.string().empty()) {
7164  throw std::runtime_error("Compression program " + prog_name + " is not found.");
7165  }
7166  return CompressionType::kLZ4;
7167 
7168  } else if (!boost::iequals(compression, Compress::sNONE)) {
7169  throw std::runtime_error("Compression program " + compression + " is not supported.");
7170  }
7171 
// Only the explicit "none" value reaches this point.
7172  return CompressionType::kNONE;
7173 }
7174 
7175 // construct a valid tar option string for compression setting
7177  const bool is_restore) {
// For gzip and lz4 the result is "--use-compress-program=<program>", where the
// program is the decompressor when restoring and the compressor when dumping.
7178  if (compression_type == CompressionType::kGZIP) {
7179  return "--use-compress-program=" + (is_restore ? Compress::sUNGZIP : Compress::sGZIP);
7180  } else if (compression_type == CompressionType::kLZ4) {
7181  return "--use-compress-program=" + (is_restore ? Compress::sUNLZ4 : Compress::sLZ4);
7182  }
7183  // kNONE uses "none" as a user input, but an empty string "" for tar
7184  return "";
7185 }
7186 
7187 DumpTableStmt::DumpTableStmt(const rapidjson::Value& payload)
7188  : DumpRestoreTableStmtBase(payload, false) {}
7189 
7191  bool read_only_mode) {
// Executes DUMP TABLE: archives the table named by table_ to path_ through
// TableArchiver, using the tar option that matches compression_.
// NOTE(review): several lines of this function (the signature's first line, the
// table lock acquisition and the privilege-check conditions) are missing from this
// listing; the comments here describe only what is visible.
7192  // valid in read_only_mode
7193  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7194 
7195  auto& catalog = session.getCatalog();
7196  // Prevent modification of the table schema during a dump operation, while allowing
7197  // concurrent inserts.
7198  auto table_read_lock =
7200 
// Per the error messages below, both select and create privileges are required.
7201  // check access privileges
7204  *table_)) {
7205  throw std::runtime_error("Table " + *table_ +
7206  " will not be dumped. User has no select privileges.");
7207  }
7210  throw std::runtime_error("Table " + *table_ +
7211  " will not be dumped. User has no create privileges.");
7212  }
7213  const TableDescriptor* td = catalog.getMetadataForTable(*table_);
7214  TableArchiver table_archiver(&catalog);
// is_restore = false: request the compressor program option for tar.
7215  table_archiver.dumpTable(td, *path_, tarCompressionStr(compression_, false));
7216 }
7217 
7218 RestoreTableStmt::RestoreTableStmt(const rapidjson::Value& payload)
7219  : DumpRestoreTableStmtBase(payload, true) {}
7220 
7222  bool read_only_mode) {
// Executes RESTORE TABLE. Throws std::runtime_error in read-only mode, when the
// target table already exists, or when the privilege check fails.
// NOTE(review): the signature's first line and the privilege-check condition are
// missing from this listing.
7223  if (read_only_mode) {
7224  throw std::runtime_error("RESTORE TABLE invalid in read only mode.");
7225  }
7226  auto& catalog = session.getCatalog();
7227  const TableDescriptor* td = catalog.getMetadataForTable(*table_, false);
7228  if (td) {
7229  // TODO: v1.0 simply throws to avoid accidentally overwriting the target table.
7230  // Will add a REPLACE TABLE to explicitly replace the target table.
7231  // catalog.restoreTable(session, td, *path, compression_);
7232  // TODO (yoonmin): if the above feature is delivered, we have to invalidate cached
7233  // items for the table
7234  throw std::runtime_error("Table " + *table_ + " exists.");
7235  } else {
7236  // check access privileges
7239  throw std::runtime_error("Table " + *table_ +
7240  " will not be restored. User has no create privileges.");
7241  }
7242  TableArchiver table_archiver(&catalog);
// is_restore = true: request the decompressor program option for tar; the S3
// options collected by the constructor are forwarded to the archiver.
7243  table_archiver.restoreTable(
7244  session, *table_, *path_, tarCompressionStr(compression_, true), s3_options_);
7245  }
7246 }
7247 
// Runs queryStr through Calcite (after pg_shim) and converts the resulting JSON plan
// into a parser statement via create_stmt_for_json.
// NOTE(review): two argument lines of the getCalciteOptimizationOption call are
// missing from this listing, and the meaning of the positional boolean flags is not
// visible here.
7248 std::unique_ptr<Parser::Stmt> create_stmt_for_query(
7249  const std::string& queryStr,
7250  const Catalog_Namespace::SessionInfo& session_info) {
// QueryState::create is given a shared_ptr; it points at a local copy of the
// session, and null_deleter keeps the shared_ptr from deleting that stack object.
7251  auto session_copy = session_info;
7252  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
7253  &session_copy, boost::null_deleter());
7254  auto query_state = query_state::QueryState::create(session_ptr, queryStr);
7255  const auto& cat = session_info.getCatalog();
7256  auto calcite_mgr = cat.getCalciteMgr();
7257  const auto calciteQueryParsingOption =
7258  calcite_mgr->getCalciteQueryParsingOption(true, false, true, false);
7259  const auto calciteOptimizationOption = calcite_mgr->getCalciteOptimizationOption(
7260  false,
7262  {},
// Only the plan_result member of the Calcite response is used.
7264  const auto query_json = calcite_mgr
7265  ->process(query_state->createQueryStateProxy(),
7266  pg_shim(queryStr),
7267  calciteQueryParsingOption,
7268  calciteOptimizationOption)
7269  .plan_result;
7270  return create_stmt_for_json(query_json);
7271 }
7272 
7273 std::unique_ptr<Parser::Stmt> create_stmt_for_json(const std::string& query_json) {
7274  CHECK(!query_json.empty());
7275  VLOG(2) << "Parsing JSON DDL from Calcite: " << query_json;
7276  rapidjson::Document ddl_query;
7277  ddl_query.Parse(query_json);
7278  CHECK(ddl_query.IsObject());
7279  CHECK(ddl_query.HasMember("payload"));
7280  CHECK(ddl_query["payload"].IsObject());
7281  const auto& payload = ddl_query["payload"].GetObject();
7282  CHECK(payload.HasMember("command"));
7283  CHECK(payload["command"].IsString());
7284 
7285  const auto& ddl_command = std::string_view(payload["command"].GetString());
7286 
7287  Parser::Stmt* stmt = nullptr;
7288  if (ddl_command == "CREATE_TABLE") {
7289  stmt = new Parser::CreateTableStmt(payload);
7290  } else if (ddl_command == "DROP_TABLE") {
7291  stmt = new Parser::DropTableStmt(payload);
7292  } else if (ddl_command == "RENAME_TABLE") {
7293  stmt = new Parser::RenameTableStmt(payload);
7294  } else if (ddl_command == "ALTER_TABLE") {
7295  // no-op: fall-back to DdlCommandExecutor by returning a nullptr
7296  } else if (ddl_command == "TRUNCATE_TABLE") {
7297  stmt = new Parser::TruncateTableStmt(payload);
7298  } else if (ddl_command == "DUMP_TABLE") {
7299  stmt = new Parser::DumpTableStmt(payload);
7300  } else if (ddl_command == "RESTORE_TABLE") {
7301  stmt = new Parser::RestoreTableStmt(payload);
7302  } else if (ddl_command == "OPTIMIZE_TABLE") {
7303  stmt = new Parser::OptimizeTableStmt(payload);
7304  } else if (ddl_command == "COPY_TABLE") {
7305  stmt = new Parser::CopyTableStmt(payload);
7306  } else if (ddl_command == "EXPORT_QUERY") {
7307  stmt = new Parser::ExportQueryStmt(payload);
7308  } else if (ddl_command == "CREATE_VIEW") {
7309  stmt = new Parser::CreateViewStmt(payload);
7310  } else if (ddl_command == "DROP_VIEW") {
7311  stmt = new Parser::DropViewStmt(payload);
7312  } else if (ddl_command == "CREATE_DB") {
7313  stmt = new Parser::CreateDBStmt(payload);
7314  } else if (ddl_command == "DROP_DB") {
7315  stmt = new Parser::DropDBStmt(payload);
7316  } else if (ddl_command == "RENAME_DB") {
7317  stmt = new Parser::RenameDBStmt(payload);
7318  } else if (ddl_command == "CREATE_USER") {
7319  stmt = new Parser::CreateUserStmt(payload);
7320  } else if (ddl_command == "DROP_USER") {
7321  stmt = new Parser::DropUserStmt(payload);
7322  } else if (ddl_command == "ALTER_USER") {
7323  stmt = new Parser::AlterUserStmt(payload);
7324  } else if (ddl_command == "RENAME_USER") {
7325  stmt = new Parser::RenameUserStmt(payload);
7326  } else if (ddl_command == "CREATE_ROLE") {
7327  stmt = new Parser::CreateRoleStmt(payload);
7328  } else if (ddl_command == "DROP_ROLE") {
7329  stmt = new Parser::DropRoleStmt(payload);
7330  } else if (ddl_command == "GRANT_ROLE") {
7331  stmt = new Parser::GrantRoleStmt(payload);
7332  } else if (ddl_command == "REVOKE_ROLE") {
7333  stmt = new Parser::RevokeRoleStmt(payload);
7334  } else if (ddl_command == "GRANT_PRIVILEGE") {
7335  stmt = new Parser::GrantPrivilegesStmt(payload);
7336  } else if (ddl_command == "REVOKE_PRIVILEGE") {
7337  stmt = new Parser::RevokePrivilegesStmt(payload);
7338  } else if (ddl_command == "CREATE_DATAFRAME") {
7339  stmt = new Parser::CreateDataframeStmt(payload);
7340  } else if (ddl_command == "CREATE_MODEL") {
7341  stmt = new Parser::CreateModelStmt(payload);
7342  } else if (ddl_command == "DROP_MODEL") {
7343  stmt = new Parser::DropModelStmt(payload);
7344  } else if (ddl_command == "VALIDATE_SYSTEM") {
7345  // VALIDATE should have been excuted in outer context before it reaches here
7346  UNREACHABLE(); // not-implemented alterType
7347  } else {
7348  throw std::runtime_error("Unsupported DDL command");
7349  }
7350  return std::unique_ptr<Parser::Stmt>(stmt);
7351 }
7352 
7354  const std::string& query_json,
7355  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr,
7356  bool read_only_mode) {
// Builds the statement for query_json and executes it when it is a DDLStmt.
// NOTE(review): the first line of this signature is missing from this listing.
7357  std::unique_ptr<Parser::Stmt> stmt = create_stmt_for_json(query_json);
// A null statement (create_stmt_for_json returns nullptr for ALTER_TABLE) or a
// statement that is not a DDLStmt makes the cast yield nullptr, and nothing runs.
7358  auto ddl = dynamic_cast<Parser::DDLStmt*>(stmt.get());
7359  if (ddl != nullptr) {
7360  (*ddl).execute(*session_ptr, read_only_mode);
7361  }
7362 }
7363 
// Parses and validates the value given for the FRAGMENT_SIZE option.
//
// @param fragment_size_str textual value of the option
// @return the fragment size, guaranteed to be within [1, INT32_MAX]
// @throws std::runtime_error if the text cannot be parsed as an integer, or the parsed
//         value is outside [1, INT32_MAX]
int32_t validate_and_get_fragment_size(const std::string& fragment_size_str) {
  constexpr int64_t max_fragment_size = std::numeric_limits<int32_t>::max();
  int64_t fragment_size_value{-1};
  bool validation_failed{false};
  try {
    fragment_size_value = std::stoll(fragment_size_str);
  } catch (const std::invalid_argument&) {
    // std::stoll throws this when no conversion can be performed (e.g. "" or "abc").
    // Report it through the same user-facing error below instead of letting the raw
    // exception escape.
    validation_failed = true;
  } catch (const std::out_of_range&) {
    // The value does not fit into a long long.
    validation_failed = true;
  }
  if (!validation_failed &&
      (fragment_size_value <= 0 || fragment_size_value > max_fragment_size)) {
    validation_failed = true;
  }
  if (validation_failed) {
    throw std::runtime_error(
        "Invalid value \"" + fragment_size_str +
        "\" provided for FRAGMENT_SIZE option, expected a positive integer between "
        "1 and " +
        std::to_string(max_fragment_size) + ".");
  }
  // The range check above guarantees that the value fits into int32_t.
  return static_cast<int32_t>(fragment_size_value);
}
7386 } // namespace Parser
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
bool repair_type(std::list< std::unique_ptr< NameValueAssign >> &options)
std::optional< std::string > passwd
Definition: SysCatalog.h:117
int8_t tinyintval
Definition: Datum.h:71
SQLTypes to_sql_type(const std::string &type_name)
const ColumnConstraintDef * get_column_constraint() const
Definition: ParserNode.h:830
decltype(auto) get_max_rows_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
int32_t maxRollbackEpochs
std::string s3_secret_key
Definition: TableArchiver.h:26
void validate_non_foreign_table_write(const TableDescriptor *table_descriptor)
Definition: FsiUtils.h:22
static const AccessPrivileges VIEW_SQL_EDITOR
Definition: DBObject.h:152
decltype(auto) get_max_chunk_size_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
std::string to_lower(const std::string &str)
bool geo_promoted_type_match(const SQLTypes a, const SQLTypes b)
Definition: sqltypes.h:2029
static const std::map< const std::string, const TableDefFuncPtr > tableDefFuncMap
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
void set_compression(EncodingType c)
Definition: sqltypes.h:479
ValidateStmt(std::string *type, std::list< NameValueAssign * > *with_opts)
bool hasData(SubstituteMap &sMap, std::string tableName)
std::unique_ptr< std::string > username_
Definition: ParserNode.h:1320
SQLAgg
Definition: sqldefs.h:73
std::vector< std::string > privileges_
Definition: ParserNode.h:1575
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::string s3_secret_key
Definition: CopyParams.h:62
size_t shard_column_index(const std::string &name, const std::list< ColumnDescriptor > &columns)
const Literal * get_value() const
Definition: ParserNode.h:965
CreateDBStmt(const rapidjson::Value &payload)
const std::string & get_column() const
Definition: ParserNode.h:951
std::vector< std::unique_ptr< lockmgr::AbstractLockContainer< const TableDescriptor * >>> LockedTableDescriptors
Definition: LockMgr.h:272
double g_running_query_interrupt_freq
Definition: Execute.cpp:137
std::string get_ml_model_type_str(const MLModelType model_type)
Definition: MLModelType.h:27
int32_t raster_scanlines_per_thread
Definition: CopyParams.h:90
RevokePrivilegesStmt(std::list< std::string * > *p, std::string *t, std::string *o, std::list< std::string * > *g)
Definition: ParserNode.h:1587
static const AccessPrivileges VIEW_DASHBOARD
Definition: DBObject.h:171
bool check_session_interrupted(const QuerySessionId &query_session, Executor *executor)
Definition: Importer.cpp:124
GrantPrivilegesStmt(std::list< std::string * > *p, std::string *t, std::string *o, std::list< std::string * > *g)
Definition: ParserNode.h:1557
static const int32_t DROP_VIEW
Definition: DBObject.h:113
ExportQueryStmt(std::string *q, std::string *p, std::list< NameValueAssign * > *o)
Definition: ParserNode.h:1837
#define NULL_DOUBLE
std::unique_ptr< QueryConnector > leafs_connector_
Definition: ParserNode.h:1851
boost::function< void(TableDescriptor &, const Parser::NameValueAssign *, const std::list< ColumnDescriptor > &columns)> TableDefFuncPtr
Definition: ParserNode.cpp:98
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
std::unique_ptr< std::string > new_database_name_
Definition: ParserNode.h:1306
std::unique_ptr< std::string > view_name_
Definition: ParserNode.h:1906
std::shared_ptr< ResultSet > getResultSet(QueryStateProxy query_state_proxy, const std::string select_stmt, std::vector< TargetMetaInfo > &targets, bool validate_only=false, std::vector< size_t > outer_fragment_indices={}, bool allow_interrupt=false)
SQLType * get_column_type() const
Definition: ParserNode.h:828
std::list< std::unique_ptr< NameValueAssign > > storage_options_
Definition: ParserNode.h:1120
QueryStateProxy createQueryStateProxy()
Definition: QueryState.cpp:71
static const AccessPrivileges DROP_SERVER
Definition: DBObject.h:189
void setColumnDescriptor(ColumnDescriptor &cd, const ColumnDef *coldef)
std::string cat(Ts &&...args)
static const int32_t SELECT_FROM_VIEW
Definition: DBObject.h:114
static constexpr int32_t kMaxNumericPrecision
Definition: sqltypes.h:58
std::unique_ptr< std::string > db_name_
Definition: ParserNode.h:1950
std::optional< std::string > default_db
Definition: SysCatalog.h:119
Definition: Analyzer.h:2977
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:143
std::unique_ptr< std::string > target_
Definition: ParserNode.h:1577
std::list< std::unique_ptr< NameValueAssign > > model_options_
Definition: ParserNode.h:1975
std::unique_ptr< std::string > user_name_
Definition: ParserNode.h:2049
SQLTypes
Definition: sqltypes.h:65
static void checkStringLiteral(const std::string &option_name, const std::unique_ptr< NameValueAssign > &p)
static const int32_t UPDATE_IN_VIEW
Definition: DBObject.h:116
const std::vector< std::vector< std::shared_ptr< TargetEntry > > > & get_values_lists() const
Definition: Analyzer.h:3026
std::string tableName
void executeDryRun(const Catalog_Namespace::SessionInfo &session, TableDescriptor &td, std::list< ColumnDescriptor > &columns, std::vector< SharedDictionaryDef > &shared_dict_defs)
const std::vector< TargetMetaInfo > targets_meta
void add_rte(RangeTableEntry *rte)
Definition: Analyzer.cpp:1506
bool is_timestamp() const
Definition: sqltypes.h:1044
static const AccessPrivileges ALL_DATABASE
Definition: DBObject.h:151
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
void set_column_descriptor(ColumnDescriptor &cd, const ColumnDef *coldef)
void parseOptions(import_export::CopyParams &copy_params, import_export::QueryExporter::FileType &file_type, std::string &layer_name, import_export::QueryExporter::FileCompression &file_compression, import_export::QueryExporter::ArrayNullHandling &array_null_handling)
decltype(auto) get_vacuum_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
static const AccessPrivileges ALTER_TABLE
Definition: DBObject.h:165
std::unique_ptr< std::string > copy_from_source_pattern_
Definition: ParserNode.h:1498
CreateTableStmt(std::string *tab, const std::string *storage, std::list< TableElement * > *table_elems, bool is_temporary, bool if_not_exists, std::list< NameValueAssign * > *s)
Definition: ParserNode.h:988
RenameUserStmt(const rapidjson::Value &payload)
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
DBObjectType
Definition: DBObject.h:40
static const int32_t CREATE_VIEW
Definition: DBObject.h:112
std::list< std::unique_ptr< TableElement > > table_element_list_
Definition: ParserNode.h:1025
std::list< std::unique_ptr< NameValueAssign > > options_
Definition: ParserNode.h:2050
auto getExecuteReadLock()
DropViewStmt(std::string *v, bool i)
Definition: ParserNode.h:1898
size_t getOuterFragmentCount(QueryStateProxy, std::string &sql_query_string) override
const std::string & get_model_name() const
Definition: ParserNode.h:1963
bool g_enable_legacy_delimited_import
Definition: ParserNode.cpp:83
bool user_can_access_table(const Catalog_Namespace::SessionInfo &session_info, const TableDescriptor *td, const AccessPrivileges access_priv)
std::unique_ptr< std::string > table_
Definition: ParserNode.h:1269
void expand_star_in_targetlist(const Catalog_Namespace::Catalog &catalog, std::vector< std::shared_ptr< TargetEntry >> &tlist, int rte_idx)
static const AccessPrivileges TRUNCATE_TABLE
Definition: DBObject.h:164
SQLQualifier
Definition: sqldefs.h:71
const std::string & get_role() const
Definition: ParserNode.h:1532
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:136
#define LOG(tag)
Definition: Logger.h:285
std::unique_ptr< std::string > table_
Definition: ParserNode.h:1236
std::unique_ptr< std::string > return_message
Definition: ParserNode.h:1474
RenameDBStmt(const rapidjson::Value &payload)
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:225
const bool json_bool(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:51
decltype(auto) get_skip_rows_def(DataframeTableDescriptor &df_td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
HOST DEVICE int get_scale() const
Definition: sqltypes.h:396
SQLOps
Definition: sqldefs.h:28
std::vector< std::string > grantees_
Definition: ParserNode.h:1672
void populateData(QueryStateProxy, const TableDescriptor *td, bool validate_table, bool for_CTAS=false)
Definition: sqldefs.h:34
std::string storageType
static const AccessPrivileges INSERT_INTO_TABLE
Definition: DBObject.h:161
const std::string json_str(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:46
std::vector< std::unique_ptr< TypedImportBuffer > > fill_missing_columns(const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
Definition: Importer.cpp:6141
std::shared_ptr< Analyzer::Expr > deep_copy() const override
Definition: Analyzer.cpp:87
Definition: sqldefs.h:35
static const AccessPrivileges CREATE_DASHBOARD
Definition: DBObject.h:170
static const AccessPrivileges SERVER_USAGE
Definition: DBObject.h:191
static std::shared_ptr< QueryState > create(ARGS &&...args)
Definition: QueryState.h:148
bool get_is_null() const
Definition: Analyzer.h:347
#define DEFAULT_MAX_CHUNK_SIZE
std::list< std::unique_ptr< NameValueAssign > > options_
Definition: ParserNode.h:2027
#define UNREACHABLE()
Definition: Logger.h:338
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:469
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
std::unique_ptr< std::string > new_username_
Definition: ParserNode.h:1321
std::unique_ptr< std::string > file_path_
Definition: ParserNode.h:1855
std::string write_model_params_to_json(const std::string &predicted, const std::vector< std::string > &features, const std::string &training_query, const double data_split_train_fraction, const double data_split_eval_fraction, const std::vector< int64_t > &feature_permutations)
std::string tarCompressionStr(CompressionType compression, const bool is_restore)
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanu...
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1470
TypeR::rep timer_stop(Type clock_begin)
Definition: measure.h:48
Definition: sqldefs.h:48
Definition: sqldefs.h:29
std::string deferred_copy_from_partitions_
Definition: ParserNode.h:1505
void analyze(const Catalog_Namespace::Catalog &catalog, Analyzer::Query &query) const override=0
void set_order_by(std::list< OrderEntry > *o)
Definition: Analyzer.h:3050
std::string extractObjectNameFromHierName(const std::string &objectHierName, const std::string &objectType, const Catalog_Namespace::Catalog &cat)
OptimizeTableStmt(std::string *table, std::list< NameValueAssign * > *o)
Definition: ParserNode.h:1241
const Literal * get_defaultval() const
Definition: ParserNode.h:795
std::unique_ptr< std::string > column_
Definition: ParserNode.h:1354
static const int32_t ALTER_TABLE
Definition: DBObject.h:93
void set_result_col_list(const std::list< int > &col_list)
Definition: Analyzer.h:3044
int get_num_aggs() const
Definition: Analyzer.h:3019
std::string connection_string
Definition: CopyParams.h:104
const std::string kDefaultExportDirName
std::string toString(const QueryDescriptionType &type)
Definition: Types.h:64
std::unique_ptr< std::string > user_name_
Definition: ParserNode.h:2026
std::unique_ptr< std::string > table_
Definition: ParserNode.h:1117
std::string model_predicted_var_
Definition: ParserNode.h:1980
void setPrivileges(const AccessPrivileges &privs)
Definition: DBObject.h:227
bool g_enable_non_kernel_time_query_interrupt
Definition: Execute.cpp:134
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
void set_offset(int64_t o)
Definition: Analyzer.h:3062
const std::vector< std::string > & get_roles() const
Definition: ParserNode.h:1643
SQLTypeInfo common_string_type(const SQLTypeInfo &lhs_type_info, const SQLTypeInfo &rhs_type_info, const Executor *executor)
Definition: ParserNode.cpp:354
std::vector< std::string > splitObjectHierName(const std::string &hierName)
std::vector< std::string > model_feature_vars_
Definition: ParserNode.h:1981
std::string raster_import_dimensions
Definition: CopyParams.h:93
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
static const AccessPrivileges SELECT_FROM_TABLE
Definition: DBObject.h:160
static std::unique_ptr< QueryExporter > create(const FileType file_type)
bool is_number() const
Definition: sqltypes.h:574
std::vector< int64_t > feature_permutations_
Definition: ParserNode.h:1982
std::optional< bool > is_super
Definition: SysCatalog.h:118
AggregatedResult query(QueryStateProxy, std::string &sql_query_string, std::vector< size_t > outer_frag_indices, bool validate_only, bool allow_interrupt)
bool bool_from_string_literal(const Parser::StringLiteral *str_literal)
const std::string sLZ4
void get_table_definitions_for_ctas(TableDescriptor &td, const std::unique_ptr< NameValueAssign > &p, const std::list< ColumnDescriptor > &columns)
void train_model(const Catalog_Namespace::SessionInfo &session)
void insertData(const Catalog_Namespace::SessionInfo &session_info, InsertData &insert_data)
int32_t intval
Definition: Datum.h:73
bool is_time() const
Definition: sqltypes.h:577
std::unique_ptr< AbstractImporter > create_importer(Catalog_Namespace::Catalog &catalog, const TableDescriptor *td, const std::string &copy_from_source, const import_export::CopyParams &copy_params)
Definition: Importer.cpp:6211
int32_t get_table_id() const
ChunkKey get_table_key(const ChunkKey &key)
Definition: types.h:57
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
std::string to_string(char const *&&v)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
std::unique_ptr< std::string > table_
Definition: ParserNode.h:1403
int get_rte_idx(const std::string &range_var_name) const
Definition: Analyzer.cpp:1495
TableArchiverS3Options s3_options_
Definition: ParserNode.h:1435
std::unique_ptr< std::string > table_
Definition: ParserNode.h:2092
const std::string & get_object_type() const
Definition: ParserNode.h:1568
CompressionType defaultCompression(bool is_restore)
static const AccessPrivileges ALL_VIEW
Definition: DBObject.h:177
const std::string & get_object() const
Definition: ParserNode.h:1620
void validate_non_duplicate_column(const std::string &column_name, std::unordered_set< std::string > &upper_column_names)
Definition: DdlUtils.cpp:728
constexpr double a
Definition: Utm.h:32
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode)
bool g_enable_string_functions
std::unique_ptr< std::string > database_name_
Definition: ParserNode.h:1305
int tableId
identifies the database into which the data is being inserted
Definition: Fragmenter.h:70
DBObjectType DBObjectTypeFromString(const std::string &type)
Definition: DBObject.cpp:110
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
Definition: sqldefs.h:75
std::shared_lock< T > shared_lock
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool force_null=false)
Definition: Importer.cpp:1636
void set_column_descriptor(const std::string &column_name, ColumnDescriptor &cd, SqlType *column_type, const bool not_null, const Encoding *encoding, const std::string *default_value)
Definition: DdlUtils.cpp:698
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
Definition: Execute.cpp:509
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
static const AccessPrivileges ALTER_SERVER
Definition: DBObject.h:190
ArrayLiteral * parse_insert_array_literal(const rapidjson::Value &array)
std::list< std::unique_ptr< std::string > > columns_
Definition: ParserNode.h:1393
std::string add_metadata_columns
Definition: CopyParams.h:94
size_t numRows
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:72
This file contains the class specification and related data structures for Catalog.
float floatval
Definition: Datum.h:75
std::vector< std::string > roles_
Definition: ParserNode.h:1671
ImportHeaderRow has_header
Definition: CopyParams.h:46
const std::string & get_role() const
Definition: ParserNode.h:1621
DumpTableStmt(const rapidjson::Value &payload)
std::ostringstream options_oss_
Definition: ParserNode.h:1976
std::string generateUniqueTableName(std::string name)
bool geos_validation_available()
static SQLTypeInfo common_string_type(const SQLTypeInfo &type1, const SQLTypeInfo &type2)
Definition: Analyzer.cpp:452
std::unique_ptr< std::string > table_
Definition: ParserNode.h:1374
#define DEFAULT_MAX_ROWS
std::vector< uint8_t > compress_coords(const std::vector< double > &coords, const SQLTypeInfo &ti)
Definition: Compression.cpp:52
std::optional< std::string > regex_path_filter
Definition: CopyParams.h:85
int get_physical_cols() const
Definition: sqltypes.h:430
Supported runtime functions management and retrieval.
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
future< Result > async(Fn &&fn, Args &&...args)
static SysCatalog & instance()
Definition: SysCatalog.h:343
CreateRoleStmt(std::string *r)
Definition: ParserNode.h:1514
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
std::list< std::unique_ptr< NameValueAssign > > options_
Definition: ParserNode.h:1932
int32_t validate_and_get_fragment_size(const std::string &fragment_size_str)
const std::string * get_stringval() const
Definition: ParserNode.h:136
std::unique_ptr< std::string > select_stmt_
Definition: ParserNode.h:1854
std::unique_ptr< QueryConnector > leafs_connector_
Definition: ParserNode.h:1157
void recordRename(SubstituteMap &sMap, std::string oldName, std::string newName)
CONSTEXPR DEVICE bool is_null(const T &value)
RasterPointType raster_point_type
Definition: CopyParams.h:88
Classes representing a parse tree.
std::string build_model_query(const std::shared_ptr< Catalog_Namespace::SessionInfo > session_ptr)
void set_fixed_size()
Definition: sqltypes.h:477
RangeTableEntry * get_rte(int rte_idx) const
Definition: Analyzer.h:3056
void get_dataframe_definitions(DataframeTableDescriptor &df_td, const std::unique_ptr< NameValueAssign > &p, const std::list< ColumnDescriptor > &columns)
std::vector< std::string > roles_
Definition: ParserNode.h:1649
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:265
const std::string & get_role() const
Definition: ParserNode.h:1516
std::vector< std::string > grantees_
Definition: ParserNode.h:1578
boost::function< void(DataframeTableDescriptor &, const Parser::NameValueAssign *, const std::list< ColumnDescriptor > &columns)> DataframeDefFuncPtr
Definition: ParserNode.cpp:103
std::list< std::unique_ptr< NameValueAssign > > storage_options_
Definition: ParserNode.h:1194
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
std::string g_base_path
Definition: SysCatalog.cpp:62
const std::vector< std::string > & get_grantees() const
Definition: ParserNode.h:1666
static const int32_t DROP_DATABASE
Definition: DBObject.h:79
bool is_integer() const
Definition: sqltypes.h:565
std::list< std::unique_ptr< ColumnDef > > coldefs_
Definition: ParserNode.h:1376
const std::string sNONE
#define CHECK_NE(x, y)
Definition: Logger.h:302
std::unique_ptr< std::string > type_
Definition: ParserNode.h:1288
std::unique_ptr< Parser::Stmt > create_stmt_for_json(const std::string &query_json)
std::list< TableNamePair > tablesToRename_
Definition: ParserNode.h:1342
static const AccessPrivileges DROP_TABLE
Definition: DBObject.h:159
decltype(auto) get_shard_count_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
void set_scale(int s)
Definition: sqltypes.h:473
const int64_t json_i64(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:41
int64_t bigintval
Definition: Datum.h:74
std::unique_ptr< std::string > role_
Definition: ParserNode.h:1521
RestoreTableStmt(const rapidjson::Value &payload)
WhichRow get_which_row() const
Definition: Analyzer.h:286
std::list< ColumnDef > get_columns_from_json_payload(const std::string &payload_key, const rapidjson::Value &payload)
std::unique_ptr< std::string > type_
Definition: ParserNode.h:1606
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
int64_t get_intval() const
Definition: ParserNode.h:156
static const int32_t DELETE_FROM_TABLE
Definition: DBObject.h:91
const double json_double(const rapidjson::Value &obj) noexcept
Definition: JsonAccessors.h:56
std::string select_query_
Definition: ParserNode.h:1972
bool get_is_distinct() const
Definition: Analyzer.h:3018
int32_t s3_max_concurrent_downloads
Definition: CopyParams.h:66
std::string deferred_copy_from_file_name_
Definition: ParserNode.h:1503
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
bool should_translate_strings(const std::shared_ptr< Analyzer::Expr > &lhs_expr, const std::shared_ptr< Analyzer::Expr > &rhs_expr)
Definition: ParserNode.cpp:316
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
static SQLTypeInfo common_numeric_type(const SQLTypeInfo &type1, const SQLTypeInfo &type2)
Definition: Analyzer.cpp:500
Definition: sqldefs.h:36
std::string select_query_
Definition: ParserNode.h:1888
RevokeRoleStmt(std::list< std::string * > *r, std::list< std::string * > *g)
Definition: ParserNode.h:1659
Definition: sqldefs.h:77
static const int32_t TRUNCATE_TABLE
Definition: DBObject.h:92
std::string sql_order_by
Definition: CopyParams.h:97
Checked json field retrieval.
bool g_enable_watchdog
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool validate_with_geos_if_available)
Definition: Types.cpp:1121
int getDatabaseId() const
Definition: Catalog.h:326
const std::string * get_column_name() const
Definition: ParserNode.h:827
static const AccessPrivileges ALL_SERVER
Definition: DBObject.h:187
int16_t smallintval
Definition: Datum.h:72
static const AccessPrivileges CREATE_SERVER
Definition: DBObject.h:188
DropRoleStmt(std::string *r, bool e)
Definition: ParserNode.h:1530
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
void vacuumDeletedRows() const
Compacts fragments to remove deleted rows. When a row is deleted, a boolean deleted system column is ...
static const AccessPrivileges DELETE_FROM_TABLE
Definition: DBObject.h:163
Datum StringToDatum(const std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:339
import_export::SourceType source_type
Definition: CopyParams.h:57
std::unique_ptr< NameValueAssign > param_
Definition: ParserNode.h:1404
RenameTableStmt(const rapidjson::Value &payload)
specifies the content in-memory of a row in the column metadata table
OUTPUT transform(INPUT const &input, FUNC const &func)
Definition: misc.h:320
#define DEFAULT_MAX_ROLLBACK_EPOCHS
specifies the content in-memory of a row in the table metadata table
bool is_boolean() const
Definition: sqltypes.h:580
std::unique_ptr< std::string > table_
Definition: ParserNode.h:1219
std::list< std::unique_ptr< NameValueAssign > > options_
Definition: ParserNode.h:1270
void parse_elements(const rapidjson::Value &payload, std::string element_name, std::string &table_name, std::list< std::unique_ptr< TableElement >> &table_element_list)
bool modelExists(const std::string &model_name) const
Definition: MLModel.h:43
bool expr_is_null(const Analyzer::Expr *expr)
const std::vector< std::string > & get_roles() const
Definition: ParserNode.h:1665
static const std::map< const std::string, const DataframeDefFuncPtr > dataframeDefFuncMap
std::unique_ptr< Fragmenter_Namespace::InsertDataLoader::InsertConnector > leafs_connector_
Definition: ParserNode.h:2138
std::unique_ptr< std::string > table_
Definition: ParserNode.h:1432
bool check_session_interrupted(const QuerySessionId &query_session, Executor *executor)
Definition: ParserNode.cpp:106
bool g_enable_smem_group_by true
std::unique_ptr< std::string > new_column_name_
Definition: ParserNode.h:1355
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
static const int32_t EDIT_DASHBOARD
Definition: DBObject.h:104
static const int32_t DELETE_DASHBOARD
Definition: DBObject.h:102
static const AccessPrivileges CREATE_TABLE
Definition: DBObject.h:158
std::vector< std::string > grantees_
Definition: ParserNode.h:1608
std::unique_ptr< std::string > path_
Definition: ParserNode.h:1433
static const int32_t INSERT_INTO_TABLE
Definition: DBObject.h:89
lockmgr::LockedTableDescriptors acquire_query_table_locks(const std::string &insert_table_db_name, const std::string &query_str, const QueryStateProxy &query_state_proxy, const std::optional< std::string > &insert_table_name={})
std::string keyMetainfo
void set_num_aggs(int a)
Definition: Analyzer.h:3054
void set_group_by(std::list< std::shared_ptr< Analyzer::Expr >> &g)
Definition: Analyzer.h:3048
void set_default_table_attributes(const std::string &table_name, TableDescriptor &td, const int32_t column_count)
Definition: DdlUtils.cpp:714
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
SQLTypeInfo get_array_type() const
Definition: sqltypes.h:1008
bool g_optimize_cuda_block_and_grid_sizes
Definition: Execute.cpp:165
std::unique_ptr< std::string > table_
Definition: ParserNode.h:1497
void set_next_query(Query *q)
Definition: Analyzer.h:3051
std::string DBObjectTypeToString(DBObjectType type)
Definition: DBObject.cpp:92
std::string * stringval
Definition: Datum.h:79
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:822
void parse_options(const rapidjson::Value &payload, std::list< std::unique_ptr< NameValueAssign >> &nameValueList, bool stringToNull=false, bool stringToInteger=false)
int get_result_table_id() const
Definition: Analyzer.h:3042
std::unique_ptr< std::string > user_name_
Definition: ParserNode.h:2066
void check_executable(const Catalog_Namespace::SessionInfo &session, const TableDescriptor *td)
DropUserStmt(const rapidjson::Value &payload)
#define DEFAULT_PAGE_SIZE
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
#define UNLIKELY(x)
Definition: likely.h:25
std::unique_ptr< std::string > role_
Definition: ParserNode.h:1537
void parse_copy_params(const std::list< std::unique_ptr< NameValueAssign >> &options_, import_export::CopyParams &copy_params, std::vector< std::string > &warnings, std::string &deferred_copy_from_partitions_)
TruncateTableStmt(std::string *tab)
Definition: ParserNode.h:1229
void set_having_predicate(std::shared_ptr< Analyzer::Expr > p)
Definition: Analyzer.h:3049
Literal * parse_insert_literal(const rapidjson::Value &literal)
GrantRoleStmt(std::list< std::string * > *r, std::list< std::string * > *g)
Definition: ParserNode.h:1637
void set_comp_param(int p)
Definition: sqltypes.h:480
std::string get_session_id() const
Definition: SessionInfo.h:93
void analyze(const Catalog_Namespace::Catalog &catalog, Analyzer::Query &query) const override
void set_result_table_id(int id)
Definition: Analyzer.h:3045
const std::string * get_name() const
Definition: ParserNode.h:964
std::string geo_layer_name
Definition: CopyParams.h:81
MLModelMap g_ml_models
Definition: MLModel.h:124
void loadKey()
Definition: DBObject.cpp:190
const AccessPrivileges & getPrivileges() const
Definition: DBObject.h:226
void validate_shard_column_type(const ColumnDescriptor &cd)
const std::string kRootUsername
Definition: sqltypes.h:79
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
bool g_test_drop_column_rollback
Definition: ParserNode.cpp:79
const std::string sUNGZIP
void check_alter_table_privilege(const Catalog_Namespace::SessionInfo &session, const TableDescriptor *td)
Definition: sqldefs.h:71
const shared::ColumnKey & getColumnKey() const
Definition: Analyzer.h:198
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
void set_is_distinct(bool d)
Definition: Analyzer.h:3046
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
Definition: Epoch.h:30
bool is_date_in_days() const
Definition: sqltypes.h:1016
static const AccessPrivileges SELECT_FROM_VIEW
Definition: DBObject.h:180
bool table_is_temporary(const TableDescriptor *const td)
const CompressDef * get_compression() const
Definition: ParserNode.h:829
std::vector< std::shared_ptr< TargetEntry > > & get_targetlist_nonconst()
Definition: Analyzer.h:3023
bool g_enable_fsi_regex_import
Definition: ParserNode.cpp:87
Catalog & getCatalog() const
Definition: SessionInfo.h:75
static ReadLock getReadLockForTable(Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgrImpl.h:238
void set_dimension(int d)
Definition: sqltypes.h:470
#define DEFAULT_FRAGMENT_ROWS
void setStringDictKey(const shared::StringDictKey &dict_key)
Definition: sqltypes.h:1061
std::list< std::unique_ptr< NameValueAssign > > options_
Definition: ParserNode.h:1856
Datum get_constval() const
Definition: Analyzer.h:348
void execute_stmt_for_json(const std::string &query_json, std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr, bool read_only_mode)
import_export::CopyParams deferred_copy_from_copy_params_
Definition: ParserNode.h:1504
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
DropTableStmt(std::string *tab, bool i)
Definition: ParserNode.h:1211
std::string line_start_regex
Definition: CopyParams.h:106
Definition: sqldefs.h:31
std::vector< std::string > grantees_
Definition: ParserNode.h:1650
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
Fragmenter_Namespace::FragmenterType fragType
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2172
Definition: sqldefs.h:78
Data_Namespace::MemoryLevel persistenceLevel
void set_is_unionall(bool u)
Definition: Analyzer.h:3052
bool is_none_encoded_string() const
Definition: sqltypes.h:645
std::string s3_session_token
Definition: CopyParams.h:63
static const int32_t CREATE_DATABASE
Definition: DBObject.h:78
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:393
InsertIntoTableAsSelectStmt(const rapidjson::Value &payload)
std::vector< int > getTableChunkKey(const TableDescriptor *td, Catalog_Namespace::Catalog &catalog)
Definition: ParserNode.cpp:118
void disable_foreign_tables(const TableDescriptor *td)
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
std::list< std::unique_ptr< std::string > > column_list_
Definition: ParserNode.h:2093
std::string s3_session_token
Definition: TableArchiver.h:27
decltype(auto) get_sort_column_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
static const AccessPrivileges ALL_DASHBOARD
Definition: DBObject.h:169
std::string get_type_name() const
Definition: sqltypes.h:482
std::list< std::unique_ptr< NameValueAssign > > storage_options_
Definition: ParserNode.h:1028
static const AccessPrivileges ACCESS
Definition: DBObject.h:153
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
CreateModelStmt(const rapidjson::Value &payload)
static const int32_t VIEW_DASHBOARD
Definition: DBObject.h:103
static std::pair< AccessPrivileges, DBObjectType > parseStringPrivs(const std::string &privs, const DBObjectType &objectType, const std::string &object_name)
virtual std::shared_ptr< Analyzer::Expr > add_cast(const SQLTypeInfo &new_type_info)
Definition: Analyzer.cpp:774
void deleteModel(const std::string &model_name)
Definition: MLModel.h:61
static const AccessPrivileges ALL_TABLE
Definition: DBObject.h:157
decltype(auto) get_frag_size_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
bool g_enable_ml_functions
Definition: Execute.cpp:118
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
CreateViewStmt(const std::string &view_name, const std::string &select_query, const bool if_not_exists)
Definition: ParserNode.h:1872
decltype(auto) get_header_def(DataframeTableDescriptor &df_td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
void set_where_predicate(std::shared_ptr< Analyzer::Expr > p)
Definition: Analyzer.h:3047
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:402
void set_stmt_type(SQLStmtType t)
Definition: Analyzer.h:3053
const ColumnDescriptor * get_column_desc(const Catalog_Namespace::Catalog &catalog, const std::string &name)
static const StringDictKey kTransientDictKey
Definition: DbObjectKeys.h:45
Definition: sqldefs.h:52
std::unique_ptr< std::string > type_
Definition: ParserNode.h:1576
static void verifyObject(Catalog_Namespace::Catalog &sessionCatalog, const std::string &objectName, DBObjectType objectType, const std::string &command)
static CompilationOptions defaults(const ExecutorDeviceType device_type=ExecutorDeviceType::GPU)
DumpRestoreTableStmtBase(const rapidjson::Value &payload, const bool is_restore)
std::optional< bool > can_login
Definition: SysCatalog.h:120
void set_dict_intersection()
Definition: sqltypes.h:478
static const int32_t DROP_TABLE
Definition: DBObject.h:87
std::vector< std::unique_ptr< std::string > > column_list_
Definition: ParserNode.h:1160
bool shouldVacuumDeletedRows() const
Definition: ParserNode.h:1256
ThreadId thread_id()
Definition: Logger.cpp:877
#define NULL_ARRAY_DOUBLE
void checkNameSubstition(SubstituteMap &sMap)
void validate_table_type(const TableDescriptor *td, const TableType expected_table_type, const std::string &command)
Definition: DdlUtils.cpp:745
std::unique_ptr< std::string > db_name_
Definition: ParserNode.h:1931
static const int32_t INSERT_INTO_VIEW
Definition: DBObject.h:115
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
std::string raster_import_bands
Definition: CopyParams.h:89
virtual std::shared_ptr< Analyzer::Expr > analyze(const Catalog_Namespace::Catalog &catalog, Analyzer::Query &query, TlistRefType allow_tlist_ref=TLIST_NONE) const =0
std::string QuerySessionId
Definition: Execute.h:86
static const AccessPrivileges DROP_VIEW
Definition: DBObject.h:179
bool g_enable_watchdog false
Definition: Execute.cpp:80
std::unique_ptr< std::string > target_
Definition: ParserNode.h:1607
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
void set_notnull(bool n)
Definition: sqltypes.h:475
static bool readBooleanLiteral(const std::string &option_name, const std::unique_ptr< NameValueAssign > &p)
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
#define CHECK(condition)
Definition: Logger.h:291
static const int32_t DELETE_FROM_VIEW
Definition: DBObject.h:117
bool is_geometry() const
Definition: sqltypes.h:595
static const AccessPrivileges CREATE_VIEW
Definition: DBObject.h:178
static const int32_t CREATE_TABLE
Definition: DBObject.h:86
void validate_shared_dictionary(const Parser::CreateTableBaseStmt *stmt, const Parser::SharedDictionaryDef *shared_dict_def, const std::list< ColumnDescriptor > &columns, const std::vector< Parser::SharedDictionaryDef > &shared_dict_defs_so_far, const Catalog_Namespace::Catalog &catalog)
CreateDataframeStmt(std::string *tab, std::list< TableElement * > *table_elems, std::string *filename, std::list< NameValueAssign * > *s)
Definition: ParserNode.h:1088
void get_table_definitions(TableDescriptor &td, const std::unique_ptr< NameValueAssign > &p, const std::list< ColumnDescriptor > &columns)
decltype(auto) get_partions_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
const std::vector< std::string > & get_privs() const
Definition: ParserNode.h:1567
std::vector< std::string > privileges_
Definition: ParserNode.h:1605
CopyTableStmt(std::string *t, std::string *f, std::list< NameValueAssign * > *o)
std::unique_ptr< ColumnDef > column_from_json(const rapidjson::Value &element)
std::list< std::unique_ptr< NameValueAssign > > options_
Definition: ParserNode.h:1500
static std::string encode_base64(const std::string &val)
Definition: base64.h:45
static void clearExternalCaches(bool for_update, const TableDescriptor *td, const int current_db_id)
Definition: Execute.h:438
static const int32_t CREATE_DASHBOARD
Definition: DBObject.h:101
decltype(auto) get_page_size_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
const std::string & get_object_type() const
Definition: ParserNode.h:1619
const std::vector< std::string > & get_grantees() const
Definition: ParserNode.h:1644
decltype(auto) get_property_value(const NameValueAssign *p, ASSIGNMENT op, VALIDATE validate=VALIDATE())
const std::list< std::shared_ptr< Analyzer::Expr > > & get_group_by() const
Definition: Analyzer.h:3034
bool g_cluster
const std::string sGZIP
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
std::vector< std::unique_ptr< ValuesList > > values_lists_
Definition: ParserNode.h:2141
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:68
auto getExecuteWriteLock()
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
const std::vector< std::string > & get_privs() const
Definition: ParserNode.h:1597
bool is_regression_model(const MLModelType model_type)
Definition: MLModelType.h:69
static DBObject createObject(const std::string &objectName, DBObjectType objectType)
MLModelType get_ml_model_type_from_str(const std::string &model_type_str)
Definition: MLModelType.h:52
void analyze(const Catalog_Namespace::Catalog &catalog, Analyzer::Query &query) const override
std::string viewSQL
decltype(auto) get_max_chunk_size_dataframe_def(DataframeTableDescriptor &df_td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
std::list< ColumnDescriptor > getColumnDescriptors(AggregatedResult &result, bool for_create)
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
static const AccessPrivileges DELETE_DASHBOARD
Definition: DBObject.h:173
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
bool is_dict_encoded_string() const
Definition: sqltypes.h:641
Definition: sqltypes.h:72
static const int32_t SELECT_FROM_TABLE
Definition: DBObject.h:88
SQLTypeInfo columnType
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
std::string s3_access_key
Definition: TableArchiver.h:25
std::string model_name_
Definition: ParserNode.h:2003
static constexpr ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:423
const std::string & get_object() const
Definition: ParserNode.h:1599
bool is_string() const
Definition: sqltypes.h:559
decltype(auto) get_frag_size_dataframe_def(DataframeTableDescriptor &df_td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
bool checkDBAccessPrivileges(const DBObjectType &permissionType, const AccessPrivileges &privs, const std::string &objectName="") const
Definition: SessionInfo.cpp:24
string name
Definition: setup.in.py:72
std::unique_ptr< std::string > table_
Definition: ParserNode.h:1353
constexpr double n
Definition: Utm.h:38
size_t g_leaf_count
Definition: ParserNode.cpp:78
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
AlterUserStmt(const rapidjson::Value &payload)
Definition: Analyzer.h:2675
void set_limit(int64_t l)
Definition: Analyzer.h:3060
decltype(auto) get_max_rollback_epochs_def(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
std::string view_name_
Definition: ParserNode.h:1887
virtual void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode)=0
static constexpr char const * FOREIGN_TABLE
Definition: sqldefs.h:76
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
void restoreTable(const Catalog_Namespace::SessionInfo &session, const std::string &table_name, const std::string &archive_path, const std::string &compression, const TableArchiverS3Options &s3_options)
const std::list< int > & get_result_col_list() const
Definition: Analyzer.h:3043
static const AccessPrivileges EDIT_DASHBOARD
Definition: DBObject.h:172
bool g_enable_fsi
Definition: Catalog.cpp:96
static const AccessPrivileges UPDATE_IN_TABLE
Definition: DBObject.h:162
std::map< std::string, std::string > SubstituteMap
static const int32_t UPDATE_IN_TABLE
Definition: DBObject.h:90
std::list< std::unique_ptr< TableElement > > table_element_list_
Definition: ParserNode.h:1118
Definition: Datum.h:69
std::string userLoggable() const
Definition: SysCatalog.cpp:158
std::string s3_access_key
Definition: CopyParams.h:61
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:975
bool is_decimal() const
Definition: sqltypes.h:568
static constexpr char const * EMPTY_NAME
std::string columnName
std::shared_ptr< Catalog_Namespace::SessionInfo const > getConstSessionInfo() const
Definition: QueryState.cpp:84
Definition: sqldefs.h:74
const std::vector< std::shared_ptr< TargetEntry > > & get_targetlist() const
Definition: Analyzer.h:3020
CreateTableAsSelectStmt(const rapidjson::Value &payload)
CompressionType validateCompression(const std::string &compression, const bool is_restore)
RasterPointTransform raster_point_transform
Definition: CopyParams.h:91
bool g_enable_add_metadata_columns
Definition: ParserNode.cpp:89
bool hasPermission(int permission) const
Definition: DBObject.h:141
DropModelStmt(const rapidjson::Value &payload)
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
DropDBStmt(const rapidjson::Value &payload)
std::unique_ptr< std::string > table_
Definition: ParserNode.h:1024
std::string loadTable(Catalog_Namespace::Catalog &catalog, SubstituteMap &sMap, std::string tableName)
void dumpTable(const TableDescriptor *td, const std::string &archive_path, const std::string &compression)
const UserMetadata & get_currentUser() const
Definition: SessionInfo.h:88
size_t sort_column_index(const std::string &name, const std::list< ColumnDescriptor > &columns)
std::optional< std::string > file_sort_order_by
Definition: CopyParams.h:86
std::string pg_shim(const std::string &query)
Definition: sqldefs.h:38
std::unique_ptr< Parser::Stmt > create_stmt_for_query(const std::string &queryStr, const Catalog_Namespace::SessionInfo &session_info)
const std::vector< RangeTableEntry * > & get_rangetable() const
Definition: Analyzer.h:3032
#define TRANSIENT_DICT(ID)
Definition: sqltypes.h:322
CreateUserStmt(const rapidjson::Value &payload)
double get_doubleval() const
Definition: ParserNode.h:218
decltype(auto) get_storage_type(TableDescriptor &td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
const std::string & get_object_type() const
Definition: ParserNode.h:1598
std::string credential_string
Definition: CopyParams.h:101
std::unique_ptr< std::string > filename_
Definition: ParserNode.h:1119
static SQLTypeInfo analyze_type_info(SQLOps op, const SQLTypeInfo &left_type, const SQLTypeInfo &right_type, SQLTypeInfo *new_left_type, SQLTypeInfo *new_right_type)
Definition: Analyzer.cpp:267
bool is_date() const
Definition: sqltypes.h:1026
bool is_array() const
Definition: sqltypes.h:583
#define STDLOG(...)
Definition: QueryState.h:234
const std::string getQuerySubmittedTime() const
Definition: QueryState.cpp:101
decltype(auto) get_delimiter_def(DataframeTableDescriptor &df_td, const NameValueAssign *p, const std::list< ColumnDescriptor > &columns)
#define VLOG(n)
Definition: Logger.h:388
Type timer_start()
Definition: measure.h:42
SQLTypeInfo const & get_str_dict_cast_type(const SQLTypeInfo &lhs_type_info, const SQLTypeInfo &rhs_type_info, const Executor *executor)
Definition: ParserNode.cpp:331
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
std::string serialize_key_metainfo(const ShardKeyDef *shard_key_def, const std::vector< SharedDictionaryDef > &shared_dict_defs)
void recomputeMetadata() const
Recomputes per-chunk metadata for each fragment in the table. Updates and deletes can cause chunk met...
const std::string sUNLZ4
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
std::atomic< bool > isSuper
Definition: SysCatalog.h:107
void set_precision(int d)
Definition: sqltypes.h:471
std::unique_ptr< TargetValueConverter > create(ConverterCreateParameter param)
void set_string_field(rapidjson::Value &obj, const std::string &field_name, const std::string &field_value, rapidjson::Document &document)
void analyze(const Catalog_Namespace::Catalog &catalog, Analyzer::Query &query) const override
std::unique_ptr< std::string > table_
Definition: ParserNode.h:1392
#define IS_COMPARISON(X)
Definition: sqldefs.h:58
const std::string & get_object() const
Definition: ParserNode.h:1569
virtual bool get_is_array() const
Definition: DdlUtils.cpp:96
double doubleval
Definition: Datum.h:76
std::optional< std::string > file_sort_regex
Definition: CopyParams.h:87
std::unique_ptr< ColumnDef > coldef_
Definition: ParserNode.h:1375
const shared::StringDictKey & getStringDictKey() const
Definition: sqltypes.h:1055
std::optional< std::string > get_string_option(const NameValueAssign *option, const std::string &option_name)
EncodingType geo_coords_encoding
Definition: CopyParams.h:76
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
void execute(const Catalog_Namespace::SessionInfo &session, bool read_only_mode) override
specifies the content in-memory of a row in the table metadata table