OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParserNode.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "ParserNode.h"
24 
25 #include <boost/algorithm/string.hpp>
26 #include <boost/core/null_deleter.hpp>
27 #include <boost/filesystem.hpp>
28 #include <boost/function.hpp>
29 
30 #include <rapidjson/document.h>
31 #include <rapidjson/stringbuffer.h>
32 #include <rapidjson/writer.h>
33 
34 #include <cassert>
35 #include <cmath>
36 #include <limits>
37 #include <random>
38 #include <regex>
39 #include <stdexcept>
40 #include <string>
41 #include <type_traits>
42 #include <typeinfo>
43 
45 #include "Catalog/Catalog.h"
51 #include "Geospatial/Compression.h"
52 #include "Geospatial/Types.h"
54 #include "ImportExport/Importer.h"
56 #include "LockMgr/LockMgr.h"
60 #include "QueryEngine/Execute.h"
65 #include "ReservedKeywords.h"
66 #include "Shared/DbObjectKeys.h"
67 #include "Shared/StringTransform.h"
68 #include "Shared/SysDefinitions.h"
70 #include "Shared/measure.h"
71 #include "Shared/shard_key.h"
73 #include "Utils/FsiUtils.h"
74 
75 #include "gen-cpp/CalciteServer.h"
76 
// Process-wide leaf count; defined here and initialized to zero.
77 size_t g_leaf_count{0};
// Feature flags defined in other translation units.
// NOTE(review): the embedded listing numbers skip 78, so a declaration may be
// missing from this view.
79 extern bool g_enable_string_functions;
80 extern bool g_enable_fsi;
81 
// Flag that only exists when the build defines ENABLE_IMPORT_PARQUET.
83 #ifdef ENABLE_IMPORT_PARQUET
84 bool g_enable_legacy_parquet_import{false};
85 #endif
87 
89 
// Enables the ""s std::string literal suffix for the rest of this file.
91 using namespace std::string_literals;
92 
// Callable that receives a table descriptor to update plus the table's column list.
// NOTE(review): the embedded listing numbers skip 94, so a parameter line may be
// missing from this signature; confirm against the upstream file.
93 using TableDefFuncPtr = boost::function<void(TableDescriptor&,
95  const std::list<ColumnDescriptor>& columns)>;
96 
// Same shape as TableDefFuncPtr, but for dataframe table descriptors.
// NOTE(review): the embedded listing numbers skip 99 here as well.
97 using DataframeDefFuncPtr =
98  boost::function<void(DataframeTableDescriptor&,
100  const std::list<ColumnDescriptor>& columns)>;
101 
102 namespace Parser {
// Asks the executor whether the query session `query_session` has been
// interrupted, holding a read lock obtained from executor->getSessionLock()
// while doing so. Returns false when the guarded scope is not entered.
//
// NOTE(review): the embedded listing numbers jump from 106 to 109. The statement
// that opens the guarded scope and the beginning of the `session_read_lock`
// declaration are therefore not visible here; restore them from the upstream
// file before compiling.
103 bool check_session_interrupted(const QuerySessionId& query_session, Executor* executor) {
104  // we call this function with unitary executor but is okay since
105  // we know the exact session info from a global session map object
106  // in the executor
109  executor->getSessionLock());
110  return executor->checkIsQuerySessionInterrupted(query_session, session_read_lock);
111  }
112  return false;
113 }
114 
115 std::vector<int> getTableChunkKey(const TableDescriptor* td,
116  Catalog_Namespace::Catalog& catalog) {
117  std::vector<int> table_chunk_key_prefix;
118  if (td) {
119  if (td->fragmenter) {
120  table_chunk_key_prefix = td->fragmenter->getFragmentsForQuery().chunkKeyPrefix;
121  } else {
122  table_chunk_key_prefix.push_back(catalog.getCurrentDB().dbId);
123  table_chunk_key_prefix.push_back(td->tableId);
124  }
125  }
126  return table_chunk_key_prefix;
127 }
128 
129 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
130  const Catalog_Namespace::Catalog& catalog,
131  Analyzer::Query& query,
132  TlistRefType allow_tlist_ref) const {
133  return makeExpr<Analyzer::Constant>(kNULLT, true);
134 }
135 
136 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
137  const Catalog_Namespace::Catalog& catalog,
138  Analyzer::Query& query,
139  TlistRefType allow_tlist_ref) const {
140  return analyzeValue(*stringval_, false);
141 }
142 
143 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(const std::string& stringval,
144  const bool is_null) {
145  if (!is_null) {
146  const SQLTypeInfo ti(kVARCHAR, stringval.length(), 0, true);
147  Datum d;
148  d.stringval = new std::string(stringval);
149  return makeExpr<Analyzer::Constant>(ti, false, d);
150  }
151  // Null value
152  return makeExpr<Analyzer::Constant>(kVARCHAR, true);
153 }
154 
155 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
156  const Catalog_Namespace::Catalog& catalog,
157  Analyzer::Query& query,
158  TlistRefType allow_tlist_ref) const {
159  return analyzeValue(intval_);
160 }
161 
162 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(const int64_t intval) {
163  SQLTypes t;
164  Datum d;
165  if (intval >= INT16_MIN && intval <= INT16_MAX) {
166  t = kSMALLINT;
167  d.smallintval = (int16_t)intval;
168  } else if (intval >= INT32_MIN && intval <= INT32_MAX) {
169  t = kINT;
170  d.intval = (int32_t)intval;
171  } else {
172  t = kBIGINT;
173  d.bigintval = intval;
174  }
175  return makeExpr<Analyzer::Constant>(t, false, d);
176 }
177 
178 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
179  const Catalog_Namespace::Catalog& catalog,
180  Analyzer::Query& query,
181  TlistRefType allow_tlist_ref) const {
182  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
183  Datum d = StringToDatum(*fixedptval_, ti);
184  return makeExpr<Analyzer::Constant>(ti, false, d);
185 }
186 
187 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(const int64_t numericval,
188  const int scale,
189  const int precision) {
190  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
191  ti.set_scale(scale);
192  ti.set_precision(precision);
193  Datum d;
194  d.bigintval = numericval;
195  return makeExpr<Analyzer::Constant>(ti, false, d);
196 }
197 
198 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
199  const Catalog_Namespace::Catalog& catalog,
200  Analyzer::Query& query,
201  TlistRefType allow_tlist_ref) const {
202  Datum d;
203  d.floatval = floatval_;
204  return makeExpr<Analyzer::Constant>(kFLOAT, false, d);
205 }
206 
207 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
208  const Catalog_Namespace::Catalog& catalog,
209  Analyzer::Query& query,
210  TlistRefType allow_tlist_ref) const {
211  Datum d;
212  d.doubleval = doubleval_;
213  return makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
214 }
215 
216 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
217  const Catalog_Namespace::Catalog& catalog,
218  Analyzer::Query& query,
219  TlistRefType allow_tlist_ref) const {
220  return get(timestampval_);
221 }
222 
223 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(const int64_t timestampval) {
224  Datum d;
225  d.bigintval = timestampval;
226  return makeExpr<Analyzer::Constant>(kTIMESTAMP, false, d);
227 }
228 
229 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
230  const Catalog_Namespace::Catalog& catalog,
231  Analyzer::Query& query,
232  TlistRefType allow_tlist_ref) const {
233  Datum d;
234  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
235 }
236 
237 std::shared_ptr<Analyzer::Expr> UserLiteral::get(const std::string& user) {
238  Datum d;
239  d.stringval = new std::string(user);
240  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
241 }
242 
243 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
244  const Catalog_Namespace::Catalog& catalog,
245  Analyzer::Query& query,
246  TlistRefType allow_tlist_ref) const {
247  SQLTypeInfo ti = SQLTypeInfo(kARRAY, true);
248  bool set_subtype = true;
249  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
250  for (auto& p : value_list_) {
251  auto e = p->analyze(catalog, query, allow_tlist_ref);
252  CHECK(e);
253  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
254  if (c != nullptr && c->get_is_null()) {
255  value_exprs.push_back(c);
256  continue;
257  }
258  auto subtype = e->get_type_info().get_type();
259  if (subtype == kNULLT) {
260  // NULL element
261  } else if (set_subtype) {
262  ti.set_subtype(subtype);
263  set_subtype = false;
264  }
265  value_exprs.push_back(e);
266  }
267  std::shared_ptr<Analyzer::Expr> result =
268  makeExpr<Analyzer::Constant>(ti, false, value_exprs);
269  return result;
270 }
271 
272 std::string ArrayLiteral::to_string() const {
273  std::string str = "{";
274  bool notfirst = false;
275  for (auto& p : value_list_) {
276  if (notfirst) {
277  str += ", ";
278  } else {
279  notfirst = true;
280  }
281  str += p->to_string();
282  }
283  str += "}";
284  return str;
285 }
286 
287 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
288  const Catalog_Namespace::Catalog& catalog,
289  Analyzer::Query& query,
290  TlistRefType allow_tlist_ref) const {
291  auto left_expr = left_->analyze(catalog, query, allow_tlist_ref);
292  const auto& left_type = left_expr->get_type_info();
293  if (right_ == nullptr) {
294  return makeExpr<Analyzer::UOper>(
295  left_type, left_expr->get_contains_agg(), optype_, left_expr->decompress());
296  }
297  if (optype_ == kARRAY_AT) {
298  if (left_type.get_type() != kARRAY) {
299  throw std::runtime_error(left_->to_string() + " is not of array type.");
300  }
301  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
302  const auto& right_type = right_expr->get_type_info();
303  if (!right_type.is_integer()) {
304  throw std::runtime_error(right_->to_string() + " is not of integer type.");
305  }
306  return makeExpr<Analyzer::BinOper>(
307  left_type.get_elem_type(), false, kARRAY_AT, kONE, left_expr, right_expr);
308  }
309  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
310  return normalize(optype_, opqualifier_, left_expr, right_expr);
311 }
312 
313 bool exprs_share_one_and_same_rte_idx(const std::shared_ptr<Analyzer::Expr>& lhs_expr,
314  const std::shared_ptr<Analyzer::Expr>& rhs_expr) {
315  std::set<int> lhs_rte_idx;
316  lhs_expr->collect_rte_idx(lhs_rte_idx);
317  CHECK(!lhs_rte_idx.empty());
318  std::set<int> rhs_rte_idx;
319  rhs_expr->collect_rte_idx(rhs_rte_idx);
320  CHECK(!rhs_rte_idx.empty());
321  return lhs_rte_idx.size() == 1UL && lhs_rte_idx == rhs_rte_idx;
322 }
323 
324 SQLTypeInfo const& get_str_dict_cast_type(const SQLTypeInfo& lhs_type_info,
325  const SQLTypeInfo& rhs_type_info,
326  const Executor* executor) {
327  CHECK(lhs_type_info.is_string());
328  CHECK(lhs_type_info.get_compression() == kENCODING_DICT);
329  CHECK(rhs_type_info.is_string());
330  CHECK(rhs_type_info.get_compression() == kENCODING_DICT);
331  const auto& lhs_dict_key = lhs_type_info.getStringDictKey();
332  const auto& rhs_dict_key = rhs_type_info.getStringDictKey();
333  CHECK_NE(lhs_dict_key, rhs_dict_key);
334  if (lhs_dict_key.isTransientDict()) {
335  return rhs_type_info;
336  }
337  if (rhs_dict_key.isTransientDict()) {
338  return lhs_type_info;
339  }
340  // If here then neither lhs or rhs type was transient, we should see which
341  // type has the largest dictionary and make that the destination type
342  const auto lhs_sdp = executor->getStringDictionaryProxy(lhs_dict_key, true);
343  const auto rhs_sdp = executor->getStringDictionaryProxy(rhs_dict_key, true);
344  return lhs_sdp->entryCount() >= rhs_sdp->entryCount() ? lhs_type_info : rhs_type_info;
345 }
346 
348  const SQLTypeInfo& rhs_type_info,
349  const Executor* executor) {
// NOTE(review): the embedded listing numbers skip 347, so the first line of this
// definition (return type, name and the `lhs_type_info` parameter) is not
// visible here; restore it from the upstream file.
//
// Picks a result type for two string types (both must be strings):
//  - both dict-encoded with the same dictionary key, or with the same db_id and
//    lhs dict_id == TRANSIENT_DICT(rhs dict_id): the side with the smaller (or
//    equal) dict_id;
//  - both dict-encoded otherwise: whatever get_str_dict_cast_type() selects;
//  - at least one side none-encoded: the left type when the right side is
//    none-encoded, else the right type; if that result is itself none-encoded
//    its dimension becomes the larger of the two input dimensions.
350  CHECK(lhs_type_info.is_string());
351  CHECK(rhs_type_info.is_string());
352  if (lhs_type_info.is_dict_encoded_string() && rhs_type_info.is_dict_encoded_string()) {
353  const auto& lhs_dict_key = lhs_type_info.getStringDictKey();
354  const auto& rhs_dict_key = rhs_type_info.getStringDictKey();
355  if (lhs_dict_key == rhs_dict_key ||
356  (lhs_dict_key.db_id == rhs_dict_key.db_id &&
357  lhs_dict_key.dict_id == TRANSIENT_DICT(rhs_dict_key.dict_id))) {
358  return lhs_dict_key.dict_id <= rhs_dict_key.dict_id ? lhs_type_info : rhs_type_info;
359  }
360  return get_str_dict_cast_type(lhs_type_info, rhs_type_info, executor);
361  }
// From here on the two sides are not both dict-encoded.
362  CHECK(lhs_type_info.is_none_encoded_string() || rhs_type_info.is_none_encoded_string());
363  SQLTypeInfo ret_ti =
364  rhs_type_info.is_none_encoded_string() ? lhs_type_info : rhs_type_info;
365  if (ret_ti.is_none_encoded_string()) {
366  ret_ti.set_dimension(
367  std::max(lhs_type_info.get_dimension(), rhs_type_info.get_dimension()));
368  }
369  return ret_ti;
370 }
371 
// Builds the analyzed BinOper for `left_expr <optype> right_expr`.
//
// Steps, in order:
//  1. If either side is a date-in-days type, both sides are decompressed.
//  2. For a qualifier other than kONE the right side must be an array (and must
//     not be a subquery); its element type is used for type analysis.
//  3. Analyzer::BinOper::analyze_type_info() yields the result type and the
//     types each side should be cast to. Time-interval results return at once
//     without casts.
//  4. Casts are added where the operand type differs from the computed one.
//  5. For comparison operators, dictionary-encoded operands are either cast to
//     a common dictionary, left as-is, or decompressed (see branches below);
//     for every other operator both sides are decompressed.
//
// `executor` is only dereferenced (after CHECK) when two differently-keyed
// dictionaries have to be translated for kEQ/kNE.
// Throws std::runtime_error for a qualifier on a non-array right operand and
// for non-kOVERLAPS comparisons between two geometry types.
372 std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
373  const SQLOps optype,
374  const SQLQualifier qual,
375  std::shared_ptr<Analyzer::Expr> left_expr,
376  std::shared_ptr<Analyzer::Expr> right_expr,
377  const Executor* executor) {
378  if (left_expr->get_type_info().is_date_in_days() ||
379  right_expr->get_type_info().is_date_in_days()) {
380  // Do not propagate encoding
381  left_expr = left_expr->decompress();
382  right_expr = right_expr->decompress();
383  }
// left_type is only read before left_expr is reassigned below.
384  const auto& left_type = left_expr->get_type_info();
385  auto right_type = right_expr->get_type_info();
386  if (qual != kONE) {
387  // subquery not supported yet.
388  CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
389  if (right_type.get_type() != kARRAY) {
390  throw std::runtime_error(
391  "Existential or universal qualifiers can only be used in front of a subquery "
392  "or an "
393  "expression of array type.");
394  }
// Type analysis works on the array's element type.
395  right_type = right_type.get_elem_type();
396  }
397  SQLTypeInfo new_left_type;
398  SQLTypeInfo new_right_type;
399  auto result_type = Analyzer::BinOper::analyze_type_info(
400  optype, left_type, right_type, &new_left_type, &new_right_type);
401  if (result_type.is_timeinterval()) {
402  return makeExpr<Analyzer::BinOper>(
403  result_type, false, optype, qual, left_expr, right_expr);
404  }
405  if (left_type != new_left_type) {
406  left_expr = left_expr->add_cast(new_left_type);
407  }
408  if (right_type != new_right_type) {
409  if (qual == kONE) {
410  right_expr = right_expr->add_cast(new_right_type);
411  } else {
// Qualified comparison: cast the whole array to an array of the new element type.
412  right_expr = right_expr->add_cast(new_right_type.get_array_type());
413  }
414  }
415 
416  if (IS_COMPARISON(optype)) {
417  if (optype != kOVERLAPS && new_left_type.is_geometry() &&
418  new_right_type.is_geometry()) {
419  throw std::runtime_error(
420  "Comparison operators are not yet supported for geospatial types.");
421  }
422 
423  if (new_left_type.get_compression() == kENCODING_DICT &&
424  new_right_type.get_compression() == kENCODING_DICT) {
425  if (new_left_type.getStringDictKey() != new_right_type.getStringDictKey()) {
426  if (optype == kEQ || optype == kNE) {
427  // Join framework does its own string dictionary translation
428  // (at least partly since the rhs table projection does not use
429  // the normal runtime execution framework), so if we detect
430  // that the rte idxs of the two tables are different, bail
431  // on translating
432  const bool should_translate_strings =
433  exprs_share_one_and_same_rte_idx(left_expr, right_expr);
// The optype test below repeats the enclosing condition and is always true here.
434  if (should_translate_strings && (optype == kEQ || optype == kNE)) {
435  CHECK(executor);
436  // Make the type we're casting to the transient dictionary, if it exists,
437  // otherwise the largest dictionary in terms of number of entries
438  SQLTypeInfo ti(
439  get_str_dict_cast_type(new_left_type, new_right_type, executor));
// Cast whichever side does not already have the chosen type.
440  auto& expr_to_cast = ti == new_left_type ? right_expr : left_expr;
441  ti.set_fixed_size();
// NOTE(review): the embedded listing numbers skip 442 here, so a statement may
// be missing between set_fixed_size() and add_cast(); confirm against upstream.
443  expr_to_cast = expr_to_cast->add_cast(ti);
444  } else {  // kEQ/kNE, but translation is skipped
445  // The operands do not share one and the same rte idx, so no dictionary
446  // translation is done here; decompress both sides even though the
447  // operator is kEQ or kNE
448  left_expr = left_expr->decompress();
449  right_expr = right_expr->decompress();
450  }
451  } else { // Ordered comparison operator
452  // We do not currently support ordered (i.e. >, <=) comparisons between
453  // dictionary-encoded columns, and need to decompress when translation
454  // is turned off even for kEQ and kNE
455  left_expr = left_expr->decompress();
456  right_expr = right_expr->decompress();
457  }
458  } else { // Strings shared comp param
459  if (!(optype == kEQ || optype == kNE)) {
460  // We do not currently support ordered (i.e. >, <=) comparisons between
461  // encoded columns, so try to decode (will only succeed with watchdog off)
462  left_expr = left_expr->decompress();
463  right_expr = right_expr->decompress();
464  } else {
465  // do nothing, can directly support equals/non-equals comparisons between two
466  // dictionary encoded columns sharing the same dictionary as these are
467  // effectively integer comparisons in the same dictionary space
468  }
469  }
470  } else if (new_left_type.get_compression() == kENCODING_DICT &&
471  new_right_type.get_compression() == kENCODING_NONE) {
// Only the left side is dict-encoded: cast the right side into the left dictionary.
472  SQLTypeInfo ti(new_right_type);
473  ti.set_compression(new_left_type.get_compression());
474  ti.set_comp_param(new_left_type.get_comp_param());
475  ti.setStringDictKey(new_left_type.getStringDictKey());
476  ti.set_fixed_size();
477  right_expr = right_expr->add_cast(ti);
478  } else if (new_right_type.get_compression() == kENCODING_DICT &&
479  new_left_type.get_compression() == kENCODING_NONE) {
// Mirror case: cast the left side into the right dictionary.
480  SQLTypeInfo ti(new_left_type);
481  ti.set_compression(new_right_type.get_compression());
482  ti.set_comp_param(new_right_type.get_comp_param());
483  ti.setStringDictKey(new_right_type.getStringDictKey());
484  ti.set_fixed_size();
485  left_expr = left_expr->add_cast(ti);
486  } else {
487  left_expr = left_expr->decompress();
488  right_expr = right_expr->decompress();
489  }
490  } else {
491  // Is this now a no-op just for pairs of none-encoded string columns
492  left_expr = left_expr->decompress();
493  right_expr = right_expr->decompress();
494  }
// The result contains an aggregate if either (possibly rewritten) operand does.
495  bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
496  return makeExpr<Analyzer::BinOper>(
497  result_type, has_agg, optype, qual, left_expr, right_expr);
498 }
499 
500 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
501  const Catalog_Namespace::Catalog& catalog,
502  Analyzer::Query& query,
503  TlistRefType allow_tlist_ref) const {
504  throw std::runtime_error("Subqueries are not supported yet.");
505  return nullptr;
506 }
507 
508 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
509  const Catalog_Namespace::Catalog& catalog,
510  Analyzer::Query& query,
511  TlistRefType allow_tlist_ref) const {
512  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
513  auto result = makeExpr<Analyzer::UOper>(kBOOLEAN, kISNULL, arg_expr);
514  if (is_not_) {
515  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
516  }
517  return result;
518 }
519 
520 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
521  const Catalog_Namespace::Catalog& catalog,
522  Analyzer::Query& query,
523  TlistRefType allow_tlist_ref) const {
524  throw std::runtime_error("Subqueries are not supported yet.");
525  return nullptr;
526 }
527 
528 std::shared_ptr<Analyzer::Expr> InValues::analyze(
529  const Catalog_Namespace::Catalog& catalog,
530  Analyzer::Query& query,
531  TlistRefType allow_tlist_ref) const {
532  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
533  SQLTypeInfo ti = arg_expr->get_type_info();
534  bool dict_comp = ti.get_compression() == kENCODING_DICT;
535  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
536  for (auto& p : value_list_) {
537  auto e = p->analyze(catalog, query, allow_tlist_ref);
538  if (ti != e->get_type_info()) {
539  if (ti.is_string() && e->get_type_info().is_string()) {
540  // Todo(todd): Can we have this leverage the cast framework as well
541  ti = Analyzer::BinOper::common_string_type(ti, e->get_type_info());
542  } else if (ti.is_number() && e->get_type_info().is_number()) {
543  ti = Analyzer::BinOper::common_numeric_type(ti, e->get_type_info());
544  } else {
545  throw std::runtime_error("IN expressions must contain compatible types.");
546  }
547  }
548  if (dict_comp) {
549  value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
550  } else {
551  value_exprs.push_back(e);
552  }
553  }
554  if (!dict_comp) {
555  arg_expr = arg_expr->decompress();
556  arg_expr = arg_expr->add_cast(ti);
557  std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
558  for (auto p : value_exprs) {
559  cast_vals.push_back(p->add_cast(ti));
560  }
561  value_exprs.swap(cast_vals);
562  }
563  std::shared_ptr<Analyzer::Expr> result =
564  makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
565  if (is_not_) {
566  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
567  }
568  return result;
569 }
570 
571 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
572  const Catalog_Namespace::Catalog& catalog,
573  Analyzer::Query& query,
574  TlistRefType allow_tlist_ref) const {
575  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
576  auto lower_expr = lower_->analyze(catalog, query, allow_tlist_ref);
577  auto upper_expr = upper_->analyze(catalog, query, allow_tlist_ref);
578  SQLTypeInfo new_left_type, new_right_type;
580  arg_expr->get_type_info(),
581  lower_expr->get_type_info(),
582  &new_left_type,
583  &new_right_type);
584  auto lower_pred =
585  makeExpr<Analyzer::BinOper>(kBOOLEAN,
586  kGE,
587  kONE,
588  arg_expr->add_cast(new_left_type)->decompress(),
589  lower_expr->add_cast(new_right_type)->decompress());
591  arg_expr->get_type_info(),
592  lower_expr->get_type_info(),
593  &new_left_type,
594  &new_right_type);
595  auto upper_pred = makeExpr<Analyzer::BinOper>(
596  kBOOLEAN,
597  kLE,
598  kONE,
599  arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
600  upper_expr->add_cast(new_right_type)->decompress());
601  std::shared_ptr<Analyzer::Expr> result =
602  makeExpr<Analyzer::BinOper>(kBOOLEAN, kAND, kONE, lower_pred, upper_pred);
603  if (is_not_) {
604  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
605  }
606  return result;
607 }
608 
609 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
610  const Catalog_Namespace::Catalog& catalog,
611  Analyzer::Query& query,
612  TlistRefType allow_tlist_ref) const {
613  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
614  if (!arg_expr->get_type_info().is_string()) {
615  throw std::runtime_error(
616  "expression in char_length clause must be of a string type.");
617  }
618  std::shared_ptr<Analyzer::Expr> result =
619  makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length_);
620  return result;
621 }
622 
623 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
624  const Catalog_Namespace::Catalog& catalog,
625  Analyzer::Query& query,
626  TlistRefType allow_tlist_ref) const {
627  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
628  if (!arg_expr->get_type_info().is_array()) {
629  throw std::runtime_error(
630  "expression in cardinality clause must be of an array type.");
631  }
632  std::shared_ptr<Analyzer::Expr> result =
633  makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
634  return result;
635 }
636 
637 void LikeExpr::check_like_expr(const std::string& like_str, char escape_char) {
638  if (like_str.back() == escape_char) {
639  throw std::runtime_error("LIKE pattern must not end with escape character.");
640  }
641 }
642 
643 bool LikeExpr::test_is_simple_expr(const std::string& like_str, char escape_char) {
644  // if not bounded by '%' then not a simple string
645  if (like_str.size() < 2 || like_str[0] != '%' || like_str[like_str.size() - 1] != '%') {
646  return false;
647  }
648  // if the last '%' is escaped then not a simple string
649  if (like_str[like_str.size() - 2] == escape_char &&
650  like_str[like_str.size() - 3] != escape_char) {
651  return false;
652  }
653  for (size_t i = 1; i < like_str.size() - 1; i++) {
654  if (like_str[i] == '%' || like_str[i] == '_' || like_str[i] == '[' ||
655  like_str[i] == ']') {
656  if (like_str[i - 1] != escape_char) {
657  return false;
658  }
659  }
660  }
661  return true;
662 }
663 
664 void LikeExpr::erase_cntl_chars(std::string& like_str, char escape_char) {
665  char prev_char = '\0';
666  // easier to create new string of allowable chars
667  // rather than erase chars from
668  // existing string
669  std::string new_str;
670  for (char& cur_char : like_str) {
671  if (cur_char == '%' || cur_char == escape_char) {
672  if (prev_char != escape_char) {
673  prev_char = cur_char;
674  continue;
675  }
676  }
677  new_str.push_back(cur_char);
678  prev_char = cur_char;
679  }
680  like_str = new_str;
681 }
682 
683 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
684  const Catalog_Namespace::Catalog& catalog,
685  Analyzer::Query& query,
686  TlistRefType allow_tlist_ref) const {
687  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
688  auto like_expr = like_string_->analyze(catalog, query, allow_tlist_ref);
689  auto escape_expr = escape_string_ == nullptr
690  ? nullptr
691  : escape_string_->analyze(catalog, query, allow_tlist_ref);
692  return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike_, is_not_);
693 }
694 
695 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
696  std::shared_ptr<Analyzer::Expr> like_expr,
697  std::shared_ptr<Analyzer::Expr> escape_expr,
698  const bool is_ilike,
699  const bool is_not) {
700  if (!arg_expr->get_type_info().is_string()) {
701  throw std::runtime_error("expression before LIKE must be of a string type.");
702  }
703  if (!like_expr->get_type_info().is_string()) {
704  throw std::runtime_error("expression after LIKE must be of a string type.");
705  }
706  char escape_char = '\\';
707  if (escape_expr != nullptr) {
708  if (!escape_expr->get_type_info().is_string()) {
709  throw std::runtime_error("expression after ESCAPE must be of a string type.");
710  }
711  if (!escape_expr->get_type_info().is_string()) {
712  throw std::runtime_error("expression after ESCAPE must be of a string type.");
713  }
714  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
715  if (c != nullptr && c->get_constval().stringval->length() > 1) {
716  throw std::runtime_error("String after ESCAPE must have a single character.");
717  }
718  escape_char = (*c->get_constval().stringval)[0];
719  }
720  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(like_expr);
721  bool is_simple = false;
722  if (c != nullptr) {
723  std::string& pattern = *c->get_constval().stringval;
724  if (is_ilike) {
725  std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
726  }
727  check_like_expr(pattern, escape_char);
728  is_simple = test_is_simple_expr(pattern, escape_char);
729  if (is_simple) {
730  erase_cntl_chars(pattern, escape_char);
731  }
732  }
733  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikeExpr>(
734  arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
735  if (is_not) {
736  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
737  }
738  return result;
739 }
740 
741 void RegexpExpr::check_pattern_expr(const std::string& pattern_str, char escape_char) {
742  if (pattern_str.back() == escape_char) {
743  throw std::runtime_error("REGEXP pattern must not end with escape character.");
744  }
745 }
746 
747 bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str, char escape_char) {
748  char prev_char = '\0';
749  char prev_prev_char = '\0';
750  std::string like_str;
751  for (char& cur_char : pattern_str) {
752  if (prev_char == escape_char || isalnum(cur_char) || cur_char == ' ' ||
753  cur_char == '.') {
754  like_str.push_back((cur_char == '.') ? '_' : cur_char);
755  prev_prev_char = prev_char;
756  prev_char = cur_char;
757  continue;
758  }
759  if (prev_char == '.' && prev_prev_char != escape_char) {
760  if (cur_char == '*' || cur_char == '+') {
761  if (cur_char == '*') {
762  like_str.pop_back();
763  }
764  // .* --> %
765  // .+ --> _%
766  like_str.push_back('%');
767  prev_prev_char = prev_char;
768  prev_char = cur_char;
769  continue;
770  }
771  }
772  return false;
773  }
774  pattern_str = like_str;
775  return true;
776 }
777 
778 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
779  const Catalog_Namespace::Catalog& catalog,
780  Analyzer::Query& query,
781  TlistRefType allow_tlist_ref) const {
782  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
783  auto pattern_expr = pattern_string_->analyze(catalog, query, allow_tlist_ref);
784  auto escape_expr = escape_string_ == nullptr
785  ? nullptr
786  : escape_string_->analyze(catalog, query, allow_tlist_ref);
787  return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not_);
788 }
789 
790 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
791  std::shared_ptr<Analyzer::Expr> arg_expr,
792  std::shared_ptr<Analyzer::Expr> pattern_expr,
793  std::shared_ptr<Analyzer::Expr> escape_expr,
794  const bool is_not) {
795  if (!arg_expr->get_type_info().is_string()) {
796  throw std::runtime_error("expression before REGEXP must be of a string type.");
797  }
798  if (!pattern_expr->get_type_info().is_string()) {
799  throw std::runtime_error("expression after REGEXP must be of a string type.");
800  }
801  char escape_char = '\\';
802  if (escape_expr != nullptr) {
803  if (!escape_expr->get_type_info().is_string()) {
804  throw std::runtime_error("expression after ESCAPE must be of a string type.");
805  }
806  if (!escape_expr->get_type_info().is_string()) {
807  throw std::runtime_error("expression after ESCAPE must be of a string type.");
808  }
809  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
810  if (c != nullptr && c->get_constval().stringval->length() > 1) {
811  throw std::runtime_error("String after ESCAPE must have a single character.");
812  }
813  escape_char = (*c->get_constval().stringval)[0];
814  if (escape_char != '\\') {
815  throw std::runtime_error("Only supporting '\\' escape character.");
816  }
817  }
818  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(pattern_expr);
819  if (c != nullptr) {
820  std::string& pattern = *c->get_constval().stringval;
821  if (translate_to_like_pattern(pattern, escape_char)) {
822  return LikeExpr::get(arg_expr, pattern_expr, escape_expr, false, is_not);
823  }
824  }
825  std::shared_ptr<Analyzer::Expr> result =
826  makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
827  if (is_not) {
828  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
829  }
830  return result;
831 }
832 
833 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
834  const Catalog_Namespace::Catalog& catalog,
835  Analyzer::Query& query,
836  TlistRefType allow_tlist_ref) const {
837  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
838  return LikelihoodExpr::get(arg_expr, likelihood_, is_not_);
839 }
840 
841 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
842  std::shared_ptr<Analyzer::Expr> arg_expr,
843  float likelihood,
844  const bool is_not) {
845  if (!arg_expr->get_type_info().is_boolean()) {
846  throw std::runtime_error("likelihood expression expects boolean type.");
847  }
848  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikelihoodExpr>(
849  arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
850  return result;
851 }
852 
853 std::shared_ptr<Analyzer::Expr> WidthBucketExpr::analyze(
854  const Catalog_Namespace::Catalog& catalog,
855  Analyzer::Query& query,
856  TlistRefType allow_tlist_ref) const {
857  auto target_value = target_value_->analyze(catalog, query, allow_tlist_ref);
858  auto lower_bound = lower_bound_->analyze(catalog, query, allow_tlist_ref);
859  auto upper_bound = upper_bound_->analyze(catalog, query, allow_tlist_ref);
860  auto partition_count = partition_count_->analyze(catalog, query, allow_tlist_ref);
861  return WidthBucketExpr::get(target_value, lower_bound, upper_bound, partition_count);
862 }
863 
864 std::shared_ptr<Analyzer::Expr> WidthBucketExpr::get(
865  std::shared_ptr<Analyzer::Expr> target_value,
866  std::shared_ptr<Analyzer::Expr> lower_bound,
867  std::shared_ptr<Analyzer::Expr> upper_bound,
868  std::shared_ptr<Analyzer::Expr> partition_count) {
869  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::WidthBucketExpr>(
870  target_value, lower_bound, upper_bound, partition_count);
871  return result;
872 }
873 
874 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
875  const Catalog_Namespace::Catalog& catalog,
876  Analyzer::Query& query,
877  TlistRefType allow_tlist_ref) const {
878  throw std::runtime_error("Subqueries are not supported yet.");
879  return nullptr;
880 }
881 
// Resolves a (possibly table-qualified) column reference to an analyzer
// expression.
//
// - A null column name (i.e. "*") is rejected.
// - With a table qualifier, the matching range table entry is looked up by
//   name and the column must exist in it.
// - Without a qualifier, every range table entry of `query` is probed; a
//   second match raises an "ambiguous" error.
// - If no column descriptor is held after the probe and `allow_tlist_ref` is
//   not TLIST_NONE, the name is matched against the result names of the
//   target list. A match yields either a deep copy of the aliased expression
//   (TLIST_COPY) or an Analyzer::Var of kind kOUTPUT referring to the
//   1-based target list position.
//
// Returns an Analyzer::ColumnVar keyed by (database id, table id, column id)
// for a resolved physical column.
// Throws std::runtime_error for unknown tables/columns and ambiguous names.
882 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
883  const Catalog_Namespace::Catalog& catalog,
884  Analyzer::Query& query,
885  TlistRefType allow_tlist_ref) const {
886  int table_id{0};
887  int rte_idx{0};
888  const ColumnDescriptor* cd{nullptr};
889  if (column_ == nullptr) {
890  throw std::runtime_error("invalid column name *.");
891  }
892  if (table_ != nullptr) {
893  rte_idx = query.get_rte_idx(*table_);
894  if (rte_idx < 0) {
895  throw std::runtime_error("range variable or table name " + *table_ +
896  " does not exist.");
897  }
898  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
899  cd = rte->get_column_desc(catalog, *column_);
900  if (cd == nullptr) {
901  throw std::runtime_error("Column name " + *column_ + " does not exist.");
902  }
903  table_id = rte->get_table_id();
904  } else {
905  bool found = false;
906  int i = 0;
// NOTE(review): `cd` is reassigned on every iteration, so after the loop it
// holds the lookup result for the LAST range table entry only. A column that
// was found in an earlier entry but is absent from the last one leaves `cd`
// null even though `found`, `rte_idx` and `table_id` were set - confirm
// whether multi-table queries can reach this path.
907  for (auto rte : query.get_rangetable()) {
908  cd = rte->get_column_desc(catalog, *column_);
909  if (cd != nullptr && !found) {
910  found = true;
911  rte_idx = i;
912  table_id = rte->get_table_id();
913  } else if (cd != nullptr && found) {
914  throw std::runtime_error("Column name " + *column_ + " is ambiguous.");
915  }
916  i++;
917  }
918  if (cd == nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
919  // check if this is a reference to a targetlist entry
// These locals intentionally shadow the outer `found` / `i`; `i` is 1-based
// here because it becomes the output variable number.
920  bool found = false;
921  int varno = -1;
922  int i = 1;
923  std::shared_ptr<Analyzer::TargetEntry> tle;
924  for (auto p : query.get_targetlist()) {
925  if (*column_ == p->get_resname() && !found) {
926  found = true;
927  varno = i;
928  tle = p;
929  } else if (*column_ == p->get_resname() && found) {
930  throw std::runtime_error("Output alias " + *column_ + " is ambiguous.");
931  }
932  i++;
933  }
934  if (found) {
935  if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
936  Analyzer::Var* v = static_cast<Analyzer::Var*>(tle->get_expr());
// NOTE(review): this listing jumps from line 936 to 938, so the condition
// that presumably guards the return below is not visible here - verify
// against the full source before touching this branch.
938  return v->deep_copy();
939  }
940  }
941  if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
942  return tle->get_expr()->deep_copy();
943  } else {
944  return makeExpr<Analyzer::Var>(
945  tle->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
946  }
947  }
948  }
949  if (cd == nullptr) {
950  throw std::runtime_error("Column name " + *column_ + " does not exist.");
951  }
952  }
953  return makeExpr<Analyzer::ColumnVar>(
954  cd->columnType,
955  shared::ColumnKey{catalog.getDatabaseId(), table_id, cd->columnId},
956  rte_idx);
957 }
958 
959 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
960  const Catalog_Namespace::Catalog& catalog,
961  Analyzer::Query& query,
962  TlistRefType allow_tlist_ref) const {
963  SQLTypeInfo result_type;
964  SQLAgg agg_type;
965  std::shared_ptr<Analyzer::Expr> arg_expr;
966  bool is_distinct = false;
967  if (boost::iequals(*name_, "count")) {
968  result_type = SQLTypeInfo(kBIGINT, false);
969  agg_type = kCOUNT;
970  if (arg_) {
971  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
972  const SQLTypeInfo& ti = arg_expr->get_type_info();
973  if (ti.is_string() && (ti.get_compression() != kENCODING_DICT || !distinct_)) {
974  throw std::runtime_error(
975  "Strings must be dictionary-encoded in COUNT(DISTINCT).");
976  }
977  if (ti.get_type() == kARRAY && !distinct_) {
978  throw std::runtime_error("Only COUNT(DISTINCT) is supported on arrays.");
979  }
980  }
981  is_distinct = distinct_;
982  } else {
983  if (!arg_) {
984  throw std::runtime_error("Cannot compute " + *name_ + " with argument '*'.");
985  }
986  if (boost::iequals(*name_, "min")) {
987  agg_type = kMIN;
988  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
989  arg_expr = arg_expr->decompress();
990  result_type = arg_expr->get_type_info();
991  } else if (boost::iequals(*name_, "max")) {
992  agg_type = kMAX;
993  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
994  arg_expr = arg_expr->decompress();
995  result_type = arg_expr->get_type_info();
996  } else if (boost::iequals(*name_, "avg")) {
997  agg_type = kAVG;
998  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
999  if (!arg_expr->get_type_info().is_number()) {
1000  throw std::runtime_error("Cannot compute AVG on non-number-type arguments.");
1001  }
1002  arg_expr = arg_expr->decompress();
1003  result_type = SQLTypeInfo(kDOUBLE, false);
1004  } else if (boost::iequals(*name_, "sum")) {
1005  agg_type = kSUM;
1006  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1007  if (!arg_expr->get_type_info().is_number()) {
1008  throw std::runtime_error("Cannot compute SUM on non-number-type arguments.");
1009  }
1010  arg_expr = arg_expr->decompress();
1011  result_type = arg_expr->get_type_info().is_integer() ? SQLTypeInfo(kBIGINT, false)
1012  : arg_expr->get_type_info();
1013  } else if (boost::iequals(*name_, "unnest")) {
1014  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1015  const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
1016  if (arg_ti.get_type() != kARRAY) {
1017  throw std::runtime_error(arg_->to_string() + " is not of array type.");
1018  }
1019  return makeExpr<Analyzer::UOper>(arg_ti.get_elem_type(), false, kUNNEST, arg_expr);
1020  } else {
1021  throw std::runtime_error("invalid function name: " + *name_);
1022  }
1023  if (arg_expr->get_type_info().is_string() ||
1024  arg_expr->get_type_info().get_type() == kARRAY) {
1025  throw std::runtime_error(
1026  "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
1027  }
1028  }
1029  int naggs = query.get_num_aggs();
1030  query.set_num_aggs(naggs + 1);
1031  return makeExpr<Analyzer::AggExpr>(
1032  result_type, agg_type, arg_expr, is_distinct, nullptr);
1033 }
1034 
1035 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
1036  const Catalog_Namespace::Catalog& catalog,
1037  Analyzer::Query& query,
1038  TlistRefType allow_tlist_ref) const {
1039  target_type_->check_type();
1040  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1041  SQLTypeInfo ti(target_type_->get_type(),
1042  target_type_->get_param1(),
1043  target_type_->get_param2(),
1044  arg_expr->get_type_info().get_notnull());
1045  if (arg_expr->get_type_info().get_type() != target_type_->get_type() &&
1046  arg_expr->get_type_info().get_compression() != kENCODING_NONE) {
1047  arg_expr->decompress();
1048  }
1049  return arg_expr->add_cast(ti);
1050 }
1051 
1052 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
1053  const Catalog_Namespace::Catalog& catalog,
1054  Analyzer::Query& query,
1055  TlistRefType allow_tlist_ref) const {
1056  SQLTypeInfo ti;
1057  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1058  expr_pair_list;
1059  for (auto& p : when_then_list_) {
1060  auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
1061  if (e1->get_type_info().get_type() != kBOOLEAN) {
1062  throw std::runtime_error("Only boolean expressions can be used after WHEN.");
1063  }
1064  auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
1065  expr_pair_list.emplace_back(e1, e2);
1066  }
1067  auto else_e =
1068  else_expr_ ? else_expr_->analyze(catalog, query, allow_tlist_ref) : nullptr;
1069  return normalize(expr_pair_list, else_e);
1070 }
1071 
1072 namespace {
1073 
// Body of the helper that parse_copy_params() calls as
// bool_from_string_literal(str_literal); its signature line (1074) is not
// present in this listing.
//
// Decodes the literal's string value as a boolean:
//   "t", "true", "T", "True"   -> true
//   "f", "false", "F", "False" -> false
// Any other spelling throws std::runtime_error. The comparison is exact, so
// other casings such as "TRUE" or "FALSE" are rejected.
1075  const std::string* s = str_literal->get_stringval();
1076  if (*s == "t" || *s == "true" || *s == "T" || *s == "True") {
1077  return true;
1078  } else if (*s == "f" || *s == "false" || *s == "F" || *s == "False") {
1079  return false;
1080  } else {
1081  throw std::runtime_error("Invalid string for boolean " + *s);
1082  }
1083 }
1084 
// Applies the WITH (...) options of a COPY statement to `copy_params`.
//
// Each entry of `options_` is matched by name, case-insensitively
// (boost::iequals). The option value is down-cast to the literal kind the
// option expects (IntLiteral or StringLiteral); a wrong literal kind or an
// invalid value raises std::runtime_error with an option-specific message.
// Boolean options arrive as string literals and are decoded with
// bool_from_string_literal(). An unrecognised option name raises
// "Invalid option for COPY: <name>".
//
// Parameters:
//   options_     - parsed name/value pairs; an empty list changes nothing.
//   copy_params  - in/out; receives the decoded settings.
//   warnings     - receives a deprecation message whenever the legacy "geo"
//                  option (or "parquet", when ENABLE_IMPORT_PARQUET is
//                  defined) is supplied; the message is pushed before the
//                  option value is validated.
//   deferred_copy_from_partitions_
//                - set to "REPLICATED" when the "partitions" option is given
//                  (the only accepted value, compared after upper-casing).
//
// NOTE(review): the outer `!options_.empty()` test is redundant with the
// range-for it guards.
// NOTE(review): the "parquet" branch is guarded by
// `#ifdef ENABLE_IMPORT_PARQUET` while the source_type branches use
// `#if ENABLE_IMPORT_PARQUET`; confirm the macro is always defined to a
// non-zero value when it is defined at all.
// NOTE(review): this listing has gaps in its line numbering (e.g. after
// 1143, 1158, 1295, 1305, 1353, 1380, 1506, 1612), so several assignments
// and conditions are not visible here - consult the full source before
// changing those branches.
1085 void parse_copy_params(const std::list<std::unique_ptr<NameValueAssign>>& options_,
1086  import_export::CopyParams& copy_params,
1087  std::vector<std::string>& warnings,
1088  std::string& deferred_copy_from_partitions_) {
1089  if (!options_.empty()) {
1090  for (auto& p : options_) {
1091  if (boost::iequals(*p->get_name(), "max_reject")) {
1092  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1093  if (int_literal == nullptr) {
1094  throw std::runtime_error("max_reject option must be an integer.");
1095  }
1096  copy_params.max_reject = int_literal->get_intval();
1097  } else if (boost::iequals(*p->get_name(), "max_import_batch_row_count")) {
1098  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1099  if (int_literal == nullptr) {
1100  throw std::runtime_error(
1101  "max_import_batch_row_count option must be an integer.");
1102  }
1103  if (int_literal->get_intval() <= 0) {
1104  throw std::runtime_error(
1105  "max_import_batch_row_count option must be a positive integer (greater "
1106  "than 0).");
1107  }
1108  copy_params.max_import_batch_row_count = int_literal->get_intval();
1109  } else if (boost::iequals(*p->get_name(), "buffer_size")) {
1110  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1111  if (int_literal == nullptr) {
1112  throw std::runtime_error("buffer_size option must be an integer.");
1113  }
1114  copy_params.buffer_size = int_literal->get_intval();
1115  } else if (boost::iequals(*p->get_name(), "threads")) {
1116  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1117  if (int_literal == nullptr) {
1118  throw std::runtime_error("Threads option must be an integer.");
1119  }
1120  copy_params.threads = int_literal->get_intval();
1121  } else if (boost::iequals(*p->get_name(), "delimiter")) {
1122  const StringLiteral* str_literal =
1123  dynamic_cast<const StringLiteral*>(p->get_value());
1124  if (str_literal == nullptr) {
1125  throw std::runtime_error("Delimiter option must be a string.");
1126  } else if (str_literal->get_stringval()->length() != 1) {
1127  throw std::runtime_error("Delimiter must be a single character string.");
1128  }
1129  copy_params.delimiter = (*str_literal->get_stringval())[0];
1130  } else if (boost::iequals(*p->get_name(), "nulls")) {
1131  const StringLiteral* str_literal =
1132  dynamic_cast<const StringLiteral*>(p->get_value());
1133  if (str_literal == nullptr) {
1134  throw std::runtime_error("Nulls option must be a string.");
1135  }
1136  copy_params.null_str = *str_literal->get_stringval();
1137  } else if (boost::iequals(*p->get_name(), "header")) {
1138  const StringLiteral* str_literal =
1139  dynamic_cast<const StringLiteral*>(p->get_value());
1140  if (str_literal == nullptr) {
1141  throw std::runtime_error("Header option must be a boolean.");
1142  }
1143  copy_params.has_header = bool_from_string_literal(str_literal)
1146 #ifdef ENABLE_IMPORT_PARQUET
1147  } else if (boost::iequals(*p->get_name(), "parquet")) {
1148  warnings.push_back(
1149  "Deprecation Warning: COPY FROM WITH (parquet='true') is deprecated. Use "
1150  "WITH (source_type='parquet_file') instead.");
1151  const StringLiteral* str_literal =
1152  dynamic_cast<const StringLiteral*>(p->get_value());
1153  if (str_literal == nullptr) {
1154  throw std::runtime_error("'parquet' option must be a boolean.");
1155  }
1156  if (bool_from_string_literal(str_literal)) {
1157  // not sure a parquet "table" type is proper, but to make code
1158  // look consistent in some places, let's set "table" type too
1160  }
1161 #endif // ENABLE_IMPORT_PARQUET
1162  } else if (boost::iequals(*p->get_name(), "s3_access_key")) {
1163  const StringLiteral* str_literal =
1164  dynamic_cast<const StringLiteral*>(p->get_value());
1165  if (str_literal == nullptr) {
1166  throw std::runtime_error("Option s3_access_key must be a string.");
1167  }
1168  copy_params.s3_access_key = *str_literal->get_stringval();
1169  } else if (boost::iequals(*p->get_name(), "s3_secret_key")) {
1170  const StringLiteral* str_literal =
1171  dynamic_cast<const StringLiteral*>(p->get_value());
1172  if (str_literal == nullptr) {
1173  throw std::runtime_error("Option s3_secret_key must be a string.");
1174  }
1175  copy_params.s3_secret_key = *str_literal->get_stringval();
1176  } else if (boost::iequals(*p->get_name(), "s3_session_token")) {
1177  const StringLiteral* str_literal =
1178  dynamic_cast<const StringLiteral*>(p->get_value());
1179  if (str_literal == nullptr) {
1180  throw std::runtime_error("Option s3_session_token must be a string.");
1181  }
1182  copy_params.s3_session_token = *str_literal->get_stringval();
1183  } else if (boost::iequals(*p->get_name(), "s3_region")) {
1184  const StringLiteral* str_literal =
1185  dynamic_cast<const StringLiteral*>(p->get_value());
1186  if (str_literal == nullptr) {
1187  throw std::runtime_error("Option s3_region must be a string.");
1188  }
1189  copy_params.s3_region = *str_literal->get_stringval();
1190  } else if (boost::iequals(*p->get_name(), "s3_endpoint")) {
1191  const StringLiteral* str_literal =
1192  dynamic_cast<const StringLiteral*>(p->get_value());
1193  if (str_literal == nullptr) {
1194  throw std::runtime_error("Option s3_endpoint must be a string.");
1195  }
1196  copy_params.s3_endpoint = *str_literal->get_stringval();
1197  } else if (boost::iequals(*p->get_name(), "s3_max_concurrent_downloads")) {
1198  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1199  if (int_literal == nullptr) {
1200  throw std::runtime_error(
1201  "'s3_max_concurrent_downloads' option must be an integer");
1202  }
1203  const int s3_max_concurrent_downloads = int_literal->get_intval();
1204  if (s3_max_concurrent_downloads > 0) {
1205  copy_params.s3_max_concurrent_downloads = s3_max_concurrent_downloads;
1206  } else {
1207  throw std::runtime_error(
1208  "Invalid value for 's3_max_concurrent_downloads' option (must be > 0): " +
1209  std::to_string(s3_max_concurrent_downloads));
1210  }
1211  } else if (boost::iequals(*p->get_name(), "quote")) {
1212  const StringLiteral* str_literal =
1213  dynamic_cast<const StringLiteral*>(p->get_value());
1214  if (str_literal == nullptr) {
1215  throw std::runtime_error("Quote option must be a string.");
1216  } else if (str_literal->get_stringval()->length() != 1) {
1217  throw std::runtime_error("Quote must be a single character string.");
1218  }
1219  copy_params.quote = (*str_literal->get_stringval())[0];
1220  } else if (boost::iequals(*p->get_name(), "escape")) {
1221  const StringLiteral* str_literal =
1222  dynamic_cast<const StringLiteral*>(p->get_value());
1223  if (str_literal == nullptr) {
1224  throw std::runtime_error("Escape option must be a string.");
1225  } else if (str_literal->get_stringval()->length() != 1) {
1226  throw std::runtime_error("Escape must be a single character string.");
1227  }
1228  copy_params.escape = (*str_literal->get_stringval())[0];
1229  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
1230  const StringLiteral* str_literal =
1231  dynamic_cast<const StringLiteral*>(p->get_value());
1232  if (str_literal == nullptr) {
1233  throw std::runtime_error("Line_delimiter option must be a string.");
1234  } else if (str_literal->get_stringval()->length() != 1) {
1235  throw std::runtime_error("Line_delimiter must be a single character string.");
1236  }
1237  copy_params.line_delim = (*str_literal->get_stringval())[0];
1238  } else if (boost::iequals(*p->get_name(), "quoted")) {
1239  const StringLiteral* str_literal =
1240  dynamic_cast<const StringLiteral*>(p->get_value());
1241  if (str_literal == nullptr) {
1242  throw std::runtime_error("Quoted option must be a boolean.");
1243  }
1244  copy_params.quoted = bool_from_string_literal(str_literal);
1245  } else if (boost::iequals(*p->get_name(), "plain_text")) {
1246  const StringLiteral* str_literal =
1247  dynamic_cast<const StringLiteral*>(p->get_value());
1248  if (str_literal == nullptr) {
1249  throw std::runtime_error("plain_text option must be a boolean.");
1250  }
1251  copy_params.plain_text = bool_from_string_literal(str_literal);
1252  } else if (boost::iequals(*p->get_name(), "trim_spaces")) {
1253  const StringLiteral* str_literal =
1254  dynamic_cast<const StringLiteral*>(p->get_value());
1255  if (str_literal == nullptr) {
1256  throw std::runtime_error("trim_spaces option must be a boolean.");
1257  }
1258  copy_params.trim_spaces = bool_from_string_literal(str_literal);
1259  } else if (boost::iequals(*p->get_name(), "array_marker")) {
1260  const StringLiteral* str_literal =
1261  dynamic_cast<const StringLiteral*>(p->get_value());
1262  if (str_literal == nullptr) {
1263  throw std::runtime_error("Array Marker option must be a string.");
1264  } else if (str_literal->get_stringval()->length() != 2) {
1265  throw std::runtime_error(
1266  "Array Marker option must be exactly two characters. Default is {}.");
1267  }
1268  copy_params.array_begin = (*str_literal->get_stringval())[0];
1269  copy_params.array_end = (*str_literal->get_stringval())[1];
1270  } else if (boost::iequals(*p->get_name(), "array_delimiter")) {
1271  const StringLiteral* str_literal =
1272  dynamic_cast<const StringLiteral*>(p->get_value());
1273  if (str_literal == nullptr) {
1274  throw std::runtime_error("Array Delimiter option must be a string.");
1275  } else if (str_literal->get_stringval()->length() != 1) {
1276  throw std::runtime_error("Array Delimiter must be a single character string.");
1277  }
1278  copy_params.array_delim = (*str_literal->get_stringval())[0];
1279  } else if (boost::iequals(*p->get_name(), "lonlat")) {
1280  const StringLiteral* str_literal =
1281  dynamic_cast<const StringLiteral*>(p->get_value());
1282  if (str_literal == nullptr) {
1283  throw std::runtime_error("Lonlat option must be a boolean.");
1284  }
1285  copy_params.lonlat = bool_from_string_literal(str_literal);
1286  } else if (boost::iequals(*p->get_name(), "geo")) {
1287  warnings.push_back(
1288  "Deprecation Warning: COPY FROM WITH (geo='true') is deprecated. Use WITH "
1289  "(source_type='geo_file') instead.");
1290  const StringLiteral* str_literal =
1291  dynamic_cast<const StringLiteral*>(p->get_value());
1292  if (str_literal == nullptr) {
1293  throw std::runtime_error("'geo' option must be a boolean.");
1294  }
1295  if (bool_from_string_literal(str_literal)) {
1297  }
1298  } else if (boost::iequals(*p->get_name(), "source_type")) {
1299  const StringLiteral* str_literal =
1300  dynamic_cast<const StringLiteral*>(p->get_value());
1301  if (str_literal == nullptr) {
1302  throw std::runtime_error("'source_type' option must be a string.");
1303  }
1304  const std::string* s = str_literal->get_stringval();
1305  if (boost::iequals(*s, "delimited_file")) {
1307  } else if (boost::iequals(*s, "geo_file")) {
1309 #if ENABLE_IMPORT_PARQUET
1310  } else if (boost::iequals(*s, "parquet_file")) {
1312 #endif
1313  } else if (boost::iequals(*s, "raster_file")) {
1315  } else if (boost::iequals(*s, "regex_parsed_file")) {
1317  } else {
1318  throw std::runtime_error(
1319  "Invalid string for 'source_type' option (must be 'GEO_FILE', 'RASTER_FILE'"
1320 #if ENABLE_IMPORT_PARQUET
1321  ", 'PARQUET_FILE'"
1322 #endif
1323  ", 'REGEX_PARSED_FILE'"
1324  " or 'DELIMITED_FILE'): " +
1325  *s);
1326  }
1327  } else if (boost::iequals(*p->get_name(), "geo_coords_type")) {
1328  const StringLiteral* str_literal =
1329  dynamic_cast<const StringLiteral*>(p->get_value());
1330  if (str_literal == nullptr) {
1331  throw std::runtime_error("'geo_coords_type' option must be a string");
1332  }
1333  const std::string* s = str_literal->get_stringval();
1334  if (boost::iequals(*s, "geography")) {
1335  throw std::runtime_error(
1336  "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
1337  // copy_params.geo_coords_type = kGEOGRAPHY;
1338  } else if (boost::iequals(*s, "geometry")) {
1339  copy_params.geo_coords_type = kGEOMETRY;
1340  } else {
1341  throw std::runtime_error(
1342  "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
1343  "'GEOMETRY'): " +
1344  *s);
1345  }
1346  } else if (boost::iequals(*p->get_name(), "raster_point_type")) {
1347  const StringLiteral* str_literal =
1348  dynamic_cast<const StringLiteral*>(p->get_value());
1349  if (str_literal == nullptr) {
1350  throw std::runtime_error("'raster_point_type' option must be a string");
1351  }
1352  const std::string* s = str_literal->get_stringval();
1353  if (boost::iequals(*s, "none")) {
1355  } else if (boost::iequals(*s, "auto")) {
1357  } else if (boost::iequals(*s, "smallint")) {
1359  } else if (boost::iequals(*s, "int")) {
1361  } else if (boost::iequals(*s, "float")) {
1363  } else if (boost::iequals(*s, "double")) {
1365  } else if (boost::iequals(*s, "point")) {
1367  } else {
1368  throw std::runtime_error(
1369  "Invalid string for 'raster_point_type' option (must be 'NONE', 'AUTO', "
1370  "'SMALLINT', 'INT', 'FLOAT', 'DOUBLE' or 'POINT'): " +
1371  *s);
1372  }
1373  } else if (boost::iequals(*p->get_name(), "raster_point_transform")) {
1374  const StringLiteral* str_literal =
1375  dynamic_cast<const StringLiteral*>(p->get_value());
1376  if (str_literal == nullptr) {
1377  throw std::runtime_error("'raster_point_transform' option must be a string");
1378  }
1379  const std::string* s = str_literal->get_stringval();
1380  if (boost::iequals(*s, "none")) {
1382  } else if (boost::iequals(*s, "auto")) {
1384  } else if (boost::iequals(*s, "file")) {
1386  } else if (boost::iequals(*s, "world")) {
1387  copy_params.raster_point_transform =
1389  } else {
1390  throw std::runtime_error(
1391  "Invalid string for 'raster_point_transform' option (must be 'NONE', "
1392  "'AUTO', 'FILE' or 'WORLD'): " +
1393  *s);
1394  }
1395  } else if (boost::iequals(*p->get_name(), "raster_import_bands")) {
1396  const StringLiteral* str_literal =
1397  dynamic_cast<const StringLiteral*>(p->get_value());
1398  if (str_literal == nullptr) {
1399  throw std::runtime_error("'raster_import_bands' option must be a string");
1400  }
1401  const std::string* raster_import_bands = str_literal->get_stringval();
1402  if (raster_import_bands) {
1403  copy_params.raster_import_bands = *raster_import_bands;
1404  } else {
1405  throw std::runtime_error("Invalid value for 'raster_import_bands' option");
1406  }
1407  } else if (boost::iequals(*p->get_name(), "raster_import_dimensions")) {
1408  const StringLiteral* str_literal =
1409  dynamic_cast<const StringLiteral*>(p->get_value());
1410  if (str_literal == nullptr) {
1411  throw std::runtime_error("'raster_import_dimensions' option must be a string");
1412  }
1413  const std::string* raster_import_dimensions = str_literal->get_stringval();
1414  if (raster_import_dimensions) {
1415  copy_params.raster_import_dimensions = *raster_import_dimensions;
1416  } else {
1417  throw std::runtime_error("Invalid value for 'raster_import_dimensions' option");
1418  }
1419  } else if (boost::iequals(*p->get_name(), "geo_coords_encoding")) {
1420  const StringLiteral* str_literal =
1421  dynamic_cast<const StringLiteral*>(p->get_value());
1422  if (str_literal == nullptr) {
1423  throw std::runtime_error("'geo_coords_encoding' option must be a string");
1424  }
1425  const std::string* s = str_literal->get_stringval();
1426  if (boost::iequals(*s, "none")) {
1427  copy_params.geo_coords_encoding = kENCODING_NONE;
1428  copy_params.geo_coords_comp_param = 0;
1429  } else if (boost::iequals(*s, "compressed(32)")) {
1430  copy_params.geo_coords_encoding = kENCODING_GEOINT;
1431  copy_params.geo_coords_comp_param = 32;
1432  } else {
1433  throw std::runtime_error(
1434  "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
1435  "'COMPRESSED(32)'): " +
1436  *s);
1437  }
1438  } else if (boost::iequals(*p->get_name(), "raster_scanlines_per_thread")) {
1439  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1440  if (int_literal == nullptr) {
1441  throw std::runtime_error(
1442  "'raster_scanlines_per_thread' option must be an integer");
1443  }
1444  const int raster_scanlines_per_thread = int_literal->get_intval();
1445  if (raster_scanlines_per_thread < 0) {
1446  throw std::runtime_error(
1447  "'raster_scanlines_per_thread' option must be >= 0, with 0 denoting auto "
1448  "sizing");
1449  }
1450  copy_params.raster_scanlines_per_thread = raster_scanlines_per_thread;
1451  } else if (boost::iequals(*p->get_name(), "geo_coords_srid")) {
1452  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1453  if (int_literal == nullptr) {
1454  throw std::runtime_error("'geo_coords_srid' option must be an integer");
1455  }
1456  const int srid = int_literal->get_intval();
1457  if (srid == 4326 || srid == 3857 || srid == 900913) {
1458  copy_params.geo_coords_srid = srid;
1459  } else {
1460  throw std::runtime_error(
1461  "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
1462  "900913): " +
1463  std::to_string(srid));
1464  }
1465  } else if (boost::iequals(*p->get_name(), "geo_layer_name")) {
1466  const StringLiteral* str_literal =
1467  dynamic_cast<const StringLiteral*>(p->get_value());
1468  if (str_literal == nullptr) {
1469  throw std::runtime_error("'geo_layer_name' option must be a string");
1470  }
1471  const std::string* layer_name = str_literal->get_stringval();
1472  if (layer_name) {
1473  copy_params.geo_layer_name = *layer_name;
1474  } else {
1475  throw std::runtime_error("Invalid value for 'geo_layer_name' option");
1476  }
1477  } else if (boost::iequals(*p->get_name(), "partitions")) {
// NOTE(review): unlike the other string options this one uses static_cast,
// so the literal kind of the value is not checked before get_stringval() is
// called - confirm the grammar guarantees a string literal here.
1478  const auto partitions =
1479  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
1480  CHECK(partitions);
1481  const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
1482  if (partitions_uc != "REPLICATED") {
1483  throw std::runtime_error(
1484  "Invalid value for 'partitions' option. Must be 'REPLICATED'.");
1485  }
1486  deferred_copy_from_partitions_ = partitions_uc;
1487  } else if (boost::iequals(*p->get_name(), "geo_assign_render_groups")) {
1488  const StringLiteral* str_literal =
1489  dynamic_cast<const StringLiteral*>(p->get_value());
1490  if (str_literal == nullptr) {
1491  throw std::runtime_error("geo_assign_render_groups option must be a boolean.");
1492  }
1493  copy_params.geo_assign_render_groups = bool_from_string_literal(str_literal);
1494  } else if (boost::iequals(*p->get_name(), "geo_explode_collections")) {
1495  const StringLiteral* str_literal =
1496  dynamic_cast<const StringLiteral*>(p->get_value());
1497  if (str_literal == nullptr) {
1498  throw std::runtime_error("geo_explode_collections option must be a boolean.");
1499  }
1500  copy_params.geo_explode_collections = bool_from_string_literal(str_literal);
1501  } else if (boost::iequals(*p->get_name(), "source_srid")) {
1502  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1503  if (int_literal == nullptr) {
1504  throw std::runtime_error("'source_srid' option must be an integer");
1505  }
1506  const int srid = int_literal->get_intval();
1508  copy_params.source_srid = srid;
1509  } else {
1510  throw std::runtime_error(
1511  "'source_srid' option can only be used on csv/tsv files");
1512  }
1513  } else if (boost::iequals(*p->get_name(), "regex_path_filter")) {
1514  const StringLiteral* str_literal =
1515  dynamic_cast<const StringLiteral*>(p->get_value());
1516  if (str_literal == nullptr) {
1517  throw std::runtime_error("Option regex_path_filter must be a string.");
1518  }
1519  const auto string_val = *str_literal->get_stringval();
1520  copy_params.regex_path_filter =
1521  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1522  } else if (boost::iequals(*p->get_name(), "file_sort_order_by")) {
1523  const StringLiteral* str_literal =
1524  dynamic_cast<const StringLiteral*>(p->get_value());
1525  if (str_literal == nullptr) {
1526  throw std::runtime_error("Option file_sort_order_by must be a string.");
1527  }
1528  const auto string_val = *str_literal->get_stringval();
1529  copy_params.file_sort_order_by =
1530  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1531  } else if (boost::iequals(*p->get_name(), "file_sort_regex")) {
1532  const StringLiteral* str_literal =
1533  dynamic_cast<const StringLiteral*>(p->get_value());
1534  if (str_literal == nullptr) {
1535  throw std::runtime_error("Option file_sort_regex must be a string.");
1536  }
1537  const auto string_val = *str_literal->get_stringval();
1538  copy_params.file_sort_regex =
1539  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1540  } else if (boost::iequals(*p->get_name(), "raster_point_compute_angle")) {
1541  const StringLiteral* str_literal =
1542  dynamic_cast<const StringLiteral*>(p->get_value());
1543  if (str_literal == nullptr) {
1544  throw std::runtime_error(
1545  "'raster_point_compute_angle' option must be a boolean.");
1546  }
1547  if (bool_from_string_literal(str_literal)) {
1548  copy_params.raster_point_compute_angle = true;
1549  }
1550  } else if (boost::iequals(*p->get_name(), "sql_order_by")) {
1551  if (auto str_literal = dynamic_cast<const StringLiteral*>(p->get_value())) {
1552  copy_params.sql_order_by = *str_literal->get_stringval();
1553  } else {
1554  throw std::runtime_error("Option sql_order_by must be a string.");
1555  }
1556  } else if (boost::iequals(*p->get_name(), "username")) {
1557  const StringLiteral* str_literal =
1558  dynamic_cast<const StringLiteral*>(p->get_value());
1559  if (str_literal == nullptr) {
1560  throw std::runtime_error("Option username must be a string.");
1561  }
1562  const auto string_val = *str_literal->get_stringval();
1563  copy_params.username = string_val;
1564  } else if (boost::iequals(*p->get_name(), "password")) {
1565  const StringLiteral* str_literal =
1566  dynamic_cast<const StringLiteral*>(p->get_value());
1567  if (str_literal == nullptr) {
1568  throw std::runtime_error("Option password must be a string.");
1569  }
1570  const auto string_val = *str_literal->get_stringval();
1571  copy_params.password = string_val;
1572  } else if (boost::iequals(*p->get_name(), "credential_string")) {
1573  const StringLiteral* str_literal =
1574  dynamic_cast<const StringLiteral*>(p->get_value());
1575  if (str_literal == nullptr) {
1576  throw std::runtime_error("Option credential_string must be a string.");
1577  }
1578  const auto string_val = *str_literal->get_stringval();
1579  copy_params.credential_string = string_val;
1580  } else if (boost::iequals(*p->get_name(), "data_source_name")) {
1581  const StringLiteral* str_literal =
1582  dynamic_cast<const StringLiteral*>(p->get_value());
1583  if (str_literal == nullptr) {
1584  throw std::runtime_error("Option data_source_name must be a string.");
1585  }
1586  const auto string_val = *str_literal->get_stringval();
1587  copy_params.dsn = string_val;
1588  } else if (boost::iequals(*p->get_name(), "connection_string")) {
1589  const StringLiteral* str_literal =
1590  dynamic_cast<const StringLiteral*>(p->get_value());
1591  if (str_literal == nullptr) {
1592  throw std::runtime_error("Option connection_string must be a string.");
1593  }
1594  const auto string_val = *str_literal->get_stringval();
1595  copy_params.connection_string = string_val;
1596  } else if (boost::iequals(*p->get_name(), "line_start_regex")) {
1597  const StringLiteral* str_literal =
1598  dynamic_cast<const StringLiteral*>(p->get_value());
1599  if (str_literal == nullptr) {
1600  throw std::runtime_error("Option line_start_regex must be a string.");
1601  }
1602  const auto string_val = *str_literal->get_stringval();
1603  copy_params.line_start_regex = string_val;
1604  } else if (boost::iequals(*p->get_name(), "line_regex")) {
1605  const StringLiteral* str_literal =
1606  dynamic_cast<const StringLiteral*>(p->get_value());
1607  if (str_literal == nullptr) {
1608  throw std::runtime_error("Option line_regex must be a string.");
1609  }
1610  const auto string_val = *str_literal->get_stringval();
1611  copy_params.line_regex = string_val;
1612  } else if (boost::iequals(*p->get_name(), "add_metadata_columns") &&
1614  const StringLiteral* str_literal =
1615  dynamic_cast<const StringLiteral*>(p->get_value());
1616  if (str_literal == nullptr) {
1617  throw std::runtime_error("'add_metadata_columns' option must be a string.");
1618  }
1619  copy_params.add_metadata_columns = *str_literal->get_stringval();
1620  } else {
1621  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
1622  }
1623  }
1624  }
1625 }
1626 
1627 bool expr_is_null(const Analyzer::Expr* expr) {
1628  if (expr->get_type_info().get_type() == kNULLT) {
1629  return true;
1630  }
1631  const auto const_expr = dynamic_cast<const Analyzer::Constant*>(expr);
1632  return const_expr && const_expr->get_is_null();
1633 }
1634 
1635 } // namespace
1636 
1637 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
1638  const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
1639  std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
1640  const std::shared_ptr<Analyzer::Expr> else_e_in,
1641  const Executor* executor) {
1642  SQLTypeInfo ti;
1643  bool has_agg = false;
1644  // We need to keep track of whether there was at
1645  // least one none-encoded string literal expression
1646  // type among any of the case sub-expressions separately
1647  // from rest of type determination logic, as it will
1648  // be casted to the output dictionary type if all output
1649  // types are either dictionary encoded or none-encoded
1650  // literals, or a transient encoded dictionary if all
1651  // types are none-encoded (column or literal)
1652  SQLTypeInfo none_encoded_literal_ti;
1653 
1654  for (auto& p : expr_pair_list) {
1655  auto e1 = p.first;
1656  CHECK(e1->get_type_info().is_boolean());
1657  auto e2 = p.second;
1658  if (e2->get_contains_agg()) {
1659  has_agg = true;
1660  }
1661  const auto& e2_ti = e2->get_type_info();
1662  const auto col_var = std::dynamic_pointer_cast<const Analyzer::ColumnVar>(e2);
1663  if (e2_ti.is_string() && !e2_ti.is_dict_encoded_string() && !col_var) {
1664  CHECK(e2_ti.is_none_encoded_string());
1665  none_encoded_literal_ti =
1666  none_encoded_literal_ti.get_type() == kNULLT
1667  ? e2_ti
1668  : common_string_type(none_encoded_literal_ti, e2_ti, executor);
1669  continue;
1670  }
1671  if (ti.get_type() == kNULLT) {
1672  ti = e2_ti;
1673  } else if (e2_ti.get_type() == kNULLT) {
1674  ti.set_notnull(false);
1675  e2->set_type_info(ti);
1676  } else if (ti != e2_ti) {
1677  if (ti.is_string() && e2_ti.is_string()) {
1678  // Executor is needed to determine which dictionary is the largest
1679  // in case of two dictionary types with different encodings
1680  ti = common_string_type(ti, e2_ti, executor);
1681  } else if (ti.is_number() && e2_ti.is_number()) {
1683  } else if (ti.is_boolean() && e2_ti.is_boolean()) {
1685  } else {
1686  throw std::runtime_error(
1687  "Expressions in THEN clause must be of the same or compatible types.");
1688  }
1689  }
1690  }
1691  auto else_e = else_e_in;
1692  const auto& else_ti = else_e->get_type_info();
1693  if (else_e) {
1694  const auto col_var = std::dynamic_pointer_cast<const Analyzer::ColumnVar>(else_e);
1695  if (else_e->get_contains_agg()) {
1696  has_agg = true;
1697  }
1698  if (else_ti.is_string() && !else_ti.is_dict_encoded_string() && !col_var) {
1699  CHECK(else_ti.is_none_encoded_string());
1700  none_encoded_literal_ti =
1701  none_encoded_literal_ti.get_type() == kNULLT
1702  ? else_ti
1703  : common_string_type(none_encoded_literal_ti, else_ti, executor);
1704  } else {
1705  if (ti.get_type() == kNULLT) {
1706  ti = else_ti;
1707  } else if (expr_is_null(else_e.get())) {
1708  ti.set_notnull(false);
1709  else_e->set_type_info(ti);
1710  } else if (ti != else_ti) {
1711  ti.set_notnull(false);
1712  if (ti.is_string() && else_ti.is_string()) {
1713  // Executor is needed to determine which dictionary is the largest
1714  // in case of two dictionary types with different encodings
1715  ti = common_string_type(ti, else_ti, executor);
1716  } else if (ti.is_number() && else_ti.is_number()) {
1717  ti = Analyzer::BinOper::common_numeric_type(ti, else_ti);
1718  } else if (ti.is_boolean() && else_ti.is_boolean()) {
1719  ti = Analyzer::BinOper::common_numeric_type(ti, else_ti);
1720  } else if (get_logical_type_info(ti) != get_logical_type_info(else_ti)) {
1721  throw std::runtime_error(
1722  // types differing by encoding will be resolved at decode
1723  "Expressions in ELSE clause must be of the same or compatible types as "
1724  "those in the THEN clauses.");
1725  }
1726  }
1727  }
1728  }
1729 
1730  if (ti.get_type() == kNULLT && none_encoded_literal_ti.get_type() != kNULLT) {
1731  // If we haven't set a type so far it's because
1732  // every case sub-expression has a none-encoded
1733  // literal output. Output a transient-encoded dictionary
1734  // so we can use the output downstream
1736  }
1737 
1738  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1739  cast_expr_pair_list;
1740  for (auto p : expr_pair_list) {
1741  ti.set_notnull(false);
1742  cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
1743  }
1744  if (else_e != nullptr) {
1745  else_e = else_e->add_cast(ti);
1746  } else {
1747  Datum d;
1748  // always create an else expr so that executor doesn't need to worry about it
1749  ti.set_notnull(false);
1750  else_e = makeExpr<Analyzer::Constant>(ti, true, d);
1751  }
1752  if (ti.get_type() == kNULLT) {
1753  throw std::runtime_error(
1754  "Cannot deduce the type for case expressions, all branches null");
1755  }
1756 
1757  auto case_expr = makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
1758  return case_expr;
1759 }
1760 
1761 std::string CaseExpr::to_string() const {
1762  std::string str("CASE ");
1763  for (auto& p : when_then_list_) {
1764  str += "WHEN " + p->get_expr1()->to_string() + " THEN " +
1765  p->get_expr2()->to_string() + " ";
1766  }
1767  if (else_expr_ != nullptr) {
1768  str += "ELSE " + else_expr_->to_string();
1769  }
1770  str += " END";
1771  return str;
1772 }
1773 
1774 void UnionQuery::analyze(const Catalog_Namespace::Catalog& catalog,
1775  Analyzer::Query& query) const {
1776  left_->analyze(catalog, query);
1777  Analyzer::Query* right_query = new Analyzer::Query();
1778  right_->analyze(catalog, *right_query);
1779  query.set_next_query(right_query);
1780  query.set_is_unionall(is_unionall_);
1781 }
1782 
1783 void QuerySpec::analyze_having_clause(const Catalog_Namespace::Catalog& catalog,
1784  Analyzer::Query& query) const {
1785  std::shared_ptr<Analyzer::Expr> p;
1786  if (having_clause_ != nullptr) {
1787  p = having_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1788  if (p->get_type_info().get_type() != kBOOLEAN) {
1789  throw std::runtime_error("Only boolean expressions can be in HAVING clause.");
1790  }
1791  p->check_group_by(query.get_group_by());
1792  }
1793  query.set_having_predicate(p);
1794 }
1795 
1796 void QuerySpec::analyze_group_by(const Catalog_Namespace::Catalog& catalog,
1797  Analyzer::Query& query) const {
1798  std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1799  if (!groupby_clause_.empty()) {
1800  int gexpr_no = 1;
1801  std::shared_ptr<Analyzer::Expr> gexpr;
1802  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1803  query.get_targetlist();
1804  for (auto& c : groupby_clause_) {
1805  // special-case ordinal numbers in GROUP BY
1806  if (dynamic_cast<Literal*>(c.get())) {
1807  IntLiteral* i = dynamic_cast<IntLiteral*>(c.get());
1808  if (!i) {
1809  throw std::runtime_error("Invalid literal in GROUP BY clause.");
1810  }
1811  int varno = (int)i->get_intval();
1812  if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1813  throw std::runtime_error("Invalid ordinal number in GROUP BY clause.");
1814  }
1815  if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1816  throw std::runtime_error(
1817  "Ordinal number in GROUP BY cannot reference an expression containing "
1818  "aggregate "
1819  "functions.");
1820  }
1821  gexpr = makeExpr<Analyzer::Var>(
1822  tlist[varno - 1]->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
1823  } else {
1824  gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1825  }
1826  const SQLTypeInfo gti = gexpr->get_type_info();
1827  bool set_new_type = false;
1828  SQLTypeInfo ti(gti);
1829  if (gti.is_string() && gti.get_compression() == kENCODING_NONE) {
1830  set_new_type = true;
1833  ti.set_fixed_size();
1834  }
1835  std::shared_ptr<Analyzer::Var> v;
1836  if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
1837  v = std::static_pointer_cast<Analyzer::Var>(gexpr);
1838  int n = v->get_varno();
1839  gexpr = tlist[n - 1]->get_own_expr();
1840  auto cv = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gexpr);
1841  if (cv != nullptr) {
1842  // inherit all ColumnVar info for lineage.
1843  *std::static_pointer_cast<Analyzer::ColumnVar>(v) = *cv;
1844  }
1845  v->set_which_row(Analyzer::Var::kGROUPBY);
1846  v->set_varno(gexpr_no);
1847  tlist[n - 1]->set_expr(v);
1848  }
1849  if (set_new_type) {
1850  auto new_e = gexpr->add_cast(ti);
1851  groupby.push_back(new_e);
1852  if (v != nullptr) {
1853  v->set_type_info(new_e->get_type_info());
1854  }
1855  } else {
1856  groupby.push_back(gexpr);
1857  }
1858  gexpr_no++;
1859  }
1860  }
1861  if (query.get_num_aggs() > 0 || !groupby.empty()) {
1862  for (auto t : query.get_targetlist()) {
1863  auto e = t->get_expr();
1864  e->check_group_by(groupby);
1865  }
1866  }
1867  query.set_group_by(groupby);
1868 }
1869 
1870 void QuerySpec::analyze_where_clause(const Catalog_Namespace::Catalog& catalog,
1871  Analyzer::Query& query) const {
1872  if (where_clause_ == nullptr) {
1873  query.set_where_predicate(nullptr);
1874  return;
1875  }
1876  auto p = where_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1877  if (p->get_type_info().get_type() != kBOOLEAN) {
1878  throw std::runtime_error("Only boolean expressions can be in WHERE clause.");
1879  }
1880  query.set_where_predicate(p);
1881 }
1882 
1883 void QuerySpec::analyze_select_clause(const Catalog_Namespace::Catalog& catalog,
1884  Analyzer::Query& query) const {
1885  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1886  query.get_targetlist_nonconst();
1887  if (select_clause_.empty()) {
1888  // this means SELECT *
1889  int rte_idx = 0;
1890  for (auto rte : query.get_rangetable()) {
1891  rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1892  }
1893  } else {
1894  for (auto& p : select_clause_) {
1895  const Parser::Expr* select_expr = p->get_select_expr();
1896  // look for the case of range_var.*
1897  if (typeid(*select_expr) == typeid(ColumnRef) &&
1898  dynamic_cast<const ColumnRef*>(select_expr)->get_column() == nullptr) {
1899  const std::string* range_var_name =
1900  dynamic_cast<const ColumnRef*>(select_expr)->get_table();
1901  int rte_idx = query.get_rte_idx(*range_var_name);
1902  if (rte_idx < 0) {
1903  throw std::runtime_error("invalid range variable name: " + *range_var_name);
1904  }
1905  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
1906  rte->expand_star_in_targetlist(catalog, tlist, rte_idx);
1907  } else {
1908  auto e = select_expr->analyze(catalog, query);
1909  std::string resname;
1910 
1911  if (p->get_alias() != nullptr) {
1912  resname = *p->get_alias();
1913  } else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1914  !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1915  auto colvar = std::static_pointer_cast<Analyzer::ColumnVar>(e);
1916  const auto& column_key = colvar->getColumnKey();
1917  const ColumnDescriptor* col_desc =
1918  catalog.getMetadataForColumn(column_key.table_id, column_key.column_id);
1919  resname = col_desc->columnName;
1920  }
1921  if (e->get_type_info().get_type() == kNULLT) {
1922  throw std::runtime_error(
1923  "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1924  }
1925  auto o = std::static_pointer_cast<Analyzer::UOper>(e);
1926  bool unnest = (o != nullptr && o->get_optype() == kUNNEST);
1927  auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1928  tlist.push_back(tle);
1929  }
1930  }
1931  }
1932 }
1933 
1934 void QuerySpec::analyze_from_clause(const Catalog_Namespace::Catalog& catalog,
1935  Analyzer::Query& query) const {
1937  for (auto& p : from_clause_) {
1938  const TableDescriptor* table_desc;
1939  table_desc = catalog.getMetadataForTable(*p->get_table_name());
1940  if (table_desc == nullptr) {
1941  throw std::runtime_error("Table " + *p->get_table_name() + " does not exist.");
1942  }
1943  std::string range_var;
1944  if (p->get_range_var() == nullptr) {
1945  range_var = *p->get_table_name();
1946  } else {
1947  range_var = *p->get_range_var();
1948  }
1949  rte = new Analyzer::RangeTableEntry(range_var, table_desc, nullptr);
1950  query.add_rte(rte);
1951  }
1952 }
1953 
1954 void QuerySpec::analyze(const Catalog_Namespace::Catalog& catalog,
1955  Analyzer::Query& query) const {
1956  query.set_is_distinct(is_distinct_);
1957  analyze_from_clause(catalog, query);
1958  analyze_select_clause(catalog, query);
1959  analyze_where_clause(catalog, query);
1960  analyze_group_by(catalog, query);
1961  analyze_having_clause(catalog, query);
1962 }
1963 
1964 namespace {
1965 
1966 // clean known escape'd chars without having to do a full json parse
1967 std::string unescape(std::string s) {
1968  boost::replace_all(s, "\\\\t", "\t");
1969  boost::replace_all(s, "\\t", "\t");
1970  boost::replace_all(s, "\\\\n", "\n");
1971  boost::replace_all(s, "\\n", "\n");
1972 
1973  // handle numerics
1974  std::smatch m;
1975 
1976  // "\x00"
1977  std::regex e1("(\\\\x[0-9A-Fa-f][0-9A-Fa-f])");
1978  while (std::regex_search(s, m, e1)) {
1979  std::string original(m[0].first, m[0].second);
1980  std::string replacement;
1981  long val = strtol(original.substr(2, 2).c_str(), NULL, 16);
1982  replacement.push_back(val);
1983  boost::replace_all(s, original, replacement);
1984  }
1985 
1986  // "\u0000"
1987  std::regex e2("(\\\\u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])");
1988  while (std::regex_search(s, m, e2)) {
1989  std::string original(m[0].first, m[0].second);
1990  std::string replacement;
1991  long val = strtol(original.substr(2, 4).c_str(), NULL, 16);
1992  replacement.push_back(val);
1993  boost::replace_all(s, original, replacement);
1994  }
1995 
1996  return s;
1997 }
1998 
1999 void parse_options(const rapidjson::Value& payload,
2000  std::list<std::unique_ptr<NameValueAssign>>& nameValueList,
2001  bool stringToNull = false,
2002  bool stringToInteger = false) {
2003  if (payload.HasMember("options") && payload["options"].IsObject()) {
2004  const auto& options = payload["options"];
2005  for (auto itr = options.MemberBegin(); itr != options.MemberEnd(); ++itr) {
2006  auto option_name = std::make_unique<std::string>(itr->name.GetString());
2007  std::unique_ptr<Literal> literal_value;
2008  if (itr->value.IsString()) {
2009  std::string str = itr->value.GetString();
2010  if (stringToNull && str == "") {
2011  literal_value = std::make_unique<NullLiteral>();
2012  } else if (stringToInteger && std::all_of(str.begin(), str.end(), ::isdigit)) {
2013  int iVal = std::stoi(str);
2014  literal_value = std::make_unique<IntLiteral>(iVal);
2015  } else {
2016  // Rapidjson will deliberately provide escape'd strings when accessed
2017  // ... but the literal should have a copy of the raw unescaped string
2018  auto unique_literal_string = std::make_unique<std::string>(unescape(str));
2019  literal_value =
2020  std::make_unique<StringLiteral>(unique_literal_string.release());
2021  }
2022  } else if (itr->value.IsInt() || itr->value.IsInt64()) {
2023  literal_value = std::make_unique<IntLiteral>(json_i64(itr->value));
2024  } else if (itr->value.IsNull()) {
2025  literal_value = std::make_unique<NullLiteral>();
2026  } else {
2027  throw std::runtime_error("Unable to handle literal for " + *option_name);
2028  }
2029  CHECK(literal_value);
2030 
2031  nameValueList.emplace_back(std::make_unique<NameValueAssign>(
2032  option_name.release(), literal_value.release()));
2033  }
2034  }
2035 }
2036 } // namespace
2037 
2038 void SelectStmt::analyze(const Catalog_Namespace::Catalog& catalog,
2039  Analyzer::Query& query) const {
2040  query.set_stmt_type(kSELECT);
2041  query.set_limit(limit_);
2042  if (offset_ < 0) {
2043  throw std::runtime_error("OFFSET cannot be negative.");
2044  }
2045  query.set_offset(offset_);
2046  query_expr_->analyze(catalog, query);
2047  if (orderby_clause_.empty() && !query.get_is_distinct()) {
2048  query.set_order_by(nullptr);
2049  return;
2050  }
2051  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
2052  query.get_targetlist();
2053  std::list<Analyzer::OrderEntry>* order_by = new std::list<Analyzer::OrderEntry>();
2054  if (!orderby_clause_.empty()) {
2055  for (auto& p : orderby_clause_) {
2056  int tle_no = p->get_colno();
2057  if (tle_no == 0) {
2058  // use column name
2059  // search through targetlist for matching name
2060  const std::string* name = p->get_column()->get_column();
2061  tle_no = 1;
2062  bool found = false;
2063  for (auto tle : tlist) {
2064  if (tle->get_resname() == *name) {
2065  found = true;
2066  break;
2067  }
2068  tle_no++;
2069  }
2070  if (!found) {
2071  throw std::runtime_error("invalid name in order by: " + *name);
2072  }
2073  }
2074  order_by->push_back(
2075  Analyzer::OrderEntry(tle_no, p->get_is_desc(), p->get_nulls_first()));
2076  }
2077  }
2078  if (query.get_is_distinct()) {
2079  // extend order_by to include all targetlist entries.
2080  for (int i = 1; i <= static_cast<int>(tlist.size()); i++) {
2081  bool in_orderby = false;
2082  std::for_each(order_by->begin(),
2083  order_by->end(),
2084  [&in_orderby, i](const Analyzer::OrderEntry& oe) {
2085  in_orderby = in_orderby || (i == oe.tle_no);
2086  });
2087  if (!in_orderby) {
2088  order_by->push_back(Analyzer::OrderEntry(i, false, false));
2089  }
2090  }
2091  }
2092  query.set_order_by(order_by);
2093 }
2094 
2095 std::string SelectEntry::to_string() const {
2096  std::string str = select_expr_->to_string();
2097  if (alias_ != nullptr) {
2098  str += " AS " + *alias_;
2099  }
2100  return str;
2101 }
2102 
2103 std::string TableRef::to_string() const {
2104  std::string str = *table_name_;
2105  if (range_var_ != nullptr) {
2106  str += " " + *range_var_;
2107  }
2108  return str;
2109 }
2110 
2111 std::string ColumnRef::to_string() const {
2112  std::string str;
2113  if (table_ == nullptr) {
2114  str = *column_;
2115  } else if (column_ == nullptr) {
2116  str = *table_ + ".*";
2117  } else {
2118  str = *table_ + "." + *column_;
2119  }
2120  return str;
2121 }
2122 
2123 std::string OperExpr::to_string() const {
2124  std::string op_str[] = {
2125  "=", "===", "<>", "<", ">", "<=", ">=", " AND ", " OR ", "NOT", "-", "+", "*", "/"};
2126  std::string str;
2127  if (optype_ == kUMINUS) {
2128  str = "-(" + left_->to_string() + ")";
2129  } else if (optype_ == kNOT) {
2130  str = "NOT (" + left_->to_string() + ")";
2131  } else if (optype_ == kARRAY_AT) {
2132  str = left_->to_string() + "[" + right_->to_string() + "]";
2133  } else if (optype_ == kUNNEST) {
2134  str = "UNNEST(" + left_->to_string() + ")";
2135  } else if (optype_ == kIN) {
2136  str = "(" + left_->to_string() + " IN " + right_->to_string() + ")";
2137  } else {
2138  str = "(" + left_->to_string() + op_str[optype_] + right_->to_string() + ")";
2139  }
2140  return str;
2141 }
2142 
2143 std::string InExpr::to_string() const {
2144  std::string str = arg_->to_string();
2145  if (is_not_) {
2146  str += " NOT IN ";
2147  } else {
2148  str += " IN ";
2149  }
2150  return str;
2151 }
2152 
2153 std::string ExistsExpr::to_string() const {
2154  return "EXISTS (" + query_->to_string() + ")";
2155 }
2156 
2157 std::string SubqueryExpr::to_string() const {
2158  std::string str;
2159  str = "(";
2160  str += query_->to_string();
2161  str += ")";
2162  return str;
2163 }
2164 
2165 std::string IsNullExpr::to_string() const {
2166  std::string str = arg_->to_string();
2167  if (is_not_) {
2168  str += " IS NOT NULL";
2169  } else {
2170  str += " IS NULL";
2171  }
2172  return str;
2173 }
2174 
2175 std::string InSubquery::to_string() const {
2176  std::string str = InExpr::to_string();
2177  str += subquery_->to_string();
2178  return str;
2179 }
2180 
2181 std::string InValues::to_string() const {
2182  std::string str = InExpr::to_string() + "(";
2183  bool notfirst = false;
2184  for (auto& p : value_list_) {
2185  if (notfirst) {
2186  str += ", ";
2187  } else {
2188  notfirst = true;
2189  }
2190  str += p->to_string();
2191  }
2192  str += ")";
2193  return str;
2194 }
2195 
2196 std::string BetweenExpr::to_string() const {
2197  std::string str = arg_->to_string();
2198  if (is_not_) {
2199  str += " NOT BETWEEN ";
2200  } else {
2201  str += " BETWEEN ";
2202  }
2203  str += lower_->to_string() + " AND " + upper_->to_string();
2204  return str;
2205 }
2206 
2207 std::string CharLengthExpr::to_string() const {
2208  std::string str;
2209  if (calc_encoded_length_) {
2210  str = "CHAR_LENGTH (" + arg_->to_string() + ")";
2211  } else {
2212  str = "LENGTH (" + arg_->to_string() + ")";
2213  }
2214  return str;
2215 }
2216 
2217 std::string CardinalityExpr::to_string() const {
2218  std::string str = "CARDINALITY(" + arg_->to_string() + ")";
2219  return str;
2220 }
2221 
2222 std::string LikeExpr::to_string() const {
2223  std::string str = arg_->to_string();
2224  if (is_not_) {
2225  str += " NOT LIKE ";
2226  } else {
2227  str += " LIKE ";
2228  }
2229  str += like_string_->to_string();
2230  if (escape_string_ != nullptr) {
2231  str += " ESCAPE " + escape_string_->to_string();
2232  }
2233  return str;
2234 }
2235 
2236 std::string RegexpExpr::to_string() const {
2237  std::string str = arg_->to_string();
2238  if (is_not_) {
2239  str += " NOT REGEXP ";
2240  } else {
2241  str += " REGEXP ";
2242  }
2243  str += pattern_string_->to_string();
2244  if (escape_string_ != nullptr) {
2245  str += " ESCAPE " + escape_string_->to_string();
2246  }
2247  return str;
2248 }
2249 
2250 std::string WidthBucketExpr::to_string() const {
2251  std::string str = " WIDTH_BUCKET ";
2252  str += target_value_->to_string();
2253  str += " ";
2254  str += lower_bound_->to_string();
2255  str += " ";
2256  str += upper_bound_->to_string();
2257  str += " ";
2258  str += partition_count_->to_string();
2259  str += " ";
2260  return str;
2261 }
2262 
2263 std::string LikelihoodExpr::to_string() const {
2264  std::string str = " LIKELIHOOD ";
2265  str += arg_->to_string();
2266  str += " ";
2267  str += boost::lexical_cast<std::string>(is_not_ ? 1.0 - likelihood_ : likelihood_);
2268  return str;
2269 }
2270 
2271 std::string FunctionRef::to_string() const {
2272  std::string str = *name_ + "(";
2273  if (distinct_) {
2274  str += "DISTINCT ";
2275  }
2276  if (arg_ == nullptr) {
2277  str += "*)";
2278  } else {
2279  str += arg_->to_string() + ")";
2280  }
2281  return str;
2282 }
2283 
2284 std::string QuerySpec::to_string() const {
2285  std::string query_str = "SELECT ";
2286  if (is_distinct_) {
2287  query_str += "DISTINCT ";
2288  }
2289  if (select_clause_.empty()) {
2290  query_str += "* ";
2291  } else {
2292  bool notfirst = false;
2293  for (auto& p : select_clause_) {
2294  if (notfirst) {
2295  query_str += ", ";
2296  } else {
2297  notfirst = true;
2298  }
2299  query_str += p->to_string();
2300  }
2301  }
2302  query_str += " FROM ";
2303  bool notfirst = false;
2304  for (auto& p : from_clause_) {
2305  if (notfirst) {
2306  query_str += ", ";
2307  } else {
2308  notfirst = true;
2309  }
2310  query_str += p->to_string();
2311  }
2312  if (where_clause_) {
2313  query_str += " WHERE " + where_clause_->to_string();
2314  }
2315  if (!groupby_clause_.empty()) {
2316  query_str += " GROUP BY ";
2317  bool notfirst = false;
2318  for (auto& p : groupby_clause_) {
2319  if (notfirst) {
2320  query_str += ", ";
2321  } else {
2322  notfirst = true;
2323  }
2324  query_str += p->to_string();
2325  }
2326  }
2327  if (having_clause_) {
2328  query_str += " HAVING " + having_clause_->to_string();
2329  }
2330  query_str += ";";
2331  return query_str;
2332 }
2333 
2334 void InsertStmt::analyze(const Catalog_Namespace::Catalog& catalog,
2335  Analyzer::Query& query) const {
2336  query.set_stmt_type(kINSERT);
2337  const TableDescriptor* td = catalog.getMetadataForTable(*table_);
2338  if (td == nullptr) {
2339  throw std::runtime_error("Table " + *table_ + " does not exist.");
2340  }
2341  if (td->isView) {
2342  throw std::runtime_error("Insert to views is not supported yet.");
2343  }
2345  query.set_result_table_id(td->tableId);
2346  std::list<int> result_col_list;
2347  if (column_list_.empty()) {
2348  const std::list<const ColumnDescriptor*> all_cols =
2349  catalog.getAllColumnMetadataForTable(td->tableId, false, false, true);
2350  for (auto cd : all_cols) {
2351  result_col_list.push_back(cd->columnId);
2352  }
2353  } else {
2354  for (auto& c : column_list_) {
2355  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
2356  if (cd == nullptr) {
2357  throw std::runtime_error("Column " + *c + " does not exist.");
2358  }
2359  result_col_list.push_back(cd->columnId);
2360  const auto& col_ti = cd->columnType;
2361  if (col_ti.get_physical_cols() > 0) {
2362  CHECK(cd->columnType.is_geometry());
2363  for (auto i = 1; i <= col_ti.get_physical_cols(); i++) {
2364  const ColumnDescriptor* pcd =
2365  catalog.getMetadataForColumn(td->tableId, cd->columnId + i);
2366  if (pcd == nullptr) {
2367  throw std::runtime_error("Column " + *c + "'s metadata is incomplete.");
2368  }
2369  result_col_list.push_back(pcd->columnId);
2370  }
2371  }
2372  }
2373  }
2374  query.set_result_col_list(result_col_list);
2375 }
2376 
2377 namespace {
2378 Literal* parse_insert_literal(const rapidjson::Value& literal) {
2379  CHECK(literal.IsObject());
2380  CHECK(literal.HasMember("literal"));
2381  CHECK(literal.HasMember("type"));
2382  auto type = json_str(literal["type"]);
2383  if (type == "NULL") {
2384  return new NullLiteral();
2385  } else if (type == "CHAR" || type == "BOOLEAN") {
2386  auto* val = new std::string(json_str(literal["literal"]));
2387  return new StringLiteral(val);
2388  } else if (type == "DECIMAL") {
2389  CHECK(literal.HasMember("scale"));
2390  CHECK(literal.HasMember("precision"));
2391  auto scale = json_i64(literal["scale"]);
2392  auto precision = json_i64(literal["precision"]);
2393  if (scale == 0) {
2394  auto int_val = std::stol(json_str(literal["literal"]));
2395  return new IntLiteral(int_val);
2396  } else if (precision > sql_constants::kMaxNumericPrecision) {
2397  auto dbl_val = std::stod(json_str(literal["literal"]));
2398  return new DoubleLiteral(dbl_val);
2399  } else {
2400  auto* val = new std::string(json_str(literal["literal"]));
2401  return new FixedPtLiteral(val);
2402  }
2403  } else if (type == "DOUBLE") {
2404  auto dbl_val = std::stod(json_str(literal["literal"]));
2405  return new DoubleLiteral(dbl_val);
2406  } else {
2407  CHECK(false) << "Unexpected calcite data type: " << type;
2408  }
2409  return nullptr;
2410 }
2411 
2412 ArrayLiteral* parse_insert_array_literal(const rapidjson::Value& array) {
2413  CHECK(array.IsArray());
2414  auto json_elements = array.GetArray();
2415  auto* elements = new std::list<Expr*>();
2416  for (const auto& e : json_elements) {
2417  elements->push_back(parse_insert_literal(e));
2418  }
2419  return new ArrayLiteral(elements);
2420 }
2421 } // namespace
2422 
2423 InsertValuesStmt::InsertValuesStmt(const rapidjson::Value& payload)
2424  : InsertStmt(nullptr, nullptr) {
2425  CHECK(payload.HasMember("name"));
2426  table_ = std::make_unique<std::string>(json_str(payload["name"]));
2427 
2428  if (payload.HasMember("columns")) {
2429  CHECK(payload["columns"].IsArray());
2430  for (auto& column : payload["columns"].GetArray()) {
2431  std::string s = json_str(column);
2432  column_list_.emplace_back(std::make_unique<std::string>(s));
2433  }
2434  }
2435 
2436  CHECK(payload.HasMember("values") && payload["values"].IsArray());
2437  auto tuples = payload["values"].GetArray();
2438  if (tuples.Empty()) {
2439  throw std::runtime_error("Values statement cannot be empty");
2440  }
2441  values_lists_.reserve(tuples.Size());
2442  for (const auto& json_tuple : tuples) {
2443  auto values_list = std::make_unique<ValuesList>();
2444  CHECK(json_tuple.IsArray());
2445  auto tuple = json_tuple.GetArray();
2446  for (const auto& value : tuple) {
2447  CHECK(value.IsObject());
2448  if (value.HasMember("array")) {
2449  values_list->push_back(parse_insert_array_literal(value["array"]));
2450  } else {
2451  values_list->push_back(parse_insert_literal(value));
2452  }
2453  }
2454  values_lists_.push_back(std::move(values_list));
2455  }
2456 }
2457 
2459  Analyzer::Query& query) const {
2460  InsertStmt::analyze(catalog, query);
2461  size_t list_size = values_lists_[0]->get_value_list().size();
2462  if (!column_list_.empty()) {
2463  if (list_size != column_list_.size()) {
2464  throw std::runtime_error(
2465  "Numbers of columns and values don't match for the "
2466  "insert.");
2467  }
2468  } else {
2469  const auto tableId = query.get_result_table_id();
2470  const std::list<const ColumnDescriptor*> non_phys_cols =
2471  catalog.getAllColumnMetadataForTable(tableId, false, false, false);
2472  if (non_phys_cols.size() != list_size) {
2473  throw std::runtime_error(
2474  "Number of columns in table does not match the list of values given in the "
2475  "insert.");
2476  }
2477  }
2478  std::vector<const ColumnDescriptor*> cds;
2479  cds.reserve(query.get_result_col_list().size());
2480  for (auto id : query.get_result_col_list()) {
2481  const auto* cd = catalog.getMetadataForColumn(query.get_result_table_id(), id);
2482  CHECK(cd);
2483  cds.push_back(cd);
2484  }
2485  auto& query_values_lists = query.get_values_lists();
2486  query_values_lists.resize(values_lists_.size());
2487  for (size_t i = 0; i < values_lists_.size(); ++i) {
2488  const auto& values_list = values_lists_[i]->get_value_list();
2489  if (values_list.size() != list_size) {
2490  throw std::runtime_error(
2491  "Insert values lists should be of the same size. Expected: " +
2492  std::to_string(list_size) + ", Got: " + std::to_string(values_list.size()));
2493  }
2494  auto& query_values_list = query_values_lists[i];
2495  size_t cds_id = 0;
2496  for (auto& v : values_list) {
2497  auto e = v->analyze(catalog, query);
2498  const auto* cd = cds[cds_id];
2499  const auto& col_ti = cd->columnType;
2500  if (col_ti.get_notnull()) {
2501  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
2502  if (c != nullptr && c->get_is_null()) {
2503  throw std::runtime_error("Cannot insert NULL into column " + cd->columnName);
2504  }
2505  }
2506  e = e->add_cast(col_ti);
2507  query_values_list.emplace_back(new Analyzer::TargetEntry("", e, false));
2508  ++cds_id;
2509 
2510  if (col_ti.get_physical_cols() > 0) {
2511  CHECK(cd->columnType.is_geometry());
2512  auto c = dynamic_cast<const Analyzer::Constant*>(e.get());
2513  if (!c) {
2514  auto uoper = std::dynamic_pointer_cast<Analyzer::UOper>(e);
2515  if (uoper && uoper->get_optype() == kCAST) {
2516  c = dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
2517  }
2518  }
2519  bool is_null = false;
2520  std::string* geo_string{nullptr};
2521  if (c) {
2522  is_null = c->get_is_null();
2523  if (!is_null) {
2524  geo_string = c->get_constval().stringval;
2525  }
2526  }
2527  if (!is_null && !geo_string) {
2528  throw std::runtime_error("Expecting a WKT or WKB hex string for column " +
2529  cd->columnName);
2530  }
2531  std::vector<double> coords;
2532  std::vector<double> bounds;
2533  std::vector<int> ring_sizes;
2534  std::vector<int> poly_rings;
2535  int render_group =
2536  0; // @TODO simon.eves where to get render_group from in this context?!
2537  SQLTypeInfo import_ti{cd->columnType};
2538  if (!is_null) {
2540  *geo_string, import_ti, coords, bounds, ring_sizes, poly_rings)) {
2541  throw std::runtime_error("Cannot read geometry to insert into column " +
2542  cd->columnName);
2543  }
2544  if (coords.empty()) {
2545  // Importing from geo_string WKT resulted in empty coords: dealing with a NULL
2546  is_null = true;
2547  }
2548  if (cd->columnType.get_type() != import_ti.get_type()) {
2549  // allow POLYGON to be inserted into MULTIPOLYGON column
2550  if (!(import_ti.get_type() == SQLTypes::kPOLYGON &&
2551  cd->columnType.get_type() == SQLTypes::kMULTIPOLYGON)) {
2552  throw std::runtime_error(
2553  "Imported geometry doesn't match the type of column " + cd->columnName);
2554  }
2555  }
2556  } else {
2557  // Special case for NULL POINT, push NULL representation to coords
2558  if (cd->columnType.get_type() == kPOINT) {
2559  if (!coords.empty()) {
2560  throw std::runtime_error(
2561  "NULL POINT with unexpected coordinates in column " + cd->columnName);
2562  }
2563  coords.push_back(NULL_ARRAY_DOUBLE);
2564  coords.push_back(NULL_DOUBLE);
2565  }
2566  }
2567 
2568  // TODO: check if import SRID matches columns SRID, may need to transform before
2569  // inserting
2570 
2571  const auto* cd_coords = cds[cds_id];
2572  CHECK_EQ(cd_coords->columnType.get_type(), kARRAY);
2573  CHECK_EQ(cd_coords->columnType.get_subtype(), kTINYINT);
2574  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2575  if (!is_null || cd->columnType.get_type() == kPOINT) {
2576  auto compressed_coords = Geospatial::compress_coords(coords, col_ti);
2577  for (auto cc : compressed_coords) {
2578  Datum d;
2579  d.tinyintval = cc;
2580  auto e = makeExpr<Analyzer::Constant>(kTINYINT, false, d);
2581  value_exprs.push_back(e);
2582  }
2583  }
2584  query_values_list.emplace_back(new Analyzer::TargetEntry(
2585  "",
2586  makeExpr<Analyzer::Constant>(cd_coords->columnType, is_null, value_exprs),
2587  false));
2588  ++cds_id;
2589 
2590  if (cd->columnType.get_type() == kMULTILINESTRING ||
2591  cd->columnType.get_type() == kPOLYGON ||
2592  cd->columnType.get_type() == kMULTIPOLYGON) {
2593  // Put [linest]ring sizes array into separate physical column
2594  const auto* cd_ring_sizes = cds[cds_id];
2595  CHECK(cd_ring_sizes);
2596  CHECK_EQ(cd_ring_sizes->columnType.get_type(), kARRAY);
2597  CHECK_EQ(cd_ring_sizes->columnType.get_subtype(), kINT);
2598  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2599  if (!is_null) {
2600  for (auto c : ring_sizes) {
2601  Datum d;
2602  d.intval = c;
2603  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
2604  value_exprs.push_back(e);
2605  }
2606  }
2607  query_values_list.emplace_back(new Analyzer::TargetEntry(
2608  "",
2609  makeExpr<Analyzer::Constant>(
2610  cd_ring_sizes->columnType, is_null, value_exprs),
2611  false));
2612  ++cds_id;
2613 
2614  if (cd->columnType.get_type() == kMULTIPOLYGON) {
2615  // Put poly_rings array into separate physical column
2616  const auto* cd_poly_rings = cds[cds_id];
2617  CHECK(cd_poly_rings);
2618  CHECK_EQ(cd_poly_rings->columnType.get_type(), kARRAY);
2619  CHECK_EQ(cd_poly_rings->columnType.get_subtype(), kINT);
2620  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2621  if (!is_null) {
2622  for (auto c : poly_rings) {
2623  Datum d;
2624  d.intval = c;
2625  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
2626  value_exprs.push_back(e);
2627  }
2628  }
2629  query_values_list.emplace_back(new Analyzer::TargetEntry(
2630  "",
2631  makeExpr<Analyzer::Constant>(
2632  cd_poly_rings->columnType, is_null, value_exprs),
2633  false));
2634  ++cds_id;
2635  }
2636  }
2637 
2638  if (cd->columnType.get_type() == kMULTIPOINT ||
2639  cd->columnType.get_type() == kLINESTRING ||
2640  cd->columnType.get_type() == kMULTILINESTRING ||
2641  cd->columnType.get_type() == kPOLYGON ||
2642  cd->columnType.get_type() == kMULTIPOLYGON) {
2643  const auto* cd_bounds = cds[cds_id];
2644  CHECK(cd_bounds);
2645  CHECK_EQ(cd_bounds->columnType.get_type(), kARRAY);
2646  CHECK_EQ(cd_bounds->columnType.get_subtype(), kDOUBLE);
2647  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2648  if (!is_null) {
2649  for (auto b : bounds) {
2650  Datum d;
2651  d.doubleval = b;
2652  auto e = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
2653  value_exprs.push_back(e);
2654  }
2655  }
2656  query_values_list.emplace_back(new Analyzer::TargetEntry(
2657  "",
2658  makeExpr<Analyzer::Constant>(cd_bounds->columnType, is_null, value_exprs),
2659  false));
2660  ++cds_id;
2661  }
2662 
2663  if (cd->columnType.get_type() == kPOLYGON ||
2664  cd->columnType.get_type() == kMULTIPOLYGON) {
2665  // Put render group into separate physical column
2666  const auto* cd_render_group = cds[cds_id];
2667  CHECK(cd_render_group);
2668  CHECK_EQ(cd_render_group->columnType.get_type(), kINT);
2669  Datum d;
2670  d.intval = render_group;
2671  query_values_list.emplace_back(new Analyzer::TargetEntry(
2672  "",
2673  makeExpr<Analyzer::Constant>(cd_render_group->columnType, is_null, d),
2674  false));
2675  ++cds_id;
2676  }
2677  }
2678  }
2679  }
2680 }
2681 
// Executes an INSERT ... VALUES statement. The first line of the signature and a
// few statements (lock acquisition, privilege condition, executor / loader setup)
// are not part of this listing.
//
// Visible flow: reject read-only mode, report missing insert privileges on *table_,
// analyze() the statement into an Analyzer::Query, run it through
// RelAlgExecutor::executeSimpleInsert, then checkpoint non-temporary tables.
//
// Throws std::runtime_error for read-only mode, missing insert privileges and
// views. Any exception raised by the insert is rethrown after a rollback attempt.
2683  bool read_only_mode) {
2684  if (read_only_mode) {
2685  throw std::runtime_error("INSERT values invalid in read only mode.");
2686  }
2687  auto execute_read_lock =
2691  auto& catalog = session.getCatalog();
2692  const auto td_with_lock =
2694  catalog, *table_);
2697  *table_)) {
2698  throw std::runtime_error("User has no insert privileges on " + *table_ + ".");
2699  }
2700  Analyzer::Query query;
2701  analyze(catalog, query);
2702 
2703  // Take an insert data write lock, which prevents concurrent inserts.
2704  const auto insert_data_lock =
2706 
2707  // NOTE(max): we do the same checks as below just a few calls earlier in analyze().
2708  // Do we keep those intentionally to make sure nothing changed in between w/o
2709  // catalog locks or is it just a duplicate work?
2710  auto td = td_with_lock();
2711  CHECK(td);
2712  if (td->isView) {
2713  throw std::runtime_error("Singleton inserts on views is not supported.");
2714  }
2716 
2718  RelAlgExecutor ra_executor(executor.get());
2719 
// Lazily fall back to the local connector when none has been set.
2720  if (!leafs_connector_) {
2721  leafs_connector_ = std::make_unique<Fragmenter_Namespace::LocalInsertConnector>();
2722  }
// On any failure attempt a rollback. A std::exception thrown by the rollback itself
// is only logged, so the original insert exception is the one that propagates.
2724  try {
2725  ra_executor.executeSimpleInsert(query, insert_data_loader, session);
2726  } catch (...) {
2727  try {
2728  leafs_connector_->rollback(session, td->tableId);
2729  } catch (std::exception& e) {
2730  LOG(ERROR) << "An error occurred during insert rollback attempt. Table id: "
2731  << td->tableId << ", Error: " << e.what();
2732  }
2733  throw;
2734  }
// Temporary tables are not checkpointed.
2735  if (!td->isTemporaryTable()) {
2736  leafs_connector_->checkpoint(session, td->tableId);
2737  }
2738 }
2739 
// Analysis entry point for UPDATE (presumably UpdateStmt::analyze; the first line of
// the signature is not part of this listing). Always throws std::runtime_error and
// never touches `query`.
2741  Analyzer::Query& query) const {
2742  throw std::runtime_error("UPDATE statement not supported yet.");
2743 }
2744 
// Analysis entry point for DELETE (presumably DeleteStmt::analyze; the first line of
// the signature is not part of this listing). Always throws std::runtime_error and
// never touches `query`.
2746  Analyzer::Query& query) const {
2747  throw std::runtime_error("DELETE statement not supported yet.");
2748 }
2749 
2750 namespace {
2751 
// Shard-key type check (the signature line is not part of this listing; `cd` is a
// column descriptor exposing `columnType`).
// Accepts integer columns, time columns and dictionary-encoded string columns.
// Throws std::runtime_error naming the type and encoding for anything else.
2753  const auto& col_ti = cd.columnType;
2754  if (!col_ti.is_integer() && !col_ti.is_time() &&
2755  !(col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT)) {
2756  throw std::runtime_error("Cannot shard on type " + col_ti.get_type_name() +
2757  ", encoding " + col_ti.get_compression_name());
2758  }
2759 }
2760 
// Returns the 1-based index of the column named exactly `name` (case-sensitive
// comparison), or 0 when no column matches.
// The index advances by one per column plus the physical column count of every
// geometry column that precedes the match.
// NOTE(review): one statement inside the match branch is missing from this listing
// (presumably a validation of the matched column) — confirm against the real file.
2761 size_t shard_column_index(const std::string& name,
2762  const std::list<ColumnDescriptor>& columns) {
2763  size_t index = 1;
2764  for (const auto& cd : columns) {
2765  if (cd.columnName == name) {
2767  return index;
2768  }
2769  ++index;
// Skip over the physical columns that belong to a geometry column.
2770  if (cd.columnType.is_geometry()) {
2771  index += cd.columnType.get_physical_cols();
2772  }
2773  }
2774  // Not found, return 0
2775  return 0;
2776 }
2777 
2778 size_t sort_column_index(const std::string& name,
2779  const std::list<ColumnDescriptor>& columns) {
2780  size_t index = 1;
2781  for (const auto& cd : columns) {
2782  if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
2783  return index;
2784  }
2785  ++index;
2786  if (cd.columnType.is_geometry()) {
2787  index += cd.columnType.get_physical_cols();
2788  }
2789  }
2790  // Not found, return 0
2791  return 0;
2792 }
2793 
2794 void set_string_field(rapidjson::Value& obj,
2795  const std::string& field_name,
2796  const std::string& field_value,
2797  rapidjson::Document& document) {
2798  rapidjson::Value field_name_json_str;
2799  field_name_json_str.SetString(
2800  field_name.c_str(), field_name.size(), document.GetAllocator());
2801  rapidjson::Value field_value_json_str;
2802  field_value_json_str.SetString(
2803  field_value.c_str(), field_value.size(), document.GetAllocator());
2804  obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
2805 }
2806 
// Serializes key metadata into a JSON array string (the first line of the signature
// is not part of this listing; presumably serialize_key_metainfo).
// The array holds one {"type": "SHARD KEY", "name": <column>} object when
// `shard_key_def` is non-null, followed by one "SHARED DICTIONARY" object per entry
// of `shared_dict_defs` carrying name, foreign_table and foreign_column.
2808  const ShardKeyDef* shard_key_def,
2809  const std::vector<SharedDictionaryDef>& shared_dict_defs) {
2810  rapidjson::Document document;
2811  auto& allocator = document.GetAllocator();
2812  rapidjson::Value arr(rapidjson::kArrayType);
2813  if (shard_key_def) {
2814  rapidjson::Value shard_key_obj(rapidjson::kObjectType);
2815  set_string_field(shard_key_obj, "type", "SHARD KEY", document);
2816  set_string_field(shard_key_obj, "name", shard_key_def->get_column(), document);
2817  arr.PushBack(shard_key_obj, allocator);
2818  }
2819  for (const auto& shared_dict_def : shared_dict_defs) {
2820  rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
2821  set_string_field(shared_dict_obj, "type", "SHARED DICTIONARY", document);
2822  set_string_field(shared_dict_obj, "name", shared_dict_def.get_column(), document);
2824  shared_dict_obj, "foreign_table", shared_dict_def.get_foreign_table(), document);
2825  set_string_field(shared_dict_obj,
2826  "foreign_column",
2827  shared_dict_def.get_foreign_column(),
2828  document);
2829  arr.PushBack(shared_dict_obj, allocator);
2830  }
// Write the array itself (not `document`) into a string buffer.
2831  rapidjson::StringBuffer buffer;
2832  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
2833  arr.Accept(writer);
2834  return buffer.GetString();
2835 }
2836 
2837 template <typename LITERAL_TYPE,
2838  typename ASSIGNMENT,
2839  typename VALIDATE = DefaultValidate<LITERAL_TYPE>>
2840 decltype(auto) get_property_value(const NameValueAssign* p,
2841  ASSIGNMENT op,
2842  VALIDATE validate = VALIDATE()) {
2843  const auto val = validate(p);
2844  return op(val);
2845 }
2846 
// Option handler registered as "storage_type" in tableDefFuncMap (the first line of
// the signature is not part of this listing).
// Validates `p` as a StringLiteral with CaseSensitiveValidate and stores the result
// in td.storageType. `columns` is not used.
2848  const NameValueAssign* p,
2849  const std::list<ColumnDescriptor>& columns) {
2850  auto assignment = [&td](const auto val) { td.storageType = val; };
2851  return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
2852  p, assignment);
2853 }
2854 
// Table option handler, presumably get_frag_size_def (the first line of the
// signature is not part of this listing).
// Validates `p` as an IntLiteral with the default validator and stores the value in
// td.maxFragRows. `columns` is not used.
2856  const NameValueAssign* p,
2857  const std::list<ColumnDescriptor>& columns) {
2858  return get_property_value<IntLiteral>(p,
2859  [&td](const auto val) { td.maxFragRows = val; });
2860 }
2861 
// Dataframe option handler, presumably get_frag_size_dataframe_def (the first line
// of the signature is not part of this listing).
// Validates `p` as an IntLiteral with the default validator and stores the value in
// df_td.maxFragRows. `columns` is not used.
2863  const NameValueAssign* p,
2864  const std::list<ColumnDescriptor>& columns) {
2865  return get_property_value<IntLiteral>(
2866  p, [&df_td](const auto val) { df_td.maxFragRows = val; });
2867 }
2868 
// Table option handler, presumably get_max_chunk_size_def (the first line of the
// signature is not part of this listing).
// Validates `p` as an IntLiteral with the default validator and stores the value in
// td.maxChunkSize. `columns` is not used.
2870  const NameValueAssign* p,
2871  const std::list<ColumnDescriptor>& columns) {
2872  return get_property_value<IntLiteral>(p,
2873  [&td](const auto val) { td.maxChunkSize = val; });
2874 }
2875 
// Dataframe option handler, presumably get_max_chunk_size_dataframe_def (the first
// line of the signature is not part of this listing).
// Validates `p` as an IntLiteral with the default validator and stores the value in
// df_td.maxChunkSize. `columns` is not used.
2877  DataframeTableDescriptor& df_td,
2878  const NameValueAssign* p,
2879  const std::list<ColumnDescriptor>& columns) {
2880  return get_property_value<IntLiteral>(
2881  p, [&df_td](const auto val) { df_td.maxChunkSize = val; });
2882 }
2883 
// Dataframe option handler, presumably get_delimiter_def (the first line of the
// signature is not part of this listing).
// Validates `p` as a StringLiteral with the default validator and stores it in
// df_td.delimiter. Throws std::runtime_error unless the value has exactly one
// character. `columns` is not used.
2885  const NameValueAssign* p,
2886  const std::list<ColumnDescriptor>& columns) {
2887  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2888  if (val.size() != 1) {
2889  throw std::runtime_error("Length of DELIMITER must be equal to 1.");
2890  }
2891  df_td.delimiter = val;
2892  });
2893 }
2894 
// Dataframe option handler, presumably get_header_def (the first line of the
// signature is not part of this listing).
// Sets df_td.hasHeader from the validated string: "TRUE" -> true, "FALSE" -> false,
// anything else throws std::runtime_error. `columns` is not used.
// NOTE(review): the comparison is against upper-case text while the error message
// quotes lower-case values; presumably the default validator upper-cases string
// literals — confirm.
2896  const NameValueAssign* p,
2897  const std::list<ColumnDescriptor>& columns) {
2898  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2899  if (val == "FALSE") {
2900  df_td.hasHeader = false;
2901  } else if (val == "TRUE") {
2902  df_td.hasHeader = true;
2903  } else {
2904  throw std::runtime_error("Option HEADER support only 'true' or 'false' values.");
2905  }
2906  });
2907 }
2908 
// Table option handler, presumably get_page_size_def (the first line of the
// signature is not part of this listing).
// Validates `p` as an IntLiteral with the default validator and stores the value in
// td.fragPageSize. `columns` is not used.
2910  const NameValueAssign* p,
2911  const std::list<ColumnDescriptor>& columns) {
2912  return get_property_value<IntLiteral>(p,
2913  [&td](const auto val) { td.fragPageSize = val; });
2914 }
// Table option handler, presumably get_max_rows_def (the first line of the signature
// is not part of this listing).
// Validates `p` as an IntLiteral with the default validator and stores the value in
// td.maxRows. `columns` is not used.
2916  const NameValueAssign* p,
2917  const std::list<ColumnDescriptor>& columns) {
2918  return get_property_value<IntLiteral>(p, [&td](const auto val) { td.maxRows = val; });
2919 }
2920 
// Dataframe option handler, presumably get_skip_rows_def (the first line of the
// signature is not part of this listing).
// Validates `p` as an IntLiteral with the default validator and stores the value in
// df_td.skipRows. `columns` is not used.
2922  const NameValueAssign* p,
2923  const std::list<ColumnDescriptor>& columns) {
2924  return get_property_value<IntLiteral>(
2925  p, [&df_td](const auto val) { df_td.skipRows = val; });
2926 }
2927 
// Table option handler, presumably get_partions_def (the first line of the signature
// is not part of this listing).
// Stores the validated string in td.partitions. Throws std::runtime_error when the
// value is neither "SHARDED" nor "REPLICATED", or when "REPLICATED" is requested for
// a table that already has a shard column (td.shardedColumnId != 0).
// `columns` is not used.
2929  const NameValueAssign* p,
2930  const std::list<ColumnDescriptor>& columns) {
2931  return get_property_value<StringLiteral>(p, [&td](const auto partitions_uc) {
2932  if (partitions_uc != "SHARDED" && partitions_uc != "REPLICATED") {
2933  throw std::runtime_error("PARTITIONS must be SHARDED or REPLICATED");
2934  }
2935  if (td.shardedColumnId != 0 && partitions_uc == "REPLICATED") {
2936  throw std::runtime_error(
2937  "A table cannot be sharded and replicated at the same time");
2938  };
2939  td.partitions = partitions_uc;
2940  });
2941 }
// Table option handler, presumably get_shard_count_def (the first line of the
// signature is not part of this listing).
// Requires td.shardedColumnId to be set already, otherwise throws
// std::runtime_error. When g_leaf_count is non-zero the shard count must be a
// multiple of it and td.nShards receives shard_count / g_leaf_count; otherwise
// td.nShards receives shard_count unchanged. `columns` is not used.
2943  const NameValueAssign* p,
2944  const std::list<ColumnDescriptor>& columns) {
2945  if (!td.shardedColumnId) {
2946  throw std::runtime_error("SHARD KEY must be defined.");
2947  }
2948  return get_property_value<IntLiteral>(p, [&td](const auto shard_count) {
2949  if (g_leaf_count && shard_count % g_leaf_count) {
2950  throw std::runtime_error(
2951  "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2952  }
2953  td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
// NOTE(review): td.shardedColumnId was verified to be non-zero before the lambda
// runs, so this condition looks unreachable as written.
2954  if (!td.shardedColumnId && !td.nShards) {
2955  throw std::runtime_error(
2956  "Must specify the number of shards through the SHARD_COUNT option");
2957  };
2958  });
2959 }
2960 
2961 decltype(auto) get_vacuum_def(TableDescriptor& td,
2962  const NameValueAssign* p,
2963  const std::list<ColumnDescriptor>& columns) {
2964  return get_property_value<StringLiteral>(p, [&td](const auto vacuum_uc) {
2965  if (vacuum_uc != "IMMEDIATE" && vacuum_uc != "DELAYED") {
2966  throw std::runtime_error("VACUUM must be IMMEDIATE or DELAYED");
2967  }
2968  td.hasDeletedCol = boost::iequals(vacuum_uc, "IMMEDIATE") ? false : true;
2969  });
2970 }
2971 
// Table option handler, presumably get_sort_column_def (the first line of the
// signature is not part of this listing).
// Resolves the validated column name through sort_column_index() and stores the
// resulting index in td.sortedColumnId. Throws std::runtime_error when the index is
// 0, i.e. no column matched.
2973  const NameValueAssign* p,
2974  const std::list<ColumnDescriptor>& columns) {
2975  return get_property_value<StringLiteral>(p, [&td, &columns](const auto sort_upper) {
2976  td.sortedColumnId = sort_column_index(sort_upper, columns);
2977  if (!td.sortedColumnId) {
2978  throw std::runtime_error("Specified sort column " + sort_upper + " doesn't exist");
2979  }
2980  });
2981 }
2982 
// Table option handler, presumably get_max_rollback_epochs_def (the first line of
// the signature is not part of this listing).
// Validates `p` as an IntLiteral with PositiveOrZeroValidate and stores the value in
// td.maxRollbackEpochs, mapping any negative value to -1. `columns` is not used.
2984  const NameValueAssign* p,
2985  const std::list<ColumnDescriptor>& columns) {
2986  auto assignment = [&td](const auto val) {
2987  td.maxRollbackEpochs =
2988  val < 0 ? -1 : val; // Anything < 0 means unlimited rollbacks. Note that 0
2989  // still means keeping a shadow copy of data/metadata
2990  // between epochs so bad writes can be rolled back
2991  };
2992  return get_property_value<IntLiteral, decltype(assignment), PositiveOrZeroValidate>(
2993  p, assignment);
2994 }
2995 
2996 static const std::map<const std::string, const TableDefFuncPtr> tableDefFuncMap = {
2997  {"fragment_size"s, get_frag_size_def},
2998  {"max_chunk_size"s, get_max_chunk_size_def},
2999  {"page_size"s, get_page_size_def},
3000  {"max_rows"s, get_max_rows_def},
3001  {"partitions"s, get_partions_def},
3002  {"shard_count"s, get_shard_count_def},
3003  {"vacuum"s, get_vacuum_def},
3004  {"sort_column"s, get_sort_column_def},
3005  {"storage_type"s, get_storage_type},
3006  {"max_rollback_epochs", get_max_rollback_epochs_def}};
3007 
// Applies one CREATE TABLE option to `td` (the first line of the signature is not
// part of this listing; presumably get_table_definitions).
// The option name is lower-cased and looked up in tableDefFuncMap. An unknown name
// throws std::runtime_error listing the accepted options; otherwise the matching
// handler is called with (td, p.get(), columns) and its result is returned.
3009  const std::unique_ptr<NameValueAssign>& p,
3010  const std::list<ColumnDescriptor>& columns) {
3011  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
3012  if (it == tableDefFuncMap.end()) {
3013  throw std::runtime_error(
3014  "Invalid CREATE TABLE option " + *p->get_name() +
3015  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
3016  "MAX_ROWS, "
3017  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE.");
3018  }
3019  return it->second(td, p.get(), columns);
3020 }
3021 
// Applies one CREATE TABLE AS option to `td` (the first line of the signature is not
// part of this listing; presumably get_table_definitions_for_ctas).
// Uses the same tableDefFuncMap lookup as plain CREATE TABLE; only the error text
// differs. An unknown name throws std::runtime_error.
// NOTE(review): the message also lists USE_SHARED_DICTIONARIES and
// FORCE_GEO_COMPRESSION, which are not keys of tableDefFuncMap; presumably callers
// handle those two before calling this function — confirm.
3023  const std::unique_ptr<NameValueAssign>& p,
3024  const std::list<ColumnDescriptor>& columns) {
3025  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
3026  if (it == tableDefFuncMap.end()) {
3027  throw std::runtime_error(
3028  "Invalid CREATE TABLE AS option " + *p->get_name() +
3029  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
3030  "MAX_ROWS, "
3031  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE, "
3032  "USE_SHARED_DICTIONARIES or FORCE_GEO_COMPRESSION.");
3033  }
3034  return it->second(td, p.get(), columns);
3035 }
3036 
3037 static const std::map<const std::string, const DataframeDefFuncPtr> dataframeDefFuncMap =
3038  {{"fragment_size"s, get_frag_size_dataframe_def},
3039  {"max_chunk_size"s, get_max_chunk_size_dataframe_def},
3040  {"skip_rows"s, get_skip_rows_def},
3041  {"delimiter"s, get_delimiter_def},
3042  {"header"s, get_header_def}};
3043 
// Applies one CREATE DATAFRAME option to `df_td` (the first line of the signature is
// not part of this listing; presumably get_dataframe_definitions).
// The option name is lower-cased and looked up in dataframeDefFuncMap. An unknown
// name throws std::runtime_error listing the accepted options; otherwise the
// matching handler is called with (df_td, p.get(), columns).
3045  const std::unique_ptr<NameValueAssign>& p,
3046  const std::list<ColumnDescriptor>& columns) {
3047  const auto it =
3048  dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
3049  if (it == dataframeDefFuncMap.end()) {
3050  throw std::runtime_error(
3051  "Invalid CREATE DATAFRAME option " + *p->get_name() +
3052  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
3053  }
3054  return it->second(df_td, p.get(), columns);
3055 }
3056 
3057 std::unique_ptr<ColumnDef> column_from_json(const rapidjson::Value& element) {
3058  CHECK(element.HasMember("name"));
3059  auto col_name = std::make_unique<std::string>(json_str(element["name"]));
3060  CHECK(element.HasMember("sqltype"));
3061  const auto sql_types = to_sql_type(json_str(element["sqltype"]));
3062 
3063  // decimal / numeric precision / scale
3064  int precision = -1;
3065  int scale = -1;
3066  if (element.HasMember("precision")) {
3067  precision = json_i64(element["precision"]);
3068  }
3069  if (element.HasMember("scale")) {
3070  scale = json_i64(element["scale"]);
3071  }
3072 
3073  std::optional<int64_t> array_size;
3074  if (element.HasMember("arraySize")) {
3075  // We do not yet support geo arrays
3076  array_size = json_i64(element["arraySize"]);
3077  }
3078  std::unique_ptr<SQLType> sql_type;
3079  if (element.HasMember("subtype")) {
3080  CHECK(element.HasMember("coordinateSystem"));
3081  const auto subtype_sql_types = to_sql_type(json_str(element["subtype"]));
3082  sql_type =
3083  std::make_unique<SQLType>(subtype_sql_types,
3084  static_cast<int>(sql_types),
3085  static_cast<int>(json_i64(element["coordinateSystem"])),
3086  false);
3087  } else if (precision > 0 && scale > 0) {
3088  sql_type = std::make_unique<SQLType>(sql_types,
3089  precision,
3090  scale,
3091  /*is_array=*/array_size.has_value(),
3092  array_size ? *array_size : -1);
3093  } else if (precision > 0) {
3094  sql_type = std::make_unique<SQLType>(sql_types,
3095  precision,
3096  0,
3097  /*is_array=*/array_size.has_value(),
3098  array_size ? *array_size : -1);
3099  } else {
3100  sql_type = std::make_unique<SQLType>(sql_types,
3101  /*is_array=*/array_size.has_value(),
3102  array_size ? *array_size : -1);
3103  }
3104  CHECK(sql_type);
3105 
3106  CHECK(element.HasMember("nullable"));
3107  const auto nullable = json_bool(element["nullable"]);
3108  std::unique_ptr<ColumnConstraintDef> constraint_def;
3109  StringLiteral* str_literal = nullptr;
3110  if (element.HasMember("default") && !element["default"].IsNull()) {
3111  std::string* defaultval = new std::string(json_str(element["default"]));
3112  boost::algorithm::trim_if(*defaultval, boost::is_any_of(" \"'`"));
3113  str_literal = new StringLiteral(defaultval);
3114  }
3115 
3116  constraint_def = std::make_unique<ColumnConstraintDef>(/*notnull=*/!nullable,
3117  /*unique=*/false,
3118  /*primarykey=*/false,
3119  /*defaultval=*/str_literal);
3120  std::unique_ptr<CompressDef> compress_def;
3121  if (element.HasMember("encodingType") && !element["encodingType"].IsNull()) {
3122  std::string encoding_type = json_str(element["encodingType"]);
3123  CHECK(element.HasMember("encodingSize"));
3124  auto encoding_name = std::make_unique<std::string>(json_str(element["encodingType"]));
3125  compress_def = std::make_unique<CompressDef>(encoding_name.release(),
3126  json_i64(element["encodingSize"]));
3127  }
3128  return std::make_unique<ColumnDef>(col_name.release(),
3129  sql_type.release(),
3130  compress_def ? compress_def.release() : nullptr,
3131  constraint_def ? constraint_def.release() : nullptr);
3132 }
3133 
3134 void parse_elements(const rapidjson::Value& payload,
3135  std::string element_name,
3136  std::string& table_name,
3137  std::list<std::unique_ptr<TableElement>>& table_element_list) {
3138  const auto elements = payload[element_name].GetArray();
3139  for (const auto& element : elements) {
3140  CHECK(element.IsObject());
3141  CHECK(element.HasMember("type"));
3142  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
3143  auto col_def = column_from_json(element);
3144  table_element_list.emplace_back(std::move(col_def));
3145  } else if (json_str(element["type"]) == "SQL_COLUMN_CONSTRAINT") {
3146  CHECK(element.HasMember("name"));
3147  if (json_str(element["name"]) == "SHARD_KEY") {
3148  CHECK(element.HasMember("columns"));
3149  CHECK(element["columns"].IsArray());
3150  const auto& columns = element["columns"].GetArray();
3151  if (columns.Size() != size_t(1)) {
3152  throw std::runtime_error("Only one shard column is currently supported.");
3153  }
3154  auto shard_key_def = std::make_unique<ShardKeyDef>(json_str(columns[0]));
3155  table_element_list.emplace_back(std::move(shard_key_def));
3156  } else if (json_str(element["name"]) == "SHARED_DICT") {
3157  CHECK(element.HasMember("columns"));
3158  CHECK(element["columns"].IsArray());
3159  const auto& columns = element["columns"].GetArray();
3160  if (columns.Size() != size_t(1)) {
3161  throw std::runtime_error(
3162  R"(Only one column per shared dictionary entry is currently supported. Use multiple SHARED DICT statements to share dictionaries from multiple columns.)");
3163  }
3164  CHECK(element.HasMember("references") && element["references"].IsObject());
3165  const auto& references = element["references"].GetObject();
3166  std::string references_table_name;
3167  if (references.HasMember("table")) {
3168  references_table_name = json_str(references["table"]);
3169  } else {
3170  references_table_name = table_name;
3171  }
3172  CHECK(references.HasMember("column"));
3173 
3174  auto shared_dict_def = std::make_unique<SharedDictionaryDef>(
3175  json_str(columns[0]), references_table_name, json_str(references["column"]));
3176  table_element_list.emplace_back(std::move(shared_dict_def));
3177 
3178  } else {
3179  LOG(FATAL) << "Unsupported type for SQL_COLUMN_CONSTRAINT: "
3180  << json_str(element["name"]);
3181  }
3182  } else {
3183  LOG(FATAL) << "Unsupported element type for CREATE TABLE: "
3184  << element["type"].GetString();
3185  }
3186  }
3187 }
3188 } // namespace
3189 
3190 CreateTableStmt::CreateTableStmt(const rapidjson::Value& payload) {
3191  CHECK(payload.HasMember("name"));
3192  table_ = std::make_unique<std::string>(json_str(payload["name"]));
3193  CHECK(payload.HasMember("elements"));
3194  CHECK(payload["elements"].IsArray());
3195 
3196  is_temporary_ = false;
3197  if (payload.HasMember("temporary")) {
3198  is_temporary_ = json_bool(payload["temporary"]);
3199  }
3200 
3201  if_not_exists_ = false;
3202  if (payload.HasMember("ifNotExists")) {
3203  if_not_exists_ = json_bool(payload["ifNotExists"]);
3204  }
3205 
3206  parse_elements(payload, "elements", *table_, table_element_list_);
3207 
3208  parse_options(payload, storage_options_);
3209 }
3210 
// Fills `td`, `columns` and `shared_dict_defs` from the parsed table elements
// without creating anything (presumably CreateTableStmt::executeDryRun; the first
// line of the signature and a few statements are not part of this listing).
//
// Visible flow: walk table_element_list_ collecting shared dictionary definitions,
// at most one shard key and the column descriptors; set default table attributes;
// resolve the shard column; apply the storage options; serialize the key metainfo.
//
// Throws std::runtime_error for more than one shard key, for elements that are not
// columns / shard keys / shared dictionaries, for an unknown shard column and for a
// shard key without a shard count.
3212  TableDescriptor& td,
3213  std::list<ColumnDescriptor>& columns,
3214  std::vector<SharedDictionaryDef>& shared_dict_defs) {
3215  std::unordered_set<std::string> uc_col_names;
3216  const auto& catalog = session.getCatalog();
3217  const ShardKeyDef* shard_key_def{nullptr};
3218  for (auto& e : table_element_list_) {
3219  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
3220  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
3222  this, shared_dict_def, columns, shared_dict_defs, catalog);
3223  shared_dict_defs.push_back(*shared_dict_def);
3224  continue;
3225  }
3226  if (dynamic_cast<ShardKeyDef*>(e.get())) {
3227  if (shard_key_def) {
3228  throw std::runtime_error("Specified more than one shard key");
3229  }
3230  shard_key_def = static_cast<const ShardKeyDef*>(e.get());
3231  continue;
3232  }
3233  if (!dynamic_cast<ColumnDef*>(e.get())) {
3234  throw std::runtime_error("Table constraints are not supported yet.");
3235  }
3236  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
3237  ColumnDescriptor cd;
3238  cd.columnName = *coldef->get_column_name();
3240  setColumnDescriptor(cd, coldef);
3241  columns.push_back(cd);
3242  }
3243 
3244  ddl_utils::set_default_table_attributes(*table_, td, columns.size());
3245 
// A shard column index of 0 means the named column was not found.
3246  if (shard_key_def) {
3247  td.shardedColumnId = shard_column_index(shard_key_def->get_column(), columns);
3248  if (!td.shardedColumnId) {
3249  throw std::runtime_error("Specified shard column " + shard_key_def->get_column() +
3250  " doesn't exist");
3251  }
3252  }
3253  if (is_temporary_) {
3255  } else {
3257  }
3258  if (!storage_options_.empty()) {
3259  for (auto& p : storage_options_) {
3260  get_table_definitions(td, p, columns);
3261  }
3262  }
// The shard count can only come from the storage options applied above.
3263  if (td.shardedColumnId && !td.nShards) {
3264  throw std::runtime_error("SHARD_COUNT needs to be specified with SHARD_KEY.");
3265  }
3266  td.keyMetainfo = serialize_key_metainfo(shard_key_def, shared_dict_defs);
3267 }
3268 
// Creates the table described by this statement (presumably
// CreateTableStmt::execute; the first line of the signature, the lock acquisition
// and the privilege condition are not part of this listing).
//
// Visible flow: reject read-only mode, report missing create privileges, return
// early when validateNonExistentTableOrView() returns false, build the descriptors
// with executeDryRun(), create the table in the catalog and register the table
// object for the current user.
//
// Throws std::runtime_error for read-only mode and missing create privileges;
// errors from executeDryRun() propagate.
3270  bool read_only_mode) {
3271  if (read_only_mode) {
3272  throw std::runtime_error("CREATE TABLE invalid in read only mode.");
3273  }
3274  auto& catalog = session.getCatalog();
3275 
3276  // Until we create the table we don't have a table descriptor to lock and guarantee
3277  // exclusive use of. Because of that we need a global write lock to make sure we have
3278  // exclusive access to the system for now.
3279  const auto execute_write_lock =
3283 
3284  // check access privileges
3287  throw std::runtime_error("Table " + *table_ +
3288  " will not be created. User has no create privileges.");
3289  }
3290 
3291  if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
3292  return;
3293  }
3294 
3295  TableDescriptor td;
3296  std::list<ColumnDescriptor> columns;
3297  std::vector<SharedDictionaryDef> shared_dict_defs;
3298 
3299  executeDryRun(session, td, columns, shared_dict_defs);
3300  td.userId = session.get_currentUser().userId;
3301 
3302  catalog.createShardedTable(td, columns, shared_dict_defs);
3303  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
3304  // privileges
3305  SysCatalog::instance().createDBObject(
3306  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
3307 }
3308 
3309 CreateDataframeStmt::CreateDataframeStmt(const rapidjson::Value& payload) {
3310  CHECK(payload.HasMember("name"));
3311  table_ = std::make_unique<std::string>(json_str(payload["name"]));
3312 
3313  CHECK(payload.HasMember("elementList"));
3314  parse_elements(payload, "elementList", *table_, table_element_list_);
3315 
3316  CHECK(payload.HasMember("filePath"));
3317  std::string fs = json_str(payload["filePath"]);
3318  // strip leading/trailing spaces/quotes/single quotes
3319  boost::algorithm::trim_if(fs, boost::is_any_of(" \"'`"));
3320  filename_ = std::make_unique<std::string>(fs);
3321 
3322  parse_options(payload, storage_options_);
3323 }
3324 
// Creates the dataframe-backed table described by this statement (presumably
// CreateDataframeStmt::execute; the first line of the signature, the lock
// acquisition, the privilege condition, the declaration of `df_td` and some default
// assignments are not part of this listing).
//
// Visible flow: reject read-only mode, report missing create privileges, refuse an
// existing table name, collect shared dictionary and column definitions, fill the
// descriptor defaults, apply the storage options, then create the table and
// register the table object for the current user.
//
// Throws std::runtime_error for read-only mode, missing privileges, an existing
// table, unsupported table constraints and duplicate column names.
3326  bool read_only_mode) {
3327  if (read_only_mode) {
3328  throw std::runtime_error("CREATE DATAFRAME invalid in read only mode.");
3329  }
3330  auto& catalog = session.getCatalog();
3331 
3332  const auto execute_write_lock =
3336 
3337  // check access privileges
3340  throw std::runtime_error("Table " + *table_ +
3341  " will not be created. User has no create privileges.");
3342  }
3343 
3344  if (catalog.getMetadataForTable(*table_) != nullptr) {
3345  throw std::runtime_error("Table " + *table_ + " already exists.");
3346  }
3348  std::list<ColumnDescriptor> columns;
3349  std::vector<SharedDictionaryDef> shared_dict_defs;
3350 
// Upper-cased names seen so far; used to reject duplicates case-insensitively.
3351  std::unordered_set<std::string> uc_col_names;
3352  for (auto& e : table_element_list_) {
3353  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
3354  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
3356  this, shared_dict_def, columns, shared_dict_defs, catalog);
3357  shared_dict_defs.push_back(*shared_dict_def);
3358  continue;
3359  }
3360  if (!dynamic_cast<ColumnDef*>(e.get())) {
3361  throw std::runtime_error("Table constraints are not supported yet.");
3362  }
3363  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
3364  ColumnDescriptor cd;
3365  cd.columnName = *coldef->get_column_name();
3366  const auto uc_col_name = boost::to_upper_copy<std::string>(cd.columnName);
3367  const auto it_ok = uc_col_names.insert(uc_col_name);
3368  if (!it_ok.second) {
3369  throw std::runtime_error("Column '" + cd.columnName + "' defined more than once");
3370  }
3371  setColumnDescriptor(cd, coldef);
3372  columns.push_back(cd);
3373  }
3374 
3375  df_td.tableName = *table_;
3376  df_td.nColumns = columns.size();
3377  df_td.isView = false;
3378  df_td.fragmenter = nullptr;
3383  df_td.maxRows = DEFAULT_MAX_ROWS;
3385  if (!storage_options_.empty()) {
3386  for (auto& p : storage_options_) {
3387  get_dataframe_definitions(df_td, p, columns);
3388  }
3389  }
3390  df_td.keyMetainfo = serialize_key_metainfo(nullptr, shared_dict_defs);
3391  df_td.userId = session.get_currentUser().userId;
// The source file path is carried in the storageType field.
3392  df_td.storageType = *filename_;
3393 
3394  catalog.createShardedTable(df_td, columns, shared_dict_defs);
3395  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
3396  // privileges
3397  SysCatalog::instance().createDBObject(
3398  session.get_currentUser(), df_td.tableName, TableDBObjectType, catalog);
3399 }
3400 
// Plans `select_stmt` with Calcite, executes it with RelAlgExecutor and returns the
// resulting rows.
//
// @param query_state_proxy       source of the session and the query state handed
//                                to Calcite and to the executor
// @param select_stmt             SQL text; passed through pg_shim() before planning
// @param targets                 out-parameter overwritten with the result's target
//                                metadata
// @param validate_only           forwarded into the ExecutionOptions initializer
// @param outer_fragment_indices  forwarded into the ExecutionOptions initializer
// @param allow_interrupt         forwarded into the ExecutionOptions initializer
// @return result.getRows() of the executed query
//
// NOTE(review): several lines (executor lookup, compilation options `co`, parts of
// the option initializers) are not part of this listing, so the meaning of the
// positional ExecutionOptions values cannot be confirmed from here.
3401 std::shared_ptr<ResultSet> getResultSet(QueryStateProxy query_state_proxy,
3402  const std::string select_stmt,
3403  std::vector<TargetMetaInfo>& targets,
3404  bool validate_only = false,
3405  std::vector<size_t> outer_fragment_indices = {},
3406  bool allow_interrupt = false) {
3407  auto const session = query_state_proxy->getConstSessionInfo();
3408  auto& catalog = session->getCatalog();
3409 
// Without CUDA support the query always runs on the CPU.
3411 #ifdef HAVE_CUDA
3412  const auto device_type = session->get_executor_device_type();
3413 #else
3414  const auto device_type = ExecutorDeviceType::CPU;
3415 #endif // HAVE_CUDA
3416  auto calcite_mgr = catalog.getCalciteMgr();
3417 
3418  // TODO MAT this should actually get the global or the session parameter for
3419  // view optimization
3420  const auto calciteQueryParsingOption =
3421  calcite_mgr->getCalciteQueryParsingOption(true, false, true);
3422  const auto calciteOptimizationOption = calcite_mgr->getCalciteOptimizationOption(
3423  false,
3425  {},
3427  const auto query_ra = calcite_mgr
3428  ->process(query_state_proxy,
3429  pg_shim(select_stmt),
3430  calciteQueryParsingOption,
3431  calciteOptimizationOption)
3432  .plan_result;
3433  RelAlgExecutor ra_executor(
3434  executor.get(), query_ra, query_state_proxy->shared_from_this());
3436  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
3437  // struct
3438  ExecutionOptions eo = {false,
3439  false,
3440  true,
3441  false,
3442  true,
3443  false,
3444  false,
3445  validate_only,
3446  false,
3447  10000,
3448  false,
3449  false,
3450  1000,
3451  allow_interrupt,
3455  std::numeric_limits<size_t>::max(),
3457  outer_fragment_indices};
3458 
// Start from an empty result; it is replaced by the executor's result below.
3459  ExecutionResult result{std::make_shared<ResultSet>(std::vector<TargetInfo>{},
3462  nullptr,
3463  0,
3464  0),
3465  {}};
3466  result = ra_executor.executeRelAlgQuery(co, eo, false, nullptr);
3467  targets = result.getTargetsMeta();
3468 
3469  return result.getRows();
3470 }
3471 
3473  std::string& sql_query_string) {
3474  auto const session = query_state_proxy->getConstSessionInfo();
3475  auto& catalog = session->getCatalog();
3476 
3478 #ifdef HAVE_CUDA
3479  const auto device_type = session->get_executor_device_type();
3480 #else
3481  const auto device_type = ExecutorDeviceType::CPU;
3482 #endif // HAVE_CUDA
3483  auto calcite_mgr = catalog.getCalciteMgr();
3484 
3485  // TODO MAT this should actually get the global or the session parameter for
3486  // view optimization
3487  const auto calciteQueryParsingOption =
3488  calcite_mgr->getCalciteQueryParsingOption(true, false, true);
3489  const auto calciteOptimizationOption = calcite_mgr->getCalciteOptimizationOption(
3490  false,
3492  {},
3494  const auto query_ra = calcite_mgr
3495  ->process(query_state_proxy,
3496  pg_shim(sql_query_string),
3497  calciteQueryParsingOption,
3498  calciteOptimizationOption)
3499  .plan_result;
3500  RelAlgExecutor ra_executor(executor.get(), query_ra);
3501  CompilationOptions co = {device_type, true, ExecutorOptLevel::Default, false};
3502  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
3503  // struct
3504  ExecutionOptions eo = {false,
3505  false,
3506  true,
3507  false,
3508  true,
3509  false,
3510  false,
3511  false,
3512  false,
3513  10000,
3514  false,
3515  false,
3516  0.9,
3517  false,
3518  false};
3519  return ra_executor.getOuterFragmentCount(co, eo);
3520 }
3521 
3523  std::string& sql_query_string,
3524  std::vector<size_t> outer_frag_indices,
3525  bool validate_only,
3526  bool allow_interrupt) {
3527  // TODO(PS): Should we be using the shimmed query in getResultSet?
3528  std::string pg_shimmed_select_query = pg_shim(sql_query_string);
3529 
3530  std::vector<TargetMetaInfo> target_metainfos;
3532  auto const session = query_state_proxy->getConstSessionInfo();
3533  auto query_session = session ? session->get_session_id() : "";
3534  auto query_submitted_time = query_state_proxy->getQuerySubmittedTime();
3535  if (allow_interrupt && !validate_only && !query_session.empty()) {
3536  executor->enrollQuerySession(query_session,
3537  sql_query_string,
3538  query_submitted_time,
3540  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR);
3541  }
3542  auto result_rows = getResultSet(query_state_proxy,
3543  sql_query_string,
3544  target_metainfos,
3545  validate_only,
3546  outer_frag_indices,
3547  allow_interrupt);
3548  AggregatedResult res = {result_rows, target_metainfos};
3549  return res;
3550 }
3551 
3552 std::vector<AggregatedResult> LocalQueryConnector::query(
3553  QueryStateProxy query_state_proxy,
3554  std::string& sql_query_string,
3555  std::vector<size_t> outer_frag_indices,
3556  bool allow_interrupt) {
3557  auto res = query(
3558  query_state_proxy, sql_query_string, outer_frag_indices, false, allow_interrupt);
3559  return {res};
3560 }
3561 
// Builds column descriptors from `result.targets_meta`, one per target. Two lists
// are filled in parallel: `column_descriptors` carries the name and physical type
// of each target as reported, while `column_descriptors_for_create` is a copy whose
// type info is adjusted below. `for_create` selects which list is returned.
//
// NOTE(review): this listing skips numbered lines 3563, 3579 and 3591. The `result`
// parameter, the `if` whose block closes at 3587, and one statement in the date
// branch are therefore not visible here -- confirm against the full file before
// changing this function.
3562 std::list<ColumnDescriptor> LocalQueryConnector::getColumnDescriptors(
3564  bool for_create) {
3565  std::list<ColumnDescriptor> column_descriptors;
3566  std::list<ColumnDescriptor> column_descriptors_for_create;
3567 
// Targets named exactly "rowid" get a running numeric suffix appended
// ("rowid0", "rowid1", ...).
3568  int rowid_suffix = 0;
3569  for (const auto& target_metainfo : result.targets_meta) {
3570  ColumnDescriptor cd;
3571  cd.columnName = target_metainfo.get_resname();
3572  if (cd.columnName == "rowid") {
3573  cd.columnName += std::to_string(rowid_suffix++);
3574  }
3575  cd.columnType = target_metainfo.get_physical_type_info();
3576 
3577  ColumnDescriptor cd_for_create = cd;
3578 
3580  // we need to reset the comp param (as this points to the actual dictionary)
3581  if (cd.columnType.is_array()) {
3582  // for dict encoded arrays, it is always 4 bytes
3583  cd_for_create.columnType.set_comp_param(32);
3584  } else {
// Non-array case: comp param becomes the column size expressed in bits.
3585  cd_for_create.columnType.set_comp_param(cd.columnType.get_size() * 8);
3586  }
3587  }
3588 
3589  if (cd.columnType.is_date() && !cd.columnType.is_date_in_days()) {
3590  // default to kENCODING_DATE_IN_DAYS encoding
3592  cd_for_create.columnType.set_comp_param(0);
3593  }
3594 
3595  column_descriptors_for_create.push_back(cd_for_create);
3596  column_descriptors.push_back(cd);
3597  }
3598 
3599  if (for_create) {
3600  return column_descriptors_for_create;
3601  }
3602 
3603  return column_descriptors;
3604 }
3605 
3607  const rapidjson::Value& payload) {
3608  CHECK(payload.HasMember("name"));
3609  table_name_ = json_str(payload["name"]);
3610 
3611  CHECK(payload.HasMember("query"));
3612  select_query_ = json_str(payload["query"]);
3613 
3614  boost::replace_all(select_query_, "\n", " ");
3615  select_query_ = "(" + select_query_ + ")";
3616 
3617  if (payload.HasMember("columns")) {
3618  CHECK(payload["columns"].IsArray());
3619  for (auto& column : payload["columns"].GetArray()) {
3620  std::string s = json_str(column);
3621  column_list_.emplace_back(std::unique_ptr<std::string>(new std::string(s)));
3622  }
3623  }
3624 }
3625 
3627  const TableDescriptor* td,
3628  bool validate_table,
3629  bool for_CTAS) {
3630  auto const session = query_state_proxy->getConstSessionInfo();
3631  auto& catalog = session->getCatalog();
3633  bool populate_table = false;
3634 
3635  if (leafs_connector_) {
3636  populate_table = true;
3637  } else {
3638  leafs_connector_ = std::make_unique<LocalQueryConnector>();
3639  if (!g_cluster) {
3640  populate_table = true;
3641  }
3642  }
3643 
3644  auto get_target_column_descriptors = [this, &catalog](const TableDescriptor* td) {
3645  std::vector<const ColumnDescriptor*> target_column_descriptors;
3646  if (column_list_.empty()) {
3647  auto list = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
3648  target_column_descriptors = {std::begin(list), std::end(list)};
3649  } else {
3650  for (auto& c : column_list_) {
3651  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
3652  if (cd == nullptr) {
3653  throw std::runtime_error("Column " + *c + " does not exist.");
3654  }
3655  target_column_descriptors.push_back(cd);
3656  }
3657  }
3658 
3659  return target_column_descriptors;
3660  };
3661 
3662  bool is_temporary = table_is_temporary(td);
3663 
3664  if (validate_table) {
3665  // check access privileges
3666  if (!td) {
3667  throw std::runtime_error("Table " + table_name_ + " does not exist.");
3668  }
3669  if (td->isView) {
3670  throw std::runtime_error("Insert to views is not supported yet.");
3671  }
3672 
3673  if (!session->checkDBAccessPrivileges(DBObjectType::TableDBObjectType,
3675  table_name_)) {
3676  throw std::runtime_error("User has no insert privileges on " + table_name_ + ".");
3677  }
3678 
3679  // only validate the select query so we get the target types
3680  // correctly, but do not populate the result set
3681  LocalQueryConnector local_connector;
3682  auto result = local_connector.query(query_state_proxy, select_query_, {}, true, true);
3683  auto source_column_descriptors = local_connector.getColumnDescriptors(result, false);
3684 
3685  std::vector<const ColumnDescriptor*> target_column_descriptors =
3686  get_target_column_descriptors(td);
3687 
3688  if (source_column_descriptors.size() != target_column_descriptors.size()) {
3689  throw std::runtime_error("The number of source and target columns does not match.");
3690  }
3691 
3692  for (int i = 0; i < source_column_descriptors.size(); i++) {
3693  const ColumnDescriptor* source_cd =
3694  &(*std::next(source_column_descriptors.begin(), i));
3695  const ColumnDescriptor* target_cd = target_column_descriptors.at(i);
3696 
3697  if (source_cd->columnType.get_type() != target_cd->columnType.get_type()) {
3698  auto type_cannot_be_cast = [](const auto& col_type) {
3699  return (col_type.is_time() || col_type.is_geometry() || col_type.is_array() ||
3700  col_type.is_boolean());
3701  };
3702 
3703  if (type_cannot_be_cast(source_cd->columnType) ||
3704  type_cannot_be_cast(target_cd->columnType)) {
3705  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3706  source_cd->columnType.get_type_name() +
3707  "' and target '" + target_cd->columnName + " " +
3708  target_cd->columnType.get_type_name() +
3709  "' column types do not match.");
3710  }
3711  }
3712  if (source_cd->columnType.is_array()) {
3713  if (source_cd->columnType.get_subtype() != target_cd->columnType.get_subtype()) {
3714  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3715  source_cd->columnType.get_type_name() +
3716  "' and target '" + target_cd->columnName + " " +
3717  target_cd->columnType.get_type_name() +
3718  "' array column element types do not match.");
3719  }
3720  }
3721 
3722  if (target_cd->columnType.is_string() && !source_cd->columnType.is_string()) {
3723  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3724  source_cd->columnType.get_type_name() +
3725  "' and target '" + target_cd->columnName + " " +
3726  target_cd->columnType.get_type_name() +
3727  "' column types do not match.");
3728  }
3729 
3730  if (source_cd->columnType.is_decimal() ||
3731  source_cd->columnType.get_elem_type().is_decimal()) {
3732  SQLTypeInfo sourceType = source_cd->columnType;
3733  SQLTypeInfo targetType = target_cd->columnType;
3734 
3735  if (source_cd->columnType.is_array()) {
3736  sourceType = source_cd->columnType.get_elem_type();
3737  targetType = target_cd->columnType.get_elem_type();
3738  }
3739 
3740  if (sourceType.get_scale() != targetType.get_scale()) {
3741  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3742  source_cd->columnType.get_type_name() +
3743  "' and target '" + target_cd->columnName + " " +
3744  target_cd->columnType.get_type_name() +
3745  "' decimal columns scales do not match.");
3746  }
3747  }
3748 
3749  if (source_cd->columnType.is_string()) {
3750  if (!target_cd->columnType.is_string()) {
3751  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3752  source_cd->columnType.get_type_name() +
3753  "' and target '" + target_cd->columnName + " " +
3754  target_cd->columnType.get_type_name() +
3755  "' column types do not match.");
3756  }
3757  if (source_cd->columnType.get_compression() !=
3758  target_cd->columnType.get_compression()) {
3759  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3760  source_cd->columnType.get_type_name() +
3761  "' and target '" + target_cd->columnName + " " +
3762  target_cd->columnType.get_type_name() +
3763  "' columns string encodings do not match.");
3764  }
3765  }
3766 
3767  if (source_cd->columnType.is_timestamp() && target_cd->columnType.is_timestamp()) {
3768  if (source_cd->columnType.get_dimension() !=
3769  target_cd->columnType.get_dimension()) {
3770  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3771  source_cd->columnType.get_type_name() +
3772  "' and target '" + target_cd->columnName + " " +
3773  target_cd->columnType.get_type_name() +
3774  "' timestamp column precisions do not match.");
3775  }
3776  }
3777 
3778  if (!source_cd->columnType.is_string() && !source_cd->columnType.is_geometry() &&
3779  !source_cd->columnType.is_integer() && !source_cd->columnType.is_decimal() &&
3780  !source_cd->columnType.is_date() && !source_cd->columnType.is_time() &&
3781  !source_cd->columnType.is_timestamp() &&
3782  source_cd->columnType.get_size() > target_cd->columnType.get_size()) {
3783  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3784  source_cd->columnType.get_type_name() +
3785  "' and target '" + target_cd->columnName + " " +
3786  target_cd->columnType.get_type_name() +
3787  "' column encoding sizes do not match.");
3788  }
3789  }
3790  }
3791 
3792  if (!populate_table) {
3793  return;
3794  }
3795 
3796  int64_t total_row_count = 0;
3797  int64_t total_source_query_time_ms = 0;
3798  int64_t total_target_value_translate_time_ms = 0;
3799  int64_t total_data_load_time_ms = 0;
3800 
3802  auto target_column_descriptors = get_target_column_descriptors(td);
3803  auto outer_frag_count =
3804  leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
3805 
3806  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
3807  auto query_session = session ? session->get_session_id() : "";
3809  std::string work_type_str = for_CTAS ? "CTAS" : "ITAS";
3810  try {
3811  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
3812  std::vector<size_t> allowed_outer_fragment_indices;
3813 
3814  if (outer_frag_count) {
3815  allowed_outer_fragment_indices.push_back(outer_frag_idx);
3816  }
3817 
3818  const auto query_clock_begin = timer_start();
3819  std::vector<AggregatedResult> query_results =
3820  leafs_connector_->query(query_state_proxy,
3821  select_query_,
3822  allowed_outer_fragment_indices,
3824  total_source_query_time_ms += timer_stop(query_clock_begin);
3825 
3826  auto start_time = query_state_proxy->getQuerySubmittedTime();
3827  auto query_str = "INSERT_DATA for " + work_type_str;
3829  // In the clean-up phase of the query execution for collecting aggregated result
3830  // of SELECT query, we remove its query session info, so we need to enroll the
3831  // session info again
3832  executor->enrollQuerySession(query_session,
3833  query_str,
3834  start_time,
3836  QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
3837  }
3838 
3839  ScopeGuard clearInterruptStatus = [executor, &query_session, &start_time] {
3840  // this data population is non-kernel operation, so we manually cleanup
3841  // the query session info in the cleanup phase
3843  executor->clearQuerySessionStatus(query_session, start_time);
3844  }
3845  };
3846 
3847  for (auto& res : query_results) {
3848  if (UNLIKELY(check_session_interrupted(query_session, executor))) {
3849  throw std::runtime_error(
3850  "Query execution has been interrupted while performing " + work_type_str);
3851  }
3852  auto& result_rows = res.rs;
3853  result_rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
3854  const auto num_rows = result_rows->rowCount();
3855 
3856  if (0 == num_rows) {
3857  continue;
3858  }
3859 
3860  total_row_count += num_rows;
3861 
3862  size_t leaf_count = leafs_connector_->leafCount();
3863 
3864  // ensure that at least 1 row is processed per block up to a maximum of 65536 rows
3865  const size_t rows_per_block =
3866  std::max(std::min(num_rows / leaf_count, size_t(64 * 1024)), size_t(1));
3867 
3868  std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
3869 
3871 
3872  const int num_worker_threads = std::thread::hardware_concurrency();
3873 
3874  std::vector<size_t> thread_start_idx(num_worker_threads),
3875  thread_end_idx(num_worker_threads);
3876  bool can_go_parallel = !result_rows->isTruncated() && rows_per_block > 20000;
3877 
3878  std::atomic<size_t> crt_row_idx{0};
3879 
3880  auto do_work = [&result_rows, &value_converters, &crt_row_idx](
3881  const size_t idx,
3882  const size_t block_end,
3883  const size_t num_cols,
3884  const size_t thread_id,
3885  bool& stop_convert) {
3886  const auto result_row = result_rows->getRowAtNoTranslations(idx);
3887  if (!result_row.empty()) {
3888  size_t target_row = crt_row_idx.fetch_add(1);
3889  if (target_row >= block_end) {
3890  stop_convert = true;
3891  return;
3892  }
3893  for (unsigned int col = 0; col < num_cols; col++) {
3894  const auto& mapd_variant = result_row[col];
3895  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3896  }
3897  }
3898  };
3899 
3900  auto convert_function = [&thread_start_idx,
3901  &thread_end_idx,
3902  &value_converters,
3903  &executor,
3904  &query_session,
3905  &work_type_str,
3906  &do_work](const int thread_id, const size_t block_end) {
3907  const int num_cols = value_converters.size();
3908  const size_t start = thread_start_idx[thread_id];
3909  const size_t end = thread_end_idx[thread_id];
3910  size_t idx = 0;
3911  bool stop_convert = false;
3913  size_t local_idx = 0;
3914  for (idx = start; idx < end; ++idx, ++local_idx) {
3915  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3916  check_session_interrupted(query_session, executor))) {
3917  throw std::runtime_error(
3918  "Query execution has been interrupted while performing " +
3919  work_type_str);
3920  }
3921  do_work(idx, block_end, num_cols, thread_id, stop_convert);
3922  if (stop_convert) {
3923  break;
3924  }
3925  }
3926  } else {
3927  for (idx = start; idx < end; ++idx) {
3928  do_work(idx, block_end, num_cols, thread_id, stop_convert);
3929  if (stop_convert) {
3930  break;
3931  }
3932  }
3933  }
3934  thread_start_idx[thread_id] = idx;
3935  };
3936 
3937  auto single_threaded_value_converter =
3938  [&crt_row_idx, &value_converters, &result_rows](const size_t idx,
3939  const size_t block_end,
3940  const size_t num_cols,
3941  bool& stop_convert) {
3942  size_t target_row = crt_row_idx.fetch_add(1);
3943  if (target_row >= block_end) {
3944  stop_convert = true;
3945  return;
3946  }
3947  const auto result_row = result_rows->getNextRow(false, false);
3948  CHECK(!result_row.empty());
3949  for (unsigned int col = 0; col < num_cols; col++) {
3950  const auto& mapd_variant = result_row[col];
3951  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3952  }
3953  };
3954 
3955  auto single_threaded_convert_function = [&value_converters,
3956  &thread_start_idx,
3957  &thread_end_idx,
3958  &executor,
3959  &query_session,
3960  &work_type_str,
3961  &single_threaded_value_converter](
3962  const int thread_id,
3963  const size_t block_end) {
3964  const int num_cols = value_converters.size();
3965  const size_t start = thread_start_idx[thread_id];
3966  const size_t end = thread_end_idx[thread_id];
3967  size_t idx = 0;
3968  bool stop_convert = false;
3970  size_t local_idx = 0;
3971  for (idx = start; idx < end; ++idx, ++local_idx) {
3972  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3973  check_session_interrupted(query_session, executor))) {
3974  throw std::runtime_error(
3975  "Query execution has been interrupted while performing " +
3976  work_type_str);
3977  }
3978  single_threaded_value_converter(idx, block_end, num_cols, stop_convert);
3979  if (stop_convert) {
3980  break;
3981  }
3982  }
3983  } else {
3984  for (idx = start; idx < end; ++idx) {
3985  single_threaded_value_converter(idx, end, num_cols, stop_convert);
3986  if (stop_convert) {
3987  break;
3988  }
3989  }
3990  }
3991  thread_start_idx[thread_id] = idx;
3992  };
3993 
3994  if (can_go_parallel) {
3995  const size_t entry_count = result_rows->entryCount();
3996  for (size_t
3997  i = 0,
3998  start_entry = 0,
3999  stride = (entry_count + num_worker_threads - 1) / num_worker_threads;
4000  i < num_worker_threads && start_entry < entry_count;
4001  ++i, start_entry += stride) {
4002  const auto end_entry = std::min(start_entry + stride, entry_count);
4003  thread_start_idx[i] = start_entry;
4004  thread_end_idx[i] = end_entry;
4005  }
4006  } else {
4007  thread_start_idx[0] = 0;
4008  thread_end_idx[0] = result_rows->entryCount();
4009  }
4010 
4011  RenderGroupAnalyzerMap render_group_analyzer_map;
4012 
4013  for (size_t block_start = 0; block_start < num_rows;
4014  block_start += rows_per_block) {
4015  const auto num_rows_this_itr = block_start + rows_per_block < num_rows
4016  ? rows_per_block
4017  : num_rows - block_start;
4018  crt_row_idx = 0; // reset block tracker
4019  value_converters.clear();
4020  int colNum = 0;
4021  for (const auto targetDescriptor : target_column_descriptors) {
4022  auto sourceDataMetaInfo = res.targets_meta[colNum++];
4024  num_rows_this_itr,
4025  sourceDataMetaInfo,
4026  targetDescriptor,
4027  catalog,
4028  targetDescriptor->columnType,
4029  !targetDescriptor->columnType.get_notnull(),
4030  result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
4032  sourceDataMetaInfo.get_type_info().is_dict_encoded_string()
4033  ? executor->getStringDictionaryProxy(
4034  sourceDataMetaInfo.get_type_info().getStringDictKey(),
4035  result_rows->getRowSetMemOwner(),
4036  true)
4037  : nullptr,
4038  IS_GEO_POLY(targetDescriptor->columnType.get_type()) &&
4040  ? &render_group_analyzer_map
4041  : nullptr};
4042  auto converter = factory.create(param);
4043  value_converters.push_back(std::move(converter));
4044  }
4045 
4046  const auto translate_clock_begin = timer_start();
4047  if (can_go_parallel) {
4048  std::vector<std::future<void>> worker_threads;
4049  for (int i = 0; i < num_worker_threads; ++i) {
4050  worker_threads.push_back(
4051  std::async(std::launch::async, convert_function, i, num_rows_this_itr));
4052  }
4053 
4054  for (auto& child : worker_threads) {
4055  child.wait();
4056  }
4057  for (auto& child : worker_threads) {
4058  child.get();
4059  }
4060 
4061  } else {
4062  single_threaded_convert_function(0, num_rows_this_itr);
4063  }
4064 
4065  // finalize the insert data
4066  auto finalizer_func =
4067  [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
4068  targetValueConverter->finalizeDataBlocksForInsertData();
4069  };
4070 
4071  std::vector<std::future<void>> worker_threads;
4072  for (auto& converterPtr : value_converters) {
4073  worker_threads.push_back(
4074  std::async(std::launch::async, finalizer_func, converterPtr.get()));
4075  }
4076 
4077  for (auto& child : worker_threads) {
4078  child.wait();
4079  }
4080  for (auto& child : worker_threads) {
4081  child.get();
4082  }
4083 
4085  insert_data.databaseId = catalog.getCurrentDB().dbId;
4086  CHECK(td);
4087  insert_data.tableId = td->tableId;
4088  insert_data.numRows = num_rows_this_itr;
4089 
4090  for (int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
4092  check_session_interrupted(query_session, executor))) {
4093  throw std::runtime_error(
4094  "Query execution has been interrupted while performing " +
4095  work_type_str);
4096  }
4097  value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
4098  }
4099  total_target_value_translate_time_ms += timer_stop(translate_clock_begin);
4100 
4101  const auto data_load_clock_begin = timer_start();
4102  auto data_memory_holder =
4103  import_export::fill_missing_columns(&catalog, insert_data);
4104  insertDataLoader.insertData(*session, insert_data);
4105  total_data_load_time_ms += timer_stop(data_load_clock_begin);
4106  }
4107  }
4108  }
4109  } catch (...) {
4110  try {
4111  leafs_connector_->rollback(*session, td->tableId);
4112  } catch (std::exception& e) {
4113  LOG(ERROR) << "An error occurred during ITAS rollback attempt. Table id: "
4114  << td->tableId << ", Error: " << e.what();
4115  }
4116  throw;
4117  }
4118 
4119  int64_t total_time_ms = total_source_query_time_ms +
4120  total_target_value_translate_time_ms + total_data_load_time_ms;
4121 
4122  VLOG(1) << "CTAS/ITAS " << total_row_count << " rows loaded in " << total_time_ms
4123  << "ms (outer_frag_count=" << outer_frag_count
4124  << ", query_time=" << total_source_query_time_ms
4125  << "ms, translation_time=" << total_target_value_translate_time_ms
4126  << "ms, data_load_time=" << total_data_load_time_ms
4127  << "ms)\nquery: " << select_query_;
4128 
4129  if (!is_temporary) {
4130  leafs_connector_->checkpoint(*session, td->tableId);
4131  }
4132 }
4133 
4134 namespace {
4135 shared::TableKey get_table_key(const std::vector<std::string>& table) {
4136  const auto catalog = SysCatalog::instance().getCatalog(table[1]);
4137  CHECK(catalog);
4138  const auto table_id = catalog->getTableId(table[0]);
4139  if (!table_id.has_value()) {
4140  throw std::runtime_error{"Table \"" + table[0] +
4141  "\" does not exist in catalog: " + table[1] + "."};
4142  }
4143  return {catalog->getDatabaseId(), table_id.value()};
4144 }
4145 
4147  const std::string& insert_table_db_name,
4148  const std::string& query_str,
4149  const QueryStateProxy& query_state_proxy,
4150  const std::optional<std::string>& insert_table_name = {}) {
4151  auto& sys_catalog = SysCatalog::instance();
4152  auto& calcite_mgr = sys_catalog.getCalciteMgr();
4153  const auto calciteQueryParsingOption =
4154  calcite_mgr.getCalciteQueryParsingOption(true, false, true);
4155  const auto calciteOptimizationOption = calcite_mgr.getCalciteOptimizationOption(
4156  false, g_enable_watchdog, {}, sys_catalog.isAggregator());
4157  const auto result = calcite_mgr.process(query_state_proxy,
4158  pg_shim(query_str),
4159  calciteQueryParsingOption,
4160  calciteOptimizationOption);
4161  // force sort into tableid order in case of name change to guarantee fixed order of
4162  // mutex access
4163  auto comparator = [](const std::vector<std::string>& table_1,
4164  const std::vector<std::string>& table_2) {
4165  return get_table_key(table_1) < get_table_key(table_2);
4166  };
4167  std::set<std::vector<std::string>, decltype(comparator)> tables(comparator);
4168  for (auto& tab : result.resolved_accessed_objects.tables_selected_from) {
4169  tables.emplace(tab);
4170  }
4171  if (insert_table_name.has_value()) {
4172  tables.emplace(
4173  std::vector<std::string>{insert_table_name.value(), insert_table_db_name});
4174  }
4176  for (const auto& table : tables) {
4177  const auto catalog = sys_catalog.getCatalog(table[1]);
4178  CHECK(catalog);
4179  locks.emplace_back(
4182  *catalog, table[0])));
4183  if (insert_table_name.has_value() && table[0] == insert_table_name.value() &&
4184  table[1] == insert_table_db_name) {
4185  locks.emplace_back(
4188  catalog->getDatabaseId(), (*locks.back())())));
4189  } else {
4190  locks.emplace_back(
4193  catalog->getDatabaseId(), (*locks.back())())));
4194  }
4195  }
4196  return locks;
4197 }
4198 } // namespace
4199 
4201  bool read_only_mode) {
4202  if (read_only_mode) {
4203  throw std::runtime_error("INSERT INTO TABLE invalid in read only mode.");
4204  }
4205  auto session_copy = session;
4206  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4207  &session_copy, boost::null_deleter());
4208  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
4209  auto stdlog = STDLOG(query_state);
4210  auto& catalog = session_ptr->getCatalog();
4211 
4212  const auto execute_read_lock =
4216 
4217  if (catalog.getMetadataForTable(table_name_) == nullptr) {
4218  throw std::runtime_error("ITAS failed: table " + table_name_ + " does not exist.");
4219  }
4220 
4221  auto locks = acquire_query_table_locks(
4222  catalog.name(), select_query_, query_state->createQueryStateProxy(), table_name_);
4223  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
4224 
4225  Executor::clearExternalCaches(true, td, catalog.getCurrentDB().dbId);
4226 
4227  try {
4228  populateData(query_state->createQueryStateProxy(), td, true, false);
4229  } catch (...) {
4230  throw;
4231  }
4232 }
4233 
4235  : InsertIntoTableAsSelectStmt(payload) {
4236  if (payload.HasMember("temporary")) {
4237  is_temporary_ = json_bool(payload["temporary"]);
4238  } else {
4239  is_temporary_ = false;
4240  }
4241 
4242  if (payload.HasMember("ifNotExists")) {
4243  if_not_exists_ = json_bool(payload["ifNotExists"]);
4244  } else {
4245  if_not_exists_ = false;
4246  }
4247 
4248  parse_options(payload, storage_options_);
4249 }
4250 
4252  bool read_only_mode) {
4253  if (read_only_mode) {
4254  throw std::runtime_error("CREATE TABLE invalid in read only mode.");
4255  }
4256  auto session_copy = session;
4257  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4258  &session_copy, boost::null_deleter());
4259  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
4260  auto stdlog = STDLOG(query_state);
4261  LocalQueryConnector local_connector;
4262  auto& catalog = session.getCatalog();
4263  bool create_table = nullptr == leafs_connector_;
4264 
4265  std::set<std::string> select_tables;
4266  if (create_table) {
4267  const auto execute_write_lock =
4271 
4272  // check access privileges
4275  throw std::runtime_error("CTAS failed. Table " + table_name_ +
4276  " will not be created. User has no create privileges.");
4277  }
4278 
4279  if (catalog.getMetadataForTable(table_name_) != nullptr) {
4280  if (if_not_exists_) {
4281  return;
4282  }
4283  throw std::runtime_error("Table " + table_name_ +
4284  " already exists and no data was loaded.");
4285  }
4286 
4287  // only validate the select query so we get the target types
4288  // correctly, but do not populate the result set
4289  // we currently have exclusive access to the system so this is safe
4290  auto validate_result = local_connector.query(
4291  query_state->createQueryStateProxy(), select_query_, {}, true, false);
4292 
4293  auto column_descriptors_for_create =
4294  local_connector.getColumnDescriptors(validate_result, true);
4295 
4296  // some validation as the QE might return some out of range column types
4297  for (auto& cd : column_descriptors_for_create) {
4298  if (cd.columnType.is_decimal() &&
4299  cd.columnType.get_precision() > sql_constants::kMaxNumericPrecision) {
4300  throw std::runtime_error(cd.columnName + ": Precision too high, max " +
4302  ".");
4303  }
4304  }
4305 
4306  TableDescriptor td;
4307  td.tableName = table_name_;
4308  td.userId = session.get_currentUser().userId;
4309  td.nColumns = column_descriptors_for_create.size();
4310  td.isView = false;
4311  td.fragmenter = nullptr;
4318  if (is_temporary_) {
4320  } else {
4322  }
4323 
4324  bool use_shared_dictionaries = true;
4325  bool force_geo_compression = true;
4326 
4327  if (!storage_options_.empty()) {
4328  for (auto& p : storage_options_) {
4329  if (boost::to_lower_copy<std::string>(*p->get_name()) ==
4330  "use_shared_dictionaries") {
4331  const StringLiteral* literal =
4332  dynamic_cast<const StringLiteral*>(p->get_value());
4333  if (nullptr == literal) {
4334  throw std::runtime_error(
4335  "USE_SHARED_DICTIONARIES must be a string parameter");
4336  }
4337  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
4338  use_shared_dictionaries = val == "true" || val == "1" || val == "t";
4339  } else if (boost::to_lower_copy<std::string>(*p->get_name()) ==
4340  "force_geo_compression") {
4341  const StringLiteral* literal =
4342  dynamic_cast<const StringLiteral*>(p->get_value());
4343  if (nullptr == literal) {
4344  throw std::runtime_error("FORCE_GEO_COMPRESSION must be a string parameter");
4345  }
4346  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
4347  force_geo_compression = val == "true" || val == "1" || val == "t";
4348  } else {
4349  get_table_definitions_for_ctas(td, p, column_descriptors_for_create);
4350  }
4351  }
4352  }
4353 
4354  std::vector<SharedDictionaryDef> sharedDictionaryRefs;
4355 
4356  if (use_shared_dictionaries) {
4357  const auto source_column_descriptors =
4358  local_connector.getColumnDescriptors(validate_result, false);
4359  const auto mapping = catalog.getDictionaryToColumnMapping();
4360 
4361  for (auto& source_cd : source_column_descriptors) {
4362  const auto& ti = source_cd.columnType;
4363  if (ti.is_string()) {
4364  if (ti.get_compression() == kENCODING_DICT) {
4365  int dict_id = ti.get_comp_param();
4366  auto it = mapping.find(dict_id);
4367  if (mapping.end() != it) {
4368  const auto targetColumn = it->second;
4369  auto targetTable =
4370  catalog.getMetadataForTable(targetColumn->tableId, false);
4371  CHECK(targetTable);
4372  LOG(INFO) << "CTAS: sharing text dictionary on column "
4373  << source_cd.columnName << " with " << targetTable->tableName
4374  << "." << targetColumn->columnName;
4375  sharedDictionaryRefs.emplace_back(
4376  source_cd.columnName, targetTable->tableName, targetColumn->columnName);
4377  }
4378  }
4379  }
4380  }
4381  }
4382 
4383  if (force_geo_compression) {
4384  for (auto& cd_for_create : column_descriptors_for_create) {
4385  auto& ti = cd_for_create.columnType;
4386  if (ti.is_geometry() && ti.get_output_srid() == 4326) {
4387  // turn on GEOINT32 compression
4388  ti.set_compression(kENCODING_GEOINT);
4389  ti.set_comp_param(32);
4390  }
4391  }
4392  }
4393 
4394  // currently no means of defining sharding in CTAS
4395  td.keyMetainfo = serialize_key_metainfo(nullptr, sharedDictionaryRefs);
4396 
4397  catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs, true);
4398  // TODO (max): It's transactionally unsafe, should be fixed: we may create object
4399  // w/o privileges
4400  SysCatalog::instance().createDBObject(
4401  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
4402  }
4403 
4404  // note there is a time where we do not have any executor outer lock here. someone could
4405  // come along and mess with the data or other tables.
4406  const auto execute_read_lock =
4410 
4411  auto locks = acquire_query_table_locks(
4412  catalog.name(), select_query_, query_state->createQueryStateProxy(), table_name_);
4413  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
4414  try {
4415  populateData(query_state->createQueryStateProxy(), td, false, true);
4416  } catch (...) {
4417  if (!g_cluster) {
4418  const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
4419  if (created_td) {
4420  catalog.dropTable(created_td);
4421  }
4422  }
4423  throw;
4424  }
4425 }
4426 
4427 DropTableStmt::DropTableStmt(const rapidjson::Value& payload) {
4428  CHECK(payload.HasMember("tableName"));
4429  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4430 
4431  if_exists_ = false;
4432  if (payload.HasMember("ifExists")) {
4433  if_exists_ = json_bool(payload["ifExists"]);
4434  }
4435 }
4436 
// DROP TABLE execution body (the first line of the signature and several lock-acquisition
// lines are not present in this listing).
// Visible flow: reject in read-only mode, obtain the table descriptor through a schema
// write-lock container, return quietly if that throws std::runtime_error and IF EXISTS was
// given, verify the session's privileges, clear external caches for the table and finally
// ask the catalog to drop it.
4438  bool read_only_mode) {
4439  if (read_only_mode) {
4440  throw std::runtime_error("DROP TABLE invalid in read only mode.");
4441  }
4442  // Because we are able to acquire a unique_lock on the table descriptor to be dropped we
4443  // can get away with only using a shared_lock on the executor, as anything that will
4444  // touch the table being dropped will block, but other transactions are ok.
4445  const auto execute_read_lock =
4449  auto& catalog = session.getCatalog();
4450  const TableDescriptor* td{nullptr};
4451  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
4452  try {
4453  td_with_lock =
4454  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
4456  catalog, *table_, false));
4457  td = (*td_with_lock)();
4458  } catch (const std::runtime_error& e) {
// With IF EXISTS the failure to obtain the descriptor is swallowed and the statement is a
// no-op; otherwise the error is propagated.
// NOTE(review): `throw e;` throws a copy typed std::runtime_error, so a more derived
// exception would be sliced; a bare `throw;` would preserve it - confirm intent.
4459  if (if_exists_) {
4460  return;
4461  } else {
4462  throw e;
4463  }
4464  }
4465 
4466  CHECK(td);
4467  CHECK(td_with_lock);
4468 
4469  // check access privileges
4470  if (!session.checkDBAccessPrivileges(
4472  throw std::runtime_error("Table " + *table_ +
4473  " will not be dropped. User has no proper privileges.");
4474  }
4475 
4477 
// The cache invalidation runs in its own scope so that table_data_read_lock is released
// before table_data_write_lock is declared below.
4478  {
4479  auto table_data_read_lock =
4481  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
4482  }
4483 
4484  auto table_data_write_lock =
4486  catalog.dropTable(td);
4487 }
4488 
// Empty body: this routine (the first line of its signature is not present in this
// listing) performs no work when called.
4490  bool read_only_mode) {}
4491 
4492 std::unique_ptr<DDLStmt> AlterTableStmt::delegate(const rapidjson::Value& payload) {
4493  CHECK(payload.HasMember("tableName"));
4494  auto tableName = json_str(payload["tableName"]);
4495 
4496  CHECK(payload.HasMember("alterType"));
4497  auto type = json_str(payload["alterType"]);
4498 
4499  if (type == "RENAME_TABLE") {
4500  CHECK(payload.HasMember("newTableName"));
4501  auto newTableName = json_str(payload["newTableName"]);
4502  return std::unique_ptr<DDLStmt>(new Parser::RenameTableStmt(
4503  new std::string(tableName), new std::string(newTableName)));
4504 
4505  } else if (type == "RENAME_COLUMN") {
4506  CHECK(payload.HasMember("columnName"));
4507  auto columnName = json_str(payload["columnName"]);
4508  CHECK(payload.HasMember("newColumnName"));
4509  auto newColumnName = json_str(payload["newColumnName"]);
4510  return std::unique_ptr<DDLStmt>(
4511  new Parser::RenameColumnStmt(new std::string(tableName),
4512  new std::string(columnName),
4513  new std::string(newColumnName)));
4514 
4515  } else if (type == "ALTER_COLUMN") {
4516  CHECK(payload.HasMember("alterData"));
4517  CHECK(payload["alterData"].IsArray());
4518  throw std::runtime_error("ALTER TABLE ALTER COLUMN is not implemented.");
4519  } else if (type == "ADD_COLUMN") {
4520  CHECK(payload.HasMember("columnData"));
4521  CHECK(payload["columnData"].IsArray());
4522 
4523  // New Columns go into this list
4524  std::list<ColumnDef*>* table_element_list_ = new std::list<ColumnDef*>;
4525 
4526  const auto elements = payload["columnData"].GetArray();
4527  for (const auto& element : elements) {
4528  CHECK(element.IsObject());
4529  CHECK(element.HasMember("type"));
4530  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
4531  auto col_def = column_from_json(element);
4532  table_element_list_->emplace_back(col_def.release());
4533  } else {
4534  LOG(FATAL) << "Unsupported element type for ALTER TABLE: "
4535  << element["type"].GetString();
4536  }
4537  }
4538 
4539  return std::unique_ptr<DDLStmt>(
4540  new Parser::AddColumnStmt(new std::string(tableName), table_element_list_));
4541 
4542  } else if (type == "DROP_COLUMN") {
4543  CHECK(payload.HasMember("columnData"));
4544  auto columnData = json_str(payload["columnData"]);
4545  // Convert columnData to std::list<std::string*>*
4546  // allocate std::list<> as DropColumnStmt will delete it;
4547  std::list<std::string*>* cols = new std::list<std::string*>;
4548  std::vector<std::string> cols1;
4549  boost::split(cols1, columnData, boost::is_any_of(","));
4550  for (auto s : cols1) {
4551  // strip leading/trailing spaces/quotes/single quotes
4552  boost::algorithm::trim_if(s, boost::is_any_of(" \"'`"));
4553  std::string* str = new std::string(s);
4554  cols->emplace_back(str);
4555  }
4556 
4557  return std::unique_ptr<DDLStmt>(
4558  new Parser::DropColumnStmt(new std::string(tableName), cols));
4559 
4560  } else if (type == "ALTER_OPTIONS") {
4561  CHECK(payload.HasMember("options"));
4562  const auto& options = payload["options"];
4563  if (options.IsObject()) {
4564  for (auto itr = options.MemberBegin(); itr != options.MemberEnd(); ++itr) {
4565  std::string* option_name = new std::string(json_str(itr->name));
4566  Literal* literal_value;
4567  if (itr->value.IsString()) {
4568  std::string literal_string = json_str(itr->value);
4569 
4570  // iff this string can be converted to INT
4571  // ... do so because it is necessary for AlterTableParamStmt
4572  std::size_t sz;
4573  int iVal = std::stoi(literal_string, &sz);
4574  if (sz == literal_string.size()) {
4575  literal_value = new IntLiteral(iVal);
4576  } else {
4577  literal_value = new StringLiteral(&literal_string);
4578  }
4579  } else if (itr->value.IsInt() || itr->value.IsInt64()) {
4580  literal_value = new IntLiteral(json_i64(itr->value));
4581  } else if (itr->value.IsNull()) {
4582  literal_value = new NullLiteral();
4583  } else {
4584  throw std::runtime_error("Unable to handle literal for " + *option_name);
4585  }
4586  CHECK(literal_value);
4587 
4588  NameValueAssign* nv = new NameValueAssign(option_name, literal_value);
4589  return std::unique_ptr<DDLStmt>(
4590  new Parser::AlterTableParamStmt(new std::string(tableName), nv));
4591  }
4592  } else {
4593  CHECK(options.IsNull());
4594  }
4595  }
4596  return nullptr;
4597 }
4598 
4599 TruncateTableStmt::TruncateTableStmt(const rapidjson::Value& payload) {
4600  CHECK(payload.HasMember("tableName"));
4601  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4602 }
4603 
// TRUNCATE TABLE execution body (the first line of the signature and several
// lock-acquisition lines are not present in this listing).
// Visible flow: reject in read-only mode, look the table up through a lock container,
// fail if it does not exist, check the current user's privileges on it, refuse views,
// clear external caches and finally ask the catalog to truncate the table.
4605  bool read_only_mode) {
4606  if (read_only_mode) {
4607  throw std::runtime_error("TRUNCATE TABLE invalid in read only mode.");
4608  }
4609  const auto execute_read_lock =
4613  auto& catalog = session.getCatalog();
4614  const auto td_with_lock =
4616  catalog, *table_, true);
4617  const auto td = td_with_lock();
4618  if (!td) {
4619  throw std::runtime_error("Table " + *table_ + " does not exist.");
4620  }
4621 
4622  // check access privileges
4623  std::vector<DBObject> privObjects;
4624  DBObject dbObject(*table_, TableDBObjectType);
4625  dbObject.loadKey(catalog);
4627  privObjects.push_back(dbObject);
4628  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
4629  throw std::runtime_error("Table " + *table_ + " will not be truncated. User " +
4630  session.get_currentUser().userLoggable() +
4631  " has no proper privileges.");
4632  }
4633 
4634  if (td->isView) {
4635  throw std::runtime_error(*table_ + " is a view. Cannot Truncate.");
4636  }
4638 
4639  // invalidate cached item
// Done in its own scope so table_data_read_lock is released before
// table_data_write_lock is declared below.
4640  {
4641  auto table_data_read_lock =
4643  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
4644  }
4645 
4646  auto table_data_write_lock =
4648  catalog.truncateTable(td);
4649 }
4650 
4651 OptimizeTableStmt::OptimizeTableStmt(const rapidjson::Value& payload) {
4652  CHECK(payload.HasMember("tableName"));
4653  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4654  parse_options(payload, options_);
4655 }
4656 
// File-local privilege helper (the first line of its signature is not present in this
// listing; it is invoked as user_can_access_table(session, td, privilege) further down).
// Builds a table DBObject for td->tableName, attaches the requested access privileges and
// returns the result of SysCatalog::checkPrivileges for the session's current user.
// `td` must be non-null (enforced with CHECK).
4657 namespace {
4659  const TableDescriptor* td,
4660  const AccessPrivileges access_priv) {
4661  CHECK(td);
4662  auto& cat = session_info.getCatalog();
4663  std::vector<DBObject> privObjects;
4664  DBObject dbObject(td->tableName, TableDBObjectType);
4665  dbObject.loadKey(cat);
4666  dbObject.setPrivileges(access_priv);
4667  privObjects.push_back(dbObject);
4668  return SysCatalog::instance().checkPrivileges(session_info.get_currentUser(),
4669  privObjects);
4670 };
4671 } // namespace
4672 
// OPTIMIZE TABLE execution body (the first line of the signature and some
// lock/executor lines are not present in this listing).
// Visible flow: reject in read-only mode, look the table up, report "does not exist" when
// it is missing or not accessible, refuse views, invalidate cached results for the
// table, optionally vacuum deleted rows and always recompute metadata.
4674  bool read_only_mode) {
4675  if (read_only_mode) {
4676  throw std::runtime_error("OPTIMIZE TABLE invalid in read only mode.");
4677  }
4678  auto& catalog = session.getCatalog();
4679 
4680  const auto execute_read_lock =
4684 
4685  const auto td_with_lock =
4687  catalog, *table_);
4688  const auto td = td_with_lock();
4689 
// A table the user lacks DELETE_FROM_TABLE access to is reported with the same message
// as a missing table.
4690  if (!td || !user_can_access_table(session, td, AccessPrivileges::DELETE_FROM_TABLE)) {
4691  throw std::runtime_error("Table " + *table_ + " does not exist.");
4692  }
4693 
4694  if (td->isView) {
4695  throw std::runtime_error("OPTIMIZE TABLE command is not supported on views.");
4696  }
4697 
4698  // invalidate cached item
// The cache key is the hash of {database id, table id}.
4699  std::vector<int> table_key{catalog.getCurrentDB().dbId, td->tableId};
4700  ResultSetCacheInvalidator::invalidateCachesByTable(boost::hash_value(table_key));
4701 
4703  const TableOptimizer optimizer(td, executor, catalog);
4704  if (shouldVacuumDeletedRows()) {
4705  optimizer.vacuumDeletedRows();
4706  }
4707  optimizer.recomputeMetadata();
4708 }
4709 
4710 bool repair_type(std::list<std::unique_ptr<NameValueAssign>>& options) {
4711  for (const auto& opt : options) {
4712  if (boost::iequals(*opt->get_name(), "REPAIR_TYPE")) {
4713  const auto repair_type =
4714  static_cast<const StringLiteral*>(opt->get_value())->get_stringval();
4715  CHECK(repair_type);
4716  if (boost::iequals(*repair_type, "REMOVE")) {
4717  return true;
4718  } else {
4719  throw std::runtime_error("REPAIR_TYPE must be REMOVE.");
4720  }
4721  } else {
4722  throw std::runtime_error("The only VALIDATE WITH options is REPAIR_TYPE.");
4723  }
4724  }
4725  return false;
4726 }
4727 
4728 ValidateStmt::ValidateStmt(std::string* type, std::list<NameValueAssign*>* with_opts)
4729  : type_(type) {
4730  if (!type) {
4731  throw std::runtime_error("Validation Type is required for VALIDATE command.");
4732  }
4733  std::list<std::unique_ptr<NameValueAssign>> options;
4734  if (with_opts) {
4735  for (const auto e : *with_opts) {
4736  options.emplace_back(e);
4737  }
4738  delete with_opts;
4739 
4740  isRepairTypeRemove_ = repair_type(options);
4741  }
4742 }
4743 
4744 ValidateStmt::ValidateStmt(const rapidjson::Value& payload) {
4745  CHECK(payload.HasMember("type"));
4746  type_ = std::make_unique<std::string>(json_str(payload["type"]));
4747 
4748  std::list<std::unique_ptr<NameValueAssign>> options;
4749  parse_options(payload, options);
4750 
4751  isRepairTypeRemove_ = repair_type(options);
4752 }
4753 
// Privilege gate for table alterations (the first line of the signature and one interior
// line are not present in this listing; callers in this file invoke it as
// check_alter_table_privilege(session, td)).
// Super users and the table's owner pass immediately. For everyone else the result of
// SysCatalog::checkPrivileges on the table object decides; a failed check raises
// std::runtime_error naming the table.
4755  const TableDescriptor* td) {
4756  if (session.get_currentUser().isSuper ||
4757  session.get_currentUser().userId == td->userId) {
4758  return;
4759  }
4760  std::vector<DBObject> privObjects;
4761  DBObject dbObject(td->tableName, TableDBObjectType);
4762  dbObject.loadKey(session.getCatalog());
4764  privObjects.push_back(dbObject);
4765  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
4766  throw std::runtime_error("Current user does not have the privilege to alter table: " +
4767  td->tableName);
4768  }
4769 }
4770 
4771 RenameUserStmt::RenameUserStmt(const rapidjson::Value& payload) {
4772  CHECK(payload.HasMember("name"));
4773  username_ = std::make_unique<std::string>(json_str(payload["name"]));
4774  CHECK(payload.HasMember("newName"));
4775  new_username_ = std::make_unique<std::string>(json_str(payload["newName"]));
4776 }
4777 
// Rename-user execution body (the first line of the signature and the declaration of
// `user` are not present in this listing).
// Visible flow: reject in read-only mode, require a super user, verify the user to be
// renamed exists, then delegate the rename to SysCatalog.
4779  bool read_only_mode) {
// NOTE(review): the message below says "RENAME TABLE" although the rest of this body
// deals with renaming users - confirm whether the text should read "RENAME USER".
4780  if (read_only_mode) {
4781  throw std::runtime_error("RENAME TABLE invalid in read only mode.");
4782  }
4783  if (!session.get_currentUser().isSuper) {
4784  throw std::runtime_error("Only a super user can rename users.");
4785  }
4786 
4788  if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
4789  throw std::runtime_error("User " + *username_ + " does not exist.");
4790  }
4791 
4792  SysCatalog::instance().renameUser(*username_, *new_username_);
4793 }
4794 
4795 RenameDBStmt::RenameDBStmt(const rapidjson::Value& payload) {
4796  CHECK(payload.HasMember("name"));
4797  database_name_ = std::make_unique<std::string>(json_str(payload["name"]));
4798  CHECK(payload.HasMember("newName"));
4799  new_database_name_ = std::make_unique<std::string>(json_str(payload["newName"]));
4800 }
4801 
// RENAME DATABASE execution body (the first line of the signature, the declaration of
// `db` and the lock-acquisition lines are not present in this listing).
// Visible flow: reject in read-only mode, verify the database exists, allow only a super
// user or the database owner to proceed, then delegate the rename to SysCatalog.
4803  bool read_only_mode) {
4804  if (read_only_mode) {
4805  throw std::runtime_error("RENAME DATABASE invalid in read only mode.");
4806  }
4808 
4809  // TODO: use database lock instead
4810  const auto execute_write_lock =
4814 
4815  if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
4816  throw std::runtime_error("Database " + *database_name_ + " does not exist.");
4817  }
4818 
4819  if (!session.get_currentUser().isSuper &&
4820  session.get_currentUser().userId != db.dbOwner) {
4821  throw std::runtime_error("Only a super user or the owner can rename the database.");
4822  }
4823 
4824  SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
4825 }
4826 
4827 RenameTableStmt::RenameTableStmt(const rapidjson::Value& payload) {
4828  CHECK(payload.HasMember("tableNames"));
4829  CHECK(payload["tableNames"].IsArray());
4830  const auto elements = payload["tableNames"].GetArray();
4831  for (const auto& element : elements) {
4832  CHECK(element.HasMember("name"));
4833  CHECK(element.HasMember("newName"));
4834  tablesToRename_.emplace_back(new std::string(json_str(element["name"])),
4835  new std::string(json_str(element["newName"])));
4836  }
4837 }
4838 
// Builds a RENAME TABLE statement for a single (old name, new name) pair; both raw
// pointers are passed straight to tablesToRename_.
// NOTE(review): presumably the list elements take ownership of the two strings - confirm
// against the member's declaration.
4839 RenameTableStmt::RenameTableStmt(std::string* tab_name, std::string* new_tab_name) {
4840  tablesToRename_.emplace_back(tab_name, new_tab_name);
4841 }
4842 
// Constructor body taking a list of (old name, new name) pairs (the first line of the
// signature is not present in this listing). Each pair is copied into freshly allocated
// strings and queued in tablesToRename_ in list order.
4844  std::list<std::pair<std::string, std::string>> tableNames) {
4845  for (auto item : tableNames) {
4846  tablesToRename_.emplace_back(new std::string(item.first),
4847  new std::string(item.second));
4848  }
4849 }
4850 
// Table-name substitution map used while planning RENAME TABLE: maps a table name to the
// name that currently holds its data, or to an empty string when no data is tracked
// under that name.
4851 using SubstituteMap = std::map<std::string, std::string>;
4852 
4853 // Namespace fns used to track a left-to-right execution of RENAME TABLE
4854 // and verify that the command should be (entirely/mostly) valid
4855 //
4856 namespace {
4857 
// Sentinel stored in a SubstituteMap entry to mark a name that holds no table data.
4858 static constexpr char const* EMPTY_NAME{""};
4859 
// Derives a temporary table name from `name` by appending "_tmp" followed by the current
// time as returned by std::time().
std::string generateUniqueTableName(std::string name) {
  // TODO - is there a "better" way to create a tmp name for the table
  const std::time_t now = std::time(nullptr);
  const std::string suffix = "_tmp" + std::to_string(now);
  return name + suffix;
}
4865 
4866 void recordRename(SubstituteMap& sMap, std::string oldName, std::string newName) {
4867  sMap[oldName] = newName;
4868 }
4869 
// Resolves `tableName` through the substitution map (the first line of the signature is
// not present in this listing; callers invoke it as loadTable(catalog, map, name)).
//  - Name already tracked: returns the mapped name, or `tableName` itself when the entry
//    is the EMPTY_NAME sentinel.
//  - Name not tracked yet: looks the table up in the catalog, records either a
//    self-mapping (table exists) or EMPTY_NAME (table missing) and returns `tableName`.
4871  SubstituteMap& sMap,
4872  std::string tableName) {
4873  if (sMap.find(tableName) != sMap.end()) {
4874  if (sMap[tableName] == EMPTY_NAME) {
4875  return tableName;
4876  }
4877  return sMap[tableName];
4878  } else {
4879  // lookup table in src catalog
4880  const TableDescriptor* td = catalog.getMetadataForTable(tableName);
4881  if (td) {
4882  sMap[tableName] = tableName;
4883  } else {
4884  sMap[tableName] = EMPTY_NAME;
4885  }
4886  }
4887  return tableName;
4888 }
4889 
4890 bool hasData(SubstituteMap& sMap, std::string tableName) {
4891  // assumes loadTable has been previously called
4892  return (sMap[tableName] != EMPTY_NAME);
4893 }
4894 
// Final consistency check over the substitution map (the signature line is not present in
// this listing; it is invoked as checkNameSubstition(map)). Throws std::runtime_error for
// the first entry that still maps to a different, non-empty name.
4896  // Substitution map should be clean at end of rename:
4897  // all items in map must (map to self) or (map to EMPTY_NAME) by end
4898 
4899  for (auto it : sMap) {
4900  if ((it.second) != EMPTY_NAME && (it.first) != (it.second)) {
4901  throw std::runtime_error(
4902  "Error: Attempted to overwrite and lose data in table: \'" + (it.first) + "\'");
4903  }
4904  }
4905 }
4906 } // namespace
4907 
// File-local guard whose signature and guarding condition are not present in this
// listing. The visible part raises std::runtime_error telling the user that
// td->tableName is a foreign table and that ALTER FOREIGN TABLE must be used instead.
4908 namespace {
4911  throw std::runtime_error(td->tableName + " is a foreign table. " +
4912  "Use ALTER FOREIGN TABLE.");
4913  }
4914 }
4915 } // namespace
4916 
// RENAME TABLE execution body (the first line of the signature, the lock-acquisition
// lines and one interior line are not present in this listing).
// Walks tablesToRename_ left to right, using a substitution map to simulate the effect of
// each rename, and accumulates the concrete (old, new) pairs in `names`. When a
// destination name is still occupied, the occupant is first moved to a generated
// temporary name. After the simulation passes checkNameSubstition(), the accumulated
// list is handed to catalog.renameTable() in a single call.
// Throws std::runtime_error in read-only mode, when a source table has no data to
// rename, or when the simulated end state would lose a table.
4918  bool read_only_mode) {
4919  if (read_only_mode) {
4920  throw std::runtime_error("RENAME TABLE invalid in read only mode.");
4921  }
4922  auto& catalog = session.getCatalog();
4923 
4924  // TODO(adb): the catalog should be handling this locking (see AddColumnStmt)
4925  const auto execute_write_lock =
4929 
4930  // accumulated vector of table names: oldName->newName
4931  std::vector<std::pair<std::string, std::string>> names;
4932 
4933  SubstituteMap tableSubtituteMap;
4934 
4935  for (auto& item : tablesToRename_) {
4936  std::string curTableName = *(item.first);
4937  std::string newTableName = *(item.second);
4938 
4939  // Note: if rename (a->b, b->a)
4940  // requires a tmp name change (b->tmp, a->b, tmp->a),
4941  // inject that here because
4942  // catalog.renameTable() assumes cleanliness else will fail
4943 
4944  std::string altCurTableName = loadTable(catalog, tableSubtituteMap, curTableName);
4945  std::string altNewTableName = loadTable(catalog, tableSubtituteMap, newTableName);
4946 
4947  if (altCurTableName != curTableName && altCurTableName != EMPTY_NAME) {
4948  // rename is a one-shot deal, reset the mapping once used
4949  recordRename(tableSubtituteMap, curTableName, curTableName);
4950  }
4951 
4952  // Check to see if the command (as-entered) will likely execute cleanly (logic-wise)
4953  // src tables exist before copying from
4954  // destination table collisions
4955  // handled (a->b, b->a)
4956  // or flagged (pre-existing a,b ... "RENAME TABLE a->c, b->c" )
4957  // handle multiple chained renames, tmp names (b->tmp, a->b, tmp->a)
4958  // etc.
4959  //
4960  if (hasData(tableSubtituteMap, altCurTableName)) {
// td may be null here when altCurTableName is a temporary name that only exists in the
// simulation so far; the privilege check is skipped in that case.
4961  const TableDescriptor* td = catalog.getMetadataForTable(altCurTableName);
4962  if (td) {
4963  // Tables *and* views may be renamed here, foreign tables not
4964  // -> just block foreign tables
4966  check_alter_table_privilege(session, td);
4967  }
4968 
4969  if (hasData(tableSubtituteMap, altNewTableName)) {
4970  std::string tmpNewTableName = generateUniqueTableName(altNewTableName);
4971  // rename: newTableName to tmpNewTableName to get it out of the way
4972  // because it was full
4973  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4974  recordRename(tableSubtituteMap, altNewTableName, tmpNewTableName);
4975  recordRename(tableSubtituteMap, tmpNewTableName, tmpNewTableName);
4976  names.emplace_back(altNewTableName, tmpNewTableName);
4977  names.emplace_back(altCurTableName, altNewTableName);
4978  } else {
4979  // rename: curNewTableName to newTableName
4980  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4981  recordRename(tableSubtituteMap, altNewTableName, altNewTableName);
4982  names.emplace_back(altCurTableName, altNewTableName);
4983  }
4984  } else {
4985  throw std::runtime_error("Source table \'" + curTableName + "\' does not exist.");
4986  }
4987  }
4988  checkNameSubstition(tableSubtituteMap);
4989 
4990  catalog.renameTable(names);
4991 
4992  // just to be explicit, clean out the list, the unique_ptr will delete
4993  while (!tablesToRename_.empty()) {
4994  tablesToRename_.pop_front();
4995  }
4996 } // end of the RENAME TABLE execute body
4997 
// Column-descriptor setup body (the signature line and the line naming the callee of the
// final call are not present in this listing; it is invoked elsewhere in this file as
// setColumnDescriptor(cd, coldef)).
// Derives the NOT NULL flag and the textual default value from the column definition's
// constraint, normalises array defaults, and forwards everything together with the
// column type and compression to the final call.
4999  bool not_null;
5000  const ColumnConstraintDef* cc = coldef->get_column_constraint();
5001  if (cc == nullptr) {
5002  not_null = false;
5003  } else {
5004  not_null = cc->get_notnull();
5005  }
5006  std::string default_value;
// Stays nullptr when the column has no default; otherwise points at default_value.
5007  const std::string* default_value_ptr = nullptr;
5008  if (cc) {
5009  if (auto def_val_literal = cc->get_defaultval()) {
// String literals contribute their raw string value; any other literal is rendered via
// to_string().
5010  auto defaultsp = dynamic_cast<const StringLiteral*>(def_val_literal);
5011  default_value =
5012  defaultsp ? *defaultsp->get_stringval() : def_val_literal->to_string();
5013  // The preprocessing below is needed because:
5014  // a) TypedImportBuffer expects arrays in the {...} format
5015  // b) TypedImportBuffer expects string literals inside arrays w/o any quotes
5016  if (coldef->get_column_type()->get_is_array()) {
// Rewrites a case-insensitive "ARRAY[ ... ]" wrapper to "{ ... }" and then removes every
// single quote from the result.
5017  std::regex array_re(R"(^ARRAY\s*\[(.*)\]$)", std::regex_constants::icase);
5018  default_value = std::regex_replace(default_value, array_re, "{$1}");
5019  boost::erase_all(default_value, "\'");
5020  }
5021  default_value_ptr = &default_value;
5022  }
5023  }
5025  cd,
5026  coldef->get_column_type(),
5027  not_null,
5028  coldef->get_compression(),
5029  default_value_ptr);
5030 }
5031 
// Pre-flight validation for ADD COLUMN (the first line of the signature and one interior
// line are not present in this listing; it is invoked as check_executable(session, td)).
// Throws std::runtime_error when the table does not exist, is a view or is temporary,
// when the user lacks the alter privilege, or when any requested column name already
// exists. As a side effect, a single pending column definition held in coldef_ is moved
// into coldefs_ when that list is empty.
5033  const TableDescriptor* td) {
5034  auto& catalog = session.getCatalog();
5035  if (!td) {
5036  throw std::runtime_error("Table " + *table_ + " does not exist.");
5037  } else {
5038  if (td->isView) {
5039  throw std::runtime_error("Adding columns to a view is not supported.");
5040  }
5042  if (table_is_temporary(td)) {
5043  throw std::runtime_error(
5044  "Adding columns to temporary tables is not yet supported.");
5045  }
5046  }
5047 
5048  check_alter_table_privilege(session, td);
5049 
5050  if (0 == coldefs_.size()) {
5051  coldefs_.push_back(std::move(coldef_));
5052  }
5053 
5054  for (const auto& coldef : coldefs_) {
5055  auto& new_column_name = *coldef->get_column_name();
5056  if (catalog.getMetadataForColumn(td->tableId, new_column_name) != nullptr) {
5057  throw std::runtime_error("Column " + new_column_name + " already exists.");
5058  }
5059  }
5060 }
5061 
// ADD COLUMN execution body (the first line of the signature, the lock-acquisition lines
// and a few callee lines are not present in this listing).
// Visible flow:
//  1. Reject in read-only mode, look the table up and run check_executable().
//  2. Refuse tables whose fragmenter is a SortedOrderFragmenter.
//  3. Invalidate cached results for the table.
//  4. Inside a catalog SQLite transaction: register every new column (geo columns are
//     expanded into their physical columns), create one import buffer per new column,
//     and, when the table already has rows, put each column's default value (or NULL)
//     into its buffer before loading and checkpointing.
//  5. On any exception: roll the catalog back, roll back the SQLite transaction and
//     rethrow.
5063  bool read_only_mode) {
5064  if (read_only_mode) {
5065  throw std::runtime_error("ADD COLUMN invalid in read only mode.");
5066  }
5067  // TODO: Review add and drop column implementation
5068  const auto execute_write_lock =
5072  auto& catalog = session.getCatalog();
5073  const auto td_with_lock =
5075  catalog, *table_, true);
5076  const auto td = td_with_lock();
5077 
5078  check_executable(session, td);
5079 
5080  CHECK(td->fragmenter);
5081  if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
5082  td->fragmenter)) {
5083  throw std::runtime_error(
5084  "Adding columns to a table is not supported when using the \"sort_column\" "
5085  "option.");
5086  }
5087 
5088  // invalidate cached item
5089  std::vector<int> table_key{catalog.getCurrentDB().dbId, td->tableId};
5090  ResultSetCacheInvalidator::invalidateCachesByTable(boost::hash_value(table_key));
5091 
5092  // Do not take a data write lock, as the fragmenter may call `deleteFragments`
5093  // during a cap operation. Note that the schema write lock will prevent concurrent
5094  // inserts along with all other queries.
5095 
5096  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
5097  try {
// cds: new column descriptors keyed by column name.
// cid_coldefs: column id -> originating column definition; physical geo columns are
// recorded with a nullptr definition.
5098  std::map<const std::string, const ColumnDescriptor> cds;
5099  std::map<const int, const ColumnDef*> cid_coldefs;
5100  for (const auto& coldef : coldefs_) {
5101  ColumnDescriptor cd;
5102  setColumnDescriptor(cd, coldef.get());
5103  catalog.addColumn(*td, cd);
5104  cds.emplace(*coldef->get_column_name(), cd);
5105  cid_coldefs.emplace(cd.columnId, coldef.get());
5106 
5107  // expand geo column to phy columns
5108  if (cd.columnType.is_geometry()) {
5109  std::list<ColumnDescriptor> phy_geo_columns;
5110  catalog.expandGeoColumn(cd, phy_geo_columns);
5111  for (auto& cd : phy_geo_columns) {
5112  catalog.addColumn(*td, cd);
5113  cds.emplace(cd.columnName, cd);
5114  cid_coldefs.emplace(cd.columnId, nullptr);
5115  }
5116  }
5117  }
5118 
5119  std::unique_ptr<import_export::Loader> loader(new import_export::Loader(catalog, td));
5120  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
// The buffers keep pointers to the descriptors stored in `cds`, so `cds` must outlive
// them.
5121  for (const auto& cd : cds) {
5122  import_buffers.emplace_back(std::make_unique<import_export::TypedImportBuffer>(
5123  &cd.second, loader->getStringDict(&cd.second)));
5124  }
5125  loader->setAddingColumns(true);
5126 
5127  // set_geo_physical_import_buffer below needs a sorted import_buffers
5128  std::sort(import_buffers.begin(),
5129  import_buffers.end(),
5130  [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
5131  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
5132  });
5133 
5134  size_t nrows = td->fragmenter->getNumRows();
5135  // if sharded, get total nrows from all sharded tables
5136  if (td->nShards > 0) {
5137  const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
5138  nrows = 0;
5139  std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](const auto& td) {
5140  nrows += td->fragmenter->getNumRows();
5141  });
5142  }
// Only existing rows need a value for the new columns; an empty table skips this part.
// NOTE(review): a single value is added per buffer while `nrows` is passed to the loader
// below - presumably the loader replicates it across all rows; confirm.
5143  if (nrows > 0) {
5144  int skip_physical_cols = 0;
5145  for (const auto cit : cid_coldefs) {
5146  const auto cd = catalog.getMetadataForColumn(td->tableId, cit.first);
5147  const auto coldef = cit.second;
5148  const bool is_null = !cd->default_value.has_value();
5149 
5150  if (cd->columnType.get_notnull() && is_null) {
5151  throw std::runtime_error("Default value required for column " + cd->columnName +
5152  " because of NOT NULL constraint");
5153  }
5154 
5155  for (auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
5156  auto& import_buffer = *it;
5157  if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
5158  if (coldef != nullptr ||
5159  skip_physical_cols-- <= 0) { // skip non-null phy col
5160  import_buffer->add_value(cd,
5161  cd->default_value.value_or("NULL"),
5162  is_null,
5164  if (cd->columnType.is_geometry()) {
5165  std::vector<double> coords, bounds;
5166  std::vector<int> ring_sizes, poly_rings;
5167  int render_group = 0;
5168  SQLTypeInfo tinfo{cd->columnType};
5170  cd->default_value.value_or("NULL"),
5171  tinfo,
5172  coords,
5173  bounds,
5174  ring_sizes,
5175  poly_rings,
5176  false)) {
5177  throw std::runtime_error("Bad geometry data: '" +
5178  cd->default_value.value_or("NULL") + "'");
5179  }
// col_idx is the position right after the geo column's own buffer in the sorted vector.
5180  size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
5182  cd,
5183  import_buffers,
5184  col_idx,
5185  coords,
5186  bounds,
5187  ring_sizes,
5188  poly_rings,
5189  render_group);
5190  // skip following phy cols
5191  skip_physical_cols = cd->columnType.get_physical_cols();
5192  }
5193  }
5194  break;
5195  }
5196  }
5197  }
5198  }
5199 
5200  if (!loader->loadNoCheckpoint(import_buffers, nrows, &session)) {
5201  throw std::runtime_error("loadNoCheckpoint failed!");
5202  }
5203  catalog.roll(true);
5204  catalog.resetTableEpochFloor(td->tableId);
5205  loader->checkpoint();
5206  catalog.getSqliteConnector().query("END TRANSACTION");
5207  } catch (...) {
5208  catalog.roll(false);
5209  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
5210  throw;
5211  }
5212 }
5213 
// DROP COLUMN execution body (the first line of the signature, the lock-acquisition lines
// and a few interior lines are not present in this listing).
// Visible flow: reject in read-only mode, validate the table (must exist, not a view, not
// temporary), check the alter privilege, verify that every named column exists and that
// the table would not be left without columns, clear external caches, then inside a
// catalog SQLite transaction drop each column (plus its physical columns) from the
// catalog and from every physical table's fragmenter. On any exception the table is
// marked for reload, the catalog and the SQLite transaction are rolled back and the
// exception is rethrown.
5215  bool read_only_mode) {
5216  if (read_only_mode) {
5217  throw std::runtime_error("DROP COLUMN invalid in read only mode.");
5218  }
5219  // TODO: Review add and drop column implementation
5220  const auto execute_write_lock =
5224  auto& catalog = session.getCatalog();
5225  const auto td_with_lock =
5227  catalog, *table_, true);
5228  const auto td = td_with_lock();
5229  if (!td) {
5230  throw std::runtime_error("Table " + *table_ + " does not exist.");
5231  }
5233  if (td->isView) {
5234  throw std::runtime_error("Dropping a column from a view is not supported.");
5235  }
5236  if (table_is_temporary(td)) {
5237  throw std::runtime_error(
5238  "Dropping a column from a temporary table is not yet supported.");
5239  }
5240 
5241  check_alter_table_privilege(session, td);
5242 
5243  for (const auto& column : columns_) {
5244  if (nullptr == catalog.getMetadataForColumn(td->tableId, *column)) {
5245  throw std::runtime_error("Column " + *column + " does not exist.");
5246  }
5247  }
5248 
// NOTE(review): the thresholds 2 / 3 presumably account for hidden system columns counted
// in nColumns (one more when the table has a deleted-marker column) - confirm.
5249  if (td->nColumns <= (td->hasDeletedCol ? 3 : 2)) {
5250  throw std::runtime_error("Table " + *table_ + " has only one column.");
5251  }
5252 
5253  // invalidate cached item
5254  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
5255 
5256  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
5257  try {
5258  std::vector<int> columnIds;
5259  for (const auto& column : columns_) {
// Work on a copy of the descriptor.
5260  ColumnDescriptor cd = *catalog.getMetadataForColumn(td->tableId, *column);
5261  if (td->nShards > 0 && td->shardedColumnId == cd.columnId) {
5262  throw std::runtime_error("Dropping sharding column " + cd.columnName +
5263  " is not supported.");
5264  }
5265  catalog.dropColumn(*td, cd);
5266  columnIds.push_back(cd.columnId);
// Physical columns are looked up at the ids immediately following the logical column.
5267  for (int i = 0; i < cd.columnType.get_physical_cols(); i++) {
5268  const auto pcd = catalog.getMetadataForColumn(td->tableId, cd.columnId + i + 1);
5269  CHECK(pcd);
5270  catalog.dropColumn(*td, *pcd);
5271  columnIds.push_back(cd.columnId + i + 1);
5272  }
5273  }
5274 
5275  for (auto shard : catalog.getPhysicalTablesDescriptors(td)) {
5276  shard->fragmenter->dropColumns(columnIds);
5277  }
5278  // if test forces to rollback
5280  throw std::runtime_error("lol!");
5281  }
5282  catalog.roll(true);
5284  catalog.resetTableEpochFloor(td->tableId);
5285  catalog.checkpoint(td->tableId);
5286  }
5287  catalog.getSqliteConnector().query("END TRANSACTION");
5288  } catch (...) {
5289  catalog.setForReload(td->tableId);
5290  catalog.roll(false);
5291  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
5292  throw;
5293  }
5294 }
5295 
// RENAME COLUMN execution body (the first line of the signature, the lock-acquisition
// lines and one interior line are not present in this listing).
// Visible flow: reject in read-only mode, look the table up (a missing descriptor trips
// CHECK), check the alter privilege, require that the source column exists and that the
// new name is not taken, then delegate the rename to the catalog.
5297  bool read_only_mode) {
5298  if (read_only_mode) {
5299  throw std::runtime_error("RENAME COLUMN invalid in read only mode.");
5300  }
5301  auto& catalog = session.getCatalog();
5302 
5303  const auto execute_read_lock =
5307 
5308  const auto td_with_lock =
5310  catalog, *table_, false);
5311  const auto td = td_with_lock();
5312  CHECK(td);
5314 
5315  check_alter_table_privilege(session, td);
5316  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *column_);
5317  if (cd == nullptr) {
5318  throw std::runtime_error("Column " + *column_ + " does not exist.");
5319  }
5320  if (catalog.getMetadataForColumn(td->tableId, *new_column_name_) != nullptr) {
5321  throw std::runtime_error("Column " + *new_column_name_ + " already exists.");
5322  }
5323  catalog.renameColumn(td, cd, *new_column_name_);
5324 }
5325 
5327  bool read_only_mode) {
      // Executes an ALTER TABLE statement that sets one table parameter (param_) on
      // table *table_. The parameter name is matched case-insensitively against
      // "max_rollback_epochs", "epoch" and "max_rows"; the value must be an integer
      // literal.
      //
      // Throws std::runtime_error when read_only_mode is set, when the table does
      // not exist, is a view or is temporary, when the value is not an IntLiteral,
      // or when the parameter name is not one of the settable names.
      //
      // NOTE(review): the first line of the signature (listing line 5326) is not
      // present in this listing; `session` is used below, so it is presumably the
      // first parameter -- confirm against the original file.
5328  if (read_only_mode) {
5329  throw std::runtime_error("ALTER TABLE invalid in read only mode.");
5330  }
      // Function-local dispatch table: lower-cased parameter name -> enum tag that
      // the switch at the bottom of the function acts on.
5331  enum TableParamType { MaxRollbackEpochs, Epoch, MaxRows };
5332  static const std::unordered_map<std::string, TableParamType> param_map = {
5333  {"max_rollback_epochs", TableParamType::MaxRollbackEpochs},
5334  {"epoch", TableParamType::Epoch},
5335  {"max_rows", TableParamType::MaxRows}};
      // NOTE(review): the initializer of execute_read_lock (listing lines 5337-5339)
      // is missing here. Judging by the name only, it presumably holds an
      // executor-level lock for the rest of the function -- confirm.
5336  const auto execute_read_lock =
5340  auto& catalog = session.getCatalog();
      // NOTE(review): the callee that produces td_with_lock (listing line 5342) is
      // missing here; only its arguments (catalog, *table_, false) are visible.
5341  const auto td_with_lock =
5343  catalog, *table_, false);
5344  const auto td = td_with_lock();
      // Unlike a CHECK, a missing table is reported to the caller as an error.
5345  if (!td) {
5346  throw std::runtime_error("Table " + *table_ + " does not exist.");
5347  }
      // Views and temporary tables are rejected before any privilege check.
5348  if (td->isView) {
5349  throw std::runtime_error("Setting parameters for a view is not supported.");
5350  }
5351  if (table_is_temporary(td)) {
5352  throw std::runtime_error(
5353  "Setting parameters for a temporary table is not yet supported.");
5354  }
      // Privilege check on td for this session (helper defined elsewhere).
5355  check_alter_table_privilege(session, td);
5356 
      // The cache key is the boost hash of {database id, table id}. Note that the
      // invalidation runs before the parameter name/value are validated, so caches
      // are dropped even when the statement is subsequently rejected below.
5357  // invalidate cached item
5358  std::vector<int> table_key{catalog.getCurrentDB().dbId, td->tableId};
5359  ResultSetCacheInvalidator::invalidateCachesByTable(boost::hash_value(table_key));
5360 
      // Work on a lower-cased copy of the name so the lookup is case-insensitive.
5361  std::string param_name(*param_->get_name());
5362  boost::algorithm::to_lower(param_name);
      // The value type is checked before the name is looked up, so an unknown
      // parameter with a non-integer value reports the "should be integers" error.
5363  const IntLiteral* val_int_literal =
5364  dynamic_cast<const IntLiteral*>(param_->get_value());
5365  if (val_int_literal == nullptr) {
5366  throw std::runtime_error("Table parameters should be integers.");
5367  }
5368  const int64_t param_val = val_int_literal->get_intval();
5369 
5370  const auto param_it = param_map.find(param_name);
5371  if (param_it == param_map.end()) {
5372  throw std::runtime_error(param_name + " is not a settable table parameter.");
5373  }
      // Forward the value to the matching catalog setter. No range check on
      // param_val is performed here; NOTE(review): any validation presumably
      // happens inside the catalog setters -- confirm.
5374  switch (param_it->second) {
5375  case MaxRollbackEpochs: {
5376  catalog.setMaxRollbackEpochs(td->tableId, param_val);
5377  break;
5378  }
5379  case Epoch: {
5380  catalog.setTableEpoch(catalog.getDatabaseId(), td->tableId, param_val);
5381  break;
5382  }
5383  case MaxRows: {
5384  catalog.setMaxRows(td->tableId, param_val);
5385  break;
5386  }
      // Every value stored in param_map has a case above, so this branch can only
      // be reached if a new enum value is added without a matching case.
5387  default: {
5388  UNREACHABLE() << "Unexpected TableParamType value: " << param_it->second
5389  << ", key: " << param_it->first;
5390  }
5391  }
5392 }
5393 
5395  std::string* f,
5396  std::list<NameValueAssign*>* o)
5397  : table_(t), copy_from_source_pattern_(f), success_(true) {
5398  if (o) {
5399  for (const auto e : *o) {
5400  options_.emplace_back(e);
5401  }
5402  delete o;