OmniSciDB  ca0c39ec8f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParserNode.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "ParserNode.h"
24 
25 #include <boost/algorithm/string.hpp>
26 #include <boost/core/null_deleter.hpp>
27 #include <boost/filesystem.hpp>
28 #include <boost/function.hpp>
29 
30 #include <rapidjson/document.h>
31 #include <rapidjson/stringbuffer.h>
32 #include <rapidjson/writer.h>
33 
34 #include <cassert>
35 #include <cmath>
36 #include <limits>
37 #include <random>
38 #include <regex>
39 #include <stdexcept>
40 #include <string>
41 #include <type_traits>
42 #include <typeinfo>
43 
45 #include "Catalog/Catalog.h"
51 #include "Geospatial/Compression.h"
52 #include "Geospatial/Types.h"
54 #include "ImportExport/Importer.h"
56 #include "LockMgr/LockMgr.h"
60 #include "QueryEngine/Execute.h"
65 #include "ReservedKeywords.h"
66 #include "Shared/StringTransform.h"
67 #include "Shared/SysDefinitions.h"
69 #include "Shared/measure.h"
70 #include "Shared/shard_key.h"
72 #include "Utils/FsiUtils.h"
73 
74 #include "gen-cpp/CalciteServer.h"
75 
// g_leaf_count is defined in this translation unit and starts at zero.
76 size_t g_leaf_count{0};
// These flags are only declared here; their definitions live in another translation unit.
78 extern bool g_enable_string_functions;
79 extern bool g_enable_fsi;
80 
// Defined (default false) only when the build sets ENABLE_IMPORT_PARQUET.
82 #ifdef ENABLE_IMPORT_PARQUET
83 bool g_enable_legacy_parquet_import{false};
84 #endif
86 
88 
// Makes the std::string literal suffix (""s) available in this file.
90 using namespace std::string_literals;
91 
// Callable type returning void that receives a TableDescriptor& and the column list.
// NOTE(review): this listing skips source line 93, so a parameter between the two
// shown here may be missing - confirm against the original file.
92 using TableDefFuncPtr = boost::function<void(TableDescriptor&,
94  const std::list<ColumnDescriptor>& columns)>;
95 
// Same shape as TableDefFuncPtr, but for DataframeTableDescriptor.
// NOTE(review): this listing skips source line 98 - confirm the full parameter list.
96 using DataframeDefFuncPtr =
97  boost::function<void(DataframeTableDescriptor&,
99  const std::list<ColumnDescriptor>& columns)>;
100 
101 namespace Parser {
// Reports whether the query session `query_session` is flagged as interrupted,
// according to `executor`.
//
// Returns the result of executor->checkIsQuerySessionInterrupted() on the guarded
// path, and false otherwise.
// NOTE(review): this listing jumps from source line 105 to 108, so the condition that
// opens the guarded block and the declaration of `session_read_lock` are not visible
// here - confirm against the original file before editing this function.
102 bool check_session_interrupted(const QuerySessionId& query_session, Executor* executor) {
103  // we call this function with unitary executor but is okay since
104  // we know the exact session info from a global session map object
105  // in the executor
108  executor->getSessionLock());
109  return executor->checkIsQuerySessionInterrupted(query_session, session_read_lock);
110  }
111  return false;
112 }
113 
114 std::vector<int> getTableChunkKey(const TableDescriptor* td,
115  Catalog_Namespace::Catalog& catalog) {
116  std::vector<int> table_chunk_key_prefix;
117  if (td) {
118  if (td->fragmenter) {
119  table_chunk_key_prefix = td->fragmenter->getFragmentsForQuery().chunkKeyPrefix;
120  } else {
121  table_chunk_key_prefix.push_back(catalog.getCurrentDB().dbId);
122  table_chunk_key_prefix.push_back(td->tableId);
123  }
124  }
125  return table_chunk_key_prefix;
126 }
127 
128 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
129  const Catalog_Namespace::Catalog& catalog,
130  Analyzer::Query& query,
131  TlistRefType allow_tlist_ref) const {
132  return makeExpr<Analyzer::Constant>(kNULLT, true);
133 }
134 
135 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
136  const Catalog_Namespace::Catalog& catalog,
137  Analyzer::Query& query,
138  TlistRefType allow_tlist_ref) const {
139  return analyzeValue(*stringval_, false);
140 }
141 
142 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(const std::string& stringval,
143  const bool is_null) {
144  if (!is_null) {
145  const SQLTypeInfo ti(kVARCHAR, stringval.length(), 0, true);
146  Datum d;
147  d.stringval = new std::string(stringval);
148  return makeExpr<Analyzer::Constant>(ti, false, d);
149  }
150  // Null value
151  return makeExpr<Analyzer::Constant>(kVARCHAR, true);
152 }
153 
154 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
155  const Catalog_Namespace::Catalog& catalog,
156  Analyzer::Query& query,
157  TlistRefType allow_tlist_ref) const {
158  return analyzeValue(intval_);
159 }
160 
161 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(const int64_t intval) {
162  SQLTypes t;
163  Datum d;
164  if (intval >= INT16_MIN && intval <= INT16_MAX) {
165  t = kSMALLINT;
166  d.smallintval = (int16_t)intval;
167  } else if (intval >= INT32_MIN && intval <= INT32_MAX) {
168  t = kINT;
169  d.intval = (int32_t)intval;
170  } else {
171  t = kBIGINT;
172  d.bigintval = intval;
173  }
174  return makeExpr<Analyzer::Constant>(t, false, d);
175 }
176 
177 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
178  const Catalog_Namespace::Catalog& catalog,
179  Analyzer::Query& query,
180  TlistRefType allow_tlist_ref) const {
181  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
182  Datum d = StringToDatum(*fixedptval_, ti);
183  return makeExpr<Analyzer::Constant>(ti, false, d);
184 }
185 
186 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(const int64_t numericval,
187  const int scale,
188  const int precision) {
189  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
190  ti.set_scale(scale);
191  ti.set_precision(precision);
192  Datum d;
193  d.bigintval = numericval;
194  return makeExpr<Analyzer::Constant>(ti, false, d);
195 }
196 
197 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
198  const Catalog_Namespace::Catalog& catalog,
199  Analyzer::Query& query,
200  TlistRefType allow_tlist_ref) const {
201  Datum d;
202  d.floatval = floatval_;
203  return makeExpr<Analyzer::Constant>(kFLOAT, false, d);
204 }
205 
206 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
207  const Catalog_Namespace::Catalog& catalog,
208  Analyzer::Query& query,
209  TlistRefType allow_tlist_ref) const {
210  Datum d;
211  d.doubleval = doubleval_;
212  return makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
213 }
214 
215 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
216  const Catalog_Namespace::Catalog& catalog,
217  Analyzer::Query& query,
218  TlistRefType allow_tlist_ref) const {
219  return get(timestampval_);
220 }
221 
222 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(const int64_t timestampval) {
223  Datum d;
224  d.bigintval = timestampval;
225  return makeExpr<Analyzer::Constant>(kTIMESTAMP, false, d);
226 }
227 
228 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
229  const Catalog_Namespace::Catalog& catalog,
230  Analyzer::Query& query,
231  TlistRefType allow_tlist_ref) const {
232  Datum d;
233  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
234 }
235 
236 std::shared_ptr<Analyzer::Expr> UserLiteral::get(const std::string& user) {
237  Datum d;
238  d.stringval = new std::string(user);
239  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
240 }
241 
242 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
243  const Catalog_Namespace::Catalog& catalog,
244  Analyzer::Query& query,
245  TlistRefType allow_tlist_ref) const {
246  SQLTypeInfo ti = SQLTypeInfo(kARRAY, true);
247  bool set_subtype = true;
248  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
249  for (auto& p : value_list_) {
250  auto e = p->analyze(catalog, query, allow_tlist_ref);
251  CHECK(e);
252  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
253  if (c != nullptr && c->get_is_null()) {
254  value_exprs.push_back(c);
255  continue;
256  }
257  auto subtype = e->get_type_info().get_type();
258  if (subtype == kNULLT) {
259  // NULL element
260  } else if (set_subtype) {
261  ti.set_subtype(subtype);
262  set_subtype = false;
263  }
264  value_exprs.push_back(e);
265  }
266  std::shared_ptr<Analyzer::Expr> result =
267  makeExpr<Analyzer::Constant>(ti, false, value_exprs);
268  return result;
269 }
270 
271 std::string ArrayLiteral::to_string() const {
272  std::string str = "{";
273  bool notfirst = false;
274  for (auto& p : value_list_) {
275  if (notfirst) {
276  str += ", ";
277  } else {
278  notfirst = true;
279  }
280  str += p->to_string();
281  }
282  str += "}";
283  return str;
284 }
285 
286 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
287  const Catalog_Namespace::Catalog& catalog,
288  Analyzer::Query& query,
289  TlistRefType allow_tlist_ref) const {
290  auto left_expr = left_->analyze(catalog, query, allow_tlist_ref);
291  const auto& left_type = left_expr->get_type_info();
292  if (right_ == nullptr) {
293  return makeExpr<Analyzer::UOper>(
294  left_type, left_expr->get_contains_agg(), optype_, left_expr->decompress());
295  }
296  if (optype_ == kARRAY_AT) {
297  if (left_type.get_type() != kARRAY) {
298  throw std::runtime_error(left_->to_string() + " is not of array type.");
299  }
300  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
301  const auto& right_type = right_expr->get_type_info();
302  if (!right_type.is_integer()) {
303  throw std::runtime_error(right_->to_string() + " is not of integer type.");
304  }
305  return makeExpr<Analyzer::BinOper>(
306  left_type.get_elem_type(), false, kARRAY_AT, kONE, left_expr, right_expr);
307  }
308  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
309  return normalize(optype_, opqualifier_, left_expr, right_expr);
310 }
311 
312 bool exprs_share_one_and_same_rte_idx(const std::shared_ptr<Analyzer::Expr>& lhs_expr,
313  const std::shared_ptr<Analyzer::Expr>& rhs_expr) {
314  std::set<int> lhs_rte_idx;
315  lhs_expr->collect_rte_idx(lhs_rte_idx);
316  CHECK(!lhs_rte_idx.empty());
317  std::set<int> rhs_rte_idx;
318  rhs_expr->collect_rte_idx(rhs_rte_idx);
319  CHECK(!rhs_rte_idx.empty());
320  return lhs_rte_idx.size() == 1UL && lhs_rte_idx == rhs_rte_idx;
321 }
322 
323 SQLTypeInfo const& get_str_dict_cast_type(const SQLTypeInfo& lhs_type_info,
324  const SQLTypeInfo& rhs_type_info,
325  const Executor* executor) {
326  CHECK(lhs_type_info.is_string());
327  CHECK(lhs_type_info.get_compression() == kENCODING_DICT);
328  CHECK(rhs_type_info.is_string());
329  CHECK(rhs_type_info.get_compression() == kENCODING_DICT);
330  const auto lhs_comp_param = lhs_type_info.get_comp_param();
331  const auto rhs_comp_param = rhs_type_info.get_comp_param();
332  CHECK_NE(lhs_comp_param, rhs_comp_param);
333  if (lhs_type_info.get_comp_param() == TRANSIENT_DICT_ID) {
334  return rhs_type_info;
335  }
336  if (rhs_type_info.get_comp_param() == TRANSIENT_DICT_ID) {
337  return lhs_type_info;
338  }
339  // If here then neither lhs or rhs type was transient, we should see which
340  // type has the largest dictionary and make that the destination type
341  const auto lhs_sdp = executor->getStringDictionaryProxy(lhs_comp_param, true);
342  const auto rhs_sdp = executor->getStringDictionaryProxy(rhs_comp_param, true);
343  return lhs_sdp->entryCount() >= rhs_sdp->entryCount() ? lhs_type_info : rhs_type_info;
344 }
345 
// NOTE(review): the first line of this function's signature (source line 346: return
// type, name and first parameter) is missing from this listing. The body below uses
// `lhs_type_info`, so that is presumably the missing parameter, and it returns a
// local SQLTypeInfo - confirm against the original file.
//
// Chooses a common type for two string types (both must be strings, enforced by CHECK):
//  - both dictionary-encoded with equal comp params, or lhs equal to
//    TRANSIENT_DICT(rhs): the side with the smaller-or-equal comp param;
//  - both dictionary-encoded otherwise: whatever get_str_dict_cast_type() picks;
//  - at least one side none-encoded: the other side (lhs when rhs is none-encoded,
//    rhs otherwise); if that pick is itself none-encoded its dimension becomes the
//    larger of the two dimensions.
347  const SQLTypeInfo& rhs_type_info,
348  const Executor* executor) {
349  CHECK(lhs_type_info.is_string());
350  CHECK(rhs_type_info.is_string());
351  const auto lhs_comp_param = lhs_type_info.get_comp_param();
352  const auto rhs_comp_param = rhs_type_info.get_comp_param();
353  if (lhs_type_info.is_dict_encoded_string() && rhs_type_info.is_dict_encoded_string()) {
354  if (lhs_comp_param == rhs_comp_param ||
355  lhs_comp_param == TRANSIENT_DICT(rhs_comp_param)) {
356  return lhs_comp_param <= rhs_comp_param ? lhs_type_info : rhs_type_info;
357  }
358  return get_str_dict_cast_type(lhs_type_info, rhs_type_info, executor);
359  }
// From here on at least one side is a none-encoded string.
360  CHECK(lhs_type_info.is_none_encoded_string() || rhs_type_info.is_none_encoded_string());
361  SQLTypeInfo ret_ti =
362  rhs_type_info.is_none_encoded_string() ? lhs_type_info : rhs_type_info;
// ret_ti is none-encoded only when both inputs are; widen it to fit either input.
363  if (ret_ti.is_none_encoded_string()) {
364  ret_ti.set_dimension(
365  std::max(lhs_type_info.get_dimension(), rhs_type_info.get_dimension()));
366  }
367  return ret_ti;
368 }
369 
// Builds the BinOper for `left_expr <optype> right_expr`, inserting the casts and
// decompress() calls that the operand types require.
//
// Steps, in order:
//  1. If either operand is date-in-days, both operands are decompressed.
//  2. With a qualifier other than kONE the right operand must be an array (otherwise
//     std::runtime_error); its element type is used for type analysis.
//  3. Analyzer::BinOper::analyze_type_info() yields the result type and the operand
//     types to cast to. A time-interval result returns immediately, without casts.
//  4. Operands whose type differs from the computed one are cast.
//  5. Comparisons reconcile string dictionary encodings (see inline comments); every
//     other operator decompresses both operands.
//
// `executor` is only used (after a CHECK) on the dictionary-translation path.
// Throws std::runtime_error for a qualified non-array right operand and for a
// non-OVERLAPS comparison between two geometry operands.
// NOTE(review): this listing jumps from source line 439 to 441 inside the translation
// branch; a statement may be missing there - confirm against the original file.
370 std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
371  const SQLOps optype,
372  const SQLQualifier qual,
373  std::shared_ptr<Analyzer::Expr> left_expr,
374  std::shared_ptr<Analyzer::Expr> right_expr,
375  const Executor* executor) {
376  if (left_expr->get_type_info().is_date_in_days() ||
377  right_expr->get_type_info().is_date_in_days()) {
378  // Do not propagate encoding
379  left_expr = left_expr->decompress();
380  right_expr = right_expr->decompress();
381  }
382  const auto& left_type = left_expr->get_type_info();
383  auto right_type = right_expr->get_type_info();
384  if (qual != kONE) {
385  // subquery not supported yet.
386  CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
387  if (right_type.get_type() != kARRAY) {
388  throw std::runtime_error(
389  "Existential or universal qualifiers can only be used in front of a subquery "
390  "or an "
391  "expression of array type.");
392  }
// Type analysis below works on the array's element type.
393  right_type = right_type.get_elem_type();
394  }
395  SQLTypeInfo new_left_type;
396  SQLTypeInfo new_right_type;
397  auto result_type = Analyzer::BinOper::analyze_type_info(
398  optype, left_type, right_type, &new_left_type, &new_right_type);
// Time-interval results skip all casting; contains-agg is passed as false here.
399  if (result_type.is_timeinterval()) {
400  return makeExpr<Analyzer::BinOper>(
401  result_type, false, optype, qual, left_expr, right_expr);
402  }
403  if (left_type != new_left_type) {
404  left_expr = left_expr->add_cast(new_left_type);
405  }
406  if (right_type != new_right_type) {
407  if (qual == kONE) {
408  right_expr = right_expr->add_cast(new_right_type);
409  } else {
// Qualified operand is an array: cast to an array of the new element type.
410  right_expr = right_expr->add_cast(new_right_type.get_array_type());
411  }
412  }
413 
414  if (IS_COMPARISON(optype)) {
415  if (optype != kOVERLAPS && new_left_type.is_geometry() &&
416  new_right_type.is_geometry()) {
417  throw std::runtime_error(
418  "Comparison operators are not yet supported for geospatial types.");
419  }
420 
421  if (new_left_type.get_compression() == kENCODING_DICT &&
422  new_right_type.get_compression() == kENCODING_DICT) {
423  if (new_left_type.get_comp_param() != new_right_type.get_comp_param()) {
424  if (optype == kEQ || optype == kNE) {
425  // Join framework does its own string dictionary translation
426  // (at least partly since the rhs table projection does not use
427  // the normal runtime execution framework), so if we detect
428  // that the rte idxs of the two tables are different, bail
429  // on translating
430  const bool should_translate_strings =
431  exprs_share_one_and_same_rte_idx(left_expr, right_expr);
// The kEQ/kNE test below repeats the enclosing condition and is always true here.
432  if (should_translate_strings && (optype == kEQ || optype == kNE)) {
433  CHECK(executor);
434  // Make the type we're casting to the transient dictionary, if it exists,
435  // otherwise the largest dictionary in terms of number of entries
436  SQLTypeInfo ti(
437  get_str_dict_cast_type(new_left_type, new_right_type, executor));
// Cast whichever side does not already have the chosen type.
438  auto& expr_to_cast = ti == new_left_type ? right_expr : left_expr;
439  ti.set_fixed_size();
441  expr_to_cast = expr_to_cast->add_cast(ti);
442  } else {  // kEQ / kNE where dictionary translation is skipped
443  // The operands refer to different range-table entries, so translation is
444  // left to the join framework (see above); decompress both sides instead
445  // of casting between dictionaries
446  left_expr = left_expr->decompress();
447  right_expr = right_expr->decompress();
448  }
449  } else {  // Comparison other than kEQ / kNE
450  // We do not currently support ordered (i.e. >, <=) comparisons between
451  // dictionary-encoded columns that use different dictionaries, so both
452  // sides are decompressed
453  left_expr = left_expr->decompress();
454  right_expr = right_expr->decompress();
455  }
456  } else {  // Strings shared comp param
457  if (!(optype == kEQ || optype == kNE)) {
458  // We do not currently support ordered (i.e. >, <=) comparisons between
459  // encoded columns, so try to decode (will only succeed with watchdog off)
460  left_expr = left_expr->decompress();
461  right_expr = right_expr->decompress();
462  } else {
463  // do nothing, can directly support equals/non-equals comparisons between two
464  // dictionary encoded columns sharing the same dictionary as these are
465  // effectively integer comparisons in the same dictionary space
466  }
467  }
468  } else if (new_left_type.get_compression() == kENCODING_DICT &&
469  new_right_type.get_compression() == kENCODING_NONE) {
// Only the left side is dictionary-encoded: cast the right side into its dictionary.
470  SQLTypeInfo ti(new_right_type);
471  ti.set_compression(new_left_type.get_compression());
472  ti.set_comp_param(new_left_type.get_comp_param());
473  ti.set_fixed_size();
474  right_expr = right_expr->add_cast(ti);
475  } else if (new_right_type.get_compression() == kENCODING_DICT &&
476  new_left_type.get_compression() == kENCODING_NONE) {
// Mirror case: cast the left side into the right side's dictionary.
477  SQLTypeInfo ti(new_left_type);
478  ti.set_compression(new_right_type.get_compression());
479  ti.set_comp_param(new_right_type.get_comp_param());
480  ti.set_fixed_size();
481  left_expr = left_expr->add_cast(ti);
482  } else {
483  left_expr = left_expr->decompress();
484  right_expr = right_expr->decompress();
485  }
486  } else {
487  // Is this now a no-op just for pairs of none-encoded string columns
488  left_expr = left_expr->decompress();
489  right_expr = right_expr->decompress();
490  }
491  bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
492  return makeExpr<Analyzer::BinOper>(
493  result_type, has_agg, optype, qual, left_expr, right_expr);
494 }
495 
496 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
497  const Catalog_Namespace::Catalog& catalog,
498  Analyzer::Query& query,
499  TlistRefType allow_tlist_ref) const {
500  throw std::runtime_error("Subqueries are not supported yet.");
501  return nullptr;
502 }
503 
504 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
505  const Catalog_Namespace::Catalog& catalog,
506  Analyzer::Query& query,
507  TlistRefType allow_tlist_ref) const {
508  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
509  auto result = makeExpr<Analyzer::UOper>(kBOOLEAN, kISNULL, arg_expr);
510  if (is_not_) {
511  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
512  }
513  return result;
514 }
515 
516 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
517  const Catalog_Namespace::Catalog& catalog,
518  Analyzer::Query& query,
519  TlistRefType allow_tlist_ref) const {
520  throw std::runtime_error("Subqueries are not supported yet.");
521  return nullptr;
522 }
523 
524 std::shared_ptr<Analyzer::Expr> InValues::analyze(
525  const Catalog_Namespace::Catalog& catalog,
526  Analyzer::Query& query,
527  TlistRefType allow_tlist_ref) const {
528  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
529  SQLTypeInfo ti = arg_expr->get_type_info();
530  bool dict_comp = ti.get_compression() == kENCODING_DICT;
531  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
532  for (auto& p : value_list_) {
533  auto e = p->analyze(catalog, query, allow_tlist_ref);
534  if (ti != e->get_type_info()) {
535  if (ti.is_string() && e->get_type_info().is_string()) {
536  // Todo(todd): Can we have this leverage the cast framework as well
537  ti = Analyzer::BinOper::common_string_type(ti, e->get_type_info());
538  } else if (ti.is_number() && e->get_type_info().is_number()) {
539  ti = Analyzer::BinOper::common_numeric_type(ti, e->get_type_info());
540  } else {
541  throw std::runtime_error("IN expressions must contain compatible types.");
542  }
543  }
544  if (dict_comp) {
545  value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
546  } else {
547  value_exprs.push_back(e);
548  }
549  }
550  if (!dict_comp) {
551  arg_expr = arg_expr->decompress();
552  arg_expr = arg_expr->add_cast(ti);
553  std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
554  for (auto p : value_exprs) {
555  cast_vals.push_back(p->add_cast(ti));
556  }
557  value_exprs.swap(cast_vals);
558  }
559  std::shared_ptr<Analyzer::Expr> result =
560  makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
561  if (is_not_) {
562  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
563  }
564  return result;
565 }
566 
567 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
568  const Catalog_Namespace::Catalog& catalog,
569  Analyzer::Query& query,
570  TlistRefType allow_tlist_ref) const {
571  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
572  auto lower_expr = lower_->analyze(catalog, query, allow_tlist_ref);
573  auto upper_expr = upper_->analyze(catalog, query, allow_tlist_ref);
574  SQLTypeInfo new_left_type, new_right_type;
576  arg_expr->get_type_info(),
577  lower_expr->get_type_info(),
578  &new_left_type,
579  &new_right_type);
580  auto lower_pred =
581  makeExpr<Analyzer::BinOper>(kBOOLEAN,
582  kGE,
583  kONE,
584  arg_expr->add_cast(new_left_type)->decompress(),
585  lower_expr->add_cast(new_right_type)->decompress());
587  arg_expr->get_type_info(),
588  lower_expr->get_type_info(),
589  &new_left_type,
590  &new_right_type);
591  auto upper_pred = makeExpr<Analyzer::BinOper>(
592  kBOOLEAN,
593  kLE,
594  kONE,
595  arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
596  upper_expr->add_cast(new_right_type)->decompress());
597  std::shared_ptr<Analyzer::Expr> result =
598  makeExpr<Analyzer::BinOper>(kBOOLEAN, kAND, kONE, lower_pred, upper_pred);
599  if (is_not_) {
600  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
601  }
602  return result;
603 }
604 
605 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
606  const Catalog_Namespace::Catalog& catalog,
607  Analyzer::Query& query,
608  TlistRefType allow_tlist_ref) const {
609  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
610  if (!arg_expr->get_type_info().is_string()) {
611  throw std::runtime_error(
612  "expression in char_length clause must be of a string type.");
613  }
614  std::shared_ptr<Analyzer::Expr> result =
615  makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length_);
616  return result;
617 }
618 
619 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
620  const Catalog_Namespace::Catalog& catalog,
621  Analyzer::Query& query,
622  TlistRefType allow_tlist_ref) const {
623  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
624  if (!arg_expr->get_type_info().is_array()) {
625  throw std::runtime_error(
626  "expression in cardinality clause must be of an array type.");
627  }
628  std::shared_ptr<Analyzer::Expr> result =
629  makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
630  return result;
631 }
632 
633 void LikeExpr::check_like_expr(const std::string& like_str, char escape_char) {
634  if (like_str.back() == escape_char) {
635  throw std::runtime_error("LIKE pattern must not end with escape character.");
636  }
637 }
638 
639 bool LikeExpr::test_is_simple_expr(const std::string& like_str, char escape_char) {
640  // if not bounded by '%' then not a simple string
641  if (like_str.size() < 2 || like_str[0] != '%' || like_str[like_str.size() - 1] != '%') {
642  return false;
643  }
644  // if the last '%' is escaped then not a simple string
645  if (like_str[like_str.size() - 2] == escape_char &&
646  like_str[like_str.size() - 3] != escape_char) {
647  return false;
648  }
649  for (size_t i = 1; i < like_str.size() - 1; i++) {
650  if (like_str[i] == '%' || like_str[i] == '_' || like_str[i] == '[' ||
651  like_str[i] == ']') {
652  if (like_str[i - 1] != escape_char) {
653  return false;
654  }
655  }
656  }
657  return true;
658 }
659 
660 void LikeExpr::erase_cntl_chars(std::string& like_str, char escape_char) {
661  char prev_char = '\0';
662  // easier to create new string of allowable chars
663  // rather than erase chars from
664  // existing string
665  std::string new_str;
666  for (char& cur_char : like_str) {
667  if (cur_char == '%' || cur_char == escape_char) {
668  if (prev_char != escape_char) {
669  prev_char = cur_char;
670  continue;
671  }
672  }
673  new_str.push_back(cur_char);
674  prev_char = cur_char;
675  }
676  like_str = new_str;
677 }
678 
679 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
680  const Catalog_Namespace::Catalog& catalog,
681  Analyzer::Query& query,
682  TlistRefType allow_tlist_ref) const {
683  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
684  auto like_expr = like_string_->analyze(catalog, query, allow_tlist_ref);
685  auto escape_expr = escape_string_ == nullptr
686  ? nullptr
687  : escape_string_->analyze(catalog, query, allow_tlist_ref);
688  return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike_, is_not_);
689 }
690 
691 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
692  std::shared_ptr<Analyzer::Expr> like_expr,
693  std::shared_ptr<Analyzer::Expr> escape_expr,
694  const bool is_ilike,
695  const bool is_not) {
696  if (!arg_expr->get_type_info().is_string()) {
697  throw std::runtime_error("expression before LIKE must be of a string type.");
698  }
699  if (!like_expr->get_type_info().is_string()) {
700  throw std::runtime_error("expression after LIKE must be of a string type.");
701  }
702  char escape_char = '\\';
703  if (escape_expr != nullptr) {
704  if (!escape_expr->get_type_info().is_string()) {
705  throw std::runtime_error("expression after ESCAPE must be of a string type.");
706  }
707  if (!escape_expr->get_type_info().is_string()) {
708  throw std::runtime_error("expression after ESCAPE must be of a string type.");
709  }
710  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
711  if (c != nullptr && c->get_constval().stringval->length() > 1) {
712  throw std::runtime_error("String after ESCAPE must have a single character.");
713  }
714  escape_char = (*c->get_constval().stringval)[0];
715  }
716  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(like_expr);
717  bool is_simple = false;
718  if (c != nullptr) {
719  std::string& pattern = *c->get_constval().stringval;
720  if (is_ilike) {
721  std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
722  }
723  check_like_expr(pattern, escape_char);
724  is_simple = test_is_simple_expr(pattern, escape_char);
725  if (is_simple) {
726  erase_cntl_chars(pattern, escape_char);
727  }
728  }
729  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikeExpr>(
730  arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
731  if (is_not) {
732  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
733  }
734  return result;
735 }
736 
737 void RegexpExpr::check_pattern_expr(const std::string& pattern_str, char escape_char) {
738  if (pattern_str.back() == escape_char) {
739  throw std::runtime_error("REGEXP pattern must not end with escape character.");
740  }
741 }
742 
743 bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str, char escape_char) {
744  char prev_char = '\0';
745  char prev_prev_char = '\0';
746  std::string like_str;
747  for (char& cur_char : pattern_str) {
748  if (prev_char == escape_char || isalnum(cur_char) || cur_char == ' ' ||
749  cur_char == '.') {
750  like_str.push_back((cur_char == '.') ? '_' : cur_char);
751  prev_prev_char = prev_char;
752  prev_char = cur_char;
753  continue;
754  }
755  if (prev_char == '.' && prev_prev_char != escape_char) {
756  if (cur_char == '*' || cur_char == '+') {
757  if (cur_char == '*') {
758  like_str.pop_back();
759  }
760  // .* --> %
761  // .+ --> _%
762  like_str.push_back('%');
763  prev_prev_char = prev_char;
764  prev_char = cur_char;
765  continue;
766  }
767  }
768  return false;
769  }
770  pattern_str = like_str;
771  return true;
772 }
773 
774 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
775  const Catalog_Namespace::Catalog& catalog,
776  Analyzer::Query& query,
777  TlistRefType allow_tlist_ref) const {
778  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
779  auto pattern_expr = pattern_string_->analyze(catalog, query, allow_tlist_ref);
780  auto escape_expr = escape_string_ == nullptr
781  ? nullptr
782  : escape_string_->analyze(catalog, query, allow_tlist_ref);
783  return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not_);
784 }
785 
786 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
787  std::shared_ptr<Analyzer::Expr> arg_expr,
788  std::shared_ptr<Analyzer::Expr> pattern_expr,
789  std::shared_ptr<Analyzer::Expr> escape_expr,
790  const bool is_not) {
791  if (!arg_expr->get_type_info().is_string()) {
792  throw std::runtime_error("expression before REGEXP must be of a string type.");
793  }
794  if (!pattern_expr->get_type_info().is_string()) {
795  throw std::runtime_error("expression after REGEXP must be of a string type.");
796  }
797  char escape_char = '\\';
798  if (escape_expr != nullptr) {
799  if (!escape_expr->get_type_info().is_string()) {
800  throw std::runtime_error("expression after ESCAPE must be of a string type.");
801  }
802  if (!escape_expr->get_type_info().is_string()) {
803  throw std::runtime_error("expression after ESCAPE must be of a string type.");
804  }
805  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
806  if (c != nullptr && c->get_constval().stringval->length() > 1) {
807  throw std::runtime_error("String after ESCAPE must have a single character.");
808  }
809  escape_char = (*c->get_constval().stringval)[0];
810  if (escape_char != '\\') {
811  throw std::runtime_error("Only supporting '\\' escape character.");
812  }
813  }
814  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(pattern_expr);
815  if (c != nullptr) {
816  std::string& pattern = *c->get_constval().stringval;
817  if (translate_to_like_pattern(pattern, escape_char)) {
818  return LikeExpr::get(arg_expr, pattern_expr, escape_expr, false, is_not);
819  }
820  }
821  std::shared_ptr<Analyzer::Expr> result =
822  makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
823  if (is_not) {
824  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
825  }
826  return result;
827 }
828 
829 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
830  const Catalog_Namespace::Catalog& catalog,
831  Analyzer::Query& query,
832  TlistRefType allow_tlist_ref) const {
833  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
834  return LikelihoodExpr::get(arg_expr, likelihood_, is_not_);
835 }
836 
837 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
838  std::shared_ptr<Analyzer::Expr> arg_expr,
839  float likelihood,
840  const bool is_not) {
841  if (!arg_expr->get_type_info().is_boolean()) {
842  throw std::runtime_error("likelihood expression expects boolean type.");
843  }
844  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikelihoodExpr>(
845  arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
846  return result;
847 }
848 
849 std::shared_ptr<Analyzer::Expr> WidthBucketExpr::analyze(
850  const Catalog_Namespace::Catalog& catalog,
851  Analyzer::Query& query,
852  TlistRefType allow_tlist_ref) const {
853  auto target_value = target_value_->analyze(catalog, query, allow_tlist_ref);
854  auto lower_bound = lower_bound_->analyze(catalog, query, allow_tlist_ref);
855  auto upper_bound = upper_bound_->analyze(catalog, query, allow_tlist_ref);
856  auto partition_count = partition_count_->analyze(catalog, query, allow_tlist_ref);
857  return WidthBucketExpr::get(target_value, lower_bound, upper_bound, partition_count);
858 }
859 
860 std::shared_ptr<Analyzer::Expr> WidthBucketExpr::get(
861  std::shared_ptr<Analyzer::Expr> target_value,
862  std::shared_ptr<Analyzer::Expr> lower_bound,
863  std::shared_ptr<Analyzer::Expr> upper_bound,
864  std::shared_ptr<Analyzer::Expr> partition_count) {
865  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::WidthBucketExpr>(
866  target_value, lower_bound, upper_bound, partition_count);
867  return result;
868 }
869 
870 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
871  const Catalog_Namespace::Catalog& catalog,
872  Analyzer::Query& query,
873  TlistRefType allow_tlist_ref) const {
874  throw std::runtime_error("Subqueries are not supported yet.");
875  return nullptr;
876 }
877 
// Resolves this column reference against the query's range table and returns
// an analyzer expression for it.
//
// Qualified name (table_ set): the range table entry is looked up by name and
// the column must exist in it.
// Unqualified name: every range table entry is probed; a second match raises
// an "ambiguous" error. If cd is null after the scan and allow_tlist_ref is
// not TLIST_NONE, the name is matched against the target-list result names.
//
// Throws std::runtime_error for '*' (column_ == nullptr), an unknown
// table/range variable, an unknown column, or an ambiguous column/alias.
878 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
879  const Catalog_Namespace::Catalog& catalog,
880  Analyzer::Query& query,
881  TlistRefType allow_tlist_ref) const {
882  int table_id{0};
883  int rte_idx{0};
884  const ColumnDescriptor* cd{nullptr};
885  if (column_ == nullptr) {
886  throw std::runtime_error("invalid column name *.");
887  }
888  if (table_ != nullptr) {
  // Qualified reference: resolve the range table entry by its name first.
889  rte_idx = query.get_rte_idx(*table_);
890  if (rte_idx < 0) {
891  throw std::runtime_error("range variable or table name " + *table_ +
892  " does not exist.");
893  }
894  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
895  cd = rte->get_column_desc(catalog, *column_);
896  if (cd == nullptr) {
897  throw std::runtime_error("Column name " + *column_ + " does not exist.");
898  }
899  table_id = rte->get_table_id();
900  } else {
  // Unqualified reference: probe every range table entry, remembering the
  // first hit and rejecting a second one as ambiguous.
  // NOTE(review): cd is reassigned on every iteration, so after the loop it
  // holds the lookup result for the LAST entry, not for the matching one. It
  // looks like a column found only in an earlier entry would then be treated
  // as missing below -- confirm against multi-table queries.
901  bool found = false;
902  int i = 0;
903  for (auto rte : query.get_rangetable()) {
904  cd = rte->get_column_desc(catalog, *column_);
905  if (cd != nullptr && !found) {
906  found = true;
907  rte_idx = i;
908  table_id = rte->get_table_id();
909  } else if (cd != nullptr && found) {
910  throw std::runtime_error("Column name " + *column_ + " is ambiguous.");
911  }
912  i++;
913  }
914  if (cd == nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
915  // check if this is a reference to a targetlist entry
916  bool found = false;
917  int varno = -1;
  // Target-list positions are counted from 1 here.
918  int i = 1;
919  std::shared_ptr<Analyzer::TargetEntry> tle;
920  for (auto p : query.get_targetlist()) {
921  if (*column_ == p->get_resname() && !found) {
922  found = true;
923  varno = i;
924  tle = p;
925  } else if (*column_ == p->get_resname() && found) {
926  throw std::runtime_error("Output alias " + *column_ + " is ambiguous.");
927  }
928  i++;
929  }
930  if (found) {
931  if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
932  Analyzer::Var* v = static_cast<Analyzer::Var*>(tle->get_expr());
  // NOTE(review): one line of the listing is absent at this point and the
  // braces below do not balance without it; presumably it opened a
  // conditional around the following return -- verify against the original
  // file before editing this region.
934  return v->deep_copy();
935  }
936  }
  // TLIST_COPY duplicates the aliased expression; any other mode refers to
  // the target-list entry through an output Var carrying its position.
937  if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
938  return tle->get_expr()->deep_copy();
939  } else {
940  return makeExpr<Analyzer::Var>(
941  tle->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
942  }
943  }
944  }
945  if (cd == nullptr) {
946  throw std::runtime_error("Column name " + *column_ + " does not exist.");
947  }
948  }
949  return makeExpr<Analyzer::ColumnVar>(cd->columnType, table_id, cd->columnId, rte_idx);
950 }
951 
952 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
953  const Catalog_Namespace::Catalog& catalog,
954  Analyzer::Query& query,
955  TlistRefType allow_tlist_ref) const {
956  SQLTypeInfo result_type;
957  SQLAgg agg_type;
958  std::shared_ptr<Analyzer::Expr> arg_expr;
959  bool is_distinct = false;
960  if (boost::iequals(*name_, "count")) {
961  result_type = SQLTypeInfo(kBIGINT, false);
962  agg_type = kCOUNT;
963  if (arg_) {
964  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
965  const SQLTypeInfo& ti = arg_expr->get_type_info();
966  if (ti.is_string() && (ti.get_compression() != kENCODING_DICT || !distinct_)) {
967  throw std::runtime_error(
968  "Strings must be dictionary-encoded in COUNT(DISTINCT).");
969  }
970  if (ti.get_type() == kARRAY && !distinct_) {
971  throw std::runtime_error("Only COUNT(DISTINCT) is supported on arrays.");
972  }
973  }
974  is_distinct = distinct_;
975  } else {
976  if (!arg_) {
977  throw std::runtime_error("Cannot compute " + *name_ + " with argument '*'.");
978  }
979  if (boost::iequals(*name_, "min")) {
980  agg_type = kMIN;
981  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
982  arg_expr = arg_expr->decompress();
983  result_type = arg_expr->get_type_info();
984  } else if (boost::iequals(*name_, "max")) {
985  agg_type = kMAX;
986  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
987  arg_expr = arg_expr->decompress();
988  result_type = arg_expr->get_type_info();
989  } else if (boost::iequals(*name_, "avg")) {
990  agg_type = kAVG;
991  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
992  if (!arg_expr->get_type_info().is_number()) {
993  throw std::runtime_error("Cannot compute AVG on non-number-type arguments.");
994  }
995  arg_expr = arg_expr->decompress();
996  result_type = SQLTypeInfo(kDOUBLE, false);
997  } else if (boost::iequals(*name_, "sum")) {
998  agg_type = kSUM;
999  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1000  if (!arg_expr->get_type_info().is_number()) {
1001  throw std::runtime_error("Cannot compute SUM on non-number-type arguments.");
1002  }
1003  arg_expr = arg_expr->decompress();
1004  result_type = arg_expr->get_type_info().is_integer() ? SQLTypeInfo(kBIGINT, false)
1005  : arg_expr->get_type_info();
1006  } else if (boost::iequals(*name_, "unnest")) {
1007  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1008  const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
1009  if (arg_ti.get_type() != kARRAY) {
1010  throw std::runtime_error(arg_->to_string() + " is not of array type.");
1011  }
1012  return makeExpr<Analyzer::UOper>(arg_ti.get_elem_type(), false, kUNNEST, arg_expr);
1013  } else {
1014  throw std::runtime_error("invalid function name: " + *name_);
1015  }
1016  if (arg_expr->get_type_info().is_string() ||
1017  arg_expr->get_type_info().get_type() == kARRAY) {
1018  throw std::runtime_error(
1019  "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
1020  }
1021  }
1022  int naggs = query.get_num_aggs();
1023  query.set_num_aggs(naggs + 1);
1024  return makeExpr<Analyzer::AggExpr>(
1025  result_type, agg_type, arg_expr, is_distinct, nullptr);
1026 }
1027 
1028 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
1029  const Catalog_Namespace::Catalog& catalog,
1030  Analyzer::Query& query,
1031  TlistRefType allow_tlist_ref) const {
1032  target_type_->check_type();
1033  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1034  SQLTypeInfo ti(target_type_->get_type(),
1035  target_type_->get_param1(),
1036  target_type_->get_param2(),
1037  arg_expr->get_type_info().get_notnull());
1038  if (arg_expr->get_type_info().get_type() != target_type_->get_type() &&
1039  arg_expr->get_type_info().get_compression() != kENCODING_NONE) {
1040  arg_expr->decompress();
1041  }
1042  return arg_expr->add_cast(ti);
1043 }
1044 
1045 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
1046  const Catalog_Namespace::Catalog& catalog,
1047  Analyzer::Query& query,
1048  TlistRefType allow_tlist_ref) const {
1049  SQLTypeInfo ti;
1050  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1051  expr_pair_list;
1052  for (auto& p : when_then_list_) {
1053  auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
1054  if (e1->get_type_info().get_type() != kBOOLEAN) {
1055  throw std::runtime_error("Only boolean expressions can be used after WHEN.");
1056  }
1057  auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
1058  expr_pair_list.emplace_back(e1, e2);
1059  }
1060  auto else_e =
1061  else_expr_ ? else_expr_->analyze(catalog, query, allow_tlist_ref) : nullptr;
1062  return normalize(expr_pair_list, else_e);
1063 }
1064 
1065 namespace {
1066 
1068  const std::string* s = str_literal->get_stringval();
1069  if (*s == "t" || *s == "true" || *s == "T" || *s == "True") {
1070  return true;
1071  } else if (*s == "f" || *s == "false" || *s == "F" || *s == "False") {
1072  return false;
1073  } else {
1074  throw std::runtime_error("Invalid string for boolean " + *s);
1075  }
1076 }
1077 
// Applies the WITH-options of a COPY statement to copy_params.
//
// Every entry of options_ is matched by name, case-insensitively
// (boost::iequals). The value literal is type-checked (IntLiteral or
// StringLiteral via dynamic_cast), validated, and stored in the matching
// copy_params field. Boolean options are given as string literals and are
// converted with bool_from_string_literal.
//
// options_                       - name/value pairs of the WITH clause.
// copy_params                    - receives the parsed settings.
// warnings                       - receives deprecation messages (the
//                                  'parquet' and 'geo' options).
// deferred_copy_from_partitions_ - receives the upper-cased value of the
//                                  'partitions' option.
//
// Throws std::runtime_error for a value of the wrong literal type, a value
// outside the accepted set/range, or an unrecognised option name.
//
// NOTE(review): in this listing several statements appear truncated or have
// empty branches because individual lines are absent (marked below). Verify
// against the original file before changing any of those regions.
1078 void parse_copy_params(const std::list<std::unique_ptr<NameValueAssign>>& options_,
1079  import_export::CopyParams& copy_params,
1080  std::vector<std::string>& warnings,
1081  std::string& deferred_copy_from_partitions_) {
1082  if (!options_.empty()) {
1083  for (auto& p : options_) {
  // --- generic import limits and sizing (integer options) ---
1084  if (boost::iequals(*p->get_name(), "max_reject")) {
1085  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1086  if (int_literal == nullptr) {
1087  throw std::runtime_error("max_reject option must be an integer.");
1088  }
1089  copy_params.max_reject = int_literal->get_intval();
1090  } else if (boost::iequals(*p->get_name(), "max_import_batch_row_count")) {
1091  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1092  if (int_literal == nullptr) {
1093  throw std::runtime_error(
1094  "max_import_batch_row_count option must be an integer.");
1095  }
1096  if (int_literal->get_intval() <= 0) {
1097  throw std::runtime_error(
1098  "max_import_batch_row_count option must be a positive integer (greater "
1099  "than 0).");
1100  }
1101  copy_params.max_import_batch_row_count = int_literal->get_intval();
1102  } else if (boost::iequals(*p->get_name(), "buffer_size")) {
1103  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1104  if (int_literal == nullptr) {
1105  throw std::runtime_error("buffer_size option must be an integer.");
1106  }
1107  copy_params.buffer_size = int_literal->get_intval();
1108  } else if (boost::iequals(*p->get_name(), "threads")) {
1109  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1110  if (int_literal == nullptr) {
1111  throw std::runtime_error("Threads option must be an integer.");
1112  }
1113  copy_params.threads = int_literal->get_intval();
  // --- delimited-text format options ---
1114  } else if (boost::iequals(*p->get_name(), "delimiter")) {
1115  const StringLiteral* str_literal =
1116  dynamic_cast<const StringLiteral*>(p->get_value());
1117  if (str_literal == nullptr) {
1118  throw std::runtime_error("Delimiter option must be a string.");
1119  } else if (str_literal->get_stringval()->length() != 1) {
1120  throw std::runtime_error("Delimiter must be a single character string.");
1121  }
1122  copy_params.delimiter = (*str_literal->get_stringval())[0];
1123  } else if (boost::iequals(*p->get_name(), "nulls")) {
1124  const StringLiteral* str_literal =
1125  dynamic_cast<const StringLiteral*>(p->get_value());
1126  if (str_literal == nullptr) {
1127  throw std::runtime_error("Nulls option must be a string.");
1128  }
1129  copy_params.null_str = *str_literal->get_stringval();
1130  } else if (boost::iequals(*p->get_name(), "header")) {
1131  const StringLiteral* str_literal =
1132  dynamic_cast<const StringLiteral*>(p->get_value());
1133  if (str_literal == nullptr) {
1134  throw std::runtime_error("Header option must be a boolean.");
1135  }
1136  copy_params.has_header = bool_from_string_literal(str_literal)
  // NOTE(review): the statement above is not terminated in this listing; its
  // continuation lines are absent -- verify against the original file.
1139 #ifdef ENABLE_IMPORT_PARQUET
1140  } else if (boost::iequals(*p->get_name(), "parquet")) {
1141  warnings.push_back(
1142  "Deprecation Warning: COPY FROM WITH (parquet='true') is deprecated. Use "
1143  "WITH (source_type='parquet_file') instead.");
1144  const StringLiteral* str_literal =
1145  dynamic_cast<const StringLiteral*>(p->get_value());
1146  if (str_literal == nullptr) {
1147  throw std::runtime_error("'parquet' option must be a boolean.");
1148  }
1149  if (bool_from_string_literal(str_literal)) {
1150  // not sure a parquet "table" type is proper, but to make code
1151  // look consistent in some places, let's set "table" type too
  // NOTE(review): the statement this comment refers to is absent from the
  // listing.
1153  }
1154 #endif // ENABLE_IMPORT_PARQUET
  // --- S3 access settings ---
1155  } else if (boost::iequals(*p->get_name(), "s3_access_key")) {
1156  const StringLiteral* str_literal =
1157  dynamic_cast<const StringLiteral*>(p->get_value());
1158  if (str_literal == nullptr) {
1159  throw std::runtime_error("Option s3_access_key must be a string.");
1160  }
1161  copy_params.s3_access_key = *str_literal->get_stringval();
1162  } else if (boost::iequals(*p->get_name(), "s3_secret_key")) {
1163  const StringLiteral* str_literal =
1164  dynamic_cast<const StringLiteral*>(p->get_value());
1165  if (str_literal == nullptr) {
1166  throw std::runtime_error("Option s3_secret_key must be a string.");
1167  }
1168  copy_params.s3_secret_key = *str_literal->get_stringval();
1169  } else if (boost::iequals(*p->get_name(), "s3_session_token")) {
1170  const StringLiteral* str_literal =
1171  dynamic_cast<const StringLiteral*>(p->get_value());
1172  if (str_literal == nullptr) {
1173  throw std::runtime_error("Option s3_session_token must be a string.");
1174  }
1175  copy_params.s3_session_token = *str_literal->get_stringval();
1176  } else if (boost::iequals(*p->get_name(), "s3_region")) {
1177  const StringLiteral* str_literal =
1178  dynamic_cast<const StringLiteral*>(p->get_value());
1179  if (str_literal == nullptr) {
1180  throw std::runtime_error("Option s3_region must be a string.");
1181  }
1182  copy_params.s3_region = *str_literal->get_stringval();
1183  } else if (boost::iequals(*p->get_name(), "s3_endpoint")) {
1184  const StringLiteral* str_literal =
1185  dynamic_cast<const StringLiteral*>(p->get_value());
1186  if (str_literal == nullptr) {
1187  throw std::runtime_error("Option s3_endpoint must be a string.");
1188  }
1189  copy_params.s3_endpoint = *str_literal->get_stringval();
1190  } else if (boost::iequals(*p->get_name(), "s3_max_concurrent_downloads")) {
1191  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1192  if (int_literal == nullptr) {
1193  throw std::runtime_error(
1194  "'s3_max_concurrent_downloads' option must be an integer");
1195  }
1196  const int s3_max_concurrent_downloads = int_literal->get_intval();
1197  if (s3_max_concurrent_downloads > 0) {
1198  copy_params.s3_max_concurrent_downloads = s3_max_concurrent_downloads;
1199  } else {
1200  throw std::runtime_error(
1201  "Invalid value for 's3_max_concurrent_downloads' option (must be > 0): " +
1202  std::to_string(s3_max_concurrent_downloads));
1203  }
  // --- more delimited-text format options (single-character values) ---
1204  } else if (boost::iequals(*p->get_name(), "quote")) {
1205  const StringLiteral* str_literal =
1206  dynamic_cast<const StringLiteral*>(p->get_value());
1207  if (str_literal == nullptr) {
1208  throw std::runtime_error("Quote option must be a string.");
1209  } else if (str_literal->get_stringval()->length() != 1) {
1210  throw std::runtime_error("Quote must be a single character string.");
1211  }
1212  copy_params.quote = (*str_literal->get_stringval())[0];
1213  } else if (boost::iequals(*p->get_name(), "escape")) {
1214  const StringLiteral* str_literal =
1215  dynamic_cast<const StringLiteral*>(p->get_value());
1216  if (str_literal == nullptr) {
1217  throw std::runtime_error("Escape option must be a string.");
1218  } else if (str_literal->get_stringval()->length() != 1) {
1219  throw std::runtime_error("Escape must be a single character string.");
1220  }
1221  copy_params.escape = (*str_literal->get_stringval())[0];
1222  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
1223  const StringLiteral* str_literal =
1224  dynamic_cast<const StringLiteral*>(p->get_value());
1225  if (str_literal == nullptr) {
1226  throw std::runtime_error("Line_delimiter option must be a string.");
1227  } else if (str_literal->get_stringval()->length() != 1) {
1228  throw std::runtime_error("Line_delimiter must be a single character string.");
1229  }
1230  copy_params.line_delim = (*str_literal->get_stringval())[0];
1231  } else if (boost::iequals(*p->get_name(), "quoted")) {
1232  const StringLiteral* str_literal =
1233  dynamic_cast<const StringLiteral*>(p->get_value());
1234  if (str_literal == nullptr) {
1235  throw std::runtime_error("Quoted option must be a boolean.");
1236  }
1237  copy_params.quoted = bool_from_string_literal(str_literal);
1238  } else if (boost::iequals(*p->get_name(), "plain_text")) {
1239  const StringLiteral* str_literal =
1240  dynamic_cast<const StringLiteral*>(p->get_value());
1241  if (str_literal == nullptr) {
1242  throw std::runtime_error("plain_text option must be a boolean.");
1243  }
1244  copy_params.plain_text = bool_from_string_literal(str_literal);
1245  } else if (boost::iequals(*p->get_name(), "trim_spaces")) {
1246  const StringLiteral* str_literal =
1247  dynamic_cast<const StringLiteral*>(p->get_value());
1248  if (str_literal == nullptr) {
1249  throw std::runtime_error("trim_spaces option must be a boolean.");
1250  }
1251  copy_params.trim_spaces = bool_from_string_literal(str_literal);
1252  } else if (boost::iequals(*p->get_name(), "array_marker")) {
  // Two characters: the first opens an array, the second closes it.
1253  const StringLiteral* str_literal =
1254  dynamic_cast<const StringLiteral*>(p->get_value());
1255  if (str_literal == nullptr) {
1256  throw std::runtime_error("Array Marker option must be a string.");
1257  } else if (str_literal->get_stringval()->length() != 2) {
1258  throw std::runtime_error(
1259  "Array Marker option must be exactly two characters. Default is {}.");
1260  }
1261  copy_params.array_begin = (*str_literal->get_stringval())[0];
1262  copy_params.array_end = (*str_literal->get_stringval())[1];
1263  } else if (boost::iequals(*p->get_name(), "array_delimiter")) {
1264  const StringLiteral* str_literal =
1265  dynamic_cast<const StringLiteral*>(p->get_value());
1266  if (str_literal == nullptr) {
1267  throw std::runtime_error("Array Delimiter option must be a string.");
1268  } else if (str_literal->get_stringval()->length() != 1) {
1269  throw std::runtime_error("Array Delimiter must be a single character string.");
1270  }
1271  copy_params.array_delim = (*str_literal->get_stringval())[0];
  // --- geo / raster / source-type options ---
1272  } else if (boost::iequals(*p->get_name(), "lonlat")) {
1273  const StringLiteral* str_literal =
1274  dynamic_cast<const StringLiteral*>(p->get_value());
1275  if (str_literal == nullptr) {
1276  throw std::runtime_error("Lonlat option must be a boolean.");
1277  }
1278  copy_params.lonlat = bool_from_string_literal(str_literal);
1279  } else if (boost::iequals(*p->get_name(), "geo")) {
1280  warnings.push_back(
1281  "Deprecation Warning: COPY FROM WITH (geo='true') is deprecated. Use WITH "
1282  "(source_type='geo_file') instead.");
1283  const StringLiteral* str_literal =
1284  dynamic_cast<const StringLiteral*>(p->get_value());
1285  if (str_literal == nullptr) {
1286  throw std::runtime_error("'geo' option must be a boolean.");
1287  }
1288  if (bool_from_string_literal(str_literal)) {
  // NOTE(review): the body of this branch is absent from the listing.
1290  }
1291  } else if (boost::iequals(*p->get_name(), "source_type")) {
1292  const StringLiteral* str_literal =
1293  dynamic_cast<const StringLiteral*>(p->get_value());
1294  if (str_literal == nullptr) {
1295  throw std::runtime_error("'source_type' option must be a string.");
1296  }
1297  const std::string* s = str_literal->get_stringval();
  // NOTE(review): the statement inside each accepted branch below is absent
  // from the listing. Also note this chain guards the parquet case with
  // `#if ENABLE_IMPORT_PARQUET` whereas the 'parquet' option above uses
  // `#ifdef`; the two behave differently if the macro is defined without a
  // value -- confirm which is intended.
1298  if (boost::iequals(*s, "delimited_file")) {
1300  } else if (boost::iequals(*s, "geo_file")) {
1302 #if ENABLE_IMPORT_PARQUET
1303  } else if (boost::iequals(*s, "parquet_file")) {
1305 #endif
1306  } else if (boost::iequals(*s, "raster_file")) {
1308  } else if (boost::iequals(*s, "regex_parsed_file")) {
1310  } else {
1311  throw std::runtime_error(
1312  "Invalid string for 'source_type' option (must be 'GEO_FILE', 'RASTER_FILE'"
1313 #if ENABLE_IMPORT_PARQUET
1314  ", 'PARQUET_FILE'"
1315 #endif
1316  ", 'REGEX_PARSED_FILE'"
1317  " or 'DELIMITED_FILE'): " +
1318  *s);
1319  }
1320  } else if (boost::iequals(*p->get_name(), "geo_coords_type")) {
1321  const StringLiteral* str_literal =
1322  dynamic_cast<const StringLiteral*>(p->get_value());
1323  if (str_literal == nullptr) {
1324  throw std::runtime_error("'geo_coords_type' option must be a string");
1325  }
1326  const std::string* s = str_literal->get_stringval();
  // 'geography' is recognised but rejected; only 'geometry' is accepted.
1327  if (boost::iequals(*s, "geography")) {
1328  throw std::runtime_error(
1329  "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
1330  // copy_params.geo_coords_type = kGEOGRAPHY;
1331  } else if (boost::iequals(*s, "geometry")) {
1332  copy_params.geo_coords_type = kGEOMETRY;
1333  } else {
1334  throw std::runtime_error(
1335  "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
1336  "'GEOMETRY'): " +
1337  *s);
1338  }
1339  } else if (boost::iequals(*p->get_name(), "raster_point_type")) {
1340  const StringLiteral* str_literal =
1341  dynamic_cast<const StringLiteral*>(p->get_value());
1342  if (str_literal == nullptr) {
1343  throw std::runtime_error("'raster_point_type' option must be a string");
1344  }
1345  const std::string* s = str_literal->get_stringval();
  // NOTE(review): the statement inside each accepted branch below is absent
  // from the listing.
1346  if (boost::iequals(*s, "none")) {
1348  } else if (boost::iequals(*s, "auto")) {
1350  } else if (boost::iequals(*s, "smallint")) {
1352  } else if (boost::iequals(*s, "int")) {
1354  } else if (boost::iequals(*s, "float")) {
1356  } else if (boost::iequals(*s, "double")) {
1358  } else if (boost::iequals(*s, "point")) {
1360  } else {
1361  throw std::runtime_error(
1362  "Invalid string for 'raster_point_type' option (must be 'NONE', 'AUTO', "
1363  "'SMALLINT', 'INT', 'FLOAT', 'DOUBLE' or 'POINT'): " +
1364  *s);
1365  }
1366  } else if (boost::iequals(*p->get_name(), "raster_point_transform")) {
1367  const StringLiteral* str_literal =
1368  dynamic_cast<const StringLiteral*>(p->get_value());
1369  if (str_literal == nullptr) {
1370  throw std::runtime_error("'raster_point_transform' option must be a string");
1371  }
1372  const std::string* s = str_literal->get_stringval();
  // NOTE(review): the statements of the accepted branches below are absent
  // from the listing; the 'world' assignment is cut off after '='.
1373  if (boost::iequals(*s, "none")) {
1375  } else if (boost::iequals(*s, "auto")) {
1377  } else if (boost::iequals(*s, "file")) {
1379  } else if (boost::iequals(*s, "world")) {
1380  copy_params.raster_point_transform =
1382  } else {
1383  throw std::runtime_error(
1384  "Invalid string for 'raster_point_transform' option (must be 'NONE', "
1385  "'AUTO', 'FILE' or 'WORLD'): " +
1386  *s);
1387  }
1388  } else if (boost::iequals(*p->get_name(), "raster_import_bands")) {
1389  const StringLiteral* str_literal =
1390  dynamic_cast<const StringLiteral*>(p->get_value());
1391  if (str_literal == nullptr) {
1392  throw std::runtime_error("'raster_import_bands' option must be a string");
1393  }
1394  const std::string* raster_import_bands = str_literal->get_stringval();
1395  if (raster_import_bands) {
1396  copy_params.raster_import_bands = *raster_import_bands;
1397  } else {
1398  throw std::runtime_error("Invalid value for 'raster_import_bands' option");
1399  }
1400  } else if (boost::iequals(*p->get_name(), "raster_import_dimensions")) {
1401  const StringLiteral* str_literal =
1402  dynamic_cast<const StringLiteral*>(p->get_value());
1403  if (str_literal == nullptr) {
1404  throw std::runtime_error("'raster_import_dimensions' option must be a string");
1405  }
1406  const std::string* raster_import_dimensions = str_literal->get_stringval();
1407  if (raster_import_dimensions) {
1408  copy_params.raster_import_dimensions = *raster_import_dimensions;
1409  } else {
1410  throw std::runtime_error("Invalid value for 'raster_import_dimensions' option");
1411  }
1412  } else if (boost::iequals(*p->get_name(), "geo_coords_encoding")) {
1413  const StringLiteral* str_literal =
1414  dynamic_cast<const StringLiteral*>(p->get_value());
1415  if (str_literal == nullptr) {
1416  throw std::runtime_error("'geo_coords_encoding' option must be a string");
1417  }
1418  const std::string* s = str_literal->get_stringval();
  // The encoding and its companion parameter are always set together.
1419  if (boost::iequals(*s, "none")) {
1420  copy_params.geo_coords_encoding = kENCODING_NONE;
1421  copy_params.geo_coords_comp_param = 0;
1422  } else if (boost::iequals(*s, "compressed(32)")) {
1423  copy_params.geo_coords_encoding = kENCODING_GEOINT;
1424  copy_params.geo_coords_comp_param = 32;
1425  } else {
1426  throw std::runtime_error(
1427  "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
1428  "'COMPRESSED(32)'): " +
1429  *s);
1430  }
1431  } else if (boost::iequals(*p->get_name(), "raster_scanlines_per_thread")) {
1432  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1433  if (int_literal == nullptr) {
1434  throw std::runtime_error(
1435  "'raster_scanlines_per_thread' option must be an integer");
1436  }
1437  const int raster_scanlines_per_thread = int_literal->get_intval();
1438  if (raster_scanlines_per_thread < 0) {
1439  throw std::runtime_error(
1440  "'raster_scanlines_per_thread' option must be >= 0, with 0 denoting auto "
1441  "sizing");
1442  }
1443  copy_params.raster_scanlines_per_thread = raster_scanlines_per_thread;
1444  } else if (boost::iequals(*p->get_name(), "geo_coords_srid")) {
1445  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1446  if (int_literal == nullptr) {
1447  throw std::runtime_error("'geo_coords_srid' option must be an integer");
1448  }
1449  const int srid = int_literal->get_intval();
1450  if (srid == 4326 || srid == 3857 || srid == 900913) {
1451  copy_params.geo_coords_srid = srid;
1452  } else {
1453  throw std::runtime_error(
1454  "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
1455  "900913): " +
1456  std::to_string(srid));
1457  }
1458  } else if (boost::iequals(*p->get_name(), "geo_layer_name")) {
1459  const StringLiteral* str_literal =
1460  dynamic_cast<const StringLiteral*>(p->get_value());
1461  if (str_literal == nullptr) {
1462  throw std::runtime_error("'geo_layer_name' option must be a string");
1463  }
1464  const std::string* layer_name = str_literal->get_stringval();
1465  if (layer_name) {
1466  copy_params.geo_layer_name = *layer_name;
1467  } else {
1468  throw std::runtime_error("Invalid value for 'geo_layer_name' option");
1469  }
1470  } else if (boost::iequals(*p->get_name(), "partitions")) {
  // NOTE(review): unlike the other options, the value is static_cast to
  // StringLiteral without a dynamic type check; only the string pointer is
  // CHECKed -- verify that a non-string literal cannot reach this branch.
1471  const auto partitions =
1472  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
1473  CHECK(partitions);
1474  const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
1475  if (partitions_uc != "REPLICATED") {
1476  throw std::runtime_error(
1477  "Invalid value for 'partitions' option. Must be 'REPLICATED'.");
1478  }
1479  deferred_copy_from_partitions_ = partitions_uc;
1480  } else if (boost::iequals(*p->get_name(), "geo_assign_render_groups")) {
1481  const StringLiteral* str_literal =
1482  dynamic_cast<const StringLiteral*>(p->get_value());
1483  if (str_literal == nullptr) {
1484  throw std::runtime_error("geo_assign_render_groups option must be a boolean.");
1485  }
1486  copy_params.geo_assign_render_groups = bool_from_string_literal(str_literal);
1487  } else if (boost::iequals(*p->get_name(), "geo_explode_collections")) {
1488  const StringLiteral* str_literal =
1489  dynamic_cast<const StringLiteral*>(p->get_value());
1490  if (str_literal == nullptr) {
1491  throw std::runtime_error("geo_explode_collections option must be a boolean.");
1492  }
1493  copy_params.geo_explode_collections = bool_from_string_literal(str_literal);
1494  } else if (boost::iequals(*p->get_name(), "source_srid")) {
1495  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1496  if (int_literal == nullptr) {
1497  throw std::runtime_error("'source_srid' option must be an integer");
1498  }
1499  const int srid = int_literal->get_intval();
  // NOTE(review): the condition guarding the assignment below is absent from
  // the listing (the `else` has no visible `if`); judging by the error
  // message it presumably restricts the option to delimited files -- verify.
1501  copy_params.source_srid = srid;
1502  } else {
1503  throw std::runtime_error(
1504  "'source_srid' option can only be used on csv/tsv files");
1505  }
  // --- file selection / ordering options; an empty string clears the value ---
1506  } else if (boost::iequals(*p->get_name(), "regex_path_filter")) {
1507  const StringLiteral* str_literal =
1508  dynamic_cast<const StringLiteral*>(p->get_value());
1509  if (str_literal == nullptr) {
1510  throw std::runtime_error("Option regex_path_filter must be a string.");
1511  }
1512  const auto string_val = *str_literal->get_stringval();
1513  copy_params.regex_path_filter =
1514  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1515  } else if (boost::iequals(*p->get_name(), "file_sort_order_by")) {
1516  const StringLiteral* str_literal =
1517  dynamic_cast<const StringLiteral*>(p->get_value());
1518  if (str_literal == nullptr) {
1519  throw std::runtime_error("Option file_sort_order_by must be a string.");
1520  }
1521  const auto string_val = *str_literal->get_stringval();
1522  copy_params.file_sort_order_by =
1523  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1524  } else if (boost::iequals(*p->get_name(), "file_sort_regex")) {
1525  const StringLiteral* str_literal =
1526  dynamic_cast<const StringLiteral*>(p->get_value());
1527  if (str_literal == nullptr) {
1528  throw std::runtime_error("Option file_sort_regex must be a string.");
1529  }
1530  const auto string_val = *str_literal->get_stringval();
1531  copy_params.file_sort_regex =
1532  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1533  } else if (boost::iequals(*p->get_name(), "raster_point_compute_angle")) {
1534  const StringLiteral* str_literal =
1535  dynamic_cast<const StringLiteral*>(p->get_value());
1536  if (str_literal == nullptr) {
1537  throw std::runtime_error(
1538  "'raster_point_compute_angle' option must be a boolean.");
1539  }
  // Only a true value is applied; a false value leaves the field untouched.
1540  if (bool_from_string_literal(str_literal)) {
1541  copy_params.raster_point_compute_angle = true;
1542  }
  // --- SQL ordering, credential and connection options ---
1543  } else if (boost::iequals(*p->get_name(), "sql_order_by")) {
1544  if (auto str_literal = dynamic_cast<const StringLiteral*>(p->get_value())) {
1545  copy_params.sql_order_by = *str_literal->get_stringval();
1546  } else {
1547  throw std::runtime_error("Option sql_order_by must be a string.");
1548  }
1549  } else if (boost::iequals(*p->get_name(), "username")) {
1550  const StringLiteral* str_literal =
1551  dynamic_cast<const StringLiteral*>(p->get_value());
1552  if (str_literal == nullptr) {
1553  throw std::runtime_error("Option username must be a string.");
1554  }
1555  const auto string_val = *str_literal->get_stringval();
1556  copy_params.username = string_val;
1557  } else if (boost::iequals(*p->get_name(), "password")) {
1558  const StringLiteral* str_literal =
1559  dynamic_cast<const StringLiteral*>(p->get_value());
1560  if (str_literal == nullptr) {
1561  throw std::runtime_error("Option password must be a string.");
1562  }
1563  const auto string_val = *str_literal->get_stringval();
1564  copy_params.password = string_val;
1565  } else if (boost::iequals(*p->get_name(), "credential_string")) {
1566  const StringLiteral* str_literal =
1567  dynamic_cast<const StringLiteral*>(p->get_value());
1568  if (str_literal == nullptr) {
1569  throw std::runtime_error("Option credential_string must be a string.");
1570  }
1571  const auto string_val = *str_literal->get_stringval();
1572  copy_params.credential_string = string_val;
1573  } else if (boost::iequals(*p->get_name(), "data_source_name")) {
1574  const StringLiteral* str_literal =
1575  dynamic_cast<const StringLiteral*>(p->get_value());
1576  if (str_literal == nullptr) {
1577  throw std::runtime_error("Option data_source_name must be a string.");
1578  }
1579  const auto string_val = *str_literal->get_stringval();
1580  copy_params.dsn = string_val;
1581  } else if (boost::iequals(*p->get_name(), "connection_string")) {
1582  const StringLiteral* str_literal =
1583  dynamic_cast<const StringLiteral*>(p->get_value());
1584  if (str_literal == nullptr) {
1585  throw std::runtime_error("Option connection_string must be a string.");
1586  }
1587  const auto string_val = *str_literal->get_stringval();
1588  copy_params.connection_string = string_val;
  // --- regex-parsed file options ---
1589  } else if (boost::iequals(*p->get_name(), "line_start_regex")) {
1590  const StringLiteral* str_literal =
1591  dynamic_cast<const StringLiteral*>(p->get_value());
1592  if (str_literal == nullptr) {
1593  throw std::runtime_error("Option line_start_regex must be a string.");
1594  }
1595  const auto string_val = *str_literal->get_stringval();
1596  copy_params.line_start_regex = string_val;
1597  } else if (boost::iequals(*p->get_name(), "line_regex")) {
1598  const StringLiteral* str_literal =
1599  dynamic_cast<const StringLiteral*>(p->get_value());
1600  if (str_literal == nullptr) {
1601  throw std::runtime_error("Option line_regex must be a string.");
1602  }
1603  const auto string_val = *str_literal->get_stringval();
1604  copy_params.line_regex = string_val;
1605  } else if (boost::iequals(*p->get_name(), "add_metadata_columns") &&
  // NOTE(review): the second operand of the condition above is absent from
  // the listing -- verify against the original file.
1607  const StringLiteral* str_literal =
1608  dynamic_cast<const StringLiteral*>(p->get_value());
1609  if (str_literal == nullptr) {
1610  throw std::runtime_error("'add_metadata_columns' option must be a string.");
1611  }
1612  copy_params.add_metadata_columns = *str_literal->get_stringval();
1613  } else {
  // Any option name not handled above is rejected.
1614  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
1615  }
1616  }
1617  }
1618 }
1619 
1620 bool expr_is_null(const Analyzer::Expr* expr) {
1621  if (expr->get_type_info().get_type() == kNULLT) {
1622  return true;
1623  }
1624  const auto const_expr = dynamic_cast<const Analyzer::Constant*>(expr);
1625  return const_expr && const_expr->get_is_null();
1626 }
1627 
1628 } // namespace
1629 
1630 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
1631  const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
1632  std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
1633  const std::shared_ptr<Analyzer::Expr> else_e_in,
1634  const Executor* executor) {
1635  SQLTypeInfo ti;
1636  bool has_agg = false;
1637  // We need to keep track of whether there was at
1638  // least one none-encoded string literal expression
1639  // type among any of the case sub-expressions separately
1640  // from rest of type determination logic, as it will
1641  // be casted to the output dictionary type if all output
1642  // types are either dictionary encoded or none-encoded
1643  // literals, or kept as none-encoded if all sub-expression
1644  // types are none-encoded (column or literal)
1645  SQLTypeInfo none_encoded_literal_ti;
1646 
1647  for (auto& p : expr_pair_list) {
1648  auto e1 = p.first;
1649  CHECK(e1->get_type_info().is_boolean());
1650  auto e2 = p.second;
1651  if (e2->get_contains_agg()) {
1652  has_agg = true;
1653  }
1654  const auto& e2_ti = e2->get_type_info();
1655  if (e2_ti.is_string() && !e2_ti.is_dict_encoded_string() &&
1656  !std::dynamic_pointer_cast<const Analyzer::ColumnVar>(e2)) {
1657  CHECK(e2_ti.is_none_encoded_string());
1658  none_encoded_literal_ti =
1659  none_encoded_literal_ti.get_type() == kNULLT
1660  ? e2_ti
1661  : common_string_type(none_encoded_literal_ti, e2_ti, executor);
1662  continue;
1663  }
1664  if (ti.get_type() == kNULLT) {
1665  ti = e2_ti;
1666  } else if (e2_ti.get_type() == kNULLT) {
1667  ti.set_notnull(false);
1668  e2->set_type_info(ti);
1669  } else if (ti != e2_ti) {
1670  if (ti.is_string() && e2_ti.is_string()) {
1671  // Executor is needed to determine which dictionary is the largest
1672  // in case of two dictionary types with different encodings
1673  ti = common_string_type(ti, e2_ti, executor);
1674  } else if (ti.is_number() && e2_ti.is_number()) {
1676  } else if (ti.is_boolean() && e2_ti.is_boolean()) {
1678  } else {
1679  throw std::runtime_error(
1680  "Expressions in THEN clause must be of the same or compatible types.");
1681  }
1682  }
1683  }
1684  auto else_e = else_e_in;
1685  const auto& else_ti = else_e->get_type_info();
1686  if (else_e) {
1687  if (else_e->get_contains_agg()) {
1688  has_agg = true;
1689  }
1690  if (else_ti.is_string() && !else_ti.is_dict_encoded_string() &&
1691  !std::dynamic_pointer_cast<const Analyzer::ColumnVar>(else_e)) {
1692  CHECK(else_ti.is_none_encoded_string());
1693  none_encoded_literal_ti =
1694  none_encoded_literal_ti.get_type() == kNULLT
1695  ? else_ti
1696  : common_string_type(none_encoded_literal_ti, else_ti, executor);
1697  } else {
1698  if (ti.get_type() == kNULLT) {
1699  ti = else_ti;
1700  } else if (expr_is_null(else_e.get())) {
1701  ti.set_notnull(false);
1702  else_e->set_type_info(ti);
1703  } else if (ti != else_ti) {
1704  ti.set_notnull(false);
1705  if (ti.is_string() && else_ti.is_string()) {
1706  // Executor is needed to determine which dictionary is the largest
1707  // in case of two dictionary types with different encodings
1708  ti = common_string_type(ti, else_ti, executor);
1709  } else if (ti.is_number() && else_ti.is_number()) {
1710  ti = Analyzer::BinOper::common_numeric_type(ti, else_ti);
1711  } else if (ti.is_boolean() && else_ti.is_boolean()) {
1712  ti = Analyzer::BinOper::common_numeric_type(ti, else_ti);
1713  } else if (get_logical_type_info(ti) != get_logical_type_info(else_ti)) {
1714  throw std::runtime_error(
1715  // types differing by encoding will be resolved at decode
1716  "Expressions in ELSE clause must be of the same or compatible types as "
1717  "those in the THEN clauses.");
1718  }
1719  }
1720  }
1721  }
1722 
1723  if (ti.get_type() == kNULLT && none_encoded_literal_ti.get_type() != kNULLT) {
1724  // If we haven't set a type so far it's because
1725  // every case sub-expression has a none-encoded
1726  // literal output. Make this our output type
1727  ti = none_encoded_literal_ti;
1728  }
1729 
1730  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1731  cast_expr_pair_list;
1732  for (auto p : expr_pair_list) {
1733  ti.set_notnull(false);
1734  cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
1735  }
1736  if (else_e != nullptr) {
1737  else_e = else_e->add_cast(ti);
1738  } else {
1739  Datum d;
1740  // always create an else expr so that executor doesn't need to worry about it
1741  ti.set_notnull(false);
1742  else_e = makeExpr<Analyzer::Constant>(ti, true, d);
1743  }
1744  if (ti.get_type() == kNULLT) {
1745  throw std::runtime_error(
1746  "Cannot deduce the type for case expressions, all branches null");
1747  }
1748 
1749  auto case_expr = makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
1750  return case_expr;
1751 }
1752 
1753 std::string CaseExpr::to_string() const {
1754  std::string str("CASE ");
1755  for (auto& p : when_then_list_) {
1756  str += "WHEN " + p->get_expr1()->to_string() + " THEN " +
1757  p->get_expr2()->to_string() + " ";
1758  }
1759  if (else_expr_ != nullptr) {
1760  str += "ELSE " + else_expr_->to_string();
1761  }
1762  str += " END";
1763  return str;
1764 }
1765 
1766 void UnionQuery::analyze(const Catalog_Namespace::Catalog& catalog,
1767  Analyzer::Query& query) const {
1768  left_->analyze(catalog, query);
1769  Analyzer::Query* right_query = new Analyzer::Query();
1770  right_->analyze(catalog, *right_query);
1771  query.set_next_query(right_query);
1772  query.set_is_unionall(is_unionall_);
1773 }
1774 
1775 void QuerySpec::analyze_having_clause(const Catalog_Namespace::Catalog& catalog,
1776  Analyzer::Query& query) const {
1777  std::shared_ptr<Analyzer::Expr> p;
1778  if (having_clause_ != nullptr) {
1779  p = having_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1780  if (p->get_type_info().get_type() != kBOOLEAN) {
1781  throw std::runtime_error("Only boolean expressions can be in HAVING clause.");
1782  }
1783  p->check_group_by(query.get_group_by());
1784  }
1785  query.set_having_predicate(p);
1786 }
1787 
// Analyzes the GROUP BY clause and stores the resulting expression list on
// `query` via set_group_by().
//
// Each GROUP BY entry is either an integer literal, treated as a 1-based
// ordinal into the target list, or a general expression analyzed with
// TLIST_REF. Throws std::runtime_error for non-integer literals, out-of-range
// ordinals, and ordinals that reference an expression containing aggregates.
// After the list is built, every target-list expression is validated with
// check_group_by() when the query has aggregates or a non-empty GROUP BY.
1788 void QuerySpec::analyze_group_by(const Catalog_Namespace::Catalog& catalog,
1789  Analyzer::Query& query) const {
1790  std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1791  if (!groupby_clause_.empty()) {
// 1-based position of the current entry within the GROUP BY list.
1792  int gexpr_no = 1;
1793  std::shared_ptr<Analyzer::Expr> gexpr;
1794  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1795  query.get_targetlist();
1796  for (auto& c : groupby_clause_) {
1797  // special-case ordinal numbers in GROUP BY
1798  if (dynamic_cast<Literal*>(c.get())) {
// Only integer literals are accepted; any other literal kind is rejected.
1799  IntLiteral* i = dynamic_cast<IntLiteral*>(c.get());
1800  if (!i) {
1801  throw std::runtime_error("Invalid literal in GROUP BY clause.");
1802  }
1803  int varno = (int)i->get_intval();
1804  if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1805  throw std::runtime_error("Invalid ordinal number in GROUP BY clause.");
1806  }
1807  if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1808  throw std::runtime_error(
1809  "Ordinal number in GROUP BY cannot reference an expression containing "
1810  "aggregate "
1811  "functions.");
1812  }
// Represent the ordinal as a Var that points at the referenced target entry.
1813  gexpr = makeExpr<Analyzer::Var>(
1814  tlist[varno - 1]->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
1815  } else {
1816  gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1817  }
1818  const SQLTypeInfo gti = gexpr->get_type_info();
1819  bool set_new_type = false;
1820  SQLTypeInfo ti(gti);
// None-encoded string keys are cast to the adjusted type `ti` further down.
// NOTE(review): the listing numbers jump from 1822 to 1825 here, so statements
// that modify `ti` may be missing from this view - confirm against upstream.
1821  if (gti.is_string() && gti.get_compression() == kENCODING_NONE) {
1822  set_new_type = true;
1825  ti.set_fixed_size();
1826  }
1827  std::shared_ptr<Analyzer::Var> v;
1828  if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
// The key refers to a target-list entry: group by that entry's own
// expression, and make the entry itself point at the group-by slot instead.
1829  v = std::static_pointer_cast<Analyzer::Var>(gexpr);
1830  int n = v->get_varno();
1831  gexpr = tlist[n - 1]->get_own_expr();
1832  auto cv = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gexpr);
1833  if (cv != nullptr) {
1834  // inherit all ColumnVar info for lineage.
1835  *std::static_pointer_cast<Analyzer::ColumnVar>(v) = *cv;
1836  }
1837  v->set_which_row(Analyzer::Var::kGROUPBY);
1838  v->set_varno(gexpr_no);
1839  tlist[n - 1]->set_expr(v);
1840  }
1841  if (set_new_type) {
// Group by the casted expression and keep the Var's type in sync with it.
1842  auto new_e = gexpr->add_cast(ti);
1843  groupby.push_back(new_e);
1844  if (v != nullptr) {
1845  v->set_type_info(new_e->get_type_info());
1846  }
1847  } else {
1848  groupby.push_back(gexpr);
1849  }
1850  gexpr_no++;
1851  }
1852  }
// Validate every target-list expression against the final GROUP BY list.
1853  if (query.get_num_aggs() > 0 || !groupby.empty()) {
1854  for (auto t : query.get_targetlist()) {
1855  auto e = t->get_expr();
1856  e->check_group_by(groupby);
1857  }
1858  }
1859  query.set_group_by(groupby);
1860 }
1861 
1862 void QuerySpec::analyze_where_clause(const Catalog_Namespace::Catalog& catalog,
1863  Analyzer::Query& query) const {
1864  if (where_clause_ == nullptr) {
1865  query.set_where_predicate(nullptr);
1866  return;
1867  }
1868  auto p = where_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1869  if (p->get_type_info().get_type() != kBOOLEAN) {
1870  throw std::runtime_error("Only boolean expressions can be in WHERE clause.");
1871  }
1872  query.set_where_predicate(p);
1873 }
1874 
1875 void QuerySpec::analyze_select_clause(const Catalog_Namespace::Catalog& catalog,
1876  Analyzer::Query& query) const {
1877  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1878  query.get_targetlist_nonconst();
1879  if (select_clause_.empty()) {
1880  // this means SELECT *
1881  int rte_idx = 0;
1882  for (auto rte : query.get_rangetable()) {
1883  rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1884  }
1885  } else {
1886  for (auto& p : select_clause_) {
1887  const Parser::Expr* select_expr = p->get_select_expr();
1888  // look for the case of range_var.*
1889  if (typeid(*select_expr) == typeid(ColumnRef) &&
1890  dynamic_cast<const ColumnRef*>(select_expr)->get_column() == nullptr) {
1891  const std::string* range_var_name =
1892  dynamic_cast<const ColumnRef*>(select_expr)->get_table();
1893  int rte_idx = query.get_rte_idx(*range_var_name);
1894  if (rte_idx < 0) {
1895  throw std::runtime_error("invalid range variable name: " + *range_var_name);
1896  }
1897  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
1898  rte->expand_star_in_targetlist(catalog, tlist, rte_idx);
1899  } else {
1900  auto e = select_expr->analyze(catalog, query);
1901  std::string resname;
1902 
1903  if (p->get_alias() != nullptr) {
1904  resname = *p->get_alias();
1905  } else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1906  !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1907  auto colvar = std::static_pointer_cast<Analyzer::ColumnVar>(e);
1908  const ColumnDescriptor* col_desc = catalog.getMetadataForColumn(
1909  colvar->get_table_id(), colvar->get_column_id());
1910  resname = col_desc->columnName;
1911  }
1912  if (e->get_type_info().get_type() == kNULLT) {
1913  throw std::runtime_error(
1914  "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1915  }
1916  auto o = std::static_pointer_cast<Analyzer::UOper>(e);
1917  bool unnest = (o != nullptr && o->get_optype() == kUNNEST);
1918  auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1919  tlist.push_back(tle);
1920  }
1921  }
1922  }
1923 }
1924 
1925 void QuerySpec::analyze_from_clause(const Catalog_Namespace::Catalog& catalog,
1926  Analyzer::Query& query) const {
1928  for (auto& p : from_clause_) {
1929  const TableDescriptor* table_desc;
1930  table_desc = catalog.getMetadataForTable(*p->get_table_name());
1931  if (table_desc == nullptr) {
1932  throw std::runtime_error("Table " + *p->get_table_name() + " does not exist.");
1933  }
1934  std::string range_var;
1935  if (p->get_range_var() == nullptr) {
1936  range_var = *p->get_table_name();
1937  } else {
1938  range_var = *p->get_range_var();
1939  }
1940  rte = new Analyzer::RangeTableEntry(range_var, table_desc, nullptr);
1941  query.add_rte(rte);
1942  }
1943 }
1944 
// Analyzes a single query specification into `query`, one clause at a time.
// The call order matters: the FROM step registers the range table entries the
// SELECT step looks up, the GROUP BY step reads the target list the SELECT
// step filled in, and the HAVING step checks against the group-by list.
1945 void QuerySpec::analyze(const Catalog_Namespace::Catalog& catalog,
1946  Analyzer::Query& query) const {
1947  query.set_is_distinct(is_distinct_);
1948  analyze_from_clause(catalog, query);
1949  analyze_select_clause(catalog, query);
1950  analyze_where_clause(catalog, query);
1951  analyze_group_by(catalog, query);
1952  analyze_having_clause(catalog, query);
1953 }
1954 
1955 namespace {
1956 
1957 // clean known escape'd chars without having to do a full json parse
1958 std::string unescape(std::string s) {
1959  boost::replace_all(s, "\\\\t", "\t");
1960  boost::replace_all(s, "\\t", "\t");
1961  boost::replace_all(s, "\\\\n", "\n");
1962  boost::replace_all(s, "\\n", "\n");
1963 
1964  // handle numerics
1965  std::smatch m;
1966 
1967  // "\x00"
1968  std::regex e1("(\\\\x[0-9A-Fa-f][0-9A-Fa-f])");
1969  while (std::regex_search(s, m, e1)) {
1970  std::string original(m[0].first, m[0].second);
1971  std::string replacement;
1972  long val = strtol(original.substr(2, 2).c_str(), NULL, 16);
1973  replacement.push_back(val);
1974  boost::replace_all(s, original, replacement);
1975  }
1976 
1977  // "\u0000"
1978  std::regex e2("(\\\\u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])");
1979  while (std::regex_search(s, m, e2)) {
1980  std::string original(m[0].first, m[0].second);
1981  std::string replacement;
1982  long val = strtol(original.substr(2, 4).c_str(), NULL, 16);
1983  replacement.push_back(val);
1984  boost::replace_all(s, original, replacement);
1985  }
1986 
1987  return s;
1988 }
1989 
1990 void parse_options(const rapidjson::Value& payload,
1991  std::list<std::unique_ptr<NameValueAssign>>& nameValueList,
1992  bool stringToNull = false,
1993  bool stringToInteger = false) {
1994  if (payload.HasMember("options") && payload["options"].IsObject()) {
1995  const auto& options = payload["options"];
1996  for (auto itr = options.MemberBegin(); itr != options.MemberEnd(); ++itr) {
1997  auto option_name = std::make_unique<std::string>(itr->name.GetString());
1998  std::unique_ptr<Literal> literal_value;
1999  if (itr->value.IsString()) {
2000  std::string str = itr->value.GetString();
2001  if (stringToNull && str == "") {
2002  literal_value = std::make_unique<NullLiteral>();
2003  } else if (stringToInteger && std::all_of(str.begin(), str.end(), ::isdigit)) {
2004  int iVal = std::stoi(str);
2005  literal_value = std::make_unique<IntLiteral>(iVal);
2006  } else {
2007  // Rapidjson will deliberately provide escape'd strings when accessed
2008  // ... but the literal should have a copy of the raw unescaped string
2009  auto unique_literal_string = std::make_unique<std::string>(unescape(str));
2010  literal_value =
2011  std::make_unique<StringLiteral>(unique_literal_string.release());
2012  }
2013  } else if (itr->value.IsInt() || itr->value.IsInt64()) {
2014  literal_value = std::make_unique<IntLiteral>(json_i64(itr->value));
2015  } else if (itr->value.IsNull()) {
2016  literal_value = std::make_unique<NullLiteral>();
2017  } else {
2018  throw std::runtime_error("Unable to handle literal for " + *option_name);
2019  }
2020  CHECK(literal_value);
2021 
2022  nameValueList.emplace_back(std::make_unique<NameValueAssign>(
2023  option_name.release(), literal_value.release()));
2024  }
2025  }
2026 }
2027 } // namespace
2028 
2029 void SelectStmt::analyze(const Catalog_Namespace::Catalog& catalog,
2030  Analyzer::Query& query) const {
2031  query.set_stmt_type(kSELECT);
2032  query.set_limit(limit_);
2033  if (offset_ < 0) {
2034  throw std::runtime_error("OFFSET cannot be negative.");
2035  }
2036  query.set_offset(offset_);
2037  query_expr_->analyze(catalog, query);
2038  if (orderby_clause_.empty() && !query.get_is_distinct()) {
2039  query.set_order_by(nullptr);
2040  return;
2041  }
2042  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
2043  query.get_targetlist();
2044  std::list<Analyzer::OrderEntry>* order_by = new std::list<Analyzer::OrderEntry>();
2045  if (!orderby_clause_.empty()) {
2046  for (auto& p : orderby_clause_) {
2047  int tle_no = p->get_colno();
2048  if (tle_no == 0) {
2049  // use column name
2050  // search through targetlist for matching name
2051  const std::string* name = p->get_column()->get_column();
2052  tle_no = 1;
2053  bool found = false;
2054  for (auto tle : tlist) {
2055  if (tle->get_resname() == *name) {
2056  found = true;
2057  break;
2058  }
2059  tle_no++;
2060  }
2061  if (!found) {
2062  throw std::runtime_error("invalid name in order by: " + *name);
2063  }
2064  }
2065  order_by->push_back(
2066  Analyzer::OrderEntry(tle_no, p->get_is_desc(), p->get_nulls_first()));
2067  }
2068  }
2069  if (query.get_is_distinct()) {
2070  // extend order_by to include all targetlist entries.
2071  for (int i = 1; i <= static_cast<int>(tlist.size()); i++) {
2072  bool in_orderby = false;
2073  std::for_each(order_by->begin(),
2074  order_by->end(),
2075  [&in_orderby, i](const Analyzer::OrderEntry& oe) {
2076  in_orderby = in_orderby || (i == oe.tle_no);
2077  });
2078  if (!in_orderby) {
2079  order_by->push_back(Analyzer::OrderEntry(i, false, false));
2080  }
2081  }
2082  }
2083  query.set_order_by(order_by);
2084 }
2085 
2086 std::string SelectEntry::to_string() const {
2087  std::string str = select_expr_->to_string();
2088  if (alias_ != nullptr) {
2089  str += " AS " + *alias_;
2090  }
2091  return str;
2092 }
2093 
2094 std::string TableRef::to_string() const {
2095  std::string str = *table_name_;
2096  if (range_var_ != nullptr) {
2097  str += " " + *range_var_;
2098  }
2099  return str;
2100 }
2101 
2102 std::string ColumnRef::to_string() const {
2103  std::string str;
2104  if (table_ == nullptr) {
2105  str = *column_;
2106  } else if (column_ == nullptr) {
2107  str = *table_ + ".*";
2108  } else {
2109  str = *table_ + "." + *column_;
2110  }
2111  return str;
2112 }
2113 
2114 std::string OperExpr::to_string() const {
2115  std::string op_str[] = {
2116  "=", "===", "<>", "<", ">", "<=", ">=", " AND ", " OR ", "NOT", "-", "+", "*", "/"};
2117  std::string str;
2118  if (optype_ == kUMINUS) {
2119  str = "-(" + left_->to_string() + ")";
2120  } else if (optype_ == kNOT) {
2121  str = "NOT (" + left_->to_string() + ")";
2122  } else if (optype_ == kARRAY_AT) {
2123  str = left_->to_string() + "[" + right_->to_string() + "]";
2124  } else if (optype_ == kUNNEST) {
2125  str = "UNNEST(" + left_->to_string() + ")";
2126  } else if (optype_ == kIN) {
2127  str = "(" + left_->to_string() + " IN " + right_->to_string() + ")";
2128  } else {
2129  str = "(" + left_->to_string() + op_str[optype_] + right_->to_string() + ")";
2130  }
2131  return str;
2132 }
2133 
2134 std::string InExpr::to_string() const {
2135  std::string str = arg_->to_string();
2136  if (is_not_) {
2137  str += " NOT IN ";
2138  } else {
2139  str += " IN ";
2140  }
2141  return str;
2142 }
2143 
2144 std::string ExistsExpr::to_string() const {
2145  return "EXISTS (" + query_->to_string() + ")";
2146 }
2147 
2148 std::string SubqueryExpr::to_string() const {
2149  std::string str;
2150  str = "(";
2151  str += query_->to_string();
2152  str += ")";
2153  return str;
2154 }
2155 
2156 std::string IsNullExpr::to_string() const {
2157  std::string str = arg_->to_string();
2158  if (is_not_) {
2159  str += " IS NOT NULL";
2160  } else {
2161  str += " IS NULL";
2162  }
2163  return str;
2164 }
2165 
2166 std::string InSubquery::to_string() const {
2167  std::string str = InExpr::to_string();
2168  str += subquery_->to_string();
2169  return str;
2170 }
2171 
2172 std::string InValues::to_string() const {
2173  std::string str = InExpr::to_string() + "(";
2174  bool notfirst = false;
2175  for (auto& p : value_list_) {
2176  if (notfirst) {
2177  str += ", ";
2178  } else {
2179  notfirst = true;
2180  }
2181  str += p->to_string();
2182  }
2183  str += ")";
2184  return str;
2185 }
2186 
2187 std::string BetweenExpr::to_string() const {
2188  std::string str = arg_->to_string();
2189  if (is_not_) {
2190  str += " NOT BETWEEN ";
2191  } else {
2192  str += " BETWEEN ";
2193  }
2194  str += lower_->to_string() + " AND " + upper_->to_string();
2195  return str;
2196 }
2197 
2198 std::string CharLengthExpr::to_string() const {
2199  std::string str;
2200  if (calc_encoded_length_) {
2201  str = "CHAR_LENGTH (" + arg_->to_string() + ")";
2202  } else {
2203  str = "LENGTH (" + arg_->to_string() + ")";
2204  }
2205  return str;
2206 }
2207 
2208 std::string CardinalityExpr::to_string() const {
2209  std::string str = "CARDINALITY(" + arg_->to_string() + ")";
2210  return str;
2211 }
2212 
2213 std::string LikeExpr::to_string() const {
2214  std::string str = arg_->to_string();
2215  if (is_not_) {
2216  str += " NOT LIKE ";
2217  } else {
2218  str += " LIKE ";
2219  }
2220  str += like_string_->to_string();
2221  if (escape_string_ != nullptr) {
2222  str += " ESCAPE " + escape_string_->to_string();
2223  }
2224  return str;
2225 }
2226 
2227 std::string RegexpExpr::to_string() const {
2228  std::string str = arg_->to_string();
2229  if (is_not_) {
2230  str += " NOT REGEXP ";
2231  } else {
2232  str += " REGEXP ";
2233  }
2234  str += pattern_string_->to_string();
2235  if (escape_string_ != nullptr) {
2236  str += " ESCAPE " + escape_string_->to_string();
2237  }
2238  return str;
2239 }
2240 
2241 std::string WidthBucketExpr::to_string() const {
2242  std::string str = " WIDTH_BUCKET ";
2243  str += target_value_->to_string();
2244  str += " ";
2245  str += lower_bound_->to_string();
2246  str += " ";
2247  str += upper_bound_->to_string();
2248  str += " ";
2249  str += partition_count_->to_string();
2250  str += " ";
2251  return str;
2252 }
2253 
2254 std::string LikelihoodExpr::to_string() const {
2255  std::string str = " LIKELIHOOD ";
2256  str += arg_->to_string();
2257  str += " ";
2258  str += boost::lexical_cast<std::string>(is_not_ ? 1.0 - likelihood_ : likelihood_);
2259  return str;
2260 }
2261 
2262 std::string FunctionRef::to_string() const {
2263  std::string str = *name_ + "(";
2264  if (distinct_) {
2265  str += "DISTINCT ";
2266  }
2267  if (arg_ == nullptr) {
2268  str += "*)";
2269  } else {
2270  str += arg_->to_string() + ")";
2271  }
2272  return str;
2273 }
2274 
2275 std::string QuerySpec::to_string() const {
2276  std::string query_str = "SELECT ";
2277  if (is_distinct_) {
2278  query_str += "DISTINCT ";
2279  }
2280  if (select_clause_.empty()) {
2281  query_str += "* ";
2282  } else {
2283  bool notfirst = false;
2284  for (auto& p : select_clause_) {
2285  if (notfirst) {
2286  query_str += ", ";
2287  } else {
2288  notfirst = true;
2289  }
2290  query_str += p->to_string();
2291  }
2292  }
2293  query_str += " FROM ";
2294  bool notfirst = false;
2295  for (auto& p : from_clause_) {
2296  if (notfirst) {
2297  query_str += ", ";
2298  } else {
2299  notfirst = true;
2300  }
2301  query_str += p->to_string();
2302  }
2303  if (where_clause_) {
2304  query_str += " WHERE " + where_clause_->to_string();
2305  }
2306  if (!groupby_clause_.empty()) {
2307  query_str += " GROUP BY ";
2308  bool notfirst = false;
2309  for (auto& p : groupby_clause_) {
2310  if (notfirst) {
2311  query_str += ", ";
2312  } else {
2313  notfirst = true;
2314  }
2315  query_str += p->to_string();
2316  }
2317  }
2318  if (having_clause_) {
2319  query_str += " HAVING " + having_clause_->to_string();
2320  }
2321  query_str += ";";
2322  return query_str;
2323 }
2324 
// Common analysis for INSERT statements: marks `query` as an insert, resolves
// the target table and records the ids of the columns that will be written.
//
// Throws std::runtime_error when the table does not exist, when it is a view,
// when a named column does not exist, or when the metadata of a physical
// column belonging to a named column cannot be found.
2325 void InsertStmt::analyze(const Catalog_Namespace::Catalog& catalog,
2326  Analyzer::Query& query) const {
2327  query.set_stmt_type(kINSERT);
2328  const TableDescriptor* td = catalog.getMetadataForTable(*table_);
2329  if (td == nullptr) {
2330  throw std::runtime_error("Table " + *table_ + " does not exist.");
2331  }
2332  if (td->isView) {
2333  throw std::runtime_error("Insert to views is not supported yet.");
2334  }
// NOTE(review): the listing numbers jump from 2334 to 2336 here, so a statement
// may be missing from this view - confirm against upstream.
2336  query.set_result_table_id(td->tableId);
2337  std::list<int> result_col_list;
2338  if (column_list_.empty()) {
// No explicit column list: take every column the catalog returns for the table.
// NOTE(review): the three boolean flags are not explained in this view; the
// last one presumably asks for physical columns to be included - confirm.
2339  const std::list<const ColumnDescriptor*> all_cols =
2340  catalog.getAllColumnMetadataForTable(td->tableId, false, false, true);
2341  for (auto cd : all_cols) {
2342  result_col_list.push_back(cd->columnId);
2343  }
2344  } else {
2345  for (auto& c : column_list_) {
2346  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
2347  if (cd == nullptr) {
2348  throw std::runtime_error("Column " + *c + " does not exist.");
2349  }
2350  result_col_list.push_back(cd->columnId);
2351  const auto& col_ti = cd->columnType;
// A column with physical columns must be a geometry column; its physical
// columns are looked up at the column ids directly following its own id.
2352  if (col_ti.get_physical_cols() > 0) {
2353  CHECK(cd->columnType.is_geometry());
2354  for (auto i = 1; i <= col_ti.get_physical_cols(); i++) {
2355  const ColumnDescriptor* pcd =
2356  catalog.getMetadataForColumn(td->tableId, cd->columnId + i);
2357  if (pcd == nullptr) {
2358  throw std::runtime_error("Column " + *c + "'s metadata is incomplete.");
2359  }
2360  result_col_list.push_back(pcd->columnId);
2361  }
2362  }
2363  }
2364  }
2365  query.set_result_col_list(result_col_list);
2366 }
2367 
2368 namespace {
2369 Literal* parse_insert_literal(const rapidjson::Value& literal) {
2370  CHECK(literal.IsObject());
2371  CHECK(literal.HasMember("literal"));
2372  CHECK(literal.HasMember("type"));
2373  auto type = json_str(literal["type"]);
2374  if (type == "NULL") {
2375  return new NullLiteral();
2376  } else if (type == "CHAR" || type == "BOOLEAN") {
2377  auto* val = new std::string(json_str(literal["literal"]));
2378  return new StringLiteral(val);
2379  } else if (type == "DECIMAL") {
2380  CHECK(literal.HasMember("scale"));
2381  CHECK(literal.HasMember("precision"));
2382  auto scale = json_i64(literal["scale"]);
2383  auto precision = json_i64(literal["precision"]);
2384  if (scale == 0) {
2385  auto int_val = std::stol(json_str(literal["literal"]));
2386  return new IntLiteral(int_val);
2387  } else if (precision > sql_constants::kMaxNumericPrecision) {
2388  auto dbl_val = std::stod(json_str(literal["literal"]));
2389  return new DoubleLiteral(dbl_val);
2390  } else {
2391  auto* val = new std::string(json_str(literal["literal"]));
2392  return new FixedPtLiteral(val);
2393  }
2394  } else if (type == "DOUBLE") {
2395  auto dbl_val = std::stod(json_str(literal["literal"]));
2396  return new DoubleLiteral(dbl_val);
2397  } else {
2398  CHECK(false) << "Unexpected calcite data type: " << type;
2399  }
2400  return nullptr;
2401 }
2402 
2403 ArrayLiteral* parse_insert_array_literal(const rapidjson::Value& array) {
2404  CHECK(array.IsArray());
2405  auto json_elements = array.GetArray();
2406  auto* elements = new std::list<Expr*>();
2407  for (const auto& e : json_elements) {
2408  elements->push_back(parse_insert_literal(e));
2409  }
2410  return new ArrayLiteral(elements);
2411 }
2412 } // namespace
2413 
2414 InsertValuesStmt::InsertValuesStmt(const rapidjson::Value& payload)
2415  : InsertStmt(nullptr, nullptr) {
2416  CHECK(payload.HasMember("name"));
2417  table_ = std::make_unique<std::string>(json_str(payload["name"]));
2418 
2419  if (payload.HasMember("columns")) {
2420  CHECK(payload["columns"].IsArray());
2421  for (auto& column : payload["columns"].GetArray()) {
2422  std::string s = json_str(column);
2423  column_list_.emplace_back(std::make_unique<std::string>(s));
2424  }
2425  }
2426 
2427  CHECK(payload.HasMember("values") && payload["values"].IsArray());
2428  auto tuples = payload["values"].GetArray();
2429  if (tuples.Empty()) {
2430  throw std::runtime_error("Values statement cannot be empty");
2431  }
2432  values_lists_.reserve(tuples.Size());
2433  for (const auto& json_tuple : tuples) {
2434  auto values_list = std::make_unique<ValuesList>();
2435  CHECK(json_tuple.IsArray());
2436  auto tuple = json_tuple.GetArray();
2437  for (const auto& value : tuple) {
2438  CHECK(value.IsObject());
2439  if (value.HasMember("array")) {
2440  values_list->push_back(parse_insert_array_literal(value["array"]));
2441  } else {
2442  values_list->push_back(parse_insert_literal(value));
2443  }
2444  }
2445  values_lists_.push_back(std::move(values_list));
2446  }
2447 }
2448 
2450  Analyzer::Query& query) const {
2451  InsertStmt::analyze(catalog, query);
2452  size_t list_size = values_lists_[0]->get_value_list().size();
2453  if (!column_list_.empty()) {
2454  if (list_size != column_list_.size()) {
2455  throw std::runtime_error(
2456  "Numbers of columns and values don't match for the "
2457  "insert.");
2458  }
2459  } else {
2460  const auto tableId = query.get_result_table_id();
2461  const std::list<const ColumnDescriptor*> non_phys_cols =
2462  catalog.getAllColumnMetadataForTable(tableId, false, false, false);
2463  if (non_phys_cols.size() != list_size) {
2464  throw std::runtime_error(
2465  "Number of columns in table does not match the list of values given in the "
2466  "insert.");
2467  }
2468  }
2469  std::vector<const ColumnDescriptor*> cds;
2470  cds.reserve(query.get_result_col_list().size());
2471  for (auto id : query.get_result_col_list()) {
2472  const auto* cd = catalog.getMetadataForColumn(query.get_result_table_id(), id);
2473  CHECK(cd);
2474  cds.push_back(cd);
2475  }
2476  auto& query_values_lists = query.get_values_lists();
2477  query_values_lists.resize(values_lists_.size());
2478  for (size_t i = 0; i < values_lists_.size(); ++i) {
2479  const auto& values_list = values_lists_[i]->get_value_list();
2480  if (values_list.size() != list_size) {
2481  throw std::runtime_error(
2482  "Insert values lists should be of the same size. Expected: " +
2483  std::to_string(list_size) + ", Got: " + std::to_string(values_list.size()));
2484  }
2485  auto& query_values_list = query_values_lists[i];
2486  size_t cds_id = 0;
2487  for (auto& v : values_list) {
2488  auto e = v->analyze(catalog, query);
2489  const auto* cd = cds[cds_id];
2490  const auto& col_ti = cd->columnType;
2491  if (col_ti.get_notnull()) {
2492  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
2493  if (c != nullptr && c->get_is_null()) {
2494  throw std::runtime_error("Cannot insert NULL into column " + cd->columnName);
2495  }
2496  }
2497  e = e->add_cast(col_ti);
2498  query_values_list.emplace_back(new Analyzer::TargetEntry("", e, false));
2499  ++cds_id;
2500 
2501  if (col_ti.get_physical_cols() > 0) {
2502  CHECK(cd->columnType.is_geometry());
2503  auto c = dynamic_cast<const Analyzer::Constant*>(e.get());
2504  if (!c) {
2505  auto uoper = std::dynamic_pointer_cast<Analyzer::UOper>(e);
2506  if (uoper && uoper->get_optype() == kCAST) {
2507  c = dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
2508  }
2509  }
2510  bool is_null = false;
2511  std::string* geo_string{nullptr};
2512  if (c) {
2513  is_null = c->get_is_null();
2514  if (!is_null) {
2515  geo_string = c->get_constval().stringval;
2516  }
2517  }
2518  if (!is_null && !geo_string) {
2519  throw std::runtime_error("Expecting a WKT or WKB hex string for column " +
2520  cd->columnName);
2521  }
2522  std::vector<double> coords;
2523  std::vector<double> bounds;
2524  std::vector<int> ring_sizes;
2525  std::vector<int> poly_rings;
2526  int render_group =
2527  0; // @TODO simon.eves where to get render_group from in this context?!
2528  SQLTypeInfo import_ti{cd->columnType};
2529  if (!is_null) {
2531  *geo_string, import_ti, coords, bounds, ring_sizes, poly_rings)) {
2532  throw std::runtime_error("Cannot read geometry to insert into column " +
2533  cd->columnName);
2534  }
2535  if (coords.empty()) {
2536  // Importing from geo_string WKT resulted in empty coords: dealing with a NULL
2537  is_null = true;
2538  }
2539  if (cd->columnType.get_type() != import_ti.get_type()) {
2540  // allow POLYGON to be inserted into MULTIPOLYGON column
2541  if (!(import_ti.get_type() == SQLTypes::kPOLYGON &&
2542  cd->columnType.get_type() == SQLTypes::kMULTIPOLYGON)) {
2543  throw std::runtime_error(
2544  "Imported geometry doesn't match the type of column " + cd->columnName);
2545  }
2546  }
2547  } else {
2548  // Special case for NULL POINT, push NULL representation to coords
2549  if (cd->columnType.get_type() == kPOINT) {
2550  if (!coords.empty()) {
2551  throw std::runtime_error(
2552  "NULL POINT with unexpected coordinates in column " + cd->columnName);
2553  }
2554  coords.push_back(NULL_ARRAY_DOUBLE);
2555  coords.push_back(NULL_DOUBLE);
2556  }
2557  }
2558 
2559  // TODO: check if import SRID matches columns SRID, may need to transform before
2560  // inserting
2561 
2562  const auto* cd_coords = cds[cds_id];
2563  CHECK_EQ(cd_coords->columnType.get_type(), kARRAY);
2564  CHECK_EQ(cd_coords->columnType.get_subtype(), kTINYINT);
2565  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2566  if (!is_null || cd->columnType.get_type() == kPOINT) {
2567  auto compressed_coords = Geospatial::compress_coords(coords, col_ti);
2568  for (auto cc : compressed_coords) {
2569  Datum d;
2570  d.tinyintval = cc;
2571  auto e = makeExpr<Analyzer::Constant>(kTINYINT, false, d);
2572  value_exprs.push_back(e);
2573  }
2574  }
2575  query_values_list.emplace_back(new Analyzer::TargetEntry(
2576  "",
2577  makeExpr<Analyzer::Constant>(cd_coords->columnType, is_null, value_exprs),
2578  false));
2579  ++cds_id;
2580 
2581  if (cd->columnType.get_type() == kMULTILINESTRING ||
2582  cd->columnType.get_type() == kPOLYGON ||
2583  cd->columnType.get_type() == kMULTIPOLYGON) {
2584  // Put [linest]ring sizes array into separate physical column
2585  const auto* cd_ring_sizes = cds[cds_id];
2586  CHECK(cd_ring_sizes);
2587  CHECK_EQ(cd_ring_sizes->columnType.get_type(), kARRAY);
2588  CHECK_EQ(cd_ring_sizes->columnType.get_subtype(), kINT);
2589  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2590  if (!is_null) {
2591  for (auto c : ring_sizes) {
2592  Datum d;
2593  d.intval = c;
2594  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
2595  value_exprs.push_back(e);
2596  }
2597  }
2598  query_values_list.emplace_back(new Analyzer::TargetEntry(
2599  "",
2600  makeExpr<Analyzer::Constant>(
2601  cd_ring_sizes->columnType, is_null, value_exprs),
2602  false));
2603  ++cds_id;
2604 
2605  if (cd->columnType.get_type() == kMULTIPOLYGON) {
2606  // Put poly_rings array into separate physical column
2607  const auto* cd_poly_rings = cds[cds_id];
2608  CHECK(cd_poly_rings);
2609  CHECK_EQ(cd_poly_rings->columnType.get_type(), kARRAY);
2610  CHECK_EQ(cd_poly_rings->columnType.get_subtype(), kINT);
2611  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2612  if (!is_null) {
2613  for (auto c : poly_rings) {
2614  Datum d;
2615  d.intval = c;
2616  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
2617  value_exprs.push_back(e);
2618  }
2619  }
2620  query_values_list.emplace_back(new Analyzer::TargetEntry(
2621  "",
2622  makeExpr<Analyzer::Constant>(
2623  cd_poly_rings->columnType, is_null, value_exprs),
2624  false));
2625  ++cds_id;
2626  }
2627  }
2628 
2629  if (cd->columnType.get_type() == kMULTIPOINT ||
2630  cd->columnType.get_type() == kLINESTRING ||
2631  cd->columnType.get_type() == kMULTILINESTRING ||
2632  cd->columnType.get_type() == kPOLYGON ||
2633  cd->columnType.get_type() == kMULTIPOLYGON) {
2634  const auto* cd_bounds = cds[cds_id];
2635  CHECK(cd_bounds);
2636  CHECK_EQ(cd_bounds->columnType.get_type(), kARRAY);
2637  CHECK_EQ(cd_bounds->columnType.get_subtype(), kDOUBLE);
2638  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2639  if (!is_null) {
2640  for (auto b : bounds) {
2641  Datum d;
2642  d.doubleval = b;
2643  auto e = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
2644  value_exprs.push_back(e);
2645  }
2646  }
2647  query_values_list.emplace_back(new Analyzer::TargetEntry(
2648  "",
2649  makeExpr<Analyzer::Constant>(cd_bounds->columnType, is_null, value_exprs),
2650  false));
2651  ++cds_id;
2652  }
2653 
2654  if (cd->columnType.get_type() == kPOLYGON ||
2655  cd->columnType.get_type() == kMULTIPOLYGON) {
2656  // Put render group into separate physical column
2657  const auto* cd_render_group = cds[cds_id];
2658  CHECK(cd_render_group);
2659  CHECK_EQ(cd_render_group->columnType.get_type(), kINT);
2660  Datum d;
2661  d.intval = render_group;
2662  query_values_list.emplace_back(new Analyzer::TargetEntry(
2663  "",
2664  makeExpr<Analyzer::Constant>(cd_render_group->columnType, is_null, d),
2665  false));
2666  ++cds_id;
2667  }
2668  }
2669  }
2670  }
2671 }
2672 
2674  bool read_only_mode) {
2675  if (read_only_mode) {
2676  throw std::runtime_error("INSERT values invalid in read only mode.");
2677  }
2678  auto execute_read_lock =
2682  auto& catalog = session.getCatalog();
2683  const auto td_with_lock =
2685  catalog, *table_);
2688  *table_)) {
2689  throw std::runtime_error("User has no insert privileges on " + *table_ + ".");
2690  }
2691  Analyzer::Query query;
2692  analyze(catalog, query);
2693 
2694  // Take an insert data write lock, which prevents concurrent inserts.
2695  const auto insert_data_lock =
2697 
2698  // NOTE(max): we do the same checks as below just a few calls earlier in analyze().
2699  // Do we keep those intentionally to make sure nothing changed in between w/o
2700  // catalog locks or is it just a duplicate work?
2701  auto td = td_with_lock();
2702  CHECK(td);
2703  if (td->isView) {
2704  throw std::runtime_error("Singleton inserts on views is not supported.");
2705  }
2707 
2709  RelAlgExecutor ra_executor(executor.get(), catalog);
2710 
2711  if (!leafs_connector_) {
2712  leafs_connector_ = std::make_unique<Fragmenter_Namespace::LocalInsertConnector>();
2713  }
2715  try {
2716  ra_executor.executeSimpleInsert(query, insert_data_loader, session);
2717  } catch (...) {
2718  try {
2719  leafs_connector_->rollback(session, td->tableId);
2720  } catch (std::exception& e) {
2721  LOG(ERROR) << "An error occurred during insert rollback attempt. Table id: "
2722  << td->tableId << ", Error: " << e.what();
2723  }
2724  throw;
2725  }
2726  if (!td->isTemporaryTable()) {
2727  leafs_connector_->checkpoint(session, td->tableId);
2728  }
2729 }
2730 
2732  Analyzer::Query& query) const {
2733  throw std::runtime_error("UPDATE statement not supported yet.");
2734 }
2735 
2737  Analyzer::Query& query) const {
2738  throw std::runtime_error("DELETE statement not supported yet.");
2739 }
2740 
2741 namespace {
2742 
2744  const auto& col_ti = cd.columnType;
2745  if (!col_ti.is_integer() && !col_ti.is_time() &&
2746  !(col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT)) {
2747  throw std::runtime_error("Cannot shard on type " + col_ti.get_type_name() +
2748  ", encoding " + col_ti.get_compression_name());
2749  }
2750 }
2751 
2752 size_t shard_column_index(const std::string& name,
2753  const std::list<ColumnDescriptor>& columns) {
2754  size_t index = 1;
2755  for (const auto& cd : columns) {
2756  if (cd.columnName == name) {
2758  return index;
2759  }
2760  ++index;
2761  if (cd.columnType.is_geometry()) {
2762  index += cd.columnType.get_physical_cols();
2763  }
2764  }
2765  // Not found, return 0
2766  return 0;
2767 }
2768 
2769 size_t sort_column_index(const std::string& name,
2770  const std::list<ColumnDescriptor>& columns) {
2771  size_t index = 1;
2772  for (const auto& cd : columns) {
2773  if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
2774  return index;
2775  }
2776  ++index;
2777  if (cd.columnType.is_geometry()) {
2778  index += cd.columnType.get_physical_cols();
2779  }
2780  }
2781  // Not found, return 0
2782  return 0;
2783 }
2784 
2785 void set_string_field(rapidjson::Value& obj,
2786  const std::string& field_name,
2787  const std::string& field_value,
2788  rapidjson::Document& document) {
2789  rapidjson::Value field_name_json_str;
2790  field_name_json_str.SetString(
2791  field_name.c_str(), field_name.size(), document.GetAllocator());
2792  rapidjson::Value field_value_json_str;
2793  field_value_json_str.SetString(
2794  field_value.c_str(), field_value.size(), document.GetAllocator());
2795  obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
2796 }
2797 
2799  const ShardKeyDef* shard_key_def,
2800  const std::vector<SharedDictionaryDef>& shared_dict_defs) {
2801  rapidjson::Document document;
2802  auto& allocator = document.GetAllocator();
2803  rapidjson::Value arr(rapidjson::kArrayType);
2804  if (shard_key_def) {
2805  rapidjson::Value shard_key_obj(rapidjson::kObjectType);
2806  set_string_field(shard_key_obj, "type", "SHARD KEY", document);
2807  set_string_field(shard_key_obj, "name", shard_key_def->get_column(), document);
2808  arr.PushBack(shard_key_obj, allocator);
2809  }
2810  for (const auto& shared_dict_def : shared_dict_defs) {
2811  rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
2812  set_string_field(shared_dict_obj, "type", "SHARED DICTIONARY", document);
2813  set_string_field(shared_dict_obj, "name", shared_dict_def.get_column(), document);
2815  shared_dict_obj, "foreign_table", shared_dict_def.get_foreign_table(), document);
2816  set_string_field(shared_dict_obj,
2817  "foreign_column",
2818  shared_dict_def.get_foreign_column(),
2819  document);
2820  arr.PushBack(shared_dict_obj, allocator);
2821  }
2822  rapidjson::StringBuffer buffer;
2823  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
2824  arr.Accept(writer);
2825  return buffer.GetString();
2826 }
2827 
2828 template <typename LITERAL_TYPE,
2829  typename ASSIGNMENT,
2830  typename VALIDATE = DefaultValidate<LITERAL_TYPE>>
2831 decltype(auto) get_property_value(const NameValueAssign* p,
2832  ASSIGNMENT op,
2833  VALIDATE validate = VALIDATE()) {
2834  const auto val = validate(p);
2835  return op(val);
2836 }
2837 
2839  const NameValueAssign* p,
2840  const std::list<ColumnDescriptor>& columns) {
2841  auto assignment = [&td](const auto val) { td.storageType = val; };
2842  return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
2843  p, assignment);
2844 }
2845 
2847  const NameValueAssign* p,
2848  const std::list<ColumnDescriptor>& columns) {
2849  return get_property_value<IntLiteral>(p,
2850  [&td](const auto val) { td.maxFragRows = val; });
2851 }
2852 
2854  const NameValueAssign* p,
2855  const std::list<ColumnDescriptor>& columns) {
2856  return get_property_value<IntLiteral>(
2857  p, [&df_td](const auto val) { df_td.maxFragRows = val; });
2858 }
2859 
2861  const NameValueAssign* p,
2862  const std::list<ColumnDescriptor>& columns) {
2863  return get_property_value<IntLiteral>(p,
2864  [&td](const auto val) { td.maxChunkSize = val; });
2865 }
2866 
2868  DataframeTableDescriptor& df_td,
2869  const NameValueAssign* p,
2870  const std::list<ColumnDescriptor>& columns) {
2871  return get_property_value<IntLiteral>(
2872  p, [&df_td](const auto val) { df_td.maxChunkSize = val; });
2873 }
2874 
2876  const NameValueAssign* p,
2877  const std::list<ColumnDescriptor>& columns) {
2878  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2879  if (val.size() != 1) {
2880  throw std::runtime_error("Length of DELIMITER must be equal to 1.");
2881  }
2882  df_td.delimiter = val;
2883  });
2884 }
2885 
2887  const NameValueAssign* p,
2888  const std::list<ColumnDescriptor>& columns) {
2889  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2890  if (val == "FALSE") {
2891  df_td.hasHeader = false;
2892  } else if (val == "TRUE") {
2893  df_td.hasHeader = true;
2894  } else {
2895  throw std::runtime_error("Option HEADER support only 'true' or 'false' values.");
2896  }
2897  });
2898 }
2899 
2901  const NameValueAssign* p,
2902  const std::list<ColumnDescriptor>& columns) {
2903  return get_property_value<IntLiteral>(p,
2904  [&td](const auto val) { td.fragPageSize = val; });
2905 }
2907  const NameValueAssign* p,
2908  const std::list<ColumnDescriptor>& columns) {
2909  return get_property_value<IntLiteral>(p, [&td](const auto val) { td.maxRows = val; });
2910 }
2911 
2913  const NameValueAssign* p,
2914  const std::list<ColumnDescriptor>& columns) {
2915  return get_property_value<IntLiteral>(
2916  p, [&df_td](const auto val) { df_td.skipRows = val; });
2917 }
2918 
2920  const NameValueAssign* p,
2921  const std::list<ColumnDescriptor>& columns) {
2922  return get_property_value<StringLiteral>(p, [&td](const auto partitions_uc) {
2923  if (partitions_uc != "SHARDED" && partitions_uc != "REPLICATED") {
2924  throw std::runtime_error("PARTITIONS must be SHARDED or REPLICATED");
2925  }
2926  if (td.shardedColumnId != 0 && partitions_uc == "REPLICATED") {
2927  throw std::runtime_error(
2928  "A table cannot be sharded and replicated at the same time");
2929  };
2930  td.partitions = partitions_uc;
2931  });
2932 }
2934  const NameValueAssign* p,
2935  const std::list<ColumnDescriptor>& columns) {
2936  if (!td.shardedColumnId) {
2937  throw std::runtime_error("SHARD KEY must be defined.");
2938  }
2939  return get_property_value<IntLiteral>(p, [&td](const auto shard_count) {
2940  if (g_leaf_count && shard_count % g_leaf_count) {
2941  throw std::runtime_error(
2942  "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2943  }
2944  td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
2945  if (!td.shardedColumnId && !td.nShards) {
2946  throw std::runtime_error(
2947  "Must specify the number of shards through the SHARD_COUNT option");
2948  };
2949  });
2950 }
2951 
2952 decltype(auto) get_vacuum_def(TableDescriptor& td,
2953  const NameValueAssign* p,
2954  const std::list<ColumnDescriptor>& columns) {
2955  return get_property_value<StringLiteral>(p, [&td](const auto vacuum_uc) {
2956  if (vacuum_uc != "IMMEDIATE" && vacuum_uc != "DELAYED") {
2957  throw std::runtime_error("VACUUM must be IMMEDIATE or DELAYED");
2958  }
2959  td.hasDeletedCol = boost::iequals(vacuum_uc, "IMMEDIATE") ? false : true;
2960  });
2961 }
2962 
2964  const NameValueAssign* p,
2965  const std::list<ColumnDescriptor>& columns) {
2966  return get_property_value<StringLiteral>(p, [&td, &columns](const auto sort_upper) {
2967  td.sortedColumnId = sort_column_index(sort_upper, columns);
2968  if (!td.sortedColumnId) {
2969  throw std::runtime_error("Specified sort column " + sort_upper + " doesn't exist");
2970  }
2971  });
2972 }
2973 
2975  const NameValueAssign* p,
2976  const std::list<ColumnDescriptor>& columns) {
2977  auto assignment = [&td](const auto val) {
2978  td.maxRollbackEpochs =
2979  val < 0 ? -1 : val; // Anything < 0 means unlimited rollbacks. Note that 0
2980  // still means keeping a shadow copy of data/metdata
2981  // between epochs so bad writes can be rolled back
2982  };
2983  return get_property_value<IntLiteral, decltype(assignment), PositiveOrZeroValidate>(
2984  p, assignment);
2985 }
2986 
2987 static const std::map<const std::string, const TableDefFuncPtr> tableDefFuncMap = {
2988  {"fragment_size"s, get_frag_size_def},
2989  {"max_chunk_size"s, get_max_chunk_size_def},
2990  {"page_size"s, get_page_size_def},
2991  {"max_rows"s, get_max_rows_def},
2992  {"partitions"s, get_partions_def},
2993  {"shard_count"s, get_shard_count_def},
2994  {"vacuum"s, get_vacuum_def},
2995  {"sort_column"s, get_sort_column_def},
2996  {"storage_type"s, get_storage_type},
2997  {"max_rollback_epochs", get_max_rollback_epochs_def}};
2998 
3000  const std::unique_ptr<NameValueAssign>& p,
3001  const std::list<ColumnDescriptor>& columns) {
3002  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
3003  if (it == tableDefFuncMap.end()) {
3004  throw std::runtime_error(
3005  "Invalid CREATE TABLE option " + *p->get_name() +
3006  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
3007  "MAX_ROWS, "
3008  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE.");
3009  }
3010  return it->second(td, p.get(), columns);
3011 }
3012 
3014  const std::unique_ptr<NameValueAssign>& p,
3015  const std::list<ColumnDescriptor>& columns) {
3016  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
3017  if (it == tableDefFuncMap.end()) {
3018  throw std::runtime_error(
3019  "Invalid CREATE TABLE AS option " + *p->get_name() +
3020  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
3021  "MAX_ROWS, "
3022  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE, "
3023  "USE_SHARED_DICTIONARIES or FORCE_GEO_COMPRESSION.");
3024  }
3025  return it->second(td, p.get(), columns);
3026 }
3027 
3028 static const std::map<const std::string, const DataframeDefFuncPtr> dataframeDefFuncMap =
3029  {{"fragment_size"s, get_frag_size_dataframe_def},
3030  {"max_chunk_size"s, get_max_chunk_size_dataframe_def},
3031  {"skip_rows"s, get_skip_rows_def},
3032  {"delimiter"s, get_delimiter_def},
3033  {"header"s, get_header_def}};
3034 
3036  const std::unique_ptr<NameValueAssign>& p,
3037  const std::list<ColumnDescriptor>& columns) {
3038  const auto it =
3039  dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
3040  if (it == dataframeDefFuncMap.end()) {
3041  throw std::runtime_error(
3042  "Invalid CREATE DATAFRAME option " + *p->get_name() +
3043  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
3044  }
3045  return it->second(df_td, p.get(), columns);
3046 }
3047 
3048 std::unique_ptr<ColumnDef> column_from_json(const rapidjson::Value& element) {
3049  CHECK(element.HasMember("name"));
3050  auto col_name = std::make_unique<std::string>(json_str(element["name"]));
3051  CHECK(element.HasMember("sqltype"));
3052  const auto sql_types = to_sql_type(json_str(element["sqltype"]));
3053 
3054  // decimal / numeric precision / scale
3055  int precision = -1;
3056  int scale = -1;
3057  if (element.HasMember("precision")) {
3058  precision = json_i64(element["precision"]);
3059  }
3060  if (element.HasMember("scale")) {
3061  scale = json_i64(element["scale"]);
3062  }
3063 
3064  std::optional<int64_t> array_size;
3065  if (element.HasMember("arraySize")) {
3066  // We do not yet support geo arrays
3067  array_size = json_i64(element["arraySize"]);
3068  }
3069  std::unique_ptr<SQLType> sql_type;
3070  if (element.HasMember("subtype")) {
3071  CHECK(element.HasMember("coordinateSystem"));
3072  const auto subtype_sql_types = to_sql_type(json_str(element["subtype"]));
3073  sql_type =
3074  std::make_unique<SQLType>(subtype_sql_types,
3075  static_cast<int>(sql_types),
3076  static_cast<int>(json_i64(element["coordinateSystem"])),
3077  false);
3078  } else if (precision > 0 && scale > 0) {
3079  sql_type = std::make_unique<SQLType>(sql_types,
3080  precision,
3081  scale,
3082  /*is_array=*/array_size.has_value(),
3083  array_size ? *array_size : -1);
3084  } else if (precision > 0) {
3085  sql_type = std::make_unique<SQLType>(sql_types,
3086  precision,
3087  0,
3088  /*is_array=*/array_size.has_value(),
3089  array_size ? *array_size : -1);
3090  } else {
3091  sql_type = std::make_unique<SQLType>(sql_types,
3092  /*is_array=*/array_size.has_value(),
3093  array_size ? *array_size : -1);
3094  }
3095  CHECK(sql_type);
3096 
3097  CHECK(element.HasMember("nullable"));
3098  const auto nullable = json_bool(element["nullable"]);
3099  std::unique_ptr<ColumnConstraintDef> constraint_def;
3100  StringLiteral* str_literal = nullptr;
3101  if (element.HasMember("default") && !element["default"].IsNull()) {
3102  std::string* defaultval = new std::string(json_str(element["default"]));
3103  boost::algorithm::trim_if(*defaultval, boost::is_any_of(" \"'`"));
3104  str_literal = new StringLiteral(defaultval);
3105  }
3106 
3107  constraint_def = std::make_unique<ColumnConstraintDef>(/*notnull=*/!nullable,
3108  /*unique=*/false,
3109  /*primarykey=*/false,
3110  /*defaultval=*/str_literal);
3111  std::unique_ptr<CompressDef> compress_def;
3112  if (element.HasMember("encodingType") && !element["encodingType"].IsNull()) {
3113  std::string encoding_type = json_str(element["encodingType"]);
3114  CHECK(element.HasMember("encodingSize"));
3115  auto encoding_name = std::make_unique<std::string>(json_str(element["encodingType"]));
3116  compress_def = std::make_unique<CompressDef>(encoding_name.release(),
3117  json_i64(element["encodingSize"]));
3118  }
3119  return std::make_unique<ColumnDef>(col_name.release(),
3120  sql_type.release(),
3121  compress_def ? compress_def.release() : nullptr,
3122  constraint_def ? constraint_def.release() : nullptr);
3123 }
3124 
3125 void parse_elements(const rapidjson::Value& payload,
3126  std::string element_name,
3127  std::string& table_name,
3128  std::list<std::unique_ptr<TableElement>>& table_element_list) {
3129  const auto elements = payload[element_name].GetArray();
3130  for (const auto& element : elements) {
3131  CHECK(element.IsObject());
3132  CHECK(element.HasMember("type"));
3133  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
3134  auto col_def = column_from_json(element);
3135  table_element_list.emplace_back(std::move(col_def));
3136  } else if (json_str(element["type"]) == "SQL_COLUMN_CONSTRAINT") {
3137  CHECK(element.HasMember("name"));
3138  if (json_str(element["name"]) == "SHARD_KEY") {
3139  CHECK(element.HasMember("columns"));
3140  CHECK(element["columns"].IsArray());
3141  const auto& columns = element["columns"].GetArray();
3142  if (columns.Size() != size_t(1)) {
3143  throw std::runtime_error("Only one shard column is currently supported.");
3144  }
3145  auto shard_key_def = std::make_unique<ShardKeyDef>(json_str(columns[0]));
3146  table_element_list.emplace_back(std::move(shard_key_def));
3147  } else if (json_str(element["name"]) == "SHARED_DICT") {
3148  CHECK(element.HasMember("columns"));
3149  CHECK(element["columns"].IsArray());
3150  const auto& columns = element["columns"].GetArray();
3151  if (columns.Size() != size_t(1)) {
3152  throw std::runtime_error(
3153  R"(Only one column per shared dictionary entry is currently supported. Use multiple SHARED DICT statements to share dictionaries from multiple columns.)");
3154  }
3155  CHECK(element.HasMember("references") && element["references"].IsObject());
3156  const auto& references = element["references"].GetObject();
3157  std::string references_table_name;
3158  if (references.HasMember("table")) {
3159  references_table_name = json_str(references["table"]);
3160  } else {
3161  references_table_name = table_name;
3162  }
3163  CHECK(references.HasMember("column"));
3164 
3165  auto shared_dict_def = std::make_unique<SharedDictionaryDef>(
3166  json_str(columns[0]), references_table_name, json_str(references["column"]));
3167  table_element_list.emplace_back(std::move(shared_dict_def));
3168 
3169  } else {
3170  LOG(FATAL) << "Unsupported type for SQL_COLUMN_CONSTRAINT: "
3171  << json_str(element["name"]);
3172  }
3173  } else {
3174  LOG(FATAL) << "Unsupported element type for CREATE TABLE: "
3175  << element["type"].GetString();
3176  }
3177  }
3178 }
3179 } // namespace
3180 
3181 CreateTableStmt::CreateTableStmt(const rapidjson::Value& payload) {
3182  CHECK(payload.HasMember("name"));
3183  table_ = std::make_unique<std::string>(json_str(payload["name"]));
3184  CHECK(payload.HasMember("elements"));
3185  CHECK(payload["elements"].IsArray());
3186 
3187  is_temporary_ = false;
3188  if (payload.HasMember("temporary")) {
3189  is_temporary_ = json_bool(payload["temporary"]);
3190  }
3191 
3192  if_not_exists_ = false;
3193  if (payload.HasMember("ifNotExists")) {
3194  if_not_exists_ = json_bool(payload["ifNotExists"]);
3195  }
3196 
3197  parse_elements(payload, "elements", *table_, table_element_list_);
3198 
3199  parse_options(payload, storage_options_);
3200 }
3201 
3203  TableDescriptor& td,
3204  std::list<ColumnDescriptor>& columns,
3205  std::vector<SharedDictionaryDef>& shared_dict_defs) {
3206  std::unordered_set<std::string> uc_col_names;
3207  const auto& catalog = session.getCatalog();
3208  const ShardKeyDef* shard_key_def{nullptr};
3209  for (auto& e : table_element_list_) {
3210  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
3211  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
3213  this, shared_dict_def, columns, shared_dict_defs, catalog);
3214  shared_dict_defs.push_back(*shared_dict_def);
3215  continue;
3216  }
3217  if (dynamic_cast<ShardKeyDef*>(e.get())) {
3218  if (shard_key_def) {
3219  throw std::runtime_error("Specified more than one shard key");
3220  }
3221  shard_key_def = static_cast<const ShardKeyDef*>(e.get());
3222  continue;
3223  }
3224  if (!dynamic_cast<ColumnDef*>(e.get())) {
3225  throw std::runtime_error("Table constraints are not supported yet.");
3226  }
3227  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
3228  ColumnDescriptor cd;
3229  cd.columnName = *coldef->get_column_name();
3231  setColumnDescriptor(cd, coldef);
3232  columns.push_back(cd);
3233  }
3234 
3235  ddl_utils::set_default_table_attributes(*table_, td, columns.size());
3236 
3237  if (shard_key_def) {
3238  td.shardedColumnId = shard_column_index(shard_key_def->get_column(), columns);
3239  if (!td.shardedColumnId) {
3240  throw std::runtime_error("Specified shard column " + shard_key_def->get_column() +
3241  " doesn't exist");
3242  }
3243  }
3244  if (is_temporary_) {
3246  } else {
3248  }
3249  if (!storage_options_.empty()) {
3250  for (auto& p : storage_options_) {
3251  get_table_definitions(td, p, columns);
3252  }
3253  }
3254  if (td.shardedColumnId && !td.nShards) {
3255  throw std::runtime_error("SHARD_COUNT needs to be specified with SHARD_KEY.");
3256  }
3257  td.keyMetainfo = serialize_key_metainfo(shard_key_def, shared_dict_defs);
3258 }
3259 
3261  bool read_only_mode) {
3262  if (read_only_mode) {
3263  throw std::runtime_error("CREATE TABLE invalid in read only mode.");
3264  }
3265  auto& catalog = session.getCatalog();
3266 
3267  const auto execute_write_lock =
3271 
3272  // check access privileges
3275  throw std::runtime_error("Table " + *table_ +
3276  " will not be created. User has no create privileges.");
3277  }
3278 
3279  if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
3280  return;
3281  }
3282 
3283  TableDescriptor td;
3284  std::list<ColumnDescriptor> columns;
3285  std::vector<SharedDictionaryDef> shared_dict_defs;
3286 
3287  executeDryRun(session, td, columns, shared_dict_defs);
3288  td.userId = session.get_currentUser().userId;
3289 
3290  catalog.createShardedTable(td, columns, shared_dict_defs);
3291  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
3292  // privileges
3293  SysCatalog::instance().createDBObject(
3294  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
3295 }
3296 
3297 CreateDataframeStmt::CreateDataframeStmt(const rapidjson::Value& payload) {
3298  CHECK(payload.HasMember("name"));
3299  table_ = std::make_unique<std::string>(json_str(payload["name"]));
3300 
3301  CHECK(payload.HasMember("elementList"));
3302  parse_elements(payload, "elementList", *table_, table_element_list_);
3303 
3304  CHECK(payload.HasMember("filePath"));
3305  std::string fs = json_str(payload["filePath"]);
3306  // strip leading/trailing spaces/quotes/single quotes
3307  boost::algorithm::trim_if(fs, boost::is_any_of(" \"'`"));
3308  filename_ = std::make_unique<std::string>(fs);
3309 
3310  parse_options(payload, storage_options_);
3311 }
3312 
3314  bool read_only_mode) {
3315  if (read_only_mode) {
3316  throw std::runtime_error("CREATE DATAFRAME invalid in read only mode.");
3317  }
3318  auto& catalog = session.getCatalog();
3319 
3320  const auto execute_write_lock =
3324 
3325  // check access privileges
3328  throw std::runtime_error("Table " + *table_ +
3329  " will not be created. User has no create privileges.");
3330  }
3331 
3332  if (catalog.getMetadataForTable(*table_) != nullptr) {
3333  throw std::runtime_error("Table " + *table_ + " already exists.");
3334  }
3336  std::list<ColumnDescriptor> columns;
3337  std::vector<SharedDictionaryDef> shared_dict_defs;
3338 
3339  std::unordered_set<std::string> uc_col_names;
3340  for (auto& e : table_element_list_) {
3341  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
3342  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
3344  this, shared_dict_def, columns, shared_dict_defs, catalog);
3345  shared_dict_defs.push_back(*shared_dict_def);
3346  continue;
3347  }
3348  if (!dynamic_cast<ColumnDef*>(e.get())) {
3349  throw std::runtime_error("Table constraints are not supported yet.");
3350  }
3351  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
3352  ColumnDescriptor cd;
3353  cd.columnName = *coldef->get_column_name();
3354  const auto uc_col_name = boost::to_upper_copy<std::string>(cd.columnName);
3355  const auto it_ok = uc_col_names.insert(uc_col_name);
3356  if (!it_ok.second) {
3357  throw std::runtime_error("Column '" + cd.columnName + "' defined more than once");
3358  }
3359  setColumnDescriptor(cd, coldef);
3360  columns.push_back(cd);
3361  }
3362 
3363  df_td.tableName = *table_;
3364  df_td.nColumns = columns.size();
3365  df_td.isView = false;
3366  df_td.fragmenter = nullptr;
3371  df_td.maxRows = DEFAULT_MAX_ROWS;
3373  if (!storage_options_.empty()) {
3374  for (auto& p : storage_options_) {
3375  get_dataframe_definitions(df_td, p, columns);
3376  }
3377  }
3378  df_td.keyMetainfo = serialize_key_metainfo(nullptr, shared_dict_defs);
3379  df_td.userId = session.get_currentUser().userId;
3380  df_td.storageType = *filename_;
3381 
3382  catalog.createShardedTable(df_td, columns, shared_dict_defs);
3383  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
3384  // privileges
3385  SysCatalog::instance().createDBObject(
3386  session.get_currentUser(), df_td.tableName, TableDBObjectType, catalog);
3387 }
3388 
// Plans `select_stmt` through the catalog's Calcite manager and runs the resulting
// plan with a RelAlgExecutor. On return `targets` holds the result's target metadata
// and the function yields the result's rows. `validate_only`, `allow_interrupt` and
// `outer_fragment_indices` are passed positionally into the ExecutionOptions
// initializer below.
3389 std::shared_ptr<ResultSet> getResultSet(QueryStateProxy query_state_proxy,
3390  const std::string select_stmt,
3391  std::vector<TargetMetaInfo>& targets,
3392  bool validate_only = false,
3393  std::vector<size_t> outer_fragment_indices = {},
3394  bool allow_interrupt = false) {
3395  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
3396  auto& catalog = session->getCatalog();
3397 
  // The session's device type is only consulted in CUDA builds; otherwise the query
  // is executed on the CPU.
3399 #ifdef HAVE_CUDA
3400  const auto device_type = session->get_executor_device_type();
3401 #else
3402  const auto device_type = ExecutorDeviceType::CPU;
3403 #endif // HAVE_CUDA
3404  auto calcite_mgr = catalog.getCalciteMgr();
3405 
3406  // TODO MAT this should actually get the global or the session parameter for
3407  // view optimization
3408  const auto calciteQueryParsingOption =
3409  calcite_mgr->getCalciteQueryParsingOption(true, false, true);
3410  const auto calciteOptimizationOption = calcite_mgr->getCalciteOptimizationOption(
3411  false,
3413  {},
  // The statement is passed through pg_shim() before being handed to Calcite.
3415  const auto query_ra = calcite_mgr
3416  ->process(query_state_proxy,
3417  pg_shim(select_stmt),
3418  calciteQueryParsingOption,
3419  calciteOptimizationOption)
3420  .plan_result;
3421  RelAlgExecutor ra_executor(executor.get(),
3422  catalog,
3423  query_ra,
3424  query_state_proxy.getQueryState().shared_from_this());
3426  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
3427  // struct
3428  ExecutionOptions eo = {false,
3429  false,
3430  true,
3431  false,
3432  true,
3433  false,
3434  false,
3435  validate_only,
3436  false,
3437  10000,
3438  false,
3439  false,
3440  1000,
3441  allow_interrupt,
3445  outer_fragment_indices};
3446 
  // Start from an empty placeholder result; it is replaced by the executor's result
  // right below.
3447  ExecutionResult result{std::make_shared<ResultSet>(std::vector<TargetInfo>{},
3450  nullptr,
3451  nullptr,
3452  0,
3453  0),
3454  {}};
3455  result = ra_executor.executeRelAlgQuery(co, eo, false, nullptr);
3456  targets = result.getTargetsMeta();
3457 
3458  return result.getRows();
3459 }
3460 
3462  std::string& sql_query_string) {
  // Plans `sql_query_string` through the catalog's Calcite manager and returns
  // whatever RelAlgExecutor::getOuterFragmentCount reports for that plan under the
  // compilation and execution options built below.
3463  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
3464  auto& catalog = session->getCatalog();
3465 
  // The session's device type is only consulted in CUDA builds; otherwise CPU is
  // used.
3467 #ifdef HAVE_CUDA
3468  const auto device_type = session->get_executor_device_type();
3469 #else
3470  const auto device_type = ExecutorDeviceType::CPU;
3471 #endif // HAVE_CUDA
3472  auto calcite_mgr = catalog.getCalciteMgr();
3473 
3474  // TODO MAT this should actually get the global or the session parameter for
3475  // view optimization
3476  const auto calciteQueryParsingOption =
3477  calcite_mgr->getCalciteQueryParsingOption(true, false, true);
3478  const auto calciteOptimizationOption = calcite_mgr->getCalciteOptimizationOption(
3479  false,
3481  {},
  // The statement is passed through pg_shim() before being handed to Calcite.
3483  const auto query_ra = calcite_mgr
3484  ->process(query_state_proxy,
3485  pg_shim(sql_query_string),
3486  calciteQueryParsingOption,
3487  calciteOptimizationOption)
3488  .plan_result;
3489  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
3490  CompilationOptions co = {device_type, true, ExecutorOptLevel::Default, false};
3491  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
3492  // struct
3493  ExecutionOptions eo = {false,
3494  false,
3495  true,
3496  false,
3497  true,
3498  false,
3499  false,
3500  false,
3501  false,
3502  10000,
3503  false,
3504  false,
3505  0.9,
3506  false};
3507  return ra_executor.getOuterFragmentCount(co, eo);
3508 }
3509 
3511  std::string& sql_query_string,
3512  std::vector<size_t> outer_frag_indices,
3513  bool validate_only,
3514  bool allow_interrupt) {
  // Runs `sql_query_string` through getResultSet() and packs the returned rows
  // together with the target metadata into an AggregatedResult.
3515  // TODO(PS): Should we be using the shimmed query in getResultSet?
  // NOTE(review): `pg_shimmed_select_query` appears unused in the lines visible
  // here; the unshimmed string is what gets enrolled and executed below.
3516  std::string pg_shimmed_select_query = pg_shim(sql_query_string);
3517 
3518  std::vector<TargetMetaInfo> target_metainfos;
3520  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
3521  auto query_session = session ? session->get_session_id() : "";
3522  auto query_submitted_time = query_state_proxy.getQueryState().getQuerySubmittedTime();
  // The session is enrolled with the executor only for interruptible, non-validation
  // runs that have a non-empty session id.
3523  if (allow_interrupt && !validate_only && !query_session.empty()) {
3524  executor->enrollQuerySession(query_session,
3525  sql_query_string,
3526  query_submitted_time,
3528  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR);
3529  }
3530  auto result_rows = getResultSet(query_state_proxy,
3531  sql_query_string,
3532  target_metainfos,
3533  validate_only,
3534  outer_frag_indices,
3535  allow_interrupt);
3536  AggregatedResult res = {result_rows, target_metainfos};
3537  return res;
3538 }
3539 
3540 std::vector<AggregatedResult> LocalQueryConnector::query(
3541  QueryStateProxy query_state_proxy,
3542  std::string& sql_query_string,
3543  std::vector<size_t> outer_frag_indices,
3544  bool allow_interrupt) {
3545  auto res = query(
3546  query_state_proxy, sql_query_string, outer_frag_indices, false, allow_interrupt);
3547  return {res};
3548 }
3549 
// Builds one ColumnDescriptor per entry of `result.targets_meta`. Two lists are
// maintained in parallel: the plain descriptors and a copy adjusted for table
// creation. `for_create` selects which of the two lists is returned.
3550 std::list<ColumnDescriptor> LocalQueryConnector::getColumnDescriptors(
3552  bool for_create) {
3553  std::list<ColumnDescriptor> column_descriptors;
3554  std::list<ColumnDescriptor> column_descriptors_for_create;
3555 
3556  int rowid_suffix = 0;
3557  for (const auto& target_metainfo : result.targets_meta) {
3558  ColumnDescriptor cd;
3559  cd.columnName = target_metainfo.get_resname();
  // A result column named exactly "rowid" gets a running numeric suffix appended.
3560  if (cd.columnName == "rowid") {
3561  cd.columnName += std::to_string(rowid_suffix++);
3562  }
3563  cd.columnType = target_metainfo.get_physical_type_info();
3564 
3565  ColumnDescriptor cd_for_create = cd;
3566 
3568  // we need to reset the comp param (as this points to the actual dictionary)
3569  if (cd.columnType.is_array()) {
3570  // for dict encoded arrays, it is always 4 bytes
3571  cd_for_create.columnType.set_comp_param(32);
3572  } else {
  // Non-array case: the comp param becomes the column's size in bits.
3573  cd_for_create.columnType.set_comp_param(cd.columnType.get_size() * 8);
3574  }
3575  }
3576 
3577  if (cd.columnType.is_date() && !cd.columnType.is_date_in_days()) {
3578  // default to kENCODING_DATE_IN_DAYS encoding
3580  cd_for_create.columnType.set_comp_param(0);
3581  }
3582 
3583  column_descriptors_for_create.push_back(cd_for_create);
3584  column_descriptors.push_back(cd);
3585  }
3586 
3587  if (for_create) {
3588  return column_descriptors_for_create;
3589  }
3590 
3591  return column_descriptors;
3592 }
3593 
3595  const rapidjson::Value& payload) {
  // Initializes the statement from a JSON payload: "name" (target table) and
  // "query" (select statement) are required, "columns" is an optional array of
  // target column names.
3596  CHECK(payload.HasMember("name"));
3597  table_name_ = json_str(payload["name"]);
3598 
3599  CHECK(payload.HasMember("query"));
3600  select_query_ = json_str(payload["query"]);
3601 
  // Flatten the query onto one line and wrap it in parentheses.
3602  boost::replace_all(select_query_, "\n", " ");
3603  select_query_ = "(" + select_query_ + ")";
3604 
3605  if (payload.HasMember("columns")) {
3606  CHECK(payload["columns"].IsArray());
3607  for (auto& column : payload["columns"].GetArray()) {
3608  std::string s = json_str(column);
3609  column_list_.emplace_back(std::unique_ptr<std::string>(new std::string(s)));
3610  }
3611  }
3612 }
3613 
3615  const TableDescriptor* td,
3616  bool validate_table,
3617  bool for_CTAS) {
  // Overview:
  //  1. When `validate_table` is set, check that `td` exists, is not a view, that the
  //     session passes the privilege check, and that the SELECT's result columns are
  //     compatible with the target columns.
  //  2. If this invocation is responsible for populating, run the SELECT once per
  //     outer fragment, convert the rows block by block into columnar insert data and
  //     load each block.
  //  3. On any exception a rollback through the connector is attempted and the
  //     exception is rethrown; on success non-temporary tables are checkpointed.
  // In the code visible here `for_CTAS` only selects the "CTAS"/"ITAS" label used in
  // messages.
3618  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
3619  auto& catalog = session->getCatalog();
3621  bool populate_table = false;
3622 
  // Populate when a connector was already set on this statement; otherwise create a
  // local connector and populate only if `g_cluster` is false.
3623  if (leafs_connector_) {
3624  populate_table = true;
3625  } else {
3626  leafs_connector_ = std::make_unique<LocalQueryConnector>();
3627  if (!g_cluster) {
3628  populate_table = true;
3629  }
3630  }
3631 
  // Resolves the target columns: all columns returned by the catalog for the table
  // when no explicit column list was given, otherwise the named columns in list
  // order (throwing if one does not exist).
3632  auto get_target_column_descriptors = [this, &catalog](const TableDescriptor* td) {
3633  std::vector<const ColumnDescriptor*> target_column_descriptors;
3634  if (column_list_.empty()) {
3635  auto list = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
3636  target_column_descriptors = {std::begin(list), std::end(list)};
3637  } else {
3638  for (auto& c : column_list_) {
3639  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
3640  if (cd == nullptr) {
3641  throw std::runtime_error("Column " + *c + " does not exist.");
3642  }
3643  target_column_descriptors.push_back(cd);
3644  }
3645  }
3646 
3647  return target_column_descriptors;
3648  };
3649 
3650  bool is_temporary = table_is_temporary(td);
3651 
3652  if (validate_table) {
3653  // check access privileges
3654  if (!td) {
3655  throw std::runtime_error("Table " + table_name_ + " does not exist.");
3656  }
3657  if (td->isView) {
3658  throw std::runtime_error("Insert to views is not supported yet.");
3659  }
3660 
3661  if (!session->checkDBAccessPrivileges(DBObjectType::TableDBObjectType,
3663  table_name_)) {
3664  throw std::runtime_error("User has no insert privileges on " + table_name_ + ".");
3665  }
3666 
3667  // only validate the select query so we get the target types
3668  // correctly, but do not populate the result set
3669  LocalQueryConnector local_connector;
3670  auto result = local_connector.query(query_state_proxy, select_query_, {}, true, true);
3671  auto source_column_descriptors = local_connector.getColumnDescriptors(result, false);
3672 
3673  std::vector<const ColumnDescriptor*> target_column_descriptors =
3674  get_target_column_descriptors(td);
3675 
3676  if (source_column_descriptors.size() != target_column_descriptors.size()) {
3677  throw std::runtime_error("The number of source and target columns does not match.");
3678  }
3679 
  // Source and target columns are compared pairwise, by position. The source
  // descriptors live in a std::list, hence the std::next() walk per index.
3680  for (int i = 0; i < source_column_descriptors.size(); i++) {
3681  const ColumnDescriptor* source_cd =
3682  &(*std::next(source_column_descriptors.begin(), i));
3683  const ColumnDescriptor* target_cd = target_column_descriptors.at(i);
3684 
  // Differing types are rejected only when either side is a time, geometry, array
  // or boolean type.
3685  if (source_cd->columnType.get_type() != target_cd->columnType.get_type()) {
3686  auto type_cannot_be_cast = [](const auto& col_type) {
3687  return (col_type.is_time() || col_type.is_geometry() || col_type.is_array() ||
3688  col_type.is_boolean());
3689  };
3690 
3691  if (type_cannot_be_cast(source_cd->columnType) ||
3692  type_cannot_be_cast(target_cd->columnType)) {
3693  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3694  source_cd->columnType.get_type_name() +
3695  "' and target '" + target_cd->columnName + " " +
3696  target_cd->columnType.get_type_name() +
3697  "' column types do not match.");
3698  }
3699  }
  // Array columns must agree on their element (sub)type.
3700  if (source_cd->columnType.is_array()) {
3701  if (source_cd->columnType.get_subtype() != target_cd->columnType.get_subtype()) {
3702  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3703  source_cd->columnType.get_type_name() +
3704  "' and target '" + target_cd->columnName + " " +
3705  target_cd->columnType.get_type_name() +
3706  "' array column element types do not match.");
3707  }
3708  }
3709 
  // A string target requires a string source.
3710  if (target_cd->columnType.is_string() && !source_cd->columnType.is_string()) {
3711  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3712  source_cd->columnType.get_type_name() +
3713  "' and target '" + target_cd->columnName + " " +
3714  target_cd->columnType.get_type_name() +
3715  "' column types do not match.");
3716  }
3717 
  // Decimal columns (or arrays of decimals) must have matching scale.
3718  if (source_cd->columnType.is_decimal() ||
3719  source_cd->columnType.get_elem_type().is_decimal()) {
3720  SQLTypeInfo sourceType = source_cd->columnType;
3721  SQLTypeInfo targetType = target_cd->columnType;
3722 
3723  if (source_cd->columnType.is_array()) {
3724  sourceType = source_cd->columnType.get_elem_type();
3725  targetType = target_cd->columnType.get_elem_type();
3726  }
3727 
3728  if (sourceType.get_scale() != targetType.get_scale()) {
3729  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3730  source_cd->columnType.get_type_name() +
3731  "' and target '" + target_cd->columnName + " " +
3732  target_cd->columnType.get_type_name() +
3733  "' decimal columns scales do not match.");
3734  }
3735  }
3736 
  // A string source requires a string target with the same compression setting.
3737  if (source_cd->columnType.is_string()) {
3738  if (!target_cd->columnType.is_string()) {
3739  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3740  source_cd->columnType.get_type_name() +
3741  "' and target '" + target_cd->columnName + " " +
3742  target_cd->columnType.get_type_name() +
3743  "' column types do not match.");
3744  }
3745  if (source_cd->columnType.get_compression() !=
3746  target_cd->columnType.get_compression()) {
3747  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3748  source_cd->columnType.get_type_name() +
3749  "' and target '" + target_cd->columnName + " " +
3750  target_cd->columnType.get_type_name() +
3751  "' columns string encodings do not match.");
3752  }
3753  }
3754 
  // Timestamps on both sides must have the same dimension.
3755  if (source_cd->columnType.is_timestamp() && target_cd->columnType.is_timestamp()) {
3756  if (source_cd->columnType.get_dimension() !=
3757  target_cd->columnType.get_dimension()) {
3758  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3759  source_cd->columnType.get_type_name() +
3760  "' and target '" + target_cd->columnName + " " +
3761  target_cd->columnType.get_type_name() +
3762  "' timestamp column precisions do not match.");
3763  }
3764  }
3765 
  // For all remaining types (not string, geometry, integer, decimal, date, time or
  // timestamp) the source must not be wider than the target.
3766  if (!source_cd->columnType.is_string() && !source_cd->columnType.is_geometry() &&
3767  !source_cd->columnType.is_integer() && !source_cd->columnType.is_decimal() &&
3768  !source_cd->columnType.is_date() && !source_cd->columnType.is_time() &&
3769  !source_cd->columnType.is_timestamp() &&
3770  source_cd->columnType.get_size() > target_cd->columnType.get_size()) {
3771  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3772  source_cd->columnType.get_type_name() +
3773  "' and target '" + target_cd->columnName + " " +
3774  target_cd->columnType.get_type_name() +
3775  "' column encoding sizes do not match.");
3776  }
3777  }
3778  }
3779 
3780  if (!populate_table) {
3781  return;
3782  }
3783 
  // Accumulators for the summary that is logged after loading.
3784  int64_t total_row_count = 0;
3785  int64_t total_source_query_time_ms = 0;
3786  int64_t total_target_value_translate_time_ms = 0;
3787  int64_t total_data_load_time_ms = 0;
3788 
3790  auto target_column_descriptors = get_target_column_descriptors(td);
3791  auto outer_frag_count =
3792  leafs_connector_->getOuterFragmentCount(query_state_proxy, select_query_);
3793 
  // With an outer fragment count of zero the loop below still runs once, with an
  // empty list of allowed fragment indices.
3794  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
3795  auto query_session = session ? session->get_session_id() : "";
3797  std::string work_type_str = for_CTAS ? "CTAS" : "ITAS";
3798  try {
3799  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
3800  std::vector<size_t> allowed_outer_fragment_indices;
3801 
3802  if (outer_frag_count) {
3803  allowed_outer_fragment_indices.push_back(outer_frag_idx);
3804  }
3805 
3806  const auto query_clock_begin = timer_start();
3807  std::vector<AggregatedResult> query_results =
3808  leafs_connector_->query(query_state_proxy,
3809  select_query_,
3810  allowed_outer_fragment_indices,
3812  total_source_query_time_ms += timer_stop(query_clock_begin);
3813 
3814  auto start_time = query_state_proxy.getQueryState().getQuerySubmittedTime();
3815  auto query_str = "INSERT_DATA for " + work_type_str;
3817  // In the clean-up phase of the query execution for collecting aggregated result
3818  // of SELECT query, we remove its query session info, so we need to enroll the
3819  // session info again
3820  executor->enrollQuerySession(query_session,
3821  query_str,
3822  start_time,
3824  QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
3825  }
3826 
3827  ScopeGuard clearInterruptStatus = [executor, &query_session, &start_time] {
3828  // this data population is non-kernel operation, so we manually cleanup
3829  // the query session info in the cleanup phase
3831  executor->clearQuerySessionStatus(query_session, start_time);
3832  }
3833  };
3834 
3835  for (auto& res : query_results) {
3836  if (UNLIKELY(check_session_interrupted(query_session, executor))) {
3837  throw std::runtime_error(
3838  "Query execution has been interrupted while performing " + work_type_str);
3839  }
3840  auto& result_rows = res.rs;
3841  result_rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
3842  const auto num_rows = result_rows->rowCount();
3843 
3844  if (0 == num_rows) {
3845  continue;
3846  }
3847 
3848  total_row_count += num_rows;
3849 
3850  size_t leaf_count = leafs_connector_->leafCount();
3851 
3852  // ensure that at least 1 row is processed per block up to a maximum of 65536 rows
3853  const size_t rows_per_block =
3854  std::max(std::min(num_rows / leaf_count, size_t(64 * 1024)), size_t(1));
3855 
3856  std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
3857 
3859 
3860  const int num_worker_threads = std::thread::hardware_concurrency();
3861 
  // Per-thread [start, end) ranges over the result set's entries.
3862  std::vector<size_t> thread_start_idx(num_worker_threads),
3863  thread_end_idx(num_worker_threads);
  // The parallel path is taken only for non-truncated results with more than
  // 20000 rows per block.
3864  bool can_go_parallel = !result_rows->isTruncated() && rows_per_block > 20000;
3865 
  // Next destination row within the current block; shared by all workers.
3866  std::atomic<size_t> crt_row_idx{0};
3867 
  // Parallel path: converts the entry at `idx` if it holds a row. The destination
  // row is claimed from `crt_row_idx`; once the claimed row reaches `block_end`
  // the caller is told to stop.
3868  auto do_work = [&result_rows, &value_converters, &crt_row_idx](
3869  const size_t idx,
3870  const size_t block_end,
3871  const size_t num_cols,
3872  const size_t thread_id,
3873  bool& stop_convert) {
3874  const auto result_row = result_rows->getRowAtNoTranslations(idx);
3875  if (!result_row.empty()) {
3876  size_t target_row = crt_row_idx.fetch_add(1);
3877  if (target_row >= block_end) {
3878  stop_convert = true;
3879  return;
3880  }
3881  for (unsigned int col = 0; col < num_cols; col++) {
3882  const auto& mapd_variant = result_row[col];
3883  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3884  }
3885  }
3886  };
3887 
  // Worker body for the parallel path: walks this thread's entry range, checking
  // for a session interrupt every 0x10000 iterations in the interruptible branch.
3888  auto convert_function = [&thread_start_idx,
3889  &thread_end_idx,
3890  &value_converters,
3891  &executor,
3892  &query_session,
3893  &work_type_str,
3894  &do_work](const int thread_id, const size_t block_end) {
3895  const int num_cols = value_converters.size();
3896  const size_t start = thread_start_idx[thread_id];
3897  const size_t end = thread_end_idx[thread_id];
3898  size_t idx = 0;
3899  bool stop_convert = false;
3901  size_t local_idx = 0;
3902  for (idx = start; idx < end; ++idx, ++local_idx) {
3903  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3904  check_session_interrupted(query_session, executor))) {
3905  throw std::runtime_error(
3906  "Query execution has been interrupted while performing " +
3907  work_type_str);
3908  }
3909  do_work(idx, block_end, num_cols, thread_id, stop_convert);
3910  if (stop_convert) {
3911  break;
3912  }
3913  }
3914  } else {
3915  for (idx = start; idx < end; ++idx) {
3916  do_work(idx, block_end, num_cols, thread_id, stop_convert);
3917  if (stop_convert) {
3918  break;
3919  }
3920  }
3921  }
  // Remember where this thread stopped so the next block resumes from there.
3922  thread_start_idx[thread_id] = idx;
3923  };
3924 
  // Serial path: the destination row is claimed first, then the next row is pulled
  // with getNextRow(), which is expected to be non-empty.
3925  auto single_threaded_value_converter =
3926  [&crt_row_idx, &value_converters, &result_rows](const size_t idx,
3927  const size_t block_end,
3928  const size_t num_cols,
3929  bool& stop_convert) {
3930  size_t target_row = crt_row_idx.fetch_add(1);
3931  if (target_row >= block_end) {
3932  stop_convert = true;
3933  return;
3934  }
3935  const auto result_row = result_rows->getNextRow(false, false);
3936  CHECK(!result_row.empty());
3937  for (unsigned int col = 0; col < num_cols; col++) {
3938  const auto& mapd_variant = result_row[col];
3939  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3940  }
3941  };
3942 
  // Driver for the serial path; mirrors convert_function.
3943  auto single_threaded_convert_function = [&value_converters,
3944  &thread_start_idx,
3945  &thread_end_idx,
3946  &executor,
3947  &query_session,
3948  &work_type_str,
3949  &single_threaded_value_converter](
3950  const int thread_id,
3951  const size_t block_end) {
3952  const int num_cols = value_converters.size();
3953  const size_t start = thread_start_idx[thread_id];
3954  const size_t end = thread_end_idx[thread_id];
3955  size_t idx = 0;
3956  bool stop_convert = false;
3958  size_t local_idx = 0;
3959  for (idx = start; idx < end; ++idx, ++local_idx) {
3960  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3961  check_session_interrupted(query_session, executor))) {
3962  throw std::runtime_error(
3963  "Query execution has been interrupted while performing " +
3964  work_type_str);
3965  }
3966  single_threaded_value_converter(idx, block_end, num_cols, stop_convert);
3967  if (stop_convert) {
3968  break;
3969  }
3970  }
3971  } else {
  // NOTE(review): this branch passes `end` where the branch above passes
  // `block_end` -- confirm that the difference is intended.
3972  for (idx = start; idx < end; ++idx) {
3973  single_threaded_value_converter(idx, end, num_cols, stop_convert);
3974  if (stop_convert) {
3975  break;
3976  }
3977  }
3978  }
3979  thread_start_idx[thread_id] = idx;
3980  };
3981 
  // Split the entry range into contiguous strides (ceil(entry_count / threads)) for
  // the parallel path; the serial path uses slot 0 for the whole range.
3982  if (can_go_parallel) {
3983  const size_t entry_count = result_rows->entryCount();
3984  for (size_t
3985  i = 0,
3986  start_entry = 0,
3987  stride = (entry_count + num_worker_threads - 1) / num_worker_threads;
3988  i < num_worker_threads && start_entry < entry_count;
3989  ++i, start_entry += stride) {
3990  const auto end_entry = std::min(start_entry + stride, entry_count);
3991  thread_start_idx[i] = start_entry;
3992  thread_end_idx[i] = end_entry;
3993  }
3994  } else {
3995  thread_start_idx[0] = 0;
3996  thread_end_idx[0] = result_rows->entryCount();
3997  }
3998 
3999  RenderGroupAnalyzerMap render_group_analyzer_map;
4000 
  // Process the result in blocks of at most rows_per_block rows; the converters are
  // rebuilt for every block.
4001  for (size_t block_start = 0; block_start < num_rows;
4002  block_start += rows_per_block) {
4003  const auto num_rows_this_itr = block_start + rows_per_block < num_rows
4004  ? rows_per_block
4005  : num_rows - block_start;
4006  crt_row_idx = 0; // reset block tracker
4007  value_converters.clear();
4008  int colNum = 0;
4009  for (const auto targetDescriptor : target_column_descriptors) {
4010  auto sourceDataMetaInfo = res.targets_meta[colNum++];
4012  num_rows_this_itr,
4013  catalog,
4014  sourceDataMetaInfo,
4015  targetDescriptor,
4016  targetDescriptor->columnType,
4017  !targetDescriptor->columnType.get_notnull(),
4018  result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
4020  sourceDataMetaInfo.get_type_info().is_dict_encoded_string()
4021  ? executor->getStringDictionaryProxy(
4022  sourceDataMetaInfo.get_type_info().get_comp_param(),
4023  result_rows->getRowSetMemOwner(),
4024  true)
4025  : nullptr,
4026  IS_GEO_POLY(targetDescriptor->columnType.get_type()) &&
4028  ? &render_group_analyzer_map
4029  : nullptr};
4030  auto converter = factory.create(param);
4031  value_converters.push_back(std::move(converter));
4032  }
4033 
4034  const auto translate_clock_begin = timer_start();
4035  if (can_go_parallel) {
4036  std::vector<std::future<void>> worker_threads;
4037  for (int i = 0; i < num_worker_threads; ++i) {
4038  worker_threads.push_back(
4039  std::async(std::launch::async, convert_function, i, num_rows_this_itr));
4040  }
4041 
  // Wait for every worker first, then call get() so that a worker's exception
  // is rethrown here.
4042  for (auto& child : worker_threads) {
4043  child.wait();
4044  }
4045  for (auto& child : worker_threads) {
4046  child.get();
4047  }
4048 
4049  } else {
4050  single_threaded_convert_function(0, num_rows_this_itr);
4051  }
4052 
4053  // finalize the insert data
4054  auto finalizer_func =
4055  [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
4056  targetValueConverter->finalizeDataBlocksForInsertData();
4057  };
4058 
  // One async finalization task is launched per column converter.
4059  std::vector<std::future<void>> worker_threads;
4060  for (auto& converterPtr : value_converters) {
4061  worker_threads.push_back(
4062  std::async(std::launch::async, finalizer_func, converterPtr.get()));
4063  }
4064 
4065  for (auto& child : worker_threads) {
4066  child.wait();
4067  }
4068  for (auto& child : worker_threads) {
4069  child.get();
4070  }
4071 
4073  insert_data.databaseId = catalog.getCurrentDB().dbId;
4074  CHECK(td);
4075  insert_data.tableId = td->tableId;
4076  insert_data.numRows = num_rows_this_itr;
4077 
4078  for (int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
4080  check_session_interrupted(query_session, executor))) {
4081  throw std::runtime_error(
4082  "Query execution has been interrupted while performing " +
4083  work_type_str);
4084  }
4085  value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
4086  }
4087  total_target_value_translate_time_ms += timer_stop(translate_clock_begin);
4088 
4089  const auto data_load_clock_begin = timer_start();
  // The returned holder is kept in scope across the insertData() call below.
4090  auto data_memory_holder =
4091  import_export::fill_missing_columns(&catalog, insert_data);
4092  insertDataLoader.insertData(*session, insert_data);
4093  total_data_load_time_ms += timer_stop(data_load_clock_begin);
4094  }
4095  }
4096  }
4097  } catch (...) {
  // Best-effort rollback: a failure of the rollback itself is only logged, and the
  // original exception is rethrown either way.
4098  try {
4099  leafs_connector_->rollback(*session, td->tableId);
4100  } catch (std::exception& e) {
4101  LOG(ERROR) << "An error occurred during ITAS rollback attempt. Table id: "
4102  << td->tableId << ", Error: " << e.what();
4103  }
4104  throw;
4105  }
4106 
4107  int64_t total_time_ms = total_source_query_time_ms +
4108  total_target_value_translate_time_ms + total_data_load_time_ms;
4109 
4110  VLOG(1) << "CTAS/ITAS " << total_row_count << " rows loaded in " << total_time_ms
4111  << "ms (outer_frag_count=" << outer_frag_count
4112  << ", query_time=" << total_source_query_time_ms
4113  << "ms, translation_time=" << total_target_value_translate_time_ms
4114  << "ms, data_load_time=" << total_data_load_time_ms
4115  << "ms)\nquery: " << select_query_;
4116 
  // Temporary tables are not checkpointed.
4117  if (!is_temporary) {
4118  leafs_connector_->checkpoint(*session, td->tableId);
4119  }
4120 }
4121 
4122 namespace {
4124  const std::string& table_name) {
  // Resolves `table_name` to its table id through the catalog; throws
  // std::runtime_error when the catalog has no id for that name.
4125  auto table_id = catalog.getTableId(table_name);
4126  if (!table_id.has_value()) {
4127  throw std::runtime_error{"Table \"" + table_name + "\" does not exist."};
4128  }
4129  return table_id.value();
4130 }
4131 
4133  Catalog_Namespace::Catalog& catalog,
4134  const std::string& query_str,
4135  const QueryStateProxy& query_state_proxy,
4136  const std::optional<std::string>& insert_table_name = {}) {
  // Plans `query_str` through Calcite to find the tables it selects from, adds
  // `insert_table_name` when given, and appends lock objects for each table to
  // `locks`, which is returned.
4137  auto calcite_mgr = catalog.getCalciteMgr();
4138  const auto calciteQueryParsingOption =
4139  calcite_mgr->getCalciteQueryParsingOption(true, false, true);
4140  const auto calciteOptimizationOption = calcite_mgr->getCalciteOptimizationOption(
4141  false, g_enable_watchdog, {}, SysCatalog::instance().isAggregator());
4142  const auto result = calcite_mgr->process(query_state_proxy,
4143  pg_shim(query_str),
4144  calciteQueryParsingOption,
4145  calciteOptimizationOption);
4146  // force sort into tableid order in case of name change to guarantee fixed order of
4147  // mutex access
4148  auto comparator = [&catalog](const std::string& table_1, const std::string& table_2) {
4149  return get_table_id(catalog, table_1) < get_table_id(catalog, table_2);
4150  };
4151  std::set<std::string, decltype(comparator)> tables(comparator);
  // Only the first component of each selected-from entry is used as the table name.
4152  for (auto& tab : result.resolved_accessed_objects.tables_selected_from) {
4153  tables.emplace(tab[0]);
4154  }
4155  if (insert_table_name.has_value()) {
4156  tables.emplace(insert_table_name.value());
4157  }
  // Two entries are appended per table. The second is built from the result of
  // invoking the first and differs depending on whether the table is the insert
  // target (the concrete lock types are not visible in this listing).
4159  for (const auto& table : tables) {
4160  locks.emplace_back(
4163  catalog, table)));
4164  if (insert_table_name.has_value() && table == insert_table_name.value()) {
4165  locks.emplace_back(
4168  catalog.getDatabaseId(), (*locks.back())())));
4169  } else {
4170  locks.emplace_back(
4173  catalog.getDatabaseId(), (*locks.back())())));
4174  }
4175  }
4176  return locks;
4177 }
4178 } // namespace
4179 
4181  bool read_only_mode) {
  // Executes INSERT INTO ... SELECT: rejects read-only mode, verifies the target
  // table exists, acquires the table locks for the select and the target, clears
  // external caches for the target and then populates it with validation enabled.
4182  if (read_only_mode) {
4183  throw std::runtime_error("INSERT INTO TABLE invalid in read only mode.");
4184  }
  // A local copy of the session is wrapped in a shared_ptr with a no-op deleter, so
  // the pointer never frees the stack object it refers to.
4185  auto session_copy = session;
4186  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4187  &session_copy, boost::null_deleter());
4188  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
4189  auto stdlog = STDLOG(query_state);
4190  auto& catalog = session_ptr->getCatalog();
4191 
4192  const auto execute_read_lock =
4196 
4197  if (catalog.getMetadataForTable(table_name_) == nullptr) {
4198  throw std::runtime_error("ITAS failed: table " + table_name_ + " does not exist.");
4199  }
4200 
4201  auto locks = acquire_query_table_locks(
4202  catalog, select_query_, query_state->createQueryStateProxy(), table_name_);
  // The table descriptor is fetched again after the locks have been acquired.
4203  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
4204 
4205  Executor::clearExternalCaches(true, td, catalog.getCurrentDB().dbId);
4206 
  // The handler below only rethrows; no cleanup is performed at this level.
4207  try {
4208  populateData(query_state->createQueryStateProxy(), td, true, false);
4209  } catch (...) {
4210  throw;
4211  }
4212 }
4213 
4215  : InsertIntoTableAsSelectStmt(payload) {
  // On top of the base-class parsing, reads the optional boolean flags "temporary"
  // and "ifNotExists" (both default to false when absent) and the storage options.
4216  if (payload.HasMember("temporary")) {
4217  is_temporary_ = json_bool(payload["temporary"]);
4218  } else {
4219  is_temporary_ = false;
4220  }
4221 
4222  if (payload.HasMember("ifNotExists")) {
4223  if_not_exists_ = json_bool(payload["ifNotExists"]);
4224  } else {
4225  if_not_exists_ = false;
4226  }
4227 
4228  parse_options(payload, storage_options_);
4229 }
4230 
4232  bool read_only_mode) {
  // Executes CREATE TABLE ... AS SELECT. When no leaf connector has been set, the
  // table is first created from the column descriptors of the validated SELECT;
  // afterwards the SELECT is run for real to populate it. If population throws and
  // `g_cluster` is false, the table is dropped again before the exception is
  // rethrown.
4233  if (read_only_mode) {
4234  throw std::runtime_error("CREATE TABLE invalid in read only mode.");
4235  }
  // A local copy of the session is wrapped in a shared_ptr with a no-op deleter, so
  // the pointer never frees the stack object it refers to.
4236  auto session_copy = session;
4237  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4238  &session_copy, boost::null_deleter());
4239  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
4240  auto stdlog = STDLOG(query_state);
4241  LocalQueryConnector local_connector;
4242  auto& catalog = session.getCatalog();
4243  bool create_table = nullptr == leafs_connector_;
4244 
4245  std::set<std::string> select_tables;
4246  if (create_table) {
4247  const auto execute_write_lock =
4251 
4252  // check access privileges
4255  throw std::runtime_error("CTAS failed. Table " + table_name_ +
4256  " will not be created. User has no create privileges.");
4257  }
4258 
  // With IF NOT EXISTS an already existing table makes the statement return
  // without doing anything; otherwise it is an error.
4259  if (catalog.getMetadataForTable(table_name_) != nullptr) {
4260  if (if_not_exists_) {
4261  return;
4262  }
4263  throw std::runtime_error("Table " + table_name_ +
4264  " already exists and no data was loaded.");
4265  }
4266 
4267  // only validate the select query so we get the target types
4268  // correctly, but do not populate the result set
4269  // we currently have exclusive access to the system so this is safe
4270  auto validate_result = local_connector.query(
4271  query_state->createQueryStateProxy(), select_query_, {}, true, false);
4272 
4273  auto column_descriptors_for_create =
4274  local_connector.getColumnDescriptors(validate_result, true);
4275 
4276  // some validation as the QE might return some out of range column types
4277  for (auto& cd : column_descriptors_for_create) {
4278  if (cd.columnType.is_decimal() &&
4279  cd.columnType.get_precision() > sql_constants::kMaxNumericPrecision) {
4280  throw std::runtime_error(cd.columnName + ": Precision too high, max " +
4282  ".");
4283  }
4284  }
4285 
4286  TableDescriptor td;
4287  td.tableName = table_name_;
4288  td.userId = session.get_currentUser().userId;
4289  td.nColumns = column_descriptors_for_create.size();
4290  td.isView = false;
4291  td.fragmenter = nullptr;
4298  if (is_temporary_) {
4300  } else {
4302  }
4303 
  // Both options default to enabled and can be switched off via storage options.
4304  bool use_shared_dictionaries = true;
4305  bool force_geo_compression = true;
4306 
  // "use_shared_dictionaries" and "force_geo_compression" are consumed here; both
  // must be string literals and count as true for "true", "1" or "t" (after
  // lower-casing). Every other option is forwarded to
  // get_table_definitions_for_ctas().
4307  if (!storage_options_.empty()) {
4308  for (auto& p : storage_options_) {
4309  if (boost::to_lower_copy<std::string>(*p->get_name()) ==
4310  "use_shared_dictionaries") {
4311  const StringLiteral* literal =
4312  dynamic_cast<const StringLiteral*>(p->get_value());
4313  if (nullptr == literal) {
4314  throw std::runtime_error(
4315  "USE_SHARED_DICTIONARIES must be a string parameter");
4316  }
4317  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
4318  use_shared_dictionaries = val == "true" || val == "1" || val == "t";
4319  } else if (boost::to_lower_copy<std::string>(*p->get_name()) ==
4320  "force_geo_compression") {
4321  const StringLiteral* literal =
4322  dynamic_cast<const StringLiteral*>(p->get_value());
4323  if (nullptr == literal) {
4324  throw std::runtime_error("FORCE_GEO_COMPRESSION must be a string parameter");
4325  }
4326  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
4327  force_geo_compression = val == "true" || val == "1" || val == "t";
4328  } else {
4329  get_table_definitions_for_ctas(td, p, column_descriptors_for_create);
4330  }
4331  }
4332  }
4333 
4334  std::vector<SharedDictionaryDef> sharedDictionaryRefs;
4335 
  // For every dictionary-encoded string column of the SELECT whose dictionary id is
  // found in the catalog's dictionary-to-column mapping, record a shared-dictionary
  // reference to the mapped table and column.
4336  if (use_shared_dictionaries) {
4337  const auto source_column_descriptors =
4338  local_connector.getColumnDescriptors(validate_result, false);
4339  const auto mapping = catalog.getDictionaryToColumnMapping();
4340 
4341  for (auto& source_cd : source_column_descriptors) {
4342  const auto& ti = source_cd.columnType;
4343  if (ti.is_string()) {
4344  if (ti.get_compression() == kENCODING_DICT) {
4345  int dict_id = ti.get_comp_param();
4346  auto it = mapping.find(dict_id);
4347  if (mapping.end() != it) {
4348  const auto targetColumn = it->second;
4349  auto targetTable =
4350  catalog.getMetadataForTable(targetColumn->tableId, false);
4351  CHECK(targetTable);
4352  LOG(INFO) << "CTAS: sharing text dictionary on column "
4353  << source_cd.columnName << " with " << targetTable->tableName
4354  << "." << targetColumn->columnName;
4355  sharedDictionaryRefs.emplace_back(
4356  source_cd.columnName, targetTable->tableName, targetColumn->columnName);
4357  }
4358  }
4359  }
4360  }
4361  }
4362 
  // Geometry columns whose output SRID is 4326 are switched to GEOINT encoding with
  // a comp param of 32.
4363  if (force_geo_compression) {
4364  for (auto& cd_for_create : column_descriptors_for_create) {
4365  auto& ti = cd_for_create.columnType;
4366  if (ti.is_geometry() && ti.get_output_srid() == 4326) {
4367  // turn on GEOINT32 compression
4368  ti.set_compression(kENCODING_GEOINT);
4369  ti.set_comp_param(32);
4370  }
4371  }
4372  }
4373 
4374  // currently no means of defining sharding in CTAS
4375  td.keyMetainfo = serialize_key_metainfo(nullptr, sharedDictionaryRefs);
4376 
4377  catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs, true);
4378  // TODO (max): It's transactionally unsafe, should be fixed: we may create object
4379  // w/o privileges
4380  SysCatalog::instance().createDBObject(
4381  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
4382  }
4383 
4384  // note there is a time where we do not have any executor outer lock here. someone could
4385  // come along and mess with the data or other tables.
4386  const auto execute_read_lock =
4390 
4391  auto locks = acquire_query_table_locks(
4392  catalog, select_query_, query_state->createQueryStateProxy(), table_name_);
4393  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
4394  try {
4395  populateData(query_state->createQueryStateProxy(), td, false, true);
4396  } catch (...) {
  // Outside of cluster mode a failed population drops the table (if the catalog
  // still knows it) before the exception is rethrown.
4397  if (!g_cluster) {
4398  const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
4399  if (created_td) {
4400  catalog.dropTable(created_td);
4401  }
4402  }
4403  throw;
4404  }
4405 }
4406 
4407 DropTableStmt::DropTableStmt(const rapidjson::Value& payload) {
4408  CHECK(payload.HasMember("tableName"));
4409  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4410 
4411  if_exists_ = false;
4412  if (payload.HasMember("ifExists")) {
4413  if_exists_ = json_bool(payload["ifExists"]);
4414  }
4415 }
4416 
// DROP TABLE execution. The first line of the signature and several
// lock-acquisition lines are absent from this extract (the embedded listing
// numbers jump where they were). Visible flow: reject read-only mode, resolve
// the table through a schema write-lock container, check privileges, clear
// external caches, then drop the table through the catalog.
4418  bool read_only_mode) {
4419  if (read_only_mode) {
4420  throw std::runtime_error("DROP TABLE invalid in read only mode.");
4421  }
4422  const auto execute_read_lock =
4426  auto& catalog = session.getCatalog();
4427  const TableDescriptor* td{nullptr};
4428  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
4429  try {
4430  td_with_lock =
4431  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
4433  catalog, *table_, false));
4434  td = (*td_with_lock)();
4435  } catch (const std::runtime_error& e) {
// With IF EXISTS a runtime_error raised while resolving the table is swallowed
// and the statement becomes a no-op.
// NOTE(review): `throw e;` throws a copy whose type is std::runtime_error; a
// bare `throw;` would rethrow the original exception object unchanged.
4436  if (if_exists_) {
4437  return;
4438  } else {
4439  throw e;
4440  }
4441  }
4442
4443  CHECK(td);
4444  CHECK(td_with_lock);
4445
4446  // check access privileges
4447  if (!session.checkDBAccessPrivileges(
4449  throw std::runtime_error("Table " + *table_ +
4450  " will not be dropped. User has no proper privileges.");
4451  }
4452
4454
// Cache invalidation happens inside its own scope so that the lock declared
// here (its initializer is absent from this extract) ends before the write
// lock below is declared.
4455  {
4456  auto table_data_read_lock =
4458  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
4459  }
4460
4461  auto table_data_write_lock =
4463  catalog.dropTable(td);
4464 }
4465 
// Empty body: this function performs no work. The first line of its signature
// is absent from this extract.
4467  bool read_only_mode) {}
4468 
4469 std::unique_ptr<DDLStmt> AlterTableStmt::delegate(const rapidjson::Value& payload) {
4470  CHECK(payload.HasMember("tableName"));
4471  auto tableName = json_str(payload["tableName"]);
4472 
4473  CHECK(payload.HasMember("alterType"));
4474  auto type = json_str(payload["alterType"]);
4475 
4476  if (type == "RENAME_TABLE") {
4477  CHECK(payload.HasMember("newTableName"));
4478  auto newTableName = json_str(payload["newTableName"]);
4479  return std::unique_ptr<DDLStmt>(new Parser::RenameTableStmt(
4480  new std::string(tableName), new std::string(newTableName)));
4481 
4482  } else if (type == "RENAME_COLUMN") {
4483  CHECK(payload.HasMember("columnName"));
4484  auto columnName = json_str(payload["columnName"]);
4485  CHECK(payload.HasMember("newColumnName"));
4486  auto newColumnName = json_str(payload["newColumnName"]);
4487  return std::unique_ptr<DDLStmt>(
4488  new Parser::RenameColumnStmt(new std::string(tableName),
4489  new std::string(columnName),
4490  new std::string(newColumnName)));
4491 
4492  } else if (type == "ADD_COLUMN") {
4493  CHECK(payload.HasMember("columnData"));
4494  CHECK(payload["columnData"].IsArray());
4495 
4496  // New Columns go into this list
4497  std::list<ColumnDef*>* table_element_list_ = new std::list<ColumnDef*>;
4498 
4499  const auto elements = payload["columnData"].GetArray();
4500  for (const auto& element : elements) {
4501  CHECK(element.IsObject());
4502  CHECK(element.HasMember("type"));
4503  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
4504  auto col_def = column_from_json(element);
4505  table_element_list_->emplace_back(col_def.release());
4506  } else {
4507  LOG(FATAL) << "Unsupported element type for ALTER TABLE: "
4508  << element["type"].GetString();
4509  }
4510  }
4511 
4512  return std::unique_ptr<DDLStmt>(
4513  new Parser::AddColumnStmt(new std::string(tableName), table_element_list_));
4514 
4515  } else if (type == "DROP_COLUMN") {
4516  CHECK(payload.HasMember("columnData"));
4517  auto columnData = json_str(payload["columnData"]);
4518  // Convert columnData to std::list<std::string*>*
4519  // allocate std::list<> as DropColumnStmt will delete it;
4520  std::list<std::string*>* cols = new std::list<std::string*>;
4521  std::vector<std::string> cols1;
4522  boost::split(cols1, columnData, boost::is_any_of(","));
4523  for (auto s : cols1) {
4524  // strip leading/trailing spaces/quotes/single quotes
4525  boost::algorithm::trim_if(s, boost::is_any_of(" \"'`"));
4526  std::string* str = new std::string(s);
4527  cols->emplace_back(str);
4528  }
4529 
4530  return std::unique_ptr<DDLStmt>(
4531  new Parser::DropColumnStmt(new std::string(tableName), cols));
4532 
4533  } else if (type == "ALTER_OPTIONS") {
4534  CHECK(payload.HasMember("options"));
4535  const auto& options = payload["options"];
4536  if (options.IsObject()) {
4537  for (auto itr = options.MemberBegin(); itr != options.MemberEnd(); ++itr) {
4538  std::string* option_name = new std::string(json_str(itr->name));
4539  Literal* literal_value;
4540  if (itr->value.IsString()) {
4541  std::string literal_string = json_str(itr->value);
4542 
4543  // iff this string can be converted to INT
4544  // ... do so because it is necessary for AlterTableParamStmt
4545  std::size_t sz;
4546  int iVal = std::stoi(literal_string, &sz);
4547  if (sz == literal_string.size()) {
4548  literal_value = new IntLiteral(iVal);
4549  } else {
4550  literal_value = new StringLiteral(&literal_string);
4551  }
4552  } else if (itr->value.IsInt() || itr->value.IsInt64()) {
4553  literal_value = new IntLiteral(json_i64(itr->value));
4554  } else if (itr->value.IsNull()) {
4555  literal_value = new NullLiteral();
4556  } else {
4557  throw std::runtime_error("Unable to handle literal for " + *option_name);
4558  }
4559  CHECK(literal_value);
4560 
4561  NameValueAssign* nv = new NameValueAssign(option_name, literal_value);
4562  return std::unique_ptr<DDLStmt>(
4563  new Parser::AlterTableParamStmt(new std::string(tableName), nv));
4564  }
4565  } else {
4566  CHECK(options.IsNull());
4567  }
4568  }
4569  return nullptr;
4570 }
4571 
4572 TruncateTableStmt::TruncateTableStmt(const rapidjson::Value& payload) {
4573  CHECK(payload.HasMember("tableName"));
4574  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4575 }
4576 
// TRUNCATE TABLE execution. The first line of the signature, the lock
// initializers and two further statements are absent from this extract (the
// embedded listing numbers jump where they were). Visible flow: reject
// read-only mode, resolve the table, check privileges, refuse views, clear
// external caches, then truncate through the catalog.
4578  bool read_only_mode) {
4579  if (read_only_mode) {
4580  throw std::runtime_error("TRUNCATE TABLE invalid in read only mode.");
4581  }
4582  const auto execute_read_lock =
4586  auto& catalog = session.getCatalog();
4587  const auto td_with_lock =
4589  catalog, *table_, true);
4590  const auto td = td_with_lock();
4591  if (!td) {
4592  throw std::runtime_error("Table " + *table_ + " does not exist.");
4593  }
4594
4595  // check access privileges
4596  std::vector<DBObject> privObjects;
4597  DBObject dbObject(*table_, TableDBObjectType);
4598  dbObject.loadKey(catalog);
// NOTE(review): listing line 4599 is missing here; presumably it sets the
// privilege being checked on dbObject - confirm against the full source.
4600  privObjects.push_back(dbObject);
4601  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
4602  throw std::runtime_error("Table " + *table_ + " will not be truncated. User " +
4603  session.get_currentUser().userLoggable() +
4604  " has no proper privileges.");
4605  }
4606
4607  if (td->isView) {
4608  throw std::runtime_error(*table_ + " is a view. Cannot Truncate.");
4609  }
4611
4612  // invalidate cached item
// Done in its own scope so the lock declared here ends before the write lock
// below is declared.
4613  {
4614  auto table_data_read_lock =
4616  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
4617  }
4618
4619  auto table_data_write_lock =
4621  catalog.truncateTable(td);
4622 }
4623 
4624 OptimizeTableStmt::OptimizeTableStmt(const rapidjson::Value& payload) {
4625  CHECK(payload.HasMember("tableName"));
4626  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4627  parse_options(payload, options_);
4628 }
4629 
// File-local privilege helper. The first line of its signature (return type,
// name and first parameter) is absent from this extract; the body reads a
// `session_info` parameter, and a later caller in this file invokes it as
// user_can_access_table(session, td, <privilege>).
// It returns the result of SysCatalog::checkPrivileges for the session's
// current user on a table object built from td->tableName with `access_priv`
// set. `td` must be non-null (enforced by CHECK).
4630 namespace {
4632  const TableDescriptor* td,
4633  const AccessPrivileges access_priv) {
4634  CHECK(td);
4635  auto& cat = session_info.getCatalog();
4636  std::vector<DBObject> privObjects;
4637  DBObject dbObject(td->tableName, TableDBObjectType);
4638  dbObject.loadKey(cat);
4639  dbObject.setPrivileges(access_priv);
4640  privObjects.push_back(dbObject);
4641  return SysCatalog::instance().checkPrivileges(session_info.get_currentUser(),
4642  privObjects);
// The semicolon after the closing brace is an empty declaration; it has no effect.
4643 };
4644 } // namespace
4645 
// OPTIMIZE TABLE execution. The first line of the signature, the lock
// initializers and the statement that obtains `executor` are absent from this
// extract. Visible flow: reject read-only mode, resolve the table, verify
// access, refuse views, invalidate result-set caches for the table, then run
// the TableOptimizer.
4647  bool read_only_mode) {
4648  if (read_only_mode) {
4649  throw std::runtime_error("OPTIMIZE TABLE invalid in read only mode.");
4650  }
4651  auto& catalog = session.getCatalog();
4652
4653  const auto execute_read_lock =
4657
4658  const auto td_with_lock =
4660  catalog, *table_);
4661  const auto td = td_with_lock();
4662
// A missing table and a table the user lacks DELETE_FROM_TABLE on produce the
// same "does not exist" message.
4663  if (!td || !user_can_access_table(session, td, AccessPrivileges::DELETE_FROM_TABLE)) {
4664  throw std::runtime_error("Table " + *table_ + " does not exist.");
4665  }
4666
4667  if (td->isView) {
4668  throw std::runtime_error("OPTIMIZE TABLE command is not supported on views.");
4669  }
4670
4671  // invalidate cached item
// The cache key is the hash of {database id, table id}.
4672  std::vector<int> table_key{catalog.getCurrentDB().dbId, td->tableId};
4673  ResultSetCacheInvalidator::invalidateCachesByTable(boost::hash_value(table_key));
4674
4676  const TableOptimizer optimizer(td, executor, catalog);
// Vacuuming is optional; metadata is always recomputed afterwards.
4677  if (shouldVacuumDeletedRows()) {
4678  optimizer.vacuumDeletedRows();
4679  }
4680  optimizer.recomputeMetadata();
4681 }
4682 
4683 bool repair_type(std::list<std::unique_ptr<NameValueAssign>>& options) {
4684  for (const auto& opt : options) {
4685  if (boost::iequals(*opt->get_name(), "REPAIR_TYPE")) {
4686  const auto repair_type =
4687  static_cast<const StringLiteral*>(opt->get_value())->get_stringval();
4688  CHECK(repair_type);
4689  if (boost::iequals(*repair_type, "REMOVE")) {
4690  return true;
4691  } else {
4692  throw std::runtime_error("REPAIR_TYPE must be REMOVE.");
4693  }
4694  } else {
4695  throw std::runtime_error("The only VALIDATE WITH options is REPAIR_TYPE.");
4696  }
4697  }
4698  return false;
4699 }
4700 
4701 ValidateStmt::ValidateStmt(std::string* type, std::list<NameValueAssign*>* with_opts)
4702  : type_(type) {
4703  if (!type) {
4704  throw std::runtime_error("Validation Type is required for VALIDATE command.");
4705  }
4706  std::list<std::unique_ptr<NameValueAssign>> options;
4707  if (with_opts) {
4708  for (const auto e : *with_opts) {
4709  options.emplace_back(e);
4710  }
4711  delete with_opts;
4712 
4713  isRepairTypeRemove_ = repair_type(options);
4714  }
4715 }
4716 
4717 ValidateStmt::ValidateStmt(const rapidjson::Value& payload) {
4718  CHECK(payload.HasMember("type"));
4719  type_ = std::make_unique<std::string>(json_str(payload["type"]));
4720 
4721  std::list<std::unique_ptr<NameValueAssign>> options;
4722  parse_options(payload, options);
4723 
4724  isRepairTypeRemove_ = repair_type(options);
4725 }
4726 
// Privilege gate for ALTER TABLE style statements. The first line of the
// signature is absent from this extract; callers in this file invoke it as
// check_alter_table_privilege(session, td).
// Super users and the table owner pass immediately; anyone else must pass
// SysCatalog::checkPrivileges for the table object, otherwise a
// std::runtime_error is thrown.
4728  const TableDescriptor* td) {
4729  if (session.get_currentUser().isSuper ||
4730  session.get_currentUser().userId == td->userId) {
4731  return;
4732  }
4733  std::vector<DBObject> privObjects;
4734  DBObject dbObject(td->tableName, TableDBObjectType);
4735  dbObject.loadKey(session.getCatalog());
// NOTE(review): listing line 4736 is missing here; presumably it sets the
// privilege being checked on dbObject - confirm against the full source.
4737  privObjects.push_back(dbObject);
4738  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
4739  throw std::runtime_error("Current user does not have the privilege to alter table: " +
4740  td->tableName);
4741  }
4742 }
4743 
4744 RenameUserStmt::RenameUserStmt(const rapidjson::Value& payload) {
4745  CHECK(payload.HasMember("name"));
4746  username_ = std::make_unique<std::string>(json_str(payload["name"]));
4747  CHECK(payload.HasMember("newName"));
4748  new_username_ = std::make_unique<std::string>(json_str(payload["newName"]));
4749 }
4750 
// Rename-user execution. The first line of the signature and the declaration
// of `user` are absent from this extract. Only super users may rename users;
// the source user must exist.
4752  bool read_only_mode) {
4753  if (read_only_mode) {
// NOTE(review): this message says "RENAME TABLE" although the statement renames
// a user (see the messages below); it looks copied from the table variant.
4754  throw std::runtime_error("RENAME TABLE invalid in read only mode.");
4755  }
4756  if (!session.get_currentUser().isSuper) {
4757  throw std::runtime_error("Only a super user can rename users.");
4758  }
4759
4761  if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
4762  throw std::runtime_error("User " + *username_ + " does not exist.");
4763  }
4764
4765  SysCatalog::instance().renameUser(*username_, *new_username_);
4766 }
4767 
4768 RenameDBStmt::RenameDBStmt(const rapidjson::Value& payload) {
4769  CHECK(payload.HasMember("name"));
4770  database_name_ = std::make_unique<std::string>(json_str(payload["name"]));
4771  CHECK(payload.HasMember("newName"));
4772  new_database_name_ = std::make_unique<std::string>(json_str(payload["newName"]));
4773 }
4774 
// Rename-database execution. The first line of the signature, the declaration
// of `db` and the lock initializer are absent from this extract. The database
// must exist, and only a super user or the database owner may rename it.
4776  bool read_only_mode) {
4777  if (read_only_mode) {
4778  throw std::runtime_error("RENAME DATABASE invalid in read only mode.");
4779  }
4781
4782  // TODO: use database lock instead
4783  const auto execute_write_lock =
4787
4788  if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
4789  throw std::runtime_error("Database " + *database_name_ + " does not exist.");
4790  }
4791
4792  if (!session.get_currentUser().isSuper &&
4793  session.get_currentUser().userId != db.dbOwner) {
4794  throw std::runtime_error("Only a super user or the owner can rename the database.");
4795  }
4796
4797  SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
4798 }
4799 
4800 RenameTableStmt::RenameTableStmt(const rapidjson::Value& payload) {
4801  CHECK(payload.HasMember("tableNames"));
4802  CHECK(payload["tableNames"].IsArray());
4803  const auto elements = payload["tableNames"].GetArray();
4804  for (const auto& element : elements) {
4805  CHECK(element.HasMember("name"));
4806  CHECK(element.HasMember("newName"));
4807  tablesToRename_.emplace_back(new std::string(json_str(element["name"])),
4808  new std::string(json_str(element["newName"])));
4809  }
4810 }
4811 
// Single-rename form: queues one (current name, new name) pair built from the
// two string pointers supplied by the caller.
// NOTE(review): the pointers appear to be adopted by tablesToRename_ (a comment
// in the execute() path says "the unique_ptr will delete") - confirm ownership
// against the member declaration.
4812 RenameTableStmt::RenameTableStmt(std::string* tab_name, std::string* new_tab_name) {
4813  tablesToRename_.emplace_back(tab_name, new_tab_name);
4814 }
4815 
// List form (the first line of the signature is absent from this extract):
// queues one rename pair per (current name, new name) entry, copying each
// string into a newly allocated std::string.
4817  std::list<std::pair<std::string, std::string>> tableNames) {
4818  for (auto item : tableNames) {
4819  tablesToRename_.emplace_back(new std::string(item.first),
4820  new std::string(item.second));
4821  }
4822 }
4823 
4824 using SubstituteMap = std::map<std::string, std::string>;
4825 
4826 // Namespace fns used to track a left-to-right execution of RENAME TABLE
4827 // and verify that the command should be (entirely/mostly) valid
4828 //
4829 namespace {
4830 
// Sentinel stored in the substitution map for a name that currently holds no
// table (never existed, or was renamed away earlier in the same statement).
4831 static constexpr char const* EMPTY_NAME{""};
4832 
// Derives a temporary table name from `name` by appending "_tmp" followed by
// the current std::time() value rendered as decimal text.
std::string generateUniqueTableName(std::string name) {
  // TODO - is there a "better" way to create a tmp name for the table
  const std::time_t now = std::time(nullptr);
  std::string tmp_name = name + "_tmp";
  tmp_name += std::to_string(now);
  return tmp_name;
}
4838 
4839 void recordRename(SubstituteMap& sMap, std::string oldName, std::string newName) {
4840  sMap[oldName] = newName;
4841 }
4842 
// Resolves the name a table is currently known by during a RENAME TABLE run.
// The first line of the signature is absent from this extract; callers in this
// file invoke it as loadTable(catalog, map, name).
//  - Name already tracked: returns the tracked substitute, or the name itself
//    when the tracked entry is EMPTY_NAME.
//  - Name not tracked yet: looks it up in the catalog, records it as mapping to
//    itself when the table exists or to EMPTY_NAME when it does not, and
//    returns the name unchanged.
4844  SubstituteMap& sMap,
4845  std::string tableName) {
4846  if (sMap.find(tableName) != sMap.end()) {
4847  if (sMap[tableName] == EMPTY_NAME) {
4848  return tableName;
4849  }
4850  return sMap[tableName];
4851  } else {
4852  // lookup table in src catalog
4853  const TableDescriptor* td = catalog.getMetadataForTable(tableName);
4854  if (td) {
4855  sMap[tableName] = tableName;
4856  } else {
4857  sMap[tableName] = EMPTY_NAME;
4858  }
4859  }
4860  return tableName;
4861 }
4862 
4863 bool hasData(SubstituteMap& sMap, std::string tableName) {
4864  // assumes loadTable has been previously called
4865  return (sMap[tableName] != EMPTY_NAME);
4866 }
4867 
// Final consistency check for a RENAME TABLE run. The signature line is absent
// from this extract; the body reads a map named `sMap`, and the caller in this
// file invokes it as checkNameSubstition(map).
// Throws std::runtime_error for the first entry that still maps to a different,
// non-empty name.
4869  // Substitution map should be clean at end of rename:
4870  // all items in map must (map to self) or (map to EMPTY_NAME) by end
4871
4872  for (auto it : sMap) {
4873  if ((it.second) != EMPTY_NAME && (it.first) != (it.second)) {
4874  throw std::runtime_error(
4875  "Error: Attempted to overwrite and lose data in table: \'" + (it.first) + "\'");
4876  }
4877  }
4878 }
4879 } // namespace
4880 
// File-local guard whose signature and condition lines are absent from this
// extract. The visible remainder throws std::runtime_error telling the user to
// use ALTER FOREIGN TABLE for the table named in `td`.
4881 namespace {
4884  throw std::runtime_error(td->tableName + " is a foreign table. " +
4885  "Use ALTER FOREIGN TABLE.");
4886  }
4887 }
4888 } // namespace
4889 
// RENAME TABLE execution. The first line of the signature, the lock
// initializer and one statement inside the loop are absent from this extract.
// The loop replays the requested renames left to right against a substitution
// map, building the concrete (old, new) pairs in `names`; the map is then
// validated and the whole batch is handed to catalog.renameTable().
4891  bool read_only_mode) {
4892  if (read_only_mode) {
4893  throw std::runtime_error("RENAME TABLE invalid in read only mode.");
4894  }
4895  auto& catalog = session.getCatalog();
4896
4897  // TODO(adb): the catalog should be handling this locking (see AddColumnStmt)
4898  const auto execute_write_lock =
4902
4903  // accumulated vector of table names: oldName->newName
4904  std::vector<std::pair<std::string, std::string>> names;
4905
4906  SubstituteMap tableSubtituteMap;
4907
4908  for (auto& item : tablesToRename_) {
4909  std::string curTableName = *(item.first);
4910  std::string newTableName = *(item.second);
4911
4912  // Note: a swap such as rename (a->b, b->a)
4913  // requires a tmp name change (emitted below as b->tmp, a->b, tmp->a),
4914  // inject that here because
4915  // catalog.renameTable() assumes cleanliness else will fail
4916
4917  std::string altCurTableName = loadTable(catalog, tableSubtituteMap, curTableName);
4918  std::string altNewTableName = loadTable(catalog, tableSubtituteMap, newTableName);
4919
4920  if (altCurTableName != curTableName && altCurTableName != EMPTY_NAME) {
4921  // rename is a one-shot deal, reset the mapping once used
4922  recordRename(tableSubtituteMap, curTableName, curTableName);
4923  }
4924
4925  // Check to see if the command (as-entered) will likely execute cleanly (logic-wise)
4926  // src tables exist before copying from
4927  // destination table collisions
4928  // handled (a->b, b->a)
4929  // or flagged (pre-existing a,b ... "RENAME TABLE a->c, b->c" )
4930  // handle multiple chained renames, tmp names (b->tmp, a->b, tmp->a)
4931  // etc.
4932  //
4933  if (hasData(tableSubtituteMap, altCurTableName)) {
4934  const TableDescriptor* td = catalog.getMetadataForTable(altCurTableName);
// td is null when altCurTableName is a temporary name that only exists in the
// pending `names` list; the checks in this branch are skipped in that case.
4935  if (td) {
4936  // Tables *and* views may be renamed here, foreign tables not
4937  // -> just block foreign tables
4939  check_alter_table_privilege(session, td);
4940  }
4941
4942  if (hasData(tableSubtituteMap, altNewTableName)) {
4943  std::string tmpNewTableName = generateUniqueTableName(altNewTableName);
4944  // rename: newTableName to tmpNewTableName to get it out of the way
4945  // because it was full
4946  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4947  recordRename(tableSubtituteMap, altNewTableName, tmpNewTableName);
4948  recordRename(tableSubtituteMap, tmpNewTableName, tmpNewTableName);
4949  names.emplace_back(altNewTableName, tmpNewTableName);
4950  names.emplace_back(altCurTableName, altNewTableName);
4951  } else {
4952  // rename: altCurTableName to altNewTableName (destination is free)
4953  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4954  recordRename(tableSubtituteMap, altNewTableName, altNewTableName);
4955  names.emplace_back(altCurTableName, altNewTableName);
4956  }
4957  } else {
4958  throw std::runtime_error("Source table \'" + curTableName + "\' does not exist.");
4959  }
4960  }
4961  checkNameSubstition(tableSubtituteMap);
4962
4963  catalog.renameTable(names);
4964
4965  // just to be explicit, clean out the list, the unique_ptr will delete
4966  while (!tablesToRename_.empty()) {
4967  tablesToRename_.pop_front();
4968  }
4969 } // end of the RENAME TABLE execute function (this brace does not close a namespace)
4970 
// Fills a column descriptor from a parsed column definition. The signature
// line and the name of the helper that receives the final argument list are
// absent from this extract; a caller in this file invokes this function as
// setColumnDescriptor(cd, coldef).
// Derives the NOT NULL flag and the textual default value from the column
// constraint (when there is one) and forwards them together with the column
// type and compression.
4972  bool not_null;
4973  const ColumnConstraintDef* cc = coldef->get_column_constraint();
4974  if (cc == nullptr) {
4975  not_null = false;
4976  } else {
4977  not_null = cc->get_notnull();
4978  }
4979  std::string default_value;
// Stays nullptr when the column has no default; otherwise points at the local
// `default_value`, which is valid until this function returns.
4980  const std::string* default_value_ptr = nullptr;
4981  if (cc) {
4982  if (auto def_val_literal = cc->get_defaultval()) {
// String literals contribute their raw text; any other literal is rendered
// through to_string().
4983  auto defaultsp = dynamic_cast<const StringLiteral*>(def_val_literal);
4984  default_value =
4985  defaultsp ? *defaultsp->get_stringval() : def_val_literal->to_string();
4986  // The preprocessing below is needed because:
4987  // a) TypedImportBuffer expects arrays in the {...} format
4988  // b) TypedImportBuffer expects string literals inside arrays w/o any quotes
4989  if (coldef->get_column_type()->get_is_array()) {
// Rewrites "ARRAY[...]" (case insensitive) to "{...}" and then removes every
// single quote from the result.
4990  std::regex array_re(R"(^ARRAY\s*\[(.*)\]$)", std::regex_constants::icase);
4991  default_value = std::regex_replace(default_value, array_re, "{$1}");
4992  boost::erase_all(default_value, "\'");
4993  }
4994  default_value_ptr = &default_value;
4995  }
4996  }
4998  cd,
4999  coldef->get_column_type(),
5000  not_null,
5001  coldef->get_compression(),
5002  default_value_ptr);
5003 }
5004 
// Pre-flight validation for ADD COLUMN. The first line of the signature and one
// statement are absent from this extract; a caller in this file invokes it as
// check_executable(session, td).
// Throws std::runtime_error when the table is missing, is a view, is temporary,
// the user lacks the alter privilege, or any new column name already exists.
5006  const TableDescriptor* td) {
5007  auto& catalog = session.getCatalog();
5008  if (!td) {
5009  throw std::runtime_error("Table " + *table_ + " does not exist.");
5010  } else {
5011  if (td->isView) {
5012  throw std::runtime_error("Adding columns to a view is not supported.");
5013  }
5015  if (table_is_temporary(td)) {
5016  throw std::runtime_error(
5017  "Adding columns to temporary tables is not yet supported.");
5018  }
5019  }
5020
5021  check_alter_table_privilege(session, td);
5022
// Side effect: when the statement holds no entries in coldefs_, the single
// coldef_ member is moved into coldefs_ so the rest of the code only has to
// deal with the list.
5023  if (0 == coldefs_.size()) {
5024  coldefs_.push_back(std::move(coldef_));
5025  }
5026
5027  for (const auto& coldef : coldefs_) {
5028  auto& new_column_name = *coldef->get_column_name();
5029  if (catalog.getMetadataForColumn(td->tableId, new_column_name) != nullptr) {
5030  throw std::runtime_error("Column " + new_column_name + " already exists.");
5031  }
5032  }
5033 }
5034 
// ADD COLUMN execution. The first line of the signature, the lock initializers
// and the names of two called helpers are absent from this extract (the
// embedded listing numbers jump where they were).
// Visible flow:
//  1. validate the request (check_executable) and refuse sorted-order tables;
//  2. invalidate cached results for the table;
//  3. inside a SQLite transaction: register the new columns (plus the physical
//     columns of geo types), create one import buffer per column, fill the
//     buffers with each column's default for existing rows, load without
//     checkpoint, then checkpoint and end the transaction;
//  4. on any exception: roll the catalog back, roll the SQLite transaction
//     back, and rethrow.
5036  bool read_only_mode) {
5037  if (read_only_mode) {
5038  throw std::runtime_error("ADD COLUMN invalid in read only mode.");
5039  }
5040  // TODO: Review add and drop column implementation
5041  const auto execute_write_lock =
5045  auto& catalog = session.getCatalog();
5046  const auto td_with_lock =
5048  catalog, *table_, true);
5049  const auto td = td_with_lock();
5050
5051  check_executable(session, td);
5052
5053  CHECK(td->fragmenter);
5054  if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
5055  td->fragmenter)) {
5056  throw std::runtime_error(
5057  "Adding columns to a table is not supported when using the \"sort_column\" "
5058  "option.");
5059  }
5060
5061  // invalidate cached item
5062  std::vector<int> table_key{catalog.getCurrentDB().dbId, td->tableId};
5063  ResultSetCacheInvalidator::invalidateCachesByTable(boost::hash_value(table_key));
5064
5065  // Do not take a data write lock, as the fragmenter may call `deleteFragments`
5066  // during a cap operation. Note that the schema write lock will prevent concurrent
5067  // inserts along with all other queries.
5068
5069  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
5070  try {
// cds: new column descriptors keyed by column name.
// cid_coldefs: column id -> originating ColumnDef; physical geo columns are
// recorded with a nullptr definition.
5071  std::map<const std::string, const ColumnDescriptor> cds;
5072  std::map<const int, const ColumnDef*> cid_coldefs;
5073  for (const auto& coldef : coldefs_) {
5074  ColumnDescriptor cd;
5075  setColumnDescriptor(cd, coldef.get());
5076  catalog.addColumn(*td, cd);
5077  cds.emplace(*coldef->get_column_name(), cd);
5078  cid_coldefs.emplace(cd.columnId, coldef.get());
5079
5080  // expand geo column to phy columns
5081  if (cd.columnType.is_geometry()) {
5082  std::list<ColumnDescriptor> phy_geo_columns;
5083  catalog.expandGeoColumn(cd, phy_geo_columns);
// Note: the loop variable below shadows the outer `cd`.
5084  for (auto& cd : phy_geo_columns) {
5085  catalog.addColumn(*td, cd);
5086  cds.emplace(cd.columnName, cd);
5087  cid_coldefs.emplace(cd.columnId, nullptr);
5088  }
5089  }
5090  }
5091
5092  std::unique_ptr<import_export::Loader> loader(new import_export::Loader(catalog, td));
5093  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
// The buffers keep pointers to the descriptors stored in `cds`, so `cds` must
// outlive `import_buffers` (both live until the end of this try block).
5094  for (const auto& cd : cds) {
5095  import_buffers.emplace_back(std::make_unique<import_export::TypedImportBuffer>(
5096  &cd.second, loader->getStringDict(&cd.second)));
5097  }
5098  loader->setAddingColumns(true);
5099
5100  // set_geo_physical_import_buffer below needs a sorted import_buffers
// `cds` iterates in column-name order, so the buffers are re-sorted by column id.
5101  std::sort(import_buffers.begin(),
5102  import_buffers.end(),
5103  [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
5104  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
5105  });
5106
5107  size_t nrows = td->fragmenter->getNumRows();
5108  // if sharded, get total nrows from all sharded tables
5109  if (td->nShards > 0) {
5110  const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
5111  nrows = 0;
// Note: the lambda parameter `td` shadows the outer table descriptor.
5112  std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](const auto& td) {
5113  nrows += td->fragmenter->getNumRows();
5114  });
5115  }
// Default values only need to be materialised when the table already has rows.
5116  if (nrows > 0) {
// Number of upcoming physical columns still to skip because a preceding geo
// column already populated their buffers.
5117  int skip_physical_cols = 0;
5118  for (const auto cit : cid_coldefs) {
5119  const auto cd = catalog.getMetadataForColumn(td->tableId, cit.first);
5120  const auto coldef = cit.second;
5121  const bool is_null = !cd->default_value.has_value();
5122
5123  if (cd->columnType.get_notnull() && is_null) {
5124  throw std::runtime_error("Default value required for column " + cd->columnName +
5125  " because of NOT NULL constraint");
5126  }
5127
// Linear search for the import buffer that belongs to this column id.
5128  for (auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
5129  auto& import_buffer = *it;
5130  if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
// User-declared columns (coldef != nullptr) are always filled; physical columns
// are filled only once the skip counter has run out. The post-decrement is only
// evaluated for physical columns because of short-circuiting.
5131  if (coldef != nullptr ||
5132  skip_physical_cols-- <= 0) { // skip non-null phy col
5133  import_buffer->add_value(cd,
5134  cd->default_value.value_or("NULL"),
5135  is_null,
5137  if (cd->columnType.is_geometry()) {
5138  std::vector<double> coords, bounds;
5139  std::vector<int> ring_sizes, poly_rings;
5140  int render_group = 0;
5141  SQLTypeInfo tinfo{cd->columnType};
5143  cd->default_value.value_or("NULL"),
5144  tinfo,
5145  coords,
5146  bounds,
5147  ring_sizes,
5148  poly_rings,
5149  false)) {
5150  throw std::runtime_error("Bad geometry data: '" +
5151  cd->default_value.value_or("NULL") + "'");
5152  }
// Index of the buffer that follows the geo column's own buffer.
5153  size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
5155  cd,
5156  import_buffers,
5157  col_idx,
5158  coords,
5159  bounds,
5160  ring_sizes,
5161  poly_rings,
5162  render_group);
5163  // skip following phy cols
5164  skip_physical_cols = cd->columnType.get_physical_cols();
5165  }
5166  }
5167  break;
5168  }
5169  }
5170  }
5171  }
5172
5173  if (!loader->loadNoCheckpoint(import_buffers, nrows, &session)) {
5174  throw std::runtime_error("loadNoCheckpoint failed!");
5175  }
5176  catalog.roll(true);
5177  catalog.resetTableEpochFloor(td->tableId);
5178  loader->checkpoint();
5179  catalog.getSqliteConnector().query("END TRANSACTION");
5180  } catch (...) {
// Undo catalog changes and the SQLite transaction, then propagate the original
// exception unchanged.
5181  catalog.roll(false);
5182  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
5183  throw;
5184  }
5185 }
5186 
// DROP COLUMN execution. The first line of the signature, the lock
// initializers and a few statements/conditions are absent from this extract
// (the embedded listing numbers jump where they were).
// Visible flow: validate the table and every named column, invalidate caches,
// then inside a SQLite transaction drop each column (and the physical columns
// that follow it) from the catalog and from every physical table's fragmenter.
// On any exception the table is marked for reload, the catalog and the SQLite
// transaction are rolled back, and the exception is rethrown.
5188  bool read_only_mode) {
5189  if (read_only_mode) {
5190  throw std::runtime_error("DROP COLUMN invalid in read only mode.");
5191  }
5192  // TODO: Review add and drop column implementation
5193  const auto execute_write_lock =
5197  auto& catalog = session.getCatalog();
5198  const auto td_with_lock =
5200  catalog, *table_, true);
5201  const auto td = td_with_lock();
5202  if (!td) {
5203  throw std::runtime_error("Table " + *table_ + " does not exist.");
5204  }
5206  if (td->isView) {
5207  throw std::runtime_error("Dropping a column from a view is not supported.");
5208  }
5209  if (table_is_temporary(td)) {
5210  throw std::runtime_error(
5211  "Dropping a column from a temporary table is not yet supported.");
5212  }
5213
5214  check_alter_table_privilege(session, td);
5215
5216  for (const auto& column : columns_) {
5217  if (nullptr == catalog.getMetadataForColumn(td->tableId, *column)) {
5218  throw std::runtime_error("Column " + *column + " does not exist.");
5219  }
5220  }
5221
// NOTE(review): the thresholds 2 / 3 presumably account for hidden system
// columns included in nColumns - confirm against the catalog definition.
5222  if (td->nColumns <= (td->hasDeletedCol ? 3 : 2)) {
5223  throw std::runtime_error("Table " + *table_ + " has only one column.");
5224  }
5225
5226  // invalidate cached item
5227  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
5228
5229  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
5230  try {
// Ids of every dropped column, logical and physical, for the fragmenters.
5231  std::vector<int> columnIds;
5232  for (const auto& column : columns_) {
// Copy the descriptor: the catalog entry it came from is dropped below.
5233  ColumnDescriptor cd = *catalog.getMetadataForColumn(td->tableId, *column);
5234  if (td->nShards > 0 && td->shardedColumnId == cd.columnId) {
5235  throw std::runtime_error("Dropping sharding column " + cd.columnName +
5236  " is not supported.");
5237  }
5238  catalog.dropColumn(*td, cd);
5239  columnIds.push_back(cd.columnId);
// Physical columns are addressed as the ids immediately following cd.columnId.
5240  for (int i = 0; i < cd.columnType.get_physical_cols(); i++) {
5241  const auto pcd = catalog.getMetadataForColumn(td->tableId, cd.columnId + i + 1);
5242  CHECK(pcd);
5243  catalog.dropColumn(*td, *pcd);
5244  columnIds.push_back(cd.columnId + i + 1);
5245  }
5246  }
5247
5248  for (auto shard : catalog.getPhysicalTablesDescriptors(td)) {
5249  shard->fragmenter->dropColumns(columnIds);
5250  }
5251  // if test forces to rollback
// The condition guarding this throw (listing line 5252) is absent from this extract.
5253  throw std::runtime_error("lol!");
5254  }
5255  catalog.roll(true);
// The condition opening the block below (listing line 5256) is absent from this extract.
5257  catalog.resetTableEpochFloor(td->tableId);
5258  catalog.checkpoint(td->tableId);
5259  }
5260  catalog.getSqliteConnector().query("END TRANSACTION");
5261  } catch (...) {
5262  catalog.setForReload(td->tableId);
5263  catalog.roll(false);
5264  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
5265  throw;
5266  }
5267 }
5268 
// RENAME COLUMN execution. The first line of the signature, the lock
// initializers and one statement are absent from this extract. The source
// column must exist and the new name must be unused; the rename itself is
// delegated to catalog.renameColumn().
5270  bool read_only_mode) {
5271  if (read_only_mode) {
5272  throw std::runtime_error("RENAME COLUMN invalid in read only mode.");
5273  }
5274  auto& catalog = session.getCatalog();
5275
5276  const auto execute_read_lock =
5280
5281  const auto td_with_lock =
5283  catalog, *table_, false);
5284  const auto td = td_with_lock();
// Unlike the sibling statements, a missing table descriptor trips CHECK here
// instead of producing a "does not exist" runtime_error.
5285  CHECK(td);
5287
5288  check_alter_table_privilege(session, td);
5289  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *column_);
5290  if (cd == nullptr) {
5291  throw std::runtime_error("Column " + *column_ + " does not exist.");
5292  }
5293  if (catalog.getMetadataForColumn(td->tableId, *new_column_name_) != nullptr) {
5294  throw std::runtime_error("Column " + *new_column_name_ + " already exists.");
5295  }
5296  catalog.renameColumn(td, cd, *new_column_name_);
5297 }
5298 
// ALTER TABLE ... SET <param> execution. The first line of the signature and
// the lock initializers are absent from this extract.
// Supported parameters (matched case-insensitively): max_rollback_epochs,
// epoch, max_rows. The value must be an integer literal.
// Throws std::runtime_error for read-only mode, a missing table, views,
// temporary tables, insufficient privileges, non-integer values and unknown
// parameter names.
5300  bool read_only_mode) {
5301  if (read_only_mode) {
5302  throw std::runtime_error("ALTER TABLE invalid in read only mode.");
5303  }
5304  enum TableParamType { MaxRollbackEpochs, Epoch, MaxRows };
// Keys are lower case; the parameter name is lower-cased before the lookup below.
5305  static const std::unordered_map<std::string, TableParamType> param_map = {
5306  {"max_rollback_epochs", TableParamType::MaxRollbackEpochs},
5307  {"epoch", TableParamType::Epoch},
5308  {"max_rows", TableParamType::MaxRows}};
5309  const auto execute_read_lock =
5313  auto& catalog = session.getCatalog();
5314  const auto td_with_lock =
5316  catalog, *table_, false);
5317  const auto td = td_with_lock();
5318  if (!td) {
5319  throw std::runtime_error("Table " + *table_ + " does not exist.");
5320  }
5321  if (td->isView) {
5322  throw std::runtime_error("Setting parameters for a view is not supported.");
5323  }
5324  if (table_is_temporary(td)) {
5325  throw std::runtime_error(
5326  "Setting parameters for a temporary table is not yet supported.");
5327  }
5328  check_alter_table_privilege(session, td);
5329
5330  // invalidate cached item
// Note: caches are invalidated before the parameter name and value are validated.
5331  std::vector<int> table_key{catalog.getCurrentDB().dbId, td->tableId};
5332  ResultSetCacheInvalidator::invalidateCachesByTable(boost::hash_value(table_key));
5333
5334  std::string param_name(*param_->get_name());
5335  boost::algorithm::to_lower(param_name);
5336  const IntLiteral* val_int_literal =
5337  dynamic_cast<const IntLiteral*>(param_->get_value());
5338  if (val_int_literal == nullptr) {
5339  throw std::runtime_error("Table parameters should be integers.");
5340  }
5341  const int64_t param_val = val_int_literal->get_intval();
5342
5343  const auto param_it = param_map.find(param_name);
5344  if (param_it == param_map.end()) {
5345  throw std::runtime_error(param_name + " is not a settable table parameter.");
5346  }
5347  switch (param_it->second) {
5348  case MaxRollbackEpochs: {
5349  catalog.setMaxRollbackEpochs(td->tableId, param_val);
5350  break;
5351  }
5352  case Epoch: {
5353  catalog.setTableEpoch(catalog.getDatabaseId(), td->tableId, param_val);
5354  break;
5355  }
5356  case MaxRows: {
5357  catalog.setMaxRows(td->tableId, param_val);
5358  break;
5359  }
// Every enumerator is handled above; reaching the default means param_map and
// the switch have fallen out of sync.
5360  default: {
5361  UNREACHABLE() << "Unexpected TableParamType value: " << param_it->second
5362  << ", key: " << param_it->first;
5363  }
5364  }
5365 }
5366 
5368  std::string* f,
5369  std::list<NameValueAssign*>* o)
5370  : table_(t), copy_from_source_pattern_(f), success_(true) {
      // Builds the statement from raw pointers: table_ is initialized from `t` and
      // copy_from_source_pattern_ from `f`; success_ starts as true.
      // NOTE(review): the first line of this constructor (declaring `t`) is not
      // visible in this listing. The payload-based constructor assigns these members
      // from std::make_unique, so the raw pointers are presumably adopted as owned
      // here -- confirm against the class declaration.
      //
      // `o` may be null. When it is not, each element pointer is appended to
      // options_ and then the list object itself is deleted; the elements are not
      // deleted here (presumably options_ takes ownership of them -- confirm).
5371  if (o) {
5372  for (const auto e : *o) {
5373  options_.emplace_back(e);
5374  }
5375  delete o;
5376  }
5377 }
5378 
5379 CopyTableStmt::CopyTableStmt(const rapidjson::Value& payload) : success_(true) {
5380  CHECK(payload.HasMember("table"));
5381  table_ = std::make_unique<std::string>(json_str(payload["table"]));
5382 
5383  CHECK(payload.HasMember("filePath"));
5384  std::string fs = json_str(payload["filePath"]);
5385  // strip leading/trailing spaces/quotes/single quotes
5386  boost::algorithm::trim_if(fs, boost::is_any_of(" \"'`"));
5387  copy_from_source_pattern_ = std::make_unique<std::string>(fs);