OmniSciDB  085a039ca4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParserNode.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "ParserNode.h"
26 
27 #include <boost/algorithm/string.hpp>
28 #include <boost/core/null_deleter.hpp>
29 #include <boost/filesystem.hpp>
30 #include <boost/function.hpp>
31 
32 #include <rapidjson/document.h>
33 #include <rapidjson/stringbuffer.h>
34 #include <rapidjson/writer.h>
35 
36 #include <cassert>
37 #include <cmath>
38 #include <limits>
39 #include <random>
40 #include <regex>
41 #include <stdexcept>
42 #include <string>
43 #include <type_traits>
44 #include <typeinfo>
45 
47 #include "Catalog/Catalog.h"
53 #include "Geospatial/Compression.h"
54 #include "Geospatial/Types.h"
56 #include "ImportExport/Importer.h"
58 #include "LockMgr/LockMgr.h"
62 #include "QueryEngine/Execute.h"
67 #include "ReservedKeywords.h"
68 #include "Shared/StringTransform.h"
69 #include "Shared/SysDefinitions.h"
71 #include "Shared/measure.h"
72 #include "Shared/shard_key.h"
74 #include "Utils/FsiUtils.h"
75 
76 #include "gen-cpp/CalciteServer.h"
77 
// Process-wide settings defined or referenced by this translation unit.
// NOTE(review): this listing skips several numbered lines in this region (79, 83,
// 87), so declarations that sit between the ones below may be missing from view.

// Defined here and initialized to zero. Presumably the number of leaf nodes in a
// distributed setup -- TODO confirm against its users.
78 size_t g_leaf_count{0};
// Flags whose definitions live in another translation unit.
80 extern bool g_enable_string_functions;
81 extern bool g_enable_fsi;
82 
// Only compiled in when ENABLE_IMPORT_PARQUET is defined; defaults to false.
84 #ifdef ENABLE_IMPORT_PARQUET
85 bool g_enable_legacy_parquet_import{false};
86 #endif
88 
90 
// Brings the std::string literal suffix ("..."s) into scope for this file.
92 using namespace std::string_literals;
93 
// Callback type taking a TableDescriptor to fill in and the table's column list.
// NOTE(review): listing line 95 is absent from this view, so the signature may
// carry another parameter between the two shown -- confirm against the full source.
94 using TableDefFuncPtr = boost::function<void(TableDescriptor&,
96  const std::list<ColumnDescriptor>& columns)>;
97 
// Same shape of callback for a DataframeTableDescriptor.
// NOTE(review): listing line 100 is likewise absent from this view.
98 using DataframeDefFuncPtr =
99  boost::function<void(DataframeTableDescriptor&,
101  const std::list<ColumnDescriptor>& columns)>;
102 
103 namespace Parser {
// Reports whether the query session `query_session` is flagged as interrupted, as
// answered by executor->checkIsQuerySessionInterrupted() while holding a shared
// lock on the executor's session lock.
//
// NOTE(review): listing line 108 is absent from this view. The closing brace on
// line 111 has no visible opener, so the lock/return pair below is presumably
// wrapped in a condition that is not shown; when that block is not taken the
// function returns false. Confirm against the full source before editing.
104 bool check_session_interrupted(const QuerySessionId& query_session, Executor* executor) {
105  // This is called with the unitary executor, which is fine because the
106  // exact session info comes from a global session map object held
107  // in the executor.
109  mapd_shared_lock<mapd_shared_mutex> session_read_lock(executor->getSessionLock());
110  return executor->checkIsQuerySessionInterrupted(query_session, session_read_lock);
111  }
112  return false;
113 }
114 
115 std::vector<int> getTableChunkKey(const TableDescriptor* td,
116  Catalog_Namespace::Catalog& catalog) {
117  std::vector<int> table_chunk_key_prefix;
118  if (td) {
119  if (td->fragmenter) {
120  table_chunk_key_prefix = td->fragmenter->getFragmentsForQuery().chunkKeyPrefix;
121  } else {
122  table_chunk_key_prefix.push_back(catalog.getCurrentDB().dbId);
123  table_chunk_key_prefix.push_back(td->tableId);
124  }
125  }
126  return table_chunk_key_prefix;
127 }
128 
129 std::shared_ptr<Analyzer::Expr> NullLiteral::analyze(
130  const Catalog_Namespace::Catalog& catalog,
131  Analyzer::Query& query,
132  TlistRefType allow_tlist_ref) const {
133  return makeExpr<Analyzer::Constant>(kNULLT, true);
134 }
135 
136 std::shared_ptr<Analyzer::Expr> StringLiteral::analyze(
137  const Catalog_Namespace::Catalog& catalog,
138  Analyzer::Query& query,
139  TlistRefType allow_tlist_ref) const {
140  return analyzeValue(*stringval_, false);
141 }
142 
143 std::shared_ptr<Analyzer::Expr> StringLiteral::analyzeValue(const std::string& stringval,
144  const bool is_null) {
145  if (!is_null) {
146  const SQLTypeInfo ti(kVARCHAR, stringval.length(), 0, true);
147  Datum d;
148  d.stringval = new std::string(stringval);
149  return makeExpr<Analyzer::Constant>(ti, false, d);
150  }
151  // Null value
152  return makeExpr<Analyzer::Constant>(kVARCHAR, true);
153 }
154 
155 std::shared_ptr<Analyzer::Expr> IntLiteral::analyze(
156  const Catalog_Namespace::Catalog& catalog,
157  Analyzer::Query& query,
158  TlistRefType allow_tlist_ref) const {
159  return analyzeValue(intval_);
160 }
161 
162 std::shared_ptr<Analyzer::Expr> IntLiteral::analyzeValue(const int64_t intval) {
163  SQLTypes t;
164  Datum d;
165  if (intval >= INT16_MIN && intval <= INT16_MAX) {
166  t = kSMALLINT;
167  d.smallintval = (int16_t)intval;
168  } else if (intval >= INT32_MIN && intval <= INT32_MAX) {
169  t = kINT;
170  d.intval = (int32_t)intval;
171  } else {
172  t = kBIGINT;
173  d.bigintval = intval;
174  }
175  return makeExpr<Analyzer::Constant>(t, false, d);
176 }
177 
178 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyze(
179  const Catalog_Namespace::Catalog& catalog,
180  Analyzer::Query& query,
181  TlistRefType allow_tlist_ref) const {
182  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
183  Datum d = StringToDatum(*fixedptval_, ti);
184  return makeExpr<Analyzer::Constant>(ti, false, d);
185 }
186 
187 std::shared_ptr<Analyzer::Expr> FixedPtLiteral::analyzeValue(const int64_t numericval,
188  const int scale,
189  const int precision) {
190  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
191  ti.set_scale(scale);
192  ti.set_precision(precision);
193  Datum d;
194  d.bigintval = numericval;
195  return makeExpr<Analyzer::Constant>(ti, false, d);
196 }
197 
198 std::shared_ptr<Analyzer::Expr> FloatLiteral::analyze(
199  const Catalog_Namespace::Catalog& catalog,
200  Analyzer::Query& query,
201  TlistRefType allow_tlist_ref) const {
202  Datum d;
203  d.floatval = floatval_;
204  return makeExpr<Analyzer::Constant>(kFLOAT, false, d);
205 }
206 
207 std::shared_ptr<Analyzer::Expr> DoubleLiteral::analyze(
208  const Catalog_Namespace::Catalog& catalog,
209  Analyzer::Query& query,
210  TlistRefType allow_tlist_ref) const {
211  Datum d;
212  d.doubleval = doubleval_;
213  return makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
214 }
215 
216 std::shared_ptr<Analyzer::Expr> TimestampLiteral::analyze(
217  const Catalog_Namespace::Catalog& catalog,
218  Analyzer::Query& query,
219  TlistRefType allow_tlist_ref) const {
220  return get(timestampval_);
221 }
222 
223 std::shared_ptr<Analyzer::Expr> TimestampLiteral::get(const int64_t timestampval) {
224  Datum d;
225  d.bigintval = timestampval;
226  return makeExpr<Analyzer::Constant>(kTIMESTAMP, false, d);
227 }
228 
229 std::shared_ptr<Analyzer::Expr> UserLiteral::analyze(
230  const Catalog_Namespace::Catalog& catalog,
231  Analyzer::Query& query,
232  TlistRefType allow_tlist_ref) const {
233  Datum d;
234  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
235 }
236 
237 std::shared_ptr<Analyzer::Expr> UserLiteral::get(const std::string& user) {
238  Datum d;
239  d.stringval = new std::string(user);
240  return makeExpr<Analyzer::Constant>(kTEXT, false, d);
241 }
242 
243 std::shared_ptr<Analyzer::Expr> ArrayLiteral::analyze(
244  const Catalog_Namespace::Catalog& catalog,
245  Analyzer::Query& query,
246  TlistRefType allow_tlist_ref) const {
247  SQLTypeInfo ti = SQLTypeInfo(kARRAY, true);
248  bool set_subtype = true;
249  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
250  for (auto& p : value_list_) {
251  auto e = p->analyze(catalog, query, allow_tlist_ref);
252  CHECK(e);
253  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
254  if (c != nullptr && c->get_is_null()) {
255  value_exprs.push_back(c);
256  continue;
257  }
258  auto subtype = e->get_type_info().get_type();
259  if (subtype == kNULLT) {
260  // NULL element
261  } else if (set_subtype) {
262  ti.set_subtype(subtype);
263  set_subtype = false;
264  }
265  value_exprs.push_back(e);
266  }
267  std::shared_ptr<Analyzer::Expr> result =
268  makeExpr<Analyzer::Constant>(ti, false, value_exprs);
269  return result;
270 }
271 
272 std::string ArrayLiteral::to_string() const {
273  std::string str = "{";
274  bool notfirst = false;
275  for (auto& p : value_list_) {
276  if (notfirst) {
277  str += ", ";
278  } else {
279  notfirst = true;
280  }
281  str += p->to_string();
282  }
283  str += "}";
284  return str;
285 }
286 
287 std::shared_ptr<Analyzer::Expr> OperExpr::analyze(
288  const Catalog_Namespace::Catalog& catalog,
289  Analyzer::Query& query,
290  TlistRefType allow_tlist_ref) const {
291  auto left_expr = left_->analyze(catalog, query, allow_tlist_ref);
292  const auto& left_type = left_expr->get_type_info();
293  if (right_ == nullptr) {
294  return makeExpr<Analyzer::UOper>(
295  left_type, left_expr->get_contains_agg(), optype_, left_expr->decompress());
296  }
297  if (optype_ == kARRAY_AT) {
298  if (left_type.get_type() != kARRAY) {
299  throw std::runtime_error(left_->to_string() + " is not of array type.");
300  }
301  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
302  const auto& right_type = right_expr->get_type_info();
303  if (!right_type.is_integer()) {
304  throw std::runtime_error(right_->to_string() + " is not of integer type.");
305  }
306  return makeExpr<Analyzer::BinOper>(
307  left_type.get_elem_type(), false, kARRAY_AT, kONE, left_expr, right_expr);
308  }
309  auto right_expr = right_->analyze(catalog, query, allow_tlist_ref);
310  return normalize(optype_, opqualifier_, left_expr, right_expr);
311 }
312 
313 bool exprs_share_one_and_same_rte_idx(const std::shared_ptr<Analyzer::Expr>& lhs_expr,
314  const std::shared_ptr<Analyzer::Expr>& rhs_expr) {
315  std::set<int> lhs_rte_idx;
316  lhs_expr->collect_rte_idx(lhs_rte_idx);
317  CHECK(!lhs_rte_idx.empty());
318  std::set<int> rhs_rte_idx;
319  rhs_expr->collect_rte_idx(rhs_rte_idx);
320  CHECK(!rhs_rte_idx.empty());
321  return lhs_rte_idx.size() == 1UL && lhs_rte_idx == rhs_rte_idx;
322 }
323 
324 SQLTypeInfo const& get_str_dict_cast_type(const SQLTypeInfo& lhs_type_info,
325  const SQLTypeInfo& rhs_type_info,
326  const Executor* executor) {
327  CHECK(lhs_type_info.is_string());
328  CHECK(lhs_type_info.get_compression() == kENCODING_DICT);
329  CHECK(rhs_type_info.is_string());
330  CHECK(rhs_type_info.get_compression() == kENCODING_DICT);
331  const auto lhs_comp_param = lhs_type_info.get_comp_param();
332  const auto rhs_comp_param = rhs_type_info.get_comp_param();
333  CHECK_NE(lhs_comp_param, rhs_comp_param);
334  if (lhs_type_info.get_comp_param() == TRANSIENT_DICT_ID) {
335  return rhs_type_info;
336  }
337  if (rhs_type_info.get_comp_param() == TRANSIENT_DICT_ID) {
338  return lhs_type_info;
339  }
340  // If here then neither lhs or rhs type was transient, we should see which
341  // type has the largest dictionary and make that the destination type
342  const auto lhs_sdp = executor->getStringDictionaryProxy(lhs_comp_param, true);
343  const auto rhs_sdp = executor->getStringDictionaryProxy(rhs_comp_param, true);
344  return lhs_sdp->entryCount() >= rhs_sdp->entryCount() ? lhs_type_info : rhs_type_info;
345 }
346 
// NOTE(review): the first line of this function's signature (listing line 347) is
// absent from this view; only the trailing parameters are visible. From the body,
// the missing first parameter is named lhs_type_info; the return type cannot be
// seen here -- confirm against the full source.
//
// Picks the string type that two string operands should be brought to. Both
// inputs must be string types (enforced with CHECKs).
//  - Both dictionary encoded: if the comp params are equal, or the left one equals
//    TRANSIENT_DICT(right), the side with the smaller-or-equal comp param is
//    returned (left on ties); otherwise the choice is deferred to
//    get_str_dict_cast_type(), which is the only user of `executor`.
//  - Otherwise at least one side must be none-encoded (CHECK). The left type is
//    chosen when the right side is none-encoded, else the right type. If the
//    chosen type is itself none-encoded, its dimension is widened to the larger of
//    the two dimensions.
348  const SQLTypeInfo& rhs_type_info,
349  const Executor* executor) {
350  CHECK(lhs_type_info.is_string());
351  CHECK(rhs_type_info.is_string());
352  const auto lhs_comp_param = lhs_type_info.get_comp_param();
353  const auto rhs_comp_param = rhs_type_info.get_comp_param();
354  if (lhs_type_info.is_dict_encoded_string() && rhs_type_info.is_dict_encoded_string()) {
355  if (lhs_comp_param == rhs_comp_param ||
356  lhs_comp_param == TRANSIENT_DICT(rhs_comp_param)) {
357  return lhs_comp_param <= rhs_comp_param ? lhs_type_info : rhs_type_info;
358  }
359  return get_str_dict_cast_type(lhs_type_info, rhs_type_info, executor);
360  }
361  CHECK(lhs_type_info.is_none_encoded_string() || rhs_type_info.is_none_encoded_string());
      // Prefer whichever side is not none-encoded; falls back to the right side.
362  SQLTypeInfo ret_ti =
363  rhs_type_info.is_none_encoded_string() ? lhs_type_info : rhs_type_info;
364  if (ret_ti.is_none_encoded_string()) {
365  ret_ti.set_dimension(
366  std::max(lhs_type_info.get_dimension(), rhs_type_info.get_dimension()));
367  }
368  return ret_ti;
369 }
370 
// Builds the analyzer BinOper for `left_expr optype right_expr`, inserting the
// casts and decompressions needed to make the operand types compatible.
//
// Parameters:
//   optype     - the binary operator.
//   qual       - kONE for a plain operator; any other value requires the right
//                operand to be of array type, and the operator is then typed
//                against the array's element type.
//   left_expr  - left operand (taken by value; may be replaced by a cast or
//                decompressed version).
//   right_expr - right operand (same treatment).
//   executor   - only dereferenced, via get_str_dict_cast_type(), when translating
//                between two different string dictionaries; CHECKed non-null on
//                that path.
// Returns the resulting BinOper expression.
// Throws std::runtime_error when a qualifier is used with a non-array right
// operand, or when a comparison other than kOVERLAPS is applied to two geometry
// operands.
//
// NOTE(review): listing line 441 is absent from this view (between lines 440 and
// 442 below), so one statement of the dictionary-translation path may be missing.
371 std::shared_ptr<Analyzer::Expr> OperExpr::normalize(
372  const SQLOps optype,
373  const SQLQualifier qual,
374  std::shared_ptr<Analyzer::Expr> left_expr,
375  std::shared_ptr<Analyzer::Expr> right_expr,
376  const Executor* executor) {
377  if (left_expr->get_type_info().is_date_in_days() ||
378  right_expr->get_type_info().is_date_in_days()) {
379  // Do not propagate encoding
380  left_expr = left_expr->decompress();
381  right_expr = right_expr->decompress();
382  }
383  const auto& left_type = left_expr->get_type_info();
384  auto right_type = right_expr->get_type_info();
385  if (qual != kONE) {
386  // subquery not supported yet.
387  CHECK(!std::dynamic_pointer_cast<Analyzer::Subquery>(right_expr));
388  if (right_type.get_type() != kARRAY) {
389  throw std::runtime_error(
390  "Existential or universal qualifiers can only be used in front of a subquery "
391  "or an "
392  "expression of array type.");
393  }
      // From here on the operator is typed against the array's element type.
394  right_type = right_type.get_elem_type();
395  }
396  SQLTypeInfo new_left_type;
397  SQLTypeInfo new_right_type;
398  auto result_type = Analyzer::BinOper::analyze_type_info(
399  optype, left_type, right_type, &new_left_type, &new_right_type);
      // Time-interval results are returned immediately: no casts are added and
      // contains_agg is passed as false.
400  if (result_type.is_timeinterval()) {
401  return makeExpr<Analyzer::BinOper>(
402  result_type, false, optype, qual, left_expr, right_expr);
403  }
404  if (left_type != new_left_type) {
405  left_expr = left_expr->add_cast(new_left_type);
406  }
407  if (right_type != new_right_type) {
408  if (qual == kONE) {
409  right_expr = right_expr->add_cast(new_right_type);
410  } else {
      // With a qualifier the right operand is an array, so cast it to an array of
      // the new element type.
411  right_expr = right_expr->add_cast(new_right_type.get_array_type());
412  }
413  }
414 
415  if (IS_COMPARISON(optype)) {
416  if (optype != kOVERLAPS && new_left_type.is_geometry() &&
417  new_right_type.is_geometry()) {
418  throw std::runtime_error(
419  "Comparison operators are not yet supported for geospatial types.");
420  }
421 
422  if (new_left_type.get_compression() == kENCODING_DICT &&
423  new_right_type.get_compression() == kENCODING_DICT) {
424  if (new_left_type.get_comp_param() != new_right_type.get_comp_param()) {
425  if (optype == kEQ || optype == kNE) {
426  // Join framework does its own string dictionary translation
427  // (at least partly since the rhs table projection does not use
428  // the normal runtime execution framework), so if we detect
429  // that the rte idxs of the two tables are different, bail
430  // on translating
431  const bool should_translate_strings =
432  exprs_share_one_and_same_rte_idx(left_expr, right_expr);
      // NOTE(review): the kEQ/kNE test below repeats the enclosing condition and
      // is always true here.
433  if (should_translate_strings && (optype == kEQ || optype == kNE)) {
434  CHECK(executor);
435  // Make the type we're casting to the transient dictionary, if it exists,
436  // otherwise the largest dictionary in terms of number of entries
437  SQLTypeInfo ti(
438  get_str_dict_cast_type(new_left_type, new_right_type, executor));
      // Cast whichever operand does not already have the chosen type.
439  auto& expr_to_cast = ti == new_left_type ? right_expr : left_expr;
440  ti.set_fixed_size();
442  expr_to_cast = expr_to_cast->add_cast(ti);
443  } else { // kEQ/kNE, but the operands do not share one range table entry
444  // Translation is skipped in this case (see the join note above), so
445  // both operands are decompressed and compared as plain strings, even
446  // though the operator is kEQ or kNE
447  left_expr = left_expr->decompress();
448  right_expr = right_expr->decompress();
449  }
450  } else { // Ordered comparison operator
451  // We do not currently support ordered (i.e. >, <=) comparisons between
452  // dictionary-encoded columns, so both operands are decompressed when the
453  // two sides use different dictionaries
454  left_expr = left_expr->decompress();
455  right_expr = right_expr->decompress();
456  }
457  } else { // Strings shared comp param
458  if (!(optype == kEQ || optype == kNE)) {
459  // We do not currently support ordered (i.e. >, <=) comparisons between
460  // encoded columns, so try to decode (will only succeed with watchdog off)
461  left_expr = left_expr->decompress();
462  right_expr = right_expr->decompress();
463  } else {
464  // do nothing, can directly support equals/non-equals comparisons between two
465  // dictionary encoded columns sharing the same dictionary as these are
466  // effectively integer comparisons in the same dictionary space
467  }
468  }
469  } else if (new_left_type.get_compression() == kENCODING_DICT &&
470  new_right_type.get_compression() == kENCODING_NONE) {
      // Left is dictionary encoded, right is not: cast the right operand to the
      // left operand's encoding and comp param.
471  SQLTypeInfo ti(new_right_type);
472  ti.set_compression(new_left_type.get_compression());
473  ti.set_comp_param(new_left_type.get_comp_param());
474  ti.set_fixed_size();
475  right_expr = right_expr->add_cast(ti);
476  } else if (new_right_type.get_compression() == kENCODING_DICT &&
477  new_left_type.get_compression() == kENCODING_NONE) {
      // Mirror image of the branch above: cast the left operand instead.
478  SQLTypeInfo ti(new_left_type);
479  ti.set_compression(new_right_type.get_compression());
480  ti.set_comp_param(new_right_type.get_comp_param());
481  ti.set_fixed_size();
482  left_expr = left_expr->add_cast(ti);
483  } else {
484  left_expr = left_expr->decompress();
485  right_expr = right_expr->decompress();
486  }
487  } else {
488  // Is this now a no-op just for pairs of none-encoded string columns?
489  left_expr = left_expr->decompress();
490  right_expr = right_expr->decompress();
491  }
      // The result contains an aggregate if either (possibly rewritten) operand does.
492  bool has_agg = (left_expr->get_contains_agg() || right_expr->get_contains_agg());
493  return makeExpr<Analyzer::BinOper>(
494  result_type, has_agg, optype, qual, left_expr, right_expr);
495 }
496 
497 std::shared_ptr<Analyzer::Expr> SubqueryExpr::analyze(
498  const Catalog_Namespace::Catalog& catalog,
499  Analyzer::Query& query,
500  TlistRefType allow_tlist_ref) const {
501  throw std::runtime_error("Subqueries are not supported yet.");
502  return nullptr;
503 }
504 
505 std::shared_ptr<Analyzer::Expr> IsNullExpr::analyze(
506  const Catalog_Namespace::Catalog& catalog,
507  Analyzer::Query& query,
508  TlistRefType allow_tlist_ref) const {
509  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
510  auto result = makeExpr<Analyzer::UOper>(kBOOLEAN, kISNULL, arg_expr);
511  if (is_not_) {
512  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
513  }
514  return result;
515 }
516 
517 std::shared_ptr<Analyzer::Expr> InSubquery::analyze(
518  const Catalog_Namespace::Catalog& catalog,
519  Analyzer::Query& query,
520  TlistRefType allow_tlist_ref) const {
521  throw std::runtime_error("Subqueries are not supported yet.");
522  return nullptr;
523 }
524 
525 std::shared_ptr<Analyzer::Expr> InValues::analyze(
526  const Catalog_Namespace::Catalog& catalog,
527  Analyzer::Query& query,
528  TlistRefType allow_tlist_ref) const {
529  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
530  SQLTypeInfo ti = arg_expr->get_type_info();
531  bool dict_comp = ti.get_compression() == kENCODING_DICT;
532  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
533  for (auto& p : value_list_) {
534  auto e = p->analyze(catalog, query, allow_tlist_ref);
535  if (ti != e->get_type_info()) {
536  if (ti.is_string() && e->get_type_info().is_string()) {
537  // Todo(todd): Can we have this leverage the cast framework as well
538  ti = Analyzer::BinOper::common_string_type(ti, e->get_type_info());
539  } else if (ti.is_number() && e->get_type_info().is_number()) {
540  ti = Analyzer::BinOper::common_numeric_type(ti, e->get_type_info());
541  } else {
542  throw std::runtime_error("IN expressions must contain compatible types.");
543  }
544  }
545  if (dict_comp) {
546  value_exprs.push_back(e->add_cast(arg_expr->get_type_info()));
547  } else {
548  value_exprs.push_back(e);
549  }
550  }
551  if (!dict_comp) {
552  arg_expr = arg_expr->decompress();
553  arg_expr = arg_expr->add_cast(ti);
554  std::list<std::shared_ptr<Analyzer::Expr>> cast_vals;
555  for (auto p : value_exprs) {
556  cast_vals.push_back(p->add_cast(ti));
557  }
558  value_exprs.swap(cast_vals);
559  }
560  std::shared_ptr<Analyzer::Expr> result =
561  makeExpr<Analyzer::InValues>(arg_expr, value_exprs);
562  if (is_not_) {
563  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
564  }
565  return result;
566 }
567 
// Analyzes `arg [NOT] BETWEEN lower AND upper` into
// `(arg >= lower) AND (arg <= upper)`, wrapped in a boolean kNOT UOper for the
// negated form. Each comparison operand is cast to new_left_type /
// new_right_type and then decompressed; the second comparison works on a deep
// copy of the argument expression.
//
// NOTE(review): listing lines 576 and 587 are absent from this view. Each of the
// two four-line argument groups below is the tail of a call whose head is not
// shown; since they pass &new_left_type / &new_right_type, that call presumably
// fills in the cast types used by the predicate that follows -- confirm against
// the full source.
568 std::shared_ptr<Analyzer::Expr> BetweenExpr::analyze(
569  const Catalog_Namespace::Catalog& catalog,
570  Analyzer::Query& query,
571  TlistRefType allow_tlist_ref) const {
572  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
573  auto lower_expr = lower_->analyze(catalog, query, allow_tlist_ref);
574  auto upper_expr = upper_->analyze(catalog, query, allow_tlist_ref);
575  SQLTypeInfo new_left_type, new_right_type;
      // Tail of the (not shown) call preceding the lower-bound predicate.
577  arg_expr->get_type_info(),
578  lower_expr->get_type_info(),
579  &new_left_type,
580  &new_right_type);
581  auto lower_pred =
582  makeExpr<Analyzer::BinOper>(kBOOLEAN,
583  kGE,
584  kONE,
585  arg_expr->add_cast(new_left_type)->decompress(),
586  lower_expr->add_cast(new_right_type)->decompress());
      // Tail of the (not shown) call preceding the upper-bound predicate.
      // NOTE(review): this passes lower_expr's type again, although the predicate
      // below compares against upper_expr -- looks like it may have been meant to
      // be upper_expr->get_type_info(); confirm before changing.
588  arg_expr->get_type_info(),
589  lower_expr->get_type_info(),
590  &new_left_type,
591  &new_right_type);
592  auto upper_pred = makeExpr<Analyzer::BinOper>(
593  kBOOLEAN,
594  kLE,
595  kONE,
596  arg_expr->deep_copy()->add_cast(new_left_type)->decompress(),
597  upper_expr->add_cast(new_right_type)->decompress());
598  std::shared_ptr<Analyzer::Expr> result =
599  makeExpr<Analyzer::BinOper>(kBOOLEAN, kAND, kONE, lower_pred, upper_pred);
600  if (is_not_) {
601  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
602  }
603  return result;
604 }
605 
606 std::shared_ptr<Analyzer::Expr> CharLengthExpr::analyze(
607  const Catalog_Namespace::Catalog& catalog,
608  Analyzer::Query& query,
609  TlistRefType allow_tlist_ref) const {
610  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
611  if (!arg_expr->get_type_info().is_string()) {
612  throw std::runtime_error(
613  "expression in char_length clause must be of a string type.");
614  }
615  std::shared_ptr<Analyzer::Expr> result =
616  makeExpr<Analyzer::CharLengthExpr>(arg_expr->decompress(), calc_encoded_length_);
617  return result;
618 }
619 
620 std::shared_ptr<Analyzer::Expr> CardinalityExpr::analyze(
621  const Catalog_Namespace::Catalog& catalog,
622  Analyzer::Query& query,
623  TlistRefType allow_tlist_ref) const {
624  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
625  if (!arg_expr->get_type_info().is_array()) {
626  throw std::runtime_error(
627  "expression in cardinality clause must be of an array type.");
628  }
629  std::shared_ptr<Analyzer::Expr> result =
630  makeExpr<Analyzer::CardinalityExpr>(arg_expr->decompress());
631  return result;
632 }
633 
634 void LikeExpr::check_like_expr(const std::string& like_str, char escape_char) {
635  if (like_str.back() == escape_char) {
636  throw std::runtime_error("LIKE pattern must not end with escape character.");
637  }
638 }
639 
640 bool LikeExpr::test_is_simple_expr(const std::string& like_str, char escape_char) {
641  // if not bounded by '%' then not a simple string
642  if (like_str.size() < 2 || like_str[0] != '%' || like_str[like_str.size() - 1] != '%') {
643  return false;
644  }
645  // if the last '%' is escaped then not a simple string
646  if (like_str[like_str.size() - 2] == escape_char &&
647  like_str[like_str.size() - 3] != escape_char) {
648  return false;
649  }
650  for (size_t i = 1; i < like_str.size() - 1; i++) {
651  if (like_str[i] == '%' || like_str[i] == '_' || like_str[i] == '[' ||
652  like_str[i] == ']') {
653  if (like_str[i - 1] != escape_char) {
654  return false;
655  }
656  }
657  }
658  return true;
659 }
660 
661 void LikeExpr::erase_cntl_chars(std::string& like_str, char escape_char) {
662  char prev_char = '\0';
663  // easier to create new string of allowable chars
664  // rather than erase chars from
665  // existing string
666  std::string new_str;
667  for (char& cur_char : like_str) {
668  if (cur_char == '%' || cur_char == escape_char) {
669  if (prev_char != escape_char) {
670  prev_char = cur_char;
671  continue;
672  }
673  }
674  new_str.push_back(cur_char);
675  prev_char = cur_char;
676  }
677  like_str = new_str;
678 }
679 
680 std::shared_ptr<Analyzer::Expr> LikeExpr::analyze(
681  const Catalog_Namespace::Catalog& catalog,
682  Analyzer::Query& query,
683  TlistRefType allow_tlist_ref) const {
684  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
685  auto like_expr = like_string_->analyze(catalog, query, allow_tlist_ref);
686  auto escape_expr = escape_string_ == nullptr
687  ? nullptr
688  : escape_string_->analyze(catalog, query, allow_tlist_ref);
689  return LikeExpr::get(arg_expr, like_expr, escape_expr, is_ilike_, is_not_);
690 }
691 
692 std::shared_ptr<Analyzer::Expr> LikeExpr::get(std::shared_ptr<Analyzer::Expr> arg_expr,
693  std::shared_ptr<Analyzer::Expr> like_expr,
694  std::shared_ptr<Analyzer::Expr> escape_expr,
695  const bool is_ilike,
696  const bool is_not) {
697  if (!arg_expr->get_type_info().is_string()) {
698  throw std::runtime_error("expression before LIKE must be of a string type.");
699  }
700  if (!like_expr->get_type_info().is_string()) {
701  throw std::runtime_error("expression after LIKE must be of a string type.");
702  }
703  char escape_char = '\\';
704  if (escape_expr != nullptr) {
705  if (!escape_expr->get_type_info().is_string()) {
706  throw std::runtime_error("expression after ESCAPE must be of a string type.");
707  }
708  if (!escape_expr->get_type_info().is_string()) {
709  throw std::runtime_error("expression after ESCAPE must be of a string type.");
710  }
711  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
712  if (c != nullptr && c->get_constval().stringval->length() > 1) {
713  throw std::runtime_error("String after ESCAPE must have a single character.");
714  }
715  escape_char = (*c->get_constval().stringval)[0];
716  }
717  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(like_expr);
718  bool is_simple = false;
719  if (c != nullptr) {
720  std::string& pattern = *c->get_constval().stringval;
721  if (is_ilike) {
722  std::transform(pattern.begin(), pattern.end(), pattern.begin(), ::tolower);
723  }
724  check_like_expr(pattern, escape_char);
725  is_simple = test_is_simple_expr(pattern, escape_char);
726  if (is_simple) {
727  erase_cntl_chars(pattern, escape_char);
728  }
729  }
730  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikeExpr>(
731  arg_expr->decompress(), like_expr, escape_expr, is_ilike, is_simple);
732  if (is_not) {
733  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
734  }
735  return result;
736 }
737 
738 void RegexpExpr::check_pattern_expr(const std::string& pattern_str, char escape_char) {
739  if (pattern_str.back() == escape_char) {
740  throw std::runtime_error("REGEXP pattern must not end with escape character.");
741  }
742 }
743 
744 bool RegexpExpr::translate_to_like_pattern(std::string& pattern_str, char escape_char) {
745  char prev_char = '\0';
746  char prev_prev_char = '\0';
747  std::string like_str;
748  for (char& cur_char : pattern_str) {
749  if (prev_char == escape_char || isalnum(cur_char) || cur_char == ' ' ||
750  cur_char == '.') {
751  like_str.push_back((cur_char == '.') ? '_' : cur_char);
752  prev_prev_char = prev_char;
753  prev_char = cur_char;
754  continue;
755  }
756  if (prev_char == '.' && prev_prev_char != escape_char) {
757  if (cur_char == '*' || cur_char == '+') {
758  if (cur_char == '*') {
759  like_str.pop_back();
760  }
761  // .* --> %
762  // .+ --> _%
763  like_str.push_back('%');
764  prev_prev_char = prev_char;
765  prev_char = cur_char;
766  continue;
767  }
768  }
769  return false;
770  }
771  pattern_str = like_str;
772  return true;
773 }
774 
775 std::shared_ptr<Analyzer::Expr> RegexpExpr::analyze(
776  const Catalog_Namespace::Catalog& catalog,
777  Analyzer::Query& query,
778  TlistRefType allow_tlist_ref) const {
779  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
780  auto pattern_expr = pattern_string_->analyze(catalog, query, allow_tlist_ref);
781  auto escape_expr = escape_string_ == nullptr
782  ? nullptr
783  : escape_string_->analyze(catalog, query, allow_tlist_ref);
784  return RegexpExpr::get(arg_expr, pattern_expr, escape_expr, is_not_);
785 }
786 
787 std::shared_ptr<Analyzer::Expr> RegexpExpr::get(
788  std::shared_ptr<Analyzer::Expr> arg_expr,
789  std::shared_ptr<Analyzer::Expr> pattern_expr,
790  std::shared_ptr<Analyzer::Expr> escape_expr,
791  const bool is_not) {
792  if (!arg_expr->get_type_info().is_string()) {
793  throw std::runtime_error("expression before REGEXP must be of a string type.");
794  }
795  if (!pattern_expr->get_type_info().is_string()) {
796  throw std::runtime_error("expression after REGEXP must be of a string type.");
797  }
798  char escape_char = '\\';
799  if (escape_expr != nullptr) {
800  if (!escape_expr->get_type_info().is_string()) {
801  throw std::runtime_error("expression after ESCAPE must be of a string type.");
802  }
803  if (!escape_expr->get_type_info().is_string()) {
804  throw std::runtime_error("expression after ESCAPE must be of a string type.");
805  }
806  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(escape_expr);
807  if (c != nullptr && c->get_constval().stringval->length() > 1) {
808  throw std::runtime_error("String after ESCAPE must have a single character.");
809  }
810  escape_char = (*c->get_constval().stringval)[0];
811  if (escape_char != '\\') {
812  throw std::runtime_error("Only supporting '\\' escape character.");
813  }
814  }
815  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(pattern_expr);
816  if (c != nullptr) {
817  std::string& pattern = *c->get_constval().stringval;
818  if (translate_to_like_pattern(pattern, escape_char)) {
819  return LikeExpr::get(arg_expr, pattern_expr, escape_expr, false, is_not);
820  }
821  }
822  std::shared_ptr<Analyzer::Expr> result =
823  makeExpr<Analyzer::RegexpExpr>(arg_expr->decompress(), pattern_expr, escape_expr);
824  if (is_not) {
825  result = makeExpr<Analyzer::UOper>(kBOOLEAN, kNOT, result);
826  }
827  return result;
828 }
829 
830 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::analyze(
831  const Catalog_Namespace::Catalog& catalog,
832  Analyzer::Query& query,
833  TlistRefType allow_tlist_ref) const {
834  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
835  return LikelihoodExpr::get(arg_expr, likelihood_, is_not_);
836 }
837 
838 std::shared_ptr<Analyzer::Expr> LikelihoodExpr::get(
839  std::shared_ptr<Analyzer::Expr> arg_expr,
840  float likelihood,
841  const bool is_not) {
842  if (!arg_expr->get_type_info().is_boolean()) {
843  throw std::runtime_error("likelihood expression expects boolean type.");
844  }
845  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::LikelihoodExpr>(
846  arg_expr->decompress(), is_not ? 1 - likelihood : likelihood);
847  return result;
848 }
849 
850 std::shared_ptr<Analyzer::Expr> WidthBucketExpr::analyze(
851  const Catalog_Namespace::Catalog& catalog,
852  Analyzer::Query& query,
853  TlistRefType allow_tlist_ref) const {
854  auto target_value = target_value_->analyze(catalog, query, allow_tlist_ref);
855  auto lower_bound = lower_bound_->analyze(catalog, query, allow_tlist_ref);
856  auto upper_bound = upper_bound_->analyze(catalog, query, allow_tlist_ref);
857  auto partition_count = partition_count_->analyze(catalog, query, allow_tlist_ref);
858  return WidthBucketExpr::get(target_value, lower_bound, upper_bound, partition_count);
859 }
860 
861 std::shared_ptr<Analyzer::Expr> WidthBucketExpr::get(
862  std::shared_ptr<Analyzer::Expr> target_value,
863  std::shared_ptr<Analyzer::Expr> lower_bound,
864  std::shared_ptr<Analyzer::Expr> upper_bound,
865  std::shared_ptr<Analyzer::Expr> partition_count) {
866  std::shared_ptr<Analyzer::Expr> result = makeExpr<Analyzer::WidthBucketExpr>(
867  target_value, lower_bound, upper_bound, partition_count);
868  return result;
869 }
870 
871 std::shared_ptr<Analyzer::Expr> ExistsExpr::analyze(
872  const Catalog_Namespace::Catalog& catalog,
873  Analyzer::Query& query,
874  TlistRefType allow_tlist_ref) const {
875  throw std::runtime_error("Subqueries are not supported yet.");
876  return nullptr;
877 }
878 
// Resolves this column reference against the query's range table and returns
// an analyzer expression for it.
//
// - A null column_ (i.e. "*") is rejected with std::runtime_error.
// - With an explicit table_, the range table entry is looked up by name and the
//   column must exist in it; otherwise std::runtime_error is thrown.
// - Without table_, every range table entry is probed for the column; a second
//   match is reported as ambiguous.
// - If no column was found and allow_tlist_ref is not TLIST_NONE, the name is
//   matched against the result names of the query's target list.
//
// On success for a real column, returns an Analyzer::ColumnVar built from the
// descriptor's type, the table id, the column id and the range table index.
879 std::shared_ptr<Analyzer::Expr> ColumnRef::analyze(
880  const Catalog_Namespace::Catalog& catalog,
881  Analyzer::Query& query,
882  TlistRefType allow_tlist_ref) const {
883  int table_id{0};
884  int rte_idx{0};
885  const ColumnDescriptor* cd{nullptr};
886  if (column_ == nullptr) {
887  throw std::runtime_error("invalid column name *.");
888  }
889  if (table_ != nullptr) {
 // Qualified reference: resolve the range table entry by name first.
890  rte_idx = query.get_rte_idx(*table_);
891  if (rte_idx < 0) {
892  throw std::runtime_error("range variable or table name " + *table_ +
893  " does not exist.");
894  }
895  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
896  cd = rte->get_column_desc(catalog, *column_);
897  if (cd == nullptr) {
898  throw std::runtime_error("Column name " + *column_ + " does not exist.");
899  }
900  table_id = rte->get_table_id();
901  } else {
 // Unqualified reference: probe every range table entry; `found` remembers
 // whether an earlier entry already matched so a second hit is ambiguous.
902  bool found = false;
903  int i = 0;
904  for (auto rte : query.get_rangetable()) {
 // NOTE(review): cd is overwritten on every iteration, so after the loop it
 // holds the lookup result of the LAST entry only. If the column matched an
 // earlier entry but not the last one, cd ends up nullptr even though
 // found/rte_idx/table_id were set -- confirm whether this is intended.
905  cd = rte->get_column_desc(catalog, *column_);
906  if (cd != nullptr && !found) {
907  found = true;
908  rte_idx = i;
909  table_id = rte->get_table_id();
910  } else if (cd != nullptr && found) {
911  throw std::runtime_error("Column name " + *column_ + " is ambiguous.");
912  }
913  i++;
914  }
915  if (cd == nullptr && allow_tlist_ref != TlistRefType::TLIST_NONE) {
916  // check if this is a reference to a targetlist entry
 // This inner `found`/`i` pair shadows the outer one; varno is the 1-based
 // position of the matching target list entry.
917  bool found = false;
918  int varno = -1;
919  int i = 1;
920  std::shared_ptr<Analyzer::TargetEntry> tle;
921  for (auto p : query.get_targetlist()) {
922  if (*column_ == p->get_resname() && !found) {
923  found = true;
924  varno = i;
925  tle = p;
926  } else if (*column_ == p->get_resname() && found) {
927  throw std::runtime_error("Output alias " + *column_ + " is ambiguous.");
928  }
929  i++;
930  }
931  if (found) {
932  if (dynamic_cast<Analyzer::Var*>(tle->get_expr())) {
933  Analyzer::Var* v = static_cast<Analyzer::Var*>(tle->get_expr());
 // NOTE(review): the embedded numbering jumps from 933 to 935 and the braces
 // below only balance if a block-opening statement (presumably a condition on
 // `v`) sat on the missing line -- restore it from the upstream file before
 // relying on this listing.
935  return v->deep_copy();
936  }
937  }
 // TLIST_COPY returns a deep copy of the aliased expression; any other
 // permitted mode returns a Var of kind kOUTPUT that refers to the entry by
 // its position.
938  if (allow_tlist_ref == TlistRefType::TLIST_COPY) {
939  return tle->get_expr()->deep_copy();
940  } else {
941  return makeExpr<Analyzer::Var>(
942  tle->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
943  }
944  }
945  }
946  if (cd == nullptr) {
947  throw std::runtime_error("Column name " + *column_ + " does not exist.");
948  }
949  }
950  return makeExpr<Analyzer::ColumnVar>(cd->columnType, table_id, cd->columnId, rte_idx);
951 }
952 
953 std::shared_ptr<Analyzer::Expr> FunctionRef::analyze(
954  const Catalog_Namespace::Catalog& catalog,
955  Analyzer::Query& query,
956  TlistRefType allow_tlist_ref) const {
957  SQLTypeInfo result_type;
958  SQLAgg agg_type;
959  std::shared_ptr<Analyzer::Expr> arg_expr;
960  bool is_distinct = false;
961  if (boost::iequals(*name_, "count")) {
962  result_type = SQLTypeInfo(kBIGINT, false);
963  agg_type = kCOUNT;
964  if (arg_) {
965  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
966  const SQLTypeInfo& ti = arg_expr->get_type_info();
967  if (ti.is_string() && (ti.get_compression() != kENCODING_DICT || !distinct_)) {
968  throw std::runtime_error(
969  "Strings must be dictionary-encoded in COUNT(DISTINCT).");
970  }
971  if (ti.get_type() == kARRAY && !distinct_) {
972  throw std::runtime_error("Only COUNT(DISTINCT) is supported on arrays.");
973  }
974  }
975  is_distinct = distinct_;
976  } else {
977  if (!arg_) {
978  throw std::runtime_error("Cannot compute " + *name_ + " with argument '*'.");
979  }
980  if (boost::iequals(*name_, "min")) {
981  agg_type = kMIN;
982  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
983  arg_expr = arg_expr->decompress();
984  result_type = arg_expr->get_type_info();
985  } else if (boost::iequals(*name_, "max")) {
986  agg_type = kMAX;
987  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
988  arg_expr = arg_expr->decompress();
989  result_type = arg_expr->get_type_info();
990  } else if (boost::iequals(*name_, "avg")) {
991  agg_type = kAVG;
992  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
993  if (!arg_expr->get_type_info().is_number()) {
994  throw std::runtime_error("Cannot compute AVG on non-number-type arguments.");
995  }
996  arg_expr = arg_expr->decompress();
997  result_type = SQLTypeInfo(kDOUBLE, false);
998  } else if (boost::iequals(*name_, "sum")) {
999  agg_type = kSUM;
1000  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1001  if (!arg_expr->get_type_info().is_number()) {
1002  throw std::runtime_error("Cannot compute SUM on non-number-type arguments.");
1003  }
1004  arg_expr = arg_expr->decompress();
1005  result_type = arg_expr->get_type_info().is_integer() ? SQLTypeInfo(kBIGINT, false)
1006  : arg_expr->get_type_info();
1007  } else if (boost::iequals(*name_, "unnest")) {
1008  arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1009  const SQLTypeInfo& arg_ti = arg_expr->get_type_info();
1010  if (arg_ti.get_type() != kARRAY) {
1011  throw std::runtime_error(arg_->to_string() + " is not of array type.");
1012  }
1013  return makeExpr<Analyzer::UOper>(arg_ti.get_elem_type(), false, kUNNEST, arg_expr);
1014  } else {
1015  throw std::runtime_error("invalid function name: " + *name_);
1016  }
1017  if (arg_expr->get_type_info().is_string() ||
1018  arg_expr->get_type_info().get_type() == kARRAY) {
1019  throw std::runtime_error(
1020  "Only COUNT(DISTINCT ) aggregate is supported on strings and arrays.");
1021  }
1022  }
1023  int naggs = query.get_num_aggs();
1024  query.set_num_aggs(naggs + 1);
1025  return makeExpr<Analyzer::AggExpr>(
1026  result_type, agg_type, arg_expr, is_distinct, nullptr);
1027 }
1028 
1029 std::shared_ptr<Analyzer::Expr> CastExpr::analyze(
1030  const Catalog_Namespace::Catalog& catalog,
1031  Analyzer::Query& query,
1032  TlistRefType allow_tlist_ref) const {
1033  target_type_->check_type();
1034  auto arg_expr = arg_->analyze(catalog, query, allow_tlist_ref);
1035  SQLTypeInfo ti(target_type_->get_type(),
1036  target_type_->get_param1(),
1037  target_type_->get_param2(),
1038  arg_expr->get_type_info().get_notnull());
1039  if (arg_expr->get_type_info().get_type() != target_type_->get_type() &&
1040  arg_expr->get_type_info().get_compression() != kENCODING_NONE) {
1041  arg_expr->decompress();
1042  }
1043  return arg_expr->add_cast(ti);
1044 }
1045 
1046 std::shared_ptr<Analyzer::Expr> CaseExpr::analyze(
1047  const Catalog_Namespace::Catalog& catalog,
1048  Analyzer::Query& query,
1049  TlistRefType allow_tlist_ref) const {
1050  SQLTypeInfo ti;
1051  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1052  expr_pair_list;
1053  for (auto& p : when_then_list_) {
1054  auto e1 = p->get_expr1()->analyze(catalog, query, allow_tlist_ref);
1055  if (e1->get_type_info().get_type() != kBOOLEAN) {
1056  throw std::runtime_error("Only boolean expressions can be used after WHEN.");
1057  }
1058  auto e2 = p->get_expr2()->analyze(catalog, query, allow_tlist_ref);
1059  expr_pair_list.emplace_back(e1, e2);
1060  }
1061  auto else_e =
1062  else_expr_ ? else_expr_->analyze(catalog, query, allow_tlist_ref) : nullptr;
1063  return normalize(expr_pair_list, else_e);
1064 }
1065 
1066 namespace {
1067 
// Body of a helper that converts a string literal into a bool:
//   "t", "true", "T", "True"    -> true
//   "f", "false", "F", "False"  -> false
//   anything else               -> std::runtime_error("Invalid string for boolean ...")
// The comparison is exact; other spellings (e.g. "TRUE") are rejected.
// NOTE(review): the signature line of this function is absent from this listing
// (the embedded numbering jumps from 1067 to 1069). Call sites in this file
// invoke bool_from_string_literal(str_literal) with a const StringLiteral*, so
// this is presumably that function's body -- confirm against the upstream file.
1069  const std::string* s = str_literal->get_stringval();
1070  if (*s == "t" || *s == "true" || *s == "T" || *s == "True") {
1071  return true;
1072  } else if (*s == "f" || *s == "false" || *s == "F" || *s == "False") {
1073  return false;
1074  } else {
1075  throw std::runtime_error("Invalid string for boolean " + *s);
1076  }
1077 }
1078 
// Applies the WITH (...) options of a COPY statement to `copy_params`.
//
// Each option name is matched case-insensitively. The option value must be a
// literal of the expected kind (IntLiteral or StringLiteral); boolean options
// are string literals converted with bool_from_string_literal. Any validation
// failure, and any unrecognized option name, raises std::runtime_error.
//
//   options_                        name/value pairs to process, in order
//   copy_params                     receives the parsed settings
//   warnings                        receives deprecation warnings (for the
//                                   'parquet' and 'geo' options)
//   deferred_copy_from_partitions_  receives the upper-cased 'partitions' value
//
// NOTE(review): the line numbers embedded in this listing skip in several
// places inside this function (e.g. 1126-1127, 1141, 1271, 1281-1291,
// 1329-1341, 1356-1363, 1482, 1588). The statements on those lines are missing
// here, which leaves some branches empty or syntactically incomplete. Restore
// them from the upstream file; the notes below mark the affected spots.
1079 void parse_copy_params(const std::list<std::unique_ptr<NameValueAssign>>& options_,
1080  import_export::CopyParams& copy_params,
1081  std::vector<std::string>& warnings,
1082  std::string& deferred_copy_from_partitions_) {
1083  if (!options_.empty()) {
1084  for (auto& p : options_) {
1085  if (boost::iequals(*p->get_name(), "max_reject")) {
1086  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1087  if (int_literal == nullptr) {
1088  throw std::runtime_error("max_reject option must be an integer.");
1089  }
1090  copy_params.max_reject = int_literal->get_intval();
1091  } else if (boost::iequals(*p->get_name(), "buffer_size")) {
1092  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1093  if (int_literal == nullptr) {
1094  throw std::runtime_error("buffer_size option must be an integer.");
1095  }
1096  copy_params.buffer_size = int_literal->get_intval();
1097  } else if (boost::iequals(*p->get_name(), "threads")) {
1098  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1099  if (int_literal == nullptr) {
1100  throw std::runtime_error("Threads option must be an integer.");
1101  }
1102  copy_params.threads = int_literal->get_intval();
1103  } else if (boost::iequals(*p->get_name(), "delimiter")) {
1104  const StringLiteral* str_literal =
1105  dynamic_cast<const StringLiteral*>(p->get_value());
1106  if (str_literal == nullptr) {
1107  throw std::runtime_error("Delimiter option must be a string.");
1108  } else if (str_literal->get_stringval()->length() != 1) {
1109  throw std::runtime_error("Delimiter must be a single character string.");
1110  }
1111  copy_params.delimiter = (*str_literal->get_stringval())[0];
1112  } else if (boost::iequals(*p->get_name(), "nulls")) {
1113  const StringLiteral* str_literal =
1114  dynamic_cast<const StringLiteral*>(p->get_value());
1115  if (str_literal == nullptr) {
1116  throw std::runtime_error("Nulls option must be a string.");
1117  }
1118  copy_params.null_str = *str_literal->get_stringval();
1119  } else if (boost::iequals(*p->get_name(), "header")) {
1120  const StringLiteral* str_literal =
1121  dynamic_cast<const StringLiteral*>(p->get_value());
1122  if (str_literal == nullptr) {
1123  throw std::runtime_error("Header option must be a boolean.");
1124  }
1125  copy_params.has_header = bool_from_string_literal(str_literal)
 // NOTE(review): the assignment above is unterminated in this listing; the
 // remainder of the expression (listing lines 1126-1127) is missing.
1128 #ifdef ENABLE_IMPORT_PARQUET
1129  } else if (boost::iequals(*p->get_name(), "parquet")) {
1130  warnings.push_back(
1131  "Deprecation Warning: COPY FROM WITH (parquet='true') is deprecated. Use "
1132  "WITH (source_type='parquet_file') instead.");
1133  const StringLiteral* str_literal =
1134  dynamic_cast<const StringLiteral*>(p->get_value());
1135  if (str_literal == nullptr) {
1136  throw std::runtime_error("'parquet' option must be a boolean.");
1137  }
1138  if (bool_from_string_literal(str_literal)) {
1139  // not sure a parquet "table" type is proper, but to make code
1140  // look consistent in some places, let's set "table" type too
 // NOTE(review): the statement this comment refers to (listing line 1141) is
 // missing here.
1142  }
1143 #endif // ENABLE_IMPORT_PARQUET
1144  } else if (boost::iequals(*p->get_name(), "s3_access_key")) {
1145  const StringLiteral* str_literal =
1146  dynamic_cast<const StringLiteral*>(p->get_value());
1147  if (str_literal == nullptr) {
1148  throw std::runtime_error("Option s3_access_key must be a string.");
1149  }
1150  copy_params.s3_access_key = *str_literal->get_stringval();
1151  } else if (boost::iequals(*p->get_name(), "s3_secret_key")) {
1152  const StringLiteral* str_literal =
1153  dynamic_cast<const StringLiteral*>(p->get_value());
1154  if (str_literal == nullptr) {
1155  throw std::runtime_error("Option s3_secret_key must be a string.");
1156  }
1157  copy_params.s3_secret_key = *str_literal->get_stringval();
1158  } else if (boost::iequals(*p->get_name(), "s3_session_token")) {
1159  const StringLiteral* str_literal =
1160  dynamic_cast<const StringLiteral*>(p->get_value());
1161  if (str_literal == nullptr) {
1162  throw std::runtime_error("Option s3_session_token must be a string.");
1163  }
1164  copy_params.s3_session_token = *str_literal->get_stringval();
1165  } else if (boost::iequals(*p->get_name(), "s3_region")) {
1166  const StringLiteral* str_literal =
1167  dynamic_cast<const StringLiteral*>(p->get_value());
1168  if (str_literal == nullptr) {
1169  throw std::runtime_error("Option s3_region must be a string.");
1170  }
1171  copy_params.s3_region = *str_literal->get_stringval();
1172  } else if (boost::iequals(*p->get_name(), "s3_endpoint")) {
1173  const StringLiteral* str_literal =
1174  dynamic_cast<const StringLiteral*>(p->get_value());
1175  if (str_literal == nullptr) {
1176  throw std::runtime_error("Option s3_endpoint must be a string.");
1177  }
1178  copy_params.s3_endpoint = *str_literal->get_stringval();
1179  } else if (boost::iequals(*p->get_name(), "s3_max_concurrent_downloads")) {
1180  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1181  if (int_literal == nullptr) {
1182  throw std::runtime_error(
1183  "'s3_max_concurrent_downloads' option must be an integer");
1184  }
 // Only strictly positive values are accepted.
1185  const int s3_max_concurrent_downloads = int_literal->get_intval();
1186  if (s3_max_concurrent_downloads > 0) {
1187  copy_params.s3_max_concurrent_downloads = s3_max_concurrent_downloads;
1188  } else {
1189  throw std::runtime_error(
1190  "Invalid value for 's3_max_concurrent_downloads' option (must be > 0): " +
1191  std::to_string(s3_max_concurrent_downloads));
1192  }
1193  } else if (boost::iequals(*p->get_name(), "quote")) {
1194  const StringLiteral* str_literal =
1195  dynamic_cast<const StringLiteral*>(p->get_value());
1196  if (str_literal == nullptr) {
1197  throw std::runtime_error("Quote option must be a string.");
1198  } else if (str_literal->get_stringval()->length() != 1) {
1199  throw std::runtime_error("Quote must be a single character string.");
1200  }
1201  copy_params.quote = (*str_literal->get_stringval())[0];
1202  } else if (boost::iequals(*p->get_name(), "escape")) {
1203  const StringLiteral* str_literal =
1204  dynamic_cast<const StringLiteral*>(p->get_value());
1205  if (str_literal == nullptr) {
1206  throw std::runtime_error("Escape option must be a string.");
1207  } else if (str_literal->get_stringval()->length() != 1) {
1208  throw std::runtime_error("Escape must be a single character string.");
1209  }
1210  copy_params.escape = (*str_literal->get_stringval())[0];
1211  } else if (boost::iequals(*p->get_name(), "line_delimiter")) {
1212  const StringLiteral* str_literal =
1213  dynamic_cast<const StringLiteral*>(p->get_value());
1214  if (str_literal == nullptr) {
1215  throw std::runtime_error("Line_delimiter option must be a string.");
1216  } else if (str_literal->get_stringval()->length() != 1) {
1217  throw std::runtime_error("Line_delimiter must be a single character string.");
1218  }
1219  copy_params.line_delim = (*str_literal->get_stringval())[0];
1220  } else if (boost::iequals(*p->get_name(), "quoted")) {
1221  const StringLiteral* str_literal =
1222  dynamic_cast<const StringLiteral*>(p->get_value());
1223  if (str_literal == nullptr) {
1224  throw std::runtime_error("Quoted option must be a boolean.");
1225  }
1226  copy_params.quoted = bool_from_string_literal(str_literal);
1227  } else if (boost::iequals(*p->get_name(), "plain_text")) {
1228  const StringLiteral* str_literal =
1229  dynamic_cast<const StringLiteral*>(p->get_value());
1230  if (str_literal == nullptr) {
1231  throw std::runtime_error("plain_text option must be a boolean.");
1232  }
1233  copy_params.plain_text = bool_from_string_literal(str_literal);
1234  } else if (boost::iequals(*p->get_name(), "array_marker")) {
1235  const StringLiteral* str_literal =
1236  dynamic_cast<const StringLiteral*>(p->get_value());
1237  if (str_literal == nullptr) {
1238  throw std::runtime_error("Array Marker option must be a string.");
1239  } else if (str_literal->get_stringval()->length() != 2) {
1240  throw std::runtime_error(
1241  "Array Marker option must be exactly two characters. Default is {}.");
1242  }
 // First character opens an array, second character closes it.
1243  copy_params.array_begin = (*str_literal->get_stringval())[0];
1244  copy_params.array_end = (*str_literal->get_stringval())[1];
1245  } else if (boost::iequals(*p->get_name(), "array_delimiter")) {
1246  const StringLiteral* str_literal =
1247  dynamic_cast<const StringLiteral*>(p->get_value());
1248  if (str_literal == nullptr) {
1249  throw std::runtime_error("Array Delimiter option must be a string.");
1250  } else if (str_literal->get_stringval()->length() != 1) {
1251  throw std::runtime_error("Array Delimiter must be a single character string.");
1252  }
1253  copy_params.array_delim = (*str_literal->get_stringval())[0];
1254  } else if (boost::iequals(*p->get_name(), "lonlat")) {
1255  const StringLiteral* str_literal =
1256  dynamic_cast<const StringLiteral*>(p->get_value());
1257  if (str_literal == nullptr) {
1258  throw std::runtime_error("Lonlat option must be a boolean.");
1259  }
1260  copy_params.lonlat = bool_from_string_literal(str_literal);
1261  } else if (boost::iequals(*p->get_name(), "geo")) {
1262  warnings.push_back(
1263  "Deprecation Warning: COPY FROM WITH (geo='true') is deprecated. Use WITH "
1264  "(source_type='geo_file') instead.");
1265  const StringLiteral* str_literal =
1266  dynamic_cast<const StringLiteral*>(p->get_value());
1267  if (str_literal == nullptr) {
1268  throw std::runtime_error("'geo' option must be a boolean.");
1269  }
1270  if (bool_from_string_literal(str_literal)) {
 // NOTE(review): the body of this branch (listing line 1271) is missing here.
1272  }
1273  } else if (boost::iequals(*p->get_name(), "source_type")) {
1274  const StringLiteral* str_literal =
1275  dynamic_cast<const StringLiteral*>(p->get_value());
1276  if (str_literal == nullptr) {
1277  throw std::runtime_error("'source_type' option must be a string.");
1278  }
1279  const std::string* s = str_literal->get_stringval();
 // NOTE(review): every accepted value below has an empty branch in this
 // listing (lines 1281, 1283, 1286, 1289 and 1291 are missing); presumably
 // each one assigned the matching source type -- confirm upstream.
 // NOTE(review): this option uses `#if ENABLE_IMPORT_PARQUET` while the
 // 'parquet' option above uses `#ifdef`; the two differ when the macro is
 // defined as 0 or defined empty.
1280  if (boost::iequals(*s, "delimited_file")) {
1282  } else if (boost::iequals(*s, "geo_file")) {
1284 #if ENABLE_IMPORT_PARQUET
1285  } else if (boost::iequals(*s, "parquet_file")) {
1287 #endif
1288  } else if (boost::iequals(*s, "raster_file")) {
1290  } else if (boost::iequals(*s, "regex_parsed_file")) {
1292  } else {
1293  throw std::runtime_error(
1294  "Invalid string for 'source_type' option (must be 'GEO_FILE', 'RASTER_FILE'"
1295 #if ENABLE_IMPORT_PARQUET
1296  ", 'PARQUET_FILE'"
1297 #endif
1298  ", 'REGEX_PARSED_FILE'"
1299  " or 'DELIMITED_FILE'): " +
1300  *s);
1301  }
1302  } else if (boost::iequals(*p->get_name(), "geo_coords_type")) {
1303  const StringLiteral* str_literal =
1304  dynamic_cast<const StringLiteral*>(p->get_value());
1305  if (str_literal == nullptr) {
1306  throw std::runtime_error("'geo_coords_type' option must be a string");
1307  }
1308  const std::string* s = str_literal->get_stringval();
 // 'geography' is recognized but explicitly rejected; only 'geometry' is
 // accepted.
1309  if (boost::iequals(*s, "geography")) {
1310  throw std::runtime_error(
1311  "GEOGRAPHY coords type not yet supported. Please use GEOMETRY.");
1312  // copy_params.geo_coords_type = kGEOGRAPHY;
1313  } else if (boost::iequals(*s, "geometry")) {
1314  copy_params.geo_coords_type = kGEOMETRY;
1315  } else {
1316  throw std::runtime_error(
1317  "Invalid string for 'geo_coords_type' option (must be 'GEOGRAPHY' or "
1318  "'GEOMETRY'): " +
1319  *s);
1320  }
1321  } else if (boost::iequals(*p->get_name(), "raster_point_type")) {
1322  const StringLiteral* str_literal =
1323  dynamic_cast<const StringLiteral*>(p->get_value());
1324  if (str_literal == nullptr) {
1325  throw std::runtime_error("'raster_point_type' option must be a string");
1326  }
1327  const std::string* s = str_literal->get_stringval();
 // NOTE(review): the assignments for each accepted value (odd listing lines
 // 1329-1341) are missing here, leaving the branches empty.
1328  if (boost::iequals(*s, "none")) {
1330  } else if (boost::iequals(*s, "auto")) {
1332  } else if (boost::iequals(*s, "smallint")) {
1334  } else if (boost::iequals(*s, "int")) {
1336  } else if (boost::iequals(*s, "float")) {
1338  } else if (boost::iequals(*s, "double")) {
1340  } else if (boost::iequals(*s, "point")) {
1342  } else {
1343  throw std::runtime_error(
1344  "Invalid string for 'raster_point_type' option (must be 'NONE', 'AUTO', "
1345  "'SMALLINT', 'INT', 'FLOAT', 'DOUBLE' or 'POINT'): " +
1346  *s);
1347  }
1348  } else if (boost::iequals(*p->get_name(), "raster_point_transform")) {
1349  const StringLiteral* str_literal =
1350  dynamic_cast<const StringLiteral*>(p->get_value());
1351  if (str_literal == nullptr) {
1352  throw std::runtime_error("'raster_point_transform' option must be a string");
1353  }
1354  const std::string* s = str_literal->get_stringval();
 // NOTE(review): the assignments for 'none', 'auto' and 'file' (listing lines
 // 1356, 1358, 1360) are missing, and the 'world' assignment is cut off after
 // the `=` (line 1363 is missing).
1355  if (boost::iequals(*s, "none")) {
1357  } else if (boost::iequals(*s, "auto")) {
1359  } else if (boost::iequals(*s, "file")) {
1361  } else if (boost::iequals(*s, "world")) {
1362  copy_params.raster_point_transform =
1364  } else {
1365  throw std::runtime_error(
1366  "Invalid string for 'raster_point_transform' option (must be 'NONE', "
1367  "'AUTO', 'FILE' or 'WORLD'): " +
1368  *s);
1369  }
1370  } else if (boost::iequals(*p->get_name(), "raster_import_bands")) {
1371  const StringLiteral* str_literal =
1372  dynamic_cast<const StringLiteral*>(p->get_value());
1373  if (str_literal == nullptr) {
1374  throw std::runtime_error("'raster_import_bands' option must be a string");
1375  }
1376  const std::string* raster_import_bands = str_literal->get_stringval();
1377  if (raster_import_bands) {
1378  copy_params.raster_import_bands = *raster_import_bands;
1379  } else {
1380  throw std::runtime_error("Invalid value for 'raster_import_bands' option");
1381  }
1382  } else if (boost::iequals(*p->get_name(), "raster_import_dimensions")) {
1383  const StringLiteral* str_literal =
1384  dynamic_cast<const StringLiteral*>(p->get_value());
1385  if (str_literal == nullptr) {
1386  throw std::runtime_error("'raster_import_dimensions' option must be a string");
1387  }
1388  const std::string* raster_import_dimensions = str_literal->get_stringval();
1389  if (raster_import_dimensions) {
1390  copy_params.raster_import_dimensions = *raster_import_dimensions;
1391  } else {
1392  throw std::runtime_error("Invalid value for 'raster_import_dimensions' option");
1393  }
1394  } else if (boost::iequals(*p->get_name(), "geo_coords_encoding")) {
1395  const StringLiteral* str_literal =
1396  dynamic_cast<const StringLiteral*>(p->get_value());
1397  if (str_literal == nullptr) {
1398  throw std::runtime_error("'geo_coords_encoding' option must be a string");
1399  }
1400  const std::string* s = str_literal->get_stringval();
 // The encoding and its compression parameter are always set together.
1401  if (boost::iequals(*s, "none")) {
1402  copy_params.geo_coords_encoding = kENCODING_NONE;
1403  copy_params.geo_coords_comp_param = 0;
1404  } else if (boost::iequals(*s, "compressed(32)")) {
1405  copy_params.geo_coords_encoding = kENCODING_GEOINT;
1406  copy_params.geo_coords_comp_param = 32;
1407  } else {
1408  throw std::runtime_error(
1409  "Invalid string for 'geo_coords_encoding' option (must be 'NONE' or "
1410  "'COMPRESSED(32)'): " +
1411  *s);
1412  }
1413  } else if (boost::iequals(*p->get_name(), "raster_scanlines_per_thread")) {
1414  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1415  if (int_literal == nullptr) {
1416  throw std::runtime_error(
1417  "'raster_scanlines_per_thread' option must be an integer");
1418  }
1419  const int raster_scanlines_per_thread = int_literal->get_intval();
1420  if (raster_scanlines_per_thread < 0) {
1421  throw std::runtime_error(
1422  "'raster_scanlines_per_thread' option must be >= 0, with 0 denoting auto "
1423  "sizing");
1424  }
1425  copy_params.raster_scanlines_per_thread = raster_scanlines_per_thread;
1426  } else if (boost::iequals(*p->get_name(), "geo_coords_srid")) {
1427  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1428  if (int_literal == nullptr) {
1429  throw std::runtime_error("'geo_coords_srid' option must be an integer");
1430  }
 // Only the three SRIDs listed in the condition below are accepted.
1431  const int srid = int_literal->get_intval();
1432  if (srid == 4326 || srid == 3857 || srid == 900913) {
1433  copy_params.geo_coords_srid = srid;
1434  } else {
1435  throw std::runtime_error(
1436  "Invalid value for 'geo_coords_srid' option (must be 4326, 3857, or "
1437  "900913): " +
1438  std::to_string(srid));
1439  }
1440  } else if (boost::iequals(*p->get_name(), "geo_layer_name")) {
1441  const StringLiteral* str_literal =
1442  dynamic_cast<const StringLiteral*>(p->get_value());
1443  if (str_literal == nullptr) {
1444  throw std::runtime_error("'geo_layer_name' option must be a string");
1445  }
1446  const std::string* layer_name = str_literal->get_stringval();
1447  if (layer_name) {
1448  copy_params.geo_layer_name = *layer_name;
1449  } else {
1450  throw std::runtime_error("Invalid value for 'geo_layer_name' option");
1451  }
1452  } else if (boost::iequals(*p->get_name(), "partitions")) {
 // NOTE(review): unlike every other string option, the value is
 // static_cast to StringLiteral without a dynamic type check; a non-string
 // value is not rejected with an error message here -- confirm this is
 // intended.
1453  const auto partitions =
1454  static_cast<const StringLiteral*>(p->get_value())->get_stringval();
1455  CHECK(partitions);
1456  const auto partitions_uc = boost::to_upper_copy<std::string>(*partitions);
1457  if (partitions_uc != "REPLICATED") {
1458  throw std::runtime_error(
1459  "Invalid value for 'partitions' option. Must be 'REPLICATED'.");
1460  }
1461  deferred_copy_from_partitions_ = partitions_uc;
1462  } else if (boost::iequals(*p->get_name(), "geo_assign_render_groups")) {
1463  const StringLiteral* str_literal =
1464  dynamic_cast<const StringLiteral*>(p->get_value());
1465  if (str_literal == nullptr) {
1466  throw std::runtime_error("geo_assign_render_groups option must be a boolean.");
1467  }
1468  copy_params.geo_assign_render_groups = bool_from_string_literal(str_literal);
1469  } else if (boost::iequals(*p->get_name(), "geo_explode_collections")) {
1470  const StringLiteral* str_literal =
1471  dynamic_cast<const StringLiteral*>(p->get_value());
1472  if (str_literal == nullptr) {
1473  throw std::runtime_error("geo_explode_collections option must be a boolean.");
1474  }
1475  copy_params.geo_explode_collections = bool_from_string_literal(str_literal);
1476  } else if (boost::iequals(*p->get_name(), "source_srid")) {
1477  const IntLiteral* int_literal = dynamic_cast<const IntLiteral*>(p->get_value());
1478  if (int_literal == nullptr) {
1479  throw std::runtime_error("'source_srid' option must be an integer");
1480  }
1481  const int srid = int_literal->get_intval();
 // NOTE(review): the `if (...) {` that guards the assignment below (listing
 // line 1482) is missing. Judging by the error message in the else branch it
 // presumably restricts the option to delimited (csv/tsv) sources -- confirm
 // upstream.
1483  copy_params.source_srid = srid;
1484  } else {
1485  throw std::runtime_error(
1486  "'source_srid' option can only be used on csv/tsv files");
1487  }
1488  } else if (boost::iequals(*p->get_name(), "regex_path_filter")) {
1489  const StringLiteral* str_literal =
1490  dynamic_cast<const StringLiteral*>(p->get_value());
1491  if (str_literal == nullptr) {
1492  throw std::runtime_error("Option regex_path_filter must be a string.");
1493  }
 // An empty string is stored as std::nullopt (same for the two file_sort_*
 // options that follow).
1494  const auto string_val = *str_literal->get_stringval();
1495  copy_params.regex_path_filter =
1496  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1497  } else if (boost::iequals(*p->get_name(), "file_sort_order_by")) {
1498  const StringLiteral* str_literal =
1499  dynamic_cast<const StringLiteral*>(p->get_value());
1500  if (str_literal == nullptr) {
1501  throw std::runtime_error("Option file_sort_order_by must be a string.");
1502  }
1503  const auto string_val = *str_literal->get_stringval();
1504  copy_params.file_sort_order_by =
1505  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1506  } else if (boost::iequals(*p->get_name(), "file_sort_regex")) {
1507  const StringLiteral* str_literal =
1508  dynamic_cast<const StringLiteral*>(p->get_value());
1509  if (str_literal == nullptr) {
1510  throw std::runtime_error("Option file_sort_regex must be a string.");
1511  }
1512  const auto string_val = *str_literal->get_stringval();
1513  copy_params.file_sort_regex =
1514  string_val.empty() ? std::nullopt : std::optional<std::string>{string_val};
1515  } else if (boost::iequals(*p->get_name(), "raster_point_compute_angle")) {
1516  const StringLiteral* str_literal =
1517  dynamic_cast<const StringLiteral*>(p->get_value());
1518  if (str_literal == nullptr) {
1519  throw std::runtime_error(
1520  "'raster_point_compute_angle' option must be a boolean.");
1521  }
 // Only a true value is applied; a false value leaves the current setting
 // untouched.
1522  if (bool_from_string_literal(str_literal)) {
1523  copy_params.raster_point_compute_angle = true;
1524  }
1525  } else if (boost::iequals(*p->get_name(), "odbc_sql_order_by")) {
1526  if (auto str_literal = dynamic_cast<const StringLiteral*>(p->get_value())) {
1527  copy_params.odbc_sql_order_by = *str_literal->get_stringval();
1528  } else {
1529  throw std::runtime_error("Option odbc_sql_order_by must be a string.");
1530  }
1531  } else if (boost::iequals(*p->get_name(), "odbc_username")) {
1532  const StringLiteral* str_literal =
1533  dynamic_cast<const StringLiteral*>(p->get_value());
1534  if (str_literal == nullptr) {
1535  throw std::runtime_error("Option odbc_username must be a string.");
1536  }
1537  const auto string_val = *str_literal->get_stringval();
1538  copy_params.odbc_username = string_val;
1539  } else if (boost::iequals(*p->get_name(), "odbc_password")) {
1540  const StringLiteral* str_literal =
1541  dynamic_cast<const StringLiteral*>(p->get_value());
1542  if (str_literal == nullptr) {
1543  throw std::runtime_error("Option odbc_password must be a string.");
1544  }
1545  const auto string_val = *str_literal->get_stringval();
1546  copy_params.odbc_password = string_val;
1547  } else if (boost::iequals(*p->get_name(), "odbc_credential_string")) {
1548  const StringLiteral* str_literal =
1549  dynamic_cast<const StringLiteral*>(p->get_value());
1550  if (str_literal == nullptr) {
1551  throw std::runtime_error("Option odbc_credential_string must be a string.");
1552  }
1553  const auto string_val = *str_literal->get_stringval();
1554  copy_params.odbc_credential_string = string_val;
1555  } else if (boost::iequals(*p->get_name(), "odbc_dsn")) {
1556  const StringLiteral* str_literal =
1557  dynamic_cast<const StringLiteral*>(p->get_value());
1558  if (str_literal == nullptr) {
1559  throw std::runtime_error("Option odbc_dsn must be a string.");
1560  }
1561  const auto string_val = *str_literal->get_stringval();
1562  copy_params.odbc_dsn = string_val;
1563  } else if (boost::iequals(*p->get_name(), "odbc_connection_string")) {
1564  const StringLiteral* str_literal =
1565  dynamic_cast<const StringLiteral*>(p->get_value());
1566  if (str_literal == nullptr) {
1567  throw std::runtime_error("Option odbc_connection_string must be a string.");
1568  }
1569  const auto string_val = *str_literal->get_stringval();
1570  copy_params.odbc_connection_string = string_val;
1571  } else if (boost::iequals(*p->get_name(), "line_start_regex")) {
1572  const StringLiteral* str_literal =
1573  dynamic_cast<const StringLiteral*>(p->get_value());
1574  if (str_literal == nullptr) {
1575  throw std::runtime_error("Option line_start_regex must be a string.");
1576  }
1577  const auto string_val = *str_literal->get_stringval();
1578  copy_params.line_start_regex = string_val;
1579  } else if (boost::iequals(*p->get_name(), "line_regex")) {
1580  const StringLiteral* str_literal =
1581  dynamic_cast<const StringLiteral*>(p->get_value());
1582  if (str_literal == nullptr) {
1583  throw std::runtime_error("Option line_regex must be a string.");
1584  }
1585  const auto string_val = *str_literal->get_stringval();
1586  copy_params.line_regex = string_val;
 // NOTE(review): the condition below ends in `&&` and its second operand
 // (listing line 1588) is missing here.
1587  } else if (boost::iequals(*p->get_name(), "add_metadata_columns") &&
1589  const StringLiteral* str_literal =
1590  dynamic_cast<const StringLiteral*>(p->get_value());
1591  if (str_literal == nullptr) {
1592  throw std::runtime_error("'add_metadata_columns' option must be a string.");
1593  }
1594  copy_params.add_metadata_columns = *str_literal->get_stringval();
1595  } else {
 // Any option name not handled above is rejected.
1596  throw std::runtime_error("Invalid option for COPY: " + *p->get_name());
1597  }
1598  }
1599  }
1600 }
1601 
1602 bool expr_is_null(const Analyzer::Expr* expr) {
1603  if (expr->get_type_info().get_type() == kNULLT) {
1604  return true;
1605  }
1606  const auto const_expr = dynamic_cast<const Analyzer::Constant*>(expr);
1607  return const_expr && const_expr->get_is_null();
1608 }
1609 
1610 } // namespace
1611 
1612 std::shared_ptr<Analyzer::Expr> CaseExpr::normalize(
1613  const std::list<std::pair<std::shared_ptr<Analyzer::Expr>,
1614  std::shared_ptr<Analyzer::Expr>>>& expr_pair_list,
1615  const std::shared_ptr<Analyzer::Expr> else_e_in,
1616  const Executor* executor) {
1617  SQLTypeInfo ti;
1618  bool has_agg = false;
1619  // We need to keep track of whether there was at
1620  // least one none-encoded string literal expression
1621  // type among any of the case sub-expressions separately
1622  // from rest of type determination logic, as it will
1623  // be casted to the output dictionary type if all output
1624  // types are either dictionary encoded or none-encoded
1625  // literals, or kept as none-encoded if all sub-expression
1626  // types are none-encoded (column or literal)
1627  SQLTypeInfo none_encoded_literal_ti;
1628 
1629  for (auto& p : expr_pair_list) {
1630  auto e1 = p.first;
1631  CHECK(e1->get_type_info().is_boolean());
1632  auto e2 = p.second;
1633  if (e2->get_contains_agg()) {
1634  has_agg = true;
1635  }
1636  const auto& e2_ti = e2->get_type_info();
1637  if (e2_ti.is_string() && !e2_ti.is_dict_encoded_string() &&
1638  !std::dynamic_pointer_cast<const Analyzer::ColumnVar>(e2)) {
1639  CHECK(e2_ti.is_none_encoded_string());
1640  none_encoded_literal_ti =
1641  none_encoded_literal_ti.get_type() == kNULLT
1642  ? e2_ti
1643  : common_string_type(none_encoded_literal_ti, e2_ti, executor);
1644  continue;
1645  }
1646  if (ti.get_type() == kNULLT) {
1647  ti = e2_ti;
1648  } else if (e2_ti.get_type() == kNULLT) {
1649  ti.set_notnull(false);
1650  e2->set_type_info(ti);
1651  } else if (ti != e2_ti) {
1652  if (ti.is_string() && e2_ti.is_string()) {
1653  // Executor is needed to determine which dictionary is the largest
1654  // in case of two dictionary types with different encodings
1655  ti = common_string_type(ti, e2_ti, executor);
1656  } else if (ti.is_number() && e2_ti.is_number()) {
1658  } else if (ti.is_boolean() && e2_ti.is_boolean()) {
1660  } else {
1661  throw std::runtime_error(
1662  "Expressions in THEN clause must be of the same or compatible types.");
1663  }
1664  }
1665  }
1666  auto else_e = else_e_in;
1667  const auto& else_ti = else_e->get_type_info();
1668  if (else_e) {
1669  if (else_e->get_contains_agg()) {
1670  has_agg = true;
1671  }
1672  if (else_ti.is_string() && !else_ti.is_dict_encoded_string() &&
1673  !std::dynamic_pointer_cast<const Analyzer::ColumnVar>(else_e)) {
1674  CHECK(else_ti.is_none_encoded_string());
1675  none_encoded_literal_ti =
1676  none_encoded_literal_ti.get_type() == kNULLT
1677  ? else_ti
1678  : common_string_type(none_encoded_literal_ti, else_ti, executor);
1679  } else {
1680  if (ti.get_type() == kNULLT) {
1681  ti = else_ti;
1682  } else if (expr_is_null(else_e.get())) {
1683  ti.set_notnull(false);
1684  else_e->set_type_info(ti);
1685  } else if (ti != else_ti) {
1686  ti.set_notnull(false);
1687  if (ti.is_string() && else_ti.is_string()) {
1688  // Executor is needed to determine which dictionary is the largest
1689  // in case of two dictionary types with different encodings
1690  ti = common_string_type(ti, else_ti, executor);
1691  } else if (ti.is_number() && else_ti.is_number()) {
1692  ti = Analyzer::BinOper::common_numeric_type(ti, else_ti);
1693  } else if (ti.is_boolean() && else_ti.is_boolean()) {
1694  ti = Analyzer::BinOper::common_numeric_type(ti, else_ti);
1695  } else if (get_logical_type_info(ti) != get_logical_type_info(else_ti)) {
1696  throw std::runtime_error(
1697  // types differing by encoding will be resolved at decode
1698  "Expressions in ELSE clause must be of the same or compatible types as "
1699  "those in the THEN clauses.");
1700  }
1701  }
1702  }
1703  }
1704 
1705  if (ti.get_type() == kNULLT && none_encoded_literal_ti.get_type() != kNULLT) {
1706  // If we haven't set a type so far it's because
1707  // every case sub-expression has a none-encoded
1708  // literal output. Make this our output type
1709  ti = none_encoded_literal_ti;
1710  }
1711 
1712  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1713  cast_expr_pair_list;
1714  for (auto p : expr_pair_list) {
1715  ti.set_notnull(false);
1716  cast_expr_pair_list.emplace_back(p.first, p.second->add_cast(ti));
1717  }
1718  if (else_e != nullptr) {
1719  else_e = else_e->add_cast(ti);
1720  } else {
1721  Datum d;
1722  // always create an else expr so that executor doesn't need to worry about it
1723  ti.set_notnull(false);
1724  else_e = makeExpr<Analyzer::Constant>(ti, true, d);
1725  }
1726  if (ti.get_type() == kNULLT) {
1727  throw std::runtime_error(
1728  "Cannot deduce the type for case expressions, all branches null");
1729  }
1730 
1731  auto case_expr = makeExpr<Analyzer::CaseExpr>(ti, has_agg, cast_expr_pair_list, else_e);
1732  return case_expr;
1733 }
1734 
1735 std::string CaseExpr::to_string() const {
1736  std::string str("CASE ");
1737  for (auto& p : when_then_list_) {
1738  str += "WHEN " + p->get_expr1()->to_string() + " THEN " +
1739  p->get_expr2()->to_string() + " ";
1740  }
1741  if (else_expr_ != nullptr) {
1742  str += "ELSE " + else_expr_->to_string();
1743  }
1744  str += " END";
1745  return str;
1746 }
1747 
1748 void UnionQuery::analyze(const Catalog_Namespace::Catalog& catalog,
1749  Analyzer::Query& query) const {
1750  left_->analyze(catalog, query);
1751  Analyzer::Query* right_query = new Analyzer::Query();
1752  right_->analyze(catalog, *right_query);
1753  query.set_next_query(right_query);
1754  query.set_is_unionall(is_unionall_);
1755 }
1756 
1757 void QuerySpec::analyze_having_clause(const Catalog_Namespace::Catalog& catalog,
1758  Analyzer::Query& query) const {
1759  std::shared_ptr<Analyzer::Expr> p;
1760  if (having_clause_ != nullptr) {
1761  p = having_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1762  if (p->get_type_info().get_type() != kBOOLEAN) {
1763  throw std::runtime_error("Only boolean expressions can be in HAVING clause.");
1764  }
1765  p->check_group_by(query.get_group_by());
1766  }
1767  query.set_having_predicate(p);
1768 }
1769 
// Analyzes the GROUP BY clause and stores the resulting expression list on `query`
// via set_group_by().
//
// Each GROUP BY entry is either an integer literal, interpreted as a 1-based ordinal
// into the query's target list, or an arbitrary expression analyzed with TLIST_REF.
// When the query has aggregates or a non-empty GROUP BY list, every target-list
// expression is validated with check_group_by().
//
// Throws std::runtime_error for a non-integer literal, an out-of-range ordinal, or an
// ordinal that references a target containing aggregate functions.
1770 void QuerySpec::analyze_group_by(const Catalog_Namespace::Catalog& catalog,
1771  Analyzer::Query& query) const {
1772  std::list<std::shared_ptr<Analyzer::Expr>> groupby;
1773  if (!groupby_clause_.empty()) {
// gexpr_no is the 1-based position of the current entry within the GROUP BY list.
1774  int gexpr_no = 1;
1775  std::shared_ptr<Analyzer::Expr> gexpr;
1776  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1777  query.get_targetlist();
1778  for (auto& c : groupby_clause_) {
1779  // special-case ordinal numbers in GROUP BY
1780  if (dynamic_cast<Literal*>(c.get())) {
// Any literal other than an integer literal is rejected.
1781  IntLiteral* i = dynamic_cast<IntLiteral*>(c.get());
1782  if (!i) {
1783  throw std::runtime_error("Invalid literal in GROUP BY clause.");
1784  }
1785  int varno = (int)i->get_intval();
1786  if (varno <= 0 || varno > static_cast<int>(tlist.size())) {
1787  throw std::runtime_error("Invalid ordinal number in GROUP BY clause.");
1788  }
1789  if (tlist[varno - 1]->get_expr()->get_contains_agg()) {
1790  throw std::runtime_error(
1791  "Ordinal number in GROUP BY cannot reference an expression containing "
1792  "aggregate "
1793  "functions.");
1794  }
// Represent the ordinal as a Var that refers to target-list entry `varno`.
1795  gexpr = makeExpr<Analyzer::Var>(
1796  tlist[varno - 1]->get_expr()->get_type_info(), Analyzer::Var::kOUTPUT, varno);
1797  } else {
1798  gexpr = c->analyze(catalog, query, Expr::TlistRefType::TLIST_REF);
1799  }
1800  const SQLTypeInfo gti = gexpr->get_type_info();
1801  bool set_new_type = false;
1802  SQLTypeInfo ti(gti);
// None-encoded strings are flagged so the group-by expression gets cast to `ti` below.
// NOTE(review): the embedded line numbering of this listing jumps from 1804 to 1807
// here, so statements that adjust `ti` may be missing -- confirm against upstream.
1803  if (gti.is_string() && gti.get_compression() == kENCODING_NONE) {
1804  set_new_type = true;
1807  ti.set_fixed_size();
1808  }
1809  std::shared_ptr<Analyzer::Var> v;
// When the group-by expression is a Var, it refers to target-list entry n: the
// group-by expression becomes that entry's own expression, and the target entry is
// re-pointed at the Var, which is re-tagged as a kGROUPBY reference to this
// GROUP BY position.
1810  if (std::dynamic_pointer_cast<Analyzer::Var>(gexpr)) {
1811  v = std::static_pointer_cast<Analyzer::Var>(gexpr);
1812  int n = v->get_varno();
1813  gexpr = tlist[n - 1]->get_own_expr();
1814  auto cv = std::dynamic_pointer_cast<Analyzer::ColumnVar>(gexpr);
1815  if (cv != nullptr) {
1816  // inherit all ColumnVar info for lineage.
1817  *std::static_pointer_cast<Analyzer::ColumnVar>(v) = *cv;
1818  }
1819  v->set_which_row(Analyzer::Var::kGROUPBY);
1820  v->set_varno(gexpr_no);
1821  tlist[n - 1]->set_expr(v);
1822  }
1823  if (set_new_type) {
// Group by the cast expression and keep the referencing Var's type in sync with it.
1824  auto new_e = gexpr->add_cast(ti);
1825  groupby.push_back(new_e);
1826  if (v != nullptr) {
1827  v->set_type_info(new_e->get_type_info());
1828  }
1829  } else {
1830  groupby.push_back(gexpr);
1831  }
1832  gexpr_no++;
1833  }
1834  }
// Validate every target-list expression against the collected GROUP BY list.
1835  if (query.get_num_aggs() > 0 || !groupby.empty()) {
1836  for (auto t : query.get_targetlist()) {
1837  auto e = t->get_expr();
1838  e->check_group_by(groupby);
1839  }
1840  }
1841  query.set_group_by(groupby);
1842 }
1843 
1844 void QuerySpec::analyze_where_clause(const Catalog_Namespace::Catalog& catalog,
1845  Analyzer::Query& query) const {
1846  if (where_clause_ == nullptr) {
1847  query.set_where_predicate(nullptr);
1848  return;
1849  }
1850  auto p = where_clause_->analyze(catalog, query, Expr::TlistRefType::TLIST_COPY);
1851  if (p->get_type_info().get_type() != kBOOLEAN) {
1852  throw std::runtime_error("Only boolean expressions can be in WHERE clause.");
1853  }
1854  query.set_where_predicate(p);
1855 }
1856 
1857 void QuerySpec::analyze_select_clause(const Catalog_Namespace::Catalog& catalog,
1858  Analyzer::Query& query) const {
1859  std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
1860  query.get_targetlist_nonconst();
1861  if (select_clause_.empty()) {
1862  // this means SELECT *
1863  int rte_idx = 0;
1864  for (auto rte : query.get_rangetable()) {
1865  rte->expand_star_in_targetlist(catalog, tlist, rte_idx++);
1866  }
1867  } else {
1868  for (auto& p : select_clause_) {
1869  const Parser::Expr* select_expr = p->get_select_expr();
1870  // look for the case of range_var.*
1871  if (typeid(*select_expr) == typeid(ColumnRef) &&
1872  dynamic_cast<const ColumnRef*>(select_expr)->get_column() == nullptr) {
1873  const std::string* range_var_name =
1874  dynamic_cast<const ColumnRef*>(select_expr)->get_table();
1875  int rte_idx = query.get_rte_idx(*range_var_name);
1876  if (rte_idx < 0) {
1877  throw std::runtime_error("invalid range variable name: " + *range_var_name);
1878  }
1879  Analyzer::RangeTableEntry* rte = query.get_rte(rte_idx);
1880  rte->expand_star_in_targetlist(catalog, tlist, rte_idx);
1881  } else {
1882  auto e = select_expr->analyze(catalog, query);
1883  std::string resname;
1884 
1885  if (p->get_alias() != nullptr) {
1886  resname = *p->get_alias();
1887  } else if (std::dynamic_pointer_cast<Analyzer::ColumnVar>(e) &&
1888  !std::dynamic_pointer_cast<Analyzer::Var>(e)) {
1889  auto colvar = std::static_pointer_cast<Analyzer::ColumnVar>(e);
1890  const ColumnDescriptor* col_desc = catalog.getMetadataForColumn(
1891  colvar->get_table_id(), colvar->get_column_id());
1892  resname = col_desc->columnName;
1893  }
1894  if (e->get_type_info().get_type() == kNULLT) {
1895  throw std::runtime_error(
1896  "Untyped NULL in SELECT clause. Use CAST to specify a type.");
1897  }
1898  auto o = std::static_pointer_cast<Analyzer::UOper>(e);
1899  bool unnest = (o != nullptr && o->get_optype() == kUNNEST);
1900  auto tle = std::make_shared<Analyzer::TargetEntry>(resname, e, unnest);
1901  tlist.push_back(tle);
1902  }
1903  }
1904  }
1905 }
1906 
1907 void QuerySpec::analyze_from_clause(const Catalog_Namespace::Catalog& catalog,
1908  Analyzer::Query& query) const {
1910  for (auto& p : from_clause_) {
1911  const TableDescriptor* table_desc;
1912  table_desc = catalog.getMetadataForTable(*p->get_table_name());
1913  if (table_desc == nullptr) {
1914  throw std::runtime_error("Table " + *p->get_table_name() + " does not exist.");
1915  }
1916  std::string range_var;
1917  if (p->get_range_var() == nullptr) {
1918  range_var = *p->get_table_name();
1919  } else {
1920  range_var = *p->get_range_var();
1921  }
1922  rte = new Analyzer::RangeTableEntry(range_var, table_desc, nullptr);
1923  query.add_rte(rte);
1924  }
1925 }
1926 
1927 void QuerySpec::analyze(const Catalog_Namespace::Catalog& catalog,
1928  Analyzer::Query& query) const {
1929  query.set_is_distinct(is_distinct_);
1930  analyze_from_clause(catalog, query);
1931  analyze_select_clause(catalog, query);
1932  analyze_where_clause(catalog, query);
1933  analyze_group_by(catalog, query);
1934  analyze_having_clause(catalog, query);
1935 }
1936 
1937 namespace {
1938 
// Parses exactly `count` hexadecimal digits of `s` starting at `pos`.
// Returns the decoded value, or -1 when the range runs past the end of `s` or contains
// a character that is not a hex digit.
long unescape_hex_value(const std::string& s, const size_t pos, const size_t count) {
  if (pos > s.size() || count > s.size() - pos) {
    return -1;
  }
  long value = 0;
  for (size_t i = pos; i < pos + count; ++i) {
    const char c = s[i];
    long digit = 0;
    if (c >= '0' && c <= '9') {
      digit = c - '0';
    } else if (c >= 'a' && c <= 'f') {
      digit = c - 'a' + 10;
    } else if (c >= 'A' && c <= 'F') {
      digit = c - 'A' + 10;
    } else {
      return -1;
    }
    value = value * 16 + digit;
  }
  return value;
}

// Appends the UTF-8 encoding of code point `cp` to `out`.
void unescape_append_utf8(std::string& out, const unsigned long cp) {
  if (cp < 0x80) {
    out.push_back(static_cast<char>(cp));
  } else if (cp < 0x800) {
    out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
    out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
  } else if (cp < 0x10000) {
    out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
    out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
    out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
  } else {
    out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
    out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
    out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
    out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
  }
}

// clean known escape'd chars without having to do a full json parse
//
// Recognized sequences (everything else is copied through unchanged):
//   backslash + 't' / 'n'             -> tab / newline
//   backslash + backslash + 't' / 'n' -> tab / newline (doubly escaped form)
//   backslash + 'x' + 2 hex digits    -> the single byte with that value
//   backslash + 'u' + 4 hex digits    -> the UTF-8 encoding of that code point; a
//                                        high surrogate directly followed by an
//                                        escaped low surrogate forms one code point
//
// The input is decoded in a single left-to-right pass, so characters produced by one
// escape are never re-interpreted as the start of another escape.
std::string unescape(std::string s) {
  std::string out;
  out.reserve(s.size());
  size_t i = 0;
  while (i < s.size()) {
    if (s[i] != '\\' || i + 1 >= s.size()) {
      out.push_back(s[i++]);
      continue;
    }
    const char next = s[i + 1];
    if (next == '\\' && i + 2 < s.size() && (s[i + 2] == 't' || s[i + 2] == 'n')) {
      out.push_back(s[i + 2] == 't' ? '\t' : '\n');
      i += 3;
      continue;
    }
    if (next == 't' || next == 'n') {
      out.push_back(next == 't' ? '\t' : '\n');
      i += 2;
      continue;
    }
    if (next == 'x') {
      const long byte_value = unescape_hex_value(s, i + 2, 2);
      if (byte_value >= 0) {
        out.push_back(static_cast<char>(byte_value));
        i += 4;
        continue;
      }
    } else if (next == 'u') {
      long code_point = unescape_hex_value(s, i + 2, 4);
      if (code_point >= 0) {
        i += 6;
        const bool is_high_surrogate = code_point >= 0xD800 && code_point <= 0xDBFF;
        if (is_high_surrogate && i + 1 < s.size() && s[i] == '\\' && s[i + 1] == 'u') {
          const long low = unescape_hex_value(s, i + 2, 4);
          if (low >= 0xDC00 && low <= 0xDFFF) {
            code_point = 0x10000 + ((code_point - 0xD800) << 10) + (low - 0xDC00);
            i += 6;
          }
        }
        unescape_append_utf8(out, static_cast<unsigned long>(code_point));
        continue;
      }
    }
    // Not a recognized escape: keep the backslash literally.
    out.push_back(s[i++]);
  }
  return out;
}
1971 
1972 void parse_options(const rapidjson::Value& payload,
1973  std::list<std::unique_ptr<NameValueAssign>>& nameValueList,
1974  bool stringToNull = false,
1975  bool stringToInteger = false) {
1976  if (payload.HasMember("options") && payload["options"].IsObject()) {
1977  for (const auto& option : payload["options"].GetObject()) {
1978  auto option_name = std::make_unique<std::string>(option.name.GetString());
1979  std::unique_ptr<Literal> literal_value;
1980  if (option.value.IsString()) {
1981  std::string str = option.value.GetString();
1982  if (stringToNull && str == "") {
1983  literal_value = std::make_unique<NullLiteral>();
1984  } else if (stringToInteger && std::all_of(str.begin(), str.end(), ::isdigit)) {
1985  int iVal = std::stoi(str);
1986  literal_value = std::make_unique<IntLiteral>(iVal);
1987  } else {
1988  // Rapidjson will deliberately provide escape'd strings when accessed
1989  // ... but the literal should have a copy of the raw unescaped string
1990  auto unique_literal_string = std::make_unique<std::string>(unescape(str));
1991  literal_value =
1992  std::make_unique<StringLiteral>(unique_literal_string.release());
1993  }
1994  } else if (option.value.IsInt() || option.value.IsInt64()) {
1995  literal_value = std::make_unique<IntLiteral>(json_i64(option.value));
1996  } else if (option.value.IsNull()) {
1997  literal_value = std::make_unique<NullLiteral>();
1998  } else {
1999  throw std::runtime_error("Unable to handle literal for " + *option_name);
2000  }
2001  CHECK(literal_value);
2002 
2003  nameValueList.emplace_back(std::make_unique<NameValueAssign>(
2004  option_name.release(), literal_value.release()));
2005  }
2006  }
2007 }
2008 } // namespace
2009 
2010 void SelectStmt::analyze(const Catalog_Namespace::Catalog& catalog,
2011  Analyzer::Query& query) const {
2012  query.set_stmt_type(kSELECT);
2013  query.set_limit(limit_);
2014  if (offset_ < 0) {
2015  throw std::runtime_error("OFFSET cannot be negative.");
2016  }
2017  query.set_offset(offset_);
2018  query_expr_->analyze(catalog, query);
2019  if (orderby_clause_.empty() && !query.get_is_distinct()) {
2020  query.set_order_by(nullptr);
2021  return;
2022  }
2023  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& tlist =
2024  query.get_targetlist();
2025  std::list<Analyzer::OrderEntry>* order_by = new std::list<Analyzer::OrderEntry>();
2026  if (!orderby_clause_.empty()) {
2027  for (auto& p : orderby_clause_) {
2028  int tle_no = p->get_colno();
2029  if (tle_no == 0) {
2030  // use column name
2031  // search through targetlist for matching name
2032  const std::string* name = p->get_column()->get_column();
2033  tle_no = 1;
2034  bool found = false;
2035  for (auto tle : tlist) {
2036  if (tle->get_resname() == *name) {
2037  found = true;
2038  break;
2039  }
2040  tle_no++;
2041  }
2042  if (!found) {
2043  throw std::runtime_error("invalid name in order by: " + *name);
2044  }
2045  }
2046  order_by->push_back(
2047  Analyzer::OrderEntry(tle_no, p->get_is_desc(), p->get_nulls_first()));
2048  }
2049  }
2050  if (query.get_is_distinct()) {
2051  // extend order_by to include all targetlist entries.
2052  for (int i = 1; i <= static_cast<int>(tlist.size()); i++) {
2053  bool in_orderby = false;
2054  std::for_each(order_by->begin(),
2055  order_by->end(),
2056  [&in_orderby, i](const Analyzer::OrderEntry& oe) {
2057  in_orderby = in_orderby || (i == oe.tle_no);
2058  });
2059  if (!in_orderby) {
2060  order_by->push_back(Analyzer::OrderEntry(i, false, false));
2061  }
2062  }
2063  }
2064  query.set_order_by(order_by);
2065 }
2066 
2067 std::string SelectEntry::to_string() const {
2068  std::string str = select_expr_->to_string();
2069  if (alias_ != nullptr) {
2070  str += " AS " + *alias_;
2071  }
2072  return str;
2073 }
2074 
2075 std::string TableRef::to_string() const {
2076  std::string str = *table_name_;
2077  if (range_var_ != nullptr) {
2078  str += " " + *range_var_;
2079  }
2080  return str;
2081 }
2082 
2083 std::string ColumnRef::to_string() const {
2084  std::string str;
2085  if (table_ == nullptr) {
2086  str = *column_;
2087  } else if (column_ == nullptr) {
2088  str = *table_ + ".*";
2089  } else {
2090  str = *table_ + "." + *column_;
2091  }
2092  return str;
2093 }
2094 
2095 std::string OperExpr::to_string() const {
2096  std::string op_str[] = {
2097  "=", "===", "<>", "<", ">", "<=", ">=", " AND ", " OR ", "NOT", "-", "+", "*", "/"};
2098  std::string str;
2099  if (optype_ == kUMINUS) {
2100  str = "-(" + left_->to_string() + ")";
2101  } else if (optype_ == kNOT) {
2102  str = "NOT (" + left_->to_string() + ")";
2103  } else if (optype_ == kARRAY_AT) {
2104  str = left_->to_string() + "[" + right_->to_string() + "]";
2105  } else if (optype_ == kUNNEST) {
2106  str = "UNNEST(" + left_->to_string() + ")";
2107  } else if (optype_ == kIN) {
2108  str = "(" + left_->to_string() + " IN " + right_->to_string() + ")";
2109  } else {
2110  str = "(" + left_->to_string() + op_str[optype_] + right_->to_string() + ")";
2111  }
2112  return str;
2113 }
2114 
2115 std::string InExpr::to_string() const {
2116  std::string str = arg_->to_string();
2117  if (is_not_) {
2118  str += " NOT IN ";
2119  } else {
2120  str += " IN ";
2121  }
2122  return str;
2123 }
2124 
2125 std::string ExistsExpr::to_string() const {
2126  return "EXISTS (" + query_->to_string() + ")";
2127 }
2128 
2129 std::string SubqueryExpr::to_string() const {
2130  std::string str;
2131  str = "(";
2132  str += query_->to_string();
2133  str += ")";
2134  return str;
2135 }
2136 
2137 std::string IsNullExpr::to_string() const {
2138  std::string str = arg_->to_string();
2139  if (is_not_) {
2140  str += " IS NOT NULL";
2141  } else {
2142  str += " IS NULL";
2143  }
2144  return str;
2145 }
2146 
2147 std::string InSubquery::to_string() const {
2148  std::string str = InExpr::to_string();
2149  str += subquery_->to_string();
2150  return str;
2151 }
2152 
2153 std::string InValues::to_string() const {
2154  std::string str = InExpr::to_string() + "(";
2155  bool notfirst = false;
2156  for (auto& p : value_list_) {
2157  if (notfirst) {
2158  str += ", ";
2159  } else {
2160  notfirst = true;
2161  }
2162  str += p->to_string();
2163  }
2164  str += ")";
2165  return str;
2166 }
2167 
2168 std::string BetweenExpr::to_string() const {
2169  std::string str = arg_->to_string();
2170  if (is_not_) {
2171  str += " NOT BETWEEN ";
2172  } else {
2173  str += " BETWEEN ";
2174  }
2175  str += lower_->to_string() + " AND " + upper_->to_string();
2176  return str;
2177 }
2178 
2179 std::string CharLengthExpr::to_string() const {
2180  std::string str;
2181  if (calc_encoded_length_) {
2182  str = "CHAR_LENGTH (" + arg_->to_string() + ")";
2183  } else {
2184  str = "LENGTH (" + arg_->to_string() + ")";
2185  }
2186  return str;
2187 }
2188 
2189 std::string CardinalityExpr::to_string() const {
2190  std::string str = "CARDINALITY(" + arg_->to_string() + ")";
2191  return str;
2192 }
2193 
2194 std::string LikeExpr::to_string() const {
2195  std::string str = arg_->to_string();
2196  if (is_not_) {
2197  str += " NOT LIKE ";
2198  } else {
2199  str += " LIKE ";
2200  }
2201  str += like_string_->to_string();
2202  if (escape_string_ != nullptr) {
2203  str += " ESCAPE " + escape_string_->to_string();
2204  }
2205  return str;
2206 }
2207 
2208 std::string RegexpExpr::to_string() const {
2209  std::string str = arg_->to_string();
2210  if (is_not_) {
2211  str += " NOT REGEXP ";
2212  } else {
2213  str += " REGEXP ";
2214  }
2215  str += pattern_string_->to_string();
2216  if (escape_string_ != nullptr) {
2217  str += " ESCAPE " + escape_string_->to_string();
2218  }
2219  return str;
2220 }
2221 
2222 std::string WidthBucketExpr::to_string() const {
2223  std::string str = " WIDTH_BUCKET ";
2224  str += target_value_->to_string();
2225  str += " ";
2226  str += lower_bound_->to_string();
2227  str += " ";
2228  str += upper_bound_->to_string();
2229  str += " ";
2230  str += partition_count_->to_string();
2231  str += " ";
2232  return str;
2233 }
2234 
2235 std::string LikelihoodExpr::to_string() const {
2236  std::string str = " LIKELIHOOD ";
2237  str += arg_->to_string();
2238  str += " ";
2239  str += boost::lexical_cast<std::string>(is_not_ ? 1.0 - likelihood_ : likelihood_);
2240  return str;
2241 }
2242 
2243 std::string FunctionRef::to_string() const {
2244  std::string str = *name_ + "(";
2245  if (distinct_) {
2246  str += "DISTINCT ";
2247  }
2248  if (arg_ == nullptr) {
2249  str += "*)";
2250  } else {
2251  str += arg_->to_string() + ")";
2252  }
2253  return str;
2254 }
2255 
2256 std::string QuerySpec::to_string() const {
2257  std::string query_str = "SELECT ";
2258  if (is_distinct_) {
2259  query_str += "DISTINCT ";
2260  }
2261  if (select_clause_.empty()) {
2262  query_str += "* ";
2263  } else {
2264  bool notfirst = false;
2265  for (auto& p : select_clause_) {
2266  if (notfirst) {
2267  query_str += ", ";
2268  } else {
2269  notfirst = true;
2270  }
2271  query_str += p->to_string();
2272  }
2273  }
2274  query_str += " FROM ";
2275  bool notfirst = false;
2276  for (auto& p : from_clause_) {
2277  if (notfirst) {
2278  query_str += ", ";
2279  } else {
2280  notfirst = true;
2281  }
2282  query_str += p->to_string();
2283  }
2284  if (where_clause_) {
2285  query_str += " WHERE " + where_clause_->to_string();
2286  }
2287  if (!groupby_clause_.empty()) {
2288  query_str += " GROUP BY ";
2289  bool notfirst = false;
2290  for (auto& p : groupby_clause_) {
2291  if (notfirst) {
2292  query_str += ", ";
2293  } else {
2294  notfirst = true;
2295  }
2296  query_str += p->to_string();
2297  }
2298  }
2299  if (having_clause_) {
2300  query_str += " HAVING " + having_clause_->to_string();
2301  }
2302  query_str += ";";
2303  return query_str;
2304 }
2305 
2306 void InsertStmt::analyze(const Catalog_Namespace::Catalog& catalog,
2307  Analyzer::Query& query) const {
2308  query.set_stmt_type(kINSERT);
2309  const TableDescriptor* td = catalog.getMetadataForTable(*table_);
2310  if (td == nullptr) {
2311  throw std::runtime_error("Table " + *table_ + " does not exist.");
2312  }
2313  if (td->isView) {
2314  throw std::runtime_error("Insert to views is not supported yet.");
2315  }
2317  query.set_result_table_id(td->tableId);
2318  std::list<int> result_col_list;
2319  if (column_list_.empty()) {
2320  const std::list<const ColumnDescriptor*> all_cols =
2321  catalog.getAllColumnMetadataForTable(td->tableId, false, false, true);
2322  for (auto cd : all_cols) {
2323  result_col_list.push_back(cd->columnId);
2324  }
2325  } else {
2326  for (auto& c : column_list_) {
2327  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
2328  if (cd == nullptr) {
2329  throw std::runtime_error("Column " + *c + " does not exist.");
2330  }
2331  result_col_list.push_back(cd->columnId);
2332  const auto& col_ti = cd->columnType;
2333  if (col_ti.get_physical_cols() > 0) {
2334  CHECK(cd->columnType.is_geometry());
2335  for (auto i = 1; i <= col_ti.get_physical_cols(); i++) {
2336  const ColumnDescriptor* pcd =
2337  catalog.getMetadataForColumn(td->tableId, cd->columnId + i);
2338  if (pcd == nullptr) {
2339  throw std::runtime_error("Column " + *c + "'s metadata is incomplete.");
2340  }
2341  result_col_list.push_back(pcd->columnId);
2342  }
2343  }
2344  }
2345  }
2346  query.set_result_col_list(result_col_list);
2347 }
2348 
2349 namespace {
2350 Literal* parse_insert_literal(const rapidjson::Value& literal) {
2351  CHECK(literal.IsObject());
2352  CHECK(literal.HasMember("literal"));
2353  CHECK(literal.HasMember("type"));
2354  auto type = json_str(literal["type"]);
2355  if (type == "NULL") {
2356  return new NullLiteral();
2357  } else if (type == "CHAR" || type == "BOOLEAN") {
2358  auto* val = new std::string(json_str(literal["literal"]));
2359  return new StringLiteral(val);
2360  } else if (type == "DECIMAL") {
2361  CHECK(literal.HasMember("scale"));
2362  CHECK(literal.HasMember("precision"));
2363  auto scale = json_i64(literal["scale"]);
2364  auto precision = json_i64(literal["precision"]);
2365  if (scale == 0) {
2366  auto int_val = std::stol(json_str(literal["literal"]));
2367  return new IntLiteral(int_val);
2368  } else if (precision > 18) {
2369  auto dbl_val = std::stod(json_str(literal["literal"]));
2370  return new DoubleLiteral(dbl_val);
2371  } else {
2372  auto* val = new std::string(json_str(literal["literal"]));
2373  return new FixedPtLiteral(val);
2374  }
2375  } else if (type == "DOUBLE") {
2376  auto dbl_val = std::stod(json_str(literal["literal"]));
2377  return new DoubleLiteral(dbl_val);
2378  } else {
2379  CHECK(false) << "Unexpected calcite data type: " << type;
2380  }
2381  return nullptr;
2382 }
2383 
2384 ArrayLiteral* parse_insert_array_literal(const rapidjson::Value& array) {
2385  CHECK(array.IsArray());
2386  auto json_elements = array.GetArray();
2387  auto* elements = new std::list<Expr*>();
2388  for (const auto& e : json_elements) {
2389  elements->push_back(parse_insert_literal(e));
2390  }
2391  return new ArrayLiteral(elements);
2392 }
2393 } // namespace
2394 
2395 InsertValuesStmt::InsertValuesStmt(const rapidjson::Value& payload)
2396  : InsertStmt(nullptr, nullptr) {
2397  CHECK(payload.HasMember("name"));
2398  table_ = std::make_unique<std::string>(json_str(payload["name"]));
2399 
2400  if (payload.HasMember("columns")) {
2401  CHECK(payload["columns"].IsArray());
2402  for (auto& column : payload["columns"].GetArray()) {
2403  std::string s = json_str(column);
2404  column_list_.emplace_back(std::make_unique<std::string>(s));
2405  }
2406  }
2407 
2408  CHECK(payload.HasMember("values") && payload["values"].IsArray());
2409  auto tuples = payload["values"].GetArray();
2410  if (tuples.Empty()) {
2411  throw std::runtime_error("Values statement cannot be empty");
2412  }
2413  values_lists_.reserve(tuples.Size());
2414  for (const auto& json_tuple : tuples) {
2415  auto values_list = std::make_unique<ValuesList>();
2416  CHECK(json_tuple.IsArray());
2417  auto tuple = json_tuple.GetArray();
2418  for (const auto& value : tuple) {
2419  CHECK(value.IsObject());
2420  if (value.HasMember("array")) {
2421  values_list->push_back(parse_insert_array_literal(value["array"]));
2422  } else {
2423  values_list->push_back(parse_insert_literal(value));
2424  }
2425  }
2426  values_lists_.push_back(std::move(values_list));
2427  }
2428 }
2429 
2431  Analyzer::Query& query) const {
2432  InsertStmt::analyze(catalog, query);
2433  size_t list_size = values_lists_[0]->get_value_list().size();
2434  if (!column_list_.empty()) {
2435  if (list_size != column_list_.size()) {
2436  throw std::runtime_error(
2437  "Numbers of columns and values don't match for the "
2438  "insert.");
2439  }
2440  } else {
2441  const auto tableId = query.get_result_table_id();
2442  const std::list<const ColumnDescriptor*> non_phys_cols =
2443  catalog.getAllColumnMetadataForTable(tableId, false, false, false);
2444  if (non_phys_cols.size() != list_size) {
2445  throw std::runtime_error(
2446  "Number of columns in table does not match the list of values given in the "
2447  "insert.");
2448  }
2449  }
2450  std::vector<const ColumnDescriptor*> cds;
2451  cds.reserve(query.get_result_col_list().size());
2452  for (auto id : query.get_result_col_list()) {
2453  const auto* cd = catalog.getMetadataForColumn(query.get_result_table_id(), id);
2454  CHECK(cd);
2455  cds.push_back(cd);
2456  }
2457  auto& query_values_lists = query.get_values_lists();
2458  query_values_lists.resize(values_lists_.size());
2459  for (size_t i = 0; i < values_lists_.size(); ++i) {
2460  const auto& values_list = values_lists_[i]->get_value_list();
2461  if (values_list.size() != list_size) {
2462  throw std::runtime_error(
2463  "Insert values lists should be of the same size. Expected: " +
2464  std::to_string(list_size) + ", Got: " + std::to_string(values_list.size()));
2465  }
2466  auto& query_values_list = query_values_lists[i];
2467  size_t cds_id = 0;
2468  for (auto& v : values_list) {
2469  auto e = v->analyze(catalog, query);
2470  const auto* cd = cds[cds_id];
2471  const auto& col_ti = cd->columnType;
2472  if (col_ti.get_notnull()) {
2473  auto c = std::dynamic_pointer_cast<Analyzer::Constant>(e);
2474  if (c != nullptr && c->get_is_null()) {
2475  throw std::runtime_error("Cannot insert NULL into column " + cd->columnName);
2476  }
2477  }
2478  e = e->add_cast(col_ti);
2479  query_values_list.emplace_back(new Analyzer::TargetEntry("", e, false));
2480  ++cds_id;
2481 
2482  if (col_ti.get_physical_cols() > 0) {
2483  CHECK(cd->columnType.is_geometry());
2484  auto c = dynamic_cast<const Analyzer::Constant*>(e.get());
2485  if (!c) {
2486  auto uoper = std::dynamic_pointer_cast<Analyzer::UOper>(e);
2487  if (uoper && uoper->get_optype() == kCAST) {
2488  c = dynamic_cast<const Analyzer::Constant*>(uoper->get_operand());
2489  }
2490  }
2491  bool is_null = false;
2492  std::string* geo_string{nullptr};
2493  if (c) {
2494  is_null = c->get_is_null();
2495  if (!is_null) {
2496  geo_string = c->get_constval().stringval;
2497  }
2498  }
2499  if (!is_null && !geo_string) {
2500  throw std::runtime_error("Expecting a WKT or WKB hex string for column " +
2501  cd->columnName);
2502  }
2503  std::vector<double> coords;
2504  std::vector<double> bounds;
2505  std::vector<int> ring_sizes;
2506  std::vector<int> poly_rings;
2507  int render_group =
2508  0; // @TODO simon.eves where to get render_group from in this context?!
2509  SQLTypeInfo import_ti{cd->columnType};
2510  if (!is_null) {
2512  *geo_string, import_ti, coords, bounds, ring_sizes, poly_rings)) {
2513  throw std::runtime_error("Cannot read geometry to insert into column " +
2514  cd->columnName);
2515  }
2516  if (coords.empty()) {
2517  // Importing from geo_string WKT resulted in empty coords: dealing with a NULL
2518  is_null = true;
2519  }
2520  if (cd->columnType.get_type() != import_ti.get_type()) {
2521  // allow POLYGON to be inserted into MULTIPOLYGON column
2522  if (!(import_ti.get_type() == SQLTypes::kPOLYGON &&
2523  cd->columnType.get_type() == SQLTypes::kMULTIPOLYGON)) {
2524  throw std::runtime_error(
2525  "Imported geometry doesn't match the type of column " + cd->columnName);
2526  }
2527  }
2528  } else {
2529  // Special case for NULL POINT, push NULL representation to coords
2530  if (cd->columnType.get_type() == kPOINT) {
2531  if (!coords.empty()) {
2532  throw std::runtime_error(
2533  "NULL POINT with unexpected coordinates in column " + cd->columnName);
2534  }
2535  coords.push_back(NULL_ARRAY_DOUBLE);
2536  coords.push_back(NULL_DOUBLE);
2537  }
2538  }
2539 
2540  // TODO: check if import SRID matches columns SRID, may need to transform before
2541  // inserting
2542 
2543  const auto* cd_coords = cds[cds_id];
2544  CHECK_EQ(cd_coords->columnType.get_type(), kARRAY);
2545  CHECK_EQ(cd_coords->columnType.get_subtype(), kTINYINT);
2546  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2547  if (!is_null || cd->columnType.get_type() == kPOINT) {
2548  auto compressed_coords = Geospatial::compress_coords(coords, col_ti);
2549  for (auto cc : compressed_coords) {
2550  Datum d;
2551  d.tinyintval = cc;
2552  auto e = makeExpr<Analyzer::Constant>(kTINYINT, false, d);
2553  value_exprs.push_back(e);
2554  }
2555  }
2556  query_values_list.emplace_back(new Analyzer::TargetEntry(
2557  "",
2558  makeExpr<Analyzer::Constant>(cd_coords->columnType, is_null, value_exprs),
2559  false));
2560  ++cds_id;
2561 
2562  if (cd->columnType.get_type() == kPOLYGON ||
2563  cd->columnType.get_type() == kMULTIPOLYGON) {
2564  // Put ring sizes array into separate physical column
2565  const auto* cd_ring_sizes = cds[cds_id];
2566  CHECK(cd_ring_sizes);
2567  CHECK_EQ(cd_ring_sizes->columnType.get_type(), kARRAY);
2568  CHECK_EQ(cd_ring_sizes->columnType.get_subtype(), kINT);
2569  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2570  if (!is_null) {
2571  for (auto c : ring_sizes) {
2572  Datum d;
2573  d.intval = c;
2574  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
2575  value_exprs.push_back(e);
2576  }
2577  }
2578  query_values_list.emplace_back(new Analyzer::TargetEntry(
2579  "",
2580  makeExpr<Analyzer::Constant>(
2581  cd_ring_sizes->columnType, is_null, value_exprs),
2582  false));
2583  ++cds_id;
2584 
2585  if (cd->columnType.get_type() == kMULTIPOLYGON) {
2586  // Put poly_rings array into separate physical column
2587  const auto* cd_poly_rings = cds[cds_id];
2588  CHECK(cd_poly_rings);
2589  CHECK_EQ(cd_poly_rings->columnType.get_type(), kARRAY);
2590  CHECK_EQ(cd_poly_rings->columnType.get_subtype(), kINT);
2591  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2592  if (!is_null) {
2593  for (auto c : poly_rings) {
2594  Datum d;
2595  d.intval = c;
2596  auto e = makeExpr<Analyzer::Constant>(kINT, false, d);
2597  value_exprs.push_back(e);
2598  }
2599  }
2600  query_values_list.emplace_back(new Analyzer::TargetEntry(
2601  "",
2602  makeExpr<Analyzer::Constant>(
2603  cd_poly_rings->columnType, is_null, value_exprs),
2604  false));
2605  ++cds_id;
2606  }
2607  }
2608 
2609  if (cd->columnType.get_type() == kLINESTRING ||
2610  cd->columnType.get_type() == kPOLYGON ||
2611  cd->columnType.get_type() == kMULTIPOLYGON) {
2612  const auto* cd_bounds = cds[cds_id];
2613  CHECK(cd_bounds);
2614  CHECK_EQ(cd_bounds->columnType.get_type(), kARRAY);
2615  CHECK_EQ(cd_bounds->columnType.get_subtype(), kDOUBLE);
2616  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
2617  if (!is_null) {
2618  for (auto b : bounds) {
2619  Datum d;
2620  d.doubleval = b;
2621  auto e = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
2622  value_exprs.push_back(e);
2623  }
2624  }
2625  query_values_list.emplace_back(new Analyzer::TargetEntry(
2626  "",
2627  makeExpr<Analyzer::Constant>(cd_bounds->columnType, is_null, value_exprs),
2628  false));
2629  ++cds_id;
2630  }
2631 
2632  if (cd->columnType.get_type() == kPOLYGON ||
2633  cd->columnType.get_type() == kMULTIPOLYGON) {
2634  // Put render group into separate physical column
2635  const auto* cd_render_group = cds[cds_id];
2636  CHECK(cd_render_group);
2637  CHECK_EQ(cd_render_group->columnType.get_type(), kINT);
2638  Datum d;
2639  d.intval = render_group;
2640  query_values_list.emplace_back(new Analyzer::TargetEntry(
2641  "",
2642  makeExpr<Analyzer::Constant>(cd_render_group->columnType, is_null, d),
2643  false));
2644  ++cds_id;
2645  }
2646  }
2647  }
2648  }
2649 }
2650 
// Runs a single-row INSERT: the visible steps resolve the catalog and the target
// table, reject callers without insert privileges, analyze the statement into an
// Analyzer::Query and hand it to RelAlgExecutor::executeSimpleInsert.
// NOTE(review): the signature line and several statements of this definition are
// absent from this listing (the embedded line numbers have gaps), so `session`,
// `executor`, `local_connector` and `insert_data_loader` are declared on lines
// that are not shown here.
2652  auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
2655  auto& catalog = session.getCatalog();
2656  const auto td_with_lock =
2658  catalog, *table_);
2661  *table_)) {
2662  throw std::runtime_error("User has no insert privileges on " + *table_ + ".");
2663  }
2664  Analyzer::Query query;
2665  analyze(catalog, query);
2666 
2667  // Take an insert data write lock, which prevents concurrent inserts.
2668  const auto insert_data_lock =
2670 
2671  // NOTE(max): we do the same checks as below just a few calls earlier in analyze().
2672  // Do we keep those intentionally to make sure nothing changed in between w/o
2673  // catalog locks or is it just a duplicate work?
2674  auto td = td_with_lock();
2675  CHECK(td);
2676  if (td->isView) {
2677  throw std::runtime_error("Singleton inserts on views is not supported.");
2678  }
2680 
2682  RelAlgExecutor ra_executor(executor.get(), catalog);
2683 
// Fall back to the local connector when no leaf connector has been set.
2685  if (!leafs_connector_) {
2686  leafs_connector_ = &local_connector;
2687  }
// Any failure during the insert triggers a best-effort rollback; a failure of the
// rollback itself is only logged so that the original exception is what propagates.
2689  try {
2690  ra_executor.executeSimpleInsert(query, insert_data_loader, session);
2691  } catch (...) {
2692  try {
2693  leafs_connector_->rollback(session, td->tableId);
2694  } catch (std::exception& e) {
2695  LOG(ERROR) << "An error occurred during insert rollback attempt. Table id: "
2696  << td->tableId << ", Error: " << e.what();
2697  }
2698  throw;
2699  }
// Temporary tables are not checkpointed.
2700  if (!td->isTemporaryTable()) {
2701  leafs_connector_->checkpoint(session, td->tableId);
2702  }
2703 }
2704 
2706  Analyzer::Query& query) const {
// Always throws std::runtime_error: UPDATE is not handled by this analyzer.
// NOTE(review): the first line of the signature is missing from this listing.
2707  throw std::runtime_error("UPDATE statement not supported yet.");
2708 }
2709 
2711  Analyzer::Query& query) const {
// Always throws std::runtime_error: DELETE is not handled by this analyzer.
// NOTE(review): the first line of the signature is missing from this listing.
2712  throw std::runtime_error("DELETE statement not supported yet.");
2713 }
2714 
2715 namespace {
2716 
// Rejects column types that cannot be sharded on: only integer, time and
// dictionary-encoded string columns pass; anything else throws std::runtime_error
// naming the type and its encoding.
// NOTE(review): the signature line (which declares `cd`) is missing from this
// listing.
2718  const auto& col_ti = cd.columnType;
2719  if (!col_ti.is_integer() && !col_ti.is_time() &&
2720  !(col_ti.is_string() && col_ti.get_compression() == kENCODING_DICT)) {
2721  throw std::runtime_error("Cannot shard on type " + col_ti.get_type_name() +
2722  ", encoding " + col_ti.get_compression_name());
2723  }
2724 }
2725 
// Returns the 1-based index of the column whose name equals `name` exactly
// (case-sensitive), or 0 when no column matches. For geometry columns the index
// additionally advances by get_physical_cols().
// NOTE(review): the embedded listing numbers jump from 2730 to 2732 inside the
// match branch, so one statement appears to be missing from this extract.
2726 size_t shard_column_index(const std::string& name,
2727  const std::list<ColumnDescriptor>& columns) {
2728  size_t index = 1;
2729  for (const auto& cd : columns) {
2730  if (cd.columnName == name) {
2732  return index;
2733  }
2734  ++index;
2735  if (cd.columnType.is_geometry()) {
2736  index += cd.columnType.get_physical_cols();
2737  }
2738  }
2739  // Not found, return 0
2740  return 0;
2741 }
2742 
2743 size_t sort_column_index(const std::string& name,
2744  const std::list<ColumnDescriptor>& columns) {
2745  size_t index = 1;
2746  for (const auto& cd : columns) {
2747  if (boost::to_upper_copy<std::string>(cd.columnName) == name) {
2748  return index;
2749  }
2750  ++index;
2751  if (cd.columnType.is_geometry()) {
2752  index += cd.columnType.get_physical_cols();
2753  }
2754  }
2755  // Not found, return 0
2756  return 0;
2757 }
2758 
2759 void set_string_field(rapidjson::Value& obj,
2760  const std::string& field_name,
2761  const std::string& field_value,
2762  rapidjson::Document& document) {
2763  rapidjson::Value field_name_json_str;
2764  field_name_json_str.SetString(
2765  field_name.c_str(), field_name.size(), document.GetAllocator());
2766  rapidjson::Value field_value_json_str;
2767  field_value_json_str.SetString(
2768  field_value.c_str(), field_value.size(), document.GetAllocator());
2769  obj.AddMember(field_name_json_str, field_value_json_str, document.GetAllocator());
2770 }
2771 
2773  const ShardKeyDef* shard_key_def,
2774  const std::vector<SharedDictionaryDef>& shared_dict_defs) {
// Serializes the key definitions into a JSON array string: one
// {"type": "SHARD KEY", "name": ...} object when shard_key_def is non-null,
// followed by one "SHARED DICTIONARY" object per entry of shared_dict_defs.
// NOTE(review): the first signature line and the opening line of the
// "foreign_table" call are missing from this listing.
2775  rapidjson::Document document;
2776  auto& allocator = document.GetAllocator();
2777  rapidjson::Value arr(rapidjson::kArrayType);
2778  if (shard_key_def) {
2779  rapidjson::Value shard_key_obj(rapidjson::kObjectType);
2780  set_string_field(shard_key_obj, "type", "SHARD KEY", document);
2781  set_string_field(shard_key_obj, "name", shard_key_def->get_column(), document);
2782  arr.PushBack(shard_key_obj, allocator);
2783  }
2784  for (const auto& shared_dict_def : shared_dict_defs) {
2785  rapidjson::Value shared_dict_obj(rapidjson::kObjectType);
2786  set_string_field(shared_dict_obj, "type", "SHARED DICTIONARY", document);
2787  set_string_field(shared_dict_obj, "name", shared_dict_def.get_column(), document);
2789  shared_dict_obj, "foreign_table", shared_dict_def.get_foreign_table(), document);
2790  set_string_field(shared_dict_obj,
2791  "foreign_column",
2792  shared_dict_def.get_foreign_column(),
2793  document);
2794  arr.PushBack(shared_dict_obj, allocator);
2795  }
// Write the array (not the document) into a string buffer and return its text.
2796  rapidjson::StringBuffer buffer;
2797  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
2798  arr.Accept(writer);
2799  return buffer.GetString();
2800 }
2801 
2802 template <typename LITERAL_TYPE,
2803  typename ASSIGNMENT,
2804  typename VALIDATE = DefaultValidate<LITERAL_TYPE>>
2805 decltype(auto) get_property_value(const NameValueAssign* p,
2806  ASSIGNMENT op,
2807  VALIDATE validate = VALIDATE()) {
2808  const auto val = validate(p);
2809  return op(val);
2810 }
2811 
2813  const NameValueAssign* p,
2814  const std::list<ColumnDescriptor>& columns) {
// Stores the option value in td.storageType, validated with CaseSensitiveValidate
// instead of the default validator. `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2815  auto assignment = [&td](const auto val) { td.storageType = val; };
2816  return get_property_value<StringLiteral, decltype(assignment), CaseSensitiveValidate>(
2817  p, assignment);
2818 }
2819 
2821  const NameValueAssign* p,
2822  const std::list<ColumnDescriptor>& columns) {
// Stores the validated integer option in td.maxFragRows. `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2823  return get_property_value<IntLiteral>(p,
2824  [&td](const auto val) { td.maxFragRows = val; });
2825 }
2826 
2828  const NameValueAssign* p,
2829  const std::list<ColumnDescriptor>& columns) {
// Stores the validated integer option in df_td.maxFragRows. `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2830  return get_property_value<IntLiteral>(
2831  p, [&df_td](const auto val) { df_td.maxFragRows = val; });
2832 }
2833 
2835  const NameValueAssign* p,
2836  const std::list<ColumnDescriptor>& columns) {
// Stores the validated integer option in td.maxChunkSize. `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2837  return get_property_value<IntLiteral>(p,
2838  [&td](const auto val) { td.maxChunkSize = val; });
2839 }
2840 
2842  DataframeTableDescriptor& df_td,
2843  const NameValueAssign* p,
2844  const std::list<ColumnDescriptor>& columns) {
// Stores the validated integer option in df_td.maxChunkSize. `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2845  return get_property_value<IntLiteral>(
2846  p, [&df_td](const auto val) { df_td.maxChunkSize = val; });
2847 }
2848 
2850  const NameValueAssign* p,
2851  const std::list<ColumnDescriptor>& columns) {
// Stores the validated string option in df_td.delimiter; throws std::runtime_error
// unless the value is exactly one character long. `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2852  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2853  if (val.size() != 1) {
2854  throw std::runtime_error("Length of DELIMITER must be equal to 1.");
2855  }
2856  df_td.delimiter = val;
2857  });
2858 }
2859 
2861  const NameValueAssign* p,
2862  const std::list<ColumnDescriptor>& columns) {
// Sets df_td.hasHeader from the validated string: "TRUE" -> true, "FALSE" -> false,
// anything else throws std::runtime_error. `columns` is not used.
// NOTE(review): the comparison is against upper-case literals; presumably the
// default validator upper-cases the value -- confirm against DefaultValidate.
// NOTE(review): the first signature line is missing from this listing.
2863  return get_property_value<StringLiteral>(p, [&df_td](const auto val) {
2864  if (val == "FALSE") {
2865  df_td.hasHeader = false;
2866  } else if (val == "TRUE") {
2867  df_td.hasHeader = true;
2868  } else {
2869  throw std::runtime_error("Option HEADER support only 'true' or 'false' values.");
2870  }
2871  });
2872 }
2873 
2875  const NameValueAssign* p,
2876  const std::list<ColumnDescriptor>& columns) {
// Stores the validated integer option in td.fragPageSize. `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2877  return get_property_value<IntLiteral>(p,
2878  [&td](const auto val) { td.fragPageSize = val; });
2879 }
2881  const NameValueAssign* p,
2882  const std::list<ColumnDescriptor>& columns) {
// Stores the validated integer option in td.maxRows. `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2883  return get_property_value<IntLiteral>(p, [&td](const auto val) { td.maxRows = val; });
2884 }
2885 
2887  const NameValueAssign* p,
2888  const std::list<ColumnDescriptor>& columns) {
// Stores the validated integer option in df_td.skipRows. `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2889  return get_property_value<IntLiteral>(
2890  p, [&df_td](const auto val) { df_td.skipRows = val; });
2891 }
2892 
2894  const NameValueAssign* p,
2895  const std::list<ColumnDescriptor>& columns) {
// Stores the validated string option in td.partitions. Throws std::runtime_error
// when the value is neither "SHARDED" nor "REPLICATED", or when "REPLICATED" is
// requested for a table that already has a shard column (td.shardedColumnId != 0).
// `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2896  return get_property_value<StringLiteral>(p, [&td](const auto partitions_uc) {
2897  if (partitions_uc != "SHARDED" && partitions_uc != "REPLICATED") {
2898  throw std::runtime_error("PARTITIONS must be SHARDED or REPLICATED");
2899  }
2900  if (td.shardedColumnId != 0 && partitions_uc == "REPLICATED") {
2901  throw std::runtime_error(
2902  "A table cannot be sharded and replicated at the same time");
2903  };
2904  td.partitions = partitions_uc;
2905  });
2906 }
2908  const NameValueAssign* p,
2909  const std::list<ColumnDescriptor>& columns) {
// Sets td.nShards from the validated integer option. Requires a shard column to be
// set already (throws otherwise). When g_leaf_count is non-zero the value must be a
// multiple of it and the stored count is the value divided by g_leaf_count.
// `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2910  if (!td.shardedColumnId) {
2911  throw std::runtime_error("SHARD KEY must be defined.");
2912  }
2913  return get_property_value<IntLiteral>(p, [&td](const auto shard_count) {
2914  if (g_leaf_count && shard_count % g_leaf_count) {
2915  throw std::runtime_error(
2916  "SHARD_COUNT must be a multiple of the number of leaves in the cluster.");
2917  }
2918  td.nShards = g_leaf_count ? shard_count / g_leaf_count : shard_count;
// NOTE(review): td.shardedColumnId was already required to be non-zero above and
// is not modified in between, so this condition looks unreachable as written --
// confirm whether `td.shardedColumnId && !td.nShards` was intended.
2919  if (!td.shardedColumnId && !td.nShards) {
2920  throw std::runtime_error(
2921  "Must specify the number of shards through the SHARD_COUNT option");
2922  };
2923  });
2924 }
2925 
2926 decltype(auto) get_vacuum_def(TableDescriptor& td,
2927  const NameValueAssign* p,
2928  const std::list<ColumnDescriptor>& columns) {
2929  return get_property_value<StringLiteral>(p, [&td](const auto vacuum_uc) {
2930  if (vacuum_uc != "IMMEDIATE" && vacuum_uc != "DELAYED") {
2931  throw std::runtime_error("VACUUM must be IMMEDIATE or DELAYED");
2932  }
2933  td.hasDeletedCol = boost::iequals(vacuum_uc, "IMMEDIATE") ? false : true;
2934  });
2935 }
2936 
2938  const NameValueAssign* p,
2939  const std::list<ColumnDescriptor>& columns) {
// Resolves the validated column name through sort_column_index() and stores the
// resulting index in td.sortedColumnId; throws std::runtime_error when the lookup
// returns 0 (no such column).
// NOTE(review): the first signature line is missing from this listing.
2940  return get_property_value<StringLiteral>(p, [&td, &columns](const auto sort_upper) {
2941  td.sortedColumnId = sort_column_index(sort_upper, columns);
2942  if (!td.sortedColumnId) {
2943  throw std::runtime_error("Specified sort column " + sort_upper + " doesn't exist");
2944  }
2945  });
2946 }
2947 
2949  const NameValueAssign* p,
2950  const std::list<ColumnDescriptor>& columns) {
// Stores the validated integer option in td.maxRollbackEpochs, mapping any negative
// value to -1. Validation uses PositiveOrZeroValidate rather than the default
// validator. `columns` is not used.
// NOTE(review): the first signature line is missing from this listing.
2951  auto assignment = [&td](const auto val) {
2952  td.maxRollbackEpochs =
2953  val < 0 ? -1 : val; // Anything < 0 means unlimited rollbacks. Note that 0
2954  // still means keeping a shadow copy of data/metadata
2955  // between epochs so bad writes can be rolled back
2956  };
2957  return get_property_value<IntLiteral, decltype(assignment), PositiveOrZeroValidate>(
2958  p, assignment);
2959 }
2960 
2961 static const std::map<const std::string, const TableDefFuncPtr> tableDefFuncMap = {
2962  {"fragment_size"s, get_frag_size_def},
2963  {"max_chunk_size"s, get_max_chunk_size_def},
2964  {"page_size"s, get_page_size_def},
2965  {"max_rows"s, get_max_rows_def},
2966  {"partitions"s, get_partions_def},
2967  {"shard_count"s, get_shard_count_def},
2968  {"vacuum"s, get_vacuum_def},
2969  {"sort_column"s, get_sort_column_def},
2970  {"storage_type"s, get_storage_type},
2971  {"max_rollback_epochs", get_max_rollback_epochs_def}};
2972 
2974  const std::unique_ptr<NameValueAssign>& p,
2975  const std::list<ColumnDescriptor>& columns) {
// Looks up the handler for option `p` in tableDefFuncMap by its lower-cased name
// and invokes it; an unknown option name throws std::runtime_error listing the
// accepted CREATE TABLE options.
// NOTE(review): the first signature line is missing from this listing.
2976  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2977  if (it == tableDefFuncMap.end()) {
2978  throw std::runtime_error(
2979  "Invalid CREATE TABLE option " + *p->get_name() +
2980  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2981  "MAX_ROWS, "
2982  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE.");
2983  }
2984  return it->second(td, p.get(), columns);
2985 }
2986 
2988  const std::unique_ptr<NameValueAssign>& p,
2989  const std::list<ColumnDescriptor>& columns) {
// CREATE TABLE AS variant of the option dispatcher: same lookup in tableDefFuncMap,
// different error text for unknown options.
// NOTE(review): the error text mentions USE_SHARED_DICTIONARIES, but tableDefFuncMap
// has no such key, so that option would be rejected if it reached this function --
// presumably callers filter it out beforehand; confirm.
// NOTE(review): the first signature line is missing from this listing.
2990  const auto it = tableDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
2991  if (it == tableDefFuncMap.end()) {
2992  throw std::runtime_error(
2993  "Invalid CREATE TABLE AS option " + *p->get_name() +
2994  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, PAGE_SIZE, MAX_ROLLBACK_EPOCHS, "
2995  "MAX_ROWS, "
2996  "PARTITIONS, SHARD_COUNT, VACUUM, SORT_COLUMN, STORAGE_TYPE or "
2997  "USE_SHARED_DICTIONARIES.");
2998  }
2999  return it->second(td, p.get(), columns);
3000 }
3001 
3002 static const std::map<const std::string, const DataframeDefFuncPtr> dataframeDefFuncMap =
3003  {{"fragment_size"s, get_frag_size_dataframe_def},
3004  {"max_chunk_size"s, get_max_chunk_size_dataframe_def},
3005  {"skip_rows"s, get_skip_rows_def},
3006  {"delimiter"s, get_delimiter_def},
3007  {"header"s, get_header_def}};
3008 
3010  const std::unique_ptr<NameValueAssign>& p,
3011  const std::list<ColumnDescriptor>& columns) {
// Looks up the handler for option `p` in dataframeDefFuncMap by its lower-cased name
// and invokes it; an unknown option name throws std::runtime_error listing the
// accepted CREATE DATAFRAME options.
// NOTE(review): the first signature line is missing from this listing.
3012  const auto it =
3013  dataframeDefFuncMap.find(boost::to_lower_copy<std::string>(*p->get_name()));
3014  if (it == dataframeDefFuncMap.end()) {
3015  throw std::runtime_error(
3016  "Invalid CREATE DATAFRAME option " + *p->get_name() +
3017  ". Should be FRAGMENT_SIZE, MAX_CHUNK_SIZE, SKIP_ROWS, DELIMITER or HEADER.");
3018  }
3019  return it->second(df_td, p.get(), columns);
3020 }
3021 
3022 std::unique_ptr<ColumnDef> column_from_json(const rapidjson::Value& element) {
3023  CHECK(element.HasMember("name"));
3024  auto col_name = std::make_unique<std::string>(json_str(element["name"]));
3025  CHECK(element.HasMember("sqltype"));
3026  const auto sql_types = to_sql_type(json_str(element["sqltype"]));
3027 
3028  // decimal / numeric precision / scale
3029  int precision = -1;
3030  int scale = -1;
3031  if (element.HasMember("precision")) {
3032  precision = json_i64(element["precision"]);
3033  }
3034  if (element.HasMember("scale")) {
3035  scale = json_i64(element["scale"]);
3036  }
3037 
3038  std::optional<int64_t> array_size;
3039  if (element.HasMember("arraySize")) {
3040  // We do not yet support geo arrays
3041  array_size = json_i64(element["arraySize"]);
3042  }
3043  std::unique_ptr<SQLType> sql_type;
3044  if (element.HasMember("subtype")) {
3045  CHECK(element.HasMember("coordinateSystem"));
3046  const auto subtype_sql_types = to_sql_type(json_str(element["subtype"]));
3047  sql_type =
3048  std::make_unique<SQLType>(subtype_sql_types,
3049  static_cast<int>(sql_types),
3050  static_cast<int>(json_i64(element["coordinateSystem"])),
3051  false);
3052  } else if (precision > 0 && scale > 0) {
3053  sql_type = std::make_unique<SQLType>(sql_types,
3054  precision,
3055  scale,
3056  /*is_array=*/array_size.has_value(),
3057  array_size ? *array_size : -1);
3058  } else if (precision > 0) {
3059  sql_type = std::make_unique<SQLType>(sql_types,
3060  precision,
3061  0,
3062  /*is_array=*/array_size.has_value(),
3063  array_size ? *array_size : -1);
3064  } else {
3065  sql_type = std::make_unique<SQLType>(sql_types,
3066  /*is_array=*/array_size.has_value(),
3067  array_size ? *array_size : -1);
3068  }
3069  CHECK(sql_type);
3070 
3071  CHECK(element.HasMember("nullable"));
3072  const auto nullable = json_bool(element["nullable"]);
3073  std::unique_ptr<ColumnConstraintDef> constraint_def;
3074  StringLiteral* str_literal = nullptr;
3075  if (element.HasMember("default") && !element["default"].IsNull()) {
3076  std::string* defaultval = new std::string(json_str(element["default"]));
3077  boost::algorithm::trim_if(*defaultval, boost::is_any_of(" \"'`"));
3078  str_literal = new StringLiteral(defaultval);
3079  }
3080 
3081  constraint_def = std::make_unique<ColumnConstraintDef>(/*notnull=*/!nullable,
3082  /*unique=*/false,
3083  /*primarykey=*/false,
3084  /*defaultval=*/str_literal);
3085  std::unique_ptr<CompressDef> compress_def;
3086  if (element.HasMember("encodingType") && !element["encodingType"].IsNull()) {
3087  std::string encoding_type = json_str(element["encodingType"]);
3088  CHECK(element.HasMember("encodingSize"));
3089  auto encoding_name = std::make_unique<std::string>(json_str(element["encodingType"]));
3090  compress_def = std::make_unique<CompressDef>(encoding_name.release(),
3091  json_i64(element["encodingSize"]));
3092  }
3093  return std::make_unique<ColumnDef>(col_name.release(),
3094  sql_type.release(),
3095  compress_def ? compress_def.release() : nullptr,
3096  constraint_def ? constraint_def.release() : nullptr);
3097 }
3098 
3099 void parse_elements(const rapidjson::Value& payload,
3100  std::string element_name,
3101  std::string& table_name,
3102  std::list<std::unique_ptr<TableElement>>& table_element_list) {
3103  const auto elements = payload[element_name].GetArray();
3104  for (const auto& element : elements) {
3105  CHECK(element.IsObject());
3106  CHECK(element.HasMember("type"));
3107  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
3108  auto col_def = column_from_json(element);
3109  table_element_list.emplace_back(std::move(col_def));
3110  } else if (json_str(element["type"]) == "SQL_COLUMN_CONSTRAINT") {
3111  CHECK(element.HasMember("name"));
3112  if (json_str(element["name"]) == "SHARD_KEY") {
3113  CHECK(element.HasMember("columns"));
3114  CHECK(element["columns"].IsArray());
3115  const auto& columns = element["columns"].GetArray();
3116  if (columns.Size() != size_t(1)) {
3117  throw std::runtime_error("Only one shard column is currently supported.");
3118  }
3119  auto shard_key_def = std::make_unique<ShardKeyDef>(json_str(columns[0]));
3120  table_element_list.emplace_back(std::move(shard_key_def));
3121  } else if (json_str(element["name"]) == "SHARED_DICT") {
3122  CHECK(element.HasMember("columns"));
3123  CHECK(element["columns"].IsArray());
3124  const auto& columns = element["columns"].GetArray();
3125  if (columns.Size() != size_t(1)) {
3126  throw std::runtime_error(
3127  R"(Only one column per shared dictionary entry is currently supported. Use multiple SHARED DICT statements to share dictionaries from multiple columns.)");
3128  }
3129  CHECK(element.HasMember("references") && element["references"].IsObject());
3130  const auto& references = element["references"].GetObject();
3131  std::string references_table_name;
3132  if (references.HasMember("table")) {
3133  references_table_name = json_str(references["table"]);
3134  } else {
3135  references_table_name = table_name;
3136  }
3137  CHECK(references.HasMember("column"));
3138 
3139  auto shared_dict_def = std::make_unique<SharedDictionaryDef>(
3140  json_str(columns[0]), references_table_name, json_str(references["column"]));
3141  table_element_list.emplace_back(std::move(shared_dict_def));
3142 
3143  } else {
3144  LOG(FATAL) << "Unsupported type for SQL_COLUMN_CONSTRAINT: "
3145  << json_str(element["name"]);
3146  }
3147  } else {
3148  LOG(FATAL) << "Unsupported element type for CREATE TABLE: "
3149  << element["type"].GetString();
3150  }
3151  }
3152 }
3153 } // namespace
3154 
3155 CreateTableStmt::CreateTableStmt(const rapidjson::Value& payload) {
3156  CHECK(payload.HasMember("name"));
3157  table_ = std::make_unique<std::string>(json_str(payload["name"]));
3158  CHECK(payload.HasMember("elements"));
3159  CHECK(payload["elements"].IsArray());
3160 
3161  is_temporary_ = false;
3162  if (payload.HasMember("temporary")) {
3163  is_temporary_ = json_bool(payload["temporary"]);
3164  }
3165 
3166  if_not_exists_ = false;
3167  if (payload.HasMember("ifNotExists")) {
3168  if_not_exists_ = json_bool(payload["ifNotExists"]);
3169  }
3170 
3171  parse_elements(payload, "elements", *table_, table_element_list_);
3172 
3173  parse_options(payload, storage_options_);
3174 }
3175 
3177  TableDescriptor& td,
3178  std::list<ColumnDescriptor>& columns,
3179  std::vector<SharedDictionaryDef>& shared_dict_defs) {
// Fills `td`, `columns` and `shared_dict_defs` from the parsed table elements and
// storage options without creating anything in the catalog. Throws
// std::runtime_error for a second shard key, an unsupported table element, an
// unknown shard column, or a shard key without SHARD_COUNT.
// NOTE(review): the first signature line and several statements are missing from
// this listing (gaps in the embedded numbering); `uc_col_names` is presumably used
// on one of the missing lines.
3180  std::unordered_set<std::string> uc_col_names;
3181  const auto& catalog = session.getCatalog();
3182  const ShardKeyDef* shard_key_def{nullptr};
3183  for (auto& e : table_element_list_) {
3184  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
3185  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
3187  this, shared_dict_def, columns, shared_dict_defs, catalog);
3188  shared_dict_defs.push_back(*shared_dict_def);
3189  continue;
3190  }
3191  if (dynamic_cast<ShardKeyDef*>(e.get())) {
3192  if (shard_key_def) {
3193  throw std::runtime_error("Specified more than one shard key");
3194  }
3195  shard_key_def = static_cast<const ShardKeyDef*>(e.get());
3196  continue;
3197  }
3198  if (!dynamic_cast<ColumnDef*>(e.get())) {
3199  throw std::runtime_error("Table constraints are not supported yet.");
3200  }
3201  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
3202  ColumnDescriptor cd;
3203  cd.columnName = *coldef->get_column_name();
3205  setColumnDescriptor(cd, coldef);
3206  columns.push_back(cd);
3207  }
3208 
3209  ddl_utils::set_default_table_attributes(*table_, td, columns.size());
3210 
// The shard column is resolved only after all columns are known.
3211  if (shard_key_def) {
3212  td.shardedColumnId = shard_column_index(shard_key_def->get_column(), columns);
3213  if (!td.shardedColumnId) {
3214  throw std::runtime_error("Specified shard column " + shard_key_def->get_column() +
3215  " doesn't exist");
3216  }
3217  }
3218  if (is_temporary_) {
3220  } else {
3222  }
// Storage options are applied after the defaults so that they can override them.
3223  if (!storage_options_.empty()) {
3224  for (auto& p : storage_options_) {
3225  get_table_definitions(td, p, columns);
3226  }
3227  }
3228  if (td.shardedColumnId && !td.nShards) {
3229  throw std::runtime_error("SHARD_COUNT needs to be specified with SHARD_KEY.");
3230  }
3231  td.keyMetainfo = serialize_key_metainfo(shard_key_def, shared_dict_defs);
3232 }
3233 
// Creates the table: checks create privileges, returns early when
// validateNonExistentTableOrView() reports nothing to do, builds the descriptors
// via executeDryRun(), creates the table in the catalog and then registers the
// table DB object for the current user.
// NOTE(review): the signature line, the lock arguments and the privilege-check
// condition are missing from this listing (gaps in the embedded numbering).
3235  auto& catalog = session.getCatalog();
3236 
3237  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3240 
3241  // check access privileges
3244  throw std::runtime_error("Table " + *table_ +
3245  " will not be created. User has no create privileges.");
3246  }
3247 
3248  if (!catalog.validateNonExistentTableOrView(*table_, if_not_exists_)) {
3249  return;
3250  }
3251 
3252  TableDescriptor td;
3253  std::list<ColumnDescriptor> columns;
3254  std::vector<SharedDictionaryDef> shared_dict_defs;
3255 
3256  executeDryRun(session, td, columns, shared_dict_defs);
3257  td.userId = session.get_currentUser().userId;
3258 
3259  catalog.createShardedTable(td, columns, shared_dict_defs);
3260  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
3261  // privileges
3262  SysCatalog::instance().createDBObject(
3263  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
3264 }
3265 
3266 CreateDataframeStmt::CreateDataframeStmt(const rapidjson::Value& payload) {
3267  CHECK(payload.HasMember("name"));
3268  table_ = std::make_unique<std::string>(json_str(payload["name"]));
3269 
3270  CHECK(payload.HasMember("elementList"));
3271  parse_elements(payload, "elementList", *table_, table_element_list_);
3272 
3273  CHECK(payload.HasMember("filePath"));
3274  std::string fs = json_str(payload["filePath"]);
3275  // strip leading/trailing spaces/quotes/single quotes
3276  boost::algorithm::trim_if(fs, boost::is_any_of(" \"'`"));
3277  filename_ = std::make_unique<std::string>(fs);
3278 
3279  parse_options(payload, storage_options_);
3280 }
3281 
// Creates a dataframe table: checks create privileges, refuses an existing table
// name, builds column descriptors (rejecting duplicate column names
// case-insensitively), applies the dataframe options and creates the table with the
// file name stored in storageType.
// NOTE(review): the signature line, the declaration of `df_td` and several other
// statements are missing from this listing (gaps in the embedded numbering).
3283  auto& catalog = session.getCatalog();
3284 
3285  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
3288 
3289  // check access privileges
3292  throw std::runtime_error("Table " + *table_ +
3293  " will not be created. User has no create privileges.");
3294  }
3295 
3296  if (catalog.getMetadataForTable(*table_) != nullptr) {
3297  throw std::runtime_error("Table " + *table_ + " already exists.");
3298  }
3300  std::list<ColumnDescriptor> columns;
3301  std::vector<SharedDictionaryDef> shared_dict_defs;
3302 
3303  std::unordered_set<std::string> uc_col_names;
3304  for (auto& e : table_element_list_) {
3305  if (dynamic_cast<SharedDictionaryDef*>(e.get())) {
3306  auto shared_dict_def = static_cast<SharedDictionaryDef*>(e.get());
3308  this, shared_dict_def, columns, shared_dict_defs, catalog);
3309  shared_dict_defs.push_back(*shared_dict_def);
3310  continue;
3311  }
3312  if (!dynamic_cast<ColumnDef*>(e.get())) {
3313  throw std::runtime_error("Table constraints are not supported yet.");
3314  }
3315  ColumnDef* coldef = static_cast<ColumnDef*>(e.get());
3316  ColumnDescriptor cd;
3317  cd.columnName = *coldef->get_column_name();
// Column names must be unique ignoring case: track the upper-cased names.
3318  const auto uc_col_name = boost::to_upper_copy<std::string>(cd.columnName);
3319  const auto it_ok = uc_col_names.insert(uc_col_name);
3320  if (!it_ok.second) {
3321  throw std::runtime_error("Column '" + cd.columnName + "' defined more than once");
3322  }
3323  setColumnDescriptor(cd, coldef);
3324  columns.push_back(cd);
3325  }
3326 
3327  df_td.tableName = *table_;
3328  df_td.nColumns = columns.size();
3329  df_td.isView = false;
3330  df_td.fragmenter = nullptr;
3335  df_td.maxRows = DEFAULT_MAX_ROWS;
3337  if (!storage_options_.empty()) {
3338  for (auto& p : storage_options_) {
3339  get_dataframe_definitions(df_td, p, columns);
3340  }
3341  }
3342  df_td.keyMetainfo = serialize_key_metainfo(nullptr, shared_dict_defs);
3343  df_td.userId = session.get_currentUser().userId;
3344  df_td.storageType = *filename_;
3345 
3346  catalog.createShardedTable(df_td, columns, shared_dict_defs);
3347  // TODO (max): It's transactionally unsafe, should be fixed: we may create object w/o
3348  // privileges
3349  SysCatalog::instance().createDBObject(
3350  session.get_currentUser(), df_td.tableName, TableDBObjectType, catalog);
3351 }
3352 
// Plans `select_stmt` through Calcite (after pg_shim) and executes the plan with a
// RelAlgExecutor.
//
// On return `targets` holds the target metadata of the result, and the function
// returns the result rows. `validate_only`, `allow_interrupt` and
// `outer_fragment_indices` are forwarded into the ExecutionOptions initializer.
// Without HAVE_CUDA the device type is always CPU; with it, the session's executor
// device type is used.
// NOTE(review): several lines are missing from this listing (gaps in the embedded
// numbering), including the declarations of `executor` and `co` and parts of the
// ExecutionOptions and ResultSet initializers.
3353 std::shared_ptr<ResultSet> getResultSet(QueryStateProxy query_state_proxy,
3354  const std::string select_stmt,
3355  std::vector<TargetMetaInfo>& targets,
3356  bool validate_only = false,
3357  std::vector<size_t> outer_fragment_indices = {},
3358  bool allow_interrupt = false) {
3359  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
3360  auto& catalog = session->getCatalog();
3361 
3363 #ifdef HAVE_CUDA
3364  const auto device_type = session->get_executor_device_type();
3365 #else
3366  const auto device_type = ExecutorDeviceType::CPU;
3367 #endif // HAVE_CUDA
3368  auto calcite_mgr = catalog.getCalciteMgr();
3369 
3370  // TODO MAT this should actually get the global or the session parameter for
3371  // view optimization
3372  const auto calciteQueryParsingOption =
3373  calcite_mgr->getCalciteQueryParsingOption(true, false, true);
3374  const auto calciteOptimizationOption =
3375  calcite_mgr->getCalciteOptimizationOption(false, g_enable_watchdog, {});
3376  const auto query_ra = calcite_mgr
3377  ->process(query_state_proxy,
3378  pg_shim(select_stmt),
3379  calciteQueryParsingOption,
3380  calciteOptimizationOption)
3381  .plan_result;
3382  RelAlgExecutor ra_executor(executor.get(),
3383  catalog,
3384  query_ra,
3385  query_state_proxy.getQueryState().shared_from_this());
3387  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
3388  // struct
3389  ExecutionOptions eo = {false,
3390  false,
3391  true,
3392  false,
3393  true,
3394  false,
3395  false,
3396  validate_only,
3397  false,
3398  10000,
3399  false,
3400  false,
3401  1000,
3402  allow_interrupt,
3406  outer_fragment_indices};
3407 
// `result` starts as a placeholder ExecutionResult and is overwritten by the
// executor's result below.
3408  ExecutionResult result{std::make_shared<ResultSet>(std::vector<TargetInfo>{},
3411  nullptr,
3412  nullptr,
3413  0,
3414  0),
3415  {}};
3416  result = ra_executor.executeRelAlgQuery(co, eo, false, nullptr);
3417  targets = result.getTargetsMeta();
3418 
3419  return result.getRows();
3420 }
3421 
3423  std::string& sql_query_string) {
// Plans `sql_query_string` through Calcite (after pg_shim) and returns
// RelAlgExecutor::getOuterFragmentCount() for that plan; the query itself is not
// executed in the visible code.
// NOTE(review): the first signature line and the declaration of `executor` are
// missing from this listing (gaps in the embedded numbering).
3424  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
3425  auto& catalog = session->getCatalog();
3426 
3428 #ifdef HAVE_CUDA
3429  const auto device_type = session->get_executor_device_type();
3430 #else
3431  const auto device_type = ExecutorDeviceType::CPU;
3432 #endif // HAVE_CUDA
3433  auto calcite_mgr = catalog.getCalciteMgr();
3434 
3435  // TODO MAT this should actually get the global or the session parameter for
3436  // view optimization
3437  const auto calciteQueryParsingOption =
3438  calcite_mgr->getCalciteQueryParsingOption(true, false, true);
3439  const auto calciteOptimizationOption =
3440  calcite_mgr->getCalciteOptimizationOption(false, g_enable_watchdog, {});
3441  const auto query_ra = calcite_mgr
3442  ->process(query_state_proxy,
3443  pg_shim(sql_query_string),
3444  calciteQueryParsingOption,
3445  calciteOptimizationOption)
3446  .plan_result;
3447  RelAlgExecutor ra_executor(executor.get(), catalog, query_ra);
3448  CompilationOptions co = {device_type, true, ExecutorOptLevel::Default, false};
3449  // TODO(adb): Need a better method of dropping constants into this ExecutionOptions
3450  // struct
3451  ExecutionOptions eo = {false,
3452  false,
3453  true,
3454  false,
3455  true,
3456  false,
3457  false,
3458  false,
3459  false,
3460  10000,
3461  false,
3462  false,
3463  0.9,
3464  false};
3465  return ra_executor.getOuterFragmentCount(co, eo);
3466 }
3467 
3469  std::string& sql_query_string,
3470  std::vector<size_t> outer_frag_indices,
3471  bool validate_only,
3472  bool allow_interrupt) {
3473  // TODO(PS): Should we be using the shimmed query in getResultSet?
3474  std::string pg_shimmed_select_query = pg_shim(sql_query_string);
3475 
3476  std::vector<TargetMetaInfo> target_metainfos;
3478  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
3479  auto query_session = session ? session->get_session_id() : "";
3480  auto query_submitted_time = query_state_proxy.getQueryState().getQuerySubmittedTime();
3481  if (allow_interrupt && !validate_only && !query_session.empty()) {
3482  executor->enrollQuerySession(query_session,
3483  sql_query_string,
3484  query_submitted_time,
3486  QuerySessionStatus::QueryStatus::PENDING_EXECUTOR);
3487  }
3488  auto result_rows = getResultSet(query_state_proxy,
3489  sql_query_string,
3490  target_metainfos,
3491  validate_only,
3492  outer_frag_indices,
3493  allow_interrupt);
3494  AggregatedResult res = {result_rows, target_metainfos};
3495  return res;
3496 }
3497 
3498 std::vector<AggregatedResult> LocalQueryConnector::query(
3499  QueryStateProxy query_state_proxy,
3500  std::string& sql_query_string,
3501  std::vector<size_t> outer_frag_indices,
3502  bool allow_interrupt) {
3503  auto res = query(
3504  query_state_proxy, sql_query_string, outer_frag_indices, false, allow_interrupt);
3505  return {res};
3506 }
3507 
// Derives column descriptors from the target metadata of a query result.
//
// Two lists are built in lockstep, one entry per element of
// `result.targets_meta`:
//   - `column_descriptors`: name and physical type taken directly from the target;
//   - `column_descriptors_for_create`: a copy of the same descriptor whose comp
//     param may be adjusted (see the branches below).
// `for_create` selects which of the two lists is returned.
//
// NOTE(review): the declaration of `result` sits on a line that is missing from
// this listing; only its `targets_meta` member is used here.
3508 std::list<ColumnDescriptor> LocalQueryConnector::getColumnDescriptors(
3510  bool for_create) {
3511  std::list<ColumnDescriptor> column_descriptors;
3512  std::list<ColumnDescriptor> column_descriptors_for_create;
3513 
      // Counter appended to every target literally named "rowid", producing
      // "rowid0", "rowid1", ... (presumably to avoid clashing with a reserved
      // column name -- confirm).
3514  int rowid_suffix = 0;
3515  for (const auto& target_metainfo : result.targets_meta) {
3516  ColumnDescriptor cd;
3517  cd.columnName = target_metainfo.get_resname();
3518  if (cd.columnName == "rowid") {
3519  cd.columnName += std::to_string(rowid_suffix++);
3520  }
3521  cd.columnType = target_metainfo.get_physical_type_info();
3522 
      // Start the "for create" variant as an exact copy; only its comp param is
      // touched below, `cd` itself stays as derived from the target.
3523  ColumnDescriptor cd_for_create = cd;
3524 
      // NOTE(review): the condition opening this branch is on a line missing from
      // this listing; the comments below imply it covers dictionary-encoded
      // columns -- confirm against the real source.
3526  // we need to reset the comp param (as this points to the actual dictionary)
3527  if (cd.columnType.is_array()) {
3528  // for dict encoded arrays, it is always 4 bytes
3529  cd_for_create.columnType.set_comp_param(32);
3530  } else {
      // Non-array case: comp param becomes get_size() * 8 (presumably the
      // byte size expressed in bits -- confirm).
3531  cd_for_create.columnType.set_comp_param(cd.columnType.get_size() * 8);
3532  }
3533  }
3534 
      // Dates that are not already stored in days: the create descriptor gets its
      // comp param reset to 0. NOTE(review): one statement of this branch is
      // missing from this listing (likely the one that sets the encoding).
3535  if (cd.columnType.is_date() && !cd.columnType.is_date_in_days()) {
3536  // default to kENCODING_DATE_IN_DAYS encoding
3538  cd_for_create.columnType.set_comp_param(0);
3539  }
3540 
3541  column_descriptors_for_create.push_back(cd_for_create);
3542  column_descriptors.push_back(cd);
3543  }
3544 
3545  if (for_create) {
3546  return column_descriptors_for_create;
3547  }
3548 
3549  return column_descriptors;
3550 }
3551 
3553  const rapidjson::Value& payload) {
3554  CHECK(payload.HasMember("name"));
3555  table_name_ = json_str(payload["name"]);
3556 
3557  CHECK(payload.HasMember("query"));
3558  select_query_ = json_str(payload["query"]);
3559 
3560  boost::replace_all(select_query_, "\n", " ");
3561  select_query_ = "(" + select_query_ + ")";
3562 
3563  if (payload.HasMember("columns")) {
3564  CHECK(payload["columns"].IsArray());
3565  for (auto& column : payload["columns"].GetArray()) {
3566  std::string s = json_str(column);
3567  column_list_.emplace_back(std::unique_ptr<std::string>(new std::string(s)));
3568  }
3569  }
3570 }
3571 
3573  const TableDescriptor* td,
3574  bool validate_table,
3575  bool for_CTAS) {
3576  auto const session = query_state_proxy.getQueryState().getConstSessionInfo();
3577  auto& catalog = session->getCatalog();
3579 
3580  LocalQueryConnector local_connector;
3581  bool populate_table = false;
3582 
3583  if (leafs_connector_) {
3584  populate_table = true;
3585  } else {
3586  leafs_connector_ = &local_connector;
3587  if (!g_cluster) {
3588  populate_table = true;
3589  }
3590  }
3591 
3592  auto get_target_column_descriptors = [this, &catalog](const TableDescriptor* td) {
3593  std::vector<const ColumnDescriptor*> target_column_descriptors;
3594  if (column_list_.empty()) {
3595  auto list = catalog.getAllColumnMetadataForTable(td->tableId, false, false, false);
3596  target_column_descriptors = {std::begin(list), std::end(list)};
3597  } else {
3598  for (auto& c : column_list_) {
3599  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *c);
3600  if (cd == nullptr) {
3601  throw std::runtime_error("Column " + *c + " does not exist.");
3602  }
3603  target_column_descriptors.push_back(cd);
3604  }
3605  }
3606 
3607  return target_column_descriptors;
3608  };
3609 
3610  bool is_temporary = table_is_temporary(td);
3611 
3612  if (validate_table) {
3613  // check access privileges
3614  if (!td) {
3615  throw std::runtime_error("Table " + table_name_ + " does not exist.");
3616  }
3617  if (td->isView) {
3618  throw std::runtime_error("Insert to views is not supported yet.");
3619  }
3620 
3621  if (!session->checkDBAccessPrivileges(DBObjectType::TableDBObjectType,
3623  table_name_)) {
3624  throw std::runtime_error("User has no insert privileges on " + table_name_ + ".");
3625  }
3626 
3627  // only validate the select query so we get the target types
3628  // correctly, but do not populate the result set
3629  auto result = local_connector.query(query_state_proxy, select_query_, {}, true, true);
3630  auto source_column_descriptors = local_connector.getColumnDescriptors(result, false);
3631 
3632  std::vector<const ColumnDescriptor*> target_column_descriptors =
3633  get_target_column_descriptors(td);
3634 
3635  if (source_column_descriptors.size() != target_column_descriptors.size()) {
3636  throw std::runtime_error("The number of source and target columns does not match.");
3637  }
3638 
3639  for (int i = 0; i < source_column_descriptors.size(); i++) {
3640  const ColumnDescriptor* source_cd =
3641  &(*std::next(source_column_descriptors.begin(), i));
3642  const ColumnDescriptor* target_cd = target_column_descriptors.at(i);
3643 
3644  if (source_cd->columnType.get_type() != target_cd->columnType.get_type()) {
3645  auto type_cannot_be_cast = [](const auto& col_type) {
3646  return (col_type.is_time() || col_type.is_geometry() || col_type.is_array() ||
3647  col_type.is_boolean());
3648  };
3649 
3650  if (type_cannot_be_cast(source_cd->columnType) ||
3651  type_cannot_be_cast(target_cd->columnType)) {
3652  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3653  source_cd->columnType.get_type_name() +
3654  "' and target '" + target_cd->columnName + " " +
3655  target_cd->columnType.get_type_name() +
3656  "' column types do not match.");
3657  }
3658  }
3659  if (source_cd->columnType.is_array()) {
3660  if (source_cd->columnType.get_subtype() != target_cd->columnType.get_subtype()) {
3661  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3662  source_cd->columnType.get_type_name() +
3663  "' and target '" + target_cd->columnName + " " +
3664  target_cd->columnType.get_type_name() +
3665  "' array column element types do not match.");
3666  }
3667  }
3668 
3669  if (source_cd->columnType.is_decimal() ||
3670  source_cd->columnType.get_elem_type().is_decimal()) {
3671  SQLTypeInfo sourceType = source_cd->columnType;
3672  SQLTypeInfo targetType = target_cd->columnType;
3673 
3674  if (source_cd->columnType.is_array()) {
3675  sourceType = source_cd->columnType.get_elem_type();
3676  targetType = target_cd->columnType.get_elem_type();
3677  }
3678 
3679  if (sourceType.get_scale() != targetType.get_scale()) {
3680  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3681  source_cd->columnType.get_type_name() +
3682  "' and target '" + target_cd->columnName + " " +
3683  target_cd->columnType.get_type_name() +
3684  "' decimal columns scales do not match.");
3685  }
3686  }
3687 
3688  if (source_cd->columnType.is_string()) {
3689  if (!target_cd->columnType.is_string()) {
3690  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3691  source_cd->columnType.get_type_name() +
3692  "' and target '" + target_cd->columnName + " " +
3693  target_cd->columnType.get_type_name() +
3694  "' column types do not match.");
3695  }
3696  if (source_cd->columnType.get_compression() !=
3697  target_cd->columnType.get_compression()) {
3698  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3699  source_cd->columnType.get_type_name() +
3700  "' and target '" + target_cd->columnName + " " +
3701  target_cd->columnType.get_type_name() +
3702  "' columns string encodings do not match.");
3703  }
3704  }
3705 
3706  if (source_cd->columnType.is_timestamp() && target_cd->columnType.is_timestamp()) {
3707  if (source_cd->columnType.get_dimension() !=
3708  target_cd->columnType.get_dimension()) {
3709  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3710  source_cd->columnType.get_type_name() +
3711  "' and target '" + target_cd->columnName + " " +
3712  target_cd->columnType.get_type_name() +
3713  "' timestamp column precisions do not match.");
3714  }
3715  }
3716 
3717  if (!source_cd->columnType.is_string() && !source_cd->columnType.is_geometry() &&
3718  !source_cd->columnType.is_integer() && !source_cd->columnType.is_decimal() &&
3719  !source_cd->columnType.is_date() && !source_cd->columnType.is_time() &&
3720  !source_cd->columnType.is_timestamp() &&
3721  source_cd->columnType.get_size() > target_cd->columnType.get_size()) {
3722  throw std::runtime_error("Source '" + source_cd->columnName + " " +
3723  source_cd->columnType.get_type_name() +
3724  "' and target '" + target_cd->columnName + " " +
3725  target_cd->columnType.get_type_name() +
3726  "' column encoding sizes do not match.");
3727  }
3728  }
3729  }
3730 
3731  if (!populate_table) {
3732  return;
3733  }
3734 
3735  int64_t total_row_count = 0;
3736  int64_t total_source_query_time_ms = 0;
3737  int64_t total_target_value_translate_time_ms = 0;
3738  int64_t total_data_load_time_ms = 0;
3739 
3741  auto target_column_descriptors = get_target_column_descriptors(td);
3742  auto outer_frag_count =
3744 
3745  size_t outer_frag_end = outer_frag_count == 0 ? 1 : outer_frag_count;
3746  auto query_session = session ? session->get_session_id() : "";
3748  std::string work_type_str = for_CTAS ? "CTAS" : "ITAS";
3749  try {
3750  for (size_t outer_frag_idx = 0; outer_frag_idx < outer_frag_end; outer_frag_idx++) {
3751  std::vector<size_t> allowed_outer_fragment_indices;
3752 
3753  if (outer_frag_count) {
3754  allowed_outer_fragment_indices.push_back(outer_frag_idx);
3755  }
3756 
3757  const auto query_clock_begin = timer_start();
3758  std::vector<AggregatedResult> query_results =
3759  leafs_connector_->query(query_state_proxy,
3760  select_query_,
3761  allowed_outer_fragment_indices,
3763  total_source_query_time_ms += timer_stop(query_clock_begin);
3764 
3765  auto start_time = query_state_proxy.getQueryState().getQuerySubmittedTime();
3766  auto query_str = "INSERT_DATA for " + work_type_str;
3768  // In the clean-up phase of the query execution for collecting aggregated result
3769  // of SELECT query, we remove its query session info, so we need to enroll the
3770  // session info again
3771  executor->enrollQuerySession(query_session,
3772  query_str,
3773  start_time,
3775  QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
3776  }
3777 
3778  ScopeGuard clearInterruptStatus = [executor, &query_session, &start_time] {
3779  // this data population is non-kernel operation, so we manually cleanup
3780  // the query session info in the cleanup phase
3782  executor->clearQuerySessionStatus(query_session, start_time);
3783  }
3784  };
3785 
3786  for (auto& res : query_results) {
3787  if (UNLIKELY(check_session_interrupted(query_session, executor))) {
3788  throw std::runtime_error(
3789  "Query execution has been interrupted while performing " + work_type_str);
3790  }
3791  auto& result_rows = res.rs;
3792  result_rows->setGeoReturnType(ResultSet::GeoReturnType::GeoTargetValue);
3793  const auto num_rows = result_rows->rowCount();
3794 
3795  if (0 == num_rows) {
3796  continue;
3797  }
3798 
3799  total_row_count += num_rows;
3800 
3801  size_t leaf_count = leafs_connector_->leafCount();
3802 
3803  // ensure that at least 1 row is processed per block up to a maximum of 65536 rows
3804  const size_t rows_per_block =
3805  std::max(std::min(num_rows / leaf_count, size_t(64 * 1024)), size_t(1));
3806 
3807  std::vector<std::unique_ptr<TargetValueConverter>> value_converters;
3808 
3810 
3811  const int num_worker_threads = std::thread::hardware_concurrency();
3812 
3813  std::vector<size_t> thread_start_idx(num_worker_threads),
3814  thread_end_idx(num_worker_threads);
3815  bool can_go_parallel = !result_rows->isTruncated() && rows_per_block > 20000;
3816 
3817  std::atomic<size_t> crt_row_idx{0};
3818 
3819  auto do_work = [&result_rows, &value_converters, &crt_row_idx](
3820  const size_t idx,
3821  const size_t block_end,
3822  const size_t num_cols,
3823  const size_t thread_id,
3824  bool& stop_convert) {
3825  const auto result_row = result_rows->getRowAtNoTranslations(idx);
3826  if (!result_row.empty()) {
3827  size_t target_row = crt_row_idx.fetch_add(1);
3828  if (target_row >= block_end) {
3829  stop_convert = true;
3830  return;
3831  }
3832  for (unsigned int col = 0; col < num_cols; col++) {
3833  const auto& mapd_variant = result_row[col];
3834  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3835  }
3836  }
3837  };
3838 
3839  auto convert_function = [&thread_start_idx,
3840  &thread_end_idx,
3841  &value_converters,
3842  &executor,
3843  &query_session,
3844  &work_type_str,
3845  &do_work](const int thread_id, const size_t block_end) {
3846  const int num_cols = value_converters.size();
3847  const size_t start = thread_start_idx[thread_id];
3848  const size_t end = thread_end_idx[thread_id];
3849  size_t idx = 0;
3850  bool stop_convert = false;
3852  size_t local_idx = 0;
3853  for (idx = start; idx < end; ++idx, ++local_idx) {
3854  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3855  check_session_interrupted(query_session, executor))) {
3856  throw std::runtime_error(
3857  "Query execution has been interrupted while performing " +
3858  work_type_str);
3859  }
3860  do_work(idx, block_end, num_cols, thread_id, stop_convert);
3861  if (stop_convert) {
3862  break;
3863  }
3864  }
3865  } else {
3866  for (idx = start; idx < end; ++idx) {
3867  do_work(idx, block_end, num_cols, thread_id, stop_convert);
3868  if (stop_convert) {
3869  break;
3870  }
3871  }
3872  }
3873  thread_start_idx[thread_id] = idx;
3874  };
3875 
3876  auto single_threaded_value_converter =
3877  [&crt_row_idx, &value_converters, &result_rows](const size_t idx,
3878  const size_t block_end,
3879  const size_t num_cols,
3880  bool& stop_convert) {
3881  size_t target_row = crt_row_idx.fetch_add(1);
3882  if (target_row >= block_end) {
3883  stop_convert = true;
3884  return;
3885  }
3886  const auto result_row = result_rows->getNextRow(false, false);
3887  CHECK(!result_row.empty());
3888  for (unsigned int col = 0; col < num_cols; col++) {
3889  const auto& mapd_variant = result_row[col];
3890  value_converters[col]->convertToColumnarFormat(target_row, &mapd_variant);
3891  }
3892  };
3893 
3894  auto single_threaded_convert_function = [&value_converters,
3895  &thread_start_idx,
3896  &thread_end_idx,
3897  &executor,
3898  &query_session,
3899  &work_type_str,
3900  &single_threaded_value_converter](
3901  const int thread_id,
3902  const size_t block_end) {
3903  const int num_cols = value_converters.size();
3904  const size_t start = thread_start_idx[thread_id];
3905  const size_t end = thread_end_idx[thread_id];
3906  size_t idx = 0;
3907  bool stop_convert = false;
3909  size_t local_idx = 0;
3910  for (idx = start; idx < end; ++idx, ++local_idx) {
3911  if (UNLIKELY((local_idx & 0xFFFF) == 0 &&
3912  check_session_interrupted(query_session, executor))) {
3913  throw std::runtime_error(
3914  "Query execution has been interrupted while performing " +
3915  work_type_str);
3916  }
3917  single_threaded_value_converter(idx, block_end, num_cols, stop_convert);
3918  if (stop_convert) {
3919  break;
3920  }
3921  }
3922  } else {
3923  for (idx = start; idx < end; ++idx) {
3924  single_threaded_value_converter(idx, end, num_cols, stop_convert);
3925  if (stop_convert) {
3926  break;
3927  }
3928  }
3929  }
3930  thread_start_idx[thread_id] = idx;
3931  };
3932 
3933  if (can_go_parallel) {
3934  const size_t entry_count = result_rows->entryCount();
3935  for (size_t
3936  i = 0,
3937  start_entry = 0,
3938  stride = (entry_count + num_worker_threads - 1) / num_worker_threads;
3939  i < num_worker_threads && start_entry < entry_count;
3940  ++i, start_entry += stride) {
3941  const auto end_entry = std::min(start_entry + stride, entry_count);
3942  thread_start_idx[i] = start_entry;
3943  thread_end_idx[i] = end_entry;
3944  }
3945  } else {
3946  thread_start_idx[0] = 0;
3947  thread_end_idx[0] = result_rows->entryCount();
3948  }
3949 
3950  RenderGroupAnalyzerMap render_group_analyzer_map;
3951 
3952  for (size_t block_start = 0; block_start < num_rows;
3953  block_start += rows_per_block) {
3954  const auto num_rows_this_itr = block_start + rows_per_block < num_rows
3955  ? rows_per_block
3956  : num_rows - block_start;
3957  crt_row_idx = 0; // reset block tracker
3958  value_converters.clear();
3959  int colNum = 0;
3960  for (const auto targetDescriptor : target_column_descriptors) {
3961  auto sourceDataMetaInfo = res.targets_meta[colNum++];
3963  num_rows_this_itr,
3964  catalog,
3965  sourceDataMetaInfo,
3966  targetDescriptor,
3967  targetDescriptor->columnType,
3968  !targetDescriptor->columnType.get_notnull(),
3969  result_rows->getRowSetMemOwner()->getLiteralStringDictProxy(),
3971  sourceDataMetaInfo.get_type_info().is_dict_encoded_string()
3972  ? executor->getStringDictionaryProxy(
3973  sourceDataMetaInfo.get_type_info().get_comp_param(),
3974  result_rows->getRowSetMemOwner(),
3975  true)
3976  : nullptr,
3977  IS_GEO_POLY(targetDescriptor->columnType.get_type()) &&
3979  ? &render_group_analyzer_map
3980  : nullptr};
3981  auto converter = factory.create(param);
3982  value_converters.push_back(std::move(converter));
3983  }
3984 
3985  const auto translate_clock_begin = timer_start();
3986  if (can_go_parallel) {
3987  std::vector<std::future<void>> worker_threads;
3988  for (int i = 0; i < num_worker_threads; ++i) {
3989  worker_threads.push_back(
3990  std::async(std::launch::async, convert_function, i, num_rows_this_itr));
3991  }
3992 
3993  for (auto& child : worker_threads) {
3994  child.wait();
3995  }
3996  for (auto& child : worker_threads) {
3997  child.get();
3998  }
3999 
4000  } else {
4001  single_threaded_convert_function(0, num_rows_this_itr);
4002  }
4003 
4004  // finalize the insert data
4005  auto finalizer_func =
4006  [](std::unique_ptr<TargetValueConverter>::pointer targetValueConverter) {
4007  targetValueConverter->finalizeDataBlocksForInsertData();
4008  };
4009 
4010  std::vector<std::future<void>> worker_threads;
4011  for (auto& converterPtr : value_converters) {
4012  worker_threads.push_back(
4013  std::async(std::launch::async, finalizer_func, converterPtr.get()));
4014  }
4015 
4016  for (auto& child : worker_threads) {
4017  child.wait();
4018  }
4019  for (auto& child : worker_threads) {
4020  child.get();
4021  }
4022 
4024  insert_data.databaseId = catalog.getCurrentDB().dbId;
4025  CHECK(td);
4026  insert_data.tableId = td->tableId;
4027  insert_data.numRows = num_rows_this_itr;
4028 
4029  for (int col_idx = 0; col_idx < target_column_descriptors.size(); col_idx++) {
4031  check_session_interrupted(query_session, executor))) {
4032  throw std::runtime_error(
4033  "Query execution has been interrupted while performing " +
4034  work_type_str);
4035  }
4036  value_converters[col_idx]->addDataBlocksToInsertData(insert_data);
4037  }
4038  total_target_value_translate_time_ms += timer_stop(translate_clock_begin);
4039 
4040  const auto data_load_clock_begin = timer_start();
4041  auto data_memory_holder =
4042  import_export::fill_missing_columns(&catalog, insert_data);
4043  insertDataLoader.insertData(*session, insert_data);
4044  total_data_load_time_ms += timer_stop(data_load_clock_begin);
4045  }
4046  }
4047  }
4048  } catch (...) {
4049  try {
4050  leafs_connector_->rollback(*session, td->tableId);
4051  } catch (std::exception& e) {
4052  LOG(ERROR) << "An error occurred during ITAS rollback attempt. Table id: "
4053  << td->tableId << ", Error: " << e.what();
4054  }
4055  throw;
4056  }
4057 
4058  int64_t total_time_ms = total_source_query_time_ms +
4059  total_target_value_translate_time_ms + total_data_load_time_ms;
4060 
4061  VLOG(1) << "CTAS/ITAS " << total_row_count << " rows loaded in " << total_time_ms
4062  << "ms (outer_frag_count=" << outer_frag_count
4063  << ", query_time=" << total_source_query_time_ms
4064  << "ms, translation_time=" << total_target_value_translate_time_ms
4065  << "ms, data_load_time=" << total_data_load_time_ms
4066  << "ms)\nquery: " << select_query_;
4067 
4068  if (!is_temporary) {
4069  leafs_connector_->checkpoint(*session, td->tableId);
4070  }
4071 }
4072 
// File-local helpers used when taking table locks for a query.
// NOTE(review): the signature lines of both helpers, the declaration of `locks`,
// and the lock-construction expressions are missing from this listing.
4073 namespace {
4075  const std::string& table_name) {
      // Resolves `table_name` to its id through catalog.getTableId(); throws
      // std::runtime_error when no id is returned.
4076  auto table_id = catalog.getTableId(table_name);
4077  if (!table_id.has_value()) {
4078  throw std::runtime_error{"Table \"" + table_name + "\" does not exist."};
4079  }
4080  return table_id.value();
4081 }
4082 
4084  const Catalog_Namespace::Catalog& catalog,
4085  const std::string& query_str,
4086  const QueryStateProxy& query_state_proxy,
4087  const std::optional<std::string>& insert_table_name = {}) {
      // Run `query_str` (after pg_shim) through the Calcite manager; only the
      // resolved list of tables selected from is used from the result.
4088  auto calcite_mgr = catalog.getCalciteMgr();
4089  const auto calciteQueryParsingOption =
4090  calcite_mgr->getCalciteQueryParsingOption(true, false, true);
4091  const auto calciteOptimizationOption =
4092  calcite_mgr->getCalciteOptimizationOption(false, g_enable_watchdog, {});
4093  const auto result = calcite_mgr->process(query_state_proxy,
4094  pg_shim(query_str),
4095  calciteQueryParsingOption,
4096  calciteOptimizationOption);
4097  // force sort into tableid order in case of name change to guarantee fixed order of
4098  // mutex access
      // The comparator orders names by their table id, so names resolving to the
      // same id are treated as equivalent by the set. It calls get_table_id(),
      // which throws for a name that has no id.
4099  auto comparator = [&catalog](const std::string& table_1, const std::string& table_2) {
4100  return get_table_id(catalog, table_1) < get_table_id(catalog, table_2);
4101  };
4102  std::set<std::string, decltype(comparator)> tables(comparator);
      // tab[0] is the first element of each entry (presumably the table name --
      // confirm against the Calcite result layout).
4103  for (auto& tab : result.resolved_accessed_objects.tables_selected_from) {
4104  tables.emplace(tab[0]);
4105  }
      // The optional insert target is locked together with the selected tables.
4106  if (insert_table_name.has_value()) {
4107  tables.emplace(insert_table_name.value());
4108  }
      // For every table, in set order: first a lock built from (catalog, table),
      // then a second lock keyed on the database id and the value obtained by
      // invoking the lock just added. The insert target takes a different branch
      // from the other tables (presumably write vs. read data lock -- the lock
      // types are on lines missing from this listing, confirm).
4110  for (const auto& table : tables) {
4111  locks.emplace_back(
4114  catalog, table)));
4115  if (insert_table_name.has_value() && table == insert_table_name.value()) {
4116  locks.emplace_back(
4119  catalog.getDatabaseId(), (*locks.back())())));
4120  } else {
4121  locks.emplace_back(
4124  catalog.getDatabaseId(), (*locks.back())())));
4125  }
4126  }
4127  return locks;
4128 }
4129 } // namespace
4130 
4132  auto session_copy = session;
4133  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4134  &session_copy, boost::null_deleter());
4135  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
4136  auto stdlog = STDLOG(query_state);
4137  auto& catalog = session_ptr->getCatalog();
4138 
4139  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4142 
4143  if (catalog.getMetadataForTable(table_name_) == nullptr) {
4144  throw std::runtime_error("ITAS failed: table " + table_name_ + " does not exist.");
4145  }
4146 
4147  auto locks = acquire_query_table_locks(
4148  catalog, select_query_, query_state->createQueryStateProxy(), table_name_);
4149  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
4150 
4151  Executor::clearExternalCaches(true, td, catalog.getCurrentDB().dbId);
4152 
4153  try {
4154  populateData(query_state->createQueryStateProxy(), td, true, false);
4155  } catch (...) {
4156  throw;
4157  }
4158 }
4159 
4161  : InsertIntoTableAsSelectStmt(payload) {
4162  if (payload.HasMember("temporary")) {
4163  is_temporary_ = json_bool(payload["temporary"]);
4164  } else {
4165  is_temporary_ = false;
4166  }
4167 
4168  if (payload.HasMember("ifNotExists")) {
4169  if_not_exists_ = json_bool(payload["ifNotExists"]);
4170  } else {
4171  if_not_exists_ = false;
4172  }
4173 
4174  parse_options(payload, storage_options_);
4175 }
4176 
4178  auto session_copy = session;
4179  auto session_ptr = std::shared_ptr<Catalog_Namespace::SessionInfo>(
4180  &session_copy, boost::null_deleter());
4181  auto query_state = query_state::QueryState::create(session_ptr, select_query_);
4182  auto stdlog = STDLOG(query_state);
4183  LocalQueryConnector local_connector;
4184  auto& catalog = session.getCatalog();
4185  bool create_table = nullptr == leafs_connector_;
4186 
4187  std::set<std::string> select_tables;
4188  if (create_table) {
4189  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4192 
4193  // check access privileges
4196  throw std::runtime_error("CTAS failed. Table " + table_name_ +
4197  " will not be created. User has no create privileges.");
4198  }
4199 
4200  if (catalog.getMetadataForTable(table_name_) != nullptr) {
4201  if (if_not_exists_) {
4202  return;
4203  }
4204  throw std::runtime_error("Table " + table_name_ +
4205  " already exists and no data was loaded.");
4206  }
4207 
4208  // only validate the select query so we get the target types
4209  // correctly, but do not populate the result set
4210  // we currently have exclusive access to the system so this is safe
4211  auto validate_result = local_connector.query(
4212  query_state->createQueryStateProxy(), select_query_, {}, true, false);
4213 
4214  const auto column_descriptors_for_create =
4215  local_connector.getColumnDescriptors(validate_result, true);
4216 
4217  // some validation as the QE might return some out of range column types
4218  for (auto& cd : column_descriptors_for_create) {
4219  if (cd.columnType.is_decimal() && cd.columnType.get_precision() > 18) {
4220  throw std::runtime_error(cd.columnName + ": Precision too high, max 18.");
4221  }
4222  }
4223 
4224  TableDescriptor td;
4225  td.tableName = table_name_;
4226  td.userId = session.get_currentUser().userId;
4227  td.nColumns = column_descriptors_for_create.size();
4228  td.isView = false;
4229  td.fragmenter = nullptr;
4236  if (is_temporary_) {
4238  } else {
4240  }
4241 
4242  bool use_shared_dictionaries = true;
4243 
4244  if (!storage_options_.empty()) {
4245  for (auto& p : storage_options_) {
4246  if (boost::to_lower_copy<std::string>(*p->get_name()) ==
4247  "use_shared_dictionaries") {
4248  const StringLiteral* literal =
4249  dynamic_cast<const StringLiteral*>(p->get_value());
4250  if (nullptr == literal) {
4251  throw std::runtime_error(
4252  "USE_SHARED_DICTIONARIES must be a string parameter");
4253  }
4254  std::string val = boost::to_lower_copy<std::string>(*literal->get_stringval());
4255  use_shared_dictionaries = val == "true" || val == "1" || val == "t";
4256  } else {
4257  get_table_definitions_for_ctas(td, p, column_descriptors_for_create);
4258  }
4259  }
4260  }
4261 
4262  std::vector<SharedDictionaryDef> sharedDictionaryRefs;
4263 
4264  if (use_shared_dictionaries) {
4265  const auto source_column_descriptors =
4266  local_connector.getColumnDescriptors(validate_result, false);
4267  const auto mapping = catalog.getDictionaryToColumnMapping();
4268 
4269  for (auto& source_cd : source_column_descriptors) {
4270  const auto& ti = source_cd.columnType;
4271  if (ti.is_string()) {
4272  if (ti.get_compression() == kENCODING_DICT) {
4273  int dict_id = ti.get_comp_param();
4274  auto it = mapping.find(dict_id);
4275  if (mapping.end() != it) {
4276  const auto targetColumn = it->second;
4277  auto targetTable =
4278  catalog.getMetadataForTable(targetColumn->tableId, false);
4279  CHECK(targetTable);
4280  LOG(INFO) << "CTAS: sharing text dictionary on column "
4281  << source_cd.columnName << " with " << targetTable->tableName
4282  << "." << targetColumn->columnName;
4283  sharedDictionaryRefs.emplace_back(
4284  source_cd.columnName, targetTable->tableName, targetColumn->columnName);
4285  }
4286  }
4287  }
4288  }
4289  }
4290 
4291  // currently no means of defining sharding in CTAS
4292  td.keyMetainfo = serialize_key_metainfo(nullptr, sharedDictionaryRefs);
4293 
4294  catalog.createTable(td, column_descriptors_for_create, sharedDictionaryRefs, true);
4295  // TODO (max): It's transactionally unsafe, should be fixed: we may create object
4296  // w/o privileges
4297  SysCatalog::instance().createDBObject(
4298  session.get_currentUser(), td.tableName, TableDBObjectType, catalog);
4299  }
4300 
4301  // note there is a time where we do not have any executor outer lock here. someone could
4302  // come along and mess with the data or other tables.
4303  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4306 
4307  auto locks = acquire_query_table_locks(
4308  catalog, select_query_, query_state->createQueryStateProxy(), table_name_);
4309  const TableDescriptor* td = catalog.getMetadataForTable(table_name_);
4310  try {
4311  populateData(query_state->createQueryStateProxy(), td, false, true);
4312  } catch (...) {
4313  if (!g_cluster) {
4314  const TableDescriptor* created_td = catalog.getMetadataForTable(table_name_);
4315  if (created_td) {
4316  catalog.dropTable(created_td);
4317  }
4318  }
4319  throw;
4320  }
4321 }
4322 
4323 DropTableStmt::DropTableStmt(const rapidjson::Value& payload) {
4324  CHECK(payload.HasMember("tableName"));
4325  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4326 
4327  if_exists_ = false;
4328  if (payload.HasMember("ifExists")) {
4329  if_exists_ = json_bool(payload["ifExists"]);
4330  }
4331 }
4332 
// NOTE(review): the signature line and a few continuation lines of this
// definition are missing from this listing; the comments below describe only
// the code that is visible.
// Drops the table named by table_: obtains a schema write-lock container for
// it, checks the session's privileges, clears external caches and finally
// calls catalog.dropTable().
4334  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4337  auto& catalog = session.getCatalog();
4338  const TableDescriptor* td{nullptr};
4339  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>> td_with_lock;
4340  try {
4341  td_with_lock =
4342  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::WriteLock>>(
4344  catalog, *table_, false));
4345  td = (*td_with_lock)();
4346  } catch (const std::runtime_error& e) {
// When if_exists_ is set, a failure to obtain the table is not an error.
4347  if (if_exists_) {
4348  return;
4349  } else {
// NOTE(review): `throw e;` throws a copy typed std::runtime_error; a bare
// `throw;` would rethrow the original exception object unchanged.
4350  throw e;
4351  }
4352  }
4353 
4354  CHECK(td);
4355  CHECK(td_with_lock);
4356 
4357  // check access privileges
4358  if (!session.checkDBAccessPrivileges(
4360  throw std::runtime_error("Table " + *table_ +
4361  " will not be dropped. User has no proper privileges.");
4362  }
4363 
4365 
// The first lock below lives only inside this scope, i.e. only while the
// external caches are being cleared.
4366  {
4367  auto table_data_read_lock =
4369  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
4370  }
4371 
4372  auto table_data_write_lock =
4374  catalog.dropTable(td);
4375 }
4376 
4378 
4379 std::unique_ptr<DDLStmt> AlterTableStmt::delegate(const rapidjson::Value& payload) {
4380  CHECK(payload.HasMember("tableName"));
4381  auto tableName = json_str(payload["tableName"]);
4382 
4383  CHECK(payload.HasMember("alterType"));
4384  auto type = json_str(payload["alterType"]);
4385 
4386  if (type == "RENAME_TABLE") {
4387  CHECK(payload.HasMember("newTableName"));
4388  auto newTableName = json_str(payload["newTableName"]);
4389  return std::unique_ptr<DDLStmt>(new Parser::RenameTableStmt(
4390  new std::string(tableName), new std::string(newTableName)));
4391 
4392  } else if (type == "RENAME_COLUMN") {
4393  CHECK(payload.HasMember("columnName"));
4394  auto columnName = json_str(payload["columnName"]);
4395  CHECK(payload.HasMember("newColumnName"));
4396  auto newColumnName = json_str(payload["newColumnName"]);
4397  return std::unique_ptr<DDLStmt>(
4398  new Parser::RenameColumnStmt(new std::string(tableName),
4399  new std::string(columnName),
4400  new std::string(newColumnName)));
4401 
4402  } else if (type == "ADD_COLUMN") {
4403  CHECK(payload.HasMember("columnData"));
4404  CHECK(payload["columnData"].IsArray());
4405 
4406  // New Columns go into this list
4407  std::list<ColumnDef*>* table_element_list_ = new std::list<ColumnDef*>;
4408 
4409  const auto elements = payload["columnData"].GetArray();
4410  for (const auto& element : elements) {
4411  CHECK(element.IsObject());
4412  CHECK(element.HasMember("type"));
4413  if (json_str(element["type"]) == "SQL_COLUMN_DECLARATION") {
4414  auto col_def = column_from_json(element);
4415  table_element_list_->emplace_back(col_def.release());
4416  } else {
4417  LOG(FATAL) << "Unsupported element type for ALTER TABLE: "
4418  << element["type"].GetString();
4419  }
4420  }
4421 
4422  return std::unique_ptr<DDLStmt>(
4423  new Parser::AddColumnStmt(new std::string(tableName), table_element_list_));
4424 
4425  } else if (type == "DROP_COLUMN") {
4426  CHECK(payload.HasMember("columnData"));
4427  auto columnData = json_str(payload["columnData"]);
4428  // Convert columnData to std::list<std::string*>*
4429  // allocate std::list<> as DropColumnStmt will delete it;
4430  std::list<std::string*>* cols = new std::list<std::string*>;
4431  std::vector<std::string> cols1;
4432  boost::split(cols1, columnData, boost::is_any_of(","));
4433  for (auto s : cols1) {
4434  // strip leading/trailing spaces/quotes/single quotes
4435  boost::algorithm::trim_if(s, boost::is_any_of(" \"'`"));
4436  std::string* str = new std::string(s);
4437  cols->emplace_back(str);
4438  }
4439 
4440  return std::unique_ptr<DDLStmt>(
4441  new Parser::DropColumnStmt(new std::string(tableName), cols));
4442 
4443  } else if (type == "ALTER_OPTIONS") {
4444  CHECK(payload.HasMember("options"));
4445 
4446  if (payload["options"].IsObject()) {
4447  for (const auto& option : payload["options"].GetObject()) {
4448  std::string* option_name = new std::string(json_str(option.name));
4449  Literal* literal_value;
4450  if (option.value.IsString()) {
4451  std::string literal_string = json_str(option.value);
4452 
4453  // iff this string can be converted to INT
4454  // ... do so because it is necessary for AlterTableParamStmt
4455  std::size_t sz;
4456  int iVal = std::stoi(literal_string, &sz);
4457  if (sz == literal_string.size()) {
4458  literal_value = new IntLiteral(iVal);
4459  } else {
4460  literal_value = new StringLiteral(&literal_string);
4461  }
4462  } else if (option.value.IsInt() || option.value.IsInt64()) {
4463  literal_value = new IntLiteral(json_i64(option.value));
4464  } else if (option.value.IsNull()) {
4465  literal_value = new NullLiteral();
4466  } else {
4467  throw std::runtime_error("Unable to handle literal for " + *option_name);
4468  }
4469  CHECK(literal_value);
4470 
4471  NameValueAssign* nv = new NameValueAssign(option_name, literal_value);
4472  return std::unique_ptr<DDLStmt>(
4473  new Parser::AlterTableParamStmt(new std::string(tableName), nv));
4474  }
4475 
4476  } else {
4477  CHECK(payload["options"].IsNull());
4478  }
4479  }
4480  return nullptr;
4481 }
4482 
4483 TruncateTableStmt::TruncateTableStmt(const rapidjson::Value& payload) {
4484  CHECK(payload.HasMember("tableName"));
4485  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4486 }
4487 
// NOTE(review): the signature line and a few continuation lines of this
// definition are missing from this listing; comments cover the visible code.
// Truncates the table named by table_ after verifying that it exists, that
// SysCatalog grants the current user the requested privileges and that it is
// not a view. Throws std::runtime_error when any of those checks fails.
4489  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4492  auto& catalog = session.getCatalog();
4493  const auto td_with_lock =
4495  catalog, *table_, true);
4496  const auto td = td_with_lock();
4497  if (!td) {
4498  throw std::runtime_error("Table " + *table_ + " does not exist.");
4499  }
4500 
4501  // check access privileges
4502  std::vector<DBObject> privObjects;
4503  DBObject dbObject(*table_, TableDBObjectType);
4504  dbObject.loadKey(catalog);
4506  privObjects.push_back(dbObject);
4507  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
4508  throw std::runtime_error("Table " + *table_ + " will not be truncated. User " +
4509  session.get_currentUser().userLoggable() +
4510  " has no proper privileges.");
4511  }
4512 
4513  if (td->isView) {
4514  throw std::runtime_error(*table_ + " is a view. Cannot Truncate.");
4515  }
4517 
4518  // invalidate cached item
// The lock taken in this scope is released again before the write lock
// below is requested.
4519  {
4520  auto table_data_read_lock =
4522  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
4523  }
4524 
4525  auto table_data_write_lock =
4527  catalog.truncateTable(td);
4528 }
4529 
4530 OptimizeTableStmt::OptimizeTableStmt(const rapidjson::Value& payload) {
4531  CHECK(payload.HasMember("tableName"));
4532  table_ = std::make_unique<std::string>(json_str(payload["tableName"]));
4533  parse_options(payload, options_);
4534 }
4535 
4536 namespace {
// NOTE(review): the first line of this helper's signature is missing from this
// listing (it is called below as user_can_access_table(session, td, priv)).
// Returns whether SysCatalog grants the session's current user `access_priv`
// on the table described by `td`. `td` must not be null (CHECKed).
4538  const TableDescriptor* td,
4539  const AccessPrivileges access_priv) {
4540  CHECK(td);
4541  auto& cat = session_info.getCatalog();
4542  std::vector<DBObject> privObjects;
4543  DBObject dbObject(td->tableName, TableDBObjectType);
4544  dbObject.loadKey(cat);
4545  dbObject.setPrivileges(access_priv);
4546  privObjects.push_back(dbObject);
4547  return SysCatalog::instance().checkPrivileges(session_info.get_currentUser(),
4548  privObjects);
4549 };
4550 } // namespace
4551 
// NOTE(review): the signature line and a few continuation lines of this
// definition are missing from this listing; comments cover the visible code.
// Optimizes the table named by table_: optionally vacuums deleted rows and
// then recomputes the table metadata through a TableOptimizer.
4553  auto& catalog = session.getCatalog();
4554 
4555  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
4558 
4559  const auto td_with_lock =
4561  catalog, *table_);
4562  const auto td = td_with_lock();
4563 
// A missing table and a table the user may not delete from produce the same
// error message (presumably so the error does not reveal that the table
// exists; confirm intent).
4564  if (!td || !user_can_access_table(session, td, AccessPrivileges::DELETE_FROM_TABLE)) {
4565  throw std::runtime_error("Table " + *table_ + " does not exist.");
4566  }
4567 
4568  if (td->isView) {
4569  throw std::runtime_error("OPTIMIZE TABLE command is not supported on views.");
4570  }
4571 
4572  // invalidate cached item
// The cache key is the hash of {database id, table id}.
4573  std::vector<int> table_key{catalog.getCurrentDB().dbId, td->tableId};
4574  ResultSetCacheInvalidator::invalidateCachesByTable(boost::hash_value(table_key));
4575 
4577  const TableOptimizer optimizer(td, executor, catalog);
4578  if (shouldVacuumDeletedRows()) {
4579  optimizer.vacuumDeletedRows();
4580  }
4581  optimizer.recomputeMetadata();
4582 }
4583 
4584 bool repair_type(std::list<std::unique_ptr<NameValueAssign>>& options) {
4585  for (const auto& opt : options) {
4586  if (boost::iequals(*opt->get_name(), "REPAIR_TYPE")) {
4587  const auto repair_type =
4588  static_cast<const StringLiteral*>(opt->get_value())->get_stringval();
4589  CHECK(repair_type);
4590  if (boost::iequals(*repair_type, "REMOVE")) {
4591  return true;
4592  } else {
4593  throw std::runtime_error("REPAIR_TYPE must be REMOVE.");
4594  }
4595  } else {
4596  throw std::runtime_error("The only VALIDATE WITH options is REPAIR_TYPE.");
4597  }
4598  }
4599  return false;
4600 }
4601 
4602 ValidateStmt::ValidateStmt(std::string* type, std::list<NameValueAssign*>* with_opts)
4603  : type_(type) {
4604  if (!type) {
4605  throw std::runtime_error("Validation Type is required for VALIDATE command.");
4606  }
4607  std::list<std::unique_ptr<NameValueAssign>> options;
4608  if (with_opts) {
4609  for (const auto e : *with_opts) {
4610  options.emplace_back(e);
4611  }
4612  delete with_opts;
4613 
4614  isRepairTypeRemove_ = repair_type(options);
4615  }
4616 }
4617 
4618 ValidateStmt::ValidateStmt(const rapidjson::Value& payload) {
4619  CHECK(payload.HasMember("type"));
4620  type_ = std::make_unique<std::string>(json_str(payload["type"]));
4621 
4622  std::list<std::unique_ptr<NameValueAssign>> options;
4623  parse_options(payload, options);
4624 
4625  isRepairTypeRemove_ = repair_type(options);
4626 }
4627 
// NOTE(review): the first line of this signature and one statement between
// loadKey() and push_back() are missing from this listing.
// Throws std::runtime_error unless the session's user may alter table `td`.
// Super users and the user whose id equals td->userId pass without a
// SysCatalog lookup.
4629  const TableDescriptor* td) {
4630  if (session.get_currentUser().isSuper ||
4631  session.get_currentUser().userId == td->userId) {
4632  return;
4633  }
4634  std::vector<DBObject> privObjects;
4635  DBObject dbObject(td->tableName, TableDBObjectType);
4636  dbObject.loadKey(session.getCatalog());
4638  privObjects.push_back(dbObject);
4639  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
4640  throw std::runtime_error("Current user does not have the privilege to alter table: " +
4641  td->tableName);
4642  }
4643 }
4644 
4645 RenameUserStmt::RenameUserStmt(const rapidjson::Value& payload) {
4646  CHECK(payload.HasMember("name"));
4647  username_ = std::make_unique<std::string>(json_str(payload["name"]));
4648  CHECK(payload.HasMember("newName"));
4649  new_username_ = std::make_unique<std::string>(json_str(payload["newName"]));
4650 }
4651 
// NOTE(review): the signature line and the declaration of `user` are missing
// from this listing; comments cover the visible code.
// Renames user *username_ to *new_username_ through SysCatalog. Throws
// std::runtime_error when the caller is not a super user or when the user to
// rename does not exist.
4653  if (!session.get_currentUser().isSuper) {
4654  throw std::runtime_error("Only a super user can rename users.");
4655  }
4656 
4658  if (!SysCatalog::instance().getMetadataForUser(*username_, user)) {
4659  throw std::runtime_error("User " + *username_ + " does not exist.");
4660  }
4661 
4662  SysCatalog::instance().renameUser(*username_, *new_username_);
4663 }
4664 
4665 RenameDBStmt::RenameDBStmt(const rapidjson::Value& payload) {
4666  CHECK(payload.HasMember("name"));
4667  database_name_ = std::make_unique<std::string>(json_str(payload["name"]));
4668  CHECK(payload.HasMember("newName"));
4669  new_database_name_ = std::make_unique<std::string>(json_str(payload["newName"]));
4670 }
4671 
// NOTE(review): the signature line, the declaration of `db` and the lock
// arguments are missing from this listing; comments cover the visible code.
// Renames database *database_name_ to *new_database_name_ through SysCatalog.
// Throws std::runtime_error when the database does not exist or when the
// caller is neither a super user nor the database owner.
4674 
4675  // TODO: use database lock instead
4676  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4679 
4680  if (!SysCatalog::instance().getMetadataForDB(*database_name_, db)) {
4681  throw std::runtime_error("Database " + *database_name_ + " does not exist.");
4682  }
4683 
4684  if (!session.get_currentUser().isSuper &&
4685  session.get_currentUser().userId != db.dbOwner) {
4686  throw std::runtime_error("Only a super user or the owner can rename the database.");
4687  }
4688 
4689  SysCatalog::instance().renameDatabase(*database_name_, *new_database_name_);
4690 }
4691 
4692 RenameTableStmt::RenameTableStmt(const rapidjson::Value& payload) {
4693  CHECK(payload.HasMember("tableNames"));
4694  CHECK(payload["tableNames"].IsArray());
4695  const auto elements = payload["tableNames"].GetArray();
4696  for (const auto& element : elements) {
4697  CHECK(element.HasMember("name"));
4698  CHECK(element.HasMember("newName"));
4699  tablesToRename_.emplace_back(new std::string(json_str(element["name"])),
4700  new std::string(json_str(element["newName"])));
4701  }
4702 }
4703 
4704 RenameTableStmt::RenameTableStmt(std::string* tab_name, std::string* new_tab_name) {
4705  tablesToRename_.emplace_back(tab_name, new_tab_name);
4706 }
4707 
// NOTE(review): the first line of this signature is missing from this listing.
// Appends one tablesToRename_ entry per (first, second) pair of `tableNames`,
// copying each name into a newly allocated std::string.
4709  std::list<std::pair<std::string, std::string>> tableNames) {
4710  for (auto item : tableNames) {
4711  tablesToRename_.emplace_back(new std::string(item.first),
4712  new std::string(item.second));
4713  }
4714 }
4715 
4716 using SubstituteMap = std::map<std::string, std::string>;
4717 
4718 // Namespace fns used to track a left-to-right execution of RENAME TABLE
4719 // and verify that the command should be (entirely/mostly) valid
4720 //
4721 namespace {
4722 
// Value stored in a SubstituteMap for a table name that currently holds no
// table (see loadTable / hasData in this namespace).
4723 static constexpr char const* EMPTY_NAME{""};
4724 
// Returns `name` followed by "_tmp" and the current std::time() value, used as
// a temporary table name while tables are being renamed.
std::string generateUniqueTableName(std::string name) {
  // TODO - is there a "better" way to create a tmp name for the table
  const std::time_t now = std::time(nullptr);
  std::string unique_name = name + "_tmp";
  unique_name += std::to_string(now);
  return unique_name;
}
4730 
4731 void recordRename(SubstituteMap& sMap, std::string oldName, std::string newName) {
4732  sMap[oldName] = newName;
4733 }
4734 
// NOTE(review): the first line of this signature is missing from this listing
// (callers invoke it as loadTable(catalog, map, name)).
// Returns the name under which `tableName` currently has to be looked up.
// A name already present in `sMap` yields its mapped value, or `tableName`
// itself when the mapped value is EMPTY_NAME. An unknown name is looked up in
// the catalog and recorded as mapping to itself when the table exists and to
// EMPTY_NAME when it does not; `tableName` is returned in both cases.
4736  SubstituteMap& sMap,
4737  std::string tableName) {
4738  if (sMap.find(tableName) != sMap.end()) {
4739  if (sMap[tableName] == EMPTY_NAME) {
4740  return tableName;
4741  }
4742  return sMap[tableName];
4743  } else {
4744  // lookup table in src catalog
4745  const TableDescriptor* td = catalog.getMetadataForTable(tableName);
4746  if (td) {
4747  sMap[tableName] = tableName;
4748  } else {
4749  sMap[tableName] = EMPTY_NAME;
4750  }
4751  }
4752  return tableName;
4753 }
4754 
4755 bool hasData(SubstituteMap& sMap, std::string tableName) {
4756  // assumes loadTable has been previously called
4757  return (sMap[tableName] != EMPTY_NAME);
4758 }
4759 
// NOTE(review): the signature line is missing from this listing (the caller
// invokes it as checkNameSubstition(map)).
// Throws std::runtime_error for the first map entry whose value is neither
// EMPTY_NAME nor equal to its key.
4761  // Substitution map should be clean at end of rename:
4762  // all items in map must (map to self) or (map to EMPTY_NAME) by end
4763 
4764  for (auto it : sMap) {
4765  if ((it.second) != EMPTY_NAME && (it.first) != (it.second)) {
4766  throw std::runtime_error(
4767  "Error: Attempted to overwrite and lose data in table: \'" + (it.first) + "\'");
4768  }
4769  }
4770 }
4771 } // namespace
4772 
4773 namespace {
// NOTE(review): the signature and the condition of the helper defined here are
// missing from this listing. The visible body throws std::runtime_error
// telling the user to use ALTER FOREIGN TABLE for the table `td`.
4776  throw std::runtime_error(td->tableName + " is a foreign table. " +
4777  "Use ALTER FOREIGN TABLE.");
4778  }
4779 }
4780 } // namespace
4781 
// NOTE(review): the signature line, the lock arguments and one call inside the
// loop are missing from this listing; comments cover the visible code.
// Plans the renames listed in tablesToRename_ from left to right, collecting
// the (old, new) pairs in `names`, and then hands them to
// catalog.renameTable() in one call. A destination name that is still
// occupied is first moved to a generated temporary name. Throws
// std::runtime_error when a source table does not exist or when the plan
// would leave a table mapped to a different name at the end.
4783  auto& catalog = session.getCatalog();
4784 
4785  // TODO(adb): the catalog should be handling this locking (see AddColumnStmt)
4786  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4789 
4790  // accumulated vector of table names: oldName->newName
4791  std::vector<std::pair<std::string, std::string>> names;
4792 
4793  SubstituteMap tableSubtituteMap;
4794 
4795  for (auto& item : tablesToRename_) {
4796  std::string curTableName = *(item.first);
4797  std::string newTableName = *(item.second);
4798 
4799  // Note: if rename (a->b, b->a)
4800  // requires a tmp name change (b->tmp, a->b, tmp->a),
4801  // inject that here because
4802  // catalog.renameTable() assumes cleanliness else will fail
4803 
4804  std::string altCurTableName = loadTable(catalog, tableSubtituteMap, curTableName);
4805  std::string altNewTableName = loadTable(catalog, tableSubtituteMap, newTableName);
4806 
4807  if (altCurTableName != curTableName && altCurTableName != EMPTY_NAME) {
4808  // rename is a one-shot deal, reset the mapping once used
4809  recordRename(tableSubtituteMap, curTableName, curTableName);
4810  }
4811 
4812  // Check to see if the command (as-entered) will likely execute cleanly (logic-wise)
4813  // src tables exist before copying from
4814  // destination table collisions
4815  // handled (a->b, b->a)
4816  // or flagged (pre-existing a,b ... "RENAME TABLE a->c, b->c" )
4817  // handle multiple chained renames, tmp names (b->tmp, a->b, tmp->a)
4818  // etc.
4819  //
4820  if (hasData(tableSubtituteMap, altCurTableName)) {
4821  const TableDescriptor* td = catalog.getMetadataForTable(altCurTableName);
4822  if (td) {
4823  // Tables *and* views may be renamed here, foreign tables not
4824  // -> just block foreign tables
4826  check_alter_table_privilege(session, td);
4827  }
4828 
4829  if (hasData(tableSubtituteMap, altNewTableName)) {
4830  std::string tmpNewTableName = generateUniqueTableName(altNewTableName);
4831  // rename: newTableName to tmpNewTableName to get it out of the way
4832  // because it was full
4833  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4834  recordRename(tableSubtituteMap, altNewTableName, tmpNewTableName);
4835  recordRename(tableSubtituteMap, tmpNewTableName, tmpNewTableName);
4836  names.emplace_back(altNewTableName, tmpNewTableName);
4837  names.emplace_back(altCurTableName, altNewTableName);
4838  } else {
4839  // rename: curNewTableName to newTableName
4840  recordRename(tableSubtituteMap, altCurTableName, EMPTY_NAME);
4841  recordRename(tableSubtituteMap, altNewTableName, altNewTableName);
4842  names.emplace_back(altCurTableName, altNewTableName);
4843  }
4844  } else {
4845  throw std::runtime_error("Source table \'" + curTableName + "\' does not exist.");
4846  }
4847  }
4848  checkNameSubstition(tableSubtituteMap);
4849 
4850  catalog.renameTable(names);
4851 
4852  // just to be explicit, clean out the list, the unique_ptr will delete
4853  while (!tablesToRename_.empty()) {
4854  tablesToRename_.pop_front();
4855  }
4856 } // closes the function body opened above, not a namespace
4857 
// NOTE(review): the signature line and the name of the function called at the
// end are missing from this listing (callers invoke this as
// setColumnDescriptor(cd, coldef)).
// Derives the NOT NULL flag and the optional default value from `coldef`'s
// column constraint and forwards them, together with the column type and
// compression, to the call at the end.
4859  bool not_null;
4860  const ColumnConstraintDef* cc = coldef->get_column_constraint();
4861  if (cc == nullptr) {
4862  not_null = false;
4863  } else {
4864  not_null = cc->get_notnull();
4865  }
4866  std::string default_value;
// Stays null when the column has no constraint or no default literal.
4867  const std::string* default_value_ptr = nullptr;
4868  if (cc) {
4869  if (auto def_val_literal = cc->get_defaultval()) {
// String literals contribute their raw string value; every other literal
// contributes its to_string() text.
4870  auto defaultsp = dynamic_cast<const StringLiteral*>(def_val_literal);
4871  default_value =
4872  defaultsp ? *defaultsp->get_stringval() : def_val_literal->to_string();
4873  // The preprocessing below is needed because:
4874  // a) TypedImportBuffer expects arrays in the {...} format
4875  // b) TypedImportBuffer expects string literals inside arrays w/o any quotes
4876  if (coldef->get_column_type()->get_is_array()) {
4877  std::regex array_re(R"(^ARRAY\s*\[(.*)\]$)", std::regex_constants::icase);
4878  default_value = std::regex_replace(default_value, array_re, "{$1}");
4879  boost::erase_all(default_value, "\'");
4880  }
4881  default_value_ptr = &default_value;
4882  }
4883  }
4885  cd,
4886  coldef->get_column_type(),
4887  not_null,
4888  coldef->get_compression(),
4889  default_value_ptr);
4890 }
4891 
// NOTE(review): the first line of this signature and one statement after the
// view check are missing from this listing.
// Validates that columns can be added to `td`. Throws std::runtime_error when
// the table does not exist, is a view or is temporary, when the user lacks the
// alter privilege, or when one of the new column names already exists.
4893  const TableDescriptor* td) {
4894  auto& catalog = session.getCatalog();
4895  if (!td) {
4896  throw std::runtime_error("Table " + *table_ + " does not exist.");
4897  } else {
4898  if (td->isView) {
4899  throw std::runtime_error("Adding columns to a view is not supported.");
4900  }
4902  if (table_is_temporary(td)) {
4903  throw std::runtime_error(
4904  "Adding columns to temporary tables is not yet supported.");
4905  }
4906  }
4907 
4908  check_alter_table_privilege(session, td);
4909 
// When coldefs_ is empty, the single definition held in coldef_ is moved into
// it so that the loop below handles both cases.
4910  if (0 == coldefs_.size()) {
4911  coldefs_.push_back(std::move(coldef_));
4912  }
4913 
4914  for (const auto& coldef : coldefs_) {
4915  auto& new_column_name = *coldef->get_column_name();
4916  if (catalog.getMetadataForColumn(td->tableId, new_column_name) != nullptr) {
4917  throw std::runtime_error("Column " + new_column_name + " already exists.");
4918  }
4919  }
4920 }
4921 
// NOTE(review): the signature line and several call lines of this definition
// are missing from this listing; comments cover the visible code.
// Adds the columns in coldefs_ to the table named by table_. The catalog
// changes run between "BEGIN TRANSACTION" and "END TRANSACTION" on the
// catalog's SQLite connector; when the table already has rows, the new
// columns are filled through import buffers with each column's default value
// (or NULL). Any exception rolls the catalog and the SQLite transaction back
// and is rethrown.
4923  // TODO: Review add and drop column implementation
4924  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
4927  auto& catalog = session.getCatalog();
4928  const auto td_with_lock =
4930  catalog, *table_, true);
4931  const auto td = td_with_lock();
4932 
4933  check_executable(session, td);
4934 
4935  CHECK(td->fragmenter);
4936  if (std::dynamic_pointer_cast<Fragmenter_Namespace::SortedOrderFragmenter>(
4937  td->fragmenter)) {
4938  throw std::runtime_error(
4939  "Adding columns to a table is not supported when using the \"sort_column\" "
4940  "option.");
4941  }
4942 
4943  // invalidate cached item
4944  std::vector<int> table_key{catalog.getCurrentDB().dbId, td->tableId};
4945  ResultSetCacheInvalidator::invalidateCachesByTable(boost::hash_value(table_key));
4946 
4947  // Do not take a data write lock, as the fragmenter may call `deleteFragments`
4948  // during a cap operation. Note that the schema write lock will prevent concurrent
4949  // inserts along with all other queries.
4950 
4951  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
4952  try {
// cds: new column descriptors keyed by column name.
// cid_coldefs: column id -> originating ColumnDef, or nullptr for the
// physical columns produced by expanding a geo column.
4953  std::map<const std::string, const ColumnDescriptor> cds;
4954  std::map<const int, const ColumnDef*> cid_coldefs;
4955  for (const auto& coldef : coldefs_) {
4956  ColumnDescriptor cd;
4957  setColumnDescriptor(cd, coldef.get());
4958  catalog.addColumn(*td, cd);
4959  cds.emplace(*coldef->get_column_name(), cd);
4960  cid_coldefs.emplace(cd.columnId, coldef.get());
4961 
4962  // expand geo column to phy columns
4963  if (cd.columnType.is_geometry()) {
4964  std::list<ColumnDescriptor> phy_geo_columns;
4965  catalog.expandGeoColumn(cd, phy_geo_columns);
4966  for (auto& cd : phy_geo_columns) {
4967  catalog.addColumn(*td, cd);
4968  cds.emplace(cd.columnName, cd);
4969  cid_coldefs.emplace(cd.columnId, nullptr);
4970  }
4971  }
4972  }
4973 
// One import buffer per new column (logical and physical).
4974  std::unique_ptr<import_export::Loader> loader(new import_export::Loader(catalog, td));
4975  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
4976  for (const auto& cd : cds) {
4977  import_buffers.emplace_back(std::make_unique<import_export::TypedImportBuffer>(
4978  &cd.second, loader->getStringDict(&cd.second)));
4979  }
4980  loader->setAddingColumns(true);
4981 
4982  // set_geo_physical_import_buffer below needs a sorted import_buffers
4983  std::sort(import_buffers.begin(),
4984  import_buffers.end(),
4985  [](decltype(import_buffers[0])& a, decltype(import_buffers[0])& b) {
4986  return a->getColumnDesc()->columnId < b->getColumnDesc()->columnId;
4987  });
4988 
4989  size_t nrows = td->fragmenter->getNumRows();
4990  // if sharded, get total nrows from all sharded tables
4991  if (td->nShards > 0) {
4992  const auto physical_tds = catalog.getPhysicalTablesDescriptors(td);
4993  nrows = 0;
4994  std::for_each(physical_tds.begin(), physical_tds.end(), [&nrows](const auto& td) {
4995  nrows += td->fragmenter->getNumRows();
4996  });
4997  }
// Values only need to be produced when the table already contains rows.
4998  if (nrows > 0) {
// Counts the physical columns still to be skipped after a geo column has been
// handled (set from get_physical_cols() below).
4999  int skip_physical_cols = 0;
5000  for (const auto cit : cid_coldefs) {
5001  const auto cd = catalog.getMetadataForColumn(td->tableId, cit.first);
5002  const auto coldef = cit.second;
5003  const bool is_null = !cd->default_value.has_value();
5004 
5005  if (cd->columnType.get_notnull() && is_null) {
5006  throw std::runtime_error("Default value required for column " + cd->columnName +
5007  " because of NOT NULL constraint");
5008  }
5009 
// Find the import buffer that belongs to this column id.
5010  for (auto it = import_buffers.begin(); it < import_buffers.end(); ++it) {
5011  auto& import_buffer = *it;
5012  if (cd->columnId == import_buffer->getColumnDesc()->columnId) {
5013  if (coldef != nullptr ||
5014  skip_physical_cols-- <= 0) { // skip non-null phy col
5015  import_buffer->add_value(cd,
5016  cd->default_value.value_or("NULL"),
5017  is_null,
5019  if (cd->columnType.is_geometry()) {
5020  std::vector<double> coords, bounds;
5021  std::vector<int> ring_sizes, poly_rings;
5022  int render_group = 0;
5023  SQLTypeInfo tinfo{cd->columnType};
5025  cd->default_value.value_or("NULL"),
5026  tinfo,
5027  coords,
5028  bounds,
5029  ring_sizes,
5030  poly_rings,
5031  false)) {
5032  throw std::runtime_error("Bad geometry data: '" +
5033  cd->default_value.value_or("NULL") + "'");
5034  }
// Index of the buffer that follows the geo column's own buffer.
5035  size_t col_idx = 1 + std::distance(import_buffers.begin(), it);
5037  cd,
5038  import_buffers,
5039  col_idx,
5040  coords,
5041  bounds,
5042  ring_sizes,
5043  poly_rings,
5044  render_group);
5045  // skip following phy cols
5046  skip_physical_cols = cd->columnType.get_physical_cols();
5047  }
5048  }
5049  break;
5050  }
5051  }
5052  }
5053  }
5054 
5055  if (!loader->loadNoCheckpoint(import_buffers, nrows, &session)) {
5056  throw std::runtime_error("loadNoCheckpoint failed!");
5057  }
5058  catalog.roll(true);
5059  catalog.resetTableEpochFloor(td->tableId);
5060  loader->checkpoint();
5061  catalog.getSqliteConnector().query("END TRANSACTION");
5062  } catch (...) {
// Undo the catalog changes and the SQLite transaction, then let the caller
// see the original exception.
5063  catalog.roll(false);
5064  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
5065  throw;
5066  }
5067 }
5068 
// NOTE(review): the signature line and several condition/call lines of this
// definition are missing from this listing; comments cover the visible code.
// Drops the columns listed in columns_ from the table named by table_. Throws
// std::runtime_error when the table is missing, is a view or is temporary,
// when the user lacks the alter privilege, when a listed column does not
// exist, when the column-count check below fails, or when the sharding column
// is among the columns to drop. Any exception inside the transaction rolls
// the catalog and the SQLite transaction back and is rethrown.
5070  // TODO: Review add and drop column implementation
5071  const auto execute_write_lock = mapd_unique_lock<mapd_shared_mutex>(
5074  auto& catalog = session.getCatalog();
5075  const auto td_with_lock =
5077  catalog, *table_, true);
5078  const auto td = td_with_lock();
5079  if (!td) {
5080  throw std::runtime_error("Table " + *table_ + " does not exist.");
5081  }
5083  if (td->isView) {
5084  throw std::runtime_error("Dropping a column from a view is not supported.");
5085  }
5086  if (table_is_temporary(td)) {
5087  throw std::runtime_error(
5088  "Dropping a column from a temporary table is not yet supported.");
5089  }
5090 
5091  check_alter_table_privilege(session, td);
5092 
5093  for (const auto& column : columns_) {
5094  if (nullptr == catalog.getMetadataForColumn(td->tableId, *column)) {
5095  throw std::runtime_error("Column " + *column + " does not exist.");
5096  }
5097  }
5098 
// NOTE(review): the threshold is 2, or 3 when hasDeletedCol is set; presumably
// nColumns also counts system columns, so this means "one user column left".
// Confirm.
5099  if (td->nColumns <= (td->hasDeletedCol ? 3 : 2)) {
5100  throw std::runtime_error("Table " + *table_ + " has only one column.");
5101  }
5102 
5103  // invalidate cached item
5104  Executor::clearExternalCaches(false, td, catalog.getCurrentDB().dbId);
5105 
5106  catalog.getSqliteConnector().query("BEGIN TRANSACTION");
5107  try {
// Ids of every dropped column, including the physical columns that follow a
// dropped column.
5108  std::vector<int> columnIds;
5109  for (const auto& column : columns_) {
5110  ColumnDescriptor cd = *catalog.getMetadataForColumn(td->tableId, *column);
5111  if (td->nShards > 0 && td->shardedColumnId == cd.columnId) {
5112  throw std::runtime_error("Dropping sharding column " + cd.columnName +
5113  " is not supported.");
5114  }
5115  catalog.dropColumn(*td, cd);
5116  columnIds.push_back(cd.columnId);
// The physical columns are addressed by the ids directly after cd.columnId.
5117  for (int i = 0; i < cd.columnType.get_physical_cols(); i++) {
5118  const auto pcd = catalog.getMetadataForColumn(td->tableId, cd.columnId + i + 1);
5119  CHECK(pcd);
5120  catalog.dropColumn(*td, *pcd);
5121  columnIds.push_back(cd.columnId + i + 1);
5122  }
5123  }
5124 
5125  for (auto shard : catalog.getPhysicalTablesDescriptors(td)) {
5126  shard->fragmenter->dropColumns(columnIds);
5127  }
5128  // if test forces to rollback
5130  throw std::runtime_error("lol!");
5131  }
5132  catalog.roll(true);
5134  catalog.resetTableEpochFloor(td->tableId);
5135  catalog.checkpoint(td->tableId);
5136  }
5137  catalog.getSqliteConnector().query("END TRANSACTION");
5138  } catch (...) {
5139  catalog.setForReload(td->tableId);
5140  catalog.roll(false);
5141  catalog.getSqliteConnector().query("ROLLBACK TRANSACTION");
5142  throw;
5143  }
5144 }
5145 
// NOTE(review): the signature line and a few call lines of this definition are
// missing from this listing; comments cover the visible code.
// Renames column *column_ of the table named by table_ to *new_column_name_.
// Throws std::runtime_error when the user lacks the alter privilege, when the
// column does not exist or when the new name is already taken.
5147  auto& catalog = session.getCatalog();
5148 
5149  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
5152 
5153  const auto td_with_lock =
5155  catalog, *table_, false);
5156  const auto td = td_with_lock();
5157  CHECK(td);
5159 
5160  check_alter_table_privilege(session, td);
5161  const ColumnDescriptor* cd = catalog.getMetadataForColumn(td->tableId, *column_);
5162  if (cd == nullptr) {
5163  throw std::runtime_error("Column " + *column_ + " does not exist.");
5164  }
5165  if (catalog.getMetadataForColumn(td->tableId, *new_column_name_) != nullptr) {
5166  throw std::runtime_error("Column " + *new_column_name_ + " already exists.");
5167  }
5168  catalog.renameColumn(td, cd, *new_column_name_);
5169 }
5170 
// NOTE(review): the signature line and a few call lines of this definition are
// missing from this listing; comments cover the visible code.
// Applies the single parameter held in param_ to the table named by table_.
// The parameter name is matched case-insensitively against
// max_rollback_epochs, epoch and max_rows, and the value must be an integer
// literal. Throws std::runtime_error when the table is missing, is a view or
// is temporary, when the user lacks the alter privilege, when the value is
// not an integer literal or when the name is not one of the three above.
5172  enum TableParamType { MaxRollbackEpochs, Epoch, MaxRows };
5173  static const std::unordered_map<std::string, TableParamType> param_map = {
5174  {"max_rollback_epochs", TableParamType::MaxRollbackEpochs},
5175  {"epoch", TableParamType::Epoch},
5176  {"max_rows", TableParamType::MaxRows}};
5177  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
5180  auto& catalog = session.getCatalog();
5181  const auto td_with_lock =
5183  catalog, *table_, false);
5184  const auto td = td_with_lock();
5185  if (!td) {
5186  throw std::runtime_error("Table " + *table_ + " does not exist.");
5187  }
5188  if (td->isView) {
5189  throw std::runtime_error("Setting parameters for a view is not supported.");
5190  }
5191  if (table_is_temporary(td)) {
5192  throw std::runtime_error(
5193  "Setting parameters for a temporary table is not yet supported.");
5194  }
5195  check_alter_table_privilege(session, td);
5196 
5197  // invalidate cached item
5198  std::vector<int> table_key{catalog.getCurrentDB().dbId, td->tableId};
5199  ResultSetCacheInvalidator::invalidateCachesByTable(boost::hash_value(table_key));
5200 
// The lookup key is the lower-cased parameter name.
5201  std::string param_name(*param_->get_name());
5202  boost::algorithm::to_lower(param_name);
5203  const IntLiteral* val_int_literal =
5204  dynamic_cast<const IntLiteral*>(param_->get_value());
5205  if (val_int_literal == nullptr) {
5206  throw std::runtime_error("Table parameters should be integers.");
5207  }
5208  const int64_t param_val = val_int_literal->get_intval();
5209 
5210  const auto param_it = param_map.find(param_name);
5211  if (param_it == param_map.end()) {
5212  throw std::runtime_error(param_name + " is not a settable table parameter.");
5213  }
5214  switch (param_it->second) {
5215  case MaxRollbackEpochs: {
5216  catalog.setMaxRollbackEpochs(td->tableId, param_val);
5217  break;
5218  }
5219  case Epoch: {
5220  catalog.setTableEpoch(catalog.getDatabaseId(), td->tableId, param_val);
5221  break;
5222  }
5223  case MaxRows: {
5224  catalog.setMaxRows(td->tableId, param_val);
5225  break;
5226  }
5227  default: {
5228  UNREACHABLE() << "Unexpected TableParamType value: " << param_it->second
5229  << ", key: " << param_it->first;
5230  }
5231  }
5232 }
5233 
// NOTE(review): the first line of this signature (declaring parameter `t`) is
// missing from this listing.
// Stores `t` in table_ and `f` in copy_from_source_pattern_ and starts with
// success_ set to true. When `o` is non-null, every element is moved into
// options_ and the list itself is deleted.
5235  std::string* f,
5236  std::list<NameValueAssign*>* o)
5237  : table_(t), copy_from_source_pattern_(f), success_(true) {
5238  if (o) {
5239  for (const auto e : *o) {
5240  options_.emplace_back(e);
5241  }
5242  delete o;
5243  }
5244 }
5245 
5246 CopyTableStmt::CopyTableStmt(const rapidjson::Value& payload) : success_(true) {
5247  CHECK(payload.HasMember("table"));
5248  table_ = std::make_unique<std::string>(json_str(payload["table"]));
5249 
5250  CHECK(payload.HasMember("filePath"));
5251  std::string fs = json_str(payload["filePath"]);
5252  // strip leading/trailing spaces/quotes/single quotes
5253  boost::algorithm::trim_if(fs, boost::is_any_of(" \"'`"));
5254  copy_from_source_pattern_ = std::make_unique<std::string>(fs);
5255 
5256  parse_options(payload, options_);
5257 }
5258 
// NOTE(review): the signature line is missing from this listing.
// Convenience overload: builds an importer factory that forwards to
// import_export::create_importer() and passes it to the execute() overload
// that takes a factory.
5260  auto importer_factory = [](Catalog_Namespace::Catalog& catalog,
5261  const TableDescriptor* td,
5262  const std::string& copy_from_source,
5263  const import_export::CopyParams& copy_params)
5264  -> std::unique_ptr<import_export::AbstractImporter> {
5265  return import_export::create_importer(catalog, td, copy_from_source, copy_params);
5266  };
5267  return execute(session, importer_factory);
5268 }
5269 
5271  const Catalog_Namespace::SessionInfo& session,
5272  const std::function<std::unique_ptr<import_export::AbstractImporter>(
5274  const TableDescriptor*,
5275  const std::string&,
5276  const import_export::CopyParams&)>& importer_factory) {
5277  size_t total_time = 0;
5278 
5279  // Prevent simultaneous import / truncate (see TruncateTableStmt::execute)
5280  const auto execute_read_lock = mapd_shared_lock<mapd_shared_mutex>(
5283 
5284  const TableDescriptor* td{nullptr};
5285  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
5286  std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
5287 
5288  auto& catalog = session.getCatalog();
5289 
5290  try {
5291  td_with_lock = std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
5293  catalog, *table_));
5294  td = (*td_with_lock)();
5295  insert_data_lock = std::make_unique<lockmgr::WriteLock>(
5297  } catch (const std::runtime_error& e) {
5298  // noop
5299  // TODO(adb): We're really only interested in whether the table exists or not.
5300  // Create a more refined exception.
5301  }
5302 
5303  // if the table already exists, it's locked, so check access privileges
5304  if (td) {
5305  std::vector<DBObject> privObjects;
5306  DBObject dbObject(*table_, TableDBObjectType);
5307  dbObject.loadKey(catalog);
5309  privObjects.push_back(dbObject);
5310  if (!SysCatalog::instance().checkPrivileges(session.get_currentUser(), privObjects)) {
5311  throw std::runtime_error("Violation of access privileges: user " +
5312  session.get_currentUser().userLoggable() +
5313  " has no insert privileges for table " + *table_ + ".");
5314  }
5315 
5316  // invalidate cached item
5317  Executor::clearExternalCaches(true, td, catalog.getCurrentDB().dbId);
5318  }
5319 
5320  import_export::CopyParams copy_params;
5321  std::vector<std::string> warnings;
5323 
5324  boost::regex non_local_file_regex{R"(^\s*(s3|http|https)://.+)",
5325  boost::regex::extended | boost::regex::icase};
5326  if (!boost::regex_match(*copy_from_source_pattern_, non_local_file_regex) &&
5330  }
5331  // Since the source may be not only a POSIX file name but also an s3/hdfs/... URL,
5332  // we do not expand wildcards or check file existence here.
5333  // From here on, copy_from_source contains something which may be a URL,
5334  // a wildcard of file names, or a SQL select statement.
5335  std::string copy_from_source = *copy_from_source_pattern_;
5336 
5337  if (copy_params.source_type == import_export::SourceType::kOdbc) {
5338  copy_params.odbc_sql_select = copy_from_source;
5339  if (copy_params.odbc_sql_order_by.empty()) {
5340  throw std::runtime_error(
5341  "Option \"SQL ORDER BY\" must be specified when copying from an ODBC source.");
5342  }
5343  }
5344 
5345  std::string tr;
5346 
5347  for (auto const& warning : warnings) {
5348  tr += warning + "\n";
5349  }
5350 
5351  if (copy_params.source_type == import_export::SourceType::kGeoFile ||
5353  // geo import
5354  // we do nothing here, except stash the parameters so we can
5355  // do the import when we unwind to the top of the handler
5356  deferred_copy_from_file_name_ = copy_from_source;
5357  deferred_copy_from_copy_params_ = copy_params;
5358  was_deferred_copy_from_ = true;
5359 
5360  // the result string
5361  // @TODO simon.eves put something more useful in here
5362  // except we really can't because we haven't done the import yet!
5363  if (td) {
5364  tr += std::string("Appending geo to table '") + *table_ + std::string("'...");
5365  } else {
5366  tr += std::string("Creating table '") + *table_ +
5367  std::string("' and importing geo...");
5368  }
5369  } else {
5370  if (td) {
5371  CHECK(td_with_lock);
5372 
5373  // regular import
5374  auto importer = importer_factory(catalog, td, copy_from_source, copy_params);
5375  auto start_time = ::toString(std::chrono::system_clock::now());
5377  auto query_session = session.get_session_id();
5378  auto query_str = "COPYING " + td->tableName;
5380  executor->enrollQuerySession(query_session,
5381  query_str,