OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgTranslator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgTranslator.h"
18 #include "Analyzer/Analyzer.h"
20 #include "DateTimePlusRewrite.h"
21 #include "DateTimeTranslator.h"
23 #include "ExpressionRewrite.h"
26 #include "Parser/ParserNode.h"
27 #include "RelAlgDag.h"
28 #include "ScalarExprVisitor.h"
29 #include "Shared/SqlTypesLayout.h"
30 #include "Shared/likely.h"
31 #include "Shared/scope.h"
32 #include "Shared/thread_count.h"
33 #include "WindowContext.h"
34 
35 #include <future>
36 #include <sstream>
37 
38 extern bool g_enable_watchdog;
39 
41 
42 namespace {
43 
45  const int scale,
46  const int precision) {
47  SQLTypeInfo ti(sql_type, 0, 0, true);
48  if (ti.is_decimal()) {
49  ti.set_scale(scale);
50  ti.set_precision(precision);
51  }
52  return ti;
53 }
54 
55 } // namespace
56 
57 std::pair<std::shared_ptr<Analyzer::Expr>, SQLQualifier>
59  std::shared_ptr<Analyzer::Expr> rhs;
60  SQLQualifier sql_qual{kONE};
61  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
62  if (!rex_operator) {
63  return std::make_pair(rhs, sql_qual);
64  }
65  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex_operator);
66  const auto qual_str = rex_function ? rex_function->getName() : "";
67  if (qual_str == "PG_ANY"sv || qual_str == "PG_ALL"sv) {
68  CHECK_EQ(size_t(1), rex_function->size());
69  rhs = translateScalarRex(rex_function->getOperand(0));
70  sql_qual = (qual_str == "PG_ANY"sv) ? kANY : kALL;
71  }
72  if (!rhs && rex_operator->getOperator() == kCAST) {
73  CHECK_EQ(size_t(1), rex_operator->size());
74  std::tie(rhs, sql_qual) = getQuantifiedRhs(rex_operator->getOperand(0));
75  }
76  return std::make_pair(rhs, sql_qual);
77 }
78 
79 namespace {
80 
81 std::pair<Datum, bool> datum_from_scalar_tv(const ScalarTargetValue* scalar_tv,
82  const SQLTypeInfo& ti) noexcept {
83  Datum d{0};
84  bool is_null_const{false};
85  switch (ti.get_type()) {
86  case kBOOLEAN: {
87  const auto ival = boost::get<int64_t>(scalar_tv);
88  CHECK(ival);
89  if (*ival == inline_int_null_val(ti)) {
90  is_null_const = true;
91  } else {
92  d.boolval = *ival;
93  }
94  break;
95  }
96  case kTINYINT: {
97  const auto ival = boost::get<int64_t>(scalar_tv);
98  CHECK(ival);
99  if (*ival == inline_int_null_val(ti)) {
100  is_null_const = true;
101  } else {
102  d.tinyintval = *ival;
103  }
104  break;
105  }
106  case kSMALLINT: {
107  const auto ival = boost::get<int64_t>(scalar_tv);
108  CHECK(ival);
109  if (*ival == inline_int_null_val(ti)) {
110  is_null_const = true;
111  } else {
112  d.smallintval = *ival;
113  }
114  break;
115  }
116  case kINT: {
117  const auto ival = boost::get<int64_t>(scalar_tv);
118  CHECK(ival);
119  if (*ival == inline_int_null_val(ti)) {
120  is_null_const = true;
121  } else {
122  d.intval = *ival;
123  }
124  break;
125  }
126  case kDECIMAL:
127  case kNUMERIC:
128  case kBIGINT:
129  case kDATE:
130  case kTIME:
131  case kTIMESTAMP: {
132  const auto ival = boost::get<int64_t>(scalar_tv);
133  CHECK(ival);
134  if (*ival == inline_int_null_val(ti)) {
135  is_null_const = true;
136  } else {
137  d.bigintval = *ival;
138  }
139  break;
140  }
141  case kDOUBLE: {
142  const auto dval = boost::get<double>(scalar_tv);
143  CHECK(dval);
144  if (*dval == inline_fp_null_val(ti)) {
145  is_null_const = true;
146  } else {
147  d.doubleval = *dval;
148  }
149  break;
150  }
151  case kFLOAT: {
152  const auto fval = boost::get<float>(scalar_tv);
153  CHECK(fval);
154  if (*fval == inline_fp_null_val(ti)) {
155  is_null_const = true;
156  } else {
157  d.floatval = *fval;
158  }
159  break;
160  }
161  case kTEXT:
162  case kVARCHAR:
163  case kCHAR: {
164  auto nullable_sptr = boost::get<NullableString>(scalar_tv);
165  CHECK(nullable_sptr);
166  if (boost::get<void*>(nullable_sptr)) {
167  is_null_const = true;
168  } else {
169  auto sptr = boost::get<std::string>(nullable_sptr);
170  d.stringval = new std::string(*sptr);
171  }
172  break;
173  }
174  default:
175  CHECK(false) << "Unhandled type: " << ti.get_type_name();
176  }
177  return {d, is_null_const};
178 }
179 
180 using Handler =
181  std::shared_ptr<Analyzer::Expr> (RelAlgTranslator::*)(RexScalar const*) const;
182 using IndexedHandler = std::pair<std::type_index, Handler>;
183 
184 template <typename... Ts>
185 std::array<IndexedHandler, sizeof...(Ts)> makeHandlers() {
186  return {IndexedHandler{std::type_index(typeid(Ts)),
187  &RelAlgTranslator::translateRexScalar<Ts>}...};
188 }
189 
190 struct ByTypeIndex {
191  std::type_index const type_index_;
192  ByTypeIndex(std::type_info const& type_info)
193  : type_index_(std::type_index(type_info)) {}
194  bool operator()(IndexedHandler const& pair) const { return pair.first == type_index_; }
195 };
196 
197 } // namespace
198 
199 template <>
200 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexInput>(
201  RexScalar const* rex) const {
202  return translateInput(static_cast<RexInput const*>(rex));
203 }
204 template <>
205 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexLiteral>(
206  RexScalar const* rex) const {
207  return translateLiteral(static_cast<RexLiteral const*>(rex));
208 }
209 template <>
210 std::shared_ptr<Analyzer::Expr>
211 RelAlgTranslator::translateRexScalar<RexWindowFunctionOperator>(
212  RexScalar const* rex) const {
213  return translateWindowFunction(static_cast<RexWindowFunctionOperator const*>(rex));
214 }
215 template <>
216 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexFunctionOperator>(
217  RexScalar const* rex) const {
218  return translateFunction(static_cast<RexFunctionOperator const*>(rex));
219 }
220 template <>
221 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexOperator>(
222  RexScalar const* rex) const {
223  return translateOper(static_cast<RexOperator const*>(rex));
224 }
225 template <>
226 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexCase>(
227  RexScalar const* rex) const {
228  return translateCase(static_cast<RexCase const*>(rex));
229 }
230 template <>
231 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexSubQuery>(
232  RexScalar const* rex) const {
233  return translateScalarSubquery(static_cast<RexSubQuery const*>(rex));
234 }
235 
236 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarRex(
237  RexScalar const* rex) const {
238  auto cache_itr = cache_.find(rex);
239  if (cache_itr == cache_.end()) {
240  // Order types from most likely to least as they are compared seriatim.
241  static auto const handlers = makeHandlers<RexInput,
242  RexLiteral,
243  RexOperator,
244  RexCase,
247  RexSubQuery>();
248  static_assert(std::is_trivially_destructible_v<decltype(handlers)>);
249  auto it = std::find_if(handlers.cbegin(), handlers.cend(), ByTypeIndex{typeid(*rex)});
250  CHECK(it != handlers.cend()) << "Unhandled type: " << typeid(*rex).name();
251  // Call handler based on typeid(*rex) and cache the std::shared_ptr<Analyzer::Expr>.
252  auto cached = cache_.emplace(rex, (this->*it->second)(rex));
253  CHECK(cached.second) << "Failed to emplace rex of type " << typeid(*rex).name();
254  cache_itr = cached.first;
255  }
256  return cache_itr->second;
257 }
258 
259 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translate(RexScalar const* rex) const {
260  ScopeGuard clear_cache{[this] { cache_.clear(); }};
261  return translateScalarRex(rex);
262 }
263 
264 namespace {
265 
266 bool is_agg_supported_for_type(const SQLAgg& agg_kind, const SQLTypeInfo& arg_ti) {
267  if ((agg_kind == kMIN || agg_kind == kMAX || agg_kind == kSUM || agg_kind == kAVG) &&
268  !(arg_ti.is_number() || arg_ti.is_boolean() || arg_ti.is_time())) {
269  return false;
270  }
271 
272  return true;
273 }
274 
275 } // namespace
276 
277 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAggregateRex(
278  const RexAgg* rex,
279  const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
280  SQLAgg agg_kind = rex->getKind();
281  const bool is_distinct = rex->isDistinct();
282  const bool takes_arg{rex->size() > 0};
283  std::shared_ptr<Analyzer::Expr> arg_expr;
284  std::shared_ptr<Analyzer::Expr> arg1; // 2nd aggregate parameter
285  if (takes_arg) {
286  const auto operand = rex->getOperand(0);
287  CHECK_LT(operand, scalar_sources.size());
288  CHECK_LE(rex->size(), 2u);
289  arg_expr = scalar_sources[operand];
290  switch (agg_kind) {
292  if (rex->size() == 2) {
293  auto const const_arg1 = std::dynamic_pointer_cast<Analyzer::Constant>(
294  scalar_sources[rex->getOperand(1)]);
295  if (!const_arg1 || const_arg1->get_type_info().get_type() != kINT ||
296  const_arg1->get_constval().intval < 1 ||
297  const_arg1->get_constval().intval > 100) {
298  throw std::runtime_error(
299  "APPROX_COUNT_DISTINCT's second parameter should be SMALLINT literal "
300  "between "
301  "1 and 100");
302  }
303  arg1 = scalar_sources[rex->getOperand(1)];
304  }
305  break;
306  case kAPPROX_QUANTILE:
307  if (g_cluster) {
308  throw std::runtime_error(
309  "APPROX_PERCENTILE/MEDIAN is not supported in distributed mode at this "
310  "time.");
311  }
312  // If second parameter is not given then APPROX_MEDIAN is assumed.
313  if (rex->size() == 2) {
314  arg1 = std::dynamic_pointer_cast<Analyzer::Constant>(
315  std::dynamic_pointer_cast<Analyzer::Constant>(
316  scalar_sources[rex->getOperand(1)])
317  ->add_cast(SQLTypeInfo(kDOUBLE)));
318  } else {
319 #ifdef _WIN32
320  Datum median;
321  median.doubleval = 0.5;
322 #else
323  constexpr Datum median{.doubleval = 0.5};
324 #endif
325  arg1 = std::make_shared<Analyzer::Constant>(kDOUBLE, false, median);
326  }
327  break;
328  case kMODE:
329  if (g_cluster) {
330  throw std::runtime_error(
331  "MODE is not supported in distributed mode at this time.");
332  }
333  break;
334  case kCOUNT_IF:
335  if (rex->isDistinct()) {
336  throw std::runtime_error(
337  "Currently, COUNT_IF function does not support DISTINCT qualifier.");
338  }
339  break;
340  case kSUM_IF:
341  arg1 = scalar_sources[rex->getOperand(1)];
342  if (arg1->get_type_info().get_type() != kBOOLEAN) {
343  throw std::runtime_error("Conditional argument must be a boolean expression.");
344  }
345  break;
346  default:
347  break;
348  }
349  const auto& arg_ti = arg_expr->get_type_info();
350  if (!is_agg_supported_for_type(agg_kind, arg_ti)) {
351  throw std::runtime_error("Aggregate on " + arg_ti.get_type_name() +
352  " is not supported yet.");
353  }
354  }
355  const auto agg_ti = get_agg_type(agg_kind, arg_expr.get());
356  return makeExpr<Analyzer::AggExpr>(agg_ti, agg_kind, arg_expr, is_distinct, arg1);
357 }
358 
// Translates a RexLiteral into an Analyzer expression (a constant, a cast of a
// constant, or a null array expression), dispatching on the literal's own type.
// lit_ti describes the literal as written; target_ti describes the literal's
// target type, scale and precision. Where the two differ, the DECIMAL and DOUBLE
// cases wrap the constant in a cast to target_ti.
// Logs FATAL on a literal type with no case below.
359 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLiteral(
360  const RexLiteral* rex_literal) {
361  auto lit_ti = build_type_info(
362  rex_literal->getType(), rex_literal->getScale(), rex_literal->getPrecision());
363  auto target_ti = build_type_info(rex_literal->getTargetType(),
364  rex_literal->getTargetScale(),
365  rex_literal->getTargetPrecision());
366  switch (rex_literal->getType()) {
367  case kINT:
368  case kBIGINT: {
// Both integer widths are stored through the 64-bit datum member.
369  Datum d;
370  d.bigintval = rex_literal->getVal<int64_t>();
371  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
372  }
373  case kDECIMAL: {
374  const auto val = rex_literal->getVal<int64_t>();
375  const int precision = rex_literal->getPrecision();
376  const int scale = rex_literal->getScale();
// A scale-less decimal headed for a floating point target is emitted directly
// as a floating point constant.
377  if (target_ti.is_fp() && !scale) {
378  return make_fp_constant(val, target_ti);
379  }
// NOTE(review): the else-branch of this conditional expression (original line
// 381) is absent from this listing — restore it from the upstream source.
380  auto lit_expr = scale ? Parser::FixedPtLiteral::analyzeValue(val, scale, precision)
382  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
383  }
384  case kTEXT: {
385  return Parser::StringLiteral::analyzeValue(rex_literal->getVal<std::string>(),
386  false);
387  }
388  case kBOOLEAN: {
389  Datum d;
390  d.boolval = rex_literal->getVal<bool>();
391  return makeExpr<Analyzer::Constant>(kBOOLEAN, false, d);
392  }
393  case kDOUBLE: {
394  Datum d;
395  d.doubleval = rex_literal->getVal<double>();
396  auto lit_expr =
397  makeExpr<Analyzer::Constant>(SQLTypeInfo(rex_literal->getType(),
398  rex_literal->getPrecision(),
399  rex_literal->getScale(),
400  false),
401  false,
402  d);
403  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
404  }
405  case kINTERVAL_DAY_TIME:
406  case kINTERVAL_YEAR_MONTH: {
407  Datum d;
408  d.bigintval = rex_literal->getVal<int64_t>();
409  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
410  }
411  case kTIME:
412  case kTIMESTAMP: {
// The raw value is kept as-is only for TIMESTAMP with a positive precision;
// otherwise it is divided by 1000 (presumably milliseconds to seconds — TODO
// confirm the unit delivered by RexLiteral).
413  Datum d;
414  d.bigintval =
415  rex_literal->getType() == kTIMESTAMP && rex_literal->getPrecision() > 0
416  ? rex_literal->getVal<int64_t>()
417  : rex_literal->getVal<int64_t>() / 1000;
418  return makeExpr<Analyzer::Constant>(
419  SQLTypeInfo(rex_literal->getType(), rex_literal->getPrecision(), 0, false),
420  false,
421  d);
422  }
423  case kDATE: {
// The literal value is scaled by 24 * 3600 (presumably a day count converted
// to seconds — TODO confirm).
424  Datum d;
425  d.bigintval = rex_literal->getVal<int64_t>() * 24 * 3600;
426  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
427  }
428  case kNULLT: {
// A null literal with an array target type becomes an ArrayExpr built with a
// trailing `true` flag; any other target becomes a null constant of that type.
429  if (target_ti.is_array()) {
// NOTE(review): the declaration of `args` (original line 430) is absent from
// this listing — restore it from the upstream source.
431  // defaulting to valid sub-type for convenience
432  target_ti.set_subtype(kBOOLEAN);
433  return makeExpr<Analyzer::ArrayExpr>(target_ti, args, true);
434  }
435  return makeExpr<Analyzer::Constant>(rex_literal->getTargetType(), true, Datum{0});
436  }
437  default: {
438  LOG(FATAL) << "Unexpected literal type " << lit_ti.get_type_name();
439  }
440  }
441  return nullptr;
442 }
443 
444 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarSubquery(
445  const RexSubQuery* rex_subquery) const {
446  if (just_explain_) {
447  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
448  }
449  CHECK(rex_subquery);
450  auto result = rex_subquery->getExecutionResult();
451  auto row_set = result->getRows();
452  const size_t row_count = row_set->rowCount();
453  if (row_count > size_t(1)) {
454  throw std::runtime_error("Scalar sub-query returned multiple rows");
455  }
456  if (row_count == size_t(0)) {
457  if (row_set->isValidationOnlyRes()) {
458  Datum d{0};
459  return makeExpr<Analyzer::Constant>(rex_subquery->getType(), false, d);
460  }
461  throw std::runtime_error("Scalar sub-query returned no results");
462  }
463  CHECK_EQ(row_count, size_t(1));
464  row_set->moveToBegin();
465  auto first_row = row_set->getNextRow(false, false);
466  CHECK_EQ(first_row.size(), size_t(1));
467  auto scalar_tv = boost::get<ScalarTargetValue>(&first_row[0]);
468  auto ti = rex_subquery->getType();
469  if (ti.is_string()) {
470  throw std::runtime_error("Scalar sub-queries which return strings not supported");
471  }
472  Datum d{0};
473  bool is_null_const{false};
474  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
475  return makeExpr<Analyzer::Constant>(ti, is_null_const, d);
476 }
477 
478 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInput(
479  const RexInput* rex_input) const {
480  const auto source = rex_input->getSourceNode();
481  const auto it_rte_idx = input_to_nest_level_.find(source);
482  CHECK(it_rte_idx != input_to_nest_level_.end())
483  << "Not found in input_to_nest_level_, source="
484  << source->toString(RelRexToStringConfig::defaults());
485  const int rte_idx = it_rte_idx->second;
486  const auto scan_source = dynamic_cast<const RelScan*>(source);
487  const auto& in_metainfo = source->getOutputMetainfo();
488  if (scan_source) {
489  // We're at leaf (scan) level and not supposed to have input metadata,
490  // the name and type information come directly from the catalog.
491  CHECK(in_metainfo.empty());
492  const auto table_desc = scan_source->getTableDescriptor();
493  const auto cd =
494  cat_.getMetadataForColumnBySpi(table_desc->tableId, rex_input->getIndex() + 1);
495  CHECK(cd);
496  auto col_ti = cd->columnType;
497  if (col_ti.is_string()) {
498  col_ti.set_type(kTEXT);
499  }
500  if (cd->isVirtualCol) {
501  // TODO(alex): remove at some point, we only need this fixup for backwards
502  // compatibility with old imported data
503  CHECK_EQ("rowid", cd->columnName);
504  col_ti.set_size(8);
505  }
506  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
507  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
508  col_ti.set_notnull(false);
509  }
510  return std::make_shared<Analyzer::ColumnVar>(
511  col_ti, table_desc->tableId, cd->columnId, rte_idx);
512  }
513  CHECK(!in_metainfo.empty()) << "for "
514  << source->toString(RelRexToStringConfig::defaults());
515  CHECK_GE(rte_idx, 0);
516  const size_t col_id = rex_input->getIndex();
517  CHECK_LT(col_id, in_metainfo.size());
518  auto col_ti = in_metainfo[col_id].get_type_info();
519 
520  if (join_types_.size() > 0) {
521  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
522  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
523  col_ti.set_notnull(false);
524  }
525  }
526 
527  return std::make_shared<Analyzer::ColumnVar>(col_ti, -source->getId(), col_id, rte_idx);
528 }
529 
530 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUoper(
531  const RexOperator* rex_operator) const {
532  CHECK_EQ(size_t(1), rex_operator->size());
533  const auto operand_expr = translateScalarRex(rex_operator->getOperand(0));
534  const auto sql_op = rex_operator->getOperator();
535  switch (sql_op) {
536  case kCAST: {
537  const auto& target_ti = rex_operator->getType();
538  CHECK_NE(kNULLT, target_ti.get_type());
539  const auto& operand_ti = operand_expr->get_type_info();
540  if (operand_ti.is_string() && target_ti.is_string()) {
541  return operand_expr;
542  }
543  if (target_ti.is_time() ||
544  operand_ti
545  .is_string()) { // TODO(alex): check and unify with the rest of the cases
546  // Do not propogate encoding on small dates
547  return target_ti.is_date_in_days()
548  ? operand_expr->add_cast(SQLTypeInfo(kDATE, false))
549  : operand_expr->add_cast(target_ti);
550  }
551  if (!operand_ti.is_string() && target_ti.is_string()) {
552  return operand_expr->add_cast(target_ti);
553  }
554  return std::make_shared<Analyzer::UOper>(target_ti, false, sql_op, operand_expr);
555  }
556  case kENCODE_TEXT: {
557  const auto& target_ti = rex_operator->getType();
558  CHECK_NE(kNULLT, target_ti.get_type());
559  const auto& operand_ti = operand_expr->get_type_info();
560  CHECK(operand_ti.is_string());
561  if (operand_ti.is_dict_encoded_string()) {
562  // No cast needed
563  return operand_expr;
564  }
565  if (operand_expr->get_num_column_vars(true) == 0UL) {
566  return operand_expr;
567  }
568  if (g_cluster) {
569  throw std::runtime_error(
570  "ENCODE_TEXT is not currently supported in distributed mode at this time.");
571  }
572  SQLTypeInfo casted_target_ti = operand_ti;
573  casted_target_ti.set_type(kTEXT);
574  casted_target_ti.set_compression(kENCODING_DICT);
575  casted_target_ti.set_comp_param(TRANSIENT_DICT_ID);
576  casted_target_ti.set_fixed_size();
577  return makeExpr<Analyzer::UOper>(
578  casted_target_ti, operand_expr->get_contains_agg(), kCAST, operand_expr);
579  }
580  case kNOT:
581  case kISNULL: {
582  return std::make_shared<Analyzer::UOper>(kBOOLEAN, sql_op, operand_expr);
583  }
584  case kISNOTNULL: {
585  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, operand_expr);
586  return std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
587  }
588  case kMINUS: {
589  const auto& ti = operand_expr->get_type_info();
590  return std::make_shared<Analyzer::UOper>(ti, false, kUMINUS, operand_expr);
591  }
592  case kUNNEST: {
593  const auto& ti = operand_expr->get_type_info();
594  CHECK(ti.is_array());
595  return makeExpr<Analyzer::UOper>(ti.get_elem_type(), false, kUNNEST, operand_expr);
596  }
597  default:
598  CHECK(false);
599  }
600  return nullptr;
601 }
602 
603 namespace {
604 
// Builds an Analyzer::InValues expression for `arg IN (<rows of val_set>)`.
// The row set's entries are partitioned into up to cpu_threads() contiguous
// slices; each slice is converted into a private list of constant expressions by
// its own std::async task, and the per-slice lists are spliced together in slice
// order once all tasks have finished.
// Returns nullptr when the guard at the top rejects the row set.
// Throws std::runtime_error when the watchdog is enabled and the row set holds
// more than 5,000,000 rows.
605 std::shared_ptr<Analyzer::Expr> get_in_values_expr(std::shared_ptr<Analyzer::Expr> arg,
606  const ResultSet& val_set) {
// NOTE(review): the guard condition (original line 607) is absent from this
// listing — restore it from the upstream source.
608  return nullptr;
609  }
610  if (val_set.rowCount() > 5000000 && g_enable_watchdog) {
611  throw std::runtime_error(
612  "Unable to handle 'expr IN (subquery)', subquery returned 5M+ rows.");
613  }
614  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
615  const size_t fetcher_count = cpu_threads();
// One output list per task, so tasks never write to a shared container.
616  std::vector<std::list<std::shared_ptr<Analyzer::Expr>>> expr_set(
617  fetcher_count, std::list<std::shared_ptr<Analyzer::Expr>>());
618  std::vector<std::future<void>> fetcher_threads;
619  const auto& ti = arg->get_type_info();
620  const auto entry_count = val_set.entryCount();
// stride is entry_count / fetcher_count rounded up; the last slice may be short.
621  for (size_t i = 0,
622  start_entry = 0,
623  stride = (entry_count + fetcher_count - 1) / fetcher_count;
624  i < fetcher_count && start_entry < entry_count;
625  ++i, start_entry += stride) {
626  const auto end_entry = std::min(start_entry + stride, entry_count);
// NOTE(review): original line 628 (between the std::async call and the lambda)
// is absent from this listing — presumably the launch policy; confirm upstream.
627  fetcher_threads.push_back(std::async(
629  [&](std::list<std::shared_ptr<Analyzer::Expr>>& in_vals,
630  const size_t start,
631  const size_t end) {
632  for (auto index = start; index < end; ++index) {
633  auto row = val_set.getRowAt(index);
// Entries that yield an empty row are skipped.
634  if (row.empty()) {
635  continue;
636  }
637  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
638  Datum d{0};
639  bool is_null_const{false};
640  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
641  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
// NOTE(review): ti_none_encoded is computed but never used; the constant
// below is built with `ti`. Confirm whether ti_none_encoded was intended.
642  auto ti_none_encoded = ti;
643  ti_none_encoded.set_compression(kENCODING_NONE);
644  auto none_encoded_string =
645  makeExpr<Analyzer::Constant>(ti, is_null_const, d);
646  auto dict_encoded_string = std::make_shared<Analyzer::UOper>(
647  ti, false, kCAST, none_encoded_string);
648  in_vals.push_back(dict_encoded_string);
649  } else {
650  in_vals.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
651  }
652  }
653  },
654  std::ref(expr_set[i]),
655  start_entry,
656  end_entry));
657  }
// get() blocks until each task completes and rethrows any exception it raised.
658  for (auto& child : fetcher_threads) {
659  child.get();
660  }
661 
662  val_set.moveToBegin();
// splice() moves the nodes across, so the per-slice lists are left empty.
663  for (auto& exprs : expr_set) {
664  value_exprs.splice(value_exprs.end(), exprs);
665  }
666  return makeExpr<Analyzer::InValues>(arg, value_exprs);
667 }
668 
669 } // namespace
670 
671 // Creates an Analyzer expression for an IN subquery which subsequently goes through the
672 // regular Executor::codegen() mechanism. The creation of the expression out of
673 // subquery's result set is parallelized whenever possible. In addition, take advantage
674 // of additional information that elements in the right hand side are constants; see
675 // getInIntegerSetExpr().
676 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInOper(
677  const RexOperator* rex_operator) const {
678  if (just_explain_) {
679  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
680  }
681  CHECK(rex_operator->size() == 2);
682  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
683  const auto rhs = rex_operator->getOperand(1);
684  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rhs);
685  CHECK(rex_subquery);
686  auto ti = lhs->get_type_info();
687  auto result = rex_subquery->getExecutionResult();
688  CHECK(result);
689  auto& row_set = result->getRows();
690  CHECK_EQ(size_t(1), row_set->colCount());
691  const auto& rhs_ti = row_set->getColType(0);
692  if (rhs_ti.get_type() != ti.get_type()) {
693  throw std::runtime_error(
694  "The two sides of the IN operator must have the same type; found " +
695  ti.get_type_name() + " and " + rhs_ti.get_type_name());
696  }
697  row_set->moveToBegin();
698  if (row_set->entryCount() > 10000) {
699  std::shared_ptr<Analyzer::Expr> expr;
700  if ((ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT)) &&
701  !row_set->getQueryMemDesc().didOutputColumnar()) {
702  expr = getInIntegerSetExpr(lhs, *row_set);
703  // Handle the highly unlikely case when the InIntegerSet ended up being tiny.
704  // Just let it fall through the usual InValues path at the end of this method,
705  // its codegen knows to use inline comparisons for few values.
706  if (expr && std::static_pointer_cast<Analyzer::InIntegerSet>(expr)
707  ->get_value_list()
708  .size() <= 100) {
709  expr = nullptr;
710  }
711  } else {
712  expr = get_in_values_expr(lhs, *row_set);
713  }
714  if (expr) {
715  return expr;
716  }
717  }
718  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
719  while (true) {
720  auto row = row_set->getNextRow(true, false);
721  if (row.empty()) {
722  break;
723  }
724  if (g_enable_watchdog && value_exprs.size() >= 10000) {
725  throw std::runtime_error(
726  "Unable to handle 'expr IN (subquery)', subquery returned 10000+ rows.");
727  }
728  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
729  Datum d{0};
730  bool is_null_const{false};
731  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
732  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
733  auto ti_none_encoded = ti;
734  ti_none_encoded.set_compression(kENCODING_NONE);
735  auto none_encoded_string = makeExpr<Analyzer::Constant>(ti, is_null_const, d);
736  auto dict_encoded_string =
737  std::make_shared<Analyzer::UOper>(ti, false, kCAST, none_encoded_string);
738  value_exprs.push_back(dict_encoded_string);
739  } else {
740  value_exprs.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
741  }
742  }
743  return makeExpr<Analyzer::InValues>(lhs, value_exprs);
744 }
745 
746 namespace {
747 
// Watchdog ceiling on the total number of values gathered for an IN integer set:
// 2^25 = 33,554,432.
constexpr size_t g_max_integer_set_size{size_t{1} << 25};
749 
751  std::vector<int64_t>& in_vals,
752  std::atomic<size_t>& total_in_vals_count,
753  const ResultSet* values_rowset,
754  const std::pair<int64_t, int64_t> values_rowset_slice,
755  const StringDictionaryProxy* source_dict,
756  const StringDictionaryProxy* dest_dict,
757  const int64_t needle_null_val) {
758  CHECK(in_vals.empty());
759  bool dicts_are_equal = source_dict == dest_dict;
760  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
761  ++index) {
762  const auto row = values_rowset->getOneColRow(index);
763  if (UNLIKELY(!row.valid)) {
764  continue;
765  }
766  if (dicts_are_equal) {
767  in_vals.push_back(row.value);
768  } else {
769  const int string_id =
770  row.value == needle_null_val
771  ? needle_null_val
772  : dest_dict->getIdOfString(source_dict->getString(row.value));
773  if (string_id != StringDictionary::INVALID_STR_ID) {
774  in_vals.push_back(string_id);
775  }
776  }
777  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
778  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
779  throw std::runtime_error(
780  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
781  }
782  }
783 }
784 
785 void fill_integer_in_vals(std::vector<int64_t>& in_vals,
786  std::atomic<size_t>& total_in_vals_count,
787  const ResultSet* values_rowset,
788  const std::pair<int64_t, int64_t> values_rowset_slice) {
789  CHECK(in_vals.empty());
790  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
791  ++index) {
792  const auto row = values_rowset->getOneColRow(index);
793  if (row.valid) {
794  in_vals.push_back(row.value);
795  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
796  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
797  throw std::runtime_error(
798  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
799  }
800  }
801  }
802 }
803 
804 // Multi-node counterpart of the other version. Saves round-trips, which is crucial
805 // for a big right-hand side result. It only handles physical string dictionary ids,
806 // therefore it won't be able to handle a right-hand side sub-query with a CASE
807 // returning literals on some branches. That case isn't hard too handle either, but
808 // it's not clear it's actually important in practice.
809 // RelAlgTranslator::getInIntegerSetExpr makes sure, by checking the encodings, that
810 // this function isn't called in such cases.
812  std::vector<int64_t>& in_vals,
813  std::atomic<size_t>& total_in_vals_count,
814  const ResultSet* values_rowset,
815  const std::pair<int64_t, int64_t> values_rowset_slice,
816  const std::vector<LeafHostInfo>& leaf_hosts,
817  const DictRef source_dict_ref,
818  const DictRef dest_dict_ref,
819  const int32_t dest_generation,
820  const int64_t needle_null_val) {
821  CHECK(in_vals.empty());
822  std::vector<int32_t> source_ids;
823  source_ids.reserve(values_rowset->entryCount());
824  bool has_nulls = false;
825  if (source_dict_ref == dest_dict_ref) {
826  in_vals.reserve(values_rowset_slice.second - values_rowset_slice.first +
827  1); // Add 1 to cover interval
828  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
829  ++index) {
830  const auto row = values_rowset->getOneColRow(index);
831  if (!row.valid) {
832  continue;
833  }
834  if (row.value != needle_null_val) {
835  in_vals.push_back(row.value);
836  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
837  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
838  throw std::runtime_error(
839  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
840  }
841  } else {
842  has_nulls = true;
843  }
844  }
845  if (has_nulls) {
846  in_vals.push_back(
847  needle_null_val); // we've deduped null values as an optimization, although
848  // this is not required by consumer
849  }
850  return;
851  }
852  // Code path below is for when dictionaries are not shared
853  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
854  ++index) {
855  const auto row = values_rowset->getOneColRow(index);
856  if (row.valid) {
857  if (row.value != needle_null_val) {
858  source_ids.push_back(row.value);
859  } else {
860  has_nulls = true;
861  }
862  }
863  }
864  std::vector<int32_t> dest_ids;
865  translate_string_ids(dest_ids,
866  leaf_hosts.front(),
867  dest_dict_ref,
868  source_ids,
869  source_dict_ref,
870  dest_generation);
871  CHECK_EQ(dest_ids.size(), source_ids.size());
872  in_vals.reserve(dest_ids.size() + (has_nulls ? 1 : 0));
873  if (has_nulls) {
874  in_vals.push_back(needle_null_val);
875  }
876  for (const int32_t dest_id : dest_ids) {
877  if (dest_id != StringDictionary::INVALID_STR_ID) {
878  in_vals.push_back(dest_id);
879  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
880  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
881  throw std::runtime_error(
882  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
883  }
884  }
885  }
886 }
887 
888 } // namespace
889 
890 // The typical IN subquery involves either dictionary-encoded strings or integers.
891 // Analyzer::InValues is a very heavy representation of the right hand side of such
892 // a query since we already know the right hand would be a list of Analyzer::Constant
893 // shared pointers. We can avoid the big overhead of each Analyzer::Constant and the
894 // refcounting associated with shared pointers by creating an abbreviated InIntegerSet
895 // representation of the IN expression which takes advantage of this information.
// Builds an Analyzer::InIntegerSet for `arg IN (<single-column result set>)` by
// reading val_set in parallel slices, one std::async task per slice.
// Returns nullptr on the early-exit paths below.
// NOTE(review): this listing omits source lines 899, 936, 948, 952, 958 and 973;
// the comments here only describe what the visible lines establish.
896 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::getInIntegerSetExpr(
897  std::shared_ptr<Analyzer::Expr> arg,
898  const ResultSet& val_set) const {
 // NOTE(review): the guard condition for this early return (source line 899) is
 // missing from the listing.
900  return nullptr;
901  }
902  std::vector<int64_t> value_exprs;
 // One output vector per potential fetcher; each task writes only to its own slot.
903  const size_t fetcher_count = cpu_threads();
904  std::vector<std::vector<int64_t>> expr_set(fetcher_count);
905  std::vector<std::future<void>> fetcher_threads;
906  const auto& arg_type = arg->get_type_info();
907  const auto entry_count = val_set.entryCount();
908  CHECK_EQ(size_t(1), val_set.colCount());
909  const auto& col_type = val_set.getColType(0);
 // In cluster mode, string arguments are only handled when both comp params are > 0.
910  if (g_cluster && arg_type.is_string() &&
911  (col_type.get_comp_param() <= 0 || arg_type.get_comp_param() <= 0)) {
912  // Skip this case for now, see comment for fill_dictionary_encoded_in_vals.
913  return nullptr;
914  }
 // Counter shared by all tasks and handed to the fill_* helpers.
915  std::atomic<size_t> total_in_vals_count{0};
 // stride is entry_count / fetcher_count rounded up; iteration i covers
 // [start_entry, min(start_entry + stride, entry_count)).
916  for (size_t i = 0,
917  start_entry = 0,
918  stride = (entry_count + fetcher_count - 1) / fetcher_count;
919  i < fetcher_count && start_entry < entry_count;
920  ++i, start_entry += stride) {
921  expr_set[i].reserve(entry_count / fetcher_count);
922  const auto end_entry = std::min(start_entry + stride, entry_count);
923  if (arg_type.is_string()) {
 // String arguments must be dictionary encoded.
924  CHECK_EQ(kENCODING_DICT, arg_type.get_compression());
925  // const int32_t dest_dict_id = arg_type.get_comp_param();
926  // const int32_t source_dict_id = col_type.get_comp_param();
927  const DictRef dest_dict_ref(arg_type.get_comp_param(), cat_.getDatabaseId());
928  const DictRef source_dict_ref(col_type.get_comp_param(), cat_.getDatabaseId());
 // dd: proxy for the argument's dictionary; sd: proxy for the subquery column's.
929  const auto dd = executor_->getStringDictionaryProxy(
930  arg_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
931  const auto sd = executor_->getStringDictionaryProxy(
932  col_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
933  CHECK(sd);
934  const auto needle_null_val = inline_int_null_val(arg_type);
935  fetcher_threads.push_back(std::async(
 // NOTE(review): source line 936 (an argument preceding the lambda) is missing.
937  [this,
938  &val_set,
939  &total_in_vals_count,
940  sd,
941  dd,
942  source_dict_ref,
943  dest_dict_ref,
944  needle_null_val](
945  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
946  if (g_cluster) {
947  CHECK_GE(dd->getGeneration(), 0);
 // NOTE(review): source lines 948 and 952 (callee name and one argument) are
 // missing; presumably the dict-ref overload of fill_dictionary_encoded_in_vals.
949  total_in_vals_count,
950  &val_set,
951  {start, end},
953  source_dict_ref,
954  dest_dict_ref,
955  dd->getGeneration(),
956  needle_null_val);
957  } else {
 // NOTE(review): source line 958 (callee name) is missing; presumably the
 // proxy-based overload of fill_dictionary_encoded_in_vals.
959  total_in_vals_count,
960  &val_set,
961  {start, end},
962  sd,
963  dd,
964  needle_null_val);
965  }
966  },
967  std::ref(expr_set[i]),
968  start_entry,
969  end_entry));
970  } else {
 // Anything that is not a string must be an integer type.
971  CHECK(arg_type.is_integer());
972  fetcher_threads.push_back(std::async(
 // NOTE(review): source line 973 (an argument preceding the lambda) is missing.
974  [&val_set, &total_in_vals_count](
975  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
976  fill_integer_in_vals(in_vals, total_in_vals_count, &val_set, {start, end});
977  },
978  std::ref(expr_set[i]),
979  start_entry,
980  end_entry));
981  }
982  }
 // Wait for every task; std::future::get() rethrows an exception raised in a task.
983  for (auto& child : fetcher_threads) {
984  child.get();
985  }
986 
987  val_set.moveToBegin();
 // Concatenate the per-task vectors in slice order.
988  value_exprs.reserve(entry_count);
989  for (auto& exprs : expr_set) {
990  value_exprs.insert(value_exprs.end(), exprs.begin(), exprs.end());
991  }
 // The last argument is true only if both the argument and the subquery column are NOT NULL.
992  return makeExpr<Analyzer::InIntegerSet>(
993  arg, value_exprs, arg_type.get_notnull() && col_type.get_notnull());
994 }
995 
996 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOper(
997  const RexOperator* rex_operator) const {
998  CHECK_GT(rex_operator->size(), size_t(0));
999  if (rex_operator->size() == 1) {
1000  return translateUoper(rex_operator);
1001  }
1002  const auto sql_op = rex_operator->getOperator();
1003  if (sql_op == kIN) {
1004  return translateInOper(rex_operator);
1005  }
1006  if (sql_op == kMINUS || sql_op == kPLUS) {
1007  auto date_plus_minus = translateDatePlusMinus(rex_operator);
1008  if (date_plus_minus) {
1009  return date_plus_minus;
1010  }
1011  }
1012  if (sql_op == kOVERLAPS) {
1013  return translateOverlapsOper(rex_operator);
1014  } else if (IS_COMPARISON(sql_op)) {
1015  auto geo_comp = translateGeoComparison(rex_operator);
1016  if (geo_comp) {
1017  return geo_comp;
1018  }
1019  }
1020  auto lhs = translateScalarRex(rex_operator->getOperand(0));
1021  for (size_t i = 1; i < rex_operator->size(); ++i) {
1022  std::shared_ptr<Analyzer::Expr> rhs;
1023  SQLQualifier sql_qual{kONE};
1024  const auto rhs_op = rex_operator->getOperand(i);
1025  std::tie(rhs, sql_qual) = getQuantifiedRhs(rhs_op);
1026  if (!rhs) {
1027  rhs = translateScalarRex(rhs_op);
1028  }
1029  CHECK(rhs);
1030 
1031  // Pass in executor to get string proxy info if cast needed between
1032  // string columns
1033  lhs = Parser::OperExpr::normalize(sql_op, sql_qual, lhs, rhs, executor_);
1034  }
1035  return lhs;
1036 }
1037 
1038 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOverlapsOper(
1039  const RexOperator* rex_operator) const {
1040  const auto sql_op = rex_operator->getOperator();
1041  CHECK(sql_op == kOVERLAPS);
1042 
1043  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
1044  const auto lhs_ti = lhs->get_type_info();
1045  if (lhs_ti.is_geometry()) {
1046  return translateGeoOverlapsOper(rex_operator);
1047  } else {
1048  throw std::runtime_error(
1049  "Overlaps equivalence is currently only supported for geospatial types");
1050  }
1051 }
1052 
1053 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCase(
1054  const RexCase* rex_case) const {
1055  std::shared_ptr<Analyzer::Expr> else_expr;
1056  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1057  expr_list;
1058  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1059  const auto when_expr = translateScalarRex(rex_case->getWhen(i));
1060  const auto then_expr = translateScalarRex(rex_case->getThen(i));
1061  expr_list.emplace_back(when_expr, then_expr);
1062  }
1063  if (rex_case->getElse()) {
1064  else_expr = translateScalarRex(rex_case->getElse());
1065  }
1066  return Parser::CaseExpr::normalize(expr_list, else_expr, executor_);
1067 }
1068 
1069 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWidthBucket(
1070  const RexFunctionOperator* rex_function) const {
1071  CHECK(rex_function->size() == 4);
1072  auto target_value = translateScalarRex(rex_function->getOperand(0));
1073  auto lower_bound = translateScalarRex(rex_function->getOperand(1));
1074  auto upper_bound = translateScalarRex(rex_function->getOperand(2));
1075  auto partition_count = translateScalarRex(rex_function->getOperand(3));
1076  if (!partition_count->get_type_info().is_integer()) {
1077  throw std::runtime_error(
1078  "PARTITION_COUNT expression of width_bucket function expects an integer type.");
1079  }
1080  auto check_numeric_type =
1081  [](const std::string& col_name, const Analyzer::Expr* expr, bool allow_null_type) {
1082  if (expr->get_type_info().get_type() == kNULLT) {
1083  if (!allow_null_type) {
1084  throw std::runtime_error(
1085  col_name + " expression of width_bucket function expects non-null type.");
1086  }
1087  return;
1088  }
1089  if (!expr->get_type_info().is_number()) {
1090  throw std::runtime_error(
1091  col_name + " expression of width_bucket function expects a numeric type.");
1092  }
1093  };
1094  // target value may have null value
1095  check_numeric_type("TARGET_VALUE", target_value.get(), true);
1096  check_numeric_type("LOWER_BOUND", lower_bound.get(), false);
1097  check_numeric_type("UPPER_BOUND", upper_bound.get(), false);
1098 
1099  auto cast_to_double_if_necessary = [](std::shared_ptr<Analyzer::Expr> arg) {
1100  const auto& arg_ti = arg->get_type_info();
1101  if (arg_ti.get_type() != kDOUBLE) {
1102  const auto& double_ti = SQLTypeInfo(kDOUBLE, arg_ti.get_notnull());
1103  return arg->add_cast(double_ti);
1104  }
1105  return arg;
1106  };
1107  target_value = cast_to_double_if_necessary(target_value);
1108  lower_bound = cast_to_double_if_necessary(lower_bound);
1109  upper_bound = cast_to_double_if_necessary(upper_bound);
1110  return makeExpr<Analyzer::WidthBucketExpr>(
1111  target_value, lower_bound, upper_bound, partition_count);
1112 }
1113 
1114 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLike(
1115  const RexFunctionOperator* rex_function) const {
1116  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
1117  const auto arg = translateScalarRex(rex_function->getOperand(0));
1118  const auto like = translateScalarRex(rex_function->getOperand(1));
1119  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(like)) {
1120  throw std::runtime_error("The matching pattern must be a literal.");
1121  }
1122  const auto escape = (rex_function->size() == 3)
1123  ? translateScalarRex(rex_function->getOperand(2))
1124  : nullptr;
1125  const bool is_ilike = rex_function->getName() == "PG_ILIKE"sv;
1126  return Parser::LikeExpr::get(arg, like, escape, is_ilike, false);
1127 }
1128 
1129 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRegexp(
1130  const RexFunctionOperator* rex_function) const {
1131  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
1132  const auto arg = translateScalarRex(rex_function->getOperand(0));
1133  const auto pattern = translateScalarRex(rex_function->getOperand(1));
1134  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(pattern)) {
1135  throw std::runtime_error("The matching pattern must be a literal.");
1136  }
1137  const auto escape = (rex_function->size() == 3)
1138  ? translateScalarRex(rex_function->getOperand(2))
1139  : nullptr;
1140  return Parser::RegexpExpr::get(arg, pattern, escape, false);
1141 }
1142 
1143 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLikely(
1144  const RexFunctionOperator* rex_function) const {
1145  CHECK(rex_function->size() == 1);
1146  const auto arg = translateScalarRex(rex_function->getOperand(0));
1147  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.9375);
1148 }
1149 
1150 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUnlikely(
1151  const RexFunctionOperator* rex_function) const {
1152  CHECK(rex_function->size() == 1);
1153  const auto arg = translateScalarRex(rex_function->getOperand(0));
1154  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.0625);
1155 }
1156 
1157 namespace {
1158 
// Rejects a DatePart literal that is absent (empty pointer) or marked null by
// throwing std::runtime_error.
// NOTE(review): source line 1159, which should carry the helper's return type and
// name, is missing from this listing — confirm against the original file.
1160  const std::shared_ptr<Analyzer::Constant> literal_expr) {
1161  if (!literal_expr || literal_expr->get_is_null()) {
1162  throw std::runtime_error("The 'DatePart' argument must be a not 'null' literal.");
1163  }
1164 }
1165 
1166 } // namespace
1167 
// Translates a two-operand extract-style call: operand 0 is the time unit
// (read as a string constant), operand 1 is the datetime expression.
// A function named PG_DATE_TRUNC produces a DateTruncExpr; any other name
// produces an ExtractExpr.
1168 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateExtract(
1169  const RexFunctionOperator* rex_function) const {
1170  CHECK_EQ(size_t(2), rex_function->size());
1171  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
 // timeunit_lit is empty when the time unit is not an Analyzer::Constant.
1172  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
 // NOTE(review): source line 1173 is missing from this listing; presumably it
 // validates timeunit_lit before the dereferences below — confirm.
1174  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1175  const bool is_date_trunc = rex_function->getName() == "PG_DATE_TRUNC"sv;
1176  if (is_date_trunc) {
1177  return DateTruncExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
1178  } else {
1179  return ExtractExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
1180  }
1181 }
1182 
1183 namespace {
1184 
1185 std::shared_ptr<Analyzer::Constant> makeNumericConstant(const SQLTypeInfo& ti,
1186  const long val) {
1187  CHECK(ti.is_number());
1188  Datum datum{0};
1189  switch (ti.get_type()) {
1190  case kTINYINT: {
1191  datum.tinyintval = val;
1192  break;
1193  }
1194  case kSMALLINT: {
1195  datum.smallintval = val;
1196  break;
1197  }
1198  case kINT: {
1199  datum.intval = val;
1200  break;
1201  }
1202  case kBIGINT: {
1203  datum.bigintval = val;
1204  break;
1205  }
1206  case kDECIMAL:
1207  case kNUMERIC: {
1208  datum.bigintval = val * exp_to_scale(ti.get_scale());
1209  break;
1210  }
1211  case kFLOAT: {
1212  datum.floatval = val;
1213  break;
1214  }
1215  case kDOUBLE: {
1216  datum.doubleval = val;
1217  break;
1218  }
1219  default:
1220  CHECK(false);
1221  }
1222  return makeExpr<Analyzer::Constant>(ti, false, datum);
1223 }
1224 
1225 } // namespace
1226 
// Translates a three-operand DATEADD call: operand 0 is the time unit (read as a
// string constant), operand 1 the number of units, operand 2 the datetime.
// Throws std::runtime_error for a null-literal unit count or a TIME operand.
1227 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDateadd(
1228  const RexFunctionOperator* rex_function) const {
1229  CHECK_EQ(size_t(3), rex_function->size());
1230  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1231  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
 // NOTE(review): source line 1232 is missing from this listing; presumably it
 // validates timeunit_lit before it is dereferenced below — confirm.
1233  const auto number_units = translateScalarRex(rex_function->getOperand(1));
1234  const auto number_units_const =
1235  std::dynamic_pointer_cast<Analyzer::Constant>(number_units);
 // Only a literal can be rejected here; a non-constant unit count passes through.
1236  if (number_units_const && number_units_const->get_is_null()) {
1237  throw std::runtime_error("The 'Interval' argument literal must not be 'null'.");
1238  }
 // The unit count is always cast to BIGINT.
1239  const auto cast_number_units = number_units->add_cast(SQLTypeInfo(kBIGINT, false));
1240  const auto datetime = translateScalarRex(rex_function->getOperand(2));
1241  const auto& datetime_ti = datetime->get_type_info();
1242  if (datetime_ti.get_type() == kTIME) {
1243  throw std::runtime_error("DateAdd operation not supported for TIME.");
1244  }
1245  const auto& field = to_dateadd_field(*timeunit_lit->get_constval().stringval);
 // The result is a TIMESTAMP that reuses the input's dimension.
1246  const int dim = datetime_ti.get_dimension();
1247  return makeExpr<Analyzer::DateaddExpr>(
1248  SQLTypeInfo(kTIMESTAMP, dim, 0, false), field, cast_number_units, datetime);
1249 }
1250 
1251 namespace {
1252 
// Returns the function name "DATETIME_PLUS"; the operator must be kPLUS (CHECKed).
// NOTE(review): source line 1253 (the helper's signature) is missing from this
// listing; the name get_datetimeplus_rewrite_funcname is taken from its call site
// in translateDatePlusMinus — confirm against the original file.
1254  CHECK(op == kPLUS);
1255  return "DATETIME_PLUS"s;
1256 }
1257 
1258 } // namespace
1259 
1260 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatePlusMinus(
1261  const RexOperator* rex_operator) const {
1262  if (rex_operator->size() != 2) {
1263  return nullptr;
1264  }
1265  const auto datetime = translateScalarRex(rex_operator->getOperand(0));
1266  const auto datetime_ti = datetime->get_type_info();
1267  if (!datetime_ti.is_timestamp() && !datetime_ti.is_date()) {
1268  if (datetime_ti.get_type() == kTIME) {
1269  throw std::runtime_error("DateTime addition/subtraction not supported for TIME.");
1270  }
1271  return nullptr;
1272  }
1273  const auto rhs = translateScalarRex(rex_operator->getOperand(1));
1274  const auto rhs_ti = rhs->get_type_info();
1275  if (rhs_ti.get_type() == kTIMESTAMP || rhs_ti.get_type() == kDATE) {
1276  if (datetime_ti.is_high_precision_timestamp() ||
1277  rhs_ti.is_high_precision_timestamp()) {
1278  throw std::runtime_error(
1279  "High Precision timestamps are not supported for TIMESTAMPDIFF operation. "
1280  "Use "
1281  "DATEDIFF.");
1282  }
1283  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1284  const auto& rex_operator_ti = rex_operator->getType();
1285  const auto datediff_field =
1286  (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) ? dtSECOND : dtMONTH;
1287  auto result =
1288  makeExpr<Analyzer::DatediffExpr>(bigint_ti, datediff_field, rhs, datetime);
1289  // multiply 1000 to result since expected result should be in millisecond precision.
1290  if (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) {
1291  return makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1292  kMULTIPLY,
1293  kONE,
1294  result,
1295  makeNumericConstant(bigint_ti, 1000));
1296  } else {
1297  return result;
1298  }
1299  }
1300  const auto op = rex_operator->getOperator();
1301  if (op == kPLUS) {
1302  std::vector<std::shared_ptr<Analyzer::Expr>> args = {datetime, rhs};
1303  auto dt_plus = makeExpr<Analyzer::FunctionOper>(
1304  datetime_ti, get_datetimeplus_rewrite_funcname(op), args);
1305  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1306  if (date_trunc) {
1307  return date_trunc;
1308  }
1309  }
1310  const auto interval = fold_expr(rhs.get());
1311  auto interval_ti = interval->get_type_info();
1312  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1313  const auto interval_lit = std::dynamic_pointer_cast<Analyzer::Constant>(interval);
1314  if (interval_ti.get_type() == kINTERVAL_DAY_TIME) {
1315  std::shared_ptr<Analyzer::Expr> interval_sec;
1316  if (interval_lit) {
1317  interval_sec =
1318  makeNumericConstant(bigint_ti,
1319  (op == kMINUS ? -interval_lit->get_constval().bigintval
1320  : interval_lit->get_constval().bigintval) /
1321  1000);
1322  } else {
1323  interval_sec = makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1324  kDIVIDE,
1325  kONE,
1326  interval,
1327  makeNumericConstant(bigint_ti, 1000));
1328  if (op == kMINUS) {
1329  interval_sec =
1330  std::make_shared<Analyzer::UOper>(bigint_ti, false, kUMINUS, interval_sec);
1331  }
1332  }
1333  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daSECOND, interval_sec, datetime);
1334  }
1335  CHECK(interval_ti.get_type() == kINTERVAL_YEAR_MONTH);
1336  const auto interval_months = op == kMINUS ? std::make_shared<Analyzer::UOper>(
1337  bigint_ti, false, kUMINUS, interval)
1338  : interval;
1339  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daMONTH, interval_months, datetime);
1340 }
1341 
// Translates a three-operand DATEDIFF call: operand 0 is the time unit (read as
// a string constant), operands 1 and 2 are the start and end expressions.
// The resulting DatediffExpr is typed BIGINT.
1342 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatediff(
1343  const RexFunctionOperator* rex_function) const {
1344  CHECK_EQ(size_t(3), rex_function->size());
1345  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1346  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
 // NOTE(review): source line 1347 is missing from this listing; presumably it
 // validates timeunit_lit before it is dereferenced below — confirm.
1348  const auto start = translateScalarRex(rex_function->getOperand(1));
1349  const auto end = translateScalarRex(rex_function->getOperand(2));
1350  const auto field = to_datediff_field(*timeunit_lit->get_constval().stringval);
1351  return makeExpr<Analyzer::DatediffExpr>(SQLTypeInfo(kBIGINT, false), field, start, end);
1352 }
1353 
// Translates a two-operand DATEPART call: operand 0 is the time unit (read as a
// string constant and mapped through to_datepart_field), operand 1 the datetime.
// The result is produced by ExtractExpr::generate.
1354 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatepart(
1355  const RexFunctionOperator* rex_function) const {
1356  CHECK_EQ(size_t(2), rex_function->size());
1357  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1358  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
 // NOTE(review): source line 1359 is missing from this listing; presumably it
 // validates timeunit_lit before it is dereferenced below — confirm.
1360  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1361  return ExtractExpr::generate(
1362  from_expr, to_datepart_field(*timeunit_lit->get_constval().stringval));
1363 }
1364 
1365 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLength(
1366  const RexFunctionOperator* rex_function) const {
1367  CHECK_EQ(size_t(1), rex_function->size());
1368  const auto str_arg = translateScalarRex(rex_function->getOperand(0));
1369  return makeExpr<Analyzer::CharLengthExpr>(str_arg->decompress(),
1370  rex_function->getName() == "CHAR_LENGTH"sv);
1371 }
1372 
1373 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateKeyForString(
1374  const RexFunctionOperator* rex_function) const {
1375  const auto& args = translateFunctionArgs(rex_function);
1376  CHECK_EQ(size_t(1), args.size());
1377  const auto expr = dynamic_cast<Analyzer::Expr*>(args[0].get());
1378  if (nullptr == expr || !expr->get_type_info().is_string() ||
1379  expr->get_type_info().is_varlen()) {
1380  throw std::runtime_error(rex_function->getName() +
1381  " expects a dictionary encoded text column.");
1382  }
1383  auto unnest_arg = dynamic_cast<Analyzer::UOper*>(expr);
1384  if (unnest_arg && unnest_arg->get_optype() == SQLOps::kUNNEST) {
1385  throw std::runtime_error(
1386  rex_function->getName() +
1387  " does not support unnest operator as its input expression.");
1388  }
1389  return makeExpr<Analyzer::KeyForStringExpr>(args[0]);
1390 }
1391 
1392 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSampleRatio(
1393  const RexFunctionOperator* rex_function) const {
1394  CHECK_EQ(size_t(1), rex_function->size());
1395  auto arg = translateScalarRex(rex_function->getOperand(0));
1396  const auto& arg_ti = arg->get_type_info();
1397  if (arg_ti.get_type() != kDOUBLE) {
1398  const auto& double_ti = SQLTypeInfo(kDOUBLE, arg_ti.get_notnull());
1399  arg = arg->add_cast(double_ti);
1400  }
1401  return makeExpr<Analyzer::SampleRatioExpr>(arg);
1402 }
1403 
1404 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentUser(
1405  const RexFunctionOperator* rex_function) const {
1406  std::string user{"SESSIONLESS_USER"};
1407  if (query_state_) {
1408  user = query_state_->getConstSessionInfo()->get_currentUser().userName;
1409  }
1410  return Parser::UserLiteral::get(user);
1411 }
1412 
// Translates a string function call into the matching Analyzer::*StringOper
// node, selected by the SqlStringOpKind derived from the function name.
// Throws std::runtime_error for names/kinds it does not handle.
// NOTE(review): this listing omits many source lines of this function (1416 and
// most of the `case` labels, e.g. 1425, 1427, ... 1466), so the mapping from kind
// to node type cannot be read off the visible text — confirm against the original.
1413 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateStringOper(
1414  const RexFunctionOperator* rex_function) const {
1415  const auto func_name = rex_function->getName();
 // NOTE(review): the condition guarding this "not supported" error (source line
 // 1416) is missing from the listing.
1417  std::ostringstream oss;
1418  oss << "Function " << func_name << " not supported.";
1419  throw std::runtime_error(oss.str());
1420  }
1421  const auto string_op_kind = ::name_to_string_op_kind(func_name);
1422  auto args = translateFunctionArgs(rex_function);
1423 
1424  switch (string_op_kind) {
1426  return makeExpr<Analyzer::LowerStringOper>(args);
1428  return makeExpr<Analyzer::UpperStringOper>(args);
1430  return makeExpr<Analyzer::InitCapStringOper>(args);
1432  return makeExpr<Analyzer::ReverseStringOper>(args);
1434  return makeExpr<Analyzer::RepeatStringOper>(args);
1436  return makeExpr<Analyzer::ConcatStringOper>(args);
 // Pad and trim nodes also receive the op kind so one node type serves each family.
1437  case SqlStringOpKind::LPAD:
1438  case SqlStringOpKind::RPAD: {
1439  return makeExpr<Analyzer::PadStringOper>(string_op_kind, args);
1440  }
1441  case SqlStringOpKind::TRIM:
1443  case SqlStringOpKind::RTRIM: {
1444  return makeExpr<Analyzer::TrimStringOper>(string_op_kind, args);
1445  }
1447  return makeExpr<Analyzer::SubstringStringOper>(args);
1449  return makeExpr<Analyzer::OverlayStringOper>(args);
1451  return makeExpr<Analyzer::ReplaceStringOper>(args);
1453  return makeExpr<Analyzer::SplitPartStringOper>(args);
1455  return makeExpr<Analyzer::RegexpReplaceStringOper>(args);
1457  return makeExpr<Analyzer::RegexpSubstrStringOper>(args);
1459  return makeExpr<Analyzer::JsonValueStringOper>(args);
1461  return makeExpr<Analyzer::Base64EncodeStringOper>(args);
1463  return makeExpr<Analyzer::Base64DecodeStringOper>(args);
 // This node is the only one that also takes the call's declared return type.
1465  return makeExpr<Analyzer::TryStringCastOper>(rex_function->getType(), args);
1467  return makeExpr<Analyzer::PositionStringOper>(args);
1468  default: {
1469  throw std::runtime_error("Unsupported string function.");
1470  }
1471  }
1472 }
1473 
// Translates an array-cardinality call on a one-dimensional array expression.
// A positive type size yields a compile-time constant (size / element size);
// otherwise a CardinalityExpr is emitted for evaluation at runtime.
// Throws std::runtime_error for non-array or nested-array arguments and for an
// unexpected element size.
// NOTE(review): source line 1474 (first line of the signature) is missing from
// this listing; presumably this is RelAlgTranslator::translateCardinality — confirm.
1475  const RexFunctionOperator* rex_function) const {
1476  const auto ret_ti = rex_function->getType();
1477  const auto arg = translateScalarRex(rex_function->getOperand(0));
1478  const auto arg_ti = arg->get_type_info();
1479  if (!arg_ti.is_array()) {
1480  throw std::runtime_error(rex_function->getName() + " expects an array expression.");
1481  }
1482  if (arg_ti.get_subtype() == kARRAY) {
1483  throw std::runtime_error(rex_function->getName() +
1484  " expects one-dimension array expression.");
1485  }
1486  const auto array_size = arg_ti.get_size();
1487  const auto array_elem_size = arg_ti.get_elem_type().get_array_context_logical_size();
1488 
1489  if (array_size > 0) {
 // Guard the division below against a non-positive element size.
1490  if (array_elem_size <= 0) {
1491  throw std::runtime_error(rex_function->getName() +
1492  ": unexpected array element type.");
1493  }
1494  // Return cardinality of a fixed length array
1495  return makeNumericConstant(ret_ti, array_size / array_elem_size);
1496  }
1497  // Variable length array cardinality will be calculated at runtime
1498  return makeExpr<Analyzer::CardinalityExpr>(arg);
1499 }
1500 
1501 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateItem(
1502  const RexFunctionOperator* rex_function) const {
1503  CHECK_EQ(size_t(2), rex_function->size());
1504  const auto base = translateScalarRex(rex_function->getOperand(0));
1505  const auto index = translateScalarRex(rex_function->getOperand(1));
1506  return makeExpr<Analyzer::BinOper>(
1507  base->get_type_info().get_elem_type(), false, kARRAY_AT, kONE, base, index);
1508 }
1509 
1510 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentDate() const {
1511  constexpr bool is_null = false;
1512  Datum datum;
1513  datum.bigintval = now_ - now_ % (24 * 60 * 60); // Assumes 0 < now_.
1514  return makeExpr<Analyzer::Constant>(kDATE, is_null, datum);
1515 }
1516 
1517 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentTime() const {
1518  constexpr bool is_null = false;
1519  Datum datum;
1520  datum.bigintval = now_ % (24 * 60 * 60); // Assumes 0 < now_.
1521  return makeExpr<Analyzer::Constant>(kTIME, is_null, datum);
1522 }
1523 
// Translates CURRENT_TIMESTAMP / NOW into an expression.
// NOTE(review): the body (source line 1525) is missing from this listing, so what
// is returned cannot be confirmed here — check the original file.
1524 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentTimestamp() const {
1526 }
1527 
1528 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatetime(
1529  const RexFunctionOperator* rex_function) const {
1530  CHECK_EQ(size_t(1), rex_function->size());
1531  const auto arg = translateScalarRex(rex_function->getOperand(0));
1532  const auto arg_lit = std::dynamic_pointer_cast<Analyzer::Constant>(arg);
1533  const std::string datetime_err{R"(Only DATETIME('NOW') supported for now.)"};
1534  if (!arg_lit || arg_lit->get_is_null()) {
1535  throw std::runtime_error(datetime_err);
1536  }
1537  CHECK(arg_lit->get_type_info().is_string());
1538  if (*arg_lit->get_constval().stringval != "NOW"sv) {
1539  throw std::runtime_error(datetime_err);
1540  }
1541  return translateCurrentTimestamp();
1542 }
1543 
1544 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAbs(
1545  const RexFunctionOperator* rex_function) const {
1546  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1547  expr_list;
1548  CHECK_EQ(size_t(1), rex_function->size());
1549  const auto operand = translateScalarRex(rex_function->getOperand(0));
1550  const auto& operand_ti = operand->get_type_info();
1551  CHECK(operand_ti.is_number());
1552  const auto zero = makeNumericConstant(operand_ti, 0);
1553  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1554  const auto uminus_operand =
1555  makeExpr<Analyzer::UOper>(operand_ti.get_type(), kUMINUS, operand);
1556  expr_list.emplace_back(lt_zero, uminus_operand);
1557  return makeExpr<Analyzer::CaseExpr>(operand_ti, false, expr_list, operand);
1558 }
1559 
1560 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSign(
1561  const RexFunctionOperator* rex_function) const {
1562  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1563  expr_list;
1564  CHECK_EQ(size_t(1), rex_function->size());
1565  const auto operand = translateScalarRex(rex_function->getOperand(0));
1566  const auto& operand_ti = operand->get_type_info();
1567  CHECK(operand_ti.is_number());
1568  const auto zero = makeNumericConstant(operand_ti, 0);
1569  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1570  expr_list.emplace_back(lt_zero, makeNumericConstant(operand_ti, -1));
1571  const auto eq_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, operand, zero);
1572  expr_list.emplace_back(eq_zero, makeNumericConstant(operand_ti, 0));
1573  const auto gt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kGT, kONE, operand, zero);
1574  expr_list.emplace_back(gt_zero, makeNumericConstant(operand_ti, 1));
1575  return makeExpr<Analyzer::CaseExpr>(
1576  operand_ti,
1577  false,
1578  expr_list,
1579  makeExpr<Analyzer::Constant>(operand_ti, true, Datum{0}));
1580 }
1581 
1582 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOffsetInFragment() const {
1583  return makeExpr<Analyzer::OffsetInFragment>();
1584 }
1585 
// Translates an array-constructor call into an Analyzer::ArrayExpr.
// When the call's declared type has a kNULLT subtype, the element type is taken
// from the first translated argument and all arguments must share that nullable
// logical type (std::runtime_error otherwise). With no arguments the subtype
// defaults to kBOOLEAN. When the subtype is already known, the declared type is
// used unchanged.
// NOTE(review): source line 1586 (first line of the signature) is missing from
// this listing; presumably this is RelAlgTranslator::translateArrayFunction — confirm.
1587  const RexFunctionOperator* rex_function) const {
1588  if (rex_function->getType().get_subtype() == kNULLT) {
1589  auto sql_type = rex_function->getType();
1590  CHECK(sql_type.get_type() == kARRAY);
1591 
1592  // FIX-ME: Deal with NULL arrays
1593  auto translated_function_args(translateFunctionArgs(rex_function));
1594  if (translated_function_args.size() > 0) {
1595  const auto first_element_logical_type =
1596  get_nullable_logical_type_info(translated_function_args[0]->get_type_info());
1597 
 // Find the first argument whose nullable logical type differs from element 0.
1598  auto diff_elem_itr =
1599  std::find_if(translated_function_args.begin(),
1600  translated_function_args.end(),
1601  [first_element_logical_type](const auto expr) {
1602  return first_element_logical_type !=
1603  get_nullable_logical_type_info(expr->get_type_info());
1604  });
1605  if (diff_elem_itr != translated_function_args.end()) {
 // The message reports the zero-based index of the offending element.
1606  throw std::runtime_error(
1607  "Element " +
1608  std::to_string(diff_elem_itr - translated_function_args.begin()) +
1609  " is not of the same type as other elements of the array. Consider casting "
1610  "to force this condition.\nElement Type: " +
1611  get_nullable_logical_type_info((*diff_elem_itr)->get_type_info())
1612  .to_string() +
1613  "\nArray type: " + first_element_logical_type.to_string());
1614  }
1615 
 // Three cases: strings that are not dictionary encoded, dictionary-encoded
 // strings, and everything else (which copies scale and precision).
1616  if (first_element_logical_type.is_string() &&
1617  !first_element_logical_type.is_dict_encoded_string()) {
1618  sql_type.set_subtype(first_element_logical_type.get_type());
1619  sql_type.set_compression(kENCODING_FIXED);
1620  } else if (first_element_logical_type.is_dict_encoded_string()) {
1621  sql_type.set_subtype(first_element_logical_type.get_type());
1622  sql_type.set_comp_param(TRANSIENT_DICT_ID);
1623  } else {
1624  sql_type.set_subtype(first_element_logical_type.get_type());
1625  sql_type.set_scale(first_element_logical_type.get_scale());
1626  sql_type.set_precision(first_element_logical_type.get_precision());
1627  }
1628 
1629  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1630  } else {
1631  // defaulting to valid sub-type for convenience
1632  sql_type.set_subtype(kBOOLEAN);
1633  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1634  }
1635  } else {
 // Subtype already known: use the declared type unchanged.
1636  return makeExpr<Analyzer::ArrayExpr>(rex_function->getType(),
1637  translateFunctionArgs(rex_function));
1638  }
1639 }
1640 
1641 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateFunction(
1642  const RexFunctionOperator* rex_function) const {
1643  if (func_resolve(rex_function->getName(), "LIKE"sv, "PG_ILIKE"sv)) {
1644  return translateLike(rex_function);
1645  }
1646  if (rex_function->getName() == "REGEXP_LIKE"sv) {
1647  return translateRegexp(rex_function);
1648  }
1649  if (rex_function->getName() == "LIKELY"sv) {
1650  return translateLikely(rex_function);
1651  }
1652  if (rex_function->getName() == "UNLIKELY"sv) {
1653  return translateUnlikely(rex_function);
1654  }
1655  if (func_resolve(rex_function->getName(), "PG_EXTRACT"sv, "PG_DATE_TRUNC"sv)) {
1656  return translateExtract(rex_function);
1657  }
1658  if (rex_function->getName() == "DATEADD"sv) {
1659  return translateDateadd(rex_function);
1660  }
1661  if (rex_function->getName() == "DATEDIFF"sv) {
1662  return translateDatediff(rex_function);
1663  }
1664  if (rex_function->getName() == "DATEPART"sv) {
1665  return translateDatepart(rex_function);
1666  }
1667  if (func_resolve(rex_function->getName(), "LENGTH"sv, "CHAR_LENGTH"sv)) {
1668  return translateLength(rex_function);
1669  }
1670  if (rex_function->getName() == "KEY_FOR_STRING"sv) {
1671  return translateKeyForString(rex_function);
1672  }
1673  if (rex_function->getName() == "WIDTH_BUCKET"sv) {
1674  return translateWidthBucket(rex_function);
1675  }
1676  if (rex_function->getName() == "SAMPLE_RATIO"sv) {
1677  return translateSampleRatio(rex_function);
1678  }
1679  if (rex_function->getName() == "CURRENT_USER"sv) {
1680  return translateCurrentUser(rex_function);
1681  }
1682  if (func_resolve(rex_function->getName(),
1683  "LOWER"sv,
1684  "UPPER"sv,
1685  "INITCAP"sv,
1686  "REVERSE"sv,
1687  "REPEAT"sv,
1688  "||"sv,
1689  "LPAD"sv,
1690  "RPAD"sv,
1691  "TRIM"sv,
1692  "LTRIM"sv,
1693  "RTRIM"sv,
1694  "SUBSTRING"sv,
1695  "OVERLAY"sv,
1696  "REPLACE"sv,
1697  "SPLIT_PART"sv,
1698  "REGEXP_REPLACE"sv,
1699  "REGEXP_SUBSTR"sv,
1700  "REGEXP_MATCH"sv,
1701  "JSON_VALUE"sv,
1702  "BASE64_ENCODE"sv,
1703  "BASE64_DECODE"sv,
1704  "TRY_CAST"sv,
1705  "POSITION"sv)) {
1706  return translateStringOper(rex_function);
1707  }
1708  if (func_resolve(rex_function->getName(), "CARDINALITY"sv, "ARRAY_LENGTH"sv)) {
1709  return translateCardinality(rex_function);
1710  }
1711  if (rex_function->getName() == "ITEM"sv) {
1712  return translateItem(rex_function);
1713  }
1714  if (rex_function->getName() == "CURRENT_DATE"sv) {
1715  return translateCurrentDate();
1716  }
1717  if (rex_function->getName() == "CURRENT_TIME"sv) {
1718  return translateCurrentTime();
1719  }
1720  if (rex_function->getName() == "CURRENT_TIMESTAMP"sv) {
1721  return translateCurrentTimestamp();
1722  }
1723  if (rex_function->getName() == "NOW"sv) {
1724  return translateCurrentTimestamp();
1725  }
1726  if (rex_function->getName() == "DATETIME"sv) {
1727  return translateDatetime(rex_function);
1728  }
1729  if (func_resolve(rex_function->getName(), "usTIMESTAMP"sv, "nsTIMESTAMP"sv)) {
1730  return translateHPTLiteral(rex_function);
1731  }
1732  if (rex_function->getName() == "ABS"sv) {
1733  return translateAbs(rex_function);
1734  }
1735  if (rex_function->getName() == "SIGN"sv) {
1736  return translateSign(rex_function);
1737  }
1738  if (func_resolve(rex_function->getName(), "CEIL"sv, "FLOOR"sv)) {
1739  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1740  rex_function->getType(),
1741  rex_function->getName(),
1742  translateFunctionArgs(rex_function));
1743  } else if (rex_function->getName() == "ROUND"sv) {
1744  std::vector<std::shared_ptr<Analyzer::Expr>> args =
1745  translateFunctionArgs(rex_function);
1746 
1747  if (rex_function->size() == 1) {
1748  // push a 0 constant if 2nd operand is missing.
1749  // this needs to be done as calcite returns
1750  // only the 1st operand without defaulting the 2nd one
1751  // when the user did not specify the 2nd operand.
1752  SQLTypes t = kSMALLINT;
1753  Datum d;
1754  d.smallintval = 0;
1755  args.push_back(makeExpr<Analyzer::Constant>(t, false, d));
1756  }
1757 
1758  // make sure we have only 2 operands
1759  CHECK(args.size() == 2);
1760 
1761  if (!args[0]->get_type_info().is_number()) {
1762  throw std::runtime_error("Only numeric 1st operands are supported");
1763  }
1764 
1765  // the 2nd operand does not need to be a constant
1766  // it can happily reference another integer column
1767  if (!args[1]->get_type_info().is_integer()) {
1768  throw std::runtime_error("Only integer 2nd operands are supported");
1769  }
1770 
1771  // Calcite may upcast decimals in a way that is
1772  // incompatible with the extension function input. Play it safe and stick with the
1773  // argument type instead.
1774  const SQLTypeInfo ret_ti = args[0]->get_type_info().is_decimal()
1775  ? args[0]->get_type_info()
1776  : rex_function->getType();
1777 
1778  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1779  ret_ti, rex_function->getName(), args);
1780  }
1781  if (rex_function->getName() == "DATETIME_PLUS"sv) {
1782  auto dt_plus = makeExpr<Analyzer::FunctionOper>(rex_function->getType(),
1783  rex_function->getName(),
1784  translateFunctionArgs(rex_function));
1785  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1786  if (date_trunc) {
1787  return date_trunc;
1788  }
1789  return translateDateadd(rex_function);
1790  }
1791  if (rex_function->getName() == "/INT"sv) {
1792  CHECK_EQ(size_t(2), rex_function->size());
1793  std::shared_ptr<Analyzer::Expr> lhs = translateScalarRex(rex_function->getOperand(0));
1794  std::shared_ptr<Analyzer::Expr> rhs = translateScalarRex(rex_function->getOperand(1));
1795  const auto rhs_lit = std::dynamic_pointer_cast<Analyzer::Constant>(rhs);
1796  return Parser::OperExpr::normalize(kDIVIDE, kONE, lhs, rhs);
1797  }
1798  if (rex_function->getName() == "Reinterpret"sv) {
1799  CHECK_EQ(size_t(1), rex_function->size());
1800  return translateScalarRex(rex_function->getOperand(0));
1801  }
1802  if (func_resolve(rex_function->getName(),
1803  "ST_X"sv,
1804  "ST_Y"sv,
1805  "ST_XMin"sv,
1806  "ST_YMin"sv,
1807  "ST_XMax"sv,
1808  "ST_YMax"sv,
1809  "ST_NRings"sv,
1810  "ST_NumGeometries"sv,
1811  "ST_NPoints"sv,
1812  "ST_Length"sv,
1813  "ST_Perimeter"sv,
1814  "ST_Area"sv,
1815  "ST_SRID"sv,
1816  "HeavyDB_Geo_PolyBoundsPtr"sv,
1817  "HeavyDB_Geo_PolyRenderGroup"sv)) {
1818  CHECK_EQ(rex_function->size(), size_t(1));
1819  return translateUnaryGeoFunction(rex_function);
1820  }
1821  if (func_resolve(rex_function->getName(), "ST_ConvexHull"sv)) {
1822  CHECK_EQ(rex_function->size(), size_t(1));
1823  SQLTypeInfo ti;
1824  return translateUnaryGeoConstructor(rex_function, ti, false);
1825  }
1826  if (func_resolve(rex_function->getName(),
1827  "convert_meters_to_pixel_width"sv,
1828  "convert_meters_to_pixel_height"sv,
1829  "is_point_in_view"sv,
1830  "is_point_size_in_view"sv)) {
1831  return translateFunctionWithGeoArg(rex_function);
1832  }
1833  if (func_resolve(rex_function->getName(),
1834  "ST_Distance"sv,
1835  "ST_MaxDistance"sv,
1836  "ST_Intersects"sv,
1837  "ST_Disjoint"sv,
1838  "ST_Contains"sv,
1839  "ST_Overlaps"sv,
1840  "ST_Approx_Overlaps"sv,
1841  "ST_Within"sv)) {
1842  CHECK_EQ(rex_function->size(), size_t(2));
1843  return translateBinaryGeoFunction(rex_function);
1844  }
1845  if (func_resolve(rex_function->getName(), "ST_DWithin"sv, "ST_DFullyWithin"sv)) {
1846  CHECK_EQ(rex_function->size(), size_t(3));
1847  return translateTernaryGeoFunction(rex_function);
1848  }
1849  if (rex_function->getName() == "OFFSET_IN_FRAGMENT"sv) {
1850  CHECK_EQ(size_t(0), rex_function->size());
1851  return translateOffsetInFragment();
1852  }
1853  if (rex_function->getName() == "ARRAY"sv) {
1854  // Var args; currently no check. Possible fix-me -- can array have 0 elements?
1855  return translateArrayFunction(rex_function);
1856  }
1857  if (func_resolve(rex_function->getName(),
1858  "ST_GeomFromText"sv,
1859  "ST_GeogFromText"sv,
1860  "ST_Centroid"sv,
1861  "ST_SetSRID"sv,
1862  "ST_Point"sv, // TODO: where should this and below live?
1863  "ST_PointN"sv,
1864  "ST_StartPoint"sv,
1865  "ST_EndPoint"sv,
1866  "ST_Transform"sv)) {
1867  SQLTypeInfo ti;
1868  return translateGeoProjection(rex_function, ti, false);
1869  }
1870  if (func_resolve(rex_function->getName(),
1871  "ST_Intersection"sv,
1872  "ST_Difference"sv,
1873  "ST_Union"sv,
1874  "ST_Buffer"sv,
1875  "ST_ConcaveHull"sv)) {
1876  CHECK_EQ(rex_function->size(), size_t(2));
1877  SQLTypeInfo ti;
1878  return translateBinaryGeoConstructor(rex_function, ti, false);
1879  }
1880  if (func_resolve(rex_function->getName(), "ST_IsEmpty"sv, "ST_IsValid"sv)) {
1881  CHECK_EQ(rex_function->size(), size_t(1));
1882  SQLTypeInfo ti;
1883  return translateUnaryGeoPredicate(rex_function, ti, false);
1884  }
1885  if (func_resolve(rex_function->getName(), "ST_Equals"sv)) {
1886  CHECK_EQ(rex_function->size(), size_t(2));
1887  // Attempt to generate a distance based check for points
1888  if (auto distance_check = translateBinaryGeoFunction(rex_function)) {
1889  return distance_check;
1890  }
1891  SQLTypeInfo ti;
1892  return translateBinaryGeoPredicate(rex_function, ti, false);
1893  }
1894 
1895  auto arg_expr_list = translateFunctionArgs(rex_function);
1896  if (rex_function->getName() == std::string("||") ||
1897  rex_function->getName() == std::string("SUBSTRING")) {
1898  SQLTypeInfo ret_ti(kTEXT, false);
1899  return makeExpr<Analyzer::FunctionOper>(
1900  ret_ti, rex_function->getName(), arg_expr_list);
1901  }
1902 
1903  // Reset possibly wrong return type of rex_function to the return
1904  // type of the optimal valid implementation. The return type can be
1905  // wrong in the case of multiple implementations of UDF functions
1906  // that have different return types but Calcite specifies the return
1907  // type according to the first implementation.
1908  SQLTypeInfo ret_ti;
1909  try {
1910  auto ext_func_sig = bind_function(rex_function->getName(), arg_expr_list);
1911  auto ext_func_args = ext_func_sig.getInputArgs();
1912  CHECK_LE(arg_expr_list.size(), ext_func_args.size());
1913  for (size_t i = 0, di = 0; i < arg_expr_list.size(); i++) {
1914  CHECK_LT(i + di, ext_func_args.size());
1915  auto ext_func_arg = ext_func_args[i + di];
1916  if (ext_func_arg == ExtArgumentType::PInt8 ||
1917  ext_func_arg == ExtArgumentType::PInt16 ||
1918  ext_func_arg == ExtArgumentType::PInt32 ||
1919  ext_func_arg == ExtArgumentType::PInt64 ||
1920  ext_func_arg == ExtArgumentType::PFloat ||
1921  ext_func_arg == ExtArgumentType::PDouble ||
1922  ext_func_arg == ExtArgumentType::PBool) {
1923  di++;
1924  // pointer argument follows length argument:
1925  CHECK(ext_func_args[i + di] == ExtArgumentType::Int64);
1926  }
1927  // fold casts on constants
1928  if (auto constant =
1929  std::dynamic_pointer_cast<Analyzer::Constant>(arg_expr_list[i])) {
1930  auto ext_func_arg_ti = ext_arg_type_to_type_info(ext_func_arg);
1931  if (ext_func_arg_ti != arg_expr_list[i]->get_type_info()) {
1932  arg_expr_list[i] = constant->add_cast(ext_func_arg_ti);
1933  }
1934  }
1935  }
1936 
1937  ret_ti = ext_arg_type_to_type_info(ext_func_sig.getRet());
1938  } catch (ExtensionFunctionBindingError& e) {
1939  LOG(WARNING) << "RelAlgTranslator::translateFunction: " << e.what();
1940  throw;
1941  }
1942 
1943  // By default, the extension function type will not allow nulls. If one of the arguments
1944  // is nullable, the extension function must also explicitly allow nulls.
1945  bool arguments_not_null = true;
1946  for (const auto& arg_expr : arg_expr_list) {
1947  if (!arg_expr->get_type_info().get_notnull()) {
1948  arguments_not_null = false;
1949  break;
1950  }
1951  }
1952  ret_ti.set_notnull(arguments_not_null);
1953 
1954  return makeExpr<Analyzer::FunctionOper>(ret_ti, rex_function->getName(), arg_expr_list);
1955 }
1956 
1957 namespace {
1958 
1959 std::vector<Analyzer::OrderEntry> translate_collation(
1960  const std::vector<SortField>& sort_fields) {
1961  std::vector<Analyzer::OrderEntry> collation;
1962  for (size_t i = 0; i < sort_fields.size(); ++i) {
1963  const auto& sort_field = sort_fields[i];
1964  collation.emplace_back(i,
1965  sort_field.getSortDir() == SortDirection::Descending,
1966  sort_field.getNullsPosition() == NullSortedPosition::First);
1967  }
1968  return collation;
1969 }
1970 
1971 size_t determineTimeValMultiplierForTimeType(const SQLTypes& window_frame_bound_type,
1972  const Analyzer::Constant* const_expr) {
1973  const auto time_unit_val = const_expr->get_constval().bigintval;
1974  if (window_frame_bound_type == kINTERVAL_DAY_TIME) {
1975  if (time_unit_val == kMilliSecsPerSec) {
1976  return 1;
1977  } else if (time_unit_val == kMilliSecsPerMin) {
1978  return kSecsPerMin;
1979  } else if (time_unit_val == kMilliSecsPerHour) {
1980  return kSecsPerHour;
1981  }
1982  }
1983  CHECK(false);
1984  return kUNKNOWN_FIELD;
1985 }
1986 
1987 ExtractField determineTimeUnit(const SQLTypes& window_frame_bound_type,
1988  const Analyzer::Constant* const_expr) {
1989  const auto time_unit_val = const_expr->get_constval().bigintval;
1990  if (window_frame_bound_type == kINTERVAL_DAY_TIME) {
1991  if (time_unit_val == kMilliSecsPerSec) {
1992  return kSECOND;
1993  } else if (time_unit_val == kMilliSecsPerMin) {
1994  return kMINUTE;
1995  } else if (time_unit_val == kMilliSecsPerHour) {
1996  return kHOUR;
1997  } else if (time_unit_val == kMilliSecsPerDay) {
1998  return kDAY;
1999  }
2000  } else {
2001  CHECK(window_frame_bound_type == kINTERVAL_YEAR_MONTH);
2002  if (time_unit_val == 1) {
2003  return kMONTH;
2004  } else if (time_unit_val == 12) {
2005  return kYEAR;
2006  }
2007  }
2008  CHECK(false);
2009  return kUNKNOWN_FIELD;
2010 }
2011 } // namespace
2012 
// Translates a Calcite window function operator into an Analyzer::WindowFunction.
//
// Steps: translate the operands, partition keys and order keys; derive the result
// type; classify and validate the frame start/end bounds; apply per-function checks;
// and finally build the window function expression together with its two WindowFrame
// expressions and the translated collation.
// Throws std::runtime_error for unsupported or invalid frame definitions and for
// invalid function arguments (see the individual throw sites below).
// NOTE(review): several statements of this function (case labels, a lambda header,
// return statements, some initializers) are not visible in this listing; comments
// below only describe what the visible code shows.
2013 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWindowFunction(
2014  const RexWindowFunctionOperator* rex_window_function) const {
 // translate the operands, the PARTITION BY keys and the ORDER BY keys
2015  std::vector<std::shared_ptr<Analyzer::Expr>> args;
2016  for (size_t i = 0; i < rex_window_function->size(); ++i) {
2017  args.push_back(translateScalarRex(rex_window_function->getOperand(i)));
2018  }
2019  std::vector<std::shared_ptr<Analyzer::Expr>> partition_keys;
2020  for (const auto& partition_key : rex_window_function->getPartitionKeys()) {
2021  partition_keys.push_back(translateScalarRex(partition_key.get()));
2022  }
2023  std::vector<std::shared_ptr<Analyzer::Expr>> order_keys;
2024  for (const auto& order_key : rex_window_function->getOrderKeys()) {
2025  order_keys.push_back(translateScalarRex(order_key.get()));
2026  }
 // result type: starts from the type reported by the operator; value window functions
 // take the type of their first argument instead
2027  auto ti = rex_window_function->getType();
2028  auto window_func_kind = rex_window_function->getKind();
2029  if (window_function_is_value(window_func_kind)) {
2030  CHECK_GE(args.size(), 1u);
2031  ti = args.front()->get_type_info();
2032  } else if (window_function_conditional_aggregate(window_func_kind)) {
2033  switch (window_func_kind) {
 // NOTE(review): the case label of this branch is not visible in this listing
2035  // count_if should have an input expression having boolean type
2036  // but returned value should have the same as a normal count agg expr
2037  // so we force to set its type to bigint
2038  CHECK(ti.is_boolean());
2039  ti = SQLTypeInfo(kBIGINT);
2040  break;
2041  default:
2042  break;
2043  }
2044  }
 // Classifies a frame bound from its unbounded / is_current_row / preceding /
 // following flags and checks that the flags are mutually consistent.
 // NOTE(review): the lambda header and its return statements are not visible in this
 // listing; presumably each branch returns the matching SqlWindowFrameBoundType --
 // confirm against the original source.
2045  auto determine_frame_bound_type =
2047  if (bound.unbounded) {
2048  CHECK(!bound.bound_expr && !bound.is_current_row);
2049  if (bound.following) {
2051  } else if (bound.preceding) {
2053  }
2054  } else {
2055  if (bound.is_current_row) {
2056  CHECK(!bound.unbounded && !bound.bound_expr);
2058  } else {
2059  CHECK(!bound.unbounded && bound.bound_expr);
2060  if (bound.following) {
2062  } else if (bound.preceding) {
2064  }
2065  }
2066  }
2068  };
 // Returns true when the literal datum `d`, read through the Datum member matching
 // type `t`, is negative. kDOUBLE is only accepted when is_time_unit is set; any other
 // unsupported type throws std::runtime_error.
2069  auto is_negative_framing_bound =
2070  [](const SQLTypes t, const Datum& d, bool is_time_unit = false) {
2071  switch (t) {
2072  case kTINYINT:
2073  return d.tinyintval < 0;
2074  case kSMALLINT:
2075  return d.smallintval < 0;
2076  case kINT:
2077  return d.intval < 0;
2078  case kDOUBLE: {
2079  // the only case that double type is used is for handling time interval
2080  // i.e., represent tiny time units like nanosecond and microsecond as the
2081  // equivalent time value with SECOND time unit
2082  CHECK(is_time_unit);
2083  return d.doubleval < 0;
2084  }
2085  case kDECIMAL:
2086  case kNUMERIC:
2087  case kBIGINT:
2088  return d.bigintval < 0;
2089  default: {
2090  throw std::runtime_error(
2091  "We currently only support integer-type literal expression as a window "
2092  "frame bound expression");
2093  }
2094  }
2095  };
2096 
 // NOTE(review): negative_constant is assigned (not OR-ed) by every call of
 // translate_frame_bound_expr below, so a negative start bound appears to be forgotten
 // when the end bound is translated afterwards -- confirm.
2097  bool negative_constant = false;
2098  bool detect_invalid_frame_start_bound_expr = false;
2099  bool detect_invalid_frame_end_bound_expr = false;
2100  auto& frame_start_bound = rex_window_function->getFrameStartBound();
2101  auto& frame_end_bound = rex_window_function->getFrameEndBound();
 // NOTE(review): has_end_bound_frame_expr is never assigned after this initialization
 // in the visible code, so the checks that require it below can never fire -- confirm.
2102  bool has_end_bound_frame_expr = false;
2103  std::shared_ptr<Analyzer::Expr> frame_start_bound_expr;
2104  SqlWindowFrameBoundType frame_start_bound_type =
2105  determine_frame_bound_type(frame_start_bound);
2106  std::shared_ptr<Analyzer::Expr> frame_end_bound_expr;
2107  SqlWindowFrameBoundType frame_end_bound_type =
2108  determine_frame_bound_type(frame_end_bound);
 // NOTE(review): the initializers of has_framing_clause and frame_mode are only partly
 // visible in this listing.
2109  bool has_framing_clause =
2111  auto frame_mode = rex_window_function->isRows()
2114  if (order_keys.empty()) {
2115  if (frame_start_bound_type == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING &&
2116  frame_end_bound_type == SqlWindowFrameBoundType::UNBOUNDED_FOLLOWING) {
2117  // Calcite sets UNBOUNDED PRECEDING ~ UNBOUNDED_FOLLOWING as its default frame bound
2118  // if the window context has no order by clause regardless of the existence of
2119  // user-given window frame bound but at this point we have no way to recognize the
2120  // absence of the frame definition of this window context
2121  has_framing_clause = false;
2122  }
2123  } else {
2124  if (frame_start_bound_type == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING &&
2125  frame_end_bound_type == SqlWindowFrameBoundType::CURRENT_ROW) {
2126  // Calcite sets this frame bound by default when order by clause is given but has no
2127  // window frame definition (even if user gives the same bound, our previous window
2128  // computation logic returns exactly the same result)
2129  has_framing_clause = false;
2130  }
 // Translates a frame bound expression and returns {is_invalid, translated_expr}.
 // Only time-interval operators and literals are accepted; as a side effect the
 // lambda records in negative_constant whether the literal value is negative.
2131  auto translate_frame_bound_expr = [&](const RexScalar* bound_expr) {
2132  std::shared_ptr<Analyzer::Expr> translated_expr;
2133  const auto rex_oper = dynamic_cast<const RexOperator*>(bound_expr);
2134  if (rex_oper && rex_oper->getType().is_timeinterval()) {
2135  translated_expr = translateScalarRex(rex_oper);
 // NOTE(review): bin_oper is dereferenced without a null check; this assumes a
 // time-interval operator always translates to an Analyzer::BinOper -- confirm.
2136  const auto bin_oper =
2137  dynamic_cast<const Analyzer::BinOper*>(translated_expr.get());
2138  auto time_literal_expr =
2139  dynamic_cast<const Analyzer::Constant*>(bin_oper->get_left_operand());
2140  CHECK(time_literal_expr);
2141  negative_constant =
2142  is_negative_framing_bound(time_literal_expr->get_type_info().get_type(),
2143  time_literal_expr->get_constval(),
2144  true);
2145  return std::make_pair(false, translated_expr);
2146  }
2147  if (dynamic_cast<const RexLiteral*>(bound_expr)) {
2148  translated_expr = translateScalarRex(bound_expr);
2149  if (auto literal_expr =
2150  dynamic_cast<const Analyzer::Constant*>(translated_expr.get())) {
2151  negative_constant = is_negative_framing_bound(
2152  literal_expr->get_type_info().get_type(), literal_expr->get_constval());
2153  return std::make_pair(false, translated_expr);
2154  }
2155  }
2156  return std::make_pair(true, translated_expr);
2157  };
2158 
2159  if (frame_start_bound.bound_expr) {
2160  std::tie(detect_invalid_frame_start_bound_expr, frame_start_bound_expr) =
2161  translate_frame_bound_expr(frame_start_bound.bound_expr.get());
2162  }
2163 
2164  if (frame_end_bound.bound_expr) {
2165  std::tie(detect_invalid_frame_end_bound_expr, frame_end_bound_expr) =
2166  translate_frame_bound_expr(frame_end_bound.bound_expr.get());
2167  }
2168 
2169  // currently we only support literal expression as frame bound expression
2170  if (detect_invalid_frame_start_bound_expr || detect_invalid_frame_end_bound_expr) {
2171  throw std::runtime_error(
2172  "We currently only support literal expression as a window frame bound "
2173  "expression");
2174  }
2175 
2176  // note that Calcite already has frame-bound constraint checking logic, but we
2177  // also check various invalid cases for safety
2178  if (negative_constant) {
2179  throw std::runtime_error(
2180  "A constant expression for window framing should have nonnegative value.");
2181  }
2182 
 // For a bound expression of time-interval type, replaces the stored start or end
 // bound expression with the result of translateIntervalExprForWindowFraming applied
 // to the first order key.
 // NOTE(review): bound_bin_oper is dereferenced without a null check, and one argument
 // of the call below is not visible in this listing.
2183  auto handle_time_interval_expr_if_necessary = [&](const Analyzer::Expr* bound_expr,
2184  SqlWindowFrameBoundType bound_type,
2185  bool for_start_bound) {
2186  if (bound_expr && bound_expr->get_type_info().is_timeinterval()) {
2187  const auto bound_bin_oper = dynamic_cast<const Analyzer::BinOper*>(bound_expr);
2188  CHECK(bound_bin_oper->get_optype() == kMULTIPLY);
2189  auto translated_expr = translateIntervalExprForWindowFraming(
2190  order_keys.front(),
2192  bound_bin_oper);
2193  if (for_start_bound) {
2194  frame_start_bound_expr = translated_expr;
2195  } else {
2196  frame_end_bound_expr = translated_expr;
2197  }
2198  }
2199  };
2200  handle_time_interval_expr_if_necessary(
2201  frame_start_bound_expr.get(), frame_start_bound_type, true);
2202  handle_time_interval_expr_if_necessary(
2203  frame_end_bound_expr.get(), frame_end_bound_type, false);
2204  }
2205 
 // reject frame definitions whose end bound lies before the start bound
2206  if (frame_start_bound.following) {
2207  if (frame_end_bound.is_current_row) {
2208  throw std::runtime_error(
2209  "Window framing starting from following row cannot end with current row.");
2210  } else if (has_end_bound_frame_expr && frame_end_bound.preceding) {
2211  throw std::runtime_error(
2212  "Window framing starting from following row cannot have preceding rows.");
2213  }
2214  }
2215  if (frame_start_bound.is_current_row && frame_end_bound.preceding &&
2216  !frame_end_bound.unbounded && has_end_bound_frame_expr) {
2217  throw std::runtime_error(
2218  "Window framing starting from current row cannot have preceding rows.");
2219  }
2220  if (has_framing_clause) {
 // NOTE(review): the condition opening the inner block is not visible in this listing;
 // the messages below indicate it is the range-mode path -- confirm.
2222  if (order_keys.size() != 1) {
2223  throw std::runtime_error(
2224  "Window framing with range mode requires a single order-by column");
2225  }
2226  if (!frame_start_bound_expr &&
2227  frame_start_bound_type == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING &&
2228  !frame_end_bound_expr &&
2229  frame_end_bound_type == SqlWindowFrameBoundType::CURRENT_ROW) {
2230  has_framing_clause = false;
2231  VLOG(1) << "Ignore range framing mode with a frame bound between "
2232  "UNBOUNDED_PRECEDING and CURRENT_ROW";
2233  }
 // collect the column variables of the order key; framing is skipped unless every one
 // of them has an integer, floating-point or time type
2234  std::set<const Analyzer::ColumnVar*,
2235  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
2237  order_keys.front()->collect_column_var(colvar_set, false);
2238  for (auto cv : colvar_set) {
2239  if (!(cv->get_type_info().is_integer() || cv->get_type_info().is_fp() ||
2240  cv->get_type_info().is_time())) {
2241  has_framing_clause = false;
2242  VLOG(1) << "Range framing mode with non-number type ordering column is not "
2243  "supported yet, skip window framing";
2244  }
2245  }
2246  }
2247  }
 // per-function argument validation
2248  switch (window_func_kind) {
 // NOTE(review): the case label(s) opening this branch are not visible in this
 // listing. The branch requires an ORDER BY clause and a frame definition, defaults a
 // missing second argument to the constant 1, and requires a column reference as the
 // first argument.
2251  if (order_keys.empty()) {
2252  throw std::runtime_error(::toString(window_func_kind) +
2253  " requires an ORDER BY clause");
2254  }
2255  if (!has_framing_clause) {
2256  throw std::runtime_error(::toString(window_func_kind) +
2257  " requires window frame definition");
2258  }
2259  const auto num_args = args.size();
2260  const auto func_name = ::toString(window_func_kind);
2261  if (num_args == 1) {
2262  Datum d;
2263  d.intval = 1;
2264  args.push_back(makeExpr<Analyzer::Constant>(kINT, false, d));
2265  } else if (num_args < 1 || num_args > 2) {
2266  throw std::runtime_error(func_name + " has an invalid number of input arguments");
2267  }
2268  const auto target_expr_cv =
2269  dynamic_cast<const Analyzer::ColumnVar*>(args.front().get());
2270  if (!target_expr_cv) {
2271  throw std::runtime_error("Currently, " + func_name +
2272  " only allows a column reference as its first argument");
2273  }
2274  const auto target_ti = target_expr_cv->get_type_info();
2275  if (target_ti.is_dict_encoded_string()) {
2276  // Calcite does not represent a window function having dictionary encoded text
2277  // type as its output properly, so we need to set its output type manually
2278  ti.set_compression(kENCODING_DICT);
2279  ti.set_comp_param(target_expr_cv->get_comp_param());
2280  ti.set_fixed_size();
2281  }
2282  const auto target_offset_cv =
2283  dynamic_cast<const Analyzer::Constant*>(args[1].get());
 // NOTE(review): the null test below re-checks target_expr_cv (already verified
 // above) instead of target_offset_cv, so a non-constant second argument would be
 // dereferenced as a null pointer -- this looks like a typo; confirm and fix.
2284  if (!target_expr_cv ||
2285  is_negative_framing_bound(target_offset_cv->get_type_info().get_type(),
2286  target_offset_cv->get_constval())) {
2287  throw std::runtime_error(
2288  "Currently, " + func_name +
2289  " only allows non-negative constant as its second argument");
2290  }
2291  break;
2292  }
 // NOTE(review): the case label of this branch is not visible in this listing; the
 // error messages show that it handles NTH_VALUE. A positive integer constant N is
 // decremented in place so that it becomes a zero-based position; every other
 // positional argument falls through to the throw at the end of the branch.
2294  // todo (yoonmin) : args.size() will be three if we support default value
2295  CHECK_EQ(2u, args.size());
2296  // NTH_VALUE may return null value even if the argument is non-null column
2297  ti.set_notnull(false);
2298  if (!args[1]) {
2299  throw std::runtime_error(
2300  "NTH_VALUE window function must have a positional argument expression.");
2301  }
2302  if (args[1]->get_type_info().is_integer()) {
2303  if (auto* n_value_ptr = dynamic_cast<Analyzer::Constant*>(args[1].get())) {
2304  if (0 < n_value_ptr->get_constval().intval) {
2305  // i.e., having N larger than the partition size
2306  // set the proper N to match the zero-start index pos
2307  auto d = n_value_ptr->get_constval();
2308  d.intval -= 1;
2309  n_value_ptr->set_constval(d);
2310  break;
2311  }
2312  }
2313  }
2314  throw std::runtime_error(
2315  "The positional argument of the NTH_VALUE window function must be a positive "
2316  "integer constant.");
2317  default:
2318  break;
2319  }
 // without a usable framing clause the bound types and expressions are cleared
2320  if (!has_framing_clause) {
2321  frame_start_bound_type = SqlWindowFrameBoundType::UNKNOWN;
2322  frame_end_bound_type = SqlWindowFrameBoundType::UNKNOWN;
2323  frame_start_bound_expr = nullptr;
2324  frame_end_bound_expr = nullptr;
2325  }
 // a framed COUNT without operands gets a constant argument of the count type
2326  if (window_func_kind == SqlWindowFunctionKind::COUNT && has_framing_clause &&
2327  args.empty()) {
2328  args.push_back(makeExpr<Analyzer::Constant>(g_bigint_count ? kBIGINT : kINT, true));
2329  }
2330  return makeExpr<Analyzer::WindowFunction>(
2331  ti,
2332  rex_window_function->getKind(),
2333  args,
2334  partition_keys,
2335  order_keys,
2336  has_framing_clause ? frame_mode : Analyzer::WindowFunction::FrameBoundType::NONE,
2337  makeExpr<Analyzer::WindowFrame>(frame_start_bound_type, frame_start_bound_expr),
2338  makeExpr<Analyzer::WindowFrame>(frame_end_bound_type, frame_end_bound_expr),
2339  translate_collation(rex_window_function->getCollation()));
2340 }
2341 
 // Translates a time-interval frame bound (a BinOper whose left operand is the time
 // value literal and whose right operand is the time unit literal) into the expression
 // used as window frame bound for the given order key:
 //  - kTIME order key: a BIGINT constant holding the time value multiplied by the
 //    multiplier of the unit
 //  - kDATE / kTIMESTAMP order key: a DateaddExpr adding the (signed) time value to the
 //    order key
 // Throws std::runtime_error for unsupported unit / value type combinations.
 // NOTE(review): the line carrying the return type and function name is not visible in
 // this listing; the name translateIntervalExprForWindowFraming is taken from the call
 // in translateWindowFunction. Some declarations (e.g. of daField) are not visible
 // either.
2343  std::shared_ptr<Analyzer::Expr> order_key,
2344  bool for_preceding_bound,
2345  const Analyzer::BinOper* frame_bound_expr) const {
2346  // translate time interval expression and prepare appropriate frame bound expression:
2347  // a) manually compute time unit datum: time type
2348  // b) use dateadd expression: date and timestamp
2349  const auto order_key_ti = order_key->get_type_info();
2350  const auto frame_bound_ti = frame_bound_expr->get_type_info();
 // NOTE(review): both dynamic_cast results below are dereferenced later without a null
 // check; this relies on the caller passing constant operands -- confirm.
2351  const auto time_val_expr =
2352  dynamic_cast<const Analyzer::Constant*>(frame_bound_expr->get_left_operand());
2353  const auto time_unit_val_expr =
2354  dynamic_cast<const Analyzer::Constant*>(frame_bound_expr->get_right_operand());
2355  ExtractField time_unit =
2356  determineTimeUnit(frame_bound_ti.get_type(), time_unit_val_expr);
2357  bool invalid_time_unit_type = false;
2358  bool invalid_frame_bound_expr_type = false;
2359  Datum d;
 // Stores the time value literal in d.bigintval. Decimal, numeric and double values
 // are only accepted when is_timestamp_second is set; otherwise (and for any other
 // non-integer type) invalid_frame_bound_expr_type is raised. The value is negated for
 // a preceding bound.
2360  auto prepare_time_value_datum = [&d,
2361  &invalid_frame_bound_expr_type,
2362  &time_val_expr,
2363  &for_preceding_bound](bool is_timestamp_second) {
2364  // currently, Calcite only accepts interval with second, so to represent
2365  // smaller time units like millisecond, we have to use decimal point like
2366  // INTERVAL 0.003 SECOND (for millisecond)
2367  // thus, depending on what time unit we want to represent, Calcite analyzes
2368  // the time value to one of following two types: integer and decimal (and
2369  // numeric) types
2370  switch (time_val_expr->get_type_info().get_type()) {
2371  case kTINYINT: {
2372  d.bigintval = time_val_expr->get_constval().tinyintval;
2373  break;
2374  }
2375  case kSMALLINT: {
2376  d.bigintval = time_val_expr->get_constval().smallintval;
2377  break;
2378  }
2379  case kINT: {
2380  d.bigintval = time_val_expr->get_constval().intval;
2381  break;
2382  }
2383  case kBIGINT: {
2384  d.bigintval = time_val_expr->get_constval().bigintval;
2385  break;
2386  }
2387  case kDECIMAL:
2388  case kNUMERIC: {
2389  if (!is_timestamp_second) {
2390  // date and time type only use integer type as their time value
2391  invalid_frame_bound_expr_type = true;
2392  break;
2393  }
2394  d.bigintval = time_val_expr->get_constval().bigintval;
2395  break;
2396  }
2397  case kDOUBLE: {
2398  if (!is_timestamp_second) {
2399  // date and time type only use integer type as their time value
2400  invalid_frame_bound_expr_type = true;
2401  break;
2402  }
 // scale the double by 10^scale so that the fractional digits become integral
2403  d.bigintval = time_val_expr->get_constval().doubleval *
2404  pow(10, time_val_expr->get_type_info().get_scale());
2405  break;
2406  }
2407  default: {
2408  invalid_frame_bound_expr_type = true;
2409  break;
2410  }
2411  }
2412  if (for_preceding_bound) {
2413  d.bigintval *= -1;
2414  }
2415  };
2416 
2417  switch (order_key_ti.get_type()) {
2418  case kTIME: {
 // only integer time values with a second, minute or hour unit are accepted
 // NOTE(review): unlike the date/timestamp paths, for_preceding_bound is not applied
 // to the value computed here -- confirm that this is intended.
2419  if (time_val_expr->get_type_info().is_integer()) {
2420  if (time_unit == kSECOND || time_unit == kMINUTE || time_unit == kHOUR) {
2421  const auto time_multiplier = determineTimeValMultiplierForTimeType(
2422  frame_bound_ti.get_type(), time_unit_val_expr);
2423  switch (time_val_expr->get_type_info().get_type()) {
2424  case kTINYINT: {
2425  d.bigintval = time_val_expr->get_constval().tinyintval * time_multiplier;
2426  break;
2427  }
2428  case kSMALLINT: {
2429  d.bigintval = time_val_expr->get_constval().smallintval * time_multiplier;
2430  break;
2431  }
2432  case kINT: {
2433  d.bigintval = time_val_expr->get_constval().intval * time_multiplier;
2434  break;
2435  }
2436  case kBIGINT: {
2437  d.bigintval = time_val_expr->get_constval().bigintval * time_multiplier;
2438  break;
2439  }
2440  default: {
2441  UNREACHABLE();
2442  break;
2443  }
2444  }
2445  } else {
2446  invalid_frame_bound_expr_type = true;
2447  }
2448  } else {
2449  invalid_time_unit_type = true;
2450  }
2451  if (invalid_frame_bound_expr_type) {
2452  throw std::runtime_error(
2453  "Invalid time unit is used to define window frame bound expression for " +
2454  order_key_ti.get_type_name() + " type");
2455  } else if (invalid_time_unit_type) {
2456  throw std::runtime_error(
2457  "Window frame bound expression has an invalid type for " +
2458  order_key_ti.get_type_name() + " type");
2459  }
2460  return std::make_shared<Analyzer::Constant>(kBIGINT, false, d);
2461  }
2462  case kDATE: {
 // only integer time values with a day, month or year unit are accepted
2464  if (time_val_expr->get_type_info().is_integer()) {
2465  switch (time_unit) {
2466  case kDAY: {
2467  daField = to_dateadd_field("day");
2468  break;
2469  }
2470  case kMONTH: {
2471  daField = to_dateadd_field("month");
2472  break;
2473  }
2474  case kYEAR: {
2475  daField = to_dateadd_field("year");
2476  break;
2477  }
2478  default: {
2479  invalid_frame_bound_expr_type = true;
2480  break;
2481  }
2482  }
2483  } else {
2484  invalid_time_unit_type = true;
2485  }
2486  if (invalid_frame_bound_expr_type) {
2487  throw std::runtime_error(
2488  "Invalid time unit is used to define window frame bound expression for " +
2489  order_key_ti.get_type_name() + " type");
2490  } else if (invalid_time_unit_type) {
2491  throw std::runtime_error(
2492  "Window frame bound expression has an invalid type for " +
2493  order_key_ti.get_type_name() + " type");
2494  }
2496  prepare_time_value_datum(false);
2497  const auto cast_number_units = makeExpr<Analyzer::Constant>(kBIGINT, false, d);
2498  const int dim = order_key_ti.get_dimension();
2499  return makeExpr<Analyzer::DateaddExpr>(
2500  SQLTypeInfo(kTIMESTAMP, dim, 0, false), daField, cast_number_units, order_key);
2501  }
2502  case kTIMESTAMP: {
 // for a SECOND unit the scale of the time value literal selects the sub-second
 // dateadd field (0: second, 3: millisecond, 6: microsecond, 9: nanosecond)
 // NOTE(review): invalid_frame_bound_expr_type, which prepare_time_value_datum may
 // raise, is not checked before this case returns, and an invalid unit returns nullptr
 // here instead of reaching the throw statements at the end of the function --
 // confirm that callers handle both.
2504  switch (time_unit) {
2505  case kSECOND: {
2506  switch (time_val_expr->get_type_info().get_scale()) {
2507  case 0: {
2508  daField = to_dateadd_field("second");
2509  break;
2510  }
2511  case 3: {
2512  daField = to_dateadd_field("millisecond");
2513  break;
2514  }
2515  case 6: {
2516  daField = to_dateadd_field("microsecond");
2517  break;
2518  }
2519  case 9: {
2520  daField = to_dateadd_field("nanosecond");
2521  break;
2522  }
2523  default:
2524  UNREACHABLE();
2525  break;
2526  }
2527  prepare_time_value_datum(true);
2528  break;
2529  }
2530  case kMINUTE: {
2531  daField = to_dateadd_field("minute");
2532  prepare_time_value_datum(false);
2533  break;
2534  }
2535  case kHOUR: {
2536  daField = to_dateadd_field("hour");
2537  prepare_time_value_datum(false);
2538  break;
2539  }
2540  case kDAY: {
2541  daField = to_dateadd_field("day");
2542  prepare_time_value_datum(false);
2543  break;
2544  }
2545  case kMONTH: {
2546  daField = to_dateadd_field("month");
2547  prepare_time_value_datum(false);
2548  break;
2549  }
2550  case kYEAR: {
2551  daField = to_dateadd_field("year");
2552  prepare_time_value_datum(false);
2553  break;
2554  }
2555  default: {
2556  invalid_time_unit_type = true;
2557  break;
2558  }
2559  }
2560  if (!invalid_time_unit_type) {
2562  const auto cast_number_units = makeExpr<Analyzer::Constant>(kBIGINT, false, d);
2563  const int dim = order_key_ti.get_dimension();
2564  return makeExpr<Analyzer::DateaddExpr>(SQLTypeInfo(kTIMESTAMP, dim, 0, false),
2565  daField,
2566  cast_number_units,
2567  order_key);
2568  }
2569  return nullptr;
2570  }
2571  default: {
 // order keys of any other type are not expected here
2572  UNREACHABLE();
2573  break;
2574  }
2575  }
 // every handled case above returns or throws, so this tail is only reached from the
 // default case
2576  if (invalid_frame_bound_expr_type) {
2577  throw std::runtime_error(
2578  "Invalid time unit is used to define window frame bound expression for " +
2579  order_key_ti.get_type_name() + " type");
2580  } else if (invalid_time_unit_type) {
2581  throw std::runtime_error("Window frame bound expression has an invalid type for " +
2582  order_key_ti.get_type_name() + " type");
2583  }
2584  return nullptr;
2585 }
2586 
2588  const RexFunctionOperator* rex_function) const {
2589  std::vector<std::shared_ptr<Analyzer::Expr>> args;
2590  for (size_t i = 0; i < rex_function->size(); ++i) {
2591  args.push_back(translateScalarRex(rex_function->getOperand(i)));
2592  }
2593  return args;
2594 }
2595 
2597  const std::shared_ptr<Analyzer::Expr> qual_expr) {
2598  CHECK(qual_expr);
2599  auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
2600  if (!bin_oper) {
2601  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
2602  return {{}, {rewritten_qual_expr ? rewritten_qual_expr : qual_expr}};
2603  }
2604 
2605  if (bin_oper->get_optype() == kAND) {
2606  const auto lhs_cf = qual_to_conjunctive_form(bin_oper->get_own_left_operand());
2607  const auto rhs_cf = qual_to_conjunctive_form(bin_oper->get_own_right_operand());
2608  auto simple_quals = lhs_cf.simple_quals;
2609  simple_quals.insert(
2610  simple_quals.end(), rhs_cf.simple_quals.begin(), rhs_cf.simple_quals.end());
2611  auto quals = lhs_cf.quals;
2612  quals.insert(quals.end(), rhs_cf.quals.begin(), rhs_cf.quals.end());
2613  return {simple_quals, quals};
2614  }
2615  int rte_idx{0};
2616  const auto simple_qual = bin_oper->normalize_simple_predicate(rte_idx);
2617  return simple_qual ? QualsConjunctiveForm{{simple_qual}, {}}
2618  : QualsConjunctiveForm{{}, {qual_expr}};
2619 }
2620 
2621 std::vector<std::shared_ptr<Analyzer::Expr>> qual_to_disjunctive_form(
2622  const std::shared_ptr<Analyzer::Expr>& qual_expr) {
2623  CHECK(qual_expr);
2624  const auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
2625  if (!bin_oper) {
2626  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
2627  return {rewritten_qual_expr ? rewritten_qual_expr : qual_expr};
2628  }
2629  if (bin_oper->get_optype() == kOR) {
2630  const auto lhs_df = qual_to_disjunctive_form(bin_oper->get_own_left_operand());
2631  const auto rhs_df = qual_to_disjunctive_form(bin_oper->get_own_right_operand());
2632  auto quals = lhs_df;
2633  quals.insert(quals.end(), rhs_df.begin(), rhs_df.end());
2634  return quals;
2635  }
2636  return {qual_expr};
2637 }
2638 
2639 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateHPTLiteral(
2640  const RexFunctionOperator* rex_function) const {
2641  /* since calcite uses Avatica package called DateTimeUtils to parse timestamp strings.
2642  Therefore any string having fractional seconds more 3 places after the decimal
2643  (milliseconds) will get truncated to 3 decimal places, therefore we lose precision
2644  (us|ns). Issue: [BE-2461] Here we are hijacking literal cast to Timestamp(6|9) from
2645  calcite and translating them to generate our own casts.
2646  */
2647  CHECK_EQ(size_t(1), rex_function->size());
2648  const auto operand = translateScalarRex(rex_function->getOperand(0));
2649  const auto& operand_ti = operand->get_type_info();
2650  const auto& target_ti = rex_function->getType();
2651  if (!operand_ti.is_string()) {
2652  throw std::runtime_error(
2653  "High precision timestamp cast argument must be a string. Input type is: " +
2654  operand_ti.get_type_name());
2655  } else if (!target_ti.is_high_precision_timestamp()) {
2656  throw std::runtime_error(
2657  "Cast target type should be high precision timestamp. Input type is: " +
2658  target_ti.get_type_name());
2659  } else if (target_ti.get_dimension() != 6 && target_ti.get_dimension() != 9) {
2660  throw std::runtime_error(
2661  "Cast target type should be TIMESTAMP(6|9). Input type is: TIMESTAMP(" +
2662  std::to_string(target_ti.get_dimension()) + ")");
2663  } else {
2664  return operand->add_cast(target_ti);
2665  }
2666 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
Defines data structures for the semantic analysis phase of query processing.
Definition: sqldefs.h:71
SqlWindowFrameBoundType
Definition: sqldefs.h:149
const RexScalar * getThen(const size_t idx) const
Definition: RelAlgDag.h:443
const std::vector< JoinType > join_types_
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:381
void set_compression(EncodingType c)
Definition: sqltypes.h:501
SQLAgg
Definition: sqldefs.h:73
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >, const Executor *executor=nullptr)
#define CHECK_EQ(x, y)
Definition: Logger.h:297
auto func_resolve
std::shared_ptr< Analyzer::Expr > translateOffsetInFragment() const
SqlStringOpKind name_to_string_op_kind(const std::string &func_name)
Definition: sqldefs.h:374
static std::shared_ptr< Analyzer::Expr > get(const std::string &)
Definition: ParserNode.cpp:236
std::shared_ptr< Analyzer::Expr > translateCurrentTimestamp() const
std::shared_ptr< Analyzer::Expr > translateBinaryGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
SQLAgg getKind() const
Definition: RelAlgDag.h:813
Definition: sqltypes.h:64
std::shared_ptr< Analyzer::Expr > translateRegexp(const RexFunctionOperator *) const
static bool colvar_comp(const ColumnVar *l, const ColumnVar *r)
Definition: Analyzer.h:217
SQLTypes
Definition: sqltypes.h:53
static constexpr int64_t kSecsPerHour
size_t getOperand(size_t idx) const
Definition: RelAlgDag.h:819
const Executor * executor_
std::shared_ptr< Analyzer::Expr > translateUnlikely(const RexFunctionOperator *) const
const RexScalar * getElse() const
Definition: RelAlgDag.h:448
void collect_column_var(std::set< const ColumnVar *, bool(*)(const ColumnVar *, const ColumnVar *)> &colvar_set, bool include_agg) const override
Definition: Analyzer.h:222
static constexpr int64_t kSecsPerMin
std::shared_ptr< Analyzer::Expr >(RelAlgTranslator::*)(RexScalar const *) const Handler
SQLQualifier
Definition: sqldefs.h:71
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:161
bool window_function_conditional_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:59
std::shared_ptr< Analyzer::Expr > translateFunction(const RexFunctionOperator *) const
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1227
#define LOG(tag)
Definition: Logger.h:283
std::shared_ptr< Analyzer::Expr > translateScalarRex(const RexScalar *rex) const
const SQLTypeInfo & getType() const
Definition: RelAlgDag.h:284
size_t size() const
Definition: RelAlgDag.h:270
static constexpr int64_t kMilliSecsPerDay
const RexScalar * getOperand(const size_t idx) const
Definition: RelAlgDag.h:272
std::shared_ptr< Analyzer::Expr > translateUoper(const RexOperator *) const
HOST DEVICE int get_scale() const
Definition: sqltypes.h:385
const Expr * get_right_operand() const
Definition: Analyzer.h:452
const std::vector< SortField > & getCollation() const
Definition: RelAlgDag.h:654
SQLOps
Definition: sqldefs.h:28
std::shared_ptr< Analyzer::Expr > translateDateadd(const RexFunctionOperator *) const
int8_t boolval
Definition: Datum.h:68
static bool isFramingAvailableWindowFunc(SqlWindowFunctionKind kind)
Definition: Analyzer.h:2611
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr, const Executor *executor=nullptr)
Definition: ParserNode.cpp:370
Definition: sqldefs.h:37
#define UNREACHABLE()
Definition: Logger.h:333
std::shared_ptr< Analyzer::Expr > translateIntervalExprForWindowFraming(std::shared_ptr< Analyzer::Expr > order_key, bool for_preceding_bound, const Analyzer::BinOper *frame_bound_expr) const
#define CHECK_GE(x, y)
Definition: Logger.h:302
std::shared_ptr< Analyzer::Expr > translateAbs(const RexFunctionOperator *) const
Definition: sqldefs.h:48
Definition: sqldefs.h:29
const RexScalar * getWhen(const size_t idx) const
Definition: RelAlgDag.h:438
std::shared_ptr< Analyzer::Expr > ExpressionPtr
Definition: Analyzer.h:188
std::string getString(int32_t string_id) const
std::shared_ptr< Analyzer::Expr > getInIntegerSetExpr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set) const
Definition: sqldefs.h:40
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::shared_ptr< Analyzer::Expr > translateItem(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > makeNumericConstant(const SQLTypeInfo &ti, const long val)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
bool operator()(IndexedHandler const &pair) const
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
bool is_number() const
Definition: sqltypes.h:581
#define CHECK_GT(x, y)
Definition: Logger.h:301
bool is_agg_supported_for_type(const SQLAgg &agg_kind, const SQLTypeInfo &arg_ti)
std::shared_ptr< Analyzer::Expr > translateGeoProjection(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
int32_t intval
Definition: Datum.h:71
bool is_time() const
Definition: sqltypes.h:582
std::shared_ptr< Analyzer::Expr > translateOper(const RexOperator *) const
std::string to_string(char const *&&v)
std::shared_ptr< Analyzer::Expr > translateDatediff(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInput(const RexInput *) const
std::shared_ptr< Analyzer::Expr > translateSign(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoFunction(const RexFunctionOperator *) const
static constexpr int64_t kMilliSecsPerMin
bool g_enable_string_functions
std::shared_ptr< Analyzer::Expr > translateGeoOverlapsOper(const RexOperator *) const
Definition: sqldefs.h:75
static constexpr int64_t kMilliSecsPerSec
ExtractField to_datepart_field(const std::string &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
robin_hood::unordered_map< RexScalar const *, std::shared_ptr< Analyzer::Expr > > cache_
unsigned getIndex() const
Definition: RelAlgDag.h:77
Supported runtime functions management and retrieval.
future< Result > async(Fn &&fn, Args &&...args)
static std::shared_ptr< Analyzer::Expr > translateLiteral(const RexLiteral *)
SQLOps getOperator() const
Definition: RelAlgDag.h:282
bool window_function_is_value(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:29
static constexpr int32_t INVALID_STR_ID
CONSTEXPR DEVICE bool is_null(const T &value)
Classes representing a parse tree.
std::shared_ptr< Analyzer::Expr > translateDatetime(const RexFunctionOperator *) const
void set_fixed_size()
Definition: sqltypes.h:499
DateaddField
Definition: DateAdd.h:42
void fill_dictionary_encoded_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice, const StringDictionaryProxy *source_dict, const StringDictionaryProxy *dest_dict, const int64_t needle_null_val)
#define CHECK_NE(x, y)
Definition: Logger.h:298
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateStringOper(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > pattern_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_not)
Definition: ParserNode.cpp:786
size_t determineTimeValMultiplierForTimeType(const SQLTypes &window_frame_bound_type, const Analyzer::Constant *const_expr)
void set_scale(int s)
Definition: sqltypes.h:495
int64_t bigintval
Definition: Datum.h:72
bool is_timeinterval() const
Definition: sqltypes.h:587
std::pair< std::shared_ptr< Analyzer::Expr >, SQLQualifier > getQuantifiedRhs(const RexScalar *) const
std::vector< Analyzer::OrderEntry > translate_collation(const std::vector< SortField > &sort_fields)
size_t branchCount() const
Definition: RelAlgDag.h:436
std::shared_ptr< Analyzer::Expr > translateCurrentTime() const
bool g_bigint_count
Definition: sqldefs.h:36
Definition: sqldefs.h:77
bool g_enable_watchdog
Definition: sqldefs.h:71
int getDatabaseId() const
Definition: Catalog.h:304
int16_t smallintval
Definition: Datum.h:70
SQLTypeInfo build_type_info(const SQLTypes sql_type, const int scale, const int precision)
DatetruncField to_datediff_field(const std::string &field)
std::string toString(const ExecutorDeviceType &device_type)
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const DictRef dest_dict_ref, const std::vector< int32_t > &source_ids, const DictRef source_dict_ref, const int32_t dest_generation)
bool is_boolean() const
Definition: sqltypes.h:583
std::array< IndexedHandler, sizeof...(Ts)> makeHandlers()
const RexWindowBound & getFrameEndBound() const
Definition: RelAlgDag.h:658
std::shared_ptr< Analyzer::Expr > translate(const RexScalar *rex) const
const std::vector< LeafHostInfo > & getStringDictionaryHosts() const
Definition: Catalog.cpp:2022
std::tuple< T, std::vector< SQLTypeInfo > > bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
const ColumnDescriptor * getMetadataForColumnBySpi(const int tableId, const size_t spi) const
Definition: Catalog.cpp:2083
Argument type based extension function binding.
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:83
const std::unordered_map< const RelAlgNode *, int > input_to_nest_level_
#define UNLIKELY(x)
Definition: likely.h:25
Definition: sqldefs.h:33
void set_comp_param(int p)
Definition: sqltypes.h:502
#define CHECK_LT(x, y)
Definition: Logger.h:299
Definition: sqltypes.h:67
Definition: sqltypes.h:68
Analyzer::ExpressionPtrVector translateFunctionArgs(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
Definition: sqldefs.h:39
Definition: sqldefs.h:71
const ConstRexScalarPtrVector & getPartitionKeys() const
Definition: RelAlgDag.h:627
#define TRANSIENT_DICT_ID
Definition: sqltypes.h:310
static std::shared_ptr< Analyzer::Expr > analyzeValue(const std::string &stringval, const bool is_null)
Definition: ParserNode.cpp:142
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:300
const RexWindowBound & getFrameStartBound() const
Definition: RelAlgDag.h:656
std::shared_ptr< Analyzer::Expr > translateOverlapsOper(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
std::shared_ptr< Analyzer::Expr > translateArrayFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > like_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_ilike, const bool is_not)
Definition: ParserNode.cpp:691
std::pair< std::type_index, Handler > IndexedHandler
static RelRexToStringConfig defaults()
Definition: RelAlgDag.h:49
Datum get_constval() const
Definition: Analyzer.h:344
std::shared_ptr< Analyzer::Expr > translateCurrentUser(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateSampleRatio(const RexFunctionOperator *) const
SqlWindowFunctionKind getKind() const
Definition: RelAlgDag.h:625
std::shared_ptr< Analyzer::Expr > translateLike(const RexFunctionOperator *) const
const RelAlgNode * getSourceNode() const
Definition: RelAlgDag.h:389
Definition: sqltypes.h:56
bool takes_arg(const TargetInfo &target_info)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t numericval, const int scale, const int precision)
Definition: ParserNode.cpp:186
ExtractField
std::shared_ptr< Analyzer::Expr > translateLikely(const RexFunctionOperator *) const
Definition: sqldefs.h:52
std::shared_ptr< Analyzer::Expr > get_in_values_expr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set)
static std::shared_ptr< Analyzer::Expr > get(const int64_t)
Definition: ParserNode.cpp:222
bool isDistinct() const
Definition: RelAlgDag.h:815
void set_notnull(bool n)
Definition: sqltypes.h:497
static constexpr int64_t kMilliSecsPerHour
#define CHECK(condition)
Definition: Logger.h:289
std::shared_ptr< Analyzer::Expr > translateTernaryGeoFunction(const RexFunctionOperator *) const
const ConstRexScalarPtrVector & getOrderKeys() const
Definition: RelAlgDag.h:637
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
std::shared_ptr< Analyzer::Expr > translateBinaryGeoFunction(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > make_fp_constant(const int64_t val, const SQLTypeInfo &ti)
std::pair< Datum, bool > datum_from_scalar_tv(const ScalarTargetValue *scalar_tv, const SQLTypeInfo &ti) noexcept
std::shared_ptr< Analyzer::Expr > translateWidthBucket(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInOper(const RexOperator *) const
uint64_t exp_to_scale(const unsigned exp)
size_t size() const
Definition: RelAlgDag.h:817
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::vector< ExpressionPtr > ExpressionPtrVector
Definition: Analyzer.h:190
bool g_cluster
Definition: sqldefs.h:32
const Expr * get_left_operand() const
Definition: Analyzer.h:451
bool isRows() const
Definition: RelAlgDag.h:660
std::shared_ptr< Analyzer::Expr > translateCase(const RexCase *) const
std::shared_ptr< Analyzer::Expr > translateFunctionWithGeoArg(const RexFunctionOperator *) const
Definition: sqltypes.h:60
std::shared_ptr< const query_state::QueryState > query_state_
const std::string & getName() const
Definition: RelAlgDag.h:500
std::shared_ptr< Analyzer::Expr > translateCurrentDate() const
std::string get_datetimeplus_rewrite_funcname(const SQLOps &op)
void validate_datetime_datepart_argument(const std::shared_ptr< Analyzer::Constant > literal_expr)
std::shared_ptr< Analyzer::Expr > translateCardinality(const RexFunctionOperator *) const
Definition: sqldefs.h:76
int cpu_threads()
Definition: thread_count.h:25
const bool just_explain_
Definition: Datum.h:67
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
Definition: RelAlgDag.h:876
bool is_decimal() const
Definition: sqltypes.h:579
std::shared_ptr< Analyzer::Expr > translateGeoComparison(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateDatePlusMinus(const RexOperator *) const
const std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::shared_ptr< Analyzer::Expr > translateHPTLiteral(const RexFunctionOperator *) const
Definition: sqldefs.h:74
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
int32_t getIdOfString(const std::string &str) const
std::shared_ptr< Analyzer::Expr > translateDatepart(const RexFunctionOperator *) const
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1561
std::shared_ptr< Analyzer::Expr > translateBinaryGeoConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
Definition: sqldefs.h:38
std::shared_ptr< Analyzer::Expr > rewrite_to_date_trunc(const Analyzer::FunctionOper *dt_plus)
Definition: sqldefs.h:83
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
#define VLOG(n)
Definition: Logger.h:383
DateaddField to_dateadd_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
void set_precision(int d)
Definition: sqltypes.h:493
#define IS_COMPARISON(X)
Definition: sqldefs.h:58
double doubleval
Definition: Datum.h:74
void fill_integer_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice)
const Catalog_Namespace::Catalog & cat_
std::shared_ptr< Analyzer::Expr > translateKeyForString(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
std::shared_ptr< Analyzer::Expr > translateWindowFunction(const RexWindowFunctionOperator *) const
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateScalarSubquery(const RexSubQuery *) const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:180
std::shared_ptr< Analyzer::Expr > translateLength(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateExtract(const RexFunctionOperator *) const
ExtractField determineTimeUnit(const SQLTypes &window_frame_bound_type, const Analyzer::Constant *const_expr)
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:490