OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgTranslator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgTranslator.h"
18 #include "Analyzer/Analyzer.h"
20 #include "DateTimePlusRewrite.h"
21 #include "DateTimeTranslator.h"
23 #include "ExpressionRewrite.h"
26 #include "Parser/ParserNode.h"
27 #include "RelAlgDag.h"
28 #include "ScalarExprVisitor.h"
29 #include "Shared/SqlTypesLayout.h"
30 #include "Shared/likely.h"
31 #include "Shared/scope.h"
32 #include "Shared/thread_count.h"
33 #include "WindowContext.h"
34 
35 #include <future>
36 #include <sstream>
37 
38 extern bool g_enable_watchdog;
39 
41 
42 namespace {
43 
45  const int scale,
46  const int precision) {
47  SQLTypeInfo ti(sql_type, 0, 0, true);
48  if (ti.is_decimal()) {
49  ti.set_scale(scale);
50  ti.set_precision(precision);
51  }
52  return ti;
53 }
54 
55 } // namespace
56 
57 std::pair<std::shared_ptr<Analyzer::Expr>, SQLQualifier>
59  std::shared_ptr<Analyzer::Expr> rhs;
60  SQLQualifier sql_qual{kONE};
61  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
62  if (!rex_operator) {
63  return std::make_pair(rhs, sql_qual);
64  }
65  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex_operator);
66  const auto qual_str = rex_function ? rex_function->getName() : "";
67  if (qual_str == "PG_ANY"sv || qual_str == "PG_ALL"sv) {
68  CHECK_EQ(size_t(1), rex_function->size());
69  rhs = translateScalarRex(rex_function->getOperand(0));
70  sql_qual = (qual_str == "PG_ANY"sv) ? kANY : kALL;
71  }
72  if (!rhs && rex_operator->getOperator() == kCAST) {
73  CHECK_EQ(size_t(1), rex_operator->size());
74  std::tie(rhs, sql_qual) = getQuantifiedRhs(rex_operator->getOperand(0));
75  }
76  return std::make_pair(rhs, sql_qual);
77 }
78 
79 namespace {
80 
81 std::pair<Datum, bool> datum_from_scalar_tv(const ScalarTargetValue* scalar_tv,
82  const SQLTypeInfo& ti) noexcept {
83  Datum d{0};
84  bool is_null_const{false};
85  switch (ti.get_type()) {
86  case kBOOLEAN: {
87  const auto ival = boost::get<int64_t>(scalar_tv);
88  CHECK(ival);
89  if (*ival == inline_int_null_val(ti)) {
90  is_null_const = true;
91  } else {
92  d.boolval = *ival;
93  }
94  break;
95  }
96  case kTINYINT: {
97  const auto ival = boost::get<int64_t>(scalar_tv);
98  CHECK(ival);
99  if (*ival == inline_int_null_val(ti)) {
100  is_null_const = true;
101  } else {
102  d.tinyintval = *ival;
103  }
104  break;
105  }
106  case kSMALLINT: {
107  const auto ival = boost::get<int64_t>(scalar_tv);
108  CHECK(ival);
109  if (*ival == inline_int_null_val(ti)) {
110  is_null_const = true;
111  } else {
112  d.smallintval = *ival;
113  }
114  break;
115  }
116  case kINT: {
117  const auto ival = boost::get<int64_t>(scalar_tv);
118  CHECK(ival);
119  if (*ival == inline_int_null_val(ti)) {
120  is_null_const = true;
121  } else {
122  d.intval = *ival;
123  }
124  break;
125  }
126  case kDECIMAL:
127  case kNUMERIC:
128  case kBIGINT:
129  case kDATE:
130  case kTIME:
131  case kTIMESTAMP: {
132  const auto ival = boost::get<int64_t>(scalar_tv);
133  CHECK(ival);
134  if (*ival == inline_int_null_val(ti)) {
135  is_null_const = true;
136  } else {
137  d.bigintval = *ival;
138  }
139  break;
140  }
141  case kDOUBLE: {
142  const auto dval = boost::get<double>(scalar_tv);
143  CHECK(dval);
144  if (*dval == inline_fp_null_val(ti)) {
145  is_null_const = true;
146  } else {
147  d.doubleval = *dval;
148  }
149  break;
150  }
151  case kFLOAT: {
152  const auto fval = boost::get<float>(scalar_tv);
153  CHECK(fval);
154  if (*fval == inline_fp_null_val(ti)) {
155  is_null_const = true;
156  } else {
157  d.floatval = *fval;
158  }
159  break;
160  }
161  case kTEXT:
162  case kVARCHAR:
163  case kCHAR: {
164  auto nullable_sptr = boost::get<NullableString>(scalar_tv);
165  CHECK(nullable_sptr);
166  if (boost::get<void*>(nullable_sptr)) {
167  is_null_const = true;
168  } else {
169  auto sptr = boost::get<std::string>(nullable_sptr);
170  d.stringval = new std::string(*sptr);
171  }
172  break;
173  }
174  default:
175  CHECK(false) << "Unhandled type: " << ti.get_type_name();
176  }
177  return {d, is_null_const};
178 }
179 
180 using Handler =
181  std::shared_ptr<Analyzer::Expr> (RelAlgTranslator::*)(RexScalar const*) const;
182 using IndexedHandler = std::pair<std::type_index, Handler>;
183 
184 template <typename... Ts>
185 std::array<IndexedHandler, sizeof...(Ts)> makeHandlers() {
186  return {IndexedHandler{std::type_index(typeid(Ts)),
187  &RelAlgTranslator::translateRexScalar<Ts>}...};
188 }
189 
190 struct ByTypeIndex {
191  std::type_index const type_index_;
192  ByTypeIndex(std::type_info const& type_info)
193  : type_index_(std::type_index(type_info)) {}
194  bool operator()(IndexedHandler const& pair) const { return pair.first == type_index_; }
195 };
196 
197 } // namespace
198 
199 template <>
200 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexInput>(
201  RexScalar const* rex) const {
202  return translateInput(static_cast<RexInput const*>(rex));
203 }
204 template <>
205 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexLiteral>(
206  RexScalar const* rex) const {
207  return translateLiteral(static_cast<RexLiteral const*>(rex));
208 }
209 template <>
210 std::shared_ptr<Analyzer::Expr>
211 RelAlgTranslator::translateRexScalar<RexWindowFunctionOperator>(
212  RexScalar const* rex) const {
213  return translateWindowFunction(static_cast<RexWindowFunctionOperator const*>(rex));
214 }
215 template <>
216 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexFunctionOperator>(
217  RexScalar const* rex) const {
218  return translateFunction(static_cast<RexFunctionOperator const*>(rex));
219 }
220 template <>
221 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexOperator>(
222  RexScalar const* rex) const {
223  return translateOper(static_cast<RexOperator const*>(rex));
224 }
225 template <>
226 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexCase>(
227  RexScalar const* rex) const {
228  return translateCase(static_cast<RexCase const*>(rex));
229 }
230 template <>
231 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexSubQuery>(
232  RexScalar const* rex) const {
233  return translateScalarSubquery(static_cast<RexSubQuery const*>(rex));
234 }
235 
236 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarRex(
237  RexScalar const* rex) const {
238  auto cache_itr = cache_.find(rex);
239  if (cache_itr == cache_.end()) {
240  // Order types from most likely to least as they are compared seriatim.
241  static auto const handlers = makeHandlers<RexInput,
242  RexLiteral,
243  RexOperator,
244  RexCase,
247  RexSubQuery>();
248  static_assert(std::is_trivially_destructible_v<decltype(handlers)>);
249  auto it = std::find_if(handlers.cbegin(), handlers.cend(), ByTypeIndex{typeid(*rex)});
250  CHECK(it != handlers.cend()) << "Unhandled type: " << typeid(*rex).name();
251  // Call handler based on typeid(*rex) and cache the std::shared_ptr<Analyzer::Expr>.
252  auto cached = cache_.emplace(rex, (this->*it->second)(rex));
253  CHECK(cached.second) << "Failed to emplace rex of type " << typeid(*rex).name();
254  cache_itr = cached.first;
255  }
256  return cache_itr->second;
257 }
258 
259 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translate(RexScalar const* rex) const {
260  ScopeGuard clear_cache{[this] { cache_.clear(); }};
261  return translateScalarRex(rex);
262 }
263 
264 namespace {
265 
266 bool is_agg_supported_for_type(const SQLAgg& agg_kind, const SQLTypeInfo& arg_ti) {
267  if ((agg_kind == kMIN || agg_kind == kMAX || agg_kind == kSUM || agg_kind == kAVG) &&
268  !(arg_ti.is_number() || arg_ti.is_boolean() || arg_ti.is_time())) {
269  return false;
270  }
271 
272  return true;
273 }
274 
275 } // namespace
276 
277 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAggregateRex(
278  const RexAgg* rex,
279  const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
280  SQLAgg agg_kind = rex->getKind();
281  const bool is_distinct = rex->isDistinct();
282  const bool takes_arg{rex->size() > 0};
283  std::shared_ptr<Analyzer::Expr> arg_expr;
284  std::shared_ptr<Analyzer::Expr> arg1; // 2nd aggregate parameter
285  if (takes_arg) {
286  const auto operand = rex->getOperand(0);
287  CHECK_LT(operand, scalar_sources.size());
288  CHECK_LE(rex->size(), 2u);
289  arg_expr = scalar_sources[operand];
290  switch (agg_kind) {
292  if (rex->size() == 2) {
293  auto const const_arg1 = std::dynamic_pointer_cast<Analyzer::Constant>(
294  scalar_sources[rex->getOperand(1)]);
295  if (!const_arg1 || const_arg1->get_type_info().get_type() != kINT ||
296  const_arg1->get_constval().intval < 1 ||
297  const_arg1->get_constval().intval > 100) {
298  throw std::runtime_error(
299  "APPROX_COUNT_DISTINCT's second parameter should be SMALLINT literal "
300  "between "
301  "1 and 100");
302  }
303  arg1 = scalar_sources[rex->getOperand(1)];
304  }
305  break;
306  case kAPPROX_QUANTILE:
307  if (g_cluster) {
308  throw std::runtime_error(
309  "APPROX_PERCENTILE/MEDIAN is not supported in distributed mode at this "
310  "time.");
311  }
312  // If second parameter is not given then APPROX_MEDIAN is assumed.
313  if (rex->size() == 2) {
314  arg1 = std::dynamic_pointer_cast<Analyzer::Constant>(
315  std::dynamic_pointer_cast<Analyzer::Constant>(
316  scalar_sources[rex->getOperand(1)])
317  ->add_cast(SQLTypeInfo(kDOUBLE)));
318  } else {
319 #ifdef _WIN32
320  Datum median;
321  median.doubleval = 0.5;
322 #else
323  constexpr Datum median{.doubleval = 0.5};
324 #endif
325  arg1 = std::make_shared<Analyzer::Constant>(kDOUBLE, false, median);
326  }
327  break;
328  case kMODE:
329  if (g_cluster) {
330  throw std::runtime_error(
331  "MODE is not supported in distributed mode at this time.");
332  }
333  break;
334  case kCOUNT_IF:
335  if (rex->isDistinct()) {
336  throw std::runtime_error(
337  "Currently, COUNT_IF function does not support DISTINCT qualifier.");
338  }
339  break;
340  case kSUM_IF:
341  arg1 = scalar_sources[rex->getOperand(1)];
342  if (arg1->get_type_info().get_type() != kBOOLEAN) {
343  throw std::runtime_error("Conditional argument must be a boolean expression.");
344  }
345  break;
346  default:
347  break;
348  }
349  const auto& arg_ti = arg_expr->get_type_info();
350  if (!is_agg_supported_for_type(agg_kind, arg_ti)) {
351  throw std::runtime_error("Aggregate on " + arg_ti.get_type_name() +
352  " is not supported yet.");
353  }
354  }
355  const auto agg_ti = get_agg_type(agg_kind, arg_expr.get());
356  return makeExpr<Analyzer::AggExpr>(agg_ti, agg_kind, arg_expr, is_distinct, arg1);
357 }
358 
// Translates a RexLiteral into an Analyzer expression, dispatching on the literal's
// own type (rex_literal->getType()).
//
// lit_ti describes the literal as written (type, scale, precision); target_ti describes
// the literal's target type. The DECIMAL and DOUBLE branches add a cast when the two
// differ. An unexpected literal type is reported through LOG(FATAL).
//
// NOTE(review): two lines of this function are absent from this listing (381 and 430,
// flagged inline below); restore them from the original source before compiling.
359 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLiteral(
360  const RexLiteral* rex_literal) {
361  auto lit_ti = build_type_info(
362  rex_literal->getType(), rex_literal->getScale(), rex_literal->getPrecision());
363  auto target_ti = build_type_info(rex_literal->getTargetType(),
364  rex_literal->getTargetScale(),
365  rex_literal->getTargetPrecision());
366  switch (rex_literal->getType()) {
// Integer literals: the value is read as int64_t and written to bigintval for both
// kINT and kBIGINT; the constant keeps the literal's own type and is flagged non-null.
367  case kINT:
368  case kBIGINT: {
369  Datum d;
370  d.bigintval = rex_literal->getVal<int64_t>();
371  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
372  }
373  case kDECIMAL: {
374  const auto val = rex_literal->getVal<int64_t>();
375  const int precision = rex_literal->getPrecision();
376  const int scale = rex_literal->getScale();
// A zero-scale decimal headed for a floating point target becomes an FP constant
// directly, without going through a cast.
377  if (target_ti.is_fp() && !scale) {
378  return make_fp_constant(val, target_ti);
379  }
// NOTE(review): the ':' alternative of this conditional (listing line 381) is missing
// here; only the non-zero-scale arm is visible.
380  auto lit_expr = scale ? Parser::FixedPtLiteral::analyzeValue(val, scale, precision)
382  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
383  }
384  case kTEXT: {
385  return Parser::StringLiteral::analyzeValue(rex_literal->getVal<std::string>(),
386  false);
387  }
388  case kBOOLEAN: {
389  Datum d;
390  d.boolval = rex_literal->getVal<bool>();
391  return makeExpr<Analyzer::Constant>(kBOOLEAN, false, d);
392  }
393  case kDOUBLE: {
394  Datum d;
395  d.doubleval = rex_literal->getVal<double>();
396  auto lit_expr =
397  makeExpr<Analyzer::Constant>(SQLTypeInfo(rex_literal->getType(),
398  rex_literal->getPrecision(),
399  rex_literal->getScale(),
400  false),
401  false,
402  d);
403  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
404  }
405  case kINTERVAL_DAY_TIME:
406  case kINTERVAL_YEAR_MONTH: {
407  Datum d;
408  d.bigintval = rex_literal->getVal<int64_t>();
409  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
410  }
// TIMESTAMP literals with a non-zero precision keep the raw value; every other
// TIME/TIMESTAMP literal is divided by 1000 (presumably milliseconds to seconds -
// TODO confirm the unit delivered by the parser).
411  case kTIME:
412  case kTIMESTAMP: {
413  Datum d;
414  d.bigintval =
415  rex_literal->getType() == kTIMESTAMP && rex_literal->getPrecision() > 0
416  ? rex_literal->getVal<int64_t>()
417  : rex_literal->getVal<int64_t>() / 1000;
418  return makeExpr<Analyzer::Constant>(
419  SQLTypeInfo(rex_literal->getType(), rex_literal->getPrecision(), 0, false),
420  false,
421  d);
422  }
// DATE literals are scaled by 24 * 3600 (presumably days to seconds - TODO confirm).
423  case kDATE: {
424  Datum d;
425  d.bigintval = rex_literal->getVal<int64_t>() * 24 * 3600;
426  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
427  }
// NULL literal: an array target yields an ArrayExpr built from `args`, otherwise a
// constant of the target type flagged as null.
428  case kNULLT: {
429  if (target_ti.is_array()) {
// NOTE(review): the declaration of `args` (listing line 430) is missing here.
431  // defaulting to valid sub-type for convenience
432  target_ti.set_subtype(kBOOLEAN);
433  return makeExpr<Analyzer::ArrayExpr>(target_ti, args, true);
434  }
435  return makeExpr<Analyzer::Constant>(rex_literal->getTargetType(), true, Datum{0});
436  }
437  default: {
438  LOG(FATAL) << "Unexpected literal type " << lit_ti.get_type_name();
439  }
440  }
441  return nullptr;
442 }
443 
444 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarSubquery(
445  const RexSubQuery* rex_subquery) const {
446  if (just_explain_) {
447  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
448  }
449  CHECK(rex_subquery);
450  auto result = rex_subquery->getExecutionResult();
451  auto row_set = result->getRows();
452  const size_t row_count = row_set->rowCount();
453  if (row_count > size_t(1)) {
454  throw std::runtime_error("Scalar sub-query returned multiple rows");
455  }
456  if (row_count == size_t(0)) {
457  if (row_set->isValidationOnlyRes()) {
458  Datum d{0};
459  return makeExpr<Analyzer::Constant>(rex_subquery->getType(), false, d);
460  }
461  throw std::runtime_error("Scalar sub-query returned no results");
462  }
463  CHECK_EQ(row_count, size_t(1));
464  row_set->moveToBegin();
465  auto first_row = row_set->getNextRow(false, false);
466  CHECK_EQ(first_row.size(), size_t(1));
467  auto scalar_tv = boost::get<ScalarTargetValue>(&first_row[0]);
468  auto ti = rex_subquery->getType();
469  if (ti.is_string()) {
470  throw std::runtime_error("Scalar sub-queries which return strings not supported");
471  }
472  Datum d{0};
473  bool is_null_const{false};
474  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
475  return makeExpr<Analyzer::Constant>(ti, is_null_const, d);
476 }
477 
478 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInput(
479  const RexInput* rex_input) const {
480  const auto source = rex_input->getSourceNode();
481  const auto it_rte_idx = input_to_nest_level_.find(source);
482  CHECK(it_rte_idx != input_to_nest_level_.end())
483  << "Not found in input_to_nest_level_, source="
484  << source->toString(RelRexToStringConfig::defaults());
485  const int rte_idx = it_rte_idx->second;
486  const auto scan_source = dynamic_cast<const RelScan*>(source);
487  const auto& in_metainfo = source->getOutputMetainfo();
488  if (scan_source) {
489  // We're at leaf (scan) level and not supposed to have input metadata,
490  // the name and type information come directly from the catalog.
491  CHECK(in_metainfo.empty());
492  const auto table_desc = scan_source->getTableDescriptor();
493  const auto& catalog = scan_source->getCatalog();
494  const auto cd =
495  catalog.getMetadataForColumnBySpi(table_desc->tableId, rex_input->getIndex() + 1);
496  CHECK(cd);
497  auto col_ti = cd->columnType;
498  if (col_ti.is_string()) {
499  col_ti.set_type(kTEXT);
500  }
501  if (cd->isVirtualCol) {
502  // TODO(alex): remove at some point, we only need this fixup for backwards
503  // compatibility with old imported data
504  CHECK_EQ("rowid", cd->columnName);
505  col_ti.set_size(8);
506  }
507  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
508  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
509  col_ti.set_notnull(false);
510  }
511  return std::make_shared<Analyzer::ColumnVar>(
512  col_ti,
513  shared::ColumnKey{catalog.getDatabaseId(), table_desc->tableId, cd->columnId},
514  rte_idx);
515  }
516  CHECK(!in_metainfo.empty()) << "for "
517  << source->toString(RelRexToStringConfig::defaults());
518  CHECK_GE(rte_idx, 0);
519  const int32_t col_id = rex_input->getIndex();
520  CHECK_LT(col_id, in_metainfo.size());
521  auto col_ti = in_metainfo[col_id].get_type_info();
522 
523  if (join_types_.size() > 0) {
524  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
525  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
526  col_ti.set_notnull(false);
527  }
528  }
529 
530  return std::make_shared<Analyzer::ColumnVar>(
531  col_ti, shared::ColumnKey{0, int32_t(-source->getId()), col_id}, rte_idx);
532 }
533 
534 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUoper(
535  const RexOperator* rex_operator) const {
536  CHECK_EQ(size_t(1), rex_operator->size());
537  const auto operand_expr = translateScalarRex(rex_operator->getOperand(0));
538  const auto sql_op = rex_operator->getOperator();
539  switch (sql_op) {
540  case kCAST: {
541  const auto& target_ti = rex_operator->getType();
542  CHECK_NE(kNULLT, target_ti.get_type());
543  const auto& operand_ti = operand_expr->get_type_info();
544  if (operand_ti.is_string() && target_ti.is_string()) {
545  return operand_expr;
546  }
547  if (target_ti.is_time() ||
548  operand_ti
549  .is_string()) { // TODO(alex): check and unify with the rest of the cases
550  // Do not propogate encoding on small dates
551  return target_ti.is_date_in_days()
552  ? operand_expr->add_cast(SQLTypeInfo(kDATE, false))
553  : operand_expr->add_cast(target_ti);
554  }
555  if (!operand_ti.is_string() && target_ti.is_string()) {
556  return operand_expr->add_cast(target_ti);
557  }
558  return std::make_shared<Analyzer::UOper>(target_ti, false, sql_op, operand_expr);
559  }
560  case kENCODE_TEXT: {
561  const auto& target_ti = rex_operator->getType();
562  CHECK_NE(kNULLT, target_ti.get_type());
563  const auto& operand_ti = operand_expr->get_type_info();
564  CHECK(operand_ti.is_string());
565  if (operand_ti.is_dict_encoded_string()) {
566  // No cast needed
567  return operand_expr;
568  }
569  if (operand_expr->get_num_column_vars(true) == 0UL) {
570  return operand_expr;
571  }
572  if (g_cluster) {
573  throw std::runtime_error(
574  "ENCODE_TEXT is not currently supported in distributed mode at this time.");
575  }
576  SQLTypeInfo casted_target_ti = operand_ti;
577  casted_target_ti.set_type(kTEXT);
578  casted_target_ti.set_compression(kENCODING_DICT);
579  casted_target_ti.set_comp_param(TRANSIENT_DICT_ID);
580  casted_target_ti.set_fixed_size();
581  return makeExpr<Analyzer::UOper>(
582  casted_target_ti, operand_expr->get_contains_agg(), kCAST, operand_expr);
583  }
584  case kNOT:
585  case kISNULL: {
586  return std::make_shared<Analyzer::UOper>(kBOOLEAN, sql_op, operand_expr);
587  }
588  case kISNOTNULL: {
589  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, operand_expr);
590  return std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
591  }
592  case kMINUS: {
593  const auto& ti = operand_expr->get_type_info();
594  return std::make_shared<Analyzer::UOper>(ti, false, kUMINUS, operand_expr);
595  }
596  case kUNNEST: {
597  const auto& ti = operand_expr->get_type_info();
598  CHECK(ti.is_array());
599  return makeExpr<Analyzer::UOper>(ti.get_elem_type(), false, kUNNEST, operand_expr);
600  }
601  default:
602  CHECK(false);
603  }
604  return nullptr;
605 }
606 
607 namespace {
608 
// Builds an Analyzer::InValues expression for `arg IN (<values of val_set>)`, creating
// the per-row constants in parallel.
//
// The entries of val_set are split into up to cpu_threads() contiguous slices; each
// slice is processed by its own task, which appends its expressions to a private list.
// All tasks are joined before the lists are spliced together in slice order.
//
// Returns nullptr when the early-return guard below fires. Throws std::runtime_error
// when the watchdog is enabled and val_set reports more than 5,000,000 rows.
//
// NOTE(review): two lines of this function are absent from this listing (611 and 632,
// flagged inline below); restore them from the original source before compiling.
609 std::shared_ptr<Analyzer::Expr> get_in_values_expr(std::shared_ptr<Analyzer::Expr> arg,
610  const ResultSet& val_set) {
// NOTE(review): the condition that opens this early return (listing line 611) is
// missing here, so it is unknown from this listing when nullptr is returned.
612  return nullptr;
613  }
614  if (val_set.rowCount() > 5000000 && g_enable_watchdog) {
615  throw std::runtime_error(
616  "Unable to handle 'expr IN (subquery)', subquery returned 5M+ rows.");
617  }
618  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
619  const size_t fetcher_count = cpu_threads();
// One output list per fetcher so the tasks never write to a shared container.
620  std::vector<std::list<std::shared_ptr<Analyzer::Expr>>> expr_set(
621  fetcher_count, std::list<std::shared_ptr<Analyzer::Expr>>());
622  std::vector<std::future<void>> fetcher_threads;
623  const auto& ti = arg->get_type_info();
624  const auto entry_count = val_set.entryCount();
// stride is entry_count / fetcher_count rounded up; the loop stops early once the
// slices cover all entries.
625  for (size_t i = 0,
626  start_entry = 0,
627  stride = (entry_count + fetcher_count - 1) / fetcher_count;
628  i < fetcher_count && start_entry < entry_count;
629  ++i, start_entry += stride) {
630  const auto end_entry = std::min(start_entry + stride, entry_count);
// NOTE(review): the line between `std::async(` and the lambda (listing line 632) is
// missing here - presumably the launch policy; confirm against the original source.
631  fetcher_threads.push_back(std::async(
// The lambda captures val_set and ti by reference; this is safe only because every
// future is joined with get() below, before this function returns.
633  [&](std::list<std::shared_ptr<Analyzer::Expr>>& in_vals,
634  const size_t start,
635  const size_t end) {
636  for (auto index = start; index < end; ++index) {
637  auto row = val_set.getRowAt(index);
// Entries that yield an empty row are skipped.
638  if (row.empty()) {
639  continue;
640  }
641  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
642  Datum d{0};
643  bool is_null_const{false};
644  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
645  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
// NOTE(review): ti_none_encoded is prepared but never used - the constant below is
// built with `ti`. Confirm whether ti_none_encoded was intended.
646  auto ti_none_encoded = ti;
647  ti_none_encoded.set_compression(kENCODING_NONE);
648  auto none_encoded_string =
649  makeExpr<Analyzer::Constant>(ti, is_null_const, d);
650  auto dict_encoded_string = std::make_shared<Analyzer::UOper>(
651  ti, false, kCAST, none_encoded_string);
652  in_vals.push_back(dict_encoded_string);
653  } else {
654  in_vals.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
655  }
656  }
657  },
658  std::ref(expr_set[i]),
659  start_entry,
660  end_entry));
661  }
// Join every task; get() also rethrows an exception raised inside a task.
662  for (auto& child : fetcher_threads) {
663  child.get();
664  }
665 
666  val_set.moveToBegin();
// splice() moves the list nodes without copying the shared pointers.
667  for (auto& exprs : expr_set) {
668  value_exprs.splice(value_exprs.end(), exprs);
669  }
670  return makeExpr<Analyzer::InValues>(arg, value_exprs);
671 }
672 
673 } // namespace
674 
675 // Creates an Analyzer expression for an IN subquery which subsequently goes through the
676 // regular Executor::codegen() mechanism. The creation of the expression out of
677 // subquery's result set is parallelized whenever possible. In addition, take advantage
678 // of additional information that elements in the right hand side are constants; see
679 // getInIntegerSetExpr().
680 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInOper(
681  const RexOperator* rex_operator) const {
682  if (just_explain_) {
683  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
684  }
685  CHECK(rex_operator->size() == 2);
686  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
687  const auto rhs = rex_operator->getOperand(1);
688  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rhs);
689  CHECK(rex_subquery);
690  auto ti = lhs->get_type_info();
691  auto result = rex_subquery->getExecutionResult();
692  CHECK(result);
693  auto& row_set = result->getRows();
694  CHECK_EQ(size_t(1), row_set->colCount());
695  const auto& rhs_ti = row_set->getColType(0);
696  if (rhs_ti.get_type() != ti.get_type()) {
697  throw std::runtime_error(
698  "The two sides of the IN operator must have the same type; found " +
699  ti.get_type_name() + " and " + rhs_ti.get_type_name());
700  }
701  row_set->moveToBegin();
702  if (row_set->entryCount() > 10000) {
703  std::shared_ptr<Analyzer::Expr> expr;
704  if ((ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT)) &&
705  !row_set->getQueryMemDesc().didOutputColumnar()) {
706  expr = getInIntegerSetExpr(lhs, *row_set);
707  // Handle the highly unlikely case when the InIntegerSet ended up being tiny.
708  // Just let it fall through the usual InValues path at the end of this method,
709  // its codegen knows to use inline comparisons for few values.
710  if (expr && std::static_pointer_cast<Analyzer::InIntegerSet>(expr)
711  ->get_value_list()
712  .size() <= 100) {
713  expr = nullptr;
714  }
715  } else {
716  expr = get_in_values_expr(lhs, *row_set);
717  }
718  if (expr) {
719  return expr;
720  }
721  }
722  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
723  while (true) {
724  auto row = row_set->getNextRow(true, false);
725  if (row.empty()) {
726  break;
727  }
728  if (g_enable_watchdog && value_exprs.size() >= 10000) {
729  throw std::runtime_error(
730  "Unable to handle 'expr IN (subquery)', subquery returned 10000+ rows.");
731  }
732  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
733  Datum d{0};
734  bool is_null_const{false};
735  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
736  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
737  auto ti_none_encoded = ti;
738  ti_none_encoded.set_compression(kENCODING_NONE);
739  auto none_encoded_string = makeExpr<Analyzer::Constant>(ti, is_null_const, d);
740  auto dict_encoded_string =
741  std::make_shared<Analyzer::UOper>(ti, false, kCAST, none_encoded_string);
742  value_exprs.push_back(dict_encoded_string);
743  } else {
744  value_exprs.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
745  }
746  }
747  return makeExpr<Analyzer::InValues>(lhs, value_exprs);
748 }
749 
750 namespace {
751 
752 const size_t g_max_integer_set_size{1 << 25};
753 
755  std::vector<int64_t>& in_vals,
756  std::atomic<size_t>& total_in_vals_count,
757  const ResultSet* values_rowset,
758  const std::pair<int64_t, int64_t> values_rowset_slice,
759  const StringDictionaryProxy* source_dict,
760  const StringDictionaryProxy* dest_dict,
761  const int64_t needle_null_val) {
762  CHECK(in_vals.empty());
763  bool dicts_are_equal = source_dict == dest_dict;
764  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
765  ++index) {
766  const auto row = values_rowset->getOneColRow(index);
767  if (UNLIKELY(!row.valid)) {
768  continue;
769  }
770  if (dicts_are_equal) {
771  in_vals.push_back(row.value);
772  } else {
773  const int string_id =
774  row.value == needle_null_val
775  ? needle_null_val
776  : dest_dict->getIdOfString(source_dict->getString(row.value));
777  if (string_id != StringDictionary::INVALID_STR_ID) {
778  in_vals.push_back(string_id);
779  }
780  }
781  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
782  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
783  throw std::runtime_error(
784  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
785  }
786  }
787 }
788 
789 void fill_integer_in_vals(std::vector<int64_t>& in_vals,
790  std::atomic<size_t>& total_in_vals_count,
791  const ResultSet* values_rowset,
792  const std::pair<int64_t, int64_t> values_rowset_slice) {
793  CHECK(in_vals.empty());
794  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
795  ++index) {
796  const auto row = values_rowset->getOneColRow(index);
797  if (row.valid) {
798  in_vals.push_back(row.value);
799  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
800  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
801  throw std::runtime_error(
802  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
803  }
804  }
805  }
806 }
807 
808 // Multi-node counterpart of the other version. Saves round-trips, which is crucial
809 // for a big right-hand side result. It only handles physical string dictionary ids,
810 // therefore it won't be able to handle a right-hand side sub-query with a CASE
811 // returning literals on some branches. That case isn't hard to handle either, but
812 // it's not clear it's actually important in practice.
813 // RelAlgTranslator::getInIntegerSetExpr makes sure, by checking the encodings, that
814 // this function isn't called in such cases.
816  std::vector<int64_t>& in_vals,
817  std::atomic<size_t>& total_in_vals_count,
818  const ResultSet* values_rowset,
819  const std::pair<int64_t, int64_t> values_rowset_slice,
820  const std::vector<LeafHostInfo>& leaf_hosts,
821  const DictRef source_dict_ref,
822  const DictRef dest_dict_ref,
823  const int32_t dest_generation,
824  const int64_t needle_null_val) {
825  CHECK(in_vals.empty());
826  std::vector<int32_t> source_ids;
827  source_ids.reserve(values_rowset->entryCount());
828  bool has_nulls = false;
829  if (source_dict_ref == dest_dict_ref) {
830  in_vals.reserve(values_rowset_slice.second - values_rowset_slice.first +
831  1); // Add 1 to cover interval
832  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
833  ++index) {
834  const auto row = values_rowset->getOneColRow(index);
835  if (!row.valid) {
836  continue;
837  }
838  if (row.value != needle_null_val) {
839  in_vals.push_back(row.value);
840  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
841  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
842  throw std::runtime_error(
843  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
844  }
845  } else {
846  has_nulls = true;
847  }
848  }
849  if (has_nulls) {
850  in_vals.push_back(
851  needle_null_val); // we've deduped null values as an optimization, although
852  // this is not required by consumer
853  }
854  return;
855  }
856  // Code path below is for when dictionaries are not shared
857  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
858  ++index) {
859  const auto row = values_rowset->getOneColRow(index);
860  if (row.valid) {
861  if (row.value != needle_null_val) {
862  source_ids.push_back(row.value);
863  } else {
864  has_nulls = true;
865  }
866  }
867  }
868  std::vector<int32_t> dest_ids;
869  translate_string_ids(dest_ids,
870  leaf_hosts.front(),
871  dest_dict_ref,
872  source_ids,
873  source_dict_ref,
874  dest_generation);
875  CHECK_EQ(dest_ids.size(), source_ids.size());
876  in_vals.reserve(dest_ids.size() + (has_nulls ? 1 : 0));
877  if (has_nulls) {
878  in_vals.push_back(needle_null_val);
879  }
880  for (const int32_t dest_id : dest_ids) {
881  if (dest_id != StringDictionary::INVALID_STR_ID) {
882  in_vals.push_back(dest_id);
883  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
884  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
885  throw std::runtime_error(
886  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
887  }
888  }
889  }
890 }
891 
892 } // namespace
893 
894 // The typical IN subquery involves either dictionary-encoded strings or integers.
895 // Analyzer::InValues is a very heavy representation of the right hand side of such
896 // a query since we already know the right hand would be a list of Analyzer::Constant
897 // shared pointers. We can avoid the big overhead of each Analyzer::Constant and the
898 // refcounting associated with shared pointers by creating an abbreviated InIntegerSet
899 // representation of the IN expression which takes advantage of this information.
// Builds an Analyzer::InIntegerSet for `arg IN (<val_set>)` by reading the
// single-column result set in slices, one asynchronous task per slice, and
// concatenating the per-slice values. Returns nullptr when this fast path is
// not taken (see the early returns below).
// NOTE(review): several lines of this function (the first guard condition, the
// catalog lookup, the launch policy and the fill-helper call names) are not
// present in this listing; comments below only describe what is visible.
900 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::getInIntegerSetExpr(
901  std::shared_ptr<Analyzer::Expr> arg,
902  const ResultSet& val_set) const {
 // Early bail-out; its guarding condition is not visible in this listing.
904  return nullptr;
905  }
906  std::vector<int64_t> value_exprs;
 // One output vector per fetcher; fetcher_count comes from cpu_threads().
907  const size_t fetcher_count = cpu_threads();
908  std::vector<std::vector<int64_t>> expr_set(fetcher_count);
909  std::vector<std::future<void>> fetcher_threads;
910  const auto& arg_type = arg->get_type_info();
911  const auto entry_count = val_set.entryCount();
912  CHECK_EQ(size_t(1), val_set.colCount());
913  const auto& col_type = val_set.getColType(0);
914  if (g_cluster && arg_type.is_string() &&
915  (col_type.get_comp_param() <= 0 || arg_type.get_comp_param() <= 0)) {
916  // Skip this case for now, see comment for fill_dictionary_encoded_in_vals.
917  return nullptr;
918  }
 // Shared across all fetchers; passed to the fill helpers.
919  std::atomic<size_t> total_in_vals_count{0};
 // Split [0, entry_count) into at most fetcher_count slices of
 // ceil(entry_count / fetcher_count) entries each.
920  for (size_t i = 0,
921  start_entry = 0,
922  stride = (entry_count + fetcher_count - 1) / fetcher_count;
923  i < fetcher_count && start_entry < entry_count;
924  ++i, start_entry += stride) {
925  expr_set[i].reserve(entry_count / fetcher_count);
926  const auto end_entry = std::min(start_entry + stride, entry_count);
927  if (arg_type.is_string()) {
 // String arguments must be dictionary-encoded column references.
928  CHECK_EQ(kENCODING_DICT, arg_type.get_compression());
929  auto col_expr = dynamic_cast<const Analyzer::ColumnVar*>(arg.get());
930  CHECK(col_expr);
 // NOTE(review): both keys are captured by reference in the async lambda
 // below; this is only safe if getStringDictKey() returns a reference to
 // storage that outlives this loop iteration - confirm.
931  const auto& dest_dict_key = arg_type.getStringDictKey();
932  const auto& source_dict_key = col_type.getStringDictKey();
 // dd: proxy for the argument's dictionary; sd: proxy for the subquery
 // column's dictionary.
933  const auto dd = executor_->getStringDictionaryProxy(
934  arg_type.getStringDictKey(), val_set.getRowSetMemOwner(), true);
935  const auto sd = executor_->getStringDictionaryProxy(
936  col_type.getStringDictKey(), val_set.getRowSetMemOwner(), true);
937  CHECK(sd);
938  const auto needle_null_val = inline_int_null_val(arg_type);
940  col_expr->getColumnKey().db_id);
941  CHECK(catalog);
942  fetcher_threads.push_back(std::async(
944  [this,
945  &val_set,
946  &total_in_vals_count,
947  sd,
948  dd,
949  &source_dict_key,
950  &dest_dict_key,
951  needle_null_val,
952  catalog](std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
 // Cluster mode passes dictionary hosts/refs and the destination
 // generation; otherwise the two local proxies are passed.
953  if (g_cluster) {
954  CHECK_GE(dd->getGeneration(), 0);
956  in_vals,
957  total_in_vals_count,
958  &val_set,
959  {start, end},
960  catalog->getStringDictionaryHosts(),
961  {source_dict_key.db_id, source_dict_key.dict_id},
962  {dest_dict_key.db_id, dest_dict_key.dict_id},
963  dd->getGeneration(),
964  needle_null_val);
965  } else {
967  total_in_vals_count,
968  &val_set,
969  {start, end},
970  sd,
971  dd,
972  needle_null_val);
973  }
974  },
975  std::ref(expr_set[i]),
976  start_entry,
977  end_entry));
978  } else {
 // The only other supported argument type is integer.
979  CHECK(arg_type.is_integer());
980  fetcher_threads.push_back(std::async(
982  [&val_set, &total_in_vals_count](
983  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
984  fill_integer_in_vals(in_vals, total_in_vals_count, &val_set, {start, end});
985  },
986  std::ref(expr_set[i]),
987  start_entry,
988  end_entry));
989  }
990  }
 // Wait for every fetcher; future::get() also rethrows any exception a
 // fetcher raised.
991  for (auto& child : fetcher_threads) {
992  child.get();
993  }
994 
995  val_set.moveToBegin();
 // Concatenate the per-fetcher results in slice order.
996  value_exprs.reserve(entry_count);
997  for (auto& exprs : expr_set) {
998  value_exprs.insert(value_exprs.end(), exprs.begin(), exprs.end());
999  }
 // The last argument is true only when both the argument type and the
 // subquery column type are NOT NULL.
1000  return makeExpr<Analyzer::InIntegerSet>(
1001  arg, value_exprs, arg_type.get_notnull() && col_type.get_notnull());
1002 }
1003 
1004 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOper(
1005  const RexOperator* rex_operator) const {
1006  CHECK_GT(rex_operator->size(), size_t(0));
1007  if (rex_operator->size() == 1) {
1008  return translateUoper(rex_operator);
1009  }
1010  const auto sql_op = rex_operator->getOperator();
1011  if (sql_op == kIN) {
1012  return translateInOper(rex_operator);
1013  }
1014  if (sql_op == kMINUS || sql_op == kPLUS) {
1015  auto date_plus_minus = translateDatePlusMinus(rex_operator);
1016  if (date_plus_minus) {
1017  return date_plus_minus;
1018  }
1019  }
1020  if (sql_op == kOVERLAPS) {
1021  return translateOverlapsOper(rex_operator);
1022  } else if (IS_COMPARISON(sql_op)) {
1023  auto geo_comp = translateGeoComparison(rex_operator);
1024  if (geo_comp) {
1025  return geo_comp;
1026  }
1027  }
1028  auto lhs = translateScalarRex(rex_operator->getOperand(0));
1029  for (size_t i = 1; i < rex_operator->size(); ++i) {
1030  std::shared_ptr<Analyzer::Expr> rhs;
1031  SQLQualifier sql_qual{kONE};
1032  const auto rhs_op = rex_operator->getOperand(i);
1033  std::tie(rhs, sql_qual) = getQuantifiedRhs(rhs_op);
1034  if (!rhs) {
1035  rhs = translateScalarRex(rhs_op);
1036  }
1037  CHECK(rhs);
1038 
1039  // Pass in executor to get string proxy info if cast needed between
1040  // string columns
1041  lhs = Parser::OperExpr::normalize(sql_op, sql_qual, lhs, rhs, executor_);
1042  }
1043  return lhs;
1044 }
1045 
1046 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOverlapsOper(
1047  const RexOperator* rex_operator) const {
1048  const auto sql_op = rex_operator->getOperator();
1049  CHECK(sql_op == kOVERLAPS);
1050 
1051  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
1052  const auto lhs_ti = lhs->get_type_info();
1053  if (lhs_ti.is_geometry()) {
1054  return translateGeoOverlapsOper(rex_operator);
1055  } else {
1056  throw std::runtime_error(
1057  "Overlaps equivalence is currently only supported for geospatial types");
1058  }
1059 }
1060 
1061 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCase(
1062  const RexCase* rex_case) const {
1063  std::shared_ptr<Analyzer::Expr> else_expr;
1064  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1065  expr_list;
1066  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1067  const auto when_expr = translateScalarRex(rex_case->getWhen(i));
1068  const auto then_expr = translateScalarRex(rex_case->getThen(i));
1069  expr_list.emplace_back(when_expr, then_expr);
1070  }
1071  if (rex_case->getElse()) {
1072  else_expr = translateScalarRex(rex_case->getElse());
1073  }
1074  return Parser::CaseExpr::normalize(expr_list, else_expr, executor_);
1075 }
1076 
1077 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWidthBucket(
1078  const RexFunctionOperator* rex_function) const {
1079  CHECK(rex_function->size() == 4);
1080  auto target_value = translateScalarRex(rex_function->getOperand(0));
1081  auto lower_bound = translateScalarRex(rex_function->getOperand(1));
1082  auto upper_bound = translateScalarRex(rex_function->getOperand(2));
1083  auto partition_count = translateScalarRex(rex_function->getOperand(3));
1084  if (!partition_count->get_type_info().is_integer()) {
1085  throw std::runtime_error(
1086  "PARTITION_COUNT expression of width_bucket function expects an integer type.");
1087  }
1088  auto check_numeric_type =
1089  [](const std::string& col_name, const Analyzer::Expr* expr, bool allow_null_type) {
1090  if (expr->get_type_info().get_type() == kNULLT) {
1091  if (!allow_null_type) {
1092  throw std::runtime_error(
1093  col_name + " expression of width_bucket function expects non-null type.");
1094  }
1095  return;
1096  }
1097  if (!expr->get_type_info().is_number()) {
1098  throw std::runtime_error(
1099  col_name + " expression of width_bucket function expects a numeric type.");
1100  }
1101  };
1102  // target value may have null value
1103  check_numeric_type("TARGET_VALUE", target_value.get(), true);
1104  check_numeric_type("LOWER_BOUND", lower_bound.get(), false);
1105  check_numeric_type("UPPER_BOUND", upper_bound.get(), false);
1106 
1107  auto cast_to_double_if_necessary = [](std::shared_ptr<Analyzer::Expr> arg) {
1108  const auto& arg_ti = arg->get_type_info();
1109  if (arg_ti.get_type() != kDOUBLE) {
1110  const auto& double_ti = SQLTypeInfo(kDOUBLE, arg_ti.get_notnull());
1111  return arg->add_cast(double_ti);
1112  }
1113  return arg;
1114  };
1115  target_value = cast_to_double_if_necessary(target_value);
1116  lower_bound = cast_to_double_if_necessary(lower_bound);
1117  upper_bound = cast_to_double_if_necessary(upper_bound);
1118  return makeExpr<Analyzer::WidthBucketExpr>(
1119  target_value, lower_bound, upper_bound, partition_count);
1120 }
1121 
1122 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLike(
1123  const RexFunctionOperator* rex_function) const {
1124  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
1125  const auto arg = translateScalarRex(rex_function->getOperand(0));
1126  const auto like = translateScalarRex(rex_function->getOperand(1));
1127  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(like)) {
1128  throw std::runtime_error("The matching pattern must be a literal.");
1129  }
1130  const auto escape = (rex_function->size() == 3)
1131  ? translateScalarRex(rex_function->getOperand(2))
1132  : nullptr;
1133  const bool is_ilike = rex_function->getName() == "PG_ILIKE"sv;
1134  return Parser::LikeExpr::get(arg, like, escape, is_ilike, false);
1135 }
1136 
1137 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRegexp(
1138  const RexFunctionOperator* rex_function) const {
1139  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
1140  const auto arg = translateScalarRex(rex_function->getOperand(0));
1141  const auto pattern = translateScalarRex(rex_function->getOperand(1));
1142  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(pattern)) {
1143  throw std::runtime_error("The matching pattern must be a literal.");
1144  }
1145  const auto escape = (rex_function->size() == 3)
1146  ? translateScalarRex(rex_function->getOperand(2))
1147  : nullptr;
1148  return Parser::RegexpExpr::get(arg, pattern, escape, false);
1149 }
1150 
1151 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLikely(
1152  const RexFunctionOperator* rex_function) const {
1153  CHECK(rex_function->size() == 1);
1154  const auto arg = translateScalarRex(rex_function->getOperand(0));
1155  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.9375);
1156 }
1157 
1158 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUnlikely(
1159  const RexFunctionOperator* rex_function) const {
1160  CHECK(rex_function->size() == 1);
1161  const auto arg = translateScalarRex(rex_function->getOperand(0));
1162  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.0625);
1163 }
1164 
1165 namespace {
1166 
1168  const std::shared_ptr<Analyzer::Constant> literal_expr) {
 // NOTE(review): the first line of this helper's signature (return type and
 // name) is not present in this listing.
 // Rejects a missing constant (null pointer) as well as a constant whose
 // value is SQL NULL.
1169  if (!literal_expr || literal_expr->get_is_null()) {
1170  throw std::runtime_error("The 'DatePart' argument must be a not 'null' literal.");
1171  }
1172 }
1173 
1174 } // namespace
1175 
// Translates PG_EXTRACT / PG_DATE_TRUNC: operand 0 is the time unit, operand 1
// the expression to extract from or truncate.
1176 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateExtract(
1177  const RexFunctionOperator* rex_function) const {
1178  CHECK_EQ(size_t(2), rex_function->size());
1179  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
 // Null when the time unit operand is not a constant.
1180  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
 // NOTE(review): one line is missing from this listing here; timeunit_lit is
 // dereferenced below, so presumably that line validates it - confirm.
1182  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
 // The same translator serves both functions; the name picks the generator.
1183  const bool is_date_trunc = rex_function->getName() == "PG_DATE_TRUNC"sv;
1184  if (is_date_trunc) {
1185  return DateTruncExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
1186  } else {
1187  return ExtractExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
1188  }
1189 }
1190 
1191 namespace {
1192 
1193 std::shared_ptr<Analyzer::Constant> makeNumericConstant(const SQLTypeInfo& ti,
1194  const long val) {
1195  CHECK(ti.is_number());
1196  Datum datum{0};
1197  switch (ti.get_type()) {
1198  case kTINYINT: {
1199  datum.tinyintval = val;
1200  break;
1201  }
1202  case kSMALLINT: {
1203  datum.smallintval = val;
1204  break;
1205  }
1206  case kINT: {
1207  datum.intval = val;
1208  break;
1209  }
1210  case kBIGINT: {
1211  datum.bigintval = val;
1212  break;
1213  }
1214  case kDECIMAL:
1215  case kNUMERIC: {
1216  datum.bigintval = val * exp_to_scale(ti.get_scale());
1217  break;
1218  }
1219  case kFLOAT: {
1220  datum.floatval = val;
1221  break;
1222  }
1223  case kDOUBLE: {
1224  datum.doubleval = val;
1225  break;
1226  }
1227  default:
1228  CHECK(false);
1229  }
1230  return makeExpr<Analyzer::Constant>(ti, false, datum);
1231 }
1232 
1233 } // namespace
1234 
// Translates DATEADD(unit, number, datetime) into an Analyzer::DateaddExpr
// whose result type is a non-null-flagged TIMESTAMP with the dimension of the
// datetime operand.
1235 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDateadd(
1236  const RexFunctionOperator* rex_function) const {
1237  CHECK_EQ(size_t(3), rex_function->size());
1238  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
 // Null when the time unit operand is not a constant.
1239  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
 // NOTE(review): one line is missing from this listing here; timeunit_lit is
 // dereferenced below, so presumably that line validates it - confirm.
1241  const auto number_units = translateScalarRex(rex_function->getOperand(1));
1242  const auto number_units_const =
1243  std::dynamic_pointer_cast<Analyzer::Constant>(number_units);
 // Only a literal NULL count is rejected here; non-constant counts pass.
1244  if (number_units_const && number_units_const->get_is_null()) {
1245  throw std::runtime_error("The 'Interval' argument literal must not be 'null'.");
1246  }
 // The unit count is always handed on as BIGINT.
1247  const auto cast_number_units = number_units->add_cast(SQLTypeInfo(kBIGINT, false));
1248  const auto datetime = translateScalarRex(rex_function->getOperand(2));
1249  const auto& datetime_ti = datetime->get_type_info();
1250  if (datetime_ti.get_type() == kTIME) {
1251  throw std::runtime_error("DateAdd operation not supported for TIME.");
1252  }
1253  const auto& field = to_dateadd_field(*timeunit_lit->get_constval().stringval);
1254  const int dim = datetime_ti.get_dimension();
1255  return makeExpr<Analyzer::DateaddExpr>(
1256  SQLTypeInfo(kTIMESTAMP, dim, 0, false), field, cast_number_units, datetime);
1257 }
1258 
1259 namespace {
1260 
 // NOTE(review): the signature line of this helper is not present in this
 // listing. The visible body accepts only kPLUS (enforced by CHECK) and
 // returns the string "DATETIME_PLUS".
1262  CHECK(op == kPLUS);
1263  return "DATETIME_PLUS"s;
1264 }
1265 
1266 } // namespace
1267 
1268 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatePlusMinus(
1269  const RexOperator* rex_operator) const {
1270  if (rex_operator->size() != 2) {
1271  return nullptr;
1272  }
1273  const auto datetime = translateScalarRex(rex_operator->getOperand(0));
1274  const auto datetime_ti = datetime->get_type_info();
1275  if (!datetime_ti.is_timestamp() && !datetime_ti.is_date()) {
1276  if (datetime_ti.get_type() == kTIME) {
1277  throw std::runtime_error("DateTime addition/subtraction not supported for TIME.");
1278  }
1279  return nullptr;
1280  }
1281  const auto rhs = translateScalarRex(rex_operator->getOperand(1));
1282  const auto rhs_ti = rhs->get_type_info();
1283  if (rhs_ti.get_type() == kTIMESTAMP || rhs_ti.get_type() == kDATE) {
1284  if (datetime_ti.is_high_precision_timestamp() ||
1285  rhs_ti.is_high_precision_timestamp()) {
1286  throw std::runtime_error(
1287  "High Precision timestamps are not supported for TIMESTAMPDIFF operation. "
1288  "Use "
1289  "DATEDIFF.");
1290  }
1291  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1292  const auto& rex_operator_ti = rex_operator->getType();
1293  const auto datediff_field =
1294  (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) ? dtSECOND : dtMONTH;
1295  auto result =
1296  makeExpr<Analyzer::DatediffExpr>(bigint_ti, datediff_field, rhs, datetime);
1297  // multiply 1000 to result since expected result should be in millisecond precision.
1298  if (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) {
1299  return makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1300  kMULTIPLY,
1301  kONE,
1302  result,
1303  makeNumericConstant(bigint_ti, 1000));
1304  } else {
1305  return result;
1306  }
1307  }
1308  const auto op = rex_operator->getOperator();
1309  if (op == kPLUS) {
1310  std::vector<std::shared_ptr<Analyzer::Expr>> args = {datetime, rhs};
1311  auto dt_plus = makeExpr<Analyzer::FunctionOper>(
1312  datetime_ti, get_datetimeplus_rewrite_funcname(op), args);
1313  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1314  if (date_trunc) {
1315  return date_trunc;
1316  }
1317  }
1318  const auto interval = fold_expr(rhs.get());
1319  auto interval_ti = interval->get_type_info();
1320  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1321  const auto interval_lit = std::dynamic_pointer_cast<Analyzer::Constant>(interval);
1322  if (interval_ti.get_type() == kINTERVAL_DAY_TIME) {
1323  std::shared_ptr<Analyzer::Expr> interval_sec;
1324  if (interval_lit) {
1325  interval_sec =
1326  makeNumericConstant(bigint_ti,
1327  (op == kMINUS ? -interval_lit->get_constval().bigintval
1328  : interval_lit->get_constval().bigintval) /
1329  1000);
1330  } else {
1331  interval_sec = makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1332  kDIVIDE,
1333  kONE,
1334  interval,
1335  makeNumericConstant(bigint_ti, 1000));
1336  if (op == kMINUS) {
1337  interval_sec =
1338  std::make_shared<Analyzer::UOper>(bigint_ti, false, kUMINUS, interval_sec);
1339  }
1340  }
1341  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daSECOND, interval_sec, datetime);
1342  }
1343  CHECK(interval_ti.get_type() == kINTERVAL_YEAR_MONTH);
1344  const auto interval_months = op == kMINUS ? std::make_shared<Analyzer::UOper>(
1345  bigint_ti, false, kUMINUS, interval)
1346  : interval;
1347  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daMONTH, interval_months, datetime);
1348 }
1349 
// Translates DATEDIFF(unit, start, end) into an Analyzer::DatediffExpr with a
// BIGINT result type.
1350 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatediff(
1351  const RexFunctionOperator* rex_function) const {
1352  CHECK_EQ(size_t(3), rex_function->size());
1353  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
 // Null when the time unit operand is not a constant.
1354  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
 // NOTE(review): one line is missing from this listing here; timeunit_lit is
 // dereferenced below, so presumably that line validates it - confirm.
1356  const auto start = translateScalarRex(rex_function->getOperand(1));
1357  const auto end = translateScalarRex(rex_function->getOperand(2));
1358  const auto field = to_datediff_field(*timeunit_lit->get_constval().stringval);
1359  return makeExpr<Analyzer::DatediffExpr>(SQLTypeInfo(kBIGINT, false), field, start, end);
1360 }
1361 
// Translates DATEPART(unit, expr) through ExtractExpr::generate(), mapping the
// unit string with to_datepart_field().
1362 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatepart(
1363  const RexFunctionOperator* rex_function) const {
1364  CHECK_EQ(size_t(2), rex_function->size());
1365  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
 // Null when the time unit operand is not a constant.
1366  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
 // NOTE(review): one line is missing from this listing here; timeunit_lit is
 // dereferenced below, so presumably that line validates it - confirm.
1368  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1369  return ExtractExpr::generate(
1370  from_expr, to_datepart_field(*timeunit_lit->get_constval().stringval));
1371 }
1372 
1373 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLength(
1374  const RexFunctionOperator* rex_function) const {
1375  CHECK_EQ(size_t(1), rex_function->size());
1376  const auto str_arg = translateScalarRex(rex_function->getOperand(0));
1377  return makeExpr<Analyzer::CharLengthExpr>(str_arg->decompress(),
1378  rex_function->getName() == "CHAR_LENGTH"sv);
1379 }
1380 
1381 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateKeyForString(
1382  const RexFunctionOperator* rex_function) const {
1383  const auto& args = translateFunctionArgs(rex_function);
1384  CHECK_EQ(size_t(1), args.size());
1385  const auto expr = dynamic_cast<Analyzer::Expr*>(args[0].get());
1386  if (nullptr == expr || !expr->get_type_info().is_string() ||
1387  expr->get_type_info().is_varlen()) {
1388  throw std::runtime_error(rex_function->getName() +
1389  " expects a dictionary encoded text column.");
1390  }
1391  auto unnest_arg = dynamic_cast<Analyzer::UOper*>(expr);
1392  if (unnest_arg && unnest_arg->get_optype() == SQLOps::kUNNEST) {
1393  throw std::runtime_error(
1394  rex_function->getName() +
1395  " does not support unnest operator as its input expression.");
1396  }
1397  return makeExpr<Analyzer::KeyForStringExpr>(args[0]);
1398 }
1399 
1400 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSampleRatio(
1401  const RexFunctionOperator* rex_function) const {
1402  CHECK_EQ(size_t(1), rex_function->size());
1403  auto arg = translateScalarRex(rex_function->getOperand(0));
1404  const auto& arg_ti = arg->get_type_info();
1405  if (arg_ti.get_type() != kDOUBLE) {
1406  const auto& double_ti = SQLTypeInfo(kDOUBLE, arg_ti.get_notnull());
1407  arg = arg->add_cast(double_ti);
1408  }
1409  return makeExpr<Analyzer::SampleRatioExpr>(arg);
1410 }
1411 
1412 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentUser(
1413  const RexFunctionOperator* rex_function) const {
1414  std::string user{"SESSIONLESS_USER"};
1415  if (query_state_) {
1416  user = query_state_->getConstSessionInfo()->get_currentUser().userName;
1417  }
1418  return Parser::UserLiteral::get(user);
1419 }
1420 
// Translates a string function: maps the function name to a string-op kind via
// name_to_string_op_kind() and builds the matching Analyzer string-operator
// expression from the translated arguments.
// NOTE(review): the guard condition before the "not supported" throw and most
// `case` labels of the switch are not present in this listing.
1421 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateStringOper(
1422  const RexFunctionOperator* rex_function) const {
1423  const auto func_name = rex_function->getName();
 // Rejection path; its guarding condition is not visible in this listing.
1425  std::ostringstream oss;
1426  oss << "Function " << func_name << " not supported.";
1427  throw std::runtime_error(oss.str());
1428  }
1429  const auto string_op_kind = ::name_to_string_op_kind(func_name);
1430  auto args = translateFunctionArgs(rex_function);
1431 
 // Each branch wraps `args` in the operator class for that kind.
1432  switch (string_op_kind) {
1434  return makeExpr<Analyzer::LowerStringOper>(args);
1436  return makeExpr<Analyzer::UpperStringOper>(args);
1438  return makeExpr<Analyzer::InitCapStringOper>(args);
1440  return makeExpr<Analyzer::ReverseStringOper>(args);
1442  return makeExpr<Analyzer::RepeatStringOper>(args);
1444  return makeExpr<Analyzer::ConcatStringOper>(args);
 // Pad and trim share one operator class each, which also receives the kind.
1445  case SqlStringOpKind::LPAD:
1446  case SqlStringOpKind::RPAD: {
1447  return makeExpr<Analyzer::PadStringOper>(string_op_kind, args);
1448  }
1449  case SqlStringOpKind::TRIM:
1451  case SqlStringOpKind::RTRIM: {
1452  return makeExpr<Analyzer::TrimStringOper>(string_op_kind, args);
1453  }
1455  return makeExpr<Analyzer::SubstringStringOper>(args);
1457  return makeExpr<Analyzer::OverlayStringOper>(args);
1459  return makeExpr<Analyzer::ReplaceStringOper>(args);
1461  return makeExpr<Analyzer::SplitPartStringOper>(args);
1463  return makeExpr<Analyzer::RegexpReplaceStringOper>(args);
1465  return makeExpr<Analyzer::RegexpSubstrStringOper>(args);
1467  return makeExpr<Analyzer::JsonValueStringOper>(args);
1469  return makeExpr<Analyzer::Base64EncodeStringOper>(args);
1471  return makeExpr<Analyzer::Base64DecodeStringOper>(args);
 // This operator additionally receives the function's declared return type.
1473  return makeExpr<Analyzer::TryStringCastOper>(rex_function->getType(), args);
1475  return makeExpr<Analyzer::PositionStringOper>(args);
1476  default: {
1477  throw std::runtime_error("Unsupported string function.");
1478  }
1479  }
1480 }
1481 
1483  const RexFunctionOperator* rex_function) const {
 // NOTE(review): the first line of this function's signature is not present
 // in this listing.
 // Computes the element count of an array operand: a constant when the array
 // type reports a positive size, otherwise a CardinalityExpr.
1484  const auto ret_ti = rex_function->getType();
1485  const auto arg = translateScalarRex(rex_function->getOperand(0));
1486  const auto arg_ti = arg->get_type_info();
1487  if (!arg_ti.is_array()) {
1488  throw std::runtime_error(rex_function->getName() + " expects an array expression.");
1489  }
 // Arrays of arrays are rejected.
1490  if (arg_ti.get_subtype() == kARRAY) {
1491  throw std::runtime_error(rex_function->getName() +
1492  " expects one-dimension array expression.");
1493  }
1494  const auto array_size = arg_ti.get_size();
1495  const auto array_elem_size = arg_ti.get_elem_type().get_array_context_logical_size();
1496 
1497  if (array_size > 0) {
 // Guard the division below against a non-positive element size.
1498  if (array_elem_size <= 0) {
1499  throw std::runtime_error(rex_function->getName() +
1500  ": unexpected array element type.");
1501  }
1502  // Return cardinality of a fixed length array
1503  return makeNumericConstant(ret_ti, array_size / array_elem_size);
1504  }
1505  // Variable length array cardinality will be calculated at runtime
1506  return makeExpr<Analyzer::CardinalityExpr>(arg);
1507 }
1508 
1509 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateItem(
1510  const RexFunctionOperator* rex_function) const {
1511  CHECK_EQ(size_t(2), rex_function->size());
1512  const auto base = translateScalarRex(rex_function->getOperand(0));
1513  const auto index = translateScalarRex(rex_function->getOperand(1));
1514  return makeExpr<Analyzer::BinOper>(
1515  base->get_type_info().get_elem_type(), false, kARRAY_AT, kONE, base, index);
1516 }
1517 
1518 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentDate() const {
1519  constexpr bool is_null = false;
1520  Datum datum;
1521  datum.bigintval = now_ - now_ % (24 * 60 * 60); // Assumes 0 < now_.
1522  return makeExpr<Analyzer::Constant>(kDATE, is_null, datum);
1523 }
1524 
1525 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentTime() const {
1526  constexpr bool is_null = false;
1527  Datum datum;
1528  datum.bigintval = now_ % (24 * 60 * 60); // Assumes 0 < now_.
1529  return makeExpr<Analyzer::Constant>(kTIME, is_null, datum);
1530 }
1531 
// Produces the expression used for CURRENT_TIMESTAMP; other translators in
// this file also call it for NOW and DATETIME('NOW').
// NOTE(review): the body line of this function is not present in this listing.
1532 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentTimestamp() const {
1534 }
1535 
1536 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatetime(
1537  const RexFunctionOperator* rex_function) const {
1538  CHECK_EQ(size_t(1), rex_function->size());
1539  const auto arg = translateScalarRex(rex_function->getOperand(0));
1540  const auto arg_lit = std::dynamic_pointer_cast<Analyzer::Constant>(arg);
1541  const std::string datetime_err{R"(Only DATETIME('NOW') supported for now.)"};
1542  if (!arg_lit || arg_lit->get_is_null()) {
1543  throw std::runtime_error(datetime_err);
1544  }
1545  CHECK(arg_lit->get_type_info().is_string());
1546  if (*arg_lit->get_constval().stringval != "NOW"sv) {
1547  throw std::runtime_error(datetime_err);
1548  }
1549  return translateCurrentTimestamp();
1550 }
1551 
1552 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAbs(
1553  const RexFunctionOperator* rex_function) const {
1554  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1555  expr_list;
1556  CHECK_EQ(size_t(1), rex_function->size());
1557  const auto operand = translateScalarRex(rex_function->getOperand(0));
1558  const auto& operand_ti = operand->get_type_info();
1559  CHECK(operand_ti.is_number());
1560  const auto zero = makeNumericConstant(operand_ti, 0);
1561  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1562  const auto uminus_operand =
1563  makeExpr<Analyzer::UOper>(operand_ti.get_type(), kUMINUS, operand);
1564  expr_list.emplace_back(lt_zero, uminus_operand);
1565  return makeExpr<Analyzer::CaseExpr>(operand_ti, false, expr_list, operand);
1566 }
1567 
1568 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSign(
1569  const RexFunctionOperator* rex_function) const {
1570  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1571  expr_list;
1572  CHECK_EQ(size_t(1), rex_function->size());
1573  const auto operand = translateScalarRex(rex_function->getOperand(0));
1574  const auto& operand_ti = operand->get_type_info();
1575  CHECK(operand_ti.is_number());
1576  const auto zero = makeNumericConstant(operand_ti, 0);
1577  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1578  expr_list.emplace_back(lt_zero, makeNumericConstant(operand_ti, -1));
1579  const auto eq_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, operand, zero);
1580  expr_list.emplace_back(eq_zero, makeNumericConstant(operand_ti, 0));
1581  const auto gt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kGT, kONE, operand, zero);
1582  expr_list.emplace_back(gt_zero, makeNumericConstant(operand_ti, 1));
1583  return makeExpr<Analyzer::CaseExpr>(
1584  operand_ti,
1585  false,
1586  expr_list,
1587  makeExpr<Analyzer::Constant>(operand_ti, true, Datum{0}));
1588 }
1589 
1590 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOffsetInFragment() const {
1591  return makeExpr<Analyzer::OffsetInFragment>();
1592 }
1593 
1595  const RexFunctionOperator* rex_function) const {
 // NOTE(review): the first line of this function's signature is not present
 // in this listing.
 // Builds an Analyzer::ArrayExpr from the function's arguments. When the
 // declared array type has no element subtype (kNULLT), the subtype is
 // derived from the first element after checking that all elements share a
 // logical type.
1596  if (rex_function->getType().get_subtype() == kNULLT) {
1597  auto sql_type = rex_function->getType();
1598  CHECK(sql_type.get_type() == kARRAY);
1599 
1600  // FIX-ME: Deal with NULL arrays
1601  auto translated_function_args(translateFunctionArgs(rex_function));
1602  if (translated_function_args.size() > 0) {
1603  const auto first_element_logical_type =
1604  get_nullable_logical_type_info(translated_function_args[0]->get_type_info());
1605 
 // Find the first element whose logical type differs from the first
 // element's; two none-encoded strings are treated as matching even
 // when their type infos compare unequal.
1606  auto diff_elem_itr =
1607  std::find_if(translated_function_args.begin(),
1608  translated_function_args.end(),
1609  [first_element_logical_type](const auto expr) {
1610  const auto element_logical_type =
1611  get_nullable_logical_type_info(expr->get_type_info());
1612  if (first_element_logical_type != element_logical_type) {
1613  if (first_element_logical_type.is_none_encoded_string() &&
1614  element_logical_type.is_none_encoded_string()) {
1615  return false;
1616  }
1617  return true;
1618  }
1619  return false;
1620  });
1621  if (diff_elem_itr != translated_function_args.end()) {
1622  throw std::runtime_error(
1623  "Element " +
1624  std::to_string(diff_elem_itr - translated_function_args.begin()) +
1625  " is not of the same type as other elements of the array. Consider casting "
1626  "to force this condition.\nElement Type: " +
1627  get_nullable_logical_type_info((*diff_elem_itr)->get_type_info())
1628  .to_string() +
1629  "\nArray type: " + first_element_logical_type.to_string());
1630  }
1631 
 // String elements always produce a dictionary-encoded TEXT array:
 // none-encoded strings use the transient dictionary, dictionary-encoded
 // ones keep the first element's dictionary.
1632  if (first_element_logical_type.is_string()) {
1633  sql_type.set_subtype(kTEXT);
1634  sql_type.set_compression(kENCODING_DICT);
1635  if (first_element_logical_type.is_none_encoded_string()) {
1636  sql_type.set_comp_param(TRANSIENT_DICT_ID);
1637  sql_type.setStringDictKey({TRANSIENT_DICT_DB_ID, TRANSIENT_DICT_ID});
1638  } else {
1639  CHECK(first_element_logical_type.is_dict_encoded_string());
1640  sql_type.set_comp_param(first_element_logical_type.get_comp_param());
1641  sql_type.setStringDictKey(first_element_logical_type.getStringDictKey());
1642  }
 // NOTE(review): this branch looks unreachable if
 // is_dict_encoded_string() implies is_string() - confirm.
1643  } else if (first_element_logical_type.is_dict_encoded_string()) {
1644  sql_type.set_subtype(kTEXT);
1645  sql_type.set_compression(kENCODING_DICT);
1646  sql_type.set_comp_param(first_element_logical_type.get_comp_param());
1647  sql_type.setStringDictKey(first_element_logical_type.getStringDictKey());
1648  } else {
 // Non-string elements: copy type, scale and precision from the first
 // element.
1649  sql_type.set_subtype(first_element_logical_type.get_type());
1650  sql_type.set_scale(first_element_logical_type.get_scale());
1651  sql_type.set_precision(first_element_logical_type.get_precision());
1652  }
1653 
1654  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1655  } else {
1656  // defaulting to valid sub-type for convenience
1657  sql_type.set_subtype(kBOOLEAN);
1658  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1659  }
1660  } else {
 // The declared type already carries an element subtype; use it as is.
1661  return makeExpr<Analyzer::ArrayExpr>(rex_function->getType(),
1662  translateFunctionArgs(rex_function));
1663  }
1664 }
1665 
1666 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateFunction(
1667  const RexFunctionOperator* rex_function) const {
1668  if (func_resolve(rex_function->getName(), "LIKE"sv, "PG_ILIKE"sv)) {
1669  return translateLike(rex_function);
1670  }
1671  if (rex_function->getName() == "REGEXP_LIKE"sv) {
1672  return translateRegexp(rex_function);
1673  }
1674  if (rex_function->getName() == "LIKELY"sv) {
1675  return translateLikely(rex_function);
1676  }
1677  if (rex_function->getName() == "UNLIKELY"sv) {
1678  return translateUnlikely(rex_function);
1679  }
1680  if (func_resolve(rex_function->getName(), "PG_EXTRACT"sv, "PG_DATE_TRUNC"sv)) {
1681  return translateExtract(rex_function);
1682  }
1683  if (rex_function->getName() == "DATEADD"sv) {
1684  return translateDateadd(rex_function);
1685  }
1686  if (rex_function->getName() == "DATEDIFF"sv) {
1687  return translateDatediff(rex_function);
1688  }
1689  if (rex_function->getName() == "DATEPART"sv) {
1690  return translateDatepart(rex_function);
1691  }
1692  if (func_resolve(rex_function->getName(), "LENGTH"sv, "CHAR_LENGTH"sv)) {
1693  return translateLength(rex_function);
1694  }
1695  if (rex_function->getName() == "KEY_FOR_STRING"sv) {
1696  return translateKeyForString(rex_function);
1697  }
1698  if (rex_function->getName() == "WIDTH_BUCKET"sv) {
1699  return translateWidthBucket(rex_function);
1700  }
1701  if (rex_function->getName() == "SAMPLE_RATIO"sv) {
1702  return translateSampleRatio(rex_function);
1703  }
1704  if (rex_function->getName() == "CURRENT_USER"sv) {
1705  return translateCurrentUser(rex_function);
1706  }
1707  if (func_resolve(rex_function->getName(),
1708  "LOWER"sv,
1709  "UPPER"sv,
1710  "INITCAP"sv,
1711  "REVERSE"sv,
1712  "REPEAT"sv,
1713  "||"sv,
1714  "LPAD"sv,
1715  "RPAD"sv,
1716  "TRIM"sv,
1717  "LTRIM"sv,
1718  "RTRIM"sv,
1719  "SUBSTRING"sv,
1720  "OVERLAY"sv,
1721  "REPLACE"sv,
1722  "SPLIT_PART"sv,
1723  "REGEXP_REPLACE"sv,
1724  "REGEXP_SUBSTR"sv,
1725  "REGEXP_MATCH"sv,
1726  "JSON_VALUE"sv,
1727  "BASE64_ENCODE"sv,
1728  "BASE64_DECODE"sv,
1729  "TRY_CAST"sv,
1730  "POSITION"sv)) {
1731  return translateStringOper(rex_function);
1732  }
1733  if (func_resolve(rex_function->getName(), "CARDINALITY"sv, "ARRAY_LENGTH"sv)) {
1734  return translateCardinality(rex_function);
1735  }
1736  if (rex_function->getName() == "ITEM"sv) {
1737  return translateItem(rex_function);
1738  }
1739  if (rex_function->getName() == "CURRENT_DATE"sv) {
1740  return translateCurrentDate();
1741  }
1742  if (rex_function->getName() == "CURRENT_TIME"sv) {
1743  return translateCurrentTime();
1744  }
1745  if (rex_function->getName() == "CURRENT_TIMESTAMP"sv) {
1746  return translateCurrentTimestamp();
1747  }
1748  if (rex_function->getName() == "NOW"sv) {
1749  return translateCurrentTimestamp();
1750  }
1751  if (rex_function->getName() == "DATETIME"sv) {
1752  return translateDatetime(rex_function);
1753  }
1754  if (func_resolve(rex_function->getName(), "usTIMESTAMP"sv, "nsTIMESTAMP"sv)) {
1755  return translateHPTLiteral(rex_function);
1756  }
1757  if (rex_function->getName() == "ABS"sv) {
1758  return translateAbs(rex_function);
1759  }
1760  if (rex_function->getName() == "SIGN"sv) {
1761  return translateSign(rex_function);
1762  }
1763  if (func_resolve(rex_function->getName(), "CEIL"sv, "FLOOR"sv)) {
1764  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1765  rex_function->getType(),
1766  rex_function->getName(),
1767  translateFunctionArgs(rex_function));
1768  } else if (rex_function->getName() == "ROUND"sv) {
1769  std::vector<std::shared_ptr<Analyzer::Expr>> args =
1770  translateFunctionArgs(rex_function);
1771 
1772  if (rex_function->size() == 1) {
1773  // push a 0 constant if 2nd operand is missing.
1774  // this needs to be done as calcite returns
1775  // only the 1st operand without defaulting the 2nd one
1776  // when the user did not specify the 2nd operand.
1777  SQLTypes t = kSMALLINT;
1778  Datum d;
1779  d.smallintval = 0;
1780  args.push_back(makeExpr<Analyzer::Constant>(t, false, d));
1781  }
1782 
1783  // make sure we have only 2 operands
1784  CHECK(args.size() == 2);
1785 
1786  if (!args[0]->get_type_info().is_number()) {
1787  throw std::runtime_error("Only numeric 1st operands are supported");
1788  }
1789 
1790  // the 2nd operand does not need to be a constant
1791  // it can happily reference another integer column
1792  if (!args[1]->get_type_info().is_integer()) {
1793  throw std::runtime_error("Only integer 2nd operands are supported");
1794  }
1795 
1796  // Calcite may upcast decimals in a way that is
1797  // incompatible with the extension function input. Play it safe and stick with the
1798  // argument type instead.
1799  const SQLTypeInfo ret_ti = args[0]->get_type_info().is_decimal()
1800  ? args[0]->get_type_info()
1801  : rex_function->getType();
1802 
1803  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1804  ret_ti, rex_function->getName(), args);
1805  }
1806  if (rex_function->getName() == "DATETIME_PLUS"sv) {
1807  auto dt_plus = makeExpr<Analyzer::FunctionOper>(rex_function->getType(),
1808  rex_function->getName(),
1809  translateFunctionArgs(rex_function));
1810  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1811  if (date_trunc) {
1812  return date_trunc;
1813  }
1814  return translateDateadd(rex_function);
1815  }
1816  if (rex_function->getName() == "/INT"sv) {
1817  CHECK_EQ(size_t(2), rex_function->size());
1818  std::shared_ptr<Analyzer::Expr> lhs = translateScalarRex(rex_function->getOperand(0));
1819  std::shared_ptr<Analyzer::Expr> rhs = translateScalarRex(rex_function->getOperand(1));
1820  const auto rhs_lit = std::dynamic_pointer_cast<Analyzer::Constant>(rhs);
1821  return Parser::OperExpr::normalize(kDIVIDE, kONE, lhs, rhs);
1822  }
1823  if (rex_function->getName() == "Reinterpret"sv) {
1824  CHECK_EQ(size_t(1), rex_function->size());
1825  return translateScalarRex(rex_function->getOperand(0));
1826  }
1827  if (func_resolve(rex_function->getName(),
1828  "ST_X"sv,
1829  "ST_Y"sv,
1830  "ST_XMin"sv,
1831  "ST_YMin"sv,
1832  "ST_XMax"sv,
1833  "ST_YMax"sv,
1834  "ST_NRings"sv,
1835  "ST_NumGeometries"sv,
1836  "ST_NPoints"sv,
1837  "ST_Length"sv,
1838  "ST_Perimeter"sv,
1839  "ST_Area"sv,
1840  "ST_SRID"sv,
1841  "HeavyDB_Geo_PolyBoundsPtr"sv,
1842  "HeavyDB_Geo_PolyRenderGroup"sv,
1843  "HeavyDB_Geo_PolyCoordsArray"sv,
1844  "HeavyDB_Geo_PolyRingSizesArray"sv,
1845  "HeavyDB_Geo_PolyPolyRingsArray"sv)) {
1846  CHECK_EQ(rex_function->size(), size_t(1));
1847  return translateUnaryGeoFunction(rex_function);
1848  }
1849  if (func_resolve(rex_function->getName(), "ST_ConvexHull"sv)) {
1850  CHECK_EQ(rex_function->size(), size_t(1));
1851  SQLTypeInfo ti;
1852  return translateUnaryGeoConstructor(rex_function, ti, false);
1853  }
1854  if (func_resolve(rex_function->getName(),
1855  "convert_meters_to_pixel_width"sv,
1856  "convert_meters_to_pixel_height"sv,
1857  "is_point_in_view"sv,
1858  "is_point_size_in_view"sv)) {
1859  return translateFunctionWithGeoArg(rex_function);
1860  }
1861  if (func_resolve(rex_function->getName(),
1862  "ST_Distance"sv,
1863  "ST_MaxDistance"sv,
1864  "ST_Intersects"sv,
1865  "ST_Disjoint"sv,
1866  "ST_Contains"sv,
1867  "ST_Overlaps"sv,
1868  "ST_Approx_Overlaps"sv,
1869  "ST_Within"sv)) {
1870  CHECK_EQ(rex_function->size(), size_t(2));
1871  return translateBinaryGeoFunction(rex_function);
1872  }
1873  if (func_resolve(rex_function->getName(), "ST_DWithin"sv, "ST_DFullyWithin"sv)) {
1874  CHECK_EQ(rex_function->size(), size_t(3));
1875  return translateTernaryGeoFunction(rex_function);
1876  }
1877  if (rex_function->getName() == "OFFSET_IN_FRAGMENT"sv) {
1878  CHECK_EQ(size_t(0), rex_function->size());
1879  return translateOffsetInFragment();
1880  }
1881  if (rex_function->getName() == "ARRAY"sv) {
1882  // Var args; currently no check. Possible fix-me -- can array have 0 elements?
1883  return translateArrayFunction(rex_function);
1884  }
1885  if (func_resolve(rex_function->getName(),
1886  "ST_GeomFromText"sv,
1887  "ST_GeogFromText"sv,
1888  "ST_Centroid"sv,
1889  "ST_SetSRID"sv,
1890  "ST_Point"sv, // TODO: where should this and below live?
1891  "ST_PointN"sv,
1892  "ST_StartPoint"sv,
1893  "ST_EndPoint"sv,
1894  "ST_Transform"sv)) {
1895  SQLTypeInfo ti;
1896  return translateGeoProjection(rex_function, ti, false);
1897  }
1898  if (func_resolve(rex_function->getName(),
1899  "ST_Intersection"sv,
1900  "ST_Difference"sv,
1901  "ST_Union"sv,
1902  "ST_Buffer"sv,
1903  "ST_ConcaveHull"sv)) {
1904  CHECK_EQ(rex_function->size(), size_t(2));
1905  SQLTypeInfo ti;
1906  return translateBinaryGeoConstructor(rex_function, ti, false);
1907  }
1908  if (func_resolve(rex_function->getName(), "ST_IsEmpty"sv, "ST_IsValid"sv)) {
1909  CHECK_EQ(rex_function->size(), size_t(1));
1910  SQLTypeInfo ti;
1911  return translateUnaryGeoPredicate(rex_function, ti, false);
1912  }
1913  if (func_resolve(rex_function->getName(), "ST_Equals"sv)) {
1914  CHECK_EQ(rex_function->size(), size_t(2));
1915  // Attempt to generate a distance based check for points
1916  if (auto distance_check = translateBinaryGeoFunction(rex_function)) {
1917  return distance_check;
1918  }
1919  SQLTypeInfo ti;
1920  return translateBinaryGeoPredicate(rex_function, ti, false);
1921  }
1922 
1923  auto arg_expr_list = translateFunctionArgs(rex_function);
1924  if (rex_function->getName() == std::string("||") ||
1925  rex_function->getName() == std::string("SUBSTRING")) {
1926  SQLTypeInfo ret_ti(kTEXT, false);
1927  return makeExpr<Analyzer::FunctionOper>(
1928  ret_ti, rex_function->getName(), arg_expr_list);
1929  }
1930 
1931  // Reset possibly wrong return type of rex_function to the return
1932  // type of the optimal valid implementation. The return type can be
1933  // wrong in the case of multiple implementations of UDF functions
1934  // that have different return types but Calcite specifies the return
1935  // type according to the first implementation.
1936  SQLTypeInfo ret_ti;
1937  try {
1938  auto ext_func_sig = bind_function(rex_function->getName(), arg_expr_list);
1939  auto ext_func_args = ext_func_sig.getInputArgs();
1940  CHECK_LE(arg_expr_list.size(), ext_func_args.size());
1941  for (size_t i = 0, di = 0; i < arg_expr_list.size(); i++) {
1942  CHECK_LT(i + di, ext_func_args.size());
1943  auto ext_func_arg = ext_func_args[i + di];
1944  if (ext_func_arg == ExtArgumentType::PInt8 ||
1945  ext_func_arg == ExtArgumentType::PInt16 ||
1946  ext_func_arg == ExtArgumentType::PInt32 ||
1947  ext_func_arg == ExtArgumentType::PInt64 ||
1948  ext_func_arg == ExtArgumentType::PFloat ||
1949  ext_func_arg == ExtArgumentType::PDouble ||
1950  ext_func_arg == ExtArgumentType::PBool) {
1951  di++;
1952  // pointer argument follows length argument:
1953  CHECK(ext_func_args[i + di] == ExtArgumentType::Int64);
1954  }
1955  // fold casts on constants
1956  if (auto constant =
1957  std::dynamic_pointer_cast<Analyzer::Constant>(arg_expr_list[i])) {
1958  auto ext_func_arg_ti = ext_arg_type_to_type_info(ext_func_arg);
1959  if (ext_func_arg_ti != arg_expr_list[i]->get_type_info()) {
1960  arg_expr_list[i] = constant->add_cast(ext_func_arg_ti);
1961  }
1962  }
1963  }
1964 
1965  ret_ti = ext_arg_type_to_type_info(ext_func_sig.getRet());
1966  } catch (ExtensionFunctionBindingError& e) {
1967  LOG(WARNING) << "RelAlgTranslator::translateFunction: " << e.what();
1968  throw;
1969  }
1970 
1971  // By default, the extension function type will not allow nulls. If one of the arguments
1972  // is nullable, the extension function must also explicitly allow nulls.
1973  bool arguments_not_null = true;
1974  for (const auto& arg_expr : arg_expr_list) {
1975  if (!arg_expr->get_type_info().get_notnull()) {
1976  arguments_not_null = false;
1977  break;
1978  }
1979  }
1980  ret_ti.set_notnull(arguments_not_null);
1981 
1982  return makeExpr<Analyzer::FunctionOper>(ret_ti, rex_function->getName(), arg_expr_list);
1983 }
1984 
1985 namespace {
1986 
1987 std::vector<Analyzer::OrderEntry> translate_collation(
1988  const std::vector<SortField>& sort_fields) {
1989  std::vector<Analyzer::OrderEntry> collation;
1990  for (size_t i = 0; i < sort_fields.size(); ++i) {
1991  const auto& sort_field = sort_fields[i];
1992  collation.emplace_back(i,
1993  sort_field.getSortDir() == SortDirection::Descending,
1994  sort_field.getNullsPosition() == NullSortedPosition::First);
1995  }
1996  return collation;
1997 }
1998 
1999 size_t determineTimeValMultiplierForTimeType(const SQLTypes& window_frame_bound_type,
2000  const Analyzer::Constant* const_expr) {
2001  const auto time_unit_val = const_expr->get_constval().bigintval;
2002  if (window_frame_bound_type == kINTERVAL_DAY_TIME) {
2003  if (time_unit_val == kMilliSecsPerSec) {
2004  return 1;
2005  } else if (time_unit_val == kMilliSecsPerMin) {
2006  return kSecsPerMin;
2007  } else if (time_unit_val == kMilliSecsPerHour) {
2008  return kSecsPerHour;
2009  }
2010  }
2011  CHECK(false);
2012  return kUNKNOWN_FIELD;
2013 }
2014 
2015 ExtractField determineTimeUnit(const SQLTypes& window_frame_bound_type,
2016  const Analyzer::Constant* const_expr) {
2017  const auto time_unit_val = const_expr->get_constval().bigintval;
2018  if (window_frame_bound_type == kINTERVAL_DAY_TIME) {
2019  if (time_unit_val == kMilliSecsPerSec) {
2020  return kSECOND;
2021  } else if (time_unit_val == kMilliSecsPerMin) {
2022  return kMINUTE;
2023  } else if (time_unit_val == kMilliSecsPerHour) {
2024  return kHOUR;
2025  } else if (time_unit_val == kMilliSecsPerDay) {
2026  return kDAY;
2027  }
2028  } else {
2029  CHECK(window_frame_bound_type == kINTERVAL_YEAR_MONTH);
2030  if (time_unit_val == 1) {
2031  return kMONTH;
2032  } else if (time_unit_val == 12) {
2033  return kYEAR;
2034  }
2035  }
2036  CHECK(false);
2037  return kUNKNOWN_FIELD;
2038 }
2039 } // namespace
2040 
// Translates a window function operator into an Analyzer::WindowFunction expression.
//
// Steps visible in this listing:
//  1. translate the operands, partition keys and order keys with translateScalarRex
//  2. adjust the result type for some function kinds
//  3. classify the frame bounds and translate their bound expressions
//  4. validate the frame definition and the per-function-kind arguments
//  5. build the WindowFunction together with its two WindowFrame expressions
//
// Throws std::runtime_error for the invalid frame definitions and arguments checked
// below.
//
// NOTE(review): the embedded listing numbers skip in several places (e.g. 2061 -> 2063,
// 2073 -> 2075, 2276 -> 2279), so some original lines (case labels, a lambda signature,
// return statements, parts of declarations) appear to be missing from this listing. The
// comments here describe only what is visible.
2041 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWindowFunction(
2042  const RexWindowFunctionOperator* rex_window_function) const {
2043  std::vector<std::shared_ptr<Analyzer::Expr>> args;
2044  for (size_t i = 0; i < rex_window_function->size(); ++i) {
2045  args.push_back(translateScalarRex(rex_window_function->getOperand(i)));
2046  }
2047  std::vector<std::shared_ptr<Analyzer::Expr>> partition_keys;
2048  for (const auto& partition_key : rex_window_function->getPartitionKeys()) {
2049  partition_keys.push_back(translateScalarRex(partition_key.get()));
2050  }
2051  std::vector<std::shared_ptr<Analyzer::Expr>> order_keys;
2052  for (const auto& order_key : rex_window_function->getOrderKeys()) {
2053  order_keys.push_back(translateScalarRex(order_key.get()));
2054  }
2055  auto ti = rex_window_function->getType();
2056  auto window_func_kind = rex_window_function->getKind();
// value-type window functions take their result type from the first argument
2057  if (window_function_is_value(window_func_kind)) {
2058  CHECK_GE(args.size(), 1u);
2059  ti = args.front()->get_type_info();
2060  } else if (window_function_conditional_aggregate(window_func_kind)) {
2061  switch (window_func_kind) {
// NOTE(review): the case label for this branch is not visible in this listing
2063  // count_if should have an input expression having boolean type
2064  // but returned value should have the same as a normal count agg expr
2065  // so we force to set its type to bigint
2066  CHECK(ti.is_boolean());
2067  ti = SQLTypeInfo(kBIGINT);
2068  break;
2069  default:
2070  break;
2071  }
2072  }
// Classifies a frame bound from its unbounded / is_current_row / following /
// preceding flags.
// NOTE(review): the lambda signature and the statements inside each branch are not
// visible in this listing.
2073  auto determine_frame_bound_type =
2075  if (bound.unbounded) {
2076  CHECK(!bound.bound_expr && !bound.is_current_row);
2077  if (bound.following) {
2079  } else if (bound.preceding) {
2081  }
2082  } else {
2083  if (bound.is_current_row) {
2084  CHECK(!bound.unbounded && !bound.bound_expr);
2086  } else {
2087  CHECK(!bound.unbounded && bound.bound_expr);
2088  if (bound.following) {
2090  } else if (bound.preceding) {
2092  }
2093  }
2094  }
2096  };
// Returns true when the literal datum `d` of SQL type `t` is negative. A double is
// only accepted when `is_time_unit` is set; unsupported types throw.
2097  auto is_negative_framing_bound =
2098  [](const SQLTypes t, const Datum& d, bool is_time_unit = false) {
2099  switch (t) {
2100  case kTINYINT:
2101  return d.tinyintval < 0;
2102  case kSMALLINT:
2103  return d.smallintval < 0;
2104  case kINT:
2105  return d.intval < 0;
2106  case kDOUBLE: {
2107  // the only case that double type is used is for handling time interval
2108  // i.e., represent tiny time units like nanosecond and microsecond as the
2109  // equivalent time value with SECOND time unit
2110  CHECK(is_time_unit);
2111  return d.doubleval < 0;
2112  }
2113  case kDECIMAL:
2114  case kNUMERIC:
2115  case kBIGINT:
2116  return d.bigintval < 0;
2117  default: {
2118  throw std::runtime_error(
2119  "We currently only support integer-type literal expression as a window "
2120  "frame bound expression");
2121  }
2122  }
2123  };
2124 
2125  bool negative_constant = false;
2126  bool detect_invalid_frame_start_bound_expr = false;
2127  bool detect_invalid_frame_end_bound_expr = false;
2128  auto& frame_start_bound = rex_window_function->getFrameStartBound();
2129  auto& frame_end_bound = rex_window_function->getFrameEndBound();
// NOTE(review): has_end_bound_frame_expr is never assigned in the visible lines, so
// the checks below that depend on it cannot fire unless a missing line sets it --
// confirm.
2130  bool has_end_bound_frame_expr = false;
2131  std::shared_ptr<Analyzer::Expr> frame_start_bound_expr;
2132  SqlWindowFrameBoundType frame_start_bound_type =
2133  determine_frame_bound_type(frame_start_bound);
2134  std::shared_ptr<Analyzer::Expr> frame_end_bound_expr;
2135  SqlWindowFrameBoundType frame_end_bound_type =
2136  determine_frame_bound_type(frame_end_bound);
// NOTE(review): the initializers of has_framing_clause and frame_mode are not fully
// visible in this listing
2137  bool has_framing_clause =
2139  auto frame_mode = rex_window_function->isRows()
2142  if (order_keys.empty()) {
2143  if (frame_start_bound_type == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING &&
2144  frame_end_bound_type == SqlWindowFrameBoundType::UNBOUNDED_FOLLOWING) {
2145  // Calcite sets UNBOUNDED PRECEDING ~ UNBOUNDED_FOLLOWING as its default frame bound
2146  // if the window context has no order by clause regardless of the existence of
2147  // user-given window frame bound but at this point we have no way to recognize the
2148  // absence of the frame definition of this window context
2149  has_framing_clause = false;
2150  }
2151  } else {
2152  if (frame_start_bound_type == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING &&
2153  frame_end_bound_type == SqlWindowFrameBoundType::CURRENT_ROW) {
2154  // Calcite sets this frame bound by default when order by clause is given but has no
2155  // window frame definition (even if user gives the same bound, our previous window
2156  // computation logic returns exactly the same result)
2157  has_framing_clause = false;
2158  }
// Translates a frame bound expression and returns (is_invalid, translated_expr).
// Only time-interval operators and literals are accepted; as a side effect
// negative_constant is updated from the literal value.
2159  auto translate_frame_bound_expr = [&](const RexScalar* bound_expr) {
2160  std::shared_ptr<Analyzer::Expr> translated_expr;
2161  const auto rex_oper = dynamic_cast<const RexOperator*>(bound_expr);
2162  if (rex_oper && rex_oper->getType().is_timeinterval()) {
2163  translated_expr = translateScalarRex(rex_oper);
// NOTE(review): bin_oper is dereferenced below without a null check
2164  const auto bin_oper =
2165  dynamic_cast<const Analyzer::BinOper*>(translated_expr.get());
2166  auto time_literal_expr =
2167  dynamic_cast<const Analyzer::Constant*>(bin_oper->get_left_operand());
2168  CHECK(time_literal_expr);
2169  negative_constant =
2170  is_negative_framing_bound(time_literal_expr->get_type_info().get_type(),
2171  time_literal_expr->get_constval(),
2172  true);
2173  return std::make_pair(false, translated_expr);
2174  }
2175  if (dynamic_cast<const RexLiteral*>(bound_expr)) {
2176  translated_expr = translateScalarRex(bound_expr);
2177  if (auto literal_expr =
2178  dynamic_cast<const Analyzer::Constant*>(translated_expr.get())) {
2179  negative_constant = is_negative_framing_bound(
2180  literal_expr->get_type_info().get_type(), literal_expr->get_constval());
2181  return std::make_pair(false, translated_expr);
2182  }
2183  }
2184  return std::make_pair(true, translated_expr);
2185  };
2186 
// NOTE(review): negative_constant is overwritten by each call, so a negative start
// bound is forgotten when an end bound expression is translated afterwards -- confirm
// this is intended.
2187  if (frame_start_bound.bound_expr) {
2188  std::tie(detect_invalid_frame_start_bound_expr, frame_start_bound_expr) =
2189  translate_frame_bound_expr(frame_start_bound.bound_expr.get());
2190  }
2191 
2192  if (frame_end_bound.bound_expr) {
2193  std::tie(detect_invalid_frame_end_bound_expr, frame_end_bound_expr) =
2194  translate_frame_bound_expr(frame_end_bound.bound_expr.get());
2195  }
2196 
2197  // currently we only support literal expression as frame bound expression
2198  if (detect_invalid_frame_start_bound_expr || detect_invalid_frame_end_bound_expr) {
2199  throw std::runtime_error(
2200  "We currently only support literal expression as a window frame bound "
2201  "expression");
2202  }
2203 
2204  // note that Calcite already has frame-bound constraint checking logic, but we
2205  // also check various invalid cases for safety
2206  if (negative_constant) {
2207  throw std::runtime_error(
2208  "A constant expression for window framing should have nonnegative value.");
2209  }
2210 
// Replaces a time-interval bound expression (a kMULTIPLY BinOper) with the result of
// translateIntervalExprForWindowFraming and stores it back into the start or end
// bound expression.
// NOTE(review): one argument line of that call is not visible in this listing.
2211  auto handle_time_interval_expr_if_necessary = [&](const Analyzer::Expr* bound_expr,
2212  SqlWindowFrameBoundType bound_type,
2213  bool for_start_bound) {
2214  if (bound_expr && bound_expr->get_type_info().is_timeinterval()) {
2215  const auto bound_bin_oper = dynamic_cast<const Analyzer::BinOper*>(bound_expr);
2216  CHECK(bound_bin_oper->get_optype() == kMULTIPLY);
2217  auto translated_expr = translateIntervalExprForWindowFraming(
2218  order_keys.front(),
2220  bound_bin_oper);
2221  if (for_start_bound) {
2222  frame_start_bound_expr = translated_expr;
2223  } else {
2224  frame_end_bound_expr = translated_expr;
2225  }
2226  }
2227  };
2228  handle_time_interval_expr_if_necessary(
2229  frame_start_bound_expr.get(), frame_start_bound_type, true);
2230  handle_time_interval_expr_if_necessary(
2231  frame_end_bound_expr.get(), frame_end_bound_type, false);
2232  }
2233 
// reject frames whose end bound lies before their start bound
2234  if (frame_start_bound.following) {
2235  if (frame_end_bound.is_current_row) {
2236  throw std::runtime_error(
2237  "Window framing starting from following row cannot end with current row.");
2238  } else if (has_end_bound_frame_expr && frame_end_bound.preceding) {
2239  throw std::runtime_error(
2240  "Window framing starting from following row cannot have preceding rows.");
2241  }
2242  }
2243  if (frame_start_bound.is_current_row && frame_end_bound.preceding &&
2244  !frame_end_bound.unbounded && has_end_bound_frame_expr) {
2245  throw std::runtime_error(
2246  "Window framing starting from current row cannot have preceding rows.");
2247  }
2248  if (has_framing_clause) {
// NOTE(review): the condition guarding the range-mode checks below and the
// declaration of colvar_set are not visible in this listing
2250  if (order_keys.size() != 1) {
2251  throw std::runtime_error(
2252  "Window framing with range mode requires a single order-by column");
2253  }
2254  if (!frame_start_bound_expr &&
2255  frame_start_bound_type == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING &&
2256  !frame_end_bound_expr &&
2257  frame_end_bound_type == SqlWindowFrameBoundType::CURRENT_ROW) {
2258  has_framing_clause = false;
2259  VLOG(1) << "Ignore range framing mode with a frame bound between "
2260  "UNBOUNDED_PRECEDING and CURRENT_ROW";
2261  }
2262  std::set<const Analyzer::ColumnVar*,
2263  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
2265  order_keys.front()->collect_column_var(colvar_set, false);
// framing is dropped when an ordering column is not integer, floating point or time
2266  for (auto cv : colvar_set) {
2267  if (!(cv->get_type_info().is_integer() || cv->get_type_info().is_fp() ||
2268  cv->get_type_info().is_time())) {
2269  has_framing_clause = false;
2270  VLOG(1) << "Range framing mode with non-number type ordering column is not "
2271  "supported yet, skip window framing";
2272  }
2273  }
2274  }
2275  }
// per-function-kind validation
// NOTE(review): the case labels of this switch are not visible in this listing
2276  switch (window_func_kind) {
2279  if (order_keys.empty()) {
2280  throw std::runtime_error(::toString(window_func_kind) +
2281  " requires an ORDER BY clause");
2282  }
2283  if (!has_framing_clause) {
2284  throw std::runtime_error(::toString(window_func_kind) +
2285  " requires window frame definition");
2286  }
2287  const auto num_args = args.size();
2288  const auto func_name = ::toString(window_func_kind);
// a missing second argument defaults to the constant 1
2289  if (num_args == 1) {
2290  Datum d;
2291  d.intval = 1;
2292  args.push_back(makeExpr<Analyzer::Constant>(kINT, false, d));
2293  } else if (num_args < 1 || num_args > 2) {
2294  throw std::runtime_error(func_name + " has an invalid number of input arguments");
2295  }
2296  const auto target_expr_cv =
2297  dynamic_cast<const Analyzer::ColumnVar*>(args.front().get());
2298  if (!target_expr_cv) {
2299  throw std::runtime_error("Currently, " + func_name +
2300  " only allows a column reference as its first argument");
2301  }
2302  const auto target_ti = target_expr_cv->get_type_info();
2303  if (target_ti.is_dict_encoded_string()) {
2304  // Calcite does not represent a window function having dictionary encoded text
2305  // type as its output properly, so we need to set its output type manually
2306  ti.set_compression(kENCODING_DICT);
2307  ti.set_comp_param(target_expr_cv->get_type_info().get_comp_param());
2308  ti.setStringDictKey(target_expr_cv->get_type_info().getStringDictKey());
2309  ti.set_fixed_size();
2310  }
2311  const auto target_offset_cv =
2312  dynamic_cast<const Analyzer::Constant*>(args[1].get());
// NOTE(review): this tests target_expr_cv, which was already checked above; the
// intended check is presumably !target_offset_cv. As written, a non-constant second
// argument leaves target_offset_cv null and it is dereferenced here -- confirm.
2313  if (!target_expr_cv ||
2314  is_negative_framing_bound(target_offset_cv->get_type_info().get_type(),
2315  target_offset_cv->get_constval())) {
2316  throw std::runtime_error(
2317  "Currently, " + func_name +
2318  " only allows non-negative constant as its second argument");
2319  }
2320  break;
2321  }
2324  // todo (yoonmin) : args.size() will be three if we support default value
2325  CHECK_EQ(2u, args.size());
2326  // NTH_VALUE(_IN_FRAME) may return null value even if the argument is non-null
2327  // column
2328  ti.set_notnull(false);
2329  if (!args[1]) {
2330  throw std::runtime_error(
2331  "NTH_VALUE window function must have a positional argument expression.");
2332  }
2333  if (window_func_kind == SqlWindowFunctionKind::NTH_VALUE_IN_FRAME) {
2334  if (order_keys.empty()) {
2335  throw std::runtime_error(::toString(window_func_kind) +
2336  " requires an ORDER BY clause");
2337  }
2338  if (!has_framing_clause) {
2339  throw std::runtime_error(::toString(window_func_kind) +
2340  " requires window frame definition");
2341  }
2342  }
// a positive integer constant N is decremented in place and the switch is left;
// every other positional argument falls through to the throw below
2343  if (args[1]->get_type_info().is_integer()) {
2344  if (auto* n_value_ptr = dynamic_cast<Analyzer::Constant*>(args[1].get())) {
2345  if (0 < n_value_ptr->get_constval().intval) {
2346  // i.e., having N larger than the partition size
2347  // set the proper N to match the zero-start index pos
2348  auto d = n_value_ptr->get_constval();
2349  d.intval -= 1;
2350  n_value_ptr->set_constval(d);
2351  break;
2352  }
2353  }
2354  }
2355  throw std::runtime_error(
2356  "The positional argument of the NTH_VALUE window function must be a positive "
2357  "integer constant.");
2358  default:
2359  break;
2360  }
// without a framing clause the frame bounds carry no information: reset them
2361  if (!has_framing_clause) {
2362  frame_start_bound_type = SqlWindowFrameBoundType::UNKNOWN;
2363  frame_end_bound_type = SqlWindowFrameBoundType::UNKNOWN;
2364  frame_start_bound_expr = nullptr;
2365  frame_end_bound_expr = nullptr;
2366  }
// a framed COUNT without arguments gets a constant argument whose type is BIGINT or
// INT depending on g_bigint_count
2367  if (window_func_kind == SqlWindowFunctionKind::COUNT && has_framing_clause &&
2368  args.empty()) {
2369  args.push_back(makeExpr<Analyzer::Constant>(g_bigint_count ? kBIGINT : kINT, true));
2370  }
2371  return makeExpr<Analyzer::WindowFunction>(
2372  ti,
2373  rex_window_function->getKind(),
2374  args,
2375  partition_keys,
2376  order_keys,
2377  has_framing_clause ? frame_mode : Analyzer::WindowFunction::FrameBoundType::NONE,
2378  makeExpr<Analyzer::WindowFrame>(frame_start_bound_type, frame_start_bound_expr),
2379  makeExpr<Analyzer::WindowFrame>(frame_end_bound_type, frame_end_bound_expr),
2380  translate_collation(rex_window_function->getCollation()));
2381 }
2382 
2384  std::shared_ptr<Analyzer::Expr> order_key,
2385  bool for_preceding_bound,
2386  const Analyzer::BinOper* frame_bound_expr) const {
2387  // translate time interval expression and prepare appropriate frame bound expression:
2388  // a) manually compute time unit datum: time type
2389  // b) use dateadd expression: date and timestamp
2390  const auto order_key_ti = order_key->get_type_info();
2391  const auto frame_bound_ti = frame_bound_expr->get_type_info();
2392  const auto time_val_expr =
2393  dynamic_cast<const Analyzer::Constant*>(frame_bound_expr->get_left_operand());
2394  const auto time_unit_val_expr =
2395  dynamic_cast<const Analyzer::Constant*>(frame_bound_expr->get_right_operand());
2396  ExtractField time_unit =
2397  determineTimeUnit(frame_bound_ti.get_type(), time_unit_val_expr);
2398  bool invalid_time_unit_type = false;
2399  bool invalid_frame_bound_expr_type = false;
2400  Datum d;
2401  auto prepare_time_value_datum = [&d,
2402  &invalid_frame_bound_expr_type,
2403  &time_val_expr,
2404  &for_preceding_bound](bool is_timestamp_second) {
2405  // currently, Calcite only accepts interval with second, so to represent
2406  // smaller time units like millisecond, we have to use decimal point like
2407  // INTERVAL 0.003 SECOND (for millisecond)
2408  // thus, depending on what time unit we want to represent, Calcite analyzes
2409  // the time value to one of following two types: integer and decimal (and
2410  // numeric) types
2411  switch (time_val_expr->get_type_info().get_type()) {
2412  case kTINYINT: {
2413  d.bigintval = time_val_expr->get_constval().tinyintval;
2414  break;
2415  }
2416  case kSMALLINT: {
2417  d.bigintval = time_val_expr->get_constval().smallintval;
2418  break;
2419  }
2420  case kINT: {
2421  d.bigintval = time_val_expr->get_constval().intval;
2422  break;
2423  }
2424  case kBIGINT: {
2425  d.bigintval = time_val_expr->get_constval().bigintval;
2426  break;
2427  }
2428  case kDECIMAL:
2429  case kNUMERIC: {
2430  if (!is_timestamp_second) {
2431  // date and time type only use integer type as their time value
2432  invalid_frame_bound_expr_type = true;
2433  break;
2434  }
2435  d.bigintval = time_val_expr->get_constval().bigintval;
2436  break;
2437  }
2438  case kDOUBLE: {
2439  if (!is_timestamp_second) {
2440  // date and time type only use integer type as their time value
2441  invalid_frame_bound_expr_type = true;
2442  break;
2443  }
2444  d.bigintval = time_val_expr->get_constval().doubleval *
2445  pow(10, time_val_expr->get_type_info().get_scale());
2446  break;
2447  }
2448  default: {
2449  invalid_frame_bound_expr_type = true;
2450  break;
2451  }
2452  }
2453  if (for_preceding_bound) {
2454  d.bigintval *= -1;
2455  }
2456  };
2457 
2458  switch (order_key_ti.get_type()) {
2459  case kTIME: {
2460  if (time_val_expr->get_type_info().is_integer()) {
2461  if (time_unit == kSECOND || time_unit == kMINUTE || time_unit == kHOUR) {
2462  const auto time_multiplier = determineTimeValMultiplierForTimeType(
2463  frame_bound_ti.get_type(), time_unit_val_expr);
2464  switch (time_val_expr->get_type_info().get_type()) {
2465  case kTINYINT: {
2466  d.bigintval = time_val_expr->get_constval().tinyintval * time_multiplier;
2467  break;
2468  }
2469  case kSMALLINT: {
2470  d.bigintval = time_val_expr->get_constval().smallintval * time_multiplier;
2471  break;
2472  }
2473  case kINT: {
2474  d.bigintval = time_val_expr->get_constval().intval * time_multiplier;
2475  break;
2476  }
2477  case kBIGINT: {
2478  d.bigintval = time_val_expr->get_constval().bigintval * time_multiplier;
2479  break;
2480  }
2481  default: {
2482  UNREACHABLE();
2483  break;
2484  }
2485  }
2486  } else {
2487  invalid_frame_bound_expr_type = true;
2488  }
2489  } else {
2490  invalid_time_unit_type = true;
2491  }
2492  if (invalid_frame_bound_expr_type) {
2493  throw std::runtime_error(
2494  "Invalid time unit is used to define window frame bound expression for " +
2495  order_key_ti.get_type_name() + " type");
2496  } else if (invalid_time_unit_type) {
2497  throw std::runtime_error(
2498  "Window frame bound expression has an invalid type for " +
2499  order_key_ti.get_type_name() + " type");
2500  }
2501  return std::make_shared<Analyzer::Constant>(kBIGINT, false, d);
2502  }
2503  case kDATE: {
2505  if (time_val_expr->get_type_info().is_integer()) {
2506  switch (time_unit) {
2507  case kDAY: {
2508  daField = to_dateadd_field("day");
2509  break;
2510  }
2511  case kMONTH: {
2512  daField = to_dateadd_field("month");
2513  break;
2514  }
2515  case kYEAR: {
2516  daField = to_dateadd_field("year");
2517  break;
2518  }
2519  default: {
2520  invalid_frame_bound_expr_type = true;
2521  break;
2522  }
2523  }
2524  } else {
2525  invalid_time_unit_type = true;
2526  }
2527  if (invalid_frame_bound_expr_type) {
2528  throw std::runtime_error(
2529  "Invalid time unit is used to define window frame bound expression for " +
2530  order_key_ti.get_type_name() + " type");
2531  } else if (invalid_time_unit_type) {
2532  throw std::runtime_error(
2533  "Window frame bound expression has an invalid type for " +
2534  order_key_ti.get_type_name() + " type");
2535  }
2537  prepare_time_value_datum(false);
2538  const auto cast_number_units = makeExpr<Analyzer::Constant>(kBIGINT, false, d);
2539  const int dim = order_key_ti.get_dimension();
2540  return makeExpr<Analyzer::DateaddExpr>(
2541  SQLTypeInfo(kTIMESTAMP, dim, 0, false), daField, cast_number_units, order_key);
2542  }
2543  case kTIMESTAMP: {
2545  switch (time_unit) {
2546  case kSECOND: {
2547  switch (time_val_expr->get_type_info().get_scale()) {
2548  case 0: {
2549  daField = to_dateadd_field("second");
2550  break;
2551  }
2552  case 3: {
2553  daField = to_dateadd_field("millisecond");
2554  break;
2555  }
2556  case 6: {
2557  daField = to_dateadd_field("microsecond");
2558  break;
2559  }
2560  case 9: {
2561  daField = to_dateadd_field("nanosecond");
2562  break;
2563  }
2564  default:
2565  UNREACHABLE();
2566  break;
2567  }
2568  prepare_time_value_datum(true);
2569  break;
2570  }
2571  case kMINUTE: {
2572  daField = to_dateadd_field("minute");
2573  prepare_time_value_datum(false);
2574  break;
2575  }
2576  case kHOUR: {
2577  daField = to_dateadd_field("hour");
2578  prepare_time_value_datum(false);
2579  break;
2580  }
2581  case kDAY: {
2582  daField = to_dateadd_field("day");
2583  prepare_time_value_datum(false);
2584  break;
2585  }
2586  case kMONTH: {
2587  daField = to_dateadd_field("month");
2588  prepare_time_value_datum(false);
2589  break;
2590  }
2591  case kYEAR: {
2592  daField = to_dateadd_field("year");
2593  prepare_time_value_datum(false);
2594  break;
2595  }
2596  default: {
2597  invalid_time_unit_type = true;
2598  break;
2599  }
2600  }
2601  if (!invalid_time_unit_type) {
2603  const auto cast_number_units = makeExpr<Analyzer::Constant>(kBIGINT, false, d);
2604  const int dim = order_key_ti.get_dimension();
2605  return makeExpr<Analyzer::DateaddExpr>(SQLTypeInfo(kTIMESTAMP, dim, 0, false),
2606  daField,
2607  cast_number_units,
2608  order_key);
2609  }
2610  return nullptr;
2611  }
2612  default: {
2613  UNREACHABLE();
2614  break;
2615  }
2616  }
2617  if (invalid_frame_bound_expr_type) {
2618  throw std::runtime_error(
2619  "Invalid time unit is used to define window frame bound expression for " +
2620  order_key_ti.get_type_name() + " type");
2621  } else if (invalid_time_unit_type) {
2622  throw std::runtime_error("Window frame bound expression has an invalid type for " +
2623  order_key_ti.get_type_name() + " type");
2624  }
2625  return nullptr;
2626 }
2627 
2629  const RexFunctionOperator* rex_function) const {
2630  std::vector<std::shared_ptr<Analyzer::Expr>> args;
2631  for (size_t i = 0; i < rex_function->size(); ++i) {
2632  args.push_back(translateScalarRex(rex_function->getOperand(i)));
2633  }
2634  return args;
2635 }
2636 
2638  const std::shared_ptr<Analyzer::Expr> qual_expr) {
2639  CHECK(qual_expr);
2640  auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
2641  if (!bin_oper) {
2642  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
2643  return {{}, {rewritten_qual_expr ? rewritten_qual_expr : qual_expr}};
2644  }
2645 
2646  if (bin_oper->get_optype() == kAND) {
2647  const auto lhs_cf = qual_to_conjunctive_form(bin_oper->get_own_left_operand());
2648  const auto rhs_cf = qual_to_conjunctive_form(bin_oper->get_own_right_operand());
2649  auto simple_quals = lhs_cf.simple_quals;
2650  simple_quals.insert(
2651  simple_quals.end(), rhs_cf.simple_quals.begin(), rhs_cf.simple_quals.end());
2652  auto quals = lhs_cf.quals;
2653  quals.insert(quals.end(), rhs_cf.quals.begin(), rhs_cf.quals.end());
2654  return {simple_quals, quals};
2655  }
2656  int rte_idx{0};
2657  const auto simple_qual = bin_oper->normalize_simple_predicate(rte_idx);
2658  return simple_qual ? QualsConjunctiveForm{{simple_qual}, {}}
2659  : QualsConjunctiveForm{{}, {qual_expr}};
2660 }
2661 
2662 std::vector<std::shared_ptr<Analyzer::Expr>> qual_to_disjunctive_form(
2663  const std::shared_ptr<Analyzer::Expr>& qual_expr) {
2664  CHECK(qual_expr);
2665  const auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
2666  if (!bin_oper) {
2667  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
2668  return {rewritten_qual_expr ? rewritten_qual_expr : qual_expr};
2669  }
2670  if (bin_oper->get_optype() == kOR) {
2671  const auto lhs_df = qual_to_disjunctive_form(bin_oper->get_own_left_operand());
2672  const auto rhs_df = qual_to_disjunctive_form(bin_oper->get_own_right_operand());
2673  auto quals = lhs_df;
2674  quals.insert(quals.end(), rhs_df.begin(), rhs_df.end());
2675  return quals;
2676  }
2677  return {qual_expr};
2678 }
2679 
2680 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateHPTLiteral(
2681  const RexFunctionOperator* rex_function) const {
2682  /* since calcite uses Avatica package called DateTimeUtils to parse timestamp strings.
2683  Therefore any string having fractional seconds more 3 places after the decimal
2684  (milliseconds) will get truncated to 3 decimal places, therefore we lose precision
2685  (us|ns). Issue: [BE-2461] Here we are hijacking literal cast to Timestamp(6|9) from
2686  calcite and translating them to generate our own casts.
2687  */
2688  CHECK_EQ(size_t(1), rex_function->size());
2689  const auto operand = translateScalarRex(rex_function->getOperand(0));
2690  const auto& operand_ti = operand->get_type_info();
2691  const auto& target_ti = rex_function->getType();
2692  if (!operand_ti.is_string()) {
2693  throw std::runtime_error(
2694  "High precision timestamp cast argument must be a string. Input type is: " +
2695  operand_ti.get_type_name());
2696  } else if (!target_ti.is_high_precision_timestamp()) {
2697  throw std::runtime_error(
2698  "Cast target type should be high precision timestamp. Input type is: " +
2699  target_ti.get_type_name());
2700  } else if (target_ti.get_dimension() != 6 && target_ti.get_dimension() != 9) {
2701  throw std::runtime_error(
2702  "Cast target type should be TIMESTAMP(6|9). Input type is: TIMESTAMP(" +
2703  std::to_string(target_ti.get_dimension()) + ")");
2704  } else {
2705  return operand->add_cast(target_ti);
2706  }
2707 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
Defines data structures for the semantic analysis phase of query processing.
Definition: sqldefs.h:71
SqlWindowFrameBoundType
Definition: sqldefs.h:150
const RexScalar * getThen(const size_t idx) const
Definition: RelAlgDag.h:443
const std::vector< JoinType > join_types_
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:382
void set_compression(EncodingType c)
Definition: sqltypes.h:504
SQLAgg
Definition: sqldefs.h:73
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >, const Executor *executor=nullptr)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
auto func_resolve
std::shared_ptr< Analyzer::Expr > translateOffsetInFragment() const
SqlStringOpKind name_to_string_op_kind(const std::string &func_name)
Definition: sqldefs.h:375
static std::shared_ptr< Analyzer::Expr > get(const std::string &)
Definition: ParserNode.cpp:237
std::shared_ptr< Analyzer::Expr > translateCurrentTimestamp() const
std::shared_ptr< Analyzer::Expr > translateBinaryGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
SQLAgg getKind() const
Definition: RelAlgDag.h:813
Definition: sqltypes.h:66
std::shared_ptr< Analyzer::Expr > translateRegexp(const RexFunctionOperator *) const
static bool colvar_comp(const ColumnVar *l, const ColumnVar *r)
Definition: Analyzer.h:215
SQLTypes
Definition: sqltypes.h:55
static constexpr int64_t kSecsPerHour
size_t getOperand(size_t idx) const
Definition: RelAlgDag.h:819
const Executor * executor_
std::shared_ptr< Analyzer::Expr > translateUnlikely(const RexFunctionOperator *) const
const RexScalar * getElse() const
Definition: RelAlgDag.h:448
void collect_column_var(std::set< const ColumnVar *, bool(*)(const ColumnVar *, const ColumnVar *)> &colvar_set, bool include_agg) const override
Definition: Analyzer.h:222
static constexpr int64_t kSecsPerMin
std::shared_ptr< Analyzer::Expr >(RelAlgTranslator::*)(RexScalar const *) const Handler
SQLQualifier
Definition: sqldefs.h:71
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:162
bool window_function_conditional_aggregate(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:59
std::shared_ptr< Analyzer::Expr > translateFunction(const RexFunctionOperator *) const
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1253
#define LOG(tag)
Definition: Logger.h:285
std::shared_ptr< Analyzer::Expr > translateScalarRex(const RexScalar *rex) const
const SQLTypeInfo & getType() const
Definition: RelAlgDag.h:284
size_t size() const
Definition: RelAlgDag.h:270
static constexpr int64_t kMilliSecsPerDay
const RexScalar * getOperand(const size_t idx) const
Definition: RelAlgDag.h:272
std::shared_ptr< Analyzer::Expr > translateUoper(const RexOperator *) const
HOST DEVICE int get_scale() const
Definition: sqltypes.h:386
const Expr * get_right_operand() const
Definition: Analyzer.h:456
const std::vector< SortField > & getCollation() const
Definition: RelAlgDag.h:654
SQLOps
Definition: sqldefs.h:28
std::shared_ptr< Analyzer::Expr > translateDateadd(const RexFunctionOperator *) const
int8_t boolval
Definition: Datum.h:68
static bool isFramingAvailableWindowFunc(SqlWindowFunctionKind kind)
Definition: Analyzer.h:2617
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr, const Executor *executor=nullptr)
Definition: ParserNode.cpp:372
Definition: sqldefs.h:37
#define UNREACHABLE()
Definition: Logger.h:337
std::shared_ptr< Analyzer::Expr > translateIntervalExprForWindowFraming(std::shared_ptr< Analyzer::Expr > order_key, bool for_preceding_bound, const Analyzer::BinOper *frame_bound_expr) const
#define CHECK_GE(x, y)
Definition: Logger.h:306
std::shared_ptr< Analyzer::Expr > translateAbs(const RexFunctionOperator *) const
Definition: sqldefs.h:48
Definition: sqldefs.h:29
const RexScalar * getWhen(const size_t idx) const
Definition: RelAlgDag.h:438
std::shared_ptr< Analyzer::Expr > ExpressionPtr
Definition: Analyzer.h:184
std::string getString(int32_t string_id) const
#define TRANSIENT_DICT_DB_ID
Definition: DbObjectKeys.h:25
std::shared_ptr< Analyzer::Expr > getInIntegerSetExpr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set) const
Definition: sqldefs.h:40
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::shared_ptr< Analyzer::Expr > translateItem(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > makeNumericConstant(const SQLTypeInfo &ti, const long val)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:381
bool operator()(IndexedHandler const &pair) const
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
bool is_number() const
Definition: sqltypes.h:585
#define CHECK_GT(x, y)
Definition: Logger.h:305
bool is_agg_supported_for_type(const SQLAgg &agg_kind, const SQLTypeInfo &arg_ti)
std::shared_ptr< Analyzer::Expr > translateGeoProjection(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
int32_t intval
Definition: Datum.h:71
bool is_time() const
Definition: sqltypes.h:586
std::shared_ptr< Analyzer::Expr > translateOper(const RexOperator *) const
std::string to_string(char const *&&v)
std::shared_ptr< Analyzer::Expr > translateDatediff(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInput(const RexInput *) const
std::shared_ptr< Analyzer::Expr > translateSign(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoFunction(const RexFunctionOperator *) const
static constexpr int64_t kMilliSecsPerMin
bool g_enable_string_functions
std::shared_ptr< Analyzer::Expr > translateGeoOverlapsOper(const RexOperator *) const
Definition: sqldefs.h:75
static constexpr int64_t kMilliSecsPerSec
ExtractField to_datepart_field(const std::string &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
robin_hood::unordered_map< RexScalar const *, std::shared_ptr< Analyzer::Expr > > cache_
unsigned getIndex() const
Definition: RelAlgDag.h:77
Supported runtime functions management and retrieval.
future< Result > async(Fn &&fn, Args &&...args)
static SysCatalog & instance()
Definition: SysCatalog.h:343
static std::shared_ptr< Analyzer::Expr > translateLiteral(const RexLiteral *)
SQLOps getOperator() const
Definition: RelAlgDag.h:282
bool window_function_is_value(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:29
static constexpr int32_t INVALID_STR_ID
CONSTEXPR DEVICE bool is_null(const T &value)
Classes representing a parse tree.
std::shared_ptr< Analyzer::Expr > translateDatetime(const RexFunctionOperator *) const
void set_fixed_size()
Definition: sqltypes.h:502
DateaddField
Definition: DateAdd.h:42
void fill_dictionary_encoded_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice, const StringDictionaryProxy *source_dict, const StringDictionaryProxy *dest_dict, const int64_t needle_null_val)
#define CHECK_NE(x, y)
Definition: Logger.h:302
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateStringOper(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > pattern_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_not)
Definition: ParserNode.cpp:790
size_t determineTimeValMultiplierForTimeType(const SQLTypes &window_frame_bound_type, const Analyzer::Constant *const_expr)
void set_scale(int s)
Definition: sqltypes.h:498
int64_t bigintval
Definition: Datum.h:72
bool is_timeinterval() const
Definition: sqltypes.h:591
std::pair< std::shared_ptr< Analyzer::Expr >, SQLQualifier > getQuantifiedRhs(const RexScalar *) const
std::vector< Analyzer::OrderEntry > translate_collation(const std::vector< SortField > &sort_fields)
size_t branchCount() const
Definition: RelAlgDag.h:436
std::shared_ptr< Analyzer::Expr > translateCurrentTime() const
bool g_bigint_count
Definition: sqldefs.h:36
Definition: sqldefs.h:77
bool g_enable_watchdog
Definition: sqldefs.h:71
int16_t smallintval
Definition: Datum.h:70
SQLTypeInfo build_type_info(const SQLTypes sql_type, const int scale, const int precision)
DatetruncField to_datediff_field(const std::string &field)
std::string toString(const ExecutorDeviceType &device_type)
bool is_boolean() const
Definition: sqltypes.h:587
std::array< IndexedHandler, sizeof...(Ts)> makeHandlers()
const RexWindowBound & getFrameEndBound() const
Definition: RelAlgDag.h:658
std::shared_ptr< Analyzer::Expr > translate(const RexScalar *rex) const
std::tuple< T, std::vector< SQLTypeInfo > > bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
Argument type based extension function binding.
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
const std::unordered_map< const RelAlgNode *, int > input_to_nest_level_
#define UNLIKELY(x)
Definition: likely.h:25
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const shared::StringDictKey &dest_dict_key, const std::vector< int32_t > &source_ids, const shared::StringDictKey &source_dict_key, const int32_t dest_generation)
Definition: sqldefs.h:33
void set_comp_param(int p)
Definition: sqltypes.h:505
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK_LT(x, y)
Definition: Logger.h:303
Definition: sqltypes.h:69
Definition: sqltypes.h:70
Analyzer::ExpressionPtrVector translateFunctionArgs(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
Definition: sqldefs.h:39
Definition: sqldefs.h:71
const ConstRexScalarPtrVector & getPartitionKeys() const
Definition: RelAlgDag.h:627
static std::shared_ptr< Analyzer::Expr > analyzeValue(const std::string &stringval, const bool is_null)
Definition: ParserNode.cpp:143
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:304
const RexWindowBound & getFrameStartBound() const
Definition: RelAlgDag.h:656
std::shared_ptr< Analyzer::Expr > translateOverlapsOper(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
std::shared_ptr< Analyzer::Expr > translateArrayFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > like_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_ilike, const bool is_not)
Definition: ParserNode.cpp:695
std::pair< std::type_index, Handler > IndexedHandler
static RelRexToStringConfig defaults()
Definition: RelAlgDag.h:49
Datum get_constval() const
Definition: Analyzer.h:348
std::shared_ptr< Analyzer::Expr > translateCurrentUser(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateSampleRatio(const RexFunctionOperator *) const
SqlWindowFunctionKind getKind() const
Definition: RelAlgDag.h:625
std::shared_ptr< Analyzer::Expr > translateLike(const RexFunctionOperator *) const
const RelAlgNode * getSourceNode() const
Definition: RelAlgDag.h:389
Definition: sqltypes.h:58
bool takes_arg(const TargetInfo &target_info)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t numericval, const int scale, const int precision)
Definition: ParserNode.cpp:187
ExtractField
std::shared_ptr< Analyzer::Expr > translateLikely(const RexFunctionOperator *) const
Definition: sqldefs.h:52
std::shared_ptr< Analyzer::Expr > get_in_values_expr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set)
static std::shared_ptr< Analyzer::Expr > get(const int64_t)
Definition: ParserNode.cpp:223
bool isDistinct() const
Definition: RelAlgDag.h:815
void set_notnull(bool n)
Definition: sqltypes.h:500
static constexpr int64_t kMilliSecsPerHour
#define CHECK(condition)
Definition: Logger.h:291
std::shared_ptr< Analyzer::Expr > translateTernaryGeoFunction(const RexFunctionOperator *) const
const ConstRexScalarPtrVector & getOrderKeys() const
Definition: RelAlgDag.h:637
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
std::shared_ptr< Analyzer::Expr > translateBinaryGeoFunction(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > make_fp_constant(const int64_t val, const SQLTypeInfo &ti)
std::pair< Datum, bool > datum_from_scalar_tv(const ScalarTargetValue *scalar_tv, const SQLTypeInfo &ti) noexcept
std::shared_ptr< Analyzer::Expr > translateWidthBucket(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInOper(const RexOperator *) const
uint64_t exp_to_scale(const unsigned exp)
size_t size() const
Definition: RelAlgDag.h:817
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::vector< ExpressionPtr > ExpressionPtrVector
Definition: Analyzer.h:186
bool g_cluster
Definition: sqldefs.h:32
const Expr * get_left_operand() const
Definition: Analyzer.h:455
bool isRows() const
Definition: RelAlgDag.h:660
std::shared_ptr< Analyzer::Expr > translateCase(const RexCase *) const
std::shared_ptr< Analyzer::Expr > translateFunctionWithGeoArg(const RexFunctionOperator *) const
Definition: sqltypes.h:62
std::shared_ptr< const query_state::QueryState > query_state_
const std::string & getName() const
Definition: RelAlgDag.h:500
std::shared_ptr< Analyzer::Expr > translateCurrentDate() const
std::string get_datetimeplus_rewrite_funcname(const SQLOps &op)
void validate_datetime_datepart_argument(const std::shared_ptr< Analyzer::Constant > literal_expr)
std::shared_ptr< Analyzer::Expr > translateCardinality(const RexFunctionOperator *) const
Definition: sqldefs.h:76
int cpu_threads()
Definition: thread_count.h:25
const bool just_explain_
Definition: Datum.h:67
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
Definition: RelAlgDag.h:876
bool is_decimal() const
Definition: sqltypes.h:583
std::shared_ptr< Analyzer::Expr > translateGeoComparison(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateDatePlusMinus(const RexOperator *) const
const std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::shared_ptr< Analyzer::Expr > translateHPTLiteral(const RexFunctionOperator *) const
Definition: sqldefs.h:74
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
int32_t getIdOfString(const std::string &str) const
std::shared_ptr< Analyzer::Expr > translateDatepart(const RexFunctionOperator *) const
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1558
std::shared_ptr< Analyzer::Expr > translateBinaryGeoConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
Definition: sqldefs.h:38
std::shared_ptr< Analyzer::Expr > rewrite_to_date_trunc(const Analyzer::FunctionOper *dt_plus)
Definition: sqldefs.h:83
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
#define VLOG(n)
Definition: Logger.h:387
DateaddField to_dateadd_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
void set_precision(int d)
Definition: sqltypes.h:496
#define IS_COMPARISON(X)
Definition: sqldefs.h:58
double doubleval
Definition: Datum.h:74
void fill_integer_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice)
std::shared_ptr< Analyzer::Expr > translateKeyForString(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
std::shared_ptr< Analyzer::Expr > translateWindowFunction(const RexWindowFunctionOperator *) const
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateScalarSubquery(const RexSubQuery *) const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:180
std::shared_ptr< Analyzer::Expr > translateLength(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateExtract(const RexFunctionOperator *) const
ExtractField determineTimeUnit(const SQLTypes &window_frame_bound_type, const Analyzer::Constant *const_expr)
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:493