OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgTranslator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgTranslator.h"
18 #include "Analyzer/Analyzer.h"
20 #include "DateTimePlusRewrite.h"
21 #include "DateTimeTranslator.h"
23 #include "ExpressionRewrite.h"
26 #include "Parser/ParserNode.h"
27 #include "RelAlgDag.h"
28 #include "ScalarExprVisitor.h"
29 #include "Shared/SqlTypesLayout.h"
30 #include "Shared/likely.h"
31 #include "Shared/scope.h"
32 #include "Shared/thread_count.h"
33 #include "WindowContext.h"
34 
35 #include <future>
36 #include <sstream>
37 
38 extern bool g_enable_watchdog;
39 
41 
42 namespace {
43 
45  const int scale,
46  const int precision) {
47  SQLTypeInfo ti(sql_type, 0, 0, true);
48  if (ti.is_decimal()) {
49  ti.set_scale(scale);
50  ti.set_precision(precision);
51  }
52  return ti;
53 }
54 
55 } // namespace
56 
57 std::pair<std::shared_ptr<Analyzer::Expr>, SQLQualifier>
59  std::shared_ptr<Analyzer::Expr> rhs;
60  SQLQualifier sql_qual{kONE};
61  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
62  if (!rex_operator) {
63  return std::make_pair(rhs, sql_qual);
64  }
65  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex_operator);
66  const auto qual_str = rex_function ? rex_function->getName() : "";
67  if (qual_str == "PG_ANY"sv || qual_str == "PG_ALL"sv) {
68  CHECK_EQ(size_t(1), rex_function->size());
69  rhs = translateScalarRex(rex_function->getOperand(0));
70  sql_qual = (qual_str == "PG_ANY"sv) ? kANY : kALL;
71  }
72  if (!rhs && rex_operator->getOperator() == kCAST) {
73  CHECK_EQ(size_t(1), rex_operator->size());
74  std::tie(rhs, sql_qual) = getQuantifiedRhs(rex_operator->getOperand(0));
75  }
76  return std::make_pair(rhs, sql_qual);
77 }
78 
79 namespace {
80 
81 std::pair<Datum, bool> datum_from_scalar_tv(const ScalarTargetValue* scalar_tv,
82  const SQLTypeInfo& ti) noexcept {
83  Datum d{0};
84  bool is_null_const{false};
85  switch (ti.get_type()) {
86  case kBOOLEAN: {
87  const auto ival = boost::get<int64_t>(scalar_tv);
88  CHECK(ival);
89  if (*ival == inline_int_null_val(ti)) {
90  is_null_const = true;
91  } else {
92  d.boolval = *ival;
93  }
94  break;
95  }
96  case kTINYINT: {
97  const auto ival = boost::get<int64_t>(scalar_tv);
98  CHECK(ival);
99  if (*ival == inline_int_null_val(ti)) {
100  is_null_const = true;
101  } else {
102  d.tinyintval = *ival;
103  }
104  break;
105  }
106  case kSMALLINT: {
107  const auto ival = boost::get<int64_t>(scalar_tv);
108  CHECK(ival);
109  if (*ival == inline_int_null_val(ti)) {
110  is_null_const = true;
111  } else {
112  d.smallintval = *ival;
113  }
114  break;
115  }
116  case kINT: {
117  const auto ival = boost::get<int64_t>(scalar_tv);
118  CHECK(ival);
119  if (*ival == inline_int_null_val(ti)) {
120  is_null_const = true;
121  } else {
122  d.intval = *ival;
123  }
124  break;
125  }
126  case kDECIMAL:
127  case kNUMERIC:
128  case kBIGINT:
129  case kDATE:
130  case kTIME:
131  case kTIMESTAMP: {
132  const auto ival = boost::get<int64_t>(scalar_tv);
133  CHECK(ival);
134  if (*ival == inline_int_null_val(ti)) {
135  is_null_const = true;
136  } else {
137  d.bigintval = *ival;
138  }
139  break;
140  }
141  case kDOUBLE: {
142  const auto dval = boost::get<double>(scalar_tv);
143  CHECK(dval);
144  if (*dval == inline_fp_null_val(ti)) {
145  is_null_const = true;
146  } else {
147  d.doubleval = *dval;
148  }
149  break;
150  }
151  case kFLOAT: {
152  const auto fval = boost::get<float>(scalar_tv);
153  CHECK(fval);
154  if (*fval == inline_fp_null_val(ti)) {
155  is_null_const = true;
156  } else {
157  d.floatval = *fval;
158  }
159  break;
160  }
161  case kTEXT:
162  case kVARCHAR:
163  case kCHAR: {
164  auto nullable_sptr = boost::get<NullableString>(scalar_tv);
165  CHECK(nullable_sptr);
166  if (boost::get<void*>(nullable_sptr)) {
167  is_null_const = true;
168  } else {
169  auto sptr = boost::get<std::string>(nullable_sptr);
170  d.stringval = new std::string(*sptr);
171  }
172  break;
173  }
174  default:
175  CHECK(false) << "Unhandled type: " << ti.get_type_name();
176  }
177  return {d, is_null_const};
178 }
179 
// Pointer to a const RelAlgTranslator member function that takes a RexScalar const*
// and returns the translated Analyzer expression.
180 using Handler =
181  std::shared_ptr<Analyzer::Expr> (RelAlgTranslator::*)(RexScalar const*) const;
// One dispatch-table entry: the std::type_index to match, paired with the Handler to
// invoke for it.
182 using IndexedHandler = std::pair<std::type_index, Handler>;
183 
184 template <typename... Ts>
185 std::array<IndexedHandler, sizeof...(Ts)> makeHandlers() {
186  return {IndexedHandler{std::type_index(typeid(Ts)),
187  &RelAlgTranslator::translateRexScalar<Ts>}...};
188 }
189 
190 struct ByTypeIndex {
191  std::type_index const type_index_;
192  ByTypeIndex(std::type_info const& type_info)
193  : type_index_(std::type_index(type_info)) {}
194  bool operator()(IndexedHandler const& pair) const { return pair.first == type_index_; }
195 };
196 
197 } // namespace
198 
199 template <>
200 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexInput>(
201  RexScalar const* rex) const {
202  return translateInput(static_cast<RexInput const*>(rex));
203 }
204 template <>
205 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexLiteral>(
206  RexScalar const* rex) const {
207  return translateLiteral(static_cast<RexLiteral const*>(rex));
208 }
209 template <>
210 std::shared_ptr<Analyzer::Expr>
211 RelAlgTranslator::translateRexScalar<RexWindowFunctionOperator>(
212  RexScalar const* rex) const {
213  return translateWindowFunction(static_cast<RexWindowFunctionOperator const*>(rex));
214 }
215 template <>
216 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexFunctionOperator>(
217  RexScalar const* rex) const {
218  return translateFunction(static_cast<RexFunctionOperator const*>(rex));
219 }
220 template <>
221 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexOperator>(
222  RexScalar const* rex) const {
223  return translateOper(static_cast<RexOperator const*>(rex));
224 }
225 template <>
226 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexCase>(
227  RexScalar const* rex) const {
228  return translateCase(static_cast<RexCase const*>(rex));
229 }
230 template <>
231 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRexScalar<RexSubQuery>(
232  RexScalar const* rex) const {
233  return translateScalarSubquery(static_cast<RexSubQuery const*>(rex));
234 }
235 
236 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarRex(
237  RexScalar const* rex) const {
238  auto cache_itr = cache_.find(rex);
239  if (cache_itr == cache_.end()) {
240  // Order types from most likely to least as they are compared seriatim.
241  static auto const handlers = makeHandlers<RexInput,
242  RexLiteral,
243  RexOperator,
244  RexCase,
247  RexSubQuery>();
248  static_assert(std::is_trivially_destructible_v<decltype(handlers)>);
249  auto it = std::find_if(handlers.cbegin(), handlers.cend(), ByTypeIndex{typeid(*rex)});
250  CHECK(it != handlers.cend()) << "Unhandled type: " << typeid(*rex).name();
251  // Call handler based on typeid(*rex) and cache the std::shared_ptr<Analyzer::Expr>.
252  auto cached = cache_.emplace(rex, (this->*it->second)(rex));
253  CHECK(cached.second) << "Failed to emplace rex of type " << typeid(*rex).name();
254  cache_itr = cached.first;
255  }
256  return cache_itr->second;
257 }
258 
259 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translate(RexScalar const* rex) const {
260  ScopeGuard clear_cache{[this] { cache_.clear(); }};
261  return translateScalarRex(rex);
262 }
263 
264 namespace {
265 
266 bool is_agg_supported_for_type(const SQLAgg& agg_kind, const SQLTypeInfo& arg_ti) {
267  return arg_ti.is_number() || arg_ti.is_boolean() || arg_ti.is_time() ||
268  (agg_kind == kMODE && arg_ti.is_string()) ||
269  !shared::is_any<kAVG, kMIN, kMAX, kSUM, kAPPROX_QUANTILE, kMODE>(agg_kind);
270 }
271 
272 bool is_distinct_supported(SQLAgg const agg_kind) {
273  return shared::is_any<kMIN, kMAX, kCOUNT, kAPPROX_COUNT_DISTINCT>(agg_kind);
274 }
275 
276 } // namespace
277 
278 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAggregateRex(
279  const RexAgg* rex,
280  const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
281  SQLAgg agg_kind = rex->getKind();
282  const bool is_distinct = rex->isDistinct();
283  const bool takes_arg{rex->size() > 0};
284  std::shared_ptr<Analyzer::Expr> arg_expr;
285  std::shared_ptr<Analyzer::Expr> arg1; // 2nd aggregate parameter
286  if (takes_arg) {
287  const auto operand = rex->getOperand(0);
288  CHECK_LT(operand, scalar_sources.size());
289  CHECK_LE(rex->size(), 2u);
290  arg_expr = scalar_sources[operand];
291  switch (agg_kind) {
293  if (rex->size() == 2) {
294  auto const const_arg1 = std::dynamic_pointer_cast<Analyzer::Constant>(
295  scalar_sources[rex->getOperand(1)]);
296  if (!const_arg1 || const_arg1->get_type_info().get_type() != kINT ||
297  const_arg1->get_constval().intval < 1 ||
298  const_arg1->get_constval().intval > 100) {
299  throw std::runtime_error(
300  "APPROX_COUNT_DISTINCT's second parameter must be a SMALLINT literal "
301  "between 1 and 100");
302  }
303  arg1 = scalar_sources[rex->getOperand(1)];
304  }
305  break;
306  case kAPPROX_QUANTILE:
307  if (g_cluster) {
308  throw std::runtime_error(
309  "APPROX_PERCENTILE/MEDIAN is not supported in distributed mode at this "
310  "time.");
311  }
312  // If second parameter is not given then APPROX_MEDIAN is assumed.
313  if (rex->size() == 2) {
314  arg1 = std::dynamic_pointer_cast<Analyzer::Constant>(
315  std::dynamic_pointer_cast<Analyzer::Constant>(
316  scalar_sources[rex->getOperand(1)])
317  ->add_cast(SQLTypeInfo(kDOUBLE)));
318  } else {
319 #ifdef _WIN32
320  Datum median;
321  median.doubleval = 0.5;
322 #else
323  constexpr Datum median{.doubleval = 0.5};
324 #endif
325  arg1 = std::make_shared<Analyzer::Constant>(kDOUBLE, false, median);
326  }
327  break;
328  case kMODE:
329  if (g_cluster) {
330  throw std::runtime_error(
331  "MODE is not supported in distributed mode at this time.");
332  }
333  break;
334  case kCOUNT_IF:
335  if (arg_expr->get_type_info().is_geometry()) {
336  throw std::runtime_error(
337  "COUNT_IF does not currently support geospatial types.");
338  }
339  break;
340  case kSUM_IF:
341  arg1 = scalar_sources[rex->getOperand(1)];
342  if (arg1->get_type_info().get_type() != kBOOLEAN) {
343  throw std::runtime_error("Conditional argument must be a boolean expression.");
344  }
345  break;
346  default:
347  break;
348  }
349  const auto& arg_ti = arg_expr->get_type_info();
350  if (!is_agg_supported_for_type(agg_kind, arg_ti)) {
351  throw std::runtime_error("Aggregate on " + arg_ti.get_type_name() +
352  " is not supported yet.");
353  }
354  if (is_distinct && !is_distinct_supported(agg_kind)) {
355  throw std::runtime_error(toString(agg_kind) +
356  " does not currently support the DISTINCT qualifier.");
357  }
358  }
359  const auto agg_ti = get_agg_type(agg_kind, arg_expr.get());
360  return makeExpr<Analyzer::AggExpr>(agg_ti, agg_kind, arg_expr, is_distinct, arg1);
361 }
362 
// Translates a RexLiteral into an Analyzer expression (a Constant, a cast around a
// Constant, or an empty ArrayExpr for a NULL array), dispatching on the literal's type.
// An unexpected literal type is reported through LOG(FATAL).
363 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLiteral(
364  const RexLiteral* rex_literal) {
// lit_ti describes the literal as written; target_ti describes the type requested for
// it. Several cases below add a cast when the two differ.
365  auto lit_ti = build_type_info(
366  rex_literal->getType(), rex_literal->getScale(), rex_literal->getPrecision());
367  auto target_ti = build_type_info(rex_literal->getTargetType(),
368  rex_literal->getTargetScale(),
369  rex_literal->getTargetPrecision());
370  switch (rex_literal->getType()) {
// Both integer types are stored through the bigintval member of the Datum.
371  case kINT:
372  case kBIGINT: {
373  Datum d;
374  d.bigintval = rex_literal->getVal<int64_t>();
375  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
376  }
377  case kDECIMAL: {
378  const auto val = rex_literal->getVal<int64_t>();
379  const int precision = rex_literal->getPrecision();
380  const int scale = rex_literal->getScale();
// A scale-0 decimal headed for a floating point target is emitted directly as an fp
// constant instead of as a decimal plus a cast.
381  if (target_ti.is_fp() && !scale) {
382  return make_fp_constant(val, target_ti);
383  }
// NOTE(review): the ':' branch of this conditional (the scale == 0 case) is not present
// in this listing -- its numbering skips a line here. Restore it from upstream.
384  auto lit_expr = scale ? Parser::FixedPtLiteral::analyzeValue(val, scale, precision)
386  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
387  }
388  case kTEXT: {
389  return Parser::StringLiteral::analyzeValue(rex_literal->getVal<std::string>(),
390  false);
391  }
392  case kBOOLEAN: {
393  Datum d;
394  d.boolval = rex_literal->getVal<bool>();
395  return makeExpr<Analyzer::Constant>(kBOOLEAN, false, d);
396  }
397  case kDOUBLE: {
398  Datum d;
399  d.doubleval = rex_literal->getVal<double>();
400  auto lit_expr =
401  makeExpr<Analyzer::Constant>(SQLTypeInfo(rex_literal->getType(),
402  rex_literal->getPrecision(),
403  rex_literal->getScale(),
404  false),
405  false,
406  d);
407  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
408  }
409  case kINTERVAL_DAY_TIME:
410  case kINTERVAL_YEAR_MONTH: {
411  Datum d;
412  d.bigintval = rex_literal->getVal<int64_t>();
413  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
414  }
// A TIMESTAMP with non-zero precision keeps the literal value as-is; TIME and
// precision-0 TIMESTAMP values are divided by 1000 (presumably milliseconds to
// seconds -- confirm against the producer of RexLiteral).
415  case kTIME:
416  case kTIMESTAMP: {
417  Datum d;
418  d.bigintval =
419  rex_literal->getType() == kTIMESTAMP && rex_literal->getPrecision() > 0
420  ? rex_literal->getVal<int64_t>()
421  : rex_literal->getVal<int64_t>() / 1000;
422  return makeExpr<Analyzer::Constant>(
423  SQLTypeInfo(rex_literal->getType(), rex_literal->getPrecision(), 0, false),
424  false,
425  d);
426  }
// The literal value is multiplied by 24 * 3600 (presumably days to seconds -- confirm).
427  case kDATE: {
428  Datum d;
429  d.bigintval = rex_literal->getVal<int64_t>() * 24 * 3600;
430  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
431  }
432  case kNULLT: {
433  if (target_ti.is_array()) {
// NOTE(review): `args` is used below but its declaration is not present in this
// listing -- the numbering skips a line here. Restore it from upstream.
435  // defaulting to valid sub-type for convenience
436  target_ti.set_subtype(kBOOLEAN);
437  return makeExpr<Analyzer::ArrayExpr>(target_ti, args, true);
438  }
439  if (target_ti.get_type() == kGEOMETRY) {
440  // Specific geo type will be set in a normalization step if needed.
441  return makeExpr<Analyzer::Constant>(kNULLT, true, Datum{0});
442  }
// Any other NULL literal becomes a null Constant of the requested target type.
443  return makeExpr<Analyzer::Constant>(rex_literal->getTargetType(), true, Datum{0});
444  }
445  default: {
446  LOG(FATAL) << "Unexpected literal type " << lit_ti.get_type_name();
447  }
448  }
449  return nullptr;
450 }
451 
452 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarSubquery(
453  const RexSubQuery* rex_subquery) const {
454  if (just_explain_) {
455  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
456  }
457  CHECK(rex_subquery);
458  auto result = rex_subquery->getExecutionResult();
459  auto row_set = result->getRows();
460  const size_t row_count = row_set->rowCount();
461  if (row_count > size_t(1)) {
462  throw std::runtime_error("Scalar sub-query returned multiple rows");
463  }
464  auto ti = rex_subquery->getType();
465  if (g_cluster && ti.is_string()) {
466  throw std::runtime_error(
467  "Scalar sub-queries which return strings not supported in distributed mode");
468  }
469  if (row_count == size_t(0)) {
470  if (row_set->isValidationOnlyRes()) {
471  Datum d{0};
472  if (ti.is_string()) {
473  // keep the valid ptr to avoid crash during the query validation
474  // this ptr will be removed when destructing corresponding constant variable
475  d.stringval = new std::string();
476  }
477  if (ti.is_dict_encoded_string()) {
478  // we set a valid ptr for string literal in above which is not dictionary-encoded
479  ti.set_compression(EncodingType::kENCODING_NONE);
480  }
481  return makeExpr<Analyzer::Constant>(ti, false, d);
482  }
483  throw std::runtime_error("Scalar sub-query returned no results");
484  }
485  CHECK_EQ(row_count, size_t(1));
486  row_set->moveToBegin();
487  auto const first_row = row_set->getNextRow(ti.is_dict_encoded_string(), false);
488  CHECK_EQ(first_row.size(), size_t(1));
489  Datum d{0};
490  bool is_null_const{false};
491  auto scalar_tv = boost::get<ScalarTargetValue>(&first_row[0]);
492  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
493  if (ti.is_dict_encoded_string()) {
494  // we already translate the string, so let's make its type as a string literal
495  ti.set_compression(EncodingType::kENCODING_NONE);
496  }
497  return makeExpr<Analyzer::Constant>(ti, is_null_const, d);
498 }
499 
500 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInput(
501  const RexInput* rex_input) const {
502  const auto source = rex_input->getSourceNode();
503  const auto it_rte_idx = input_to_nest_level_.find(source);
504  CHECK(it_rte_idx != input_to_nest_level_.end())
505  << "Not found in input_to_nest_level_, source="
506  << source->toString(RelRexToStringConfig::defaults());
507  const int rte_idx = it_rte_idx->second;
508  const auto scan_source = dynamic_cast<const RelScan*>(source);
509  const auto& in_metainfo = source->getOutputMetainfo();
510  if (scan_source) {
511  // We're at leaf (scan) level and not supposed to have input metadata,
512  // the name and type information come directly from the catalog.
513  CHECK(in_metainfo.empty());
514  const auto table_desc = scan_source->getTableDescriptor();
515  const auto& catalog = scan_source->getCatalog();
516  const auto cd =
517  catalog.getMetadataForColumnBySpi(table_desc->tableId, rex_input->getIndex() + 1);
518  CHECK(cd);
519  auto col_ti = cd->columnType;
520  if (col_ti.is_string()) {
521  col_ti.set_type(kTEXT);
522  }
523  if (cd->isVirtualCol) {
524  // TODO(alex): remove at some point, we only need this fixup for backwards
525  // compatibility with old imported data
526  CHECK_EQ("rowid", cd->columnName);
527  col_ti.set_size(8);
528  }
529  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
530  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
531  col_ti.set_notnull(false);
532  }
533  return std::make_shared<Analyzer::ColumnVar>(
534  col_ti,
535  shared::ColumnKey{catalog.getDatabaseId(), table_desc->tableId, cd->columnId},
536  rte_idx);
537  }
538  CHECK(!in_metainfo.empty()) << "for "
539  << source->toString(RelRexToStringConfig::defaults());
540  CHECK_GE(rte_idx, 0);
541  const int32_t col_id = rex_input->getIndex();
542  CHECK_LT(col_id, in_metainfo.size());
543  auto col_ti = in_metainfo[col_id].get_type_info();
544 
545  if (join_types_.size() > 0) {
546  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
547  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
548  col_ti.set_notnull(false);
549  }
550  }
551 
552  return std::make_shared<Analyzer::ColumnVar>(
553  col_ti, shared::ColumnKey{0, int32_t(-source->getId()), col_id}, rte_idx);
554 }
555 
// Translates a unary operator (exactly one operand) into an Analyzer expression.
// Handles CAST, ENCODE_TEXT, NOT, IS NULL, IS NOT NULL, unary minus and UNNEST; any
// other operator trips CHECK(false).
556 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUoper(
557  const RexOperator* rex_operator) const {
558  CHECK_EQ(size_t(1), rex_operator->size());
559  const auto operand_expr = translateScalarRex(rex_operator->getOperand(0));
560  const auto sql_op = rex_operator->getOperator();
561  switch (sql_op) {
562  case kCAST: {
563  const auto& target_ti = rex_operator->getType();
564  CHECK_NE(kNULLT, target_ti.get_type());
565  const auto& operand_ti = operand_expr->get_type_info();
// String-to-string casts are dropped: the operand is returned unchanged.
566  if (operand_ti.is_string() && target_ti.is_string()) {
567  return operand_expr;
568  }
569  if (target_ti.is_time() ||
570  operand_ti
571  .is_string()) { // TODO(alex): check and unify with the rest of the cases
572  // Do not propagate encoding on small dates
573  return target_ti.is_date_in_days()
574  ? operand_expr->add_cast(SQLTypeInfo(kDATE, false))
575  : operand_expr->add_cast(target_ti);
576  }
577  if (!operand_ti.is_string() && target_ti.is_string()) {
578  return operand_expr->add_cast(target_ti);
579  }
// Every remaining cast is represented as an explicit UOper node.
580  return std::make_shared<Analyzer::UOper>(target_ti, false, sql_op, operand_expr);
581  }
582  case kENCODE_TEXT: {
583  const auto& target_ti = rex_operator->getType();
584  CHECK_NE(kNULLT, target_ti.get_type());
585  const auto& operand_ti = operand_expr->get_type_info();
586  CHECK(operand_ti.is_string());
587  if (operand_ti.is_dict_encoded_string()) {
588  // No cast needed
589  return operand_expr;
590  }
// An operand that references no column variables is also returned as-is.
591  if (operand_expr->get_num_column_vars(true) == 0UL) {
592  return operand_expr;
593  }
594  if (g_cluster) {
595  throw std::runtime_error(
596  "ENCODE_TEXT is not currently supported in distributed mode at this time.");
597  }
// Build the cast target from the operand type: kTEXT, dictionary compression, with the
// transient dictionary id as comp_param.
598  SQLTypeInfo casted_target_ti = operand_ti;
599  casted_target_ti.set_type(kTEXT);
600  casted_target_ti.set_compression(kENCODING_DICT);
601  casted_target_ti.set_comp_param(TRANSIENT_DICT_ID);
// NOTE(review): this listing's numbering skips a line here, so a statement between
// set_comp_param() and set_fixed_size() may be missing. Restore it from upstream.
603  casted_target_ti.set_fixed_size();
604  return makeExpr<Analyzer::UOper>(
605  casted_target_ti, operand_expr->get_contains_agg(), kCAST, operand_expr);
606  }
607  case kNOT:
608  case kISNULL: {
609  return std::make_shared<Analyzer::UOper>(kBOOLEAN, sql_op, operand_expr);
610  }
// IS NOT NULL has no node of its own here: it is built as NOT(IS NULL(operand)).
611  case kISNOTNULL: {
612  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, operand_expr);
613  return std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
614  }
// A kMINUS with a single operand is emitted as kUMINUS with the operand's own type.
615  case kMINUS: {
616  const auto& ti = operand_expr->get_type_info();
617  return std::make_shared<Analyzer::UOper>(ti, false, kUMINUS, operand_expr);
618  }
// UNNEST requires an array operand; the result has the array's element type.
619  case kUNNEST: {
620  const auto& ti = operand_expr->get_type_info();
621  CHECK(ti.is_array());
622  return makeExpr<Analyzer::UOper>(ti.get_elem_type(), false, kUNNEST, operand_expr);
623  }
624  default:
625  CHECK(false);
626  }
627  return nullptr;
628 }
629 
630 namespace {
631 
// Builds an Analyzer::InValues expression `arg IN (values of val_set)`, converting the
// rows of `val_set` into Constant expressions in parallel.
// Returns nullptr on the early-exit path below. Throws std::runtime_error when the
// watchdog is enabled and the result set has more than 5,000,000 rows.
632 std::shared_ptr<Analyzer::Expr> get_in_values_expr(std::shared_ptr<Analyzer::Expr> arg,
633  const ResultSet& val_set) {
// NOTE(review): the condition guarding this early return is not present in this
// listing -- its numbering skips a line here. Restore it from upstream.
635  return nullptr;
636  }
637  if (val_set.rowCount() > 5000000 && g_enable_watchdog) {
638  throw std::runtime_error(
639  "Unable to handle 'expr IN (subquery)', subquery returned 5M+ rows.");
640  }
641  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
// One output list per worker, so the workers never write to a shared container.
642  const size_t fetcher_count = cpu_threads();
643  std::vector<std::list<std::shared_ptr<Analyzer::Expr>>> expr_set(
644  fetcher_count, std::list<std::shared_ptr<Analyzer::Expr>>());
645  std::vector<std::future<void>> fetcher_threads;
646  const auto& ti = arg->get_type_info();
647  const auto entry_count = val_set.entryCount();
// Split [0, entry_count) into contiguous slices of `stride` entries (rounded up), one
// slice per worker; fewer workers are started when there are fewer entries.
648  for (size_t i = 0,
649  start_entry = 0,
650  stride = (entry_count + fetcher_count - 1) / fetcher_count;
651  i < fetcher_count && start_entry < entry_count;
652  ++i, start_entry += stride) {
653  const auto end_entry = std::min(start_entry + stride, entry_count);
// NOTE(review): this listing's numbering skips a line after `std::async(`, so an
// argument preceding the lambda (e.g. a launch policy) may be missing.
// The lambda captures `val_set` and `ti` by reference; every future is waited on below,
// before this function returns.
654  fetcher_threads.push_back(std::async(
656  [&](std::list<std::shared_ptr<Analyzer::Expr>>& in_vals,
657  const size_t start,
658  const size_t end) {
659  for (auto index = start; index < end; ++index) {
660  auto row = val_set.getRowAt(index);
// An empty row is skipped rather than treated as a value.
661  if (row.empty()) {
662  continue;
663  }
664  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
665  Datum d{0};
666  bool is_null_const{false};
667  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
668  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
// NOTE(review): ti_none_encoded is prepared but never used -- the constant below is
// created with `ti`. Confirm whether ti_none_encoded was intended.
669  auto ti_none_encoded = ti;
670  ti_none_encoded.set_compression(kENCODING_NONE);
671  auto none_encoded_string =
672  makeExpr<Analyzer::Constant>(ti, is_null_const, d);
673  auto dict_encoded_string = std::make_shared<Analyzer::UOper>(
674  ti, false, kCAST, none_encoded_string);
675  in_vals.push_back(dict_encoded_string);
676  } else {
677  in_vals.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
678  }
679  }
680  },
681  std::ref(expr_set[i]),
682  start_entry,
683  end_entry));
684  }
// Wait for all workers; get() also rethrows any exception raised inside one.
685  for (auto& child : fetcher_threads) {
686  child.get();
687  }
688 
689  val_set.moveToBegin();
// Concatenate the per-worker lists in slice order; splice moves the nodes without
// copying the expressions.
690  for (auto& exprs : expr_set) {
691  value_exprs.splice(value_exprs.end(), exprs);
692  }
693  return makeExpr<Analyzer::InValues>(arg, value_exprs);
694 }
695 
696 } // namespace
697 
698 // Creates an Analyzer expression for an IN subquery which subsequently goes through the
699 // regular Executor::codegen() mechanism. The creation of the expression out of
700 // subquery's result set is parallelized whenever possible. In addition, take advantage
701 // of additional information that elements in the right hand side are constants; see
702 // getInIntegerSetExpr().
703 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInOper(
704  const RexOperator* rex_operator) const {
705  if (just_explain_) {
706  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
707  }
708  CHECK(rex_operator->size() == 2);
709  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
710  const auto rhs = rex_operator->getOperand(1);
711  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rhs);
712  CHECK(rex_subquery);
713  auto ti = lhs->get_type_info();
714  auto result = rex_subquery->getExecutionResult();
715  CHECK(result);
716  auto& row_set = result->getRows();
717  CHECK_EQ(size_t(1), row_set->colCount());
718  const auto& rhs_ti = row_set->getColType(0);
719  if (rhs_ti.get_type() != ti.get_type()) {
720  throw std::runtime_error(
721  "The two sides of the IN operator must have the same type; found " +
722  ti.get_type_name() + " and " + rhs_ti.get_type_name());
723  }
724  row_set->moveToBegin();
725  if (row_set->entryCount() > 10000) {
726  std::shared_ptr<Analyzer::Expr> expr;
727  if ((ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT)) &&
728  !row_set->getQueryMemDesc().didOutputColumnar()) {
729  expr = getInIntegerSetExpr(lhs, *row_set);
730  // Handle the highly unlikely case when the InIntegerSet ended up being tiny.
731  // Just let it fall through the usual InValues path at the end of this method,
732  // its codegen knows to use inline comparisons for few values.
733  if (expr && std::static_pointer_cast<Analyzer::InIntegerSet>(expr)
734  ->get_value_list()
735  .size() <= 100) {
736  expr = nullptr;
737  }
738  } else {
739  expr = get_in_values_expr(lhs, *row_set);
740  }
741  if (expr) {
742  return expr;
743  }
744  }
745  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
746  while (true) {
747  auto row = row_set->getNextRow(true, false);
748  if (row.empty()) {
749  break;
750  }
751  if (g_enable_watchdog && value_exprs.size() >= 10000) {
752  throw std::runtime_error(
753  "Unable to handle 'expr IN (subquery)', subquery returned 10000+ rows.");
754  }
755  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
756  Datum d{0};
757  bool is_null_const{false};
758  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
759  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
760  auto ti_none_encoded = ti;
761  ti_none_encoded.set_compression(kENCODING_NONE);
762  auto none_encoded_string = makeExpr<Analyzer::Constant>(ti, is_null_const, d);
763  auto dict_encoded_string =
764  std::make_shared<Analyzer::UOper>(ti, false, kCAST, none_encoded_string);
765  value_exprs.push_back(dict_encoded_string);
766  } else {
767  value_exprs.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
768  }
769  }
770  return makeExpr<Analyzer::InValues>(lhs, value_exprs);
771 }
772 
773 namespace {
774 
// Watchdog ceiling on the number of values collected for an integer IN set:
// 2^25 = 33,554,432 (the error messages round this to "30M+").
const size_t g_max_integer_set_size{size_t(1) << 25};
776 
// Fills `in_vals` with the string ids found in entries [slice.first, slice.second) of
// `values_rowset`, expressed as ids of `dest_dict`. When source and destination are the
// same dictionary object the ids are copied as-is; otherwise each id is mapped through
// its string, and strings unknown to `dest_dict` are dropped.
// `in_vals` must be empty on entry. Throws std::runtime_error when the watchdog is
// enabled and the shared counter has reached g_max_integer_set_size.
// NOTE(review): the first line of this definition (return type and function name) is
// not present in this listing -- its numbering skips a line here.
778  std::vector<int64_t>& in_vals,
779  std::atomic<size_t>& total_in_vals_count,
780  const ResultSet* values_rowset,
781  const std::pair<int64_t, int64_t> values_rowset_slice,
782  const StringDictionaryProxy* source_dict,
783  const StringDictionaryProxy* dest_dict,
784  const int64_t needle_null_val) {
785  CHECK(in_vals.empty());
// Pointer equality: only the very same proxy object counts as "equal".
786  bool dicts_are_equal = source_dict == dest_dict;
787  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
788  ++index) {
789  const auto row = values_rowset->getOneColRow(index);
790  if (UNLIKELY(!row.valid)) {
791  continue;
792  }
793  if (dicts_are_equal) {
794  in_vals.push_back(row.value);
795  } else {
// The null sentinel is passed through untranslated; every other id is looked up by
// string in the destination dictionary.
796  const int string_id =
797  row.value == needle_null_val
798  ? needle_null_val
799  : dest_dict->getIdOfString(source_dict->getString(row.value));
800  if (string_id != StringDictionary::INVALID_STR_ID) {
801  in_vals.push_back(string_id);
802  }
803  }
// The shared counter is bumped in steps of 1024, whenever this worker's local size is a
// multiple of 1024.
// NOTE(review): this check also runs when nothing was pushed in this iteration, so a
// size of 0 (or an unchanged multiple of 1024) bumps the counter once per row -- verify
// whether that is intended.
804  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
805  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
806  throw std::runtime_error(
807  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
808  }
809  }
810 }
811 
812 void fill_integer_in_vals(std::vector<int64_t>& in_vals,
813  std::atomic<size_t>& total_in_vals_count,
814  const ResultSet* values_rowset,
815  const std::pair<int64_t, int64_t> values_rowset_slice) {
816  CHECK(in_vals.empty());
817  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
818  ++index) {
819  const auto row = values_rowset->getOneColRow(index);
820  if (row.valid) {
821  in_vals.push_back(row.value);
822  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
823  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
824  throw std::runtime_error(
825  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
826  }
827  }
828  }
829 }
830 
831 // Multi-node counterpart of the other version. Saves round-trips, which is crucial
832 // for a big right-hand side result. It only handles physical string dictionary ids,
833 // therefore it won't be able to handle a right-hand side sub-query with a CASE
834 // returning literals on some branches. That case isn't hard to handle either, but
835 // it's not clear it's actually important in practice.
836 // RelAlgTranslator::getInIntegerSetExpr makes sure, by checking the encodings, that
837 // this function isn't called in such cases.
// Fills `in_vals` with string ids for entries [slice.first, slice.second) of
// `values_rowset`, identifying dictionaries by DictRef. With identical refs the ids are
// copied directly; otherwise the non-null ids are translated in one batch through
// translate_string_ids() using the first leaf host, and invalid ids are dropped.
// In both paths the null sentinel is emitted at most once.
// `in_vals` must be empty on entry. Throws std::runtime_error when the watchdog is
// enabled and the shared counter has reached g_max_integer_set_size.
// NOTE(review): the first line of this definition (return type and function name) is
// not present in this listing -- its numbering skips a line here.
839  std::vector<int64_t>& in_vals,
840  std::atomic<size_t>& total_in_vals_count,
841  const ResultSet* values_rowset,
842  const std::pair<int64_t, int64_t> values_rowset_slice,
843  const std::vector<LeafHostInfo>& leaf_hosts,
844  const DictRef source_dict_ref,
845  const DictRef dest_dict_ref,
846  const int32_t dest_generation,
847  const int64_t needle_null_val) {
848  CHECK(in_vals.empty());
// NOTE(review): this reserves room for the whole result set, not just this slice, and
// does so even on the shared-dictionary path below, which never uses source_ids.
849  std::vector<int32_t> source_ids;
850  source_ids.reserve(values_rowset->entryCount());
851  bool has_nulls = false;
// Shared dictionary: ids are already valid for the destination, no translation needed.
852  if (source_dict_ref == dest_dict_ref) {
853  in_vals.reserve(values_rowset_slice.second - values_rowset_slice.first +
854  1); // Add 1 to cover interval
855  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
856  ++index) {
857  const auto row = values_rowset->getOneColRow(index);
858  if (!row.valid) {
859  continue;
860  }
861  if (row.value != needle_null_val) {
862  in_vals.push_back(row.value);
863  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
864  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
865  throw std::runtime_error(
866  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
867  }
868  } else {
869  has_nulls = true;
870  }
871  }
872  if (has_nulls) {
873  in_vals.push_back(
874  needle_null_val); // we've deduped null values as an optimization, although
875  // this is not required by consumer
876  }
877  return;
878  }
879  // Code path below is for when dictionaries are not shared
// First pass: collect the non-null source ids and remember whether a null was seen.
880  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
881  ++index) {
882  const auto row = values_rowset->getOneColRow(index);
883  if (row.valid) {
884  if (row.value != needle_null_val) {
885  source_ids.push_back(row.value);
886  } else {
887  has_nulls = true;
888  }
889  }
890  }
// Translate all collected ids in a single call; the result is expected to be
// positionally aligned with source_ids (same size, checked below).
891  std::vector<int32_t> dest_ids;
892  translate_string_ids(dest_ids,
893  leaf_hosts.front(),
894  dest_dict_ref,
895  source_ids,
896  source_dict_ref,
897  dest_generation);
898  CHECK_EQ(dest_ids.size(), source_ids.size());
899  in_vals.reserve(dest_ids.size() + (has_nulls ? 1 : 0));
// On this path the single null sentinel goes first, followed by the translated ids.
900  if (has_nulls) {
901  in_vals.push_back(needle_null_val);
902  }
903  for (const int32_t dest_id : dest_ids) {
904  if (dest_id != StringDictionary::INVALID_STR_ID) {
905  in_vals.push_back(dest_id);
906  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
907  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
908  throw std::runtime_error(
909  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
910  }
911  }
912  }
913 }
914 
915 } // namespace
916 
917 // The typical IN subquery involves either dictionary-encoded strings or integers.
918 // Analyzer::InValues is a very heavy representation of the right hand side of such
919 // a query since we already know the right hand would be a list of Analyzer::Constant
920 // shared pointers. We can avoid the big overhead of each Analyzer::Constant and the
921 // refcounting associated with shared pointers by creating an abbreviated InIntegerSet
922 // representation of the IN expression which takes advantage of this information.
// Builds an Analyzer::InIntegerSet for `arg IN (val_set)`.
// The entries of val_set are divided into at most cpu_threads() contiguous strides;
// each stride is filled into its own vector by a std::async task and the per-task
// vectors are concatenated afterwards. Returns nullptr on the early-exit paths below.
// NOTE(review): listing lines 926, 962, 966, 977, 988 and 1003 are missing from this
// rendering, so the first guard condition, the catalog lookup, the leading std::async
// argument(s) and the names of the fill helpers being called are not visible here.
923 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::getInIntegerSetExpr(
924  std::shared_ptr<Analyzer::Expr> arg,
925  const ResultSet& val_set) const {
927  return nullptr;
928  }
929  std::vector<int64_t> value_exprs;
930  const size_t fetcher_count = cpu_threads();
// One output vector per potential fetcher task.
931  std::vector<std::vector<int64_t>> expr_set(fetcher_count);
932  std::vector<std::future<void>> fetcher_threads;
933  const auto& arg_type = arg->get_type_info();
934  const auto entry_count = val_set.entryCount();
935  CHECK_EQ(size_t(1), val_set.colCount());
936  const auto& col_type = val_set.getColType(0);
937  if (g_cluster && arg_type.is_string() &&
938  (col_type.get_comp_param() <= 0 || arg_type.get_comp_param() <= 0)) {
939  // Skip this case for now, see comment for fill_dictionary_encoded_in_vals.
940  return nullptr;
941  }
// Counter shared by all tasks; handed to the fill helpers by reference.
942  std::atomic<size_t> total_in_vals_count{0};
// stride is the ceiling of entry_count / fetcher_count, so fewer than fetcher_count
// tasks may be launched when the entries run out first.
943  for (size_t i = 0,
944  start_entry = 0,
945  stride = (entry_count + fetcher_count - 1) / fetcher_count;
946  i < fetcher_count && start_entry < entry_count;
947  ++i, start_entry += stride) {
948  expr_set[i].reserve(entry_count / fetcher_count);
949  const auto end_entry = std::min(start_entry + stride, entry_count);
950  if (arg_type.is_string()) {
951  CHECK_EQ(kENCODING_DICT, arg_type.get_compression());
952  auto col_expr = dynamic_cast<const Analyzer::ColumnVar*>(arg.get());
953  CHECK(col_expr);
954  const auto& dest_dict_key = arg_type.getStringDictKey();
955  const auto& source_dict_key = col_type.getStringDictKey();
// dd: proxy for the argument's dictionary; sd: proxy for the subquery column's.
956  const auto dd = executor_->getStringDictionaryProxy(
957  arg_type.getStringDictKey(), val_set.getRowSetMemOwner(), true);
958  const auto sd = executor_->getStringDictionaryProxy(
959  col_type.getStringDictKey(), val_set.getRowSetMemOwner(), true);
960  CHECK(sd);
961  const auto needle_null_val = inline_int_null_val(arg_type);
963  col_expr->getColumnKey().db_id);
964  CHECK(catalog);
965  fetcher_threads.push_back(std::async(
// The dictionary keys are captured by reference; every future is waited on below,
// before this function returns.
967  [&val_set,
968  &total_in_vals_count,
969  sd,
970  dd,
971  &source_dict_key,
972  &dest_dict_key,
973  needle_null_val,
974  catalog](std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
975  if (g_cluster) {
976  CHECK_GE(dd->getGeneration(), 0);
978  in_vals,
979  total_in_vals_count,
980  &val_set,
981  {start, end},
982  catalog->getStringDictionaryHosts(),
983  {source_dict_key.db_id, source_dict_key.dict_id},
984  {dest_dict_key.db_id, dest_dict_key.dict_id},
985  dd->getGeneration(),
986  needle_null_val);
987  } else {
989  total_in_vals_count,
990  &val_set,
991  {start, end},
992  sd,
993  dd,
994  needle_null_val);
995  }
996  },
997  std::ref(expr_set[i]),
998  start_entry,
999  end_entry));
1000  } else {
// Non-string arguments must be integers; other types abort via CHECK.
1001  CHECK(arg_type.is_integer());
1002  fetcher_threads.push_back(std::async(
1004  [&val_set, &total_in_vals_count](
1005  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
1006  fill_integer_in_vals(in_vals, total_in_vals_count, &val_set, {start, end});
1007  },
1008  std::ref(expr_set[i]),
1009  start_entry,
1010  end_entry));
1011  }
1012  }
// get() blocks until each task finishes and rethrows any exception it raised.
1013  for (auto& child : fetcher_threads) {
1014  child.get();
1015  }
1016 
// NOTE(review): presumably rewinds the result set's iteration state after the
// fetchers have read from it -- confirm against ResultSet::moveToBegin.
1017  val_set.moveToBegin();
1018  value_exprs.reserve(entry_count);
1019  for (auto& exprs : expr_set) {
1020  value_exprs.insert(value_exprs.end(), exprs.begin(), exprs.end());
1021  }
// The last argument is true only when both the argument and the subquery column are
// declared NOT NULL.
1022  return makeExpr<Analyzer::InIntegerSet>(
1023  arg, value_exprs, arg_type.get_notnull() && col_type.get_notnull());
1024 }
1025 
// Translates a generic Rex operator into an Analyzer expression.
// Dispatch order: single operand -> translateUoper; kIN -> translateInOper;
// kPLUS/kMINUS -> translateDatePlusMinus when it yields a result;
// kBBOX_INTERSECT -> translateBoundingBoxIntersectOper; comparisons ->
// translateGeoComparison when it yields a result. Otherwise the operands are
// combined left to right through Parser::OperExpr::normalize.
1026 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOper(
1027  const RexOperator* rex_operator) const {
1028  CHECK_GT(rex_operator->size(), size_t(0));
1029  if (rex_operator->size() == 1) {
1030  return translateUoper(rex_operator);
1031  }
1032  const auto sql_op = rex_operator->getOperator();
1033  if (sql_op == kIN) {
1034  return translateInOper(rex_operator);
1035  }
1036  if (sql_op == kMINUS || sql_op == kPLUS) {
// A null result means this is not date/time arithmetic; fall through to the
// generic path.
1037  auto date_plus_minus = translateDatePlusMinus(rex_operator);
1038  if (date_plus_minus) {
1039  return date_plus_minus;
1040  }
1041  }
1042  if (sql_op == kBBOX_INTERSECT) {
1043  return translateBoundingBoxIntersectOper(rex_operator);
1044  } else if (IS_COMPARISON(sql_op)) {
// A null result means no geo-specific comparison applies.
1045  auto geo_comp = translateGeoComparison(rex_operator);
1046  if (geo_comp) {
1047  return geo_comp;
1048  }
1049  }
// Generic path: fold operand i into the accumulated left-hand side.
1050  auto lhs = translateScalarRex(rex_operator->getOperand(0));
1051  for (size_t i = 1; i < rex_operator->size(); ++i) {
1052  std::shared_ptr<Analyzer::Expr> rhs;
1053  SQLQualifier sql_qual{kONE};
1054  const auto rhs_op = rex_operator->getOperand(i);
// getQuantifiedRhs returns an empty rhs when the operand is not a quantified
// expression; translate it as a plain scalar in that case.
1055  std::tie(rhs, sql_qual) = getQuantifiedRhs(rhs_op);
1056  if (!rhs) {
1057  rhs = translateScalarRex(rhs_op);
1058  }
1059  CHECK(rhs);
1060 
1061  // Pass in executor to get string proxy info if cast needed between
1062  // string columns
1063  lhs = Parser::OperExpr::normalize(sql_op, sql_qual, lhs, rhs, executor_);
1064  }
1065  return lhs;
1066 }
1067 
// NOTE(review): the line carrying the return type and function name (listing line
// 1068) is missing from this rendering.
//
// Handles a kBBOX_INTERSECT operator: when the first operand has a geometry type the
// work is delegated to translateGeoBoundingBoxIntersectOper, otherwise a
// std::runtime_error is thrown.
1069  const RexOperator* rex_operator) const {
1070  const auto sql_op = rex_operator->getOperator();
1071  CHECK(sql_op == kBBOX_INTERSECT);
1072 
// Only the left operand's type decides which path is taken.
1073  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
1074  const auto lhs_ti = lhs->get_type_info();
1075  if (lhs_ti.is_geometry()) {
1076  return translateGeoBoundingBoxIntersectOper(rex_operator);
1077  } else {
1078  throw std::runtime_error(
1079  "Bounding Box Intersection equivalence is currently only supported for "
1080  "geospatial types");
1081  }
1082 }
1083 
// Translates a CASE expression: every WHEN/THEN pair is translated in branch order,
// the ELSE operand is translated when present (else_expr stays null otherwise), and
// the result is built by Parser::CaseExpr::normalize.
1084 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCase(
1085  const RexCase* rex_case) const {
1086  std::shared_ptr<Analyzer::Expr> else_expr;
1087  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1088  expr_list;
1089  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
1090  const auto when_expr = translateScalarRex(rex_case->getWhen(i));
1091  const auto then_expr = translateScalarRex(rex_case->getThen(i));
1092  expr_list.emplace_back(when_expr, then_expr);
1093  }
1094  if (rex_case->getElse()) {
1095  else_expr = translateScalarRex(rex_case->getElse());
1096  }
1097  return Parser::CaseExpr::normalize(expr_list, else_expr, executor_);
1098 }
1099 
// Builds an Analyzer::MLPredictExpr. Operand 0 is translated as the model value and
// every remaining operand as a regressor value; at least two operands are required
// (enforced with CHECK_GE).
1100 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateMLPredict(
1101  const RexFunctionOperator* rex_function) const {
1102  const auto num_operands = rex_function->size();
1103  CHECK_GE(num_operands, 2UL);
1104  auto model_value = translateScalarRex(rex_function->getOperand(0));
1105  std::vector<std::shared_ptr<Analyzer::Expr>> regressor_values;
1106  for (size_t regressor_idx = 1; regressor_idx < num_operands; ++regressor_idx) {
1107  regressor_values.emplace_back(
1108  translateScalarRex(rex_function->getOperand(regressor_idx)));
1109  }
1110  return makeExpr<Analyzer::MLPredictExpr>(model_value, regressor_values);
1111 }
1112 
// Builds an Analyzer::PCAProjectExpr. Operand 0 is the model value, the last operand
// is the principal-component dimension and the operands in between are the feature
// values; at least three operands are required (enforced with CHECK_GE).
1113 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translatePCAProject(
1114  const RexFunctionOperator* rex_function) const {
1115  const auto num_operands = rex_function->size();
1116  CHECK_GE(num_operands, 3UL);
1117  auto model_value = translateScalarRex(rex_function->getOperand(0));
1118  std::vector<std::shared_ptr<Analyzer::Expr>> feature_values;
// Stops one short of the end: the final operand is handled separately below.
1119  for (size_t feature_idx = 1; feature_idx < num_operands - 1; ++feature_idx) {
1120  feature_values.emplace_back(
1121  translateScalarRex(rex_function->getOperand(feature_idx)));
1122  }
1123  auto pc_dimension_value =
1124  translateScalarRex(rex_function->getOperand(num_operands - 1));
1125  return makeExpr<Analyzer::PCAProjectExpr>(
1126  model_value, feature_values, pc_dimension_value);
1127 }
1128 
// Builds an Analyzer::WidthBucketExpr from exactly four operands: target value,
// lower bound, upper bound and partition count.
// Throws std::runtime_error when the partition count is not an integer type, when a
// bound has the null type, or when target/bounds are not numeric. The target and
// both bounds are cast to DOUBLE when they are not already DOUBLE.
1129 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWidthBucket(
1130  const RexFunctionOperator* rex_function) const {
1131  CHECK(rex_function->size() == 4);
1132  auto target_value = translateScalarRex(rex_function->getOperand(0));
1133  auto lower_bound = translateScalarRex(rex_function->getOperand(1));
1134  auto upper_bound = translateScalarRex(rex_function->getOperand(2));
1135  auto partition_count = translateScalarRex(rex_function->getOperand(3));
1136  if (!partition_count->get_type_info().is_integer()) {
1137  throw std::runtime_error(
1138  "PARTITION_COUNT expression of width_bucket function expects an integer type.");
1139  }
// Validates one operand: a kNULLT-typed expression is accepted only when
// allow_null_type is set; anything else must be a number type.
1140  auto check_numeric_type =
1141  [](const std::string& col_name, const Analyzer::Expr* expr, bool allow_null_type) {
1142  if (expr->get_type_info().get_type() == kNULLT) {
1143  if (!allow_null_type) {
1144  throw std::runtime_error(
1145  col_name + " expression of width_bucket function expects non-null type.");
1146  }
1147  return;
1148  }
1149  if (!expr->get_type_info().is_number()) {
1150  throw std::runtime_error(
1151  col_name + " expression of width_bucket function expects a numeric type.");
1152  }
1153  };
1154  // target value may have null value
1155  check_numeric_type("TARGET_VALUE", target_value.get(), true);
1156  check_numeric_type("LOWER_BOUND", lower_bound.get(), false);
1157  check_numeric_type("UPPER_BOUND", upper_bound.get(), false);
1158 
// The cast keeps the operand's original nullability.
1159  auto cast_to_double_if_necessary = [](std::shared_ptr<Analyzer::Expr> arg) {
1160  const auto& arg_ti = arg->get_type_info();
1161  if (arg_ti.get_type() != kDOUBLE) {
1162  const auto& double_ti = SQLTypeInfo(kDOUBLE, arg_ti.get_notnull());
1163  return arg->add_cast(double_ti);
1164  }
1165  return arg;
1166  };
1167  target_value = cast_to_double_if_necessary(target_value);
1168  lower_bound = cast_to_double_if_necessary(lower_bound);
1169  upper_bound = cast_to_double_if_necessary(upper_bound);
1170  return makeExpr<Analyzer::WidthBucketExpr>(
1171  target_value, lower_bound, upper_bound, partition_count);
1172 }
1173 
// Translates LIKE / PG_ILIKE. Operand 0 is the string argument, operand 1 the
// pattern (must translate to an Analyzer::Constant, otherwise std::runtime_error is
// thrown) and the optional operand 2 the escape expression. The case-insensitive
// flag is set when the function name equals "PG_ILIKE".
1174 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLike(
1175  const RexFunctionOperator* rex_function) const {
1176  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
1177  const auto arg = translateScalarRex(rex_function->getOperand(0));
1178  const auto like = translateScalarRex(rex_function->getOperand(1));
1179  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(like)) {
1180  throw std::runtime_error("The matching pattern must be a literal.");
1181  }
// escape stays null when only two operands were supplied.
1182  const auto escape = (rex_function->size() == 3)
1183  ? translateScalarRex(rex_function->getOperand(2))
1184  : nullptr;
1185  const bool is_ilike = rex_function->getName() == "PG_ILIKE"sv;
1186  return Parser::LikeExpr::get(arg, like, escape, is_ilike, false);
1187 }
1188 
// Translates a regular-expression match. Operand 0 is the string argument, operand 1
// the pattern (must translate to an Analyzer::Constant, otherwise std::runtime_error
// is thrown) and the optional operand 2 the escape expression.
1189 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRegexp(
1190  const RexFunctionOperator* rex_function) const {
1191  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
1192  const auto arg = translateScalarRex(rex_function->getOperand(0));
1193  const auto pattern = translateScalarRex(rex_function->getOperand(1));
1194  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(pattern)) {
1195  throw std::runtime_error("The matching pattern must be a literal.");
1196  }
// escape stays null when only two operands were supplied.
1197  const auto escape = (rex_function->size() == 3)
1198  ? translateScalarRex(rex_function->getOperand(2))
1199  : nullptr;
1200  return Parser::RegexpExpr::get(arg, pattern, escape, false);
1201 }
1202 
// Wraps the single operand in an Analyzer::LikelihoodExpr with likelihood 0.9375.
1203 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLikely(
1204  const RexFunctionOperator* rex_function) const {
1205  CHECK(rex_function->size() == 1);
1206  const auto arg = translateScalarRex(rex_function->getOperand(0));
1207  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.9375);
1208 }
1209 
// Wraps the single operand in an Analyzer::LikelihoodExpr with likelihood 0.0625.
1210 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUnlikely(
1211  const RexFunctionOperator* rex_function) const {
1212  CHECK(rex_function->size() == 1);
1213  const auto arg = translateScalarRex(rex_function->getOperand(0));
1214  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.0625);
1215 }
1216 
1217 namespace {
1218 
// NOTE(review): the line carrying this helper's return type and name (listing line
// 1219) is missing from this rendering.
// Throws std::runtime_error when literal_expr is a null pointer or holds a NULL
// literal; otherwise does nothing.
1220  const std::shared_ptr<Analyzer::Constant> literal_expr) {
1221  if (!literal_expr || literal_expr->get_is_null()) {
1222  throw std::runtime_error("The 'DatePart' argument must be a not 'null' literal.");
1223  }
1224 }
1225 
1226 } // namespace
1227 
// Translates a two-operand extract-style call: operand 0 is the time unit and
// operand 1 the source expression. When the function name equals "PG_DATE_TRUNC" the
// result comes from DateTruncExpr::generate, otherwise from ExtractExpr::generate.
1228 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateExtract(
1229  const RexFunctionOperator* rex_function) const {
1230  CHECK_EQ(size_t(2), rex_function->size());
1231  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1232  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 1233 is missing from this rendering; presumably it
// validates timeunit_lit, which is dereferenced below without a visible null check
// -- confirm against the original file.
1234  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1235  const bool is_date_trunc = rex_function->getName() == "PG_DATE_TRUNC"sv;
1236  if (is_date_trunc) {
1237  return DateTruncExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
1238  } else {
1239  return ExtractExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
1240  }
1241 }
1242 
1243 namespace {
1244 
// Creates a non-null Analyzer::Constant of numeric type ti holding val.
// The value is stored in the Datum member matching ti's type; for kDECIMAL/kNUMERIC
// it is multiplied by exp_to_scale(ti.get_scale()) first. A non-number ti, or a
// number type without a case below, aborts via CHECK.
1245 std::shared_ptr<Analyzer::Constant> makeNumericConstant(const SQLTypeInfo& ti,
1246  const long val) {
1247  CHECK(ti.is_number());
1248  Datum datum{0};
1249  switch (ti.get_type()) {
1250  case kTINYINT: {
1251  datum.tinyintval = val;
1252  break;
1253  }
1254  case kSMALLINT: {
1255  datum.smallintval = val;
1256  break;
1257  }
1258  case kINT: {
1259  datum.intval = val;
1260  break;
1261  }
1262  case kBIGINT: {
1263  datum.bigintval = val;
1264  break;
1265  }
1266  case kDECIMAL:
1267  case kNUMERIC: {
// Decimals are stored as a scaled integer in bigintval.
1268  datum.bigintval = val * exp_to_scale(ti.get_scale());
1269  break;
1270  }
1271  case kFLOAT: {
1272  datum.floatval = val;
1273  break;
1274  }
1275  case kDOUBLE: {
1276  datum.doubleval = val;
1277  break;
1278  }
1279  default:
1280  CHECK(false);
1281  }
1282  return makeExpr<Analyzer::Constant>(ti, false, datum);
1283 }
1284 
1285 } // namespace
1286 
// Translates a three-operand DATEADD-style call: operand 0 is the time unit,
// operand 1 the number of units and operand 2 the datetime expression.
// Throws std::runtime_error when the number of units is a NULL literal or when the
// datetime operand has type kTIME. The number of units is cast to BIGINT and the
// result is a kTIMESTAMP DateaddExpr with the datetime operand's dimension.
1287 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDateadd(
1288  const RexFunctionOperator* rex_function) const {
1289  CHECK_EQ(size_t(3), rex_function->size());
1290  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1291  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 1292 is missing from this rendering; presumably it
// validates timeunit_lit, which is dereferenced below without a visible null check
// -- confirm against the original file.
1293  const auto number_units = translateScalarRex(rex_function->getOperand(1));
1294  const auto number_units_const =
1295  std::dynamic_pointer_cast<Analyzer::Constant>(number_units);
// Only literal operands can be rejected here; non-literal operands pass through.
1296  if (number_units_const && number_units_const->get_is_null()) {
1297  throw std::runtime_error("The 'Interval' argument literal must not be 'null'.");
1298  }
1299  const auto cast_number_units = number_units->add_cast(SQLTypeInfo(kBIGINT, false));
1300  const auto datetime = translateScalarRex(rex_function->getOperand(2));
1301  const auto& datetime_ti = datetime->get_type_info();
1302  if (datetime_ti.get_type() == kTIME) {
1303  throw std::runtime_error("DateAdd operation not supported for TIME.");
1304  }
1305  const auto& field = to_dateadd_field(*timeunit_lit->get_constval().stringval);
1306  const int dim = datetime_ti.get_dimension();
1307  return makeExpr<Analyzer::DateaddExpr>(
1308  SQLTypeInfo(kTIMESTAMP, dim, 0, false), field, cast_number_units, datetime);
1309 }
1310 
1311 namespace {
1312 
// NOTE(review): the line carrying this helper's return type, name and parameter
// (listing line 1313) is missing from this rendering.
// Returns the string "DATETIME_PLUS"; any operator other than kPLUS aborts via
// CHECK.
1314  CHECK(op == kPLUS);
1315  return "DATETIME_PLUS"s;
1316 }
1317 
1318 } // namespace
1319 
// Attempts to translate a binary +/- operator as date/time arithmetic.
// Returns nullptr when the operator does not have exactly two operands or when the
// left operand is neither a timestamp nor a date; the caller then falls back to
// generic operator translation. A kTIME left operand throws std::runtime_error.
// - timestamp/date on the right: produces a DatediffExpr (high-precision timestamps
//   are rejected), multiplied by 1000 when the operator's type is
//   kINTERVAL_DAY_TIME.
// - interval on the right: produces a DateaddExpr in seconds (day-time interval) or
//   months (year-month interval), negated for kMINUS.
1320 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatePlusMinus(
1321  const RexOperator* rex_operator) const {
1322  if (rex_operator->size() != 2) {
1323  return nullptr;
1324  }
1325  const auto datetime = translateScalarRex(rex_operator->getOperand(0));
1326  const auto datetime_ti = datetime->get_type_info();
1327  if (!datetime_ti.is_timestamp() && !datetime_ti.is_date()) {
1328  if (datetime_ti.get_type() == kTIME) {
1329  throw std::runtime_error("DateTime addition/subtraction not supported for TIME.");
1330  }
1331  return nullptr;
1332  }
1333  const auto rhs = translateScalarRex(rex_operator->getOperand(1));
1334  const auto rhs_ti = rhs->get_type_info();
// Datetime on both sides: translate as a difference between the two operands.
1335  if (rhs_ti.get_type() == kTIMESTAMP || rhs_ti.get_type() == kDATE) {
1336  if (datetime_ti.is_high_precision_timestamp() ||
1337  rhs_ti.is_high_precision_timestamp()) {
1338  throw std::runtime_error(
1339  "High Precision timestamps are not supported for TIMESTAMPDIFF operation. "
1340  "Use "
1341  "DATEDIFF.");
1342  }
1343  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1344  const auto& rex_operator_ti = rex_operator->getType();
// The operator's declared type selects the unit: seconds for a day-time interval,
// months otherwise.
1345  const auto datediff_field =
1346  (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) ? dtSECOND : dtMONTH;
// Note the operand order: rhs is passed before datetime.
1347  auto result =
1348  makeExpr<Analyzer::DatediffExpr>(bigint_ti, datediff_field, rhs, datetime);
1349  // multiply 1000 to result since expected result should be in millisecond precision.
1350  if (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) {
1351  return makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1352  kMULTIPLY,
1353  kONE,
1354  result,
1355  makeNumericConstant(bigint_ti, 1000));
1356  } else {
1357  return result;
1358  }
1359  }
1360  const auto op = rex_operator->getOperator();
// For addition, first try to rewrite the expression into a date_trunc form; a null
// result means the rewrite does not apply.
1361  if (op == kPLUS) {
1362  std::vector<std::shared_ptr<Analyzer::Expr>> args = {datetime, rhs};
1363  auto dt_plus = makeExpr<Analyzer::FunctionOper>(
1364  datetime_ti, get_datetimeplus_rewrite_funcname(op), args);
1365  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1366  if (date_trunc) {
1367  return date_trunc;
1368  }
1369  }
1370  const auto interval = fold_expr(rhs.get());
1371  auto interval_ti = interval->get_type_info();
1372  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
// Non-null only when the folded interval is a constant.
1373  const auto interval_lit = std::dynamic_pointer_cast<Analyzer::Constant>(interval);
1374  if (interval_ti.get_type() == kINTERVAL_DAY_TIME) {
1375  std::shared_ptr<Analyzer::Expr> interval_sec;
1376  if (interval_lit) {
// Literal interval: negate (for kMINUS) and divide by 1000 at translation time.
1377  interval_sec =
1378  makeNumericConstant(bigint_ti,
1379  (op == kMINUS ? -interval_lit->get_constval().bigintval
1380  : interval_lit->get_constval().bigintval) /
1381  1000);
1382  } else {
// Non-literal interval: emit the division (and the negation for kMINUS) as
// expressions.
1383  interval_sec = makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1384  kDIVIDE,
1385  kONE,
1386  interval,
1387  makeNumericConstant(bigint_ti, 1000));
1388  if (op == kMINUS) {
1389  interval_sec =
1390  std::make_shared<Analyzer::UOper>(bigint_ti, false, kUMINUS, interval_sec);
1391  }
1392  }
1393  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daSECOND, interval_sec, datetime);
1394  }
// Any interval type other than day-time must be year-month at this point.
1395  CHECK(interval_ti.get_type() == kINTERVAL_YEAR_MONTH);
1396  const auto interval_months = op == kMINUS ? std::make_shared<Analyzer::UOper>(
1397  bigint_ti, false, kUMINUS, interval)
1398  : interval;
1399  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daMONTH, interval_months, datetime);
1400 }
1401 
// Translates a three-operand DATEDIFF-style call: operand 0 is the time unit,
// operand 1 the start and operand 2 the end expression. The result is a BIGINT
// Analyzer::DatediffExpr.
1402 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatediff(
1403  const RexFunctionOperator* rex_function) const {
1404  CHECK_EQ(size_t(3), rex_function->size());
1405  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1406  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 1407 is missing from this rendering; presumably it
// validates timeunit_lit, which is dereferenced below without a visible null check
// -- confirm against the original file.
1408  const auto start = translateScalarRex(rex_function->getOperand(1));
1409  const auto end = translateScalarRex(rex_function->getOperand(2));
1410  const auto field = to_datediff_field(*timeunit_lit->get_constval().stringval);
1411  return makeExpr<Analyzer::DatediffExpr>(SQLTypeInfo(kBIGINT, false), field, start, end);
1412 }
1413 
// Translates a two-operand DATEPART-style call: operand 0 is the time unit and
// operand 1 the source expression. The unit string is mapped through
// to_datepart_field and the result is produced by ExtractExpr::generate.
1414 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatepart(
1415  const RexFunctionOperator* rex_function) const {
1416  CHECK_EQ(size_t(2), rex_function->size());
1417  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1418  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 1419 is missing from this rendering; presumably it
// validates timeunit_lit, which is dereferenced below without a visible null check
// -- confirm against the original file.
1420  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1421  return ExtractExpr::generate(
1422  from_expr, to_datepart_field(*timeunit_lit->get_constval().stringval));
1423 }
1424 
// Builds an Analyzer::CharLengthExpr over the decompressed form of the single
// operand. The second constructor argument is true when the function name equals
// "CHAR_LENGTH".
1425 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLength(
1426  const RexFunctionOperator* rex_function) const {
1427  CHECK_EQ(size_t(1), rex_function->size());
1428  const auto str_arg = translateScalarRex(rex_function->getOperand(0));
1429  return makeExpr<Analyzer::CharLengthExpr>(str_arg->decompress(),
1430  rex_function->getName() == "CHAR_LENGTH"sv);
1431 }
1432 
// Builds an Analyzer::KeyForStringExpr over the single translated argument.
// Throws std::runtime_error when the argument is not a string type, is a
// variable-length type, or is an UNNEST unary operator.
1433 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateKeyForString(
1434  const RexFunctionOperator* rex_function) const {
1435  const auto& args = translateFunctionArgs(rex_function);
1436  CHECK_EQ(size_t(1), args.size());
1437  const auto expr = dynamic_cast<Analyzer::Expr*>(args[0].get());
// is_varlen() being true is treated as "not dictionary encoded" per the error text.
1438  if (nullptr == expr || !expr->get_type_info().is_string() ||
1439  expr->get_type_info().is_varlen()) {
1440  throw std::runtime_error(rex_function->getName() +
1441  " expects a dictionary encoded text column.");
1442  }
1443  auto unnest_arg = dynamic_cast<Analyzer::UOper*>(expr);
1444  if (unnest_arg && unnest_arg->get_optype() == SQLOps::kUNNEST) {
1445  throw std::runtime_error(
1446  rex_function->getName() +
1447  " does not support unnest operator as its input expression.");
1448  }
1449  return makeExpr<Analyzer::KeyForStringExpr>(args[0]);
1450 }
1451 
// Builds an Analyzer::SampleRatioExpr over the single operand, casting it to DOUBLE
// (keeping its nullability) when it is not already DOUBLE.
1452 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSampleRatio(
1453  const RexFunctionOperator* rex_function) const {
1454  CHECK_EQ(size_t(1), rex_function->size());
1455  auto arg = translateScalarRex(rex_function->getOperand(0));
1456  const auto& arg_ti = arg->get_type_info();
1457  if (arg_ti.get_type() != kDOUBLE) {
1458  const auto& double_ti = SQLTypeInfo(kDOUBLE, arg_ti.get_notnull());
1459  arg = arg->add_cast(double_ti);
1460  }
1461  return makeExpr<Analyzer::SampleRatioExpr>(arg);
1462 }
1463 
// Returns a user literal for the current session's user name, or for the
// placeholder "SESSIONLESS_USER" when query_state_ is not set. The rex_function
// argument is not read.
1464 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentUser(
1465  const RexFunctionOperator* rex_function) const {
1466  std::string user{"SESSIONLESS_USER"};
1467  if (query_state_) {
1468  user = query_state_->getConstSessionInfo()->get_currentUser().userName;
1469  }
1470  return Parser::UserLiteral::get(user);
1471 }
1472 
// Maps a string function call onto the matching Analyzer::*StringOper expression.
// The function name is converted to a SqlStringOpKind via name_to_string_op_kind and
// the translated arguments are handed to the expression selected by the switch.
// Throws std::runtime_error for unsupported functions.
// NOTE(review): the guard condition (listing line 1476) and most `case` labels of
// the switch are missing from this rendering, so the label belonging to each return
// statement below cannot be confirmed from here.
1473 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateStringOper(
1474  const RexFunctionOperator* rex_function) const {
1475  const auto func_name = rex_function->getName();
1477  std::ostringstream oss;
1478  oss << "Function " << func_name << " not supported.";
1479  throw std::runtime_error(oss.str());
1480  }
1481  const auto string_op_kind = ::name_to_string_op_kind(func_name);
1482  auto args = translateFunctionArgs(rex_function);
1483 
1484  switch (string_op_kind) {
1486  return makeExpr<Analyzer::LowerStringOper>(args);
1488  return makeExpr<Analyzer::UpperStringOper>(args);
1490  return makeExpr<Analyzer::InitCapStringOper>(args);
1492  return makeExpr<Analyzer::ReverseStringOper>(args);
1494  return makeExpr<Analyzer::RepeatStringOper>(args);
1496  return makeExpr<Analyzer::ConcatStringOper>(args);
// Pad and trim share one expression class each; the concrete kind is passed through.
1497  case SqlStringOpKind::LPAD:
1498  case SqlStringOpKind::RPAD: {
1499  return makeExpr<Analyzer::PadStringOper>(string_op_kind, args);
1500  }
1501  case SqlStringOpKind::TRIM:
1503  case SqlStringOpKind::RTRIM: {
1504  return makeExpr<Analyzer::TrimStringOper>(string_op_kind, args);
1505  }
1507  return makeExpr<Analyzer::SubstringStringOper>(args);
1509  return makeExpr<Analyzer::OverlayStringOper>(args);
1511  return makeExpr<Analyzer::ReplaceStringOper>(args);
1513  return makeExpr<Analyzer::SplitPartStringOper>(args);
1515  return makeExpr<Analyzer::RegexpReplaceStringOper>(args);
1517  return makeExpr<Analyzer::RegexpSubstrStringOper>(args);
1519  return makeExpr<Analyzer::JsonValueStringOper>(args);
1521  return makeExpr<Analyzer::Base64EncodeStringOper>(args);
1523  return makeExpr<Analyzer::Base64DecodeStringOper>(args);
// The only branch that also forwards the function's declared return type.
1525  return makeExpr<Analyzer::TryStringCastOper>(rex_function->getType(), args);
1527  return makeExpr<Analyzer::PositionStringOper>(args);
1529  return makeExpr<Analyzer::JarowinklerSimilarityStringOper>(args);
1531  return makeExpr<Analyzer::LevenshteinDistanceStringOper>(args);
1532  default: {
1533  throw std::runtime_error("Unsupported string function.");
1534  }
1535  }
1536 }
1537 
// NOTE(review): the line carrying the return type and function name (listing line
// 1538) is missing from this rendering.
//
// Translates an array cardinality call on operand 0.
// Throws std::runtime_error when the operand is not an array, when its subtype is
// kARRAY, or when a fixed-length array reports a non-positive element size.
// A positive get_size() is treated as a fixed-length array and yields a numeric
// constant; otherwise an Analyzer::CardinalityExpr is returned.
1539  const RexFunctionOperator* rex_function) const {
1540  const auto ret_ti = rex_function->getType();
1541  const auto arg = translateScalarRex(rex_function->getOperand(0));
1542  const auto arg_ti = arg->get_type_info();
1543  if (!arg_ti.is_array()) {
1544  throw std::runtime_error(rex_function->getName() + " expects an array expression.");
1545  }
1546  if (arg_ti.get_subtype() == kARRAY) {
1547  throw std::runtime_error(rex_function->getName() +
1548  " expects one-dimension array expression.");
1549  }
1550  const auto array_size = arg_ti.get_size();
1551  const auto array_elem_size = arg_ti.get_elem_type().get_array_context_logical_size();
1552 
1553  if (array_size > 0) {
// Guards the division below against a zero or negative element size.
1554  if (array_elem_size <= 0) {
1555  throw std::runtime_error(rex_function->getName() +
1556  ": unexpected array element type.");
1557  }
1558  // Return cardinality of a fixed length array
1559  return makeNumericConstant(ret_ti, array_size / array_elem_size);
1560  }
1561  // Variable length array cardinality will be calculated at runtime
1562  return makeExpr<Analyzer::CardinalityExpr>(arg);
1563 }
1564 
// Translates an array element access: operand 0 is the array expression and
// operand 1 the index. The result is a kARRAY_AT binary operator typed as the
// array's element type.
1565 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateItem(
1566  const RexFunctionOperator* rex_function) const {
1567  CHECK_EQ(size_t(2), rex_function->size());
1568  const auto base = translateScalarRex(rex_function->getOperand(0));
1569  const auto index = translateScalarRex(rex_function->getOperand(1));
1570  return makeExpr<Analyzer::BinOper>(
1571  base->get_type_info().get_elem_type(), false, kARRAY_AT, kONE, base, index);
1572 }
1573 
// Returns a non-null kDATE constant holding now_ rounded down to a multiple of
// 24 * 60 * 60 (the number of seconds in a day).
1574 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentDate() const {
1575  constexpr bool is_null = false;
1576  Datum datum;
1577  datum.bigintval = now_ - now_ % (24 * 60 * 60); // Assumes 0 < now_.
1578  return makeExpr<Analyzer::Constant>(kDATE, is_null, datum);
1579 }
1580 
// Returns a non-null kTIME constant holding now_ modulo 24 * 60 * 60 (the number of
// seconds in a day).
1581 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentTime() const {
1582  constexpr bool is_null = false;
1583  Datum datum;
1584  datum.bigintval = now_ % (24 * 60 * 60); // Assumes 0 < now_.
1585  return makeExpr<Analyzer::Constant>(kTIME, is_null, datum);
1586 }
1587 
// NOTE(review): the body of this function (listing line 1589) is missing from this
// rendering, so what it returns cannot be confirmed from here.
1588 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentTimestamp() const {
1590 }
1591 
// Translates DATETIME(arg). Only a non-null string literal equal to "NOW" is
// accepted, in which case the result of translateCurrentTimestamp() is returned;
// every other argument throws std::runtime_error.
1592 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatetime(
1593  const RexFunctionOperator* rex_function) const {
1594  CHECK_EQ(size_t(1), rex_function->size());
1595  const auto arg = translateScalarRex(rex_function->getOperand(0));
1596  const auto arg_lit = std::dynamic_pointer_cast<Analyzer::Constant>(arg);
1597  const std::string datetime_err{R"(Only DATETIME('NOW') supported for now.)"};
1598  if (!arg_lit || arg_lit->get_is_null()) {
1599  throw std::runtime_error(datetime_err);
1600  }
// A non-string literal aborts via CHECK rather than throwing.
1601  CHECK(arg_lit->get_type_info().is_string());
// The comparison is exact, so only the upper-case spelling matches.
1602  if (*arg_lit->get_constval().stringval != "NOW"sv) {
1603  throw std::runtime_error(datetime_err);
1604  }
1605  return translateCurrentTimestamp();
1606 }
1607 
// Translates ABS(x) into the equivalent of
// `CASE WHEN x < 0 THEN -x ELSE x END`, typed as the operand's type.
// A non-numeric operand aborts via CHECK.
1608 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAbs(
1609  const RexFunctionOperator* rex_function) const {
1610  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1611  expr_list;
1612  CHECK_EQ(size_t(1), rex_function->size());
1613  const auto operand = translateScalarRex(rex_function->getOperand(0));
1614  const auto& operand_ti = operand->get_type_info();
1615  CHECK(operand_ti.is_number());
1616  const auto zero = makeNumericConstant(operand_ti, 0);
1617  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1618  const auto uminus_operand =
1619  makeExpr<Analyzer::UOper>(operand_ti.get_type(), kUMINUS, operand);
1620  expr_list.emplace_back(lt_zero, uminus_operand);
// The operand itself serves as the ELSE branch.
1621  return makeExpr<Analyzer::CaseExpr>(operand_ti, false, expr_list, operand);
1622 }
1623 
// Translates SIGN(x) into the equivalent of
// `CASE WHEN x < 0 THEN -1 WHEN x = 0 THEN 0 WHEN x > 0 THEN 1 ELSE NULL END`,
// with every branch constant typed as the operand's type.
// A non-numeric operand aborts via CHECK.
1624 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSign(
1625  const RexFunctionOperator* rex_function) const {
1626  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1627  expr_list;
1628  CHECK_EQ(size_t(1), rex_function->size());
1629  const auto operand = translateScalarRex(rex_function->getOperand(0));
1630  const auto& operand_ti = operand->get_type_info();
1631  CHECK(operand_ti.is_number());
1632  const auto zero = makeNumericConstant(operand_ti, 0);
1633  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1634  expr_list.emplace_back(lt_zero, makeNumericConstant(operand_ti, -1));
1635  const auto eq_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, operand, zero);
1636  expr_list.emplace_back(eq_zero, makeNumericConstant(operand_ti, 0));
1637  const auto gt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kGT, kONE, operand, zero);
1638  expr_list.emplace_back(gt_zero, makeNumericConstant(operand_ti, 1));
// The ELSE branch is a constant flagged as null (second constructor argument true).
1639  return makeExpr<Analyzer::CaseExpr>(
1640  operand_ti,
1641  false,
1642  expr_list,
1643  makeExpr<Analyzer::Constant>(operand_ti, true, Datum{0}));
1644 }
1645 
// Returns a new Analyzer::OffsetInFragment expression; takes no operands.
1646 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOffsetInFragment() const {
1647  return makeExpr<Analyzer::OffsetInFragment>();
1648 }
1649 
// NOTE(review): the line carrying the return type and function name (listing line
// 1650) is missing from this rendering.
//
// Builds an Analyzer::ArrayExpr from the translated function arguments.
// When the declared array type already has a subtype it is used as-is. When the
// subtype is kNULLT it is derived from the first element, after checking that all
// elements share the same nullable logical type (none-encoded strings are treated
// as mutually compatible); a mismatch throws std::runtime_error. An empty argument
// list gets the subtype kBOOLEAN.
1651  const RexFunctionOperator* rex_function) const {
1652  if (rex_function->getType().get_subtype() == kNULLT) {
1653  auto sql_type = rex_function->getType();
1654  CHECK(sql_type.get_type() == kARRAY);
1655 
1656  // FIX-ME: Deal with NULL arrays
1657  auto translated_function_args(translateFunctionArgs(rex_function));
1658  if (translated_function_args.size() > 0) {
1659  const auto first_element_logical_type =
1660  get_nullable_logical_type_info(translated_function_args[0]->get_type_info());
1661 
// Finds the first element whose logical type is incompatible with element 0.
1662  auto diff_elem_itr =
1663  std::find_if(translated_function_args.begin(),
1664  translated_function_args.end(),
1665  [first_element_logical_type](const auto expr) {
1666  const auto element_logical_type =
1667  get_nullable_logical_type_info(expr->get_type_info());
1668  if (first_element_logical_type != element_logical_type) {
1669  if (first_element_logical_type.is_none_encoded_string() &&
1670  element_logical_type.is_none_encoded_string()) {
1671  return false;
1672  }
1673  return true;
1674  }
1675  return false;
1676  });
1677  if (diff_elem_itr != translated_function_args.end()) {
1678  throw std::runtime_error(
1679  "Element " +
1680  std::to_string(diff_elem_itr - translated_function_args.begin()) +
1681  " is not of the same type as other elements of the array. Consider casting "
1682  "to force this condition.\nElement Type: " +
1683  get_nullable_logical_type_info((*diff_elem_itr)->get_type_info())
1684  .to_string() +
1685  "\nArray type: " + first_element_logical_type.to_string());
1686  }
1687 
// String elements always produce a dictionary-encoded TEXT subtype: none-encoded
// strings use the transient dictionary, dictionary-encoded strings keep their own.
1688  if (first_element_logical_type.is_string()) {
1689  sql_type.set_subtype(kTEXT);
1690  sql_type.set_compression(kENCODING_DICT);
1691  if (first_element_logical_type.is_none_encoded_string()) {
1692  sql_type.set_comp_param(TRANSIENT_DICT_ID);
1693  sql_type.setStringDictKey(shared::StringDictKey::kTransientDictKey);
1694  } else {
1695  CHECK(first_element_logical_type.is_dict_encoded_string());
1696  sql_type.set_comp_param(first_element_logical_type.get_comp_param());
1697  sql_type.setStringDictKey(first_element_logical_type.getStringDictKey());
1698  }
// NOTE(review): this branch looks unreachable if every dictionary-encoded string
// also satisfies is_string() above -- confirm against SQLTypeInfo.
1699  } else if (first_element_logical_type.is_dict_encoded_string()) {
1700  sql_type.set_subtype(kTEXT);
1701  sql_type.set_compression(kENCODING_DICT);
1702  sql_type.set_comp_param(first_element_logical_type.get_comp_param());
1703  sql_type.setStringDictKey(first_element_logical_type.getStringDictKey());
1704  } else {
// Non-string elements: copy the type, scale and precision of the first element.
1705  sql_type.set_subtype(first_element_logical_type.get_type());
1706  sql_type.set_scale(first_element_logical_type.get_scale());
1707  sql_type.set_precision(first_element_logical_type.get_precision());
1708  }
1709 
1710  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1711  } else {
1712  // defaulting to valid sub-type for convenience
1713  sql_type.set_subtype(kBOOLEAN);
1714  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1715  }
1716  } else {
1717  return makeExpr<Analyzer::ArrayExpr>(rex_function->getType(),
1718  translateFunctionArgs(rex_function));
1719  }
1720 }
1721 
1722 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateFunction(
1723  const RexFunctionOperator* rex_function) const {
1724  if (func_resolve(rex_function->getName(), "LIKE"sv, "PG_ILIKE"sv)) {
1725  return translateLike(rex_function);
1726  }
1727  if (rex_function->getName() == "REGEXP_LIKE"sv) {
1728  return translateRegexp(rex_function);
1729  }
1730  if (rex_function->getName() == "LIKELY"sv) {
1731  return translateLikely(rex_function);
1732  }
1733  if (rex_function->getName() == "UNLIKELY"sv) {
1734  return translateUnlikely(rex_function);
1735  }
1736  if (func_resolve(rex_function->getName(), "PG_EXTRACT"sv, "PG_DATE_TRUNC"sv)) {
1737  return translateExtract(rex_function);
1738  }
1739  if (rex_function->getName() == "DATEADD"sv) {
1740  return translateDateadd(rex_function);
1741  }
1742  if (rex_function->getName() == "DATEDIFF"sv) {
1743  return translateDatediff(rex_function);
1744  }
1745  if (rex_function->getName() == "DATEPART"sv) {
1746  return translateDatepart(rex_function);
1747  }
1748  if (func_resolve(rex_function->getName(), "LENGTH"sv, "CHAR_LENGTH"sv)) {
1749  return translateLength(rex_function);
1750  }
1751  if (rex_function->getName() == "KEY_FOR_STRING"sv) {
1752  return translateKeyForString(rex_function);
1753  }
1754  if (rex_function->getName() == "WIDTH_BUCKET"sv) {
1755  return translateWidthBucket(rex_function);
1756  }
1757  if (rex_function->getName() == "SAMPLE_RATIO"sv) {
1758  return translateSampleRatio(rex_function);
1759  }
1760  if (rex_function->getName() == "CURRENT_USER"sv) {
1761  return translateCurrentUser(rex_function);
1762  }
1763  if (rex_function->getName() == "ML_PREDICT"sv) {
1764  return translateMLPredict(rex_function);
1765  }
1766  if (rex_function->getName() == "PCA_PROJECT"sv) {
1767  return translatePCAProject(rex_function);
1768  }
1769  if (func_resolve(rex_function->getName(),
1770  "LOWER"sv,
1771  "UPPER"sv,
1772  "INITCAP"sv,
1773  "REVERSE"sv,
1774  "REPEAT"sv,
1775  "||"sv,
1776  "LPAD"sv,
1777  "RPAD"sv,
1778  "TRIM"sv,
1779  "LTRIM"sv,
1780  "RTRIM"sv,
1781  "SUBSTRING"sv,
1782  "OVERLAY"sv,
1783  "REPLACE"sv,
1784  "SPLIT_PART"sv,
1785  "REGEXP_REPLACE"sv,
1786  "REGEXP_SUBSTR"sv,
1787  "REGEXP_MATCH"sv,
1788  "JSON_VALUE"sv,
1789  "BASE64_ENCODE"sv,
1790  "BASE64_DECODE"sv,
1791  "TRY_CAST"sv,
1792  "POSITION"sv,
1793  "JAROWINKLER_SIMILARITY"sv,
1794  "LEVENSHTEIN_DISTANCE"sv)) {
1795  return translateStringOper(rex_function);
1796  }
1797  if (func_resolve(rex_function->getName(), "CARDINALITY"sv, "ARRAY_LENGTH"sv)) {
1798  return translateCardinality(rex_function);
1799  }
1800  if (rex_function->getName() == "ITEM"sv) {
1801  return translateItem(rex_function);
1802  }
1803  if (rex_function->getName() == "CURRENT_DATE"sv) {
1804  return translateCurrentDate();
1805  }
1806  if (rex_function->getName() == "CURRENT_TIME"sv) {
1807  return translateCurrentTime();
1808  }
1809  if (rex_function->getName() == "CURRENT_TIMESTAMP"sv) {
1810  return translateCurrentTimestamp();
1811  }
1812  if (rex_function->getName() == "NOW"sv) {
1813  return translateCurrentTimestamp();
1814  }
1815  if (rex_function->getName() == "DATETIME"sv) {
1816  return translateDatetime(rex_function);
1817  }
1818  if (func_resolve(rex_function->getName(), "usTIMESTAMP"sv, "nsTIMESTAMP"sv)) {
1819  return translateHPTLiteral(rex_function);
1820  }
1821  if (rex_function->getName() == "ABS"sv) {
1822  return translateAbs(rex_function);
1823  }
1824  if (rex_function->getName() == "SIGN"sv) {
1825  return translateSign(rex_function);
1826  }
1827  if (func_resolve(rex_function->getName(), "CEIL"sv, "FLOOR"sv)) {
1828  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1829  rex_function->getType(),
1830  rex_function->getName(),
1831  translateFunctionArgs(rex_function));
1832  } else if (rex_function->getName() == "ROUND"sv) {
1833  std::vector<std::shared_ptr<Analyzer::Expr>> args =
1834  translateFunctionArgs(rex_function);
1835 
1836  if (rex_function->size() == 1) {
1837  // push a 0 constant if 2nd operand is missing.
1838  // this needs to be done as calcite returns
1839  // only the 1st operand without defaulting the 2nd one
1840  // when the user did not specify the 2nd operand.
1841  SQLTypes t = kSMALLINT;
1842  Datum d;
1843  d.smallintval = 0;
1844  args.push_back(makeExpr<Analyzer::Constant>(t, false, d));
1845  }
1846 
1847  // make sure we have only 2 operands
1848  CHECK(args.size() == 2);
1849 
1850  if (!args[0]->get_type_info().is_number()) {
1851  throw std::runtime_error("Only numeric 1st operands are supported");
1852  }
1853 
1854  // the 2nd operand does not need to be a constant
1855  // it can happily reference another integer column
1856  if (!args[1]->get_type_info().is_integer()) {
1857  throw std::runtime_error("Only integer 2nd operands are supported");
1858  }
1859 
1860  // Calcite may upcast decimals in a way that is
1861  // incompatible with the extension function input. Play it safe and stick with the
1862  // argument type instead.
1863  const SQLTypeInfo ret_ti = args[0]->get_type_info().is_decimal()
1864  ? args[0]->get_type_info()
1865  : rex_function->getType();
1866 
1867  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1868  ret_ti, rex_function->getName(), args);
1869  }
1870  if (rex_function->getName() == "DATETIME_PLUS"sv) {
1871  auto dt_plus = makeExpr<Analyzer::FunctionOper>(rex_function->getType(),
1872  rex_function->getName(),
1873  translateFunctionArgs(rex_function));
1874  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1875  if (date_trunc) {
1876  return date_trunc;
1877  }
1878  return translateDateadd(rex_function);
1879  }
1880  if (rex_function->getName() == "/INT"sv) {
1881  CHECK_EQ(size_t(2), rex_function->size());
1882  std::shared_ptr<Analyzer::Expr> lhs = translateScalarRex(rex_function->getOperand(0));
1883  std::shared_ptr<Analyzer::Expr> rhs = translateScalarRex(rex_function->getOperand(1));
1884  const auto rhs_lit = std::dynamic_pointer_cast<Analyzer::Constant>(rhs);
1885  return Parser::OperExpr::normalize(kDIVIDE, kONE, lhs, rhs);
1886  }
1887  if (rex_function->getName() == "Reinterpret"sv) {
1888  CHECK_EQ(size_t(1), rex_function->size());
1889  return translateScalarRex(rex_function->getOperand(0));
1890  }
1891  if (func_resolve(rex_function->getName(),
1892  "ST_X"sv,
1893  "ST_Y"sv,
1894  "ST_XMin"sv,
1895  "ST_YMin"sv,
1896  "ST_XMax"sv,
1897  "ST_YMax"sv,
1898  "ST_NRings"sv,
1899  "ST_NumGeometries"sv,
1900  "ST_NPoints"sv,
1901  "ST_Length"sv,
1902  "ST_Perimeter"sv,
1903  "ST_Area"sv,
1904  "ST_SRID"sv,
1905  "HeavyDB_Geo_PolyBoundsPtr"sv)) {
1906  CHECK_EQ(rex_function->size(), size_t(1));
1907  return translateUnaryGeoFunction(rex_function);
1908  }
1909  if (func_resolve(rex_function->getName(), "ST_ConvexHull"sv)) {
1910  CHECK_EQ(rex_function->size(), size_t(1));
1911  SQLTypeInfo ti;
1912  return translateUnaryGeoConstructor(rex_function, ti, false);
1913  }
1914  if (func_resolve(rex_function->getName(),
1915  "convert_meters_to_pixel_width"sv,
1916  "convert_meters_to_pixel_height"sv,
1917  "is_point_in_view"sv,
1918  "is_point_size_in_view"sv)) {
1919  return translateFunctionWithGeoArg(rex_function);
1920  }
1921  if (func_resolve(rex_function->getName(),
1922  "ST_Distance"sv,
1923  "ST_MaxDistance"sv,
1924  "ST_Intersects"sv,
1925  "ST_Disjoint"sv,
1926  "ST_Contains"sv,
1927  "ST_IntersectsBox"sv,
1928  "ST_Approx_Overlaps"sv,
1929  "ST_Within"sv)) {
1930  CHECK_EQ(rex_function->size(), size_t(2));
1931  return translateBinaryGeoFunction(rex_function);
1932  }
1933  if (func_resolve(rex_function->getName(), "ST_DWithin"sv, "ST_DFullyWithin"sv)) {
1934  CHECK_EQ(rex_function->size(), size_t(3));
1935  return translateTernaryGeoFunction(rex_function);
1936  }
1937  if (rex_function->getName() == "OFFSET_IN_FRAGMENT"sv) {
1938  CHECK_EQ(size_t(0), rex_function->size());
1939  return translateOffsetInFragment();
1940  }
1941  if (rex_function->getName() == "ARRAY"sv) {
1942  // Var args; currently no check. Possible fix-me -- can array have 0 elements?
1943  return translateArrayFunction(rex_function);
1944  }
1945  if (func_resolve(rex_function->getName(),
1946  "ST_GeomFromText"sv,
1947  "ST_GeogFromText"sv,
1948  "ST_Centroid"sv,
1949  "ST_SetSRID"sv,
1950  "ST_Point"sv, // TODO: where should this and below live?
1951  "ST_PointN"sv,
1952  "ST_StartPoint"sv,
1953  "ST_EndPoint"sv,
1954  "ST_Transform"sv)) {
1955  SQLTypeInfo ti;
1956  return translateGeoProjection(rex_function, ti, false);
1957  }
1958  if (func_resolve(rex_function->getName(),
1959  "ST_Intersection"sv,
1960  "ST_Difference"sv,
1961  "ST_Union"sv,
1962  "ST_Buffer"sv,
1963  "ST_ConcaveHull"sv)) {
1964  CHECK_EQ(rex_function->size(), size_t(2));
1965  SQLTypeInfo ti;
1966  return translateBinaryGeoConstructor(rex_function, ti, false);
1967  }
1968  if (func_resolve(rex_function->getName(), "ST_IsEmpty"sv, "ST_IsValid"sv)) {
1969  CHECK_EQ(rex_function->size(), size_t(1));
1970  SQLTypeInfo ti;
1971  return translateUnaryGeoPredicate(rex_function, ti, false);
1972  }
1973  if (func_resolve(rex_function->getName(), "ST_Equals"sv)) {
1974  CHECK_EQ(rex_function->size(), size_t(2));
1975  // Attempt to generate a distance based check for points
1976  if (auto distance_check = translateBinaryGeoFunction(rex_function)) {
1977  return distance_check;
1978  }
1979  SQLTypeInfo ti;
1980  return translateBinaryGeoPredicate(rex_function, ti, false);
1981  }
1982 
1983  auto arg_expr_list = translateFunctionArgs(rex_function);
1984  if (rex_function->getName() == std::string("||") ||
1985  rex_function->getName() == std::string("SUBSTRING")) {
1986  SQLTypeInfo ret_ti(kTEXT, false);
1987  return makeExpr<Analyzer::FunctionOper>(
1988  ret_ti, rex_function->getName(), arg_expr_list);
1989  }
1990 
1991  // Reset possibly wrong return type of rex_function to the return
1992  // type of the optimal valid implementation. The return type can be
1993  // wrong in the case of multiple implementations of UDF functions
1994  // that have different return types but Calcite specifies the return
1995  // type according to the first implementation.
1996  SQLTypeInfo ret_ti;
1997  try {
1998  auto ext_func_sig = bind_function(rex_function->getName(), arg_expr_list);
1999  auto ext_func_args = ext_func_sig.getInputArgs();
2000  CHECK_LE(arg_expr_list.size(), ext_func_args.size());
2001  for (size_t i = 0, di = 0; i < arg_expr_list.size(); i++) {
2002  CHECK_LT(i + di, ext_func_args.size());
2003  auto ext_func_arg = ext_func_args[i + di];
2004  if (ext_func_arg == ExtArgumentType::PInt8 ||
2005  ext_func_arg == ExtArgumentType::PInt16 ||
2006  ext_func_arg == ExtArgumentType::PInt32 ||
2007  ext_func_arg == ExtArgumentType::PInt64 ||
2008  ext_func_arg == ExtArgumentType::PFloat ||
2009  ext_func_arg == ExtArgumentType::PDouble ||
2010  ext_func_arg == ExtArgumentType::PBool) {
2011  di++;
2012  // pointer argument follows length argument:
2013  CHECK(ext_func_args[i + di] == ExtArgumentType::Int64);
2014  }
2015  // fold casts on constants
2016  if (auto constant =
2017  std::dynamic_pointer_cast<Analyzer::Constant>(arg_expr_list[i])) {
2018  auto ext_func_arg_ti = ext_arg_type_to_type_info(ext_func_arg);
2019  if (ext_func_arg_ti != arg_expr_list[i]->get_type_info()) {
2020  arg_expr_list[i] = constant->add_cast(ext_func_arg_ti);
2021  }
2022  }
2023  }
2024 
2025  ret_ti = ext_arg_type_to_type_info(ext_func_sig.getRet());
2026  } catch (ExtensionFunctionBindingError& e) {
2027  LOG(WARNING) << "RelAlgTranslator::translateFunction: " << e.what();
2028  throw;
2029  }
2030 
2031  // By default, the extension function type will not allow nulls. If one of the arguments
2032  // is nullable, the extension function must also explicitly allow nulls.
2033  bool arguments_not_null = true;
2034  for (const auto& arg_expr : arg_expr_list) {
2035  if (!arg_expr->get_type_info().get_notnull()) {
2036  arguments_not_null = false;
2037  break;
2038  }
2039  }
2040  ret_ti.set_notnull(arguments_not_null);
2041 
2042  return makeExpr<Analyzer::FunctionOper>(ret_ti, rex_function->getName(), arg_expr_list);
2043 }
2044 
2045 namespace {
2046 
2047 std::vector<Analyzer::OrderEntry> translate_collation(
2048  const std::vector<SortField>& sort_fields) {
2049  std::vector<Analyzer::OrderEntry> collation;
2050  for (size_t i = 0; i < sort_fields.size(); ++i) {
2051  const auto& sort_field = sort_fields[i];
2052  collation.emplace_back(i,
2053  sort_field.getSortDir() == SortDirection::Descending,
2054  sort_field.getNullsPosition() == NullSortedPosition::First);
2055  }
2056  return collation;
2057 }
2058 
2059 size_t determineTimeValMultiplierForTimeType(const SQLTypes& window_frame_bound_type,
2060  const Analyzer::Constant* const_expr) {
2061  const auto time_unit_val = const_expr->get_constval().bigintval;
2062  if (window_frame_bound_type == kINTERVAL_DAY_TIME) {
2063  if (time_unit_val == kMilliSecsPerSec) {
2064  return 1;
2065  } else if (time_unit_val == kMilliSecsPerMin) {
2066  return kSecsPerMin;
2067  } else if (time_unit_val == kMilliSecsPerHour) {
2068  return kSecsPerHour;
2069  }
2070  }
2071  CHECK(false);
2072  return kUNKNOWN_FIELD;
2073 }
2074 
2075 ExtractField determineTimeUnit(const SQLTypes& window_frame_bound_type,
2076  const Analyzer::Constant* const_expr) {
2077  const auto time_unit_val = const_expr->get_constval().bigintval;
2078  if (window_frame_bound_type == kINTERVAL_DAY_TIME) {
2079  if (time_unit_val == kMilliSecsPerSec) {
2080  return kSECOND;
2081  } else if (time_unit_val == kMilliSecsPerMin) {
2082  return kMINUTE;
2083  } else if (time_unit_val == kMilliSecsPerHour) {
2084  return kHOUR;
2085  } else if (time_unit_val == kMilliSecsPerDay) {
2086  return kDAY;
2087  }
2088  } else {
2089  CHECK(window_frame_bound_type == kINTERVAL_YEAR_MONTH);
2090  if (time_unit_val == 1) {
2091  return kMONTH;
2092  } else if (time_unit_val == 12) {
2093  return kYEAR;
2094  }
2095  }
2096  CHECK(false);
2097  return kUNKNOWN_FIELD;
2098 }
2099 } // namespace
2100 
// Translates a window function operator into an Analyzer::WindowFunction expression.
//
// Visible steps: translate the operands, partition keys and order keys; build the
// collation; classify and validate the frame bounds; apply per-function argument and
// framing rules; construct the WindowFunction node.
//
// NOTE(review): several lines of this listing are missing (the header of the
// determine_frame_bound_type lambda and its return statements, the initializers of
// has_framing_clause / frame_mode / colvar_set, and all but the `default` case label of
// the switch). The comments below describe only what the visible lines establish.
//
// Throws std::runtime_error for the invalid frame / argument combinations reported by
// the messages below.
2101 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWindowFunction(
2102  const RexWindowFunctionOperator* rex_window_function) const {
  // Translate operands, partition keys and order keys into Analyzer expressions.
2103  std::vector<std::shared_ptr<Analyzer::Expr>> args;
2104  for (size_t i = 0; i < rex_window_function->size(); ++i) {
2105  args.push_back(translateScalarRex(rex_window_function->getOperand(i)));
2106  }
2107  std::vector<std::shared_ptr<Analyzer::Expr>> partition_keys;
2108  for (const auto& partition_key : rex_window_function->getPartitionKeys()) {
2109  partition_keys.push_back(translateScalarRex(partition_key.get()));
2110  }
2111  std::vector<std::shared_ptr<Analyzer::Expr>> order_keys;
2112  for (const auto& order_key : rex_window_function->getOrderKeys()) {
2113  order_keys.push_back(translateScalarRex(order_key.get()));
2114  }
2115  std::vector<Analyzer::OrderEntry> collation =
2116  translate_collation(rex_window_function->getCollation());
2117 
  // The result type starts as the operator's declared type; for the kinds accepted by
  // window_function_is_value() it is replaced by the type of the first argument.
2118  auto ti = rex_window_function->getType();
2119  auto window_func_kind = rex_window_function->getKind();
2120  if (window_function_is_value(window_func_kind)) {
2121  CHECK_GE(args.size(), 1u);
2122  ti = args.front()->get_type_info();
2123  }
  // Classifies a frame bound from its unbounded / is_current_row / following /
  // preceding flags and bound_expr.
  // NOTE(review): the lambda header and every return statement are missing from this
  // listing; the result is used below as a SqlWindowFrameBoundType.
2124  auto determine_frame_bound_type =
2126  if (bound.unbounded) {
2127  CHECK(!bound.bound_expr && !bound.is_current_row);
2128  if (bound.following) {
2130  } else if (bound.preceding) {
2132  }
2133  } else {
2134  if (bound.is_current_row) {
2135  CHECK(!bound.unbounded && !bound.bound_expr);
2137  } else {
2138  CHECK(!bound.unbounded && bound.bound_expr);
2139  if (bound.following) {
2141  } else if (bound.preceding) {
2143  }
2144  }
2145  }
2147  };
  // Returns true when the datum, read according to SQL type `t`, is below zero.
  // kDOUBLE is accepted only when is_time_unit is set; unsupported types throw.
2148  auto is_negative_framing_bound =
2149  [](const SQLTypes t, const Datum& d, bool is_time_unit = false) {
2150  switch (t) {
2151  case kTINYINT:
2152  return d.tinyintval < 0;
2153  case kSMALLINT:
2154  return d.smallintval < 0;
2155  case kINT:
2156  return d.intval < 0;
2157  case kDOUBLE: {
2158  // the only case that double type is used is for handling time interval
2159  // i.e., represent tiny time units like nanosecond and microsecond as the
2160  // equivalent time value with SECOND time unit
2161  CHECK(is_time_unit);
2162  return d.doubleval < 0;
2163  }
2164  case kDECIMAL:
2165  case kNUMERIC:
2166  case kBIGINT:
2167  return d.bigintval < 0;
2168  default: {
2169  throw std::runtime_error(
2170  "We currently only support integer-type literal expression as a window "
2171  "frame bound expression");
2172  }
2173  }
2174  };
2175 
2176  bool negative_constant = false;
2177  bool detect_invalid_frame_start_bound_expr = false;
2178  bool detect_invalid_frame_end_bound_expr = false;
2179  auto& frame_start_bound = rex_window_function->getFrameStartBound();
2180  auto& frame_end_bound = rex_window_function->getFrameEndBound();
  // NOTE(review): no visible line ever sets has_end_bound_frame_expr to true, so the
  // checks below that depend on it cannot fire as shown -- confirm against the
  // original file.
2181  bool has_end_bound_frame_expr = false;
2182  std::shared_ptr<Analyzer::Expr> frame_start_bound_expr;
2183  SqlWindowFrameBoundType frame_start_bound_type =
2184  determine_frame_bound_type(frame_start_bound);
2185  std::shared_ptr<Analyzer::Expr> frame_end_bound_expr;
2186  SqlWindowFrameBoundType frame_end_bound_type =
2187  determine_frame_bound_type(frame_end_bound);
  // NOTE(review): the initializers of has_framing_clause and frame_mode are truncated
  // in this listing.
2188  bool has_framing_clause =
2190  auto frame_mode = rex_window_function->isRows()
2193  if (order_keys.empty()) {
2194  if (frame_start_bound_type == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING &&
2195  frame_end_bound_type == SqlWindowFrameBoundType::UNBOUNDED_FOLLOWING) {
2196  // Calcite sets UNBOUNDED PRECEDING ~ UNBOUNDED_FOLLOWING as its default frame bound
2197  // if the window context has no order by clause regardless of the existence of
2198  // user-given window frame bound but at this point we have no way to recognize the
2199  // absence of the frame definition of this window context
2200  has_framing_clause = false;
2201  }
2202  } else {
  // Translates one frame bound expression. Returns {is_invalid, translated_expr};
  // is_invalid is true unless the bound is a time-interval operator or a literal
  // that translates to a constant. Sets negative_constant as a side effect.
  // NOTE(review): negative_constant is assigned, not OR-ed, so translating the
  // end bound overwrites the result computed for the start bound.
2203  auto translate_frame_bound_expr = [&](const RexScalar* bound_expr) {
2204  std::shared_ptr<Analyzer::Expr> translated_expr;
2205  const auto rex_oper = dynamic_cast<const RexOperator*>(bound_expr);
2206  if (rex_oper && rex_oper->getType().is_timeinterval()) {
2207  translated_expr = translateScalarRex(rex_oper);
  // NOTE(review): bin_oper is dereferenced without a null check; this assumes
  // the translated interval expression is always a BinOper.
2208  const auto bin_oper =
2209  dynamic_cast<const Analyzer::BinOper*>(translated_expr.get());
2210  auto time_literal_expr =
2211  dynamic_cast<const Analyzer::Constant*>(bin_oper->get_left_operand());
2212  CHECK(time_literal_expr);
2213  negative_constant =
2214  is_negative_framing_bound(time_literal_expr->get_type_info().get_type(),
2215  time_literal_expr->get_constval(),
2216  true);
2217  return std::make_pair(false, translated_expr);
2218  }
2219  if (dynamic_cast<const RexLiteral*>(bound_expr)) {
2220  translated_expr = translateScalarRex(bound_expr);
2221  if (auto literal_expr =
2222  dynamic_cast<const Analyzer::Constant*>(translated_expr.get())) {
2223  negative_constant = is_negative_framing_bound(
2224  literal_expr->get_type_info().get_type(), literal_expr->get_constval());
2225  return std::make_pair(false, translated_expr);
2226  }
2227  }
2228  return std::make_pair(true, translated_expr);
2229  };
2230 
2231  if (frame_start_bound.bound_expr) {
2232  std::tie(detect_invalid_frame_start_bound_expr, frame_start_bound_expr) =
2233  translate_frame_bound_expr(frame_start_bound.bound_expr.get());
2234  }
2235 
2236  if (frame_end_bound.bound_expr) {
2237  std::tie(detect_invalid_frame_end_bound_expr, frame_end_bound_expr) =
2238  translate_frame_bound_expr(frame_end_bound.bound_expr.get());
2239  }
2240 
2241  // currently we only support literal expression as frame bound expression
2242  if (detect_invalid_frame_start_bound_expr || detect_invalid_frame_end_bound_expr) {
2243  throw std::runtime_error(
2244  "We currently only support literal expression as a window frame bound "
2245  "expression");
2246  }
2247 
2248  // note that Calcite already has frame-bound constraint checking logic, but we
2249  // also check various invalid cases for safety
2250  if (negative_constant) {
2251  throw std::runtime_error(
2252  "A constant expression for window framing should have nonnegative value.");
2253  }
2254 
  // Replaces a time-interval bound expression with the result of
  // translateIntervalExprForWindowFraming(), storing it as the start or end bound
  // expression according to for_start_bound.
  // NOTE(review): one argument line of that call is missing from this listing, and
  // bound_bin_oper is dereferenced inside CHECK without a null check.
2255  auto handle_time_interval_expr_if_necessary = [&](const Analyzer::Expr* bound_expr,
2256  SqlWindowFrameBoundType bound_type,
2257  bool for_start_bound) {
2258  if (bound_expr && bound_expr->get_type_info().is_timeinterval()) {
2259  const auto bound_bin_oper = dynamic_cast<const Analyzer::BinOper*>(bound_expr);
2260  CHECK(bound_bin_oper->get_optype() == kMULTIPLY);
2261  auto translated_expr = translateIntervalExprForWindowFraming(
2262  order_keys.front(),
2264  bound_bin_oper);
2265  if (for_start_bound) {
2266  frame_start_bound_expr = translated_expr;
2267  } else {
2268  frame_end_bound_expr = translated_expr;
2269  }
2270  }
2271  };
2272  handle_time_interval_expr_if_necessary(
2273  frame_start_bound_expr.get(), frame_start_bound_type, true);
2274  handle_time_interval_expr_if_necessary(
2275  frame_end_bound_expr.get(), frame_end_bound_type, false);
2276  }
2277 
  // Reject frames whose end bound lies before their start bound.
2278  if (frame_start_bound.following) {
2279  if (frame_end_bound.is_current_row) {
2280  throw std::runtime_error(
2281  "Window framing starting from following row cannot end with current row.");
2282  } else if (has_end_bound_frame_expr && frame_end_bound.preceding) {
2283  throw std::runtime_error(
2284  "Window framing starting from following row cannot have preceding rows.");
2285  }
2286  }
2287  if (frame_start_bound.is_current_row && frame_end_bound.preceding &&
2288  !frame_end_bound.unbounded && has_end_bound_frame_expr) {
2289  throw std::runtime_error(
2290  "Window framing starting from current row cannot have preceding rows.");
2291  }
  // NOTE(review): the condition that opens the inner block is missing from this
  // listing; judging by the messages it restricts the checks to range framing mode --
  // confirm.
2292  if (has_framing_clause) {
2294  if (order_keys.size() != 1) {
2295  throw std::runtime_error(
2296  "Window framing with range mode requires a single order-by column");
2297  }
2298  if (!frame_start_bound_expr &&
2299  frame_start_bound_type == SqlWindowFrameBoundType::UNBOUNDED_PRECEDING &&
2300  !frame_end_bound_expr &&
2301  frame_end_bound_type == SqlWindowFrameBoundType::CURRENT_ROW) {
2302  has_framing_clause = false;
2303  VLOG(1) << "Ignore range framing mode with a frame bound between "
2304  "UNBOUNDED_PRECEDING and CURRENT_ROW";
2305  }
  // Framing is dropped when any column referenced by the single order key is not
  // of integer, floating point or time type.
  // NOTE(review): the declarator/initializer of colvar_set is missing from this
  // listing.
2306  std::set<const Analyzer::ColumnVar*,
2307  bool (*)(const Analyzer::ColumnVar*, const Analyzer::ColumnVar*)>
2309  order_keys.front()->collect_column_var(colvar_set, false);
2310  for (auto cv : colvar_set) {
2311  if (!(cv->get_type_info().is_integer() || cv->get_type_info().is_fp() ||
2312  cv->get_type_info().is_time())) {
2313  has_framing_clause = false;
2314  VLOG(1) << "Range framing mode with non-number type ordering column is not "
2315  "supported yet, skip window framing";
2316  }
2317  }
2318  }
2319  }
2320  auto const func_name = ::toString(window_func_kind);
2321  auto const num_args = args.size();
2322  bool need_order_by_clause = false;
2323  bool need_frame_def = false;
  // Per-function validation.
  // NOTE(review): every case label except `default` is missing from this listing, so
  // which window function kinds each section applies to cannot be determined here.
2324  switch (window_func_kind) {
  // Section requiring exactly two arguments, an ORDER BY clause and a frame
  // definition. A constant 1 is appended as a third argument.
2327  need_order_by_clause = true;
2328  need_frame_def = true;
2329  if (num_args != 2) {
2330  throw std::runtime_error(func_name + " has an invalid number of input arguments");
2331  }
2332  Datum d;
2333  d.intval = 1;
2334  args.push_back(makeExpr<Analyzer::Constant>(kINT, false, d));
2335  const auto target_expr_cv =
2336  dynamic_cast<const Analyzer::ColumnVar*>(args.front().get());
2337  if (!target_expr_cv) {
2338  throw std::runtime_error("Currently, " + func_name +
2339  " only allows a column reference as its first argument");
2340  }
2341  const auto target_ti = target_expr_cv->get_type_info();
2342  if (target_ti.is_dict_encoded_string()) {
2343  // Calcite does not represent a window function having dictionary encoded text
2344  // type as its output properly, so we need to set its output type manually
2345  ti.set_compression(kENCODING_DICT);
2346  ti.set_comp_param(target_expr_cv->get_type_info().get_comp_param());
2347  ti.setStringDictKey(target_expr_cv->get_type_info().getStringDictKey());
2348  ti.set_fixed_size();
2349  }
  // NOTE(review): the null check below tests target_expr_cv (already known to be
  // non-null here) rather than target_offset_cv, so a non-constant second argument
  // would be dereferenced as a null pointer. This looks like a typo -- confirm.
2350  const auto target_offset_cv =
2351  dynamic_cast<const Analyzer::Constant*>(args[1].get());
2352  if (!target_expr_cv ||
2353  is_negative_framing_bound(target_offset_cv->get_type_info().get_type(),
2354  target_offset_cv->get_constval())) {
2355  throw std::runtime_error(
2356  "Currently, " + func_name +
2357  " only allows non-negative constant as its second argument");
2358  }
2359  break;
2360  }
  // Section requiring exactly one argument, an ORDER BY clause and a frame
  // definition.
2363  if (num_args != 1) {
2364  throw std::runtime_error(func_name + " has an invalid number of input arguments");
2365  }
2366  need_order_by_clause = true;
2367  need_frame_def = true;
2368  break;
  // Section that rejects a framing clause and string inputs. If the first argument
  // is not already among the order keys (compared by toString()), it is appended as
  // an additional ordering column.
2371  if (has_framing_clause) {
2372  throw std::runtime_error(func_name + " does not support window framing clause");
2373  }
2374  auto const input_expr_ti = args.front()->get_type_info();
2375  if (input_expr_ti.is_string()) {
2376  throw std::runtime_error(func_name + " not supported on " +
2377  input_expr_ti.get_type_name() + " type yet");
2378  }
2379  need_order_by_clause = true;
2380  std::string const arg_str{args.front()->toString()};
2381  bool needs_inject_input_arg_ordering =
2382  !std::any_of(order_keys.cbegin(),
2383  order_keys.cend(),
2384  [&arg_str](std::shared_ptr<Analyzer::Expr> const& expr) {
2385  return boost::equals(arg_str, expr->toString());
2386  });
2387  if (needs_inject_input_arg_ordering) {
2388  VLOG(1) << "Inject " << args.front()->toString() << " as ordering column of the "
2389  << func_name << " function";
2390  order_keys.push_back(args.front());
2391  // forward_fill can fill null values if it is ordered with NULLS LAST
2392  // in contrast, we make NULLS FIRST ordering for the backward_fill function
  // NOTE(review): the entry index here is collation.size() + 1, whereas
  // translate_collation() numbers entries from 0 -- confirm the intended base.
2393  collation.emplace_back(collation.size() + 1,
2394  false,
2395  window_func_kind != SqlWindowFunctionKind::FORWARD_FILL);
2396  }
2397  break;
2398  }
  // Section requiring exactly two arguments, the second being a positive integer
  // constant. The constant is decremented in place so that it becomes a zero-based
  // position.
2401  // todo (yoonmin) : args.size() will be three if we support default value
2402  if (num_args != 2) {
2403  throw std::runtime_error(func_name + " has an invalid number of input arguments");
2404  }
2405  // NTH_VALUE(_IN_FRAME) may return null value even if the argument is non-null
2406  // column
2407  ti.set_notnull(false);
2408  if (window_func_kind == SqlWindowFunctionKind::NTH_VALUE_IN_FRAME) {
2409  need_order_by_clause = true;
2410  need_frame_def = true;
2411  }
2412  if (!args[1]) {
2413  throw std::runtime_error(func_name +
2414  " must have a positional argument expression.");
2415  }
2416  bool has_valid_arg = false;
2417  if (args[1]->get_type_info().is_integer()) {
2418  if (auto* n_value_ptr = dynamic_cast<Analyzer::Constant*>(args[1].get())) {
2419  if (0 < n_value_ptr->get_constval().intval) {
2420  // i.e., having N larger than the partition size
2421  // set the proper N to match the zero-start index pos
2422  auto d = n_value_ptr->get_constval();
2423  d.intval -= 1;
2424  n_value_ptr->set_constval(d);
2425  has_valid_arg = true;
2426  }
2427  }
2428  }
2429  if (!has_valid_arg) {
2430  throw std::runtime_error("The positional argument of the " + func_name +
2431  " must be a positive integer constant.");
2432  }
2433  break;
2434  }
  // Section that requires ORDER BY and forces the frame to UNBOUNDED PRECEDING ..
  // CURRENT ROW; a user-supplied frame is only logged, not honored.
2436  if (order_keys.empty()) {
2437  throw std::runtime_error(
2438  ::toString(window_func_kind) +
2439  " requires an ORDER BY sub-clause within the window clause");
2440  }
2441  if (has_framing_clause) {
2442  LOG(INFO)
2443  << ::toString(window_func_kind)
2444  << " must use a pre-defined window frame range (e.g., ROWS BETWEEN "
2445  "UNBOUNDED PRECEDING AND CURRENT ROW). "
2446  "Thus, we skip the user-defined window frame for this window function";
2447  }
2448  has_framing_clause = true;
2450  frame_start_bound_type = SqlWindowFrameBoundType::UNBOUNDED_PRECEDING;
2451  frame_end_bound_type = SqlWindowFrameBoundType::CURRENT_ROW;
2452  break;
2453  default:;
2454  }
  // Enforce the requirements collected by the switch above.
2455  if (need_order_by_clause && order_keys.empty()) {
2456  throw std::runtime_error(func_name + " requires an ORDER BY clause");
2457  }
2458  if (need_frame_def && !has_framing_clause) {
2459  throw std::runtime_error(func_name + " requires window frame definition");
2460  }
  // Without a framing clause the bound types and bound expressions are cleared.
2461  if (!has_framing_clause) {
2462  frame_start_bound_type = SqlWindowFrameBoundType::UNKNOWN;
2463  frame_end_bound_type = SqlWindowFrameBoundType::UNKNOWN;
2464  frame_start_bound_expr = nullptr;
2465  frame_end_bound_expr = nullptr;
2466  }
  // A framed COUNT with no operands is given a constant argument whose type follows
  // g_bigint_count.
2467  if (window_func_kind == SqlWindowFunctionKind::COUNT && has_framing_clause &&
2468  args.empty()) {
2469  args.push_back(makeExpr<Analyzer::Constant>(g_bigint_count ? kBIGINT : kINT, true));
2470  }
2471  return makeExpr<Analyzer::WindowFunction>(
2472  ti,
2473  rex_window_function->getKind(),
2474  args,
2475  partition_keys,
2476  order_keys,
2477  has_framing_clause ? frame_mode : Analyzer::WindowFunction::FrameBoundType::NONE,
2478  makeExpr<Analyzer::WindowFrame>(frame_start_bound_type, frame_start_bound_expr),
2479  makeExpr<Analyzer::WindowFrame>(frame_end_bound_type, frame_end_bound_expr),
2480  collation);
2481 }
2482 
// translateIntervalExprForWindowFraming: parameter list and body. (The line carrying
// the return type and the function name is not present in this listing.)
//
// Turns an interval-style window frame bound into an expression whose form depends on
// the type of the ordering key:
//   * kTIME key              -> a kBIGINT Analyzer::Constant holding value * multiplier
//   * kDATE / kTIMESTAMP key -> an Analyzer::DateaddExpr built on top of order_key
//
// order_key            ordering-key expression; its type info is inspected and the
//                      expression itself is passed through to DateaddExpr.
// for_preceding_bound  when true, the datum produced by prepare_time_value_datum is
//                      negated (the kTIME branch does not go through that helper).
// frame_bound_expr     binary expression; its left operand is read as the time value and
//                      its right operand as the time unit, both via dynamic_cast to
//                      Analyzer::Constant.
//
// Throws std::runtime_error from the kTIME and kDATE branches when the time unit or the
// value type is rejected. The kTIMESTAMP branch returns nullptr for a rejected time unit.
2484  std::shared_ptr<Analyzer::Expr> order_key,
2485  bool for_preceding_bound,
2486  const Analyzer::BinOper* frame_bound_expr) const {
2487  // translate time interval expression and prepare appropriate frame bound expression:
2488  // a) manually compute time unit datum: time type
2489  // b) use dateadd expression: date and timestamp
2490  const auto order_key_ti = order_key->get_type_info();
2491  const auto frame_bound_ti = frame_bound_expr->get_type_info();
// NOTE(review): dynamic_cast yields nullptr when an operand is not an
// Analyzer::Constant; time_val_expr is dereferenced below without a null check and
// time_unit_val_expr is handed to the helper functions as-is.
2492  const auto time_val_expr =
2493  dynamic_cast<const Analyzer::Constant*>(frame_bound_expr->get_left_operand());
2494  const auto time_unit_val_expr =
2495  dynamic_cast<const Analyzer::Constant*>(frame_bound_expr->get_right_operand());
2496  ExtractField time_unit =
2497  determineTimeUnit(frame_bound_ti.get_type(), time_unit_val_expr);
// Error flags. NOTE(review): their use does not follow their names consistently:
//  - in the kTIME and kDATE branches, invalid_frame_bound_expr_type is set for an
//    unsupported time unit and invalid_time_unit_type for a non-integer time value
//    (the error messages follow those conditions, not the flag names);
//  - prepare_time_value_datum sets invalid_frame_bound_expr_type for an unsupported
//    value type.
2498  bool invalid_time_unit_type = false;
2499  bool invalid_frame_bound_expr_type = false;
// Output datum; declared without an initializer and filled by the branches below.
2500  Datum d;
// Helper: loads the constant time value into d.bigintval, flags unsupported value
// types, and negates the result for a preceding bound.
// is_timestamp_second: when false, decimal/numeric/double values are rejected.
2501  auto prepare_time_value_datum = [&d,
2502  &invalid_frame_bound_expr_type,
2503  &time_val_expr,
2504  &for_preceding_bound](bool is_timestamp_second) {
2505  // currently, Calcite only accepts interval with second, so to represent
2506  // smaller time units like millisecond, we have to use decimal point like
2507  // INTERVAL 0.003 SECOND (for millisecond)
2508  // thus, depending on what time unit we want to represent, Calcite analyzes
2509  // the time value to one of following two types: integer and decimal (and
2510  // numeric) types
2511  switch (time_val_expr->get_type_info().get_type()) {
// Integer constants: read the member matching the constant's width.
2512  case kTINYINT: {
2513  d.bigintval = time_val_expr->get_constval().tinyintval;
2514  break;
2515  }
2516  case kSMALLINT: {
2517  d.bigintval = time_val_expr->get_constval().smallintval;
2518  break;
2519  }
2520  case kINT: {
2521  d.bigintval = time_val_expr->get_constval().intval;
2522  break;
2523  }
2524  case kBIGINT: {
2525  d.bigintval = time_val_expr->get_constval().bigintval;
2526  break;
2527  }
2528  case kDECIMAL:
2529  case kNUMERIC: {
2530  if (!is_timestamp_second) {
2531  // date and time type only use integer type as their time value
2532  invalid_frame_bound_expr_type = true;
2533  break;
2534  }
// The constant's bigintval member is copied unchanged (presumably the scaled integer
// representation of the decimal -- TODO confirm against Analyzer::Constant).
2535  d.bigintval = time_val_expr->get_constval().bigintval;
2536  break;
2537  }
2538  case kDOUBLE: {
2539  if (!is_timestamp_second) {
2540  // date and time type only use integer type as their time value
2541  invalid_frame_bound_expr_type = true;
2542  break;
2543  }
// Multiply by 10^scale of the constant's type; the product is converted to an integer
// on assignment to bigintval.
2544  d.bigintval = time_val_expr->get_constval().doubleval *
2545  pow(10, time_val_expr->get_type_info().get_scale());
2546  break;
2547  }
2548  default: {
2549  invalid_frame_bound_expr_type = true;
2550  break;
2551  }
2552  }
// NOTE(review): on the rejection paths above d.bigintval has not been assigned, yet it
// is still read and negated here when for_preceding_bound is true.
2553  if (for_preceding_bound) {
2554  d.bigintval *= -1;
2555  }
2556  };
2557 
2558  switch (order_key_ti.get_type()) {
// kTIME: only integer values with a SECOND / MINUTE / HOUR unit are accepted. The value
// is multiplied by the factor from determineTimeValMultiplierForTimeType and returned as
// a kBIGINT constant. for_preceding_bound is not consulted in this branch.
2559  case kTIME: {
2560  if (time_val_expr->get_type_info().is_integer()) {
2561  if (time_unit == kSECOND || time_unit == kMINUTE || time_unit == kHOUR) {
2562  const auto time_multiplier = determineTimeValMultiplierForTimeType(
2563  frame_bound_ti.get_type(), time_unit_val_expr);
2564  switch (time_val_expr->get_type_info().get_type()) {
2565  case kTINYINT: {
2566  d.bigintval = time_val_expr->get_constval().tinyintval * time_multiplier;
2567  break;
2568  }
2569  case kSMALLINT: {
2570  d.bigintval = time_val_expr->get_constval().smallintval * time_multiplier;
2571  break;
2572  }
2573  case kINT: {
2574  d.bigintval = time_val_expr->get_constval().intval * time_multiplier;
2575  break;
2576  }
2577  case kBIGINT: {
2578  d.bigintval = time_val_expr->get_constval().bigintval * time_multiplier;
2579  break;
2580  }
2581  default: {
// is_integer() was checked above, so other types are not expected here.
2582  UNREACHABLE();
2583  break;
2584  }
2585  }
2586  } else {
// Unsupported time unit for a TIME key (reported as "Invalid time unit ..." below).
2587  invalid_frame_bound_expr_type = true;
2588  }
2589  } else {
// Non-integer time value (reported as "... has an invalid type ..." below).
2590  invalid_time_unit_type = true;
2591  }
2592  if (invalid_frame_bound_expr_type) {
2593  throw std::runtime_error(
2594  "Invalid time unit is used to define window frame bound expression for " +
2595  order_key_ti.get_type_name() + " type");
2596  } else if (invalid_time_unit_type) {
2597  throw std::runtime_error(
2598  "Window frame bound expression has an invalid type for " +
2599  order_key_ti.get_type_name() + " type");
2600  }
2601  return std::make_shared<Analyzer::Constant>(kBIGINT, false, d);
2602  }
// kDATE: only integer values with a DAY / MONTH / YEAR unit are accepted; the result is
// a DateaddExpr typed as kTIMESTAMP with the order key's dimension.
// NOTE(review): the declaration of daField is not visible in this listing (listing line
// 2604 is absent) -- confirm its type and initial value in the original file.
2603  case kDATE: {
2605  if (time_val_expr->get_type_info().is_integer()) {
2606  switch (time_unit) {
2607  case kDAY: {
2608  daField = to_dateadd_field("day");
2609  break;
2610  }
2611  case kMONTH: {
2612  daField = to_dateadd_field("month");
2613  break;
2614  }
2615  case kYEAR: {
2616  daField = to_dateadd_field("year");
2617  break;
2618  }
2619  default: {
// Unsupported time unit for a DATE key.
2620  invalid_frame_bound_expr_type = true;
2621  break;
2622  }
2623  }
2624  } else {
// Non-integer time value.
2625  invalid_time_unit_type = true;
2626  }
2627  if (invalid_frame_bound_expr_type) {
2628  throw std::runtime_error(
2629  "Invalid time unit is used to define window frame bound expression for " +
2630  order_key_ti.get_type_name() + " type");
2631  } else if (invalid_time_unit_type) {
2632  throw std::runtime_error(
2633  "Window frame bound expression has an invalid type for " +
2634  order_key_ti.get_type_name() + " type");
2635  }
// Value type was already verified to be integer, so the helper only loads (and, for a
// preceding bound, negates) the value. Listing line 2636 is absent from this listing.
2637  prepare_time_value_datum(false);
2638  const auto cast_number_units = makeExpr<Analyzer::Constant>(kBIGINT, false, d);
2639  const int dim = order_key_ti.get_dimension();
2640  return makeExpr<Analyzer::DateaddExpr>(
2641  SQLTypeInfo(kTIMESTAMP, dim, 0, false), daField, cast_number_units, order_key);
2642  }
// kTIMESTAMP: the time unit selects the dateadd field. For SECOND, the scale of the
// value's type (0 / 3 / 6 / 9) selects second / millisecond / microsecond / nanosecond
// and decimal or double values are allowed; every other unit requires an integer value.
// NOTE(review): invalid_frame_bound_expr_type, which prepare_time_value_datum may set,
// is not examined in this branch before the DateaddExpr is built, and a rejected time
// unit yields nullptr rather than an exception. The daField declaration (listing line
// 2644) is not visible in this listing.
2643  case kTIMESTAMP: {
2645  switch (time_unit) {
2646  case kSECOND: {
2647  switch (time_val_expr->get_type_info().get_scale()) {
2648  case 0: {
2649  daField = to_dateadd_field("second");
2650  break;
2651  }
2652  case 3: {
2653  daField = to_dateadd_field("millisecond");
2654  break;
2655  }
2656  case 6: {
2657  daField = to_dateadd_field("microsecond");
2658  break;
2659  }
2660  case 9: {
2661  daField = to_dateadd_field("nanosecond");
2662  break;
2663  }
2664  default:
// Any other scale is treated as impossible.
2665  UNREACHABLE();
2666  break;
2667  }
2668  prepare_time_value_datum(true);
2669  break;
2670  }
2671  case kMINUTE: {
2672  daField = to_dateadd_field("minute");
2673  prepare_time_value_datum(false);
2674  break;
2675  }
2676  case kHOUR: {
2677  daField = to_dateadd_field("hour");
2678  prepare_time_value_datum(false);
2679  break;
2680  }
2681  case kDAY: {
2682  daField = to_dateadd_field("day");
2683  prepare_time_value_datum(false);
2684  break;
2685  }
2686  case kMONTH: {
2687  daField = to_dateadd_field("month");
2688  prepare_time_value_datum(false);
2689  break;
2690  }
2691  case kYEAR: {
2692  daField = to_dateadd_field("year");
2693  prepare_time_value_datum(false);
2694  break;
2695  }
2696  default: {
2697  invalid_time_unit_type = true;
2698  break;
2699  }
2700  }
2701  if (!invalid_time_unit_type) {
// Listing line 2702 is absent from this listing.
2703  const auto cast_number_units = makeExpr<Analyzer::Constant>(kBIGINT, false, d);
2704  const int dim = order_key_ti.get_dimension();
2705  return makeExpr<Analyzer::DateaddExpr>(SQLTypeInfo(kTIMESTAMP, dim, 0, false),
2706  daField,
2707  cast_number_units,
2708  order_key);
2709  }
2710  return nullptr;
2711  }
2712  default: {
// Ordering keys of any other type are not expected to reach this function.
2713  UNREACHABLE();
2714  break;
2715  }
2716  }
// NOTE(review): every case above either returns or ends in UNREACHABLE(), so the checks
// below run only if UNREACHABLE() returns -- presumably dead code; confirm.
2717  if (invalid_frame_bound_expr_type) {
2718  throw std::runtime_error(
2719  "Invalid time unit is used to define window frame bound expression for " +
2720  order_key_ti.get_type_name() + " type");
2721  } else if (invalid_time_unit_type) {
2722  throw std::runtime_error("Window frame bound expression has an invalid type for " +
2723  order_key_ti.get_type_name() + " type");
2724  }
2725  return nullptr;
2726 }
2727 
2729  const RexFunctionOperator* rex_function) const {
2730  std::vector<std::shared_ptr<Analyzer::Expr>> args;
2731  for (size_t i = 0; i < rex_function->size(); ++i) {
2732  args.push_back(translateScalarRex(rex_function->getOperand(i)));
2733  }
2734  return args;
2735 }
2736 
2738  const std::shared_ptr<Analyzer::Expr> qual_expr) {
2739  CHECK(qual_expr);
2740  auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
2741  if (!bin_oper) {
2742  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
2743  return {{}, {rewritten_qual_expr ? rewritten_qual_expr : qual_expr}};
2744  }
2745 
2746  if (bin_oper->get_optype() == kAND) {
2747  const auto lhs_cf = qual_to_conjunctive_form(bin_oper->get_own_left_operand());
2748  const auto rhs_cf = qual_to_conjunctive_form(bin_oper->get_own_right_operand());
2749  auto simple_quals = lhs_cf.simple_quals;
2750  simple_quals.insert(
2751  simple_quals.end(), rhs_cf.simple_quals.begin(), rhs_cf.simple_quals.end());
2752  auto quals = lhs_cf.quals;
2753  quals.insert(quals.end(), rhs_cf.quals.begin(), rhs_cf.quals.end());
2754  return {simple_quals, quals};
2755  }
2756  int rte_idx{0};
2757  const auto simple_qual = bin_oper->normalize_simple_predicate(rte_idx);
2758  return simple_qual ? QualsConjunctiveForm{{simple_qual}, {}}
2759  : QualsConjunctiveForm{{}, {qual_expr}};
2760 }
2761 
2762 std::vector<std::shared_ptr<Analyzer::Expr>> qual_to_disjunctive_form(
2763  const std::shared_ptr<Analyzer::Expr>& qual_expr) {
2764  CHECK(qual_expr);
2765  const auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
2766  if (!bin_oper) {
2767  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
2768  return {rewritten_qual_expr ? rewritten_qual_expr : qual_expr};
2769  }
2770  if (bin_oper->get_optype() == kOR) {
2771  const auto lhs_df = qual_to_disjunctive_form(bin_oper->get_own_left_operand());
2772  const auto rhs_df = qual_to_disjunctive_form(bin_oper->get_own_right_operand());
2773  auto quals = lhs_df;
2774  quals.insert(quals.end(), rhs_df.begin(), rhs_df.end());
2775  return quals;
2776  }
2777  return {qual_expr};
2778 }
2779 
2780 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateHPTLiteral(
2781  const RexFunctionOperator* rex_function) const {
2782  /* since calcite uses Avatica package called DateTimeUtils to parse timestamp strings.
2783  Therefore any string having fractional seconds more 3 places after the decimal
2784  (milliseconds) will get truncated to 3 decimal places, therefore we lose precision
2785  (us|ns). Issue: [BE-2461] Here we are hijacking literal cast to Timestamp(6|9) from
2786  calcite and translating them to generate our own casts.
2787  */
2788  CHECK_EQ(size_t(1), rex_function->size());
2789  const auto operand = translateScalarRex(rex_function->getOperand(0));
2790  const auto& operand_ti = operand->get_type_info();
2791  const auto& target_ti = rex_function->getType();
2792  if (!operand_ti.is_string()) {
2793  throw std::runtime_error(
2794  "High precision timestamp cast argument must be a string. Input type is: " +
2795  operand_ti.get_type_name());
2796  } else if (!target_ti.is_high_precision_timestamp()) {
2797  throw std::runtime_error(
2798  "Cast target type should be high precision timestamp. Input type is: " +
2799  target_ti.get_type_name());
2800  } else if (target_ti.get_dimension() != 6 && target_ti.get_dimension() != 9) {
2801  throw std::runtime_error(
2802  "Cast target type should be TIMESTAMP(6|9). Input type is: TIMESTAMP(" +
2803  std::to_string(target_ti.get_dimension()) + ")");
2804  } else {
2805  return operand->add_cast(target_ti);
2806  }
2807 }
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
Defines data structures for the semantic analysis phase of query processing.
Definition: sqldefs.h:71
SqlWindowFrameBoundType
Definition: sqldefs.h:157
const RexScalar * getThen(const size_t idx) const
Definition: RelAlgDag.h:440
const std::vector< JoinType > join_types_
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
void set_compression(EncodingType c)
Definition: sqltypes.h:479
SQLAgg
Definition: sqldefs.h:73
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >, const Executor *executor=nullptr)
#define CHECK_EQ(x, y)
Definition: Logger.h:301
auto func_resolve
std::shared_ptr< Analyzer::Expr > translateOffsetInFragment() const
SqlStringOpKind name_to_string_op_kind(const std::string &func_name)
Definition: sqldefs.h:388
static std::shared_ptr< Analyzer::Expr > get(const std::string &)
Definition: ParserNode.cpp:240
std::shared_ptr< Analyzer::Expr > translateCurrentTimestamp() const
std::shared_ptr< Analyzer::Expr > translateBinaryGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
SQLAgg getKind() const
Definition: RelAlgDag.h:799
Definition: sqltypes.h:76
std::shared_ptr< Analyzer::Expr > translateRegexp(const RexFunctionOperator *) const
static bool colvar_comp(const ColumnVar *l, const ColumnVar *r)
Definition: Analyzer.h:215
SQLTypes
Definition: sqltypes.h:65
static constexpr int64_t kSecsPerHour
size_t getOperand(size_t idx) const
Definition: RelAlgDag.h:805
const Executor * executor_
std::shared_ptr< Analyzer::Expr > translateUnlikely(const RexFunctionOperator *) const
const RexScalar * getElse() const
Definition: RelAlgDag.h:445
void collect_column_var(std::set< const ColumnVar *, bool(*)(const ColumnVar *, const ColumnVar *)> &colvar_set, bool include_agg) const override
Definition: Analyzer.h:222
static constexpr int64_t kSecsPerMin
std::shared_ptr< Analyzer::Expr >(RelAlgTranslator::*)(RexScalar const *) const Handler
SQLQualifier
Definition: sqldefs.h:71
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:165
std::shared_ptr< Analyzer::Expr > translateFunction(const RexFunctionOperator *) const
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1488
#define LOG(tag)
Definition: Logger.h:285
std::shared_ptr< Analyzer::Expr > translateScalarRex(const RexScalar *rex) const
const SQLTypeInfo & getType() const
Definition: RelAlgDag.h:378
size_t size() const
Definition: RelAlgDag.h:364
static constexpr int64_t kMilliSecsPerDay
const RexScalar * getOperand(const size_t idx) const
Definition: RelAlgDag.h:366
std::shared_ptr< Analyzer::Expr > translateUoper(const RexOperator *) const
HOST DEVICE int get_scale() const
Definition: sqltypes.h:396
const Expr * get_right_operand() const
Definition: Analyzer.h:456
const std::vector< SortField > & getCollation() const
Definition: RelAlgDag.h:670
SQLOps
Definition: sqldefs.h:28
std::shared_ptr< Analyzer::Expr > translateDateadd(const RexFunctionOperator *) const
int8_t boolval
Definition: Datum.h:70
static bool isFramingAvailableWindowFunc(SqlWindowFunctionKind kind)
Definition: Analyzer.h:2835
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr, const Executor *executor=nullptr)
Definition: ParserNode.cpp:379
Definition: sqldefs.h:37
#define UNREACHABLE()
Definition: Logger.h:338
std::shared_ptr< Analyzer::Expr > translateIntervalExprForWindowFraming(std::shared_ptr< Analyzer::Expr > order_key, bool for_preceding_bound, const Analyzer::BinOper *frame_bound_expr) const
#define CHECK_GE(x, y)
Definition: Logger.h:306
std::shared_ptr< Analyzer::Expr > translateAbs(const RexFunctionOperator *) const
Definition: sqldefs.h:48
Definition: sqldefs.h:29
const RexScalar * getWhen(const size_t idx) const
Definition: RelAlgDag.h:435
std::shared_ptr< Analyzer::Expr > ExpressionPtr
Definition: Analyzer.h:184
std::string getString(int32_t string_id) const
std::shared_ptr< Analyzer::Expr > getInIntegerSetExpr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set) const
Definition: sqldefs.h:40
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::shared_ptr< Analyzer::Expr > translateItem(const RexFunctionOperator *) const
std::string toString(const QueryDescriptionType &type)
Definition: Types.h:64
std::shared_ptr< Analyzer::Constant > makeNumericConstant(const SQLTypeInfo &ti, const long val)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
bool operator()(IndexedHandler const &pair) const
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
#define TRANSIENT_DICT_ID
Definition: DbObjectKeys.h:24
bool is_number() const
Definition: sqltypes.h:574
#define CHECK_GT(x, y)
Definition: Logger.h:305
bool is_agg_supported_for_type(const SQLAgg &agg_kind, const SQLTypeInfo &arg_ti)
std::shared_ptr< Analyzer::Expr > translateGeoProjection(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
int32_t intval
Definition: Datum.h:73
bool is_time() const
Definition: sqltypes.h:577
std::shared_ptr< Analyzer::Expr > translateOper(const RexOperator *) const
std::string to_string(char const *&&v)
std::shared_ptr< Analyzer::Expr > translateDatediff(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInput(const RexInput *) const
std::shared_ptr< Analyzer::Expr > translateSign(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoFunction(const RexFunctionOperator *) const
static constexpr int64_t kMilliSecsPerMin
bool g_enable_string_functions
static constexpr int64_t kMilliSecsPerSec
std::shared_ptr< Analyzer::Expr > translateBoundingBoxIntersectOper(const RexOperator *) const
ExtractField to_datepart_field(const std::string &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:33
robin_hood::unordered_map< RexScalar const *, std::shared_ptr< Analyzer::Expr > > cache_
unsigned getIndex() const
Definition: RelAlgDag.h:174
Supported runtime functions management and retrieval.
future< Result > async(Fn &&fn, Args &&...args)
static SysCatalog & instance()
Definition: SysCatalog.h:343
static std::shared_ptr< Analyzer::Expr > translateLiteral(const RexLiteral *)
SQLOps getOperator() const
Definition: RelAlgDag.h:376
bool window_function_is_value(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:29
static constexpr int32_t INVALID_STR_ID
CONSTEXPR DEVICE bool is_null(const T &value)
Classes representing a parse tree.
std::shared_ptr< Analyzer::Expr > translateDatetime(const RexFunctionOperator *) const
void set_fixed_size()
Definition: sqltypes.h:477
DateaddField
Definition: DateAdd.h:42
void fill_dictionary_encoded_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice, const StringDictionaryProxy *source_dict, const StringDictionaryProxy *dest_dict, const int64_t needle_null_val)
#define CHECK_NE(x, y)
Definition: Logger.h:302
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateStringOper(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > pattern_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_not)
Definition: ParserNode.cpp:795
size_t determineTimeValMultiplierForTimeType(const SQLTypes &window_frame_bound_type, const Analyzer::Constant *const_expr)
void set_scale(int s)
Definition: sqltypes.h:473
int64_t bigintval
Definition: Datum.h:74
bool is_timeinterval() const
Definition: sqltypes.h:592
std::pair< std::shared_ptr< Analyzer::Expr >, SQLQualifier > getQuantifiedRhs(const RexScalar *) const
std::vector< Analyzer::OrderEntry > translate_collation(const std::vector< SortField > &sort_fields)
size_t branchCount() const
Definition: RelAlgDag.h:433
std::shared_ptr< Analyzer::Expr > translateCurrentTime() const
bool g_bigint_count
Definition: sqldefs.h:36
bool is_distinct_supported(SQLAgg const agg_kind)
bool g_enable_watchdog
Definition: sqldefs.h:71
int16_t smallintval
Definition: Datum.h:72
SQLTypeInfo build_type_info(const SQLTypes sql_type, const int scale, const int precision)
DatetruncField to_datediff_field(const std::string &field)
bool is_boolean() const
Definition: sqltypes.h:580
std::array< IndexedHandler, sizeof...(Ts)> makeHandlers()
const RexWindowBound & getFrameEndBound() const
Definition: RelAlgDag.h:674
std::shared_ptr< Analyzer::Expr > translate(const RexScalar *rex) const
std::tuple< T, std::vector< SQLTypeInfo > > bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
Argument type based extension function binding.
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
std::string * stringval
Definition: Datum.h:79
std::shared_ptr< Analyzer::Expr > translatePCAProject(const RexFunctionOperator *) const
const std::unordered_map< const RelAlgNode *, int > input_to_nest_level_
#define UNLIKELY(x)
Definition: likely.h:25
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const shared::StringDictKey &dest_dict_key, const std::vector< int32_t > &source_ids, const shared::StringDictKey &source_dict_key, const int32_t dest_generation)
Definition: sqldefs.h:33
void set_comp_param(int p)
Definition: sqltypes.h:480
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
#define CHECK_LT(x, y)
Definition: Logger.h:303
Definition: sqltypes.h:79
Definition: sqltypes.h:80
Analyzer::ExpressionPtrVector translateFunctionArgs(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
Definition: sqldefs.h:39
Definition: sqldefs.h:71
const ConstRexScalarPtrVector & getPartitionKeys() const
Definition: RelAlgDag.h:643
static std::shared_ptr< Analyzer::Expr > analyzeValue(const std::string &stringval, const bool is_null)
Definition: ParserNode.cpp:146
DEVICE auto lower_bound(ARGS &&...args)
Definition: gpu_enabled.h:78
#define CHECK_LE(x, y)
Definition: Logger.h:304
const RexWindowBound & getFrameStartBound() const
Definition: RelAlgDag.h:672
std::shared_ptr< Analyzer::Expr > translateUnaryGeoConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
std::shared_ptr< Analyzer::Expr > translateArrayFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > like_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_ilike, const bool is_not)
Definition: ParserNode.cpp:700
std::pair< std::type_index, Handler > IndexedHandler
void setStringDictKey(const shared::StringDictKey &dict_key)
Definition: sqltypes.h:1061
static RelRexToStringConfig defaults()
Definition: RelAlgDag.h:78
Datum get_constval() const
Definition: Analyzer.h:348
std::shared_ptr< Analyzer::Expr > translateMLPredict(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateCurrentUser(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateSampleRatio(const RexFunctionOperator *) const
SqlWindowFunctionKind getKind() const
Definition: RelAlgDag.h:641
std::shared_ptr< Analyzer::Expr > translateLike(const RexFunctionOperator *) const
const RelAlgNode * getSourceNode() const
Definition: RelAlgDag.h:1056
Definition: sqltypes.h:68
bool takes_arg(const TargetInfo &target_info)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t numericval, const int scale, const int precision)
Definition: ParserNode.cpp:190
ExtractField
std::shared_ptr< Analyzer::Expr > translateLikely(const RexFunctionOperator *) const
static const StringDictKey kTransientDictKey
Definition: DbObjectKeys.h:45
Definition: sqldefs.h:52
std::shared_ptr< Analyzer::Expr > get_in_values_expr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set)
static std::shared_ptr< Analyzer::Expr > get(const int64_t)
Definition: ParserNode.cpp:226
bool isDistinct() const
Definition: RelAlgDag.h:801
void set_notnull(bool n)
Definition: sqltypes.h:475
static constexpr int64_t kMilliSecsPerHour
#define CHECK(condition)
Definition: Logger.h:291
std::shared_ptr< Analyzer::Expr > translateTernaryGeoFunction(const RexFunctionOperator *) const
const ConstRexScalarPtrVector & getOrderKeys() const
Definition: RelAlgDag.h:653
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
std::shared_ptr< Analyzer::Expr > translateBinaryGeoFunction(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > make_fp_constant(const int64_t val, const SQLTypeInfo &ti)
std::pair< Datum, bool > datum_from_scalar_tv(const ScalarTargetValue *scalar_tv, const SQLTypeInfo &ti) noexcept
std::shared_ptr< Analyzer::Expr > translateWidthBucket(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInOper(const RexOperator *) const
uint64_t exp_to_scale(const unsigned exp)
size_t size() const
Definition: RelAlgDag.h:803
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::vector< ExpressionPtr > ExpressionPtrVector
Definition: Analyzer.h:186
bool g_cluster
Definition: sqldefs.h:32
const Expr * get_left_operand() const
Definition: Analyzer.h:455
bool isRows() const
Definition: RelAlgDag.h:676
std::shared_ptr< Analyzer::Expr > translateCase(const RexCase *) const
std::shared_ptr< Analyzer::Expr > translateFunctionWithGeoArg(const RexFunctionOperator *) const
Definition: sqltypes.h:72
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
std::shared_ptr< const query_state::QueryState > query_state_
const std::string & getName() const
Definition: RelAlgDag.h:506
bool is_string() const
Definition: sqltypes.h:559
std::shared_ptr< Analyzer::Expr > translateCurrentDate() const
std::string get_datetimeplus_rewrite_funcname(const SQLOps &op)
void validate_datetime_datepart_argument(const std::shared_ptr< Analyzer::Constant > literal_expr)
std::shared_ptr< Analyzer::Expr > translateCardinality(const RexFunctionOperator *) const
int cpu_threads()
Definition: thread_count.h:25
const bool just_explain_
Definition: Datum.h:69
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
Definition: RelAlgDag.h:865
bool is_decimal() const
Definition: sqltypes.h:568
std::shared_ptr< Analyzer::Expr > translateGeoComparison(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateDatePlusMinus(const RexOperator *) const
const std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::shared_ptr< Analyzer::Expr > translateHPTLiteral(const RexFunctionOperator *) const
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
int32_t getIdOfString(const std::string &str) const
std::shared_ptr< Analyzer::Expr > translateDatepart(const RexFunctionOperator *) const
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1581
std::shared_ptr< Analyzer::Expr > translateBinaryGeoConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
Definition: sqldefs.h:38
std::shared_ptr< Analyzer::Expr > rewrite_to_date_trunc(const Analyzer::FunctionOper *dt_plus)
Definition: sqldefs.h:83
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
#define VLOG(n)
Definition: Logger.h:388
DateaddField to_dateadd_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
void set_precision(int d)
Definition: sqltypes.h:471
std::shared_ptr< Analyzer::Expr > translateGeoBoundingBoxIntersectOper(const RexOperator *) const
#define IS_COMPARISON(X)
Definition: sqldefs.h:58
double doubleval
Definition: Datum.h:76
void fill_integer_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice)
std::shared_ptr< Analyzer::Expr > translateKeyForString(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
std::shared_ptr< Analyzer::Expr > translateWindowFunction(const RexWindowFunctionOperator *) const
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateScalarSubquery(const RexSubQuery *) const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:180
std::shared_ptr< Analyzer::Expr > translateLength(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateExtract(const RexFunctionOperator *) const
ExtractField determineTimeUnit(const SQLTypes &window_frame_bound_type, const Analyzer::Constant *const_expr)
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:468