OmniSciDB  94e8789169
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgTranslator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgTranslator.h"
18 #include "Shared/SqlTypesLayout.h"
19 
21 #include "DateTimePlusRewrite.h"
22 #include "DateTimeTranslator.h"
24 #include "ExpressionRewrite.h"
27 #include "RelAlgDagBuilder.h"
28 #include "WindowContext.h"
29 
30 #include <future>
31 
32 #include "Analyzer/Analyzer.h"
33 #include "Parser/ParserNode.h"
34 #include "Shared/likely.h"
35 #include "Shared/thread_count.h"
36 
37 extern bool g_enable_watchdog;
38 
40 
41 namespace {
42 
// NOTE(review): the opening line of this definition (return type, function name and
// the first parameter) is not present in this listing. Judging from the body below
// and from the `build_type_info(...)` calls in translateLiteral, it presumably reads
// `SQLTypeInfo build_type_info(const SQLTypes sql_type,` -- confirm against the
// original file.
//
// Builds a SQLTypeInfo for `sql_type`. `scale` and `precision` are applied only when
// the resulting type reports itself as a decimal; otherwise they are ignored.
44  const int scale,
45  const int precision) {
46  SQLTypeInfo ti(sql_type, 0, 0, true);
47  if (ti.is_decimal()) {
48  ti.set_scale(scale);
49  ti.set_precision(precision);
50  }
51  return ti;
52 }
53 
54 std::pair<std::shared_ptr<Analyzer::Expr>, SQLQualifier> get_quantified_rhs(
55  const RexScalar* rex_scalar,
56  const RelAlgTranslator& translator) {
57  std::shared_ptr<Analyzer::Expr> rhs;
58  SQLQualifier sql_qual{kONE};
59  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
60  if (!rex_operator) {
61  return std::make_pair(rhs, sql_qual);
62  }
63  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex_operator);
64  const auto qual_str = rex_function ? rex_function->getName() : "";
65  if (qual_str == "PG_ANY"sv || qual_str == "PG_ALL"sv) {
66  CHECK_EQ(size_t(1), rex_function->size());
67  rhs = translator.translateScalarRex(rex_function->getOperand(0));
68  sql_qual = (qual_str == "PG_ANY"sv) ? kANY : kALL;
69  }
70  if (!rhs && rex_operator->getOperator() == kCAST) {
71  CHECK_EQ(size_t(1), rex_operator->size());
72  std::tie(rhs, sql_qual) = get_quantified_rhs(rex_operator->getOperand(0), translator);
73  }
74  return std::make_pair(rhs, sql_qual);
75 }
76 
77 std::pair<Datum, bool> datum_from_scalar_tv(const ScalarTargetValue* scalar_tv,
78  const SQLTypeInfo& ti) noexcept {
79  Datum d{0};
80  bool is_null_const{false};
81  switch (ti.get_type()) {
82  case kBOOLEAN: {
83  const auto ival = boost::get<int64_t>(scalar_tv);
84  CHECK(ival);
85  if (*ival == inline_int_null_val(ti)) {
86  is_null_const = true;
87  } else {
88  d.boolval = *ival;
89  }
90  break;
91  }
92  case kTINYINT: {
93  const auto ival = boost::get<int64_t>(scalar_tv);
94  CHECK(ival);
95  if (*ival == inline_int_null_val(ti)) {
96  is_null_const = true;
97  } else {
98  d.tinyintval = *ival;
99  }
100  break;
101  }
102  case kSMALLINT: {
103  const auto ival = boost::get<int64_t>(scalar_tv);
104  CHECK(ival);
105  if (*ival == inline_int_null_val(ti)) {
106  is_null_const = true;
107  } else {
108  d.smallintval = *ival;
109  }
110  break;
111  }
112  case kINT: {
113  const auto ival = boost::get<int64_t>(scalar_tv);
114  CHECK(ival);
115  if (*ival == inline_int_null_val(ti)) {
116  is_null_const = true;
117  } else {
118  d.intval = *ival;
119  }
120  break;
121  }
122  case kDECIMAL:
123  case kNUMERIC:
124  case kBIGINT:
125  case kDATE:
126  case kTIME:
127  case kTIMESTAMP: {
128  const auto ival = boost::get<int64_t>(scalar_tv);
129  CHECK(ival);
130  if (*ival == inline_int_null_val(ti)) {
131  is_null_const = true;
132  } else {
133  d.bigintval = *ival;
134  }
135  break;
136  }
137  case kDOUBLE: {
138  const auto dval = boost::get<double>(scalar_tv);
139  CHECK(dval);
140  if (*dval == inline_fp_null_val(ti)) {
141  is_null_const = true;
142  } else {
143  d.doubleval = *dval;
144  }
145  break;
146  }
147  case kFLOAT: {
148  const auto fval = boost::get<float>(scalar_tv);
149  CHECK(fval);
150  if (*fval == inline_fp_null_val(ti)) {
151  is_null_const = true;
152  } else {
153  d.floatval = *fval;
154  }
155  break;
156  }
157  case kTEXT:
158  case kVARCHAR:
159  case kCHAR: {
160  auto nullable_sptr = boost::get<NullableString>(scalar_tv);
161  CHECK(nullable_sptr);
162  if (boost::get<void*>(nullable_sptr)) {
163  is_null_const = true;
164  } else {
165  auto sptr = boost::get<std::string>(nullable_sptr);
166  d.stringval = new std::string(*sptr);
167  }
168  break;
169  }
170  default:
171  CHECK(false) << "Unhandled type: " << ti.get_type_name();
172  }
173  return {d, is_null_const};
174 }
175 
176 } // namespace
177 
178 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarRex(
179  const RexScalar* rex) const {
180  const auto rex_input = dynamic_cast<const RexInput*>(rex);
181  if (rex_input) {
182  return translateInput(rex_input);
183  }
184  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex);
185  if (rex_literal) {
186  return translateLiteral(rex_literal);
187  }
188  const auto rex_window_function = dynamic_cast<const RexWindowFunctionOperator*>(rex);
189  if (rex_window_function) {
190  return translateWindowFunction(rex_window_function);
191  }
192  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex);
193  if (rex_function) {
194  return translateFunction(rex_function);
195  }
196  const auto rex_operator = dynamic_cast<const RexOperator*>(rex);
197  if (rex_operator) {
198  return translateOper(rex_operator);
199  }
200  const auto rex_case = dynamic_cast<const RexCase*>(rex);
201  if (rex_case) {
202  return translateCase(rex_case);
203  }
204  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rex);
205  if (rex_subquery) {
206  return translateScalarSubquery(rex_subquery);
207  }
208  CHECK(false);
209  return nullptr;
210 }
211 
212 namespace {
213 
214 bool is_agg_supported_for_type(const SQLAgg& agg_kind, const SQLTypeInfo& arg_ti) {
215  if ((agg_kind == kMIN || agg_kind == kMAX || agg_kind == kSUM || agg_kind == kAVG) &&
216  !(arg_ti.is_number() || arg_ti.is_boolean() || arg_ti.is_time())) {
217  return false;
218  }
219 
220  return true;
221 }
222 
223 } // namespace
224 
225 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAggregateRex(
226  const RexAgg* rex,
227  const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
228  const auto agg_kind = rex->getKind();
229  const bool is_distinct = rex->isDistinct();
230  const bool takes_arg{rex->size() > 0};
231  std::shared_ptr<Analyzer::Expr> arg_expr;
232  std::shared_ptr<Analyzer::Constant> err_rate;
233  if (takes_arg) {
234  const auto operand = rex->getOperand(0);
235  CHECK_LT(operand, scalar_sources.size());
236  CHECK_LE(rex->size(), 2u);
237  arg_expr = scalar_sources[operand];
238  if (agg_kind == kAPPROX_COUNT_DISTINCT && rex->size() == 2) {
239  err_rate = std::dynamic_pointer_cast<Analyzer::Constant>(
240  scalar_sources[rex->getOperand(1)]);
241  if (!err_rate || err_rate->get_type_info().get_type() != kINT ||
242  err_rate->get_constval().intval < 1 || err_rate->get_constval().intval > 100) {
243  throw std::runtime_error(
244  "APPROX_COUNT_DISTINCT's second parameter should be SMALLINT literal between "
245  "1 and 100");
246  }
247  }
248  const auto& arg_ti = arg_expr->get_type_info();
249  if (!is_agg_supported_for_type(agg_kind, arg_ti)) {
250  throw std::runtime_error("Aggregate on " + arg_ti.get_type_name() +
251  " is not supported yet.");
252  }
253  }
254  const auto agg_ti = get_agg_type(agg_kind, arg_expr.get());
255  return makeExpr<Analyzer::AggExpr>(agg_ti, agg_kind, arg_expr, is_distinct, err_rate);
256 }
257 
// Translates a Rex literal into an Analyzer expression.
//
// Two type descriptions are built up front: `lit_ti` from the literal's own type,
// scale and precision, and `target_ti` from its target type. The switch is on the
// literal's own type; decimal and double literals get a cast to `target_ti` when
// the two descriptions differ. An unexpected literal type is logged as FATAL.
258 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLiteral(
259  const RexLiteral* rex_literal) {
260  auto lit_ti = build_type_info(
261  rex_literal->getType(), rex_literal->getScale(), rex_literal->getPrecision());
262  auto target_ti = build_type_info(rex_literal->getTargetType(),
263  rex_literal->getTypeScale(),
264  rex_literal->getTypePrecision());
265  switch (rex_literal->getType()) {
266  case kINT:
267  case kBIGINT: {
268  Datum d;
269  d.bigintval = rex_literal->getVal<int64_t>();
270  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
271  }
272  case kDECIMAL: {
273  const auto val = rex_literal->getVal<int64_t>();
274  const int precision = rex_literal->getPrecision();
275  const int scale = rex_literal->getScale();
  // A scale-less decimal headed for a floating point target becomes an fp constant
  // directly.
276  if (target_ti.is_fp() && !scale) {
277  return make_fp_constant(val, target_ti);
278  }
  // NOTE(review): the line completing this conditional expression (the alternative
  // used when `scale` is zero) is not present in this listing -- restore it from the
  // original file.
279  auto lit_expr = scale ? Parser::FixedPtLiteral::analyzeValue(val, scale, precision)
281  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
282  }
283  case kTEXT: {
284  return Parser::StringLiteral::analyzeValue(rex_literal->getVal<std::string>());
285  }
286  case kBOOLEAN: {
287  Datum d;
288  d.boolval = rex_literal->getVal<bool>();
289  return makeExpr<Analyzer::Constant>(kBOOLEAN, false, d);
290  }
291  case kDOUBLE: {
292  Datum d;
293  d.doubleval = rex_literal->getVal<double>();
294  auto lit_expr = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
295  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
296  }
297  case kINTERVAL_DAY_TIME:
298  case kINTERVAL_YEAR_MONTH: {
299  Datum d;
300  d.bigintval = rex_literal->getVal<int64_t>();
301  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
302  }
303  case kTIME:
304  case kTIMESTAMP: {
  // A TIMESTAMP with a positive precision keeps the literal value unchanged; every
  // other case divides it by 1000.
  // NOTE(review): presumably a milliseconds-to-seconds conversion -- confirm.
305  Datum d;
306  d.bigintval =
307  rex_literal->getType() == kTIMESTAMP && rex_literal->getPrecision() > 0
308  ? rex_literal->getVal<int64_t>()
309  : rex_literal->getVal<int64_t>() / 1000;
310  return makeExpr<Analyzer::Constant>(
311  SQLTypeInfo(rex_literal->getType(), rex_literal->getPrecision(), 0, false),
312  false,
313  d);
314  }
315  case kDATE: {
  // The literal value is multiplied by 24 * 3600.
  // NOTE(review): presumably a days-to-seconds conversion -- confirm.
316  Datum d;
317  d.bigintval = rex_literal->getVal<int64_t>() * 24 * 3600;
318  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
319  }
320  case kNULLT: {
  // A NULL literal with an array target becomes an ArrayExpr; any other target
  // becomes a null Constant of the target type.
321  if (target_ti.is_array()) {
  // NOTE(review): the declaration of `args` used below is not present in this
  // listing -- restore it from the original file.
323  // defaulting to valid sub-type for convenience
324  target_ti.set_subtype(kBOOLEAN);
325  return makeExpr<Analyzer::ArrayExpr>(target_ti, args, true);
326  }
327  return makeExpr<Analyzer::Constant>(rex_literal->getTargetType(), true, Datum{0});
328  }
329  default: {
330  LOG(FATAL) << "Unexpected literal type " << lit_ti.get_type_name();
331  }
332  }
333  return nullptr;
334 }
335 
336 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarSubquery(
337  const RexSubQuery* rex_subquery) const {
338  if (just_explain_) {
339  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
340  }
341  CHECK(rex_subquery);
342  auto result = rex_subquery->getExecutionResult();
343  auto row_set = result->getRows();
344  const size_t row_count = row_set->rowCount();
345  if (row_count > size_t(1)) {
346  throw std::runtime_error("Scalar sub-query returned multiple rows");
347  }
348  if (row_count == size_t(0)) {
349  if (row_set->isValidationOnlyRes()) {
350  Datum d{0};
351  return makeExpr<Analyzer::Constant>(rex_subquery->getType(), false, d);
352  }
353  throw std::runtime_error("Scalar sub-query returned no results");
354  }
355  CHECK_EQ(row_count, size_t(1));
356  row_set->moveToBegin();
357  auto first_row = row_set->getNextRow(false, false);
358  CHECK_EQ(first_row.size(), size_t(1));
359  auto scalar_tv = boost::get<ScalarTargetValue>(&first_row[0]);
360  auto ti = rex_subquery->getType();
361  if (ti.is_string()) {
362  throw std::runtime_error("Scalar sub-queries which return strings not supported");
363  }
364  Datum d{0};
365  bool is_null_const{false};
366  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
367  return makeExpr<Analyzer::Constant>(ti, is_null_const, d);
368 }
369 
370 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInput(
371  const RexInput* rex_input) const {
372  const auto source = rex_input->getSourceNode();
373  const auto it_rte_idx = input_to_nest_level_.find(source);
374  CHECK(it_rte_idx != input_to_nest_level_.end())
375  << "Not found in input_to_nest_level_, source=" << source->toString();
376  const int rte_idx = it_rte_idx->second;
377  const auto scan_source = dynamic_cast<const RelScan*>(source);
378  const auto& in_metainfo = source->getOutputMetainfo();
379  if (scan_source) {
380  // We're at leaf (scan) level and not supposed to have input metadata,
381  // the name and type information come directly from the catalog.
382  CHECK(in_metainfo.empty());
383  const auto table_desc = scan_source->getTableDescriptor();
384  const auto cd =
385  cat_.getMetadataForColumnBySpi(table_desc->tableId, rex_input->getIndex() + 1);
386  CHECK(cd);
387  auto col_ti = cd->columnType;
388  if (col_ti.is_string()) {
389  col_ti.set_type(kTEXT);
390  }
391  if (cd->isVirtualCol) {
392  // TODO(alex): remove at some point, we only need this fixup for backwards
393  // compatibility with old imported data
394  CHECK_EQ("rowid", cd->columnName);
395  col_ti.set_size(8);
396  }
397  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
398  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
399  col_ti.set_notnull(false);
400  }
401  return std::make_shared<Analyzer::ColumnVar>(
402  col_ti, table_desc->tableId, cd->columnId, rte_idx);
403  }
404  CHECK(!in_metainfo.empty()) << "for " << source->toString();
405  CHECK_GE(rte_idx, 0);
406  const size_t col_id = rex_input->getIndex();
407  CHECK_LT(col_id, in_metainfo.size());
408  auto col_ti = in_metainfo[col_id].get_type_info();
409 
410  if (join_types_.size() > 0) {
411  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
412  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
413  col_ti.set_notnull(false);
414  }
415  }
416 
417  return std::make_shared<Analyzer::ColumnVar>(col_ti, -source->getId(), col_id, rte_idx);
418 }
419 
420 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUoper(
421  const RexOperator* rex_operator) const {
422  CHECK_EQ(size_t(1), rex_operator->size());
423  const auto operand_expr = translateScalarRex(rex_operator->getOperand(0));
424  const auto sql_op = rex_operator->getOperator();
425  switch (sql_op) {
426  case kCAST: {
427  const auto& target_ti = rex_operator->getType();
428  CHECK_NE(kNULLT, target_ti.get_type());
429  const auto& operand_ti = operand_expr->get_type_info();
430  if (operand_ti.is_string() && target_ti.is_string()) {
431  return operand_expr;
432  }
433  if (target_ti.is_time() ||
434  operand_ti
435  .is_string()) { // TODO(alex): check and unify with the rest of the cases
436  // Do not propogate encoding on small dates
437  return target_ti.is_date_in_days()
438  ? operand_expr->add_cast(SQLTypeInfo(kDATE, false))
439  : operand_expr->add_cast(target_ti);
440  }
441  if (!operand_ti.is_string() && target_ti.is_string()) {
442  return operand_expr->add_cast(target_ti);
443  }
444 
445  return std::make_shared<Analyzer::UOper>(target_ti, false, sql_op, operand_expr);
446  }
447  case kNOT:
448  case kISNULL: {
449  return std::make_shared<Analyzer::UOper>(kBOOLEAN, sql_op, operand_expr);
450  }
451  case kISNOTNULL: {
452  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, operand_expr);
453  return std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
454  }
455  case kMINUS: {
456  const auto& ti = operand_expr->get_type_info();
457  return std::make_shared<Analyzer::UOper>(ti, false, kUMINUS, operand_expr);
458  }
459  case kUNNEST: {
460  const auto& ti = operand_expr->get_type_info();
461  CHECK(ti.is_array());
462  return makeExpr<Analyzer::UOper>(ti.get_elem_type(), false, kUNNEST, operand_expr);
463  }
464  default:
465  CHECK(false);
466  }
467  return nullptr;
468 }
469 
470 namespace {
471 
// Builds an Analyzer::InValues expression `arg IN (...)` from the rows of `val_set`.
//
// The entry range of `val_set` is split into contiguous slices, at most one per
// cpu_threads(). Each slice is converted on its own std::async task into a private
// list of constants, and the lists are spliced together in slice order once every
// task has finished. Returns nullptr in the early-exit case at the top. Throws
// std::runtime_error when the watchdog is enabled and the set has more than 5M rows.
472 std::shared_ptr<Analyzer::Expr> get_in_values_expr(std::shared_ptr<Analyzer::Expr> arg,
473  const ResultSet& val_set) {
  // NOTE(review): the condition guarding this early `return nullptr` is not present
  // in this listing -- restore it from the original file.
475  return nullptr;
476  }
477  if (val_set.rowCount() > 5000000 && g_enable_watchdog) {
478  throw std::runtime_error(
479  "Unable to handle 'expr IN (subquery)', subquery returned 5M+ rows.");
480  }
481  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
482  const size_t fetcher_count = cpu_threads();
  // One output list per potential task, so tasks never write to a shared container.
483  std::vector<std::list<std::shared_ptr<Analyzer::Expr>>> expr_set(
484  fetcher_count, std::list<std::shared_ptr<Analyzer::Expr>>());
485  std::vector<std::future<void>> fetcher_threads;
486  const auto& ti = arg->get_type_info();
487  const auto entry_count = val_set.entryCount();
  // `stride` is entry_count / fetcher_count rounded up; the loop stops early once
  // the slices cover every entry.
488  for (size_t i = 0,
489  start_entry = 0,
490  stride = (entry_count + fetcher_count - 1) / fetcher_count;
491  i < fetcher_count && start_entry < entry_count;
492  ++i, start_entry += stride) {
493  const auto end_entry = std::min(start_entry + stride, entry_count);
494  fetcher_threads.push_back(std::async(
495  std::launch::async,
  // The lambda captures `val_set` and `ti` by reference; every future is waited on
  // below before this function returns.
496  [&](std::list<std::shared_ptr<Analyzer::Expr>>& in_vals,
497  const size_t start,
498  const size_t end) {
499  for (auto index = start; index < end; ++index) {
500  auto row = val_set.getRowAt(index);
501  if (row.empty()) {
502  continue;
503  }
504  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
505  Datum d{0};
506  bool is_null_const{false};
507  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
508  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
  // NOTE(review): `ti_none_encoded` is prepared but never used; the constant below
  // is built with `ti`. Confirm whether the constant was meant to carry the
  // none-encoded type.
509  auto ti_none_encoded = ti;
510  ti_none_encoded.set_compression(kENCODING_NONE);
511  auto none_encoded_string =
512  makeExpr<Analyzer::Constant>(ti, is_null_const, d);
513  auto dict_encoded_string = std::make_shared<Analyzer::UOper>(
514  ti, false, kCAST, none_encoded_string);
515  in_vals.push_back(dict_encoded_string);
516  } else {
517  in_vals.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
518  }
519  }
520  },
521  std::ref(expr_set[i]),
522  start_entry,
523  end_entry));
524  }
  // Wait for every task; get() also rethrows an exception raised inside a task.
525  for (auto& child : fetcher_threads) {
526  child.get();
527  }
528 
529  val_set.moveToBegin();
  // splice() moves the nodes across, so the per-task lists are merged without copies.
530  for (auto& exprs : expr_set) {
531  value_exprs.splice(value_exprs.end(), exprs);
532  }
533  return makeExpr<Analyzer::InValues>(arg, value_exprs);
534 }
535 
536 } // namespace
537 
538 // Creates an Analyzer expression for an IN subquery which subsequently goes through the
539 // regular Executor::codegen() mechanism. The creation of the expression out of
540 // subquery's result set is parallelized whenever possible. In addition, take advantage
541 // of additional information that elements in the right hand side are constants; see
542 // getInIntegerSetExpr().
543 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInOper(
544  const RexOperator* rex_operator) const {
545  if (just_explain_) {
546  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
547  }
548  CHECK(rex_operator->size() == 2);
549  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
550  const auto rhs = rex_operator->getOperand(1);
551  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rhs);
552  CHECK(rex_subquery);
553  auto ti = lhs->get_type_info();
554  auto result = rex_subquery->getExecutionResult();
555  CHECK(result);
556  auto& row_set = result->getRows();
557  CHECK_EQ(size_t(1), row_set->colCount());
558  const auto& rhs_ti = row_set->getColType(0);
559  if (rhs_ti.get_type() != ti.get_type()) {
560  throw std::runtime_error(
561  "The two sides of the IN operator must have the same type; found " +
562  ti.get_type_name() + " and " + rhs_ti.get_type_name());
563  }
564  row_set->moveToBegin();
565  if (row_set->entryCount() > 10000) {
566  std::shared_ptr<Analyzer::Expr> expr;
567  if ((ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT)) &&
568  !row_set->getQueryMemDesc().didOutputColumnar()) {
569  expr = getInIntegerSetExpr(lhs, *row_set);
570  // Handle the highly unlikely case when the InIntegerSet ended up being tiny.
571  // Just let it fall through the usual InValues path at the end of this method,
572  // its codegen knows to use inline comparisons for few values.
573  if (expr && std::static_pointer_cast<Analyzer::InIntegerSet>(expr)
574  ->get_value_list()
575  .size() <= 100) {
576  expr = nullptr;
577  }
578  } else {
579  expr = get_in_values_expr(lhs, *row_set);
580  }
581  if (expr) {
582  return expr;
583  }
584  }
585  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
586  while (true) {
587  auto row = row_set->getNextRow(true, false);
588  if (row.empty()) {
589  break;
590  }
591  if (g_enable_watchdog && value_exprs.size() >= 10000) {
592  throw std::runtime_error(
593  "Unable to handle 'expr IN (subquery)', subquery returned 10000+ rows.");
594  }
595  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
596  Datum d{0};
597  bool is_null_const{false};
598  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
599  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
600  auto ti_none_encoded = ti;
601  ti_none_encoded.set_compression(kENCODING_NONE);
602  auto none_encoded_string = makeExpr<Analyzer::Constant>(ti, is_null_const, d);
603  auto dict_encoded_string =
604  std::make_shared<Analyzer::UOper>(ti, false, kCAST, none_encoded_string);
605  value_exprs.push_back(dict_encoded_string);
606  } else {
607  value_exprs.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
608  }
609  }
610  return makeExpr<Analyzer::InValues>(lhs, value_exprs);
611 }
612 
613 namespace {
614 
615 const size_t g_max_integer_set_size{1 << 25};
616 
618  std::vector<int64_t>& in_vals,
619  std::atomic<size_t>& total_in_vals_count,
620  const ResultSet* values_rowset,
621  const std::pair<int64_t, int64_t> values_rowset_slice,
622  const StringDictionaryProxy* source_dict,
623  const StringDictionaryProxy* dest_dict,
624  const int64_t needle_null_val) {
625  CHECK(in_vals.empty());
626  bool dicts_are_equal = source_dict == dest_dict;
627  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
628  ++index) {
629  const auto row = values_rowset->getOneColRow(index);
630  if (UNLIKELY(!row.valid)) {
631  continue;
632  }
633  if (dicts_are_equal) {
634  in_vals.push_back(row.value);
635  } else {
636  const int string_id =
637  row.value == needle_null_val
638  ? needle_null_val
639  : dest_dict->getIdOfString(source_dict->getString(row.value));
640  if (string_id != StringDictionary::INVALID_STR_ID) {
641  in_vals.push_back(string_id);
642  }
643  }
644  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
645  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
646  throw std::runtime_error(
647  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
648  }
649  }
650 }
651 
652 void fill_integer_in_vals(std::vector<int64_t>& in_vals,
653  std::atomic<size_t>& total_in_vals_count,
654  const ResultSet* values_rowset,
655  const std::pair<int64_t, int64_t> values_rowset_slice) {
656  CHECK(in_vals.empty());
657  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
658  ++index) {
659  const auto row = values_rowset->getOneColRow(index);
660  if (row.valid) {
661  in_vals.push_back(row.value);
662  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
663  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
664  throw std::runtime_error(
665  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
666  }
667  }
668  }
669 }
670 
671 // Multi-node counterpart of the other version. Saves round-trips, which is crucial
672 // for a big right-hand side result. It only handles physical string dictionary ids,
673 // therefore it won't be able to handle a right-hand side sub-query with a CASE
674 // returning literals on some branches. That case isn't hard to handle either, but
675 // it's not clear it's actually important in practice.
676 // RelAlgTranslator::getInIntegerSetExpr makes sure, by checking the encodings, that
677 // this function isn't called in such cases.
// NOTE(review): the opening line of this definition (return type and function name)
// is not present in this listing -- restore it from the original file.
//
// Collects the dictionary ids of one slice of `values_rowset` into `in_vals`,
// expressed in terms of the dictionary identified by `dest_dict_ref`.
// `values_rowset_slice` is the half-open entry range [first, second) to read and
// `in_vals` must be empty on entry. NULL values (`needle_null_val`) are collapsed
// into at most one entry. When the two dictionary references differ, the ids are
// translated in one translate_string_ids() call against the first leaf host.
679  std::vector<int64_t>& in_vals,
680  std::atomic<size_t>& total_in_vals_count,
681  const ResultSet* values_rowset,
682  const std::pair<int64_t, int64_t> values_rowset_slice,
683  const std::vector<LeafHostInfo>& leaf_hosts,
684  const DictRef source_dict_ref,
685  const DictRef dest_dict_ref,
686  const int32_t dest_generation,
687  const int64_t needle_null_val) {
688  CHECK(in_vals.empty());
689  std::vector<int32_t> source_ids;
  // NOTE(review): this reserves room for the whole row set although only one slice
  // is read, and it happens even on the shared-dictionary path, which never uses
  // `source_ids` -- consider reserving the slice size on the translation path only.
690  source_ids.reserve(values_rowset->entryCount());
691  bool has_nulls = false;
  // Shared dictionary: ids are already valid on the destination side.
692  if (source_dict_ref == dest_dict_ref) {
693  in_vals.reserve(values_rowset_slice.second - values_rowset_slice.first +
694  1); // Add 1 to cover interval
695  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
696  ++index) {
697  const auto row = values_rowset->getOneColRow(index);
698  if (!row.valid) {
699  continue;
700  }
701  if (row.value != needle_null_val) {
702  in_vals.push_back(row.value);
  // Watchdog accounting: the shared counter advances by 1024 whenever this task's
  // output reaches a multiple of 1024 values.
703  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
704  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
705  throw std::runtime_error(
706  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
707  }
708  } else {
709  has_nulls = true;
710  }
711  }
712  if (has_nulls) {
713  in_vals.push_back(
714  needle_null_val); // we've deduped null values as an optimization, although
715  // this is not required by consumer
716  }
717  return;
718  }
719  // Code path below is for when dictionaries are not shared
720  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
721  ++index) {
722  const auto row = values_rowset->getOneColRow(index);
723  if (row.valid) {
724  if (row.value != needle_null_val) {
725  source_ids.push_back(row.value);
726  } else {
727  has_nulls = true;
728  }
729  }
730  }
731  std::vector<int32_t> dest_ids;
  // NOTE(review): `leaf_hosts.front()` requires a non-empty vector; nothing in this
  // function checks that -- confirm the caller guarantees it.
732  translate_string_ids(dest_ids,
733  leaf_hosts.front(),
734  dest_dict_ref,
735  source_ids,
736  source_dict_ref,
737  dest_generation);
738  CHECK_EQ(dest_ids.size(), source_ids.size());
739  in_vals.reserve(dest_ids.size() + (has_nulls ? 1 : 0));
740  if (has_nulls) {
741  in_vals.push_back(needle_null_val);
742  }
  // Ids that could not be translated (INVALID_STR_ID) are dropped.
743  for (const int32_t dest_id : dest_ids) {
744  if (dest_id != StringDictionary::INVALID_STR_ID) {
745  in_vals.push_back(dest_id);
746  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
747  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
748  throw std::runtime_error(
749  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
750  }
751  }
752  }
753 }
754 
755 } // namespace
756 
757 // The typical IN subquery involves either dictionary-encoded strings or integers.
758 // Analyzer::InValues is a very heavy representation of the right hand side of such
759 // a query since we already know the right hand would be a list of Analyzer::Constant
760 // shared pointers. We can avoid the big overhead of each Analyzer::Constant and the
761 // refcounting associated with shared pointers by creating an abbreviated InIntegerSet
762 // representation of the IN expression which takes advantage of this information.
// Builds an Analyzer::InIntegerSet expression `arg IN (...)` from the single column
// of `val_set`, for integer or dictionary-encoded string arguments.
//
// The entry range of `val_set` is split into contiguous slices, at most one per
// cpu_threads(). Each slice is read on its own std::async task into a private
// vector, and the vectors are concatenated in slice order once every task has
// finished. Returns nullptr in the early-exit case at the top and in the
// distributed string case described below.
763 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::getInIntegerSetExpr(
764  std::shared_ptr<Analyzer::Expr> arg,
765  const ResultSet& val_set) const {
  // NOTE(review): the condition guarding this early `return nullptr` is not present
  // in this listing -- restore it from the original file.
767  return nullptr;
768  }
769  std::vector<int64_t> value_exprs;
770  const size_t fetcher_count = cpu_threads();
  // One output vector per potential task, so tasks never write to a shared container.
771  std::vector<std::vector<int64_t>> expr_set(fetcher_count);
772  std::vector<std::future<void>> fetcher_threads;
773  const auto& arg_type = arg->get_type_info();
774  const auto entry_count = val_set.entryCount();
775  CHECK_EQ(size_t(1), val_set.colCount());
776  const auto& col_type = val_set.getColType(0);
  // In cluster mode a string argument is only handled when both sides carry a
  // positive comp_param; otherwise the caller has to use another path.
777  if (g_cluster && arg_type.is_string() &&
778  (col_type.get_comp_param() <= 0 || arg_type.get_comp_param() <= 0)) {
779  // Skip this case for now, see comment for fill_dictionary_encoded_in_vals.
780  return nullptr;
781  }
  // Shared across all tasks for the watchdog's size accounting.
782  std::atomic<size_t> total_in_vals_count{0};
  // `stride` is entry_count / fetcher_count rounded up; the loop stops early once
  // the slices cover every entry.
783  for (size_t i = 0,
784  start_entry = 0,
785  stride = (entry_count + fetcher_count - 1) / fetcher_count;
786  i < fetcher_count && start_entry < entry_count;
787  ++i, start_entry += stride) {
788  expr_set[i].reserve(entry_count / fetcher_count);
789  const auto end_entry = std::min(start_entry + stride, entry_count);
790  if (arg_type.is_string()) {
791  CHECK_EQ(kENCODING_DICT, arg_type.get_compression());
792  // const int32_t dest_dict_id = arg_type.get_comp_param();
793  // const int32_t source_dict_id = col_type.get_comp_param();
794  const DictRef dest_dict_ref(arg_type.get_comp_param(), cat_.getDatabaseId());
795  const DictRef source_dict_ref(col_type.get_comp_param(), cat_.getDatabaseId());
  // `dd` is the proxy for the argument's dictionary, `sd` the one for the
  // sub-query column's dictionary.
796  const auto dd = executor_->getStringDictionaryProxy(
797  arg_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
798  const auto sd = executor_->getStringDictionaryProxy(
799  col_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
800  CHECK(sd);
801  const auto needle_null_val = inline_int_null_val(arg_type);
802  fetcher_threads.push_back(std::async(
803  std::launch::async,
804  [this,
805  &val_set,
806  &total_in_vals_count,
807  sd,
808  dd,
809  source_dict_ref,
810  dest_dict_ref,
811  needle_null_val](
812  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
813  if (g_cluster) {
814  CHECK_GE(dd->getGeneration(), 0);
  // NOTE(review): the line that opens this call (callee name and first argument) is
  // not present in this listing, and neither is one argument line further down --
  // restore both from the original file.
816  total_in_vals_count,
817  &val_set,
818  {start, end},
820  source_dict_ref,
821  dest_dict_ref,
822  dd->getGeneration(),
823  needle_null_val);
824  } else {
  // NOTE(review): the line that opens this call (callee name and first argument) is
  // not present in this listing -- restore it from the original file.
826  total_in_vals_count,
827  &val_set,
828  {start, end},
829  sd,
830  dd,
831  needle_null_val);
832  }
833  },
834  std::ref(expr_set[i]),
835  start_entry,
836  end_entry));
837  } else {
838  CHECK(arg_type.is_integer());
839  fetcher_threads.push_back(std::async(
840  std::launch::async,
841  [&val_set, &total_in_vals_count](
842  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
843  fill_integer_in_vals(in_vals, total_in_vals_count, &val_set, {start, end});
844  },
845  std::ref(expr_set[i]),
846  start_entry,
847  end_entry));
848  }
849  }
  // Wait for every task; get() also rethrows an exception raised inside a task.
850  for (auto& child : fetcher_threads) {
851  child.get();
852  }
853 
854  val_set.moveToBegin();
855  value_exprs.reserve(entry_count);
856  for (auto& exprs : expr_set) {
857  value_exprs.insert(value_exprs.end(), exprs.begin(), exprs.end());
858  }
  // The last argument is true only when both the argument and the sub-query column
  // are declared NOT NULL.
859  return makeExpr<Analyzer::InIntegerSet>(
860  arg, value_exprs, arg_type.get_notnull() && col_type.get_notnull());
861 }
862 
863 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOper(
864  const RexOperator* rex_operator) const {
865  CHECK_GT(rex_operator->size(), size_t(0));
866  if (rex_operator->size() == 1) {
867  return translateUoper(rex_operator);
868  }
869  const auto sql_op = rex_operator->getOperator();
870  if (sql_op == kIN) {
871  return translateInOper(rex_operator);
872  }
873  if (sql_op == kMINUS || sql_op == kPLUS) {
874  auto date_plus_minus = translateDatePlusMinus(rex_operator);
875  if (date_plus_minus) {
876  return date_plus_minus;
877  }
878  }
879  if (sql_op == kOVERLAPS) {
880  return translateOverlapsOper(rex_operator);
881  } else if (IS_COMPARISON(sql_op)) {
882  auto geo_comp = translateGeoComparison(rex_operator);
883  if (geo_comp) {
884  return geo_comp;
885  }
886  }
887  auto lhs = translateScalarRex(rex_operator->getOperand(0));
888  for (size_t i = 1; i < rex_operator->size(); ++i) {
889  std::shared_ptr<Analyzer::Expr> rhs;
890  SQLQualifier sql_qual{kONE};
891  const auto rhs_op = rex_operator->getOperand(i);
892  std::tie(rhs, sql_qual) = get_quantified_rhs(rhs_op, *this);
893  if (!rhs) {
894  rhs = translateScalarRex(rhs_op);
895  }
896  CHECK(rhs);
897  lhs = Parser::OperExpr::normalize(sql_op, sql_qual, lhs, rhs);
898  }
899  return lhs;
900 }
901 
902 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOverlapsOper(
903  const RexOperator* rex_operator) const {
904  const auto sql_op = rex_operator->getOperator();
905  CHECK(sql_op == kOVERLAPS);
906 
907  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
908  const auto lhs_ti = lhs->get_type_info();
909  if (lhs_ti.is_geometry()) {
910  return translateGeoOverlapsOper(rex_operator);
911  } else {
912  throw std::runtime_error(
913  "Overlaps equivalence is currently only supported for geospatial types");
914  }
915 }
916 
917 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCase(
918  const RexCase* rex_case) const {
919  std::shared_ptr<Analyzer::Expr> else_expr;
920  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
921  expr_list;
922  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
923  const auto when_expr = translateScalarRex(rex_case->getWhen(i));
924  const auto then_expr = translateScalarRex(rex_case->getThen(i));
925  expr_list.emplace_back(when_expr, then_expr);
926  }
927  if (rex_case->getElse()) {
928  else_expr = translateScalarRex(rex_case->getElse());
929  }
930  return Parser::CaseExpr::normalize(expr_list, else_expr);
931 }
932 
933 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLike(
934  const RexFunctionOperator* rex_function) const {
935  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
936  const auto arg = translateScalarRex(rex_function->getOperand(0));
937  const auto like = translateScalarRex(rex_function->getOperand(1));
938  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(like)) {
939  throw std::runtime_error("The matching pattern must be a literal.");
940  }
941  const auto escape = (rex_function->size() == 3)
942  ? translateScalarRex(rex_function->getOperand(2))
943  : nullptr;
944  const bool is_ilike = rex_function->getName() == "PG_ILIKE"sv;
945  return Parser::LikeExpr::get(arg, like, escape, is_ilike, false);
946 }
947 
948 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRegexp(
949  const RexFunctionOperator* rex_function) const {
950  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
951  const auto arg = translateScalarRex(rex_function->getOperand(0));
952  const auto pattern = translateScalarRex(rex_function->getOperand(1));
953  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(pattern)) {
954  throw std::runtime_error("The matching pattern must be a literal.");
955  }
956  const auto escape = (rex_function->size() == 3)
957  ? translateScalarRex(rex_function->getOperand(2))
958  : nullptr;
959  return Parser::RegexpExpr::get(arg, pattern, escape, false);
960 }
961 
962 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLikely(
963  const RexFunctionOperator* rex_function) const {
964  CHECK(rex_function->size() == 1);
965  const auto arg = translateScalarRex(rex_function->getOperand(0));
966  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.9375);
967 }
968 
969 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUnlikely(
970  const RexFunctionOperator* rex_function) const {
971  CHECK(rex_function->size() == 1);
972  const auto arg = translateScalarRex(rex_function->getOperand(0));
973  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.0625);
974 }
975 
976 namespace {
977 
979  const std::shared_ptr<Analyzer::Constant> literal_expr) {
980  if (!literal_expr || literal_expr->get_is_null()) {
981  throw std::runtime_error("The 'DatePart' argument must be a not 'null' literal.");
982  }
983 }
984 
985 } // namespace
986 
987 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateExtract(
988  const RexFunctionOperator* rex_function) const {
989  CHECK_EQ(size_t(2), rex_function->size());
990  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
991  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
993  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
994  const bool is_date_trunc = rex_function->getName() == "PG_DATE_TRUNC"sv;
995  if (is_date_trunc) {
996  return DateTruncExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
997  } else {
998  return ExtractExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
999  }
1000 }
1001 
1002 namespace {
1003 
1004 std::shared_ptr<Analyzer::Constant> makeNumericConstant(const SQLTypeInfo& ti,
1005  const long val) {
1006  CHECK(ti.is_number());
1007  Datum datum{0};
1008  switch (ti.get_type()) {
1009  case kTINYINT: {
1010  datum.tinyintval = val;
1011  break;
1012  }
1013  case kSMALLINT: {
1014  datum.smallintval = val;
1015  break;
1016  }
1017  case kINT: {
1018  datum.intval = val;
1019  break;
1020  }
1021  case kBIGINT: {
1022  datum.bigintval = val;
1023  break;
1024  }
1025  case kDECIMAL:
1026  case kNUMERIC: {
1027  datum.bigintval = val * exp_to_scale(ti.get_scale());
1028  break;
1029  }
1030  case kFLOAT: {
1031  datum.floatval = val;
1032  break;
1033  }
1034  case kDOUBLE: {
1035  datum.doubleval = val;
1036  break;
1037  }
1038  default:
1039  CHECK(false);
1040  }
1041  return makeExpr<Analyzer::Constant>(ti, false, datum);
1042 }
1043 
1044 } // namespace
1045 
1046 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDateadd(
1047  const RexFunctionOperator* rex_function) const {
1048  CHECK_EQ(size_t(3), rex_function->size());
1049  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1050  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1052  const auto number_units = translateScalarRex(rex_function->getOperand(1));
1053  const auto number_units_const =
1054  std::dynamic_pointer_cast<Analyzer::Constant>(number_units);
1055  if (number_units_const && number_units_const->get_is_null()) {
1056  throw std::runtime_error("The 'Interval' argument literal must not be 'null'.");
1057  }
1058  const auto cast_number_units = number_units->add_cast(SQLTypeInfo(kBIGINT, false));
1059  const auto datetime = translateScalarRex(rex_function->getOperand(2));
1060  const auto& datetime_ti = datetime->get_type_info();
1061  if (datetime_ti.get_type() == kTIME) {
1062  throw std::runtime_error("DateAdd operation not supported for TIME.");
1063  }
1064  const auto& field = to_dateadd_field(*timeunit_lit->get_constval().stringval);
1065  const int dim = datetime_ti.get_dimension();
1066  return makeExpr<Analyzer::DateaddExpr>(
1067  SQLTypeInfo(kTIMESTAMP, dim, 0, false), field, cast_number_units, datetime);
1068 }
1069 
1070 namespace {
1071 
1073  CHECK(op == kPLUS);
1074  return "DATETIME_PLUS"s;
1075 }
1076 
1077 } // namespace
1078 
1079 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatePlusMinus(
1080  const RexOperator* rex_operator) const {
1081  if (rex_operator->size() != 2) {
1082  return nullptr;
1083  }
1084  const auto datetime = translateScalarRex(rex_operator->getOperand(0));
1085  const auto datetime_ti = datetime->get_type_info();
1086  if (!datetime_ti.is_timestamp() && !datetime_ti.is_date()) {
1087  if (datetime_ti.get_type() == kTIME) {
1088  throw std::runtime_error("DateTime addition/subtraction not supported for TIME.");
1089  }
1090  return nullptr;
1091  }
1092  const auto rhs = translateScalarRex(rex_operator->getOperand(1));
1093  const auto rhs_ti = rhs->get_type_info();
1094  if (rhs_ti.get_type() == kTIMESTAMP || rhs_ti.get_type() == kDATE) {
1095  if (datetime_ti.is_high_precision_timestamp() ||
1096  rhs_ti.is_high_precision_timestamp()) {
1097  throw std::runtime_error(
1098  "High Precision timestamps are not supported for TIMESTAMPDIFF operation. "
1099  "Use "
1100  "DATEDIFF.");
1101  }
1102  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1103  const auto& rex_operator_ti = rex_operator->getType();
1104  const auto datediff_field =
1105  (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) ? dtSECOND : dtMONTH;
1106  auto result =
1107  makeExpr<Analyzer::DatediffExpr>(bigint_ti, datediff_field, rhs, datetime);
1108  // multiply 1000 to result since expected result should be in millisecond precision.
1109  if (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) {
1110  return makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1111  kMULTIPLY,
1112  kONE,
1113  result,
1114  makeNumericConstant(bigint_ti, 1000));
1115  } else {
1116  return result;
1117  }
1118  }
1119  const auto op = rex_operator->getOperator();
1120  if (op == kPLUS) {
1121  std::vector<std::shared_ptr<Analyzer::Expr>> args = {datetime, rhs};
1122  auto dt_plus = makeExpr<Analyzer::FunctionOper>(
1123  datetime_ti, get_datetimeplus_rewrite_funcname(op), args);
1124  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1125  if (date_trunc) {
1126  return date_trunc;
1127  }
1128  }
1129  const auto interval = fold_expr(rhs.get());
1130  auto interval_ti = interval->get_type_info();
1131  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1132  const auto interval_lit = std::dynamic_pointer_cast<Analyzer::Constant>(interval);
1133  if (interval_ti.get_type() == kINTERVAL_DAY_TIME) {
1134  std::shared_ptr<Analyzer::Expr> interval_sec;
1135  if (interval_lit) {
1136  interval_sec =
1137  makeNumericConstant(bigint_ti,
1138  (op == kMINUS ? -interval_lit->get_constval().bigintval
1139  : interval_lit->get_constval().bigintval) /
1140  1000);
1141  } else {
1142  interval_sec = makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1143  kDIVIDE,
1144  kONE,
1145  interval,
1146  makeNumericConstant(bigint_ti, 1000));
1147  if (op == kMINUS) {
1148  interval_sec =
1149  std::make_shared<Analyzer::UOper>(bigint_ti, false, kUMINUS, interval_sec);
1150  }
1151  }
1152  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daSECOND, interval_sec, datetime);
1153  }
1154  CHECK(interval_ti.get_type() == kINTERVAL_YEAR_MONTH);
1155  const auto interval_months = op == kMINUS ? std::make_shared<Analyzer::UOper>(
1156  bigint_ti, false, kUMINUS, interval)
1157  : interval;
1158  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daMONTH, interval_months, datetime);
1159 }
1160 
1161 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatediff(
1162  const RexFunctionOperator* rex_function) const {
1163  CHECK_EQ(size_t(3), rex_function->size());
1164  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1165  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1167  const auto start = translateScalarRex(rex_function->getOperand(1));
1168  const auto end = translateScalarRex(rex_function->getOperand(2));
1169  const auto field = to_datediff_field(*timeunit_lit->get_constval().stringval);
1170  return makeExpr<Analyzer::DatediffExpr>(SQLTypeInfo(kBIGINT, false), field, start, end);
1171 }
1172 
1173 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatepart(
1174  const RexFunctionOperator* rex_function) const {
1175  CHECK_EQ(size_t(2), rex_function->size());
1176  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1177  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1179  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1180  return ExtractExpr::generate(
1181  from_expr, to_datepart_field(*timeunit_lit->get_constval().stringval));
1182 }
1183 
1184 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLength(
1185  const RexFunctionOperator* rex_function) const {
1186  CHECK_EQ(size_t(1), rex_function->size());
1187  const auto str_arg = translateScalarRex(rex_function->getOperand(0));
1188  return makeExpr<Analyzer::CharLengthExpr>(str_arg->decompress(),
1189  rex_function->getName() == "CHAR_LENGTH"sv);
1190 }
1191 
1192 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateKeyForString(
1193  const RexFunctionOperator* rex_function) const {
1194  const auto& args = translateFunctionArgs(rex_function);
1195  CHECK_EQ(size_t(1), args.size());
1196  const auto expr = dynamic_cast<Analyzer::Expr*>(args[0].get());
1197  if (nullptr == expr || !expr->get_type_info().is_string() ||
1198  expr->get_type_info().is_varlen()) {
1199  throw std::runtime_error(rex_function->getName() +
1200  " expects a dictionary encoded text column.");
1201  }
1202  return makeExpr<Analyzer::KeyForStringExpr>(args[0]);
1203 }
1204 
1205 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSampleRatio(
1206  const RexFunctionOperator* rex_function) const {
1207  CHECK_EQ(size_t(1), rex_function->size());
1208  auto arg = translateScalarRex(rex_function->getOperand(0));
1209  const auto& arg_ti = arg->get_type_info();
1210  if (arg_ti.get_type() != kDOUBLE) {
1211  const auto& double_ti = SQLTypeInfo(kDOUBLE, arg_ti.get_notnull());
1212  arg = arg->add_cast(double_ti);
1213  }
1214  return makeExpr<Analyzer::SampleRatioExpr>(arg);
1215 }
1216 
1217 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentUser(
1218  const RexFunctionOperator* rex_function) const {
1219  std::string user{"SESSIONLESS_USER"};
1220  if (query_state_) {
1221  user = query_state_->getConstSessionInfo()->get_currentUser().userName;
1222  }
1223  return Parser::UserLiteral::get(user);
1224 }
1225 
1226 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLower(
1227  const RexFunctionOperator* rex_function) const {
1228  const auto& args = translateFunctionArgs(rex_function);
1229  CHECK_EQ(size_t(1), args.size());
1230  CHECK(args[0]);
1231 
1232  if (args[0]->get_type_info().is_dict_encoded_string() ||
1233  dynamic_cast<Analyzer::Constant*>(args[0].get())) {
1234  return makeExpr<Analyzer::LowerExpr>(args[0]);
1235  }
1236 
1237  throw std::runtime_error(rex_function->getName() +
1238  " expects a dictionary encoded text column or a literal.");
1239 }
1240 
1242  const RexFunctionOperator* rex_function) const {
1243  const auto ret_ti = rex_function->getType();
1244  const auto arg = translateScalarRex(rex_function->getOperand(0));
1245  const auto arg_ti = arg->get_type_info();
1246  if (!arg_ti.is_array()) {
1247  throw std::runtime_error(rex_function->getName() + " expects an array expression.");
1248  }
1249  if (arg_ti.get_subtype() == kARRAY) {
1250  throw std::runtime_error(rex_function->getName() +
1251  " expects one-dimension array expression.");
1252  }
1253  const auto array_size = arg_ti.get_size();
1254  const auto array_elem_size = arg_ti.get_elem_type().get_array_context_logical_size();
1255 
1256  if (array_size > 0) {
1257  if (array_elem_size <= 0) {
1258  throw std::runtime_error(rex_function->getName() +
1259  ": unexpected array element type.");
1260  }
1261  // Return cardinality of a fixed length array
1262  return makeNumericConstant(ret_ti, array_size / array_elem_size);
1263  }
1264  // Variable length array cardinality will be calculated at runtime
1265  return makeExpr<Analyzer::CardinalityExpr>(arg);
1266 }
1267 
1268 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateItem(
1269  const RexFunctionOperator* rex_function) const {
1270  CHECK_EQ(size_t(2), rex_function->size());
1271  const auto base = translateScalarRex(rex_function->getOperand(0));
1272  const auto index = translateScalarRex(rex_function->getOperand(1));
1273  return makeExpr<Analyzer::BinOper>(
1274  base->get_type_info().get_elem_type(), false, kARRAY_AT, kONE, base, index);
1275 }
1276 
1277 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateNow() const {
1279 }
1280 
1281 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatetime(
1282  const RexFunctionOperator* rex_function) const {
1283  CHECK_EQ(size_t(1), rex_function->size());
1284  const auto arg = translateScalarRex(rex_function->getOperand(0));
1285  const auto arg_lit = std::dynamic_pointer_cast<Analyzer::Constant>(arg);
1286  const std::string datetime_err{R"(Only DATETIME('NOW') supported for now.)"};
1287  if (!arg_lit || arg_lit->get_is_null()) {
1288  throw std::runtime_error(datetime_err);
1289  }
1290  CHECK(arg_lit->get_type_info().is_string());
1291  if (*arg_lit->get_constval().stringval != "NOW"sv) {
1292  throw std::runtime_error(datetime_err);
1293  }
1294  return translateNow();
1295 }
1296 
1297 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAbs(
1298  const RexFunctionOperator* rex_function) const {
1299  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1300  expr_list;
1301  CHECK_EQ(size_t(1), rex_function->size());
1302  const auto operand = translateScalarRex(rex_function->getOperand(0));
1303  const auto& operand_ti = operand->get_type_info();
1304  CHECK(operand_ti.is_number());
1305  const auto zero = makeNumericConstant(operand_ti, 0);
1306  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1307  const auto uminus_operand =
1308  makeExpr<Analyzer::UOper>(operand_ti.get_type(), kUMINUS, operand);
1309  expr_list.emplace_back(lt_zero, uminus_operand);
1310  return makeExpr<Analyzer::CaseExpr>(operand_ti, false, expr_list, operand);
1311 }
1312 
1313 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSign(
1314  const RexFunctionOperator* rex_function) const {
1315  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1316  expr_list;
1317  CHECK_EQ(size_t(1), rex_function->size());
1318  const auto operand = translateScalarRex(rex_function->getOperand(0));
1319  const auto& operand_ti = operand->get_type_info();
1320  CHECK(operand_ti.is_number());
1321  const auto zero = makeNumericConstant(operand_ti, 0);
1322  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1323  expr_list.emplace_back(lt_zero, makeNumericConstant(operand_ti, -1));
1324  const auto eq_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, operand, zero);
1325  expr_list.emplace_back(eq_zero, makeNumericConstant(operand_ti, 0));
1326  const auto gt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kGT, kONE, operand, zero);
1327  expr_list.emplace_back(gt_zero, makeNumericConstant(operand_ti, 1));
1328  return makeExpr<Analyzer::CaseExpr>(
1329  operand_ti,
1330  false,
1331  expr_list,
1332  makeExpr<Analyzer::Constant>(operand_ti, true, Datum{0}));
1333 }
1334 
1335 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOffsetInFragment() const {
1336  return makeExpr<Analyzer::OffsetInFragment>();
1337 }
1338 
1340  const RexFunctionOperator* rex_function) const {
1341  if (rex_function->getType().get_subtype() == kNULLT) {
1342  auto sql_type = rex_function->getType();
1343  CHECK(sql_type.get_type() == kARRAY);
1344 
1345  // FIX-ME: Deal with NULL arrays
1346  auto translated_function_args(translateFunctionArgs(rex_function));
1347  if (translated_function_args.size() > 0) {
1348  const auto first_element_logical_type =
1349  get_nullable_logical_type_info(translated_function_args[0]->get_type_info());
1350 
1351  auto diff_elem_itr =
1352  std::find_if(translated_function_args.begin(),
1353  translated_function_args.end(),
1354  [first_element_logical_type](const auto expr) {
1355  return first_element_logical_type !=
1356  get_nullable_logical_type_info(expr->get_type_info());
1357  });
1358  if (diff_elem_itr != translated_function_args.end()) {
1359  throw std::runtime_error(
1360  "Element " +
1361  std::to_string(diff_elem_itr - translated_function_args.begin()) +
1362  " is not of the same type as other elements of the array. Consider casting "
1363  "to force this condition.\nElement Type: " +
1364  get_nullable_logical_type_info((*diff_elem_itr)->get_type_info())
1365  .to_string() +
1366  "\nArray type: " + first_element_logical_type.to_string());
1367  }
1368 
1369  if (first_element_logical_type.is_string() &&
1370  !first_element_logical_type.is_dict_encoded_string()) {
1371  sql_type.set_subtype(first_element_logical_type.get_type());
1372  sql_type.set_compression(kENCODING_FIXED);
1373  } else if (first_element_logical_type.is_dict_encoded_string()) {
1374  sql_type.set_subtype(first_element_logical_type.get_type());
1375  sql_type.set_comp_param(TRANSIENT_DICT_ID);
1376  } else {
1377  sql_type.set_subtype(first_element_logical_type.get_type());
1378  sql_type.set_scale(first_element_logical_type.get_scale());
1379  sql_type.set_precision(first_element_logical_type.get_precision());
1380  }
1381 
1382  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1383  } else {
1384  // defaulting to valid sub-type for convenience
1385  sql_type.set_subtype(kBOOLEAN);
1386  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1387  }
1388  } else {
1389  return makeExpr<Analyzer::ArrayExpr>(rex_function->getType(),
1390  translateFunctionArgs(rex_function));
1391  }
1392 }
1393 
1394 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateFunction(
1395  const RexFunctionOperator* rex_function) const {
1396  if (func_resolve(rex_function->getName(), "LIKE"sv, "PG_ILIKE"sv)) {
1397  return translateLike(rex_function);
1398  }
1399  if (rex_function->getName() == "REGEXP_LIKE"sv) {
1400  return translateRegexp(rex_function);
1401  }
1402  if (rex_function->getName() == "LIKELY"sv) {
1403  return translateLikely(rex_function);
1404  }
1405  if (rex_function->getName() == "UNLIKELY"sv) {
1406  return translateUnlikely(rex_function);
1407  }
1408  if (func_resolve(rex_function->getName(), "PG_EXTRACT"sv, "PG_DATE_TRUNC"sv)) {
1409  return translateExtract(rex_function);
1410  }
1411  if (rex_function->getName() == "DATEADD"sv) {
1412  return translateDateadd(rex_function);
1413  }
1414  if (rex_function->getName() == "DATEDIFF"sv) {
1415  return translateDatediff(rex_function);
1416  }
1417  if (rex_function->getName() == "DATEPART"sv) {
1418  return translateDatepart(rex_function);
1419  }
1420  if (func_resolve(rex_function->getName(), "LENGTH"sv, "CHAR_LENGTH"sv)) {
1421  return translateLength(rex_function);
1422  }
1423  if (rex_function->getName() == "KEY_FOR_STRING"sv) {
1424  return translateKeyForString(rex_function);
1425  }
1426  if (rex_function->getName() == "SAMPLE_RATIO"sv) {
1427  return translateSampleRatio(rex_function);
1428  }
1429  if (rex_function->getName() == "CURRENT_USER"sv) {
1430  return translateCurrentUser(rex_function);
1431  }
1432  if (g_enable_experimental_string_functions && rex_function->getName() == "LOWER"sv) {
1433  return translateLower(rex_function);
1434  }
1435  if (func_resolve(rex_function->getName(), "CARDINALITY"sv, "ARRAY_LENGTH"sv)) {
1436  return translateCardinality(rex_function);
1437  }
1438  if (rex_function->getName() == "ITEM"sv) {
1439  return translateItem(rex_function);
1440  }
1441  if (rex_function->getName() == "NOW"sv) {
1442  return translateNow();
1443  }
1444  if (rex_function->getName() == "DATETIME"sv) {
1445  return translateDatetime(rex_function);
1446  }
1447  if (func_resolve(rex_function->getName(), "usTIMESTAMP"sv, "nsTIMESTAMP"sv)) {
1448  return translateHPTLiteral(rex_function);
1449  }
1450  if (rex_function->getName() == "ABS"sv) {
1451  return translateAbs(rex_function);
1452  }
1453  if (rex_function->getName() == "SIGN"sv) {
1454  return translateSign(rex_function);
1455  }
1456  if (func_resolve(rex_function->getName(), "CEIL"sv, "FLOOR"sv)) {
1457  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1458  rex_function->getType(),
1459  rex_function->getName(),
1460  translateFunctionArgs(rex_function));
1461  } else if (rex_function->getName() == "ROUND"sv) {
1462  std::vector<std::shared_ptr<Analyzer::Expr>> args =
1463  translateFunctionArgs(rex_function);
1464 
1465  if (rex_function->size() == 1) {
1466  // push a 0 constant if 2nd operand is missing.
1467  // this needs to be done as calcite returns
1468  // only the 1st operand without defaulting the 2nd one
1469  // when the user did not specify the 2nd operand.
1470  SQLTypes t = kSMALLINT;
1471  Datum d;
1472  d.smallintval = 0;
1473  args.push_back(makeExpr<Analyzer::Constant>(t, false, d));
1474  }
1475 
1476  // make sure we have only 2 operands
1477  CHECK(args.size() == 2);
1478 
1479  if (!args[0]->get_type_info().is_number()) {
1480  throw std::runtime_error("Only numeric 1st operands are supported");
1481  }
1482 
1483  // the 2nd operand does not need to be a constant
1484  // it can happily reference another integer column
1485  if (!args[1]->get_type_info().is_integer()) {
1486  throw std::runtime_error("Only integer 2nd operands are supported");
1487  }
1488 
1489  // Calcite may upcast decimals in a way that is
1490  // incompatible with the extension function input. Play it safe and stick with the
1491  // argument type instead.
1492  const SQLTypeInfo ret_ti = args[0]->get_type_info().is_decimal()
1493  ? args[0]->get_type_info()
1494  : rex_function->getType();
1495 
1496  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1497  ret_ti, rex_function->getName(), args);
1498  }
1499  if (rex_function->getName() == "DATETIME_PLUS"sv) {
1500  auto dt_plus = makeExpr<Analyzer::FunctionOper>(rex_function->getType(),
1501  rex_function->getName(),
1502  translateFunctionArgs(rex_function));
1503  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1504  if (date_trunc) {
1505  return date_trunc;
1506  }
1507  return translateDateadd(rex_function);
1508  }
1509  if (rex_function->getName() == "/INT"sv) {
1510  CHECK_EQ(size_t(2), rex_function->size());
1511  std::shared_ptr<Analyzer::Expr> lhs = translateScalarRex(rex_function->getOperand(0));
1512  std::shared_ptr<Analyzer::Expr> rhs = translateScalarRex(rex_function->getOperand(1));
1513  const auto rhs_lit = std::dynamic_pointer_cast<Analyzer::Constant>(rhs);
1514  return Parser::OperExpr::normalize(kDIVIDE, kONE, lhs, rhs);
1515  }
1516  if (rex_function->getName() == "Reinterpret"sv) {
1517  CHECK_EQ(size_t(1), rex_function->size());
1518  return translateScalarRex(rex_function->getOperand(0));
1519  }
1520  if (func_resolve(rex_function->getName(),
1521  "ST_X"sv,
1522  "ST_Y"sv,
1523  "ST_XMin"sv,
1524  "ST_YMin"sv,
1525  "ST_XMax"sv,
1526  "ST_YMax"sv,
1527  "ST_NRings"sv,
1528  "ST_NPoints"sv,
1529  "ST_Length"sv,
1530  "ST_Perimeter"sv,
1531  "ST_Area"sv,
1532  "ST_SRID"sv,
1533  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1534  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1535  "OmniSci_Geo_PolyBoundsPtr"sv,
1536  "OmniSci_Geo_PolyRenderGroup"sv)) {
1537  CHECK_EQ(rex_function->size(), size_t(1));
1538  return translateUnaryGeoFunction(rex_function);
1539  }
1540  if (func_resolve(rex_function->getName(),
1541  "convert_meters_to_pixel_width"sv,
1542  "convert_meters_to_pixel_height"sv,
1543  "is_point_in_view"sv,
1544  "is_point_size_in_view"sv)) {
1545  return translateFunctionWithGeoArg(rex_function);
1546  }
1547  if (func_resolve(rex_function->getName(),
1548  "ST_Distance"sv,
1549  "ST_MaxDistance"sv,
1550  "ST_Intersects"sv,
1551  "ST_Disjoint"sv,
1552  "ST_Contains"sv,
1553  "ST_Overlaps"sv,
1554  "ST_Approx_Overlaps"sv,
1555  "ST_Within"sv)) {
1556  CHECK_EQ(rex_function->size(), size_t(2));
1557  return translateBinaryGeoFunction(rex_function);
1558  }
1559  if (func_resolve(rex_function->getName(), "ST_DWithin"sv, "ST_DFullyWithin"sv)) {
1560  CHECK_EQ(rex_function->size(), size_t(3));
1561  return translateTernaryGeoFunction(rex_function);
1562  }
1563  if (rex_function->getName() == "OFFSET_IN_FRAGMENT"sv) {
1564  CHECK_EQ(size_t(0), rex_function->size());
1565  return translateOffsetInFragment();
1566  }
1567  if (rex_function->getName() == "ARRAY"sv) {
1568  // Var args; currently no check. Possible fix-me -- can array have 0 elements?
1569  return translateArrayFunction(rex_function);
1570  }
1571  if (func_resolve(rex_function->getName(),
1572  "ST_GeomFromText"sv,
1573  "ST_GeogFromText"sv,
1574  "ST_Point"sv,
1575  "ST_Centroid"sv,
1576  "ST_SetSRID"sv)) {
1577  SQLTypeInfo ti;
1578  return translateGeoProjection(rex_function, ti, false);
1579  }
1580  if (func_resolve(rex_function->getName(),
1581  "ST_Intersection"sv,
1582  "ST_Difference"sv,
1583  "ST_Union"sv,
1584  "ST_Buffer"sv)) {
1585  SQLTypeInfo ti;
1586  return translateGeoBinaryConstructor(rex_function, ti, false);
1587  }
1588  if (func_resolve(rex_function->getName(), "ST_IsEmpty"sv, "ST_IsValid"sv)) {
1589  SQLTypeInfo ti;
1590  return translateGeoPredicate(rex_function, ti, false);
1591  }
1592 
1593  auto arg_expr_list = translateFunctionArgs(rex_function);
1594  if (rex_function->getName() == std::string("||") ||
1595  rex_function->getName() == std::string("SUBSTRING")) {
1596  SQLTypeInfo ret_ti(kTEXT, false);
1597  return makeExpr<Analyzer::FunctionOper>(
1598  ret_ti, rex_function->getName(), arg_expr_list);
1599  }
1600  // Reset possibly wrong return type of rex_function to the return
1601  // type of the optimal valid implementation. The return type can be
1602  // wrong in the case of multiple implementations of UDF functions
1603  // that have different return types but Calcite specifies the return
1604  // type according to the first implementation.
1605  SQLTypeInfo ret_ti;
1606  try {
1607  auto ext_func_sig = bind_function(rex_function->getName(), arg_expr_list);
1608 
1609  auto ext_func_args = ext_func_sig.getArgs();
1610  CHECK_EQ(arg_expr_list.size(), ext_func_args.size());
1611  for (size_t i = 0; i < arg_expr_list.size(); i++) {
1612  // fold casts on constants
1613  if (auto constant =
1614  std::dynamic_pointer_cast<Analyzer::Constant>(arg_expr_list[i])) {
1615  auto ext_func_arg_ti = ext_arg_type_to_type_info(ext_func_args[i]);
1616  if (ext_func_arg_ti != arg_expr_list[i]->get_type_info()) {
1617  arg_expr_list[i] = constant->add_cast(ext_func_arg_ti);
1618  }
1619  }
1620  }
1621 
1622  ret_ti = ext_arg_type_to_type_info(ext_func_sig.getRet());
1623  } catch (ExtensionFunctionBindingError& e) {
1624  LOG(WARNING) << "RelAlgTranslator::translateFunction: " << e.what();
1625  throw;
1626  }
1627 
1628  // By default, the extension function type will not allow nulls. If one of the arguments
1629  // is nullable, the extension function must also explicitly allow nulls.
1630  bool arguments_not_null = true;
1631  for (const auto& arg_expr : arg_expr_list) {
1632  if (!arg_expr->get_type_info().get_notnull()) {
1633  arguments_not_null = false;
1634  break;
1635  }
1636  }
1637  ret_ti.set_notnull(arguments_not_null);
1638 
1639  return makeExpr<Analyzer::FunctionOper>(ret_ti, rex_function->getName(), arg_expr_list);
1640 }
1641 
1642 namespace {
1643 
1644 std::vector<Analyzer::OrderEntry> translate_collation(
1645  const std::vector<SortField>& sort_fields) {
1646  std::vector<Analyzer::OrderEntry> collation;
1647  for (size_t i = 0; i < sort_fields.size(); ++i) {
1648  const auto& sort_field = sort_fields[i];
1649  collation.emplace_back(i,
1650  sort_field.getSortDir() == SortDirection::Descending,
1651  sort_field.getNullsPosition() == NullSortedPosition::First);
1652  }
1653  return collation;
1654 }
1655 
1657  const RexWindowFunctionOperator::RexWindowBound& window_bound) {
1658  return window_bound.unbounded && window_bound.preceding && !window_bound.following &&
1659  !window_bound.is_current_row && !window_bound.offset &&
1660  window_bound.order_key == 0;
1661 }
1662 
// Decides whether the upper frame bound of `rex_window_function` is one the
// translator accepts. The answer depends on the window function kind and, in the
// default case, on whether the function has any order keys.
//
// NOTE(review): the embedded listing numbers jump from 1668 to 1673, so the lines
// that open the first switch branch (presumably its `case` labels) are missing from
// this listing; as shown here the switch body is incomplete. Restore them from the
// upstream file before compiling.
1663 bool supported_upper_bound(const RexWindowFunctionOperator* rex_window_function) {
1664  const auto& window_bound = rex_window_function->getUpperBound();
// "To current row" shape: only `is_current_row` is set, no offset, order_key == 1.
1665  const bool to_current_row = !window_bound.unbounded && !window_bound.preceding &&
1666  !window_bound.following && window_bound.is_current_row &&
1667  !window_bound.offset && window_bound.order_key == 1;
1668  switch (rex_window_function->getKind()) {
// First branch (labels not visible here): the bound must have the current-row shape.
1673  return to_current_row;
1674  }
// Every other kind: without order keys the bound must be unbounded + following
// (no offset, order_key == 2); with order keys it must have the current-row shape.
1675  default: {
1676  return rex_window_function->getOrderKeys().empty()
1677  ? (window_bound.unbounded && !window_bound.preceding &&
1678  window_bound.following && !window_bound.is_current_row &&
1679  !window_bound.offset && window_bound.order_key == 2)
1680  : to_current_row;
1681  }
1682  }
1683 }
1684 
1685 } // namespace
1686 
1687 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWindowFunction(
1688  const RexWindowFunctionOperator* rex_window_function) const {
1689  if (!supported_lower_bound(rex_window_function->getLowerBound()) ||
1690  !supported_upper_bound(rex_window_function) ||
1691  ((rex_window_function->getKind() == SqlWindowFunctionKind::ROW_NUMBER) !=
1692  rex_window_function->isRows())) {
1693  throw std::runtime_error("Frame specification not supported");
1694  }
1695  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1696  for (size_t i = 0; i < rex_window_function->size(); ++i) {
1697  args.push_back(translateScalarRex(rex_window_function->getOperand(i)));
1698  }
1699  std::vector<std::shared_ptr<Analyzer::Expr>> partition_keys;
1700  for (const auto& partition_key : rex_window_function->getPartitionKeys()) {
1701  partition_keys.push_back(translateScalarRex(partition_key.get()));
1702  }
1703  std::vector<std::shared_ptr<Analyzer::Expr>> order_keys;
1704  for (const auto& order_key : rex_window_function->getOrderKeys()) {
1705  order_keys.push_back(translateScalarRex(order_key.get()));
1706  }
1707  auto ti = rex_window_function->getType();
1708  if (window_function_is_value(rex_window_function->getKind())) {
1709  CHECK_GE(args.size(), 1u);
1710  ti = args.front()->get_type_info();
1711  }
1712  return makeExpr<Analyzer::WindowFunction>(
1713  ti,
1714  rex_window_function->getKind(),
1715  args,
1716  partition_keys,
1717  order_keys,
1718  translate_collation(rex_window_function->getCollation()));
1719 }
1720 
1722  const RexFunctionOperator* rex_function) const {
1723  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1724  for (size_t i = 0; i < rex_function->size(); ++i) {
1725  args.push_back(translateScalarRex(rex_function->getOperand(i)));
1726  }
1727  return args;
1728 }
1729 
1731  const std::shared_ptr<Analyzer::Expr> qual_expr) {
1732  CHECK(qual_expr);
1733  auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
1734  if (!bin_oper) {
1735  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1736  return {{}, {rewritten_qual_expr ? rewritten_qual_expr : qual_expr}};
1737  }
1738 
1739  if (bin_oper->get_optype() == kAND) {
1740  const auto lhs_cf = qual_to_conjunctive_form(bin_oper->get_own_left_operand());
1741  const auto rhs_cf = qual_to_conjunctive_form(bin_oper->get_own_right_operand());
1742  auto simple_quals = lhs_cf.simple_quals;
1743  simple_quals.insert(
1744  simple_quals.end(), rhs_cf.simple_quals.begin(), rhs_cf.simple_quals.end());
1745  auto quals = lhs_cf.quals;
1746  quals.insert(quals.end(), rhs_cf.quals.begin(), rhs_cf.quals.end());
1747  return {simple_quals, quals};
1748  }
1749  int rte_idx{0};
1750  const auto simple_qual = bin_oper->normalize_simple_predicate(rte_idx);
1751  return simple_qual ? QualsConjunctiveForm{{simple_qual}, {}}
1752  : QualsConjunctiveForm{{}, {qual_expr}};
1753 }
1754 
1755 std::vector<std::shared_ptr<Analyzer::Expr>> qual_to_disjunctive_form(
1756  const std::shared_ptr<Analyzer::Expr>& qual_expr) {
1757  CHECK(qual_expr);
1758  const auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
1759  if (!bin_oper) {
1760  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1761  return {rewritten_qual_expr ? rewritten_qual_expr : qual_expr};
1762  }
1763  if (bin_oper->get_optype() == kOR) {
1764  const auto lhs_df = qual_to_disjunctive_form(bin_oper->get_own_left_operand());
1765  const auto rhs_df = qual_to_disjunctive_form(bin_oper->get_own_right_operand());
1766  auto quals = lhs_df;
1767  quals.insert(quals.end(), rhs_df.begin(), rhs_df.end());
1768  return quals;
1769  }
1770  return {qual_expr};
1771 }
1772 
1773 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateHPTLiteral(
1774  const RexFunctionOperator* rex_function) const {
1775  /* since calcite uses Avatica package called DateTimeUtils to parse timestamp strings.
1776  Therefore any string having fractional seconds more 3 places after the decimal
1777  (milliseconds) will get truncated to 3 decimal places, therefore we lose precision
1778  (us|ns). Issue: [BE-2461] Here we are hijacking literal cast to Timestamp(6|9) from
1779  calcite and translating them to generate our own casts.
1780  */
1781  CHECK_EQ(size_t(1), rex_function->size());
1782  const auto operand = translateScalarRex(rex_function->getOperand(0));
1783  const auto& operand_ti = operand->get_type_info();
1784  const auto& target_ti = rex_function->getType();
1785  if (!operand_ti.is_string()) {
1786  throw std::runtime_error(
1787  "High precision timestamp cast argument must be a string. Input type is: " +
1788  operand_ti.get_type_name());
1789  } else if (!target_ti.is_high_precision_timestamp()) {
1790  throw std::runtime_error(
1791  "Cast target type should be high precision timestamp. Input type is: " +
1792  target_ti.get_type_name());
1793  } else if (target_ti.get_dimension() != 6 && target_ti.get_dimension() != 9) {
1794  throw std::runtime_error(
1795  "Cast target type should be TIMESTAMP(6|9). Input type is: TIMESTAMP(" +
1796  std::to_string(target_ti.get_dimension()) + ")");
1797  } else {
1798  return operand->add_cast(target_ti);
1799  }
1800 }
Defines data structures for the semantic analysis phase of query processing.
Definition: sqldefs.h:69
const RexScalar * getThen(const size_t idx) const
const std::vector< JoinType > join_types_
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:312
SQLAgg
Definition: sqldefs.h:71
#define CHECK_EQ(x, y)
Definition: Logger.h:205
auto func_resolve
std::shared_ptr< Analyzer::Expr > translateOffsetInFragment() const
static std::shared_ptr< Analyzer::Expr > get(const std::string &)
Definition: ParserNode.cpp:196
bool g_enable_watchdog
SQLAgg getKind() const
Definition: sqltypes.h:48
std::shared_ptr< Analyzer::Expr > translateRegexp(const RexFunctionOperator *) const
bool supported_lower_bound(const RexWindowFunctionOperator::RexWindowBound &window_bound)
SQLTypes
Definition: sqltypes.h:37
size_t getOperand(size_t idx) const
const Executor * executor_
std::shared_ptr< Analyzer::Expr > translateUnlikely(const RexFunctionOperator *) const
const RexScalar * getElse() const
SQLQualifier
Definition: sqldefs.h:69
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:121
std::shared_ptr< Analyzer::Expr > translateFunction(const RexFunctionOperator *) const
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:914
#define LOG(tag)
Definition: Logger.h:188
std::shared_ptr< Analyzer::Expr > translateScalarRex(const RexScalar *rex) const
const SQLTypeInfo & getType() const
bool boolval
Definition: sqltypes.h:202
size_t size() const
const RexScalar * getOperand(const size_t idx) const
std::shared_ptr< Analyzer::Expr > translateUoper(const RexOperator *) const
HOST DEVICE int get_scale() const
Definition: sqltypes.h:316
const std::vector< SortField > & getCollation() const
SQLOps
Definition: sqldefs.h:29
std::shared_ptr< Analyzer::Expr > translateDateadd(const RexFunctionOperator *) const
Definition: sqldefs.h:38
std::shared_ptr< Analyzer::Expr > translateNow() const
#define CHECK_GE(x, y)
Definition: Logger.h:210
std::shared_ptr< Analyzer::Expr > translateAbs(const RexFunctionOperator *) const
Definition: sqldefs.h:49
Definition: sqldefs.h:30
const RexScalar * getWhen(const size_t idx) const
std::shared_ptr< Analyzer::Expr > ExpressionPtr
Definition: Analyzer.h:180
std::string getString(int32_t string_id) const
std::shared_ptr< Analyzer::Expr > getInIntegerSetExpr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set) const
Definition: sqldefs.h:41
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::shared_ptr< Analyzer::Expr > translateItem(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > makeNumericConstant(const SQLTypeInfo &ti, const long val)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:311
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
bool is_number() const
Definition: sqltypes.h:483
#define CHECK_GT(x, y)
Definition: Logger.h:209
bool is_agg_supported_for_type(const SQLAgg &agg_kind, const SQLTypeInfo &arg_ti)
std::shared_ptr< Analyzer::Expr > translateGeoProjection(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
bool is_time() const
Definition: sqltypes.h:484
std::shared_ptr< Analyzer::Expr > translateOper(const RexOperator *) const
std::string to_string(char const *&&v)
std::shared_ptr< Analyzer::Expr > translateDatediff(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInput(const RexInput *) const
std::shared_ptr< Analyzer::Expr > translateSign(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >)
Definition: ParserNode.cpp:922
std::shared_ptr< Analyzer::Expr > translateGeoOverlapsOper(const RexOperator *) const
Definition: sqldefs.h:73
std::shared_ptr< Analyzer::Expr > translateLower(const RexFunctionOperator *) const
ExtractField to_datepart_field(const std::string &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
unsigned getIndex() const
static std::shared_ptr< Analyzer::Expr > translateLiteral(const RexLiteral *)
SQLOps getOperator() const
bool window_function_is_value(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:27
static constexpr int32_t INVALID_STR_ID
CONSTEXPR DEVICE bool is_null(const T &value)
Classes representing a parse tree.
std::shared_ptr< Analyzer::Expr > translateDatetime(const RexFunctionOperator *) const
void fill_dictionary_encoded_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice, const StringDictionaryProxy *source_dict, const StringDictionaryProxy *dest_dict, const int64_t needle_null_val)
#define CHECK_NE(x, y)
Definition: Logger.h:206
const std::shared_ptr< Analyzer::Expr > generate() const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > pattern_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_not)
Definition: ParserNode.cpp:642
void set_scale(int s)
Definition: sqltypes.h:406
int64_t bigintval
Definition: sqltypes.h:206
std::shared_ptr< const RexScalar > offset
std::vector< Analyzer::OrderEntry > translate_collation(const std::vector< SortField > &sort_fields)
size_t branchCount() const
Definition: sqldefs.h:37
Definition: sqldefs.h:75
Definition: sqldefs.h:69
int getDatabaseId() const
Definition: Catalog.h:274
int16_t smallintval
Definition: sqltypes.h:204
SQLTypeInfo build_type_info(const SQLTypes sql_type, const int scale, const int precision)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const std::string &)
Definition: ParserNode.cpp:106
DatetruncField to_datediff_field(const std::string &field)
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const DictRef dest_dict_ref, const std::vector< int32_t > &source_ids, const DictRef source_dict_ref, const int32_t dest_generation)
bool is_boolean() const
Definition: sqltypes.h:485
const std::vector< LeafHostInfo > & getStringDictionaryHosts() const
Definition: Catalog.cpp:1492
const ColumnDescriptor * getMetadataForColumnBySpi(const int tableId, const size_t spi) const
Definition: Catalog.cpp:1547
const std::unordered_map< const RelAlgNode *, int > input_to_nest_level_
#define UNLIKELY(x)
Definition: likely.h:25
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr)
Definition: ParserNode.cpp:272
std::pair< std::shared_ptr< Analyzer::Expr >, SQLQualifier > get_quantified_rhs(const RexScalar *rex_scalar, const RelAlgTranslator &translator)
Definition: sqldefs.h:34
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqltypes.h:51
Definition: sqltypes.h:52
Analyzer::ExpressionPtrVector translateFunctionArgs(const RexFunctionOperator *) const
Definition: sqldefs.h:40
Definition: sqldefs.h:69
const ConstRexScalarPtrVector & getPartitionKeys() const
#define TRANSIENT_DICT_ID
Definition: sqltypes.h:250
const RexWindowBound & getLowerBound() const
#define CHECK_LE(x, y)
Definition: Logger.h:208
std::shared_ptr< Analyzer::Expr > translateOverlapsOper(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateArrayFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > like_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_ilike, const bool is_not)
Definition: ParserNode.cpp:547
std::shared_ptr< Analyzer::Expr > translateCurrentUser(const RexFunctionOperator *) const
bool g_enable_experimental_string_functions
std::shared_ptr< Analyzer::Expr > translateSampleRatio(const RexFunctionOperator *) const
SqlWindowFunctionKind getKind() const
std::shared_ptr< Analyzer::Expr > translateLike(const RexFunctionOperator *) const
const RelAlgNode * getSourceNode() const
std::shared_ptr< Analyzer::Expr > translateGeoBinaryConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
Definition: sqltypes.h:40
bool supported_upper_bound(const RexWindowFunctionOperator *rex_window_function)
bool takes_arg(const TargetInfo &target_info)
T bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t numericval, const int scale, const int precision)
Definition: ParserNode.cpp:146
std::shared_ptr< Analyzer::Expr > translateLikely(const RexFunctionOperator *) const
const RexWindowBound & getUpperBound() const
Definition: sqldefs.h:53
std::shared_ptr< Analyzer::Expr > get_in_values_expr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set)
static std::shared_ptr< Analyzer::Expr > get(const int64_t)
Definition: ParserNode.cpp:182
bool isDistinct() const
void set_notnull(bool n)
Definition: sqltypes.h:408
#define CHECK(condition)
Definition: Logger.h:197
std::shared_ptr< Analyzer::Expr > translateTernaryGeoFunction(const RexFunctionOperator *) const
const ConstRexScalarPtrVector & getOrderKeys() const
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
std::shared_ptr< Analyzer::Expr > translateBinaryGeoFunction(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > make_fp_constant(const int64_t val, const SQLTypeInfo &ti)
std::pair< Datum, bool > datum_from_scalar_tv(const ScalarTargetValue *scalar_tv, const SQLTypeInfo &ti) noexcept
std::shared_ptr< Analyzer::Expr > translateInOper(const RexOperator *) const
uint64_t exp_to_scale(const unsigned exp)
size_t size() const
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::vector< ExpressionPtr > ExpressionPtrVector
Definition: Analyzer.h:182
bool g_cluster
Definition: sqldefs.h:33
std::shared_ptr< Analyzer::Expr > translateCase(const RexCase *) const
std::shared_ptr< Analyzer::Expr > translateFunctionWithGeoArg(const RexFunctionOperator *) const
Definition: sqltypes.h:44
std::shared_ptr< const query_state::QueryState > query_state_
const std::string & getName() const
std::string get_datetimeplus_rewrite_funcname(const SQLOps &op)
void validate_datetime_datepart_argument(const std::shared_ptr< Analyzer::Constant > literal_expr)
std::shared_ptr< Analyzer::Expr > translateCardinality(const RexFunctionOperator *) const
Definition: sqldefs.h:74
int cpu_threads()
Definition: thread_count.h:24
const bool just_explain_
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
bool is_decimal() const
Definition: sqltypes.h:481
std::shared_ptr< Analyzer::Expr > translateGeoComparison(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateDatePlusMinus(const RexOperator *) const
const std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::shared_ptr< Analyzer::Expr > translateHPTLiteral(const RexFunctionOperator *) const
Definition: sqldefs.h:72
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
int32_t getIdOfString(const std::string &str) const
std::shared_ptr< Analyzer::Expr > translateDatepart(const RexFunctionOperator *) const
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1131
Definition: sqldefs.h:39
std::shared_ptr< Analyzer::Expr > rewrite_to_date_trunc(const Analyzer::FunctionOper *dt_plus)
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
DateaddField to_dateadd_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
void set_precision(int d)
Definition: sqltypes.h:404
#define IS_COMPARISON(X)
Definition: sqldefs.h:57
double doubleval
Definition: sqltypes.h:208
void fill_integer_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice)
const Catalog_Namespace::Catalog & cat_
std::shared_ptr< Analyzer::Expr > translateKeyForString(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
std::shared_ptr< Analyzer::Expr > translateWindowFunction(const RexWindowFunctionOperator *) const
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateScalarSubquery(const RexSubQuery *) const
std::shared_ptr< Analyzer::Expr > translateGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156
std::shared_ptr< Analyzer::Expr > translateLength(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateExtract(const RexFunctionOperator *) const