OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgTranslator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgTranslator.h"
18 #include "Shared/SqlTypesLayout.h"
19 
21 #include "DateTimePlusRewrite.h"
22 #include "DateTimeTranslator.h"
24 #include "ExpressionRewrite.h"
27 #include "RelAlgDagBuilder.h"
28 #include "WindowContext.h"
29 
30 #include <future>
31 
32 #include "Analyzer/Analyzer.h"
33 #include "Parser/ParserNode.h"
34 #include "Shared/likely.h"
35 #include "Shared/thread_count.h"
36 
37 extern bool g_enable_watchdog;
38 
40 
41 namespace {
42 
44  const int scale,
45  const int precision) {
46  SQLTypeInfo ti(sql_type, 0, 0, true);
47  if (ti.is_decimal()) {
48  ti.set_scale(scale);
49  ti.set_precision(precision);
50  }
51  return ti;
52 }
53 
54 std::pair<std::shared_ptr<Analyzer::Expr>, SQLQualifier> get_quantified_rhs(
55  const RexScalar* rex_scalar,
56  const RelAlgTranslator& translator) {
57  std::shared_ptr<Analyzer::Expr> rhs;
58  SQLQualifier sql_qual{kONE};
59  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
60  if (!rex_operator) {
61  return std::make_pair(rhs, sql_qual);
62  }
63  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex_operator);
64  const auto qual_str = rex_function ? rex_function->getName() : "";
65  if (qual_str == "PG_ANY"sv || qual_str == "PG_ALL"sv) {
66  CHECK_EQ(size_t(1), rex_function->size());
67  rhs = translator.translateScalarRex(rex_function->getOperand(0));
68  sql_qual = (qual_str == "PG_ANY"sv) ? kANY : kALL;
69  }
70  if (!rhs && rex_operator->getOperator() == kCAST) {
71  CHECK_EQ(size_t(1), rex_operator->size());
72  std::tie(rhs, sql_qual) = get_quantified_rhs(rex_operator->getOperand(0), translator);
73  }
74  return std::make_pair(rhs, sql_qual);
75 }
76 
77 std::pair<Datum, bool> datum_from_scalar_tv(const ScalarTargetValue* scalar_tv,
78  const SQLTypeInfo& ti) noexcept {
79  Datum d{0};
80  bool is_null_const{false};
81  switch (ti.get_type()) {
82  case kBOOLEAN: {
83  const auto ival = boost::get<int64_t>(scalar_tv);
84  CHECK(ival);
85  if (*ival == inline_int_null_val(ti)) {
86  is_null_const = true;
87  } else {
88  d.boolval = *ival;
89  }
90  break;
91  }
92  case kTINYINT: {
93  const auto ival = boost::get<int64_t>(scalar_tv);
94  CHECK(ival);
95  if (*ival == inline_int_null_val(ti)) {
96  is_null_const = true;
97  } else {
98  d.tinyintval = *ival;
99  }
100  break;
101  }
102  case kSMALLINT: {
103  const auto ival = boost::get<int64_t>(scalar_tv);
104  CHECK(ival);
105  if (*ival == inline_int_null_val(ti)) {
106  is_null_const = true;
107  } else {
108  d.smallintval = *ival;
109  }
110  break;
111  }
112  case kINT: {
113  const auto ival = boost::get<int64_t>(scalar_tv);
114  CHECK(ival);
115  if (*ival == inline_int_null_val(ti)) {
116  is_null_const = true;
117  } else {
118  d.intval = *ival;
119  }
120  break;
121  }
122  case kDECIMAL:
123  case kNUMERIC:
124  case kBIGINT:
125  case kDATE:
126  case kTIME:
127  case kTIMESTAMP: {
128  const auto ival = boost::get<int64_t>(scalar_tv);
129  CHECK(ival);
130  if (*ival == inline_int_null_val(ti)) {
131  is_null_const = true;
132  } else {
133  d.bigintval = *ival;
134  }
135  break;
136  }
137  case kDOUBLE: {
138  const auto dval = boost::get<double>(scalar_tv);
139  CHECK(dval);
140  if (*dval == inline_fp_null_val(ti)) {
141  is_null_const = true;
142  } else {
143  d.doubleval = *dval;
144  }
145  break;
146  }
147  case kFLOAT: {
148  const auto fval = boost::get<float>(scalar_tv);
149  CHECK(fval);
150  if (*fval == inline_fp_null_val(ti)) {
151  is_null_const = true;
152  } else {
153  d.floatval = *fval;
154  }
155  break;
156  }
157  case kTEXT:
158  case kVARCHAR:
159  case kCHAR: {
160  auto nullable_sptr = boost::get<NullableString>(scalar_tv);
161  CHECK(nullable_sptr);
162  if (boost::get<void*>(nullable_sptr)) {
163  is_null_const = true;
164  } else {
165  auto sptr = boost::get<std::string>(nullable_sptr);
166  d.stringval = new std::string(*sptr);
167  }
168  break;
169  }
170  default:
171  CHECK(false) << "Unhandled type: " << ti.get_type_name();
172  }
173  return {d, is_null_const};
174 }
175 
176 } // namespace
177 
178 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarRex(
179  const RexScalar* rex) const {
180  const auto rex_input = dynamic_cast<const RexInput*>(rex);
181  if (rex_input) {
182  return translateInput(rex_input);
183  }
184  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex);
185  if (rex_literal) {
186  return translateLiteral(rex_literal);
187  }
188  const auto rex_window_function = dynamic_cast<const RexWindowFunctionOperator*>(rex);
189  if (rex_window_function) {
190  return translateWindowFunction(rex_window_function);
191  }
192  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex);
193  if (rex_function) {
194  return translateFunction(rex_function);
195  }
196  const auto rex_operator = dynamic_cast<const RexOperator*>(rex);
197  if (rex_operator) {
198  return translateOper(rex_operator);
199  }
200  const auto rex_case = dynamic_cast<const RexCase*>(rex);
201  if (rex_case) {
202  return translateCase(rex_case);
203  }
204  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rex);
205  if (rex_subquery) {
206  return translateScalarSubquery(rex_subquery);
207  }
208  CHECK(false);
209  return nullptr;
210 }
211 
212 namespace {
213 
214 bool is_agg_supported_for_type(const SQLAgg& agg_kind, const SQLTypeInfo& arg_ti) {
215  if ((agg_kind == kMIN || agg_kind == kMAX || agg_kind == kSUM || agg_kind == kAVG) &&
216  !(arg_ti.is_number() || arg_ti.is_boolean() || arg_ti.is_time())) {
217  return false;
218  }
219 
220  return true;
221 }
222 
223 } // namespace
224 
225 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAggregateRex(
226  const RexAgg* rex,
227  const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
228  const auto agg_kind = rex->getKind();
229  const bool is_distinct = rex->isDistinct();
230  const bool takes_arg{rex->size() > 0};
231  std::shared_ptr<Analyzer::Expr> arg_expr;
232  std::shared_ptr<Analyzer::Constant> err_rate;
233  if (takes_arg) {
234  const auto operand = rex->getOperand(0);
235  CHECK_LT(operand, scalar_sources.size());
236  CHECK_LE(rex->size(), 2u);
237  arg_expr = scalar_sources[operand];
238  if (agg_kind == kAPPROX_COUNT_DISTINCT && rex->size() == 2) {
239  err_rate = std::dynamic_pointer_cast<Analyzer::Constant>(
240  scalar_sources[rex->getOperand(1)]);
241  if (!err_rate || err_rate->get_type_info().get_type() != kINT ||
242  err_rate->get_constval().intval < 1 || err_rate->get_constval().intval > 100) {
243  throw std::runtime_error(
244  "APPROX_COUNT_DISTINCT's second parameter should be SMALLINT literal between "
245  "1 and 100");
246  }
247  }
248  if (g_cluster && agg_kind == kAPPROX_MEDIAN) {
249  throw std::runtime_error(
250  "APPROX_MEDIAN is not supported in distributed mode at this time.");
251  }
252  const auto& arg_ti = arg_expr->get_type_info();
253  if (!is_agg_supported_for_type(agg_kind, arg_ti)) {
254  throw std::runtime_error("Aggregate on " + arg_ti.get_type_name() +
255  " is not supported yet.");
256  }
257  }
258  const auto agg_ti = get_agg_type(agg_kind, arg_expr.get());
259  return makeExpr<Analyzer::AggExpr>(agg_ti, agg_kind, arg_expr, is_distinct, err_rate);
260 }
261 
// Translates a RexLiteral into an Analyzer expression, dispatching on the literal's
// own type. Two type infos are built up front: `lit_ti` from the literal's type,
// scale and precision, and `target_ti` from its target type, type scale and type
// precision; the decimal and double cases add a cast when the two differ.
// Logs FATAL for literal types that have no case below.
262 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLiteral(
263  const RexLiteral* rex_literal) {
264  auto lit_ti = build_type_info(
265  rex_literal->getType(), rex_literal->getScale(), rex_literal->getPrecision());
266  auto target_ti = build_type_info(rex_literal->getTargetType(),
267  rex_literal->getTypeScale(),
268  rex_literal->getTypePrecision());
269  switch (rex_literal->getType()) {
270  case kINT:
271  case kBIGINT: {
    // The value is always read as int64_t and stored through bigintval, while the
    // constant keeps the literal's own type.
272  Datum d;
273  d.bigintval = rex_literal->getVal<int64_t>();
274  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
275  }
276  case kDECIMAL: {
277  const auto val = rex_literal->getVal<int64_t>();
278  const int precision = rex_literal->getPrecision();
279  const int scale = rex_literal->getScale();
    // A scale-less decimal headed for a floating point target is turned into a
    // floating point constant directly.
280  if (target_ti.is_fp() && !scale) {
281  return make_fp_constant(val, target_ti);
282  }
283  auto lit_expr = scale ? Parser::FixedPtLiteral::analyzeValue(val, scale, precision)
    // NOTE(review): listing line 284 (the rest of this conditional expression, i.e.
    // the branch taken when scale is zero) is absent from this view.
285  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
286  }
287  case kTEXT: {
288  return Parser::StringLiteral::analyzeValue(rex_literal->getVal<std::string>());
289  }
290  case kBOOLEAN: {
291  Datum d;
292  d.boolval = rex_literal->getVal<bool>();
293  return makeExpr<Analyzer::Constant>(kBOOLEAN, false, d);
294  }
295  case kDOUBLE: {
296  Datum d;
297  d.doubleval = rex_literal->getVal<double>();
298  auto lit_expr = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
299  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
300  }
301  case kINTERVAL_DAY_TIME:
302  case kINTERVAL_YEAR_MONTH: {
303  Datum d;
304  d.bigintval = rex_literal->getVal<int64_t>();
305  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
306  }
307  case kTIME:
308  case kTIMESTAMP: {
    // Timestamps with a positive precision keep the raw value; everything else is
    // divided by 1000 (presumably milliseconds to seconds -- TODO confirm units).
309  Datum d;
310  d.bigintval =
311  rex_literal->getType() == kTIMESTAMP && rex_literal->getPrecision() > 0
312  ? rex_literal->getVal<int64_t>()
313  : rex_literal->getVal<int64_t>() / 1000;
314  return makeExpr<Analyzer::Constant>(
315  SQLTypeInfo(rex_literal->getType(), rex_literal->getPrecision(), 0, false),
316  false,
317  d);
318  }
319  case kDATE: {
    // The value is scaled by 24 * 3600 (presumably days to seconds -- TODO confirm).
320  Datum d;
321  d.bigintval = rex_literal->getVal<int64_t>() * 24 * 3600;
322  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
323  }
324  case kNULLT: {
    // A null literal with an array target becomes an ArrayExpr; any other target
    // becomes a null constant of the target type.
325  if (target_ti.is_array()) {
    // NOTE(review): listing line 326 is absent from this view; it presumably
    // declares `args`, which is used below.
327  // defaulting to valid sub-type for convenience
328  target_ti.set_subtype(kBOOLEAN);
329  return makeExpr<Analyzer::ArrayExpr>(target_ti, args, true);
330  }
331  return makeExpr<Analyzer::Constant>(rex_literal->getTargetType(), true, Datum{0});
332  }
333  default: {
334  LOG(FATAL) << "Unexpected literal type " << lit_ti.get_type_name();
335  }
336  }
    // Only reached through the default case above.
337  return nullptr;
338 }
339 
340 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarSubquery(
341  const RexSubQuery* rex_subquery) const {
342  if (just_explain_) {
343  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
344  }
345  CHECK(rex_subquery);
346  auto result = rex_subquery->getExecutionResult();
347  auto row_set = result->getRows();
348  const size_t row_count = row_set->rowCount();
349  if (row_count > size_t(1)) {
350  throw std::runtime_error("Scalar sub-query returned multiple rows");
351  }
352  if (row_count == size_t(0)) {
353  if (row_set->isValidationOnlyRes()) {
354  Datum d{0};
355  return makeExpr<Analyzer::Constant>(rex_subquery->getType(), false, d);
356  }
357  throw std::runtime_error("Scalar sub-query returned no results");
358  }
359  CHECK_EQ(row_count, size_t(1));
360  row_set->moveToBegin();
361  auto first_row = row_set->getNextRow(false, false);
362  CHECK_EQ(first_row.size(), size_t(1));
363  auto scalar_tv = boost::get<ScalarTargetValue>(&first_row[0]);
364  auto ti = rex_subquery->getType();
365  if (ti.is_string()) {
366  throw std::runtime_error("Scalar sub-queries which return strings not supported");
367  }
368  Datum d{0};
369  bool is_null_const{false};
370  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
371  return makeExpr<Analyzer::Constant>(ti, is_null_const, d);
372 }
373 
374 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInput(
375  const RexInput* rex_input) const {
376  const auto source = rex_input->getSourceNode();
377  const auto it_rte_idx = input_to_nest_level_.find(source);
378  CHECK(it_rte_idx != input_to_nest_level_.end())
379  << "Not found in input_to_nest_level_, source=" << source->toString();
380  const int rte_idx = it_rte_idx->second;
381  const auto scan_source = dynamic_cast<const RelScan*>(source);
382  const auto& in_metainfo = source->getOutputMetainfo();
383  if (scan_source) {
384  // We're at leaf (scan) level and not supposed to have input metadata,
385  // the name and type information come directly from the catalog.
386  CHECK(in_metainfo.empty());
387  const auto table_desc = scan_source->getTableDescriptor();
388  const auto cd =
389  cat_.getMetadataForColumnBySpi(table_desc->tableId, rex_input->getIndex() + 1);
390  CHECK(cd);
391  auto col_ti = cd->columnType;
392  if (col_ti.is_string()) {
393  col_ti.set_type(kTEXT);
394  }
395  if (cd->isVirtualCol) {
396  // TODO(alex): remove at some point, we only need this fixup for backwards
397  // compatibility with old imported data
398  CHECK_EQ("rowid", cd->columnName);
399  col_ti.set_size(8);
400  }
401  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
402  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
403  col_ti.set_notnull(false);
404  }
405  return std::make_shared<Analyzer::ColumnVar>(
406  col_ti, table_desc->tableId, cd->columnId, rte_idx);
407  }
408  CHECK(!in_metainfo.empty()) << "for " << source->toString();
409  CHECK_GE(rte_idx, 0);
410  const size_t col_id = rex_input->getIndex();
411  CHECK_LT(col_id, in_metainfo.size());
412  auto col_ti = in_metainfo[col_id].get_type_info();
413 
414  if (join_types_.size() > 0) {
415  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
416  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
417  col_ti.set_notnull(false);
418  }
419  }
420 
421  return std::make_shared<Analyzer::ColumnVar>(col_ti, -source->getId(), col_id, rte_idx);
422 }
423 
424 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUoper(
425  const RexOperator* rex_operator) const {
426  CHECK_EQ(size_t(1), rex_operator->size());
427  const auto operand_expr = translateScalarRex(rex_operator->getOperand(0));
428  const auto sql_op = rex_operator->getOperator();
429  switch (sql_op) {
430  case kCAST: {
431  const auto& target_ti = rex_operator->getType();
432  CHECK_NE(kNULLT, target_ti.get_type());
433  const auto& operand_ti = operand_expr->get_type_info();
434  if (operand_ti.is_string() && target_ti.is_string()) {
435  return operand_expr;
436  }
437  if (target_ti.is_time() ||
438  operand_ti
439  .is_string()) { // TODO(alex): check and unify with the rest of the cases
440  // Do not propogate encoding on small dates
441  return target_ti.is_date_in_days()
442  ? operand_expr->add_cast(SQLTypeInfo(kDATE, false))
443  : operand_expr->add_cast(target_ti);
444  }
445  if (!operand_ti.is_string() && target_ti.is_string()) {
446  return operand_expr->add_cast(target_ti);
447  }
448 
449  return std::make_shared<Analyzer::UOper>(target_ti, false, sql_op, operand_expr);
450  }
451  case kNOT:
452  case kISNULL: {
453  return std::make_shared<Analyzer::UOper>(kBOOLEAN, sql_op, operand_expr);
454  }
455  case kISNOTNULL: {
456  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, operand_expr);
457  return std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
458  }
459  case kMINUS: {
460  const auto& ti = operand_expr->get_type_info();
461  return std::make_shared<Analyzer::UOper>(ti, false, kUMINUS, operand_expr);
462  }
463  case kUNNEST: {
464  const auto& ti = operand_expr->get_type_info();
465  CHECK(ti.is_array());
466  return makeExpr<Analyzer::UOper>(ti.get_elem_type(), false, kUNNEST, operand_expr);
467  }
468  default:
469  CHECK(false);
470  }
471  return nullptr;
472 }
473 
474 namespace {
475 
// Builds an Analyzer::InValues expression for `arg IN (<rows of val_set>)`.
// The entries of `val_set` are split into up to cpu_threads() contiguous slices;
// each slice is converted to constants by its own std::async task into a private
// list, and the lists are spliced together in slice order afterwards.
// Returns nullptr when the (not visible here) guard at the top rejects the result
// set. Throws std::runtime_error when the watchdog is enabled and the result set
// has more than 5,000,000 rows.
476 std::shared_ptr<Analyzer::Expr> get_in_values_expr(std::shared_ptr<Analyzer::Expr> arg,
477  const ResultSet& val_set) {
    // NOTE(review): listing line 478 is absent from this view; it presumably holds
    // the `if (...) {` condition guarding the early return below -- confirm against
    // the full file.
479  return nullptr;
480  }
481  if (val_set.rowCount() > 5000000 && g_enable_watchdog) {
482  throw std::runtime_error(
483  "Unable to handle 'expr IN (subquery)', subquery returned 5M+ rows.");
484  }
485  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
486  const size_t fetcher_count = cpu_threads();
    // One output list per fetcher so the tasks never write to the same container.
487  std::vector<std::list<std::shared_ptr<Analyzer::Expr>>> expr_set(
488  fetcher_count, std::list<std::shared_ptr<Analyzer::Expr>>());
489  std::vector<std::future<void>> fetcher_threads;
490  const auto& ti = arg->get_type_info();
491  const auto entry_count = val_set.entryCount();
    // stride is entry_count / fetcher_count rounded up; the loop stops early once
    // all entries have been handed out.
492  for (size_t i = 0,
493  start_entry = 0,
494  stride = (entry_count + fetcher_count - 1) / fetcher_count;
495  i < fetcher_count && start_entry < entry_count;
496  ++i, start_entry += stride) {
497  const auto end_entry = std::min(start_entry + stride, entry_count);
    // The lambda captures val_set and ti by reference; every future is waited on
    // below before this function returns.
498  fetcher_threads.push_back(std::async(
499  std::launch::async,
500  [&](std::list<std::shared_ptr<Analyzer::Expr>>& in_vals,
501  const size_t start,
502  const size_t end) {
503  for (auto index = start; index < end; ++index) {
504  auto row = val_set.getRowAt(index);
    // Entries without a row are skipped.
505  if (row.empty()) {
506  continue;
507  }
508  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
509  Datum d{0};
510  bool is_null_const{false};
511  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
512  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
    // NOTE(review): ti_none_encoded is prepared but the constant below is built
    // with `ti`; confirm whether the none-encoded type info was meant to be used.
513  auto ti_none_encoded = ti;
514  ti_none_encoded.set_compression(kENCODING_NONE);
515  auto none_encoded_string =
516  makeExpr<Analyzer::Constant>(ti, is_null_const, d);
517  auto dict_encoded_string = std::make_shared<Analyzer::UOper>(
518  ti, false, kCAST, none_encoded_string);
519  in_vals.push_back(dict_encoded_string);
520  } else {
521  in_vals.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
522  }
523  }
524  },
525  std::ref(expr_set[i]),
526  start_entry,
527  end_entry));
528  }
    // Wait for all slices; get() also rethrows an exception raised inside a task.
529  for (auto& child : fetcher_threads) {
530  child.get();
531  }
532 
533  val_set.moveToBegin();
    // splice() moves the nodes of each per-task list, preserving slice order.
534  for (auto& exprs : expr_set) {
535  value_exprs.splice(value_exprs.end(), exprs);
536  }
537  return makeExpr<Analyzer::InValues>(arg, value_exprs);
538 }
539 
540 } // namespace
541 
542 // Creates an Analyzer expression for an IN subquery which subsequently goes through the
543 // regular Executor::codegen() mechanism. The creation of the expression out of
544 // subquery's result set is parallelized whenever possible. In addition, take advantage
545 // of additional information that elements in the right hand side are constants; see
546 // getInIntegerSetExpr().
547 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInOper(
548  const RexOperator* rex_operator) const {
549  if (just_explain_) {
550  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
551  }
552  CHECK(rex_operator->size() == 2);
553  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
554  const auto rhs = rex_operator->getOperand(1);
555  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rhs);
556  CHECK(rex_subquery);
557  auto ti = lhs->get_type_info();
558  auto result = rex_subquery->getExecutionResult();
559  CHECK(result);
560  auto& row_set = result->getRows();
561  CHECK_EQ(size_t(1), row_set->colCount());
562  const auto& rhs_ti = row_set->getColType(0);
563  if (rhs_ti.get_type() != ti.get_type()) {
564  throw std::runtime_error(
565  "The two sides of the IN operator must have the same type; found " +
566  ti.get_type_name() + " and " + rhs_ti.get_type_name());
567  }
568  row_set->moveToBegin();
569  if (row_set->entryCount() > 10000) {
570  std::shared_ptr<Analyzer::Expr> expr;
571  if ((ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT)) &&
572  !row_set->getQueryMemDesc().didOutputColumnar()) {
573  expr = getInIntegerSetExpr(lhs, *row_set);
574  // Handle the highly unlikely case when the InIntegerSet ended up being tiny.
575  // Just let it fall through the usual InValues path at the end of this method,
576  // its codegen knows to use inline comparisons for few values.
577  if (expr && std::static_pointer_cast<Analyzer::InIntegerSet>(expr)
578  ->get_value_list()
579  .size() <= 100) {
580  expr = nullptr;
581  }
582  } else {
583  expr = get_in_values_expr(lhs, *row_set);
584  }
585  if (expr) {
586  return expr;
587  }
588  }
589  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
590  while (true) {
591  auto row = row_set->getNextRow(true, false);
592  if (row.empty()) {
593  break;
594  }
595  if (g_enable_watchdog && value_exprs.size() >= 10000) {
596  throw std::runtime_error(
597  "Unable to handle 'expr IN (subquery)', subquery returned 10000+ rows.");
598  }
599  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
600  Datum d{0};
601  bool is_null_const{false};
602  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
603  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
604  auto ti_none_encoded = ti;
605  ti_none_encoded.set_compression(kENCODING_NONE);
606  auto none_encoded_string = makeExpr<Analyzer::Constant>(ti, is_null_const, d);
607  auto dict_encoded_string =
608  std::make_shared<Analyzer::UOper>(ti, false, kCAST, none_encoded_string);
609  value_exprs.push_back(dict_encoded_string);
610  } else {
611  value_exprs.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
612  }
613  }
614  return makeExpr<Analyzer::InValues>(lhs, value_exprs);
615 }
616 
617 namespace {
618 
// Threshold (1 << 25 = 33,554,432) that the shared in-values counter is compared
// against by the watchdog checks in the fill_*_in_vals helpers below.
619 const size_t g_max_integer_set_size{1 << 25};
620 
622  std::vector<int64_t>& in_vals,
623  std::atomic<size_t>& total_in_vals_count,
624  const ResultSet* values_rowset,
625  const std::pair<int64_t, int64_t> values_rowset_slice,
626  const StringDictionaryProxy* source_dict,
627  const StringDictionaryProxy* dest_dict,
628  const int64_t needle_null_val) {
629  CHECK(in_vals.empty());
630  bool dicts_are_equal = source_dict == dest_dict;
631  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
632  ++index) {
633  const auto row = values_rowset->getOneColRow(index);
634  if (UNLIKELY(!row.valid)) {
635  continue;
636  }
637  if (dicts_are_equal) {
638  in_vals.push_back(row.value);
639  } else {
640  const int string_id =
641  row.value == needle_null_val
642  ? needle_null_val
643  : dest_dict->getIdOfString(source_dict->getString(row.value));
644  if (string_id != StringDictionary::INVALID_STR_ID) {
645  in_vals.push_back(string_id);
646  }
647  }
648  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
649  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
650  throw std::runtime_error(
651  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
652  }
653  }
654 }
655 
656 void fill_integer_in_vals(std::vector<int64_t>& in_vals,
657  std::atomic<size_t>& total_in_vals_count,
658  const ResultSet* values_rowset,
659  const std::pair<int64_t, int64_t> values_rowset_slice) {
660  CHECK(in_vals.empty());
661  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
662  ++index) {
663  const auto row = values_rowset->getOneColRow(index);
664  if (row.valid) {
665  in_vals.push_back(row.value);
666  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
667  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
668  throw std::runtime_error(
669  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
670  }
671  }
672  }
673 }
674 
675 // Multi-node counterpart of the other version. Saves round-trips, which is crucial
676 // for a big right-hand side result. It only handles physical string dictionary ids,
677 // therefore it won't be able to handle a right-hand side sub-query with a CASE
678 // returning literals on some branches. That case isn't hard too handle either, but
679 // it's not clear it's actually important in practice.
680 // RelAlgTranslator::getInIntegerSetExpr makes sure, by checking the encodings, that
681 // this function isn't called in such cases.
683  std::vector<int64_t>& in_vals,
684  std::atomic<size_t>& total_in_vals_count,
685  const ResultSet* values_rowset,
686  const std::pair<int64_t, int64_t> values_rowset_slice,
687  const std::vector<LeafHostInfo>& leaf_hosts,
688  const DictRef source_dict_ref,
689  const DictRef dest_dict_ref,
690  const int32_t dest_generation,
691  const int64_t needle_null_val) {
692  CHECK(in_vals.empty());
693  std::vector<int32_t> source_ids;
694  source_ids.reserve(values_rowset->entryCount());
695  bool has_nulls = false;
696  if (source_dict_ref == dest_dict_ref) {
697  in_vals.reserve(values_rowset_slice.second - values_rowset_slice.first +
698  1); // Add 1 to cover interval
699  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
700  ++index) {
701  const auto row = values_rowset->getOneColRow(index);
702  if (!row.valid) {
703  continue;
704  }
705  if (row.value != needle_null_val) {
706  in_vals.push_back(row.value);
707  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
708  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
709  throw std::runtime_error(
710  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
711  }
712  } else {
713  has_nulls = true;
714  }
715  }
716  if (has_nulls) {
717  in_vals.push_back(
718  needle_null_val); // we've deduped null values as an optimization, although
719  // this is not required by consumer
720  }
721  return;
722  }
723  // Code path below is for when dictionaries are not shared
724  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
725  ++index) {
726  const auto row = values_rowset->getOneColRow(index);
727  if (row.valid) {
728  if (row.value != needle_null_val) {
729  source_ids.push_back(row.value);
730  } else {
731  has_nulls = true;
732  }
733  }
734  }
735  std::vector<int32_t> dest_ids;
736  translate_string_ids(dest_ids,
737  leaf_hosts.front(),
738  dest_dict_ref,
739  source_ids,
740  source_dict_ref,
741  dest_generation);
742  CHECK_EQ(dest_ids.size(), source_ids.size());
743  in_vals.reserve(dest_ids.size() + (has_nulls ? 1 : 0));
744  if (has_nulls) {
745  in_vals.push_back(needle_null_val);
746  }
747  for (const int32_t dest_id : dest_ids) {
748  if (dest_id != StringDictionary::INVALID_STR_ID) {
749  in_vals.push_back(dest_id);
750  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
751  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
752  throw std::runtime_error(
753  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
754  }
755  }
756  }
757 }
758 
759 } // namespace
760 
761 // The typical IN subquery involves either dictionary-encoded strings or integers.
762 // Analyzer::InValues is a very heavy representation of the right hand side of such
763 // a query since we already know the right hand would be a list of Analyzer::Constant
764 // shared pointers. We can avoid the big overhead of each Analyzer::Constant and the
765 // refcounting associated with shared pointers by creating an abbreviated InIntegerSet
766 // representation of the IN expression which takes advantage of this information.
//
// The entries of val_set are split into up to cpu_threads() contiguous slices, each
// filled into its own vector by a std::async task; the vectors are concatenated in
// slice order. Returns nullptr when the (not visible here) guard at the top rejects
// the result set, or when g_cluster is set and either side of a string comparison
// has a non-positive comp param.
767 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::getInIntegerSetExpr(
768  std::shared_ptr<Analyzer::Expr> arg,
769  const ResultSet& val_set) const {
    // NOTE(review): listing line 770 is absent from this view; it presumably holds
    // the `if (...) {` condition guarding the early return below -- confirm against
    // the full file.
771  return nullptr;
772  }
773  std::vector<int64_t> value_exprs;
774  const size_t fetcher_count = cpu_threads();
    // One output vector per fetcher so the tasks never write to the same container.
775  std::vector<std::vector<int64_t>> expr_set(fetcher_count);
776  std::vector<std::future<void>> fetcher_threads;
777  const auto& arg_type = arg->get_type_info();
778  const auto entry_count = val_set.entryCount();
779  CHECK_EQ(size_t(1), val_set.colCount());
780  const auto& col_type = val_set.getColType(0);
781  if (g_cluster && arg_type.is_string() &&
782  (col_type.get_comp_param() <= 0 || arg_type.get_comp_param() <= 0)) {
783  // Skip this case for now, see comment for fill_dictionary_encoded_in_vals.
784  return nullptr;
785  }
    // Shared by all tasks; used for the watchdog limit on the total value count.
786  std::atomic<size_t> total_in_vals_count{0};
    // stride is entry_count / fetcher_count rounded up; the loop stops early once
    // all entries have been handed out.
787  for (size_t i = 0,
788  start_entry = 0,
789  stride = (entry_count + fetcher_count - 1) / fetcher_count;
790  i < fetcher_count && start_entry < entry_count;
791  ++i, start_entry += stride) {
792  expr_set[i].reserve(entry_count / fetcher_count);
793  const auto end_entry = std::min(start_entry + stride, entry_count);
794  if (arg_type.is_string()) {
795  CHECK_EQ(kENCODING_DICT, arg_type.get_compression());
796  // const int32_t dest_dict_id = arg_type.get_comp_param();
797  // const int32_t source_dict_id = col_type.get_comp_param();
    // "dest" is the dictionary of the left-hand side argument, "source" the one of
    // the subquery's result column.
798  const DictRef dest_dict_ref(arg_type.get_comp_param(), cat_.getDatabaseId());
799  const DictRef source_dict_ref(col_type.get_comp_param(), cat_.getDatabaseId());
800  const auto dd = executor_->getStringDictionaryProxy(
801  arg_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
802  const auto sd = executor_->getStringDictionaryProxy(
803  col_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
804  CHECK(sd);
805  const auto needle_null_val = inline_int_null_val(arg_type);
    // val_set and the counter are captured by reference; every future is waited on
    // below before this function returns.
806  fetcher_threads.push_back(std::async(
807  std::launch::async,
808  [this,
809  &val_set,
810  &total_in_vals_count,
811  sd,
812  dd,
813  source_dict_ref,
814  dest_dict_ref,
815  needle_null_val](
816  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
817  if (g_cluster) {
818  CHECK_GE(dd->getGeneration(), 0);
    // NOTE(review): listing line 819 is absent from this view; it presumably opens
    // the call to the multi-node fill_dictionary_encoded_in_vals with in_vals as
    // the first argument -- confirm against the full file.
820  total_in_vals_count,
821  &val_set,
822  {start, end},
    // NOTE(review): listing line 823 is absent from this view; one call argument
    // (between the slice and source_dict_ref) is not visible.
824  source_dict_ref,
825  dest_dict_ref,
826  dd->getGeneration(),
827  needle_null_val);
828  } else {
    // NOTE(review): listing line 829 is absent from this view; it presumably opens
    // the call to the single-node fill_dictionary_encoded_in_vals with in_vals as
    // the first argument -- confirm against the full file.
830  total_in_vals_count,
831  &val_set,
832  {start, end},
833  sd,
834  dd,
835  needle_null_val);
836  }
837  },
838  std::ref(expr_set[i]),
839  start_entry,
840  end_entry));
841  } else {
842  CHECK(arg_type.is_integer());
843  fetcher_threads.push_back(std::async(
844  std::launch::async,
845  [&val_set, &total_in_vals_count](
846  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
847  fill_integer_in_vals(in_vals, total_in_vals_count, &val_set, {start, end});
848  },
849  std::ref(expr_set[i]),
850  start_entry,
851  end_entry));
852  }
853  }
    // Wait for all slices; get() also rethrows an exception raised inside a task
    // (for example the watchdog error).
854  for (auto& child : fetcher_threads) {
855  child.get();
856  }
857 
858  val_set.moveToBegin();
859  value_exprs.reserve(entry_count);
860  for (auto& exprs : expr_set) {
861  value_exprs.insert(value_exprs.end(), exprs.begin(), exprs.end());
862  }
    // The last argument is true only when both the argument and the subquery column
    // are declared not-null.
863  return makeExpr<Analyzer::InIntegerSet>(
864  arg, value_exprs, arg_type.get_notnull() && col_type.get_notnull());
865 }
866 
867 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOper(
868  const RexOperator* rex_operator) const {
869  CHECK_GT(rex_operator->size(), size_t(0));
870  if (rex_operator->size() == 1) {
871  return translateUoper(rex_operator);
872  }
873  const auto sql_op = rex_operator->getOperator();
874  if (sql_op == kIN) {
875  return translateInOper(rex_operator);
876  }
877  if (sql_op == kMINUS || sql_op == kPLUS) {
878  auto date_plus_minus = translateDatePlusMinus(rex_operator);
879  if (date_plus_minus) {
880  return date_plus_minus;
881  }
882  }
883  if (sql_op == kOVERLAPS) {
884  return translateOverlapsOper(rex_operator);
885  } else if (IS_COMPARISON(sql_op)) {
886  auto geo_comp = translateGeoComparison(rex_operator);
887  if (geo_comp) {
888  return geo_comp;
889  }
890  }
891  auto lhs = translateScalarRex(rex_operator->getOperand(0));
892  for (size_t i = 1; i < rex_operator->size(); ++i) {
893  std::shared_ptr<Analyzer::Expr> rhs;
894  SQLQualifier sql_qual{kONE};
895  const auto rhs_op = rex_operator->getOperand(i);
896  std::tie(rhs, sql_qual) = get_quantified_rhs(rhs_op, *this);
897  if (!rhs) {
898  rhs = translateScalarRex(rhs_op);
899  }
900  CHECK(rhs);
901  lhs = Parser::OperExpr::normalize(sql_op, sql_qual, lhs, rhs);
902  }
903  return lhs;
904 }
905 
906 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOverlapsOper(
907  const RexOperator* rex_operator) const {
908  const auto sql_op = rex_operator->getOperator();
909  CHECK(sql_op == kOVERLAPS);
910 
911  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
912  const auto lhs_ti = lhs->get_type_info();
913  if (lhs_ti.is_geometry()) {
914  return translateGeoOverlapsOper(rex_operator);
915  } else {
916  throw std::runtime_error(
917  "Overlaps equivalence is currently only supported for geospatial types");
918  }
919 }
920 
921 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCase(
922  const RexCase* rex_case) const {
923  std::shared_ptr<Analyzer::Expr> else_expr;
924  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
925  expr_list;
926  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
927  const auto when_expr = translateScalarRex(rex_case->getWhen(i));
928  const auto then_expr = translateScalarRex(rex_case->getThen(i));
929  expr_list.emplace_back(when_expr, then_expr);
930  }
931  if (rex_case->getElse()) {
932  else_expr = translateScalarRex(rex_case->getElse());
933  }
934  return Parser::CaseExpr::normalize(expr_list, else_expr);
935 }
936 
937 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLike(
938  const RexFunctionOperator* rex_function) const {
939  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
940  const auto arg = translateScalarRex(rex_function->getOperand(0));
941  const auto like = translateScalarRex(rex_function->getOperand(1));
942  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(like)) {
943  throw std::runtime_error("The matching pattern must be a literal.");
944  }
945  const auto escape = (rex_function->size() == 3)
946  ? translateScalarRex(rex_function->getOperand(2))
947  : nullptr;
948  const bool is_ilike = rex_function->getName() == "PG_ILIKE"sv;
949  return Parser::LikeExpr::get(arg, like, escape, is_ilike, false);
950 }
951 
952 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRegexp(
953  const RexFunctionOperator* rex_function) const {
954  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
955  const auto arg = translateScalarRex(rex_function->getOperand(0));
956  const auto pattern = translateScalarRex(rex_function->getOperand(1));
957  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(pattern)) {
958  throw std::runtime_error("The matching pattern must be a literal.");
959  }
960  const auto escape = (rex_function->size() == 3)
961  ? translateScalarRex(rex_function->getOperand(2))
962  : nullptr;
963  return Parser::RegexpExpr::get(arg, pattern, escape, false);
964 }
965 
966 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLikely(
967  const RexFunctionOperator* rex_function) const {
968  CHECK(rex_function->size() == 1);
969  const auto arg = translateScalarRex(rex_function->getOperand(0));
970  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.9375);
971 }
972 
973 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUnlikely(
974  const RexFunctionOperator* rex_function) const {
975  CHECK(rex_function->size() == 1);
976  const auto arg = translateScalarRex(rex_function->getOperand(0));
977  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.0625);
978 }
979 
980 namespace {
981 
983  const std::shared_ptr<Analyzer::Constant> literal_expr) {
984  if (!literal_expr || literal_expr->get_is_null()) {
985  throw std::runtime_error("The 'DatePart' argument must be a not 'null' literal.");
986  }
987 }
988 
989 } // namespace
990 
991 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateExtract(
992  const RexFunctionOperator* rex_function) const {
993  CHECK_EQ(size_t(2), rex_function->size());
994  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
995  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
997  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
998  const bool is_date_trunc = rex_function->getName() == "PG_DATE_TRUNC"sv;
999  if (is_date_trunc) {
1000  return DateTruncExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
1001  } else {
1002  return ExtractExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
1003  }
1004 }
1005 
1006 namespace {
1007 
1008 std::shared_ptr<Analyzer::Constant> makeNumericConstant(const SQLTypeInfo& ti,
1009  const long val) {
1010  CHECK(ti.is_number());
1011  Datum datum{0};
1012  switch (ti.get_type()) {
1013  case kTINYINT: {
1014  datum.tinyintval = val;
1015  break;
1016  }
1017  case kSMALLINT: {
1018  datum.smallintval = val;
1019  break;
1020  }
1021  case kINT: {
1022  datum.intval = val;
1023  break;
1024  }
1025  case kBIGINT: {
1026  datum.bigintval = val;
1027  break;
1028  }
1029  case kDECIMAL:
1030  case kNUMERIC: {
1031  datum.bigintval = val * exp_to_scale(ti.get_scale());
1032  break;
1033  }
1034  case kFLOAT: {
1035  datum.floatval = val;
1036  break;
1037  }
1038  case kDOUBLE: {
1039  datum.doubleval = val;
1040  break;
1041  }
1042  default:
1043  CHECK(false);
1044  }
1045  return makeExpr<Analyzer::Constant>(ti, false, datum);
1046 }
1047 
1048 } // namespace
1049 
1050 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDateadd(
1051  const RexFunctionOperator* rex_function) const {
1052  CHECK_EQ(size_t(3), rex_function->size());
1053  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1054  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1056  const auto number_units = translateScalarRex(rex_function->getOperand(1));
1057  const auto number_units_const =
1058  std::dynamic_pointer_cast<Analyzer::Constant>(number_units);
1059  if (number_units_const && number_units_const->get_is_null()) {
1060  throw std::runtime_error("The 'Interval' argument literal must not be 'null'.");
1061  }
1062  const auto cast_number_units = number_units->add_cast(SQLTypeInfo(kBIGINT, false));
1063  const auto datetime = translateScalarRex(rex_function->getOperand(2));
1064  const auto& datetime_ti = datetime->get_type_info();
1065  if (datetime_ti.get_type() == kTIME) {
1066  throw std::runtime_error("DateAdd operation not supported for TIME.");
1067  }
1068  const auto& field = to_dateadd_field(*timeunit_lit->get_constval().stringval);
1069  const int dim = datetime_ti.get_dimension();
1070  return makeExpr<Analyzer::DateaddExpr>(
1071  SQLTypeInfo(kTIMESTAMP, dim, 0, false), field, cast_number_units, datetime);
1072 }
1073 
1074 namespace {
1075 
1077  CHECK(op == kPLUS);
1078  return "DATETIME_PLUS"s;
1079 }
1080 
1081 } // namespace
1082 
1083 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatePlusMinus(
1084  const RexOperator* rex_operator) const {
1085  if (rex_operator->size() != 2) {
1086  return nullptr;
1087  }
1088  const auto datetime = translateScalarRex(rex_operator->getOperand(0));
1089  const auto datetime_ti = datetime->get_type_info();
1090  if (!datetime_ti.is_timestamp() && !datetime_ti.is_date()) {
1091  if (datetime_ti.get_type() == kTIME) {
1092  throw std::runtime_error("DateTime addition/subtraction not supported for TIME.");
1093  }
1094  return nullptr;
1095  }
1096  const auto rhs = translateScalarRex(rex_operator->getOperand(1));
1097  const auto rhs_ti = rhs->get_type_info();
1098  if (rhs_ti.get_type() == kTIMESTAMP || rhs_ti.get_type() == kDATE) {
1099  if (datetime_ti.is_high_precision_timestamp() ||
1100  rhs_ti.is_high_precision_timestamp()) {
1101  throw std::runtime_error(
1102  "High Precision timestamps are not supported for TIMESTAMPDIFF operation. "
1103  "Use "
1104  "DATEDIFF.");
1105  }
1106  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1107  const auto& rex_operator_ti = rex_operator->getType();
1108  const auto datediff_field =
1109  (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) ? dtSECOND : dtMONTH;
1110  auto result =
1111  makeExpr<Analyzer::DatediffExpr>(bigint_ti, datediff_field, rhs, datetime);
1112  // multiply 1000 to result since expected result should be in millisecond precision.
1113  if (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) {
1114  return makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1115  kMULTIPLY,
1116  kONE,
1117  result,
1118  makeNumericConstant(bigint_ti, 1000));
1119  } else {
1120  return result;
1121  }
1122  }
1123  const auto op = rex_operator->getOperator();
1124  if (op == kPLUS) {
1125  std::vector<std::shared_ptr<Analyzer::Expr>> args = {datetime, rhs};
1126  auto dt_plus = makeExpr<Analyzer::FunctionOper>(
1127  datetime_ti, get_datetimeplus_rewrite_funcname(op), args);
1128  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1129  if (date_trunc) {
1130  return date_trunc;
1131  }
1132  }
1133  const auto interval = fold_expr(rhs.get());
1134  auto interval_ti = interval->get_type_info();
1135  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1136  const auto interval_lit = std::dynamic_pointer_cast<Analyzer::Constant>(interval);
1137  if (interval_ti.get_type() == kINTERVAL_DAY_TIME) {
1138  std::shared_ptr<Analyzer::Expr> interval_sec;
1139  if (interval_lit) {
1140  interval_sec =
1141  makeNumericConstant(bigint_ti,
1142  (op == kMINUS ? -interval_lit->get_constval().bigintval
1143  : interval_lit->get_constval().bigintval) /
1144  1000);
1145  } else {
1146  interval_sec = makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1147  kDIVIDE,
1148  kONE,
1149  interval,
1150  makeNumericConstant(bigint_ti, 1000));
1151  if (op == kMINUS) {
1152  interval_sec =
1153  std::make_shared<Analyzer::UOper>(bigint_ti, false, kUMINUS, interval_sec);
1154  }
1155  }
1156  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daSECOND, interval_sec, datetime);
1157  }
1158  CHECK(interval_ti.get_type() == kINTERVAL_YEAR_MONTH);
1159  const auto interval_months = op == kMINUS ? std::make_shared<Analyzer::UOper>(
1160  bigint_ti, false, kUMINUS, interval)
1161  : interval;
1162  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daMONTH, interval_months, datetime);
1163 }
1164 
1165 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatediff(
1166  const RexFunctionOperator* rex_function) const {
1167  CHECK_EQ(size_t(3), rex_function->size());
1168  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1169  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1171  const auto start = translateScalarRex(rex_function->getOperand(1));
1172  const auto end = translateScalarRex(rex_function->getOperand(2));
1173  const auto field = to_datediff_field(*timeunit_lit->get_constval().stringval);
1174  return makeExpr<Analyzer::DatediffExpr>(SQLTypeInfo(kBIGINT, false), field, start, end);
1175 }
1176 
1177 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatepart(
1178  const RexFunctionOperator* rex_function) const {
1179  CHECK_EQ(size_t(2), rex_function->size());
1180  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1181  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1183  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1184  return ExtractExpr::generate(
1185  from_expr, to_datepart_field(*timeunit_lit->get_constval().stringval));
1186 }
1187 
1188 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLength(
1189  const RexFunctionOperator* rex_function) const {
1190  CHECK_EQ(size_t(1), rex_function->size());
1191  const auto str_arg = translateScalarRex(rex_function->getOperand(0));
1192  return makeExpr<Analyzer::CharLengthExpr>(str_arg->decompress(),
1193  rex_function->getName() == "CHAR_LENGTH"sv);
1194 }
1195 
1196 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateKeyForString(
1197  const RexFunctionOperator* rex_function) const {
1198  const auto& args = translateFunctionArgs(rex_function);
1199  CHECK_EQ(size_t(1), args.size());
1200  const auto expr = dynamic_cast<Analyzer::Expr*>(args[0].get());
1201  if (nullptr == expr || !expr->get_type_info().is_string() ||
1202  expr->get_type_info().is_varlen()) {
1203  throw std::runtime_error(rex_function->getName() +
1204  " expects a dictionary encoded text column.");
1205  }
1206  return makeExpr<Analyzer::KeyForStringExpr>(args[0]);
1207 }
1208 
1209 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSampleRatio(
1210  const RexFunctionOperator* rex_function) const {
1211  CHECK_EQ(size_t(1), rex_function->size());
1212  auto arg = translateScalarRex(rex_function->getOperand(0));
1213  const auto& arg_ti = arg->get_type_info();
1214  if (arg_ti.get_type() != kDOUBLE) {
1215  const auto& double_ti = SQLTypeInfo(kDOUBLE, arg_ti.get_notnull());
1216  arg = arg->add_cast(double_ti);
1217  }
1218  return makeExpr<Analyzer::SampleRatioExpr>(arg);
1219 }
1220 
1221 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentUser(
1222  const RexFunctionOperator* rex_function) const {
1223  std::string user{"SESSIONLESS_USER"};
1224  if (query_state_) {
1225  user = query_state_->getConstSessionInfo()->get_currentUser().userName;
1226  }
1227  return Parser::UserLiteral::get(user);
1228 }
1229 
1230 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLower(
1231  const RexFunctionOperator* rex_function) const {
1232  const auto& args = translateFunctionArgs(rex_function);
1233  CHECK_EQ(size_t(1), args.size());
1234  CHECK(args[0]);
1235 
1236  if (args[0]->get_type_info().is_dict_encoded_string() ||
1237  dynamic_cast<Analyzer::Constant*>(args[0].get())) {
1238  return makeExpr<Analyzer::LowerExpr>(args[0]);
1239  }
1240 
1241  throw std::runtime_error(rex_function->getName() +
1242  " expects a dictionary encoded text column or a literal.");
1243 }
1244 
1246  const RexFunctionOperator* rex_function) const {
1247  const auto ret_ti = rex_function->getType();
1248  const auto arg = translateScalarRex(rex_function->getOperand(0));
1249  const auto arg_ti = arg->get_type_info();
1250  if (!arg_ti.is_array()) {
1251  throw std::runtime_error(rex_function->getName() + " expects an array expression.");
1252  }
1253  if (arg_ti.get_subtype() == kARRAY) {
1254  throw std::runtime_error(rex_function->getName() +
1255  " expects one-dimension array expression.");
1256  }
1257  const auto array_size = arg_ti.get_size();
1258  const auto array_elem_size = arg_ti.get_elem_type().get_array_context_logical_size();
1259 
1260  if (array_size > 0) {
1261  if (array_elem_size <= 0) {
1262  throw std::runtime_error(rex_function->getName() +
1263  ": unexpected array element type.");
1264  }
1265  // Return cardinality of a fixed length array
1266  return makeNumericConstant(ret_ti, array_size / array_elem_size);
1267  }
1268  // Variable length array cardinality will be calculated at runtime
1269  return makeExpr<Analyzer::CardinalityExpr>(arg);
1270 }
1271 
1272 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateItem(
1273  const RexFunctionOperator* rex_function) const {
1274  CHECK_EQ(size_t(2), rex_function->size());
1275  const auto base = translateScalarRex(rex_function->getOperand(0));
1276  const auto index = translateScalarRex(rex_function->getOperand(1));
1277  return makeExpr<Analyzer::BinOper>(
1278  base->get_type_info().get_elem_type(), false, kARRAY_AT, kONE, base, index);
1279 }
1280 
1281 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentDate() const {
1282  constexpr bool is_null = false;
1283  Datum datum;
1284  datum.bigintval = now_ - now_ % (24 * 60 * 60); // Assumes 0 < now_.
1285  return makeExpr<Analyzer::Constant>(kDATE, is_null, datum);
1286 }
1287 
1288 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentTime() const {
1289  constexpr bool is_null = false;
1290  Datum datum;
1291  datum.bigintval = now_ % (24 * 60 * 60); // Assumes 0 < now_.
1292  return makeExpr<Analyzer::Constant>(kTIME, is_null, datum);
1293 }
1294 
1295 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentTimestamp() const {
1297 }
1298 
1299 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatetime(
1300  const RexFunctionOperator* rex_function) const {
1301  CHECK_EQ(size_t(1), rex_function->size());
1302  const auto arg = translateScalarRex(rex_function->getOperand(0));
1303  const auto arg_lit = std::dynamic_pointer_cast<Analyzer::Constant>(arg);
1304  const std::string datetime_err{R"(Only DATETIME('NOW') supported for now.)"};
1305  if (!arg_lit || arg_lit->get_is_null()) {
1306  throw std::runtime_error(datetime_err);
1307  }
1308  CHECK(arg_lit->get_type_info().is_string());
1309  if (*arg_lit->get_constval().stringval != "NOW"sv) {
1310  throw std::runtime_error(datetime_err);
1311  }
1312  return translateCurrentTimestamp();
1313 }
1314 
1315 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAbs(
1316  const RexFunctionOperator* rex_function) const {
1317  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1318  expr_list;
1319  CHECK_EQ(size_t(1), rex_function->size());
1320  const auto operand = translateScalarRex(rex_function->getOperand(0));
1321  const auto& operand_ti = operand->get_type_info();
1322  CHECK(operand_ti.is_number());
1323  const auto zero = makeNumericConstant(operand_ti, 0);
1324  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1325  const auto uminus_operand =
1326  makeExpr<Analyzer::UOper>(operand_ti.get_type(), kUMINUS, operand);
1327  expr_list.emplace_back(lt_zero, uminus_operand);
1328  return makeExpr<Analyzer::CaseExpr>(operand_ti, false, expr_list, operand);
1329 }
1330 
1331 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSign(
1332  const RexFunctionOperator* rex_function) const {
1333  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1334  expr_list;
1335  CHECK_EQ(size_t(1), rex_function->size());
1336  const auto operand = translateScalarRex(rex_function->getOperand(0));
1337  const auto& operand_ti = operand->get_type_info();
1338  CHECK(operand_ti.is_number());
1339  const auto zero = makeNumericConstant(operand_ti, 0);
1340  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1341  expr_list.emplace_back(lt_zero, makeNumericConstant(operand_ti, -1));
1342  const auto eq_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, operand, zero);
1343  expr_list.emplace_back(eq_zero, makeNumericConstant(operand_ti, 0));
1344  const auto gt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kGT, kONE, operand, zero);
1345  expr_list.emplace_back(gt_zero, makeNumericConstant(operand_ti, 1));
1346  return makeExpr<Analyzer::CaseExpr>(
1347  operand_ti,
1348  false,
1349  expr_list,
1350  makeExpr<Analyzer::Constant>(operand_ti, true, Datum{0}));
1351 }
1352 
1353 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOffsetInFragment() const {
1354  return makeExpr<Analyzer::OffsetInFragment>();
1355 }
1356 
1358  const RexFunctionOperator* rex_function) const {
1359  if (rex_function->getType().get_subtype() == kNULLT) {
1360  auto sql_type = rex_function->getType();
1361  CHECK(sql_type.get_type() == kARRAY);
1362 
1363  // FIX-ME: Deal with NULL arrays
1364  auto translated_function_args(translateFunctionArgs(rex_function));
1365  if (translated_function_args.size() > 0) {
1366  const auto first_element_logical_type =
1367  get_nullable_logical_type_info(translated_function_args[0]->get_type_info());
1368 
1369  auto diff_elem_itr =
1370  std::find_if(translated_function_args.begin(),
1371  translated_function_args.end(),
1372  [first_element_logical_type](const auto expr) {
1373  return first_element_logical_type !=
1374  get_nullable_logical_type_info(expr->get_type_info());
1375  });
1376  if (diff_elem_itr != translated_function_args.end()) {
1377  throw std::runtime_error(
1378  "Element " +
1379  std::to_string(diff_elem_itr - translated_function_args.begin()) +
1380  " is not of the same type as other elements of the array. Consider casting "
1381  "to force this condition.\nElement Type: " +
1382  get_nullable_logical_type_info((*diff_elem_itr)->get_type_info())
1383  .to_string() +
1384  "\nArray type: " + first_element_logical_type.to_string());
1385  }
1386 
1387  if (first_element_logical_type.is_string() &&
1388  !first_element_logical_type.is_dict_encoded_string()) {
1389  sql_type.set_subtype(first_element_logical_type.get_type());
1390  sql_type.set_compression(kENCODING_FIXED);
1391  } else if (first_element_logical_type.is_dict_encoded_string()) {
1392  sql_type.set_subtype(first_element_logical_type.get_type());
1393  sql_type.set_comp_param(TRANSIENT_DICT_ID);
1394  } else {
1395  sql_type.set_subtype(first_element_logical_type.get_type());
1396  sql_type.set_scale(first_element_logical_type.get_scale());
1397  sql_type.set_precision(first_element_logical_type.get_precision());
1398  }
1399 
1400  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1401  } else {
1402  // defaulting to valid sub-type for convenience
1403  sql_type.set_subtype(kBOOLEAN);
1404  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1405  }
1406  } else {
1407  return makeExpr<Analyzer::ArrayExpr>(rex_function->getType(),
1408  translateFunctionArgs(rex_function));
1409  }
1410 }
1411 
1412 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateFunction(
1413  const RexFunctionOperator* rex_function) const {
1414  if (func_resolve(rex_function->getName(), "LIKE"sv, "PG_ILIKE"sv)) {
1415  return translateLike(rex_function);
1416  }
1417  if (rex_function->getName() == "REGEXP_LIKE"sv) {
1418  return translateRegexp(rex_function);
1419  }
1420  if (rex_function->getName() == "LIKELY"sv) {
1421  return translateLikely(rex_function);
1422  }
1423  if (rex_function->getName() == "UNLIKELY"sv) {
1424  return translateUnlikely(rex_function);
1425  }
1426  if (func_resolve(rex_function->getName(), "PG_EXTRACT"sv, "PG_DATE_TRUNC"sv)) {
1427  return translateExtract(rex_function);
1428  }
1429  if (rex_function->getName() == "DATEADD"sv) {
1430  return translateDateadd(rex_function);
1431  }
1432  if (rex_function->getName() == "DATEDIFF"sv) {
1433  return translateDatediff(rex_function);
1434  }
1435  if (rex_function->getName() == "DATEPART"sv) {
1436  return translateDatepart(rex_function);
1437  }
1438  if (func_resolve(rex_function->getName(), "LENGTH"sv, "CHAR_LENGTH"sv)) {
1439  return translateLength(rex_function);
1440  }
1441  if (rex_function->getName() == "KEY_FOR_STRING"sv) {
1442  return translateKeyForString(rex_function);
1443  }
1444  if (rex_function->getName() == "SAMPLE_RATIO"sv) {
1445  return translateSampleRatio(rex_function);
1446  }
1447  if (rex_function->getName() == "CURRENT_USER"sv) {
1448  return translateCurrentUser(rex_function);
1449  }
1450  if (g_enable_experimental_string_functions && rex_function->getName() == "LOWER"sv) {
1451  return translateLower(rex_function);
1452  }
1453  if (func_resolve(rex_function->getName(), "CARDINALITY"sv, "ARRAY_LENGTH"sv)) {
1454  return translateCardinality(rex_function);
1455  }
1456  if (rex_function->getName() == "ITEM"sv) {
1457  return translateItem(rex_function);
1458  }
1459  if (rex_function->getName() == "CURRENT_DATE"sv) {
1460  return translateCurrentDate();
1461  }
1462  if (rex_function->getName() == "CURRENT_TIME"sv) {
1463  return translateCurrentTime();
1464  }
1465  if (rex_function->getName() == "CURRENT_TIMESTAMP"sv) {
1466  return translateCurrentTimestamp();
1467  }
1468  if (rex_function->getName() == "NOW"sv) {
1469  return translateCurrentTimestamp();
1470  }
1471  if (rex_function->getName() == "DATETIME"sv) {
1472  return translateDatetime(rex_function);
1473  }
1474  if (func_resolve(rex_function->getName(), "usTIMESTAMP"sv, "nsTIMESTAMP"sv)) {
1475  return translateHPTLiteral(rex_function);
1476  }
1477  if (rex_function->getName() == "ABS"sv) {
1478  return translateAbs(rex_function);
1479  }
1480  if (rex_function->getName() == "SIGN"sv) {
1481  return translateSign(rex_function);
1482  }
1483  if (func_resolve(rex_function->getName(), "CEIL"sv, "FLOOR"sv)) {
1484  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1485  rex_function->getType(),
1486  rex_function->getName(),
1487  translateFunctionArgs(rex_function));
1488  } else if (rex_function->getName() == "ROUND"sv) {
1489  std::vector<std::shared_ptr<Analyzer::Expr>> args =
1490  translateFunctionArgs(rex_function);
1491 
1492  if (rex_function->size() == 1) {
1493  // push a 0 constant if 2nd operand is missing.
1494  // this needs to be done as calcite returns
1495  // only the 1st operand without defaulting the 2nd one
1496  // when the user did not specify the 2nd operand.
1497  SQLTypes t = kSMALLINT;
1498  Datum d;
1499  d.smallintval = 0;
1500  args.push_back(makeExpr<Analyzer::Constant>(t, false, d));
1501  }
1502 
1503  // make sure we have only 2 operands
1504  CHECK(args.size() == 2);
1505 
1506  if (!args[0]->get_type_info().is_number()) {
1507  throw std::runtime_error("Only numeric 1st operands are supported");
1508  }
1509 
1510  // the 2nd operand does not need to be a constant
1511  // it can happily reference another integer column
1512  if (!args[1]->get_type_info().is_integer()) {
1513  throw std::runtime_error("Only integer 2nd operands are supported");
1514  }
1515 
1516  // Calcite may upcast decimals in a way that is
1517  // incompatible with the extension function input. Play it safe and stick with the
1518  // argument type instead.
1519  const SQLTypeInfo ret_ti = args[0]->get_type_info().is_decimal()
1520  ? args[0]->get_type_info()
1521  : rex_function->getType();
1522 
1523  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1524  ret_ti, rex_function->getName(), args);
1525  }
1526  if (rex_function->getName() == "DATETIME_PLUS"sv) {
1527  auto dt_plus = makeExpr<Analyzer::FunctionOper>(rex_function->getType(),
1528  rex_function->getName(),
1529  translateFunctionArgs(rex_function));
1530  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1531  if (date_trunc) {
1532  return date_trunc;
1533  }
1534  return translateDateadd(rex_function);
1535  }
1536  if (rex_function->getName() == "/INT"sv) {
1537  CHECK_EQ(size_t(2), rex_function->size());
1538  std::shared_ptr<Analyzer::Expr> lhs = translateScalarRex(rex_function->getOperand(0));
1539  std::shared_ptr<Analyzer::Expr> rhs = translateScalarRex(rex_function->getOperand(1));
1540  const auto rhs_lit = std::dynamic_pointer_cast<Analyzer::Constant>(rhs);
1541  return Parser::OperExpr::normalize(kDIVIDE, kONE, lhs, rhs);
1542  }
1543  if (rex_function->getName() == "Reinterpret"sv) {
1544  CHECK_EQ(size_t(1), rex_function->size());
1545  return translateScalarRex(rex_function->getOperand(0));
1546  }
1547  if (func_resolve(rex_function->getName(),
1548  "ST_X"sv,
1549  "ST_Y"sv,
1550  "ST_XMin"sv,
1551  "ST_YMin"sv,
1552  "ST_XMax"sv,
1553  "ST_YMax"sv,
1554  "ST_NRings"sv,
1555  "ST_NPoints"sv,
1556  "ST_Length"sv,
1557  "ST_Perimeter"sv,
1558  "ST_Area"sv,
1559  "ST_SRID"sv,
1560  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1561  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1562  "OmniSci_Geo_PolyBoundsPtr"sv,
1563  "OmniSci_Geo_PolyRenderGroup"sv)) {
1564  CHECK_EQ(rex_function->size(), size_t(1));
1565  return translateUnaryGeoFunction(rex_function);
1566  }
1567  if (func_resolve(rex_function->getName(),
1568  "convert_meters_to_pixel_width"sv,
1569  "convert_meters_to_pixel_height"sv,
1570  "is_point_in_view"sv,
1571  "is_point_size_in_view"sv)) {
1572  return translateFunctionWithGeoArg(rex_function);
1573  }
1574  if (func_resolve(rex_function->getName(),
1575  "ST_Distance"sv,
1576  "ST_MaxDistance"sv,
1577  "ST_Intersects"sv,
1578  "ST_Disjoint"sv,
1579  "ST_Contains"sv,
1580  "ST_Overlaps"sv,
1581  "ST_Approx_Overlaps"sv,
1582  "ST_Within"sv)) {
1583  CHECK_EQ(rex_function->size(), size_t(2));
1584  return translateBinaryGeoFunction(rex_function);
1585  }
1586  if (func_resolve(rex_function->getName(), "ST_DWithin"sv, "ST_DFullyWithin"sv)) {
1587  CHECK_EQ(rex_function->size(), size_t(3));
1588  return translateTernaryGeoFunction(rex_function);
1589  }
1590  if (rex_function->getName() == "OFFSET_IN_FRAGMENT"sv) {
1591  CHECK_EQ(size_t(0), rex_function->size());
1592  return translateOffsetInFragment();
1593  }
1594  if (rex_function->getName() == "ARRAY"sv) {
1595  // Var args; currently no check. Possible fix-me -- can array have 0 elements?
1596  return translateArrayFunction(rex_function);
1597  }
1598  if (func_resolve(rex_function->getName(),
1599  "ST_GeomFromText"sv,
1600  "ST_GeogFromText"sv,
1601  "ST_Point"sv,
1602  "ST_Centroid"sv,
1603  "ST_SetSRID"sv)) {
1604  SQLTypeInfo ti;
1605  return translateGeoProjection(rex_function, ti, false);
1606  }
1607  if (func_resolve(rex_function->getName(),
1608  "ST_Intersection"sv,
1609  "ST_Difference"sv,
1610  "ST_Union"sv,
1611  "ST_Buffer"sv)) {
1612  SQLTypeInfo ti;
1613  return translateGeoBinaryConstructor(rex_function, ti, false);
1614  }
1615  if (func_resolve(rex_function->getName(), "ST_IsEmpty"sv, "ST_IsValid"sv)) {
1616  SQLTypeInfo ti;
1617  return translateGeoPredicate(rex_function, ti, false);
1618  }
1619 
1620  auto arg_expr_list = translateFunctionArgs(rex_function);
1621  if (rex_function->getName() == std::string("||") ||
1622  rex_function->getName() == std::string("SUBSTRING")) {
1623  SQLTypeInfo ret_ti(kTEXT, false);
1624  return makeExpr<Analyzer::FunctionOper>(
1625  ret_ti, rex_function->getName(), arg_expr_list);
1626  }
1627  // Reset possibly wrong return type of rex_function to the return
1628  // type of the optimal valid implementation. The return type can be
1629  // wrong in the case of multiple implementations of UDF functions
1630  // that have different return types but Calcite specifies the return
1631  // type according to the first implementation.
1632  SQLTypeInfo ret_ti;
1633  try {
1634  auto ext_func_sig = bind_function(rex_function->getName(), arg_expr_list);
1635 
1636  auto ext_func_args = ext_func_sig.getArgs();
1637  CHECK_EQ(arg_expr_list.size(), ext_func_args.size());
1638  for (size_t i = 0; i < arg_expr_list.size(); i++) {
1639  // fold casts on constants
1640  if (auto constant =
1641  std::dynamic_pointer_cast<Analyzer::Constant>(arg_expr_list[i])) {
1642  auto ext_func_arg_ti = ext_arg_type_to_type_info(ext_func_args[i]);
1643  if (ext_func_arg_ti != arg_expr_list[i]->get_type_info()) {
1644  arg_expr_list[i] = constant->add_cast(ext_func_arg_ti);
1645  }
1646  }
1647  }
1648 
1649  ret_ti = ext_arg_type_to_type_info(ext_func_sig.getRet());
1650  } catch (ExtensionFunctionBindingError& e) {
1651  LOG(WARNING) << "RelAlgTranslator::translateFunction: " << e.what();
1652  throw;
1653  }
1654 
1655  // By default, the extension function type will not allow nulls. If one of the arguments
1656  // is nullable, the extension function must also explicitly allow nulls.
1657  bool arguments_not_null = true;
1658  for (const auto& arg_expr : arg_expr_list) {
1659  if (!arg_expr->get_type_info().get_notnull()) {
1660  arguments_not_null = false;
1661  break;
1662  }
1663  }
1664  ret_ti.set_notnull(arguments_not_null);
1665 
1666  return makeExpr<Analyzer::FunctionOper>(ret_ti, rex_function->getName(), arg_expr_list);
1667 }
1668 
1669 namespace {
1670 
1671 std::vector<Analyzer::OrderEntry> translate_collation(
1672  const std::vector<SortField>& sort_fields) {
1673  std::vector<Analyzer::OrderEntry> collation;
1674  for (size_t i = 0; i < sort_fields.size(); ++i) {
1675  const auto& sort_field = sort_fields[i];
1676  collation.emplace_back(i,
1677  sort_field.getSortDir() == SortDirection::Descending,
1678  sort_field.getNullsPosition() == NullSortedPosition::First);
1679  }
1680  return collation;
1681 }
1682 
1684  const RexWindowFunctionOperator::RexWindowBound& window_bound) {
1685  return window_bound.unbounded && window_bound.preceding && !window_bound.following &&
1686  !window_bound.is_current_row && !window_bound.offset &&
1687  window_bound.order_key == 0;
1688 }
1689 
// Reports whether the upper frame bound of a window function is one the
// translator accepts. The decision depends on the window function kind and on
// whether the function has any order keys.
//
// @param rex_window_function  Window function whose upper bound is inspected.
// @return true when the upper bound is accepted, false otherwise.
1690 bool supported_upper_bound(const RexWindowFunctionOperator* rex_window_function) {
1691  const auto& window_bound = rex_window_function->getUpperBound();
// True when the bound is flagged as the current row: not unbounded, neither
// preceding nor following, no offset expression, and order_key == 1.
1692  const bool to_current_row = !window_bound.unbounded && !window_bound.preceding &&
1693  !window_bound.following && window_bound.is_current_row &&
1694  !window_bound.offset && window_bound.order_key == 1;
1695  switch (rex_window_function->getKind()) {
// NOTE(review): the `case` labels (and opening brace) that belong in front of
// this return are absent from this listing -- the numbering jumps from 1695 to
// 1700. Which kinds are listed cannot be determined from here; restore them
// from the upstream file before compiling.
1700  return to_current_row;
1701  }
1702  default: {
// Without order keys the bound must be flagged unbounded and following (not
// preceding, not the current row, no offset) with order_key == 2; with order
// keys the same current-row check as above applies.
1703  return rex_window_function->getOrderKeys().empty()
1704  ? (window_bound.unbounded && !window_bound.preceding &&
1705  window_bound.following && !window_bound.is_current_row &&
1706  !window_bound.offset && window_bound.order_key == 2)
1707  : to_current_row;
1708  }
1709  }
1710 }
1711 
1712 } // namespace
1713 
1714 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWindowFunction(
1715  const RexWindowFunctionOperator* rex_window_function) const {
1716  if (!supported_lower_bound(rex_window_function->getLowerBound()) ||
1717  !supported_upper_bound(rex_window_function) ||
1718  ((rex_window_function->getKind() == SqlWindowFunctionKind::ROW_NUMBER) !=
1719  rex_window_function->isRows())) {
1720  throw std::runtime_error("Frame specification not supported");
1721  }
1722  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1723  for (size_t i = 0; i < rex_window_function->size(); ++i) {
1724  args.push_back(translateScalarRex(rex_window_function->getOperand(i)));
1725  }
1726  std::vector<std::shared_ptr<Analyzer::Expr>> partition_keys;
1727  for (const auto& partition_key : rex_window_function->getPartitionKeys()) {
1728  partition_keys.push_back(translateScalarRex(partition_key.get()));
1729  }
1730  std::vector<std::shared_ptr<Analyzer::Expr>> order_keys;
1731  for (const auto& order_key : rex_window_function->getOrderKeys()) {
1732  order_keys.push_back(translateScalarRex(order_key.get()));
1733  }
1734  auto ti = rex_window_function->getType();
1735  if (window_function_is_value(rex_window_function->getKind())) {
1736  CHECK_GE(args.size(), 1u);
1737  ti = args.front()->get_type_info();
1738  }
1739  return makeExpr<Analyzer::WindowFunction>(
1740  ti,
1741  rex_window_function->getKind(),
1742  args,
1743  partition_keys,
1744  order_keys,
1745  translate_collation(rex_window_function->getCollation()));
1746 }
1747 
1749  const RexFunctionOperator* rex_function) const {
1750  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1751  for (size_t i = 0; i < rex_function->size(); ++i) {
1752  args.push_back(translateScalarRex(rex_function->getOperand(i)));
1753  }
1754  return args;
1755 }
1756 
1758  const std::shared_ptr<Analyzer::Expr> qual_expr) {
1759  CHECK(qual_expr);
1760  auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
1761  if (!bin_oper) {
1762  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1763  return {{}, {rewritten_qual_expr ? rewritten_qual_expr : qual_expr}};
1764  }
1765 
1766  if (bin_oper->get_optype() == kAND) {
1767  const auto lhs_cf = qual_to_conjunctive_form(bin_oper->get_own_left_operand());
1768  const auto rhs_cf = qual_to_conjunctive_form(bin_oper->get_own_right_operand());
1769  auto simple_quals = lhs_cf.simple_quals;
1770  simple_quals.insert(
1771  simple_quals.end(), rhs_cf.simple_quals.begin(), rhs_cf.simple_quals.end());
1772  auto quals = lhs_cf.quals;
1773  quals.insert(quals.end(), rhs_cf.quals.begin(), rhs_cf.quals.end());
1774  return {simple_quals, quals};
1775  }
1776  int rte_idx{0};
1777  const auto simple_qual = bin_oper->normalize_simple_predicate(rte_idx);
1778  return simple_qual ? QualsConjunctiveForm{{simple_qual}, {}}
1779  : QualsConjunctiveForm{{}, {qual_expr}};
1780 }
1781 
1782 std::vector<std::shared_ptr<Analyzer::Expr>> qual_to_disjunctive_form(
1783  const std::shared_ptr<Analyzer::Expr>& qual_expr) {
1784  CHECK(qual_expr);
1785  const auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
1786  if (!bin_oper) {
1787  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1788  return {rewritten_qual_expr ? rewritten_qual_expr : qual_expr};
1789  }
1790  if (bin_oper->get_optype() == kOR) {
1791  const auto lhs_df = qual_to_disjunctive_form(bin_oper->get_own_left_operand());
1792  const auto rhs_df = qual_to_disjunctive_form(bin_oper->get_own_right_operand());
1793  auto quals = lhs_df;
1794  quals.insert(quals.end(), rhs_df.begin(), rhs_df.end());
1795  return quals;
1796  }
1797  return {qual_expr};
1798 }
1799 
1800 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateHPTLiteral(
1801  const RexFunctionOperator* rex_function) const {
1802  /* since calcite uses Avatica package called DateTimeUtils to parse timestamp strings.
1803  Therefore any string having fractional seconds more 3 places after the decimal
1804  (milliseconds) will get truncated to 3 decimal places, therefore we lose precision
1805  (us|ns). Issue: [BE-2461] Here we are hijacking literal cast to Timestamp(6|9) from
1806  calcite and translating them to generate our own casts.
1807  */
1808  CHECK_EQ(size_t(1), rex_function->size());
1809  const auto operand = translateScalarRex(rex_function->getOperand(0));
1810  const auto& operand_ti = operand->get_type_info();
1811  const auto& target_ti = rex_function->getType();
1812  if (!operand_ti.is_string()) {
1813  throw std::runtime_error(
1814  "High precision timestamp cast argument must be a string. Input type is: " +
1815  operand_ti.get_type_name());
1816  } else if (!target_ti.is_high_precision_timestamp()) {
1817  throw std::runtime_error(
1818  "Cast target type should be high precision timestamp. Input type is: " +
1819  target_ti.get_type_name());
1820  } else if (target_ti.get_dimension() != 6 && target_ti.get_dimension() != 9) {
1821  throw std::runtime_error(
1822  "Cast target type should be TIMESTAMP(6|9). Input type is: TIMESTAMP(" +
1823  std::to_string(target_ti.get_dimension()) + ")");
1824  } else {
1825  return operand->add_cast(target_ti);
1826  }
1827 }
Defines data structures for the semantic analysis phase of query processing.
Definition: sqldefs.h:69
const RexScalar * getThen(const size_t idx) const
const std::vector< JoinType > join_types_
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:315
SQLAgg
Definition: sqldefs.h:71
#define CHECK_EQ(x, y)
Definition: Logger.h:211
auto func_resolve
std::shared_ptr< Analyzer::Expr > translateOffsetInFragment() const
static std::shared_ptr< Analyzer::Expr > get(const std::string &)
Definition: ParserNode.cpp:208
bool g_enable_watchdog
std::shared_ptr< Analyzer::Expr > translateCurrentTimestamp() const
SQLAgg getKind() const
Definition: sqltypes.h:48
std::shared_ptr< Analyzer::Expr > translateRegexp(const RexFunctionOperator *) const
bool supported_lower_bound(const RexWindowFunctionOperator::RexWindowBound &window_bound)
SQLTypes
Definition: sqltypes.h:37
size_t getOperand(size_t idx) const
const Executor * executor_
std::shared_ptr< Analyzer::Expr > translateUnlikely(const RexFunctionOperator *) const
tuple d
Definition: test_fsi.py:9
const RexScalar * getElse() const
SQLQualifier
Definition: sqldefs.h:69
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:133
std::shared_ptr< Analyzer::Expr > translateFunction(const RexFunctionOperator *) const
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:932
#define LOG(tag)
Definition: Logger.h:194
std::shared_ptr< Analyzer::Expr > translateScalarRex(const RexScalar *rex) const
const SQLTypeInfo & getType() const
bool boolval
Definition: sqltypes.h:205
size_t size() const
const RexScalar * getOperand(const size_t idx) const
std::shared_ptr< Analyzer::Expr > translateUoper(const RexOperator *) const
HOST DEVICE int get_scale() const
Definition: sqltypes.h:319
const std::vector< SortField > & getCollation() const
SQLOps
Definition: sqldefs.h:29
std::shared_ptr< Analyzer::Expr > translateDateadd(const RexFunctionOperator *) const
Definition: sqldefs.h:38
#define CHECK_GE(x, y)
Definition: Logger.h:216
std::shared_ptr< Analyzer::Expr > translateAbs(const RexFunctionOperator *) const
Definition: sqldefs.h:49
Definition: sqldefs.h:30
const RexScalar * getWhen(const size_t idx) const
std::shared_ptr< Analyzer::Expr > ExpressionPtr
Definition: Analyzer.h:180
std::string getString(int32_t string_id) const
std::shared_ptr< Analyzer::Expr > getInIntegerSetExpr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set) const
Definition: sqldefs.h:41
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::shared_ptr< Analyzer::Expr > translateItem(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > makeNumericConstant(const SQLTypeInfo &ti, const long val)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
bool is_number() const
Definition: sqltypes.h:494
#define CHECK_GT(x, y)
Definition: Logger.h:215
bool is_agg_supported_for_type(const SQLAgg &agg_kind, const SQLTypeInfo &arg_ti)
std::shared_ptr< Analyzer::Expr > translateGeoProjection(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
bool is_time() const
Definition: sqltypes.h:495
std::shared_ptr< Analyzer::Expr > translateOper(const RexOperator *) const
std::string to_string(char const *&&v)
std::shared_ptr< Analyzer::Expr > translateDatediff(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInput(const RexInput *) const
std::shared_ptr< Analyzer::Expr > translateSign(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >)
Definition: ParserNode.cpp:934
std::shared_ptr< Analyzer::Expr > translateGeoOverlapsOper(const RexOperator *) const
Definition: sqldefs.h:73
std::shared_ptr< Analyzer::Expr > translateLower(const RexFunctionOperator *) const
ExtractField to_datepart_field(const std::string &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
unsigned getIndex() const
static std::shared_ptr< Analyzer::Expr > translateLiteral(const RexLiteral *)
SQLOps getOperator() const
bool window_function_is_value(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:27
static constexpr int32_t INVALID_STR_ID
CONSTEXPR DEVICE bool is_null(const T &value)
Classes representing a parse tree.
std::shared_ptr< Analyzer::Expr > translateDatetime(const RexFunctionOperator *) const
void fill_dictionary_encoded_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice, const StringDictionaryProxy *source_dict, const StringDictionaryProxy *dest_dict, const int64_t needle_null_val)
#define CHECK_NE(x, y)
Definition: Logger.h:212
const std::shared_ptr< Analyzer::Expr > generate() const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > pattern_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_not)
Definition: ParserNode.cpp:654
void set_scale(int s)
Definition: sqltypes.h:409
int64_t bigintval
Definition: sqltypes.h:209
std::shared_ptr< const RexScalar > offset
std::vector< Analyzer::OrderEntry > translate_collation(const std::vector< SortField > &sort_fields)
size_t branchCount() const
std::shared_ptr< Analyzer::Expr > translateCurrentTime() const
Definition: sqldefs.h:37
Definition: sqldefs.h:75
Definition: sqldefs.h:69
int getDatabaseId() const
Definition: Catalog.h:277
int16_t smallintval
Definition: sqltypes.h:207
SQLTypeInfo build_type_info(const SQLTypes sql_type, const int scale, const int precision)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const std::string &)
Definition: ParserNode.cpp:118
DatetruncField to_datediff_field(const std::string &field)
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const DictRef dest_dict_ref, const std::vector< int32_t > &source_ids, const DictRef source_dict_ref, const int32_t dest_generation)
bool is_boolean() const
Definition: sqltypes.h:496
const std::vector< LeafHostInfo > & getStringDictionaryHosts() const
Definition: Catalog.cpp:1543
std::tuple< T, std::vector< SQLTypeInfo > > bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< T > &ext_funcs, const std::string processor)
const ColumnDescriptor * getMetadataForColumnBySpi(const int tableId, const size_t spi) const
Definition: Catalog.cpp:1598
const std::unordered_map< const RelAlgNode *, int > input_to_nest_level_
#define UNLIKELY(x)
Definition: likely.h:25
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr)
Definition: ParserNode.cpp:284
std::pair< std::shared_ptr< Analyzer::Expr >, SQLQualifier > get_quantified_rhs(const RexScalar *rex_scalar, const RelAlgTranslator &translator)
Definition: sqldefs.h:34
#define CHECK_LT(x, y)
Definition: Logger.h:213
Definition: sqltypes.h:51
Definition: sqltypes.h:52
Analyzer::ExpressionPtrVector translateFunctionArgs(const RexFunctionOperator *) const
Definition: sqldefs.h:40
Definition: sqldefs.h:69
const ConstRexScalarPtrVector & getPartitionKeys() const
#define TRANSIENT_DICT_ID
Definition: sqltypes.h:253
const RexWindowBound & getLowerBound() const
#define CHECK_LE(x, y)
Definition: Logger.h:214
std::shared_ptr< Analyzer::Expr > translateOverlapsOper(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateArrayFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > like_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_ilike, const bool is_not)
Definition: ParserNode.cpp:559
std::shared_ptr< Analyzer::Expr > translateCurrentUser(const RexFunctionOperator *) const
bool g_enable_experimental_string_functions
std::shared_ptr< Analyzer::Expr > translateSampleRatio(const RexFunctionOperator *) const
SqlWindowFunctionKind getKind() const
std::shared_ptr< Analyzer::Expr > translateLike(const RexFunctionOperator *) const
const RelAlgNode * getSourceNode() const
std::shared_ptr< Analyzer::Expr > translateGeoBinaryConstructor(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
Definition: sqltypes.h:40
bool supported_upper_bound(const RexWindowFunctionOperator *rex_window_function)
bool takes_arg(const TargetInfo &target_info)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t numericval, const int scale, const int precision)
Definition: ParserNode.cpp:158
std::shared_ptr< Analyzer::Expr > translateLikely(const RexFunctionOperator *) const
const RexWindowBound & getUpperBound() const
Definition: sqldefs.h:53
std::shared_ptr< Analyzer::Expr > get_in_values_expr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set)
static std::shared_ptr< Analyzer::Expr > get(const int64_t)
Definition: ParserNode.cpp:194
bool isDistinct() const
void set_notnull(bool n)
Definition: sqltypes.h:411
#define CHECK(condition)
Definition: Logger.h:203
std::shared_ptr< Analyzer::Expr > translateTernaryGeoFunction(const RexFunctionOperator *) const
const ConstRexScalarPtrVector & getOrderKeys() const
char * t
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
std::shared_ptr< Analyzer::Expr > translateBinaryGeoFunction(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > make_fp_constant(const int64_t val, const SQLTypeInfo &ti)
std::pair< Datum, bool > datum_from_scalar_tv(const ScalarTargetValue *scalar_tv, const SQLTypeInfo &ti) noexcept
std::shared_ptr< Analyzer::Expr > translateInOper(const RexOperator *) const
uint64_t exp_to_scale(const unsigned exp)
size_t size() const
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::vector< ExpressionPtr > ExpressionPtrVector
Definition: Analyzer.h:182
bool g_cluster
Definition: sqldefs.h:33
std::shared_ptr< Analyzer::Expr > translateCase(const RexCase *) const
std::shared_ptr< Analyzer::Expr > translateFunctionWithGeoArg(const RexFunctionOperator *) const
Definition: sqltypes.h:44
std::shared_ptr< const query_state::QueryState > query_state_
const std::string & getName() const
std::shared_ptr< Analyzer::Expr > translateCurrentDate() const
std::string get_datetimeplus_rewrite_funcname(const SQLOps &op)
void validate_datetime_datepart_argument(const std::shared_ptr< Analyzer::Constant > literal_expr)
std::shared_ptr< Analyzer::Expr > translateCardinality(const RexFunctionOperator *) const
Definition: sqldefs.h:74
int cpu_threads()
Definition: thread_count.h:24
const bool just_explain_
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
bool is_decimal() const
Definition: sqltypes.h:492
std::shared_ptr< Analyzer::Expr > translateGeoComparison(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateDatePlusMinus(const RexOperator *) const
const std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::shared_ptr< Analyzer::Expr > translateHPTLiteral(const RexFunctionOperator *) const
Definition: sqldefs.h:72
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
int32_t getIdOfString(const std::string &str) const
std::shared_ptr< Analyzer::Expr > translateDatepart(const RexFunctionOperator *) const
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1118
Definition: sqldefs.h:39
std::shared_ptr< Analyzer::Expr > rewrite_to_date_trunc(const Analyzer::FunctionOper *dt_plus)
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
DateaddField to_dateadd_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
void set_precision(int d)
Definition: sqltypes.h:407
#define IS_COMPARISON(X)
Definition: sqldefs.h:57
double doubleval
Definition: sqltypes.h:211
void fill_integer_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice)
const Catalog_Namespace::Catalog & cat_
std::shared_ptr< Analyzer::Expr > translateKeyForString(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
std::shared_ptr< Analyzer::Expr > translateWindowFunction(const RexWindowFunctionOperator *) const
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateScalarSubquery(const RexSubQuery *) const
std::shared_ptr< Analyzer::Expr > translateGeoPredicate(const RexFunctionOperator *, SQLTypeInfo &, const bool with_bounds) const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156
std::shared_ptr< Analyzer::Expr > translateLength(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateExtract(const RexFunctionOperator *) const