OmniSciDB  8a228a1076
RelAlgTranslator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgTranslator.h"
18 #include "Shared/SqlTypesLayout.h"
19 
21 #include "DateTimePlusRewrite.h"
22 #include "DateTimeTranslator.h"
24 #include "ExpressionRewrite.h"
27 #include "RelAlgDagBuilder.h"
28 #include "WindowContext.h"
29 
30 #include <future>
31 
32 #include "../Analyzer/Analyzer.h"
33 #include "../Parser/ParserNode.h"
34 #include "../Shared/likely.h"
35 #include "../Shared/sql_type_to_string.h"
36 #include "../Shared/thread_count.h"
37 
38 extern bool g_enable_watchdog;
39 
41 
42 namespace {
43 
45  const int scale,
46  const int precision) {
47  SQLTypeInfo ti(sql_type, 0, 0, true);
48  if (ti.is_decimal()) {
49  ti.set_scale(scale);
50  ti.set_precision(precision);
51  }
52  return ti;
53 }
54 
55 std::pair<std::shared_ptr<Analyzer::Expr>, SQLQualifier> get_quantified_rhs(
56  const RexScalar* rex_scalar,
57  const RelAlgTranslator& translator) {
58  std::shared_ptr<Analyzer::Expr> rhs;
59  SQLQualifier sql_qual{kONE};
60  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
61  if (!rex_operator) {
62  return std::make_pair(rhs, sql_qual);
63  }
64  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex_operator);
65  const auto qual_str = rex_function ? rex_function->getName() : "";
66  if (qual_str == "PG_ANY"sv || qual_str == "PG_ALL"sv) {
67  CHECK_EQ(size_t(1), rex_function->size());
68  rhs = translator.translateScalarRex(rex_function->getOperand(0));
69  sql_qual = (qual_str == "PG_ANY"sv) ? kANY : kALL;
70  }
71  if (!rhs && rex_operator->getOperator() == kCAST) {
72  CHECK_EQ(size_t(1), rex_operator->size());
73  std::tie(rhs, sql_qual) = get_quantified_rhs(rex_operator->getOperand(0), translator);
74  }
75  return std::make_pair(rhs, sql_qual);
76 }
77 
78 std::pair<Datum, bool> datum_from_scalar_tv(const ScalarTargetValue* scalar_tv,
79  const SQLTypeInfo& ti) noexcept {
80  Datum d{0};
81  bool is_null_const{false};
82  switch (ti.get_type()) {
83  case kBOOLEAN: {
84  const auto ival = boost::get<int64_t>(scalar_tv);
85  CHECK(ival);
86  if (*ival == inline_int_null_val(ti)) {
87  is_null_const = true;
88  } else {
89  d.boolval = *ival;
90  }
91  break;
92  }
93  case kTINYINT: {
94  const auto ival = boost::get<int64_t>(scalar_tv);
95  CHECK(ival);
96  if (*ival == inline_int_null_val(ti)) {
97  is_null_const = true;
98  } else {
99  d.tinyintval = *ival;
100  }
101  break;
102  }
103  case kSMALLINT: {
104  const auto ival = boost::get<int64_t>(scalar_tv);
105  CHECK(ival);
106  if (*ival == inline_int_null_val(ti)) {
107  is_null_const = true;
108  } else {
109  d.smallintval = *ival;
110  }
111  break;
112  }
113  case kINT: {
114  const auto ival = boost::get<int64_t>(scalar_tv);
115  CHECK(ival);
116  if (*ival == inline_int_null_val(ti)) {
117  is_null_const = true;
118  } else {
119  d.intval = *ival;
120  }
121  break;
122  }
123  case kDECIMAL:
124  case kNUMERIC:
125  case kBIGINT:
126  case kDATE:
127  case kTIME:
128  case kTIMESTAMP: {
129  const auto ival = boost::get<int64_t>(scalar_tv);
130  CHECK(ival);
131  if (*ival == inline_int_null_val(ti)) {
132  is_null_const = true;
133  } else {
134  d.bigintval = *ival;
135  }
136  break;
137  }
138  case kDOUBLE: {
139  const auto dval = boost::get<double>(scalar_tv);
140  CHECK(dval);
141  if (*dval == inline_fp_null_val(ti)) {
142  is_null_const = true;
143  } else {
144  d.doubleval = *dval;
145  }
146  break;
147  }
148  case kFLOAT: {
149  const auto fval = boost::get<float>(scalar_tv);
150  CHECK(fval);
151  if (*fval == inline_fp_null_val(ti)) {
152  is_null_const = true;
153  } else {
154  d.floatval = *fval;
155  }
156  break;
157  }
158  case kTEXT:
159  case kVARCHAR:
160  case kCHAR: {
161  auto nullable_sptr = boost::get<NullableString>(scalar_tv);
162  CHECK(nullable_sptr);
163  if (boost::get<void*>(nullable_sptr)) {
164  is_null_const = true;
165  } else {
166  auto sptr = boost::get<std::string>(nullable_sptr);
167  d.stringval = new std::string(*sptr);
168  }
169  break;
170  }
171  default:
172  CHECK(false) << "Unhandled type: " << ti.get_type_name();
173  }
174  return {d, is_null_const};
175 }
176 
177 } // namespace
178 
179 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarRex(
180  const RexScalar* rex) const {
181  const auto rex_input = dynamic_cast<const RexInput*>(rex);
182  if (rex_input) {
183  return translateInput(rex_input);
184  }
185  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex);
186  if (rex_literal) {
187  return translateLiteral(rex_literal);
188  }
189  const auto rex_window_function = dynamic_cast<const RexWindowFunctionOperator*>(rex);
190  if (rex_window_function) {
191  return translateWindowFunction(rex_window_function);
192  }
193  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex);
194  if (rex_function) {
195  return translateFunction(rex_function);
196  }
197  const auto rex_operator = dynamic_cast<const RexOperator*>(rex);
198  if (rex_operator) {
199  return translateOper(rex_operator);
200  }
201  const auto rex_case = dynamic_cast<const RexCase*>(rex);
202  if (rex_case) {
203  return translateCase(rex_case);
204  }
205  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rex);
206  if (rex_subquery) {
207  return translateScalarSubquery(rex_subquery);
208  }
209  CHECK(false);
210  return nullptr;
211 }
212 
213 namespace {
214 
215 bool is_agg_supported_for_type(const SQLAgg& agg_kind, const SQLTypeInfo& arg_ti) {
216  if ((agg_kind == kMIN || agg_kind == kMAX || agg_kind == kSUM || agg_kind == kAVG) &&
217  !(arg_ti.is_number() || arg_ti.is_boolean() || arg_ti.is_time())) {
218  return false;
219  }
220 
221  return true;
222 }
223 
224 } // namespace
225 
226 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAggregateRex(
227  const RexAgg* rex,
228  const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
229  const auto agg_kind = rex->getKind();
230  const bool is_distinct = rex->isDistinct();
231  const bool takes_arg{rex->size() > 0};
232  std::shared_ptr<Analyzer::Expr> arg_expr;
233  std::shared_ptr<Analyzer::Constant> err_rate;
234  if (takes_arg) {
235  const auto operand = rex->getOperand(0);
236  CHECK_LT(operand, scalar_sources.size());
237  CHECK_LE(rex->size(), 2u);
238  arg_expr = scalar_sources[operand];
239  if (agg_kind == kAPPROX_COUNT_DISTINCT && rex->size() == 2) {
240  err_rate = std::dynamic_pointer_cast<Analyzer::Constant>(
241  scalar_sources[rex->getOperand(1)]);
242  if (!err_rate || err_rate->get_type_info().get_type() != kINT ||
243  err_rate->get_constval().intval < 1 || err_rate->get_constval().intval > 100) {
244  throw std::runtime_error(
245  "APPROX_COUNT_DISTINCT's second parameter should be SMALLINT literal between "
246  "1 and 100");
247  }
248  }
249  const auto& arg_ti = arg_expr->get_type_info();
250  if (!is_agg_supported_for_type(agg_kind, arg_ti)) {
251  throw std::runtime_error("Aggregate on " + arg_ti.get_type_name() +
252  " is not supported yet.");
253  }
254  }
255  const auto agg_ti = get_agg_type(agg_kind, arg_expr.get());
256  return makeExpr<Analyzer::AggExpr>(agg_ti, agg_kind, arg_expr, is_distinct, err_rate);
257 }
258 
259 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLiteral(
260  const RexLiteral* rex_literal) {
261  auto lit_ti = build_type_info(
262  rex_literal->getType(), rex_literal->getScale(), rex_literal->getPrecision());
263  auto target_ti = build_type_info(rex_literal->getTargetType(),
264  rex_literal->getTypeScale(),
265  rex_literal->getTypePrecision());
266  switch (rex_literal->getType()) {
267  case kDECIMAL: {
268  const auto val = rex_literal->getVal<int64_t>();
269  const int precision = rex_literal->getPrecision();
270  const int scale = rex_literal->getScale();
271  if (target_ti.is_fp() && !scale) {
272  return make_fp_constant(val, target_ti);
273  }
274  auto lit_expr = scale ? Parser::FixedPtLiteral::analyzeValue(val, scale, precision)
276  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
277  }
278  case kTEXT: {
279  return Parser::StringLiteral::analyzeValue(rex_literal->getVal<std::string>());
280  }
281  case kBOOLEAN: {
282  Datum d;
283  d.boolval = rex_literal->getVal<bool>();
284  return makeExpr<Analyzer::Constant>(kBOOLEAN, false, d);
285  }
286  case kDOUBLE: {
287  Datum d;
288  d.doubleval = rex_literal->getVal<double>();
289  auto lit_expr = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
290  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
291  }
292  case kINTERVAL_DAY_TIME:
293  case kINTERVAL_YEAR_MONTH: {
294  Datum d;
295  d.bigintval = rex_literal->getVal<int64_t>();
296  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
297  }
298  case kTIME:
299  case kTIMESTAMP: {
300  Datum d;
301  d.bigintval =
302  rex_literal->getType() == kTIMESTAMP && rex_literal->getPrecision() > 0
303  ? rex_literal->getVal<int64_t>()
304  : rex_literal->getVal<int64_t>() / 1000;
305  return makeExpr<Analyzer::Constant>(
306  SQLTypeInfo(rex_literal->getType(), rex_literal->getPrecision(), 0, false),
307  false,
308  d);
309  }
310  case kDATE: {
311  Datum d;
312  d.bigintval = rex_literal->getVal<int64_t>() * 24 * 3600;
313  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
314  }
315  case kNULLT: {
316  if (target_ti.is_array()) {
318  // defaulting to valid sub-type for convenience
319  target_ti.set_subtype(kBOOLEAN);
320  return makeExpr<Analyzer::ArrayExpr>(target_ti, args, true);
321  }
322  return makeExpr<Analyzer::Constant>(rex_literal->getTargetType(), true, Datum{0});
323  }
324  default: {
325  LOG(FATAL) << "Unexpected literal type " << lit_ti.get_type_name();
326  }
327  }
328  return nullptr;
329 }
330 
331 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarSubquery(
332  const RexSubQuery* rex_subquery) const {
333  if (just_explain_) {
334  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
335  }
336  CHECK(rex_subquery);
337  auto result = rex_subquery->getExecutionResult();
338  auto row_set = result->getRows();
339  const size_t row_count = row_set->rowCount();
340  if (row_count > size_t(1)) {
341  throw std::runtime_error("Scalar sub-query returned multiple rows");
342  }
343  if (row_count == size_t(0)) {
344  throw std::runtime_error("Scalar sub-query returned no results");
345  }
346  CHECK_EQ(row_count, size_t(1));
347  row_set->moveToBegin();
348  auto first_row = row_set->getNextRow(false, false);
349  CHECK_EQ(first_row.size(), size_t(1));
350  auto scalar_tv = boost::get<ScalarTargetValue>(&first_row[0]);
351  auto ti = rex_subquery->getType();
352  if (ti.is_string()) {
353  throw std::runtime_error("Scalar sub-queries which return strings not supported");
354  }
355  Datum d{0};
356  bool is_null_const{false};
357  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
358  return makeExpr<Analyzer::Constant>(ti, is_null_const, d);
359 }
360 
361 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInput(
362  const RexInput* rex_input) const {
363  const auto source = rex_input->getSourceNode();
364  const auto it_rte_idx = input_to_nest_level_.find(source);
365  CHECK(it_rte_idx != input_to_nest_level_.end())
366  << "Not found in input_to_nest_level_, source=" << source->toString();
367  const int rte_idx = it_rte_idx->second;
368  const auto scan_source = dynamic_cast<const RelScan*>(source);
369  const auto& in_metainfo = source->getOutputMetainfo();
370  if (scan_source) {
371  // We're at leaf (scan) level and not supposed to have input metadata,
372  // the name and type information come directly from the catalog.
373  CHECK(in_metainfo.empty());
374  const auto table_desc = scan_source->getTableDescriptor();
375  const auto cd =
376  cat_.getMetadataForColumnBySpi(table_desc->tableId, rex_input->getIndex() + 1);
377  CHECK(cd);
378  auto col_ti = cd->columnType;
379  if (col_ti.is_string()) {
380  col_ti.set_type(kTEXT);
381  }
382  if (cd->isVirtualCol) {
383  // TODO(alex): remove at some point, we only need this fixup for backwards
384  // compatibility with old imported data
385  CHECK_EQ("rowid", cd->columnName);
386  col_ti.set_size(8);
387  }
388  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
389  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
390  col_ti.set_notnull(false);
391  }
392  return std::make_shared<Analyzer::ColumnVar>(
393  col_ti, table_desc->tableId, cd->columnId, rte_idx);
394  }
395  CHECK(!in_metainfo.empty()) << "for " << source->toString();
396  CHECK_GE(rte_idx, 0);
397  const size_t col_id = rex_input->getIndex();
398  CHECK_LT(col_id, in_metainfo.size());
399  auto col_ti = in_metainfo[col_id].get_type_info();
400 
401  if (join_types_.size() > 0) {
402  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
403  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
404  col_ti.set_notnull(false);
405  }
406  }
407 
408  return std::make_shared<Analyzer::ColumnVar>(col_ti, -source->getId(), col_id, rte_idx);
409 }
410 
411 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUoper(
412  const RexOperator* rex_operator) const {
413  CHECK_EQ(size_t(1), rex_operator->size());
414  const auto operand_expr = translateScalarRex(rex_operator->getOperand(0));
415  const auto sql_op = rex_operator->getOperator();
416  switch (sql_op) {
417  case kCAST: {
418  const auto& target_ti = rex_operator->getType();
419  CHECK_NE(kNULLT, target_ti.get_type());
420  const auto& operand_ti = operand_expr->get_type_info();
421  if (operand_ti.is_string() && target_ti.is_string()) {
422  return operand_expr;
423  }
424  if (target_ti.is_time() ||
425  operand_ti
426  .is_string()) { // TODO(alex): check and unify with the rest of the cases
427  // Do not propogate encoding on small dates
428  return target_ti.is_date_in_days()
429  ? operand_expr->add_cast(SQLTypeInfo(kDATE, false))
430  : operand_expr->add_cast(target_ti);
431  }
432  if (!operand_ti.is_string() && target_ti.is_string()) {
433  return operand_expr->add_cast(target_ti);
434  }
435 
436  return std::make_shared<Analyzer::UOper>(target_ti, false, sql_op, operand_expr);
437  }
438  case kNOT:
439  case kISNULL: {
440  return std::make_shared<Analyzer::UOper>(kBOOLEAN, sql_op, operand_expr);
441  }
442  case kISNOTNULL: {
443  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, operand_expr);
444  return std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
445  }
446  case kMINUS: {
447  const auto& ti = operand_expr->get_type_info();
448  return std::make_shared<Analyzer::UOper>(ti, false, kUMINUS, operand_expr);
449  }
450  case kUNNEST: {
451  const auto& ti = operand_expr->get_type_info();
452  CHECK(ti.is_array());
453  return makeExpr<Analyzer::UOper>(ti.get_elem_type(), false, kUNNEST, operand_expr);
454  }
455  default:
456  CHECK(false);
457  }
458  return nullptr;
459 }
460 
461 namespace {
462 
463 std::shared_ptr<Analyzer::Expr> get_in_values_expr(std::shared_ptr<Analyzer::Expr> arg,
464  const ResultSet& val_set) {
465  if (!can_use_parallel_algorithms(val_set)) {
466  return nullptr;
467  }
468  if (val_set.rowCount() > 5000000 && g_enable_watchdog) {
469  throw std::runtime_error(
470  "Unable to handle 'expr IN (subquery)', subquery returned 5M+ rows.");
471  }
472  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
473  const size_t fetcher_count = cpu_threads();
474  std::vector<std::list<std::shared_ptr<Analyzer::Expr>>> expr_set(
475  fetcher_count, std::list<std::shared_ptr<Analyzer::Expr>>());
476  std::vector<std::future<void>> fetcher_threads;
477  const auto& ti = arg->get_type_info();
478  const auto entry_count = val_set.entryCount();
479  for (size_t i = 0,
480  start_entry = 0,
481  stride = (entry_count + fetcher_count - 1) / fetcher_count;
482  i < fetcher_count && start_entry < entry_count;
483  ++i, start_entry += stride) {
484  const auto end_entry = std::min(start_entry + stride, entry_count);
485  fetcher_threads.push_back(std::async(
486  std::launch::async,
487  [&](std::list<std::shared_ptr<Analyzer::Expr>>& in_vals,
488  const size_t start,
489  const size_t end) {
490  for (auto index = start; index < end; ++index) {
491  auto row = val_set.getRowAt(index);
492  if (row.empty()) {
493  continue;
494  }
495  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
496  Datum d{0};
497  bool is_null_const{false};
498  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
499  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
500  auto ti_none_encoded = ti;
501  ti_none_encoded.set_compression(kENCODING_NONE);
502  auto none_encoded_string =
503  makeExpr<Analyzer::Constant>(ti, is_null_const, d);
504  auto dict_encoded_string = std::make_shared<Analyzer::UOper>(
505  ti, false, kCAST, none_encoded_string);
506  in_vals.push_back(dict_encoded_string);
507  } else {
508  in_vals.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
509  }
510  }
511  },
512  std::ref(expr_set[i]),
513  start_entry,
514  end_entry));
515  }
516  for (auto& child : fetcher_threads) {
517  child.get();
518  }
519 
520  val_set.moveToBegin();
521  for (auto& exprs : expr_set) {
522  value_exprs.splice(value_exprs.end(), exprs);
523  }
524  return makeExpr<Analyzer::InValues>(arg, value_exprs);
525 }
526 
527 } // namespace
528 
529 // Creates an Analyzer expression for an IN subquery which subsequently goes through the
530 // regular Executor::codegen() mechanism. The creation of the expression out of subquery's
531 // result set is parallelized whenever possible. In addition, take advantage of additional
532 // information that elements in the right hand side are constants; see
533 // getInIntegerSetExpr().
534 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInOper(
535  const RexOperator* rex_operator) const {
536  if (just_explain_) {
537  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
538  }
539  CHECK(rex_operator->size() == 2);
540  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
541  const auto rhs = rex_operator->getOperand(1);
542  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rhs);
543  CHECK(rex_subquery);
544  auto ti = lhs->get_type_info();
545  auto result = rex_subquery->getExecutionResult();
546  auto& row_set = result->getRows();
547  CHECK_EQ(size_t(1), row_set->colCount());
548  const auto& rhs_ti = row_set->getColType(0);
549  if (rhs_ti.get_type() != ti.get_type()) {
550  throw std::runtime_error(
551  "The two sides of the IN operator must have the same type; found " +
552  ti.get_type_name() + " and " + rhs_ti.get_type_name());
553  }
554  row_set->moveToBegin();
555  if (row_set->entryCount() > 10000) {
556  std::shared_ptr<Analyzer::Expr> expr;
557  if ((ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT)) &&
558  !row_set->getQueryMemDesc().didOutputColumnar()) {
559  expr = getInIntegerSetExpr(lhs, *row_set);
560  // Handle the highly unlikely case when the InIntegerSet ended up being tiny.
561  // Just let it fall through the usual InValues path at the end of this method,
562  // its codegen knows to use inline comparisons for few values.
563  if (expr && std::static_pointer_cast<Analyzer::InIntegerSet>(expr)
564  ->get_value_list()
565  .size() <= 100) {
566  expr = nullptr;
567  }
568  } else {
569  expr = get_in_values_expr(lhs, *row_set);
570  }
571  if (expr) {
572  return expr;
573  }
574  }
575  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
576  while (true) {
577  auto row = row_set->getNextRow(true, false);
578  if (row.empty()) {
579  break;
580  }
581  if (g_enable_watchdog && value_exprs.size() >= 10000) {
582  throw std::runtime_error(
583  "Unable to handle 'expr IN (subquery)', subquery returned 10000+ rows.");
584  }
585  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
586  Datum d{0};
587  bool is_null_const{false};
588  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
589  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
590  auto ti_none_encoded = ti;
591  ti_none_encoded.set_compression(kENCODING_NONE);
592  auto none_encoded_string = makeExpr<Analyzer::Constant>(ti, is_null_const, d);
593  auto dict_encoded_string =
594  std::make_shared<Analyzer::UOper>(ti, false, kCAST, none_encoded_string);
595  value_exprs.push_back(dict_encoded_string);
596  } else {
597  value_exprs.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
598  }
599  }
600  return makeExpr<Analyzer::InValues>(lhs, value_exprs);
601 }
602 
603 namespace {
604 
605 const size_t g_max_integer_set_size{1 << 25};
606 
608  std::vector<int64_t>& in_vals,
609  std::atomic<size_t>& total_in_vals_count,
610  const ResultSet* values_rowset,
611  const std::pair<int64_t, int64_t> values_rowset_slice,
612  const StringDictionaryProxy* source_dict,
613  const StringDictionaryProxy* dest_dict,
614  const int64_t needle_null_val) {
615  CHECK(in_vals.empty());
616  bool dicts_are_equal = source_dict == dest_dict;
617  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
618  ++index) {
619  const auto row = values_rowset->getOneColRow(index);
620  if (UNLIKELY(!row.valid)) {
621  continue;
622  }
623  if (dicts_are_equal) {
624  in_vals.push_back(row.value);
625  } else {
626  const int string_id =
627  row.value == needle_null_val
628  ? needle_null_val
629  : dest_dict->getIdOfString(source_dict->getString(row.value));
630  if (string_id != StringDictionary::INVALID_STR_ID) {
631  in_vals.push_back(string_id);
632  }
633  }
634  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
635  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
636  throw std::runtime_error(
637  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
638  }
639  }
640 }
641 
642 void fill_integer_in_vals(std::vector<int64_t>& in_vals,
643  std::atomic<size_t>& total_in_vals_count,
644  const ResultSet* values_rowset,
645  const std::pair<int64_t, int64_t> values_rowset_slice) {
646  CHECK(in_vals.empty());
647  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
648  ++index) {
649  const auto row = values_rowset->getOneColRow(index);
650  if (row.valid) {
651  in_vals.push_back(row.value);
652  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
653  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
654  throw std::runtime_error(
655  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
656  }
657  }
658  }
659 }
660 
661 // Multi-node counterpart of the other version. Saves round-trips, which is crucial
662 // for a big right-hand side result. It only handles physical string dictionary ids,
663 // therefore it won't be able to handle a right-hand side sub-query with a CASE
664 // returning literals on some branches. That case isn't hard too handle either, but
665 // it's not clear it's actually important in practice.
666 // RelAlgTranslator::getInIntegerSetExpr makes sure, by checking the encodings, that this
667 // function isn't called in such cases.
669  std::vector<int64_t>& in_vals,
670  std::atomic<size_t>& total_in_vals_count,
671  const ResultSet* values_rowset,
672  const std::pair<int64_t, int64_t> values_rowset_slice,
673  const std::vector<LeafHostInfo>& leaf_hosts,
674  const DictRef source_dict_ref,
675  const DictRef dest_dict_ref,
676  const int32_t dest_generation,
677  const int64_t needle_null_val) {
678  CHECK(in_vals.empty());
679  std::vector<int32_t> source_ids;
680  source_ids.reserve(values_rowset->entryCount());
681  bool has_nulls = false;
682  if (source_dict_ref == dest_dict_ref) {
683  in_vals.reserve(values_rowset_slice.second - values_rowset_slice.first +
684  1); // Add 1 to cover interval
685  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
686  ++index) {
687  const auto row = values_rowset->getOneColRow(index);
688  if (!row.valid) {
689  continue;
690  }
691  if (row.value != needle_null_val) {
692  in_vals.push_back(row.value);
693  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
694  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
695  throw std::runtime_error(
696  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
697  }
698  } else {
699  has_nulls = true;
700  }
701  }
702  if (has_nulls) {
703  in_vals.push_back(
704  needle_null_val); // we've deduped null values as an optimization, although
705  // this is not required by consumer
706  }
707  return;
708  }
709  // Code path below is for when dictionaries are not shared
710  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
711  ++index) {
712  const auto row = values_rowset->getOneColRow(index);
713  if (row.valid) {
714  if (row.value != needle_null_val) {
715  source_ids.push_back(row.value);
716  } else {
717  has_nulls = true;
718  }
719  }
720  }
721  std::vector<int32_t> dest_ids;
722  translate_string_ids(dest_ids,
723  leaf_hosts.front(),
724  dest_dict_ref,
725  source_ids,
726  source_dict_ref,
727  dest_generation);
728  CHECK_EQ(dest_ids.size(), source_ids.size());
729  in_vals.reserve(dest_ids.size() + (has_nulls ? 1 : 0));
730  if (has_nulls) {
731  in_vals.push_back(needle_null_val);
732  }
733  for (const int32_t dest_id : dest_ids) {
734  if (dest_id != StringDictionary::INVALID_STR_ID) {
735  in_vals.push_back(dest_id);
736  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
737  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
738  throw std::runtime_error(
739  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
740  }
741  }
742  }
743 }
744 
745 } // namespace
746 
747 // The typical IN subquery involves either dictionary-encoded strings or integers.
748 // Analyzer::InValues is a very heavy representation of the right hand side of such
749 // a query since we already know the right hand would be a list of Analyzer::Constant
750 // shared pointers. We can avoid the big overhead of each Analyzer::Constant and the
751 // refcounting associated with shared pointers by creating an abbreviated InIntegerSet
752 // representation of the IN expression which takes advantage of the this information.
753 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::getInIntegerSetExpr(
754  std::shared_ptr<Analyzer::Expr> arg,
755  const ResultSet& val_set) const {
756  if (!can_use_parallel_algorithms(val_set)) {
757  return nullptr;
758  }
759  std::vector<int64_t> value_exprs;
760  const size_t fetcher_count = cpu_threads();
761  std::vector<std::vector<int64_t>> expr_set(fetcher_count);
762  std::vector<std::future<void>> fetcher_threads;
763  const auto& arg_type = arg->get_type_info();
764  const auto entry_count = val_set.entryCount();
765  CHECK_EQ(size_t(1), val_set.colCount());
766  const auto& col_type = val_set.getColType(0);
767  if (g_cluster && arg_type.is_string() &&
768  (col_type.get_comp_param() <= 0 || arg_type.get_comp_param() <= 0)) {
769  // Skip this case for now, see comment for fill_dictionary_encoded_in_vals.
770  return nullptr;
771  }
772  std::atomic<size_t> total_in_vals_count{0};
773  for (size_t i = 0,
774  start_entry = 0,
775  stride = (entry_count + fetcher_count - 1) / fetcher_count;
776  i < fetcher_count && start_entry < entry_count;
777  ++i, start_entry += stride) {
778  expr_set[i].reserve(entry_count / fetcher_count);
779  const auto end_entry = std::min(start_entry + stride, entry_count);
780  if (arg_type.is_string()) {
781  CHECK_EQ(kENCODING_DICT, arg_type.get_compression());
782  // const int32_t dest_dict_id = arg_type.get_comp_param();
783  // const int32_t source_dict_id = col_type.get_comp_param();
784  const DictRef dest_dict_ref(arg_type.get_comp_param(), cat_.getDatabaseId());
785  const DictRef source_dict_ref(col_type.get_comp_param(), cat_.getDatabaseId());
786  const auto dd = executor_->getStringDictionaryProxy(
787  arg_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
788  const auto sd = executor_->getStringDictionaryProxy(
789  col_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
790  CHECK(sd);
791  const auto needle_null_val = inline_int_null_val(arg_type);
792  fetcher_threads.push_back(std::async(
793  std::launch::async,
794  [this,
795  &val_set,
796  &total_in_vals_count,
797  sd,
798  dd,
799  source_dict_ref,
800  dest_dict_ref,
801  needle_null_val](
802  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
803  if (g_cluster) {
804  CHECK_GE(dd->getGeneration(), 0);
806  total_in_vals_count,
807  &val_set,
808  {start, end},
809  cat_.getStringDictionaryHosts(),
810  source_dict_ref,
811  dest_dict_ref,
812  dd->getGeneration(),
813  needle_null_val);
814  } else {
816  total_in_vals_count,
817  &val_set,
818  {start, end},
819  sd,
820  dd,
821  needle_null_val);
822  }
823  },
824  std::ref(expr_set[i]),
825  start_entry,
826  end_entry));
827  } else {
828  CHECK(arg_type.is_integer());
829  fetcher_threads.push_back(std::async(
830  std::launch::async,
831  [&val_set, &total_in_vals_count](
832  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
833  fill_integer_in_vals(in_vals, total_in_vals_count, &val_set, {start, end});
834  },
835  std::ref(expr_set[i]),
836  start_entry,
837  end_entry));
838  }
839  }
840  for (auto& child : fetcher_threads) {
841  child.get();
842  }
843 
844  val_set.moveToBegin();
845  value_exprs.reserve(entry_count);
846  for (auto& exprs : expr_set) {
847  value_exprs.insert(value_exprs.end(), exprs.begin(), exprs.end());
848  }
849  return makeExpr<Analyzer::InIntegerSet>(
850  arg, value_exprs, arg_type.get_notnull() && col_type.get_notnull());
851 }
852 
853 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOper(
854  const RexOperator* rex_operator) const {
855  CHECK_GT(rex_operator->size(), size_t(0));
856  if (rex_operator->size() == 1) {
857  return translateUoper(rex_operator);
858  }
859  const auto sql_op = rex_operator->getOperator();
860  if (sql_op == kIN) {
861  return translateInOper(rex_operator);
862  }
863  if (sql_op == kMINUS || sql_op == kPLUS) {
864  auto date_plus_minus = translateDatePlusMinus(rex_operator);
865  if (date_plus_minus) {
866  return date_plus_minus;
867  }
868  }
869  if (sql_op == kOVERLAPS) {
870  return translateOverlapsOper(rex_operator);
871  } else if (IS_COMPARISON(sql_op)) {
872  auto geo_comp = translateGeoComparison(rex_operator);
873  if (geo_comp) {
874  return geo_comp;
875  }
876  }
877  auto lhs = translateScalarRex(rex_operator->getOperand(0));
878  for (size_t i = 1; i < rex_operator->size(); ++i) {
879  std::shared_ptr<Analyzer::Expr> rhs;
880  SQLQualifier sql_qual{kONE};
881  const auto rhs_op = rex_operator->getOperand(i);
882  std::tie(rhs, sql_qual) = get_quantified_rhs(rhs_op, *this);
883  if (!rhs) {
884  rhs = translateScalarRex(rhs_op);
885  }
886  CHECK(rhs);
887  lhs = Parser::OperExpr::normalize(sql_op, sql_qual, lhs, rhs);
888  }
889  return lhs;
890 }
891 
892 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOverlapsOper(
893  const RexOperator* rex_operator) const {
894  const auto sql_op = rex_operator->getOperator();
895  CHECK(sql_op == kOVERLAPS);
896 
897  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
898  const auto lhs_ti = lhs->get_type_info();
899  if (lhs_ti.is_geometry()) {
900  return translateGeoOverlapsOper(rex_operator);
901  } else {
902  throw std::runtime_error(
903  "Overlaps equivalence is currently only supported for geospatial types");
904  }
905 }
906 
907 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCase(
908  const RexCase* rex_case) const {
909  std::shared_ptr<Analyzer::Expr> else_expr;
910  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
911  expr_list;
912  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
913  const auto when_expr = translateScalarRex(rex_case->getWhen(i));
914  const auto then_expr = translateScalarRex(rex_case->getThen(i));
915  expr_list.emplace_back(when_expr, then_expr);
916  }
917  if (rex_case->getElse()) {
918  else_expr = translateScalarRex(rex_case->getElse());
919  }
920  return Parser::CaseExpr::normalize(expr_list, else_expr);
921 }
922 
923 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLike(
924  const RexFunctionOperator* rex_function) const {
925  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
926  const auto arg = translateScalarRex(rex_function->getOperand(0));
927  const auto like = translateScalarRex(rex_function->getOperand(1));
928  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(like)) {
929  throw std::runtime_error("The matching pattern must be a literal.");
930  }
931  const auto escape = (rex_function->size() == 3)
932  ? translateScalarRex(rex_function->getOperand(2))
933  : nullptr;
934  const bool is_ilike = rex_function->getName() == "PG_ILIKE"sv;
935  return Parser::LikeExpr::get(arg, like, escape, is_ilike, false);
936 }
937 
938 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRegexp(
939  const RexFunctionOperator* rex_function) const {
940  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
941  const auto arg = translateScalarRex(rex_function->getOperand(0));
942  const auto pattern = translateScalarRex(rex_function->getOperand(1));
943  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(pattern)) {
944  throw std::runtime_error("The matching pattern must be a literal.");
945  }
946  const auto escape = (rex_function->size() == 3)
947  ? translateScalarRex(rex_function->getOperand(2))
948  : nullptr;
949  return Parser::RegexpExpr::get(arg, pattern, escape, false);
950 }
951 
952 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLikely(
953  const RexFunctionOperator* rex_function) const {
954  CHECK(rex_function->size() == 1);
955  const auto arg = translateScalarRex(rex_function->getOperand(0));
956  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.9375);
957 }
958 
959 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUnlikely(
960  const RexFunctionOperator* rex_function) const {
961  CHECK(rex_function->size() == 1);
962  const auto arg = translateScalarRex(rex_function->getOperand(0));
963  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.0625);
964 }
965 
966 namespace {
967 
// NOTE(review): the line carrying this helper's return type and name (listing
// line 968) is not present in this copy of the file - restore it from the
// upstream source before building.
//
// Rejects a missing or NULL 'DatePart' literal: throws std::runtime_error when
// `literal_expr` is an empty pointer or reports get_is_null().
969  const std::shared_ptr<Analyzer::Constant> literal_expr) {
970  if (!literal_expr || literal_expr->get_is_null()) {
971  throw std::runtime_error("The 'DatePart' argument must be a not 'null' literal.");
972  }
973 }
974 
975 } // namespace
976 
977 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateExtract(
978  const RexFunctionOperator* rex_function) const {
979  CHECK_EQ(size_t(2), rex_function->size());
980  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
981  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
983  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
984  const bool is_date_trunc = rex_function->getName() == "PG_DATE_TRUNC"sv;
985  if (is_date_trunc) {
986  return DateTruncExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
987  } else {
988  return ExtractExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
989  }
990 }
991 
992 namespace {
993 
994 std::shared_ptr<Analyzer::Constant> makeNumericConstant(const SQLTypeInfo& ti,
995  const long val) {
996  CHECK(ti.is_number());
997  Datum datum{0};
998  switch (ti.get_type()) {
999  case kTINYINT: {
1000  datum.tinyintval = val;
1001  break;
1002  }
1003  case kSMALLINT: {
1004  datum.smallintval = val;
1005  break;
1006  }
1007  case kINT: {
1008  datum.intval = val;
1009  break;
1010  }
1011  case kBIGINT: {
1012  datum.bigintval = val;
1013  break;
1014  }
1015  case kDECIMAL:
1016  case kNUMERIC: {
1017  datum.bigintval = val * exp_to_scale(ti.get_scale());
1018  break;
1019  }
1020  case kFLOAT: {
1021  datum.floatval = val;
1022  break;
1023  }
1024  case kDOUBLE: {
1025  datum.doubleval = val;
1026  break;
1027  }
1028  default:
1029  CHECK(false);
1030  }
1031  return makeExpr<Analyzer::Constant>(ti, false, datum);
1032 }
1033 
1034 } // namespace
1035 
1036 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDateadd(
1037  const RexFunctionOperator* rex_function) const {
1038  CHECK_EQ(size_t(3), rex_function->size());
1039  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1040  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1042  const auto number_units = translateScalarRex(rex_function->getOperand(1));
1043  const auto number_units_const =
1044  std::dynamic_pointer_cast<Analyzer::Constant>(number_units);
1045  if (number_units_const && number_units_const->get_is_null()) {
1046  throw std::runtime_error("The 'Interval' argument literal must not be 'null'.");
1047  }
1048  const auto cast_number_units = number_units->add_cast(SQLTypeInfo(kBIGINT, false));
1049  const auto datetime = translateScalarRex(rex_function->getOperand(2));
1050  const auto& datetime_ti = datetime->get_type_info();
1051  if (datetime_ti.get_type() == kTIME) {
1052  throw std::runtime_error("DateAdd operation not supported for TIME.");
1053  }
1054  const auto& field = to_dateadd_field(*timeunit_lit->get_constval().stringval);
1055  const int dim = datetime_ti.get_dimension();
1056  return makeExpr<Analyzer::DateaddExpr>(
1057  SQLTypeInfo(kTIMESTAMP, dim, 0, false), field, cast_number_units, datetime);
1058 }
1059 
1060 namespace {
1061 
// NOTE(review): the signature line of this helper (listing line 1062) is not
// present in this copy of the file; translateDatePlusMinus() calls it as
// get_datetimeplus_rewrite_funcname(op) - restore the header from upstream.
//
// Returns the function name used for the date-trunc rewrite of `datetime + x`.
// Only kPLUS is accepted; any other operator trips the CHECK.
1063  CHECK(op == kPLUS);
1064  return "DATETIME_PLUS"s;
1065 }
1066 
1067 } // namespace
1068 
1069 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatePlusMinus(
1070  const RexOperator* rex_operator) const {
1071  if (rex_operator->size() != 2) {
1072  return nullptr;
1073  }
1074  const auto datetime = translateScalarRex(rex_operator->getOperand(0));
1075  const auto datetime_ti = datetime->get_type_info();
1076  if (!datetime_ti.is_timestamp() && !datetime_ti.is_date()) {
1077  if (datetime_ti.get_type() == kTIME) {
1078  throw std::runtime_error("DateTime addition/subtraction not supported for TIME.");
1079  }
1080  return nullptr;
1081  }
1082  const auto rhs = translateScalarRex(rex_operator->getOperand(1));
1083  const auto rhs_ti = rhs->get_type_info();
1084  if (rhs_ti.get_type() == kTIMESTAMP || rhs_ti.get_type() == kDATE) {
1085  if (datetime_ti.is_high_precision_timestamp() ||
1086  rhs_ti.is_high_precision_timestamp()) {
1087  throw std::runtime_error(
1088  "High Precision timestamps are not supported for TIMESTAMPDIFF operation. Use "
1089  "DATEDIFF.");
1090  }
1091  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1092  const auto& rex_operator_ti = rex_operator->getType();
1093  const auto datediff_field =
1094  (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) ? dtSECOND : dtMONTH;
1095  auto result =
1096  makeExpr<Analyzer::DatediffExpr>(bigint_ti, datediff_field, rhs, datetime);
1097  // multiply 1000 to result since expected result should be in millisecond precision.
1098  if (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) {
1099  return makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1100  kMULTIPLY,
1101  kONE,
1102  result,
1103  makeNumericConstant(bigint_ti, 1000));
1104  } else {
1105  return result;
1106  }
1107  }
1108  const auto op = rex_operator->getOperator();
1109  if (op == kPLUS) {
1110  std::vector<std::shared_ptr<Analyzer::Expr>> args = {datetime, rhs};
1111  auto dt_plus = makeExpr<Analyzer::FunctionOper>(
1112  datetime_ti, get_datetimeplus_rewrite_funcname(op), args);
1113  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1114  if (date_trunc) {
1115  return date_trunc;
1116  }
1117  }
1118  const auto interval = fold_expr(rhs.get());
1119  auto interval_ti = interval->get_type_info();
1120  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1121  const auto interval_lit = std::dynamic_pointer_cast<Analyzer::Constant>(interval);
1122  if (interval_ti.get_type() == kINTERVAL_DAY_TIME) {
1123  std::shared_ptr<Analyzer::Expr> interval_sec;
1124  if (interval_lit) {
1125  interval_sec =
1126  makeNumericConstant(bigint_ti,
1127  (op == kMINUS ? -interval_lit->get_constval().bigintval
1128  : interval_lit->get_constval().bigintval) /
1129  1000);
1130  } else {
1131  interval_sec = makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1132  kDIVIDE,
1133  kONE,
1134  interval,
1135  makeNumericConstant(bigint_ti, 1000));
1136  if (op == kMINUS) {
1137  interval_sec =
1138  std::make_shared<Analyzer::UOper>(bigint_ti, false, kUMINUS, interval_sec);
1139  }
1140  }
1141  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daSECOND, interval_sec, datetime);
1142  }
1143  CHECK(interval_ti.get_type() == kINTERVAL_YEAR_MONTH);
1144  const auto interval_months = op == kMINUS ? std::make_shared<Analyzer::UOper>(
1145  bigint_ti, false, kUMINUS, interval)
1146  : interval;
1147  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daMONTH, interval_months, datetime);
1148 }
1149 
1150 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatediff(
1151  const RexFunctionOperator* rex_function) const {
1152  CHECK_EQ(size_t(3), rex_function->size());
1153  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1154  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1156  const auto start = translateScalarRex(rex_function->getOperand(1));
1157  const auto end = translateScalarRex(rex_function->getOperand(2));
1158  const auto field = to_datediff_field(*timeunit_lit->get_constval().stringval);
1159  return makeExpr<Analyzer::DatediffExpr>(SQLTypeInfo(kBIGINT, false), field, start, end);
1160 }
1161 
1162 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatepart(
1163  const RexFunctionOperator* rex_function) const {
1164  CHECK_EQ(size_t(2), rex_function->size());
1165  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1166  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1168  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1169  return ExtractExpr::generate(
1170  from_expr, to_datepart_field(*timeunit_lit->get_constval().stringval));
1171 }
1172 
1173 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLength(
1174  const RexFunctionOperator* rex_function) const {
1175  CHECK_EQ(size_t(1), rex_function->size());
1176  const auto str_arg = translateScalarRex(rex_function->getOperand(0));
1177  return makeExpr<Analyzer::CharLengthExpr>(str_arg->decompress(),
1178  rex_function->getName() == "CHAR_LENGTH"sv);
1179 }
1180 
1181 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateKeyForString(
1182  const RexFunctionOperator* rex_function) const {
1183  const auto& args = translateFunctionArgs(rex_function);
1184  CHECK_EQ(size_t(1), args.size());
1185  const auto expr = dynamic_cast<Analyzer::Expr*>(args[0].get());
1186  if (nullptr == expr || !expr->get_type_info().is_string() ||
1187  expr->get_type_info().is_varlen()) {
1188  throw std::runtime_error(rex_function->getName() +
1189  " expects a dictionary encoded text column.");
1190  }
1191  return makeExpr<Analyzer::KeyForStringExpr>(args[0]);
1192 }
1193 
1194 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSampleRatio(
1195  const RexFunctionOperator* rex_function) const {
1196  CHECK_EQ(size_t(1), rex_function->size());
1197  auto arg = translateScalarRex(rex_function->getOperand(0));
1198  const auto& arg_ti = arg->get_type_info();
1199  if (arg_ti.get_type() != kDOUBLE) {
1200  const auto& double_ti = SQLTypeInfo(kDOUBLE, arg_ti.get_notnull());
1201  arg = arg->add_cast(double_ti);
1202  }
1203  return makeExpr<Analyzer::SampleRatioExpr>(arg);
1204 }
1205 
1206 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentUser(
1207  const RexFunctionOperator* rex_function) const {
1208  std::string user{"SESSIONLESS_USER"};
1209  if (query_state_) {
1210  user = query_state_->getConstSessionInfo()->get_currentUser().userName;
1211  }
1212  return Parser::UserLiteral::get(user);
1213 }
1214 
1215 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLower(
1216  const RexFunctionOperator* rex_function) const {
1217  const auto& args = translateFunctionArgs(rex_function);
1218  CHECK_EQ(size_t(1), args.size());
1219  CHECK(args[0]);
1220 
1221  if (args[0]->get_type_info().is_dict_encoded_string() ||
1222  dynamic_cast<Analyzer::Constant*>(args[0].get())) {
1223  return makeExpr<Analyzer::LowerExpr>(args[0]);
1224  }
1225 
1226  throw std::runtime_error(rex_function->getName() +
1227  " expects a dictionary encoded text column or a literal.");
1228 }
1229 
1231  const RexFunctionOperator* rex_function) const {
1232  const auto ret_ti = rex_function->getType();
1233  const auto arg = translateScalarRex(rex_function->getOperand(0));
1234  const auto arg_ti = arg->get_type_info();
1235  if (!arg_ti.is_array()) {
1236  throw std::runtime_error(rex_function->getName() + " expects an array expression.");
1237  }
1238  if (arg_ti.get_subtype() == kARRAY) {
1239  throw std::runtime_error(rex_function->getName() +
1240  " expects one-dimension array expression.");
1241  }
1242  const auto array_size = arg_ti.get_size();
1243  const auto array_elem_size = arg_ti.get_elem_type().get_array_context_logical_size();
1244 
1245  if (array_size > 0) {
1246  if (array_elem_size <= 0) {
1247  throw std::runtime_error(rex_function->getName() +
1248  ": unexpected array element type.");
1249  }
1250  // Return cardinality of a fixed length array
1251  return makeNumericConstant(ret_ti, array_size / array_elem_size);
1252  }
1253  // Variable length array cardinality will be calculated at runtime
1254  return makeExpr<Analyzer::CardinalityExpr>(arg);
1255 }
1256 
1257 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateItem(
1258  const RexFunctionOperator* rex_function) const {
1259  CHECK_EQ(size_t(2), rex_function->size());
1260  const auto base = translateScalarRex(rex_function->getOperand(0));
1261  const auto index = translateScalarRex(rex_function->getOperand(1));
1262  return makeExpr<Analyzer::BinOper>(
1263  base->get_type_info().get_elem_type(), false, kARRAY_AT, kONE, base, index);
1264 }
1265 
1266 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateNow() const {
1267  return Parser::TimestampLiteral::get(now_);
1268 }
1269 
1270 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatetime(
1271  const RexFunctionOperator* rex_function) const {
1272  CHECK_EQ(size_t(1), rex_function->size());
1273  const auto arg = translateScalarRex(rex_function->getOperand(0));
1274  const auto arg_lit = std::dynamic_pointer_cast<Analyzer::Constant>(arg);
1275  const std::string datetime_err{R"(Only DATETIME('NOW') supported for now.)"};
1276  if (!arg_lit || arg_lit->get_is_null()) {
1277  throw std::runtime_error(datetime_err);
1278  }
1279  CHECK(arg_lit->get_type_info().is_string());
1280  if (*arg_lit->get_constval().stringval != "NOW"sv) {
1281  throw std::runtime_error(datetime_err);
1282  }
1283  return translateNow();
1284 }
1285 
1286 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAbs(
1287  const RexFunctionOperator* rex_function) const {
1288  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1289  expr_list;
1290  CHECK_EQ(size_t(1), rex_function->size());
1291  const auto operand = translateScalarRex(rex_function->getOperand(0));
1292  const auto& operand_ti = operand->get_type_info();
1293  CHECK(operand_ti.is_number());
1294  const auto zero = makeNumericConstant(operand_ti, 0);
1295  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1296  const auto uminus_operand =
1297  makeExpr<Analyzer::UOper>(operand_ti.get_type(), kUMINUS, operand);
1298  expr_list.emplace_back(lt_zero, uminus_operand);
1299  return makeExpr<Analyzer::CaseExpr>(operand_ti, false, expr_list, operand);
1300 }
1301 
1302 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSign(
1303  const RexFunctionOperator* rex_function) const {
1304  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1305  expr_list;
1306  CHECK_EQ(size_t(1), rex_function->size());
1307  const auto operand = translateScalarRex(rex_function->getOperand(0));
1308  const auto& operand_ti = operand->get_type_info();
1309  CHECK(operand_ti.is_number());
1310  const auto zero = makeNumericConstant(operand_ti, 0);
1311  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1312  expr_list.emplace_back(lt_zero, makeNumericConstant(operand_ti, -1));
1313  const auto eq_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, operand, zero);
1314  expr_list.emplace_back(eq_zero, makeNumericConstant(operand_ti, 0));
1315  const auto gt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kGT, kONE, operand, zero);
1316  expr_list.emplace_back(gt_zero, makeNumericConstant(operand_ti, 1));
1317  return makeExpr<Analyzer::CaseExpr>(
1318  operand_ti,
1319  false,
1320  expr_list,
1321  makeExpr<Analyzer::Constant>(operand_ti, true, Datum{0}));
1322 }
1323 
1324 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOffsetInFragment() const {
1325  return makeExpr<Analyzer::OffsetInFragment>();
1326 }
1327 
1329  const RexFunctionOperator* rex_function) const {
1330  if (rex_function->getType().get_subtype() == kNULLT) {
1331  auto sql_type = rex_function->getType();
1332  CHECK(sql_type.get_type() == kARRAY);
1333 
1334  // FIX-ME: Deal with NULL arrays
1335  auto translated_function_args(translateFunctionArgs(rex_function));
1336  if (translated_function_args.size() > 0) {
1337  const auto first_element_logical_type =
1338  get_nullable_logical_type_info(translated_function_args[0]->get_type_info());
1339 
1340  auto diff_elem_itr =
1341  std::find_if(translated_function_args.begin(),
1342  translated_function_args.end(),
1343  [first_element_logical_type](const auto expr) {
1344  return first_element_logical_type !=
1345  get_nullable_logical_type_info(expr->get_type_info());
1346  });
1347  if (diff_elem_itr != translated_function_args.end()) {
1348  throw std::runtime_error(
1349  "Element " +
1350  std::to_string(diff_elem_itr - translated_function_args.begin()) +
1351  " is not of the same type as other elements of the array. Consider casting "
1352  "to force this condition.\nElement Type: " +
1353  get_nullable_logical_type_info((*diff_elem_itr)->get_type_info())
1354  .to_string() +
1355  "\nArray type: " + first_element_logical_type.to_string());
1356  }
1357 
1358  if (first_element_logical_type.is_string() &&
1359  !first_element_logical_type.is_dict_encoded_string()) {
1360  sql_type.set_subtype(first_element_logical_type.get_type());
1361  sql_type.set_compression(kENCODING_FIXED);
1362  } else if (first_element_logical_type.is_dict_encoded_string()) {
1363  sql_type.set_subtype(first_element_logical_type.get_type());
1364  sql_type.set_comp_param(TRANSIENT_DICT_ID);
1365  } else {
1366  sql_type.set_subtype(first_element_logical_type.get_type());
1367  sql_type.set_scale(first_element_logical_type.get_scale());
1368  sql_type.set_precision(first_element_logical_type.get_precision());
1369  }
1370 
1371  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1372  } else {
1373  // defaulting to valid sub-type for convenience
1374  sql_type.set_subtype(kBOOLEAN);
1375  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1376  }
1377  } else {
1378  return makeExpr<Analyzer::ArrayExpr>(rex_function->getType(),
1379  translateFunctionArgs(rex_function));
1380  }
1381 }
1382 
1383 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateFunction(
1384  const RexFunctionOperator* rex_function) const {
1385  if (func_resolve(rex_function->getName(), "LIKE"sv, "PG_ILIKE"sv)) {
1386  return translateLike(rex_function);
1387  }
1388  if (rex_function->getName() == "REGEXP_LIKE"sv) {
1389  return translateRegexp(rex_function);
1390  }
1391  if (rex_function->getName() == "LIKELY"sv) {
1392  return translateLikely(rex_function);
1393  }
1394  if (rex_function->getName() == "UNLIKELY"sv) {
1395  return translateUnlikely(rex_function);
1396  }
1397  if (func_resolve(rex_function->getName(), "PG_EXTRACT"sv, "PG_DATE_TRUNC"sv)) {
1398  return translateExtract(rex_function);
1399  }
1400  if (rex_function->getName() == "DATEADD"sv) {
1401  return translateDateadd(rex_function);
1402  }
1403  if (rex_function->getName() == "DATEDIFF"sv) {
1404  return translateDatediff(rex_function);
1405  }
1406  if (rex_function->getName() == "DATEPART"sv) {
1407  return translateDatepart(rex_function);
1408  }
1409  if (func_resolve(rex_function->getName(), "LENGTH"sv, "CHAR_LENGTH"sv)) {
1410  return translateLength(rex_function);
1411  }
1412  if (rex_function->getName() == "KEY_FOR_STRING"sv) {
1413  return translateKeyForString(rex_function);
1414  }
1415  if (rex_function->getName() == "SAMPLE_RATIO"sv) {
1416  return translateSampleRatio(rex_function);
1417  }
1418  if (rex_function->getName() == "CURRENT_USER"sv) {
1419  return translateCurrentUser(rex_function);
1420  }
1421  if (g_enable_experimental_string_functions && rex_function->getName() == "LOWER"sv) {
1422  return translateLower(rex_function);
1423  }
1424  if (func_resolve(rex_function->getName(), "CARDINALITY"sv, "ARRAY_LENGTH"sv)) {
1425  return translateCardinality(rex_function);
1426  }
1427  if (rex_function->getName() == "ITEM"sv) {
1428  return translateItem(rex_function);
1429  }
1430  if (rex_function->getName() == "NOW"sv) {
1431  return translateNow();
1432  }
1433  if (rex_function->getName() == "DATETIME"sv) {
1434  return translateDatetime(rex_function);
1435  }
1436  if (func_resolve(rex_function->getName(), "usTIMESTAMP"sv, "nsTIMESTAMP"sv)) {
1437  return translateHPTLiteral(rex_function);
1438  }
1439  if (rex_function->getName() == "ABS"sv) {
1440  return translateAbs(rex_function);
1441  }
1442  if (rex_function->getName() == "SIGN"sv) {
1443  return translateSign(rex_function);
1444  }
1445  if (func_resolve(rex_function->getName(), "CEIL"sv, "FLOOR"sv)) {
1446  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1447  rex_function->getType(),
1448  rex_function->getName(),
1449  translateFunctionArgs(rex_function));
1450  } else if (rex_function->getName() == "ROUND"sv) {
1451  std::vector<std::shared_ptr<Analyzer::Expr>> args =
1452  translateFunctionArgs(rex_function);
1453 
1454  if (rex_function->size() == 1) {
1455  // push a 0 constant if 2nd operand is missing.
1456  // this needs to be done as calcite returns
1457  // only the 1st operand without defaulting the 2nd one
1458  // when the user did not specify the 2nd operand.
1459  SQLTypes t = kSMALLINT;
1460  Datum d;
1461  d.smallintval = 0;
1462  args.push_back(makeExpr<Analyzer::Constant>(t, false, d));
1463  }
1464 
1465  // make sure we have only 2 operands
1466  CHECK(args.size() == 2);
1467 
1468  if (!args[0]->get_type_info().is_number()) {
1469  throw std::runtime_error("Only numeric 1st operands are supported");
1470  }
1471 
1472  // the 2nd operand does not need to be a constant
1473  // it can happily reference another integer column
1474  if (!args[1]->get_type_info().is_integer()) {
1475  throw std::runtime_error("Only integer 2nd operands are supported");
1476  }
1477 
1478  // Calcite may upcast decimals in a way that is
1479  // incompatible with the extension function input. Play it safe and stick with the
1480  // argument type instead.
1481  const SQLTypeInfo ret_ti = args[0]->get_type_info().is_decimal()
1482  ? args[0]->get_type_info()
1483  : rex_function->getType();
1484 
1485  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1486  ret_ti, rex_function->getName(), args);
1487  }
1488  if (rex_function->getName() == "DATETIME_PLUS"sv) {
1489  auto dt_plus = makeExpr<Analyzer::FunctionOper>(rex_function->getType(),
1490  rex_function->getName(),
1491  translateFunctionArgs(rex_function));
1492  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1493  if (date_trunc) {
1494  return date_trunc;
1495  }
1496  return translateDateadd(rex_function);
1497  }
1498  if (rex_function->getName() == "/INT"sv) {
1499  CHECK_EQ(size_t(2), rex_function->size());
1500  std::shared_ptr<Analyzer::Expr> lhs = translateScalarRex(rex_function->getOperand(0));
1501  std::shared_ptr<Analyzer::Expr> rhs = translateScalarRex(rex_function->getOperand(1));
1502  const auto rhs_lit = std::dynamic_pointer_cast<Analyzer::Constant>(rhs);
1503  return Parser::OperExpr::normalize(kDIVIDE, kONE, lhs, rhs);
1504  }
1505  if (rex_function->getName() == "Reinterpret"sv) {
1506  CHECK_EQ(size_t(1), rex_function->size());
1507  return translateScalarRex(rex_function->getOperand(0));
1508  }
1509  if (func_resolve(rex_function->getName(),
1510  "ST_X"sv,
1511  "ST_Y"sv,
1512  "ST_XMin"sv,
1513  "ST_YMin"sv,
1514  "ST_XMax"sv,
1515  "ST_YMax"sv,
1516  "ST_NRings"sv,
1517  "ST_NPoints"sv,
1518  "ST_Length"sv,
1519  "ST_Perimeter"sv,
1520  "ST_Area"sv,
1521  "ST_SRID"sv,
1522  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1523  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1524  "OmniSci_Geo_PolyBoundsPtr"sv,
1525  "OmniSci_Geo_PolyRenderGroup"sv)) {
1526  CHECK_EQ(rex_function->size(), size_t(1));
1527  return translateUnaryGeoFunction(rex_function);
1528  }
1529  if (func_resolve(rex_function->getName(),
1530  "convert_meters_to_pixel_width"sv,
1531  "convert_meters_to_pixel_height"sv,
1532  "is_point_in_view"sv,
1533  "is_point_size_in_view"sv)) {
1534  return translateFunctionWithGeoArg(rex_function);
1535  }
1536  if (func_resolve(rex_function->getName(),
1537  "ST_Distance"sv,
1538  "ST_MaxDistance"sv,
1539  "ST_Intersects"sv,
1540  "ST_Disjoint"sv,
1541  "ST_Contains"sv,
1542  "ST_Overlaps"sv,
1543  "ST_Within"sv)) {
1544  CHECK_EQ(rex_function->size(), size_t(2));
1545  return translateBinaryGeoFunction(rex_function);
1546  }
1547  if (func_resolve(rex_function->getName(), "ST_DWithin"sv, "ST_DFullyWithin"sv)) {
1548  CHECK_EQ(rex_function->size(), size_t(3));
1549  return translateTernaryGeoFunction(rex_function);
1550  }
1551  if (rex_function->getName() == "OFFSET_IN_FRAGMENT"sv) {
1552  CHECK_EQ(size_t(0), rex_function->size());
1553  return translateOffsetInFragment();
1554  }
1555  if (rex_function->getName() == "ARRAY"sv) {
1556  // Var args; currently no check. Possible fix-me -- can array have 0 elements?
1557  return translateArrayFunction(rex_function);
1558  }
1559  if (func_resolve(rex_function->getName(),
1560  "ST_GeomFromText"sv,
1561  "ST_GeogFromText"sv,
1562  "ST_Point"sv,
1563  "ST_Centroid"sv,
1564  "ST_SetSRID"sv)) {
1565  SQLTypeInfo ti;
1566  return translateGeoProjection(rex_function, ti, false);
1567  }
1568  if (func_resolve(rex_function->getName(),
1569  "ST_Intersection"sv,
1570  "ST_Difference"sv,
1571  "ST_Union"sv,
1572  "ST_Buffer"sv)) {
1573  SQLTypeInfo ti;
1574  return translateGeoBinaryConstructor(rex_function, ti, false);
1575  }
1576  if (func_resolve(rex_function->getName(), "ST_IsEmpty"sv, "ST_IsValid"sv)) {
1577  SQLTypeInfo ti;
1578  return translateGeoPredicate(rex_function, ti, false);
1579  }
1580 
1581  auto arg_expr_list = translateFunctionArgs(rex_function);
1582  if (rex_function->getName() == std::string("||") ||
1583  rex_function->getName() == std::string("SUBSTRING")) {
1584  SQLTypeInfo ret_ti(kTEXT, false);
1585  return makeExpr<Analyzer::FunctionOper>(
1586  ret_ti, rex_function->getName(), arg_expr_list);
1587  }
1588  // Reset possibly wrong return type of rex_function to the return
1589  // type of the optimal valid implementation. The return type can be
1590  // wrong in the case of multiple implementations of UDF functions
1591  // that have different return types but Calcite specifies the return
1592  // type according to the first implementation.
1593  auto ext_func_sig = bind_function(rex_function->getName(), arg_expr_list);
1594  auto ext_func_args = ext_func_sig.getArgs();
1595  CHECK_EQ(arg_expr_list.size(), ext_func_args.size());
1596  for (size_t i = 0; i < arg_expr_list.size(); i++) {
1597  // fold casts on constants
1598  if (auto constant = std::dynamic_pointer_cast<Analyzer::Constant>(arg_expr_list[i])) {
1599  auto ext_func_arg_ti = ext_arg_type_to_type_info(ext_func_args[i]);
1600  if (ext_func_arg_ti != arg_expr_list[i]->get_type_info()) {
1601  arg_expr_list[i] = constant->add_cast(ext_func_arg_ti);
1602  }
1603  }
1604  }
1605  auto ret_ti = ext_arg_type_to_type_info(ext_func_sig.getRet());
1606  // By defualt, the extension function type will not allow nulls. If one of the arguments
1607  // is nullable, the extension function must also explicitly allow nulls.
1608  bool arguments_not_null = true;
1609  for (const auto& arg_expr : arg_expr_list) {
1610  if (!arg_expr->get_type_info().get_notnull()) {
1611  arguments_not_null = false;
1612  break;
1613  }
1614  }
1615  ret_ti.set_notnull(arguments_not_null);
1616 
1617  return makeExpr<Analyzer::FunctionOper>(ret_ti, rex_function->getName(), arg_expr_list);
1618 }
1619 
1620 namespace {
1621 
1622 std::vector<Analyzer::OrderEntry> translate_collation(
1623  const std::vector<SortField>& sort_fields) {
1624  std::vector<Analyzer::OrderEntry> collation;
1625  for (size_t i = 0; i < sort_fields.size(); ++i) {
1626  const auto& sort_field = sort_fields[i];
1627  collation.emplace_back(i,
1628  sort_field.getSortDir() == SortDirection::Descending,
1629  sort_field.getNullsPosition() == NullSortedPosition::First);
1630  }
1631  return collation;
1632 }
1633 
1635  const RexWindowFunctionOperator::RexWindowBound& window_bound) {
1636  return window_bound.unbounded && window_bound.preceding && !window_bound.following &&
1637  !window_bound.is_current_row && !window_bound.offset &&
1638  window_bound.order_key == 0;
1639 }
1640 
// Tells whether the upper bound of the window function's frame is one the
// translator accepts.
//
// NOTE(review): the `case` labels that open the first switch branch (listing
// lines 1647-1650) are not present in this copy of the file; restore them from
// the upstream source before building.
1641 bool supported_upper_bound(const RexWindowFunctionOperator* rex_window_function) {
1642  const auto& window_bound = rex_window_function->getUpperBound();
// "Up to the current row": bounded, neither preceding nor following, flagged
// as current row, no offset, order key 1.
1643  const bool to_current_row = !window_bound.unbounded && !window_bound.preceding &&
1644  !window_bound.following && window_bound.is_current_row &&
1645  !window_bound.offset && window_bound.order_key == 1;
1646  switch (rex_window_function->getKind()) {
// The kinds selected by the missing labels accept only the current-row bound.
1651  return to_current_row;
1652  }
1653  default: {
// Every other kind: without order keys the bound must be unbounded following
// (order key 2); with order keys it must be the current-row bound.
1654  return rex_window_function->getOrderKeys().empty()
1655  ? (window_bound.unbounded && !window_bound.preceding &&
1656  window_bound.following && !window_bound.is_current_row &&
1657  !window_bound.offset && window_bound.order_key == 2)
1658  : to_current_row;
1659  }
1660  }
1661 }
1662 
1663 } // namespace
1664 
1665 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWindowFunction(
1666  const RexWindowFunctionOperator* rex_window_function) const {
1667  if (!supported_lower_bound(rex_window_function->getLowerBound()) ||
1668  !supported_upper_bound(rex_window_function) ||
1669  ((rex_window_function->getKind() == SqlWindowFunctionKind::ROW_NUMBER) !=
1670  rex_window_function->isRows())) {
1671  throw std::runtime_error("Frame specification not supported");
1672  }
1673  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1674  for (size_t i = 0; i < rex_window_function->size(); ++i) {
1675  args.push_back(translateScalarRex(rex_window_function->getOperand(i)));
1676  }
1677  std::vector<std::shared_ptr<Analyzer::Expr>> partition_keys;
1678  for (const auto& partition_key : rex_window_function->getPartitionKeys()) {
1679  partition_keys.push_back(translateScalarRex(partition_key.get()));
1680  }
1681  std::vector<std::shared_ptr<Analyzer::Expr>> order_keys;
1682  for (const auto& order_key : rex_window_function->getOrderKeys()) {
1683  order_keys.push_back(translateScalarRex(order_key.get()));
1684  }
1685  auto ti = rex_window_function->getType();
1686  if (window_function_is_value(rex_window_function->getKind())) {
1687  CHECK_GE(args.size(), 1u);
1688  ti = args.front()->get_type_info();
1689  }
1690  return makeExpr<Analyzer::WindowFunction>(
1691  ti,
1692  rex_window_function->getKind(),
1693  args,
1694  partition_keys,
1695  order_keys,
1696  translate_collation(rex_window_function->getCollation()));
1697 }
1698 
1700  const RexFunctionOperator* rex_function) const {
1701  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1702  for (size_t i = 0; i < rex_function->size(); ++i) {
1703  args.push_back(translateScalarRex(rex_function->getOperand(i)));
1704  }
1705  return args;
1706 }
1707 
1709  const std::shared_ptr<Analyzer::Expr> qual_expr) {
1710  CHECK(qual_expr);
1711  auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
1712  if (!bin_oper) {
1713  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1714  return {{}, {rewritten_qual_expr ? rewritten_qual_expr : qual_expr}};
1715  }
1716 
1717  if (bin_oper->get_optype() == kAND) {
1718  const auto lhs_cf = qual_to_conjunctive_form(bin_oper->get_own_left_operand());
1719  const auto rhs_cf = qual_to_conjunctive_form(bin_oper->get_own_right_operand());
1720  auto simple_quals = lhs_cf.simple_quals;
1721  simple_quals.insert(
1722  simple_quals.end(), rhs_cf.simple_quals.begin(), rhs_cf.simple_quals.end());
1723  auto quals = lhs_cf.quals;
1724  quals.insert(quals.end(), rhs_cf.quals.begin(), rhs_cf.quals.end());
1725  return {simple_quals, quals};
1726  }
1727  int rte_idx{0};
1728  const auto simple_qual = bin_oper->normalize_simple_predicate(rte_idx);
1729  return simple_qual ? QualsConjunctiveForm{{simple_qual}, {}}
1730  : QualsConjunctiveForm{{}, {qual_expr}};
1731 }
1732 
1733 std::vector<std::shared_ptr<Analyzer::Expr>> qual_to_disjunctive_form(
1734  const std::shared_ptr<Analyzer::Expr>& qual_expr) {
1735  CHECK(qual_expr);
1736  const auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
1737  if (!bin_oper) {
1738  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1739  return {rewritten_qual_expr ? rewritten_qual_expr : qual_expr};
1740  }
1741  if (bin_oper->get_optype() == kOR) {
1742  const auto lhs_df = qual_to_disjunctive_form(bin_oper->get_own_left_operand());
1743  const auto rhs_df = qual_to_disjunctive_form(bin_oper->get_own_right_operand());
1744  auto quals = lhs_df;
1745  quals.insert(quals.end(), rhs_df.begin(), rhs_df.end());
1746  return quals;
1747  }
1748  return {qual_expr};
1749 }
1750 
1751 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateHPTLiteral(
1752  const RexFunctionOperator* rex_function) const {
1753  /* since calcite uses Avatica package called DateTimeUtils to parse timestamp strings.
1754  Therefore any string having fractional seconds more 3 places after the decimal
1755  (milliseconds) will get truncated to 3 decimal places, therefore we lose precision
1756  (us|ns). Issue: [BE-2461] Here we are hijacking literal cast to Timestamp(6|9) from
1757  calcite and translating them to generate our own casts.
1758  */
1759  CHECK_EQ(size_t(1), rex_function->size());
1760  const auto operand = translateScalarRex(rex_function->getOperand(0));
1761  const auto& operand_ti = operand->get_type_info();
1762  const auto& target_ti = rex_function->getType();
1763  if (!operand_ti.is_string()) {
1764  throw std::runtime_error(
1765  "High precision timestamp cast argument must be a string. Input type is: " +
1766  operand_ti.get_type_name());
1767  } else if (!target_ti.is_high_precision_timestamp()) {
1768  throw std::runtime_error(
1769  "Cast target type should be high precision timestamp. Input type is: " +
1770  target_ti.get_type_name());
1771  } else if (target_ti.get_dimension() != 6 && target_ti.get_dimension() != 9) {
1772  throw std::runtime_error(
1773  "Cast target type should be TIMESTAMP(6|9). Input type is: TIMESTAMP(" +
1774  std::to_string(target_ti.get_dimension()) + ")");
1775  } else {
1776  return operand->add_cast(target_ti);
1777  }
1778 }
std::shared_ptr< Analyzer::Expr > translateWindowFunction(const RexWindowFunctionOperator *) const
int32_t getIdOfString(const std::string &str) const
Definition: sqldefs.h:69
SQLOps getOperator() const
SQLAgg
Definition: sqldefs.h:71
size_t size() const
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const std::vector< SortField > & getCollation() const
auto func_resolve
const ConstRexScalarPtrVector & getPartitionKeys() const
std::shared_ptr< Analyzer::Expr > translateDatediff(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(const std::string &)
Definition: ParserNode.cpp:192
bool is_time() const
Definition: sqltypes.h:422
bool g_enable_watchdog
Definition: Execute.cpp:74
bool is_boolean() const
Definition: sqltypes.h:423
Definition: sqltypes.h:51
SqlWindowFunctionKind getKind() const
bool supported_lower_bound(const RexWindowFunctionOperator::RexWindowBound &window_bound)
SQLTypes
Definition: sqltypes.h:40
std::shared_ptr< Analyzer::Expr > getInIntegerSetExpr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set) const
const RexWindowBound & getLowerBound() const
SQLQualifier
Definition: sqldefs.h:69
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:117
std::shared_ptr< Analyzer::Expr > translateCardinality(const RexFunctionOperator *) const
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:840
#define LOG(tag)
Definition: Logger.h:188
bool boolval
Definition: sqltypes.h:134
SQLOps
Definition: sqldefs.h:29
ExtensionFunction bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< ExtensionFunction > &ext_funcs)
std::shared_ptr< Analyzer::Expr > translateKeyForString(const RexFunctionOperator *) const
const ConstRexScalarPtrVector & getOrderKeys() const
size_t getOperand(size_t idx) const
std::shared_ptr< Analyzer::Expr > translateAbs(const RexFunctionOperator *) const
size_t branchCount() const
Definition: sqldefs.h:38
std::shared_ptr< Analyzer::Expr > translateItem(const RexFunctionOperator *) const
#define CHECK_GE(x, y)
Definition: Logger.h:210
Definition: sqldefs.h:49
Definition: sqldefs.h:30
std::shared_ptr< Analyzer::Expr > translateSign(const RexFunctionOperator *) const
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
std::shared_ptr< Analyzer::Expr > ExpressionPtr
Definition: Analyzer.h:181
const std::shared_ptr< Analyzer::Expr > generate() const
Definition: sqldefs.h:41
std::shared_ptr< Analyzer::Expr > translateOper(const RexOperator *) const
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::shared_ptr< Analyzer::Expr > translateLength(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > makeNumericConstant(const SQLTypeInfo &ti, const long val)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
std::shared_ptr< Analyzer::Expr > translateUnlikely(const RexFunctionOperator *) const
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
#define CHECK_GT(x, y)
Definition: Logger.h:209
bool is_agg_supported_for_type(const SQLAgg &agg_kind, const SQLTypeInfo &arg_ti)
bool is_decimal() const
Definition: sqltypes.h:419
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
Analyzer::ExpressionPtrVector translateFunctionArgs(const RexFunctionOperator *) const
HOST DEVICE int get_scale() const
Definition: sqltypes.h:264
std::shared_ptr< Analyzer::Expr > translateOverlapsOper(const RexOperator *) const
std::string to_string(char const *&&v)
bool g_enable_experimental_string_functions
std::shared_ptr< Analyzer::Expr > translateDatePlusMinus(const RexOperator *) const
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >)
Definition: ParserNode.cpp:922
const RexScalar * getWhen(const size_t idx) const
std::shared_ptr< Analyzer::Expr > translateOffsetInFragment() const
Definition: sqldefs.h:73
const RexWindowBound & getUpperBound() const
ExtractField to_datepart_field(const std::string &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
std::shared_ptr< Analyzer::Expr > translateLower(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateDateadd(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > translateLiteral(const RexLiteral *)
bool window_function_is_value(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:27
static constexpr int32_t INVALID_STR_ID
const RexScalar * getThen(const size_t idx) const
std::shared_ptr< Analyzer::Expr > translateScalarSubquery(const RexSubQuery *) const
const std::vector< ExtArgumentType > & getArgs() const
const RelAlgNode * getSourceNode() const
std::string getString(int32_t string_id) const
#define CHECK_NE(x, y)
Definition: Logger.h:206
std::shared_ptr< Analyzer::Expr > translateUoper(const RexOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > pattern_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_not)
Definition: ParserNode.cpp:642
std::shared_ptr< Analyzer::Expr > translateScalarRex(const RexScalar *rex) const
void set_scale(int s)
Definition: sqltypes.h:354
int64_t bigintval
Definition: sqltypes.h:138
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:260
std::shared_ptr< const RexScalar > offset
std::vector< Analyzer::OrderEntry > translate_collation(const std::vector< SortField > &sort_fields)
RexSubQuery(const std::shared_ptr< const RelAlgNode > ra)
Definition: sqldefs.h:37
Definition: sqldefs.h:75
Definition: sqldefs.h:69
int16_t smallintval
Definition: sqltypes.h:136
SQLTypeInfo build_type_info(const SQLTypes sql_type, const int scale, const int precision)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const std::string &)
Definition: ParserNode.cpp:102
DatetruncField to_datediff_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > translateFunction(const RexFunctionOperator *) const
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const DictRef dest_dict_ref, const std::vector< int32_t > &source_ids, const DictRef source_dict_ref, const int32_t dest_generation)
std::shared_ptr< Analyzer::Expr > translateArrayFunction(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateLike(const RexFunctionOperator *) const
const RexScalar * getOperand(const size_t idx) const
#define UNLIKELY(x)
Definition: likely.h:20
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr)
Definition: ParserNode.cpp:272
std::pair< std::shared_ptr< Analyzer::Expr >, SQLQualifier > get_quantified_rhs(const RexScalar *rex_scalar, const RelAlgTranslator &translator)
Definition: sqldefs.h:34
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqldefs.h:40
Definition: sqldefs.h:69
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
#define TRANSIENT_DICT_ID
Definition: sqltypes.h:198
std::shared_ptr< Analyzer::Expr > translateExtract(const RexFunctionOperator *) const
#define CHECK_LE(x, y)
Definition: Logger.h:208
unsigned getIndex() const
bool is_null(const T &v, const SQLTypeInfo &t)
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > like_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_ilike, const bool is_not)
Definition: ParserNode.cpp:547
SQLAgg getKind() const
std::shared_ptr< Analyzer::Expr > translateLikely(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateSampleRatio(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateRegexp(const RexFunctionOperator *) const
Definition: sqltypes.h:43
bool supported_upper_bound(const RexWindowFunctionOperator *rex_window_function)
bool takes_arg(const TargetInfo &target_info)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t numericval, const int scale, const int precision)
Definition: ParserNode.cpp:142
std::shared_ptr< Analyzer::Expr > translateHPTLiteral(const RexFunctionOperator *) const
Definition: sqldefs.h:53
std::shared_ptr< Analyzer::Expr > translateInOper(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > get_in_values_expr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set)
static std::shared_ptr< Analyzer::Expr > get(const int64_t)
Definition: ParserNode.cpp:178
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:1003
void set_notnull(bool n)
Definition: sqltypes.h:356
#define CHECK(condition)
Definition: Logger.h:197
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
std::shared_ptr< Analyzer::Constant > make_fp_constant(const int64_t val, const SQLTypeInfo &ti)
std::pair< Datum, bool > datum_from_scalar_tv(const ScalarTargetValue *scalar_tv, const SQLTypeInfo &ti) noexcept
uint64_t exp_to_scale(const unsigned exp)
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::shared_ptr< Analyzer::Expr > translateCase(const RexCase *) const
std::vector< ExpressionPtr > ExpressionPtrVector
Definition: Analyzer.h:183
bool g_cluster
const SQLTypeInfo & getType() const
Definition: sqldefs.h:33
Definition: sqltypes.h:47
std::shared_ptr< Analyzer::Expr > translateCurrentUser(const RexFunctionOperator *) const
std::string get_datetimeplus_rewrite_funcname(const SQLOps &op)
void validate_datetime_datepart_argument(const std::shared_ptr< Analyzer::Constant > literal_expr)
const std::string & getName() const
Definition: sqldefs.h:74
int cpu_threads()
Definition: thread_count.h:24
void fill_dictionary_encoded_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice, const std::vector< LeafHostInfo > &leaf_hosts, const DictRef source_dict_ref, const DictRef dest_dict_ref, const int32_t dest_generation, const int64_t needle_null_val)
const std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::shared_ptr< Analyzer::Expr > translateNow() const
Definition: sqldefs.h:72
std::shared_ptr< Analyzer::Expr > translateDatetime(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInput(const RexInput *) const
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
Definition: sqldefs.h:39
std::shared_ptr< Analyzer::Expr > rewrite_to_date_trunc(const Analyzer::FunctionOper *dt_plus)
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
std::shared_ptr< Analyzer::Expr > translateDatepart(const RexFunctionOperator *) const
size_t size() const
bool is_number() const
Definition: sqltypes.h:421
DateaddField to_dateadd_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
void set_precision(int d)
Definition: sqltypes.h:352
#define IS_COMPARISON(X)
Definition: sqldefs.h:57
double doubleval
Definition: sqltypes.h:140
void fill_integer_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice)
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
const std::shared_ptr< Analyzer::Expr > generate() const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156
bool isDistinct() const
const RexScalar * getElse() const