OmniSciDB  2c44a3935d
RelAlgTranslator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgTranslator.h"
18 #include "Shared/SqlTypesLayout.h"
19 
21 #include "DateTimePlusRewrite.h"
22 #include "DateTimeTranslator.h"
24 #include "ExpressionRewrite.h"
27 #include "RelAlgDagBuilder.h"
28 #include "WindowContext.h"
29 
30 #include <future>
31 
32 #include "../Analyzer/Analyzer.h"
33 #include "../Parser/ParserNode.h"
34 #include "../Shared/likely.h"
35 #include "../Shared/sql_type_to_string.h"
36 #include "../Shared/thread_count.h"
37 
// Watchdog switch, declared here and defined in another translation unit. In this file
// it gates the row-count limits enforced while translating `expr IN (subquery)`.
38 extern bool g_enable_watchdog;
39 
41 
42 namespace {
43 
45  const int scale,
46  const int precision) {
47  SQLTypeInfo ti(sql_type, 0, 0, true);
48  if (ti.is_decimal()) {
49  ti.set_scale(scale);
50  ti.set_precision(precision);
51  }
52  return ti;
53 }
54 
55 std::pair<std::shared_ptr<Analyzer::Expr>, SQLQualifier> get_quantified_rhs(
56  const RexScalar* rex_scalar,
57  const RelAlgTranslator& translator) {
58  std::shared_ptr<Analyzer::Expr> rhs;
59  SQLQualifier sql_qual{kONE};
60  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
61  if (!rex_operator) {
62  return std::make_pair(rhs, sql_qual);
63  }
64  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex_operator);
65  const auto qual_str = rex_function ? rex_function->getName() : "";
66  if (qual_str == "PG_ANY"sv || qual_str == "PG_ALL"sv) {
67  CHECK_EQ(size_t(1), rex_function->size());
68  rhs = translator.translateScalarRex(rex_function->getOperand(0));
69  sql_qual = (qual_str == "PG_ANY"sv) ? kANY : kALL;
70  }
71  if (!rhs && rex_operator->getOperator() == kCAST) {
72  CHECK_EQ(size_t(1), rex_operator->size());
73  std::tie(rhs, sql_qual) = get_quantified_rhs(rex_operator->getOperand(0), translator);
74  }
75  return std::make_pair(rhs, sql_qual);
76 }
77 
78 std::pair<Datum, bool> datum_from_scalar_tv(const ScalarTargetValue* scalar_tv,
79  const SQLTypeInfo& ti) noexcept {
80  Datum d{0};
81  bool is_null_const{false};
82  switch (ti.get_type()) {
83  case kTINYINT: {
84  const auto ival = boost::get<int64_t>(scalar_tv);
85  CHECK(ival);
86  if (*ival == inline_int_null_val(ti)) {
87  is_null_const = true;
88  } else {
89  d.tinyintval = *ival;
90  }
91  break;
92  }
93  case kSMALLINT: {
94  const auto ival = boost::get<int64_t>(scalar_tv);
95  CHECK(ival);
96  if (*ival == inline_int_null_val(ti)) {
97  is_null_const = true;
98  } else {
99  d.smallintval = *ival;
100  }
101  break;
102  }
103  case kINT: {
104  const auto ival = boost::get<int64_t>(scalar_tv);
105  CHECK(ival);
106  if (*ival == inline_int_null_val(ti)) {
107  is_null_const = true;
108  } else {
109  d.intval = *ival;
110  }
111  break;
112  }
113  case kDECIMAL:
114  case kNUMERIC:
115  case kBIGINT:
116  case kDATE:
117  case kTIME:
118  case kTIMESTAMP: {
119  const auto ival = boost::get<int64_t>(scalar_tv);
120  CHECK(ival);
121  if (*ival == inline_int_null_val(ti)) {
122  is_null_const = true;
123  } else {
124  d.bigintval = *ival;
125  }
126  break;
127  }
128  case kDOUBLE: {
129  const auto dval = boost::get<double>(scalar_tv);
130  CHECK(dval);
131  if (*dval == inline_fp_null_val(ti)) {
132  is_null_const = true;
133  } else {
134  d.doubleval = *dval;
135  }
136  break;
137  }
138  case kFLOAT: {
139  const auto fval = boost::get<float>(scalar_tv);
140  CHECK(fval);
141  if (*fval == inline_fp_null_val(ti)) {
142  is_null_const = true;
143  } else {
144  d.floatval = *fval;
145  }
146  break;
147  }
148  case kTEXT:
149  case kVARCHAR:
150  case kCHAR: {
151  auto nullable_sptr = boost::get<NullableString>(scalar_tv);
152  CHECK(nullable_sptr);
153  if (boost::get<void*>(nullable_sptr)) {
154  is_null_const = true;
155  } else {
156  auto sptr = boost::get<std::string>(nullable_sptr);
157  d.stringval = new std::string(*sptr);
158  }
159  break;
160  }
161  default:
162  CHECK(false);
163  }
164  return {d, is_null_const};
165 }
166 
167 } // namespace
168 
169 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarRex(
170  const RexScalar* rex) const {
171  const auto rex_input = dynamic_cast<const RexInput*>(rex);
172  if (rex_input) {
173  return translateInput(rex_input);
174  }
175  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex);
176  if (rex_literal) {
177  return translateLiteral(rex_literal);
178  }
179  const auto rex_window_function = dynamic_cast<const RexWindowFunctionOperator*>(rex);
180  if (rex_window_function) {
181  return translateWindowFunction(rex_window_function);
182  }
183  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex);
184  if (rex_function) {
185  return translateFunction(rex_function);
186  }
187  const auto rex_operator = dynamic_cast<const RexOperator*>(rex);
188  if (rex_operator) {
189  return translateOper(rex_operator);
190  }
191  const auto rex_case = dynamic_cast<const RexCase*>(rex);
192  if (rex_case) {
193  return translateCase(rex_case);
194  }
195  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rex);
196  if (rex_subquery) {
197  return translateScalarSubquery(rex_subquery);
198  }
199  CHECK(false);
200  return nullptr;
201 }
202 
203 namespace {
204 
205 bool is_agg_supported_for_type(const SQLAgg& agg_kind, const SQLTypeInfo& arg_ti) {
206  if ((agg_kind == kMIN || agg_kind == kMAX || agg_kind == kSUM || agg_kind == kAVG) &&
207  !(arg_ti.is_number() || arg_ti.is_boolean() || arg_ti.is_time())) {
208  return false;
209  }
210 
211  return true;
212 }
213 
214 } // namespace
215 
216 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAggregateRex(
217  const RexAgg* rex,
218  const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
219  const auto agg_kind = rex->getKind();
220  const bool is_distinct = rex->isDistinct();
221  const bool takes_arg{rex->size() > 0};
222  std::shared_ptr<Analyzer::Expr> arg_expr;
223  std::shared_ptr<Analyzer::Constant> err_rate;
224  if (takes_arg) {
225  const auto operand = rex->getOperand(0);
226  CHECK_LT(operand, scalar_sources.size());
227  CHECK_LE(rex->size(), 2u);
228  arg_expr = scalar_sources[operand];
229  if (agg_kind == kAPPROX_COUNT_DISTINCT && rex->size() == 2) {
230  err_rate = std::dynamic_pointer_cast<Analyzer::Constant>(
231  scalar_sources[rex->getOperand(1)]);
232  if (!err_rate || err_rate->get_type_info().get_type() != kINT ||
233  err_rate->get_constval().intval < 1 || err_rate->get_constval().intval > 100) {
234  throw std::runtime_error(
235  "APPROX_COUNT_DISTINCT's second parameter should be SMALLINT literal between "
236  "1 and 100");
237  }
238  }
239  const auto& arg_ti = arg_expr->get_type_info();
240  if (!is_agg_supported_for_type(agg_kind, arg_ti)) {
241  throw std::runtime_error("Aggregate on " + arg_ti.get_type_name() +
242  " is not supported yet.");
243  }
244  }
245  const auto agg_ti = get_agg_type(agg_kind, arg_expr.get());
246  return makeExpr<Analyzer::AggExpr>(agg_ti, agg_kind, arg_expr, is_distinct, err_rate);
247 }
248 
// Translates a Rex literal into an Analyzer expression.
//
// `lit_ti` describes the literal as written (its own type, scale and precision) and
// `target_ti` describes the type it is expected to have. For DECIMAL and DOUBLE
// literals a cast to `target_ti` is added when the two differ. An unexpected literal
// type is reported through LOG(FATAL).
//
// NOTE(review): two lines of this function (original lines 265 and 307) are absent from
// the listing this file was recovered from; see the notes at the gaps below.
249 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLiteral(
250  const RexLiteral* rex_literal) {
251  auto lit_ti = build_type_info(
252  rex_literal->getType(), rex_literal->getScale(), rex_literal->getPrecision());
253  auto target_ti = build_type_info(rex_literal->getTargetType(),
254  rex_literal->getTypeScale(),
255  rex_literal->getTypePrecision());
256  switch (rex_literal->getType()) {
257  case kDECIMAL: {
258  const auto val = rex_literal->getVal<int64_t>();
259  const int precision = rex_literal->getPrecision();
260  const int scale = rex_literal->getScale();
// A scale-less literal headed for a floating-point target becomes an fp constant
// directly, without an intermediate cast.
261  if (target_ti.is_fp() && !scale) {
262  return make_fp_constant(val, target_ti);
263  }
// NOTE(review): the else-branch of this conditional expression (original line 265) is
// missing here; presumably it analyzes `val` as a plain integer literal.
264  auto lit_expr = scale ? Parser::FixedPtLiteral::analyzeValue(val, scale, precision)
266  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
267  }
268  case kTEXT: {
269  return Parser::StringLiteral::analyzeValue(rex_literal->getVal<std::string>());
270  }
271  case kBOOLEAN: {
272  Datum d;
273  d.boolval = rex_literal->getVal<bool>();
274  return makeExpr<Analyzer::Constant>(kBOOLEAN, false, d);
275  }
276  case kDOUBLE: {
277  Datum d;
278  d.doubleval = rex_literal->getVal<double>();
279  auto lit_expr = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
280  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
281  }
282  case kINTERVAL_DAY_TIME:
283  case kINTERVAL_YEAR_MONTH: {
284  Datum d;
285  d.bigintval = rex_literal->getVal<int64_t>();
286  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
287  }
288  case kTIME:
289  case kTIMESTAMP: {
290  Datum d;
// A TIMESTAMP with positive precision keeps the literal value unchanged; every other
// TIME/TIMESTAMP value is divided by 1000 (presumably milliseconds to seconds).
291  d.bigintval =
292  rex_literal->getType() == kTIMESTAMP && rex_literal->getPrecision() > 0
293  ? rex_literal->getVal<int64_t>()
294  : rex_literal->getVal<int64_t>() / 1000;
295  return makeExpr<Analyzer::Constant>(
296  SQLTypeInfo(rex_literal->getType(), rex_literal->getPrecision(), 0, false),
297  false,
298  d);
299  }
300  case kDATE: {
301  Datum d;
// The literal value is scaled by 24 * 3600 (presumably days to seconds).
302  d.bigintval = rex_literal->getVal<int64_t>() * 24 * 3600;
303  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
304  }
305  case kNULLT: {
// A NULL headed for an array target becomes an ArrayExpr built from `args` with `true`
// as the last argument; any other NULL becomes a null Constant of the target type.
// NOTE(review): the declaration of `args` (original line 307) is missing here.
306  if (target_ti.is_array()) {
308  // defaulting to valid sub-type for convenience
309  target_ti.set_subtype(kBOOLEAN);
310  return makeExpr<Analyzer::ArrayExpr>(target_ti, args, true);
311  }
312  return makeExpr<Analyzer::Constant>(rex_literal->getTargetType(), true, Datum{0});
313  }
314  default: {
315  LOG(FATAL) << "Unexpected literal type " << lit_ti.get_type_name();
316  }
317  }
318  return nullptr;
319 }
320 
321 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarSubquery(
322  const RexSubQuery* rex_subquery) const {
323  if (just_explain_) {
324  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
325  }
326  CHECK(rex_subquery);
327  auto result = rex_subquery->getExecutionResult();
328  auto row_set = result->getRows();
329  const size_t row_count = row_set->rowCount();
330  if (row_count > size_t(1)) {
331  throw std::runtime_error("Scalar sub-query returned multiple rows");
332  }
333  if (row_count == size_t(0)) {
334  throw std::runtime_error("Scalar sub-query returned no results");
335  }
336  CHECK_EQ(row_count, size_t(1));
337  row_set->moveToBegin();
338  auto first_row = row_set->getNextRow(false, false);
339  CHECK_EQ(first_row.size(), size_t(1));
340  auto scalar_tv = boost::get<ScalarTargetValue>(&first_row[0]);
341  auto ti = rex_subquery->getType();
342  if (ti.is_string()) {
343  throw std::runtime_error("Scalar sub-queries which return strings not supported");
344  }
345  Datum d{0};
346  bool is_null_const{false};
347  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
348  return makeExpr<Analyzer::Constant>(ti, is_null_const, d);
349 }
350 
351 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInput(
352  const RexInput* rex_input) const {
353  const auto source = rex_input->getSourceNode();
354  const auto it_rte_idx = input_to_nest_level_.find(source);
355  CHECK(it_rte_idx != input_to_nest_level_.end())
356  << "Not found in input_to_nest_level_, source=" << source->toString();
357  const int rte_idx = it_rte_idx->second;
358  const auto scan_source = dynamic_cast<const RelScan*>(source);
359  const auto& in_metainfo = source->getOutputMetainfo();
360  if (scan_source) {
361  // We're at leaf (scan) level and not supposed to have input metadata,
362  // the name and type information come directly from the catalog.
363  CHECK(in_metainfo.empty());
364  const auto table_desc = scan_source->getTableDescriptor();
365  const auto cd =
366  cat_.getMetadataForColumnBySpi(table_desc->tableId, rex_input->getIndex() + 1);
367  CHECK(cd);
368  auto col_ti = cd->columnType;
369  if (col_ti.is_string()) {
370  col_ti.set_type(kTEXT);
371  }
372  if (cd->isVirtualCol) {
373  // TODO(alex): remove at some point, we only need this fixup for backwards
374  // compatibility with old imported data
375  CHECK_EQ("rowid", cd->columnName);
376  col_ti.set_size(8);
377  }
378  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
379  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
380  col_ti.set_notnull(false);
381  }
382  return std::make_shared<Analyzer::ColumnVar>(
383  col_ti, table_desc->tableId, cd->columnId, rte_idx);
384  }
385  CHECK(!in_metainfo.empty()) << "for " << source->toString();
386  CHECK_GE(rte_idx, 0);
387  const size_t col_id = rex_input->getIndex();
388  CHECK_LT(col_id, in_metainfo.size());
389  auto col_ti = in_metainfo[col_id].get_type_info();
390  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
391  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
392  col_ti.set_notnull(false);
393  }
394  return std::make_shared<Analyzer::ColumnVar>(col_ti, -source->getId(), col_id, rte_idx);
395 }
396 
397 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUoper(
398  const RexOperator* rex_operator) const {
399  CHECK_EQ(size_t(1), rex_operator->size());
400  const auto operand_expr = translateScalarRex(rex_operator->getOperand(0));
401  const auto sql_op = rex_operator->getOperator();
402  switch (sql_op) {
403  case kCAST: {
404  const auto& target_ti = rex_operator->getType();
405  CHECK_NE(kNULLT, target_ti.get_type());
406  const auto& operand_ti = operand_expr->get_type_info();
407  if (operand_ti.is_string() && target_ti.is_string()) {
408  return operand_expr;
409  }
410  if (target_ti.is_time() ||
411  operand_ti
412  .is_string()) { // TODO(alex): check and unify with the rest of the cases
413  // Do not propogate encoding on small dates
414  return target_ti.is_date_in_days()
415  ? operand_expr->add_cast(SQLTypeInfo(kDATE, false))
416  : operand_expr->add_cast(target_ti);
417  }
418  if (!operand_ti.is_string() && target_ti.is_string()) {
419  return operand_expr->add_cast(target_ti);
420  }
421 
422  return std::make_shared<Analyzer::UOper>(target_ti, false, sql_op, operand_expr);
423  }
424  case kNOT:
425  case kISNULL: {
426  return std::make_shared<Analyzer::UOper>(kBOOLEAN, sql_op, operand_expr);
427  }
428  case kISNOTNULL: {
429  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, operand_expr);
430  return std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
431  }
432  case kMINUS: {
433  const auto& ti = operand_expr->get_type_info();
434  return std::make_shared<Analyzer::UOper>(ti, false, kUMINUS, operand_expr);
435  }
436  case kUNNEST: {
437  const auto& ti = operand_expr->get_type_info();
438  CHECK(ti.is_array());
439  return makeExpr<Analyzer::UOper>(ti.get_elem_type(), false, kUNNEST, operand_expr);
440  }
441  default:
442  CHECK(false);
443  }
444  return nullptr;
445 }
446 
447 namespace {
448 
449 std::shared_ptr<Analyzer::Expr> get_in_values_expr(std::shared_ptr<Analyzer::Expr> arg,
450  const ResultSet& val_set) {
451  if (!can_use_parallel_algorithms(val_set)) {
452  return nullptr;
453  }
454  if (val_set.rowCount() > 5000000 && g_enable_watchdog) {
455  throw std::runtime_error(
456  "Unable to handle 'expr IN (subquery)', subquery returned 5M+ rows.");
457  }
458  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
459  const size_t fetcher_count = cpu_threads();
460  std::vector<std::list<std::shared_ptr<Analyzer::Expr>>> expr_set(
461  fetcher_count, std::list<std::shared_ptr<Analyzer::Expr>>());
462  std::vector<std::future<void>> fetcher_threads;
463  const auto& ti = arg->get_type_info();
464  const auto entry_count = val_set.entryCount();
465  for (size_t i = 0,
466  start_entry = 0,
467  stride = (entry_count + fetcher_count - 1) / fetcher_count;
468  i < fetcher_count && start_entry < entry_count;
469  ++i, start_entry += stride) {
470  const auto end_entry = std::min(start_entry + stride, entry_count);
471  fetcher_threads.push_back(std::async(
472  std::launch::async,
473  [&](std::list<std::shared_ptr<Analyzer::Expr>>& in_vals,
474  const size_t start,
475  const size_t end) {
476  for (auto index = start; index < end; ++index) {
477  auto row = val_set.getRowAt(index);
478  if (row.empty()) {
479  continue;
480  }
481  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
482  Datum d{0};
483  bool is_null_const{false};
484  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
485  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
486  auto ti_none_encoded = ti;
487  ti_none_encoded.set_compression(kENCODING_NONE);
488  auto none_encoded_string =
489  makeExpr<Analyzer::Constant>(ti, is_null_const, d);
490  auto dict_encoded_string = std::make_shared<Analyzer::UOper>(
491  ti, false, kCAST, none_encoded_string);
492  in_vals.push_back(dict_encoded_string);
493  } else {
494  in_vals.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
495  }
496  }
497  },
498  std::ref(expr_set[i]),
499  start_entry,
500  end_entry));
501  }
502  for (auto& child : fetcher_threads) {
503  child.get();
504  }
505 
506  val_set.moveToBegin();
507  for (auto& exprs : expr_set) {
508  value_exprs.splice(value_exprs.end(), exprs);
509  }
510  return makeExpr<Analyzer::InValues>(arg, value_exprs);
511 }
512 
513 } // namespace
514 
515 // Creates an Analyzer expression for an IN subquery which subsequently goes through the
516 // regular Executor::codegen() mechanism. The creation of the expression out of subquery's
517 // result set is parallelized whenever possible. In addition, take advantage of additional
518 // information that elements in the right hand side are constants; see
519 // getInIntegerSetExpr().
520 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInOper(
521  const RexOperator* rex_operator) const {
522  if (just_explain_) {
523  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
524  }
525  CHECK(rex_operator->size() == 2);
526  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
527  const auto rhs = rex_operator->getOperand(1);
528  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rhs);
529  CHECK(rex_subquery);
530  auto ti = lhs->get_type_info();
531  auto result = rex_subquery->getExecutionResult();
532  auto& row_set = result->getRows();
533  CHECK_EQ(size_t(1), row_set->colCount());
534  const auto& rhs_ti = row_set->getColType(0);
535  if (rhs_ti.get_type() != ti.get_type()) {
536  throw std::runtime_error(
537  "The two sides of the IN operator must have the same type; found " +
538  ti.get_type_name() + " and " + rhs_ti.get_type_name());
539  }
540  row_set->moveToBegin();
541  if (row_set->entryCount() > 10000) {
542  std::shared_ptr<Analyzer::Expr> expr;
543  if ((ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT)) &&
544  !row_set->getQueryMemDesc().didOutputColumnar()) {
545  expr = getInIntegerSetExpr(lhs, *row_set);
546  // Handle the highly unlikely case when the InIntegerSet ended up being tiny.
547  // Just let it fall through the usual InValues path at the end of this method,
548  // its codegen knows to use inline comparisons for few values.
549  if (expr && std::static_pointer_cast<Analyzer::InIntegerSet>(expr)
550  ->get_value_list()
551  .size() <= 100) {
552  expr = nullptr;
553  }
554  } else {
555  expr = get_in_values_expr(lhs, *row_set);
556  }
557  if (expr) {
558  return expr;
559  }
560  }
561  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
562  while (true) {
563  auto row = row_set->getNextRow(true, false);
564  if (row.empty()) {
565  break;
566  }
567  if (g_enable_watchdog && value_exprs.size() >= 10000) {
568  throw std::runtime_error(
569  "Unable to handle 'expr IN (subquery)', subquery returned 10000+ rows.");
570  }
571  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
572  Datum d{0};
573  bool is_null_const{false};
574  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
575  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
576  auto ti_none_encoded = ti;
577  ti_none_encoded.set_compression(kENCODING_NONE);
578  auto none_encoded_string = makeExpr<Analyzer::Constant>(ti, is_null_const, d);
579  auto dict_encoded_string =
580  std::make_shared<Analyzer::UOper>(ti, false, kCAST, none_encoded_string);
581  value_exprs.push_back(dict_encoded_string);
582  } else {
583  value_exprs.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
584  }
585  }
586  return makeExpr<Analyzer::InValues>(lhs, value_exprs);
587 }
588 
589 namespace {
590 
// Limit (2^25 = 33,554,432) against which the running count of collected IN values is
// compared when the watchdog is enabled; reaching it aborts the translation.
591 const size_t g_max_integer_set_size{1 << 25};
592 
594  std::vector<int64_t>& in_vals,
595  std::atomic<size_t>& total_in_vals_count,
596  const ResultSet* values_rowset,
597  const std::pair<int64_t, int64_t> values_rowset_slice,
598  const StringDictionaryProxy* source_dict,
599  const StringDictionaryProxy* dest_dict,
600  const int64_t needle_null_val) {
601  CHECK(in_vals.empty());
602  bool dicts_are_equal = source_dict == dest_dict;
603  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
604  ++index) {
605  const auto row = values_rowset->getOneColRow(index);
606  if (UNLIKELY(!row.valid)) {
607  continue;
608  }
609  if (dicts_are_equal) {
610  in_vals.push_back(row.value);
611  } else {
612  const int string_id =
613  row.value == needle_null_val
614  ? needle_null_val
615  : dest_dict->getIdOfString(source_dict->getString(row.value));
616  if (string_id != StringDictionary::INVALID_STR_ID) {
617  in_vals.push_back(string_id);
618  }
619  }
620  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
621  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
622  throw std::runtime_error(
623  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
624  }
625  }
626 }
627 
628 void fill_integer_in_vals(std::vector<int64_t>& in_vals,
629  std::atomic<size_t>& total_in_vals_count,
630  const ResultSet* values_rowset,
631  const std::pair<int64_t, int64_t> values_rowset_slice) {
632  CHECK(in_vals.empty());
633  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
634  ++index) {
635  const auto row = values_rowset->getOneColRow(index);
636  if (row.valid) {
637  in_vals.push_back(row.value);
638  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
639  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
640  throw std::runtime_error(
641  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
642  }
643  }
644  }
645 }
646 
647 // Multi-node counterpart of the other version. Saves round-trips, which is crucial
648 // for a big right-hand side result. It only handles physical string dictionary ids,
649 // therefore it won't be able to handle a right-hand side sub-query with a CASE
650 // returning literals on some branches. That case isn't hard too handle either, but
651 // it's not clear it's actually important in practice.
652 // RelAlgTranslator::getInIntegerSetExpr makes sure, by checking the encodings, that this
653 // function isn't called in such cases.
655  std::vector<int64_t>& in_vals,
656  std::atomic<size_t>& total_in_vals_count,
657  const ResultSet* values_rowset,
658  const std::pair<int64_t, int64_t> values_rowset_slice,
659  const std::vector<LeafHostInfo>& leaf_hosts,
660  const DictRef source_dict_ref,
661  const DictRef dest_dict_ref,
662  const int32_t dest_generation,
663  const int64_t needle_null_val) {
664  CHECK(in_vals.empty());
665  std::vector<int32_t> source_ids;
666  source_ids.reserve(values_rowset->entryCount());
667  bool has_nulls = false;
668  if (source_dict_ref == dest_dict_ref) {
669  in_vals.reserve(values_rowset_slice.second - values_rowset_slice.first +
670  1); // Add 1 to cover interval
671  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
672  ++index) {
673  const auto row = values_rowset->getOneColRow(index);
674  if (!row.valid) {
675  continue;
676  }
677  if (row.value != needle_null_val) {
678  in_vals.push_back(row.value);
679  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
680  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
681  throw std::runtime_error(
682  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
683  }
684  } else {
685  has_nulls = true;
686  }
687  }
688  if (has_nulls) {
689  in_vals.push_back(
690  needle_null_val); // we've deduped null values as an optimization, although
691  // this is not required by consumer
692  }
693  return;
694  }
695  // Code path below is for when dictionaries are not shared
696  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
697  ++index) {
698  const auto row = values_rowset->getOneColRow(index);
699  if (row.valid) {
700  if (row.value != needle_null_val) {
701  source_ids.push_back(row.value);
702  } else {
703  has_nulls = true;
704  }
705  }
706  }
707  std::vector<int32_t> dest_ids;
708  translate_string_ids(dest_ids,
709  leaf_hosts.front(),
710  dest_dict_ref,
711  source_ids,
712  source_dict_ref,
713  dest_generation);
714  CHECK_EQ(dest_ids.size(), source_ids.size());
715  in_vals.reserve(dest_ids.size() + (has_nulls ? 1 : 0));
716  if (has_nulls) {
717  in_vals.push_back(needle_null_val);
718  }
719  for (const int32_t dest_id : dest_ids) {
720  if (dest_id != StringDictionary::INVALID_STR_ID) {
721  in_vals.push_back(dest_id);
722  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
723  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
724  throw std::runtime_error(
725  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
726  }
727  }
728  }
729 }
730 
731 } // namespace
732 
733 // The typical IN subquery involves either dictionary-encoded strings or integers.
734 // Analyzer::InValues is a very heavy representation of the right hand side of such
735 // a query since we already know the right hand would be a list of Analyzer::Constant
736 // shared pointers. We can avoid the big overhead of each Analyzer::Constant and the
737 // refcounting associated with shared pointers by creating an abbreviated InIntegerSet
738 // representation of the IN expression which takes advantage of this information.
// Builds an Analyzer::InIntegerSet for `arg IN (val_set)` where the values are integers
// or dictionary-encoded string ids, collecting the values on several threads.
//
// Returns nullptr when `val_set` cannot be processed with parallel algorithms, or when
// running in cluster mode with a string argument and either side has a non-positive
// comp_param. Exceptions thrown by a worker surface here through future::get().
//
// NOTE(review): two lines naming the callees inside the string worker (original lines
// 791 and 801) are absent from the listing this file was recovered from.
739 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::getInIntegerSetExpr(
740  std::shared_ptr<Analyzer::Expr> arg,
741  const ResultSet& val_set) const {
742  if (!can_use_parallel_algorithms(val_set)) {
743  return nullptr;
744  }
745  std::vector<int64_t> value_exprs;
746  const size_t fetcher_count = cpu_threads();
// One output vector per worker, so workers never write to the same container.
747  std::vector<std::vector<int64_t>> expr_set(fetcher_count);
748  std::vector<std::future<void>> fetcher_threads;
749  const auto& arg_type = arg->get_type_info();
750  const auto entry_count = val_set.entryCount();
751  CHECK_EQ(size_t(1), val_set.colCount());
752  const auto& col_type = val_set.getColType(0);
753  if (g_cluster && arg_type.is_string() &&
754  (col_type.get_comp_param() <= 0 || arg_type.get_comp_param() <= 0)) {
755  // Skip this case for now, see comment for fill_dictionary_encoded_in_vals.
756  return nullptr;
757  }
// Running total shared by all workers; used for the watchdog size limit.
758  std::atomic<size_t> total_in_vals_count{0};
// Cut [0, entry_count) into contiguous slices of `stride` entries (rounded up), one per
// worker; fewer workers are started when the entries run out first.
759  for (size_t i = 0,
760  start_entry = 0,
761  stride = (entry_count + fetcher_count - 1) / fetcher_count;
762  i < fetcher_count && start_entry < entry_count;
763  ++i, start_entry += stride) {
764  expr_set[i].reserve(entry_count / fetcher_count);
765  const auto end_entry = std::min(start_entry + stride, entry_count);
766  if (arg_type.is_string()) {
767  CHECK_EQ(kENCODING_DICT, arg_type.get_compression());
768  // const int32_t dest_dict_id = arg_type.get_comp_param();
769  // const int32_t source_dict_id = col_type.get_comp_param();
770  const DictRef dest_dict_ref(arg_type.get_comp_param(), cat_.getDatabaseId());
771  const DictRef source_dict_ref(col_type.get_comp_param(), cat_.getDatabaseId());
// dd: proxy for the argument's (destination) dictionary; sd: proxy for the sub-query
// column's (source) dictionary.
772  const auto dd = executor_->getStringDictionaryProxy(
773  arg_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
774  const auto sd = executor_->getStringDictionaryProxy(
775  col_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
776  CHECK(sd);
777  const auto needle_null_val = inline_int_null_val(arg_type);
778  fetcher_threads.push_back(std::async(
779  std::launch::async,
780  [this,
781  &val_set,
782  &total_in_vals_count,
783  sd,
784  dd,
785  source_dict_ref,
786  dest_dict_ref,
787  needle_null_val](
788  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
// Cluster mode translates ids using dictionary references and the string dictionary
// hosts; otherwise the two local dictionary proxies are used.
789  if (g_cluster) {
790  CHECK_GE(dd->getGeneration(), 0);
792  total_in_vals_count,
793  &val_set,
794  {start, end},
795  cat_.getStringDictionaryHosts(),
796  source_dict_ref,
797  dest_dict_ref,
798  dd->getGeneration(),
799  needle_null_val);
800  } else {
802  total_in_vals_count,
803  &val_set,
804  {start, end},
805  sd,
806  dd,
807  needle_null_val);
808  }
809  },
810  std::ref(expr_set[i]),
811  start_entry,
812  end_entry));
813  } else {
814  CHECK(arg_type.is_integer());
815  fetcher_threads.push_back(std::async(
816  std::launch::async,
817  [&val_set, &total_in_vals_count](
818  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
819  fill_integer_in_vals(in_vals, total_in_vals_count, &val_set, {start, end});
820  },
821  std::ref(expr_set[i]),
822  start_entry,
823  end_entry));
824  }
825  }
// Wait for every worker; get() rethrows an exception raised inside a worker.
826  for (auto& child : fetcher_threads) {
827  child.get();
828  }
829 
830  val_set.moveToBegin();
// Concatenate the per-worker results in slice order.
831  value_exprs.reserve(entry_count);
832  for (auto& exprs : expr_set) {
833  value_exprs.insert(value_exprs.end(), exprs.begin(), exprs.end());
834  }
// The last argument is true only when both the argument and the sub-query column are
// declared NOT NULL.
835  return makeExpr<Analyzer::InIntegerSet>(
836  arg, value_exprs, arg_type.get_notnull() && col_type.get_notnull());
837 }
838 
839 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOper(
840  const RexOperator* rex_operator) const {
841  CHECK_GT(rex_operator->size(), size_t(0));
842  if (rex_operator->size() == 1) {
843  return translateUoper(rex_operator);
844  }
845  const auto sql_op = rex_operator->getOperator();
846  if (sql_op == kIN) {
847  return translateInOper(rex_operator);
848  }
849  if (sql_op == kMINUS || sql_op == kPLUS) {
850  auto date_plus_minus = translateDatePlusMinus(rex_operator);
851  if (date_plus_minus) {
852  return date_plus_minus;
853  }
854  }
855  if (sql_op == kOVERLAPS) {
856  return translateOverlapsOper(rex_operator);
857  } else if (IS_COMPARISON(sql_op)) {
858  auto geo_comp = translateGeoComparison(rex_operator);
859  if (geo_comp) {
860  return geo_comp;
861  }
862  }
863  auto lhs = translateScalarRex(rex_operator->getOperand(0));
864  for (size_t i = 1; i < rex_operator->size(); ++i) {
865  std::shared_ptr<Analyzer::Expr> rhs;
866  SQLQualifier sql_qual{kONE};
867  const auto rhs_op = rex_operator->getOperand(i);
868  std::tie(rhs, sql_qual) = get_quantified_rhs(rhs_op, *this);
869  if (!rhs) {
870  rhs = translateScalarRex(rhs_op);
871  }
872  CHECK(rhs);
873  lhs = Parser::OperExpr::normalize(sql_op, sql_qual, lhs, rhs);
874  }
875  return lhs;
876 }
877 
878 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOverlapsOper(
879  const RexOperator* rex_operator) const {
880  const auto sql_op = rex_operator->getOperator();
881  CHECK(sql_op == kOVERLAPS);
882 
883  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
884  const auto lhs_ti = lhs->get_type_info();
885  if (lhs_ti.is_geometry()) {
886  return translateGeoOverlapsOper(rex_operator);
887  } else {
888  throw std::runtime_error(
889  "Overlaps equivalence is currently only supported for geospatial types");
890  }
891 }
892 
893 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCase(
894  const RexCase* rex_case) const {
895  std::shared_ptr<Analyzer::Expr> else_expr;
896  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
897  expr_list;
898  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
899  const auto when_expr = translateScalarRex(rex_case->getWhen(i));
900  const auto then_expr = translateScalarRex(rex_case->getThen(i));
901  expr_list.emplace_back(when_expr, then_expr);
902  }
903  if (rex_case->getElse()) {
904  else_expr = translateScalarRex(rex_case->getElse());
905  }
906  return Parser::CaseExpr::normalize(expr_list, else_expr);
907 }
908 
909 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLike(
910  const RexFunctionOperator* rex_function) const {
911  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
912  const auto arg = translateScalarRex(rex_function->getOperand(0));
913  const auto like = translateScalarRex(rex_function->getOperand(1));
914  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(like)) {
915  throw std::runtime_error("The matching pattern must be a literal.");
916  }
917  const auto escape = (rex_function->size() == 3)
918  ? translateScalarRex(rex_function->getOperand(2))
919  : nullptr;
920  const bool is_ilike = rex_function->getName() == "PG_ILIKE"sv;
921  return Parser::LikeExpr::get(arg, like, escape, is_ilike, false);
922 }
923 
924 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRegexp(
925  const RexFunctionOperator* rex_function) const {
926  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
927  const auto arg = translateScalarRex(rex_function->getOperand(0));
928  const auto pattern = translateScalarRex(rex_function->getOperand(1));
929  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(pattern)) {
930  throw std::runtime_error("The matching pattern must be a literal.");
931  }
932  const auto escape = (rex_function->size() == 3)
933  ? translateScalarRex(rex_function->getOperand(2))
934  : nullptr;
935  return Parser::RegexpExpr::get(arg, pattern, escape, false);
936 }
937 
938 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLikely(
939  const RexFunctionOperator* rex_function) const {
940  CHECK(rex_function->size() == 1);
941  const auto arg = translateScalarRex(rex_function->getOperand(0));
942  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.9375);
943 }
944 
945 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUnlikely(
946  const RexFunctionOperator* rex_function) const {
947  CHECK(rex_function->size() == 1);
948  const auto arg = translateScalarRex(rex_function->getOperand(0));
949  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.0625);
950 }
951 
952 namespace {
953 
// File-local check for a date-part literal: throws std::runtime_error when the given
// constant pointer is null or the constant itself is a SQL NULL.
// NOTE(review): the line that opens this signature (return type and function name) is
// absent from this listing; only the parameter list and the body are visible.
955  const std::shared_ptr<Analyzer::Constant> literal_expr) {
956  if (!literal_expr || literal_expr->get_is_null()) {
957  throw std::runtime_error("The 'DatePart' argument must be a not 'null' literal.");
958  }
959 }
960 
961 } // namespace
962 
// Translates PG_EXTRACT and PG_DATE_TRUNC. Operand 0 is the time-unit expression and
// operand 1 is the expression to extract from / truncate. The time unit's string value
// is forwarded to DateTruncExpr::generate or ExtractExpr::generate.
963 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateExtract(
964  const RexFunctionOperator* rex_function) const {
965  CHECK_EQ(size_t(2), rex_function->size());
966  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
// The cast yields null when the time unit is not a constant.
967  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 968 is absent here. timeunit_lit is dereferenced below
// without a visible null check -- presumably it is validated on the omitted line; confirm.
969  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
970  const bool is_date_trunc = rex_function->getName() == "PG_DATE_TRUNC"sv;
971  if (is_date_trunc) {
972  return DateTruncExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
973  } else {
974  return ExtractExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
975  }
976 }
977 
978 namespace {
979 
980 std::shared_ptr<Analyzer::Constant> makeNumericConstant(const SQLTypeInfo& ti,
981  const long val) {
982  CHECK(ti.is_number());
983  Datum datum{0};
984  switch (ti.get_type()) {
985  case kTINYINT: {
986  datum.tinyintval = val;
987  break;
988  }
989  case kSMALLINT: {
990  datum.smallintval = val;
991  break;
992  }
993  case kINT: {
994  datum.intval = val;
995  break;
996  }
997  case kBIGINT: {
998  datum.bigintval = val;
999  break;
1000  }
1001  case kDECIMAL:
1002  case kNUMERIC: {
1003  datum.bigintval = val * exp_to_scale(ti.get_scale());
1004  break;
1005  }
1006  case kFLOAT: {
1007  datum.floatval = val;
1008  break;
1009  }
1010  case kDOUBLE: {
1011  datum.doubleval = val;
1012  break;
1013  }
1014  default:
1015  CHECK(false);
1016  }
1017  return makeExpr<Analyzer::Constant>(ti, false, datum);
1018 }
1019 
1020 } // namespace
1021 
// Translates DATEADD(timeunit, number_units, datetime) into an Analyzer::DateaddExpr of
// TIMESTAMP type carrying the dimension of the datetime operand.
// Throws std::runtime_error when the interval is a NULL literal or the datetime operand
// is of type TIME.
// NOTE(review): this listing omits several lines of the function (embedded numbers 1027,
// 1042, 1051, 1055-1056), so the conditions guarding the two scaling steps are only
// partly visible.
1022 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDateadd(
1023  const RexFunctionOperator* rex_function) const {
1024  CHECK_EQ(size_t(3), rex_function->size());
1025  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1026  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 1027 is absent; timeunit_lit is dereferenced further down
// without a visible null check -- presumably validated on the omitted line; confirm.
1028  const auto number_units = translateScalarRex(rex_function->getOperand(1));
1029  const auto number_units_const =
1030  std::dynamic_pointer_cast<Analyzer::Constant>(number_units);
1031  if (number_units_const && number_units_const->get_is_null()) {
1032  throw std::runtime_error("The 'Interval' argument literal must not be 'null'.");
1033  }
// The interval count is always handled as BIGINT from here on.
1034  auto cast_number_units = number_units->add_cast(SQLTypeInfo(kBIGINT, false));
1035  const auto datetime = translateScalarRex(rex_function->getOperand(2));
1036  const auto& datetime_ti = datetime->get_type_info();
1037  if (datetime_ti.get_type() == kTIME) {
1038  throw std::runtime_error("DateAdd operation not supported for TIME.");
1039  }
1040  const auto& field = to_dateadd_field(*timeunit_lit->get_constval().stringval);
// Non-high-precision timestamps: the count is divided by a per-field constant and folded.
// NOTE(review): the second half of this condition (listing line 1042) is absent.
1041  if (!datetime_ti.is_high_precision_timestamp() &&
1043  // Scale the number to get value in seconds
1044  const auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1045  cast_number_units = makeExpr<Analyzer::BinOper>(
1046  bigint_ti.get_type(),
1047  kDIVIDE,
1048  kONE,
1049  cast_number_units,
// NOTE(review): the divisor argument (listing line 1051) is absent.
1050  makeNumericConstant(bigint_ti,
1052  cast_number_units = fold_expr(cast_number_units.get());
1053  }
// High-precision timestamps: oper_scale supplies an operator (first) and a constant
// (second) applied to the count; nothing is applied when oper_scale.first is unset.
// NOTE(review): the rest of this condition and the declaration of oper_scale (listing
// lines 1055-1056) are absent.
1054  if (datetime_ti.is_high_precision_timestamp() &&
1057  field, datetime_ti.get_dimension());
1058  if (oper_scale.first) {
1059  // scale number to desired precision
1060  const auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1061  cast_number_units =
1062  makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1063  oper_scale.first,
1064  kONE,
1065  cast_number_units,
1066  makeNumericConstant(bigint_ti, oper_scale.second));
1067  cast_number_units = fold_expr(cast_number_units.get());
1068  }
1069  }
1070  return makeExpr<Analyzer::DateaddExpr>(
1071  SQLTypeInfo(kTIMESTAMP, datetime_ti.get_dimension(), 0, false),
1072  to_dateadd_field(*timeunit_lit->get_constval().stringval),
1073  cast_number_units,
1074  datetime);
1075 }
1076 
1077 namespace {
1078 
// File-local helper that yields the function name "DATETIME_PLUS"; the operator passed
// in must be kPLUS (enforced with CHECK).
// NOTE(review): the signature line (listing line 1079) is absent from this listing;
// translateDatePlusMinus calls a helper named get_datetimeplus_rewrite_funcname(op),
// which is presumably this function -- confirm.
1080  CHECK(op == kPLUS);
1081  return "DATETIME_PLUS"s;
1082 }
1083 
1084 } // namespace
1085 
1086 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatePlusMinus(
1087  const RexOperator* rex_operator) const {
1088  if (rex_operator->size() != 2) {
1089  return nullptr;
1090  }
1091  const auto datetime = translateScalarRex(rex_operator->getOperand(0));
1092  const auto datetime_ti = datetime->get_type_info();
1093  if (!datetime_ti.is_timestamp() && !datetime_ti.is_date()) {
1094  if (datetime_ti.get_type() == kTIME) {
1095  throw std::runtime_error("DateTime addition/subtraction not supported for TIME.");
1096  }
1097  return nullptr;
1098  }
1099  const auto rhs = translateScalarRex(rex_operator->getOperand(1));
1100  const auto rhs_ti = rhs->get_type_info();
1101  if (rhs_ti.get_type() == kTIMESTAMP || rhs_ti.get_type() == kDATE) {
1102  if (datetime_ti.is_high_precision_timestamp() ||
1103  rhs_ti.is_high_precision_timestamp()) {
1104  throw std::runtime_error(
1105  "High Precision timestamps are not supported for TIMESTAMPDIFF operation. Use "
1106  "DATEDIFF.");
1107  }
1108  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1109  const auto& rex_operator_ti = rex_operator->getType();
1110  const auto datediff_field =
1111  (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) ? dtSECOND : dtMONTH;
1112  auto result =
1113  makeExpr<Analyzer::DatediffExpr>(bigint_ti, datediff_field, rhs, datetime);
1114  // multiply 1000 to result since expected result should be in millisecond precision.
1115  if (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) {
1116  return makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1117  kMULTIPLY,
1118  kONE,
1119  result,
1120  makeNumericConstant(bigint_ti, 1000));
1121  } else {
1122  return result;
1123  }
1124  }
1125  const auto op = rex_operator->getOperator();
1126  if (op == kPLUS) {
1127  std::vector<std::shared_ptr<Analyzer::Expr>> args = {datetime, rhs};
1128  auto dt_plus = makeExpr<Analyzer::FunctionOper>(
1129  datetime_ti, get_datetimeplus_rewrite_funcname(op), args);
1130  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1131  if (date_trunc) {
1132  return date_trunc;
1133  }
1134  }
1135  const auto interval = fold_expr(rhs.get());
1136  auto interval_ti = interval->get_type_info();
1137  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1138  const auto interval_lit = std::dynamic_pointer_cast<Analyzer::Constant>(interval);
1139  if (interval_ti.get_type() == kINTERVAL_DAY_TIME) {
1140  std::shared_ptr<Analyzer::Expr> interval_sec;
1141  if (interval_lit) {
1142  interval_sec =
1143  makeNumericConstant(bigint_ti,
1144  (op == kMINUS ? -interval_lit->get_constval().bigintval
1145  : interval_lit->get_constval().bigintval) /
1146  1000);
1147  } else {
1148  interval_sec = makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1149  kDIVIDE,
1150  kONE,
1151  interval,
1152  makeNumericConstant(bigint_ti, 1000));
1153  if (op == kMINUS) {
1154  interval_sec =
1155  std::make_shared<Analyzer::UOper>(bigint_ti, false, kUMINUS, interval_sec);
1156  }
1157  }
1158  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daSECOND, interval_sec, datetime);
1159  }
1160  CHECK(interval_ti.get_type() == kINTERVAL_YEAR_MONTH);
1161  const auto interval_months = op == kMINUS ? std::make_shared<Analyzer::UOper>(
1162  bigint_ti, false, kUMINUS, interval)
1163  : interval;
1164  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daMONTH, interval_months, datetime);
1165 }
1166 
// Translates DATEDIFF(timeunit, start, end) into a BIGINT Analyzer::DatediffExpr whose
// field is derived from the time unit's string value via to_datediff_field.
1167 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatediff(
1168  const RexFunctionOperator* rex_function) const {
1169  CHECK_EQ(size_t(3), rex_function->size());
1170  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1171  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 1172 is absent; timeunit_lit is dereferenced below without
// a visible null check -- presumably validated on the omitted line; confirm.
1173  const auto start = translateScalarRex(rex_function->getOperand(1));
1174  const auto end = translateScalarRex(rex_function->getOperand(2));
1175  const auto field = to_datediff_field(*timeunit_lit->get_constval().stringval);
1176  return makeExpr<Analyzer::DatediffExpr>(SQLTypeInfo(kBIGINT, false), field, start, end);
1177 }
1178 
// Translates DATEPART(timeunit, expr) by mapping the time unit's string value through
// to_datepart_field and delegating to ExtractExpr::generate.
1179 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatepart(
1180  const RexFunctionOperator* rex_function) const {
1181  CHECK_EQ(size_t(2), rex_function->size());
1182  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1183  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
// NOTE(review): listing line 1184 is absent; timeunit_lit is dereferenced below without
// a visible null check -- presumably validated on the omitted line; confirm.
1185  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1186  return ExtractExpr::generate(
1187  from_expr, to_datepart_field(*timeunit_lit->get_constval().stringval));
1188 }
1189 
1190 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLength(
1191  const RexFunctionOperator* rex_function) const {
1192  CHECK_EQ(size_t(1), rex_function->size());
1193  const auto str_arg = translateScalarRex(rex_function->getOperand(0));
1194  return makeExpr<Analyzer::CharLengthExpr>(str_arg->decompress(),
1195  rex_function->getName() == "CHAR_LENGTH"sv);
1196 }
1197 
1198 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateKeyForString(
1199  const RexFunctionOperator* rex_function) const {
1200  const auto& args = translateFunctionArgs(rex_function);
1201  CHECK_EQ(size_t(1), args.size());
1202  const auto expr = dynamic_cast<Analyzer::Expr*>(args[0].get());
1203  if (nullptr == expr || !expr->get_type_info().is_string() ||
1204  expr->get_type_info().is_varlen()) {
1205  throw std::runtime_error(rex_function->getName() +
1206  " expects a dictionary encoded text column.");
1207  }
1208  return makeExpr<Analyzer::KeyForStringExpr>(args[0]);
1209 }
1210 
1211 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCurrentUser(
1212  const RexFunctionOperator* rex_function) const {
1213  std::string user{"SESSIONLESS_USER"};
1214  if (query_state_) {
1215  user = query_state_->getConstSessionInfo()->get_currentUser().userName;
1216  }
1217  return Parser::UserLiteral::get(user);
1218 }
1219 
1220 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLower(
1221  const RexFunctionOperator* rex_function) const {
1222  const auto& args = translateFunctionArgs(rex_function);
1223  CHECK_EQ(size_t(1), args.size());
1224  CHECK(args[0]);
1225 
1226  if (args[0]->get_type_info().is_dict_encoded_string() ||
1227  dynamic_cast<Analyzer::Constant*>(args[0].get())) {
1228  return makeExpr<Analyzer::LowerExpr>(args[0]);
1229  }
1230 
1231  throw std::runtime_error(rex_function->getName() +
1232  " expects a dictionary encoded text column or a literal.");
1233 }
1234 
// Computes the element count of an array expression. When the array type reports a
// positive size, the count is emitted as a numeric constant (size / element size);
// otherwise a CardinalityExpr is returned so the count is computed at runtime.
// Throws std::runtime_error for non-array or nested-array arguments, and when a
// positive-size array has a non-positive element size.
// NOTE(review): the line that opens this signature (listing line 1235) is absent from
// this listing. translateFunction dispatches CARDINALITY / ARRAY_LENGTH to
// translateCardinality, which is presumably this function -- confirm.
1236  const RexFunctionOperator* rex_function) const {
1237  const auto ret_ti = rex_function->getType();
1238  const auto arg = translateScalarRex(rex_function->getOperand(0));
1239  const auto arg_ti = arg->get_type_info();
1240  if (!arg_ti.is_array()) {
1241  throw std::runtime_error(rex_function->getName() + " expects an array expression.");
1242  }
1243  if (arg_ti.get_subtype() == kARRAY) {
1244  throw std::runtime_error(rex_function->getName() +
1245  " expects one-dimension array expression.");
1246  }
1247  const auto array_size = arg_ti.get_size();
1248  const auto array_elem_size = arg_ti.get_elem_type().get_array_context_logical_size();
1249 
// A positive size is treated as a fixed-length array (see the comment at the return).
1250  if (array_size > 0) {
// Guard the division below against a zero or negative element size.
1251  if (array_elem_size <= 0) {
1252  throw std::runtime_error(rex_function->getName() +
1253  ": unexpected array element type.");
1254  }
1255  // Return cardinality of a fixed length array
1256  return makeNumericConstant(ret_ti, array_size / array_elem_size);
1257  }
1258  // Variable length array cardinality will be calculated at runtime
1259  return makeExpr<Analyzer::CardinalityExpr>(arg);
1260 }
1261 
1262 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateItem(
1263  const RexFunctionOperator* rex_function) const {
1264  CHECK_EQ(size_t(2), rex_function->size());
1265  const auto base = translateScalarRex(rex_function->getOperand(0));
1266  const auto index = translateScalarRex(rex_function->getOperand(1));
1267  return makeExpr<Analyzer::BinOper>(
1268  base->get_type_info().get_elem_type(), false, kARRAY_AT, kONE, base, index);
1269 }
1270 
1271 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateNow() const {
1272  return Parser::TimestampLiteral::get(now_);
1273 }
1274 
1275 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatetime(
1276  const RexFunctionOperator* rex_function) const {
1277  CHECK_EQ(size_t(1), rex_function->size());
1278  const auto arg = translateScalarRex(rex_function->getOperand(0));
1279  const auto arg_lit = std::dynamic_pointer_cast<Analyzer::Constant>(arg);
1280  const std::string datetime_err{R"(Only DATETIME('NOW') supported for now.)"};
1281  if (!arg_lit || arg_lit->get_is_null()) {
1282  throw std::runtime_error(datetime_err);
1283  }
1284  CHECK(arg_lit->get_type_info().is_string());
1285  if (*arg_lit->get_constval().stringval != "NOW"sv) {
1286  throw std::runtime_error(datetime_err);
1287  }
1288  return translateNow();
1289 }
1290 
1291 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAbs(
1292  const RexFunctionOperator* rex_function) const {
1293  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1294  expr_list;
1295  CHECK_EQ(size_t(1), rex_function->size());
1296  const auto operand = translateScalarRex(rex_function->getOperand(0));
1297  const auto& operand_ti = operand->get_type_info();
1298  CHECK(operand_ti.is_number());
1299  const auto zero = makeNumericConstant(operand_ti, 0);
1300  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1301  const auto uminus_operand =
1302  makeExpr<Analyzer::UOper>(operand_ti.get_type(), kUMINUS, operand);
1303  expr_list.emplace_back(lt_zero, uminus_operand);
1304  return makeExpr<Analyzer::CaseExpr>(operand_ti, false, expr_list, operand);
1305 }
1306 
1307 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSign(
1308  const RexFunctionOperator* rex_function) const {
1309  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1310  expr_list;
1311  CHECK_EQ(size_t(1), rex_function->size());
1312  const auto operand = translateScalarRex(rex_function->getOperand(0));
1313  const auto& operand_ti = operand->get_type_info();
1314  CHECK(operand_ti.is_number());
1315  const auto zero = makeNumericConstant(operand_ti, 0);
1316  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1317  expr_list.emplace_back(lt_zero, makeNumericConstant(operand_ti, -1));
1318  const auto eq_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, operand, zero);
1319  expr_list.emplace_back(eq_zero, makeNumericConstant(operand_ti, 0));
1320  const auto gt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kGT, kONE, operand, zero);
1321  expr_list.emplace_back(gt_zero, makeNumericConstant(operand_ti, 1));
1322  return makeExpr<Analyzer::CaseExpr>(
1323  operand_ti,
1324  false,
1325  expr_list,
1326  makeExpr<Analyzer::Constant>(operand_ti, true, Datum{0}));
1327 }
1328 
1329 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOffsetInFragment() const {
1330  return makeExpr<Analyzer::OffsetInFragment>();
1331 }
1332 
// Builds an Analyzer::ArrayExpr from the translated arguments of an ARRAY constructor.
// When the declared array type has no element subtype (kNULLT), the subtype is taken
// from the first element and all elements must share that nullable logical type;
// otherwise std::runtime_error is thrown. An empty array defaults to a BOOLEAN subtype.
// NOTE(review): the line that opens this signature (listing line 1333) is absent from
// this listing. translateFunction dispatches "ARRAY" to translateArrayFunction, which
// is presumably this function -- confirm.
1334  const RexFunctionOperator* rex_function) const {
1335  if (rex_function->getType().get_subtype() == kNULLT) {
1336  auto sql_type = rex_function->getType();
1337  CHECK(sql_type.get_type() == kARRAY);
1338 
1339  // FIX-ME: Deal with NULL arrays
1340  auto translated_function_args(translateFunctionArgs(rex_function));
1341  if (translated_function_args.size() > 0) {
1342  const auto first_element_logical_type =
1343  get_nullable_logical_type_info(translated_function_args[0]->get_type_info());
1344 
// Locate the first element whose nullable logical type differs from element 0.
1345  auto diff_elem_itr =
1346  std::find_if(translated_function_args.begin(),
1347  translated_function_args.end(),
1348  [first_element_logical_type](const auto expr) {
1349  return first_element_logical_type !=
1350  get_nullable_logical_type_info(expr->get_type_info());
1351  });
1352  if (diff_elem_itr != translated_function_args.end()) {
1353  throw std::runtime_error(
1354  "Element " +
1355  std::to_string(diff_elem_itr - translated_function_args.begin()) +
1356  " is not of the same type as other elements of the array. Consider casting "
1357  "to force this condition.\nElement Type: " +
1358  get_nullable_logical_type_info((*diff_elem_itr)->get_type_info())
1359  .to_string() +
1360  "\nArray type: " + first_element_logical_type.to_string());
1361  }
1362 
// Derive the array's element description from the first element: non-dictionary
// strings set fixed compression, dictionary strings use the transient dictionary id,
// and all other types copy scale and precision.
1363  if (first_element_logical_type.is_string() &&
1364  !first_element_logical_type.is_dict_encoded_string()) {
1365  sql_type.set_subtype(first_element_logical_type.get_type());
1366  sql_type.set_compression(kENCODING_FIXED);
1367  } else if (first_element_logical_type.is_dict_encoded_string()) {
1368  sql_type.set_subtype(first_element_logical_type.get_type());
1369  sql_type.set_comp_param(TRANSIENT_DICT_ID);
1370  } else {
1371  sql_type.set_subtype(first_element_logical_type.get_type());
1372  sql_type.set_scale(first_element_logical_type.get_scale());
1373  sql_type.set_precision(first_element_logical_type.get_precision());
1374  }
1375 
1376  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1377  } else {
1378  // defaulting to valid sub-type for convenience
1379  sql_type.set_subtype(kBOOLEAN);
1380  return makeExpr<Analyzer::ArrayExpr>(sql_type, translated_function_args);
1381  }
1382  } else {
// The declared type already carries an element subtype; use it unchanged.
1383  return makeExpr<Analyzer::ArrayExpr>(rex_function->getType(),
1384  translateFunctionArgs(rex_function));
1385  }
1386 }
1387 
1388 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateFunction(
1389  const RexFunctionOperator* rex_function) const {
1390  if (func_resolve(rex_function->getName(), "LIKE"sv, "PG_ILIKE"sv)) {
1391  return translateLike(rex_function);
1392  }
1393  if (rex_function->getName() == "REGEXP_LIKE"sv) {
1394  return translateRegexp(rex_function);
1395  }
1396  if (rex_function->getName() == "LIKELY"sv) {
1397  return translateLikely(rex_function);
1398  }
1399  if (rex_function->getName() == "UNLIKELY"sv) {
1400  return translateUnlikely(rex_function);
1401  }
1402  if (func_resolve(rex_function->getName(), "PG_EXTRACT"sv, "PG_DATE_TRUNC"sv)) {
1403  return translateExtract(rex_function);
1404  }
1405  if (rex_function->getName() == "DATEADD"sv) {
1406  return translateDateadd(rex_function);
1407  }
1408  if (rex_function->getName() == "DATEDIFF"sv) {
1409  return translateDatediff(rex_function);
1410  }
1411  if (rex_function->getName() == "DATEPART"sv) {
1412  return translateDatepart(rex_function);
1413  }
1414  if (func_resolve(rex_function->getName(), "LENGTH"sv, "CHAR_LENGTH"sv)) {
1415  return translateLength(rex_function);
1416  }
1417  if (rex_function->getName() == "KEY_FOR_STRING"sv) {
1418  return translateKeyForString(rex_function);
1419  }
1420  if (rex_function->getName() == "CURRENT_USER"sv) {
1421  return translateCurrentUser(rex_function);
1422  }
1423  if (g_enable_experimental_string_functions && rex_function->getName() == "LOWER"sv) {
1424  return translateLower(rex_function);
1425  }
1426  if (func_resolve(rex_function->getName(), "CARDINALITY"sv, "ARRAY_LENGTH"sv)) {
1427  return translateCardinality(rex_function);
1428  }
1429  if (rex_function->getName() == "ITEM"sv) {
1430  return translateItem(rex_function);
1431  }
1432  if (rex_function->getName() == "NOW"sv) {
1433  return translateNow();
1434  }
1435  if (rex_function->getName() == "DATETIME"sv) {
1436  return translateDatetime(rex_function);
1437  }
1438  if (func_resolve(rex_function->getName(), "usTIMESTAMP"sv, "nsTIMESTAMP"sv)) {
1439  return translateHPTLiteral(rex_function);
1440  }
1441  if (rex_function->getName() == "ABS"sv) {
1442  return translateAbs(rex_function);
1443  }
1444  if (rex_function->getName() == "SIGN"sv) {
1445  return translateSign(rex_function);
1446  }
1447  if (func_resolve(rex_function->getName(), "CEIL"sv, "FLOOR"sv)) {
1448  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1449  rex_function->getType(),
1450  rex_function->getName(),
1451  translateFunctionArgs(rex_function));
1452  } else if (rex_function->getName() == "ROUND"sv) {
1453  std::vector<std::shared_ptr<Analyzer::Expr>> args =
1454  translateFunctionArgs(rex_function);
1455 
1456  if (rex_function->size() == 1) {
1457  // push a 0 constant if 2nd operand is missing.
1458  // this needs to be done as calcite returns
1459  // only the 1st operand without defaulting the 2nd one
1460  // when the user did not specify the 2nd operand.
1461  SQLTypes t = kSMALLINT;
1462  Datum d;
1463  d.smallintval = 0;
1464  args.push_back(makeExpr<Analyzer::Constant>(t, false, d));
1465  }
1466 
1467  // make sure we have only 2 operands
1468  CHECK(args.size() == 2);
1469 
1470  if (!args[0]->get_type_info().is_number()) {
1471  throw std::runtime_error("Only numeric 1st operands are supported");
1472  }
1473 
1474  // the 2nd operand does not need to be a constant
1475  // it can happily reference another integer column
1476  if (!args[1]->get_type_info().is_integer()) {
1477  throw std::runtime_error("Only integer 2nd operands are supported");
1478  }
1479 
1480  // Calcite may upcast decimals in a way that is
1481  // incompatible with the extension function input. Play it safe and stick with the
1482  // argument type instead.
1483  const SQLTypeInfo ret_ti = args[0]->get_type_info().is_decimal()
1484  ? args[0]->get_type_info()
1485  : rex_function->getType();
1486 
1487  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1488  ret_ti, rex_function->getName(), args);
1489  }
1490  if (rex_function->getName() == "DATETIME_PLUS"sv) {
1491  auto dt_plus = makeExpr<Analyzer::FunctionOper>(rex_function->getType(),
1492  rex_function->getName(),
1493  translateFunctionArgs(rex_function));
1494  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1495  if (date_trunc) {
1496  return date_trunc;
1497  }
1498  return translateDateadd(rex_function);
1499  }
1500  if (rex_function->getName() == "/INT"sv) {
1501  CHECK_EQ(size_t(2), rex_function->size());
1502  std::shared_ptr<Analyzer::Expr> lhs = translateScalarRex(rex_function->getOperand(0));
1503  std::shared_ptr<Analyzer::Expr> rhs = translateScalarRex(rex_function->getOperand(1));
1504  const auto rhs_lit = std::dynamic_pointer_cast<Analyzer::Constant>(rhs);
1505  return Parser::OperExpr::normalize(kDIVIDE, kONE, lhs, rhs);
1506  }
1507  if (rex_function->getName() == "Reinterpret"sv) {
1508  CHECK_EQ(size_t(1), rex_function->size());
1509  return translateScalarRex(rex_function->getOperand(0));
1510  }
1511  if (func_resolve(rex_function->getName(),
1512  "ST_X"sv,
1513  "ST_Y"sv,
1514  "ST_XMin"sv,
1515  "ST_YMin"sv,
1516  "ST_XMax"sv,
1517  "ST_YMax"sv,
1518  "ST_NRings"sv,
1519  "ST_NPoints"sv,
1520  "ST_Length"sv,
1521  "ST_Perimeter"sv,
1522  "ST_Area"sv,
1523  "ST_SRID"sv,
1524  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1525  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1526  "OmniSci_Geo_PolyBoundsPtr"sv,
1527  "OmniSci_Geo_PolyRenderGroup"sv)) {
1528  CHECK_EQ(rex_function->size(), size_t(1));
1529  return translateUnaryGeoFunction(rex_function);
1530  }
1531  if (func_resolve(rex_function->getName(),
1532  "convert_meters_to_pixel_width"sv,
1533  "convert_meters_to_pixel_height"sv,
1534  "is_point_in_view"sv,
1535  "is_point_size_in_view"sv)) {
1536  return translateFunctionWithGeoArg(rex_function);
1537  }
1538  if (func_resolve(rex_function->getName(),
1539  "ST_Distance"sv,
1540  "ST_MaxDistance"sv,
1541  "ST_Intersects"sv,
1542  "ST_Disjoint"sv,
1543  "ST_Contains"sv,
1544  "ST_Overlaps"sv,
1545  "ST_Within"sv)) {
1546  CHECK_EQ(rex_function->size(), size_t(2));
1547  return translateBinaryGeoFunction(rex_function);
1548  }
1549  if (func_resolve(rex_function->getName(), "ST_DWithin"sv, "ST_DFullyWithin"sv)) {
1550  CHECK_EQ(rex_function->size(), size_t(3));
1551  return translateTernaryGeoFunction(rex_function);
1552  }
1553  if (rex_function->getName() == "OFFSET_IN_FRAGMENT"sv) {
1554  CHECK_EQ(size_t(0), rex_function->size());
1555  return translateOffsetInFragment();
1556  }
1557  if (rex_function->getName() == "ARRAY"sv) {
1558  // Var args; currently no check. Possible fix-me -- can array have 0 elements?
1559  return translateArrayFunction(rex_function);
1560  }
1561  if (func_resolve(rex_function->getName(),
1562  "ST_GeomFromText"sv,
1563  "ST_GeogFromText"sv,
1564  "ST_Point"sv,
1565  "ST_SetSRID"sv)) {
1566  SQLTypeInfo ti;
1567  return translateGeoProjection(rex_function, ti, false);
1568  }
1569  if (func_resolve(rex_function->getName(),
1570  "ST_Intersection"sv,
1571  "ST_Difference"sv,
1572  "ST_Union"sv,
1573  "ST_Buffer"sv)) {
1574  SQLTypeInfo ti;
1575  return translateGeoBinaryConstructor(rex_function, ti, false);
1576  }
1577  if (func_resolve(rex_function->getName(), "ST_IsEmpty"sv, "ST_IsValid"sv)) {
1578  SQLTypeInfo ti;
1579  return translateGeoPredicate(rex_function, ti, false);
1580  }
1581 
1582  auto arg_expr_list = translateFunctionArgs(rex_function);
1583  if (rex_function->getName() == std::string("||") ||
1584  rex_function->getName() == std::string("SUBSTRING")) {
1585  SQLTypeInfo ret_ti(kTEXT, false);
1586  return makeExpr<Analyzer::FunctionOper>(
1587  ret_ti, rex_function->getName(), arg_expr_list);
1588  }
1589  // Reset possibly wrong return type of rex_function to the return
1590  // type of the optimal valid implementation. The return type can be
1591  // wrong in the case of multiple implementations of UDF functions
1592  // that have different return types but Calcite specifies the return
1593  // type according to the first implementation.
1594  auto ext_func_sig = bind_function(rex_function->getName(), arg_expr_list);
1595  auto ext_func_args = ext_func_sig.getArgs();
1596  CHECK_EQ(arg_expr_list.size(), ext_func_args.size());
1597  for (size_t i = 0; i < arg_expr_list.size(); i++) {
1598  // fold casts on constants
1599  if (auto constant = std::dynamic_pointer_cast<Analyzer::Constant>(arg_expr_list[i])) {
1600  auto ext_func_arg_ti = ext_arg_type_to_type_info(ext_func_args[i]);
1601  if (ext_func_arg_ti != arg_expr_list[i]->get_type_info()) {
1602  arg_expr_list[i] = constant->add_cast(ext_func_arg_ti);
1603  }
1604  }
1605  }
1606  auto ret_ti = ext_arg_type_to_type_info(ext_func_sig.getRet());
1607  // By defualt, the extension function type will not allow nulls. If one of the arguments
1608  // is nullable, the extension function must also explicitly allow nulls.
1609  bool arguments_not_null = true;
1610  for (const auto& arg_expr : arg_expr_list) {
1611  if (!arg_expr->get_type_info().get_notnull()) {
1612  arguments_not_null = false;
1613  break;
1614  }
1615  }
1616  ret_ti.set_notnull(arguments_not_null);
1617 
1618  return makeExpr<Analyzer::FunctionOper>(ret_ti, rex_function->getName(), arg_expr_list);
1619 }
1620 
1621 namespace {
1622 
1623 std::vector<Analyzer::OrderEntry> translate_collation(
1624  const std::vector<SortField>& sort_fields) {
1625  std::vector<Analyzer::OrderEntry> collation;
1626  for (size_t i = 0; i < sort_fields.size(); ++i) {
1627  const auto& sort_field = sort_fields[i];
1628  collation.emplace_back(i,
1629  sort_field.getSortDir() == SortDirection::Descending,
1630  sort_field.getNullsPosition() == NullSortedPosition::First);
1631  }
1632  return collation;
1633 }
1634 
// Returns true only for a window bound that is unbounded and preceding, is neither
// following nor the current row, has no offset, and has order_key 0.
// NOTE(review): the line that opens this signature (listing line 1635) is absent from
// this listing. translateWindowFunction calls supported_lower_bound with the lower
// bound, which is presumably this function -- confirm.
1636  const RexWindowFunctionOperator::RexWindowBound& window_bound) {
1637  return window_bound.unbounded && window_bound.preceding && !window_bound.following &&
1638  !window_bound.is_current_row && !window_bound.offset &&
1639  window_bound.order_key == 0;
1640 }
1641 
// Checks whether the upper frame bound of a window function is one of the supported
// shapes. The required shape depends on the function kind and, in the default case,
// on whether the window has any order keys.
1642 bool supported_upper_bound(const RexWindowFunctionOperator* rex_window_function) {
1643  const auto& window_bound = rex_window_function->getUpperBound();
// True when the bound is exactly "current row": not unbounded, neither preceding nor
// following, no offset, and order_key 1.
1644  const bool to_current_row = !window_bound.unbounded && !window_bound.preceding &&
1645  !window_bound.following && window_bound.is_current_row &&
1646  !window_bound.offset && window_bound.order_key == 1;
1647  switch (rex_window_function->getKind()) {
// NOTE(review): the case labels for this first branch (listing lines 1648-1651) are
// absent from this listing; the kinds they name must end at the current row.
1652  return to_current_row;
1653  }
1654  default: {
// Without order keys the bound must be unbounded and following with order_key 2;
// with order keys it must end at the current row.
1655  return rex_window_function->getOrderKeys().empty()
1656  ? (window_bound.unbounded && !window_bound.preceding &&
1657  window_bound.following && !window_bound.is_current_row &&
1658  !window_bound.offset && window_bound.order_key == 2)
1659  : to_current_row;
1660  }
1661  }
1662 }
1663 
1664 } // namespace
1665 
1666 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWindowFunction(
1667  const RexWindowFunctionOperator* rex_window_function) const {
1668  if (!supported_lower_bound(rex_window_function->getLowerBound()) ||
1669  !supported_upper_bound(rex_window_function) ||
1670  ((rex_window_function->getKind() == SqlWindowFunctionKind::ROW_NUMBER) !=
1671  rex_window_function->isRows())) {
1672  throw std::runtime_error("Frame specification not supported");
1673  }
1674  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1675  for (size_t i = 0; i < rex_window_function->size(); ++i) {
1676  args.push_back(translateScalarRex(rex_window_function->getOperand(i)));
1677  }
1678  std::vector<std::shared_ptr<Analyzer::Expr>> partition_keys;
1679  for (const auto& partition_key : rex_window_function->getPartitionKeys()) {
1680  partition_keys.push_back(translateScalarRex(partition_key.get()));
1681  }
1682  std::vector<std::shared_ptr<Analyzer::Expr>> order_keys;
1683  for (const auto& order_key : rex_window_function->getOrderKeys()) {
1684  order_keys.push_back(translateScalarRex(order_key.get()));
1685  }
1686  auto ti = rex_window_function->getType();
1687  if (window_function_is_value(rex_window_function->getKind())) {
1688  CHECK_GE(args.size(), 1u);
1689  ti = args.front()->get_type_info();
1690  }
1691  return makeExpr<Analyzer::WindowFunction>(
1692  ti,
1693  rex_window_function->getKind(),
1694  args,
1695  partition_keys,
1696  order_keys,
1697  translate_collation(rex_window_function->getCollation()));
1698 }
1699 
1701  const RexFunctionOperator* rex_function) const {
1702  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1703  for (size_t i = 0; i < rex_function->size(); ++i) {
1704  args.push_back(translateScalarRex(rex_function->getOperand(i)));
1705  }
1706  return args;
1707 }
1708 
1710  const std::shared_ptr<Analyzer::Expr> qual_expr) {
1711  CHECK(qual_expr);
1712  auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
1713  if (!bin_oper) {
1714  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1715  return {{}, {rewritten_qual_expr ? rewritten_qual_expr : qual_expr}};
1716  }
1717 
1718  if (bin_oper->get_optype() == kAND) {
1719  const auto lhs_cf = qual_to_conjunctive_form(bin_oper->get_own_left_operand());
1720  const auto rhs_cf = qual_to_conjunctive_form(bin_oper->get_own_right_operand());
1721  auto simple_quals = lhs_cf.simple_quals;
1722  simple_quals.insert(
1723  simple_quals.end(), rhs_cf.simple_quals.begin(), rhs_cf.simple_quals.end());
1724  auto quals = lhs_cf.quals;
1725  quals.insert(quals.end(), rhs_cf.quals.begin(), rhs_cf.quals.end());
1726  return {simple_quals, quals};
1727  }
1728  int rte_idx{0};
1729  const auto simple_qual = bin_oper->normalize_simple_predicate(rte_idx);
1730  return simple_qual ? QualsConjunctiveForm{{simple_qual}, {}}
1731  : QualsConjunctiveForm{{}, {qual_expr}};
1732 }
1733 
1734 std::vector<std::shared_ptr<Analyzer::Expr>> qual_to_disjunctive_form(
1735  const std::shared_ptr<Analyzer::Expr>& qual_expr) {
1736  CHECK(qual_expr);
1737  const auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
1738  if (!bin_oper) {
1739  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1740  return {rewritten_qual_expr ? rewritten_qual_expr : qual_expr};
1741  }
1742  if (bin_oper->get_optype() == kOR) {
1743  const auto lhs_df = qual_to_disjunctive_form(bin_oper->get_own_left_operand());
1744  const auto rhs_df = qual_to_disjunctive_form(bin_oper->get_own_right_operand());
1745  auto quals = lhs_df;
1746  quals.insert(quals.end(), rhs_df.begin(), rhs_df.end());
1747  return quals;
1748  }
1749  return {qual_expr};
1750 }
1751 
1752 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateHPTLiteral(
1753  const RexFunctionOperator* rex_function) const {
1754  /* since calcite uses Avatica package called DateTimeUtils to parse timestamp strings.
1755  Therefore any string having fractional seconds more 3 places after the decimal
1756  (milliseconds) will get truncated to 3 decimal places, therefore we lose precision
1757  (us|ns). Issue: [BE-2461] Here we are hijacking literal cast to Timestamp(6|9) from
1758  calcite and translating them to generate our own casts.
1759  */
1760  CHECK_EQ(size_t(1), rex_function->size());
1761  const auto operand = translateScalarRex(rex_function->getOperand(0));
1762  const auto& operand_ti = operand->get_type_info();
1763  const auto& target_ti = rex_function->getType();
1764  if (!operand_ti.is_string()) {
1765  throw std::runtime_error(
1766  "High precision timestamp cast argument must be a string. Input type is: " +
1767  operand_ti.get_type_name());
1768  } else if (!target_ti.is_high_precision_timestamp()) {
1769  throw std::runtime_error(
1770  "Cast target type should be high precision timestamp. Input type is: " +
1771  target_ti.get_type_name());
1772  } else if (target_ti.get_dimension() != 6 && target_ti.get_dimension() != 9) {
1773  throw std::runtime_error(
1774  "Cast target type should be TIMESTAMP(6|9). Input type is: TIMESTAMP(" +
1775  std::to_string(target_ti.get_dimension()) + ")");
1776  } else {
1777  return operand->add_cast(target_ti);
1778  }
1779 }
std::shared_ptr< Analyzer::Expr > translateWindowFunction(const RexWindowFunctionOperator *) const
int32_t getIdOfString(const std::string &str) const
Definition: sqldefs.h:69
SQLOps getOperator() const
SQLAgg
Definition: sqldefs.h:71
size_t size() const
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const std::vector< SortField > & getCollation() const
auto func_resolve
const ConstRexScalarPtrVector & getPartitionKeys() const
std::shared_ptr< Analyzer::Expr > translateDatediff(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(const std::string &)
Definition: ParserNode.cpp:186
bool is_time() const
Definition: sqltypes.h:414
bool g_enable_watchdog
Definition: Execute.cpp:74
bool is_boolean() const
Definition: sqltypes.h:415
Definition: sqltypes.h:50
SqlWindowFunctionKind getKind() const
bool supported_lower_bound(const RexWindowFunctionOperator::RexWindowBound &window_bound)
SQLTypes
Definition: sqltypes.h:39
std::shared_ptr< Analyzer::Expr > getInIntegerSetExpr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set) const
constexpr int64_t get_dateadd_timestamp_precision_scale(const DateaddField field)
Definition: DateTimeUtils.h:67
const RexWindowBound & getLowerBound() const
SQLQualifier
Definition: sqldefs.h:69
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:111
std::shared_ptr< Analyzer::Expr > translateCardinality(const RexFunctionOperator *) const
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:822
#define LOG(tag)
Definition: Logger.h:188
bool boolval
Definition: sqltypes.h:132
SQLOps
Definition: sqldefs.h:29
ExtensionFunction bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< ExtensionFunction > &ext_funcs)
std::shared_ptr< Analyzer::Expr > translateKeyForString(const RexFunctionOperator *) const
const ConstRexScalarPtrVector & getOrderKeys() const
size_t getOperand(size_t idx) const
std::shared_ptr< Analyzer::Expr > translateAbs(const RexFunctionOperator *) const
size_t branchCount() const
Definition: sqldefs.h:38
std::shared_ptr< Analyzer::Expr > translateItem(const RexFunctionOperator *) const
#define CHECK_GE(x, y)
Definition: Logger.h:210
Definition: sqldefs.h:49
Definition: sqldefs.h:30
std::shared_ptr< Analyzer::Expr > translateSign(const RexFunctionOperator *) const
HOST DEVICE int get_size() const
Definition: sqltypes.h:267
std::shared_ptr< Analyzer::Expr > ExpressionPtr
Definition: Analyzer.h:181
const std::shared_ptr< Analyzer::Expr > generate() const
Definition: sqldefs.h:41
std::shared_ptr< Analyzer::Expr > translateOper(const RexOperator *) const
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::shared_ptr< Analyzer::Expr > translateLength(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > makeNumericConstant(const SQLTypeInfo &ti, const long val)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
std::shared_ptr< Analyzer::Expr > translateUnlikely(const RexFunctionOperator *) const
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
#define CHECK_GT(x, y)
Definition: Logger.h:209
bool is_agg_supported_for_type(const SQLAgg &agg_kind, const SQLTypeInfo &arg_ti)
bool is_decimal() const
Definition: sqltypes.h:411
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
Analyzer::ExpressionPtrVector translateFunctionArgs(const RexFunctionOperator *) const
HOST DEVICE int get_scale() const
Definition: sqltypes.h:262
std::shared_ptr< Analyzer::Expr > translateOverlapsOper(const RexOperator *) const
std::string to_string(char const *&&v)
bool g_enable_experimental_string_functions
std::shared_ptr< Analyzer::Expr > translateDatePlusMinus(const RexOperator *) const
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >)
Definition: ParserNode.cpp:916
const RexScalar * getWhen(const size_t idx) const
std::shared_ptr< Analyzer::Expr > translateOffsetInFragment() const
Definition: sqldefs.h:73
const RexWindowBound & getUpperBound() const
ExtractField to_datepart_field(const std::string &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
std::shared_ptr< Analyzer::Expr > translateLower(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateDateadd(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > translateLiteral(const RexLiteral *)
bool window_function_is_value(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:27
static constexpr int32_t INVALID_STR_ID
const RexScalar * getThen(const size_t idx) const
std::shared_ptr< Analyzer::Expr > translateScalarSubquery(const RexSubQuery *) const
const std::vector< ExtArgumentType > & getArgs() const
const RelAlgNode * getSourceNode() const
const std::pair< SQLOps, int64_t > get_dateadd_high_precision_adjusted_scale(const DateaddField field, int32_t dimen)
std::string getString(int32_t string_id) const
#define CHECK_NE(x, y)
Definition: Logger.h:206
std::shared_ptr< Analyzer::Expr > translateUoper(const RexOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > pattern_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_not)
Definition: ParserNode.cpp:636
std::shared_ptr< Analyzer::Expr > translateScalarRex(const RexScalar *rex) const
void set_scale(int s)
Definition: sqltypes.h:352
int64_t bigintval
Definition: sqltypes.h:136
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:258
std::shared_ptr< const RexScalar > offset
std::vector< Analyzer::OrderEntry > translate_collation(const std::vector< SortField > &sort_fields)
Definition: sqldefs.h:37
Definition: sqldefs.h:75
Definition: sqldefs.h:69
int16_t smallintval
Definition: sqltypes.h:134
SQLTypeInfo build_type_info(const SQLTypes sql_type, const int scale, const int precision)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const std::string &)
Definition: ParserNode.cpp:96
DatetruncField to_datediff_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > translateFunction(const RexFunctionOperator *) const
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const DictRef dest_dict_ref, const std::vector< int32_t > &source_ids, const DictRef source_dict_ref, const int32_t dest_generation)
std::shared_ptr< Analyzer::Expr > translateArrayFunction(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateLike(const RexFunctionOperator *) const
const RexScalar * getOperand(const size_t idx) const
#define UNLIKELY(x)
Definition: likely.h:20
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr)
Definition: ParserNode.cpp:266
std::pair< std::shared_ptr< Analyzer::Expr >, SQLQualifier > get_quantified_rhs(const RexScalar *rex_scalar, const RelAlgTranslator &translator)
Definition: sqldefs.h:34
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqltypes.h:53
Definition: sqltypes.h:54
Definition: sqldefs.h:40
Definition: sqldefs.h:69
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
#define TRANSIENT_DICT_ID
Definition: sqltypes.h:196
std::shared_ptr< Analyzer::Expr > translateExtract(const RexFunctionOperator *) const
#define CHECK_LE(x, y)
Definition: Logger.h:208
unsigned getIndex() const
bool is_null(const T &v, const SQLTypeInfo &t)
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > like_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_ilike, const bool is_not)
Definition: ParserNode.cpp:541
SQLAgg getKind() const
std::shared_ptr< Analyzer::Expr > translateLikely(const RexFunctionOperator *) const
virtual size_t size() const =0
std::shared_ptr< Analyzer::Expr > translateRegexp(const RexFunctionOperator *) const
Definition: sqltypes.h:42
bool supported_upper_bound(const RexWindowFunctionOperator *rex_window_function)
bool takes_arg(const TargetInfo &target_info)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t numericval, const int scale, const int precision)
Definition: ParserNode.cpp:136
std::shared_ptr< Analyzer::Expr > translateHPTLiteral(const RexFunctionOperator *) const
Definition: sqldefs.h:53
std::shared_ptr< Analyzer::Expr > translateInOper(const RexOperator *) const
constexpr bool is_subsecond_dateadd_field(const DateaddField field)
Definition: DateTimeUtils.h:99
std::shared_ptr< Analyzer::Expr > get_in_values_expr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set)
static std::shared_ptr< Analyzer::Expr > get(const int64_t)
Definition: ParserNode.cpp:172
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:981
void set_notnull(bool n)
Definition: sqltypes.h:354
#define CHECK(condition)
Definition: Logger.h:197
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:257
std::shared_ptr< Analyzer::Constant > make_fp_constant(const int64_t val, const SQLTypeInfo &ti)
std::pair< Datum, bool > datum_from_scalar_tv(const ScalarTargetValue *scalar_tv, const SQLTypeInfo &ti) noexcept
uint64_t exp_to_scale(const unsigned exp)
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::shared_ptr< Analyzer::Expr > translateCase(const RexCase *) const
std::vector< ExpressionPtr > ExpressionPtrVector
Definition: Analyzer.h:183
bool g_cluster
const SQLTypeInfo & getType() const
Definition: sqldefs.h:33
Definition: sqltypes.h:46
std::shared_ptr< Analyzer::Expr > translateCurrentUser(const RexFunctionOperator *) const
std::string get_datetimeplus_rewrite_funcname(const SQLOps &op)
void validate_datetime_datepart_argument(const std::shared_ptr< Analyzer::Constant > literal_expr)
const std::string & getName() const
Definition: sqldefs.h:74
int cpu_threads()
Definition: thread_count.h:25
void fill_dictionary_encoded_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice, const std::vector< LeafHostInfo > &leaf_hosts, const DictRef source_dict_ref, const DictRef dest_dict_ref, const int32_t dest_generation, const int64_t needle_null_val)
const std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::shared_ptr< Analyzer::Expr > translateNow() const
Definition: sqldefs.h:72
std::shared_ptr< Analyzer::Expr > translateDatetime(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateInput(const RexInput *) const
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
Definition: sqldefs.h:39
std::shared_ptr< Analyzer::Expr > rewrite_to_date_trunc(const Analyzer::FunctionOper *dt_plus)
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
std::shared_ptr< Analyzer::Expr > translateDatepart(const RexFunctionOperator *) const
size_t size() const
bool is_number() const
Definition: sqltypes.h:413
DateaddField to_dateadd_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
void set_precision(int d)
Definition: sqltypes.h:350
#define IS_COMPARISON(X)
Definition: sqldefs.h:57
double doubleval
Definition: sqltypes.h:138
void fill_integer_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice)
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
const std::shared_ptr< Analyzer::Expr > generate() const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156
bool isDistinct() const
const RexScalar * getElse() const