OmniSciDB  0bd2ec9cf4
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgTranslator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgTranslator.h"
18 #include "Shared/SqlTypesLayout.h"
19 
21 #include "DateTimePlusRewrite.h"
22 #include "DateTimeTranslator.h"
24 #include "ExpressionRewrite.h"
27 #include "RelAlgDagBuilder.h"
28 #include "WindowContext.h"
29 
30 #include <future>
31 
32 #include "../Analyzer/Analyzer.h"
33 #include "../Parser/ParserNode.h"
34 #include "../Shared/likely.h"
35 #include "../Shared/sql_type_to_string.h"
36 #include "../Shared/thread_count.h"
37 
38 extern bool g_enable_watchdog;
39 
41 
42 namespace {
43 
45  const int scale,
46  const int precision) {
47  SQLTypeInfo ti(sql_type, 0, 0, true);
48  if (ti.is_decimal()) {
49  ti.set_scale(scale);
50  ti.set_precision(precision);
51  }
52  return ti;
53 }
54 
55 std::pair<std::shared_ptr<Analyzer::Expr>, SQLQualifier> get_quantified_rhs(
56  const RexScalar* rex_scalar,
57  const RelAlgTranslator& translator) {
58  std::shared_ptr<Analyzer::Expr> rhs;
59  SQLQualifier sql_qual{kONE};
60  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
61  if (!rex_operator) {
62  return std::make_pair(rhs, sql_qual);
63  }
64  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex_operator);
65  const auto qual_str = rex_function ? rex_function->getName() : "";
66  if (qual_str == "PG_ANY"sv || qual_str == "PG_ALL"sv) {
67  CHECK_EQ(size_t(1), rex_function->size());
68  rhs = translator.translateScalarRex(rex_function->getOperand(0));
69  sql_qual = (qual_str == "PG_ANY"sv) ? kANY : kALL;
70  }
71  if (!rhs && rex_operator->getOperator() == kCAST) {
72  CHECK_EQ(size_t(1), rex_operator->size());
73  std::tie(rhs, sql_qual) = get_quantified_rhs(rex_operator->getOperand(0), translator);
74  }
75  return std::make_pair(rhs, sql_qual);
76 }
77 
78 std::pair<Datum, bool> datum_from_scalar_tv(const ScalarTargetValue* scalar_tv,
79  const SQLTypeInfo& ti) noexcept {
80  Datum d{0};
81  bool is_null_const{false};
82  switch (ti.get_type()) {
83  case kTINYINT: {
84  const auto ival = boost::get<int64_t>(scalar_tv);
85  CHECK(ival);
86  if (*ival == inline_int_null_val(ti)) {
87  is_null_const = true;
88  } else {
89  d.tinyintval = *ival;
90  }
91  break;
92  }
93  case kSMALLINT: {
94  const auto ival = boost::get<int64_t>(scalar_tv);
95  CHECK(ival);
96  if (*ival == inline_int_null_val(ti)) {
97  is_null_const = true;
98  } else {
99  d.smallintval = *ival;
100  }
101  break;
102  }
103  case kINT: {
104  const auto ival = boost::get<int64_t>(scalar_tv);
105  CHECK(ival);
106  if (*ival == inline_int_null_val(ti)) {
107  is_null_const = true;
108  } else {
109  d.intval = *ival;
110  }
111  break;
112  }
113  case kDECIMAL:
114  case kNUMERIC:
115  case kBIGINT:
116  case kDATE:
117  case kTIME:
118  case kTIMESTAMP: {
119  const auto ival = boost::get<int64_t>(scalar_tv);
120  CHECK(ival);
121  if (*ival == inline_int_null_val(ti)) {
122  is_null_const = true;
123  } else {
124  d.bigintval = *ival;
125  }
126  break;
127  }
128  case kDOUBLE: {
129  const auto dval = boost::get<double>(scalar_tv);
130  CHECK(dval);
131  if (*dval == inline_fp_null_val(ti)) {
132  is_null_const = true;
133  } else {
134  d.doubleval = *dval;
135  }
136  break;
137  }
138  case kFLOAT: {
139  const auto fval = boost::get<float>(scalar_tv);
140  CHECK(fval);
141  if (*fval == inline_fp_null_val(ti)) {
142  is_null_const = true;
143  } else {
144  d.floatval = *fval;
145  }
146  break;
147  }
148  case kTEXT:
149  case kVARCHAR:
150  case kCHAR: {
151  auto nullable_sptr = boost::get<NullableString>(scalar_tv);
152  CHECK(nullable_sptr);
153  if (boost::get<void*>(nullable_sptr)) {
154  is_null_const = true;
155  } else {
156  auto sptr = boost::get<std::string>(nullable_sptr);
157  d.stringval = new std::string(*sptr);
158  }
159  break;
160  }
161  default:
162  CHECK(false);
163  }
164  return {d, is_null_const};
165 }
166 
167 } // namespace
168 
169 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarRex(
170  const RexScalar* rex) const {
171  const auto rex_input = dynamic_cast<const RexInput*>(rex);
172  if (rex_input) {
173  return translateInput(rex_input);
174  }
175  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex);
176  if (rex_literal) {
177  return translateLiteral(rex_literal);
178  }
179  const auto rex_window_function = dynamic_cast<const RexWindowFunctionOperator*>(rex);
180  if (rex_window_function) {
181  return translateWindowFunction(rex_window_function);
182  }
183  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex);
184  if (rex_function) {
185  return translateFunction(rex_function);
186  }
187  const auto rex_operator = dynamic_cast<const RexOperator*>(rex);
188  if (rex_operator) {
189  return translateOper(rex_operator);
190  }
191  const auto rex_case = dynamic_cast<const RexCase*>(rex);
192  if (rex_case) {
193  return translateCase(rex_case);
194  }
195  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rex);
196  if (rex_subquery) {
197  return translateScalarSubquery(rex_subquery);
198  }
199  CHECK(false);
200  return nullptr;
201 }
202 
203 namespace {
204 
205 bool is_agg_supported_for_type(const SQLAgg& agg_kind, const SQLTypeInfo& arg_ti) {
206  if ((agg_kind == kMIN || agg_kind == kMAX || agg_kind == kSUM || agg_kind == kAVG) &&
207  !(arg_ti.is_number() || arg_ti.is_boolean() || arg_ti.is_time())) {
208  return false;
209  }
210 
211  return true;
212 }
213 
214 } // namespace
215 
216 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAggregateRex(
217  const RexAgg* rex,
218  const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
219  const auto agg_kind = rex->getKind();
220  const bool is_distinct = rex->isDistinct();
221  const bool takes_arg{rex->size() > 0};
222  std::shared_ptr<Analyzer::Expr> arg_expr;
223  std::shared_ptr<Analyzer::Constant> err_rate;
224  if (takes_arg) {
225  const auto operand = rex->getOperand(0);
226  CHECK_LT(operand, scalar_sources.size());
227  CHECK_LE(rex->size(), 2u);
228  arg_expr = scalar_sources[operand];
229  if (agg_kind == kAPPROX_COUNT_DISTINCT && rex->size() == 2) {
230  err_rate = std::dynamic_pointer_cast<Analyzer::Constant>(
231  scalar_sources[rex->getOperand(1)]);
232  if (!err_rate || err_rate->get_type_info().get_type() != kINT ||
233  err_rate->get_constval().intval < 1 || err_rate->get_constval().intval > 100) {
234  throw std::runtime_error(
235  "APPROX_COUNT_DISTINCT's second parameter should be SMALLINT literal between "
236  "1 and 100");
237  }
238  }
239  const auto& arg_ti = arg_expr->get_type_info();
240  if (!is_agg_supported_for_type(agg_kind, arg_ti)) {
241  throw std::runtime_error("Aggregate on " + arg_ti.get_type_name() +
242  " is not supported yet.");
243  }
244  }
245  const auto agg_ti = get_agg_type(agg_kind, arg_expr.get());
246  return makeExpr<Analyzer::AggExpr>(agg_ti, agg_kind, arg_expr, is_distinct, err_rate);
247 }
248 
// Translates a literal node into an Analyzer expression, switching on the
// literal's own type. `lit_ti` describes the literal as written; `target_ti`
// describes the type it is expected to take. For DECIMAL and DOUBLE literals a
// cast is added when the two differ.
//
// NOTE(review): this listing is missing two source lines inside this function
// (listing lines 265 and 307); see the notes at each gap. The code below is
// left exactly as found.
249 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLiteral(
250  const RexLiteral* rex_literal) {
251  auto lit_ti = build_type_info(
252  rex_literal->getType(), rex_literal->getScale(), rex_literal->getPrecision());
253  auto target_ti = build_type_info(rex_literal->getTargetType(),
254  rex_literal->getTypeScale(),
255  rex_literal->getTypePrecision());
256  switch (rex_literal->getType()) {
257  case kDECIMAL: {
258  const auto val = rex_literal->getVal<int64_t>();
259  const int precision = rex_literal->getPrecision();
260  const int scale = rex_literal->getScale();
// A zero-scale literal headed for a floating point target is turned into a
// floating point constant directly.
261  if (target_ti.is_fp() && !scale) {
262  return make_fp_constant(val, target_ti);
263  }
// NOTE(review): the ':' branch of this conditional (listing line 265, the
// zero-scale case) is missing from the listing.
264  auto lit_expr = scale ? Parser::FixedPtLiteral::analyzeValue(val, scale, precision)
266  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
267  }
268  case kTEXT: {
269  return Parser::StringLiteral::analyzeValue(rex_literal->getVal<std::string>());
270  }
271  case kBOOLEAN: {
272  Datum d;
273  d.boolval = rex_literal->getVal<bool>();
274  return makeExpr<Analyzer::Constant>(kBOOLEAN, false, d);
275  }
276  case kDOUBLE: {
277  Datum d;
278  d.doubleval = rex_literal->getVal<double>();
279  auto lit_expr = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
280  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
281  }
282  case kINTERVAL_DAY_TIME:
283  case kINTERVAL_YEAR_MONTH: {
284  Datum d;
285  d.bigintval = rex_literal->getVal<int64_t>();
286  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
287  }
288  case kTIME:
289  case kTIMESTAMP: {
// The stored value is used unchanged for a TIMESTAMP with precision > 0 and is
// divided by 1000 otherwise (presumably milliseconds to seconds - TODO confirm
// the unit of the incoming value).
290  Datum d;
291  d.bigintval =
292  rex_literal->getType() == kTIMESTAMP && rex_literal->getPrecision() > 0
293  ? rex_literal->getVal<int64_t>()
294  : rex_literal->getVal<int64_t>() / 1000;
295  return makeExpr<Analyzer::Constant>(
296  SQLTypeInfo(rex_literal->getType(), rex_literal->getPrecision(), 0, false),
297  false,
298  d);
299  }
300  case kDATE: {
// The stored value is scaled by 24 * 3600 (presumably days to seconds - TODO
// confirm the unit of the incoming value).
301  Datum d;
302  d.bigintval = rex_literal->getVal<int64_t>() * 24 * 3600;
303  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
304  }
305  case kNULLT: {
// A NULL literal with an array target becomes an ArrayExpr; any other target
// yields a null Constant of the target type.
306  if (target_ti.is_array()) {
// NOTE(review): the declaration of `args` (listing line 307) is missing from
// the listing.
308  // defaulting to valid sub-type for convenience
309  target_ti.set_subtype(kBOOLEAN);
310  return makeExpr<Analyzer::ArrayExpr>(target_ti, args, -1, true);
311  }
312  return makeExpr<Analyzer::Constant>(rex_literal->getTargetType(), true, Datum{0});
313  }
314  default: {
315  LOG(FATAL) << "Unexpected literal type " << lit_ti.get_type_name();
316  }
317  }
318  return nullptr;
319 }
320 
321 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarSubquery(
322  const RexSubQuery* rex_subquery) const {
323  if (just_explain_) {
324  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
325  }
326  CHECK(rex_subquery);
327  auto result = rex_subquery->getExecutionResult();
328  auto row_set = result->getRows();
329  if (row_set->rowCount() > size_t(1)) {
330  throw std::runtime_error("Scalar sub-query returned multiple rows");
331  }
332  if (row_set->rowCount() < size_t(1)) {
333  CHECK_EQ(row_set->rowCount(), size_t(0));
334  throw std::runtime_error("Scalar sub-query returned no results");
335  }
336  auto first_row = row_set->getNextRow(false, false);
337  auto scalar_tv = boost::get<ScalarTargetValue>(&first_row[0]);
338  auto ti = rex_subquery->getType();
339  if (ti.is_string()) {
340  throw std::runtime_error("Scalar sub-queries which return strings not supported");
341  }
342  Datum d{0};
343  bool is_null_const{false};
344  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
345  return makeExpr<Analyzer::Constant>(ti, is_null_const, d);
346 }
347 
348 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInput(
349  const RexInput* rex_input) const {
350  const auto source = rex_input->getSourceNode();
351  const auto it_rte_idx = input_to_nest_level_.find(source);
352  CHECK(it_rte_idx != input_to_nest_level_.end());
353  const int rte_idx = it_rte_idx->second;
354  const auto scan_source = dynamic_cast<const RelScan*>(source);
355  const auto& in_metainfo = source->getOutputMetainfo();
356  if (scan_source) {
357  // We're at leaf (scan) level and not supposed to have input metadata,
358  // the name and type information come directly from the catalog.
359  CHECK(in_metainfo.empty());
360  const auto table_desc = scan_source->getTableDescriptor();
361  const auto cd =
362  cat_.getMetadataForColumnBySpi(table_desc->tableId, rex_input->getIndex() + 1);
363  CHECK(cd);
364  auto col_ti = cd->columnType;
365  if (col_ti.is_string()) {
366  col_ti.set_type(kTEXT);
367  }
368  if (cd->isVirtualCol) {
369  // TODO(alex): remove at some point, we only need this fixup for backwards
370  // compatibility with old imported data
371  CHECK_EQ("rowid", cd->columnName);
372  col_ti.set_size(8);
373  }
374  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
375  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
376  col_ti.set_notnull(false);
377  }
378  return std::make_shared<Analyzer::ColumnVar>(
379  col_ti, table_desc->tableId, cd->columnId, rte_idx);
380  }
381  CHECK(!in_metainfo.empty());
382  CHECK_GE(rte_idx, 0);
383  const size_t col_id = rex_input->getIndex();
384  CHECK_LT(col_id, in_metainfo.size());
385  auto col_ti = in_metainfo[col_id].get_type_info();
386  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
387  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
388  col_ti.set_notnull(false);
389  }
390  return std::make_shared<Analyzer::ColumnVar>(col_ti, -source->getId(), col_id, rte_idx);
391 }
392 
393 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUoper(
394  const RexOperator* rex_operator) const {
395  CHECK_EQ(size_t(1), rex_operator->size());
396  const auto operand_expr = translateScalarRex(rex_operator->getOperand(0));
397  const auto sql_op = rex_operator->getOperator();
398  switch (sql_op) {
399  case kCAST: {
400  const auto& target_ti = rex_operator->getType();
401  CHECK_NE(kNULLT, target_ti.get_type());
402  const auto& operand_ti = operand_expr->get_type_info();
403  if (operand_ti.is_string() && target_ti.is_string()) {
404  return operand_expr;
405  }
406  if (target_ti.is_time() ||
407  operand_ti
408  .is_string()) { // TODO(alex): check and unify with the rest of the cases
409  // Do not propogate encoding on small dates
410  return target_ti.is_date_in_days()
411  ? operand_expr->add_cast(SQLTypeInfo(kDATE, false))
412  : operand_expr->add_cast(target_ti);
413  }
414  if (!operand_ti.is_string() && target_ti.is_string()) {
415  return operand_expr->add_cast(target_ti);
416  }
417 
418  return std::make_shared<Analyzer::UOper>(target_ti, false, sql_op, operand_expr);
419  }
420  case kNOT:
421  case kISNULL: {
422  return std::make_shared<Analyzer::UOper>(kBOOLEAN, sql_op, operand_expr);
423  }
424  case kISNOTNULL: {
425  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, operand_expr);
426  return std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
427  }
428  case kMINUS: {
429  const auto& ti = operand_expr->get_type_info();
430  return std::make_shared<Analyzer::UOper>(ti, false, kUMINUS, operand_expr);
431  }
432  case kUNNEST: {
433  const auto& ti = operand_expr->get_type_info();
434  CHECK(ti.is_array());
435  return makeExpr<Analyzer::UOper>(ti.get_elem_type(), false, kUNNEST, operand_expr);
436  }
437  default:
438  CHECK(false);
439  }
440  return nullptr;
441 }
442 
443 namespace {
444 
445 std::shared_ptr<Analyzer::Expr> get_in_values_expr(std::shared_ptr<Analyzer::Expr> arg,
446  const ResultSet& val_set) {
447  if (!can_use_parallel_algorithms(val_set)) {
448  return nullptr;
449  }
450  if (val_set.rowCount() > 5000000 && g_enable_watchdog) {
451  throw std::runtime_error(
452  "Unable to handle 'expr IN (subquery)', subquery returned 5M+ rows.");
453  }
454  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
455  const size_t fetcher_count = cpu_threads();
456  std::vector<std::list<std::shared_ptr<Analyzer::Expr>>> expr_set(
457  fetcher_count, std::list<std::shared_ptr<Analyzer::Expr>>());
458  std::vector<std::future<void>> fetcher_threads;
459  const auto& ti = arg->get_type_info();
460  const auto entry_count = val_set.entryCount();
461  for (size_t i = 0,
462  start_entry = 0,
463  stride = (entry_count + fetcher_count - 1) / fetcher_count;
464  i < fetcher_count && start_entry < entry_count;
465  ++i, start_entry += stride) {
466  const auto end_entry = std::min(start_entry + stride, entry_count);
467  fetcher_threads.push_back(std::async(
468  std::launch::async,
469  [&](std::list<std::shared_ptr<Analyzer::Expr>>& in_vals,
470  const size_t start,
471  const size_t end) {
472  for (auto index = start; index < end; ++index) {
473  auto row = val_set.getRowAt(index);
474  if (row.empty()) {
475  continue;
476  }
477  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
478  Datum d{0};
479  bool is_null_const{false};
480  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
481  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
482  auto ti_none_encoded = ti;
483  ti_none_encoded.set_compression(kENCODING_NONE);
484  auto none_encoded_string =
485  makeExpr<Analyzer::Constant>(ti, is_null_const, d);
486  auto dict_encoded_string = std::make_shared<Analyzer::UOper>(
487  ti, false, kCAST, none_encoded_string);
488  in_vals.push_back(dict_encoded_string);
489  } else {
490  in_vals.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
491  }
492  }
493  },
494  std::ref(expr_set[i]),
495  start_entry,
496  end_entry));
497  }
498  for (auto& child : fetcher_threads) {
499  child.get();
500  }
501 
502  val_set.moveToBegin();
503  for (auto& exprs : expr_set) {
504  value_exprs.splice(value_exprs.end(), exprs);
505  }
506  return makeExpr<Analyzer::InValues>(arg, value_exprs);
507 }
508 
509 } // namespace
510 
511 // Creates an Analyzer expression for an IN subquery which subsequently goes through the
512 // regular Executor::codegen() mechanism. The creation of the expression out of subquery's
513 // result set is parallelized whenever possible. In addition, take advantage of additional
514 // information that elements in the right hand side are constants; see
515 // getInIntegerSetExpr().
516 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInOper(
517  const RexOperator* rex_operator) const {
518  if (just_explain_) {
519  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
520  }
521  CHECK(rex_operator->size() == 2);
522  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
523  const auto rhs = rex_operator->getOperand(1);
524  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rhs);
525  CHECK(rex_subquery);
526  auto ti = lhs->get_type_info();
527  auto result = rex_subquery->getExecutionResult();
528  auto& row_set = result->getRows();
529  CHECK_EQ(size_t(1), row_set->colCount());
530  const auto& rhs_ti = row_set->getColType(0);
531  if (rhs_ti.get_type() != ti.get_type()) {
532  throw std::runtime_error(
533  "The two sides of the IN operator must have the same type; found " +
534  ti.get_type_name() + " and " + rhs_ti.get_type_name());
535  }
536  row_set->moveToBegin();
537  if (row_set->entryCount() > 10000) {
538  std::shared_ptr<Analyzer::Expr> expr;
539  if ((ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT)) &&
540  !row_set->getQueryMemDesc().didOutputColumnar()) {
541  expr = getInIntegerSetExpr(lhs, *row_set);
542  // Handle the highly unlikely case when the InIntegerSet ended up being tiny.
543  // Just let it fall through the usual InValues path at the end of this method,
544  // its codegen knows to use inline comparisons for few values.
545  if (expr && std::static_pointer_cast<Analyzer::InIntegerSet>(expr)
546  ->get_value_list()
547  .size() <= 100) {
548  expr = nullptr;
549  }
550  } else {
551  expr = get_in_values_expr(lhs, *row_set);
552  }
553  if (expr) {
554  return expr;
555  }
556  }
557  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
558  while (true) {
559  auto row = row_set->getNextRow(true, false);
560  if (row.empty()) {
561  break;
562  }
563  if (g_enable_watchdog && value_exprs.size() >= 10000) {
564  throw std::runtime_error(
565  "Unable to handle 'expr IN (subquery)', subquery returned 10000+ rows.");
566  }
567  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
568  Datum d{0};
569  bool is_null_const{false};
570  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
571  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
572  auto ti_none_encoded = ti;
573  ti_none_encoded.set_compression(kENCODING_NONE);
574  auto none_encoded_string = makeExpr<Analyzer::Constant>(ti, is_null_const, d);
575  auto dict_encoded_string =
576  std::make_shared<Analyzer::UOper>(ti, false, kCAST, none_encoded_string);
577  value_exprs.push_back(dict_encoded_string);
578  } else {
579  value_exprs.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
580  }
581  }
582  return makeExpr<Analyzer::InValues>(lhs, value_exprs);
583 }
584 
585 namespace {
586 
// Watchdog threshold (1 << 25 == 33,554,432) compared against the running
// total of IN-list values collected by the fill_* helpers in this namespace;
// it is only consulted when g_enable_watchdog is set.
587 const size_t g_max_integer_set_size{1 << 25};
588 
590  std::vector<int64_t>& in_vals,
591  std::atomic<size_t>& total_in_vals_count,
592  const ResultSet* values_rowset,
593  const std::pair<int64_t, int64_t> values_rowset_slice,
594  const StringDictionaryProxy* source_dict,
595  const StringDictionaryProxy* dest_dict,
596  const int64_t needle_null_val) {
597  CHECK(in_vals.empty());
598  bool dicts_are_equal = source_dict == dest_dict;
599  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
600  ++index) {
601  const auto row = values_rowset->getOneColRow(index);
602  if (UNLIKELY(!row.valid)) {
603  continue;
604  }
605  if (dicts_are_equal) {
606  in_vals.push_back(row.value);
607  } else {
608  const int string_id =
609  row.value == needle_null_val
610  ? needle_null_val
611  : dest_dict->getIdOfString(source_dict->getString(row.value));
612  if (string_id != StringDictionary::INVALID_STR_ID) {
613  in_vals.push_back(string_id);
614  }
615  }
616  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
617  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
618  throw std::runtime_error(
619  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
620  }
621  }
622 }
623 
624 void fill_integer_in_vals(std::vector<int64_t>& in_vals,
625  std::atomic<size_t>& total_in_vals_count,
626  const ResultSet* values_rowset,
627  const std::pair<int64_t, int64_t> values_rowset_slice) {
628  CHECK(in_vals.empty());
629  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
630  ++index) {
631  const auto row = values_rowset->getOneColRow(index);
632  if (row.valid) {
633  in_vals.push_back(row.value);
634  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
635  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
636  throw std::runtime_error(
637  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
638  }
639  }
640  }
641 }
642 
643 // Multi-node counterpart of the other version. Saves round-trips, which is crucial
644 // for a big right-hand side result. It only handles physical string dictionary ids,
645 // therefore it won't be able to handle a right-hand side sub-query with a CASE
646 // returning literals on some branches. That case isn't hard to handle either, but
647 // it's not clear it's actually important in practice.
648 // RelAlgTranslator::getInIntegerSetExpr makes sure, by checking the encodings, that this
649 // function isn't called in such cases.
651  std::vector<int64_t>& in_vals,
652  std::atomic<size_t>& total_in_vals_count,
653  const ResultSet* values_rowset,
654  const std::pair<int64_t, int64_t> values_rowset_slice,
655  const std::vector<LeafHostInfo>& leaf_hosts,
656  const DictRef source_dict_ref,
657  const DictRef dest_dict_ref,
658  const int32_t dest_generation,
659  const int64_t needle_null_val) {
660  CHECK(in_vals.empty());
661  std::vector<int32_t> source_ids;
662  source_ids.reserve(values_rowset->entryCount());
663  bool has_nulls = false;
664  if (source_dict_ref == dest_dict_ref) {
665  in_vals.reserve(values_rowset_slice.second - values_rowset_slice.first +
666  1); // Add 1 to cover interval
667  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
668  ++index) {
669  const auto row = values_rowset->getOneColRow(index);
670  if (!row.valid) {
671  continue;
672  }
673  if (row.value != needle_null_val) {
674  in_vals.push_back(row.value);
675  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
676  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
677  throw std::runtime_error(
678  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
679  }
680  } else {
681  has_nulls = true;
682  }
683  }
684  if (has_nulls) {
685  in_vals.push_back(
686  needle_null_val); // we've deduped null values as an optimization, although
687  // this is not required by consumer
688  }
689  return;
690  }
691  // Code path below is for when dictionaries are not shared
692  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
693  ++index) {
694  const auto row = values_rowset->getOneColRow(index);
695  if (row.valid) {
696  if (row.value != needle_null_val) {
697  source_ids.push_back(row.value);
698  } else {
699  has_nulls = true;
700  }
701  }
702  }
703  std::vector<int32_t> dest_ids;
704  translate_string_ids(dest_ids,
705  leaf_hosts.front(),
706  dest_dict_ref,
707  source_ids,
708  source_dict_ref,
709  dest_generation);
710  CHECK_EQ(dest_ids.size(), source_ids.size());
711  in_vals.reserve(dest_ids.size() + (has_nulls ? 1 : 0));
712  if (has_nulls) {
713  in_vals.push_back(needle_null_val);
714  }
715  for (const int32_t dest_id : dest_ids) {
716  if (dest_id != StringDictionary::INVALID_STR_ID) {
717  in_vals.push_back(dest_id);
718  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
719  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
720  throw std::runtime_error(
721  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
722  }
723  }
724  }
725 }
726 
727 } // namespace
728 
729 // The typical IN subquery involves either dictionary-encoded strings or integers.
730 // Analyzer::InValues is a very heavy representation of the right hand side of such
731 // a query since we already know the right hand would be a list of Analyzer::Constant
732 // shared pointers. We can avoid the big overhead of each Analyzer::Constant and the
733 // refcounting associated with shared pointers by creating an abbreviated InIntegerSet
734 // representation of the IN expression which takes advantage of this information.
// Builds an Analyzer::InIntegerSet for `arg IN (<values of val_set>)`, where
// `arg` is an integer or a dictionary-encoded string. The entries of `val_set`
// are split into contiguous slices, one std::async task per slice, and the
// per-slice vectors are concatenated in slice order afterwards.
//
// Returns nullptr when `val_set` cannot be processed with parallel algorithms,
// or, when g_cluster is set, for a string argument where either side has a
// non-positive comp_param.
//
// NOTE(review): this listing is missing three source lines inside the string
// branch (listing lines 787, 791 and 797): the names of the two helpers being
// called and one argument. The code below is left exactly as found.
735 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::getInIntegerSetExpr(
736  std::shared_ptr<Analyzer::Expr> arg,
737  const ResultSet& val_set) const {
738  if (!can_use_parallel_algorithms(val_set)) {
739  return nullptr;
740  }
741  std::vector<int64_t> value_exprs;
742  const size_t fetcher_count = cpu_threads();
// One output vector per worker; each task writes only to its own slot.
743  std::vector<std::vector<int64_t>> expr_set(fetcher_count);
744  std::vector<std::future<void>> fetcher_threads;
745  const auto& arg_type = arg->get_type_info();
746  const auto entry_count = val_set.entryCount();
747  CHECK_EQ(size_t(1), val_set.colCount());
748  const auto& col_type = val_set.getColType(0);
749  if (g_cluster && arg_type.is_string() &&
750  (col_type.get_comp_param() <= 0 || arg_type.get_comp_param() <= 0)) {
751  // Skip this case for now, see comment for fill_dictionary_encoded_in_vals.
752  return nullptr;
753  }
// Running total shared by all workers; the fill helpers use it for the
// watchdog limit.
754  std::atomic<size_t> total_in_vals_count{0};
// stride is the slice length, rounded up so that fetcher_count slices cover
// entry_count entries.
755  for (size_t i = 0,
756  start_entry = 0,
757  stride = (entry_count + fetcher_count - 1) / fetcher_count;
758  i < fetcher_count && start_entry < entry_count;
759  ++i, start_entry += stride) {
760  expr_set[i].reserve(entry_count / fetcher_count);
761  const auto end_entry = std::min(start_entry + stride, entry_count);
762  if (arg_type.is_string()) {
763  CHECK_EQ(kENCODING_DICT, arg_type.get_compression());
764  // const int32_t dest_dict_id = arg_type.get_comp_param();
765  // const int32_t source_dict_id = col_type.get_comp_param();
766  const DictRef dest_dict_ref(arg_type.get_comp_param(), cat_.getDatabaseId());
767  const DictRef source_dict_ref(col_type.get_comp_param(), cat_.getDatabaseId());
// dd: dictionary proxy for the argument's dictionary (destination);
// sd: dictionary proxy for the sub-query column's dictionary (source).
768  const auto dd = executor_->getStringDictionaryProxy(
769  arg_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
770  const auto sd = executor_->getStringDictionaryProxy(
771  col_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
772  CHECK(sd);
773  const auto needle_null_val = inline_int_null_val(arg_type);
774  fetcher_threads.push_back(std::async(
775  std::launch::async,
776  [this,
777  &val_set,
778  &total_in_vals_count,
779  sd,
780  dd,
781  source_dict_ref,
782  dest_dict_ref,
783  needle_null_val](
784  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
785  if (g_cluster) {
786  CHECK_GE(dd->getGeneration(), 0);
// NOTE(review): the callee name and first argument (listing line 787) are
// missing here; the remaining arguments match the multi-node
// fill_dictionary_encoded_in_vals helper defined earlier in this file.
788  total_in_vals_count,
789  &val_set,
790  {start, end},
// NOTE(review): one argument (listing line 791) is missing here; by position
// it would be the helper's leaf-hosts parameter - confirm against the
// original source.
792  source_dict_ref,
793  dest_dict_ref,
794  dd->getGeneration(),
795  needle_null_val);
796  } else {
// NOTE(review): the callee name and first argument (listing line 797) are
// missing here; the remaining arguments match the single-node
// fill_dictionary_encoded_in_vals helper defined earlier in this file.
798  total_in_vals_count,
799  &val_set,
800  {start, end},
801  sd,
802  dd,
803  needle_null_val);
804  }
805  },
806  std::ref(expr_set[i]),
807  start_entry,
808  end_entry));
809  } else {
810  CHECK(arg_type.is_integer());
811  fetcher_threads.push_back(std::async(
812  std::launch::async,
813  [&val_set, &total_in_vals_count](
814  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
815  fill_integer_in_vals(in_vals, total_in_vals_count, &val_set, {start, end});
816  },
817  std::ref(expr_set[i]),
818  start_entry,
819  end_entry));
820  }
821  }
// Wait for every worker; get() also rethrows an exception raised inside a task.
822  for (auto& child : fetcher_threads) {
823  child.get();
824  }
825 
826  val_set.moveToBegin();
827  value_exprs.reserve(entry_count);
828  for (auto& exprs : expr_set) {
829  value_exprs.insert(value_exprs.end(), exprs.begin(), exprs.end());
830  }
// The last argument is true only when both the argument and the sub-query
// column are declared not-null.
831  return makeExpr<Analyzer::InIntegerSet>(
832  arg, value_exprs, arg_type.get_notnull() && col_type.get_notnull());
833 }
834 
835 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOper(
836  const RexOperator* rex_operator) const {
837  CHECK_GT(rex_operator->size(), size_t(0));
838  if (rex_operator->size() == 1) {
839  return translateUoper(rex_operator);
840  }
841  const auto sql_op = rex_operator->getOperator();
842  if (sql_op == kIN) {
843  return translateInOper(rex_operator);
844  }
845  if (sql_op == kMINUS || sql_op == kPLUS) {
846  auto date_plus_minus = translateDatePlusMinus(rex_operator);
847  if (date_plus_minus) {
848  return date_plus_minus;
849  }
850  }
851  if (sql_op == kOVERLAPS) {
852  return translateOverlapsOper(rex_operator);
853  } else if (IS_COMPARISON(sql_op)) {
854  auto geo_comp = translateGeoComparison(rex_operator);
855  if (geo_comp) {
856  return geo_comp;
857  }
858  }
859  auto lhs = translateScalarRex(rex_operator->getOperand(0));
860  for (size_t i = 1; i < rex_operator->size(); ++i) {
861  std::shared_ptr<Analyzer::Expr> rhs;
862  SQLQualifier sql_qual{kONE};
863  const auto rhs_op = rex_operator->getOperand(i);
864  std::tie(rhs, sql_qual) = get_quantified_rhs(rhs_op, *this);
865  if (!rhs) {
866  rhs = translateScalarRex(rhs_op);
867  }
868  CHECK(rhs);
869  lhs = Parser::OperExpr::normalize(sql_op, sql_qual, lhs, rhs);
870  }
871  return lhs;
872 }
873 
874 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOverlapsOper(
875  const RexOperator* rex_operator) const {
876  const auto sql_op = rex_operator->getOperator();
877  CHECK(sql_op == kOVERLAPS);
878 
879  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
880  const auto lhs_ti = lhs->get_type_info();
881  if (lhs_ti.is_geometry()) {
882  return translateGeoOverlapsOper(rex_operator);
883  } else {
884  throw std::runtime_error(
885  "Overlaps equivalence is currently only supported for geospatial types");
886  }
887 }
888 
889 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCase(
890  const RexCase* rex_case) const {
891  std::shared_ptr<Analyzer::Expr> else_expr;
892  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
893  expr_list;
894  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
895  const auto when_expr = translateScalarRex(rex_case->getWhen(i));
896  const auto then_expr = translateScalarRex(rex_case->getThen(i));
897  expr_list.emplace_back(when_expr, then_expr);
898  }
899  if (rex_case->getElse()) {
900  else_expr = translateScalarRex(rex_case->getElse());
901  }
902  return Parser::CaseExpr::normalize(expr_list, else_expr);
903 }
904 
905 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLike(
906  const RexFunctionOperator* rex_function) const {
907  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
908  const auto arg = translateScalarRex(rex_function->getOperand(0));
909  const auto like = translateScalarRex(rex_function->getOperand(1));
910  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(like)) {
911  throw std::runtime_error("The matching pattern must be a literal.");
912  }
913  const auto escape = (rex_function->size() == 3)
914  ? translateScalarRex(rex_function->getOperand(2))
915  : nullptr;
916  const bool is_ilike = rex_function->getName() == "PG_ILIKE"sv;
917  return Parser::LikeExpr::get(arg, like, escape, is_ilike, false);
918 }
919 
920 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRegexp(
921  const RexFunctionOperator* rex_function) const {
922  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
923  const auto arg = translateScalarRex(rex_function->getOperand(0));
924  const auto pattern = translateScalarRex(rex_function->getOperand(1));
925  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(pattern)) {
926  throw std::runtime_error("The matching pattern must be a literal.");
927  }
928  const auto escape = (rex_function->size() == 3)
929  ? translateScalarRex(rex_function->getOperand(2))
930  : nullptr;
931  return Parser::RegexpExpr::get(arg, pattern, escape, false);
932 }
933 
934 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLikely(
935  const RexFunctionOperator* rex_function) const {
936  CHECK(rex_function->size() == 1);
937  const auto arg = translateScalarRex(rex_function->getOperand(0));
938  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.9375);
939 }
940 
941 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUnlikely(
942  const RexFunctionOperator* rex_function) const {
943  CHECK(rex_function->size() == 1);
944  const auto arg = translateScalarRex(rex_function->getOperand(0));
945  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.0625);
946 }
947 
948 namespace {
949 
951  const std::shared_ptr<Analyzer::Constant> literal_expr) {
952  if (!literal_expr || literal_expr->get_is_null()) {
953  throw std::runtime_error("The 'DatePart' argument must be a not 'null' literal.");
954  }
955 }
956 
957 } // namespace
958 
959 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateExtract(
960  const RexFunctionOperator* rex_function) const {
961  CHECK_EQ(size_t(2), rex_function->size());
962  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
963  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
965  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
966  const bool is_date_trunc = rex_function->getName() == "PG_DATE_TRUNC"sv;
967  if (is_date_trunc) {
968  return DateTruncExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
969  } else {
970  return ExtractExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
971  }
972 }
973 
974 namespace {
975 
976 std::shared_ptr<Analyzer::Constant> makeNumericConstant(const SQLTypeInfo& ti,
977  const long val) {
978  CHECK(ti.is_number());
979  Datum datum{0};
980  switch (ti.get_type()) {
981  case kTINYINT: {
982  datum.tinyintval = val;
983  break;
984  }
985  case kSMALLINT: {
986  datum.smallintval = val;
987  break;
988  }
989  case kINT: {
990  datum.intval = val;
991  break;
992  }
993  case kBIGINT: {
994  datum.bigintval = val;
995  break;
996  }
997  case kDECIMAL:
998  case kNUMERIC: {
999  datum.bigintval = val * exp_to_scale(ti.get_scale());
1000  break;
1001  }
1002  case kFLOAT: {
1003  datum.floatval = val;
1004  break;
1005  }
1006  case kDOUBLE: {
1007  datum.doubleval = val;
1008  break;
1009  }
1010  default:
1011  CHECK(false);
1012  }
1013  return makeExpr<Analyzer::Constant>(ti, false, datum);
1014 }
1015 
1016 } // namespace
1017 
1018 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDateadd(
1019  const RexFunctionOperator* rex_function) const {
1020  CHECK_EQ(size_t(3), rex_function->size());
1021  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1022  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1024  const auto number_units = translateScalarRex(rex_function->getOperand(1));
1025  const auto number_units_const =
1026  std::dynamic_pointer_cast<Analyzer::Constant>(number_units);
1027  if (number_units_const && number_units_const->get_is_null()) {
1028  throw std::runtime_error("The 'Interval' argument literal must not be 'null'.");
1029  }
1030  auto cast_number_units = number_units->add_cast(SQLTypeInfo(kBIGINT, false));
1031  const auto datetime = translateScalarRex(rex_function->getOperand(2));
1032  const auto& datetime_ti = datetime->get_type_info();
1033  if (datetime_ti.get_type() == kTIME) {
1034  throw std::runtime_error("DateAdd operation not supported for TIME.");
1035  }
1036  const auto& field = to_dateadd_field(*timeunit_lit->get_constval().stringval);
1037  if (!datetime_ti.is_high_precision_timestamp() &&
1039  // Scale the number to get value in seconds
1040  const auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1041  cast_number_units = makeExpr<Analyzer::BinOper>(
1042  bigint_ti.get_type(),
1043  kDIVIDE,
1044  kONE,
1045  cast_number_units,
1046  makeNumericConstant(bigint_ti,
1048  cast_number_units = fold_expr(cast_number_units.get());
1049  }
1050  if (datetime_ti.is_high_precision_timestamp() &&
1053  field, datetime_ti.get_dimension());
1054  if (oper_scale.first) {
1055  // scale number to desired precision
1056  const auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1057  cast_number_units =
1058  makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1059  oper_scale.first,
1060  kONE,
1061  cast_number_units,
1062  makeNumericConstant(bigint_ti, oper_scale.second));
1063  cast_number_units = fold_expr(cast_number_units.get());
1064  }
1065  }
1066  return makeExpr<Analyzer::DateaddExpr>(
1067  SQLTypeInfo(kTIMESTAMP, datetime_ti.get_dimension(), 0, false),
1068  to_dateadd_field(*timeunit_lit->get_constval().stringval),
1069  cast_number_units,
1070  datetime);
1071 }
1072 
1073 namespace {
1074 
1076  CHECK(op == kPLUS);
1077  return "DATETIME_PLUS"s;
1078 }
1079 
1080 } // namespace
1081 
1082 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatePlusMinus(
1083  const RexOperator* rex_operator) const {
1084  if (rex_operator->size() != 2) {
1085  return nullptr;
1086  }
1087  const auto datetime = translateScalarRex(rex_operator->getOperand(0));
1088  const auto datetime_ti = datetime->get_type_info();
1089  if (!datetime_ti.is_timestamp() && !datetime_ti.is_date()) {
1090  if (datetime_ti.get_type() == kTIME) {
1091  throw std::runtime_error("DateTime addition/subtraction not supported for TIME.");
1092  }
1093  return nullptr;
1094  }
1095  const auto rhs = translateScalarRex(rex_operator->getOperand(1));
1096  const auto rhs_ti = rhs->get_type_info();
1097  if (rhs_ti.get_type() == kTIMESTAMP || rhs_ti.get_type() == kDATE) {
1098  if (datetime_ti.is_high_precision_timestamp() ||
1099  rhs_ti.is_high_precision_timestamp()) {
1100  throw std::runtime_error(
1101  "High Precision timestamps are not supported for TIMESTAMPDIFF operation. Use "
1102  "DATEDIFF.");
1103  }
1104  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1105  const auto& rex_operator_ti = rex_operator->getType();
1106  const auto datediff_field =
1107  (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) ? dtSECOND : dtMONTH;
1108  auto result =
1109  makeExpr<Analyzer::DatediffExpr>(bigint_ti, datediff_field, rhs, datetime);
1110  // multiply 1000 to result since expected result should be in millisecond precision.
1111  if (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) {
1112  return makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1113  kMULTIPLY,
1114  kONE,
1115  result,
1116  makeNumericConstant(bigint_ti, 1000));
1117  } else {
1118  return result;
1119  }
1120  }
1121  const auto op = rex_operator->getOperator();
1122  if (op == kPLUS) {
1123  std::vector<std::shared_ptr<Analyzer::Expr>> args = {datetime, rhs};
1124  auto dt_plus = makeExpr<Analyzer::FunctionOper>(
1125  datetime_ti, get_datetimeplus_rewrite_funcname(op), args);
1126  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1127  if (date_trunc) {
1128  return date_trunc;
1129  }
1130  }
1131  const auto interval = fold_expr(rhs.get());
1132  auto interval_ti = interval->get_type_info();
1133  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1134  const auto interval_lit = std::dynamic_pointer_cast<Analyzer::Constant>(interval);
1135  if (interval_ti.get_type() == kINTERVAL_DAY_TIME) {
1136  std::shared_ptr<Analyzer::Expr> interval_sec;
1137  if (interval_lit) {
1138  interval_sec =
1139  makeNumericConstant(bigint_ti,
1140  (op == kMINUS ? -interval_lit->get_constval().bigintval
1141  : interval_lit->get_constval().bigintval) /
1142  1000);
1143  } else {
1144  interval_sec = makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1145  kDIVIDE,
1146  kONE,
1147  interval,
1148  makeNumericConstant(bigint_ti, 1000));
1149  if (op == kMINUS) {
1150  interval_sec =
1151  std::make_shared<Analyzer::UOper>(bigint_ti, false, kUMINUS, interval_sec);
1152  }
1153  }
1154  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daSECOND, interval_sec, datetime);
1155  }
1156  CHECK(interval_ti.get_type() == kINTERVAL_YEAR_MONTH);
1157  const auto interval_months = op == kMINUS ? std::make_shared<Analyzer::UOper>(
1158  bigint_ti, false, kUMINUS, interval)
1159  : interval;
1160  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daMONTH, interval_months, datetime);
1161 }
1162 
1163 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatediff(
1164  const RexFunctionOperator* rex_function) const {
1165  CHECK_EQ(size_t(3), rex_function->size());
1166  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1167  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1169  const auto start = translateScalarRex(rex_function->getOperand(1));
1170  const auto end = translateScalarRex(rex_function->getOperand(2));
1171  const auto field = to_datediff_field(*timeunit_lit->get_constval().stringval);
1172  return makeExpr<Analyzer::DatediffExpr>(SQLTypeInfo(kBIGINT, false), field, start, end);
1173 }
1174 
1175 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatepart(
1176  const RexFunctionOperator* rex_function) const {
1177  CHECK_EQ(size_t(2), rex_function->size());
1178  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1179  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1181  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1182  return ExtractExpr::generate(
1183  from_expr, to_datepart_field(*timeunit_lit->get_constval().stringval));
1184 }
1185 
1186 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLength(
1187  const RexFunctionOperator* rex_function) const {
1188  CHECK_EQ(size_t(1), rex_function->size());
1189  const auto str_arg = translateScalarRex(rex_function->getOperand(0));
1190  return makeExpr<Analyzer::CharLengthExpr>(str_arg->decompress(),
1191  rex_function->getName() == "CHAR_LENGTH"sv);
1192 }
1193 
1194 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateKeyForString(
1195  const RexFunctionOperator* rex_function) const {
1196  const auto& args = translateFunctionArgs(rex_function);
1197  CHECK_EQ(size_t(1), args.size());
1198  const auto expr = dynamic_cast<Analyzer::Expr*>(args[0].get());
1199  if (nullptr == expr || !expr->get_type_info().is_string() ||
1200  expr->get_type_info().is_varlen()) {
1201  throw std::runtime_error(rex_function->getName() +
1202  " expects a dictionary encoded text column.");
1203  }
1204  return makeExpr<Analyzer::KeyForStringExpr>(args[0]);
1205 }
1206 
1207 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLower(
1208  const RexFunctionOperator* rex_function) const {
1209  const auto& args = translateFunctionArgs(rex_function);
1210  CHECK_EQ(size_t(1), args.size());
1211  CHECK(args[0]);
1212 
1213  if (args[0]->get_type_info().is_dict_encoded_string() ||
1214  dynamic_cast<Analyzer::Constant*>(args[0].get())) {
1215  return makeExpr<Analyzer::LowerExpr>(args[0]);
1216  }
1217 
1218  throw std::runtime_error(rex_function->getName() +
1219  " expects a dictionary encoded text column or a literal.");
1220 }
1221 
1223  const RexFunctionOperator* rex_function) const {
1224  const auto ret_ti = rex_function->getType();
1225  const auto arg = translateScalarRex(rex_function->getOperand(0));
1226  const auto arg_ti = arg->get_type_info();
1227  if (!arg_ti.is_array()) {
1228  throw std::runtime_error(rex_function->getName() + " expects an array expression.");
1229  }
1230  if (arg_ti.get_subtype() == kARRAY) {
1231  throw std::runtime_error(rex_function->getName() +
1232  " expects one-dimension array expression.");
1233  }
1234  const auto array_size = arg_ti.get_size();
1235  const auto array_elem_size = arg_ti.get_elem_type().get_array_context_logical_size();
1236 
1237  if (array_size > 0) {
1238  if (array_elem_size <= 0) {
1239  throw std::runtime_error(rex_function->getName() +
1240  ": unexpected array element type.");
1241  }
1242  // Return cardinality of a fixed length array
1243  return makeNumericConstant(ret_ti, array_size / array_elem_size);
1244  }
1245  // Variable length array cardinality will be calculated at runtime
1246  return makeExpr<Analyzer::CardinalityExpr>(arg);
1247 }
1248 
1249 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateItem(
1250  const RexFunctionOperator* rex_function) const {
1251  CHECK_EQ(size_t(2), rex_function->size());
1252  const auto base = translateScalarRex(rex_function->getOperand(0));
1253  const auto index = translateScalarRex(rex_function->getOperand(1));
1254  return makeExpr<Analyzer::BinOper>(
1255  base->get_type_info().get_elem_type(), false, kARRAY_AT, kONE, base, index);
1256 }
1257 
1258 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateNow() const {
1260 }
1261 
1262 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatetime(
1263  const RexFunctionOperator* rex_function) const {
1264  CHECK_EQ(size_t(1), rex_function->size());
1265  const auto arg = translateScalarRex(rex_function->getOperand(0));
1266  const auto arg_lit = std::dynamic_pointer_cast<Analyzer::Constant>(arg);
1267  const std::string datetime_err{R"(Only DATETIME('NOW') supported for now.)"};
1268  if (!arg_lit || arg_lit->get_is_null()) {
1269  throw std::runtime_error(datetime_err);
1270  }
1271  CHECK(arg_lit->get_type_info().is_string());
1272  if (*arg_lit->get_constval().stringval != "NOW"sv) {
1273  throw std::runtime_error(datetime_err);
1274  }
1275  return translateNow();
1276 }
1277 
1278 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAbs(
1279  const RexFunctionOperator* rex_function) const {
1280  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1281  expr_list;
1282  CHECK_EQ(size_t(1), rex_function->size());
1283  const auto operand = translateScalarRex(rex_function->getOperand(0));
1284  const auto& operand_ti = operand->get_type_info();
1285  CHECK(operand_ti.is_number());
1286  const auto zero = makeNumericConstant(operand_ti, 0);
1287  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1288  const auto uminus_operand =
1289  makeExpr<Analyzer::UOper>(operand_ti.get_type(), kUMINUS, operand);
1290  expr_list.emplace_back(lt_zero, uminus_operand);
1291  return makeExpr<Analyzer::CaseExpr>(operand_ti, false, expr_list, operand);
1292 }
1293 
1294 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSign(
1295  const RexFunctionOperator* rex_function) const {
1296  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1297  expr_list;
1298  CHECK_EQ(size_t(1), rex_function->size());
1299  const auto operand = translateScalarRex(rex_function->getOperand(0));
1300  const auto& operand_ti = operand->get_type_info();
1301  CHECK(operand_ti.is_number());
1302  const auto zero = makeNumericConstant(operand_ti, 0);
1303  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1304  expr_list.emplace_back(lt_zero, makeNumericConstant(operand_ti, -1));
1305  const auto eq_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, operand, zero);
1306  expr_list.emplace_back(eq_zero, makeNumericConstant(operand_ti, 0));
1307  const auto gt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kGT, kONE, operand, zero);
1308  expr_list.emplace_back(gt_zero, makeNumericConstant(operand_ti, 1));
1309  return makeExpr<Analyzer::CaseExpr>(
1310  operand_ti,
1311  false,
1312  expr_list,
1313  makeExpr<Analyzer::Constant>(operand_ti, true, Datum{0}));
1314 }
1315 
1316 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOffsetInFragment() const {
1317  return makeExpr<Analyzer::OffsetInFragment>();
1318 }
1319 
1321  const RexFunctionOperator* rex_function) const {
1322  if (rex_function->getType().get_subtype() == kNULLT) {
1323  auto sql_type = rex_function->getType();
1324  CHECK(sql_type.get_type() == kARRAY);
1325 
1326  // FIX-ME: Deal with NULL arrays
1327  auto translated_function_args(translateFunctionArgs(rex_function));
1328  if (translated_function_args.size() > 0) {
1329  auto const& first_element_logical_type(
1330  get_nullable_logical_type_info(translated_function_args[0]->get_type_info()));
1331 
1332  on_member_of_typeset<kCHAR, kVARCHAR, kTEXT>(
1333  first_element_logical_type,
1334  [&] {
1335  bool same_type_status = true;
1336  for (auto const& expr_ptr : translated_function_args) {
1337  same_type_status =
1338  same_type_status && (expr_ptr->get_type_info().is_string());
1339  }
1340 
1341  if (same_type_status == false) {
1342  throw std::runtime_error(
1343  "All elements of the array are not of the same logical subtype; "
1344  "consider casting to force this condition.");
1345  }
1346 
1347  sql_type.set_subtype(first_element_logical_type.get_type());
1348  sql_type.set_compression(kENCODING_FIXED);
1349  sql_type.set_comp_param(TRANSIENT_DICT_ID);
1350  },
1351  [&] {
1352  // Non string types
1353  bool same_type_status = true;
1354  for (auto const& expr_ptr : translated_function_args) {
1355  same_type_status =
1356  same_type_status &&
1357  (first_element_logical_type ==
1358  get_nullable_logical_type_info(expr_ptr->get_type_info()));
1359  }
1360 
1361  if (same_type_status == false) {
1362  throw std::runtime_error(
1363  "All elements of the array are not of the same logical subtype; "
1364  "consider casting to force this condition.");
1365  }
1366  sql_type.set_subtype(first_element_logical_type.get_type());
1367  sql_type.set_scale(first_element_logical_type.get_scale());
1368  sql_type.set_precision(first_element_logical_type.get_precision());
1369  });
1370 
1371  feature_stash_.setCPUOnlyExecutionRequired();
1372  return makeExpr<Analyzer::ArrayExpr>(
1373  sql_type, translated_function_args, feature_stash_.getAndBumpArrayExprCount());
1374  } else {
1375  // defaulting to valid sub-type for convenience
1376  sql_type.set_subtype(kBOOLEAN);
1377  return makeExpr<Analyzer::ArrayExpr>(
1378  sql_type, translated_function_args, feature_stash_.getAndBumpArrayExprCount());
1379  }
1380  } else {
1381  feature_stash_.setCPUOnlyExecutionRequired();
1382  return makeExpr<Analyzer::ArrayExpr>(rex_function->getType(),
1383  translateFunctionArgs(rex_function),
1384  feature_stash_.getAndBumpArrayExprCount());
1385  }
1386 }
1387 
1388 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateFunction(
1389  const RexFunctionOperator* rex_function) const {
1390  if (func_resolve(rex_function->getName(), "LIKE"sv, "PG_ILIKE"sv)) {
1391  return translateLike(rex_function);
1392  }
1393  if (rex_function->getName() == "REGEXP_LIKE"sv) {
1394  return translateRegexp(rex_function);
1395  }
1396  if (rex_function->getName() == "LIKELY"sv) {
1397  return translateLikely(rex_function);
1398  }
1399  if (rex_function->getName() == "UNLIKELY"sv) {
1400  return translateUnlikely(rex_function);
1401  }
1402  if (func_resolve(rex_function->getName(), "PG_EXTRACT"sv, "PG_DATE_TRUNC"sv)) {
1403  return translateExtract(rex_function);
1404  }
1405  if (rex_function->getName() == "DATEADD"sv) {
1406  return translateDateadd(rex_function);
1407  }
1408  if (rex_function->getName() == "DATEDIFF"sv) {
1409  return translateDatediff(rex_function);
1410  }
1411  if (rex_function->getName() == "DATEPART"sv) {
1412  return translateDatepart(rex_function);
1413  }
1414  if (func_resolve(rex_function->getName(), "LENGTH"sv, "CHAR_LENGTH"sv)) {
1415  return translateLength(rex_function);
1416  }
1417  if (rex_function->getName() == "KEY_FOR_STRING"sv) {
1418  return translateKeyForString(rex_function);
1419  }
1420  if (g_enable_experimental_string_functions && rex_function->getName() == "LOWER"sv) {
1421  return translateLower(rex_function);
1422  }
1423  if (func_resolve(rex_function->getName(), "CARDINALITY"sv, "ARRAY_LENGTH"sv)) {
1424  return translateCardinality(rex_function);
1425  }
1426  if (rex_function->getName() == "ITEM"sv) {
1427  return translateItem(rex_function);
1428  }
1429  if (rex_function->getName() == "NOW"sv) {
1430  return translateNow();
1431  }
1432  if (rex_function->getName() == "DATETIME"sv) {
1433  return translateDatetime(rex_function);
1434  }
1435  if (func_resolve(rex_function->getName(), "usTIMESTAMP"sv, "nsTIMESTAMP"sv)) {
1436  return translateHPTLiteral(rex_function);
1437  }
1438  if (rex_function->getName() == "ABS"sv) {
1439  return translateAbs(rex_function);
1440  }
1441  if (rex_function->getName() == "SIGN"sv) {
1442  return translateSign(rex_function);
1443  }
1444  if (func_resolve(rex_function->getName(), "CEIL"sv, "FLOOR"sv)) {
1445  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1446  rex_function->getType(),
1447  rex_function->getName(),
1448  translateFunctionArgs(rex_function));
1449  } else if (rex_function->getName() == "ROUND"sv) {
1450  std::vector<std::shared_ptr<Analyzer::Expr>> args =
1451  translateFunctionArgs(rex_function);
1452 
1453  if (rex_function->size() == 1) {
1454  // push a 0 constant if 2nd operand is missing.
1455  // this needs to be done as calcite returns
1456  // only the 1st operand without defaulting the 2nd one
1457  // when the user did not specify the 2nd operand.
1458  SQLTypes t = kSMALLINT;
1459  Datum d;
1460  d.smallintval = 0;
1461  args.push_back(makeExpr<Analyzer::Constant>(t, false, d));
1462  }
1463 
1464  // make sure we have only 2 operands
1465  CHECK(args.size() == 2);
1466 
1467  if (!args[0]->get_type_info().is_number()) {
1468  throw std::runtime_error("Only numeric 1st operands are supported");
1469  }
1470 
1471  // the 2nd operand does not need to be a constant
1472  // it can happily reference another integer column
1473  if (!args[1]->get_type_info().is_integer()) {
1474  throw std::runtime_error("Only integer 2nd operands are supported");
1475  }
1476 
1477  // Calcite may upcast decimals in a way that is
1478  // incompatible with the extension function input. Play it safe and stick with the
1479  // argument type instead.
1480  const SQLTypeInfo ret_ti = args[0]->get_type_info().is_decimal()
1481  ? args[0]->get_type_info()
1482  : rex_function->getType();
1483 
1484  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1485  ret_ti, rex_function->getName(), args);
1486  }
1487  if (rex_function->getName() == "DATETIME_PLUS"sv) {
1488  auto dt_plus = makeExpr<Analyzer::FunctionOper>(rex_function->getType(),
1489  rex_function->getName(),
1490  translateFunctionArgs(rex_function));
1491  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1492  if (date_trunc) {
1493  return date_trunc;
1494  }
1495  return translateDateadd(rex_function);
1496  }
1497  if (rex_function->getName() == "/INT"sv) {
1498  CHECK_EQ(size_t(2), rex_function->size());
1499  std::shared_ptr<Analyzer::Expr> lhs = translateScalarRex(rex_function->getOperand(0));
1500  std::shared_ptr<Analyzer::Expr> rhs = translateScalarRex(rex_function->getOperand(1));
1501  const auto rhs_lit = std::dynamic_pointer_cast<Analyzer::Constant>(rhs);
1502  return Parser::OperExpr::normalize(kDIVIDE, kONE, lhs, rhs);
1503  }
1504  if (rex_function->getName() == "Reinterpret"sv) {
1505  CHECK_EQ(size_t(1), rex_function->size());
1506  return translateScalarRex(rex_function->getOperand(0));
1507  }
1508  if (func_resolve(rex_function->getName(),
1509  "ST_X"sv,
1510  "ST_Y"sv,
1511  "ST_XMin"sv,
1512  "ST_YMin"sv,
1513  "ST_XMax"sv,
1514  "ST_YMax"sv,
1515  "ST_NRings"sv,
1516  "ST_NPoints"sv,
1517  "ST_Length"sv,
1518  "ST_Perimeter"sv,
1519  "ST_Area"sv,
1520  "ST_SRID"sv,
1521  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1522  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1523  "OmniSci_Geo_PolyBoundsPtr"sv,
1524  "OmniSci_Geo_PolyRenderGroup"sv)) {
1525  CHECK_EQ(rex_function->size(), size_t(1));
1526  return translateUnaryGeoFunction(rex_function);
1527  }
1528  if (func_resolve(rex_function->getName(),
1529  "convert_meters_to_pixel_width"sv,
1530  "convert_meters_to_pixel_height"sv,
1531  "is_point_in_view"sv,
1532  "is_point_size_in_view"sv)) {
1533  return translateFunctionWithGeoArg(rex_function);
1534  }
1535  if (func_resolve(rex_function->getName(),
1536  "ST_Distance"sv,
1537  "ST_MaxDistance"sv,
1538  "ST_Intersects"sv,
1539  "ST_Disjoint"sv,
1540  "ST_Contains"sv,
1541  "ST_Within"sv)) {
1542  CHECK_EQ(rex_function->size(), size_t(2));
1543  return translateBinaryGeoFunction(rex_function);
1544  }
1545  if (func_resolve(rex_function->getName(), "ST_DWithin"sv, "ST_DFullyWithin"sv)) {
1546  CHECK_EQ(rex_function->size(), size_t(3));
1547  return translateTernaryGeoFunction(rex_function);
1548  }
1549  if (rex_function->getName() == "OFFSET_IN_FRAGMENT"sv) {
1550  CHECK_EQ(size_t(0), rex_function->size());
1551  return translateOffsetInFragment();
1552  }
1553  if (rex_function->getName() == "ARRAY"sv) {
1554  // Var args; currently no check. Possible fix-me -- can array have 0 elements?
1555  return translateArrayFunction(rex_function);
1556  }
1557  if (func_resolve(rex_function->getName(),
1558  "ST_GeomFromText"sv,
1559  "ST_GeogFromText"sv,
1560  "ST_Point"sv,
1561  "ST_SetSRID"sv)) {
1562  return translateGeoConstructor(rex_function);
1563  }
1564 
1565  auto arg_expr_list = translateFunctionArgs(rex_function);
1566  // Reset possibly wrong return type of rex_function to the return
1567  // type of the optimal valid implementation. The return type can be
1568  // wrong in the case of multiple implementations of UDF functions
1569  // that have different return types but Calcite specifies the return
1570  // type according to the first implementation.
1571  auto ext_func_sig = bind_function(rex_function->getName(), arg_expr_list);
1572  auto ret_ti = ext_arg_type_to_type_info(ext_func_sig.getRet());
1573  // By defualt, the extension function type will not allow nulls. If one of the arguments
1574  // is nullable, the extension function must also explicitly allow nulls.
1575  bool arguments_not_null = true;
1576  for (const auto& arg_expr : arg_expr_list) {
1577  if (!arg_expr->get_type_info().get_notnull()) {
1578  arguments_not_null = false;
1579  break;
1580  }
1581  }
1582  ret_ti.set_notnull(arguments_not_null);
1583  return makeExpr<Analyzer::FunctionOper>(ret_ti, rex_function->getName(), arg_expr_list);
1584 }
1585 
1586 namespace {
1587 
1588 std::vector<Analyzer::OrderEntry> translate_collation(
1589  const std::vector<SortField>& sort_fields) {
1590  std::vector<Analyzer::OrderEntry> collation;
1591  for (size_t i = 0; i < sort_fields.size(); ++i) {
1592  const auto& sort_field = sort_fields[i];
1593  collation.emplace_back(i,
1594  sort_field.getSortDir() == SortDirection::Descending,
1595  sort_field.getNullsPosition() == NullSortedPosition::First);
1596  }
1597  return collation;
1598 }
1599 
1601  const RexWindowFunctionOperator::RexWindowBound& window_bound) {
1602  return window_bound.unbounded && window_bound.preceding && !window_bound.following &&
1603  !window_bound.is_current_row && !window_bound.offset &&
1604  window_bound.order_key == 0;
1605 }
1606 
1607 bool supported_upper_bound(const RexWindowFunctionOperator* rex_window_function) {
1608  const auto& window_bound = rex_window_function->getUpperBound();
1609  const bool to_current_row = !window_bound.unbounded && !window_bound.preceding &&
1610  !window_bound.following && window_bound.is_current_row &&
1611  !window_bound.offset && window_bound.order_key == 1;
1612  switch (rex_window_function->getKind()) {
1617  return to_current_row;
1618  }
1619  default: {
1620  return rex_window_function->getOrderKeys().empty()
1621  ? (window_bound.unbounded && !window_bound.preceding &&
1622  window_bound.following && !window_bound.is_current_row &&
1623  !window_bound.offset && window_bound.order_key == 2)
1624  : to_current_row;
1625  }
1626  }
1627 }
1628 
1629 } // namespace
1630 
1631 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWindowFunction(
1632  const RexWindowFunctionOperator* rex_window_function) const {
1633  if (!supported_lower_bound(rex_window_function->getLowerBound()) ||
1634  !supported_upper_bound(rex_window_function) ||
1635  ((rex_window_function->getKind() == SqlWindowFunctionKind::ROW_NUMBER) !=
1636  rex_window_function->isRows())) {
1637  throw std::runtime_error("Frame specification not supported");
1638  }
1639  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1640  for (size_t i = 0; i < rex_window_function->size(); ++i) {
1641  args.push_back(translateScalarRex(rex_window_function->getOperand(i)));
1642  }
1643  std::vector<std::shared_ptr<Analyzer::Expr>> partition_keys;
1644  for (const auto& partition_key : rex_window_function->getPartitionKeys()) {
1645  partition_keys.push_back(translateScalarRex(partition_key.get()));
1646  }
1647  std::vector<std::shared_ptr<Analyzer::Expr>> order_keys;
1648  for (const auto& order_key : rex_window_function->getOrderKeys()) {
1649  order_keys.push_back(translateScalarRex(order_key.get()));
1650  }
1651  auto ti = rex_window_function->getType();
1652  if (window_function_is_value(rex_window_function->getKind())) {
1653  CHECK_GE(args.size(), 1u);
1654  ti = args.front()->get_type_info();
1655  }
1656  return makeExpr<Analyzer::WindowFunction>(
1657  ti,
1658  rex_window_function->getKind(),
1659  args,
1660  partition_keys,
1661  order_keys,
1662  translate_collation(rex_window_function->getCollation()));
1663 }
1664 
1666  const RexFunctionOperator* rex_function) const {
1667  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1668  for (size_t i = 0; i < rex_function->size(); ++i) {
1669  args.push_back(translateScalarRex(rex_function->getOperand(i)));
1670  }
1671  return args;
1672 }
1673 
1675  const std::shared_ptr<Analyzer::Expr> qual_expr) {
1676  CHECK(qual_expr);
1677  auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
1678  if (!bin_oper) {
1679  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1680  return {{}, {rewritten_qual_expr ? rewritten_qual_expr : qual_expr}};
1681  }
1682 
1683  if (bin_oper->get_optype() == kAND) {
1684  const auto lhs_cf = qual_to_conjunctive_form(bin_oper->get_own_left_operand());
1685  const auto rhs_cf = qual_to_conjunctive_form(bin_oper->get_own_right_operand());
1686  auto simple_quals = lhs_cf.simple_quals;
1687  simple_quals.insert(
1688  simple_quals.end(), rhs_cf.simple_quals.begin(), rhs_cf.simple_quals.end());
1689  auto quals = lhs_cf.quals;
1690  quals.insert(quals.end(), rhs_cf.quals.begin(), rhs_cf.quals.end());
1691  return {simple_quals, quals};
1692  }
1693  int rte_idx{0};
1694  const auto simple_qual = bin_oper->normalize_simple_predicate(rte_idx);
1695  return simple_qual ? QualsConjunctiveForm{{simple_qual}, {}}
1696  : QualsConjunctiveForm{{}, {qual_expr}};
1697 }
1698 
1699 std::vector<std::shared_ptr<Analyzer::Expr>> qual_to_disjunctive_form(
1700  const std::shared_ptr<Analyzer::Expr>& qual_expr) {
1701  CHECK(qual_expr);
1702  const auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
1703  if (!bin_oper) {
1704  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1705  return {rewritten_qual_expr ? rewritten_qual_expr : qual_expr};
1706  }
1707  if (bin_oper->get_optype() == kOR) {
1708  const auto lhs_df = qual_to_disjunctive_form(bin_oper->get_own_left_operand());
1709  const auto rhs_df = qual_to_disjunctive_form(bin_oper->get_own_right_operand());
1710  auto quals = lhs_df;
1711  quals.insert(quals.end(), rhs_df.begin(), rhs_df.end());
1712  return quals;
1713  }
1714  return {qual_expr};
1715 }
1716 
1717 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateHPTLiteral(
1718  const RexFunctionOperator* rex_function) const {
1719  /* since calcite uses Avatica package called DateTimeUtils to parse timestamp strings.
1720  Therefore any string having fractional seconds more 3 places after the decimal
1721  (milliseconds) will get truncated to 3 decimal places, therefore we lose precision
1722  (us|ns). Issue: [BE-2461] Here we are hijacking literal cast to Timestamp(6|9) from
1723  calcite and translating them to generate our own casts.
1724  */
1725  CHECK_EQ(size_t(1), rex_function->size());
1726  const auto operand = translateScalarRex(rex_function->getOperand(0));
1727  const auto& operand_ti = operand->get_type_info();
1728  const auto& target_ti = rex_function->getType();
1729  if (!operand_ti.is_string()) {
1730  throw std::runtime_error(
1731  "High precision timestamp cast argument must be a string. Input type is: " +
1732  operand_ti.get_type_name());
1733  } else if (!target_ti.is_high_precision_timestamp()) {
1734  throw std::runtime_error(
1735  "Cast target type should be high precision timestamp. Input type is: " +
1736  target_ti.get_type_name());
1737  } else if (target_ti.get_dimension() != 6 && target_ti.get_dimension() != 9) {
1738  throw std::runtime_error(
1739  "Cast target type should be TIMESTAMP(6|9). Input type is: TIMESTAMP(" +
1740  std::to_string(target_ti.get_dimension()) + ")");
1741  } else {
1742  return operand->add_cast(target_ti);
1743  }
1744 }
Definition: sqldefs.h:69
bool is_boolean() const
Definition: sqltypes.h:484
const RexScalar * getThen(const size_t idx) const
const std::vector< JoinType > join_types_
SQLAgg
Definition: sqldefs.h:71
#define CHECK_EQ(x, y)
Definition: Logger.h:201
auto func_resolve
std::shared_ptr< Analyzer::Expr > translateOffsetInFragment() const
SQLAgg getKind() const
Definition: sqltypes.h:52
std::shared_ptr< Analyzer::Expr > translateRegexp(const RexFunctionOperator *) const
bool supported_lower_bound(const RexWindowFunctionOperator::RexWindowBound &window_bound)
SQLTypes
Definition: sqltypes.h:41
bool g_cluster
size_t getOperand(size_t idx) const
const Executor * executor_
std::shared_ptr< Analyzer::Expr > translateUnlikely(const RexFunctionOperator *) const
const RexScalar * getElse() const
constexpr int64_t get_dateadd_timestamp_precision_scale(const DateaddField field)
Definition: DateTimeUtils.h:67
SQLQualifier
Definition: sqldefs.h:69
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:112
std::shared_ptr< Analyzer::Expr > translateFunction(const RexFunctionOperator *) const
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:884
#define LOG(tag)
Definition: Logger.h:188
std::shared_ptr< Analyzer::Expr > translateScalarRex(const RexScalar *rex) const
const SQLTypeInfo & getType() const
bool boolval
Definition: sqltypes.h:125
size_t size() const
const RexScalar * getOperand(const size_t idx) const
std::shared_ptr< Analyzer::Expr > translateUoper(const RexOperator *) const
const std::vector< SortField > & getCollation() const
SQLOps
Definition: sqldefs.h:29
ExtensionFunction bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< ExtensionFunction > &ext_funcs)
HOST DEVICE int get_scale() const
Definition: sqltypes.h:331
std::shared_ptr< Analyzer::Expr > translateDateadd(const RexFunctionOperator *) const
Definition: sqldefs.h:38
std::shared_ptr< Analyzer::Expr > translateNow() const
#define CHECK_GE(x, y)
Definition: Logger.h:206
std::shared_ptr< Analyzer::Expr > translateAbs(const RexFunctionOperator *) const
Definition: sqldefs.h:49
Definition: sqldefs.h:30
const RexScalar * getWhen(const size_t idx) const
std::shared_ptr< Analyzer::Expr > ExpressionPtr
Definition: Analyzer.h:180
std::string getString(int32_t string_id) const
std::shared_ptr< Analyzer::Expr > getInIntegerSetExpr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set) const
Definition: sqldefs.h:41
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::shared_ptr< Analyzer::Expr > translateItem(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > makeNumericConstant(const SQLTypeInfo &ti, const long val)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
void set_scale(int s)
Definition: sqltypes.h:421
#define CHECK_GT(x, y)
Definition: Logger.h:205
bool is_agg_supported_for_type(const SQLAgg &agg_kind, const SQLTypeInfo &arg_ti)
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
std::shared_ptr< Analyzer::Expr > translateOper(const RexOperator *) const
std::string to_string(char const *&&v)
void set_notnull(bool n)
Definition: sqltypes.h:423
std::shared_ptr< Analyzer::Expr > translateDatediff(const RexFunctionOperator *) const
QueryFeatureDescriptor & feature_stash_
std::shared_ptr< Analyzer::Expr > translateInput(const RexInput *) const
bool g_enable_watchdog
bool is_number() const
Definition: sqltypes.h:482
std::shared_ptr< Analyzer::Expr > translateSign(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >)
Definition: ParserNode.cpp:905
std::shared_ptr< Analyzer::Expr > translateGeoOverlapsOper(const RexOperator *) const
Definition: sqldefs.h:73
std::shared_ptr< Analyzer::Expr > translateLower(const RexFunctionOperator *) const
ExtractField to_datepart_field(const std::string &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
void set_precision(int d)
Definition: sqltypes.h:419
unsigned getIndex() const
static std::shared_ptr< Analyzer::Expr > translateLiteral(const RexLiteral *)
SQLOps getOperator() const
bool window_function_is_value(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:27
static constexpr int32_t INVALID_STR_ID
CHECK(cgen_state)
std::shared_ptr< Analyzer::Expr > translateDatetime(const RexFunctionOperator *) const
bool is_time() const
Definition: sqltypes.h:483
const std::pair< SQLOps, int64_t > get_dateadd_high_precision_adjusted_scale(const DateaddField field, int32_t dimen)
void fill_dictionary_encoded_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice, const StringDictionaryProxy *source_dict, const StringDictionaryProxy *dest_dict, const int64_t needle_null_val)
#define CHECK_NE(x, y)
Definition: Logger.h:202
const std::shared_ptr< Analyzer::Expr > generate() const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > pattern_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_not)
Definition: ParserNode.cpp:625
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:326
int64_t bigintval
Definition: sqltypes.h:129
std::shared_ptr< const RexScalar > offset
std::vector< Analyzer::OrderEntry > translate_collation(const std::vector< SortField > &sort_fields)
size_t branchCount() const
Definition: sqldefs.h:37
Definition: sqldefs.h:75
Definition: sqldefs.h:69
int getDatabaseId() const
Definition: Catalog.h:192
int16_t smallintval
Definition: sqltypes.h:127
SQLTypeInfo build_type_info(const SQLTypes sql_type, const int scale, const int precision)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const std::string &)
Definition: ParserNode.cpp:97
DatetruncField to_datediff_field(const std::string &field)
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const DictRef dest_dict_ref, const std::vector< int32_t > &source_ids, const DictRef source_dict_ref, const int32_t dest_generation)
std::shared_ptr< Analyzer::Expr > translateGeoConstructor(const RexFunctionOperator *) const
const std::vector< LeafHostInfo > & getStringDictionaryHosts() const
Definition: Catalog.cpp:1413
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:852
const ColumnDescriptor * getMetadataForColumnBySpi(const int tableId, const size_t spi) const
Definition: Catalog.cpp:1464
const std::unordered_map< const RelAlgNode *, int > input_to_nest_level_
#define UNLIKELY(x)
Definition: likely.h:20
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr)
Definition: ParserNode.cpp:261
std::pair< std::shared_ptr< Analyzer::Expr >, SQLQualifier > get_quantified_rhs(const RexScalar *rex_scalar, const RelAlgTranslator &translator)
Definition: sqldefs.h:34
#define CHECK_LT(x, y)
Definition: Logger.h:203
Definition: sqltypes.h:55
Definition: sqltypes.h:56
Analyzer::ExpressionPtrVector translateFunctionArgs(const RexFunctionOperator *) const
Definition: sqldefs.h:40
Definition: sqldefs.h:69
const ConstRexScalarPtrVector & getPartitionKeys() const
#define TRANSIENT_DICT_ID
Definition: sqltypes.h:189
const RexWindowBound & getLowerBound() const
#define CHECK_LE(x, y)
Definition: Logger.h:204
std::shared_ptr< Analyzer::Expr > translateOverlapsOper(const RexOperator *) const
bool is_null(const T &v, const SQLTypeInfo &t)
std::shared_ptr< Analyzer::Expr > translateArrayFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > like_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_ilike, const bool is_not)
Definition: ParserNode.cpp:530
bool g_enable_experimental_string_functions
SqlWindowFunctionKind getKind() const
std::shared_ptr< Analyzer::Expr > translateLike(const RexFunctionOperator *) const
const RelAlgNode * getSourceNode() const
Definition: sqltypes.h:44
bool supported_upper_bound(const RexWindowFunctionOperator *rex_window_function)
bool takes_arg(const TargetInfo &target_info)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t numericval, const int scale, const int precision)
Definition: ParserNode.cpp:137
std::shared_ptr< Analyzer::Expr > translateLikely(const RexFunctionOperator *) const
const RexWindowBound & getUpperBound() const
Definition: sqldefs.h:53
constexpr bool is_subsecond_dateadd_field(const DateaddField field)
Definition: DateTimeUtils.h:99
std::shared_ptr< Analyzer::Expr > get_in_values_expr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set)
static std::shared_ptr< Analyzer::Expr > get(const int64_t)
Definition: ParserNode.cpp:173
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:870
bool isDistinct() const
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:327
std::shared_ptr< Analyzer::Expr > translateTernaryGeoFunction(const RexFunctionOperator *) const
const ConstRexScalarPtrVector & getOrderKeys() const
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
std::shared_ptr< Analyzer::Expr > translateBinaryGeoFunction(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > make_fp_constant(const int64_t val, const SQLTypeInfo &ti)
std::pair< Datum, bool > datum_from_scalar_tv(const ScalarTargetValue *scalar_tv, const SQLTypeInfo &ti) noexcept
std::shared_ptr< Analyzer::Expr > translateInOper(const RexOperator *) const
uint64_t exp_to_scale(const unsigned exp)
size_t size() const
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::vector< ExpressionPtr > ExpressionPtrVector
Definition: Analyzer.h:182
Definition: sqldefs.h:33
std::shared_ptr< Analyzer::Expr > translateCase(const RexCase *) const
std::shared_ptr< Analyzer::Expr > translateFunctionWithGeoArg(const RexFunctionOperator *) const
Definition: sqltypes.h:48
const std::string & getName() const
std::string get_datetimeplus_rewrite_funcname(const SQLOps &op)
void validate_datetime_datepart_argument(const std::shared_ptr< Analyzer::Constant > literal_expr)
bool is_decimal() const
Definition: sqltypes.h:480
std::shared_ptr< Analyzer::Expr > translateCardinality(const RexFunctionOperator *) const
Definition: sqldefs.h:74
int cpu_threads()
Definition: thread_count.h:25
const bool just_explain_
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
std::shared_ptr< Analyzer::Expr > translateGeoComparison(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateDatePlusMinus(const RexOperator *) const
const std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::shared_ptr< Analyzer::Expr > translateHPTLiteral(const RexFunctionOperator *) const
Definition: sqldefs.h:72
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
int32_t getIdOfString(const std::string &str) const
std::shared_ptr< Analyzer::Expr > translateDatepart(const RexFunctionOperator *) const
Definition: sqldefs.h:39
std::shared_ptr< Analyzer::Expr > rewrite_to_date_trunc(const Analyzer::FunctionOper *dt_plus)
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
DateaddField to_dateadd_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
#define IS_COMPARISON(X)
Definition: sqldefs.h:57
double doubleval
Definition: sqltypes.h:131
void fill_integer_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice)
const Catalog_Namespace::Catalog & cat_
std::shared_ptr< Analyzer::Expr > translateKeyForString(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
std::shared_ptr< Analyzer::Expr > translateWindowFunction(const RexWindowFunctionOperator *) const
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateScalarSubquery(const RexSubQuery *) const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156
std::shared_ptr< Analyzer::Expr > translateLength(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateExtract(const RexFunctionOperator *) const