OmniSciDB  b24e664e58
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
RelAlgTranslator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "RelAlgTranslator.h"
18 #include "Shared/SqlTypesLayout.h"
19 
21 #include "DateTimePlusRewrite.h"
22 #include "DateTimeTranslator.h"
24 #include "ExpressionRewrite.h"
28 #include "WindowContext.h"
29 
30 #include <future>
31 
32 #include "../Analyzer/Analyzer.h"
33 #include "../Parser/ParserNode.h"
34 #include "../Shared/likely.h"
35 #include "../Shared/sql_type_to_string.h"
36 #include "../Shared/thread_count.h"
37 
38 extern bool g_enable_watchdog;
39 
41 
42 namespace {
43 
45  const int scale,
46  const int precision) {
47  SQLTypeInfo ti(sql_type, 0, 0, true);
48  ti.set_scale(scale);
49  ti.set_precision(precision);
50  return ti;
51 }
52 
53 std::pair<std::shared_ptr<Analyzer::Expr>, SQLQualifier> get_quantified_rhs(
54  const RexScalar* rex_scalar,
55  const RelAlgTranslator& translator) {
56  std::shared_ptr<Analyzer::Expr> rhs;
57  SQLQualifier sql_qual{kONE};
58  const auto rex_operator = dynamic_cast<const RexOperator*>(rex_scalar);
59  if (!rex_operator) {
60  return std::make_pair(rhs, sql_qual);
61  }
62  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex_operator);
63  const auto qual_str = rex_function ? rex_function->getName() : "";
64  if (qual_str == "PG_ANY"sv || qual_str == "PG_ALL"sv) {
65  CHECK_EQ(size_t(1), rex_function->size());
66  rhs = translator.translateScalarRex(rex_function->getOperand(0));
67  sql_qual = (qual_str == "PG_ANY"sv) ? kANY : kALL;
68  }
69  if (!rhs && rex_operator->getOperator() == kCAST) {
70  CHECK_EQ(size_t(1), rex_operator->size());
71  std::tie(rhs, sql_qual) = get_quantified_rhs(rex_operator->getOperand(0), translator);
72  }
73  return std::make_pair(rhs, sql_qual);
74 }
75 
76 std::pair<Datum, bool> datum_from_scalar_tv(const ScalarTargetValue* scalar_tv,
77  const SQLTypeInfo& ti) noexcept {
78  Datum d{0};
79  bool is_null_const{false};
80  switch (ti.get_type()) {
81  case kTINYINT: {
82  const auto ival = boost::get<int64_t>(scalar_tv);
83  CHECK(ival);
84  if (*ival == inline_int_null_val(ti)) {
85  is_null_const = true;
86  } else {
87  d.tinyintval = *ival;
88  }
89  break;
90  }
91  case kSMALLINT: {
92  const auto ival = boost::get<int64_t>(scalar_tv);
93  CHECK(ival);
94  if (*ival == inline_int_null_val(ti)) {
95  is_null_const = true;
96  } else {
97  d.smallintval = *ival;
98  }
99  break;
100  }
101  case kINT: {
102  const auto ival = boost::get<int64_t>(scalar_tv);
103  CHECK(ival);
104  if (*ival == inline_int_null_val(ti)) {
105  is_null_const = true;
106  } else {
107  d.intval = *ival;
108  }
109  break;
110  }
111  case kDECIMAL:
112  case kNUMERIC:
113  case kBIGINT:
114  case kDATE:
115  case kTIME:
116  case kTIMESTAMP: {
117  const auto ival = boost::get<int64_t>(scalar_tv);
118  CHECK(ival);
119  if (*ival == inline_int_null_val(ti)) {
120  is_null_const = true;
121  } else {
122  d.bigintval = *ival;
123  }
124  break;
125  }
126  case kDOUBLE: {
127  const auto dval = boost::get<double>(scalar_tv);
128  CHECK(dval);
129  if (*dval == inline_fp_null_val(ti)) {
130  is_null_const = true;
131  } else {
132  d.doubleval = *dval;
133  }
134  break;
135  }
136  case kFLOAT: {
137  const auto fval = boost::get<float>(scalar_tv);
138  CHECK(fval);
139  if (*fval == inline_fp_null_val(ti)) {
140  is_null_const = true;
141  } else {
142  d.floatval = *fval;
143  }
144  break;
145  }
146  case kTEXT:
147  case kVARCHAR:
148  case kCHAR: {
149  auto nullable_sptr = boost::get<NullableString>(scalar_tv);
150  CHECK(nullable_sptr);
151  if (boost::get<void*>(nullable_sptr)) {
152  is_null_const = true;
153  } else {
154  auto sptr = boost::get<std::string>(nullable_sptr);
155  d.stringval = new std::string(*sptr);
156  }
157  break;
158  }
159  default:
160  CHECK(false);
161  }
162  return {d, is_null_const};
163 }
164 
165 } // namespace
166 
167 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarRex(
168  const RexScalar* rex) const {
169  const auto rex_input = dynamic_cast<const RexInput*>(rex);
170  if (rex_input) {
171  return translateInput(rex_input);
172  }
173  const auto rex_literal = dynamic_cast<const RexLiteral*>(rex);
174  if (rex_literal) {
175  return translateLiteral(rex_literal);
176  }
177  const auto rex_window_function = dynamic_cast<const RexWindowFunctionOperator*>(rex);
178  if (rex_window_function) {
179  return translateWindowFunction(rex_window_function);
180  }
181  const auto rex_function = dynamic_cast<const RexFunctionOperator*>(rex);
182  if (rex_function) {
183  return translateFunction(rex_function);
184  }
185  const auto rex_operator = dynamic_cast<const RexOperator*>(rex);
186  if (rex_operator) {
187  return translateOper(rex_operator);
188  }
189  const auto rex_case = dynamic_cast<const RexCase*>(rex);
190  if (rex_case) {
191  return translateCase(rex_case);
192  }
193  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rex);
194  if (rex_subquery) {
195  return translateScalarSubquery(rex_subquery);
196  }
197  CHECK(false);
198  return nullptr;
199 }
200 
201 namespace {
202 
203 bool is_agg_supported_for_type(const SQLAgg& agg_kind, const SQLTypeInfo& arg_ti) {
204  if ((agg_kind == kMIN || agg_kind == kMAX || agg_kind == kSUM || agg_kind == kAVG) &&
205  !(arg_ti.is_number() || arg_ti.is_boolean() || arg_ti.is_time())) {
206  return false;
207  }
208 
209  return true;
210 }
211 
212 } // namespace
213 
214 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAggregateRex(
215  const RexAgg* rex,
216  const std::vector<std::shared_ptr<Analyzer::Expr>>& scalar_sources) {
217  const auto agg_kind = rex->getKind();
218  const bool is_distinct = rex->isDistinct();
219  const bool takes_arg{rex->size() > 0};
220  std::shared_ptr<Analyzer::Expr> arg_expr;
221  std::shared_ptr<Analyzer::Constant> err_rate;
222  if (takes_arg) {
223  const auto operand = rex->getOperand(0);
224  CHECK_LT(operand, scalar_sources.size());
225  CHECK_LE(rex->size(), 2u);
226  arg_expr = scalar_sources[operand];
227  if (agg_kind == kAPPROX_COUNT_DISTINCT && rex->size() == 2) {
228  err_rate = std::dynamic_pointer_cast<Analyzer::Constant>(
229  scalar_sources[rex->getOperand(1)]);
230  if (!err_rate || err_rate->get_type_info().get_type() != kINT ||
231  err_rate->get_constval().intval < 1 || err_rate->get_constval().intval > 100) {
232  throw std::runtime_error(
233  "APPROX_COUNT_DISTINCT's second parameter should be SMALLINT literal between "
234  "1 and 100");
235  }
236  }
237  const auto& arg_ti = arg_expr->get_type_info();
238  if (!is_agg_supported_for_type(agg_kind, arg_ti)) {
239  throw std::runtime_error("Aggregate on " + arg_ti.get_type_name() +
240  " is not supported yet.");
241  }
242  }
243  const auto agg_ti = get_agg_type(agg_kind, arg_expr.get());
244  return makeExpr<Analyzer::AggExpr>(agg_ti, agg_kind, arg_expr, is_distinct, err_rate);
245 }
246 
247 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLiteral(
248  const RexLiteral* rex_literal) {
249  const auto lit_ti = build_type_info(
250  rex_literal->getType(), rex_literal->getScale(), rex_literal->getPrecision());
251  const auto target_ti = build_type_info(rex_literal->getTargetType(),
252  rex_literal->getTypeScale(),
253  rex_literal->getTypePrecision());
254  switch (rex_literal->getType()) {
255  case kDECIMAL: {
256  const auto val = rex_literal->getVal<int64_t>();
257  const int precision = rex_literal->getPrecision();
258  const int scale = rex_literal->getScale();
259  if (target_ti.is_fp() && !scale) {
260  return make_fp_constant(val, target_ti);
261  }
262  auto lit_expr = scale ? Parser::FixedPtLiteral::analyzeValue(val, scale, precision)
264  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
265  }
266  case kTEXT: {
267  return Parser::StringLiteral::analyzeValue(rex_literal->getVal<std::string>());
268  }
269  case kBOOLEAN: {
270  Datum d;
271  d.boolval = rex_literal->getVal<bool>();
272  return makeExpr<Analyzer::Constant>(kBOOLEAN, false, d);
273  }
274  case kDOUBLE: {
275  Datum d;
276  d.doubleval = rex_literal->getVal<double>();
277  auto lit_expr = makeExpr<Analyzer::Constant>(kDOUBLE, false, d);
278  return lit_ti != target_ti ? lit_expr->add_cast(target_ti) : lit_expr;
279  }
280  case kINTERVAL_DAY_TIME:
281  case kINTERVAL_YEAR_MONTH: {
282  Datum d;
283  d.bigintval = rex_literal->getVal<int64_t>();
284  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
285  }
286  case kTIME:
287  case kTIMESTAMP: {
288  Datum d;
289  d.bigintval =
290  rex_literal->getType() == kTIMESTAMP && rex_literal->getPrecision() > 0
291  ? rex_literal->getVal<int64_t>()
292  : rex_literal->getVal<int64_t>() / 1000;
293  return makeExpr<Analyzer::Constant>(
294  SQLTypeInfo(rex_literal->getType(), rex_literal->getPrecision(), 0, false),
295  false,
296  d);
297  }
298  case kDATE: {
299  Datum d;
300  d.bigintval = rex_literal->getVal<int64_t>() * 24 * 3600;
301  return makeExpr<Analyzer::Constant>(rex_literal->getType(), false, d);
302  }
303  case kNULLT: {
304  return makeExpr<Analyzer::Constant>(rex_literal->getTargetType(), true, Datum{0});
305  }
306  default: {
307  LOG(FATAL) << "Unexpected literal type " << lit_ti.get_type_name();
308  }
309  }
310  return nullptr;
311 }
312 
313 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateScalarSubquery(
314  const RexSubQuery* rex_subquery) const {
315  if (just_explain_) {
316  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
317  }
318  CHECK(rex_subquery);
319  auto result = rex_subquery->getExecutionResult();
320  auto row_set = result->getRows();
321  if (row_set->rowCount() > size_t(1)) {
322  throw std::runtime_error("Scalar sub-query returned multiple rows");
323  }
324  if (row_set->rowCount() < size_t(1)) {
325  CHECK_EQ(row_set->rowCount(), size_t(0));
326  throw std::runtime_error("Scalar sub-query returned no results");
327  }
328  auto first_row = row_set->getNextRow(false, false);
329  auto scalar_tv = boost::get<ScalarTargetValue>(&first_row[0]);
330  auto ti = rex_subquery->getType();
331  if (ti.is_string()) {
332  throw std::runtime_error("Scalar sub-queries which return strings not supported");
333  }
334  Datum d{0};
335  bool is_null_const{false};
336  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
337  return makeExpr<Analyzer::Constant>(ti, is_null_const, d);
338 }
339 
340 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInput(
341  const RexInput* rex_input) const {
342  const auto source = rex_input->getSourceNode();
343  const auto it_rte_idx = input_to_nest_level_.find(source);
344  CHECK(it_rte_idx != input_to_nest_level_.end());
345  const int rte_idx = it_rte_idx->second;
346  const auto scan_source = dynamic_cast<const RelScan*>(source);
347  const auto& in_metainfo = source->getOutputMetainfo();
348  if (scan_source) {
349  // We're at leaf (scan) level and not supposed to have input metadata,
350  // the name and type information come directly from the catalog.
351  CHECK(in_metainfo.empty());
352  const auto table_desc = scan_source->getTableDescriptor();
353  const auto cd =
354  cat_.getMetadataForColumnBySpi(table_desc->tableId, rex_input->getIndex() + 1);
355  CHECK(cd);
356  auto col_ti = cd->columnType;
357  if (col_ti.is_string()) {
358  col_ti.set_type(kTEXT);
359  }
360  if (cd->isVirtualCol) {
361  // TODO(alex): remove at some point, we only need this fixup for backwards
362  // compatibility with old imported data
363  CHECK_EQ("rowid", cd->columnName);
364  col_ti.set_size(8);
365  }
366  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
367  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
368  col_ti.set_notnull(false);
369  }
370  return std::make_shared<Analyzer::ColumnVar>(
371  col_ti, table_desc->tableId, cd->columnId, rte_idx);
372  }
373  CHECK(!in_metainfo.empty());
374  CHECK_GE(rte_idx, 0);
375  const size_t col_id = rex_input->getIndex();
376  CHECK_LT(col_id, in_metainfo.size());
377  auto col_ti = in_metainfo[col_id].get_type_info();
378  CHECK_LE(static_cast<size_t>(rte_idx), join_types_.size());
379  if (rte_idx > 0 && join_types_[rte_idx - 1] == JoinType::LEFT) {
380  col_ti.set_notnull(false);
381  }
382  return std::make_shared<Analyzer::ColumnVar>(col_ti, -source->getId(), col_id, rte_idx);
383 }
384 
385 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUoper(
386  const RexOperator* rex_operator) const {
387  CHECK_EQ(size_t(1), rex_operator->size());
388  const auto operand_expr = translateScalarRex(rex_operator->getOperand(0));
389  const auto sql_op = rex_operator->getOperator();
390  switch (sql_op) {
391  case kCAST: {
392  const auto& target_ti = rex_operator->getType();
393  CHECK_NE(kNULLT, target_ti.get_type());
394  const auto& operand_ti = operand_expr->get_type_info();
395  if (operand_ti.is_string() && target_ti.is_string()) {
396  return operand_expr;
397  }
398  if (target_ti.is_time() ||
399  operand_ti
400  .is_string()) { // TODO(alex): check and unify with the rest of the cases
401  // Do not propogate encoding on small dates
402  return target_ti.is_date_in_days()
403  ? operand_expr->add_cast(SQLTypeInfo(kDATE, false))
404  : operand_expr->add_cast(target_ti);
405  }
406  if (!operand_ti.is_string() && target_ti.is_string()) {
407  return operand_expr->add_cast(target_ti);
408  }
409 
410  return std::make_shared<Analyzer::UOper>(target_ti, false, sql_op, operand_expr);
411  }
412  case kNOT:
413  case kISNULL: {
414  return std::make_shared<Analyzer::UOper>(kBOOLEAN, sql_op, operand_expr);
415  }
416  case kISNOTNULL: {
417  auto is_null = std::make_shared<Analyzer::UOper>(kBOOLEAN, kISNULL, operand_expr);
418  return std::make_shared<Analyzer::UOper>(kBOOLEAN, kNOT, is_null);
419  }
420  case kMINUS: {
421  const auto& ti = operand_expr->get_type_info();
422  return std::make_shared<Analyzer::UOper>(ti, false, kUMINUS, operand_expr);
423  }
424  case kUNNEST: {
425  const auto& ti = operand_expr->get_type_info();
426  CHECK(ti.is_array());
427  return makeExpr<Analyzer::UOper>(ti.get_elem_type(), false, kUNNEST, operand_expr);
428  }
429  default:
430  CHECK(false);
431  }
432  return nullptr;
433 }
434 
435 namespace {
436 
437 std::shared_ptr<Analyzer::Expr> get_in_values_expr(std::shared_ptr<Analyzer::Expr> arg,
438  const ResultSet& val_set) {
439  if (!can_use_parallel_algorithms(val_set)) {
440  return nullptr;
441  }
442  if (val_set.rowCount() > 5000000 && g_enable_watchdog) {
443  throw std::runtime_error(
444  "Unable to handle 'expr IN (subquery)', subquery returned 5M+ rows.");
445  }
446  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
447  const size_t fetcher_count = cpu_threads();
448  std::vector<std::list<std::shared_ptr<Analyzer::Expr>>> expr_set(
449  fetcher_count, std::list<std::shared_ptr<Analyzer::Expr>>());
450  std::vector<std::future<void>> fetcher_threads;
451  const auto& ti = arg->get_type_info();
452  const auto entry_count = val_set.entryCount();
453  for (size_t i = 0,
454  start_entry = 0,
455  stride = (entry_count + fetcher_count - 1) / fetcher_count;
456  i < fetcher_count && start_entry < entry_count;
457  ++i, start_entry += stride) {
458  const auto end_entry = std::min(start_entry + stride, entry_count);
459  fetcher_threads.push_back(std::async(
460  std::launch::async,
461  [&](std::list<std::shared_ptr<Analyzer::Expr>>& in_vals,
462  const size_t start,
463  const size_t end) {
464  for (auto index = start; index < end; ++index) {
465  auto row = val_set.getRowAt(index);
466  if (row.empty()) {
467  continue;
468  }
469  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
470  Datum d{0};
471  bool is_null_const{false};
472  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
473  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
474  auto ti_none_encoded = ti;
475  ti_none_encoded.set_compression(kENCODING_NONE);
476  auto none_encoded_string =
477  makeExpr<Analyzer::Constant>(ti, is_null_const, d);
478  auto dict_encoded_string = std::make_shared<Analyzer::UOper>(
479  ti, false, kCAST, none_encoded_string);
480  in_vals.push_back(dict_encoded_string);
481  } else {
482  in_vals.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
483  }
484  }
485  },
486  std::ref(expr_set[i]),
487  start_entry,
488  end_entry));
489  }
490  for (auto& child : fetcher_threads) {
491  child.get();
492  }
493 
494  val_set.moveToBegin();
495  for (auto& exprs : expr_set) {
496  value_exprs.splice(value_exprs.end(), exprs);
497  }
498  return makeExpr<Analyzer::InValues>(arg, value_exprs);
499 }
500 
501 } // namespace
502 
503 // Creates an Analyzer expression for an IN subquery which subsequently goes through the
504 // regular Executor::codegen() mechanism. The creation of the expression out of subquery's
505 // result set is parallelized whenever possible. In addition, take advantage of additional
506 // information that elements in the right hand side are constants; see
507 // getInIntegerSetExpr().
508 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateInOper(
509  const RexOperator* rex_operator) const {
510  if (just_explain_) {
511  throw std::runtime_error("EXPLAIN is not supported with sub-queries");
512  }
513  CHECK(rex_operator->size() == 2);
514  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
515  const auto rhs = rex_operator->getOperand(1);
516  const auto rex_subquery = dynamic_cast<const RexSubQuery*>(rhs);
517  CHECK(rex_subquery);
518  auto ti = lhs->get_type_info();
519  auto result = rex_subquery->getExecutionResult();
520  auto& row_set = result->getRows();
521  CHECK_EQ(size_t(1), row_set->colCount());
522  const auto& rhs_ti = row_set->getColType(0);
523  if (rhs_ti.get_type() != ti.get_type()) {
524  throw std::runtime_error(
525  "The two sides of the IN operator must have the same type; found " +
526  ti.get_type_name() + " and " + rhs_ti.get_type_name());
527  }
528  row_set->moveToBegin();
529  if (row_set->entryCount() > 10000) {
530  std::shared_ptr<Analyzer::Expr> expr;
531  if ((ti.is_integer() || (ti.is_string() && ti.get_compression() == kENCODING_DICT)) &&
532  !row_set->getQueryMemDesc().didOutputColumnar()) {
533  expr = getInIntegerSetExpr(lhs, *row_set);
534  // Handle the highly unlikely case when the InIntegerSet ended up being tiny.
535  // Just let it fall through the usual InValues path at the end of this method,
536  // its codegen knows to use inline comparisons for few values.
537  if (expr && std::static_pointer_cast<Analyzer::InIntegerSet>(expr)
538  ->get_value_list()
539  .size() <= 100) {
540  expr = nullptr;
541  }
542  } else {
543  expr = get_in_values_expr(lhs, *row_set);
544  }
545  if (expr) {
546  return expr;
547  }
548  }
549  std::list<std::shared_ptr<Analyzer::Expr>> value_exprs;
550  while (true) {
551  auto row = row_set->getNextRow(true, false);
552  if (row.empty()) {
553  break;
554  }
555  if (g_enable_watchdog && value_exprs.size() >= 10000) {
556  throw std::runtime_error(
557  "Unable to handle 'expr IN (subquery)', subquery returned 10000+ rows.");
558  }
559  auto scalar_tv = boost::get<ScalarTargetValue>(&row[0]);
560  Datum d{0};
561  bool is_null_const{false};
562  std::tie(d, is_null_const) = datum_from_scalar_tv(scalar_tv, ti);
563  if (ti.is_string() && ti.get_compression() != kENCODING_NONE) {
564  auto ti_none_encoded = ti;
565  ti_none_encoded.set_compression(kENCODING_NONE);
566  auto none_encoded_string = makeExpr<Analyzer::Constant>(ti, is_null_const, d);
567  auto dict_encoded_string =
568  std::make_shared<Analyzer::UOper>(ti, false, kCAST, none_encoded_string);
569  value_exprs.push_back(dict_encoded_string);
570  } else {
571  value_exprs.push_back(makeExpr<Analyzer::Constant>(ti, is_null_const, d));
572  }
573  }
574  return makeExpr<Analyzer::InValues>(lhs, value_exprs);
575 }
576 
577 namespace {
578 
// Watchdog threshold (2^25) on the approximate number of values gathered for an
// 'expr IN (subquery)' integer set across all worker tasks.
const size_t g_max_integer_set_size = 1 << 25;
580 
582  std::vector<int64_t>& in_vals,
583  std::atomic<size_t>& total_in_vals_count,
584  const ResultSet* values_rowset,
585  const std::pair<int64_t, int64_t> values_rowset_slice,
586  const StringDictionaryProxy* source_dict,
587  const StringDictionaryProxy* dest_dict,
588  const int64_t needle_null_val) {
589  CHECK(in_vals.empty());
590  bool dicts_are_equal = source_dict == dest_dict;
591  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
592  ++index) {
593  const auto row = values_rowset->getOneColRow(index);
594  if (UNLIKELY(!row.valid)) {
595  continue;
596  }
597  if (dicts_are_equal) {
598  in_vals.push_back(row.value);
599  } else {
600  const int string_id =
601  row.value == needle_null_val
602  ? needle_null_val
603  : dest_dict->getIdOfString(source_dict->getString(row.value));
604  if (string_id != StringDictionary::INVALID_STR_ID) {
605  in_vals.push_back(string_id);
606  }
607  }
608  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
609  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
610  throw std::runtime_error(
611  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
612  }
613  }
614 }
615 
616 void fill_integer_in_vals(std::vector<int64_t>& in_vals,
617  std::atomic<size_t>& total_in_vals_count,
618  const ResultSet* values_rowset,
619  const std::pair<int64_t, int64_t> values_rowset_slice) {
620  CHECK(in_vals.empty());
621  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
622  ++index) {
623  const auto row = values_rowset->getOneColRow(index);
624  if (row.valid) {
625  in_vals.push_back(row.value);
626  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
627  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
628  throw std::runtime_error(
629  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
630  }
631  }
632  }
633 }
634 
635 // Multi-node counterpart of the other version. Saves round-trips, which is crucial
636 // for a big right-hand side result. It only handles physical string dictionary ids,
637 // therefore it won't be able to handle a right-hand side sub-query with a CASE
638 // returning literals on some branches. That case isn't hard too handle either, but
639 // it's not clear it's actually important in practice.
640 // RelAlgTranslator::getInIntegerSetExpr makes sure, by checking the encodings, that this
641 // function isn't called in such cases.
643  std::vector<int64_t>& in_vals,
644  std::atomic<size_t>& total_in_vals_count,
645  const ResultSet* values_rowset,
646  const std::pair<int64_t, int64_t> values_rowset_slice,
647  const std::vector<LeafHostInfo>& leaf_hosts,
648  const DictRef source_dict_ref,
649  const DictRef dest_dict_ref,
650  const int32_t dest_generation,
651  const int64_t needle_null_val) {
652  CHECK(in_vals.empty());
653  std::vector<int32_t> source_ids;
654  source_ids.reserve(values_rowset->entryCount());
655  bool has_nulls = false;
656  if (source_dict_ref == dest_dict_ref) {
657  in_vals.reserve(values_rowset_slice.second - values_rowset_slice.first +
658  1); // Add 1 to cover interval
659  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
660  ++index) {
661  const auto row = values_rowset->getOneColRow(index);
662  if (!row.valid) {
663  continue;
664  }
665  if (row.value != needle_null_val) {
666  in_vals.push_back(row.value);
667  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
668  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
669  throw std::runtime_error(
670  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
671  }
672  } else {
673  has_nulls = true;
674  }
675  }
676  if (has_nulls) {
677  in_vals.push_back(
678  needle_null_val); // we've deduped null values as an optimization, although
679  // this is not required by consumer
680  }
681  return;
682  }
683  // Code path below is for when dictionaries are not shared
684  for (auto index = values_rowset_slice.first; index < values_rowset_slice.second;
685  ++index) {
686  const auto row = values_rowset->getOneColRow(index);
687  if (row.valid) {
688  if (row.value != needle_null_val) {
689  source_ids.push_back(row.value);
690  } else {
691  has_nulls = true;
692  }
693  }
694  }
695  std::vector<int32_t> dest_ids;
696  translate_string_ids(dest_ids,
697  leaf_hosts.front(),
698  dest_dict_ref,
699  source_ids,
700  source_dict_ref,
701  dest_generation);
702  CHECK_EQ(dest_ids.size(), source_ids.size());
703  in_vals.reserve(dest_ids.size() + (has_nulls ? 1 : 0));
704  if (has_nulls) {
705  in_vals.push_back(needle_null_val);
706  }
707  for (const int32_t dest_id : dest_ids) {
708  if (dest_id != StringDictionary::INVALID_STR_ID) {
709  in_vals.push_back(dest_id);
710  if (UNLIKELY(g_enable_watchdog && (in_vals.size() & 1023) == 0 &&
711  total_in_vals_count.fetch_add(1024) >= g_max_integer_set_size)) {
712  throw std::runtime_error(
713  "Unable to handle 'expr IN (subquery)', subquery returned 30M+ rows.");
714  }
715  }
716  }
717 }
718 
719 } // namespace
720 
721 // The typical IN subquery involves either dictionary-encoded strings or integers.
722 // Analyzer::InValues is a very heavy representation of the right hand side of such
723 // a query since we already know the right hand would be a list of Analyzer::Constant
724 // shared pointers. We can avoid the big overhead of each Analyzer::Constant and the
725 // refcounting associated with shared pointers by creating an abbreviated InIntegerSet
726 // representation of the IN expression which takes advantage of the this information.
727 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::getInIntegerSetExpr(
728  std::shared_ptr<Analyzer::Expr> arg,
729  const ResultSet& val_set) const {
730  if (!can_use_parallel_algorithms(val_set)) {
731  return nullptr;
732  }
733  std::vector<int64_t> value_exprs;
734  const size_t fetcher_count = cpu_threads();
735  std::vector<std::vector<int64_t>> expr_set(fetcher_count);
736  std::vector<std::future<void>> fetcher_threads;
737  const auto& arg_type = arg->get_type_info();
738  const auto entry_count = val_set.entryCount();
739  CHECK_EQ(size_t(1), val_set.colCount());
740  const auto& col_type = val_set.getColType(0);
741  if (g_cluster && arg_type.is_string() &&
742  (col_type.get_comp_param() <= 0 || arg_type.get_comp_param() <= 0)) {
743  // Skip this case for now, see comment for fill_dictionary_encoded_in_vals.
744  return nullptr;
745  }
746  std::atomic<size_t> total_in_vals_count{0};
747  for (size_t i = 0,
748  start_entry = 0,
749  stride = (entry_count + fetcher_count - 1) / fetcher_count;
750  i < fetcher_count && start_entry < entry_count;
751  ++i, start_entry += stride) {
752  expr_set[i].reserve(entry_count / fetcher_count);
753  const auto end_entry = std::min(start_entry + stride, entry_count);
754  if (arg_type.is_string()) {
755  CHECK_EQ(kENCODING_DICT, arg_type.get_compression());
756  // const int32_t dest_dict_id = arg_type.get_comp_param();
757  // const int32_t source_dict_id = col_type.get_comp_param();
758  const DictRef dest_dict_ref(arg_type.get_comp_param(), cat_.getDatabaseId());
759  const DictRef source_dict_ref(col_type.get_comp_param(), cat_.getDatabaseId());
760  const auto dd = executor_->getStringDictionaryProxy(
761  arg_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
762  const auto sd = executor_->getStringDictionaryProxy(
763  col_type.get_comp_param(), val_set.getRowSetMemOwner(), true);
764  CHECK(sd);
765  const auto needle_null_val = inline_int_null_val(arg_type);
766  fetcher_threads.push_back(std::async(
767  std::launch::async,
768  [this,
769  &val_set,
770  &total_in_vals_count,
771  sd,
772  dd,
773  source_dict_ref,
774  dest_dict_ref,
775  needle_null_val](
776  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
777  if (g_cluster) {
778  CHECK_GE(dd->getGeneration(), 0);
780  total_in_vals_count,
781  &val_set,
782  {start, end},
784  source_dict_ref,
785  dest_dict_ref,
786  dd->getGeneration(),
787  needle_null_val);
788  } else {
790  total_in_vals_count,
791  &val_set,
792  {start, end},
793  sd,
794  dd,
795  needle_null_val);
796  }
797  },
798  std::ref(expr_set[i]),
799  start_entry,
800  end_entry));
801  } else {
802  CHECK(arg_type.is_integer());
803  fetcher_threads.push_back(std::async(
804  std::launch::async,
805  [&val_set, &total_in_vals_count](
806  std::vector<int64_t>& in_vals, const size_t start, const size_t end) {
807  fill_integer_in_vals(in_vals, total_in_vals_count, &val_set, {start, end});
808  },
809  std::ref(expr_set[i]),
810  start_entry,
811  end_entry));
812  }
813  }
814  for (auto& child : fetcher_threads) {
815  child.get();
816  }
817 
818  val_set.moveToBegin();
819  value_exprs.reserve(entry_count);
820  for (auto& exprs : expr_set) {
821  value_exprs.insert(value_exprs.end(), exprs.begin(), exprs.end());
822  }
823  return makeExpr<Analyzer::InIntegerSet>(
824  arg, value_exprs, arg_type.get_notnull() && col_type.get_notnull());
825 }
826 
827 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOper(
828  const RexOperator* rex_operator) const {
829  CHECK_GT(rex_operator->size(), size_t(0));
830  if (rex_operator->size() == 1) {
831  return translateUoper(rex_operator);
832  }
833  const auto sql_op = rex_operator->getOperator();
834  if (sql_op == kIN) {
835  return translateInOper(rex_operator);
836  }
837  if (sql_op == kMINUS || sql_op == kPLUS) {
838  auto date_plus_minus = translateDatePlusMinus(rex_operator);
839  if (date_plus_minus) {
840  return date_plus_minus;
841  }
842  }
843  if (sql_op == kOVERLAPS) {
844  return translateOverlapsOper(rex_operator);
845  } else if (IS_COMPARISON(sql_op)) {
846  auto geo_comp = translateGeoComparison(rex_operator);
847  if (geo_comp) {
848  return geo_comp;
849  }
850  }
851  auto lhs = translateScalarRex(rex_operator->getOperand(0));
852  for (size_t i = 1; i < rex_operator->size(); ++i) {
853  std::shared_ptr<Analyzer::Expr> rhs;
854  SQLQualifier sql_qual{kONE};
855  const auto rhs_op = rex_operator->getOperand(i);
856  std::tie(rhs, sql_qual) = get_quantified_rhs(rhs_op, *this);
857  if (!rhs) {
858  rhs = translateScalarRex(rhs_op);
859  }
860  CHECK(rhs);
861  lhs = Parser::OperExpr::normalize(sql_op, sql_qual, lhs, rhs);
862  }
863  return lhs;
864 }
865 
866 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOverlapsOper(
867  const RexOperator* rex_operator) const {
868  const auto sql_op = rex_operator->getOperator();
869  CHECK(sql_op == kOVERLAPS);
870 
871  const auto lhs = translateScalarRex(rex_operator->getOperand(0));
872  const auto lhs_ti = lhs->get_type_info();
873  if (lhs_ti.is_geometry()) {
874  return translateGeoOverlapsOper(rex_operator);
875  } else {
876  throw std::runtime_error(
877  "Overlaps equivalence is currently only supported for geospatial types");
878  }
879 }
880 
881 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateCase(
882  const RexCase* rex_case) const {
883  std::shared_ptr<Analyzer::Expr> else_expr;
884  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
885  expr_list;
886  for (size_t i = 0; i < rex_case->branchCount(); ++i) {
887  const auto when_expr = translateScalarRex(rex_case->getWhen(i));
888  const auto then_expr = translateScalarRex(rex_case->getThen(i));
889  expr_list.emplace_back(when_expr, then_expr);
890  }
891  if (rex_case->getElse()) {
892  else_expr = translateScalarRex(rex_case->getElse());
893  }
894  return Parser::CaseExpr::normalize(expr_list, else_expr);
895 }
896 
897 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLike(
898  const RexFunctionOperator* rex_function) const {
899  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
900  const auto arg = translateScalarRex(rex_function->getOperand(0));
901  const auto like = translateScalarRex(rex_function->getOperand(1));
902  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(like)) {
903  throw std::runtime_error("The matching pattern must be a literal.");
904  }
905  const auto escape = (rex_function->size() == 3)
906  ? translateScalarRex(rex_function->getOperand(2))
907  : nullptr;
908  const bool is_ilike = rex_function->getName() == "PG_ILIKE"sv;
909  return Parser::LikeExpr::get(arg, like, escape, is_ilike, false);
910 }
911 
912 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateRegexp(
913  const RexFunctionOperator* rex_function) const {
914  CHECK(rex_function->size() == 2 || rex_function->size() == 3);
915  const auto arg = translateScalarRex(rex_function->getOperand(0));
916  const auto pattern = translateScalarRex(rex_function->getOperand(1));
917  if (!std::dynamic_pointer_cast<const Analyzer::Constant>(pattern)) {
918  throw std::runtime_error("The matching pattern must be a literal.");
919  }
920  const auto escape = (rex_function->size() == 3)
921  ? translateScalarRex(rex_function->getOperand(2))
922  : nullptr;
923  return Parser::RegexpExpr::get(arg, pattern, escape, false);
924 }
925 
926 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLikely(
927  const RexFunctionOperator* rex_function) const {
928  CHECK(rex_function->size() == 1);
929  const auto arg = translateScalarRex(rex_function->getOperand(0));
930  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.9375);
931 }
932 
933 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateUnlikely(
934  const RexFunctionOperator* rex_function) const {
935  CHECK(rex_function->size() == 1);
936  const auto arg = translateScalarRex(rex_function->getOperand(0));
937  return makeExpr<Analyzer::LikelihoodExpr>(arg, 0.0625);
938 }
939 
940 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateExtract(
941  const RexFunctionOperator* rex_function) const {
942  CHECK_EQ(size_t(2), rex_function->size());
943  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
944  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
945  if (!timeunit_lit) {
946  throw std::runtime_error("The time unit parameter must be a literal.");
947  }
948  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
949  const bool is_date_trunc = rex_function->getName() == "PG_DATE_TRUNC"sv;
950  if (is_date_trunc) {
951  return DateTruncExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
952  } else {
953  return ExtractExpr::generate(from_expr, *timeunit_lit->get_constval().stringval);
954  }
955 }
956 
957 namespace {
958 
959 std::shared_ptr<Analyzer::Constant> makeNumericConstant(const SQLTypeInfo& ti,
960  const long val) {
961  CHECK(ti.is_number());
962  Datum datum{0};
963  switch (ti.get_type()) {
964  case kTINYINT: {
965  datum.tinyintval = val;
966  break;
967  }
968  case kSMALLINT: {
969  datum.smallintval = val;
970  break;
971  }
972  case kINT: {
973  datum.intval = val;
974  break;
975  }
976  case kBIGINT: {
977  datum.bigintval = val;
978  break;
979  }
980  case kDECIMAL:
981  case kNUMERIC: {
982  datum.bigintval = val * exp_to_scale(ti.get_scale());
983  break;
984  }
985  case kFLOAT: {
986  datum.floatval = val;
987  break;
988  }
989  case kDOUBLE: {
990  datum.doubleval = val;
991  break;
992  }
993  default:
994  CHECK(false);
995  }
996  return makeExpr<Analyzer::Constant>(ti, false, datum);
997 }
998 
999 } // namespace
1000 
// Translates a three-operand date-add call into an Analyzer::DateaddExpr.
//
// Operand 0 is the time unit and must translate to a constant, operand 1 is the
// number of units (cast to BIGINT) and operand 2 the datetime expression. A TIME
// operand is rejected with std::runtime_error. The result is typed as TIMESTAMP with
// the dimension of the datetime operand.
//
// NOTE(review): this listing jumps over its embedded line numbers 1019, 1028, 1032 and
// 1033, so the second half of both `if` conditions, the scale argument passed to
// makeNumericConstant() and the declaration of `oper_scale` are not visible here.
// Restore them from the original file before compiling this block.
1001 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDateadd(
1002  const RexFunctionOperator* rex_function) const {
1003  CHECK_EQ(size_t(3), rex_function->size());
1004  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1005  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1006  if (!timeunit_lit) {
1007  throw std::runtime_error("The time unit parameter must be a literal.");
1008  }
1009 
1010  const auto number_units = translateScalarRex(rex_function->getOperand(1));
1011  auto cast_number_units = number_units->add_cast(SQLTypeInfo(kBIGINT, false));
1012  const auto datetime = translateScalarRex(rex_function->getOperand(2));
1013  const auto& datetime_ti = datetime->get_type_info();
1014  if (datetime_ti.get_type() == kTIME) {
1015  throw std::runtime_error("DateAdd operation not supported for TIME.");
1016  }
1017  const auto& field = to_dateadd_field(*timeunit_lit->get_constval().stringval);
 // Timestamps that are not high precision: the unit count is divided by a scale
 // constant and the resulting expression is folded.
1018  if (!datetime_ti.is_high_precision_timestamp() &&
1020  // Scale the number to get value in seconds
1021  const auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1022  cast_number_units = makeExpr<Analyzer::BinOper>(
1023  bigint_ti.get_type(),
1024  kDIVIDE,
1025  kONE,
1026  cast_number_units,
1027  makeNumericConstant(bigint_ti,
1029  cast_number_units = fold_expr(cast_number_units.get());
1030  }
 // High precision timestamps: `oper_scale.first` is the operator to apply (nothing
 // is done when it is falsy) and `oper_scale.second` the scale constant.
1031  if (datetime_ti.is_high_precision_timestamp() &&
1034  field, datetime_ti.get_dimension());
1035  if (oper_scale.first) {
1036  // scale number to desired precision
1037  const auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1038  cast_number_units =
1039  makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1040  oper_scale.first,
1041  kONE,
1042  cast_number_units,
1043  makeNumericConstant(bigint_ti, oper_scale.second));
1044  cast_number_units = fold_expr(cast_number_units.get());
1045  }
1046  }
1047  return makeExpr<Analyzer::DateaddExpr>(
1048  SQLTypeInfo(kTIMESTAMP, datetime_ti.get_dimension(), 0, false),
1049  to_dateadd_field(*timeunit_lit->get_constval().stringval),
1050  cast_number_units,
1051  datetime);
1052 }
1053 
1054 namespace {
1055 
// Helper body that accepts only kPLUS (enforced by CHECK) and returns the function
// name "DATETIME_PLUS".
// NOTE(review): the signature line (embedded number 1056) is missing from this
// listing; presumably this is get_datetimeplus_rewrite_funcname(), which
// translateDatePlusMinus() calls with the operator -- confirm against the original.
1057  CHECK(op == kPLUS);
1058  return "DATETIME_PLUS"s;
1059 }
1060 
1061 } // namespace
1062 
1063 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatePlusMinus(
1064  const RexOperator* rex_operator) const {
1065  if (rex_operator->size() != 2) {
1066  return nullptr;
1067  }
1068  const auto datetime = translateScalarRex(rex_operator->getOperand(0));
1069  const auto datetime_ti = datetime->get_type_info();
1070  if (!datetime_ti.is_timestamp() && !datetime_ti.is_date()) {
1071  if (datetime_ti.get_type() == kTIME) {
1072  throw std::runtime_error("DateTime addition/subtraction not supported for TIME.");
1073  }
1074  return nullptr;
1075  }
1076  const auto rhs = translateScalarRex(rex_operator->getOperand(1));
1077  const auto rhs_ti = rhs->get_type_info();
1078  if (rhs_ti.get_type() == kTIMESTAMP || rhs_ti.get_type() == kDATE) {
1079  if (datetime_ti.is_high_precision_timestamp() ||
1080  rhs_ti.is_high_precision_timestamp()) {
1081  throw std::runtime_error(
1082  "High Precision timestamps are not supported for TIMESTAMPDIFF operation. Use "
1083  "DATEDIFF.");
1084  }
1085  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1086  const auto& rex_operator_ti = rex_operator->getType();
1087  const auto datediff_field =
1088  (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) ? dtSECOND : dtMONTH;
1089  auto result =
1090  makeExpr<Analyzer::DatediffExpr>(bigint_ti, datediff_field, rhs, datetime);
1091  // multiply 1000 to result since expected result should be in millisecond precision.
1092  if (rex_operator_ti.get_type() == kINTERVAL_DAY_TIME) {
1093  return makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1094  kMULTIPLY,
1095  kONE,
1096  result,
1097  makeNumericConstant(bigint_ti, 1000));
1098  } else {
1099  return result;
1100  }
1101  }
1102  const auto op = rex_operator->getOperator();
1103  if (op == kPLUS) {
1104  std::vector<std::shared_ptr<Analyzer::Expr>> args = {datetime, rhs};
1105  auto dt_plus = makeExpr<Analyzer::FunctionOper>(
1106  datetime_ti, get_datetimeplus_rewrite_funcname(op), args);
1107  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1108  if (date_trunc) {
1109  return date_trunc;
1110  }
1111  }
1112  const auto interval = fold_expr(rhs.get());
1113  auto interval_ti = interval->get_type_info();
1114  auto bigint_ti = SQLTypeInfo(kBIGINT, false);
1115  const auto interval_lit = std::dynamic_pointer_cast<Analyzer::Constant>(interval);
1116  if (interval_ti.get_type() == kINTERVAL_DAY_TIME) {
1117  std::shared_ptr<Analyzer::Expr> interval_sec;
1118  if (interval_lit) {
1119  interval_sec =
1120  makeNumericConstant(bigint_ti,
1121  (op == kMINUS ? -interval_lit->get_constval().bigintval
1122  : interval_lit->get_constval().bigintval) /
1123  1000);
1124  } else {
1125  interval_sec = makeExpr<Analyzer::BinOper>(bigint_ti.get_type(),
1126  kDIVIDE,
1127  kONE,
1128  interval,
1129  makeNumericConstant(bigint_ti, 1000));
1130  if (op == kMINUS) {
1131  interval_sec =
1132  std::make_shared<Analyzer::UOper>(bigint_ti, false, kUMINUS, interval_sec);
1133  }
1134  }
1135  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daSECOND, interval_sec, datetime);
1136  }
1137  CHECK(interval_ti.get_type() == kINTERVAL_YEAR_MONTH);
1138  const auto interval_months = op == kMINUS ? std::make_shared<Analyzer::UOper>(
1139  bigint_ti, false, kUMINUS, interval)
1140  : interval;
1141  return makeExpr<Analyzer::DateaddExpr>(datetime_ti, daMONTH, interval_months, datetime);
1142 }
1143 
1144 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatediff(
1145  const RexFunctionOperator* rex_function) const {
1146  CHECK_EQ(size_t(3), rex_function->size());
1147  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1148  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1149  if (!timeunit_lit) {
1150  throw std::runtime_error("The time unit parameter must be a literal.");
1151  }
1152  const auto start = translateScalarRex(rex_function->getOperand(1));
1153  const auto end = translateScalarRex(rex_function->getOperand(2));
1154  const auto field = to_datediff_field(*timeunit_lit->get_constval().stringval);
1155  return makeExpr<Analyzer::DatediffExpr>(SQLTypeInfo(kBIGINT, false), field, start, end);
1156 }
1157 
1158 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatepart(
1159  const RexFunctionOperator* rex_function) const {
1160  CHECK_EQ(size_t(2), rex_function->size());
1161  const auto timeunit = translateScalarRex(rex_function->getOperand(0));
1162  const auto timeunit_lit = std::dynamic_pointer_cast<Analyzer::Constant>(timeunit);
1163  if (!timeunit_lit) {
1164  throw std::runtime_error("The time unit parameter must be a literal.");
1165  }
1166  const auto from_expr = translateScalarRex(rex_function->getOperand(1));
1167  return ExtractExpr::generate(
1168  from_expr, to_datepart_field(*timeunit_lit->get_constval().stringval));
1169 }
1170 
1171 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLength(
1172  const RexFunctionOperator* rex_function) const {
1173  CHECK_EQ(size_t(1), rex_function->size());
1174  const auto str_arg = translateScalarRex(rex_function->getOperand(0));
1175  return makeExpr<Analyzer::CharLengthExpr>(str_arg->decompress(),
1176  rex_function->getName() == "CHAR_LENGTH"sv);
1177 }
1178 
1179 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateKeyForString(
1180  const RexFunctionOperator* rex_function) const {
1181  const auto& args = translateFunctionArgs(rex_function);
1182  CHECK_EQ(size_t(1), args.size());
1183  const auto expr = dynamic_cast<Analyzer::Expr*>(args[0].get());
1184  if (nullptr == expr || !expr->get_type_info().is_string() ||
1185  expr->get_type_info().is_varlen()) {
1186  throw std::runtime_error(rex_function->getName() +
1187  " expects a dictionary encoded text column.");
1188  }
1189  return makeExpr<Analyzer::KeyForStringExpr>(args[0]);
1190 }
1191 
1192 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateLower(
1193  const RexFunctionOperator* rex_function) const {
1194  const auto& args = translateFunctionArgs(rex_function);
1195  CHECK_EQ(size_t(1), args.size());
1196  CHECK(args[0]);
1197 
1198  if (args[0]->get_type_info().is_dict_encoded_string() ||
1199  dynamic_cast<Analyzer::Constant*>(args[0].get())) {
1200  return makeExpr<Analyzer::LowerExpr>(args[0]);
1201  }
1202 
1203  throw std::runtime_error(rex_function->getName() +
1204  " expects a dictionary encoded text column or a literal.");
1205 }
1206 
// Translates an array-cardinality call: a constant for fixed length arrays, a
// CardinalityExpr evaluated at runtime otherwise. Throws std::runtime_error when the
// operand is not an array, is an array of arrays, or has a non-positive element size.
// NOTE(review): the first line of the signature (embedded number 1207, carrying the
// return type and function name) is missing from this listing; translateFunction()
// dispatches CARDINALITY / ARRAY_LENGTH to translateCardinality(), which is presumably
// this function -- confirm against the original file.
1208  const RexFunctionOperator* rex_function) const {
1209  const auto ret_ti = rex_function->getType();
1210  const auto arg = translateScalarRex(rex_function->getOperand(0));
1211  const auto arg_ti = arg->get_type_info();
1212  if (!arg_ti.is_array()) {
1213  throw std::runtime_error(rex_function->getName() + " expects an array expression.");
1214  }
1215  if (arg_ti.get_subtype() == kARRAY) {
1216  throw std::runtime_error(rex_function->getName() +
1217  " expects one-dimension array expression.");
1218  }
1219  const auto array_size = arg_ti.get_size();
1220  const auto array_elem_size = arg_ti.get_elem_type().get_array_context_logical_size();
1221 
 // A positive size is treated as a fixed length array: the element count is the
 // total size divided by the size of one element.
1222  if (array_size > 0) {
1223  if (array_elem_size <= 0) {
1224  throw std::runtime_error(rex_function->getName() +
1225  ": unexpected array element type.");
1226  }
1227  // Return cardinality of a fixed length array
1228  return makeNumericConstant(ret_ti, array_size / array_elem_size);
1229  }
1230  // Variable length array cardinality will be calculated at runtime
1231  return makeExpr<Analyzer::CardinalityExpr>(arg);
1232 }
1233 
1234 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateItem(
1235  const RexFunctionOperator* rex_function) const {
1236  CHECK_EQ(size_t(2), rex_function->size());
1237  const auto base = translateScalarRex(rex_function->getOperand(0));
1238  const auto index = translateScalarRex(rex_function->getOperand(1));
1239  return makeExpr<Analyzer::BinOper>(
1240  base->get_type_info().get_elem_type(), false, kARRAY_AT, kONE, base, index);
1241 }
1242 
// Produces the expression used for NOW(); also the result of DATETIME('NOW').
// NOTE(review): the body line (embedded number 1244) is missing from this listing, so
// what is returned cannot be seen here -- restore it from the original file.
1243 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateNow() const {
1245 }
1246 
1247 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateDatetime(
1248  const RexFunctionOperator* rex_function) const {
1249  CHECK_EQ(size_t(1), rex_function->size());
1250  const auto arg = translateScalarRex(rex_function->getOperand(0));
1251  const auto arg_lit = std::dynamic_pointer_cast<Analyzer::Constant>(arg);
1252  const std::string datetime_err{R"(Only DATETIME('NOW') supported for now.)"};
1253  if (!arg_lit) {
1254  throw std::runtime_error(datetime_err);
1255  }
1256  CHECK(arg_lit->get_type_info().is_string());
1257  if (*arg_lit->get_constval().stringval != "NOW"sv) {
1258  throw std::runtime_error(datetime_err);
1259  }
1260  return translateNow();
1261 }
1262 
1263 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateAbs(
1264  const RexFunctionOperator* rex_function) const {
1265  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1266  expr_list;
1267  CHECK_EQ(size_t(1), rex_function->size());
1268  const auto operand = translateScalarRex(rex_function->getOperand(0));
1269  const auto& operand_ti = operand->get_type_info();
1270  CHECK(operand_ti.is_number());
1271  const auto zero = makeNumericConstant(operand_ti, 0);
1272  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1273  const auto uminus_operand =
1274  makeExpr<Analyzer::UOper>(operand_ti.get_type(), kUMINUS, operand);
1275  expr_list.emplace_back(lt_zero, uminus_operand);
1276  return makeExpr<Analyzer::CaseExpr>(operand_ti, false, expr_list, operand);
1277 }
1278 
1279 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateSign(
1280  const RexFunctionOperator* rex_function) const {
1281  std::list<std::pair<std::shared_ptr<Analyzer::Expr>, std::shared_ptr<Analyzer::Expr>>>
1282  expr_list;
1283  CHECK_EQ(size_t(1), rex_function->size());
1284  const auto operand = translateScalarRex(rex_function->getOperand(0));
1285  const auto& operand_ti = operand->get_type_info();
1286  CHECK(operand_ti.is_number());
1287  const auto zero = makeNumericConstant(operand_ti, 0);
1288  const auto lt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kLT, kONE, operand, zero);
1289  expr_list.emplace_back(lt_zero, makeNumericConstant(operand_ti, -1));
1290  const auto eq_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kEQ, kONE, operand, zero);
1291  expr_list.emplace_back(eq_zero, makeNumericConstant(operand_ti, 0));
1292  const auto gt_zero = makeExpr<Analyzer::BinOper>(kBOOLEAN, kGT, kONE, operand, zero);
1293  expr_list.emplace_back(gt_zero, makeNumericConstant(operand_ti, 1));
1294  return makeExpr<Analyzer::CaseExpr>(
1295  operand_ti,
1296  false,
1297  expr_list,
1298  makeExpr<Analyzer::Constant>(operand_ti, true, Datum{0}));
1299 }
1300 
1301 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateOffsetInFragment() const {
1302  return makeExpr<Analyzer::OffsetInFragment>();
1303 }
1304 
// Translates an ARRAY[...] constructor into an Analyzer::ArrayExpr.
//
// When the declared array type has no element subtype (kNULLT), the subtype is taken
// from the first translated argument and all arguments are required to agree with it;
// otherwise the declared type is used as is. Both paths mark the query as CPU-only via
// feature_stash_ and bump its array expression counter.
// Throws std::runtime_error for mismatching element types and for an empty argument
// list on an untyped array.
// NOTE(review): the first line of the signature (embedded number 1305, carrying the
// return type and function name) is missing from this listing; translateFunction()
// dispatches ARRAY to translateArrayFunction(), which is presumably this function --
// confirm against the original file.
1306  const RexFunctionOperator* rex_function) const {
1307  if (rex_function->getType().get_subtype() == kNULLT) {
1308  auto sql_type = rex_function->getType();
1309  CHECK(sql_type.get_type() == kARRAY);
1310 
1311  // FIX-ME: Deal with NULL arrays
1312  auto translated_function_args(translateFunctionArgs(rex_function));
1313  if (translated_function_args.size() > 0) {
1314  auto const& first_element_logical_type(
1315  get_logical_type_info(translated_function_args[0]->get_type_info()));
1316 
 // Two handlers are supplied: the first for a first element of type
 // kCHAR/kVARCHAR/kTEXT, the second for everything else (presumably selected by
 // on_member_of_typeset -- confirm against its definition).
1317  on_member_of_typeset<kCHAR, kVARCHAR, kTEXT>(
1318  first_element_logical_type,
1319  [&] {
 // String case: every argument only has to be some string type.
1320  bool same_type_status = true;
1321  for (auto const& expr_ptr : translated_function_args) {
1322  same_type_status =
1323  same_type_status && (expr_ptr->get_type_info().is_string());
1324  }
1325 
1326  if (same_type_status == false) {
1327  throw std::runtime_error(
1328  "All elements of the array are not of the same logical subtype; "
1329  "consider casting to force this condition.");
1330  }
1331 
 // String arrays get fixed encoding with the transient dictionary id.
1332  sql_type.set_subtype(first_element_logical_type.get_type());
1333  sql_type.set_compression(kENCODING_FIXED);
1334  sql_type.set_comp_param(TRANSIENT_DICT_ID);
1335  },
1336  [&] {
1337  // Non string types
 // Every argument must have exactly the logical type of the first one.
1338  bool same_type_status = true;
1339  for (auto const& expr_ptr : translated_function_args) {
1340  same_type_status =
1341  same_type_status && (first_element_logical_type ==
1342  get_logical_type_info(expr_ptr->get_type_info()));
1343  }
1344 
1345  if (same_type_status == false) {
1346  throw std::runtime_error(
1347  "All elements of the array are not of the same logical subtype; "
1348  "consider casting to force this condition.");
1349  }
1350  sql_type.set_subtype(first_element_logical_type.get_type());
1351  sql_type.set_scale(first_element_logical_type.get_scale());
1352  sql_type.set_precision(first_element_logical_type.get_precision());
1353  });
1354 
1355  feature_stash_.setCPUOnlyExecutionRequired();
1356  return makeExpr<Analyzer::ArrayExpr>(
1357  sql_type, translated_function_args, feature_stash_.getAndBumpArrayExprCount());
1358  } else {
1359  throw std::runtime_error("NULL ARRAY[] expressions not supported yet. FIX-ME.");
1360  }
1361  } else {
 // The element subtype is already known: use the declared type unchanged.
1362  feature_stash_.setCPUOnlyExecutionRequired();
1363  return makeExpr<Analyzer::ArrayExpr>(rex_function->getType(),
1364  translateFunctionArgs(rex_function),
1365  feature_stash_.getAndBumpArrayExprCount());
1366  }
1367 }
1368 
1369 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateFunction(
1370  const RexFunctionOperator* rex_function) const {
1371  if (func_resolve(rex_function->getName(), "LIKE"sv, "PG_ILIKE"sv)) {
1372  return translateLike(rex_function);
1373  }
1374  if (rex_function->getName() == "REGEXP_LIKE"sv) {
1375  return translateRegexp(rex_function);
1376  }
1377  if (rex_function->getName() == "LIKELY"sv) {
1378  return translateLikely(rex_function);
1379  }
1380  if (rex_function->getName() == "UNLIKELY"sv) {
1381  return translateUnlikely(rex_function);
1382  }
1383  if (func_resolve(rex_function->getName(), "PG_EXTRACT"sv, "PG_DATE_TRUNC"sv)) {
1384  return translateExtract(rex_function);
1385  }
1386  if (rex_function->getName() == "DATEADD"sv) {
1387  return translateDateadd(rex_function);
1388  }
1389  if (rex_function->getName() == "DATEDIFF"sv) {
1390  return translateDatediff(rex_function);
1391  }
1392  if (rex_function->getName() == "DATEPART"sv) {
1393  return translateDatepart(rex_function);
1394  }
1395  if (func_resolve(rex_function->getName(), "LENGTH"sv, "CHAR_LENGTH"sv)) {
1396  return translateLength(rex_function);
1397  }
1398  if (rex_function->getName() == "KEY_FOR_STRING"sv) {
1399  return translateKeyForString(rex_function);
1400  }
1401  if (g_enable_experimental_string_functions && rex_function->getName() == "LOWER"sv) {
1402  return translateLower(rex_function);
1403  }
1404  if (func_resolve(rex_function->getName(), "CARDINALITY"sv, "ARRAY_LENGTH"sv)) {
1405  return translateCardinality(rex_function);
1406  }
1407  if (rex_function->getName() == "ITEM"sv) {
1408  return translateItem(rex_function);
1409  }
1410  if (rex_function->getName() == "NOW"sv) {
1411  return translateNow();
1412  }
1413  if (rex_function->getName() == "DATETIME"sv) {
1414  return translateDatetime(rex_function);
1415  }
1416  if (func_resolve(rex_function->getName(), "usTIMESTAMP"sv, "nsTIMESTAMP"sv)) {
1417  return translateHPTLiteral(rex_function);
1418  }
1419  if (rex_function->getName() == "ABS"sv) {
1420  return translateAbs(rex_function);
1421  }
1422  if (rex_function->getName() == "SIGN"sv) {
1423  return translateSign(rex_function);
1424  }
1425  if (func_resolve(rex_function->getName(), "CEIL"sv, "FLOOR"sv)) {
1426  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1427  rex_function->getType(),
1428  rex_function->getName(),
1429  translateFunctionArgs(rex_function));
1430  } else if (rex_function->getName() == "ROUND"sv) {
1431  std::vector<std::shared_ptr<Analyzer::Expr>> args =
1432  translateFunctionArgs(rex_function);
1433 
1434  if (rex_function->size() == 1) {
1435  // push a 0 constant if 2nd operand is missing.
1436  // this needs to be done as calcite returns
1437  // only the 1st operand without defaulting the 2nd one
1438  // when the user did not specify the 2nd operand.
1439  SQLTypes t = kSMALLINT;
1440  Datum d;
1441  d.smallintval = 0;
1442  args.push_back(makeExpr<Analyzer::Constant>(t, false, d));
1443  }
1444 
1445  // make sure we have only 2 operands
1446  CHECK(args.size() == 2);
1447 
1448  if (!args[0]->get_type_info().is_number()) {
1449  throw std::runtime_error("Only numeric 1st operands are supported");
1450  }
1451 
1452  // the 2nd operand does not need to be a constant
1453  // it can happily reference another integer column
1454  if (!args[1]->get_type_info().is_integer()) {
1455  throw std::runtime_error("Only integer 2nd operands are supported");
1456  }
1457 
1458  // Calcite may upcast decimals in a way that is
1459  // incompatible with the extension function input. Play it safe and stick with the
1460  // argument type instead.
1461  const SQLTypeInfo ret_ti = args[0]->get_type_info().is_decimal()
1462  ? args[0]->get_type_info()
1463  : rex_function->getType();
1464 
1465  return makeExpr<Analyzer::FunctionOperWithCustomTypeHandling>(
1466  ret_ti, rex_function->getName(), args);
1467  }
1468  if (rex_function->getName() == "DATETIME_PLUS"sv) {
1469  auto dt_plus = makeExpr<Analyzer::FunctionOper>(rex_function->getType(),
1470  rex_function->getName(),
1471  translateFunctionArgs(rex_function));
1472  const auto date_trunc = rewrite_to_date_trunc(dt_plus.get());
1473  if (date_trunc) {
1474  return date_trunc;
1475  }
1476  return translateDateadd(rex_function);
1477  }
1478  if (rex_function->getName() == "/INT"sv) {
1479  CHECK_EQ(size_t(2), rex_function->size());
1480  std::shared_ptr<Analyzer::Expr> lhs = translateScalarRex(rex_function->getOperand(0));
1481  std::shared_ptr<Analyzer::Expr> rhs = translateScalarRex(rex_function->getOperand(1));
1482  const auto rhs_lit = std::dynamic_pointer_cast<Analyzer::Constant>(rhs);
1483  return Parser::OperExpr::normalize(kDIVIDE, kONE, lhs, rhs);
1484  }
1485  if (rex_function->getName() == "Reinterpret"sv) {
1486  CHECK_EQ(size_t(1), rex_function->size());
1487  return translateScalarRex(rex_function->getOperand(0));
1488  }
1489  if (func_resolve(rex_function->getName(),
1490  "ST_X"sv,
1491  "ST_Y"sv,
1492  "ST_XMin"sv,
1493  "ST_YMin"sv,
1494  "ST_XMax"sv,
1495  "ST_YMax"sv,
1496  "ST_NRings"sv,
1497  "ST_NPoints"sv,
1498  "ST_Length"sv,
1499  "ST_Perimeter"sv,
1500  "ST_Area"sv,
1501  "ST_SRID"sv,
1502  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1503  "MapD_GeoPolyBoundsPtr"sv /* deprecated */,
1504  "OmniSci_Geo_PolyBoundsPtr"sv,
1505  "OmniSci_Geo_PolyRenderGroup"sv)) {
1506  CHECK_EQ(rex_function->size(), size_t(1));
1507  return translateUnaryGeoFunction(rex_function);
1508  }
1509  if (func_resolve(rex_function->getName(),
1510  "convert_meters_to_pixel_width"sv,
1511  "convert_meters_to_pixel_height"sv,
1512  "is_point_in_view"sv,
1513  "is_point_size_in_view"sv)) {
1514  return translateFunctionWithGeoArg(rex_function);
1515  }
1516  if (func_resolve(rex_function->getName(),
1517  "ST_Distance"sv,
1518  "ST_MaxDistance"sv,
1519  "ST_Intersects"sv,
1520  "ST_Disjoint"sv,
1521  "ST_Contains"sv,
1522  "ST_Within"sv)) {
1523  CHECK_EQ(rex_function->size(), size_t(2));
1524  return translateBinaryGeoFunction(rex_function);
1525  }
1526  if (func_resolve(rex_function->getName(), "ST_DWithin"sv, "ST_DFullyWithin"sv)) {
1527  CHECK_EQ(rex_function->size(), size_t(3));
1528  return translateTernaryGeoFunction(rex_function);
1529  }
1530  if (rex_function->getName() == "OFFSET_IN_FRAGMENT"sv) {
1531  CHECK_EQ(size_t(0), rex_function->size());
1532  return translateOffsetInFragment();
1533  }
1534  if (rex_function->getName() == "ARRAY"sv) {
1535  // Var args; currently no check. Possible fix-me -- can array have 0 elements?
1536  return translateArrayFunction(rex_function);
1537  }
1538  if (func_resolve(rex_function->getName(),
1539  "ST_GeomFromText"sv,
1540  "ST_GeogFromText"sv,
1541  "ST_Point"sv,
1542  "ST_SetSRID"sv)) {
1543  return translateGeoConstructor(rex_function);
1544  }
1545 
1546  auto arg_expr_list = translateFunctionArgs(rex_function);
1547  // Reset possibly wrong return type of rex_function to the return
1548  // type of the optimal valid implementation. The return type can be
1549  // wrong in the case of multiple implementations of UDF functions
1550  // that have different return types but Calcite specifies the return
1551  // type according to the first implementation.
1552  auto ext_func_sig = bind_function(rex_function->getName(), arg_expr_list);
1553  auto ret_ti = ext_arg_type_to_type_info(ext_func_sig.getRet());
1554  // By defualt, the extension function type will not allow nulls. If one of the arguments
1555  // is nullable, the extension function must also explicitly allow nulls.
1556  bool arguments_not_null = true;
1557  for (const auto& arg_expr : arg_expr_list) {
1558  if (!arg_expr->get_type_info().get_notnull()) {
1559  arguments_not_null = false;
1560  break;
1561  }
1562  }
1563  ret_ti.set_notnull(arguments_not_null);
1564  return makeExpr<Analyzer::FunctionOper>(ret_ti, rex_function->getName(), arg_expr_list);
1565 }
1566 
1567 namespace {
1568 
// Converts sort fields into order entries. Each entry is built from the field's
// position in `sort_fields`, whether it sorts descending and whether nulls come first.
1569 std::vector<Analyzer::OrderEntry> translate_collation(
1570  const std::vector<SortField>& sort_fields) {
1571  std::vector<Analyzer::OrderEntry> collation;
1572  for (size_t i = 0; i < sort_fields.size(); ++i) {
1573  const auto& sort_field = sort_fields[i];
1574  collation.emplace_back(i,
1575  sort_field.getSortDir() == SortDirection::Descending,
1576  sort_field.getNullsPosition() == NullSortedPosition::First);
1577  }
1578  return collation;
1579 }
1580 
// Accepts a lower window bound only if it is unbounded and preceding, neither
// following nor the current row, has no offset and has order_key 0.
// NOTE(review): the first signature line (embedded number 1581) is missing from this
// listing; translateWindowFunction() calls supported_lower_bound() with the lower
// bound, which is presumably this function -- confirm against the original file.
1582  const RexWindowFunctionOperator::RexWindowBound& window_bound) {
1583  return window_bound.unbounded && window_bound.preceding && !window_bound.following &&
1584  !window_bound.is_current_row && !window_bound.offset &&
1585  window_bound.order_key == 0;
1586 }
1587 
// Checks the upper window bound of `rex_window_function`.
// `to_current_row` describes a bound that is exactly the current row (not unbounded,
// not preceding/following, no offset, order_key 1). In the default case a function
// without order keys instead requires an unbounded following bound with order_key 2.
// NOTE(review): the case labels (embedded numbers 1594-1597) are missing from this
// listing, so the window function kinds that always require `to_current_row` are not
// visible here -- restore them from the original file.
1588 bool supported_upper_bound(const RexWindowFunctionOperator* rex_window_function) {
1589  const auto& window_bound = rex_window_function->getUpperBound();
1590  const bool to_current_row = !window_bound.unbounded && !window_bound.preceding &&
1591  !window_bound.following && window_bound.is_current_row &&
1592  !window_bound.offset && window_bound.order_key == 1;
1593  switch (rex_window_function->getKind()) {
1598  return to_current_row;
1599  }
1600  default: {
1601  return rex_window_function->getOrderKeys().empty()
1602  ? (window_bound.unbounded && !window_bound.preceding &&
1603  window_bound.following && !window_bound.is_current_row &&
1604  !window_bound.offset && window_bound.order_key == 2)
1605  : to_current_row;
1606  }
1607  }
1608 }
1609 
1610 } // namespace
1611 
1612 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateWindowFunction(
1613  const RexWindowFunctionOperator* rex_window_function) const {
1614  if (!supported_lower_bound(rex_window_function->getLowerBound()) ||
1615  !supported_upper_bound(rex_window_function) ||
1616  ((rex_window_function->getKind() == SqlWindowFunctionKind::ROW_NUMBER) !=
1617  rex_window_function->isRows())) {
1618  throw std::runtime_error("Frame specification not supported");
1619  }
1620  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1621  for (size_t i = 0; i < rex_window_function->size(); ++i) {
1622  args.push_back(translateScalarRex(rex_window_function->getOperand(i)));
1623  }
1624  std::vector<std::shared_ptr<Analyzer::Expr>> partition_keys;
1625  for (const auto& partition_key : rex_window_function->getPartitionKeys()) {
1626  partition_keys.push_back(translateScalarRex(partition_key.get()));
1627  }
1628  std::vector<std::shared_ptr<Analyzer::Expr>> order_keys;
1629  for (const auto& order_key : rex_window_function->getOrderKeys()) {
1630  order_keys.push_back(translateScalarRex(order_key.get()));
1631  }
1632  auto ti = rex_window_function->getType();
1633  if (window_function_is_value(rex_window_function->getKind())) {
1634  CHECK_GE(args.size(), 1u);
1635  ti = args.front()->get_type_info();
1636  }
1637  return makeExpr<Analyzer::WindowFunction>(
1638  ti,
1639  rex_window_function->getKind(),
1640  args,
1641  partition_keys,
1642  order_keys,
1643  translate_collation(rex_window_function->getCollation()));
1644 }
1645 
// Translates every operand of `rex_function`, in operand order, and returns the
// resulting expressions as a vector.
// NOTE(review): the first signature line (embedded number 1646, carrying the return
// type and function name) is missing from this listing; callers in this file use
// translateFunctionArgs(rex_function), which is presumably this function -- confirm
// against the original file.
1647  const RexFunctionOperator* rex_function) const {
1648  std::vector<std::shared_ptr<Analyzer::Expr>> args;
1649  for (size_t i = 0; i < rex_function->size(); ++i) {
1650  args.push_back(translateScalarRex(rex_function->getOperand(i)));
1651  }
1652  return args;
1653 }
1654 
// Splits a qualifier into a QualsConjunctiveForm holding two lists, `simple_quals`
// and `quals`, by recursing through AND operators.
// NOTE(review): the first signature line (embedded number 1655) is missing from this
// listing; the recursive calls below name the function qual_to_conjunctive_form and
// the return statements build a QualsConjunctiveForm -- confirm the exact signature
// against the original file.
1656  const std::shared_ptr<Analyzer::Expr> qual_expr) {
1657  CHECK(qual_expr);
1658  auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
 // Not a binary operator: it goes into the second list, rewritten by rewrite_expr()
 // when that yields a result.
1659  if (!bin_oper) {
1660  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1661  return {{}, {rewritten_qual_expr ? rewritten_qual_expr : qual_expr}};
1662  }
1663 
 // AND: process both sides recursively and concatenate their lists, left side first.
1664  if (bin_oper->get_optype() == kAND) {
1665  const auto lhs_cf = qual_to_conjunctive_form(bin_oper->get_own_left_operand());
1666  const auto rhs_cf = qual_to_conjunctive_form(bin_oper->get_own_right_operand());
1667  auto simple_quals = lhs_cf.simple_quals;
1668  simple_quals.insert(
1669  simple_quals.end(), rhs_cf.simple_quals.begin(), rhs_cf.simple_quals.end());
1670  auto quals = lhs_cf.quals;
1671  quals.insert(quals.end(), rhs_cf.quals.begin(), rhs_cf.quals.end());
1672  return {simple_quals, quals};
1673  }
 // Any other binary operator: if normalize_simple_predicate() yields a predicate it
 // goes into the first list, otherwise the original expression goes into the second.
1674  int rte_idx{0};
1675  const auto simple_qual = bin_oper->normalize_simple_predicate(rte_idx);
1676  return simple_qual ? QualsConjunctiveForm{{simple_qual}, {}}
1677  : QualsConjunctiveForm{{}, {qual_expr}};
1678 }
1679 
1680 std::vector<std::shared_ptr<Analyzer::Expr>> qual_to_disjunctive_form(
1681  const std::shared_ptr<Analyzer::Expr>& qual_expr) {
1682  CHECK(qual_expr);
1683  const auto bin_oper = std::dynamic_pointer_cast<const Analyzer::BinOper>(qual_expr);
1684  if (!bin_oper) {
1685  const auto rewritten_qual_expr = rewrite_expr(qual_expr.get());
1686  return {rewritten_qual_expr ? rewritten_qual_expr : qual_expr};
1687  }
1688  if (bin_oper->get_optype() == kOR) {
1689  const auto lhs_df = qual_to_disjunctive_form(bin_oper->get_own_left_operand());
1690  const auto rhs_df = qual_to_disjunctive_form(bin_oper->get_own_right_operand());
1691  auto quals = lhs_df;
1692  quals.insert(quals.end(), rhs_df.begin(), rhs_df.end());
1693  return quals;
1694  }
1695  return {qual_expr};
1696 }
1697 
1698 std::shared_ptr<Analyzer::Expr> RelAlgTranslator::translateHPTLiteral(
1699  const RexFunctionOperator* rex_function) const {
1700  /* since calcite uses Avatica package called DateTimeUtils to parse timestamp strings.
1701  Therefore any string having fractional seconds more 3 places after the decimal
1702  (milliseconds) will get truncated to 3 decimal places, therefore we lose precision
1703  (us|ns). Issue: [BE-2461] Here we are hijacking literal cast to Timestamp(6|9) from
1704  calcite and translating them to generate our own casts.
1705  */
1706  CHECK_EQ(size_t(1), rex_function->size());
1707  const auto operand = translateScalarRex(rex_function->getOperand(0));
1708  const auto& operand_ti = operand->get_type_info();
1709  const auto& target_ti = rex_function->getType();
1710  if (!operand_ti.is_string()) {
1711  throw std::runtime_error(
1712  "High precision timestamp cast argument must be a string. Input type is: " +
1713  operand_ti.get_type_name());
1714  } else if (!target_ti.is_high_precision_timestamp()) {
1715  throw std::runtime_error(
1716  "Cast target type should be high precision timestamp. Input type is: " +
1717  target_ti.get_type_name());
1718  } else if (target_ti.get_dimension() != 6 && target_ti.get_dimension() != 9) {
1719  throw std::runtime_error(
1720  "Cast target type should be TIMESTAMP(6|9). Input type is: TIMESTAMP(" +
1721  std::to_string(target_ti.get_dimension()) + ")");
1722  } else {
1723  return operand->add_cast(target_ti);
1724  }
1725 }
Definition: sqldefs.h:69
bool is_boolean() const
Definition: sqltypes.h:484
const RexScalar * getThen(const size_t idx) const
const std::vector< JoinType > join_types_
SQLAgg
Definition: sqldefs.h:71
#define CHECK_EQ(x, y)
Definition: Logger.h:198
auto func_resolve
std::shared_ptr< Analyzer::Expr > translateOffsetInFragment() const
SQLAgg getKind() const
Definition: sqltypes.h:52
std::shared_ptr< Analyzer::Expr > translateRegexp(const RexFunctionOperator *) const
bool supported_lower_bound(const RexWindowFunctionOperator::RexWindowBound &window_bound)
SQLTypes
Definition: sqltypes.h:41
bool g_cluster
size_t getOperand(size_t idx) const
const Executor * executor_
std::shared_ptr< Analyzer::Expr > translateUnlikely(const RexFunctionOperator *) const
const RexScalar * getElse() const
constexpr int64_t get_dateadd_timestamp_precision_scale(const DateaddField field)
Definition: DateTimeUtils.h:64
SQLQualifier
Definition: sqldefs.h:69
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t intval)
Definition: ParserNode.cpp:112
std::shared_ptr< Analyzer::Expr > translateFunction(const RexFunctionOperator *) const
#define LOG(tag)
Definition: Logger.h:185
std::shared_ptr< Analyzer::Expr > translateScalarRex(const RexScalar *rex) const
const SQLTypeInfo & getType() const
bool boolval
Definition: sqltypes.h:125
size_t size() const
const RexScalar * getOperand(const size_t idx) const
std::shared_ptr< Analyzer::Expr > translateUoper(const RexOperator *) const
const std::vector< SortField > & getCollation() const
SQLOps
Definition: sqldefs.h:29
ExtensionFunction bind_function(std::string name, Analyzer::ExpressionPtrVector func_args, const std::vector< ExtensionFunction > &ext_funcs)
HOST DEVICE int get_scale() const
Definition: sqltypes.h:331
std::shared_ptr< Analyzer::Expr > translateDateadd(const RexFunctionOperator *) const
Definition: sqldefs.h:38
std::shared_ptr< Analyzer::Expr > translateNow() const
#define CHECK_GE(x, y)
Definition: Logger.h:203
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:869
std::shared_ptr< Analyzer::Expr > translateAbs(const RexFunctionOperator *) const
Definition: sqldefs.h:49
Definition: sqldefs.h:30
const RexScalar * getWhen(const size_t idx) const
std::shared_ptr< Analyzer::Expr > ExpressionPtr
Definition: Analyzer.h:180
std::string getString(int32_t string_id) const
std::shared_ptr< Analyzer::Expr > getInIntegerSetExpr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set) const
Definition: sqldefs.h:41
SQLTypeInfo get_agg_type(const SQLAgg agg_kind, const Analyzer::Expr *arg_expr)
std::shared_ptr< Analyzer::Expr > translateItem(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > makeNumericConstant(const SQLTypeInfo &ti, const long val)
Analyzer::ExpressionPtr rewrite_expr(const Analyzer::Expr *expr)
QualsConjunctiveForm qual_to_conjunctive_form(const std::shared_ptr< Analyzer::Expr > qual_expr)
void set_scale(int s)
Definition: sqltypes.h:421
#define CHECK_GT(x, y)
Definition: Logger.h:202
bool is_agg_supported_for_type(const SQLAgg &agg_kind, const SQLTypeInfo &arg_ti)
double inline_fp_null_val(const SQL_TYPE_INFO &ti)
std::shared_ptr< Analyzer::Expr > translateOper(const RexOperator *) const
std::string to_string(char const *&&v)
void set_notnull(bool n)
Definition: sqltypes.h:423
std::shared_ptr< Analyzer::Expr > translateDatediff(const RexFunctionOperator *) const
QueryFeatureDescriptor & feature_stash_
std::shared_ptr< Analyzer::Expr > translateInput(const RexInput *) const
bool g_enable_watchdog
bool is_number() const
Definition: sqltypes.h:482
std::shared_ptr< Analyzer::Expr > translateSign(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateUnaryGeoFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > normalize(const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr >>> &, const std::shared_ptr< Analyzer::Expr >)
Definition: ParserNode.cpp:905
std::shared_ptr< Analyzer::Expr > translateGeoOverlapsOper(const RexOperator *) const
Definition: sqldefs.h:71
std::shared_ptr< Analyzer::Expr > translateLower(const RexFunctionOperator *) const
ExtractField to_datepart_field(const std::string &field)
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
void set_precision(int d)
Definition: sqltypes.h:419
unsigned getIndex() const
static std::shared_ptr< Analyzer::Expr > translateLiteral(const RexLiteral *)
SQLOps getOperator() const
bool window_function_is_value(const SqlWindowFunctionKind kind)
Definition: WindowContext.h:27
static constexpr int32_t INVALID_STR_ID
CHECK(cgen_state)
std::shared_ptr< Analyzer::Expr > translateDatetime(const RexFunctionOperator *) const
bool is_time() const
Definition: sqltypes.h:483
const std::pair< SQLOps, int64_t > get_dateadd_high_precision_adjusted_scale(const DateaddField field, int32_t dimen)
void fill_dictionary_encoded_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice, const StringDictionaryProxy *source_dict, const StringDictionaryProxy *dest_dict, const int64_t needle_null_val)
#define CHECK_NE(x, y)
Definition: Logger.h:199
const std::shared_ptr< Analyzer::Expr > generate() const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > pattern_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_not)
Definition: ParserNode.cpp:625
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:326
int64_t bigintval
Definition: sqltypes.h:129
std::vector< Analyzer::OrderEntry > translate_collation(const std::vector< SortField > &sort_fields)
size_t branchCount() const
Definition: sqldefs.h:37
Definition: sqldefs.h:71
Definition: sqldefs.h:69
int getDatabaseId() const
Definition: Catalog.h:192
int16_t smallintval
Definition: sqltypes.h:127
SQLTypeInfo build_type_info(const SQLTypes sql_type, const int scale, const int precision)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const std::string &)
Definition: ParserNode.cpp:97
DatetruncField to_datediff_field(const std::string &field)
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const DictRef dest_dict_ref, const std::vector< int32_t > &source_ids, const DictRef source_dict_ref, const int32_t dest_generation)
std::shared_ptr< Analyzer::Expr > translateGeoConstructor(const RexFunctionOperator *) const
const std::vector< LeafHostInfo > & getStringDictionaryHosts() const
Definition: Catalog.cpp:1386
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:852
const ColumnDescriptor * getMetadataForColumnBySpi(const int tableId, const size_t spi) const
Definition: Catalog.cpp:1437
const std::unordered_map< const RelAlgNode *, int > input_to_nest_level_
#define UNLIKELY(x)
Definition: likely.h:20
static std::shared_ptr< Analyzer::Expr > normalize(const SQLOps optype, const SQLQualifier qual, std::shared_ptr< Analyzer::Expr > left_expr, std::shared_ptr< Analyzer::Expr > right_expr)
Definition: ParserNode.cpp:261
std::pair< std::shared_ptr< Analyzer::Expr >, SQLQualifier > get_quantified_rhs(const RexScalar *rex_scalar, const RelAlgTranslator &translator)
Definition: sqldefs.h:34
#define CHECK_LT(x, y)
Definition: Logger.h:200
Definition: sqltypes.h:55
Definition: sqltypes.h:56
Analyzer::ExpressionPtrVector translateFunctionArgs(const RexFunctionOperator *) const
Definition: sqldefs.h:40
Definition: sqldefs.h:69
const ConstRexScalarPtrVector & getPartitionKeys() const
#define TRANSIENT_DICT_ID
Definition: sqltypes.h:189
const RexWindowBound & getLowerBound() const
#define CHECK_LE(x, y)
Definition: Logger.h:201
std::shared_ptr< Analyzer::Expr > translateOverlapsOper(const RexOperator *) const
bool is_null(const T &v, const SQLTypeInfo &t)
std::shared_ptr< Analyzer::Expr > translateArrayFunction(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > get(std::shared_ptr< Analyzer::Expr > arg_expr, std::shared_ptr< Analyzer::Expr > like_expr, std::shared_ptr< Analyzer::Expr > escape_expr, const bool is_ilike, const bool is_not)
Definition: ParserNode.cpp:530
bool g_enable_experimental_string_functions
SqlWindowFunctionKind getKind() const
std::shared_ptr< Analyzer::Expr > translateLike(const RexFunctionOperator *) const
const RelAlgNode * getSourceNode() const
Definition: sqltypes.h:44
bool supported_upper_bound(const RexWindowFunctionOperator *rex_window_function)
bool takes_arg(const TargetInfo &target_info)
static std::shared_ptr< Analyzer::Expr > analyzeValue(const int64_t numericval, const int scale, const int precision)
Definition: ParserNode.cpp:137
std::shared_ptr< Analyzer::Expr > translateLikely(const RexFunctionOperator *) const
const RexWindowBound & getUpperBound() const
Definition: sqldefs.h:53
constexpr bool is_subsecond_dateadd_field(const DateaddField field)
Definition: DateTimeUtils.h:96
std::shared_ptr< Analyzer::Expr > get_in_values_expr(std::shared_ptr< Analyzer::Expr > arg, const ResultSet &val_set)
static std::shared_ptr< Analyzer::Expr > get(const int64_t)
Definition: ParserNode.cpp:173
bool can_use_parallel_algorithms(const ResultSet &rows)
Definition: ResultSet.cpp:869
bool isDistinct() const
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:327
std::shared_ptr< Analyzer::Expr > translateTernaryGeoFunction(const RexFunctionOperator *) const
const ConstRexScalarPtrVector & getOrderKeys() const
std::vector< std::shared_ptr< Analyzer::Expr > > qual_to_disjunctive_form(const std::shared_ptr< Analyzer::Expr > &qual_expr)
std::shared_ptr< Analyzer::Expr > translateBinaryGeoFunction(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Constant > make_fp_constant(const int64_t val, const SQLTypeInfo &ti)
std::pair< Datum, bool > datum_from_scalar_tv(const ScalarTargetValue *scalar_tv, const SQLTypeInfo &ti) noexcept
std::shared_ptr< Analyzer::Expr > translateInOper(const RexOperator *) const
uint64_t exp_to_scale(const unsigned exp)
size_t size() const
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
std::vector< ExpressionPtr > ExpressionPtrVector
Definition: Analyzer.h:182
Definition: sqldefs.h:33
std::shared_ptr< Analyzer::Expr > translateCase(const RexCase *) const
std::shared_ptr< Analyzer::Expr > translateFunctionWithGeoArg(const RexFunctionOperator *) const
Definition: sqltypes.h:48
const std::string & getName() const
std::string get_datetimeplus_rewrite_funcname(const SQLOps &op)
bool is_decimal() const
Definition: sqltypes.h:480
std::shared_ptr< Analyzer::Expr > translateCardinality(const RexFunctionOperator *) const
Definition: sqldefs.h:71
int cpu_threads()
Definition: thread_count.h:25
const bool just_explain_
const std::vector< TargetMetaInfo > & getOutputMetainfo() const
std::shared_ptr< Analyzer::Expr > translateGeoComparison(const RexOperator *) const
std::shared_ptr< Analyzer::Expr > translateDatePlusMinus(const RexOperator *) const
const std::list< std::shared_ptr< Analyzer::Expr > > simple_quals
std::shared_ptr< Analyzer::Expr > translateHPTLiteral(const RexFunctionOperator *) const
Definition: sqldefs.h:71
bool is_distinct(const size_t input_idx, const RelAlgNode *node)
int32_t getIdOfString(const std::string &str) const
std::shared_ptr< Analyzer::Expr > translateDatepart(const RexFunctionOperator *) const
Definition: sqldefs.h:39
std::shared_ptr< Analyzer::Expr > rewrite_to_date_trunc(const Analyzer::FunctionOper *dt_plus)
SQLTypeInfo ext_arg_type_to_type_info(const ExtArgumentType ext_arg_type)
DateaddField to_dateadd_field(const std::string &field)
std::shared_ptr< Analyzer::Expr > fold_expr(const Analyzer::Expr *expr)
#define IS_COMPARISON(X)
Definition: sqldefs.h:57
double doubleval
Definition: sqltypes.h:131
void fill_integer_in_vals(std::vector< int64_t > &in_vals, std::atomic< size_t > &total_in_vals_count, const ResultSet *values_rowset, const std::pair< int64_t, int64_t > values_rowset_slice)
const Catalog_Namespace::Catalog & cat_
std::shared_ptr< Analyzer::Expr > translateKeyForString(const RexFunctionOperator *) const
static std::shared_ptr< Analyzer::Expr > translateAggregateRex(const RexAgg *rex, const std::vector< std::shared_ptr< Analyzer::Expr >> &scalar_sources)
std::shared_ptr< Analyzer::Expr > translateWindowFunction(const RexWindowFunctionOperator *) const
const std::shared_ptr< Analyzer::Expr > generate() const
std::shared_ptr< Analyzer::Expr > translateScalarSubquery(const RexSubQuery *) const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:156
std::shared_ptr< Analyzer::Expr > translateLength(const RexFunctionOperator *) const
std::shared_ptr< Analyzer::Expr > translateExtract(const RexFunctionOperator *) const