OmniSciDB  91042dcc5b
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExpressionRange.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ExpressionRange.h"
18 #include "DateTimeTranslator.h"
19 #include "DateTimeUtils.h"
20 #include "DateTruncate.h"
22 #include "Execute.h"
23 #include "ExtractFromTime.h"
24 #include "GroupByAndAggregate.h"
26 
27 #include <algorithm>
28 #include <cfenv>
29 #include <cmath>
30 
// DEF_OPERATOR(fname, op) expands to the definition of an ExpressionRange member
// function named `fname` that combines this range with `other` using the arithmetic
// operator `op`:
//  - if either operand has type Invalid, an invalid range is returned;
//  - otherwise both operands must have the same range type (enforced with CHECK);
//  - Integer ranges apply `op` through checked_int64_t and convert the result back to
//    int64_t (NOTE(review): presumably the checked type raises on overflow -- confirm);
//  - Float and Double ranges clear the FE_OVERFLOW and FE_UNDERFLOW flags, apply `op`,
//    and throw std::runtime_error("overflow / underflow") if either flag is set
//    afterwards;
//  - any other range type trips CHECK(false).
// In each case the per-value lambda is handed to binOp<T> together with `other`, and
// the range binOp returns is the result of the operator.
31 #define DEF_OPERATOR(fname, op) \
32  ExpressionRange fname(const ExpressionRange& other) const { \
33  if (type_ == ExpressionRangeType::Invalid || \
34  other.type_ == ExpressionRangeType::Invalid) { \
35  return ExpressionRange::makeInvalidRange(); \
36  } \
37  CHECK(type_ == other.type_); \
38  switch (type_) { \
39  case ExpressionRangeType::Integer: \
40  return binOp<int64_t>(other, [](const int64_t x, const int64_t y) { \
41  return int64_t(checked_int64_t(x) op y); \
42  }); \
43  case ExpressionRangeType::Float: \
44  return binOp<float>(other, [](const float x, const float y) { \
45  std::feclearexcept(FE_OVERFLOW); \
46  std::feclearexcept(FE_UNDERFLOW); \
47  auto result = x op y; \
48  if (std::fetestexcept(FE_OVERFLOW) || std::fetestexcept(FE_UNDERFLOW)) { \
49  throw std::runtime_error("overflow / underflow"); \
50  } \
51  return result; \
52  }); \
53  case ExpressionRangeType::Double: \
54  return binOp<double>(other, [](const double x, const double y) { \
55  std::feclearexcept(FE_OVERFLOW); \
56  std::feclearexcept(FE_UNDERFLOW); \
57  auto result = x op y; \
58  if (std::fetestexcept(FE_OVERFLOW) || std::fetestexcept(FE_UNDERFLOW)) { \
59  throw std::runtime_error("overflow / underflow"); \
60  } \
61  return result; \
62  }); \
63  default: \
64  CHECK(false); \
65  } \
66  CHECK(false); \
67  return ExpressionRange::makeInvalidRange(); \
68  }
69 
// Define ExpressionRange::operator+, operator- and operator* by expanding
// DEF_OPERATOR once per arithmetic operator.
70 DEF_OPERATOR(ExpressionRange::operator+, +)
71 DEF_OPERATOR(ExpressionRange::operator-, -)
72 DEF_OPERATOR(ExpressionRange::operator*, *)
73 
74 void apply_fp_qual(const Datum const_datum,
75  const SQLTypes const_type,
76  const SQLOps sql_op,
77  ExpressionRange& qual_range) {
78  double const_val = get_value_from_datum<double>(const_datum, const_type);
79  switch (sql_op) {
80  case kGT:
81  case kGE:
82  qual_range.setFpMin(std::max(qual_range.getFpMin(), const_val));
83  break;
84  case kLT:
85  case kLE:
86  qual_range.setFpMax(std::min(qual_range.getFpMax(), const_val));
87  break;
88  case kEQ:
89  qual_range.setFpMin(std::max(qual_range.getFpMin(), const_val));
90  qual_range.setFpMax(std::min(qual_range.getFpMax(), const_val));
91  break;
92  default: // there may be other operators, but don't do anything with them
93  break;
94  }
95 }
96 
97 void apply_int_qual(const Datum const_datum,
98  const SQLTypes const_type,
99  const SQLOps sql_op,
100  ExpressionRange& qual_range) {
101  int64_t const_val = get_value_from_datum<int64_t>(const_datum, const_type);
102  switch (sql_op) {
103  case kGT:
104  qual_range.setIntMin(std::max(qual_range.getIntMin(), const_val + 1));
105  break;
106  case kGE:
107  qual_range.setIntMin(std::max(qual_range.getIntMin(), const_val));
108  break;
109  case kLT:
110  qual_range.setIntMax(std::min(qual_range.getIntMax(), const_val - 1));
111  break;
112  case kLE:
113  qual_range.setIntMax(std::min(qual_range.getIntMax(), const_val));
114  break;
115  case kEQ:
116  qual_range.setIntMin(std::max(qual_range.getIntMin(), const_val));
117  qual_range.setIntMax(std::min(qual_range.getIntMax(), const_val));
118  break;
119  default: // there may be other operators, but don't do anything with them
120  break;
121  }
122 }
123 
124 void apply_hpt_qual(const Datum const_datum,
125  const SQLTypes const_type,
126  const int32_t const_dimen,
127  const int32_t col_dimen,
128  const SQLOps sql_op,
129  ExpressionRange& qual_range) {
130  CHECK(const_dimen != col_dimen);
131  Datum datum{0};
132  if (const_dimen > col_dimen) {
133  datum.bigintval =
134  get_value_from_datum<int64_t>(const_datum, const_type) /
135  DateTimeUtils::get_timestamp_precision_scale(const_dimen - col_dimen);
136  } else {
137  datum.bigintval =
138  get_value_from_datum<int64_t>(const_datum, const_type) *
139  DateTimeUtils::get_timestamp_precision_scale(col_dimen - const_dimen);
140  }
141  apply_int_qual(datum, const_type, sql_op, qual_range);
142 }
143 
145  const Analyzer::ColumnVar* col_expr,
146  const ExpressionRange& col_range,
147  const boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
148  if (!simple_quals) {
149  return col_range;
150  }
151  ExpressionRange qual_range(col_range);
152  for (auto const& itr : simple_quals.get()) {
153  auto qual_bin_oper = dynamic_cast<Analyzer::BinOper*>(itr.get());
154  if (!qual_bin_oper) {
155  continue;
156  }
157  const Analyzer::Expr* left_operand = qual_bin_oper->get_left_operand();
158  auto qual_col = dynamic_cast<const Analyzer::ColumnVar*>(left_operand);
159  if (!qual_col) {
160  // Check for possibility that column is wrapped in a cast
161  // Presumes that only simple casts (i.e. timestamp to timestamp or int to int) have
162  // been passed through by BinOper::normalize_simple_predicate
163  auto u_expr = dynamic_cast<const Analyzer::UOper*>(left_operand);
164  if (!u_expr) {
165  continue;
166  }
167  qual_col = dynamic_cast<const Analyzer::ColumnVar*>(u_expr->get_operand());
168  if (!qual_col) {
169  continue;
170  }
171  }
172  if (qual_col->get_table_id() != col_expr->get_table_id() ||
173  qual_col->get_column_id() != col_expr->get_column_id()) {
174  continue;
175  }
176  const Analyzer::Expr* right_operand = qual_bin_oper->get_right_operand();
177  auto qual_const = dynamic_cast<const Analyzer::Constant*>(right_operand);
178  if (!qual_const) {
179  continue;
180  }
181  if (qual_range.getType() == ExpressionRangeType::Float ||
182  qual_range.getType() == ExpressionRangeType::Double) {
183  apply_fp_qual(qual_const->get_constval(),
184  qual_const->get_type_info().get_type(),
185  qual_bin_oper->get_optype(),
186  qual_range);
187  } else if ((qual_col->get_type_info().is_timestamp() ||
188  qual_const->get_type_info().is_timestamp()) &&
189  (qual_col->get_type_info().get_dimension() !=
190  qual_const->get_type_info().get_dimension())) {
191  apply_hpt_qual(qual_const->get_constval(),
192  qual_const->get_type_info().get_type(),
193  qual_const->get_type_info().get_dimension(),
194  qual_col->get_type_info().get_dimension(),
195  qual_bin_oper->get_optype(),
196  qual_range);
197  } else {
198  apply_int_qual(qual_const->get_constval(),
199  qual_const->get_type_info().get_type(),
200  qual_bin_oper->get_optype(),
201  qual_range);
202  }
203  }
204  return qual_range;
205 }
206 
211  }
212  if (other.int_min_ * other.int_max_ <= 0) {
213  // if the other interval contains 0, the rule is more complicated;
214  // punt for now, we can revisit by splitting the other interval and
215  // taking the convex hull of the resulting two intervals
217  }
218  auto div_range = binOp<int64_t>(other, [](const int64_t x, const int64_t y) {
219  return int64_t(checked_int64_t(x) / y);
220  });
221  if (g_null_div_by_zero) {
222  div_range.setHasNulls();
223  }
224  return div_range;
225 }
226 
228  if (type_ != other.type_) {
230  }
232  switch (type_) {
237  result.has_nulls_ = has_nulls_ || other.has_nulls_;
238  result.int_min_ = std::min(int_min_, other.int_min_);
239  result.int_max_ = std::max(int_max_, other.int_max_);
240  result.bucket_ = std::min(bucket_, other.bucket_);
241  break;
242  }
245  result.type_ = type_;
246  result.has_nulls_ = has_nulls_ || other.has_nulls_;
247  result.fp_min_ = std::min(fp_min_, other.fp_min_);
248  result.fp_max_ = std::max(fp_max_, other.fp_max_);
249  break;
250  }
251  default:
252  CHECK(false);
253  }
254  return result;
255 }
256 
258  if (type_ != other.type_) {
259  return false;
260  }
261  switch (type_) {
263  return true;
265  return has_nulls_ == other.has_nulls_ && int_min_ == other.int_min_ &&
266  int_max_ == other.int_max_;
267  }
270  return has_nulls_ == other.has_nulls_ && fp_min_ == other.fp_min_ &&
271  fp_max_ == other.fp_max_;
272  }
273  default:
274  CHECK(false);
275  }
276  return false;
277 }
278 
280  if (ti.is_array()) {
281  return typeSupportsRange(ti.get_elem_type());
282  } else {
283  return (ti.is_number() || ti.is_boolean() || ti.is_time() ||
284  (ti.is_string() && ti.get_compression() == kENCODING_DICT));
285  }
286 }
287 
289  const Analyzer::BinOper* expr,
290  const std::vector<InputTableInfo>& query_infos,
291  const Executor*,
292  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
293 
295 
297  const Analyzer::ColumnVar* col_expr,
298  const std::vector<InputTableInfo>& query_infos,
299  const Executor* executor,
300  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
301 
303 
305  const std::vector<InputTableInfo>& query_infos,
306  const Executor*);
307 
309  const Analyzer::UOper* u_expr,
310  const std::vector<InputTableInfo>& query_infos,
311  const Executor*,
312  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
313 
315  const Analyzer::ExtractExpr* extract_expr,
316  const std::vector<InputTableInfo>& query_infos,
317  const Executor*,
318  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
319 
321  const Analyzer::DatetruncExpr* datetrunc_expr,
322  const std::vector<InputTableInfo>& query_infos,
323  const Executor* executor,
324  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
325 
328  const std::vector<InputTableInfo>& query_infos,
329  const Executor* executor,
330  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
331 
333  const Analyzer::Expr* expr,
334  const std::vector<InputTableInfo>& query_infos,
335  const Executor* executor,
336  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
337  const auto& expr_ti = expr->get_type_info();
338  if (!ExpressionRange::typeSupportsRange(expr_ti)) {
340  }
341  auto bin_oper_expr = dynamic_cast<const Analyzer::BinOper*>(expr);
342  if (bin_oper_expr) {
343  return getExpressionRange(bin_oper_expr, query_infos, executor, simple_quals);
344  }
345  auto constant_expr = dynamic_cast<const Analyzer::Constant*>(expr);
346  if (constant_expr) {
347  return getExpressionRange(constant_expr);
348  }
349  auto column_var_expr = dynamic_cast<const Analyzer::ColumnVar*>(expr);
350  if (column_var_expr) {
351  return getExpressionRange(column_var_expr, query_infos, executor, simple_quals);
352  }
353  auto like_expr = dynamic_cast<const Analyzer::LikeExpr*>(expr);
354  if (like_expr) {
355  return getExpressionRange(like_expr);
356  }
357  auto case_expr = dynamic_cast<const Analyzer::CaseExpr*>(expr);
358  if (case_expr) {
359  return getExpressionRange(case_expr, query_infos, executor);
360  }
361  auto u_expr = dynamic_cast<const Analyzer::UOper*>(expr);
362  if (u_expr) {
363  return getExpressionRange(u_expr, query_infos, executor, simple_quals);
364  }
365  auto extract_expr = dynamic_cast<const Analyzer::ExtractExpr*>(expr);
366  if (extract_expr) {
367  return getExpressionRange(extract_expr, query_infos, executor, simple_quals);
368  }
369  auto datetrunc_expr = dynamic_cast<const Analyzer::DatetruncExpr*>(expr);
370  if (datetrunc_expr) {
371  return getExpressionRange(datetrunc_expr, query_infos, executor, simple_quals);
372  }
373  auto width_bucket_expr = dynamic_cast<const Analyzer::WidthBucketExpr*>(expr);
374  if (width_bucket_expr) {
375  return getExpressionRange(width_bucket_expr, query_infos, executor, simple_quals);
376  }
378 }
379 
380 namespace {
381 
382 int64_t scale_up_interval_endpoint(const int64_t endpoint, const SQLTypeInfo& ti) {
383  return endpoint * static_cast<int64_t>(exp_to_scale(ti.get_scale()));
384 }
385 
386 } // namespace
387 
389  const Analyzer::BinOper* expr,
390  const std::vector<InputTableInfo>& query_infos,
391  const Executor* executor,
392  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
393  const auto& lhs =
394  getExpressionRange(expr->get_left_operand(), query_infos, executor, simple_quals);
395  const auto& rhs =
396  getExpressionRange(expr->get_right_operand(), query_infos, executor, simple_quals);
397  switch (expr->get_optype()) {
398  case kPLUS:
399  return lhs + rhs;
400  case kMINUS:
401  return lhs - rhs;
402  case kMULTIPLY:
403  return lhs * rhs;
404  case kDIVIDE: {
405  const auto& lhs_type = expr->get_left_operand()->get_type_info();
406  if (lhs_type.is_decimal() && lhs.getType() != ExpressionRangeType::Invalid) {
407  CHECK(lhs.getType() == ExpressionRangeType::Integer);
408  const auto adjusted_lhs = ExpressionRange::makeIntRange(
409  scale_up_interval_endpoint(lhs.getIntMin(), lhs_type),
410  scale_up_interval_endpoint(lhs.getIntMax(), lhs_type),
411  0,
412  lhs.hasNulls());
413  return adjusted_lhs / rhs;
414  }
415  return lhs / rhs;
416  }
417  default:
418  break;
419  }
421 }
422 
424  if (constant_expr->get_is_null()) {
426  }
427  const auto constant_type = constant_expr->get_type_info().get_type();
428  const auto datum = constant_expr->get_constval();
429  switch (constant_type) {
430  case kTINYINT: {
431  const int64_t v = datum.tinyintval;
432  return ExpressionRange::makeIntRange(v, v, 0, false);
433  }
434  case kSMALLINT: {
435  const int64_t v = datum.smallintval;
436  return ExpressionRange::makeIntRange(v, v, 0, false);
437  }
438  case kINT: {
439  const int64_t v = datum.intval;
440  return ExpressionRange::makeIntRange(v, v, 0, false);
441  }
442  case kBIGINT:
443  case kNUMERIC:
444  case kDECIMAL: {
445  const int64_t v = datum.bigintval;
446  return ExpressionRange::makeIntRange(v, v, 0, false);
447  }
448  case kTIME:
449  case kTIMESTAMP:
450  case kDATE: {
451  const int64_t v = datum.bigintval;
452  return ExpressionRange::makeIntRange(v, v, 0, false);
453  }
454  case kFLOAT: {
455  return ExpressionRange::makeFloatRange(datum.floatval, datum.floatval, false);
456  }
457  case kDOUBLE: {
458  return ExpressionRange::makeDoubleRange(datum.doubleval, datum.doubleval, false);
459  }
460  default:
461  break;
462  }
464 }
465 
// FIND_STAT_FRAG(stat_name) -- `stat_name` is `min` or `max`.
//
// Expands to a declaration of `<stat_name>_frag_index`, the iterator returned by
// std::<stat_name>_element over `nonempty_fragment_indices`, followed by an early
// `return ExpressionRange::makeInvalidRange();` from the enclosing function when that
// iterator equals end().
//
// The expansion site must have these names in scope: `nonempty_fragment_indices`,
// `fragments`, `has_nulls`, `col_id` and `col_ti`.
//
// The comparator works on fragment indices:
//  - it looks up `col_id` in each fragment's chunk metadata map; a missing entry on the
//    left-hand side makes it return false, while a missing entry on the right-hand side
//    trips a CHECK;
//  - as a side effect it sets `has_nulls` to true when either fragment's chunk stats
//    report nulls;
//  - it orders fragments with `<` on extract_<stat_name>_stat_fp_type for
//    floating-point columns and on extract_<stat_name>_stat_int_type otherwise.
466 #define FIND_STAT_FRAG(stat_name) \
467  const auto stat_name##_frag_index = std::stat_name##_element( \
468  nonempty_fragment_indices.begin(), \
469  nonempty_fragment_indices.end(), \
470  [&fragments, &has_nulls, col_id, col_ti](const size_t lhs_idx, \
471  const size_t rhs_idx) { \
472  const auto& lhs = fragments[lhs_idx]; \
473  const auto& rhs = fragments[rhs_idx]; \
474  auto lhs_meta_it = lhs.getChunkMetadataMap().find(col_id); \
475  if (lhs_meta_it == lhs.getChunkMetadataMap().end()) { \
476  return false; \
477  } \
478  auto rhs_meta_it = rhs.getChunkMetadataMap().find(col_id); \
479  CHECK(rhs_meta_it != rhs.getChunkMetadataMap().end()); \
480  if (lhs_meta_it->second->chunkStats.has_nulls || \
481  rhs_meta_it->second->chunkStats.has_nulls) { \
482  has_nulls = true; \
483  } \
484  if (col_ti.is_fp()) { \
485  return extract_##stat_name##_stat_fp_type(lhs_meta_it->second->chunkStats, \
486  col_ti) < \
487  extract_##stat_name##_stat_fp_type(rhs_meta_it->second->chunkStats, \
488  col_ti); \
489  } \
490  return extract_##stat_name##_stat_int_type(lhs_meta_it->second->chunkStats, \
491  col_ti) < \
492  extract_##stat_name##_stat_int_type(rhs_meta_it->second->chunkStats, \
493  col_ti); \
494  }); \
495  if (stat_name##_frag_index == nonempty_fragment_indices.end()) { \
496  return ExpressionRange::makeInvalidRange(); \
497  }
498 
499 namespace {
500 
501 int64_t get_conservative_datetrunc_bucket(const DatetruncField datetrunc_field) {
502  const int64_t day_seconds{24 * 3600};
503  const int64_t year_days{365};
504  switch (datetrunc_field) {
505  case dtYEAR:
506  return year_days * day_seconds;
507  case dtQUARTER:
508  return 90 * day_seconds; // 90 is least number of days in any quater
509  case dtMONTH:
510  return 28 * day_seconds;
511  case dtDAY:
512  return day_seconds;
513  case dtHOUR:
514  return 3600;
515  case dtMINUTE:
516  return 60;
517  case dtMILLENNIUM:
518  return 1000 * year_days * day_seconds;
519  case dtCENTURY:
520  return 100 * year_days * day_seconds;
521  case dtDECADE:
522  return 10 * year_days * day_seconds;
523  case dtWEEK:
524  case dtWEEK_SUNDAY:
525  case dtWEEK_SATURDAY:
526  return 7 * day_seconds;
527  case dtQUARTERDAY:
528  return 4 * 60 * 50;
529  default:
530  return 0;
531  }
532 }
533 
534 } // namespace
535 
537  const std::vector<InputTableInfo>& query_infos,
538  const Executor* executor,
539  const bool is_outer_join_proj) {
540  bool has_nulls = is_outer_join_proj;
541  int col_id = col_expr->get_column_id();
542  const auto& col_phys_ti = col_expr->get_type_info().is_array()
543  ? col_expr->get_type_info().get_elem_type()
544  : col_expr->get_type_info();
545  const auto col_ti = get_logical_type_info(col_phys_ti);
546  switch (col_ti.get_type()) {
547  case kTEXT:
548  case kCHAR:
549  case kVARCHAR:
550  CHECK_EQ(kENCODING_DICT, col_ti.get_compression());
551  case kBOOLEAN:
552  case kTINYINT:
553  case kSMALLINT:
554  case kINT:
555  case kBIGINT:
556  case kDECIMAL:
557  case kNUMERIC:
558  case kDATE:
559  case kTIMESTAMP:
560  case kTIME:
561  case kFLOAT:
562  case kDOUBLE: {
563  std::optional<size_t> ti_idx;
564  for (size_t i = 0; i < query_infos.size(); ++i) {
565  if (col_expr->get_table_id() == query_infos[i].table_id) {
566  ti_idx = i;
567  break;
568  }
569  }
570  CHECK(ti_idx);
571  const auto& query_info = query_infos[*ti_idx].info;
572  const auto& fragments = query_info.fragments;
573  const auto cd = executor->getColumnDescriptor(col_expr);
574  if (cd && cd->isVirtualCol) {
575  CHECK(cd->columnName == "rowid");
576  CHECK_EQ(kBIGINT, col_ti.get_type());
577  const int64_t num_tuples = query_info.getNumTuples();
579  0, std::max(num_tuples - 1, int64_t(0)), 0, has_nulls);
580  }
581  if (query_info.getNumTuples() == 0) {
582  // The column doesn't contain any values, synthesize an empty range.
583  if (col_ti.is_fp()) {
584  return col_ti.get_type() == kFLOAT
585  ? ExpressionRange::makeFloatRange(0, -1, false)
586  : ExpressionRange::makeDoubleRange(0, -1, false);
587  }
588  return ExpressionRange::makeIntRange(0, -1, 0, false);
589  }
590  std::vector<size_t> nonempty_fragment_indices;
591  for (size_t i = 0; i < fragments.size(); ++i) {
592  const auto& fragment = fragments[i];
593  if (!fragment.isEmptyPhysicalFragment()) {
594  nonempty_fragment_indices.push_back(i);
595  }
596  }
597  FIND_STAT_FRAG(min);
598  FIND_STAT_FRAG(max);
599  const auto& min_frag = fragments[*min_frag_index];
600  const auto min_it = min_frag.getChunkMetadataMap().find(col_id);
601  if (min_it == min_frag.getChunkMetadataMap().end()) {
603  }
604  const auto& max_frag = fragments[*max_frag_index];
605  const auto max_it = max_frag.getChunkMetadataMap().find(col_id);
606  CHECK(max_it != max_frag.getChunkMetadataMap().end());
607  for (const auto& fragment : fragments) {
608  const auto it = fragment.getChunkMetadataMap().find(col_id);
609  if (it != fragment.getChunkMetadataMap().end()) {
610  if (it->second->chunkStats.has_nulls) {
611  has_nulls = true;
612  break;
613  }
614  }
615  }
616  if (col_ti.is_fp()) {
617  const auto min_val = extract_min_stat_fp_type(min_it->second->chunkStats, col_ti);
618  const auto max_val = extract_max_stat_fp_type(max_it->second->chunkStats, col_ti);
619  return col_ti.get_type() == kFLOAT
620  ? ExpressionRange::makeFloatRange(min_val, max_val, has_nulls)
621  : ExpressionRange::makeDoubleRange(min_val, max_val, has_nulls);
622  }
623  const auto min_val = extract_min_stat_int_type(min_it->second->chunkStats, col_ti);
624  const auto max_val = extract_max_stat_int_type(max_it->second->chunkStats, col_ti);
625  if (max_val < min_val) {
626  // The column doesn't contain any non-null values, synthesize an empty range.
627  CHECK_GT(min_val, 0);
628  return ExpressionRange::makeIntRange(0, -1, 0, has_nulls);
629  }
630  const int64_t bucket =
631  col_ti.get_type() == kDATE ? get_conservative_datetrunc_bucket(dtDAY) : 0;
632  return ExpressionRange::makeIntRange(min_val, max_val, bucket, has_nulls);
633  }
634  default:
635  break;
636  }
638 }
639 
640 #undef FIND_STAT_FRAG
641 
643  const Analyzer::ColumnVar* col_expr,
644  const std::vector<InputTableInfo>& query_infos,
645  const Executor* executor,
646  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
647  const int rte_idx = col_expr->get_rte_idx();
648  CHECK_GE(rte_idx, 0);
649  CHECK_LT(static_cast<size_t>(rte_idx), query_infos.size());
650  bool is_outer_join_proj = rte_idx > 0 && executor->containsLeftDeepOuterJoin();
651  if (col_expr->get_table_id() > 0) {
652  auto col_range = executor->getColRange(
653  PhysicalInput{col_expr->get_column_id(), col_expr->get_table_id()});
654  if (is_outer_join_proj) {
655  col_range.setHasNulls();
656  }
657  return apply_simple_quals(col_expr, col_range, simple_quals);
658  }
659  return getLeafColumnRange(col_expr, query_infos, executor, is_outer_join_proj);
660 }
661 
663  const auto& ti = like_expr->get_type_info();
664  CHECK(ti.is_boolean());
665  const auto& arg_ti = like_expr->get_arg()->get_type_info();
667  arg_ti.get_notnull() ? 0 : inline_int_null_val(ti), 1, 0, false);
668 }
669 
671  const std::vector<InputTableInfo>& query_infos,
672  const Executor* executor) {
673  const auto& expr_pair_list = case_expr->get_expr_pair_list();
674  auto expr_range = ExpressionRange::makeInvalidRange();
675  bool has_nulls = false;
676  for (const auto& expr_pair : expr_pair_list) {
677  CHECK_EQ(expr_pair.first->get_type_info().get_type(), kBOOLEAN);
678  const auto crt_range =
679  getExpressionRange(expr_pair.second.get(), query_infos, executor);
680  if (crt_range.getType() == ExpressionRangeType::Null) {
681  has_nulls = true;
682  continue;
683  }
684  if (crt_range.getType() == ExpressionRangeType::Invalid) {
686  }
687  expr_range = (expr_range.getType() != ExpressionRangeType::Invalid)
688  ? expr_range || crt_range
689  : crt_range;
690  }
691  if (has_nulls && !(expr_range.getType() == ExpressionRangeType::Invalid)) {
692  expr_range.setHasNulls();
693  }
694  const auto else_expr = case_expr->get_else_expr();
695  CHECK(else_expr);
696  const auto else_null_expr = dynamic_cast<const Analyzer::Constant*>(else_expr);
697  if (else_null_expr && else_null_expr->get_is_null()) {
698  expr_range.setHasNulls();
699  return expr_range;
700  }
701  return expr_range || getExpressionRange(else_expr, query_infos, executor);
702 }
703 
704 namespace {
705 
707  const int64_t scale,
708  const SQLTypeInfo& target_ti) {
709  CHECK(target_ti.is_fp());
710  if (target_ti.get_type() == kFLOAT) {
712  static_cast<float>(arg_range.getIntMin()) / scale,
713  static_cast<float>(arg_range.getIntMax()) / scale,
714  arg_range.hasNulls());
715  }
717  static_cast<double>(arg_range.getIntMin()) / scale,
718  static_cast<double>(arg_range.getIntMax()) / scale,
719  arg_range.hasNulls());
720 }
721 
723  const SQLTypeInfo& oper_ti,
724  const SQLTypeInfo& target_ti) {
725  if (oper_ti.is_timestamp() && target_ti.is_date()) {
726  const auto field = dtDAY;
727  const int64_t scale =
730  : 1;
731  const int64_t min_ts = oper_ti.is_high_precision_timestamp()
732  ? DateTruncate(field, arg_range.getIntMin() / scale)
733  : DateTruncate(field, arg_range.getIntMin());
734  const int64_t max_ts = oper_ti.is_high_precision_timestamp()
735  ? DateTruncate(field, arg_range.getIntMax() / scale)
736  : DateTruncate(field, arg_range.getIntMax());
737  const int64_t bucket = get_conservative_datetrunc_bucket(field);
738 
739  return ExpressionRange::makeIntRange(min_ts, max_ts, bucket, arg_range.hasNulls());
740  }
741 
742  const int32_t ti_dimen = target_ti.get_dimension();
743  const int32_t oper_dimen = oper_ti.get_dimension();
744  CHECK(oper_dimen != ti_dimen);
745  const int64_t min_ts =
746  ti_dimen > oper_dimen
748  arg_range.getIntMin(),
749  abs(oper_dimen - ti_dimen))
752  arg_range.getIntMin(),
753  abs(oper_dimen - ti_dimen));
754  const int64_t max_ts =
755  ti_dimen > oper_dimen
757  arg_range.getIntMax(),
758  abs(oper_dimen - ti_dimen))
761  arg_range.getIntMax(),
762  abs(oper_dimen - ti_dimen));
763 
764  return ExpressionRange::makeIntRange(min_ts, max_ts, 0, arg_range.hasNulls());
765 }
766 
767 } // namespace
768 
770  const Analyzer::UOper* u_expr,
771  const std::vector<InputTableInfo>& query_infos,
772  const Executor* executor,
773  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
774  if (u_expr->get_optype() == kUNNEST) {
775  return getExpressionRange(u_expr->get_operand(), query_infos, executor, simple_quals);
776  }
777  if (u_expr->get_optype() != kCAST) {
779  }
780  const auto& ti = u_expr->get_type_info();
781  if (ti.is_string() && ti.get_compression() == kENCODING_DICT) {
782  const auto sdp = executor->getStringDictionaryProxy(
783  ti.get_comp_param(), executor->getRowSetMemoryOwner(), true);
784  CHECK(sdp);
785  const auto const_operand =
786  dynamic_cast<const Analyzer::Constant*>(u_expr->get_operand());
787  if (!const_operand) {
788  // casted subquery result. return invalid for now, but we could attempt to pull the
789  // range from the subquery result in the future
790  CHECK(u_expr->get_operand());
791  VLOG(1) << "Unable to determine expression range for dictionary encoded expression "
792  << u_expr->get_operand()->toString() << ", proceeding with invalid range.";
794  }
795 
796  if (const_operand->get_is_null()) {
798  }
799  CHECK(const_operand->get_constval().stringval);
800  const int64_t v = sdp->getIdOfString(*const_operand->get_constval().stringval);
801  return ExpressionRange::makeIntRange(v, v, 0, false);
802  }
803  const auto arg_range =
804  getExpressionRange(u_expr->get_operand(), query_infos, executor, simple_quals);
805  const auto& arg_ti = u_expr->get_operand()->get_type_info();
806  // Timestamp to Date OR Date/Timestamp casts with different precision
807  if ((ti.is_timestamp() && (arg_ti.get_dimension() != ti.get_dimension())) ||
808  ((arg_ti.is_timestamp() && ti.is_date()))) {
809  return getDateTimePrecisionCastRange(arg_range, arg_ti, ti);
810  }
811  switch (arg_range.getType()) {
814  if (ti.is_fp()) {
815  return ti.get_type() == kDOUBLE
817  arg_range.getFpMin(), arg_range.getFpMax(), arg_range.hasNulls())
818  : ExpressionRange::makeFloatRange(arg_range.getFpMin(),
819  arg_range.getFpMax(),
820  arg_range.hasNulls());
821  }
822  if (ti.is_integer()) {
823  return ExpressionRange::makeIntRange(std::floor(arg_range.getFpMin()),
824  std::ceil(arg_range.getFpMax()),
825  0,
826  arg_range.hasNulls());
827  }
828  break;
829  }
831  if (ti.is_decimal()) {
832  CHECK_EQ(int64_t(0), arg_range.getBucket());
833  const int64_t scale = exp_to_scale(ti.get_scale() - arg_ti.get_scale());
834  return ExpressionRange::makeIntRange(arg_range.getIntMin() * scale,
835  arg_range.getIntMax() * scale,
836  0,
837  arg_range.hasNulls());
838  }
839  if (arg_ti.is_decimal()) {
840  CHECK_EQ(int64_t(0), arg_range.getBucket());
841  const int64_t scale = exp_to_scale(arg_ti.get_scale());
842  const int64_t scale_half = scale / 2;
843  if (ti.is_fp()) {
844  return fpRangeFromDecimal(arg_range, scale, ti);
845  }
846  return ExpressionRange::makeIntRange((arg_range.getIntMin() - scale_half) / scale,
847  (arg_range.getIntMax() + scale_half) / scale,
848  0,
849  arg_range.hasNulls());
850  }
851  if (ti.is_integer() || ti.is_time()) {
852  return arg_range;
853  }
854  if (ti.get_type() == kFLOAT) {
856  arg_range.getIntMin(), arg_range.getIntMax(), arg_range.hasNulls());
857  }
858  if (ti.get_type() == kDOUBLE) {
860  arg_range.getIntMin(), arg_range.getIntMax(), arg_range.hasNulls());
861  }
862  break;
863  }
865  break;
866  default:
867  CHECK(false);
868  }
870 }
871 
873  const Analyzer::ExtractExpr* extract_expr,
874  const std::vector<InputTableInfo>& query_infos,
875  const Executor* executor,
876  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
877  const int32_t extract_field{extract_expr->get_field()};
878  const auto arg_range = getExpressionRange(
879  extract_expr->get_from_expr(), query_infos, executor, simple_quals);
880  const bool has_nulls =
881  arg_range.getType() == ExpressionRangeType::Invalid || arg_range.hasNulls();
882  const auto& extract_expr_ti = extract_expr->get_from_expr()->get_type_info();
883  switch (extract_field) {
884  case kYEAR: {
885  if (arg_range.getType() == ExpressionRangeType::Invalid) {
887  }
888  CHECK(arg_range.getType() == ExpressionRangeType::Integer);
889  const int64_t year_range_min =
890  extract_expr_ti.is_high_precision_timestamp()
891  ? ExtractFromTime(
892  kYEAR,
893  arg_range.getIntMin() /
894  get_timestamp_precision_scale(extract_expr_ti.get_dimension()))
895  : ExtractFromTime(kYEAR, arg_range.getIntMin());
896  const int64_t year_range_max =
897  extract_expr_ti.is_high_precision_timestamp()
898  ? ExtractFromTime(
899  kYEAR,
900  arg_range.getIntMax() /
901  get_timestamp_precision_scale(extract_expr_ti.get_dimension()))
902  : ExtractFromTime(kYEAR, arg_range.getIntMax());
904  year_range_min, year_range_max, 0, arg_range.hasNulls());
905  }
906  case kEPOCH:
907  case kDATEEPOCH:
908  return arg_range;
909  case kQUARTERDAY:
910  case kQUARTER:
911  return ExpressionRange::makeIntRange(1, 4, 0, has_nulls);
912  case kMONTH:
913  return ExpressionRange::makeIntRange(1, 12, 0, has_nulls);
914  case kDAY:
915  return ExpressionRange::makeIntRange(1, 31, 0, has_nulls);
916  case kHOUR:
917  return ExpressionRange::makeIntRange(0, 23, 0, has_nulls);
918  case kMINUTE:
919  return ExpressionRange::makeIntRange(0, 59, 0, has_nulls);
920  case kSECOND:
921  return ExpressionRange::makeIntRange(0, 60, 0, has_nulls);
922  case kMILLISECOND:
923  return ExpressionRange::makeIntRange(0, 999, 0, has_nulls);
924  case kMICROSECOND:
925  return ExpressionRange::makeIntRange(0, 999999, 0, has_nulls);
926  case kNANOSECOND:
927  return ExpressionRange::makeIntRange(0, 999999999, 0, has_nulls);
928  case kDOW:
929  return ExpressionRange::makeIntRange(0, 6, 0, has_nulls);
930  case kISODOW:
931  return ExpressionRange::makeIntRange(1, 7, 0, has_nulls);
932  case kDOY:
933  return ExpressionRange::makeIntRange(1, 366, 0, has_nulls);
934  case kWEEK:
935  case kWEEK_SUNDAY:
936  case kWEEK_SATURDAY:
937  return ExpressionRange::makeIntRange(1, 53, 0, has_nulls);
938  default:
939  CHECK(false);
940  }
942 }
943 
945  const Analyzer::DatetruncExpr* datetrunc_expr,
946  const std::vector<InputTableInfo>& query_infos,
947  const Executor* executor,
948  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
949  const auto arg_range = getExpressionRange(
950  datetrunc_expr->get_from_expr(), query_infos, executor, simple_quals);
951  if (arg_range.getType() == ExpressionRangeType::Invalid) {
953  }
954  const auto& datetrunc_expr_ti = datetrunc_expr->get_from_expr()->get_type_info();
955  const int64_t min_ts = DateTimeTranslator::getDateTruncConstantValue(
956  arg_range.getIntMin(), datetrunc_expr->get_field(), datetrunc_expr_ti);
957  const int64_t max_ts = DateTimeTranslator::getDateTruncConstantValue(
958  arg_range.getIntMax(), datetrunc_expr->get_field(), datetrunc_expr_ti);
959  const int64_t bucket =
960  datetrunc_expr_ti.is_high_precision_timestamp()
961  ? get_conservative_datetrunc_bucket(datetrunc_expr->get_field()) *
963  datetrunc_expr_ti.get_dimension())
964  : get_conservative_datetrunc_bucket(datetrunc_expr->get_field());
965 
966  return ExpressionRange::makeIntRange(min_ts, max_ts, bucket, arg_range.hasNulls());
967 }
968 
971  const std::vector<InputTableInfo>& query_infos,
972  const Executor* executor,
973  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
974  auto target_value_expr = width_bucket_expr->get_target_value();
975  auto target_value_range = getExpressionRange(target_value_expr, query_infos, executor);
976  auto target_ti = target_value_expr->get_type_info();
977  if (width_bucket_expr->is_constant_expr() &&
978  target_value_range.getType() != ExpressionRangeType::Invalid) {
979  auto const_target_value = dynamic_cast<const Analyzer::Constant*>(target_value_expr);
980  if (const_target_value) {
981  if (const_target_value->get_is_null()) {
982  // null constant, return default width_bucket range
984  0, width_bucket_expr->get_partition_count_val(), 0, true);
985  } else {
986  CHECK(target_value_range.getFpMax() == target_value_range.getFpMin());
987  auto target_value_bucket =
988  width_bucket_expr->compute_bucket(target_value_range.getFpMax(), target_ti);
990  target_value_bucket, target_value_bucket, 0, target_value_range.hasNulls());
991  }
992  }
993  // compute possible bucket range based on lower and upper bound constants
994  // to elucidate a target bucket range
995  const auto target_value_range_with_qual =
996  getExpressionRange(target_value_expr, query_infos, executor, simple_quals);
997  auto compute_bucket_range = [&width_bucket_expr](const ExpressionRange& target_range,
998  SQLTypeInfo ti) {
999  // we casted bucket bound exprs to double
1000  auto lower_bound_bucket =
1001  width_bucket_expr->compute_bucket<double>(target_range.getFpMin(), ti);
1002  auto upper_bound_bucket =
1003  width_bucket_expr->compute_bucket<double>(target_range.getFpMax(), ti);
1005  lower_bound_bucket, upper_bound_bucket, 0, target_range.hasNulls());
1006  };
1007  auto res_range = compute_bucket_range(target_value_range_with_qual, target_ti);
1008  // Mark the result range as non-nullable only when simple_quals narrowed the target
1009  // value's range, i.e. the unfiltered range extends beyond the filtered one.
1010  // todo (yoonmin) : need to search simple_quals to cover more cases?
1011  if (target_value_range.getFpMin() < target_value_range_with_qual.getFpMin() ||
1012  target_value_range.getFpMax() > target_value_range_with_qual.getFpMax()) {
1013  res_range.setNulls(false);
1014  }
1015  return res_range;
1016  } else {
1017  // we cannot determine a possibility of skipping oob check safely
1018  const bool has_nulls = target_value_range.getType() == ExpressionRangeType::Invalid ||
1019  target_value_range.hasNulls();
1020  auto partition_expr_range = getExpressionRange(
1021  width_bucket_expr->get_partition_count(), query_infos, executor, simple_quals);
1022  auto res = ExpressionRange::makeIntRange(0, INT32_MAX, 0, has_nulls);
1023  switch (partition_expr_range.getType()) {
1025  res.setIntMax(partition_expr_range.getIntMax() + 1);
1026  break;
1027  }
1030  res.setIntMax(static_cast<int64_t>(partition_expr_range.getFpMax()) + 1);
1031  break;
1032  }
1033  default:
1034  break;
1035  }
1036  return res;
1037  }
1038 }
int get_table_id() const
Definition: Analyzer.h:193
int64_t getIntMin() const
#define CHECK_EQ(x, y)
Definition: Logger.h:219
#define FIND_STAT_FRAG(stat_name)
const Expr * get_from_expr() const
Definition: Analyzer.h:1352
const Expr * get_partition_count() const
Definition: Analyzer.h:1091
bool is_constant_expr() const
Definition: Analyzer.h:1166
const Expr * get_else_expr() const
Definition: Analyzer.h:1307
static ExpressionRange makeNullRange()
Definition: sqltypes.h:49
int64_t DateTruncate(DatetruncField field, const int64_t timeval)
SQLTypes
Definition: sqltypes.h:38
bool is_timestamp() const
Definition: sqltypes.h:891
bool operator==(const ExpressionRange &other) const
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t
void apply_hpt_qual(const Datum const_datum, const SQLTypes const_type, const int32_t const_dimen, const int32_t col_dimen, const SQLOps sql_op, ExpressionRange &qual_range)
void apply_int_qual(const Datum const_datum, const SQLTypes const_type, const SQLOps sql_op, ExpressionRange &qual_range)
double extract_max_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
#define const
ExtractField get_field() const
Definition: Analyzer.h:1351
bool is_fp() const
Definition: sqltypes.h:523
HOST DEVICE int get_scale() const
Definition: sqltypes.h:334
const Expr * get_right_operand() const
Definition: Analyzer.h:442
SQLOps
Definition: sqldefs.h:29
Definition: sqldefs.h:35
Definition: sqldefs.h:36
#define DEF_OPERATOR(fname, op)
bool get_is_null() const
Definition: Analyzer.h:333
#define CHECK_GE(x, y)
Definition: Logger.h:224
#define INT32_MAX
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1064
Definition: sqldefs.h:49
Definition: sqldefs.h:30
DatetruncField get_field() const
Definition: Analyzer.h:1476
Definition: sqldefs.h:41
int64_t scale_up_interval_endpoint(const int64_t endpoint, const SQLTypeInfo &ti)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
void setIntMin(const int64_t int_min)
bool is_number() const
Definition: sqltypes.h:524
#define CHECK_GT(x, y)
Definition: Logger.h:223
constexpr int64_t get_datetime_scaled_epoch(const ScalingType direction, const int64_t epoch, const int32_t dimen)
const Expr * get_arg() const
Definition: Analyzer.h:951
int64_t extract_max_stat_int_type(const ChunkStats &stats, const SQLTypeInfo &ti)
bool is_time() const
Definition: sqltypes.h:525
static ExpressionRange makeFloatRange(const float fp_min, const float fp_max, const bool has_nulls)
ExpressionRange apply_simple_quals(const Analyzer::ColumnVar *col_expr, const ExpressionRange &col_range, const boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
bool g_null_div_by_zero
Definition: Execute.cpp:85
SQLOps get_optype() const
Definition: Analyzer.h:438
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
ExpressionRangeType type_
ExpressionRange operator||(const ExpressionRange &other) const
DatetruncField
Definition: DateTruncate.h:27
int64_t get_conservative_datetrunc_bucket(const DatetruncField datetrunc_field)
double extract_min_stat_fp_type(const ChunkStats &stats, const SQLTypeInfo &ti)
void apply_fp_qual(const Datum const_datum, const SQLTypes const_type, const SQLOps sql_op, ExpressionRange &qual_range)
int64_t bigintval
Definition: sqltypes.h:215
int64_t extract_min_stat_int_type(const ChunkStats &stats, const SQLTypeInfo &ti)
ExpressionRange getLeafColumnRange(const Analyzer::ColumnVar *col_expr, const std::vector< InputTableInfo > &query_infos, const Executor *executor, const bool is_outer_join_proj)
bool is_boolean() const
Definition: sqltypes.h:526
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
bool hasNulls() const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:77
static ExpressionRange makeIntRange(const int64_t int_min, const int64_t int_max, const int64_t bucket, const bool has_nulls)
Definition: sqldefs.h:34
static ExpressionRange makeDoubleRange(const double fp_min, const double fp_max, const bool has_nulls)
#define CHECK_LT(x, y)
Definition: Logger.h:221
Definition: sqltypes.h:52
Definition: sqltypes.h:53
double getFpMax() const
Definition: sqldefs.h:40
virtual std::string toString() const =0
const Expr * get_from_expr() const
Definition: Analyzer.h:1477
double getFpMin() const
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
RUNTIME_EXPORT ALWAYS_INLINE DEVICE double width_bucket_expr(const double target_value, const bool reversed, const double lower_bound, const double upper_bound, const int32_t partition_count)
int get_rte_idx() const
Definition: Analyzer.h:195
const Expr * get_operand() const
Definition: Analyzer.h:370
Datum get_constval() const
Definition: Analyzer.h:334
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:331
ExpressionRange operator/(const ExpressionRange &other) const
int32_t get_partition_count_val() const
Definition: Analyzer.cpp:3430
DEVICE int64_t ExtractFromTime(ExtractField field, const int64_t timeval)
Definition: sqltypes.h:41
void setIntMax(const int64_t int_max)
const Expr * get_target_value() const
Definition: Analyzer.h:1088
ExpressionRangeType getType() const
int64_t getIntMax() const
#define CHECK(condition)
Definition: Logger.h:211
constexpr int64_t get_timestamp_precision_scale(const int32_t dimen)
Definition: DateTimeUtils.h:51
bool is_high_precision_timestamp() const
Definition: sqltypes.h:881
uint64_t exp_to_scale(const unsigned exp)
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
int32_t compute_bucket(T target_const_val, SQLTypeInfo &ti) const
Definition: Analyzer.h:1129
Definition: sqldefs.h:33
const Expr * get_left_operand() const
Definition: Analyzer.h:441
static bool typeSupportsRange(const SQLTypeInfo &ti)
Definition: sqltypes.h:45
int get_column_id() const
Definition: Analyzer.h:194
static ExpressionRange makeInvalidRange()
bool is_string() const
Definition: sqltypes.h:519
static int64_t getDateTruncConstantValue(const int64_t &timeval, const DatetruncField &field, const SQLTypeInfo &ti)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:861
ExpressionRange getDateTimePrecisionCastRange(const ExpressionRange &arg_range, const SQLTypeInfo &oper_ti, const SQLTypeInfo &target_ti)
SQLOps get_optype() const
Definition: Analyzer.h:369
bool is_date() const
Definition: sqltypes.h:879
bool is_array() const
Definition: sqltypes.h:527
const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr > > > & get_expr_pair_list() const
Definition: Analyzer.h:1304
#define VLOG(n)
Definition: Logger.h:305
ExpressionRange fpRangeFromDecimal(const ExpressionRange &arg_range, const int64_t scale, const SQLTypeInfo &target_ti)