OmniSciDB  eb3a3d0a03
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ExpressionRange.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ExpressionRange.h"
18 #include <algorithm>
19 #include <cfenv>
20 #include <cmath>
21 #include "DateTimeTranslator.h"
22 #include "DateTimeUtils.h"
23 #include "DateTruncate.h"
25 #include "Execute.h"
26 #include "ExtractFromTime.h"
27 #include "GroupByAndAggregate.h"
29 
30 #define DEF_OPERATOR(fname, op) \
31  ExpressionRange fname(const ExpressionRange& other) const { \
32  if (type_ == ExpressionRangeType::Invalid || \
33  other.type_ == ExpressionRangeType::Invalid) { \
34  return ExpressionRange::makeInvalidRange(); \
35  } \
36  CHECK(type_ == other.type_); \
37  switch (type_) { \
38  case ExpressionRangeType::Integer: \
39  return binOp<int64_t>(other, [](const int64_t x, const int64_t y) { \
40  return int64_t(checked_int64_t(x) op y); \
41  }); \
42  case ExpressionRangeType::Float: \
43  return binOp<float>(other, [](const float x, const float y) { \
44  std::feclearexcept(FE_OVERFLOW); \
45  std::feclearexcept(FE_UNDERFLOW); \
46  auto result = x op y; \
47  if (std::fetestexcept(FE_OVERFLOW) || std::fetestexcept(FE_UNDERFLOW)) { \
48  throw std::runtime_error("overflow / underflow"); \
49  } \
50  return result; \
51  }); \
52  case ExpressionRangeType::Double: \
53  return binOp<double>(other, [](const double x, const double y) { \
54  std::feclearexcept(FE_OVERFLOW); \
55  std::feclearexcept(FE_UNDERFLOW); \
56  auto result = x op y; \
57  if (std::fetestexcept(FE_OVERFLOW) || std::fetestexcept(FE_UNDERFLOW)) { \
58  throw std::runtime_error("overflow / underflow"); \
59  } \
60  return result; \
61  }); \
62  default: \
63  CHECK(false); \
64  } \
65  CHECK(false); \
66  return ExpressionRange::makeInvalidRange(); \
67  }
68 
69 DEF_OPERATOR(ExpressionRange::operator+, +)
70 DEF_OPERATOR(ExpressionRange::operator-, -)
71 DEF_OPERATOR(ExpressionRange::operator*, *)
72 
73 void apply_fp_qual(const Datum const_datum,
74  const SQLTypes const_type,
75  const SQLOps sql_op,
76  ExpressionRange& qual_range) {
77  double const_val = get_value_from_datum<double>(const_datum, const_type);
78  switch (sql_op) {
79  case kGT:
80  case kGE:
81  qual_range.setFpMin(std::max(qual_range.getFpMin(), const_val));
82  break;
83  case kLT:
84  case kLE:
85  qual_range.setFpMax(std::min(qual_range.getFpMax(), const_val));
86  break;
87  case kEQ:
88  qual_range.setFpMin(std::max(qual_range.getFpMin(), const_val));
89  qual_range.setFpMax(std::min(qual_range.getFpMax(), const_val));
90  break;
91  default: // there may be other operators, but don't do anything with them
92  break;
93  }
94 }
95 
96 void apply_int_qual(const Datum const_datum,
97  const SQLTypes const_type,
98  const SQLOps sql_op,
99  ExpressionRange& qual_range) {
100  int64_t const_val = get_value_from_datum<int64_t>(const_datum, const_type);
101  switch (sql_op) {
102  case kGT:
103  qual_range.setIntMin(std::max(qual_range.getIntMin(), const_val + 1));
104  break;
105  case kGE:
106  qual_range.setIntMin(std::max(qual_range.getIntMin(), const_val));
107  break;
108  case kLT:
109  qual_range.setIntMax(std::min(qual_range.getIntMax(), const_val - 1));
110  break;
111  case kLE:
112  qual_range.setIntMax(std::min(qual_range.getIntMax(), const_val));
113  break;
114  case kEQ:
115  qual_range.setIntMin(std::max(qual_range.getIntMin(), const_val));
116  qual_range.setIntMax(std::min(qual_range.getIntMax(), const_val));
117  break;
118  default: // there may be other operators, but don't do anything with them
119  break;
120  }
121 }
122 
123 void apply_hpt_qual(const Datum const_datum,
124  const SQLTypes const_type,
125  const int32_t const_dimen,
126  const int32_t col_dimen,
127  const SQLOps sql_op,
128  ExpressionRange& qual_range) {
129  CHECK(const_dimen != col_dimen);
130  Datum datum{0};
131  if (const_dimen > col_dimen) {
132  datum.bigintval =
133  get_value_from_datum<int64_t>(const_datum, const_type) /
134  DateTimeUtils::get_timestamp_precision_scale(const_dimen - col_dimen);
135  } else {
136  datum.bigintval =
137  get_value_from_datum<int64_t>(const_datum, const_type) *
138  DateTimeUtils::get_timestamp_precision_scale(col_dimen - const_dimen);
139  }
140  apply_int_qual(datum, const_type, sql_op, qual_range);
141 }
142 
144  const Analyzer::ColumnVar* col_expr,
145  const ExpressionRange& col_range,
146  const boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
147  if (!simple_quals) {
148  return col_range;
149  }
150  ExpressionRange qual_range(col_range);
151  for (auto const& itr : simple_quals.get()) {
152  auto qual_bin_oper = dynamic_cast<Analyzer::BinOper*>(itr.get());
153  if (!qual_bin_oper) {
154  continue;
155  }
156  const Analyzer::Expr* left_operand = qual_bin_oper->get_left_operand();
157  auto qual_col = dynamic_cast<const Analyzer::ColumnVar*>(left_operand);
158  if (!qual_col) {
159  // Check for possibility that column is wrapped in a cast
160  // Presumes that only simple casts (i.e. timestamp to timestamp or int to int) have
161  // been passed through by BinOper::normalize_simple_predicate
162  auto u_expr = dynamic_cast<const Analyzer::UOper*>(left_operand);
163  if (!u_expr) {
164  continue;
165  }
166  qual_col = dynamic_cast<const Analyzer::ColumnVar*>(u_expr->get_operand());
167  if (!qual_col) {
168  continue;
169  }
170  }
171  if (qual_col->get_table_id() != col_expr->get_table_id() ||
172  qual_col->get_column_id() != col_expr->get_column_id()) {
173  continue;
174  }
175  const Analyzer::Expr* right_operand = qual_bin_oper->get_right_operand();
176  auto qual_const = dynamic_cast<const Analyzer::Constant*>(right_operand);
177  if (!qual_const) {
178  continue;
179  }
180  if (qual_range.getType() == ExpressionRangeType::Float ||
181  qual_range.getType() == ExpressionRangeType::Double) {
182  apply_fp_qual(qual_const->get_constval(),
183  qual_const->get_type_info().get_type(),
184  qual_bin_oper->get_optype(),
185  qual_range);
186  } else if ((qual_col->get_type_info().is_timestamp() ||
187  qual_const->get_type_info().is_timestamp()) &&
188  (qual_col->get_type_info().get_dimension() !=
189  qual_const->get_type_info().get_dimension())) {
190  apply_hpt_qual(qual_const->get_constval(),
191  qual_const->get_type_info().get_type(),
192  qual_const->get_type_info().get_dimension(),
193  qual_col->get_type_info().get_dimension(),
194  qual_bin_oper->get_optype(),
195  qual_range);
196  } else {
197  apply_int_qual(qual_const->get_constval(),
198  qual_const->get_type_info().get_type(),
199  qual_bin_oper->get_optype(),
200  qual_range);
201  }
202  }
203  return qual_range;
204 }
205 
210  }
211  if (other.int_min_ * other.int_max_ <= 0) {
212  // if the other interval contains 0, the rule is more complicated;
213  // punt for now, we can revisit by splitting the other interval and
214  // taking the convex hull of the resulting two intervals
216  }
217  auto div_range = binOp<int64_t>(other, [](const int64_t x, const int64_t y) {
218  return int64_t(checked_int64_t(x) / y);
219  });
220  if (g_null_div_by_zero) {
221  div_range.setHasNulls();
222  }
223  return div_range;
224 }
225 
227  if (type_ != other.type_) {
229  }
231  switch (type_) {
236  result.has_nulls_ = has_nulls_ || other.has_nulls_;
237  result.int_min_ = std::min(int_min_, other.int_min_);
238  result.int_max_ = std::max(int_max_, other.int_max_);
239  result.bucket_ = std::min(bucket_, other.bucket_);
240  break;
241  }
244  result.type_ = type_;
245  result.has_nulls_ = has_nulls_ || other.has_nulls_;
246  result.fp_min_ = std::min(fp_min_, other.fp_min_);
247  result.fp_max_ = std::max(fp_max_, other.fp_max_);
248  break;
249  }
250  default:
251  CHECK(false);
252  }
253  return result;
254 }
255 
257  if (type_ != other.type_) {
258  return false;
259  }
260  switch (type_) {
262  return true;
264  return has_nulls_ == other.has_nulls_ && int_min_ == other.int_min_ &&
265  int_max_ == other.int_max_;
266  }
269  return has_nulls_ == other.has_nulls_ && fp_min_ == other.fp_min_ &&
270  fp_max_ == other.fp_max_;
271  }
272  default:
273  CHECK(false);
274  }
275  return false;
276 }
277 
279  if (ti.is_array()) {
280  return typeSupportsRange(ti.get_elem_type());
281  } else {
282  return (ti.is_number() || ti.is_boolean() || ti.is_time() ||
283  (ti.is_string() && ti.get_compression() == kENCODING_DICT));
284  }
285 }
286 
288  const Analyzer::BinOper* expr,
289  const std::vector<InputTableInfo>& query_infos,
290  const Executor*,
291  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
292 
294 
296  const Analyzer::ColumnVar* col_expr,
297  const std::vector<InputTableInfo>& query_infos,
298  const Executor* executor,
299  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
300 
302 
304  const std::vector<InputTableInfo>& query_infos,
305  const Executor*);
306 
308  const Analyzer::UOper* u_expr,
309  const std::vector<InputTableInfo>& query_infos,
310  const Executor*,
311  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
312 
314  const Analyzer::ExtractExpr* extract_expr,
315  const std::vector<InputTableInfo>& query_infos,
316  const Executor*,
317  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
318 
320  const Analyzer::DatetruncExpr* datetrunc_expr,
321  const std::vector<InputTableInfo>& query_infos,
322  const Executor* executor,
323  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
324 
327  const std::vector<InputTableInfo>& query_infos,
328  const Executor* executor,
329  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals);
330 
332  const Analyzer::Expr* expr,
333  const std::vector<InputTableInfo>& query_infos,
334  const Executor* executor,
335  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
336  const auto& expr_ti = expr->get_type_info();
337  if (!ExpressionRange::typeSupportsRange(expr_ti)) {
339  }
340  auto bin_oper_expr = dynamic_cast<const Analyzer::BinOper*>(expr);
341  if (bin_oper_expr) {
342  return getExpressionRange(bin_oper_expr, query_infos, executor, simple_quals);
343  }
344  auto constant_expr = dynamic_cast<const Analyzer::Constant*>(expr);
345  if (constant_expr) {
346  return getExpressionRange(constant_expr);
347  }
348  auto column_var_expr = dynamic_cast<const Analyzer::ColumnVar*>(expr);
349  if (column_var_expr) {
350  return getExpressionRange(column_var_expr, query_infos, executor, simple_quals);
351  }
352  auto like_expr = dynamic_cast<const Analyzer::LikeExpr*>(expr);
353  if (like_expr) {
354  return getExpressionRange(like_expr);
355  }
356  auto case_expr = dynamic_cast<const Analyzer::CaseExpr*>(expr);
357  if (case_expr) {
358  return getExpressionRange(case_expr, query_infos, executor);
359  }
360  auto u_expr = dynamic_cast<const Analyzer::UOper*>(expr);
361  if (u_expr) {
362  return getExpressionRange(u_expr, query_infos, executor, simple_quals);
363  }
364  auto extract_expr = dynamic_cast<const Analyzer::ExtractExpr*>(expr);
365  if (extract_expr) {
366  return getExpressionRange(extract_expr, query_infos, executor, simple_quals);
367  }
368  auto datetrunc_expr = dynamic_cast<const Analyzer::DatetruncExpr*>(expr);
369  if (datetrunc_expr) {
370  return getExpressionRange(datetrunc_expr, query_infos, executor, simple_quals);
371  }
372  auto width_bucket_expr = dynamic_cast<const Analyzer::WidthBucketExpr*>(expr);
373  if (width_bucket_expr) {
374  return getExpressionRange(width_bucket_expr, query_infos, executor, simple_quals);
375  }
377 }
378 
379 namespace {
380 
381 int64_t scale_up_interval_endpoint(const int64_t endpoint, const SQLTypeInfo& ti) {
382  return endpoint * static_cast<int64_t>(exp_to_scale(ti.get_scale()));
383 }
384 
385 } // namespace
386 
388  const Analyzer::BinOper* expr,
389  const std::vector<InputTableInfo>& query_infos,
390  const Executor* executor,
391  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
392  const auto& lhs =
393  getExpressionRange(expr->get_left_operand(), query_infos, executor, simple_quals);
394  const auto& rhs =
395  getExpressionRange(expr->get_right_operand(), query_infos, executor, simple_quals);
396  switch (expr->get_optype()) {
397  case kPLUS:
398  return lhs + rhs;
399  case kMINUS:
400  return lhs - rhs;
401  case kMULTIPLY:
402  return lhs * rhs;
403  case kDIVIDE: {
404  const auto& lhs_type = expr->get_left_operand()->get_type_info();
405  if (lhs_type.is_decimal() && lhs.getType() != ExpressionRangeType::Invalid) {
406  CHECK(lhs.getType() == ExpressionRangeType::Integer);
407  const auto adjusted_lhs = ExpressionRange::makeIntRange(
408  scale_up_interval_endpoint(lhs.getIntMin(), lhs_type),
409  scale_up_interval_endpoint(lhs.getIntMax(), lhs_type),
410  0,
411  lhs.hasNulls());
412  return adjusted_lhs / rhs;
413  }
414  return lhs / rhs;
415  }
416  default:
417  break;
418  }
420 }
421 
423  if (constant_expr->get_is_null()) {
425  }
426  const auto constant_type = constant_expr->get_type_info().get_type();
427  const auto datum = constant_expr->get_constval();
428  switch (constant_type) {
429  case kTINYINT: {
430  const int64_t v = datum.tinyintval;
431  return ExpressionRange::makeIntRange(v, v, 0, false);
432  }
433  case kSMALLINT: {
434  const int64_t v = datum.smallintval;
435  return ExpressionRange::makeIntRange(v, v, 0, false);
436  }
437  case kINT: {
438  const int64_t v = datum.intval;
439  return ExpressionRange::makeIntRange(v, v, 0, false);
440  }
441  case kBIGINT:
442  case kNUMERIC:
443  case kDECIMAL: {
444  const int64_t v = datum.bigintval;
445  return ExpressionRange::makeIntRange(v, v, 0, false);
446  }
447  case kTIME:
448  case kTIMESTAMP:
449  case kDATE: {
450  const int64_t v = datum.bigintval;
451  return ExpressionRange::makeIntRange(v, v, 0, false);
452  }
453  case kFLOAT: {
454  return ExpressionRange::makeFloatRange(datum.floatval, datum.floatval, false);
455  }
456  case kDOUBLE: {
457  return ExpressionRange::makeDoubleRange(datum.doubleval, datum.doubleval, false);
458  }
459  default:
460  break;
461  }
463 }
464 
// Declares `<stat_name>_frag_index`: an iterator into nonempty_fragment_indices
// selected by std::<stat_name>_element (stat_name is `min` or `max`) according
// to the chunk statistic of column `col_id`.
//
// Expects these names in the expansion scope: nonempty_fragment_indices,
// fragments, has_nulls, col_id and col_ti. The comparator
//  * returns false when the left fragment has no metadata for the column,
//  * CHECKs that the right fragment does have metadata,
//  * sets has_nulls when either compared chunk reports nulls,
//  * compares the double-valued stats for floating point columns and the
//    integer stats otherwise.
// If no element is found (empty index list) the enclosing function returns an
// invalid range.
#define FIND_STAT_FRAG(stat_name)                                                  \
  const auto stat_name##_frag_index = std::stat_name##_element(                    \
      nonempty_fragment_indices.begin(),                                           \
      nonempty_fragment_indices.end(),                                             \
      [&fragments, &has_nulls, col_id, col_ti](const size_t lhs_idx,               \
                                               const size_t rhs_idx) {             \
        const auto& lhs_meta_map = fragments[lhs_idx].getChunkMetadataMap();       \
        const auto lhs_meta_it = lhs_meta_map.find(col_id);                        \
        if (lhs_meta_it == lhs_meta_map.end()) {                                   \
          return false;                                                            \
        }                                                                          \
        const auto& rhs_meta_map = fragments[rhs_idx].getChunkMetadataMap();       \
        const auto rhs_meta_it = rhs_meta_map.find(col_id);                        \
        CHECK(rhs_meta_it != rhs_meta_map.end());                                  \
        const auto& lhs_stats = lhs_meta_it->second->chunkStats;                   \
        const auto& rhs_stats = rhs_meta_it->second->chunkStats;                   \
        if (lhs_stats.has_nulls || rhs_stats.has_nulls) {                          \
          has_nulls = true;                                                        \
        }                                                                          \
        if (col_ti.is_fp()) {                                                      \
          return extract_##stat_name##_stat_double(lhs_stats, col_ti) <            \
                 extract_##stat_name##_stat_double(rhs_stats, col_ti);             \
        }                                                                          \
        return extract_##stat_name##_stat(lhs_stats, col_ti) <                     \
               extract_##stat_name##_stat(rhs_stats, col_ti);                      \
      });                                                                          \
  if (stat_name##_frag_index == nonempty_fragment_indices.end()) {                 \
    return ExpressionRange::makeInvalidRange();                                    \
  }
495 
496 namespace {
497 
498 double extract_min_stat_double(const ChunkStats& stats, const SQLTypeInfo& col_ti) {
499  return col_ti.get_type() == kDOUBLE ? stats.min.doubleval : stats.min.floatval;
500 }
501 
502 double extract_max_stat_double(const ChunkStats& stats, const SQLTypeInfo& col_ti) {
503  return col_ti.get_type() == kDOUBLE ? stats.max.doubleval : stats.max.floatval;
504 }
505 
506 int64_t get_conservative_datetrunc_bucket(const DatetruncField datetrunc_field) {
507  const int64_t day_seconds{24 * 3600};
508  const int64_t year_days{365};
509  switch (datetrunc_field) {
510  case dtYEAR:
511  return year_days * day_seconds;
512  case dtQUARTER:
513  return 90 * day_seconds; // 90 is least number of days in any quater
514  case dtMONTH:
515  return 28 * day_seconds;
516  case dtDAY:
517  return day_seconds;
518  case dtHOUR:
519  return 3600;
520  case dtMINUTE:
521  return 60;
522  case dtMILLENNIUM:
523  return 1000 * year_days * day_seconds;
524  case dtCENTURY:
525  return 100 * year_days * day_seconds;
526  case dtDECADE:
527  return 10 * year_days * day_seconds;
528  case dtWEEK:
529  case dtWEEK_SUNDAY:
530  case dtWEEK_SATURDAY:
531  return 7 * day_seconds;
532  case dtQUARTERDAY:
533  return 4 * 60 * 50;
534  default:
535  return 0;
536  }
537 }
538 
539 } // namespace
540 
542  const std::vector<InputTableInfo>& query_infos,
543  const Executor* executor,
544  const bool is_outer_join_proj) {
545  bool has_nulls = is_outer_join_proj;
546  int col_id = col_expr->get_column_id();
547  const auto& col_phys_ti = col_expr->get_type_info().is_array()
548  ? col_expr->get_type_info().get_elem_type()
549  : col_expr->get_type_info();
550  const auto col_ti = get_logical_type_info(col_phys_ti);
551  switch (col_ti.get_type()) {
552  case kTEXT:
553  case kCHAR:
554  case kVARCHAR:
555  CHECK_EQ(kENCODING_DICT, col_ti.get_compression());
556  case kBOOLEAN:
557  case kTINYINT:
558  case kSMALLINT:
559  case kINT:
560  case kBIGINT:
561  case kDECIMAL:
562  case kNUMERIC:
563  case kDATE:
564  case kTIMESTAMP:
565  case kTIME:
566  case kFLOAT:
567  case kDOUBLE: {
568  std::optional<size_t> ti_idx;
569  for (size_t i = 0; i < query_infos.size(); ++i) {
570  if (col_expr->get_table_id() == query_infos[i].table_id) {
571  ti_idx = i;
572  break;
573  }
574  }
575  CHECK(ti_idx);
576  const auto& query_info = query_infos[*ti_idx].info;
577  const auto& fragments = query_info.fragments;
578  const auto cd = executor->getColumnDescriptor(col_expr);
579  if (cd && cd->isVirtualCol) {
580  CHECK(cd->columnName == "rowid");
581  CHECK_EQ(kBIGINT, col_ti.get_type());
582  const int64_t num_tuples = query_info.getNumTuples();
584  0, std::max(num_tuples - 1, int64_t(0)), 0, has_nulls);
585  }
586  if (query_info.getNumTuples() == 0) {
587  // The column doesn't contain any values, synthesize an empty range.
588  if (col_ti.is_fp()) {
589  return col_ti.get_type() == kFLOAT
590  ? ExpressionRange::makeFloatRange(0, -1, false)
591  : ExpressionRange::makeDoubleRange(0, -1, false);
592  }
593  return ExpressionRange::makeIntRange(0, -1, 0, false);
594  }
595  std::vector<size_t> nonempty_fragment_indices;
596  for (size_t i = 0; i < fragments.size(); ++i) {
597  const auto& fragment = fragments[i];
598  if (!fragment.isEmptyPhysicalFragment()) {
599  nonempty_fragment_indices.push_back(i);
600  }
601  }
602  FIND_STAT_FRAG(min);
603  FIND_STAT_FRAG(max);
604  const auto& min_frag = fragments[*min_frag_index];
605  const auto min_it = min_frag.getChunkMetadataMap().find(col_id);
606  if (min_it == min_frag.getChunkMetadataMap().end()) {
608  }
609  const auto& max_frag = fragments[*max_frag_index];
610  const auto max_it = max_frag.getChunkMetadataMap().find(col_id);
611  CHECK(max_it != max_frag.getChunkMetadataMap().end());
612  for (const auto& fragment : fragments) {
613  const auto it = fragment.getChunkMetadataMap().find(col_id);
614  if (it != fragment.getChunkMetadataMap().end()) {
615  if (it->second->chunkStats.has_nulls) {
616  has_nulls = true;
617  break;
618  }
619  }
620  }
621  if (col_ti.is_fp()) {
622  const auto min_val = extract_min_stat_double(min_it->second->chunkStats, col_ti);
623  const auto max_val = extract_max_stat_double(max_it->second->chunkStats, col_ti);
624  return col_ti.get_type() == kFLOAT
625  ? ExpressionRange::makeFloatRange(min_val, max_val, has_nulls)
626  : ExpressionRange::makeDoubleRange(min_val, max_val, has_nulls);
627  }
628  const auto min_val = extract_min_stat(min_it->second->chunkStats, col_ti);
629  const auto max_val = extract_max_stat(max_it->second->chunkStats, col_ti);
630  if (max_val < min_val) {
631  // The column doesn't contain any non-null values, synthesize an empty range.
632  CHECK_GT(min_val, 0);
633  return ExpressionRange::makeIntRange(0, -1, 0, has_nulls);
634  }
635  const int64_t bucket =
636  col_ti.get_type() == kDATE ? get_conservative_datetrunc_bucket(dtDAY) : 0;
637  return ExpressionRange::makeIntRange(min_val, max_val, bucket, has_nulls);
638  }
639  default:
640  break;
641  }
643 }
644 
645 #undef FIND_STAT_FRAG
646 
648  const Analyzer::ColumnVar* col_expr,
649  const std::vector<InputTableInfo>& query_infos,
650  const Executor* executor,
651  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
652  const int rte_idx = col_expr->get_rte_idx();
653  CHECK_GE(rte_idx, 0);
654  CHECK_LT(static_cast<size_t>(rte_idx), query_infos.size());
655  bool is_outer_join_proj = rte_idx > 0 && executor->containsLeftDeepOuterJoin();
656  if (col_expr->get_table_id() > 0) {
657  auto col_range = executor->getColRange(
658  PhysicalInput{col_expr->get_column_id(), col_expr->get_table_id()});
659  if (is_outer_join_proj) {
660  col_range.setHasNulls();
661  }
662  return apply_simple_quals(col_expr, col_range, simple_quals);
663  }
664  return getLeafColumnRange(col_expr, query_infos, executor, is_outer_join_proj);
665 }
666 
668  const auto& ti = like_expr->get_type_info();
669  CHECK(ti.is_boolean());
670  const auto& arg_ti = like_expr->get_arg()->get_type_info();
672  arg_ti.get_notnull() ? 0 : inline_int_null_val(ti), 1, 0, false);
673 }
674 
676  const std::vector<InputTableInfo>& query_infos,
677  const Executor* executor) {
678  const auto& expr_pair_list = case_expr->get_expr_pair_list();
679  auto expr_range = ExpressionRange::makeInvalidRange();
680  bool has_nulls = false;
681  for (const auto& expr_pair : expr_pair_list) {
682  CHECK_EQ(expr_pair.first->get_type_info().get_type(), kBOOLEAN);
683  const auto crt_range =
684  getExpressionRange(expr_pair.second.get(), query_infos, executor);
685  if (crt_range.getType() == ExpressionRangeType::Null) {
686  has_nulls = true;
687  continue;
688  }
689  if (crt_range.getType() == ExpressionRangeType::Invalid) {
691  }
692  expr_range = (expr_range.getType() != ExpressionRangeType::Invalid)
693  ? expr_range || crt_range
694  : crt_range;
695  }
696  if (has_nulls && !(expr_range.getType() == ExpressionRangeType::Invalid)) {
697  expr_range.setHasNulls();
698  }
699  const auto else_expr = case_expr->get_else_expr();
700  CHECK(else_expr);
701  const auto else_null_expr = dynamic_cast<const Analyzer::Constant*>(else_expr);
702  if (else_null_expr && else_null_expr->get_is_null()) {
703  expr_range.setHasNulls();
704  return expr_range;
705  }
706  return expr_range || getExpressionRange(else_expr, query_infos, executor);
707 }
708 
709 namespace {
710 
712  const int64_t scale,
713  const SQLTypeInfo& target_ti) {
714  CHECK(target_ti.is_fp());
715  if (target_ti.get_type() == kFLOAT) {
717  static_cast<float>(arg_range.getIntMin()) / scale,
718  static_cast<float>(arg_range.getIntMax()) / scale,
719  arg_range.hasNulls());
720  }
722  static_cast<double>(arg_range.getIntMin()) / scale,
723  static_cast<double>(arg_range.getIntMax()) / scale,
724  arg_range.hasNulls());
725 }
726 
728  const SQLTypeInfo& oper_ti,
729  const SQLTypeInfo& target_ti) {
730  if (oper_ti.is_timestamp() && target_ti.is_date()) {
731  const auto field = dtDAY;
732  const int64_t scale =
735  : 1;
736  const int64_t min_ts = oper_ti.is_high_precision_timestamp()
737  ? DateTruncate(field, arg_range.getIntMin() / scale)
738  : DateTruncate(field, arg_range.getIntMin());
739  const int64_t max_ts = oper_ti.is_high_precision_timestamp()
740  ? DateTruncate(field, arg_range.getIntMax() / scale)
741  : DateTruncate(field, arg_range.getIntMax());
742  const int64_t bucket = get_conservative_datetrunc_bucket(field);
743 
744  return ExpressionRange::makeIntRange(min_ts, max_ts, bucket, arg_range.hasNulls());
745  }
746 
747  const int32_t ti_dimen = target_ti.get_dimension();
748  const int32_t oper_dimen = oper_ti.get_dimension();
749  CHECK(oper_dimen != ti_dimen);
750  const int64_t min_ts =
751  ti_dimen > oper_dimen
753  arg_range.getIntMin(),
754  abs(oper_dimen - ti_dimen))
757  arg_range.getIntMin(),
758  abs(oper_dimen - ti_dimen));
759  const int64_t max_ts =
760  ti_dimen > oper_dimen
762  arg_range.getIntMax(),
763  abs(oper_dimen - ti_dimen))
766  arg_range.getIntMax(),
767  abs(oper_dimen - ti_dimen));
768 
769  return ExpressionRange::makeIntRange(min_ts, max_ts, 0, arg_range.hasNulls());
770 }
771 
772 } // namespace
773 
775  const Analyzer::UOper* u_expr,
776  const std::vector<InputTableInfo>& query_infos,
777  const Executor* executor,
778  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
779  if (u_expr->get_optype() == kUNNEST) {
780  return getExpressionRange(u_expr->get_operand(), query_infos, executor, simple_quals);
781  }
782  if (u_expr->get_optype() != kCAST) {
784  }
785  const auto& ti = u_expr->get_type_info();
786  if (ti.is_string() && ti.get_compression() == kENCODING_DICT) {
787  const auto sdp = executor->getStringDictionaryProxy(
788  ti.get_comp_param(), executor->getRowSetMemoryOwner(), true);
789  CHECK(sdp);
790  const auto const_operand =
791  dynamic_cast<const Analyzer::Constant*>(u_expr->get_operand());
792  if (!const_operand) {
793  // casted subquery result. return invalid for now, but we could attempt to pull the
794  // range from the subquery result in the future
795  CHECK(u_expr->get_operand());
796  VLOG(1) << "Unable to determine expression range for dictionary encoded expression "
797  << u_expr->get_operand()->toString() << ", proceeding with invalid range.";
799  }
800 
801  if (const_operand->get_is_null()) {
803  }
804  CHECK(const_operand->get_constval().stringval);
805  const int64_t v = sdp->getIdOfString(*const_operand->get_constval().stringval);
806  return ExpressionRange::makeIntRange(v, v, 0, false);
807  }
808  const auto arg_range =
809  getExpressionRange(u_expr->get_operand(), query_infos, executor, simple_quals);
810  const auto& arg_ti = u_expr->get_operand()->get_type_info();
811  // Timestamp to Date OR Date/Timestamp casts with different precision
812  if ((ti.is_timestamp() && (arg_ti.get_dimension() != ti.get_dimension())) ||
813  ((arg_ti.is_timestamp() && ti.is_date()))) {
814  return getDateTimePrecisionCastRange(arg_range, arg_ti, ti);
815  }
816  switch (arg_range.getType()) {
819  if (ti.is_fp()) {
820  return ti.get_type() == kDOUBLE
822  arg_range.getFpMin(), arg_range.getFpMax(), arg_range.hasNulls())
823  : ExpressionRange::makeFloatRange(arg_range.getFpMin(),
824  arg_range.getFpMax(),
825  arg_range.hasNulls());
826  }
827  if (ti.is_integer()) {
829  arg_range.getFpMin(), arg_range.getFpMax(), 0, arg_range.hasNulls());
830  }
831  break;
832  }
834  if (ti.is_decimal()) {
835  CHECK_EQ(int64_t(0), arg_range.getBucket());
836  const int64_t scale = exp_to_scale(ti.get_scale() - arg_ti.get_scale());
837  return ExpressionRange::makeIntRange(arg_range.getIntMin() * scale,
838  arg_range.getIntMax() * scale,
839  0,
840  arg_range.hasNulls());
841  }
842  if (arg_ti.is_decimal()) {
843  CHECK_EQ(int64_t(0), arg_range.getBucket());
844  const int64_t scale = exp_to_scale(arg_ti.get_scale());
845  const int64_t scale_half = scale / 2;
846  if (ti.is_fp()) {
847  return fpRangeFromDecimal(arg_range, scale, ti);
848  }
849  return ExpressionRange::makeIntRange((arg_range.getIntMin() - scale_half) / scale,
850  (arg_range.getIntMax() + scale_half) / scale,
851  0,
852  arg_range.hasNulls());
853  }
854  if (ti.is_integer() || ti.is_time()) {
855  return arg_range;
856  }
857  if (ti.get_type() == kFLOAT) {
859  arg_range.getIntMin(), arg_range.getIntMax(), arg_range.hasNulls());
860  }
861  if (ti.get_type() == kDOUBLE) {
863  arg_range.getIntMin(), arg_range.getIntMax(), arg_range.hasNulls());
864  }
865  break;
866  }
868  break;
869  default:
870  CHECK(false);
871  }
873 }
874 
876  const Analyzer::ExtractExpr* extract_expr,
877  const std::vector<InputTableInfo>& query_infos,
878  const Executor* executor,
879  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
880  const int32_t extract_field{extract_expr->get_field()};
881  const auto arg_range = getExpressionRange(
882  extract_expr->get_from_expr(), query_infos, executor, simple_quals);
883  const bool has_nulls =
884  arg_range.getType() == ExpressionRangeType::Invalid || arg_range.hasNulls();
885  const auto& extract_expr_ti = extract_expr->get_from_expr()->get_type_info();
886  switch (extract_field) {
887  case kYEAR: {
888  if (arg_range.getType() == ExpressionRangeType::Invalid) {
890  }
891  CHECK(arg_range.getType() == ExpressionRangeType::Integer);
892  const int64_t year_range_min =
893  extract_expr_ti.is_high_precision_timestamp()
894  ? ExtractFromTime(
895  kYEAR,
896  arg_range.getIntMin() /
897  get_timestamp_precision_scale(extract_expr_ti.get_dimension()))
898  : ExtractFromTime(kYEAR, arg_range.getIntMin());
899  const int64_t year_range_max =
900  extract_expr_ti.is_high_precision_timestamp()
901  ? ExtractFromTime(
902  kYEAR,
903  arg_range.getIntMax() /
904  get_timestamp_precision_scale(extract_expr_ti.get_dimension()))
905  : ExtractFromTime(kYEAR, arg_range.getIntMax());
907  year_range_min, year_range_max, 0, arg_range.hasNulls());
908  }
909  case kEPOCH:
910  case kDATEEPOCH:
911  return arg_range;
912  case kQUARTERDAY:
913  case kQUARTER:
914  return ExpressionRange::makeIntRange(1, 4, 0, has_nulls);
915  case kMONTH:
916  return ExpressionRange::makeIntRange(1, 12, 0, has_nulls);
917  case kDAY:
918  return ExpressionRange::makeIntRange(1, 31, 0, has_nulls);
919  case kHOUR:
920  return ExpressionRange::makeIntRange(0, 23, 0, has_nulls);
921  case kMINUTE:
922  return ExpressionRange::makeIntRange(0, 59, 0, has_nulls);
923  case kSECOND:
924  return ExpressionRange::makeIntRange(0, 60, 0, has_nulls);
925  case kMILLISECOND:
926  return ExpressionRange::makeIntRange(0, 999, 0, has_nulls);
927  case kMICROSECOND:
928  return ExpressionRange::makeIntRange(0, 999999, 0, has_nulls);
929  case kNANOSECOND:
930  return ExpressionRange::makeIntRange(0, 999999999, 0, has_nulls);
931  case kDOW:
932  return ExpressionRange::makeIntRange(0, 6, 0, has_nulls);
933  case kISODOW:
934  return ExpressionRange::makeIntRange(1, 7, 0, has_nulls);
935  case kDOY:
936  return ExpressionRange::makeIntRange(1, 366, 0, has_nulls);
937  case kWEEK:
938  case kWEEK_SUNDAY:
939  case kWEEK_SATURDAY:
940  return ExpressionRange::makeIntRange(1, 53, 0, has_nulls);
941  default:
942  CHECK(false);
943  }
945 }
946 
948  const Analyzer::DatetruncExpr* datetrunc_expr,
949  const std::vector<InputTableInfo>& query_infos,
950  const Executor* executor,
951  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
952  const auto arg_range = getExpressionRange(
953  datetrunc_expr->get_from_expr(), query_infos, executor, simple_quals);
954  if (arg_range.getType() == ExpressionRangeType::Invalid) {
956  }
957  const auto& datetrunc_expr_ti = datetrunc_expr->get_from_expr()->get_type_info();
958  const int64_t min_ts = DateTimeTranslator::getDateTruncConstantValue(
959  arg_range.getIntMin(), datetrunc_expr->get_field(), datetrunc_expr_ti);
960  const int64_t max_ts = DateTimeTranslator::getDateTruncConstantValue(
961  arg_range.getIntMax(), datetrunc_expr->get_field(), datetrunc_expr_ti);
962  const int64_t bucket =
963  datetrunc_expr_ti.is_high_precision_timestamp()
964  ? get_conservative_datetrunc_bucket(datetrunc_expr->get_field()) *
966  datetrunc_expr_ti.get_dimension())
967  : get_conservative_datetrunc_bucket(datetrunc_expr->get_field());
968 
969  return ExpressionRange::makeIntRange(min_ts, max_ts, bucket, arg_range.hasNulls());
970 }
971 
974  const std::vector<InputTableInfo>& query_infos,
975  const Executor* executor,
976  boost::optional<std::list<std::shared_ptr<Analyzer::Expr>>> simple_quals) {
977  auto target_value_expr = width_bucket_expr->get_target_value();
978  auto target_ti = target_value_expr->get_type_info();
979  if (width_bucket_expr->is_constant_expr()) {
980  auto const_target_value = dynamic_cast<const Analyzer::Constant*>(target_value_expr);
981  if (const_target_value) {
982  if (const_target_value->get_is_null()) {
983  // null constant, return default width_bucket range
985  0, width_bucket_expr->get_partition_count_val(), 0, true);
986  } else {
987  auto target_value_range =
988  getExpressionRange(target_value_expr, query_infos, executor);
989  CHECK(target_value_range.getFpMax() == target_value_range.getFpMin());
990  auto target_value_bucket =
991  width_bucket_expr->compute_bucket(target_value_range.getFpMax(), target_ti);
993  target_value_bucket, target_value_bucket, 0, false);
994  }
995  }
996  // compute possible bucket range based on lower and upper bound constants
997  // to elucidate a target bucket range
998  const auto target_value_range =
999  getExpressionRange(target_value_expr, query_infos, executor);
1000  const auto target_value_range_with_qual =
1001  getExpressionRange(target_value_expr, query_infos, executor, simple_quals);
1002  auto compute_bucket_range = [&width_bucket_expr](const ExpressionRange& target_range,
1003  SQLTypeInfo ti) {
1004  // target value is cast to double
1005  auto lower_bound_bucket =
1006  width_bucket_expr->compute_bucket<double>(target_range.getFpMin(), ti);
1007  auto upper_bound_bucket =
1008  width_bucket_expr->compute_bucket<double>(target_range.getFpMax(), ti);
1010  lower_bound_bucket, upper_bound_bucket, 0, target_range.hasNulls());
1011  };
1012  auto res_range = compute_bucket_range(target_value_range_with_qual, target_ti);
1013  // check target_value expression's col range to be not nullable iff it has its filter
1014  // expression i.e., in simple_quals
1015  // todo (yoonmin) : need to search simple_quals to cover more cases?
1016  if (target_value_range.getFpMin() < target_value_range_with_qual.getFpMin() ||
1017  target_value_range.getFpMax() > target_value_range_with_qual.getFpMax()) {
1018  res_range.setNulls(false);
1019  }
1020  return res_range;
1021  } else {
1022  // we cannot determine a possibility of skipping oob check safely
1023  auto target_expression_range = getExpressionRange(
1024  width_bucket_expr->get_partition_count(), query_infos, executor, simple_quals);
1026  0, INT32_MAX, 0, target_value_expr->get_type_info().get_notnull());
1027  switch (target_expression_range.getType()) {
1029  res.setIntMax(target_expression_range.getIntMax() + 1);
1030  break;
1031  }
1034  res.setIntMax(static_cast<int64_t>(target_expression_range.getFpMax()) + 1);
1035  break;
1036  }
1037  default:
1038  break;
1039  }
1040  return res;
1041  }
1042 }
int get_table_id() const
Definition: Analyzer.h:194
int64_t getIntMin() const
#define CHECK_EQ(x, y)
Definition: Logger.h:217
#define FIND_STAT_FRAG(stat_name)
const Expr * get_from_expr() const
Definition: Analyzer.h:1340
const Expr * get_partition_count() const
Definition: Analyzer.h:1079
bool is_constant_expr() const
Definition: Analyzer.h:1154
const Expr * get_else_expr() const
Definition: Analyzer.h:1295
static ExpressionRange makeNullRange()
Definition: sqltypes.h:49
int64_t DateTruncate(DatetruncField field, const int64_t timeval)
SQLTypes
Definition: sqltypes.h:38
bool is_timestamp() const
Definition: sqltypes.h:762
bool operator==(const ExpressionRange &other) const
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t
void apply_hpt_qual(const Datum const_datum, const SQLTypes const_type, const int32_t const_dimen, const int32_t col_dimen, const SQLOps sql_op, ExpressionRange &qual_range)
void apply_int_qual(const Datum const_datum, const SQLTypes const_type, const SQLOps sql_op, ExpressionRange &qual_range)
#define const
double extract_min_stat_double(const ChunkStats &stats, const SQLTypeInfo &col_ti)
ExtractField get_field() const
Definition: Analyzer.h:1339
bool is_fp() const
Definition: sqltypes.h:508
HOST DEVICE int get_scale() const
Definition: sqltypes.h:334
const Expr * get_right_operand() const
Definition: Analyzer.h:443
SQLOps
Definition: sqldefs.h:29
Definition: sqldefs.h:35
Definition: sqldefs.h:36
#define DEF_OPERATOR(fname, op)
bool get_is_null() const
Definition: Analyzer.h:334
#define CHECK_GE(x, y)
Definition: Logger.h:222
#define INT32_MAX
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:931
Definition: sqldefs.h:49
Definition: sqldefs.h:30
DatetruncField get_field() const
Definition: Analyzer.h:1464
Definition: sqldefs.h:41
int64_t scale_up_interval_endpoint(const int64_t endpoint, const SQLTypeInfo &ti)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
void setIntMin(const int64_t int_min)
bool is_number() const
Definition: sqltypes.h:509
#define CHECK_GT(x, y)
Definition: Logger.h:221
constexpr int64_t get_datetime_scaled_epoch(const ScalingType direction, const int64_t epoch, const int32_t dimen)
const Expr * get_arg() const
Definition: Analyzer.h:939
bool is_time() const
Definition: sqltypes.h:510
static ExpressionRange makeFloatRange(const float fp_min, const float fp_max, const bool has_nulls)
ExpressionRange apply_simple_quals(const Analyzer::ColumnVar *col_expr, const ExpressionRange &col_range, const boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
bool g_null_div_by_zero
Definition: Execute.cpp:84
SQLOps get_optype() const
Definition: Analyzer.h:439
float floatval
Definition: sqltypes.h:216
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
Definition: JsonAccessors.h:31
ExpressionRangeType type_
ExpressionRange operator||(const ExpressionRange &other) const
DatetruncField
Definition: DateTruncate.h:27
int64_t get_conservative_datetrunc_bucket(const DatetruncField datetrunc_field)
void apply_fp_qual(const Datum const_datum, const SQLTypes const_type, const SQLOps sql_op, ExpressionRange &qual_range)
int64_t bigintval
Definition: sqltypes.h:215
ExpressionRange getLeafColumnRange(const Analyzer::ColumnVar *col_expr, const std::vector< InputTableInfo > &query_infos, const Executor *executor, const bool is_outer_join_proj)
bool is_boolean() const
Definition: sqltypes.h:511
ExpressionRange getExpressionRange(const Analyzer::BinOper *expr, const std::vector< InputTableInfo > &query_infos, const Executor *, boost::optional< std::list< std::shared_ptr< Analyzer::Expr >>> simple_quals)
bool hasNulls() const
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
static ExpressionRange makeIntRange(const int64_t int_min, const int64_t int_max, const int64_t bucket, const bool has_nulls)
Definition: sqldefs.h:34
static ExpressionRange makeDoubleRange(const double fp_min, const double fp_max, const bool has_nulls)
#define CHECK_LT(x, y)
Definition: Logger.h:219
Definition: sqltypes.h:52
Definition: sqltypes.h:53
double getFpMax() const
Definition: sqldefs.h:40
virtual std::string toString() const =0
const Expr * get_from_expr() const
Definition: Analyzer.h:1465
double getFpMin() const
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
int get_rte_idx() const
Definition: Analyzer.h:196
const Expr * get_operand() const
Definition: Analyzer.h:371
Datum get_constval() const
Definition: Analyzer.h:335
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:331
ExpressionRange operator/(const ExpressionRange &other) const
int32_t get_partition_count_val() const
Definition: Analyzer.cpp:3423
DEVICE int64_t ExtractFromTime(ExtractField field, const int64_t timeval)
Definition: sqltypes.h:41
void setIntMax(const int64_t int_max)
int64_t extract_min_stat(const ChunkStats &stats, const SQLTypeInfo &ti)
const Expr * get_target_value() const
Definition: Analyzer.h:1076
ExpressionRangeType getType() const
int64_t getIntMax() const
#define CHECK(condition)
Definition: Logger.h:209
constexpr int64_t get_timestamp_precision_scale(const int32_t dimen)
Definition: DateTimeUtils.h:51
bool is_high_precision_timestamp() const
Definition: sqltypes.h:752
uint64_t exp_to_scale(const unsigned exp)
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
int32_t compute_bucket(T target_const_val, SQLTypeInfo &ti) const
Definition: Analyzer.h:1117
Definition: sqldefs.h:33
const Expr * get_left_operand() const
Definition: Analyzer.h:442
static bool typeSupportsRange(const SQLTypeInfo &ti)
Definition: sqltypes.h:45
int get_column_id() const
Definition: Analyzer.h:195
static ExpressionRange makeInvalidRange()
bool is_string() const
Definition: sqltypes.h:504
static int64_t getDateTruncConstantValue(const int64_t &timeval, const DatetruncField &field, const SQLTypeInfo &ti)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:732
ExpressionRange getDateTimePrecisionCastRange(const ExpressionRange &arg_range, const SQLTypeInfo &oper_ti, const SQLTypeInfo &target_ti)
SQLOps get_optype() const
Definition: Analyzer.h:370
bool is_date() const
Definition: sqltypes.h:750
bool is_array() const
Definition: sqltypes.h:512
ALWAYS_INLINE DEVICE double width_bucket_expr(const double target_value, const bool reversed, const double lower_bound, const double upper_bound, const int32_t partition_count)
const std::list< std::pair< std::shared_ptr< Analyzer::Expr >, std::shared_ptr< Analyzer::Expr > > > & get_expr_pair_list() const
Definition: Analyzer.h:1292
#define VLOG(n)
Definition: Logger.h:303
double extract_max_stat_double(const ChunkStats &stats, const SQLTypeInfo &col_ti)
int64_t extract_max_stat(const ChunkStats &stats, const SQLTypeInfo &ti)
double doubleval
Definition: sqltypes.h:217
ExpressionRange fpRangeFromDecimal(const ExpressionRange &arg_range, const int64_t scale, const SQLTypeInfo &target_ti)