OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Datum.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include <algorithm>
24 #include <cassert>
25 #include <cctype>
26 #include <charconv>
27 #include <cmath>
28 #include <cstdio>
29 #include <cstdlib>
30 #include <limits>
31 #include <stdexcept>
32 #include <string>
33 
34 #include "DateConverters.h"
35 #include "DateTimeParser.h"
36 #include "Logger/Logger.h"
38 #include "StringTransform.h"
39 #include "misc.h"
40 #include "sqltypes.h"
41 
42 std::string SQLTypeInfo::type_name[kSQLTYPE_LAST] = {"NULL",
43  "BOOLEAN",
44  "CHAR",
45  "VARCHAR",
46  "NUMERIC",
47  "DECIMAL",
48  "INTEGER",
49  "SMALLINT",
50  "FLOAT",
51  "DOUBLE",
52  "TIME",
53  "TIMESTAMP",
54  "BIGINT",
55  "TEXT",
56  "DATE",
57  "ARRAY",
58  "INTERVAL_DAY_TIME",
59  "INTERVAL_YEAR_MONTH",
60  "POINT",
61  "LINESTRING",
62  "POLYGON",
63  "MULTIPOLYGON",
64  "TINYINT",
65  "GEOMETRY",
66  "GEOGRAPHY",
67  "EVAL_CONTEXT_TYPE",
68  "VOID",
69  "CURSOR",
70  "COLUMN",
71  "COLUMN_LIST"};
73  {"NONE", "FIXED", "RL", "DIFF", "DICT", "SPARSE", "COMPRESSED", "DAYS"};
74 
75 namespace {
76 // Return decimal_value * 10^dscale
77 int64_t convert_decimal_value_to_scale_internal(const int64_t decimal_value,
78  int const dscale) {
79  constexpr int max_scale = std::numeric_limits<uint64_t>::digits10; // 19
80  constexpr auto pow10 = shared::powersOf<uint64_t, max_scale + 1>(10);
81  if (dscale < 0) {
82  if (dscale < -max_scale) {
83  return 0; // +/- 0.09223372036854775807 rounds to 0
84  }
85  uint64_t const u = std::abs(decimal_value);
86  uint64_t const pow = pow10[-dscale];
87  uint64_t div = u / pow;
88  uint64_t rem = u % pow;
89  div += pow / 2 <= rem;
90  return decimal_value < 0 ? -div : div;
91  } else if (dscale < max_scale) {
92  int64_t retval;
93 #ifdef _WIN32
94  return decimal_value * pow10[dscale];
95 #else
96  if (!__builtin_mul_overflow(decimal_value, pow10[dscale], &retval)) {
97  return retval;
98  }
99 #endif
100  }
101  if (decimal_value == 0) {
102  return 0;
103  }
104  throw std::runtime_error("Overflow in DECIMAL-to-DECIMAL conversion.");
105 }
106 } // namespace
107 
108 int64_t parse_numeric(const std::string_view s, SQLTypeInfo& ti) {
109  // if we are given a dimension, first parse to the maximum precision of the string
110  // and then convert to the correct size
111  if (ti.get_dimension() != 0) {
112  SQLTypeInfo ti_string(kNUMERIC, 0, 0, false);
113  return convert_decimal_value_to_scale(parse_numeric(s, ti_string), ti_string, ti);
114  }
115  size_t dot = s.find_first_of('.', 0);
116  std::string before_dot;
117  std::string after_dot;
118  if (dot != std::string::npos) {
119  // make .99 as 0.99, or std::stoll below throws exception 'std::invalid_argument'
120  before_dot = (0 == dot) ? "0" : s.substr(0, dot);
121  after_dot = s.substr(dot + 1);
122  } else {
123  before_dot = s;
124  after_dot = "0";
125  }
126  const bool is_negative = before_dot.find_first_of('-', 0) != std::string::npos;
127  const int64_t sign = is_negative ? -1 : 1;
128  int64_t result;
129  result = std::abs(std::stoll(before_dot));
130  int64_t fraction = 0;
131  const size_t before_dot_digits = before_dot.length() - (is_negative ? 1 : 0);
132 
133  constexpr int max_digits = std::numeric_limits<int64_t>::digits10;
134  if (!after_dot.empty()) {
135  int64_t next_digit = 0;
136  // After dot will be used to scale integer part so make sure it wont overflow
137  if (after_dot.size() + before_dot_digits > max_digits) {
138  if (before_dot_digits >= max_digits) {
139  after_dot = "0";
140  } else {
141  next_digit = std::stoll(after_dot.substr(max_digits - before_dot_digits, 1));
142  after_dot = after_dot.substr(0, max_digits - before_dot_digits);
143  }
144  }
145  fraction = std::stoll(after_dot);
146  fraction += next_digit >= 5 ? 1 : 0;
147  }
148 
149  // set the type info based on the literal string
150  ti.set_scale(static_cast<int>(after_dot.length()));
151  ti.set_dimension(static_cast<int>(before_dot_digits + ti.get_scale()));
152  ti.set_notnull(false);
153  if (ti.get_scale()) {
154  result = convert_decimal_value_to_scale_internal(result, ti.get_scale());
155  }
156  result += fraction;
157 
158  return result * sign;
159 }
160 
161 namespace {
162 
// Smallest value of a signed two's-complement field that is `fieldsize` bits
// wide, i.e. -2^(fieldsize-1). Equal to NULL value for nullable types.
// fieldsize must be at least 1 because the shift count is fieldsize - 1.
template <typename T>
T minValue(unsigned const fieldsize) {
  static_assert(std::is_signed_v<T>);
  using Unsigned = std::make_unsigned_t<T>;
  // Build the mask from an all-ones unsigned pattern instead of T(-1):
  // left-shifting a negative signed value is undefined behavior before C++20.
  uint64_t const high_bits = ~uint64_t{0} << (fieldsize - 1);
  return static_cast<T>(static_cast<Unsigned>(high_bits));
}

// Largest value of a signed two's-complement field that is `fieldsize` bits
// wide, i.e. 2^(fieldsize-1) - 1: the bitwise complement of minValue.
template <typename T>
T maxValue(unsigned const fieldsize) {
  return static_cast<T>(~minValue<T>(fieldsize));
}
174 
175 std::string toString(SQLTypeInfo const& ti, unsigned const fieldsize) {
176  return ti.get_type_name() + '(' + std::to_string(fieldsize) + ')';
177 }
178 
179 // GCC 10 does not support std::from_chars w/ double, so strtold() is used instead.
180 // Convert s to long double then round to integer type T.
181 // It's not assumed that long double is any particular size; it is to be nice to
182 // users who use floating point values where integers are expected. Some platforms
183 // may be more accommodating with larger long doubles than others.
184 template <typename T, typename U = long double>
185 T parseFloatAsInteger(std::string_view s, SQLTypeInfo const& ti) {
186  // Use stack memory if s is small enough before resorting to dynamic memory.
187  constexpr size_t bufsize = 64;
188  char c_str[bufsize];
189  std::string str;
190  char const* str_begin;
191  char* str_end;
192  if (s.size() < bufsize) {
193  s.copy(c_str, s.size());
194  c_str[s.size()] = '\0';
195  str_begin = c_str;
196  } else {
197  str = s;
198  str_begin = str.c_str();
199  }
200  U value = strtold(str_begin, &str_end);
201  if (str_begin == str_end) {
202  throw std::runtime_error("Unable to parse " + std::string(s) + " to " +
203  ti.get_type_name());
204  } else if (str_begin + s.size() != str_end) {
205  throw std::runtime_error(std::string("Unexpected character \"") + *str_end +
206  "\" encountered in " + ti.get_type_name() + " value " +
207  std::string(s));
208  }
209  value = std::round(value);
210  if (!std::isfinite(value)) {
211  throw std::runtime_error("Invalid conversion from \"" + std::string(s) + "\" to " +
212  ti.get_type_name());
213  } else if (value < static_cast<U>(std::numeric_limits<T>::min()) ||
214  static_cast<U>(std::numeric_limits<T>::max()) < value) {
215  throw std::runtime_error("Integer " + std::string(s) + " is out of range for " +
216  ti.get_type_name());
217  }
218  return static_cast<T>(value);
219 }
220 
// True when [ptr, end) is exactly "." or ".0".
// ptr must point at a readable character, i.e. ptr != end.
inline bool hasCommonSuffix(char const* const ptr, char const* const end) {
  if (*ptr != '.') {
    return false;
  }
  auto const remaining = end - ptr;
  if (remaining == 1) {
    return true;
  }
  return remaining == 2 && ptr[1] == '0';
}
225 
226 template <typename T>
227 T parseInteger(std::string_view s, SQLTypeInfo const& ti) {
228  T retval{0};
229  char const* const end = s.data() + s.size();
230  auto [ptr, error_code] = std::from_chars(s.data(), end, retval);
231  if (ptr != end) {
232  if (error_code != std::errc() || !hasCommonSuffix(ptr, end)) {
233  retval = parseFloatAsInteger<T>(s, ti);
234  }
235  } else if (error_code != std::errc()) {
236  if (error_code == std::errc::result_out_of_range) {
237  throw std::runtime_error("Integer " + std::string(s) + " is out of range for " +
238  ti.get_type_name());
239  }
240  throw std::runtime_error("Invalid conversion from \"" + std::string(s) + "\" to " +
241  ti.get_type_name());
242  }
243  // Bounds checking based on SQLTypeInfo.
244  unsigned const fieldsize =
245  ti.get_compression() == kENCODING_FIXED ? ti.get_comp_param() : 8 * sizeof(T);
246  if (fieldsize < 8 * sizeof(T)) {
247  if (maxValue<T>(fieldsize) < retval) {
248  throw std::runtime_error("Integer " + std::string(s) +
249  " exceeds maximum value for " + toString(ti, fieldsize));
250  } else if (ti.get_notnull()) {
251  if (retval < minValue<T>(fieldsize)) {
252  throw std::runtime_error("Integer " + std::string(s) +
253  " exceeds minimum value for " + toString(ti, fieldsize));
254  }
255  } else {
256  if (retval <= minValue<T>(fieldsize)) {
257  throw std::runtime_error("Integer " + std::string(s) +
258  " exceeds minimum value for nullable " +
259  toString(ti, fieldsize));
260  }
261  }
262  } else if (!ti.get_notnull() && retval == std::numeric_limits<T>::min()) {
263  throw std::runtime_error("Integer " + std::string(s) +
264  " exceeds minimum value for nullable " +
265  toString(ti, fieldsize));
266  }
267  return retval;
268 }
269 
270 } // namespace
271 
272 /*
273  * @brief convert string to a datum
274  */
275 Datum StringToDatum(std::string_view s, SQLTypeInfo& ti) {
276  Datum d;
277  try {
278  switch (ti.get_type()) {
279  case kARRAY:
280  case kCOLUMN:
281  case kCOLUMN_LIST:
282  break;
283  case kBOOLEAN:
284  if (s == "t" || s == "T" || s == "1" || to_upper(std::string(s)) == "TRUE") {
285  d.boolval = true;
286  } else if (s == "f" || s == "F" || s == "0" ||
287  to_upper(std::string(s)) == "FALSE") {
288  d.boolval = false;
289  } else {
290  throw std::runtime_error("Invalid string for boolean " + std::string(s));
291  }
292  break;
293  case kNUMERIC:
294  case kDECIMAL:
295  d.bigintval = parse_numeric(s, ti);
296  break;
297  case kBIGINT:
298  d.bigintval = parseInteger<int64_t>(s, ti);
299  break;
300  case kINT:
301  d.intval = parseInteger<int32_t>(s, ti);
302  break;
303  case kSMALLINT:
304  d.smallintval = parseInteger<int16_t>(s, ti);
305  break;
306  case kTINYINT:
307  d.tinyintval = parseInteger<int8_t>(s, ti);
308  break;
309  case kFLOAT:
310  d.floatval = std::stof(std::string(s));
311  break;
312  case kDOUBLE:
313  d.doubleval = std::stod(std::string(s));
314  break;
315  case kTIME:
316  d.bigintval = dateTimeParse<kTIME>(s, ti.get_dimension());
317  break;
318  case kTIMESTAMP:
319  d.bigintval = dateTimeParse<kTIMESTAMP>(s, ti.get_dimension());
320  break;
321  case kDATE:
322  d.bigintval = dateTimeParse<kDATE>(s, ti.get_dimension());
323  break;
324  case kPOINT:
325  case kLINESTRING:
326  case kPOLYGON:
327  case kMULTIPOLYGON:
328  throw std::runtime_error("Internal error: geometry type in StringToDatum.");
329  default:
330  throw std::runtime_error("Internal error: invalid type in StringToDatum: " +
331  ti.get_type_name());
332  }
333  } catch (const std::invalid_argument&) {
334  throw std::runtime_error("Invalid conversion from string to " + ti.get_type_name());
335  } catch (const std::out_of_range&) {
336  throw std::runtime_error("Got out of range error during conversion from string to " +
337  ti.get_type_name());
338  }
339  return d;
340 }
341 
342 bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo& ti) {
343  switch (ti.get_type()) {
344  case kBOOLEAN:
345  return a.boolval == b.boolval;
346  case kBIGINT:
347  case kNUMERIC:
348  case kDECIMAL:
349  return a.bigintval == b.bigintval;
350  case kINT:
351  return a.intval == b.intval;
352  case kSMALLINT:
353  return a.smallintval == b.smallintval;
354  case kTINYINT:
355  return a.tinyintval == b.tinyintval;
356  case kFLOAT:
357  return a.floatval == b.floatval;
358  case kDOUBLE:
359  return a.doubleval == b.doubleval;
360  case kTIME:
361  case kTIMESTAMP:
362  case kDATE:
363  case kINTERVAL_DAY_TIME:
365  return a.bigintval == b.bigintval;
366  case kTEXT:
367  case kVARCHAR:
368  case kCHAR:
369  case kPOINT:
370  case kLINESTRING:
371  case kPOLYGON:
372  case kMULTIPOLYGON:
373  if (ti.get_compression() == kENCODING_DICT) {
374  return a.intval == b.intval;
375  }
376  if (a.stringval == nullptr && b.stringval == nullptr) {
377  return true;
378  }
379  if (a.stringval == nullptr || b.stringval == nullptr) {
380  return false;
381  }
382  return *a.stringval == *b.stringval;
383  default:
384  return false;
385  }
386  return false;
387 }
388 
389 /*
390  * @brief convert datum to string
391  */
392 std::string DatumToString(Datum d, const SQLTypeInfo& ti) {
393  constexpr size_t buf_size = 64;
394  char buf[buf_size]; // Hold "2000-03-01 12:34:56.123456789" and large years.
395  switch (ti.get_type()) {
396  case kBOOLEAN:
397  if (d.boolval) {
398  return "t";
399  }
400  return "f";
401  case kNUMERIC:
402  case kDECIMAL: {
403  double v = (double)d.bigintval / pow(10, ti.get_scale());
404  int size = snprintf(buf, buf_size, "%*.*f", ti.get_dimension(), ti.get_scale(), v);
405  CHECK_LE(0, size) << v << ' ' << ti.to_string();
406  CHECK_LT(size_t(size), buf_size) << v << ' ' << ti.to_string();
407  return buf;
408  }
409  case kINT:
410  return std::to_string(d.intval);
411  case kSMALLINT:
412  return std::to_string(d.smallintval);
413  case kTINYINT:
414  return std::to_string(d.tinyintval);
415  case kBIGINT:
416  return std::to_string(d.bigintval);
417  case kFLOAT:
418  return std::to_string(d.floatval);
419  case kDOUBLE:
420  return std::to_string(d.doubleval);
421  case kTIME: {
422  size_t const len = shared::formatHMS(buf, buf_size, d.bigintval);
423  CHECK_EQ(8u, len); // 8 == strlen("HH:MM:SS")
424  return buf;
425  }
426  case kTIMESTAMP: {
427  unsigned const dim = ti.get_dimension(); // assumes dim <= 9
428  size_t const len = shared::formatDateTime(buf, buf_size, d.bigintval, dim);
429  CHECK_LE(19u + bool(dim) + dim, len); // 19 = strlen("YYYY-MM-DD HH:MM:SS")
430  return buf;
431  }
432  case kDATE: {
433  size_t const len = shared::formatDate(buf, buf_size, d.bigintval);
434  CHECK_LE(10u, len); // 10 == strlen("YYYY-MM-DD")
435  return buf;
436  }
437  case kINTERVAL_DAY_TIME:
438  return std::to_string(d.bigintval) + " ms (day-time interval)";
440  return std::to_string(d.bigintval) + " month(s) (year-month interval)";
441  case kTEXT:
442  case kVARCHAR:
443  case kCHAR:
444  if (d.stringval == nullptr) {
445  return "NULL";
446  }
447  return *d.stringval;
448  default:
449  throw std::runtime_error("Internal error: invalid type " + ti.get_type_name() +
450  " in DatumToString.");
451  }
452  return "";
453 }
454 
456  switch (ti.get_size()) {
457  case 1:
458  return kTINYINT;
459  case 2:
460  return kSMALLINT;
461  case 4:
462  return kINT;
463  case 8:
464  return kBIGINT;
465  default:
466  CHECK(false);
467  }
468  return kNULLT;
469 }
470 
471 // Return decimal_value * 10^dscale
472 // where dscale = new_type_info.get_scale() - type_info.get_scale()
473 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
474  const SQLTypeInfo& type_info,
475  const SQLTypeInfo& new_type_info) {
476  int const dscale = new_type_info.get_scale() - type_info.get_scale();
477  return convert_decimal_value_to_scale_internal(decimal_value, dscale);
478 }
int8_t tinyintval
Definition: sqltypes.h:212
#define CHECK_EQ(x, y)
Definition: Logger.h:217
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:392
std::string toString(const ExtArgumentType &sig_type)
Definition: sqltypes.h:49
T parseInteger(std::string_view s, SQLTypeInfo const &ti)
Definition: Datum.cpp:227
T maxValue(unsigned const fieldsize)
Definition: Datum.cpp:171
SQLTypes
Definition: sqltypes.h:38
int64_t parse_numeric(const std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:108
HOST DEVICE int get_scale() const
Definition: sqltypes.h:334
int8_t boolval
Definition: sqltypes.h:211
size_t formatDateTime(char *buf, size_t const max, int64_t const timestamp, int const dimension)
Definition: misc.cpp:43
Constants for Builtin SQL Types supported by OmniSci.
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
int32_t intval
Definition: sqltypes.h:214
std::string to_string(char const *&&v)
size_t formatHMS(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:80
constexpr double a
Definition: Utm.h:38
float floatval
Definition: sqltypes.h:216
std::string to_string() const
Definition: sqltypes.h:472
T minValue(unsigned const fieldsize)
Definition: Datum.cpp:165
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:342
static std::string type_name[kSQLTYPE_LAST]
Definition: sqltypes.h:893
void set_scale(int s)
Definition: sqltypes.h:424
int64_t bigintval
Definition: sqltypes.h:215
int16_t smallintval
Definition: sqltypes.h:213
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:275
std::string * stringval
Definition: sqltypes.h:220
std::string to_upper(const std::string &str)
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:455
size_t formatDate(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:25
#define CHECK_LT(x, y)
Definition: Logger.h:219
Definition: sqltypes.h:52
Definition: sqltypes.h:53
static std::string comp_name[kENCODING_LAST]
Definition: sqltypes.h:894
#define CHECK_LE(x, y)
Definition: Logger.h:220
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:473
void set_dimension(int d)
Definition: sqltypes.h:421
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:331
std::string get_type_name() const
Definition: sqltypes.h:432
Definition: sqltypes.h:41
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:338
void set_notnull(bool n)
Definition: sqltypes.h:426
#define CHECK(condition)
Definition: Logger.h:209
int64_t convert_decimal_value_to_scale_internal(const int64_t decimal_value, int const dscale)
Definition: Datum.cpp:77
Definition: sqltypes.h:45
bool hasCommonSuffix(char const *const ptr, char const *const end)
Definition: Datum.cpp:222
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
T parseFloatAsInteger(std::string_view s, SQLTypeInfo const &ti)
Definition: Datum.cpp:185
double doubleval
Definition: sqltypes.h:217