OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Datum.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include <algorithm>
24 #include <cassert>
25 #include <cctype>
26 #include <charconv>
27 #include <cmath>
28 #include <cstdio>
29 #include <cstdlib>
30 #include <limits>
31 #include <stdexcept>
32 #include <string>
33 
34 #include "DateConverters.h"
35 #include "DateTimeParser.h"
36 #include "Logger/Logger.h"
38 #include "StringTransform.h"
39 #include "misc.h"
40 #include "sqltypes.h"
41 
42 std::string SQLTypeInfo::type_name[kSQLTYPE_LAST] = {"NULL",
43  "BOOLEAN",
44  "CHAR",
45  "VARCHAR",
46  "NUMERIC",
47  "DECIMAL",
48  "INTEGER",
49  "SMALLINT",
50  "FLOAT",
51  "DOUBLE",
52  "TIME",
53  "TIMESTAMP",
54  "BIGINT",
55  "TEXT",
56  "DATE",
57  "ARRAY",
58  "INTERVAL_DAY_TIME",
59  "INTERVAL_YEAR_MONTH",
60  "POINT",
61  "LINESTRING",
62  "POLYGON",
63  "MULTIPOLYGON",
64  "TINYINT",
65  "GEOMETRY",
66  "GEOGRAPHY",
67  "EVAL_CONTEXT_TYPE",
68  "VOID",
69  "CURSOR",
70  "COLUMN",
71  "COLUMN_LIST"};
73  {"NONE", "FIXED", "RL", "DIFF", "DICT", "SPARSE", "COMPRESSED", "DAYS"};
74 
75 int64_t parse_numeric(const std::string_view s, SQLTypeInfo& ti) {
76  assert(s.length() <= 20);
77  size_t dot = s.find_first_of('.', 0);
78  std::string before_dot;
79  std::string after_dot;
80  if (dot != std::string::npos) {
81  // make .99 as 0.99, or std::stoll below throws exception 'std::invalid_argument'
82  before_dot = (0 == dot) ? "0" : s.substr(0, dot);
83  after_dot = s.substr(dot + 1);
84  } else {
85  before_dot = s;
86  after_dot = "0";
87  }
88  const bool is_negative = before_dot.find_first_of('-', 0) != std::string::npos;
89  const int64_t sign = is_negative ? -1 : 1;
90  int64_t result;
91  result = std::abs(std::stoll(before_dot));
92  int64_t fraction = 0;
93  const size_t before_dot_digits = before_dot.length() - (is_negative ? 1 : 0);
94  if (!after_dot.empty()) {
95  fraction = std::stoll(after_dot);
96  }
97  if (ti.get_dimension() == 0) {
98  // set the type info based on the literal string
99  ti.set_scale(static_cast<int>(after_dot.length()));
100  ti.set_dimension(static_cast<int>(before_dot_digits + ti.get_scale()));
101  ti.set_notnull(false);
102  } else {
103  CHECK_GE(ti.get_scale(), 0);
104  if (before_dot_digits + ti.get_scale() > static_cast<size_t>(ti.get_dimension())) {
105  throw std::runtime_error("numeric value " + std::string(s) +
106  " exceeds the maximum precision of " +
108  }
109  for (size_t i = static_cast<size_t>(ti.get_scale()); i < after_dot.length(); ++i) {
110  fraction /= 10; // truncate the digits after decimal point.
111  }
112  }
113  // the following loop can be made more efficient if needed
114  for (int i = 0; i < ti.get_scale(); i++) {
115  result *= 10;
116  }
117  if (result < 0) {
118  result -= fraction;
119  } else {
120  result += fraction;
121  }
122  return result * sign;
123 }
124 
125 namespace {
126 
// Smallest value of a two's-complement integer that is fieldsize bits wide,
// i.e. -2^(fieldsize-1). Equal to NULL value for nullable types.
// fieldsize must be in [1, number of bits in T].
template <typename T>
T minValue(unsigned const fieldsize) {
  static_assert(std::is_signed_v<T>);
  // Left-shifting a negative signed value is undefined behavior before C++20, so
  // build the bit pattern (all ones above bit fieldsize-2) in unsigned arithmetic
  // and convert it to T afterwards.
  return static_cast<T>(~std::uintmax_t{0} << (fieldsize - 1));
}
133 
134 template <typename T>
135 T maxValue(unsigned const fieldsize) {
136  return ~minValue<T>(fieldsize);
137 }
138 
139 std::string toString(SQLTypeInfo const& ti, unsigned const fieldsize) {
140  return ti.get_type_name() + '(' + std::to_string(fieldsize) + ')';
141 }
142 
143 // GCC 10 does not support std::from_chars w/ double, so strtold() is used instead.
144 // Convert s to long double then round to integer type T.
145 // It's not assumed that long double is any particular size; it is to be nice to
146 // users who use floating point values where integers are expected. Some platforms
147 // may be more accommodating with larger long doubles than others.
148 template <typename T, typename U = long double>
149 T parseFloatAsInteger(std::string_view s, SQLTypeInfo const& ti) {
150  // Use stack memory if s is small enough before resorting to dynamic memory.
151  constexpr size_t bufsize = 64;
152  char c_str[bufsize];
153  std::string str;
154  char const* str_begin;
155  char* str_end;
156  if (s.size() < bufsize) {
157  s.copy(c_str, s.size());
158  c_str[s.size()] = '\0';
159  str_begin = c_str;
160  } else {
161  str = s;
162  str_begin = str.c_str();
163  }
164  U value = strtold(str_begin, &str_end);
165  if (str_begin == str_end) {
166  throw std::runtime_error("Unable to parse " + std::string(s) + " to " +
167  ti.get_type_name());
168  } else if (str_begin + s.size() != str_end) {
169  throw std::runtime_error(std::string("Unexpected character \"") + *str_end +
170  "\" encountered in " + ti.get_type_name() + " value " +
171  std::string(s));
172  }
173  value = std::round(value);
174  if (!std::isfinite(value)) {
175  throw std::runtime_error("Invalid conversion from \"" + std::string(s) + "\" to " +
176  ti.get_type_name());
177  } else if (value < static_cast<U>(std::numeric_limits<T>::min()) ||
178  static_cast<U>(std::numeric_limits<T>::max()) < value) {
179  throw std::runtime_error("Integer " + std::string(s) + " is out of range for " +
180  ti.get_type_name());
181  }
182  return static_cast<T>(value);
183 }
184 
// True when the range [ptr, end) is exactly "." or ".0".
// ptr must be dereferenceable, i.e. ptr != end.
inline bool hasCommonSuffix(char const* const ptr, char const* const end) {
  if (*ptr != '.') {
    return false;
  }
  auto const remaining = end - ptr;  // characters left, including the '.'
  if (remaining == 1) {
    return true;
  }
  return remaining == 2 && ptr[1] == '0';
}
189 
190 template <typename T>
191 T parseInteger(std::string_view s, SQLTypeInfo const& ti) {
192  T retval{0};
193  char const* const end = s.data() + s.size();
194  auto [ptr, error_code] = std::from_chars(s.data(), end, retval);
195  if (ptr != end) {
196  if (error_code != std::errc() || !hasCommonSuffix(ptr, end)) {
197  retval = parseFloatAsInteger<T>(s, ti);
198  }
199  } else if (error_code != std::errc()) {
200  if (error_code == std::errc::result_out_of_range) {
201  throw std::runtime_error("Integer " + std::string(s) + " is out of range for " +
202  ti.get_type_name());
203  }
204  throw std::runtime_error("Invalid conversion from \"" + std::string(s) + "\" to " +
205  ti.get_type_name());
206  }
207  // Bounds checking based on SQLTypeInfo.
208  unsigned const fieldsize =
209  ti.get_compression() == kENCODING_FIXED ? ti.get_comp_param() : 8 * sizeof(T);
210  if (fieldsize < 8 * sizeof(T)) {
211  if (maxValue<T>(fieldsize) < retval) {
212  throw std::runtime_error("Integer " + std::string(s) +
213  " exceeds maximum value for " + toString(ti, fieldsize));
214  } else if (ti.get_notnull()) {
215  if (retval < minValue<T>(fieldsize)) {
216  throw std::runtime_error("Integer " + std::string(s) +
217  " exceeds minimum value for " + toString(ti, fieldsize));
218  }
219  } else {
220  if (retval <= minValue<T>(fieldsize)) {
221  throw std::runtime_error("Integer " + std::string(s) +
222  " exceeds minimum value for nullable " +
223  toString(ti, fieldsize));
224  }
225  }
226  } else if (!ti.get_notnull() && retval == std::numeric_limits<T>::min()) {
227  throw std::runtime_error("Integer " + std::string(s) +
228  " exceeds minimum value for nullable " +
229  toString(ti, fieldsize));
230  }
231  return retval;
232 }
233 
234 } // namespace
235 
236 /*
237  * @brief convert string to a datum
238  */
239 Datum StringToDatum(std::string_view s, SQLTypeInfo& ti) {
240  Datum d;
241  try {
242  switch (ti.get_type()) {
243  case kARRAY:
244  case kCOLUMN:
245  case kCOLUMN_LIST:
246  break;
247  case kBOOLEAN:
248  if (s == "t" || s == "T" || s == "1" || to_upper(std::string(s)) == "TRUE") {
249  d.boolval = true;
250  } else if (s == "f" || s == "F" || s == "0" ||
251  to_upper(std::string(s)) == "FALSE") {
252  d.boolval = false;
253  } else {
254  throw std::runtime_error("Invalid string for boolean " + std::string(s));
255  }
256  break;
257  case kNUMERIC:
258  case kDECIMAL:
259  d.bigintval = parse_numeric(s, ti);
260  break;
261  case kBIGINT:
262  d.bigintval = parseInteger<int64_t>(s, ti);
263  break;
264  case kINT:
265  d.intval = parseInteger<int32_t>(s, ti);
266  break;
267  case kSMALLINT:
268  d.smallintval = parseInteger<int16_t>(s, ti);
269  break;
270  case kTINYINT:
271  d.tinyintval = parseInteger<int8_t>(s, ti);
272  break;
273  case kFLOAT:
274  d.floatval = std::stof(std::string(s));
275  break;
276  case kDOUBLE:
277  d.doubleval = std::stod(std::string(s));
278  break;
279  case kTIME:
280  d.bigintval = dateTimeParse<kTIME>(s, ti.get_dimension());
281  break;
282  case kTIMESTAMP:
283  d.bigintval = dateTimeParse<kTIMESTAMP>(s, ti.get_dimension());
284  break;
285  case kDATE:
286  d.bigintval = dateTimeParse<kDATE>(s, ti.get_dimension());
287  break;
288  case kPOINT:
289  case kLINESTRING:
290  case kPOLYGON:
291  case kMULTIPOLYGON:
292  throw std::runtime_error("Internal error: geometry type in StringToDatum.");
293  default:
294  throw std::runtime_error("Internal error: invalid type in StringToDatum: " +
295  ti.get_type_name());
296  }
297  } catch (const std::invalid_argument&) {
298  throw std::runtime_error("Invalid conversion from string to " + ti.get_type_name());
299  } catch (const std::out_of_range&) {
300  throw std::runtime_error("Got out of range error during conversion from string to " +
301  ti.get_type_name());
302  }
303  return d;
304 }
305 
306 bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo& ti) {
307  switch (ti.get_type()) {
308  case kBOOLEAN:
309  return a.boolval == b.boolval;
310  case kBIGINT:
311  case kNUMERIC:
312  case kDECIMAL:
313  return a.bigintval == b.bigintval;
314  case kINT:
315  return a.intval == b.intval;
316  case kSMALLINT:
317  return a.smallintval == b.smallintval;
318  case kTINYINT:
319  return a.tinyintval == b.tinyintval;
320  case kFLOAT:
321  return a.floatval == b.floatval;
322  case kDOUBLE:
323  return a.doubleval == b.doubleval;
324  case kTIME:
325  case kTIMESTAMP:
326  case kDATE:
327  case kINTERVAL_DAY_TIME:
329  return a.bigintval == b.bigintval;
330  case kTEXT:
331  case kVARCHAR:
332  case kCHAR:
333  case kPOINT:
334  case kLINESTRING:
335  case kPOLYGON:
336  case kMULTIPOLYGON:
337  if (ti.get_compression() == kENCODING_DICT) {
338  return a.intval == b.intval;
339  }
340  if (a.stringval == nullptr && b.stringval == nullptr) {
341  return true;
342  }
343  if (a.stringval == nullptr || b.stringval == nullptr) {
344  return false;
345  }
346  return *a.stringval == *b.stringval;
347  default:
348  return false;
349  }
350  return false;
351 }
352 
353 /*
354  * @brief convert datum to string
355  */
356 std::string DatumToString(Datum d, const SQLTypeInfo& ti) {
357  constexpr size_t buf_size = 64;
358  char buf[buf_size]; // Hold "2000-03-01 12:34:56.123456789" and large years.
359  switch (ti.get_type()) {
360  case kBOOLEAN:
361  if (d.boolval) {
362  return "t";
363  }
364  return "f";
365  case kNUMERIC:
366  case kDECIMAL: {
367  double v = (double)d.bigintval / pow(10, ti.get_scale());
368  int size = snprintf(buf, buf_size, "%*.*f", ti.get_dimension(), ti.get_scale(), v);
369  CHECK_LE(0, size) << v << ' ' << ti.to_string();
370  CHECK_LT(size_t(size), buf_size) << v << ' ' << ti.to_string();
371  return buf;
372  }
373  case kINT:
374  return std::to_string(d.intval);
375  case kSMALLINT:
376  return std::to_string(d.smallintval);
377  case kTINYINT:
378  return std::to_string(d.tinyintval);
379  case kBIGINT:
380  return std::to_string(d.bigintval);
381  case kFLOAT:
382  return std::to_string(d.floatval);
383  case kDOUBLE:
384  return std::to_string(d.doubleval);
385  case kTIME: {
386  size_t const len = shared::formatHMS(buf, buf_size, d.bigintval);
387  CHECK_EQ(8u, len); // 8 == strlen("HH:MM:SS")
388  return buf;
389  }
390  case kTIMESTAMP: {
391  unsigned const dim = ti.get_dimension(); // assumes dim <= 9
392  size_t const len = shared::formatDateTime(buf, buf_size, d.bigintval, dim);
393  CHECK_LE(19u + bool(dim) + dim, len); // 19 = strlen("YYYY-MM-DD HH:MM:SS")
394  return buf;
395  }
396  case kDATE: {
397  size_t const len = shared::formatDate(buf, buf_size, d.bigintval);
398  CHECK_LE(10u, len); // 10 == strlen("YYYY-MM-DD")
399  return buf;
400  }
401  case kINTERVAL_DAY_TIME:
402  return std::to_string(d.bigintval) + " ms (day-time interval)";
404  return std::to_string(d.bigintval) + " month(s) (year-month interval)";
405  case kTEXT:
406  case kVARCHAR:
407  case kCHAR:
408  if (d.stringval == nullptr) {
409  return "NULL";
410  }
411  return *d.stringval;
412  default:
413  throw std::runtime_error("Internal error: invalid type " + ti.get_type_name() +
414  " in DatumToString.");
415  }
416  return "";
417 }
418 
420  switch (ti.get_size()) {
421  case 1:
422  return kTINYINT;
423  case 2:
424  return kSMALLINT;
425  case 4:
426  return kINT;
427  case 8:
428  return kBIGINT;
429  default:
430  CHECK(false);
431  }
432  return kNULLT;
433 }
434 
435 // Return decimal_value * 10^dscale
436 // where dscale = new_type_info.get_scale() - type_info.get_scale()
437 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
438  const SQLTypeInfo& type_info,
439  const SQLTypeInfo& new_type_info) {
440  constexpr int max_scale = std::numeric_limits<uint64_t>::digits10; // 19
441  constexpr auto pow10 = shared::powersOf<uint64_t, max_scale + 1>(10);
442  int const dscale = new_type_info.get_scale() - type_info.get_scale();
443  if (dscale < 0) {
444  if (dscale < -max_scale) {
445  return 0; // +/- 0.09223372036854775807 rounds to 0
446  }
447  uint64_t const u = std::abs(decimal_value);
448  uint64_t const pow = pow10[-dscale];
449  uint64_t div = u / pow;
450  uint64_t rem = u % pow;
451  div += pow / 2 <= rem;
452  return decimal_value < 0 ? -div : div;
453  } else if (dscale < max_scale) {
454  int64_t retval;
455  if (!__builtin_mul_overflow(decimal_value, pow10[dscale], &retval)) {
456  return retval;
457  }
458  }
459  if (decimal_value == 0) {
460  return 0;
461  }
462  throw std::runtime_error("Overflow in DECIMAL-to-DECIMAL conversion.");
463 }
int8_t tinyintval
Definition: sqltypes.h:206
#define CHECK_EQ(x, y)
Definition: Logger.h:211
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:356
std::string toString(const ExtArgumentType &sig_type)
Definition: sqltypes.h:48
T parseInteger(std::string_view s, SQLTypeInfo const &ti)
Definition: Datum.cpp:191
T maxValue(unsigned const fieldsize)
Definition: Datum.cpp:135
SQLTypes
Definition: sqltypes.h:37
tuple d
Definition: test_fsi.py:9
int64_t parse_numeric(const std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:75
bool boolval
Definition: sqltypes.h:205
HOST DEVICE int get_scale() const
Definition: sqltypes.h:319
#define CHECK_GE(x, y)
Definition: Logger.h:216
size_t formatDateTime(char *buf, size_t const max, int64_t const timestamp, int const dimension)
Definition: misc.cpp:43
Constants for Builtin SQL Types supported by OmniSci.
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
int32_t intval
Definition: sqltypes.h:208
std::string to_string(char const *&&v)
size_t formatHMS(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:80
float floatval
Definition: sqltypes.h:210
std::string to_string() const
Definition: sqltypes.h:457
T minValue(unsigned const fieldsize)
Definition: Datum.cpp:129
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:306
static std::string type_name[kSQLTYPE_LAST]
Definition: sqltypes.h:756
void set_scale(int s)
Definition: sqltypes.h:409
int64_t bigintval
Definition: sqltypes.h:209
int16_t smallintval
Definition: sqltypes.h:207
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:239
std::string * stringval
Definition: sqltypes.h:214
std::string to_upper(const std::string &str)
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:419
size_t formatDate(char *buf, size_t const max, int64_t const unixtime)
Definition: misc.cpp:25
#define CHECK_LT(x, y)
Definition: Logger.h:213
Definition: sqltypes.h:51
Definition: sqltypes.h:52
static std::string comp_name[kENCODING_LAST]
Definition: sqltypes.h:757
#define CHECK_LE(x, y)
Definition: Logger.h:214
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:437
void set_dimension(int d)
Definition: sqltypes.h:406
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:316
std::string get_type_name() const
Definition: sqltypes.h:417
Definition: sqltypes.h:40
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:323
void set_notnull(bool n)
Definition: sqltypes.h:411
#define CHECK(condition)
Definition: Logger.h:203
Definition: sqltypes.h:44
bool hasCommonSuffix(char const *const ptr, char const *const end)
Definition: Datum.cpp:186
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:321
T parseFloatAsInteger(std::string_view s, SQLTypeInfo const &ti)
Definition: Datum.cpp:149
double doubleval
Definition: sqltypes.h:211