OmniSciDB  c07336695a
Datum.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #ifndef __STDC_FORMAT_MACROS
26 #define __STDC_FORMAT_MACROS
27 #endif
28 #include <cinttypes>
29 
30 #include <cassert>
31 #include <cmath>
32 #include <cstdio>
33 #include <cstdlib>
34 #include <stdexcept>
35 #include <string>
36 #include "Logger.h"
37 #include "StringTransform.h"
38 
39 #include "DateConverters.h"
40 #include "TimeGM.h"
41 #include "sqltypes.h"
42 
43 int64_t parse_numeric(const std::string& s, SQLTypeInfo& ti) {
44  assert(s.length() <= 20);
45  size_t dot = s.find_first_of('.', 0);
46  std::string before_dot;
47  std::string after_dot;
48  if (dot != std::string::npos) {
49  // make .99 as 0.99, or std::stoll below throws exception 'std::invalid_argument'
50  before_dot = (0 == dot) ? "0" : s.substr(0, dot);
51  after_dot = s.substr(dot + 1);
52  } else {
53  before_dot = s;
54  after_dot = "0";
55  }
56  const bool is_negative = before_dot.find_first_of('-', 0) != std::string::npos;
57  const int64_t sign = is_negative ? -1 : 1;
58  int64_t result;
59  result = std::abs(std::stoll(before_dot));
60  int64_t fraction = 0;
61  const size_t before_dot_digits = before_dot.length() - (is_negative ? 1 : 0);
62  if (!after_dot.empty()) {
63  fraction = std::stoll(after_dot);
64  }
65  if (ti.get_dimension() == 0) {
66  // set the type info based on the literal string
67  ti.set_scale(after_dot.length());
68  ti.set_dimension(before_dot_digits + ti.get_scale());
69  ti.set_notnull(false);
70  } else {
71  if (before_dot_digits + ti.get_scale() > static_cast<size_t>(ti.get_dimension())) {
72  throw std::runtime_error("numeric value " + s +
73  " exceeds the maximum precision of " +
75  }
76  for (ssize_t i = 0; i < static_cast<ssize_t>(after_dot.length()) - ti.get_scale();
77  i++) {
78  fraction /= 10; // truncate the digits after decimal point.
79  }
80  }
81  // the following loop can be made more efficient if needed
82  for (int i = 0; i < ti.get_scale(); i++) {
83  result *= 10;
84  }
85  if (result < 0) {
86  result -= fraction;
87  } else {
88  result += fraction;
89  }
90  return result * sign;
91 }
92 
93 /*
94  * @brief convert string to a datum
95  */
96 Datum StringToDatum(const std::string& s, SQLTypeInfo& ti) {
97  Datum d;
98  try {
99  switch (ti.get_type()) {
100  case kARRAY:
101  break;
102  case kBOOLEAN:
103  if (s == "t" || s == "T" || s == "1" || to_upper(s) == "TRUE") {
104  d.boolval = true;
105  } else if (s == "f" || s == "F" || s == "0" || to_upper(s) == "FALSE") {
106  d.boolval = false;
107  } else {
108  throw std::runtime_error("Invalid string for boolean " + s);
109  }
110  break;
111  case kNUMERIC:
112  case kDECIMAL:
113  d.bigintval = parse_numeric(s, ti);
114  break;
115  case kBIGINT:
116  d.bigintval = std::stoll(s);
117  break;
118  case kINT:
119  d.intval = std::stoi(s);
120  break;
121  case kSMALLINT:
122  d.smallintval = std::stoi(s);
123  break;
124  case kTINYINT:
125  d.tinyintval = std::stoi(s);
126  break;
127  case kFLOAT:
128  d.floatval = std::stof(s);
129  break;
130  case kDOUBLE:
131  d.doubleval = std::stod(s);
132  break;
133  case kTIME: {
134  // @TODO handle fractional seconds
135  std::tm tm_struct = {0};
136  if (!strptime(s.c_str(), "%T %z", &tm_struct) &&
137  !strptime(s.c_str(), "%T", &tm_struct) &&
138  !strptime(s.c_str(), "%H%M%S", &tm_struct) &&
139  !strptime(s.c_str(), "%R", &tm_struct)) {
140  throw std::runtime_error("Invalid time string " + s);
141  }
142  tm_struct.tm_mday = 1;
143  tm_struct.tm_mon = 0;
144  tm_struct.tm_year = 70;
145  tm_struct.tm_wday = tm_struct.tm_yday = tm_struct.tm_isdst = tm_struct.tm_gmtoff =
146  0;
147  d.bigintval = static_cast<int64_t>(TimeGM::instance().my_timegm(&tm_struct));
148  break;
149  }
150  case kTIMESTAMP: {
151  std::tm tm_struct = {0};
152  // not sure in advance if it is used so need to zero before processing
153  tm_struct.tm_gmtoff = 0;
154  char* tp;
155  // try ISO8601 date first
156  tp = strptime(s.c_str(), "%Y-%m-%d", &tm_struct);
157  if (!tp) {
158  tp = strptime(s.c_str(), "%m/%d/%Y", &tm_struct); // accept American date
159  }
160  if (!tp) {
161  tp = strptime(s.c_str(), "%d-%b-%y", &tm_struct); // accept 03-Sep-15
162  }
163  if (!tp) {
164  tp = strptime(s.c_str(), "%d/%b/%Y", &tm_struct); // accept 03/Sep/2015
165  }
166  if (!tp) {
167  try {
168  d.bigintval = static_cast<int64_t>(std::stoll(s));
169  break;
170  } catch (const std::invalid_argument& ia) {
171  throw std::runtime_error("Invalid timestamp string " + s);
172  }
173  }
174  if (*tp == 'T' || *tp == ' ' || *tp == ':') {
175  tp++;
176  } else {
177  throw std::runtime_error("Invalid timestamp break string " + s);
178  }
179  // now parse the time
180  char* p = strptime(tp, "%T %z", &tm_struct);
181  if (!p) {
182  p = strptime(tp, "%T", &tm_struct);
183  }
184  if (!p) {
185  p = strptime(tp, "%H%M%S", &tm_struct);
186  }
187  if (!p) {
188  p = strptime(tp, "%R", &tm_struct);
189  }
190  if (!p) {
191  // check for weird customer format
192  // remove decimal seconds from string if there is a period followed by a number
193  char* startptr = nullptr;
194  char* endptr;
195  // find last decimal in string
196  int loop = strlen(tp);
197  while (loop > 0) {
198  if (tp[loop] == '.') {
199  // found last period
200  startptr = &tp[loop];
201  break;
202  }
203  loop--;
204  }
205  if (startptr) {
206  // look for space
207  endptr = strchr(startptr, ' ');
208  if (endptr) {
209  // ok we found a start and and end
210  // remove the decimal portion
211  // will need to capture this for later
212  memmove(startptr, endptr, strlen(endptr) + 1);
213  }
214  }
215  p = strptime(
216  tp, "%I . %M . %S %p", &tm_struct); // customers weird '.' separated date
217  }
218  if (!p) {
219  throw std::runtime_error("Invalid timestamp time string " + s);
220  }
221  tm_struct.tm_wday = tm_struct.tm_yday = tm_struct.tm_isdst = 0;
222  // handle fractional seconds
223  if (ti.get_dimension() > 0) { // check for precision
224  time_t fsc;
225  if (*p == '.') {
226  p++;
227  uint64_t frac_num = 0;
228  int ntotal = 0;
229  sscanf(p, "%" SCNu64 "%n", &frac_num, &ntotal);
230  fsc = TimeGM::instance().parse_fractional_seconds(frac_num, ntotal, ti);
231  } else if (*p == '\0') {
232  fsc = 0;
233  } else { // check for misleading/unclear syntax
234  throw std::runtime_error("Unclear syntax for leading fractional seconds: " +
235  std::string(p));
236  }
237  d.bigintval =
238  static_cast<int64_t>(TimeGM::instance().my_timegm(&tm_struct, fsc, ti));
239  } else { // default timestamp(0) precision
240  d.bigintval = static_cast<int64_t>(TimeGM::instance().my_timegm(&tm_struct));
241  if (*p == '.') {
242  p++;
243  }
244  }
245  if (*p != '\0') {
246  uint32_t hour = 0;
247  sscanf(tp, "%u", &hour);
248  d.bigintval = static_cast<int64_t>(TimeGM::instance().parse_meridians(
249  static_cast<time_t>(d.bigintval), p, hour, ti));
250  break;
251  }
252  break;
253  }
254  case kDATE: {
255  std::tm tm_struct = {0};
256  // not sure in advance if it is used so need to zero before processing
257  tm_struct.tm_gmtoff = 0;
258  char* tp;
259  // try ISO8601 date first
260  tp = strptime(s.c_str(), "%Y-%m-%d", &tm_struct);
261  if (!tp) {
262  tp = strptime(s.c_str(), "%m/%d/%Y", &tm_struct); // accept American date
263  }
264  if (!tp) {
265  tp = strptime(s.c_str(), "%d-%b-%y", &tm_struct); // accept 03-Sep-15
266  }
267  if (!tp) {
268  tp = strptime(s.c_str(), "%d/%b/%Y", &tm_struct); // accept 03/Sep/2015
269  }
270  if (!tp) {
271  try {
272  d.bigintval = static_cast<int64_t>(std::stoll(s));
273  break;
274  } catch (const std::invalid_argument& ia) {
275  throw std::runtime_error("Invalid date string " + s);
276  }
277  }
278  tm_struct.tm_sec = tm_struct.tm_min = tm_struct.tm_hour = 0;
279  tm_struct.tm_wday = tm_struct.tm_yday = tm_struct.tm_isdst = tm_struct.tm_gmtoff =
280  0;
281  d.bigintval = static_cast<int64_t>(TimeGM::instance().my_timegm(&tm_struct));
282  break;
283  }
284  case kPOINT:
285  case kLINESTRING:
286  case kPOLYGON:
287  case kMULTIPOLYGON:
288  throw std::runtime_error("Internal error: geometry type in StringToDatum.");
289  default:
290  throw std::runtime_error("Internal error: invalid type in StringToDatum.");
291  }
292  } catch (const std::invalid_argument&) {
293  throw std::runtime_error("Invalid conversion from string to " + ti.get_type_name());
294  } catch (const std::out_of_range&) {
295  throw std::runtime_error("Got out of range error during conversion from string to " +
296  ti.get_type_name());
297  }
298  return d;
299 }
300 
301 bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo& ti) {
302  switch (ti.get_type()) {
303  case kBOOLEAN:
304  return a.boolval == b.boolval;
305  case kBIGINT:
306  case kNUMERIC:
307  case kDECIMAL:
308  return a.bigintval == b.bigintval;
309  case kINT:
310  return a.intval == b.intval;
311  case kSMALLINT:
312  return a.smallintval == b.smallintval;
313  case kTINYINT:
314  return a.tinyintval == b.tinyintval;
315  case kFLOAT:
316  return a.floatval == b.floatval;
317  case kDOUBLE:
318  return a.doubleval == b.doubleval;
319  case kTIME:
320  case kTIMESTAMP:
321  case kDATE:
322  case kINTERVAL_DAY_TIME:
324  return a.bigintval == b.bigintval;
325  case kTEXT:
326  case kVARCHAR:
327  case kCHAR:
328  if (ti.get_compression() == kENCODING_DICT) {
329  return a.intval == b.intval;
330  }
331  return *a.stringval == *b.stringval;
332  default:
333  return false;
334  }
335  return false;
336 }
337 
338 /*
339  * @brief convert datum to string
340  */
341 std::string DatumToString(Datum d, const SQLTypeInfo& ti) {
342  switch (ti.get_type()) {
343  case kBOOLEAN:
344  if (d.boolval) {
345  return "t";
346  }
347  return "f";
348  case kNUMERIC:
349  case kDECIMAL: {
350  char str[ti.get_dimension() + 1];
351  double v = (double)d.bigintval / pow(10, ti.get_scale());
352  sprintf(str, "%*.*f", ti.get_dimension(), ti.get_scale(), v);
353  return std::string(str);
354  }
355  case kINT:
356  return std::to_string(d.intval);
357  case kSMALLINT:
358  return std::to_string(d.smallintval);
359  case kTINYINT:
360  return std::to_string(d.tinyintval);
361  case kBIGINT:
362  return std::to_string(d.bigintval);
363  case kFLOAT:
364  return std::to_string(d.floatval);
365  case kDOUBLE:
366  return std::to_string(d.doubleval);
367  case kTIME: {
368  std::tm tm_struct;
369  gmtime_r(reinterpret_cast<time_t*>(&d.bigintval), &tm_struct);
370  char buf[9];
371  strftime(buf, 9, "%T", &tm_struct);
372  return std::string(buf);
373  }
374  case kTIMESTAMP: {
375  std::tm tm_struct{0};
376  if (ti.get_dimension() > 0) {
377  std::string t = std::to_string(d.bigintval);
378  int cp = t.length() - ti.get_dimension();
379  time_t sec = std::stoll(t.substr(0, cp));
380  t = t.substr(cp);
381  gmtime_r(&sec, &tm_struct);
382  char buf[21];
383  strftime(buf, 21, "%F %T.", &tm_struct);
384  return std::string(buf) += t;
385  } else {
386  time_t sec = static_cast<time_t>(d.bigintval);
387  gmtime_r(&sec, &tm_struct);
388  char buf[20];
389  strftime(buf, 20, "%F %T", &tm_struct);
390  return std::string(buf);
391  }
392  }
393  case kDATE: {
394  std::tm tm_struct;
395  time_t ntimeval = static_cast<time_t>(d.bigintval);
396  gmtime_r(&ntimeval, &tm_struct);
397  char buf[11];
398  strftime(buf, 11, "%F", &tm_struct);
399  return std::string(buf);
400  }
401  case kINTERVAL_DAY_TIME:
402  return std::to_string(d.bigintval) + " ms (day-time interval)";
404  return std::to_string(d.bigintval) + " month(s) (year-month interval)";
405  case kTEXT:
406  case kVARCHAR:
407  case kCHAR:
408  return *d.stringval;
409  default:
410  throw std::runtime_error("Internal error: invalid type " + ti.get_type_name() +
411  " in DatumToString.");
412  }
413  return "";
414 }
415 
417  switch (ti.get_size()) {
418  case 1:
419  return kTINYINT;
420  case 2:
421  return kSMALLINT;
422  case 4:
423  return kINT;
424  case 8:
425  return kBIGINT;
426  default:
427  CHECK(false);
428  }
429  return kNULLT;
430 }
431 
432 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
433  const SQLTypeInfo& type_info,
434  const SQLTypeInfo& new_type_info) {
435  auto converted_decimal_value = decimal_value;
436  if (new_type_info.get_scale() > type_info.get_scale()) {
437  for (int i = 0; i < new_type_info.get_scale() - type_info.get_scale(); i++) {
438  converted_decimal_value *= 10;
439  }
440  } else if (new_type_info.get_scale() < type_info.get_scale()) {
441  for (int i = 0; i < type_info.get_scale() - new_type_info.get_scale(); i++) {
442  if (converted_decimal_value > 0) {
443  converted_decimal_value = (converted_decimal_value + 5) / 10;
444  } else {
445  converted_decimal_value = (converted_decimal_value - 5) / 10;
446  }
447  }
448  }
449  return converted_decimal_value;
450 }
int8_t tinyintval
Definition: sqltypes.h:123
void d(const SQLTypes expected_type, const std::string &str)
Definition: ImportTest.cpp:268
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:341
HOST DEVICE int get_size() const
Definition: sqltypes.h:329
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:321
Definition: sqltypes.h:51
SQLTypes
Definition: sqltypes.h:40
bool boolval
Definition: sqltypes.h:122
time_t parse_meridians(const time_t &timeval, const char *p, const uint32_t &hour, const SQLTypeInfo &ti)
Definition: timegm.cpp:68
HOST DEVICE int get_scale() const
Definition: sqltypes.h:324
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:319
Constants for Builtin SQL Types supported by MapD.
void set_dimension(int d)
Definition: sqltypes.h:411
void set_scale(int s)
Definition: sqltypes.h:414
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:327
int32_t intval
Definition: sqltypes.h:125
std::string to_string(char const *&&v)
void set_notnull(bool n)
Definition: sqltypes.h:416
int64_t parse_numeric(const std::string &s, SQLTypeInfo &ti)
Definition: Datum.cpp:43
std::string get_type_name() const
Definition: sqltypes.h:422
float floatval
Definition: sqltypes.h:127
T v(const TargetValue &r)
time_t my_timegm(const struct tm *tm)
Definition: timegm.cpp:111
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:301
int64_t bigintval
Definition: sqltypes.h:126
int16_t smallintval
Definition: sqltypes.h:124
time_t parse_fractional_seconds(uint64_t sfrac, const int ntotal, const SQLTypeInfo &ti)
Definition: timegm.cpp:52
std::string * stringval
Definition: sqltypes.h:131
std::string to_upper(const std::string &str)
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:416
Definition: sqltypes.h:54
Definition: sqltypes.h:55
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:432
Datum StringToDatum(const std::string &s, SQLTypeInfo &ti)
Definition: Datum.cpp:96
Definition: sqltypes.h:43
#define CHECK(condition)
Definition: Logger.h:187
Definition: sqltypes.h:47
static TimeGM & instance()
Definition: TimeGM.h:38
double doubleval
Definition: sqltypes.h:128