OmniSciDB  ab4938a6a3
sqltypes.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "StringTransform.h"
26 #include "funcannotations.h"
27 
28 #include <cassert>
29 #include <cfloat>
30 #include <cstdint>
31 #include <ctime>
32 #include <limits>
33 #include <memory>
34 #include <string>
35 #include <type_traits>
36 #include <vector>
37 
38 // must not change because these values persist in catalogs.
// SQL type identifiers. SQLTypeInfo stores one of these as its type and, for arrays,
// the element type as its subtype.
// NOTE(review): no enumerators are shown here for the values 16, 17, 19, 21 and 28,
// although kINTERVAL_DAY_TIME, kINTERVAL_YEAR_MONTH, kLINESTRING, kMULTIPOLYGON and
// kSQLTYPE_LAST are all referenced further down in this file. Presumably those lines
// were dropped when the listing was extracted; confirm against the original header
// before relying on this enum.
39 enum SQLTypes {
40  kNULLT = 0, // type for null values
41  kBOOLEAN = 1,
42  kCHAR = 2,
43  kVARCHAR = 3,
44  kNUMERIC = 4,
45  kDECIMAL = 5,
46  kINT = 6,
47  kSMALLINT = 7,
48  kFLOAT = 8,
49  kDOUBLE = 9,
50  kTIME = 10,
51  kTIMESTAMP = 11,
52  kBIGINT = 12,
53  kTEXT = 13,
54  kDATE = 14,
55  kARRAY = 15,
58  kPOINT = 18,
60  kPOLYGON = 20,
62  kTINYINT = 22,
63  kGEOMETRY = 23,
64  kGEOGRAPHY = 24,
65  kEVAL_CONTEXT_TYPE = 25, // Placeholder Type for ANY
66  kVOID = 26,
67  kCURSOR = 27,
69 };
70 
// Descriptor of a variable-length value: a length, a raw data pointer and a null flag.
// The destructor body is empty, so this base struct never releases `pointer` itself.
71 struct VarlenDatum {
72  size_t length;
73  int8_t* pointer;
74  bool is_null;
75 
// Default state: zero length, no buffer, flagged as null.
76  DEVICE VarlenDatum() : length(0), pointer(nullptr), is_null(true) {}
// Virtual so that derived datums can be destroyed through a VarlenDatum pointer.
77  DEVICE virtual ~VarlenDatum() {}
78 
// Stores the three fields exactly as given; no copy of the buffer is made.
79  VarlenDatum(const size_t l, int8_t* p, const bool n)
80  : length(l), pointer(p), is_null(n) {}
81 };
82 
84  void operator()(int8_t*) {}
85 };
// Deleter functor that releases a buffer with free(). HostArrayDatum installs it when
// it adopts a raw int8_t* without a caller-supplied deleter.
86 struct FreeDeleter {
87  void operator()(int8_t* p) { free(p); }
88 };
89 
90 struct HostArrayDatum : public VarlenDatum {
91  using ManagedPtr = std::shared_ptr<int8_t>;
92 
93  HostArrayDatum() = default;
94 
95  HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
96  : VarlenDatum(l, p.get(), n), data_ptr(p) {}
97 
98  HostArrayDatum(size_t const l, int8_t* p, bool const n)
99  : VarlenDatum(l, p, n), data_ptr(p, FreeDeleter()){};
100 
101  template <typename CUSTOM_DELETER,
102  typename = std::enable_if_t<
103  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
104  HostArrayDatum(size_t const l, int8_t* p, CUSTOM_DELETER custom_deleter)
105  : VarlenDatum(l, p, 0 == l), data_ptr(p, custom_deleter) {}
106 
107  template <typename CUSTOM_DELETER,
108  typename = std::enable_if_t<
109  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
110  HostArrayDatum(size_t const l, int8_t* p, bool const n, CUSTOM_DELETER custom_deleter)
111  : VarlenDatum(l, p, n), data_ptr(p, custom_deleter) {}
112 
114 };
115 
// Array datum type selected by the ArrayDatum alias below when is_cuda_compiler()
// is true.
// NOTE(review): the cross-reference index at the end of this listing mentions
// `DEVICE DeviceArrayDatum()` at sqltypes.h:117, but that line is absent here.
// Presumably it was lost in extraction; confirm against the original header.
116 struct DeviceArrayDatum : public VarlenDatum {
118 };
119 
// Compile-time predicate: true when this translation unit is compiled with
// __CUDACC__ defined, false otherwise. Used below to pick the ArrayDatum type.
120 inline DEVICE constexpr bool is_cuda_compiler() {
121 #ifdef __CUDACC__
122  return true;
123 #else
124  return false;
125 #endif
126 }
127 
128 using ArrayDatum =
129  std::conditional_t<is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum>;
130 
131 union Datum {
132  bool boolval;
133  int8_t tinyintval;
134  int16_t smallintval;
135  int32_t intval;
136  int64_t bigintval;
137  float floatval;
138  double doubleval;
140 #ifndef __CUDACC__
141  std::string* stringval; // string value
142 #endif
143 };
144 
145 #ifndef __CUDACC__
147  int8_t* numbersPtr;
148  std::vector<std::string>* stringsPtr;
149  std::vector<ArrayDatum>* arraysPtr;
150 };
151 #endif
152 
153 // must not change because these values persist in catalogs.
// Enumerators of the column encoding schemes stored in SQLTypeInfo::compression.
// NOTE(review): the line that opens this enum and a kENCODING_LAST enumerator
// (used further down to size SQLTypeInfo::comp_name) are not present in this listing.
// Presumably they were lost in extraction; confirm against the original header.
155  kENCODING_NONE = 0, // no encoding
156  kENCODING_FIXED = 1, // Fixed-bit encoding
157  kENCODING_RL = 2, // Run Length encoding
158  kENCODING_DIFF = 3, // Differential encoding
159  kENCODING_DICT = 4, // Dictionary encoding
160  kENCODING_SPARSE = 5, // Null encoding for sparse columns
161  kENCODING_GEOINT = 6, // Encoding coordinates as integers
162  kENCODING_DATE_IN_DAYS = 7, // Date encoding in days
163  kENCODING_PACKED_PIXEL_COORD = 8, // Render Pixel Coordinate (packed 14.2+14.2)
165 };
166 
167 #define IS_INTEGER(T) \
168  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kBIGINT) || ((T) == kTINYINT))
169 #define IS_NUMBER(T) \
170  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kDOUBLE) || ((T) == kFLOAT) || \
171  ((T) == kBIGINT) || ((T) == kNUMERIC) || ((T) == kDECIMAL) || ((T) == kTINYINT))
172 #define IS_STRING(T) (((T) == kTEXT) || ((T) == kVARCHAR) || ((T) == kCHAR))
173 #define IS_GEO(T) \
174  (((T) == kPOINT) || ((T) == kLINESTRING) || ((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
175 #define IS_INTERVAL(T) ((T) == kINTERVAL_DAY_TIME || (T) == kINTERVAL_YEAR_MONTH)
176 #define IS_DECIMAL(T) ((T) == kNUMERIC || (T) == kDECIMAL)
177 #define IS_GEO_POLY(T) (((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
178 
179 #define NULL_BOOLEAN INT8_MIN
180 #define NULL_TINYINT INT8_MIN
181 #define NULL_SMALLINT INT16_MIN
182 #define NULL_INT INT32_MIN
183 #define NULL_BIGINT INT64_MIN
184 #define NULL_FLOAT FLT_MIN
185 #define NULL_DOUBLE DBL_MIN
186 
187 #define NULL_ARRAY_BOOLEAN (INT8_MIN + 1)
188 #define NULL_ARRAY_TINYINT (INT8_MIN + 1)
189 #define NULL_ARRAY_SMALLINT (INT16_MIN + 1)
190 #define NULL_ARRAY_INT (INT32_MIN + 1)
191 #define NULL_ARRAY_BIGINT (INT64_MIN + 1)
192 #define NULL_ARRAY_FLOAT (FLT_MIN * 2.0)
193 #define NULL_ARRAY_DOUBLE (DBL_MIN * 2.0)
194 
195 #define NULL_ARRAY_COMPRESSED_32 0x80000000U
196 
197 #define TRANSIENT_DICT_ID 0
198 #define TRANSIENT_DICT(ID) (-(ID))
199 #define REGULAR_DICT(TRANSIENTID) (-(TRANSIENTID))
200 
201 constexpr auto is_datetime(SQLTypes type) {
202  return type == kTIME || type == kTIMESTAMP || type == kDATE;
203 }
204 
205 // @type SQLTypeInfo
206 // @brief a structure to capture all type information including
207 // length, precision, scale, etc.
208 class SQLTypeInfo {
209  public:
210  SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
211  : type(t)
212  , subtype(st)
213  , dimension(d)
214  , scale(s)
215  , notnull(n)
216  , compression(c)
217  , comp_param(p)
218  , size(get_storage_size()) {}
219  SQLTypeInfo(SQLTypes t, int d, int s, bool n)
220  : type(t)
221  , subtype(kNULLT)
222  , dimension(d)
223  , scale(s)
224  , notnull(n)
225  , compression(kENCODING_NONE)
226  , comp_param(0)
227  , size(get_storage_size()) {}
228  SQLTypeInfo(SQLTypes t, int d, int s) : SQLTypeInfo(t, d, s, false) {}
230  : type(t)
231  , subtype(kNULLT)
232  , dimension(0)
233  , scale(0)
234  , notnull(n)
235  , compression(kENCODING_NONE)
236  , comp_param(0)
237  , size(get_storage_size()) {}
238  SQLTypeInfo(SQLTypes t) : SQLTypeInfo(t, false) {}
240  : type(t)
241  , subtype(kNULLT)
242  , dimension(0)
243  , scale(0)
244  , notnull(n)
245  , compression(c)
246  , comp_param(0)
247  , size(get_storage_size()) {}
249  : type(kNULLT)
250  , subtype(kNULLT)
251  , dimension(0)
252  , scale(0)
253  , notnull(false)
254  , compression(kENCODING_NONE)
255  , comp_param(0)
256  , size(0) {}
257 
258  HOST DEVICE inline SQLTypes get_type() const { return type; }
259  HOST DEVICE inline SQLTypes get_subtype() const { return subtype; }
260  HOST DEVICE inline int get_dimension() const { return dimension; }
261  inline int get_precision() const { return dimension; }
262  HOST DEVICE inline int get_input_srid() const { return dimension; }
263  HOST DEVICE inline int get_scale() const { return scale; }
264  HOST DEVICE inline int get_output_srid() const { return scale; }
265  HOST DEVICE inline bool get_notnull() const { return notnull; }
266  HOST DEVICE inline EncodingType get_compression() const { return compression; }
267  HOST DEVICE inline int get_comp_param() const { return comp_param; }
268  HOST DEVICE inline int get_size() const { return size; }
269  inline int get_logical_size() const {
270  if (compression == kENCODING_FIXED || compression == kENCODING_DATE_IN_DAYS) {
271  SQLTypeInfo ti(type, dimension, scale, notnull, kENCODING_NONE, 0, subtype);
272  return ti.get_size();
273  }
274  if (compression == kENCODING_DICT) {
275  return 4;
276  }
277  return get_size();
278  }
279  inline int get_physical_cols() const {
280  switch (type) {
281  case kPOINT:
282  return 1; // coords
283  case kLINESTRING:
284  return 2; // coords, bounds
285  case kPOLYGON:
286  return 4; // coords, ring_sizes, bounds, render_group
287  case kMULTIPOLYGON:
288  return 5; // coords, ring_sizes, poly_rings, bounds, render_group
289  default:
290  break;
291  }
292  return 0;
293  }
294  inline int get_physical_coord_cols() const {
295  // @TODO dmitri/simon rename this function?
296  // It needs to return the number of extra columns
297  // which need to go through the executor, as opposed
298  // to those which are only needed by CPU for poly
299  // cache building or what-not. For now, we just omit
300  // the Render Group column. If we add Bounding Box
301  // or something this may require rethinking. Perhaps
302  // these two functions need to return an array of
303  // offsets rather than just a number to loop over,
304  // so that executor and non-executor columns can
305  // be mixed.
306  // NOTE(adb): In binding to extension functions, we need to know some pretty specific
307  // type info about each of the physical coords cols for each geo type. I added checks
308  // there to ensure the physical coords col for the geo type match what we expect. If
309  // these values are ever changed, corresponding values in
310  // ExtensionFunctionsBinding.cpp::compute_narrowing_conv_scores and
311  // ExtensionFunctionsBinding.cpp::compute_widening_conv_scores will also need to be
312  // changed.
313  switch (type) {
314  case kPOINT:
315  return 1;
316  case kLINESTRING:
317  return 1; // omit bounds
318  case kPOLYGON:
319  return 2; // omit bounds, render group
320  case kMULTIPOLYGON:
321  return 3; // omit bounds, render group
322  default:
323  break;
324  }
325  return 0;
326  }
327  inline bool has_bounds() const {
328  switch (type) {
329  case kLINESTRING:
330  case kPOLYGON:
331  case kMULTIPOLYGON:
332  return true;
333  default:
334  break;
335  }
336  return false;
337  }
338  inline bool has_render_group() const {
339  switch (type) {
340  case kPOLYGON:
341  case kMULTIPOLYGON:
342  return true;
343  default:
344  break;
345  }
346  return false;
347  }
348  HOST DEVICE inline void set_type(SQLTypes t) { type = t; }
349  HOST DEVICE inline void set_subtype(SQLTypes st) { subtype = st; }
350  inline void set_dimension(int d) { dimension = d; }
351  inline void set_precision(int d) { dimension = d; }
352  inline void set_input_srid(int d) { dimension = d; }
353  inline void set_scale(int s) { scale = s; }
354  inline void set_output_srid(int s) { scale = s; }
355  inline void set_notnull(bool n) { notnull = n; }
356  inline void set_size(int s) { size = s; }
357  inline void set_fixed_size() { size = get_storage_size(); }
358  inline void set_compression(EncodingType c) { compression = c; }
359  inline void set_comp_param(int p) { comp_param = p; }
360 #ifndef __CUDACC__
361  inline std::string get_type_name() const {
362  if (IS_GEO(type)) {
363  std::string srid_string = "";
364  if (get_output_srid() > 0) {
365  srid_string = ", " + std::to_string(get_output_srid());
366  }
367  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
368  return type_name[static_cast<int>(subtype)] + "(" +
369  type_name[static_cast<int>(type)] + srid_string + ")";
370  }
371  std::string ps = "";
372  if (type == kDECIMAL || type == kNUMERIC || subtype == kDECIMAL ||
373  subtype == kNUMERIC) {
374  ps = "(" + std::to_string(dimension) + "," + std::to_string(scale) + ")";
375  } else if (type == kTIMESTAMP) {
376  ps = "(" + std::to_string(dimension) + ")";
377  }
378  if (type == kARRAY) {
379  auto elem_ti = get_elem_type();
380  auto num_elems = (size > 0) ? std::to_string(size / elem_ti.get_size()) : "";
381  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
382  return type_name[static_cast<int>(subtype)] + ps + "[" + num_elems + "]";
383  }
384  return type_name[static_cast<int>(type)] + ps;
385  }
386  inline std::string get_compression_name() const { return comp_name[(int)compression]; }
387  inline std::string to_string() const {
388  return concat("(",
389  type_name[static_cast<int>(type)],
390  ", ",
391  get_dimension(),
392  ", ",
393  get_scale(),
394  ", ",
395  get_notnull() ? "not nullable" : "nullable",
396  ", ",
397  get_compression_name(),
398  ", ",
399  get_comp_param(),
400  ", ",
401  type_name[static_cast<int>(subtype)],
402  ": ",
403  get_size(),
404  ": ",
405  get_elem_type().get_size(),
406  ")");
407  }
408 #endif
409  inline bool is_string() const { return IS_STRING(type); }
410  inline bool is_string_array() const { return (type == kARRAY) && IS_STRING(subtype); }
411  inline bool is_integer() const { return IS_INTEGER(type); }
412  inline bool is_decimal() const { return type == kDECIMAL || type == kNUMERIC; }
413  inline bool is_fp() const { return type == kFLOAT || type == kDOUBLE; }
414  inline bool is_number() const { return IS_NUMBER(type); }
415  inline bool is_time() const { return is_datetime(type); }
416  inline bool is_boolean() const { return type == kBOOLEAN; }
417  inline bool is_array() const { return type == kARRAY; }
418  inline bool is_varlen_array() const { return type == kARRAY && size <= 0; }
419  inline bool is_fixlen_array() const { return type == kARRAY && size > 0; }
420  inline bool is_timeinterval() const { return IS_INTERVAL(type); }
421  inline bool is_geometry() const { return IS_GEO(type); }
422 
423  inline bool is_varlen() const { // TODO: logically this should ignore fixlen arrays
424  return (IS_STRING(type) && compression != kENCODING_DICT) || type == kARRAY ||
425  IS_GEO(type);
426  }
427 
428  // need this here till is_varlen can be fixed w/o negative impact to existing code
429  inline bool is_varlen_indeed() const {
430  // SQLTypeInfo.is_varlen() is broken with fixedlen array now
431  // and seems left broken for some concern, so fix it locally
432  return is_varlen() && !is_fixlen_array();
433  }
434 
435  inline bool is_dict_encoded_string() const {
436  return is_string() && compression == kENCODING_DICT;
437  }
438 
439  inline bool is_packed_pixel_coord() const {
440  return type == kINT && compression == kENCODING_PACKED_PIXEL_COORD;
441  }
442 
443  HOST DEVICE inline bool operator!=(const SQLTypeInfo& rhs) const {
444  return type != rhs.get_type() || subtype != rhs.get_subtype() ||
445  dimension != rhs.get_dimension() || scale != rhs.get_scale() ||
446  compression != rhs.get_compression() ||
447  (compression != kENCODING_NONE && comp_param != rhs.get_comp_param() &&
448  comp_param != TRANSIENT_DICT(rhs.get_comp_param())) ||
449  notnull != rhs.get_notnull();
450  }
451  HOST DEVICE inline bool operator==(const SQLTypeInfo& rhs) const {
452  return type == rhs.get_type() && subtype == rhs.get_subtype() &&
453  dimension == rhs.get_dimension() && scale == rhs.get_scale() &&
454  compression == rhs.get_compression() &&
455  (compression == kENCODING_NONE || comp_param == rhs.get_comp_param() ||
456  comp_param == TRANSIENT_DICT(rhs.get_comp_param())) &&
457  notnull == rhs.get_notnull();
458  }
459 
460  inline int get_array_context_logical_size() const {
461  if (is_string()) {
462  auto comp_type(get_compression());
463  if (comp_type == kENCODING_DICT || comp_type == kENCODING_FIXED ||
464  comp_type == kENCODING_NONE) {
465  return sizeof(int32_t);
466  }
467  }
468  return get_logical_size();
469  }
470 
471  HOST DEVICE inline void operator=(const SQLTypeInfo& rhs) {
472  type = rhs.get_type();
473  subtype = rhs.get_subtype();
474  dimension = rhs.get_dimension();
475  scale = rhs.get_scale();
476  notnull = rhs.get_notnull();
477  compression = rhs.get_compression();
478  comp_param = rhs.get_comp_param();
479  size = rhs.get_size();
480  }
481 
482  inline bool is_castable(const SQLTypeInfo& new_type_info) const {
483  // can always cast between the same type but different precision/scale/encodings
484  if (type == new_type_info.get_type()) {
485  return true;
486  // can always cast from or to string
487  } else if (is_string() || new_type_info.is_string()) {
488  return true;
489  // can cast between numbers
490  } else if (is_number() && new_type_info.is_number()) {
491  return true;
492  // can cast from timestamp or date to number (epoch)
493  } else if ((type == kTIMESTAMP || type == kDATE) && new_type_info.is_number()) {
494  return true;
495  // can cast from date to timestamp
496  } else if (type == kDATE && new_type_info.get_type() == kTIMESTAMP) {
497  return true;
498  } else if (type == kTIMESTAMP && new_type_info.get_type() == kDATE) {
499  return true;
500  } else if (type == kBOOLEAN && new_type_info.is_number()) {
501  return true;
502  } else if (type == kARRAY && new_type_info.get_type() == kARRAY) {
503  return get_elem_type().is_castable(new_type_info.get_elem_type());
504  } else {
505  return false;
506  }
507  }
508 
509  HOST DEVICE inline bool is_null(const Datum& d) const {
510  // assuming Datum is always uncompressed
511  switch (type) {
512  case kBOOLEAN:
513  return (int8_t)d.boolval == NULL_BOOLEAN;
514  case kTINYINT:
515  return d.tinyintval == NULL_TINYINT;
516  case kSMALLINT:
517  return d.smallintval == NULL_SMALLINT;
518  case kINT:
519  return d.intval == NULL_INT;
520  case kBIGINT:
521  case kNUMERIC:
522  case kDECIMAL:
523  return d.bigintval == NULL_BIGINT;
524  case kFLOAT:
525  return d.floatval == NULL_FLOAT;
526  case kDOUBLE:
527  return d.doubleval == NULL_DOUBLE;
528  case kTIME:
529  case kTIMESTAMP:
530  case kDATE:
531  return d.bigintval == NULL_BIGINT;
532  case kTEXT:
533  case kVARCHAR:
534  case kCHAR:
535  // @TODO handle null strings
536  break;
537  case kNULLT:
538  return true;
539  case kARRAY:
540  return d.arrayval == NULL || d.arrayval->is_null;
541  default:
542  break;
543  }
544  return false;
545  }
546  HOST DEVICE inline bool is_null(const int8_t* val) const {
547  if (type == kFLOAT) {
548  return *(float*)val == NULL_FLOAT;
549  }
550  if (type == kDOUBLE) {
551  return *(double*)val == NULL_DOUBLE;
552  }
553  // val can be either compressed or uncompressed
554  switch (size) {
555  case 1:
556  return *val == NULL_TINYINT;
557  case 2:
558  return *(int16_t*)val == NULL_SMALLINT;
559  case 4:
560  return *(int32_t*)val == NULL_INT;
561  case 8:
562  return *(int64_t*)val == NULL_BIGINT;
563  case kNULLT:
564  return true;
565  default:
566  // @TODO(wei) handle null strings
567  break;
568  }
569  return false;
570  }
571  HOST DEVICE inline bool is_null_fixlen_array(const int8_t* val, int array_size) const {
572  // Check if fixed length array has a NULL_ARRAY sentinel as the first element
573  if (type == kARRAY && val && array_size > 0 && array_size == size) {
574  // Need to create element type to get the size, but can't call get_elem_type()
575  // since this is a HOST DEVICE function. Going through copy constructor instead.
576  auto elem_ti{*this};
577  elem_ti.set_type(subtype);
578  elem_ti.set_subtype(kNULLT);
579  auto elem_size = elem_ti.get_storage_size();
580  if (elem_size < 1) {
581  return false;
582  }
583  if (subtype == kFLOAT) {
584  return *(float*)val == NULL_ARRAY_FLOAT;
585  }
586  if (subtype == kDOUBLE) {
587  return *(double*)val == NULL_ARRAY_DOUBLE;
588  }
589  switch (elem_size) {
590  case 1:
591  return *val == NULL_ARRAY_TINYINT;
592  case 2:
593  return *(int16_t*)val == NULL_ARRAY_SMALLINT;
594  case 4:
595  return *(int32_t*)val == NULL_ARRAY_INT;
596  case 8:
597  return *(int64_t*)val == NULL_ARRAY_BIGINT;
598  default:
599  return false;
600  }
601  }
602  return false;
603  }
604  HOST DEVICE inline bool is_null_point_coord_array(const int8_t* val,
605  int array_size) const {
606  if (type == kARRAY && subtype == kTINYINT && val && array_size > 0 &&
607  array_size == size) {
608  if (array_size == 2 * sizeof(double)) {
609  return *(double*)val == NULL_ARRAY_DOUBLE;
610  }
611  if (array_size == 2 * sizeof(int32_t)) {
612  return *(uint32_t*)val == NULL_ARRAY_COMPRESSED_32;
613  }
614  }
615  return false;
616  }
617  inline SQLTypeInfo get_elem_type() const {
618  return SQLTypeInfo(
619  subtype, dimension, scale, notnull, compression, comp_param, kNULLT);
620  }
621  inline SQLTypeInfo get_array_type() const {
622  return SQLTypeInfo(kARRAY, dimension, scale, notnull, compression, comp_param, type);
623  }
624 
625  inline bool is_date_in_days() const {
626  if (type == kDATE) {
627  const auto comp_type = get_compression();
628  if (comp_type == kENCODING_DATE_IN_DAYS) {
629  return true;
630  }
631  }
632  return false;
633  }
634 
635  inline bool is_date() const { return type == kDATE; }
636 
637  inline bool is_high_precision_timestamp() const {
638  if (type == kTIMESTAMP) {
639  const auto dimension = get_dimension();
640  if (dimension > 0) {
641  return true;
642  }
643  }
644  return false;
645  }
646 
647  inline bool is_timestamp() const { return type == kTIMESTAMP; }
648 
649  private:
650  SQLTypes type; // type id
651  SQLTypes subtype; // element type of arrays
652  int dimension; // VARCHAR/CHAR length or NUMERIC/DECIMAL precision
653  int scale; // NUMERIC/DECIMAL scale
654  bool notnull; // nullable? a hint, not used for type checking
655  EncodingType compression; // compression scheme
656  int comp_param; // compression parameter when applicable for certain schemes
657  int size; // size of the type in bytes. -1 for variable size
658 #ifndef __CUDACC__
659  static std::string type_name[kSQLTYPE_LAST];
660  static std::string comp_name[kENCODING_LAST];
661 #endif
662  HOST DEVICE inline int get_storage_size() const {
663  switch (type) {
664  case kBOOLEAN:
665  return sizeof(int8_t);
666  case kTINYINT:
667  return sizeof(int8_t);
668  case kSMALLINT:
669  switch (compression) {
670  case kENCODING_NONE:
671  return sizeof(int16_t);
672  case kENCODING_FIXED:
673  case kENCODING_SPARSE:
674  return comp_param / 8;
675  case kENCODING_RL:
676  case kENCODING_DIFF:
677  break;
678  default:
679  assert(false);
680  }
681  break;
682  case kINT:
683  switch (compression) {
684  case kENCODING_NONE:
686  return sizeof(int32_t);
687  case kENCODING_FIXED:
688  case kENCODING_SPARSE:
689  return comp_param / 8;
690  case kENCODING_RL:
691  case kENCODING_DIFF:
692  break;
693  default:
694  assert(false);
695  }
696  break;
697  case kBIGINT:
698  case kNUMERIC:
699  case kDECIMAL:
700  switch (compression) {
701  case kENCODING_NONE:
702  return sizeof(int64_t);
703  case kENCODING_FIXED:
704  case kENCODING_SPARSE:
705  return comp_param / 8;
706  case kENCODING_RL:
707  case kENCODING_DIFF:
708  break;
709  default:
710  assert(false);
711  }
712  break;
713  case kFLOAT:
714  switch (compression) {
715  case kENCODING_NONE:
716  return sizeof(float);
717  case kENCODING_FIXED:
718  case kENCODING_RL:
719  case kENCODING_DIFF:
720  case kENCODING_SPARSE:
721  assert(false);
722  break;
723  default:
724  assert(false);
725  }
726  break;
727  case kDOUBLE:
728  switch (compression) {
729  case kENCODING_NONE:
730  return sizeof(double);
731  case kENCODING_FIXED:
732  case kENCODING_RL:
733  case kENCODING_DIFF:
734  case kENCODING_SPARSE:
735  assert(false);
736  break;
737  default:
738  assert(false);
739  }
740  break;
741  case kTIMESTAMP:
742  case kTIME:
743  case kINTERVAL_DAY_TIME:
745  case kDATE:
746  switch (compression) {
747  case kENCODING_NONE:
748  return sizeof(int64_t);
749  case kENCODING_FIXED:
750  if (type == kTIMESTAMP && dimension > 0) {
751  assert(false); // disable compression for timestamp precisions
752  }
753  return comp_param / 8;
754  case kENCODING_RL:
755  case kENCODING_DIFF:
756  case kENCODING_SPARSE:
757  assert(false);
758  break;
760  switch (comp_param) {
761  case 0:
762  return 4; // Default date encoded in days is 32 bits
763  case 16:
764  case 32:
765  return comp_param / 8;
766  default:
767  assert(false);
768  break;
769  }
770  default:
771  assert(false);
772  }
773  break;
774  case kTEXT:
775  case kVARCHAR:
776  case kCHAR:
777  if (compression == kENCODING_DICT) {
778  return sizeof(int32_t); // @TODO(wei) must check DictDescriptor
779  }
780  break;
781  case kARRAY:
782  // TODO: return size for fixlen arrays?
783  break;
784  case kPOINT:
785  case kLINESTRING:
786  case kPOLYGON:
787  case kMULTIPOLYGON:
788  break;
789  default:
790  break;
791  }
792  return -1;
793  }
794 };
795 
797 
798 #ifndef __CUDACC__
799 #include <string_view>
800 
801 Datum StringToDatum(std::string_view s, SQLTypeInfo& ti);
802 std::string DatumToString(Datum d, const SQLTypeInfo& ti);
803 bool DatumEqual(const Datum, const Datum, const SQLTypeInfo& ti);
804 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
805  const SQLTypeInfo& type_info,
806  const SQLTypeInfo& new_type_info);
807 #endif
808 
809 #include "../QueryEngine/DateAdd.h"
810 #include "../QueryEngine/DateTruncate.h"
811 #include "../QueryEngine/ExtractFromTime.h"
812 
814  EncodingType encoding = type_info.get_compression();
815  if (encoding == kENCODING_DATE_IN_DAYS ||
816  (encoding == kENCODING_FIXED && type_info.get_type() != kARRAY)) {
817  encoding = kENCODING_NONE;
818  }
819  return SQLTypeInfo(type_info.get_type(),
820  type_info.get_dimension(),
821  type_info.get_scale(),
822  type_info.get_notnull(),
823  encoding,
824  type_info.get_comp_param(),
825  type_info.get_subtype());
826 }
827 
829  SQLTypeInfo nullable_type_info = get_logical_type_info(type_info);
830  nullable_type_info.set_notnull(false);
831  return nullable_type_info;
832 }
833 
// Null sentinel for the integer type T, widened to int64_t:
// the minimum value for signed T, the maximum value for unsigned T.
template <class T>
constexpr inline int64_t inline_int_null_value() {
  return std::is_signed<T>::value ? std::numeric_limits<T>::min()
                                  : std::numeric_limits<T>::max();
}
839 
// Sentinel that marks a null *array* of integer type T, widened to int64_t:
// one above the minimum for signed T, one below the maximum for unsigned T, i.e. the
// value adjacent to the scalar null sentinel of inline_int_null_value<T>().
// TODO: null_array values in signed types would step on max valid value
// in fixlen unsigned arrays, the max valid value may need to be lowered.
template <class T>
constexpr inline int64_t inline_int_null_array_value() {
  return std::is_signed<T>::value ? std::numeric_limits<T>::min() + 1
                                  : std::numeric_limits<T>::max() - 1;
}
847 
// Largest value of integer type T that is still a regular (non-null) value, widened
// to int64_t. For signed T this is the type's maximum; for unsigned T it is one less,
// because the maximum itself is what inline_int_null_value<T>() returns.
template <class T>
constexpr inline int64_t max_valid_int_value() {
  return std::is_signed<T>::value ? std::numeric_limits<T>::max()
                                  : std::numeric_limits<T>::max() - 1;
}
853 
854 #include "InlineNullValues.h"
855 
856 using StringOffsetT = int32_t;
857 using ArrayOffsetT = int32_t;
858 
859 inline int8_t* appendDatum(int8_t* buf, Datum d, const SQLTypeInfo& ti) {
860  switch (ti.get_type()) {
861  case kBOOLEAN:
862  *(bool*)buf = d.boolval;
863  return buf + sizeof(bool);
864  case kNUMERIC:
865  case kDECIMAL:
866  case kBIGINT:
867  *(int64_t*)buf = d.bigintval;
868  return buf + sizeof(int64_t);
869  case kINT:
870  *(int32_t*)buf = d.intval;
871  return buf + sizeof(int32_t);
872  case kSMALLINT:
873  *(int16_t*)buf = d.smallintval;
874  return buf + sizeof(int16_t);
875  case kTINYINT:
876  *(int8_t*)buf = d.tinyintval;
877  return buf + sizeof(int8_t);
878  case kFLOAT:
879  *(float*)buf = d.floatval;
880  return buf + sizeof(float);
881  case kDOUBLE:
882  *(double*)buf = d.doubleval;
883  return buf + sizeof(double);
884  case kTIME:
885  case kTIMESTAMP:
886  case kDATE:
887  *reinterpret_cast<int64_t*>(buf) = d.bigintval;
888  return buf + sizeof(int64_t);
889  default:
890  return nullptr;
891  }
892 }
int8_t tinyintval
Definition: sqltypes.h:133
HOST DEVICE bool operator!=(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:443
void set_compression(EncodingType c)
Definition: sqltypes.h:358
void set_size(int s)
Definition: sqltypes.h:356
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:122
#define NULL_DOUBLE
Definition: sqltypes.h:185
bool is_time() const
Definition: sqltypes.h:415
HOST DEVICE bool is_null(const Datum &d) const
Definition: sqltypes.h:509
HOST DEVICE void operator=(const SQLTypeInfo &rhs)
Definition: sqltypes.h:471
bool is_array() const
Definition: sqltypes.h:417
int get_precision() const
Definition: sqltypes.h:261
bool is_string() const
Definition: sqltypes.h:409
DEVICE VarlenDatum()
Definition: sqltypes.h:76
bool is_boolean() const
Definition: sqltypes.h:416
kTIME
Definition: sqltypes.h:50
DEVICE constexpr bool is_cuda_compiler()
Definition: sqltypes.h:120
SQLTypes
Definition: sqltypes.h:39
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:148
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:149
EncodingType
Definition: encodetypes.h:22
bool is_integer() const
Definition: sqltypes.h:411
#define NULL_ARRAY_COMPRESSED_32
Definition: sqltypes.h:195
bool is_null
Definition: sqltypes.h:74
#define NULL_BIGINT
Definition: sqltypes.h:183
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:193
bool is_castable(const SQLTypeInfo &new_type_info) const
Definition: sqltypes.h:482
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:828
bool boolval
Definition: sqltypes.h:132
HOST DEVICE bool is_null(const int8_t *val) const
Definition: sqltypes.h:546
constexpr int64_t inline_int_null_value()
Definition: sqltypes.h:835
int get_array_context_logical_size() const
Definition: sqltypes.h:460
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:267
VarlenDatum * arrayval
Definition: sqltypes.h:139
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:349
SQLTypeInfo(SQLTypes t, int d, int s)
Definition: sqltypes.h:228
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:813
bool is_date() const
Definition: sqltypes.h:635
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
SQLTypes decimal_to_int_type(const SQLTypeInfo &)
Definition: Datum.cpp:302
bool is_high_precision_timestamp() const
Definition: sqltypes.h:637
#define NULL_ARRAY_SMALLINT
Definition: sqltypes.h:189
#define NULL_ARRAY_TINYINT
Definition: sqltypes.h:188
kVOID
Definition: sqltypes.h:66
bool is_timeinterval() const
Definition: sqltypes.h:420
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:266
bool is_varlen() const
Definition: sqltypes.h:423
bool is_decimal() const
Definition: sqltypes.h:412
int32_t intval
Definition: sqltypes.h:135
HOST DEVICE int get_scale() const
Definition: sqltypes.h:263
std::string to_string(char const *&&v)
HostArrayDatum(size_t const l, int8_t *p, bool const n, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:110
int8_t * pointer
Definition: sqltypes.h:73
int32_t StringOffsetT
Definition: sqltypes.h:856
bool DatumEqual(const Datum, const Datum, const SQLTypeInfo &ti)
Definition: Datum.cpp:187
#define DEVICE
constexpr int64_t max_valid_int_value()
Definition: sqltypes.h:849
int get_logical_size() const
Definition: sqltypes.h:269
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:129
#define HOST
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:265
void set_input_srid(int d)
Definition: sqltypes.h:352
float floatval
Definition: sqltypes.h:137
bool is_dict_encoded_string() const
Definition: sqltypes.h:435
std::string concat(Types &&... parms)
#define IS_INTERVAL(T)
Definition: sqltypes.h:175
void set_fixed_size()
Definition: sqltypes.h:357
std::shared_ptr< int8_t > ManagedPtr
Definition: sqltypes.h:91
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:264
std::string to_string() const
Definition: sqltypes.h:387
#define NULL_TINYINT
Definition: sqltypes.h:180
VarlenDatum(const size_t l, int8_t *p, const bool n)
Definition: sqltypes.h:79
SQLTypes subtype
Definition: sqltypes.h:651
void set_scale(int s)
Definition: sqltypes.h:353
bool notnull
Definition: sqltypes.h:654
int64_t bigintval
Definition: sqltypes.h:136
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:259
#define NULL_FLOAT
Definition: sqltypes.h:184
HostArrayDatum(size_t const l, int8_t *p, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:104
constexpr int64_t inline_int_null_array_value()
Definition: sqltypes.h:841
ManagedPtr data_ptr
Definition: sqltypes.h:113
int16_t smallintval
Definition: sqltypes.h:134
bool is_varlen_array() const
Definition: sqltypes.h:418
int get_physical_cols() const
Definition: sqltypes.h:279
SQLTypeInfo(SQLTypes t, int d, int s, bool n)
Definition: sqltypes.h:219
#define NULL_ARRAY_INT
Definition: sqltypes.h:190
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:227
#define NULL_INT
Definition: sqltypes.h:182
HostArrayDatum(size_t const l, int8_t *p, bool const n)
Definition: sqltypes.h:98
void operator()(int8_t *p)
Definition: sqltypes.h:87
SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:210
SQLTypeInfo(SQLTypes t)
Definition: sqltypes.h:238
SQLTypeInfo(SQLTypes t, bool n, EncodingType c)
Definition: sqltypes.h:239
EncodingType compression
Definition: sqltypes.h:655
std::string * stringval
Definition: sqltypes.h:141
void set_output_srid(int s)
Definition: sqltypes.h:354
bool has_bounds() const
Definition: sqltypes.h:327
DEVICE DeviceArrayDatum()
Definition: sqltypes.h:117
void set_comp_param(int p)
Definition: sqltypes.h:359
#define CHECK_LT(x, y)
Definition: Logger.h:207
kTEXT
Definition: sqltypes.h:53
kDATE
Definition: sqltypes.h:54
bool is_geometry() const
Definition: sqltypes.h:421
HOST DEVICE bool is_null_fixlen_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:571
std::string get_compression_name() const
Definition: sqltypes.h:386
HOST DEVICE int get_input_srid() const
Definition: sqltypes.h:262
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:859
std::string get_type_name() const
Definition: sqltypes.h:361
bool is_string_array() const
Definition: sqltypes.h:410
int32_t ArrayOffsetT
Definition: sqltypes.h:857
HOST DEVICE bool is_null_point_coord_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:604
void set_dimension(int d)
Definition: sqltypes.h:350
#define NULL_ARRAY_BIGINT
Definition: sqltypes.h:191
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:260
#define IS_INTEGER(T)
Definition: sqltypes.h:167
HOST DEVICE int get_storage_size() const
Definition: sqltypes.h:662
kCHAR
Definition: sqltypes.h:42
#define IS_STRING(T)
Definition: sqltypes.h:172
virtual DEVICE ~VarlenDatum()
Definition: sqltypes.h:77
#define NULL_SMALLINT
Definition: sqltypes.h:181
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:617
void set_notnull(bool n)
Definition: sqltypes.h:355
int get_physical_coord_cols() const
Definition: sqltypes.h:294
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:258
bool is_fixlen_array() const
Definition: sqltypes.h:419
SQLTypes type
Definition: sqltypes.h:650
HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
Definition: sqltypes.h:95
bool is_packed_pixel_coord() const
Definition: sqltypes.h:439
kINT
Definition: sqltypes.h:46
HOST DEVICE bool operator==(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:451
bool is_timestamp() const
Definition: sqltypes.h:647
SQLTypeInfo(SQLTypes t, bool n)
Definition: sqltypes.h:229
int8_t * numbersPtr
Definition: sqltypes.h:147
bool has_render_group() const
Definition: sqltypes.h:338
#define IS_NUMBER(T)
Definition: sqltypes.h:169
bool is_varlen_indeed() const
Definition: sqltypes.h:429
void operator()(int8_t *)
Definition: sqltypes.h:84
#define IS_GEO(T)
Definition: sqltypes.h:173
#define TRANSIENT_DICT(ID)
Definition: sqltypes.h:198
int comp_param
Definition: sqltypes.h:656
#define NULL_BOOLEAN
Definition: sqltypes.h:179
bool is_number() const
Definition: sqltypes.h:414
void set_precision(int d)
Definition: sqltypes.h:351
SQLTypeInfo get_array_type() const
Definition: sqltypes.h:621
int dimension
Definition: sqltypes.h:652
double doubleval
Definition: sqltypes.h:138
bool is_fp() const
Definition: sqltypes.h:413
bool is_date_in_days() const
Definition: sqltypes.h:625
constexpr auto is_datetime(SQLTypes type)
Definition: sqltypes.h:201
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:318
size_t length
Definition: sqltypes.h:72
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:192
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:348