OmniSciDB  04ee39c94c
sqltypes.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 #ifndef SQLTYPES_H
25 #define SQLTYPES_H
26 
27 #include "ConfigResolve.h"
28 
#include <cassert>
#include <cfloat>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <limits>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>
38 
39 // must not change because these values persist in catalogs.
40 enum SQLTypes {
41  kNULLT = 0, // type for null values
42  kBOOLEAN = 1,
43  kCHAR = 2,
44  kVARCHAR = 3,
45  kNUMERIC = 4,
46  kDECIMAL = 5,
47  kINT = 6,
48  kSMALLINT = 7,
49  kFLOAT = 8,
50  kDOUBLE = 9,
51  kTIME = 10,
52  kTIMESTAMP = 11,
53  kBIGINT = 12,
54  kTEXT = 13,
55  kDATE = 14,
56  kARRAY = 15,
59  kPOINT = 18,
61  kPOLYGON = 20,
63  kTINYINT = 22,
64  kGEOMETRY = 23,
65  kGEOGRAPHY = 24,
66  kEVAL_CONTEXT_TYPE = 25, // Placeholder Type for ANY
68 };
69 
70 struct VarlenDatum {
71  size_t length;
72  int8_t* pointer;
73  bool is_null;
74 
75  DEVICE VarlenDatum() : length(0), pointer(NULL), is_null(true) {}
76  DEVICE virtual ~VarlenDatum() {}
77 
78  VarlenDatum(const size_t l, int8_t* p, const bool n)
79  : length(l), pointer(p), is_null(n) {}
80 };
81 
83  void operator()(int8_t*) {}
84 };
// Deleter functor that releases a buffer with std::free. It is used below as
// the shared_ptr deleter when HostArrayDatum wraps a raw int8_t* buffer.
struct FreeDeleter {
  // Frees `p`; passing a null pointer is a no-op, as with std::free itself.
  void operator()(int8_t* p) { std::free(p); }
};
88 
// VarlenDatum variant whose constructors also initialize `data_ptr`, a
// std::shared_ptr<int8_t> (ManagedPtr) built from the same buffer pointer that
// is stored in the VarlenDatum base.
89 struct HostArrayDatum : public VarlenDatum {
90  using ManagedPtr = std::shared_ptr<int8_t>;
91 
92  HostArrayDatum() = default;
93 
// Copies an existing ManagedPtr into data_ptr; the base receives p.get().
94  HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
95  : VarlenDatum(l, p.get(), n), data_ptr(p) {}
96 
// Wraps the raw buffer in a shared_ptr whose deleter is FreeDeleter.
97  HostArrayDatum(size_t const l, int8_t* p, bool const n)
98  : VarlenDatum(l, p, n), data_ptr(p, FreeDeleter()){};
99 
// Custom-deleter overload without an explicit null flag: is_null is set to
// (0 == l). The enable_if restricts this overload to callables that accept an
// int8_t* and return void.
100  template <typename CUSTOM_DELETER,
101  typename = std::enable_if_t<
102  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
103  HostArrayDatum(size_t const l, int8_t* p, CUSTOM_DELETER custom_deleter)
104  : VarlenDatum(l, p, 0 == l), data_ptr(p, custom_deleter) {}
105 
// Custom-deleter overload with an explicit null flag `n`; same constraint on
// CUSTOM_DELETER as above.
106  template <typename CUSTOM_DELETER,
107  typename = std::enable_if_t<
108  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
109  HostArrayDatum(size_t const l, int8_t* p, bool const n, CUSTOM_DELETER custom_deleter)
110  : VarlenDatum(l, p, n), data_ptr(p, custom_deleter) {}
111 
113 };
114 
115 struct DeviceArrayDatum : public VarlenDatum {
117 };
118 
119 using ArrayDatum = std::conditional_t<isCudaCC(), DeviceArrayDatum, HostArrayDatum>;
120 
121 typedef union {
122  bool boolval;
123  int8_t tinyintval;
124  int16_t smallintval;
125  int32_t intval;
126  int64_t bigintval;
127  float floatval;
128  double doubleval;
130 #ifndef __CUDACC__
131  std::string* stringval; // string value
132 #endif
133 } Datum;
134 
135 #ifndef __CUDACC__
137  int8_t* numbersPtr;
138  std::vector<std::string>* stringsPtr;
139  std::vector<ArrayDatum>* arraysPtr;
140 };
141 #endif
142 
143 // must not change because these values persist in catalogs.
145  kENCODING_NONE = 0, // no encoding
146  kENCODING_FIXED = 1, // Fixed-bit encoding
147  kENCODING_RL = 2, // Run Length encoding
148  kENCODING_DIFF = 3, // Differential encoding
149  kENCODING_DICT = 4, // Dictionary encoding
150  kENCODING_SPARSE = 5, // Null encoding for sparse columns
151  kENCODING_GEOINT = 6, // Encoding coordinates as intergers
152  kENCODING_DATE_IN_DAYS = 7, // Date encoding in days
154 };
155 
156 #include "SQLTypeUtilities.h"
157 
158 #define IS_INTEGER(T) \
159  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kBIGINT) || ((T) == kTINYINT))
160 #define IS_NUMBER(T) \
161  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kDOUBLE) || ((T) == kFLOAT) || \
162  ((T) == kBIGINT) || ((T) == kNUMERIC) || ((T) == kDECIMAL) || ((T) == kTINYINT))
163 #define IS_STRING(T) (((T) == kTEXT) || ((T) == kVARCHAR) || ((T) == kCHAR))
164 #define IS_GEO(T) \
165  (((T) == kPOINT) || ((T) == kLINESTRING) || ((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
166 #define IS_INTERVAL(T) ((T) == kINTERVAL_DAY_TIME || (T) == kINTERVAL_YEAR_MONTH)
167 #define IS_DECIMAL(T) ((T) == kNUMERIC || (T) == kDECIMAL)
168 #define IS_GEO_POLY(T) (((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
169 
170 #define NULL_BOOLEAN INT8_MIN
171 #define NULL_TINYINT INT8_MIN
172 #define NULL_SMALLINT INT16_MIN
173 #define NULL_INT INT32_MIN
174 #define NULL_BIGINT INT64_MIN
175 #define NULL_FLOAT FLT_MIN
176 #define NULL_DOUBLE DBL_MIN
177 
178 #define NULL_ARRAY_BOOLEAN (INT8_MIN + 1)
179 #define NULL_ARRAY_TINYINT (INT8_MIN + 1)
180 #define NULL_ARRAY_SMALLINT (INT16_MIN + 1)
181 #define NULL_ARRAY_INT (INT32_MIN + 1)
182 #define NULL_ARRAY_BIGINT (INT64_MIN + 1)
183 #define NULL_ARRAY_FLOAT (FLT_MIN * 2.0)
184 #define NULL_ARRAY_DOUBLE (DBL_MIN * 2.0)
185 
186 #define TRANSIENT_DICT_ID 0
187 #define TRANSIENT_DICT(ID) (-(ID))
188 #define REGULAR_DICT(TRANSIENTID) (-(TRANSIENTID))
189 
190 template <typename T>
191 constexpr auto is_datetime(T sql_type) {
192  return sql_type == kTIME || sql_type == kTIMESTAMP || sql_type == kDATE;
193 }
194 
195 template <typename CORE_TYPE>
197  public:
198  enum PackagingType { Chunk, StandardBuffer };
199 
200  ExecutorTypePackaging() : packaging_type_(Chunk) {}
201 
202  bool isStandardBufferPackaging() const { return packaging_type_ == StandardBuffer; }
203  bool isChunkIteratorPackaging() const { return packaging_type_ == Chunk; }
204  void setStandardBufferPackaging() { packaging_type_ = StandardBuffer; }
205  void setChunkIteratorPackaging() { packaging_type_ = Chunk; }
206 
207  private:
209 };
210 
211 template <typename CORE_TYPE>
213  public:
214  inline int get_array_context_logical_size() const {
215  CORE_TYPE const* derived(static_cast<CORE_TYPE const*>(this));
216  if (is_member_of_typeset<kCHAR, kTEXT, kVARCHAR>(*derived)) {
217  auto comp_type(derived->get_compression());
218  if (comp_type == kENCODING_DICT || comp_type == kENCODING_FIXED ||
219  comp_type == kENCODING_NONE) {
220  return sizeof(int32_t);
221  }
222  }
223  return derived->get_logical_size();
224  }
225 };
226 
227 template <typename CORE_TYPE>
229  public:
230  constexpr auto is_date_in_days() const {
231  CORE_TYPE const* derived(static_cast<CORE_TYPE const*>(this));
232  if (is_member_of_typeset<kDATE>(*derived)) {
233  auto comp_type(derived->get_compression());
234  if (comp_type == kENCODING_DATE_IN_DAYS) {
235  return true;
236  }
237  }
238  return false;
239  }
240 
241  constexpr auto is_date() const {
242  CORE_TYPE const* derived(static_cast<CORE_TYPE const*>(this));
243  if (is_member_of_typeset<kDATE>(*derived)) {
244  return true;
245  }
246  return false;
247  }
248 
249  constexpr auto is_high_precision_timestamp() const {
250  CORE_TYPE const* derived(static_cast<CORE_TYPE const*>(this));
251  if (is_member_of_typeset<kTIMESTAMP>(*derived)) {
252  auto dimension(derived->get_dimension());
253  if (dimension > 0) {
254  return true;
255  }
256  }
257  return false;
258  }
259 
260  constexpr auto is_timestamp() const {
261  CORE_TYPE const* derived(static_cast<CORE_TYPE const*>(this));
262  if (is_member_of_typeset<kTIMESTAMP>(*derived)) {
263  return true;
264  }
265  return false;
266  }
267 };
268 
269 // @type SQLTypeInfo
270 // @brief a structure to capture all type information including
271 // length, precision, scale, etc.
272 template <template <class> class... TYPE_FACET_PACK>
273 class SQLTypeInfoCore : public TYPE_FACET_PACK<SQLTypeInfoCore<TYPE_FACET_PACK...> >... {
274  public:
// Fully specified constructor.
//   t  - type id            st - subtype (element type for arrays)
//   d  - dimension          s  - scale
//   n  - notnull flag       c  - compression scheme
//   p  - compression parameter
// `size` is derived from get_storage_size(); it is the last data member
// declared, so the fields that function reads are already initialized.
275  SQLTypeInfoCore(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
276  : type(t)
277  , subtype(st)
278  , dimension(d)
279  , scale(s)
280  , notnull(n)
281  , compression(c)
282  , comp_param(p)
283  , size(get_storage_size()) {}
284  SQLTypeInfoCore(SQLTypes t, int d, int s, bool n)
285  : type(t)
286  , subtype(kNULLT)
287  , dimension(d)
288  , scale(s)
289  , notnull(n)
290  , compression(kENCODING_NONE)
291  , comp_param(0)
292  , size(get_storage_size()) {}
293  SQLTypeInfoCore(SQLTypes t, int d, int s) : SQLTypeInfoCore(t, d, s, false) {}
295  : type(t)
296  , subtype(kNULLT)
297  , dimension(0)
298  , scale(0)
299  , notnull(n)
300  , compression(kENCODING_NONE)
301  , comp_param(0)
302  , size(get_storage_size()) {}
305  : type(t)
306  , subtype(kNULLT)
307  , dimension(0)
308  , scale(0)
309  , notnull(n)
310  , compression(c)
311  , comp_param(0)
312  , size(get_storage_size()) {}
314  : type(kNULLT)
315  , subtype(kNULLT)
316  , dimension(0)
317  , scale(0)
318  , notnull(false)
319  , compression(kENCODING_NONE)
320  , comp_param(0)
321  , size(0) {}
322 
// Plain accessors. Note the aliasing: get_dimension(), get_precision() and
// get_input_srid() all return the `dimension` field, while get_scale() and
// get_output_srid() both return the `scale` field.
323  HOST DEVICE inline SQLTypes get_type() const { return type; }
324  HOST DEVICE inline SQLTypes get_subtype() const { return subtype; }
325  HOST DEVICE inline int get_dimension() const { return dimension; }
326  inline int get_precision() const { return dimension; }
327  HOST DEVICE inline int get_input_srid() const { return dimension; }
328  HOST DEVICE inline int get_scale() const { return scale; }
329  HOST DEVICE inline int get_output_srid() const { return scale; }
330  HOST DEVICE inline bool get_notnull() const { return notnull; }
331  HOST DEVICE inline EncodingType get_compression() const { return compression; }
332  HOST DEVICE inline int get_comp_param() const { return comp_param; }
// Stored size in bytes; the field comment documents -1 as "variable size".
333  HOST DEVICE inline int get_size() const { return size; }
334  inline int get_logical_size() const {
335  if (compression == kENCODING_FIXED || compression == kENCODING_DATE_IN_DAYS) {
336  SQLTypeInfoCore ti(type, dimension, scale, notnull, kENCODING_NONE, 0, subtype);
337  return ti.get_size();
338  }
339  if (compression == kENCODING_DICT) {
340  return 4;
341  }
342  return get_size();
343  }
344  inline int get_physical_cols() const {
345  switch (type) {
346  case kPOINT:
347  return 1; // coords
348  case kLINESTRING:
349  return 2; // coords, bounds
350  case kPOLYGON:
351  return 4; // coords, ring_sizes, bounds, render_group
352  case kMULTIPOLYGON:
353  return 5; // coords, ring_sizes, poly_rings, bounds, render_group
354  default:
355  break;
356  }
357  return 0;
358  }
359  inline int get_physical_coord_cols() const {
360  // @TODO dmitri/simon rename this function?
361  // It needs to return the number of extra columns
362  // which need to go through the executor, as opposed
363  // to those which are only needed by CPU for poly
364  // cache building or what-not. For now, we just omit
365  // the Render Group column. If we add Bounding Box
366  // or something this may require rethinking. Perhaps
367  // these two functions need to return an array of
368  // offsets rather than just a number to loop over,
369  // so that executor and non-executor columns can
370  // be mixed.
371  // NOTE(adb): In binding to extension functions, we need to know some pretty specific
372  // type info about each of the physical coords cols for each geo type. I added checks
373  // there to ensure the physical coords col for the geo type match what we expect. If
374  // these values are ever changed, corresponding values in
375  // ExtensionFunctionsBinding.cpp::compute_narrowing_conv_scores and
376  // ExtensionFunctionsBinding.cpp::compute_widening_conv_scores will also need to be
377  // changed.
378  switch (type) {
379  case kPOINT:
380  return 1;
381  case kLINESTRING:
382  return 1; // omit bounds
383  case kPOLYGON:
384  return 2; // omit bounds, render group
385  case kMULTIPOLYGON:
386  return 3; // omit bounds, render group
387  default:
388  break;
389  }
390  return 0;
391  }
392  inline bool has_bounds() const {
393  switch (type) {
394  case kLINESTRING:
395  case kPOLYGON:
396  case kMULTIPOLYGON:
397  return true;
398  default:
399  break;
400  }
401  return false;
402  }
403  inline bool has_render_group() const {
404  switch (type) {
405  case kPOLYGON:
406  case kMULTIPOLYGON:
407  return true;
408  default:
409  break;
410  }
411  return false;
412  }
// Plain mutators. set_dimension(), set_precision() and set_input_srid() all
// write the `dimension` field; set_scale() and set_output_srid() both write
// the `scale` field. None of them recompute `size` except set_fixed_size().
413  HOST DEVICE inline void set_type(SQLTypes t) { type = t; }
414  HOST DEVICE inline void set_subtype(SQLTypes st) { subtype = st; }
415  inline void set_dimension(int d) { dimension = d; }
416  inline void set_precision(int d) { dimension = d; }
417  inline void set_input_srid(int d) { dimension = d; }
418  inline void set_scale(int s) { scale = s; }
419  inline void set_output_srid(int s) { scale = s; }
420  inline void set_notnull(bool n) { notnull = n; }
421  inline void set_size(int s) { size = s; }
// Re-derives `size` from the current type/compression via get_storage_size().
422  inline void set_fixed_size() { size = get_storage_size(); }
423  inline void set_compression(EncodingType c) { compression = c; }
424  inline void set_comp_param(int p) { comp_param = p; }
425 #ifndef __CUDACC__
426  inline std::string get_type_name() const {
427  if (IS_GEO(type)) {
428  std::string srid_string = "";
429  if (get_output_srid() > 0) {
430  srid_string = ", " + std::to_string(get_output_srid());
431  }
432  return type_name[(int)subtype] + "(" + type_name[(int)type] + srid_string + ")";
433  }
434  std::string ps = "";
435  if (type == kDECIMAL || type == kNUMERIC || subtype == kDECIMAL ||
436  subtype == kNUMERIC) {
437  ps = "(" + std::to_string(dimension) + "," + std::to_string(scale) + ")";
438  } else if (type == kTIMESTAMP) {
439  ps = "(" + std::to_string(dimension) + ")";
440  }
441  if (type == kARRAY) {
442  auto elem_ti = get_elem_type();
443  auto num_elems = (size > 0) ? std::to_string(size / elem_ti.get_size()) : "";
444  return type_name[(int)subtype] + ps + "[" + num_elems + "]";
445  }
446  return type_name[(int)type] + ps;
447  }
448  inline std::string get_compression_name() const { return comp_name[(int)compression]; }
449 #endif
// Type-classification predicates. Each one inspects only `type` (plus
// `subtype`, `size` or `compression` where shown) and has no side effects.
450  inline bool is_string() const { return IS_STRING(type); }
451  inline bool is_string_array() const { return (type == kARRAY) && IS_STRING(subtype); }
452  inline bool is_integer() const { return IS_INTEGER(type); }
453  inline bool is_decimal() const { return type == kDECIMAL || type == kNUMERIC; }
454  inline bool is_fp() const { return type == kFLOAT || type == kDOUBLE; }
455  inline bool is_number() const { return IS_NUMBER(type); }
// "time" here means any of TIME, TIMESTAMP or DATE (see is_datetime()).
456  inline bool is_time() const { return is_datetime(type); }
457  inline bool is_boolean() const { return type == kBOOLEAN; }
458  inline bool is_array() const { return type == kARRAY; }
// Arrays are told apart by `size`: <= 0 means variable length, > 0 fixed length.
459  inline bool is_varlen_array() const { return type == kARRAY && size <= 0; }
460  inline bool is_fixlen_array() const { return type == kARRAY && size > 0; }
461  inline bool is_timeinterval() const { return IS_INTERVAL(type); }
462  inline bool is_geometry() const { return IS_GEO(type); }
463 
// True for non-dictionary-encoded strings, every array (including fixed-length
// ones) and every geo type.
464  inline bool is_varlen() const { // TODO: logically this should ignore fixlen arrays
465  return (IS_STRING(type) && compression != kENCODING_DICT) || type == kARRAY ||
466  IS_GEO(type);
467  }
468 
469  // need this here till is_varlen can be fixed w/o negative impact to existing code
470  inline bool is_varlen_indeed() const {
471  // SQLTypeInfo.is_varlen() is broken with fixedlen array now
472  // and seems left broken for some concern, so fix it locally
473  return is_varlen() && !is_fixlen_array();
474  }
475 
// True for CHAR/VARCHAR/TEXT columns stored with kENCODING_DICT.
476  inline bool is_dict_encoded_string() const {
477  return is_string() && compression == kENCODING_DICT;
478  }
479 
480  HOST DEVICE inline bool operator!=(const SQLTypeInfoCore& rhs) const {
481  return type != rhs.get_type() || subtype != rhs.get_subtype() ||
482  dimension != rhs.get_dimension() || scale != rhs.get_scale() ||
483  compression != rhs.get_compression() ||
484  (compression != kENCODING_NONE && comp_param != rhs.get_comp_param() &&
485  comp_param != TRANSIENT_DICT(rhs.get_comp_param())) ||
486  notnull != rhs.get_notnull();
487  }
488  HOST DEVICE inline bool operator==(const SQLTypeInfoCore& rhs) const {
489  return type == rhs.get_type() && subtype == rhs.get_subtype() &&
490  dimension == rhs.get_dimension() && scale == rhs.get_scale() &&
491  compression == rhs.get_compression() &&
492  (compression == kENCODING_NONE || comp_param == rhs.get_comp_param() ||
493  comp_param == TRANSIENT_DICT(rhs.get_comp_param())) &&
494  notnull == rhs.get_notnull();
495  }
496 
497  // FIX-ME: Work through variadic base classes
499  type = rhs.get_type();
500  subtype = rhs.get_subtype();
501  dimension = rhs.get_dimension();
502  scale = rhs.get_scale();
503  notnull = rhs.get_notnull();
504  compression = rhs.get_compression();
505  comp_param = rhs.get_comp_param();
506  size = rhs.get_size();
507  return (*this);
508  }
509 
510  inline bool is_castable(const SQLTypeInfoCore& new_type_info) const {
511  // can always cast between the same type but different precision/scale/encodings
512  if (type == new_type_info.get_type()) {
513  return true;
514  // can always cast from or to string
515  } else if (is_string() || new_type_info.is_string()) {
516  return true;
517  // can cast between numbers
518  } else if (is_number() && new_type_info.is_number()) {
519  return true;
520  // can cast from timestamp or date to number (epoch)
521  } else if ((type == kTIMESTAMP || type == kDATE) && new_type_info.is_number()) {
522  return true;
523  // can cast from date to timestamp
524  } else if (type == kDATE && new_type_info.get_type() == kTIMESTAMP) {
525  return true;
526  } else if (type == kTIMESTAMP && new_type_info.get_type() == kDATE) {
527  return true;
528  } else if (type == kBOOLEAN && new_type_info.is_number()) {
529  return true;
530  } else if (type == kARRAY && new_type_info.get_type() == kARRAY) {
531  return get_elem_type().is_castable(new_type_info.get_elem_type());
532  } else {
533  return false;
534  }
535  }
536 
537  HOST DEVICE inline bool is_null(const Datum& d) const {
538  // assuming Datum is always uncompressed
539  switch (type) {
540  case kBOOLEAN:
541  return (int8_t)d.boolval == NULL_BOOLEAN;
542  case kTINYINT:
543  return d.tinyintval == NULL_TINYINT;
544  case kSMALLINT:
545  return d.smallintval == NULL_SMALLINT;
546  case kINT:
547  return d.intval == NULL_INT;
548  case kBIGINT:
549  case kNUMERIC:
550  case kDECIMAL:
551  return d.bigintval == NULL_BIGINT;
552  case kFLOAT:
553  return d.floatval == NULL_FLOAT;
554  case kDOUBLE:
555  return d.doubleval == NULL_DOUBLE;
556  case kTIME:
557  case kTIMESTAMP:
558  case kDATE:
559  return d.bigintval == NULL_BIGINT;
560  case kTEXT:
561  case kVARCHAR:
562  case kCHAR:
563  // @TODO handle null strings
564  break;
565  case kNULLT:
566  return true;
567  case kARRAY:
568  return d.arrayval == NULL || d.arrayval->is_null;
569  default:
570  break;
571  }
572  return false;
573  }
574  HOST DEVICE inline bool is_null(const int8_t* val) const {
575  if (type == kFLOAT) {
576  return *(float*)val == NULL_FLOAT;
577  }
578  if (type == kDOUBLE) {
579  return *(double*)val == NULL_DOUBLE;
580  }
581  // val can be either compressed or uncompressed
582  switch (size) {
583  case 1:
584  return *val == NULL_TINYINT;
585  case 2:
586  return *(int16_t*)val == NULL_SMALLINT;
587  case 4:
588  return *(int32_t*)val == NULL_INT;
589  case 8:
590  return *(int64_t*)val == NULL_BIGINT;
591  case kNULLT:
592  return true;
593  default:
594  // @TODO(wei) handle null strings
595  break;
596  }
597  return false;
598  }
599  HOST DEVICE inline bool is_null_fixlen_array(const int8_t* val, int array_size) const {
600  // Check if fixed length array has a NULL_ARRAY sentinel as the first element
601  if (type == kARRAY && val && array_size > 0 && array_size == size) {
602  // Need to create element type to get the size, but can't call get_elem_type()
603  // since this is a HOST DEVICE function. Going through copy constructor instead.
604  auto elem_ti{*this};
605  elem_ti.set_type(subtype);
606  elem_ti.set_subtype(kNULLT);
607  auto elem_size = elem_ti.get_storage_size();
608  if (elem_size < 1) {
609  return false;
610  }
611  if (subtype == kFLOAT) {
612  return *(float*)val == NULL_ARRAY_FLOAT;
613  }
614  if (subtype == kDOUBLE) {
615  return *(double*)val == NULL_ARRAY_DOUBLE;
616  }
617  switch (elem_size) {
618  case 1:
619  return *val == NULL_ARRAY_TINYINT;
620  case 2:
621  return *(int16_t*)val == NULL_ARRAY_SMALLINT;
622  case 4:
623  return *(int32_t*)val == NULL_ARRAY_INT;
624  case 8:
625  return *(int64_t*)val == NULL_ARRAY_BIGINT;
626  default:
627  return false;
628  }
629  }
630  return false;
631  }
633  return SQLTypeInfoCore(
634  subtype, dimension, scale, notnull, compression, comp_param, kNULLT);
635  }
637  return SQLTypeInfoCore(
638  kARRAY, dimension, scale, notnull, compression, comp_param, type);
639  }
640 
641  private:
642  SQLTypes type; // type id
643  SQLTypes subtype; // element type of arrays
644  int dimension; // VARCHAR/CHAR length or NUMERIC/DECIMAL precision
645  int scale; // NUMERIC/DECIMAL scale
646  bool notnull; // nullable? a hint, not used for type checking
647  EncodingType compression; // compression scheme
648  int comp_param; // compression parameter when applicable for certain schemes
649  int size; // size of the type in bytes. -1 for variable size
650 #ifndef __CUDACC__
651  static std::string type_name[kSQLTYPE_LAST];
652  static std::string comp_name[kENCODING_LAST];
653 #endif
654  HOST DEVICE inline int get_storage_size() const {
655  switch (type) {
656  case kBOOLEAN:
657  return sizeof(int8_t);
658  case kTINYINT:
659  return sizeof(int8_t);
660  case kSMALLINT:
661  switch (compression) {
662  case kENCODING_NONE:
663  return sizeof(int16_t);
664  case kENCODING_FIXED:
665  case kENCODING_SPARSE:
666  return comp_param / 8;
667  case kENCODING_RL:
668  case kENCODING_DIFF:
669  break;
670  default:
671  assert(false);
672  }
673  break;
674  case kINT:
675  switch (compression) {
676  case kENCODING_NONE:
677  return sizeof(int32_t);
678  case kENCODING_FIXED:
679  case kENCODING_SPARSE:
680  return comp_param / 8;
681  case kENCODING_RL:
682  case kENCODING_DIFF:
683  break;
684  default:
685  assert(false);
686  }
687  break;
688  case kBIGINT:
689  case kNUMERIC:
690  case kDECIMAL:
691  switch (compression) {
692  case kENCODING_NONE:
693  return sizeof(int64_t);
694  case kENCODING_FIXED:
695  case kENCODING_SPARSE:
696  return comp_param / 8;
697  case kENCODING_RL:
698  case kENCODING_DIFF:
699  break;
700  default:
701  assert(false);
702  }
703  break;
704  case kFLOAT:
705  switch (compression) {
706  case kENCODING_NONE:
707  return sizeof(float);
708  case kENCODING_FIXED:
709  case kENCODING_RL:
710  case kENCODING_DIFF:
711  case kENCODING_SPARSE:
712  assert(false);
713  break;
714  default:
715  assert(false);
716  }
717  break;
718  case kDOUBLE:
719  switch (compression) {
720  case kENCODING_NONE:
721  return sizeof(double);
722  case kENCODING_FIXED:
723  case kENCODING_RL:
724  case kENCODING_DIFF:
725  case kENCODING_SPARSE:
726  assert(false);
727  break;
728  default:
729  assert(false);
730  }
731  break;
732  case kTIMESTAMP:
733  case kTIME:
734  case kINTERVAL_DAY_TIME:
736  case kDATE:
737  switch (compression) {
738  case kENCODING_NONE:
739  return sizeof(int64_t);
740  case kENCODING_FIXED:
741  if (type == kTIMESTAMP && dimension > 0) {
742  assert(false); // disable compression for timestamp precisions
743  }
744  return comp_param / 8;
745  case kENCODING_RL:
746  case kENCODING_DIFF:
747  case kENCODING_SPARSE:
748  assert(false);
749  break;
751  switch (comp_param) {
752  case 0:
753  return 4; // Default date encoded in days is 32 bits
754  case 16:
755  case 32:
756  return comp_param / 8;
757  default:
758  assert(false);
759  break;
760  }
761  default:
762  assert(false);
763  }
764  break;
765  case kTEXT:
766  case kVARCHAR:
767  case kCHAR:
768  if (compression == kENCODING_DICT) {
769  return sizeof(int32_t); // @TODO(wei) must check DictDescriptor
770  }
771  break;
772  case kARRAY:
773  // TODO: return size for fixlen arrays?
774  break;
775  case kPOINT:
776  case kLINESTRING:
777  case kPOLYGON:
778  case kMULTIPOLYGON:
779  break;
780  default:
781  break;
782  }
783  return -1;
784  }
785 };
786 
787 #ifndef __CUDACC__
788 // todo: Get rid of preprocessor definition and move into Cuda Type Concept
// Display names indexed by SQLTypes value; get_type_name() looks entries up
// with type_name[(int)type]. The 25 initializers below cover indices 0
// ("NULL") through 24 ("GEOGRAPHY").
// NOTE(review): kSQLTYPE_LAST is declared outside this view; if it is larger
// than 25 the remaining slots are default-constructed (empty) strings.
789 template <template <class> class... TYPE_FACET_PACK>
790 std::string SQLTypeInfoCore<TYPE_FACET_PACK...>::type_name[kSQLTYPE_LAST] = {
791  "NULL",
792  "BOOLEAN",
793  "CHAR",
794  "VARCHAR",
795  "NUMERIC",
796  "DECIMAL",
797  "INTEGER",
798  "SMALLINT",
799  "FLOAT",
800  "DOUBLE",
801  "TIME",
802  "TIMESTAMP",
803  "BIGINT",
804  "TEXT",
805  "DATE",
806  "ARRAY",
807  "INTERVAL_DAY_TIME",
808  "INTERVAL_YEAR_MONTH",
809  "POINT",
810  "LINESTRING",
811  "POLYGON",
812  "MULTIPOLYGON",
813  "TINYINT",
814  "GEOMETRY",
815  "GEOGRAPHY"};
816 
// Display names indexed by EncodingType value; get_compression_name() looks
// entries up with comp_name[(int)compression]. Index 6 (kENCODING_GEOINT)
// is shown as "COMPRESSED" and index 7 (kENCODING_DATE_IN_DAYS) as "DAYS".
817 template <template <class> class... TYPE_FACET_PACK>
818 std::string SQLTypeInfoCore<TYPE_FACET_PACK...>::comp_name[kENCODING_LAST] =
819  {"NONE", "FIXED", "RL", "DIFF", "DICT", "SPARSE", "COMPRESSED", "DAYS"};
820 #endif
821 
822 using SQLTypeInfo =
824 
826 
827 #ifndef __CUDACC__
828 Datum StringToDatum(const std::string& s, SQLTypeInfo& ti);
829 std::string DatumToString(Datum d, const SQLTypeInfo& ti);
830 bool DatumEqual(const Datum, const Datum, const SQLTypeInfo& ti);
831 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
832  const SQLTypeInfo& type_info,
833  const SQLTypeInfo& new_type_info);
834 #endif
835 
836 #include "../QueryEngine/DateAdd.h"
837 #include "../QueryEngine/DateTruncate.h"
838 #include "../QueryEngine/ExtractFromTime.h"
839 
841  EncodingType encoding = type_info.get_compression();
842  if (encoding == kENCODING_DATE_IN_DAYS ||
843  (encoding == kENCODING_FIXED && type_info.get_type() != kARRAY)) {
844  encoding = kENCODING_NONE;
845  }
846  return SQLTypeInfo(type_info.get_type(),
847  type_info.get_dimension(),
848  type_info.get_scale(),
849  type_info.get_notnull(),
850  encoding,
851  type_info.get_comp_param(),
852  type_info.get_subtype());
853 }
854 
// Null sentinel for integer type T, widened to int64_t: the minimum value for
// signed types and the maximum value for unsigned types.
template <class T>
constexpr inline int64_t inline_int_null_value() {
  using Limits = std::numeric_limits<T>;
  if (std::is_signed<T>::value) {
    return Limits::min();
  }
  return Limits::max();
}
860 
// Sentinel marking a null fixed-length array of integer type T, widened to
// int64_t: one above the minimum for signed types, one below the maximum for
// unsigned types (i.e. adjacent to the scalar null sentinel).
// TODO: null_array values in signed types would step on max valid value
// in fixlen unsigned arrays, the max valid value may need to be lowered.
template <class T>
constexpr inline int64_t inline_int_null_array_value() {
  using Limits = std::numeric_limits<T>;
  if (std::is_signed<T>::value) {
    return Limits::min() + 1;
  }
  return Limits::max() - 1;
}
868 
// Largest non-sentinel value for integer type T, widened to int64_t: the
// type's maximum for signed types, and one below the maximum for unsigned
// types (whose maximum serves as the null sentinel).
template <class T>
constexpr inline int64_t max_valid_int_value() {
  using Limits = std::numeric_limits<T>;
  if (std::is_signed<T>::value) {
    return Limits::max();
  }
  return Limits::max() - 1;
}
874 
875 #include "InlineNullValues.h"
876 
877 using StringOffsetT = int32_t;
878 using ArrayOffsetT = int32_t;
879 
880 #endif // SQLTYPES_H
int8_t tinyintval
Definition: sqltypes.h:123
void setStandardBufferPackaging()
Definition: sqltypes.h:204
#define NULL_DOUBLE
Definition: sqltypes.h:176
void setChunkIteratorPackaging()
Definition: sqltypes.h:205
int get_physical_coord_cols() const
Definition: sqltypes.h:359
bool is_string_array() const
Definition: sqltypes.h:451
void d(const SQLTypes expected_type, const std::string &str)
Definition: ImportTest.cpp:268
HOST DEVICE SQLTypeInfoCore & operator=(const SQLTypeInfoCore &rhs)
Definition: sqltypes.h:498
bool is_time() const
Definition: sqltypes.h:456
HOST DEVICE int get_size() const
Definition: sqltypes.h:333
DEVICE VarlenDatum()
Definition: sqltypes.h:75
int get_precision() const
Definition: sqltypes.h:326
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:325
Definition: sqltypes.h:51
bool is_fp() const
Definition: sqltypes.h:454
SQLTypes
Definition: sqltypes.h:40
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:138
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:139
EncodingType
Definition: encodetypes.h:22
bool is_null
Definition: sqltypes.h:73
#define NULL_BIGINT
Definition: sqltypes.h:174
SQLTypeInfoCore(SQLTypes t, bool n)
Definition: sqltypes.h:294
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:184
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:330
bool boolval
Definition: sqltypes.h:122
constexpr auto is_high_precision_timestamp() const
Definition: sqltypes.h:249
constexpr int64_t inline_int_null_value()
Definition: sqltypes.h:856
void set_size(int s)
Definition: sqltypes.h:421
void c(const std::string &query_string, const ExecutorDeviceType device_type)
bool isStandardBufferPackaging() const
Definition: sqltypes.h:202
HOST DEVICE int get_scale() const
Definition: sqltypes.h:328
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:323
bool is_varlen() const
Definition: sqltypes.h:464
VarlenDatum * arrayval
Definition: sqltypes.h:129
void set_input_srid(int d)
Definition: sqltypes.h:417
HOST DEVICE bool operator!=(const SQLTypeInfoCore &rhs) const
Definition: sqltypes.h:480
constexpr auto is_datetime(T sql_type)
Definition: sqltypes.h:191
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:840
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:413
SQLTypes decimal_to_int_type(const SQLTypeInfo &)
Definition: Datum.cpp:268
void set_fixed_size()
Definition: sqltypes.h:422
#define NULL_ARRAY_SMALLINT
Definition: sqltypes.h:180
#define NULL_ARRAY_TINYINT
Definition: sqltypes.h:179
std::string get_compression_name() const
Definition: sqltypes.h:448
bool is_castable(const SQLTypeInfoCore &new_type_info) const
Definition: sqltypes.h:510
void set_dimension(int d)
Definition: sqltypes.h:415
constexpr auto is_timestamp() const
Definition: sqltypes.h:260
int get_array_context_logical_size() const
Definition: sqltypes.h:214
SQLTypeInfoCore(SQLTypes t, int d, int s)
Definition: sqltypes.h:293
void set_scale(int s)
Definition: sqltypes.h:418
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:331
void set_compression(EncodingType c)
Definition: sqltypes.h:423
int32_t intval
Definition: sqltypes.h:125
std::string to_string(char const *&&v)
HostArrayDatum(size_t const l, int8_t *p, bool const n, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:109
void set_notnull(bool n)
Definition: sqltypes.h:420
void set_output_srid(int s)
Definition: sqltypes.h:419
int8_t * pointer
Definition: sqltypes.h:72
int32_t StringOffsetT
Definition: sqltypes.h:877
bool DatumEqual(const Datum, const Datum, const SQLTypeInfo &ti)
Definition: Datum.cpp:153
#define DEVICE
constexpr int64_t max_valid_int_value()
Definition: sqltypes.h:870
#define HOST
std::string get_type_name() const
Definition: sqltypes.h:426
float floatval
Definition: sqltypes.h:127
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:414
bool is_array() const
Definition: sqltypes.h:458
void set_precision(int d)
Definition: sqltypes.h:416
bool is_timeinterval() const
Definition: sqltypes.h:461
bool is_integer() const
Definition: sqltypes.h:452
HOST DEVICE int get_storage_size() const
Definition: sqltypes.h:654
#define IS_INTERVAL(T)
Definition: sqltypes.h:166
std::shared_ptr< int8_t > ManagedPtr
Definition: sqltypes.h:90
#define NULL_TINYINT
Definition: sqltypes.h:171
VarlenDatum(const size_t l, int8_t *p, const bool n)
Definition: sqltypes.h:78
bool is_varlen_array() const
Definition: sqltypes.h:459
bool is_decimal() const
Definition: sqltypes.h:453
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:324
constexpr auto is_date_in_days() const
Definition: sqltypes.h:230
int64_t bigintval
Definition: sqltypes.h:126
SQLTypeInfoCore(SQLTypes t, bool n, EncodingType c)
Definition: sqltypes.h:304
#define NULL_FLOAT
Definition: sqltypes.h:175
HostArrayDatum(size_t const l, int8_t *p, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:103
bool is_dict_encoded_string() const
Definition: sqltypes.h:476
HOST DEVICE int get_input_srid() const
Definition: sqltypes.h:327
constexpr int64_t inline_int_null_array_value()
Definition: sqltypes.h:862
ManagedPtr data_ptr
Definition: sqltypes.h:112
bool has_render_group() const
Definition: sqltypes.h:403
int16_t smallintval
Definition: sqltypes.h:124
#define NULL_ARRAY_INT
Definition: sqltypes.h:181
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:193
PackagingType packaging_type_
Definition: sqltypes.h:208
SQLTypeInfoCore get_elem_type() const
Definition: sqltypes.h:632
#define NULL_INT
Definition: sqltypes.h:173
HostArrayDatum(size_t const l, int8_t *p, bool const n)
Definition: sqltypes.h:97
void operator()(int8_t *p)
Definition: sqltypes.h:86
SQLTypeInfoCore< ArrayContextTypeSizer, ExecutorTypePackaging, DateTimeFacilities > SQLTypeInfo
Definition: sqltypes.h:823
std::string * stringval
Definition: sqltypes.h:131
DEVICE DeviceArrayDatum()
Definition: sqltypes.h:116
bool is_boolean() const
Definition: sqltypes.h:457
SQLTypes subtype
Definition: sqltypes.h:643
kTEXT
Definition: sqltypes.h:54
kDATE
Definition: sqltypes.h:55
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:329
HOST DEVICE bool is_null(const int8_t *val) const
Definition: sqltypes.h:574
HOST DEVICE bool operator==(const SQLTypeInfoCore &rhs) const
Definition: sqltypes.h:488
int32_t ArrayOffsetT
Definition: sqltypes.h:878
bool is_varlen_indeed() const
Definition: sqltypes.h:470
#define NULL_ARRAY_BIGINT
Definition: sqltypes.h:182
SQLTypeInfoCore get_array_type() const
Definition: sqltypes.h:636
#define IS_INTEGER(T)
Definition: sqltypes.h:158
kCHAR
Definition: sqltypes.h:43
#define IS_STRING(T)
Definition: sqltypes.h:163
SQLTypeInfoCore(SQLTypes t)
Definition: sqltypes.h:303
int get_logical_size() const
Definition: sqltypes.h:334
void set_comp_param(int p)
Definition: sqltypes.h:424
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:332
bool isChunkIteratorPackaging() const
Definition: sqltypes.h:203
Datum StringToDatum(const std::string &s, SQLTypeInfo &ti)
Definition: Datum.cpp:90
bool is_number() const
Definition: sqltypes.h:455
virtual DEVICE ~VarlenDatum()
Definition: sqltypes.h:76
#define NULL_SMALLINT
Definition: sqltypes.h:172
int get_physical_cols() const
Definition: sqltypes.h:344
constexpr auto is_date() const
Definition: sqltypes.h:241
bool is_geometry() const
Definition: sqltypes.h:462
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
Definition: FileMgr.h:68
HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
Definition: sqltypes.h:94
kINT
Definition: sqltypes.h:47
int8_t * numbersPtr
Definition: sqltypes.h:137
SQLTypes type
Definition: sqltypes.h:642
#define IS_NUMBER(T)
Definition: sqltypes.h:160
bool is_string() const
Definition: sqltypes.h:450
void operator()(int8_t *)
Definition: sqltypes.h:83
#define IS_GEO(T)
Definition: sqltypes.h:164
#define TRANSIENT_DICT(ID)
Definition: sqltypes.h:187
bool has_bounds() const
Definition: sqltypes.h:392
EncodingType compression
Definition: sqltypes.h:647
#define NULL_BOOLEAN
Definition: sqltypes.h:170
HOST DEVICE bool is_null(const Datum &d) const
Definition: sqltypes.h:537
SQLTypeInfoCore(SQLTypes t, int d, int s, bool n)
Definition: sqltypes.h:284
HOST DEVICE bool is_null_fixlen_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:599
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:119
SQLTypeInfoCore(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:275
double doubleval
Definition: sqltypes.h:128
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:284
size_t length
Definition: sqltypes.h:71
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:183
bool is_fixlen_array() const
Definition: sqltypes.h:460