OmniSciDB  95562058bd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sqltypes.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
#include "Logger/Logger.h"
#include "StringTransform.h"
#include "funcannotations.h"

#include <cassert>
#include <cfloat>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <limits>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>
38 
39 // must not change because these values persist in catalogs.
enum SQLTypes {
  kNULLT = 0,  // type for null values
  kBOOLEAN = 1,
  kCHAR = 2,
  kVARCHAR = 3,
  kNUMERIC = 4,
  kDECIMAL = 5,
  kINT = 6,
  kSMALLINT = 7,
  kFLOAT = 8,
  kDOUBLE = 9,
  kTIME = 10,
  kTIMESTAMP = 11,
  kBIGINT = 12,
  kTEXT = 13,
  kDATE = 14,
  kARRAY = 15,
  kINTERVAL_DAY_TIME = 16,
  kINTERVAL_YEAR_MONTH = 17,
  kPOINT = 18,
  kLINESTRING = 19,
  kPOLYGON = 20,
  kMULTIPOLYGON = 21,
  kTINYINT = 22,
  kGEOMETRY = 23,
  kGEOGRAPHY = 24,
  kEVAL_CONTEXT_TYPE = 25,  // Placeholder Type for ANY
  kVOID = 26,
  kCURSOR = 27,
  kCOLUMN = 28,
  // One past the last valid type id; used to size per-type lookup tables
  // (see SQLTypeInfo::type_name). Not a real type.
  kSQLTYPE_LAST = 29
};
72 
73 struct VarlenDatum {
74  size_t length;
75  int8_t* pointer;
76  bool is_null;
77 
78  DEVICE VarlenDatum() : length(0), pointer(nullptr), is_null(true) {}
79  DEVICE virtual ~VarlenDatum() {}
80 
81  VarlenDatum(const size_t l, int8_t* p, const bool n)
82  : length(l), pointer(p), is_null(n) {}
83 };
84 
// Deleter that intentionally does nothing; for use with a smart pointer that
// wraps memory it must not free.
// NOTE(review): the struct header line was lost in extraction; the name
// DoNothingDeleter is reconstructed - confirm against the repository.
struct DoNothingDeleter {
  void operator()(int8_t*) {}
};
// Deleter that releases a buffer with std::free(). Passing nullptr is
// harmless because std::free(nullptr) is a no-op.
struct FreeDeleter {
  void operator()(int8_t* p) { std::free(p); }
};
91 
92 struct HostArrayDatum : public VarlenDatum {
93  using ManagedPtr = std::shared_ptr<int8_t>;
94 
95  HostArrayDatum() = default;
96 
97  HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
98  : VarlenDatum(l, p.get(), n), data_ptr(p) {}
99 
100  HostArrayDatum(size_t const l, int8_t* p, bool const n)
101  : VarlenDatum(l, p, n), data_ptr(p, FreeDeleter()){};
102 
103  template <typename CUSTOM_DELETER,
104  typename = std::enable_if_t<
105  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
106  HostArrayDatum(size_t const l, int8_t* p, CUSTOM_DELETER custom_deleter)
107  : VarlenDatum(l, p, 0 == l), data_ptr(p, custom_deleter) {}
108 
109  template <typename CUSTOM_DELETER,
110  typename = std::enable_if_t<
111  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
112  HostArrayDatum(size_t const l, int8_t* p, bool const n, CUSTOM_DELETER custom_deleter)
113  : VarlenDatum(l, p, n), data_ptr(p, custom_deleter) {}
114 
116 };
117 
118 struct DeviceArrayDatum : public VarlenDatum {
120 };
121 
122 inline DEVICE constexpr bool is_cuda_compiler() {
123 #ifdef __CUDACC__
124  return true;
125 #else
126  return false;
127 #endif
128 }
129 
130 using ArrayDatum =
131  std::conditional_t<is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum>;
132 
133 union Datum {
134  bool boolval;
135  int8_t tinyintval;
136  int16_t smallintval;
137  int32_t intval;
138  int64_t bigintval;
139  float floatval;
140  double doubleval;
142 #ifndef __CUDACC__
143  std::string* stringval; // string value
144 #endif
145 };
146 
147 #ifndef __CUDACC__
149  int8_t* numbersPtr;
150  std::vector<std::string>* stringsPtr;
151  std::vector<ArrayDatum>* arraysPtr;
152 };
153 #endif
154 
155 // must not change because these values persist in catalogs.
enum EncodingType {
  kENCODING_NONE = 0,          // no encoding
  kENCODING_FIXED = 1,         // Fixed-bit encoding
  kENCODING_RL = 2,            // Run Length encoding
  kENCODING_DIFF = 3,          // Differential encoding
  kENCODING_DICT = 4,          // Dictionary encoding
  kENCODING_SPARSE = 5,        // Null encoding for sparse columns
  kENCODING_GEOINT = 6,        // Encoding coordinates as integers
  kENCODING_DATE_IN_DAYS = 7,  // Date encoding in days
  // One past the last valid encoding; used to size per-encoding lookup tables
  // (see SQLTypeInfo::comp_name). Not a real encoding.
  kENCODING_LAST = 8
};
167 
// Type-category predicates over SQLTypes values. Each macro may evaluate its
// argument several times, so pass side-effect-free expressions only.
#define IS_INTEGER(T) \
  (((T) == kTINYINT) || ((T) == kSMALLINT) || ((T) == kINT) || ((T) == kBIGINT))
#define IS_NUMBER(T)                                                            \
  (((T) == kTINYINT) || ((T) == kSMALLINT) || ((T) == kINT) || ((T) == kBIGINT) || \
   ((T) == kFLOAT) || ((T) == kDOUBLE) || ((T) == kNUMERIC) || ((T) == kDECIMAL))
#define IS_STRING(T) (((T) == kCHAR) || ((T) == kVARCHAR) || ((T) == kTEXT))
#define IS_GEO(T) \
  (((T) == kPOINT) || ((T) == kLINESTRING) || ((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
#define IS_INTERVAL(T) (((T) == kINTERVAL_DAY_TIME) || ((T) == kINTERVAL_YEAR_MONTH))
#define IS_DECIMAL(T) (((T) == kNUMERIC) || ((T) == kDECIMAL))
#define IS_GEO_POLY(T) (((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
179 
// Sentinel values that stand for SQL NULL in scalar storage. SQLTypeInfo::is_null
// compares stored values against these.
#define NULL_BOOLEAN INT8_MIN
#define NULL_TINYINT INT8_MIN
#define NULL_SMALLINT INT16_MIN
#define NULL_INT INT32_MIN
#define NULL_BIGINT INT64_MIN
#define NULL_FLOAT FLT_MIN
#define NULL_DOUBLE DBL_MIN

// Sentinels marking a whole fixed-length array as NULL; checked against the
// first element by SQLTypeInfo::is_null_fixlen_array. Each differs from the
// corresponding scalar NULL sentinel above.
#define NULL_ARRAY_BOOLEAN (INT8_MIN + 1)
#define NULL_ARRAY_TINYINT (INT8_MIN + 1)
#define NULL_ARRAY_SMALLINT (INT16_MIN + 1)
#define NULL_ARRAY_INT (INT32_MIN + 1)
#define NULL_ARRAY_BIGINT (INT64_MIN + 1)
#define NULL_ARRAY_FLOAT (FLT_MIN * 2.0)
#define NULL_ARRAY_DOUBLE (DBL_MIN * 2.0)

// Sentinel compared against the first 32-bit word of an 8-byte point
// coordinate array in SQLTypeInfo::is_null_point_coord_array.
#define NULL_ARRAY_COMPRESSED_32 0x80000000U

// Dictionary id helpers: both conversions simply negate the id.
#define TRANSIENT_DICT_ID 0
#define TRANSIENT_DICT(ID) (-(ID))
#define REGULAR_DICT(TRANSIENTID) (-(TRANSIENTID))
201 
202 constexpr auto is_datetime(SQLTypes type) {
203  return type == kTIME || type == kTIMESTAMP || type == kDATE;
204 }
205 
206 // @type SQLTypeInfo
207 // @brief a structure to capture all type information including
208 // length, precision, scale, etc.
209 class SQLTypeInfo {
210  public:
211  SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
212  : type(t)
213  , subtype(st)
214  , dimension(d)
215  , scale(s)
216  , notnull(n)
217  , compression(c)
218  , comp_param(p)
219  , size(get_storage_size()) {}
220  SQLTypeInfo(SQLTypes t, int d, int s, bool n)
221  : type(t)
222  , subtype(kNULLT)
223  , dimension(d)
224  , scale(s)
225  , notnull(n)
227  , comp_param(0)
228  , size(get_storage_size()) {}
229  SQLTypeInfo(SQLTypes t, int d, int s) : SQLTypeInfo(t, d, s, false) {}
231  : type(t)
232  , subtype(kNULLT)
233  , dimension(0)
234  , scale(0)
235  , notnull(n)
237  , comp_param(0)
238  , size(get_storage_size()) {}
241  : type(t)
242  , subtype(kNULLT)
243  , dimension(0)
244  , scale(0)
245  , notnull(n)
246  , compression(c)
247  , comp_param(0)
248  , size(get_storage_size()) {}
250  : type(kNULLT)
251  , subtype(kNULLT)
252  , dimension(0)
253  , scale(0)
254  , notnull(false)
256  , comp_param(0)
257  , size(0) {}
258 
259  HOST DEVICE inline SQLTypes get_type() const { return type; }
260  HOST DEVICE inline SQLTypes get_subtype() const { return subtype; }
261  HOST DEVICE inline int get_dimension() const { return dimension; }
262  inline int get_precision() const { return dimension; }
263  HOST DEVICE inline int get_input_srid() const { return dimension; }
264  HOST DEVICE inline int get_scale() const { return scale; }
265  HOST DEVICE inline int get_output_srid() const { return scale; }
266  HOST DEVICE inline bool get_notnull() const { return notnull; }
268  HOST DEVICE inline int get_comp_param() const { return comp_param; }
269  HOST DEVICE inline int get_size() const { return size; }
270  inline int get_logical_size() const {
273  return ti.get_size();
274  }
275  if (compression == kENCODING_DICT) {
276  return 4;
277  }
278  return get_size();
279  }
280  inline int get_physical_cols() const {
281  switch (type) {
282  case kPOINT:
283  return 1; // coords
284  case kLINESTRING:
285  return 2; // coords, bounds
286  case kPOLYGON:
287  return 4; // coords, ring_sizes, bounds, render_group
288  case kMULTIPOLYGON:
289  return 5; // coords, ring_sizes, poly_rings, bounds, render_group
290  default:
291  break;
292  }
293  return 0;
294  }
295  inline int get_physical_coord_cols() const {
296  // @TODO dmitri/simon rename this function?
297  // It needs to return the number of extra columns
298  // which need to go through the executor, as opposed
299  // to those which are only needed by CPU for poly
300  // cache building or what-not. For now, we just omit
301  // the Render Group column. If we add Bounding Box
302  // or something this may require rethinking. Perhaps
303  // these two functions need to return an array of
304  // offsets rather than just a number to loop over,
305  // so that executor and non-executor columns can
306  // be mixed.
307  // NOTE(adb): In binding to extension functions, we need to know some pretty specific
308  // type info about each of the physical coords cols for each geo type. I added checks
309  // there to ensure the physical coords col for the geo type match what we expect. If
310  // these values are ever changed, corresponding values in
311  // ExtensionFunctionsBinding.cpp::compute_narrowing_conv_scores and
312  // ExtensionFunctionsBinding.cpp::compute_widening_conv_scores will also need to be
313  // changed.
314  switch (type) {
315  case kPOINT:
316  return 1;
317  case kLINESTRING:
318  return 1; // omit bounds
319  case kPOLYGON:
320  return 2; // omit bounds, render group
321  case kMULTIPOLYGON:
322  return 3; // omit bounds, render group
323  default:
324  break;
325  }
326  return 0;
327  }
328  inline bool has_bounds() const {
329  switch (type) {
330  case kLINESTRING:
331  case kPOLYGON:
332  case kMULTIPOLYGON:
333  return true;
334  default:
335  break;
336  }
337  return false;
338  }
339  inline bool has_render_group() const {
340  switch (type) {
341  case kPOLYGON:
342  case kMULTIPOLYGON:
343  return true;
344  default:
345  break;
346  }
347  return false;
348  }
349  HOST DEVICE inline void set_type(SQLTypes t) { type = t; }
350  HOST DEVICE inline void set_subtype(SQLTypes st) { subtype = st; }
351  inline void set_dimension(int d) { dimension = d; }
352  inline void set_precision(int d) { dimension = d; }
353  inline void set_input_srid(int d) { dimension = d; }
354  inline void set_scale(int s) { scale = s; }
355  inline void set_output_srid(int s) { scale = s; }
356  inline void set_notnull(bool n) { notnull = n; }
357  inline void set_size(int s) { size = s; }
358  inline void set_fixed_size() { size = get_storage_size(); }
359  inline void set_compression(EncodingType c) { compression = c; }
360  inline void set_comp_param(int p) { comp_param = p; }
361 #ifndef __CUDACC__
362  inline std::string get_type_name() const {
363  if (IS_GEO(type)) {
364  std::string srid_string = "";
365  if (get_output_srid() > 0) {
366  srid_string = ", " + std::to_string(get_output_srid());
367  }
368  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
369  return type_name[static_cast<int>(subtype)] + "(" +
370  type_name[static_cast<int>(type)] + srid_string + ")";
371  }
372  std::string ps = "";
373  if (type == kDECIMAL || type == kNUMERIC || subtype == kDECIMAL ||
374  subtype == kNUMERIC) {
375  ps = "(" + std::to_string(dimension) + "," + std::to_string(scale) + ")";
376  } else if (type == kTIMESTAMP) {
377  ps = "(" + std::to_string(dimension) + ")";
378  }
379  if (type == kARRAY) {
380  auto elem_ti = get_elem_type();
381  auto num_elems = (size > 0) ? std::to_string(size / elem_ti.get_size()) : "";
382  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
383  return type_name[static_cast<int>(subtype)] + ps + "[" + num_elems + "]";
384  }
385  if (type == kCOLUMN) {
386  auto elem_ti = get_elem_type();
387  auto num_elems =
388  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
389  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
390  return "COLUMN<" + type_name[static_cast<int>(subtype)] + ps + ">" + num_elems;
391  }
392  return type_name[static_cast<int>(type)] + ps;
393  }
394  inline std::string get_compression_name() const { return comp_name[(int)compression]; }
395  inline std::string to_string() const {
396  return concat("(",
397  type_name[static_cast<int>(type)],
398  ", ",
399  get_dimension(),
400  ", ",
401  get_scale(),
402  ", ",
403  get_notnull() ? "not nullable" : "nullable",
404  ", ",
406  ", ",
407  get_comp_param(),
408  ", ",
409  type_name[static_cast<int>(subtype)],
410  ": ",
411  get_size(),
412  ": ",
414  ")");
415  }
416 #endif
417  inline bool is_string() const { return IS_STRING(type); }
418  inline bool is_string_array() const { return (type == kARRAY) && IS_STRING(subtype); }
419  inline bool is_integer() const { return IS_INTEGER(type); }
420  inline bool is_decimal() const { return type == kDECIMAL || type == kNUMERIC; }
421  inline bool is_fp() const { return type == kFLOAT || type == kDOUBLE; }
422  inline bool is_number() const { return IS_NUMBER(type); }
423  inline bool is_time() const { return is_datetime(type); }
424  inline bool is_boolean() const { return type == kBOOLEAN; }
425  inline bool is_array() const { return type == kARRAY; }
426  inline bool is_varlen_array() const { return type == kARRAY && size <= 0; }
427  inline bool is_fixlen_array() const { return type == kARRAY && size > 0; }
428  inline bool is_timeinterval() const { return IS_INTERVAL(type); }
429  inline bool is_geometry() const { return IS_GEO(type); }
430  inline bool is_column() const { return type == kCOLUMN; }
431 
432  inline bool is_varlen() const { // TODO: logically this should ignore fixlen arrays
433  return (IS_STRING(type) && compression != kENCODING_DICT) || type == kARRAY ||
434  IS_GEO(type);
435  }
436 
437  // need this here till is_varlen can be fixed w/o negative impact to existing code
438  inline bool is_varlen_indeed() const {
439  // SQLTypeInfo.is_varlen() is broken with fixedlen array now
440  // and seems left broken for some concern, so fix it locally
441  return is_varlen() && !is_fixlen_array();
442  }
443 
444  inline bool is_dict_encoded_string() const {
445  return is_string() && compression == kENCODING_DICT;
446  }
447 
448  HOST DEVICE inline bool operator!=(const SQLTypeInfo& rhs) const {
449  return type != rhs.get_type() || subtype != rhs.get_subtype() ||
450  dimension != rhs.get_dimension() || scale != rhs.get_scale() ||
451  compression != rhs.get_compression() ||
454  notnull != rhs.get_notnull();
455  }
456  HOST DEVICE inline bool operator==(const SQLTypeInfo& rhs) const {
457  return type == rhs.get_type() && subtype == rhs.get_subtype() &&
458  dimension == rhs.get_dimension() && scale == rhs.get_scale() &&
459  compression == rhs.get_compression() &&
462  notnull == rhs.get_notnull();
463  }
464 
465  inline int get_array_context_logical_size() const {
466  if (is_string()) {
467  auto comp_type(get_compression());
468  if (comp_type == kENCODING_DICT || comp_type == kENCODING_FIXED ||
469  comp_type == kENCODING_NONE) {
470  return sizeof(int32_t);
471  }
472  }
473  return get_logical_size();
474  }
475 
476  HOST DEVICE inline void operator=(const SQLTypeInfo& rhs) {
477  type = rhs.get_type();
478  subtype = rhs.get_subtype();
479  dimension = rhs.get_dimension();
480  scale = rhs.get_scale();
481  notnull = rhs.get_notnull();
483  comp_param = rhs.get_comp_param();
484  size = rhs.get_size();
485  }
486 
487  inline bool is_castable(const SQLTypeInfo& new_type_info) const {
488  // can always cast between the same type but different precision/scale/encodings
489  if (type == new_type_info.get_type()) {
490  return true;
491  // can always cast from or to string
492  } else if (is_string() || new_type_info.is_string()) {
493  return true;
494  // can cast between numbers
495  } else if (is_number() && new_type_info.is_number()) {
496  return true;
497  // can cast from timestamp or date to number (epoch)
498  } else if ((type == kTIMESTAMP || type == kDATE) && new_type_info.is_number()) {
499  return true;
500  // can cast from date to timestamp
501  } else if (type == kDATE && new_type_info.get_type() == kTIMESTAMP) {
502  return true;
503  } else if (type == kTIMESTAMP && new_type_info.get_type() == kDATE) {
504  return true;
505  } else if (type == kBOOLEAN && new_type_info.is_number()) {
506  return true;
507  } else if (type == kARRAY && new_type_info.get_type() == kARRAY) {
508  return get_elem_type().is_castable(new_type_info.get_elem_type());
509  } else if (type == kCOLUMN && new_type_info.get_type() == kCOLUMN) {
510  return get_elem_type().is_castable(new_type_info.get_elem_type());
511  } else {
512  return false;
513  }
514  }
515 
516  HOST DEVICE inline bool is_null(const Datum& d) const {
517  // assuming Datum is always uncompressed
518  switch (type) {
519  case kBOOLEAN:
520  return (int8_t)d.boolval == NULL_BOOLEAN;
521  case kTINYINT:
522  return d.tinyintval == NULL_TINYINT;
523  case kSMALLINT:
524  return d.smallintval == NULL_SMALLINT;
525  case kINT:
526  return d.intval == NULL_INT;
527  case kBIGINT:
528  case kNUMERIC:
529  case kDECIMAL:
530  return d.bigintval == NULL_BIGINT;
531  case kFLOAT:
532  return d.floatval == NULL_FLOAT;
533  case kDOUBLE:
534  return d.doubleval == NULL_DOUBLE;
535  case kTIME:
536  case kTIMESTAMP:
537  case kDATE:
538  return d.bigintval == NULL_BIGINT;
539  case kTEXT:
540  case kVARCHAR:
541  case kCHAR:
542  // @TODO handle null strings
543  break;
544  case kNULLT:
545  return true;
546  case kARRAY:
547  return d.arrayval == NULL || d.arrayval->is_null;
548  default:
549  break;
550  }
551  return false;
552  }
553  HOST DEVICE inline bool is_null(const int8_t* val) const {
554  if (type == kFLOAT) {
555  return *(float*)val == NULL_FLOAT;
556  }
557  if (type == kDOUBLE) {
558  return *(double*)val == NULL_DOUBLE;
559  }
560  // val can be either compressed or uncompressed
561  switch (size) {
562  case 1:
563  return *val == NULL_TINYINT;
564  case 2:
565  return *(int16_t*)val == NULL_SMALLINT;
566  case 4:
567  return *(int32_t*)val == NULL_INT;
568  case 8:
569  return *(int64_t*)val == NULL_BIGINT;
570  case kNULLT:
571  return true;
572  default:
573  // @TODO(wei) handle null strings
574  break;
575  }
576  return false;
577  }
578  HOST DEVICE inline bool is_null_fixlen_array(const int8_t* val, int array_size) const {
579  // Check if fixed length array has a NULL_ARRAY sentinel as the first element
580  if (type == kARRAY && val && array_size > 0 && array_size == size) {
581  // Need to create element type to get the size, but can't call get_elem_type()
582  // since this is a HOST DEVICE function. Going through copy constructor instead.
583  auto elem_ti{*this};
584  elem_ti.set_type(subtype);
585  elem_ti.set_subtype(kNULLT);
586  auto elem_size = elem_ti.get_storage_size();
587  if (elem_size < 1) {
588  return false;
589  }
590  if (subtype == kFLOAT) {
591  return *(float*)val == NULL_ARRAY_FLOAT;
592  }
593  if (subtype == kDOUBLE) {
594  return *(double*)val == NULL_ARRAY_DOUBLE;
595  }
596  switch (elem_size) {
597  case 1:
598  return *val == NULL_ARRAY_TINYINT;
599  case 2:
600  return *(int16_t*)val == NULL_ARRAY_SMALLINT;
601  case 4:
602  return *(int32_t*)val == NULL_ARRAY_INT;
603  case 8:
604  return *(int64_t*)val == NULL_ARRAY_BIGINT;
605  default:
606  return false;
607  }
608  }
609  return false;
610  }
611  HOST DEVICE inline bool is_null_point_coord_array(const int8_t* val,
612  int array_size) const {
613  if (type == kARRAY && subtype == kTINYINT && val && array_size > 0 &&
614  array_size == size) {
615  if (array_size == 2 * sizeof(double)) {
616  return *(double*)val == NULL_ARRAY_DOUBLE;
617  }
618  if (array_size == 2 * sizeof(int32_t)) {
619  return *(uint32_t*)val == NULL_ARRAY_COMPRESSED_32;
620  }
621  }
622  return false;
623  }
624  inline SQLTypeInfo get_elem_type() const {
625  return SQLTypeInfo(
627  }
628  inline SQLTypeInfo get_array_type() const {
630  }
631 
632  inline bool is_date_in_days() const {
633  if (type == kDATE) {
634  const auto comp_type = get_compression();
635  if (comp_type == kENCODING_DATE_IN_DAYS) {
636  return true;
637  }
638  }
639  return false;
640  }
641 
642  inline bool is_date() const { return type == kDATE; }
643 
644  inline bool is_high_precision_timestamp() const {
645  if (type == kTIMESTAMP) {
646  const auto dimension = get_dimension();
647  if (dimension > 0) {
648  return true;
649  }
650  }
651  return false;
652  }
653 
654  inline bool is_timestamp() const { return type == kTIMESTAMP; }
655 
656  private:
657  SQLTypes type; // type id
658  SQLTypes subtype; // element type of arrays
659  int dimension; // VARCHAR/CHAR length or NUMERIC/DECIMAL precision
660  int scale; // NUMERIC/DECIMAL scale
661  bool notnull; // nullable? a hint, not used for type checking
662  EncodingType compression; // compression scheme
663  int comp_param; // compression parameter when applicable for certain schemes
664  int size; // size of the type in bytes. -1 for variable size
665 #ifndef __CUDACC__
666  static std::string type_name[kSQLTYPE_LAST];
667  static std::string comp_name[kENCODING_LAST];
668 #endif
669  HOST DEVICE inline int get_storage_size() const {
670  switch (type) {
671  case kBOOLEAN:
672  return sizeof(int8_t);
673  case kTINYINT:
674  return sizeof(int8_t);
675  case kSMALLINT:
676  switch (compression) {
677  case kENCODING_NONE:
678  return sizeof(int16_t);
679  case kENCODING_FIXED:
680  case kENCODING_SPARSE:
681  return comp_param / 8;
682  case kENCODING_RL:
683  case kENCODING_DIFF:
684  break;
685  default:
686  assert(false);
687  }
688  break;
689  case kINT:
690  switch (compression) {
691  case kENCODING_NONE:
692  return sizeof(int32_t);
693  case kENCODING_FIXED:
694  case kENCODING_SPARSE:
695  return comp_param / 8;
696  case kENCODING_RL:
697  case kENCODING_DIFF:
698  break;
699  default:
700  assert(false);
701  }
702  break;
703  case kBIGINT:
704  case kNUMERIC:
705  case kDECIMAL:
706  switch (compression) {
707  case kENCODING_NONE:
708  return sizeof(int64_t);
709  case kENCODING_FIXED:
710  case kENCODING_SPARSE:
711  return comp_param / 8;
712  case kENCODING_RL:
713  case kENCODING_DIFF:
714  break;
715  default:
716  assert(false);
717  }
718  break;
719  case kFLOAT:
720  switch (compression) {
721  case kENCODING_NONE:
722  return sizeof(float);
723  case kENCODING_FIXED:
724  case kENCODING_RL:
725  case kENCODING_DIFF:
726  case kENCODING_SPARSE:
727  assert(false);
728  break;
729  default:
730  assert(false);
731  }
732  break;
733  case kDOUBLE:
734  switch (compression) {
735  case kENCODING_NONE:
736  return sizeof(double);
737  case kENCODING_FIXED:
738  case kENCODING_RL:
739  case kENCODING_DIFF:
740  case kENCODING_SPARSE:
741  assert(false);
742  break;
743  default:
744  assert(false);
745  }
746  break;
747  case kTIMESTAMP:
748  case kTIME:
749  case kINTERVAL_DAY_TIME:
751  case kDATE:
752  switch (compression) {
753  case kENCODING_NONE:
754  return sizeof(int64_t);
755  case kENCODING_FIXED:
756  if (type == kTIMESTAMP && dimension > 0) {
757  assert(false); // disable compression for timestamp precisions
758  }
759  return comp_param / 8;
760  case kENCODING_RL:
761  case kENCODING_DIFF:
762  case kENCODING_SPARSE:
763  assert(false);
764  break;
766  switch (comp_param) {
767  case 0:
768  return 4; // Default date encoded in days is 32 bits
769  case 16:
770  case 32:
771  return comp_param / 8;
772  default:
773  assert(false);
774  break;
775  }
776  default:
777  assert(false);
778  }
779  break;
780  case kTEXT:
781  case kVARCHAR:
782  case kCHAR:
783  if (compression == kENCODING_DICT) {
784  return sizeof(int32_t); // @TODO(wei) must check DictDescriptor
785  }
786  break;
787  case kARRAY:
788  // TODO: return size for fixlen arrays?
789  break;
790  case kPOINT:
791  case kLINESTRING:
792  case kPOLYGON:
793  case kMULTIPOLYGON:
794  case kCOLUMN:
795  break;
796  default:
797  break;
798  }
799  return -1;
800  }
801 };
802 
804 
805 #ifndef __CUDACC__
806 #include <string_view>
807 
808 Datum StringToDatum(std::string_view s, SQLTypeInfo& ti);
809 std::string DatumToString(Datum d, const SQLTypeInfo& ti);
810 bool DatumEqual(const Datum, const Datum, const SQLTypeInfo& ti);
811 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
812  const SQLTypeInfo& type_info,
813  const SQLTypeInfo& new_type_info);
814 #endif
815 
816 #include "../QueryEngine/DateAdd.h"
817 #include "../QueryEngine/DateTruncate.h"
818 #include "../QueryEngine/ExtractFromTime.h"
819 
821  EncodingType encoding = type_info.get_compression();
822  if (encoding == kENCODING_DATE_IN_DAYS ||
823  (encoding == kENCODING_FIXED && type_info.get_type() != kARRAY)) {
824  encoding = kENCODING_NONE;
825  }
826  return SQLTypeInfo(type_info.get_type(),
827  type_info.get_dimension(),
828  type_info.get_scale(),
829  type_info.get_notnull(),
830  encoding,
831  type_info.get_comp_param(),
832  type_info.get_subtype());
833 }
834 
836  SQLTypeInfo nullable_type_info = type_info;
837  nullable_type_info.set_notnull(false);
838  return nullable_type_info;
839 }
840 
842  SQLTypeInfo nullable_type_info = get_logical_type_info(type_info);
843  return get_nullable_type_info(nullable_type_info);
844 }
845 
// NULL sentinel for integer type T, widened to int64_t: the minimum value for
// signed types, the maximum value for unsigned types.
template <class T>
constexpr inline int64_t inline_int_null_value() {
  using Limits = std::numeric_limits<T>;
  if (std::is_signed<T>::value) {
    return Limits::min();
  }
  return Limits::max();
}
851 
// NULL-array sentinel for integer type T, widened to int64_t: one above the
// minimum for signed types, one below the maximum for unsigned types.
// TODO: null_array values in signed types would step on max valid value
// in fixlen unsigned arrays, the max valid value may need to be lowered.
template <class T>
constexpr inline int64_t inline_int_null_array_value() {
  using Limits = std::numeric_limits<T>;
  if (std::is_signed<T>::value) {
    return Limits::min() + 1;
  }
  return Limits::max() - 1;
}
859 
// Largest non-NULL value for integer type T, widened to int64_t: the maximum
// for signed types, one below the maximum for unsigned types (whose maximum
// is the NULL sentinel).
template <class T>
constexpr inline int64_t max_valid_int_value() {
  using Limits = std::numeric_limits<T>;
  if (std::is_signed<T>::value) {
    return Limits::max();
  }
  return Limits::max() - 1;
}
865 
866 #include "InlineNullValues.h"
867 
// 32-bit signed offset types.
// NOTE(review): presumably byte offsets into string / array storage buffers -
// confirm against the code that uses them.
using StringOffsetT = int32_t;
using ArrayOffsetT = int32_t;
870 
871 inline int8_t* appendDatum(int8_t* buf, Datum d, const SQLTypeInfo& ti) {
872  switch (ti.get_type()) {
873  case kBOOLEAN:
874  *(bool*)buf = d.boolval;
875  return buf + sizeof(bool);
876  case kNUMERIC:
877  case kDECIMAL:
878  case kBIGINT:
879  *(int64_t*)buf = d.bigintval;
880  return buf + sizeof(int64_t);
881  case kINT:
882  *(int32_t*)buf = d.intval;
883  return buf + sizeof(int32_t);
884  case kSMALLINT:
885  *(int16_t*)buf = d.smallintval;
886  return buf + sizeof(int16_t);
887  case kTINYINT:
888  *(int8_t*)buf = d.tinyintval;
889  return buf + sizeof(int8_t);
890  case kFLOAT:
891  *(float*)buf = d.floatval;
892  return buf + sizeof(float);
893  case kDOUBLE:
894  *(double*)buf = d.doubleval;
895  return buf + sizeof(double);
896  case kTIME:
897  case kTIMESTAMP:
898  case kDATE:
899  *reinterpret_cast<int64_t*>(buf) = d.bigintval;
900  return buf + sizeof(int64_t);
901  default:
902  return nullptr;
903  }
904 }
int8_t tinyintval
Definition: sqltypes.h:135
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:260
void set_compression(EncodingType c)
Definition: sqltypes.h:359
void set_size(int s)
Definition: sqltypes.h:357
#define NULL_DOUBLE
Definition: sqltypes.h:186
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
HOST DEVICE void operator=(const SQLTypeInfo &rhs)
Definition: sqltypes.h:476
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:239
bool is_varlen_array() const
Definition: sqltypes.h:426
DEVICE VarlenDatum()
Definition: sqltypes.h:78
Definition: sqltypes.h:51
DEVICE constexpr bool is_cuda_compiler()
Definition: sqltypes.h:122
SQLTypes
Definition: sqltypes.h:40
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:150
bool is_timestamp() const
Definition: sqltypes.h:654
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:151
#define NULL_ARRAY_COMPRESSED_32
Definition: sqltypes.h:196
bool is_null
Definition: sqltypes.h:76
#define NULL_BIGINT
Definition: sqltypes.h:184
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:194
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:841
HOST DEVICE bool operator==(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:456
bool boolval
Definition: sqltypes.h:134
bool is_fp() const
Definition: sqltypes.h:421
HOST DEVICE int get_scale() const
Definition: sqltypes.h:264
bool is_varlen() const
Definition: sqltypes.h:432
constexpr int64_t inline_int_null_value()
Definition: sqltypes.h:847
std::string get_compression_name() const
Definition: sqltypes.h:394
std::string concat(Types &&...parms)
VarlenDatum * arrayval
Definition: sqltypes.h:141
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:350
SQLTypeInfo(SQLTypes t, int d, int s)
Definition: sqltypes.h:229
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:820
#define NULL_ARRAY_SMALLINT
Definition: sqltypes.h:190
#define NULL_ARRAY_TINYINT
Definition: sqltypes.h:189
Definition: sqltypes.h:67
HOST DEVICE bool is_null_fixlen_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:578
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
bool is_number() const
Definition: sqltypes.h:422
int32_t intval
Definition: sqltypes.h:137
bool is_time() const
Definition: sqltypes.h:423
std::string to_string(char const *&&v)
HostArrayDatum(size_t const l, int8_t *p, bool const n, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:112
int8_t * pointer
Definition: sqltypes.h:75
int32_t StringOffsetT
Definition: sqltypes.h:868
bool has_render_group() const
Definition: sqltypes.h:339
#define DEVICE
constexpr int64_t max_valid_int_value()
Definition: sqltypes.h:861
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:131
#define HOST
void set_input_srid(int d)
Definition: sqltypes.h:353
float floatval
Definition: sqltypes.h:139
std::string to_string() const
Definition: sqltypes.h:395
EncodingType
Definition: sqltypes.h:156
int get_physical_cols() const
Definition: sqltypes.h:280
bool is_fixlen_array() const
Definition: sqltypes.h:427
bool is_castable(const SQLTypeInfo &new_type_info) const
Definition: sqltypes.h:487
#define IS_INTERVAL(T)
Definition: sqltypes.h:176
void set_fixed_size()
Definition: sqltypes.h:358
std::shared_ptr< int8_t > ManagedPtr
Definition: sqltypes.h:93
HOST DEVICE bool operator!=(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:448
int get_logical_size() const
Definition: sqltypes.h:270
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:189
static std::string type_name[kSQLTYPE_LAST]
Definition: sqltypes.h:666
bool is_integer() const
Definition: sqltypes.h:419
#define NULL_TINYINT
Definition: sqltypes.h:181
VarlenDatum(const size_t l, int8_t *p, const bool n)
Definition: sqltypes.h:81
SQLTypes subtype
Definition: sqltypes.h:658
void set_scale(int s)
Definition: sqltypes.h:354
bool notnull
Definition: sqltypes.h:661
bool has_bounds() const
Definition: sqltypes.h:328
int64_t bigintval
Definition: sqltypes.h:138
#define NULL_FLOAT
Definition: sqltypes.h:185
HostArrayDatum(size_t const l, int8_t *p, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:106
bool is_timeinterval() const
Definition: sqltypes.h:428
constexpr int64_t inline_int_null_array_value()
Definition: sqltypes.h:853
ManagedPtr data_ptr
Definition: sqltypes.h:115
HostArrayDatum()=default
int16_t smallintval
Definition: sqltypes.h:136
SQLTypeInfo(SQLTypes t, int d, int s, bool n)
Definition: sqltypes.h:220
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:124
#define NULL_ARRAY_INT
Definition: sqltypes.h:191
#define NULL_INT
Definition: sqltypes.h:183
bool is_boolean() const
Definition: sqltypes.h:424
HostArrayDatum(size_t const l, int8_t *p, bool const n)
Definition: sqltypes.h:100
bool g_enable_smem_group_by true
void operator()(int8_t *p)
Definition: sqltypes.h:89
SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:211
SQLTypeInfo(SQLTypes t)
Definition: sqltypes.h:239
SQLTypeInfo(SQLTypes t, bool n, EncodingType c)
Definition: sqltypes.h:240
SQLTypeInfo get_array_type() const
Definition: sqltypes.h:628
EncodingType compression
Definition: sqltypes.h:662
int get_precision() const
Definition: sqltypes.h:262
std::string * stringval
Definition: sqltypes.h:143
void set_output_srid(int s)
Definition: sqltypes.h:355
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:302
bool is_column() const
Definition: sqltypes.h:430
DEVICE DeviceArrayDatum()
Definition: sqltypes.h:119
HOST DEVICE bool is_null(const Datum &d) const
Definition: sqltypes.h:516
void set_comp_param(int p)
Definition: sqltypes.h:360
HOST DEVICE int get_storage_size() const
Definition: sqltypes.h:669
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqltypes.h:54
Definition: sqltypes.h:55
static std::string comp_name[kENCODING_LAST]
Definition: sqltypes.h:667
int64_t const int32_t sz assert(dest)
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:871
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:267
bool is_date_in_days() const
Definition: sqltypes.h:632
int get_array_context_logical_size() const
Definition: sqltypes.h:465
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:318
int32_t ArrayOffsetT
Definition: sqltypes.h:869
void set_dimension(int d)
Definition: sqltypes.h:351
#define NULL_ARRAY_BIGINT
Definition: sqltypes.h:192
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:261
#define IS_INTEGER(T)
Definition: sqltypes.h:168
std::string get_type_name() const
Definition: sqltypes.h:362
Definition: sqltypes.h:43
#define IS_STRING(T)
Definition: sqltypes.h:173
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:268
HOST DEVICE int get_input_srid() const
Definition: sqltypes.h:263
virtual DEVICE ~VarlenDatum()
Definition: sqltypes.h:79
#define NULL_SMALLINT
Definition: sqltypes.h:182
bool g_enable_watchdog false
Definition: Execute.cpp:74
void set_notnull(bool n)
Definition: sqltypes.h:356
bool is_geometry() const
Definition: sqltypes.h:429
bool is_high_precision_timestamp() const
Definition: sqltypes.h:644
SQLTypes type
Definition: sqltypes.h:657
HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
Definition: sqltypes.h:97
bool is_dict_encoded_string() const
Definition: sqltypes.h:444
Definition: sqltypes.h:47
bool is_varlen_indeed() const
Definition: sqltypes.h:438
bool is_string() const
Definition: sqltypes.h:417
SQLTypeInfo(SQLTypes t, bool n)
Definition: sqltypes.h:230
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:266
int8_t * numbersPtr
Definition: sqltypes.h:149
bool is_string_array() const
Definition: sqltypes.h:418
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:624
bool is_decimal() const
Definition: sqltypes.h:420
int get_physical_coord_cols() const
Definition: sqltypes.h:295
#define IS_NUMBER(T)
Definition: sqltypes.h:170
void operator()(int8_t *)
Definition: sqltypes.h:86
#define IS_GEO(T)
Definition: sqltypes.h:174
#define TRANSIENT_DICT(ID)
Definition: sqltypes.h:199
int comp_param
Definition: sqltypes.h:663
#define NULL_BOOLEAN
Definition: sqltypes.h:180
bool is_date() const
Definition: sqltypes.h:642
bool is_array() const
Definition: sqltypes.h:425
void set_precision(int d)
Definition: sqltypes.h:352
SQLTypeInfo get_nullable_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:835
int dimension
Definition: sqltypes.h:659
HOST DEVICE bool is_null_point_coord_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:611
double doubleval
Definition: sqltypes.h:140
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:265
constexpr auto is_datetime(SQLTypes type)
Definition: sqltypes.h:202
HOST DEVICE bool is_null(const int8_t *val) const
Definition: sqltypes.h:553
size_t length
Definition: sqltypes.h:74
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:193
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:349