OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sqltypes.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "StringTransform.h"
26 #include "funcannotations.h"
27 
#include <cassert>
#include <cfloat>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <limits>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>
37 
38 // must not change because these values persist in catalogs.
39 enum SQLTypes {
40  kNULLT = 0, // type for null values
41  kBOOLEAN = 1,
42  kCHAR = 2,
43  kVARCHAR = 3,
44  kNUMERIC = 4,
45  kDECIMAL = 5,
46  kINT = 6,
47  kSMALLINT = 7,
48  kFLOAT = 8,
49  kDOUBLE = 9,
50  kTIME = 10,
51  kTIMESTAMP = 11,
52  kBIGINT = 12,
53  kTEXT = 13,
54  kDATE = 14,
55  kARRAY = 15,
58  kPOINT = 18,
60  kPOLYGON = 20,
62  kTINYINT = 22,
63  kGEOMETRY = 23,
64  kGEOGRAPHY = 24,
65  kEVAL_CONTEXT_TYPE = 25, // Placeholder Type for ANY
66  kVOID = 26,
67  kCURSOR = 27,
68  kCOLUMN = 28,
70 };
71 
72 struct VarlenDatum {
73  size_t length;
74  int8_t* pointer;
75  bool is_null;
76 
77  DEVICE VarlenDatum() : length(0), pointer(nullptr), is_null(true) {}
78  DEVICE virtual ~VarlenDatum() {}
79 
80  VarlenDatum(const size_t l, int8_t* p, const bool n)
81  : length(l), pointer(p), is_null(n) {}
82 };
83 
85  void operator()(int8_t*) {}
86 };
// Deleter that releases a buffer with std::free. The pointer must therefore
// come from the malloc family (or be null, which std::free accepts as a
// no-op). The call operator is const so the deleter also works when held
// through a const reference.
struct FreeDeleter {
  void operator()(int8_t* p) const noexcept { std::free(p); }
};
90 
91 struct HostArrayDatum : public VarlenDatum {
92  using ManagedPtr = std::shared_ptr<int8_t>;
93 
94  HostArrayDatum() = default;
95 
96  HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
97  : VarlenDatum(l, p.get(), n), data_ptr(p) {}
98 
99  HostArrayDatum(size_t const l, int8_t* p, bool const n)
100  : VarlenDatum(l, p, n), data_ptr(p, FreeDeleter()){};
101 
102  template <typename CUSTOM_DELETER,
103  typename = std::enable_if_t<
104  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
105  HostArrayDatum(size_t const l, int8_t* p, CUSTOM_DELETER custom_deleter)
106  : VarlenDatum(l, p, 0 == l), data_ptr(p, custom_deleter) {}
107 
108  template <typename CUSTOM_DELETER,
109  typename = std::enable_if_t<
110  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
111  HostArrayDatum(size_t const l, int8_t* p, bool const n, CUSTOM_DELETER custom_deleter)
112  : VarlenDatum(l, p, n), data_ptr(p, custom_deleter) {}
113 
115 };
116 
117 struct DeviceArrayDatum : public VarlenDatum {
119 };
120 
121 inline DEVICE constexpr bool is_cuda_compiler() {
122 #ifdef __CUDACC__
123  return true;
124 #else
125  return false;
126 #endif
127 }
128 
129 using ArrayDatum =
130  std::conditional_t<is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum>;
131 
132 union Datum {
133  bool boolval;
134  int8_t tinyintval;
135  int16_t smallintval;
136  int32_t intval;
137  int64_t bigintval;
138  float floatval;
139  double doubleval;
141 #ifndef __CUDACC__
142  std::string* stringval; // string value
143 #endif
144 };
145 
146 #ifndef __CUDACC__
148  int8_t* numbersPtr;
149  std::vector<std::string>* stringsPtr;
150  std::vector<ArrayDatum>* arraysPtr;
151 };
152 #endif
153 
154 // must not change because these values persist in catalogs.
156  kENCODING_NONE = 0, // no encoding
157  kENCODING_FIXED = 1, // Fixed-bit encoding
158  kENCODING_RL = 2, // Run Length encoding
159  kENCODING_DIFF = 3, // Differential encoding
160  kENCODING_DICT = 4, // Dictionary encoding
161  kENCODING_SPARSE = 5, // Null encoding for sparse columns
162  kENCODING_GEOINT = 6, // Encoding coordinates as integers
163  kENCODING_DATE_IN_DAYS = 7, // Date encoding in days
165 };
166 
167 #define IS_INTEGER(T) \
168  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kBIGINT) || ((T) == kTINYINT))
169 #define IS_NUMBER(T) \
170  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kDOUBLE) || ((T) == kFLOAT) || \
171  ((T) == kBIGINT) || ((T) == kNUMERIC) || ((T) == kDECIMAL) || ((T) == kTINYINT))
172 #define IS_STRING(T) (((T) == kTEXT) || ((T) == kVARCHAR) || ((T) == kCHAR))
173 #define IS_GEO(T) \
174  (((T) == kPOINT) || ((T) == kLINESTRING) || ((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
175 #define IS_INTERVAL(T) ((T) == kINTERVAL_DAY_TIME || (T) == kINTERVAL_YEAR_MONTH)
176 #define IS_DECIMAL(T) ((T) == kNUMERIC || (T) == kDECIMAL)
177 #define IS_GEO_POLY(T) (((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
178 
179 #define NULL_BOOLEAN INT8_MIN
180 #define NULL_TINYINT INT8_MIN
181 #define NULL_SMALLINT INT16_MIN
182 #define NULL_INT INT32_MIN
183 #define NULL_BIGINT INT64_MIN
184 #define NULL_FLOAT FLT_MIN
185 #define NULL_DOUBLE DBL_MIN
186 
187 #define NULL_ARRAY_BOOLEAN (INT8_MIN + 1)
188 #define NULL_ARRAY_TINYINT (INT8_MIN + 1)
189 #define NULL_ARRAY_SMALLINT (INT16_MIN + 1)
190 #define NULL_ARRAY_INT (INT32_MIN + 1)
191 #define NULL_ARRAY_BIGINT (INT64_MIN + 1)
192 #define NULL_ARRAY_FLOAT (FLT_MIN * 2.0)
193 #define NULL_ARRAY_DOUBLE (DBL_MIN * 2.0)
194 
195 #define NULL_ARRAY_COMPRESSED_32 0x80000000U
196 
197 #define TRANSIENT_DICT_ID 0
198 #define TRANSIENT_DICT(ID) (-(ID))
199 #define REGULAR_DICT(TRANSIENTID) (-(TRANSIENTID))
200 
201 constexpr auto is_datetime(SQLTypes type) {
202  return type == kTIME || type == kTIMESTAMP || type == kDATE;
203 }
204 
205 // @type SQLTypeInfo
206 // @brief a structure to capture all type information including
207 // length, precision, scale, etc.
208 class SQLTypeInfo {
209  public:
210  SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
211  : type(t)
212  , subtype(st)
213  , dimension(d)
214  , scale(s)
215  , notnull(n)
216  , compression(c)
217  , comp_param(p)
218  , size(get_storage_size()) {}
219  SQLTypeInfo(SQLTypes t, int d, int s, bool n)
220  : type(t)
221  , subtype(kNULLT)
222  , dimension(d)
223  , scale(s)
224  , notnull(n)
226  , comp_param(0)
227  , size(get_storage_size()) {}
228  SQLTypeInfo(SQLTypes t, int d, int s) : SQLTypeInfo(t, d, s, false) {}
230  : type(t)
231  , subtype(kNULLT)
232  , dimension(0)
233  , scale(0)
234  , notnull(n)
236  , comp_param(0)
237  , size(get_storage_size()) {}
240  : type(t)
241  , subtype(kNULLT)
242  , dimension(0)
243  , scale(0)
244  , notnull(n)
245  , compression(c)
246  , comp_param(0)
247  , size(get_storage_size()) {}
249  : type(kNULLT)
250  , subtype(kNULLT)
251  , dimension(0)
252  , scale(0)
253  , notnull(false)
255  , comp_param(0)
256  , size(0) {}
257 
258  HOST DEVICE inline SQLTypes get_type() const { return type; }
259  HOST DEVICE inline SQLTypes get_subtype() const { return subtype; }
260  HOST DEVICE inline int get_dimension() const { return dimension; }
261  inline int get_precision() const { return dimension; }
262  HOST DEVICE inline int get_input_srid() const { return dimension; }
263  HOST DEVICE inline int get_scale() const { return scale; }
264  HOST DEVICE inline int get_output_srid() const { return scale; }
265  HOST DEVICE inline bool get_notnull() const { return notnull; }
267  HOST DEVICE inline int get_comp_param() const { return comp_param; }
268  HOST DEVICE inline int get_size() const { return size; }
269  inline int get_logical_size() const {
272  return ti.get_size();
273  }
274  if (compression == kENCODING_DICT) {
275  return 4;
276  }
277  return get_size();
278  }
279  inline int get_physical_cols() const {
280  switch (type) {
281  case kPOINT:
282  return 1; // coords
283  case kLINESTRING:
284  return 2; // coords, bounds
285  case kPOLYGON:
286  return 4; // coords, ring_sizes, bounds, render_group
287  case kMULTIPOLYGON:
288  return 5; // coords, ring_sizes, poly_rings, bounds, render_group
289  default:
290  break;
291  }
292  return 0;
293  }
294  inline int get_physical_coord_cols() const {
295  // @TODO dmitri/simon rename this function?
296  // It needs to return the number of extra columns
297  // which need to go through the executor, as opposed
298  // to those which are only needed by CPU for poly
299  // cache building or what-not. For now, we just omit
300  // the Render Group column. If we add Bounding Box
301  // or something this may require rethinking. Perhaps
302  // these two functions need to return an array of
303  // offsets rather than just a number to loop over,
304  // so that executor and non-executor columns can
305  // be mixed.
306  // NOTE(adb): In binding to extension functions, we need to know some pretty specific
307  // type info about each of the physical coords cols for each geo type. I added checks
308  // there to ensure the physical coords col for the geo type match what we expect. If
309  // these values are ever changed, corresponding values in
310  // ExtensionFunctionsBinding.cpp::compute_narrowing_conv_scores and
311  // ExtensionFunctionsBinding.cpp::compute_widening_conv_scores will also need to be
312  // changed.
313  switch (type) {
314  case kPOINT:
315  return 1;
316  case kLINESTRING:
317  return 1; // omit bounds
318  case kPOLYGON:
319  return 2; // omit bounds, render group
320  case kMULTIPOLYGON:
321  return 3; // omit bounds, render group
322  default:
323  break;
324  }
325  return 0;
326  }
327  inline bool has_bounds() const {
328  switch (type) {
329  case kLINESTRING:
330  case kPOLYGON:
331  case kMULTIPOLYGON:
332  return true;
333  default:
334  break;
335  }
336  return false;
337  }
338  inline bool has_render_group() const {
339  switch (type) {
340  case kPOLYGON:
341  case kMULTIPOLYGON:
342  return true;
343  default:
344  break;
345  }
346  return false;
347  }
348  HOST DEVICE inline void set_type(SQLTypes t) { type = t; }
349  HOST DEVICE inline void set_subtype(SQLTypes st) { subtype = st; }
350  inline void set_dimension(int d) { dimension = d; }
351  inline void set_precision(int d) { dimension = d; }
352  inline void set_input_srid(int d) { dimension = d; }
353  inline void set_scale(int s) { scale = s; }
354  inline void set_output_srid(int s) { scale = s; }
355  inline void set_notnull(bool n) { notnull = n; }
356  inline void set_size(int s) { size = s; }
357  inline void set_fixed_size() { size = get_storage_size(); }
358  inline void set_compression(EncodingType c) { compression = c; }
359  inline void set_comp_param(int p) { comp_param = p; }
360 #ifndef __CUDACC__
361  inline std::string get_type_name() const {
362  if (IS_GEO(type)) {
363  std::string srid_string = "";
364  if (get_output_srid() > 0) {
365  srid_string = ", " + std::to_string(get_output_srid());
366  }
367  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
368  return type_name[static_cast<int>(subtype)] + "(" +
369  type_name[static_cast<int>(type)] + srid_string + ")";
370  }
371  std::string ps = "";
372  if (type == kDECIMAL || type == kNUMERIC || subtype == kDECIMAL ||
373  subtype == kNUMERIC) {
374  ps = "(" + std::to_string(dimension) + "," + std::to_string(scale) + ")";
375  } else if (type == kTIMESTAMP) {
376  ps = "(" + std::to_string(dimension) + ")";
377  }
378  if (type == kARRAY) {
379  auto elem_ti = get_elem_type();
380  auto num_elems = (size > 0) ? std::to_string(size / elem_ti.get_size()) : "";
381  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
382  return type_name[static_cast<int>(subtype)] + ps + "[" + num_elems + "]";
383  }
384  if (type == kCOLUMN) {
385  auto elem_ti = get_elem_type();
386  auto num_elems = (size > 0) ? std::to_string(size / elem_ti.get_size()) : "";
387  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
388  return "Column" + type_name[static_cast<int>(subtype)] + ps + "[" + num_elems + "]";
389  }
390  return type_name[static_cast<int>(type)] + ps;
391  }
392  inline std::string get_compression_name() const { return comp_name[(int)compression]; }
393  inline std::string to_string() const {
394  return concat("(",
395  type_name[static_cast<int>(type)],
396  ", ",
397  get_dimension(),
398  ", ",
399  get_scale(),
400  ", ",
401  get_notnull() ? "not nullable" : "nullable",
402  ", ",
404  ", ",
405  get_comp_param(),
406  ", ",
407  type_name[static_cast<int>(subtype)],
408  ": ",
409  get_size(),
410  ": ",
412  ")");
413  }
414 #endif
415  inline bool is_string() const { return IS_STRING(type); }
416  inline bool is_string_array() const { return (type == kARRAY) && IS_STRING(subtype); }
417  inline bool is_integer() const { return IS_INTEGER(type); }
418  inline bool is_decimal() const { return type == kDECIMAL || type == kNUMERIC; }
419  inline bool is_fp() const { return type == kFLOAT || type == kDOUBLE; }
420  inline bool is_number() const { return IS_NUMBER(type); }
421  inline bool is_time() const { return is_datetime(type); }
422  inline bool is_boolean() const { return type == kBOOLEAN; }
423  inline bool is_array() const { return type == kARRAY; }
424  inline bool is_varlen_array() const { return type == kARRAY && size <= 0; }
425  inline bool is_fixlen_array() const { return type == kARRAY && size > 0; }
426  inline bool is_timeinterval() const { return IS_INTERVAL(type); }
427  inline bool is_geometry() const { return IS_GEO(type); }
428  inline bool is_column() const { return type == kCOLUMN; }
429 
430  inline bool is_varlen() const { // TODO: logically this should ignore fixlen arrays
431  return (IS_STRING(type) && compression != kENCODING_DICT) || type == kARRAY ||
432  IS_GEO(type);
433  }
434 
435  // need this here till is_varlen can be fixed w/o negative impact to existing code
436  inline bool is_varlen_indeed() const {
437  // SQLTypeInfo.is_varlen() is broken with fixedlen array now
438  // and seems left broken for some concern, so fix it locally
439  return is_varlen() && !is_fixlen_array();
440  }
441 
442  inline bool is_dict_encoded_string() const {
443  return is_string() && compression == kENCODING_DICT;
444  }
445 
446  HOST DEVICE inline bool operator!=(const SQLTypeInfo& rhs) const {
447  return type != rhs.get_type() || subtype != rhs.get_subtype() ||
448  dimension != rhs.get_dimension() || scale != rhs.get_scale() ||
449  compression != rhs.get_compression() ||
452  notnull != rhs.get_notnull();
453  }
454  HOST DEVICE inline bool operator==(const SQLTypeInfo& rhs) const {
455  return type == rhs.get_type() && subtype == rhs.get_subtype() &&
456  dimension == rhs.get_dimension() && scale == rhs.get_scale() &&
457  compression == rhs.get_compression() &&
460  notnull == rhs.get_notnull();
461  }
462 
463  inline int get_array_context_logical_size() const {
464  if (is_string()) {
465  auto comp_type(get_compression());
466  if (comp_type == kENCODING_DICT || comp_type == kENCODING_FIXED ||
467  comp_type == kENCODING_NONE) {
468  return sizeof(int32_t);
469  }
470  }
471  return get_logical_size();
472  }
473 
474  HOST DEVICE inline void operator=(const SQLTypeInfo& rhs) {
475  type = rhs.get_type();
476  subtype = rhs.get_subtype();
477  dimension = rhs.get_dimension();
478  scale = rhs.get_scale();
479  notnull = rhs.get_notnull();
481  comp_param = rhs.get_comp_param();
482  size = rhs.get_size();
483  }
484 
485  inline bool is_castable(const SQLTypeInfo& new_type_info) const {
486  // can always cast between the same type but different precision/scale/encodings
487  if (type == new_type_info.get_type()) {
488  return true;
489  // can always cast from or to string
490  } else if (is_string() || new_type_info.is_string()) {
491  return true;
492  // can cast between numbers
493  } else if (is_number() && new_type_info.is_number()) {
494  return true;
495  // can cast from timestamp or date to number (epoch)
496  } else if ((type == kTIMESTAMP || type == kDATE) && new_type_info.is_number()) {
497  return true;
498  // can cast from date to timestamp
499  } else if (type == kDATE && new_type_info.get_type() == kTIMESTAMP) {
500  return true;
501  } else if (type == kTIMESTAMP && new_type_info.get_type() == kDATE) {
502  return true;
503  } else if (type == kBOOLEAN && new_type_info.is_number()) {
504  return true;
505  } else if (type == kARRAY && new_type_info.get_type() == kARRAY) {
506  return get_elem_type().is_castable(new_type_info.get_elem_type());
507  } else if (type == kCOLUMN && new_type_info.get_type() == kCOLUMN) {
508  return get_elem_type().is_castable(new_type_info.get_elem_type());
509  } else {
510  return false;
511  }
512  }
513 
514  HOST DEVICE inline bool is_null(const Datum& d) const {
515  // assuming Datum is always uncompressed
516  switch (type) {
517  case kBOOLEAN:
518  return (int8_t)d.boolval == NULL_BOOLEAN;
519  case kTINYINT:
520  return d.tinyintval == NULL_TINYINT;
521  case kSMALLINT:
522  return d.smallintval == NULL_SMALLINT;
523  case kINT:
524  return d.intval == NULL_INT;
525  case kBIGINT:
526  case kNUMERIC:
527  case kDECIMAL:
528  return d.bigintval == NULL_BIGINT;
529  case kFLOAT:
530  return d.floatval == NULL_FLOAT;
531  case kDOUBLE:
532  return d.doubleval == NULL_DOUBLE;
533  case kTIME:
534  case kTIMESTAMP:
535  case kDATE:
536  return d.bigintval == NULL_BIGINT;
537  case kTEXT:
538  case kVARCHAR:
539  case kCHAR:
540  // @TODO handle null strings
541  break;
542  case kNULLT:
543  return true;
544  case kARRAY:
545  return d.arrayval == NULL || d.arrayval->is_null;
546  default:
547  break;
548  }
549  return false;
550  }
551  HOST DEVICE inline bool is_null(const int8_t* val) const {
552  if (type == kFLOAT) {
553  return *(float*)val == NULL_FLOAT;
554  }
555  if (type == kDOUBLE) {
556  return *(double*)val == NULL_DOUBLE;
557  }
558  // val can be either compressed or uncompressed
559  switch (size) {
560  case 1:
561  return *val == NULL_TINYINT;
562  case 2:
563  return *(int16_t*)val == NULL_SMALLINT;
564  case 4:
565  return *(int32_t*)val == NULL_INT;
566  case 8:
567  return *(int64_t*)val == NULL_BIGINT;
568  case kNULLT:
569  return true;
570  default:
571  // @TODO(wei) handle null strings
572  break;
573  }
574  return false;
575  }
576  HOST DEVICE inline bool is_null_fixlen_array(const int8_t* val, int array_size) const {
577  // Check if fixed length array has a NULL_ARRAY sentinel as the first element
578  if (type == kARRAY && val && array_size > 0 && array_size == size) {
579  // Need to create element type to get the size, but can't call get_elem_type()
580  // since this is a HOST DEVICE function. Going through copy constructor instead.
581  auto elem_ti{*this};
582  elem_ti.set_type(subtype);
583  elem_ti.set_subtype(kNULLT);
584  auto elem_size = elem_ti.get_storage_size();
585  if (elem_size < 1) {
586  return false;
587  }
588  if (subtype == kFLOAT) {
589  return *(float*)val == NULL_ARRAY_FLOAT;
590  }
591  if (subtype == kDOUBLE) {
592  return *(double*)val == NULL_ARRAY_DOUBLE;
593  }
594  switch (elem_size) {
595  case 1:
596  return *val == NULL_ARRAY_TINYINT;
597  case 2:
598  return *(int16_t*)val == NULL_ARRAY_SMALLINT;
599  case 4:
600  return *(int32_t*)val == NULL_ARRAY_INT;
601  case 8:
602  return *(int64_t*)val == NULL_ARRAY_BIGINT;
603  default:
604  return false;
605  }
606  }
607  return false;
608  }
609  HOST DEVICE inline bool is_null_point_coord_array(const int8_t* val,
610  int array_size) const {
611  if (type == kARRAY && subtype == kTINYINT && val && array_size > 0 &&
612  array_size == size) {
613  if (array_size == 2 * sizeof(double)) {
614  return *(double*)val == NULL_ARRAY_DOUBLE;
615  }
616  if (array_size == 2 * sizeof(int32_t)) {
617  return *(uint32_t*)val == NULL_ARRAY_COMPRESSED_32;
618  }
619  }
620  return false;
621  }
622  inline SQLTypeInfo get_elem_type() const {
623  return SQLTypeInfo(
625  }
626  inline SQLTypeInfo get_array_type() const {
628  }
629 
630  inline bool is_date_in_days() const {
631  if (type == kDATE) {
632  const auto comp_type = get_compression();
633  if (comp_type == kENCODING_DATE_IN_DAYS) {
634  return true;
635  }
636  }
637  return false;
638  }
639 
640  inline bool is_date() const { return type == kDATE; }
641 
642  inline bool is_high_precision_timestamp() const {
643  if (type == kTIMESTAMP) {
644  const auto dimension = get_dimension();
645  if (dimension > 0) {
646  return true;
647  }
648  }
649  return false;
650  }
651 
652  inline bool is_timestamp() const { return type == kTIMESTAMP; }
653 
654  private:
655  SQLTypes type; // type id
656  SQLTypes subtype; // element type of arrays
657  int dimension; // VARCHAR/CHAR length or NUMERIC/DECIMAL precision
658  int scale; // NUMERIC/DECIMAL scale
659  bool notnull; // nullable? a hint, not used for type checking
660  EncodingType compression; // compression scheme
661  int comp_param; // compression parameter when applicable for certain schemes
662  int size; // size of the type in bytes. -1 for variable size
663 #ifndef __CUDACC__
664  static std::string type_name[kSQLTYPE_LAST];
665  static std::string comp_name[kENCODING_LAST];
666 #endif
667  HOST DEVICE inline int get_storage_size() const {
668  switch (type) {
669  case kBOOLEAN:
670  return sizeof(int8_t);
671  case kTINYINT:
672  return sizeof(int8_t);
673  case kSMALLINT:
674  switch (compression) {
675  case kENCODING_NONE:
676  return sizeof(int16_t);
677  case kENCODING_FIXED:
678  case kENCODING_SPARSE:
679  return comp_param / 8;
680  case kENCODING_RL:
681  case kENCODING_DIFF:
682  break;
683  default:
684  assert(false);
685  }
686  break;
687  case kINT:
688  switch (compression) {
689  case kENCODING_NONE:
690  return sizeof(int32_t);
691  case kENCODING_FIXED:
692  case kENCODING_SPARSE:
693  return comp_param / 8;
694  case kENCODING_RL:
695  case kENCODING_DIFF:
696  break;
697  default:
698  assert(false);
699  }
700  break;
701  case kBIGINT:
702  case kNUMERIC:
703  case kDECIMAL:
704  switch (compression) {
705  case kENCODING_NONE:
706  return sizeof(int64_t);
707  case kENCODING_FIXED:
708  case kENCODING_SPARSE:
709  return comp_param / 8;
710  case kENCODING_RL:
711  case kENCODING_DIFF:
712  break;
713  default:
714  assert(false);
715  }
716  break;
717  case kFLOAT:
718  switch (compression) {
719  case kENCODING_NONE:
720  return sizeof(float);
721  case kENCODING_FIXED:
722  case kENCODING_RL:
723  case kENCODING_DIFF:
724  case kENCODING_SPARSE:
725  assert(false);
726  break;
727  default:
728  assert(false);
729  }
730  break;
731  case kDOUBLE:
732  switch (compression) {
733  case kENCODING_NONE:
734  return sizeof(double);
735  case kENCODING_FIXED:
736  case kENCODING_RL:
737  case kENCODING_DIFF:
738  case kENCODING_SPARSE:
739  assert(false);
740  break;
741  default:
742  assert(false);
743  }
744  break;
745  case kTIMESTAMP:
746  case kTIME:
747  case kINTERVAL_DAY_TIME:
749  case kDATE:
750  switch (compression) {
751  case kENCODING_NONE:
752  return sizeof(int64_t);
753  case kENCODING_FIXED:
754  if (type == kTIMESTAMP && dimension > 0) {
755  assert(false); // disable compression for timestamp precisions
756  }
757  return comp_param / 8;
758  case kENCODING_RL:
759  case kENCODING_DIFF:
760  case kENCODING_SPARSE:
761  assert(false);
762  break;
764  switch (comp_param) {
765  case 0:
766  return 4; // Default date encoded in days is 32 bits
767  case 16:
768  case 32:
769  return comp_param / 8;
770  default:
771  assert(false);
772  break;
773  }
774  default:
775  assert(false);
776  }
777  break;
778  case kTEXT:
779  case kVARCHAR:
780  case kCHAR:
781  if (compression == kENCODING_DICT) {
782  return sizeof(int32_t); // @TODO(wei) must check DictDescriptor
783  }
784  break;
785  case kARRAY:
786  // TODO: return size for fixlen arrays?
787  break;
788  case kPOINT:
789  case kLINESTRING:
790  case kPOLYGON:
791  case kMULTIPOLYGON:
792  case kCOLUMN:
793  break;
794  default:
795  break;
796  }
797  return -1;
798  }
799 };
800 
802 
803 #ifndef __CUDACC__
804 #include <string_view>
805 
806 Datum StringToDatum(std::string_view s, SQLTypeInfo& ti);
807 std::string DatumToString(Datum d, const SQLTypeInfo& ti);
808 bool DatumEqual(const Datum, const Datum, const SQLTypeInfo& ti);
809 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
810  const SQLTypeInfo& type_info,
811  const SQLTypeInfo& new_type_info);
812 #endif
813 
814 #include "../QueryEngine/DateAdd.h"
815 #include "../QueryEngine/DateTruncate.h"
816 #include "../QueryEngine/ExtractFromTime.h"
817 
819  EncodingType encoding = type_info.get_compression();
820  if (encoding == kENCODING_DATE_IN_DAYS ||
821  (encoding == kENCODING_FIXED && type_info.get_type() != kARRAY)) {
822  encoding = kENCODING_NONE;
823  }
824  return SQLTypeInfo(type_info.get_type(),
825  type_info.get_dimension(),
826  type_info.get_scale(),
827  type_info.get_notnull(),
828  encoding,
829  type_info.get_comp_param(),
830  type_info.get_subtype());
831 }
832 
834  SQLTypeInfo nullable_type_info = type_info;
835  nullable_type_info.set_notnull(false);
836  return nullable_type_info;
837 }
838 
840  SQLTypeInfo nullable_type_info = get_logical_type_info(type_info);
841  return get_nullable_type_info(nullable_type_info);
842 }
843 
// Null sentinel for T, widened to int64_t: the minimum of T for signed types,
// the maximum of T for unsigned types.
template <class T>
constexpr inline int64_t inline_int_null_value() {
  using Limits = std::numeric_limits<T>;
  if constexpr (std::is_signed<T>::value) {
    return Limits::min();
  } else {
    return Limits::max();
  }
}
849 
// Null-array sentinel for T, widened to int64_t: one above the minimum of T
// for signed types, one below the maximum of T for unsigned types.
// TODO: null_array values in signed types would step on max valid value
// in fixlen unsigned arrays, the max valid value may need to be lowered.
template <class T>
constexpr inline int64_t inline_int_null_array_value() {
  using Limits = std::numeric_limits<T>;
  if constexpr (std::is_signed<T>::value) {
    return Limits::min() + 1;
  } else {
    return Limits::max() - 1;
  }
}
857 
// Largest value of T, widened to int64_t, that is treated as valid data: the
// full maximum for signed types, one below the maximum for unsigned types.
template <class T>
constexpr inline int64_t max_valid_int_value() {
  using Limits = std::numeric_limits<T>;
  if constexpr (std::is_signed<T>::value) {
    return Limits::max();
  } else {
    return Limits::max() - 1;
  }
}
863 
864 #include "InlineNullValues.h"
865 
866 using StringOffsetT = int32_t;
867 using ArrayOffsetT = int32_t;
868 
869 inline int8_t* appendDatum(int8_t* buf, Datum d, const SQLTypeInfo& ti) {
870  switch (ti.get_type()) {
871  case kBOOLEAN:
872  *(bool*)buf = d.boolval;
873  return buf + sizeof(bool);
874  case kNUMERIC:
875  case kDECIMAL:
876  case kBIGINT:
877  *(int64_t*)buf = d.bigintval;
878  return buf + sizeof(int64_t);
879  case kINT:
880  *(int32_t*)buf = d.intval;
881  return buf + sizeof(int32_t);
882  case kSMALLINT:
883  *(int16_t*)buf = d.smallintval;
884  return buf + sizeof(int16_t);
885  case kTINYINT:
886  *(int8_t*)buf = d.tinyintval;
887  return buf + sizeof(int8_t);
888  case kFLOAT:
889  *(float*)buf = d.floatval;
890  return buf + sizeof(float);
891  case kDOUBLE:
892  *(double*)buf = d.doubleval;
893  return buf + sizeof(double);
894  case kTIME:
895  case kTIMESTAMP:
896  case kDATE:
897  *reinterpret_cast<int64_t*>(buf) = d.bigintval;
898  return buf + sizeof(int64_t);
899  default:
900  return nullptr;
901  }
902 }
int8_t tinyintval
Definition: sqltypes.h:134
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:259
void set_compression(EncodingType c)
Definition: sqltypes.h:358
void set_size(int s)
Definition: sqltypes.h:356
#define NULL_DOUBLE
Definition: sqltypes.h:185
HOST DEVICE int get_size() const
Definition: sqltypes.h:268
HOST DEVICE void operator=(const SQLTypeInfo &rhs)
Definition: sqltypes.h:474
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:230
bool is_varlen_array() const
Definition: sqltypes.h:424
DEVICE VarlenDatum()
Definition: sqltypes.h:77
Definition: sqltypes.h:50
DEVICE constexpr bool is_cuda_compiler()
Definition: sqltypes.h:121
SQLTypes
Definition: sqltypes.h:39
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:149
bool is_timestamp() const
Definition: sqltypes.h:652
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:150
EncodingType
Definition: encodetypes.h:22
#define NULL_ARRAY_COMPRESSED_32
Definition: sqltypes.h:195
bool is_null
Definition: sqltypes.h:75
#define NULL_BIGINT
Definition: sqltypes.h:183
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:193
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:839
HOST DEVICE bool operator==(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:454
bool boolval
Definition: sqltypes.h:133
bool is_fp() const
Definition: sqltypes.h:419
HOST DEVICE int get_scale() const
Definition: sqltypes.h:263
bool is_varlen() const
Definition: sqltypes.h:430
constexpr int64_t inline_int_null_value()
Definition: sqltypes.h:845
std::string get_compression_name() const
Definition: sqltypes.h:392
std::string concat(Types &&...parms)
VarlenDatum * arrayval
Definition: sqltypes.h:140
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:349
SQLTypeInfo(SQLTypes t, int d, int s)
Definition: sqltypes.h:228
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:818
#define NULL_ARRAY_SMALLINT
Definition: sqltypes.h:189
#define NULL_ARRAY_TINYINT
Definition: sqltypes.h:188
Definition: sqltypes.h:66
HOST DEVICE bool is_null_fixlen_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:576
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:258
bool is_number() const
Definition: sqltypes.h:420
int32_t intval
Definition: sqltypes.h:136
bool is_time() const
Definition: sqltypes.h:421
std::string to_string(char const *&&v)
HostArrayDatum(size_t const l, int8_t *p, bool const n, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:111
int8_t * pointer
Definition: sqltypes.h:74
int32_t StringOffsetT
Definition: sqltypes.h:866
bool has_render_group() const
Definition: sqltypes.h:338
#define DEVICE
constexpr int64_t max_valid_int_value()
Definition: sqltypes.h:859
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:130
#define HOST
void set_input_srid(int d)
Definition: sqltypes.h:352
float floatval
Definition: sqltypes.h:138
std::string to_string() const
Definition: sqltypes.h:393
int get_physical_cols() const
Definition: sqltypes.h:279
bool is_fixlen_array() const
Definition: sqltypes.h:425
bool is_castable(const SQLTypeInfo &new_type_info) const
Definition: sqltypes.h:485
#define IS_INTERVAL(T)
Definition: sqltypes.h:175
void set_fixed_size()
Definition: sqltypes.h:357
std::shared_ptr< int8_t > ManagedPtr
Definition: sqltypes.h:92
HOST DEVICE bool operator!=(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:446
int get_logical_size() const
Definition: sqltypes.h:269
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:190
static std::string type_name[kSQLTYPE_LAST]
Definition: sqltypes.h:664
bool is_integer() const
Definition: sqltypes.h:417
#define NULL_TINYINT
Definition: sqltypes.h:180
VarlenDatum(const size_t l, int8_t *p, const bool n)
Definition: sqltypes.h:80
SQLTypes subtype
Definition: sqltypes.h:656
void set_scale(int s)
Definition: sqltypes.h:353
bool notnull
Definition: sqltypes.h:659
bool has_bounds() const
Definition: sqltypes.h:327
int64_t bigintval
Definition: sqltypes.h:137
#define NULL_FLOAT
Definition: sqltypes.h:184
HostArrayDatum(size_t const l, int8_t *p, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:105
bool is_timeinterval() const
Definition: sqltypes.h:426
constexpr int64_t inline_int_null_array_value()
Definition: sqltypes.h:851
ManagedPtr data_ptr
Definition: sqltypes.h:114
HostArrayDatum()=default
int16_t smallintval
Definition: sqltypes.h:135
SQLTypeInfo(SQLTypes t, int d, int s, bool n)
Definition: sqltypes.h:219
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:124
#define NULL_ARRAY_INT
Definition: sqltypes.h:190
#define NULL_INT
Definition: sqltypes.h:182
bool is_boolean() const
Definition: sqltypes.h:422
HostArrayDatum(size_t const l, int8_t *p, bool const n)
Definition: sqltypes.h:99
bool g_enable_smem_group_by true
void operator()(int8_t *p)
Definition: sqltypes.h:88
SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:210
SQLTypeInfo(SQLTypes t)
Definition: sqltypes.h:238
SQLTypeInfo(SQLTypes t, bool n, EncodingType c)
Definition: sqltypes.h:239
SQLTypeInfo get_array_type() const
Definition: sqltypes.h:626
EncodingType compression
Definition: sqltypes.h:660
int get_precision() const
Definition: sqltypes.h:261
std::string * stringval
Definition: sqltypes.h:142
void set_output_srid(int s)
Definition: sqltypes.h:354
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:311
bool is_column() const
Definition: sqltypes.h:428
DEVICE DeviceArrayDatum()
Definition: sqltypes.h:118
HOST DEVICE bool is_null(const Datum &d) const
Definition: sqltypes.h:514
void set_comp_param(int p)
Definition: sqltypes.h:359
HOST DEVICE int get_storage_size() const
Definition: sqltypes.h:667
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqltypes.h:53
Definition: sqltypes.h:54
static std::string comp_name[kENCODING_LAST]
Definition: sqltypes.h:665
int64_t const int32_t sz assert(dest)
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:869
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:266
bool is_date_in_days() const
Definition: sqltypes.h:630
int get_array_context_logical_size() const
Definition: sqltypes.h:463
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:327
int32_t ArrayOffsetT
Definition: sqltypes.h:867
void set_dimension(int d)
Definition: sqltypes.h:350
#define NULL_ARRAY_BIGINT
Definition: sqltypes.h:191
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:260
#define IS_INTEGER(T)
Definition: sqltypes.h:167
std::string get_type_name() const
Definition: sqltypes.h:361
Definition: sqltypes.h:42
#define IS_STRING(T)
Definition: sqltypes.h:172
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:267
HOST DEVICE int get_input_srid() const
Definition: sqltypes.h:262
virtual DEVICE ~VarlenDatum()
Definition: sqltypes.h:78
#define NULL_SMALLINT
Definition: sqltypes.h:181
bool g_enable_watchdog false
Definition: Execute.cpp:74
void set_notnull(bool n)
Definition: sqltypes.h:355
bool is_geometry() const
Definition: sqltypes.h:427
bool is_high_precision_timestamp() const
Definition: sqltypes.h:642
SQLTypes type
Definition: sqltypes.h:655
HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
Definition: sqltypes.h:96
bool is_dict_encoded_string() const
Definition: sqltypes.h:442
Definition: sqltypes.h:46
bool is_varlen_indeed() const
Definition: sqltypes.h:436
bool is_string() const
Definition: sqltypes.h:415
SQLTypeInfo(SQLTypes t, bool n)
Definition: sqltypes.h:229
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:265
int8_t * numbersPtr
Definition: sqltypes.h:148
bool is_string_array() const
Definition: sqltypes.h:416
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:622
bool is_decimal() const
Definition: sqltypes.h:418
int get_physical_coord_cols() const
Definition: sqltypes.h:294
#define IS_NUMBER(T)
Definition: sqltypes.h:169
void operator()(int8_t *)
Definition: sqltypes.h:85
#define IS_GEO(T)
Definition: sqltypes.h:173
#define TRANSIENT_DICT(ID)
Definition: sqltypes.h:198
int comp_param
Definition: sqltypes.h:661
#define NULL_BOOLEAN
Definition: sqltypes.h:179
bool is_date() const
Definition: sqltypes.h:640
bool is_array() const
Definition: sqltypes.h:423
void set_precision(int d)
Definition: sqltypes.h:351
SQLTypeInfo get_nullable_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:833
int dimension
Definition: sqltypes.h:657
HOST DEVICE bool is_null_point_coord_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:609
double doubleval
Definition: sqltypes.h:139
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:264
constexpr auto is_datetime(SQLTypes type)
Definition: sqltypes.h:201
HOST DEVICE bool is_null(const int8_t *val) const
Definition: sqltypes.h:551
size_t length
Definition: sqltypes.h:73
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:192
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:348