OmniSciDB  a667adc9c8
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sqltypes.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "../Logger/Logger.h"
26 #include "StringTransform.h"
27 #include "funcannotations.h"
28 
29 #include <cassert>
30 #include <ctime>
31 #include <memory>
32 #include <string>
33 #include <type_traits>
34 #include <vector>
35 
36 // must not change because these values persist in catalogs.
// Numeric identifiers for every SQL type. kSQLTYPE_LAST is not a type: it is the
// one-past-the-end sentinel used to size per-type lookup tables.
enum SQLTypes {
  kNULLT = 0,  // type for null values
  kBOOLEAN = 1,
  kCHAR = 2,
  kVARCHAR = 3,
  kNUMERIC = 4,
  kDECIMAL = 5,
  kINT = 6,
  kSMALLINT = 7,
  kFLOAT = 8,
  kDOUBLE = 9,
  kTIME = 10,
  kTIMESTAMP = 11,
  kBIGINT = 12,
  kTEXT = 13,
  kDATE = 14,
  kARRAY = 15,
  kINTERVAL_DAY_TIME = 16,
  kINTERVAL_YEAR_MONTH = 17,
  kPOINT = 18,
  kLINESTRING = 19,
  kPOLYGON = 20,
  kMULTIPOLYGON = 21,
  kTINYINT = 22,
  kGEOMETRY = 23,
  kGEOGRAPHY = 24,
  kEVAL_CONTEXT_TYPE = 25,  // Placeholder Type for ANY
  kVOID = 26,
  kCURSOR = 27,
  kCOLUMN = 28,
  kCOLUMN_LIST = 29,
  kSQLTYPE_LAST = 30
};
70 
71 #ifndef __CUDACC__
72 
73 inline std::string toString(const SQLTypes& type) {
74  switch (type) {
75  case kNULLT:
76  return "NULL";
77  case kBOOLEAN:
78  return "BOOL";
79  case kCHAR:
80  return "CHAR";
81  case kVARCHAR:
82  return "VARCHAR";
83  case kNUMERIC:
84  return "NUMERIC";
85  case kDECIMAL:
86  return "DECIMAL";
87  case kINT:
88  return "INT";
89  case kSMALLINT:
90  return "SMALLINT";
91  case kFLOAT:
92  return "FLOAT";
93  case kDOUBLE:
94  return "DOUBLE";
95  case kTIME:
96  return "TIME";
97  case kTIMESTAMP:
98  return "TIMESTAMP";
99  case kBIGINT:
100  return "BIGINT";
101  case kTEXT:
102  return "TEXT";
103  case kDATE:
104  return "DATE";
105  case kARRAY:
106  return "ARRAY";
107  case kINTERVAL_DAY_TIME:
108  return "DAY TIME INTERVAL";
110  return "YEAR MONTH INTERVAL";
111  case kPOINT:
112  return "POINT";
113  case kLINESTRING:
114  return "LINESTRING";
115  case kPOLYGON:
116  return "POLYGON";
117  case kMULTIPOLYGON:
118  return "MULTIPOLYGON";
119  case kTINYINT:
120  return "TINYINT";
121  case kGEOMETRY:
122  return "GEOMETRY";
123  case kGEOGRAPHY:
124  return "GEOGRAPHY";
125  case kEVAL_CONTEXT_TYPE:
126  return "UNEVALUATED ANY";
127  case kVOID:
128  return "VOID";
129  case kCURSOR:
130  return "CURSOR";
131  case kCOLUMN:
132  return "COLUMN";
133  case kCOLUMN_LIST:
134  return "COLUMN_LIST";
135  case kSQLTYPE_LAST:
136  break;
137  }
138  LOG(FATAL) << "Invalid SQL type: " << type;
139  return "";
140 }
141 
142 #endif
143 
144 struct VarlenDatum {
145  size_t length;
146  int8_t* pointer;
147  bool is_null;
148 
149  DEVICE VarlenDatum() : length(0), pointer(nullptr), is_null(true) {}
150  DEVICE virtual ~VarlenDatum() {}
151 
152  VarlenDatum(const size_t l, int8_t* p, const bool n)
153  : length(l), pointer(p), is_null(n) {}
154 };
155 
157  void operator()(int8_t*) {}
158 };
// Deleter that releases a buffer with free(); intended for memory obtained from
// the malloc family. The call operator is const because the functor is
// stateless, which lets it be invoked through a const object or reference.
struct FreeDeleter {
  void operator()(int8_t* p) const noexcept { free(p); }
};
162 
163 struct HostArrayDatum : public VarlenDatum {
164  using ManagedPtr = std::shared_ptr<int8_t>;
165 
166  HostArrayDatum() = default;
167 
168  HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
169  : VarlenDatum(l, p.get(), n), data_ptr(p) {}
170 
171  HostArrayDatum(size_t const l, int8_t* p, bool const n)
172  : VarlenDatum(l, p, n), data_ptr(p, FreeDeleter()){};
173 
174  template <typename CUSTOM_DELETER,
175  typename = std::enable_if_t<
176  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
177  HostArrayDatum(size_t const l, int8_t* p, CUSTOM_DELETER custom_deleter)
178  : VarlenDatum(l, p, 0 == l), data_ptr(p, custom_deleter) {}
179 
180  template <typename CUSTOM_DELETER,
181  typename = std::enable_if_t<
182  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
183  HostArrayDatum(size_t const l, int8_t* p, bool const n, CUSTOM_DELETER custom_deleter)
184  : VarlenDatum(l, p, n), data_ptr(p, custom_deleter) {}
185 
187 };
188 
189 struct DeviceArrayDatum : public VarlenDatum {
191 };
192 
193 inline DEVICE constexpr bool is_cuda_compiler() {
194 #ifdef __CUDACC__
195  return true;
196 #else
197  return false;
198 #endif
199 }
200 
201 using ArrayDatum =
202  std::conditional_t<is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum>;
203 
204 union Datum {
205  bool boolval;
206  int8_t tinyintval;
207  int16_t smallintval;
208  int32_t intval;
209  int64_t bigintval;
210  float floatval;
211  double doubleval;
213 #ifndef __CUDACC__
214  std::string* stringval; // string value
215 #endif
216 };
217 
218 #ifndef __CUDACC__
220  int8_t* numbersPtr;
221  std::vector<std::string>* stringsPtr;
222  std::vector<ArrayDatum>* arraysPtr;
223 };
224 #endif
225 
226 // must not change because these values persist in catalogs.
enum EncodingType {
  kENCODING_NONE = 0,          // no encoding
  kENCODING_FIXED = 1,         // Fixed-bit encoding
  kENCODING_RL = 2,            // Run Length encoding
  kENCODING_DIFF = 3,          // Differential encoding
  kENCODING_DICT = 4,          // Dictionary encoding
  kENCODING_SPARSE = 5,        // Null encoding for sparse columns
  kENCODING_GEOINT = 6,        // Encoding coordinates as integers
  kENCODING_DATE_IN_DAYS = 7,  // Date encoding in days
  kENCODING_LAST = 8           // sentinel: number of encodings, sizes lookup tables
};
238 
239 #define IS_INTEGER(T) \
240  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kBIGINT) || ((T) == kTINYINT))
241 #define IS_NUMBER(T) \
242  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kDOUBLE) || ((T) == kFLOAT) || \
243  ((T) == kBIGINT) || ((T) == kNUMERIC) || ((T) == kDECIMAL) || ((T) == kTINYINT))
244 #define IS_STRING(T) (((T) == kTEXT) || ((T) == kVARCHAR) || ((T) == kCHAR))
245 #define IS_GEO(T) \
246  (((T) == kPOINT) || ((T) == kLINESTRING) || ((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
247 #define IS_INTERVAL(T) ((T) == kINTERVAL_DAY_TIME || (T) == kINTERVAL_YEAR_MONTH)
248 #define IS_DECIMAL(T) ((T) == kNUMERIC || (T) == kDECIMAL)
249 #define IS_GEO_POLY(T) (((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
250 
251 #include "InlineNullValues.h"
252 
253 #define TRANSIENT_DICT_ID 0
254 #define TRANSIENT_DICT(ID) (-(ID))
255 #define REGULAR_DICT(TRANSIENTID) (-(TRANSIENTID))
256 
257 constexpr auto is_datetime(SQLTypes type) {
258  return type == kTIME || type == kTIMESTAMP || type == kDATE;
259 }
260 
261 // @type SQLTypeInfo
262 // @brief a structure to capture all type information including
263 // length, precision, scale, etc.
264 class SQLTypeInfo {
265  public:
266  SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
267  : type(t)
268  , subtype(st)
269  , dimension(d)
270  , scale(s)
271  , notnull(n)
272  , compression(c)
273  , comp_param(p)
274  , size(get_storage_size()) {}
275  SQLTypeInfo(SQLTypes t, int d, int s, bool n)
276  : type(t)
277  , subtype(kNULLT)
278  , dimension(d)
279  , scale(s)
280  , notnull(n)
282  , comp_param(0)
283  , size(get_storage_size()) {}
284  SQLTypeInfo(SQLTypes t, int d, int s) : SQLTypeInfo(t, d, s, false) {}
286  : type(t)
287  , subtype(kNULLT)
288  , dimension(0)
289  , scale(0)
290  , notnull(n)
292  , comp_param(0)
293  , size(get_storage_size()) {}
296  : type(t)
297  , subtype(kNULLT)
298  , dimension(0)
299  , scale(0)
300  , notnull(n)
301  , compression(c)
302  , comp_param(0)
303  , size(get_storage_size()) {}
305  : type(kNULLT)
306  , subtype(kNULLT)
307  , dimension(0)
308  , scale(0)
309  , notnull(false)
311  , comp_param(0)
312  , size(0) {}
313 
314  HOST DEVICE inline SQLTypes get_type() const { return type; }
315  HOST DEVICE inline SQLTypes get_subtype() const { return subtype; }
316  HOST DEVICE inline int get_dimension() const { return dimension; }
317  inline int get_precision() const { return dimension; }
318  HOST DEVICE inline int get_input_srid() const { return dimension; }
319  HOST DEVICE inline int get_scale() const { return scale; }
320  HOST DEVICE inline int get_output_srid() const { return scale; }
321  HOST DEVICE inline bool get_notnull() const { return notnull; }
323  HOST DEVICE inline int get_comp_param() const { return comp_param; }
324  HOST DEVICE inline int get_size() const { return size; }
325  inline int get_logical_size() const {
328  return ti.get_size();
329  }
330  if (compression == kENCODING_DICT) {
331  return 4;
332  }
333  return get_size();
334  }
335  inline int get_physical_cols() const {
336  switch (type) {
337  case kPOINT:
338  return 1; // coords
339  case kLINESTRING:
340  return 2; // coords, bounds
341  case kPOLYGON:
342  return 4; // coords, ring_sizes, bounds, render_group
343  case kMULTIPOLYGON:
344  return 5; // coords, ring_sizes, poly_rings, bounds, render_group
345  default:
346  break;
347  }
348  return 0;
349  }
350  inline int get_physical_coord_cols() const {
351  // @TODO dmitri/simon rename this function?
352  // It needs to return the number of extra columns
353  // which need to go through the executor, as opposed
354  // to those which are only needed by CPU for poly
355  // cache building or what-not. For now, we just omit
356  // the Render Group column. If we add Bounding Box
357  // or something this may require rethinking. Perhaps
358  // these two functions need to return an array of
359  // offsets rather than just a number to loop over,
360  // so that executor and non-executor columns can
361  // be mixed.
362  // NOTE(adb): In binding to extension functions, we need to know some pretty specific
363  // type info about each of the physical coords cols for each geo type. I added checks
364  // there to ensure the physical coords col for the geo type match what we expect. If
365  // these values are ever changed, corresponding values in
366  // ExtensionFunctionsBinding.cpp::compute_narrowing_conv_scores and
367  // ExtensionFunctionsBinding.cpp::compute_widening_conv_scores will also need to be
368  // changed.
369  switch (type) {
370  case kPOINT:
371  return 1;
372  case kLINESTRING:
373  return 1; // omit bounds
374  case kPOLYGON:
375  return 2; // omit bounds, render group
376  case kMULTIPOLYGON:
377  return 3; // omit bounds, render group
378  default:
379  break;
380  }
381  return 0;
382  }
383  inline bool has_bounds() const {
384  switch (type) {
385  case kLINESTRING:
386  case kPOLYGON:
387  case kMULTIPOLYGON:
388  return true;
389  default:
390  break;
391  }
392  return false;
393  }
394  inline bool has_render_group() const {
395  switch (type) {
396  case kPOLYGON:
397  case kMULTIPOLYGON:
398  return true;
399  default:
400  break;
401  }
402  return false;
403  }
404  HOST DEVICE inline void set_type(SQLTypes t) { type = t; }
405  HOST DEVICE inline void set_subtype(SQLTypes st) { subtype = st; }
406  inline void set_dimension(int d) { dimension = d; }
407  inline void set_precision(int d) { dimension = d; }
408  inline void set_input_srid(int d) { dimension = d; }
409  inline void set_scale(int s) { scale = s; }
410  inline void set_output_srid(int s) { scale = s; }
411  inline void set_notnull(bool n) { notnull = n; }
412  inline void set_size(int s) { size = s; }
413  inline void set_fixed_size() { size = get_storage_size(); }
414  inline void set_compression(EncodingType c) { compression = c; }
415  inline void set_comp_param(int p) { comp_param = p; }
416 #ifndef __CUDACC__
417  inline std::string get_type_name() const {
418  if (IS_GEO(type)) {
419  std::string srid_string = "";
420  if (get_output_srid() > 0) {
421  srid_string = ", " + std::to_string(get_output_srid());
422  }
423  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
424  return type_name[static_cast<int>(subtype)] + "(" +
425  type_name[static_cast<int>(type)] + srid_string + ")";
426  }
427  std::string ps = "";
428  if (type == kDECIMAL || type == kNUMERIC) {
429  ps = "(" + std::to_string(dimension) + "," + std::to_string(scale) + ")";
430  } else if (type == kTIMESTAMP) {
431  ps = "(" + std::to_string(dimension) + ")";
432  }
433  if (type == kARRAY) {
434  auto elem_ti = get_elem_type();
435  auto num_elems = (size > 0) ? std::to_string(size / elem_ti.get_size()) : "";
436  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
437  return elem_ti.get_type_name() + ps + "[" + num_elems + "]";
438  }
439  if (type == kCOLUMN) {
440  auto elem_ti = get_elem_type();
441  auto num_elems =
442  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
443  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
444  return "COLUMN<" + type_name[static_cast<int>(subtype)] + ps + ">" + num_elems;
445  }
446  if (type == kCOLUMN_LIST) {
447  auto elem_ti = get_elem_type();
448  auto num_elems =
449  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
450  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
451  return "COLUMN_LIST<" + type_name[static_cast<int>(subtype)] + ps + ">" + num_elems;
452  }
453  return type_name[static_cast<int>(type)] + ps;
454  }
455  inline std::string get_compression_name() const { return comp_name[(int)compression]; }
456  inline std::string to_string() const {
457  return concat("(",
458  type_name[static_cast<int>(type)],
459  ", ",
460  get_dimension(),
461  ", ",
462  get_scale(),
463  ", ",
464  get_notnull() ? "not nullable" : "nullable",
465  ", ",
467  ", ",
468  get_comp_param(),
469  ", ",
470  type_name[static_cast<int>(subtype)],
471  ": ",
472  get_size(),
473  ": ",
475  ")");
476  }
477  inline std::string get_buffer_name() const {
478  if (is_array())
479  return "Array";
480  if (is_bytes())
481  return "Bytes";
482  if (is_column())
483  return "Column";
484  assert(false);
485  return "";
486  }
487 #endif
488  inline bool is_string() const { return IS_STRING(type); }
489  inline bool is_string_array() const { return (type == kARRAY) && IS_STRING(subtype); }
490  inline bool is_integer() const { return IS_INTEGER(type); }
491  inline bool is_decimal() const { return type == kDECIMAL || type == kNUMERIC; }
492  inline bool is_fp() const { return type == kFLOAT || type == kDOUBLE; }
493  inline bool is_number() const { return IS_NUMBER(type); }
494  inline bool is_time() const { return is_datetime(type); }
495  inline bool is_boolean() const { return type == kBOOLEAN; }
496  inline bool is_array() const { return type == kARRAY; } // rbc Array
497  inline bool is_varlen_array() const { return type == kARRAY && size <= 0; }
498  inline bool is_fixlen_array() const { return type == kARRAY && size > 0; }
499  inline bool is_timeinterval() const { return IS_INTERVAL(type); }
500  inline bool is_geometry() const { return IS_GEO(type); }
501  inline bool is_column() const { return type == kCOLUMN; } // rbc Column
502  inline bool is_column_list() const { return type == kCOLUMN_LIST; } // rbc ColumnList
503  inline bool is_bytes() const {
504  return type == kTEXT && get_compression() == kENCODING_NONE;
505  } // rbc Bytes
506  inline bool is_buffer() const {
507  return is_array() || is_column() || is_column_list() || is_bytes();
508  }
509  inline bool transforms() const {
510  return IS_GEO(type) && get_output_srid() != get_input_srid();
511  }
512 
513  inline bool is_varlen() const { // TODO: logically this should ignore fixlen arrays
514  return (IS_STRING(type) && compression != kENCODING_DICT) || type == kARRAY ||
515  IS_GEO(type);
516  }
517 
518  // need this here till is_varlen can be fixed w/o negative impact to existing code
519  inline bool is_varlen_indeed() const {
520  // SQLTypeInfo.is_varlen() is broken with fixedlen array now
521  // and seems left broken for some concern, so fix it locally
522  return is_varlen() && !is_fixlen_array();
523  }
524 
525  inline bool is_dict_encoded_string() const {
526  return is_string() && compression == kENCODING_DICT;
527  }
528 
529  inline bool is_dict_encoded_type() const {
530  return is_dict_encoded_string() ||
532  }
533 
534  HOST DEVICE inline bool operator!=(const SQLTypeInfo& rhs) const {
535  return type != rhs.get_type() || subtype != rhs.get_subtype() ||
536  dimension != rhs.get_dimension() || scale != rhs.get_scale() ||
537  compression != rhs.get_compression() ||
540  notnull != rhs.get_notnull();
541  }
542  HOST DEVICE inline bool operator==(const SQLTypeInfo& rhs) const {
543  return type == rhs.get_type() && subtype == rhs.get_subtype() &&
544  dimension == rhs.get_dimension() && scale == rhs.get_scale() &&
545  compression == rhs.get_compression() &&
548  notnull == rhs.get_notnull();
549  }
550 
551  inline int get_array_context_logical_size() const {
552  if (is_string()) {
553  auto comp_type(get_compression());
554  if (comp_type == kENCODING_DICT || comp_type == kENCODING_FIXED ||
555  comp_type == kENCODING_NONE) {
556  return sizeof(int32_t);
557  }
558  }
559  return get_logical_size();
560  }
561 
562  HOST DEVICE inline void operator=(const SQLTypeInfo& rhs) {
563  type = rhs.get_type();
564  subtype = rhs.get_subtype();
565  dimension = rhs.get_dimension();
566  scale = rhs.get_scale();
567  notnull = rhs.get_notnull();
569  comp_param = rhs.get_comp_param();
570  size = rhs.get_size();
571  }
572 
573  inline bool is_castable(const SQLTypeInfo& new_type_info) const {
574  // can always cast between the same type but different precision/scale/encodings
575  if (type == new_type_info.get_type()) {
576  return true;
577  // can always cast from or to string
578  } else if (is_string() || new_type_info.is_string()) {
579  return true;
580  // can cast between numbers
581  } else if (is_number() && new_type_info.is_number()) {
582  return true;
583  // can cast from timestamp or date to number (epoch)
584  } else if ((type == kTIMESTAMP || type == kDATE) && new_type_info.is_number()) {
585  return true;
586  // can cast from date to timestamp
587  } else if (type == kDATE && new_type_info.get_type() == kTIMESTAMP) {
588  return true;
589  } else if (type == kTIMESTAMP && new_type_info.get_type() == kDATE) {
590  return true;
591  } else if (type == kBOOLEAN && new_type_info.is_number()) {
592  return true;
593  } else if (type == kARRAY && new_type_info.get_type() == kARRAY) {
594  return get_elem_type().is_castable(new_type_info.get_elem_type());
595  } else if (type == kCOLUMN && new_type_info.get_type() == kCOLUMN) {
596  return get_elem_type().is_castable(new_type_info.get_elem_type());
597  } else if (type == kCOLUMN_LIST && new_type_info.get_type() == kCOLUMN_LIST) {
598  return get_elem_type().is_castable(new_type_info.get_elem_type());
599  } else {
600  return false;
601  }
602  }
603 
604  HOST DEVICE inline bool is_null(const Datum& d) const {
605  // assuming Datum is always uncompressed
606  switch (type) {
607  case kBOOLEAN:
608  return (int8_t)d.boolval == NULL_BOOLEAN;
609  case kTINYINT:
610  return d.tinyintval == NULL_TINYINT;
611  case kSMALLINT:
612  return d.smallintval == NULL_SMALLINT;
613  case kINT:
614  return d.intval == NULL_INT;
615  case kBIGINT:
616  case kNUMERIC:
617  case kDECIMAL:
618  return d.bigintval == NULL_BIGINT;
619  case kFLOAT:
620  return d.floatval == NULL_FLOAT;
621  case kDOUBLE:
622  return d.doubleval == NULL_DOUBLE;
623  case kTIME:
624  case kTIMESTAMP:
625  case kDATE:
626  return d.bigintval == NULL_BIGINT;
627  case kTEXT:
628  case kVARCHAR:
629  case kCHAR:
630  // @TODO handle null strings
631  break;
632  case kNULLT:
633  return true;
634  case kARRAY:
635  return d.arrayval == NULL || d.arrayval->is_null;
636  default:
637  break;
638  }
639  return false;
640  }
641  HOST DEVICE inline bool is_null(const int8_t* val) const {
642  if (type == kFLOAT) {
643  return *(float*)val == NULL_FLOAT;
644  }
645  if (type == kDOUBLE) {
646  return *(double*)val == NULL_DOUBLE;
647  }
648  // val can be either compressed or uncompressed
649  switch (size) {
650  case 1:
651  return *val == NULL_TINYINT;
652  case 2:
653  return *(int16_t*)val == NULL_SMALLINT;
654  case 4:
655  return *(int32_t*)val == NULL_INT;
656  case 8:
657  return *(int64_t*)val == NULL_BIGINT;
658  case kNULLT:
659  return true;
660  default:
661  // @TODO(wei) handle null strings
662  break;
663  }
664  return false;
665  }
666  HOST DEVICE inline bool is_null_fixlen_array(const int8_t* val, int array_size) const {
667  // Check if fixed length array has a NULL_ARRAY sentinel as the first element
668  if (type == kARRAY && val && array_size > 0 && array_size == size) {
669  // Need to create element type to get the size, but can't call get_elem_type()
670  // since this is a HOST DEVICE function. Going through copy constructor instead.
671  auto elem_ti{*this};
672  elem_ti.set_type(subtype);
673  elem_ti.set_subtype(kNULLT);
674  auto elem_size = elem_ti.get_storage_size();
675  if (elem_size < 1) {
676  return false;
677  }
678  if (subtype == kFLOAT) {
679  return *(float*)val == NULL_ARRAY_FLOAT;
680  }
681  if (subtype == kDOUBLE) {
682  return *(double*)val == NULL_ARRAY_DOUBLE;
683  }
684  switch (elem_size) {
685  case 1:
686  return *val == NULL_ARRAY_TINYINT;
687  case 2:
688  return *(int16_t*)val == NULL_ARRAY_SMALLINT;
689  case 4:
690  return *(int32_t*)val == NULL_ARRAY_INT;
691  case 8:
692  return *(int64_t*)val == NULL_ARRAY_BIGINT;
693  default:
694  return false;
695  }
696  }
697  return false;
698  }
699  HOST DEVICE inline bool is_null_point_coord_array(const int8_t* val,
700  int array_size) const {
701  if (type == kARRAY && subtype == kTINYINT && val && array_size > 0 &&
702  array_size == size) {
703  if (array_size == 2 * sizeof(double)) {
704  return *(double*)val == NULL_ARRAY_DOUBLE;
705  }
706  if (array_size == 2 * sizeof(int32_t)) {
707  return *(uint32_t*)val == NULL_ARRAY_COMPRESSED_32;
708  }
709  }
710  return false;
711  }
712  inline SQLTypeInfo get_elem_type() const {
713  return SQLTypeInfo(
715  }
716  inline SQLTypeInfo get_array_type() const {
718  }
719 
720  inline bool is_date_in_days() const {
721  if (type == kDATE) {
722  const auto comp_type = get_compression();
723  if (comp_type == kENCODING_DATE_IN_DAYS) {
724  return true;
725  }
726  }
727  return false;
728  }
729 
730  inline bool is_date() const { return type == kDATE; }
731 
732  inline bool is_high_precision_timestamp() const {
733  if (type == kTIMESTAMP) {
734  const auto dimension = get_dimension();
735  if (dimension > 0) {
736  return true;
737  }
738  }
739  return false;
740  }
741 
742  inline bool is_timestamp() const { return type == kTIMESTAMP; }
743 
744  private:
745  SQLTypes type; // type id
746  SQLTypes subtype; // element type of arrays or columns
747  int dimension; // VARCHAR/CHAR length or NUMERIC/DECIMAL precision or COLUMN_LIST
748  // length
749  int scale; // NUMERIC/DECIMAL scale
750  bool notnull; // nullable? a hint, not used for type checking
751  EncodingType compression; // compression scheme
752  int comp_param; // compression parameter when applicable for certain schemes
753  int size; // size of the type in bytes. -1 for variable size
754 #ifndef __CUDACC__
755  static std::string type_name[kSQLTYPE_LAST];
756  static std::string comp_name[kENCODING_LAST];
757 #endif
758  HOST DEVICE inline int get_storage_size() const {
759  switch (type) {
760  case kBOOLEAN:
761  return sizeof(int8_t);
762  case kTINYINT:
763  return sizeof(int8_t);
764  case kSMALLINT:
765  switch (compression) {
766  case kENCODING_NONE:
767  return sizeof(int16_t);
768  case kENCODING_FIXED:
769  case kENCODING_SPARSE:
770  return comp_param / 8;
771  case kENCODING_RL:
772  case kENCODING_DIFF:
773  break;
774  default:
775  assert(false);
776  }
777  break;
778  case kINT:
779  switch (compression) {
780  case kENCODING_NONE:
781  return sizeof(int32_t);
782  case kENCODING_FIXED:
783  case kENCODING_SPARSE:
784  return comp_param / 8;
785  case kENCODING_RL:
786  case kENCODING_DIFF:
787  break;
788  default:
789  assert(false);
790  }
791  break;
792  case kBIGINT:
793  case kNUMERIC:
794  case kDECIMAL:
795  switch (compression) {
796  case kENCODING_NONE:
797  return sizeof(int64_t);
798  case kENCODING_FIXED:
799  case kENCODING_SPARSE:
800  return comp_param / 8;
801  case kENCODING_RL:
802  case kENCODING_DIFF:
803  break;
804  default:
805  assert(false);
806  }
807  break;
808  case kFLOAT:
809  switch (compression) {
810  case kENCODING_NONE:
811  return sizeof(float);
812  case kENCODING_FIXED:
813  case kENCODING_RL:
814  case kENCODING_DIFF:
815  case kENCODING_SPARSE:
816  assert(false);
817  break;
818  default:
819  assert(false);
820  }
821  break;
822  case kDOUBLE:
823  switch (compression) {
824  case kENCODING_NONE:
825  return sizeof(double);
826  case kENCODING_FIXED:
827  case kENCODING_RL:
828  case kENCODING_DIFF:
829  case kENCODING_SPARSE:
830  assert(false);
831  break;
832  default:
833  assert(false);
834  }
835  break;
836  case kTIMESTAMP:
837  case kTIME:
838  case kINTERVAL_DAY_TIME:
840  case kDATE:
841  switch (compression) {
842  case kENCODING_NONE:
843  return sizeof(int64_t);
844  case kENCODING_FIXED:
845  if (type == kTIMESTAMP && dimension > 0) {
846  assert(false); // disable compression for timestamp precisions
847  }
848  return comp_param / 8;
849  case kENCODING_RL:
850  case kENCODING_DIFF:
851  case kENCODING_SPARSE:
852  assert(false);
853  break;
855  switch (comp_param) {
856  case 0:
857  return 4; // Default date encoded in days is 32 bits
858  case 16:
859  case 32:
860  return comp_param / 8;
861  default:
862  assert(false);
863  break;
864  }
865  default:
866  assert(false);
867  }
868  break;
869  case kTEXT:
870  case kVARCHAR:
871  case kCHAR:
872  if (compression == kENCODING_DICT) {
873  return sizeof(int32_t); // @TODO(wei) must check DictDescriptor
874  }
875  break;
876  case kARRAY:
877  // TODO: return size for fixlen arrays?
878  break;
879  case kPOINT:
880  case kLINESTRING:
881  case kPOLYGON:
882  case kMULTIPOLYGON:
883  case kCOLUMN:
884  case kCOLUMN_LIST:
885  break;
886  default:
887  break;
888  }
889  return -1;
890  }
891 };
892 
894 
895 #ifndef __CUDACC__
896 #include <string_view>
897 
898 Datum StringToDatum(std::string_view s, SQLTypeInfo& ti);
899 std::string DatumToString(Datum d, const SQLTypeInfo& ti);
900 bool DatumEqual(const Datum, const Datum, const SQLTypeInfo& ti);
901 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
902  const SQLTypeInfo& type_info,
903  const SQLTypeInfo& new_type_info);
904 #endif
905 
906 #include "../QueryEngine/DateAdd.h"
907 #include "../QueryEngine/DateTruncate.h"
908 #include "../QueryEngine/ExtractFromTime.h"
909 
911  EncodingType encoding = type_info.get_compression();
912  if (encoding == kENCODING_DATE_IN_DAYS ||
913  (encoding == kENCODING_FIXED && type_info.get_type() != kARRAY)) {
914  encoding = kENCODING_NONE;
915  }
916  return SQLTypeInfo(type_info.get_type(),
917  type_info.get_dimension(),
918  type_info.get_scale(),
919  type_info.get_notnull(),
920  encoding,
921  type_info.get_comp_param(),
922  type_info.get_subtype());
923 }
924 
926  SQLTypeInfo nullable_type_info = type_info;
927  nullable_type_info.set_notnull(false);
928  return nullable_type_info;
929 }
930 
932  SQLTypeInfo nullable_type_info = get_logical_type_info(type_info);
933  return get_nullable_type_info(nullable_type_info);
934 }
935 
936 using StringOffsetT = int32_t;
937 using ArrayOffsetT = int32_t;
938 
939 inline int8_t* appendDatum(int8_t* buf, Datum d, const SQLTypeInfo& ti) {
940  switch (ti.get_type()) {
941  case kBOOLEAN:
942  *(bool*)buf = d.boolval;
943  return buf + sizeof(bool);
944  case kNUMERIC:
945  case kDECIMAL:
946  case kBIGINT:
947  *(int64_t*)buf = d.bigintval;
948  return buf + sizeof(int64_t);
949  case kINT:
950  *(int32_t*)buf = d.intval;
951  return buf + sizeof(int32_t);
952  case kSMALLINT:
953  *(int16_t*)buf = d.smallintval;
954  return buf + sizeof(int16_t);
955  case kTINYINT:
956  *(int8_t*)buf = d.tinyintval;
957  return buf + sizeof(int8_t);
958  case kFLOAT:
959  *(float*)buf = d.floatval;
960  return buf + sizeof(float);
961  case kDOUBLE:
962  *(double*)buf = d.doubleval;
963  return buf + sizeof(double);
964  case kTIME:
965  case kTIMESTAMP:
966  case kDATE:
967  *reinterpret_cast<int64_t*>(buf) = d.bigintval;
968  return buf + sizeof(int64_t);
969  default:
970  return nullptr;
971  }
972 }
973 
974 inline auto generate_array_type(const SQLTypes subtype) {
975  auto ti = SQLTypeInfo(kARRAY, false);
976  ti.set_subtype(subtype);
977  return ti;
978 }
979 
980 inline auto generate_column_type(const SQLTypes subtype) {
981  auto ti = SQLTypeInfo(kCOLUMN, false);
982  ti.set_subtype(subtype);
983  return ti;
984 }
985 
986 inline auto generate_column_list_type(const SQLTypes subtype) {
987  auto ti = SQLTypeInfo(kCOLUMN_LIST, false);
988  ti.set_subtype(subtype);
989  return ti;
990 }
int8_t tinyintval
Definition: sqltypes.h:206
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:315
void set_compression(EncodingType c)
Definition: sqltypes.h:414
void set_size(int s)
Definition: sqltypes.h:412
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
HOST DEVICE void operator=(const SQLTypeInfo &rhs)
Definition: sqltypes.h:562
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:356
bool is_varlen_array() const
Definition: sqltypes.h:497
DEVICE VarlenDatum()
Definition: sqltypes.h:149
std::string toString(const ExtArgumentType &sig_type)
Definition: sqltypes.h:48
DEVICE constexpr bool is_cuda_compiler()
Definition: sqltypes.h:193
SQLTypes
Definition: sqltypes.h:37
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:221
bool is_timestamp() const
Definition: sqltypes.h:742
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:222
#define NULL_ARRAY_INT
tuple d
Definition: test_fsi.py:9
#define NULL_FLOAT
bool is_null
Definition: sqltypes.h:147
#define NULL_BIGINT
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:931
#define LOG(tag)
Definition: Logger.h:188
HOST DEVICE bool operator==(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:542
bool boolval
Definition: sqltypes.h:205
bool is_fp() const
Definition: sqltypes.h:492
HOST DEVICE int get_scale() const
Definition: sqltypes.h:319
bool is_varlen() const
Definition: sqltypes.h:513
#define NULL_ARRAY_SMALLINT
auto generate_column_type(const SQLTypes subtype)
Definition: sqltypes.h:980
std::string get_compression_name() const
Definition: sqltypes.h:455
std::string concat(Types &&...parms)
VarlenDatum * arrayval
Definition: sqltypes.h:212
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:405
SQLTypeInfo(SQLTypes t, int d, int s)
Definition: sqltypes.h:284
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:910
Definition: sqltypes.h:64
#define NULL_ARRAY_TINYINT
HOST DEVICE bool is_null_fixlen_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:666
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
bool is_number() const
Definition: sqltypes.h:493
int32_t intval
Definition: sqltypes.h:208
bool is_time() const
Definition: sqltypes.h:494
std::string to_string(char const *&&v)
HostArrayDatum(size_t const l, int8_t *p, bool const n, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:183
int8_t * pointer
Definition: sqltypes.h:146
#define NULL_INT
int32_t StringOffsetT
Definition: sqltypes.h:936
bool has_render_group() const
Definition: sqltypes.h:394
#define DEVICE
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:202
#define HOST
void set_input_srid(int d)
Definition: sqltypes.h:408
float floatval
Definition: sqltypes.h:210
std::string to_string() const
Definition: sqltypes.h:456
EncodingType
Definition: sqltypes.h:227
int get_physical_cols() const
Definition: sqltypes.h:335
bool is_fixlen_array() const
Definition: sqltypes.h:498
bool is_castable(const SQLTypeInfo &new_type_info) const
Definition: sqltypes.h:573
#define IS_INTERVAL(T)
Definition: sqltypes.h:247
void set_fixed_size()
Definition: sqltypes.h:413
std::shared_ptr< int8_t > ManagedPtr
Definition: sqltypes.h:164
HOST DEVICE bool operator!=(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:534
int get_logical_size() const
Definition: sqltypes.h:325
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:306
static std::string type_name[kSQLTYPE_LAST]
Definition: sqltypes.h:755
bool is_integer() const
Definition: sqltypes.h:490
VarlenDatum(const size_t l, int8_t *p, const bool n)
Definition: sqltypes.h:152
#define NULL_ARRAY_COMPRESSED_32
SQLTypes subtype
Definition: sqltypes.h:746
void set_scale(int s)
Definition: sqltypes.h:409
bool notnull
Definition: sqltypes.h:750
bool has_bounds() const
Definition: sqltypes.h:383
int64_t bigintval
Definition: sqltypes.h:209
HostArrayDatum(size_t const l, int8_t *p, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:177
bool is_timeinterval() const
Definition: sqltypes.h:499
#define NULL_ARRAY_FLOAT
ManagedPtr data_ptr
Definition: sqltypes.h:186
auto generate_column_list_type(const SQLTypes subtype)
Definition: sqltypes.h:986
HostArrayDatum()=default
int16_t smallintval
Definition: sqltypes.h:207
bool is_dict_encoded_type() const
Definition: sqltypes.h:529
SQLTypeInfo(SQLTypes t, int d, int s, bool n)
Definition: sqltypes.h:275
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:239
bool is_boolean() const
Definition: sqltypes.h:495
HostArrayDatum(size_t const l, int8_t *p, bool const n)
Definition: sqltypes.h:171
bool g_enable_smem_group_by true
void operator()(int8_t *p)
Definition: sqltypes.h:160
SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:266
SQLTypeInfo(SQLTypes t)
Definition: sqltypes.h:294
#define NULL_BOOLEAN
std::string get_buffer_name() const
Definition: sqltypes.h:477
SQLTypeInfo(SQLTypes t, bool n, EncodingType c)
Definition: sqltypes.h:295
SQLTypeInfo get_array_type() const
Definition: sqltypes.h:716
EncodingType compression
Definition: sqltypes.h:751
int get_precision() const
Definition: sqltypes.h:317
std::string * stringval
Definition: sqltypes.h:214
void set_output_srid(int s)
Definition: sqltypes.h:410
bool is_buffer() const
Definition: sqltypes.h:506
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:419
auto generate_array_type(const SQLTypes subtype)
Definition: sqltypes.h:974
bool is_column() const
Definition: sqltypes.h:501
DEVICE DeviceArrayDatum()
Definition: sqltypes.h:190
HOST DEVICE bool is_null(const Datum &d) const
Definition: sqltypes.h:604
void set_comp_param(int p)
Definition: sqltypes.h:415
HOST DEVICE int get_storage_size() const
Definition: sqltypes.h:758
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqltypes.h:51
Definition: sqltypes.h:52
static std::string comp_name[kENCODING_LAST]
Definition: sqltypes.h:756
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:939
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
bool is_date_in_days() const
Definition: sqltypes.h:720
int get_array_context_logical_size() const
Definition: sqltypes.h:551
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:435
int32_t ArrayOffsetT
Definition: sqltypes.h:937
void set_dimension(int d)
Definition: sqltypes.h:406
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:316
#define IS_INTEGER(T)
Definition: sqltypes.h:239
std::string get_type_name() const
Definition: sqltypes.h:417
Definition: sqltypes.h:40
#define IS_STRING(T)
Definition: sqltypes.h:244
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:323
HOST DEVICE int get_input_srid() const
Definition: sqltypes.h:318
#define NULL_TINYINT
#define NULL_ARRAY_DOUBLE
virtual DEVICE ~VarlenDatum()
Definition: sqltypes.h:150
bool is_bytes() const
Definition: sqltypes.h:503
bool is_column_list() const
Definition: sqltypes.h:502
bool g_enable_watchdog false
Definition: Execute.cpp:76
void set_notnull(bool n)
Definition: sqltypes.h:411
bool is_geometry() const
Definition: sqltypes.h:500
char * t
bool is_high_precision_timestamp() const
Definition: sqltypes.h:732
SQLTypes type
Definition: sqltypes.h:745
#define NULL_SMALLINT
HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
Definition: sqltypes.h:168
#define NULL_ARRAY_BIGINT
bool is_dict_encoded_string() const
Definition: sqltypes.h:525
Definition: sqltypes.h:44
bool is_varlen_indeed() const
Definition: sqltypes.h:519
bool is_string() const
Definition: sqltypes.h:488
bool transforms() const
Definition: sqltypes.h:509
SQLTypeInfo(SQLTypes t, bool n)
Definition: sqltypes.h:285
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:321
int8_t * numbersPtr
Definition: sqltypes.h:220
bool is_string_array() const
Definition: sqltypes.h:489
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:712
bool is_decimal() const
Definition: sqltypes.h:491
int get_physical_coord_cols() const
Definition: sqltypes.h:350
#define IS_NUMBER(T)
Definition: sqltypes.h:241
void operator()(int8_t *)
Definition: sqltypes.h:157
#define IS_GEO(T)
Definition: sqltypes.h:245
#define TRANSIENT_DICT(ID)
Definition: sqltypes.h:254
int comp_param
Definition: sqltypes.h:752
bool is_date() const
Definition: sqltypes.h:730
bool is_array() const
Definition: sqltypes.h:496
void set_precision(int d)
Definition: sqltypes.h:407
SQLTypeInfo get_nullable_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:925
int dimension
Definition: sqltypes.h:747
HOST DEVICE bool is_null_point_coord_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:699
double doubleval
Definition: sqltypes.h:211
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:320
constexpr auto is_datetime(SQLTypes type)
Definition: sqltypes.h:257
HOST DEVICE bool is_null(const int8_t *val) const
Definition: sqltypes.h:641
size_t length
Definition: sqltypes.h:145
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:404