OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sqltypes.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "../Logger/Logger.h"
26 #include "StringTransform.h"
27 #include "funcannotations.h"
28 
29 #include <cassert>
30 #include <ctime>
31 #include <memory>
32 #include <string>
33 #include <type_traits>
34 #include <vector>
35 
36 // must not change because these values persist in catalogs.
// Identifier for every SQL type known to the system.
// Every enumerator has an explicit value because the numbers are persisted in
// catalogs: new types go immediately before kSQLTYPE_LAST and existing values
// are never renumbered.
enum SQLTypes {
  kNULLT = 0,  // type for null values
  kBOOLEAN = 1,
  kCHAR = 2,
  kVARCHAR = 3,
  kNUMERIC = 4,
  kDECIMAL = 5,
  kINT = 6,
  kSMALLINT = 7,
  kFLOAT = 8,
  kDOUBLE = 9,
  kTIME = 10,
  kTIMESTAMP = 11,
  kBIGINT = 12,
  kTEXT = 13,
  kDATE = 14,
  kARRAY = 15,
  kINTERVAL_DAY_TIME = 16,
  kINTERVAL_YEAR_MONTH = 17,
  kPOINT = 18,
  kLINESTRING = 19,
  kPOLYGON = 20,
  kMULTIPOLYGON = 21,
  kTINYINT = 22,
  kGEOMETRY = 23,
  kGEOGRAPHY = 24,
  kEVAL_CONTEXT_TYPE = 25,  // Placeholder Type for ANY
  kVOID = 26,
  kCURSOR = 27,
  kCOLUMN = 28,
  kCOLUMN_LIST = 29,
  kSQLTYPE_LAST = 30  // sentinel: one past the last real type, used to size lookup tables
};
70 
71 #ifndef __CUDACC__
72 
73 inline std::string toString(const SQLTypes& type) {
74  switch (type) {
75  case kNULLT:
76  return "NULL";
77  case kBOOLEAN:
78  return "BOOL";
79  case kCHAR:
80  return "CHAR";
81  case kVARCHAR:
82  return "VARCHAR";
83  case kNUMERIC:
84  return "NUMERIC";
85  case kDECIMAL:
86  return "DECIMAL";
87  case kINT:
88  return "INT";
89  case kSMALLINT:
90  return "SMALLINT";
91  case kFLOAT:
92  return "FLOAT";
93  case kDOUBLE:
94  return "DOUBLE";
95  case kTIME:
96  return "TIME";
97  case kTIMESTAMP:
98  return "TIMESTAMP";
99  case kBIGINT:
100  return "BIGINT";
101  case kTEXT:
102  return "TEXT";
103  case kDATE:
104  return "DATE";
105  case kARRAY:
106  return "ARRAY";
107  case kINTERVAL_DAY_TIME:
108  return "DAY TIME INTERVAL";
110  return "YEAR MONTH INTERVAL";
111  case kPOINT:
112  return "POINT";
113  case kLINESTRING:
114  return "LINESTRING";
115  case kPOLYGON:
116  return "POLYGON";
117  case kMULTIPOLYGON:
118  return "MULTIPOLYGON";
119  case kTINYINT:
120  return "TINYINT";
121  case kGEOMETRY:
122  return "GEOMETRY";
123  case kGEOGRAPHY:
124  return "GEOGRAPHY";
125  case kEVAL_CONTEXT_TYPE:
126  return "UNEVALUATED ANY";
127  case kVOID:
128  return "VOID";
129  case kCURSOR:
130  return "CURSOR";
131  case kCOLUMN:
132  return "COLUMN";
133  case kCOLUMN_LIST:
134  return "COLUMN_LIST";
135  case kSQLTYPE_LAST:
136  break;
137  }
138  LOG(FATAL) << "Invalid SQL type: " << type;
139  return "";
140 }
141 
142 #endif
143 
144 struct VarlenDatum {
145  size_t length;
146  int8_t* pointer;
147  bool is_null;
148 
149  DEVICE VarlenDatum() : length(0), pointer(nullptr), is_null(true) {}
150  DEVICE virtual ~VarlenDatum() {}
151 
152  VarlenDatum(const size_t l, int8_t* p, const bool n)
153  : length(l), pointer(p), is_null(n) {}
154 };
155 
157  void operator()(int8_t*) {}
158 };
// Deleter functor that hands the buffer to free().
struct FreeDeleter {
  void operator()(int8_t* p) {
    free(p);
  }
};
162 
163 struct HostArrayDatum : public VarlenDatum {
164  using ManagedPtr = std::shared_ptr<int8_t>;
165 
166  HostArrayDatum() = default;
167 
168  HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
169  : VarlenDatum(l, p.get(), n), data_ptr(p) {}
170 
171  HostArrayDatum(size_t const l, int8_t* p, bool const n)
172  : VarlenDatum(l, p, n), data_ptr(p, FreeDeleter()){};
173 
174  template <typename CUSTOM_DELETER,
175  typename = std::enable_if_t<
176  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
177  HostArrayDatum(size_t const l, int8_t* p, CUSTOM_DELETER custom_deleter)
178  : VarlenDatum(l, p, 0 == l), data_ptr(p, custom_deleter) {}
179 
180  template <typename CUSTOM_DELETER,
181  typename = std::enable_if_t<
182  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
183  HostArrayDatum(size_t const l, int8_t* p, bool const n, CUSTOM_DELETER custom_deleter)
184  : VarlenDatum(l, p, n), data_ptr(p, custom_deleter) {}
185 
187 };
188 
// Array datum variant derived from VarlenDatum. It is the type that the
// ArrayDatum alias resolves to when is_cuda_compiler() is true. No additional
// data members are visible in this listing.
189 struct DeviceArrayDatum : public VarlenDatum {
191 };
192 
193 inline DEVICE constexpr bool is_cuda_compiler() {
194 #ifdef __CUDACC__
195  return true;
196 #else
197  return false;
198 #endif
199 }
200 
201 using ArrayDatum =
202  std::conditional_t<is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum>;
203 
204 union Datum {
205  bool boolval;
206  int8_t tinyintval;
207  int16_t smallintval;
208  int32_t intval;
209  int64_t bigintval;
210  float floatval;
211  double doubleval;
213 #ifndef __CUDACC__
214  std::string* stringval; // string value
215 #endif
216 };
217 
218 #ifndef __CUDACC__
220  int8_t* numbersPtr;
221  std::vector<std::string>* stringsPtr;
222  std::vector<ArrayDatum>* arraysPtr;
223 };
224 #endif
225 
226 // must not change because these values persist in catalogs.
// Column encoding (compression) schemes. Values are explicit because they are
// persisted in catalogs; add new schemes immediately before kENCODING_LAST.
enum EncodingType {
  kENCODING_NONE = 0,          // no encoding
  kENCODING_FIXED = 1,         // Fixed-bit encoding
  kENCODING_RL = 2,            // Run Length encoding
  kENCODING_DIFF = 3,          // Differential encoding
  kENCODING_DICT = 4,          // Dictionary encoding
  kENCODING_SPARSE = 5,        // Null encoding for sparse columns
  kENCODING_GEOINT = 6,        // Encoding coordinates as integers
  kENCODING_DATE_IN_DAYS = 7,  // Date encoding in days
  kENCODING_LAST = 8           // sentinel: one past the last scheme, sizes lookup tables
};
238 
// Type-classification macros. Each takes a SQLTypes value T and expands to a
// boolean expression; T appears several times in the expansion, so do not pass
// an argument with side effects.

// True for the four integer types.
239 #define IS_INTEGER(T) \
240  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kBIGINT) || ((T) == kTINYINT))
// True for integer, floating-point and NUMERIC/DECIMAL types.
241 #define IS_NUMBER(T) \
242  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kDOUBLE) || ((T) == kFLOAT) || \
243  ((T) == kBIGINT) || ((T) == kNUMERIC) || ((T) == kDECIMAL) || ((T) == kTINYINT))
// True for TEXT, VARCHAR and CHAR.
244 #define IS_STRING(T) (((T) == kTEXT) || ((T) == kVARCHAR) || ((T) == kCHAR))
// True for the four concrete geo types (not kGEOMETRY / kGEOGRAPHY).
245 #define IS_GEO(T) \
246  (((T) == kPOINT) || ((T) == kLINESTRING) || ((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
// True for the two interval types.
247 #define IS_INTERVAL(T) ((T) == kINTERVAL_DAY_TIME || (T) == kINTERVAL_YEAR_MONTH)
// True for NUMERIC and DECIMAL.
248 #define IS_DECIMAL(T) ((T) == kNUMERIC || (T) == kDECIMAL)
// True for POLYGON and MULTIPOLYGON.
249 #define IS_GEO_POLY(T) (((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
250 
251 #include "InlineNullValues.h"
252 
// Dictionary-id helpers. TRANSIENT_DICT and REGULAR_DICT both expand to the
// negation of their argument, so each undoes the other.
// NOTE(review): presumably transient dictionaries are identified by the negated
// id of the regular dictionary -- confirm against the dictionary code.
253 #define TRANSIENT_DICT_ID 0
254 #define TRANSIENT_DICT(ID) (-(ID))
255 #define REGULAR_DICT(TRANSIENTID) (-(TRANSIENTID))
256 
257 constexpr auto is_datetime(SQLTypes type) {
258  return type == kTIME || type == kTIMESTAMP || type == kDATE;
259 }
260 
261 // @type SQLTypeInfo
262 // @brief a structure to capture all type information including
263 // length, precision, scale, etc.
264 class SQLTypeInfo {
265  public:
266  SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
267  : type(t)
268  , subtype(st)
269  , dimension(d)
270  , scale(s)
271  , notnull(n)
272  , compression(c)
273  , comp_param(p)
274  , size(get_storage_size()) {}
275  SQLTypeInfo(SQLTypes t, int d, int s, bool n)
276  : type(t)
277  , subtype(kNULLT)
278  , dimension(d)
279  , scale(s)
280  , notnull(n)
282  , comp_param(0)
283  , size(get_storage_size()) {}
284  SQLTypeInfo(SQLTypes t, int d, int s) : SQLTypeInfo(t, d, s, false) {}
286  : type(t)
287  , subtype(kNULLT)
288  , dimension(0)
289  , scale(0)
290  , notnull(n)
292  , comp_param(0)
293  , size(get_storage_size()) {}
296  : type(t)
297  , subtype(kNULLT)
298  , dimension(0)
299  , scale(0)
300  , notnull(n)
301  , compression(c)
302  , comp_param(0)
303  , size(get_storage_size()) {}
305  : type(kNULLT)
306  , subtype(kNULLT)
307  , dimension(0)
308  , scale(0)
309  , notnull(false)
311  , comp_param(0)
312  , size(0) {}
313 
314  HOST DEVICE inline SQLTypes get_type() const { return type; }
315  HOST DEVICE inline SQLTypes get_subtype() const { return subtype; }
316  HOST DEVICE inline int get_dimension() const { return dimension; }
317  inline int get_precision() const { return dimension; }
318  HOST DEVICE inline int get_input_srid() const { return dimension; }
319  HOST DEVICE inline int get_scale() const { return scale; }
320  HOST DEVICE inline int get_output_srid() const { return scale; }
321  HOST DEVICE inline bool get_notnull() const { return notnull; }
323  HOST DEVICE inline int get_comp_param() const { return comp_param; }
324  HOST DEVICE inline int get_size() const { return size; }
325  inline int get_logical_size() const {
328  return ti.get_size();
329  }
330  if (compression == kENCODING_DICT) {
331  return 4;
332  }
333  return get_size();
334  }
335  inline int get_physical_cols() const {
336  switch (type) {
337  case kPOINT:
338  return 1; // coords
339  case kLINESTRING:
340  return 2; // coords, bounds
341  case kPOLYGON:
342  return 4; // coords, ring_sizes, bounds, render_group
343  case kMULTIPOLYGON:
344  return 5; // coords, ring_sizes, poly_rings, bounds, render_group
345  default:
346  break;
347  }
348  return 0;
349  }
350  inline int get_physical_coord_cols() const {
351  // @TODO dmitri/simon rename this function?
352  // It needs to return the number of extra columns
353  // which need to go through the executor, as opposed
354  // to those which are only needed by CPU for poly
355  // cache building or what-not. For now, we just omit
356  // the Render Group column. If we add Bounding Box
357  // or something this may require rethinking. Perhaps
358  // these two functions need to return an array of
359  // offsets rather than just a number to loop over,
360  // so that executor and non-executor columns can
361  // be mixed.
362  // NOTE(adb): In binding to extension functions, we need to know some pretty specific
363  // type info about each of the physical coords cols for each geo type. I added checks
364  // there to ensure the physical coords col for the geo type match what we expect. If
365  // these values are ever changed, corresponding values in
366  // ExtensionFunctionsBinding.cpp::compute_narrowing_conv_scores and
367  // ExtensionFunctionsBinding.cpp::compute_widening_conv_scores will also need to be
368  // changed.
369  switch (type) {
370  case kPOINT:
371  return 1;
372  case kLINESTRING:
373  return 1; // omit bounds
374  case kPOLYGON:
375  return 2; // omit bounds, render group
376  case kMULTIPOLYGON:
377  return 3; // omit bounds, render group
378  default:
379  break;
380  }
381  return 0;
382  }
383  inline bool has_bounds() const {
384  switch (type) {
385  case kLINESTRING:
386  case kPOLYGON:
387  case kMULTIPOLYGON:
388  return true;
389  default:
390  break;
391  }
392  return false;
393  }
394  inline bool has_render_group() const {
395  switch (type) {
396  case kPOLYGON:
397  case kMULTIPOLYGON:
398  return true;
399  default:
400  break;
401  }
402  return false;
403  }
404  HOST DEVICE inline void set_type(SQLTypes t) { type = t; }
405  HOST DEVICE inline void set_subtype(SQLTypes st) { subtype = st; }
406  inline void set_dimension(int d) { dimension = d; }
407  inline void set_precision(int d) { dimension = d; }
408  inline void set_input_srid(int d) { dimension = d; }
409  inline void set_scale(int s) { scale = s; }
410  inline void set_output_srid(int s) { scale = s; }
411  inline void set_notnull(bool n) { notnull = n; }
412  inline void set_size(int s) { size = s; }
413  inline void set_fixed_size() { size = get_storage_size(); }
414  inline void set_compression(EncodingType c) { compression = c; }
415  inline void set_comp_param(int p) { comp_param = p; }
416 #ifndef __CUDACC__
417  inline std::string get_type_name() const {
418  if (IS_GEO(type)) {
419  std::string srid_string = "";
420  if (get_output_srid() > 0) {
421  srid_string = ", " + std::to_string(get_output_srid());
422  }
423  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
424  return type_name[static_cast<int>(subtype)] + "(" +
425  type_name[static_cast<int>(type)] + srid_string + ")";
426  }
427  std::string ps = "";
428  if (type == kDECIMAL || type == kNUMERIC) {
429  ps = "(" + std::to_string(dimension) + "," + std::to_string(scale) + ")";
430  } else if (type == kTIMESTAMP) {
431  ps = "(" + std::to_string(dimension) + ")";
432  }
433  if (type == kARRAY) {
434  auto elem_ti = get_elem_type();
435  auto num_elems = (size > 0) ? std::to_string(size / elem_ti.get_size()) : "";
436  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
437  return elem_ti.get_type_name() + ps + "[" + num_elems + "]";
438  }
439  if (type == kCOLUMN) {
440  auto elem_ti = get_elem_type();
441  auto num_elems =
442  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
443  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
444  return "COLUMN<" + type_name[static_cast<int>(subtype)] + ps + ">" + num_elems;
445  }
446  if (type == kCOLUMN_LIST) {
447  auto elem_ti = get_elem_type();
448  auto num_elems =
449  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
450  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
451  return "COLUMN_LIST<" + type_name[static_cast<int>(subtype)] + ps + ">" + num_elems;
452  }
453  return type_name[static_cast<int>(type)] + ps;
454  }
455  inline std::string get_compression_name() const { return comp_name[(int)compression]; }
456  std::string toString() const { return to_string(); } // for PRINT macro
457  inline std::string to_string() const {
458  return concat("(type=",
459  type_name[static_cast<int>(type)],
460  ", dimension=",
461  get_dimension(),
462  ", scale=",
463  get_scale(),
464  ", null=",
465  get_notnull() ? "not nullable" : "nullable",
466  ", name=",
468  ", comp=",
469  get_comp_param(),
470  ", subtype=",
471  type_name[static_cast<int>(subtype)],
472  ", size=",
473  get_size(),
474  ", element_size=",
476  ")");
477  }
478  inline std::string get_buffer_name() const {
479  if (is_array())
480  return "Array";
481  if (is_bytes())
482  return "Bytes";
483  if (is_column())
484  return "Column";
485  assert(false);
486  return "";
487  }
488 #endif
489  inline bool is_string() const { return IS_STRING(type); }
490  inline bool is_string_array() const { return (type == kARRAY) && IS_STRING(subtype); }
491  inline bool is_integer() const { return IS_INTEGER(type); }
492  inline bool is_decimal() const { return type == kDECIMAL || type == kNUMERIC; }
493  inline bool is_fp() const { return type == kFLOAT || type == kDOUBLE; }
494  inline bool is_number() const { return IS_NUMBER(type); }
495  inline bool is_time() const { return is_datetime(type); }
496  inline bool is_boolean() const { return type == kBOOLEAN; }
497  inline bool is_array() const { return type == kARRAY; } // rbc Array
498  inline bool is_varlen_array() const { return type == kARRAY && size <= 0; }
499  inline bool is_fixlen_array() const { return type == kARRAY && size > 0; }
500  inline bool is_timeinterval() const { return IS_INTERVAL(type); }
501  inline bool is_geometry() const { return IS_GEO(type); }
502  inline bool is_column() const { return type == kCOLUMN; } // rbc Column
503  inline bool is_column_list() const { return type == kCOLUMN_LIST; } // rbc ColumnList
504  inline bool is_bytes() const {
505  return type == kTEXT && get_compression() == kENCODING_NONE;
506  } // rbc Bytes
507  inline bool is_buffer() const {
508  return is_array() || is_column() || is_column_list() || is_bytes();
509  }
510  inline bool transforms() const {
511  return IS_GEO(type) && get_output_srid() != get_input_srid();
512  }
513 
514  inline bool is_varlen() const { // TODO: logically this should ignore fixlen arrays
515  return (IS_STRING(type) && compression != kENCODING_DICT) || type == kARRAY ||
516  IS_GEO(type);
517  }
518 
519  // need this here till is_varlen can be fixed w/o negative impact to existing code
520  inline bool is_varlen_indeed() const {
521  // SQLTypeInfo.is_varlen() is broken with fixedlen array now
522  // and seems left broken for some concern, so fix it locally
523  return is_varlen() && !is_fixlen_array();
524  }
525 
526  inline bool is_dict_encoded_string() const {
527  return is_string() && compression == kENCODING_DICT;
528  }
529 
530  inline bool is_dict_encoded_type() const {
531  return is_dict_encoded_string() ||
533  }
534 
535  HOST DEVICE inline bool operator!=(const SQLTypeInfo& rhs) const {
536  return type != rhs.get_type() || subtype != rhs.get_subtype() ||
537  dimension != rhs.get_dimension() || scale != rhs.get_scale() ||
538  compression != rhs.get_compression() ||
541  notnull != rhs.get_notnull();
542  }
543  HOST DEVICE inline bool operator==(const SQLTypeInfo& rhs) const {
544  return type == rhs.get_type() && subtype == rhs.get_subtype() &&
545  dimension == rhs.get_dimension() && scale == rhs.get_scale() &&
546  compression == rhs.get_compression() &&
549  notnull == rhs.get_notnull();
550  }
551 
552  inline int get_array_context_logical_size() const {
553  if (is_string()) {
554  auto comp_type(get_compression());
555  if (comp_type == kENCODING_DICT || comp_type == kENCODING_FIXED ||
556  comp_type == kENCODING_NONE) {
557  return sizeof(int32_t);
558  }
559  }
560  return get_logical_size();
561  }
562 
563  HOST DEVICE inline void operator=(const SQLTypeInfo& rhs) {
564  type = rhs.get_type();
565  subtype = rhs.get_subtype();
566  dimension = rhs.get_dimension();
567  scale = rhs.get_scale();
568  notnull = rhs.get_notnull();
570  comp_param = rhs.get_comp_param();
571  size = rhs.get_size();
572  }
573 
574  inline bool is_castable(const SQLTypeInfo& new_type_info) const {
575  // can always cast between the same type but different precision/scale/encodings
576  if (type == new_type_info.get_type()) {
577  return true;
578  // can always cast from or to string
579  } else if (is_string() || new_type_info.is_string()) {
580  return true;
581  // can cast between numbers
582  } else if (is_number() && new_type_info.is_number()) {
583  return true;
584  // can cast from timestamp or date to number (epoch)
585  } else if ((type == kTIMESTAMP || type == kDATE) && new_type_info.is_number()) {
586  return true;
587  // can cast from date to timestamp
588  } else if (type == kDATE && new_type_info.get_type() == kTIMESTAMP) {
589  return true;
590  } else if (type == kTIMESTAMP && new_type_info.get_type() == kDATE) {
591  return true;
592  } else if (type == kBOOLEAN && new_type_info.is_number()) {
593  return true;
594  } else if (type == kARRAY && new_type_info.get_type() == kARRAY) {
595  return get_elem_type().is_castable(new_type_info.get_elem_type());
596  } else if (type == kCOLUMN && new_type_info.get_type() == kCOLUMN) {
597  return get_elem_type().is_castable(new_type_info.get_elem_type());
598  } else if (type == kCOLUMN_LIST && new_type_info.get_type() == kCOLUMN_LIST) {
599  return get_elem_type().is_castable(new_type_info.get_elem_type());
600  } else {
601  return false;
602  }
603  }
604 
605  HOST DEVICE inline bool is_null(const Datum& d) const {
606  // assuming Datum is always uncompressed
607  switch (type) {
608  case kBOOLEAN:
609  return (int8_t)d.boolval == NULL_BOOLEAN;
610  case kTINYINT:
611  return d.tinyintval == NULL_TINYINT;
612  case kSMALLINT:
613  return d.smallintval == NULL_SMALLINT;
614  case kINT:
615  return d.intval == NULL_INT;
616  case kBIGINT:
617  case kNUMERIC:
618  case kDECIMAL:
619  return d.bigintval == NULL_BIGINT;
620  case kFLOAT:
621  return d.floatval == NULL_FLOAT;
622  case kDOUBLE:
623  return d.doubleval == NULL_DOUBLE;
624  case kTIME:
625  case kTIMESTAMP:
626  case kDATE:
627  return d.bigintval == NULL_BIGINT;
628  case kTEXT:
629  case kVARCHAR:
630  case kCHAR:
631  // @TODO handle null strings
632  break;
633  case kNULLT:
634  return true;
635  case kARRAY:
636  return d.arrayval == NULL || d.arrayval->is_null;
637  default:
638  break;
639  }
640  return false;
641  }
642  HOST DEVICE inline bool is_null(const int8_t* val) const {
643  if (type == kFLOAT) {
644  return *(float*)val == NULL_FLOAT;
645  }
646  if (type == kDOUBLE) {
647  return *(double*)val == NULL_DOUBLE;
648  }
649  // val can be either compressed or uncompressed
650  switch (size) {
651  case 1:
652  return *val == NULL_TINYINT;
653  case 2:
654  return *(int16_t*)val == NULL_SMALLINT;
655  case 4:
656  return *(int32_t*)val == NULL_INT;
657  case 8:
658  return *(int64_t*)val == NULL_BIGINT;
659  case kNULLT:
660  return true;
661  default:
662  // @TODO(wei) handle null strings
663  break;
664  }
665  return false;
666  }
667  HOST DEVICE inline bool is_null_fixlen_array(const int8_t* val, int array_size) const {
668  // Check if fixed length array has a NULL_ARRAY sentinel as the first element
669  if (type == kARRAY && val && array_size > 0 && array_size == size) {
670  // Need to create element type to get the size, but can't call get_elem_type()
671  // since this is a HOST DEVICE function. Going through copy constructor instead.
672  auto elem_ti{*this};
673  elem_ti.set_type(subtype);
674  elem_ti.set_subtype(kNULLT);
675  auto elem_size = elem_ti.get_storage_size();
676  if (elem_size < 1) {
677  return false;
678  }
679  if (subtype == kFLOAT) {
680  return *(float*)val == NULL_ARRAY_FLOAT;
681  }
682  if (subtype == kDOUBLE) {
683  return *(double*)val == NULL_ARRAY_DOUBLE;
684  }
685  switch (elem_size) {
686  case 1:
687  return *val == NULL_ARRAY_TINYINT;
688  case 2:
689  return *(int16_t*)val == NULL_ARRAY_SMALLINT;
690  case 4:
691  return *(int32_t*)val == NULL_ARRAY_INT;
692  case 8:
693  return *(int64_t*)val == NULL_ARRAY_BIGINT;
694  default:
695  return false;
696  }
697  }
698  return false;
699  }
700  HOST DEVICE inline bool is_null_point_coord_array(const int8_t* val,
701  int array_size) const {
702  if (type == kARRAY && subtype == kTINYINT && val && array_size > 0 &&
703  array_size == size) {
704  if (array_size == 2 * sizeof(double)) {
705  return *(double*)val == NULL_ARRAY_DOUBLE;
706  }
707  if (array_size == 2 * sizeof(int32_t)) {
708  return *(uint32_t*)val == NULL_ARRAY_COMPRESSED_32;
709  }
710  }
711  return false;
712  }
713  inline SQLTypeInfo get_elem_type() const {
714  return SQLTypeInfo(
716  }
717  inline SQLTypeInfo get_array_type() const {
719  }
720 
721  inline bool is_date_in_days() const {
722  if (type == kDATE) {
723  const auto comp_type = get_compression();
724  if (comp_type == kENCODING_DATE_IN_DAYS) {
725  return true;
726  }
727  }
728  return false;
729  }
730 
731  inline bool is_date() const { return type == kDATE; }
732 
733  inline bool is_high_precision_timestamp() const {
734  if (type == kTIMESTAMP) {
735  const auto dimension = get_dimension();
736  if (dimension > 0) {
737  return true;
738  }
739  }
740  return false;
741  }
742 
743  inline bool is_timestamp() const { return type == kTIMESTAMP; }
744 
745  private:
746  SQLTypes type; // type id
747  SQLTypes subtype; // element type of arrays or columns
748  int dimension; // VARCHAR/CHAR length or NUMERIC/DECIMAL precision or COLUMN_LIST
749  // length
750  int scale; // NUMERIC/DECIMAL scale
751  bool notnull; // nullable? a hint, not used for type checking
752  EncodingType compression; // compression scheme
753  int comp_param; // compression parameter when applicable for certain schemes
754  int size; // size of the type in bytes. -1 for variable size
755 #ifndef __CUDACC__
756  static std::string type_name[kSQLTYPE_LAST];
757  static std::string comp_name[kENCODING_LAST];
758 #endif
759  HOST DEVICE inline int get_storage_size() const {
760  switch (type) {
761  case kBOOLEAN:
762  return sizeof(int8_t);
763  case kTINYINT:
764  return sizeof(int8_t);
765  case kSMALLINT:
766  switch (compression) {
767  case kENCODING_NONE:
768  return sizeof(int16_t);
769  case kENCODING_FIXED:
770  case kENCODING_SPARSE:
771  return comp_param / 8;
772  case kENCODING_RL:
773  case kENCODING_DIFF:
774  break;
775  default:
776  assert(false);
777  }
778  break;
779  case kINT:
780  switch (compression) {
781  case kENCODING_NONE:
782  return sizeof(int32_t);
783  case kENCODING_FIXED:
784  case kENCODING_SPARSE:
785  return comp_param / 8;
786  case kENCODING_RL:
787  case kENCODING_DIFF:
788  break;
789  default:
790  assert(false);
791  }
792  break;
793  case kBIGINT:
794  case kNUMERIC:
795  case kDECIMAL:
796  switch (compression) {
797  case kENCODING_NONE:
798  return sizeof(int64_t);
799  case kENCODING_FIXED:
800  case kENCODING_SPARSE:
801  return comp_param / 8;
802  case kENCODING_RL:
803  case kENCODING_DIFF:
804  break;
805  default:
806  assert(false);
807  }
808  break;
809  case kFLOAT:
810  switch (compression) {
811  case kENCODING_NONE:
812  return sizeof(float);
813  case kENCODING_FIXED:
814  case kENCODING_RL:
815  case kENCODING_DIFF:
816  case kENCODING_SPARSE:
817  assert(false);
818  break;
819  default:
820  assert(false);
821  }
822  break;
823  case kDOUBLE:
824  switch (compression) {
825  case kENCODING_NONE:
826  return sizeof(double);
827  case kENCODING_FIXED:
828  case kENCODING_RL:
829  case kENCODING_DIFF:
830  case kENCODING_SPARSE:
831  assert(false);
832  break;
833  default:
834  assert(false);
835  }
836  break;
837  case kTIMESTAMP:
838  case kTIME:
839  case kINTERVAL_DAY_TIME:
841  case kDATE:
842  switch (compression) {
843  case kENCODING_NONE:
844  return sizeof(int64_t);
845  case kENCODING_FIXED:
846  if (type == kTIMESTAMP && dimension > 0) {
847  assert(false); // disable compression for timestamp precisions
848  }
849  return comp_param / 8;
850  case kENCODING_RL:
851  case kENCODING_DIFF:
852  case kENCODING_SPARSE:
853  assert(false);
854  break;
856  switch (comp_param) {
857  case 0:
858  return 4; // Default date encoded in days is 32 bits
859  case 16:
860  case 32:
861  return comp_param / 8;
862  default:
863  assert(false);
864  break;
865  }
866  default:
867  assert(false);
868  }
869  break;
870  case kTEXT:
871  case kVARCHAR:
872  case kCHAR:
873  if (compression == kENCODING_DICT) {
874  return sizeof(int32_t); // @TODO(wei) must check DictDescriptor
875  }
876  break;
877  case kARRAY:
878  // TODO: return size for fixlen arrays?
879  break;
880  case kPOINT:
881  case kLINESTRING:
882  case kPOLYGON:
883  case kMULTIPOLYGON:
884  case kCOLUMN:
885  case kCOLUMN_LIST:
886  break;
887  default:
888  break;
889  }
890  return -1;
891  }
892 };
893 
895 
896 #ifndef __CUDACC__
897 #include <string_view>
898 
899 Datum StringToDatum(std::string_view s, SQLTypeInfo& ti);
900 std::string DatumToString(Datum d, const SQLTypeInfo& ti);
901 bool DatumEqual(const Datum, const Datum, const SQLTypeInfo& ti);
902 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
903  const SQLTypeInfo& type_info,
904  const SQLTypeInfo& new_type_info);
905 #endif
906 
907 #include "../QueryEngine/DateAdd.h"
908 #include "../QueryEngine/DateTruncate.h"
909 #include "../QueryEngine/ExtractFromTime.h"
910 
912  EncodingType encoding = type_info.get_compression();
913  if (encoding == kENCODING_DATE_IN_DAYS ||
914  (encoding == kENCODING_FIXED && type_info.get_type() != kARRAY)) {
915  encoding = kENCODING_NONE;
916  }
917  return SQLTypeInfo(type_info.get_type(),
918  type_info.get_dimension(),
919  type_info.get_scale(),
920  type_info.get_notnull(),
921  encoding,
922  type_info.get_comp_param(),
923  type_info.get_subtype());
924 }
925 
927  SQLTypeInfo nullable_type_info = type_info;
928  nullable_type_info.set_notnull(false);
929  return nullable_type_info;
930 }
931 
933  SQLTypeInfo nullable_type_info = get_logical_type_info(type_info);
934  return get_nullable_type_info(nullable_type_info);
935 }
936 
937 using StringOffsetT = int32_t;
938 using ArrayOffsetT = int32_t;
939 
940 inline int8_t* appendDatum(int8_t* buf, Datum d, const SQLTypeInfo& ti) {
941  switch (ti.get_type()) {
942  case kBOOLEAN:
943  *(bool*)buf = d.boolval;
944  return buf + sizeof(bool);
945  case kNUMERIC:
946  case kDECIMAL:
947  case kBIGINT:
948  *(int64_t*)buf = d.bigintval;
949  return buf + sizeof(int64_t);
950  case kINT:
951  *(int32_t*)buf = d.intval;
952  return buf + sizeof(int32_t);
953  case kSMALLINT:
954  *(int16_t*)buf = d.smallintval;
955  return buf + sizeof(int16_t);
956  case kTINYINT:
957  *(int8_t*)buf = d.tinyintval;
958  return buf + sizeof(int8_t);
959  case kFLOAT:
960  *(float*)buf = d.floatval;
961  return buf + sizeof(float);
962  case kDOUBLE:
963  *(double*)buf = d.doubleval;
964  return buf + sizeof(double);
965  case kTIME:
966  case kTIMESTAMP:
967  case kDATE:
968  *reinterpret_cast<int64_t*>(buf) = d.bigintval;
969  return buf + sizeof(int64_t);
970  default:
971  return nullptr;
972  }
973 }
974 
975 inline auto generate_array_type(const SQLTypes subtype) {
976  auto ti = SQLTypeInfo(kARRAY, false);
977  ti.set_subtype(subtype);
978  return ti;
979 }
980 
981 inline auto generate_column_type(const SQLTypes subtype) {
982  auto ti = SQLTypeInfo(kCOLUMN, false);
983  ti.set_subtype(subtype);
984  return ti;
985 }
986 
987 inline auto generate_column_list_type(const SQLTypes subtype) {
988  auto ti = SQLTypeInfo(kCOLUMN_LIST, false);
989  ti.set_subtype(subtype);
990  return ti;
991 }
int8_t tinyintval
Definition: sqltypes.h:206
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:315
void set_compression(EncodingType c)
Definition: sqltypes.h:414
void set_size(int s)
Definition: sqltypes.h:412
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
HOST DEVICE void operator=(const SQLTypeInfo &rhs)
Definition: sqltypes.h:563
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:356
bool is_varlen_array() const
Definition: sqltypes.h:498
DEVICE VarlenDatum()
Definition: sqltypes.h:149
std::string toString(const ExtArgumentType &sig_type)
Definition: sqltypes.h:48
DEVICE constexpr bool is_cuda_compiler()
Definition: sqltypes.h:193
SQLTypes
Definition: sqltypes.h:37
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:221
bool is_timestamp() const
Definition: sqltypes.h:743
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:222
#define NULL_ARRAY_INT
tuple d
Definition: test_fsi.py:9
#define NULL_FLOAT
bool is_null
Definition: sqltypes.h:147
#define NULL_BIGINT
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:932
#define LOG(tag)
Definition: Logger.h:194
HOST DEVICE bool operator==(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:543
bool boolval
Definition: sqltypes.h:205
bool is_fp() const
Definition: sqltypes.h:493
HOST DEVICE int get_scale() const
Definition: sqltypes.h:319
bool is_varlen() const
Definition: sqltypes.h:514
#define NULL_ARRAY_SMALLINT
auto generate_column_type(const SQLTypes subtype)
Definition: sqltypes.h:981
std::string get_compression_name() const
Definition: sqltypes.h:455
std::string concat(Types &&...parms)
VarlenDatum * arrayval
Definition: sqltypes.h:212
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:405
SQLTypeInfo(SQLTypes t, int d, int s)
Definition: sqltypes.h:284
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:911
Definition: sqltypes.h:64
#define NULL_ARRAY_TINYINT
HOST DEVICE bool is_null_fixlen_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:667
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
bool is_number() const
Definition: sqltypes.h:494
int32_t intval
Definition: sqltypes.h:208
bool is_time() const
Definition: sqltypes.h:495
std::string to_string(char const *&&v)
HostArrayDatum(size_t const l, int8_t *p, bool const n, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:183
int8_t * pointer
Definition: sqltypes.h:146
#define NULL_INT
int32_t StringOffsetT
Definition: sqltypes.h:937
bool has_render_group() const
Definition: sqltypes.h:394
#define DEVICE
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:202
#define HOST
void set_input_srid(int d)
Definition: sqltypes.h:408
float floatval
Definition: sqltypes.h:210
std::string to_string() const
Definition: sqltypes.h:457
EncodingType
Definition: sqltypes.h:227
int get_physical_cols() const
Definition: sqltypes.h:335
bool is_fixlen_array() const
Definition: sqltypes.h:499
bool is_castable(const SQLTypeInfo &new_type_info) const
Definition: sqltypes.h:574
#define IS_INTERVAL(T)
Definition: sqltypes.h:247
void set_fixed_size()
Definition: sqltypes.h:413
std::shared_ptr< int8_t > ManagedPtr
Definition: sqltypes.h:164
HOST DEVICE bool operator!=(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:535
int get_logical_size() const
Definition: sqltypes.h:325
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:306
static std::string type_name[kSQLTYPE_LAST]
Definition: sqltypes.h:756
bool is_integer() const
Definition: sqltypes.h:491
VarlenDatum(const size_t l, int8_t *p, const bool n)
Definition: sqltypes.h:152
#define NULL_ARRAY_COMPRESSED_32
SQLTypes subtype
Definition: sqltypes.h:747
void set_scale(int s)
Definition: sqltypes.h:409
bool notnull
Definition: sqltypes.h:751
bool has_bounds() const
Definition: sqltypes.h:383
int64_t bigintval
Definition: sqltypes.h:209
HostArrayDatum(size_t const l, int8_t *p, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:177
bool is_timeinterval() const
Definition: sqltypes.h:500
#define NULL_ARRAY_FLOAT
ManagedPtr data_ptr
Definition: sqltypes.h:186
auto generate_column_list_type(const SQLTypes subtype)
Definition: sqltypes.h:987
HostArrayDatum()=default
int16_t smallintval
Definition: sqltypes.h:207
bool is_dict_encoded_type() const
Definition: sqltypes.h:530
SQLTypeInfo(SQLTypes t, int d, int s, bool n)
Definition: sqltypes.h:275
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:239
std::string toString() const
Definition: sqltypes.h:456
bool is_boolean() const
Definition: sqltypes.h:496
HostArrayDatum(size_t const l, int8_t *p, bool const n)
Definition: sqltypes.h:171
bool g_enable_smem_group_by true
void operator()(int8_t *p)
Definition: sqltypes.h:160
SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:266
SQLTypeInfo(SQLTypes t)
Definition: sqltypes.h:294
#define NULL_BOOLEAN
std::string get_buffer_name() const
Definition: sqltypes.h:478
SQLTypeInfo(SQLTypes t, bool n, EncodingType c)
Definition: sqltypes.h:295
SQLTypeInfo get_array_type() const
Definition: sqltypes.h:717
EncodingType compression
Definition: sqltypes.h:752
int get_precision() const
Definition: sqltypes.h:317
std::string * stringval
Definition: sqltypes.h:214
void set_output_srid(int s)
Definition: sqltypes.h:410
bool is_buffer() const
Definition: sqltypes.h:507
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:419
auto generate_array_type(const SQLTypes subtype)
Definition: sqltypes.h:975
bool is_column() const
Definition: sqltypes.h:502
DEVICE DeviceArrayDatum()
Definition: sqltypes.h:190
HOST DEVICE bool is_null(const Datum &d) const
Definition: sqltypes.h:605
void set_comp_param(int p)
Definition: sqltypes.h:415
HOST DEVICE int get_storage_size() const
Definition: sqltypes.h:759
#define CHECK_LT(x, y)
Definition: Logger.h:213
Definition: sqltypes.h:51
Definition: sqltypes.h:52
static std::string comp_name[kENCODING_LAST]
Definition: sqltypes.h:757
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:940
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
bool is_date_in_days() const
Definition: sqltypes.h:721
int get_array_context_logical_size() const
Definition: sqltypes.h:552
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:437
int32_t ArrayOffsetT
Definition: sqltypes.h:938
void set_dimension(int d)
Definition: sqltypes.h:406
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:316
#define IS_INTEGER(T)
Definition: sqltypes.h:239
std::string get_type_name() const
Definition: sqltypes.h:417
Definition: sqltypes.h:40
#define IS_STRING(T)
Definition: sqltypes.h:244
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:323
HOST DEVICE int get_input_srid() const
Definition: sqltypes.h:318
#define NULL_TINYINT
#define NULL_ARRAY_DOUBLE
virtual DEVICE ~VarlenDatum()
Definition: sqltypes.h:150
bool is_bytes() const
Definition: sqltypes.h:504
bool is_column_list() const
Definition: sqltypes.h:503
bool g_enable_watchdog false
Definition: Execute.cpp:76
void set_notnull(bool n)
Definition: sqltypes.h:411
bool is_geometry() const
Definition: sqltypes.h:501
char * t
bool is_high_precision_timestamp() const
Definition: sqltypes.h:733
SQLTypes type
Definition: sqltypes.h:746
#define NULL_SMALLINT
HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
Definition: sqltypes.h:168
#define NULL_ARRAY_BIGINT
bool is_dict_encoded_string() const
Definition: sqltypes.h:526
Definition: sqltypes.h:44
bool is_varlen_indeed() const
Definition: sqltypes.h:520
bool is_string() const
Definition: sqltypes.h:489
bool transforms() const
Definition: sqltypes.h:510
SQLTypeInfo(SQLTypes t, bool n)
Definition: sqltypes.h:285
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:321
int8_t * numbersPtr
Definition: sqltypes.h:220
bool is_string_array() const
Definition: sqltypes.h:490
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:713
bool is_decimal() const
Definition: sqltypes.h:492
int get_physical_coord_cols() const
Definition: sqltypes.h:350
#define IS_NUMBER(T)
Definition: sqltypes.h:241
void operator()(int8_t *)
Definition: sqltypes.h:157
#define IS_GEO(T)
Definition: sqltypes.h:245
#define TRANSIENT_DICT(ID)
Definition: sqltypes.h:254
int comp_param
Definition: sqltypes.h:753
bool is_date() const
Definition: sqltypes.h:731
bool is_array() const
Definition: sqltypes.h:497
void set_precision(int d)
Definition: sqltypes.h:407
SQLTypeInfo get_nullable_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:926
int dimension
Definition: sqltypes.h:748
HOST DEVICE bool is_null_point_coord_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:700
double doubleval
Definition: sqltypes.h:211
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:320
constexpr auto is_datetime(SQLTypes type)
Definition: sqltypes.h:257
HOST DEVICE bool is_null(const int8_t *val) const
Definition: sqltypes.h:642
size_t length
Definition: sqltypes.h:145
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:404