OmniSciDB  bf83d84833
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
sqltypes.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
#include "../Logger/Logger.h"
#include "StringTransform.h"
#include "funcannotations.h"

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>
35 
36 // must not change because these values persist in catalogs.
// Numeric ids of every SQL type. The values are persisted in catalogs, so
// existing enumerators must never be renumbered.
enum SQLTypes {
  kNULLT = 0,  // type for null values
  kBOOLEAN = 1,
  kCHAR = 2,
  kVARCHAR = 3,
  kNUMERIC = 4,
  kDECIMAL = 5,
  kINT = 6,
  kSMALLINT = 7,
  kFLOAT = 8,
  kDOUBLE = 9,
  kTIME = 10,
  kTIMESTAMP = 11,
  kBIGINT = 12,
  kTEXT = 13,
  kDATE = 14,
  kARRAY = 15,
  kINTERVAL_DAY_TIME = 16,
  kINTERVAL_YEAR_MONTH = 17,
  kPOINT = 18,
  kLINESTRING = 19,
  kPOLYGON = 20,
  kMULTIPOLYGON = 21,
  kTINYINT = 22,
  kGEOMETRY = 23,
  kGEOGRAPHY = 24,
  kEVAL_CONTEXT_TYPE = 25,  // Placeholder Type for ANY
  kVOID = 26,
  kCURSOR = 27,
  kCOLUMN = 28,
  kSQLTYPE_LAST = 29  // one past the last type id; sizes the type-name table
};
69 
70 #ifndef __CUDACC__
71 
72 inline std::string toString(const SQLTypes& type) {
73  switch (type) {
74  case kNULLT:
75  return "NULL";
76  case kBOOLEAN:
77  return "BOOL";
78  case kCHAR:
79  return "CHAR";
80  case kVARCHAR:
81  return "VARCHAR";
82  case kNUMERIC:
83  return "NUMERIC";
84  case kDECIMAL:
85  return "DECIMAL";
86  case kINT:
87  return "INT";
88  case kSMALLINT:
89  return "SMALLINT";
90  case kFLOAT:
91  return "FLOAT";
92  case kDOUBLE:
93  return "DOUBLE";
94  case kTIME:
95  return "TIME";
96  case kTIMESTAMP:
97  return "TIMESTAMP";
98  case kBIGINT:
99  return "BIGINT";
100  case kTEXT:
101  return "TEXT";
102  case kDATE:
103  return "DATE";
104  case kARRAY:
105  return "ARRAY";
106  case kINTERVAL_DAY_TIME:
107  return "DAY TIME INTERVAL";
109  return "YEAR MONTH INTERVAL";
110  case kPOINT:
111  return "POINT";
112  case kLINESTRING:
113  return "LINESTRING";
114  case kPOLYGON:
115  return "POLYGON";
116  case kMULTIPOLYGON:
117  return "MULTIPOLYGON";
118  case kTINYINT:
119  return "TINYINT";
120  case kGEOMETRY:
121  return "GEOMETRY";
122  case kGEOGRAPHY:
123  return "GEOGRAPHY";
124  case kEVAL_CONTEXT_TYPE:
125  return "UNEVALUATED ANY";
126  case kVOID:
127  return "VOID";
128  case kCURSOR:
129  return "CURSOR";
130  case kCOLUMN:
131  return "COLUMN";
132  case kSQLTYPE_LAST:
133  break;
134  }
135  LOG(FATAL) << "Invalid SQL type: " << type;
136  return "";
137 }
138 
139 #endif
140 
141 struct VarlenDatum {
142  size_t length;
143  int8_t* pointer;
144  bool is_null;
145 
146  DEVICE VarlenDatum() : length(0), pointer(nullptr), is_null(true) {}
147  DEVICE virtual ~VarlenDatum() {}
148 
149  VarlenDatum(const size_t l, int8_t* p, const bool n)
150  : length(l), pointer(p), is_null(n) {}
151 };
152 
// Deleter whose call operator deliberately does nothing with the pointer.
// NOTE(review): the `struct` header line is missing from this listing; the
// name DoNothingDeleter is reconstructed - confirm against the upstream header.
struct DoNothingDeleter {
  void operator()(int8_t*) {}
};
// Deleter that releases the buffer with std::free. The call operator is
// const and noexcept so the deleter can be invoked through a const object.
struct FreeDeleter {
  void operator()(int8_t* p) const noexcept { std::free(p); }
};
159 
160 struct HostArrayDatum : public VarlenDatum {
161  using ManagedPtr = std::shared_ptr<int8_t>;
162 
163  HostArrayDatum() = default;
164 
165  HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
166  : VarlenDatum(l, p.get(), n), data_ptr(p) {}
167 
168  HostArrayDatum(size_t const l, int8_t* p, bool const n)
169  : VarlenDatum(l, p, n), data_ptr(p, FreeDeleter()){};
170 
171  template <typename CUSTOM_DELETER,
172  typename = std::enable_if_t<
173  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
174  HostArrayDatum(size_t const l, int8_t* p, CUSTOM_DELETER custom_deleter)
175  : VarlenDatum(l, p, 0 == l), data_ptr(p, custom_deleter) {}
176 
177  template <typename CUSTOM_DELETER,
178  typename = std::enable_if_t<
179  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
180  HostArrayDatum(size_t const l, int8_t* p, bool const n, CUSTOM_DELETER custom_deleter)
181  : VarlenDatum(l, p, n), data_ptr(p, custom_deleter) {}
182 
184 };
185 
186 struct DeviceArrayDatum : public VarlenDatum {
188 };
189 
190 inline DEVICE constexpr bool is_cuda_compiler() {
191 #ifdef __CUDACC__
192  return true;
193 #else
194  return false;
195 #endif
196 }
197 
198 using ArrayDatum =
199  std::conditional_t<is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum>;
200 
201 union Datum {
202  bool boolval;
203  int8_t tinyintval;
204  int16_t smallintval;
205  int32_t intval;
206  int64_t bigintval;
207  float floatval;
208  double doubleval;
210 #ifndef __CUDACC__
211  std::string* stringval; // string value
212 #endif
213 };
214 
215 #ifndef __CUDACC__
217  int8_t* numbersPtr;
218  std::vector<std::string>* stringsPtr;
219  std::vector<ArrayDatum>* arraysPtr;
220 };
221 #endif
222 
223 // must not change because these values persist in catalogs.
enum EncodingType {
  kENCODING_NONE = 0,          // no encoding
  kENCODING_FIXED = 1,         // Fixed-bit encoding
  kENCODING_RL = 2,            // Run Length encoding
  kENCODING_DIFF = 3,          // Differential encoding
  kENCODING_DICT = 4,          // Dictionary encoding
  kENCODING_SPARSE = 5,        // Null encoding for sparse columns
  kENCODING_GEOINT = 6,        // Encoding coordinates as integers
  kENCODING_DATE_IN_DAYS = 7,  // Date encoding in days
  kENCODING_LAST = 8           // one past the last id; sizes the encoding-name table
};
235 
// Type-class tests on a SQLTypes value. The argument is expanded several
// times, so pass an expression without side effects.
#define IS_INTEGER(T) \
  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kBIGINT) || ((T) == kTINYINT))
#define IS_NUMBER(T)                                                             \
  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kDOUBLE) || ((T) == kFLOAT) || \
   ((T) == kBIGINT) || ((T) == kNUMERIC) || ((T) == kDECIMAL) || ((T) == kTINYINT))
#define IS_STRING(T) (((T) == kTEXT) || ((T) == kVARCHAR) || ((T) == kCHAR))
#define IS_GEO(T) \
  (((T) == kPOINT) || ((T) == kLINESTRING) || ((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
#define IS_INTERVAL(T) (((T) == kINTERVAL_DAY_TIME) || ((T) == kINTERVAL_YEAR_MONTH))
#define IS_DECIMAL(T) (((T) == kNUMERIC) || ((T) == kDECIMAL))
#define IS_GEO_POLY(T) (((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
247 
248 #include "InlineNullValues.h"
249 
250 #define TRANSIENT_DICT_ID 0
251 #define TRANSIENT_DICT(ID) (-(ID))
252 #define REGULAR_DICT(TRANSIENTID) (-(TRANSIENTID))
253 
254 constexpr auto is_datetime(SQLTypes type) {
255  return type == kTIME || type == kTIMESTAMP || type == kDATE;
256 }
257 
258 // @type SQLTypeInfo
259 // @brief a structure to capture all type information including
260 // length, precision, scale, etc.
261 class SQLTypeInfo {
262  public:
263  SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
264  : type(t)
265  , subtype(st)
266  , dimension(d)
267  , scale(s)
268  , notnull(n)
269  , compression(c)
270  , comp_param(p)
271  , size(get_storage_size()) {}
272  SQLTypeInfo(SQLTypes t, int d, int s, bool n)
273  : type(t)
274  , subtype(kNULLT)
275  , dimension(d)
276  , scale(s)
277  , notnull(n)
279  , comp_param(0)
280  , size(get_storage_size()) {}
281  SQLTypeInfo(SQLTypes t, int d, int s) : SQLTypeInfo(t, d, s, false) {}
283  : type(t)
284  , subtype(kNULLT)
285  , dimension(0)
286  , scale(0)
287  , notnull(n)
289  , comp_param(0)
290  , size(get_storage_size()) {}
293  : type(t)
294  , subtype(kNULLT)
295  , dimension(0)
296  , scale(0)
297  , notnull(n)
298  , compression(c)
299  , comp_param(0)
300  , size(get_storage_size()) {}
302  : type(kNULLT)
303  , subtype(kNULLT)
304  , dimension(0)
305  , scale(0)
306  , notnull(false)
308  , comp_param(0)
309  , size(0) {}
310 
311  HOST DEVICE inline SQLTypes get_type() const { return type; }
312  HOST DEVICE inline SQLTypes get_subtype() const { return subtype; }
313  HOST DEVICE inline int get_dimension() const { return dimension; }
314  inline int get_precision() const { return dimension; }
315  HOST DEVICE inline int get_input_srid() const { return dimension; }
316  HOST DEVICE inline int get_scale() const { return scale; }
317  HOST DEVICE inline int get_output_srid() const { return scale; }
318  HOST DEVICE inline bool get_notnull() const { return notnull; }
320  HOST DEVICE inline int get_comp_param() const { return comp_param; }
321  HOST DEVICE inline int get_size() const { return size; }
322  inline int get_logical_size() const {
325  return ti.get_size();
326  }
327  if (compression == kENCODING_DICT) {
328  return 4;
329  }
330  return get_size();
331  }
332  inline int get_physical_cols() const {
333  switch (type) {
334  case kPOINT:
335  return 1; // coords
336  case kLINESTRING:
337  return 2; // coords, bounds
338  case kPOLYGON:
339  return 4; // coords, ring_sizes, bounds, render_group
340  case kMULTIPOLYGON:
341  return 5; // coords, ring_sizes, poly_rings, bounds, render_group
342  default:
343  break;
344  }
345  return 0;
346  }
347  inline int get_physical_coord_cols() const {
348  // @TODO dmitri/simon rename this function?
349  // It needs to return the number of extra columns
350  // which need to go through the executor, as opposed
351  // to those which are only needed by CPU for poly
352  // cache building or what-not. For now, we just omit
353  // the Render Group column. If we add Bounding Box
354  // or something this may require rethinking. Perhaps
355  // these two functions need to return an array of
356  // offsets rather than just a number to loop over,
357  // so that executor and non-executor columns can
358  // be mixed.
359  // NOTE(adb): In binding to extension functions, we need to know some pretty specific
360  // type info about each of the physical coords cols for each geo type. I added checks
361  // there to ensure the physical coords col for the geo type match what we expect. If
362  // these values are ever changed, corresponding values in
363  // ExtensionFunctionsBinding.cpp::compute_narrowing_conv_scores and
364  // ExtensionFunctionsBinding.cpp::compute_widening_conv_scores will also need to be
365  // changed.
366  switch (type) {
367  case kPOINT:
368  return 1;
369  case kLINESTRING:
370  return 1; // omit bounds
371  case kPOLYGON:
372  return 2; // omit bounds, render group
373  case kMULTIPOLYGON:
374  return 3; // omit bounds, render group
375  default:
376  break;
377  }
378  return 0;
379  }
380  inline bool has_bounds() const {
381  switch (type) {
382  case kLINESTRING:
383  case kPOLYGON:
384  case kMULTIPOLYGON:
385  return true;
386  default:
387  break;
388  }
389  return false;
390  }
391  inline bool has_render_group() const {
392  switch (type) {
393  case kPOLYGON:
394  case kMULTIPOLYGON:
395  return true;
396  default:
397  break;
398  }
399  return false;
400  }
401  HOST DEVICE inline void set_type(SQLTypes t) { type = t; }
402  HOST DEVICE inline void set_subtype(SQLTypes st) { subtype = st; }
403  inline void set_dimension(int d) { dimension = d; }
404  inline void set_precision(int d) { dimension = d; }
405  inline void set_input_srid(int d) { dimension = d; }
406  inline void set_scale(int s) { scale = s; }
407  inline void set_output_srid(int s) { scale = s; }
408  inline void set_notnull(bool n) { notnull = n; }
409  inline void set_size(int s) { size = s; }
410  inline void set_fixed_size() { size = get_storage_size(); }
411  inline void set_compression(EncodingType c) { compression = c; }
412  inline void set_comp_param(int p) { comp_param = p; }
413 #ifndef __CUDACC__
414  inline std::string get_type_name() const {
415  if (IS_GEO(type)) {
416  std::string srid_string = "";
417  if (get_output_srid() > 0) {
418  srid_string = ", " + std::to_string(get_output_srid());
419  }
420  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
421  return type_name[static_cast<int>(subtype)] + "(" +
422  type_name[static_cast<int>(type)] + srid_string + ")";
423  }
424  std::string ps = "";
425  if (type == kDECIMAL || type == kNUMERIC) {
426  ps = "(" + std::to_string(dimension) + "," + std::to_string(scale) + ")";
427  } else if (type == kTIMESTAMP) {
428  ps = "(" + std::to_string(dimension) + ")";
429  }
430  if (type == kARRAY) {
431  auto elem_ti = get_elem_type();
432  auto num_elems = (size > 0) ? std::to_string(size / elem_ti.get_size()) : "";
433  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
434  return elem_ti.get_type_name() + ps + "[" + num_elems + "]";
435  }
436  if (type == kCOLUMN) {
437  auto elem_ti = get_elem_type();
438  auto num_elems =
439  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
440  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
441  return "COLUMN<" + type_name[static_cast<int>(subtype)] + ps + ">" + num_elems;
442  }
443  return type_name[static_cast<int>(type)] + ps;
444  }
445  inline std::string get_compression_name() const { return comp_name[(int)compression]; }
446  inline std::string to_string() const {
447  return concat("(",
448  type_name[static_cast<int>(type)],
449  ", ",
450  get_dimension(),
451  ", ",
452  get_scale(),
453  ", ",
454  get_notnull() ? "not nullable" : "nullable",
455  ", ",
457  ", ",
458  get_comp_param(),
459  ", ",
460  type_name[static_cast<int>(subtype)],
461  ": ",
462  get_size(),
463  ": ",
465  ")");
466  }
467  inline std::string get_buffer_name() const {
468  if (is_array())
469  return "Array";
470  if (is_bytes())
471  return "Bytes";
472  if (is_column())
473  return "Column";
474  assert(false);
475  return "";
476  }
477 #endif
478  inline bool is_string() const { return IS_STRING(type); }
479  inline bool is_string_array() const { return (type == kARRAY) && IS_STRING(subtype); }
480  inline bool is_integer() const { return IS_INTEGER(type); }
481  inline bool is_decimal() const { return type == kDECIMAL || type == kNUMERIC; }
482  inline bool is_fp() const { return type == kFLOAT || type == kDOUBLE; }
483  inline bool is_number() const { return IS_NUMBER(type); }
484  inline bool is_time() const { return is_datetime(type); }
485  inline bool is_boolean() const { return type == kBOOLEAN; }
486  inline bool is_array() const { return type == kARRAY; } // rbc Array
487  inline bool is_varlen_array() const { return type == kARRAY && size <= 0; }
488  inline bool is_fixlen_array() const { return type == kARRAY && size > 0; }
489  inline bool is_timeinterval() const { return IS_INTERVAL(type); }
490  inline bool is_geometry() const { return IS_GEO(type); }
491  inline bool is_column() const { return type == kCOLUMN; } // rbc Column
492  inline bool is_bytes() const {
493  return type == kTEXT && get_compression() == kENCODING_NONE;
494  } // rbc Bytes
495  inline bool is_buffer() const { return is_array() || is_column() || is_bytes(); }
496  inline bool transforms() const {
497  return IS_GEO(type) && get_output_srid() != get_input_srid();
498  }
499 
500  inline bool is_varlen() const { // TODO: logically this should ignore fixlen arrays
501  return (IS_STRING(type) && compression != kENCODING_DICT) || type == kARRAY ||
502  IS_GEO(type);
503  }
504 
505  // need this here till is_varlen can be fixed w/o negative impact to existing code
506  inline bool is_varlen_indeed() const {
507  // SQLTypeInfo.is_varlen() is broken with fixedlen array now
508  // and seems left broken for some concern, so fix it locally
509  return is_varlen() && !is_fixlen_array();
510  }
511 
512  inline bool is_dict_encoded_string() const {
513  return is_string() && compression == kENCODING_DICT;
514  }
515 
516  inline bool is_dict_encoded_type() const {
517  return is_dict_encoded_string() ||
519  }
520 
521  HOST DEVICE inline bool operator!=(const SQLTypeInfo& rhs) const {
522  return type != rhs.get_type() || subtype != rhs.get_subtype() ||
523  dimension != rhs.get_dimension() || scale != rhs.get_scale() ||
524  compression != rhs.get_compression() ||
527  notnull != rhs.get_notnull();
528  }
529  HOST DEVICE inline bool operator==(const SQLTypeInfo& rhs) const {
530  return type == rhs.get_type() && subtype == rhs.get_subtype() &&
531  dimension == rhs.get_dimension() && scale == rhs.get_scale() &&
532  compression == rhs.get_compression() &&
535  notnull == rhs.get_notnull();
536  }
537 
538  inline int get_array_context_logical_size() const {
539  if (is_string()) {
540  auto comp_type(get_compression());
541  if (comp_type == kENCODING_DICT || comp_type == kENCODING_FIXED ||
542  comp_type == kENCODING_NONE) {
543  return sizeof(int32_t);
544  }
545  }
546  return get_logical_size();
547  }
548 
549  HOST DEVICE inline void operator=(const SQLTypeInfo& rhs) {
550  type = rhs.get_type();
551  subtype = rhs.get_subtype();
552  dimension = rhs.get_dimension();
553  scale = rhs.get_scale();
554  notnull = rhs.get_notnull();
556  comp_param = rhs.get_comp_param();
557  size = rhs.get_size();
558  }
559 
560  inline bool is_castable(const SQLTypeInfo& new_type_info) const {
561  // can always cast between the same type but different precision/scale/encodings
562  if (type == new_type_info.get_type()) {
563  return true;
564  // can always cast from or to string
565  } else if (is_string() || new_type_info.is_string()) {
566  return true;
567  // can cast between numbers
568  } else if (is_number() && new_type_info.is_number()) {
569  return true;
570  // can cast from timestamp or date to number (epoch)
571  } else if ((type == kTIMESTAMP || type == kDATE) && new_type_info.is_number()) {
572  return true;
573  // can cast from date to timestamp
574  } else if (type == kDATE && new_type_info.get_type() == kTIMESTAMP) {
575  return true;
576  } else if (type == kTIMESTAMP && new_type_info.get_type() == kDATE) {
577  return true;
578  } else if (type == kBOOLEAN && new_type_info.is_number()) {
579  return true;
580  } else if (type == kARRAY && new_type_info.get_type() == kARRAY) {
581  return get_elem_type().is_castable(new_type_info.get_elem_type());
582  } else if (type == kCOLUMN && new_type_info.get_type() == kCOLUMN) {
583  return get_elem_type().is_castable(new_type_info.get_elem_type());
584  } else {
585  return false;
586  }
587  }
588 
589  HOST DEVICE inline bool is_null(const Datum& d) const {
590  // assuming Datum is always uncompressed
591  switch (type) {
592  case kBOOLEAN:
593  return (int8_t)d.boolval == NULL_BOOLEAN;
594  case kTINYINT:
595  return d.tinyintval == NULL_TINYINT;
596  case kSMALLINT:
597  return d.smallintval == NULL_SMALLINT;
598  case kINT:
599  return d.intval == NULL_INT;
600  case kBIGINT:
601  case kNUMERIC:
602  case kDECIMAL:
603  return d.bigintval == NULL_BIGINT;
604  case kFLOAT:
605  return d.floatval == NULL_FLOAT;
606  case kDOUBLE:
607  return d.doubleval == NULL_DOUBLE;
608  case kTIME:
609  case kTIMESTAMP:
610  case kDATE:
611  return d.bigintval == NULL_BIGINT;
612  case kTEXT:
613  case kVARCHAR:
614  case kCHAR:
615  // @TODO handle null strings
616  break;
617  case kNULLT:
618  return true;
619  case kARRAY:
620  return d.arrayval == NULL || d.arrayval->is_null;
621  default:
622  break;
623  }
624  return false;
625  }
626  HOST DEVICE inline bool is_null(const int8_t* val) const {
627  if (type == kFLOAT) {
628  return *(float*)val == NULL_FLOAT;
629  }
630  if (type == kDOUBLE) {
631  return *(double*)val == NULL_DOUBLE;
632  }
633  // val can be either compressed or uncompressed
634  switch (size) {
635  case 1:
636  return *val == NULL_TINYINT;
637  case 2:
638  return *(int16_t*)val == NULL_SMALLINT;
639  case 4:
640  return *(int32_t*)val == NULL_INT;
641  case 8:
642  return *(int64_t*)val == NULL_BIGINT;
643  case kNULLT:
644  return true;
645  default:
646  // @TODO(wei) handle null strings
647  break;
648  }
649  return false;
650  }
651  HOST DEVICE inline bool is_null_fixlen_array(const int8_t* val, int array_size) const {
652  // Check if fixed length array has a NULL_ARRAY sentinel as the first element
653  if (type == kARRAY && val && array_size > 0 && array_size == size) {
654  // Need to create element type to get the size, but can't call get_elem_type()
655  // since this is a HOST DEVICE function. Going through copy constructor instead.
656  auto elem_ti{*this};
657  elem_ti.set_type(subtype);
658  elem_ti.set_subtype(kNULLT);
659  auto elem_size = elem_ti.get_storage_size();
660  if (elem_size < 1) {
661  return false;
662  }
663  if (subtype == kFLOAT) {
664  return *(float*)val == NULL_ARRAY_FLOAT;
665  }
666  if (subtype == kDOUBLE) {
667  return *(double*)val == NULL_ARRAY_DOUBLE;
668  }
669  switch (elem_size) {
670  case 1:
671  return *val == NULL_ARRAY_TINYINT;
672  case 2:
673  return *(int16_t*)val == NULL_ARRAY_SMALLINT;
674  case 4:
675  return *(int32_t*)val == NULL_ARRAY_INT;
676  case 8:
677  return *(int64_t*)val == NULL_ARRAY_BIGINT;
678  default:
679  return false;
680  }
681  }
682  return false;
683  }
684  HOST DEVICE inline bool is_null_point_coord_array(const int8_t* val,
685  int array_size) const {
686  if (type == kARRAY && subtype == kTINYINT && val && array_size > 0 &&
687  array_size == size) {
688  if (array_size == 2 * sizeof(double)) {
689  return *(double*)val == NULL_ARRAY_DOUBLE;
690  }
691  if (array_size == 2 * sizeof(int32_t)) {
692  return *(uint32_t*)val == NULL_ARRAY_COMPRESSED_32;
693  }
694  }
695  return false;
696  }
697  inline SQLTypeInfo get_elem_type() const {
698  return SQLTypeInfo(
700  }
701  inline SQLTypeInfo get_array_type() const {
703  }
704 
705  inline bool is_date_in_days() const {
706  if (type == kDATE) {
707  const auto comp_type = get_compression();
708  if (comp_type == kENCODING_DATE_IN_DAYS) {
709  return true;
710  }
711  }
712  return false;
713  }
714 
715  inline bool is_date() const { return type == kDATE; }
716 
717  inline bool is_high_precision_timestamp() const {
718  if (type == kTIMESTAMP) {
719  const auto dimension = get_dimension();
720  if (dimension > 0) {
721  return true;
722  }
723  }
724  return false;
725  }
726 
727  inline bool is_timestamp() const { return type == kTIMESTAMP; }
728 
729  private:
730  SQLTypes type; // type id
731  SQLTypes subtype; // element type of arrays
732  int dimension; // VARCHAR/CHAR length or NUMERIC/DECIMAL precision
733  int scale; // NUMERIC/DECIMAL scale
734  bool notnull; // nullable? a hint, not used for type checking
735  EncodingType compression; // compression scheme
736  int comp_param; // compression parameter when applicable for certain schemes
737  int size; // size of the type in bytes. -1 for variable size
738 #ifndef __CUDACC__
739  static std::string type_name[kSQLTYPE_LAST];
740  static std::string comp_name[kENCODING_LAST];
741 #endif
742  HOST DEVICE inline int get_storage_size() const {
743  switch (type) {
744  case kBOOLEAN:
745  return sizeof(int8_t);
746  case kTINYINT:
747  return sizeof(int8_t);
748  case kSMALLINT:
749  switch (compression) {
750  case kENCODING_NONE:
751  return sizeof(int16_t);
752  case kENCODING_FIXED:
753  case kENCODING_SPARSE:
754  return comp_param / 8;
755  case kENCODING_RL:
756  case kENCODING_DIFF:
757  break;
758  default:
759  assert(false);
760  }
761  break;
762  case kINT:
763  switch (compression) {
764  case kENCODING_NONE:
765  return sizeof(int32_t);
766  case kENCODING_FIXED:
767  case kENCODING_SPARSE:
768  return comp_param / 8;
769  case kENCODING_RL:
770  case kENCODING_DIFF:
771  break;
772  default:
773  assert(false);
774  }
775  break;
776  case kBIGINT:
777  case kNUMERIC:
778  case kDECIMAL:
779  switch (compression) {
780  case kENCODING_NONE:
781  return sizeof(int64_t);
782  case kENCODING_FIXED:
783  case kENCODING_SPARSE:
784  return comp_param / 8;
785  case kENCODING_RL:
786  case kENCODING_DIFF:
787  break;
788  default:
789  assert(false);
790  }
791  break;
792  case kFLOAT:
793  switch (compression) {
794  case kENCODING_NONE:
795  return sizeof(float);
796  case kENCODING_FIXED:
797  case kENCODING_RL:
798  case kENCODING_DIFF:
799  case kENCODING_SPARSE:
800  assert(false);
801  break;
802  default:
803  assert(false);
804  }
805  break;
806  case kDOUBLE:
807  switch (compression) {
808  case kENCODING_NONE:
809  return sizeof(double);
810  case kENCODING_FIXED:
811  case kENCODING_RL:
812  case kENCODING_DIFF:
813  case kENCODING_SPARSE:
814  assert(false);
815  break;
816  default:
817  assert(false);
818  }
819  break;
820  case kTIMESTAMP:
821  case kTIME:
822  case kINTERVAL_DAY_TIME:
824  case kDATE:
825  switch (compression) {
826  case kENCODING_NONE:
827  return sizeof(int64_t);
828  case kENCODING_FIXED:
829  if (type == kTIMESTAMP && dimension > 0) {
830  assert(false); // disable compression for timestamp precisions
831  }
832  return comp_param / 8;
833  case kENCODING_RL:
834  case kENCODING_DIFF:
835  case kENCODING_SPARSE:
836  assert(false);
837  break;
839  switch (comp_param) {
840  case 0:
841  return 4; // Default date encoded in days is 32 bits
842  case 16:
843  case 32:
844  return comp_param / 8;
845  default:
846  assert(false);
847  break;
848  }
849  default:
850  assert(false);
851  }
852  break;
853  case kTEXT:
854  case kVARCHAR:
855  case kCHAR:
856  if (compression == kENCODING_DICT) {
857  return sizeof(int32_t); // @TODO(wei) must check DictDescriptor
858  }
859  break;
860  case kARRAY:
861  // TODO: return size for fixlen arrays?
862  break;
863  case kPOINT:
864  case kLINESTRING:
865  case kPOLYGON:
866  case kMULTIPOLYGON:
867  case kCOLUMN:
868  break;
869  default:
870  break;
871  }
872  return -1;
873  }
874 };
875 
877 
878 #ifndef __CUDACC__
879 #include <string_view>
880 
881 Datum StringToDatum(std::string_view s, SQLTypeInfo& ti);
882 std::string DatumToString(Datum d, const SQLTypeInfo& ti);
883 bool DatumEqual(const Datum, const Datum, const SQLTypeInfo& ti);
884 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
885  const SQLTypeInfo& type_info,
886  const SQLTypeInfo& new_type_info);
887 #endif
888 
889 #include "../QueryEngine/DateAdd.h"
890 #include "../QueryEngine/DateTruncate.h"
891 #include "../QueryEngine/ExtractFromTime.h"
892 
894  EncodingType encoding = type_info.get_compression();
895  if (encoding == kENCODING_DATE_IN_DAYS ||
896  (encoding == kENCODING_FIXED && type_info.get_type() != kARRAY)) {
897  encoding = kENCODING_NONE;
898  }
899  return SQLTypeInfo(type_info.get_type(),
900  type_info.get_dimension(),
901  type_info.get_scale(),
902  type_info.get_notnull(),
903  encoding,
904  type_info.get_comp_param(),
905  type_info.get_subtype());
906 }
907 
909  SQLTypeInfo nullable_type_info = type_info;
910  nullable_type_info.set_notnull(false);
911  return nullable_type_info;
912 }
913 
915  SQLTypeInfo nullable_type_info = get_logical_type_info(type_info);
916  return get_nullable_type_info(nullable_type_info);
917 }
918 
919 using StringOffsetT = int32_t;
920 using ArrayOffsetT = int32_t;
921 
922 inline int8_t* appendDatum(int8_t* buf, Datum d, const SQLTypeInfo& ti) {
923  switch (ti.get_type()) {
924  case kBOOLEAN:
925  *(bool*)buf = d.boolval;
926  return buf + sizeof(bool);
927  case kNUMERIC:
928  case kDECIMAL:
929  case kBIGINT:
930  *(int64_t*)buf = d.bigintval;
931  return buf + sizeof(int64_t);
932  case kINT:
933  *(int32_t*)buf = d.intval;
934  return buf + sizeof(int32_t);
935  case kSMALLINT:
936  *(int16_t*)buf = d.smallintval;
937  return buf + sizeof(int16_t);
938  case kTINYINT:
939  *(int8_t*)buf = d.tinyintval;
940  return buf + sizeof(int8_t);
941  case kFLOAT:
942  *(float*)buf = d.floatval;
943  return buf + sizeof(float);
944  case kDOUBLE:
945  *(double*)buf = d.doubleval;
946  return buf + sizeof(double);
947  case kTIME:
948  case kTIMESTAMP:
949  case kDATE:
950  *reinterpret_cast<int64_t*>(buf) = d.bigintval;
951  return buf + sizeof(int64_t);
952  default:
953  return nullptr;
954  }
955 }
int8_t tinyintval
Definition: sqltypes.h:203
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:312
void set_compression(EncodingType c)
Definition: sqltypes.h:411
void set_size(int s)
Definition: sqltypes.h:409
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:321
HOST DEVICE void operator=(const SQLTypeInfo &rhs)
Definition: sqltypes.h:549
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:240
bool is_varlen_array() const
Definition: sqltypes.h:487
DEVICE VarlenDatum()
Definition: sqltypes.h:146
std::string toString(const ExtArgumentType &sig_type)
Definition: sqltypes.h:48
DEVICE constexpr bool is_cuda_compiler()
Definition: sqltypes.h:190
SQLTypes
Definition: sqltypes.h:37
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:218
bool is_timestamp() const
Definition: sqltypes.h:727
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:219
#define NULL_ARRAY_INT
#define NULL_FLOAT
bool is_null
Definition: sqltypes.h:144
#define NULL_BIGINT
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:914
#define LOG(tag)
Definition: Logger.h:188
HOST DEVICE bool operator==(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:529
bool boolval
Definition: sqltypes.h:202
bool is_fp() const
Definition: sqltypes.h:482
HOST DEVICE int get_scale() const
Definition: sqltypes.h:316
bool is_varlen() const
Definition: sqltypes.h:500
#define NULL_ARRAY_SMALLINT
std::string get_compression_name() const
Definition: sqltypes.h:445
std::string concat(Types &&...parms)
VarlenDatum * arrayval
Definition: sqltypes.h:209
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:402
SQLTypeInfo(SQLTypes t, int d, int s)
Definition: sqltypes.h:281
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:893
Definition: sqltypes.h:64
#define NULL_ARRAY_TINYINT
HOST DEVICE bool is_null_fixlen_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:651
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:311
bool is_number() const
Definition: sqltypes.h:483
int32_t intval
Definition: sqltypes.h:205
bool is_time() const
Definition: sqltypes.h:484
std::string to_string(char const *&&v)
HostArrayDatum(size_t const l, int8_t *p, bool const n, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:180
int8_t * pointer
Definition: sqltypes.h:143
#define NULL_INT
int32_t StringOffsetT
Definition: sqltypes.h:919
bool has_render_group() const
Definition: sqltypes.h:391
#define DEVICE
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:199
#define HOST
void set_input_srid(int d)
Definition: sqltypes.h:405
float floatval
Definition: sqltypes.h:207
std::string to_string() const
Definition: sqltypes.h:446
EncodingType
Definition: sqltypes.h:224
int get_physical_cols() const
Definition: sqltypes.h:332
bool is_fixlen_array() const
Definition: sqltypes.h:488
bool is_castable(const SQLTypeInfo &new_type_info) const
Definition: sqltypes.h:560
#define IS_INTERVAL(T)
Definition: sqltypes.h:244
void set_fixed_size()
Definition: sqltypes.h:410
std::shared_ptr< int8_t > ManagedPtr
Definition: sqltypes.h:161
HOST DEVICE bool operator!=(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:521
int get_logical_size() const
Definition: sqltypes.h:322
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:190
static std::string type_name[kSQLTYPE_LAST]
Definition: sqltypes.h:739
bool is_integer() const
Definition: sqltypes.h:480
VarlenDatum(const size_t l, int8_t *p, const bool n)
Definition: sqltypes.h:149
#define NULL_ARRAY_COMPRESSED_32
SQLTypes subtype
Definition: sqltypes.h:731
void set_scale(int s)
Definition: sqltypes.h:406
bool notnull
Definition: sqltypes.h:734
bool has_bounds() const
Definition: sqltypes.h:380
int64_t bigintval
Definition: sqltypes.h:206
HostArrayDatum(size_t const l, int8_t *p, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:174
bool is_timeinterval() const
Definition: sqltypes.h:489
#define NULL_ARRAY_FLOAT
ManagedPtr data_ptr
Definition: sqltypes.h:183
HostArrayDatum()=default
int16_t smallintval
Definition: sqltypes.h:204
bool is_dict_encoded_type() const
Definition: sqltypes.h:516
SQLTypeInfo(SQLTypes t, int d, int s, bool n)
Definition: sqltypes.h:272
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:124
bool is_boolean() const
Definition: sqltypes.h:485
HostArrayDatum(size_t const l, int8_t *p, bool const n)
Definition: sqltypes.h:168
bool g_enable_smem_group_by true
void operator()(int8_t *p)
Definition: sqltypes.h:157
SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:263
SQLTypeInfo(SQLTypes t)
Definition: sqltypes.h:291
#define NULL_BOOLEAN
std::string get_buffer_name() const
Definition: sqltypes.h:467
SQLTypeInfo(SQLTypes t, bool n, EncodingType c)
Definition: sqltypes.h:292
SQLTypeInfo get_array_type() const
Definition: sqltypes.h:701
EncodingType compression
Definition: sqltypes.h:735
int get_precision() const
Definition: sqltypes.h:314
std::string * stringval
Definition: sqltypes.h:211
void set_output_srid(int s)
Definition: sqltypes.h:407
bool is_buffer() const
Definition: sqltypes.h:495
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:303
bool is_column() const
Definition: sqltypes.h:491
DEVICE DeviceArrayDatum()
Definition: sqltypes.h:187
HOST DEVICE bool is_null(const Datum &d) const
Definition: sqltypes.h:589
void set_comp_param(int p)
Definition: sqltypes.h:412
HOST DEVICE int get_storage_size() const
Definition: sqltypes.h:742
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqltypes.h:51
Definition: sqltypes.h:52
static std::string comp_name[kENCODING_LAST]
Definition: sqltypes.h:740
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:922
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:319
bool is_date_in_days() const
Definition: sqltypes.h:705
int get_array_context_logical_size() const
Definition: sqltypes.h:538
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:319
int32_t ArrayOffsetT
Definition: sqltypes.h:920
void set_dimension(int d)
Definition: sqltypes.h:403
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:313
#define IS_INTEGER(T)
Definition: sqltypes.h:236
std::string get_type_name() const
Definition: sqltypes.h:414
Definition: sqltypes.h:40
#define IS_STRING(T)
Definition: sqltypes.h:241
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:320
HOST DEVICE int get_input_srid() const
Definition: sqltypes.h:315
#define NULL_TINYINT
#define NULL_ARRAY_DOUBLE
virtual DEVICE ~VarlenDatum()
Definition: sqltypes.h:147
bool is_bytes() const
Definition: sqltypes.h:492
bool g_enable_watchdog false
Definition: Execute.cpp:76
void set_notnull(bool n)
Definition: sqltypes.h:408
bool is_geometry() const
Definition: sqltypes.h:490
bool is_high_precision_timestamp() const
Definition: sqltypes.h:717
SQLTypes type
Definition: sqltypes.h:730
#define NULL_SMALLINT
HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
Definition: sqltypes.h:165
#define NULL_ARRAY_BIGINT
bool is_dict_encoded_string() const
Definition: sqltypes.h:512
Definition: sqltypes.h:44
bool is_varlen_indeed() const
Definition: sqltypes.h:506
bool is_string() const
Definition: sqltypes.h:478
bool transforms() const
Definition: sqltypes.h:496
SQLTypeInfo(SQLTypes t, bool n)
Definition: sqltypes.h:282
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:318
int8_t * numbersPtr
Definition: sqltypes.h:217
bool is_string_array() const
Definition: sqltypes.h:479
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:697
bool is_decimal() const
Definition: sqltypes.h:481
int get_physical_coord_cols() const
Definition: sqltypes.h:347
#define IS_NUMBER(T)
Definition: sqltypes.h:238
void operator()(int8_t *)
Definition: sqltypes.h:154
#define IS_GEO(T)
Definition: sqltypes.h:242
#define TRANSIENT_DICT(ID)
Definition: sqltypes.h:251
int comp_param
Definition: sqltypes.h:736
bool is_date() const
Definition: sqltypes.h:715
bool is_array() const
Definition: sqltypes.h:486
void set_precision(int d)
Definition: sqltypes.h:404
SQLTypeInfo get_nullable_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:908
int dimension
Definition: sqltypes.h:732
HOST DEVICE bool is_null_point_coord_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:684
double doubleval
Definition: sqltypes.h:208
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:317
constexpr auto is_datetime(SQLTypes type)
Definition: sqltypes.h:254
HOST DEVICE bool is_null(const int8_t *val) const
Definition: sqltypes.h:626
size_t length
Definition: sqltypes.h:142
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:401