OmniSciDB  21ac014ffc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sqltypes.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "../Logger/Logger.h"
26 #include "StringTransform.h"
27 #include "funcannotations.h"
28 
29 #include <cassert>
30 #include <ctime>
31 #include <memory>
32 #include <string>
33 #include <type_traits>
34 #include <vector>
35 
36 // must not change because these values persist in catalogs.
37 enum SQLTypes {
38  kNULLT = 0, // type for null values
39  kBOOLEAN = 1,
40  kCHAR = 2,
41  kVARCHAR = 3,
42  kNUMERIC = 4,
43  kDECIMAL = 5,
44  kINT = 6,
45  kSMALLINT = 7,
46  kFLOAT = 8,
47  kDOUBLE = 9,
48  kTIME = 10,
49  kTIMESTAMP = 11,
50  kBIGINT = 12,
51  kTEXT = 13,
52  kDATE = 14,
53  kARRAY = 15,
56  kPOINT = 18,
58  kPOLYGON = 20,
60  kTINYINT = 22,
61  kGEOMETRY = 23,
62  kGEOGRAPHY = 24,
63  kEVAL_CONTEXT_TYPE = 25, // Placeholder Type for ANY
64  kVOID = 26,
65  kCURSOR = 27,
66  kCOLUMN = 28,
69 };
70 
71 #ifndef __CUDACC__
72 
73 inline std::string toString(const SQLTypes& type) {
74  switch (type) {
75  case kNULLT:
76  return "NULL";
77  case kBOOLEAN:
78  return "BOOL";
79  case kCHAR:
80  return "CHAR";
81  case kVARCHAR:
82  return "VARCHAR";
83  case kNUMERIC:
84  return "NUMERIC";
85  case kDECIMAL:
86  return "DECIMAL";
87  case kINT:
88  return "INT";
89  case kSMALLINT:
90  return "SMALLINT";
91  case kFLOAT:
92  return "FLOAT";
93  case kDOUBLE:
94  return "DOUBLE";
95  case kTIME:
96  return "TIME";
97  case kTIMESTAMP:
98  return "TIMESTAMP";
99  case kBIGINT:
100  return "BIGINT";
101  case kTEXT:
102  return "TEXT";
103  case kDATE:
104  return "DATE";
105  case kARRAY:
106  return "ARRAY";
107  case kINTERVAL_DAY_TIME:
108  return "DAY TIME INTERVAL";
110  return "YEAR MONTH INTERVAL";
111  case kPOINT:
112  return "POINT";
113  case kLINESTRING:
114  return "LINESTRING";
115  case kPOLYGON:
116  return "POLYGON";
117  case kMULTIPOLYGON:
118  return "MULTIPOLYGON";
119  case kTINYINT:
120  return "TINYINT";
121  case kGEOMETRY:
122  return "GEOMETRY";
123  case kGEOGRAPHY:
124  return "GEOGRAPHY";
125  case kEVAL_CONTEXT_TYPE:
126  return "UNEVALUATED ANY";
127  case kVOID:
128  return "VOID";
129  case kCURSOR:
130  return "CURSOR";
131  case kCOLUMN:
132  return "COLUMN";
133  case kCOLUMN_LIST:
134  return "COLUMN_LIST";
135  case kSQLTYPE_LAST:
136  break;
137  }
138  LOG(FATAL) << "Invalid SQL type: " << type;
139  return "";
140 }
141 
142 #endif
143 
144 struct VarlenDatum {
145  size_t length;
146  int8_t* pointer;
147  bool is_null;
148 
149  DEVICE VarlenDatum() : length(0), pointer(nullptr), is_null(true) {}
150  DEVICE virtual ~VarlenDatum() {}
151 
152  VarlenDatum(const size_t l, int8_t* p, const bool n)
153  : length(l), pointer(p), is_null(n) {}
154 };
155 
157  void operator()(int8_t*) {}
158 };
// Deleter functor that hands the buffer back to the C allocator via free().
struct FreeDeleter {
  void operator()(int8_t* p) {
    free(p);
  }
};
162 
163 struct HostArrayDatum : public VarlenDatum {
164  using ManagedPtr = std::shared_ptr<int8_t>;
165 
166  HostArrayDatum() = default;
167 
168  HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
169  : VarlenDatum(l, p.get(), n), data_ptr(p) {}
170 
171  HostArrayDatum(size_t const l, int8_t* p, bool const n)
172  : VarlenDatum(l, p, n), data_ptr(p, FreeDeleter()){};
173 
174  template <typename CUSTOM_DELETER,
175  typename = std::enable_if_t<
176  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
177  HostArrayDatum(size_t const l, int8_t* p, CUSTOM_DELETER custom_deleter)
178  : VarlenDatum(l, p, 0 == l), data_ptr(p, custom_deleter) {}
179 
180  template <typename CUSTOM_DELETER,
181  typename = std::enable_if_t<
182  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
183  HostArrayDatum(size_t const l, int8_t* p, bool const n, CUSTOM_DELETER custom_deleter)
184  : VarlenDatum(l, p, n), data_ptr(p, custom_deleter) {}
185 
187 };
188 
189 struct DeviceArrayDatum : public VarlenDatum {
191 };
192 
193 inline DEVICE constexpr bool is_cuda_compiler() {
194 #ifdef __CUDACC__
195  return true;
196 #else
197  return false;
198 #endif
199 }
200 
201 using ArrayDatum =
202  std::conditional_t<is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum>;
203 
204 union Datum {
205  int8_t boolval;
206  int8_t tinyintval;
207  int16_t smallintval;
208  int32_t intval;
209  int64_t bigintval;
210  float floatval;
211  double doubleval;
213 #ifndef __CUDACC__
214  std::string* stringval; // string value
215 #endif
216 };
217 
218 #ifndef __CUDACC__
220  int8_t* numbersPtr;
221  std::vector<std::string>* stringsPtr;
222  std::vector<ArrayDatum>* arraysPtr;
223 };
224 #endif
225 
226 // must not change because these values persist in catalogs.
228  kENCODING_NONE = 0, // no encoding
229  kENCODING_FIXED = 1, // Fixed-bit encoding
230  kENCODING_RL = 2, // Run Length encoding
231  kENCODING_DIFF = 3, // Differential encoding
232  kENCODING_DICT = 4, // Dictionary encoding
233  kENCODING_SPARSE = 5, // Null encoding for sparse columns
234  kENCODING_GEOINT = 6, // Encoding coordinates as intergers
235  kENCODING_DATE_IN_DAYS = 7, // Date encoding in days
237 };
238 
239 #define IS_INTEGER(T) \
240  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kBIGINT) || ((T) == kTINYINT))
241 #define IS_NUMBER(T) \
242  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kDOUBLE) || ((T) == kFLOAT) || \
243  ((T) == kBIGINT) || ((T) == kNUMERIC) || ((T) == kDECIMAL) || ((T) == kTINYINT))
244 #define IS_STRING(T) (((T) == kTEXT) || ((T) == kVARCHAR) || ((T) == kCHAR))
245 #define IS_GEO(T) \
246  (((T) == kPOINT) || ((T) == kLINESTRING) || ((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
247 #define IS_INTERVAL(T) ((T) == kINTERVAL_DAY_TIME || (T) == kINTERVAL_YEAR_MONTH)
248 #define IS_DECIMAL(T) ((T) == kNUMERIC || (T) == kDECIMAL)
249 #define IS_GEO_POLY(T) (((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
250 
251 #include "InlineNullValues.h"
252 
253 #define TRANSIENT_DICT_ID 0
254 #define TRANSIENT_DICT(ID) (-(ID))
255 #define REGULAR_DICT(TRANSIENTID) (-(TRANSIENTID))
256 
257 constexpr auto is_datetime(SQLTypes type) {
258  return type == kTIME || type == kTIMESTAMP || type == kDATE;
259 }
260 
261 // @type SQLTypeInfo
262 // @brief a structure to capture all type information including
263 // length, precision, scale, etc.
264 class SQLTypeInfo {
265  public:
266  SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
267  : type(t)
268  , subtype(st)
269  , dimension(d)
270  , scale(s)
271  , notnull(n)
272  , compression(c)
273  , comp_param(p)
274  , size(get_storage_size()) {}
275  SQLTypeInfo(SQLTypes t, int d, int s, bool n)
276  : type(t)
277  , subtype(kNULLT)
278  , dimension(d)
279  , scale(s)
280  , notnull(n)
282  , comp_param(0)
283  , size(get_storage_size()) {}
285  : type(t)
286  , subtype(st)
287  , dimension(0)
288  , scale(0)
289  , notnull(false)
290  , compression(c)
291  , comp_param(p)
292  , size(get_storage_size()) {}
293  SQLTypeInfo(SQLTypes t, int d, int s) : SQLTypeInfo(t, d, s, false) {}
295  : type(t)
296  , subtype(kNULLT)
297  , dimension(0)
298  , scale(0)
299  , notnull(n)
301  , comp_param(0)
302  , size(get_storage_size()) {}
305  : type(t)
306  , subtype(kNULLT)
307  , dimension(0)
308  , scale(0)
309  , notnull(n)
310  , compression(c)
311  , comp_param(0)
312  , size(get_storage_size()) {}
314  : type(kNULLT)
315  , subtype(kNULLT)
316  , dimension(0)
317  , scale(0)
318  , notnull(false)
320  , comp_param(0)
321  , size(0) {}
322 
323  HOST DEVICE inline SQLTypes get_type() const { return type; }
324  HOST DEVICE inline SQLTypes get_subtype() const { return subtype; }
325  HOST DEVICE inline int get_dimension() const { return dimension; }
326  inline int get_precision() const { return dimension; }
327  HOST DEVICE inline int get_input_srid() const { return dimension; }
328  HOST DEVICE inline int get_scale() const { return scale; }
329  HOST DEVICE inline int get_output_srid() const { return scale; }
330  HOST DEVICE inline bool get_notnull() const { return notnull; }
332  HOST DEVICE inline int get_comp_param() const { return comp_param; }
333  HOST DEVICE inline int get_size() const { return size; }
334  inline int get_logical_size() const {
337  return ti.get_size();
338  }
339  if (compression == kENCODING_DICT) {
340  return 4;
341  }
342  return get_size();
343  }
344  inline int get_physical_cols() const {
345  switch (type) {
346  case kPOINT:
347  return 1; // coords
348  case kLINESTRING:
349  return 2; // coords, bounds
350  case kPOLYGON:
351  return 4; // coords, ring_sizes, bounds, render_group
352  case kMULTIPOLYGON:
353  return 5; // coords, ring_sizes, poly_rings, bounds, render_group
354  default:
355  break;
356  }
357  return 0;
358  }
359  inline int get_physical_coord_cols() const {
360  // @TODO dmitri/simon rename this function?
361  // It needs to return the number of extra columns
362  // which need to go through the executor, as opposed
363  // to those which are only needed by CPU for poly
364  // cache building or what-not. For now, we just omit
365  // the Render Group column. If we add Bounding Box
366  // or something this may require rethinking. Perhaps
367  // these two functions need to return an array of
368  // offsets rather than just a number to loop over,
369  // so that executor and non-executor columns can
370  // be mixed.
371  // NOTE(adb): In binding to extension functions, we need to know some pretty specific
372  // type info about each of the physical coords cols for each geo type. I added checks
373  // there to ensure the physical coords col for the geo type match what we expect. If
374  // these values are ever changed, corresponding values in
375  // ExtensionFunctionsBinding.cpp::compute_narrowing_conv_scores and
376  // ExtensionFunctionsBinding.cpp::compute_widening_conv_scores will also need to be
377  // changed.
378  switch (type) {
379  case kPOINT:
380  return 1;
381  case kLINESTRING:
382  return 1; // omit bounds
383  case kPOLYGON:
384  return 2; // omit bounds, render group
385  case kMULTIPOLYGON:
386  return 3; // omit bounds, render group
387  default:
388  break;
389  }
390  return 0;
391  }
392  inline bool has_bounds() const {
393  switch (type) {
394  case kLINESTRING:
395  case kPOLYGON:
396  case kMULTIPOLYGON:
397  return true;
398  default:
399  break;
400  }
401  return false;
402  }
403  inline bool has_render_group() const {
404  switch (type) {
405  case kPOLYGON:
406  case kMULTIPOLYGON:
407  return true;
408  default:
409  break;
410  }
411  return false;
412  }
413  HOST DEVICE inline void set_type(SQLTypes t) { type = t; }
414  HOST DEVICE inline void set_subtype(SQLTypes st) { subtype = st; }
415  inline void set_dimension(int d) { dimension = d; }
416  inline void set_precision(int d) { dimension = d; }
417  inline void set_input_srid(int d) { dimension = d; }
418  inline void set_scale(int s) { scale = s; }
419  inline void set_output_srid(int s) { scale = s; }
420  inline void set_notnull(bool n) { notnull = n; }
421  inline void set_size(int s) { size = s; }
422  inline void set_fixed_size() { size = get_storage_size(); }
423  inline void set_compression(EncodingType c) { compression = c; }
424  inline void set_comp_param(int p) { comp_param = p; }
425 #ifndef __CUDACC__
426  inline std::string get_type_name() const {
427  if (IS_GEO(type)) {
428  std::string srid_string = "";
429  if (get_output_srid() > 0) {
430  srid_string = ", " + std::to_string(get_output_srid());
431  }
432  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
433  return type_name[static_cast<int>(subtype)] + "(" +
434  type_name[static_cast<int>(type)] + srid_string + ")";
435  }
436  std::string ps = "";
437  if (type == kDECIMAL || type == kNUMERIC) {
438  ps = "(" + std::to_string(dimension) + "," + std::to_string(scale) + ")";
439  } else if (type == kTIMESTAMP) {
440  ps = "(" + std::to_string(dimension) + ")";
441  }
442  if (type == kARRAY) {
443  auto elem_ti = get_elem_type();
444  auto num_elems = (size > 0) ? std::to_string(size / elem_ti.get_size()) : "";
445  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
446  return elem_ti.get_type_name() + ps + "[" + num_elems + "]";
447  }
448  if (type == kCOLUMN) {
449  auto elem_ti = get_elem_type();
450  auto num_elems =
451  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
452  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
453  return "COLUMN<" + type_name[static_cast<int>(subtype)] + ps + ">" + num_elems;
454  }
455  if (type == kCOLUMN_LIST) {
456  auto elem_ti = get_elem_type();
457  auto num_elems =
458  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
459  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
460  return "COLUMN_LIST<" + type_name[static_cast<int>(subtype)] + ps + ">" + num_elems;
461  }
462  return type_name[static_cast<int>(type)] + ps;
463  }
464  inline std::string get_compression_name() const { return comp_name[(int)compression]; }
465  std::string toString() const { return to_string(); } // for PRINT macro
466  inline std::string to_string() const {
467  return concat("(type=",
468  type_name[static_cast<int>(type)],
469  ", dimension=",
470  get_dimension(),
471  ", scale=",
472  get_scale(),
473  ", null=",
474  get_notnull() ? "not nullable" : "nullable",
475  ", name=",
477  ", comp=",
478  get_comp_param(),
479  ", subtype=",
480  type_name[static_cast<int>(subtype)],
481  ", size=",
482  get_size(),
483  ", element_size=",
485  ")");
486  }
487  inline std::string get_buffer_name() const {
488  if (is_array())
489  return "Array";
490  if (is_bytes())
491  return "Bytes";
492  if (is_column())
493  return "Column";
494  assert(false);
495  return "";
496  }
497 #endif
498  inline bool is_string() const { return IS_STRING(type); }
499  inline bool is_string_array() const { return (type == kARRAY) && IS_STRING(subtype); }
500  inline bool is_integer() const { return IS_INTEGER(type); }
501  inline bool is_decimal() const { return type == kDECIMAL || type == kNUMERIC; }
502  inline bool is_fp() const { return type == kFLOAT || type == kDOUBLE; }
503  inline bool is_number() const { return IS_NUMBER(type); }
504  inline bool is_time() const { return is_datetime(type); }
505  inline bool is_boolean() const { return type == kBOOLEAN; }
506  inline bool is_array() const { return type == kARRAY; } // rbc Array
507  inline bool is_varlen_array() const { return type == kARRAY && size <= 0; }
508  inline bool is_fixlen_array() const { return type == kARRAY && size > 0; }
509  inline bool is_timeinterval() const { return IS_INTERVAL(type); }
510  inline bool is_geometry() const { return IS_GEO(type); }
511  inline bool is_column() const { return type == kCOLUMN; } // rbc Column
512  inline bool is_column_list() const { return type == kCOLUMN_LIST; } // rbc ColumnList
513  inline bool is_bytes() const {
514  return type == kTEXT && get_compression() == kENCODING_NONE;
515  } // rbc Bytes
516  inline bool is_buffer() const {
517  return is_array() || is_column() || is_column_list() || is_bytes();
518  }
519  inline bool transforms() const {
520  return IS_GEO(type) && get_output_srid() != get_input_srid();
521  }
522 
523  inline bool is_varlen() const { // TODO: logically this should ignore fixlen arrays
524  return (IS_STRING(type) && compression != kENCODING_DICT) || type == kARRAY ||
525  IS_GEO(type);
526  }
527 
528  // need this here till is_varlen can be fixed w/o negative impact to existing code
529  inline bool is_varlen_indeed() const {
530  // SQLTypeInfo.is_varlen() is broken with fixedlen array now
531  // and seems left broken for some concern, so fix it locally
532  return is_varlen() && !is_fixlen_array();
533  }
534 
535  inline bool is_dict_encoded_string() const {
536  return is_string() && compression == kENCODING_DICT;
537  }
538 
539  inline bool is_subtype_dict_encoded_string() const {
541  }
542 
543  inline bool is_dict_encoded_type() const {
544  return is_dict_encoded_string() ||
546  }
547 
548  HOST DEVICE inline bool operator!=(const SQLTypeInfo& rhs) const {
549  return type != rhs.get_type() || subtype != rhs.get_subtype() ||
550  dimension != rhs.get_dimension() || scale != rhs.get_scale() ||
551  compression != rhs.get_compression() ||
554  notnull != rhs.get_notnull();
555  }
556  HOST DEVICE inline bool operator==(const SQLTypeInfo& rhs) const {
557  return type == rhs.get_type() && subtype == rhs.get_subtype() &&
558  dimension == rhs.get_dimension() && scale == rhs.get_scale() &&
559  compression == rhs.get_compression() &&
562  notnull == rhs.get_notnull();
563  }
564 
565  inline int get_array_context_logical_size() const {
566  if (is_string()) {
567  auto comp_type(get_compression());
568  if (comp_type == kENCODING_DICT || comp_type == kENCODING_FIXED ||
569  comp_type == kENCODING_NONE) {
570  return sizeof(int32_t);
571  }
572  }
573  return get_logical_size();
574  }
575 
576  HOST DEVICE inline void operator=(const SQLTypeInfo& rhs) {
577  type = rhs.get_type();
578  subtype = rhs.get_subtype();
579  dimension = rhs.get_dimension();
580  scale = rhs.get_scale();
581  notnull = rhs.get_notnull();
583  comp_param = rhs.get_comp_param();
584  size = rhs.get_size();
585  }
586 
587  inline bool is_castable(const SQLTypeInfo& new_type_info) const {
588  // can always cast between the same type but different precision/scale/encodings
589  if (type == new_type_info.get_type()) {
590  return true;
591  // can always cast from or to string
592  } else if (is_string() || new_type_info.is_string()) {
593  return true;
594  // can cast between numbers
595  } else if (is_number() && new_type_info.is_number()) {
596  return true;
597  // can cast from timestamp or date to number (epoch)
598  } else if ((type == kTIMESTAMP || type == kDATE) && new_type_info.is_number()) {
599  return true;
600  // can cast from date to timestamp
601  } else if (type == kDATE && new_type_info.get_type() == kTIMESTAMP) {
602  return true;
603  } else if (type == kTIMESTAMP && new_type_info.get_type() == kDATE) {
604  return true;
605  } else if (type == kBOOLEAN && new_type_info.is_number()) {
606  return true;
607  } else if (type == kARRAY && new_type_info.get_type() == kARRAY) {
608  return get_elem_type().is_castable(new_type_info.get_elem_type());
609  } else if (type == kCOLUMN && new_type_info.get_type() == kCOLUMN) {
610  return get_elem_type().is_castable(new_type_info.get_elem_type());
611  } else if (type == kCOLUMN_LIST && new_type_info.get_type() == kCOLUMN_LIST) {
612  return get_elem_type().is_castable(new_type_info.get_elem_type());
613  } else {
614  return false;
615  }
616  }
617 
618  HOST DEVICE inline bool is_null(const Datum& d) const {
619  // assuming Datum is always uncompressed
620  switch (type) {
621  case kBOOLEAN:
622  return (int8_t)d.boolval == NULL_BOOLEAN;
623  case kTINYINT:
624  return d.tinyintval == NULL_TINYINT;
625  case kSMALLINT:
626  return d.smallintval == NULL_SMALLINT;
627  case kINT:
628  return d.intval == NULL_INT;
629  case kBIGINT:
630  case kNUMERIC:
631  case kDECIMAL:
632  return d.bigintval == NULL_BIGINT;
633  case kFLOAT:
634  return d.floatval == NULL_FLOAT;
635  case kDOUBLE:
636  return d.doubleval == NULL_DOUBLE;
637  case kTIME:
638  case kTIMESTAMP:
639  case kDATE:
640  return d.bigintval == NULL_BIGINT;
641  case kTEXT:
642  case kVARCHAR:
643  case kCHAR:
644  // @TODO handle null strings
645  break;
646  case kNULLT:
647  return true;
648  case kARRAY:
649  return d.arrayval == NULL || d.arrayval->is_null;
650  default:
651  break;
652  }
653  return false;
654  }
655  HOST DEVICE inline bool is_null(const int8_t* val) const {
656  if (type == kFLOAT) {
657  return *(float*)val == NULL_FLOAT;
658  }
659  if (type == kDOUBLE) {
660  return *(double*)val == NULL_DOUBLE;
661  }
662  // val can be either compressed or uncompressed
663  switch (size) {
664  case 1:
665  return *val == NULL_TINYINT;
666  case 2:
667  return *(int16_t*)val == NULL_SMALLINT;
668  case 4:
669  return *(int32_t*)val == NULL_INT;
670  case 8:
671  return *(int64_t*)val == NULL_BIGINT;
672  case kNULLT:
673  return true;
674  default:
675  // @TODO(wei) handle null strings
676  break;
677  }
678  return false;
679  }
680  HOST DEVICE inline bool is_null_fixlen_array(const int8_t* val, int array_size) const {
681  // Check if fixed length array has a NULL_ARRAY sentinel as the first element
682  if (type == kARRAY && val && array_size > 0 && array_size == size) {
683  // Need to create element type to get the size, but can't call get_elem_type()
684  // since this is a HOST DEVICE function. Going through copy constructor instead.
685  auto elem_ti{*this};
686  elem_ti.set_type(subtype);
687  elem_ti.set_subtype(kNULLT);
688  auto elem_size = elem_ti.get_storage_size();
689  if (elem_size < 1) {
690  return false;
691  }
692  if (subtype == kFLOAT) {
693  return *(float*)val == NULL_ARRAY_FLOAT;
694  }
695  if (subtype == kDOUBLE) {
696  return *(double*)val == NULL_ARRAY_DOUBLE;
697  }
698  switch (elem_size) {
699  case 1:
700  return *val == NULL_ARRAY_TINYINT;
701  case 2:
702  return *(int16_t*)val == NULL_ARRAY_SMALLINT;
703  case 4:
704  return *(int32_t*)val == NULL_ARRAY_INT;
705  case 8:
706  return *(int64_t*)val == NULL_ARRAY_BIGINT;
707  default:
708  return false;
709  }
710  }
711  return false;
712  }
713  HOST DEVICE inline bool is_null_point_coord_array(const int8_t* val,
714  int array_size) const {
715  if (type == kARRAY && subtype == kTINYINT && val && array_size > 0 &&
716  array_size == size) {
717  if (array_size == 2 * sizeof(double)) {
718  return *(double*)val == NULL_ARRAY_DOUBLE;
719  }
720  if (array_size == 2 * sizeof(int32_t)) {
721  return *(uint32_t*)val == NULL_ARRAY_COMPRESSED_32;
722  }
723  }
724  return false;
725  }
726  inline SQLTypeInfo get_elem_type() const {
727  return SQLTypeInfo(
729  }
730  inline SQLTypeInfo get_array_type() const {
732  }
733 
734  inline bool is_date_in_days() const {
735  if (type == kDATE) {
736  const auto comp_type = get_compression();
737  if (comp_type == kENCODING_DATE_IN_DAYS) {
738  return true;
739  }
740  }
741  return false;
742  }
743 
744  inline bool is_date() const { return type == kDATE; }
745 
746  inline bool is_high_precision_timestamp() const {
747  if (type == kTIMESTAMP) {
748  const auto dimension = get_dimension();
749  if (dimension > 0) {
750  return true;
751  }
752  }
753  return false;
754  }
755 
756  inline bool is_timestamp() const { return type == kTIMESTAMP; }
757 
758  private:
759  SQLTypes type; // type id
760  SQLTypes subtype; // element type of arrays or columns
761  int dimension; // VARCHAR/CHAR length or NUMERIC/DECIMAL precision or COLUMN_LIST
762  // length
763  int scale; // NUMERIC/DECIMAL scale
764  bool notnull; // nullable? a hint, not used for type checking
765  EncodingType compression; // compression scheme
766  int comp_param; // compression parameter when applicable for certain schemes
767  int size; // size of the type in bytes. -1 for variable size
768 #ifndef __CUDACC__
769  static std::string type_name[kSQLTYPE_LAST];
770  static std::string comp_name[kENCODING_LAST];
771 #endif
772  HOST DEVICE inline int get_storage_size() const {
773  switch (type) {
774  case kBOOLEAN:
775  return sizeof(int8_t);
776  case kTINYINT:
777  return sizeof(int8_t);
778  case kSMALLINT:
779  switch (compression) {
780  case kENCODING_NONE:
781  return sizeof(int16_t);
782  case kENCODING_FIXED:
783  case kENCODING_SPARSE:
784  return comp_param / 8;
785  case kENCODING_RL:
786  case kENCODING_DIFF:
787  break;
788  default:
789  assert(false);
790  }
791  break;
792  case kINT:
793  switch (compression) {
794  case kENCODING_NONE:
795  return sizeof(int32_t);
796  case kENCODING_FIXED:
797  case kENCODING_SPARSE:
798  case kENCODING_GEOINT:
799  return comp_param / 8;
800  case kENCODING_RL:
801  case kENCODING_DIFF:
802  break;
803  default:
804  assert(false);
805  }
806  break;
807  case kBIGINT:
808  case kNUMERIC:
809  case kDECIMAL:
810  switch (compression) {
811  case kENCODING_NONE:
812  return sizeof(int64_t);
813  case kENCODING_FIXED:
814  case kENCODING_SPARSE:
815  return comp_param / 8;
816  case kENCODING_RL:
817  case kENCODING_DIFF:
818  break;
819  default:
820  assert(false);
821  }
822  break;
823  case kFLOAT:
824  switch (compression) {
825  case kENCODING_NONE:
826  return sizeof(float);
827  case kENCODING_FIXED:
828  case kENCODING_RL:
829  case kENCODING_DIFF:
830  case kENCODING_SPARSE:
831  assert(false);
832  break;
833  default:
834  assert(false);
835  }
836  break;
837  case kDOUBLE:
838  switch (compression) {
839  case kENCODING_NONE:
840  return sizeof(double);
841  case kENCODING_FIXED:
842  case kENCODING_RL:
843  case kENCODING_DIFF:
844  case kENCODING_SPARSE:
845  assert(false);
846  break;
847  default:
848  assert(false);
849  }
850  break;
851  case kTIMESTAMP:
852  case kTIME:
853  case kINTERVAL_DAY_TIME:
855  case kDATE:
856  switch (compression) {
857  case kENCODING_NONE:
858  return sizeof(int64_t);
859  case kENCODING_FIXED:
860  if (type == kTIMESTAMP && dimension > 0) {
861  assert(false); // disable compression for timestamp precisions
862  }
863  return comp_param / 8;
864  case kENCODING_RL:
865  case kENCODING_DIFF:
866  case kENCODING_SPARSE:
867  assert(false);
868  break;
870  switch (comp_param) {
871  case 0:
872  return 4; // Default date encoded in days is 32 bits
873  case 16:
874  case 32:
875  return comp_param / 8;
876  default:
877  assert(false);
878  break;
879  }
880  default:
881  assert(false);
882  }
883  break;
884  case kTEXT:
885  case kVARCHAR:
886  case kCHAR:
887  if (compression == kENCODING_DICT) {
888  return sizeof(int32_t); // @TODO(wei) must check DictDescriptor
889  }
890  break;
891  case kARRAY:
892  // TODO: return size for fixlen arrays?
893  break;
894  case kPOINT:
895  case kLINESTRING:
896  case kPOLYGON:
897  case kMULTIPOLYGON:
898  case kCOLUMN:
899  case kCOLUMN_LIST:
900  break;
901  default:
902  break;
903  }
904  return -1;
905  }
906 };
907 
909 
910 #ifndef __CUDACC__
911 #include <string_view>
912 
913 Datum StringToDatum(std::string_view s, SQLTypeInfo& ti);
914 std::string DatumToString(Datum d, const SQLTypeInfo& ti);
915 bool DatumEqual(const Datum, const Datum, const SQLTypeInfo& ti);
916 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
917  const SQLTypeInfo& type_info,
918  const SQLTypeInfo& new_type_info);
919 #endif
920 
921 #include "../QueryEngine/DateAdd.h"
922 #include "../QueryEngine/DateTruncate.h"
923 #include "../QueryEngine/ExtractFromTime.h"
924 
926  EncodingType encoding = type_info.get_compression();
927  if (encoding == kENCODING_DATE_IN_DAYS ||
928  (encoding == kENCODING_FIXED && type_info.get_type() != kARRAY)) {
929  encoding = kENCODING_NONE;
930  }
931  return SQLTypeInfo(type_info.get_type(),
932  type_info.get_dimension(),
933  type_info.get_scale(),
934  type_info.get_notnull(),
935  encoding,
936  type_info.get_comp_param(),
937  type_info.get_subtype());
938 }
939 
941  SQLTypeInfo nullable_type_info = type_info;
942  nullable_type_info.set_notnull(false);
943  return nullable_type_info;
944 }
945 
947  SQLTypeInfo nullable_type_info = get_logical_type_info(type_info);
948  return get_nullable_type_info(nullable_type_info);
949 }
950 
951 using StringOffsetT = int32_t;
952 using ArrayOffsetT = int32_t;
953 
954 inline int8_t* appendDatum(int8_t* buf, Datum d, const SQLTypeInfo& ti) {
955  switch (ti.get_type()) {
956  case kBOOLEAN:
957  *(int8_t*)buf = d.boolval;
958  return buf + sizeof(int8_t);
959  case kNUMERIC:
960  case kDECIMAL:
961  case kBIGINT:
962  *(int64_t*)buf = d.bigintval;
963  return buf + sizeof(int64_t);
964  case kINT:
965  *(int32_t*)buf = d.intval;
966  return buf + sizeof(int32_t);
967  case kSMALLINT:
968  *(int16_t*)buf = d.smallintval;
969  return buf + sizeof(int16_t);
970  case kTINYINT:
971  *(int8_t*)buf = d.tinyintval;
972  return buf + sizeof(int8_t);
973  case kFLOAT:
974  *(float*)buf = d.floatval;
975  return buf + sizeof(float);
976  case kDOUBLE:
977  *(double*)buf = d.doubleval;
978  return buf + sizeof(double);
979  case kTIME:
980  case kTIMESTAMP:
981  case kDATE:
982  *reinterpret_cast<int64_t*>(buf) = d.bigintval;
983  return buf + sizeof(int64_t);
984  default:
985  return nullptr;
986  }
987 }
988 
989 inline auto generate_array_type(const SQLTypes subtype) {
990  auto ti = SQLTypeInfo(kARRAY, false);
991  ti.set_subtype(subtype);
992  return ti;
993 }
994 
995 inline auto generate_column_type(const SQLTypes subtype) {
996  auto ti = SQLTypeInfo(kCOLUMN, false);
997  ti.set_subtype(subtype);
998  return ti;
999 }
1000 
1001 inline auto generate_column_type(const SQLTypes subtype, EncodingType c, int p) {
1002  auto ti = SQLTypeInfo(kCOLUMN, false);
1003  ti.set_subtype(subtype);
1004  ti.set_compression(c);
1005  ti.set_comp_param(p);
1006  return ti;
1007 }
1008 
1009 inline auto generate_column_list_type(const SQLTypes subtype) {
1010  auto ti = SQLTypeInfo(kCOLUMN_LIST, false);
1011  ti.set_subtype(subtype);
1012  return ti;
1013 }
int8_t tinyintval
Definition: sqltypes.h:206
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:324
void set_compression(EncodingType c)
Definition: sqltypes.h:423
void set_size(int s)
Definition: sqltypes.h:421
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:333
HOST DEVICE void operator=(const SQLTypeInfo &rhs)
Definition: sqltypes.h:576
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:356
bool is_varlen_array() const
Definition: sqltypes.h:507
DEVICE VarlenDatum()
Definition: sqltypes.h:149
std::string toString(const ExtArgumentType &sig_type)
Definition: sqltypes.h:48
DEVICE constexpr bool is_cuda_compiler()
Definition: sqltypes.h:193
SQLTypes
Definition: sqltypes.h:37
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:221
bool is_timestamp() const
Definition: sqltypes.h:756
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:222
#define NULL_ARRAY_INT
tuple d
Definition: test_fsi.py:9
#define NULL_FLOAT
bool is_null
Definition: sqltypes.h:147
#define NULL_BIGINT
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:946
#define LOG(tag)
Definition: Logger.h:200
HOST DEVICE bool operator==(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:556
bool is_fp() const
Definition: sqltypes.h:502
HOST DEVICE int get_scale() const
Definition: sqltypes.h:328
bool is_varlen() const
Definition: sqltypes.h:523
#define NULL_ARRAY_SMALLINT
auto generate_column_type(const SQLTypes subtype)
Definition: sqltypes.h:995
int8_t boolval
Definition: sqltypes.h:205
std::string get_compression_name() const
Definition: sqltypes.h:464
std::string concat(Types &&...parms)
VarlenDatum * arrayval
Definition: sqltypes.h:212
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:414
SQLTypeInfo(SQLTypes t, int d, int s)
Definition: sqltypes.h:293
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:925
Definition: sqltypes.h:64
#define NULL_ARRAY_TINYINT
HOST DEVICE bool is_null_fixlen_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:680
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:323
bool is_number() const
Definition: sqltypes.h:503
int32_t intval
Definition: sqltypes.h:208
bool is_time() const
Definition: sqltypes.h:504
std::string to_string(char const *&&v)
HostArrayDatum(size_t const l, int8_t *p, bool const n, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:183
int8_t * pointer
Definition: sqltypes.h:146
#define NULL_INT
int32_t StringOffsetT
Definition: sqltypes.h:951
bool has_render_group() const
Definition: sqltypes.h:403
#define DEVICE
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:202
#define HOST
void set_input_srid(int d)
Definition: sqltypes.h:417
float floatval
Definition: sqltypes.h:210
std::string to_string() const
Definition: sqltypes.h:466
EncodingType
Definition: sqltypes.h:227
int get_physical_cols() const
Definition: sqltypes.h:344
bool is_fixlen_array() const
Definition: sqltypes.h:508
bool is_castable(const SQLTypeInfo &new_type_info) const
Definition: sqltypes.h:587
#define IS_INTERVAL(T)
Definition: sqltypes.h:247
void set_fixed_size()
Definition: sqltypes.h:422
std::shared_ptr< int8_t > ManagedPtr
Definition: sqltypes.h:164
HOST DEVICE bool operator!=(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:548
int get_logical_size() const
Definition: sqltypes.h:334
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:306
static std::string type_name[kSQLTYPE_LAST]
Definition: sqltypes.h:769
bool is_integer() const
Definition: sqltypes.h:500
VarlenDatum(const size_t l, int8_t *p, const bool n)
Definition: sqltypes.h:152
bool is_subtype_dict_encoded_string() const
Definition: sqltypes.h:539
#define NULL_ARRAY_COMPRESSED_32
SQLTypes subtype
Definition: sqltypes.h:760
void set_scale(int s)
Definition: sqltypes.h:418
bool notnull
Definition: sqltypes.h:764
bool has_bounds() const
Definition: sqltypes.h:392
int64_t bigintval
Definition: sqltypes.h:209
HostArrayDatum(size_t const l, int8_t *p, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:177
bool is_timeinterval() const
Definition: sqltypes.h:509
#define NULL_ARRAY_FLOAT
ManagedPtr data_ptr
Definition: sqltypes.h:186
auto generate_column_list_type(const SQLTypes subtype)
Definition: sqltypes.h:1009
HostArrayDatum()=default
int16_t smallintval
Definition: sqltypes.h:207
bool is_dict_encoded_type() const
Definition: sqltypes.h:543
SQLTypeInfo(SQLTypes t, int d, int s, bool n)
Definition: sqltypes.h:275
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:239
std::string toString() const
Definition: sqltypes.h:465
bool is_boolean() const
Definition: sqltypes.h:505
HostArrayDatum(size_t const l, int8_t *p, bool const n)
Definition: sqltypes.h:171
bool g_enable_smem_group_by true
void operator()(int8_t *p)
Definition: sqltypes.h:160
SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:266
SQLTypeInfo(SQLTypes t)
Definition: sqltypes.h:303
#define NULL_BOOLEAN
std::string get_buffer_name() const
Definition: sqltypes.h:487
SQLTypeInfo(SQLTypes t, bool n, EncodingType c)
Definition: sqltypes.h:304
SQLTypeInfo get_array_type() const
Definition: sqltypes.h:730
EncodingType compression
Definition: sqltypes.h:765
int get_precision() const
Definition: sqltypes.h:326
std::string * stringval
Definition: sqltypes.h:214
void set_output_srid(int s)
Definition: sqltypes.h:419
bool is_buffer() const
Definition: sqltypes.h:516
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:419
auto generate_array_type(const SQLTypes subtype)
Definition: sqltypes.h:989
bool is_column() const
Definition: sqltypes.h:511
DEVICE DeviceArrayDatum()
Definition: sqltypes.h:190
HOST DEVICE bool is_null(const Datum &d) const
Definition: sqltypes.h:618
void set_comp_param(int p)
Definition: sqltypes.h:424
HOST DEVICE int get_storage_size() const
Definition: sqltypes.h:772
#define CHECK_LT(x, y)
Definition: Logger.h:216
Definition: sqltypes.h:51
Definition: sqltypes.h:52
static std::string comp_name[kENCODING_LAST]
Definition: sqltypes.h:770
int8_t * appendDatum(int8_t *buf, Datum d, const SQLTypeInfo &ti)
Definition: sqltypes.h:954
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:331
bool is_date_in_days() const
Definition: sqltypes.h:734
int get_array_context_logical_size() const
Definition: sqltypes.h:565
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:437
int32_t ArrayOffsetT
Definition: sqltypes.h:952
void set_dimension(int d)
Definition: sqltypes.h:415
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:325
#define IS_INTEGER(T)
Definition: sqltypes.h:239
std::string get_type_name() const
Definition: sqltypes.h:426
Definition: sqltypes.h:40
#define IS_STRING(T)
Definition: sqltypes.h:244
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:332
HOST DEVICE int get_input_srid() const
Definition: sqltypes.h:327
#define NULL_TINYINT
#define NULL_ARRAY_DOUBLE
virtual DEVICE ~VarlenDatum()
Definition: sqltypes.h:150
bool is_bytes() const
Definition: sqltypes.h:513
bool is_column_list() const
Definition: sqltypes.h:512
bool g_enable_watchdog false
Definition: Execute.cpp:75
void set_notnull(bool n)
Definition: sqltypes.h:420
bool is_geometry() const
Definition: sqltypes.h:510
char * t
bool is_high_precision_timestamp() const
Definition: sqltypes.h:746
SQLTypes type
Definition: sqltypes.h:759
#define NULL_SMALLINT
HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
Definition: sqltypes.h:168
#define NULL_ARRAY_BIGINT
bool is_dict_encoded_string() const
Definition: sqltypes.h:535
Definition: sqltypes.h:44
bool is_varlen_indeed() const
Definition: sqltypes.h:529
bool is_string() const
Definition: sqltypes.h:498
SQLTypeInfo(SQLTypes t, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:284
bool transforms() const
Definition: sqltypes.h:519
SQLTypeInfo(SQLTypes t, bool n)
Definition: sqltypes.h:294
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:330
int8_t * numbersPtr
Definition: sqltypes.h:220
bool is_string_array() const
Definition: sqltypes.h:499
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:726
bool is_decimal() const
Definition: sqltypes.h:501
int get_physical_coord_cols() const
Definition: sqltypes.h:359
#define IS_NUMBER(T)
Definition: sqltypes.h:241
void operator()(int8_t *)
Definition: sqltypes.h:157
#define IS_GEO(T)
Definition: sqltypes.h:245
#define TRANSIENT_DICT(ID)
Definition: sqltypes.h:254
int comp_param
Definition: sqltypes.h:766
bool is_date() const
Definition: sqltypes.h:744
bool is_array() const
Definition: sqltypes.h:506
void set_precision(int d)
Definition: sqltypes.h:416
SQLTypeInfo get_nullable_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:940
int dimension
Definition: sqltypes.h:761
HOST DEVICE bool is_null_point_coord_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:713
double doubleval
Definition: sqltypes.h:211
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:329
constexpr auto is_datetime(SQLTypes type)
Definition: sqltypes.h:257
HOST DEVICE bool is_null(const int8_t *val) const
Definition: sqltypes.h:655
size_t length
Definition: sqltypes.h:145
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:413