OmniSciDB  ca0c39ec8f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sqltypes.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "../Logger/Logger.h"
26 #include "Datum.h"
27 #include "funcannotations.h"
28 
29 #include <cassert>
30 #include <ctime>
31 #include <memory>
32 #include <ostream>
33 #include <sstream>
34 #include <string>
35 #include <type_traits>
36 #include <vector>
37 
namespace sql_constants {
// A column type may declare at most 18 digits of precision, although the most
// precise numeric value that can be represented carries 19 digits. Internal
// calculations can therefore be a little more relaxed than column definitions:
// a CAST from double to numeric, for example, may use precision 19 as long as
// it does not overflow, while a column can never be declared with precision 19+.
constexpr static int32_t kMaxNumericPrecision{
    std::numeric_limits<int64_t>::digits10};  // 18
constexpr static int32_t kMaxRepresentableNumericPrecision{
    std::numeric_limits<int64_t>::digits10 + 1};  // 19
}  // namespace sql_constants
51 
52 // must not change because these values persist in catalogs.
// Identifiers of every SQL type known to the system. The numeric values are
// persisted in catalogs, so existing entries must never be renumbered.
//
// NOTE(review): the enumerators marked "restored" were absent from this listing
// although the rest of the file refers to them (toString(), IS_GEO, IS_INTERVAL,
// the type_name[] bound). Their values fill the numbering gaps left in the
// listing; confirm each name/value pair against the catalog format before
// relying on it.
enum SQLTypes {
  kNULLT = 0,  // type for null values
  kBOOLEAN = 1,
  kCHAR = 2,
  kVARCHAR = 3,
  kNUMERIC = 4,
  kDECIMAL = 5,
  kINT = 6,
  kSMALLINT = 7,
  kFLOAT = 8,
  kDOUBLE = 9,
  kTIME = 10,
  kTIMESTAMP = 11,
  kBIGINT = 12,
  kTEXT = 13,
  kDATE = 14,
  kARRAY = 15,
  kINTERVAL_DAY_TIME = 16,    // restored
  kINTERVAL_YEAR_MONTH = 17,  // restored
  kPOINT = 18,
  kLINESTRING = 19,  // restored
  kPOLYGON = 20,
  kMULTIPOLYGON = 21,  // restored
  kTINYINT = 22,
  kGEOMETRY = 23,
  kGEOGRAPHY = 24,
  kEVAL_CONTEXT_TYPE = 25,  // Placeholder Type for ANY
  kVOID = 26,
  kCURSOR = 27,
  kCOLUMN = 28,
  kCOLUMN_LIST = 29,       // restored
  kMULTILINESTRING = 30,   // restored
  kMULTIPOINT = 31,        // restored
  kSQLTYPE_LAST = 32       // restored; one past the last valid type id
};
88 
89 #if !(defined(__CUDACC__) || defined(NO_BOOST))
90 
91 inline std::string toString(const SQLTypes& type) {
92  switch (type) {
93  case kNULLT:
94  return "NULL";
95  case kBOOLEAN:
96  return "BOOL";
97  case kCHAR:
98  return "CHAR";
99  case kVARCHAR:
100  return "VARCHAR";
101  case kNUMERIC:
102  return "NUMERIC";
103  case kDECIMAL:
104  return "DECIMAL";
105  case kINT:
106  return "INT";
107  case kSMALLINT:
108  return "SMALLINT";
109  case kFLOAT:
110  return "FLOAT";
111  case kDOUBLE:
112  return "DOUBLE";
113  case kTIME:
114  return "TIME";
115  case kTIMESTAMP:
116  return "TIMESTAMP";
117  case kBIGINT:
118  return "BIGINT";
119  case kTEXT:
120  return "TEXT";
121  case kDATE:
122  return "DATE";
123  case kARRAY:
124  return "ARRAY";
125  case kINTERVAL_DAY_TIME:
126  return "DAY TIME INTERVAL";
128  return "YEAR MONTH INTERVAL";
129  case kPOINT:
130  return "POINT";
131  case kMULTIPOINT:
132  return "MULTIPOINT";
133  case kLINESTRING:
134  return "LINESTRING";
135  case kMULTILINESTRING:
136  return "MULTILINESTRING";
137  case kPOLYGON:
138  return "POLYGON";
139  case kMULTIPOLYGON:
140  return "MULTIPOLYGON";
141  case kTINYINT:
142  return "TINYINT";
143  case kGEOMETRY:
144  return "GEOMETRY";
145  case kGEOGRAPHY:
146  return "GEOGRAPHY";
147  case kEVAL_CONTEXT_TYPE:
148  return "UNEVALUATED ANY";
149  case kVOID:
150  return "VOID";
151  case kCURSOR:
152  return "CURSOR";
153  case kCOLUMN:
154  return "COLUMN";
155  case kCOLUMN_LIST:
156  return "COLUMN_LIST";
157  case kSQLTYPE_LAST:
158  break;
159  }
160  LOG(FATAL) << "Invalid SQL type: " << type;
161  return "";
162 }
163 
164 inline std::ostream& operator<<(std::ostream& os, SQLTypes const sql_type) {
165  os << toString(sql_type);
166  return os;
167 }
168 
169 #endif // #if !(defined(__CUDACC__) || defined(NO_BOOST))
170 
172  void operator()(int8_t*) {}
173 };
// Deleter functor that releases a buffer by passing it to free().
struct FreeDeleter {
  void operator()(int8_t* p) {
    free(p);
  }
};
177 
178 struct HostArrayDatum : public VarlenDatum {
179  using ManagedPtr = std::shared_ptr<int8_t>;
180 
181  HostArrayDatum() = default;
182 
183  HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
184  : VarlenDatum(l, p.get(), n), data_ptr(p) {}
185 
186  HostArrayDatum(size_t const l, int8_t* p, bool const n)
187  : VarlenDatum(l, p, n), data_ptr(p, FreeDeleter()){};
188 
189  template <typename CUSTOM_DELETER,
190  typename = std::enable_if_t<
191  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
192  HostArrayDatum(size_t const l, int8_t* p, CUSTOM_DELETER custom_deleter)
193  : VarlenDatum(l, p, 0 == l), data_ptr(p, custom_deleter) {}
194 
195  template <typename CUSTOM_DELETER,
196  typename = std::enable_if_t<
197  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
198  HostArrayDatum(size_t const l, int8_t* p, bool const n, CUSTOM_DELETER custom_deleter)
199  : VarlenDatum(l, p, n), data_ptr(p, custom_deleter) {}
200 
202 };
203 
204 struct DeviceArrayDatum : public VarlenDatum {
206 };
207 
208 inline DEVICE constexpr bool is_cuda_compiler() {
209 #ifdef __CUDACC__
210  return true;
211 #else
212  return false;
213 #endif
214 }
215 
216 using ArrayDatum =
217  std::conditional_t<is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum>;
218 
219 #ifndef __CUDACC__
221  int8_t* numbersPtr;
222  std::vector<std::string>* stringsPtr;
223  std::vector<ArrayDatum>* arraysPtr;
224 };
225 #endif
226 
227 // must not change because these values persist in catalogs.
enum EncodingType {
  kENCODING_NONE = 0,          // no encoding
  kENCODING_FIXED = 1,         // Fixed-bit encoding
  kENCODING_RL = 2,            // Run Length encoding
  kENCODING_DIFF = 3,          // Differential encoding
  kENCODING_DICT = 4,          // Dictionary encoding
  kENCODING_SPARSE = 5,        // Null encoding for sparse columns
  kENCODING_GEOINT = 6,        // Encoding coordinates as integers
  kENCODING_DATE_IN_DAYS = 7,  // Date encoding in days
  kENCODING_ARRAY = 8,         // Array encoding for columns of arrays
  kENCODING_ARRAY_DICT = 9,    // Array encoding for columns of text encoding dict arrays
  kENCODING_LAST = 10          // sentinel: one past the last valid encoding
};
241 
242 #if !(defined(__CUDACC__) || defined(NO_BOOST))
243 
244 inline std::ostream& operator<<(std::ostream& os, EncodingType const type) {
245  switch (type) {
246  case kENCODING_NONE:
247  os << "NONE";
248  break;
249  case kENCODING_FIXED:
250  os << "FIXED";
251  break;
252  case kENCODING_RL:
253  os << "RL";
254  break;
255  case kENCODING_DIFF:
256  os << "DIFF";
257  break;
258  case kENCODING_DICT:
259  os << "DICT";
260  break;
261  case kENCODING_SPARSE:
262  os << "SPARSE";
263  break;
264  case kENCODING_GEOINT:
265  os << "GEOINT";
266  break;
268  os << "DATE_IN_DAYS";
269  break;
270  case kENCODING_ARRAY:
271  os << "ARRAY";
272  break;
274  os << "ARRAY_DICT";
275  break;
276  case kENCODING_LAST:
277  break;
278  default:
279  LOG(FATAL) << "Invalid EncodingType: " << type;
280  }
281  return os;
282 }
283 
284 inline std::string toString(const EncodingType& type) {
285  std::ostringstream ss;
286  ss << type;
287  return ss.str();
288 }
289 
290 #endif // #if !(defined(__CUDACC__) || defined(NO_BOOST))
291 
// Type-id classification helpers. Each macro expands its argument more than
// once, so pass an expression without side effects.
#define IS_INTEGER(T) \
  ((T) == kINT || (T) == kSMALLINT || (T) == kBIGINT || (T) == kTINYINT)
#define IS_NUMBER(T)                                                        \
  ((T) == kINT || (T) == kSMALLINT || (T) == kDOUBLE || (T) == kFLOAT ||    \
   (T) == kBIGINT || (T) == kNUMERIC || (T) == kDECIMAL || (T) == kTINYINT)
#define IS_STRING(T) ((T) == kTEXT || (T) == kVARCHAR || (T) == kCHAR)
#define IS_GEO(T)                                                          \
  ((T) == kPOINT || (T) == kLINESTRING || (T) == kMULTILINESTRING ||       \
   (T) == kMULTIPOINT || (T) == kPOLYGON || (T) == kMULTIPOLYGON)
#define IS_INTERVAL(T) (((T) == kINTERVAL_DAY_TIME) || ((T) == kINTERVAL_YEAR_MONTH))
#define IS_DECIMAL(T) (((T) == kNUMERIC) || ((T) == kDECIMAL))
#define IS_GEO_POLY(T) ((T) == kPOLYGON || (T) == kMULTIPOLYGON)
#define IS_GEO_LINE(T) ((T) == kLINESTRING || (T) == kMULTILINESTRING)
// True for the multi-geometry type ids. The previous definition had one more
// closing parenthesis than opening ones, so any use failed to compile.
#define IS_GEO_MULTI(T) \
  (((T) == kMULTIPOLYGON) || ((T) == kMULTILINESTRING) || ((T) == kMULTIPOINT))
306 
307 #include "InlineNullValues.h"
308 
// Dictionary-id helpers. TRANSIENT_DICT and REGULAR_DICT both negate their
// argument, so each one undoes the other.
#define TRANSIENT_DICT_ID 0
#define TRANSIENT_DICT(ID) (-(ID))
#define REGULAR_DICT(TRANSIENTID) (-(TRANSIENTID))
312 
313 constexpr auto is_datetime(SQLTypes type) {
314  return type == kTIME || type == kTIMESTAMP || type == kDATE;
315 }
316 
317 // @type SQLTypeInfo
318 // @brief a structure to capture all type information including
319 // length, precision, scale, etc.
320 class SQLTypeInfo {
321  public:
322  SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
323  : type(t)
324  , subtype(st)
325  , dimension(d)
326  , scale(s)
327  , notnull(n)
328  , compression(c)
329  , comp_param(p)
330  , size(get_storage_size()) {}
331  SQLTypeInfo(SQLTypes t, int d, int s, bool n)
332  : type(t)
333  , subtype(kNULLT)
334  , dimension(d)
335  , scale(s)
336  , notnull(n)
338  , comp_param(0)
339  , size(get_storage_size()) {}
341  : type(t)
342  , subtype(st)
343  , dimension(0)
344  , scale(0)
345  , notnull(false)
346  , compression(c)
347  , comp_param(p)
348  , size(get_storage_size()) {}
349  SQLTypeInfo(SQLTypes t, int d, int s) : SQLTypeInfo(t, d, s, false) {}
351  : type(t)
352  , subtype(kNULLT)
353  , dimension(0)
354  , scale(0)
355  , notnull(n)
357  , comp_param(0)
358  , size(get_storage_size()) {}
361  : type(t)
362  , subtype(kNULLT)
363  , dimension(0)
364  , scale(0)
365  , notnull(n)
366  , compression(c)
367  , comp_param(0)
368  , size(get_storage_size()) {}
370  : type(kNULLT)
371  , subtype(kNULLT)
372  , dimension(0)
373  , scale(0)
374  , notnull(false)
376  , comp_param(0)
377  , size(0) {}
378 
379  HOST DEVICE inline SQLTypes get_type() const { return type; }
380  HOST DEVICE inline SQLTypes get_subtype() const { return subtype; }
381  HOST DEVICE inline int get_dimension() const { return dimension; }
382  inline int get_precision() const { return dimension; }
383  HOST DEVICE inline int get_input_srid() const { return dimension; }
384  HOST DEVICE inline int get_scale() const { return scale; }
385  HOST DEVICE inline int get_output_srid() const { return scale; }
386  HOST DEVICE inline bool get_notnull() const { return notnull; }
388  HOST DEVICE inline int get_comp_param() const { return comp_param; }
389  HOST DEVICE inline int get_size() const { return size; }
390 
391  inline int is_logical_geo_type() const {
392  if (type == kPOINT || type == kLINESTRING || type == kMULTILINESTRING ||
393  type == kMULTIPOINT || type == kPOLYGON || type == kMULTIPOLYGON) {
394  return true;
395  }
396  return false;
397  }
398 
399  inline int get_logical_size() const {
402  return ti.get_size();
403  }
404  if (compression == kENCODING_DICT) {
405  return 4;
406  }
407  return get_size();
408  }
409 
410  inline int get_physical_cols() const {
411  switch (type) {
412  case kPOINT:
413  return 1; // coords
414  case kMULTIPOINT:
415  return 2; // coords, bounds
416  case kLINESTRING:
417  return 2; // coords, bounds
418  case kMULTILINESTRING:
419  return 3; // coords, linestring_sizes, bounds
420  case kPOLYGON:
421  return 4; // coords, ring_sizes, bounds, render_group
422  case kMULTIPOLYGON:
423  return 5; // coords, ring_sizes, poly_rings, bounds, render_group
424  default:
425  break;
426  }
427  return 0;
428  }
429  inline int get_physical_coord_cols() const {
430  // @TODO dmitri/simon rename this function?
431  // It needs to return the number of extra columns
432  // which need to go through the executor, as opposed
433  // to those which are only needed by CPU for poly
434  // cache building or what-not. For now, we just omit
435  // the Render Group column. If we add Bounding Box
436  // or something this may require rethinking. Perhaps
437  // these two functions need to return an array of
438  // offsets rather than just a number to loop over,
439  // so that executor and non-executor columns can
440  // be mixed.
441  // NOTE(adb): In binding to extension functions, we need to know some pretty specific
442  // type info about each of the physical coords cols for each geo type. I added checks
443  // there to ensure the physical coords col for the geo type match what we expect. If
444  // these values are ever changed, corresponding values in
445  // ExtensionFunctionsBinding.cpp::compute_narrowing_conv_scores and
446  // ExtensionFunctionsBinding.cpp::compute_widening_conv_scores will also need to be
447  // changed.
448  switch (type) {
449  case kPOINT:
450  return 1;
451  case kMULTIPOINT:
452  return 1; // omit bounds
453  case kLINESTRING:
454  return 1; // omit bounds
455  case kMULTILINESTRING:
456  return 2; // omit bounds
457  case kPOLYGON:
458  return 2; // omit bounds, render group
459  case kMULTIPOLYGON:
460  return 3; // omit bounds, render group
461  default:
462  break;
463  }
464  return 0;
465  }
466  inline bool has_bounds() const {
467  switch (type) {
468  case kMULTIPOINT:
469  case kLINESTRING:
470  case kMULTILINESTRING:
471  case kPOLYGON:
472  case kMULTIPOLYGON:
473  return true;
474  default:
475  break;
476  }
477  return false;
478  }
479  inline bool has_render_group() const {
480  switch (type) {
481  case kPOLYGON:
482  case kMULTIPOLYGON:
483  return true;
484  default:
485  break;
486  }
487  return false;
488  }
489  HOST DEVICE inline void set_type(SQLTypes t) { type = t; }
490  HOST DEVICE inline void set_subtype(SQLTypes st) { subtype = st; }
491  inline void set_dimension(int d) { dimension = d; }
492  inline void set_precision(int d) { dimension = d; }
493  inline void set_input_srid(int d) { dimension = d; }
494  inline void set_scale(int s) { scale = s; }
495  inline void set_output_srid(int s) { scale = s; }
496  inline void set_notnull(bool n) { notnull = n; }
497  inline void set_size(int s) { size = s; }
498  inline void set_fixed_size() { size = get_storage_size(); }
499  inline void set_dict_intersection() { dict_intersection = true; }
500  inline void set_compression(EncodingType c) { compression = c; }
501  inline void set_comp_param(int p) { comp_param = p; }
502 #ifndef __CUDACC__
503  inline std::string get_type_name() const {
504  if (IS_GEO(type)) {
505  std::string srid_string = "";
506  if (get_output_srid() > 0) {
507  srid_string = ", " + std::to_string(get_output_srid());
508  }
509  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
510  return type_name[static_cast<int>(subtype)] + "(" +
511  type_name[static_cast<int>(type)] + srid_string + ")";
512  }
513  std::string ps = "";
514  if (type == kDECIMAL || type == kNUMERIC) {
515  ps = "(" + std::to_string(dimension) + "," + std::to_string(scale) + ")";
516  } else if (type == kTIMESTAMP) {
517  ps = "(" + std::to_string(dimension) + ")";
518  }
519  if (type == kARRAY) {
520  auto elem_ti = get_elem_type();
521  auto num_elems = (size > 0) ? std::to_string(size / elem_ti.get_size()) : "";
522  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
523  return elem_ti.get_type_name() + ps + "[" + num_elems + "]";
524  }
525  if (type == kCOLUMN) {
526  auto elem_ti = get_elem_type();
527  auto num_elems =
528  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
529  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
530  return "COLUMN<" + elem_ti.get_type_name() + ps + ">" + num_elems;
531  }
532  if (type == kCOLUMN_LIST) {
533  auto elem_ti = get_elem_type();
534  auto num_elems =
535  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
536  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
537  return "COLUMN_LIST<" + elem_ti.get_type_name() + ps + ">" + num_elems;
538  }
539  return type_name[static_cast<int>(type)] + ps;
540  }
541  inline std::string get_compression_name() const { return comp_name[(int)compression]; }
542  std::string toString() const { return to_string(); } // for PRINT macro
543  inline std::string to_string() const {
544  std::ostringstream oss;
545  oss << "(type=" << type_name[static_cast<int>(type)]
546  << ", dimension=" << get_dimension() << ", scale=" << get_scale()
547  << ", null=" << (get_notnull() ? "not nullable" : "nullable")
548  << ", compression_name=" << get_compression_name()
549  << ", comp_param=" << get_comp_param()
550  << ", subtype=" << type_name[static_cast<int>(subtype)] << ", size=" << get_size()
551  << ", element_size=" << get_elem_type().get_size() << ")";
552  return oss.str();
553  }
554 
555  inline std::string get_buffer_name() const {
556  if (is_array()) {
557  return "Array";
558  }
559  if (is_bytes()) {
560  return "Bytes";
561  }
562 
563  if (is_column()) {
564  return "Column";
565  }
566 
567  assert(false);
568  return "";
569  }
570 #endif
571  template <typename... Types>
572  bool is_any(Types... types) const {
573  return (... || (types == type));
574  }
575  inline bool is_string() const { return IS_STRING(type); }
576  inline bool is_string_array() const { return (type == kARRAY) && IS_STRING(subtype); }
577  inline bool is_integer() const { return IS_INTEGER(type); }
578  inline bool is_decimal() const { return type == kDECIMAL || type == kNUMERIC; }
579  inline bool is_fp() const { return type == kFLOAT || type == kDOUBLE; }
580  inline bool is_number() const { return IS_NUMBER(type); }
581  inline bool is_time() const { return is_datetime(type); }
582  inline bool is_boolean() const { return type == kBOOLEAN; }
583  inline bool is_array() const { return type == kARRAY; } // Array
584  inline bool is_varlen_array() const { return type == kARRAY && size <= 0; }
585  inline bool is_fixlen_array() const { return type == kARRAY && size > 0; }
586  inline bool is_timeinterval() const { return IS_INTERVAL(type); }
587  inline bool is_geometry() const { return IS_GEO(type); }
588  inline bool is_column() const { return type == kCOLUMN; } // Column
589  inline bool is_column_list() const { return type == kCOLUMN_LIST; } // ColumnList
590  inline bool is_column_array() const {
591  const auto c = get_compression();
592  return type == kCOLUMN && (c == kENCODING_ARRAY || c == kENCODING_ARRAY_DICT);
593  } // ColumnArray
594  inline bool is_column_list_array() const {
595  const auto c = get_compression();
596  return type == kCOLUMN_LIST && (c == kENCODING_ARRAY || c == kENCODING_ARRAY_DICT);
597  } // ColumnList of ColumnArray
598  inline bool is_bytes() const {
599  return type == kTEXT && get_compression() == kENCODING_NONE;
600  }
601  inline bool is_text_encoding_dict() const {
602  return type == kTEXT && get_compression() == kENCODING_DICT;
603  }
604  inline bool is_text_encoding_dict_array() const {
605  return type == kARRAY && subtype == kTEXT && get_compression() == kENCODING_DICT;
606  }
607  inline bool is_buffer() const {
608  return is_array() || is_column() || is_column_list() || is_bytes();
609  }
610  inline bool transforms() const {
611  return IS_GEO(type) && get_input_srid() > 0 && get_output_srid() > 0 &&
613  }
614 
615  inline bool is_varlen() const { // TODO: logically this should ignore fixlen arrays
616  return (IS_STRING(type) && compression != kENCODING_DICT) || type == kARRAY ||
617  IS_GEO(type);
618  }
619 
620  // need this here till is_varlen can be fixed w/o negative impact to existing code
621  inline bool is_varlen_indeed() const {
622  // SQLTypeInfo.is_varlen() is broken with fixedlen array now
623  // and seems left broken for some concern, so fix it locally
624  return is_varlen() && !is_fixlen_array();
625  }
626 
627  inline bool is_dict_encoded_string() const {
628  return is_string() && compression == kENCODING_DICT;
629  }
630 
631  inline bool is_none_encoded_string() const {
632  return is_string() && compression == kENCODING_NONE;
633  }
634 
635  inline bool is_subtype_dict_encoded_string() const {
637  }
638 
639  inline bool is_dict_encoded_type() const {
640  return is_dict_encoded_string() ||
642  }
643 
644  inline bool is_dict_intersection() const { return dict_intersection; }
645 
646  inline bool has_same_itemtype(const SQLTypeInfo& other) const {
647  if ((is_column() || is_column_list()) &&
648  (other.is_column() || other.is_column_list())) {
649  return subtype == other.get_subtype() &&
651  compression == other.get_compression());
652  }
653  return subtype == other.get_subtype();
654  }
655 
656  HOST DEVICE inline bool operator!=(const SQLTypeInfo& rhs) const {
657  return type != rhs.get_type() || subtype != rhs.get_subtype() ||
658  dimension != rhs.get_dimension() || scale != rhs.get_scale() ||
659  compression != rhs.get_compression() ||
662  notnull != rhs.get_notnull();
663  }
664  HOST DEVICE inline bool operator==(const SQLTypeInfo& rhs) const {
665  return type == rhs.get_type() && subtype == rhs.get_subtype() &&
666  dimension == rhs.get_dimension() && scale == rhs.get_scale() &&
667  compression == rhs.get_compression() &&
670  notnull == rhs.get_notnull();
671  }
672 
673  inline int get_array_context_logical_size() const {
674  if (is_string()) {
675  auto comp_type(get_compression());
676  if (comp_type == kENCODING_DICT || comp_type == kENCODING_FIXED ||
677  comp_type == kENCODING_NONE) {
678  return sizeof(int32_t);
679  }
680  }
681  return get_logical_size();
682  }
683 
684  HOST DEVICE inline void operator=(const SQLTypeInfo& rhs) {
685  type = rhs.get_type();
686  subtype = rhs.get_subtype();
687  dimension = rhs.get_dimension();
688  scale = rhs.get_scale();
689  notnull = rhs.get_notnull();
691  comp_param = rhs.get_comp_param();
692  size = rhs.get_size();
693  }
694 
695  inline bool is_castable(const SQLTypeInfo& new_type_info) const {
696  // can always cast between the same type but different precision/scale/encodings
697  if (type == new_type_info.get_type()) {
698  return true;
699  // can always cast between strings
700  } else if (is_string() && new_type_info.is_string()) {
701  return true;
702  } else if (is_string() && !new_type_info.is_string()) {
703  return false;
704  } else if (!is_string() && new_type_info.is_string()) {
705  return false;
706  // can cast between numbers
707  } else if (is_number() && new_type_info.is_number()) {
708  return true;
709  // can cast from timestamp or date to number (epoch)
710  } else if ((type == kTIMESTAMP || type == kDATE) && new_type_info.is_number()) {
711  return true;
712  // can cast from number (epoch) to timestamp, date, or time
713  } else if (is_number() && new_type_info.is_time()) {
714  return true;
715  // can cast from date to timestamp
716  } else if (type == kDATE && new_type_info.get_type() == kTIMESTAMP) {
717  return true;
718  } else if (type == kTIMESTAMP && new_type_info.get_type() == kDATE) {
719  return true;
720  } else if (type == kTIMESTAMP && new_type_info.get_type() == kTIME) {
721  return true;
722  } else if (type == kBOOLEAN && new_type_info.is_number()) {
723  return true;
724  } else if (type == kARRAY && new_type_info.get_type() == kARRAY) {
725  return get_elem_type().is_castable(new_type_info.get_elem_type());
726  } else if (type == kCOLUMN && new_type_info.get_type() == kCOLUMN) {
727  return get_elem_type().is_castable(new_type_info.get_elem_type());
728  } else if (type == kCOLUMN_LIST && new_type_info.get_type() == kCOLUMN_LIST) {
729  return get_elem_type().is_castable(new_type_info.get_elem_type());
730  } else {
731  return false;
732  }
733  }
734 
743  inline bool is_numeric_scalar_auto_castable(const SQLTypeInfo& new_type_info) const {
744  const auto& new_type = new_type_info.get_type();
745  switch (type) {
746  case kBOOLEAN:
747  return new_type == kBOOLEAN;
748  case kTINYINT:
749  case kSMALLINT:
750  case kINT:
751  if (!new_type_info.is_number()) {
752  return false;
753  }
754  if (new_type_info.is_fp()) {
755  // We can lose precision here, but preserving existing behavior
756  return true;
757  }
758  return new_type_info.get_logical_size() >= get_logical_size();
759  case kBIGINT:
760  return new_type == kBIGINT || new_type == kDOUBLE || new_type == kFLOAT;
761  case kFLOAT:
762  case kDOUBLE:
763  if (!new_type_info.is_fp()) {
764  return false;
765  }
766  return (new_type_info.get_logical_size() >= get_logical_size());
767  case kDECIMAL:
768  case kNUMERIC:
769  switch (new_type) {
770  case kDECIMAL:
771  case kNUMERIC:
772  return new_type_info.get_dimension() >= get_dimension();
773  case kDOUBLE:
774  return true;
775  case kFLOAT:
776  return get_dimension() <= 7;
777  default:
778  return false;
779  }
780  case kTIMESTAMP:
781  if (new_type != kTIMESTAMP) {
782  return false;
783  }
784  return new_type_info.get_dimension() >= get_dimension();
785  case kDATE:
786  return new_type == kDATE;
787  case kTIME:
788  return new_type == kTIME;
789  default:
790  UNREACHABLE();
791  return false;
792  }
793  }
794 
804  inline int32_t get_numeric_scalar_scale() const {
805  CHECK(type == kBOOLEAN || type == kTINYINT || type == kSMALLINT || type == kINT ||
806  type == kBIGINT || type == kFLOAT || type == kDOUBLE || type == kDECIMAL ||
807  type == kNUMERIC || type == kTIMESTAMP || type == kDATE || type == kTIME);
808  switch (type) {
809  case kBOOLEAN:
810  return 1;
811  case kTINYINT:
812  case kSMALLINT:
813  case kINT:
814  case kBIGINT:
815  case kFLOAT:
816  case kDOUBLE:
817  return get_logical_size();
818  case kDECIMAL:
819  case kNUMERIC:
820  if (get_dimension() > 7) {
821  return 8;
822  } else {
823  return 4;
824  }
825  case kTIMESTAMP:
826  switch (get_dimension()) {
827  case 9:
828  return 8;
829  case 6:
830  return 4;
831  case 3:
832  return 2;
833  case 0:
834  return 1;
835  default:
836  UNREACHABLE();
837  }
838  case kDATE:
839  return 1;
840  case kTIME:
841  return 1;
842  default:
843  UNREACHABLE();
844  return 0;
845  }
846  }
847 
848  HOST DEVICE inline bool is_null(const Datum& d) const {
849  // assuming Datum is always uncompressed
850  switch (type) {
851  case kBOOLEAN:
852  return (int8_t)d.boolval == NULL_BOOLEAN;
853  case kTINYINT:
854  return d.tinyintval == NULL_TINYINT;
855  case kSMALLINT:
856  return d.smallintval == NULL_SMALLINT;
857  case kINT:
858  return d.intval == NULL_INT;
859  case kBIGINT:
860  case kNUMERIC:
861  case kDECIMAL:
862  return d.bigintval == NULL_BIGINT;
863  case kFLOAT:
864  return d.floatval == NULL_FLOAT;
865  case kDOUBLE:
866  return d.doubleval == NULL_DOUBLE;
867  case kTIME:
868  case kTIMESTAMP:
869  case kDATE:
870  return d.bigintval == NULL_BIGINT;
871  case kTEXT:
872  case kVARCHAR:
873  case kCHAR:
874  // @TODO handle null strings
875  break;
876  case kNULLT:
877  return true;
878  case kARRAY:
879  return d.arrayval == NULL || d.arrayval->is_null;
880  default:
881  break;
882  }
883  return false;
884  }
885  HOST DEVICE inline bool is_null(const int8_t* val) const {
886  if (type == kFLOAT) {
887  return *(float*)val == NULL_FLOAT;
888  }
889  if (type == kDOUBLE) {
890  return *(double*)val == NULL_DOUBLE;
891  }
892  // val can be either compressed or uncompressed
893  switch (size) {
894  case 1:
895  return *val == NULL_TINYINT;
896  case 2:
897  return *(int16_t*)val == NULL_SMALLINT;
898  case 4:
899  return *(int32_t*)val == NULL_INT;
900  case 8:
901  return *(int64_t*)val == NULL_BIGINT;
902  case kNULLT:
903  return true;
904  default:
905  // @TODO(wei) handle null strings
906  break;
907  }
908  return false;
909  }
910  HOST DEVICE inline bool is_null_fixlen_array(const int8_t* val, int array_size) const {
911  // Check if fixed length array has a NULL_ARRAY sentinel as the first element
912  if (type == kARRAY && val && array_size > 0 && array_size == size) {
913  // Need to create element type to get the size, but can't call get_elem_type()
914  // since this is a HOST DEVICE function. Going through copy constructor instead.
915  auto elem_ti{*this};
916  elem_ti.set_type(subtype);
917  elem_ti.set_subtype(kNULLT);
918  auto elem_size = elem_ti.get_storage_size();
919  if (elem_size < 1) {
920  return false;
921  }
922  if (subtype == kFLOAT) {
923  return *(float*)val == NULL_ARRAY_FLOAT;
924  }
925  if (subtype == kDOUBLE) {
926  return *(double*)val == NULL_ARRAY_DOUBLE;
927  }
928  switch (elem_size) {
929  case 1:
930  return *val == NULL_ARRAY_TINYINT;
931  case 2:
932  return *(int16_t*)val == NULL_ARRAY_SMALLINT;
933  case 4:
934  return *(int32_t*)val == NULL_ARRAY_INT;
935  case 8:
936  return *(int64_t*)val == NULL_ARRAY_BIGINT;
937  default:
938  return false;
939  }
940  }
941  return false;
942  }
943  HOST DEVICE inline bool is_null_point_coord_array(const int8_t* val,
944  int array_size) const {
945  if (type == kARRAY && subtype == kTINYINT && val && array_size > 0 &&
946  array_size == size) {
947  if (array_size == 2 * sizeof(double)) {
948  return *(double*)val == NULL_ARRAY_DOUBLE;
949  }
950  if (array_size == 2 * sizeof(int32_t)) {
951  return *(uint32_t*)val == NULL_ARRAY_COMPRESSED_32;
952  }
953  }
954  return false;
955  }
956  inline SQLTypeInfo get_elem_type() const {
957  if ((type == kCOLUMN || type == kCOLUMN_LIST) && compression == kENCODING_ARRAY) {
958  return SQLTypeInfo(
960  }
961  if ((type == kCOLUMN || type == kCOLUMN_LIST) &&
963  return SQLTypeInfo(
965  }
966  return SQLTypeInfo(
968  }
969  inline SQLTypeInfo get_array_type() const {
971  }
972 
973  inline bool is_date_in_days() const {
974  if (type == kDATE) {
975  const auto comp_type = get_compression();
976  if (comp_type == kENCODING_DATE_IN_DAYS) {
977  return true;
978  }
979  }
980  return false;
981  }
982 
983  inline bool is_date() const { return type == kDATE; }
984 
985  inline bool is_high_precision_timestamp() const {
986  if (type == kTIMESTAMP) {
987  const auto dimension = get_dimension();
988  if (dimension > 0) {
989  return true;
990  }
991  }
992  return false;
993  }
994 
995  inline bool is_timestamp() const { return type == kTIMESTAMP; }
996 
997  private:
998  SQLTypes type; // type id
999  SQLTypes subtype; // element type of arrays or columns
1000  int dimension; // VARCHAR/CHAR length or NUMERIC/DECIMAL precision or COLUMN_LIST
1001  // length or TIMESTAMP precision
1002  int scale; // NUMERIC/DECIMAL scale
1003  bool notnull; // nullable? a hint, not used for type checking
1004  EncodingType compression; // compression scheme
1005  int comp_param; // compression parameter when applicable for certain schemes
1006  int size; // size of the type in bytes. -1 for variable size
1007  bool dict_intersection{false};
1008 #ifndef __CUDACC__
1009  static std::string type_name[kSQLTYPE_LAST];
1010  static std::string comp_name[kENCODING_LAST];
1011 #endif
1012  HOST DEVICE inline int get_storage_size() const {
1013  switch (type) {
1014  case kBOOLEAN:
1015  return sizeof(int8_t);
1016  case kTINYINT:
1017  return sizeof(int8_t);
1018  case kSMALLINT:
1019  switch (compression) {
1020  case kENCODING_NONE:
1021  return sizeof(int16_t);
1022  case kENCODING_FIXED:
1023  case kENCODING_SPARSE:
1024  return comp_param / 8;
1025  case kENCODING_RL:
1026  case kENCODING_DIFF:
1027  break;
1028  default:
1029  assert(false);
1030  }
1031  break;
1032  case kINT:
1033  switch (compression) {
1034  case kENCODING_NONE:
1035  return sizeof(int32_t);
1036  case kENCODING_FIXED:
1037  case kENCODING_SPARSE:
1038  case kENCODING_GEOINT:
1039  return comp_param / 8;
1040  case kENCODING_RL:
1041  case kENCODING_DIFF:
1042  break;
1043  default:
1044  assert(false);
1045  }
1046  break;
1047  case kBIGINT:
1048  case kNUMERIC:
1049  case kDECIMAL:
1050  switch (compression) {
1051  case kENCODING_NONE:
1052  return sizeof(int64_t);
1053  case kENCODING_FIXED:
1054  case kENCODING_SPARSE:
1055  return comp_param / 8;
1056  case kENCODING_RL:
1057  case kENCODING_DIFF:
1058  break;
1059  default:
1060  assert(false);
1061  }
1062  break;
1063  case kFLOAT:
1064  switch (compression) {
1065  case kENCODING_NONE:
1066  return sizeof(float);
1067  case kENCODING_FIXED:
1068  case kENCODING_RL:
1069  case kENCODING_DIFF:
1070  case kENCODING_SPARSE:
1071  assert(false);
1072  break;
1073  default:
1074  assert(false);
1075  }
1076  break;
1077  case kDOUBLE:
1078  switch (compression) {
1079  case kENCODING_NONE:
1080  return sizeof(double);
1081  case kENCODING_FIXED:
1082  case kENCODING_RL:
1083  case kENCODING_DIFF:
1084  case kENCODING_SPARSE:
1085  assert(false);
1086  break;
1087  default:
1088  assert(false);
1089  }
1090  break;
1091  case kTIMESTAMP:
1092  case kTIME:
1093  case kINTERVAL_DAY_TIME:
1094  case kINTERVAL_YEAR_MONTH:
1095  case kDATE:
1096  switch (compression) {
1097  case kENCODING_NONE:
1098  return sizeof(int64_t);
1099  case kENCODING_FIXED:
1100  if (type == kTIMESTAMP && dimension > 0) {
1101  assert(false); // disable compression for timestamp precisions
1102  }
1103  return comp_param / 8;
1104  case kENCODING_RL:
1105  case kENCODING_DIFF:
1106  case kENCODING_SPARSE:
1107  assert(false);
1108  break;
1110  switch (comp_param) {
1111  case 0:
1112  return 4; // Default date encoded in days is 32 bits
1113  case 16:
1114  case 32:
1115  return comp_param / 8;
1116  default:
1117  assert(false);
1118  break;
1119  }
1120  default:
1121  assert(false);
1122  }
1123  break;
1124  case kTEXT:
1125  case kVARCHAR:
1126  case kCHAR:
1127  if (compression == kENCODING_DICT) {
1128  return sizeof(int32_t); // @TODO(wei) must check DictDescriptor
1129  }
1130  break;
1131  case kARRAY:
1132  // TODO: return size for fixlen arrays?
1133  break;
1134  case kPOINT:
1135  case kMULTIPOINT:
1136  case kLINESTRING:
1137  case kMULTILINESTRING:
1138  case kPOLYGON:
1139  case kMULTIPOLYGON:
1140  case kCOLUMN:
1141  case kCOLUMN_LIST:
1142  break;
1143  default:
1144  break;
1145  }
1146  return -1;
1147  }
1148 };
1149 
1151 
1153 
1154 #ifndef __CUDACC__
1155 #include <string_view>
1156 
1157 Datum NullDatum(const SQLTypeInfo& ti);
1158 bool IsNullDatum(const Datum d, const SQLTypeInfo& ti);
1159 Datum StringToDatum(const std::string_view s, SQLTypeInfo& ti);
1160 std::string DatumToString(const Datum d, const SQLTypeInfo& ti);
1161 int64_t extract_int_type_from_datum(const Datum datum, const SQLTypeInfo& ti);
1162 double extract_fp_type_from_datum(const Datum datum, const SQLTypeInfo& ti);
1163 bool DatumEqual(const Datum, const Datum, const SQLTypeInfo& ti);
1164 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
1165  const SQLTypeInfo& type_info,
1166  const SQLTypeInfo& new_type_info);
1167 #endif
1168 
1169 #ifdef HAVE_TOSTRING
1170 inline std::ostream& operator<<(std::ostream& os, const SQLTypeInfo& type_info) {
1171  os << toString(type_info);
1172  return os;
1173 }
1174 #endif
1175 
1176 #include "../QueryEngine/DateAdd.h"
1177 #include "../QueryEngine/DateTruncate.h"
1178 #include "../QueryEngine/ExtractFromTime.h"
1179 
1180 inline SQLTypes get_int_type_by_size(size_t const nbytes) {
1181  switch (nbytes) {
1182  case 1:
1183  return kTINYINT;
1184  case 2:
1185  return kSMALLINT;
1186  case 4:
1187  return kINT;
1188  case 8:
1189  return kBIGINT;
1190  default:
1191 #if !(defined(__CUDACC__) || defined(NO_BOOST))
1192  UNREACHABLE() << "Invalid number of bytes=" << nbytes;
1193 #endif
1194  return {};
1195  }
1196 }
1197 
1199  EncodingType encoding = type_info.get_compression();
1200  if (encoding == kENCODING_DATE_IN_DAYS ||
1201  (encoding == kENCODING_FIXED && type_info.get_type() != kARRAY)) {
1202  encoding = kENCODING_NONE;
1203  }
1204  return SQLTypeInfo(type_info.get_type(),
1205  type_info.get_dimension(),
1206  type_info.get_scale(),
1207  type_info.get_notnull(),
1208  encoding,
1209  type_info.get_comp_param(),
1210  type_info.get_subtype());
1211 }
1212 
1214  SQLTypeInfo nullable_type_info = type_info;
1215  nullable_type_info.set_notnull(false);
1216  return nullable_type_info;
1217 }
1218 
1220  SQLTypeInfo nullable_type_info = get_logical_type_info(type_info);
1221  return get_nullable_type_info(nullable_type_info);
1222 }
1223 
1224 using StringOffsetT = int32_t;
1225 using ArrayOffsetT = int32_t;
1226 
1227 int8_t* append_datum(int8_t* buf, const Datum& d, const SQLTypeInfo& ti);
1228 
1229 // clang-format off
1230 /*
1231 
1232 A note on representing collection types using SQLTypeInfo
1233 =========================================================
1234 
1235 In general, a collection type is a type of collection of items. A
1236 collection can be an array, a column, or a column list. A column list
1237 is a collection of columns that have the same item type. An item can
1238 be of scalar type (bool, integers, floats, text encoding dict's, etc)
1239 or of collection type (array of scalars, column of scalars, column of
1240 array of scalars).
1241 
1242 SQLTypeInfo provides a structure to represent both item and collection
1243 types using the following list of attributes:
1244  SQLTypes type
1245  SQLTypes subtype
1246  int dimension
1247  int scale
1248  bool notnull
1249  EncodingType compression
1250  int comp_param
1251  int size
1252 
1253 To represent a particular type, not all attributes are used. However,
1254 there may exist multiple ways to represent the same type using
1255 various combinations of these attributes and this note can be used as
1256 a guideline to how to represent a newly introduced collection type
1257 using the SQLTypeInfo structure.
1258 
1259 Scalar types
1260 ------------
1261 
1262 - Scalar types are booleans, integers, and floats that are defined
1263  by type and size attributes,
1264 
1265  SQLTypeInfo(type=kSCALAR)
1266 
1267  where SCALAR is in {BOOL, BIGINT, INT, SMALLINT, TINYINT, DOUBLE,
1268  FLOAT} while the corresponding size is specified in
1269  get_storage_size(). For example, SQLTypeInfo(type=kFLOAT)
1270  represents FLOAT and its size is implemented as 4 in the
1271  get_storage_size() method,
1272 
1273 - Text encoding dict (as defined as index and string dictionary) is
1274  represented as a 32-bit integer value and its type is specified as
1275 
1276  SQLTypeInfo(type=kTEXT, compression=kENCODING_DICT, comp_param=<dict id>)
1277 
1278  and size is defined as 4 by get_storage_size().
1279 
1280 Collection types
1281 ----------------
1282 
1283 - The type of a varlen array of scalar items is specified as
1284 
1285  SQLTypeInfo(type=kARRAY, subtype=kSCALAR)
1286 
1287  and size is defined as -1 by get_storage_size() which can be interpreted as N/A.
1288 
1289 - The type of a varlen array of text encoding dict is specified as
1290 
1291  SQLTypeInfo(type=kARRAY, subtype=kTEXT, compression=kENCODING_DICT, comp_param=<dict id>)
1292 
1293  Notice that the compression and comp_param attributes apply to
1294  subtype rather than to type. This quirk exemplifies the fact that
1295  SQLTypeInfo provides limited ability to support composite types.
1296 
1297 - Similarly, the types of a column of scalar and text encoded dict
1298  items are specified as
1299 
1300  SQLTypeInfo(type=kCOLUMN, subtype=kSCALAR)
1301 
1302  and
1303 
1304  SQLTypeInfo(type=kCOLUMN, subtype=kTEXT, compression=kENCODING_DICT, comp_param=<dict id>)
1305 
1306  respectively.
1307 
1308 - The type of column list with scalar items is specified as
1309 
1310  SQLTypeInfo(type=kCOLUMN_LIST, subtype=kSCALAR, dimension=<nof columns>)
1311 
1312  WARNING: A column list whose item type uses compression (such as
1313  TIMESTAMP) cannot be supported! See QE-427.
1314 
1315 - The type of column list with text encoded dict items is specified as
1316 
1317  SQLTypeInfo(type=kCOLUMN_LIST, subtype=kTEXT, compression=kENCODING_DICT, dimension=<nof columns>)
1318 
1319 - The type of a column of arrays of scalar items is specified as
1320 
1321  SQLTypeInfo(type=kCOLUMN, subtype=kSCALAR, compression=kENCODING_ARRAY)
1322 
1323  Notice that the "a collection of collections of items" is specified
1324  by introducing a new compression scheme that describes the
1325  "collections" part while the subtype attribute specifies the type of
1326  items.
1327 
1328 - The type of a column of arrays of text encoding dict items is specified as
1329 
1330  SQLTypeInfo(type=kCOLUMN, subtype=kTEXT, compression=kENCODING_ARRAY_DICT, comp_param=<dict id>)
1331 
1332  where the compression attribute kENCODING_ARRAY_DICT carries two
1333  pieces of information: (i) the items type is dict encoded string and
1334  (ii) the type represents a "column of arrays".
1335 
1336 
1337 - The type of a column list of arrays of scalar items is specified as
1338 
1339  SQLTypeInfo(type=kCOLUMN_LIST, subtype=kSCALAR, compression=kENCODING_ARRAY, dimension=<nof columns>)
1340 
1341 - The type of a column list of arrays of text encoding dict items is specified as
1342 
1343  SQLTypeInfo(type=kCOLUMN_LIST, subtype=kTEXT, compression=kENCODING_ARRAY_DICT, comp_param=<dict id>, dimension=<nof columns>)
1344 
1345  that is the most complicated currently supported type of "a
1346  collection(=list) of collections(=columns) of collections(=arrays)
1347  of items(=text)" with a specified compression scheme and comp_param
1348  attributes.
1349 
1350 */
1351 // clang-format on
1352 
1353 inline auto generate_column_type(const SQLTypeInfo& elem_ti) {
1354  SQLTypes elem_type = elem_ti.get_type();
1355  if (elem_type == kCOLUMN) {
1356  return elem_ti;
1357  }
1358  auto c = elem_ti.get_compression();
1359  auto d = elem_ti.get_dimension();
1360  auto p = elem_ti.get_comp_param();
1361  switch (elem_type) {
1362  case kBOOLEAN:
1363  case kTINYINT:
1364  case kSMALLINT:
1365  case kINT:
1366  case kBIGINT:
1367  case kFLOAT:
1368  case kDOUBLE:
1369  if (c == kENCODING_NONE && p == 0) {
1370  break; // here and below `break` means supported element type
1371  // for extension functions
1372  }
1373  case kTEXT:
1374  if (c == kENCODING_DICT && p != 0) {
1375  break;
1376  }
1377  case kTIMESTAMP:
1378  if (c == kENCODING_NONE && p == 0 && (d == 9 || d == 6 || d == 0)) {
1379  break;
1380  }
1381  case kARRAY:
1382  elem_type = elem_ti.get_subtype();
1383  if (IS_NUMBER(elem_type) || elem_type == kBOOLEAN || elem_type == kTEXT) {
1384  if (c == kENCODING_NONE && p == 0) {
1385  c = kENCODING_ARRAY;
1386  break;
1387  } else if (c == kENCODING_DICT && p != 0) {
1389  break;
1390  }
1391  }
1392  default:
1393  elem_type = kNULLT; // indicates unsupported element type that
1394  // the caller needs to handle accordingly
1395  }
1396  auto ti = SQLTypeInfo(kCOLUMN, c, p, elem_type);
1397  ti.set_dimension(d);
1398  return ti;
1399 }
1400 
1401 inline auto generate_column_list_type(const SQLTypeInfo& elem_ti) {
1402  auto type_info = generate_column_type(elem_ti);
1403  if (type_info.get_subtype() != kNULLT) {
1404  type_info.set_type(kCOLUMN_LIST);
1405  }
1406  if (type_info.get_subtype() == kTIMESTAMP) {
1407  // ColumnList<Timestamp> is not supported, see QE-472
1408  type_info.set_subtype(kNULLT);
1409  }
1410  return type_info;
1411 }
1412 
1413 // SQLTypeInfo-friendly interface to FlatBuffer:
1414 
1415 #include "../QueryEngine/Utils/FlatBuffer.h"
1416 
1417 inline int64_t getVarlenArrayBufferSize(int64_t items_count,
1418  int64_t max_nof_values,
1419  const SQLTypeInfo& ti) {
1420  CHECK(ti.is_array());
1421  const size_t array_item_size = ti.get_elem_type().get_size();
1422  if (ti.is_text_encoding_dict_array()) {
1424  items_count,
1425  max_nof_values,
1426  array_item_size,
1427  FlatBufferManager::DTypeMetadataKind::SIZE_DICTID);
1428  } else {
1430  items_count,
1431  max_nof_values,
1432  array_item_size,
1433  FlatBufferManager::DTypeMetadataKind::SIZE);
1434  }
1435 }
1436 
1438  int64_t items_count,
1439  int64_t max_nof_values,
1440  const SQLTypeInfo& ti) {
1441  CHECK(ti.is_array());
1442  const size_t array_item_size = ti.get_elem_type().get_size();
1443  if (ti.is_text_encoding_dict_array()) {
1444  m.initializeVarlenArray(items_count,
1445  max_nof_values,
1446  array_item_size,
1447  FlatBufferManager::DTypeMetadataKind::SIZE_DICTID);
1449  } else {
1450  m.initializeVarlenArray(items_count,
1451  max_nof_values,
1452  array_item_size,
1453  FlatBufferManager::DTypeMetadataKind::SIZE);
1454  }
1455 }
1456 
1458  size_t index;
1461 };
int8_t tinyintval
Definition: Datum.h:46
void initializeVarlenArray(int64_t items_count, int64_t max_nof_values, int64_t dtype_size, DTypeMetadataKind dtype_metadata_kind)
Definition: FlatBuffer.h:336
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:380
void set_compression(EncodingType c)
Definition: sqltypes.h:500
void set_size(int s)
Definition: sqltypes.h:497
static constexpr int32_t kMaxRepresentableNumericPrecision
Definition: sqltypes.h:48
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:389
HOST DEVICE void operator=(const SQLTypeInfo &rhs)
Definition: sqltypes.h:684
int8_t * append_datum(int8_t *buf, const Datum &d, const SQLTypeInfo &ti)
Definition: Datum.cpp:578
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:458
bool is_varlen_array() const
Definition: sqltypes.h:584
static constexpr int32_t kMaxNumericPrecision
Definition: sqltypes.h:46
Definition: sqltypes.h:64
bool is_text_encoding_dict_array() const
Definition: sqltypes.h:604
DEVICE constexpr bool is_cuda_compiler()
Definition: sqltypes.h:208
SQLTypes
Definition: sqltypes.h:53
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:222
bool is_timestamp() const
Definition: sqltypes.h:995
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:223
#define NULL_ARRAY_INT
bool is_column_list_array() const
Definition: sqltypes.h:594
bool is_any(Types...types) const
Definition: sqltypes.h:572
#define NULL_FLOAT
bool is_null
Definition: Datum.h:35
#define NULL_BIGINT
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1219
#define LOG(tag)
Definition: Logger.h:216
HOST DEVICE bool operator==(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:664
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:57
bool is_fp() const
Definition: sqltypes.h:579
HOST DEVICE int get_scale() const
Definition: sqltypes.h:384
bool is_varlen() const
Definition: sqltypes.h:615
#define NULL_ARRAY_SMALLINT
int8_t boolval
Definition: Datum.h:45
std::string get_compression_name() const
Definition: sqltypes.h:541
static int64_t get_VarlenArray_flatbuffer_size(int64_t items_count, int64_t max_nof_values, int64_t dtype_size, DTypeMetadataKind dtype_metadata_kind)
Definition: FlatBuffer.h:316
VarlenDatum * arrayval
Definition: Datum.h:52
#define UNREACHABLE()
Definition: Logger.h:266
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:490
Definitions for core Datum union type.
SQLTypeInfo(SQLTypes t, int d, int s)
Definition: sqltypes.h:349
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1198
std::string toString(const QueryDescriptionType &type)
Definition: Types.h:64
Definition: sqltypes.h:80
#define NULL_ARRAY_TINYINT
HOST DEVICE bool is_null_fixlen_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:910
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:379
bool is_number() const
Definition: sqltypes.h:580
int32_t intval
Definition: Datum.h:48
bool is_time() const
Definition: sqltypes.h:581
std::string to_string(char const *&&v)
HostArrayDatum(size_t const l, int8_t *p, bool const n, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:198
#define NULL_INT
SQLTypeInfo ti
Definition: sqltypes.h:1459
int32_t StringOffsetT
Definition: sqltypes.h:1224
bool has_render_group() const
Definition: sqltypes.h:479
#define DEVICE
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:217
#define HOST
void set_input_srid(int d)
Definition: sqltypes.h:493
float floatval
Definition: Datum.h:50
std::string to_string() const
Definition: sqltypes.h:543
EncodingType
Definition: sqltypes.h:228
int get_physical_cols() const
Definition: sqltypes.h:410
bool is_fixlen_array() const
Definition: sqltypes.h:585
bool is_castable(const SQLTypeInfo &new_type_info) const
Definition: sqltypes.h:695
#define IS_INTERVAL(T)
Definition: sqltypes.h:301
void set_fixed_size()
Definition: sqltypes.h:498
std::shared_ptr< int8_t > ManagedPtr
Definition: sqltypes.h:179
HOST DEVICE bool operator!=(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:656
int get_logical_size() const
Definition: sqltypes.h:399
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:406
static std::string type_name[kSQLTYPE_LAST]
Definition: sqltypes.h:1009
bool is_integer() const
Definition: sqltypes.h:577
bool is_subtype_dict_encoded_string() const
Definition: sqltypes.h:635
int64_t extract_int_type_from_datum(const Datum datum, const SQLTypeInfo &ti)
Definition: Datum.cpp:521
bool is_column_array() const
Definition: sqltypes.h:590
#define NULL_ARRAY_COMPRESSED_32
SQLTypes subtype
Definition: sqltypes.h:999
bool has_same_itemtype(const SQLTypeInfo &other) const
Definition: sqltypes.h:646
bool is_text_encoding_dict() const
Definition: sqltypes.h:601
void set_scale(int s)
Definition: sqltypes.h:494
bool notnull
Definition: sqltypes.h:1003
int64_t bigintval
Definition: Datum.h:49
bool has_bounds() const
Definition: sqltypes.h:466
HostArrayDatum(size_t const l, int8_t *p, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:192
bool is_timeinterval() const
Definition: sqltypes.h:586
#define NULL_ARRAY_FLOAT
bool is_numeric_scalar_auto_castable(const SQLTypeInfo &new_type_info) const
returns true if the sql_type can be cast to the type specified by new_type_info with no loss of preci...
Definition: sqltypes.h:743
ManagedPtr data_ptr
Definition: sqltypes.h:201
int is_logical_geo_type() const
Definition: sqltypes.h:391
HostArrayDatum()=default
int16_t smallintval
Definition: Datum.h:47
bool is_dict_intersection() const
Definition: sqltypes.h:644
bool is_dict_encoded_type() const
Definition: sqltypes.h:639
Datum StringToDatum(const std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:337
SQLTypeInfo(SQLTypes t, int d, int s, bool n)
Definition: sqltypes.h:331
std::string toString() const
Definition: sqltypes.h:542
bool is_boolean() const
Definition: sqltypes.h:582
HostArrayDatum(size_t const l, int8_t *p, bool const n)
Definition: sqltypes.h:186
void operator()(int8_t *p)
Definition: sqltypes.h:175
SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:322
SQLTypeInfo(SQLTypes t)
Definition: sqltypes.h:359
bool IsNullDatum(const Datum datum, const SQLTypeInfo &ti)
Definition: Datum.cpp:329
#define NULL_BOOLEAN
std::string get_buffer_name() const
Definition: sqltypes.h:555
SQLTypeInfo(SQLTypes t, bool n, EncodingType c)
Definition: sqltypes.h:360
SQLTypeInfo get_array_type() const
Definition: sqltypes.h:969
EncodingType compression
Definition: sqltypes.h:1004
Datum NullDatum(const SQLTypeInfo &ti)
Definition: Datum.cpp:286
int get_precision() const
Definition: sqltypes.h:382
void set_output_srid(int s)
Definition: sqltypes.h:495
bool is_buffer() const
Definition: sqltypes.h:607
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:559
bool is_column() const
Definition: sqltypes.h:588
DEVICE DeviceArrayDatum()
Definition: sqltypes.h:205
HOST DEVICE bool is_null(const Datum &d) const
Definition: sqltypes.h:848
auto generate_column_type(const SQLTypeInfo &elem_ti)
Definition: sqltypes.h:1353
void set_comp_param(int p)
Definition: sqltypes.h:501
HOST DEVICE int get_storage_size() const
Definition: sqltypes.h:1012
#define CHECK_LT(x, y)
Definition: Logger.h:232
Definition: sqltypes.h:67
Definition: sqltypes.h:68
bool dict_intersection
Definition: sqltypes.h:1007
static std::string comp_name[kENCODING_LAST]
Definition: sqltypes.h:1010
void initializeVarlenArray(FlatBufferManager &m, int64_t items_count, int64_t max_nof_values, const SQLTypeInfo &ti)
Definition: sqltypes.h:1437
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:387
bool is_date_in_days() const
Definition: sqltypes.h:973
int get_array_context_logical_size() const
Definition: sqltypes.h:673
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:622
auto generate_column_list_type(const SQLTypeInfo &elem_ti)
Definition: sqltypes.h:1401
int32_t ArrayOffsetT
Definition: sqltypes.h:1225
void set_dimension(int d)
Definition: sqltypes.h:491
SQLTypes get_int_type_by_size(size_t const nbytes)
Definition: sqltypes.h:1180
bool is_none_encoded_string() const
Definition: sqltypes.h:631
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:381
#define IS_INTEGER(T)
Definition: sqltypes.h:292
std::string get_type_name() const
Definition: sqltypes.h:503
int32_t get_numeric_scalar_scale() const
returns integer between 1 and 8 indicating what is roughly equivalent to the logical byte size of a s...
Definition: sqltypes.h:804
Definition: sqltypes.h:56
#define IS_STRING(T)
Definition: sqltypes.h:297
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:388
void setDTypeMetadataDictId(int32_t dict_id)
Definition: FlatBuffer.h:288
HOST DEVICE int get_input_srid() const
Definition: sqltypes.h:383
void set_dict_intersection()
Definition: sqltypes.h:499
#define NULL_TINYINT
#define NULL_ARRAY_DOUBLE
bool is_bytes() const
Definition: sqltypes.h:598
bool is_column_list() const
Definition: sqltypes.h:589
bool g_enable_watchdog false
Definition: Execute.cpp:79
int64_t getVarlenArrayBufferSize(int64_t items_count, int64_t max_nof_values, const SQLTypeInfo &ti)
Definition: sqltypes.h:1417
void set_notnull(bool n)
Definition: sqltypes.h:496
#define CHECK(condition)
Definition: Logger.h:222
bool is_geometry() const
Definition: sqltypes.h:587
bool is_high_precision_timestamp() const
Definition: sqltypes.h:985
SQLTypes type
Definition: sqltypes.h:998
#define NULL_SMALLINT
double extract_fp_type_from_datum(const Datum datum, const SQLTypeInfo &ti)
Definition: Datum.cpp:547
HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
Definition: sqltypes.h:183
#define NULL_ARRAY_BIGINT
bool is_dict_encoded_string() const
Definition: sqltypes.h:627
Definition: sqltypes.h:60
bool is_varlen_indeed() const
Definition: sqltypes.h:621
bool is_string() const
Definition: sqltypes.h:575
SQLTypeInfo(SQLTypes t, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:340
constexpr double n
Definition: Utm.h:38
bool transforms() const
Definition: sqltypes.h:610
SQLTypeInfo(SQLTypes t, bool n)
Definition: sqltypes.h:350
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:386
int8_t * numbersPtr
Definition: sqltypes.h:221
bool is_string_array() const
Definition: sqltypes.h:576
size_t index
Definition: sqltypes.h:1458
Definition: Datum.h:44
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:956
bool is_decimal() const
Definition: sqltypes.h:578
int get_physical_coord_cols() const
Definition: sqltypes.h:429
#define IS_NUMBER(T)
Definition: sqltypes.h:294
void operator()(int8_t *)
Definition: sqltypes.h:172
#define IS_GEO(T)
Definition: sqltypes.h:298
#define TRANSIENT_DICT(ID)
Definition: sqltypes.h:310
int comp_param
Definition: sqltypes.h:1005
bool is_date() const
Definition: sqltypes.h:983
bool is_array() const
Definition: sqltypes.h:583
void set_precision(int d)
Definition: sqltypes.h:492
SQLTypeInfo get_nullable_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1213
int dimension
Definition: sqltypes.h:1000
HOST DEVICE bool is_null_point_coord_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:943
double doubleval
Definition: Datum.h:51
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:385
constexpr auto is_datetime(SQLTypes type)
Definition: sqltypes.h:313
HOST DEVICE bool is_null(const int8_t *val) const
Definition: sqltypes.h:885
SQLTypes string_dict_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:563
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:489