OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sqltypes.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "../Logger/Logger.h"
26 #include "Datum.h"
27 #include "funcannotations.h"
28 
29 #include <cassert>
30 #include <ctime>
31 #include <memory>
32 #include <ostream>
33 #include <sstream>
34 #include <string>
35 #include <type_traits>
36 #include <vector>
37 
38 #include "Shared/DbObjectKeys.h"
39 
namespace sql_constants {
// A column may be declared with at most 18 digits of precision, which is the number of
// decimal digits an int64_t is guaranteed to hold (digits10). The most precise numeric
// value that can actually be represented carries 19 digits, so internal calculations can
// be slightly more relaxed than column definitions: e.g. a CAST from double to numeric
// may use precision 19 as long as it does not overflow, whereas a column can never be
// specified with precision 19 or more.
static constexpr int32_t kMaxNumericPrecision{
    std::numeric_limits<int64_t>::digits10};  // 18
static constexpr int32_t kMaxRepresentableNumericPrecision{kMaxNumericPrecision +
                                                           1};  // 19
}  // namespace sql_constants
53 
54 // must not change because these values persist in catalogs.
55 enum SQLTypes {
56  kNULLT = 0, // type for null values
57  kBOOLEAN = 1,
58  kCHAR = 2,
59  kVARCHAR = 3,
60  kNUMERIC = 4,
61  kDECIMAL = 5,
62  kINT = 6,
63  kSMALLINT = 7,
64  kFLOAT = 8,
65  kDOUBLE = 9,
66  kTIME = 10,
67  kTIMESTAMP = 11,
68  kBIGINT = 12,
69  kTEXT = 13,
70  kDATE = 14,
71  kARRAY = 15,
74  kPOINT = 18,
76  kPOLYGON = 20,
78  kTINYINT = 22,
79  kGEOMETRY = 23,
80  kGEOGRAPHY = 24,
81  kEVAL_CONTEXT_TYPE = 25, // Placeholder Type for ANY
82  kVOID = 26,
83  kCURSOR = 27,
84  kCOLUMN = 28,
89 };
90 
91 #if !(defined(__CUDACC__) || defined(NO_BOOST))
92 
93 inline std::string toString(const SQLTypes& type) {
94  switch (type) {
95  case kNULLT:
96  return "NULL";
97  case kBOOLEAN:
98  return "BOOL";
99  case kCHAR:
100  return "CHAR";
101  case kVARCHAR:
102  return "VARCHAR";
103  case kNUMERIC:
104  return "NUMERIC";
105  case kDECIMAL:
106  return "DECIMAL";
107  case kINT:
108  return "INT";
109  case kSMALLINT:
110  return "SMALLINT";
111  case kFLOAT:
112  return "FLOAT";
113  case kDOUBLE:
114  return "DOUBLE";
115  case kTIME:
116  return "TIME";
117  case kTIMESTAMP:
118  return "TIMESTAMP";
119  case kBIGINT:
120  return "BIGINT";
121  case kTEXT:
122  return "TEXT";
123  case kDATE:
124  return "DATE";
125  case kARRAY:
126  return "ARRAY";
127  case kINTERVAL_DAY_TIME:
128  return "DAY TIME INTERVAL";
130  return "YEAR MONTH INTERVAL";
131  case kPOINT:
132  return "POINT";
133  case kMULTIPOINT:
134  return "MULTIPOINT";
135  case kLINESTRING:
136  return "LINESTRING";
137  case kMULTILINESTRING:
138  return "MULTILINESTRING";
139  case kPOLYGON:
140  return "POLYGON";
141  case kMULTIPOLYGON:
142  return "MULTIPOLYGON";
143  case kTINYINT:
144  return "TINYINT";
145  case kGEOMETRY:
146  return "GEOMETRY";
147  case kGEOGRAPHY:
148  return "GEOGRAPHY";
149  case kEVAL_CONTEXT_TYPE:
150  return "UNEVALUATED ANY";
151  case kVOID:
152  return "VOID";
153  case kCURSOR:
154  return "CURSOR";
155  case kCOLUMN:
156  return "COLUMN";
157  case kCOLUMN_LIST:
158  return "COLUMN_LIST";
159  case kSQLTYPE_LAST:
160  break;
161  }
162  LOG(FATAL) << "Invalid SQL type: " << type;
163  return "";
164 }
165 
166 inline std::ostream& operator<<(std::ostream& os, SQLTypes const sql_type) {
167  os << toString(sql_type);
168  return os;
169 }
170 
171 #endif // #if !(defined(__CUDACC__) || defined(NO_BOOST))
172 
174  void operator()(int8_t*) {}
175 };
// Deleter functor that releases the pointer it is given by calling free().
struct FreeDeleter {
  void operator()(int8_t* p) {
    free(p);
  }
};
179 
180 struct HostArrayDatum : public VarlenDatum {
181  using ManagedPtr = std::shared_ptr<int8_t>;
182 
183  HostArrayDatum() = default;
184 
185  HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
186  : VarlenDatum(l, p.get(), n), data_ptr(p) {}
187 
188  HostArrayDatum(size_t const l, int8_t* p, bool const n)
189  : VarlenDatum(l, p, n), data_ptr(p, FreeDeleter()){};
190 
191  template <typename CUSTOM_DELETER,
192  typename = std::enable_if_t<
193  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
194  HostArrayDatum(size_t const l, int8_t* p, CUSTOM_DELETER custom_deleter)
195  : VarlenDatum(l, p, 0 == l), data_ptr(p, custom_deleter) {}
196 
197  template <typename CUSTOM_DELETER,
198  typename = std::enable_if_t<
199  std::is_void<std::result_of_t<CUSTOM_DELETER(int8_t*)> >::value> >
200  HostArrayDatum(size_t const l, int8_t* p, bool const n, CUSTOM_DELETER custom_deleter)
201  : VarlenDatum(l, p, n), data_ptr(p, custom_deleter) {}
202 
204 };
205 
206 struct DeviceArrayDatum : public VarlenDatum {
208 };
209 
210 inline DEVICE constexpr bool is_cuda_compiler() {
211 #ifdef __CUDACC__
212  return true;
213 #else
214  return false;
215 #endif
216 }
217 
218 using ArrayDatum =
219  std::conditional_t<is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum>;
220 
221 #ifndef __CUDACC__
223  int8_t* numbersPtr;
224  std::vector<std::string>* stringsPtr;
225  std::vector<ArrayDatum>* arraysPtr;
226 };
227 #endif
228 
229 // must not change because these values persist in catalogs.
231  kENCODING_NONE = 0, // no encoding
232  kENCODING_FIXED = 1, // Fixed-bit encoding
233  kENCODING_RL = 2, // Run Length encoding
234  kENCODING_DIFF = 3, // Differential encoding
235  kENCODING_DICT = 4, // Dictionary encoding
236  kENCODING_SPARSE = 5, // Null encoding for sparse columns
237  kENCODING_GEOINT = 6, // Encoding coordinates as integers
238  kENCODING_DATE_IN_DAYS = 7, // Date encoding in days
239  kENCODING_ARRAY = 8, // Array encoding for columns of arrays
240  kENCODING_ARRAY_DICT = 9, // Array encoding for columns of text encoding dict arrays
242 };
243 
244 #if !(defined(__CUDACC__) || defined(NO_BOOST))
245 
246 inline std::ostream& operator<<(std::ostream& os, EncodingType const type) {
247  switch (type) {
248  case kENCODING_NONE:
249  os << "NONE";
250  break;
251  case kENCODING_FIXED:
252  os << "FIXED";
253  break;
254  case kENCODING_RL:
255  os << "RL";
256  break;
257  case kENCODING_DIFF:
258  os << "DIFF";
259  break;
260  case kENCODING_DICT:
261  os << "DICT";
262  break;
263  case kENCODING_SPARSE:
264  os << "SPARSE";
265  break;
266  case kENCODING_GEOINT:
267  os << "GEOINT";
268  break;
270  os << "DATE_IN_DAYS";
271  break;
272  case kENCODING_ARRAY:
273  os << "ARRAY";
274  break;
276  os << "ARRAY_DICT";
277  break;
278  case kENCODING_LAST:
279  break;
280  default:
281  LOG(FATAL) << "Invalid EncodingType: " << type;
282  }
283  return os;
284 }
285 
286 inline std::string toString(const EncodingType& type) {
287  std::ostringstream ss;
288  ss << type;
289  return ss.str();
290 }
291 
292 #endif // #if !(defined(__CUDACC__) || defined(NO_BOOST))
293 
// Category tests over SQLTypes enumerators. Each macro expands its argument more than
// once, so the argument should be an expression without side effects.

// Integer types.
#define IS_INTEGER(T) \
  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kBIGINT) || ((T) == kTINYINT))
// Integer, floating-point and NUMERIC/DECIMAL types.
#define IS_NUMBER(T)                                                             \
  (((T) == kINT) || ((T) == kSMALLINT) || ((T) == kDOUBLE) || ((T) == kFLOAT) || \
   ((T) == kBIGINT) || ((T) == kNUMERIC) || ((T) == kDECIMAL) || ((T) == kTINYINT))
// TEXT, VARCHAR and CHAR.
#define IS_STRING(T) (((T) == kTEXT) || ((T) == kVARCHAR) || ((T) == kCHAR))
// The six geo types (single and multi variants of point, linestring and polygon).
#define IS_GEO(T)                                                          \
  (((T) == kPOINT) || ((T) == kLINESTRING) || ((T) == kMULTILINESTRING) || \
   ((T) == kMULTIPOINT) || ((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
// The two interval types.
#define IS_INTERVAL(T) (((T) == kINTERVAL_DAY_TIME) || ((T) == kINTERVAL_YEAR_MONTH))
// NUMERIC and DECIMAL.
#define IS_DECIMAL(T) (((T) == kNUMERIC) || ((T) == kDECIMAL))
// Polygon-shaped geo types.
#define IS_GEO_POLY(T) (((T) == kPOLYGON) || ((T) == kMULTIPOLYGON))
// Line-shaped geo types.
#define IS_GEO_LINE(T) (((T) == kLINESTRING) || ((T) == kMULTILINESTRING))
// The "multi" variants of the geo types.
#define IS_GEO_MULTI(T) \
  (((T) == kMULTIPOLYGON) || ((T) == kMULTILINESTRING) || ((T) == kMULTIPOINT))
309 
310 #include "InlineNullValues.h"
311 
// Both macros negate their argument, so each one undoes the other.
// NOTE(review): presumably transient dictionary ids are the negated regular ids; confirm
// against the dictionary code.
#define TRANSIENT_DICT(ID) (-(ID))
#define REGULAR_DICT(TRANSIENTID) (-(TRANSIENTID))
314 
315 constexpr auto is_datetime(SQLTypes type) {
316  return type == kTIME || type == kTIMESTAMP || type == kDATE;
317 }
318 
319 // @type SQLTypeInfo
320 // @brief a structure to capture all type information including
321 // length, precision, scale, etc.
322 class SQLTypeInfo {
323  public:
324  SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
325  : type(t)
326  , subtype(st)
327  , dimension(d)
328  , scale(s)
329  , notnull(n)
330  , compression(c)
331  , comp_param(p)
332  , size(get_storage_size()) {}
333  SQLTypeInfo(SQLTypes t, int d, int s, bool n)
334  : type(t)
335  , subtype(kNULLT)
336  , dimension(d)
337  , scale(s)
338  , notnull(n)
340  , comp_param(0)
341  , size(get_storage_size()) {}
343  : type(t)
344  , subtype(st)
345  , dimension(0)
346  , scale(0)
347  , notnull(false)
348  , compression(c)
349  , comp_param(p)
350  , size(get_storage_size()) {}
351  SQLTypeInfo(SQLTypes t, int d, int s) : SQLTypeInfo(t, d, s, false) {}
353  : type(t)
354  , subtype(kNULLT)
355  , dimension(0)
356  , scale(0)
357  , notnull(n)
359  , comp_param(0)
360  , size(get_storage_size()) {}
363  : type(t)
364  , subtype(kNULLT)
365  , dimension(0)
366  , scale(0)
367  , notnull(n)
368  , compression(c)
369  , comp_param(0)
370  , size(get_storage_size()) {}
372  : type(kNULLT)
373  , subtype(kNULLT)
374  , dimension(0)
375  , scale(0)
376  , notnull(false)
378  , comp_param(0)
379  , size(0) {}
380 
381  HOST DEVICE inline SQLTypes get_type() const { return type; }
382  HOST DEVICE inline SQLTypes get_subtype() const { return subtype; }
383  HOST DEVICE inline int get_dimension() const { return dimension; }
384  inline int get_precision() const { return dimension; }
385  HOST DEVICE inline int get_input_srid() const { return dimension; }
386  HOST DEVICE inline int get_scale() const { return scale; }
387  HOST DEVICE inline int get_output_srid() const { return scale; }
388  HOST DEVICE inline bool get_notnull() const { return notnull; }
390  // TODO: Remove ambiguous `comp_param` attribute and replace with a comp_size enum.
391  // dict_key should be used uniformly for dictionary ID.
392  HOST DEVICE inline int get_comp_param() const { return comp_param; }
393  HOST DEVICE inline int get_size() const { return size; }
394 
395  inline int is_logical_geo_type() const {
396  if (type == kPOINT || type == kLINESTRING || type == kMULTILINESTRING ||
397  type == kMULTIPOINT || type == kPOLYGON || type == kMULTIPOLYGON) {
398  return true;
399  }
400  return false;
401  }
402 
403  inline int get_logical_size() const {
406  return ti.get_size();
407  }
408  if (compression == kENCODING_DICT) {
409  return 4;
410  }
411  return get_size();
412  }
413 
414  inline int get_physical_cols() const {
415  switch (type) {
416  case kPOINT:
417  return 1; // coords
418  case kMULTIPOINT:
419  return 2; // coords, bounds
420  case kLINESTRING:
421  return 2; // coords, bounds
422  case kMULTILINESTRING:
423  return 3; // coords, linestring_sizes, bounds
424  case kPOLYGON:
425  return 4; // coords, ring_sizes, bounds, render_group
426  case kMULTIPOLYGON:
427  return 5; // coords, ring_sizes, poly_rings, bounds, render_group
428  default:
429  break;
430  }
431  return 0;
432  }
433  inline int get_physical_coord_cols() const {
434  // @TODO dmitri/simon rename this function?
435  // It needs to return the number of extra columns
436  // which need to go through the executor, as opposed
437  // to those which are only needed by CPU for poly
438  // cache building or what-not. For now, we just omit
439  // the Render Group column. If we add Bounding Box
440  // or something this may require rethinking. Perhaps
441  // these two functions need to return an array of
442  // offsets rather than just a number to loop over,
443  // so that executor and non-executor columns can
444  // be mixed.
445  // NOTE(adb): In binding to extension functions, we need to know some pretty specific
446  // type info about each of the physical coords cols for each geo type. I added checks
447  // there to ensure the physical coords col for the geo type match what we expect. If
448  // these values are ever changed, corresponding values in
449  // ExtensionFunctionsBinding.cpp::compute_narrowing_conv_scores and
450  // ExtensionFunctionsBinding.cpp::compute_widening_conv_scores will also need to be
451  // changed.
452  switch (type) {
453  case kPOINT:
454  return 1;
455  case kMULTIPOINT:
456  return 1; // omit bounds
457  case kLINESTRING:
458  return 1; // omit bounds
459  case kMULTILINESTRING:
460  return 2; // omit bounds
461  case kPOLYGON:
462  return 2; // omit bounds, render group
463  case kMULTIPOLYGON:
464  return 3; // omit bounds, render group
465  default:
466  break;
467  }
468  return 0;
469  }
470  inline bool has_bounds() const {
471  switch (type) {
472  case kMULTIPOINT:
473  case kLINESTRING:
474  case kMULTILINESTRING:
475  case kPOLYGON:
476  case kMULTIPOLYGON:
477  return true;
478  default:
479  break;
480  }
481  return false;
482  }
483  inline bool has_render_group() const {
484  switch (type) {
485  case kPOLYGON:
486  case kMULTIPOLYGON:
487  return true;
488  default:
489  break;
490  }
491  return false;
492  }
493  HOST DEVICE inline void set_type(SQLTypes t) { type = t; }
494  HOST DEVICE inline void set_subtype(SQLTypes st) { subtype = st; }
495  inline void set_dimension(int d) { dimension = d; }
496  inline void set_precision(int d) { dimension = d; }
497  inline void set_input_srid(int d) { dimension = d; }
498  inline void set_scale(int s) { scale = s; }
499  inline void set_output_srid(int s) { scale = s; }
500  inline void set_notnull(bool n) { notnull = n; }
501  inline void set_size(int s) { size = s; }
502  inline void set_fixed_size() { size = get_storage_size(); }
503  inline void set_dict_intersection() { dict_intersection = true; }
504  inline void set_compression(EncodingType c) { compression = c; }
505  inline void set_comp_param(int p) { comp_param = p; }
506 #ifndef __CUDACC__
507  inline std::string get_type_name() const {
508  if (IS_GEO(type)) {
509  std::string srid_string = "";
510  if (get_output_srid() > 0) {
511  srid_string = ", " + std::to_string(get_output_srid());
512  }
513  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
514  return type_name[static_cast<int>(subtype)] + "(" +
515  type_name[static_cast<int>(type)] + srid_string + ")";
516  }
517  std::string ps = "";
518  if (type == kDECIMAL || type == kNUMERIC) {
519  ps = "(" + std::to_string(dimension) + "," + std::to_string(scale) + ")";
520  } else if (type == kTIMESTAMP) {
521  ps = "(" + std::to_string(dimension) + ")";
522  }
523  if (type == kARRAY) {
524  auto elem_ti = get_elem_type();
525  auto num_elems = (size > 0) ? std::to_string(size / elem_ti.get_size()) : "";
526  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
527  return elem_ti.get_type_name() + ps + "[" + num_elems + "]";
528  }
529  if (type == kCOLUMN) {
530  auto elem_ti = get_elem_type();
531  auto num_elems =
532  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
533  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
534  return "COLUMN<" + elem_ti.get_type_name() + ps + ">" + num_elems;
535  }
536  if (type == kCOLUMN_LIST) {
537  auto elem_ti = get_elem_type();
538  auto num_elems =
539  (size > 0) ? "[" + std::to_string(size / elem_ti.get_size()) + "]" : "";
540  CHECK_LT(static_cast<int>(subtype), kSQLTYPE_LAST);
541  return "COLUMN_LIST<" + elem_ti.get_type_name() + ps + ">" + num_elems;
542  }
543  return type_name[static_cast<int>(type)] + ps;
544  }
545  inline std::string get_compression_name() const { return comp_name[(int)compression]; }
546  std::string toString() const { return to_string(); } // for PRINT macro
547  inline std::string to_string() const {
548  std::ostringstream oss;
549  oss << "(type=" << type_name[static_cast<int>(type)]
550  << ", dimension=" << get_dimension() << ", scale=" << get_scale()
551  << ", null=" << (get_notnull() ? "not nullable" : "nullable")
552  << ", compression_name=" << get_compression_name()
553  << ", comp_param=" << get_comp_param()
554  << ", subtype=" << type_name[static_cast<int>(subtype)] << ", size=" << get_size()
555  << ", element_size=" << get_elem_type().get_size() << ", dict_key=" << dict_key_
556  << ")";
557  return oss.str();
558  }
559 
560  inline std::string get_buffer_name() const {
561  if (is_array()) {
562  return "Array";
563  }
564  if (is_bytes()) {
565  return "Bytes";
566  }
567 
568  if (is_column()) {
569  return "Column";
570  }
571 
572  assert(false);
573  return "";
574  }
575 #endif
576  template <SQLTypes... types>
577  bool is_any() const {
578  return (... || (types == type));
579  }
580  inline bool is_string() const { return IS_STRING(type); }
581  inline bool is_string_array() const { return (type == kARRAY) && IS_STRING(subtype); }
582  inline bool is_integer() const { return IS_INTEGER(type); }
583  inline bool is_decimal() const { return type == kDECIMAL || type == kNUMERIC; }
584  inline bool is_fp() const { return type == kFLOAT || type == kDOUBLE; }
585  inline bool is_number() const { return IS_NUMBER(type); }
586  inline bool is_time() const { return is_datetime(type); }
587  inline bool is_boolean() const { return type == kBOOLEAN; }
588  inline bool is_array() const { return type == kARRAY; } // Array
589  inline bool is_varlen_array() const { return type == kARRAY && size <= 0; }
590  inline bool is_fixlen_array() const { return type == kARRAY && size > 0; }
591  inline bool is_timeinterval() const { return IS_INTERVAL(type); }
592  inline bool is_geometry() const { return IS_GEO(type); }
593  inline bool is_column() const { return type == kCOLUMN; } // Column
594  inline bool is_column_list() const { return type == kCOLUMN_LIST; } // ColumnList
595  inline bool is_column_array() const {
596  const auto c = get_compression();
597  return type == kCOLUMN && (c == kENCODING_ARRAY || c == kENCODING_ARRAY_DICT);
598  } // ColumnArray
599  inline bool is_column_list_array() const {
600  const auto c = get_compression();
601  return type == kCOLUMN_LIST && (c == kENCODING_ARRAY || c == kENCODING_ARRAY_DICT);
602  } // ColumnList of ColumnArray
603  inline bool is_bytes() const {
604  return type == kTEXT && get_compression() == kENCODING_NONE;
605  }
606  inline bool is_text_encoding_dict() const {
607  return type == kTEXT && get_compression() == kENCODING_DICT;
608  }
609  inline bool is_text_encoding_dict_array() const {
610  return type == kARRAY && subtype == kTEXT && get_compression() == kENCODING_DICT;
611  }
612  inline bool is_buffer() const {
613  return is_array() || is_column() || is_column_list() || is_bytes();
614  }
615  inline bool transforms() const {
616  return IS_GEO(type) && get_input_srid() > 0 && get_output_srid() > 0 &&
618  }
619 
620  inline bool is_varlen() const { // TODO: logically this should ignore fixlen arrays
621  return (IS_STRING(type) && compression != kENCODING_DICT) || type == kARRAY ||
622  IS_GEO(type);
623  }
624 
625  // need this here till is_varlen can be fixed w/o negative impact to existing code
626  inline bool is_varlen_indeed() const {
627  // SQLTypeInfo.is_varlen() is broken with fixedlen array now
628  // and seems left broken for some concern, so fix it locally
629  return is_varlen() && !is_fixlen_array();
630  }
631 
632  inline bool is_dict_encoded_string() const {
633  return is_string() && compression == kENCODING_DICT;
634  }
635 
636  inline bool is_none_encoded_string() const {
637  return is_string() && compression == kENCODING_NONE;
638  }
639 
640  inline bool is_subtype_dict_encoded_string() const {
642  }
643 
644  inline bool is_dict_encoded_type() const {
645  return is_dict_encoded_string() ||
647  }
648 
649  inline bool is_dict_intersection() const { return dict_intersection; }
650 
651  inline bool has_same_itemtype(const SQLTypeInfo& other) const {
652  if ((is_column() || is_column_list()) &&
653  (other.is_column() || other.is_column_list())) {
654  return subtype == other.get_subtype() &&
656  compression == other.get_compression());
657  }
658  return subtype == other.get_subtype();
659  }
660 
661  HOST DEVICE inline bool operator!=(const SQLTypeInfo& rhs) const {
662  return type != rhs.get_type() || subtype != rhs.get_subtype() ||
663  dimension != rhs.get_dimension() || scale != rhs.get_scale() ||
664  compression != rhs.get_compression() ||
667  notnull != rhs.get_notnull() || dict_key_ != rhs.dict_key_;
668  }
669  HOST DEVICE inline bool operator==(const SQLTypeInfo& rhs) const {
670  return type == rhs.get_type() && subtype == rhs.get_subtype() &&
671  dimension == rhs.get_dimension() && scale == rhs.get_scale() &&
672  compression == rhs.get_compression() &&
675  notnull == rhs.get_notnull() && dict_key_ == rhs.dict_key_;
676  }
677 
678  inline int get_array_context_logical_size() const {
679  if (is_string()) {
680  auto comp_type(get_compression());
681  if (comp_type == kENCODING_DICT || comp_type == kENCODING_FIXED ||
682  comp_type == kENCODING_NONE) {
683  return sizeof(int32_t);
684  }
685  }
686  return get_logical_size();
687  }
688 
689  HOST DEVICE inline void operator=(const SQLTypeInfo& rhs) {
690  type = rhs.get_type();
691  subtype = rhs.get_subtype();
692  dimension = rhs.get_dimension();
693  scale = rhs.get_scale();
694  notnull = rhs.get_notnull();
696  comp_param = rhs.get_comp_param();
697  size = rhs.get_size();
698  dict_key_ = rhs.dict_key_;
699  }
700 
701  inline bool is_castable(const SQLTypeInfo& new_type_info) const {
702  // can always cast between the same type but different precision/scale/encodings
703  if (type == new_type_info.get_type()) {
704  return true;
705  // can always cast between strings
706  } else if (is_string() && new_type_info.is_string()) {
707  return true;
708  } else if (is_string() && !new_type_info.is_string()) {
709  return false;
710  } else if (!is_string() && new_type_info.is_string()) {
711  return true;
712  // can cast between numbers
713  } else if (is_number() && new_type_info.is_number()) {
714  return true;
715  // can cast from timestamp or date to number (epoch)
716  } else if ((type == kTIMESTAMP || type == kDATE) && new_type_info.is_number()) {
717  return true;
718  // can cast from number (epoch) to timestamp, date, or time
719  } else if (is_number() && new_type_info.is_time()) {
720  return true;
721  // can cast from date to timestamp
722  } else if (type == kDATE && new_type_info.get_type() == kTIMESTAMP) {
723  return true;
724  } else if (type == kTIMESTAMP && new_type_info.get_type() == kDATE) {
725  return true;
726  } else if (type == kTIMESTAMP && new_type_info.get_type() == kTIME) {
727  return true;
728  } else if (type == kBOOLEAN && new_type_info.is_number()) {
729  return true;
730  } else if (type == kARRAY && new_type_info.get_type() == kARRAY) {
731  return get_elem_type().is_castable(new_type_info.get_elem_type());
732  } else if (type == kCOLUMN && new_type_info.get_type() == kCOLUMN) {
733  return get_elem_type().is_castable(new_type_info.get_elem_type());
734  } else if (type == kCOLUMN_LIST && new_type_info.get_type() == kCOLUMN_LIST) {
735  return get_elem_type().is_castable(new_type_info.get_elem_type());
736  } else {
737  return false;
738  }
739  }
740 
749  inline bool is_numeric_scalar_auto_castable(const SQLTypeInfo& new_type_info) const {
750  const auto& new_type = new_type_info.get_type();
751  switch (type) {
752  case kBOOLEAN:
753  return new_type == kBOOLEAN;
754  case kTINYINT:
755  case kSMALLINT:
756  case kINT:
757  if (!new_type_info.is_number()) {
758  return false;
759  }
760  if (new_type_info.is_fp()) {
761  // We can lose precision here, but preserving existing behavior
762  return true;
763  }
764  return new_type_info.get_logical_size() >= get_logical_size();
765  case kBIGINT:
766  return new_type == kBIGINT || new_type == kDOUBLE || new_type == kFLOAT;
767  case kFLOAT:
768  case kDOUBLE:
769  if (!new_type_info.is_fp()) {
770  return false;
771  }
772  return (new_type_info.get_logical_size() >= get_logical_size());
773  case kDECIMAL:
774  case kNUMERIC:
775  switch (new_type) {
776  case kDECIMAL:
777  case kNUMERIC:
778  return new_type_info.get_dimension() >= get_dimension();
779  case kDOUBLE:
780  return true;
781  case kFLOAT:
782  return get_dimension() <= 7;
783  default:
784  return false;
785  }
786  case kTIMESTAMP:
787  if (new_type != kTIMESTAMP) {
788  return false;
789  }
790  return new_type_info.get_dimension() >= get_dimension();
791  case kDATE:
792  return new_type == kDATE;
793  case kTIME:
794  return new_type == kTIME;
795  default:
796  UNREACHABLE();
797  return false;
798  }
799  }
800 
810  inline int32_t get_numeric_scalar_scale() const {
811  CHECK(type == kBOOLEAN || type == kTINYINT || type == kSMALLINT || type == kINT ||
812  type == kBIGINT || type == kFLOAT || type == kDOUBLE || type == kDECIMAL ||
813  type == kNUMERIC || type == kTIMESTAMP || type == kDATE || type == kTIME);
814  switch (type) {
815  case kBOOLEAN:
816  return 1;
817  case kTINYINT:
818  case kSMALLINT:
819  case kINT:
820  case kBIGINT:
821  case kFLOAT:
822  case kDOUBLE:
823  return get_logical_size();
824  case kDECIMAL:
825  case kNUMERIC:
826  if (get_dimension() > 7) {
827  return 8;
828  } else {
829  return 4;
830  }
831  case kTIMESTAMP:
832  switch (get_dimension()) {
833  case 9:
834  return 8;
835  case 6:
836  return 4;
837  case 3:
838  return 2;
839  case 0:
840  return 1;
841  default:
842  UNREACHABLE();
843  }
844  case kDATE:
845  return 1;
846  case kTIME:
847  return 1;
848  default:
849  UNREACHABLE();
850  return 0;
851  }
852  }
853 
854  HOST DEVICE inline bool is_null(const Datum& d) const {
855  // assuming Datum is always uncompressed
856  switch (type) {
857  case kBOOLEAN:
858  return (int8_t)d.boolval == NULL_BOOLEAN;
859  case kTINYINT:
860  return d.tinyintval == NULL_TINYINT;
861  case kSMALLINT:
862  return d.smallintval == NULL_SMALLINT;
863  case kINT:
864  return d.intval == NULL_INT;
865  case kBIGINT:
866  case kNUMERIC:
867  case kDECIMAL:
868  return d.bigintval == NULL_BIGINT;
869  case kFLOAT:
870  return d.floatval == NULL_FLOAT;
871  case kDOUBLE:
872  return d.doubleval == NULL_DOUBLE;
873  case kTIME:
874  case kTIMESTAMP:
875  case kDATE:
876  return d.bigintval == NULL_BIGINT;
877  case kTEXT:
878  case kVARCHAR:
879  case kCHAR:
880  // @TODO handle null strings
881  break;
882  case kNULLT:
883  return true;
884  case kARRAY:
885  return d.arrayval == NULL || d.arrayval->is_null;
886  default:
887  break;
888  }
889  return false;
890  }
891  HOST DEVICE inline bool is_null(const int8_t* val) const {
892  if (type == kFLOAT) {
893  return *(float*)val == NULL_FLOAT;
894  }
895  if (type == kDOUBLE) {
896  return *(double*)val == NULL_DOUBLE;
897  }
898  // val can be either compressed or uncompressed
899  switch (size) {
900  case 1:
901  return *val == NULL_TINYINT;
902  case 2:
903  return *(int16_t*)val == NULL_SMALLINT;
904  case 4:
905  return *(int32_t*)val == NULL_INT;
906  case 8:
907  return *(int64_t*)val == NULL_BIGINT;
908  case kNULLT:
909  return true;
910  default:
911  // @TODO(wei) handle null strings
912  break;
913  }
914  return false;
915  }
916  HOST DEVICE inline bool is_null_fixlen_array(const int8_t* val, int array_size) const {
917  // Check if fixed length array has a NULL_ARRAY sentinel as the first element
918  if (type == kARRAY && val && array_size > 0 && array_size == size) {
919  // Need to create element type to get the size, but can't call get_elem_type()
920  // since this is a HOST DEVICE function. Going through copy constructor instead.
921  auto elem_ti{*this};
922  elem_ti.set_type(subtype);
923  elem_ti.set_subtype(kNULLT);
924  auto elem_size = elem_ti.get_storage_size();
925  if (elem_size < 1) {
926  return false;
927  }
928  if (subtype == kFLOAT) {
929  return *(float*)val == NULL_ARRAY_FLOAT;
930  }
931  if (subtype == kDOUBLE) {
932  return *(double*)val == NULL_ARRAY_DOUBLE;
933  }
934  switch (elem_size) {
935  case 1:
936  return *val == NULL_ARRAY_TINYINT;
937  case 2:
938  return *(int16_t*)val == NULL_ARRAY_SMALLINT;
939  case 4:
940  return *(int32_t*)val == NULL_ARRAY_INT;
941  case 8:
942  return *(int64_t*)val == NULL_ARRAY_BIGINT;
943  default:
944  return false;
945  }
946  }
947  return false;
948  }
949  HOST DEVICE inline bool is_null_point_coord_array(const int8_t* val,
950  int array_size) const {
951  if (type == kARRAY && subtype == kTINYINT && val && array_size > 0 &&
952  array_size == size) {
953  if (array_size == 2 * sizeof(double)) {
954  return *(double*)val == NULL_ARRAY_DOUBLE;
955  }
956  if (array_size == 2 * sizeof(int32_t)) {
957  return *(uint32_t*)val == NULL_ARRAY_COMPRESSED_32;
958  }
959  }
960  return false;
961  }
962 
963  inline SQLTypeInfo get_elem_type() const {
964  SQLTypeInfo type_info = *this;
965  if ((type == kCOLUMN || type == kCOLUMN_LIST) && compression == kENCODING_ARRAY) {
966  type_info.set_type(kARRAY);
967  type_info.set_compression(kENCODING_NONE);
968  } else if ((type == kCOLUMN || type == kCOLUMN_LIST) &&
970  type_info.set_type(kARRAY);
971  type_info.set_compression(kENCODING_DICT);
972  } else {
973  type_info.set_type(subtype);
974  type_info.set_subtype(kNULLT);
975  }
976  type_info.setStorageSize();
977  return type_info;
978  }
979 
980  inline SQLTypeInfo get_array_type() const {
981  SQLTypeInfo type_info = *this;
982  type_info.set_type(kARRAY);
983  type_info.set_subtype(type);
984  type_info.setStorageSize();
985  return type_info;
986  }
987 
988  inline bool is_date_in_days() const {
989  if (type == kDATE) {
990  const auto comp_type = get_compression();
991  if (comp_type == kENCODING_DATE_IN_DAYS) {
992  return true;
993  }
994  }
995  return false;
996  }
997 
998  inline bool is_date() const { return type == kDATE; }
999 
1000  inline bool is_time_or_date() const {
1001  return type == kDATE || type == kTIME || type == kTIMESTAMP;
1002  }
1003 
1004  inline bool is_high_precision_timestamp() const {
1005  if (type == kTIMESTAMP) {
1006  const auto dimension = get_dimension();
1007  if (dimension > 0) {
1008  return true;
1009  }
1010  }
1011  return false;
1012  }
1013 
1014  inline bool is_timestamp() const { return type == kTIMESTAMP; }
1015  inline bool is_encoded_timestamp() const {
1016  return is_timestamp() && compression == kENCODING_FIXED;
1017  }
1018 
1020 
1022  // If comp_param is set, it should equal dict_id.
1024  return dict_key_;
1025  }
1026 
1027  void setStringDictKey(const shared::StringDictKey& dict_key) {
1028  dict_key_ = dict_key;
1029  // If comp_param is set, it should equal dict_id.
1031  }
1032 
1033  private:
1034  SQLTypes type; // type id
1035  SQLTypes subtype; // element type of arrays or columns
1036  int dimension; // VARCHAR/CHAR length or NUMERIC/DECIMAL precision or COLUMN_LIST
1037  // length or TIMESTAMP precision
1038  int scale; // NUMERIC/DECIMAL scale
1039  bool notnull; // nullable? a hint, not used for type checking
1040  EncodingType compression; // compression scheme
1041  int comp_param; // compression parameter when applicable for certain schemes
1042  int size; // size of the type in bytes. -1 for variable size
1043  bool dict_intersection{false};
1044 #ifndef __CUDACC__
1045  static std::string type_name[kSQLTYPE_LAST];
1046  static std::string comp_name[kENCODING_LAST];
1047 #endif
1049  HOST DEVICE inline int get_storage_size() const {
1050  switch (type) {
1051  case kBOOLEAN:
1052  return sizeof(int8_t);
1053  case kTINYINT:
1054  return sizeof(int8_t);
1055  case kSMALLINT:
1056  switch (compression) {
1057  case kENCODING_NONE:
1058  return sizeof(int16_t);
1059  case kENCODING_FIXED:
1060  case kENCODING_SPARSE:
1061  return comp_param / 8;
1062  case kENCODING_RL:
1063  case kENCODING_DIFF:
1064  break;
1065  default:
1066  assert(false);
1067  }
1068  break;
1069  case kINT:
1070  switch (compression) {
1071  case kENCODING_NONE:
1072  return sizeof(int32_t);
1073  case kENCODING_FIXED:
1074  case kENCODING_SPARSE:
1075  case kENCODING_GEOINT:
1076  return comp_param / 8;
1077  case kENCODING_RL:
1078  case kENCODING_DIFF:
1079  break;
1080  default:
1081  assert(false);
1082  }
1083  break;
1084  case kBIGINT:
1085  case kNUMERIC:
1086  case kDECIMAL:
1087  switch (compression) {
1088  case kENCODING_NONE:
1089  return sizeof(int64_t);
1090  case kENCODING_FIXED:
1091  case kENCODING_SPARSE:
1092  return comp_param / 8;
1093  case kENCODING_RL:
1094  case kENCODING_DIFF:
1095  break;
1096  default:
1097  assert(false);
1098  }
1099  break;
1100  case kFLOAT:
1101  switch (compression) {
1102  case kENCODING_NONE:
1103  return sizeof(float);
1104  case kENCODING_FIXED:
1105  case kENCODING_RL:
1106  case kENCODING_DIFF:
1107  case kENCODING_SPARSE:
1108  assert(false);
1109  break;
1110  default:
1111  assert(false);
1112  }
1113  break;
1114  case kDOUBLE:
1115  switch (compression) {
1116  case kENCODING_NONE:
1117  return sizeof(double);
1118  case kENCODING_FIXED:
1119  case kENCODING_RL:
1120  case kENCODING_DIFF:
1121  case kENCODING_SPARSE:
1122  assert(false);
1123  break;
1124  default:
1125  assert(false);
1126  }
1127  break;
1128  case kTIMESTAMP:
1129  case kTIME:
1130  case kINTERVAL_DAY_TIME:
1131  case kINTERVAL_YEAR_MONTH:
1132  case kDATE:
1133  switch (compression) {
1134  case kENCODING_NONE:
1135  return sizeof(int64_t);
1136  case kENCODING_FIXED:
1137  if (type == kTIMESTAMP && dimension > 0) {
1138  assert(false); // disable compression for timestamp precisions
1139  }
1140  return comp_param / 8;
1141  case kENCODING_RL:
1142  case kENCODING_DIFF:
1143  case kENCODING_SPARSE:
1144  assert(false);
1145  break;
1147  switch (comp_param) {
1148  case 0:
1149  return 4; // Default date encoded in days is 32 bits
1150  case 16:
1151  case 32:
1152  return comp_param / 8;
1153  default:
1154  assert(false);
1155  break;
1156  }
1157  default:
1158  assert(false);
1159  }
1160  break;
1161  case kTEXT:
1162  case kVARCHAR:
1163  case kCHAR:
1164  if (compression == kENCODING_DICT) {
1165  return sizeof(int32_t); // @TODO(wei) must check DictDescriptor
1166  }
1167  break;
1168  case kARRAY:
1169  // TODO: return size for fixlen arrays?
1170  break;
1171  case kPOINT:
1172  case kMULTIPOINT:
1173  case kLINESTRING:
1174  case kMULTILINESTRING:
1175  case kPOLYGON:
1176  case kMULTIPOLYGON:
1177  case kCOLUMN:
1178  case kCOLUMN_LIST:
1179  break;
1180  default:
1181  break;
1182  }
1183  return -1;
1184  }
1185 };
1186 
1188 
1190 
1191 #ifndef __CUDACC__
1192 #include <string_view>
1193 
1194 Datum NullDatum(const SQLTypeInfo& ti);
1195 bool IsNullDatum(const Datum d, const SQLTypeInfo& ti);
1196 Datum StringToDatum(const std::string_view s, SQLTypeInfo& ti);
1197 std::string DatumToString(const Datum d, const SQLTypeInfo& ti);
1198 int64_t extract_int_type_from_datum(const Datum datum, const SQLTypeInfo& ti);
1199 double extract_fp_type_from_datum(const Datum datum, const SQLTypeInfo& ti);
1200 bool DatumEqual(const Datum, const Datum, const SQLTypeInfo& ti);
1201 int64_t convert_decimal_value_to_scale(const int64_t decimal_value,
1202  const SQLTypeInfo& type_info,
1203  const SQLTypeInfo& new_type_info);
1204 #endif
1205 
1206 #ifdef HAVE_TOSTRING
1207 inline std::ostream& operator<<(std::ostream& os, const SQLTypeInfo& type_info) {
1208  os << toString(type_info);
1209  return os;
1210 }
1211 #endif
1212 
1213 #include "../QueryEngine/DateAdd.h"
1214 #include "../QueryEngine/DateTruncate.h"
1215 #include "../QueryEngine/ExtractFromTime.h"
1216 
1217 inline SQLTypes get_int_type_by_size(size_t const nbytes) {
1218  switch (nbytes) {
1219  case 1:
1220  return kTINYINT;
1221  case 2:
1222  return kSMALLINT;
1223  case 4:
1224  return kINT;
1225  case 8:
1226  return kBIGINT;
1227  default:
1228 #if !(defined(__CUDACC__) || defined(NO_BOOST))
1229  UNREACHABLE() << "Invalid number of bytes=" << nbytes;
1230 #endif
1231  return {};
1232  }
1233 }
1234 
1236  EncodingType encoding = type_info.get_compression();
1237  if (encoding == kENCODING_DATE_IN_DAYS ||
1238  (encoding == kENCODING_FIXED && type_info.get_type() != kARRAY)) {
1239  encoding = kENCODING_NONE;
1240  }
1241  auto type_info_copy = type_info;
1242  type_info_copy.set_compression(encoding);
1243  type_info_copy.setStorageSize();
1244  return type_info_copy;
1245 }
1246 
1248  SQLTypeInfo nullable_type_info = type_info;
1249  nullable_type_info.set_notnull(false);
1250  return nullable_type_info;
1251 }
1252 
1254  SQLTypeInfo nullable_type_info = get_logical_type_info(type_info);
1255  return get_nullable_type_info(nullable_type_info);
1256 }
1257 
1258 using StringOffsetT = int32_t;
1259 using ArrayOffsetT = int32_t;
1260 
1261 int8_t* append_datum(int8_t* buf, const Datum& d, const SQLTypeInfo& ti);
1262 
1263 // clang-format off
1264 /*
1265 
1266 A note on representing collection types using SQLTypeInfo
1267 =========================================================
1268 
1269 In general, a collection type is a type of collection of items. A
1270 collection can be an array, a column, or a column list. A column list
1271 is a collection of columns that have the same item type. An item can
1272 be of scalar type (bool, integers, floats, text encoding dict's, etc)
1273 or of collection type (array of scalars, column of scalars, column of
1274 array of scalars).
1275 
1276 SQLTypeInfo provides a structure to represent both item and collection
1277 types using the following list of attributes:
1278  SQLTypes type
1279  SQLTypes subtype
1280  int dimension
1281  int scale
1282  bool notnull
1283  EncodingType compression
1284  int comp_param
1285  int size
1286 
1287 To represent a particular type, not all attributes are used. However,
1288 there may exist multiple ways to represent the same type using
1289 various combinations of these attributes and this note can be used as
1290 a guideline to how to represent a newly introduced collection type
1291 using the SQLTypeInfo structure.
1292 
1293 Scalar types
1294 ------------
1295 
1296 - Scalar types are booleans, integers, and floats that are defined
1297  by type and size attributes,
1298 
1299  SQLTypeInfo(type=kSCALAR)
1300 
1301  where SCALAR is in {BOOL, BIGINT, INT, SMALLINT, TINYINT, DOUBLE,
1302  FLOAT} while the corresponding size is specified in
1303  get_storage_size(). For example, SQLTypeInfo(type=kFLOAT)
1304  represents FLOAT and its size is implemented as 4 in the
1305  get_storage_size() method.
1306 
1307 - Text encoding dict (defined by an index and a string dictionary) is
1308  represented as a 32-bit integer value and its type is specified as
1309 
1310  SQLTypeInfo(type=kTEXT, compression=kENCODING_DICT, comp_param=<dict id>)
1311 
1312  and size is defined as 4 by get_storage_size().
1313 
1314 Collection types
1315 ----------------
1316 
1317 - The type of a varlen array of scalar items is specified as
1318 
1319  SQLTypeInfo(type=kARRAY, subtype=kSCALAR)
1320 
1321  and size is defined as -1 by get_storage_size() which can be interpreted as N/A.
1322 
1323 - The type of a varlen array of text encoding dict is specified as
1324 
1325  SQLTypeInfo(type=kARRAY, subtype=kTEXT, compression=kENCODING_DICT, comp_param=<dict id>)
1326 
1327  Notice that the compression and comp_param attributes apply to
1328  subtype rather than to type. This quirk exemplifies the fact that
1329  SQLTypeInfo provides limited ability to support composite types.
1330 
1331 - Similarly, the types of a column of scalar and text encoded dict
1332  items are specified as
1333 
1334  SQLTypeInfo(type=kCOLUMN, subtype=kSCALAR)
1335 
1336  and
1337 
1338  SQLTypeInfo(type=kCOLUMN, subtype=kTEXT, compression=kENCODING_DICT, comp_param=<dict id>)
1339 
1340  respectively.
1341 
1342 - The type of column list with scalar items is specified as
1343 
1344  SQLTypeInfo(type=kCOLUMN_LIST, subtype=kSCALAR, dimension=<nof columns>)
1345 
1346  WARNING: Column lists whose item type uses compression (such as
1347  TIMESTAMP) cannot be supported! See QE-427.
1348 
1349 - The type of column list with text encoded dict items is specified as
1350 
1351  SQLTypeInfo(type=kCOLUMN_LIST, subtype=kTEXT, compression=kENCODING_DICT, dimension=<nof columns>)
1352 
1353 - The type of a column of arrays of scalar items is specified as
1354 
1355  SQLTypeInfo(type=kCOLUMN, subtype=kSCALAR, compression=kENCODING_ARRAY)
1356 
1357  Notice that the "a collection of collections of items" is specified
1358  by introducing a new compression scheme that describes the
1359  "collections" part while the subtype attribute specifies the type of
1360  items.
1361 
1362 - The type of a column of arrays of text encoding dict items is specified as
1363 
1364  SQLTypeInfo(type=kCOLUMN, subtype=kTEXT, compression=kENCODING_ARRAY_DICT, comp_param=<dict id>)
1365 
1366  where the compression attribute kENCODING_ARRAY_DICT carries two
1367  pieces of information: (i) the items type is dict encoded string and
1368  (ii) the type represents a "column of arrays".
1369 
1370 
1371 - The type of a column list of arrays of scalar items is specified as
1372 
1373  SQLTypeInfo(type=kCOLUMN_LIST, subtype=kSCALAR, compression=kENCODING_ARRAY, dimension=<nof columns>)
1374 
1375 - The type of a column list of arrays of text encoding dict items is specified as
1376 
1377  SQLTypeInfo(type=kCOLUMN_LIST, subtype=kTEXT, compression=kENCODING_ARRAY_DICT, comp_param=<dict id>, dimension=<nof columns>)
1378 
1379  that is the most complicated currently supported type of "a
1380  collection(=list) of collections(=columns) of collections(=arrays)
1381  of items(=text)" with a specified compression scheme and comp_param
1382  attributes.
1383 
1384 */
1385 // clang-format on
1386 
1387 inline auto generate_column_type(const SQLTypeInfo& elem_ti) {
1388  SQLTypes elem_type = elem_ti.get_type();
1389  if (elem_type == kCOLUMN) {
1390  if (elem_ti.get_subtype() == kVARCHAR) {
1391  auto new_elem_ti = elem_ti;
1392  new_elem_ti.set_subtype(kTEXT);
1393  return new_elem_ti;
1394  }
1395  return elem_ti;
1396  }
1397  auto c = elem_ti.get_compression();
1398  auto d = elem_ti.get_dimension();
1399  auto p = elem_ti.get_comp_param();
1400  switch (elem_type) {
1401  case kBOOLEAN:
1402  case kTINYINT:
1403  case kSMALLINT:
1404  case kINT:
1405  case kBIGINT:
1406  case kFLOAT:
1407  case kDOUBLE:
1408  if (c == kENCODING_NONE && p == 0) {
1409  break; // here and below `break` means supported element type
1410  // for extension functions
1411  }
1412  case kTEXT:
1413  case kVARCHAR:
1414  elem_type = kTEXT;
1415  if (c == kENCODING_DICT) {
1416  break;
1417  }
1418  case kTIMESTAMP:
1419  if (c == kENCODING_NONE && p == 0 && (d == 9 || d == 6 || d == 0)) {
1420  break;
1421  }
1422  case kARRAY:
1423  elem_type = elem_ti.get_subtype();
1424  if (IS_NUMBER(elem_type) || elem_type == kBOOLEAN || elem_type == kTEXT) {
1425  if (c == kENCODING_NONE && p == 0) {
1426  c = kENCODING_ARRAY;
1427  break;
1428  } else if (c == kENCODING_DICT && p != 0) {
1430  break;
1431  }
1432  }
1433  default:
1434  elem_type = kNULLT; // indicates unsupported element type that
1435  // the caller needs to handle accordingly
1436  }
1437  auto ti = SQLTypeInfo(kCOLUMN, c, p, elem_type);
1438  ti.set_dimension(d);
1439  if (c == kENCODING_DICT) {
1440  ti.setStringDictKey(elem_ti.getStringDictKey());
1441  }
1442  return ti;
1443 }
1444 
1445 inline auto generate_column_list_type(const SQLTypeInfo& elem_ti) {
1446  auto type_info = generate_column_type(elem_ti);
1447  if (type_info.get_subtype() != kNULLT) {
1448  type_info.set_type(kCOLUMN_LIST);
1449  }
1450  if (type_info.get_subtype() == kTIMESTAMP) {
1451  // ColumnList<Timestamp> is not supported, see QE-472
1452  type_info.set_subtype(kNULLT);
1453  }
1454  return type_info;
1455 }
1456 
1457 // SQLTypeInfo-friendly interface to FlatBuffer:
1458 
1459 #include "../QueryEngine/Utils/FlatBuffer.h"
1460 
1461 inline int64_t getVarlenArrayBufferSize(int64_t items_count,
1462  int64_t max_nof_values,
1463  const SQLTypeInfo& ti) {
1464  CHECK(ti.is_array());
1465  const size_t array_item_size = ti.get_elem_type().get_size();
1466  if (ti.is_text_encoding_dict_array()) {
1468  items_count,
1469  max_nof_values,
1470  array_item_size,
1471  FlatBufferManager::DTypeMetadataKind::SIZE_DICTID);
1472  } else {
1474  items_count,
1475  max_nof_values,
1476  array_item_size,
1477  FlatBufferManager::DTypeMetadataKind::SIZE);
1478  }
1479 }
1480 
1482  int64_t items_count,
1483  int64_t max_nof_values,
1484  const SQLTypeInfo& ti) {
1485  CHECK(ti.is_array());
1486  const size_t array_item_size = ti.get_elem_type().get_size();
1487  if (ti.is_text_encoding_dict_array()) {
1488  m.initializeVarlenArray(items_count,
1489  max_nof_values,
1490  array_item_size,
1491  FlatBufferManager::DTypeMetadataKind::SIZE_DICTID);
1492  const auto& dict_key = ti.getStringDictKey();
1493  m.setDTypeMetadataDictKey(dict_key.db_id, dict_key.dict_id);
1494  } else {
1495  m.initializeVarlenArray(items_count,
1496  max_nof_values,
1497  array_item_size,
1498  FlatBufferManager::DTypeMetadataKind::SIZE);
1499  }
1500 }
1501 
1502 // ChunkIter_get_nth variant for array buffers using FlatBuffer storage schema:
1503 DEVICE inline void VarlenArray_get_nth(int8_t* buf,
1504  int n,
1505  ArrayDatum* result,
1506  bool* is_end) {
1507  FlatBufferManager m{buf};
1508  auto status = m.getItem(n, result->length, result->pointer, result->is_null);
1509  if (status == FlatBufferManager::Status::IndexError) {
1510  *is_end = true;
1511  result->length = 0;
1512  result->pointer = NULL;
1513  result->is_null = true;
1514  } else {
1515  *is_end = false;
1516 #ifndef __CUDACC__
1517  CHECK_EQ(status, FlatBufferManager::Status::Success);
1518 #endif
1519  }
1520 }
1521 
1523  size_t index;
1526 };
int8_t tinyintval
Definition: Datum.h:69
void initializeVarlenArray(int64_t items_count, int64_t max_nof_values, int64_t dtype_size, DTypeMetadataKind dtype_metadata_kind)
Definition: FlatBuffer.h:363
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:382
void set_compression(EncodingType c)
Definition: sqltypes.h:504
void set_size(int s)
Definition: sqltypes.h:501
static constexpr int32_t kMaxRepresentableNumericPrecision
Definition: sqltypes.h:50
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:393
HOST DEVICE void operator=(const SQLTypeInfo &rhs)
Definition: sqltypes.h:689
shared::StringDictKey dict_key_
Definition: sqltypes.h:1048
int8_t * append_datum(int8_t *buf, const Datum &d, const SQLTypeInfo &ti)
Definition: Datum.cpp:578
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:458
bool is_varlen_array() const
Definition: sqltypes.h:589
static constexpr int32_t kMaxNumericPrecision
Definition: sqltypes.h:48
Definition: sqltypes.h:66
bool is_text_encoding_dict_array() const
Definition: sqltypes.h:609
DEVICE constexpr bool is_cuda_compiler()
Definition: sqltypes.h:210
SQLTypes
Definition: sqltypes.h:55
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:224
bool is_timestamp() const
Definition: sqltypes.h:1014
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:225
#define NULL_ARRAY_INT
bool is_column_list_array() const
Definition: sqltypes.h:599
#define NULL_FLOAT
bool is_null
Definition: Datum.h:55
#define NULL_BIGINT
bool is_time_or_date() const
Definition: sqltypes.h:1000
SQLTypeInfo get_nullable_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1253
#define LOG(tag)
Definition: Logger.h:285
HOST DEVICE bool operator==(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:669
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:57
bool is_fp() const
Definition: sqltypes.h:584
HOST DEVICE int get_scale() const
Definition: sqltypes.h:386
bool is_varlen() const
Definition: sqltypes.h:620
#define NULL_ARRAY_SMALLINT
int8_t boolval
Definition: Datum.h:68
std::string get_compression_name() const
Definition: sqltypes.h:545
static int64_t get_VarlenArray_flatbuffer_size(int64_t items_count, int64_t max_nof_values, int64_t dtype_size, DTypeMetadataKind dtype_metadata_kind)
Definition: FlatBuffer.h:343
VarlenDatum * arrayval
Definition: Datum.h:75
#define UNREACHABLE()
Definition: Logger.h:337
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:494
Definitions for core Datum union type.
SQLTypeInfo(SQLTypes t, int d, int s)
Definition: sqltypes.h:351
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1235
Definition: sqltypes.h:82
#define NULL_ARRAY_TINYINT
HOST DEVICE bool is_null_fixlen_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:916
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:381
bool is_number() const
Definition: sqltypes.h:585
int32_t intval
Definition: Datum.h:71
bool is_time() const
Definition: sqltypes.h:586
std::string to_string(char const *&&v)
HostArrayDatum(size_t const l, int8_t *p, bool const n, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:200
#define NULL_INT
SQLTypeInfo ti
Definition: sqltypes.h:1524
int32_t StringOffsetT
Definition: sqltypes.h:1258
bool has_render_group() const
Definition: sqltypes.h:483
#define DEVICE
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:219
#define HOST
void set_input_srid(int d)
Definition: sqltypes.h:497
float floatval
Definition: Datum.h:73
std::string to_string() const
Definition: sqltypes.h:547
EncodingType
Definition: sqltypes.h:230
int get_physical_cols() const
Definition: sqltypes.h:414
bool is_fixlen_array() const
Definition: sqltypes.h:590
HOST DEVICE Status getItem(int64_t index, int64_t &size, int8_t *&dest, bool &is_null)
Definition: FlatBuffer.h:633
bool is_castable(const SQLTypeInfo &new_type_info) const
Definition: sqltypes.h:701
#define IS_INTERVAL(T)
Definition: sqltypes.h:303
void set_fixed_size()
Definition: sqltypes.h:502
std::shared_ptr< int8_t > ManagedPtr
Definition: sqltypes.h:181
HOST DEVICE bool operator!=(const SQLTypeInfo &rhs) const
Definition: sqltypes.h:661
int get_logical_size() const
Definition: sqltypes.h:403
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:406
static std::string type_name[kSQLTYPE_LAST]
Definition: sqltypes.h:1045
bool is_integer() const
Definition: sqltypes.h:582
bool is_subtype_dict_encoded_string() const
Definition: sqltypes.h:640
int64_t extract_int_type_from_datum(const Datum datum, const SQLTypeInfo &ti)
Definition: Datum.cpp:521
bool is_column_array() const
Definition: sqltypes.h:595
#define NULL_ARRAY_COMPRESSED_32
SQLTypes subtype
Definition: sqltypes.h:1035
bool has_same_itemtype(const SQLTypeInfo &other) const
Definition: sqltypes.h:651
bool is_text_encoding_dict() const
Definition: sqltypes.h:606
void set_scale(int s)
Definition: sqltypes.h:498
bool notnull
Definition: sqltypes.h:1039
int64_t bigintval
Definition: Datum.h:72
bool has_bounds() const
Definition: sqltypes.h:470
HostArrayDatum(size_t const l, int8_t *p, CUSTOM_DELETER custom_deleter)
Definition: sqltypes.h:194
bool is_timeinterval() const
Definition: sqltypes.h:591
#define NULL_ARRAY_FLOAT
void setStorageSize()
Definition: sqltypes.h:1019
bool is_numeric_scalar_auto_castable(const SQLTypeInfo &new_type_info) const
returns true if the sql_type can be cast to the type specified by new_type_info with no loss of preci...
Definition: sqltypes.h:749
ManagedPtr data_ptr
Definition: sqltypes.h:203
int is_logical_geo_type() const
Definition: sqltypes.h:395
HostArrayDatum()=default
int16_t smallintval
Definition: Datum.h:70
bool is_dict_intersection() const
Definition: sqltypes.h:649
bool is_dict_encoded_type() const
Definition: sqltypes.h:644
Datum StringToDatum(const std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:337
SQLTypeInfo(SQLTypes t, int d, int s, bool n)
Definition: sqltypes.h:333
std::string toString(const ExecutorDeviceType &device_type)
std::string toString() const
Definition: sqltypes.h:546
bool is_boolean() const
Definition: sqltypes.h:587
HostArrayDatum(size_t const l, int8_t *p, bool const n)
Definition: sqltypes.h:188
void operator()(int8_t *p)
Definition: sqltypes.h:177
SQLTypeInfo(SQLTypes t, int d, int s, bool n, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:324
SQLTypeInfo(SQLTypes t)
Definition: sqltypes.h:361
bool IsNullDatum(const Datum datum, const SQLTypeInfo &ti)
Definition: Datum.cpp:329
#define NULL_BOOLEAN
std::string get_buffer_name() const
Definition: sqltypes.h:560
SQLTypeInfo(SQLTypes t, bool n, EncodingType c)
Definition: sqltypes.h:362
SQLTypeInfo get_array_type() const
Definition: sqltypes.h:980
EncodingType compression
Definition: sqltypes.h:1040
Datum NullDatum(const SQLTypeInfo &ti)
Definition: Datum.cpp:286
int get_precision() const
Definition: sqltypes.h:384
void set_output_srid(int s)
Definition: sqltypes.h:499
bool is_buffer() const
Definition: sqltypes.h:612
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:559
bool is_column() const
Definition: sqltypes.h:593
DEVICE DeviceArrayDatum()
Definition: sqltypes.h:207
HOST DEVICE bool is_null(const Datum &d) const
Definition: sqltypes.h:854
auto generate_column_type(const SQLTypeInfo &elem_ti)
Definition: sqltypes.h:1387
void set_comp_param(int p)
Definition: sqltypes.h:505
HOST DEVICE int get_storage_size() const
Definition: sqltypes.h:1049
#define CHECK_LT(x, y)
Definition: Logger.h:303
Definition: sqltypes.h:69
Definition: sqltypes.h:70
bool dict_intersection
Definition: sqltypes.h:1043
static std::string comp_name[kENCODING_LAST]
Definition: sqltypes.h:1046
void initializeVarlenArray(FlatBufferManager &m, int64_t items_count, int64_t max_nof_values, const SQLTypeInfo &ti)
Definition: sqltypes.h:1481
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:389
bool is_date_in_days() const
Definition: sqltypes.h:988
int get_array_context_logical_size() const
Definition: sqltypes.h:678
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:622
auto generate_column_list_type(const SQLTypeInfo &elem_ti)
Definition: sqltypes.h:1445
int32_t ArrayOffsetT
Definition: sqltypes.h:1259
void set_dimension(int d)
Definition: sqltypes.h:495
void setStringDictKey(const shared::StringDictKey &dict_key)
Definition: sqltypes.h:1027
SQLTypes get_int_type_by_size(size_t const nbytes)
Definition: sqltypes.h:1217
bool is_none_encoded_string() const
Definition: sqltypes.h:636
HOST DEVICE int get_dimension() const
Definition: sqltypes.h:383
#define IS_INTEGER(T)
Definition: sqltypes.h:294
std::string get_type_name() const
Definition: sqltypes.h:507
int32_t get_numeric_scalar_scale() const
returns integer between 1 and 8 indicating what is roughly equivalent to the logical byte size of a s...
Definition: sqltypes.h:810
Definition: sqltypes.h:58
#define IS_STRING(T)
Definition: sqltypes.h:299
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:392
HOST DEVICE int get_input_srid() const
Definition: sqltypes.h:385
void set_dict_intersection()
Definition: sqltypes.h:503
#define NULL_TINYINT
#define NULL_ARRAY_DOUBLE
bool is_bytes() const
Definition: sqltypes.h:603
bool is_column_list() const
Definition: sqltypes.h:594
bool g_enable_watchdog false
Definition: Execute.cpp:79
int64_t getVarlenArrayBufferSize(int64_t items_count, int64_t max_nof_values, const SQLTypeInfo &ti)
Definition: sqltypes.h:1461
void set_notnull(bool n)
Definition: sqltypes.h:500
#define CHECK(condition)
Definition: Logger.h:291
bool is_geometry() const
Definition: sqltypes.h:592
bool is_encoded_timestamp() const
Definition: sqltypes.h:1015
bool is_high_precision_timestamp() const
Definition: sqltypes.h:1004
SQLTypes type
Definition: sqltypes.h:1034
#define NULL_SMALLINT
double extract_fp_type_from_datum(const Datum datum, const SQLTypeInfo &ti)
Definition: Datum.cpp:547
void setDTypeMetadataDictKey(int32_t db_id, int32_t dict_id)
Definition: FlatBuffer.h:300
bool is_any() const
Definition: sqltypes.h:577
HostArrayDatum(size_t const l, ManagedPtr p, bool const n)
Definition: sqltypes.h:185
#define NULL_ARRAY_BIGINT
bool is_dict_encoded_string() const
Definition: sqltypes.h:632
Definition: sqltypes.h:62
bool is_varlen_indeed() const
Definition: sqltypes.h:626
bool is_string() const
Definition: sqltypes.h:580
SQLTypeInfo(SQLTypes t, EncodingType c, int p, SQLTypes st)
Definition: sqltypes.h:342
constexpr double n
Definition: Utm.h:38
bool transforms() const
Definition: sqltypes.h:615
SQLTypeInfo(SQLTypes t, bool n)
Definition: sqltypes.h:352
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:388
int8_t * numbersPtr
Definition: sqltypes.h:223
bool is_string_array() const
Definition: sqltypes.h:581
size_t index
Definition: sqltypes.h:1523
Definition: Datum.h:67
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:963
bool is_decimal() const
Definition: sqltypes.h:583
int get_physical_coord_cols() const
Definition: sqltypes.h:433
#define IS_NUMBER(T)
Definition: sqltypes.h:296
void operator()(int8_t *)
Definition: sqltypes.h:174
#define IS_GEO(T)
Definition: sqltypes.h:300
#define TRANSIENT_DICT(ID)
Definition: sqltypes.h:312
int comp_param
Definition: sqltypes.h:1041
bool is_date() const
Definition: sqltypes.h:998
bool is_array() const
Definition: sqltypes.h:588
void set_precision(int d)
Definition: sqltypes.h:496
SQLTypeInfo get_nullable_type_info(const SQLTypeInfo &type_info)
Definition: sqltypes.h:1247
int dimension
Definition: sqltypes.h:1036
HOST DEVICE bool is_null_point_coord_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:949
double doubleval
Definition: Datum.h:74
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:387
DEVICE void VarlenArray_get_nth(int8_t *buf, int n, ArrayDatum *result, bool *is_end)
Definition: sqltypes.h:1503
constexpr auto is_datetime(SQLTypes type)
Definition: sqltypes.h:315
const shared::StringDictKey & getStringDictKey() const
Definition: sqltypes.h:1021
HOST DEVICE bool is_null(const int8_t *val) const
Definition: sqltypes.h:891
SQLTypes string_dict_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:563
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:493