OmniSciDB  b28c0d5765
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
import_export::TypedImportBuffer Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::TypedImportBuffer:
+ Collaboration diagram for import_export::TypedImportBuffer:

Public Types

using OptionalStringVector = std::optional< std::vector< std::string >>
 

Public Member Functions

 TypedImportBuffer (const ColumnDescriptor *col_desc, StringDictionary *string_dict)
 
 ~TypedImportBuffer ()
 
void addBoolean (const int8_t v)
 
void addTinyint (const int8_t v)
 
void addSmallint (const int16_t v)
 
void addInt (const int32_t v)
 
void addBigint (const int64_t v)
 
void addFloat (const float v)
 
void addDouble (const double v)
 
void addString (const std::string_view v)
 
void addGeoString (const std::string_view v)
 
void addArray (const ArrayDatum &v)
 
OptionalStringVector & addStringArray ()
 
void addStringArray (const OptionalStringVector &arr)
 
void addDictEncodedString (const std::vector< std::string > &string_vec)
 
void addDictEncodedStringArray (const std::vector< OptionalStringVector > &string_array_vec)
 
const SQLTypeInfo & getTypeInfo () const
 
const ColumnDescriptor * getColumnDesc () const
 
StringDictionary * getStringDictionary () const
 
int8_t * getAsBytes () const
 
size_t getElementSize () const
 
std::vector< std::string > * getStringBuffer () const
 
std::vector< std::string > * getGeoStringBuffer () const
 
std::vector< ArrayDatum > * getArrayBuffer () const
 
std::vector< OptionalStringVector > * getStringArrayBuffer () const
 
std::vector< ArrayDatum > * getStringArrayDictBuffer () const
 
int8_t * getStringDictBuffer () const
 
bool stringDictCheckpoint ()
 
void clear ()
 
size_t add_values (const ColumnDescriptor *cd, const TColumn &data)
 
size_t add_arrow_values (const ColumnDescriptor *cd, const arrow::Array &data, const bool exact_type_match, const ArraySliceRange &slice_range, BadRowsTracker *bad_rows_tracker)
 
void add_value (const ColumnDescriptor *cd, const std::string_view val, const bool is_null, const CopyParams &copy_params, const bool check_not_null=true)
 
void add_value (const ColumnDescriptor *cd, const TDatum &val, const bool is_null)
 
void addDefaultValues (const ColumnDescriptor *cd, size_t num_rows)
 
void pop_value ()
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
auto del_values (std::vector< DATA_TYPE > &buffer, BadRowsTracker *const bad_rows_tracker)
 
auto del_values (const SQLTypes type, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, import_export::BadRowsTracker *const bad_rows_tracker)
 

Static Public Member Functions

static std::vector< DataBlockPtr > get_data_block_pointers (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
 

Public Attributes

std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
 
size_t col_idx
 
std::vector< int8_t > * bool_buffer_
 
std::vector< int8_t > * tinyint_buffer_
 
std::vector< int16_t > * smallint_buffer_
 
std::vector< int32_t > * int_buffer_
 
std::vector< int64_t > * bigint_buffer_
 
std::vector< float > * float_buffer_
 
std::vector< double > * double_buffer_
 
std::vector< std::string > * string_buffer_
 
std::vector< std::string > * geo_string_buffer_
 
std::vector< ArrayDatum > * array_buffer_
 
std::vector< OptionalStringVector > * string_array_buffer_
 
std::vector< uint8_t > * string_dict_i8_buffer_
 
std::vector< uint16_t > * string_dict_i16_buffer_
 
std::vector< int32_t > * string_dict_i32_buffer_
 
std::vector< ArrayDatum > * string_array_dict_buffer_
 

Private Attributes

union {
   std::vector< int8_t > *   bool_buffer_
 
   std::vector< int8_t > *   tinyint_buffer_
 
   std::vector< int16_t > *   smallint_buffer_
 
   std::vector< int32_t > *   int_buffer_
 
   std::vector< int64_t > *   bigint_buffer_
 
   std::vector< float > *   float_buffer_
 
   std::vector< double > *   double_buffer_
 
   std::vector< std::string > *   string_buffer_
 
   std::vector< std::string > *   geo_string_buffer_
 
   std::vector< ArrayDatum > *   array_buffer_
 
   std::vector< OptionalStringVector > *   string_array_buffer_
 
}; 
 
union {
   std::vector< uint8_t > *   string_dict_i8_buffer_
 
   std::vector< uint16_t > *   string_dict_i16_buffer_
 
   std::vector< int32_t > *   string_dict_i32_buffer_
 
   std::vector< ArrayDatum > *   string_array_dict_buffer_
 
}; 
 
const ColumnDescriptor * column_desc_
 
StringDictionary * string_dict_
 

Detailed Description

Definition at line 93 of file Importer.h.

Member Typedef Documentation

using import_export::TypedImportBuffer::OptionalStringVector = std::optional<std::vector<std::string>>

Definition at line 95 of file Importer.h.

Constructor & Destructor Documentation

import_export::TypedImportBuffer::TypedImportBuffer ( const ColumnDescriptor * col_desc,
StringDictionary * string_dict 
)
inline

Definition at line 96 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

97  : column_desc_(col_desc), string_dict_(string_dict) {
98  switch (col_desc->columnType.get_type()) {
99  case kBOOLEAN:
100  bool_buffer_ = new std::vector<int8_t>();
101  break;
102  case kTINYINT:
103  tinyint_buffer_ = new std::vector<int8_t>();
104  break;
105  case kSMALLINT:
106  smallint_buffer_ = new std::vector<int16_t>();
107  break;
108  case kINT:
109  int_buffer_ = new std::vector<int32_t>();
110  break;
111  case kBIGINT:
112  case kNUMERIC:
113  case kDECIMAL:
114  bigint_buffer_ = new std::vector<int64_t>();
115  break;
116  case kFLOAT:
117  float_buffer_ = new std::vector<float>();
118  break;
119  case kDOUBLE:
120  double_buffer_ = new std::vector<double>();
121  break;
122  case kTEXT:
123  case kVARCHAR:
124  case kCHAR:
125  string_buffer_ = new std::vector<std::string>();
126  if (col_desc->columnType.get_compression() == kENCODING_DICT) {
127  switch (col_desc->columnType.get_size()) {
128  case 1:
129  string_dict_i8_buffer_ = new std::vector<uint8_t>();
130  break;
131  case 2:
132  string_dict_i16_buffer_ = new std::vector<uint16_t>();
133  break;
134  case 4:
135  string_dict_i32_buffer_ = new std::vector<int32_t>();
136  break;
137  default:
138  CHECK(false);
139  }
140  }
141  break;
142  case kDATE:
143  case kTIME:
144  case kTIMESTAMP:
145  bigint_buffer_ = new std::vector<int64_t>();
146  break;
147  case kARRAY:
148  if (IS_STRING(col_desc->columnType.get_subtype())) {
149  CHECK(col_desc->columnType.get_compression() == kENCODING_DICT);
150  string_array_buffer_ = new std::vector<OptionalStringVector>();
151  string_array_dict_buffer_ = new std::vector<ArrayDatum>();
152  } else {
153  array_buffer_ = new std::vector<ArrayDatum>();
154  }
155  break;
156  case kPOINT:
157  case kMULTIPOINT:
158  case kLINESTRING:
159  case kMULTILINESTRING:
160  case kPOLYGON:
161  case kMULTIPOLYGON:
162  geo_string_buffer_ = new std::vector<std::string>();
163  break;
164  default:
165  CHECK(false);
166  }
167  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:381
HOST DEVICE int get_size() const
Definition: sqltypes.h:390
Definition: sqltypes.h:64
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
StringDictionary * string_dict_
Definition: Importer.h:548
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:545
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:542
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:67
Definition: sqltypes.h:68
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:388
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:544
Definition: sqltypes.h:56
#define IS_STRING(T)
Definition: sqltypes.h:297
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:543
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:60
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

import_export::TypedImportBuffer::~TypedImportBuffer ( )
inline

Definition at line 169 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

169  {
170  switch (column_desc_->columnType.get_type()) {
171  case kBOOLEAN:
172  delete bool_buffer_;
173  break;
174  case kTINYINT:
175  delete tinyint_buffer_;
176  break;
177  case kSMALLINT:
178  delete smallint_buffer_;
179  break;
180  case kINT:
181  delete int_buffer_;
182  break;
183  case kBIGINT:
184  case kNUMERIC:
185  case kDECIMAL:
186  delete bigint_buffer_;
187  break;
188  case kFLOAT:
189  delete float_buffer_;
190  break;
191  case kDOUBLE:
192  delete double_buffer_;
193  break;
194  case kTEXT:
195  case kVARCHAR:
196  case kCHAR:
197  delete string_buffer_;
198  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
199  switch (column_desc_->columnType.get_size()) {
200  case 1:
201  delete string_dict_i8_buffer_;
202  break;
203  case 2:
204  delete string_dict_i16_buffer_;
205  break;
206  case 4:
207  delete string_dict_i32_buffer_;
208  break;
209  }
210  }
211  break;
212  case kDATE:
213  case kTIME:
214  case kTIMESTAMP:
215  delete bigint_buffer_;
216  break;
217  case kARRAY:
218  if (IS_STRING(column_desc_->columnType.get_subtype())) {
219  delete string_array_buffer_;
220  delete string_array_dict_buffer_;
221  } else {
222  delete array_buffer_;
223  }
224  break;
225  case kPOINT:
226  case kMULTIPOINT:
227  case kLINESTRING:
228  case kMULTILINESTRING:
229  case kPOLYGON:
230  case kMULTIPOLYGON:
231  delete geo_string_buffer_;
232  break;
233  default:
234  CHECK(false);
235  }
236  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:381
HOST DEVICE int get_size() const
Definition: sqltypes.h:390
Definition: sqltypes.h:64
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:545
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:542
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:67
Definition: sqltypes.h:68
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:388
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:544
Definition: sqltypes.h:56
#define IS_STRING(T)
Definition: sqltypes.h:297
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:543
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:60
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

Member Function Documentation

size_t import_export::TypedImportBuffer::add_arrow_values ( const ColumnDescriptor * cd,
const arrow::Array &  data,
const bool  exact_type_match,
const ArraySliceRange & slice_range,
BadRowsTracker * bad_rows_tracker 
)

Definition at line 873 of file Importer.cpp.

References arrow_throw_if(), bigint_buffer_, bool_buffer_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_arrow_val_to_import_buffer(), double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), int_buffer_, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, heavydb.dtypes::STRING, string_buffer_, tinyint_buffer_, and run_benchmark_import::type.

877  {
878  const auto type = cd->columnType.get_type();
879  if (cd->columnType.get_notnull()) {
880  // We can't have any null values for this column; to have them is an error
881  arrow_throw_if(col.null_count() > 0, "NULL not allowed for column " + cd->columnName);
882  }
883 
884  switch (type) {
885  case kBOOLEAN:
886  if (exact_type_match) {
887  arrow_throw_if(col.type_id() != Type::BOOL, "Expected boolean type");
888  }
889  return convert_arrow_val_to_import_buffer(
890  cd, col, *bool_buffer_, slice_range, bad_rows_tracker);
891  case kTINYINT:
892  if (exact_type_match) {
893  arrow_throw_if(col.type_id() != Type::INT8, "Expected int8 type");
894  }
895  return convert_arrow_val_to_import_buffer(
896  cd, col, *tinyint_buffer_, slice_range, bad_rows_tracker);
897  case kSMALLINT:
898  if (exact_type_match) {
899  arrow_throw_if(col.type_id() != Type::INT16, "Expected int16 type");
900  }
901  return convert_arrow_val_to_import_buffer(
902  cd, col, *smallint_buffer_, slice_range, bad_rows_tracker);
903  case kINT:
904  if (exact_type_match) {
905  arrow_throw_if(col.type_id() != Type::INT32, "Expected int32 type");
906  }
907  return convert_arrow_val_to_import_buffer(
908  cd, col, *int_buffer_, slice_range, bad_rows_tracker);
909  case kBIGINT:
910  case kNUMERIC:
911  case kDECIMAL:
912  if (exact_type_match) {
913  arrow_throw_if(col.type_id() != Type::INT64, "Expected int64 type");
914  }
915  return convert_arrow_val_to_import_buffer(
916  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
917  case kFLOAT:
918  if (exact_type_match) {
919  arrow_throw_if(col.type_id() != Type::FLOAT, "Expected float type");
920  }
921  return convert_arrow_val_to_import_buffer(
922  cd, col, *float_buffer_, slice_range, bad_rows_tracker);
923  case kDOUBLE:
924  if (exact_type_match) {
925  arrow_throw_if(col.type_id() != Type::DOUBLE, "Expected double type");
926  }
927  return convert_arrow_val_to_import_buffer(
928  cd, col, *double_buffer_, slice_range, bad_rows_tracker);
929  case kTEXT:
930  case kVARCHAR:
931  case kCHAR:
932  if (exact_type_match) {
933  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
934  "Expected string type");
935  }
936  return convert_arrow_val_to_import_buffer(
937  cd, col, *string_buffer_, slice_range, bad_rows_tracker);
938  case kTIME:
939  if (exact_type_match) {
940  arrow_throw_if(col.type_id() != Type::TIME32 && col.type_id() != Type::TIME64,
941  "Expected time32 or time64 type");
942  }
943  return convert_arrow_val_to_import_buffer(
944  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
945  case kTIMESTAMP:
946  if (exact_type_match) {
947  arrow_throw_if(col.type_id() != Type::TIMESTAMP, "Expected timestamp type");
948  }
949  return convert_arrow_val_to_import_buffer(
950  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
951  case kDATE:
952  if (exact_type_match) {
953  arrow_throw_if(col.type_id() != Type::DATE32 && col.type_id() != Type::DATE64,
954  "Expected date32 or date64 type");
955  }
956  return convert_arrow_val_to_import_buffer(
957  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
958  case kPOINT:
959  case kMULTIPOINT:
960  case kLINESTRING:
961  case kMULTILINESTRING:
962  case kPOLYGON:
963  case kMULTIPOLYGON:
964  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
965  "Expected string type");
966  return convert_arrow_val_to_import_buffer(
967  cd, col, *geo_string_buffer_, slice_range, bad_rows_tracker);
968  case kARRAY:
969  throw std::runtime_error("Arrow array appends not yet supported");
970  default:
971  throw std::runtime_error("Invalid Type");
972  }
973 }
Definition: sqltypes.h:64
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
tuple STRING
Definition: dtypes.py:31
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:67
Definition: sqltypes.h:68
Definition: sqltypes.h:56
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
size_t convert_arrow_val_to_import_buffer(const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
Definition: sqltypes.h:60
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:387
std::string columnName
void arrow_throw_if(const bool cond, const std::string &message)
Definition: ArrowImporter.h:42
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const std::string_view  val,
const bool  is_null,
const CopyParams & copy_params,
const bool  check_not_null = true 
)

Definition at line 528 of file Importer.cpp.

References addArray(), addBigint(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), Datum::bigintval, Datum::boolval, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), Datum::smallintval, import_export::StringToArray(), StringToDatum(), Datum::tinyintval, to_string(), run_benchmark_import::type, and DecimalOverflowValidator::validate().

Referenced by foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::set_null(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::set_null(), and foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::set_null().

532  {
533  const auto type = cd->columnType.get_type();
534  switch (type) {
535  case kBOOLEAN: {
536  if (is_null) {
537  if (check_not_null && cd->columnType.get_notnull()) {
538  throw std::runtime_error("NULL for column " + cd->columnName);
539  }
540  addBoolean(inline_fixed_encoding_null_val(cd->columnType));
541  } else {
542  auto ti = cd->columnType;
543  Datum d = StringToDatum(val, ti);
544  addBoolean(static_cast<int8_t>(d.boolval));
545  }
546  break;
547  }
548  case kTINYINT: {
549  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
550  auto ti = cd->columnType;
551  Datum d = StringToDatum(val, ti);
552  addTinyint(d.tinyintval);
553  } else {
554  if (check_not_null && cd->columnType.get_notnull()) {
555  throw std::runtime_error("NULL for column " + cd->columnName);
556  }
557  addTinyint(inline_fixed_encoding_null_val(cd->columnType));
558  }
559  break;
560  }
561  case kSMALLINT: {
562  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
563  auto ti = cd->columnType;
564  Datum d = StringToDatum(val, ti);
565  addSmallint(d.smallintval);
566  } else {
567  if (check_not_null && cd->columnType.get_notnull()) {
568  throw std::runtime_error("NULL for column " + cd->columnName);
569  }
570  addSmallint(inline_fixed_encoding_null_val(cd->columnType));
571  }
572  break;
573  }
574  case kINT: {
575  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
576  auto ti = cd->columnType;
577  Datum d = StringToDatum(val, ti);
578  addInt(d.intval);
579  } else {
580  if (check_not_null && cd->columnType.get_notnull()) {
581  throw std::runtime_error("NULL for column " + cd->columnName);
582  }
583  addInt(inline_fixed_encoding_null_val(cd->columnType));
584  }
585  break;
586  }
587  case kBIGINT: {
588  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
589  auto ti = cd->columnType;
590  Datum d = StringToDatum(val, ti);
591  addBigint(d.bigintval);
592  } else {
593  if (check_not_null && cd->columnType.get_notnull()) {
594  throw std::runtime_error("NULL for column " + cd->columnName);
595  }
596  addBigint(inline_fixed_encoding_null_val(cd->columnType));
597  }
598  break;
599  }
600  case kDECIMAL:
601  case kNUMERIC: {
602  if (!is_null) {
603  auto ti = cd->columnType;
604  Datum d = StringToDatum(val, ti);
605  DecimalOverflowValidator validator(ti);
606  validator.validate(d.bigintval);
607  addBigint(d.bigintval);
608  } else {
609  if (check_not_null && cd->columnType.get_notnull()) {
610  throw std::runtime_error("NULL for column " + cd->columnName);
611  }
612  addBigint(inline_fixed_encoding_null_val(cd->columnType));
613  }
614  break;
615  }
616  case kFLOAT:
617  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
618  addFloat(static_cast<float>(std::atof(std::string(val).c_str())));
619  } else {
620  if (check_not_null && cd->columnType.get_notnull()) {
621  throw std::runtime_error("NULL for column " + cd->columnName);
622  }
623  addFloat(NULL_FLOAT);
624  }
625  break;
626  case kDOUBLE:
627  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
628  addDouble(std::atof(std::string(val).c_str()));
629  } else {
630  if (check_not_null && cd->columnType.get_notnull()) {
631  throw std::runtime_error("NULL for column " + cd->columnName);
632  }
633  addDouble(NULL_DOUBLE);
634  }
635  break;
636  case kTEXT:
637  case kVARCHAR:
638  case kCHAR: {
639  // @TODO(wei) for now, use empty string for nulls
640  if (is_null) {
641  if (check_not_null && cd->columnType.get_notnull()) {
642  throw std::runtime_error("NULL for column " + cd->columnName);
643  }
644  addString(std::string());
645  } else {
646  if (val.length() > StringDictionary::MAX_STRLEN) {
647  throw std::runtime_error("String too long for column " + cd->columnName +
648  " was " + std::to_string(val.length()) + " max is " +
649  std::to_string(StringDictionary::MAX_STRLEN));
650  }
651  addString(val);
652  }
653  break;
654  }
655  case kTIME:
656  case kTIMESTAMP:
657  case kDATE:
658  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
659  SQLTypeInfo ti = cd->columnType;
660  Datum d = StringToDatum(val, ti);
661  addBigint(d.bigintval);
662  } else {
663  if (check_not_null && cd->columnType.get_notnull()) {
664  throw std::runtime_error("NULL for column " + cd->columnName);
665  }
666  addBigint(inline_fixed_encoding_null_val(cd->columnType));
667  }
668  break;
669  case kARRAY: {
670  if (check_not_null && is_null && cd->columnType.get_notnull()) {
671  throw std::runtime_error("NULL for column " + cd->columnName);
672  }
673  SQLTypeInfo ti = cd->columnType;
674  if (IS_STRING(ti.get_subtype())) {
675  std::vector<std::string> string_vec;
676  // Just parse string array, don't push it to buffer yet as we might throw
677  import_export::delimited_parser::parse_string_array(
678  std::string(val), copy_params, string_vec);
679  if (!is_null) {
680  if (ti.get_size() > 0) {
681  auto sti = ti.get_elem_type();
682  size_t expected_size = ti.get_size() / sti.get_size();
683  size_t actual_size = string_vec.size();
684  if (actual_size != expected_size) {
685  throw std::runtime_error("Fixed length array column " + cd->columnName +
686  " expects " + std::to_string(expected_size) +
687  " values, received " +
688  std::to_string(actual_size));
689  }
690  }
691  addStringArray(string_vec);
692  } else {
693  addStringArray(std::nullopt);
694  }
695  } else {
696  if (!is_null) {
697  ArrayDatum d = StringToArray(std::string(val), ti, copy_params);
698  if (d.is_null) { // val could be "NULL"
699  addArray(NullArray(ti));
700  } else {
701  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
702  throw std::runtime_error("Fixed length array for column " + cd->columnName +
703  " has incorrect length: " + std::string(val));
704  }
705  addArray(d);
706  }
707  } else {
708  addArray(NullArray(ti));
709  }
710  }
711  break;
712  }
713  case kPOINT:
714  case kMULTIPOINT:
715  case kLINESTRING:
716  case kMULTILINESTRING:
717  case kPOLYGON:
718  case kMULTIPOLYGON:
719  addGeoString(val);
720  break;
721  default:
722  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
723  }
724 }
int8_t tinyintval
Definition: Datum.h:46
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:381
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:390
void addBigint(const int64_t v)
Definition: Importer.h:246
OptionalStringVector & addStringArray()
Definition: Importer.h:258
void addSmallint(const int16_t v)
Definition: Importer.h:242
Definition: sqltypes.h:64
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:252
void addDouble(const double v)
Definition: Importer.h:250
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:370
int8_t boolval
Definition: Datum.h:45
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
int32_t intval
Definition: Datum.h:48
std::string to_string(char const *&&v)
void addFloat(const float v)
Definition: Importer.h:248
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:217
void addGeoString(const std::string_view v)
Definition: Importer.h:254
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:238
void addTinyint(const int8_t v)
Definition: Importer.h:240
int64_t bigintval
Definition: Datum.h:49
void addInt(const int32_t v)
Definition: Importer.h:244
int16_t smallintval
Definition: Datum.h:47
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
Datum StringToDatum(const std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:337
Definition: sqltypes.h:67
Definition: sqltypes.h:68
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:315
void addArray(const ArrayDatum &v)
Definition: Importer.h:256
Definition: sqltypes.h:56
#define IS_STRING(T)
Definition: sqltypes.h:297
#define CHECK(condition)
Definition: Logger.h:222
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:60
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:387
Definition: Datum.h:44
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:957
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const TDatum &  val,
const bool  is_null 
)

Definition at line 1307 of file Importer.cpp.

References addArray(), addBigint(), import_export::addBinaryStringArray(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, decimal_to_int_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::TDatumToArrayDatum(), and run_benchmark_import::type.

1309  {
1310  const auto type = cd->columnType.is_decimal() ? decimal_to_int_type(cd->columnType)
1311  : cd->columnType.get_type();
1312  switch (type) {
1313  case kBOOLEAN: {
1314  if (is_null) {
1315  if (cd->columnType.get_notnull()) {
1316  throw std::runtime_error("NULL for column " + cd->columnName);
1317  }
1318  addBoolean(inline_fixed_encoding_null_val(cd->columnType));
1319  } else {
1320  addBoolean((int8_t)datum.val.int_val);
1321  }
1322  break;
1323  }
1324  case kTINYINT:
1325  if (!is_null) {
1326  addTinyint((int8_t)datum.val.int_val);
1327  } else {
1328  if (cd->columnType.get_notnull()) {
1329  throw std::runtime_error("NULL for column " + cd->columnName);
1330  }
1331  addTinyint(inline_fixed_encoding_null_val(cd->columnType));
1332  }
1333  break;
1334  case kSMALLINT:
1335  if (!is_null) {
1336  addSmallint((int16_t)datum.val.int_val);
1337  } else {
1338  if (cd->columnType.get_notnull()) {
1339  throw std::runtime_error("NULL for column " + cd->columnName);
1340  }
1341  addSmallint(inline_fixed_encoding_null_val(cd->columnType));
1342  }
1343  break;
1344  case kINT:
1345  if (!is_null) {
1346  addInt((int32_t)datum.val.int_val);
1347  } else {
1348  if (cd->columnType.get_notnull()) {
1349  throw std::runtime_error("NULL for column " + cd->columnName);
1350  }
1351  addInt(inline_fixed_encoding_null_val(cd->columnType));
1352  }
1353  break;
1354  case kBIGINT:
1355  if (!is_null) {
1356  addBigint(datum.val.int_val);
1357  } else {
1358  if (cd->columnType.get_notnull()) {
1359  throw std::runtime_error("NULL for column " + cd->columnName);
1360  }
1361  addBigint(inline_fixed_encoding_null_val(cd->columnType));
1362  }
1363  break;
1364  case kFLOAT:
1365  if (!is_null) {
1366  addFloat((float)datum.val.real_val);
1367  } else {
1368  if (cd->columnType.get_notnull()) {
1369  throw std::runtime_error("NULL for column " + cd->columnName);
1370  }
1371  addFloat(NULL_FLOAT);
1372  }
1373  break;
1374  case kDOUBLE:
1375  if (!is_null) {
1376  addDouble(datum.val.real_val);
1377  } else {
1378  if (cd->columnType.get_notnull()) {
1379  throw std::runtime_error("NULL for column " + cd->columnName);
1380  }
1381  addDouble(NULL_DOUBLE);
1382  }
1383  break;
1384  case kTEXT:
1385  case kVARCHAR:
1386  case kCHAR: {
1387  // @TODO(wei) for now, use empty string for nulls
1388  if (is_null) {
1389  if (cd->columnType.get_notnull()) {
1390  throw std::runtime_error("NULL for column " + cd->columnName);
1391  }
1392  addString(std::string());
1393  } else {
1394  addString(datum.val.str_val);
1395  }
1396  break;
1397  }
1398  case kTIME:
1399  case kTIMESTAMP:
1400  case kDATE: {
1401  if (!is_null) {
1402  addBigint(datum.val.int_val);
1403  } else {
1404  if (cd->columnType.get_notnull()) {
1405  throw std::runtime_error("NULL for column " + cd->columnName);
1406  }
1407  addBigint(inline_fixed_encoding_null_val(cd->columnType));
1408  }
1409  break;
1410  }
1411  case kARRAY:
1412  if (is_null && cd->columnType.get_notnull()) {
1413  throw std::runtime_error("NULL for column " + cd->columnName);
1414  }
1415  if (IS_STRING(cd->columnType.get_subtype())) {
1416  OptionalStringVector& string_vec = addStringArray();
1417  addBinaryStringArray(datum, *string_vec);
1418  } else {
1419  if (!is_null) {
1420  addArray(TDatumToArrayDatum(datum, cd->columnType));
1421  } else {
1422  addArray(NullArray(cd->columnType));
1423  }
1424  }
1425  break;
1426  case kPOINT:
1427  case kMULTIPOINT:
1428  case kLINESTRING:
1429  case kMULTILINESTRING:
1430  case kPOLYGON:
1431  case kMULTIPOLYGON:
1432  if (is_null) {
1433  if (cd->columnType.get_notnull()) {
1434  throw std::runtime_error("NULL for column " + cd->columnName);
1435  }
1436  addGeoString(std::string());
1437  } else {
1438  addGeoString(datum.val.str_val);
1439  }
1440  break;
1441  default:
1442  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
1443  }
1444 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:381
#define NULL_DOUBLE
void addBigint(const int64_t v)
Definition: Importer.h:246
OptionalStringVector & addStringArray()
Definition: Importer.h:258
void addSmallint(const int16_t v)
Definition: Importer.h:242
Definition: sqltypes.h:64
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:252
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:95
void addDouble(const double v)
Definition: Importer.h:250
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:370
void addFloat(const float v)
Definition: Importer.h:248
void addGeoString(const std::string_view v)
Definition: Importer.h:254
ArrayDatum TDatumToArrayDatum(const TDatum &datum, const SQLTypeInfo &ti)
Definition: Importer.cpp:468
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:238
void addTinyint(const int8_t v)
Definition: Importer.h:240
void addInt(const int32_t v)
Definition: Importer.h:244
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:559
Definition: sqltypes.h:67
Definition: sqltypes.h:68
void addArray(const ArrayDatum &v)
Definition: Importer.h:256
Definition: sqltypes.h:56
#define IS_STRING(T)
Definition: sqltypes.h:297
#define CHECK(condition)
Definition: Logger.h:222
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:60
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:387
bool is_decimal() const
Definition: sqltypes.h:579
std::string columnName
void addBinaryStringArray(const TDatum &datum, std::vector< std::string > &string_vec)
Definition: Importer.cpp:414

+ Here is the call graph for this function:

size_t import_export::TypedImportBuffer::add_values ( const ColumnDescriptor * cd,
const TColumn &  data 
)

Definition at line 976 of file Importer.cpp.

References addArray(), addStringArray(), anonymous_namespace{QueryMemoryDescriptor.cpp}::any_of(), bigint_buffer_, bool_buffer_, checked_malloc(), ColumnDescriptor::columnName, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), smallint_buffer_, string_buffer_, and tinyint_buffer_.

976  {
977  size_t dataSize = 0;
978  if (cd->columnType.get_notnull()) {
979  // We can't have any null values for this column; to have them is an error
980  if (std::any_of(col.nulls.begin(), col.nulls.end(), [](int i) { return i != 0; })) {
981  throw std::runtime_error("NULL for column " + cd->columnName);
982  }
983  }
984 
985  switch (cd->columnType.get_type()) {
986  case kBOOLEAN: {
987  dataSize = col.data.int_col.size();
988  bool_buffer_->reserve(dataSize);
989  for (size_t i = 0; i < dataSize; i++) {
990  if (col.nulls[i]) {
991  bool_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
992  } else {
993  bool_buffer_->push_back((int8_t)col.data.int_col[i]);
994  }
995  }
996  break;
997  }
998  case kTINYINT: {
999  dataSize = col.data.int_col.size();
1000  tinyint_buffer_->reserve(dataSize);
1001  for (size_t i = 0; i < dataSize; i++) {
1002  if (col.nulls[i]) {
1003  tinyint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1004  } else {
1005  tinyint_buffer_->push_back((int8_t)col.data.int_col[i]);
1006  }
1007  }
1008  break;
1009  }
1010  case kSMALLINT: {
1011  dataSize = col.data.int_col.size();
1012  smallint_buffer_->reserve(dataSize);
1013  for (size_t i = 0; i < dataSize; i++) {
1014  if (col.nulls[i]) {
1015  smallint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1016  } else {
1017  smallint_buffer_->push_back((int16_t)col.data.int_col[i]);
1018  }
1019  }
1020  break;
1021  }
1022  case kINT: {
1023  dataSize = col.data.int_col.size();
1024  int_buffer_->reserve(dataSize);
1025  for (size_t i = 0; i < dataSize; i++) {
1026  if (col.nulls[i]) {
1027  int_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1028  } else {
1029  int_buffer_->push_back((int32_t)col.data.int_col[i]);
1030  }
1031  }
1032  break;
1033  }
1034  case kBIGINT:
1035  case kNUMERIC:
1036  case kDECIMAL: {
1037  dataSize = col.data.int_col.size();
1038  bigint_buffer_->reserve(dataSize);
1039  for (size_t i = 0; i < dataSize; i++) {
1040  if (col.nulls[i]) {
1041  bigint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1042  } else {
1043  bigint_buffer_->push_back((int64_t)col.data.int_col[i]);
1044  }
1045  }
1046  break;
1047  }
1048  case kFLOAT: {
1049  dataSize = col.data.real_col.size();
1050  float_buffer_->reserve(dataSize);
1051  for (size_t i = 0; i < dataSize; i++) {
1052  if (col.nulls[i]) {
1053  float_buffer_->push_back(NULL_FLOAT);
1054  } else {
1055  float_buffer_->push_back((float)col.data.real_col[i]);
1056  }
1057  }
1058  break;
1059  }
1060  case kDOUBLE: {
1061  dataSize = col.data.real_col.size();
1062  double_buffer_->reserve(dataSize);
1063  for (size_t i = 0; i < dataSize; i++) {
1064  if (col.nulls[i]) {
1065  double_buffer_->push_back(NULL_DOUBLE);
1066  } else {
1067  double_buffer_->push_back((double)col.data.real_col[i]);
1068  }
1069  }
1070  break;
1071  }
1072  case kTEXT:
1073  case kVARCHAR:
1074  case kCHAR: {
1075  // TODO: for now, use empty string for nulls
1076  dataSize = col.data.str_col.size();
1077  string_buffer_->reserve(dataSize);
1078  for (size_t i = 0; i < dataSize; i++) {
1079  if (col.nulls[i]) {
1080  string_buffer_->push_back(std::string());
1081  } else {
1082  string_buffer_->push_back(col.data.str_col[i]);
1083  }
1084  }
1085  break;
1086  }
1087  case kTIME:
1088  case kTIMESTAMP:
1089  case kDATE: {
1090  dataSize = col.data.int_col.size();
1091  bigint_buffer_->reserve(dataSize);
1092  for (size_t i = 0; i < dataSize; i++) {
1093  if (col.nulls[i]) {
1094  bigint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1095  } else {
1096  bigint_buffer_->push_back(static_cast<int64_t>(col.data.int_col[i]));
1097  }
1098  }
1099  break;
1100  }
1101  case kPOINT:
1102  case kMULTIPOINT:
1103  case kLINESTRING:
1104  case kMULTILINESTRING:
1105  case kPOLYGON:
1106  case kMULTIPOLYGON: {
1107  dataSize = col.data.str_col.size();
1108  geo_string_buffer_->reserve(dataSize);
1109  for (size_t i = 0; i < dataSize; i++) {
1110  if (col.nulls[i]) {
1111  // TODO: add support for NULL geo
1112  geo_string_buffer_->push_back(std::string());
1113  } else {
1114  geo_string_buffer_->push_back(col.data.str_col[i]);
1115  }
1116  }
1117  break;
1118  }
1119  case kARRAY: {
1120  dataSize = col.data.arr_col.size();
1121  if (IS_STRING(cd->columnType.get_subtype())) {
1122  for (size_t i = 0; i < dataSize; i++) {
1123  OptionalStringVector& string_vec = addStringArray();
1124  if (!col.nulls[i]) {
1125  size_t stringArrSize = col.data.arr_col[i].data.str_col.size();
1126  for (size_t str_idx = 0; str_idx != stringArrSize; ++str_idx) {
1127  string_vec->push_back(col.data.arr_col[i].data.str_col[str_idx]);
1128  }
1129  }
1130  }
1131  } else {
1132  auto elem_ti = cd->columnType.get_subtype();
1133  switch (elem_ti) {
1134  case kBOOLEAN: {
1135  for (size_t i = 0; i < dataSize; i++) {
1136  if (col.nulls[i]) {
1137  addArray(NullArray(cd->columnType));
1138  } else {
1139  size_t len = col.data.arr_col[i].data.int_col.size();
1140  size_t byteSize = len * sizeof(int8_t);
1141  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1142  int8_t* p = buf;
1143  for (size_t j = 0; j < len; ++j) {
1144  // Explicitly checking the item for null because
1145  // casting null value (-128) to bool results
1146  // incorrect value 1.
1147  if (col.data.arr_col[i].nulls[j]) {
1148  *p = static_cast<int8_t>(
1149  inline_fixed_encoding_null_val(cd->columnType.get_elem_type()));
1150  } else {
1151  *(bool*)p = static_cast<bool>(col.data.arr_col[i].data.int_col[j]);
1152  }
1153  p += sizeof(bool);
1154  }
1155  addArray(ArrayDatum(byteSize, buf, false));
1156  }
1157  }
1158  break;
1159  }
1160  case kTINYINT: {
1161  for (size_t i = 0; i < dataSize; i++) {
1162  if (col.nulls[i]) {
1163  addArray(NullArray(cd->columnType));
1164  } else {
1165  size_t len = col.data.arr_col[i].data.int_col.size();
1166  size_t byteSize = len * sizeof(int8_t);
1167  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1168  int8_t* p = buf;
1169  for (size_t j = 0; j < len; ++j) {
1170  *(int8_t*)p = static_cast<int8_t>(col.data.arr_col[i].data.int_col[j]);
1171  p += sizeof(int8_t);
1172  }
1173  addArray(ArrayDatum(byteSize, buf, false));
1174  }
1175  }
1176  break;
1177  }
1178  case kSMALLINT: {
1179  for (size_t i = 0; i < dataSize; i++) {
1180  if (col.nulls[i]) {
1181  addArray(NullArray(cd->columnType));
1182  } else {
1183  size_t len = col.data.arr_col[i].data.int_col.size();
1184  size_t byteSize = len * sizeof(int16_t);
1185  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1186  int8_t* p = buf;
1187  for (size_t j = 0; j < len; ++j) {
1188  *(int16_t*)p =
1189  static_cast<int16_t>(col.data.arr_col[i].data.int_col[j]);
1190  p += sizeof(int16_t);
1191  }
1192  addArray(ArrayDatum(byteSize, buf, false));
1193  }
1194  }
1195  break;
1196  }
1197  case kINT: {
1198  for (size_t i = 0; i < dataSize; i++) {
1199  if (col.nulls[i]) {
1200  addArray(NullArray(cd->columnType));
1201  } else {
1202  size_t len = col.data.arr_col[i].data.int_col.size();
1203  size_t byteSize = len * sizeof(int32_t);
1204  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1205  int8_t* p = buf;
1206  for (size_t j = 0; j < len; ++j) {
1207  *(int32_t*)p =
1208  static_cast<int32_t>(col.data.arr_col[i].data.int_col[j]);
1209  p += sizeof(int32_t);
1210  }
1211  addArray(ArrayDatum(byteSize, buf, false));
1212  }
1213  }
1214  break;
1215  }
1216  case kBIGINT:
1217  case kNUMERIC:
1218  case kDECIMAL: {
1219  for (size_t i = 0; i < dataSize; i++) {
1220  if (col.nulls[i]) {
1221  addArray(NullArray(cd->columnType));
1222  } else {
1223  size_t len = col.data.arr_col[i].data.int_col.size();
1224  size_t byteSize = len * sizeof(int64_t);
1225  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1226  int8_t* p = buf;
1227  for (size_t j = 0; j < len; ++j) {
1228  *(int64_t*)p =
1229  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1230  p += sizeof(int64_t);
1231  }
1232  addArray(ArrayDatum(byteSize, buf, false));
1233  }
1234  }
1235  break;
1236  }
1237  case kFLOAT: {
1238  for (size_t i = 0; i < dataSize; i++) {
1239  if (col.nulls[i]) {
1240  addArray(NullArray(cd->columnType));
1241  } else {
1242  size_t len = col.data.arr_col[i].data.real_col.size();
1243  size_t byteSize = len * sizeof(float);
1244  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1245  int8_t* p = buf;
1246  for (size_t j = 0; j < len; ++j) {
1247  *(float*)p = static_cast<float>(col.data.arr_col[i].data.real_col[j]);
1248  p += sizeof(float);
1249  }
1250  addArray(ArrayDatum(byteSize, buf, false));
1251  }
1252  }
1253  break;
1254  }
1255  case kDOUBLE: {
1256  for (size_t i = 0; i < dataSize; i++) {
1257  if (col.nulls[i]) {
1258  addArray(NullArray(cd->columnType));
1259  } else {
1260  size_t len = col.data.arr_col[i].data.real_col.size();
1261  size_t byteSize = len * sizeof(double);
1262  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1263  int8_t* p = buf;
1264  for (size_t j = 0; j < len; ++j) {
1265  *(double*)p = static_cast<double>(col.data.arr_col[i].data.real_col[j]);
1266  p += sizeof(double);
1267  }
1268  addArray(ArrayDatum(byteSize, buf, false));
1269  }
1270  }
1271  break;
1272  }
1273  case kTIME:
1274  case kTIMESTAMP:
1275  case kDATE: {
1276  for (size_t i = 0; i < dataSize; i++) {
1277  if (col.nulls[i]) {
1278  addArray(NullArray(cd->columnType));
1279  } else {
1280  size_t len = col.data.arr_col[i].data.int_col.size();
1281  size_t byteWidth = sizeof(int64_t);
1282  size_t byteSize = len * byteWidth;
1283  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1284  int8_t* p = buf;
1285  for (size_t j = 0; j < len; ++j) {
1286  *reinterpret_cast<int64_t*>(p) =
1287  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1288  p += sizeof(int64_t);
1289  }
1290  addArray(ArrayDatum(byteSize, buf, false));
1291  }
1292  }
1293  break;
1294  }
1295  default:
1296  throw std::runtime_error("Invalid Array Type");
1297  }
1298  }
1299  break;
1300  }
1301  default:
1302  throw std::runtime_error("Invalid Type");
1303  }
1304  return dataSize;
1305 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:381
#define NULL_DOUBLE
OptionalStringVector & addStringArray()
Definition: Importer.h:258
Definition: sqltypes.h:64
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:95
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:370
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:217
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:67
Definition: sqltypes.h:68
void addArray(const ArrayDatum &v)
Definition: Importer.h:256
Definition: sqltypes.h:56
#define IS_STRING(T)
Definition: sqltypes.h:297
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:60
SQLTypeInfo columnType
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:387
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:957
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addArray ( const ArrayDatum & v)
inline

Definition at line 256 of file Importer.h.

References array_buffer_.

Referenced by add_value(), and add_values().

256 { array_buffer_->push_back(v); }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBigint ( const int64_t  v)
inline

Definition at line 246 of file Importer.h.

References bigint_buffer_.

Referenced by add_value().

246 { bigint_buffer_->push_back(v); }
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBoolean ( const int8_t  v)
inline

Definition at line 238 of file Importer.h.

References bool_buffer_.

Referenced by add_value().

238 { bool_buffer_->push_back(v); }
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addDefaultValues ( const ColumnDescriptor * cd,
size_t  num_rows 
)

Definition at line 1446 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_decimal_value_to_scale(), ColumnDescriptor::default_value, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), int_buffer_, is_null(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), smallint_buffer_, string_array_buffer_, string_buffer_, import_export::StringToArray(), StringToDatum(), tinyint_buffer_, to_string(), and run_benchmark_import::type.

1446  {
1447  bool is_null = !cd->default_value.has_value();
1448  CHECK(!(is_null && cd->columnType.get_notnull()));
1449  const auto type = cd->columnType.get_type();
1450  auto ti = cd->columnType;
1451  auto val = cd->default_value.value_or("NULL");
1452  CopyParams cp;
1453  switch (type) {
1454  case kBOOLEAN: {
1455  if (!is_null) {
1456  bool_buffer_->resize(num_rows, StringToDatum(val, ti).boolval);
1457  } else {
1458  bool_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1459  }
1460  break;
1461  }
1462  case kTINYINT: {
1463  if (!is_null) {
1464  tinyint_buffer_->resize(num_rows, StringToDatum(val, ti).tinyintval);
1465  } else {
1466  tinyint_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1467  }
1468  break;
1469  }
1470  case kSMALLINT: {
1471  if (!is_null) {
1472  smallint_buffer_->resize(num_rows, StringToDatum(val, ti).smallintval);
1473  } else {
1474  smallint_buffer_->resize(num_rows,
1475  inline_fixed_encoding_null_val(cd->columnType));
1476  }
1477  break;
1478  }
1479  case kINT: {
1480  if (!is_null) {
1481  int_buffer_->resize(num_rows, StringToDatum(val, ti).intval);
1482  } else {
1483  int_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1484  }
1485  break;
1486  }
1487  case kBIGINT: {
1488  if (!is_null) {
1489  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1490  } else {
1491  bigint_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1492  }
1493  break;
1494  }
1495  case kDECIMAL:
1496  case kNUMERIC: {
1497  if (!is_null) {
1498  const auto converted_decimal_value = convert_decimal_value_to_scale(
1499  StringToDatum(val, ti).bigintval, ti, cd->columnType);
1500  bigint_buffer_->resize(num_rows, converted_decimal_value);
1501  } else {
1502  bigint_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1503  }
1504  break;
1505  }
1506  case kFLOAT:
1507  if (!is_null) {
1508  float_buffer_->resize(num_rows,
1509  static_cast<float>(std::atof(std::string(val).c_str())));
1510  } else {
1511  float_buffer_->resize(num_rows, NULL_FLOAT);
1512  }
1513  break;
1514  case kDOUBLE:
1515  if (!is_null) {
1516  double_buffer_->resize(num_rows, std::atof(std::string(val).c_str()));
1517  } else {
1518  double_buffer_->resize(num_rows, NULL_DOUBLE);
1519  }
1520  break;
1521  case kTEXT:
1522  case kVARCHAR:
1523  case kCHAR: {
1524  if (is_null) {
1525  string_buffer_->resize(num_rows, "");
1526  } else {
1527  if (val.length() > StringDictionary::MAX_STRLEN) {
1528  throw std::runtime_error("String too long for column " + cd->columnName +
1529  " was " + std::to_string(val.length()) + " max is " +
1530  std::to_string(StringDictionary::MAX_STRLEN));
1531  }
1532  string_buffer_->resize(num_rows, val);
1533  }
1534  break;
1535  }
1536  case kTIME:
1537  case kTIMESTAMP:
1538  case kDATE:
1539  if (!is_null) {
1540  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1541  } else {
1542  bigint_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1543  }
1544  break;
1545  case kARRAY: {
1546  if (IS_STRING(ti.get_subtype())) {
1547  std::vector<std::string> string_vec;
1548  // Just parse string array, don't push it to buffer yet as we might throw
1549  import_export::delimited_parser::parse_string_array(
1550  std::string(val), cp, string_vec);
1551  if (!is_null) {
1552  // TODO: add support for NULL string arrays
1553  if (ti.get_size() > 0) {
1554  auto sti = ti.get_elem_type();
1555  size_t expected_size = ti.get_size() / sti.get_size();
1556  size_t actual_size = string_vec.size();
1557  if (actual_size != expected_size) {
1558  throw std::runtime_error("Fixed length array column " + cd->columnName +
1559  " expects " + std::to_string(expected_size) +
1560  " values, received " +
1561  std::to_string(actual_size));
1562  }
1563  }
1564  string_array_buffer_->resize(num_rows, string_vec);
1565  } else {
1566  if (ti.get_size() > 0) {
1567  // TODO: remove once NULL fixlen arrays are allowed
1568  throw std::runtime_error("Fixed length array column " + cd->columnName +
1569  " currently cannot accept NULL arrays");
1570  }
1571  // TODO: add support for NULL string arrays, replace with addStringArray(),
1572  // for now add whatever parseStringArray() outputs for NULLs ("NULL")
1573  string_array_buffer_->resize(num_rows, string_vec);
1574  }
1575  } else {
1576  if (!is_null) {
1577  ArrayDatum d = StringToArray(std::string(val), ti, cp);
1578  if (d.is_null) { // val could be "NULL"
1579  array_buffer_->resize(num_rows, NullArray(ti));
1580  } else {
1581  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
1582  throw std::runtime_error("Fixed length array for column " + cd->columnName +
1583  " has incorrect length: " + std::string(val));
1584  }
1585  array_buffer_->resize(num_rows, d);
1586  }
1587  } else {
1588  array_buffer_->resize(num_rows, NullArray(ti));
1589  }
1590  }
1591  break;
1592  }
1593  case kPOINT:
1594  case kMULTIPOINT:
1595  case kLINESTRING:
1596  case kMULTILINESTRING:
1597  case kPOLYGON:
1598  case kMULTIPOLYGON:
1599  geo_string_buffer_->resize(num_rows, val);
1600  break;
1601  default:
1602  CHECK(false) << "TypedImportBuffer::addDefaultValues() does not support type "
1603  << type;
1604  }
1605 }
#define NULL_DOUBLE
Definition: sqltypes.h:64
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:370
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::string to_string(char const *&&v)
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:217
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
CONSTEXPR DEVICE bool is_null(const T &value)
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
Datum StringToDatum(const std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:337
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
std::optional< std::string > default_value
Definition: sqltypes.h:67
Definition: sqltypes.h:68
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:315
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:622
Definition: sqltypes.h:56
#define IS_STRING(T)
Definition: sqltypes.h:297
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
#define CHECK(condition)
Definition: Logger.h:222
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:60
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:387
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedString ( const std::vector< std::string > &  string_vec)

Definition at line 488 of file Importer.cpp.

References CHECK, column_desc_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, logger::ERROR, SQLTypeInfo::get_size(), getColumnDesc(), StringDictionary::getOrAddBulk(), LOG, StringDictionary::MAX_STRLEN, string_dict_, string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

488  {
489  CHECK(string_dict_);
490  std::vector<std::string_view> string_view_vec;
491  string_view_vec.reserve(string_vec.size());
492  for (const auto& str : string_vec) {
493  if (str.size() > StringDictionary::MAX_STRLEN) {
494  std::ostringstream oss;
495  oss << "while processing dictionary for column " << getColumnDesc()->columnName
496  << " a string was detected too long for encoding, string length = "
497  << str.size() << ", first 100 characters are '" << str.substr(0, 100) << "'";
498  throw std::runtime_error(oss.str());
499  }
500  string_view_vec.push_back(str);
501  }
502  try {
503  switch (column_desc_->columnType.get_size()) {
504  case 1:
505  string_dict_i8_buffer_->resize(string_view_vec.size());
506  string_dict_->getOrAddBulk(string_view_vec, string_dict_i8_buffer_->data());
507  break;
508  case 2:
509  string_dict_i16_buffer_->resize(string_view_vec.size());
510  string_dict_->getOrAddBulk(string_view_vec, string_dict_i16_buffer_->data());
511  break;
512  case 4:
513  string_dict_i32_buffer_->resize(string_view_vec.size());
514  string_dict_->getOrAddBulk(string_view_vec, string_dict_i32_buffer_->data());
515  break;
516  default:
517  CHECK(false);
518  }
519  } catch (std::exception& e) {
520  std::ostringstream oss;
521  oss << "while processing dictionary for column " << getColumnDesc()->columnName
522  << " : " << e.what();
523  LOG(ERROR) << oss.str();
524  throw std::runtime_error(oss.str());
525  }
526 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:390
#define LOG(tag)
Definition: Logger.h:216
StringDictionary * string_dict_
Definition: Importer.h:548
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:542
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:544
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:543
#define CHECK(condition)
Definition: Logger.h:222
const ColumnDescriptor * getColumnDesc() const
Definition: Importer.h:319
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedStringArray ( const std::vector< OptionalStringVector > &  string_array_vec)
inline

Definition at line 269 of file Importer.h.

References anonymous_namespace{Utm.h}::a, CHECK, checked_malloc(), column_desc_, ColumnDescriptor::columnType, import_export::ImporterUtils::composeNullArray(), StringDictionary::getOrAddBulkArray(), StringDictionary::MAX_STRLEN, string_array_dict_buffer_, and string_dict_.

270  {
271  CHECK(string_dict_);
272 
273  // first check data is ok
274  for (auto& p : string_array_vec) {
275  if (!p) {
276  continue;
277  }
278  for (const auto& str : *p) {
279  if (str.size() > StringDictionary::MAX_STRLEN) {
280  throw std::runtime_error("String too long for dictionary encoding.");
281  }
282  }
283  }
284 
285  // to avoid copying, create a string view of each string in the
286  // `string_array_vec` where the array holding the string is *not null*
287  std::vector<std::vector<std::string_view>> string_view_array_vec;
288  for (auto& p : string_array_vec) {
289  if (!p) {
290  continue;
291  }
292  auto& array = string_view_array_vec.emplace_back();
293  for (const auto& str : *p) {
294  array.emplace_back(str);
295  }
296  }
297 
298  std::vector<std::vector<int32_t>> ids_array(0);
299  string_dict_->getOrAddBulkArray(string_view_array_vec, ids_array);
300 
301  size_t i, j;
302  for (i = 0, j = 0; i < string_array_vec.size(); ++i) {
303  if (!string_array_vec[i]) { // null array
304  string_array_dict_buffer_->push_back(
305  ImporterUtils::composeNullArray(column_desc_->columnType));
306  } else { // non-null array
307  auto& p = ids_array[j++];
308  size_t len = p.size() * sizeof(int32_t);
309  auto a = static_cast<int32_t*>(checked_malloc(len));
310  memcpy(a, &p[0], len);
311  string_array_dict_buffer_->push_back(
312  ArrayDatum(len, reinterpret_cast<int8_t*>(a), false));
313  }
314  }
315  }
StringDictionary * string_dict_
Definition: Importer.h:548
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:394
constexpr double a
Definition: Utm.h:32
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:217
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:545
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
void getOrAddBulkArray(const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec)
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
#define CHECK(condition)
Definition: Logger.h:222
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDouble ( const double  v)
inline

Definition at line 250 of file Importer.h.

References double_buffer_.

Referenced by add_value().

250 { double_buffer_->push_back(v); }
std::vector< double > * double_buffer_
Definition: Importer.h:535

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addFloat ( const float  v)
inline

Definition at line 248 of file Importer.h.

References float_buffer_.

Referenced by add_value().

248 { float_buffer_->push_back(v); }
std::vector< float > * float_buffer_
Definition: Importer.h:534

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addGeoString ( const std::string_view  v)
inline

Definition at line 254 of file Importer.h.

References geo_string_buffer_.

Referenced by add_value().

254 { geo_string_buffer_->emplace_back(v); }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addInt ( const int32_t  v)
inline

Definition at line 244 of file Importer.h.

References int_buffer_.

Referenced by add_value().

244 { int_buffer_->push_back(v); }
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addSmallint ( const int16_t  v)
inline

Definition at line 242 of file Importer.h.

References smallint_buffer_.

Referenced by add_value().

242 { smallint_buffer_->push_back(v); }
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addString ( const std::string_view  v)
inline

Definition at line 252 of file Importer.h.

References string_buffer_.

Referenced by add_value().

252 { string_buffer_->emplace_back(v); }
std::vector< std::string > * string_buffer_
Definition: Importer.h:536

+ Here is the caller graph for this function:

OptionalStringVector& import_export::TypedImportBuffer::addStringArray ( )
inline

Definition at line 258 of file Importer.h.

References string_array_buffer_.

Referenced by add_value(), and add_values().

258  {
259  string_array_buffer_->emplace_back(std::vector<std::string>{});
260  return string_array_buffer_->back();
261  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addStringArray ( const OptionalStringVector & arr)
inline

Definition at line 263 of file Importer.h.

References string_array_buffer_.

263  {
264  string_array_buffer_->push_back(arr);
265  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
void import_export::TypedImportBuffer::addTinyint ( const int8_t  v)
inline

Definition at line 240 of file Importer.h.

References tinyint_buffer_.

Referenced by add_value().

240 { tinyint_buffer_->push_back(v); }
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::clear ( )
inline

Definition at line 411 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

411  {
412  switch (column_desc_->columnType.get_type()) {
413  case kBOOLEAN: {
414  bool_buffer_->clear();
415  break;
416  }
417  case kTINYINT: {
418  tinyint_buffer_->clear();
419  break;
420  }
421  case kSMALLINT: {
422  smallint_buffer_->clear();
423  break;
424  }
425  case kINT: {
426  int_buffer_->clear();
427  break;
428  }
429  case kBIGINT:
430  case kNUMERIC:
431  case kDECIMAL: {
432  bigint_buffer_->clear();
433  break;
434  }
435  case kFLOAT: {
436  float_buffer_->clear();
437  break;
438  }
439  case kDOUBLE: {
440  double_buffer_->clear();
441  break;
442  }
443  case kTEXT:
444  case kVARCHAR:
445  case kCHAR: {
446  string_buffer_->clear();
447  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
448  switch (column_desc_->columnType.get_size()) {
449  case 1:
450  string_dict_i8_buffer_->clear();
451  break;
452  case 2:
453  string_dict_i16_buffer_->clear();
454  break;
455  case 4:
456  string_dict_i32_buffer_->clear();
457  break;
458  default:
459  CHECK(false);
460  }
461  }
462  break;
463  }
464  case kDATE:
465  case kTIME:
466  case kTIMESTAMP:
467  bigint_buffer_->clear();
468  break;
469  case kARRAY: {
470  if (IS_STRING(column_desc_->columnType.get_subtype())) {
471  string_array_buffer_->clear();
472  string_array_dict_buffer_->clear();
473  } else {
474  array_buffer_->clear();
475  }
476  break;
477  }
478  case kPOINT:
479  case kMULTIPOINT:
480  case kLINESTRING:
481  case kMULTILINESTRING:
482  case kPOLYGON:
483  case kMULTIPOLYGON:
484  geo_string_buffer_->clear();
485  break;
486  default:
487  CHECK(false);
488  }
489  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:381
HOST DEVICE int get_size() const
Definition: sqltypes.h:390
Definition: sqltypes.h:64
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:545
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:542
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:67
Definition: sqltypes.h:68
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:388
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:544
Definition: sqltypes.h:56
#define IS_STRING(T)
Definition: sqltypes.h:297
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:543
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:60
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const arrow::Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange &  slice_range,
BadRowsTracker *const  bad_rows_tracker 
)

Referenced by add_arrow_values().

+ Here is the caller graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange &  slice_range,
import_export::BadRowsTracker *const  bad_rows_tracker 
)

Definition at line 788 of file Importer.cpp.

References col_idx, anonymous_namespace{ArrowImporter.h}::error_context(), geo_string_buffer_, SQLTypeInfo::get_type(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), import_buffers, import_export::BadRowsTracker::importer, import_export::BadRowsTracker::mutex, import_export::BadRowsTracker::rows, import_export::Importer::set_geo_physical_import_buffer(), and anonymous_namespace{ArrowImporter.h}::value_getter().

793  {
794  auto data =
795  std::make_unique<DataBuffer<DATA_TYPE>>(cd, array, buffer, bad_rows_tracker);
796  auto f_value_getter = value_getter(array, cd, bad_rows_tracker);
797  std::function<void(const int64_t)> f_add_geo_phy_cols = [&](const int64_t row) {};
798  if (bad_rows_tracker && cd->columnType.is_geometry()) {
799  f_add_geo_phy_cols = [&](const int64_t row) {
800  // Populate physical columns (ref. DBHandler::load_table)
801  std::vector<double> coords, bounds;
802  std::vector<int> ring_sizes, poly_rings;
803  int render_group = 0;
804  SQLTypeInfo ti;
805  // replace any unexpected exception from getGeoColumns or other
806  // on this path with a GeoImportException so that we wont over
807  // push a null to the logical column...
808  try {
809  SQLTypeInfo import_ti{ti};
810  if (array.IsNull(row)) {
811  Geospatial::GeoTypesFactory::getNullGeoColumns(
812  import_ti, coords, bounds, ring_sizes, poly_rings, false);
813  } else {
814  arrow_throw_if<GeoImportException>(
815  !Geospatial::GeoTypesFactory::getGeoColumns(geo_string_buffer_->back(),
816  ti,
817  coords,
818  bounds,
819  ring_sizes,
820  poly_rings,
821  false),
822  error_context(cd, bad_rows_tracker) + "Invalid geometry");
823  arrow_throw_if<GeoImportException>(
824  cd->columnType.get_type() != ti.get_type(),
825  error_context(cd, bad_rows_tracker) + "Geometry type mismatch");
826  }
827  auto col_idx_workpad = col_idx; // what a pitfall!!
828  import_export::Importer::set_geo_physical_import_buffer(
829  bad_rows_tracker->importer->getCatalog(),
830  cd,
831  *import_buffers,
832  col_idx_workpad,
833  coords,
834  bounds,
835  ring_sizes,
836  poly_rings,
837  render_group);
838  } catch (GeoImportException&) {
839  throw;
840  } catch (std::runtime_error& e) {
841  throw GeoImportException(e.what());
842  } catch (const std::exception& e) {
843  throw GeoImportException(e.what());
844  } catch (...) {
845  throw GeoImportException("unknown exception");
846  }
847  };
848  }
849  auto f_mark_a_bad_row = [&](const auto row) {
850  std::unique_lock<std::mutex> lck(bad_rows_tracker->mutex);
851  bad_rows_tracker->rows.insert(row - slice_range.first);
852  };
853  buffer.reserve(slice_range.second - slice_range.first);
854  for (size_t row = slice_range.first; row < slice_range.second; ++row) {
855  try {
856  *data << (array.IsNull(row) ? nullptr : f_value_getter(array, row));
857  f_add_geo_phy_cols(row);
858  } catch (GeoImportException&) {
859  f_mark_a_bad_row(row);
860  } catch (ArrowImporterException&) {
861  // trace bad rows of each column; otherwise rethrow.
862  if (bad_rows_tracker) {
863  *data << nullptr;
864  f_mark_a_bad_row(row);
865  } else {
866  throw;
867  }
868  }
869  }
870  return buffer.size();
871 }
auto value_getter(const arrow::Array &array, const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1309
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
Catalog_Namespace::Catalog & getCatalog()
Definition: Importer.h:841
std::set< int64_t > rows
Definition: Importer.h:79
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:524
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1079
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const bool force_null=false)
Definition: Importer.cpp:1627
std::string error_context(const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
Definition: ArrowImporter.h:77
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

template<typename DATA_TYPE >
auto import_export::TypedImportBuffer::del_values ( std::vector< DATA_TYPE > &  buffer,
BadRowsTracker *const  bad_rows_tracker 
)
auto import_export::TypedImportBuffer::del_values ( const SQLTypes  type,
BadRowsTracker *const  bad_rows_tracker 
)
std::vector< DataBlockPtr > import_export::TypedImportBuffer::get_data_block_pointers ( const std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers)
static

Definition at line 3014 of file Importer.cpp.

References DataBlockPtr::arraysPtr, threading_serial::async(), CHECK, CHECK_EQ, getStringArrayBuffer(), getTypeInfo(), import_buffers, SQLTypeInfo::is_number(), IS_STRING, SQLTypeInfo::is_string(), kARRAY, kBOOLEAN, kENCODING_DICT, kENCODING_NONE, DataBlockPtr::numbersPtr, run_benchmark_import::result, and DataBlockPtr::stringsPtr.

Referenced by import_export::fill_missing_columns(), import_export::Loader::loadImpl(), and import_export::Loader::loadToShard().

3015  {
3016  std::vector<DataBlockPtr> result(import_buffers.size());
3017  std::vector<std::pair<const size_t, std::future<int8_t*>>>
3018  encoded_data_block_ptrs_futures;
3019  // make all async calls to string dictionary here and then continue execution
3020  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
3021  if (import_buffers[buf_idx]->getTypeInfo().is_string() &&
3022  import_buffers[buf_idx]->getTypeInfo().get_compression() != kENCODING_NONE) {
3023  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
3024  CHECK_EQ(kENCODING_DICT, import_buffers[buf_idx]->getTypeInfo().get_compression());
3025 
3026  encoded_data_block_ptrs_futures.emplace_back(std::make_pair(
3027  buf_idx,
3028  std::async(std::launch::async, [buf_idx, &import_buffers, string_payload_ptr] {
3029  import_buffers[buf_idx]->addDictEncodedString(*string_payload_ptr);
3030  return import_buffers[buf_idx]->getStringDictBuffer();
3031  })));
3032  }
3033  }
3034 
3035  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
3036  DataBlockPtr p;
3037  if (import_buffers[buf_idx]->getTypeInfo().is_number() ||
3038  import_buffers[buf_idx]->getTypeInfo().is_time() ||
3039  import_buffers[buf_idx]->getTypeInfo().get_type() == kBOOLEAN) {
3040  p.numbersPtr = import_buffers[buf_idx]->getAsBytes();
3041  } else if (import_buffers[buf_idx]->getTypeInfo().is_string()) {
3042  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
3043  if (import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_NONE) {
3044  p.stringsPtr = string_payload_ptr;
3045  } else {
3046  // This condition means we have column which is ENCODED string. We already made
3047  // Async request to gain the encoded integer values above so we should skip this
3048  // iteration and continue.
3049  continue;
3050  }
3051  } else if (import_buffers[buf_idx]->getTypeInfo().is_geometry()) {
3052  auto geo_payload_ptr = import_buffers[buf_idx]->getGeoStringBuffer();
3053  p.stringsPtr = geo_payload_ptr;
3054  } else {
3055  CHECK(import_buffers[buf_idx]->getTypeInfo().get_type() == kARRAY);
3056  if (IS_STRING(import_buffers[buf_idx]->getTypeInfo().get_subtype())) {
3057  CHECK(import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_DICT);
3058  import_buffers[buf_idx]->addDictEncodedStringArray(
3059  *import_buffers[buf_idx]->getStringArrayBuffer());
3060  p.arraysPtr = import_buffers[buf_idx]->getStringArrayDictBuffer();
3061  } else {
3062  p.arraysPtr = import_buffers[buf_idx]->getArrayBuffer();
3063  }
3064  }
3065  result[buf_idx] = p;
3066  }
3067 
3068  // wait for the async requests we made for string dictionary
3069  for (auto& encoded_ptr_future : encoded_data_block_ptrs_futures) {
3070  result[encoded_ptr_future.first].numbersPtr = encoded_ptr_future.second.get();
3071  }
3072  return result;
3073 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
const SQLTypeInfo & getTypeInfo() const
Definition: Importer.h:317
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:222
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:223
bool is_number() const
Definition: sqltypes.h:581
future< Result > async(Fn &&fn, Args &&...args)
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:524
#define IS_STRING(T)
Definition: sqltypes.h:297
#define CHECK(condition)
Definition: Logger.h:222
std::vector< OptionalStringVector > * getStringArrayBuffer() const
Definition: Importer.h:383
bool is_string() const
Definition: sqltypes.h:576
int8_t * numbersPtr
Definition: sqltypes.h:221

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getArrayBuffer ( ) const
inline

Definition at line 381 of file Importer.h.

References array_buffer_.

381 { return array_buffer_; }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
int8_t* import_export::TypedImportBuffer::getAsBytes ( ) const
inline

Definition at line 323 of file Importer.h.

References bigint_buffer_, bool_buffer_, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, SQLTypeInfo::get_type(), int_buffer_, kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, smallint_buffer_, and tinyint_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

323  {
324  switch (column_desc_->columnType.get_type()) {
325  case kBOOLEAN:
326  return reinterpret_cast<int8_t*>(bool_buffer_->data());
327  case kTINYINT:
328  return reinterpret_cast<int8_t*>(tinyint_buffer_->data());
329  case kSMALLINT:
330  return reinterpret_cast<int8_t*>(smallint_buffer_->data());
331  case kINT:
332  return reinterpret_cast<int8_t*>(int_buffer_->data());
333  case kBIGINT:
334  case kNUMERIC:
335  case kDECIMAL:
336  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
337  case kFLOAT:
338  return reinterpret_cast<int8_t*>(float_buffer_->data());
339  case kDOUBLE:
340  return reinterpret_cast<int8_t*>(double_buffer_->data());
341  case kDATE:
342  case kTIME:
343  case kTIMESTAMP:
344  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
345  default:
346  abort();
347  }
348  }
Definition: sqltypes.h:64
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:68
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
Definition: sqltypes.h:60
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const ColumnDescriptor* import_export::TypedImportBuffer::getColumnDesc ( ) const
inline

Definition at line 319 of file Importer.h.

References column_desc_.

Referenced by addDictEncodedString(), foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::set_null(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::set_null(), and foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::set_null().

319 { return column_desc_; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:547

+ Here is the caller graph for this function:

size_t import_export::TypedImportBuffer::getElementSize ( ) const
inline

Definition at line 350 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_type(), kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, and kTINYINT.

350  {
351  switch (column_desc_->columnType.get_type()) {
352  case kBOOLEAN:
353  return sizeof((*bool_buffer_)[0]);
354  case kTINYINT:
355  return sizeof((*tinyint_buffer_)[0]);
356  case kSMALLINT:
357  return sizeof((*smallint_buffer_)[0]);
358  case kINT:
359  return sizeof((*int_buffer_)[0]);
360  case kBIGINT:
361  case kNUMERIC:
362  case kDECIMAL:
363  return sizeof((*bigint_buffer_)[0]);
364  case kFLOAT:
365  return sizeof((*float_buffer_)[0]);
366  case kDOUBLE:
367  return sizeof((*double_buffer_)[0]);
368  case kDATE:
369  case kTIME:
370  case kTIMESTAMP:
371  return sizeof((*bigint_buffer_)[0]);
372  default:
373  abort();
374  }
375  }
Definition: sqltypes.h:64
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
Definition: sqltypes.h:68
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
Definition: sqltypes.h:60
SQLTypeInfo columnType

+ Here is the call graph for this function:

std::vector<std::string>* import_export::TypedImportBuffer::getGeoStringBuffer ( ) const
inline

Definition at line 379 of file Importer.h.

References geo_string_buffer_.

379 { return geo_string_buffer_; }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::getStringArrayBuffer ( ) const
inline

Definition at line 383 of file Importer.h.

References string_array_buffer_.

Referenced by get_data_block_pointers().

383  {
384  return string_array_buffer_;
385  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getStringArrayDictBuffer ( ) const
inline

Definition at line 387 of file Importer.h.

References string_array_dict_buffer_.

387  {
388  return string_array_dict_buffer_;
389  }
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:545
std::vector<std::string>* import_export::TypedImportBuffer::getStringBuffer ( ) const
inline

Definition at line 377 of file Importer.h.

References string_buffer_.

377 { return string_buffer_; }
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
int8_t* import_export::TypedImportBuffer::getStringDictBuffer ( ) const
inline

Definition at line 391 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_size(), string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::int_value_at().

391  {
392  switch (column_desc_->columnType.get_size()) {
393  case 1:
394  return reinterpret_cast<int8_t*>(string_dict_i8_buffer_->data());
395  case 2:
396  return reinterpret_cast<int8_t*>(string_dict_i16_buffer_->data());
397  case 4:
398  return reinterpret_cast<int8_t*>(string_dict_i32_buffer_->data());
399  default:
400  abort();
401  }
402  }
HOST DEVICE int get_size() const
Definition: sqltypes.h:390
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:542
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:544
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:543
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionary* import_export::TypedImportBuffer::getStringDictionary ( ) const
inline

Definition at line 321 of file Importer.h.

References string_dict_.

321 { return string_dict_; }
StringDictionary * string_dict_
Definition: Importer.h:548
const SQLTypeInfo& import_export::TypedImportBuffer::getTypeInfo ( ) const
inline

Definition at line 317 of file Importer.h.

References column_desc_, and ColumnDescriptor::columnType.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), get_data_block_pointers(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

317 { return column_desc_->columnType; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
SQLTypeInfo columnType

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::pop_value ( )

Definition at line 726 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, decimal_to_int_type(), double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_buffer_, tinyint_buffer_, and run_benchmark_import::type.

726  {
727  const auto type = column_desc_->columnType.is_decimal()
728  ? decimal_to_int_type(column_desc_->columnType)
729  : column_desc_->columnType.get_type();
730  switch (type) {
731  case kBOOLEAN:
732  bool_buffer_->pop_back();
733  break;
734  case kTINYINT:
735  tinyint_buffer_->pop_back();
736  break;
737  case kSMALLINT:
738  smallint_buffer_->pop_back();
739  break;
740  case kINT:
741  int_buffer_->pop_back();
742  break;
743  case kBIGINT:
744  bigint_buffer_->pop_back();
745  break;
746  case kFLOAT:
747  float_buffer_->pop_back();
748  break;
749  case kDOUBLE:
750  double_buffer_->pop_back();
751  break;
752  case kTEXT:
753  case kVARCHAR:
754  case kCHAR:
755  string_buffer_->pop_back();
756  break;
757  case kDATE:
758  case kTIME:
759  case kTIMESTAMP:
760  bigint_buffer_->pop_back();
761  break;
762  case kARRAY:
763  if (IS_STRING(column_desc_->columnType.get_subtype())) {
764  string_array_buffer_->pop_back();
765  } else {
766  array_buffer_->pop_back();
767  }
768  break;
769  case kPOINT:
770  case kMULTIPOINT:
771  case kLINESTRING:
772  case kMULTILINESTRING:
773  case kPOLYGON:
774  case kMULTIPOLYGON:
775  geo_string_buffer_->pop_back();
776  break;
777  default:
778  CHECK(false) << "TypedImportBuffer::pop_value() does not support type " << type;
779  }
780 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:381
Definition: sqltypes.h:64
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:559
Definition: sqltypes.h:67
Definition: sqltypes.h:68
Definition: sqltypes.h:56
#define IS_STRING(T)
Definition: sqltypes.h:297
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:60
SQLTypeInfo columnType
bool is_decimal() const
Definition: sqltypes.h:579
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

bool import_export::TypedImportBuffer::stringDictCheckpoint ( )
inline

Definition at line 404 of file Importer.h.

References StringDictionary::checkpoint(), and string_dict_.

404  {
405  if (string_dict_ == nullptr) {
406  return true;
407  }
408  return string_dict_->checkpoint();
409  }
StringDictionary * string_dict_
Definition: Importer.h:548
bool checkpoint() noexcept

+ Here is the call graph for this function:

Member Data Documentation

union { ... }
union { ... }
std::vector<ArrayDatum>* import_export::TypedImportBuffer::array_buffer_
std::vector<int64_t>* import_export::TypedImportBuffer::bigint_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::bool_buffer_
size_t import_export::TypedImportBuffer::col_idx

Definition at line 525 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer().

const ColumnDescriptor* import_export::TypedImportBuffer::column_desc_
private
std::vector<double>* import_export::TypedImportBuffer::double_buffer_
std::vector<float>* import_export::TypedImportBuffer::float_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::geo_string_buffer_
std::vector<std::unique_ptr<TypedImportBuffer> >* import_export::TypedImportBuffer::import_buffers

Definition at line 524 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer(), and get_data_block_pointers().

std::vector<int32_t>* import_export::TypedImportBuffer::int_buffer_
std::vector<int16_t>* import_export::TypedImportBuffer::smallint_buffer_
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::string_array_buffer_
std::vector<ArrayDatum>* import_export::TypedImportBuffer::string_array_dict_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::string_buffer_
StringDictionary* import_export::TypedImportBuffer::string_dict_
private
std::vector<uint16_t>* import_export::TypedImportBuffer::string_dict_i16_buffer_
std::vector<int32_t>* import_export::TypedImportBuffer::string_dict_i32_buffer_
std::vector<uint8_t>* import_export::TypedImportBuffer::string_dict_i8_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::tinyint_buffer_

The documentation for this class was generated from the following files: