OmniSciDB  8a228a1076
import_export::TypedImportBuffer Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::TypedImportBuffer:
+ Collaboration diagram for import_export::TypedImportBuffer:

Public Member Functions

 TypedImportBuffer (const ColumnDescriptor *col_desc, StringDictionary *string_dict)
 
 ~TypedImportBuffer ()
 
void addBoolean (const int8_t v)
 
void addTinyint (const int8_t v)
 
void addSmallint (const int16_t v)
 
void addInt (const int32_t v)
 
void addBigint (const int64_t v)
 
void addFloat (const float v)
 
void addDouble (const double v)
 
void addString (const std::string_view v)
 
void addGeoString (const std::string_view v)
 
void addArray (const ArrayDatum &v)
 
std::vector< std::string > & addStringArray ()
 
void addStringArray (const std::vector< std::string > &arr)
 
void addDictEncodedString (const std::vector< std::string > &string_vec)
 
void addDictEncodedStringArray (const std::vector< std::vector< std::string >> &string_array_vec)
 
const SQLTypeInfo & getTypeInfo () const
 
const ColumnDescriptor * getColumnDesc () const
 
StringDictionary * getStringDictionary () const
 
int8_t * getAsBytes () const
 
size_t getElementSize () const
 
std::vector< std::string > * getStringBuffer () const
 
std::vector< std::string > * getGeoStringBuffer () const
 
std::vector< ArrayDatum > * getArrayBuffer () const
 
std::vector< std::vector< std::string > > * getStringArrayBuffer () const
 
std::vector< ArrayDatum > * getStringArrayDictBuffer () const
 
int8_t * getStringDictBuffer () const
 
bool stringDictCheckpoint ()
 
void clear ()
 
size_t add_values (const ColumnDescriptor *cd, const TColumn &data)
 
size_t add_arrow_values (const ColumnDescriptor *cd, const arrow::Array &data, const bool exact_type_match, const ArraySliceRange &slice_range, BadRowsTracker *bad_rows_tracker)
 
void add_value (const ColumnDescriptor *cd, const std::string_view val, const bool is_null, const CopyParams &copy_params, const int64_t replicate_count=0)
 
void add_value (const ColumnDescriptor *cd, const TDatum &val, const bool is_null, const int64_t replicate_count=0)
 
void pop_value ()
 
int64_t get_replicate_count () const
 
void set_replicate_count (const int64_t replicate_count)
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
auto del_values (std::vector< DATA_TYPE > &buffer, BadRowsTracker *const bad_rows_tracker)
 
auto del_values (const SQLTypes type, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, import_export::BadRowsTracker *const bad_rows_tracker)
 

Public Attributes

std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
 
size_t col_idx
 
std::vector< int8_t > * bool_buffer_
 
std::vector< int8_t > * tinyint_buffer_
 
std::vector< int16_t > * smallint_buffer_
 
std::vector< int32_t > * int_buffer_
 
std::vector< int64_t > * bigint_buffer_
 
std::vector< float > * float_buffer_
 
std::vector< double > * double_buffer_
 
std::vector< std::string > * string_buffer_
 
std::vector< std::string > * geo_string_buffer_
 
std::vector< ArrayDatum > * array_buffer_
 
std::vector< std::vector< std::string > > * string_array_buffer_
 
std::vector< uint8_t > * string_dict_i8_buffer_
 
std::vector< uint16_t > * string_dict_i16_buffer_
 
std::vector< int32_t > * string_dict_i32_buffer_
 
std::vector< ArrayDatum > * string_array_dict_buffer_
 

Private Attributes

union {
   std::vector< int8_t > *   bool_buffer_
 
   std::vector< int8_t > *   tinyint_buffer_
 
   std::vector< int16_t > *   smallint_buffer_
 
   std::vector< int32_t > *   int_buffer_
 
   std::vector< int64_t > *   bigint_buffer_
 
   std::vector< float > *   float_buffer_
 
   std::vector< double > *   double_buffer_
 
   std::vector< std::string > *   string_buffer_
 
   std::vector< std::string > *   geo_string_buffer_
 
   std::vector< ArrayDatum > *   array_buffer_
 
   std::vector< std::vector< std::string > > *   string_array_buffer_
 
}; 
 
union {
   std::vector< uint8_t > *   string_dict_i8_buffer_
 
   std::vector< uint16_t > *   string_dict_i16_buffer_
 
   std::vector< int32_t > *   string_dict_i32_buffer_
 
   std::vector< ArrayDatum > *   string_array_dict_buffer_
 
}; 
 
const ColumnDescriptor * column_desc_
 
StringDictionary * string_dict_
 
size_t replicate_count_ = 0
 

Detailed Description

Definition at line 83 of file Importer.h.

Constructor & Destructor Documentation

◆ TypedImportBuffer()

import_export::TypedImportBuffer::TypedImportBuffer ( const ColumnDescriptor * col_desc,
StringDictionary * string_dict 
)
inline

Definition at line 85 of file Importer.h.

References CHECK, ColumnDescriptor::columnType, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, and kVARCHAR.

86  : column_desc_(col_desc), string_dict_(string_dict) {
87  switch (col_desc->columnType.get_type()) {
88  case kBOOLEAN:
89  bool_buffer_ = new std::vector<int8_t>();
90  break;
91  case kTINYINT:
92  tinyint_buffer_ = new std::vector<int8_t>();
93  break;
94  case kSMALLINT:
95  smallint_buffer_ = new std::vector<int16_t>();
96  break;
97  case kINT:
98  int_buffer_ = new std::vector<int32_t>();
99  break;
100  case kBIGINT:
101  case kNUMERIC:
102  case kDECIMAL:
103  bigint_buffer_ = new std::vector<int64_t>();
104  break;
105  case kFLOAT:
106  float_buffer_ = new std::vector<float>();
107  break;
108  case kDOUBLE:
109  double_buffer_ = new std::vector<double>();
110  break;
111  case kTEXT:
112  case kVARCHAR:
113  case kCHAR:
114  string_buffer_ = new std::vector<std::string>();
115  if (col_desc->columnType.get_compression() == kENCODING_DICT) {
116  switch (col_desc->columnType.get_size()) {
117  case 1:
118  string_dict_i8_buffer_ = new std::vector<uint8_t>();
119  break;
120  case 2:
121  string_dict_i16_buffer_ = new std::vector<uint16_t>();
122  break;
123  case 4:
124  string_dict_i32_buffer_ = new std::vector<int32_t>();
125  break;
126  default:
127  CHECK(false);
128  }
129  }
130  break;
131  case kDATE:
132  case kTIME:
133  case kTIMESTAMP:
134  bigint_buffer_ = new std::vector<int64_t>();
135  break;
136  case kARRAY:
137  if (IS_STRING(col_desc->columnType.get_subtype())) {
138  CHECK(col_desc->columnType.get_compression() == kENCODING_DICT);
139  string_array_buffer_ = new std::vector<std::vector<std::string>>();
140  string_array_dict_buffer_ = new std::vector<ArrayDatum>();
141  } else {
142  array_buffer_ = new std::vector<ArrayDatum>();
143  }
144  break;
145  case kPOINT:
146  case kLINESTRING:
147  case kPOLYGON:
148  case kMULTIPOLYGON:
149  geo_string_buffer_ = new std::vector<std::string>();
150  break;
151  default:
152  CHECK(false);
153  }
154  }
Definition: sqltypes.h:51
std::vector< std::string > * string_buffer_
Definition: Importer.h:498
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:500
StringDictionary * string_dict_
Definition: Importer.h:510
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:493
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
std::vector< float > * float_buffer_
Definition: Importer.h:496
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:267
std::vector< double > * double_buffer_
Definition: Importer.h:497
std::vector< int32_t > * int_buffer_
Definition: Importer.h:494
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:507
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:504
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:495
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:260
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:491
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:501
Definition: sqltypes.h:54
Definition: sqltypes.h:55
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:506
Definition: sqltypes.h:43
#define IS_STRING(T)
Definition: sqltypes.h:173
const ColumnDescriptor * column_desc_
Definition: Importer.h:509
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:505
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:492
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
Definition: sqltypes.h:47
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:499
+ Here is the call graph for this function:

◆ ~TypedImportBuffer()

import_export::TypedImportBuffer::~TypedImportBuffer ( )
inline

Definition at line 156 of file Importer.h.

References CHECK, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, and kVARCHAR.

156  {
157  switch (column_desc_->columnType.get_type()) {
158  case kBOOLEAN:
159  delete bool_buffer_;
160  break;
161  case kTINYINT:
162  delete tinyint_buffer_;
163  break;
164  case kSMALLINT:
165  delete smallint_buffer_;
166  break;
167  case kINT:
168  delete int_buffer_;
169  break;
170  case kBIGINT:
171  case kNUMERIC:
172  case kDECIMAL:
173  delete bigint_buffer_;
174  break;
175  case kFLOAT:
176  delete float_buffer_;
177  break;
178  case kDOUBLE:
179  delete double_buffer_;
180  break;
181  case kTEXT:
182  case kVARCHAR:
183  case kCHAR:
184  delete string_buffer_;
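185  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
186  switch (column_desc_->columnType.get_size()) {
187  case 1:
188  delete string_dict_i8_buffer_;
189  break;
190  case 2:
191  delete string_dict_i16_buffer_;
192  break;
193  case 4:
194  delete string_dict_i32_buffer_;
195  break;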
196  }
197  }
198  break;
199  case kDATE:
200  case kTIME:
201  case kTIMESTAMP:
202  delete bigint_buffer_;
203  break;
204  case kARRAY:
205  if (IS_STRING(column_desc_->columnType.get_subtype())) {
206  delete string_array_buffer_;
207  delete string_array_dict_buffer_;
208  } else {
209  delete array_buffer_;
210  }
211  break;
212  case kPOINT:
213  case kLINESTRING:
214  case kPOLYGON:
215  case kMULTIPOLYGON:
216  delete geo_string_buffer_;
217  break;
218  default:
219  CHECK(false);
220  }
221  }
Definition: sqltypes.h:51
std::vector< std::string > * string_buffer_
Definition: Importer.h:498
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:500
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:493
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
std::vector< float > * float_buffer_
Definition: Importer.h:496
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:267
std::vector< double > * double_buffer_
Definition: Importer.h:497
std::vector< int32_t > * int_buffer_
Definition: Importer.h:494
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:507
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:504
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:495
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:260
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:491
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:501
Definition: sqltypes.h:54
Definition: sqltypes.h:55
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:506
Definition: sqltypes.h:43
#define IS_STRING(T)
Definition: sqltypes.h:173
const ColumnDescriptor * column_desc_
Definition: Importer.h:509
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:505
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:492
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
Definition: sqltypes.h:47
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:499

Member Function Documentation

◆ add_arrow_values()

size_t import_export::TypedImportBuffer::add_arrow_values ( const ColumnDescriptor * cd,
const arrow::Array &  data,
const bool  exact_type_match,
const ArraySliceRange & slice_range,
BadRowsTracker * bad_rows_tracker 
)

Definition at line 849 of file Importer.cpp.

References arrow_throw_if(), ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and run_benchmark_import::type.

853  {
854  const auto type = cd->columnType.get_type();
855  if (cd->columnType.get_notnull()) {
856  // We can't have any null values for this column; to have them is an error
857  arrow_throw_if(col.null_count() > 0, "NULL not allowed for column " + cd->columnName);
858  }
859 
860  switch (type) {
861  case kBOOLEAN:
862  if (exact_type_match) {
863  arrow_throw_if(col.type_id() != Type::BOOL, "Expected boolean type");
864  }
865  return convert_arrow_val_to_import_buffer(
866  cd, col, *bool_buffer_, slice_range, bad_rows_tracker);
867  case kTINYINT:
868  if (exact_type_match) {
869  arrow_throw_if(col.type_id() != Type::INT8, "Expected int8 type");
870  }
871  return convert_arrow_val_to_import_buffer(
872  cd, col, *tinyint_buffer_, slice_range, bad_rows_tracker);
873  case kSMALLINT:
874  if (exact_type_match) {
875  arrow_throw_if(col.type_id() != Type::INT16, "Expected int16 type");
876  }
877  return convert_arrow_val_to_import_buffer(
878  cd, col, *smallint_buffer_, slice_range, bad_rows_tracker);
879  case kINT:
880  if (exact_type_match) {
881  arrow_throw_if(col.type_id() != Type::INT32, "Expected int32 type");
882  }
883  return convert_arrow_val_to_import_buffer(
884  cd, col, *int_buffer_, slice_range, bad_rows_tracker);
885  case kBIGINT:
886  case kNUMERIC:
887  case kDECIMAL:
888  if (exact_type_match) {
889  arrow_throw_if(col.type_id() != Type::INT64, "Expected int64 type");
890  }
891  return convert_arrow_val_to_import_buffer(
892  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
893  case kFLOAT:
894  if (exact_type_match) {
895  arrow_throw_if(col.type_id() != Type::FLOAT, "Expected float type");
896  }
897  return convert_arrow_val_to_import_buffer(
898  cd, col, *float_buffer_, slice_range, bad_rows_tracker);
899  case kDOUBLE:
900  if (exact_type_match) {
901  arrow_throw_if(col.type_id() != Type::DOUBLE, "Expected double type");
902  }
903  return convert_arrow_val_to_import_buffer(
904  cd, col, *double_buffer_, slice_range, bad_rows_tracker);
905  case kTEXT:
906  case kVARCHAR:
907  case kCHAR:
908  if (exact_type_match) {
909  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
910  "Expected string type");
911  }
912  return convert_arrow_val_to_import_buffer(
913  cd, col, *string_buffer_, slice_range, bad_rows_tracker);
914  case kTIME:
915  if (exact_type_match) {
916  arrow_throw_if(col.type_id() != Type::TIME32 && col.type_id() != Type::TIME64,
917  "Expected time32 or time64 type");
918  }
919  return convert_arrow_val_to_import_buffer(
920  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
921  case kTIMESTAMP:
922  if (exact_type_match) {
923  arrow_throw_if(col.type_id() != Type::TIMESTAMP, "Expected timestamp type");
924  }
925  return convert_arrow_val_to_import_buffer(
926  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
927  case kDATE:
928  if (exact_type_match) {
929  arrow_throw_if(col.type_id() != Type::DATE32 && col.type_id() != Type::DATE64,
930  "Expected date32 or date64 type");
931  }
932  return convert_arrow_val_to_import_buffer(
933  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
934  case kPOINT:
935  case kLINESTRING:
936  case kPOLYGON:
937  case kMULTIPOLYGON:
938  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
939  "Expected string type");
940  return convert_arrow_val_to_import_buffer(
941  cd, col, *geo_string_buffer_, slice_range, bad_rows_tracker);
942  case kARRAY:
943  throw std::runtime_error("Arrow array appends not yet supported");
944  default:
945  throw std::runtime_error("Invalid Type");
946  }
947 }
Definition: sqltypes.h:51
std::vector< std::string > * string_buffer_
Definition: Importer.h:498
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:493
std::vector< float > * float_buffer_
Definition: Importer.h:496
std::vector< double > * double_buffer_
Definition: Importer.h:497
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:266
std::vector< int32_t > * int_buffer_
Definition: Importer.h:494
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:495
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:491
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:43
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:492
size_t convert_arrow_val_to_import_buffer(const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
Definition: sqltypes.h:47
SQLTypeInfo columnType
std::string columnName
void arrow_throw_if(const bool cond, const std::string &message)
Definition: ArrowImporter.h:41
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:499
+ Here is the call graph for this function:

◆ add_value() [1/2]

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const std::string_view  val,
const bool  is_null,
const CopyParams & copy_params,
const int64_t  replicate_count = 0 
)

Definition at line 499 of file Importer.cpp.

References Datum::bigintval, Datum::boolval, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_decimal_value_to_scale(), SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), Datum::smallintval, import_export::StringToArray(), StringToDatum(), Datum::tinyintval, to_string(), and run_benchmark_import::type.

503  {
504  set_replicate_count(replicate_count);
505  const auto type = cd->columnType.get_type();
506  switch (type) {
507  case kBOOLEAN: {
508  if (is_null) {
509  if (cd->columnType.get_notnull()) {
510  throw std::runtime_error("NULL for column " + cd->columnName);
511  }
512  addBoolean(inline_fixed_encoding_null_val(cd->columnType));
513  } else {
514  auto ti = cd->columnType;
515  Datum d = StringToDatum(val, ti);
516  addBoolean(static_cast<int8_t>(d.boolval));
517  }
518  break;
519  }
520  case kTINYINT: {
521  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
522  auto ti = cd->columnType;
523  Datum d = StringToDatum(val, ti);
524  addTinyint(d.tinyintval);
525  } else {
526  if (cd->columnType.get_notnull()) {
527  throw std::runtime_error("NULL for column " + cd->columnName);
528  }
529  addTinyint(inline_fixed_encoding_null_val(cd->columnType));
530  }
531  break;
532  }
533  case kSMALLINT: {
534  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
535  auto ti = cd->columnType;
536  Datum d = StringToDatum(val, ti);
537  addSmallint(d.smallintval);
538  } else {
539  if (cd->columnType.get_notnull()) {
540  throw std::runtime_error("NULL for column " + cd->columnName);
541  }
542  addSmallint(inline_fixed_encoding_null_val(cd->columnType));
543  }
544  break;
545  }
546  case kINT: {
547  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
548  auto ti = cd->columnType;
549  Datum d = StringToDatum(val, ti);
550  addInt(d.intval);
551  } else {
552  if (cd->columnType.get_notnull()) {
553  throw std::runtime_error("NULL for column " + cd->columnName);
554  }
555  addInt(inline_fixed_encoding_null_val(cd->columnType));
556  }
557  break;
558  }
559  case kBIGINT: {
560  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
561  auto ti = cd->columnType;
562  Datum d = StringToDatum(val, ti);
563  addBigint(d.bigintval);
564  } else {
565  if (cd->columnType.get_notnull()) {
566  throw std::runtime_error("NULL for column " + cd->columnName);
567  }
568  addBigint(inline_fixed_encoding_null_val(cd->columnType));
569  }
570  break;
571  }
572  case kDECIMAL:
573  case kNUMERIC: {
574  if (!is_null) {
575  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
576  Datum d = StringToDatum(val, ti);
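577  const auto converted_decimal_value =
578  convert_decimal_value_to_scale(d.bigintval, ti, cd->columnType);
579  addBigint(converted_decimal_value);
580  } else {
581  if (cd->columnType.get_notnull()) {
582  throw std::runtime_error("NULL for column " + cd->columnName);
583  }
584  addBigint(inline_fixed_encoding_null_val(cd->columnType));
585  }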
586  break;
587  }
588  case kFLOAT:
589  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
590  addFloat(static_cast<float>(std::atof(std::string(val).c_str())));
591  } else {
592  if (cd->columnType.get_notnull()) {
593  throw std::runtime_error("NULL for column " + cd->columnName);
594  }
595  addFloat(NULL_FLOAT);
596  }
597  break;
598  case kDOUBLE:
599  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
600  addDouble(std::atof(std::string(val).c_str()));
601  } else {
602  if (cd->columnType.get_notnull()) {
603  throw std::runtime_error("NULL for column " + cd->columnName);
604  }
605  addDouble(NULL_DOUBLE);
606  }
607  break;
608  case kTEXT:
609  case kVARCHAR:
610  case kCHAR: {
611  // @TODO(wei) for now, use empty string for nulls
612  if (is_null) {
613  if (cd->columnType.get_notnull()) {
614  throw std::runtime_error("NULL for column " + cd->columnName);
615  }
616  addString(std::string());
617  } else {
618  if (val.length() > StringDictionary::MAX_STRLEN) {
619  throw std::runtime_error("String too long for column " + cd->columnName +
620  " was " + std::to_string(val.length()) + " max is " +
621  std::to_string(StringDictionary::MAX_STRLEN));
622  }
623  addString(val);
624  }
625  break;
626  }
627  case kTIME:
628  case kTIMESTAMP:
629  case kDATE:
630  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
631  SQLTypeInfo ti = cd->columnType;
632  Datum d = StringToDatum(val, ti);
633  addBigint(d.bigintval);
634  } else {
635  if (cd->columnType.get_notnull()) {
636  throw std::runtime_error("NULL for column " + cd->columnName);
637  }
638  addBigint(inline_fixed_encoding_null_val(cd->columnType));
639  }
640  break;
641  case kARRAY: {
642  if (is_null && cd->columnType.get_notnull()) {
643  throw std::runtime_error("NULL for column " + cd->columnName);
644  }
645  SQLTypeInfo ti = cd->columnType;
646  if (IS_STRING(ti.get_subtype())) {
647  std::vector<std::string> string_vec;
648  // Just parse string array, don't push it to buffer yet as we might throw
649  import_export::delimited_parser::parse_string_array(
650  std::string(val), copy_params, string_vec);
651  if (!is_null) {
652  // TODO: add support for NULL string arrays
653  if (ti.get_size() > 0) {
654  auto sti = ti.get_elem_type();
655  size_t expected_size = ti.get_size() / sti.get_size();
656  size_t actual_size = string_vec.size();
657  if (actual_size != expected_size) {
658  throw std::runtime_error("Fixed length array column " + cd->columnName +
659  " expects " + std::to_string(expected_size) +
660  " values, received " +
661  std::to_string(actual_size));
662  }
663  }
664  addStringArray(string_vec);
665  } else {
666  if (ti.get_size() > 0) {
667  // TODO: remove once NULL fixlen arrays are allowed
668  throw std::runtime_error("Fixed length array column " + cd->columnName +
669  " currently cannot accept NULL arrays");
670  }
671  // TODO: add support for NULL string arrays, replace with addStringArray(),
672  // for now add whatever parseStringArray() outputs for NULLs ("NULL")
673  addStringArray(string_vec);
674  }
675  } else {
676  if (!is_null) {
677  ArrayDatum d = StringToArray(std::string(val), ti, copy_params);
678  if (d.is_null) { // val could be "NULL"
679  addArray(NullArray(ti));
680  } else {
681  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
682  throw std::runtime_error("Fixed length array for column " + cd->columnName +
683  " has incorrect length: " + std::string(val));
684  }
685  addArray(d);
686  }
687  } else {
688  addArray(NullArray(ti));
689  }
690  }
691  break;
692  }
693  case kPOINT:
694  case kLINESTRING:
695  case kPOLYGON:
696  case kMULTIPOLYGON:
697  addGeoString(val);
698  break;
699  default:
700  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
701  }
702 }
int8_t tinyintval
Definition: sqltypes.h:135
#define NULL_DOUBLE
Definition: sqltypes.h:186
void addBigint(const int64_t v)
Definition: Importer.h:231
void addSmallint(const int16_t v)
Definition: Importer.h:227
Definition: sqltypes.h:51
void addString(const std::string_view v)
Definition: Importer.h:237
bool boolval
Definition: sqltypes.h:134
void set_replicate_count(const int64_t replicate_count)
Definition: Importer.h:474
void addDouble(const double v)
Definition: Importer.h:235
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:368
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
int32_t intval
Definition: sqltypes.h:137
std::string to_string(char const *&&v)
void addFloat(const float v)
Definition: Importer.h:233
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:131
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:266
void addGeoString(const std::string_view v)
Definition: Importer.h:239
void addBoolean(const int8_t v)
Definition: Importer.h:223
void addTinyint(const int8_t v)
Definition: Importer.h:225
int64_t bigintval
Definition: sqltypes.h:138
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:260
#define NULL_FLOAT
Definition: sqltypes.h:185
void addInt(const int32_t v)
Definition: Importer.h:229
int16_t smallintval
Definition: sqltypes.h:136
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:124
Definition: sqltypes.h:54
Definition: sqltypes.h:55
std::vector< std::string > & addStringArray()
Definition: Importer.h:243
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:315
bool is_null(const T &v, const SQLTypeInfo &t)
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:315
void addArray(const ArrayDatum &v)
Definition: Importer.h:241
Definition: sqltypes.h:43
#define IS_STRING(T)
Definition: sqltypes.h:173
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:623
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:47
SQLTypeInfo columnType
std::string columnName
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec)
Parses given string array and inserts into given vector of strings.
+ Here is the call graph for this function:

◆ add_value() [2/2]

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const TDatum &  val,
const bool  is_null,
const int64_t  replicate_count = 0 
)

Definition at line 1271 of file Importer.cpp.

References import_export::addBinaryStringArray(), CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, decimal_to_int_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::TDatumToArrayDatum(), and run_benchmark_import::type.

1274  {
1275  set_replicate_count(replicate_count);
1276  const auto type = cd->columnType.is_decimal() ? decimal_to_int_type(cd->columnType)
1277  : cd->columnType.get_type();
1278  switch (type) {
1279  case kBOOLEAN: {
1280  if (is_null) {
1281  if (cd->columnType.get_notnull()) {
1282  throw std::runtime_error("NULL for column " + cd->columnName);
1283  }
1284  addBoolean(inline_fixed_encoding_null_val(cd->columnType));
1285  } else {
1286  addBoolean((int8_t)datum.val.int_val);
1287  }
1288  break;
1289  }
1290  case kTINYINT:
1291  if (!is_null) {
1292  addTinyint((int8_t)datum.val.int_val);
1293  } else {
1294  if (cd->columnType.get_notnull()) {
1295  throw std::runtime_error("NULL for column " + cd->columnName);
1296  }
1297  addTinyint(inline_fixed_encoding_null_val(cd->columnType));
1298  }
1299  break;
1300  case kSMALLINT:
1301  if (!is_null) {
1302  addSmallint((int16_t)datum.val.int_val);
1303  } else {
1304  if (cd->columnType.get_notnull()) {
1305  throw std::runtime_error("NULL for column " + cd->columnName);
1306  }
1307  addSmallint(inline_fixed_encoding_null_val(cd->columnType));
1308  }
1309  break;
1310  case kINT:
1311  if (!is_null) {
1312  addInt((int32_t)datum.val.int_val);
1313  } else {
1314  if (cd->columnType.get_notnull()) {
1315  throw std::runtime_error("NULL for column " + cd->columnName);
1316  }
1317  addInt(inline_fixed_encoding_null_val(cd->columnType));
1318  }
1319  break;
1320  case kBIGINT:
1321  if (!is_null) {
1322  addBigint(datum.val.int_val);
1323  } else {
1324  if (cd->columnType.get_notnull()) {
1325  throw std::runtime_error("NULL for column " + cd->columnName);
1326  }
1327  addBigint(inline_fixed_encoding_null_val(cd->columnType));
1328  }
1329  break;
1330  case kFLOAT:
1331  if (!is_null) {
1332  addFloat((float)datum.val.real_val);
1333  } else {
1334  if (cd->columnType.get_notnull()) {
1335  throw std::runtime_error("NULL for column " + cd->columnName);
1336  }
1337  addFloat(NULL_FLOAT);
1338  }
1339  break;
1340  case kDOUBLE:
1341  if (!is_null) {
1342  addDouble(datum.val.real_val);
1343  } else {
1344  if (cd->columnType.get_notnull()) {
1345  throw std::runtime_error("NULL for column " + cd->columnName);
1346  }
1347  addDouble(NULL_DOUBLE);
1348  }
1349  break;
1350  case kTEXT:
1351  case kVARCHAR:
1352  case kCHAR: {
1353  // @TODO(wei) for now, use empty string for nulls
1354  if (is_null) {
1355  if (cd->columnType.get_notnull()) {
1356  throw std::runtime_error("NULL for column " + cd->columnName);
1357  }
1358  addString(std::string());
1359  } else {
1360  addString(datum.val.str_val);
1361  }
1362  break;
1363  }
1364  case kTIME:
1365  case kTIMESTAMP:
1366  case kDATE: {
1367  if (!is_null) {
1368  addBigint(datum.val.int_val);
1369  } else {
1370  if (cd->columnType.get_notnull()) {
1371  throw std::runtime_error("NULL for column " + cd->columnName);
1372  }
1373  addBigint(inline_fixed_encoding_null_val(cd->columnType));
1374  }
1375  break;
1376  }
1377  case kARRAY:
1378  if (is_null && cd->columnType.get_notnull()) {
1379  throw std::runtime_error("NULL for column " + cd->columnName);
1380  }
1381  if (IS_STRING(cd->columnType.get_subtype())) {
1382  std::vector<std::string>& string_vec = addStringArray();
1383  addBinaryStringArray(datum, string_vec);
1384  } else {
1385  if (!is_null) {
1386  addArray(TDatumToArrayDatum(datum, cd->columnType));
1387  } else {
1388  addArray(NullArray(cd->columnType));
1389  }
1390  }
1391  break;
1392  case kPOINT:
1393  case kLINESTRING:
1394  case kPOLYGON:
1395  case kMULTIPOLYGON:
1396  if (is_null) {
1397  if (cd->columnType.get_notnull()) {
1398  throw std::runtime_error("NULL for column " + cd->columnName);
1399  }
1400  addGeoString(std::string());
1401  } else {
1402  addGeoString(datum.val.str_val);
1403  }
1404  break;
1405  default:
1406  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
1407  }
1408 }
#define NULL_DOUBLE
Definition: sqltypes.h:186
void addBigint(const int64_t v)
Definition: Importer.h:231
void addSmallint(const int16_t v)
Definition: Importer.h:227
Definition: sqltypes.h:51
void addString(const std::string_view v)
Definition: Importer.h:237
void set_replicate_count(const int64_t replicate_count)
Definition: Importer.h:474
void addDouble(const double v)
Definition: Importer.h:235
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:368
bool is_decimal() const
Definition: sqltypes.h:419
void addFloat(const float v)
Definition: Importer.h:233
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:266
void addGeoString(const std::string_view v)
Definition: Importer.h:239
ArrayDatum TDatumToArrayDatum(const TDatum &datum, const SQLTypeInfo &ti)
Definition: Importer.cpp:452
void addBoolean(const int8_t v)
Definition: Importer.h:223
void addTinyint(const int8_t v)
Definition: Importer.h:225
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:260
#define NULL_FLOAT
Definition: sqltypes.h:185
void addInt(const int32_t v)
Definition: Importer.h:229
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:299
Definition: sqltypes.h:54
Definition: sqltypes.h:55
std::vector< std::string > & addStringArray()
Definition: Importer.h:243
bool is_null(const T &v, const SQLTypeInfo &t)
void addArray(const ArrayDatum &v)
Definition: Importer.h:241
Definition: sqltypes.h:43
#define IS_STRING(T)
Definition: sqltypes.h:173
#define CHECK(condition)
Definition: Logger.h:197
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:47
SQLTypeInfo columnType
std::string columnName
void addBinaryStringArray(const TDatum &datum, std::vector< std::string > &string_vec)
Definition: Importer.cpp:400
+ Here is the call graph for this function:

◆ add_values()

size_t import_export::TypedImportBuffer::add_values ( const ColumnDescriptor cd,
const TColumn &  data 
)

Definition at line 950 of file Importer.cpp.

References checked_malloc(), ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, and import_export::NullArray().

950  {
951  size_t dataSize = 0;
952  if (cd->columnType.get_notnull()) {
953  // We can't have any null values for this column; to have them is an error
954  if (std::any_of(col.nulls.begin(), col.nulls.end(), [](int i) { return i != 0; })) {
955  throw std::runtime_error("NULL for column " + cd->columnName);
956  }
957  }
958 
959  switch (cd->columnType.get_type()) {
960  case kBOOLEAN: {
961  dataSize = col.data.int_col.size();
962  bool_buffer_->reserve(dataSize);
963  for (size_t i = 0; i < dataSize; i++) {
964  if (col.nulls[i]) {
965  bool_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
966  } else {
967  bool_buffer_->push_back((int8_t)col.data.int_col[i]);
968  }
969  }
970  break;
971  }
972  case kTINYINT: {
973  dataSize = col.data.int_col.size();
974  tinyint_buffer_->reserve(dataSize);
975  for (size_t i = 0; i < dataSize; i++) {
976  if (col.nulls[i]) {
977  tinyint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
978  } else {
979  tinyint_buffer_->push_back((int8_t)col.data.int_col[i]);
980  }
981  }
982  break;
983  }
984  case kSMALLINT: {
985  dataSize = col.data.int_col.size();
986  smallint_buffer_->reserve(dataSize);
987  for (size_t i = 0; i < dataSize; i++) {
988  if (col.nulls[i]) {
989  smallint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
990  } else {
991  smallint_buffer_->push_back((int16_t)col.data.int_col[i]);
992  }
993  }
994  break;
995  }
996  case kINT: {
997  dataSize = col.data.int_col.size();
998  int_buffer_->reserve(dataSize);
999  for (size_t i = 0; i < dataSize; i++) {
1000  if (col.nulls[i]) {
1001  int_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1002  } else {
1003  int_buffer_->push_back((int32_t)col.data.int_col[i]);
1004  }
1005  }
1006  break;
1007  }
1008  case kBIGINT:
1009  case kNUMERIC:
1010  case kDECIMAL: {
1011  dataSize = col.data.int_col.size();
1012  bigint_buffer_->reserve(dataSize);
1013  for (size_t i = 0; i < dataSize; i++) {
1014  if (col.nulls[i]) {
1015  bigint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1016  } else {
1017  bigint_buffer_->push_back((int64_t)col.data.int_col[i]);
1018  }
1019  }
1020  break;
1021  }
1022  case kFLOAT: {
1023  dataSize = col.data.real_col.size();
1024  float_buffer_->reserve(dataSize);
1025  for (size_t i = 0; i < dataSize; i++) {
1026  if (col.nulls[i]) {
1027  float_buffer_->push_back(NULL_FLOAT);
1028  } else {
1029  float_buffer_->push_back((float)col.data.real_col[i]);
1030  }
1031  }
1032  break;
1033  }
1034  case kDOUBLE: {
1035  dataSize = col.data.real_col.size();
1036  double_buffer_->reserve(dataSize);
1037  for (size_t i = 0; i < dataSize; i++) {
1038  if (col.nulls[i]) {
1039  double_buffer_->push_back(NULL_DOUBLE);
1040  } else {
1041  double_buffer_->push_back((double)col.data.real_col[i]);
1042  }
1043  }
1044  break;
1045  }
1046  case kTEXT:
1047  case kVARCHAR:
1048  case kCHAR: {
1049  // TODO: for now, use empty string for nulls
1050  dataSize = col.data.str_col.size();
1051  string_buffer_->reserve(dataSize);
1052  for (size_t i = 0; i < dataSize; i++) {
1053  if (col.nulls[i]) {
1054  string_buffer_->push_back(std::string());
1055  } else {
1056  string_buffer_->push_back(col.data.str_col[i]);
1057  }
1058  }
1059  break;
1060  }
1061  case kTIME:
1062  case kTIMESTAMP:
1063  case kDATE: {
1064  dataSize = col.data.int_col.size();
1065  bigint_buffer_->reserve(dataSize);
1066  for (size_t i = 0; i < dataSize; i++) {
1067  if (col.nulls[i]) {
1068  bigint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1069  } else {
1070  bigint_buffer_->push_back(static_cast<int64_t>(col.data.int_col[i]));
1071  }
1072  }
1073  break;
1074  }
1075  case kPOINT:
1076  case kLINESTRING:
1077  case kPOLYGON:
1078  case kMULTIPOLYGON: {
1079  dataSize = col.data.str_col.size();
1080  geo_string_buffer_->reserve(dataSize);
1081  for (size_t i = 0; i < dataSize; i++) {
1082  if (col.nulls[i]) {
1083  // TODO: add support for NULL geo
1084  geo_string_buffer_->push_back(std::string());
1085  } else {
1086  geo_string_buffer_->push_back(col.data.str_col[i]);
1087  }
1088  }
1089  break;
1090  }
1091  case kARRAY: {
1092  dataSize = col.data.arr_col.size();
1093  if (IS_STRING(cd->columnType.get_subtype())) {
1094  for (size_t i = 0; i < dataSize; i++) {
1095  std::vector<std::string>& string_vec = addStringArray();
1096  if (!col.nulls[i]) {
1097  size_t stringArrSize = col.data.arr_col[i].data.str_col.size();
1098  for (size_t str_idx = 0; str_idx != stringArrSize; ++str_idx) {
1099  string_vec.push_back(col.data.arr_col[i].data.str_col[str_idx]);
1100  }
1101  }
1102  }
1103  } else {
1104  auto elem_ti = cd->columnType.get_subtype();
1105  switch (elem_ti) {
1106  case kBOOLEAN: {
1107  for (size_t i = 0; i < dataSize; i++) {
1108  if (col.nulls[i]) {
1109  addArray(NullArray(cd->columnType));
1110  } else {
1111  size_t len = col.data.arr_col[i].data.int_col.size();
1112  size_t byteSize = len * sizeof(int8_t);
1113  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1114  int8_t* p = buf;
1115  for (size_t j = 0; j < len; ++j) {
1116  *(bool*)p = static_cast<bool>(col.data.arr_col[i].data.int_col[j]);
1117  p += sizeof(bool);
1118  }
1119  addArray(ArrayDatum(byteSize, buf, false));
1120  }
1121  }
1122  break;
1123  }
1124  case kTINYINT: {
1125  for (size_t i = 0; i < dataSize; i++) {
1126  if (col.nulls[i]) {
1127  addArray(NullArray(cd->columnType));
1128  } else {
1129  size_t len = col.data.arr_col[i].data.int_col.size();
1130  size_t byteSize = len * sizeof(int8_t);
1131  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1132  int8_t* p = buf;
1133  for (size_t j = 0; j < len; ++j) {
1134  *(int8_t*)p = static_cast<int8_t>(col.data.arr_col[i].data.int_col[j]);
1135  p += sizeof(int8_t);
1136  }
1137  addArray(ArrayDatum(byteSize, buf, false));
1138  }
1139  }
1140  break;
1141  }
1142  case kSMALLINT: {
1143  for (size_t i = 0; i < dataSize; i++) {
1144  if (col.nulls[i]) {
1145  addArray(NullArray(cd->columnType));
1146  } else {
1147  size_t len = col.data.arr_col[i].data.int_col.size();
1148  size_t byteSize = len * sizeof(int16_t);
1149  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1150  int8_t* p = buf;
1151  for (size_t j = 0; j < len; ++j) {
1152  *(int16_t*)p =
1153  static_cast<int16_t>(col.data.arr_col[i].data.int_col[j]);
1154  p += sizeof(int16_t);
1155  }
1156  addArray(ArrayDatum(byteSize, buf, false));
1157  }
1158  }
1159  break;
1160  }
1161  case kINT: {
1162  for (size_t i = 0; i < dataSize; i++) {
1163  if (col.nulls[i]) {
1164  addArray(NullArray(cd->columnType));
1165  } else {
1166  size_t len = col.data.arr_col[i].data.int_col.size();
1167  size_t byteSize = len * sizeof(int32_t);
1168  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1169  int8_t* p = buf;
1170  for (size_t j = 0; j < len; ++j) {
1171  *(int32_t*)p =
1172  static_cast<int32_t>(col.data.arr_col[i].data.int_col[j]);
1173  p += sizeof(int32_t);
1174  }
1175  addArray(ArrayDatum(byteSize, buf, false));
1176  }
1177  }
1178  break;
1179  }
1180  case kBIGINT:
1181  case kNUMERIC:
1182  case kDECIMAL: {
1183  for (size_t i = 0; i < dataSize; i++) {
1184  if (col.nulls[i]) {
1185  addArray(NullArray(cd->columnType));
1186  } else {
1187  size_t len = col.data.arr_col[i].data.int_col.size();
1188  size_t byteSize = len * sizeof(int64_t);
1189  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1190  int8_t* p = buf;
1191  for (size_t j = 0; j < len; ++j) {
1192  *(int64_t*)p =
1193  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1194  p += sizeof(int64_t);
1195  }
1196  addArray(ArrayDatum(byteSize, buf, false));
1197  }
1198  }
1199  break;
1200  }
1201  case kFLOAT: {
1202  for (size_t i = 0; i < dataSize; i++) {
1203  if (col.nulls[i]) {
1204  addArray(NullArray(cd->columnType));
1205  } else {
1206  size_t len = col.data.arr_col[i].data.real_col.size();
1207  size_t byteSize = len * sizeof(float);
1208  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1209  int8_t* p = buf;
1210  for (size_t j = 0; j < len; ++j) {
1211  *(float*)p = static_cast<float>(col.data.arr_col[i].data.real_col[j]);
1212  p += sizeof(float);
1213  }
1214  addArray(ArrayDatum(byteSize, buf, false));
1215  }
1216  }
1217  break;
1218  }
1219  case kDOUBLE: {
1220  for (size_t i = 0; i < dataSize; i++) {
1221  if (col.nulls[i]) {
1222  addArray(NullArray(cd->columnType));
1223  } else {
1224  size_t len = col.data.arr_col[i].data.real_col.size();
1225  size_t byteSize = len * sizeof(double);
1226  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1227  int8_t* p = buf;
1228  for (size_t j = 0; j < len; ++j) {
1229  *(double*)p = static_cast<double>(col.data.arr_col[i].data.real_col[j]);
1230  p += sizeof(double);
1231  }
1232  addArray(ArrayDatum(byteSize, buf, false));
1233  }
1234  }
1235  break;
1236  }
1237  case kTIME:
1238  case kTIMESTAMP:
1239  case kDATE: {
1240  for (size_t i = 0; i < dataSize; i++) {
1241  if (col.nulls[i]) {
1242  addArray(NullArray(cd->columnType));
1243  } else {
1244  size_t len = col.data.arr_col[i].data.int_col.size();
1245  size_t byteWidth = sizeof(int64_t);
1246  size_t byteSize = len * byteWidth;
1247  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1248  int8_t* p = buf;
1249  for (size_t j = 0; j < len; ++j) {
1250  *reinterpret_cast<int64_t*>(p) =
1251  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1252  p += sizeof(int64_t);
1253  }
1254  addArray(ArrayDatum(byteSize, buf, false));
1255  }
1256  }
1257  break;
1258  }
1259  default:
1260  throw std::runtime_error("Invalid Array Type");
1261  }
1262  }
1263  break;
1264  }
1265  default:
1266  throw std::runtime_error("Invalid Type");
1267  }
1268  return dataSize;
1269 }
#define NULL_DOUBLE
Definition: sqltypes.h:186
Definition: sqltypes.h:51
std::vector< std::string > * string_buffer_
Definition: Importer.h:498
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:368
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:493
std::vector< float > * float_buffer_
Definition: Importer.h:496
std::vector< double > * double_buffer_
Definition: Importer.h:497
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:131
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:266
std::vector< int32_t > * int_buffer_
Definition: Importer.h:494
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:495
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:260
#define NULL_FLOAT
Definition: sqltypes.h:185
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:491
Definition: sqltypes.h:54
Definition: sqltypes.h:55
std::vector< std::string > & addStringArray()
Definition: Importer.h:243
void addArray(const ArrayDatum &v)
Definition: Importer.h:241
Definition: sqltypes.h:43
#define IS_STRING(T)
Definition: sqltypes.h:173
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:492
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:47
SQLTypeInfo columnType
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:499
+ Here is the call graph for this function:

◆ addArray()

void import_export::TypedImportBuffer::addArray ( const ArrayDatum v)
inline

Definition at line 241 of file Importer.h.

241 { array_buffer_->push_back(v); }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:500

◆ addBigint()

void import_export::TypedImportBuffer::addBigint ( const int64_t  v)
inline

Definition at line 231 of file Importer.h.

231 { bigint_buffer_->push_back(v); }
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:495

◆ addBoolean()

void import_export::TypedImportBuffer::addBoolean ( const int8_t  v)
inline

Definition at line 223 of file Importer.h.

223 { bool_buffer_->push_back(v); }
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:491

◆ addDictEncodedString()

void import_export::TypedImportBuffer::addDictEncodedString ( const std::vector< std::string > &  string_vec)

Definition at line 471 of file Importer.cpp.

References CHECK, and StringDictionary::MAX_STRLEN.

471  {
472  CHECK(string_dict_);
473  std::vector<std::string_view> string_view_vec;
474  string_view_vec.reserve(string_vec.size());
475  for (const auto& str : string_vec) {
476  if (str.size() > StringDictionary::MAX_STRLEN) {
477  throw std::runtime_error("String too long for dictionary encoding.");
478  }
479  string_view_vec.push_back(str);
480  }
481  switch (column_desc_->columnType.get_size()) {
482  case 1:
483  string_dict_i8_buffer_->resize(string_view_vec.size());
484  string_dict_->getOrAddBulk(string_view_vec, string_dict_i8_buffer_->data());
485  break;
486  case 2:
487  string_dict_i16_buffer_->resize(string_view_vec.size());
488  string_dict_->getOrAddBulk(string_view_vec, string_dict_i16_buffer_->data());
489  break;
490  case 4:
491  string_dict_i32_buffer_->resize(string_view_vec.size());
492  string_dict_->getOrAddBulk(string_view_vec, string_dict_i32_buffer_->data());
493  break;
494  default:
495  CHECK(false);
496  }
497 }
StringDictionary * string_dict_
Definition: Importer.h:510
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:504
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:506
const ColumnDescriptor * column_desc_
Definition: Importer.h:509
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:505
#define CHECK(condition)
Definition: Logger.h:197
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType

◆ addDictEncodedStringArray()

void import_export::TypedImportBuffer::addDictEncodedStringArray ( const std::vector< std::vector< std::string >> &  string_array_vec)
inline

Definition at line 254 of file Importer.h.

References CHECK, checked_malloc(), and StringDictionary::MAX_STRLEN.

255  {
256  CHECK(string_dict_);
257 
258  // first check data is ok
259  for (auto& p : string_array_vec) {
260  for (const auto& str : p) {
261  if (str.size() > StringDictionary::MAX_STRLEN) {
262  throw std::runtime_error("String too long for dictionary encoding.");
263  }
264  }
265  }
266 
267  std::vector<std::vector<int32_t>> ids_array(0);
268  string_dict_->getOrAddBulkArray(string_array_vec, ids_array);
269 
270  for (auto& p : ids_array) {
271  size_t len = p.size() * sizeof(int32_t);
272  auto a = static_cast<int32_t*>(checked_malloc(len));
273  memcpy(a, &p[0], len);
274  // TODO: distinguish between empty and NULL
275  string_array_dict_buffer_->push_back(
276  ArrayDatum(len, reinterpret_cast<int8_t*>(a), len == 0));
277  }
278  }
StringDictionary * string_dict_
Definition: Importer.h:510
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:131
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:507
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:44
void getOrAddBulkArray(const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec)
#define CHECK(condition)
Definition: Logger.h:197
static constexpr size_t MAX_STRLEN
+ Here is the call graph for this function:

◆ addDouble()

void import_export::TypedImportBuffer::addDouble ( const double  v)
inline

Definition at line 235 of file Importer.h.

235 { double_buffer_->push_back(v); }
std::vector< double > * double_buffer_
Definition: Importer.h:497

◆ addFloat()

void import_export::TypedImportBuffer::addFloat ( const float  v)
inline

Definition at line 233 of file Importer.h.

233 { float_buffer_->push_back(v); }
std::vector< float > * float_buffer_
Definition: Importer.h:496

◆ addGeoString()

void import_export::TypedImportBuffer::addGeoString ( const std::string_view  v)
inline

Definition at line 239 of file Importer.h.

239 { geo_string_buffer_->emplace_back(v); }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:499

◆ addInt()

void import_export::TypedImportBuffer::addInt ( const int32_t  v)
inline

Definition at line 229 of file Importer.h.

229 { int_buffer_->push_back(v); }
std::vector< int32_t > * int_buffer_
Definition: Importer.h:494

◆ addSmallint()

void import_export::TypedImportBuffer::addSmallint ( const int16_t  v)
inline

Definition at line 227 of file Importer.h.

227 { smallint_buffer_->push_back(v); }
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:493

◆ addString()

void import_export::TypedImportBuffer::addString ( const std::string_view  v)
inline

Definition at line 237 of file Importer.h.

237 { string_buffer_->emplace_back(v); }
std::vector< std::string > * string_buffer_
Definition: Importer.h:498

◆ addStringArray() [1/2]

std::vector<std::string>& import_export::TypedImportBuffer::addStringArray ( )
inline

Definition at line 243 of file Importer.h.

243  {
244  string_array_buffer_->emplace_back();
245  return string_array_buffer_->back();
246  }
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:501

◆ addStringArray() [2/2]

void import_export::TypedImportBuffer::addStringArray ( const std::vector< std::string > &  arr)
inline

Definition at line 248 of file Importer.h.

248  {
249  string_array_buffer_->push_back(arr);
250  }
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:501

◆ addTinyint()

void import_export::TypedImportBuffer::addTinyint ( const int8_t  v)
inline

Definition at line 225 of file Importer.h.

225 { tinyint_buffer_->push_back(v); }
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:492

◆ clear()

void import_export::TypedImportBuffer::clear ( )
inline

Definition at line 374 of file Importer.h.

References CHECK, anonymous_namespace{TypedDataAccessors.h}::is_null(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, and kVARCHAR.

374  {
375  switch (column_desc_->columnType.get_type()) {
376  case kBOOLEAN: {
377  bool_buffer_->clear();
378  break;
379  }
380  case kTINYINT: {
381  tinyint_buffer_->clear();
382  break;
383  }
384  case kSMALLINT: {
385  smallint_buffer_->clear();
386  break;
387  }
388  case kINT: {
389  int_buffer_->clear();
390  break;
391  }
392  case kBIGINT:
393  case kNUMERIC:
394  case kDECIMAL: {
395  bigint_buffer_->clear();
396  break;
397  }
398  case kFLOAT: {
399  float_buffer_->clear();
400  break;
401  }
402  case kDOUBLE: {
403  double_buffer_->clear();
404  break;
405  }
406  case kTEXT:
407  case kVARCHAR:
408  case kCHAR: {
409  string_buffer_->clear();
410  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
411  switch (column_desc_->columnType.get_size()) {
412  case 1:
413  string_dict_i8_buffer_->clear();
414  break;
415  case 2:
416  string_dict_i16_buffer_->clear();
417  break;
418  case 4:
419  string_dict_i32_buffer_->clear();
420  break;
421  default:
422  CHECK(false);
423  }
424  }
425  break;
426  }
427  case kDATE:
428  case kTIME:
429  case kTIMESTAMP:
430  bigint_buffer_->clear();
431  break;
432  case kARRAY: {
433  if (IS_STRING(column_desc_->columnType.get_subtype())) {
434  string_array_buffer_->clear();
435  string_array_dict_buffer_->clear();
436  } else {
437  array_buffer_->clear();
438  }
439  break;
440  }
441  case kPOINT:
442  case kLINESTRING:
443  case kPOLYGON:
444  case kMULTIPOLYGON:
445  geo_string_buffer_->clear();
446  break;
447  default:
448  CHECK(false);
449  }
450  }
Definition: sqltypes.h:51
std::vector< std::string > * string_buffer_
Definition: Importer.h:498
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:500
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:493
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
std::vector< float > * float_buffer_
Definition: Importer.h:496
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:267
std::vector< double > * double_buffer_
Definition: Importer.h:497
std::vector< int32_t > * int_buffer_
Definition: Importer.h:494
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:507
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:504
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:495
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:260
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:491
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:501
Definition: sqltypes.h:54
Definition: sqltypes.h:55
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:506
Definition: sqltypes.h:43
#define IS_STRING(T)
Definition: sqltypes.h:173
const ColumnDescriptor * column_desc_
Definition: Importer.h:509
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:505
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:492
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
Definition: sqltypes.h:47
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:499
+ Here is the call graph for this function:

◆ convert_arrow_val_to_import_buffer() [1/2]

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor cd,
const arrow::Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange slice_range,
BadRowsTracker *const  bad_rows_tracker 
)

◆ convert_arrow_val_to_import_buffer() [2/2]

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor cd,
const Array array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange slice_range,
import_export::BadRowsTracker *const  bad_rows_tracker 
)

Definition at line 764 of file Importer.cpp.

References import_export::Importer::buffer, anonymous_namespace{ArrowImporter.h}::error_context(), SQLTypeInfo::get_type(), import_export::Importer::getCatalog(), Geo_namespace::GeoTypesFactory::getGeoColumns(), Geo_namespace::GeoTypesFactory::getNullGeoColumns(), import_export::BadRowsTracker::importer, import_export::BadRowsTracker::mutex, import_export::BadRowsTracker::rows, import_export::Importer::set_geo_physical_import_buffer(), and anonymous_namespace{ArrowImporter.h}::value_getter().

769  {
770  auto data =
771  std::make_unique<DataBuffer<DATA_TYPE>>(cd, array, buffer, bad_rows_tracker);
772  auto f_value_getter = value_getter(array, cd, bad_rows_tracker);
773  std::function<void(const int64_t)> f_add_geo_phy_cols = [&](const int64_t row) {};
774  if (bad_rows_tracker && cd->columnType.is_geometry()) {
775  f_add_geo_phy_cols = [&](const int64_t row) {
776  // Populate physical columns (ref. DBHandler::load_table)
777  std::vector<double> coords, bounds;
778  std::vector<int> ring_sizes, poly_rings;
779  int render_group = 0;
780  SQLTypeInfo ti;
781  // replace any unexpected exception from getGeoColumns or other
782  // on this path with a GeoImportException so that we wont over
783  // push a null to the logical column...
784  try {
785  SQLTypeInfo import_ti{ti};
786  if (array.IsNull(row)) {
787  Geo_namespace::GeoTypesFactory::getNullGeoColumns(
788  import_ti, coords, bounds, ring_sizes, poly_rings, false);
789  } else {
790  arrow_throw_if<GeoImportException>(
791  !Geo_namespace::GeoTypesFactory::getGeoColumns(geo_string_buffer_->back(),
792  ti,
793  coords,
794  bounds,
795  ring_sizes,
796  poly_rings,
797  false),
798  error_context(cd, bad_rows_tracker) + "Invalid geometry");
799  arrow_throw_if<GeoImportException>(
800  cd->columnType.get_type() != ti.get_type(),
801  error_context(cd, bad_rows_tracker) + "Geometry type mismatch");
802  }
803  auto col_idx_workpad = col_idx; // what a pitfall!!
804  import_export::Importer::set_geo_physical_import_buffer(
805  bad_rows_tracker->importer->getCatalog(),
806  cd,
807  *import_buffers,
808  col_idx_workpad,
809  coords,
810  bounds,
811  ring_sizes,
812  poly_rings,
813  render_group);
814  } catch (GeoImportException&) {
815  throw;
816  } catch (std::runtime_error& e) {
817  throw GeoImportException(e.what());
818  } catch (const std::exception& e) {
819  throw GeoImportException(e.what());
820  } catch (...) {
821  throw GeoImportException("unknown exception");
822  }
823  };
824  }
825  auto f_mark_a_bad_row = [&](const auto row) {
826  std::unique_lock<std::mutex> lck(bad_rows_tracker->mutex);
827  bad_rows_tracker->rows.insert(row - slice_range.first);
828  };
829  buffer.reserve(slice_range.second - slice_range.first);
830  for (size_t row = slice_range.first; row < slice_range.second; ++row) {
831  try {
832  *data << (array.IsNull(row) ? nullptr : f_value_getter(array, row));
833  f_add_geo_phy_cols(row);
834  } catch (GeoImportException&) {
835  f_mark_a_bad_row(row);
836  } catch (ArrowImporterException&) {
837  // trace bad rows of each column; otherwise rethrow.
838  if (bad_rows_tracker) {
839  *data << nullptr;
840  f_mark_a_bad_row(row);
841  } else {
842  throw;
843  }
844  }
845  }
846  return buffer.size();
847 }
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:700
auto value_getter(const arrow::Array &array, const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const int64_t replicate_count=0)
Definition: Importer.cpp:1422
Catalog_Namespace::Catalog & getCatalog()
Definition: Importer.h:785
std::set< int64_t > rows
Definition: Importer.h:76
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:486
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: geo_types.cpp:907
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
std::string error_context(const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
Definition: ArrowImporter.h:76
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:499
+ Here is the call graph for this function:

◆ del_values() [1/2]

template<typename DATA_TYPE >
auto import_export::TypedImportBuffer::del_values ( std::vector< DATA_TYPE > &  buffer,
BadRowsTracker *const  bad_rows_tracker 
)

Referenced by import_export::DataStreamSink::archivePlumber().

+ Here is the caller graph for this function:

◆ del_values() [2/2]

auto import_export::TypedImportBuffer::del_values ( const SQLTypes  type,
BadRowsTracker *const  bad_rows_tracker 
)

◆ get_replicate_count()

int64_t import_export::TypedImportBuffer::get_replicate_count ( ) const
inline

Definition at line 473 of file Importer.h.

473 { return replicate_count_; }

◆ getArrayBuffer()

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getArrayBuffer ( ) const
inline

Definition at line 344 of file Importer.h.

344 { return array_buffer_; }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:500

◆ getAsBytes()

int8_t* import_export::TypedImportBuffer::getAsBytes ( ) const
inline

Definition at line 286 of file Importer.h.

References kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, and kTINYINT.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

286  {
287  switch (column_desc_->columnType.get_type()) {
288  case kBOOLEAN:
289  return reinterpret_cast<int8_t*>(&((*bool_buffer_)[0]));
290  case kTINYINT:
291  return reinterpret_cast<int8_t*>(&((*tinyint_buffer_)[0]));
292  case kSMALLINT:
293  return reinterpret_cast<int8_t*>(&((*smallint_buffer_)[0]));
294  case kINT:
295  return reinterpret_cast<int8_t*>(&((*int_buffer_)[0]));
296  case kBIGINT:
297  case kNUMERIC:
298  case kDECIMAL:
299  return reinterpret_cast<int8_t*>(&((*bigint_buffer_)[0]));
300  case kFLOAT:
301  return reinterpret_cast<int8_t*>(&((*float_buffer_)[0]));
302  case kDOUBLE:
303  return reinterpret_cast<int8_t*>(&((*double_buffer_)[0]));
304  case kDATE:
305  case kTIME:
306  case kTIMESTAMP:
307  return reinterpret_cast<int8_t*>(&((*bigint_buffer_)[0]));
308  default:
309  abort();
310  }
311  }
Definition: sqltypes.h:51
Definition: sqltypes.h:55
const ColumnDescriptor * column_desc_
Definition: Importer.h:509
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
Definition: sqltypes.h:47
SQLTypeInfo columnType
+ Here is the caller graph for this function:

◆ getColumnDesc()

const ColumnDescriptor* import_export::TypedImportBuffer::getColumnDesc ( ) const
inline

Definition at line 282 of file Importer.h.

282 { return column_desc_; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:509

◆ getElementSize()

size_t import_export::TypedImportBuffer::getElementSize ( ) const
inline

Definition at line 313 of file Importer.h.

References kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, and kTINYINT.

313  {
314  switch (column_desc_->columnType.get_type()) {
315  case kBOOLEAN:
316  return sizeof((*bool_buffer_)[0]);
317  case kTINYINT:
318  return sizeof((*tinyint_buffer_)[0]);
319  case kSMALLINT:
320  return sizeof((*smallint_buffer_)[0]);
321  case kINT:
322  return sizeof((*int_buffer_)[0]);
323  case kBIGINT:
324  case kNUMERIC:
325  case kDECIMAL:
326  return sizeof((*bigint_buffer_)[0]);
327  case kFLOAT:
328  return sizeof((*float_buffer_)[0]);
329  case kDOUBLE:
330  return sizeof((*double_buffer_)[0]);
331  case kDATE:
332  case kTIME:
333  case kTIMESTAMP:
334  return sizeof((*bigint_buffer_)[0]);
335  default:
336  abort();
337  }
338  }
Definition: sqltypes.h:51
Definition: sqltypes.h:55
const ColumnDescriptor * column_desc_
Definition: Importer.h:509
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
Definition: sqltypes.h:47
SQLTypeInfo columnType

◆ getGeoStringBuffer()

std::vector<std::string>* import_export::TypedImportBuffer::getGeoStringBuffer ( ) const
inline

Definition at line 342 of file Importer.h.

342 { return geo_string_buffer_; }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:499

◆ getStringArrayBuffer()

std::vector<std::vector<std::string> >* import_export::TypedImportBuffer::getStringArrayBuffer ( ) const
inline

Definition at line 346 of file Importer.h.

346  {
347  return string_array_buffer_;
348  }
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:501

◆ getStringArrayDictBuffer()

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getStringArrayDictBuffer ( ) const
inline

Definition at line 350 of file Importer.h.

350  {
351  return string_array_dict_buffer_;
352  }
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:507

◆ getStringBuffer()

std::vector<std::string>* import_export::TypedImportBuffer::getStringBuffer ( ) const
inline

Definition at line 340 of file Importer.h.

340 { return string_buffer_; }
std::vector< std::string > * string_buffer_
Definition: Importer.h:498

◆ getStringDictBuffer()

int8_t* import_export::TypedImportBuffer::getStringDictBuffer ( ) const
inline

Definition at line 354 of file Importer.h.

Referenced by import_export::anonymous_namespace{Importer.cpp}::int_value_at().

354  {
355  switch (column_desc_->columnType.get_size()) {
356  case 1:
357  return reinterpret_cast<int8_t*>(&((*string_dict_i8_buffer_)[0]));
358  case 2:
359  return reinterpret_cast<int8_t*>(&((*string_dict_i16_buffer_)[0]));
360  case 4:
361  return reinterpret_cast<int8_t*>(&((*string_dict_i32_buffer_)[0]));
362  default:
363  abort();
364  }
365  }
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
const ColumnDescriptor * column_desc_
Definition: Importer.h:509
SQLTypeInfo columnType
+ Here is the caller graph for this function:

◆ getStringDictionary()

StringDictionary* import_export::TypedImportBuffer::getStringDictionary ( ) const
inline

Definition at line 284 of file Importer.h.

284 { return string_dict_; }
StringDictionary * string_dict_
Definition: Importer.h:510

◆ getTypeInfo()

const SQLTypeInfo& import_export::TypedImportBuffer::getTypeInfo ( ) const
inline

Definition at line 280 of file Importer.h.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

280 { return column_desc_->columnType; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:509
SQLTypeInfo columnType
+ Here is the caller graph for this function:

◆ pop_value()

void import_export::TypedImportBuffer::pop_value ( )

Definition at line 704 of file Importer.cpp.

References CHECK, decimal_to_int_type(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and run_benchmark_import::type.

704  {
705  const auto type = column_desc_->columnType.is_decimal()
706  ? decimal_to_int_type(column_desc_->columnType)
707  : column_desc_->columnType.get_type();
708  switch (type) {
709  case kBOOLEAN:
710  bool_buffer_->pop_back();
711  break;
712  case kTINYINT:
713  tinyint_buffer_->pop_back();
714  break;
715  case kSMALLINT:
716  smallint_buffer_->pop_back();
717  break;
718  case kINT:
719  int_buffer_->pop_back();
720  break;
721  case kBIGINT:
722  bigint_buffer_->pop_back();
723  break;
724  case kFLOAT:
725  float_buffer_->pop_back();
726  break;
727  case kDOUBLE:
728  double_buffer_->pop_back();
729  break;
730  case kTEXT:
731  case kVARCHAR:
732  case kCHAR:
733  string_buffer_->pop_back();
734  break;
735  case kDATE:
736  case kTIME:
737  case kTIMESTAMP:
738  bigint_buffer_->pop_back();
739  break;
740  case kARRAY:
741  if (IS_STRING(column_desc_->columnType.get_subtype())) {
742  string_array_buffer_->pop_back();
743  } else {
744  array_buffer_->pop_back();
745  }
746  break;
747  case kPOINT:
748  case kLINESTRING:
749  case kPOLYGON:
750  case kMULTIPOLYGON:
751  geo_string_buffer_->pop_back();
752  break;
753  default:
754  CHECK(false) << "TypedImportBuffer::pop_value() does not support type " << type;
755  }
756 }
Definition: sqltypes.h:51
std::vector< std::string > * string_buffer_
Definition: Importer.h:498
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:500
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:493
std::vector< float > * float_buffer_
Definition: Importer.h:496
bool is_decimal() const
Definition: sqltypes.h:419
std::vector< double > * double_buffer_
Definition: Importer.h:497
std::vector< int32_t > * int_buffer_
Definition: Importer.h:494
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:495
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:260
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:491
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:501
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:299
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:43
#define IS_STRING(T)
Definition: sqltypes.h:173
const ColumnDescriptor * column_desc_
Definition: Importer.h:509
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:492
#define CHECK(condition)
Definition: Logger.h:197
Definition: sqltypes.h:47
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:499
+ Here is the call graph for this function:

◆ set_replicate_count()

void import_export::TypedImportBuffer::set_replicate_count ( const int64_t  replicate_count)
inline

Definition at line 474 of file Importer.h.

References run_benchmark_import::type.

474  {
475  replicate_count_ = replicate_count;
476  }

◆ stringDictCheckpoint()

bool import_export::TypedImportBuffer::stringDictCheckpoint ( )
inline

Definition at line 367 of file Importer.h.

367  {
368  if (string_dict_ == nullptr) {
369  return true;
370  }
371  return string_dict_->checkpoint();
372  }
StringDictionary * string_dict_
Definition: Importer.h:510
bool checkpoint() noexcept

Member Data Documentation

◆ @1

union { ... }

◆ @3

union { ... }

◆ array_buffer_

std::vector<ArrayDatum>* import_export::TypedImportBuffer::array_buffer_

Definition at line 500 of file Importer.h.

◆ bigint_buffer_

std::vector<int64_t>* import_export::TypedImportBuffer::bigint_buffer_

Definition at line 495 of file Importer.h.

◆ bool_buffer_

std::vector<int8_t>* import_export::TypedImportBuffer::bool_buffer_

Definition at line 491 of file Importer.h.

◆ col_idx

size_t import_export::TypedImportBuffer::col_idx

Definition at line 487 of file Importer.h.

◆ column_desc_

const ColumnDescriptor* import_export::TypedImportBuffer::column_desc_
private

Definition at line 509 of file Importer.h.

◆ double_buffer_

std::vector<double>* import_export::TypedImportBuffer::double_buffer_

Definition at line 497 of file Importer.h.

◆ float_buffer_

std::vector<float>* import_export::TypedImportBuffer::float_buffer_

Definition at line 496 of file Importer.h.

◆ geo_string_buffer_

std::vector<std::string>* import_export::TypedImportBuffer::geo_string_buffer_

Definition at line 499 of file Importer.h.

◆ import_buffers

std::vector<std::unique_ptr<TypedImportBuffer> >* import_export::TypedImportBuffer::import_buffers

Definition at line 486 of file Importer.h.

◆ int_buffer_

std::vector<int32_t>* import_export::TypedImportBuffer::int_buffer_

Definition at line 494 of file Importer.h.

◆ replicate_count_

size_t import_export::TypedImportBuffer::replicate_count_ = 0
private

Definition at line 511 of file Importer.h.

◆ smallint_buffer_

std::vector<int16_t>* import_export::TypedImportBuffer::smallint_buffer_

Definition at line 493 of file Importer.h.

◆ string_array_buffer_

std::vector<std::vector<std::string> >* import_export::TypedImportBuffer::string_array_buffer_

Definition at line 501 of file Importer.h.

◆ string_array_dict_buffer_

std::vector<ArrayDatum>* import_export::TypedImportBuffer::string_array_dict_buffer_

Definition at line 507 of file Importer.h.

◆ string_buffer_

std::vector<std::string>* import_export::TypedImportBuffer::string_buffer_

Definition at line 498 of file Importer.h.

◆ string_dict_

StringDictionary* import_export::TypedImportBuffer::string_dict_
private

Definition at line 510 of file Importer.h.

◆ string_dict_i16_buffer_

std::vector<uint16_t>* import_export::TypedImportBuffer::string_dict_i16_buffer_

Definition at line 505 of file Importer.h.

◆ string_dict_i32_buffer_

std::vector<int32_t>* import_export::TypedImportBuffer::string_dict_i32_buffer_

Definition at line 506 of file Importer.h.

◆ string_dict_i8_buffer_

std::vector<uint8_t>* import_export::TypedImportBuffer::string_dict_i8_buffer_

Definition at line 504 of file Importer.h.

◆ tinyint_buffer_

std::vector<int8_t>* import_export::TypedImportBuffer::tinyint_buffer_

Definition at line 492 of file Importer.h.


The documentation for this class was generated from the following files: