OmniSciDB  16c4e035a1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
import_export::TypedImportBuffer Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::TypedImportBuffer:
+ Collaboration diagram for import_export::TypedImportBuffer:

Public Types

using OptionalStringVector = std::optional< std::vector< std::string >>
 

Public Member Functions

 TypedImportBuffer (const ColumnDescriptor *col_desc, StringDictionary *string_dict)
 
 ~TypedImportBuffer ()
 
void addBoolean (const int8_t v)
 
void addTinyint (const int8_t v)
 
void addSmallint (const int16_t v)
 
void addInt (const int32_t v)
 
void addBigint (const int64_t v)
 
void addFloat (const float v)
 
void addDouble (const double v)
 
void addString (const std::string_view v)
 
void addGeoString (const std::string_view v)
 
void addArray (const ArrayDatum &v)
 
OptionalStringVector & addStringArray ()
 
void addStringArray (const OptionalStringVector &arr)
 
void addDictEncodedString (const std::vector< std::string > &string_vec)
 
void addDictEncodedStringArray (const std::vector< OptionalStringVector > &string_array_vec)
 
const SQLTypeInfo & getTypeInfo () const
 
const ColumnDescriptor * getColumnDesc () const
 
StringDictionary * getStringDictionary () const
 
int8_t * getAsBytes () const
 
size_t getElementSize () const
 
std::vector< std::string > * getStringBuffer () const
 
std::vector< std::string > * getGeoStringBuffer () const
 
std::vector< ArrayDatum > * getArrayBuffer () const
 
std::vector< OptionalStringVector > * getStringArrayBuffer () const
 
std::vector< ArrayDatum > * getStringArrayDictBuffer () const
 
int8_t * getStringDictBuffer () const
 
bool stringDictCheckpoint ()
 
void clear ()
 
size_t add_values (const ColumnDescriptor *cd, const TColumn &data)
 
size_t add_arrow_values (const ColumnDescriptor *cd, const arrow::Array &data, const bool exact_type_match, const ArraySliceRange &slice_range, BadRowsTracker *bad_rows_tracker)
 
void add_value (const ColumnDescriptor *cd, const std::string_view val, const bool is_null, const CopyParams &copy_params)
 
void add_value (const ColumnDescriptor *cd, const TDatum &val, const bool is_null)
 
void addDefaultValues (const ColumnDescriptor *cd, size_t num_rows)
 
void pop_value ()
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
auto del_values (std::vector< DATA_TYPE > &buffer, BadRowsTracker *const bad_rows_tracker)
 
auto del_values (const SQLTypes type, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, import_export::BadRowsTracker *const bad_rows_tracker)
 

Static Public Member Functions

static std::vector< DataBlockPtr > get_data_block_pointers (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
 

Public Attributes

std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
 
size_t col_idx
 
std::vector< int8_t > * bool_buffer_
 
std::vector< int8_t > * tinyint_buffer_
 
std::vector< int16_t > * smallint_buffer_
 
std::vector< int32_t > * int_buffer_
 
std::vector< int64_t > * bigint_buffer_
 
std::vector< float > * float_buffer_
 
std::vector< double > * double_buffer_
 
std::vector< std::string > * string_buffer_
 
std::vector< std::string > * geo_string_buffer_
 
std::vector< ArrayDatum > * array_buffer_
 
std::vector< OptionalStringVector > * string_array_buffer_
 
std::vector< uint8_t > * string_dict_i8_buffer_
 
std::vector< uint16_t > * string_dict_i16_buffer_
 
std::vector< int32_t > * string_dict_i32_buffer_
 
std::vector< ArrayDatum > * string_array_dict_buffer_
 

Private Attributes

union {
   std::vector< int8_t > *   bool_buffer_
 
   std::vector< int8_t > *   tinyint_buffer_
 
   std::vector< int16_t > *   smallint_buffer_
 
   std::vector< int32_t > *   int_buffer_
 
   std::vector< int64_t > *   bigint_buffer_
 
   std::vector< float > *   float_buffer_
 
   std::vector< double > *   double_buffer_
 
   std::vector< std::string > *   string_buffer_
 
   std::vector< std::string > *   geo_string_buffer_
 
   std::vector< ArrayDatum > *   array_buffer_
 
   std::vector< OptionalStringVector > *   string_array_buffer_
 
}; 
 
union {
   std::vector< uint8_t > *   string_dict_i8_buffer_
 
   std::vector< uint16_t > *   string_dict_i16_buffer_
 
   std::vector< int32_t > *   string_dict_i32_buffer_
 
   std::vector< ArrayDatum > *   string_array_dict_buffer_
 
}; 
 
const ColumnDescriptor * column_desc_
 
StringDictionary * string_dict_
 

Detailed Description

Definition at line 89 of file Importer.h.

Member Typedef Documentation

using import_export::TypedImportBuffer::OptionalStringVector = std::optional<std::vector<std::string>>

Definition at line 91 of file Importer.h.

Constructor & Destructor Documentation

import_export::TypedImportBuffer::TypedImportBuffer ( const ColumnDescriptor * col_desc,
StringDictionary * string_dict 
)
inline

Definition at line 92 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

93  : column_desc_(col_desc), string_dict_(string_dict) {
94  switch (col_desc->columnType.get_type()) {
95  case kBOOLEAN:
96  bool_buffer_ = new std::vector<int8_t>();
97  break;
98  case kTINYINT:
99  tinyint_buffer_ = new std::vector<int8_t>();
100  break;
101  case kSMALLINT:
102  smallint_buffer_ = new std::vector<int16_t>();
103  break;
104  case kINT:
105  int_buffer_ = new std::vector<int32_t>();
106  break;
107  case kBIGINT:
108  case kNUMERIC:
109  case kDECIMAL:
110  bigint_buffer_ = new std::vector<int64_t>();
111  break;
112  case kFLOAT:
113  float_buffer_ = new std::vector<float>();
114  break;
115  case kDOUBLE:
116  double_buffer_ = new std::vector<double>();
117  break;
118  case kTEXT:
119  case kVARCHAR:
120  case kCHAR:
121  string_buffer_ = new std::vector<std::string>();
122  if (col_desc->columnType.get_compression() == kENCODING_DICT) {
123  switch (col_desc->columnType.get_size()) {
124  case 1:
125  string_dict_i8_buffer_ = new std::vector<uint8_t>();
126  break;
127  case 2:
128  string_dict_i16_buffer_ = new std::vector<uint16_t>();
129  break;
130  case 4:
131  string_dict_i32_buffer_ = new std::vector<int32_t>();
132  break;
133  default:
134  CHECK(false);
135  }
136  }
137  break;
138  case kDATE:
139  case kTIME:
140  case kTIMESTAMP:
141  bigint_buffer_ = new std::vector<int64_t>();
142  break;
143  case kARRAY:
144  if (IS_STRING(col_desc->columnType.get_subtype())) {
146  string_array_buffer_ = new std::vector<OptionalStringVector>();
147  string_array_dict_buffer_ = new std::vector<ArrayDatum>();
148  } else {
149  array_buffer_ = new std::vector<ArrayDatum>();
150  }
151  break;
152  case kPOINT:
153  case kLINESTRING:
154  case kPOLYGON:
155  case kMULTIPOLYGON:
156  geo_string_buffer_ = new std::vector<std::string>();
157  break;
158  default:
159  CHECK(false);
160  }
161  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:525
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:527
StringDictionary * string_dict_
Definition: Importer.h:537
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:520
std::vector< float > * float_buffer_
Definition: Importer.h:523
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:524
std::vector< int32_t > * int_buffer_
Definition: Importer.h:521
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:534
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:531
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:528
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:522
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:518
Definition: sqltypes.h:52
Definition: sqltypes.h:53
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:533
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:536
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:532
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:519
#define CHECK(condition)
Definition: Logger.h:211
Definition: sqltypes.h:45
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:526

+ Here is the call graph for this function:

import_export::TypedImportBuffer::~TypedImportBuffer ( )
inline

Definition at line 163 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

163  {
164  switch (column_desc_->columnType.get_type()) {
165  case kBOOLEAN:
166  delete bool_buffer_;
167  break;
168  case kTINYINT:
169  delete tinyint_buffer_;
170  break;
171  case kSMALLINT:
172  delete smallint_buffer_;
173  break;
174  case kINT:
175  delete int_buffer_;
176  break;
177  case kBIGINT:
178  case kNUMERIC:
179  case kDECIMAL:
180  delete bigint_buffer_;
181  break;
182  case kFLOAT:
183  delete float_buffer_;
184  break;
185  case kDOUBLE:
186  delete double_buffer_;
187  break;
188  case kTEXT:
189  case kVARCHAR:
190  case kCHAR:
191  delete string_buffer_;
192  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
193  switch (column_desc_->columnType.get_size()) {
194  case 1:
195  delete string_dict_i8_buffer_;
196  break;
197  case 2:
198  delete string_dict_i16_buffer_;
199  break;
200  case 4:
201  delete string_dict_i32_buffer_;
202  break;
203  }
204  }
205  break;
206  case kDATE:
207  case kTIME:
208  case kTIMESTAMP:
209  delete bigint_buffer_;
210  break;
211  case kARRAY:
212  if (IS_STRING(column_desc_->columnType.get_subtype())) {
213  delete string_array_buffer_;
214  delete string_array_dict_buffer_;
215  } else {
216  delete array_buffer_;
217  }
218  break;
219  case kPOINT:
220  case kLINESTRING:
221  case kPOLYGON:
222  case kMULTIPOLYGON:
223  delete geo_string_buffer_;
224  break;
225  default:
226  CHECK(false);
227  }
228  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:525
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:527
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:520
std::vector< float > * float_buffer_
Definition: Importer.h:523
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:524
std::vector< int32_t > * int_buffer_
Definition: Importer.h:521
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:534
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:531
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:528
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:522
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:518
Definition: sqltypes.h:52
Definition: sqltypes.h:53
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:533
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:536
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:532
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:519
#define CHECK(condition)
Definition: Logger.h:211
Definition: sqltypes.h:45
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:526

+ Here is the call graph for this function:

Member Function Documentation

size_t import_export::TypedImportBuffer::add_arrow_values ( const ColumnDescriptor * cd,
const arrow::Array &  data,
const bool  exact_type_match,
const ArraySliceRange & slice_range,
BadRowsTracker * bad_rows_tracker 
)

Definition at line 889 of file Importer.cpp.

References arrow_throw_if(), bigint_buffer_, bool_buffer_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_arrow_val_to_import_buffer(), DOUBLE, double_buffer_, FLOAT, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), int_buffer_, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, STRING, string_buffer_, TIMESTAMP, tinyint_buffer_, and run_benchmark_import::type.

893  {
894  const auto type = cd->columnType.get_type();
895  if (cd->columnType.get_notnull()) {
896  // We can't have any null values for this column; to have them is an error
897  arrow_throw_if(col.null_count() > 0, "NULL not allowed for column " + cd->columnName);
898  }
899 
900  switch (type) {
901  case kBOOLEAN:
902  if (exact_type_match) {
903  arrow_throw_if(col.type_id() != Type::BOOL, "Expected boolean type");
904  }
905  return convert_arrow_val_to_import_buffer(
906  cd, col, *bool_buffer_, slice_range, bad_rows_tracker);
907  case kTINYINT:
908  if (exact_type_match) {
909  arrow_throw_if(col.type_id() != Type::INT8, "Expected int8 type");
910  }
911  return convert_arrow_val_to_import_buffer(
912  cd, col, *tinyint_buffer_, slice_range, bad_rows_tracker);
913  case kSMALLINT:
914  if (exact_type_match) {
915  arrow_throw_if(col.type_id() != Type::INT16, "Expected int16 type");
916  }
917  return convert_arrow_val_to_import_buffer(
918  cd, col, *smallint_buffer_, slice_range, bad_rows_tracker);
919  case kINT:
920  if (exact_type_match) {
921  arrow_throw_if(col.type_id() != Type::INT32, "Expected int32 type");
922  }
923  return convert_arrow_val_to_import_buffer(
924  cd, col, *int_buffer_, slice_range, bad_rows_tracker);
925  case kBIGINT:
926  case kNUMERIC:
927  case kDECIMAL:
928  if (exact_type_match) {
929  arrow_throw_if(col.type_id() != Type::INT64, "Expected int64 type");
930  }
931  return convert_arrow_val_to_import_buffer(
932  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
933  case kFLOAT:
934  if (exact_type_match) {
935  arrow_throw_if(col.type_id() != Type::FLOAT, "Expected float type");
936  }
937  return convert_arrow_val_to_import_buffer(
938  cd, col, *float_buffer_, slice_range, bad_rows_tracker);
939  case kDOUBLE:
940  if (exact_type_match) {
941  arrow_throw_if(col.type_id() != Type::DOUBLE, "Expected double type");
942  }
943  return convert_arrow_val_to_import_buffer(
944  cd, col, *double_buffer_, slice_range, bad_rows_tracker);
945  case kTEXT:
946  case kVARCHAR:
947  case kCHAR:
948  if (exact_type_match) {
949  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
950  "Expected string type");
951  }
952  return convert_arrow_val_to_import_buffer(
953  cd, col, *string_buffer_, slice_range, bad_rows_tracker);
954  case kTIME:
955  if (exact_type_match) {
956  arrow_throw_if(col.type_id() != Type::TIME32 && col.type_id() != Type::TIME64,
957  "Expected time32 or time64 type");
958  }
959  return convert_arrow_val_to_import_buffer(
960  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
961  case kTIMESTAMP:
962  if (exact_type_match) {
963  arrow_throw_if(col.type_id() != Type::TIMESTAMP, "Expected timestamp type");
964  }
965  return convert_arrow_val_to_import_buffer(
966  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
967  case kDATE:
968  if (exact_type_match) {
969  arrow_throw_if(col.type_id() != Type::DATE32 && col.type_id() != Type::DATE64,
970  "Expected date32 or date64 type");
971  }
972  return convert_arrow_val_to_import_buffer(
973  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
974  case kPOINT:
975  case kLINESTRING:
976  case kPOLYGON:
977  case kMULTIPOLYGON:
978  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
979  "Expected string type");
980  return convert_arrow_val_to_import_buffer(
981  cd, col, *geo_string_buffer_, slice_range, bad_rows_tracker);
982  case kARRAY:
983  throw std::runtime_error("Arrow array appends not yet supported");
984  default:
985  throw std::runtime_error("Invalid Type");
986  }
987 }
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:525
#define DOUBLE
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:520
std::vector< float > * float_buffer_
Definition: Importer.h:523
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:524
std::vector< int32_t > * int_buffer_
Definition: Importer.h:521
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:522
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:518
Definition: sqltypes.h:52
Definition: sqltypes.h:53
#define TIMESTAMP
Definition: sqltypes.h:41
#define STRING
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:519
size_t convert_arrow_val_to_import_buffer(const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
#define FLOAT
std::string columnName
void arrow_throw_if(const bool cond, const std::string &message)
Definition: ArrowImporter.h:41
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:526

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const std::string_view  val,
const bool  is_null,
const CopyParams & copy_params 
)

Definition at line 549 of file Importer.cpp.

References addArray(), addBigint(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), Datum::bigintval, Datum::boolval, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), Datum::smallintval, import_export::StringToArray(), StringToDatum(), Datum::tinyintval, to_string(), run_benchmark_import::type, and DecimalOverflowValidator::validate().

Referenced by foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::set_null(), and foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::set_null().

552  {
553  const auto type = cd->columnType.get_type();
554  switch (type) {
555  case kBOOLEAN: {
556  if (is_null) {
557  if (cd->columnType.get_notnull()) {
558  throw std::runtime_error("NULL for column " + cd->columnName);
559  }
560  addBoolean(inline_fixed_encoding_null_val(cd->columnType));
561  } else {
562  auto ti = cd->columnType;
563  Datum d = StringToDatum(val, ti);
564  addBoolean(static_cast<int8_t>(d.boolval));
565  }
566  break;
567  }
568  case kTINYINT: {
569  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
570  auto ti = cd->columnType;
571  Datum d = StringToDatum(val, ti);
572  addTinyint(d.tinyintval);
573  } else {
574  if (cd->columnType.get_notnull()) {
575  throw std::runtime_error("NULL for column " + cd->columnName);
576  }
577  addTinyint(inline_fixed_encoding_null_val(cd->columnType));
578  }
579  break;
580  }
581  case kSMALLINT: {
582  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
583  auto ti = cd->columnType;
584  Datum d = StringToDatum(val, ti);
585  addSmallint(d.smallintval);
586  } else {
587  if (cd->columnType.get_notnull()) {
588  throw std::runtime_error("NULL for column " + cd->columnName);
589  }
590  addSmallint(inline_fixed_encoding_null_val(cd->columnType));
591  }
592  break;
593  }
594  case kINT: {
595  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
596  auto ti = cd->columnType;
597  Datum d = StringToDatum(val, ti);
598  addInt(d.intval);
599  } else {
600  if (cd->columnType.get_notnull()) {
601  throw std::runtime_error("NULL for column " + cd->columnName);
602  }
603  addInt(inline_fixed_encoding_null_val(cd->columnType));
604  }
605  break;
606  }
607  case kBIGINT: {
608  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
609  auto ti = cd->columnType;
610  Datum d = StringToDatum(val, ti);
611  addBigint(d.bigintval);
612  } else {
613  if (cd->columnType.get_notnull()) {
614  throw std::runtime_error("NULL for column " + cd->columnName);
615  }
616  addBigint(inline_fixed_encoding_null_val(cd->columnType));
617  }
618  break;
619  }
620  case kDECIMAL:
621  case kNUMERIC: {
622  if (!is_null) {
623  auto ti = cd->columnType;
624  Datum d = StringToDatum(val, ti);
625  DecimalOverflowValidator validator(ti);
626  validator.validate(d.bigintval);
627  addBigint(d.bigintval);
628  } else {
629  if (cd->columnType.get_notnull()) {
630  throw std::runtime_error("NULL for column " + cd->columnName);
631  }
632  addBigint(inline_fixed_encoding_null_val(cd->columnType));
633  }
634  break;
635  }
636  case kFLOAT:
637  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
638  addFloat(static_cast<float>(std::atof(std::string(val).c_str())));
639  } else {
640  if (cd->columnType.get_notnull()) {
641  throw std::runtime_error("NULL for column " + cd->columnName);
642  }
643  addFloat(NULL_FLOAT);
644  }
645  break;
646  case kDOUBLE:
647  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
648  addDouble(std::atof(std::string(val).c_str()));
649  } else {
650  if (cd->columnType.get_notnull()) {
651  throw std::runtime_error("NULL for column " + cd->columnName);
652  }
653  addDouble(NULL_DOUBLE);
654  }
655  break;
656  case kTEXT:
657  case kVARCHAR:
658  case kCHAR: {
659  // @TODO(wei) for now, use empty string for nulls
660  if (is_null) {
661  if (cd->columnType.get_notnull()) {
662  throw std::runtime_error("NULL for column " + cd->columnName);
663  }
664  addString(std::string());
665  } else {
666  if (val.length() > StringDictionary::MAX_STRLEN) {
667  throw std::runtime_error("String too long for column " + cd->columnName +
668  " was " + std::to_string(val.length()) + " max is " +
669  std::to_string(StringDictionary::MAX_STRLEN));
670  }
671  addString(val);
672  }
673  break;
674  }
675  case kTIME:
676  case kTIMESTAMP:
677  case kDATE:
678  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
679  SQLTypeInfo ti = cd->columnType;
680  Datum d = StringToDatum(val, ti);
681  addBigint(d.bigintval);
682  } else {
683  if (cd->columnType.get_notnull()) {
684  throw std::runtime_error("NULL for column " + cd->columnName);
685  }
686  addBigint(inline_fixed_encoding_null_val(cd->columnType));
687  }
688  break;
689  case kARRAY: {
690  if (is_null && cd->columnType.get_notnull()) {
691  throw std::runtime_error("NULL for column " + cd->columnName);
692  }
693  SQLTypeInfo ti = cd->columnType;
694  if (IS_STRING(ti.get_subtype())) {
695  std::vector<std::string> string_vec;
696  // Just parse string array, don't push it to buffer yet as we might throw
697  import_export::delimited_parser::parse_string_array(
698  std::string(val), copy_params, string_vec);
699  if (!is_null) {
700  if (ti.get_size() > 0) {
701  auto sti = ti.get_elem_type();
702  size_t expected_size = ti.get_size() / sti.get_size();
703  size_t actual_size = string_vec.size();
704  if (actual_size != expected_size) {
705  throw std::runtime_error("Fixed length array column " + cd->columnName +
706  " expects " + std::to_string(expected_size) +
707  " values, received " +
708  std::to_string(actual_size));
709  }
710  }
711  addStringArray(string_vec);
712  } else {
713  addStringArray(std::nullopt);
714  }
715  } else {
716  if (!is_null) {
717  ArrayDatum d = StringToArray(std::string(val), ti, copy_params);
718  if (d.is_null) { // val could be "NULL"
719  addArray(NullArray(ti));
720  } else {
721  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
722  throw std::runtime_error("Fixed length array for column " + cd->columnName +
723  " has incorrect length: " + std::string(val));
724  }
725  addArray(d);
726  }
727  } else {
728  addArray(NullArray(ti));
729  }
730  }
731  break;
732  }
733  case kPOINT:
734  case kLINESTRING:
735  case kPOLYGON:
736  case kMULTIPOLYGON:
737  addGeoString(val);
738  break;
739  default:
740  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
741  }
742 }
int8_t tinyintval
Definition: sqltypes.h:212
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
void addBigint(const int64_t v)
Definition: Importer.h:238
OptionalStringVector & addStringArray()
Definition: Importer.h:250
void addSmallint(const int16_t v)
Definition: Importer.h:234
Definition: sqltypes.h:49
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:244
void addDouble(const double v)
Definition: Importer.h:242
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:393
int8_t boolval
Definition: sqltypes.h:211
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
int32_t intval
Definition: sqltypes.h:214
std::string to_string(char const *&&v)
void addFloat(const float v)
Definition: Importer.h:240
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
void addGeoString(const std::string_view v)
Definition: Importer.h:246
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:230
void addTinyint(const int8_t v)
Definition: Importer.h:232
int64_t bigintval
Definition: sqltypes.h:215
void addInt(const int32_t v)
Definition: Importer.h:236
int16_t smallintval
Definition: sqltypes.h:213
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:275
Definition: sqltypes.h:52
Definition: sqltypes.h:53
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:339
void addArray(const ArrayDatum &v)
Definition: Importer.h:248
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
#define CHECK(condition)
Definition: Logger.h:211
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:861
std::string columnName
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec)
Parses given string array and inserts into given vector of strings.

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const TDatum &  val,
const bool  is_null 
)

Definition at line 1319 of file Importer.cpp.

References addArray(), addBigint(), import_export::addBinaryStringArray(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, decimal_to_int_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::TDatumToArrayDatum(), and run_benchmark_import::type.

1321  {
1322  const auto type = cd->columnType.is_decimal() ? decimal_to_int_type(cd->columnType)
1323  : cd->columnType.get_type();
1324  switch (type) {
1325  case kBOOLEAN: {
1326  if (is_null) {
1327  if (cd->columnType.get_notnull()) {
1328  throw std::runtime_error("NULL for column " + cd->columnName);
1329  }
1330  addBoolean(inline_fixed_encoding_null_val(cd->columnType));
1331  } else {
1332  addBoolean((int8_t)datum.val.int_val);
1333  }
1334  break;
1335  }
1336  case kTINYINT:
1337  if (!is_null) {
1338  addTinyint((int8_t)datum.val.int_val);
1339  } else {
1340  if (cd->columnType.get_notnull()) {
1341  throw std::runtime_error("NULL for column " + cd->columnName);
1342  }
1343  addTinyint(inline_fixed_encoding_null_val(cd->columnType));
1344  }
1345  break;
1346  case kSMALLINT:
1347  if (!is_null) {
1348  addSmallint((int16_t)datum.val.int_val);
1349  } else {
1350  if (cd->columnType.get_notnull()) {
1351  throw std::runtime_error("NULL for column " + cd->columnName);
1352  }
1353  addSmallint(inline_fixed_encoding_null_val(cd->columnType));
1354  }
1355  break;
1356  case kINT:
1357  if (!is_null) {
1358  addInt((int32_t)datum.val.int_val);
1359  } else {
1360  if (cd->columnType.get_notnull()) {
1361  throw std::runtime_error("NULL for column " + cd->columnName);
1362  }
1363  addInt(inline_fixed_encoding_null_val(cd->columnType));
1364  }
1365  break;
1366  case kBIGINT:
1367  if (!is_null) {
1368  addBigint(datum.val.int_val);
1369  } else {
1370  if (cd->columnType.get_notnull()) {
1371  throw std::runtime_error("NULL for column " + cd->columnName);
1372  }
1373  addBigint(inline_fixed_encoding_null_val(cd->columnType));
1374  }
1375  break;
1376  case kFLOAT:
1377  if (!is_null) {
1378  addFloat((float)datum.val.real_val);
1379  } else {
1380  if (cd->columnType.get_notnull()) {
1381  throw std::runtime_error("NULL for column " + cd->columnName);
1382  }
1383  addFloat(NULL_FLOAT);
1384  }
1385  break;
1386  case kDOUBLE:
1387  if (!is_null) {
1388  addDouble(datum.val.real_val);
1389  } else {
1390  if (cd->columnType.get_notnull()) {
1391  throw std::runtime_error("NULL for column " + cd->columnName);
1392  }
1393  addDouble(NULL_DOUBLE);
1394  }
1395  break;
1396  case kTEXT:
1397  case kVARCHAR:
1398  case kCHAR: {
1399  // @TODO(wei) for now, use empty string for nulls
1400  if (is_null) {
1401  if (cd->columnType.get_notnull()) {
1402  throw std::runtime_error("NULL for column " + cd->columnName);
1403  }
1404  addString(std::string());
1405  } else {
1406  addString(datum.val.str_val);
1407  }
1408  break;
1409  }
1410  case kTIME:
1411  case kTIMESTAMP:
1412  case kDATE: {
1413  if (!is_null) {
1414  addBigint(datum.val.int_val);
1415  } else {
1416  if (cd->columnType.get_notnull()) {
1417  throw std::runtime_error("NULL for column " + cd->columnName);
1418  }
1419  addBigint(inline_fixed_encoding_null_val(cd->columnType));
1420  }
1421  break;
1422  }
1423  case kARRAY:
1424  if (is_null && cd->columnType.get_notnull()) {
1425  throw std::runtime_error("NULL for column " + cd->columnName);
1426  }
1427  if (IS_STRING(cd->columnType.get_subtype())) {
1428  OptionalStringVector& string_vec = addStringArray();
1429  addBinaryStringArray(datum, *string_vec);
1430  } else {
1431  if (!is_null) {
1432  addArray(TDatumToArrayDatum(datum, cd->columnType));
1433  } else {
1434  addArray(NullArray(cd->columnType));
1435  }
1436  }
1437  break;
1438  case kPOINT:
1439  case kLINESTRING:
1440  case kPOLYGON:
1441  case kMULTIPOLYGON:
1442  if (is_null) {
1443  if (cd->columnType.get_notnull()) {
1444  throw std::runtime_error("NULL for column " + cd->columnName);
1445  }
1446  addGeoString(std::string());
1447  } else {
1448  addGeoString(datum.val.str_val);
1449  }
1450  break;
1451  default:
1452  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
1453  }
1454 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
#define NULL_DOUBLE
void addBigint(const int64_t v)
Definition: Importer.h:238
OptionalStringVector & addStringArray()
Definition: Importer.h:250
void addSmallint(const int16_t v)
Definition: Importer.h:234
Definition: sqltypes.h:49
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:244
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:91
void addDouble(const double v)
Definition: Importer.h:242
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:393
void addFloat(const float v)
Definition: Importer.h:240
void addGeoString(const std::string_view v)
Definition: Importer.h:246
ArrayDatum TDatumToArrayDatum(const TDatum &datum, const SQLTypeInfo &ti)
Definition: Importer.cpp:489
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:230
void addTinyint(const int8_t v)
Definition: Importer.h:232
void addInt(const int32_t v)
Definition: Importer.h:236
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:493
Definition: sqltypes.h:52
Definition: sqltypes.h:53
void addArray(const ArrayDatum &v)
Definition: Importer.h:248
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
#define CHECK(condition)
Definition: Logger.h:211
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
bool is_decimal() const
Definition: sqltypes.h:522
std::string columnName
void addBinaryStringArray(const TDatum &datum, std::vector< std::string > &string_vec)
Definition: Importer.cpp:437

+ Here is the call graph for this function:

size_t import_export::TypedImportBuffer::add_values ( const ColumnDescriptor * cd,
const TColumn &  data 
)

Definition at line 990 of file Importer.cpp.

References addArray(), addStringArray(), bigint_buffer_, bool_buffer_, checked_malloc(), ColumnDescriptor::columnName, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), i, inline_fixed_encoding_null_val(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), smallint_buffer_, string_buffer_, and tinyint_buffer_.

990  {
991  size_t dataSize = 0;
992  if (cd->columnType.get_notnull()) {
993  // We can't have any null values for this column; to have them is an error
994  if (std::any_of(col.nulls.begin(), col.nulls.end(), [](int i) { return i != 0; })) {
995  throw std::runtime_error("NULL for column " + cd->columnName);
996  }
997  }
998 
999  switch (cd->columnType.get_type()) {
1000  case kBOOLEAN: {
1001  dataSize = col.data.int_col.size();
1002  bool_buffer_->reserve(dataSize);
1003  for (size_t i = 0; i < dataSize; i++) {
1004  if (col.nulls[i]) {
1005  bool_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1006  } else {
1007  bool_buffer_->push_back((int8_t)col.data.int_col[i]);
1008  }
1009  }
1010  break;
1011  }
1012  case kTINYINT: {
1013  dataSize = col.data.int_col.size();
1014  tinyint_buffer_->reserve(dataSize);
1015  for (size_t i = 0; i < dataSize; i++) {
1016  if (col.nulls[i]) {
1017  tinyint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1018  } else {
1019  tinyint_buffer_->push_back((int8_t)col.data.int_col[i]);
1020  }
1021  }
1022  break;
1023  }
1024  case kSMALLINT: {
1025  dataSize = col.data.int_col.size();
1026  smallint_buffer_->reserve(dataSize);
1027  for (size_t i = 0; i < dataSize; i++) {
1028  if (col.nulls[i]) {
1029  smallint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1030  } else {
1031  smallint_buffer_->push_back((int16_t)col.data.int_col[i]);
1032  }
1033  }
1034  break;
1035  }
1036  case kINT: {
1037  dataSize = col.data.int_col.size();
1038  int_buffer_->reserve(dataSize);
1039  for (size_t i = 0; i < dataSize; i++) {
1040  if (col.nulls[i]) {
1041  int_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1042  } else {
1043  int_buffer_->push_back((int32_t)col.data.int_col[i]);
1044  }
1045  }
1046  break;
1047  }
1048  case kBIGINT:
1049  case kNUMERIC:
1050  case kDECIMAL: {
1051  dataSize = col.data.int_col.size();
1052  bigint_buffer_->reserve(dataSize);
1053  for (size_t i = 0; i < dataSize; i++) {
1054  if (col.nulls[i]) {
1055  bigint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1056  } else {
1057  bigint_buffer_->push_back((int64_t)col.data.int_col[i]);
1058  }
1059  }
1060  break;
1061  }
1062  case kFLOAT: {
1063  dataSize = col.data.real_col.size();
1064  float_buffer_->reserve(dataSize);
1065  for (size_t i = 0; i < dataSize; i++) {
1066  if (col.nulls[i]) {
1067  float_buffer_->push_back(NULL_FLOAT);
1068  } else {
1069  float_buffer_->push_back((float)col.data.real_col[i]);
1070  }
1071  }
1072  break;
1073  }
1074  case kDOUBLE: {
1075  dataSize = col.data.real_col.size();
1076  double_buffer_->reserve(dataSize);
1077  for (size_t i = 0; i < dataSize; i++) {
1078  if (col.nulls[i]) {
1079  double_buffer_->push_back(NULL_DOUBLE);
1080  } else {
1081  double_buffer_->push_back((double)col.data.real_col[i]);
1082  }
1083  }
1084  break;
1085  }
1086  case kTEXT:
1087  case kVARCHAR:
1088  case kCHAR: {
1089  // TODO: for now, use empty string for nulls
1090  dataSize = col.data.str_col.size();
1091  string_buffer_->reserve(dataSize);
1092  for (size_t i = 0; i < dataSize; i++) {
1093  if (col.nulls[i]) {
1094  string_buffer_->push_back(std::string());
1095  } else {
1096  string_buffer_->push_back(col.data.str_col[i]);
1097  }
1098  }
1099  break;
1100  }
1101  case kTIME:
1102  case kTIMESTAMP:
1103  case kDATE: {
1104  dataSize = col.data.int_col.size();
1105  bigint_buffer_->reserve(dataSize);
1106  for (size_t i = 0; i < dataSize; i++) {
1107  if (col.nulls[i]) {
1108  bigint_buffer_->push_back(inline_fixed_encoding_null_val(cd->columnType));
1109  } else {
1110  bigint_buffer_->push_back(static_cast<int64_t>(col.data.int_col[i]));
1111  }
1112  }
1113  break;
1114  }
1115  case kPOINT:
1116  case kLINESTRING:
1117  case kPOLYGON:
1118  case kMULTIPOLYGON: {
1119  dataSize = col.data.str_col.size();
1120  geo_string_buffer_->reserve(dataSize);
1121  for (size_t i = 0; i < dataSize; i++) {
1122  if (col.nulls[i]) {
1123  // TODO: add support for NULL geo
1124  geo_string_buffer_->push_back(std::string());
1125  } else {
1126  geo_string_buffer_->push_back(col.data.str_col[i]);
1127  }
1128  }
1129  break;
1130  }
1131  case kARRAY: {
1132  dataSize = col.data.arr_col.size();
1133  if (IS_STRING(cd->columnType.get_subtype())) {
1134  for (size_t i = 0; i < dataSize; i++) {
1135  OptionalStringVector& string_vec = addStringArray();
1136  if (!col.nulls[i]) {
1137  size_t stringArrSize = col.data.arr_col[i].data.str_col.size();
1138  for (size_t str_idx = 0; str_idx != stringArrSize; ++str_idx) {
1139  string_vec->push_back(col.data.arr_col[i].data.str_col[str_idx]);
1140  }
1141  }
1142  }
1143  } else {
1144  auto elem_ti = cd->columnType.get_subtype();
1145  switch (elem_ti) {
1146  case kBOOLEAN: {
1147  for (size_t i = 0; i < dataSize; i++) {
1148  if (col.nulls[i]) {
1149  addArray(NullArray(cd->columnType));
1150  } else {
1151  size_t len = col.data.arr_col[i].data.int_col.size();
1152  size_t byteSize = len * sizeof(int8_t);
1153  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1154  int8_t* p = buf;
1155  for (size_t j = 0; j < len; ++j) {
1156  // Explicitly checking the item for null because
1157  // casting null value (-128) to bool results
1158  // incorrect value 1.
1159  if (col.data.arr_col[i].nulls[j]) {
1160  *p = static_cast<int8_t>(
1161  inline_fixed_encoding_null_val(cd->columnType.get_elem_type()));
1162  } else {
1163  *(bool*)p = static_cast<bool>(col.data.arr_col[i].data.int_col[j]);
1164  }
1165  p += sizeof(bool);
1166  }
1167  addArray(ArrayDatum(byteSize, buf, false));
1168  }
1169  }
1170  break;
1171  }
1172  case kTINYINT: {
1173  for (size_t i = 0; i < dataSize; i++) {
1174  if (col.nulls[i]) {
1175  addArray(NullArray(cd->columnType));
1176  } else {
1177  size_t len = col.data.arr_col[i].data.int_col.size();
1178  size_t byteSize = len * sizeof(int8_t);
1179  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1180  int8_t* p = buf;
1181  for (size_t j = 0; j < len; ++j) {
1182  *(int8_t*)p = static_cast<int8_t>(col.data.arr_col[i].data.int_col[j]);
1183  p += sizeof(int8_t);
1184  }
1185  addArray(ArrayDatum(byteSize, buf, false));
1186  }
1187  }
1188  break;
1189  }
1190  case kSMALLINT: {
1191  for (size_t i = 0; i < dataSize; i++) {
1192  if (col.nulls[i]) {
1193  addArray(NullArray(cd->columnType));
1194  } else {
1195  size_t len = col.data.arr_col[i].data.int_col.size();
1196  size_t byteSize = len * sizeof(int16_t);
1197  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1198  int8_t* p = buf;
1199  for (size_t j = 0; j < len; ++j) {
1200  *(int16_t*)p =
1201  static_cast<int16_t>(col.data.arr_col[i].data.int_col[j]);
1202  p += sizeof(int16_t);
1203  }
1204  addArray(ArrayDatum(byteSize, buf, false));
1205  }
1206  }
1207  break;
1208  }
1209  case kINT: {
1210  for (size_t i = 0; i < dataSize; i++) {
1211  if (col.nulls[i]) {
1212  addArray(NullArray(cd->columnType));
1213  } else {
1214  size_t len = col.data.arr_col[i].data.int_col.size();
1215  size_t byteSize = len * sizeof(int32_t);
1216  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1217  int8_t* p = buf;
1218  for (size_t j = 0; j < len; ++j) {
1219  *(int32_t*)p =
1220  static_cast<int32_t>(col.data.arr_col[i].data.int_col[j]);
1221  p += sizeof(int32_t);
1222  }
1223  addArray(ArrayDatum(byteSize, buf, false));
1224  }
1225  }
1226  break;
1227  }
1228  case kBIGINT:
1229  case kNUMERIC:
1230  case kDECIMAL: {
1231  for (size_t i = 0; i < dataSize; i++) {
1232  if (col.nulls[i]) {
1233  addArray(NullArray(cd->columnType));
1234  } else {
1235  size_t len = col.data.arr_col[i].data.int_col.size();
1236  size_t byteSize = len * sizeof(int64_t);
1237  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1238  int8_t* p = buf;
1239  for (size_t j = 0; j < len; ++j) {
1240  *(int64_t*)p =
1241  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1242  p += sizeof(int64_t);
1243  }
1244  addArray(ArrayDatum(byteSize, buf, false));
1245  }
1246  }
1247  break;
1248  }
1249  case kFLOAT: {
1250  for (size_t i = 0; i < dataSize; i++) {
1251  if (col.nulls[i]) {
1252  addArray(NullArray(cd->columnType));
1253  } else {
1254  size_t len = col.data.arr_col[i].data.real_col.size();
1255  size_t byteSize = len * sizeof(float);
1256  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1257  int8_t* p = buf;
1258  for (size_t j = 0; j < len; ++j) {
1259  *(float*)p = static_cast<float>(col.data.arr_col[i].data.real_col[j]);
1260  p += sizeof(float);
1261  }
1262  addArray(ArrayDatum(byteSize, buf, false));
1263  }
1264  }
1265  break;
1266  }
1267  case kDOUBLE: {
1268  for (size_t i = 0; i < dataSize; i++) {
1269  if (col.nulls[i]) {
1270  addArray(NullArray(cd->columnType));
1271  } else {
1272  size_t len = col.data.arr_col[i].data.real_col.size();
1273  size_t byteSize = len * sizeof(double);
1274  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1275  int8_t* p = buf;
1276  for (size_t j = 0; j < len; ++j) {
1277  *(double*)p = static_cast<double>(col.data.arr_col[i].data.real_col[j]);
1278  p += sizeof(double);
1279  }
1280  addArray(ArrayDatum(byteSize, buf, false));
1281  }
1282  }
1283  break;
1284  }
1285  case kTIME:
1286  case kTIMESTAMP:
1287  case kDATE: {
1288  for (size_t i = 0; i < dataSize; i++) {
1289  if (col.nulls[i]) {
1290  addArray(NullArray(cd->columnType));
1291  } else {
1292  size_t len = col.data.arr_col[i].data.int_col.size();
1293  size_t byteWidth = sizeof(int64_t);
1294  size_t byteSize = len * byteWidth;
1295  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1296  int8_t* p = buf;
1297  for (size_t j = 0; j < len; ++j) {
1298  *reinterpret_cast<int64_t*>(p) =
1299  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1300  p += sizeof(int64_t);
1301  }
1302  addArray(ArrayDatum(byteSize, buf, false));
1303  }
1304  }
1305  break;
1306  }
1307  default:
1308  throw std::runtime_error("Invalid Array Type");
1309  }
1310  }
1311  break;
1312  }
1313  default:
1314  throw std::runtime_error("Invalid Type");
1315  }
1316  return dataSize;
1317 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
#define NULL_DOUBLE
OptionalStringVector & addStringArray()
Definition: Importer.h:250
Definition: sqltypes.h:49
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:525
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:91
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:393
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:520
std::vector< float > * float_buffer_
Definition: Importer.h:523
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:524
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
std::vector< int32_t > * int_buffer_
Definition: Importer.h:521
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:522
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:518
Definition: sqltypes.h:52
Definition: sqltypes.h:53
void addArray(const ArrayDatum &v)
Definition: Importer.h:248
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:519
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:861
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:526

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addArray ( const ArrayDatum & v)
inline

Definition at line 248 of file Importer.h.

References array_buffer_.

Referenced by add_value(), and add_values().

248 { array_buffer_->push_back(v); }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:527

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBigint ( const int64_t  v)
inline

Definition at line 238 of file Importer.h.

References bigint_buffer_.

Referenced by add_value().

238 { bigint_buffer_->push_back(v); }
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:522

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBoolean ( const int8_t  v)
inline

Definition at line 230 of file Importer.h.

References bool_buffer_.

Referenced by add_value().

230 { bool_buffer_->push_back(v); }
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:518

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addDefaultValues ( const ColumnDescriptor * cd,
size_t  num_rows 
)

Definition at line 1456 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_decimal_value_to_scale(), ColumnDescriptor::default_value, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), int_buffer_, is_null(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), smallint_buffer_, string_array_buffer_, string_buffer_, import_export::StringToArray(), StringToDatum(), tinyint_buffer_, to_string(), and run_benchmark_import::type.

1456  {
1457  bool is_null = !cd->default_value.has_value();
1458  CHECK(!(is_null && cd->columnType.get_notnull()));
1459  const auto type = cd->columnType.get_type();
1460  auto ti = cd->columnType;
1461  auto val = cd->default_value.value_or("NULL");
1462  CopyParams cp;
1463  switch (type) {
1464  case kBOOLEAN: {
1465  if (!is_null) {
1466  bool_buffer_->resize(num_rows, StringToDatum(val, ti).boolval);
1467  } else {
1468  bool_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1469  }
1470  break;
1471  }
1472  case kTINYINT: {
1473  if (!is_null) {
1474  tinyint_buffer_->resize(num_rows, StringToDatum(val, ti).tinyintval);
1475  } else {
1476  tinyint_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1477  }
1478  break;
1479  }
1480  case kSMALLINT: {
1481  if (!is_null) {
1482  smallint_buffer_->resize(num_rows, StringToDatum(val, ti).smallintval);
1483  } else {
1484  smallint_buffer_->resize(num_rows,
1485  inline_fixed_encoding_null_val(cd->columnType));
1486  }
1487  break;
1488  }
1489  case kINT: {
1490  if (!is_null) {
1491  int_buffer_->resize(num_rows, StringToDatum(val, ti).intval);
1492  } else {
1493  int_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1494  }
1495  break;
1496  }
1497  case kBIGINT: {
1498  if (!is_null) {
1499  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1500  } else {
1501  bigint_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1502  }
1503  break;
1504  }
1505  case kDECIMAL:
1506  case kNUMERIC: {
1507  if (!is_null) {
1508  const auto converted_decimal_value = convert_decimal_value_to_scale(
1509  StringToDatum(val, ti).bigintval, ti, cd->columnType);
1510  bigint_buffer_->resize(num_rows, converted_decimal_value);
1511  } else {
1512  bigint_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1513  }
1514  break;
1515  }
1516  case kFLOAT:
1517  if (!is_null) {
1518  float_buffer_->resize(num_rows,
1519  static_cast<float>(std::atof(std::string(val).c_str())));
1520  } else {
1521  float_buffer_->resize(num_rows, NULL_FLOAT);
1522  }
1523  break;
1524  case kDOUBLE:
1525  if (!is_null) {
1526  double_buffer_->resize(num_rows, std::atof(std::string(val).c_str()));
1527  } else {
1528  double_buffer_->resize(num_rows, NULL_DOUBLE);
1529  }
1530  break;
1531  case kTEXT:
1532  case kVARCHAR:
1533  case kCHAR: {
1534  if (is_null) {
1535  string_buffer_->resize(num_rows, "");
1536  } else {
1537  if (val.length() > StringDictionary::MAX_STRLEN) {
1538  throw std::runtime_error("String too long for column " + cd->columnName +
1539  " was " + std::to_string(val.length()) + " max is " +
1540  std::to_string(StringDictionary::MAX_STRLEN));
1541  }
1542  string_buffer_->resize(num_rows, val);
1543  }
1544  break;
1545  }
1546  case kTIME:
1547  case kTIMESTAMP:
1548  case kDATE:
1549  if (!is_null) {
1550  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1551  } else {
1552  bigint_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1553  }
1554  break;
1555  case kARRAY: {
1556  if (IS_STRING(ti.get_subtype())) {
1557  std::vector<std::string> string_vec;
1558  // Just parse string array, don't push it to buffer yet as we might throw
1559  import_export::delimited_parser::parse_string_array(
1560  std::string(val), cp, string_vec);
1561  if (!is_null) {
1562  // TODO: add support for NULL string arrays
1563  if (ti.get_size() > 0) {
1564  auto sti = ti.get_elem_type();
1565  size_t expected_size = ti.get_size() / sti.get_size();
1566  size_t actual_size = string_vec.size();
1567  if (actual_size != expected_size) {
1568  throw std::runtime_error("Fixed length array column " + cd->columnName +
1569  " expects " + std::to_string(expected_size) +
1570  " values, received " +
1571  std::to_string(actual_size));
1572  }
1573  }
1574  string_array_buffer_->resize(num_rows, string_vec);
1575  } else {
1576  if (ti.get_size() > 0) {
1577  // TODO: remove once NULL fixlen arrays are allowed
1578  throw std::runtime_error("Fixed length array column " + cd->columnName +
1579  " currently cannot accept NULL arrays");
1580  }
1581  // TODO: add support for NULL string arrays, replace with addStringArray(),
1582  // for now add whatever parseStringArray() outputs for NULLs ("NULL")
1583  string_array_buffer_->resize(num_rows, string_vec);
1584  }
1585  } else {
1586  if (!is_null) {
1587  ArrayDatum d = StringToArray(std::string(val), ti, cp);
1588  if (d.is_null) { // val could be "NULL"
1589  array_buffer_->resize(num_rows, NullArray(ti));
1590  } else {
1591  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
1592  throw std::runtime_error("Fixed length array for column " + cd->columnName +
1593  " has incorrect length: " + std::string(val));
1594  }
1595  array_buffer_->resize(num_rows, d);
1596  }
1597  } else {
1598  array_buffer_->resize(num_rows, NullArray(ti));
1599  }
1600  }
1601  break;
1602  }
1603  case kPOINT:
1604  case kLINESTRING:
1605  case kPOLYGON:
1606  case kMULTIPOLYGON:
1607  geo_string_buffer_->resize(num_rows, val);
1608  break;
1609  default:
1610  CHECK(false) << "TypedImportBuffer::addDefaultValues() does not support type "
1611  << type;
1612  }
1613 }
#define NULL_DOUBLE
Definition: sqltypes.h:49
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:525
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:527
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:393
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:520
std::vector< float > * float_buffer_
Definition: Importer.h:523
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:524
std::string to_string(char const *&&v)
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
std::vector< int32_t > * int_buffer_
Definition: Importer.h:521
CONSTEXPR DEVICE bool is_null(const T &value)
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:528
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:522
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:275
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:518
std::optional< std::string > default_value
Definition: sqltypes.h:52
Definition: sqltypes.h:53
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:339
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:568
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:519
#define CHECK(condition)
Definition: Logger.h:211
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:526
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec)
Parses given string array and inserts into given vector of strings.

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedString ( const std::vector< std::string > &  string_vec)

Definition at line 509 of file Importer.cpp.

References CHECK, column_desc_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, logger::ERROR, SQLTypeInfo::get_size(), getColumnDesc(), StringDictionary::getOrAddBulk(), LOG, StringDictionary::MAX_STRLEN, string_dict_, string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

509  {
510  CHECK(string_dict_);
511  std::vector<std::string_view> string_view_vec;
512  string_view_vec.reserve(string_vec.size());
513  for (const auto& str : string_vec) {
514  if (str.size() > StringDictionary::MAX_STRLEN) {
515  std::ostringstream oss;
516  oss << "while processing dictionary for column " << getColumnDesc()->columnName
517  << " a string was detected too long for encoding, string length = "
518  << str.size() << ", first 100 characters are '" << str.substr(0, 100) << "'";
519  throw std::runtime_error(oss.str());
520  }
521  string_view_vec.push_back(str);
522  }
523  try {
524  switch (column_desc_->columnType.get_size()) {
525  case 1:
526  string_dict_i8_buffer_->resize(string_view_vec.size());
527  string_dict_->getOrAddBulk(string_view_vec, string_dict_i8_buffer_->data());
528  break;
529  case 2:
530  string_dict_i16_buffer_->resize(string_view_vec.size());
531  string_dict_->getOrAddBulk(string_view_vec, string_dict_i16_buffer_->data());
532  break;
533  case 4:
534  string_dict_i32_buffer_->resize(string_view_vec.size());
535  string_dict_->getOrAddBulk(string_view_vec, string_dict_i32_buffer_->data());
536  break;
537  default:
538  CHECK(false);
539  }
540  } catch (std::exception& e) {
541  std::ostringstream oss;
542  oss << "while processing dictionary for column " << getColumnDesc()->columnName
543  << " : " << e.what();
544  LOG(ERROR) << oss.str();
545  throw std::runtime_error(oss.str());
546  }
547 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
#define LOG(tag)
Definition: Logger.h:205
StringDictionary * string_dict_
Definition: Importer.h:537
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:531
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:533
const ColumnDescriptor * column_desc_
Definition: Importer.h:536
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:532
#define CHECK(condition)
Definition: Logger.h:211
const ColumnDescriptor * getColumnDesc() const
Definition: Importer.h:311
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedStringArray ( const std::vector< OptionalStringVector > &  string_array_vec)
inline

Definition at line 261 of file Importer.h.

References anonymous_namespace{Utm.h}::a, CHECK, checked_malloc(), column_desc_, ColumnDescriptor::columnType, import_export::ImporterUtils::composeNullArray(), StringDictionary::getOrAddBulkArray(), i, StringDictionary::MAX_STRLEN, string_array_dict_buffer_, and string_dict_.

262  {
263  CHECK(string_dict_);
264 
265  // first check data is ok
266  for (auto& p : string_array_vec) {
267  if (!p) {
268  continue;
269  }
270  for (const auto& str : *p) {
271  if (str.size() > StringDictionary::MAX_STRLEN) {
272  throw std::runtime_error("String too long for dictionary encoding.");
273  }
274  }
275  }
276 
277  // to avoid copying, create a string view of each string in the
278  // `string_array_vec` where the array holding the string is *not null*
279  std::vector<std::vector<std::string_view>> string_view_array_vec;
280  for (auto& p : string_array_vec) {
281  if (!p) {
282  continue;
283  }
284  auto& array = string_view_array_vec.emplace_back();
285  for (const auto& str : *p) {
286  array.emplace_back(str);
287  }
288  }
289 
290  std::vector<std::vector<int32_t>> ids_array(0);
291  string_dict_->getOrAddBulkArray(string_view_array_vec, ids_array);
292 
293  size_t i, j;
294  for (i = 0, j = 0; i < string_array_vec.size(); ++i) {
295  if (!string_array_vec[i]) { // null array
296  string_array_dict_buffer_->push_back(
297  ImporterUtils::composeNullArray(column_desc_->columnType));
298  } else { // non-null array
299  auto& p = ids_array[j++];
300  size_t len = p.size() * sizeof(int32_t);
301  auto a = static_cast<int32_t*>(checked_malloc(len));
302  memcpy(a, &p[0], len);
303  string_array_dict_buffer_->push_back(
304  ArrayDatum(len, reinterpret_cast<int8_t*>(a), false));
305  }
306  }
307  }
StringDictionary * string_dict_
Definition: Importer.h:537
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:417
constexpr double a
Definition: Utm.h:32
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:534
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
void getOrAddBulkArray(const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec)
const ColumnDescriptor * column_desc_
Definition: Importer.h:536
#define CHECK(condition)
Definition: Logger.h:211
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDouble ( const double  v)
inline

Definition at line 242 of file Importer.h.

References double_buffer_.

Referenced by add_value().

242 { double_buffer_->push_back(v); }
std::vector< double > * double_buffer_
Definition: Importer.h:524

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addFloat ( const float  v)
inline

Definition at line 240 of file Importer.h.

References float_buffer_.

Referenced by add_value().

240 { float_buffer_->push_back(v); }
std::vector< float > * float_buffer_
Definition: Importer.h:523

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addGeoString ( const std::string_view  v)
inline

Definition at line 246 of file Importer.h.

References geo_string_buffer_.

Referenced by add_value().

246 { geo_string_buffer_->emplace_back(v); }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:526

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addInt ( const int32_t  v)
inline

Definition at line 236 of file Importer.h.

References int_buffer_.

Referenced by add_value().

236 { int_buffer_->push_back(v); }
std::vector< int32_t > * int_buffer_
Definition: Importer.h:521

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addSmallint ( const int16_t  v)
inline

Definition at line 234 of file Importer.h.

References smallint_buffer_.

Referenced by add_value().

234 { smallint_buffer_->push_back(v); }
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:520

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addString ( const std::string_view  v)
inline

Definition at line 244 of file Importer.h.

References string_buffer_.

Referenced by add_value().

244 { string_buffer_->emplace_back(v); }
std::vector< std::string > * string_buffer_
Definition: Importer.h:525

+ Here is the caller graph for this function:

OptionalStringVector& import_export::TypedImportBuffer::addStringArray ( )
inline

Definition at line 250 of file Importer.h.

References string_array_buffer_.

Referenced by add_value(), and add_values().

250  {
251  string_array_buffer_->emplace_back(std::vector<std::string>{});
252  return string_array_buffer_->back();
253  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:528

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addStringArray ( const OptionalStringVector & arr)
inline

Definition at line 255 of file Importer.h.

References string_array_buffer_.

255  {
256  string_array_buffer_->push_back(arr);
257  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:528
void import_export::TypedImportBuffer::addTinyint ( const int8_t  v)
inline

Definition at line 232 of file Importer.h.

References tinyint_buffer_.

Referenced by add_value().

232 { tinyint_buffer_->push_back(v); }
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:519

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::clear ( )
inline

Definition at line 403 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

403  {
404  switch (column_desc_->columnType.get_type()) {
405  case kBOOLEAN: {
406  bool_buffer_->clear();
407  break;
408  }
409  case kTINYINT: {
410  tinyint_buffer_->clear();
411  break;
412  }
413  case kSMALLINT: {
414  smallint_buffer_->clear();
415  break;
416  }
417  case kINT: {
418  int_buffer_->clear();
419  break;
420  }
421  case kBIGINT:
422  case kNUMERIC:
423  case kDECIMAL: {
424  bigint_buffer_->clear();
425  break;
426  }
427  case kFLOAT: {
428  float_buffer_->clear();
429  break;
430  }
431  case kDOUBLE: {
432  double_buffer_->clear();
433  break;
434  }
435  case kTEXT:
436  case kVARCHAR:
437  case kCHAR: {
438  string_buffer_->clear();
439  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
440  switch (column_desc_->columnType.get_size()) {
441  case 1:
442  string_dict_i8_buffer_->clear();
443  break;
444  case 2:
445  string_dict_i16_buffer_->clear();
446  break;
447  case 4:
448  string_dict_i32_buffer_->clear();
449  break;
450  default:
451  CHECK(false);
452  }
453  }
454  break;
455  }
456  case kDATE:
457  case kTIME:
458  case kTIMESTAMP:
459  bigint_buffer_->clear();
460  break;
461  case kARRAY: {
462  if (IS_STRING(column_desc_->columnType.get_subtype())) {
463  string_array_buffer_->clear();
464  string_array_dict_buffer_->clear();
465  } else {
466  array_buffer_->clear();
467  }
468  break;
469  }
470  case kPOINT:
471  case kLINESTRING:
472  case kPOLYGON:
473  case kMULTIPOLYGON:
474  geo_string_buffer_->clear();
475  break;
476  default:
477  CHECK(false);
478  }
479  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:525
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:527
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:520
std::vector< float > * float_buffer_
Definition: Importer.h:523
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:524
std::vector< int32_t > * int_buffer_
Definition: Importer.h:521
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:534
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:531
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:528
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:522
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:518
Definition: sqltypes.h:52
Definition: sqltypes.h:53
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:533
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:536
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:532
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:519
#define CHECK(condition)
Definition: Logger.h:211
Definition: sqltypes.h:45
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:526

+ Here is the call graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const arrow::Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange &  slice_range,
BadRowsTracker *const  bad_rows_tracker 
)

Referenced by add_arrow_values().

+ Here is the caller graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange &  slice_range,
import_export::BadRowsTracker *const  bad_rows_tracker 
)

Definition at line 804 of file Importer.cpp.

References col_idx, anonymous_namespace{ArrowImporter.h}::error_context(), geo_string_buffer_, SQLTypeInfo::get_type(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), import_buffers, import_export::BadRowsTracker::importer, import_export::BadRowsTracker::mutex, import_export::BadRowsTracker::rows, import_export::Importer::set_geo_physical_import_buffer(), and anonymous_namespace{ArrowImporter.h}::value_getter().

809  {
810  auto data =
811  std::make_unique<DataBuffer<DATA_TYPE>>(cd, array, buffer, bad_rows_tracker);
812  auto f_value_getter = value_getter(array, cd, bad_rows_tracker);
813  std::function<void(const int64_t)> f_add_geo_phy_cols = [&](const int64_t row) {};
814  if (bad_rows_tracker && cd->columnType.is_geometry()) {
815  f_add_geo_phy_cols = [&](const int64_t row) {
816  // Populate physical columns (ref. DBHandler::load_table)
817  std::vector<double> coords, bounds;
818  std::vector<int> ring_sizes, poly_rings;
819  int render_group = 0;
820  SQLTypeInfo ti;
821  // replace any unexpected exception from getGeoColumns or other
822  // on this path with a GeoImportException so that we wont over
823  // push a null to the logical column...
824  try {
825  SQLTypeInfo import_ti{ti};
826  if (array.IsNull(row)) {
827  Geospatial::GeoTypesFactory::getNullGeoColumns(
828  import_ti, coords, bounds, ring_sizes, poly_rings, false);
829  } else {
830  arrow_throw_if<GeoImportException>(
831  !Geospatial::GeoTypesFactory::getGeoColumns(geo_string_buffer_->back(),
832  ti,
833  coords,
834  bounds,
835  ring_sizes,
836  poly_rings,
837  false),
838  error_context(cd, bad_rows_tracker) + "Invalid geometry");
839  arrow_throw_if<GeoImportException>(
840  cd->columnType.get_type() != ti.get_type(),
841  error_context(cd, bad_rows_tracker) + "Geometry type mismatch");
842  }
843  auto col_idx_workpad = col_idx; // what a pitfall!!
844  import_export::Importer::set_geo_physical_import_buffer(
845  bad_rows_tracker->importer->getCatalog(),
846  cd,
847  *import_buffers,
848  col_idx_workpad,
849  coords,
850  bounds,
851  ring_sizes,
852  poly_rings,
853  render_group);
854  } catch (GeoImportException&) {
855  throw;
856  } catch (std::runtime_error& e) {
857  throw GeoImportException(e.what());
858  } catch (const std::exception& e) {
859  throw GeoImportException(e.what());
860  } catch (...) {
861  throw GeoImportException("unknown exception");
862  }
863  };
864  }
865  auto f_mark_a_bad_row = [&](const auto row) {
866  std::unique_lock<std::mutex> lck(bad_rows_tracker->mutex);
867  bad_rows_tracker->rows.insert(row - slice_range.first);
868  };
869  buffer.reserve(slice_range.second - slice_range.first);
870  for (size_t row = slice_range.first; row < slice_range.second; ++row) {
871  try {
872  *data << (array.IsNull(row) ? nullptr : f_value_getter(array, row));
873  f_add_geo_phy_cols(row);
874  } catch (GeoImportException&) {
875  f_mark_a_bad_row(row);
876  } catch (ArrowImporterException&) {
877  // trace bad rows of each column; otherwise rethrow.
878  if (bad_rows_tracker) {
879  *data << nullptr;
880  f_mark_a_bad_row(row);
881  } else {
882  throw;
883  }
884  }
885  }
886  return buffer.size();
887 }
auto value_getter(const arrow::Array &array, const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1144
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group)
Definition: Importer.cpp:1635
Catalog_Namespace::Catalog & getCatalog()
Definition: Importer.h:826
std::set< int64_t > rows
Definition: Importer.h:75
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:513
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:937
std::string error_context(const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
Definition: ArrowImporter.h:76
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:526

+ Here is the call graph for this function:

template<typename DATA_TYPE >
auto import_export::TypedImportBuffer::del_values ( std::vector< DATA_TYPE > &  buffer,
BadRowsTracker *const  bad_rows_tracker 
)
auto import_export::TypedImportBuffer::del_values ( const SQLTypes  type,
BadRowsTracker *const  bad_rows_tracker 
)
std::vector< DataBlockPtr > import_export::TypedImportBuffer::get_data_block_pointers ( const std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers)
static

Definition at line 3026 of file Importer.cpp.

References DataBlockPtr::arraysPtr, threading_serial::async(), CHECK, CHECK_EQ, getStringArrayBuffer(), getTypeInfo(), import_buffers, SQLTypeInfo::is_number(), IS_STRING, SQLTypeInfo::is_string(), kARRAY, kBOOLEAN, kENCODING_DICT, kENCODING_NONE, DataBlockPtr::numbersPtr, run_benchmark_import::result, and DataBlockPtr::stringsPtr.

Referenced by import_export::fill_missing_columns(), import_export::Loader::loadImpl(), and import_export::Loader::loadToShard().

3027  {
3028  std::vector<DataBlockPtr> result(import_buffers.size());
3029  std::vector<std::pair<const size_t, std::future<int8_t*>>>
3030  encoded_data_block_ptrs_futures;
3031  // make all async calls to string dictionary here and then continue execution
3032  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
3033  if (import_buffers[buf_idx]->getTypeInfo().is_string() &&
3034  import_buffers[buf_idx]->getTypeInfo().get_compression() != kENCODING_NONE) {
3035  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
3036  CHECK_EQ(kENCODING_DICT, import_buffers[buf_idx]->getTypeInfo().get_compression());
3037 
3038  encoded_data_block_ptrs_futures.emplace_back(std::make_pair(
3039  buf_idx,
3040  std::async(std::launch::async, [buf_idx, &import_buffers, string_payload_ptr] {
3041  import_buffers[buf_idx]->addDictEncodedString(*string_payload_ptr);
3042  return import_buffers[buf_idx]->getStringDictBuffer();
3043  })));
3044  }
3045  }
3046 
3047  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
3048  DataBlockPtr p;
3049  if (import_buffers[buf_idx]->getTypeInfo().is_number() ||
3050  import_buffers[buf_idx]->getTypeInfo().is_time() ||
3051  import_buffers[buf_idx]->getTypeInfo().get_type() == kBOOLEAN) {
3052  p.numbersPtr = import_buffers[buf_idx]->getAsBytes();
3053  } else if (import_buffers[buf_idx]->getTypeInfo().is_string()) {
3054  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
3055  if (import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_NONE) {
3056  p.stringsPtr = string_payload_ptr;
3057  } else {
3058  // This condition means we have column which is ENCODED string. We already made
3059  // Async request to gain the encoded integer values above so we should skip this
3060  // iteration and continue.
3061  continue;
3062  }
3063  } else if (import_buffers[buf_idx]->getTypeInfo().is_geometry()) {
3064  auto geo_payload_ptr = import_buffers[buf_idx]->getGeoStringBuffer();
3065  p.stringsPtr = geo_payload_ptr;
3066  } else {
3067  CHECK(import_buffers[buf_idx]->getTypeInfo().get_type() == kARRAY);
3068  if (IS_STRING(import_buffers[buf_idx]->getTypeInfo().get_subtype())) {
3069  CHECK(import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_DICT);
3070  import_buffers[buf_idx]->addDictEncodedStringArray(
3071  *import_buffers[buf_idx]->getStringArrayBuffer());
3072  p.arraysPtr = import_buffers[buf_idx]->getStringArrayDictBuffer();
3073  } else {
3074  p.arraysPtr = import_buffers[buf_idx]->getArrayBuffer();
3075  }
3076  }
3077  result[buf_idx] = p;
3078  }
3079 
3080  // wait for the async requests we made for string dictionary
3081  for (auto& encoded_ptr_future : encoded_data_block_ptrs_futures) {
3082  result[encoded_ptr_future.first].numbersPtr = encoded_ptr_future.second.get();
3083  }
3084  return result;
3085 }
#define CHECK_EQ(x, y)
Definition: Logger.h:219
const SQLTypeInfo & getTypeInfo() const
Definition: Importer.h:309
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:227
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:228
bool is_number() const
Definition: sqltypes.h:524
future< Result > async(Fn &&fn, Args &&...args)
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:513
#define IS_STRING(T)
Definition: sqltypes.h:250
#define CHECK(condition)
Definition: Logger.h:211
std::vector< OptionalStringVector > * getStringArrayBuffer() const
Definition: Importer.h:375
bool is_string() const
Definition: sqltypes.h:519
int8_t * numbersPtr
Definition: sqltypes.h:226

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getArrayBuffer ( ) const
inline

Definition at line 373 of file Importer.h.

References array_buffer_.

373 { return array_buffer_; }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:527
int8_t* import_export::TypedImportBuffer::getAsBytes ( ) const
inline

Definition at line 315 of file Importer.h.

References bigint_buffer_, bool_buffer_, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, SQLTypeInfo::get_type(), int_buffer_, kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, smallint_buffer_, and tinyint_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

315  {
316  switch (column_desc_->columnType.get_type()) {
317  case kBOOLEAN:
318  return reinterpret_cast<int8_t*>(bool_buffer_->data());
319  case kTINYINT:
320  return reinterpret_cast<int8_t*>(tinyint_buffer_->data());
321  case kSMALLINT:
322  return reinterpret_cast<int8_t*>(smallint_buffer_->data());
323  case kINT:
324  return reinterpret_cast<int8_t*>(int_buffer_->data());
325  case kBIGINT:
326  case kNUMERIC:
327  case kDECIMAL:
328  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
329  case kFLOAT:
330  return reinterpret_cast<int8_t*>(float_buffer_->data());
331  case kDOUBLE:
332  return reinterpret_cast<int8_t*>(double_buffer_->data());
333  case kDATE:
334  case kTIME:
335  case kTIMESTAMP:
336  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
337  default:
338  abort();
339  }
340  }
Definition: sqltypes.h:49
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:520
std::vector< float > * float_buffer_
Definition: Importer.h:523
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:524
std::vector< int32_t > * int_buffer_
Definition: Importer.h:521
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:522
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:518
Definition: sqltypes.h:53
const ColumnDescriptor * column_desc_
Definition: Importer.h:536
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:519
Definition: sqltypes.h:45
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const ColumnDescriptor* import_export::TypedImportBuffer::getColumnDesc ( ) const
inline

Definition at line 311 of file Importer.h.

References column_desc_.

Referenced by addDictEncodedString(), foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::set_null(), and foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::set_null().

311 { return column_desc_; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:536

+ Here is the caller graph for this function:

size_t import_export::TypedImportBuffer::getElementSize ( ) const
inline

Definition at line 342 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_type(), kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, and kTINYINT.

342  {
343  switch (column_desc_->columnType.get_type()) {
344  case kBOOLEAN:
345  return sizeof((*bool_buffer_)[0]);
346  case kTINYINT:
347  return sizeof((*tinyint_buffer_)[0]);
348  case kSMALLINT:
349  return sizeof((*smallint_buffer_)[0]);
350  case kINT:
351  return sizeof((*int_buffer_)[0]);
352  case kBIGINT:
353  case kNUMERIC:
354  case kDECIMAL:
355  return sizeof((*bigint_buffer_)[0]);
356  case kFLOAT:
357  return sizeof((*float_buffer_)[0]);
358  case kDOUBLE:
359  return sizeof((*double_buffer_)[0]);
360  case kDATE:
361  case kTIME:
362  case kTIMESTAMP:
363  return sizeof((*bigint_buffer_)[0]);
364  default:
365  abort();
366  }
367  }
Definition: sqltypes.h:49
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
Definition: sqltypes.h:53
const ColumnDescriptor * column_desc_
Definition: Importer.h:536
Definition: sqltypes.h:45
SQLTypeInfo columnType

+ Here is the call graph for this function:

std::vector<std::string>* import_export::TypedImportBuffer::getGeoStringBuffer ( ) const
inline

Definition at line 371 of file Importer.h.

References geo_string_buffer_.

371 { return geo_string_buffer_; }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:526
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::getStringArrayBuffer ( ) const
inline

Definition at line 375 of file Importer.h.

References string_array_buffer_.

Referenced by get_data_block_pointers().

375  {
376  return string_array_buffer_;
377  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:528

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getStringArrayDictBuffer ( ) const
inline

Definition at line 379 of file Importer.h.

References string_array_dict_buffer_.

379  {
380  return string_array_dict_buffer_;
381  }
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:534
std::vector<std::string>* import_export::TypedImportBuffer::getStringBuffer ( ) const
inline

Definition at line 369 of file Importer.h.

References string_buffer_.

369 { return string_buffer_; }
std::vector< std::string > * string_buffer_
Definition: Importer.h:525
int8_t* import_export::TypedImportBuffer::getStringDictBuffer ( ) const
inline

Definition at line 383 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_size(), string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::int_value_at().

383  {
384  switch (column_desc_->columnType.get_size()) {
385  case 1:
386  return reinterpret_cast<int8_t*>(string_dict_i8_buffer_->data());
387  case 2:
388  return reinterpret_cast<int8_t*>(string_dict_i16_buffer_->data());
389  case 4:
390  return reinterpret_cast<int8_t*>(string_dict_i32_buffer_->data());
391  default:
392  abort();
393  }
394  }
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:531
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:533
const ColumnDescriptor * column_desc_
Definition: Importer.h:536
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:532
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionary* import_export::TypedImportBuffer::getStringDictionary ( ) const
inline

Definition at line 313 of file Importer.h.

References string_dict_.

313 { return string_dict_; }
StringDictionary * string_dict_
Definition: Importer.h:537
const SQLTypeInfo& import_export::TypedImportBuffer::getTypeInfo ( ) const
inline

Definition at line 309 of file Importer.h.

References column_desc_, and ColumnDescriptor::columnType.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), get_data_block_pointers(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

309 { return column_desc_->columnType; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:536
SQLTypeInfo columnType

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::pop_value ( )

Definition at line 744 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, decimal_to_int_type(), double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_buffer_, tinyint_buffer_, and run_benchmark_import::type.

744  {
745  const auto type = column_desc_->columnType.is_decimal()
746  ? decimal_to_int_type(column_desc_->columnType)
747  : column_desc_->columnType.get_type();
748  switch (type) {
749  case kBOOLEAN:
750  bool_buffer_->pop_back();
751  break;
752  case kTINYINT:
753  tinyint_buffer_->pop_back();
754  break;
755  case kSMALLINT:
756  smallint_buffer_->pop_back();
757  break;
758  case kINT:
759  int_buffer_->pop_back();
760  break;
761  case kBIGINT:
762  bigint_buffer_->pop_back();
763  break;
764  case kFLOAT:
765  float_buffer_->pop_back();
766  break;
767  case kDOUBLE:
768  double_buffer_->pop_back();
769  break;
770  case kTEXT:
771  case kVARCHAR:
772  case kCHAR:
773  string_buffer_->pop_back();
774  break;
775  case kDATE:
776  case kTIME:
777  case kTIMESTAMP:
778  bigint_buffer_->pop_back();
779  break;
780  case kARRAY:
781  if (IS_STRING(column_desc_->columnType.get_subtype())) {
782  string_array_buffer_->pop_back();
783  } else {
784  array_buffer_->pop_back();
785  }
786  break;
787  case kPOINT:
788  case kLINESTRING:
789  case kPOLYGON:
790  case kMULTIPOLYGON:
791  geo_string_buffer_->pop_back();
792  break;
793  default:
794  CHECK(false) << "TypedImportBuffer::pop_value() does not support type " << type;
795  }
796 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:525
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:527
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:520
std::vector< float > * float_buffer_
Definition: Importer.h:523
std::vector< double > * double_buffer_
Definition: Importer.h:524
std::vector< int32_t > * int_buffer_
Definition: Importer.h:521
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:528
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:522
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:518
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:493
Definition: sqltypes.h:52
Definition: sqltypes.h:53
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:536
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:519
#define CHECK(condition)
Definition: Logger.h:211
Definition: sqltypes.h:45
SQLTypeInfo columnType
bool is_decimal() const
Definition: sqltypes.h:522
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:526

+ Here is the call graph for this function:

bool import_export::TypedImportBuffer::stringDictCheckpoint ( )
inline

Definition at line 396 of file Importer.h.

References StringDictionary::checkpoint(), and string_dict_.

396  {
397  if (string_dict_ == nullptr) {
398  return true;
399  }
400  return string_dict_->checkpoint();
401  }
StringDictionary * string_dict_
Definition: Importer.h:537
bool checkpoint() noexcept

+ Here is the call graph for this function:

Member Data Documentation

union { ... }
union { ... }
std::vector<ArrayDatum>* import_export::TypedImportBuffer::array_buffer_
std::vector<int64_t>* import_export::TypedImportBuffer::bigint_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::bool_buffer_
size_t import_export::TypedImportBuffer::col_idx

Definition at line 514 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer().

std::vector<double>* import_export::TypedImportBuffer::double_buffer_
std::vector<float>* import_export::TypedImportBuffer::float_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::geo_string_buffer_
std::vector<std::unique_ptr<TypedImportBuffer> >* import_export::TypedImportBuffer::import_buffers

Definition at line 513 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer(), and get_data_block_pointers().

std::vector<int32_t>* import_export::TypedImportBuffer::int_buffer_
std::vector<int16_t>* import_export::TypedImportBuffer::smallint_buffer_
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::string_array_buffer_
std::vector<ArrayDatum>* import_export::TypedImportBuffer::string_array_dict_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::string_buffer_
StringDictionary* import_export::TypedImportBuffer::string_dict_
private
std::vector<uint16_t>* import_export::TypedImportBuffer::string_dict_i16_buffer_
std::vector<int32_t>* import_export::TypedImportBuffer::string_dict_i32_buffer_
std::vector<uint8_t>* import_export::TypedImportBuffer::string_dict_i8_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::tinyint_buffer_

The documentation for this class was generated from the following files: