OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
import_export::TypedImportBuffer Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::TypedImportBuffer:
+ Collaboration diagram for import_export::TypedImportBuffer:

Public Types

using OptionalStringVector = std::optional< std::vector< std::string >>
 

Public Member Functions

 TypedImportBuffer (const ColumnDescriptor *col_desc, StringDictionary *string_dict)
 
 ~TypedImportBuffer ()
 
void addBoolean (const int8_t v)
 
void addTinyint (const int8_t v)
 
void addSmallint (const int16_t v)
 
void addInt (const int32_t v)
 
void addBigint (const int64_t v)
 
void addFloat (const float v)
 
void addDouble (const double v)
 
void addString (const std::string_view v)
 
void addGeoString (const std::string_view v)
 
void addArray (const ArrayDatum &v)
 
OptionalStringVector & addStringArray ()
 
void addStringArray (const OptionalStringVector &arr)
 
void addDictEncodedString (const std::vector< std::string > &string_vec)
 
void addDictEncodedStringArray (const std::vector< OptionalStringVector > &string_array_vec)
 
const SQLTypeInfo & getTypeInfo () const
 
const ColumnDescriptor * getColumnDesc () const
 
StringDictionary * getStringDictionary () const
 
int8_t * getAsBytes () const
 
size_t getElementSize () const
 
std::vector< std::string > * getStringBuffer () const
 
std::vector< std::string > * getGeoStringBuffer () const
 
std::vector< ArrayDatum > * getArrayBuffer () const
 
std::vector< OptionalStringVector > * getStringArrayBuffer () const
 
std::vector< ArrayDatum > * getStringArrayDictBuffer () const
 
int8_t * getStringDictBuffer () const
 
bool stringDictCheckpoint ()
 
void clear ()
 
size_t add_values (const ColumnDescriptor *cd, const TColumn &data)
 
size_t add_arrow_values (const ColumnDescriptor *cd, const arrow::Array &data, const bool exact_type_match, const ArraySliceRange &slice_range, BadRowsTracker *bad_rows_tracker)
 
void add_value (const ColumnDescriptor *cd, const std::string_view val, const bool is_null, const CopyParams &copy_params)
 
void add_value (const ColumnDescriptor *cd, const TDatum &val, const bool is_null)
 
void addDefaultValues (const ColumnDescriptor *cd, size_t num_rows)
 
void pop_value ()
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
auto del_values (std::vector< DATA_TYPE > &buffer, BadRowsTracker *const bad_rows_tracker)
 
auto del_values (const SQLTypes type, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, import_export::BadRowsTracker *const bad_rows_tracker)
 

Static Public Member Functions

static std::vector< DataBlockPtr > get_data_block_pointers (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
 

Public Attributes

std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
 
size_t col_idx
 
std::vector< int8_t > * bool_buffer_
 
std::vector< int8_t > * tinyint_buffer_
 
std::vector< int16_t > * smallint_buffer_
 
std::vector< int32_t > * int_buffer_
 
std::vector< int64_t > * bigint_buffer_
 
std::vector< float > * float_buffer_
 
std::vector< double > * double_buffer_
 
std::vector< std::string > * string_buffer_
 
std::vector< std::string > * geo_string_buffer_
 
std::vector< ArrayDatum > * array_buffer_
 
std::vector< OptionalStringVector > * string_array_buffer_
 
std::vector< uint8_t > * string_dict_i8_buffer_
 
std::vector< uint16_t > * string_dict_i16_buffer_
 
std::vector< int32_t > * string_dict_i32_buffer_
 
std::vector< ArrayDatum > * string_array_dict_buffer_
 

Private Attributes

union {
   std::vector< int8_t > *   bool_buffer_
 
   std::vector< int8_t > *   tinyint_buffer_
 
   std::vector< int16_t > *   smallint_buffer_
 
   std::vector< int32_t > *   int_buffer_
 
   std::vector< int64_t > *   bigint_buffer_
 
   std::vector< float > *   float_buffer_
 
   std::vector< double > *   double_buffer_
 
   std::vector< std::string > *   string_buffer_
 
   std::vector< std::string > *   geo_string_buffer_
 
   std::vector< ArrayDatum > *   array_buffer_
 
   std::vector< OptionalStringVector > *   string_array_buffer_
 
}; 
 
union {
   std::vector< uint8_t > *   string_dict_i8_buffer_
 
   std::vector< uint16_t > *   string_dict_i16_buffer_
 
   std::vector< int32_t > *   string_dict_i32_buffer_
 
   std::vector< ArrayDatum > *   string_array_dict_buffer_
 
}; 
 
const ColumnDescriptor * column_desc_
 
StringDictionary * string_dict_
 

Detailed Description

Definition at line 91 of file Importer.h.

Member Typedef Documentation

using import_export::TypedImportBuffer::OptionalStringVector = std::optional<std::vector<std::string>>

Definition at line 93 of file Importer.h.

Constructor & Destructor Documentation

import_export::TypedImportBuffer::TypedImportBuffer ( const ColumnDescriptor * col_desc,
StringDictionary * string_dict 
)
inline

Definition at line 94 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

95  : column_desc_(col_desc), string_dict_(string_dict) {
96  switch (col_desc->columnType.get_type()) {
97  case kBOOLEAN:
98  bool_buffer_ = new std::vector<int8_t>();
99  break;
100  case kTINYINT:
101  tinyint_buffer_ = new std::vector<int8_t>();
102  break;
103  case kSMALLINT:
104  smallint_buffer_ = new std::vector<int16_t>();
105  break;
106  case kINT:
107  int_buffer_ = new std::vector<int32_t>();
108  break;
109  case kBIGINT:
110  case kNUMERIC:
111  case kDECIMAL:
112  bigint_buffer_ = new std::vector<int64_t>();
113  break;
114  case kFLOAT:
115  float_buffer_ = new std::vector<float>();
116  break;
117  case kDOUBLE:
118  double_buffer_ = new std::vector<double>();
119  break;
120  case kTEXT:
121  case kVARCHAR:
122  case kCHAR:
123  string_buffer_ = new std::vector<std::string>();
124  if (col_desc->columnType.get_compression() == kENCODING_DICT) {
125  switch (col_desc->columnType.get_size()) {
126  case 1:
127  string_dict_i8_buffer_ = new std::vector<uint8_t>();
128  break;
129  case 2:
130  string_dict_i16_buffer_ = new std::vector<uint16_t>();
131  break;
132  case 4:
133  string_dict_i32_buffer_ = new std::vector<int32_t>();
134  break;
135  default:
136  CHECK(false);
137  }
138  }
139  break;
140  case kDATE:
141  case kTIME:
142  case kTIMESTAMP:
143  bigint_buffer_ = new std::vector<int64_t>();
144  break;
145  case kARRAY:
146  if (IS_STRING(col_desc->columnType.get_subtype())) {
148  string_array_buffer_ = new std::vector<OptionalStringVector>();
149  string_array_dict_buffer_ = new std::vector<ArrayDatum>();
150  } else {
151  array_buffer_ = new std::vector<ArrayDatum>();
152  }
153  break;
154  case kPOINT:
155  case kLINESTRING:
156  case kPOLYGON:
157  case kMULTIPOLYGON:
158  geo_string_buffer_ = new std::vector<std::string>();
159  break;
160  default:
161  CHECK(false);
162  }
163  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:527
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:529
StringDictionary * string_dict_
Definition: Importer.h:539
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:522
std::vector< float > * float_buffer_
Definition: Importer.h:525
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:526
std::vector< int32_t > * int_buffer_
Definition: Importer.h:523
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:536
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:533
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:530
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:524
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:520
Definition: sqltypes.h:52
Definition: sqltypes.h:53
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:535
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:538
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:534
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:521
#define CHECK(condition)
Definition: Logger.h:209
Definition: sqltypes.h:45
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:528

+ Here is the call graph for this function:

import_export::TypedImportBuffer::~TypedImportBuffer ( )
inline

Definition at line 165 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

165  {
166  switch (column_desc_->columnType.get_type()) {
167  case kBOOLEAN:
168  delete bool_buffer_;
169  break;
170  case kTINYINT:
171  delete tinyint_buffer_;
172  break;
173  case kSMALLINT:
174  delete smallint_buffer_;
175  break;
176  case kINT:
177  delete int_buffer_;
178  break;
179  case kBIGINT:
180  case kNUMERIC:
181  case kDECIMAL:
182  delete bigint_buffer_;
183  break;
184  case kFLOAT:
185  delete float_buffer_;
186  break;
187  case kDOUBLE:
188  delete double_buffer_;
189  break;
190  case kTEXT:
191  case kVARCHAR:
192  case kCHAR:
193  delete string_buffer_;
194  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
195  switch (column_desc_->columnType.get_size()) {
196  case 1:
197  delete string_dict_i8_buffer_;
198  break;
199  case 2:
200  delete string_dict_i16_buffer_;
201  break;
202  case 4:
203  delete string_dict_i32_buffer_;
204  break;
205  }
206  }
207  break;
208  case kDATE:
209  case kTIME:
210  case kTIMESTAMP:
211  delete bigint_buffer_;
212  break;
213  case kARRAY:
214  if (IS_STRING(column_desc_->columnType.get_subtype())) {
215  delete string_array_buffer_;
216  delete string_array_dict_buffer_;
217  } else {
218  delete array_buffer_;
219  }
220  break;
221  case kPOINT:
222  case kLINESTRING:
223  case kPOLYGON:
224  case kMULTIPOLYGON:
225  delete geo_string_buffer_;
226  break;
227  default:
228  CHECK(false);
229  }
230  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:527
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:529
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:522
std::vector< float > * float_buffer_
Definition: Importer.h:525
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:526
std::vector< int32_t > * int_buffer_
Definition: Importer.h:523
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:536
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:533
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:530
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:524
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:520
Definition: sqltypes.h:52
Definition: sqltypes.h:53
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:535
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:538
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:534
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:521
#define CHECK(condition)
Definition: Logger.h:209
Definition: sqltypes.h:45
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:528

+ Here is the call graph for this function:

Member Function Documentation

size_t import_export::TypedImportBuffer::add_arrow_values ( const ColumnDescriptor * cd,
const arrow::Array &  data,
const bool  exact_type_match,
const ArraySliceRange & slice_range,
BadRowsTracker * bad_rows_tracker 
)

Definition at line 884 of file Importer.cpp.

References arrow_throw_if(), bigint_buffer_, bool_buffer_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_arrow_val_to_import_buffer(), DOUBLE, double_buffer_, FLOAT, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), int_buffer_, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, STRING, string_buffer_, TIMESTAMP, tinyint_buffer_, and run_benchmark_import::type.

888  {
889  const auto type = cd->columnType.get_type();
890  if (cd->columnType.get_notnull()) {
891  // We can't have any null values for this column; to have them is an error
892  arrow_throw_if(col.null_count() > 0, "NULL not allowed for column " + cd->columnName);
893  }
894 
895  switch (type) {
896  case kBOOLEAN:
897  if (exact_type_match) {
898  arrow_throw_if(col.type_id() != Type::BOOL, "Expected boolean type");
899  }
900  return convert_arrow_val_to_import_buffer(
901  cd, col, *bool_buffer_, slice_range, bad_rows_tracker);
902  case kTINYINT:
903  if (exact_type_match) {
904  arrow_throw_if(col.type_id() != Type::INT8, "Expected int8 type");
905  }
906  return convert_arrow_val_to_import_buffer(
907  cd, col, *tinyint_buffer_, slice_range, bad_rows_tracker);
908  case kSMALLINT:
909  if (exact_type_match) {
910  arrow_throw_if(col.type_id() != Type::INT16, "Expected int16 type");
911  }
912  return convert_arrow_val_to_import_buffer(
913  cd, col, *smallint_buffer_, slice_range, bad_rows_tracker);
914  case kINT:
915  if (exact_type_match) {
916  arrow_throw_if(col.type_id() != Type::INT32, "Expected int32 type");
917  }
918  return convert_arrow_val_to_import_buffer(
919  cd, col, *int_buffer_, slice_range, bad_rows_tracker);
920  case kBIGINT:
921  case kNUMERIC:
922  case kDECIMAL:
923  if (exact_type_match) {
924  arrow_throw_if(col.type_id() != Type::INT64, "Expected int64 type");
925  }
926  return convert_arrow_val_to_import_buffer(
927  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
928  case kFLOAT:
929  if (exact_type_match) {
930  arrow_throw_if(col.type_id() != Type::FLOAT, "Expected float type");
931  }
932  return convert_arrow_val_to_import_buffer(
933  cd, col, *float_buffer_, slice_range, bad_rows_tracker);
934  case kDOUBLE:
935  if (exact_type_match) {
936  arrow_throw_if(col.type_id() != Type::DOUBLE, "Expected double type");
937  }
938  return convert_arrow_val_to_import_buffer(
939  cd, col, *double_buffer_, slice_range, bad_rows_tracker);
940  case kTEXT:
941  case kVARCHAR:
942  case kCHAR:
943  if (exact_type_match) {
944  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
945  "Expected string type");
946  }
947  return convert_arrow_val_to_import_buffer(
948  cd, col, *string_buffer_, slice_range, bad_rows_tracker);
949  case kTIME:
950  if (exact_type_match) {
951  arrow_throw_if(col.type_id() != Type::TIME32 && col.type_id() != Type::TIME64,
952  "Expected time32 or time64 type");
953  }
954  return convert_arrow_val_to_import_buffer(
955  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
956  case kTIMESTAMP:
957  if (exact_type_match) {
958  arrow_throw_if(col.type_id() != Type::TIMESTAMP, "Expected timestamp type");
959  }
960  return convert_arrow_val_to_import_buffer(
961  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
962  case kDATE:
963  if (exact_type_match) {
964  arrow_throw_if(col.type_id() != Type::DATE32 && col.type_id() != Type::DATE64,
965  "Expected date32 or date64 type");
966  }
967  return convert_arrow_val_to_import_buffer(
968  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
969  case kPOINT:
970  case kLINESTRING:
971  case kPOLYGON:
972  case kMULTIPOLYGON:
973  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
974  "Expected string type");
975  return convert_arrow_val_to_import_buffer(
976  cd, col, *geo_string_buffer_, slice_range, bad_rows_tracker);
977  case kARRAY:
978  throw std::runtime_error("Arrow array appends not yet supported");
979  default:
980  throw std::runtime_error("Invalid Type");
981  }
982 }
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:527
#define DOUBLE
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:522
std::vector< float > * float_buffer_
Definition: Importer.h:525
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:526
std::vector< int32_t > * int_buffer_
Definition: Importer.h:523
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:524
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:520
Definition: sqltypes.h:52
Definition: sqltypes.h:53
#define TIMESTAMP
Definition: sqltypes.h:41
#define STRING
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:521
size_t convert_arrow_val_to_import_buffer(const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
#define FLOAT
std::string columnName
void arrow_throw_if(const bool cond, const std::string &message)
Definition: ArrowImporter.h:41
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:528

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const std::string_view  val,
const bool  is_null,
const CopyParams & copy_params 
)

Definition at line 541 of file Importer.cpp.

References addArray(), addBigint(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), Datum::bigintval, Datum::boolval, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), Datum::smallintval, import_export::StringToArray(), StringToDatum(), Datum::tinyintval, to_string(), and run_benchmark_import::type.

544  {
545  const auto type = cd->columnType.get_type();
546  switch (type) {
547  case kBOOLEAN: {
548  if (is_null) {
549  if (cd->columnType.get_notnull()) {
550  throw std::runtime_error("NULL for column " + cd->columnName);
551  }
553  } else {
554  auto ti = cd->columnType;
555  Datum d = StringToDatum(val, ti);
556  addBoolean(static_cast<int8_t>(d.boolval));
557  }
558  break;
559  }
560  case kTINYINT: {
561  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
562  auto ti = cd->columnType;
563  Datum d = StringToDatum(val, ti);
564  addTinyint(d.tinyintval);
565  } else {
566  if (cd->columnType.get_notnull()) {
567  throw std::runtime_error("NULL for column " + cd->columnName);
568  }
570  }
571  break;
572  }
573  case kSMALLINT: {
574  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
575  auto ti = cd->columnType;
576  Datum d = StringToDatum(val, ti);
577  addSmallint(d.smallintval);
578  } else {
579  if (cd->columnType.get_notnull()) {
580  throw std::runtime_error("NULL for column " + cd->columnName);
581  }
583  }
584  break;
585  }
586  case kINT: {
587  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
588  auto ti = cd->columnType;
589  Datum d = StringToDatum(val, ti);
590  addInt(d.intval);
591  } else {
592  if (cd->columnType.get_notnull()) {
593  throw std::runtime_error("NULL for column " + cd->columnName);
594  }
596  }
597  break;
598  }
599  case kBIGINT: {
600  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
601  auto ti = cd->columnType;
602  Datum d = StringToDatum(val, ti);
603  addBigint(d.bigintval);
604  } else {
605  if (cd->columnType.get_notnull()) {
606  throw std::runtime_error("NULL for column " + cd->columnName);
607  }
609  }
610  break;
611  }
612  case kDECIMAL:
613  case kNUMERIC: {
614  if (!is_null) {
615  auto ti = cd->columnType;
616  Datum d = StringToDatum(val, ti);
617  addBigint(d.bigintval);
618  } else {
619  if (cd->columnType.get_notnull()) {
620  throw std::runtime_error("NULL for column " + cd->columnName);
621  }
623  }
624  break;
625  }
626  case kFLOAT:
627  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
628  addFloat(static_cast<float>(std::atof(std::string(val).c_str())));
629  } else {
630  if (cd->columnType.get_notnull()) {
631  throw std::runtime_error("NULL for column " + cd->columnName);
632  }
634  }
635  break;
636  case kDOUBLE:
637  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
638  addDouble(std::atof(std::string(val).c_str()));
639  } else {
640  if (cd->columnType.get_notnull()) {
641  throw std::runtime_error("NULL for column " + cd->columnName);
642  }
644  }
645  break;
646  case kTEXT:
647  case kVARCHAR:
648  case kCHAR: {
649  // @TODO(wei) for now, use empty string for nulls
650  if (is_null) {
651  if (cd->columnType.get_notnull()) {
652  throw std::runtime_error("NULL for column " + cd->columnName);
653  }
654  addString(std::string());
655  } else {
656  if (val.length() > StringDictionary::MAX_STRLEN) {
657  throw std::runtime_error("String too long for column " + cd->columnName +
658  " was " + std::to_string(val.length()) + " max is " +
659  std::to_string(StringDictionary::MAX_STRLEN));
660  }
661  addString(val);
662  }
663  break;
664  }
665  case kTIME:
666  case kTIMESTAMP:
667  case kDATE:
668  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
669  SQLTypeInfo ti = cd->columnType;
670  Datum d = StringToDatum(val, ti);
671  addBigint(d.bigintval);
672  } else {
673  if (cd->columnType.get_notnull()) {
674  throw std::runtime_error("NULL for column " + cd->columnName);
675  }
677  }
678  break;
679  case kARRAY: {
680  if (is_null && cd->columnType.get_notnull()) {
681  throw std::runtime_error("NULL for column " + cd->columnName);
682  }
683  SQLTypeInfo ti = cd->columnType;
684  if (IS_STRING(ti.get_subtype())) {
685  std::vector<std::string> string_vec;
686  // Just parse string array, don't push it to buffer yet as we might throw
687  import_export::delimited_parser::parse_string_array(
688  std::string(val), copy_params, string_vec);
689  if (!is_null) {
690  if (ti.get_size() > 0) {
691  auto sti = ti.get_elem_type();
692  size_t expected_size = ti.get_size() / sti.get_size();
693  size_t actual_size = string_vec.size();
694  if (actual_size != expected_size) {
695  throw std::runtime_error("Fixed length array column " + cd->columnName +
696  " expects " + std::to_string(expected_size) +
697  " values, received " +
698  std::to_string(actual_size));
699  }
700  }
701  addStringArray(string_vec);
702  } else {
703  if (ti.get_size() > 0) {
704  // TODO: remove once NULL fixlen arrays are allowed
705  throw std::runtime_error("Fixed length array column " + cd->columnName +
706  " currently cannot accept NULL arrays");
707  }
708  addStringArray(std::nullopt);
709  }
710  } else {
711  if (!is_null) {
712  ArrayDatum d = StringToArray(std::string(val), ti, copy_params);
713  if (d.is_null) { // val could be "NULL"
714  addArray(NullArray(ti));
715  } else {
716  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
717  throw std::runtime_error("Fixed length array for column " + cd->columnName +
718  " has incorrect length: " + std::string(val));
719  }
720  addArray(d);
721  }
722  } else {
723  addArray(NullArray(ti));
724  }
725  }
726  break;
727  }
728  case kPOINT:
729  case kLINESTRING:
730  case kPOLYGON:
731  case kMULTIPOLYGON:
732  addGeoString(val);
733  break;
734  default:
735  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
736  }
737 }
int8_t tinyintval
Definition: sqltypes.h:212
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
void addBigint(const int64_t v)
Definition: Importer.h:240
OptionalStringVector & addStringArray()
Definition: Importer.h:252
void addSmallint(const int16_t v)
Definition: Importer.h:236
Definition: sqltypes.h:49
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:246
void addDouble(const double v)
Definition: Importer.h:244
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:388
int8_t boolval
Definition: sqltypes.h:211
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
int32_t intval
Definition: sqltypes.h:214
std::string to_string(char const *&&v)
void addFloat(const float v)
Definition: Importer.h:242
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
void addGeoString(const std::string_view v)
Definition: Importer.h:248
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:232
void addTinyint(const int8_t v)
Definition: Importer.h:234
int64_t bigintval
Definition: sqltypes.h:215
void addInt(const int32_t v)
Definition: Importer.h:238
int16_t smallintval
Definition: sqltypes.h:213
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:275
Definition: sqltypes.h:52
Definition: sqltypes.h:53
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:335
void addArray(const ArrayDatum &v)
Definition: Importer.h:250
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
#define CHECK(condition)
Definition: Logger.h:209
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:850
std::string columnName
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec)
Parses given string array and inserts into given vector of strings.

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const TDatum &  val,
const bool  is_null 
)

Definition at line 1314 of file Importer.cpp.

References addArray(), addBigint(), import_export::addBinaryStringArray(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, decimal_to_int_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::TDatumToArrayDatum(), and run_benchmark_import::type.

1316  {
1317  const auto type = cd->columnType.is_decimal() ? decimal_to_int_type(cd->columnType)
1318  : cd->columnType.get_type();
1319  switch (type) {
1320  case kBOOLEAN: {
1321  if (is_null) {
1322  if (cd->columnType.get_notnull()) {
1323  throw std::runtime_error("NULL for column " + cd->columnName);
1324  }
1326  } else {
1327  addBoolean((int8_t)datum.val.int_val);
1328  }
1329  break;
1330  }
1331  case kTINYINT:
1332  if (!is_null) {
1333  addTinyint((int8_t)datum.val.int_val);
1334  } else {
1335  if (cd->columnType.get_notnull()) {
1336  throw std::runtime_error("NULL for column " + cd->columnName);
1337  }
1339  }
1340  break;
1341  case kSMALLINT:
1342  if (!is_null) {
1343  addSmallint((int16_t)datum.val.int_val);
1344  } else {
1345  if (cd->columnType.get_notnull()) {
1346  throw std::runtime_error("NULL for column " + cd->columnName);
1347  }
1349  }
1350  break;
1351  case kINT:
1352  if (!is_null) {
1353  addInt((int32_t)datum.val.int_val);
1354  } else {
1355  if (cd->columnType.get_notnull()) {
1356  throw std::runtime_error("NULL for column " + cd->columnName);
1357  }
1359  }
1360  break;
1361  case kBIGINT:
1362  if (!is_null) {
1363  addBigint(datum.val.int_val);
1364  } else {
1365  if (cd->columnType.get_notnull()) {
1366  throw std::runtime_error("NULL for column " + cd->columnName);
1367  }
1369  }
1370  break;
1371  case kFLOAT:
1372  if (!is_null) {
1373  addFloat((float)datum.val.real_val);
1374  } else {
1375  if (cd->columnType.get_notnull()) {
1376  throw std::runtime_error("NULL for column " + cd->columnName);
1377  }
1379  }
1380  break;
1381  case kDOUBLE:
1382  if (!is_null) {
1383  addDouble(datum.val.real_val);
1384  } else {
1385  if (cd->columnType.get_notnull()) {
1386  throw std::runtime_error("NULL for column " + cd->columnName);
1387  }
1389  }
1390  break;
1391  case kTEXT:
1392  case kVARCHAR:
1393  case kCHAR: {
1394  // @TODO(wei) for now, use empty string for nulls
1395  if (is_null) {
1396  if (cd->columnType.get_notnull()) {
1397  throw std::runtime_error("NULL for column " + cd->columnName);
1398  }
1399  addString(std::string());
1400  } else {
1401  addString(datum.val.str_val);
1402  }
1403  break;
1404  }
1405  case kTIME:
1406  case kTIMESTAMP:
1407  case kDATE: {
1408  if (!is_null) {
1409  addBigint(datum.val.int_val);
1410  } else {
1411  if (cd->columnType.get_notnull()) {
1412  throw std::runtime_error("NULL for column " + cd->columnName);
1413  }
1415  }
1416  break;
1417  }
1418  case kARRAY:
1419  if (is_null && cd->columnType.get_notnull()) {
1420  throw std::runtime_error("NULL for column " + cd->columnName);
1421  }
1422  if (IS_STRING(cd->columnType.get_subtype())) {
1423  OptionalStringVector& string_vec = addStringArray();
1424  addBinaryStringArray(datum, *string_vec);
1425  } else {
1426  if (!is_null) {
1427  addArray(TDatumToArrayDatum(datum, cd->columnType));
1428  } else {
1430  }
1431  }
1432  break;
1433  case kPOINT:
1434  case kLINESTRING:
1435  case kPOLYGON:
1436  case kMULTIPOLYGON:
1437  if (is_null) {
1438  if (cd->columnType.get_notnull()) {
1439  throw std::runtime_error("NULL for column " + cd->columnName);
1440  }
1441  addGeoString(std::string());
1442  } else {
1443  addGeoString(datum.val.str_val);
1444  }
1445  break;
1446  default:
1447  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
1448  }
1449 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
#define NULL_DOUBLE
void addBigint(const int64_t v)
Definition: Importer.h:240
OptionalStringVector & addStringArray()
Definition: Importer.h:252
void addSmallint(const int16_t v)
Definition: Importer.h:236
Definition: sqltypes.h:49
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:246
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:93
void addDouble(const double v)
Definition: Importer.h:244
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:388
void addFloat(const float v)
Definition: Importer.h:242
void addGeoString(const std::string_view v)
Definition: Importer.h:248
ArrayDatum TDatumToArrayDatum(const TDatum &datum, const SQLTypeInfo &ti)
Definition: Importer.cpp:482
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:232
void addTinyint(const int8_t v)
Definition: Importer.h:234
void addInt(const int32_t v)
Definition: Importer.h:238
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:455
Definition: sqltypes.h:52
Definition: sqltypes.h:53
void addArray(const ArrayDatum &v)
Definition: Importer.h:250
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
#define CHECK(condition)
Definition: Logger.h:209
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
bool is_decimal() const
Definition: sqltypes.h:512
std::string columnName
void addBinaryStringArray(const TDatum &datum, std::vector< std::string > &string_vec)
Definition: Importer.cpp:430

+ Here is the call graph for this function:

size_t import_export::TypedImportBuffer::add_values ( const ColumnDescriptor * cd,
const TColumn &  data 
)

Definition at line 985 of file Importer.cpp.

References addArray(), addStringArray(), bigint_buffer_, bool_buffer_, checked_malloc(), ColumnDescriptor::columnName, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), i, inline_fixed_encoding_null_val(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), smallint_buffer_, string_buffer_, and tinyint_buffer_.

985  {
986  size_t dataSize = 0;
987  if (cd->columnType.get_notnull()) {
988  // We can't have any null values for this column; to have them is an error
989  if (std::any_of(col.nulls.begin(), col.nulls.end(), [](int i) { return i != 0; })) {
990  throw std::runtime_error("NULL for column " + cd->columnName);
991  }
992  }
993 
994  switch (cd->columnType.get_type()) {
995  case kBOOLEAN: {
996  dataSize = col.data.int_col.size();
997  bool_buffer_->reserve(dataSize);
998  for (size_t i = 0; i < dataSize; i++) {
999  if (col.nulls[i]) {
1001  } else {
1002  bool_buffer_->push_back((int8_t)col.data.int_col[i]);
1003  }
1004  }
1005  break;
1006  }
1007  case kTINYINT: {
1008  dataSize = col.data.int_col.size();
1009  tinyint_buffer_->reserve(dataSize);
1010  for (size_t i = 0; i < dataSize; i++) {
1011  if (col.nulls[i]) {
1013  } else {
1014  tinyint_buffer_->push_back((int8_t)col.data.int_col[i]);
1015  }
1016  }
1017  break;
1018  }
1019  case kSMALLINT: {
1020  dataSize = col.data.int_col.size();
1021  smallint_buffer_->reserve(dataSize);
1022  for (size_t i = 0; i < dataSize; i++) {
1023  if (col.nulls[i]) {
1025  } else {
1026  smallint_buffer_->push_back((int16_t)col.data.int_col[i]);
1027  }
1028  }
1029  break;
1030  }
1031  case kINT: {
1032  dataSize = col.data.int_col.size();
1033  int_buffer_->reserve(dataSize);
1034  for (size_t i = 0; i < dataSize; i++) {
1035  if (col.nulls[i]) {
1037  } else {
1038  int_buffer_->push_back((int32_t)col.data.int_col[i]);
1039  }
1040  }
1041  break;
1042  }
1043  case kBIGINT:
1044  case kNUMERIC:
1045  case kDECIMAL: {
1046  dataSize = col.data.int_col.size();
1047  bigint_buffer_->reserve(dataSize);
1048  for (size_t i = 0; i < dataSize; i++) {
1049  if (col.nulls[i]) {
1051  } else {
1052  bigint_buffer_->push_back((int64_t)col.data.int_col[i]);
1053  }
1054  }
1055  break;
1056  }
1057  case kFLOAT: {
1058  dataSize = col.data.real_col.size();
1059  float_buffer_->reserve(dataSize);
1060  for (size_t i = 0; i < dataSize; i++) {
1061  if (col.nulls[i]) {
1062  float_buffer_->push_back(NULL_FLOAT);
1063  } else {
1064  float_buffer_->push_back((float)col.data.real_col[i]);
1065  }
1066  }
1067  break;
1068  }
1069  case kDOUBLE: {
1070  dataSize = col.data.real_col.size();
1071  double_buffer_->reserve(dataSize);
1072  for (size_t i = 0; i < dataSize; i++) {
1073  if (col.nulls[i]) {
1074  double_buffer_->push_back(NULL_DOUBLE);
1075  } else {
1076  double_buffer_->push_back((double)col.data.real_col[i]);
1077  }
1078  }
1079  break;
1080  }
1081  case kTEXT:
1082  case kVARCHAR:
1083  case kCHAR: {
1084  // TODO: for now, use empty string for nulls
1085  dataSize = col.data.str_col.size();
1086  string_buffer_->reserve(dataSize);
1087  for (size_t i = 0; i < dataSize; i++) {
1088  if (col.nulls[i]) {
1089  string_buffer_->push_back(std::string());
1090  } else {
1091  string_buffer_->push_back(col.data.str_col[i]);
1092  }
1093  }
1094  break;
1095  }
1096  case kTIME:
1097  case kTIMESTAMP:
1098  case kDATE: {
1099  dataSize = col.data.int_col.size();
1100  bigint_buffer_->reserve(dataSize);
1101  for (size_t i = 0; i < dataSize; i++) {
1102  if (col.nulls[i]) {
1104  } else {
1105  bigint_buffer_->push_back(static_cast<int64_t>(col.data.int_col[i]));
1106  }
1107  }
1108  break;
1109  }
1110  case kPOINT:
1111  case kLINESTRING:
1112  case kPOLYGON:
1113  case kMULTIPOLYGON: {
1114  dataSize = col.data.str_col.size();
1115  geo_string_buffer_->reserve(dataSize);
1116  for (size_t i = 0; i < dataSize; i++) {
1117  if (col.nulls[i]) {
1118  // TODO: add support for NULL geo
1119  geo_string_buffer_->push_back(std::string());
1120  } else {
1121  geo_string_buffer_->push_back(col.data.str_col[i]);
1122  }
1123  }
1124  break;
1125  }
1126  case kARRAY: {
1127  dataSize = col.data.arr_col.size();
1128  if (IS_STRING(cd->columnType.get_subtype())) {
1129  for (size_t i = 0; i < dataSize; i++) {
1130  OptionalStringVector& string_vec = addStringArray();
1131  if (!col.nulls[i]) {
1132  size_t stringArrSize = col.data.arr_col[i].data.str_col.size();
1133  for (size_t str_idx = 0; str_idx != stringArrSize; ++str_idx) {
1134  string_vec->push_back(col.data.arr_col[i].data.str_col[str_idx]);
1135  }
1136  }
1137  }
1138  } else {
1139  auto elem_ti = cd->columnType.get_subtype();
1140  switch (elem_ti) {
1141  case kBOOLEAN: {
1142  for (size_t i = 0; i < dataSize; i++) {
1143  if (col.nulls[i]) {
1145  } else {
1146  size_t len = col.data.arr_col[i].data.int_col.size();
1147  size_t byteSize = len * sizeof(int8_t);
1148  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1149  int8_t* p = buf;
1150  for (size_t j = 0; j < len; ++j) {
1151  // Explicitly checking the item for null because
1152  // casting null value (-128) to bool results
1153  // incorrect value 1.
1154  if (col.data.arr_col[i].nulls[j]) {
1155  *p = static_cast<int8_t>(
1157  } else {
1158  *(bool*)p = static_cast<bool>(col.data.arr_col[i].data.int_col[j]);
1159  }
1160  p += sizeof(bool);
1161  }
1162  addArray(ArrayDatum(byteSize, buf, false));
1163  }
1164  }
1165  break;
1166  }
1167  case kTINYINT: {
1168  for (size_t i = 0; i < dataSize; i++) {
1169  if (col.nulls[i]) {
1171  } else {
1172  size_t len = col.data.arr_col[i].data.int_col.size();
1173  size_t byteSize = len * sizeof(int8_t);
1174  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1175  int8_t* p = buf;
1176  for (size_t j = 0; j < len; ++j) {
1177  *(int8_t*)p = static_cast<int8_t>(col.data.arr_col[i].data.int_col[j]);
1178  p += sizeof(int8_t);
1179  }
1180  addArray(ArrayDatum(byteSize, buf, false));
1181  }
1182  }
1183  break;
1184  }
1185  case kSMALLINT: {
1186  for (size_t i = 0; i < dataSize; i++) {
1187  if (col.nulls[i]) {
1189  } else {
1190  size_t len = col.data.arr_col[i].data.int_col.size();
1191  size_t byteSize = len * sizeof(int16_t);
1192  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1193  int8_t* p = buf;
1194  for (size_t j = 0; j < len; ++j) {
1195  *(int16_t*)p =
1196  static_cast<int16_t>(col.data.arr_col[i].data.int_col[j]);
1197  p += sizeof(int16_t);
1198  }
1199  addArray(ArrayDatum(byteSize, buf, false));
1200  }
1201  }
1202  break;
1203  }
1204  case kINT: {
1205  for (size_t i = 0; i < dataSize; i++) {
1206  if (col.nulls[i]) {
1208  } else {
1209  size_t len = col.data.arr_col[i].data.int_col.size();
1210  size_t byteSize = len * sizeof(int32_t);
1211  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1212  int8_t* p = buf;
1213  for (size_t j = 0; j < len; ++j) {
1214  *(int32_t*)p =
1215  static_cast<int32_t>(col.data.arr_col[i].data.int_col[j]);
1216  p += sizeof(int32_t);
1217  }
1218  addArray(ArrayDatum(byteSize, buf, false));
1219  }
1220  }
1221  break;
1222  }
1223  case kBIGINT:
1224  case kNUMERIC:
1225  case kDECIMAL: {
1226  for (size_t i = 0; i < dataSize; i++) {
1227  if (col.nulls[i]) {
1229  } else {
1230  size_t len = col.data.arr_col[i].data.int_col.size();
1231  size_t byteSize = len * sizeof(int64_t);
1232  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1233  int8_t* p = buf;
1234  for (size_t j = 0; j < len; ++j) {
1235  *(int64_t*)p =
1236  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1237  p += sizeof(int64_t);
1238  }
1239  addArray(ArrayDatum(byteSize, buf, false));
1240  }
1241  }
1242  break;
1243  }
1244  case kFLOAT: {
1245  for (size_t i = 0; i < dataSize; i++) {
1246  if (col.nulls[i]) {
1248  } else {
1249  size_t len = col.data.arr_col[i].data.real_col.size();
1250  size_t byteSize = len * sizeof(float);
1251  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1252  int8_t* p = buf;
1253  for (size_t j = 0; j < len; ++j) {
1254  *(float*)p = static_cast<float>(col.data.arr_col[i].data.real_col[j]);
1255  p += sizeof(float);
1256  }
1257  addArray(ArrayDatum(byteSize, buf, false));
1258  }
1259  }
1260  break;
1261  }
1262  case kDOUBLE: {
1263  for (size_t i = 0; i < dataSize; i++) {
1264  if (col.nulls[i]) {
1266  } else {
1267  size_t len = col.data.arr_col[i].data.real_col.size();
1268  size_t byteSize = len * sizeof(double);
1269  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1270  int8_t* p = buf;
1271  for (size_t j = 0; j < len; ++j) {
1272  *(double*)p = static_cast<double>(col.data.arr_col[i].data.real_col[j]);
1273  p += sizeof(double);
1274  }
1275  addArray(ArrayDatum(byteSize, buf, false));
1276  }
1277  }
1278  break;
1279  }
1280  case kTIME:
1281  case kTIMESTAMP:
1282  case kDATE: {
1283  for (size_t i = 0; i < dataSize; i++) {
1284  if (col.nulls[i]) {
1286  } else {
1287  size_t len = col.data.arr_col[i].data.int_col.size();
1288  size_t byteWidth = sizeof(int64_t);
1289  size_t byteSize = len * byteWidth;
1290  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1291  int8_t* p = buf;
1292  for (size_t j = 0; j < len; ++j) {
1293  *reinterpret_cast<int64_t*>(p) =
1294  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1295  p += sizeof(int64_t);
1296  }
1297  addArray(ArrayDatum(byteSize, buf, false));
1298  }
1299  }
1300  break;
1301  }
1302  default:
1303  throw std::runtime_error("Invalid Array Type");
1304  }
1305  }
1306  break;
1307  }
1308  default:
1309  throw std::runtime_error("Invalid Type");
1310  }
1311  return dataSize;
1312 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
#define NULL_DOUBLE
OptionalStringVector & addStringArray()
Definition: Importer.h:252
Definition: sqltypes.h:49
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:527
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:93
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:388
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:522
std::vector< float > * float_buffer_
Definition: Importer.h:525
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:526
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
std::vector< int32_t > * int_buffer_
Definition: Importer.h:523
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:524
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:520
Definition: sqltypes.h:52
Definition: sqltypes.h:53
void addArray(const ArrayDatum &v)
Definition: Importer.h:250
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:521
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:850
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:528

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addArray ( const ArrayDatum & v)
inline

Definition at line 250 of file Importer.h.

References array_buffer_.

Referenced by add_value(), and add_values().

250 { array_buffer_->push_back(v); }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:529

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBigint ( const int64_t  v)
inline

Definition at line 240 of file Importer.h.

References bigint_buffer_.

Referenced by add_value().

240 { bigint_buffer_->push_back(v); }
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:524

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBoolean ( const int8_t  v)
inline

Definition at line 232 of file Importer.h.

References bool_buffer_.

Referenced by add_value().

232 { bool_buffer_->push_back(v); }
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:520

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addDefaultValues ( const ColumnDescriptor * cd,
size_t  num_rows 
)

Definition at line 1451 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_decimal_value_to_scale(), ColumnDescriptor::default_value, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), int_buffer_, is_null(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), smallint_buffer_, string_array_buffer_, string_buffer_, import_export::StringToArray(), StringToDatum(), tinyint_buffer_, to_string(), and run_benchmark_import::type.

1451  {
1452  bool is_null = !cd->default_value.has_value();
1453  CHECK(!(is_null && cd->columnType.get_notnull()));
1454  const auto type = cd->columnType.get_type();
1455  auto ti = cd->columnType;
1456  auto val = cd->default_value.value_or("NULL");
1457  CopyParams cp;
1458  switch (type) {
1459  case kBOOLEAN: {
1460  if (!is_null) {
1461  bool_buffer_->resize(num_rows, StringToDatum(val, ti).boolval);
1462  } else {
1463  bool_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1464  }
1465  break;
1466  }
1467  case kTINYINT: {
1468  if (!is_null) {
1469  tinyint_buffer_->resize(num_rows, StringToDatum(val, ti).tinyintval);
1470  } else {
1472  }
1473  break;
1474  }
1475  case kSMALLINT: {
1476  if (!is_null) {
1477  smallint_buffer_->resize(num_rows, StringToDatum(val, ti).smallintval);
1478  } else {
1479  smallint_buffer_->resize(num_rows,
1481  }
1482  break;
1483  }
1484  case kINT: {
1485  if (!is_null) {
1486  int_buffer_->resize(num_rows, StringToDatum(val, ti).intval);
1487  } else {
1488  int_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1489  }
1490  break;
1491  }
1492  case kBIGINT: {
1493  if (!is_null) {
1494  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1495  } else {
1497  }
1498  break;
1499  }
1500  case kDECIMAL:
1501  case kNUMERIC: {
1502  if (!is_null) {
1503  const auto converted_decimal_value = convert_decimal_value_to_scale(
1504  StringToDatum(val, ti).bigintval, ti, cd->columnType);
1505  bigint_buffer_->resize(num_rows, converted_decimal_value);
1506  } else {
1508  }
1509  break;
1510  }
1511  case kFLOAT:
1512  if (!is_null) {
1513  float_buffer_->resize(num_rows,
1514  static_cast<float>(std::atof(std::string(val).c_str())));
1515  } else {
1516  float_buffer_->resize(num_rows, NULL_FLOAT);
1517  }
1518  break;
1519  case kDOUBLE:
1520  if (!is_null) {
1521  double_buffer_->resize(num_rows, std::atof(std::string(val).c_str()));
1522  } else {
1523  double_buffer_->resize(num_rows, NULL_DOUBLE);
1524  }
1525  break;
1526  case kTEXT:
1527  case kVARCHAR:
1528  case kCHAR: {
1529  if (is_null) {
1530  string_buffer_->resize(num_rows, "");
1531  } else {
1532  if (val.length() > StringDictionary::MAX_STRLEN) {
1533  throw std::runtime_error("String too long for column " + cd->columnName +
1534  " was " + std::to_string(val.length()) + " max is " +
1536  }
1537  string_buffer_->resize(num_rows, val);
1538  }
1539  break;
1540  }
1541  case kTIME:
1542  case kTIMESTAMP:
1543  case kDATE:
1544  if (!is_null) {
1545  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1546  } else {
1548  }
1549  break;
1550  case kARRAY: {
1551  if (IS_STRING(ti.get_subtype())) {
1552  std::vector<std::string> string_vec;
1553  // Just parse string array, don't push it to buffer yet as we might throw
1555  std::string(val), cp, string_vec);
1556  if (!is_null) {
1557  // TODO: add support for NULL string arrays
1558  if (ti.get_size() > 0) {
1559  auto sti = ti.get_elem_type();
1560  size_t expected_size = ti.get_size() / sti.get_size();
1561  size_t actual_size = string_vec.size();
1562  if (actual_size != expected_size) {
1563  throw std::runtime_error("Fixed length array column " + cd->columnName +
1564  " expects " + std::to_string(expected_size) +
1565  " values, received " +
1566  std::to_string(actual_size));
1567  }
1568  }
1569  string_array_buffer_->resize(num_rows, string_vec);
1570  } else {
1571  if (ti.get_size() > 0) {
1572  // TODO: remove once NULL fixlen arrays are allowed
1573  throw std::runtime_error("Fixed length array column " + cd->columnName +
1574  " currently cannot accept NULL arrays");
1575  }
1576  // TODO: add support for NULL string arrays, replace with addStringArray(),
1577  // for now add whatever parseStringArray() outputs for NULLs ("NULL")
1578  string_array_buffer_->resize(num_rows, string_vec);
1579  }
1580  } else {
1581  if (!is_null) {
1582  ArrayDatum d = StringToArray(std::string(val), ti, cp);
1583  if (d.is_null) { // val could be "NULL"
1584  array_buffer_->resize(num_rows, NullArray(ti));
1585  } else {
1586  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
1587  throw std::runtime_error("Fixed length array for column " + cd->columnName +
1588  " has incorrect length: " + std::string(val));
1589  }
1590  array_buffer_->resize(num_rows, d);
1591  }
1592  } else {
1593  array_buffer_->resize(num_rows, NullArray(ti));
1594  }
1595  }
1596  break;
1597  }
1598  case kPOINT:
1599  case kLINESTRING:
1600  case kPOLYGON:
1601  case kMULTIPOLYGON:
1602  geo_string_buffer_->resize(num_rows, val);
1603  break;
1604  default:
1605  CHECK(false) << "TypedImportBuffer::addDefaultValues() does not support type "
1606  << type;
1607  }
1608 }
#define NULL_DOUBLE
Definition: sqltypes.h:49
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:527
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:529
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:388
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:522
std::vector< float > * float_buffer_
Definition: Importer.h:525
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:526
std::string to_string(char const *&&v)
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
std::vector< int32_t > * int_buffer_
Definition: Importer.h:523
CONSTEXPR DEVICE bool is_null(const T &value)
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:530
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:524
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:275
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:520
std::optional< std::string > default_value
Definition: sqltypes.h:52
Definition: sqltypes.h:53
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:335
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:473
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:521
#define CHECK(condition)
Definition: Logger.h:209
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:528
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec)
Parses given string array and inserts into given vector of strings.

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedString ( const std::vector< std::string > &  string_vec)

Definition at line 501 of file Importer.cpp.

References CHECK, column_desc_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, logger::ERROR, SQLTypeInfo::get_size(), getColumnDesc(), StringDictionary::getOrAddBulk(), LOG, StringDictionary::MAX_STRLEN, string_dict_, string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

501  {
503  std::vector<std::string_view> string_view_vec;
504  string_view_vec.reserve(string_vec.size());
505  for (const auto& str : string_vec) {
506  if (str.size() > StringDictionary::MAX_STRLEN) {
507  std::ostringstream oss;
508  oss << "while processing dictionary for column " << getColumnDesc()->columnName
509  << " a string was detected too long for encoding, string length = "
510  << str.size() << ", first 100 characters are '" << str.substr(0, 100) << "'";
511  throw std::runtime_error(oss.str());
512  }
513  string_view_vec.push_back(str);
514  }
515  try {
516  switch (column_desc_->columnType.get_size()) {
517  case 1:
518  string_dict_i8_buffer_->resize(string_view_vec.size());
519  string_dict_->getOrAddBulk(string_view_vec, string_dict_i8_buffer_->data());
520  break;
521  case 2:
522  string_dict_i16_buffer_->resize(string_view_vec.size());
523  string_dict_->getOrAddBulk(string_view_vec, string_dict_i16_buffer_->data());
524  break;
525  case 4:
526  string_dict_i32_buffer_->resize(string_view_vec.size());
527  string_dict_->getOrAddBulk(string_view_vec, string_dict_i32_buffer_->data());
528  break;
529  default:
530  CHECK(false);
531  }
532  } catch (std::exception& e) {
533  std::ostringstream oss;
534  oss << "while processing dictionary for column " << getColumnDesc()->columnName
535  << " : " << e.what();
536  LOG(ERROR) << oss.str();
537  throw std::runtime_error(oss.str());
538  }
539 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
#define LOG(tag)
Definition: Logger.h:203
StringDictionary * string_dict_
Definition: Importer.h:539
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:533
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:535
const ColumnDescriptor * column_desc_
Definition: Importer.h:538
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:534
#define CHECK(condition)
Definition: Logger.h:209
const ColumnDescriptor * getColumnDesc() const
Definition: Importer.h:313
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedStringArray ( const std::vector< OptionalStringVector > &  string_array_vec)
inline

Definition at line 263 of file Importer.h.

References anonymous_namespace{Utm.h}::a, CHECK, checked_malloc(), column_desc_, ColumnDescriptor::columnType, import_export::ImporterUtils::composeNullArray(), StringDictionary::getOrAddBulkArray(), i, StringDictionary::MAX_STRLEN, string_array_dict_buffer_, and string_dict_.

264  {
266 
267  // first check data is ok
268  for (auto& p : string_array_vec) {
269  if (!p) {
270  continue;
271  }
272  for (const auto& str : *p) {
273  if (str.size() > StringDictionary::MAX_STRLEN) {
274  throw std::runtime_error("String too long for dictionary encoding.");
275  }
276  }
277  }
278 
279  // to avoid copying, create a string view of each string in the
280  // `string_array_vec` where the array holding the string is *not null*
281  std::vector<std::vector<std::string_view>> string_view_array_vec;
282  for (auto& p : string_array_vec) {
283  if (!p) {
284  continue;
285  }
286  auto& array = string_view_array_vec.emplace_back();
287  for (const auto& str : *p) {
288  array.emplace_back(str);
289  }
290  }
291 
292  std::vector<std::vector<int32_t>> ids_array(0);
293  string_dict_->getOrAddBulkArray(string_view_array_vec, ids_array);
294 
295  size_t i, j;
296  for (i = 0, j = 0; i < string_array_vec.size(); ++i) {
297  if (!string_array_vec[i]) { // null array
298  string_array_dict_buffer_->push_back(
300  } else { // non-null array
301  auto& p = ids_array[j++];
302  size_t len = p.size() * sizeof(int32_t);
303  auto a = static_cast<int32_t*>(checked_malloc(len));
304  memcpy(a, &p[0], len);
305  string_array_dict_buffer_->push_back(
306  ArrayDatum(len, reinterpret_cast<int8_t*>(a), false));
307  }
308  }
309  }
StringDictionary * string_dict_
Definition: Importer.h:539
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:410
constexpr double a
Definition: Utm.h:38
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:536
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
void getOrAddBulkArray(const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec)
const ColumnDescriptor * column_desc_
Definition: Importer.h:538
#define CHECK(condition)
Definition: Logger.h:209
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDouble ( const double  v)
inline

Definition at line 244 of file Importer.h.

References double_buffer_.

Referenced by add_value().

244 { double_buffer_->push_back(v); }
std::vector< double > * double_buffer_
Definition: Importer.h:526

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addFloat ( const float  v)
inline

Definition at line 242 of file Importer.h.

References float_buffer_.

Referenced by add_value().

242 { float_buffer_->push_back(v); }
std::vector< float > * float_buffer_
Definition: Importer.h:525

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addGeoString ( const std::string_view  v)
inline

Definition at line 248 of file Importer.h.

References geo_string_buffer_.

Referenced by add_value().

248 { geo_string_buffer_->emplace_back(v); }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:528

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addInt ( const int32_t  v)
inline

Definition at line 238 of file Importer.h.

References int_buffer_.

Referenced by add_value().

238 { int_buffer_->push_back(v); }
std::vector< int32_t > * int_buffer_
Definition: Importer.h:523

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addSmallint ( const int16_t  v)
inline

Definition at line 236 of file Importer.h.

References smallint_buffer_.

Referenced by add_value().

236 { smallint_buffer_->push_back(v); }
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:522

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addString ( const std::string_view  v)
inline

Definition at line 246 of file Importer.h.

References string_buffer_.

Referenced by add_value().

246 { string_buffer_->emplace_back(v); }
std::vector< std::string > * string_buffer_
Definition: Importer.h:527

+ Here is the caller graph for this function:

OptionalStringVector& import_export::TypedImportBuffer::addStringArray ( )
inline

Definition at line 252 of file Importer.h.

References string_array_buffer_.

Referenced by add_value(), and add_values().

252  {
253  string_array_buffer_->emplace_back(std::vector<std::string>{});
254  return string_array_buffer_->back();
255  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:530

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addStringArray ( const OptionalStringVector & arr)
inline

Definition at line 257 of file Importer.h.

References string_array_buffer_.

257  {
258  string_array_buffer_->push_back(arr);
259  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:530
void import_export::TypedImportBuffer::addTinyint ( const int8_t  v)
inline

Definition at line 234 of file Importer.h.

References tinyint_buffer_.

Referenced by add_value().

234 { tinyint_buffer_->push_back(v); }
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:521

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::clear ( )
inline

Definition at line 405 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

405  {
406  switch (column_desc_->columnType.get_type()) {
407  case kBOOLEAN: {
408  bool_buffer_->clear();
409  break;
410  }
411  case kTINYINT: {
412  tinyint_buffer_->clear();
413  break;
414  }
415  case kSMALLINT: {
416  smallint_buffer_->clear();
417  break;
418  }
419  case kINT: {
420  int_buffer_->clear();
421  break;
422  }
423  case kBIGINT:
424  case kNUMERIC:
425  case kDECIMAL: {
426  bigint_buffer_->clear();
427  break;
428  }
429  case kFLOAT: {
430  float_buffer_->clear();
431  break;
432  }
433  case kDOUBLE: {
434  double_buffer_->clear();
435  break;
436  }
437  case kTEXT:
438  case kVARCHAR:
439  case kCHAR: {
440  string_buffer_->clear();
441  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
442  switch (column_desc_->columnType.get_size()) {
443  case 1:
444  string_dict_i8_buffer_->clear();
445  break;
446  case 2:
447  string_dict_i16_buffer_->clear();
448  break;
449  case 4:
450  string_dict_i32_buffer_->clear();
451  break;
452  default:
453  CHECK(false);
454  }
455  }
456  break;
457  }
458  case kDATE:
459  case kTIME:
460  case kTIMESTAMP:
461  bigint_buffer_->clear();
462  break;
463  case kARRAY: {
464  if (IS_STRING(column_desc_->columnType.get_subtype())) {
465  string_array_buffer_->clear();
466  string_array_dict_buffer_->clear();
467  } else {
468  array_buffer_->clear();
469  }
470  break;
471  }
472  case kPOINT:
473  case kLINESTRING:
474  case kPOLYGON:
475  case kMULTIPOLYGON:
476  geo_string_buffer_->clear();
477  break;
478  default:
479  CHECK(false);
480  }
481  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:527
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:529
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:522
std::vector< float > * float_buffer_
Definition: Importer.h:525
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:526
std::vector< int32_t > * int_buffer_
Definition: Importer.h:523
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:536
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:533
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:530
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:524
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:520
Definition: sqltypes.h:52
Definition: sqltypes.h:53
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:535
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:538
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:534
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:521
#define CHECK(condition)
Definition: Logger.h:209
Definition: sqltypes.h:45
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:528

+ Here is the call graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const arrow::Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange &  slice_range,
BadRowsTracker *const  bad_rows_tracker 
)

Referenced by add_arrow_values().

+ Here is the caller graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange &  slice_range,
import_export::BadRowsTracker *const  bad_rows_tracker 
)

Definition at line 799 of file Importer.cpp.

References col_idx, anonymous_namespace{ArrowImporter.h}::error_context(), geo_string_buffer_, SQLTypeInfo::get_type(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), import_buffers, import_export::BadRowsTracker::importer, import_export::BadRowsTracker::mutex, import_export::BadRowsTracker::rows, import_export::Importer::set_geo_physical_import_buffer(), and anonymous_namespace{ArrowImporter.h}::value_getter().

804  {
805  auto data =
806  std::make_unique<DataBuffer<DATA_TYPE>>(cd, array, buffer, bad_rows_tracker);
807  auto f_value_getter = value_getter(array, cd, bad_rows_tracker);
808  std::function<void(const int64_t)> f_add_geo_phy_cols = [&](const int64_t row) {};
809  if (bad_rows_tracker && cd->columnType.is_geometry()) {
810  f_add_geo_phy_cols = [&](const int64_t row) {
811  // Populate physical columns (ref. DBHandler::load_table)
812  std::vector<double> coords, bounds;
813  std::vector<int> ring_sizes, poly_rings;
814  int render_group = 0;
815  SQLTypeInfo ti;
816  // replace any unexpected exception from getGeoColumns or other
817  // on this path with a GeoImportException so that we wont over
818  // push a null to the logical column...
819  try {
820  SQLTypeInfo import_ti{ti};
821  if (array.IsNull(row)) {
822  Geospatial::GeoTypesFactory::getNullGeoColumns(
823  import_ti, coords, bounds, ring_sizes, poly_rings, false);
824  } else {
825  arrow_throw_if<GeoImportException>(
826  !Geospatial::GeoTypesFactory::getGeoColumns(geo_string_buffer_->back(),
827  ti,
828  coords,
829  bounds,
830  ring_sizes,
831  poly_rings,
832  false),
833  error_context(cd, bad_rows_tracker) + "Invalid geometry");
834  arrow_throw_if<GeoImportException>(
835  cd->columnType.get_type() != ti.get_type(),
836  error_context(cd, bad_rows_tracker) + "Geometry type mismatch");
837  }
838  auto col_idx_workpad = col_idx; // what a pitfall!!
839  import_export::Importer::set_geo_physical_import_buffer(
840  bad_rows_tracker->importer->getCatalog(),
841  cd,
842  *import_buffers,
843  col_idx_workpad,
844  coords,
845  bounds,
846  ring_sizes,
847  poly_rings,
848  render_group);
849  } catch (GeoImportException&) {
850  throw;
851  } catch (std::runtime_error& e) {
852  throw GeoImportException(e.what());
853  } catch (const std::exception& e) {
854  throw GeoImportException(e.what());
855  } catch (...) {
856  throw GeoImportException("unknown exception");
857  }
858  };
859  }
860  auto f_mark_a_bad_row = [&](const auto row) {
861  std::unique_lock<std::mutex> lck(bad_rows_tracker->mutex);
862  bad_rows_tracker->rows.insert(row - slice_range.first);
863  };
864  buffer.reserve(slice_range.second - slice_range.first);
865  for (size_t row = slice_range.first; row < slice_range.second; ++row) {
866  try {
867  *data << (array.IsNull(row) ? nullptr : f_value_getter(array, row));
868  f_add_geo_phy_cols(row);
869  } catch (GeoImportException&) {
870  f_mark_a_bad_row(row);
871  } catch (ArrowImporterException&) {
872  // trace bad rows of each column; otherwise rethrow.
873  if (bad_rows_tracker) {
874  *data << nullptr;
875  f_mark_a_bad_row(row);
876  } else {
877  throw;
878  }
879  }
880  }
881  return buffer.size();
882 }
auto value_getter(const arrow::Array &array, const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1144
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group)
Definition: Importer.cpp:1630
Catalog_Namespace::Catalog & getCatalog()
Definition: Importer.h:826
std::set< int64_t > rows
Definition: Importer.h:77
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:515
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:937
std::string error_context(const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
Definition: ArrowImporter.h:76
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:528

+ Here is the call graph for this function:

template<typename DATA_TYPE >
auto import_export::TypedImportBuffer::del_values ( std::vector< DATA_TYPE > &  buffer,
BadRowsTracker *const  bad_rows_tracker 
)
auto import_export::TypedImportBuffer::del_values ( const SQLTypes  type,
BadRowsTracker *const  bad_rows_tracker 
)
std::vector< DataBlockPtr > import_export::TypedImportBuffer::get_data_block_pointers ( const std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers)
static

Definition at line 3033 of file Importer.cpp.

References DataBlockPtr::arraysPtr, threading_serial::async(), CHECK, CHECK_EQ, getStringArrayBuffer(), getTypeInfo(), import_buffers, SQLTypeInfo::is_number(), IS_STRING, SQLTypeInfo::is_string(), kARRAY, kBOOLEAN, kENCODING_DICT, kENCODING_NONE, DataBlockPtr::numbersPtr, run_benchmark_import::result, and DataBlockPtr::stringsPtr.

Referenced by import_export::fill_missing_columns(), import_export::Loader::loadImpl(), and import_export::Loader::loadToShard().

3034  {
3035  std::vector<DataBlockPtr> result(import_buffers.size());
3036  std::vector<std::pair<const size_t, std::future<int8_t*>>>
3037  encoded_data_block_ptrs_futures;
3038  // make all async calls to string dictionary here and then continue execution
3039  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
3040  if (import_buffers[buf_idx]->getTypeInfo().is_string() &&
3041  import_buffers[buf_idx]->getTypeInfo().get_compression() != kENCODING_NONE) {
3042  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
3043  CHECK_EQ(kENCODING_DICT, import_buffers[buf_idx]->getTypeInfo().get_compression());
3044 
3045  encoded_data_block_ptrs_futures.emplace_back(std::make_pair(
3046  buf_idx,
3047  std::async(std::launch::async, [buf_idx, &import_buffers, string_payload_ptr] {
3048  import_buffers[buf_idx]->addDictEncodedString(*string_payload_ptr);
3049  return import_buffers[buf_idx]->getStringDictBuffer();
3050  })));
3051  }
3052  }
3053 
3054  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
3055  DataBlockPtr p;
3056  if (import_buffers[buf_idx]->getTypeInfo().is_number() ||
3057  import_buffers[buf_idx]->getTypeInfo().is_time() ||
3058  import_buffers[buf_idx]->getTypeInfo().get_type() == kBOOLEAN) {
3059  p.numbersPtr = import_buffers[buf_idx]->getAsBytes();
3060  } else if (import_buffers[buf_idx]->getTypeInfo().is_string()) {
3061  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
3062  if (import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_NONE) {
3063  p.stringsPtr = string_payload_ptr;
3064  } else {
3065  // This condition means we have column which is ENCODED string. We already made
3066  // Async request to gain the encoded integer values above so we should skip this
3067  // iteration and continue.
3068  continue;
3069  }
3070  } else if (import_buffers[buf_idx]->getTypeInfo().is_geometry()) {
3071  auto geo_payload_ptr = import_buffers[buf_idx]->getGeoStringBuffer();
3072  p.stringsPtr = geo_payload_ptr;
3073  } else {
3074  CHECK(import_buffers[buf_idx]->getTypeInfo().get_type() == kARRAY);
3075  if (IS_STRING(import_buffers[buf_idx]->getTypeInfo().get_subtype())) {
3076  CHECK(import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_DICT);
3077  import_buffers[buf_idx]->addDictEncodedStringArray(
3078  *import_buffers[buf_idx]->getStringArrayBuffer());
3079  p.arraysPtr = import_buffers[buf_idx]->getStringArrayDictBuffer();
3080  } else {
3081  p.arraysPtr = import_buffers[buf_idx]->getArrayBuffer();
3082  }
3083  }
3084  result[buf_idx] = p;
3085  }
3086 
3087  // wait for the async requests we made for string dictionary
3088  for (auto& encoded_ptr_future : encoded_data_block_ptrs_futures) {
3089  result[encoded_ptr_future.first].numbersPtr = encoded_ptr_future.second.get();
3090  }
3091  return result;
3092 }
#define CHECK_EQ(x, y)
Definition: Logger.h:217
const SQLTypeInfo & getTypeInfo() const
Definition: Importer.h:311
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:227
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:228
bool is_number() const
Definition: sqltypes.h:514
future< Result > async(Fn &&fn, Args &&...args)
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:515
#define IS_STRING(T)
Definition: sqltypes.h:250
#define CHECK(condition)
Definition: Logger.h:209
std::vector< OptionalStringVector > * getStringArrayBuffer() const
Definition: Importer.h:377
bool is_string() const
Definition: sqltypes.h:509
int8_t * numbersPtr
Definition: sqltypes.h:226

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getArrayBuffer ( ) const
inline

Definition at line 375 of file Importer.h.

References array_buffer_.

375 { return array_buffer_; }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:529
int8_t* import_export::TypedImportBuffer::getAsBytes ( ) const
inline

Definition at line 317 of file Importer.h.

References bigint_buffer_, bool_buffer_, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, SQLTypeInfo::get_type(), int_buffer_, kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, smallint_buffer_, and tinyint_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

317  {
318  switch (column_desc_->columnType.get_type()) {
319  case kBOOLEAN:
320  return reinterpret_cast<int8_t*>(bool_buffer_->data());
321  case kTINYINT:
322  return reinterpret_cast<int8_t*>(tinyint_buffer_->data());
323  case kSMALLINT:
324  return reinterpret_cast<int8_t*>(smallint_buffer_->data());
325  case kINT:
326  return reinterpret_cast<int8_t*>(int_buffer_->data());
327  case kBIGINT:
328  case kNUMERIC:
329  case kDECIMAL:
330  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
331  case kFLOAT:
332  return reinterpret_cast<int8_t*>(float_buffer_->data());
333  case kDOUBLE:
334  return reinterpret_cast<int8_t*>(double_buffer_->data());
335  case kDATE:
336  case kTIME:
337  case kTIMESTAMP:
338  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
339  default:
340  abort();
341  }
342  }
Definition: sqltypes.h:49
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:522
std::vector< float > * float_buffer_
Definition: Importer.h:525
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:526
std::vector< int32_t > * int_buffer_
Definition: Importer.h:523
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:524
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:520
Definition: sqltypes.h:53
const ColumnDescriptor * column_desc_
Definition: Importer.h:538
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:521
Definition: sqltypes.h:45
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const ColumnDescriptor* import_export::TypedImportBuffer::getColumnDesc ( ) const
inline

Definition at line 313 of file Importer.h.

References column_desc_.

Referenced by addDictEncodedString().

313 { return column_desc_; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:538

+ Here is the caller graph for this function:

size_t import_export::TypedImportBuffer::getElementSize ( ) const
inline

Definition at line 344 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_type(), kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, and kTINYINT.

344  {
345  switch (column_desc_->columnType.get_type()) {
346  case kBOOLEAN:
347  return sizeof((*bool_buffer_)[0]);
348  case kTINYINT:
349  return sizeof((*tinyint_buffer_)[0]);
350  case kSMALLINT:
351  return sizeof((*smallint_buffer_)[0]);
352  case kINT:
353  return sizeof((*int_buffer_)[0]);
354  case kBIGINT:
355  case kNUMERIC:
356  case kDECIMAL:
357  return sizeof((*bigint_buffer_)[0]);
358  case kFLOAT:
359  return sizeof((*float_buffer_)[0]);
360  case kDOUBLE:
361  return sizeof((*double_buffer_)[0]);
362  case kDATE:
363  case kTIME:
364  case kTIMESTAMP:
365  return sizeof((*bigint_buffer_)[0]);
366  default:
367  abort();
368  }
369  }
Definition: sqltypes.h:49
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
Definition: sqltypes.h:53
const ColumnDescriptor * column_desc_
Definition: Importer.h:538
Definition: sqltypes.h:45
SQLTypeInfo columnType

+ Here is the call graph for this function:

std::vector<std::string>* import_export::TypedImportBuffer::getGeoStringBuffer ( ) const
inline

Definition at line 373 of file Importer.h.

References geo_string_buffer_.

373 { return geo_string_buffer_; }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:528
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::getStringArrayBuffer ( ) const
inline

Definition at line 377 of file Importer.h.

References string_array_buffer_.

Referenced by get_data_block_pointers().

377  {
378  return string_array_buffer_;
379  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:530

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getStringArrayDictBuffer ( ) const
inline

Definition at line 381 of file Importer.h.

References string_array_dict_buffer_.

381  {
382  return string_array_dict_buffer_;
383  }
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:536
std::vector<std::string>* import_export::TypedImportBuffer::getStringBuffer ( ) const
inline

Definition at line 371 of file Importer.h.

References string_buffer_.

371 { return string_buffer_; }
std::vector< std::string > * string_buffer_
Definition: Importer.h:527
int8_t* import_export::TypedImportBuffer::getStringDictBuffer ( ) const
inline

Definition at line 385 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_size(), string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::int_value_at().

385  {
386  switch (column_desc_->columnType.get_size()) {
387  case 1:
388  return reinterpret_cast<int8_t*>(string_dict_i8_buffer_->data());
389  case 2:
390  return reinterpret_cast<int8_t*>(string_dict_i16_buffer_->data());
391  case 4:
392  return reinterpret_cast<int8_t*>(string_dict_i32_buffer_->data());
393  default:
394  abort();
395  }
396  }
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:533
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:535
const ColumnDescriptor * column_desc_
Definition: Importer.h:538
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:534
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionary* import_export::TypedImportBuffer::getStringDictionary ( ) const
inline

Definition at line 315 of file Importer.h.

References string_dict_.

315 { return string_dict_; }
StringDictionary * string_dict_
Definition: Importer.h:539
const SQLTypeInfo& import_export::TypedImportBuffer::getTypeInfo ( ) const
inline

Definition at line 311 of file Importer.h.

References column_desc_, and ColumnDescriptor::columnType.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), get_data_block_pointers(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

311 { return column_desc_->columnType; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:538
SQLTypeInfo columnType

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::pop_value ( )

Definition at line 739 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, decimal_to_int_type(), double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_buffer_, tinyint_buffer_, and run_benchmark_import::type.

739  {
740  const auto type = column_desc_->columnType.is_decimal()
741  ? decimal_to_int_type(column_desc_->columnType)
742  : column_desc_->columnType.get_type();
743  switch (type) {
744  case kBOOLEAN:
745  bool_buffer_->pop_back();
746  break;
747  case kTINYINT:
748  tinyint_buffer_->pop_back();
749  break;
750  case kSMALLINT:
751  smallint_buffer_->pop_back();
752  break;
753  case kINT:
754  int_buffer_->pop_back();
755  break;
756  case kBIGINT:
757  bigint_buffer_->pop_back();
758  break;
759  case kFLOAT:
760  float_buffer_->pop_back();
761  break;
762  case kDOUBLE:
763  double_buffer_->pop_back();
764  break;
765  case kTEXT:
766  case kVARCHAR:
767  case kCHAR:
768  string_buffer_->pop_back();
769  break;
770  case kDATE:
771  case kTIME:
772  case kTIMESTAMP:
773  bigint_buffer_->pop_back();
774  break;
775  case kARRAY:
776  if (IS_STRING(column_desc_->columnType.get_subtype())) {
777  string_array_buffer_->pop_back();
778  } else {
779  array_buffer_->pop_back();
780  }
781  break;
782  case kPOINT:
783  case kLINESTRING:
784  case kPOLYGON:
785  case kMULTIPOLYGON:
786  geo_string_buffer_->pop_back();
787  break;
788  default:
789  CHECK(false) << "TypedImportBuffer::pop_value() does not support type " << type;
790  }
791 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:527
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:529
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:522
std::vector< float > * float_buffer_
Definition: Importer.h:525
std::vector< double > * double_buffer_
Definition: Importer.h:526
std::vector< int32_t > * int_buffer_
Definition: Importer.h:523
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:530
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:524
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:520
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:455
Definition: sqltypes.h:52
Definition: sqltypes.h:53
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:538
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:521
#define CHECK(condition)
Definition: Logger.h:209
Definition: sqltypes.h:45
SQLTypeInfo columnType
bool is_decimal() const
Definition: sqltypes.h:512
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:528

+ Here is the call graph for this function:

bool import_export::TypedImportBuffer::stringDictCheckpoint ( )
inline

Definition at line 398 of file Importer.h.

References StringDictionary::checkpoint(), and string_dict_.

398  {
399  if (string_dict_ == nullptr) {
400  return true;
401  }
402  return string_dict_->checkpoint();
403  }
StringDictionary * string_dict_
Definition: Importer.h:539
bool checkpoint() noexcept

+ Here is the call graph for this function:

Member Data Documentation

union { ... }
union { ... }
std::vector<ArrayDatum>* import_export::TypedImportBuffer::array_buffer_
std::vector<int64_t>* import_export::TypedImportBuffer::bigint_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::bool_buffer_
size_t import_export::TypedImportBuffer::col_idx

Definition at line 516 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer().

std::vector<double>* import_export::TypedImportBuffer::double_buffer_
std::vector<float>* import_export::TypedImportBuffer::float_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::geo_string_buffer_
std::vector<std::unique_ptr<TypedImportBuffer> >* import_export::TypedImportBuffer::import_buffers

Definition at line 515 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer(), and get_data_block_pointers().

std::vector<int32_t>* import_export::TypedImportBuffer::int_buffer_
std::vector<int16_t>* import_export::TypedImportBuffer::smallint_buffer_
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::string_array_buffer_
std::vector<ArrayDatum>* import_export::TypedImportBuffer::string_array_dict_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::string_buffer_
StringDictionary* import_export::TypedImportBuffer::string_dict_
private
std::vector<uint16_t>* import_export::TypedImportBuffer::string_dict_i16_buffer_
std::vector<int32_t>* import_export::TypedImportBuffer::string_dict_i32_buffer_
std::vector<uint8_t>* import_export::TypedImportBuffer::string_dict_i8_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::tinyint_buffer_

The documentation for this class was generated from the following files: