OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
import_export::TypedImportBuffer Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::TypedImportBuffer:
+ Collaboration diagram for import_export::TypedImportBuffer:

Public Member Functions

 TypedImportBuffer (const ColumnDescriptor *col_desc, StringDictionary *string_dict)
 
 ~TypedImportBuffer ()
 
void addBoolean (const int8_t v)
 
void addTinyint (const int8_t v)
 
void addSmallint (const int16_t v)
 
void addInt (const int32_t v)
 
void addBigint (const int64_t v)
 
void addFloat (const float v)
 
void addDouble (const double v)
 
void addString (const std::string_view v)
 
void addGeoString (const std::string_view v)
 
void addArray (const ArrayDatum &v)
 
std::vector< std::string > & addStringArray ()
 
void addStringArray (const std::vector< std::string > &arr)
 
void addDictEncodedString (const std::vector< std::string > &string_vec)
 
void addDictEncodedStringArray (const std::vector< std::vector< std::string >> &string_array_vec)
 
const SQLTypeInfo & getTypeInfo () const
 
const ColumnDescriptor * getColumnDesc () const
 
StringDictionary * getStringDictionary () const
 
int8_t * getAsBytes () const
 
size_t getElementSize () const
 
std::vector< std::string > * getStringBuffer () const
 
std::vector< std::string > * getGeoStringBuffer () const
 
std::vector< ArrayDatum > * getArrayBuffer () const
 
std::vector< std::vector< std::string > > * getStringArrayBuffer () const
 
std::vector< ArrayDatum > * getStringArrayDictBuffer () const
 
int8_t * getStringDictBuffer () const
 
bool stringDictCheckpoint ()
 
void clear ()
 
size_t add_values (const ColumnDescriptor *cd, const TColumn &data)
 
size_t add_arrow_values (const ColumnDescriptor *cd, const arrow::Array &data, const bool exact_type_match, const ArraySliceRange &slice_range, BadRowsTracker *bad_rows_tracker)
 
void add_value (const ColumnDescriptor *cd, const std::string_view val, const bool is_null, const CopyParams &copy_params)
 
void add_value (const ColumnDescriptor *cd, const TDatum &val, const bool is_null)
 
void pop_value ()
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
auto del_values (std::vector< DATA_TYPE > &buffer, BadRowsTracker *const bad_rows_tracker)
 
auto del_values (const SQLTypes type, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, import_export::BadRowsTracker *const bad_rows_tracker)
 

Static Public Member Functions

static std::vector< DataBlockPtr > get_data_block_pointers (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
 

Public Attributes

std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
 
size_t col_idx
 
std::vector< int8_t > * bool_buffer_
 
std::vector< int8_t > * tinyint_buffer_
 
std::vector< int16_t > * smallint_buffer_
 
std::vector< int32_t > * int_buffer_
 
std::vector< int64_t > * bigint_buffer_
 
std::vector< float > * float_buffer_
 
std::vector< double > * double_buffer_
 
std::vector< std::string > * string_buffer_
 
std::vector< std::string > * geo_string_buffer_
 
std::vector< ArrayDatum > * array_buffer_
 
std::vector< std::vector< std::string > > * string_array_buffer_
 
std::vector< uint8_t > * string_dict_i8_buffer_
 
std::vector< uint16_t > * string_dict_i16_buffer_
 
std::vector< int32_t > * string_dict_i32_buffer_
 
std::vector< ArrayDatum > * string_array_dict_buffer_
 

Private Attributes

union {
   std::vector< int8_t > *   bool_buffer_
 
   std::vector< int8_t > *   tinyint_buffer_
 
   std::vector< int16_t > *   smallint_buffer_
 
   std::vector< int32_t > *   int_buffer_
 
   std::vector< int64_t > *   bigint_buffer_
 
   std::vector< float > *   float_buffer_
 
   std::vector< double > *   double_buffer_
 
   std::vector< std::string > *   string_buffer_
 
   std::vector< std::string > *   geo_string_buffer_
 
   std::vector< ArrayDatum > *   array_buffer_
 
   std::vector< std::vector< std::string > > *   string_array_buffer_
 
}; 
 
union {
   std::vector< uint8_t > *   string_dict_i8_buffer_
 
   std::vector< uint16_t > *   string_dict_i16_buffer_
 
   std::vector< int32_t > *   string_dict_i32_buffer_
 
   std::vector< ArrayDatum > *   string_array_dict_buffer_
 
}; 
 
const ColumnDescriptor * column_desc_
 
StringDictionary * string_dict_
 

Detailed Description

Definition at line 83 of file Importer.h.

Constructor & Destructor Documentation

import_export::TypedImportBuffer::TypedImportBuffer ( const ColumnDescriptor * col_desc,
StringDictionary * string_dict 
)
inline

Definition at line 85 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

86  : column_desc_(col_desc), string_dict_(string_dict) {
87  switch (col_desc->columnType.get_type()) {
88  case kBOOLEAN:
89  bool_buffer_ = new std::vector<int8_t>();
90  break;
91  case kTINYINT:
92  tinyint_buffer_ = new std::vector<int8_t>();
93  break;
94  case kSMALLINT:
95  smallint_buffer_ = new std::vector<int16_t>();
96  break;
97  case kINT:
98  int_buffer_ = new std::vector<int32_t>();
99  break;
100  case kBIGINT:
101  case kNUMERIC:
102  case kDECIMAL:
103  bigint_buffer_ = new std::vector<int64_t>();
104  break;
105  case kFLOAT:
106  float_buffer_ = new std::vector<float>();
107  break;
108  case kDOUBLE:
109  double_buffer_ = new std::vector<double>();
110  break;
111  case kTEXT:
112  case kVARCHAR:
113  case kCHAR:
114  string_buffer_ = new std::vector<std::string>();
115  if (col_desc->columnType.get_compression() == kENCODING_DICT) {
116  switch (col_desc->columnType.get_size()) {
117  case 1:
118  string_dict_i8_buffer_ = new std::vector<uint8_t>();
119  break;
120  case 2:
121  string_dict_i16_buffer_ = new std::vector<uint16_t>();
122  break;
123  case 4:
124  string_dict_i32_buffer_ = new std::vector<int32_t>();
125  break;
126  default:
127  CHECK(false);
128  }
129  }
130  break;
131  case kDATE:
132  case kTIME:
133  case kTIMESTAMP:
134  bigint_buffer_ = new std::vector<int64_t>();
135  break;
136  case kARRAY:
137  if (IS_STRING(col_desc->columnType.get_subtype())) {
139  string_array_buffer_ = new std::vector<std::vector<std::string>>();
140  string_array_dict_buffer_ = new std::vector<ArrayDatum>();
141  } else {
142  array_buffer_ = new std::vector<ArrayDatum>();
143  }
144  break;
145  case kPOINT:
146  case kLINESTRING:
147  case kPOLYGON:
148  case kMULTIPOLYGON:
149  geo_string_buffer_ = new std::vector<std::string>();
150  break;
151  default:
152  CHECK(false);
153  }
154  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:315
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
Definition: sqltypes.h:48
std::vector< std::string > * string_buffer_
Definition: Importer.h:494
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:496
StringDictionary * string_dict_
Definition: Importer.h:506
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:489
std::vector< float > * float_buffer_
Definition: Importer.h:492
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
std::vector< double > * double_buffer_
Definition: Importer.h:493
std::vector< int32_t > * int_buffer_
Definition: Importer.h:490
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:503
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:500
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:491
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:487
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:497
Definition: sqltypes.h:51
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:502
Definition: sqltypes.h:40
#define IS_STRING(T)
Definition: sqltypes.h:244
const ColumnDescriptor * column_desc_
Definition: Importer.h:505
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:501
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:488
#define CHECK(condition)
Definition: Logger.h:203
Definition: sqltypes.h:44
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:495

+ Here is the call graph for this function:

import_export::TypedImportBuffer::~TypedImportBuffer ( )
inline

Definition at line 156 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

156  {
157  switch (column_desc_->columnType.get_type()) {
158  case kBOOLEAN:
159  delete bool_buffer_;
160  break;
161  case kTINYINT:
162  delete tinyint_buffer_;
163  break;
164  case kSMALLINT:
165  delete smallint_buffer_;
166  break;
167  case kINT:
168  delete int_buffer_;
169  break;
170  case kBIGINT:
171  case kNUMERIC:
172  case kDECIMAL:
173  delete bigint_buffer_;
174  break;
175  case kFLOAT:
176  delete float_buffer_;
177  break;
178  case kDOUBLE:
179  delete double_buffer_;
180  break;
181  case kTEXT:
182  case kVARCHAR:
183  case kCHAR:
184  delete string_buffer_;
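185  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
186  switch (column_desc_->columnType.get_size()) {
187  case 1:
188  delete string_dict_i8_buffer_;
189  break;
190  case 2:
191  delete string_dict_i16_buffer_;
192  break;
193  case 4:
194  delete string_dict_i32_buffer_;
195  break;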
196  }
197  }
198  break;
199  case kDATE:
200  case kTIME:
201  case kTIMESTAMP:
202  delete bigint_buffer_;
203  break;
204  case kARRAY:
205  if (IS_STRING(column_desc_->columnType.get_subtype())) {
206  delete string_array_buffer_;
207  delete string_array_dict_buffer_;
208  } else {
209  delete array_buffer_;
210  }
211  break;
212  case kPOINT:
213  case kLINESTRING:
214  case kPOLYGON:
215  case kMULTIPOLYGON:
216  delete geo_string_buffer_;
217  break;
218  default:
219  CHECK(false);
220  }
221  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:315
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
Definition: sqltypes.h:48
std::vector< std::string > * string_buffer_
Definition: Importer.h:494
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:496
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:489
std::vector< float > * float_buffer_
Definition: Importer.h:492
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
std::vector< double > * double_buffer_
Definition: Importer.h:493
std::vector< int32_t > * int_buffer_
Definition: Importer.h:490
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:503
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:500
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:491
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:487
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:497
Definition: sqltypes.h:51
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:502
Definition: sqltypes.h:40
#define IS_STRING(T)
Definition: sqltypes.h:244
const ColumnDescriptor * column_desc_
Definition: Importer.h:505
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:501
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:488
#define CHECK(condition)
Definition: Logger.h:203
Definition: sqltypes.h:44
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:495

+ Here is the call graph for this function:

Member Function Documentation

size_t import_export::TypedImportBuffer::add_arrow_values ( const ColumnDescriptor * cd,
const arrow::Array &  data,
const bool  exact_type_match,
const ArraySliceRange & slice_range,
BadRowsTracker * bad_rows_tracker 
)

Definition at line 880 of file Importer.cpp.

References arrow_throw_if(), bigint_buffer_, bool_buffer_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_arrow_val_to_import_buffer(), DOUBLE, double_buffer_, FLOAT, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), int_buffer_, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, STRING, string_buffer_, TIMESTAMP, tinyint_buffer_, and run_benchmark_import::type.

884  {
885  const auto type = cd->columnType.get_type();
886  if (cd->columnType.get_notnull()) {
887  // We can't have any null values for this column; to have them is an error
888  arrow_throw_if(col.null_count() > 0, "NULL not allowed for column " + cd->columnName);
889  }
890 
891  switch (type) {
892  case kBOOLEAN:
893  if (exact_type_match) {
894  arrow_throw_if(col.type_id() != Type::BOOL, "Expected boolean type");
895  }
896  return convert_arrow_val_to_import_buffer(
897  cd, col, *bool_buffer_, slice_range, bad_rows_tracker);
898  case kTINYINT:
899  if (exact_type_match) {
900  arrow_throw_if(col.type_id() != Type::INT8, "Expected int8 type");
901  }
902  return convert_arrow_val_to_import_buffer(
903  cd, col, *tinyint_buffer_, slice_range, bad_rows_tracker);
904  case kSMALLINT:
905  if (exact_type_match) {
906  arrow_throw_if(col.type_id() != Type::INT16, "Expected int16 type");
907  }
908  return convert_arrow_val_to_import_buffer(
909  cd, col, *smallint_buffer_, slice_range, bad_rows_tracker);
910  case kINT:
911  if (exact_type_match) {
912  arrow_throw_if(col.type_id() != Type::INT32, "Expected int32 type");
913  }
914  return convert_arrow_val_to_import_buffer(
915  cd, col, *int_buffer_, slice_range, bad_rows_tracker);
916  case kBIGINT:
917  case kNUMERIC:
918  case kDECIMAL:
919  if (exact_type_match) {
920  arrow_throw_if(col.type_id() != Type::INT64, "Expected int64 type");
921  }
922  return convert_arrow_val_to_import_buffer(
923  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
924  case kFLOAT:
925  if (exact_type_match) {
926  arrow_throw_if(col.type_id() != Type::FLOAT, "Expected float type");
927  }
928  return convert_arrow_val_to_import_buffer(
929  cd, col, *float_buffer_, slice_range, bad_rows_tracker);
930  case kDOUBLE:
931  if (exact_type_match) {
932  arrow_throw_if(col.type_id() != Type::DOUBLE, "Expected double type");
933  }
934  return convert_arrow_val_to_import_buffer(
935  cd, col, *double_buffer_, slice_range, bad_rows_tracker);
936  case kTEXT:
937  case kVARCHAR:
938  case kCHAR:
939  if (exact_type_match) {
940  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
941  "Expected string type");
942  }
943  return convert_arrow_val_to_import_buffer(
944  cd, col, *string_buffer_, slice_range, bad_rows_tracker);
945  case kTIME:
946  if (exact_type_match) {
947  arrow_throw_if(col.type_id() != Type::TIME32 && col.type_id() != Type::TIME64,
948  "Expected time32 or time64 type");
949  }
950  return convert_arrow_val_to_import_buffer(
951  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
952  case kTIMESTAMP:
953  if (exact_type_match) {
954  arrow_throw_if(col.type_id() != Type::TIMESTAMP, "Expected timestamp type");
955  }
956  return convert_arrow_val_to_import_buffer(
957  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
958  case kDATE:
959  if (exact_type_match) {
960  arrow_throw_if(col.type_id() != Type::DATE32 && col.type_id() != Type::DATE64,
961  "Expected date32 or date64 type");
962  }
963  return convert_arrow_val_to_import_buffer(
964  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
965  case kPOINT:
966  case kLINESTRING:
967  case kPOLYGON:
968  case kMULTIPOLYGON:
969  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
970  "Expected string type");
971  return convert_arrow_val_to_import_buffer(
972  cd, col, *geo_string_buffer_, slice_range, bad_rows_tracker);
973  case kARRAY:
974  throw std::runtime_error("Arrow array appends not yet supported");
975  default:
976  throw std::runtime_error("Invalid Type");
977  }
978 }
Definition: sqltypes.h:48
std::vector< std::string > * string_buffer_
Definition: Importer.h:494
#define DOUBLE
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:489
std::vector< float > * float_buffer_
Definition: Importer.h:492
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
std::vector< double > * double_buffer_
Definition: Importer.h:493
std::vector< int32_t > * int_buffer_
Definition: Importer.h:490
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:491
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:487
Definition: sqltypes.h:51
Definition: sqltypes.h:52
#define TIMESTAMP
Definition: sqltypes.h:40
#define STRING
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:488
size_t convert_arrow_val_to_import_buffer(const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
Definition: sqltypes.h:44
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:321
#define FLOAT
std::string columnName
void arrow_throw_if(const bool cond, const std::string &message)
Definition: ArrowImporter.h:41
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:495

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const std::string_view  val,
const bool  is_null,
const CopyParams & copy_params 
)

Definition at line 532 of file Importer.cpp.

References addArray(), addBigint(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), Datum::bigintval, Datum::boolval, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_decimal_value_to_scale(), test_fsi::d, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), Datum::smallintval, import_export::StringToArray(), StringToDatum(), Datum::tinyintval, to_string(), and run_benchmark_import::type.

535  {
536  const auto type = cd->columnType.get_type();
537  switch (type) {
538  case kBOOLEAN: {
539  if (is_null) {
540  if (cd->columnType.get_notnull()) {
541  throw std::runtime_error("NULL for column " + cd->columnName);
542  }
544  } else {
545  auto ti = cd->columnType;
546  Datum d = StringToDatum(val, ti);
547  addBoolean(static_cast<int8_t>(d.boolval));
548  }
549  break;
550  }
551  case kTINYINT: {
552  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
553  auto ti = cd->columnType;
554  Datum d = StringToDatum(val, ti);
555  addTinyint(d.tinyintval);
556  } else {
557  if (cd->columnType.get_notnull()) {
558  throw std::runtime_error("NULL for column " + cd->columnName);
559  }
561  }
562  break;
563  }
564  case kSMALLINT: {
565  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
566  auto ti = cd->columnType;
567  Datum d = StringToDatum(val, ti);
568  addSmallint(d.smallintval);
569  } else {
570  if (cd->columnType.get_notnull()) {
571  throw std::runtime_error("NULL for column " + cd->columnName);
572  }
574  }
575  break;
576  }
577  case kINT: {
578  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
579  auto ti = cd->columnType;
580  Datum d = StringToDatum(val, ti);
581  addInt(d.intval);
582  } else {
583  if (cd->columnType.get_notnull()) {
584  throw std::runtime_error("NULL for column " + cd->columnName);
585  }
587  }
588  break;
589  }
590  case kBIGINT: {
591  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
592  auto ti = cd->columnType;
593  Datum d = StringToDatum(val, ti);
594  addBigint(d.bigintval);
595  } else {
596  if (cd->columnType.get_notnull()) {
597  throw std::runtime_error("NULL for column " + cd->columnName);
598  }
600  }
601  break;
602  }
603  case kDECIMAL:
604  case kNUMERIC: {
605  if (!is_null) {
606  SQLTypeInfo ti(kNUMERIC, 0, 0, false);
607  Datum d = StringToDatum(val, ti);
608  const auto converted_decimal_value =
609  convert_decimal_value_to_scale(d.bigintval, ti, cd->columnType);
610  addBigint(converted_decimal_value);
611  } else {
612  if (cd->columnType.get_notnull()) {
613  throw std::runtime_error("NULL for column " + cd->columnName);
614  }
616  }
617  break;
618  }
619  case kFLOAT:
620  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
621  addFloat(static_cast<float>(std::atof(std::string(val).c_str())));
622  } else {
623  if (cd->columnType.get_notnull()) {
624  throw std::runtime_error("NULL for column " + cd->columnName);
625  }
626  addFloat(NULL_FLOAT);
627  }
628  break;
629  case kDOUBLE:
630  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
631  addDouble(std::atof(std::string(val).c_str()));
632  } else {
633  if (cd->columnType.get_notnull()) {
634  throw std::runtime_error("NULL for column " + cd->columnName);
635  }
636  addDouble(NULL_DOUBLE);
637  }
638  break;
639  case kTEXT:
640  case kVARCHAR:
641  case kCHAR: {
642  // @TODO(wei) for now, use empty string for nulls
643  if (is_null) {
644  if (cd->columnType.get_notnull()) {
645  throw std::runtime_error("NULL for column " + cd->columnName);
646  }
647  addString(std::string());
648  } else {
649  if (val.length() > StringDictionary::MAX_STRLEN) {
650  throw std::runtime_error("String too long for column " + cd->columnName +
651  " was " + std::to_string(val.length()) + " max is " +
652  std::to_string(StringDictionary::MAX_STRLEN));
653  }
654  addString(val);
655  }
656  break;
657  }
658  case kTIME:
659  case kTIMESTAMP:
660  case kDATE:
661  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
662  SQLTypeInfo ti = cd->columnType;
663  Datum d = StringToDatum(val, ti);
664  addBigint(d.bigintval);
665  } else {
666  if (cd->columnType.get_notnull()) {
667  throw std::runtime_error("NULL for column " + cd->columnName);
668  }
670  }
671  break;
672  case kARRAY: {
673  if (is_null && cd->columnType.get_notnull()) {
674  throw std::runtime_error("NULL for column " + cd->columnName);
675  }
676  SQLTypeInfo ti = cd->columnType;
677  if (IS_STRING(ti.get_subtype())) {
678  std::vector<std::string> string_vec;
679  // Just parse string array, don't push it to buffer yet as we might throw
680  import_export::delimited_parser::parse_string_array(
681  std::string(val), copy_params, string_vec);
682  if (!is_null) {
683  // TODO: add support for NULL string arrays
684  if (ti.get_size() > 0) {
685  auto sti = ti.get_elem_type();
686  size_t expected_size = ti.get_size() / sti.get_size();
687  size_t actual_size = string_vec.size();
688  if (actual_size != expected_size) {
689  throw std::runtime_error("Fixed length array column " + cd->columnName +
690  " expects " + std::to_string(expected_size) +
691  " values, received " +
692  std::to_string(actual_size));
693  }
694  }
695  addStringArray(string_vec);
696  } else {
697  if (ti.get_size() > 0) {
698  // TODO: remove once NULL fixlen arrays are allowed
699  throw std::runtime_error("Fixed length array column " + cd->columnName +
700  " currently cannot accept NULL arrays");
701  }
702  // TODO: add support for NULL string arrays, replace with addStringArray(),
703  // for now add whatever parseStringArray() outputs for NULLs ("NULL")
704  addStringArray(string_vec);
705  }
706  } else {
707  if (!is_null) {
708  ArrayDatum d = StringToArray(std::string(val), ti, copy_params);
709  if (d.is_null) { // val could be "NULL"
710  addArray(NullArray(ti));
711  } else {
712  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
713  throw std::runtime_error("Fixed length array for column " + cd->columnName +
714  " has incorrect length: " + std::string(val));
715  }
716  addArray(d);
717  }
718  } else {
719  addArray(NullArray(ti));
720  }
721  }
722  break;
723  }
724  case kPOINT:
725  case kLINESTRING:
726  case kPOLYGON:
727  case kMULTIPOLYGON:
728  addGeoString(val);
729  break;
730  default:
731  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
732  }
733 }
int8_t tinyintval
Definition: sqltypes.h:206
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:315
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
void addBigint(const int64_t v)
Definition: Importer.h:231
void addSmallint(const int16_t v)
Definition: Importer.h:227
Definition: sqltypes.h:48
tuple d
Definition: test_fsi.py:9
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:237
bool boolval
Definition: sqltypes.h:205
void addDouble(const double v)
Definition: Importer.h:235
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:389
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
int32_t intval
Definition: sqltypes.h:208
std::string to_string(char const *&&v)
void addFloat(const float v)
Definition: Importer.h:233
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:202
void addGeoString(const std::string_view v)
Definition: Importer.h:239
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:223
void addTinyint(const int8_t v)
Definition: Importer.h:225
int64_t bigintval
Definition: sqltypes.h:209
void addInt(const int32_t v)
Definition: Importer.h:229
int16_t smallintval
Definition: sqltypes.h:207
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:239
Definition: sqltypes.h:51
Definition: sqltypes.h:52
std::vector< std::string > & addStringArray()
Definition: Importer.h:243
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:336
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:437
void addArray(const ArrayDatum &v)
Definition: Importer.h:241
Definition: sqltypes.h:40
#define IS_STRING(T)
Definition: sqltypes.h:244
#define CHECK(condition)
Definition: Logger.h:203
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:44
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:321
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:713
std::string columnName
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec)
Parses given string array and inserts into given vector of strings.

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const TDatum &  val,
const bool  is_null 
)

Definition at line 1302 of file Importer.cpp.

References addArray(), addBigint(), import_export::addBinaryStringArray(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, decimal_to_int_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::TDatumToArrayDatum(), and run_benchmark_import::type.

1304  {
1305  const auto type = cd->columnType.is_decimal() ? decimal_to_int_type(cd->columnType)
1306  : cd->columnType.get_type();
1307  switch (type) {
1308  case kBOOLEAN: {
1309  if (is_null) {
1310  if (cd->columnType.get_notnull()) {
1311  throw std::runtime_error("NULL for column " + cd->columnName);
1312  }
1314  } else {
1315  addBoolean((int8_t)datum.val.int_val);
1316  }
1317  break;
1318  }
1319  case kTINYINT:
1320  if (!is_null) {
1321  addTinyint((int8_t)datum.val.int_val);
1322  } else {
1323  if (cd->columnType.get_notnull()) {
1324  throw std::runtime_error("NULL for column " + cd->columnName);
1325  }
1327  }
1328  break;
1329  case kSMALLINT:
1330  if (!is_null) {
1331  addSmallint((int16_t)datum.val.int_val);
1332  } else {
1333  if (cd->columnType.get_notnull()) {
1334  throw std::runtime_error("NULL for column " + cd->columnName);
1335  }
1337  }
1338  break;
1339  case kINT:
1340  if (!is_null) {
1341  addInt((int32_t)datum.val.int_val);
1342  } else {
1343  if (cd->columnType.get_notnull()) {
1344  throw std::runtime_error("NULL for column " + cd->columnName);
1345  }
1347  }
1348  break;
1349  case kBIGINT:
1350  if (!is_null) {
1351  addBigint(datum.val.int_val);
1352  } else {
1353  if (cd->columnType.get_notnull()) {
1354  throw std::runtime_error("NULL for column " + cd->columnName);
1355  }
1357  }
1358  break;
1359  case kFLOAT:
1360  if (!is_null) {
1361  addFloat((float)datum.val.real_val);
1362  } else {
1363  if (cd->columnType.get_notnull()) {
1364  throw std::runtime_error("NULL for column " + cd->columnName);
1365  }
1367  }
1368  break;
1369  case kDOUBLE:
1370  if (!is_null) {
1371  addDouble(datum.val.real_val);
1372  } else {
1373  if (cd->columnType.get_notnull()) {
1374  throw std::runtime_error("NULL for column " + cd->columnName);
1375  }
1377  }
1378  break;
1379  case kTEXT:
1380  case kVARCHAR:
1381  case kCHAR: {
1382  // @TODO(wei) for now, use empty string for nulls
1383  if (is_null) {
1384  if (cd->columnType.get_notnull()) {
1385  throw std::runtime_error("NULL for column " + cd->columnName);
1386  }
1387  addString(std::string());
1388  } else {
1389  addString(datum.val.str_val);
1390  }
1391  break;
1392  }
1393  case kTIME:
1394  case kTIMESTAMP:
1395  case kDATE: {
1396  if (!is_null) {
1397  addBigint(datum.val.int_val);
1398  } else {
1399  if (cd->columnType.get_notnull()) {
1400  throw std::runtime_error("NULL for column " + cd->columnName);
1401  }
1403  }
1404  break;
1405  }
1406  case kARRAY:
1407  if (is_null && cd->columnType.get_notnull()) {
1408  throw std::runtime_error("NULL for column " + cd->columnName);
1409  }
1410  if (IS_STRING(cd->columnType.get_subtype())) {
1411  std::vector<std::string>& string_vec = addStringArray();
1412  addBinaryStringArray(datum, string_vec);
1413  } else {
1414  if (!is_null) {
1415  addArray(TDatumToArrayDatum(datum, cd->columnType));
1416  } else {
1418  }
1419  }
1420  break;
1421  case kPOINT:
1422  case kLINESTRING:
1423  case kPOLYGON:
1424  case kMULTIPOLYGON:
1425  if (is_null) {
1426  if (cd->columnType.get_notnull()) {
1427  throw std::runtime_error("NULL for column " + cd->columnName);
1428  }
1429  addGeoString(std::string());
1430  } else {
1431  addGeoString(datum.val.str_val);
1432  }
1433  break;
1434  default:
1435  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
1436  }
1437 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:315
#define NULL_DOUBLE
void addBigint(const int64_t v)
Definition: Importer.h:231
void addSmallint(const int16_t v)
Definition: Importer.h:227
Definition: sqltypes.h:48
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:237
void addDouble(const double v)
Definition: Importer.h:235
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:389
void addFloat(const float v)
Definition: Importer.h:233
void addGeoString(const std::string_view v)
Definition: Importer.h:239
ArrayDatum TDatumToArrayDatum(const TDatum &datum, const SQLTypeInfo &ti)
Definition: Importer.cpp:473
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:223
void addTinyint(const int8_t v)
Definition: Importer.h:225
void addInt(const int32_t v)
Definition: Importer.h:229
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:419
Definition: sqltypes.h:51
Definition: sqltypes.h:52
std::vector< std::string > & addStringArray()
Definition: Importer.h:243
void addArray(const ArrayDatum &v)
Definition: Importer.h:241
Definition: sqltypes.h:40
#define IS_STRING(T)
Definition: sqltypes.h:244
#define CHECK(condition)
Definition: Logger.h:203
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:44
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:321
bool is_decimal() const
Definition: sqltypes.h:492
std::string columnName
void addBinaryStringArray(const TDatum &datum, std::vector< std::string > &string_vec)
Definition: Importer.cpp:421

+ Here is the call graph for this function:

size_t import_export::TypedImportBuffer::add_values ( const ColumnDescriptor *  cd,
const TColumn &  data 
)

Definition at line 981 of file Importer.cpp.

References addArray(), addStringArray(), bigint_buffer_, bool_buffer_, checked_malloc(), ColumnDescriptor::columnName, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), i, inline_fixed_encoding_null_val(), int_buffer_, IS_STRING, generate_TableFunctionsFactory_init::j, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), smallint_buffer_, string_buffer_, and tinyint_buffer_.

981  {
982  size_t dataSize = 0;
983  if (cd->columnType.get_notnull()) {
984  // We can't have any null values for this column; to have them is an error
985  if (std::any_of(col.nulls.begin(), col.nulls.end(), [](int i) { return i != 0; })) {
986  throw std::runtime_error("NULL for column " + cd->columnName);
987  }
988  }
989 
990  switch (cd->columnType.get_type()) {
991  case kBOOLEAN: {
992  dataSize = col.data.int_col.size();
993  bool_buffer_->reserve(dataSize);
994  for (size_t i = 0; i < dataSize; i++) {
995  if (col.nulls[i]) {
997  } else {
998  bool_buffer_->push_back((int8_t)col.data.int_col[i]);
999  }
1000  }
1001  break;
1002  }
1003  case kTINYINT: {
1004  dataSize = col.data.int_col.size();
1005  tinyint_buffer_->reserve(dataSize);
1006  for (size_t i = 0; i < dataSize; i++) {
1007  if (col.nulls[i]) {
1009  } else {
1010  tinyint_buffer_->push_back((int8_t)col.data.int_col[i]);
1011  }
1012  }
1013  break;
1014  }
1015  case kSMALLINT: {
1016  dataSize = col.data.int_col.size();
1017  smallint_buffer_->reserve(dataSize);
1018  for (size_t i = 0; i < dataSize; i++) {
1019  if (col.nulls[i]) {
1021  } else {
1022  smallint_buffer_->push_back((int16_t)col.data.int_col[i]);
1023  }
1024  }
1025  break;
1026  }
1027  case kINT: {
1028  dataSize = col.data.int_col.size();
1029  int_buffer_->reserve(dataSize);
1030  for (size_t i = 0; i < dataSize; i++) {
1031  if (col.nulls[i]) {
1033  } else {
1034  int_buffer_->push_back((int32_t)col.data.int_col[i]);
1035  }
1036  }
1037  break;
1038  }
1039  case kBIGINT:
1040  case kNUMERIC:
1041  case kDECIMAL: {
1042  dataSize = col.data.int_col.size();
1043  bigint_buffer_->reserve(dataSize);
1044  for (size_t i = 0; i < dataSize; i++) {
1045  if (col.nulls[i]) {
1047  } else {
1048  bigint_buffer_->push_back((int64_t)col.data.int_col[i]);
1049  }
1050  }
1051  break;
1052  }
1053  case kFLOAT: {
1054  dataSize = col.data.real_col.size();
1055  float_buffer_->reserve(dataSize);
1056  for (size_t i = 0; i < dataSize; i++) {
1057  if (col.nulls[i]) {
1058  float_buffer_->push_back(NULL_FLOAT);
1059  } else {
1060  float_buffer_->push_back((float)col.data.real_col[i]);
1061  }
1062  }
1063  break;
1064  }
1065  case kDOUBLE: {
1066  dataSize = col.data.real_col.size();
1067  double_buffer_->reserve(dataSize);
1068  for (size_t i = 0; i < dataSize; i++) {
1069  if (col.nulls[i]) {
1070  double_buffer_->push_back(NULL_DOUBLE);
1071  } else {
1072  double_buffer_->push_back((double)col.data.real_col[i]);
1073  }
1074  }
1075  break;
1076  }
1077  case kTEXT:
1078  case kVARCHAR:
1079  case kCHAR: {
1080  // TODO: for now, use empty string for nulls
1081  dataSize = col.data.str_col.size();
1082  string_buffer_->reserve(dataSize);
1083  for (size_t i = 0; i < dataSize; i++) {
1084  if (col.nulls[i]) {
1085  string_buffer_->push_back(std::string());
1086  } else {
1087  string_buffer_->push_back(col.data.str_col[i]);
1088  }
1089  }
1090  break;
1091  }
1092  case kTIME:
1093  case kTIMESTAMP:
1094  case kDATE: {
1095  dataSize = col.data.int_col.size();
1096  bigint_buffer_->reserve(dataSize);
1097  for (size_t i = 0; i < dataSize; i++) {
1098  if (col.nulls[i]) {
1100  } else {
1101  bigint_buffer_->push_back(static_cast<int64_t>(col.data.int_col[i]));
1102  }
1103  }
1104  break;
1105  }
1106  case kPOINT:
1107  case kLINESTRING:
1108  case kPOLYGON:
1109  case kMULTIPOLYGON: {
1110  dataSize = col.data.str_col.size();
1111  geo_string_buffer_->reserve(dataSize);
1112  for (size_t i = 0; i < dataSize; i++) {
1113  if (col.nulls[i]) {
1114  // TODO: add support for NULL geo
1115  geo_string_buffer_->push_back(std::string());
1116  } else {
1117  geo_string_buffer_->push_back(col.data.str_col[i]);
1118  }
1119  }
1120  break;
1121  }
1122  case kARRAY: {
1123  dataSize = col.data.arr_col.size();
1124  if (IS_STRING(cd->columnType.get_subtype())) {
1125  for (size_t i = 0; i < dataSize; i++) {
1126  std::vector<std::string>& string_vec = addStringArray();
1127  if (!col.nulls[i]) {
1128  size_t stringArrSize = col.data.arr_col[i].data.str_col.size();
1129  for (size_t str_idx = 0; str_idx != stringArrSize; ++str_idx) {
1130  string_vec.push_back(col.data.arr_col[i].data.str_col[str_idx]);
1131  }
1132  }
1133  }
1134  } else {
1135  auto elem_ti = cd->columnType.get_subtype();
1136  switch (elem_ti) {
1137  case kBOOLEAN: {
1138  for (size_t i = 0; i < dataSize; i++) {
1139  if (col.nulls[i]) {
1141  } else {
1142  size_t len = col.data.arr_col[i].data.int_col.size();
1143  size_t byteSize = len * sizeof(int8_t);
1144  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1145  int8_t* p = buf;
1146  for (size_t j = 0; j < len; ++j) {
1147  *(bool*)p = static_cast<bool>(col.data.arr_col[i].data.int_col[j]);
1148  p += sizeof(bool);
1149  }
1150  addArray(ArrayDatum(byteSize, buf, false));
1151  }
1152  }
1153  break;
1154  }
1155  case kTINYINT: {
1156  for (size_t i = 0; i < dataSize; i++) {
1157  if (col.nulls[i]) {
1159  } else {
1160  size_t len = col.data.arr_col[i].data.int_col.size();
1161  size_t byteSize = len * sizeof(int8_t);
1162  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1163  int8_t* p = buf;
1164  for (size_t j = 0; j < len; ++j) {
1165  *(int8_t*)p = static_cast<int8_t>(col.data.arr_col[i].data.int_col[j]);
1166  p += sizeof(int8_t);
1167  }
1168  addArray(ArrayDatum(byteSize, buf, false));
1169  }
1170  }
1171  break;
1172  }
1173  case kSMALLINT: {
1174  for (size_t i = 0; i < dataSize; i++) {
1175  if (col.nulls[i]) {
1177  } else {
1178  size_t len = col.data.arr_col[i].data.int_col.size();
1179  size_t byteSize = len * sizeof(int16_t);
1180  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1181  int8_t* p = buf;
1182  for (size_t j = 0; j < len; ++j) {
1183  *(int16_t*)p =
1184  static_cast<int16_t>(col.data.arr_col[i].data.int_col[j]);
1185  p += sizeof(int16_t);
1186  }
1187  addArray(ArrayDatum(byteSize, buf, false));
1188  }
1189  }
1190  break;
1191  }
1192  case kINT: {
1193  for (size_t i = 0; i < dataSize; i++) {
1194  if (col.nulls[i]) {
1196  } else {
1197  size_t len = col.data.arr_col[i].data.int_col.size();
1198  size_t byteSize = len * sizeof(int32_t);
1199  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1200  int8_t* p = buf;
1201  for (size_t j = 0; j < len; ++j) {
1202  *(int32_t*)p =
1203  static_cast<int32_t>(col.data.arr_col[i].data.int_col[j]);
1204  p += sizeof(int32_t);
1205  }
1206  addArray(ArrayDatum(byteSize, buf, false));
1207  }
1208  }
1209  break;
1210  }
1211  case kBIGINT:
1212  case kNUMERIC:
1213  case kDECIMAL: {
1214  for (size_t i = 0; i < dataSize; i++) {
1215  if (col.nulls[i]) {
1217  } else {
1218  size_t len = col.data.arr_col[i].data.int_col.size();
1219  size_t byteSize = len * sizeof(int64_t);
1220  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1221  int8_t* p = buf;
1222  for (size_t j = 0; j < len; ++j) {
1223  *(int64_t*)p =
1224  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1225  p += sizeof(int64_t);
1226  }
1227  addArray(ArrayDatum(byteSize, buf, false));
1228  }
1229  }
1230  break;
1231  }
1232  case kFLOAT: {
1233  for (size_t i = 0; i < dataSize; i++) {
1234  if (col.nulls[i]) {
1236  } else {
1237  size_t len = col.data.arr_col[i].data.real_col.size();
1238  size_t byteSize = len * sizeof(float);
1239  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1240  int8_t* p = buf;
1241  for (size_t j = 0; j < len; ++j) {
1242  *(float*)p = static_cast<float>(col.data.arr_col[i].data.real_col[j]);
1243  p += sizeof(float);
1244  }
1245  addArray(ArrayDatum(byteSize, buf, false));
1246  }
1247  }
1248  break;
1249  }
1250  case kDOUBLE: {
1251  for (size_t i = 0; i < dataSize; i++) {
1252  if (col.nulls[i]) {
1254  } else {
1255  size_t len = col.data.arr_col[i].data.real_col.size();
1256  size_t byteSize = len * sizeof(double);
1257  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1258  int8_t* p = buf;
1259  for (size_t j = 0; j < len; ++j) {
1260  *(double*)p = static_cast<double>(col.data.arr_col[i].data.real_col[j]);
1261  p += sizeof(double);
1262  }
1263  addArray(ArrayDatum(byteSize, buf, false));
1264  }
1265  }
1266  break;
1267  }
1268  case kTIME:
1269  case kTIMESTAMP:
1270  case kDATE: {
1271  for (size_t i = 0; i < dataSize; i++) {
1272  if (col.nulls[i]) {
1274  } else {
1275  size_t len = col.data.arr_col[i].data.int_col.size();
1276  size_t byteWidth = sizeof(int64_t);
1277  size_t byteSize = len * byteWidth;
1278  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1279  int8_t* p = buf;
1280  for (size_t j = 0; j < len; ++j) {
1281  *reinterpret_cast<int64_t*>(p) =
1282  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1283  p += sizeof(int64_t);
1284  }
1285  addArray(ArrayDatum(byteSize, buf, false));
1286  }
1287  }
1288  break;
1289  }
1290  default:
1291  throw std::runtime_error("Invalid Array Type");
1292  }
1293  }
1294  break;
1295  }
1296  default:
1297  throw std::runtime_error("Invalid Type");
1298  }
1299  return dataSize;
1300 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:315
#define NULL_DOUBLE
Definition: sqltypes.h:48
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:494
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:389
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:489
std::vector< float > * float_buffer_
Definition: Importer.h:492
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
std::vector< double > * double_buffer_
Definition: Importer.h:493
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:202
std::vector< int32_t > * int_buffer_
Definition: Importer.h:490
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:491
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:487
Definition: sqltypes.h:51
Definition: sqltypes.h:52
std::vector< std::string > & addStringArray()
Definition: Importer.h:243
void addArray(const ArrayDatum &v)
Definition: Importer.h:241
Definition: sqltypes.h:40
#define IS_STRING(T)
Definition: sqltypes.h:244
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:488
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:44
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:321
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:495

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addArray ( const ArrayDatum &  v)
inline

Definition at line 241 of file Importer.h.

References array_buffer_.

Referenced by add_value(), and add_values().

// Appends a copy of the array datum v to array_buffer_.
241 { array_buffer_->push_back(v); }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:496

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBigint ( const int64_t  v)
inline

Definition at line 231 of file Importer.h.

References bigint_buffer_.

Referenced by add_value().

// Appends the 64-bit value v to bigint_buffer_.
231 { bigint_buffer_->push_back(v); }
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:491

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBoolean ( const int8_t  v)
inline

Definition at line 223 of file Importer.h.

References bool_buffer_.

Referenced by add_value().

// Appends v to bool_buffer_; booleans are stored as int8_t values.
223 { bool_buffer_->push_back(v); }
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:487

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addDictEncodedString ( const std::vector< std::string > &  string_vec)

Definition at line 492 of file Importer.cpp.

References CHECK, column_desc_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, logger::ERROR, SQLTypeInfo::get_size(), getColumnDesc(), StringDictionary::getOrAddBulk(), LOG, StringDictionary::MAX_STRLEN, string_dict_, string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

// Dictionary-encodes every string in string_vec through string_dict_ and
// stores the resulting ids in the string_dict_i{8,16,32}_buffer_ whose width
// matches the column's get_size() (1, 2 or 4 bytes).
//
// Throws std::runtime_error when a string is longer than
// StringDictionary::MAX_STRLEN, and rethrows any std::exception raised during
// encoding as std::runtime_error prefixed with the column name (after logging
// it at ERROR level).
492  {
// NOTE(review): the listing skips line 493 here; References lists CHECK, so
// this is presumably a precondition check on string_dict_ — confirm against
// Importer.cpp.
494  std::vector<std::string_view> string_view_vec;
495  string_view_vec.reserve(string_vec.size());
// Validate lengths up front and collect views over the caller's strings.
496  for (const auto& str : string_vec) {
497  if (str.size() > StringDictionary::MAX_STRLEN) {
498  std::ostringstream oss;
499  oss << "while processing dictionary for column " << getColumnDesc()->columnName
500  << " a string was detected too long for encoding, string length = "
501  << str.size() << ", first 100 characters are '" << str.substr(0, 100) << "'";
502  throw std::runtime_error(oss.str());
503  }
504  string_view_vec.push_back(str);
505  }
506  try {
// The target buffer is resized to one id per input string before
// getOrAddBulk() writes into its storage.
507  switch (column_desc_->columnType.get_size()) {
508  case 1:
509  string_dict_i8_buffer_->resize(string_view_vec.size());
510  string_dict_->getOrAddBulk(string_view_vec, string_dict_i8_buffer_->data());
511  break;
512  case 2:
513  string_dict_i16_buffer_->resize(string_view_vec.size());
514  string_dict_->getOrAddBulk(string_view_vec, string_dict_i16_buffer_->data());
515  break;
516  case 4:
517  string_dict_i32_buffer_->resize(string_view_vec.size());
518  string_dict_->getOrAddBulk(string_view_vec, string_dict_i32_buffer_->data());
519  break;
520  default:
// Any other id width is treated as a programming error.
521  CHECK(false);
522  }
523  } catch (std::exception& e) {
// Add the column name to the message so the failure can be located.
524  std::ostringstream oss;
525  oss << "while processing dictionary for column " << getColumnDesc()->columnName
526  << " : " << e.what();
527  LOG(ERROR) << oss.str();
528  throw std::runtime_error(oss.str());
529  }
530 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
#define LOG(tag)
Definition: Logger.h:194
StringDictionary * string_dict_
Definition: Importer.h:506
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:500
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:502
const ColumnDescriptor * column_desc_
Definition: Importer.h:505
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:501
#define CHECK(condition)
Definition: Logger.h:203
const ColumnDescriptor * getColumnDesc() const
Definition: Importer.h:282
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedStringArray ( const std::vector< std::vector< std::string >> &  string_array_vec)
inline

Definition at line 254 of file Importer.h.

References CHECK, checked_malloc(), StringDictionary::getOrAddBulkArray(), StringDictionary::MAX_STRLEN, string_array_dict_buffer_, and string_dict_.

// Dictionary-encodes each string array in string_array_vec through
// string_dict_ and appends one ArrayDatum of int32_t ids per input array to
// string_array_dict_buffer_.
//
// Throws std::runtime_error("String too long for dictionary encoding.") if any
// string exceeds StringDictionary::MAX_STRLEN; nothing is encoded in that case
// because the length check runs over the whole input first.
255  {
// string_dict_ is dereferenced below, so it must be set.
256  CHECK(string_dict_);
257 
258  // first check data is ok
259  for (auto& p : string_array_vec) {
260  for (const auto& str : p) {
261  if (str.size() > StringDictionary::MAX_STRLEN) {
262  throw std::runtime_error("String too long for dictionary encoding.");
263  }
264  }
265  }
266 
267  std::vector<std::vector<int32_t>> ids_array(0);
268  string_dict_->getOrAddBulkArray(string_array_vec, ids_array);
269 
270  for (auto& p : ids_array) {
271  size_t len = p.size() * sizeof(int32_t);
272  auto a = static_cast<int32_t*>(checked_malloc(len));
// An empty id vector has no element 0 to take the address of, so only copy
// when there is something to copy.
273  if (len > 0) { memcpy(a, p.data(), len); }
274  // TODO: distinguish between empty and NULL
// Zero-length arrays are currently flagged as null (third argument).
275  string_array_dict_buffer_->push_back(
276  ArrayDatum(len, reinterpret_cast<int8_t*>(a), len == 0));
277  }
278  }
StringDictionary * string_dict_
Definition: Importer.h:506
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:202
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:503
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
void getOrAddBulkArray(const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec)
#define CHECK(condition)
Definition: Logger.h:203
static constexpr size_t MAX_STRLEN

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDouble ( const double  v)
inline

Definition at line 235 of file Importer.h.

References double_buffer_.

Referenced by add_value().

// Appends the double value v to double_buffer_.
235 { double_buffer_->push_back(v); }
std::vector< double > * double_buffer_
Definition: Importer.h:493

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addFloat ( const float  v)
inline

Definition at line 233 of file Importer.h.

References float_buffer_.

Referenced by add_value().

// Appends the float value v to float_buffer_.
233 { float_buffer_->push_back(v); }
std::vector< float > * float_buffer_
Definition: Importer.h:492

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addGeoString ( const std::string_view  v)
inline

Definition at line 239 of file Importer.h.

References geo_string_buffer_.

Referenced by add_value().

// Appends a std::string built from the view v to geo_string_buffer_.
239 { geo_string_buffer_->emplace_back(v); }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:495

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addInt ( const int32_t  v)
inline

Definition at line 229 of file Importer.h.

References int_buffer_.

Referenced by add_value().

// Appends the 32-bit value v to int_buffer_.
229 { int_buffer_->push_back(v); }
std::vector< int32_t > * int_buffer_
Definition: Importer.h:490

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addSmallint ( const int16_t  v)
inline

Definition at line 227 of file Importer.h.

References smallint_buffer_.

Referenced by add_value().

// Appends the 16-bit value v to smallint_buffer_.
227 { smallint_buffer_->push_back(v); }
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:489

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addString ( const std::string_view  v)
inline

Definition at line 237 of file Importer.h.

References string_buffer_.

Referenced by add_value().

// Appends a std::string built from the view v to string_buffer_.
237 { string_buffer_->emplace_back(v); }
std::vector< std::string > * string_buffer_
Definition: Importer.h:494

+ Here is the caller graph for this function:

std::vector<std::string>& import_export::TypedImportBuffer::addStringArray ( )
inline

Definition at line 243 of file Importer.h.

References string_array_buffer_.

Referenced by add_value(), and add_values().

// Appends a new, empty string vector to string_array_buffer_ and returns a
// reference to it so the caller can fill it in place.
243  {
244  string_array_buffer_->emplace_back();
245  return string_array_buffer_->back();
246  }
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:497

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addStringArray ( const std::vector< std::string > &  arr)
inline

Definition at line 248 of file Importer.h.

References string_array_buffer_.

// Appends a copy of the string vector arr to string_array_buffer_.
248  {
249  string_array_buffer_->push_back(arr);
250  }
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:497
void import_export::TypedImportBuffer::addTinyint ( const int8_t  v)
inline

Definition at line 225 of file Importer.h.

References tinyint_buffer_.

Referenced by add_value().

// Appends the 8-bit value v to tinyint_buffer_.
225 { tinyint_buffer_->push_back(v); }
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:488

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::clear ( )
inline

Definition at line 374 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

374  {
375  switch (column_desc_->columnType.get_type()) {
376  case kBOOLEAN: {
377  bool_buffer_->clear();
378  break;
379  }
380  case kTINYINT: {
381  tinyint_buffer_->clear();
382  break;
383  }
384  case kSMALLINT: {
385  smallint_buffer_->clear();
386  break;
387  }
388  case kINT: {
389  int_buffer_->clear();
390  break;
391  }
392  case kBIGINT:
393  case kNUMERIC:
394  case kDECIMAL: {
395  bigint_buffer_->clear();
396  break;
397  }
398  case kFLOAT: {
399  float_buffer_->clear();
400  break;
401  }
402  case kDOUBLE: {
403  double_buffer_->clear();
404  break;
405  }
406  case kTEXT:
407  case kVARCHAR:
408  case kCHAR: {
409  string_buffer_->clear();
411  switch (column_desc_->columnType.get_size()) {
412  case 1:
413  string_dict_i8_buffer_->clear();
414  break;
415  case 2:
416  string_dict_i16_buffer_->clear();
417  break;
418  case 4:
419  string_dict_i32_buffer_->clear();
420  break;
421  default:
422  CHECK(false);
423  }
424  }
425  break;
426  }
427  case kDATE:
428  case kTIME:
429  case kTIMESTAMP:
430  bigint_buffer_->clear();
431  break;
432  case kARRAY: {
434  string_array_buffer_->clear();
435  string_array_dict_buffer_->clear();
436  } else {
437  array_buffer_->clear();
438  }
439  break;
440  }
441  case kPOINT:
442  case kLINESTRING:
443  case kPOLYGON:
444  case kMULTIPOLYGON:
445  geo_string_buffer_->clear();
446  break;
447  default:
448  CHECK(false);
449  }
450  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:315
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
Definition: sqltypes.h:48
std::vector< std::string > * string_buffer_
Definition: Importer.h:494
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:496
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:489
std::vector< float > * float_buffer_
Definition: Importer.h:492
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
std::vector< double > * double_buffer_
Definition: Importer.h:493
std::vector< int32_t > * int_buffer_
Definition: Importer.h:490
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:503
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:500
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:491
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:487
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:497
Definition: sqltypes.h:51
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:502
Definition: sqltypes.h:40
#define IS_STRING(T)
Definition: sqltypes.h:244
const ColumnDescriptor * column_desc_
Definition: Importer.h:505
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:501
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:488
#define CHECK(condition)
Definition: Logger.h:203
Definition: sqltypes.h:44
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:495

+ Here is the call graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const arrow::Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange slice_range,
BadRowsTracker *const  bad_rows_tracker 
)

Referenced by add_arrow_values().

+ Here is the caller graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const Array array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange slice_range,
import_export::BadRowsTracker *const  bad_rows_tracker 
)

Definition at line 795 of file Importer.cpp.

References col_idx, anonymous_namespace{ArrowImporter.h}::error_context(), geo_string_buffer_, SQLTypeInfo::get_type(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), import_buffers, import_export::BadRowsTracker::importer, import_export::BadRowsTracker::mutex, import_export::BadRowsTracker::rows, import_export::Importer::set_geo_physical_import_buffer(), and anonymous_namespace{ArrowImporter.h}::value_getter().

// Converts rows [slice_range.first, slice_range.second) of the Arrow `array`
// into `buffer` and returns buffer.size() afterwards.
//
// For geometry columns with a bad_rows_tracker, the physical geo columns are
// populated for each row as well. A row that fails is recorded in
// bad_rows_tracker->rows by its index relative to slice_range.first; an
// ArrowImporterException is rethrown when no tracker was supplied.
800  {
801  auto data =
802  std::make_unique<DataBuffer<DATA_TYPE>>(cd, array, buffer, bad_rows_tracker);
803  auto f_value_getter = value_getter(array, cd, bad_rows_tracker);
// Default is a no-op; it is replaced below only for geometry columns that
// have a tracker.
804  std::function<void(const int64_t)> f_add_geo_phy_cols = [&](const int64_t row) {};
805  if (bad_rows_tracker && cd->columnType.is_geometry()) {
806  f_add_geo_phy_cols = [&](const int64_t row) {
807  // Populate physical columns (ref. DBHandler::load_table)
808  std::vector<double> coords, bounds;
809  std::vector<int> ring_sizes, poly_rings;
810  int render_group = 0;
811  SQLTypeInfo ti;
812  // replace any unexpected exception from getGeoColumns or other
813  // on this path with a GeoImportException so that we won't over
814  // push a null to the logical column...
815  try {
816  SQLTypeInfo import_ti{ti};
817  if (array.IsNull(row)) {
// NOTE(review): the listing skips line 818 here; the call head is presumably
// Geospatial::GeoTypesFactory::getNullGeoColumns( — its signature matches the
// argument list on the next line. Confirm against Importer.cpp.
819  import_ti, coords, bounds, ring_sizes, poly_rings, false);
820  } else {
821  arrow_throw_if<GeoImportException>(
// NOTE(review): the listing skips line 822 here; presumably the start of a
// Geospatial::GeoTypesFactory::getGeoColumns(...) call whose first argument is
// the geometry string — confirm against Importer.cpp.
823  ti,
824  coords,
825  bounds,
826  ring_sizes,
827  poly_rings,
828  false),
829  error_context(cd, bad_rows_tracker) + "Invalid geometry");
830  arrow_throw_if<GeoImportException>(
831  cd->columnType.get_type() != ti.get_type(),
832  error_context(cd, bad_rows_tracker) + "Geometry type mismatch");
833  }
834  auto col_idx_workpad = col_idx; // scratch copy: the callee takes col_idx by non-const reference
// NOTE(review): the listing skips lines 835 and 838; presumably the head of an
// Importer::set_geo_physical_import_buffer( call and its import-buffers
// argument, going by that function's signature — confirm against Importer.cpp.
836  bad_rows_tracker->importer->getCatalog(),
837  cd,
839  col_idx_workpad,
840  coords,
841  bounds,
842  ring_sizes,
843  poly_rings,
844  render_group);
845  } catch (GeoImportException&) {
846  throw;
847  } catch (std::runtime_error& e) {
848  throw GeoImportException(e.what());
849  } catch (const std::exception& e) {
850  throw GeoImportException(e.what());
851  } catch (...) {
852  throw GeoImportException("unknown exception");
853  }
854  };
855  }
// Records a failed row under the tracker's mutex, indexed relative to the
// start of the slice.
856  auto f_mark_a_bad_row = [&](const auto row) {
857  std::unique_lock<std::mutex> lck(bad_rows_tracker->mutex);
858  bad_rows_tracker->rows.insert(row - slice_range.first);
859  };
860  buffer.reserve(slice_range.second - slice_range.first);
861  for (size_t row = slice_range.first; row < slice_range.second; ++row) {
862  try {
// Null Arrow entries are fed to the DataBuffer as nullptr.
863  *data << (array.IsNull(row) ? nullptr : f_value_getter(array, row));
864  f_add_geo_phy_cols(row);
865  } catch (GeoImportException&) {
// The logical value was already streamed before the geo step, so only the
// row is marked here.
866  f_mark_a_bad_row(row);
867  } catch (ArrowImporterException&) {
868  // trace bad rows of each column; otherwise rethrow.
869  if (bad_rows_tracker) {
// presumably streams a null placeholder for the bad row so the buffer stays
// row-aligned — verify against DataBuffer::operator<<
870  *data << nullptr;
871  f_mark_a_bad_row(row);
872  } else {
873  throw;
874  }
875  }
876  }
877  return buffer.size();
878 }
auto value_getter(const arrow::Array &array, const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1144
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group)
Definition: Importer.cpp:1459
Catalog_Namespace::Catalog & getCatalog()
Definition: Importer.h:822
std::set< int64_t > rows
Definition: Importer.h:76
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:482
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:937
std::string error_context(const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
Definition: ArrowImporter.h:76
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:495

+ Here is the call graph for this function:

template<typename DATA_TYPE >
auto import_export::TypedImportBuffer::del_values ( std::vector< DATA_TYPE > &  buffer,
BadRowsTracker *const  bad_rows_tracker 
)
auto import_export::TypedImportBuffer::del_values ( const SQLTypes  type,
BadRowsTracker *const  bad_rows_tracker 
)
std::vector< DataBlockPtr > import_export::TypedImportBuffer::get_data_block_pointers ( const std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers)
static

Definition at line 2829 of file Importer.cpp.

References DataBlockPtr::arraysPtr, CHECK, CHECK_EQ, getStringArrayBuffer(), getTypeInfo(), import_buffers, SQLTypeInfo::is_number(), IS_STRING, SQLTypeInfo::is_string(), kARRAY, kBOOLEAN, kENCODING_DICT, kENCODING_NONE, DataBlockPtr::numbersPtr, run_benchmark_import::result, and DataBlockPtr::stringsPtr.

Referenced by import_export::fill_missing_columns(), import_export::Loader::loadImpl(), and import_export::Loader::loadToShard().

2830  {
2831  std::vector<DataBlockPtr> result(import_buffers.size());
2832  std::vector<std::pair<const size_t, std::future<int8_t*>>>
2833  encoded_data_block_ptrs_futures;
2834  // make all async calls to string dictionary here and then continue execution
2835  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
2836  if (import_buffers[buf_idx]->getTypeInfo().is_string() &&
2837  import_buffers[buf_idx]->getTypeInfo().get_compression() != kENCODING_NONE) {
2838  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
2839  CHECK_EQ(kENCODING_DICT, import_buffers[buf_idx]->getTypeInfo().get_compression());
2840 
2841  encoded_data_block_ptrs_futures.emplace_back(std::make_pair(
2842  buf_idx,
2843  std::async(std::launch::async, [buf_idx, &import_buffers, string_payload_ptr] {
2844  import_buffers[buf_idx]->addDictEncodedString(*string_payload_ptr);
2845  return import_buffers[buf_idx]->getStringDictBuffer();
2846  })));
2847  }
2848  }
2849 
2850  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
2851  DataBlockPtr p;
2852  if (import_buffers[buf_idx]->getTypeInfo().is_number() ||
2853  import_buffers[buf_idx]->getTypeInfo().is_time() ||
2854  import_buffers[buf_idx]->getTypeInfo().get_type() == kBOOLEAN) {
2855  p.numbersPtr = import_buffers[buf_idx]->getAsBytes();
2856  } else if (import_buffers[buf_idx]->getTypeInfo().is_string()) {
2857  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
2858  if (import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_NONE) {
2859  p.stringsPtr = string_payload_ptr;
2860  } else {
2861  // This condition means we have column which is ENCODED string. We already made
2862  // Async request to gain the encoded integer values above so we should skip this
2863  // iteration and continue.
2864  continue;
2865  }
2866  } else if (import_buffers[buf_idx]->getTypeInfo().is_geometry()) {
2867  auto geo_payload_ptr = import_buffers[buf_idx]->getGeoStringBuffer();
2868  p.stringsPtr = geo_payload_ptr;
2869  } else {
2870  CHECK(import_buffers[buf_idx]->getTypeInfo().get_type() == kARRAY);
2871  if (IS_STRING(import_buffers[buf_idx]->getTypeInfo().get_subtype())) {
2872  CHECK(import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_DICT);
2873  import_buffers[buf_idx]->addDictEncodedStringArray(
2874  *import_buffers[buf_idx]->getStringArrayBuffer());
2875  p.arraysPtr = import_buffers[buf_idx]->getStringArrayDictBuffer();
2876  } else {
2877  p.arraysPtr = import_buffers[buf_idx]->getArrayBuffer();
2878  }
2879  }
2880  result[buf_idx] = p;
2881  }
2882 
2883  // wait for the async requests we made for string dictionary
2884  for (auto& encoded_ptr_future : encoded_data_block_ptrs_futures) {
2885  result[encoded_ptr_future.first].numbersPtr = encoded_ptr_future.second.get();
2886  }
2887  return result;
2888 }
#define CHECK_EQ(x, y)
Definition: Logger.h:211
const SQLTypeInfo & getTypeInfo() const
Definition: Importer.h:280
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:221
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:222
bool is_number() const
Definition: sqltypes.h:494
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:482
std::vector< std::vector< std::string > > * getStringArrayBuffer() const
Definition: Importer.h:346
#define IS_STRING(T)
Definition: sqltypes.h:244
#define CHECK(condition)
Definition: Logger.h:203
bool is_string() const
Definition: sqltypes.h:489
int8_t * numbersPtr
Definition: sqltypes.h:220

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getArrayBuffer ( ) const
inline

Definition at line 344 of file Importer.h.

References array_buffer_.

344 { return array_buffer_; }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:496
int8_t* import_export::TypedImportBuffer::getAsBytes ( ) const
inline

Definition at line 286 of file Importer.h.

References bigint_buffer_, bool_buffer_, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, SQLTypeInfo::get_type(), int_buffer_, kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, smallint_buffer_, and tinyint_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

286  {
287  switch (column_desc_->columnType.get_type()) {
288  case kBOOLEAN:
289  return reinterpret_cast<int8_t*>(bool_buffer_->data());
290  case kTINYINT:
291  return reinterpret_cast<int8_t*>(tinyint_buffer_->data());
292  case kSMALLINT:
293  return reinterpret_cast<int8_t*>(smallint_buffer_->data());
294  case kINT:
295  return reinterpret_cast<int8_t*>(int_buffer_->data());
296  case kBIGINT:
297  case kNUMERIC:
298  case kDECIMAL:
299  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
300  case kFLOAT:
301  return reinterpret_cast<int8_t*>(float_buffer_->data());
302  case kDOUBLE:
303  return reinterpret_cast<int8_t*>(double_buffer_->data());
304  case kDATE:
305  case kTIME:
306  case kTIMESTAMP:
307  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
308  default:
309  abort();
310  }
311  }
Definition: sqltypes.h:48
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:489
std::vector< float > * float_buffer_
Definition: Importer.h:492
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
std::vector< double > * double_buffer_
Definition: Importer.h:493
std::vector< int32_t > * int_buffer_
Definition: Importer.h:490
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:491
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:487
Definition: sqltypes.h:52
const ColumnDescriptor * column_desc_
Definition: Importer.h:505
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:488
Definition: sqltypes.h:44
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const ColumnDescriptor* import_export::TypedImportBuffer::getColumnDesc ( ) const
inline

Definition at line 282 of file Importer.h.

References column_desc_.

Referenced by addDictEncodedString().

282 { return column_desc_; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:505

+ Here is the caller graph for this function:

size_t import_export::TypedImportBuffer::getElementSize ( ) const
inline

Definition at line 313 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_type(), kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, and kTINYINT.

313  {
314  switch (column_desc_->columnType.get_type()) {
315  case kBOOLEAN:
316  return sizeof((*bool_buffer_)[0]);
317  case kTINYINT:
318  return sizeof((*tinyint_buffer_)[0]);
319  case kSMALLINT:
320  return sizeof((*smallint_buffer_)[0]);
321  case kINT:
322  return sizeof((*int_buffer_)[0]);
323  case kBIGINT:
324  case kNUMERIC:
325  case kDECIMAL:
326  return sizeof((*bigint_buffer_)[0]);
327  case kFLOAT:
328  return sizeof((*float_buffer_)[0]);
329  case kDOUBLE:
330  return sizeof((*double_buffer_)[0]);
331  case kDATE:
332  case kTIME:
333  case kTIMESTAMP:
334  return sizeof((*bigint_buffer_)[0]);
335  default:
336  abort();
337  }
338  }
Definition: sqltypes.h:48
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:314
Definition: sqltypes.h:52
const ColumnDescriptor * column_desc_
Definition: Importer.h:505
Definition: sqltypes.h:44
SQLTypeInfo columnType

+ Here is the call graph for this function:

std::vector<std::string>* import_export::TypedImportBuffer::getGeoStringBuffer ( ) const
inline

Definition at line 342 of file Importer.h.

References geo_string_buffer_.

342 { return geo_string_buffer_; }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:495
std::vector<std::vector<std::string> >* import_export::TypedImportBuffer::getStringArrayBuffer ( ) const
inline

Definition at line 346 of file Importer.h.

References string_array_buffer_.

Referenced by get_data_block_pointers().

346  {
347  return string_array_buffer_;
348  }
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:497

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getStringArrayDictBuffer ( ) const
inline

Definition at line 350 of file Importer.h.

References string_array_dict_buffer_.

350  {
351  return string_array_dict_buffer_;
352  }
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:503
std::vector<std::string>* import_export::TypedImportBuffer::getStringBuffer ( ) const
inline

Definition at line 340 of file Importer.h.

References string_buffer_.

340 { return string_buffer_; }
std::vector< std::string > * string_buffer_
Definition: Importer.h:494
int8_t* import_export::TypedImportBuffer::getStringDictBuffer ( ) const
inline

Definition at line 354 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_size(), string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::int_value_at().

354  {
355  switch (column_desc_->columnType.get_size()) {
356  case 1:
357  return reinterpret_cast<int8_t*>(string_dict_i8_buffer_->data());
358  case 2:
359  return reinterpret_cast<int8_t*>(string_dict_i16_buffer_->data());
360  case 4:
361  return reinterpret_cast<int8_t*>(string_dict_i32_buffer_->data());
362  default:
363  abort();
364  }
365  }
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:500
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:502
const ColumnDescriptor * column_desc_
Definition: Importer.h:505
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:501
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionary* import_export::TypedImportBuffer::getStringDictionary ( ) const
inline

Definition at line 284 of file Importer.h.

References string_dict_.

284 { return string_dict_; }
StringDictionary * string_dict_
Definition: Importer.h:506
const SQLTypeInfo& import_export::TypedImportBuffer::getTypeInfo ( ) const
inline

Definition at line 280 of file Importer.h.

References column_desc_, and ColumnDescriptor::columnType.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), get_data_block_pointers(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

280 { return column_desc_->columnType; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:505
SQLTypeInfo columnType

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::pop_value ( )

Definition at line 735 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, decimal_to_int_type(), double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_buffer_, tinyint_buffer_, and run_benchmark_import::type.

735  {
736  const auto type = column_desc_->columnType.is_decimal()
737  ? decimal_to_int_type(column_desc_->columnType)
738  : column_desc_->columnType.get_type();
739  switch (type) {
740  case kBOOLEAN:
741  bool_buffer_->pop_back();
742  break;
743  case kTINYINT:
744  tinyint_buffer_->pop_back();
745  break;
746  case kSMALLINT:
747  smallint_buffer_->pop_back();
748  break;
749  case kINT:
750  int_buffer_->pop_back();
751  break;
752  case kBIGINT:
753  bigint_buffer_->pop_back();
754  break;
755  case kFLOAT:
756  float_buffer_->pop_back();
757  break;
758  case kDOUBLE:
759  double_buffer_->pop_back();
760  break;
761  case kTEXT:
762  case kVARCHAR:
763  case kCHAR:
764  string_buffer_->pop_back();
765  break;
766  case kDATE:
767  case kTIME:
768  case kTIMESTAMP:
769  bigint_buffer_->pop_back();
770  break;
771  case kARRAY:
772  if (IS_STRING(column_desc_->columnType.get_subtype())) {
773  string_array_buffer_->pop_back();
774  } else {
775  array_buffer_->pop_back();
776  }
777  break;
778  case kPOINT:
779  case kLINESTRING:
780  case kPOLYGON:
781  case kMULTIPOLYGON:
782  geo_string_buffer_->pop_back();
783  break;
784  default:
785  CHECK(false) << "TypedImportBuffer::pop_value() does not support type " << type;
786  }
787 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:315
Definition: sqltypes.h:48
std::vector< std::string > * string_buffer_
Definition: Importer.h:494
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:496
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:489
std::vector< float > * float_buffer_
Definition: Importer.h:492
std::vector< double > * double_buffer_
Definition: Importer.h:493
std::vector< int32_t > * int_buffer_
Definition: Importer.h:490
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:491
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:487
std::vector< std::vector< std::string > > * string_array_buffer_
Definition: Importer.h:497
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:419
Definition: sqltypes.h:51
Definition: sqltypes.h:52
Definition: sqltypes.h:40
#define IS_STRING(T)
Definition: sqltypes.h:244
const ColumnDescriptor * column_desc_
Definition: Importer.h:505
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:488
#define CHECK(condition)
Definition: Logger.h:203
Definition: sqltypes.h:44
SQLTypeInfo columnType
bool is_decimal() const
Definition: sqltypes.h:492
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:495

+ Here is the call graph for this function:

bool import_export::TypedImportBuffer::stringDictCheckpoint ( )
inline

Definition at line 367 of file Importer.h.

References StringDictionary::checkpoint(), and string_dict_.

367  {
368  if (string_dict_ == nullptr) {
369  return true;
370  }
371  return string_dict_->checkpoint();
372  }
StringDictionary * string_dict_
Definition: Importer.h:506
bool checkpoint() noexcept

+ Here is the call graph for this function:

Member Data Documentation

union { ... }
union { ... }
std::vector<ArrayDatum>* import_export::TypedImportBuffer::array_buffer_
std::vector<int64_t>* import_export::TypedImportBuffer::bigint_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::bool_buffer_
size_t import_export::TypedImportBuffer::col_idx

Definition at line 483 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer().

const ColumnDescriptor* import_export::TypedImportBuffer::column_desc_
private
std::vector<double>* import_export::TypedImportBuffer::double_buffer_
std::vector<float>* import_export::TypedImportBuffer::float_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::geo_string_buffer_
std::vector<std::unique_ptr<TypedImportBuffer> >* import_export::TypedImportBuffer::import_buffers

Definition at line 482 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer(), and get_data_block_pointers().

std::vector<int32_t>* import_export::TypedImportBuffer::int_buffer_
std::vector<int16_t>* import_export::TypedImportBuffer::smallint_buffer_
std::vector<std::vector<std::string> >* import_export::TypedImportBuffer::string_array_buffer_
std::vector<ArrayDatum>* import_export::TypedImportBuffer::string_array_dict_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::string_buffer_
StringDictionary* import_export::TypedImportBuffer::string_dict_
private
std::vector<uint16_t>* import_export::TypedImportBuffer::string_dict_i16_buffer_
std::vector<int32_t>* import_export::TypedImportBuffer::string_dict_i32_buffer_
std::vector<uint8_t>* import_export::TypedImportBuffer::string_dict_i8_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::tinyint_buffer_

The documentation for this class was generated from the following files: