OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
import_export::TypedImportBuffer Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::TypedImportBuffer:
+ Collaboration diagram for import_export::TypedImportBuffer:

Public Types

using OptionalStringVector = std::optional< std::vector< std::string >>
 

Public Member Functions

 TypedImportBuffer (const ColumnDescriptor *col_desc, StringDictionary *string_dict)
 
 ~TypedImportBuffer ()
 
void addBoolean (const int8_t v)
 
void addTinyint (const int8_t v)
 
void addSmallint (const int16_t v)
 
void addInt (const int32_t v)
 
void addBigint (const int64_t v)
 
void addFloat (const float v)
 
void addDouble (const double v)
 
void addString (const std::string_view v)
 
void addDictStringWithTruncation (std::string_view v)
 
void addGeoString (const std::string_view v)
 
void addArray (const ArrayDatum &v)
 
OptionalStringVector & addStringArray ()
 
void addStringArray (const OptionalStringVector &arr)
 
void addDictEncodedString (const std::vector< std::string > &string_vec)
 
void addDictEncodedStringArray (const std::vector< OptionalStringVector > &string_array_vec)
 
const SQLTypeInfo & getTypeInfo () const
 
const ColumnDescriptor * getColumnDesc () const
 
StringDictionary * getStringDictionary () const
 
int8_t * getAsBytes () const
 
size_t getElementSize () const
 
std::vector< std::string > * getStringBuffer () const
 
std::vector< std::string > * getGeoStringBuffer () const
 
std::vector< ArrayDatum > * getArrayBuffer () const
 
std::vector
< OptionalStringVector > * 
getStringArrayBuffer () const
 
std::vector< ArrayDatum > * getStringArrayDictBuffer () const
 
int8_t * getStringDictBuffer () const
 
bool stringDictCheckpoint ()
 
void clear ()
 
size_t add_values (const ColumnDescriptor *cd, const TColumn &data)
 
size_t add_arrow_values (const ColumnDescriptor *cd, const arrow::Array &data, const bool exact_type_match, const ArraySliceRange &slice_range, BadRowsTracker *bad_rows_tracker)
 
void add_value (const ColumnDescriptor *cd, const std::string_view val, const bool is_null, const CopyParams &copy_params, const bool check_not_null=true)
 
void add_value (const ColumnDescriptor *cd, const TDatum &val, const bool is_null)
 
void addDefaultValues (const ColumnDescriptor *cd, size_t num_rows)
 
void pop_value ()
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
auto del_values (std::vector< DATA_TYPE > &buffer, BadRowsTracker *const bad_rows_tracker)
 
auto del_values (const SQLTypes type, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, import_export::BadRowsTracker *const bad_rows_tracker)
 

Static Public Member Functions

static std::vector< DataBlockPtr > get_data_block_pointers (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
 

Public Attributes

std::vector< std::unique_ptr
< TypedImportBuffer > > * 
import_buffers
 
size_t col_idx
 
std::vector< int8_t > * bool_buffer_
 
std::vector< int8_t > * tinyint_buffer_
 
std::vector< int16_t > * smallint_buffer_
 
std::vector< int32_t > * int_buffer_
 
std::vector< int64_t > * bigint_buffer_
 
std::vector< float > * float_buffer_
 
std::vector< double > * double_buffer_
 
std::vector< std::string > * string_buffer_
 
std::vector< std::string > * geo_string_buffer_
 
std::vector< ArrayDatum > * array_buffer_
 
std::vector
< OptionalStringVector > * 
string_array_buffer_
 
std::vector< uint8_t > * string_dict_i8_buffer_
 
std::vector< uint16_t > * string_dict_i16_buffer_
 
std::vector< int32_t > * string_dict_i32_buffer_
 
std::vector< ArrayDatum > * string_array_dict_buffer_
 

Private Attributes

union {
   std::vector< int8_t > *   bool_buffer_
 
   std::vector< int8_t > *   tinyint_buffer_
 
   std::vector< int16_t > *   smallint_buffer_
 
   std::vector< int32_t > *   int_buffer_
 
   std::vector< int64_t > *   bigint_buffer_
 
   std::vector< float > *   float_buffer_
 
   std::vector< double > *   double_buffer_
 
   std::vector< std::string > *   string_buffer_
 
   std::vector< std::string > *   geo_string_buffer_
 
   std::vector< ArrayDatum > *   array_buffer_
 
   std::vector
< OptionalStringVector > *   string_array_buffer_
 
}; 
 
union {
   std::vector< uint8_t > *   string_dict_i8_buffer_
 
   std::vector< uint16_t > *   string_dict_i16_buffer_
 
   std::vector< int32_t > *   string_dict_i32_buffer_
 
   std::vector< ArrayDatum > *   string_array_dict_buffer_
 
}; 
 
const ColumnDescriptor * column_desc_
 
StringDictionary * string_dict_
 

Detailed Description

Definition at line 92 of file Importer.h.

Member Typedef Documentation

using import_export::TypedImportBuffer::OptionalStringVector = std::optional<std::vector<std::string>>

Definition at line 94 of file Importer.h.

Constructor & Destructor Documentation

import_export::TypedImportBuffer::TypedImportBuffer ( const ColumnDescriptor * col_desc,
StringDictionary * string_dict 
)
inline

Definition at line 95 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

96  : column_desc_(col_desc), string_dict_(string_dict) {
97  switch (col_desc->columnType.get_type()) {
98  case kBOOLEAN:
99  bool_buffer_ = new std::vector<int8_t>();
100  break;
101  case kTINYINT:
102  tinyint_buffer_ = new std::vector<int8_t>();
103  break;
104  case kSMALLINT:
105  smallint_buffer_ = new std::vector<int16_t>();
106  break;
107  case kINT:
108  int_buffer_ = new std::vector<int32_t>();
109  break;
110  case kBIGINT:
111  case kNUMERIC:
112  case kDECIMAL:
113  bigint_buffer_ = new std::vector<int64_t>();
114  break;
115  case kFLOAT:
116  float_buffer_ = new std::vector<float>();
117  break;
118  case kDOUBLE:
119  double_buffer_ = new std::vector<double>();
120  break;
121  case kTEXT:
122  case kVARCHAR:
123  case kCHAR:
124  string_buffer_ = new std::vector<std::string>();
125  if (col_desc->columnType.get_compression() == kENCODING_DICT) {
126  switch (col_desc->columnType.get_size()) {
127  case 1:
128  string_dict_i8_buffer_ = new std::vector<uint8_t>();
129  break;
130  case 2:
131  string_dict_i16_buffer_ = new std::vector<uint16_t>();
132  break;
133  case 4:
134  string_dict_i32_buffer_ = new std::vector<int32_t>();
135  break;
136  default:
137  CHECK(false);
138  }
139  }
140  break;
141  case kDATE:
142  case kTIME:
143  case kTIMESTAMP:
144  bigint_buffer_ = new std::vector<int64_t>();
145  break;
146  case kARRAY:
147  if (IS_STRING(col_desc->columnType.get_subtype())) {
149  string_array_buffer_ = new std::vector<OptionalStringVector>();
150  string_array_dict_buffer_ = new std::vector<ArrayDatum>();
151  } else {
152  array_buffer_ = new std::vector<ArrayDatum>();
153  }
154  break;
155  case kPOINT:
156  case kMULTIPOINT:
157  case kLINESTRING:
158  case kMULTILINESTRING:
159  case kPOLYGON:
160  case kMULTIPOLYGON:
161  geo_string_buffer_ = new std::vector<std::string>();
162  break;
163  default:
164  CHECK(false);
165  }
166  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
Definition: sqltypes.h:76
std::vector< std::string > * string_buffer_
Definition: Importer.h:542
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:544
StringDictionary * string_dict_
Definition: Importer.h:554
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:537
std::vector< float > * float_buffer_
Definition: Importer.h:540
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
std::vector< double > * double_buffer_
Definition: Importer.h:541
std::vector< int32_t > * int_buffer_
Definition: Importer.h:538
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:551
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:548
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:545
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:539
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:535
Definition: sqltypes.h:79
Definition: sqltypes.h:80
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:550
Definition: sqltypes.h:68
#define IS_STRING(T)
Definition: sqltypes.h:309
const ColumnDescriptor * column_desc_
Definition: Importer.h:553
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:549
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:536
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqltypes.h:72
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:543

+ Here is the call graph for this function:

import_export::TypedImportBuffer::~TypedImportBuffer ( )
inline

Definition at line 168 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

168  {
169  switch (column_desc_->columnType.get_type()) {
170  case kBOOLEAN:
171  delete bool_buffer_;
172  break;
173  case kTINYINT:
174  delete tinyint_buffer_;
175  break;
176  case kSMALLINT:
177  delete smallint_buffer_;
178  break;
179  case kINT:
180  delete int_buffer_;
181  break;
182  case kBIGINT:
183  case kNUMERIC:
184  case kDECIMAL:
185  delete bigint_buffer_;
186  break;
187  case kFLOAT:
188  delete float_buffer_;
189  break;
190  case kDOUBLE:
191  delete double_buffer_;
192  break;
193  case kTEXT:
194  case kVARCHAR:
195  case kCHAR:
196  delete string_buffer_;
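197  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
198  switch (column_desc_->columnType.get_size()) {
199  case 1:
200  delete string_dict_i8_buffer_;
201  break;
202  case 2:
203  delete string_dict_i16_buffer_;
204  break;
205  case 4:
206  delete string_dict_i32_buffer_;
207  break;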
208  }
209  }
210  break;
211  case kDATE:
212  case kTIME:
213  case kTIMESTAMP:
214  delete bigint_buffer_;
215  break;
216  case kARRAY:
217  if (IS_STRING(column_desc_->columnType.get_subtype())) {
218  delete string_array_buffer_;
219  delete string_array_dict_buffer_;
220  } else {
221  delete array_buffer_;
222  }
223  break;
224  case kPOINT:
225  case kMULTIPOINT:
226  case kLINESTRING:
227  case kMULTILINESTRING:
228  case kPOLYGON:
229  case kMULTIPOLYGON:
230  delete geo_string_buffer_;
231  break;
232  default:
233  CHECK(false);
234  }
235  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
Definition: sqltypes.h:76
std::vector< std::string > * string_buffer_
Definition: Importer.h:542
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:544
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:537
std::vector< float > * float_buffer_
Definition: Importer.h:540
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
std::vector< double > * double_buffer_
Definition: Importer.h:541
std::vector< int32_t > * int_buffer_
Definition: Importer.h:538
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:551
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:548
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:545
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:539
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:535
Definition: sqltypes.h:79
Definition: sqltypes.h:80
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:550
Definition: sqltypes.h:68
#define IS_STRING(T)
Definition: sqltypes.h:309
const ColumnDescriptor * column_desc_
Definition: Importer.h:553
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:549
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:536
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqltypes.h:72
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:543

+ Here is the call graph for this function:

Member Function Documentation

size_t import_export::TypedImportBuffer::add_arrow_values ( const ColumnDescriptor * cd,
const arrow::Array &  data,
const bool  exact_type_match,
const ArraySliceRange & slice_range,
BadRowsTracker * bad_rows_tracker 
)

Definition at line 874 of file Importer.cpp.

References arrow_throw_if(), bigint_buffer_, bool_buffer_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_arrow_val_to_import_buffer(), double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), int_buffer_, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, heavydb.dtypes::STRING, string_buffer_, tinyint_buffer_, and run_benchmark_import::type.

878  {
879  const auto type = cd->columnType.get_type();
880  if (cd->columnType.get_notnull()) {
881  // We can't have any null values for this column; to have them is an error
882  arrow_throw_if(col.null_count() > 0, "NULL not allowed for column " + cd->columnName);
883  }
884 
885  switch (type) {
886  case kBOOLEAN:
887  if (exact_type_match) {
888  arrow_throw_if(col.type_id() != Type::BOOL, "Expected boolean type");
889  }
890  return convert_arrow_val_to_import_buffer(
891  cd, col, *bool_buffer_, slice_range, bad_rows_tracker);
892  case kTINYINT:
893  if (exact_type_match) {
894  arrow_throw_if(col.type_id() != Type::INT8, "Expected int8 type");
895  }
896  return convert_arrow_val_to_import_buffer(
897  cd, col, *tinyint_buffer_, slice_range, bad_rows_tracker);
898  case kSMALLINT:
899  if (exact_type_match) {
900  arrow_throw_if(col.type_id() != Type::INT16, "Expected int16 type");
901  }
902  return convert_arrow_val_to_import_buffer(
903  cd, col, *smallint_buffer_, slice_range, bad_rows_tracker);
904  case kINT:
905  if (exact_type_match) {
906  arrow_throw_if(col.type_id() != Type::INT32, "Expected int32 type");
907  }
908  return convert_arrow_val_to_import_buffer(
909  cd, col, *int_buffer_, slice_range, bad_rows_tracker);
910  case kBIGINT:
911  case kNUMERIC:
912  case kDECIMAL:
913  if (exact_type_match) {
914  arrow_throw_if(col.type_id() != Type::INT64, "Expected int64 type");
915  }
916  return convert_arrow_val_to_import_buffer(
917  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
918  case kFLOAT:
919  if (exact_type_match) {
920  arrow_throw_if(col.type_id() != Type::FLOAT, "Expected float type");
921  }
922  return convert_arrow_val_to_import_buffer(
923  cd, col, *float_buffer_, slice_range, bad_rows_tracker);
924  case kDOUBLE:
925  if (exact_type_match) {
926  arrow_throw_if(col.type_id() != Type::DOUBLE, "Expected double type");
927  }
928  return convert_arrow_val_to_import_buffer(
929  cd, col, *double_buffer_, slice_range, bad_rows_tracker);
930  case kTEXT:
931  case kVARCHAR:
932  case kCHAR:
933  if (exact_type_match) {
934  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
935  "Expected string type");
936  }
937  return convert_arrow_val_to_import_buffer(
938  cd, col, *string_buffer_, slice_range, bad_rows_tracker);
939  case kTIME:
940  if (exact_type_match) {
941  arrow_throw_if(col.type_id() != Type::TIME32 && col.type_id() != Type::TIME64,
942  "Expected time32 or time64 type");
943  }
944  return convert_arrow_val_to_import_buffer(
945  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
946  case kTIMESTAMP:
947  if (exact_type_match) {
948  arrow_throw_if(col.type_id() != Type::TIMESTAMP, "Expected timestamp type");
949  }
950  return convert_arrow_val_to_import_buffer(
951  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
952  case kDATE:
953  if (exact_type_match) {
954  arrow_throw_if(col.type_id() != Type::DATE32 && col.type_id() != Type::DATE64,
955  "Expected date32 or date64 type");
956  }
957  return convert_arrow_val_to_import_buffer(
958  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
959  case kPOINT:
960  case kMULTIPOINT:
961  case kLINESTRING:
962  case kMULTILINESTRING:
963  case kPOLYGON:
964  case kMULTIPOLYGON:
965  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
966  "Expected string type");
967  return convert_arrow_val_to_import_buffer(
968  cd, col, *geo_string_buffer_, slice_range, bad_rows_tracker);
969  case kARRAY:
970  throw std::runtime_error("Arrow array appends not yet supported");
971  default:
972  throw std::runtime_error("Invalid Type");
973  }
974 }
Definition: sqltypes.h:76
std::vector< std::string > * string_buffer_
Definition: Importer.h:542
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:537
std::vector< float > * float_buffer_
Definition: Importer.h:540
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
tuple STRING
Definition: dtypes.py:31
std::vector< double > * double_buffer_
Definition: Importer.h:541
std::vector< int32_t > * int_buffer_
Definition: Importer.h:538
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:539
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:535
Definition: sqltypes.h:79
Definition: sqltypes.h:80
Definition: sqltypes.h:68
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:536
size_t convert_arrow_val_to_import_buffer(const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
Definition: sqltypes.h:72
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
std::string columnName
void arrow_throw_if(const bool cond, const std::string &message)
Definition: ArrowImporter.h:42
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:543

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const std::string_view  val,
const bool  is_null,
const CopyParams & copy_params,
const bool  check_not_null = true 
)

Definition at line 529 of file Importer.cpp.

References addArray(), addBigint(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), Datum::bigintval, Datum::boolval, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_max_strlen(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), Datum::smallintval, import_export::StringToArray(), StringToDatum(), Datum::tinyintval, to_string(), run_benchmark_import::type, and DecimalOverflowValidator::validate().

Referenced by foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::set_null(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::set_null(), and foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::set_null().

533  {
534  const auto type = cd->columnType.get_type();
535  switch (type) {
536  case kBOOLEAN: {
537  if (is_null) {
538  if (check_not_null && cd->columnType.get_notnull()) {
539  throw std::runtime_error("NULL for column " + cd->columnName);
540  }
541  addBoolean(inline_fixed_encoding_null_val(cd->columnType));
542  } else {
543  auto ti = cd->columnType;
544  Datum d = StringToDatum(val, ti);
545  addBoolean(static_cast<int8_t>(d.boolval));
546  }
547  break;
548  }
549  case kTINYINT: {
550  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
551  auto ti = cd->columnType;
552  Datum d = StringToDatum(val, ti);
553  addTinyint(d.tinyintval);
554  } else {
555  if (check_not_null && cd->columnType.get_notnull()) {
556  throw std::runtime_error("NULL for column " + cd->columnName);
557  }
558  addTinyint(inline_fixed_encoding_null_val(cd->columnType));
559  }
560  break;
561  }
562  case kSMALLINT: {
563  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
564  auto ti = cd->columnType;
565  Datum d = StringToDatum(val, ti);
566  addSmallint(d.smallintval);
567  } else {
568  if (check_not_null && cd->columnType.get_notnull()) {
569  throw std::runtime_error("NULL for column " + cd->columnName);
570  }
571  addSmallint(inline_fixed_encoding_null_val(cd->columnType));
572  }
573  break;
574  }
575  case kINT: {
576  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
577  auto ti = cd->columnType;
578  Datum d = StringToDatum(val, ti);
579  addInt(d.intval);
580  } else {
581  if (check_not_null && cd->columnType.get_notnull()) {
582  throw std::runtime_error("NULL for column " + cd->columnName);
583  }
584  addInt(inline_fixed_encoding_null_val(cd->columnType));
585  }
586  break;
587  }
588  case kBIGINT: {
589  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
590  auto ti = cd->columnType;
591  Datum d = StringToDatum(val, ti);
592  addBigint(d.bigintval);
593  } else {
594  if (check_not_null && cd->columnType.get_notnull()) {
595  throw std::runtime_error("NULL for column " + cd->columnName);
596  }
597  addBigint(inline_fixed_encoding_null_val(cd->columnType));
598  }
599  break;
600  }
601  case kDECIMAL:
602  case kNUMERIC: {
603  if (!is_null) {
604  auto ti = cd->columnType;
605  Datum d = StringToDatum(val, ti);
606  DecimalOverflowValidator validator(ti);
607  validator.validate(d.bigintval);
608  addBigint(d.bigintval);
609  } else {
610  if (check_not_null && cd->columnType.get_notnull()) {
611  throw std::runtime_error("NULL for column " + cd->columnName);
612  }
613  addBigint(inline_fixed_encoding_null_val(cd->columnType));
614  }
615  break;
616  }
617  case kFLOAT:
618  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
619  addFloat(static_cast<float>(std::atof(std::string(val).c_str())));
620  } else {
621  if (check_not_null && cd->columnType.get_notnull()) {
622  throw std::runtime_error("NULL for column " + cd->columnName);
623  }
624  addFloat(NULL_FLOAT);
625  }
626  break;
627  case kDOUBLE:
628  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
629  addDouble(std::atof(std::string(val).c_str()));
630  } else {
631  if (check_not_null && cd->columnType.get_notnull()) {
632  throw std::runtime_error("NULL for column " + cd->columnName);
633  }
634  addDouble(NULL_DOUBLE);
635  }
636  break;
637  case kTEXT:
638  case kVARCHAR:
639  case kCHAR: {
640  // @TODO(wei) for now, use empty string for nulls
641  if (is_null) {
642  if (check_not_null && cd->columnType.get_notnull()) {
643  throw std::runtime_error("NULL for column " + cd->columnName);
644  }
645  addString(std::string());
646  } else {
647  if (val.length() > cd->columnType.get_max_strlen()) {
648  throw std::runtime_error("String too long for column " + cd->columnName +
649  " was " + std::to_string(val.length()) + " max is " +
650  std::to_string(cd->columnType.get_max_strlen()));
651  }
652  addString(val);
653  }
654  break;
655  }
656  case kTIME:
657  case kTIMESTAMP:
658  case kDATE:
659  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
660  SQLTypeInfo ti = cd->columnType;
661  Datum d = StringToDatum(val, ti);
662  addBigint(d.bigintval);
663  } else {
664  if (check_not_null && cd->columnType.get_notnull()) {
665  throw std::runtime_error("NULL for column " + cd->columnName);
666  }
667  addBigint(inline_fixed_encoding_null_val(cd->columnType));
668  }
669  break;
670  case kARRAY: {
671  if (check_not_null && is_null && cd->columnType.get_notnull()) {
672  throw std::runtime_error("NULL for column " + cd->columnName);
673  }
674  SQLTypeInfo ti = cd->columnType;
675  if (IS_STRING(ti.get_subtype())) {
676  std::vector<std::string> string_vec;
677  // Just parse string array, don't push it to buffer yet as we might throw
678  import_export::delimited_parser::parse_string_array(
679  std::string(val), copy_params, string_vec);
680  if (!is_null) {
681  if (ti.get_size() > 0) {
682  auto sti = ti.get_elem_type();
683  size_t expected_size = ti.get_size() / sti.get_size();
684  size_t actual_size = string_vec.size();
685  if (actual_size != expected_size) {
686  throw std::runtime_error("Fixed length array column " + cd->columnName +
687  " expects " + std::to_string(expected_size) +
688  " values, received " +
689  std::to_string(actual_size));
690  }
691  }
692  addStringArray(string_vec);
693  } else {
694  addStringArray(std::nullopt);
695  }
696  } else {
697  if (!is_null) {
698  ArrayDatum d = StringToArray(std::string(val), ti, copy_params);
699  if (d.is_null) { // val could be "NULL"
700  addArray(NullArray(ti));
701  } else {
702  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
703  throw std::runtime_error("Fixed length array for column " + cd->columnName +
704  " has incorrect length: " + std::string(val));
705  }
706  addArray(d);
707  }
708  } else {
709  addArray(NullArray(ti));
710  }
711  }
712  break;
713  }
714  case kPOINT:
715  case kMULTIPOINT:
716  case kLINESTRING:
717  case kMULTILINESTRING:
718  case kPOLYGON:
719  case kMULTIPOLYGON:
720  addGeoString(val);
721  break;
722  default:
723  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
724  }
725 }
int8_t tinyintval
Definition: Datum.h:71
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
void addBigint(const int64_t v)
Definition: Importer.h:245
OptionalStringVector & addStringArray()
Definition: Importer.h:264
void addSmallint(const int16_t v)
Definition: Importer.h:241
Definition: sqltypes.h:76
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:251
void addDouble(const double v)
Definition: Importer.h:249
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:371
int8_t boolval
Definition: Datum.h:70
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
int32_t intval
Definition: Datum.h:73
std::string to_string(char const *&&v)
void addFloat(const float v)
Definition: Importer.h:247
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
void addGeoString(const std::string_view v)
Definition: Importer.h:260
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:237
void addTinyint(const int8_t v)
Definition: Importer.h:239
int64_t bigintval
Definition: Datum.h:74
void addInt(const int32_t v)
Definition: Importer.h:243
int16_t smallintval
Definition: Datum.h:72
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
Datum StringToDatum(const std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:339
Definition: sqltypes.h:79
Definition: sqltypes.h:80
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:316
void addArray(const ArrayDatum &v)
Definition: Importer.h:262
Definition: sqltypes.h:68
#define IS_STRING(T)
Definition: sqltypes.h:309
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:72
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
HOST DEVICE size_t get_max_strlen() const
Definition: sqltypes.h:405
Definition: Datum.h:69
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:975
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const TDatum &  val,
const bool  is_null 
)

Definition at line 1308 of file Importer.cpp.

References addArray(), addBigint(), import_export::addBinaryStringArray(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, decimal_to_int_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::TDatumToArrayDatum(), and run_benchmark_import::type.

1310  {
1311  const auto type = cd->columnType.is_decimal() ? decimal_to_int_type(cd->columnType)
1312  : cd->columnType.get_type();
1313  switch (type) {
1314  case kBOOLEAN: {
1315  if (is_null) {
1316  if (cd->columnType.get_notnull()) {
1317  throw std::runtime_error("NULL for column " + cd->columnName);
1318  }
1319  addBoolean(inline_fixed_encoding_null_val(cd->columnType));
1320  } else {
1321  addBoolean((int8_t)datum.val.int_val);
1322  }
1323  break;
1324  }
1325  case kTINYINT:
1326  if (!is_null) {
1327  addTinyint((int8_t)datum.val.int_val);
1328  } else {
1329  if (cd->columnType.get_notnull()) {
1330  throw std::runtime_error("NULL for column " + cd->columnName);
1331  }
1332  addTinyint(inline_fixed_encoding_null_val(cd->columnType));
1333  }
1334  break;
1335  case kSMALLINT:
1336  if (!is_null) {
1337  addSmallint((int16_t)datum.val.int_val);
1338  } else {
1339  if (cd->columnType.get_notnull()) {
1340  throw std::runtime_error("NULL for column " + cd->columnName);
1341  }
1342  addSmallint(inline_fixed_encoding_null_val(cd->columnType));
1343  }
1344  break;
1345  case kINT:
1346  if (!is_null) {
1347  addInt((int32_t)datum.val.int_val);
1348  } else {
1349  if (cd->columnType.get_notnull()) {
1350  throw std::runtime_error("NULL for column " + cd->columnName);
1351  }
1352  addInt(inline_fixed_encoding_null_val(cd->columnType));
1353  }
1354  break;
1355  case kBIGINT:
1356  if (!is_null) {
1357  addBigint(datum.val.int_val);
1358  } else {
1359  if (cd->columnType.get_notnull()) {
1360  throw std::runtime_error("NULL for column " + cd->columnName);
1361  }
1362  addBigint(inline_fixed_encoding_null_val(cd->columnType));
1363  }
1364  break;
1365  case kFLOAT:
1366  if (!is_null) {
1367  addFloat((float)datum.val.real_val);
1368  } else {
1369  if (cd->columnType.get_notnull()) {
1370  throw std::runtime_error("NULL for column " + cd->columnName);
1371  }
1372  addFloat(NULL_FLOAT);
1373  }
1374  break;
1375  case kDOUBLE:
1376  if (!is_null) {
1377  addDouble(datum.val.real_val);
1378  } else {
1379  if (cd->columnType.get_notnull()) {
1380  throw std::runtime_error("NULL for column " + cd->columnName);
1381  }
1382  addDouble(NULL_DOUBLE);
1383  }
1384  break;
1385  case kTEXT:
1386  case kVARCHAR:
1387  case kCHAR: {
1388  // @TODO(wei) for now, use empty string for nulls
1389  if (is_null) {
1390  if (cd->columnType.get_notnull()) {
1391  throw std::runtime_error("NULL for column " + cd->columnName);
1392  }
1393  addString(std::string());
1394  } else {
1395  addString(datum.val.str_val);
1396  }
1397  break;
1398  }
1399  case kTIME:
1400  case kTIMESTAMP:
1401  case kDATE: {
1402  if (!is_null) {
1403  addBigint(datum.val.int_val);
1404  } else {
1405  if (cd->columnType.get_notnull()) {
1406  throw std::runtime_error("NULL for column " + cd->columnName);
1407  }
1408  addBigint(inline_fixed_encoding_null_val(cd->columnType));
1409  }
1410  break;
1411  }
1412  case kARRAY:
1413  if (is_null && cd->columnType.get_notnull()) {
1414  throw std::runtime_error("NULL for column " + cd->columnName);
1415  }
1416  if (IS_STRING(cd->columnType.get_subtype())) {
1417  OptionalStringVector& string_vec = addStringArray();
1418  addBinaryStringArray(datum, *string_vec);
1419  } else {
1420  if (!is_null) {
1421  addArray(TDatumToArrayDatum(datum, cd->columnType));
1422  } else {
1423  addArray(NullArray(cd->columnType));
1424  }
1425  }
1426  break;
1427  case kPOINT:
1428  case kMULTIPOINT:
1429  case kLINESTRING:
1430  case kMULTILINESTRING:
1431  case kPOLYGON:
1432  case kMULTIPOLYGON:
1433  if (is_null) {
1434  if (cd->columnType.get_notnull()) {
1435  throw std::runtime_error("NULL for column " + cd->columnName);
1436  }
1437  addGeoString(std::string());
1438  } else {
1439  addGeoString(datum.val.str_val);
1440  }
1441  break;
1442  default:
1443  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
1444  }
1445 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
#define NULL_DOUBLE
void addBigint(const int64_t v)
Definition: Importer.h:245
OptionalStringVector & addStringArray()
Definition: Importer.h:264
void addSmallint(const int16_t v)
Definition: Importer.h:241
Definition: sqltypes.h:76
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:251
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:94
void addDouble(const double v)
Definition: Importer.h:249
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:371
void addFloat(const float v)
Definition: Importer.h:247
void addGeoString(const std::string_view v)
Definition: Importer.h:260
ArrayDatum TDatumToArrayDatum(const TDatum &datum, const SQLTypeInfo &ti)
Definition: Importer.cpp:469
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:237
void addTinyint(const int8_t v)
Definition: Importer.h:239
void addInt(const int32_t v)
Definition: Importer.h:243
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:561
Definition: sqltypes.h:79
Definition: sqltypes.h:80
void addArray(const ArrayDatum &v)
Definition: Importer.h:262
Definition: sqltypes.h:68
#define IS_STRING(T)
Definition: sqltypes.h:309
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:72
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
bool is_decimal() const
Definition: sqltypes.h:568
std::string columnName
void addBinaryStringArray(const TDatum &datum, std::vector< std::string > &string_vec)
Definition: Importer.cpp:415

+ Here is the call graph for this function:

size_t import_export::TypedImportBuffer::add_values ( const ColumnDescriptor *  cd,
const TColumn &  data 
)

Definition at line 977 of file Importer.cpp.

References addArray(), addStringArray(), anonymous_namespace{QueryMemoryDescriptor.cpp}::any_of(), bigint_buffer_, bool_buffer_, checked_malloc(), ColumnDescriptor::columnName, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), smallint_buffer_, string_buffer_, and tinyint_buffer_.

977  {
978  size_t dataSize = 0;
979  if (cd->columnType.get_notnull()) {
980  // We can't have any null values for this column; to have them is an error
981  if (std::any_of(col.nulls.begin(), col.nulls.end(), [](int i) { return i != 0; })) {
982  throw std::runtime_error("NULL for column " + cd->columnName);
983  }
984  }
985 
986  switch (cd->columnType.get_type()) {
987  case kBOOLEAN: {
988  dataSize = col.data.int_col.size();
989  bool_buffer_->reserve(dataSize);
990  for (size_t i = 0; i < dataSize; i++) {
991  if (col.nulls[i]) {
993  } else {
994  bool_buffer_->push_back((int8_t)col.data.int_col[i]);
995  }
996  }
997  break;
998  }
999  case kTINYINT: {
1000  dataSize = col.data.int_col.size();
1001  tinyint_buffer_->reserve(dataSize);
1002  for (size_t i = 0; i < dataSize; i++) {
1003  if (col.nulls[i]) {
1005  } else {
1006  tinyint_buffer_->push_back((int8_t)col.data.int_col[i]);
1007  }
1008  }
1009  break;
1010  }
1011  case kSMALLINT: {
1012  dataSize = col.data.int_col.size();
1013  smallint_buffer_->reserve(dataSize);
1014  for (size_t i = 0; i < dataSize; i++) {
1015  if (col.nulls[i]) {
1017  } else {
1018  smallint_buffer_->push_back((int16_t)col.data.int_col[i]);
1019  }
1020  }
1021  break;
1022  }
1023  case kINT: {
1024  dataSize = col.data.int_col.size();
1025  int_buffer_->reserve(dataSize);
1026  for (size_t i = 0; i < dataSize; i++) {
1027  if (col.nulls[i]) {
1029  } else {
1030  int_buffer_->push_back((int32_t)col.data.int_col[i]);
1031  }
1032  }
1033  break;
1034  }
1035  case kBIGINT:
1036  case kNUMERIC:
1037  case kDECIMAL: {
1038  dataSize = col.data.int_col.size();
1039  bigint_buffer_->reserve(dataSize);
1040  for (size_t i = 0; i < dataSize; i++) {
1041  if (col.nulls[i]) {
1043  } else {
1044  bigint_buffer_->push_back((int64_t)col.data.int_col[i]);
1045  }
1046  }
1047  break;
1048  }
1049  case kFLOAT: {
1050  dataSize = col.data.real_col.size();
1051  float_buffer_->reserve(dataSize);
1052  for (size_t i = 0; i < dataSize; i++) {
1053  if (col.nulls[i]) {
1054  float_buffer_->push_back(NULL_FLOAT);
1055  } else {
1056  float_buffer_->push_back((float)col.data.real_col[i]);
1057  }
1058  }
1059  break;
1060  }
1061  case kDOUBLE: {
1062  dataSize = col.data.real_col.size();
1063  double_buffer_->reserve(dataSize);
1064  for (size_t i = 0; i < dataSize; i++) {
1065  if (col.nulls[i]) {
1066  double_buffer_->push_back(NULL_DOUBLE);
1067  } else {
1068  double_buffer_->push_back((double)col.data.real_col[i]);
1069  }
1070  }
1071  break;
1072  }
1073  case kTEXT:
1074  case kVARCHAR:
1075  case kCHAR: {
1076  // TODO: for now, use empty string for nulls
1077  dataSize = col.data.str_col.size();
1078  string_buffer_->reserve(dataSize);
1079  for (size_t i = 0; i < dataSize; i++) {
1080  if (col.nulls[i]) {
1081  string_buffer_->push_back(std::string());
1082  } else {
1083  string_buffer_->push_back(col.data.str_col[i]);
1084  }
1085  }
1086  break;
1087  }
1088  case kTIME:
1089  case kTIMESTAMP:
1090  case kDATE: {
1091  dataSize = col.data.int_col.size();
1092  bigint_buffer_->reserve(dataSize);
1093  for (size_t i = 0; i < dataSize; i++) {
1094  if (col.nulls[i]) {
1096  } else {
1097  bigint_buffer_->push_back(static_cast<int64_t>(col.data.int_col[i]));
1098  }
1099  }
1100  break;
1101  }
1102  case kPOINT:
1103  case kMULTIPOINT:
1104  case kLINESTRING:
1105  case kMULTILINESTRING:
1106  case kPOLYGON:
1107  case kMULTIPOLYGON: {
1108  dataSize = col.data.str_col.size();
1109  geo_string_buffer_->reserve(dataSize);
1110  for (size_t i = 0; i < dataSize; i++) {
1111  if (col.nulls[i]) {
1112  // TODO: add support for NULL geo
1113  geo_string_buffer_->push_back(std::string());
1114  } else {
1115  geo_string_buffer_->push_back(col.data.str_col[i]);
1116  }
1117  }
1118  break;
1119  }
1120  case kARRAY: {
1121  dataSize = col.data.arr_col.size();
1122  if (IS_STRING(cd->columnType.get_subtype())) {
1123  for (size_t i = 0; i < dataSize; i++) {
1124  OptionalStringVector& string_vec = addStringArray();
1125  if (!col.nulls[i]) {
1126  size_t stringArrSize = col.data.arr_col[i].data.str_col.size();
1127  for (size_t str_idx = 0; str_idx != stringArrSize; ++str_idx) {
1128  string_vec->push_back(col.data.arr_col[i].data.str_col[str_idx]);
1129  }
1130  }
1131  }
1132  } else {
1133  auto elem_ti = cd->columnType.get_subtype();
1134  switch (elem_ti) {
1135  case kBOOLEAN: {
1136  for (size_t i = 0; i < dataSize; i++) {
1137  if (col.nulls[i]) {
1139  } else {
1140  size_t len = col.data.arr_col[i].data.int_col.size();
1141  size_t byteSize = len * sizeof(int8_t);
1142  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1143  int8_t* p = buf;
1144  for (size_t j = 0; j < len; ++j) {
1145  // Explicitly check each item for null, because
1146  // casting the null value (-128) to bool yields
1147  // the incorrect value 1.
1148  if (col.data.arr_col[i].nulls[j]) {
1149  *p = static_cast<int8_t>(
1150  inline_fixed_encoding_null_val(cd->columnType.get_elem_type()));
1151  } else {
1152  *(bool*)p = static_cast<bool>(col.data.arr_col[i].data.int_col[j]);
1153  }
1154  p += sizeof(bool);
1155  }
1156  addArray(ArrayDatum(byteSize, buf, false));
1157  }
1158  }
1159  break;
1160  }
1161  case kTINYINT: {
1162  for (size_t i = 0; i < dataSize; i++) {
1163  if (col.nulls[i]) {
1165  } else {
1166  size_t len = col.data.arr_col[i].data.int_col.size();
1167  size_t byteSize = len * sizeof(int8_t);
1168  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1169  int8_t* p = buf;
1170  for (size_t j = 0; j < len; ++j) {
1171  *(int8_t*)p = static_cast<int8_t>(col.data.arr_col[i].data.int_col[j]);
1172  p += sizeof(int8_t);
1173  }
1174  addArray(ArrayDatum(byteSize, buf, false));
1175  }
1176  }
1177  break;
1178  }
1179  case kSMALLINT: {
1180  for (size_t i = 0; i < dataSize; i++) {
1181  if (col.nulls[i]) {
1183  } else {
1184  size_t len = col.data.arr_col[i].data.int_col.size();
1185  size_t byteSize = len * sizeof(int16_t);
1186  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1187  int8_t* p = buf;
1188  for (size_t j = 0; j < len; ++j) {
1189  *(int16_t*)p =
1190  static_cast<int16_t>(col.data.arr_col[i].data.int_col[j]);
1191  p += sizeof(int16_t);
1192  }
1193  addArray(ArrayDatum(byteSize, buf, false));
1194  }
1195  }
1196  break;
1197  }
1198  case kINT: {
1199  for (size_t i = 0; i < dataSize; i++) {
1200  if (col.nulls[i]) {
1202  } else {
1203  size_t len = col.data.arr_col[i].data.int_col.size();
1204  size_t byteSize = len * sizeof(int32_t);
1205  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1206  int8_t* p = buf;
1207  for (size_t j = 0; j < len; ++j) {
1208  *(int32_t*)p =
1209  static_cast<int32_t>(col.data.arr_col[i].data.int_col[j]);
1210  p += sizeof(int32_t);
1211  }
1212  addArray(ArrayDatum(byteSize, buf, false));
1213  }
1214  }
1215  break;
1216  }
1217  case kBIGINT:
1218  case kNUMERIC:
1219  case kDECIMAL: {
1220  for (size_t i = 0; i < dataSize; i++) {
1221  if (col.nulls[i]) {
1223  } else {
1224  size_t len = col.data.arr_col[i].data.int_col.size();
1225  size_t byteSize = len * sizeof(int64_t);
1226  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1227  int8_t* p = buf;
1228  for (size_t j = 0; j < len; ++j) {
1229  *(int64_t*)p =
1230  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1231  p += sizeof(int64_t);
1232  }
1233  addArray(ArrayDatum(byteSize, buf, false));
1234  }
1235  }
1236  break;
1237  }
1238  case kFLOAT: {
1239  for (size_t i = 0; i < dataSize; i++) {
1240  if (col.nulls[i]) {
1242  } else {
1243  size_t len = col.data.arr_col[i].data.real_col.size();
1244  size_t byteSize = len * sizeof(float);
1245  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1246  int8_t* p = buf;
1247  for (size_t j = 0; j < len; ++j) {
1248  *(float*)p = static_cast<float>(col.data.arr_col[i].data.real_col[j]);
1249  p += sizeof(float);
1250  }
1251  addArray(ArrayDatum(byteSize, buf, false));
1252  }
1253  }
1254  break;
1255  }
1256  case kDOUBLE: {
1257  for (size_t i = 0; i < dataSize; i++) {
1258  if (col.nulls[i]) {
1260  } else {
1261  size_t len = col.data.arr_col[i].data.real_col.size();
1262  size_t byteSize = len * sizeof(double);
1263  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1264  int8_t* p = buf;
1265  for (size_t j = 0; j < len; ++j) {
1266  *(double*)p = static_cast<double>(col.data.arr_col[i].data.real_col[j]);
1267  p += sizeof(double);
1268  }
1269  addArray(ArrayDatum(byteSize, buf, false));
1270  }
1271  }
1272  break;
1273  }
1274  case kTIME:
1275  case kTIMESTAMP:
1276  case kDATE: {
1277  for (size_t i = 0; i < dataSize; i++) {
1278  if (col.nulls[i]) {
1280  } else {
1281  size_t len = col.data.arr_col[i].data.int_col.size();
1282  size_t byteWidth = sizeof(int64_t);
1283  size_t byteSize = len * byteWidth;
1284  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1285  int8_t* p = buf;
1286  for (size_t j = 0; j < len; ++j) {
1287  *reinterpret_cast<int64_t*>(p) =
1288  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1289  p += sizeof(int64_t);
1290  }
1291  addArray(ArrayDatum(byteSize, buf, false));
1292  }
1293  }
1294  break;
1295  }
1296  default:
1297  throw std::runtime_error("Invalid Array Type");
1298  }
1299  }
1300  break;
1301  }
1302  default:
1303  throw std::runtime_error("Invalid Type");
1304  }
1305  return dataSize;
1306 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
#define NULL_DOUBLE
OptionalStringVector & addStringArray()
Definition: Importer.h:264
Definition: sqltypes.h:76
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:542
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:94
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:371
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:537
std::vector< float > * float_buffer_
Definition: Importer.h:540
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
std::vector< double > * double_buffer_
Definition: Importer.h:541
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
std::vector< int32_t > * int_buffer_
Definition: Importer.h:538
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:539
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:535
Definition: sqltypes.h:79
Definition: sqltypes.h:80
void addArray(const ArrayDatum &v)
Definition: Importer.h:262
Definition: sqltypes.h:68
#define IS_STRING(T)
Definition: sqltypes.h:309
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:536
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:72
SQLTypeInfo columnType
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:975
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:543

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addArray ( const ArrayDatum &  v)
inline

Definition at line 262 of file Importer.h.

References array_buffer_.

Referenced by add_value(), and add_values().

262 { array_buffer_->push_back(v); }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:544

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBigint ( const int64_t  v)
inline

Definition at line 245 of file Importer.h.

References bigint_buffer_.

Referenced by add_value().

245 { bigint_buffer_->push_back(v); }
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:539

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBoolean ( const int8_t  v)
inline

Definition at line 237 of file Importer.h.

References bool_buffer_.

Referenced by add_value().

237 { bool_buffer_->push_back(v); }
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:535

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addDefaultValues ( const ColumnDescriptor *  cd,
size_t  num_rows 
)

Definition at line 1447 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_decimal_value_to_scale(), ColumnDescriptor::default_value, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), int_buffer_, is_null(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), smallint_buffer_, string_array_buffer_, string_buffer_, import_export::StringToArray(), StringToDatum(), tinyint_buffer_, to_string(), and run_benchmark_import::type.

1447  {
1448  bool is_null = !cd->default_value.has_value();
1449  CHECK(!(is_null && cd->columnType.get_notnull()));
1450  const auto type = cd->columnType.get_type();
1451  auto ti = cd->columnType;
1452  auto val = cd->default_value.value_or("NULL");
1453  CopyParams cp;
1454  switch (type) {
1455  case kBOOLEAN: {
1456  if (!is_null) {
1457  bool_buffer_->resize(num_rows, StringToDatum(val, ti).boolval);
1458  } else {
1459  bool_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1460  }
1461  break;
1462  }
1463  case kTINYINT: {
1464  if (!is_null) {
1465  tinyint_buffer_->resize(num_rows, StringToDatum(val, ti).tinyintval);
1466  } else {
1468  }
1469  break;
1470  }
1471  case kSMALLINT: {
1472  if (!is_null) {
1473  smallint_buffer_->resize(num_rows, StringToDatum(val, ti).smallintval);
1474  } else {
1475  smallint_buffer_->resize(num_rows,
1476  inline_fixed_encoding_null_val(cd->columnType));
1477  }
1478  break;
1479  }
1480  case kINT: {
1481  if (!is_null) {
1482  int_buffer_->resize(num_rows, StringToDatum(val, ti).intval);
1483  } else {
1484  int_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1485  }
1486  break;
1487  }
1488  case kBIGINT: {
1489  if (!is_null) {
1490  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1491  } else {
1493  }
1494  break;
1495  }
1496  case kDECIMAL:
1497  case kNUMERIC: {
1498  if (!is_null) {
1499  const auto converted_decimal_value = convert_decimal_value_to_scale(
1500  StringToDatum(val, ti).bigintval, ti, cd->columnType);
1501  bigint_buffer_->resize(num_rows, converted_decimal_value);
1502  } else {
1504  }
1505  break;
1506  }
1507  case kFLOAT:
1508  if (!is_null) {
1509  float_buffer_->resize(num_rows,
1510  static_cast<float>(std::atof(std::string(val).c_str())));
1511  } else {
1512  float_buffer_->resize(num_rows, NULL_FLOAT);
1513  }
1514  break;
1515  case kDOUBLE:
1516  if (!is_null) {
1517  double_buffer_->resize(num_rows, std::atof(std::string(val).c_str()));
1518  } else {
1519  double_buffer_->resize(num_rows, NULL_DOUBLE);
1520  }
1521  break;
1522  case kTEXT:
1523  case kVARCHAR:
1524  case kCHAR: {
1525  if (is_null) {
1526  string_buffer_->resize(num_rows, "");
1527  } else {
1528  if (val.length() > ti.get_max_strlen()) {
1529  throw std::runtime_error("String too long for column " + cd->columnName +
1530  " was " + std::to_string(val.length()) + " max is " +
1531  std::to_string(ti.get_max_strlen()));
1532  }
1533  string_buffer_->resize(num_rows, val);
1534  }
1535  break;
1536  }
1537  case kTIME:
1538  case kTIMESTAMP:
1539  case kDATE:
1540  if (!is_null) {
1541  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1542  } else {
1544  }
1545  break;
1546  case kARRAY: {
1547  if (IS_STRING(ti.get_subtype())) {
1548  std::vector<std::string> string_vec;
1549  // Just parse string array, don't push it to buffer yet as we might throw
1550  import_export::delimited_parser::parse_string_array(
1551  std::string(val), cp, string_vec);
1552  if (!is_null) {
1553  // TODO: add support for NULL string arrays
1554  if (ti.get_size() > 0) {
1555  auto sti = ti.get_elem_type();
1556  size_t expected_size = ti.get_size() / sti.get_size();
1557  size_t actual_size = string_vec.size();
1558  if (actual_size != expected_size) {
1559  throw std::runtime_error("Fixed length array column " + cd->columnName +
1560  " expects " + std::to_string(expected_size) +
1561  " values, received " +
1562  std::to_string(actual_size));
1563  }
1564  }
1565  string_array_buffer_->resize(num_rows, string_vec);
1566  } else {
1567  if (ti.get_size() > 0) {
1568  // TODO: remove once NULL fixlen arrays are allowed
1569  throw std::runtime_error("Fixed length array column " + cd->columnName +
1570  " currently cannot accept NULL arrays");
1571  }
1572  // TODO: add support for NULL string arrays, replace with addStringArray(),
1573  // for now add whatever parseStringArray() outputs for NULLs ("NULL")
1574  string_array_buffer_->resize(num_rows, string_vec);
1575  }
1576  } else {
1577  if (!is_null) {
1578  ArrayDatum d = StringToArray(std::string(val), ti, cp);
1579  if (d.is_null) { // val could be "NULL"
1580  array_buffer_->resize(num_rows, NullArray(ti));
1581  } else {
1582  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
1583  throw std::runtime_error("Fixed length array for column " + cd->columnName +
1584  " has incorrect length: " + std::string(val));
1585  }
1586  array_buffer_->resize(num_rows, d);
1587  }
1588  } else {
1589  array_buffer_->resize(num_rows, NullArray(ti));
1590  }
1591  }
1592  break;
1593  }
1594  case kPOINT:
1595  case kMULTIPOINT:
1596  case kLINESTRING:
1597  case kMULTILINESTRING:
1598  case kPOLYGON:
1599  case kMULTIPOLYGON:
1600  geo_string_buffer_->resize(num_rows, val);
1601  break;
1602  default:
1603  CHECK(false) << "TypedImportBuffer::addDefaultValues() does not support type "
1604  << type;
1605  }
1606 }
#define NULL_DOUBLE
Definition: sqltypes.h:76
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:542
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:544
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:371
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:537
std::vector< float > * float_buffer_
Definition: Importer.h:540
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
std::vector< double > * double_buffer_
Definition: Importer.h:541
std::string to_string(char const *&&v)
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
std::vector< int32_t > * int_buffer_
Definition: Importer.h:538
CONSTEXPR DEVICE bool is_null(const T &value)
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:545
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:539
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
Datum StringToDatum(const std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:339
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:535
std::optional< std::string > default_value
Definition: sqltypes.h:79
Definition: sqltypes.h:80
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:316
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:624
Definition: sqltypes.h:68
#define IS_STRING(T)
Definition: sqltypes.h:309
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:536
#define CHECK(condition)
Definition: Logger.h:291
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:72
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:543

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedString ( const std::vector< std::string > &  string_vec)

Definition at line 489 of file Importer.cpp.

References CHECK, column_desc_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, logger::ERROR, SQLTypeInfo::get_size(), getColumnDesc(), StringDictionary::getOrAddBulk(), LOG, StringDictionary::MAX_STRLEN, string_dict_, string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

489  {
491  std::vector<std::string_view> string_view_vec;
492  string_view_vec.reserve(string_vec.size());
493  for (const auto& str : string_vec) {
494  if (str.size() > StringDictionary::MAX_STRLEN) {
495  std::ostringstream oss;
496  oss << "while processing dictionary for column " << getColumnDesc()->columnName
497  << " a string was detected too long for encoding, string length = "
498  << str.size() << ", first 100 characters are '" << str.substr(0, 100) << "'";
499  throw std::runtime_error(oss.str());
500  }
501  string_view_vec.push_back(str);
502  }
503  try {
504  switch (column_desc_->columnType.get_size()) {
505  case 1:
506  string_dict_i8_buffer_->resize(string_view_vec.size());
507  string_dict_->getOrAddBulk(string_view_vec, string_dict_i8_buffer_->data());
508  break;
509  case 2:
510  string_dict_i16_buffer_->resize(string_view_vec.size());
511  string_dict_->getOrAddBulk(string_view_vec, string_dict_i16_buffer_->data());
512  break;
513  case 4:
514  string_dict_i32_buffer_->resize(string_view_vec.size());
515  string_dict_->getOrAddBulk(string_view_vec, string_dict_i32_buffer_->data());
516  break;
517  default:
518  CHECK(false);
519  }
520  } catch (std::exception& e) {
521  std::ostringstream oss;
522  oss << "while processing dictionary for column " << getColumnDesc()->columnName
523  << " : " << e.what();
524  LOG(ERROR) << oss.str();
525  throw std::runtime_error(oss.str());
526  }
527 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
#define LOG(tag)
Definition: Logger.h:285
StringDictionary * string_dict_
Definition: Importer.h:554
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:548
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:550
const ColumnDescriptor * column_desc_
Definition: Importer.h:553
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:549
#define CHECK(condition)
Definition: Logger.h:291
const ColumnDescriptor * getColumnDesc() const
Definition: Importer.h:325
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedStringArray ( const std::vector< OptionalStringVector > &  string_array_vec)
inline

Definition at line 275 of file Importer.h.

References anonymous_namespace{Utm.h}::a, CHECK, checked_malloc(), column_desc_, ColumnDescriptor::columnType, import_export::ImporterUtils::composeNullArray(), StringDictionary::getOrAddBulkArray(), StringDictionary::MAX_STRLEN, string_array_dict_buffer_, and string_dict_.

276  {
278 
279  // first check data is ok
280  for (auto& p : string_array_vec) {
281  if (!p) {
282  continue;
283  }
284  for (const auto& str : *p) {
285  if (str.size() > StringDictionary::MAX_STRLEN) {
286  throw std::runtime_error("String too long for dictionary encoding.");
287  }
288  }
289  }
290 
291  // to avoid copying, create a string view of each string in the
292  // `string_array_vec` where the array holding the string is *not null*
293  std::vector<std::vector<std::string_view>> string_view_array_vec;
294  for (auto& p : string_array_vec) {
295  if (!p) {
296  continue;
297  }
298  auto& array = string_view_array_vec.emplace_back();
299  for (const auto& str : *p) {
300  array.emplace_back(str);
301  }
302  }
303 
304  std::vector<std::vector<int32_t>> ids_array(0);
305  string_dict_->getOrAddBulkArray(string_view_array_vec, ids_array);
306 
307  size_t i, j;
308  for (i = 0, j = 0; i < string_array_vec.size(); ++i) {
309  if (!string_array_vec[i]) { // null array
310  string_array_dict_buffer_->push_back(
311  ImporterUtils::composeNullArray(column_desc_->columnType));
312  } else { // non-null array
313  auto& p = ids_array[j++];
314  size_t len = p.size() * sizeof(int32_t);
315  auto a = static_cast<int32_t*>(checked_malloc(len));
316  memcpy(a, &p[0], len);
317  string_array_dict_buffer_->push_back(
318  ArrayDatum(len, reinterpret_cast<int8_t*>(a), false));
319  }
320  }
321  }
StringDictionary * string_dict_
Definition: Importer.h:554
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:395
constexpr double a
Definition: Utm.h:32
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:551
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
void getOrAddBulkArray(const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec)
const ColumnDescriptor * column_desc_
Definition: Importer.h:553
#define CHECK(condition)
Definition: Logger.h:291
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictStringWithTruncation ( std::string_view  v)
inline

Definition at line 253 of file Importer.h.

References StringDictionary::MAX_STRLEN, and string_buffer_.

253  {
254  if (v.size() > StringDictionary::MAX_STRLEN) {
255  v = v.substr(0, StringDictionary::MAX_STRLEN);
256  }
257  string_buffer_->emplace_back(v);
258  }
std::vector< std::string > * string_buffer_
Definition: Importer.h:542
static constexpr size_t MAX_STRLEN
void import_export::TypedImportBuffer::addDouble ( const double  v)
inline

Definition at line 249 of file Importer.h.

References double_buffer_.

Referenced by add_value().

249 { double_buffer_->push_back(v); }
std::vector< double > * double_buffer_
Definition: Importer.h:541

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addFloat ( const float  v)
inline

Definition at line 247 of file Importer.h.

References float_buffer_.

Referenced by add_value().

247 { float_buffer_->push_back(v); }
std::vector< float > * float_buffer_
Definition: Importer.h:540

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addGeoString ( const std::string_view  v)
inline

Definition at line 260 of file Importer.h.

References geo_string_buffer_.

Referenced by add_value().

260 { geo_string_buffer_->emplace_back(v); }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:543

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addInt ( const int32_t  v)
inline

Definition at line 243 of file Importer.h.

References int_buffer_.

Referenced by add_value().

243 { int_buffer_->push_back(v); }
std::vector< int32_t > * int_buffer_
Definition: Importer.h:538

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addSmallint ( const int16_t  v)
inline

Definition at line 241 of file Importer.h.

References smallint_buffer_.

Referenced by add_value().

241 { smallint_buffer_->push_back(v); }
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:537

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addString ( const std::string_view  v)
inline

Definition at line 251 of file Importer.h.

References string_buffer_.

Referenced by add_value().

251 { string_buffer_->emplace_back(v); }
std::vector< std::string > * string_buffer_
Definition: Importer.h:542

+ Here is the caller graph for this function:

OptionalStringVector& import_export::TypedImportBuffer::addStringArray ( )
inline

Definition at line 264 of file Importer.h.

References string_array_buffer_.

Referenced by add_value(), and add_values().

264  {
265  string_array_buffer_->emplace_back(std::vector<std::string>{});
266  return string_array_buffer_->back();
267  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:545

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addStringArray ( const OptionalStringVector &  arr)
inline

Definition at line 269 of file Importer.h.

References string_array_buffer_.

269  {
270  string_array_buffer_->push_back(arr);
271  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:545
void import_export::TypedImportBuffer::addTinyint ( const int8_t  v)
inline

Definition at line 239 of file Importer.h.

References tinyint_buffer_.

Referenced by add_value().

239 { tinyint_buffer_->push_back(v); }
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:536

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::clear ( )
inline

Definition at line 417 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

417  {
418  switch (column_desc_->columnType.get_type()) {
419  case kBOOLEAN: {
420  bool_buffer_->clear();
421  break;
422  }
423  case kTINYINT: {
424  tinyint_buffer_->clear();
425  break;
426  }
427  case kSMALLINT: {
428  smallint_buffer_->clear();
429  break;
430  }
431  case kINT: {
432  int_buffer_->clear();
433  break;
434  }
435  case kBIGINT:
436  case kNUMERIC:
437  case kDECIMAL: {
438  bigint_buffer_->clear();
439  break;
440  }
441  case kFLOAT: {
442  float_buffer_->clear();
443  break;
444  }
445  case kDOUBLE: {
446  double_buffer_->clear();
447  break;
448  }
449  case kTEXT:
450  case kVARCHAR:
451  case kCHAR: {
452  string_buffer_->clear();
453  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
454  switch (column_desc_->columnType.get_size()) {
455  case 1:
456  string_dict_i8_buffer_->clear();
457  break;
458  case 2:
459  string_dict_i16_buffer_->clear();
460  break;
461  case 4:
462  string_dict_i32_buffer_->clear();
463  break;
464  default:
465  CHECK(false);
466  }
467  }
468  break;
469  }
470  case kDATE:
471  case kTIME:
472  case kTIMESTAMP:
473  bigint_buffer_->clear();
474  break;
475  case kARRAY: {
476  if (IS_STRING(column_desc_->columnType.get_subtype())) {
477  string_array_buffer_->clear();
478  string_array_dict_buffer_->clear();
479  } else {
480  array_buffer_->clear();
481  }
482  break;
483  }
484  case kPOINT:
485  case kMULTIPOINT:
486  case kLINESTRING:
487  case kMULTILINESTRING:
488  case kPOLYGON:
489  case kMULTIPOLYGON:
490  geo_string_buffer_->clear();
491  break;
492  default:
493  CHECK(false);
494  }
495  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
Definition: sqltypes.h:76
std::vector< std::string > * string_buffer_
Definition: Importer.h:542
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:544
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:537
std::vector< float > * float_buffer_
Definition: Importer.h:540
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
std::vector< double > * double_buffer_
Definition: Importer.h:541
std::vector< int32_t > * int_buffer_
Definition: Importer.h:538
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:551
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:548
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:545
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:539
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:535
Definition: sqltypes.h:79
Definition: sqltypes.h:80
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:550
Definition: sqltypes.h:68
#define IS_STRING(T)
Definition: sqltypes.h:309
const ColumnDescriptor * column_desc_
Definition: Importer.h:553
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:549
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:536
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqltypes.h:72
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:543

+ Here is the call graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const arrow::Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange &  slice_range,
BadRowsTracker *const  bad_rows_tracker 
)

Referenced by add_arrow_values().

+ Here is the caller graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange &  slice_range,
import_export::BadRowsTracker *const  bad_rows_tracker 
)

Definition at line 789 of file Importer.cpp.

References col_idx, anonymous_namespace{ArrowImporter.h}::error_context(), geo_string_buffer_, SQLTypeInfo::get_type(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), import_buffers, import_export::BadRowsTracker::importer, import_export::BadRowsTracker::mutex, import_export::BadRowsTracker::rows, import_export::Importer::set_geo_physical_import_buffer(), and anonymous_namespace{ArrowImporter.h}::value_getter().

794  {
795  auto data =
796  std::make_unique<DataBuffer<DATA_TYPE>>(cd, array, buffer, bad_rows_tracker);
797  auto f_value_getter = value_getter(array, cd, bad_rows_tracker);
798  std::function<void(const int64_t)> f_add_geo_phy_cols = [&](const int64_t row) {};
799  if (bad_rows_tracker && cd->columnType.is_geometry()) {
800  f_add_geo_phy_cols = [&](const int64_t row) {
801  // Populate physical columns (ref. DBHandler::load_table)
802  std::vector<double> coords, bounds;
803  std::vector<int> ring_sizes, poly_rings;
804  SQLTypeInfo ti;
805  // replace any unexpected exception from getGeoColumns or other
806  // on this path with a GeoImportException so that we wont over
807  // push a null to the logical column...
808  try {
809  SQLTypeInfo import_ti{ti};
810  if (array.IsNull(row)) {
811  Geospatial::GeoTypesFactory::getNullGeoColumns(
812  import_ti, coords, bounds, ring_sizes, poly_rings);
813  } else {
814  const bool validate_with_geos_if_available = false;
815  arrow_throw_if<GeoImportException>(
816  !Geospatial::GeoTypesFactory::getGeoColumns(
817  geo_string_buffer_->back(),
818  ti,
819  coords,
820  bounds,
821  ring_sizes,
822  poly_rings,
823  validate_with_geos_if_available),
824  error_context(cd, bad_rows_tracker) + "Invalid geometry");
825  arrow_throw_if<GeoImportException>(
826  cd->columnType.get_type() != ti.get_type(),
827  error_context(cd, bad_rows_tracker) + "Geometry type mismatch");
828  }
829  auto col_idx_workpad = col_idx; // what a pitfall!!
830  import_export::Importer::set_geo_physical_import_buffer(
831  bad_rows_tracker->importer->getCatalog(),
832  cd,
833  *import_buffers,
834  col_idx_workpad,
835  coords,
836  bounds,
837  ring_sizes,
838  poly_rings);
839  } catch (GeoImportException&) {
840  throw;
841  } catch (std::runtime_error& e) {
842  throw GeoImportException(e.what());
843  } catch (const std::exception& e) {
844  throw GeoImportException(e.what());
845  } catch (...) {
846  throw GeoImportException("unknown exception");
847  }
848  };
849  }
850  auto f_mark_a_bad_row = [&](const auto row) {
851  std::unique_lock<std::mutex> lck(bad_rows_tracker->mutex);
852  bad_rows_tracker->rows.insert(row - slice_range.first);
853  };
854  buffer.reserve(slice_range.second - slice_range.first);
855  for (size_t row = slice_range.first; row < slice_range.second; ++row) {
856  try {
857  *data << (array.IsNull(row) ? nullptr : f_value_getter(array, row));
858  f_add_geo_phy_cols(row);
859  } catch (GeoImportException&) {
860  f_mark_a_bad_row(row);
861  } catch (ArrowImporterException&) {
862  // trace bad rows of each column; otherwise rethrow.
863  if (bad_rows_tracker) {
864  *data << nullptr;
865  f_mark_a_bad_row(row);
866  } else {
867  throw;
868  }
869  }
870  }
871  return buffer.size();
872 }
auto value_getter(const arrow::Array &array, const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings)
Definition: Types.cpp:1342
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool force_null=false)
Definition: Importer.cpp:1636
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool validate_with_geos_if_available)
Definition: Types.cpp:1121
Catalog_Namespace::Catalog & getCatalog()
Definition: Importer.h:847
std::set< int64_t > rows
Definition: Importer.h:78
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:530
std::string error_context(const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
Definition: ArrowImporter.h:77
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:543

+ Here is the call graph for this function:

template<typename DATA_TYPE >
auto import_export::TypedImportBuffer::del_values ( std::vector< DATA_TYPE > &  buffer,
BadRowsTracker *const  bad_rows_tracker 
)
auto import_export::TypedImportBuffer::del_values ( const SQLTypes  type,
BadRowsTracker *const  bad_rows_tracker 
)
std::vector< DataBlockPtr > import_export::TypedImportBuffer::get_data_block_pointers ( const std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers)
static

Definition at line 2937 of file Importer.cpp.

References DataBlockPtr::arraysPtr, threading_serial::async(), CHECK, CHECK_EQ, getStringArrayBuffer(), getTypeInfo(), import_buffers, SQLTypeInfo::is_number(), IS_STRING, SQLTypeInfo::is_string(), kARRAY, kBOOLEAN, kENCODING_DICT, kENCODING_NONE, DataBlockPtr::numbersPtr, run_benchmark_import::result, and DataBlockPtr::stringsPtr.

Referenced by import_export::fill_missing_columns(), import_export::Loader::loadImpl(), and import_export::Loader::loadToShard().

2938  {
2939  std::vector<DataBlockPtr> result(import_buffers.size());
2940  std::vector<std::pair<const size_t, std::future<int8_t*>>>
2941  encoded_data_block_ptrs_futures;
2942  // make all async calls to string dictionary here and then continue execution
2943  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
2944  if (import_buffers[buf_idx]->getTypeInfo().is_string() &&
2945  import_buffers[buf_idx]->getTypeInfo().get_compression() != kENCODING_NONE) {
2946  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
2947  CHECK_EQ(kENCODING_DICT, import_buffers[buf_idx]->getTypeInfo().get_compression());
2948 
2949  encoded_data_block_ptrs_futures.emplace_back(std::make_pair(
2950  buf_idx,
2951  std::async(std::launch::async, [buf_idx, &import_buffers, string_payload_ptr] {
2952  import_buffers[buf_idx]->addDictEncodedString(*string_payload_ptr);
2953  return import_buffers[buf_idx]->getStringDictBuffer();
2954  })));
2955  }
2956  }
2957 
2958  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
2959  DataBlockPtr p;
2960  if (import_buffers[buf_idx]->getTypeInfo().is_number() ||
2961  import_buffers[buf_idx]->getTypeInfo().is_time() ||
2962  import_buffers[buf_idx]->getTypeInfo().get_type() == kBOOLEAN) {
2963  p.numbersPtr = import_buffers[buf_idx]->getAsBytes();
2964  } else if (import_buffers[buf_idx]->getTypeInfo().is_string()) {
2965  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
2966  if (import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_NONE) {
2967  p.stringsPtr = string_payload_ptr;
2968  } else {
2969  // This condition means we have column which is ENCODED string. We already made
2970  // Async request to gain the encoded integer values above so we should skip this
2971  // iteration and continue.
2972  continue;
2973  }
2974  } else if (import_buffers[buf_idx]->getTypeInfo().is_geometry()) {
2975  auto geo_payload_ptr = import_buffers[buf_idx]->getGeoStringBuffer();
2976  p.stringsPtr = geo_payload_ptr;
2977  } else {
2978  CHECK(import_buffers[buf_idx]->getTypeInfo().get_type() == kARRAY);
2979  if (IS_STRING(import_buffers[buf_idx]->getTypeInfo().get_subtype())) {
2980  CHECK(import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_DICT);
2981  import_buffers[buf_idx]->addDictEncodedStringArray(
2982  *import_buffers[buf_idx]->getStringArrayBuffer());
2983  p.arraysPtr = import_buffers[buf_idx]->getStringArrayDictBuffer();
2984  } else {
2985  p.arraysPtr = import_buffers[buf_idx]->getArrayBuffer();
2986  }
2987  }
2988  result[buf_idx] = p;
2989  }
2990 
2991  // wait for the async requests we made for string dictionary
2992  for (auto& encoded_ptr_future : encoded_data_block_ptrs_futures) {
2993  result[encoded_ptr_future.first].numbersPtr = encoded_ptr_future.second.get();
2994  }
2995  return result;
2996 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const SQLTypeInfo & getTypeInfo() const
Definition: Importer.h:323
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:234
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:235
bool is_number() const
Definition: sqltypes.h:574
future< Result > async(Fn &&fn, Args &&...args)
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:530
#define IS_STRING(T)
Definition: sqltypes.h:309
#define CHECK(condition)
Definition: Logger.h:291
std::vector< OptionalStringVector > * getStringArrayBuffer() const
Definition: Importer.h:389
bool is_string() const
Definition: sqltypes.h:559
int8_t * numbersPtr
Definition: sqltypes.h:233

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getArrayBuffer ( ) const
inline

Definition at line 387 of file Importer.h.

References array_buffer_.

387 { return array_buffer_; }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:544
int8_t* import_export::TypedImportBuffer::getAsBytes ( ) const
inline

Definition at line 329 of file Importer.h.

References bigint_buffer_, bool_buffer_, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, SQLTypeInfo::get_type(), int_buffer_, kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, smallint_buffer_, and tinyint_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

329  {
330  switch (column_desc_->columnType.get_type()) {
331  case kBOOLEAN:
332  return reinterpret_cast<int8_t*>(bool_buffer_->data());
333  case kTINYINT:
334  return reinterpret_cast<int8_t*>(tinyint_buffer_->data());
335  case kSMALLINT:
336  return reinterpret_cast<int8_t*>(smallint_buffer_->data());
337  case kINT:
338  return reinterpret_cast<int8_t*>(int_buffer_->data());
339  case kBIGINT:
340  case kNUMERIC:
341  case kDECIMAL:
342  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
343  case kFLOAT:
344  return reinterpret_cast<int8_t*>(float_buffer_->data());
345  case kDOUBLE:
346  return reinterpret_cast<int8_t*>(double_buffer_->data());
347  case kDATE:
348  case kTIME:
349  case kTIMESTAMP:
350  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
351  default:
352  abort();
353  }
354  }
Definition: sqltypes.h:76
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:537
std::vector< float > * float_buffer_
Definition: Importer.h:540
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
std::vector< double > * double_buffer_
Definition: Importer.h:541
std::vector< int32_t > * int_buffer_
Definition: Importer.h:538
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:539
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:535
Definition: sqltypes.h:80
const ColumnDescriptor * column_desc_
Definition: Importer.h:553
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:536
Definition: sqltypes.h:72
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const ColumnDescriptor* import_export::TypedImportBuffer::getColumnDesc ( ) const
inline

Definition at line 325 of file Importer.h.

References column_desc_.

Referenced by addDictEncodedString(), foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::set_null(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::set_null(), and foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::set_null().

325 { return column_desc_; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:553

+ Here is the caller graph for this function:

size_t import_export::TypedImportBuffer::getElementSize ( ) const
inline

Definition at line 356 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_type(), kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, and kTINYINT.

356  {
357  switch (column_desc_->columnType.get_type()) {
358  case kBOOLEAN:
359  return sizeof((*bool_buffer_)[0]);
360  case kTINYINT:
361  return sizeof((*tinyint_buffer_)[0]);
362  case kSMALLINT:
363  return sizeof((*smallint_buffer_)[0]);
364  case kINT:
365  return sizeof((*int_buffer_)[0]);
366  case kBIGINT:
367  case kNUMERIC:
368  case kDECIMAL:
369  return sizeof((*bigint_buffer_)[0]);
370  case kFLOAT:
371  return sizeof((*float_buffer_)[0]);
372  case kDOUBLE:
373  return sizeof((*double_buffer_)[0]);
374  case kDATE:
375  case kTIME:
376  case kTIMESTAMP:
377  return sizeof((*bigint_buffer_)[0]);
378  default:
379  abort();
380  }
381  }
Definition: sqltypes.h:76
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
Definition: sqltypes.h:80
const ColumnDescriptor * column_desc_
Definition: Importer.h:553
Definition: sqltypes.h:72
SQLTypeInfo columnType

+ Here is the call graph for this function:

std::vector<std::string>* import_export::TypedImportBuffer::getGeoStringBuffer ( ) const
inline

Definition at line 385 of file Importer.h.

References geo_string_buffer_.

385 { return geo_string_buffer_; }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:543
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::getStringArrayBuffer ( ) const
inline

Definition at line 389 of file Importer.h.

References string_array_buffer_.

Referenced by get_data_block_pointers().

389  {
390  return string_array_buffer_;
391  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:545

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getStringArrayDictBuffer ( ) const
inline

Definition at line 393 of file Importer.h.

References string_array_dict_buffer_.

393  {
394  return string_array_dict_buffer_;
395  }
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:551
std::vector<std::string>* import_export::TypedImportBuffer::getStringBuffer ( ) const
inline

Definition at line 383 of file Importer.h.

References string_buffer_.

383 { return string_buffer_; }
std::vector< std::string > * string_buffer_
Definition: Importer.h:542
int8_t* import_export::TypedImportBuffer::getStringDictBuffer ( ) const
inline

Definition at line 397 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_size(), string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::int_value_at().

397  {
398  switch (column_desc_->columnType.get_size()) {
399  case 1:
400  return reinterpret_cast<int8_t*>(string_dict_i8_buffer_->data());
401  case 2:
402  return reinterpret_cast<int8_t*>(string_dict_i16_buffer_->data());
403  case 4:
404  return reinterpret_cast<int8_t*>(string_dict_i32_buffer_->data());
405  default:
406  abort();
407  }
408  }
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:548
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:550
const ColumnDescriptor * column_desc_
Definition: Importer.h:553
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:549
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionary* import_export::TypedImportBuffer::getStringDictionary ( ) const
inline

Definition at line 327 of file Importer.h.

References string_dict_.

327 { return string_dict_; }
StringDictionary * string_dict_
Definition: Importer.h:554
const SQLTypeInfo& import_export::TypedImportBuffer::getTypeInfo ( ) const
inline

Definition at line 323 of file Importer.h.

References column_desc_, and ColumnDescriptor::columnType.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), get_data_block_pointers(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

323 { return column_desc_->columnType; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:553
SQLTypeInfo columnType

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::pop_value ( )

Definition at line 727 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, decimal_to_int_type(), double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_buffer_, tinyint_buffer_, and run_benchmark_import::type.

727  {
728  const auto type = column_desc_->columnType.is_decimal()
729  ? decimal_to_int_type(column_desc_->columnType)
730  : column_desc_->columnType.get_type();
731  switch (type) {
732  case kBOOLEAN:
733  bool_buffer_->pop_back();
734  break;
735  case kTINYINT:
736  tinyint_buffer_->pop_back();
737  break;
738  case kSMALLINT:
739  smallint_buffer_->pop_back();
740  break;
741  case kINT:
742  int_buffer_->pop_back();
743  break;
744  case kBIGINT:
745  bigint_buffer_->pop_back();
746  break;
747  case kFLOAT:
748  float_buffer_->pop_back();
749  break;
750  case kDOUBLE:
751  double_buffer_->pop_back();
752  break;
753  case kTEXT:
754  case kVARCHAR:
755  case kCHAR:
756  string_buffer_->pop_back();
757  break;
758  case kDATE:
759  case kTIME:
760  case kTIMESTAMP:
761  bigint_buffer_->pop_back();
762  break;
763  case kARRAY:
764  if (IS_STRING(column_desc_->columnType.get_subtype())) {
765  string_array_buffer_->pop_back();
766  } else {
767  array_buffer_->pop_back();
768  }
769  break;
770  case kPOINT:
771  case kMULTIPOINT:
772  case kLINESTRING:
773  case kMULTILINESTRING:
774  case kPOLYGON:
775  case kMULTIPOLYGON:
776  geo_string_buffer_->pop_back();
777  break;
778  default:
779  CHECK(false) << "TypedImportBuffer::pop_value() does not support type " << type;
780  }
781 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
Definition: sqltypes.h:76
std::vector< std::string > * string_buffer_
Definition: Importer.h:542
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:544
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:537
std::vector< float > * float_buffer_
Definition: Importer.h:540
std::vector< double > * double_buffer_
Definition: Importer.h:541
std::vector< int32_t > * int_buffer_
Definition: Importer.h:538
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:545
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:539
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:535
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:561
Definition: sqltypes.h:79
Definition: sqltypes.h:80
Definition: sqltypes.h:68
#define IS_STRING(T)
Definition: sqltypes.h:309
const ColumnDescriptor * column_desc_
Definition: Importer.h:553
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:536
#define CHECK(condition)
Definition: Logger.h:291
Definition: sqltypes.h:72
SQLTypeInfo columnType
bool is_decimal() const
Definition: sqltypes.h:568
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:543

+ Here is the call graph for this function:

bool import_export::TypedImportBuffer::stringDictCheckpoint ( )
inline

Definition at line 410 of file Importer.h.

References StringDictionary::checkpoint(), and string_dict_.

410  {
411  if (string_dict_ == nullptr) {
412  return true;
413  }
414  return string_dict_->checkpoint();
415  }
StringDictionary * string_dict_
Definition: Importer.h:554
bool checkpoint() noexcept

+ Here is the call graph for this function:

Member Data Documentation

union { ... }
union { ... }
std::vector<ArrayDatum>* import_export::TypedImportBuffer::array_buffer_
std::vector<int64_t>* import_export::TypedImportBuffer::bigint_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::bool_buffer_
size_t import_export::TypedImportBuffer::col_idx

Definition at line 531 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer().

const ColumnDescriptor* import_export::TypedImportBuffer::column_desc_
private
std::vector<double>* import_export::TypedImportBuffer::double_buffer_
std::vector<float>* import_export::TypedImportBuffer::float_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::geo_string_buffer_
std::vector<std::unique_ptr<TypedImportBuffer> >* import_export::TypedImportBuffer::import_buffers

Definition at line 530 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer(), and get_data_block_pointers().

std::vector<int32_t>* import_export::TypedImportBuffer::int_buffer_
std::vector<int16_t>* import_export::TypedImportBuffer::smallint_buffer_
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::string_array_buffer_
std::vector<ArrayDatum>* import_export::TypedImportBuffer::string_array_dict_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::string_buffer_
StringDictionary* import_export::TypedImportBuffer::string_dict_
private
std::vector<uint16_t>* import_export::TypedImportBuffer::string_dict_i16_buffer_
std::vector<int32_t>* import_export::TypedImportBuffer::string_dict_i32_buffer_
std::vector<uint8_t>* import_export::TypedImportBuffer::string_dict_i8_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::tinyint_buffer_

The documentation for this class was generated from the following files: