OmniSciDB  c0231cc57d
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
import_export::TypedImportBuffer Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::TypedImportBuffer:
+ Collaboration diagram for import_export::TypedImportBuffer:

Public Types

using OptionalStringVector = std::optional< std::vector< std::string >>
 

Public Member Functions

 TypedImportBuffer (const ColumnDescriptor *col_desc, StringDictionary *string_dict)
 
 ~TypedImportBuffer ()
 
void addBoolean (const int8_t v)
 
void addTinyint (const int8_t v)
 
void addSmallint (const int16_t v)
 
void addInt (const int32_t v)
 
void addBigint (const int64_t v)
 
void addFloat (const float v)
 
void addDouble (const double v)
 
void addString (const std::string_view v)
 
void addGeoString (const std::string_view v)
 
void addArray (const ArrayDatum &v)
 
OptionalStringVector & addStringArray ()
 
void addStringArray (const OptionalStringVector &arr)
 
void addDictEncodedString (const std::vector< std::string > &string_vec)
 
void addDictEncodedStringArray (const std::vector< OptionalStringVector > &string_array_vec)
 
const SQLTypeInfo & getTypeInfo () const
 
const ColumnDescriptor * getColumnDesc () const
 
StringDictionary * getStringDictionary () const
 
int8_t * getAsBytes () const
 
size_t getElementSize () const
 
std::vector< std::string > * getStringBuffer () const
 
std::vector< std::string > * getGeoStringBuffer () const
 
std::vector< ArrayDatum > * getArrayBuffer () const
 
std::vector
< OptionalStringVector > * 
getStringArrayBuffer () const
 
std::vector< ArrayDatum > * getStringArrayDictBuffer () const
 
int8_t * getStringDictBuffer () const
 
bool stringDictCheckpoint ()
 
void clear ()
 
size_t add_values (const ColumnDescriptor *cd, const TColumn &data)
 
size_t add_arrow_values (const ColumnDescriptor *cd, const arrow::Array &data, const bool exact_type_match, const ArraySliceRange &slice_range, BadRowsTracker *bad_rows_tracker)
 
void add_value (const ColumnDescriptor *cd, const std::string_view val, const bool is_null, const CopyParams &copy_params, const bool check_not_null=true)
 
void add_value (const ColumnDescriptor *cd, const TDatum &val, const bool is_null)
 
void addDefaultValues (const ColumnDescriptor *cd, size_t num_rows)
 
void pop_value ()
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
auto del_values (std::vector< DATA_TYPE > &buffer, BadRowsTracker *const bad_rows_tracker)
 
auto del_values (const SQLTypes type, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, import_export::BadRowsTracker *const bad_rows_tracker)
 

Static Public Member Functions

static std::vector< DataBlockPtr > get_data_block_pointers (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
 

Public Attributes

std::vector< std::unique_ptr
< TypedImportBuffer > > * 
import_buffers
 
size_t col_idx
 
std::vector< int8_t > * bool_buffer_
 
std::vector< int8_t > * tinyint_buffer_
 
std::vector< int16_t > * smallint_buffer_
 
std::vector< int32_t > * int_buffer_
 
std::vector< int64_t > * bigint_buffer_
 
std::vector< float > * float_buffer_
 
std::vector< double > * double_buffer_
 
std::vector< std::string > * string_buffer_
 
std::vector< std::string > * geo_string_buffer_
 
std::vector< ArrayDatum > * array_buffer_
 
std::vector
< OptionalStringVector > * 
string_array_buffer_
 
std::vector< uint8_t > * string_dict_i8_buffer_
 
std::vector< uint16_t > * string_dict_i16_buffer_
 
std::vector< int32_t > * string_dict_i32_buffer_
 
std::vector< ArrayDatum > * string_array_dict_buffer_
 

Private Attributes

union {
   std::vector< int8_t > *   bool_buffer_
 
   std::vector< int8_t > *   tinyint_buffer_
 
   std::vector< int16_t > *   smallint_buffer_
 
   std::vector< int32_t > *   int_buffer_
 
   std::vector< int64_t > *   bigint_buffer_
 
   std::vector< float > *   float_buffer_
 
   std::vector< double > *   double_buffer_
 
   std::vector< std::string > *   string_buffer_
 
   std::vector< std::string > *   geo_string_buffer_
 
   std::vector< ArrayDatum > *   array_buffer_
 
   std::vector
< OptionalStringVector > *   string_array_buffer_
 
}; 
 
union {
   std::vector< uint8_t > *   string_dict_i8_buffer_
 
   std::vector< uint16_t > *   string_dict_i16_buffer_
 
   std::vector< int32_t > *   string_dict_i32_buffer_
 
   std::vector< ArrayDatum > *   string_array_dict_buffer_
 
}; 
 
const ColumnDescriptor * column_desc_
 
StringDictionary * string_dict_
 

Detailed Description

Definition at line 93 of file Importer.h.

Member Typedef Documentation

using import_export::TypedImportBuffer::OptionalStringVector = std::optional<std::vector<std::string>>

Definition at line 95 of file Importer.h.

Constructor & Destructor Documentation

import_export::TypedImportBuffer::TypedImportBuffer ( const ColumnDescriptor *  col_desc,
StringDictionary *  string_dict 
)
inline

Definition at line 96 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

97  : column_desc_(col_desc), string_dict_(string_dict) {
98  switch (col_desc->columnType.get_type()) {
99  case kBOOLEAN:
100  bool_buffer_ = new std::vector<int8_t>();
101  break;
102  case kTINYINT:
103  tinyint_buffer_ = new std::vector<int8_t>();
104  break;
105  case kSMALLINT:
106  smallint_buffer_ = new std::vector<int16_t>();
107  break;
108  case kINT:
109  int_buffer_ = new std::vector<int32_t>();
110  break;
111  case kBIGINT:
112  case kNUMERIC:
113  case kDECIMAL:
114  bigint_buffer_ = new std::vector<int64_t>();
115  break;
116  case kFLOAT:
117  float_buffer_ = new std::vector<float>();
118  break;
119  case kDOUBLE:
120  double_buffer_ = new std::vector<double>();
121  break;
122  case kTEXT:
123  case kVARCHAR:
124  case kCHAR:
125  string_buffer_ = new std::vector<std::string>();
126  if (col_desc->columnType.get_compression() == kENCODING_DICT) {
127  switch (col_desc->columnType.get_size()) {
128  case 1:
129  string_dict_i8_buffer_ = new std::vector<uint8_t>();
130  break;
131  case 2:
132  string_dict_i16_buffer_ = new std::vector<uint16_t>();
133  break;
134  case 4:
135  string_dict_i32_buffer_ = new std::vector<int32_t>();
136  break;
137  default:
138  CHECK(false);
139  }
140  }
141  break;
142  case kDATE:
143  case kTIME:
144  case kTIMESTAMP:
145  bigint_buffer_ = new std::vector<int64_t>();
146  break;
147  case kARRAY:
148  if (IS_STRING(col_desc->columnType.get_subtype())) {
149  CHECK(col_desc->columnType.get_compression() == kENCODING_DICT);
150  string_array_buffer_ = new std::vector<OptionalStringVector>();
151  string_array_dict_buffer_ = new std::vector<ArrayDatum>();
152  } else {
153  array_buffer_ = new std::vector<ArrayDatum>();
154  }
155  break;
156  case kPOINT:
157  case kMULTIPOINT:
158  case kLINESTRING:
159  case kMULTILINESTRING:
160  case kPOLYGON:
161  case kMULTIPOLYGON:
162  geo_string_buffer_ = new std::vector<std::string>();
163  break;
164  default:
165  CHECK(false);
166  }
167  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:405
HOST DEVICE int get_size() const
Definition: sqltypes.h:414
Definition: sqltypes.h:63
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
StringDictionary * string_dict_
Definition: Importer.h:548
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:545
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:542
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:66
Definition: sqltypes.h:67
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:412
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:544
Definition: sqltypes.h:55
#define IS_STRING(T)
Definition: sqltypes.h:322
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:543
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:59
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

import_export::TypedImportBuffer::~TypedImportBuffer ( )
inline

Definition at line 169 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

169  {
170  switch (column_desc_->columnType.get_type()) {
171  case kBOOLEAN:
172  delete bool_buffer_;
173  break;
174  case kTINYINT:
175  delete tinyint_buffer_;
176  break;
177  case kSMALLINT:
178  delete smallint_buffer_;
179  break;
180  case kINT:
181  delete int_buffer_;
182  break;
183  case kBIGINT:
184  case kNUMERIC:
185  case kDECIMAL:
186  delete bigint_buffer_;
187  break;
188  case kFLOAT:
189  delete float_buffer_;
190  break;
191  case kDOUBLE:
192  delete double_buffer_;
193  break;
194  case kTEXT:
195  case kVARCHAR:
196  case kCHAR:
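197  delete string_buffer_;
198  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
199  switch (column_desc_->columnType.get_size()) {
200  case 1:
201  delete string_dict_i8_buffer_;
202  break;
203  case 2:
204  delete string_dict_i16_buffer_;
205  break;
206  case 4:
207  delete string_dict_i32_buffer_;
208  break;
209  }
210  }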
211  break;
212  case kDATE:
213  case kTIME:
214  case kTIMESTAMP:
215  delete bigint_buffer_;
216  break;
217  case kARRAY:
218  if (IS_STRING(column_desc_->columnType.get_subtype())) {
219  delete string_array_buffer_;
220  delete string_array_dict_buffer_;
221  } else {
222  delete array_buffer_;
223  }
224  break;
225  case kPOINT:
226  case kMULTIPOINT:
227  case kLINESTRING:
228  case kMULTILINESTRING:
229  case kPOLYGON:
230  case kMULTIPOLYGON:
231  delete geo_string_buffer_;
232  break;
233  default:
234  CHECK(false);
235  }
236  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:405
HOST DEVICE int get_size() const
Definition: sqltypes.h:414
Definition: sqltypes.h:63
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:545
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:542
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:66
Definition: sqltypes.h:67
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:412
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:544
Definition: sqltypes.h:55
#define IS_STRING(T)
Definition: sqltypes.h:322
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:543
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:59
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

Member Function Documentation

size_t import_export::TypedImportBuffer::add_arrow_values ( const ColumnDescriptor *  cd,
const arrow::Array &  data,
const bool  exact_type_match,
const ArraySliceRange &  slice_range,
BadRowsTracker *  bad_rows_tracker 
)

Definition at line 912 of file Importer.cpp.

References arrow_throw_if(), bigint_buffer_, bool_buffer_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_arrow_val_to_import_buffer(), double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), int_buffer_, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, heavydb.dtypes::STRING, string_buffer_, tinyint_buffer_, and run_benchmark_import::type.

916  {
917  const auto type = cd->columnType.get_type();
918  if (cd->columnType.get_notnull()) {
919  // We can't have any null values for this column; to have them is an error
920  arrow_throw_if(col.null_count() > 0, "NULL not allowed for column " + cd->columnName);
921  }
922 
923  switch (type) {
924  case kBOOLEAN:
925  if (exact_type_match) {
926  arrow_throw_if(col.type_id() != Type::BOOL, "Expected boolean type");
927  }
928  return convert_arrow_val_to_import_buffer(
929  cd, col, *bool_buffer_, slice_range, bad_rows_tracker);
930  case kTINYINT:
931  if (exact_type_match) {
932  arrow_throw_if(col.type_id() != Type::INT8, "Expected int8 type");
933  }
934  return convert_arrow_val_to_import_buffer(
935  cd, col, *tinyint_buffer_, slice_range, bad_rows_tracker);
936  case kSMALLINT:
937  if (exact_type_match) {
938  arrow_throw_if(col.type_id() != Type::INT16, "Expected int16 type");
939  }
940  return convert_arrow_val_to_import_buffer(
941  cd, col, *smallint_buffer_, slice_range, bad_rows_tracker);
942  case kINT:
943  if (exact_type_match) {
944  arrow_throw_if(col.type_id() != Type::INT32, "Expected int32 type");
945  }
946  return convert_arrow_val_to_import_buffer(
947  cd, col, *int_buffer_, slice_range, bad_rows_tracker);
948  case kBIGINT:
949  case kNUMERIC:
950  case kDECIMAL:
951  if (exact_type_match) {
952  arrow_throw_if(col.type_id() != Type::INT64, "Expected int64 type");
953  }
954  return convert_arrow_val_to_import_buffer(
955  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
956  case kFLOAT:
957  if (exact_type_match) {
958  arrow_throw_if(col.type_id() != Type::FLOAT, "Expected float type");
959  }
960  return convert_arrow_val_to_import_buffer(
961  cd, col, *float_buffer_, slice_range, bad_rows_tracker);
962  case kDOUBLE:
963  if (exact_type_match) {
964  arrow_throw_if(col.type_id() != Type::DOUBLE, "Expected double type");
965  }
966  return convert_arrow_val_to_import_buffer(
967  cd, col, *double_buffer_, slice_range, bad_rows_tracker);
968  case kTEXT:
969  case kVARCHAR:
970  case kCHAR:
971  if (exact_type_match) {
972  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
973  "Expected string type");
974  }
975  return convert_arrow_val_to_import_buffer(
976  cd, col, *string_buffer_, slice_range, bad_rows_tracker);
977  case kTIME:
978  if (exact_type_match) {
979  arrow_throw_if(col.type_id() != Type::TIME32 && col.type_id() != Type::TIME64,
980  "Expected time32 or time64 type");
981  }
982  return convert_arrow_val_to_import_buffer(
983  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
984  case kTIMESTAMP:
985  if (exact_type_match) {
986  arrow_throw_if(col.type_id() != Type::TIMESTAMP, "Expected timestamp type");
987  }
988  return convert_arrow_val_to_import_buffer(
989  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
990  case kDATE:
991  if (exact_type_match) {
992  arrow_throw_if(col.type_id() != Type::DATE32 && col.type_id() != Type::DATE64,
993  "Expected date32 or date64 type");
994  }
995  return convert_arrow_val_to_import_buffer(
996  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
997  case kPOINT:
998  case kMULTIPOINT:
999  case kLINESTRING:
1000  case kMULTILINESTRING:
1001  case kPOLYGON:
1002  case kMULTIPOLYGON:
1003  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
1004  "Expected string type");
1005  return convert_arrow_val_to_import_buffer(
1006  cd, col, *geo_string_buffer_, slice_range, bad_rows_tracker);
1007  case kARRAY:
1008  throw std::runtime_error("Arrow array appends not yet supported");
1009  default:
1010  throw std::runtime_error("Invalid Type");
1011  }
1012 }
Definition: sqltypes.h:63
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
tuple STRING
Definition: dtypes.py:31
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:66
Definition: sqltypes.h:67
Definition: sqltypes.h:55
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
size_t convert_arrow_val_to_import_buffer(const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
Definition: sqltypes.h:59
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:411
std::string columnName
void arrow_throw_if(const bool cond, const std::string &message)
Definition: ArrowImporter.h:42
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor *  cd,
const std::string_view  val,
const bool  is_null,
const CopyParams &  copy_params,
const bool  check_not_null = true 
)

Definition at line 567 of file Importer.cpp.

References addArray(), addBigint(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), Datum::bigintval, Datum::boolval, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), Datum::smallintval, import_export::StringToArray(), StringToDatum(), Datum::tinyintval, to_string(), run_benchmark_import::type, and DecimalOverflowValidator::validate().

Referenced by foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::set_null(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::set_null(), and foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::set_null().

571  {
572  const auto type = cd->columnType.get_type();
573  switch (type) {
574  case kBOOLEAN: {
575  if (is_null) {
576  if (check_not_null && cd->columnType.get_notnull()) {
577  throw std::runtime_error("NULL for column " + cd->columnName);
578  }
579  addBoolean(inline_fixed_encoding_null_val(cd->columnType));
580  } else {
581  auto ti = cd->columnType;
582  Datum d = StringToDatum(val, ti);
583  addBoolean(static_cast<int8_t>(d.boolval));
584  }
585  break;
586  }
587  case kTINYINT: {
588  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
589  auto ti = cd->columnType;
590  Datum d = StringToDatum(val, ti);
591  addTinyint(d.tinyintval);
592  } else {
593  if (check_not_null && cd->columnType.get_notnull()) {
594  throw std::runtime_error("NULL for column " + cd->columnName);
595  }
596  addTinyint(inline_fixed_encoding_null_val(cd->columnType));
597  }
598  break;
599  }
600  case kSMALLINT: {
601  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
602  auto ti = cd->columnType;
603  Datum d = StringToDatum(val, ti);
604  addSmallint(d.smallintval);
605  } else {
606  if (check_not_null && cd->columnType.get_notnull()) {
607  throw std::runtime_error("NULL for column " + cd->columnName);
608  }
609  addSmallint(inline_fixed_encoding_null_val(cd->columnType));
610  }
611  break;
612  }
613  case kINT: {
614  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
615  auto ti = cd->columnType;
616  Datum d = StringToDatum(val, ti);
617  addInt(d.intval);
618  } else {
619  if (check_not_null && cd->columnType.get_notnull()) {
620  throw std::runtime_error("NULL for column " + cd->columnName);
621  }
622  addInt(inline_fixed_encoding_null_val(cd->columnType));
623  }
624  break;
625  }
626  case kBIGINT: {
627  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
628  auto ti = cd->columnType;
629  Datum d = StringToDatum(val, ti);
630  addBigint(d.bigintval);
631  } else {
632  if (check_not_null && cd->columnType.get_notnull()) {
633  throw std::runtime_error("NULL for column " + cd->columnName);
634  }
635  addBigint(inline_fixed_encoding_null_val(cd->columnType));
636  }
637  break;
638  }
639  case kDECIMAL:
640  case kNUMERIC: {
641  if (!is_null) {
642  auto ti = cd->columnType;
643  Datum d = StringToDatum(val, ti);
644  DecimalOverflowValidator validator(ti);
645  validator.validate(d.bigintval);
646  addBigint(d.bigintval);
647  } else {
648  if (check_not_null && cd->columnType.get_notnull()) {
649  throw std::runtime_error("NULL for column " + cd->columnName);
650  }
651  addBigint(inline_fixed_encoding_null_val(cd->columnType));
652  }
653  break;
654  }
655  case kFLOAT:
656  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
657  addFloat(static_cast<float>(std::atof(std::string(val).c_str())));
658  } else {
659  if (check_not_null && cd->columnType.get_notnull()) {
660  throw std::runtime_error("NULL for column " + cd->columnName);
661  }
662  addFloat(NULL_FLOAT);
663  }
664  break;
665  case kDOUBLE:
666  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
667  addDouble(std::atof(std::string(val).c_str()));
668  } else {
669  if (check_not_null && cd->columnType.get_notnull()) {
670  throw std::runtime_error("NULL for column " + cd->columnName);
671  }
672  addDouble(NULL_DOUBLE);
673  }
674  break;
675  case kTEXT:
676  case kVARCHAR:
677  case kCHAR: {
678  // @TODO(wei) for now, use empty string for nulls
679  if (is_null) {
680  if (check_not_null && cd->columnType.get_notnull()) {
681  throw std::runtime_error("NULL for column " + cd->columnName);
682  }
683  addString(std::string());
684  } else {
685  if (val.length() > StringDictionary::MAX_STRLEN) {
686  throw std::runtime_error("String too long for column " + cd->columnName +
687  " was " + std::to_string(val.length()) + " max is " +
688  std::to_string(StringDictionary::MAX_STRLEN));
689  }
690  addString(val);
691  }
692  break;
693  }
694  case kTIME:
695  case kTIMESTAMP:
696  case kDATE:
697  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
698  SQLTypeInfo ti = cd->columnType;
699  Datum d = StringToDatum(val, ti);
700  addBigint(d.bigintval);
701  } else {
702  if (check_not_null && cd->columnType.get_notnull()) {
703  throw std::runtime_error("NULL for column " + cd->columnName);
704  }
705  addBigint(inline_fixed_encoding_null_val(cd->columnType));
706  }
707  break;
708  case kARRAY: {
709  if (check_not_null && is_null && cd->columnType.get_notnull()) {
710  throw std::runtime_error("NULL for column " + cd->columnName);
711  }
712  SQLTypeInfo ti = cd->columnType;
713  if (IS_STRING(ti.get_subtype())) {
714  std::vector<std::string> string_vec;
715  // Just parse string array, don't push it to buffer yet as we might throw
716  import_export::delimited_parser::parse_string_array(
717  std::string(val), copy_params, string_vec);
718  if (!is_null) {
719  if (ti.get_size() > 0) {
720  auto sti = ti.get_elem_type();
721  size_t expected_size = ti.get_size() / sti.get_size();
722  size_t actual_size = string_vec.size();
723  if (actual_size != expected_size) {
724  throw std::runtime_error("Fixed length array column " + cd->columnName +
725  " expects " + std::to_string(expected_size) +
726  " values, received " +
727  std::to_string(actual_size));
728  }
729  }
730  addStringArray(string_vec);
731  } else {
732  addStringArray(std::nullopt);
733  }
734  } else {
735  if (!is_null) {
736  ArrayDatum d = StringToArray(std::string(val), ti, copy_params);
737  if (d.is_null) { // val could be "NULL"
738  addArray(NullArray(ti));
739  } else {
740  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
741  throw std::runtime_error("Fixed length array for column " + cd->columnName +
742  " has incorrect length: " + std::string(val));
743  }
744  addArray(d);
745  }
746  } else {
747  addArray(NullArray(ti));
748  }
749  }
750  break;
751  }
752  case kPOINT:
753  case kMULTIPOINT:
754  case kLINESTRING:
755  case kMULTILINESTRING:
756  case kPOLYGON:
757  case kMULTIPOLYGON:
758  addGeoString(val);
759  break;
760  default:
761  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
762  }
763 }
int8_t tinyintval
Definition: sqltypes.h:232
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:405
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:414
void addBigint(const int64_t v)
Definition: Importer.h:246
OptionalStringVector & addStringArray()
Definition: Importer.h:258
void addSmallint(const int16_t v)
Definition: Importer.h:242
Definition: sqltypes.h:63
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:252
void addDouble(const double v)
Definition: Importer.h:250
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:409
int8_t boolval
Definition: sqltypes.h:231
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
int32_t intval
Definition: sqltypes.h:234
std::string to_string(char const *&&v)
void addFloat(const float v)
Definition: Importer.h:248
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:228
void addGeoString(const std::string_view v)
Definition: Importer.h:254
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:238
void addTinyint(const int8_t v)
Definition: Importer.h:240
int64_t bigintval
Definition: sqltypes.h:235
void addInt(const int32_t v)
Definition: Importer.h:244
int16_t smallintval
Definition: sqltypes.h:233
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:277
Definition: sqltypes.h:66
Definition: sqltypes.h:67
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:354
void addArray(const ArrayDatum &v)
Definition: Importer.h:256
Definition: sqltypes.h:55
#define IS_STRING(T)
Definition: sqltypes.h:322
#define CHECK(condition)
Definition: Logger.h:222
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:59
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:411
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:981
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor *  cd,
const TDatum &  val,
const bool  is_null 
)

Definition at line 1346 of file Importer.cpp.

References addArray(), addBigint(), import_export::addBinaryStringArray(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, decimal_to_int_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::TDatumToArrayDatum(), and run_benchmark_import::type.

1348  {
1349  const auto type = cd->columnType.is_decimal() ? decimal_to_int_type(cd->columnType)
1350  : cd->columnType.get_type();
1351  switch (type) {
1352  case kBOOLEAN: {
1353  if (is_null) {
1354  if (cd->columnType.get_notnull()) {
1355  throw std::runtime_error("NULL for column " + cd->columnName);
1356  }
1357  addBoolean(inline_fixed_encoding_null_val(cd->columnType));
1358  } else {
1359  addBoolean((int8_t)datum.val.int_val);
1360  }
1361  break;
1362  }
1363  case kTINYINT:
1364  if (!is_null) {
1365  addTinyint((int8_t)datum.val.int_val);
1366  } else {
1367  if (cd->columnType.get_notnull()) {
1368  throw std::runtime_error("NULL for column " + cd->columnName);
1369  }
1370  addTinyint(inline_fixed_encoding_null_val(cd->columnType));
1371  }
1372  break;
1373  case kSMALLINT:
1374  if (!is_null) {
1375  addSmallint((int16_t)datum.val.int_val);
1376  } else {
1377  if (cd->columnType.get_notnull()) {
1378  throw std::runtime_error("NULL for column " + cd->columnName);
1379  }
1380  addSmallint(inline_fixed_encoding_null_val(cd->columnType));
1381  }
1382  break;
1383  case kINT:
1384  if (!is_null) {
1385  addInt((int32_t)datum.val.int_val);
1386  } else {
1387  if (cd->columnType.get_notnull()) {
1388  throw std::runtime_error("NULL for column " + cd->columnName);
1389  }
1390  addInt(inline_fixed_encoding_null_val(cd->columnType));
1391  }
1392  break;
1393  case kBIGINT:
1394  if (!is_null) {
1395  addBigint(datum.val.int_val);
1396  } else {
1397  if (cd->columnType.get_notnull()) {
1398  throw std::runtime_error("NULL for column " + cd->columnName);
1399  }
1400  addBigint(inline_fixed_encoding_null_val(cd->columnType));
1401  }
1402  break;
1403  case kFLOAT:
1404  if (!is_null) {
1405  addFloat((float)datum.val.real_val);
1406  } else {
1407  if (cd->columnType.get_notnull()) {
1408  throw std::runtime_error("NULL for column " + cd->columnName);
1409  }
1410  addFloat(NULL_FLOAT);
1411  }
1412  break;
1413  case kDOUBLE:
1414  if (!is_null) {
1415  addDouble(datum.val.real_val);
1416  } else {
1417  if (cd->columnType.get_notnull()) {
1418  throw std::runtime_error("NULL for column " + cd->columnName);
1419  }
1420  addDouble(NULL_DOUBLE);
1421  }
1422  break;
1423  case kTEXT:
1424  case kVARCHAR:
1425  case kCHAR: {
1426  // @TODO(wei) for now, use empty string for nulls
1427  if (is_null) {
1428  if (cd->columnType.get_notnull()) {
1429  throw std::runtime_error("NULL for column " + cd->columnName);
1430  }
1431  addString(std::string());
1432  } else {
1433  addString(datum.val.str_val);
1434  }
1435  break;
1436  }
1437  case kTIME:
1438  case kTIMESTAMP:
1439  case kDATE: {
1440  if (!is_null) {
1441  addBigint(datum.val.int_val);
1442  } else {
1443  if (cd->columnType.get_notnull()) {
1444  throw std::runtime_error("NULL for column " + cd->columnName);
1445  }
1446  addBigint(inline_fixed_encoding_null_val(cd->columnType));
1447  }
1448  break;
1449  }
1450  case kARRAY:
1451  if (is_null && cd->columnType.get_notnull()) {
1452  throw std::runtime_error("NULL for column " + cd->columnName);
1453  }
1454  if (IS_STRING(cd->columnType.get_subtype())) {
1455  OptionalStringVector& string_vec = addStringArray();
1456  addBinaryStringArray(datum, *string_vec);
1457  } else {
1458  if (!is_null) {
1459  addArray(TDatumToArrayDatum(datum, cd->columnType));
1460  } else {
1461  addArray(NullArray(cd->columnType));
1462  }
1463  }
1464  break;
1465  case kPOINT:
1466  case kMULTIPOINT:
1467  case kLINESTRING:
1468  case kMULTILINESTRING:
1469  case kPOLYGON:
1470  case kMULTIPOLYGON:
1471  if (is_null) {
1472  if (cd->columnType.get_notnull()) {
1473  throw std::runtime_error("NULL for column " + cd->columnName);
1474  }
1475  addGeoString(std::string());
1476  } else {
1477  addGeoString(datum.val.str_val);
1478  }
1479  break;
1480  default:
1481  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
1482  }
1483 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:405
#define NULL_DOUBLE
void addBigint(const int64_t v)
Definition: Importer.h:246
OptionalStringVector & addStringArray()
Definition: Importer.h:258
void addSmallint(const int16_t v)
Definition: Importer.h:242
Definition: sqltypes.h:63
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:252
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:95
void addDouble(const double v)
Definition: Importer.h:250
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:409
void addFloat(const float v)
Definition: Importer.h:248
void addGeoString(const std::string_view v)
Definition: Importer.h:254
ArrayDatum TDatumToArrayDatum(const TDatum &datum, const SQLTypeInfo &ti)
Definition: Importer.cpp:507
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:238
void addTinyint(const int8_t v)
Definition: Importer.h:240
void addInt(const int32_t v)
Definition: Importer.h:244
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:499
Definition: sqltypes.h:66
Definition: sqltypes.h:67
void addArray(const ArrayDatum &v)
Definition: Importer.h:256
Definition: sqltypes.h:55
#define IS_STRING(T)
Definition: sqltypes.h:322
#define CHECK(condition)
Definition: Logger.h:222
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:59
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:411
bool is_decimal() const
Definition: sqltypes.h:603
std::string columnName
void addBinaryStringArray(const TDatum &datum, std::vector< std::string > &string_vec)
Definition: Importer.cpp:453

+ Here is the call graph for this function:

size_t import_export::TypedImportBuffer::add_values ( const ColumnDescriptor *  cd,
const TColumn &  data 
)

Definition at line 1015 of file Importer.cpp.

References addArray(), addStringArray(), bigint_buffer_, bool_buffer_, checked_malloc(), ColumnDescriptor::columnName, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), smallint_buffer_, string_buffer_, and tinyint_buffer_.

1015  {
1016  size_t dataSize = 0;
1017  if (cd->columnType.get_notnull()) {
1018  // We can't have any null values for this column; to have them is an error
1019  if (std::any_of(col.nulls.begin(), col.nulls.end(), [](int i) { return i != 0; })) {
1020  throw std::runtime_error("NULL for column " + cd->columnName);
1021  }
1022  }
1023 
1024  switch (cd->columnType.get_type()) {
1025  case kBOOLEAN: {
1026  dataSize = col.data.int_col.size();
1027  bool_buffer_->reserve(dataSize);
1028  for (size_t i = 0; i < dataSize; i++) {
1029  if (col.nulls[i]) {
1031  } else {
1032  bool_buffer_->push_back((int8_t)col.data.int_col[i]);
1033  }
1034  }
1035  break;
1036  }
1037  case kTINYINT: {
1038  dataSize = col.data.int_col.size();
1039  tinyint_buffer_->reserve(dataSize);
1040  for (size_t i = 0; i < dataSize; i++) {
1041  if (col.nulls[i]) {
1043  } else {
1044  tinyint_buffer_->push_back((int8_t)col.data.int_col[i]);
1045  }
1046  }
1047  break;
1048  }
1049  case kSMALLINT: {
1050  dataSize = col.data.int_col.size();
1051  smallint_buffer_->reserve(dataSize);
1052  for (size_t i = 0; i < dataSize; i++) {
1053  if (col.nulls[i]) {
1055  } else {
1056  smallint_buffer_->push_back((int16_t)col.data.int_col[i]);
1057  }
1058  }
1059  break;
1060  }
1061  case kINT: {
1062  dataSize = col.data.int_col.size();
1063  int_buffer_->reserve(dataSize);
1064  for (size_t i = 0; i < dataSize; i++) {
1065  if (col.nulls[i]) {
1067  } else {
1068  int_buffer_->push_back((int32_t)col.data.int_col[i]);
1069  }
1070  }
1071  break;
1072  }
1073  case kBIGINT:
1074  case kNUMERIC:
1075  case kDECIMAL: {
1076  dataSize = col.data.int_col.size();
1077  bigint_buffer_->reserve(dataSize);
1078  for (size_t i = 0; i < dataSize; i++) {
1079  if (col.nulls[i]) {
1081  } else {
1082  bigint_buffer_->push_back((int64_t)col.data.int_col[i]);
1083  }
1084  }
1085  break;
1086  }
1087  case kFLOAT: {
1088  dataSize = col.data.real_col.size();
1089  float_buffer_->reserve(dataSize);
1090  for (size_t i = 0; i < dataSize; i++) {
1091  if (col.nulls[i]) {
1092  float_buffer_->push_back(NULL_FLOAT);
1093  } else {
1094  float_buffer_->push_back((float)col.data.real_col[i]);
1095  }
1096  }
1097  break;
1098  }
1099  case kDOUBLE: {
1100  dataSize = col.data.real_col.size();
1101  double_buffer_->reserve(dataSize);
1102  for (size_t i = 0; i < dataSize; i++) {
1103  if (col.nulls[i]) {
1104  double_buffer_->push_back(NULL_DOUBLE);
1105  } else {
1106  double_buffer_->push_back((double)col.data.real_col[i]);
1107  }
1108  }
1109  break;
1110  }
1111  case kTEXT:
1112  case kVARCHAR:
1113  case kCHAR: {
1114  // TODO: for now, use empty string for nulls
1115  dataSize = col.data.str_col.size();
1116  string_buffer_->reserve(dataSize);
1117  for (size_t i = 0; i < dataSize; i++) {
1118  if (col.nulls[i]) {
1119  string_buffer_->push_back(std::string());
1120  } else {
1121  string_buffer_->push_back(col.data.str_col[i]);
1122  }
1123  }
1124  break;
1125  }
1126  case kTIME:
1127  case kTIMESTAMP:
1128  case kDATE: {
1129  dataSize = col.data.int_col.size();
1130  bigint_buffer_->reserve(dataSize);
1131  for (size_t i = 0; i < dataSize; i++) {
1132  if (col.nulls[i]) {
1134  } else {
1135  bigint_buffer_->push_back(static_cast<int64_t>(col.data.int_col[i]));
1136  }
1137  }
1138  break;
1139  }
1140  case kPOINT:
1141  case kMULTIPOINT:
1142  case kLINESTRING:
1143  case kMULTILINESTRING:
1144  case kPOLYGON:
1145  case kMULTIPOLYGON: {
1146  dataSize = col.data.str_col.size();
1147  geo_string_buffer_->reserve(dataSize);
1148  for (size_t i = 0; i < dataSize; i++) {
1149  if (col.nulls[i]) {
1150  // TODO: add support for NULL geo
1151  geo_string_buffer_->push_back(std::string());
1152  } else {
1153  geo_string_buffer_->push_back(col.data.str_col[i]);
1154  }
1155  }
1156  break;
1157  }
1158  case kARRAY: {
1159  dataSize = col.data.arr_col.size();
1160  if (IS_STRING(cd->columnType.get_subtype())) {
1161  for (size_t i = 0; i < dataSize; i++) {
1162  OptionalStringVector& string_vec = addStringArray();
1163  if (!col.nulls[i]) {
1164  size_t stringArrSize = col.data.arr_col[i].data.str_col.size();
1165  for (size_t str_idx = 0; str_idx != stringArrSize; ++str_idx) {
1166  string_vec->push_back(col.data.arr_col[i].data.str_col[str_idx]);
1167  }
1168  }
1169  }
1170  } else {
1171  auto elem_ti = cd->columnType.get_subtype();
1172  switch (elem_ti) {
1173  case kBOOLEAN: {
1174  for (size_t i = 0; i < dataSize; i++) {
1175  if (col.nulls[i]) {
1177  } else {
1178  size_t len = col.data.arr_col[i].data.int_col.size();
1179  size_t byteSize = len * sizeof(int8_t);
1180  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1181  int8_t* p = buf;
1182  for (size_t j = 0; j < len; ++j) {
1183  // Explicitly checking the item for null because
1184  // casting null value (-128) to bool results
1185  // incorrect value 1.
1186  if (col.data.arr_col[i].nulls[j]) {
1187  *p = static_cast<int8_t>(
1189  } else {
1190  *(bool*)p = static_cast<bool>(col.data.arr_col[i].data.int_col[j]);
1191  }
1192  p += sizeof(bool);
1193  }
1194  addArray(ArrayDatum(byteSize, buf, false));
1195  }
1196  }
1197  break;
1198  }
1199  case kTINYINT: {
1200  for (size_t i = 0; i < dataSize; i++) {
1201  if (col.nulls[i]) {
1203  } else {
1204  size_t len = col.data.arr_col[i].data.int_col.size();
1205  size_t byteSize = len * sizeof(int8_t);
1206  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1207  int8_t* p = buf;
1208  for (size_t j = 0; j < len; ++j) {
1209  *(int8_t*)p = static_cast<int8_t>(col.data.arr_col[i].data.int_col[j]);
1210  p += sizeof(int8_t);
1211  }
1212  addArray(ArrayDatum(byteSize, buf, false));
1213  }
1214  }
1215  break;
1216  }
1217  case kSMALLINT: {
1218  for (size_t i = 0; i < dataSize; i++) {
1219  if (col.nulls[i]) {
1221  } else {
1222  size_t len = col.data.arr_col[i].data.int_col.size();
1223  size_t byteSize = len * sizeof(int16_t);
1224  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1225  int8_t* p = buf;
1226  for (size_t j = 0; j < len; ++j) {
1227  *(int16_t*)p =
1228  static_cast<int16_t>(col.data.arr_col[i].data.int_col[j]);
1229  p += sizeof(int16_t);
1230  }
1231  addArray(ArrayDatum(byteSize, buf, false));
1232  }
1233  }
1234  break;
1235  }
1236  case kINT: {
1237  for (size_t i = 0; i < dataSize; i++) {
1238  if (col.nulls[i]) {
1240  } else {
1241  size_t len = col.data.arr_col[i].data.int_col.size();
1242  size_t byteSize = len * sizeof(int32_t);
1243  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1244  int8_t* p = buf;
1245  for (size_t j = 0; j < len; ++j) {
1246  *(int32_t*)p =
1247  static_cast<int32_t>(col.data.arr_col[i].data.int_col[j]);
1248  p += sizeof(int32_t);
1249  }
1250  addArray(ArrayDatum(byteSize, buf, false));
1251  }
1252  }
1253  break;
1254  }
1255  case kBIGINT:
1256  case kNUMERIC:
1257  case kDECIMAL: {
1258  for (size_t i = 0; i < dataSize; i++) {
1259  if (col.nulls[i]) {
1261  } else {
1262  size_t len = col.data.arr_col[i].data.int_col.size();
1263  size_t byteSize = len * sizeof(int64_t);
1264  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1265  int8_t* p = buf;
1266  for (size_t j = 0; j < len; ++j) {
1267  *(int64_t*)p =
1268  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1269  p += sizeof(int64_t);
1270  }
1271  addArray(ArrayDatum(byteSize, buf, false));
1272  }
1273  }
1274  break;
1275  }
1276  case kFLOAT: {
1277  for (size_t i = 0; i < dataSize; i++) {
1278  if (col.nulls[i]) {
1280  } else {
1281  size_t len = col.data.arr_col[i].data.real_col.size();
1282  size_t byteSize = len * sizeof(float);
1283  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1284  int8_t* p = buf;
1285  for (size_t j = 0; j < len; ++j) {
1286  *(float*)p = static_cast<float>(col.data.arr_col[i].data.real_col[j]);
1287  p += sizeof(float);
1288  }
1289  addArray(ArrayDatum(byteSize, buf, false));
1290  }
1291  }
1292  break;
1293  }
1294  case kDOUBLE: {
1295  for (size_t i = 0; i < dataSize; i++) {
1296  if (col.nulls[i]) {
1298  } else {
1299  size_t len = col.data.arr_col[i].data.real_col.size();
1300  size_t byteSize = len * sizeof(double);
1301  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1302  int8_t* p = buf;
1303  for (size_t j = 0; j < len; ++j) {
1304  *(double*)p = static_cast<double>(col.data.arr_col[i].data.real_col[j]);
1305  p += sizeof(double);
1306  }
1307  addArray(ArrayDatum(byteSize, buf, false));
1308  }
1309  }
1310  break;
1311  }
1312  case kTIME:
1313  case kTIMESTAMP:
1314  case kDATE: {
1315  for (size_t i = 0; i < dataSize; i++) {
1316  if (col.nulls[i]) {
1318  } else {
1319  size_t len = col.data.arr_col[i].data.int_col.size();
1320  size_t byteWidth = sizeof(int64_t);
1321  size_t byteSize = len * byteWidth;
1322  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1323  int8_t* p = buf;
1324  for (size_t j = 0; j < len; ++j) {
1325  *reinterpret_cast<int64_t*>(p) =
1326  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1327  p += sizeof(int64_t);
1328  }
1329  addArray(ArrayDatum(byteSize, buf, false));
1330  }
1331  }
1332  break;
1333  }
1334  default:
1335  throw std::runtime_error("Invalid Array Type");
1336  }
1337  }
1338  break;
1339  }
1340  default:
1341  throw std::runtime_error("Invalid Type");
1342  }
1343  return dataSize;
1344 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:405
#define NULL_DOUBLE
OptionalStringVector & addStringArray()
Definition: Importer.h:258
Definition: sqltypes.h:63
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:95
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:409
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:228
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:66
Definition: sqltypes.h:67
void addArray(const ArrayDatum &v)
Definition: Importer.h:256
Definition: sqltypes.h:55
#define IS_STRING(T)
Definition: sqltypes.h:322
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:59
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:411
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:981
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addArray ( const ArrayDatum &  v)
inline

Definition at line 256 of file Importer.h.

References array_buffer_.

Referenced by add_value(), and add_values().

256 { array_buffer_->push_back(v); }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBigint ( const int64_t  v)
inline

Definition at line 246 of file Importer.h.

References bigint_buffer_.

Referenced by add_value().

246 { bigint_buffer_->push_back(v); }
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBoolean ( const int8_t  v)
inline

Definition at line 238 of file Importer.h.

References bool_buffer_.

Referenced by add_value().

238 { bool_buffer_->push_back(v); }
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addDefaultValues ( const ColumnDescriptor *  cd,
size_t  num_rows 
)

Definition at line 1485 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_decimal_value_to_scale(), ColumnDescriptor::default_value, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), int_buffer_, is_null(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), smallint_buffer_, string_array_buffer_, string_buffer_, import_export::StringToArray(), StringToDatum(), tinyint_buffer_, to_string(), and run_benchmark_import::type.

1485  {
1486  bool is_null = !cd->default_value.has_value();
1487  CHECK(!(is_null && cd->columnType.get_notnull()));
1488  const auto type = cd->columnType.get_type();
1489  auto ti = cd->columnType;
1490  auto val = cd->default_value.value_or("NULL");
1491  CopyParams cp;
1492  switch (type) {
1493  case kBOOLEAN: {
1494  if (!is_null) {
1495  bool_buffer_->resize(num_rows, StringToDatum(val, ti).boolval);
1496  } else {
1497  bool_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1498  }
1499  break;
1500  }
1501  case kTINYINT: {
1502  if (!is_null) {
1503  tinyint_buffer_->resize(num_rows, StringToDatum(val, ti).tinyintval);
1504  } else {
1506  }
1507  break;
1508  }
1509  case kSMALLINT: {
1510  if (!is_null) {
1511  smallint_buffer_->resize(num_rows, StringToDatum(val, ti).smallintval);
1512  } else {
1513  smallint_buffer_->resize(num_rows,
1515  }
1516  break;
1517  }
1518  case kINT: {
1519  if (!is_null) {
1520  int_buffer_->resize(num_rows, StringToDatum(val, ti).intval);
1521  } else {
1522  int_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1523  }
1524  break;
1525  }
1526  case kBIGINT: {
1527  if (!is_null) {
1528  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1529  } else {
1531  }
1532  break;
1533  }
1534  case kDECIMAL:
1535  case kNUMERIC: {
1536  if (!is_null) {
1537  const auto converted_decimal_value = convert_decimal_value_to_scale(
1538  StringToDatum(val, ti).bigintval, ti, cd->columnType);
1539  bigint_buffer_->resize(num_rows, converted_decimal_value);
1540  } else {
1542  }
1543  break;
1544  }
1545  case kFLOAT:
1546  if (!is_null) {
1547  float_buffer_->resize(num_rows,
1548  static_cast<float>(std::atof(std::string(val).c_str())));
1549  } else {
1550  float_buffer_->resize(num_rows, NULL_FLOAT);
1551  }
1552  break;
1553  case kDOUBLE:
1554  if (!is_null) {
1555  double_buffer_->resize(num_rows, std::atof(std::string(val).c_str()));
1556  } else {
1557  double_buffer_->resize(num_rows, NULL_DOUBLE);
1558  }
1559  break;
1560  case kTEXT:
1561  case kVARCHAR:
1562  case kCHAR: {
1563  if (is_null) {
1564  string_buffer_->resize(num_rows, "");
1565  } else {
1566  if (val.length() > StringDictionary::MAX_STRLEN) {
1567  throw std::runtime_error("String too long for column " + cd->columnName +
1568  " was " + std::to_string(val.length()) + " max is " +
1570  }
1571  string_buffer_->resize(num_rows, val);
1572  }
1573  break;
1574  }
1575  case kTIME:
1576  case kTIMESTAMP:
1577  case kDATE:
1578  if (!is_null) {
1579  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1580  } else {
1582  }
1583  break;
1584  case kARRAY: {
1585  if (IS_STRING(ti.get_subtype())) {
1586  std::vector<std::string> string_vec;
1587  // Just parse string array, don't push it to buffer yet as we might throw
1589  std::string(val), cp, string_vec);
1590  if (!is_null) {
1591  // TODO: add support for NULL string arrays
1592  if (ti.get_size() > 0) {
1593  auto sti = ti.get_elem_type();
1594  size_t expected_size = ti.get_size() / sti.get_size();
1595  size_t actual_size = string_vec.size();
1596  if (actual_size != expected_size) {
1597  throw std::runtime_error("Fixed length array column " + cd->columnName +
1598  " expects " + std::to_string(expected_size) +
1599  " values, received " +
1600  std::to_string(actual_size));
1601  }
1602  }
1603  string_array_buffer_->resize(num_rows, string_vec);
1604  } else {
1605  if (ti.get_size() > 0) {
1606  // TODO: remove once NULL fixlen arrays are allowed
1607  throw std::runtime_error("Fixed length array column " + cd->columnName +
1608  " currently cannot accept NULL arrays");
1609  }
1610  // TODO: add support for NULL string arrays, replace with addStringArray(),
1611  // for now add whatever parseStringArray() outputs for NULLs ("NULL")
1612  string_array_buffer_->resize(num_rows, string_vec);
1613  }
1614  } else {
1615  if (!is_null) {
1616  ArrayDatum d = StringToArray(std::string(val), ti, cp);
1617  if (d.is_null) { // val could be "NULL"
1618  array_buffer_->resize(num_rows, NullArray(ti));
1619  } else {
1620  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
1621  throw std::runtime_error("Fixed length array for column " + cd->columnName +
1622  " has incorrect length: " + std::string(val));
1623  }
1624  array_buffer_->resize(num_rows, d);
1625  }
1626  } else {
1627  array_buffer_->resize(num_rows, NullArray(ti));
1628  }
1629  }
1630  break;
1631  }
1632  case kPOINT:
1633  case kMULTIPOINT:
1634  case kLINESTRING:
1635  case kMULTILINESTRING:
1636  case kPOLYGON:
1637  case kMULTIPOLYGON:
1638  geo_string_buffer_->resize(num_rows, val);
1639  break;
1640  default:
1641  CHECK(false) << "TypedImportBuffer::addDefaultValues() does not support type "
1642  << type;
1643  }
1644 }
#define NULL_DOUBLE
Definition: sqltypes.h:63
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:409
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::string to_string(char const *&&v)
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:228
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
CONSTEXPR DEVICE bool is_null(const T &value)
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:277
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
std::optional< std::string > default_value
Definition: sqltypes.h:66
Definition: sqltypes.h:67
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:354
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:562
Definition: sqltypes.h:55
#define IS_STRING(T)
Definition: sqltypes.h:322
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
#define CHECK(condition)
Definition: Logger.h:222
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:59
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:411
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedString ( const std::vector< std::string > &  string_vec)

Definition at line 527 of file Importer.cpp.

References CHECK, column_desc_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, logger::ERROR, SQLTypeInfo::get_size(), getColumnDesc(), StringDictionary::getOrAddBulk(), LOG, StringDictionary::MAX_STRLEN, string_dict_, string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

527  {
529  std::vector<std::string_view> string_view_vec;
530  string_view_vec.reserve(string_vec.size());
531  for (const auto& str : string_vec) {
532  if (str.size() > StringDictionary::MAX_STRLEN) {
533  std::ostringstream oss;
534  oss << "while processing dictionary for column " << getColumnDesc()->columnName
535  << " a string was detected too long for encoding, string length = "
536  << str.size() << ", first 100 characters are '" << str.substr(0, 100) << "'";
537  throw std::runtime_error(oss.str());
538  }
539  string_view_vec.push_back(str);
540  }
541  try {
542  switch (column_desc_->columnType.get_size()) {
543  case 1:
544  string_dict_i8_buffer_->resize(string_view_vec.size());
545  string_dict_->getOrAddBulk(string_view_vec, string_dict_i8_buffer_->data());
546  break;
547  case 2:
548  string_dict_i16_buffer_->resize(string_view_vec.size());
549  string_dict_->getOrAddBulk(string_view_vec, string_dict_i16_buffer_->data());
550  break;
551  case 4:
552  string_dict_i32_buffer_->resize(string_view_vec.size());
553  string_dict_->getOrAddBulk(string_view_vec, string_dict_i32_buffer_->data());
554  break;
555  default:
556  CHECK(false);
557  }
558  } catch (std::exception& e) {
559  std::ostringstream oss;
560  oss << "while processing dictionary for column " << getColumnDesc()->columnName
561  << " : " << e.what();
562  LOG(ERROR) << oss.str();
563  throw std::runtime_error(oss.str());
564  }
565 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:414
#define LOG(tag)
Definition: Logger.h:216
StringDictionary * string_dict_
Definition: Importer.h:548
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:542
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:544
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:543
#define CHECK(condition)
Definition: Logger.h:222
const ColumnDescriptor * getColumnDesc() const
Definition: Importer.h:319
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedStringArray ( const std::vector< OptionalStringVector > &  string_array_vec)
inline

Definition at line 269 of file Importer.h.

References anonymous_namespace{Utm.h}::a, CHECK, checked_malloc(), column_desc_, ColumnDescriptor::columnType, import_export::ImporterUtils::composeNullArray(), StringDictionary::getOrAddBulkArray(), StringDictionary::MAX_STRLEN, string_array_dict_buffer_, and string_dict_.

270  {
272 
273  // first check data is ok
274  for (auto& p : string_array_vec) {
275  if (!p) {
276  continue;
277  }
278  for (const auto& str : *p) {
279  if (str.size() > StringDictionary::MAX_STRLEN) {
280  throw std::runtime_error("String too long for dictionary encoding.");
281  }
282  }
283  }
284 
285  // to avoid copying, create a string view of each string in the
286  // `string_array_vec` where the array holding the string is *not null*
287  std::vector<std::vector<std::string_view>> string_view_array_vec;
288  for (auto& p : string_array_vec) {
289  if (!p) {
290  continue;
291  }
292  auto& array = string_view_array_vec.emplace_back();
293  for (const auto& str : *p) {
294  array.emplace_back(str);
295  }
296  }
297 
298  std::vector<std::vector<int32_t>> ids_array(0);
299  string_dict_->getOrAddBulkArray(string_view_array_vec, ids_array);
300 
301  size_t i, j;
302  for (i = 0, j = 0; i < string_array_vec.size(); ++i) {
303  if (!string_array_vec[i]) { // null array
304  string_array_dict_buffer_->push_back(
306  } else { // non-null array
307  auto& p = ids_array[j++];
308  size_t len = p.size() * sizeof(int32_t);
309  auto a = static_cast<int32_t*>(checked_malloc(len));
310  memcpy(a, &p[0], len);
311  string_array_dict_buffer_->push_back(
312  ArrayDatum(len, reinterpret_cast<int8_t*>(a), false));
313  }
314  }
315  }
StringDictionary * string_dict_
Definition: Importer.h:548
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:433
constexpr double a
Definition: Utm.h:32
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:228
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:545
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
void getOrAddBulkArray(const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec)
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
#define CHECK(condition)
Definition: Logger.h:222
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDouble ( const double  v)
inline

Definition at line 250 of file Importer.h.

References double_buffer_.

Referenced by add_value().

250 { double_buffer_->push_back(v); }
std::vector< double > * double_buffer_
Definition: Importer.h:535

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addFloat ( const float  v)
inline

Definition at line 248 of file Importer.h.

References float_buffer_.

Referenced by add_value().

248 { float_buffer_->push_back(v); }
std::vector< float > * float_buffer_
Definition: Importer.h:534

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addGeoString ( const std::string_view  v)
inline

Definition at line 254 of file Importer.h.

References geo_string_buffer_.

Referenced by add_value().

254 { geo_string_buffer_->emplace_back(v); }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addInt ( const int32_t  v)
inline

Definition at line 244 of file Importer.h.

References int_buffer_.

Referenced by add_value().

244 { int_buffer_->push_back(v); }
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addSmallint ( const int16_t  v)
inline

Definition at line 242 of file Importer.h.

References smallint_buffer_.

Referenced by add_value().

242 { smallint_buffer_->push_back(v); }
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addString ( const std::string_view  v)
inline

Definition at line 252 of file Importer.h.

References string_buffer_.

Referenced by add_value().

252 { string_buffer_->emplace_back(v); }
std::vector< std::string > * string_buffer_
Definition: Importer.h:536

+ Here is the caller graph for this function:

OptionalStringVector& import_export::TypedImportBuffer::addStringArray ( )
inline

Definition at line 258 of file Importer.h.

References string_array_buffer_.

Referenced by add_value(), and add_values().

258  {
259  string_array_buffer_->emplace_back(std::vector<std::string>{});
260  return string_array_buffer_->back();
261  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addStringArray ( const OptionalStringVector &  arr)
inline

Definition at line 263 of file Importer.h.

References string_array_buffer_.

263  {
264  string_array_buffer_->push_back(arr);
265  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
void import_export::TypedImportBuffer::addTinyint ( const int8_t  v)
inline

Definition at line 240 of file Importer.h.

References tinyint_buffer_.

Referenced by add_value().

240 { tinyint_buffer_->push_back(v); }
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::clear ( )
inline

Definition at line 411 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

411  {
412  switch (column_desc_->columnType.get_type()) {
413  case kBOOLEAN: {
414  bool_buffer_->clear();
415  break;
416  }
417  case kTINYINT: {
418  tinyint_buffer_->clear();
419  break;
420  }
421  case kSMALLINT: {
422  smallint_buffer_->clear();
423  break;
424  }
425  case kINT: {
426  int_buffer_->clear();
427  break;
428  }
429  case kBIGINT:
430  case kNUMERIC:
431  case kDECIMAL: {
432  bigint_buffer_->clear();
433  break;
434  }
435  case kFLOAT: {
436  float_buffer_->clear();
437  break;
438  }
439  case kDOUBLE: {
440  double_buffer_->clear();
441  break;
442  }
443  case kTEXT:
444  case kVARCHAR:
445  case kCHAR: {
446  string_buffer_->clear();
447  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
448  switch (column_desc_->columnType.get_size()) {
449  case 1:
450  string_dict_i8_buffer_->clear();
451  break;
452  case 2:
453  string_dict_i16_buffer_->clear();
454  break;
455  case 4:
456  string_dict_i32_buffer_->clear();
457  break;
458  default:
459  CHECK(false);
460  }
461  }
462  break;
463  }
464  case kDATE:
465  case kTIME:
466  case kTIMESTAMP:
467  bigint_buffer_->clear();
468  break;
469  case kARRAY: {
470  if (IS_STRING(column_desc_->columnType.get_subtype())) {
471  string_array_buffer_->clear();
472  string_array_dict_buffer_->clear();
473  } else {
474  array_buffer_->clear();
475  }
476  break;
477  }
478  case kPOINT:
479  case kMULTIPOINT:
480  case kLINESTRING:
481  case kMULTILINESTRING:
482  case kPOLYGON:
483  case kMULTIPOLYGON:
484  geo_string_buffer_->clear();
485  break;
486  default:
487  CHECK(false);
488  }
489  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:405
HOST DEVICE int get_size() const
Definition: sqltypes.h:414
Definition: sqltypes.h:63
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:545
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:542
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:66
Definition: sqltypes.h:67
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:412
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:544
Definition: sqltypes.h:55
#define IS_STRING(T)
Definition: sqltypes.h:322
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:543
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:59
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor cd,
const arrow::Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange slice_range,
BadRowsTracker *const  bad_rows_tracker 
)

Referenced by add_arrow_values().

+ Here is the caller graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor cd,
const Array array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange slice_range,
import_export::BadRowsTracker *const  bad_rows_tracker 
)

Definition at line 827 of file Importer.cpp.

References col_idx, anonymous_namespace{ArrowImporter.h}::error_context(), geo_string_buffer_, SQLTypeInfo::get_type(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), import_buffers, import_export::BadRowsTracker::importer, import_export::BadRowsTracker::mutex, import_export::BadRowsTracker::rows, import_export::Importer::set_geo_physical_import_buffer(), and anonymous_namespace{ArrowImporter.h}::value_getter().

832  {
833  auto data =
834  std::make_unique<DataBuffer<DATA_TYPE>>(cd, array, buffer, bad_rows_tracker);
835  auto f_value_getter = value_getter(array, cd, bad_rows_tracker);
836  std::function<void(const int64_t)> f_add_geo_phy_cols = [&](const int64_t row) {};
837  if (bad_rows_tracker && cd->columnType.is_geometry()) {
838  f_add_geo_phy_cols = [&](const int64_t row) {
839  // Populate physical columns (ref. DBHandler::load_table)
840  std::vector<double> coords, bounds;
841  std::vector<int> ring_sizes, poly_rings;
842  int render_group = 0;
843  SQLTypeInfo ti;
844  // replace any unexpected exception from getGeoColumns or other
845  // on this path with a GeoImportException so that we wont over
846  // push a null to the logical column...
847  try {
848  SQLTypeInfo import_ti{ti};
849  if (array.IsNull(row)) {
850  Geospatial::GeoTypesFactory::getNullGeoColumns(
851  import_ti, coords, bounds, ring_sizes, poly_rings, false);
852  } else {
853  arrow_throw_if<GeoImportException>(
854  !Geospatial::GeoTypesFactory::getGeoColumns(geo_string_buffer_->back(),
855  ti,
856  coords,
857  bounds,
858  ring_sizes,
859  poly_rings,
860  false),
861  error_context(cd, bad_rows_tracker) + "Invalid geometry");
862  arrow_throw_if<GeoImportException>(
863  cd->columnType.get_type() != ti.get_type(),
864  error_context(cd, bad_rows_tracker) + "Geometry type mismatch");
865  }
866  auto col_idx_workpad = col_idx; // what a pitfall!!
867  import_export::Importer::set_geo_physical_import_buffer(
868  bad_rows_tracker->importer->getCatalog(),
869  cd,
870  *import_buffers,
871  col_idx_workpad,
872  coords,
873  bounds,
874  ring_sizes,
875  poly_rings,
876  render_group);
877  } catch (GeoImportException&) {
878  throw;
879  } catch (std::runtime_error& e) {
880  throw GeoImportException(e.what());
881  } catch (const std::exception& e) {
882  throw GeoImportException(e.what());
883  } catch (...) {
884  throw GeoImportException("unknown exception");
885  }
886  };
887  }
888  auto f_mark_a_bad_row = [&](const auto row) {
889  std::unique_lock<std::mutex> lck(bad_rows_tracker->mutex);
890  bad_rows_tracker->rows.insert(row - slice_range.first);
891  };
892  buffer.reserve(slice_range.second - slice_range.first);
893  for (size_t row = slice_range.first; row < slice_range.second; ++row) {
894  try {
895  *data << (array.IsNull(row) ? nullptr : f_value_getter(array, row));
896  f_add_geo_phy_cols(row);
897  } catch (GeoImportException&) {
898  f_mark_a_bad_row(row);
899  } catch (ArrowImporterException&) {
900  // trace bad rows of each column; otherwise rethrow.
901  if (bad_rows_tracker) {
902  *data << nullptr;
903  f_mark_a_bad_row(row);
904  } else {
905  throw;
906  }
907  }
908  }
909  return buffer.size();
910 }
auto value_getter(const arrow::Array &array, const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1309
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
Catalog_Namespace::Catalog & getCatalog()
Definition: Importer.h:841
std::set< int64_t > rows
Definition: Importer.h:79
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:524
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1079
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const bool force_null=false)
Definition: Importer.cpp:1666
std::string error_context(const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
Definition: ArrowImporter.h:77
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

template<typename DATA_TYPE >
auto import_export::TypedImportBuffer::del_values ( std::vector< DATA_TYPE > &  buffer,
BadRowsTracker *const  bad_rows_tracker 
)
auto import_export::TypedImportBuffer::del_values ( const SQLTypes  type,
BadRowsTracker *const  bad_rows_tracker 
)
std::vector< DataBlockPtr > import_export::TypedImportBuffer::get_data_block_pointers ( const std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers)
static

Definition at line 3053 of file Importer.cpp.

References DataBlockPtr::arraysPtr, threading_serial::async(), CHECK, CHECK_EQ, getStringArrayBuffer(), getTypeInfo(), import_buffers, SQLTypeInfo::is_number(), IS_STRING, SQLTypeInfo::is_string(), kARRAY, kBOOLEAN, kENCODING_DICT, kENCODING_NONE, DataBlockPtr::numbersPtr, run_benchmark_import::result, and DataBlockPtr::stringsPtr.

Referenced by import_export::fill_missing_columns(), import_export::Loader::loadImpl(), and import_export::Loader::loadToShard().

3054  {
3055  std::vector<DataBlockPtr> result(import_buffers.size());
3056  std::vector<std::pair<const size_t, std::future<int8_t*>>>
3057  encoded_data_block_ptrs_futures;
3058  // make all async calls to string dictionary here and then continue execution
3059  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
3060  if (import_buffers[buf_idx]->getTypeInfo().is_string() &&
3061  import_buffers[buf_idx]->getTypeInfo().get_compression() != kENCODING_NONE) {
3062  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
3063  CHECK_EQ(kENCODING_DICT, import_buffers[buf_idx]->getTypeInfo().get_compression());
3064 
3065  encoded_data_block_ptrs_futures.emplace_back(std::make_pair(
3066  buf_idx,
3067  std::async(std::launch::async, [buf_idx, &import_buffers, string_payload_ptr] {
3068  import_buffers[buf_idx]->addDictEncodedString(*string_payload_ptr);
3069  return import_buffers[buf_idx]->getStringDictBuffer();
3070  })));
3071  }
3072  }
3073 
3074  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
3075  DataBlockPtr p;
3076  if (import_buffers[buf_idx]->getTypeInfo().is_number() ||
3077  import_buffers[buf_idx]->getTypeInfo().is_time() ||
3078  import_buffers[buf_idx]->getTypeInfo().get_type() == kBOOLEAN) {
3079  p.numbersPtr = import_buffers[buf_idx]->getAsBytes();
3080  } else if (import_buffers[buf_idx]->getTypeInfo().is_string()) {
3081  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
3082  if (import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_NONE) {
3083  p.stringsPtr = string_payload_ptr;
3084  } else {
3085  // This condition means we have column which is ENCODED string. We already made
3086  // Async request to gain the encoded integer values above so we should skip this
3087  // iteration and continue.
3088  continue;
3089  }
3090  } else if (import_buffers[buf_idx]->getTypeInfo().is_geometry()) {
3091  auto geo_payload_ptr = import_buffers[buf_idx]->getGeoStringBuffer();
3092  p.stringsPtr = geo_payload_ptr;
3093  } else {
3094  CHECK(import_buffers[buf_idx]->getTypeInfo().get_type() == kARRAY);
3095  if (IS_STRING(import_buffers[buf_idx]->getTypeInfo().get_subtype())) {
3096  CHECK(import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_DICT);
3097  import_buffers[buf_idx]->addDictEncodedStringArray(
3098  *import_buffers[buf_idx]->getStringArrayBuffer());
3099  p.arraysPtr = import_buffers[buf_idx]->getStringArrayDictBuffer();
3100  } else {
3101  p.arraysPtr = import_buffers[buf_idx]->getArrayBuffer();
3102  }
3103  }
3104  result[buf_idx] = p;
3105  }
3106 
3107  // wait for the async requests we made for string dictionary
3108  for (auto& encoded_ptr_future : encoded_data_block_ptrs_futures) {
3109  result[encoded_ptr_future.first].numbersPtr = encoded_ptr_future.second.get();
3110  }
3111  return result;
3112 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
const SQLTypeInfo & getTypeInfo() const
Definition: Importer.h:317
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:247
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:248
bool is_number() const
Definition: sqltypes.h:605
future< Result > async(Fn &&fn, Args &&...args)
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:524
#define IS_STRING(T)
Definition: sqltypes.h:322
#define CHECK(condition)
Definition: Logger.h:222
std::vector< OptionalStringVector > * getStringArrayBuffer() const
Definition: Importer.h:383
bool is_string() const
Definition: sqltypes.h:600
int8_t * numbersPtr
Definition: sqltypes.h:246

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getArrayBuffer ( ) const
inline

Definition at line 381 of file Importer.h.

References array_buffer_.

381 { return array_buffer_; }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
int8_t* import_export::TypedImportBuffer::getAsBytes ( ) const
inline

Definition at line 323 of file Importer.h.

References bigint_buffer_, bool_buffer_, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, SQLTypeInfo::get_type(), int_buffer_, kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, smallint_buffer_, and tinyint_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

323  {
324  switch (column_desc_->columnType.get_type()) {
325  case kBOOLEAN:
326  return reinterpret_cast<int8_t*>(bool_buffer_->data());
327  case kTINYINT:
328  return reinterpret_cast<int8_t*>(tinyint_buffer_->data());
329  case kSMALLINT:
330  return reinterpret_cast<int8_t*>(smallint_buffer_->data());
331  case kINT:
332  return reinterpret_cast<int8_t*>(int_buffer_->data());
333  case kBIGINT:
334  case kNUMERIC:
335  case kDECIMAL:
336  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
337  case kFLOAT:
338  return reinterpret_cast<int8_t*>(float_buffer_->data());
339  case kDOUBLE:
340  return reinterpret_cast<int8_t*>(double_buffer_->data());
341  case kDATE:
342  case kTIME:
343  case kTIMESTAMP:
344  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
345  default:
346  abort();
347  }
348  }
Definition: sqltypes.h:63
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
Definition: sqltypes.h:67
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
Definition: sqltypes.h:59
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const ColumnDescriptor* import_export::TypedImportBuffer::getColumnDesc ( ) const
inline

Definition at line 319 of file Importer.h.

References column_desc_.

Referenced by addDictEncodedString(), foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::set_null(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::set_null(), and foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::set_null().

319 { return column_desc_; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:547

+ Here is the caller graph for this function:

size_t import_export::TypedImportBuffer::getElementSize ( ) const
inline

Definition at line 350 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_type(), kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, and kTINYINT.

350  {
351  switch (column_desc_->columnType.get_type()) {
352  case kBOOLEAN:
353  return sizeof((*bool_buffer_)[0]);
354  case kTINYINT:
355  return sizeof((*tinyint_buffer_)[0]);
356  case kSMALLINT:
357  return sizeof((*smallint_buffer_)[0]);
358  case kINT:
359  return sizeof((*int_buffer_)[0]);
360  case kBIGINT:
361  case kNUMERIC:
362  case kDECIMAL:
363  return sizeof((*bigint_buffer_)[0]);
364  case kFLOAT:
365  return sizeof((*float_buffer_)[0]);
366  case kDOUBLE:
367  return sizeof((*double_buffer_)[0]);
368  case kDATE:
369  case kTIME:
370  case kTIMESTAMP:
371  return sizeof((*bigint_buffer_)[0]);
372  default:
373  abort();
374  }
375  }
Definition: sqltypes.h:63
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:404
Definition: sqltypes.h:67
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
Definition: sqltypes.h:59
SQLTypeInfo columnType

+ Here is the call graph for this function:

std::vector<std::string>* import_export::TypedImportBuffer::getGeoStringBuffer ( ) const
inline

Definition at line 379 of file Importer.h.

References geo_string_buffer_.

379 { return geo_string_buffer_; }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::getStringArrayBuffer ( ) const
inline

Definition at line 383 of file Importer.h.

References string_array_buffer_.

Referenced by get_data_block_pointers().

383  {
384  return string_array_buffer_;
385  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getStringArrayDictBuffer ( ) const
inline

Definition at line 387 of file Importer.h.

References string_array_dict_buffer_.

387  {
388  return string_array_dict_buffer_;
389  }
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:545
std::vector<std::string>* import_export::TypedImportBuffer::getStringBuffer ( ) const
inline

Definition at line 377 of file Importer.h.

References string_buffer_.

377 { return string_buffer_; }
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
int8_t* import_export::TypedImportBuffer::getStringDictBuffer ( ) const
inline

Definition at line 391 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_size(), string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::int_value_at().

391  {
392  switch (column_desc_->columnType.get_size()) {
393  case 1:
394  return reinterpret_cast<int8_t*>(string_dict_i8_buffer_->data());
395  case 2:
396  return reinterpret_cast<int8_t*>(string_dict_i16_buffer_->data());
397  case 4:
398  return reinterpret_cast<int8_t*>(string_dict_i32_buffer_->data());
399  default:
400  abort();
401  }
402  }
HOST DEVICE int get_size() const
Definition: sqltypes.h:414
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:542
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:544
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:543
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionary* import_export::TypedImportBuffer::getStringDictionary ( ) const
inline

Definition at line 321 of file Importer.h.

References string_dict_.

321 { return string_dict_; }
StringDictionary * string_dict_
Definition: Importer.h:548
const SQLTypeInfo& import_export::TypedImportBuffer::getTypeInfo ( ) const
inline

Definition at line 317 of file Importer.h.

References column_desc_, and ColumnDescriptor::columnType.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), get_data_block_pointers(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

317 { return column_desc_->columnType; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
SQLTypeInfo columnType

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::pop_value ( )

Definition at line 765 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, decimal_to_int_type(), double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_buffer_, tinyint_buffer_, and run_benchmark_import::type.

765  {
766  const auto type = column_desc_->columnType.is_decimal()
767  ? decimal_to_int_type(column_desc_->columnType)
768  : column_desc_->columnType.get_type();
769  switch (type) {
770  case kBOOLEAN:
771  bool_buffer_->pop_back();
772  break;
773  case kTINYINT:
774  tinyint_buffer_->pop_back();
775  break;
776  case kSMALLINT:
777  smallint_buffer_->pop_back();
778  break;
779  case kINT:
780  int_buffer_->pop_back();
781  break;
782  case kBIGINT:
783  bigint_buffer_->pop_back();
784  break;
785  case kFLOAT:
786  float_buffer_->pop_back();
787  break;
788  case kDOUBLE:
789  double_buffer_->pop_back();
790  break;
791  case kTEXT:
792  case kVARCHAR:
793  case kCHAR:
794  string_buffer_->pop_back();
795  break;
796  case kDATE:
797  case kTIME:
798  case kTIMESTAMP:
799  bigint_buffer_->pop_back();
800  break;
801  case kARRAY:
802  if (IS_STRING(column_desc_->columnType.get_subtype())) {
803  string_array_buffer_->pop_back();
804  } else {
805  array_buffer_->pop_back();
806  }
807  break;
808  case kPOINT:
809  case kMULTIPOINT:
810  case kLINESTRING:
811  case kMULTILINESTRING:
812  case kPOLYGON:
813  case kMULTIPOLYGON:
814  geo_string_buffer_->pop_back();
815  break;
816  default:
817  CHECK(false) << "TypedImportBuffer::pop_value() does not support type " << type;
818  }
819 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:405
Definition: sqltypes.h:63
std::vector< std::string > * string_buffer_
Definition: Importer.h:536
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:538
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:531
std::vector< float > * float_buffer_
Definition: Importer.h:534
std::vector< double > * double_buffer_
Definition: Importer.h:535
std::vector< int32_t > * int_buffer_
Definition: Importer.h:532
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:539
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:533
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:529
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:499
Definition: sqltypes.h:66
Definition: sqltypes.h:67
Definition: sqltypes.h:55
#define IS_STRING(T)
Definition: sqltypes.h:322
const ColumnDescriptor * column_desc_
Definition: Importer.h:547
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:530
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:59
SQLTypeInfo columnType
bool is_decimal() const
Definition: sqltypes.h:603
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:537

+ Here is the call graph for this function:

bool import_export::TypedImportBuffer::stringDictCheckpoint ( )
inline

Definition at line 404 of file Importer.h.

References StringDictionary::checkpoint(), and string_dict_.

404  {
405  if (string_dict_ == nullptr) {
406  return true;
407  }
408  return string_dict_->checkpoint();
409  }
StringDictionary * string_dict_
Definition: Importer.h:548
bool checkpoint() noexcept

+ Here is the call graph for this function:

Member Data Documentation

union { ... }
union { ... }
std::vector<ArrayDatum>* import_export::TypedImportBuffer::array_buffer_
std::vector<int64_t>* import_export::TypedImportBuffer::bigint_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::bool_buffer_
size_t import_export::TypedImportBuffer::col_idx

Definition at line 525 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer().

const ColumnDescriptor* import_export::TypedImportBuffer::column_desc_
private
std::vector<double>* import_export::TypedImportBuffer::double_buffer_
std::vector<float>* import_export::TypedImportBuffer::float_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::geo_string_buffer_
std::vector<std::unique_ptr<TypedImportBuffer> >* import_export::TypedImportBuffer::import_buffers

Definition at line 524 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer(), and get_data_block_pointers().

std::vector<int32_t>* import_export::TypedImportBuffer::int_buffer_
std::vector<int16_t>* import_export::TypedImportBuffer::smallint_buffer_
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::string_array_buffer_
std::vector<ArrayDatum>* import_export::TypedImportBuffer::string_array_dict_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::string_buffer_
StringDictionary* import_export::TypedImportBuffer::string_dict_
private
std::vector<uint16_t>* import_export::TypedImportBuffer::string_dict_i16_buffer_
std::vector<int32_t>* import_export::TypedImportBuffer::string_dict_i32_buffer_
std::vector<uint8_t>* import_export::TypedImportBuffer::string_dict_i8_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::tinyint_buffer_

The documentation for this class was generated from the following files: