OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
import_export::TypedImportBuffer Class Reference

#include <Importer.h>

+ Inheritance diagram for import_export::TypedImportBuffer:
+ Collaboration diagram for import_export::TypedImportBuffer:

Public Types

using OptionalStringVector = std::optional< std::vector< std::string >>
 

Public Member Functions

 TypedImportBuffer (const ColumnDescriptor *col_desc, StringDictionary *string_dict)
 
 ~TypedImportBuffer ()
 
void addBoolean (const int8_t v)
 
void addTinyint (const int8_t v)
 
void addSmallint (const int16_t v)
 
void addInt (const int32_t v)
 
void addBigint (const int64_t v)
 
void addFloat (const float v)
 
void addDouble (const double v)
 
void addString (const std::string_view v)
 
void addGeoString (const std::string_view v)
 
void addArray (const ArrayDatum &v)
 
OptionalStringVector & addStringArray ()
 
void addStringArray (const OptionalStringVector &arr)
 
void addDictEncodedString (const std::vector< std::string > &string_vec)
 
void addDictEncodedStringArray (const std::vector< OptionalStringVector > &string_array_vec)
 
const SQLTypeInfo & getTypeInfo () const
 
const ColumnDescriptor * getColumnDesc () const
 
StringDictionary * getStringDictionary () const
 
int8_t * getAsBytes () const
 
size_t getElementSize () const
 
std::vector< std::string > * getStringBuffer () const
 
std::vector< std::string > * getGeoStringBuffer () const
 
std::vector< ArrayDatum > * getArrayBuffer () const
 
std::vector< OptionalStringVector > * getStringArrayBuffer () const
 
std::vector< ArrayDatum > * getStringArrayDictBuffer () const
 
int8_t * getStringDictBuffer () const
 
bool stringDictCheckpoint ()
 
void clear ()
 
size_t add_values (const ColumnDescriptor *cd, const TColumn &data)
 
size_t add_arrow_values (const ColumnDescriptor *cd, const arrow::Array &data, const bool exact_type_match, const ArraySliceRange &slice_range, BadRowsTracker *bad_rows_tracker)
 
void add_value (const ColumnDescriptor *cd, const std::string_view val, const bool is_null, const CopyParams &copy_params, const bool check_not_null=true)
 
void add_value (const ColumnDescriptor *cd, const TDatum &val, const bool is_null)
 
void addDefaultValues (const ColumnDescriptor *cd, size_t num_rows)
 
void pop_value ()
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
auto del_values (std::vector< DATA_TYPE > &buffer, BadRowsTracker *const bad_rows_tracker)
 
auto del_values (const SQLTypes type, BadRowsTracker *const bad_rows_tracker)
 
template<typename DATA_TYPE >
size_t convert_arrow_val_to_import_buffer (const ColumnDescriptor *cd, const Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, import_export::BadRowsTracker *const bad_rows_tracker)
 

Static Public Member Functions

static std::vector< DataBlockPtr > get_data_block_pointers (const std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers)
 

Public Attributes

std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
 
size_t col_idx
 
std::vector< int8_t > * bool_buffer_
 
std::vector< int8_t > * tinyint_buffer_
 
std::vector< int16_t > * smallint_buffer_
 
std::vector< int32_t > * int_buffer_
 
std::vector< int64_t > * bigint_buffer_
 
std::vector< float > * float_buffer_
 
std::vector< double > * double_buffer_
 
std::vector< std::string > * string_buffer_
 
std::vector< std::string > * geo_string_buffer_
 
std::vector< ArrayDatum > * array_buffer_
 
std::vector< OptionalStringVector > * string_array_buffer_
 
std::vector< uint8_t > * string_dict_i8_buffer_
 
std::vector< uint16_t > * string_dict_i16_buffer_
 
std::vector< int32_t > * string_dict_i32_buffer_
 
std::vector< ArrayDatum > * string_array_dict_buffer_
 

Private Attributes

union {
   std::vector< int8_t > *   bool_buffer_
 
   std::vector< int8_t > *   tinyint_buffer_
 
   std::vector< int16_t > *   smallint_buffer_
 
   std::vector< int32_t > *   int_buffer_
 
   std::vector< int64_t > *   bigint_buffer_
 
   std::vector< float > *   float_buffer_
 
   std::vector< double > *   double_buffer_
 
   std::vector< std::string > *   string_buffer_
 
   std::vector< std::string > *   geo_string_buffer_
 
   std::vector< ArrayDatum > *   array_buffer_
 
   std::vector
< OptionalStringVector > *   string_array_buffer_
 
}; 
 
union {
   std::vector< uint8_t > *   string_dict_i8_buffer_
 
   std::vector< uint16_t > *   string_dict_i16_buffer_
 
   std::vector< int32_t > *   string_dict_i32_buffer_
 
   std::vector< ArrayDatum > *   string_array_dict_buffer_
 
}; 
 
const ColumnDescriptor * column_desc_
 
StringDictionary * string_dict_
 

Detailed Description

Definition at line 93 of file Importer.h.

Member Typedef Documentation

using import_export::TypedImportBuffer::OptionalStringVector = std::optional<std::vector<std::string>>

Definition at line 95 of file Importer.h.

Constructor & Destructor Documentation

import_export::TypedImportBuffer::TypedImportBuffer ( const ColumnDescriptor * col_desc,
StringDictionary * string_dict 
)
inline

Definition at line 96 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

97  : column_desc_(col_desc), string_dict_(string_dict) {
98  switch (col_desc->columnType.get_type()) {
99  case kBOOLEAN:
100  bool_buffer_ = new std::vector<int8_t>();
101  break;
102  case kTINYINT:
103  tinyint_buffer_ = new std::vector<int8_t>();
104  break;
105  case kSMALLINT:
106  smallint_buffer_ = new std::vector<int16_t>();
107  break;
108  case kINT:
109  int_buffer_ = new std::vector<int32_t>();
110  break;
111  case kBIGINT:
112  case kNUMERIC:
113  case kDECIMAL:
114  bigint_buffer_ = new std::vector<int64_t>();
115  break;
116  case kFLOAT:
117  float_buffer_ = new std::vector<float>();
118  break;
119  case kDOUBLE:
120  double_buffer_ = new std::vector<double>();
121  break;
122  case kTEXT:
123  case kVARCHAR:
124  case kCHAR:
125  string_buffer_ = new std::vector<std::string>();
126  if (col_desc->columnType.get_compression() == kENCODING_DICT) {
127  switch (col_desc->columnType.get_size()) {
128  case 1:
129  string_dict_i8_buffer_ = new std::vector<uint8_t>();
130  break;
131  case 2:
132  string_dict_i16_buffer_ = new std::vector<uint16_t>();
133  break;
134  case 4:
135  string_dict_i32_buffer_ = new std::vector<int32_t>();
136  break;
137  default:
138  CHECK(false);
139  }
140  }
141  break;
142  case kDATE:
143  case kTIME:
144  case kTIMESTAMP:
145  bigint_buffer_ = new std::vector<int64_t>();
146  break;
147  case kARRAY:
148  if (IS_STRING(col_desc->columnType.get_subtype())) {
150  string_array_buffer_ = new std::vector<OptionalStringVector>();
151  string_array_dict_buffer_ = new std::vector<ArrayDatum>();
152  } else {
153  array_buffer_ = new std::vector<ArrayDatum>();
154  }
155  break;
156  case kPOINT:
157  case kLINESTRING:
158  case kPOLYGON:
159  case kMULTIPOLYGON:
160  geo_string_buffer_ = new std::vector<std::string>();
161  break;
162  default:
163  CHECK(false);
164  }
165  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:530
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:532
StringDictionary * string_dict_
Definition: Importer.h:542
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:525
std::vector< float > * float_buffer_
Definition: Importer.h:528
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:529
std::vector< int32_t > * int_buffer_
Definition: Importer.h:526
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:539
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:536
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:533
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:527
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:523
Definition: sqltypes.h:52
Definition: sqltypes.h:53
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:538
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:541
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:537
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:524
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:45
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:531

+ Here is the call graph for this function:

import_export::TypedImportBuffer::~TypedImportBuffer ( )
inline

Definition at line 167 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

167  {
168  switch (column_desc_->columnType.get_type()) {
169  case kBOOLEAN:
170  delete bool_buffer_;
171  break;
172  case kTINYINT:
173  delete tinyint_buffer_;
174  break;
175  case kSMALLINT:
176  delete smallint_buffer_;
177  break;
178  case kINT:
179  delete int_buffer_;
180  break;
181  case kBIGINT:
182  case kNUMERIC:
183  case kDECIMAL:
184  delete bigint_buffer_;
185  break;
186  case kFLOAT:
187  delete float_buffer_;
188  break;
189  case kDOUBLE:
190  delete double_buffer_;
191  break;
192  case kTEXT:
193  case kVARCHAR:
194  case kCHAR:
195  delete string_buffer_;
197  switch (column_desc_->columnType.get_size()) {
198  case 1:
199  delete string_dict_i8_buffer_;
200  break;
201  case 2:
203  break;
204  case 4:
206  break;
207  }
208  }
209  break;
210  case kDATE:
211  case kTIME:
212  case kTIMESTAMP:
213  delete bigint_buffer_;
214  break;
215  case kARRAY:
217  delete string_array_buffer_;
219  } else {
220  delete array_buffer_;
221  }
222  break;
223  case kPOINT:
224  case kLINESTRING:
225  case kPOLYGON:
226  case kMULTIPOLYGON:
227  delete geo_string_buffer_;
228  break;
229  default:
230  CHECK(false);
231  }
232  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:530
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:532
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:525
std::vector< float > * float_buffer_
Definition: Importer.h:528
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:529
std::vector< int32_t > * int_buffer_
Definition: Importer.h:526
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:539
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:536
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:533
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:527
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:523
Definition: sqltypes.h:52
Definition: sqltypes.h:53
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:538
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:541
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:537
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:524
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:45
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:531

+ Here is the call graph for this function:

Member Function Documentation

size_t import_export::TypedImportBuffer::add_arrow_values ( const ColumnDescriptor * cd,
const arrow::Array &  data,
const bool  exact_type_match,
const ArraySliceRange & slice_range,
BadRowsTracker * bad_rows_tracker 
)

Definition at line 908 of file Importer.cpp.

References arrow_throw_if(), bigint_buffer_, bool_buffer_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_arrow_val_to_import_buffer(), double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), int_buffer_, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, heavydb.dtypes::STRING, string_buffer_, tinyint_buffer_, and run_benchmark_import::type.

912  {
913  const auto type = cd->columnType.get_type();
914  if (cd->columnType.get_notnull()) {
915  // We can't have any null values for this column; to have them is an error
916  arrow_throw_if(col.null_count() > 0, "NULL not allowed for column " + cd->columnName);
917  }
918 
919  switch (type) {
920  case kBOOLEAN:
921  if (exact_type_match) {
922  arrow_throw_if(col.type_id() != Type::BOOL, "Expected boolean type");
923  }
925  cd, col, *bool_buffer_, slice_range, bad_rows_tracker);
926  case kTINYINT:
927  if (exact_type_match) {
928  arrow_throw_if(col.type_id() != Type::INT8, "Expected int8 type");
929  }
931  cd, col, *tinyint_buffer_, slice_range, bad_rows_tracker);
932  case kSMALLINT:
933  if (exact_type_match) {
934  arrow_throw_if(col.type_id() != Type::INT16, "Expected int16 type");
935  }
937  cd, col, *smallint_buffer_, slice_range, bad_rows_tracker);
938  case kINT:
939  if (exact_type_match) {
940  arrow_throw_if(col.type_id() != Type::INT32, "Expected int32 type");
941  }
943  cd, col, *int_buffer_, slice_range, bad_rows_tracker);
944  case kBIGINT:
945  case kNUMERIC:
946  case kDECIMAL:
947  if (exact_type_match) {
948  arrow_throw_if(col.type_id() != Type::INT64, "Expected int64 type");
949  }
951  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
952  case kFLOAT:
953  if (exact_type_match) {
954  arrow_throw_if(col.type_id() != Type::FLOAT, "Expected float type");
955  }
957  cd, col, *float_buffer_, slice_range, bad_rows_tracker);
958  case kDOUBLE:
959  if (exact_type_match) {
960  arrow_throw_if(col.type_id() != Type::DOUBLE, "Expected double type");
961  }
963  cd, col, *double_buffer_, slice_range, bad_rows_tracker);
964  case kTEXT:
965  case kVARCHAR:
966  case kCHAR:
967  if (exact_type_match) {
968  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
969  "Expected string type");
970  }
972  cd, col, *string_buffer_, slice_range, bad_rows_tracker);
973  case kTIME:
974  if (exact_type_match) {
975  arrow_throw_if(col.type_id() != Type::TIME32 && col.type_id() != Type::TIME64,
976  "Expected time32 or time64 type");
977  }
979  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
980  case kTIMESTAMP:
981  if (exact_type_match) {
982  arrow_throw_if(col.type_id() != Type::TIMESTAMP, "Expected timestamp type");
983  }
985  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
986  case kDATE:
987  if (exact_type_match) {
988  arrow_throw_if(col.type_id() != Type::DATE32 && col.type_id() != Type::DATE64,
989  "Expected date32 or date64 type");
990  }
992  cd, col, *bigint_buffer_, slice_range, bad_rows_tracker);
993  case kPOINT:
994  case kLINESTRING:
995  case kPOLYGON:
996  case kMULTIPOLYGON:
997  arrow_throw_if(col.type_id() != Type::BINARY && col.type_id() != Type::STRING,
998  "Expected string type");
1000  cd, col, *geo_string_buffer_, slice_range, bad_rows_tracker);
1001  case kARRAY:
1002  throw std::runtime_error("Arrow array appends not yet supported");
1003  default:
1004  throw std::runtime_error("Invalid Type");
1005  }
1006 }
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:530
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:525
std::vector< float > * float_buffer_
Definition: Importer.h:528
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
tuple STRING
Definition: dtypes.py:31
std::vector< double > * double_buffer_
Definition: Importer.h:529
std::vector< int32_t > * int_buffer_
Definition: Importer.h:526
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:527
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:523
Definition: sqltypes.h:52
Definition: sqltypes.h:53
Definition: sqltypes.h:41
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:524
size_t convert_arrow_val_to_import_buffer(const ColumnDescriptor *cd, const arrow::Array &array, std::vector< DATA_TYPE > &buffer, const ArraySliceRange &slice_range, BadRowsTracker *const bad_rows_tracker)
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
std::string columnName
void arrow_throw_if(const bool cond, const std::string &message)
Definition: ArrowImporter.h:42
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:531

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const std::string_view  val,
const bool  is_null,
const CopyParams & copy_params,
const bool  check_not_null = true 
)

Definition at line 567 of file Importer.cpp.

References addArray(), addBigint(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), Datum::bigintval, Datum::boolval, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), Datum::intval, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), Datum::smallintval, import_export::StringToArray(), StringToDatum(), Datum::tinyintval, to_string(), run_benchmark_import::type, and DecimalOverflowValidator::validate().

Referenced by foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::set_null(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::set_null(), and foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::set_null().

571  {
572  const auto type = cd->columnType.get_type();
573  switch (type) {
574  case kBOOLEAN: {
575  if (is_null) {
576  if (check_not_null && cd->columnType.get_notnull()) {
577  throw std::runtime_error("NULL for column " + cd->columnName);
578  }
580  } else {
581  auto ti = cd->columnType;
582  Datum d = StringToDatum(val, ti);
583  addBoolean(static_cast<int8_t>(d.boolval));
584  }
585  break;
586  }
587  case kTINYINT: {
588  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
589  auto ti = cd->columnType;
590  Datum d = StringToDatum(val, ti);
592  } else {
593  if (check_not_null && cd->columnType.get_notnull()) {
594  throw std::runtime_error("NULL for column " + cd->columnName);
595  }
597  }
598  break;
599  }
600  case kSMALLINT: {
601  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
602  auto ti = cd->columnType;
603  Datum d = StringToDatum(val, ti);
605  } else {
606  if (check_not_null && cd->columnType.get_notnull()) {
607  throw std::runtime_error("NULL for column " + cd->columnName);
608  }
610  }
611  break;
612  }
613  case kINT: {
614  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
615  auto ti = cd->columnType;
616  Datum d = StringToDatum(val, ti);
617  addInt(d.intval);
618  } else {
619  if (check_not_null && cd->columnType.get_notnull()) {
620  throw std::runtime_error("NULL for column " + cd->columnName);
621  }
623  }
624  break;
625  }
626  case kBIGINT: {
627  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
628  auto ti = cd->columnType;
629  Datum d = StringToDatum(val, ti);
630  addBigint(d.bigintval);
631  } else {
632  if (check_not_null && cd->columnType.get_notnull()) {
633  throw std::runtime_error("NULL for column " + cd->columnName);
634  }
636  }
637  break;
638  }
639  case kDECIMAL:
640  case kNUMERIC: {
641  if (!is_null) {
642  auto ti = cd->columnType;
643  Datum d = StringToDatum(val, ti);
644  DecimalOverflowValidator validator(ti);
645  validator.validate(d.bigintval);
646  addBigint(d.bigintval);
647  } else {
648  if (check_not_null && cd->columnType.get_notnull()) {
649  throw std::runtime_error("NULL for column " + cd->columnName);
650  }
652  }
653  break;
654  }
655  case kFLOAT:
656  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
657  addFloat(static_cast<float>(std::atof(std::string(val).c_str())));
658  } else {
659  if (check_not_null && cd->columnType.get_notnull()) {
660  throw std::runtime_error("NULL for column " + cd->columnName);
661  }
663  }
664  break;
665  case kDOUBLE:
666  if (!is_null && (val[0] == '.' || isdigit(val[0]) || val[0] == '-')) {
667  addDouble(std::atof(std::string(val).c_str()));
668  } else {
669  if (check_not_null && cd->columnType.get_notnull()) {
670  throw std::runtime_error("NULL for column " + cd->columnName);
671  }
673  }
674  break;
675  case kTEXT:
676  case kVARCHAR:
677  case kCHAR: {
678  // @TODO(wei) for now, use empty string for nulls
679  if (is_null) {
680  if (check_not_null && cd->columnType.get_notnull()) {
681  throw std::runtime_error("NULL for column " + cd->columnName);
682  }
683  addString(std::string());
684  } else {
685  if (val.length() > StringDictionary::MAX_STRLEN) {
686  throw std::runtime_error("String too long for column " + cd->columnName +
687  " was " + std::to_string(val.length()) + " max is " +
689  }
690  addString(val);
691  }
692  break;
693  }
694  case kTIME:
695  case kTIMESTAMP:
696  case kDATE:
697  if (!is_null && (isdigit(val[0]) || val[0] == '-')) {
698  SQLTypeInfo ti = cd->columnType;
699  Datum d = StringToDatum(val, ti);
700  addBigint(d.bigintval);
701  } else {
702  if (check_not_null && cd->columnType.get_notnull()) {
703  throw std::runtime_error("NULL for column " + cd->columnName);
704  }
706  }
707  break;
708  case kARRAY: {
709  if (check_not_null && is_null && cd->columnType.get_notnull()) {
710  throw std::runtime_error("NULL for column " + cd->columnName);
711  }
712  SQLTypeInfo ti = cd->columnType;
713  if (IS_STRING(ti.get_subtype())) {
714  std::vector<std::string> string_vec;
715  // Just parse string array, don't push it to buffer yet as we might throw
717  std::string(val), copy_params, string_vec);
718  if (!is_null) {
719  if (ti.get_size() > 0) {
720  auto sti = ti.get_elem_type();
721  size_t expected_size = ti.get_size() / sti.get_size();
722  size_t actual_size = string_vec.size();
723  if (actual_size != expected_size) {
724  throw std::runtime_error("Fixed length array column " + cd->columnName +
725  " expects " + std::to_string(expected_size) +
726  " values, received " +
727  std::to_string(actual_size));
728  }
729  }
730  addStringArray(string_vec);
731  } else {
732  addStringArray(std::nullopt);
733  }
734  } else {
735  if (!is_null) {
736  ArrayDatum d = StringToArray(std::string(val), ti, copy_params);
737  if (d.is_null) { // val could be "NULL"
738  addArray(NullArray(ti));
739  } else {
740  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
741  throw std::runtime_error("Fixed length array for column " + cd->columnName +
742  " has incorrect length: " + std::string(val));
743  }
744  addArray(d);
745  }
746  } else {
747  addArray(NullArray(ti));
748  }
749  }
750  break;
751  }
752  case kPOINT:
753  case kLINESTRING:
754  case kPOLYGON:
755  case kMULTIPOLYGON:
756  addGeoString(val);
757  break;
758  default:
759  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
760  }
761 }
int8_t tinyintval
Definition: sqltypes.h:212
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
#define NULL_DOUBLE
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
void addBigint(const int64_t v)
Definition: Importer.h:242
OptionalStringVector & addStringArray()
Definition: Importer.h:254
void addSmallint(const int16_t v)
Definition: Importer.h:238
Definition: sqltypes.h:49
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:248
void addDouble(const double v)
Definition: Importer.h:246
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:411
int8_t boolval
Definition: sqltypes.h:211
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
int32_t intval
Definition: sqltypes.h:214
std::string to_string(char const *&&v)
void addFloat(const float v)
Definition: Importer.h:244
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
void addGeoString(const std::string_view v)
Definition: Importer.h:250
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:234
void addTinyint(const int8_t v)
Definition: Importer.h:236
int64_t bigintval
Definition: sqltypes.h:215
void addInt(const int32_t v)
Definition: Importer.h:240
int16_t smallintval
Definition: sqltypes.h:213
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:275
Definition: sqltypes.h:52
Definition: sqltypes.h:53
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:356
void addArray(const ArrayDatum &v)
Definition: Importer.h:252
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
#define CHECK(condition)
Definition: Logger.h:222
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:865
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::add_value ( const ColumnDescriptor * cd,
const TDatum &  val,
const bool  is_null 
)

Definition at line 1338 of file Importer.cpp.

References addArray(), addBigint(), import_export::addBinaryStringArray(), addBoolean(), addDouble(), addFloat(), addGeoString(), addInt(), addSmallint(), addString(), addStringArray(), addTinyint(), CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, decimal_to_int_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::TDatumToArrayDatum(), and run_benchmark_import::type.

1340  {
1341  const auto type = cd->columnType.is_decimal() ? decimal_to_int_type(cd->columnType)
1342  : cd->columnType.get_type();
1343  switch (type) {
1344  case kBOOLEAN: {
1345  if (is_null) {
1346  if (cd->columnType.get_notnull()) {
1347  throw std::runtime_error("NULL for column " + cd->columnName);
1348  }
1350  } else {
1351  addBoolean((int8_t)datum.val.int_val);
1352  }
1353  break;
1354  }
1355  case kTINYINT:
1356  if (!is_null) {
1357  addTinyint((int8_t)datum.val.int_val);
1358  } else {
1359  if (cd->columnType.get_notnull()) {
1360  throw std::runtime_error("NULL for column " + cd->columnName);
1361  }
1363  }
1364  break;
1365  case kSMALLINT:
1366  if (!is_null) {
1367  addSmallint((int16_t)datum.val.int_val);
1368  } else {
1369  if (cd->columnType.get_notnull()) {
1370  throw std::runtime_error("NULL for column " + cd->columnName);
1371  }
1373  }
1374  break;
1375  case kINT:
1376  if (!is_null) {
1377  addInt((int32_t)datum.val.int_val);
1378  } else {
1379  if (cd->columnType.get_notnull()) {
1380  throw std::runtime_error("NULL for column " + cd->columnName);
1381  }
1383  }
1384  break;
1385  case kBIGINT:
1386  if (!is_null) {
1387  addBigint(datum.val.int_val);
1388  } else {
1389  if (cd->columnType.get_notnull()) {
1390  throw std::runtime_error("NULL for column " + cd->columnName);
1391  }
1393  }
1394  break;
1395  case kFLOAT:
1396  if (!is_null) {
1397  addFloat((float)datum.val.real_val);
1398  } else {
1399  if (cd->columnType.get_notnull()) {
1400  throw std::runtime_error("NULL for column " + cd->columnName);
1401  }
1403  }
1404  break;
1405  case kDOUBLE:
1406  if (!is_null) {
1407  addDouble(datum.val.real_val);
1408  } else {
1409  if (cd->columnType.get_notnull()) {
1410  throw std::runtime_error("NULL for column " + cd->columnName);
1411  }
1413  }
1414  break;
1415  case kTEXT:
1416  case kVARCHAR:
1417  case kCHAR: {
1418  // @TODO(wei) for now, use empty string for nulls
1419  if (is_null) {
1420  if (cd->columnType.get_notnull()) {
1421  throw std::runtime_error("NULL for column " + cd->columnName);
1422  }
1423  addString(std::string());
1424  } else {
1425  addString(datum.val.str_val);
1426  }
1427  break;
1428  }
1429  case kTIME:
1430  case kTIMESTAMP:
1431  case kDATE: {
1432  if (!is_null) {
1433  addBigint(datum.val.int_val);
1434  } else {
1435  if (cd->columnType.get_notnull()) {
1436  throw std::runtime_error("NULL for column " + cd->columnName);
1437  }
1439  }
1440  break;
1441  }
1442  case kARRAY:
1443  if (is_null && cd->columnType.get_notnull()) {
1444  throw std::runtime_error("NULL for column " + cd->columnName);
1445  }
1446  if (IS_STRING(cd->columnType.get_subtype())) {
1447  OptionalStringVector& string_vec = addStringArray();
1448  addBinaryStringArray(datum, *string_vec);
1449  } else {
1450  if (!is_null) {
1451  addArray(TDatumToArrayDatum(datum, cd->columnType));
1452  } else {
1454  }
1455  }
1456  break;
1457  case kPOINT:
1458  case kLINESTRING:
1459  case kPOLYGON:
1460  case kMULTIPOLYGON:
1461  if (is_null) {
1462  if (cd->columnType.get_notnull()) {
1463  throw std::runtime_error("NULL for column " + cd->columnName);
1464  }
1465  addGeoString(std::string());
1466  } else {
1467  addGeoString(datum.val.str_val);
1468  }
1469  break;
1470  default:
1471  CHECK(false) << "TypedImportBuffer::add_value() does not support type " << type;
1472  }
1473 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
#define NULL_DOUBLE
void addBigint(const int64_t v)
Definition: Importer.h:242
OptionalStringVector & addStringArray()
Definition: Importer.h:254
void addSmallint(const int16_t v)
Definition: Importer.h:238
Definition: sqltypes.h:49
#define NULL_FLOAT
void addString(const std::string_view v)
Definition: Importer.h:248
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:95
void addDouble(const double v)
Definition: Importer.h:246
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:411
void addFloat(const float v)
Definition: Importer.h:244
void addGeoString(const std::string_view v)
Definition: Importer.h:250
ArrayDatum TDatumToArrayDatum(const TDatum &datum, const SQLTypeInfo &ti)
Definition: Importer.cpp:507
CONSTEXPR DEVICE bool is_null(const T &value)
void addBoolean(const int8_t v)
Definition: Importer.h:234
void addTinyint(const int8_t v)
Definition: Importer.h:236
void addInt(const int32_t v)
Definition: Importer.h:240
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:493
Definition: sqltypes.h:52
Definition: sqltypes.h:53
void addArray(const ArrayDatum &v)
Definition: Importer.h:252
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
#define CHECK(condition)
Definition: Logger.h:222
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
bool is_decimal() const
Definition: sqltypes.h:513
std::string columnName
void addBinaryStringArray(const TDatum &datum, std::vector< std::string > &string_vec)
Definition: Importer.cpp:455

+ Here is the call graph for this function:

size_t import_export::TypedImportBuffer::add_values ( const ColumnDescriptor * cd,
const TColumn &  data 
)

Definition at line 1009 of file Importer.cpp.

References addArray(), addStringArray(), bigint_buffer_, bool_buffer_, checked_malloc(), ColumnDescriptor::columnName, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_notnull(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), smallint_buffer_, string_buffer_, and tinyint_buffer_.

1009  {
1010  size_t dataSize = 0;
1011  if (cd->columnType.get_notnull()) {
1012  // We can't have any null values for this column; to have them is an error
1013  if (std::any_of(col.nulls.begin(), col.nulls.end(), [](int i) { return i != 0; })) {
1014  throw std::runtime_error("NULL for column " + cd->columnName);
1015  }
1016  }
1017 
1018  switch (cd->columnType.get_type()) {
1019  case kBOOLEAN: {
1020  dataSize = col.data.int_col.size();
1021  bool_buffer_->reserve(dataSize);
1022  for (size_t i = 0; i < dataSize; i++) {
1023  if (col.nulls[i]) {
1025  } else {
1026  bool_buffer_->push_back((int8_t)col.data.int_col[i]);
1027  }
1028  }
1029  break;
1030  }
1031  case kTINYINT: {
1032  dataSize = col.data.int_col.size();
1033  tinyint_buffer_->reserve(dataSize);
1034  for (size_t i = 0; i < dataSize; i++) {
1035  if (col.nulls[i]) {
1037  } else {
1038  tinyint_buffer_->push_back((int8_t)col.data.int_col[i]);
1039  }
1040  }
1041  break;
1042  }
1043  case kSMALLINT: {
1044  dataSize = col.data.int_col.size();
1045  smallint_buffer_->reserve(dataSize);
1046  for (size_t i = 0; i < dataSize; i++) {
1047  if (col.nulls[i]) {
1049  } else {
1050  smallint_buffer_->push_back((int16_t)col.data.int_col[i]);
1051  }
1052  }
1053  break;
1054  }
1055  case kINT: {
1056  dataSize = col.data.int_col.size();
1057  int_buffer_->reserve(dataSize);
1058  for (size_t i = 0; i < dataSize; i++) {
1059  if (col.nulls[i]) {
1061  } else {
1062  int_buffer_->push_back((int32_t)col.data.int_col[i]);
1063  }
1064  }
1065  break;
1066  }
1067  case kBIGINT:
1068  case kNUMERIC:
1069  case kDECIMAL: {
1070  dataSize = col.data.int_col.size();
1071  bigint_buffer_->reserve(dataSize);
1072  for (size_t i = 0; i < dataSize; i++) {
1073  if (col.nulls[i]) {
1075  } else {
1076  bigint_buffer_->push_back((int64_t)col.data.int_col[i]);
1077  }
1078  }
1079  break;
1080  }
1081  case kFLOAT: {
1082  dataSize = col.data.real_col.size();
1083  float_buffer_->reserve(dataSize);
1084  for (size_t i = 0; i < dataSize; i++) {
1085  if (col.nulls[i]) {
1086  float_buffer_->push_back(NULL_FLOAT);
1087  } else {
1088  float_buffer_->push_back((float)col.data.real_col[i]);
1089  }
1090  }
1091  break;
1092  }
1093  case kDOUBLE: {
1094  dataSize = col.data.real_col.size();
1095  double_buffer_->reserve(dataSize);
1096  for (size_t i = 0; i < dataSize; i++) {
1097  if (col.nulls[i]) {
1098  double_buffer_->push_back(NULL_DOUBLE);
1099  } else {
1100  double_buffer_->push_back((double)col.data.real_col[i]);
1101  }
1102  }
1103  break;
1104  }
1105  case kTEXT:
1106  case kVARCHAR:
1107  case kCHAR: {
1108  // TODO: for now, use empty string for nulls
1109  dataSize = col.data.str_col.size();
1110  string_buffer_->reserve(dataSize);
1111  for (size_t i = 0; i < dataSize; i++) {
1112  if (col.nulls[i]) {
1113  string_buffer_->push_back(std::string());
1114  } else {
1115  string_buffer_->push_back(col.data.str_col[i]);
1116  }
1117  }
1118  break;
1119  }
1120  case kTIME:
1121  case kTIMESTAMP:
1122  case kDATE: {
1123  dataSize = col.data.int_col.size();
1124  bigint_buffer_->reserve(dataSize);
1125  for (size_t i = 0; i < dataSize; i++) {
1126  if (col.nulls[i]) {
1128  } else {
1129  bigint_buffer_->push_back(static_cast<int64_t>(col.data.int_col[i]));
1130  }
1131  }
1132  break;
1133  }
1134  case kPOINT:
1135  case kLINESTRING:
1136  case kPOLYGON:
1137  case kMULTIPOLYGON: {
1138  dataSize = col.data.str_col.size();
1139  geo_string_buffer_->reserve(dataSize);
1140  for (size_t i = 0; i < dataSize; i++) {
1141  if (col.nulls[i]) {
1142  // TODO: add support for NULL geo
1143  geo_string_buffer_->push_back(std::string());
1144  } else {
1145  geo_string_buffer_->push_back(col.data.str_col[i]);
1146  }
1147  }
1148  break;
1149  }
1150  case kARRAY: {
1151  dataSize = col.data.arr_col.size();
1152  if (IS_STRING(cd->columnType.get_subtype())) {
1153  for (size_t i = 0; i < dataSize; i++) {
1154  OptionalStringVector& string_vec = addStringArray();
1155  if (!col.nulls[i]) {
1156  size_t stringArrSize = col.data.arr_col[i].data.str_col.size();
1157  for (size_t str_idx = 0; str_idx != stringArrSize; ++str_idx) {
1158  string_vec->push_back(col.data.arr_col[i].data.str_col[str_idx]);
1159  }
1160  }
1161  }
1162  } else {
1163  auto elem_ti = cd->columnType.get_subtype();
1164  switch (elem_ti) {
1165  case kBOOLEAN: {
1166  for (size_t i = 0; i < dataSize; i++) {
1167  if (col.nulls[i]) {
1169  } else {
1170  size_t len = col.data.arr_col[i].data.int_col.size();
1171  size_t byteSize = len * sizeof(int8_t);
1172  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1173  int8_t* p = buf;
1174  for (size_t j = 0; j < len; ++j) {
1175  // Explicitly checking the item for null because
1176  // casting null value (-128) to bool results
1177  // incorrect value 1.
1178  if (col.data.arr_col[i].nulls[j]) {
1179  *p = static_cast<int8_t>(
1181  } else {
1182  *(bool*)p = static_cast<bool>(col.data.arr_col[i].data.int_col[j]);
1183  }
1184  p += sizeof(bool);
1185  }
1186  addArray(ArrayDatum(byteSize, buf, false));
1187  }
1188  }
1189  break;
1190  }
1191  case kTINYINT: {
1192  for (size_t i = 0; i < dataSize; i++) {
1193  if (col.nulls[i]) {
1195  } else {
1196  size_t len = col.data.arr_col[i].data.int_col.size();
1197  size_t byteSize = len * sizeof(int8_t);
1198  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1199  int8_t* p = buf;
1200  for (size_t j = 0; j < len; ++j) {
1201  *(int8_t*)p = static_cast<int8_t>(col.data.arr_col[i].data.int_col[j]);
1202  p += sizeof(int8_t);
1203  }
1204  addArray(ArrayDatum(byteSize, buf, false));
1205  }
1206  }
1207  break;
1208  }
1209  case kSMALLINT: {
1210  for (size_t i = 0; i < dataSize; i++) {
1211  if (col.nulls[i]) {
1213  } else {
1214  size_t len = col.data.arr_col[i].data.int_col.size();
1215  size_t byteSize = len * sizeof(int16_t);
1216  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1217  int8_t* p = buf;
1218  for (size_t j = 0; j < len; ++j) {
1219  *(int16_t*)p =
1220  static_cast<int16_t>(col.data.arr_col[i].data.int_col[j]);
1221  p += sizeof(int16_t);
1222  }
1223  addArray(ArrayDatum(byteSize, buf, false));
1224  }
1225  }
1226  break;
1227  }
1228  case kINT: {
1229  for (size_t i = 0; i < dataSize; i++) {
1230  if (col.nulls[i]) {
1232  } else {
1233  size_t len = col.data.arr_col[i].data.int_col.size();
1234  size_t byteSize = len * sizeof(int32_t);
1235  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1236  int8_t* p = buf;
1237  for (size_t j = 0; j < len; ++j) {
1238  *(int32_t*)p =
1239  static_cast<int32_t>(col.data.arr_col[i].data.int_col[j]);
1240  p += sizeof(int32_t);
1241  }
1242  addArray(ArrayDatum(byteSize, buf, false));
1243  }
1244  }
1245  break;
1246  }
1247  case kBIGINT:
1248  case kNUMERIC:
1249  case kDECIMAL: {
1250  for (size_t i = 0; i < dataSize; i++) {
1251  if (col.nulls[i]) {
1253  } else {
1254  size_t len = col.data.arr_col[i].data.int_col.size();
1255  size_t byteSize = len * sizeof(int64_t);
1256  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1257  int8_t* p = buf;
1258  for (size_t j = 0; j < len; ++j) {
1259  *(int64_t*)p =
1260  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1261  p += sizeof(int64_t);
1262  }
1263  addArray(ArrayDatum(byteSize, buf, false));
1264  }
1265  }
1266  break;
1267  }
1268  case kFLOAT: {
1269  for (size_t i = 0; i < dataSize; i++) {
1270  if (col.nulls[i]) {
1272  } else {
1273  size_t len = col.data.arr_col[i].data.real_col.size();
1274  size_t byteSize = len * sizeof(float);
1275  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1276  int8_t* p = buf;
1277  for (size_t j = 0; j < len; ++j) {
1278  *(float*)p = static_cast<float>(col.data.arr_col[i].data.real_col[j]);
1279  p += sizeof(float);
1280  }
1281  addArray(ArrayDatum(byteSize, buf, false));
1282  }
1283  }
1284  break;
1285  }
1286  case kDOUBLE: {
1287  for (size_t i = 0; i < dataSize; i++) {
1288  if (col.nulls[i]) {
1290  } else {
1291  size_t len = col.data.arr_col[i].data.real_col.size();
1292  size_t byteSize = len * sizeof(double);
1293  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1294  int8_t* p = buf;
1295  for (size_t j = 0; j < len; ++j) {
1296  *(double*)p = static_cast<double>(col.data.arr_col[i].data.real_col[j]);
1297  p += sizeof(double);
1298  }
1299  addArray(ArrayDatum(byteSize, buf, false));
1300  }
1301  }
1302  break;
1303  }
1304  case kTIME:
1305  case kTIMESTAMP:
1306  case kDATE: {
1307  for (size_t i = 0; i < dataSize; i++) {
1308  if (col.nulls[i]) {
1310  } else {
1311  size_t len = col.data.arr_col[i].data.int_col.size();
1312  size_t byteWidth = sizeof(int64_t);
1313  size_t byteSize = len * byteWidth;
1314  int8_t* buf = (int8_t*)checked_malloc(len * byteSize);
1315  int8_t* p = buf;
1316  for (size_t j = 0; j < len; ++j) {
1317  *reinterpret_cast<int64_t*>(p) =
1318  static_cast<int64_t>(col.data.arr_col[i].data.int_col[j]);
1319  p += sizeof(int64_t);
1320  }
1321  addArray(ArrayDatum(byteSize, buf, false));
1322  }
1323  }
1324  break;
1325  }
1326  default:
1327  throw std::runtime_error("Invalid Array Type");
1328  }
1329  }
1330  break;
1331  }
1332  default:
1333  throw std::runtime_error("Invalid Type");
1334  }
1335  return dataSize;
1336 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
#define NULL_DOUBLE
OptionalStringVector & addStringArray()
Definition: Importer.h:254
Definition: sqltypes.h:49
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:530
std::optional< std::vector< std::string >> OptionalStringVector
Definition: Importer.h:95
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:411
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:525
std::vector< float > * float_buffer_
Definition: Importer.h:528
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:529
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
std::vector< int32_t > * int_buffer_
Definition: Importer.h:526
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:527
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:523
Definition: sqltypes.h:52
Definition: sqltypes.h:53
void addArray(const ArrayDatum &v)
Definition: Importer.h:252
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:524
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:865
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:531

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addArray ( const ArrayDatum & v)
inline

Definition at line 252 of file Importer.h.

References array_buffer_.

Referenced by add_value(), and add_values().

// Appends a copy of the array datum v to the end of array_buffer_.
252 { array_buffer_->push_back(v); }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:532

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBigint ( const int64_t  v)
inline

Definition at line 242 of file Importer.h.

References bigint_buffer_.

Referenced by add_value().

// Appends the 64-bit value v to the end of bigint_buffer_.
242 { bigint_buffer_->push_back(v); }
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:527

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addBoolean ( const int8_t  v)
inline

Definition at line 234 of file Importer.h.

References bool_buffer_.

Referenced by add_value().

// Appends v to the end of bool_buffer_; booleans are stored as int8_t values.
234 { bool_buffer_->push_back(v); }
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:523

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addDefaultValues ( const ColumnDescriptor * cd,
size_t  num_rows 
)

Definition at line 1475 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, ColumnDescriptor::columnName, ColumnDescriptor::columnType, convert_decimal_value_to_scale(), ColumnDescriptor::default_value, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_notnull(), SQLTypeInfo::get_type(), inline_fixed_encoding_null_val(), int_buffer_, is_null(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, StringDictionary::MAX_STRLEN, NULL_DOUBLE, NULL_FLOAT, import_export::NullArray(), import_export::delimited_parser::parse_string_array(), smallint_buffer_, string_array_buffer_, string_buffer_, import_export::StringToArray(), StringToDatum(), tinyint_buffer_, to_string(), and run_benchmark_import::type.

// Sizes the buffer that matches cd's column type to num_rows entries, filling
// every newly created entry with the column's default value, or with the
// type's NULL representation when the column has no default.
//
// cd:       descriptor of the column; supplies the type and the optional
//           textual default value.
// num_rows: target size passed to resize() on the selected buffer.
//
// Throws std::runtime_error when a text default exceeds
// StringDictionary::MAX_STRLEN or when a fixed-length array default has the
// wrong number of elements (or is NULL for a fixed-length string array).
//
// NOTE(review): listing lines 1495, 1504, 1520, 1531, 1559, 1571 and 1578 are
// absent from this rendering (the numbering skips them); consult Importer.cpp
// for the complete statements.
1475  {
// No default value configured means the column is filled with NULLs ...
1476  bool is_null = !cd->default_value.has_value();
// ... which must never happen for a NOT NULL column.
1477  CHECK(!(is_null && cd->columnType.get_notnull()));
1478  const auto type = cd->columnType.get_type();
// Local copy of the type info: StringToDatum() takes it by non-const reference.
1479  auto ti = cd->columnType;
// Textual default; falls back to the literal "NULL" when none is set.
1480  auto val = cd->default_value.value_or("NULL");
// Default-constructed copy parameters, used only by the array parsers below.
1481  CopyParams cp;
1482  switch (type) {
1483  case kBOOLEAN: {
1484  if (!is_null) {
1485  bool_buffer_->resize(num_rows, StringToDatum(val, ti).boolval);
1486  } else {
1487  bool_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1488  }
1489  break;
1490  }
1491  case kTINYINT: {
1492  if (!is_null) {
1493  tinyint_buffer_->resize(num_rows, StringToDatum(val, ti).tinyintval);
1494  } else {
1496  }
1497  break;
1498  }
1499  case kSMALLINT: {
1500  if (!is_null) {
1501  smallint_buffer_->resize(num_rows, StringToDatum(val, ti).smallintval);
1502  } else {
1503  smallint_buffer_->resize(num_rows,
1505  }
1506  break;
1507  }
1508  case kINT: {
1509  if (!is_null) {
1510  int_buffer_->resize(num_rows, StringToDatum(val, ti).intval);
1511  } else {
1512  int_buffer_->resize(num_rows, inline_fixed_encoding_null_val(cd->columnType));
1513  }
1514  break;
1515  }
1516  case kBIGINT: {
1517  if (!is_null) {
1518  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1519  } else {
1521  }
1522  break;
1523  }
1524  case kDECIMAL:
1525  case kNUMERIC: {
1526  if (!is_null) {
// The parsed decimal is rescaled from ti to the column's type before storing.
1527  const auto converted_decimal_value = convert_decimal_value_to_scale(
1528  StringToDatum(val, ti).bigintval, ti, cd->columnType);
1529  bigint_buffer_->resize(num_rows, converted_decimal_value);
1530  } else {
1532  }
1533  break;
1534  }
1535  case kFLOAT:
1536  if (!is_null) {
// Floating-point defaults are parsed with std::atof, not StringToDatum().
1537  float_buffer_->resize(num_rows,
1538  static_cast<float>(std::atof(std::string(val).c_str())));
1539  } else {
1540  float_buffer_->resize(num_rows, NULL_FLOAT);
1541  }
1542  break;
1543  case kDOUBLE:
1544  if (!is_null) {
1545  double_buffer_->resize(num_rows, std::atof(std::string(val).c_str()));
1546  } else {
1547  double_buffer_->resize(num_rows, NULL_DOUBLE);
1548  }
1549  break;
1550  case kTEXT:
1551  case kVARCHAR:
1552  case kCHAR: {
1553  if (is_null) {
// NULL text is stored as the empty string.
1554  string_buffer_->resize(num_rows, "");
1555  } else {
1556  if (val.length() > StringDictionary::MAX_STRLEN) {
1557  throw std::runtime_error("String too long for column " + cd->columnName +
1558  " was " + std::to_string(val.length()) + " max is " +
1560  }
1561  string_buffer_->resize(num_rows, val);
1562  }
1563  break;
1564  }
1565  case kTIME:
1566  case kTIMESTAMP:
1567  case kDATE:
1568  if (!is_null) {
1569  bigint_buffer_->resize(num_rows, StringToDatum(val, ti).bigintval);
1570  } else {
1572  }
1573  break;
1574  case kARRAY: {
1575  if (IS_STRING(ti.get_subtype())) {
1576  std::vector<std::string> string_vec;
1577  // Just parse string array, don't push it to buffer yet as we might throw
1579  std::string(val), cp, string_vec);
1580  if (!is_null) {
1581  // TODO: add support for NULL string arrays
// A positive get_size() marks a fixed-length array; its element count must match.
1582  if (ti.get_size() > 0) {
1583  auto sti = ti.get_elem_type();
1584  size_t expected_size = ti.get_size() / sti.get_size();
1585  size_t actual_size = string_vec.size();
1586  if (actual_size != expected_size) {
1587  throw std::runtime_error("Fixed length array column " + cd->columnName +
1588  " expects " + std::to_string(expected_size) +
1589  " values, received " +
1590  std::to_string(actual_size));
1591  }
1592  }
1593  string_array_buffer_->resize(num_rows, string_vec);
1594  } else {
1595  if (ti.get_size() > 0) {
1596  // TODO: remove once NULL fixlen arrays are allowed
1597  throw std::runtime_error("Fixed length array column " + cd->columnName +
1598  " currently cannot accept NULL arrays");
1599  }
1600  // TODO: add support for NULL string arrays, replace with addStringArray(),
1601  // for now add whatever parseStringArray() outputs for NULLs ("NULL")
1602  string_array_buffer_->resize(num_rows, string_vec);
1603  }
1604  } else {
1605  if (!is_null) {
1606  ArrayDatum d = StringToArray(std::string(val), ti, cp);
1607  if (d.is_null) { // val could be "NULL"
1608  array_buffer_->resize(num_rows, NullArray(ti));
1609  } else {
// For non-string arrays the check compares get_size() with d.length directly.
1610  if (ti.get_size() > 0 && static_cast<size_t>(ti.get_size()) != d.length) {
1611  throw std::runtime_error("Fixed length array for column " + cd->columnName +
1612  " has incorrect length: " + std::string(val));
1613  }
1614  array_buffer_->resize(num_rows, d);
1615  }
1616  } else {
1617  array_buffer_->resize(num_rows, NullArray(ti));
1618  }
1619  }
1620  break;
1621  }
1622  case kPOINT:
1623  case kLINESTRING:
1624  case kPOLYGON:
1625  case kMULTIPOLYGON:
// Geo columns store val as-is; with no default that is the literal "NULL".
1626  geo_string_buffer_->resize(num_rows, val);
1627  break;
1628  default:
1629  CHECK(false) << "TypedImportBuffer::addDefaultValues() does not support type "
1630  << type;
1631  }
1632 }
#define NULL_DOUBLE
Definition: sqltypes.h:49
#define NULL_FLOAT
std::vector< std::string > * string_buffer_
Definition: Importer.h:530
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:532
ArrayDatum NullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:411
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:525
std::vector< float > * float_buffer_
Definition: Importer.h:528
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:529
std::string to_string(char const *&&v)
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
std::vector< int32_t > * int_buffer_
Definition: Importer.h:526
CONSTEXPR DEVICE bool is_null(const T &value)
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:533
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:527
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
Datum StringToDatum(std::string_view s, SQLTypeInfo &ti)
Definition: Datum.cpp:275
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:523
std::optional< std::string > default_value
Definition: sqltypes.h:52
Definition: sqltypes.h:53
ArrayDatum StringToArray(const std::string &s, const SQLTypeInfo &ti, const CopyParams &copy_params)
Definition: Importer.cpp:356
int64_t convert_decimal_value_to_scale(const int64_t decimal_value, const SQLTypeInfo &type_info, const SQLTypeInfo &new_type_info)
Definition: Datum.cpp:556
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:524
#define CHECK(condition)
Definition: Logger.h:222
int64_t inline_fixed_encoding_null_val(const SQL_TYPE_INFO &ti)
static constexpr size_t MAX_STRLEN
Definition: sqltypes.h:45
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:336
std::string columnName
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:531

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedString ( const std::vector< std::string > &  string_vec)

Definition at line 527 of file Importer.cpp.

References CHECK, column_desc_, ColumnDescriptor::columnName, ColumnDescriptor::columnType, logger::ERROR, SQLTypeInfo::get_size(), getColumnDesc(), StringDictionary::getOrAddBulk(), LOG, StringDictionary::MAX_STRLEN, string_dict_, string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

// Dictionary-encodes every string in string_vec and stores the resulting ids
// in the dictionary id buffer whose element width matches the column size.
//
// string_vec: strings to encode; must outlive this call because only views
//             of them are handed to the dictionary.
//
// Throws std::runtime_error if a string is longer than
// StringDictionary::MAX_STRLEN, or (after logging it) for any std::exception
// raised while encoding.
//
// NOTE(review): listing line 528 is absent from this rendering (the numbering
// skips it); consult Importer.cpp for the complete statement.
527  {
529  std::vector<std::string_view> string_view_vec;
530  string_view_vec.reserve(string_vec.size());
// Validate lengths up front so nothing is added to the dictionary if any
// string is too long.
531  for (const auto& str : string_vec) {
532  if (str.size() > StringDictionary::MAX_STRLEN) {
533  std::ostringstream oss;
534  oss << "while processing dictionary for column " << getColumnDesc()->columnName
535  << " a string was detected too long for encoding, string length = "
536  << str.size() << ", first 100 characters are '" << str.substr(0, 100) << "'";
537  throw std::runtime_error(oss.str());
538  }
539  string_view_vec.push_back(str);
540  }
541  try {
// get_size() is the byte width of one encoded id: 1, 2 or 4 bytes select the
// i8, i16 or i32 buffer respectively. The buffer is resized to exactly one id
// per input string before the bulk call writes into it.
542  switch (column_desc_->columnType.get_size()) {
543  case 1:
544  string_dict_i8_buffer_->resize(string_view_vec.size());
545  string_dict_->getOrAddBulk(string_view_vec, string_dict_i8_buffer_->data());
546  break;
547  case 2:
548  string_dict_i16_buffer_->resize(string_view_vec.size());
549  string_dict_->getOrAddBulk(string_view_vec, string_dict_i16_buffer_->data());
550  break;
551  case 4:
552  string_dict_i32_buffer_->resize(string_view_vec.size());
553  string_dict_->getOrAddBulk(string_view_vec, string_dict_i32_buffer_->data());
554  break;
555  default:
556  CHECK(false);
557  }
558  } catch (std::exception& e) {
// Re-throw with the column name prepended so the failing column is identifiable.
559  std::ostringstream oss;
560  oss << "while processing dictionary for column " << getColumnDesc()->columnName
561  << " : " << e.what();
562  LOG(ERROR) << oss.str();
563  throw std::runtime_error(oss.str());
564  }
565 }
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
#define LOG(tag)
Definition: Logger.h:216
StringDictionary * string_dict_
Definition: Importer.h:542
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:536
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:538
const ColumnDescriptor * column_desc_
Definition: Importer.h:541
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:537
#define CHECK(condition)
Definition: Logger.h:222
const ColumnDescriptor * getColumnDesc() const
Definition: Importer.h:315
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDictEncodedStringArray ( const std::vector< OptionalStringVector > &  string_array_vec)
inline

Definition at line 265 of file Importer.h.

References anonymous_namespace{Utm.h}::a, CHECK, checked_malloc(), column_desc_, ColumnDescriptor::columnType, import_export::ImporterUtils::composeNullArray(), StringDictionary::getOrAddBulkArray(), StringDictionary::MAX_STRLEN, string_array_dict_buffer_, and string_dict_.

266  {
268 
269  // first check data is ok
270  for (auto& p : string_array_vec) {
271  if (!p) {
272  continue;
273  }
274  for (const auto& str : *p) {
275  if (str.size() > StringDictionary::MAX_STRLEN) {
276  throw std::runtime_error("String too long for dictionary encoding.");
277  }
278  }
279  }
280 
281  // to avoid copying, create a string view of each string in the
282  // `string_array_vec` where the array holding the string is *not null*
283  std::vector<std::vector<std::string_view>> string_view_array_vec;
284  for (auto& p : string_array_vec) {
285  if (!p) {
286  continue;
287  }
288  auto& array = string_view_array_vec.emplace_back();
289  for (const auto& str : *p) {
290  array.emplace_back(str);
291  }
292  }
293 
294  std::vector<std::vector<int32_t>> ids_array(0);
295  string_dict_->getOrAddBulkArray(string_view_array_vec, ids_array);
296 
297  size_t i, j;
298  for (i = 0, j = 0; i < string_array_vec.size(); ++i) {
299  if (!string_array_vec[i]) { // null array
300  string_array_dict_buffer_->push_back(
302  } else { // non-null array
303  auto& p = ids_array[j++];
304  size_t len = p.size() * sizeof(int32_t);
305  auto a = static_cast<int32_t*>(checked_malloc(len));
306  memcpy(a, &p[0], len);
307  string_array_dict_buffer_->push_back(
308  ArrayDatum(len, reinterpret_cast<int8_t*>(a), false));
309  }
310  }
311  }
StringDictionary * string_dict_
Definition: Importer.h:542
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:435
constexpr double a
Definition: Utm.h:32
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:208
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:539
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:45
void getOrAddBulkArray(const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec)
const ColumnDescriptor * column_desc_
Definition: Importer.h:541
#define CHECK(condition)
Definition: Logger.h:222
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType

+ Here is the call graph for this function:

void import_export::TypedImportBuffer::addDouble ( const double  v)
inline

Definition at line 246 of file Importer.h.

References double_buffer_.

Referenced by add_value().

// Appends the double value v to the end of double_buffer_.
246 { double_buffer_->push_back(v); }
std::vector< double > * double_buffer_
Definition: Importer.h:529

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addFloat ( const float  v)
inline

Definition at line 244 of file Importer.h.

References float_buffer_.

Referenced by add_value().

// Appends the float value v to the end of float_buffer_.
244 { float_buffer_->push_back(v); }
std::vector< float > * float_buffer_
Definition: Importer.h:528

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addGeoString ( const std::string_view  v)
inline

Definition at line 250 of file Importer.h.

References geo_string_buffer_.

Referenced by add_value().

// Constructs a new std::string from the view v at the end of geo_string_buffer_.
250 { geo_string_buffer_->emplace_back(v); }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:531

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addInt ( const int32_t  v)
inline

Definition at line 240 of file Importer.h.

References int_buffer_.

Referenced by add_value().

// Appends the 32-bit value v to the end of int_buffer_.
240 { int_buffer_->push_back(v); }
std::vector< int32_t > * int_buffer_
Definition: Importer.h:526

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addSmallint ( const int16_t  v)
inline

Definition at line 238 of file Importer.h.

References smallint_buffer_.

Referenced by add_value().

// Appends the 16-bit value v to the end of smallint_buffer_.
238 { smallint_buffer_->push_back(v); }
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:525

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addString ( const std::string_view  v)
inline

Definition at line 248 of file Importer.h.

References string_buffer_.

Referenced by add_value().

// Constructs a new std::string from the view v at the end of string_buffer_.
248 { string_buffer_->emplace_back(v); }
std::vector< std::string > * string_buffer_
Definition: Importer.h:530

+ Here is the caller graph for this function:

OptionalStringVector& import_export::TypedImportBuffer::addStringArray ( )
inline

Definition at line 254 of file Importer.h.

References string_array_buffer_.

Referenced by add_value(), and add_values().

254  {
255  string_array_buffer_->emplace_back(std::vector<std::string>{});
256  return string_array_buffer_->back();
257  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:533

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::addStringArray ( const OptionalStringVector & arr)
inline

Definition at line 259 of file Importer.h.

References string_array_buffer_.

// Appends a copy of arr to string_array_buffer_. arr may be an empty optional;
// addDictEncodedStringArray() treats such an entry as a null array.
259  {
260  string_array_buffer_->push_back(arr);
261  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:533
void import_export::TypedImportBuffer::addTinyint ( const int8_t  v)
inline

Definition at line 236 of file Importer.h.

References tinyint_buffer_.

Referenced by add_value().

// Appends the 8-bit value v to the end of tinyint_buffer_.
236 { tinyint_buffer_->push_back(v); }
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:524

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::clear ( )
inline

Definition at line 407 of file Importer.h.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kNUMERIC, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_array_dict_buffer_, string_buffer_, string_dict_i16_buffer_, string_dict_i32_buffer_, string_dict_i8_buffer_, and tinyint_buffer_.

407  {
408  switch (column_desc_->columnType.get_type()) {
409  case kBOOLEAN: {
410  bool_buffer_->clear();
411  break;
412  }
413  case kTINYINT: {
414  tinyint_buffer_->clear();
415  break;
416  }
417  case kSMALLINT: {
418  smallint_buffer_->clear();
419  break;
420  }
421  case kINT: {
422  int_buffer_->clear();
423  break;
424  }
425  case kBIGINT:
426  case kNUMERIC:
427  case kDECIMAL: {
428  bigint_buffer_->clear();
429  break;
430  }
431  case kFLOAT: {
432  float_buffer_->clear();
433  break;
434  }
435  case kDOUBLE: {
436  double_buffer_->clear();
437  break;
438  }
439  case kTEXT:
440  case kVARCHAR:
441  case kCHAR: {
442  string_buffer_->clear();
443  if (column_desc_->columnType.get_compression() == kENCODING_DICT) {
444  switch (column_desc_->columnType.get_size()) {
445  case 1:
446  string_dict_i8_buffer_->clear();
447  break;
448  case 2:
449  string_dict_i16_buffer_->clear();
450  break;
451  case 4:
452  string_dict_i32_buffer_->clear();
453  break;
454  default:
455  CHECK(false);
456  }
457  }
458  break;
459  }
460  case kDATE:
461  case kTIME:
462  case kTIMESTAMP:
463  bigint_buffer_->clear();
464  break;
465  case kARRAY: {
466  if (IS_STRING(column_desc_->columnType.get_subtype())) {
467  string_array_buffer_->clear();
468  string_array_dict_buffer_->clear();
469  } else {
470  array_buffer_->clear();
471  }
472  break;
473  }
474  case kPOINT:
475  case kLINESTRING:
476  case kPOLYGON:
477  case kMULTIPOLYGON:
478  geo_string_buffer_->clear();
479  break;
480  default:
481  CHECK(false);
482  }
483  }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:530
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:532
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:525
std::vector< float > * float_buffer_
Definition: Importer.h:528
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:529
std::vector< int32_t > * int_buffer_
Definition: Importer.h:526
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:539
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:536
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:533
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:527
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:523
Definition: sqltypes.h:52
Definition: sqltypes.h:53
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:538
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:541
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:537
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:524
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:45
SQLTypeInfo columnType
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:531

+ Here is the call graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const arrow::Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange &  slice_range,
BadRowsTracker *const  bad_rows_tracker 
)

Referenced by add_arrow_values().

+ Here is the caller graph for this function:

template<typename DATA_TYPE >
size_t import_export::TypedImportBuffer::convert_arrow_val_to_import_buffer ( const ColumnDescriptor *  cd,
const Array &  array,
std::vector< DATA_TYPE > &  buffer,
const ArraySliceRange &  slice_range,
import_export::BadRowsTracker *const  bad_rows_tracker 
)

Definition at line 823 of file Importer.cpp.

References col_idx, anonymous_namespace{ArrowImporter.h}::error_context(), geo_string_buffer_, SQLTypeInfo::get_type(), import_export::Importer::getCatalog(), Geospatial::GeoTypesFactory::getGeoColumns(), Geospatial::GeoTypesFactory::getNullGeoColumns(), import_buffers, import_export::BadRowsTracker::importer, import_export::BadRowsTracker::mutex, import_export::BadRowsTracker::rows, import_export::Importer::set_geo_physical_import_buffer(), and anonymous_namespace{ArrowImporter.h}::value_getter().

828  {
829  auto data =
830  std::make_unique<DataBuffer<DATA_TYPE>>(cd, array, buffer, bad_rows_tracker);
831  auto f_value_getter = value_getter(array, cd, bad_rows_tracker);
832  std::function<void(const int64_t)> f_add_geo_phy_cols = [&](const int64_t row) {};
833  if (bad_rows_tracker && cd->columnType.is_geometry()) {
834  f_add_geo_phy_cols = [&](const int64_t row) {
835  // Populate physical columns (ref. DBHandler::load_table)
836  std::vector<double> coords, bounds;
837  std::vector<int> ring_sizes, poly_rings;
838  int render_group = 0;
839  SQLTypeInfo ti;
840  // replace any unexpected exception from getGeoColumns or other
841  // on this path with a GeoImportException so that we wont over
842  // push a null to the logical column...
843  try {
844  SQLTypeInfo import_ti{ti};
845  if (array.IsNull(row)) {
846  Geospatial::GeoTypesFactory::getNullGeoColumns(
847  import_ti, coords, bounds, ring_sizes, poly_rings, false);
848  } else {
849  arrow_throw_if<GeoImportException>(
850  !Geospatial::GeoTypesFactory::getGeoColumns(geo_string_buffer_->back(),
851  ti,
852  coords,
853  bounds,
854  ring_sizes,
855  poly_rings,
856  false),
857  error_context(cd, bad_rows_tracker) + "Invalid geometry");
858  arrow_throw_if<GeoImportException>(
859  cd->columnType.get_type() != ti.get_type(),
860  error_context(cd, bad_rows_tracker) + "Geometry type mismatch");
861  }
862  auto col_idx_workpad = col_idx; // what a pitfall!!
863  import_export::Importer::set_geo_physical_import_buffer(
864  bad_rows_tracker->importer->getCatalog(),
865  cd,
866  *import_buffers,
867  col_idx_workpad,
868  coords,
869  bounds,
870  ring_sizes,
871  poly_rings,
872  render_group);
873  } catch (GeoImportException&) {
874  throw;
875  } catch (std::runtime_error& e) {
876  throw GeoImportException(e.what());
877  } catch (const std::exception& e) {
878  throw GeoImportException(e.what());
879  } catch (...) {
880  throw GeoImportException("unknown exception");
881  }
882  };
883  }
884  auto f_mark_a_bad_row = [&](const auto row) {
885  std::unique_lock<std::mutex> lck(bad_rows_tracker->mutex);
886  bad_rows_tracker->rows.insert(row - slice_range.first);
887  };
888  buffer.reserve(slice_range.second - slice_range.first);
889  for (size_t row = slice_range.first; row < slice_range.second; ++row) {
890  try {
891  *data << (array.IsNull(row) ? nullptr : f_value_getter(array, row));
892  f_add_geo_phy_cols(row);
893  } catch (GeoImportException&) {
894  f_mark_a_bad_row(row);
895  } catch (ArrowImporterException&) {
896  // trace bad rows of each column; otherwise rethrow.
897  if (bad_rows_tracker) {
898  *data << nullptr;
899  f_mark_a_bad_row(row);
900  } else {
901  throw;
902  }
903  }
904  }
905  return buffer.size();
906 }
auto value_getter(const arrow::Array &array, const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
static void getNullGeoColumns(SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:1146
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
Catalog_Namespace::Catalog & getCatalog()
Definition: Importer.h:835
std::set< int64_t > rows
Definition: Importer.h:79
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:518
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool promote_poly_to_mpoly=false)
Definition: Types.cpp:937
static void set_geo_physical_import_buffer(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, int render_group, const bool force_null=false)
Definition: Importer.cpp:1654
std::string error_context(const ColumnDescriptor *cd, import_export::BadRowsTracker *const bad_rows_tracker)
Definition: ArrowImporter.h:77
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:531

+ Here is the call graph for this function:

template<typename DATA_TYPE >
auto import_export::TypedImportBuffer::del_values ( std::vector< DATA_TYPE > &  buffer,
BadRowsTracker *const  bad_rows_tracker 
)
auto import_export::TypedImportBuffer::del_values ( const SQLTypes  type,
BadRowsTracker *const  bad_rows_tracker 
)
std::vector< DataBlockPtr > import_export::TypedImportBuffer::get_data_block_pointers ( const std::vector< std::unique_ptr< TypedImportBuffer >> &  import_buffers)
static

Definition at line 3035 of file Importer.cpp.

References DataBlockPtr::arraysPtr, threading_serial::async(), CHECK, CHECK_EQ, getStringArrayBuffer(), getTypeInfo(), import_buffers, SQLTypeInfo::is_number(), IS_STRING, SQLTypeInfo::is_string(), kARRAY, kBOOLEAN, kENCODING_DICT, kENCODING_NONE, DataBlockPtr::numbersPtr, run_benchmark_import::result, and DataBlockPtr::stringsPtr.

Referenced by import_export::fill_missing_columns(), import_export::Loader::loadImpl(), and import_export::Loader::loadToShard().

3036  {
3037  std::vector<DataBlockPtr> result(import_buffers.size());
3038  std::vector<std::pair<const size_t, std::future<int8_t*>>>
3039  encoded_data_block_ptrs_futures;
3040  // make all async calls to string dictionary here and then continue execution
3041  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
3042  if (import_buffers[buf_idx]->getTypeInfo().is_string() &&
3043  import_buffers[buf_idx]->getTypeInfo().get_compression() != kENCODING_NONE) {
3044  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
3045  CHECK_EQ(kENCODING_DICT, import_buffers[buf_idx]->getTypeInfo().get_compression());
3046 
3047  encoded_data_block_ptrs_futures.emplace_back(std::make_pair(
3048  buf_idx,
3049  std::async(std::launch::async, [buf_idx, &import_buffers, string_payload_ptr] {
3050  import_buffers[buf_idx]->addDictEncodedString(*string_payload_ptr);
3051  return import_buffers[buf_idx]->getStringDictBuffer();
3052  })));
3053  }
3054  }
3055 
3056  for (size_t buf_idx = 0; buf_idx < import_buffers.size(); buf_idx++) {
3057  DataBlockPtr p;
3058  if (import_buffers[buf_idx]->getTypeInfo().is_number() ||
3059  import_buffers[buf_idx]->getTypeInfo().is_time() ||
3060  import_buffers[buf_idx]->getTypeInfo().get_type() == kBOOLEAN) {
3061  p.numbersPtr = import_buffers[buf_idx]->getAsBytes();
3062  } else if (import_buffers[buf_idx]->getTypeInfo().is_string()) {
3063  auto string_payload_ptr = import_buffers[buf_idx]->getStringBuffer();
3064  if (import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_NONE) {
3065  p.stringsPtr = string_payload_ptr;
3066  } else {
3067  // This condition means we have column which is ENCODED string. We already made
3068  // Async request to gain the encoded integer values above so we should skip this
3069  // iteration and continue.
3070  continue;
3071  }
3072  } else if (import_buffers[buf_idx]->getTypeInfo().is_geometry()) {
3073  auto geo_payload_ptr = import_buffers[buf_idx]->getGeoStringBuffer();
3074  p.stringsPtr = geo_payload_ptr;
3075  } else {
3076  CHECK(import_buffers[buf_idx]->getTypeInfo().get_type() == kARRAY);
3077  if (IS_STRING(import_buffers[buf_idx]->getTypeInfo().get_subtype())) {
3078  CHECK(import_buffers[buf_idx]->getTypeInfo().get_compression() == kENCODING_DICT);
3079  import_buffers[buf_idx]->addDictEncodedStringArray(
3080  *import_buffers[buf_idx]->getStringArrayBuffer());
3081  p.arraysPtr = import_buffers[buf_idx]->getStringArrayDictBuffer();
3082  } else {
3083  p.arraysPtr = import_buffers[buf_idx]->getArrayBuffer();
3084  }
3085  }
3086  result[buf_idx] = p;
3087  }
3088 
3089  // wait for the async requests we made for string dictionary
3090  for (auto& encoded_ptr_future : encoded_data_block_ptrs_futures) {
3091  result[encoded_ptr_future.first].numbersPtr = encoded_ptr_future.second.get();
3092  }
3093  return result;
3094 }
#define CHECK_EQ(x, y)
Definition: Logger.h:230
const SQLTypeInfo & getTypeInfo() const
Definition: Importer.h:313
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:227
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:228
bool is_number() const
Definition: sqltypes.h:515
future< Result > async(Fn &&fn, Args &&...args)
std::vector< std::unique_ptr< TypedImportBuffer > > * import_buffers
Definition: Importer.h:518
#define IS_STRING(T)
Definition: sqltypes.h:250
#define CHECK(condition)
Definition: Logger.h:222
std::vector< OptionalStringVector > * getStringArrayBuffer() const
Definition: Importer.h:379
bool is_string() const
Definition: sqltypes.h:510
int8_t * numbersPtr
Definition: sqltypes.h:226

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getArrayBuffer ( ) const
inline

Definition at line 377 of file Importer.h.

References array_buffer_.

377 { return array_buffer_; }
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:532
int8_t* import_export::TypedImportBuffer::getAsBytes ( ) const
inline

Definition at line 319 of file Importer.h.

References bigint_buffer_, bool_buffer_, column_desc_, ColumnDescriptor::columnType, double_buffer_, float_buffer_, SQLTypeInfo::get_type(), int_buffer_, kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, smallint_buffer_, and tinyint_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

319  {
320  switch (column_desc_->columnType.get_type()) {
321  case kBOOLEAN:
322  return reinterpret_cast<int8_t*>(bool_buffer_->data());
323  case kTINYINT:
324  return reinterpret_cast<int8_t*>(tinyint_buffer_->data());
325  case kSMALLINT:
326  return reinterpret_cast<int8_t*>(smallint_buffer_->data());
327  case kINT:
328  return reinterpret_cast<int8_t*>(int_buffer_->data());
329  case kBIGINT:
330  case kNUMERIC:
331  case kDECIMAL:
332  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
333  case kFLOAT:
334  return reinterpret_cast<int8_t*>(float_buffer_->data());
335  case kDOUBLE:
336  return reinterpret_cast<int8_t*>(double_buffer_->data());
337  case kDATE:
338  case kTIME:
339  case kTIMESTAMP:
340  return reinterpret_cast<int8_t*>(bigint_buffer_->data());
341  default:
342  abort();
343  }
344  }
Definition: sqltypes.h:49
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:525
std::vector< float > * float_buffer_
Definition: Importer.h:528
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
std::vector< double > * double_buffer_
Definition: Importer.h:529
std::vector< int32_t > * int_buffer_
Definition: Importer.h:526
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:527
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:523
Definition: sqltypes.h:53
const ColumnDescriptor * column_desc_
Definition: Importer.h:541
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:524
Definition: sqltypes.h:45
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const ColumnDescriptor* import_export::TypedImportBuffer::getColumnDesc ( ) const
inline

Definition at line 315 of file Importer.h.

References column_desc_.

Referenced by addDictEncodedString(), foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::set_null(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::set_null(), and foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::set_null().

315 { return column_desc_; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:541

+ Here is the caller graph for this function:

size_t import_export::TypedImportBuffer::getElementSize ( ) const
inline

Definition at line 346 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_type(), kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, and kTINYINT.

346  {
347  switch (column_desc_->columnType.get_type()) {
348  case kBOOLEAN:
349  return sizeof((*bool_buffer_)[0]);
350  case kTINYINT:
351  return sizeof((*tinyint_buffer_)[0]);
352  case kSMALLINT:
353  return sizeof((*smallint_buffer_)[0]);
354  case kINT:
355  return sizeof((*int_buffer_)[0]);
356  case kBIGINT:
357  case kNUMERIC:
358  case kDECIMAL:
359  return sizeof((*bigint_buffer_)[0]);
360  case kFLOAT:
361  return sizeof((*float_buffer_)[0]);
362  case kDOUBLE:
363  return sizeof((*double_buffer_)[0]);
364  case kDATE:
365  case kTIME:
366  case kTIMESTAMP:
367  return sizeof((*bigint_buffer_)[0]);
368  default:
369  abort();
370  }
371  }
Definition: sqltypes.h:49
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
Definition: sqltypes.h:53
const ColumnDescriptor * column_desc_
Definition: Importer.h:541
Definition: sqltypes.h:45
SQLTypeInfo columnType

+ Here is the call graph for this function:

std::vector<std::string>* import_export::TypedImportBuffer::getGeoStringBuffer ( ) const
inline

Definition at line 375 of file Importer.h.

References geo_string_buffer_.

375 { return geo_string_buffer_; }
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:531
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::getStringArrayBuffer ( ) const
inline

Definition at line 379 of file Importer.h.

References string_array_buffer_.

Referenced by get_data_block_pointers().

379  {
380  return string_array_buffer_;
381  }
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:533

+ Here is the caller graph for this function:

std::vector<ArrayDatum>* import_export::TypedImportBuffer::getStringArrayDictBuffer ( ) const
inline

Definition at line 383 of file Importer.h.

References string_array_dict_buffer_.

383  {
384  return string_array_dict_buffer_;
385  }
std::vector< ArrayDatum > * string_array_dict_buffer_
Definition: Importer.h:539
std::vector<std::string>* import_export::TypedImportBuffer::getStringBuffer ( ) const
inline

Definition at line 373 of file Importer.h.

References string_buffer_.

373 { return string_buffer_; }
std::vector< std::string > * string_buffer_
Definition: Importer.h:530
int8_t* import_export::TypedImportBuffer::getStringDictBuffer ( ) const
inline

Definition at line 387 of file Importer.h.

References column_desc_, ColumnDescriptor::columnType, SQLTypeInfo::get_size(), string_dict_i16_buffer_, string_dict_i32_buffer_, and string_dict_i8_buffer_.

Referenced by import_export::anonymous_namespace{Importer.cpp}::int_value_at().

387  {
388  switch (column_desc_->columnType.get_size()) {
389  case 1:
390  return reinterpret_cast<int8_t*>(string_dict_i8_buffer_->data());
391  case 2:
392  return reinterpret_cast<int8_t*>(string_dict_i16_buffer_->data());
393  case 4:
394  return reinterpret_cast<int8_t*>(string_dict_i32_buffer_->data());
395  default:
396  abort();
397  }
398  }
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
std::vector< uint8_t > * string_dict_i8_buffer_
Definition: Importer.h:536
std::vector< int32_t > * string_dict_i32_buffer_
Definition: Importer.h:538
const ColumnDescriptor * column_desc_
Definition: Importer.h:541
std::vector< uint16_t > * string_dict_i16_buffer_
Definition: Importer.h:537
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionary* import_export::TypedImportBuffer::getStringDictionary ( ) const
inline

Definition at line 317 of file Importer.h.

References string_dict_.

317 { return string_dict_; }
StringDictionary * string_dict_
Definition: Importer.h:542
const SQLTypeInfo& import_export::TypedImportBuffer::getTypeInfo ( ) const
inline

Definition at line 313 of file Importer.h.

References column_desc_, and ColumnDescriptor::columnType.

Referenced by import_export::anonymous_namespace{Importer.cpp}::double_value_at(), import_export::anonymous_namespace{Importer.cpp}::float_value_at(), get_data_block_pointers(), and import_export::anonymous_namespace{Importer.cpp}::int_value_at().

313 { return column_desc_->columnType; }
const ColumnDescriptor * column_desc_
Definition: Importer.h:541
SQLTypeInfo columnType

+ Here is the caller graph for this function:

void import_export::TypedImportBuffer::pop_value ( )

Definition at line 763 of file Importer.cpp.

References array_buffer_, bigint_buffer_, bool_buffer_, CHECK, column_desc_, ColumnDescriptor::columnType, decimal_to_int_type(), double_buffer_, float_buffer_, geo_string_buffer_, SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), int_buffer_, SQLTypeInfo::is_decimal(), IS_STRING, kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDOUBLE, kFLOAT, kINT, kLINESTRING, kMULTIPOLYGON, kPOINT, kPOLYGON, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, smallint_buffer_, string_array_buffer_, string_buffer_, tinyint_buffer_, and run_benchmark_import::type.

763  {
764  const auto type = column_desc_->columnType.is_decimal()
765  ? decimal_to_int_type(column_desc_->columnType)
766  : column_desc_->columnType.get_type();
767  switch (type) {
768  case kBOOLEAN:
769  bool_buffer_->pop_back();
770  break;
771  case kTINYINT:
772  tinyint_buffer_->pop_back();
773  break;
774  case kSMALLINT:
775  smallint_buffer_->pop_back();
776  break;
777  case kINT:
778  int_buffer_->pop_back();
779  break;
780  case kBIGINT:
781  bigint_buffer_->pop_back();
782  break;
783  case kFLOAT:
784  float_buffer_->pop_back();
785  break;
786  case kDOUBLE:
787  double_buffer_->pop_back();
788  break;
789  case kTEXT:
790  case kVARCHAR:
791  case kCHAR:
792  string_buffer_->pop_back();
793  break;
794  case kDATE:
795  case kTIME:
796  case kTIMESTAMP:
797  bigint_buffer_->pop_back();
798  break;
799  case kARRAY:
800  if (IS_STRING(column_desc_->columnType.get_subtype())) {
801  string_array_buffer_->pop_back();
802  } else {
803  array_buffer_->pop_back();
804  }
805  break;
806  case kPOINT:
807  case kLINESTRING:
808  case kPOLYGON:
809  case kMULTIPOLYGON:
810  geo_string_buffer_->pop_back();
811  break;
812  default:
813  CHECK(false) << "TypedImportBuffer::pop_value() does not support type " << type;
814  }
815 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:330
Definition: sqltypes.h:49
std::vector< std::string > * string_buffer_
Definition: Importer.h:530
std::vector< ArrayDatum > * array_buffer_
Definition: Importer.h:532
std::vector< int16_t > * smallint_buffer_
Definition: Importer.h:525
std::vector< float > * float_buffer_
Definition: Importer.h:528
std::vector< double > * double_buffer_
Definition: Importer.h:529
std::vector< int32_t > * int_buffer_
Definition: Importer.h:526
std::vector< OptionalStringVector > * string_array_buffer_
Definition: Importer.h:533
std::vector< int64_t > * bigint_buffer_
Definition: Importer.h:527
std::vector< int8_t > * bool_buffer_
Definition: Importer.h:523
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:493
Definition: sqltypes.h:52
Definition: sqltypes.h:53
Definition: sqltypes.h:41
#define IS_STRING(T)
Definition: sqltypes.h:250
const ColumnDescriptor * column_desc_
Definition: Importer.h:541
std::vector< int8_t > * tinyint_buffer_
Definition: Importer.h:524
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:45
SQLTypeInfo columnType
bool is_decimal() const
Definition: sqltypes.h:513
std::vector< std::string > * geo_string_buffer_
Definition: Importer.h:531

+ Here is the call graph for this function:

bool import_export::TypedImportBuffer::stringDictCheckpoint ( )
inline

Definition at line 400 of file Importer.h.

References StringDictionary::checkpoint(), and string_dict_.

400  {
401  if (string_dict_ == nullptr) {
402  return true;
403  }
404  return string_dict_->checkpoint();
405  }
StringDictionary * string_dict_
Definition: Importer.h:542
bool checkpoint() noexcept

+ Here is the call graph for this function:

Member Data Documentation

union { ... }
union { ... }
std::vector<ArrayDatum>* import_export::TypedImportBuffer::array_buffer_
std::vector<int64_t>* import_export::TypedImportBuffer::bigint_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::bool_buffer_
size_t import_export::TypedImportBuffer::col_idx

Definition at line 519 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer().

const ColumnDescriptor* import_export::TypedImportBuffer::column_desc_
private
std::vector<double>* import_export::TypedImportBuffer::double_buffer_
std::vector<float>* import_export::TypedImportBuffer::float_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::geo_string_buffer_
std::vector<std::unique_ptr<TypedImportBuffer> >* import_export::TypedImportBuffer::import_buffers

Definition at line 518 of file Importer.h.

Referenced by convert_arrow_val_to_import_buffer(), and get_data_block_pointers().

std::vector<int32_t>* import_export::TypedImportBuffer::int_buffer_
std::vector<int16_t>* import_export::TypedImportBuffer::smallint_buffer_
std::vector<OptionalStringVector>* import_export::TypedImportBuffer::string_array_buffer_
std::vector<ArrayDatum>* import_export::TypedImportBuffer::string_array_dict_buffer_
std::vector<std::string>* import_export::TypedImportBuffer::string_buffer_
StringDictionary* import_export::TypedImportBuffer::string_dict_
private
std::vector<uint16_t>* import_export::TypedImportBuffer::string_dict_i16_buffer_
std::vector<int32_t>* import_export::TypedImportBuffer::string_dict_i32_buffer_
std::vector<uint8_t>* import_export::TypedImportBuffer::string_dict_i8_buffer_
std::vector<int8_t>* import_export::TypedImportBuffer::tinyint_buffer_

The documentation for this class was generated from the following files: