OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType > Class Template Reference

#include <ParquetInPlaceEncoder.h>

+ Inheritance diagram for foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >:
+ Collaboration diagram for foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >:

Public Member Functions

 TypedParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const ColumnDescriptor *column_desciptor, const parquet::ColumnDescriptor *parquet_column_descriptor)
 
 TypedParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size)
 
void validate (const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const override
 
std::string integralTypeToString (const V &element) const
 
bool isIntegralType (const SQLTypeInfo &type) const
 
std::string elementToString (const V &element) const
 
std::string encodedDataToString (const int8_t *bytes) const override
 
void setDetectBufferConverterType ()
 
void validateUsingEncodersColumnType (const int8_t *parquet_data, const int64_t j) const override
 
void reserve (const size_t num_append_elements) override
 
void appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void validateAndAppendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices) override
 
void eraseInvalidIndicesInBuffer (const InvalidRowGroupIndices &invalid_indices) override
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void encodeAndCopyContiguous (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements) override
 
void setNull (int8_t *omnisci_data_bytes) override
 
void copy (const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination) override
 
std::shared_ptr< ChunkMetadata > getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
 
- Public Member Functions inherited from foreign_storage::ParquetInPlaceEncoder
 ParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size)
 
- Public Member Functions inherited from foreign_storage::ParquetScalarEncoder
 ParquetScalarEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual void encodeAndCopy (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes)=0
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
RejectedRowIndices getRejectedRowIndices () const
 
virtual void disableMetadataStatsValidation ()
 
virtual void initializeErrorTracking ()
 
virtual void initializeColumnType (const SQLTypeInfo &column_type)
 

Protected Member Functions

virtual bool encodingIsIdentityForSameTypes () const
 
std::pair< T, T > getUnencodedStats (std::shared_ptr< parquet::Statistics > stats) const
 

Private Member Functions

std::pair< V, V > getEncodedStats (const parquet::ColumnDescriptor *parquet_column_descriptor, std::shared_ptr< parquet::Statistics > stats)
 

Static Private Member Functions

static ChunkStats getUpdatedStats (V &stats_min, V &stats_max, const SQLTypeInfo &column_type)
 

Private Attributes

int64_t current_batch_offset_ = 0
 

Additional Inherited Members

- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr< ChunkMetadata > createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 
- Protected Attributes inherited from foreign_storage::ParquetInPlaceEncoder
const size_t omnisci_data_type_byte_size_
 
const size_t parquet_data_type_byte_size_
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 
bool is_error_tracking_enabled_
 
RejectedRowIndices invalid_indices_
 
size_t current_chunk_offset_
 
SQLTypeInfo column_type_
 
bool validate_metadata_stats_
 

Detailed Description

template<typename V, typename T, typename NullType = V>
class foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >

Definition at line 114 of file ParquetInPlaceEncoder.h.

Constructor & Destructor Documentation

template<typename V, typename T, typename NullType = V>
foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::TypedParquetInPlaceEncoder ( Data_Namespace::AbstractBuffer *  buffer,
const ColumnDescriptor *  column_desciptor,
const parquet::ColumnDescriptor *  parquet_column_descriptor 
)
inline

Definition at line 116 of file ParquetInPlaceEncoder.h.

120  buffer,
121  sizeof(V),
122  parquet::GetTypeByteSize(parquet_column_descriptor->physical_type()))
123  , current_batch_offset_(0) {
124  if (auto detect_buffer = dynamic_cast<TypedParquetDetectBuffer*>(buffer_)) {
126  }
127  }
ParquetInPlaceEncoder(Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size)
Data_Namespace::AbstractBuffer * buffer_
template<typename V, typename T, typename NullType = V>
foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::TypedParquetInPlaceEncoder ( Data_Namespace::AbstractBuffer *  buffer,
const size_t  omnisci_data_type_byte_size,
const size_t  parquet_data_type_byte_size 
)
inline

Definition at line 129 of file ParquetInPlaceEncoder.h.

132  : ParquetInPlaceEncoder(buffer, sizeof(V), parquet_data_type_byte_size)
133  , current_batch_offset_(0) {
134  if (auto detect_buffer = dynamic_cast<TypedParquetDetectBuffer*>(buffer_)) {
136  }
137  }
ParquetInPlaceEncoder(Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size)
Data_Namespace::AbstractBuffer * buffer_

Member Function Documentation

template<typename V, typename T, typename NullType = V>
void foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::appendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

This is a specialization of ParquetInPlaceEncoder::appendData for known types that allows for optimization.

See comment for ParquetInPlaceEncoder::appendData for details.

Reimplemented from foreign_storage::ParquetInPlaceEncoder.

Definition at line 264 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::ParquetStringEncoder< V >::appendData(), foreign_storage::TypedParquetInPlaceEncoder< V, V >::appendDataTrackErrors(), and foreign_storage::TypedParquetInPlaceEncoder< V, V >::validateAndAppendData().

268  {
269  if (std::is_same<V, T>::value && values_read == levels_read) {
271  for (int64_t i = 0; i < levels_read; ++i) {
273  values + i * omnisci_data_type_byte_size_);
274  }
275  }
276  buffer_->append(values, levels_read * omnisci_data_type_byte_size_);
277  } else {
279  def_levels, rep_levels, values_read, levels_read, values);
280  }
281  }
virtual void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes)=0
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
Data_Namespace::AbstractBuffer * buffer_
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override

+ Here is the caller graph for this function:

template<typename V, typename T, typename NullType = V>
void foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::appendDataTrackErrors ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

Implements foreign_storage::ParquetEncoder.

Definition at line 191 of file ParquetInPlaceEncoder.h.

195  {
197  int64_t i, j;
198  for (i = 0, j = 0; i < levels_read; ++i) {
199  if (def_levels[i]) {
200  try {
201  CHECK(j < values_read);
202  validateUsingEncodersColumnType(values, j++);
203  } catch (const std::runtime_error& error) {
205  }
206  } else if (column_type_.get_notnull()) { // item is null for NOT NULL column
208  }
209  }
210  current_chunk_offset_ += levels_read;
211  appendData(def_levels, rep_levels, values_read, levels_read, values);
212  }
RejectedRowIndices invalid_indices_
void validateUsingEncodersColumnType(const int8_t *parquet_data, const int64_t j) const override
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
#define CHECK(condition)
Definition: Logger.h:291
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
template<typename V, typename T, typename NullType = V>
void foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::copy ( const int8_t *  omnisci_data_bytes_source,
int8_t *  omnisci_data_bytes_destination 
)
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 299 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::ParquetStringEncoder< V >::encodeAndCopy().

300  {
301  const auto& omnisci_data_value_source =
302  reinterpret_cast<const V*>(omnisci_data_bytes_source)[0];
303  auto& omnisci_data_value_destination =
304  reinterpret_cast<V*>(omnisci_data_bytes_destination)[0];
305  omnisci_data_value_destination = omnisci_data_value_source;
306  }

+ Here is the caller graph for this function:

template<typename V, typename T, typename NullType = V>
std::string foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::elementToString ( const V &  element) const
inline

Definition at line 156 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::TypedParquetInPlaceEncoder< V, V >::encodedDataToString(), and foreign_storage::TypedParquetInPlaceEncoder< V, V >::setDetectBufferConverterType().

156  {
157  // handle specialized cases that require specific formating when converting to string
158  auto null_value = get_null_value<NullType>();
159  if (element == null_value) {
160  return "NULL";
161  }
163  return integralTypeToString(element);
164  }
165  return std::to_string(element);
166  }
bool isIntegralType(const SQLTypeInfo &type) const
std::string to_string(char const *&&v)
std::string integralTypeToString(const V &element) const

+ Here is the caller graph for this function:

template<typename V, typename T, typename NullType = V>
void foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::encodeAndCopyContiguous ( const int8_t *  parquet_data_bytes,
int8_t *  omnisci_data_bytes,
const size_t  num_elements 
)
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 283 of file ParquetInPlaceEncoder.h.

285  {
286  auto parquet_data_ptr = reinterpret_cast<const T*>(parquet_data_bytes);
287  auto omnisci_data_ptr = reinterpret_cast<V*>(omnisci_data_bytes);
288  for (size_t i = 0; i < num_elements; ++i) {
289  encodeAndCopy(reinterpret_cast<const int8_t*>(&parquet_data_ptr[i]),
290  reinterpret_cast<int8_t*>(&omnisci_data_ptr[i]));
291  }
292  }
virtual void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes)=0
template<typename V, typename T, typename NullType = V>
std::string foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::encodedDataToString ( const int8_t *  bytes) const
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 168 of file ParquetInPlaceEncoder.h.

168  {
169  const auto& element = reinterpret_cast<const V*>(bytes)[0];
170  return elementToString(element);
171  }
std::string elementToString(const V &element) const
template<typename V, typename T, typename NullType = V>
virtual bool foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::encodingIsIdentityForSameTypes ( ) const
inline protected virtual

Reimplemented in foreign_storage::ParquetStringEncoder< V >, foreign_storage::ParquetFixedLengthEncoder< V, T, NullType >, and foreign_storage::ParquetDecimalEncoder< V, T, NullType >.

Definition at line 362 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::TypedParquetInPlaceEncoder< V, V >::appendData().

362 { return false; }

+ Here is the caller graph for this function:

template<typename V, typename T, typename NullType = V>
void foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::eraseInvalidIndicesInBuffer ( const InvalidRowGroupIndices &  invalid_indices)
inline override virtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 239 of file ParquetInPlaceEncoder.h.

240  {
241  if (invalid_indices.empty()) {
242  return;
243  }
244  auto omnisci_data_values = reinterpret_cast<V*>(buffer_->getMemoryPtr());
246  size_t num_elements = buffer_->size() / omnisci_data_type_byte_size_;
247  std::remove_if(
248  omnisci_data_values, omnisci_data_values + num_elements, [&](const V& value) {
249  const V* start = omnisci_data_values;
250  auto index = std::distance(start, &value);
251  return invalid_indices.find(index) != invalid_indices.end();
252  });
253  size_t num_bytes_erased = invalid_indices.size() * omnisci_data_type_byte_size_;
254  CHECK(num_bytes_erased <= buffer_->size());
255  buffer_->setSize(buffer_->size() - num_bytes_erased);
256  }
virtual int8_t * getMemoryPtr()=0
void setSize(const size_t size)
#define CHECK(condition)
Definition: Logger.h:291
Data_Namespace::AbstractBuffer * buffer_
template<typename V, typename T, typename NullType = V>
std::pair<V, V> foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::getEncodedStats ( const parquet::ColumnDescriptor *  parquet_column_descriptor,
std::shared_ptr< parquet::Statistics >  stats 
)
inline private

Definition at line 394 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::TypedParquetInPlaceEncoder< V, V >::getRowGroupMetadata().

396  {
397  V stats_min, stats_max;
398  auto min_string = stats->EncodeMin();
399  auto max_string = stats->EncodeMax();
400  if constexpr (std::is_same<T, parquet::FixedLenByteArray>::value) {
401  CHECK_EQ(parquet_column_descriptor->physical_type(),
402  parquet::Type::FIXED_LEN_BYTE_ARRAY);
403  parquet::FixedLenByteArray min_byte_array, max_byte_array;
404  min_byte_array.ptr = reinterpret_cast<const uint8_t*>(min_string.data());
405  max_byte_array.ptr = reinterpret_cast<const uint8_t*>(max_string.data());
406  encodeAndCopy(reinterpret_cast<int8_t*>(&min_byte_array),
407  reinterpret_cast<int8_t*>(&stats_min));
408  encodeAndCopy(reinterpret_cast<int8_t*>(&max_byte_array),
409  reinterpret_cast<int8_t*>(&stats_max));
410  } else if constexpr (std::is_same<T, parquet::ByteArray>::value) {
411  CHECK_EQ(parquet_column_descriptor->physical_type(), parquet::Type::BYTE_ARRAY);
412  parquet::ByteArray min_byte_array, max_byte_array;
413  min_byte_array.ptr = reinterpret_cast<const uint8_t*>(min_string.data());
414  min_byte_array.len = min_string.length();
415  max_byte_array.ptr = reinterpret_cast<const uint8_t*>(max_string.data());
416  max_byte_array.len = max_string.length();
417  encodeAndCopy(reinterpret_cast<int8_t*>(&min_byte_array),
418  reinterpret_cast<int8_t*>(&stats_min));
419  encodeAndCopy(reinterpret_cast<int8_t*>(&max_byte_array),
420  reinterpret_cast<int8_t*>(&stats_max));
421  } else {
422  encodeAndCopy(reinterpret_cast<int8_t*>(min_string.data()),
423  reinterpret_cast<int8_t*>(&stats_min));
424  encodeAndCopy(reinterpret_cast<int8_t*>(max_string.data()),
425  reinterpret_cast<int8_t*>(&stats_max));
426  }
427  return {stats_min, stats_max};
428  }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
virtual void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes)=0
dictionary stats
Definition: report.py:116

+ Here is the caller graph for this function:

template<typename V, typename T, typename NullType = V>
std::shared_ptr<ChunkMetadata> foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::getRowGroupMetadata ( const parquet::RowGroupMetaData *  group_metadata,
const int  parquet_column_index,
const SQLTypeInfo &  column_type 
)
inline override virtual

Reimplemented from foreign_storage::ParquetEncoder.

Definition at line 308 of file ParquetInPlaceEncoder.h.

311  {
312  auto metadata = ParquetEncoder::createMetadata(column_type);
313  auto column_metadata = group_metadata->ColumnChunk(parquet_column_index);
314 
315  // update statistics
316  auto parquet_column_descriptor =
317  group_metadata->schema()->Column(parquet_column_index);
318 
319  if (ParquetEncoder::validate_metadata_stats_ && group_metadata->num_rows() > 0) {
320  auto stats = validate_and_get_column_metadata_statistics(column_metadata.get());
321  if (stats->HasMinMax()) {
322  // validate statistics if validation applicable as part of encoding
323  if (auto parquet_scalar_validator =
324  dynamic_cast<ParquetMetadataValidator*>(this)) {
325  try {
326  parquet_scalar_validator->validate(
327  stats,
328  column_type.is_array() ? column_type.get_elem_type() : column_type);
329  } catch (const std::exception& e) {
330  std::stringstream error_message;
331  error_message
332  << e.what() << " Error validating statistics of Parquet column '"
333  << group_metadata->schema()->Column(parquet_column_index)->name() << "'";
334  throw std::runtime_error(error_message.str());
335  }
336  }
337 
338  auto [stats_min, stats_max] = getEncodedStats(parquet_column_descriptor, stats);
339  auto updated_chunk_stats = getUpdatedStats(stats_min, stats_max, column_type);
340  metadata->fillChunkStats(updated_chunk_stats.min,
341  updated_chunk_stats.max,
342  metadata->chunkStats.has_nulls);
343  }
344  auto null_count = stats->null_count();
345  validateNullCount(group_metadata->schema()->Column(parquet_column_index)->name(),
346  null_count,
347  column_type);
348  metadata->chunkStats.has_nulls = null_count > 0;
349  }
350 
351  // update sizing
352  metadata->numBytes =
353  sizeof(NullType) // use NullType byte size since it is guaranteed to
354  // be the byte size of stored data
355  * column_metadata->num_values();
356  metadata->numElements = group_metadata->num_rows();
357 
358  return metadata;
359  }
std::shared_ptr< parquet::Statistics > validate_and_get_column_metadata_statistics(const parquet::ColumnChunkMetaData *column_metadata)
dictionary stats
Definition: report.py:116
static void validateNullCount(const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
static std::shared_ptr< ChunkMetadata > createMetadata(const SQLTypeInfo &column_type)
std::pair< V, V > getEncodedStats(const parquet::ColumnDescriptor *parquet_column_descriptor, std::shared_ptr< parquet::Statistics > stats)
static ChunkStats getUpdatedStats(V &stats_min, V &stats_max, const SQLTypeInfo &column_type)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:975
bool is_array() const
Definition: sqltypes.h:583
template<typename V, typename T, typename NullType = V>
std::pair<T, T> foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::getUnencodedStats ( std::shared_ptr< parquet::Statistics >  stats) const
inline protected

Definition at line 364 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::ParquetDateInSecondsEncoder< NullType >::validate(), foreign_storage::ParquetTimestampEncoder< V, T, conversion_denominator *kSecsPerDay, NullType >::validate(), foreign_storage::ParquetUnsignedFixedLengthEncoder< V, T, U, NullType >::validate(), and foreign_storage::ParquetFixedLengthEncoder< V, T, NullType >::validateIntegralOrFloatingPointMetadata().

364  {
365  T stats_min = reinterpret_cast<T*>(stats->EncodeMin().data())[0];
366  T stats_max = reinterpret_cast<T*>(stats->EncodeMax().data())[0];
367  return {stats_min, stats_max};
368  }
dictionary stats
Definition: report.py:116

+ Here is the caller graph for this function:

template<typename V, typename T, typename NullType = V>
static ChunkStats foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::getUpdatedStats ( V &  stats_min,
V &  stats_max,
const SQLTypeInfo &  column_type 
)
inline static private

Definition at line 371 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::TypedParquetInPlaceEncoder< V, V >::getRowGroupMetadata().

373  {
374  ForeignStorageBuffer buffer;
375  buffer.initEncoder(column_type);
376  auto encoder = buffer.getEncoder();
377 
378  if (column_type.is_array()) {
379  ArrayDatum min_datum(
380  sizeof(V), reinterpret_cast<int8_t*>(&stats_min), false, DoNothingDeleter());
381  ArrayDatum max_datum(
382  sizeof(V), reinterpret_cast<int8_t*>(&stats_max), false, DoNothingDeleter());
383  std::vector<ArrayDatum> min_max_datums{min_datum, max_datum};
384  encoder->updateStats(&min_max_datums, 0, 1);
385  } else {
386  encoder->updateStats(reinterpret_cast<int8_t*>(&stats_min), 1);
387  encoder->updateStats(reinterpret_cast<int8_t*>(&stats_max), 1);
388  }
389  auto updated_chunk_stats_metadata = std::make_shared<ChunkMetadata>();
390  encoder->getMetadata(updated_chunk_stats_metadata);
391  return updated_chunk_stats_metadata->chunkStats;
392  }
void initEncoder(const SQLTypeInfo &tmp_sql_type)
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
bool is_array() const
Definition: sqltypes.h:583

+ Here is the caller graph for this function:

template<typename V, typename T, typename NullType = V>
std::string foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::integralTypeToString ( const V &  element) const
inline

Definition at line 145 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::TypedParquetInPlaceEncoder< V, V >::elementToString().

145  {
146  Datum d;
147  d.bigintval = element;
149  }
std::string DatumToString(Datum d, const SQLTypeInfo &ti)
Definition: Datum.cpp:460
int64_t bigintval
Definition: Datum.h:74
Definition: Datum.h:69

+ Here is the caller graph for this function:

template<typename V, typename T, typename NullType = V>
bool foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::isIntegralType ( const SQLTypeInfo &  type) const
inline

Definition at line 151 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::TypedParquetInPlaceEncoder< V, V >::elementToString().

151  {
152  return type.is_timestamp() || type.is_time() || type.is_date() || type.is_boolean() ||
153  type.is_decimal() || type.is_integer();
154  }
bool is_timestamp() const
Definition: sqltypes.h:1044
bool is_time() const
Definition: sqltypes.h:577
bool is_integer() const
Definition: sqltypes.h:565
bool is_boolean() const
Definition: sqltypes.h:580
bool is_decimal() const
Definition: sqltypes.h:568
bool is_date() const
Definition: sqltypes.h:1026

+ Here is the caller graph for this function:

template<typename V, typename T, typename NullType = V>
void foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::reserve ( const size_t  num_append_elements)
inline override virtual

Implements foreign_storage::ParquetInPlaceEncoder.

Definition at line 187 of file ParquetInPlaceEncoder.h.

187  {
188  buffer_->reserve(buffer_->size() + (num_append_elements * sizeof(V)));
189  }
Data_Namespace::AbstractBuffer * buffer_
virtual void reserve(size_t num_bytes)=0
template<typename V, typename T, typename NullType = V>
void foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::setDetectBufferConverterType ( )
inline

Definition at line 173 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::TypedParquetInPlaceEncoder< V, V >::TypedParquetInPlaceEncoder().

173  {
174  auto detect_buffer = dynamic_cast<TypedParquetDetectBuffer*>(buffer_);
175  CHECK(detect_buffer);
176  std::function<std::string(const V&)> element_to_string = [this](const V& element) {
177  return this->elementToString(element);
178  };
179  detect_buffer->setConverterType<V>(element_to_string);
180  }
std::string elementToString(const V &element) const
#define CHECK(condition)
Definition: Logger.h:291
Data_Namespace::AbstractBuffer * buffer_

+ Here is the caller graph for this function:

template<typename V, typename T, typename NullType = V>
void foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::setNull ( int8_t *  omnisci_data_bytes)
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 294 of file ParquetInPlaceEncoder.h.

294  {
295  auto& omnisci_data_value = reinterpret_cast<V*>(omnisci_data_bytes)[0];
296  omnisci_data_value = get_null_value<NullType>();
297  }
template<typename V, typename T, typename NullType = V>
void foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::validate ( const int8_t *  parquet_data,
const int64_t  j,
const SQLTypeInfo &  column_type 
) const
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Reimplemented in foreign_storage::ParquetTimestampEncoder< V, T, conversion_denominator, NullType >, and foreign_storage::ParquetTimestampEncoder< V, T, conversion_denominator *kSecsPerDay, NullType >.

Definition at line 139 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::TypedParquetInPlaceEncoder< V, V >::validateAndAppendData(), and foreign_storage::TypedParquetInPlaceEncoder< V, V >::validateUsingEncodersColumnType().

141  {
142  // no-op by default
143  }

+ Here is the caller graph for this function:

template<typename V, typename T, typename NullType = V>
void foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::validateAndAppendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values,
const SQLTypeInfo &  column_type,
InvalidRowGroupIndices &  invalid_indices 
)
inline override virtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 217 of file ParquetInPlaceEncoder.h.

223  {
224  int64_t i, j;
225  for (i = 0, j = 0; i < levels_read; ++i) {
226  if (def_levels[i]) {
227  try {
228  CHECK(j < values_read);
229  validate(values, j++, column_type);
230  } catch (const std::runtime_error& error) {
231  invalid_indices.insert(current_batch_offset_ + i);
232  }
233  }
234  }
235  current_batch_offset_ += levels_read;
236  appendData(def_levels, rep_levels, values_read, levels_read, values);
237  }
void validate(const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const override
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
#define CHECK(condition)
Definition: Logger.h:291
template<typename V, typename T, typename NullType = V>
void foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::validateUsingEncodersColumnType ( const int8_t *  parquet_data,
const int64_t  j 
) const
inline override virtual

Implements foreign_storage::ParquetScalarEncoder.

Definition at line 182 of file ParquetInPlaceEncoder.h.

Referenced by foreign_storage::TypedParquetInPlaceEncoder< V, V >::appendDataTrackErrors().

183  {
184  validate(parquet_data, j, column_type_);
185  }
void validate(const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const override

+ Here is the caller graph for this function:

Member Data Documentation

template<typename V, typename T, typename NullType = V>
int64_t foreign_storage::TypedParquetInPlaceEncoder< V, T, NullType >::current_batch_offset_ = 0
private

The documentation for this class was generated from the following file: