OmniSciDB
bf83d84833
|
#include <ParquetStringEncoder.h>
Public Member Functions | |
ParquetStringEncoder (Data_Namespace::AbstractBuffer *buffer, StringDictionary *string_dictionary, std::unique_ptr< ChunkMetadata > &chunk_metadata) | |
void | appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, const bool is_last_batch, int8_t *values) override |
void | encodeAndCopyContiguous (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements) override |
void | encodeAndCopy (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes) override |
std::shared_ptr< ChunkMetadata > | getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override |
Public Member Functions inherited from foreign_storage::TypedParquetInPlaceEncoder< V, V > | |
TypedParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const ColumnDescriptor *column_desciptor, const parquet::ColumnDescriptor *parquet_column_descriptor) | |
TypedParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size) | |
void | appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, const bool is_last_batch, int8_t *values) override |
void | encodeAndCopyContiguous (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements) override |
void | setNull (int8_t *omnisci_data_bytes) override |
void | copy (const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination) override |
std::shared_ptr< ChunkMetadata > | getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override |
Public Member Functions inherited from foreign_storage::ParquetInPlaceEncoder | |
ParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size) | |
Public Member Functions inherited from foreign_storage::ParquetScalarEncoder | |
ParquetScalarEncoder (Data_Namespace::AbstractBuffer *buffer) | |
Public Member Functions inherited from foreign_storage::ParquetEncoder | |
ParquetEncoder (Data_Namespace::AbstractBuffer *buffer) | |
virtual | ~ParquetEncoder ()=default |
Protected Member Functions | |
bool | encodingIsIdentityForSameTypes () const override |
Inherited Protected Member Functions | |
std::pair< V, V > | getUnencodedStats (std::shared_ptr< parquet::Statistics > stats) const |
Private Member Functions | |
void | updateMetadataStats (int64_t values_read, int8_t *values) |
Private Attributes | |
StringDictionary * | string_dictionary_ |
std::unique_ptr< ChunkMetadata > & | chunk_metadata_ |
std::vector< int8_t > | encode_buffer_ |
V | min_ |
V | max_ |
Additional Inherited Members | |
Inherited Static Member Functions | |
static std::shared_ptr < ChunkMetadata > | createMetadata (const SQLTypeInfo &column_type) |
static void | throwNotNullViolation (const std::string &parquet_column_name) |
static void | validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type) |
Attributes inherited from foreign_storage::ParquetInPlaceEncoder | |
const size_t | omnisci_data_type_byte_size_ |
Inherited Attributes | |
Data_Namespace::AbstractBuffer * | buffer_ |
Definition at line 29 of file ParquetStringEncoder.h.
|
inline |
Definition at line 31 of file ParquetStringEncoder.h.
|
inline, override, virtual |
Appends Parquet data to the buffer using an in-place algorithm. Any necessary transformation or validation of the data and decoding of nulls is part of appending the data. Each class inheriting from this abstract class must implement the functionality to copy, nullify and encode the data.
def_levels | - an array containing the Dremel encoding definition levels |
rep_levels | - an array containing the Dremel encoding repetition levels |
values_read | - the number of non-null values read |
levels_read | - the total number of values (non-null & null) that are read |
is_last_batch | - flag indicating if this is the last read for the row group |
values | - values that are read |
Note that the Parquet format encodes nulls using Dremel encoding.
Reimplemented from foreign_storage::ParquetInPlaceEncoder.
Definition at line 41 of file ParquetStringEncoder.h.
References foreign_storage::TypedParquetInPlaceEncoder< V, T >::appendData(), foreign_storage::ParquetStringEncoder< V >::encode_buffer_, and foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous().
|
inline, override, virtual |
Implements foreign_storage::ParquetScalarEncoder.
Definition at line 74 of file ParquetStringEncoder.h.
References foreign_storage::TypedParquetInPlaceEncoder< V, T >::copy().
|
inline, override, virtual |
Implements foreign_storage::ParquetScalarEncoder.
Definition at line 56 of file ParquetStringEncoder.h.
References CHECK, StringDictionary::getOrAddBulk(), foreign_storage::ParquetStringEncoder< V >::string_dictionary_, and foreign_storage::ParquetStringEncoder< V >::updateMetadataStats().
Referenced by foreign_storage::ParquetStringEncoder< V >::appendData().
|
inline, override, protected, virtual |
Reimplemented from foreign_storage::TypedParquetInPlaceEncoder< V, V >.
Definition at line 92 of file ParquetStringEncoder.h.
|
inline, override, virtual |
Reimplemented from foreign_storage::ParquetEncoder.
Definition at line 79 of file ParquetStringEncoder.h.
References foreign_storage::ParquetEncoder::getRowGroupMetadata(), and foreign_storage::ParquetInPlaceEncoder::omnisci_data_type_byte_size_.
|
inline, private |
Definition at line 95 of file ParquetStringEncoder.h.
References foreign_storage::ParquetStringEncoder< V >::chunk_metadata_, foreign_storage::ParquetStringEncoder< V >::max_, and foreign_storage::ParquetStringEncoder< V >::min_.
Referenced by foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous().
|
private |
Definition at line 105 of file ParquetStringEncoder.h.
Referenced by foreign_storage::ParquetStringEncoder< V >::updateMetadataStats().
|
private |
Definition at line 106 of file ParquetStringEncoder.h.
Referenced by foreign_storage::ParquetStringEncoder< V >::appendData().
|
private |
Definition at line 108 of file ParquetStringEncoder.h.
Referenced by foreign_storage::ParquetStringEncoder< V >::updateMetadataStats().
|
private |
Definition at line 108 of file ParquetStringEncoder.h.
Referenced by foreign_storage::ParquetStringEncoder< V >::updateMetadataStats().
|
private |
Definition at line 104 of file ParquetStringEncoder.h.
Referenced by foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous().