OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetVariableLengthArrayEncoder Class Reference

#include <ParquetVariableLengthArrayEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetVariableLengthArrayEncoder:
+ Collaboration diagram for foreign_storage::ParquetVariableLengthArrayEncoder:

Public Member Functions

 ParquetVariableLengthArrayEncoder (Data_Namespace::AbstractBuffer *data_buffer, Data_Namespace::AbstractBuffer *index_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
- Public Member Functions inherited from foreign_storage::ParquetArrayEncoder
 ParquetArrayEncoder (Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
 
void appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void finalizeRowGroup ()
 
std::shared_ptr< ChunkMetadata > getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
 
virtual void disableMetadataStatsValidation () override
 
virtual void initializeErrorTracking () override
 
virtual void initializeColumnType (const SQLTypeInfo &column_type) override
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
RejectedRowIndices getRejectedRowIndices () const
 

Protected Member Functions

void appendArraysToBuffer () override
 
void processLastArray () override
 
- Protected Member Functions inherited from foreign_storage::ParquetArrayEncoder
bool isLastArrayNull () const
 
bool isLastArrayEmpty () const
 
size_t sizeOfLastArray () const
 
int8_t * resizeArrayDataBytes (const size_t additional_num_elements)
 
virtual void resetLastArrayMetadata ()
 
bool isNewArray (const int16_t rep_level) const
 
int8_t * encodedDataAtIndex (const size_t index)
 
void updateMetadataForAppendedArrayItem (const int64_t encoded_index)
 
virtual void appendArrayItem (const int64_t encoded_index)
 
virtual void encodeAllValues (const int8_t *values, const int64_t values_read)
 

Private Member Functions

void setFirstOffsetForBuffer (const int16_t def_level)
 
void appendLastArrayOffset ()
 

Private Attributes

Data_Namespace::AbstractBuffer * index_buffer_
 
std::vector< ArrayOffsetT > offsets_
 

Additional Inherited Members

- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr< ChunkMetadata > createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 
- Protected Attributes inherited from foreign_storage::ParquetArrayEncoder
size_t omnisci_data_type_byte_size_
 
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_
 
std::vector< int8_t > data_buffer_bytes_
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 
bool is_error_tracking_enabled_
 
RejectedRowIndices invalid_indices_
 
size_t current_chunk_offset_
 
SQLTypeInfo column_type_
 
bool validate_metadata_stats_
 
- Static Protected Attributes inherited from foreign_storage::ParquetArrayEncoder
static const int16_t non_null_def_level = 3
 
static const int16_t item_null_def_level = 2
 
static const int16_t empty_list_def_level = 1
 
static const int16_t list_null_def_level = 0
 

Detailed Description

Definition at line 25 of file ParquetVariableLengthArrayEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetVariableLengthArrayEncoder::ParquetVariableLengthArrayEncoder ( Data_Namespace::AbstractBuffer *  data_buffer,
Data_Namespace::AbstractBuffer *  index_buffer,
std::shared_ptr< ParquetScalarEncoder >  scalar_encoder,
const ColumnDescriptor *  column_desciptor 
)
inline

Definition at line 27 of file ParquetVariableLengthArrayEncoder.h.

31  : ParquetArrayEncoder(data_buffer, scalar_encoder, column_desciptor)
32  , index_buffer_(index_buffer) {}
ParquetArrayEncoder(Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)

Member Function Documentation

void foreign_storage::ParquetVariableLengthArrayEncoder::appendArraysToBuffer ( )
inline override protected virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 46 of file ParquetVariableLengthArrayEncoder.h.

References Data_Namespace::AbstractBuffer::append(), foreign_storage::ParquetArrayEncoder::appendArraysToBuffer(), index_buffer_, and offsets_.

46  {
47  index_buffer_->append(reinterpret_cast<int8_t*>(offsets_.data()),
48  offsets_.size() * sizeof(ArrayOffsetT));
49  offsets_.clear();
50  ParquetArrayEncoder::appendArraysToBuffer();
51  }
int32_t ArrayOffsetT
Definition: sqltypes.h:1494
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0

+ Here is the call graph for this function:

void foreign_storage::ParquetVariableLengthArrayEncoder::appendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 34 of file ParquetVariableLengthArrayEncoder.h.

References foreign_storage::ParquetArrayEncoder::appendData(), CHECK, and setFirstOffsetForBuffer().

38  {
39  CHECK(levels_read > 0);
40  setFirstOffsetForBuffer(def_levels[0]);
41  ParquetArrayEncoder::appendData(
42  def_levels, rep_levels, values_read, levels_read, values);
43  }
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

void foreign_storage::ParquetVariableLengthArrayEncoder::appendLastArrayOffset ( )
inline private

Definition at line 78 of file ParquetVariableLengthArrayEncoder.h.

References foreign_storage::ParquetEncoder::buffer_, foreign_storage::ParquetArrayEncoder::data_buffer_bytes_, foreign_storage::ParquetArrayEncoder::isLastArrayEmpty(), foreign_storage::ParquetArrayEncoder::isLastArrayNull(), offsets_, and Data_Namespace::AbstractBuffer::size().

Referenced by processLastArray().

78  {
79  int64_t last_offset = buffer_->size() + data_buffer_bytes_.size();
80  if (isLastArrayNull()) {
81  // append a null array offset
82  offsets_.push_back(-last_offset);
83  } else if (isLastArrayEmpty()) {
84  offsets_.push_back(last_offset);
85  } else {
86  // append array data offset
87  offsets_.push_back(last_offset);
88  }
89  }
Data_Namespace::AbstractBuffer * buffer_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetVariableLengthArrayEncoder::processLastArray ( )
inline override protected virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 53 of file ParquetVariableLengthArrayEncoder.h.

References appendLastArrayOffset(), and foreign_storage::ParquetArrayEncoder::processLastArray().

+ Here is the call graph for this function:

void foreign_storage::ParquetVariableLengthArrayEncoder::setFirstOffsetForBuffer ( const int16_t  def_level)
inline private

Definition at line 59 of file ParquetVariableLengthArrayEncoder.h.

References foreign_storage::ParquetEncoder::buffer_, foreign_storage::ParquetArrayEncoder::data_buffer_bytes_, ArrayNoneEncoder::DEFAULT_NULL_PADDING_SIZE, foreign_storage::ParquetArrayEncoder::empty_list_def_level, foreign_storage::ParquetArrayEncoder::list_null_def_level, offsets_, and Data_Namespace::AbstractBuffer::size().

Referenced by appendData().

59  {
60  if (data_buffer_bytes_.size() == 0 && buffer_->size() == 0) { // first element
61  if (def_level == ParquetArrayEncoder::list_null_def_level ||
62  def_level == ParquetArrayEncoder::empty_list_def_level) {
63  // OmniSci variable array types have a special encoding for chunks in
64  // which the first array is null: the first `DEFAULT_NULL_PADDING_SIZE`
65  // bytes of the chunk are filled and the offset is set appropriately.
66  // Ostensibly, this is done to allow marking a null array by negating
67  // a non-zero value.
68  offsets_.push_back(ArrayNoneEncoder::DEFAULT_NULL_PADDING_SIZE);
69  std::vector<int8_t> zero_bytes(ArrayNoneEncoder::DEFAULT_NULL_PADDING_SIZE, 0);
70  data_buffer_bytes_.insert(
71  data_buffer_bytes_.end(), zero_bytes.begin(), zero_bytes.end());
72  } else {
73  offsets_.push_back(0);
74  }
75  }
76  }
static constexpr size_t DEFAULT_NULL_PADDING_SIZE
Data_Namespace::AbstractBuffer * buffer_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Member Data Documentation

Data_Namespace::AbstractBuffer* foreign_storage::ParquetVariableLengthArrayEncoder::index_buffer_
private

Definition at line 91 of file ParquetVariableLengthArrayEncoder.h.

Referenced by appendArraysToBuffer().

std::vector<ArrayOffsetT> foreign_storage::ParquetVariableLengthArrayEncoder::offsets_
private

The documentation for this class was generated from the following file:
ParquetVariableLengthArrayEncoder.h