OmniSciDB  2e3a973ef4
foreign_storage::ParquetInPlaceEncoder Class Reference

#include <ParquetInPlaceEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetInPlaceEncoder:
+ Collaboration diagram for foreign_storage::ParquetInPlaceEncoder:

Public Member Functions

 ParquetInPlaceEncoder (Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size)
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, const bool is_last_batch, int8_t *values) override
- Public Member Functions inherited from foreign_storage::ParquetScalarEncoder
 ParquetScalarEncoder (Data_Namespace::AbstractBuffer *buffer)
virtual void setNull (int8_t *omnisci_data_bytes)=0
virtual void copy (const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination)=0
virtual void encodeAndCopy (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes)=0
virtual void encodeAndCopyContiguous (const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements)=0
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
virtual ~ParquetEncoder ()=default

Protected Attributes

const size_t omnisci_data_type_byte_size_
- Protected Attributes inherited from foreign_storage::ParquetEncoder

Private Member Functions

void decodeNullsAndEncodeData (int8_t *data_ptr, const int16_t *def_levels, const int64_t values_read, const int64_t levels_read, const bool do_encoding)

Private Attributes

const size_t parquet_data_type_byte_size_

Detailed Description

Definition at line 57 of file ParquetInPlaceEncoder.h.

Constructor & Destructor Documentation

◆ ParquetInPlaceEncoder()

foreign_storage::ParquetInPlaceEncoder::ParquetInPlaceEncoder ( Data_Namespace::AbstractBuffer *  buffer,
const size_t  omnisci_data_type_byte_size,
const size_t  parquet_data_type_byte_size )

Definition at line 59 of file ParquetInPlaceEncoder.h.

62  : ParquetScalarEncoder(buffer)
63  , omnisci_data_type_byte_size_(omnisci_data_type_byte_size)
64  , parquet_data_type_byte_size_(parquet_data_type_byte_size) {}
ParquetScalarEncoder(Data_Namespace::AbstractBuffer *buffer)

Member Function Documentation

◆ appendData()

void foreign_storage::ParquetInPlaceEncoder::appendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
const bool  is_last_batch,
int8_t *  values )

Appends Parquet data to the buffer using an in-place algorithm. Any necessary transformation or validation of the data and decoding of nulls is part of appending the data. Each class inheriting from this abstract class must implement the functionality to copy, nullify and encode the data.

def_levels - an array containing the Dremel encoding definition levels
rep_levels - an array containing the Dremel encoding repetition levels
values_read - the number of non-null values read
levels_read - the total number of values (non-null & null) that are read
is_last_batch - flag indicating if this is the last read for the row group
values - values that are read

Note that the Parquet format encodes nulls using Dremel encoding.

Implements foreign_storage::ParquetEncoder.

Reimplemented in foreign_storage::TypedParquetInPlaceEncoder< V, T >, foreign_storage::TypedParquetInPlaceEncoder< V, V >, foreign_storage::TypedParquetInPlaceEncoder< int64_t, int32_t >, and foreign_storage::ParquetStringEncoder< V >.

Definition at line 81 of file ParquetInPlaceEncoder.h.

References Data_Namespace::AbstractBuffer::append(), foreign_storage::ParquetEncoder::buffer_, decodeNullsAndEncodeData(), foreign_storage::ParquetScalarEncoder::encodeAndCopy(), omnisci_data_type_byte_size_, and parquet_data_type_byte_size_.

Referenced by foreign_storage::TypedParquetInPlaceEncoder< int64_t, int32_t >::appendData().

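86  {
87  if (omnisci_data_type_byte_size_ < parquet_data_type_byte_size_) {
88  for (int64_t i = 0; i < values_read; ++i) {
89  encodeAndCopy(values + i * parquet_data_type_byte_size_,
90  values + i * omnisci_data_type_byte_size_);
91  }
92  }
94  if (values_read < levels_read) { // nulls exist
95  decodeNullsAndEncodeData(
96  values,
97  def_levels,
98  values_read,
99  levels_read,
100  omnisci_data_type_byte_size_ >= parquet_data_type_byte_size_);
101  } else if (omnisci_data_type_byte_size_ > parquet_data_type_byte_size_) {
102  for (int64_t i = levels_read - 1; i >= 0; --i) {
103  encodeAndCopy(values + i * parquet_data_type_byte_size_,
104  values + i * omnisci_data_type_byte_size_);
105  }
106  }
108  buffer_->append(values, levels_read * omnisci_data_type_byte_size_);
109  }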
virtual void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes)=0
void decodeNullsAndEncodeData(int8_t *data_ptr, const int16_t *def_levels, const int64_t values_read, const int64_t levels_read, const bool do_encoding)
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
Data_Namespace::AbstractBuffer * buffer_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ decodeNullsAndEncodeData()

void foreign_storage::ParquetInPlaceEncoder::decodeNullsAndEncodeData ( int8_t *  data_ptr,
const int16_t *  def_levels,
const int64_t  values_read,
const int64_t  levels_read,
const bool  do_encoding )

Definition at line 115 of file ParquetInPlaceEncoder.h.

References CHECK, foreign_storage::ParquetScalarEncoder::copy(), foreign_storage::ParquetScalarEncoder::encodeAndCopy(), parquet_data_type_byte_size_, and foreign_storage::ParquetScalarEncoder::setNull().

Referenced by appendData().

119  {
120  for (int64_t i = levels_read - 1, j = values_read - 1; i >= 0; --i) {
121  if (def_levels[i]) { // not null
122  CHECK(j >= 0);
123  if (do_encoding) {
124  encodeAndCopy(data_ptr + (j--) * parquet_data_type_byte_size_,
125  data_ptr + i * omnisci_data_type_byte_size_);
126  } else {
127  copy(data_ptr + (j--) * omnisci_data_type_byte_size_,
128  data_ptr + i * omnisci_data_type_byte_size_);
129  }
130  } else { // null
131  setNull(data_ptr + i * omnisci_data_type_byte_size_);
132  }
133  }
134  }
virtual void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes)=0
virtual void setNull(int8_t *omnisci_data_bytes)=0
#define CHECK(condition)
Definition: Logger.h:197
virtual void copy(const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination)=0
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Member Data Documentation

◆ omnisci_data_type_byte_size_

const size_t foreign_storage::ParquetInPlaceEncoder::omnisci_data_type_byte_size_

◆ parquet_data_type_byte_size_

const size_t foreign_storage::ParquetInPlaceEncoder::parquet_data_type_byte_size_

Definition at line 136 of file ParquetInPlaceEncoder.h.

Referenced by appendData(), and decodeNullsAndEncodeData().

The documentation for this class was generated from the following file: