OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetArrayEncoder Class Reference abstract

#include <ParquetArrayEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetArrayEncoder:
+ Collaboration diagram for foreign_storage::ParquetArrayEncoder:

Public Member Functions

 ParquetArrayEncoder (Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void finalizeRowGroup ()
 
std::shared_ptr< ChunkMetadata > getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 

Protected Member Functions

virtual void processLastArray ()=0
 
virtual void appendArraysToBuffer ()
 
bool isLastArrayNull () const
 
bool isLastArrayEmpty () const
 
size_t sizeOfLastArray () const
 
int8_t * resizeArrayDataBytes (const size_t additional_num_elements)
 
virtual void resetLastArrayMetadata ()
 
bool isNewArray (const int16_t rep_level) const
 
virtual void appendArrayItem (const int64_t encoded_index)
 

Protected Attributes

size_t omnisci_data_type_byte_size_
 
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_
 
std::vector< int8_t > data_buffer_bytes_
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 

Static Protected Attributes

static const int16_t non_null_def_level = 3
 
static const int16_t item_null_def_level = 2
 
static const int16_t empty_list_def_level = 1
 
static const int16_t list_null_def_level = 0
 

Private Member Functions

void processArrayItem (const int16_t def_level, int64_t &encoded_index)
 
void encodeAllValues (const int8_t *values, const int64_t values_read)
 
void markArrayAsNull ()
 
void markArrayAsEmpty ()
 
void appendNullArrayItem ()
 

Private Attributes

std::vector< int8_t > encode_buffer_
 
bool has_assembly_started_
 
bool is_null_array_
 
bool is_empty_array_
 
size_t num_elements_in_array_
 

Additional Inherited Members

- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr< ChunkMetadata > createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 

Detailed Description

Definition at line 23 of file ParquetArrayEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetArrayEncoder::ParquetArrayEncoder ( Data_Namespace::AbstractBuffer *  data_buffer,
std::shared_ptr< ParquetScalarEncoder >  scalar_encoder,
const ColumnDescriptor *  column_desciptor 
)
inline

Definition at line 25 of file ParquetArrayEncoder.h.

28  : ParquetEncoder(data_buffer)
30  column_desciptor->columnType.get_elem_type().get_size())
31  , scalar_encoder_(scalar_encoder)
32  , has_assembly_started_(false)
33  , is_null_array_(false)
34  , is_empty_array_(false)
ParquetEncoder(Data_Namespace::AbstractBuffer *buffer)
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
SQLTypeInfo columnType
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:850
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

Member Function Documentation

virtual void foreign_storage::ParquetArrayEncoder::appendArrayItem ( const int64_t  encoded_index)
inline protected virtual

Reimplemented in foreign_storage::ParquetArrayImportEncoder.

Definition at line 116 of file ParquetArrayEncoder.h.

References encode_buffer_, num_elements_in_array_, omnisci_data_type_byte_size_, resizeArrayDataBytes(), and scalar_encoder_.

Referenced by foreign_storage::ParquetArrayImportEncoder::appendArrayItem(), and processArrayItem().

116  {
117  auto omnisci_data_ptr = resizeArrayDataBytes(1);
118  scalar_encoder_->copy(
119  encode_buffer_.data() + (encoded_index)*omnisci_data_type_byte_size_,
120  omnisci_data_ptr);
122  }
int8_t * resizeArrayDataBytes(const size_t additional_num_elements)
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual void foreign_storage::ParquetArrayEncoder::appendArraysToBuffer ( )
inline protected virtual

Reimplemented in foreign_storage::ParquetArrayImportEncoder, and foreign_storage::ParquetVariableLengthArrayEncoder.

Definition at line 78 of file ParquetArrayEncoder.h.

References Data_Namespace::AbstractBuffer::append(), foreign_storage::ParquetEncoder::buffer_, and data_buffer_bytes_.

Referenced by foreign_storage::ParquetVariableLengthArrayEncoder::appendArraysToBuffer(), and finalizeRowGroup().

78  {
80  data_buffer_bytes_.clear();
81  }
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
Data_Namespace::AbstractBuffer * buffer_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::appendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

Implements foreign_storage::ParquetEncoder.

Reimplemented in foreign_storage::ParquetVariableLengthArrayEncoder.

Definition at line 37 of file ParquetArrayEncoder.h.

References CHECK, encodeAllValues(), i, isNewArray(), processArrayItem(), processLastArray(), and resetLastArrayMetadata().

Referenced by foreign_storage::ParquetVariableLengthArrayEncoder::appendData(), and foreign_storage::ParquetArrayImportEncoder::validateAndAppendData().

41  {
42  CHECK(levels_read > 0);
43 
44  // encode all values in the temporary in-memory `encode_buffer_`, doing
45  // this encoding as a batch rather than element-wise exposes opportunities
46  // for performance optimization for certain scalar types
47  encodeAllValues(values, values_read);
48 
49  for (int64_t i = 0, j = 0; i < levels_read; ++i) {
50  if (isNewArray(rep_levels[i])) {
53  }
54  processArrayItem(def_levels[i], j);
55  }
56  }
void processArrayItem(const int16_t def_level, int64_t &encoded_index)
void encodeAllValues(const int8_t *values, const int64_t values_read)
#define CHECK(condition)
Definition: Logger.h:209
bool isNewArray(const int16_t rep_level) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::appendNullArrayItem ( )
inline private

Definition at line 151 of file ParquetArrayEncoder.h.

References num_elements_in_array_, resizeArrayDataBytes(), and scalar_encoder_.

Referenced by processArrayItem().

151  {
154  }
int8_t * resizeArrayDataBytes(const size_t additional_num_elements)
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::encodeAllValues ( const int8_t *  values,
const int64_t  values_read 
)
inline private

Definition at line 142 of file ParquetArrayEncoder.h.

References encode_buffer_, omnisci_data_type_byte_size_, and scalar_encoder_.

Referenced by appendData().

142  {
143  encode_buffer_.resize(values_read * omnisci_data_type_byte_size_);
144  scalar_encoder_->encodeAndCopyContiguous(values, encode_buffer_.data(), values_read);
145  }
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::finalizeRowGroup ( )
inline

Definition at line 58 of file ParquetArrayEncoder.h.

References appendArraysToBuffer(), has_assembly_started_, processLastArray(), and resetLastArrayMetadata().

+ Here is the call graph for this function:

std::shared_ptr<ChunkMetadata> foreign_storage::ParquetArrayEncoder::getRowGroupMetadata ( const parquet::RowGroupMetaData *  group_metadata,
const int  parquet_column_index,
const SQLTypeInfo &  column_type 
)
inline override virtual

Reimplemented from foreign_storage::ParquetEncoder.

Reimplemented in foreign_storage::ParquetFixedLengthArrayEncoder.

Definition at line 65 of file ParquetArrayEncoder.h.

References scalar_encoder_.

Referenced by foreign_storage::ParquetFixedLengthArrayEncoder::getRowGroupMetadata().

68  {
69  auto metadata = scalar_encoder_->getRowGroupMetadata(
70  group_metadata, parquet_column_index, column_type);
71  metadata->numBytes = 0; // number of bytes is not known
72  return metadata;
73  }
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

+ Here is the caller graph for this function:

bool foreign_storage::ParquetArrayEncoder::isLastArrayEmpty ( ) const
inline protected

Definition at line 85 of file ParquetArrayEncoder.h.

References is_empty_array_.

Referenced by foreign_storage::ParquetVariableLengthArrayEncoder::appendLastArrayOffset(), foreign_storage::ParquetFixedLengthArrayEncoder::appendNullArrayOrCheckArraySize(), and foreign_storage::ParquetArrayImportEncoder::appendToArrayDatumBuffer().

+ Here is the caller graph for this function:

bool foreign_storage::ParquetArrayEncoder::isLastArrayNull ( ) const
inline protected

Definition at line 83 of file ParquetArrayEncoder.h.

References is_null_array_.

Referenced by foreign_storage::ParquetVariableLengthArrayEncoder::appendLastArrayOffset(), foreign_storage::ParquetFixedLengthArrayEncoder::appendNullArrayOrCheckArraySize(), and foreign_storage::ParquetArrayImportEncoder::appendToArrayDatumBuffer().

+ Here is the caller graph for this function:

bool foreign_storage::ParquetArrayEncoder::isNewArray ( const int16_t  rep_level) const
inline protected

Definition at line 112 of file ParquetArrayEncoder.h.

References has_assembly_started_.

Referenced by appendData().

112  {
113  return rep_level == 0 && has_assembly_started_;
114  }

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::markArrayAsEmpty ( )
inline private

Definition at line 149 of file ParquetArrayEncoder.h.

References is_empty_array_.

Referenced by processArrayItem().

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::markArrayAsNull ( )
inline private

Definition at line 147 of file ParquetArrayEncoder.h.

References is_null_array_.

Referenced by processArrayItem().

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::processArrayItem ( const int16_t  def_level,
int64_t &  encoded_index 
)
inline private

Definition at line 125 of file ParquetArrayEncoder.h.

References appendArrayItem(), appendNullArrayItem(), empty_list_def_level, has_assembly_started_, item_null_def_level, list_null_def_level, markArrayAsEmpty(), markArrayAsNull(), non_null_def_level, and UNREACHABLE.

Referenced by appendData().

125  {
126  has_assembly_started_ = true;
127  if (def_level == non_null_def_level) {
128  // push back a scalar element to in-memory data buffer
129  appendArrayItem(encoded_index++);
130  } else if (def_level == item_null_def_level) {
131  // push back a scalar null to in-memory data buffer
133  } else if (def_level == list_null_def_level) {
134  markArrayAsNull();
135  } else if (def_level == empty_list_def_level) {
137  } else {
138  UNREACHABLE();
139  }
140  }
virtual void appendArrayItem(const int64_t encoded_index)
#define UNREACHABLE()
Definition: Logger.h:253

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual void foreign_storage::ParquetArrayEncoder::processLastArray ( )
protected pure virtual

Implemented in foreign_storage::ParquetArrayImportEncoder, foreign_storage::ParquetVariableLengthArrayEncoder, and foreign_storage::ParquetFixedLengthArrayEncoder.

Referenced by appendData(), and finalizeRowGroup().

+ Here is the caller graph for this function:

virtual void foreign_storage::ParquetArrayEncoder::resetLastArrayMetadata ( )
inline protected virtual

Reimplemented in foreign_storage::ParquetArrayImportEncoder.

Definition at line 106 of file ParquetArrayEncoder.h.

References is_empty_array_, is_null_array_, and num_elements_in_array_.

Referenced by appendData(), finalizeRowGroup(), and foreign_storage::ParquetArrayImportEncoder::resetLastArrayMetadata().

+ Here is the caller graph for this function:

int8_t* foreign_storage::ParquetArrayEncoder::resizeArrayDataBytes ( const size_t  additional_num_elements)
inline protected

Definition at line 89 of file ParquetArrayEncoder.h.

References data_buffer_bytes_, and omnisci_data_type_byte_size_.

Referenced by appendArrayItem(), appendNullArrayItem(), and foreign_storage::ParquetFixedLengthArrayEncoder::appendNullFixedLengthArray().

89  {
90  auto current_data_byte_size = data_buffer_bytes_.size();
91  data_buffer_bytes_.resize(current_data_byte_size +
92  additional_num_elements * omnisci_data_type_byte_size_);
93  return data_buffer_bytes_.data() + current_data_byte_size;
94  }

+ Here is the caller graph for this function:

size_t foreign_storage::ParquetArrayEncoder::sizeOfLastArray ( ) const
inline protected

Definition at line 87 of file ParquetArrayEncoder.h.

References num_elements_in_array_.

Referenced by foreign_storage::ParquetFixedLengthArrayEncoder::appendNullArrayOrCheckArraySize(), and foreign_storage::ParquetArrayImportEncoder::appendToArrayDatumBuffer().

+ Here is the caller graph for this function:

Member Data Documentation

const int16_t foreign_storage::ParquetArrayEncoder::empty_list_def_level = 1
static protected
std::vector<int8_t> foreign_storage::ParquetArrayEncoder::encode_buffer_
private

Definition at line 156 of file ParquetArrayEncoder.h.

Referenced by appendArrayItem(), and encodeAllValues().

bool foreign_storage::ParquetArrayEncoder::has_assembly_started_
private

Definition at line 157 of file ParquetArrayEncoder.h.

Referenced by finalizeRowGroup(), isNewArray(), and processArrayItem().

bool foreign_storage::ParquetArrayEncoder::is_empty_array_
private
bool foreign_storage::ParquetArrayEncoder::is_null_array_
private

Definition at line 158 of file ParquetArrayEncoder.h.

Referenced by isLastArrayNull(), markArrayAsNull(), and resetLastArrayMetadata().

const int16_t foreign_storage::ParquetArrayEncoder::item_null_def_level = 2
static protected

Definition at line 102 of file ParquetArrayEncoder.h.

Referenced by processArrayItem().

const int16_t foreign_storage::ParquetArrayEncoder::list_null_def_level = 0
static protected
const int16_t foreign_storage::ParquetArrayEncoder::non_null_def_level = 3
static protected

Definition at line 101 of file ParquetArrayEncoder.h.

Referenced by processArrayItem().

size_t foreign_storage::ParquetArrayEncoder::num_elements_in_array_
private

The documentation for this class was generated from the following file: