OmniSciDB  95562058bd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
foreign_storage::ParquetArrayEncoder Class Reference (abstract)

#include <ParquetArrayEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetArrayEncoder:
+ Collaboration diagram for foreign_storage::ParquetArrayEncoder:

Public Member Functions

 ParquetArrayEncoder (Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, const bool is_last_batch, int8_t *values) override
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
virtual std::shared_ptr< ChunkMetadata > getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)
 

Protected Member Functions

virtual void processLastArray ()=0
 
virtual void appendArraysToBuffer ()
 
bool isLastArrayNull () const
 
size_t sizeOfLastArray () const
 
int8_t * resizeArrayDataBytes (const size_t additional_num_elements)
 

Protected Attributes

size_t omnisci_data_type_byte_size_
 
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_
 
std::vector< int8_t > data_buffer_bytes_
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 

Static Protected Attributes

static const int16_t non_null_def_level = 3
 
static const int16_t item_null_def_level = 2
 
static const int16_t list_null_def_level = 0
 

Private Member Functions

void finalizeRowGroup ()
 
void resetLastArrayMetadata ()
 
bool isNewArray (const int16_t rep_level) const
 
void processArrayItem (const int16_t def_level, int64_t &encoded_index)
 
void encodeAllValues (const int8_t *values, const int64_t values_read)
 
void markArrayAsNull ()
 
void appendArrayItem (const int64_t encoded_index)
 
void appendNullArrayItem ()
 

Private Attributes

std::vector< int8_t > encode_buffer_
 
bool has_assembly_started_
 
bool is_null_array_
 
size_t num_elements_in_array_
 

Additional Inherited Members

- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr< ChunkMetadata > createMetadata (const SQLTypeInfo &column_type)
 

Detailed Description

Definition at line 23 of file ParquetArrayEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetArrayEncoder::ParquetArrayEncoder ( Data_Namespace::AbstractBuffer *  data_buffer,
std::shared_ptr< ParquetScalarEncoder >  scalar_encoder,
const ColumnDescriptor *  column_desciptor 
)
inline

Definition at line 25 of file ParquetArrayEncoder.h.

28  : ParquetEncoder(data_buffer)
30  column_desciptor->columnType.get_elem_type().get_size())
31  , scalar_encoder_(scalar_encoder)
32  , has_assembly_started_(false)
33  , is_null_array_(false)
ParquetEncoder(Data_Namespace::AbstractBuffer *buffer)
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
SQLTypeInfo columnType
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:624
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

Member Function Documentation

void foreign_storage::ParquetArrayEncoder::appendArrayItem ( const int64_t  encoded_index)
inline private

Definition at line 129 of file ParquetArrayEncoder.h.

References encode_buffer_, num_elements_in_array_, omnisci_data_type_byte_size_, resizeArrayDataBytes(), and scalar_encoder_.

Referenced by processArrayItem().

129  {
130  auto omnisci_data_ptr = resizeArrayDataBytes(1);
131  scalar_encoder_->copy(
132  encode_buffer_.data() + (encoded_index)*omnisci_data_type_byte_size_,
133  omnisci_data_ptr);
135  }
int8_t * resizeArrayDataBytes(const size_t additional_num_elements)
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual void foreign_storage::ParquetArrayEncoder::appendArraysToBuffer ( )
inline protected virtual

Reimplemented in foreign_storage::ParquetVariableLengthArrayEncoder.

Definition at line 65 of file ParquetArrayEncoder.h.

References Data_Namespace::AbstractBuffer::append(), foreign_storage::ParquetEncoder::buffer_, and data_buffer_bytes_.

Referenced by foreign_storage::ParquetVariableLengthArrayEncoder::appendArraysToBuffer(), and finalizeRowGroup().

65  {
67  data_buffer_bytes_.clear();
68  }
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
Data_Namespace::AbstractBuffer * buffer_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::appendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
const bool  is_last_batch,
int8_t *  values 
)
inline override virtual

Implements foreign_storage::ParquetEncoder.

Reimplemented in foreign_storage::ParquetVariableLengthArrayEncoder.

Definition at line 36 of file ParquetArrayEncoder.h.

References CHECK, encodeAllValues(), finalizeRowGroup(), isNewArray(), processArrayItem(), processLastArray(), and resetLastArrayMetadata().

Referenced by foreign_storage::ParquetVariableLengthArrayEncoder::appendData().

41  {
42  CHECK(levels_read > 0);
43 
44  // encode all values in the temporary in-memory `encode_buffer_`, doing
45  // this encoding as a batch rather than element-wise exposes opportunities
46  // for performance optimization for certain scalar types
47  encodeAllValues(values, values_read);
48 
49  for (int64_t i = 0, j = 0; i < levels_read; ++i) {
50  if (isNewArray(rep_levels[i])) {
53  }
54  processArrayItem(def_levels[i], j);
55  }
56 
57  if (is_last_batch) {
59  }
60  }
void processArrayItem(const int16_t def_level, int64_t &encoded_index)
void encodeAllValues(const int8_t *values, const int64_t values_read)
#define CHECK(condition)
Definition: Logger.h:197
bool isNewArray(const int16_t rep_level) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::appendNullArrayItem ( )
inline private

Definition at line 137 of file ParquetArrayEncoder.h.

References num_elements_in_array_, resizeArrayDataBytes(), and scalar_encoder_.

Referenced by processArrayItem().

137  {
140  }
int8_t * resizeArrayDataBytes(const size_t additional_num_elements)
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::encodeAllValues ( const int8_t *  values,
const int64_t  values_read 
)
inline private

Definition at line 122 of file ParquetArrayEncoder.h.

References encode_buffer_, omnisci_data_type_byte_size_, and scalar_encoder_.

Referenced by appendData().

122  {
123  encode_buffer_.resize(values_read * omnisci_data_type_byte_size_);
124  scalar_encoder_->encodeAndCopyContiguous(values, encode_buffer_.data(), values_read);
125  }
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::finalizeRowGroup ( )
inline private

Definition at line 91 of file ParquetArrayEncoder.h.

References appendArraysToBuffer(), has_assembly_started_, processLastArray(), and resetLastArrayMetadata().

Referenced by appendData().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool foreign_storage::ParquetArrayEncoder::isLastArrayNull ( ) const
inline protected

Definition at line 70 of file ParquetArrayEncoder.h.

References is_null_array_.

Referenced by foreign_storage::ParquetVariableLengthArrayEncoder::appendLastArrayOffset(), and foreign_storage::ParquetFixedLengthArrayEncoder::appendNullArrayOrCheckArraySize().

+ Here is the caller graph for this function:

bool foreign_storage::ParquetArrayEncoder::isNewArray ( const int16_t  rep_level) const
inline private

Definition at line 103 of file ParquetArrayEncoder.h.

References has_assembly_started_.

Referenced by appendData().

103  {
104  return rep_level == 0 && has_assembly_started_;
105  }

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::markArrayAsNull ( )
inline private

Definition at line 127 of file ParquetArrayEncoder.h.

References is_null_array_.

Referenced by processArrayItem().

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::processArrayItem ( const int16_t  def_level,
int64_t &  encoded_index 
)
inline private

Definition at line 107 of file ParquetArrayEncoder.h.

References appendArrayItem(), appendNullArrayItem(), has_assembly_started_, item_null_def_level, list_null_def_level, markArrayAsNull(), non_null_def_level, and UNREACHABLE.

Referenced by appendData().

107  {
108  has_assembly_started_ = true;
109  if (def_level == non_null_def_level) {
110  // push back a scalar element to in-memory data buffer
111  appendArrayItem(encoded_index++);
112  } else if (def_level == item_null_def_level) {
113  // push back a scalar null to in-memory data buffer
115  } else if (def_level == list_null_def_level) {
116  markArrayAsNull();
117  } else {
118  UNREACHABLE();
119  }
120  }
#define UNREACHABLE()
Definition: Logger.h:241
void appendArrayItem(const int64_t encoded_index)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

virtual void foreign_storage::ParquetArrayEncoder::processLastArray ( )
protected pure virtual

Implemented in foreign_storage::ParquetVariableLengthArrayEncoder, and foreign_storage::ParquetFixedLengthArrayEncoder.

Referenced by appendData(), and finalizeRowGroup().

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayEncoder::resetLastArrayMetadata ( )
inline private

Definition at line 98 of file ParquetArrayEncoder.h.

References is_null_array_, and num_elements_in_array_.

Referenced by appendData(), and finalizeRowGroup().

+ Here is the caller graph for this function:

int8_t* foreign_storage::ParquetArrayEncoder::resizeArrayDataBytes ( const size_t  additional_num_elements)
inline protected

Definition at line 74 of file ParquetArrayEncoder.h.

References data_buffer_bytes_, and omnisci_data_type_byte_size_.

Referenced by appendArrayItem(), appendNullArrayItem(), and foreign_storage::ParquetFixedLengthArrayEncoder::appendNullFixedLengthArray().

74  {
75  auto current_data_byte_size = data_buffer_bytes_.size();
76  data_buffer_bytes_.resize(current_data_byte_size +
77  additional_num_elements * omnisci_data_type_byte_size_);
78  return data_buffer_bytes_.data() + current_data_byte_size;
79  }

+ Here is the caller graph for this function:

size_t foreign_storage::ParquetArrayEncoder::sizeOfLastArray ( ) const
inline protected

Definition at line 72 of file ParquetArrayEncoder.h.

References num_elements_in_array_.

Referenced by foreign_storage::ParquetFixedLengthArrayEncoder::appendNullArrayOrCheckArraySize().

+ Here is the caller graph for this function:

Member Data Documentation

std::vector<int8_t> foreign_storage::ParquetArrayEncoder::encode_buffer_
private

Definition at line 142 of file ParquetArrayEncoder.h.

Referenced by appendArrayItem(), and encodeAllValues().

bool foreign_storage::ParquetArrayEncoder::has_assembly_started_
private

Definition at line 143 of file ParquetArrayEncoder.h.

Referenced by finalizeRowGroup(), isNewArray(), and processArrayItem().

bool foreign_storage::ParquetArrayEncoder::is_null_array_
private

Definition at line 144 of file ParquetArrayEncoder.h.

Referenced by isLastArrayNull(), markArrayAsNull(), and resetLastArrayMetadata().

const int16_t foreign_storage::ParquetArrayEncoder::item_null_def_level = 2
static protected

Definition at line 87 of file ParquetArrayEncoder.h.

Referenced by processArrayItem().

const int16_t foreign_storage::ParquetArrayEncoder::list_null_def_level = 0
static protected
const int16_t foreign_storage::ParquetArrayEncoder::non_null_def_level = 3
static protected

Definition at line 86 of file ParquetArrayEncoder.h.

Referenced by processArrayItem().

size_t foreign_storage::ParquetArrayEncoder::num_elements_in_array_
private
std::shared_ptr<ParquetScalarEncoder> foreign_storage::ParquetArrayEncoder::scalar_encoder_
protected

The documentation for this class was generated from the following file: