OmniSciDB  c0231cc57d
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetArrayDetectEncoder Class Reference

#include <ParquetArrayDetectEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetArrayDetectEncoder:
+ Collaboration diagram for foreign_storage::ParquetArrayDetectEncoder:

Public Member Functions

 ParquetArrayDetectEncoder (Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
 
void appendArrayItem (const int64_t encoded_index) override
 
- Public Member Functions inherited from foreign_storage::ParquetArrayEncoder
 ParquetArrayEncoder (Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
 
void appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void finalizeRowGroup ()
 
std::shared_ptr< ChunkMetadata > getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
 
virtual void disableMetadataStatsValidation () override
 
virtual void initializeErrorTracking (const SQLTypeInfo &column_type) override
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
RejectedRowIndices getRejectedRowIndices () const
 

Protected Member Functions

void encodeAllValues (const int8_t *values, const int64_t values_read) override
 
void appendArraysToBuffer () override
 
void processLastArray () override
 
- Protected Member Functions inherited from foreign_storage::ParquetArrayEncoder
bool isLastArrayNull () const
 
bool isLastArrayEmpty () const
 
size_t sizeOfLastArray () const
 
int8_t * resizeArrayDataBytes (const size_t additional_num_elements)
 
virtual void resetLastArrayMetadata ()
 
bool isNewArray (const int16_t rep_level) const
 
int8_t * encodedDataAtIndex (const size_t index)
 
void updateMetadataForAppendedArrayItem (const int64_t encoded_index)
 

Private Member Functions

void appendToDetectBuffer ()
 

Private Attributes

TypedParquetDetectBuffer * detect_buffer_
 
const bool is_string_array_
 
std::vector< std::string > array_string_
 
std::vector< std::string > string_buffer_
 

Additional Inherited Members

- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr< ChunkMetadata > createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 
- Protected Attributes inherited from foreign_storage::ParquetArrayEncoder
size_t omnisci_data_type_byte_size_
 
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_
 
std::vector< int8_t > data_buffer_bytes_
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 
bool is_error_tracking_enabled_
 
RejectedRowIndices invalid_indices_
 
size_t current_chunk_offset_
 
SQLTypeInfo column_type_
 
bool validate_metadata_stats_
 
- Static Protected Attributes inherited from foreign_storage::ParquetArrayEncoder
static const int16_t non_null_def_level = 3
 
static const int16_t item_null_def_level = 2
 
static const int16_t empty_list_def_level = 1
 
static const int16_t list_null_def_level = 0
 

Detailed Description

Definition at line 27 of file ParquetArrayDetectEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetArrayDetectEncoder::ParquetArrayDetectEncoder ( Data_Namespace::AbstractBuffer * data_buffer,
std::shared_ptr< ParquetScalarEncoder > scalar_encoder,
const ColumnDescriptor * column_desciptor
)
inline

Definition at line 29 of file ParquetArrayDetectEncoder.h.

References CHECK, and detect_buffer_.

32  : ParquetArrayEncoder(data_buffer, scalar_encoder, column_desciptor)
33  , detect_buffer_(dynamic_cast<TypedParquetDetectBuffer*>(data_buffer))
34  , is_string_array_(
35  dynamic_cast<ParquetDetectStringEncoder*>(scalar_encoder_.get())) {
36  CHECK(detect_buffer_);
37  }
ParquetArrayEncoder(Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
#define CHECK(condition)
Definition: Logger.h:222
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

Member Function Documentation

void foreign_storage::ParquetArrayDetectEncoder::appendArrayItem ( const int64_t  encoded_index)
inline override virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 39 of file ParquetArrayDetectEncoder.h.

References array_string_, CHECK_GT, foreign_storage::ParquetArrayEncoder::encodedDataAtIndex(), is_string_array_, foreign_storage::ParquetArrayEncoder::scalar_encoder_, string_buffer_, and foreign_storage::ParquetArrayEncoder::updateMetadataForAppendedArrayItem().

39  {
40  if (!is_string_array_) {
41  auto string_value =
42  scalar_encoder_->encodedDataToString(encodedDataAtIndex(encoded_index));
43  array_string_.emplace_back(string_value);
44  } else {
45  CHECK_GT(string_buffer_.size(), static_cast<size_t>(encoded_index));
46  array_string_.emplace_back(string_buffer_[encoded_index]);
47  }
48  updateMetadataForAppendedArrayItem(encoded_index);
49  }
#define CHECK_GT(x, y)
Definition: Logger.h:234
int8_t * encodedDataAtIndex(const size_t index)
void updateMetadataForAppendedArrayItem(const int64_t encoded_index)
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

+ Here is the call graph for this function:

void foreign_storage::ParquetArrayDetectEncoder::appendArraysToBuffer ( )
inline override protected virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 67 of file ParquetArrayDetectEncoder.h.

67  {
68  // no-op as data is already written to buffer in `processLastArray`
69  }
void foreign_storage::ParquetArrayDetectEncoder::appendToDetectBuffer ( )
inline private

Definition at line 77 of file ParquetArrayDetectEncoder.h.

References foreign_storage::TypedParquetDetectBuffer::appendValue(), array_string_, detect_buffer_, foreign_storage::ParquetArrayEncoder::isLastArrayEmpty(), foreign_storage::ParquetArrayEncoder::isLastArrayNull(), and join().

Referenced by processLastArray().

77  {
78  if (isLastArrayNull()) {
79  detect_buffer_->appendValue("NULL");
80  } else if (isLastArrayEmpty()) {
81  detect_buffer_->appendValue("{}");
82  } else {
83  detect_buffer_->appendValue("{" + join(array_string_, ",") + "}");
84  array_string_.clear();
85  }
86  }
std::string join(T const &container, std::string const &delim)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayDetectEncoder::encodeAllValues ( const int8_t *  values,
const int64_t  values_read 
)
inline override protected virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 52 of file ParquetArrayDetectEncoder.h.

References foreign_storage::ParquetArrayEncoder::encodeAllValues(), is_string_array_, and string_buffer_.

52  {
53  if (!is_string_array_) {
54  ParquetArrayEncoder::encodeAllValues(values, values_read);
55  } else { // string arrays are a special case that require special handling
56  string_buffer_.clear();
57  auto parquet_data_ptr = reinterpret_cast<const parquet::ByteArray*>(values);
58  for (int64_t i = 0; i < values_read; ++i) {
59  auto& byte_array = parquet_data_ptr[i];
60  auto string_value =
61  std::string{reinterpret_cast<const char*>(byte_array.ptr), byte_array.len};
62  string_buffer_.push_back(string_value);
63  }
64  }
65  }
virtual void encodeAllValues(const int8_t *values, const int64_t values_read)

+ Here is the call graph for this function:

void foreign_storage::ParquetArrayDetectEncoder::processLastArray ( )
inline override protected virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 71 of file ParquetArrayDetectEncoder.h.

References appendToDetectBuffer(), and foreign_storage::ParquetArrayEncoder::processLastArray().

+ Here is the call graph for this function:

Member Data Documentation

std::vector<std::string> foreign_storage::ParquetArrayDetectEncoder::array_string_
private

Definition at line 90 of file ParquetArrayDetectEncoder.h.

Referenced by appendArrayItem(), and appendToDetectBuffer().

TypedParquetDetectBuffer* foreign_storage::ParquetArrayDetectEncoder::detect_buffer_
private

Definition at line 88 of file ParquetArrayDetectEncoder.h.

Referenced by appendToDetectBuffer(), and ParquetArrayDetectEncoder().

const bool foreign_storage::ParquetArrayDetectEncoder::is_string_array_
private

Definition at line 89 of file ParquetArrayDetectEncoder.h.

Referenced by appendArrayItem(), and encodeAllValues().

std::vector<std::string> foreign_storage::ParquetArrayDetectEncoder::string_buffer_
private

Definition at line 91 of file ParquetArrayDetectEncoder.h.

Referenced by appendArrayItem(), and encodeAllValues().


The documentation for this class was generated from the following file: