OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetArrayImportEncoder Class Reference

#include <ParquetArrayImportEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetArrayImportEncoder:
+ Collaboration diagram for foreign_storage::ParquetArrayImportEncoder:

Public Member Functions

 ParquetArrayImportEncoder (Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
 
void appendArrayItem (const int64_t encoded_index) override
 
void validateAndAppendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices) override
 
void resetLastArrayMetadata () override
 
- Public Member Functions inherited from foreign_storage::ParquetArrayEncoder
 ParquetArrayEncoder (Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
 
void appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void finalizeRowGroup ()
 
std::shared_ptr< ChunkMetadata > getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
 
virtual void disableMetadataStatsValidation () override
 
virtual void initializeErrorTracking () override
 
virtual void initializeColumnType (const SQLTypeInfo &column_type) override
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
RejectedRowIndices getRejectedRowIndices () const
 

Protected Member Functions

void appendArraysToBuffer () override
 
void processLastArray () override
 
- Protected Member Functions inherited from foreign_storage::ParquetArrayEncoder
bool isLastArrayNull () const
 
bool isLastArrayEmpty () const
 
size_t sizeOfLastArray () const
 
int8_t * resizeArrayDataBytes (const size_t additional_num_elements)
 
bool isNewArray (const int16_t rep_level) const
 
int8_t * encodedDataAtIndex (const size_t index)
 
void updateMetadataForAppendedArrayItem (const int64_t encoded_index)
 
virtual void encodeAllValues (const int8_t *values, const int64_t values_read)
 

Private Member Functions

ArrayDatum convertToArrayDatum (const int8_t *data, const size_t num_elements)
 
ArrayDatum getNullArrayDatum ()
 
void appendToArrayDatumBuffer ()
 
void eraseInvalidIndicesInBuffer (const InvalidRowGroupIndices &invalid_indices) override
 

Private Attributes

std::vector< bool > is_valid_item_
 
TypedParquetStorageBuffer< ArrayDatum > * array_datum_buffer_
 
const ColumnDescriptor * column_descriptor_
 
size_t num_array_assembled_
 
bool is_invalid_array_
 
InvalidRowGroupIndices * invalid_indices_
 

Additional Inherited Members

- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr< ChunkMetadata > createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 
- Protected Attributes inherited from foreign_storage::ParquetArrayEncoder
size_t omnisci_data_type_byte_size_
 
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_
 
std::vector< int8_t > data_buffer_bytes_
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 
bool is_error_tracking_enabled_
 
RejectedRowIndices invalid_indices_
 
size_t current_chunk_offset_
 
SQLTypeInfo column_type_
 
bool validate_metadata_stats_
 
- Static Protected Attributes inherited from foreign_storage::ParquetArrayEncoder
static const int16_t non_null_def_level = 3
 
static const int16_t item_null_def_level = 2
 
static const int16_t empty_list_def_level = 1
 
static const int16_t list_null_def_level = 0
 

Detailed Description

Definition at line 27 of file ParquetArrayImportEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetArrayImportEncoder::ParquetArrayImportEncoder ( Data_Namespace::AbstractBuffer * data_buffer,
std::shared_ptr< ParquetScalarEncoder > scalar_encoder,
const ColumnDescriptor * column_desciptor 
)
inline

Definition at line 30 of file ParquetArrayImportEncoder.h.

References array_datum_buffer_, and CHECK.

33  : ParquetArrayEncoder(data_buffer, scalar_encoder, column_desciptor)
35  dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(data_buffer))
36  , column_descriptor_(column_desciptor)
38  , is_invalid_array_(false)
39  , invalid_indices_(nullptr) {
41  }
ParquetArrayEncoder(Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
TypedParquetStorageBuffer< ArrayDatum > * array_datum_buffer_
#define CHECK(condition)
Definition: Logger.h:291

Member Function Documentation

void foreign_storage::ParquetArrayImportEncoder::appendArrayItem ( const int64_t  encoded_index)
inline override virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 43 of file ParquetArrayImportEncoder.h.

References foreign_storage::ParquetArrayEncoder::appendArrayItem(), is_invalid_array_, and is_valid_item_.

43  {
45  if (!is_valid_item_[encoded_index]) {
46  is_invalid_array_ = true;
47  }
48  }
virtual void appendArrayItem(const int64_t encoded_index)

+ Here is the call graph for this function:

void foreign_storage::ParquetArrayImportEncoder::appendArraysToBuffer ( )
inline override protected virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 76 of file ParquetArrayImportEncoder.h.

76  {
77  // no-op as data is already written to buffer in `processLastArray`
78  }
void foreign_storage::ParquetArrayImportEncoder::appendToArrayDatumBuffer ( )
inline private

Definition at line 102 of file ParquetArrayImportEncoder.h.

References foreign_storage::TypedParquetStorageBuffer< Type >::appendElement(), array_datum_buffer_, CHECK, convertToArrayDatum(), foreign_storage::ParquetArrayEncoder::data_buffer_bytes_, getNullArrayDatum(), foreign_storage::ParquetArrayEncoder::isLastArrayEmpty(), foreign_storage::ParquetArrayEncoder::isLastArrayNull(), foreign_storage::ParquetArrayEncoder::omnisci_data_type_byte_size_, and foreign_storage::ParquetArrayEncoder::sizeOfLastArray().

Referenced by processLastArray().

102  {
103  if (isLastArrayNull()) {
104  // append a null array offset
106  } else if (isLastArrayEmpty()) {
108  } else {
109  CHECK(data_buffer_bytes_.size() ==
114  .clear(); // can clear immediately, only one array buffered at a time
115  }
116  }
ArrayDatum convertToArrayDatum(const int8_t *data, const size_t num_elements)
TypedParquetStorageBuffer< ArrayDatum > * array_datum_buffer_
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ArrayDatum foreign_storage::ParquetArrayImportEncoder::convertToArrayDatum ( const int8_t *  data,
const size_t  num_elements 
)
inline private

Definition at line 90 of file ParquetArrayImportEncoder.h.

References foreign_storage::ParquetArrayEncoder::omnisci_data_type_byte_size_.

Referenced by appendToArrayDatumBuffer().

90  {
91  const size_t num_bytes = num_elements * omnisci_data_type_byte_size_;
92  std::shared_ptr<int8_t> buffer(new int8_t[num_bytes],
93  std::default_delete<int8_t[]>());
94  memcpy(buffer.get(), data, num_bytes);
95  return ArrayDatum(num_bytes, buffer, false);
96  }
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayImportEncoder::eraseInvalidIndicesInBuffer ( const InvalidRowGroupIndices & invalid_indices)
inline override private virtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 118 of file ParquetArrayImportEncoder.h.

References array_datum_buffer_, and foreign_storage::TypedParquetStorageBuffer< Type >::eraseInvalidData().

119  {
120  if (invalid_indices.empty()) {
121  return;
122  }
123  array_datum_buffer_->eraseInvalidData(invalid_indices);
124  }
void eraseInvalidData(const FindContainer &invalid_indices)
TypedParquetStorageBuffer< ArrayDatum > * array_datum_buffer_

+ Here is the call graph for this function:

ArrayDatum foreign_storage::ParquetArrayImportEncoder::getNullArrayDatum ( )
inline private

Definition at line 98 of file ParquetArrayImportEncoder.h.

References column_descriptor_, ColumnDescriptor::columnType, and import_export::ImporterUtils::composeNullArray().

Referenced by appendToArrayDatumBuffer().

98  {
100  }
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:395
SQLTypeInfo columnType

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetArrayImportEncoder::processLastArray ( )
inline override protected virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 80 of file ParquetArrayImportEncoder.h.

References appendToArrayDatumBuffer(), CHECK, invalid_indices_, is_invalid_array_, and num_array_assembled_.

+ Here is the call graph for this function:

void foreign_storage::ParquetArrayImportEncoder::resetLastArrayMetadata ( )
inline override virtual

Reimplemented from foreign_storage::ParquetArrayEncoder.

Definition at line 70 of file ParquetArrayImportEncoder.h.

References is_invalid_array_, and foreign_storage::ParquetArrayEncoder::resetLastArrayMetadata().

+ Here is the call graph for this function:

void foreign_storage::ParquetArrayImportEncoder::validateAndAppendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values,
const SQLTypeInfo & column_type,
InvalidRowGroupIndices & invalid_indices 
)
inline override virtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 50 of file ParquetArrayImportEncoder.h.

References foreign_storage::ParquetArrayEncoder::appendData(), invalid_indices_, is_valid_item_, and foreign_storage::ParquetArrayEncoder::scalar_encoder_.

56  {
57  // validate all elements
58  is_valid_item_.assign(values_read, true);
59  for (int64_t j = 0; j < values_read; ++j) {
60  try {
61  scalar_encoder_->validate(values, j, column_type);
62  } catch (const std::runtime_error& error) {
63  is_valid_item_[j] = false;
64  }
65  }
66  invalid_indices_ = &invalid_indices; // used in assembly algorithm
67  appendData(def_levels, rep_levels, values_read, levels_read, values);
68  }
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_

+ Here is the call graph for this function:

Member Data Documentation

TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetArrayImportEncoder::array_datum_buffer_
private
const ColumnDescriptor* foreign_storage::ParquetArrayImportEncoder::column_descriptor_
private

Definition at line 128 of file ParquetArrayImportEncoder.h.

Referenced by getNullArrayDatum().

InvalidRowGroupIndices* foreign_storage::ParquetArrayImportEncoder::invalid_indices_
private

Definition at line 131 of file ParquetArrayImportEncoder.h.

Referenced by processLastArray(), and validateAndAppendData().

bool foreign_storage::ParquetArrayImportEncoder::is_invalid_array_
private
std::vector<bool> foreign_storage::ParquetArrayImportEncoder::is_valid_item_
private

Definition at line 126 of file ParquetArrayImportEncoder.h.

Referenced by appendArrayItem(), and validateAndAppendData().

size_t foreign_storage::ParquetArrayImportEncoder::num_array_assembled_
private

Definition at line 129 of file ParquetArrayImportEncoder.h.

Referenced by processLastArray().


The documentation for this class was generated from the following file: