OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetGeospatialImportEncoder Class Reference

#include <ParquetGeospatialImportEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetGeospatialImportEncoder:
+ Collaboration diagram for foreign_storage::ParquetGeospatialImportEncoder:

Public Member Functions

 ParquetGeospatialImportEncoder (const bool geo_validate_geometry)
 
 ParquetGeospatialImportEncoder (std::list< Chunk_NS::Chunk > &chunks, const bool geo_validate_geometry)
 
void validateAndAppendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices) override
 
void eraseInvalidIndicesInBuffer (const InvalidRowGroupIndices &invalid_indices) override
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
virtual std::shared_ptr
< ChunkMetadata > 
getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)
 
RejectedRowIndices getRejectedRowIndices () const
 
virtual void disableMetadataStatsValidation ()
 
virtual void initializeErrorTracking ()
 
virtual void initializeColumnType (const SQLTypeInfo &column_type)
 
- Public Member Functions inherited from foreign_storage::GeospatialEncoder
virtual ~GeospatialEncoder ()=default
 
 GeospatialEncoder (const bool geo_validate_geometry)
 
 GeospatialEncoder (std::list< Chunk_NS::Chunk > &chunks, const bool geo_validate_geometry)
 
 GeospatialEncoder (std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const bool geo_validate_geometry)
 

Private Member Functions

void appendArrayDatumsIfApplicable (TypedParquetStorageBuffer< ArrayDatum > *column_buffer, const std::vector< ArrayDatum > &datum_buffer)
 
void appendArrayDatumsToBuffer ()
 
void appendBaseData (const int64_t row_count)
 
AbstractBuffer * getBuffer (std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
 

Private Attributes

int64_t current_batch_offset_
 
InvalidRowGroupIndices * invalid_indices_
 
TypedParquetStorageBuffer
< std::string > * 
base_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
coords_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
bounds_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
ring_or_line_sizes_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
poly_rings_column_buffer_
 

Additional Inherited Members

- Protected Types inherited from foreign_storage::GeospatialEncoder
enum  GeoColumnType { COORDS, BOUNDS, RING_OR_LINE_SIZES, POLY_RINGS }
 
- Protected Member Functions inherited from foreign_storage::GeospatialEncoder
void appendBaseDataAndUpdateMetadata (const int64_t row_count)
 
void validateChunksSizing (std::list< Chunk_NS::Chunk > &chunks) const
 
void validateMetadataSizing (std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata) const
 
void appendArrayDatumsToBufferAndUpdateMetadata ()
 
void appendToArrayEncoderAndUpdateMetadata (const std::vector< ArrayDatum > &datum_parse_buffer, Encoder *encoder, ChunkMetadata *chunk_metadata) const
 
void processGeoElement (std::string_view geo_string_view)
 
void processNullGeoElement ()
 
void clearParseBuffers ()
 
void clearDatumBuffers ()
 
template<typename T >
std::list< T >::iterator getIteratorForGeoColumnType (std::list< T > &list, const SQLTypes column_type, const GeoColumnType geo_column)
 
std::tuple< Encoder
*, ChunkMetadata *, const
ColumnDescriptor * > 
initEncoderAndGetEncoderAndMetadata (std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const SQLTypes sql_type, GeoColumnType geo_column_type)
 
const ColumnDescriptor * getColumnDescriptor (std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
 
bool hasBoundsColumn () const
 
bool hasRingOrLineSizesColumn () const
 
bool hasPolyRingsColumn () const
 
- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr
< ChunkMetadata > 
createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 
- Static Protected Member Functions inherited from foreign_storage::GeospatialEncoder
static void throwMalformedGeoElement (const std::string &omnisci_column_name)
 
static void throwMismatchedGeoElement (const std::string &omnisci_column_name)
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 
bool is_error_tracking_enabled_
 
RejectedRowIndices invalid_indices_
 
size_t current_chunk_offset_
 
SQLTypeInfo column_type_
 
bool validate_metadata_stats_
 
- Protected Attributes inherited from foreign_storage::GeospatialEncoder
const ColumnDescriptor * geo_column_descriptor_
 
StringNoneEncoder * base_column_encoder_
 
Encoder * coords_column_encoder_
 
Encoder * bounds_column_encoder_
 
Encoder * ring_or_line_sizes_column_encoder_
 
Encoder * poly_rings_column_encoder_
 
ChunkMetadata * base_column_metadata_
 
ChunkMetadata * coords_column_metadata_
 
ChunkMetadata * bounds_column_metadata_
 
ChunkMetadata * ring_or_line_sizes_column_metadata_
 
ChunkMetadata * poly_rings_column_metadata_
 
const ColumnDescriptor * coords_column_descriptor_
 
const ColumnDescriptor * bounds_column_descriptor_
 
const ColumnDescriptor * ring_or_line_sizes_column_descriptor_
 
const ColumnDescriptor * poly_rings_column_descriptor_
 
std::vector< std::string > base_values_
 
std::vector< double > coords_parse_buffer_
 
std::vector< double > bounds_parse_buffer_
 
std::vector< int > ring_or_line_sizes_parse_buffer_
 
std::vector< int > poly_rings_parse_buffer_
 
std::vector< ArrayDatum > coords_datum_buffer_
 
std::vector< ArrayDatum > bounds_datum_buffer_
 
std::vector< ArrayDatum > ring_or_line_sizes_datum_buffer_
 
std::vector< ArrayDatum > poly_rings_datum_buffer_
 
bool geo_validate_geometry_
 

Detailed Description

Definition at line 27 of file ParquetGeospatialImportEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetGeospatialImportEncoder::ParquetGeospatialImportEncoder ( const bool  geo_validate_geometry)
inline

Definition at line 31 of file ParquetGeospatialImportEncoder.h.

32  : ParquetEncoder(nullptr)
33  , GeospatialEncoder(geo_validate_geometry)
35  , invalid_indices_(nullptr) {}
ParquetEncoder(Data_Namespace::AbstractBuffer *buffer)
GeospatialEncoder(const bool geo_validate_geometry)
foreign_storage::ParquetGeospatialImportEncoder::ParquetGeospatialImportEncoder ( std::list< Chunk_NS::Chunk > &  chunks,
const bool  geo_validate_geometry 
)
inline

Definition at line 37 of file ParquetGeospatialImportEncoder.h.

References base_column_buffer_, foreign_storage::GeospatialEncoder::BOUNDS, bounds_column_buffer_, CHECK, ColumnDescriptor::columnType, foreign_storage::GeospatialEncoder::COORDS, coords_column_buffer_, foreign_storage::GeospatialEncoder::geo_column_descriptor_, SQLTypeInfo::get_type(), getBuffer(), foreign_storage::GeospatialEncoder::hasBoundsColumn(), foreign_storage::GeospatialEncoder::hasPolyRingsColumn(), foreign_storage::GeospatialEncoder::hasRingOrLineSizesColumn(), SQLTypeInfo::is_geometry(), foreign_storage::GeospatialEncoder::POLY_RINGS, poly_rings_column_buffer_, foreign_storage::GeospatialEncoder::RING_OR_LINE_SIZES, and ring_or_line_sizes_column_buffer_.

39  : ParquetEncoder(nullptr)
40  , GeospatialEncoder(chunks, geo_validate_geometry)
42  , invalid_indices_(nullptr)
43  , base_column_buffer_(nullptr)
44  , coords_column_buffer_(nullptr)
45  , bounds_column_buffer_(nullptr)
47  , poly_rings_column_buffer_(nullptr) {
49 
50  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
51 
52  base_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<std::string>*>(
53  chunks.begin()->getBuffer());
55 
56  // initialize coords column
57  coords_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
58  getBuffer(chunks, geo_column_type, COORDS));
60 
61  // initialize bounds column
62  if (hasBoundsColumn()) {
63  bounds_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
64  getBuffer(chunks, geo_column_type, BOUNDS));
66  }
67 
68  // initialize ring sizes column & render group column
71  dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
72  getBuffer(chunks, geo_column_type, RING_OR_LINE_SIZES));
74  }
75 
76  // initialize poly rings column
77  if (hasPolyRingsColumn()) {
78  poly_rings_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
79  getBuffer(chunks, geo_column_type, POLY_RINGS));
81  }
82  }
AbstractBuffer * getBuffer(std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
ParquetEncoder(Data_Namespace::AbstractBuffer *buffer)
TypedParquetStorageBuffer< ArrayDatum > * coords_column_buffer_
TypedParquetStorageBuffer< std::string > * base_column_buffer_
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
#define CHECK(condition)
Definition: Logger.h:291
bool is_geometry() const
Definition: sqltypes.h:595
TypedParquetStorageBuffer< ArrayDatum > * ring_or_line_sizes_column_buffer_
GeospatialEncoder(const bool geo_validate_geometry)
const ColumnDescriptor * geo_column_descriptor_
SQLTypeInfo columnType
TypedParquetStorageBuffer< ArrayDatum > * bounds_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * poly_rings_column_buffer_

+ Here is the call graph for this function:

Member Function Documentation

void foreign_storage::ParquetGeospatialImportEncoder::appendArrayDatumsIfApplicable ( TypedParquetStorageBuffer< ArrayDatum > *  column_buffer,
const std::vector< ArrayDatum > &  datum_buffer 
)
inlineprivate

Definition at line 159 of file ParquetGeospatialImportEncoder.h.

References foreign_storage::TypedParquetStorageBuffer< Type >::appendElement(), and CHECK.

Referenced by appendArrayDatumsToBuffer().

160  {
161  if (column_buffer) {
162  for (const auto& datum : datum_buffer) {
163  column_buffer->appendElement(datum);
164  }
165  } else {
166  CHECK(datum_buffer.empty());
167  }
168  }
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::appendArrayDatumsToBuffer ( )
inlineprivate

Definition at line 170 of file ParquetGeospatialImportEncoder.h.

References appendArrayDatumsIfApplicable(), bounds_column_buffer_, foreign_storage::GeospatialEncoder::bounds_datum_buffer_, coords_column_buffer_, foreign_storage::GeospatialEncoder::coords_datum_buffer_, poly_rings_column_buffer_, foreign_storage::GeospatialEncoder::poly_rings_datum_buffer_, ring_or_line_sizes_column_buffer_, and foreign_storage::GeospatialEncoder::ring_or_line_sizes_datum_buffer_.

Referenced by appendData().

170  {
176  }
std::vector< ArrayDatum > coords_datum_buffer_
std::vector< ArrayDatum > ring_or_line_sizes_datum_buffer_
TypedParquetStorageBuffer< ArrayDatum > * coords_column_buffer_
std::vector< ArrayDatum > bounds_datum_buffer_
void appendArrayDatumsIfApplicable(TypedParquetStorageBuffer< ArrayDatum > *column_buffer, const std::vector< ArrayDatum > &datum_buffer)
TypedParquetStorageBuffer< ArrayDatum > * ring_or_line_sizes_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * bounds_column_buffer_
std::vector< ArrayDatum > poly_rings_datum_buffer_
TypedParquetStorageBuffer< ArrayDatum > * poly_rings_column_buffer_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::appendBaseData ( const int64_t  row_count)
inlineprivate

Definition at line 178 of file ParquetGeospatialImportEncoder.h.

References foreign_storage::TypedParquetStorageBuffer< Type >::appendElement(), and base_column_buffer_.

Referenced by appendData().

178  {
179  for (int64_t i = 0; i < row_count; ++i) {
181  }
182  }
TypedParquetStorageBuffer< std::string > * base_column_buffer_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::appendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inlineoverridevirtual

add null if failed

Implements foreign_storage::ParquetEncoder.

Definition at line 113 of file ParquetGeospatialImportEncoder.h.

References appendArrayDatumsToBuffer(), appendBaseData(), CHECK, foreign_storage::GeospatialEncoder::clearDatumBuffers(), foreign_storage::GeospatialEncoder::clearParseBuffers(), current_batch_offset_, invalid_indices_, foreign_storage::GeospatialEncoder::processGeoElement(), and foreign_storage::GeospatialEncoder::processNullGeoElement().

Referenced by validateAndAppendData().

117  {
118  auto parquet_data_ptr = reinterpret_cast<const parquet::ByteArray*>(values);
119 
121 
122  for (int64_t i = 0, j = 0; i < levels_read; ++i) {
124  if (def_levels[i] == 0) {
126  } else {
127  CHECK(j < values_read);
128  auto& byte_array = parquet_data_ptr[j++];
129  auto geo_string_view = std::string_view{
130  reinterpret_cast<const char*>(byte_array.ptr), byte_array.len};
131  try {
132  processGeoElement(geo_string_view);
133  } catch (const std::runtime_error& error) {
139  }
140  }
141  }
142 
144 
145  appendBaseData(levels_read);
146 
147  current_batch_offset_ += levels_read;
148  }
void processGeoElement(std::string_view geo_string_view)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::appendDataTrackErrors ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inlineoverridevirtual

Implements foreign_storage::ParquetEncoder.

Definition at line 150 of file ParquetGeospatialImportEncoder.h.

References UNREACHABLE.

154  {
155  UNREACHABLE() << "unexpected call to appendDataTrackErrors from unsupported encoder";
156  }
#define UNREACHABLE()
Definition: Logger.h:338
void foreign_storage::ParquetGeospatialImportEncoder::eraseInvalidIndicesInBuffer ( const InvalidRowGroupIndices &  invalid_indices)
inlineoverridevirtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 95 of file ParquetGeospatialImportEncoder.h.

References base_column_buffer_, bounds_column_buffer_, coords_column_buffer_, foreign_storage::TypedParquetStorageBuffer< Type >::eraseInvalidData(), foreign_storage::GeospatialEncoder::hasBoundsColumn(), foreign_storage::GeospatialEncoder::hasPolyRingsColumn(), foreign_storage::GeospatialEncoder::hasRingOrLineSizesColumn(), poly_rings_column_buffer_, and ring_or_line_sizes_column_buffer_.

96  {
97  if (invalid_indices.empty()) {
98  return;
99  }
100  base_column_buffer_->eraseInvalidData(invalid_indices);
101  coords_column_buffer_->eraseInvalidData(invalid_indices);
102  if (hasBoundsColumn()) {
103  bounds_column_buffer_->eraseInvalidData(invalid_indices);
104  }
105  if (hasRingOrLineSizesColumn()) {
107  }
108  if (hasPolyRingsColumn()) {
110  }
111  }
void eraseInvalidData(const FindContainer &invalid_indices)
TypedParquetStorageBuffer< ArrayDatum > * coords_column_buffer_
TypedParquetStorageBuffer< std::string > * base_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * ring_or_line_sizes_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * bounds_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * poly_rings_column_buffer_

+ Here is the call graph for this function:

AbstractBuffer* foreign_storage::ParquetGeospatialImportEncoder::getBuffer ( std::list< Chunk_NS::Chunk > &  chunks,
const SQLTypes  sql_type,
GeoColumnType  geo_column_type 
)
inlineprivate

Definition at line 184 of file ParquetGeospatialImportEncoder.h.

References foreign_storage::GeospatialEncoder::getIteratorForGeoColumnType().

Referenced by ParquetGeospatialImportEncoder().

186  {
187  auto chunk = getIteratorForGeoColumnType(chunks, sql_type, geo_column_type);
188  auto buffer = chunk->getBuffer();
189  return buffer;
190  }
std::list< T >::iterator getIteratorForGeoColumnType(std::list< T > &list, const SQLTypes column_type, const GeoColumnType geo_column)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::validateAndAppendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values,
const SQLTypeInfo &  column_type,
InvalidRowGroupIndices &  invalid_indices 
)
inlineoverridevirtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 84 of file ParquetGeospatialImportEncoder.h.

References appendData(), and invalid_indices_.

90  {
91  invalid_indices_ = &invalid_indices; // used in assembly algorithm
92  appendData(def_levels, rep_levels, values_read, levels_read, values);
93  }
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override

+ Here is the call graph for this function:

Member Data Documentation

TypedParquetStorageBuffer<std::string>* foreign_storage::ParquetGeospatialImportEncoder::base_column_buffer_
private
TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::bounds_column_buffer_
private
TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::coords_column_buffer_
private
int64_t foreign_storage::ParquetGeospatialImportEncoder::current_batch_offset_
private

Definition at line 192 of file ParquetGeospatialImportEncoder.h.

Referenced by appendData().

InvalidRowGroupIndices* foreign_storage::ParquetGeospatialImportEncoder::invalid_indices_
private

Definition at line 193 of file ParquetGeospatialImportEncoder.h.

Referenced by appendData(), and validateAndAppendData().

TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::poly_rings_column_buffer_
private
TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::ring_or_line_sizes_column_buffer_
private

The documentation for this class was generated from the following file: