OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetGeospatialImportEncoder Class Reference

#include <ParquetGeospatialImportEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetGeospatialImportEncoder:
+ Collaboration diagram for foreign_storage::ParquetGeospatialImportEncoder:

Public Member Functions

 ParquetGeospatialImportEncoder ()
 
 ParquetGeospatialImportEncoder (std::list< Chunk_NS::Chunk > &chunks)
 
void validateAndAppendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices) override
 
void eraseInvalidIndicesInBuffer (const InvalidRowGroupIndices &invalid_indices) override
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
void appendDataTrackErrors (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
virtual std::shared_ptr
< ChunkMetadata >
getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)
 
RejectedRowIndices getRejectedRowIndices () const
 
virtual void disableMetadataStatsValidation ()
 
virtual void initializeErrorTracking (const SQLTypeInfo &column_type)
 
- Public Member Functions inherited from foreign_storage::GeospatialEncoder
virtual ~GeospatialEncoder ()=default
 
 GeospatialEncoder (const RenderGroupAnalyzerMap *render_group_analyzer_map)
 
 GeospatialEncoder (std::list< Chunk_NS::Chunk > &chunks, const RenderGroupAnalyzerMap *render_group_analyzer_map)
 
 GeospatialEncoder (std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const RenderGroupAnalyzerMap *render_group_analyzer_map)
 

Private Member Functions

void appendArrayDatumsIfApplicable (TypedParquetStorageBuffer< ArrayDatum > *column_buffer, const std::vector< ArrayDatum > &datum_buffer)
 
void appendArrayDatumsToBuffer ()
 
void appendBaseAndRenderGroupData (const int64_t row_count)
 
AbstractBuffer * getBuffer (std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
 

Private Attributes

int64_t current_batch_offset_
 
InvalidRowGroupIndices * invalid_indices_
 
TypedParquetStorageBuffer
< std::string > * 
base_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
coords_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
bounds_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
ring_or_line_sizes_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
poly_rings_column_buffer_
 
AbstractBuffer * render_group_column_buffer_
 

Additional Inherited Members

- Protected Types inherited from foreign_storage::GeospatialEncoder
enum  GeoColumnType {
  COORDS, BOUNDS, RING_OR_LINE_SIZES, POLY_RINGS,
  RENDER_GROUP
}
 
- Protected Member Functions inherited from foreign_storage::GeospatialEncoder
void appendBaseAndRenderGroupDataAndUpdateMetadata (const int64_t row_count)
 
void validateChunksSizing (std::list< Chunk_NS::Chunk > &chunks) const
 
void validateMetadataSizing (std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata) const
 
void appendArrayDatumsToBufferAndUpdateMetadata ()
 
void appendToArrayEncoderAndUpdateMetadata (const std::vector< ArrayDatum > &datum_parse_buffer, Encoder *encoder, ChunkMetadata *chunk_metadata) const
 
void processGeoElement (std::string_view geo_string_view)
 
void processNullGeoElement ()
 
void clearParseBuffers ()
 
void clearDatumBuffers ()
 
template<typename T >
std::list< T >::iterator getIteratorForGeoColumnType (std::list< T > &list, const SQLTypes column_type, const GeoColumnType geo_column)
 
std::tuple< Encoder
*, ChunkMetadata *, const
ColumnDescriptor * > 
initEncoderAndGetEncoderAndMetadata (std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const SQLTypes sql_type, GeoColumnType geo_column_type)
 
const ColumnDescriptor * getColumnDescriptor (std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
 
bool hasBoundsColumn () const
 
bool hasRingOrLineSizesColumn () const
 
bool hasRenderGroupColumn () const
 
bool hasPolyRingsColumn () const
 
- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr
< ChunkMetadata >
createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 
- Static Protected Member Functions inherited from foreign_storage::GeospatialEncoder
static void throwMalformedGeoElement (const std::string &omnisci_column_name)
 
static void throwMismatchedGeoElement (const std::string &omnisci_column_name)
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 
bool is_error_tracking_enabled_
 
RejectedRowIndices invalid_indices_
 
size_t current_chunk_offset_
 
SQLTypeInfo column_type_
 
bool validate_metadata_stats_
 
- Protected Attributes inherited from foreign_storage::GeospatialEncoder
const ColumnDescriptor * geo_column_descriptor_
 
StringNoneEncoder * base_column_encoder_
 
Encoder * coords_column_encoder_
 
Encoder * bounds_column_encoder_
 
Encoder * ring_or_line_sizes_column_encoder_
 
Encoder * poly_rings_column_encoder_
 
Encoder * render_group_column_encoder_
 
ChunkMetadata * base_column_metadata_
 
ChunkMetadata * coords_column_metadata_
 
ChunkMetadata * bounds_column_metadata_
 
ChunkMetadata * ring_or_line_sizes_column_metadata_
 
ChunkMetadata * poly_rings_column_metadata_
 
ChunkMetadata * render_group_column_metadata_
 
const ColumnDescriptor * coords_column_descriptor_
 
const ColumnDescriptor * bounds_column_descriptor_
 
const ColumnDescriptor * ring_or_line_sizes_column_descriptor_
 
const ColumnDescriptor * poly_rings_column_descriptor_
 
const ColumnDescriptor * render_group_column_descriptor_
 
std::vector< std::string > base_values_
 
std::vector< double > coords_parse_buffer_
 
std::vector< double > bounds_parse_buffer_
 
std::vector< int > ring_or_line_sizes_parse_buffer_
 
std::vector< int > poly_rings_parse_buffer_
 
std::vector< ArrayDatum > coords_datum_buffer_
 
std::vector< ArrayDatum > bounds_datum_buffer_
 
std::vector< ArrayDatum > ring_or_line_sizes_datum_buffer_
 
std::vector< ArrayDatum > poly_rings_datum_buffer_
 
std::vector< int32_t > render_group_value_buffer_
 
const RenderGroupAnalyzerMap * render_group_analyzer_map_
 
- Static Protected Attributes inherited from foreign_storage::GeospatialEncoder
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 

Detailed Description

Definition at line 27 of file ParquetGeospatialImportEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetGeospatialImportEncoder::ParquetGeospatialImportEncoder ( )
inline

Definition at line 31 of file ParquetGeospatialImportEncoder.h.

32  : ParquetEncoder(nullptr)
33  , GeospatialEncoder(nullptr)
35  , invalid_indices_(nullptr) {}
ParquetEncoder(Data_Namespace::AbstractBuffer *buffer)
GeospatialEncoder(const RenderGroupAnalyzerMap *render_group_analyzer_map)
foreign_storage::ParquetGeospatialImportEncoder::ParquetGeospatialImportEncoder ( std::list< Chunk_NS::Chunk > &  chunks)
inline

Definition at line 37 of file ParquetGeospatialImportEncoder.h.

References base_column_buffer_, foreign_storage::GeospatialEncoder::BOUNDS, bounds_column_buffer_, CHECK, ColumnDescriptor::columnType, foreign_storage::GeospatialEncoder::COORDS, coords_column_buffer_, foreign_storage::GeospatialEncoder::geo_column_descriptor_, SQLTypeInfo::get_type(), getBuffer(), foreign_storage::GeospatialEncoder::hasBoundsColumn(), foreign_storage::GeospatialEncoder::hasPolyRingsColumn(), foreign_storage::GeospatialEncoder::hasRenderGroupColumn(), foreign_storage::GeospatialEncoder::hasRingOrLineSizesColumn(), SQLTypeInfo::is_geometry(), foreign_storage::GeospatialEncoder::POLY_RINGS, poly_rings_column_buffer_, foreign_storage::GeospatialEncoder::RENDER_GROUP, render_group_column_buffer_, foreign_storage::GeospatialEncoder::RING_OR_LINE_SIZES, and ring_or_line_sizes_column_buffer_.

38  : ParquetEncoder(nullptr)
39  , GeospatialEncoder(chunks, nullptr)
41  , invalid_indices_(nullptr)
42  , base_column_buffer_(nullptr)
43  , coords_column_buffer_(nullptr)
44  , bounds_column_buffer_(nullptr)
46  , poly_rings_column_buffer_(nullptr)
47  , render_group_column_buffer_(nullptr) {
49 
50  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
51 
52  base_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<std::string>*>(
53  chunks.begin()->getBuffer());
55 
56  // initialize coords column
57  coords_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
58  getBuffer(chunks, geo_column_type, COORDS));
60 
61  // initialize bounds column
62  if (hasBoundsColumn()) {
63  bounds_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
64  getBuffer(chunks, geo_column_type, BOUNDS));
66  }
67 
68  // initialize ring sizes column & render group column
71  dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
72  getBuffer(chunks, geo_column_type, RING_OR_LINE_SIZES));
74  }
75  if (hasRenderGroupColumn()) {
76  render_group_column_buffer_ = getBuffer(chunks, geo_column_type, RENDER_GROUP);
78  }
79 
80  // initialize poly rings column
81  if (hasPolyRingsColumn()) {
82  poly_rings_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
83  getBuffer(chunks, geo_column_type, POLY_RINGS));
85  }
86  }
AbstractBuffer * getBuffer(std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
ParquetEncoder(Data_Namespace::AbstractBuffer *buffer)
TypedParquetStorageBuffer< ArrayDatum > * coords_column_buffer_
TypedParquetStorageBuffer< std::string > * base_column_buffer_
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
GeospatialEncoder(const RenderGroupAnalyzerMap *render_group_analyzer_map)
#define CHECK(condition)
Definition: Logger.h:289
bool is_geometry() const
Definition: sqltypes.h:588
TypedParquetStorageBuffer< ArrayDatum > * ring_or_line_sizes_column_buffer_
const ColumnDescriptor * geo_column_descriptor_
SQLTypeInfo columnType
TypedParquetStorageBuffer< ArrayDatum > * bounds_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * poly_rings_column_buffer_

+ Here is the call graph for this function:

Member Function Documentation

void foreign_storage::ParquetGeospatialImportEncoder::appendArrayDatumsIfApplicable ( TypedParquetStorageBuffer< ArrayDatum > *  column_buffer,
const std::vector< ArrayDatum > &  datum_buffer 
)
inline private

Definition at line 168 of file ParquetGeospatialImportEncoder.h.

References foreign_storage::TypedParquetStorageBuffer< Type >::appendElement(), and CHECK.

Referenced by appendArrayDatumsToBuffer().

169  {
170  if (column_buffer) {
171  for (const auto& datum : datum_buffer) {
172  column_buffer->appendElement(datum);
173  }
174  } else {
175  CHECK(datum_buffer.empty());
176  }
177  }
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::appendArrayDatumsToBuffer ( )
inline private

Definition at line 179 of file ParquetGeospatialImportEncoder.h.

References appendArrayDatumsIfApplicable(), bounds_column_buffer_, foreign_storage::GeospatialEncoder::bounds_datum_buffer_, coords_column_buffer_, foreign_storage::GeospatialEncoder::coords_datum_buffer_, poly_rings_column_buffer_, foreign_storage::GeospatialEncoder::poly_rings_datum_buffer_, ring_or_line_sizes_column_buffer_, and foreign_storage::GeospatialEncoder::ring_or_line_sizes_datum_buffer_.

Referenced by appendData().

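179  {
180  appendArrayDatumsIfApplicable(coords_column_buffer_, coords_datum_buffer_);
181  appendArrayDatumsIfApplicable(bounds_column_buffer_, bounds_datum_buffer_);
182  appendArrayDatumsIfApplicable(ring_or_line_sizes_column_buffer_,
183  ring_or_line_sizes_datum_buffer_);
184  appendArrayDatumsIfApplicable(poly_rings_column_buffer_, poly_rings_datum_buffer_);
185  }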
std::vector< ArrayDatum > coords_datum_buffer_
std::vector< ArrayDatum > ring_or_line_sizes_datum_buffer_
TypedParquetStorageBuffer< ArrayDatum > * coords_column_buffer_
std::vector< ArrayDatum > bounds_datum_buffer_
void appendArrayDatumsIfApplicable(TypedParquetStorageBuffer< ArrayDatum > *column_buffer, const std::vector< ArrayDatum > &datum_buffer)
TypedParquetStorageBuffer< ArrayDatum > * ring_or_line_sizes_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * bounds_column_buffer_
std::vector< ArrayDatum > poly_rings_datum_buffer_
TypedParquetStorageBuffer< ArrayDatum > * poly_rings_column_buffer_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::appendBaseAndRenderGroupData ( const int64_t  row_count)
inline private

Definition at line 187 of file ParquetGeospatialImportEncoder.h.

References Data_Namespace::AbstractBuffer::append(), foreign_storage::TypedParquetStorageBuffer< Type >::appendElement(), base_column_buffer_, render_group_column_buffer_, and foreign_storage::GeospatialEncoder::render_group_value_buffer_.

Referenced by appendData().

187  {
188  for (int64_t i = 0; i < row_count; ++i) {
190  }
192  auto data_ptr = reinterpret_cast<int8_t*>(render_group_value_buffer_.data());
193  render_group_column_buffer_->append(data_ptr, sizeof(int32_t) * row_count);
194  }
195  }
TypedParquetStorageBuffer< std::string > * base_column_buffer_
std::vector< int32_t > render_group_value_buffer_
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::appendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

add null if failed

Implements foreign_storage::ParquetEncoder.

Definition at line 122 of file ParquetGeospatialImportEncoder.h.

References appendArrayDatumsToBuffer(), appendBaseAndRenderGroupData(), CHECK, foreign_storage::GeospatialEncoder::clearDatumBuffers(), foreign_storage::GeospatialEncoder::clearParseBuffers(), current_batch_offset_, invalid_indices_, foreign_storage::GeospatialEncoder::processGeoElement(), and foreign_storage::GeospatialEncoder::processNullGeoElement().

Referenced by validateAndAppendData().

126  {
127  auto parquet_data_ptr = reinterpret_cast<const parquet::ByteArray*>(values);
128 
130 
131  for (int64_t i = 0, j = 0; i < levels_read; ++i) {
133  if (def_levels[i] == 0) {
135  } else {
136  CHECK(j < values_read);
137  auto& byte_array = parquet_data_ptr[j++];
138  auto geo_string_view = std::string_view{
139  reinterpret_cast<const char*>(byte_array.ptr), byte_array.len};
140  try {
141  processGeoElement(geo_string_view);
142  } catch (const std::runtime_error& error) {
148  }
149  }
150  }
151 
153 
154  appendBaseAndRenderGroupData(levels_read);
155 
156  current_batch_offset_ += levels_read;
157  }
void processGeoElement(std::string_view geo_string_view)
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::appendDataTrackErrors ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

Implements foreign_storage::ParquetEncoder.

Definition at line 159 of file ParquetGeospatialImportEncoder.h.

References UNREACHABLE.

163  {
164  UNREACHABLE() << "unexpected call to appendDataTrackErrors from unsupported encoder";
165  }
#define UNREACHABLE()
Definition: Logger.h:333
void foreign_storage::ParquetGeospatialImportEncoder::eraseInvalidIndicesInBuffer ( const InvalidRowGroupIndices &  invalid_indices)
inline override virtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 99 of file ParquetGeospatialImportEncoder.h.

References base_column_buffer_, bounds_column_buffer_, coords_column_buffer_, foreign_storage::TypedParquetStorageBuffer< Type >::eraseInvalidData(), foreign_storage::GeospatialEncoder::hasBoundsColumn(), foreign_storage::GeospatialEncoder::hasPolyRingsColumn(), foreign_storage::GeospatialEncoder::hasRenderGroupColumn(), foreign_storage::GeospatialEncoder::hasRingOrLineSizesColumn(), poly_rings_column_buffer_, render_group_column_buffer_, ring_or_line_sizes_column_buffer_, Data_Namespace::AbstractBuffer::setSize(), and Data_Namespace::AbstractBuffer::size().

100  {
101  if (invalid_indices.empty()) {
102  return;
103  }
104  base_column_buffer_->eraseInvalidData(invalid_indices);
105  coords_column_buffer_->eraseInvalidData(invalid_indices);
106  if (hasBoundsColumn()) {
107  bounds_column_buffer_->eraseInvalidData(invalid_indices);
108  }
109  if (hasRingOrLineSizesColumn()) {
111  }
112  if (hasPolyRingsColumn()) {
114  }
115  if (hasRenderGroupColumn()) {
117  sizeof(int32_t) *
118  (render_group_column_buffer_->size() - invalid_indices.size()));
119  }
120  }
void eraseInvalidData(const FindContainer &invalid_indices)
TypedParquetStorageBuffer< ArrayDatum > * coords_column_buffer_
TypedParquetStorageBuffer< std::string > * base_column_buffer_
void setSize(const size_t size)
TypedParquetStorageBuffer< ArrayDatum > * ring_or_line_sizes_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * bounds_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * poly_rings_column_buffer_

+ Here is the call graph for this function:

AbstractBuffer* foreign_storage::ParquetGeospatialImportEncoder::getBuffer ( std::list< Chunk_NS::Chunk > &  chunks,
const SQLTypes  sql_type,
GeoColumnType  geo_column_type 
)
inline private

Definition at line 197 of file ParquetGeospatialImportEncoder.h.

References foreign_storage::GeospatialEncoder::getIteratorForGeoColumnType().

Referenced by ParquetGeospatialImportEncoder().

199  {
200  auto chunk = getIteratorForGeoColumnType(chunks, sql_type, geo_column_type);
201  auto buffer = chunk->getBuffer();
202  return buffer;
203  }
std::list< T >::iterator getIteratorForGeoColumnType(std::list< T > &list, const SQLTypes column_type, const GeoColumnType geo_column)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::validateAndAppendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values,
const SQLTypeInfo &  column_type,
InvalidRowGroupIndices &  invalid_indices 
)
inline override virtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 88 of file ParquetGeospatialImportEncoder.h.

References appendData(), and invalid_indices_.

94  {
95  invalid_indices_ = &invalid_indices; // used in assembly algorithm
96  appendData(def_levels, rep_levels, values_read, levels_read, values);
97  }
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override

+ Here is the call graph for this function:

Member Data Documentation

TypedParquetStorageBuffer<std::string>* foreign_storage::ParquetGeospatialImportEncoder::base_column_buffer_
private
TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::bounds_column_buffer_
private
TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::coords_column_buffer_
private
int64_t foreign_storage::ParquetGeospatialImportEncoder::current_batch_offset_
private

Definition at line 205 of file ParquetGeospatialImportEncoder.h.

Referenced by appendData().

InvalidRowGroupIndices* foreign_storage::ParquetGeospatialImportEncoder::invalid_indices_
private

Definition at line 206 of file ParquetGeospatialImportEncoder.h.

Referenced by appendData(), and validateAndAppendData().

TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::poly_rings_column_buffer_
private
AbstractBuffer* foreign_storage::ParquetGeospatialImportEncoder::render_group_column_buffer_
private
TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::ring_or_line_sizes_column_buffer_
private

The documentation for this class was generated from the following file: