OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
foreign_storage::ParquetGeospatialImportEncoder Class Reference

#include <ParquetGeospatialImportEncoder.h>

+ Inheritance diagram for foreign_storage::ParquetGeospatialImportEncoder:
+ Collaboration diagram for foreign_storage::ParquetGeospatialImportEncoder:

Public Member Functions

 ParquetGeospatialImportEncoder ()
 
 ParquetGeospatialImportEncoder (std::list< Chunk_NS::Chunk > &chunks)
 
void validateAndAppendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices) override
 
void eraseInvalidIndicesInBuffer (const InvalidRowGroupIndices &invalid_indices) override
 
void appendData (const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
 
- Public Member Functions inherited from foreign_storage::ParquetEncoder
 ParquetEncoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~ParquetEncoder ()=default
 
virtual std::shared_ptr
< ChunkMetadata > 
getRowGroupMetadata (const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)
 
- Public Member Functions inherited from foreign_storage::GeospatialEncoder
virtual ~GeospatialEncoder ()=default
 
 GeospatialEncoder ()
 
 GeospatialEncoder (std::list< Chunk_NS::Chunk > &chunks)
 
 GeospatialEncoder (std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata)
 

Private Member Functions

void appendArrayDatumsIfApplicable (TypedParquetStorageBuffer< ArrayDatum > *column_buffer, const std::vector< ArrayDatum > &datum_buffer)
 
void appendArrayDatumsToBuffer ()
 
void appendBaseAndRenderGroupData (const int64_t row_count)
 
AbstractBuffer * getBuffer (std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
 

Private Attributes

int64_t current_batch_offset_
 
InvalidRowGroupIndices * invalid_indices_
 
TypedParquetStorageBuffer
< std::string > * 
base_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
coords_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
bounds_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
ring_sizes_column_buffer_
 
TypedParquetStorageBuffer
< ArrayDatum > * 
poly_rings_column_buffer_
 
AbstractBuffer * render_group_column_buffer_
 

Additional Inherited Members

- Protected Types inherited from foreign_storage::GeospatialEncoder
enum  GeoColumnType {
  COORDS, BOUNDS, RING_SIZES, POLY_RINGS,
  RENDER_GROUP
}
 
- Protected Member Functions inherited from foreign_storage::GeospatialEncoder
void appendBaseAndRenderGroupDataAndUpdateMetadata (const int64_t row_count)
 
void validateChunksSizing (std::list< Chunk_NS::Chunk > &chunks) const
 
void validateMetadataSizing (std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata) const
 
void appendArrayDatumsToBufferAndUpdateMetadata ()
 
void appendToArrayEncoderAndUpdateMetadata (const std::vector< ArrayDatum > &datum_parse_buffer, Encoder *encoder, ChunkMetadata *chunk_metadata) const
 
void processGeoElement (std::string_view geo_string_view)
 
void processNullGeoElement ()
 
void clearParseBuffers ()
 
void clearDatumBuffers ()
 
template<typename T >
std::list< T >::iterator getIteratorForGeoColumnType (std::list< T > &list, const SQLTypes column_type, const GeoColumnType geo_column)
 
std::tuple< Encoder
*, ChunkMetadata *, const
ColumnDescriptor * > 
initEncoderAndGetEncoderAndMetadata (std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const SQLTypes sql_type, GeoColumnType geo_column_type)
 
const ColumnDescriptor * getColumnDescriptor (std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
 
bool hasBoundsColumn () const
 
bool hasRingSizesColumn () const
 
bool hasRenderGroupColumn () const
 
bool hasPolyRingsColumn () const
 
- Static Protected Member Functions inherited from foreign_storage::ParquetEncoder
static std::shared_ptr
< ChunkMetadata > 
createMetadata (const SQLTypeInfo &column_type)
 
static void throwNotNullViolation (const std::string &parquet_column_name)
 
static void validateNullCount (const std::string &parquet_column_name, int64_t null_count, const SQLTypeInfo &column_type)
 
- Static Protected Member Functions inherited from foreign_storage::GeospatialEncoder
static void throwMalformedGeoElement (const std::string &omnisci_column_name)
 
static void throwMismatchedGeoElement (const std::string &omnisci_column_name)
 
- Protected Attributes inherited from foreign_storage::ParquetEncoder
Data_Namespace::AbstractBuffer * buffer_
 
- Protected Attributes inherited from foreign_storage::GeospatialEncoder
const ColumnDescriptor * geo_column_descriptor_
 
StringNoneEncoder * base_column_encoder_
 
Encoder * coords_column_encoder_
 
Encoder * bounds_column_encoder_
 
Encoder * ring_sizes_column_encoder_
 
Encoder * poly_rings_column_encoder_
 
Encoder * render_group_column_encoder_
 
ChunkMetadata * base_column_metadata_
 
ChunkMetadata * coords_column_metadata_
 
ChunkMetadata * bounds_column_metadata_
 
ChunkMetadata * ring_sizes_column_metadata_
 
ChunkMetadata * poly_rings_column_metadata_
 
ChunkMetadata * render_group_column_metadata_
 
const ColumnDescriptor * coords_column_descriptor_
 
const ColumnDescriptor * bounds_column_descriptor_
 
const ColumnDescriptor * ring_sizes_column_descriptor_
 
const ColumnDescriptor * poly_rings_column_descriptor_
 
const ColumnDescriptor * render_group_column_descriptor_
 
std::vector< int32_t > render_group_values_
 
std::vector< std::string > base_values_
 
std::vector< double > coords_parse_buffer_
 
std::vector< double > bounds_parse_buffer_
 
std::vector< int > ring_sizes_parse_buffer_
 
std::vector< int > poly_rings_parse_buffer_
 
std::vector< ArrayDatum > coords_datum_buffer_
 
std::vector< ArrayDatum > bounds_datum_buffer_
 
std::vector< ArrayDatum > ring_sizes_datum_buffer_
 
std::vector< ArrayDatum > poly_rings_datum_buffer_
 
- Static Protected Attributes inherited from foreign_storage::GeospatialEncoder
static constexpr bool PROMOTE_POLYGON_TO_MULTIPOLYGON = true
 

Detailed Description

Definition at line 27 of file ParquetGeospatialImportEncoder.h.

Constructor & Destructor Documentation

foreign_storage::ParquetGeospatialImportEncoder::ParquetGeospatialImportEncoder ( )
inline
foreign_storage::ParquetGeospatialImportEncoder::ParquetGeospatialImportEncoder ( std::list< Chunk_NS::Chunk > &  chunks)
inline

Definition at line 37 of file ParquetGeospatialImportEncoder.h.

References base_column_buffer_, foreign_storage::GeospatialEncoder::BOUNDS, bounds_column_buffer_, CHECK, ColumnDescriptor::columnType, foreign_storage::GeospatialEncoder::COORDS, coords_column_buffer_, foreign_storage::GeospatialEncoder::geo_column_descriptor_, SQLTypeInfo::get_type(), getBuffer(), foreign_storage::GeospatialEncoder::hasBoundsColumn(), foreign_storage::GeospatialEncoder::hasPolyRingsColumn(), foreign_storage::GeospatialEncoder::hasRenderGroupColumn(), foreign_storage::GeospatialEncoder::hasRingSizesColumn(), SQLTypeInfo::is_geometry(), foreign_storage::GeospatialEncoder::POLY_RINGS, poly_rings_column_buffer_, foreign_storage::GeospatialEncoder::RENDER_GROUP, render_group_column_buffer_, foreign_storage::GeospatialEncoder::RING_SIZES, and ring_sizes_column_buffer_.

38  : ParquetEncoder(nullptr)
39  , GeospatialEncoder(chunks)
41  , invalid_indices_(nullptr)
42  , base_column_buffer_(nullptr)
43  , coords_column_buffer_(nullptr)
44  , bounds_column_buffer_(nullptr)
45  , ring_sizes_column_buffer_(nullptr)
46  , poly_rings_column_buffer_(nullptr)
47  , render_group_column_buffer_(nullptr) {
49 
50  const auto geo_column_type = geo_column_descriptor_->columnType.get_type();
51 
52  base_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<std::string>*>(
53  chunks.begin()->getBuffer());
55 
56  // initialize coords column
57  coords_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
58  getBuffer(chunks, geo_column_type, COORDS));
60 
61  // initialize bounds column
62  if (hasBoundsColumn()) {
63  bounds_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
64  getBuffer(chunks, geo_column_type, BOUNDS));
66  }
67 
68  // initialize ring sizes column & render group column
69  if (hasRingSizesColumn()) {
70  ring_sizes_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
71  getBuffer(chunks, geo_column_type, RING_SIZES));
73  }
74  if (hasRenderGroupColumn()) {
75  render_group_column_buffer_ = getBuffer(chunks, geo_column_type, RENDER_GROUP);
77  }
78 
79  // initialize poly rings column
80  if (hasPolyRingsColumn()) {
81  poly_rings_column_buffer_ = dynamic_cast<TypedParquetStorageBuffer<ArrayDatum>*>(
82  getBuffer(chunks, geo_column_type, POLY_RINGS));
84  }
85  }
AbstractBuffer * getBuffer(std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
ParquetEncoder(Data_Namespace::AbstractBuffer *buffer)
TypedParquetStorageBuffer< ArrayDatum > * coords_column_buffer_
TypedParquetStorageBuffer< std::string > * base_column_buffer_
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
#define CHECK(condition)
Definition: Logger.h:209
bool is_geometry() const
Definition: sqltypes.h:521
const ColumnDescriptor * geo_column_descriptor_
SQLTypeInfo columnType
TypedParquetStorageBuffer< ArrayDatum > * bounds_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * poly_rings_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * ring_sizes_column_buffer_

+ Here is the call graph for this function:

Member Function Documentation

void foreign_storage::ParquetGeospatialImportEncoder::appendArrayDatumsIfApplicable ( TypedParquetStorageBuffer< ArrayDatum > *  column_buffer,
const std::vector< ArrayDatum > &  datum_buffer 
)
inline private

Definition at line 159 of file ParquetGeospatialImportEncoder.h.

References foreign_storage::TypedParquetStorageBuffer< Type >::appendElement(), and CHECK.

Referenced by appendArrayDatumsToBuffer().

160  {
161  if (column_buffer) {
162  for (const auto& datum : datum_buffer) {
163  column_buffer->appendElement(datum);
164  }
165  } else {
166  CHECK(datum_buffer.empty());
167  }
168  }
#define CHECK(condition)
Definition: Logger.h:209

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::appendArrayDatumsToBuffer ( )
inline private

Definition at line 170 of file ParquetGeospatialImportEncoder.h.

References appendArrayDatumsIfApplicable(), bounds_column_buffer_, foreign_storage::GeospatialEncoder::bounds_datum_buffer_, coords_column_buffer_, foreign_storage::GeospatialEncoder::coords_datum_buffer_, poly_rings_column_buffer_, foreign_storage::GeospatialEncoder::poly_rings_datum_buffer_, ring_sizes_column_buffer_, and foreign_storage::GeospatialEncoder::ring_sizes_datum_buffer_.

Referenced by appendData().

170  {
175  }
std::vector< ArrayDatum > coords_datum_buffer_
TypedParquetStorageBuffer< ArrayDatum > * coords_column_buffer_
std::vector< ArrayDatum > bounds_datum_buffer_
void appendArrayDatumsIfApplicable(TypedParquetStorageBuffer< ArrayDatum > *column_buffer, const std::vector< ArrayDatum > &datum_buffer)
std::vector< ArrayDatum > ring_sizes_datum_buffer_
TypedParquetStorageBuffer< ArrayDatum > * bounds_column_buffer_
std::vector< ArrayDatum > poly_rings_datum_buffer_
TypedParquetStorageBuffer< ArrayDatum > * poly_rings_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * ring_sizes_column_buffer_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::appendBaseAndRenderGroupData ( const int64_t  row_count)
inline private

Definition at line 177 of file ParquetGeospatialImportEncoder.h.

References Data_Namespace::AbstractBuffer::append(), foreign_storage::TypedParquetStorageBuffer< Type >::appendElement(), base_column_buffer_, i, render_group_column_buffer_, and foreign_storage::GeospatialEncoder::render_group_values_.

Referenced by appendData().

177  {
178  for (int64_t i = 0; i < row_count; ++i) {
180  }
182  render_group_values_.resize(row_count, 0);
183  auto data_ptr = reinterpret_cast<int8_t*>(render_group_values_.data());
184  render_group_column_buffer_->append(data_ptr, sizeof(int32_t) * row_count);
185  }
186  }
TypedParquetStorageBuffer< std::string > * base_column_buffer_
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
std::vector< int32_t > render_group_values_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::appendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values 
)
inline override virtual

add null if failed

Implements foreign_storage::ParquetEncoder.

Definition at line 121 of file ParquetGeospatialImportEncoder.h.

References appendArrayDatumsToBuffer(), appendBaseAndRenderGroupData(), CHECK, foreign_storage::GeospatialEncoder::clearDatumBuffers(), foreign_storage::GeospatialEncoder::clearParseBuffers(), current_batch_offset_, i, invalid_indices_, foreign_storage::GeospatialEncoder::processGeoElement(), and foreign_storage::GeospatialEncoder::processNullGeoElement().

Referenced by validateAndAppendData().

125  {
126  auto parquet_data_ptr = reinterpret_cast<const parquet::ByteArray*>(values);
127 
129 
130  for (int64_t i = 0, j = 0; i < levels_read; ++i) {
132  if (def_levels[i] == 0) {
134  } else {
135  CHECK(j < values_read);
136  auto& byte_array = parquet_data_ptr[j++];
137  auto geo_string_view = std::string_view{
138  reinterpret_cast<const char*>(byte_array.ptr), byte_array.len};
139  try {
140  processGeoElement(geo_string_view);
141  } catch (const std::runtime_error& error) {
147  }
148  }
149  }
150 
152 
153  appendBaseAndRenderGroupData(levels_read);
154 
155  current_batch_offset_ += levels_read;
156  }
void processGeoElement(std::string_view geo_string_view)
#define CHECK(condition)
Definition: Logger.h:209

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::eraseInvalidIndicesInBuffer ( const InvalidRowGroupIndices &  invalid_indices)
inline override virtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 98 of file ParquetGeospatialImportEncoder.h.

References base_column_buffer_, bounds_column_buffer_, coords_column_buffer_, foreign_storage::TypedParquetStorageBuffer< Type >::eraseInvalidData(), foreign_storage::GeospatialEncoder::hasBoundsColumn(), foreign_storage::GeospatialEncoder::hasPolyRingsColumn(), foreign_storage::GeospatialEncoder::hasRenderGroupColumn(), foreign_storage::GeospatialEncoder::hasRingSizesColumn(), poly_rings_column_buffer_, render_group_column_buffer_, ring_sizes_column_buffer_, Data_Namespace::AbstractBuffer::setSize(), and Data_Namespace::AbstractBuffer::size().

99  {
100  if (invalid_indices.empty()) {
101  return;
102  }
103  base_column_buffer_->eraseInvalidData(invalid_indices);
104  coords_column_buffer_->eraseInvalidData(invalid_indices);
105  if (hasBoundsColumn()) {
106  bounds_column_buffer_->eraseInvalidData(invalid_indices);
107  }
108  if (hasRingSizesColumn()) {
110  }
111  if (hasPolyRingsColumn()) {
113  }
114  if (hasRenderGroupColumn()) {
116  sizeof(int32_t) *
117  (render_group_column_buffer_->size() - invalid_indices.size()));
118  }
119  }
void eraseInvalidData(const FindContainer &invalid_indices)
TypedParquetStorageBuffer< ArrayDatum > * coords_column_buffer_
TypedParquetStorageBuffer< std::string > * base_column_buffer_
void setSize(const size_t size)
TypedParquetStorageBuffer< ArrayDatum > * bounds_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * poly_rings_column_buffer_
TypedParquetStorageBuffer< ArrayDatum > * ring_sizes_column_buffer_

+ Here is the call graph for this function:

AbstractBuffer* foreign_storage::ParquetGeospatialImportEncoder::getBuffer ( std::list< Chunk_NS::Chunk > &  chunks,
const SQLTypes  sql_type,
GeoColumnType  geo_column_type 
)
inline private

Definition at line 188 of file ParquetGeospatialImportEncoder.h.

References foreign_storage::GeospatialEncoder::getIteratorForGeoColumnType().

Referenced by ParquetGeospatialImportEncoder().

190  {
191  auto chunk = getIteratorForGeoColumnType(chunks, sql_type, geo_column_type);
192  auto buffer = chunk->getBuffer();
193  return buffer;
194  }
std::list< T >::iterator getIteratorForGeoColumnType(std::list< T > &list, const SQLTypes column_type, const GeoColumnType geo_column)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void foreign_storage::ParquetGeospatialImportEncoder::validateAndAppendData ( const int16_t *  def_levels,
const int16_t *  rep_levels,
const int64_t  values_read,
const int64_t  levels_read,
int8_t *  values,
const SQLTypeInfo &  column_type,
InvalidRowGroupIndices &  invalid_indices 
)
inline override virtual

Implements foreign_storage::ParquetImportEncoder.

Definition at line 87 of file ParquetGeospatialImportEncoder.h.

References appendData(), and invalid_indices_.

93  {
94  invalid_indices_ = &invalid_indices; // used in assembly algorithm
95  appendData(def_levels, rep_levels, values_read, levels_read, values);
96  }
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override

+ Here is the call graph for this function:

Member Data Documentation

TypedParquetStorageBuffer<std::string>* foreign_storage::ParquetGeospatialImportEncoder::base_column_buffer_
private
TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::bounds_column_buffer_
private
TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::coords_column_buffer_
private
int64_t foreign_storage::ParquetGeospatialImportEncoder::current_batch_offset_
private

Definition at line 196 of file ParquetGeospatialImportEncoder.h.

Referenced by appendData().

InvalidRowGroupIndices* foreign_storage::ParquetGeospatialImportEncoder::invalid_indices_
private

Definition at line 197 of file ParquetGeospatialImportEncoder.h.

Referenced by appendData(), and validateAndAppendData().

TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::poly_rings_column_buffer_
private
AbstractBuffer* foreign_storage::ParquetGeospatialImportEncoder::render_group_column_buffer_
private
TypedParquetStorageBuffer<ArrayDatum>* foreign_storage::ParquetGeospatialImportEncoder::ring_sizes_column_buffer_
private

The documentation for this class was generated from the following file: