19 #include <parquet/schema.h>
20 #include <parquet/types.h>
25 namespace foreign_storage {
53 chunks.begin()->getBuffer());
88 const int16_t* rep_levels,
89 const int64_t values_read,
90 const int64_t levels_read,
95 appendData(def_levels, rep_levels, values_read, levels_read, values);
100 if (invalid_indices.empty()) {
122 const int16_t* rep_levels,
123 const int64_t values_read,
124 const int64_t levels_read,
125 int8_t* values)
override {
126 auto parquet_data_ptr =
reinterpret_cast<const parquet::ByteArray*
>(values);
130 for (int64_t i = 0, j = 0; i < levels_read; ++i) {
132 if (def_levels[i] == 0) {
135 CHECK(j < values_read);
136 auto& byte_array = parquet_data_ptr[j++];
137 auto geo_string_view = std::string_view{
138 reinterpret_cast<const char*
>(byte_array.ptr), byte_array.len};
141 }
catch (
const std::runtime_error& error) {
159 const int16_t* rep_levels,
160 const int64_t values_read,
161 const int64_t levels_read,
162 int8_t* values)
override {
163 UNREACHABLE() <<
"unexpected call to appendDataTrackErrors from unsupported encoder";
168 const std::vector<ArrayDatum>& datum_buffer) {
170 for (
const auto& datum : datum_buffer) {
174 CHECK(datum_buffer.empty());
186 for (int64_t i = 0; i < row_count; ++i) {
199 auto buffer = chunk->getBuffer();
AbstractBuffer * getBuffer(std::list< Chunk_NS::Chunk > &chunks, const SQLTypes sql_type, GeoColumnType geo_column_type)
void eraseInvalidData(const FindContainer &invalid_indices)
int64_t current_batch_offset_
void validateAndAppendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices) override
void processNullGeoElement()
void appendArrayDatumsToBuffer()
std::vector< ArrayDatum > coords_datum_buffer_
void appendDataTrackErrors(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
TypedParquetStorageBuffer< ArrayDatum > * coords_column_buffer_
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
TypedParquetStorageBuffer< std::string > * base_column_buffer_
bool hasRenderGroupColumn() const
std::vector< ArrayDatum > bounds_datum_buffer_
HOST DEVICE SQLTypes get_type() const
void appendArrayDatumsIfApplicable(TypedParquetStorageBuffer< ArrayDatum > *column_buffer, const std::vector< ArrayDatum > &datum_buffer)
std::vector< int32_t > render_group_value_buffer_
AbstractBuffer * render_group_column_buffer_
std::set< int64_t > InvalidRowGroupIndices
ParquetGeospatialImportEncoder()
An AbstractBuffer is a unit of data management for a data manager.
bool hasBoundsColumn() const
void processGeoElement(std::string_view geo_string_view)
std::vector< ArrayDatum > ring_sizes_datum_buffer_
void appendBaseAndRenderGroupData(const int64_t row_count)
ParquetGeospatialImportEncoder(std::list< Chunk_NS::Chunk > &chunks)
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
void setSize(const size_t size)
bool hasPolyRingsColumn() const
const ColumnDescriptor * geo_column_descriptor_
TypedParquetStorageBuffer< ArrayDatum > * bounds_column_buffer_
std::vector< ArrayDatum > poly_rings_datum_buffer_
TypedParquetStorageBuffer< ArrayDatum > * poly_rings_column_buffer_
void appendElement(const Type &element)
void eraseInvalidIndicesInBuffer(const InvalidRowGroupIndices &invalid_indices) override
std::list< T >::iterator getIteratorForGeoColumnType(std::list< T > &list, const SQLTypes column_type, const GeoColumnType geo_column)
bool hasRingSizesColumn() const
InvalidRowGroupIndices * invalid_indices_
TypedParquetStorageBuffer< ArrayDatum > * ring_sizes_column_buffer_