OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParquetGeospatialEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <parquet/schema.h>
20 #include <parquet/types.h>
21 #include "GeospatialEncoder.h"
22 #include "ParquetEncoder.h"
23 
24 namespace foreign_storage {
25 
27  public:
// Constructs an encoder from the geometry-validation flag alone.
// The ParquetEncoder base is initialised with nullptr and
// `geo_validate_geometry` is forwarded unchanged to the GeospatialEncoder base.
// NOTE(review): the class declaration line is missing from this listing
// (embedded line numbers jump from 25 to 27) - confirm the base classes
// against the real header.
28  ParquetGeospatialEncoder(const bool geo_validate_geometry)
29  : ParquetEncoder(nullptr), GeospatialEncoder(geo_validate_geometry) {}
30 
// Constructs an encoder bound to caller-owned chunks and chunk metadata.
// `chunks`, `chunk_metadata` and `geo_validate_geometry` are forwarded to the
// GeospatialEncoder base; the ParquetEncoder base is initialised with nullptr.
// NOTE(review): `parquet_column_descriptor` is not used in the visible
// initializer list or body - presumably kept for signature parity with other
// encoders; confirm against callers.
31  ParquetGeospatialEncoder(const parquet::ColumnDescriptor* parquet_column_descriptor,
32  std::list<Chunk_NS::Chunk>& chunks,
33  std::list<std::unique_ptr<ChunkMetadata>>& chunk_metadata,
34  const bool geo_validate_geometry)
35  : ParquetEncoder(nullptr)
36  , GeospatialEncoder(chunks, chunk_metadata, geo_validate_geometry) {}
37 
// Appends one batch of Parquet values to this geospatial encoder.
//
// `values` is reinterpreted as an array of parquet::ByteArray. The loop walks
// `levels_read` entries with index `i`; a separate index `j` advances only
// when a value is consumed from that array. An entry whose definition level
// is 0 takes the null branch; any other entry has its bytes wrapped in a
// std::string_view and passed to processGeoElement().
//
// NOTE(review): this listing omits several statements of the method (the
// embedded line numbers skip 45, 48, 50, 51, 53, 55, 64-68, 76, 78 and 80).
// The comments below describe only what is visible; check the real header
// before relying on them. `rep_levels` is not referenced in any visible line.
38  void appendData(const int16_t* def_levels,
39  const int16_t* rep_levels,
40  const int64_t values_read,
41  const int64_t levels_read,
42  int8_t* values) override {
// View the raw value buffer as an array of Parquet byte arrays.
43  auto parquet_data_ptr = reinterpret_cast<const parquet::ByteArray*>(values);
44 
46 
// i indexes levels (one per row in this batch); j indexes consumed values.
47  for (int64_t i = 0, j = 0; i < levels_read; ++i) {
// Definition level 0 selects the null-handling branch for this entry.
49  if (def_levels[i] == 0) {
// The start of this condition is missing from the listing.
52  .get_notnull()) { // mark as invalid due to a null in a NOT NULL column
54  }
56  } else {
// Guard against reading past the values actually decoded.
57  CHECK(j < values_read);
58  auto& byte_array = parquet_data_ptr[j++];
// Non-owning view over the byte array's payload; no copy is made here.
59  auto geo_string_view = std::string_view{
60  reinterpret_cast<const char*>(byte_array.ptr), byte_array.len};
61  try {
62  processGeoElement(geo_string_view);
63  } catch (const std::runtime_error& error) {
// The first branch of this handler is missing from the listing; in the
// visible else-branch the caught exception is rethrown unchanged.
69  } else {
70  throw;
71  }
72  }
73  }
74  }
75 
77 
79 
// Advance the running chunk offset by the number of levels in this batch.
// The enclosing condition (embedded line 80) is missing from the listing.
81  current_chunk_offset_ += levels_read;
82  }
83  }
84 
// Error-tracking entry point: forwards every argument unchanged to
// appendData(). As the existing comment below states, appendData() itself
// switches behaviour on the `is_error_tracking_enabled_` flag, so no separate
// implementation is needed here.
// NOTE(review): one line of the body (embedded line 90) is missing from this
// listing, presumably the start of the comment below.
85  void appendDataTrackErrors(const int16_t* def_levels,
86  const int16_t* rep_levels,
87  const int64_t values_read,
88  const int64_t levels_read,
89  int8_t* values) override {
91  // `appendData` modifies its behaviour based on the
92  // `is_error_tracking_enabled_` flag to handle this case
93  appendData(def_levels, rep_levels, values_read, levels_read, values);
94  }
95 };
96 
97 } // namespace foreign_storage
RejectedRowIndices invalid_indices_
void appendDataTrackErrors(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
void processGeoElement(std::string_view geo_string_view)
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values) override
#define CHECK(condition)
Definition: Logger.h:291
void appendBaseDataAndUpdateMetadata(const int64_t row_count)
ParquetGeospatialEncoder(const bool geo_validate_geometry)
ParquetGeospatialEncoder(const parquet::ColumnDescriptor *parquet_column_descriptor, std::list< Chunk_NS::Chunk > &chunks, std::list< std::unique_ptr< ChunkMetadata >> &chunk_metadata, const bool geo_validate_geometry)