23 #include <parquet/schema.h>
24 #include <parquet/types.h>
26 namespace foreign_storage {
33 std::unique_ptr<ChunkMetadata>& chunk_metadata)
38 ,
min_(std::numeric_limits<V>::max())
39 ,
max_(std::numeric_limits<V>::lowest()) {}
42 const int16_t* rep_levels,
43 const int64_t values_read,
44 const int64_t levels_read,
45 const bool is_last_batch,
46 int8_t* values)
override {
57 int8_t* omnisci_data_bytes,
58 const size_t num_elements)
override {
60 auto parquet_data_ptr =
61 reinterpret_cast<const parquet::ByteArray*
>(parquet_data_bytes);
62 auto omnisci_data_ptr =
reinterpret_cast<V*
>(omnisci_data_bytes);
63 std::vector<std::string_view> string_views;
64 string_views.reserve(num_elements);
65 for (
size_t i = 0; i < num_elements; ++i) {
66 auto& byte_array = parquet_data_ptr[i];
67 string_views.emplace_back(reinterpret_cast<const char*>(byte_array.ptr),
75 int8_t* omnisci_data_bytes)
override {
80 const parquet::RowGroupMetaData* group_metadata,
81 const int parquet_column_index,
84 group_metadata, parquet_column_index, column_type);
85 auto column_metadata = group_metadata->ColumnChunk(parquet_column_index);
87 column_metadata->num_values();
96 V* data_ptr =
reinterpret_cast<V*
>(values);
97 for (int64_t i = 0; i < values_read; ++i) {
98 min_ = std::min<V>(data_ptr[i],
min_);
99 max_ = std::max<V>(data_ptr[i],
max_);
bool encodingIsIdentityForSameTypes() const override
void updateMetadataStats(int64_t values_read, int8_t *values)
std::vector< int8_t > encode_buffer_
void copy(const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination) override
StringDictionary * string_dictionary_
std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, const bool is_last_batch, int8_t *values) override
ParquetStringEncoder(Data_Namespace::AbstractBuffer *buffer, StringDictionary *string_dictionary, std::unique_ptr< ChunkMetadata > &chunk_metadata)
An AbstractBuffer is a unit of data management for a data manager.
std::unique_ptr< ChunkMetadata > & chunk_metadata_
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, const bool is_last_batch, int8_t *values) override
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
virtual std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)
void encodeAndCopyContiguous(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements) override
void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes) override
const size_t omnisci_data_type_byte_size_