OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParquetDecimalEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <arrow/util/decimal.h>
20 #include "ParquetInPlaceEncoder.h"
21 
22 namespace foreign_storage {
23 template <typename V, typename T, typename NullType = V>
24 class ParquetDecimalEncoder : public TypedParquetInPlaceEncoder<V, T, NullType> {
25  public:
27  const ColumnDescriptor* column_desciptor,
28  const parquet::ColumnDescriptor* parquet_column_descriptor)
29  : TypedParquetInPlaceEncoder<V, T, NullType>(buffer,
30  column_desciptor,
31  parquet_column_descriptor)
32  , parquet_column_type_length_(parquet_column_descriptor->type_length())
33  , decimal_overflow_validator_(column_desciptor->columnType) {}
34 
35  void encodeAndCopy(const int8_t* parquet_data_bytes,
36  int8_t* omnisci_data_bytes) override {
37  const auto& parquet_data_value = reinterpret_cast<const T*>(parquet_data_bytes)[0];
38  auto& omnisci_data_value = reinterpret_cast<V*>(omnisci_data_bytes)[0];
39  omnisci_data_value = getDecimal(parquet_data_value);
40  }
41 
42  void validate(const int8_t* parquet_data,
43  const int64_t j,
44  const SQLTypeInfo& column_type) const override {
45  const auto& parquet_data_value = reinterpret_cast<const T*>(parquet_data)[j];
46  int64_t omnisci_data_value = getDecimal(parquet_data_value);
47  decimal_overflow_validator_.validate(omnisci_data_value);
48  }
49 
50  protected:
51  int64_t getDecimal(const int32_t& parquet_data_value) const {
52  return parquet_data_value;
53  }
54 
55  int64_t getDecimal(const int64_t& parquet_data_value) const {
56  return parquet_data_value;
57  }
58 
59  int64_t getDecimal(const parquet::FixedLenByteArray& parquet_data_value) const {
60  return convertDecimalByteArrayToInt(parquet_data_value.ptr,
62  }
63 
64  int64_t getDecimal(const parquet::ByteArray& parquet_data_value) const {
65  return convertDecimalByteArrayToInt(parquet_data_value.ptr, parquet_data_value.len);
66  }
67 
68  bool encodingIsIdentityForSameTypes() const override { return true; }
69 
70  private:
71  int64_t convertDecimalByteArrayToInt(const uint8_t* byte_array,
72  const int byte_array_size) const {
73  auto result = arrow::Decimal128::FromBigEndian(byte_array, byte_array_size);
74  CHECK(result.ok()) << result.status().message();
75  auto& decimal = result.ValueOrDie();
76  return static_cast<int64_t>(decimal);
77  }
78 
81 };
82 } // namespace foreign_storage
int64_t getDecimal(const int64_t &parquet_data_value) const
int64_t convertDecimalByteArrayToInt(const uint8_t *byte_array, const int byte_array_size) const
int64_t getDecimal(const int32_t &parquet_data_value) const
bool encodingIsIdentityForSameTypes() const override
void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes) override
An AbstractBuffer is a unit of data management for a data manager.
Specifies the in-memory content of a row in the column metadata table.
int64_t getDecimal(const parquet::ByteArray &parquet_data_value) const
const DecimalOverflowValidator decimal_overflow_validator_
void validate(const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const override
int64_t getDecimal(const parquet::FixedLenByteArray &parquet_data_value) const
#define CHECK(condition)
Definition: Logger.h:291
ParquetDecimalEncoder(Data_Namespace::AbstractBuffer *buffer, const ColumnDescriptor *column_desciptor, const parquet::ColumnDescriptor *parquet_column_descriptor)
void validate(T value) const
Definition: Encoder.h:54