OmniSciDB  3a86f6ec37
ParquetTimestampEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "ParquetInPlaceEncoder.h"
20 
21 namespace foreign_storage {
22 
23 // The following semantics apply to the templated types below.
24 //
25 // V - type of omnisci data
26 // T - physical type of parquet data
27 // conversion_denominator - the denominator constant used in converting parquet to omnisci
28 // data
29 //
30 // `conversion_denominator` is a template parameter rather than a class member so
31 // that it is fixed at compile time instead of run time. In testing this has a major
32 // impact on the runtime of the conversion performed by this encoder, since the
33 // compiler can optimize significantly when the value is known at compile time.
34 template <typename V, typename T, T conversion_denominator>
37  public:
39  const ColumnDescriptor* column_desciptor,
40  const parquet::ColumnDescriptor* parquet_column_descriptor)
41  : TypedParquetInPlaceEncoder<V, T>(buffer,
42  column_desciptor,
43  parquet_column_descriptor) {
44  CHECK(parquet_column_descriptor->logical_type()->is_timestamp());
45  }
46 
47  void encodeAndCopy(const int8_t* parquet_data_bytes,
48  int8_t* omnisci_data_bytes) override {
49  const auto& parquet_data_value = reinterpret_cast<const T*>(parquet_data_bytes)[0];
50  auto& omnisci_data_value = reinterpret_cast<V*>(omnisci_data_bytes)[0];
51  omnisci_data_value = convert(parquet_data_value);
52  }
53 
54  void validate(std::shared_ptr<parquet::Statistics> stats,
55  const SQLTypeInfo& column_type) const override {
56  CHECK(column_type.is_timestamp() || column_type.is_date());
57  auto [unencoded_stats_min, unencoded_stats_max] =
59  if (column_type.is_timestamp()) {
61  unencoded_stats_max, convert(unencoded_stats_max), column_type);
63  unencoded_stats_min, convert(unencoded_stats_min), column_type);
64  } else if (column_type.is_date()) {
66  column_type);
68  column_type);
69  }
70  }
71 
72  private:
73  T convert(const T& value) const {
74  T quotient = value / conversion_denominator;
75  return value < 0 && (value % conversion_denominator != 0) ? quotient - 1 : quotient;
76  }
77 };
78 
79 } // namespace foreign_storage
void validate(std::shared_ptr< parquet::Statistics > stats, const SQLTypeInfo &column_type) const override
ParquetTimestampEncoder(Data_Namespace::AbstractBuffer *buffer, const ColumnDescriptor *column_desciptor, const parquet::ColumnDescriptor *parquet_column_descriptor)
bool is_date() const
Definition: sqltypes.h:715
void encodeAndCopy(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes) override
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
static void validateValue(const D &data_value, const SQLTypeInfo &column_type)
static void validateValue(const D &data_value, const D &display_data_value, const SQLTypeInfo &column_type)
#define CHECK(condition)
Definition: Logger.h:197
std::pair< T, T > getUnencodedStats(std::shared_ptr< parquet::Statistics > stats) const
bool is_timestamp() const
Definition: sqltypes.h:727