OmniSciDB  2e3a973ef4
ParquetTypeMappings.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <Shared/sqltypes.h>
20 #include <parquet/schema.h>
21 #include <parquet/types.h>
22 #include <set>
23 
25 
26 namespace foreign_storage {
27 
33  inline static bool isColumnMappingSupported(
34  const ColumnDescriptor* omnisci_desc,
35  const parquet::ColumnDescriptor* parquet_desc) {
36  auto column_type = omnisci_desc->columnType.is_array()
37  ? omnisci_desc->columnType.get_elem_type()
38  : omnisci_desc->columnType;
39  auto logical_type = parquet_desc->logical_type();
40  if (logical_type->is_none()) { // Fallback on physical type
41  return physical_type_mappings.find(
42  {column_type.get_type(), parquet_desc->physical_type()}) !=
44  }
45  if (validateIntegralMapping(column_type, logical_type)) {
46  return true;
47  }
48  if (validateDecimalMapping(column_type, logical_type)) {
49  return true;
50  }
51  if (validateStringMapping(column_type, logical_type)) {
52  return true;
53  }
54  if (validateDateTimeMapping(column_type, logical_type)) {
55  return true;
56  }
57  return false;
58  }
59 
60  inline static bool isSameTimeUnit(
61  const SQLTypeInfo& type,
62  const parquet::LogicalType::TimeUnit::unit time_unit) {
63  return (type.get_precision() == 3 &&
64  time_unit == parquet::LogicalType::TimeUnit::MILLIS) ||
65  (type.get_precision() == 6 &&
66  time_unit == parquet::LogicalType::TimeUnit::MICROS) ||
67  (type.get_precision() == 9 &&
68  time_unit == parquet::LogicalType::TimeUnit::NANOS);
69  }
70 
71  private:
72  inline const static std::set<std::tuple<SQLTypes, parquet::Type::type>>
73  physical_type_mappings{{kBOOLEAN, parquet::Type::BOOLEAN},
74  {kTINYINT, parquet::Type::INT32},
75  {kTINYINT, parquet::Type::INT64},
76  {kSMALLINT, parquet::Type::INT32},
77  {kSMALLINT, parquet::Type::INT64},
78  {kINT, parquet::Type::INT32},
79  {kINT, parquet::Type::INT64},
80  {kBIGINT, parquet::Type::INT32},
81  {kBIGINT, parquet::Type::INT64},
82  {kFLOAT, parquet::Type::FLOAT},
83  {kFLOAT, parquet::Type::DOUBLE},
84  {kDOUBLE, parquet::Type::FLOAT},
85  {kDOUBLE, parquet::Type::DOUBLE},
86  {kTEXT, parquet::Type::BYTE_ARRAY},
87  {kPOINT, parquet::Type::BYTE_ARRAY},
88  {kLINESTRING, parquet::Type::BYTE_ARRAY},
89  {kPOLYGON, parquet::Type::BYTE_ARRAY},
90  {kMULTIPOLYGON, parquet::Type::BYTE_ARRAY}};
91 
92  inline static bool validateIntegralMapping(
93  const SQLTypeInfo& column_type,
94  const std::shared_ptr<const parquet::LogicalType>& logical_type) {
95  if (logical_type->is_int() && column_type.is_integer()) {
96  auto int_logical_type =
97  dynamic_cast<const parquet::IntLogicalType*>(logical_type.get());
98  auto logical_byte_width = int_logical_type->bit_width() / 8;
99  auto omnisci_byte_width = column_type.get_size();
100  bool is_signed = int_logical_type->is_signed();
101  // If parquet type is unsigned, to represent the same range with signed
102  // integers, at least one additional bit is required
103  if ((is_signed && logical_byte_width <= omnisci_byte_width) ||
104  (!is_signed && logical_byte_width < omnisci_byte_width)) {
105  return true;
106  }
107  }
108  return false;
109  }
110 
111  inline static bool validateDecimalMapping(
112  const SQLTypeInfo& column_type,
113  const std::shared_ptr<const parquet::LogicalType>& logical_type) {
114  if (logical_type->is_decimal() && column_type.is_decimal()) {
115  auto decimal_logical_type =
116  dynamic_cast<const parquet::DecimalLogicalType*>(logical_type.get());
117  if (column_type.get_precision() == decimal_logical_type->precision() &&
118  column_type.get_scale() == decimal_logical_type->scale()) {
119  return true;
120  }
121  }
122  return false;
123  }
124 
125  inline static bool validateStringMapping(
126  const SQLTypeInfo& column_type,
127  const std::shared_ptr<const parquet::LogicalType>& logical_type) {
128  return logical_type->is_string() &&
129  (column_type.is_string() || column_type.is_geometry());
130  }
131 
132  inline static bool validateDateTimeMapping(
133  const SQLTypeInfo& column_type,
134  const std::shared_ptr<const parquet::LogicalType>& logical_type) {
135  if (logical_type->is_timestamp() && column_type.is_timestamp()) {
136  auto timestamp_type =
137  dynamic_cast<const parquet::TimestampLogicalType*>(logical_type.get());
138  CHECK(timestamp_type);
139  return (isSameTimeUnit(column_type, timestamp_type->time_unit()) ||
140  column_type.get_precision() == 0);
141  }
142  return (logical_type->is_time() && column_type.get_type() == kTIME) ||
143  (logical_type->is_date() && column_type.is_date());
144  }
145 };
146 
147 } // namespace foreign_storage
bool is_array() const
Definition: sqltypes.h:425
int get_precision() const
Definition: sqltypes.h:262
bool is_string() const
Definition: sqltypes.h:417
Definition: sqltypes.h:51
bool is_integer() const
Definition: sqltypes.h:419
bool is_date() const
Definition: sqltypes.h:642
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
Constants for Builtin SQL Types supported by OmniSci.
bool is_decimal() const
Definition: sqltypes.h:420
HOST DEVICE int get_scale() const
Definition: sqltypes.h:264
static bool validateDateTimeMapping(const SQLTypeInfo &column_type, const std::shared_ptr< const parquet::LogicalType > &logical_type)
static bool validateIntegralMapping(const SQLTypeInfo &column_type, const std::shared_ptr< const parquet::LogicalType > &logical_type)
specifies the content in-memory of a row in the column metadata table
static const std::set< std::tuple< SQLTypes, parquet::Type::type > > physical_type_mappings
static bool isColumnMappingSupported(const ColumnDescriptor *omnisci_desc, const parquet::ColumnDescriptor *parquet_desc)
Definition: sqltypes.h:54
bool is_geometry() const
Definition: sqltypes.h:429
static bool isSameTimeUnit(const SQLTypeInfo &type, const parquet::LogicalType::TimeUnit::unit time_unit)
static bool validateStringMapping(const SQLTypeInfo &column_type, const std::shared_ptr< const parquet::LogicalType > &logical_type)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:624
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
Definition: sqltypes.h:47
SQLTypeInfo columnType
static bool validateDecimalMapping(const SQLTypeInfo &column_type, const std::shared_ptr< const parquet::LogicalType > &logical_type)
bool is_timestamp() const
Definition: sqltypes.h:654