OmniSciDB  85c2d10cdc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ParquetFixedLengthArrayEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <stdexcept>
20 
21 #include <parquet/types.h>
23 #include "ParquetArrayEncoder.h"
25 
26 namespace foreign_storage {
27 
29  public:
31  std::shared_ptr<ParquetScalarEncoder> scalar_encoder,
32  const ColumnDescriptor* column_desciptor)
33  : ParquetArrayEncoder(data_buffer, scalar_encoder, column_desciptor)
34  , column_desciptor_(*column_desciptor)
35  , array_element_count_(column_desciptor->columnType.get_size() /
37  CHECK(column_desciptor->columnType.get_size() % omnisci_data_type_byte_size_ == 0);
38  }
39 
// Produces the chunk metadata for one parquet row group and sets its numBytes
// to omnisci_data_type_byte_size_ * (row group row count) * array_element_count_.
//
// Parameters:
//   group_metadata       - row group metadata; its num_rows() feeds the size
//   parquet_column_index - forwarded together with the other two arguments
//   column_type          - forwarded together with the other two arguments
// Returns the `metadata` object after numBytes has been assigned.
//
// NOTE(review): the line that declares `metadata` (the start of the call whose
// argument list appears below) is absent from this view -- confirm which
// function produces it before relying on this summary.
40  std::shared_ptr<ChunkMetadata> getRowGroupMetadata(
41  const parquet::RowGroupMetaData* group_metadata,
42  const int parquet_column_index,
43  const SQLTypeInfo& column_type) override {
45  group_metadata, parquet_column_index, column_type);
// One entry of array_element_count_ elements per row of the row group.
46  metadata->numBytes =
47  omnisci_data_type_byte_size_ * group_metadata->num_rows() * array_element_count_;
48  return metadata;
49  }
50 
51  protected:
53 
54  private:
56  auto omnisci_data_ptr = resizeArrayDataBytes(array_element_count_);
57  setNullFixedLengthArraySentinel(omnisci_data_ptr);
58  for (size_t i = 1; i < array_element_count_; ++i) {
59  scalar_encoder_->setNull(omnisci_data_ptr + i * omnisci_data_type_byte_size_);
60  }
61  }
62 
// Stores a type-specific value into element [0] of `omnisci_data_bytes`,
// selecting the store width from ti.get_type().
//
// Parameters:
//   omnisci_data_bytes - destination buffer; only its first element is written
//
// NOTE(review): several lines of this function are absent from this view: the
// declaration of `ti`, the statement inside the is_string() branch, and the
// right-hand side of every assignment except the float and double cases.
// `ti` is presumably the array's element type and the missing values are
// presumably the null-array sentinels -- confirm against the original header.
63  void setNullFixedLengthArraySentinel(int8_t* omnisci_data_bytes) {
65  if (ti.is_string()) {
66  // TODO: after investigation as to why fixed length arrays with
67  // strings can not represent null arrays, either fix this error
68  // or erase this comment.
70  }
71  const auto type = ti.get_type();
72  switch (type) {
73  case kBOOLEAN:
74  reinterpret_cast<bool*>(omnisci_data_bytes)[0] =
76  break;
// 64-bit integer store shared by BIGINT, NUMERIC and DECIMAL.
77  case kBIGINT:
78  case kNUMERIC:
79  case kDECIMAL:
80  reinterpret_cast<int64_t*>(omnisci_data_bytes)[0] =
82  break;
83  case kINT:
84  reinterpret_cast<int32_t*>(omnisci_data_bytes)[0] =
86  break;
87  case kSMALLINT:
88  reinterpret_cast<int16_t*>(omnisci_data_bytes)[0] =
90  break;
91  case kTINYINT:
92  reinterpret_cast<int8_t*>(omnisci_data_bytes)[0] =
94  break;
// Floating point types use the dedicated NULL_ARRAY_FLOAT / NULL_ARRAY_DOUBLE macros.
95  case kFLOAT:
96  reinterpret_cast<float*>(omnisci_data_bytes)[0] = NULL_ARRAY_FLOAT;
97  break;
98  case kDOUBLE:
99  reinterpret_cast<double*>(omnisci_data_bytes)[0] = NULL_ARRAY_DOUBLE;
100  break;
// Time-like types are written as a 64-bit integer.
101  case kTIME:
102  case kTIMESTAMP:
103  case kDATE:
104  reinterpret_cast<int64_t*>(omnisci_data_bytes)[0] =
106  break;
// Text and geo types, and any type not listed above, fall through to UNREACHABLE().
107  case kTEXT:
108  case kVARCHAR:
109  case kCHAR:
110  case kPOINT:
111  case kLINESTRING:
112  case kPOLYGON:
113  case kMULTIPOLYGON:
114  default:
115  UNREACHABLE();
116  }
117  }
118 
120  auto size_of_last_array = sizeOfLastArray();
121  if (!isLastArrayNull()) {
122  if (size_of_last_array != array_element_count_) {
124  size_of_last_array, array_element_count_, column_desciptor_.columnName);
125  }
126  } else {
127  // append a null array sentinel
128  CHECK(size_of_last_array == 0);
130  }
131  }
132 
133  void throwWrongSizeArray(const size_t size_of_last_array,
134  const size_t array_element_count,
135  const std::string& omnisci_column_name) {
136  throw ForeignStorageException("Detected a row with " +
137  std::to_string(size_of_last_array) +
138  " elements being loaded into"
139  " OmniSci column '" +
140  omnisci_column_name +
141  "' which has a fixed length array type,"
142  " expecting " +
143  std::to_string(array_element_count) + " elements.");
144  }
145 
146  void throwNullInDictionaryEncodedColumn(const std::string& omnisci_column_name) {
147  throw ForeignStorageException("Detected a null array being imported into OmniSci '" +
148  omnisci_column_name +
149  "' column which has a fixed length array type of "
150  "dictionary encoded text. Currently "
151  "null arrays for this type of column are not allowed.");
152  }
153 
156 };
157 } // namespace foreign_storage
HOST DEVICE int get_size() const
Definition: sqltypes.h:324
Definition: sqltypes.h:48
#define UNREACHABLE()
Definition: Logger.h:241
std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
ParquetFixedLengthArrayEncoder(Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
void throwWrongSizeArray(const size_t size_of_last_array, const size_t array_element_count, const std::string &omnisci_column_name)
std::string to_string(char const *&&v)
int8_t * resizeArrayDataBytes(const size_t additional_num_elements)
#define NULL_ARRAY_FLOAT
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
Definition: sqltypes.h:51
Definition: sqltypes.h:52
std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
Definition: sqltypes.h:40
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
#define NULL_ARRAY_DOUBLE
#define CHECK(condition)
Definition: Logger.h:197
Definition: sqltypes.h:44
SQLTypeInfo columnType
void throwNullInDictionaryEncodedColumn(const std::string &omnisci_column_name)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:712
std::string columnName
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_