OmniSciDB  c0231cc57d
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParquetFixedLengthArrayEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <stdexcept>
20 
21 #include <parquet/types.h>
23 #include "ParquetArrayEncoder.h"
25 
26 namespace foreign_storage {
27 
29  public:
31  std::shared_ptr<ParquetScalarEncoder> scalar_encoder,
32  const ColumnDescriptor* column_desciptor)
33  : ParquetArrayEncoder(data_buffer, scalar_encoder, column_desciptor)
34  , column_desciptor_(*column_desciptor)
35  , array_element_count_(column_desciptor->columnType.get_size() /
37  CHECK(column_desciptor->columnType.get_size() % omnisci_data_type_byte_size_ == 0);
38  }
39 
// Returns the chunk metadata for one Parquet row group, with `numBytes`
// overwritten by a size computed from the row count.
//
// group_metadata        Parquet metadata of the row group; only num_rows() is
//                       read in the visible code.
// parquet_column_index  forwarded together with the other two arguments.
// column_type           forwarded together with the other two arguments.
//
// NOTE(review): listing line 44, which declares `metadata`, is missing from
// this extract. The visible argument list suggests it forwards to another
// getRowGroupMetadata implementation -- confirm against the original header.
40  std::shared_ptr<ChunkMetadata> getRowGroupMetadata(
41  const parquet::RowGroupMetaData* group_metadata,
42  const int parquet_column_index,
43  const SQLTypeInfo& column_type) override {
45  group_metadata, parquet_column_index, column_type);
// Size is a plain product: bytes per element * rows * elements per array.
46  metadata->numBytes =
47  omnisci_data_type_byte_size_ * group_metadata->num_rows() * array_element_count_;
48  return metadata;
49  }
50 
51  protected:
// Override of processLastArray().
// NOTE(review): both body statements (listing lines 53-54) are missing from
// this extract, so what this override does cannot be stated from here --
// consult the original header.
52  void processLastArray() override {
55  }
56 
57  private:
59  auto omnisci_data_ptr = resizeArrayDataBytes(array_element_count_);
60  setNullFixedLengthArraySentinel(omnisci_data_ptr);
61  for (size_t i = 1; i < array_element_count_; ++i) {
62  scalar_encoder_->setNull(omnisci_data_ptr + i * omnisci_data_type_byte_size_);
63  }
64  }
65 
// Writes a null-array sentinel into element slot 0 of `omnisci_data_bytes`,
// choosing the store width from the element type.
//
// omnisci_data_bytes  destination buffer; it is reinterpreted as a pointer to
//                     the selected type and index [0] is assigned.
//
// Dictionary-encoded strings are first mapped to a type via
// string_dict_to_int_type(); all other types use ti.get_type() directly.
// Types without a store below (kTEXT, kVARCHAR, kCHAR, the geo types and
// anything unlisted) fall through to UNREACHABLE().
//
// NOTE(review): this extract drops listing line 67 (where `ti` is declared)
// and the right-hand side of every store except the float/double ones.
// Presumably the missing values come from inline_fixed_encoding_null_array_val,
// which appears in this page's cross-reference list -- confirm against the
// original header.
66  void setNullFixedLengthArraySentinel(int8_t* omnisci_data_bytes) {
68  SQLTypes type;
69  if (ti.is_dict_encoded_string()) {
70  type = string_dict_to_int_type(ti);
71  } else {
72  type = ti.get_type();
73  }
74  switch (type) {
75  case kBOOLEAN:
76  reinterpret_cast<bool*>(omnisci_data_bytes)[0] =
78  break;
// 64-bit store shared by BIGINT, NUMERIC and DECIMAL.
79  case kBIGINT:
80  case kNUMERIC:
81  case kDECIMAL:
82  reinterpret_cast<int64_t*>(omnisci_data_bytes)[0] =
84  break;
85  case kINT:
86  reinterpret_cast<int32_t*>(omnisci_data_bytes)[0] =
88  break;
89  case kSMALLINT:
90  reinterpret_cast<int16_t*>(omnisci_data_bytes)[0] =
92  break;
93  case kTINYINT:
94  reinterpret_cast<int8_t*>(omnisci_data_bytes)[0] =
96  break;
// Floating-point types use dedicated sentinel macros.
97  case kFLOAT:
98  reinterpret_cast<float*>(omnisci_data_bytes)[0] = NULL_ARRAY_FLOAT;
99  break;
100  case kDOUBLE:
101  reinterpret_cast<double*>(omnisci_data_bytes)[0] = NULL_ARRAY_DOUBLE;
102  break;
// Time-like types are written as a 64-bit value.
103  case kTIME:
104  case kTIMESTAMP:
105  case kDATE:
106  reinterpret_cast<int64_t*>(omnisci_data_bytes)[0] =
108  break;
// No sentinel store exists for the remaining types; reaching this point is
// treated as a programming error.
109  case kTEXT:
110  case kVARCHAR:
111  case kCHAR:
112  case kPOINT:
113  case kLINESTRING:
114  case kPOLYGON:
115  case kMULTIPOLYGON:
116  default:
117  UNREACHABLE();
118  }
119  }
120 
122  auto size_of_last_array = sizeOfLastArray();
123  if (isLastArrayNull()) {
124  // append a null array sentinel
125  CHECK(size_of_last_array == 0);
127  } else if (isLastArrayEmpty()) {
129  } else {
130  if (size_of_last_array != array_element_count_) {
132  size_of_last_array, array_element_count_, column_desciptor_.columnName);
133  }
134  }
135  }
136 
// Builds the error message for an empty array arriving in a fixed-length
// array column. The message names the column and the expected element count.
//
// array_element_count  number of elements the column type expects.
// omnisci_column_name  column name embedded in the message.
//
// NOTE(review): listing line 139, the statement that consumes this message, is
// missing from this extract. Presumably it is `throw ForeignStorageException(`
// as in throwWrongSizeArray -- confirm against the original header.
137  void throwEmptyArrayException(const size_t array_element_count,
138  const std::string& omnisci_column_name) {
140  "Detected an empty array"
141  " being loaded into"
142  " HeavyDB column '" +
143  omnisci_column_name +
144  "' which has a fixed length array type,"
145  " expecting " +
146  std::to_string(array_element_count) + " elements.");
147  }
148 
// Unconditionally throws ForeignStorageException for a row whose array length
// differs from the column's fixed length.
//
// size_of_last_array   element count found in the offending row.
// array_element_count  element count the column type expects.
// omnisci_column_name  column name embedded in the message.
//
// The message reports both counts and the column name. This function never
// returns normally.
149  void throwWrongSizeArray(const size_t size_of_last_array,
150  const size_t array_element_count,
151  const std::string& omnisci_column_name) {
152  throw ForeignStorageException("Detected a row with " +
153  std::to_string(size_of_last_array) +
154  " elements being loaded into"
155  " HeavyDB column '" +
156  omnisci_column_name +
157  "' which has a fixed length array type,"
158  " expecting " +
159  std::to_string(array_element_count) + " elements.");
160  }
161 
164 };
165 } // namespace foreign_storage
HOST DEVICE int get_size() const
Definition: sqltypes.h:414
Definition: sqltypes.h:63
SQLTypes
Definition: sqltypes.h:52
#define UNREACHABLE()
Definition: Logger.h:266
std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
ParquetFixedLengthArrayEncoder(Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
void throwWrongSizeArray(const size_t size_of_last_array, const size_t array_element_count, const std::string &omnisci_column_name)
std::string to_string(char const *&&v)
void throwEmptyArrayException(const size_t array_element_count, const std::string &omnisci_column_name)
int8_t * resizeArrayDataBytes(const size_t additional_num_elements)
#define NULL_ARRAY_FLOAT
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
Definition: sqltypes.h:66
Definition: sqltypes.h:67
std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
Definition: sqltypes.h:55
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
#define NULL_ARRAY_DOUBLE
#define CHECK(condition)
Definition: Logger.h:222
Definition: sqltypes.h:59
SQLTypeInfo columnType
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:981
std::string columnName
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_
SQLTypes string_dict_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:503