OmniSciDB  91042dcc5b
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ParquetFixedLengthArrayEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <stdexcept>
20 
21 #include <parquet/types.h>
23 #include "ParquetArrayEncoder.h"
25 
26 namespace foreign_storage {
27 
29  public:
// Constructor for the fixed-length array encoder.
// NOTE(review): this listing omits the constructor's opening line (listing
// line 30, which declares `data_buffer`) and listing line 36 (the divisor and
// the start of the body), so the text below is not complete C++ on its own.
31  std::shared_ptr<ParquetScalarEncoder> scalar_encoder,
32  const ColumnDescriptor* column_desciptor)
// Hand the buffer, scalar encoder and column descriptor to the base encoder.
33  : ParquetArrayEncoder(data_buffer, scalar_encoder, column_desciptor)
// Initialise the member from the pointed-to descriptor; the pointer is
// dereferenced unconditionally, so it must not be null.
34  , column_desciptor_(*column_desciptor)
// Number of elements per array: the column type's byte size divided by a
// value given on the omitted line -- presumably omnisci_data_type_byte_size_,
// matching the CHECK below; confirm against the full source.
35  , array_element_count_(column_desciptor->columnType.get_size() /
// The column type's byte size must be an exact multiple of the per-element
// byte size, otherwise the integer division above would truncate.
37  CHECK(column_desciptor->columnType.get_size() % omnisci_data_type_byte_size_ == 0);
38  }
39 
// Produces the chunk metadata for one Parquet row group and overrides its
// byte count for fixed-length arrays.
//
// group_metadata       - Parquet metadata of the row group; its num_rows()
//                        is used for the byte count.
// parquet_column_index - forwarded to the metadata call.
// column_type          - forwarded to the metadata call.
// Returns the metadata object after numBytes has been overwritten.
//
// NOTE(review): listing line 44, which declares `metadata` and opens the call
// that receives the three arguments, is omitted from this listing; presumably
// it calls the base-class getRowGroupMetadata -- confirm.
40  std::shared_ptr<ChunkMetadata> getRowGroupMetadata(
41  const parquet::RowGroupMetaData* group_metadata,
42  const int parquet_column_index,
43  const SQLTypeInfo& column_type) override {
45  group_metadata, parquet_column_index, column_type);
// Every row is counted as one full array:
// element byte size * rows in the group * elements per array.
46  metadata->numBytes =
47  omnisci_data_type_byte_size_ * group_metadata->num_rows() * array_element_count_;
48  return metadata;
49  }
50 
51  protected:
53 
54  private:
// Body of the helper that appends one null fixed-length array.
// NOTE(review): the function's signature (listing line 55) is omitted from
// this listing.
// Request room for array_element_count_ more elements; the returned int8_t*
// is presumably the start of the newly added bytes -- confirm against
// resizeArrayDataBytes.
56  auto omnisci_data_ptr = resizeArrayDataBytes(array_element_count_);
// Slot 0 receives the null-array sentinel.
57  setNullFixedLengthArraySentinel(omnisci_data_ptr);
// The loop starts at 1 because slot 0 was just written; each remaining slot is
// set through the scalar encoder's setNull at its byte offset.
58  for (size_t i = 1; i < array_element_count_; ++i) {
59  scalar_encoder_->setNull(omnisci_data_ptr + i * omnisci_data_type_byte_size_);
60  }
61  }
62 
// Writes a null-array sentinel into the first element slot at
// omnisci_data_bytes, using a store width chosen from the element type.
//
// omnisci_data_bytes - destination; only index [0] of the reinterpreted
//                      pointer is written.
//
// NOTE(review): this listing omits listing line 64 (the declaration of `ti`)
// and the right-hand sides of the kBOOLEAN, integer and time/date assignments
// (listing lines 74, 80, 84, 88, 92, 104). The file's cross-reference index
// mentions get_elem_type() and inline_fixed_encoding_null_array_val(), so
// those are the likely contents -- confirm against the full source.
63  void setNullFixedLengthArraySentinel(int8_t* omnisci_data_bytes) {
65  SQLTypes type;
// Dictionary-encoded strings are handled as the integer type returned by
// string_dict_to_int_type; every other type is used as-is.
66  if (ti.is_dict_encoded_string()) {
67  type = string_dict_to_int_type(ti);
68  } else {
69  type = ti.get_type();
70  }
71  switch (type) {
72  case kBOOLEAN:
73  reinterpret_cast<bool*>(omnisci_data_bytes)[0] =
75  break;
// 64-bit integer store shared by kBIGINT, kNUMERIC and kDECIMAL.
76  case kBIGINT:
77  case kNUMERIC:
78  case kDECIMAL:
79  reinterpret_cast<int64_t*>(omnisci_data_bytes)[0] =
81  break;
82  case kINT:
83  reinterpret_cast<int32_t*>(omnisci_data_bytes)[0] =
85  break;
86  case kSMALLINT:
87  reinterpret_cast<int16_t*>(omnisci_data_bytes)[0] =
89  break;
90  case kTINYINT:
91  reinterpret_cast<int8_t*>(omnisci_data_bytes)[0] =
93  break;
// Floating-point types use dedicated sentinel macros.
94  case kFLOAT:
95  reinterpret_cast<float*>(omnisci_data_bytes)[0] = NULL_ARRAY_FLOAT;
96  break;
97  case kDOUBLE:
98  reinterpret_cast<double*>(omnisci_data_bytes)[0] = NULL_ARRAY_DOUBLE;
99  break;
// Time-like types are written through an int64_t pointer.
100  case kTIME:
101  case kTIMESTAMP:
102  case kDATE:
103  reinterpret_cast<int64_t*>(omnisci_data_bytes)[0] =
105  break;
// String and geometry types, and anything unlisted, fall through to
// UNREACHABLE(): they are not expected to reach this function.
106  case kTEXT:
107  case kVARCHAR:
108  case kCHAR:
109  case kPOINT:
110  case kLINESTRING:
111  case kPOLYGON:
112  case kMULTIPOLYGON:
113  default:
114  UNREACHABLE();
115  }
116  }
117 
// Body of the helper that inspects the most recently read array and handles
// three cases: null, empty, or populated.
// NOTE(review): this listing omits the function's signature (listing line 118)
// and the statements on listing lines 123, 125 and 128, i.e. the action taken
// in each branch.
119  auto size_of_last_array = sizeOfLastArray();
120  if (isLastArrayNull()) {
121  // append a null array sentinel
// A null array must not have contributed any elements. The append itself is on
// the omitted line -- presumably the null fixed-length array helper; confirm.
122  CHECK(size_of_last_array == 0);
// Empty (non-null) array: the handling is on the omitted line -- presumably a
// call to throwEmptyArrayException; confirm.
124  } else if (isLastArrayEmpty()) {
126  } else {
// Populated array: its size must equal the fixed element count. The arguments
// on listing line 129 match the parameter list of throwWrongSizeArray, which
// is presumably the call opened on the omitted line -- confirm.
127  if (size_of_last_array != array_element_count_) {
129  size_of_last_array, array_element_count_, column_desciptor_.columnName);
130  }
131  }
132  }
133 
// Reports an empty array being loaded into a fixed-length array column.
//
// array_element_count - the element count the column expects; rendered with
//                       std::to_string.
// omnisci_column_name - column name inserted into the message between quotes.
//
// NOTE(review): listing line 136, which opens the statement that consumes this
// message, is omitted from this listing; given the function name and the
// sibling throwWrongSizeArray it is presumably
// `throw ForeignStorageException(` -- confirm against the full source.
134  void throwEmptyArrayException(const size_t array_element_count,
135  const std::string& omnisci_column_name) {
// Adjacent string literals are merged by the compiler; the `+` operators then
// splice in the column name and the expected count.
137  "Detected an empty array"
138  " being loaded into"
139  " OmniSci column '" +
140  omnisci_column_name +
141  "' which has a fixed length array type,"
142  " expecting " +
143  std::to_string(array_element_count) + " elements.");
144  }
145 
146  void throwWrongSizeArray(const size_t size_of_last_array,
147  const size_t array_element_count,
148  const std::string& omnisci_column_name) {
149  throw ForeignStorageException("Detected a row with " +
150  std::to_string(size_of_last_array) +
151  " elements being loaded into"
152  " OmniSci column '" +
153  omnisci_column_name +
154  "' which has a fixed length array type,"
155  " expecting " +
156  std::to_string(array_element_count) + " elements.");
157  }
158 
161 };
162 } // namespace foreign_storage
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
Definition: sqltypes.h:49
SQLTypes
Definition: sqltypes.h:38
#define UNREACHABLE()
Definition: Logger.h:255
std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
ParquetFixedLengthArrayEncoder(Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
void throwWrongSizeArray(const size_t size_of_last_array, const size_t array_element_count, const std::string &omnisci_column_name)
std::string to_string(char const *&&v)
void throwEmptyArrayException(const size_t array_element_count, const std::string &omnisci_column_name)
int8_t * resizeArrayDataBytes(const size_t additional_num_elements)
#define NULL_ARRAY_FLOAT
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
Definition: sqltypes.h:52
Definition: sqltypes.h:53
std::shared_ptr< ChunkMetadata > getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type) override
Definition: sqltypes.h:41
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
#define NULL_ARRAY_DOUBLE
#define CHECK(condition)
Definition: Logger.h:211
Definition: sqltypes.h:45
SQLTypeInfo columnType
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:861
std::string columnName
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_
SQLTypes string_dict_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:509