OmniSciDB  2e3a973ef4
ParquetFixedLengthArrayEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <stdexcept>
20 
21 #include <parquet/types.h>
22 #include "ParquetArrayEncoder.h"
24 
25 namespace foreign_storage {
26 
28  public:
30  std::shared_ptr<ParquetScalarEncoder> scalar_encoder,
31  const ColumnDescriptor* column_desciptor)
32  : ParquetArrayEncoder(data_buffer, scalar_encoder, column_desciptor)
33  , column_desciptor_(*column_desciptor)
34  , array_element_count_(column_desciptor->columnType.get_size() /
36  CHECK(column_desciptor->columnType.get_size() % omnisci_data_type_byte_size_ == 0);
37  }
38 
39  protected:
41 
42  private:
44  auto omnisci_data_ptr = resizeArrayDataBytes(array_element_count_);
45  setNullFixedLengthArraySentinel(omnisci_data_ptr);
46  for (size_t i = 1; i < array_element_count_; ++i) {
47  scalar_encoder_->setNull(omnisci_data_ptr + i * omnisci_data_type_byte_size_);
48  }
49  }
50 
// Writes the "null array" sentinel for a fixed length array into the first
// element slot of `omnisci_data_bytes`.
//
// Only index [0] of the buffer is written here; the width of that write is
// chosen by the switch below from the type held in `ti`.
//
// omnisci_data_bytes: start of the array's element storage. The pointer is
//   reinterpreted as the element's storage type before the write.
//   NOTE(review): assumes the buffer is large enough for, and suitably
//   aligned to, the widest write (8 bytes) - confirm against the caller.
//
// Throws std::runtime_error when `ti.is_string()` is true: null arrays are
// currently rejected for fixed length arrays of dictionary encoded text.
// Any type without an explicit write below ends in UNREACHABLE().
51  void setNullFixedLengthArraySentinel(int8_t* omnisci_data_bytes) {
  // NOTE(review): `ti` is declared on a line that is absent from this
  // listing; presumably it is the column's array element type (something like
  // columnType.get_elem_type()) - confirm against the real header.
53  if (ti.is_string()) {
54  // TODO: after investigation as to why fixed length arrays with
55  // strings cannot represent null arrays, either fix this error
56  // or erase this comment.
  // NOTE(review): the operand between the two string literals below (most
  // likely the column name) is on a line absent from this listing.
57  throw std::runtime_error("Detected a null array being imported into OmniSci '" +
59  "' column which has a fixed length array type of "
60  "dictionary encoded text. Currently "
61  "null arrays for this type of column are not allowed.");
62  }
63  const auto type = ti.get_type();
  // One case group per storage width. For every integer-backed group the
  // right-hand side of the assignment is on a line absent from this listing;
  // presumably it is the type-specific null-array value (for example
  // inline_fixed_encoding_null_array_val(ti)) narrowed to the slot type -
  // TODO confirm.
64  switch (type) {
65  case kBOOLEAN:
  // Written through a bool-typed slot.
66  reinterpret_cast<bool*>(omnisci_data_bytes)[0] =
68  break;
69  case kBIGINT:
70  case kNUMERIC:
71  case kDECIMAL:
  // Big integers, numerics and decimals share a 64-bit integer slot.
72  reinterpret_cast<int64_t*>(omnisci_data_bytes)[0] =
74  break;
75  case kINT:
  // 32-bit integer slot.
76  reinterpret_cast<int32_t*>(omnisci_data_bytes)[0] =
78  break;
79  case kSMALLINT:
  // 16-bit integer slot.
80  reinterpret_cast<int16_t*>(omnisci_data_bytes)[0] =
82  break;
83  case kTINYINT:
  // 8-bit integer slot.
84  reinterpret_cast<int8_t*>(omnisci_data_bytes)[0] =
86  break;
87  case kFLOAT:
  // Floating point types use dedicated sentinel macros.
88  reinterpret_cast<float*>(omnisci_data_bytes)[0] = NULL_ARRAY_FLOAT;
89  break;
90  case kDOUBLE:
91  reinterpret_cast<double*>(omnisci_data_bytes)[0] = NULL_ARRAY_DOUBLE;
92  break;
93  case kTIME:
94  case kTIMESTAMP:
95  case kDATE:
  // Time, timestamp and date elements are written through a 64-bit integer
  // slot.
96  reinterpret_cast<int64_t*>(omnisci_data_bytes)[0] =
98  break;
  // The text and geo labels below share the default branch: none of them is
  // expected to reach this switch, so hitting one trips UNREACHABLE().
99  case kTEXT:
100  case kVARCHAR:
101  case kCHAR:
102  case kPOINT:
103  case kLINESTRING:
104  case kPOLYGON:
105  case kMULTIPOLYGON:
106  default:
107  UNREACHABLE();
108  }
109  }
110 
112  auto size_of_last_array = sizeOfLastArray();
113  if (!isLastArrayNull()) {
114  if (size_of_last_array != array_element_count_) {
115  throw std::runtime_error("Detected a row with " +
116  std::to_string(size_of_last_array) +
117  " elements being loaded into"
118  " OmniSci column '" +
120  "' which has a fixed length array type,"
121  " expecting " +
122  std::to_string(array_element_count_) + " elements.");
123  }
124  } else {
125  // append a null array sentinel
126  CHECK(size_of_last_array == 0);
128  }
129  }
130 
133 };
134 } // namespace foreign_storage
Definition: sqltypes.h:51
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:194
#define UNREACHABLE()
Definition: Logger.h:241
ParquetFixedLengthArrayEncoder(Data_Namespace::AbstractBuffer *data_buffer, std::shared_ptr< ParquetScalarEncoder > scalar_encoder, const ColumnDescriptor *column_desciptor)
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
std::string to_string(char const *&&v)
int8_t * resizeArrayDataBytes(const size_t additional_num_elements)
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
Definition: sqltypes.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:43
int64_t inline_fixed_encoding_null_array_val(const SQL_TYPE_INFO &ti)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:624
#define CHECK(condition)
Definition: Logger.h:197
Definition: sqltypes.h:47
SQLTypeInfo columnType
std::string columnName
std::shared_ptr< ParquetScalarEncoder > scalar_encoder_
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:193