OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringViewToArrayEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "BaseConvertEncoder.h"
21 #include "ImportExport/Importer.h" // composeNullArray
22 
25 
26 namespace data_conversion {
27 
28 template <typename ScalarEncoderType>
30  : public TypedBaseConvertEncoder<typename ScalarEncoderType::DataType,
31  typename ScalarEncoderType::MetadataType> {
32  public:
33  using DstDataType = typename ScalarEncoderType::DataType;
34 
// Constructs the encoder.
//   scalar_temp_chunk      - chunk passed to the inner scalar encoder and kept in
//                            scalar_temp_chunk_.
//   dst_chunk              - kept in dst_chunk_; initialize() reads the column type
//                            from its column descriptor.
//   error_tracking_enabled - forwarded to both the base encoder and the inner
//                            scalar encoder.
35  StringViewToArrayEncoder(const Chunk_NS::Chunk& scalar_temp_chunk,
36  const Chunk_NS::Chunk& dst_chunk,
37  const bool error_tracking_enabled)
38  : TypedBaseConvertEncoder<typename ScalarEncoderType::DataType,
39  typename ScalarEncoderType::MetadataType>(
40  error_tracking_enabled)
41  , dst_chunk_(dst_chunk)
42  , scalar_temp_chunk_(scalar_temp_chunk)
43  , scalar_encoder_(scalar_temp_chunk, error_tracking_enabled) {
// Runs after all members above are initialized; it reads dst_chunk_.
44  initialize();
45  }
46 
// Encodes a batch of string-encoded arrays.
// `data` is reinterpreted as an array of `num_elements` std::string_view values.
// The rows are scalar-encoded by encodeScalarData() and then turned into array
// datums by appendArrayDatums().
47  void encodeAndAppendData(const int8_t* data, const size_t num_elements) override {
48  auto typed_data = reinterpret_cast<const std::string_view*>(data);
49 
// Default-constructed copy parameters; only passed on to encodeScalarData().
50  const import_export::CopyParams default_copy_params;
51 
// Stays std::nullopt unless the block below assigns a vector to it.
52  std::optional<std::vector<bool>> error_occurred = std::nullopt;
53 
// NOTE(review): listing line 54 -- the statement opening the block that is closed
// at listing line 57 -- is absent from this listing. Presumably it gates the two
// statements below on error tracking being enabled; confirm against the header.
55  error_occurred = std::vector<bool>{};
56  error_occurred->reserve(num_elements);
57  }
58 
// Reset the per-batch scratch state before any row is processed.
59  clearLocalState(num_elements);
60 
61  encodeScalarData(num_elements, typed_data, default_copy_params, error_occurred);
62 
// Start of the memory held by the scalar encoder's destination chunk buffer;
// appendArrayDatums() walks forward from this pointer.
63  auto current_data = reinterpret_cast<int8_t*>(
64  scalar_encoder_.getDstChunk().getBuffer()->getMemoryPtr());
65  size_t current_scalar_offset = 0;
66 
67  appendArrayDatums(num_elements, error_occurred, current_data, current_scalar_offset);
68  }
69 
// Builds one entry in array_datums_ per input row, then hands array_datums_ to the
// encoder obtained from buffer_.
//   num_elements          - number of rows; indexes array_sizes_ and is_null_.
//   error_occurred        - per-row error flags; dereferenced without a visible
//                           has_value() check in this listing.
//   current_data          - cursor into the scalar-encoded bytes, advanced by
//                           sizeof(DstDataType) * array_size where shown below.
//   current_scalar_offset - index into the scalar encoder's delete buffer,
//                           advanced by array_size where shown below.
// Throws std::runtime_error with the two messages visible below.
// NOTE(review): listing lines 76, 80-82, 88, 90, 100, 103, 105, 113, 116, 119 and
// 131 are absent from this listing, so several guarding conditions and call
// arguments are not visible. The comments below describe only what is visible.
70  void appendArrayDatums(const size_t num_elements,
71  std::optional<std::vector<bool>>& error_occurred,
72  int8_t* current_data,
73  size_t current_scalar_offset) {
74  for (size_t i = 0; i < num_elements; ++i) {
75  auto array_size = array_sizes_[i];
// NOTE(review): the condition opening this block (listing line 76) is missing.
77  BaseConvertEncoder::delete_buffer_->push_back(false);
78  }
79  if (is_null_[i]) {
// NOTE(review): listing lines 80-82 are missing; only the throwing `else` branch
// of whatever they test is visible.
83  } else {
84  throw std::runtime_error("NULL value not allowed in NOT NULL column");
85  }
86  }
// NOTE(review): the argument of this push_back (listing line 88) is missing.
87  array_datums_.push_back(
89  } else {
// NOTE(review): the condition opening this block (listing line 90) is missing.
// Scan this row's slice of the scalar encoder's delete buffer: a single flagged
// element marks the whole row i as errored.
91  for (size_t j = current_scalar_offset; j < current_scalar_offset + array_size;
92  ++j) {
93  if ((*scalar_encoder_.getDeleteBuffer())[j]) {
94  (*error_occurred)[i] = true;
95  break;
96  }
97  }
98  current_scalar_offset += array_size;
99  }
// NOTE(review): the start of this condition (listing line 100) is missing. The
// visible part compares the row's byte size against dst_type_info_.get_size().
101  array_size * sizeof(DstDataType) !=
102  static_cast<size_t>(dst_type_info_.get_size())) {
// NOTE(review): listing lines 103 and 105 are missing (a condition and the
// push_back argument).
104  array_datums_.push_back(
// Step over this row's scalar bytes, flag the row in delete_buffer_, and move on.
106  current_data += sizeof(DstDataType) * array_size;
107  BaseConvertEncoder::delete_buffer_->back() = true;
108  continue;
109  } else {
110  throw std::runtime_error(
111  "Incorrect number of elements (" + std::to_string(array_size) +
112  ") in array for fixed length array of size " +
114  }
115  } else {
// NOTE(review): listing lines 116 and 119 are missing (a condition and the
// push_back argument).
117  if ((*error_occurred)[i]) {
118  array_datums_.push_back(
// NOTE(review): unlike the size-mismatch branch above, no visible statement
// advances current_data before this `continue`. If the scalar encoder still
// emitted array_size elements for this row, later rows would read from the wrong
// offset -- confirm against the original header.
120  BaseConvertEncoder::delete_buffer_->back() = true;
121  continue;
122  }
123  }
// Datum over this row's scalar-encoded bytes at current_data. The DoNothingDeleter
// presumably means the datum does not own that memory -- confirm.
124  array_datums_.emplace_back(
125  sizeof(DstDataType) * array_size, current_data, false, DoNothingDeleter{});
126  current_data += sizeof(DstDataType) * array_size;
127  }
128  }
129  }
130 
// NOTE(review): listing line 131 is missing. Given the `else if` below, it is
// presumably the `if` selecting the variable-length array case.
132  auto encoder = dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
133  CHECK(encoder);
134  encoder->appendData(&array_datums_, 0, num_elements, false);
135  } else if (dst_type_info_.is_fixlen_array()) {
136  auto encoder = dynamic_cast<FixedLengthArrayNoneEncoder*>(buffer_->getEncoder());
137  CHECK(encoder);
138  encoder->appendData(&array_datums_, 0, num_elements, false);
139  } else {
// Neither array kind matched.
140  UNREACHABLE();
141  }
142  }
143 
144  void clearLocalState(const size_t num_elements) {
145  scalar_encoder_.clear();
146  array_datums_.clear();
147  array_sizes_.clear();
148  is_null_.clear();
149  array_sizes_.reserve(num_elements);
150  array_datums_.reserve(num_elements);
151  is_null_.reserve(num_elements);
152  }
153 
// Processes each of the `num_elements` input strings: records per-row bookkeeping
// in is_null_ / array_sizes_, splits a non-empty string into array_, builds
// string_views over those elements in array_views_, and feeds them to
// scalar_encoder_. Ends by calling scalar_encoder_.finalize(num_elements).
//   typed_data          - the input strings, one per row.
//   default_copy_params - passed to the parsing call at listing line 174.
//   error_occurred      - per-row error flags; dereferenced without a visible
//                         has_value() check in this listing.
// NOTE(review): listing lines 159, 165, 173 and 180 are absent from this listing.
154  void encodeScalarData(const size_t num_elements,
155  const std::string_view* typed_data,
156  const import_export::CopyParams& default_copy_params,
157  std::optional<std::vector<bool>>& error_occurred) {
158  for (size_t i = 0; i < num_elements; ++i) {
// NOTE(review): the condition opening this block (listing line 159) is missing.
160  error_occurred->push_back(false);
161  }
// An empty input string is recorded as a null row with zero elements.
162  if (typed_data[i].empty()) {
163  is_null_.push_back(true);
164  array_sizes_.push_back(0);
// NOTE(review): listing line 165 is missing.
166  continue;
167  }
168  is_null_.push_back(false);
169  array_.clear();
170  array_views_.clear();
171  try {
172  std::string s{typed_data[i]};
// NOTE(review): the callee name (listing line 173) is missing. The arguments match
// the parse_string_array signature shown in this listing's reference section, so
// it is presumably that function -- confirm.
174  s, default_copy_params, array_, false);
175  array_sizes_.push_back(array_.size());
// NOTE(review): this loop variable `s` shadows the std::string `s` declared above.
176  for (const auto& s : array_) {
177  array_views_.emplace_back(s.data(), s.length());
178  }
179  } catch (std::exception& except) {
// NOTE(review): the condition opening this block (listing line 180) is missing.
181  error_occurred->back() = true;
182  array_sizes_.push_back(0);
183  continue;
184  } else {
// NOTE(review): `throw except;` throws a copy whose type is std::exception, which
// slices any derived exception (losing its dynamic type and message). A bare
// `throw;` would rethrow the original object.
185  throw except;
186  }
187  }
// The views point into array_, which is only cleared at the top of the next
// iteration, so they are valid for this call.
188  scalar_encoder_.encodeAndAppendData(reinterpret_cast<int8_t*>(array_views_.data()),
189  array_views_.size());
190  }
191 
192  scalar_encoder_.finalize(num_elements);
193  }
194 
195  private:
// Caches the destination column's type (from dst_chunk_'s column descriptor) in
// dst_type_info_. Called from the constructor.
// NOTE(review): listing line 199 is absent from this listing. Presumably it is
// where buffer_ is set, since no other visible line assigns it -- confirm.
196  void initialize() {
197  auto type_info = dst_chunk_.getColumnDesc()->columnType;
198  dst_type_info_ = type_info;
200  }
201 
206 
// Inner encoder for the individual array elements; constructed over the scalar
// temp chunk.
207  ScalarEncoderType scalar_encoder_;
208 
// Element strings of the row currently being parsed; cleared for every
// non-empty row.
209  std::vector<std::string> array_;
// string_views over the elements of array_, passed to scalar_encoder_.
210  std::vector<std::string_view> array_views_;
// One datum per row of the current batch; handed to the destination encoder's
// appendData().
211  std::vector<ArrayDatum> array_datums_;
// Per-row element count (0 for empty-input rows and for rows whose parsing threw).
212  std::vector<size_t> array_sizes_;
// Per-row flag: true when the input string_view was empty.
213  std::vector<bool> is_null_;
214 };
215 
216 } // namespace data_conversion
void encodeScalarData(const size_t num_elements, const std::string_view *typed_data, const import_export::CopyParams &default_copy_params, std::optional< std::vector< bool >> &error_occurred)
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
bool is_varlen_array() const
Definition: sqltypes.h:586
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:395
#define UNREACHABLE()
Definition: Logger.h:338
StringViewToArrayEncoder(const Chunk_NS::Chunk &scalar_temp_chunk, const Chunk_NS::Chunk &dst_chunk, const bool error_tracking_enabled)
std::string to_string(char const *&&v)
const ColumnDescriptor * getColumnDesc() const
Definition: Chunk.h:65
bool is_fixlen_array() const
Definition: sqltypes.h:589
void appendArrayDatums(const size_t num_elements, std::optional< std::vector< bool >> &error_occurred, int8_t *current_data, size_t current_scalar_offset)
void parse_string_array(const std::string &s, const import_export::CopyParams &copy_params, std::vector< std::string > &string_vec, bool truncate_values)
Parses given string array and inserts into given vector of strings.
An AbstractBuffer is a unit of data management for a data manager.
std::optional< std::vector< bool > > delete_buffer_
void encodeAndAppendData(const int8_t *data, const size_t num_elements) override
void clearLocalState(const size_t num_elements)
AbstractBuffer * getBuffer() const
Definition: Chunk.h:146
typename ScalarEncoderType::DataType DstDataType
unencoded fixed length array encoder
#define CHECK(condition)
Definition: Logger.h:291
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
unencoded array encoder