OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringViewToStringDictEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "BaseConvertEncoder.h"
21 
22 namespace data_conversion {
23 
24 template <typename IdType>
26  public:
28  const bool error_tracking_enabled)
29  : TypedBaseConvertEncoder<IdType>(error_tracking_enabled), dst_chunk_(dst_chunk) {
30  initialize();
31  }
32 
33  void encodeAndAppendData(const int8_t* data, const size_t num_elements) override {
34  auto typed_data = reinterpret_cast<const std::string_view*>(data);
35 
37  << " unimplemented case for this encoder";
38 
39  std::vector<std::string_view> input_buffer(typed_data, typed_data + num_elements);
41  dict_encoding_output_buffer_.resize(num_elements);
42  for (size_t i = 0; i < num_elements; ++i) {
43  if (input_buffer[i].size() > StringDictionary::MAX_STRLEN) {
45  throw std::runtime_error("String length of " +
46  std::to_string(input_buffer[i].size()) +
47  " exceeds allowed maximum string length of " +
49  } else {
50  BaseConvertEncoder::delete_buffer_->push_back(true);
51  input_buffer[i] = {}; // set to NULL/empty string to process
52  }
53  } else if (BaseConvertEncoder::isNull(input_buffer[i])) {
56  throw std::runtime_error("NULL value not allowed in NOT NULL column");
57  }
59  } else {
61  BaseConvertEncoder::delete_buffer_->push_back(true);
62  } else {
63  BaseConvertEncoder::delete_buffer_->push_back(false);
64  }
65  }
66  } else {
68  BaseConvertEncoder::delete_buffer_->push_back(false);
69  }
70  }
71  }
72  string_dict_->getOrAddBulk<IdType, std::string_view>(
73  input_buffer, dict_encoding_output_buffer_.data());
74  for (size_t i = 0; i < num_elements; ++i) {
75  if (!BaseConvertEncoder::isNull(input_buffer[i])) {
78  }
79  }
80 
81  buffer_->append(reinterpret_cast<int8_t*>(dict_encoding_output_buffer_.data()),
82  num_elements * sizeof(IdType));
83  }
84 
85  void clear() override {
88  }
89 
90  const Chunk_NS::Chunk& getDstChunk() const { return dst_chunk_; }
91 
92  std::optional<std::vector<bool>>& getDeleteBuffer() {
94  }
95 
96  private:
97  void initialize() {
98  auto type_info = dst_chunk_.getColumnDesc()->columnType;
100  type_info.getStringDictKey().db_id);
101  string_dict_ = catalog->getMetadataForDict(type_info.getStringDictKey().dict_id, true)
102  ->stringDict.get();
103  dst_type_info_ = type_info;
105  }
106 
110 
111  std::vector<IdType> dict_encoding_output_buffer_;
112 
114 };
115 
116 } // namespace data_conversion
void updateMetadataStats(const DataType &typed_value, const bool is_date_in_days=false)
std::optional< std::vector< bool > > & getDeleteBuffer()
std::string to_string(char const *&&v)
const ColumnDescriptor * getColumnDesc() const
Definition: Chunk.h:65
static SysCatalog & instance()
Definition: SysCatalog.h:343
StringViewToStringDictEncoder(const Chunk_NS::Chunk &dst_chunk, const bool error_tracking_enabled)
An AbstractBuffer is a unit of data management for a data manager.
bool isNull(const DataType &typed_value)
std::optional< std::vector< bool > > delete_buffer_
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
void encodeAndAppendData(const int8_t *data, const size_t num_elements) override
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
AbstractBuffer * getBuffer() const
Definition: Chunk.h:146
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
#define CHECK(condition)
Definition: Logger.h:291
static constexpr size_t MAX_STRLEN
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398