OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringViewSource.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "BaseConvertEncoder.h"
17 
18 namespace data_conversion {
19 
20 namespace {
21 
22 template <typename DataType>
23 bool is_null(const DataType& typed_value) {
24  if constexpr (std::is_arithmetic<DataType>::value) {
25  auto null = foreign_storage::get_null_value<DataType>();
26  if (typed_value == null) {
27  return true;
28  } else {
29  return false;
30  }
31  }
32 
33  UNREACHABLE();
34 
35  return false;
36 }
37 
38 template <typename T>
39 std::vector<std::string_view> get_materialized_string_views(
40  const size_t num_elements,
41  const StringDictionary* string_dict,
42  const T* ids) {
43  std::vector<std::string_view> materialized_string_views(num_elements);
44  std::transform(ids,
45  ids + num_elements,
46  materialized_string_views.begin(),
47  [&string_dict](const T& id) -> std::string_view {
48  if (is_null(id)) {
49  return std::string_view(nullptr, 0);
50  }
51  return string_dict->getStringView(id);
52  });
53  return materialized_string_views;
54 }
55 
56 std::vector<std::string_view> get_materialized_string_views(
57  const size_t num_elements,
58  const StringDictionary* string_dict,
59  const int8_t* ids,
60  const SQLTypeInfo& type_info) {
61  switch (type_info.get_size()) {
62  case 1:
64  num_elements, string_dict, reinterpret_cast<const uint8_t*>(ids));
65  break;
66  case 2:
68  num_elements, string_dict, reinterpret_cast<const uint16_t*>(ids));
69  break;
70  case 4:
72  num_elements, string_dict, reinterpret_cast<const int32_t*>(ids));
73  break;
74  default:
75  UNREACHABLE();
76  }
77 
78  return {};
79 }
80 } // namespace
81 
/**
 * @brief Abstract interface for objects that expose source data as a raw
 *        pointer plus a count.
 */
class BaseSource {
 public:
  /// Virtual so that implementations can be destroyed through a BaseSource*.
  virtual ~BaseSource() = default;

  /**
   * @brief Returns the source data.
   * @return a pair of (pointer to the data, size); how the size is to be
   *         interpreted is defined by the implementing class
   */
  virtual std::pair<const int8_t*, size_t> getSourceData() = 0;
};
88 
89 class StringViewSource : public BaseSource {
90  public:
91  StringViewSource(const Chunk_NS::Chunk& input) : input_(input) {}
92 
93  std::pair<const int8_t*, size_t> getSourceData() override {
94  auto buffer = input_.getBuffer();
95  auto src_type_info = input_.getColumnDesc()->columnType;
96  auto dict_key = src_type_info.getStringDictKey();
97  auto num_elements = buffer->getEncoder()->getNumElems();
98 
99  if (src_type_info.is_dict_encoded_string()) {
100  auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(dict_key.db_id);
101 
102  auto src_string_dictionary =
103  catalog->getMetadataForDict(dict_key.dict_id, true)->stringDict.get();
104  CHECK(src_string_dictionary);
106  num_elements, src_string_dictionary, buffer->getMemoryPtr(), src_type_info);
107  } else if (src_type_info.is_none_encoded_string()) {
108  auto index_buffer = input_.getIndexBuf();
109  src_string_views_.resize(num_elements);
110  for (size_t i = 0; i < num_elements; ++i) {
112  index_buffer->getMemoryPtr(), buffer->getMemoryPtr(), i);
113  }
114  } else {
115  UNREACHABLE() << "unknown string type";
116  }
117 
118  return {reinterpret_cast<int8_t*>(src_string_views_.data()), num_elements};
119  }
120 
121  private:
122  std::vector<std::string_view> src_string_views_;
123 
125 };
126 
127 } // namespace data_conversion
AbstractBuffer * getIndexBuf() const
Definition: Chunk.h:148
static std::string_view getStringAtIndex(const int8_t *index_data, const int8_t *data, size_t index)
std::vector< std::string_view > get_materialized_string_views(const size_t num_elements, const StringDictionary *string_dict, const T *ids)
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
virtual ~BaseSource()=default
#define UNREACHABLE()
Definition: Logger.h:338
std::string_view getStringView(int32_t string_id) const
const ColumnDescriptor * getColumnDesc() const
Definition: Chunk.h:65
static SysCatalog & instance()
Definition: SysCatalog.h:343
CONSTEXPR DEVICE bool is_null(const T &value)
virtual std::pair< const int8_t *, size_t > getSourceData()=0
const Chunk_NS::Chunk & input_
OUTPUT transform(INPUT const &input, FUNC const &func)
Definition: misc.h:320
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
StringViewSource(const Chunk_NS::Chunk &input)
AbstractBuffer * getBuffer() const
Definition: Chunk.h:146
#define CHECK(condition)
Definition: Logger.h:291
std::vector< std::string_view > src_string_views_
SQLTypeInfo columnType
std::pair< const int8_t *, size_t > getSourceData() override
const shared::StringDictKey & getStringDictKey() const
Definition: sqltypes.h:1055