OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BaseConvertEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "Catalog/Catalog.h"
21 #include "Catalog/SysCatalog.h"
22 #include "DataMgr/Chunk/Chunk.h"
25 #include "Shared/types.h"
27 
28 #include <stdexcept>
29 
30 namespace data_conversion {
31 
33  public:
34  virtual ~BaseConvertEncoder(){};
35 
36  BaseConvertEncoder(const bool error_tracking_enabled)
37  : delete_buffer_(std::nullopt)
38  , error_tracking_enabled_(error_tracking_enabled)
39  , has_nulls_(false)
40  , num_elements_(0) {}
41 
42  void initializeDeleteBuffer(const size_t size_hint) {
43  if (!delete_buffer_.has_value()) {
44  delete_buffer_ = std::vector<bool>{};
45  }
46  delete_buffer_->clear();
47  delete_buffer_->reserve(size_hint);
48  }
49 
50  virtual void finalize(const size_t rows_appended) { num_elements_ = rows_appended; }
51 
52  virtual void encodeAndAppendData(const int8_t* data, const size_t num_elements) = 0;
53 
54  virtual std::shared_ptr<ChunkMetadata> getMetadata(const Chunk_NS::Chunk& chunk) const {
55  auto chunk_metadata = std::make_shared<ChunkMetadata>();
56  auto dst_type_info = chunk.getColumnDesc()->columnType;
57  chunk_metadata->sqlType = dst_type_info;
58  chunk_metadata->chunkStats.has_nulls = has_nulls_;
59  chunk_metadata->numElements = num_elements_;
60  chunk_metadata->numBytes = chunk.getBuffer()->size();
61  return chunk_metadata;
62  }
63 
64  protected:
65  virtual void clear() {
66  num_elements_ = 0;
68  delete_buffer_->clear();
69  }
70  }
71 
72  template <typename DataType>
73  bool isNull(const DataType& typed_value) {
74  if constexpr (std::is_arithmetic<DataType>::value) {
75  auto null = foreign_storage::get_null_value<DataType>();
76  if (typed_value == null) {
77  return true;
78  }
79  } else if constexpr (std::is_same<DataType, std::string>::value ||
80  std::is_same<DataType, std::string_view>::value) {
81  if (typed_value.empty()) {
82  return true;
83  }
84  }
85  return false;
86  }
87 
88  std::optional<std::vector<bool>> delete_buffer_;
90 
91  bool has_nulls_;
92  size_t num_elements_;
93 };
94 
95 template <typename DataType_, typename MetadataType_ = DataType_>
97  public:
98  using DataType = DataType_;
99  using MetadataType = MetadataType_;
100 
101  TypedBaseConvertEncoder(const bool error_tracking_enabled)
102  : BaseConvertEncoder(error_tracking_enabled) {
103  min_ = std::numeric_limits<MetadataType>::max();
104  max_ = std::numeric_limits<MetadataType>::lowest();
105  }
106 
107  std::shared_ptr<ChunkMetadata> getMetadata(
108  const Chunk_NS::Chunk& chunk) const override {
109  auto metadata = BaseConvertEncoder::getMetadata(chunk);
110  metadata->fillChunkStats(min_, max_, has_nulls_);
111  return metadata;
112  }
113 
114  protected:
115  DataType getNull() const {
116  if constexpr (std::is_arithmetic<DataType>::value) {
117  auto null = foreign_storage::get_null_value<DataType>();
118  return null;
119  } else if constexpr (std::is_same<DataType, std::string>::value ||
120  std::is_same<DataType, std::string_view>::value) {
121  return std::string{}; // empty_string
122  } else {
123  return nullptr;
124  }
125  }
126 
127  void updateMetadataStats(const DataType& typed_value,
128  const bool is_date_in_days = false) {
129  if (is_date_in_days) {
130  const MetadataType to_compare =
132  min_ = std::min<MetadataType>(min_, to_compare);
133  max_ = std::max<MetadataType>(max_, to_compare);
134  } else {
135  min_ = std::min<MetadataType>(min_, typed_value);
136  max_ = std::max<MetadataType>(max_, typed_value);
137  }
138  }
139 
141 };
142 
143 } // namespace data_conversion
void initializeDeleteBuffer(const size_t size_hint)
void updateMetadataStats(const DataType &typed_value, const bool is_date_in_days=false)
virtual void encodeAndAppendData(const int8_t *data, const size_t num_elements)=0
BaseConvertEncoder(const bool error_tracking_enabled)
virtual std::shared_ptr< ChunkMetadata > getMetadata(const Chunk_NS::Chunk &chunk) const
This file contains the class specification and related data structures for Catalog.
const ColumnDescriptor * getColumnDesc() const
Definition: Chunk.h:65
int64_t get_epoch_seconds_from_days(const int64_t days)
This file contains the class specification and related data structures for SysCatalog.
bool isNull(const DataType &typed_value)
std::optional< std::vector< bool > > delete_buffer_
AbstractBuffer * getBuffer() const
Definition: Chunk.h:146
TypedBaseConvertEncoder(const bool error_tracking_enabled)
std::shared_ptr< ChunkMetadata > getMetadata(const Chunk_NS::Chunk &chunk) const override
bool g_enable_watchdog false
Definition: Execute.cpp:80
For unencoded strings.
SQLTypeInfo columnType
virtual void finalize(const size_t rows_appended)