OmniSciDB  16c4e035a1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DateDaysEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef DATE_DAYS_ENCODER_H
18 #define DATE_DAYS_ENCODER_H
19 
20 #include "Logger/Logger.h"
21 
22 #include <iostream>
23 #include <memory>
24 #include "AbstractBuffer.h"
25 #include "Encoder.h"
26 
27 #include <Shared/DatumFetchers.h>
28 
29 template <typename T, typename V>
30 class DateDaysEncoder : public Encoder {
31  public:
34  }
35 
36  size_t getNumElemsForBytesEncodedData(const int8_t* index_data,
37  const int start_idx,
38  const size_t num_elements,
39  const size_t byte_limit) override {
40  UNREACHABLE() << "getNumElemsForBytesEncodedData unexpectedly called for non varlen"
41  " encoder";
42  return {};
43  }
44 
45  std::shared_ptr<ChunkMetadata> appendEncodedDataAtIndices(
46  const int8_t*,
47  int8_t* data,
48  const std::vector<size_t>& selected_idx) override {
49  std::vector<V> data_subset;
50  data_subset.reserve(selected_idx.size());
51  auto encoded_data = reinterpret_cast<V*>(data);
52  for (const auto& index : selected_idx) {
53  data_subset.emplace_back(encoded_data[index]);
54  }
55  auto append_data = reinterpret_cast<int8_t*>(data_subset.data());
57  append_data, selected_idx.size(), SQLTypeInfo{}, false, -1, true);
58  }
59 
60  std::shared_ptr<ChunkMetadata> appendEncodedData(const int8_t*,
61  int8_t* data,
62  const size_t start_idx,
63  const size_t num_elements) override {
64  auto current_data = data + sizeof(V) * start_idx;
66  current_data, num_elements, SQLTypeInfo{}, false, -1, true);
67  }
68 
69  std::shared_ptr<ChunkMetadata> appendData(int8_t*& src_data,
70  const size_t num_elems_to_append,
71  const SQLTypeInfo& ti,
72  const bool replicating = false,
73  const int64_t offset = -1) override {
75  src_data, num_elems_to_append, ti, replicating, offset, false);
76  }
77 
78  void getMetadata(const std::shared_ptr<ChunkMetadata>& chunkMetadata) override {
79  Encoder::getMetadata(chunkMetadata);
80  chunkMetadata->fillChunkStats(dataMin, dataMax, has_nulls);
81  }
82 
83  // Only called from the executor for synthesized meta-information.
84  std::shared_ptr<ChunkMetadata> getMetadata(const SQLTypeInfo& ti) override {
85  auto chunk_metadata = std::make_shared<ChunkMetadata>(ti, 0, 0, ChunkStats{});
86  chunk_metadata->fillChunkStats(dataMin, dataMax, has_nulls);
87  return chunk_metadata;
88  }
89 
90  // Only called from the executor for synthesized meta-information.
91  void updateStats(const int64_t val, const bool is_null) override {
92  if (is_null) {
93  has_nulls = true;
94  } else {
95  const auto data = static_cast<T>(val);
96  dataMin = std::min(dataMin, data);
97  dataMax = std::max(dataMax, data);
98  }
99  }
100 
101  // Only called from the executor for synthesized meta-information.
102  void updateStats(const double val, const bool is_null) override {
103  if (is_null) {
104  has_nulls = true;
105  } else {
106  const auto data = static_cast<T>(val);
107  dataMin = std::min(dataMin, data);
108  dataMax = std::max(dataMax, data);
109  }
110  }
111 
112  void updateStats(const int8_t* const src_data, const size_t num_elements) override {
113  const T* unencoded_data = reinterpret_cast<const T*>(src_data);
114  for (size_t i = 0; i < num_elements; ++i) {
115  encodeDataAndUpdateStats(unencoded_data[i]);
116  }
117  }
118 
119  void updateStats(const std::vector<std::string>* const src_data,
120  const size_t start_idx,
121  const size_t num_elements) override {
122  UNREACHABLE();
123  }
124 
125  void updateStats(const std::vector<ArrayDatum>* const src_data,
126  const size_t start_idx,
127  const size_t num_elements) override {
128  UNREACHABLE();
129  }
130 
131  // Only called from the executor for synthesized meta-information.
132  void reduceStats(const Encoder& that) override {
133  const auto that_typed = static_cast<const DateDaysEncoder<T, V>&>(that);
134  if (that_typed.has_nulls) {
135  has_nulls = true;
136  }
137  dataMin = std::min(dataMin, that_typed.dataMin);
138  dataMax = std::max(dataMax, that_typed.dataMax);
139  }
140 
141  void copyMetadata(const Encoder* copyFromEncoder) override {
142  num_elems_ = copyFromEncoder->getNumElems();
143  auto castedEncoder = reinterpret_cast<const DateDaysEncoder<T, V>*>(copyFromEncoder);
144  dataMin = castedEncoder->dataMin;
145  dataMax = castedEncoder->dataMax;
146  has_nulls = castedEncoder->has_nulls;
147  }
148 
149  void writeMetadata(FILE* f) override {
150  // assumes pointer is already in right place
151  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
152  fwrite((int8_t*)&dataMin, sizeof(T), 1, f);
153  fwrite((int8_t*)&dataMax, sizeof(T), 1, f);
154  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
155  }
156 
157  void readMetadata(FILE* f) override {
158  // assumes pointer is already in right place
159  fread((int8_t*)&num_elems_, sizeof(size_t), 1, f);
160  fread((int8_t*)&dataMin, 1, sizeof(T), f);
161  fread((int8_t*)&dataMax, 1, sizeof(T), f);
162  fread((int8_t*)&has_nulls, 1, sizeof(bool), f);
163  }
164 
165  bool resetChunkStats(const ChunkStats& stats) override {
166  const auto new_min = DatumFetcher::getDatumVal<T>(stats.min);
167  const auto new_max = DatumFetcher::getDatumVal<T>(stats.max);
168 
169  if (dataMin == new_min && dataMax == new_max && has_nulls == stats.has_nulls) {
170  return false;
171  }
172 
173  dataMin = new_min;
174  dataMax = new_max;
175  has_nulls = stats.has_nulls;
176  return true;
177  }
178 
179  void resetChunkStats() override {
180  dataMin = std::numeric_limits<T>::max();
181  dataMax = std::numeric_limits<T>::lowest();
182  has_nulls = false;
183  }
184 
187  bool has_nulls;
188 
189  private:
190  std::shared_ptr<ChunkMetadata> appendEncodedOrUnencodedData(
191  int8_t*& src_data,
192  const size_t num_elems_to_append,
193  const SQLTypeInfo& ti,
194  const bool replicating,
195  const int64_t offset,
196  const bool is_encoded) {
197  if (offset == 0 && num_elems_to_append >= num_elems_) {
198  resetChunkStats();
199  }
200 
201  CHECK(!is_encoded || !replicating); // do not support replicating of encoded data
202 
203  T* unencoded_data = reinterpret_cast<T*>(src_data);
204  std::vector<V> encoded_data;
205  V* data_to_write = nullptr;
206  if (!is_encoded) {
207  encoded_data.resize(num_elems_to_append);
208  data_to_write = encoded_data.data();
209  for (size_t i = 0; i < num_elems_to_append; ++i) {
210  size_t ri = replicating ? 0 : i;
211  encoded_data[i] = encodeDataAndUpdateStats(unencoded_data[ri]);
212  }
213  } else {
214  data_to_write = reinterpret_cast<V*>(src_data);
215  for (size_t i = 0; i < num_elems_to_append; ++i) {
216  updateStatsWithAlreadyEncoded(data_to_write[i]);
217  }
218  }
219 
220  if (offset == -1) {
221  num_elems_ += num_elems_to_append;
222  buffer_->append(reinterpret_cast<int8_t*>(data_to_write),
223  num_elems_to_append * sizeof(V));
224  if (!replicating) {
225  src_data += num_elems_to_append * sizeof(T);
226  }
227  } else {
228  num_elems_ = offset + num_elems_to_append;
229  CHECK(!replicating);
230  CHECK_GE(offset, 0);
231  buffer_->write(reinterpret_cast<int8_t*>(data_to_write),
232  num_elems_to_append * sizeof(V),
233  static_cast<size_t>(offset));
234  }
235 
236  auto chunk_metadata = std::make_shared<ChunkMetadata>();
237  getMetadata(chunk_metadata);
238  return chunk_metadata;
239  }
240 
241  void updateStatsWithAlreadyEncoded(const V& encoded_data) {
242  if (encoded_data == std::numeric_limits<V>::min()) {
243  has_nulls = true;
244  } else {
245  const T data = DateConverters::get_epoch_seconds_from_days(encoded_data);
246  dataMax = std::max(dataMax, data);
247  dataMin = std::min(dataMin, data);
248  }
249  }
250 
251  V encodeDataAndUpdateStats(const T& unencoded_data) {
252  V encoded_data;
253  if (unencoded_data == std::numeric_limits<V>::min()) {
254  has_nulls = true;
255  encoded_data = static_cast<V>(unencoded_data);
256  } else {
257  date_days_overflow_validator_.validate(unencoded_data);
258  encoded_data = DateConverters::get_epoch_days_from_seconds(unencoded_data);
259  const T data = DateConverters::get_epoch_seconds_from_days(encoded_data);
260  dataMax = std::max(dataMax, data);
261  dataMin = std::min(dataMin, data);
262  }
263  return encoded_data;
264  }
265 }; // DateDaysEncoder
266 
267 #endif // DATE_DAYS_ENCODER_H
size_t getNumElemsForBytesEncodedData(const int8_t *index_data, const int start_idx, const size_t num_elements, const size_t byte_limit) override
size_t num_elems_
Definition: Encoder.h:289
void updateStats(const int8_t *const src_data, const size_t num_elements) override
void updateStats(const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
void updateStats(const int64_t val, const bool is_null) override
#define UNREACHABLE()
Definition: Logger.h:255
#define CHECK_GE(x, y)
Definition: Logger.h:224
bool has_nulls
Definition: ChunkMetadata.h:28
void resetChunkStats() override
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
void updateStats(const std::vector< std::string > *const src_data, const size_t start_idx, const size_t num_elements) override
void readMetadata(FILE *f) override
int64_t get_epoch_seconds_from_days(const int64_t days)
CONSTEXPR DEVICE bool is_null(const T &value)
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:291
V encodeDataAndUpdateStats(const T &unencoded_data)
void writeMetadata(FILE *f) override
size_t getNumElems() const
Definition: Encoder.h:285
void updateStats(const double val, const bool is_null) override
void validate(T value)
Definition: Encoder.h:122
An AbstractBuffer is a unit of data management for a data manager.
virtual void write(int8_t *src, const size_t num_bytes, const size_t offset=0, const MemoryLevel src_buffer_type=CPU_LEVEL, const int src_device_id=-1)=0
DateDaysOverflowValidator date_days_overflow_validator_
Definition: Encoder.h:294
std::shared_ptr< ChunkMetadata > getMetadata(const SQLTypeInfo &ti) override
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
std::shared_ptr< ChunkMetadata > appendEncodedData(const int8_t *, int8_t *data, const size_t start_idx, const size_t num_elements) override
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
void reduceStats(const Encoder &that) override
void copyMetadata(const Encoder *copyFromEncoder) override
#define CHECK(condition)
Definition: Logger.h:211
bool resetChunkStats(const ChunkStats &stats) override
: Reset chunk level stats (min, max, nulls) using new values from the argument.
void updateStatsWithAlreadyEncoded(const V &encoded_data)
char * f
int64_t get_epoch_days_from_seconds(const int64_t seconds)
std::shared_ptr< ChunkMetadata > appendEncodedOrUnencodedData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating, const int64_t offset, const bool is_encoded)
DateDaysEncoder(Data_Namespace::AbstractBuffer *buffer)
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const int8_t *, int8_t *data, const std::vector< size_t > &selected_idx) override