OmniSciDB  8fa3bf436f
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FixedLengthEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef FIXED_LENGTH_ENCODER_H
18 #define FIXED_LENGTH_ENCODER_H
19 #include "Logger/Logger.h"
20 
21 #include <iostream>
22 #include <memory>
23 #include <stdexcept>
24 #include "AbstractBuffer.h"
25 #include "Encoder.h"
26 
27 #include <Shared/DatumFetchers.h>
28 #include <tbb/parallel_for.h>
29 #include <tbb/parallel_reduce.h>
30 #include <tuple>
31 
32 template <typename T, typename V>
33 class FixedLengthEncoder : public Encoder {
34  public:
37  }
38 
// Encodes `num_elems_to_append` values of type T read from `src_data` into the
// storage type V, stores them in `buffer_`, and updates the running chunk
// statistics along the way (through encodeDataAndUpdateStats).
//
// src_data            - raw bytes reinterpreted as an array of T; advanced past
//                       the consumed input only on the append path, and only
//                       when not replicating.
// num_elems_to_append - number of encoded values to produce.
// ti                  - not referenced by this implementation.
// replicating         - when true, element 0 of the input is encoded for every
//                       output slot.
// offset              - -1 appends to the buffer; any other value takes the
//                       overwrite path, which CHECKs !replicating and
//                       offset >= 0.
//
// Returns a newly allocated ChunkMetadata filled in by getMetadata().
39  std::shared_ptr<ChunkMetadata> appendData(int8_t*& src_data,
40  const size_t num_elems_to_append,
41  const SQLTypeInfo& ti,
42  const bool replicating = false,
43  const int64_t offset = -1) override {
44  if (offset == 0 &&
45  num_elems_to_append >=
46  num_elems_) { // we're rewriting entire buffer so fully recompute metadata
// NOTE(review): the listing numbering jumps from 46 to 48, so the body of this
// branch is not visible in this copy. Presumably it resets the chunk stats;
// confirm against the original header.
48  }
49 
// Encode into a temporary V array; every element also feeds the statistics.
50  T* unencoded_data = reinterpret_cast<T*>(src_data);
51  auto encoded_data = std::make_unique<V[]>(num_elems_to_append);
52  for (size_t i = 0; i < num_elems_to_append; ++i) {
// When replicating, the same first input element is encoded for every slot.
53  size_t ri = replicating ? 0 : i;
54  encoded_data.get()[i] = encodeDataAndUpdateStats(unencoded_data[ri]);
55  }
56 
57  // assume always CPU_BUFFER?
58  if (offset == -1) {
// Append path: grow the element count and add the encoded bytes to the buffer.
59  num_elems_ += num_elems_to_append;
60  buffer_->append(reinterpret_cast<int8_t*>(encoded_data.get()),
61  num_elems_to_append * sizeof(V));
62  if (!replicating) {
// The caller's pointer is advanced by the number of *unencoded* bytes consumed.
63  src_data += num_elems_to_append * sizeof(T);
64  }
65  } else {
// Overwrite path. Note that num_elems_ is assigned before the CHECKs below run.
// NOTE(review): `offset` is added to an element count here, but is passed
// unscaled as the `offset` argument of buffer_->write() further down, next to a
// byte count. Confirm both uses are meant to share the same unit.
66  num_elems_ = offset + num_elems_to_append;
67  CHECK(!replicating);
68  CHECK_GE(offset, 0);
69  buffer_->write(reinterpret_cast<int8_t*>(encoded_data.get()),
70  num_elems_to_append * sizeof(V),
71  static_cast<size_t>(offset));
72  }
// Report the encoder's current state back to the caller.
73  auto chunk_metadata = std::make_shared<ChunkMetadata>();
74  getMetadata(chunk_metadata);
75  return chunk_metadata;
76  }
77 
78  void getMetadata(const std::shared_ptr<ChunkMetadata>& chunkMetadata) override {
79  Encoder::getMetadata(chunkMetadata); // call on parent class
80  chunkMetadata->fillChunkStats(dataMin, dataMax, has_nulls);
81  }
82 
83  // Only called from the executor for synthesized meta-information.
84  std::shared_ptr<ChunkMetadata> getMetadata(const SQLTypeInfo& ti) override {
85  auto chunk_metadata = std::make_shared<ChunkMetadata>(ti, 0, 0, ChunkStats{});
86  chunk_metadata->fillChunkStats(dataMin, dataMax, has_nulls);
87  return chunk_metadata;
88  }
89 
90  // Only called from the executor for synthesized meta-information.
91  void updateStats(const int64_t val, const bool is_null) override {
92  if (is_null) {
93  has_nulls = true;
94  } else {
95  const auto data = static_cast<T>(val);
96  dataMin = std::min(dataMin, data);
97  dataMax = std::max(dataMax, data);
98  }
99  }
100 
101  // Only called from the executor for synthesized meta-information.
102  void updateStats(const double val, const bool is_null) override {
103  if (is_null) {
104  has_nulls = true;
105  } else {
106  const auto data = static_cast<T>(val);
107  dataMin = std::min(dataMin, data);
108  dataMax = std::max(dataMax, data);
109  }
110  }
111 
112  void updateStats(const int8_t* const src_data, const size_t num_elements) override {
113  const T* unencoded_data = reinterpret_cast<const T*>(src_data);
114  for (size_t i = 0; i < num_elements; ++i) {
115  encodeDataAndUpdateStats(unencoded_data[i]);
116  }
117  }
118 
// Recomputes the running statistics from `num_elements` already-encoded values
// of type V stored at `dst_data`, using a TBB parallel reduction.
//
// The reduction is seeded with the current (dataMin, dataMax, has_nulls), so
// existing statistics are widened rather than replaced. The result is written
// back to those three members through std::tie.
119  void updateStatsEncoded(const int8_t* const dst_data,
120  const size_t num_elements) override {
121  const V* data = reinterpret_cast<const V*>(dst_data);
122 
123  std::tie(dataMin, dataMax, has_nulls) = tbb::parallel_reduce(
124  tbb::blocked_range(size_t(0), num_elements),
// Seed tuple: the current statistics, with min / max cast to the encoded type V.
125  std::tuple(static_cast<V>(dataMin), static_cast<V>(dataMax), has_nulls),
// Range body: fold one sub-range of the input into a (min, max, nulls) tuple.
126  [&](const auto& range, auto init) {
127  auto [min, max, nulls] = init;
128  for (size_t i = range.begin(); i < range.end(); i++) {
// A value equal to the smallest representable V is treated as null here.
129  if (data[i] != std::numeric_limits<V>::min()) {
// NOTE(review): the listing numbering jumps from 129 to 131, so one statement
// of this branch is not visible in this copy. Confirm against the original
// header.
131  min = std::min(min, data[i]);
132  max = std::max(max, data[i]);
133  } else {
134  nulls = true;
135  }
136  }
137  return std::tuple(min, max, nulls);
138  },
// Join step: combine two partial results into one.
139  [&](auto lhs, auto rhs) {
140  const auto [lhs_min, lhs_max, lhs_nulls] = lhs;
141  const auto [rhs_min, rhs_max, rhs_nulls] = rhs;
142  return std::tuple(std::min(lhs_min, rhs_min),
143  std::max(lhs_max, rhs_max),
144  lhs_nulls || rhs_nulls);
145  });
146  }
147 
148  void updateStats(const std::vector<std::string>* const src_data,
149  const size_t start_idx,
150  const size_t num_elements) override {
151  UNREACHABLE();
152  }
153 
154  void updateStats(const std::vector<ArrayDatum>* const src_data,
155  const size_t start_idx,
156  const size_t num_elements) override {
157  UNREACHABLE();
158  }
159 
160  // Only called from the executor for synthesized meta-information.
161  void reduceStats(const Encoder& that) override {
162  const auto that_typed = static_cast<const FixedLengthEncoder<T, V>&>(that);
163  if (that_typed.has_nulls) {
164  has_nulls = true;
165  }
166  dataMin = std::min(dataMin, that_typed.dataMin);
167  dataMax = std::max(dataMax, that_typed.dataMax);
168  }
169 
170  void copyMetadata(const Encoder* copyFromEncoder) override {
171  num_elems_ = copyFromEncoder->getNumElems();
172  auto castedEncoder =
173  reinterpret_cast<const FixedLengthEncoder<T, V>*>(copyFromEncoder);
174  dataMin = castedEncoder->dataMin;
175  dataMax = castedEncoder->dataMax;
176  has_nulls = castedEncoder->has_nulls;
177  }
178 
179  void writeMetadata(FILE* f) override {
180  // assumes pointer is already in right place
181  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
182  fwrite((int8_t*)&dataMin, sizeof(T), 1, f);
183  fwrite((int8_t*)&dataMax, sizeof(T), 1, f);
184  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
185  }
186 
187  void readMetadata(FILE* f) override {
188  // assumes pointer is already in right place
189  fread((int8_t*)&num_elems_, sizeof(size_t), 1, f);
190  fread((int8_t*)&dataMin, 1, sizeof(T), f);
191  fread((int8_t*)&dataMax, 1, sizeof(T), f);
192  fread((int8_t*)&has_nulls, 1, sizeof(bool), f);
193  }
194 
195  bool resetChunkStats(const ChunkStats& stats) override {
196  const auto new_min = DatumFetcher::getDatumVal<T>(stats.min);
197  const auto new_max = DatumFetcher::getDatumVal<T>(stats.max);
198 
199  if (dataMin == new_min && dataMax == new_max && has_nulls == stats.has_nulls) {
200  return false;
201  }
202 
203  dataMin = new_min;
204  dataMax = new_max;
205  has_nulls = stats.has_nulls;
206  return true;
207  }
208 
209  void resetChunkStats() override {
210  dataMin = std::numeric_limits<T>::max();
211  dataMax = std::numeric_limits<T>::lowest();
212  has_nulls = false;
213  }
214 
217  bool has_nulls;
218 
219  private:
// Converts one unencoded value of type T to the storage type V and folds it
// into the running statistics.
//
// Returns the static_cast result in all cases, including when the value does
// not survive the conversion.
220  V encodeDataAndUpdateStats(const T& unencoded_data) {
221  V encoded_data = static_cast<V>(unencoded_data);
// A mismatch means V cannot represent the value: it is passed to the decimal
// overflow validator and an error is logged. The statistics are left untouched
// on this path.
222  if (unencoded_data != encoded_data) {
223  decimal_overflow_validator_.validate(unencoded_data);
224  LOG(ERROR) << "Fixed encoding failed, Unencoded: " +
225  std::to_string(unencoded_data) +
226  " encoded: " + std::to_string(encoded_data);
227  } else {
228  T data = unencoded_data;
// A value equal to the smallest representable V is treated as null here.
229  if (data == std::numeric_limits<V>::min()) {
230  has_nulls = true;
231  } else {
// NOTE(review): the listing numbering jumps from 231 to 233, so one statement
// of this branch is not visible in this copy. Confirm against the original
// header.
233  dataMin = std::min(dataMin, data);
234  dataMax = std::max(dataMax, data);
235  }
236  }
237  return encoded_data;
238  }
239 }; // FixedLengthEncoder
240 
241 #endif // FIXED_LENGTH_ENCODER_H
void updateStats(const int8_t *const src_data, const size_t num_elements) override
size_t num_elems_
Definition: Encoder.h:237
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
DecimalOverflowValidator decimal_overflow_validator_
Definition: Encoder.h:241
#define LOG(tag)
Definition: Logger.h:194
#define UNREACHABLE()
Definition: Logger.h:247
#define CHECK_GE(x, y)
Definition: Logger.h:216
bool has_nulls
Definition: ChunkMetadata.h:28
void resetChunkStats() override
void updateStats(const int64_t val, const bool is_null) override
void updateStats(const std::vector< std::string > *const src_data, const size_t start_idx, const size_t num_elements) override
std::string to_string(char const *&&v)
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
CONSTEXPR DEVICE bool is_null(const T &value)
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:239
void init(LogOptions const &log_opts)
Definition: Logger.cpp:280
void copyMetadata(const Encoder *copyFromEncoder) override
void readMetadata(FILE *f) override
size_t getNumElems() const
Definition: Encoder.h:233
void updateStats(const double val, const bool is_null) override
V encodeDataAndUpdateStats(const T &unencoded_data)
An AbstractBuffer is a unit of data management for a data manager.
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
virtual void write(int8_t *src, const size_t num_bytes, const size_t offset=0, const MemoryLevel src_buffer_type=CPU_LEVEL, const int src_device_id=-1)=0
void updateStats(const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
bool resetChunkStats(const ChunkStats &stats) override
Resets the chunk-level stats (min, max, nulls) using the new values from the argument.
void validate(T value)
Definition: Encoder.h:54
void updateStatsEncoded(const int8_t *const dst_data, const size_t num_elements) override
FixedLengthEncoder(Data_Namespace::AbstractBuffer *buffer)
void writeMetadata(FILE *f) override
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
#define CHECK(condition)
Definition: Logger.h:203
char * f
void reduceStats(const Encoder &that) override
std::shared_ptr< ChunkMetadata > getMetadata(const SQLTypeInfo &ti) override