OmniSciDB  471d68cefb
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
NoneEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef NONE_ENCODER_H
18 #define NONE_ENCODER_H
19 
20 #include "AbstractBuffer.h"
21 #include "Encoder.h"
22 
23 #include <Shared/DatumFetchers.h>
24 
25 #include <tbb/parallel_for.h>
26 #include <tbb/parallel_reduce.h>
27 #include <tuple>
28 
29 template <typename T>
31  return std::is_integral<T>::value ? inline_int_null_value<T>()
32  : inline_fp_null_value<T>();
33 }
34 
35 template <typename T>
36 class NoneEncoder : public Encoder {
37  public:
40  }
41 
// Validates the incoming values, folds them into this encoder's stats, and then
// either appends them to buffer_ (offset == -1) or writes them at `offset`.
//
// src_data: raw bytes reinterpreted as an array of T; advanced past the
//   consumed bytes only on the non-replicating append path.
// num_elems_to_append: number of T values to process.
// replicating: when true, element 0 of src_data is repeated
//   num_elems_to_append times instead of reading consecutive elements.
// offset: -1 selects the append path; any other value selects the write path,
//   which CHECKs that offset >= 0 and that replicating is false.
// Returns a new ChunkMetadata populated through getMetadata().
42  std::shared_ptr<ChunkMetadata> appendData(int8_t*& src_data,
43  const size_t num_elems_to_append,
44  const SQLTypeInfo&,
45  const bool replicating = false,
46  const int64_t offset = -1) override {
47  if (offset == 0 && num_elems_to_append >= num_elems_) {
// NOTE(review): original line 48 (the body of this branch) is absent from this
// listing, so the branch appears empty here. Restore it from the real header
// before relying on this text.
49  }
50  T* unencodedData = reinterpret_cast<T*>(src_data);
// Scratch copy of the values; only populated when replicating.
51  std::vector<T> encoded_data;
52  if (replicating) {
53  encoded_data.resize(num_elems_to_append);
54  }
// Every processed value goes through validateDataAndUpdateStats(); when
// replicating, the source index is pinned to 0 and the result is stored in
// encoded_data.
55  for (size_t i = 0; i < num_elems_to_append; ++i) {
56  size_t ri = replicating ? 0 : i;
57  T data = validateDataAndUpdateStats(unencodedData[ri]);
58  if (replicating) {
59  encoded_data[i] = data;
60  }
61  }
62  if (offset == -1) {
// Append path: grow the element count and append either the replicated scratch
// copy or the caller's bytes as-is.
63  num_elems_ += num_elems_to_append;
64  buffer_->append(
65  replicating ? reinterpret_cast<int8_t*>(encoded_data.data()) : src_data,
66  num_elems_to_append * sizeof(T));
67  if (!replicating) {
// Move the caller's pointer past the bytes that were just consumed.
68  src_data += num_elems_to_append * sizeof(T);
69  }
70  } else {
// Write path.
// NOTE(review): `offset` is added to an element count here, yet it is handed to
// buffer_->write() unscaled below while the length argument is in bytes.
// Confirm which unit callers pass. Note also that CHECK_GE(offset, 0) runs
// only after num_elems_ has already been assigned.
71  num_elems_ = offset + num_elems_to_append;
72  CHECK(!replicating);
73  CHECK_GE(offset, 0);
74  buffer_->write(
75  src_data, num_elems_to_append * sizeof(T), static_cast<size_t>(offset));
76  }
77  auto chunk_metadata = std::make_shared<ChunkMetadata>();
78  getMetadata(chunk_metadata);
79  return chunk_metadata;
80  }
81 
82  void getMetadata(const std::shared_ptr<ChunkMetadata>& chunkMetadata) override {
83  Encoder::getMetadata(chunkMetadata); // call on parent class
84  chunkMetadata->fillChunkStats(dataMin, dataMax, has_nulls);
85  }
86 
87  // Only called from the executor for synthesized meta-information.
88  std::shared_ptr<ChunkMetadata> getMetadata(const SQLTypeInfo& ti) override {
89  auto chunk_metadata = std::make_shared<ChunkMetadata>(ti, 0, 0, ChunkStats{});
90  chunk_metadata->fillChunkStats(dataMin, dataMax, has_nulls);
91  return chunk_metadata;
92  }
93 
94  // Only called from the executor for synthesized meta-information.
95  void updateStats(const int64_t val, const bool is_null) override {
96  if (is_null) {
97  has_nulls = true;
98  } else {
99  const auto data = static_cast<T>(val);
100  dataMin = std::min(dataMin, data);
101  dataMax = std::max(dataMax, data);
102  }
103  }
104 
105  // Only called from the executor for synthesized meta-information.
106  void updateStats(const double val, const bool is_null) override {
107  if (is_null) {
108  has_nulls = true;
109  } else {
110  const auto data = static_cast<T>(val);
111  dataMin = std::min(dataMin, data);
112  dataMax = std::max(dataMax, data);
113  }
114  }
115 
116  void updateStats(const int8_t* const src_data, const size_t num_elements) override {
117  const T* unencoded_data = reinterpret_cast<const T*>(src_data);
118  for (size_t i = 0; i < num_elements; ++i) {
119  validateDataAndUpdateStats(unencoded_data[i]);
120  }
121  }
122 
// Recomputes min / max / null stats over `num_elements` values of type T read
// from dst_data, folding (min, max, nulls) tuples over sub-ranges and merging
// them pairwise. The fold is seeded with the current dataMin, dataMax and
// has_nulls.
//
// NOTE(review): two lines of the original are absent from this listing:
// line 127, which opens the call taking the arguments below (the file includes
// <tbb/parallel_reduce.h>), and line 134, the first statement of the non-null
// branch. Restore both from the real header before relying on this text.
123  void updateStatsEncoded(const int8_t* const dst_data,
124  const size_t num_elements) override {
125  const T* data = reinterpret_cast<const T*>(dst_data);
126 
// Arguments, in order: index range, seed tuple, per-range fold, pairwise merge.
128  tbb::blocked_range(size_t(0), num_elements),
129  std::tuple(dataMin, dataMax, has_nulls),
130  [&](const auto& range, auto init) {
// Per-range fold: start from the incoming partial result and scan the range.
131  auto [min, max, nulls] = init;
132  for (size_t i = range.begin(); i < range.end(); i++) {
// A value equal to the null sentinel only raises the null flag; any other
// value narrows min / max.
133  if (data[i] != none_encoded_null_value<T>()) {
135  min = std::min(min, data[i]);
136  max = std::max(max, data[i]);
137  } else {
138  nulls = true;
139  }
140  }
141  return std::tuple(min, max, nulls);
142  },
143  [&](auto lhs, auto rhs) {
// Merge two partial results: smallest min, largest max, OR of the null flags.
144  const auto [lhs_min, lhs_max, lhs_nulls] = lhs;
145  const auto [rhs_min, rhs_max, rhs_nulls] = rhs;
146  return std::tuple(std::min(lhs_min, rhs_min),
147  std::max(lhs_max, rhs_max),
148  lhs_nulls || rhs_nulls);
149  });
150  }
151 
152  void updateStats(const std::vector<std::string>* const src_data,
153  const size_t start_idx,
154  const size_t num_elements) override {
155  UNREACHABLE();
156  }
157 
158  void updateStats(const std::vector<ArrayDatum>* const src_data,
159  const size_t start_idx,
160  const size_t num_elements) override {
161  UNREACHABLE();
162  }
163 
164  // Only called from the executor for synthesized meta-information.
165  void reduceStats(const Encoder& that) override {
166  const auto that_typed = static_cast<const NoneEncoder&>(that);
167  if (that_typed.has_nulls) {
168  has_nulls = true;
169  }
170  dataMin = std::min(dataMin, that_typed.dataMin);
171  dataMax = std::max(dataMax, that_typed.dataMax);
172  }
173 
174  void writeMetadata(FILE* f) override {
175  // assumes pointer is already in right place
176  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
177  fwrite((int8_t*)&dataMin, sizeof(T), 1, f);
178  fwrite((int8_t*)&dataMax, sizeof(T), 1, f);
179  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
180  }
181 
182  void readMetadata(FILE* f) override {
183  // assumes pointer is already in right place
184  fread((int8_t*)&num_elems_, sizeof(size_t), 1, f);
185  fread((int8_t*)&dataMin, sizeof(T), 1, f);
186  fread((int8_t*)&dataMax, sizeof(T), 1, f);
187  fread((int8_t*)&has_nulls, sizeof(bool), 1, f);
188  }
189 
190  bool resetChunkStats(const ChunkStats& stats) override {
191  const auto new_min = DatumFetcher::getDatumVal<T>(stats.min);
192  const auto new_max = DatumFetcher::getDatumVal<T>(stats.max);
193 
194  if (dataMin == new_min && dataMax == new_max && has_nulls == stats.has_nulls) {
195  return false;
196  }
197 
198  dataMin = new_min;
199  dataMax = new_max;
200  has_nulls = stats.has_nulls;
201  return true;
202  }
203 
204  void copyMetadata(const Encoder* copyFromEncoder) override {
205  num_elems_ = copyFromEncoder->getNumElems();
206  auto castedEncoder = reinterpret_cast<const NoneEncoder<T>*>(copyFromEncoder);
207  dataMin = castedEncoder->dataMin;
208  dataMax = castedEncoder->dataMax;
209  has_nulls = castedEncoder->has_nulls;
210  }
211 
212  void resetChunkStats() override {
213  dataMin = std::numeric_limits<T>::max();
214  dataMax = std::numeric_limits<T>::lowest();
215  has_nulls = false;
216  }
217 
220  bool has_nulls;
221 
222  private:
223  T validateDataAndUpdateStats(const T& unencoded_data) {
224  if (unencoded_data == none_encoded_null_value<T>()) {
225  has_nulls = true;
226  } else {
227  decimal_overflow_validator_.validate(unencoded_data);
228  dataMin = std::min(dataMin, unencoded_data);
229  dataMax = std::max(dataMax, unencoded_data);
230  }
231  return unencoded_data;
232  }
233 }; // class NoneEncoder
234 
235 #endif // NONE_ENCODER_H
void updateStats(const int8_t *const src_data, const size_t num_elements) override
Definition: NoneEncoder.h:116
size_t num_elems_
Definition: Encoder.h:237
void writeMetadata(FILE *f) override
Definition: NoneEncoder.h:174
DecimalOverflowValidator decimal_overflow_validator_
Definition: Encoder.h:241
#define UNREACHABLE()
Definition: Logger.h:253
#define CHECK_GE(x, y)
Definition: Logger.h:222
bool has_nulls
Definition: ChunkMetadata.h:28
void updateStats(const int64_t val, const bool is_null) override
Definition: NoneEncoder.h:95
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
void resetChunkStats() override
Definition: NoneEncoder.h:212
void updateStats(const std::vector< std::string > *const src_data, const size_t start_idx, const size_t num_elements) override
Definition: NoneEncoder.h:152
CONSTEXPR DEVICE bool is_null(const T &value)
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:239
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &, const bool replicating=false, const int64_t offset=-1) override
Definition: NoneEncoder.h:42
void init(LogOptions const &log_opts)
Definition: Logger.cpp:290
size_t getNumElems() const
Definition: Encoder.h:233
An AbstractBuffer is a unit of data management for a data manager.
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
Definition: NoneEncoder.h:82
virtual void write(int8_t *src, const size_t num_bytes, const size_t offset=0, const MemoryLevel src_buffer_type=CPU_LEVEL, const int src_device_id=-1)=0
Value parallel_reduce(const blocked_range< Int > &range, const Value &identity, const RealBody &real_body, const Reduction &reduction, const Partitioner &p=Partitioner())
Parallel iteration with reduction.
T none_encoded_null_value()
Definition: NoneEncoder.h:30
bool resetChunkStats(const ChunkStats &stats) override
: Reset chunk level stats (min, max, nulls) using new values from the argument.
Definition: NoneEncoder.h:190
bool has_nulls
Definition: NoneEncoder.h:220
void updateStats(const double val, const bool is_null) override
Definition: NoneEncoder.h:106
void updateStats(const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
Definition: NoneEncoder.h:158
std::shared_ptr< ChunkMetadata > getMetadata(const SQLTypeInfo &ti) override
Definition: NoneEncoder.h:88
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
#define CHECK(condition)
Definition: Logger.h:209
NoneEncoder(Data_Namespace::AbstractBuffer *buffer)
Definition: NoneEncoder.h:38
char * f
void updateStatsEncoded(const int8_t *const dst_data, const size_t num_elements) override
Definition: NoneEncoder.h:123
void reduceStats(const Encoder &that) override
Definition: NoneEncoder.h:165
void copyMetadata(const Encoder *copyFromEncoder) override
Definition: NoneEncoder.h:204
T validateDataAndUpdateStats(const T &unencoded_data)
Definition: NoneEncoder.h:223
void validate(T value) const
Definition: Encoder.h:54
void readMetadata(FILE *f) override
Definition: NoneEncoder.h:182