OmniSciDB  04ee39c94c
FixedLengthEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef FIXED_LENGTH_ENCODER_H
18 #define FIXED_LENGTH_ENCODER_H
19 #include "Shared/Logger.h"
20 
21 #include <iostream>
22 #include <memory>
23 #include <stdexcept>
24 #include "AbstractBuffer.h"
25 #include "Encoder.h"
26 
27 #include <Shared/DatumFetchers.h>
28 
29 template <typename T, typename V>
30 class FixedLengthEncoder : public Encoder {
31  public:
33  : Encoder(buffer)
34  , dataMin(std::numeric_limits<T>::max())
35  , dataMax(std::numeric_limits<T>::min())
36  , has_nulls(false) {}
37 
38  ChunkMetadata appendData(int8_t*& srcData,
39  const size_t numAppendElems,
40  const SQLTypeInfo& ti,
41  const bool replicating = false) override {
42  T* unencodedData = reinterpret_cast<T*>(srcData);
43  auto encodedData = std::make_unique<V[]>(numAppendElems);
44  for (size_t i = 0; i < numAppendElems; ++i) {
45  size_t ri = replicating ? 0 : i;
46  encodedData.get()[i] = static_cast<V>(unencodedData[ri]);
47  if (unencodedData[ri] != encodedData.get()[i]) {
48  decimal_overflow_validator_.validate(unencodedData[ri]);
49  LOG(ERROR) << "Fixed encoding failed, Unencoded: " +
50  std::to_string(unencodedData[ri]) +
51  " encoded: " + std::to_string(encodedData.get()[i]);
52  } else {
53  T data = unencodedData[ri];
54  if (data == std::numeric_limits<V>::min()) {
55  has_nulls = true;
56  } else {
58  dataMin = std::min(dataMin, data);
59  dataMax = std::max(dataMax, data);
60  }
61  }
62  }
63  num_elems_ += numAppendElems;
64 
65  // assume always CPU_BUFFER?
66  buffer_->append((int8_t*)(encodedData.get()), numAppendElems * sizeof(V));
67  ChunkMetadata chunkMetadata;
68  getMetadata(chunkMetadata);
69  if (!replicating) {
70  srcData += numAppendElems * sizeof(T);
71  }
72  return chunkMetadata;
73  }
74 
75  void getMetadata(ChunkMetadata& chunkMetadata) override {
76  Encoder::getMetadata(chunkMetadata); // call on parent class
77  chunkMetadata.fillChunkStats(dataMin, dataMax, has_nulls);
78  }
79 
80  // Only called from the executor for synthesized meta-information.
81  ChunkMetadata getMetadata(const SQLTypeInfo& ti) override {
82  ChunkMetadata chunk_metadata{ti, 0, 0, ChunkStats{}};
83  chunk_metadata.fillChunkStats(dataMin, dataMax, has_nulls);
84  return chunk_metadata;
85  }
86 
87  // Only called from the executor for synthesized meta-information.
88  void updateStats(const int64_t val, const bool is_null) override {
89  if (is_null) {
90  has_nulls = true;
91  } else {
92  const auto data = static_cast<T>(val);
93  dataMin = std::min(dataMin, data);
94  dataMax = std::max(dataMax, data);
95  }
96  }
97 
98  // Only called from the executor for synthesized meta-information.
99  void updateStats(const double val, const bool is_null) override {
100  if (is_null) {
101  has_nulls = true;
102  } else {
103  const auto data = static_cast<T>(val);
104  dataMin = std::min(dataMin, data);
105  dataMax = std::max(dataMax, data);
106  }
107  }
108 
109  // Only called from the executor for synthesized meta-information.
110  void reduceStats(const Encoder& that) override {
111  const auto that_typed = static_cast<const FixedLengthEncoder<T, V>&>(that);
112  if (that_typed.has_nulls) {
113  has_nulls = true;
114  }
115  dataMin = std::min(dataMin, that_typed.dataMin);
116  dataMax = std::max(dataMax, that_typed.dataMax);
117  }
118 
119  void copyMetadata(const Encoder* copyFromEncoder) override {
120  num_elems_ = copyFromEncoder->getNumElems();
121  auto castedEncoder =
122  reinterpret_cast<const FixedLengthEncoder<T, V>*>(copyFromEncoder);
123  dataMin = castedEncoder->dataMin;
124  dataMax = castedEncoder->dataMax;
125  has_nulls = castedEncoder->has_nulls;
126  }
127 
128  void writeMetadata(FILE* f) override {
129  // assumes pointer is already in right place
130  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
131  fwrite((int8_t*)&dataMin, sizeof(T), 1, f);
132  fwrite((int8_t*)&dataMax, sizeof(T), 1, f);
133  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
134  }
135 
136  void readMetadata(FILE* f) override {
137  // assumes pointer is already in right place
138  fread((int8_t*)&num_elems_, sizeof(size_t), 1, f);
139  fread((int8_t*)&dataMin, 1, sizeof(T), f);
140  fread((int8_t*)&dataMax, 1, sizeof(T), f);
141  fread((int8_t*)&has_nulls, 1, sizeof(bool), f);
142  }
143 
144  bool resetChunkStats(const ChunkStats& stats) override {
145  const auto new_min = DatumFetcher::getDatumVal<T>(stats.min);
146  const auto new_max = DatumFetcher::getDatumVal<T>(stats.max);
147 
148  if (dataMin == new_min && dataMax == new_max && has_nulls == stats.has_nulls) {
149  return false;
150  }
151 
152  dataMin = new_min;
153  dataMax = new_max;
154  has_nulls = stats.has_nulls;
155  return true;
156  }
157 
160  bool has_nulls;
161 
162 }; // FixedLengthEncoder
163 
164 #endif // FIXED_LENGTH_ENCODER_H
size_t num_elems_
Definition: Encoder.h:179
ChunkMetadata appendData(int8_t *&srcData, const size_t numAppendElems, const SQLTypeInfo &ti, const bool replicating=false) override
DecimalOverflowValidator decimal_overflow_validator_
Definition: Encoder.h:184
#define LOG(tag)
Definition: Logger.h:182
void fillChunkStats(const T min, const T max, const bool has_nulls)
Definition: ChunkMetadata.h:38
bool has_nulls
Definition: ChunkMetadata.h:28
void updateStats(const int64_t val, const bool is_null) override
std::string to_string(char const *&&v)
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:181
void copyMetadata(const Encoder *copyFromEncoder) override
void readMetadata(FILE *f) override
virtual void getMetadata(ChunkMetadata &chunkMetadata)
Definition: Encoder.cpp:227
virtual void append(int8_t *src, const size_t numBytes, const MemoryLevel srcBufferType=CPU_LEVEL, const int deviceId=-1)=0
void getMetadata(ChunkMetadata &chunkMetadata) override
void updateStats(const double val, const bool is_null) override
An AbstractBuffer is a unit of data management for a data manager.
bool resetChunkStats(const ChunkStats &stats) override
: Reset chunk level stats (min, max, nulls) using new values from the argument.
void validate(T value)
Definition: Encoder.h:54
ChunkMetadata getMetadata(const SQLTypeInfo &ti) override
bool is_null(const T &v, const SQLTypeInfo &t)
FixedLengthEncoder(Data_Namespace::AbstractBuffer *buffer)
void writeMetadata(FILE *f) override
size_t getNumElems() const
Definition: Encoder.h:175
void reduceStats(const Encoder &that) override