OmniSciDB  c07336695a
FixedLengthArrayNoneEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 #ifndef FIXED_LENGTH_ARRAY_NONE_ENCODER_H
25 #define FIXED_LENGTH_ARRAY_NONE_ENCODER_H
26 
27 #include "Shared/Logger.h"
28 
29 #include <cassert>
30 #include <cstring>
31 #include <memory>
32 #include <mutex>
33 #include <string>
34 #include <vector>
35 #include "AbstractBuffer.h"
36 #include "ChunkMetadata.h"
37 #include "Encoder.h"
38 
40 
42  public:
44  : Encoder(buffer), has_nulls(false), initialized(false), array_size(as) {}
45 
46  size_t getNumElemsForBytesInsertData(const std::vector<ArrayDatum>* srcData,
47  const int start_idx,
48  const size_t numAppendElems,
49  const size_t byteLimit,
50  const bool replicating = false) {
51  size_t dataSize = numAppendElems * array_size;
52  if (dataSize > byteLimit) {
53  dataSize = byteLimit;
54  }
55  return dataSize / array_size;
56  }
57 
58  ChunkMetadata appendData(int8_t*& srcData,
59  const size_t numAppendElems,
60  const SQLTypeInfo&,
61  const bool replicating = false) override {
62  CHECK(false); // should never be called for arrays
63  return ChunkMetadata{};
64  }
65 
66  ChunkMetadata appendData(const std::vector<ArrayDatum>* srcData,
67  const int start_idx,
68  const size_t numAppendElems,
69  const bool replicating = false) {
70  size_t data_size = array_size * numAppendElems;
71  buffer_->reserve(data_size);
72 
73  for (size_t i = start_idx; i < start_idx + numAppendElems; i++) {
74  size_t len = (*srcData)[replicating ? 0 : i].length;
75  // Length of the appended array should be equal to the fixed length,
76  // all others should have been discarded, assert if something slips through
77  CHECK_EQ(len, array_size);
78  // NULL arrays have been filled with subtype's NULL sentinels,
79  // should be appended as regular data, same size
80  buffer_->append((*srcData)[replicating ? 0 : i].pointer, len);
81 
82  // keep Chunk statistics with array elements
83  update_elem_stats((*srcData)[replicating ? 0 : i]);
84  }
85  // make sure buffer_ is flushed even if no new data is appended to it
86  // (e.g. empty strings) because the metadata needs to be flushed.
87  if (!buffer_->isDirty()) {
88  buffer_->setDirty();
89  }
90 
91  num_elems_ += numAppendElems;
92  ChunkMetadata chunkMetadata;
93  getMetadata(chunkMetadata);
94  return chunkMetadata;
95  }
96 
97  void getMetadata(ChunkMetadata& chunkMetadata) override {
98  Encoder::getMetadata(chunkMetadata); // call on parent class
99  chunkMetadata.fillChunkStats(elem_min, elem_max, has_nulls);
100  }
101 
102  // Only called from the executor for synthesized meta-information.
103  ChunkMetadata getMetadata(const SQLTypeInfo& ti) override {
104  ChunkMetadata chunk_metadata{ti, 0, 0, ChunkStats{elem_min, elem_max, has_nulls}};
105  return chunk_metadata;
106  }
107 
108  void updateStats(const int64_t, const bool) override { CHECK(false); }
109 
110  void updateStats(const double, const bool) override { CHECK(false); }
111 
112  void reduceStats(const Encoder&) override { CHECK(false); }
113 
114  void writeMetadata(FILE* f) override {
115  // assumes pointer is already in right place
116  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
117  fwrite((int8_t*)&elem_min, sizeof(Datum), 1, f);
118  fwrite((int8_t*)&elem_max, sizeof(Datum), 1, f);
119  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
120  fwrite((int8_t*)&initialized, sizeof(bool), 1, f);
121  }
122 
123  void readMetadata(FILE* f) override {
124  // assumes pointer is already in right place
125  fread((int8_t*)&num_elems_, sizeof(size_t), 1, f);
126  fread((int8_t*)&elem_min, sizeof(Datum), 1, f);
127  fread((int8_t*)&elem_max, sizeof(Datum), 1, f);
128  fread((int8_t*)&has_nulls, sizeof(bool), 1, f);
129  fread((int8_t*)&initialized, sizeof(bool), 1, f);
130  }
131 
132  void copyMetadata(const Encoder* copyFromEncoder) override {
133  num_elems_ = copyFromEncoder->getNumElems();
134  auto array_encoder =
135  dynamic_cast<const FixedLengthArrayNoneEncoder*>(copyFromEncoder);
136  elem_min = array_encoder->elem_min;
137  elem_max = array_encoder->elem_max;
138  has_nulls = array_encoder->has_nulls;
139  initialized = array_encoder->initialized;
140  }
141 
142  void updateMetadata(int8_t* array) {
144  }
145 
148  bool has_nulls;
150 
151  private:
152  std::mutex EncoderMutex_;
153  size_t array_size;
154 
155  bool is_null(int8_t* array) {
156  if (buffer_->sqlType.get_notnull()) {
157  return false;
158  }
159  switch (buffer_->sqlType.get_subtype()) {
160  case kBOOLEAN: {
161  const bool* bool_array = (bool*)array;
162  return ((int8_t)bool_array[0] == NULL_ARRAY_BOOLEAN);
163  }
164  case kINT: {
165  const int32_t* int_array = (int32_t*)array;
166  return (int_array[0] == NULL_ARRAY_INT);
167  }
168  case kSMALLINT: {
169  const int16_t* smallint_array = (int16_t*)array;
170  return (smallint_array[0] == NULL_ARRAY_SMALLINT);
171  }
172  case kTINYINT: {
173  const int8_t* tinyint_array = (int8_t*)array;
174  return (tinyint_array[0] == NULL_ARRAY_TINYINT);
175  }
176  case kBIGINT:
177  case kNUMERIC:
178  case kDECIMAL: {
179  const int64_t* bigint_array = (int64_t*)array;
180  return (bigint_array[0] == NULL_ARRAY_BIGINT);
181  }
182  case kFLOAT: {
183  const float* flt_array = (float*)array;
184  return (flt_array[0] == NULL_ARRAY_FLOAT);
185  }
186  case kDOUBLE: {
187  const double* dbl_array = (double*)array;
188  return (dbl_array[0] == NULL_ARRAY_DOUBLE);
189  }
190  case kTIME:
191  case kTIMESTAMP:
192  case kDATE: {
193  const int64_t* tm_array = reinterpret_cast<int64_t*>(array);
194  return (tm_array[0] == NULL_ARRAY_BIGINT);
195  }
196  case kCHAR:
197  case kVARCHAR:
198  case kTEXT: {
200  const int32_t* int_array = (int32_t*)array;
201  return (int_array[0] == NULL_ARRAY_INT);
202  }
203  default:
204  assert(false);
205  }
206  return false;
207  }
208 
209  void update_elem_stats(const ArrayDatum& array) {
210  if (array.is_null) {
211  has_nulls = true;
212  }
213  switch (buffer_->sqlType.get_subtype()) {
214  case kBOOLEAN: {
215  if (!initialized) {
216  elem_min.boolval = true;
217  elem_max.boolval = false;
218  }
219  if (array.is_null) {
220  break;
221  }
222  const bool* bool_array = (bool*)array.pointer;
223  for (size_t i = 0; i < array.length / sizeof(bool); i++) {
224  if ((int8_t)bool_array[i] == NULL_BOOLEAN) {
225  has_nulls = true;
226  } else if (initialized) {
227  elem_min.boolval = std::min(elem_min.boolval, bool_array[i]);
228  elem_max.boolval = std::max(elem_max.boolval, bool_array[i]);
229  } else {
230  elem_min.boolval = bool_array[i];
231  elem_max.boolval = bool_array[i];
232  initialized = true;
233  }
234  }
235  break;
236  }
237  case kINT: {
238  if (!initialized) {
239  elem_min.intval = 1;
240  elem_max.intval = 0;
241  }
242  if (array.is_null) {
243  break;
244  }
245  const int32_t* int_array = (int32_t*)array.pointer;
246  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
247  if (int_array[i] == NULL_INT) {
248  has_nulls = true;
249  } else if (initialized) {
250  elem_min.intval = std::min(elem_min.intval, int_array[i]);
251  elem_max.intval = std::max(elem_max.intval, int_array[i]);
252  } else {
253  elem_min.intval = int_array[i];
254  elem_max.intval = int_array[i];
255  initialized = true;
256  }
257  }
258  break;
259  }
260  case kSMALLINT: {
261  if (!initialized) {
262  elem_min.smallintval = 1;
263  elem_max.smallintval = 0;
264  }
265  if (array.is_null) {
266  break;
267  }
268  const int16_t* smallint_array = (int16_t*)array.pointer;
269  for (size_t i = 0; i < array.length / sizeof(int16_t); i++) {
270  if (smallint_array[i] == NULL_SMALLINT) {
271  has_nulls = true;
272  } else if (initialized) {
273  elem_min.smallintval = std::min(elem_min.smallintval, smallint_array[i]);
274  elem_max.smallintval = std::max(elem_max.smallintval, smallint_array[i]);
275  } else {
276  elem_min.smallintval = smallint_array[i];
277  elem_max.smallintval = smallint_array[i];
278  initialized = true;
279  }
280  }
281  break;
282  }
283  case kTINYINT: {
284  if (!initialized) {
285  elem_min.tinyintval = 1;
286  elem_max.tinyintval = 0;
287  }
288  if (array.is_null) {
289  break;
290  }
291  const int8_t* tinyint_array = (int8_t*)array.pointer;
292  for (size_t i = 0; i < array.length / sizeof(int8_t); i++) {
293  if (tinyint_array[i] == NULL_TINYINT) {
294  has_nulls = true;
295  } else if (initialized) {
296  elem_min.tinyintval = std::min(elem_min.tinyintval, tinyint_array[i]);
297  elem_max.tinyintval = std::max(elem_max.tinyintval, tinyint_array[i]);
298  } else {
299  elem_min.tinyintval = tinyint_array[i];
300  elem_max.tinyintval = tinyint_array[i];
301  initialized = true;
302  }
303  }
304  break;
305  }
306  case kBIGINT:
307  case kNUMERIC:
308  case kDECIMAL: {
309  if (!initialized) {
310  elem_min.bigintval = 1;
311  elem_max.bigintval = 0;
312  }
313  if (array.is_null) {
314  break;
315  }
316  const int64_t* bigint_array = (int64_t*)array.pointer;
317  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
318  if (bigint_array[i] == NULL_BIGINT) {
319  has_nulls = true;
320  } else if (initialized) {
321  decimal_overflow_validator_.validate(bigint_array[i]);
322  elem_min.bigintval = std::min(elem_min.bigintval, bigint_array[i]);
323  elem_max.bigintval = std::max(elem_max.bigintval, bigint_array[i]);
324  } else {
325  decimal_overflow_validator_.validate(bigint_array[i]);
326  elem_min.bigintval = bigint_array[i];
327  elem_max.bigintval = bigint_array[i];
328  initialized = true;
329  }
330  }
331  break;
332  }
333  case kFLOAT: {
334  if (!initialized) {
335  elem_min.floatval = 1.0;
336  elem_max.floatval = 0.0;
337  }
338  if (array.is_null) {
339  break;
340  }
341  const float* flt_array = (float*)array.pointer;
342  for (size_t i = 0; i < array.length / sizeof(float); i++) {
343  if (flt_array[i] == NULL_FLOAT) {
344  has_nulls = true;
345  } else if (initialized) {
346  elem_min.floatval = std::min(elem_min.floatval, flt_array[i]);
347  elem_max.floatval = std::max(elem_max.floatval, flt_array[i]);
348  } else {
349  elem_min.floatval = flt_array[i];
350  elem_max.floatval = flt_array[i];
351  initialized = true;
352  }
353  }
354  break;
355  }
356  case kDOUBLE: {
357  if (!initialized) {
358  elem_min.doubleval = 1.0;
359  elem_max.doubleval = 0.0;
360  }
361  if (array.is_null) {
362  break;
363  }
364  const double* dbl_array = (double*)array.pointer;
365  for (size_t i = 0; i < array.length / sizeof(double); i++) {
366  if (dbl_array[i] == NULL_DOUBLE) {
367  has_nulls = true;
368  } else if (initialized) {
369  elem_min.doubleval = std::min(elem_min.doubleval, dbl_array[i]);
370  elem_max.doubleval = std::max(elem_max.doubleval, dbl_array[i]);
371  } else {
372  elem_min.doubleval = dbl_array[i];
373  elem_max.doubleval = dbl_array[i];
374  initialized = true;
375  }
376  }
377  break;
378  }
379  case kTIME:
380  case kTIMESTAMP:
381  case kDATE: {
382  if (!initialized) {
383  elem_min.bigintval = 1;
384  elem_max.bigintval = 0;
385  }
386  if (array.is_null) {
387  break;
388  }
389  const int64_t* tm_array = reinterpret_cast<int64_t*>(array.pointer);
390  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
391  if (tm_array[i] == NULL_BIGINT) {
392  has_nulls = true;
393  } else if (initialized) {
394  elem_min.bigintval = std::min(elem_min.bigintval, tm_array[i]);
395  elem_max.bigintval = std::max(elem_max.bigintval, tm_array[i]);
396  } else {
397  elem_min.bigintval = tm_array[i];
398  elem_max.bigintval = tm_array[i];
399  initialized = true;
400  }
401  }
402  break;
403  }
404  case kCHAR:
405  case kVARCHAR:
406  case kTEXT: {
408  if (!initialized) {
409  elem_min.intval = 1;
410  elem_max.intval = 0;
411  }
412  if (array.is_null) {
413  break;
414  }
415  const int32_t* int_array = (int32_t*)array.pointer;
416  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
417  if (int_array[i] == NULL_INT) {
418  has_nulls = true;
419  } else if (initialized) {
420  elem_min.intval = std::min(elem_min.intval, int_array[i]);
421  elem_max.intval = std::max(elem_max.intval, int_array[i]);
422  } else {
423  elem_min.intval = int_array[i];
424  elem_max.intval = int_array[i];
425  initialized = true;
426  }
427  }
428  break;
429  }
430  default:
431  assert(false);
432  }
433  };
434 
435 }; // class FixedLengthArrayNoneEncoder
436 
437 #endif // FIXED_LENGTH_ARRAY_NONE_ENCODER_H
int8_t tinyintval
Definition: sqltypes.h:123
#define CHECK_EQ(x, y)
Definition: Logger.h:195
#define NULL_DOUBLE
Definition: sqltypes.h:177
ChunkMetadata appendData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const bool replicating=false)
size_t num_elems_
Definition: Encoder.h:179
Definition: sqltypes.h:51
DecimalOverflowValidator decimal_overflow_validator_
Definition: Encoder.h:184
#define NULL_BIGINT
Definition: sqltypes.h:175
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:185
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:326
bool boolval
Definition: sqltypes.h:122
void fillChunkStats(const T min, const T max, const bool has_nulls)
Definition: ChunkMetadata.h:38
#define NULL_ARRAY_SMALLINT
Definition: sqltypes.h:181
#define NULL_ARRAY_TINYINT
Definition: sqltypes.h:180
ChunkMetadata getMetadata(const SQLTypeInfo &ti) override
#define NULL_ARRAY_BOOLEAN
Definition: sqltypes.h:179
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:327
int32_t intval
Definition: sqltypes.h:125
virtual void reserve(size_t numBytes)=0
void updateStats(const double, const bool) override
float floatval
Definition: sqltypes.h:127
FixedLengthArrayNoneEncoder(AbstractBuffer *buffer, size_t as)
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:181
#define NULL_TINYINT
Definition: sqltypes.h:172
virtual void getMetadata(ChunkMetadata &chunkMetadata)
Definition: Encoder.cpp:227
void copyMetadata(const Encoder *copyFromEncoder) override
virtual void append(int8_t *src, const size_t numBytes, const MemoryLevel srcBufferType=CPU_LEVEL, const int deviceId=-1)=0
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:320
int64_t bigintval
Definition: sqltypes.h:126
#define NULL_FLOAT
Definition: sqltypes.h:176
int16_t smallintval
Definition: sqltypes.h:124
An AbstractBuffer is a unit of data management for a data manager.
#define NULL_ARRAY_INT
Definition: sqltypes.h:182
#define NULL_INT
Definition: sqltypes.h:174
ChunkMetadata appendData(int8_t *&srcData, const size_t numAppendElems, const SQLTypeInfo &, const bool replicating=false) override
void validate(T value)
Definition: Encoder.h:54
Definition: sqltypes.h:54
Definition: sqltypes.h:55
#define NULL_ARRAY_BIGINT
Definition: sqltypes.h:183
Definition: sqltypes.h:43
void update_elem_stats(const ArrayDatum &array)
#define NULL_SMALLINT
Definition: sqltypes.h:173
void reduceStats(const Encoder &) override
size_t getNumElems() const
Definition: Encoder.h:175
#define CHECK(condition)
Definition: Logger.h:187
void updateStats(const int64_t, const bool) override
virtual bool isDirty() const
Definition: sqltypes.h:47
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
void getMetadata(ChunkMetadata &chunkMetadata) override
#define NULL_BOOLEAN
Definition: sqltypes.h:171
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:119
double doubleval
Definition: sqltypes.h:128
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:184