OmniSciDB  eee9fa949c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FixedLengthArrayNoneEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 #ifndef FIXED_LENGTH_ARRAY_NONE_ENCODER_H
25 #define FIXED_LENGTH_ARRAY_NONE_ENCODER_H
26 
27 #include "Shared/Logger.h"
28 
29 #include <cassert>
30 #include <cstring>
31 #include <memory>
32 #include <mutex>
33 #include <string>
34 #include <vector>
35 #include "AbstractBuffer.h"
36 #include "ChunkMetadata.h"
37 #include "Encoder.h"
38 
40 
42  public:
44  : Encoder(buffer), has_nulls(false), initialized(false), array_size(as) {}
45 
46  size_t getNumElemsForBytesInsertData(const std::vector<ArrayDatum>* srcData,
47  const int start_idx,
48  const size_t numAppendElems,
49  const size_t byteLimit,
50  const bool replicating = false) {
51  size_t dataSize = numAppendElems * array_size;
52  if (dataSize > byteLimit) {
53  dataSize = byteLimit;
54  }
55  return dataSize / array_size;
56  }
57 
58  ChunkMetadata appendData(int8_t*& srcData,
59  const size_t numAppendElems,
60  const SQLTypeInfo&,
61  const bool replicating = false) override {
62  CHECK(false); // should never be called for arrays
63  return ChunkMetadata{};
64  }
65 
66  ChunkMetadata appendData(const std::vector<ArrayDatum>* srcData,
67  const int start_idx,
68  const size_t numAppendElems,
69  const bool replicating = false) {
70  size_t data_size = array_size * numAppendElems;
71  buffer_->reserve(data_size);
72 
73  for (size_t i = start_idx; i < start_idx + numAppendElems; i++) {
74  size_t len = (*srcData)[replicating ? 0 : i].length;
75  // Length of the appended array should be equal to the fixed length,
76  // all others should have been discarded, assert if something slips through
77  CHECK_EQ(len, array_size);
78  // NULL arrays have been filled with subtype's NULL sentinels,
79  // should be appended as regular data, same size
80  buffer_->append((*srcData)[replicating ? 0 : i].pointer, len);
81 
82  // keep Chunk statistics with array elements
83  update_elem_stats((*srcData)[replicating ? 0 : i]);
84  }
85  // make sure buffer_ is flushed even if no new data is appended to it
86  // (e.g. empty strings) because the metadata needs to be flushed.
87  if (!buffer_->isDirty()) {
88  buffer_->setDirty();
89  }
90 
91  num_elems_ += numAppendElems;
92  ChunkMetadata chunkMetadata;
93  getMetadata(chunkMetadata);
94  return chunkMetadata;
95  }
96 
97  void getMetadata(ChunkMetadata& chunkMetadata) override {
98  Encoder::getMetadata(chunkMetadata); // call on parent class
99  chunkMetadata.fillChunkStats(elem_min, elem_max, has_nulls);
100  }
101 
102  // Only called from the executor for synthesized meta-information.
103  ChunkMetadata getMetadata(const SQLTypeInfo& ti) override {
104  ChunkMetadata chunk_metadata{ti, 0, 0, ChunkStats{elem_min, elem_max, has_nulls}};
105  return chunk_metadata;
106  }
107 
108  void updateStats(const int64_t, const bool) override { CHECK(false); }
109 
110  void updateStats(const double, const bool) override { CHECK(false); }
111 
112  void reduceStats(const Encoder&) override { CHECK(false); }
113 
114  void writeMetadata(FILE* f) override {
115  // assumes pointer is already in right place
116  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
117  fwrite((int8_t*)&elem_min, sizeof(Datum), 1, f);
118  fwrite((int8_t*)&elem_max, sizeof(Datum), 1, f);
119  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
120  fwrite((int8_t*)&initialized, sizeof(bool), 1, f);
121  }
122 
123  void readMetadata(FILE* f) override {
124  // assumes pointer is already in right place
125  fread((int8_t*)&num_elems_, sizeof(size_t), 1, f);
126  fread((int8_t*)&elem_min, sizeof(Datum), 1, f);
127  fread((int8_t*)&elem_max, sizeof(Datum), 1, f);
128  fread((int8_t*)&has_nulls, sizeof(bool), 1, f);
129  fread((int8_t*)&initialized, sizeof(bool), 1, f);
130  }
131 
132  void copyMetadata(const Encoder* copyFromEncoder) override {
133  num_elems_ = copyFromEncoder->getNumElems();
134  auto array_encoder =
135  dynamic_cast<const FixedLengthArrayNoneEncoder*>(copyFromEncoder);
136  elem_min = array_encoder->elem_min;
137  elem_max = array_encoder->elem_max;
138  has_nulls = array_encoder->has_nulls;
139  initialized = array_encoder->initialized;
140  }
141 
142  void updateMetadata(int8_t* array) {
144  }
145 
146  static bool is_null(const SQLTypeInfo& type, int8_t* array) {
147  if (type.get_notnull()) {
148  return false;
149  }
150  switch (type.get_subtype()) {
151  case kBOOLEAN: {
152  const bool* bool_array = (bool*)array;
153  return ((int8_t)bool_array[0] == NULL_ARRAY_BOOLEAN);
154  }
155  case kINT: {
156  const int32_t* int_array = (int32_t*)array;
157  return (int_array[0] == NULL_ARRAY_INT);
158  }
159  case kSMALLINT: {
160  const int16_t* smallint_array = (int16_t*)array;
161  return (smallint_array[0] == NULL_ARRAY_SMALLINT);
162  }
163  case kTINYINT: {
164  const int8_t* tinyint_array = (int8_t*)array;
165  return (tinyint_array[0] == NULL_ARRAY_TINYINT);
166  }
167  case kBIGINT:
168  case kNUMERIC:
169  case kDECIMAL: {
170  const int64_t* bigint_array = (int64_t*)array;
171  return (bigint_array[0] == NULL_ARRAY_BIGINT);
172  }
173  case kFLOAT: {
174  const float* flt_array = (float*)array;
175  return (flt_array[0] == NULL_ARRAY_FLOAT);
176  }
177  case kDOUBLE: {
178  const double* dbl_array = (double*)array;
179  return (dbl_array[0] == NULL_ARRAY_DOUBLE);
180  }
181  case kTIME:
182  case kTIMESTAMP:
183  case kDATE: {
184  const int64_t* tm_array = reinterpret_cast<int64_t*>(array);
185  return (tm_array[0] == NULL_ARRAY_BIGINT);
186  }
187  case kCHAR:
188  case kVARCHAR:
189  case kTEXT: {
191  const int32_t* int_array = (int32_t*)array;
192  return (int_array[0] == NULL_ARRAY_INT);
193  }
194  default:
195  UNREACHABLE();
196  }
197  return false;
198  }
199 
202  bool has_nulls;
204 
205  private:
206  std::mutex EncoderMutex_;
207  size_t array_size;
208 
209  bool is_null(int8_t* array) { return is_null(buffer_->sql_type, array); }
210 
211  void update_elem_stats(const ArrayDatum& array) {
212  if (array.is_null) {
213  has_nulls = true;
214  }
215  switch (buffer_->sql_type.get_subtype()) {
216  case kBOOLEAN: {
217  if (!initialized) {
218  elem_min.boolval = true;
219  elem_max.boolval = false;
220  }
221  if (array.is_null) {
222  break;
223  }
224  const bool* bool_array = (bool*)array.pointer;
225  for (size_t i = 0; i < array.length / sizeof(bool); i++) {
226  if ((int8_t)bool_array[i] == NULL_BOOLEAN) {
227  has_nulls = true;
228  } else if (initialized) {
229  elem_min.boolval = std::min(elem_min.boolval, bool_array[i]);
230  elem_max.boolval = std::max(elem_max.boolval, bool_array[i]);
231  } else {
232  elem_min.boolval = bool_array[i];
233  elem_max.boolval = bool_array[i];
234  initialized = true;
235  }
236  }
237  break;
238  }
239  case kINT: {
240  if (!initialized) {
241  elem_min.intval = 1;
242  elem_max.intval = 0;
243  }
244  if (array.is_null) {
245  break;
246  }
247  const int32_t* int_array = (int32_t*)array.pointer;
248  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
249  if (int_array[i] == NULL_INT) {
250  has_nulls = true;
251  } else if (initialized) {
252  elem_min.intval = std::min(elem_min.intval, int_array[i]);
253  elem_max.intval = std::max(elem_max.intval, int_array[i]);
254  } else {
255  elem_min.intval = int_array[i];
256  elem_max.intval = int_array[i];
257  initialized = true;
258  }
259  }
260  break;
261  }
262  case kSMALLINT: {
263  if (!initialized) {
264  elem_min.smallintval = 1;
265  elem_max.smallintval = 0;
266  }
267  if (array.is_null) {
268  break;
269  }
270  const int16_t* smallint_array = (int16_t*)array.pointer;
271  for (size_t i = 0; i < array.length / sizeof(int16_t); i++) {
272  if (smallint_array[i] == NULL_SMALLINT) {
273  has_nulls = true;
274  } else if (initialized) {
275  elem_min.smallintval = std::min(elem_min.smallintval, smallint_array[i]);
276  elem_max.smallintval = std::max(elem_max.smallintval, smallint_array[i]);
277  } else {
278  elem_min.smallintval = smallint_array[i];
279  elem_max.smallintval = smallint_array[i];
280  initialized = true;
281  }
282  }
283  break;
284  }
285  case kTINYINT: {
286  if (!initialized) {
287  elem_min.tinyintval = 1;
288  elem_max.tinyintval = 0;
289  }
290  if (array.is_null) {
291  break;
292  }
293  const int8_t* tinyint_array = (int8_t*)array.pointer;
294  for (size_t i = 0; i < array.length / sizeof(int8_t); i++) {
295  if (tinyint_array[i] == NULL_TINYINT) {
296  has_nulls = true;
297  } else if (initialized) {
298  elem_min.tinyintval = std::min(elem_min.tinyintval, tinyint_array[i]);
299  elem_max.tinyintval = std::max(elem_max.tinyintval, tinyint_array[i]);
300  } else {
301  elem_min.tinyintval = tinyint_array[i];
302  elem_max.tinyintval = tinyint_array[i];
303  initialized = true;
304  }
305  }
306  break;
307  }
308  case kBIGINT:
309  case kNUMERIC:
310  case kDECIMAL: {
311  if (!initialized) {
312  elem_min.bigintval = 1;
313  elem_max.bigintval = 0;
314  }
315  if (array.is_null) {
316  break;
317  }
318  const int64_t* bigint_array = (int64_t*)array.pointer;
319  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
320  if (bigint_array[i] == NULL_BIGINT) {
321  has_nulls = true;
322  } else if (initialized) {
323  decimal_overflow_validator_.validate(bigint_array[i]);
324  elem_min.bigintval = std::min(elem_min.bigintval, bigint_array[i]);
325  elem_max.bigintval = std::max(elem_max.bigintval, bigint_array[i]);
326  } else {
327  decimal_overflow_validator_.validate(bigint_array[i]);
328  elem_min.bigintval = bigint_array[i];
329  elem_max.bigintval = bigint_array[i];
330  initialized = true;
331  }
332  }
333  break;
334  }
335  case kFLOAT: {
336  if (!initialized) {
337  elem_min.floatval = 1.0;
338  elem_max.floatval = 0.0;
339  }
340  if (array.is_null) {
341  break;
342  }
343  const float* flt_array = (float*)array.pointer;
344  for (size_t i = 0; i < array.length / sizeof(float); i++) {
345  if (flt_array[i] == NULL_FLOAT) {
346  has_nulls = true;
347  } else if (initialized) {
348  elem_min.floatval = std::min(elem_min.floatval, flt_array[i]);
349  elem_max.floatval = std::max(elem_max.floatval, flt_array[i]);
350  } else {
351  elem_min.floatval = flt_array[i];
352  elem_max.floatval = flt_array[i];
353  initialized = true;
354  }
355  }
356  break;
357  }
358  case kDOUBLE: {
359  if (!initialized) {
360  elem_min.doubleval = 1.0;
361  elem_max.doubleval = 0.0;
362  }
363  if (array.is_null) {
364  break;
365  }
366  const double* dbl_array = (double*)array.pointer;
367  for (size_t i = 0; i < array.length / sizeof(double); i++) {
368  if (dbl_array[i] == NULL_DOUBLE) {
369  has_nulls = true;
370  } else if (initialized) {
371  elem_min.doubleval = std::min(elem_min.doubleval, dbl_array[i]);
372  elem_max.doubleval = std::max(elem_max.doubleval, dbl_array[i]);
373  } else {
374  elem_min.doubleval = dbl_array[i];
375  elem_max.doubleval = dbl_array[i];
376  initialized = true;
377  }
378  }
379  break;
380  }
381  case kTIME:
382  case kTIMESTAMP:
383  case kDATE: {
384  if (!initialized) {
385  elem_min.bigintval = 1;
386  elem_max.bigintval = 0;
387  }
388  if (array.is_null) {
389  break;
390  }
391  const int64_t* tm_array = reinterpret_cast<int64_t*>(array.pointer);
392  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
393  if (tm_array[i] == NULL_BIGINT) {
394  has_nulls = true;
395  } else if (initialized) {
396  elem_min.bigintval = std::min(elem_min.bigintval, tm_array[i]);
397  elem_max.bigintval = std::max(elem_max.bigintval, tm_array[i]);
398  } else {
399  elem_min.bigintval = tm_array[i];
400  elem_max.bigintval = tm_array[i];
401  initialized = true;
402  }
403  }
404  break;
405  }
406  case kCHAR:
407  case kVARCHAR:
408  case kTEXT: {
410  if (!initialized) {
411  elem_min.intval = 1;
412  elem_max.intval = 0;
413  }
414  if (array.is_null) {
415  break;
416  }
417  const int32_t* int_array = (int32_t*)array.pointer;
418  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
419  if (int_array[i] == NULL_INT) {
420  has_nulls = true;
421  } else if (initialized) {
422  elem_min.intval = std::min(elem_min.intval, int_array[i]);
423  elem_max.intval = std::max(elem_max.intval, int_array[i]);
424  } else {
425  elem_min.intval = int_array[i];
426  elem_max.intval = int_array[i];
427  initialized = true;
428  }
429  }
430  break;
431  }
432  default:
433  UNREACHABLE();
434  }
435  };
436 
437 }; // class FixedLengthArrayNoneEncoder
438 
439 #endif // FIXED_LENGTH_ARRAY_NONE_ENCODER_H
int8_t tinyintval
Definition: sqltypes.h:126
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define NULL_DOUBLE
Definition: sqltypes.h:179
ChunkMetadata appendData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const bool replicating=false)
size_t num_elems_
Definition: Encoder.h:179
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:334
DecimalOverflowValidator decimal_overflow_validator_
Definition: Encoder.h:184
#define NULL_BIGINT
Definition: sqltypes.h:177
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:187
bool boolval
Definition: sqltypes.h:125
void fillChunkStats(const T min, const T max, const bool has_nulls)
Definition: ChunkMetadata.h:38
#define UNREACHABLE()
Definition: Logger.h:241
#define NULL_ARRAY_SMALLINT
Definition: sqltypes.h:183
#define NULL_ARRAY_TINYINT
Definition: sqltypes.h:182
ChunkMetadata getMetadata(const SQLTypeInfo &ti) override
#define NULL_ARRAY_BOOLEAN
Definition: sqltypes.h:181
int32_t intval
Definition: sqltypes.h:128
virtual bool isDirty() const
void updateStats(const double, const bool) override
float floatval
Definition: sqltypes.h:130
FixedLengthArrayNoneEncoder(AbstractBuffer *buffer, size_t as)
CHECK(cgen_state)
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:181
#define NULL_TINYINT
Definition: sqltypes.h:174
virtual void getMetadata(ChunkMetadata &chunkMetadata)
Definition: Encoder.cpp:227
void copyMetadata(const Encoder *copyFromEncoder) override
int64_t bigintval
Definition: sqltypes.h:129
#define NULL_FLOAT
Definition: sqltypes.h:178
size_t getNumElems() const
Definition: Encoder.h:175
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:333
int16_t smallintval
Definition: sqltypes.h:127
An AbstractBuffer is a unit of data management for a data manager.
#define NULL_ARRAY_INT
Definition: sqltypes.h:184
#define NULL_INT
Definition: sqltypes.h:176
ChunkMetadata appendData(int8_t *&srcData, const size_t numAppendElems, const SQLTypeInfo &, const bool replicating=false) override
void validate(T value)
Definition: Encoder.h:54
Definition: sqltypes.h:55
Definition: sqltypes.h:56
#define NULL_ARRAY_BIGINT
Definition: sqltypes.h:185
Definition: sqltypes.h:44
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
void update_elem_stats(const ArrayDatum &array)
#define NULL_SMALLINT
Definition: sqltypes.h:175
void reduceStats(const Encoder &) override
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:327
bool g_enable_watchdog false
Definition: Execute.cpp:71
void updateStats(const int64_t, const bool) override
static bool is_null(const SQLTypeInfo &type, int8_t *array)
Definition: sqltypes.h:48
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
virtual void reserve(size_t num_bytes)=0
void getMetadata(ChunkMetadata &chunkMetadata) override
#define NULL_BOOLEAN
Definition: sqltypes.h:173
std::conditional_t< isCudaCC(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:122
double doubleval
Definition: sqltypes.h:131
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:186