OmniSciDB  94e8789169
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FixedLengthArrayNoneEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 #ifndef FIXED_LENGTH_ARRAY_NONE_ENCODER_H
25 #define FIXED_LENGTH_ARRAY_NONE_ENCODER_H
26 
27 #include "Logger/Logger.h"
28 
29 #include <cassert>
30 #include <cstring>
31 #include <memory>
32 #include <mutex>
33 #include <string>
34 #include <vector>
35 #include "AbstractBuffer.h"
36 #include "ChunkMetadata.h"
37 #include "Encoder.h"
38 
40 
42  public:
44  : Encoder(buffer), has_nulls(false), initialized(false), array_size(as) {}
45 
46  size_t getNumElemsForBytesInsertData(const std::vector<ArrayDatum>* srcData,
47  const int start_idx,
48  const size_t numAppendElems,
49  const size_t byteLimit,
50  const bool replicating = false) {
51  size_t dataSize = numAppendElems * array_size;
52  if (dataSize > byteLimit) {
53  dataSize = byteLimit;
54  }
55  return dataSize / array_size;
56  }
57 
58  std::shared_ptr<ChunkMetadata> appendData(int8_t*& src_data,
59  const size_t num_elems_to_append,
60  const SQLTypeInfo& ti,
61  const bool replicating = false,
62  const int64_t offset = -1) override {
63  UNREACHABLE(); // should never be called for arrays
64  return nullptr;
65  }
66 
67  std::shared_ptr<ChunkMetadata> appendData(const std::vector<ArrayDatum>* srcData,
68  const int start_idx,
69  const size_t numAppendElems,
70  const bool replicating = false) {
71  size_t data_size = array_size * numAppendElems;
72  buffer_->reserve(data_size);
73 
74  for (size_t i = start_idx; i < start_idx + numAppendElems; i++) {
75  size_t len = (*srcData)[replicating ? 0 : i].length;
76  // Length of the appended array should be equal to the fixed length,
77  // all others should have been discarded, assert if something slips through
78  CHECK_EQ(len, array_size);
79  // NULL arrays have been filled with subtype's NULL sentinels,
80  // should be appended as regular data, same size
81  buffer_->append((*srcData)[replicating ? 0 : i].pointer, len);
82 
83  // keep Chunk statistics with array elements
84  update_elem_stats((*srcData)[replicating ? 0 : i]);
85  }
86  // make sure buffer_ is flushed even if no new data is appended to it
87  // (e.g. empty strings) because the metadata needs to be flushed.
88  if (!buffer_->isDirty()) {
89  buffer_->setDirty();
90  }
91 
92  num_elems_ += numAppendElems;
93  auto chunk_metadata = std::make_shared<ChunkMetadata>();
94  getMetadata(chunk_metadata);
95  return chunk_metadata;
96  }
97 
98  void getMetadata(const std::shared_ptr<ChunkMetadata>& chunkMetadata) override {
99  Encoder::getMetadata(chunkMetadata); // call on parent class
100  chunkMetadata->fillChunkStats(elem_min, elem_max, has_nulls);
101  }
102 
103  // Only called from the executor for synthesized meta-information.
104  std::shared_ptr<ChunkMetadata> getMetadata(const SQLTypeInfo& ti) override {
105  auto chunk_metadata = std::make_shared<ChunkMetadata>(
106  ti, 0, 0, ChunkStats{elem_min, elem_max, has_nulls});
107  return chunk_metadata;
108  }
109 
110  void updateStats(const int64_t, const bool) override { CHECK(false); }
111 
112  void updateStats(const double, const bool) override { CHECK(false); }
113 
114  void reduceStats(const Encoder&) override { CHECK(false); }
115 
116  void updateStats(const int8_t* const src_data, const size_t num_elements) override {
117  UNREACHABLE();
118  }
119 
120  void updateStats(const std::vector<std::string>* const src_data,
121  const size_t start_idx,
122  const size_t num_elements) override {
123  UNREACHABLE();
124  }
125 
126  void updateStats(const std::vector<ArrayDatum>* const src_data,
127  const size_t start_idx,
128  const size_t num_elements) override {
129  for (size_t n = start_idx; n < start_idx + num_elements; n++) {
130  update_elem_stats((*src_data)[n]);
131  }
132  }
133 
134  void writeMetadata(FILE* f) override {
135  // assumes pointer is already in right place
136  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
137  fwrite((int8_t*)&elem_min, sizeof(Datum), 1, f);
138  fwrite((int8_t*)&elem_max, sizeof(Datum), 1, f);
139  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
140  fwrite((int8_t*)&initialized, sizeof(bool), 1, f);
141  }
142 
143  void readMetadata(FILE* f) override {
144  // assumes pointer is already in right place
145  fread((int8_t*)&num_elems_, sizeof(size_t), 1, f);
146  fread((int8_t*)&elem_min, sizeof(Datum), 1, f);
147  fread((int8_t*)&elem_max, sizeof(Datum), 1, f);
148  fread((int8_t*)&has_nulls, sizeof(bool), 1, f);
149  fread((int8_t*)&initialized, sizeof(bool), 1, f);
150  }
151 
152  void copyMetadata(const Encoder* copyFromEncoder) override {
153  num_elems_ = copyFromEncoder->getNumElems();
154  auto array_encoder =
155  dynamic_cast<const FixedLengthArrayNoneEncoder*>(copyFromEncoder);
156  elem_min = array_encoder->elem_min;
157  elem_max = array_encoder->elem_max;
158  has_nulls = array_encoder->has_nulls;
159  initialized = array_encoder->initialized;
160  }
161 
162  void updateMetadata(int8_t* array) {
164  }
165 
// Reports whether `array` holds the NULL-array marker for the given array type.
//
// Returns false immediately when the type is declared NOT NULL. Otherwise only
// the first element of `array` is inspected: it is reinterpreted according to
// the type's subtype and compared with the matching NULL_ARRAY_* sentinel.
// An unhandled subtype hits UNREACHABLE().
166  static bool is_null(const SQLTypeInfo& type, int8_t* array) {
167  if (type.get_notnull()) {
168  return false;
169  }
170  switch (type.get_subtype()) {
171  case kBOOLEAN: {
172  const bool* bool_array = (bool*)array;
173  return ((int8_t)bool_array[0] == NULL_ARRAY_BOOLEAN);
174  }
175  case kINT: {
176  const int32_t* int_array = (int32_t*)array;
177  return (int_array[0] == NULL_ARRAY_INT);
178  }
179  case kSMALLINT: {
180  const int16_t* smallint_array = (int16_t*)array;
181  return (smallint_array[0] == NULL_ARRAY_SMALLINT);
182  }
183  case kTINYINT: {
184  const int8_t* tinyint_array = (int8_t*)array;
185  return (tinyint_array[0] == NULL_ARRAY_TINYINT);
186  }
// Decimal/numeric elements share the 64-bit integer sentinel with BIGINT.
187  case kBIGINT:
188  case kNUMERIC:
189  case kDECIMAL: {
190  const int64_t* bigint_array = (int64_t*)array;
191  return (bigint_array[0] == NULL_ARRAY_BIGINT);
192  }
193  case kFLOAT: {
194  const float* flt_array = (float*)array;
195  return (flt_array[0] == NULL_ARRAY_FLOAT);
196  }
197  case kDOUBLE: {
198  const double* dbl_array = (double*)array;
199  return (dbl_array[0] == NULL_ARRAY_DOUBLE);
200  }
// Time-like elements are read as 64-bit integers.
201  case kTIME:
202  case kTIMESTAMP:
203  case kDATE: {
204  const int64_t* tm_array = reinterpret_cast<int64_t*>(array);
205  return (tm_array[0] == NULL_ARRAY_BIGINT);
206  }
207  case kCHAR:
208  case kVARCHAR:
209  case kTEXT: {
// NOTE(review): the embedded listing numbers jump from 209 to 211 here, so one
// original line is not visible in this view; confirm against the upstream header.
// Text elements are read as 32-bit integers (presumably dictionary ids; TODO confirm).
211  const int32_t* int_array = (int32_t*)array;
212  return (int_array[0] == NULL_ARRAY_INT);
213  }
214  default:
215  UNREACHABLE();
216  }
// Only reached if UNREACHABLE() returns for an unhandled subtype.
217  return false;
218  }
219 
220  bool resetChunkStats(const ChunkStats& stats) override {
221  auto elem_type = buffer_->getSqlType().get_elem_type();
222  if (DatumEqual(elem_min, stats.min, elem_type) &&
223  DatumEqual(elem_max, stats.max, elem_type) && has_nulls == stats.has_nulls) {
224  return false;
225  }
226  elem_min = stats.min;
227  elem_max = stats.max;
228  has_nulls = stats.has_nulls;
229  return true;
230  }
231 
234  bool has_nulls;
236 
237  private:
238  std::mutex EncoderMutex_;
239  size_t array_size;
240 
241  bool is_null(int8_t* array) { return is_null(buffer_->getSqlType(), array); }
242 
// Folds one array into the element-level chunk statistics (elem_min, elem_max,
// has_nulls, initialized).
//
// Behaviour shared by every subtype case below:
//  - a NULL array sets has_nulls and contributes no min/max values;
//  - while no value has been recorded yet (`initialized` is false) min/max are
//    set to a placeholder pair with min > max (1/0, or true/false for booleans);
//  - each element equal to the subtype's NULL_* sentinel only sets has_nulls;
//  - the first non-null element seeds both min and max and sets `initialized`;
//    later non-null elements widen the range with std::min / std::max.
// The element count is derived from array.length divided by the element size.
// An unhandled subtype hits UNREACHABLE().
243  void update_elem_stats(const ArrayDatum& array) {
244  if (array.is_null) {
245  has_nulls = true;
246  }
247  switch (buffer_->getSqlType().get_subtype()) {
248  case kBOOLEAN: {
249  if (!initialized) {
250  elem_min.boolval = true;
251  elem_max.boolval = false;
252  }
253  if (array.is_null) {
254  break;
255  }
256  const bool* bool_array = (bool*)array.pointer;
257  for (size_t i = 0; i < array.length / sizeof(bool); i++) {
// The boolean is widened to int8_t so it can be compared with the NULL sentinel.
258  if ((int8_t)bool_array[i] == NULL_BOOLEAN) {
259  has_nulls = true;
260  } else if (initialized) {
261  elem_min.boolval = std::min(elem_min.boolval, bool_array[i]);
262  elem_max.boolval = std::max(elem_max.boolval, bool_array[i]);
263  } else {
264  elem_min.boolval = bool_array[i];
265  elem_max.boolval = bool_array[i];
266  initialized = true;
267  }
268  }
269  break;
270  }
271  case kINT: {
272  if (!initialized) {
273  elem_min.intval = 1;
274  elem_max.intval = 0;
275  }
276  if (array.is_null) {
277  break;
278  }
279  const int32_t* int_array = (int32_t*)array.pointer;
280  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
281  if (int_array[i] == NULL_INT) {
282  has_nulls = true;
283  } else if (initialized) {
284  elem_min.intval = std::min(elem_min.intval, int_array[i]);
285  elem_max.intval = std::max(elem_max.intval, int_array[i]);
286  } else {
287  elem_min.intval = int_array[i];
288  elem_max.intval = int_array[i];
289  initialized = true;
290  }
291  }
292  break;
293  }
294  case kSMALLINT: {
295  if (!initialized) {
296  elem_min.smallintval = 1;
297  elem_max.smallintval = 0;
298  }
299  if (array.is_null) {
300  break;
301  }
302  const int16_t* smallint_array = (int16_t*)array.pointer;
303  for (size_t i = 0; i < array.length / sizeof(int16_t); i++) {
304  if (smallint_array[i] == NULL_SMALLINT) {
305  has_nulls = true;
306  } else if (initialized) {
307  elem_min.smallintval = std::min(elem_min.smallintval, smallint_array[i]);
308  elem_max.smallintval = std::max(elem_max.smallintval, smallint_array[i]);
309  } else {
310  elem_min.smallintval = smallint_array[i];
311  elem_max.smallintval = smallint_array[i];
312  initialized = true;
313  }
314  }
315  break;
316  }
317  case kTINYINT: {
318  if (!initialized) {
319  elem_min.tinyintval = 1;
320  elem_max.tinyintval = 0;
321  }
322  if (array.is_null) {
323  break;
324  }
325  const int8_t* tinyint_array = (int8_t*)array.pointer;
326  for (size_t i = 0; i < array.length / sizeof(int8_t); i++) {
327  if (tinyint_array[i] == NULL_TINYINT) {
328  has_nulls = true;
329  } else if (initialized) {
330  elem_min.tinyintval = std::min(elem_min.tinyintval, tinyint_array[i]);
331  elem_max.tinyintval = std::max(elem_max.tinyintval, tinyint_array[i]);
332  } else {
333  elem_min.tinyintval = tinyint_array[i];
334  elem_max.tinyintval = tinyint_array[i];
335  initialized = true;
336  }
337  }
338  break;
339  }
// 64-bit integer, numeric and decimal elements share one code path; this is
// the only case that also runs each non-null value through
// decimal_overflow_validator_ before recording it.
340  case kBIGINT:
341  case kNUMERIC:
342  case kDECIMAL: {
343  if (!initialized) {
344  elem_min.bigintval = 1;
345  elem_max.bigintval = 0;
346  }
347  if (array.is_null) {
348  break;
349  }
350  const int64_t* bigint_array = (int64_t*)array.pointer;
351  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
352  if (bigint_array[i] == NULL_BIGINT) {
353  has_nulls = true;
354  } else if (initialized) {
355  decimal_overflow_validator_.validate(bigint_array[i]);
356  elem_min.bigintval = std::min(elem_min.bigintval, bigint_array[i]);
357  elem_max.bigintval = std::max(elem_max.bigintval, bigint_array[i]);
358  } else {
359  decimal_overflow_validator_.validate(bigint_array[i]);
360  elem_min.bigintval = bigint_array[i];
361  elem_max.bigintval = bigint_array[i];
362  initialized = true;
363  }
364  }
365  break;
366  }
367  case kFLOAT: {
368  if (!initialized) {
369  elem_min.floatval = 1.0;
370  elem_max.floatval = 0.0;
371  }
372  if (array.is_null) {
373  break;
374  }
375  const float* flt_array = (float*)array.pointer;
376  for (size_t i = 0; i < array.length / sizeof(float); i++) {
377  if (flt_array[i] == NULL_FLOAT) {
378  has_nulls = true;
379  } else if (initialized) {
380  elem_min.floatval = std::min(elem_min.floatval, flt_array[i]);
381  elem_max.floatval = std::max(elem_max.floatval, flt_array[i]);
382  } else {
383  elem_min.floatval = flt_array[i];
384  elem_max.floatval = flt_array[i];
385  initialized = true;
386  }
387  }
388  break;
389  }
390  case kDOUBLE: {
391  if (!initialized) {
392  elem_min.doubleval = 1.0;
393  elem_max.doubleval = 0.0;
394  }
395  if (array.is_null) {
396  break;
397  }
398  const double* dbl_array = (double*)array.pointer;
399  for (size_t i = 0; i < array.length / sizeof(double); i++) {
400  if (dbl_array[i] == NULL_DOUBLE) {
401  has_nulls = true;
402  } else if (initialized) {
403  elem_min.doubleval = std::min(elem_min.doubleval, dbl_array[i]);
404  elem_max.doubleval = std::max(elem_max.doubleval, dbl_array[i]);
405  } else {
406  elem_min.doubleval = dbl_array[i];
407  elem_max.doubleval = dbl_array[i];
408  initialized = true;
409  }
410  }
411  break;
412  }
// Time-like elements are read as 64-bit integers and tracked in bigintval;
// unlike the BIGINT/DECIMAL case, no overflow validation is performed here.
413  case kTIME:
414  case kTIMESTAMP:
415  case kDATE: {
416  if (!initialized) {
417  elem_min.bigintval = 1;
418  elem_max.bigintval = 0;
419  }
420  if (array.is_null) {
421  break;
422  }
423  const int64_t* tm_array = reinterpret_cast<int64_t*>(array.pointer);
424  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
425  if (tm_array[i] == NULL_BIGINT) {
426  has_nulls = true;
427  } else if (initialized) {
428  elem_min.bigintval = std::min(elem_min.bigintval, tm_array[i]);
429  elem_max.bigintval = std::max(elem_max.bigintval, tm_array[i]);
430  } else {
431  elem_min.bigintval = tm_array[i];
432  elem_max.bigintval = tm_array[i];
433  initialized = true;
434  }
435  }
436  break;
437  }
438  case kCHAR:
439  case kVARCHAR:
440  case kTEXT: {
// NOTE(review): the embedded listing numbers jump from 440 to 442 here, so one
// original line is not visible in this view; confirm against the upstream header.
// Text elements are read as 32-bit integers and tracked in intval
// (presumably dictionary ids; TODO confirm).
442  if (!initialized) {
443  elem_min.intval = 1;
444  elem_max.intval = 0;
445  }
446  if (array.is_null) {
447  break;
448  }
449  const int32_t* int_array = (int32_t*)array.pointer;
450  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
451  if (int_array[i] == NULL_INT) {
452  has_nulls = true;
453  } else if (initialized) {
454  elem_min.intval = std::min(elem_min.intval, int_array[i]);
455  elem_max.intval = std::max(elem_max.intval, int_array[i]);
456  } else {
457  elem_min.intval = int_array[i];
458  elem_max.intval = int_array[i];
459  initialized = true;
460  }
461  }
462  break;
463  }
464  default:
465  UNREACHABLE();
466  }
467  };
468 
469 }; // class FixedLengthArrayNoneEncoder
470 
471 #endif // FIXED_LENGTH_ARRAY_NONE_ENCODER_H
int8_t tinyintval
Definition: sqltypes.h:203
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:312
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define NULL_DOUBLE
void updateStats(const int8_t *const src_data, const size_t num_elements) override
size_t num_elems_
Definition: Encoder.h:232
Definition: sqltypes.h:48
#define NULL_ARRAY_INT
#define NULL_FLOAT
DecimalOverflowValidator decimal_overflow_validator_
Definition: Encoder.h:236
#define NULL_BIGINT
bool boolval
Definition: sqltypes.h:202
#define NULL_ARRAY_SMALLINT
#define UNREACHABLE()
Definition: Logger.h:241
bool has_nulls
Definition: ChunkMetadata.h:28
#define NULL_ARRAY_TINYINT
int32_t intval
Definition: sqltypes.h:205
#define NULL_INT
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
void updateStats(const double, const bool) override
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:199
float floatval
Definition: sqltypes.h:207
FixedLengthArrayNoneEncoder(AbstractBuffer *buffer, size_t as)
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:234
std::shared_ptr< ChunkMetadata > getMetadata(const SQLTypeInfo &ti) override
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:190
void copyMetadata(const Encoder *copyFromEncoder) override
int64_t bigintval
Definition: sqltypes.h:206
size_t getNumElems() const
Definition: Encoder.h:228
#define NULL_ARRAY_FLOAT
int16_t smallintval
Definition: sqltypes.h:204
An AbstractBuffer is a unit of data management for a data manager.
#define NULL_BOOLEAN
void validate(T value)
Definition: Encoder.h:54
Definition: sqltypes.h:51
Definition: sqltypes.h:52
void updateStats(const std::vector< std::string > *const src_data, const size_t start_idx, const size_t num_elements) override
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:319
Definition: sqltypes.h:40
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
SQLTypeInfo getSqlType() const
#define NULL_TINYINT
#define NULL_ARRAY_DOUBLE
void update_elem_stats(const ArrayDatum &array)
void reduceStats(const Encoder &) override
bool g_enable_watchdog false
Definition: Execute.cpp:76
#define CHECK(condition)
Definition: Logger.h:197
void updateStats(const int64_t, const bool) override
#define NULL_SMALLINT
#define NULL_ARRAY_BIGINT
static bool is_null(const SQLTypeInfo &type, int8_t *array)
Definition: sqltypes.h:44
void updateStats(const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
#define NULL_ARRAY_BOOLEAN
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:318
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:697
virtual void reserve(size_t num_bytes)=0
double doubleval
Definition: sqltypes.h:208
std::shared_ptr< ChunkMetadata > appendData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const bool replicating=false)
bool resetChunkStats(const ChunkStats &stats) override
: Reset chunk level stats (min, max, nulls) using new values from the argument.