OmniSciDB  fe05a0c208
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FixedLengthArrayNoneEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 #ifndef FIXED_LENGTH_ARRAY_NONE_ENCODER_H
25 #define FIXED_LENGTH_ARRAY_NONE_ENCODER_H
26 
27 #include "Logger/Logger.h"
28 
29 #include <cassert>
30 #include <cstring>
31 #include <memory>
32 #include <mutex>
33 #include <string>
34 #include <vector>
35 #include "AbstractBuffer.h"
36 #include "ChunkMetadata.h"
37 #include "Encoder.h"
38 
40 
42  public:
44  : Encoder(buffer), has_nulls(false), initialized(false), array_size(as) {}
45 
46  size_t getNumElemsForBytesInsertData(const std::vector<ArrayDatum>* srcData,
47  const int start_idx,
48  const size_t numAppendElems,
49  const size_t byteLimit,
50  const bool replicating = false) {
51  size_t dataSize = numAppendElems * array_size;
52  if (dataSize > byteLimit) {
53  dataSize = byteLimit;
54  }
55  return dataSize / array_size;
56  }
57 
58  std::shared_ptr<ChunkMetadata> appendData(int8_t*& src_data,
59  const size_t num_elems_to_append,
60  const SQLTypeInfo& ti,
61  const bool replicating = false,
62  const int64_t offset = -1) override {
63  UNREACHABLE(); // should never be called for arrays
64  return nullptr;
65  }
66 
67  std::shared_ptr<ChunkMetadata> appendData(const std::vector<ArrayDatum>* srcData,
68  const int start_idx,
69  const size_t numAppendElems,
70  const bool replicating = false) {
71  size_t data_size = array_size * numAppendElems;
72  buffer_->reserve(data_size);
73 
74  for (size_t i = start_idx; i < start_idx + numAppendElems; i++) {
75  size_t len = (*srcData)[replicating ? 0 : i].length;
76  // Length of the appended array should be equal to the fixed length,
77  // all others should have been discarded, assert if something slips through
78  CHECK_EQ(len, array_size);
79  // NULL arrays have been filled with subtype's NULL sentinels,
80  // should be appended as regular data, same size
81  buffer_->append((*srcData)[replicating ? 0 : i].pointer, len);
82 
83  // keep Chunk statistics with array elements
84  update_elem_stats((*srcData)[replicating ? 0 : i]);
85  }
86  // make sure buffer_ is flushed even if no new data is appended to it
87  // (e.g. empty strings) because the metadata needs to be flushed.
88  if (!buffer_->isDirty()) {
89  buffer_->setDirty();
90  }
91 
92  num_elems_ += numAppendElems;
93  auto chunk_metadata = std::make_shared<ChunkMetadata>();
94  getMetadata(chunk_metadata);
95  return chunk_metadata;
96  }
97 
98  void getMetadata(const std::shared_ptr<ChunkMetadata>& chunkMetadata) override {
99  Encoder::getMetadata(chunkMetadata); // call on parent class
100  chunkMetadata->fillChunkStats(elem_min, elem_max, has_nulls);
101  }
102 
103  // Only called from the executor for synthesized meta-information.
104  std::shared_ptr<ChunkMetadata> getMetadata(const SQLTypeInfo& ti) override {
105  auto chunk_metadata = std::make_shared<ChunkMetadata>(
106  ti, 0, 0, ChunkStats{elem_min, elem_max, has_nulls});
107  return chunk_metadata;
108  }
109 
110  void updateStats(const int64_t, const bool) override { CHECK(false); }
111 
112  void updateStats(const double, const bool) override { CHECK(false); }
113 
114  void reduceStats(const Encoder&) override { CHECK(false); }
115 
116  void updateStats(const int8_t* const src_data, const size_t num_elements) override {
117  UNREACHABLE();
118  }
119 
120  void updateStats(const std::vector<std::string>* const src_data,
121  const size_t start_idx,
122  const size_t num_elements) override {
123  UNREACHABLE();
124  }
125 
126  void updateStats(const std::vector<ArrayDatum>* const src_data,
127  const size_t start_idx,
128  const size_t num_elements) override {
129  for (size_t n = start_idx; n < start_idx + num_elements; n++) {
130  update_elem_stats((*src_data)[n]);
131  }
132  }
133 
134  void writeMetadata(FILE* f) override {
135  // assumes pointer is already in right place
136  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
137  fwrite((int8_t*)&elem_min, sizeof(Datum), 1, f);
138  fwrite((int8_t*)&elem_max, sizeof(Datum), 1, f);
139  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
140  fwrite((int8_t*)&initialized, sizeof(bool), 1, f);
141  }
142 
143  void readMetadata(FILE* f) override {
144  // assumes pointer is already in right place
145  fread((int8_t*)&num_elems_, sizeof(size_t), 1, f);
146  fread((int8_t*)&elem_min, sizeof(Datum), 1, f);
147  fread((int8_t*)&elem_max, sizeof(Datum), 1, f);
148  fread((int8_t*)&has_nulls, sizeof(bool), 1, f);
149  fread((int8_t*)&initialized, sizeof(bool), 1, f);
150  }
151 
152  void copyMetadata(const Encoder* copyFromEncoder) override {
153  num_elems_ = copyFromEncoder->getNumElems();
154  auto array_encoder =
155  dynamic_cast<const FixedLengthArrayNoneEncoder*>(copyFromEncoder);
156  elem_min = array_encoder->elem_min;
157  elem_max = array_encoder->elem_max;
158  has_nulls = array_encoder->has_nulls;
159  initialized = array_encoder->initialized;
160  }
161 
// NOTE(review): the body is empty in this listing, and the listing's own line
// numbering skips a line inside it -- confirm against the original header
// whether a statement was dropped here.
void updateMetadata(int8_t* array) {
}
165 
166  static bool is_null(const SQLTypeInfo& type, int8_t* array) {
167  if (type.get_notnull()) {
168  return false;
169  }
170  switch (type.get_subtype()) {
171  case kBOOLEAN: {
172  return (array[0] == NULL_ARRAY_BOOLEAN);
173  }
174  case kINT: {
175  const int32_t* int_array = (int32_t*)array;
176  return (int_array[0] == NULL_ARRAY_INT);
177  }
178  case kSMALLINT: {
179  const int16_t* smallint_array = (int16_t*)array;
180  return (smallint_array[0] == NULL_ARRAY_SMALLINT);
181  }
182  case kTINYINT: {
183  const int8_t* tinyint_array = (int8_t*)array;
184  return (tinyint_array[0] == NULL_ARRAY_TINYINT);
185  }
186  case kBIGINT:
187  case kNUMERIC:
188  case kDECIMAL: {
189  const int64_t* bigint_array = (int64_t*)array;
190  return (bigint_array[0] == NULL_ARRAY_BIGINT);
191  }
192  case kFLOAT: {
193  const float* flt_array = (float*)array;
194  return (flt_array[0] == NULL_ARRAY_FLOAT);
195  }
196  case kDOUBLE: {
197  const double* dbl_array = (double*)array;
198  return (dbl_array[0] == NULL_ARRAY_DOUBLE);
199  }
200  case kTIME:
201  case kTIMESTAMP:
202  case kDATE: {
203  const int64_t* tm_array = reinterpret_cast<int64_t*>(array);
204  return (tm_array[0] == NULL_ARRAY_BIGINT);
205  }
206  case kCHAR:
207  case kVARCHAR:
208  case kTEXT: {
210  const int32_t* int_array = (int32_t*)array;
211  return (int_array[0] == NULL_ARRAY_INT);
212  }
213  default:
214  UNREACHABLE();
215  }
216  return false;
217  }
218 
219  bool resetChunkStats(const ChunkStats& stats) override {
220  auto elem_type = buffer_->getSqlType().get_elem_type();
221  if (DatumEqual(elem_min, stats.min, elem_type) &&
222  DatumEqual(elem_max, stats.max, elem_type) && has_nulls == stats.has_nulls) {
223  return false;
224  }
225  elem_min = stats.min;
226  elem_max = stats.max;
227  has_nulls = stats.has_nulls;
228  return true;
229  }
230 
231  void resetChunkStats() override {
232  has_nulls = false;
233  initialized = false;
234  }
235 
238  bool has_nulls;
240 
241  private:
242  std::mutex EncoderMutex_;
243  size_t array_size;
244 
245  bool is_null(int8_t* array) { return is_null(buffer_->getSqlType(), array); }
246 
247  void update_elem_stats(const ArrayDatum& array) {
248  if (array.is_null) {
249  has_nulls = true;
250  }
251  switch (buffer_->getSqlType().get_subtype()) {
252  case kBOOLEAN: {
253  if (!initialized) {
254  elem_min.boolval = true;
255  elem_max.boolval = false;
256  }
257  if (array.is_null) {
258  break;
259  }
260  const bool* bool_array = (bool*)array.pointer;
261  for (size_t i = 0; i < array.length / sizeof(bool); i++) {
262  if ((int8_t)bool_array[i] == NULL_BOOLEAN) {
263  has_nulls = true;
264  } else if (initialized) {
265  elem_min.boolval = std::min(elem_min.boolval, bool_array[i]);
266  elem_max.boolval = std::max(elem_max.boolval, bool_array[i]);
267  } else {
268  elem_min.boolval = bool_array[i];
269  elem_max.boolval = bool_array[i];
270  initialized = true;
271  }
272  }
273  break;
274  }
275  case kINT: {
276  if (!initialized) {
277  elem_min.intval = 1;
278  elem_max.intval = 0;
279  }
280  if (array.is_null) {
281  break;
282  }
283  const int32_t* int_array = (int32_t*)array.pointer;
284  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
285  if (int_array[i] == NULL_INT) {
286  has_nulls = true;
287  } else if (initialized) {
288  elem_min.intval = std::min(elem_min.intval, int_array[i]);
289  elem_max.intval = std::max(elem_max.intval, int_array[i]);
290  } else {
291  elem_min.intval = int_array[i];
292  elem_max.intval = int_array[i];
293  initialized = true;
294  }
295  }
296  break;
297  }
298  case kSMALLINT: {
299  if (!initialized) {
300  elem_min.smallintval = 1;
301  elem_max.smallintval = 0;
302  }
303  if (array.is_null) {
304  break;
305  }
306  const int16_t* smallint_array = (int16_t*)array.pointer;
307  for (size_t i = 0; i < array.length / sizeof(int16_t); i++) {
308  if (smallint_array[i] == NULL_SMALLINT) {
309  has_nulls = true;
310  } else if (initialized) {
311  elem_min.smallintval = std::min(elem_min.smallintval, smallint_array[i]);
312  elem_max.smallintval = std::max(elem_max.smallintval, smallint_array[i]);
313  } else {
314  elem_min.smallintval = smallint_array[i];
315  elem_max.smallintval = smallint_array[i];
316  initialized = true;
317  }
318  }
319  break;
320  }
321  case kTINYINT: {
322  if (!initialized) {
323  elem_min.tinyintval = 1;
324  elem_max.tinyintval = 0;
325  }
326  if (array.is_null) {
327  break;
328  }
329  const int8_t* tinyint_array = (int8_t*)array.pointer;
330  for (size_t i = 0; i < array.length / sizeof(int8_t); i++) {
331  if (tinyint_array[i] == NULL_TINYINT) {
332  has_nulls = true;
333  } else if (initialized) {
334  elem_min.tinyintval = std::min(elem_min.tinyintval, tinyint_array[i]);
335  elem_max.tinyintval = std::max(elem_max.tinyintval, tinyint_array[i]);
336  } else {
337  elem_min.tinyintval = tinyint_array[i];
338  elem_max.tinyintval = tinyint_array[i];
339  initialized = true;
340  }
341  }
342  break;
343  }
344  case kBIGINT:
345  case kNUMERIC:
346  case kDECIMAL: {
347  if (!initialized) {
348  elem_min.bigintval = 1;
349  elem_max.bigintval = 0;
350  }
351  if (array.is_null) {
352  break;
353  }
354  const int64_t* bigint_array = (int64_t*)array.pointer;
355  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
356  if (bigint_array[i] == NULL_BIGINT) {
357  has_nulls = true;
358  } else if (initialized) {
359  decimal_overflow_validator_.validate(bigint_array[i]);
360  elem_min.bigintval = std::min(elem_min.bigintval, bigint_array[i]);
361  elem_max.bigintval = std::max(elem_max.bigintval, bigint_array[i]);
362  } else {
363  decimal_overflow_validator_.validate(bigint_array[i]);
364  elem_min.bigintval = bigint_array[i];
365  elem_max.bigintval = bigint_array[i];
366  initialized = true;
367  }
368  }
369  break;
370  }
371  case kFLOAT: {
372  if (!initialized) {
373  elem_min.floatval = 1.0;
374  elem_max.floatval = 0.0;
375  }
376  if (array.is_null) {
377  break;
378  }
379  const float* flt_array = (float*)array.pointer;
380  for (size_t i = 0; i < array.length / sizeof(float); i++) {
381  if (flt_array[i] == NULL_FLOAT) {
382  has_nulls = true;
383  } else if (initialized) {
384  elem_min.floatval = std::min(elem_min.floatval, flt_array[i]);
385  elem_max.floatval = std::max(elem_max.floatval, flt_array[i]);
386  } else {
387  elem_min.floatval = flt_array[i];
388  elem_max.floatval = flt_array[i];
389  initialized = true;
390  }
391  }
392  break;
393  }
394  case kDOUBLE: {
395  if (!initialized) {
396  elem_min.doubleval = 1.0;
397  elem_max.doubleval = 0.0;
398  }
399  if (array.is_null) {
400  break;
401  }
402  const double* dbl_array = (double*)array.pointer;
403  for (size_t i = 0; i < array.length / sizeof(double); i++) {
404  if (dbl_array[i] == NULL_DOUBLE) {
405  has_nulls = true;
406  } else if (initialized) {
407  elem_min.doubleval = std::min(elem_min.doubleval, dbl_array[i]);
408  elem_max.doubleval = std::max(elem_max.doubleval, dbl_array[i]);
409  } else {
410  elem_min.doubleval = dbl_array[i];
411  elem_max.doubleval = dbl_array[i];
412  initialized = true;
413  }
414  }
415  break;
416  }
417  case kTIME:
418  case kTIMESTAMP:
419  case kDATE: {
420  if (!initialized) {
421  elem_min.bigintval = 1;
422  elem_max.bigintval = 0;
423  }
424  if (array.is_null) {
425  break;
426  }
427  const int64_t* tm_array = reinterpret_cast<int64_t*>(array.pointer);
428  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
429  if (tm_array[i] == NULL_BIGINT) {
430  has_nulls = true;
431  } else if (initialized) {
432  elem_min.bigintval = std::min(elem_min.bigintval, tm_array[i]);
433  elem_max.bigintval = std::max(elem_max.bigintval, tm_array[i]);
434  } else {
435  elem_min.bigintval = tm_array[i];
436  elem_max.bigintval = tm_array[i];
437  initialized = true;
438  }
439  }
440  break;
441  }
442  case kCHAR:
443  case kVARCHAR:
444  case kTEXT: {
446  if (!initialized) {
447  elem_min.intval = 1;
448  elem_max.intval = 0;
449  }
450  if (array.is_null) {
451  break;
452  }
453  const int32_t* int_array = (int32_t*)array.pointer;
454  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
455  if (int_array[i] == NULL_INT) {
456  has_nulls = true;
457  } else if (initialized) {
458  elem_min.intval = std::min(elem_min.intval, int_array[i]);
459  elem_max.intval = std::max(elem_max.intval, int_array[i]);
460  } else {
461  elem_min.intval = int_array[i];
462  elem_max.intval = int_array[i];
463  initialized = true;
464  }
465  }
466  break;
467  }
468  default:
469  UNREACHABLE();
470  }
471  };
472 
473 }; // class FixedLengthArrayNoneEncoder
474 
475 #endif // FIXED_LENGTH_ARRAY_NONE_ENCODER_H
int8_t tinyintval
Definition: sqltypes.h:206
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:315
#define CHECK_EQ(x, y)
Definition: Logger.h:211
#define NULL_DOUBLE
void updateStats(const int8_t *const src_data, const size_t num_elements) override
size_t num_elems_
Definition: Encoder.h:237
Definition: sqltypes.h:48
#define NULL_ARRAY_INT
#define NULL_FLOAT
DecimalOverflowValidator decimal_overflow_validator_
Definition: Encoder.h:241
#define NULL_BIGINT
bool boolval
Definition: sqltypes.h:205
#define NULL_ARRAY_SMALLINT
#define UNREACHABLE()
Definition: Logger.h:247
bool has_nulls
Definition: ChunkMetadata.h:28
#define NULL_ARRAY_TINYINT
int32_t intval
Definition: sqltypes.h:208
#define NULL_INT
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
void updateStats(const double, const bool) override
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:202
float floatval
Definition: sqltypes.h:210
FixedLengthArrayNoneEncoder(AbstractBuffer *buffer, size_t as)
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:239
std::shared_ptr< ChunkMetadata > getMetadata(const SQLTypeInfo &ti) override
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:306
void copyMetadata(const Encoder *copyFromEncoder) override
int64_t bigintval
Definition: sqltypes.h:209
size_t getNumElems() const
Definition: Encoder.h:233
#define NULL_ARRAY_FLOAT
int16_t smallintval
Definition: sqltypes.h:207
An AbstractBuffer is a unit of data management for a data manager.
#define NULL_BOOLEAN
void validate(T value)
Definition: Encoder.h:54
Definition: sqltypes.h:51
Definition: sqltypes.h:52
void updateStats(const std::vector< std::string > *const src_data, const size_t start_idx, const size_t num_elements) override
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:322
Definition: sqltypes.h:40
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
SQLTypeInfo getSqlType() const
#define NULL_TINYINT
#define NULL_ARRAY_DOUBLE
void update_elem_stats(const ArrayDatum &array)
void reduceStats(const Encoder &) override
bool g_enable_watchdog false
Definition: Execute.cpp:76
#define CHECK(condition)
Definition: Logger.h:203
void updateStats(const int64_t, const bool) override
#define NULL_SMALLINT
char * f
#define NULL_ARRAY_BIGINT
static bool is_null(const SQLTypeInfo &type, int8_t *array)
Definition: sqltypes.h:44
void updateStats(const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
#define NULL_ARRAY_BOOLEAN
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:321
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:713
virtual void reserve(size_t num_bytes)=0
double doubleval
Definition: sqltypes.h:211
std::shared_ptr< ChunkMetadata > appendData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const bool replicating=false)
bool resetChunkStats(const ChunkStats &stats) override
: Reset chunk level stats (min, max, nulls) using new values from the argument.