OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
FixedLengthArrayNoneEncoder.h
Go to the documentation of this file.
1 /*
2  * Copyright 2018 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 #ifndef FIXED_LENGTH_ARRAY_NONE_ENCODER_H
25 #define FIXED_LENGTH_ARRAY_NONE_ENCODER_H
26 
27 #include "Logger/Logger.h"
28 
29 #include <cassert>
30 #include <cstring>
31 #include <memory>
32 #include <mutex>
33 #include <string>
34 #include <vector>
35 #include "AbstractBuffer.h"
36 #include "ChunkMetadata.h"
37 #include "Encoder.h"
38 
40 
42  public:
44  : Encoder(buffer), has_nulls(false), initialized(false), array_size(as) {}
45 
46  size_t getNumElemsForBytesInsertData(const std::vector<ArrayDatum>* srcData,
47  const int start_idx,
48  const size_t numAppendElems,
49  const size_t byteLimit,
50  const bool replicating = false) {
51  size_t dataSize = numAppendElems * array_size;
52  if (dataSize > byteLimit) {
53  dataSize = byteLimit;
54  }
55  return dataSize / array_size;
56  }
57 
// Raw-byte overload of appendData (declared `override`). This encoder does not
// support it: the body only invokes UNREACHABLE(). The trailing
// `return nullptr` gives the function a return value on every path.
// NOTE(review): UNREACHABLE() comes from Logger.h and is presumably fatal -
// confirm. Arrays are appended through the std::vector<ArrayDatum> overload.
58  std::shared_ptr<ChunkMetadata> appendData(int8_t*& src_data,
59  const size_t num_elems_to_append,
60  const SQLTypeInfo& ti,
61  const bool replicating = false,
62  const int64_t offset = -1) override {
63  UNREACHABLE(); // should never be called for arrays
64  return nullptr;
65  }
66 
67  std::shared_ptr<ChunkMetadata> appendData(const std::vector<ArrayDatum>* srcData,
68  const int start_idx,
69  const size_t numAppendElems,
70  const bool replicating = false) {
71  size_t data_size = array_size * numAppendElems;
72  buffer_->reserve(data_size);
73 
74  for (size_t i = start_idx; i < start_idx + numAppendElems; i++) {
75  size_t len = (*srcData)[replicating ? 0 : i].length;
76  // Length of the appended array should be equal to the fixed length,
77  // all others should have been discarded, assert if something slips through
78  CHECK_EQ(len, array_size);
79  // NULL arrays have been filled with subtype's NULL sentinels,
80  // should be appended as regular data, same size
81  buffer_->append((*srcData)[replicating ? 0 : i].pointer, len);
82 
83  // keep Chunk statistics with array elements
84  update_elem_stats((*srcData)[replicating ? 0 : i]);
85  }
86  // make sure buffer_ is flushed even if no new data is appended to it
87  // (e.g. empty strings) because the metadata needs to be flushed.
88  if (!buffer_->isDirty()) {
89  buffer_->setDirty();
90  }
91 
92  num_elems_ += numAppendElems;
93  auto chunk_metadata = std::make_shared<ChunkMetadata>();
94  getMetadata(chunk_metadata);
95  return chunk_metadata;
96  }
97 
// Populates chunkMetadata in two steps: the base-class Encoder::getMetadata()
// runs first, then this encoder's element statistics (elem_min, elem_max,
// has_nulls) are stored through ChunkMetadata::fillChunkStats().
98  void getMetadata(const std::shared_ptr<ChunkMetadata>& chunkMetadata) override {
99  Encoder::getMetadata(chunkMetadata); // call on parent class
100  chunkMetadata->fillChunkStats(elem_min, elem_max, has_nulls);
101  }
102 
103  // Only called from the executor for synthesized meta-information.
104  std::shared_ptr<ChunkMetadata> getMetadata(const SQLTypeInfo& ti) override {
105  auto chunk_metadata = std::make_shared<ChunkMetadata>(
106  ti, 0, 0, ChunkStats{elem_min, elem_max, has_nulls});
107  return chunk_metadata;
108  }
109 
// Integer scalar statistics update is not supported by this encoder: the body
// is an unconditional CHECK(false). Both arguments are ignored.
110  void updateStats(const int64_t, const bool) override { CHECK(false); }
111 
// Floating-point scalar statistics update is not supported by this encoder:
// the body is an unconditional CHECK(false). Both arguments are ignored.
112  void updateStats(const double, const bool) override { CHECK(false); }
113 
// Merging statistics from another encoder is not supported here: the body is
// an unconditional CHECK(false) and the argument is ignored.
114  void reduceStats(const Encoder&) override { CHECK(false); }
115 
// Raw-byte statistics update is not supported by this encoder: the body only
// invokes UNREACHABLE(), and neither argument is read.
116  void updateStats(const int8_t* const src_data, const size_t num_elements) override {
117  UNREACHABLE();
118  }
119 
// String statistics update is not supported by this encoder: the body only
// invokes UNREACHABLE(), and none of the arguments are read.
120  void updateStats(const std::vector<std::string>* const src_data,
121  const size_t start_idx,
122  const size_t num_elements) override {
123  UNREACHABLE();
124  }
125 
126  void updateStats(const std::vector<ArrayDatum>* const src_data,
127  const size_t start_idx,
128  const size_t num_elements) override {
129  for (size_t n = start_idx; n < start_idx + num_elements; n++) {
130  update_elem_stats((*src_data)[n]);
131  }
132  }
133 
134  void writeMetadata(FILE* f) override {
135  // assumes pointer is already in right place
136  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
137  fwrite((int8_t*)&elem_min, sizeof(Datum), 1, f);
138  fwrite((int8_t*)&elem_max, sizeof(Datum), 1, f);
139  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
140  fwrite((int8_t*)&initialized, sizeof(bool), 1, f);
141  }
142 
143  void readMetadata(FILE* f) override {
144  // assumes pointer is already in right place
145  fread((int8_t*)&num_elems_, sizeof(size_t), 1, f);
146  fread((int8_t*)&elem_min, sizeof(Datum), 1, f);
147  fread((int8_t*)&elem_max, sizeof(Datum), 1, f);
148  fread((int8_t*)&has_nulls, sizeof(bool), 1, f);
149  fread((int8_t*)&initialized, sizeof(bool), 1, f);
150  }
151 
152  void copyMetadata(const Encoder* copyFromEncoder) override {
153  num_elems_ = copyFromEncoder->getNumElems();
154  auto array_encoder =
155  dynamic_cast<const FixedLengthArrayNoneEncoder*>(copyFromEncoder);
156  elem_min = array_encoder->elem_min;
157  elem_max = array_encoder->elem_max;
158  has_nulls = array_encoder->has_nulls;
159  initialized = array_encoder->initialized;
160  }
161 
162  void updateMetadata(int8_t* array) {
164  }
165 
166  static bool is_null(const SQLTypeInfo& type, int8_t* array) {
167  if (type.get_notnull()) {
168  return false;
169  }
170  switch (type.get_subtype()) {
171  case kBOOLEAN: {
172  const bool* bool_array = (bool*)array;
173  return ((int8_t)bool_array[0] == NULL_ARRAY_BOOLEAN);
174  }
175  case kINT: {
176  const int32_t* int_array = (int32_t*)array;
177  return (int_array[0] == NULL_ARRAY_INT);
178  }
179  case kSMALLINT: {
180  const int16_t* smallint_array = (int16_t*)array;
181  return (smallint_array[0] == NULL_ARRAY_SMALLINT);
182  }
183  case kTINYINT: {
184  const int8_t* tinyint_array = (int8_t*)array;
185  return (tinyint_array[0] == NULL_ARRAY_TINYINT);
186  }
187  case kBIGINT:
188  case kNUMERIC:
189  case kDECIMAL: {
190  const int64_t* bigint_array = (int64_t*)array;
191  return (bigint_array[0] == NULL_ARRAY_BIGINT);
192  }
193  case kFLOAT: {
194  const float* flt_array = (float*)array;
195  return (flt_array[0] == NULL_ARRAY_FLOAT);
196  }
197  case kDOUBLE: {
198  const double* dbl_array = (double*)array;
199  return (dbl_array[0] == NULL_ARRAY_DOUBLE);
200  }
201  case kTIME:
202  case kTIMESTAMP:
203  case kDATE: {
204  const int64_t* tm_array = reinterpret_cast<int64_t*>(array);
205  return (tm_array[0] == NULL_ARRAY_BIGINT);
206  }
207  case kCHAR:
208  case kVARCHAR:
209  case kTEXT: {
211  const int32_t* int_array = (int32_t*)array;
212  return (int_array[0] == NULL_ARRAY_INT);
213  }
214  default:
215  UNREACHABLE();
216  }
217  return false;
218  }
219 
222  bool has_nulls;
224 
225  private:
226  std::mutex EncoderMutex_;
227  size_t array_size;
228 
// Convenience overload: forwards to the static is_null() using the SQL type
// reported by the underlying buffer (buffer_->getSqlType()).
229  bool is_null(int8_t* array) { return is_null(buffer_->getSqlType(), array); }
230 
231  void update_elem_stats(const ArrayDatum& array) {
232  if (array.is_null) {
233  has_nulls = true;
234  }
235  switch (buffer_->getSqlType().get_subtype()) {
236  case kBOOLEAN: {
237  if (!initialized) {
238  elem_min.boolval = true;
239  elem_max.boolval = false;
240  }
241  if (array.is_null) {
242  break;
243  }
244  const bool* bool_array = (bool*)array.pointer;
245  for (size_t i = 0; i < array.length / sizeof(bool); i++) {
246  if ((int8_t)bool_array[i] == NULL_BOOLEAN) {
247  has_nulls = true;
248  } else if (initialized) {
249  elem_min.boolval = std::min(elem_min.boolval, bool_array[i]);
250  elem_max.boolval = std::max(elem_max.boolval, bool_array[i]);
251  } else {
252  elem_min.boolval = bool_array[i];
253  elem_max.boolval = bool_array[i];
254  initialized = true;
255  }
256  }
257  break;
258  }
259  case kINT: {
260  if (!initialized) {
261  elem_min.intval = 1;
262  elem_max.intval = 0;
263  }
264  if (array.is_null) {
265  break;
266  }
267  const int32_t* int_array = (int32_t*)array.pointer;
268  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
269  if (int_array[i] == NULL_INT) {
270  has_nulls = true;
271  } else if (initialized) {
272  elem_min.intval = std::min(elem_min.intval, int_array[i]);
273  elem_max.intval = std::max(elem_max.intval, int_array[i]);
274  } else {
275  elem_min.intval = int_array[i];
276  elem_max.intval = int_array[i];
277  initialized = true;
278  }
279  }
280  break;
281  }
282  case kSMALLINT: {
283  if (!initialized) {
284  elem_min.smallintval = 1;
285  elem_max.smallintval = 0;
286  }
287  if (array.is_null) {
288  break;
289  }
290  const int16_t* smallint_array = (int16_t*)array.pointer;
291  for (size_t i = 0; i < array.length / sizeof(int16_t); i++) {
292  if (smallint_array[i] == NULL_SMALLINT) {
293  has_nulls = true;
294  } else if (initialized) {
295  elem_min.smallintval = std::min(elem_min.smallintval, smallint_array[i]);
296  elem_max.smallintval = std::max(elem_max.smallintval, smallint_array[i]);
297  } else {
298  elem_min.smallintval = smallint_array[i];
299  elem_max.smallintval = smallint_array[i];
300  initialized = true;
301  }
302  }
303  break;
304  }
305  case kTINYINT: {
306  if (!initialized) {
307  elem_min.tinyintval = 1;
308  elem_max.tinyintval = 0;
309  }
310  if (array.is_null) {
311  break;
312  }
313  const int8_t* tinyint_array = (int8_t*)array.pointer;
314  for (size_t i = 0; i < array.length / sizeof(int8_t); i++) {
315  if (tinyint_array[i] == NULL_TINYINT) {
316  has_nulls = true;
317  } else if (initialized) {
318  elem_min.tinyintval = std::min(elem_min.tinyintval, tinyint_array[i]);
319  elem_max.tinyintval = std::max(elem_max.tinyintval, tinyint_array[i]);
320  } else {
321  elem_min.tinyintval = tinyint_array[i];
322  elem_max.tinyintval = tinyint_array[i];
323  initialized = true;
324  }
325  }
326  break;
327  }
328  case kBIGINT:
329  case kNUMERIC:
330  case kDECIMAL: {
331  if (!initialized) {
332  elem_min.bigintval = 1;
333  elem_max.bigintval = 0;
334  }
335  if (array.is_null) {
336  break;
337  }
338  const int64_t* bigint_array = (int64_t*)array.pointer;
339  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
340  if (bigint_array[i] == NULL_BIGINT) {
341  has_nulls = true;
342  } else if (initialized) {
343  decimal_overflow_validator_.validate(bigint_array[i]);
344  elem_min.bigintval = std::min(elem_min.bigintval, bigint_array[i]);
345  elem_max.bigintval = std::max(elem_max.bigintval, bigint_array[i]);
346  } else {
347  decimal_overflow_validator_.validate(bigint_array[i]);
348  elem_min.bigintval = bigint_array[i];
349  elem_max.bigintval = bigint_array[i];
350  initialized = true;
351  }
352  }
353  break;
354  }
355  case kFLOAT: {
356  if (!initialized) {
357  elem_min.floatval = 1.0;
358  elem_max.floatval = 0.0;
359  }
360  if (array.is_null) {
361  break;
362  }
363  const float* flt_array = (float*)array.pointer;
364  for (size_t i = 0; i < array.length / sizeof(float); i++) {
365  if (flt_array[i] == NULL_FLOAT) {
366  has_nulls = true;
367  } else if (initialized) {
368  elem_min.floatval = std::min(elem_min.floatval, flt_array[i]);
369  elem_max.floatval = std::max(elem_max.floatval, flt_array[i]);
370  } else {
371  elem_min.floatval = flt_array[i];
372  elem_max.floatval = flt_array[i];
373  initialized = true;
374  }
375  }
376  break;
377  }
378  case kDOUBLE: {
379  if (!initialized) {
380  elem_min.doubleval = 1.0;
381  elem_max.doubleval = 0.0;
382  }
383  if (array.is_null) {
384  break;
385  }
386  const double* dbl_array = (double*)array.pointer;
387  for (size_t i = 0; i < array.length / sizeof(double); i++) {
388  if (dbl_array[i] == NULL_DOUBLE) {
389  has_nulls = true;
390  } else if (initialized) {
391  elem_min.doubleval = std::min(elem_min.doubleval, dbl_array[i]);
392  elem_max.doubleval = std::max(elem_max.doubleval, dbl_array[i]);
393  } else {
394  elem_min.doubleval = dbl_array[i];
395  elem_max.doubleval = dbl_array[i];
396  initialized = true;
397  }
398  }
399  break;
400  }
401  case kTIME:
402  case kTIMESTAMP:
403  case kDATE: {
404  if (!initialized) {
405  elem_min.bigintval = 1;
406  elem_max.bigintval = 0;
407  }
408  if (array.is_null) {
409  break;
410  }
411  const int64_t* tm_array = reinterpret_cast<int64_t*>(array.pointer);
412  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
413  if (tm_array[i] == NULL_BIGINT) {
414  has_nulls = true;
415  } else if (initialized) {
416  elem_min.bigintval = std::min(elem_min.bigintval, tm_array[i]);
417  elem_max.bigintval = std::max(elem_max.bigintval, tm_array[i]);
418  } else {
419  elem_min.bigintval = tm_array[i];
420  elem_max.bigintval = tm_array[i];
421  initialized = true;
422  }
423  }
424  break;
425  }
426  case kCHAR:
427  case kVARCHAR:
428  case kTEXT: {
430  if (!initialized) {
431  elem_min.intval = 1;
432  elem_max.intval = 0;
433  }
434  if (array.is_null) {
435  break;
436  }
437  const int32_t* int_array = (int32_t*)array.pointer;
438  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
439  if (int_array[i] == NULL_INT) {
440  has_nulls = true;
441  } else if (initialized) {
442  elem_min.intval = std::min(elem_min.intval, int_array[i]);
443  elem_max.intval = std::max(elem_max.intval, int_array[i]);
444  } else {
445  elem_min.intval = int_array[i];
446  elem_max.intval = int_array[i];
447  initialized = true;
448  }
449  }
450  break;
451  }
452  default:
453  UNREACHABLE();
454  }
455  };
456 
457 }; // class FixedLengthArrayNoneEncoder
458 
459 #endif // FIXED_LENGTH_ARRAY_NONE_ENCODER_H
int8_t tinyintval
Definition: sqltypes.h:206
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:331
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define NULL_DOUBLE
Definition: sqltypes.h:257
void updateStats(const int8_t *const src_data, const size_t num_elements) override
size_t num_elems_
Definition: Encoder.h:229
Definition: sqltypes.h:51
DecimalOverflowValidator decimal_overflow_validator_
Definition: Encoder.h:233
#define NULL_BIGINT
Definition: sqltypes.h:255
#define NULL_ARRAY_DOUBLE
Definition: sqltypes.h:265
bool boolval
Definition: sqltypes.h:205
#define UNREACHABLE()
Definition: Logger.h:241
#define NULL_ARRAY_SMALLINT
Definition: sqltypes.h:261
#define NULL_ARRAY_TINYINT
Definition: sqltypes.h:260
#define NULL_ARRAY_BOOLEAN
Definition: sqltypes.h:259
int32_t intval
Definition: sqltypes.h:208
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
void updateStats(const double, const bool) override
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:202
float floatval
Definition: sqltypes.h:210
FixedLengthArrayNoneEncoder(AbstractBuffer *buffer, size_t as)
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:231
std::shared_ptr< ChunkMetadata > getMetadata(const SQLTypeInfo &ti) override
#define NULL_TINYINT
Definition: sqltypes.h:252
void copyMetadata(const Encoder *copyFromEncoder) override
int64_t bigintval
Definition: sqltypes.h:209
#define NULL_FLOAT
Definition: sqltypes.h:256
size_t getNumElems() const
Definition: Encoder.h:225
int16_t smallintval
Definition: sqltypes.h:207
An AbstractBuffer is a unit of data management for a data manager.
#define NULL_ARRAY_INT
Definition: sqltypes.h:262
#define NULL_INT
Definition: sqltypes.h:254
void validate(T value)
Definition: Encoder.h:54
Definition: sqltypes.h:54
Definition: sqltypes.h:55
void updateStats(const std::vector< std::string > *const src_data, const size_t start_idx, const size_t num_elements) override
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:338
#define NULL_ARRAY_BIGINT
Definition: sqltypes.h:263
Definition: sqltypes.h:43
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
SQLTypeInfo getSqlType() const
void update_elem_stats(const ArrayDatum &array)
#define NULL_SMALLINT
Definition: sqltypes.h:253
void reduceStats(const Encoder &) override
bool g_enable_watchdog false
Definition: Execute.cpp:73
#define CHECK(condition)
Definition: Logger.h:197
void updateStats(const int64_t, const bool) override
static bool is_null(const SQLTypeInfo &type, int8_t *array)
Definition: sqltypes.h:47
void updateStats(const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:337
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
virtual void reserve(size_t num_bytes)=0
#define NULL_BOOLEAN
Definition: sqltypes.h:251
double doubleval
Definition: sqltypes.h:211
std::shared_ptr< ChunkMetadata > appendData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const bool replicating=false)
#define NULL_ARRAY_FLOAT
Definition: sqltypes.h:264