OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ArrayNoneEncoder Class Reference

#include <ArrayNoneEncoder.h>

+ Inheritance diagram for ArrayNoneEncoder:
+ Collaboration diagram for ArrayNoneEncoder:

Public Member Functions

 ArrayNoneEncoder (AbstractBuffer *buffer)
 
size_t getNumElemsForBytesInsertData (const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
 
size_t getNumElemsForBytesEncodedDataAtIndices (const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit) override
 
std::shared_ptr< ChunkMetadata > appendData (int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
 
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices (const int8_t *index_data, int8_t *data, const std::vector< size_t > &selected_idx) override
 
std::shared_ptr< ChunkMetadata > appendEncodedData (const int8_t *index_data, int8_t *data, const size_t start_idx, const size_t num_elements) override
 
std::shared_ptr< ChunkMetadata > appendData (const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const bool replicating)
 
void getMetadata (const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
 
std::shared_ptr< ChunkMetadata > getMetadata (const SQLTypeInfo &ti) override
 
void updateStats (const int64_t, const bool) override
 
void updateStats (const double, const bool) override
 
void updateStats (const int8_t *const src_data, const size_t num_elements) override
 
void updateStats (const std::vector< std::string > *const src_data, const size_t start_idx, const size_t num_elements) override
 
void updateStats (const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
 
void reduceStats (const Encoder &) override
 
void writeMetadata (FILE *f) override
 
void readMetadata (FILE *f) override
 
void copyMetadata (const Encoder *copyFromEncoder) override
 
AbstractBuffer * getIndexBuf () const
 
bool resetChunkStats (const ChunkStats &stats) override
 Reset chunk level stats (min, max, nulls) using new values from the argument. More...
 
void resetChunkStats () override
 
void setIndexBuffer (AbstractBuffer *buf)
 
- Public Member Functions inherited from Encoder
 Encoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~Encoder ()
 
virtual void updateStatsEncoded (const int8_t *const dst_data, const size_t num_elements)
 
size_t getNumElems () const
 
void setNumElems (const size_t num_elems)
 

Public Attributes

Datum elem_min
 
Datum elem_max
 
bool has_nulls
 
bool initialized
 

Static Public Attributes

static constexpr size_t DEFAULT_NULL_PADDING_SIZE {8}
 

Private Member Functions

void update_elem_stats (const ArrayDatum &array)
 
std::pair< ArrayOffsetT, ArrayOffsetT > getArrayOffsetsAtIndex (const int8_t *index_data, size_t index)
 
size_t getArrayDatumSizeAtIndex (const int8_t *index_data, size_t index)
 
ArrayDatum getArrayDatumAtIndex (const int8_t *index_data, int8_t *data, size_t index)
 

Private Attributes

std::mutex EncoderMutex_
 
AbstractBuffer * index_buf
 
ArrayOffsetT last_offset
 

Additional Inherited Members

- Static Public Member Functions inherited from Encoder
static Encoder * Create (Data_Namespace::AbstractBuffer *buffer, const SQLTypeInfo sqlType)
 
- Protected Attributes inherited from Encoder
size_t num_elems_
 
Data_Namespace::AbstractBuffer * buffer_
 
DecimalOverflowValidator decimal_overflow_validator_
 
DateDaysOverflowValidator date_days_overflow_validator_
 

Detailed Description

Definition at line 41 of file ArrayNoneEncoder.h.

Constructor & Destructor Documentation

ArrayNoneEncoder::ArrayNoneEncoder ( AbstractBuffer * buffer)
inline

Definition at line 43 of file ArrayNoneEncoder.h.

44  : Encoder(buffer)
45  , has_nulls(false)
46  , initialized(false)
47  , index_buf(nullptr)
48  , last_offset(-1) {}
Encoder(Data_Namespace::AbstractBuffer *buffer)
Definition: Encoder.cpp:225
AbstractBuffer * index_buf
ArrayOffsetT last_offset

Member Function Documentation

std::shared_ptr<ChunkMetadata> ArrayNoneEncoder::appendData ( int8_t *&  src_data,
const size_t  num_elems_to_append,
const SQLTypeInfo &  ti,
const bool  replicating = false,
const int64_t  offset = -1 
)
inline override virtual

Append data to the chunk buffer backing this encoder.

Parameters
src_data - Source data for the append
num_elems_to_append - Number of elements to append
ti - SQL Type Info for the column TODO(adb): used?
replicating - Pass one value and fill the chunk with it
offset - Write data starting at a given offset. Default is -1 which indicates an append, an offset of 0 rewrites the chunk up to num_elems_to_append.

Implements Encoder.

Definition at line 84 of file ArrayNoneEncoder.h.

References UNREACHABLE.

Referenced by Chunk_NS::Chunk::appendData(), appendEncodedData(), and appendEncodedDataAtIndices().

88  {
89  UNREACHABLE(); // should never be called for arrays
90  return nullptr;
91  }
#define UNREACHABLE()
Definition: Logger.h:338

+ Here is the caller graph for this function:

std::shared_ptr<ChunkMetadata> ArrayNoneEncoder::appendData ( const std::vector< ArrayDatum > *  srcData,
const int  start_idx,
const size_t  numAppendElems,
const bool  replicating 
)
inline

Definition at line 118 of file ArrayNoneEncoder.h.

References Data_Namespace::AbstractBuffer::append(), Encoder::buffer_, CHECK, Data_Namespace::CPU_LEVEL, DEFAULT_NULL_PADDING_SIZE, run_benchmark_import::dest, getMetadata(), index_buf, is_null(), Data_Namespace::AbstractBuffer::isDirty(), last_offset, MAX_INPUT_BUF_SIZE, anonymous_namespace{Utm.h}::n, Encoder::num_elems_, Data_Namespace::AbstractBuffer::read(), Data_Namespace::AbstractBuffer::reserve(), Data_Namespace::AbstractBuffer::setDirty(), Data_Namespace::AbstractBuffer::size(), and update_elem_stats().

121  {
122  CHECK(index_buf != nullptr); // index_buf must be set before this.
123  size_t append_index_size = numAppendElems * sizeof(ArrayOffsetT);
124  if (num_elems_ == 0) {
125  append_index_size += sizeof(ArrayOffsetT); // plus one for the initial offset
126  }
127  index_buf->reserve(index_buf->size() + append_index_size);
128 
129  bool first_elem_padded = false;
130  ArrayOffsetT initial_offset = 0;
131  if (num_elems_ == 0) {
132  if ((*srcData)[0].is_null || (*srcData)[0].length <= 1) {
133  // Covers following potentially problematic first arrays:
134  // (1) NULL array, issue - can't encode a NULL with 0 initial offset
135  // otherwise, if first array is not NULL:
136  // (2) length=1 array - could be followed by a {}*/NULL, covers tinyint,bool
137  // (3) empty array - could be followed by {}*/NULL, or {}*|{x}|{}*|NULL, etc.
138  initial_offset = DEFAULT_NULL_PADDING_SIZE;
139  first_elem_padded = true;
140  }
141  index_buf->append((int8_t*)&initial_offset,
142  sizeof(ArrayOffsetT)); // write the initial offset
143  last_offset = initial_offset;
144  } else {
145  // Valid last_offset is never negative
146  // always need to read a valid last offset from buffer/disk
147  // b/c now due to vacuum "last offset" may go backward and if
148  // index chunk was not reloaded last_offset would go way off!
149  index_buf->read((int8_t*)&last_offset,
150  sizeof(ArrayOffsetT),
151  index_buf->size() - sizeof(ArrayOffsetT),
152  Data_Namespace::CPU_LEVEL);
153  CHECK(last_offset != -1);
154  // If the loaded offset is negative it means the last value was a NULL array,
155  // convert to a valid last offset
156  if (last_offset < 0) {
157  last_offset = -last_offset;
158  }
159  }
160  // Need to start data from 8 byte offset if first array encoded is a NULL array
161  size_t append_data_size = (first_elem_padded) ? DEFAULT_NULL_PADDING_SIZE : 0;
162  for (size_t n = start_idx; n < start_idx + numAppendElems; n++) {
163  // NULL arrays don't take any space so don't add to the data size
164  if ((*srcData)[replicating ? 0 : n].is_null) {
165  continue;
166  }
167  append_data_size += (*srcData)[replicating ? 0 : n].length;
168  }
169  buffer_->reserve(buffer_->size() + append_data_size);
170 
171  size_t inbuf_size = std::min(std::max(append_index_size, append_data_size),
172  (size_t)MAX_INPUT_BUF_SIZE);
173  auto gc_inbuf = std::make_unique<int8_t[]>(inbuf_size);
174  auto inbuf = gc_inbuf.get();
175  for (size_t num_appended = 0; num_appended < numAppendElems;) {
176  ArrayOffsetT* p = (ArrayOffsetT*)inbuf;
177  size_t i;
178  for (i = 0; num_appended < numAppendElems && i < inbuf_size / sizeof(ArrayOffsetT);
179  i++, num_appended++) {
180  p[i] =
181  last_offset + (*srcData)[replicating ? 0 : num_appended + start_idx].length;
182  last_offset = p[i];
183  if ((*srcData)[replicating ? 0 : num_appended + start_idx].is_null) {
184  // Record array NULLness in the index buffer
185  p[i] = -p[i];
186  }
187  }
188  index_buf->append(inbuf, i * sizeof(ArrayOffsetT));
189  }
190 
191  // Pad buffer_ with 8 bytes if first encoded array is a NULL array
192  if (first_elem_padded) {
193  auto padding_size = DEFAULT_NULL_PADDING_SIZE;
194  buffer_->append(inbuf, padding_size);
195  }
196  for (size_t num_appended = 0; num_appended < numAppendElems;) {
197  size_t size = 0;
198  for (int i = start_idx + num_appended;
199  num_appended < numAppendElems && size < inbuf_size;
200  i++, num_appended++) {
201  if ((*srcData)[replicating ? 0 : i].is_null) {
202  continue; // NULL arrays don't take up any space in the data buffer
203  }
204  size_t len = (*srcData)[replicating ? 0 : i].length;
205  if (len > inbuf_size) {
206  // for large strings, append on its own
207  if (size > 0) {
208  buffer_->append(inbuf, size);
209  }
210  size = 0;
211  buffer_->append((*srcData)[replicating ? 0 : i].pointer, len);
212  num_appended++;
213  break;
214  } else if (size + len > inbuf_size) {
215  break;
216  }
217  char* dest = (char*)inbuf + size;
218  if (len > 0) {
219  std::memcpy((void*)dest, (void*)(*srcData)[replicating ? 0 : i].pointer, len);
220  size += len;
221  }
222  }
223  if (size > 0) {
224  buffer_->append(inbuf, size);
225  }
226  }
227  // make sure buffer_ is flushed even if no new data is appended to it
228  // (e.g. empty strings) because the metadata needs to be flushed.
229  if (!buffer_->isDirty()) {
230  buffer_->setDirty();
231  }
232 
233  // keep Chunk statistics with array elements
234  for (size_t n = start_idx; n < start_idx + numAppendElems; n++) {
235  update_elem_stats((*srcData)[replicating ? 0 : n]);
236  }
237  num_elems_ += numAppendElems;
238  auto chunk_metadata = std::make_shared<ChunkMetadata>();
239  getMetadata(chunk_metadata);
240  return chunk_metadata;
241  }
void update_elem_stats(const ArrayDatum &array)
size_t num_elems_
Definition: Encoder.h:288
#define MAX_INPUT_BUF_SIZE
Definition: Encoder.h:36
virtual void read(int8_t *const dst, const size_t num_bytes, const size_t offset=0, const MemoryLevel dst_buffer_type=CPU_LEVEL, const int dst_device_id=-1)=0
CONSTEXPR DEVICE bool is_null(const T &value)
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:290
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
AbstractBuffer * index_buf
static constexpr size_t DEFAULT_NULL_PADDING_SIZE
int32_t ArrayOffsetT
Definition: sqltypes.h:1496
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
ArrayOffsetT last_offset
#define CHECK(condition)
Definition: Logger.h:291
constexpr double n
Definition: Utm.h:38
virtual void reserve(size_t num_bytes)=0

+ Here is the call graph for this function:

std::shared_ptr<ChunkMetadata> ArrayNoneEncoder::appendEncodedData ( const int8_t *  index_data,
int8_t *  data,
const size_t  start_idx,
const size_t  num_elements 
)
inline override virtual

Append encoded data to the chunk buffer backing this encoder.

Parameters
index_data- (optional) the index data of data to append
data- the data to append
start_idx- the position to start encoding from in the data array
num_elements- the number of elements to encode from the data array
Returns
updated chunk metadata for the chunk buffer backing this encoder

NOTE: index_data must be non-null for varlen encoder types.

Implements Encoder.

Definition at line 105 of file ArrayNoneEncoder.h.

References appendData(), and getArrayDatumAtIndex().

108  {
109  std::vector<ArrayDatum> data_subset;
110  data_subset.reserve(num_elements);
111  for (size_t count = 0; count < num_elements; ++count) {
112  auto current_index = start_idx + count;
113  data_subset.emplace_back(getArrayDatumAtIndex(index_data, data, current_index));
114  }
115  return appendData(&data_subset, 0, num_elements, false);
116  }
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
ArrayDatum getArrayDatumAtIndex(const int8_t *index_data, int8_t *data, size_t index)

+ Here is the call graph for this function:

std::shared_ptr<ChunkMetadata> ArrayNoneEncoder::appendEncodedDataAtIndices ( const int8_t *  index_data,
int8_t *  data,
const std::vector< size_t > &  selected_idx 
)
inline override virtual

Append selected encoded data to the chunk buffer backing this encoder.

Parameters
index_data- (optional) the index data of data to append
data- the data to append
selected_idx- which indices in the encoded data to append
Returns
updated chunk metadata for the chunk buffer backing this encoder

NOTE: index_data must be non-null for varlen encoder types.

Implements Encoder.

Definition at line 93 of file ArrayNoneEncoder.h.

References appendData(), and getArrayDatumAtIndex().

96  {
97  std::vector<ArrayDatum> data_subset;
98  data_subset.reserve(selected_idx.size());
99  for (const auto& offset_index : selected_idx) {
100  data_subset.emplace_back(getArrayDatumAtIndex(index_data, data, offset_index));
101  }
102  return appendData(&data_subset, 0, selected_idx.size(), false);
103  }
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
ArrayDatum getArrayDatumAtIndex(const int8_t *index_data, int8_t *data, size_t index)

+ Here is the call graph for this function:

void ArrayNoneEncoder::copyMetadata ( const Encoder * copyFromEncoder)
inline override virtual

Implements Encoder.

Definition at line 297 of file ArrayNoneEncoder.h.

References elem_max, elem_min, Encoder::getNumElems(), has_nulls, initialized, and Encoder::num_elems_.

297  {
298  num_elems_ = copyFromEncoder->getNumElems();
299  auto array_encoder = dynamic_cast<const ArrayNoneEncoder*>(copyFromEncoder);
300  elem_min = array_encoder->elem_min;
301  elem_max = array_encoder->elem_max;
302  has_nulls = array_encoder->has_nulls;
303  initialized = array_encoder->initialized;
304  }
size_t num_elems_
Definition: Encoder.h:288
size_t getNumElems() const
Definition: Encoder.h:284

+ Here is the call graph for this function:

ArrayDatum ArrayNoneEncoder::getArrayDatumAtIndex ( const int8_t *  index_data,
int8_t *  data,
size_t  index 
)
inline private

Definition at line 581 of file ArrayNoneEncoder.h.

References getArrayOffsetsAtIndex(), is_null(), and last_offset.

Referenced by appendEncodedData(), and appendEncodedDataAtIndices().

581  {
582  auto [offset, last_offset] = getArrayOffsetsAtIndex(index_data, index);
583  size_t array_byte_size = std::abs(offset) - std::abs(last_offset);
584  bool is_null = offset < 0;
585  auto current_data = data + std::abs(last_offset);
586  return is_null ? ArrayDatum(0, nullptr, true, DoNothingDeleter{})
587  : ArrayDatum(array_byte_size, current_data, false, DoNothingDeleter{});
588  }
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
CONSTEXPR DEVICE bool is_null(const T &value)
std::pair< ArrayOffsetT, ArrayOffsetT > getArrayOffsetsAtIndex(const int8_t *index_data, size_t index)
ArrayOffsetT last_offset

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t ArrayNoneEncoder::getArrayDatumSizeAtIndex ( const int8_t *  index_data,
size_t  index 
)
inline private

Definition at line 575 of file ArrayNoneEncoder.h.

References getArrayOffsetsAtIndex(), and last_offset.

Referenced by getNumElemsForBytesEncodedDataAtIndices().

575  {
576  auto [offset, last_offset] = getArrayOffsetsAtIndex(index_data, index);
577  size_t array_byte_size = std::abs(offset) - std::abs(last_offset);
578  return array_byte_size;
579  }
std::pair< ArrayOffsetT, ArrayOffsetT > getArrayOffsetsAtIndex(const int8_t *index_data, size_t index)
ArrayOffsetT last_offset

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair<ArrayOffsetT, ArrayOffsetT> ArrayNoneEncoder::getArrayOffsetsAtIndex ( const int8_t *  index_data,
size_t  index 
)
inline private

Definition at line 566 of file ArrayNoneEncoder.h.

References last_offset.

Referenced by getArrayDatumAtIndex(), and getArrayDatumSizeAtIndex().

567  {
568  auto array_offsets = reinterpret_cast<const ArrayOffsetT*>(index_data);
569  auto current_index = index + 1;
570  auto offset = array_offsets[current_index];
571  int64_t last_offset = array_offsets[current_index - 1];
572  return {offset, last_offset};
573  }
int32_t ArrayOffsetT
Definition: sqltypes.h:1496
ArrayOffsetT last_offset

+ Here is the caller graph for this function:

AbstractBuffer* ArrayNoneEncoder::getIndexBuf ( ) const
inline

Definition at line 306 of file ArrayNoneEncoder.h.

References index_buf.

306 { return index_buf; }
AbstractBuffer * index_buf
void ArrayNoneEncoder::getMetadata ( const std::shared_ptr< ChunkMetadata > &  chunkMetadata)
inline override virtual

Reimplemented from Encoder.

Definition at line 243 of file ArrayNoneEncoder.h.

References elem_max, elem_min, Encoder::getMetadata(), and has_nulls.

Referenced by appendData().

243  {
244  Encoder::getMetadata(chunkMetadata); // call on parent class
245  chunkMetadata->fillChunkStats(elem_min, elem_max, has_nulls);
246  }
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:231

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::shared_ptr<ChunkMetadata> ArrayNoneEncoder::getMetadata ( const SQLTypeInfo & ti)
inline override virtual

Implements Encoder.

Definition at line 249 of file ArrayNoneEncoder.h.

References elem_max, elem_min, and has_nulls.

249  {
250  auto chunk_metadata = std::make_shared<ChunkMetadata>(
251  ti, 0, 0, ChunkStats{elem_min, elem_max, has_nulls});
252  return chunk_metadata;
253  }
size_t ArrayNoneEncoder::getNumElemsForBytesEncodedDataAtIndices ( const int8_t *  index_data,
const std::vector< size_t > &  selected_idx,
const size_t  byte_limit 
)
inline override virtual

Compute the maximum number of variable length encoded elements given a byte limit

Parameters
index_data- (optional) index data for the encoded type
selected_idx- which indices in the encoded data to consider
byte_limit- byte limit that must be respected
Returns
the number of elements

NOTE: optional parameters above may be ignored by the implementation, but may or may not be required depending on the encoder type backing the implementation.

Implements Encoder.

Definition at line 68 of file ArrayNoneEncoder.h.

References getArrayDatumSizeAtIndex().

70  {
71  size_t num_elements = 0;
72  size_t data_size = 0;
73  for (const auto& offset_index : selected_idx) {
74  auto element_size = getArrayDatumSizeAtIndex(index_data, offset_index);
75  if (data_size + element_size > byte_limit) {
76  break;
77  }
78  data_size += element_size;
79  num_elements++;
80  }
81  return num_elements;
82  }
size_t getArrayDatumSizeAtIndex(const int8_t *index_data, size_t index)

+ Here is the call graph for this function:

size_t ArrayNoneEncoder::getNumElemsForBytesInsertData ( const std::vector< ArrayDatum > *  srcData,
const int  start_idx,
const size_t  numAppendElems,
const size_t  byteLimit,
const bool  replicating = false 
)
inline

Definition at line 50 of file ArrayNoneEncoder.h.

References anonymous_namespace{Utm.h}::n.

Referenced by Chunk_NS::Chunk::getNumElemsForBytesInsertData().

54  {
55  size_t dataSize = 0;
56 
57  size_t n = start_idx;
58  for (; n < start_idx + numAppendElems; n++) {
59  size_t len = (*srcData)[replicating ? 0 : n].length;
60  if (dataSize + len > byteLimit) {
61  break;
62  }
63  dataSize += len;
64  }
65  return n - start_idx;
66  }
constexpr double n
Definition: Utm.h:38

+ Here is the caller graph for this function:

void ArrayNoneEncoder::readMetadata ( FILE *  f)
inline override virtual

Implements Encoder.

Definition at line 288 of file ArrayNoneEncoder.h.

References elem_max, elem_min, has_nulls, initialized, and Encoder::num_elems_.

288  {
289  // assumes pointer is already in right place
290  fread((int8_t*)&num_elems_, sizeof(size_t), 1, f);
291  fread((int8_t*)&elem_min, sizeof(Datum), 1, f);
292  fread((int8_t*)&elem_max, sizeof(Datum), 1, f);
293  fread((int8_t*)&has_nulls, sizeof(bool), 1, f);
294  fread((int8_t*)&initialized, sizeof(bool), 1, f);
295  }
size_t num_elems_
Definition: Encoder.h:288
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
Definition: Datum.h:71
void ArrayNoneEncoder::reduceStats ( const Encoder & )
inline override virtual

Implements Encoder.

Definition at line 277 of file ArrayNoneEncoder.h.

References CHECK.

277 { CHECK(false); }
#define CHECK(condition)
Definition: Logger.h:291
bool ArrayNoneEncoder::resetChunkStats ( const ChunkStats & stats)
inline override virtual

Reset chunk level stats (min, max, nulls) using new values from the argument.

Returns
True if an update occurred and the chunk needs to be flushed. False otherwise. Default false if metadata update is unsupported. Only reset chunk stats if the incoming stats differ from the current stats.

Reimplemented from Encoder.

Definition at line 308 of file ArrayNoneEncoder.h.

References Encoder::buffer_, DatumEqual(), elem_max, elem_min, SQLTypeInfo::get_elem_type(), Data_Namespace::AbstractBuffer::getSqlType(), ChunkStats::has_nulls, has_nulls, initialized, ChunkStats::max, and ChunkStats::min.

308  {
309  auto elem_type = buffer_->getSqlType().get_elem_type();
310  if (initialized && DatumEqual(elem_min, stats.min, elem_type) &&
311  DatumEqual(elem_max, stats.max, elem_type) && has_nulls == stats.has_nulls) {
312  return false;
313  }
314  elem_min = stats.min;
315  elem_max = stats.max;
316  has_nulls = stats.has_nulls;
317  return true;
318  }
dictionary stats
Definition: report.py:116
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:290
bool DatumEqual(const Datum a, const Datum b, const SQLTypeInfo &ti)
Definition: Datum.cpp:408
SQLTypeInfo getSqlType() const
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:977

+ Here is the call graph for this function:

void ArrayNoneEncoder::resetChunkStats ( )
inline override virtual

Resets chunk metadata stats to their default values.

Implements Encoder.

Definition at line 320 of file ArrayNoneEncoder.h.

References has_nulls, and initialized.

320  {
321  has_nulls = false;
322  initialized = false;
323  }
void ArrayNoneEncoder::setIndexBuffer ( AbstractBuffer * buf)
inline

Definition at line 329 of file ArrayNoneEncoder.h.

References EncoderMutex_, and index_buf.

Referenced by Chunk_NS::Chunk::initEncoder().

329  {
330  std::unique_lock<std::mutex> lock(EncoderMutex_);
331  index_buf = buf;
332  }
std::mutex EncoderMutex_
AbstractBuffer * index_buf

+ Here is the caller graph for this function:

void ArrayNoneEncoder::update_elem_stats ( const ArrayDatum & array)
inline private

Definition at line 341 of file ArrayNoneEncoder.h.

References Datum::bigintval, Datum::boolval, Encoder::buffer_, CHECK_EQ, Datum::doubleval, elem_max, elem_min, Datum::floatval, SQLTypeInfo::get_compression(), SQLTypeInfo::get_subtype(), Data_Namespace::AbstractBuffer::getSqlType(), has_nulls, initialized, Datum::intval, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kENCODING_DICT, kFLOAT, kINT, kNUMERIC, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, NULL_BIGINT, NULL_BOOLEAN, NULL_DOUBLE, NULL_FLOAT, NULL_INT, NULL_SMALLINT, NULL_TINYINT, Datum::smallintval, Datum::tinyintval, and UNREACHABLE.

Referenced by appendData(), and updateStats().

341  {
342  if (array.is_null) {
343  has_nulls = true;
344  }
345  switch (buffer_->getSqlType().get_subtype()) {
346  case kBOOLEAN: {
347  if (!initialized) {
348  elem_min.boolval = 1;
349  elem_max.boolval = 0;
350  }
351  if (array.is_null || array.length == 0) {
352  break;
353  }
354  const int8_t* bool_array = array.pointer;
355  for (size_t i = 0; i < array.length / sizeof(bool); i++) {
356  if (bool_array[i] == NULL_BOOLEAN) {
357  has_nulls = true;
358  } else if (initialized) {
359  elem_min.boolval = std::min(elem_min.boolval, bool_array[i]);
360  elem_max.boolval = std::max(elem_max.boolval, bool_array[i]);
361  } else {
362  elem_min.boolval = bool_array[i];
363  elem_max.boolval = bool_array[i];
364  initialized = true;
365  }
366  }
367  break;
368  }
369  case kINT: {
370  if (!initialized) {
371  elem_min.intval = 1;
372  elem_max.intval = 0;
373  }
374  if (array.is_null || array.length == 0) {
375  break;
376  }
377  const int32_t* int_array = (int32_t*)array.pointer;
378  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
379  if (int_array[i] == NULL_INT) {
380  has_nulls = true;
381  } else if (initialized) {
382  elem_min.intval = std::min(elem_min.intval, int_array[i]);
383  elem_max.intval = std::max(elem_max.intval, int_array[i]);
384  } else {
385  elem_min.intval = int_array[i];
386  elem_max.intval = int_array[i];
387  initialized = true;
388  }
389  }
390  break;
391  }
392  case kSMALLINT: {
393  if (!initialized) {
394  elem_min.smallintval = 1;
395  elem_max.smallintval = 0;
396  }
397  if (array.is_null || array.length == 0) {
398  break;
399  }
400  const int16_t* int_array = (int16_t*)array.pointer;
401  for (size_t i = 0; i < array.length / sizeof(int16_t); i++) {
402  if (int_array[i] == NULL_SMALLINT) {
403  has_nulls = true;
404  } else if (initialized) {
405  elem_min.smallintval = std::min(elem_min.smallintval, int_array[i]);
406  elem_max.smallintval = std::max(elem_max.smallintval, int_array[i]);
407  } else {
408  elem_min.smallintval = int_array[i];
409  elem_max.smallintval = int_array[i];
410  initialized = true;
411  }
412  }
413  break;
414  }
415  case kTINYINT: {
416  if (!initialized) {
417  elem_min.tinyintval = 1;
418  elem_max.tinyintval = 0;
419  }
420  if (array.is_null || array.length == 0) {
421  break;
422  }
423  const int8_t* int_array = (int8_t*)array.pointer;
424  for (size_t i = 0; i < array.length / sizeof(int8_t); i++) {
425  if (int_array[i] == NULL_TINYINT) {
426  has_nulls = true;
427  } else if (initialized) {
428  elem_min.tinyintval = std::min(elem_min.tinyintval, int_array[i]);
429  elem_max.tinyintval = std::max(elem_max.tinyintval, int_array[i]);
430  } else {
431  elem_min.tinyintval = int_array[i];
432  elem_max.tinyintval = int_array[i];
433  initialized = true;
434  }
435  }
436  break;
437  }
438  case kBIGINT:
439  case kNUMERIC:
440  case kDECIMAL: {
441  if (!initialized) {
442  elem_min.bigintval = 1;
443  elem_max.bigintval = 0;
444  }
445  if (array.is_null || array.length == 0) {
446  break;
447  }
448  const int64_t* int_array = (int64_t*)array.pointer;
449  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
450  if (int_array[i] == NULL_BIGINT) {
451  has_nulls = true;
452  } else if (initialized) {
453  elem_min.bigintval = std::min(elem_min.bigintval, int_array[i]);
454  elem_max.bigintval = std::max(elem_max.bigintval, int_array[i]);
455  } else {
456  elem_min.bigintval = int_array[i];
457  elem_max.bigintval = int_array[i];
458  initialized = true;
459  }
460  }
461  break;
462  }
463  case kFLOAT: {
464  if (!initialized) {
465  elem_min.floatval = 1.0;
466  elem_max.floatval = 0.0;
467  }
468  if (array.is_null || array.length == 0) {
469  break;
470  }
471  const float* flt_array = (float*)array.pointer;
472  for (size_t i = 0; i < array.length / sizeof(float); i++) {
473  if (flt_array[i] == NULL_FLOAT) {
474  has_nulls = true;
475  } else if (initialized) {
476  elem_min.floatval = std::min(elem_min.floatval, flt_array[i]);
477  elem_max.floatval = std::max(elem_max.floatval, flt_array[i]);
478  } else {
479  elem_min.floatval = flt_array[i];
480  elem_max.floatval = flt_array[i];
481  initialized = true;
482  }
483  }
484  break;
485  }
486  case kDOUBLE: {
487  if (!initialized) {
488  elem_min.doubleval = 1.0;
489  elem_max.doubleval = 0.0;
490  }
491  if (array.is_null || array.length == 0) {
492  break;
493  }
494  const double* dbl_array = (double*)array.pointer;
495  for (size_t i = 0; i < array.length / sizeof(double); i++) {
496  if (dbl_array[i] == NULL_DOUBLE) {
497  has_nulls = true;
498  } else if (initialized) {
499  elem_min.doubleval = std::min(elem_min.doubleval, dbl_array[i]);
500  elem_max.doubleval = std::max(elem_max.doubleval, dbl_array[i]);
501  } else {
502  elem_min.doubleval = dbl_array[i];
503  elem_max.doubleval = dbl_array[i];
504  initialized = true;
505  }
506  }
507  break;
508  }
509  case kTIME:
510  case kTIMESTAMP:
511  case kDATE: {
512  if (!initialized) {
513  elem_min.bigintval = 1;
514  elem_max.bigintval = 0;
515  }
516  if (array.is_null || array.length == 0) {
517  break;
518  }
519  const auto tm_array = reinterpret_cast<int64_t*>(array.pointer);
520  for (size_t i = 0; i < array.length / sizeof(int64_t); i++) {
521  if (tm_array[i] == NULL_BIGINT) {
522  has_nulls = true;
523  } else if (initialized) {
524  elem_min.bigintval = std::min(elem_min.bigintval, tm_array[i]);
525  elem_max.bigintval = std::max(elem_max.bigintval, tm_array[i]);
526  } else {
527  elem_min.bigintval = tm_array[i];
528  elem_max.bigintval = tm_array[i];
529  initialized = true;
530  }
531  }
532  break;
533  }
534  case kCHAR:
535  case kVARCHAR:
536  case kTEXT: {
537  CHECK_EQ(buffer_->getSqlType().get_compression(), kENCODING_DICT);
538  if (!initialized) {
539  elem_min.intval = 1;
540  elem_max.intval = 0;
541  }
542  if (array.is_null || array.length == 0) {
543  break;
544  }
545  const int32_t* int_array = (int32_t*)array.pointer;
546  for (size_t i = 0; i < array.length / sizeof(int32_t); i++) {
547  if (int_array[i] == NULL_INT) {
548  has_nulls = true;
549  } else if (initialized) {
550  elem_min.intval = std::min(elem_min.intval, int_array[i]);
551  elem_max.intval = std::max(elem_max.intval, int_array[i]);
552  } else {
553  elem_min.intval = int_array[i];
554  elem_max.intval = int_array[i];
555  initialized = true;
556  }
557  }
558  break;
559  }
560  default:
561  UNREACHABLE();
562  }
563  };
int8_t tinyintval
Definition: Datum.h:73
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define NULL_DOUBLE
Definition: sqltypes.h:76
#define NULL_FLOAT
#define NULL_BIGINT
int8_t boolval
Definition: Datum.h:72
#define UNREACHABLE()
Definition: Logger.h:338
int32_t intval
Definition: Datum.h:75
#define NULL_INT
float floatval
Definition: Datum.h:77
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:290
int64_t bigintval
Definition: Datum.h:76
int16_t smallintval
Definition: Datum.h:74
#define NULL_BOOLEAN
Definition: sqltypes.h:79
Definition: sqltypes.h:80
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
Definition: sqltypes.h:68
SQLTypeInfo getSqlType() const
#define NULL_TINYINT
#define NULL_SMALLINT
Definition: sqltypes.h:72
double doubleval
Definition: Datum.h:78

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ArrayNoneEncoder::updateStats ( const int64_t  ,
const bool   
)
inline override virtual

Implements Encoder.

Definition at line 255 of file ArrayNoneEncoder.h.

References CHECK.

255 { CHECK(false); }
#define CHECK(condition)
Definition: Logger.h:291
void ArrayNoneEncoder::updateStats ( const double  ,
const bool   
)
inline override virtual

Implements Encoder.

Definition at line 257 of file ArrayNoneEncoder.h.

References CHECK.

257 { CHECK(false); }
#define CHECK(condition)
Definition: Logger.h:291
void ArrayNoneEncoder::updateStats ( const int8_t *const  src_data,
const size_t  num_elements 
)
inline override virtual

Update statistics for data without appending.

Parameters
src_data- the data with which to update statistics
num_elements- the number of elements to scan in the data

Implements Encoder.

Definition at line 259 of file ArrayNoneEncoder.h.

References CHECK.

259  {
260  CHECK(false);
261  }
#define CHECK(condition)
Definition: Logger.h:291
void ArrayNoneEncoder::updateStats ( const std::vector< std::string > *const  src_data,
const size_t  start_idx,
const size_t  num_elements 
)
inline override virtual

Update statistics for string data without appending.

Parameters
src_data- the string data with which to update statistics
start_idx- the offset into src_data to start the update
num_elements- the number of elements to scan in the string data

Implements Encoder.

Definition at line 263 of file ArrayNoneEncoder.h.

References UNREACHABLE.

265  {
266  UNREACHABLE();
267  }
#define UNREACHABLE()
Definition: Logger.h:338
void ArrayNoneEncoder::updateStats ( const std::vector< ArrayDatum > *const  src_data,
const size_t  start_idx,
const size_t  num_elements 
)
inline override virtual

Update statistics for array data without appending.

Parameters
src_data- the array data with which to update statistics
start_idx- the offset into src_data to start the update
num_elements- the number of elements to scan in the array data

Implements Encoder.

Definition at line 269 of file ArrayNoneEncoder.h.

References anonymous_namespace{Utm.h}::n, and update_elem_stats().

271  {
272  for (size_t n = start_idx; n < start_idx + num_elements; n++) {
273  update_elem_stats((*src_data)[n]);
274  }
275  }
void update_elem_stats(const ArrayDatum &array)
constexpr double n
Definition: Utm.h:38

+ Here is the call graph for this function:

void ArrayNoneEncoder::writeMetadata ( FILE *  f)
inline override virtual

Implements Encoder.

Definition at line 279 of file ArrayNoneEncoder.h.

References elem_max, elem_min, has_nulls, initialized, and Encoder::num_elems_.

279  {
280  // assumes pointer is already in right place
281  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
282  fwrite((int8_t*)&elem_min, sizeof(Datum), 1, f);
283  fwrite((int8_t*)&elem_max, sizeof(Datum), 1, f);
284  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
285  fwrite((int8_t*)&initialized, sizeof(bool), 1, f);
286  }
size_t num_elems_
Definition: Encoder.h:288
torch::Tensor f(torch::Tensor x, torch::Tensor W_target, torch::Tensor b_target)
Definition: Datum.h:71

Member Data Documentation

Datum ArrayNoneEncoder::elem_max
Datum ArrayNoneEncoder::elem_min
std::mutex ArrayNoneEncoder::EncoderMutex_
private

Definition at line 337 of file ArrayNoneEncoder.h.

Referenced by setIndexBuffer().

AbstractBuffer* ArrayNoneEncoder::index_buf
private

Definition at line 338 of file ArrayNoneEncoder.h.

Referenced by appendData(), getIndexBuf(), and setIndexBuffer().

bool ArrayNoneEncoder::initialized
ArrayOffsetT ArrayNoneEncoder::last_offset
private

The documentation for this class was generated from the following file: