OmniSciDB  2c44a3935d
StringNoneEncoder Class Reference

#include <StringNoneEncoder.h>

+ Inheritance diagram for StringNoneEncoder:
+ Collaboration diagram for StringNoneEncoder:

Public Member Functions

 StringNoneEncoder (AbstractBuffer *buffer)
 
size_t getNumElemsForBytesInsertData (const std::vector< std::string > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
 
std::shared_ptr< ChunkMetadata > appendData (int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
 
std::shared_ptr< ChunkMetadata > appendData (const std::vector< std::string > *srcData, const int start_idx, const size_t numAppendElems, const bool replicating=false)
 
void getMetadata (const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
 
std::shared_ptr< ChunkMetadata > getMetadata (const SQLTypeInfo &ti) override
 
void updateStats (const int64_t, const bool) override
 
void updateStats (const double, const bool) override
 
void updateStats (const int8_t *const src_data, const size_t num_elements) override
 
void updateStats (const std::vector< std::string > *const src_data, const size_t start_idx, const size_t num_elements) override
 
void updateStats (const std::vector< ArrayDatum > *const src_data, const size_t start_idx, const size_t num_elements) override
 
void reduceStats (const Encoder &) override
 
void writeMetadata (FILE *f) override
 
void readMetadata (FILE *f) override
 
void copyMetadata (const Encoder *copyFromEncoder) override
 
AbstractBuffer * getIndexBuf () const
 
void setIndexBuffer (AbstractBuffer *buf)
 
- Public Member Functions inherited from Encoder
 Encoder (Data_Namespace::AbstractBuffer *buffer)
 
virtual ~Encoder ()
 
virtual bool resetChunkStats (const ChunkStats &)
 : Reset chunk level stats (min, max, nulls) using new values from the argument. More...
 
size_t getNumElems () const
 
void setNumElems (const size_t num_elems)
 

Private Member Functions

void update_elem_stats (const std::string &elem)
 

Private Attributes

AbstractBuffer * index_buf
 
StringOffsetT last_offset
 
bool has_nulls
 

Additional Inherited Members

- Static Public Member Functions inherited from Encoder
static Encoder * Create (Data_Namespace::AbstractBuffer *buffer, const SQLTypeInfo sqlType)
 
- Protected Attributes inherited from Encoder
size_t num_elems_
 
Data_Namespace::AbstractBuffer * buffer_
 
DecimalOverflowValidator decimal_overflow_validator_
 
DateDaysOverflowValidator date_days_overflow_validator_
 

Detailed Description

Definition at line 37 of file StringNoneEncoder.h.

Constructor & Destructor Documentation

◆ StringNoneEncoder()

StringNoneEncoder::StringNoneEncoder ( AbstractBuffer * buffer )
inline

Definition at line 39 of file StringNoneEncoder.h.

References getNumElemsForBytesInsertData().

40  : Encoder(buffer), index_buf(nullptr), last_offset(-1), has_nulls(false) {}
AbstractBuffer * index_buf
Encoder(Data_Namespace::AbstractBuffer *buffer)
Definition: Encoder.cpp:221
StringOffsetT last_offset
+ Here is the call graph for this function:

Member Function Documentation

◆ appendData() [1/2]

std::shared_ptr<ChunkMetadata> StringNoneEncoder::appendData ( int8_t *&  src_data,
const size_t  num_elems_to_append,
const SQLTypeInfo &  ti,
const bool  replicating = false,
const int64_t  offset = -1 
)
inline override virtual

Append data to the chunk buffer backing this encoder.

Parameters
src_data: Source data for the append
num_elems_to_append: Number of elements to append
ti: SQL Type Info for the column TODO(adb): used?
replicating: Pass one value and fill the chunk with it
offset: Write data starting at a given offset. Default is -1, which indicates an append; an offset of 0 rewrites the chunk up to num_elems_to_append.

Implements Encoder.

Definition at line 48 of file StringNoneEncoder.h.

References UNREACHABLE.

Referenced by Chunk_NS::Chunk::appendData().

52  {
53  UNREACHABLE(); // should never be called for strings
54  return nullptr;
55  }
#define UNREACHABLE()
Definition: Logger.h:241
+ Here is the caller graph for this function:

◆ appendData() [2/2]

std::shared_ptr< ChunkMetadata > StringNoneEncoder::appendData ( const std::vector< std::string > *  srcData,
const int  start_idx,
const size_t  numAppendElems,
const bool  replicating = false 
)

Definition at line 51 of file StringNoneEncoder.cpp.

References Data_Namespace::AbstractBuffer::append(), Encoder::buffer_, CHECK, CHECK_GE, Data_Namespace::CPU_LEVEL, run_benchmark_import::dest, getMetadata(), index_buf, Data_Namespace::AbstractBuffer::isDirty(), last_offset, MAX_INPUT_BUF_SIZE, Encoder::num_elems_, Data_Namespace::AbstractBuffer::read(), Data_Namespace::AbstractBuffer::reserve(), Data_Namespace::AbstractBuffer::setDirty(), Data_Namespace::AbstractBuffer::size(), and update_elem_stats().

55  {
56  CHECK(index_buf); // index_buf must be set before this.
57  size_t index_size = numAppendElems * sizeof(StringOffsetT);
58  if (num_elems_ == 0) {
59  index_size += sizeof(StringOffsetT); // plus one for the initial offset of 0.
60  }
61  index_buf->reserve(index_size);
62  StringOffsetT offset = 0;
63  if (num_elems_ == 0) {
64  index_buf->append((int8_t*)&offset,
65  sizeof(StringOffsetT)); // write the initial 0 offset
66  last_offset = 0;
67  } else {
68  // always need to read a valid last offset from buffer/disk
69  // b/c now due to vacuum "last offset" may go backward and if
70  // index chunk was not reloaded last_offset would go way off!
71  index_buf->read((int8_t*)&last_offset,
72  sizeof(StringOffsetT),
73  index_buf->size() - sizeof(StringOffsetT),
74  Data_Namespace::CPU_LEVEL);
75  CHECK_GE(last_offset, 0);
76  }
77  size_t data_size = 0;
78  for (size_t n = start_idx; n < start_idx + numAppendElems; n++) {
79  size_t len = (*srcData)[replicating ? 0 : n].length();
80  data_size += len;
81  }
82  buffer_->reserve(data_size);
83 
84  size_t inbuf_size =
85  std::min(std::max(index_size, data_size), (size_t)MAX_INPUT_BUF_SIZE);
86  auto inbuf = std::make_unique<int8_t[]>(inbuf_size);
87  for (size_t num_appended = 0; num_appended < numAppendElems;) {
88  StringOffsetT* p = reinterpret_cast<StringOffsetT*>(inbuf.get());
89  size_t i;
90  for (i = 0; num_appended < numAppendElems && i < inbuf_size / sizeof(StringOffsetT);
91  i++, num_appended++) {
92  p[i] =
93  last_offset + (*srcData)[replicating ? 0 : num_appended + start_idx].length();
94  last_offset = p[i];
95  }
96  index_buf->append(inbuf.get(), i * sizeof(StringOffsetT));
97  }
98 
99  for (size_t num_appended = 0; num_appended < numAppendElems;) {
100  size_t size = 0;
101  for (int i = start_idx + num_appended;
102  num_appended < numAppendElems && size < inbuf_size;
103  i++, num_appended++) {
104  size_t len = (*srcData)[replicating ? 0 : i].length();
105  if (len > inbuf_size) {
106  // for large strings, append on its own
107  if (size > 0) {
108  buffer_->append(inbuf.get(), size);
109  }
110  size = 0;
111  buffer_->append((int8_t*)(*srcData)[replicating ? 0 : i].data(), len);
112  num_appended++;
113  break;
114  } else if (size + len > inbuf_size) {
115  break;
116  }
117  char* dest = reinterpret_cast<char*>(inbuf.get()) + size;
118  if (len > 0) {
119  (*srcData)[replicating ? 0 : i].copy(dest, len);
120  size += len;
121  }
122  update_elem_stats((*srcData)[replicating ? 0 : i]);
123  }
124  if (size > 0) {
125  buffer_->append(inbuf.get(), size);
126  }
127  }
128  // make sure buffer_ is flushed even if no new data is appended to it
129  // (e.g. empty strings) because the metadata needs to be flushed.
130  if (!buffer_->isDirty()) {
131  buffer_->setDirty();
132  }
133 
134  num_elems_ += numAppendElems;
135  auto chunk_metadata = std::make_shared<ChunkMetadata>();
136  getMetadata(chunk_metadata);
137  return chunk_metadata;
138 }
size_t num_elems_
Definition: Encoder.h:213
#define MAX_INPUT_BUF_SIZE
Definition: Encoder.h:36
virtual size_t size() const =0
#define CHECK_GE(x, y)
Definition: Logger.h:210
int32_t StringOffsetT
Definition: sqltypes.h:850
virtual void read(int8_t *const dst, const size_t num_bytes, const size_t offset=0, const MemoryLevel dst_buffer_type=CPU_LEVEL, const int dst_device_id=-1)=0
AbstractBuffer * index_buf
Data_Namespace::AbstractBuffer * buffer_
Definition: Encoder.h:215
void update_elem_stats(const std::string &elem)
StringOffsetT last_offset
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
#define CHECK(condition)
Definition: Logger.h:197
virtual bool isDirty() const
virtual void reserve(size_t num_bytes)=0
+ Here is the call graph for this function:

◆ copyMetadata()

void StringNoneEncoder::copyMetadata ( const Encoder * copyFromEncoder )
inline override virtual

Implements Encoder.

Definition at line 110 of file StringNoneEncoder.h.

References Encoder::getNumElems(), has_nulls, and Encoder::num_elems_.

110  {
111  num_elems_ = copyFromEncoder->getNumElems();
112  has_nulls = static_cast<const StringNoneEncoder*>(copyFromEncoder)->has_nulls;
113  }
size_t num_elems_
Definition: Encoder.h:213
size_t getNumElems() const
Definition: Encoder.h:209
+ Here is the call graph for this function:

◆ getIndexBuf()

AbstractBuffer* StringNoneEncoder::getIndexBuf ( ) const
inline

Definition at line 115 of file StringNoneEncoder.h.

References index_buf.

115 { return index_buf; }
AbstractBuffer * index_buf

◆ getMetadata() [1/2]

void StringNoneEncoder::getMetadata ( const std::shared_ptr< ChunkMetadata > &  chunkMetadata)
inline override virtual

Reimplemented from Encoder.

Definition at line 62 of file StringNoneEncoder.h.

References Encoder::getMetadata(), and has_nulls.

Referenced by appendData().

62  {
63  Encoder::getMetadata(chunkMetadata); // call on parent class
64  chunkMetadata->chunkStats.min.stringval = nullptr;
65  chunkMetadata->chunkStats.max.stringval = nullptr;
66  chunkMetadata->chunkStats.has_nulls = has_nulls;
67  }
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:227
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getMetadata() [2/2]

std::shared_ptr<ChunkMetadata> StringNoneEncoder::getMetadata ( const SQLTypeInfo & ti )
inline override virtual

Implements Encoder.

Definition at line 70 of file StringNoneEncoder.h.

References has_nulls, ChunkStats::min, and Datum::stringval.

70  {
71  auto chunk_stats = ChunkStats{};
72  chunk_stats.min.stringval = nullptr;
73  chunk_stats.max.stringval = nullptr;
74  chunk_stats.has_nulls = has_nulls;
75  return std::make_shared<ChunkMetadata>(ti, 0, 0, chunk_stats);
76  }
std::string * stringval
Definition: sqltypes.h:141

◆ getNumElemsForBytesInsertData()

size_t StringNoneEncoder::getNumElemsForBytesInsertData ( const std::vector< std::string > *  srcData,
const int  start_idx,
const size_t  numAppendElems,
const size_t  byteLimit,
const bool  replicating = false 
)

Definition at line 33 of file StringNoneEncoder.cpp.

Referenced by Chunk_NS::Chunk::getNumElemsForBytesInsertData(), and StringNoneEncoder().

38  {
39  size_t dataSize = 0;
40  size_t n = start_idx;
41  for (; n < start_idx + numAppendElems; n++) {
42  size_t len = (*srcData)[replicating ? 0 : n].length();
43  if (dataSize + len > byteLimit) {
44  break;
45  }
46  dataSize += len;
47  }
48  return n - start_idx;
49 }
+ Here is the caller graph for this function:

◆ readMetadata()

void StringNoneEncoder::readMetadata ( FILE *  f)
inline override virtual

Implements Encoder.

Definition at line 104 of file StringNoneEncoder.h.

References CHECK_NE, has_nulls, and Encoder::num_elems_.

104  {
105  // assumes pointer is already in right place
106  CHECK_NE(fread((int8_t*)&num_elems_, sizeof(size_t), size_t(1), f), size_t(0));
107  CHECK_NE(fread((int8_t*)&has_nulls, sizeof(bool), size_t(1), f), size_t(0));
108  }
size_t num_elems_
Definition: Encoder.h:213
#define CHECK_NE(x, y)
Definition: Logger.h:206

◆ reduceStats()

void StringNoneEncoder::reduceStats ( const Encoder & )
inline override virtual

Implements Encoder.

Definition at line 96 of file StringNoneEncoder.h.

References CHECK.

96 { CHECK(false); }
#define CHECK(condition)
Definition: Logger.h:197

◆ setIndexBuffer()

void StringNoneEncoder::setIndexBuffer ( AbstractBuffer * buf )
inline

Definition at line 116 of file StringNoneEncoder.h.

References index_buf.

Referenced by Chunk_NS::Chunk::initEncoder().

116 { index_buf = buf; }
AbstractBuffer * index_buf
+ Here is the caller graph for this function:

◆ update_elem_stats()

void StringNoneEncoder::update_elem_stats ( const std::string &  elem)
private

Definition at line 151 of file StringNoneEncoder.cpp.

References has_nulls.

Referenced by appendData(), and updateStats().

151  {
152  if (!has_nulls && elem.empty()) {
153  has_nulls = true;
154  }
155 }
+ Here is the caller graph for this function:

◆ updateStats() [1/5]

void StringNoneEncoder::updateStats ( const int64_t  ,
const bool   
)
inline override virtual

Implements Encoder.

Definition at line 78 of file StringNoneEncoder.h.

References CHECK.

Referenced by updateStats().

78 { CHECK(false); }
#define CHECK(condition)
Definition: Logger.h:197
+ Here is the caller graph for this function:

◆ updateStats() [2/5]

void StringNoneEncoder::updateStats ( const double  ,
const bool   
)
inline override virtual

Implements Encoder.

Definition at line 80 of file StringNoneEncoder.h.

References CHECK.

80 { CHECK(false); }
#define CHECK(condition)
Definition: Logger.h:197

◆ updateStats() [3/5]

void StringNoneEncoder::updateStats ( const int8_t *const  src_data,
const size_t  num_elements 
)
inlineoverridevirtual

Implements Encoder.

Definition at line 82 of file StringNoneEncoder.h.

References UNREACHABLE, and updateStats().

82  {
83  UNREACHABLE();
84  }
#define UNREACHABLE()
Definition: Logger.h:241
+ Here is the call graph for this function:

◆ updateStats() [4/5]

void StringNoneEncoder::updateStats ( const std::vector< std::string > *const  src_data,
const size_t  start_idx,
const size_t  num_elements 
)
override virtual

Update statistics for string data without appending data.

Parameters
dataBlock - the data block with which to update statistics
startIdx - the start_idx that would normally be passed to appendData
numElements - the number of elements in the data block

Implements Encoder.

Definition at line 140 of file StringNoneEncoder.cpp.

References has_nulls, and update_elem_stats().

142  {
143  for (size_t n = start_idx; n < start_idx + num_elements; n++) {
144  update_elem_stats((*src_data)[n]);
145  if (has_nulls) {
146  break;
147  }
148  }
149 }
void update_elem_stats(const std::string &elem)
+ Here is the call graph for this function:

◆ updateStats() [5/5]

void StringNoneEncoder::updateStats ( const std::vector< ArrayDatum > *const  src_data,
const size_t  start_idx,
const size_t  num_elements 
)
inline override virtual

Update statistics for array data without appending data.

Parameters
dataBlock - the data block with which to update statistics
startIdx - the start_idx that would normally be passed to appendData
numElements - the number of elements in the data block

Implements Encoder.

Definition at line 90 of file StringNoneEncoder.h.

References UNREACHABLE.

92  {
93  UNREACHABLE();
94  }
#define UNREACHABLE()
Definition: Logger.h:241

◆ writeMetadata()

void StringNoneEncoder::writeMetadata ( FILE *  f)
inline override virtual

Implements Encoder.

Definition at line 98 of file StringNoneEncoder.h.

References has_nulls, and Encoder::num_elems_.

98  {
99  // assumes pointer is already in right place
100  fwrite((int8_t*)&num_elems_, sizeof(size_t), 1, f);
101  fwrite((int8_t*)&has_nulls, sizeof(bool), 1, f);
102  }
size_t num_elems_
Definition: Encoder.h:213

Member Data Documentation

◆ has_nulls

bool StringNoneEncoder::has_nulls
private

◆ index_buf

AbstractBuffer* StringNoneEncoder::index_buf
private

Definition at line 119 of file StringNoneEncoder.h.

Referenced by appendData(), getIndexBuf(), and setIndexBuffer().

◆ last_offset

StringOffsetT StringNoneEncoder::last_offset
private

Definition at line 120 of file StringNoneEncoder.h.

Referenced by appendData().


The documentation for this class was generated from the following files: