34 const std::vector<std::string>* srcData,
36 const size_t numAppendElems,
37 const size_t byteLimit,
38 const bool replicating) {
41 for (; n < start_idx + numAppendElems; n++) {
42 size_t len = (*srcData)[replicating ? 0 :
n].length();
43 if (dataSize + len > byteLimit) {
52 const int8_t* index_data,
53 const std::vector<size_t>& selected_idx,
54 const size_t byte_limit) {
55 size_t num_elements = 0;
57 for (
const auto& offset_index : selected_idx) {
59 if (data_size + element_size > byte_limit) {
62 data_size += element_size;
69 const int8_t* index_data,
71 const std::vector<size_t>& selected_idx) {
72 std::vector<std::string_view> data_subset;
73 data_subset.reserve(selected_idx.size());
74 for (
const auto& offset_index : selected_idx) {
77 return appendData(&data_subset, 0, selected_idx.size(),
false);
81 const int8_t* index_data,
83 const size_t start_idx,
84 const size_t num_elements) {
85 std::vector<std::string_view> data_subset;
86 data_subset.reserve(num_elements);
87 for (
size_t count = 0; count < num_elements; ++count) {
88 auto current_index = start_idx + count;
91 return appendData(&data_subset, 0, num_elements,
false);
94 template <
typename StringType>
96 const std::vector<StringType>* srcData,
98 const size_t numAppendElems,
99 const bool replicating) {
121 size_t data_size = 0;
122 for (
size_t n = start_idx;
n < start_idx + numAppendElems;
n++) {
123 size_t len = (*srcData)[replicating ? 0 :
n].length();
130 auto inbuf = std::make_unique<int8_t[]>(inbuf_size);
131 for (
size_t num_appended = 0; num_appended < numAppendElems;) {
134 for (i = 0; num_appended < numAppendElems && i < inbuf_size /
sizeof(
StringOffsetT);
135 i++, num_appended++) {
137 last_offset + (*srcData)[replicating ? 0 : num_appended + start_idx].length();
143 for (
size_t num_appended = 0; num_appended < numAppendElems;) {
145 for (
int i = start_idx + num_appended;
146 num_appended < numAppendElems && size < inbuf_size;
147 i++, num_appended++) {
148 size_t len = (*srcData)[replicating ? 0 : i].length();
149 if (len > inbuf_size) {
155 buffer_->
append((int8_t*)(*srcData)[replicating ? 0 : i].data(), len);
158 }
else if (size + len > inbuf_size) {
161 char*
dest =
reinterpret_cast<char*
>(inbuf.get()) + size;
163 (*srcData)[replicating ? 0 : i].copy(dest, len);
179 auto chunk_metadata = std::make_shared<ChunkMetadata>();
181 return chunk_metadata;
185 const size_t start_idx,
186 const size_t num_elements) {
187 for (
size_t n = start_idx;
n < start_idx + num_elements;
n++) {
195 template <
typename StringType>
203 const int8_t* index_data,
205 auto string_offsets =
reinterpret_cast<const StringOffsetT*
>(index_data);
206 auto current_index = index + 1;
207 auto offset = string_offsets[current_index];
209 int64_t
last_offset = string_offsets[current_index - 1];
210 CHECK(last_offset >= 0 && last_offset <= offset);
211 return {offset, last_offset};
217 return string_byte_size;
225 auto current_data =
reinterpret_cast<const char*
>(data +
last_offset);
226 return std::string_view{current_data, string_byte_size};
229 template std::shared_ptr<ChunkMetadata> StringNoneEncoder::appendData<std::string>(
230 const std::vector<std::string>* srcData,
232 const size_t numAppendElems,
233 const bool replicating);
235 template std::shared_ptr<ChunkMetadata> StringNoneEncoder::appendData<std::string_view>(
236 const std::vector<std::string_view>* srcData,
238 const size_t numAppendElems,
239 const bool replicating);
241 template void StringNoneEncoder::update_elem_stats<std::string>(
const std::string& elem);
242 template void StringNoneEncoder::update_elem_stats<std::string_view>(
243 const std::string_view& elem);
std::string_view getStringAtIndex(const int8_t *index_data, const int8_t *data, size_t index)
void updateStats(const int64_t, const bool) override
#define MAX_INPUT_BUF_SIZE
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const int8_t *index_data, int8_t *data, const std::vector< size_t > &selected_idx) override
size_t getNumElemsForBytesInsertData(const std::vector< std::string > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
virtual void read(int8_t *const dst, const size_t num_bytes, const size_t offset=0, const MemoryLevel dst_buffer_type=CPU_LEVEL, const int dst_device_id=-1)=0
AbstractBuffer * index_buf
Data_Namespace::AbstractBuffer * buffer_
size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit) override
An AbstractBuffer is a unit of data management for a data manager.
size_t getStringSizeAtIndex(const int8_t *index_data, size_t index)
void update_elem_stats(const StringType &elem)
StringOffsetT last_offset
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
std::shared_ptr< ChunkMetadata > appendEncodedData(const int8_t *index_data, int8_t *data, const size_t start_idx, const size_t num_elements) override
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
virtual void reserve(size_t num_bytes)=0
std::pair< StringOffsetT, StringOffsetT > getStringOffsets(const int8_t *index_data, size_t index)