32 const std::vector<std::string>* srcData,
34 const size_t numAppendElems,
35 const size_t byteLimit,
36 const bool replicating) {
39 for (; n < start_idx + numAppendElems; n++) {
40 size_t len = (*srcData)[replicating ? 0 :
n].length();
41 if (dataSize + len > byteLimit) {
50 const int8_t* index_data,
51 const std::vector<size_t>& selected_idx,
52 const size_t byte_limit) {
53 size_t num_elements = 0;
55 for (
const auto& offset_index : selected_idx) {
57 if (data_size + element_size > byte_limit) {
60 data_size += element_size;
67 const int8_t* index_data,
69 const std::vector<size_t>& selected_idx) {
70 std::vector<std::string_view> data_subset;
71 data_subset.reserve(selected_idx.size());
72 for (
const auto& offset_index : selected_idx) {
75 return appendData(&data_subset, 0, selected_idx.size(),
false);
79 const int8_t* index_data,
81 const size_t start_idx,
82 const size_t num_elements) {
83 std::vector<std::string_view> data_subset;
84 data_subset.reserve(num_elements);
85 for (
size_t count = 0; count < num_elements; ++count) {
86 auto current_index = start_idx + count;
89 return appendData(&data_subset, 0, num_elements,
false);
92 template <
typename StringType>
94 const std::vector<StringType>* srcData,
96 const size_t numAppendElems,
97 const bool replicating) {
99 size_t append_index_size = numAppendElems *
sizeof(
StringOffsetT);
119 size_t append_data_size = 0;
120 for (
size_t n = start_idx;
n < start_idx + numAppendElems;
n++) {
121 size_t len = (*srcData)[replicating ? 0 :
n].length();
122 append_data_size += len;
128 auto inbuf = std::make_unique<int8_t[]>(inbuf_size);
129 for (
size_t num_appended = 0; num_appended < numAppendElems;) {
132 for (i = 0; num_appended < numAppendElems && i < inbuf_size /
sizeof(
StringOffsetT);
133 i++, num_appended++) {
135 last_offset + (*srcData)[replicating ? 0 : num_appended + start_idx].length();
141 for (
size_t num_appended = 0; num_appended < numAppendElems;) {
143 for (
int i = start_idx + num_appended;
144 num_appended < numAppendElems && size < inbuf_size;
145 i++, num_appended++) {
146 size_t len = (*srcData)[replicating ? 0 : i].length();
147 if (len > inbuf_size) {
153 buffer_->
append((int8_t*)(*srcData)[replicating ? 0 : i].data(), len);
156 }
else if (size + len > inbuf_size) {
159 char*
dest =
reinterpret_cast<char*
>(inbuf.get()) + size;
161 (*srcData)[replicating ? 0 : i].copy(dest, len);
177 auto chunk_metadata = std::make_shared<ChunkMetadata>();
179 return chunk_metadata;
183 const size_t start_idx,
184 const size_t num_elements) {
185 for (
size_t n = start_idx;
n < start_idx + num_elements;
n++) {
193 template <
typename StringType>
201 const int8_t* index_data,
203 auto string_offsets =
reinterpret_cast<const StringOffsetT*
>(index_data);
204 auto current_index = index + 1;
205 auto offset = string_offsets[current_index];
207 int64_t
last_offset = string_offsets[current_index - 1];
208 CHECK(last_offset >= 0 && last_offset <= offset);
209 return {offset, last_offset};
215 return string_byte_size;
223 auto current_data =
reinterpret_cast<const char*
>(data +
last_offset);
224 return std::string_view{current_data, string_byte_size};
// Explicit instantiation of the StringNoneEncoder::appendData member template
// for StringType = std::string.
// NOTE(review): this excerpt omits a source line between `srcData` and
// `numAppendElems`, so the full parameter list is not visible here — confirm
// it against the template definition before editing.
227 template std::shared_ptr<ChunkMetadata> StringNoneEncoder::appendData<std::string>(
228 const std::vector<std::string>* srcData,
230 const size_t numAppendElems,
231 const bool replicating);
// Explicit instantiation of the same member template for
// StringType = std::string_view. Code earlier in this file calls appendData
// with a std::vector<std::string_view>, which relies on this instantiation.
233 template std::shared_ptr<ChunkMetadata> StringNoneEncoder::appendData<std::string_view>(
234 const std::vector<std::string_view>* srcData,
236 const size_t numAppendElems,
237 const bool replicating);
// Explicit instantiation of the StringNoneEncoder::update_elem_stats member
// template for elements of type std::string.
239 template void StringNoneEncoder::update_elem_stats<std::string>(
const std::string& elem);
// Explicit instantiation of the same member template for elements of type
// std::string_view.
240 template void StringNoneEncoder::update_elem_stats<std::string_view>(
241 const std::string_view& elem);
245 chunkMetadata->chunkStats.min.stringval =
nullptr;
246 chunkMetadata->chunkStats.max.stringval =
nullptr;
247 chunkMetadata->chunkStats.has_nulls =
has_nulls;
254 chunk_stats.max.stringval =
nullptr;
256 return std::make_shared<ChunkMetadata>(ti, 0, 0, chunk_stats);
std::string_view getStringAtIndex(const int8_t *index_data, const int8_t *data, size_t index)
void updateStats(const int64_t, const bool) override
#define MAX_INPUT_BUF_SIZE
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const int8_t *index_data, int8_t *data, const std::vector< size_t > &selected_idx) override
size_t getNumElemsForBytesInsertData(const std::vector< std::string > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
virtual void read(int8_t *const dst, const size_t num_bytes, const size_t offset=0, const MemoryLevel dst_buffer_type=CPU_LEVEL, const int dst_device_id=-1)=0
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
AbstractBuffer * index_buf
Data_Namespace::AbstractBuffer * buffer_
size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit) override
An AbstractBuffer is a unit of data management for a data manager.
size_t getStringSizeAtIndex(const int8_t *index_data, size_t index)
void update_elem_stats(const StringType &elem)
StringOffsetT last_offset
void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata) override
std::shared_ptr< ChunkMetadata > appendEncodedData(const int8_t *index_data, int8_t *data, const size_t start_idx, const size_t num_elements) override
virtual void append(int8_t *src, const size_t num_bytes, const MemoryLevel src_buffer_type=CPU_LEVEL, const int device_id=-1)=0
virtual void reserve(size_t num_bytes)=0
std::pair< StringOffsetT, StringOffsetT > getStringOffsets(const int8_t *index_data, size_t index)