OmniSciDB  cde582ebc3
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Chunk.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "DataMgr/Chunk/Chunk.h"
28 #include "Shared/toString.h"
29 
30 namespace Chunk_NS {
// Factory: builds a shared Chunk for column descriptor `cd` from a temporary
// Chunk(cd, pinnable), then asks it to fetch its buffer(s) through
// getChunkBuffer() using the supplied data manager, key, memory level, device
// id and size arguments. Returns the shared pointer to the new chunk.
31 std::shared_ptr<Chunk> Chunk::getChunk(const ColumnDescriptor* cd,
32  DataMgr* data_mgr,
33  const ChunkKey& key,
34  const MemoryLevel memoryLevel,
35  const int deviceId,
36  const size_t numBytes,
37  const size_t numElems,
38  const bool pinnable) {
39  std::shared_ptr<Chunk> chunkp = std::make_shared<Chunk>(Chunk(cd, pinnable));
// numBytes / numElems are forwarded unchanged to getChunkBuffer().
40  chunkp->getChunkBuffer(data_mgr, key, memoryLevel, deviceId, numBytes, numElems);
41  return chunkp;
42 }
43 
// Factory overload for buffers the caller already holds: builds a shared Chunk
// for `cd` and attaches `data_buffer` / `index_buffer` via setChunkBuffer().
// No DataMgr lookup happens in this overload.
44 std::shared_ptr<Chunk> Chunk::getChunk(const ColumnDescriptor* cd,
45  AbstractBuffer* data_buffer,
46  AbstractBuffer* index_buffer,
47  const bool pinnable) {
48  std::shared_ptr<Chunk> chunkp = std::make_shared<Chunk>(Chunk(cd, pinnable));
49  chunkp->setChunkBuffer(data_buffer, index_buffer);
50  return chunkp;
51 }
52 
// Remainder of Chunk::isChunkOnDevice (the cross-reference index places its
// definition at Chunk.cpp:53). The first signature line and the `if` condition
// (gutter 53, 57-58) are absent from this listing.
// NOTE(review): the branch condition is not visible here; presumably it tests
// for a variable-length column -- confirm against the real source.
54  const ChunkKey& key,
55  const MemoryLevel mem_level,
56  const int device_id) {
// First branch: derive two sub-keys by appending 1 and 2 to `key`; the chunk
// counts as on-device only when both sub-buffers are.
// NOTE(review): getChunkBuffer/createChunkBuffer in this file use suffix 1 for
// the data buffer and 2 for the index buffer, so the local names indexKey /
// dataKey look swapped. The result is unaffected because both are checked.
59  ChunkKey subKey = key;
60  ChunkKey indexKey(subKey);
61  indexKey.push_back(1);
62  ChunkKey dataKey(subKey);
63  dataKey.push_back(2);
64  return data_mgr->isBufferOnDevice(indexKey, mem_level, device_id) &&
65  data_mgr->isBufferOnDevice(dataKey, mem_level, device_id);
66  } else {
// Second branch: a single buffer stored directly under `key`.
67  return data_mgr->isBufferOnDevice(key, mem_level, device_id);
68  }
69 }
70 
// Attaches `buffer` (and, in the first branch, `index_buffer`) to this chunk.
// In the first branch the index buffer is required (CHECK) and is also handed
// to the data buffer's encoder, whose concrete type depends on the column type.
// NOTE(review): the condition selecting the first branch (gutter 72-73) is
// missing from this listing; presumably a variable-length check -- confirm.
71 void Chunk::setChunkBuffer(AbstractBuffer* buffer, AbstractBuffer* index_buffer) {
74  CHECK(index_buffer);
75  buffer_ = buffer;
76  index_buf_ = index_buffer;
77  switch (column_desc_->columnType.get_type()) {
78  case kARRAY: {
// Arrays: the encoder must be an ArrayNoneEncoder; a failed cast trips CHECK.
79  auto array_encoder = dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
80  CHECK(array_encoder);
81  array_encoder->setIndexBuffer(index_buf_);
82  break;
83  }
84  case kTEXT:
85  case kVARCHAR:
86  case kCHAR: {
// String types: the encoder must be a StringNoneEncoder.
// (One line, gutter 87, is missing from the listing at this point.)
88  auto str_encoder = dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
89  CHECK(str_encoder);
90  str_encoder->setIndexBuffer(index_buf_);
91  break;
92  }
93  case kPOINT:
94  case kLINESTRING:
95  case kPOLYGON:
96  case kMULTIPOLYGON: {
// Geo types take the same StringNoneEncoder path as the string types above.
97  auto str_encoder = dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
98  CHECK(str_encoder);
99  str_encoder->setIndexBuffer(index_buf_);
100  break;
101  }
102  default:
// Any other column type in this branch is treated as a programming error.
103  UNREACHABLE();
104  }
105  } else {
// Second branch: only the data buffer is stored; index_buf_ is not assigned
// and `index_buffer` is ignored.
106  buffer_ = buffer;
107  }
108 }
109 
// Remainder of Chunk::getChunkBuffer (the cross-reference index places its
// definition at Chunk.cpp:110). Gutter lines 110, 116-117 and 122 -- the
// signature head, the branch condition and the head of the first call -- are
// absent from this listing.
111  const ChunkKey& key,
112  const MemoryLevel mem_level,
113  const int device_id,
114  const size_t num_bytes,
115  const size_t num_elems) {
// First branch: the data buffer lives under key+{1}, the index buffer under
// key+{2}.
118  ChunkKey data_key = key;
119  data_key.push_back(1);
120  ChunkKey index_key = key;
121  index_key.push_back(2);
// NOTE(review): the callee receiving these two buffers is on the missing
// gutter line 122; presumably setChunkBuffer(, as in the else branch -- confirm.
// The index buffer is requested with room for num_elems + 1 offsets of type
// StringOffsetT.
123  data_mgr->getChunkBuffer(data_key, mem_level, device_id, num_bytes),
124  data_mgr->getChunkBuffer(
125  index_key, mem_level, device_id, (num_elems + 1) * sizeof(StringOffsetT)));
126 
127  } else {
// Second branch: a single buffer under `key`, with no index buffer (nullptr).
128  setChunkBuffer(data_mgr->getChunkBuffer(key, mem_level, device_id, num_bytes),
129  nullptr);
130  }
131 }
132 
// Remainder of Chunk::createChunkBuffer (the cross-reference index places its
// definition at Chunk.cpp:133). The signature head and the branch condition
// (gutter 133, 138-139) are absent from this listing.
134  const ChunkKey& key,
135  const MemoryLevel mem_level,
136  const int device_id,
137  const size_t page_size) {
// First branch: create two buffers under sub-keys of `key`, reusing one
// ChunkKey by swapping its last element from 1 to 2. Both buffers are created
// with the same page_size.
140  ChunkKey subKey = key;
141  subKey.push_back(1); // 1 for the main buffer_
142  buffer_ = data_mgr->createChunkBuffer(subKey, mem_level, device_id, page_size);
143  subKey.pop_back();
144  subKey.push_back(2); // 2 for the index buffer_
145  index_buf_ = data_mgr->createChunkBuffer(subKey, mem_level, device_id, page_size);
146  } else {
// Second branch: one buffer created directly under `key`; index_buf_ is not
// assigned here.
147  buffer_ = data_mgr->createChunkBuffer(key, mem_level, device_id, page_size);
148  }
149 }
150 
// Tail of Chunk::getNumElemsForBytesEncodedDataAtIndices (the cross-reference
// index places its definition at Chunk.cpp:151). Only the parameter list and
// the last line of the body survive in this listing: the three arguments are
// forwarded unchanged to a call whose head (gutter 155-157) is missing.
// NOTE(review): the callee is presumably the buffer encoder's method of the
// same name -- confirm against the real source.
152  const int8_t* index_data,
153  const std::vector<size_t>& selected_idx,
154  const size_t byte_limit) {
158  index_data, selected_idx, byte_limit);
159 }
160 
// Remainder of Chunk::getNumElemsForBytesInsertData (the cross-reference index
// places its definition at Chunk.cpp:161). Gutter lines 161, 166, 171 and 183
// are absent from this listing.
// Dispatches on the column's SQL type to the matching encoder's
// getNumElemsForBytesInsertData() and returns its result.
// Note the argument order: this function takes (num_elems, start_idx), while
// the encoder calls below pass (start_idx, num_elems).
162  const size_t num_elems,
163  const size_t start_idx,
164  const size_t byte_limit,
165  const bool replicating) {
167  switch (column_desc_->columnType.get_type()) {
168  case kARRAY: {
// A positive get_size() selects the FixedLengthArrayNoneEncoder path.
// NOTE(review): the initializer of array_encoder (gutter 171) is missing from
// the listing; presumably a dynamic_cast like the one below -- confirm.
169  if (column_desc_->columnType.get_size() > 0) {
170  FixedLengthArrayNoneEncoder* array_encoder =
172  return array_encoder->getNumElemsForBytesInsertData(
173  src_data.arraysPtr, start_idx, num_elems, byte_limit, replicating);
174  }
// NOTE(review): unlike setChunkBuffer(), the dynamic_cast results in this
// function are dereferenced without a CHECK for nullptr.
175  ArrayNoneEncoder* array_encoder =
176  dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
177  return array_encoder->getNumElemsForBytesInsertData(
178  src_data.arraysPtr, start_idx, num_elems, byte_limit, replicating);
179  }
180  case kTEXT:
181  case kVARCHAR:
182  case kCHAR: {
// String types read their source rows from src_data.stringsPtr.
184  StringNoneEncoder* str_encoder =
185  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
186  return str_encoder->getNumElemsForBytesInsertData(
187  src_data.stringsPtr, start_idx, num_elems, byte_limit, replicating);
188  }
189  case kPOINT:
190  case kLINESTRING:
191  case kPOLYGON:
192  case kMULTIPOLYGON: {
// Geo types take the same StringNoneEncoder / stringsPtr path.
193  StringNoneEncoder* str_encoder =
194  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
195  return str_encoder->getNumElemsForBytesInsertData(
196  src_data.stringsPtr, start_idx, num_elems, byte_limit, replicating);
197  }
198  default:
// Any other type is a programming error; the return after CHECK(false) only
// gives this path a value.
199  CHECK(false);
200  return 0;
201  }
202 }
203 
// Appends the already-encoded rows of `src_chunk` selected by `selected_idx`.
// Takes the source chunk's raw data pointer and, only when this chunk's column
// type reports is_varlen_indeed(), its index-buffer pointer (nullptr
// otherwise), and forwards both together with the index list.
// NOTE(review): the head of the forwarding call (gutter 211-212) is missing
// from this listing; presumably buffer_->getEncoder()->
// appendEncodedDataAtIndices( -- confirm.
204 std::shared_ptr<ChunkMetadata> Chunk::appendEncodedDataAtIndices(
205  const Chunk& src_chunk,
206  const std::vector<size_t>& selected_idx) {
207  const auto& ti = column_desc_->columnType;
208  int8_t* data_buffer_ptr = src_chunk.getBuffer()->getMemoryPtr();
209  const int8_t* index_buffer_ptr =
210  ti.is_varlen_indeed() ? src_chunk.getIndexBuf()->getMemoryPtr() : nullptr;
213  index_buffer_ptr, data_buffer_ptr, selected_idx);
214 }
215 
// Appends `num_elements` already-encoded rows of `src_chunk`, starting at
// `start_idx`. Takes the source chunk's raw data pointer and, only when this
// chunk's column type reports is_varlen_indeed(), its index-buffer pointer
// (nullptr otherwise). The forwarded arguments are ordered
// (index ptr, data ptr, start_idx, num_elements).
// NOTE(review): the head of the forwarding call (gutter 223-224) is missing
// from this listing; presumably buffer_->getEncoder()->appendEncodedData( --
// confirm.
216 std::shared_ptr<ChunkMetadata> Chunk::appendEncodedData(const Chunk& src_chunk,
217  const size_t num_elements,
218  const size_t start_idx) {
219  const auto& ti = column_desc_->columnType;
220  int8_t* data_buffer_ptr = src_chunk.getBuffer()->getMemoryPtr();
221  const int8_t* index_buffer_ptr =
222  ti.is_varlen_indeed() ? src_chunk.getIndexBuf()->getMemoryPtr() : nullptr;
225  index_buffer_ptr, data_buffer_ptr, start_idx, num_elements);
226 }
227 
// Appends `num_elems` rows from `src_data` to this chunk and returns the
// metadata produced by the encoder that performed the append.
// When the column type reports is_varlen(), the call is dispatched by SQL type
// to the array or string encoder; every other column goes through the buffer's
// generic encoder using src_data.numbersPtr.
228 std::shared_ptr<ChunkMetadata> Chunk::appendData(DataBlockPtr& src_data,
229  const size_t num_elems,
230  const size_t start_idx,
231  const bool replicating) {
232  const auto& ti = column_desc_->columnType;
233  if (ti.is_varlen()) {
234  switch (ti.get_type()) {
235  case kARRAY: {
// A positive get_size() selects the FixedLengthArrayNoneEncoder path.
// NOTE(review): the initializer of array_encoder (gutter 238) is missing from
// this listing; presumably a dynamic_cast like the one below -- confirm.
236  if (ti.get_size() > 0) {
237  FixedLengthArrayNoneEncoder* array_encoder =
239  return array_encoder->appendData(
240  src_data.arraysPtr, start_idx, num_elems, replicating);
241  }
// NOTE(review): the dynamic_cast results in this function are dereferenced
// without a CHECK for nullptr, unlike setChunkBuffer().
242  ArrayNoneEncoder* array_encoder =
243  dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
244  return array_encoder->appendData(
245  src_data.arraysPtr, start_idx, num_elems, replicating);
246  }
247  case kTEXT:
248  case kVARCHAR:
249  case kCHAR: {
// Only strings with no compression (kENCODING_NONE) are accepted on this path.
250  CHECK_EQ(kENCODING_NONE, ti.get_compression());
251  StringNoneEncoder* str_encoder =
252  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
253  return str_encoder->appendData(
254  src_data.stringsPtr, start_idx, num_elems, replicating);
255  }
256  case kPOINT:
257  case kLINESTRING:
258  case kPOLYGON:
259  case kMULTIPOLYGON: {
// Geo types use the StringNoneEncoder / stringsPtr path as well.
260  StringNoneEncoder* str_encoder =
261  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
262  return str_encoder->appendData(
263  src_data.stringsPtr, start_idx, num_elems, replicating);
264  }
265  default:
// No return here: if CHECK(false) were ever to return, control would fall
// through to the generic append below.
266  CHECK(false);
267  }
268  }
// Generic path: start_idx is not passed; the source position is carried by
// src_data.numbersPtr.
269  return buffer_->getEncoder()->appendData(
270  src_data.numbersPtr, num_elems, ti, replicating);
271 }
272 
// Body of Chunk::unpinBuffer (the cross-reference index places its definition
// at Chunk.cpp:273; the signature line is absent from this listing).
// Does nothing for non-pinnable chunks; otherwise unpins whichever of the data
// and index buffers is non-null.
274  if (pinnable_) {
275  if (buffer_) {
276  buffer_->unPin();
277  }
278  if (index_buf_) {
279  index_buf_->unPin();
280  }
281  }
282 }
283 
// Tail of Chunk::initEncoder (the cross-reference index places its definition
// at Chunk.cpp:284). The signature and the statements before this switch
// (gutter 284-287) are absent from this listing, so the enclosing condition
// closed by the brace at gutter 316 is not visible.
// The visible part hands index_buf_ to the data buffer's encoder, choosing the
// encoder type by column type.
// NOTE(review): the dynamic_cast results are dereferenced without a CHECK for
// nullptr, unlike the otherwise parallel switch in setChunkBuffer().
288  switch (column_desc_->columnType.get_type()) {
289  case kARRAY: {
290  ArrayNoneEncoder* array_encoder =
291  dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
292  array_encoder->setIndexBuffer(index_buf_);
293  break;
294  }
295  case kTEXT:
296  case kVARCHAR:
297  case kCHAR: {
// (One line, gutter 298, is missing from the listing at this point.)
299  StringNoneEncoder* str_encoder =
300  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
301  str_encoder->setIndexBuffer(index_buf_);
302  break;
303  }
304  case kPOINT:
305  case kLINESTRING:
306  case kPOLYGON:
307  case kMULTIPOLYGON: {
// Geo types take the same StringNoneEncoder path as the string types.
308  StringNoneEncoder* str_encoder =
309  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
310  str_encoder->setIndexBuffer(index_buf_);
311  break;
312  }
313  default:
// Any other column type reaching this switch is a programming error.
314  CHECK(false);
315  }
316  }
317 }
318 
// Builds a ChunkIter positioned at element `start_idx` of this chunk.
// NOTE(review): gutter lines 323, 325 and 329-330 are missing from this
// listing. Presumably they set it.type_info and it.skip_size and, in the
// variable-length branch, it.end_pos and it.second_buf -- confirm against the
// real source.
319 ChunkIter Chunk::begin_iterator(const std::shared_ptr<ChunkMetadata>& chunk_metadata,
320  int start_idx,
321  int skip) const {
322  ChunkIter it;
324  it.skip = skip;
// Variable-length data is walked through the index buffer: the start position
// is entry start_idx, each entry being sizeof(StringOffsetT) bytes.
326  if (it.skip_size < 0) { // if it's variable length
327  it.current_pos = it.start_pos =
328  index_buf_->getMemoryPtr() + start_idx * sizeof(StringOffsetT);
331  } else {
// Fixed-length data is walked directly in the data buffer with a stride of
// skip_size bytes; end_pos is the data pointer plus buffer_->size(), and
// second_buf is left null.
332  it.current_pos = it.start_pos = buffer_->getMemoryPtr() + start_idx * it.skip_size;
333  it.end_pos = buffer_->getMemoryPtr() + buffer_->size();
334  it.second_buf = nullptr;
335  }
// The element count comes from the supplied metadata rather than being derived
// from the buffer here.
336  it.num_elems = chunk_metadata->numElements;
337  return it;
338 }
339 
// Remainder of Chunk::translateColumnDescriptorsToChunkVec (the cross-reference
// index places its definition at Chunk.cpp:340; the first signature line is
// absent from this listing).
// Appends one Chunk per column descriptor to `chunkVec`, in list order, each
// constructed in place from the descriptor pointer. Existing elements of
// `chunkVec` are kept.
341  const std::list<const ColumnDescriptor*>& colDescs,
342  std::vector<Chunk>& chunkVec) {
343  for (auto cd : colDescs) {
344  chunkVec.emplace_back(cd);
345  }
346 }
347 
// Returns a debug string of the form
// "<type name>(buffer=..., index_buf=..., column_desc=...)", where each field is
// rendered with the global ::toString helper and the prefix comes from
// ::typeName(this).
348 std::string Chunk::toString() const {
349  return ::typeName(this) + "(buffer=" + ::toString(buffer_) +
350  ", index_buf=" + ::toString(index_buf_) +
351  ", column_desc=" + ::toString(column_desc_) + ")";
352 }
353 } // namespace Chunk_NS
AbstractBuffer * getIndexBuf() const
Definition: Chunk.h:148
#define CHECK_EQ(x, y)
Definition: Logger.h:230
std::vector< int > ChunkKey
Definition: types.h:36
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
int8_t * start_pos
Definition: ChunkIter.h:34
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
int8_t * current_pos
Definition: ChunkIter.h:33
SQLTypeInfo type_info
Definition: ChunkIter.h:31
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:227
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:228
const ColumnDescriptor * column_desc_
Definition: Chunk.h:165
bool is_varlen() const
Definition: sqltypes.h:536
virtual int8_t * getMemoryPtr()=0
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
#define UNREACHABLE()
Definition: Logger.h:266
void getChunkBuffer(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId=0, const size_t num_bytes=0, const size_t num_elems=0)
Definition: Chunk.cpp:110
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const Chunk &src_chunk, const std::vector< size_t > &selected_idx)
Definition: Chunk.cpp:204
void setChunkBuffer(AbstractBuffer *buffer, AbstractBuffer *index_buffer)
Definition: Chunk.cpp:71
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
void initEncoder(const SQLTypeInfo &tmp_sql_type)
size_t getNumElemsForBytesInsertData(const std::vector< std::string > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
int32_t StringOffsetT
Definition: sqltypes.h:1113
virtual size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit)=0
bool is_fixlen_array() const
Definition: sqltypes.h:520
int8_t * end_pos
Definition: ChunkIter.h:35
size_t num_elems
Definition: ChunkIter.h:38
AbstractBuffer * buffer_
Definition: Chunk.h:163
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
void unpinBuffer()
Definition: Chunk.cpp:273
bool isChunkOnDevice(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int device_id)
Definition: Chunk.cpp:53
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
void setIndexBuffer(AbstractBuffer *buf)
int skip_size
Definition: ChunkIter.h:37
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
AbstractBuffer * getBuffer() const
Definition: Chunk.h:146
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
bool isBufferOnDevice(const ChunkKey &key, const MemoryLevel memLevel, const int deviceId)
Definition: DataMgr.cpp:462
AbstractBuffer * getChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t numBytes=0)
Definition: DataMgr.cpp:484
int8_t * second_buf
Definition: ChunkIter.h:32
Definition: sqltypes.h:41
std::string typeName(const T *v)
Definition: toString.h:102
void initEncoder()
Definition: Chunk.cpp:284
unencoded fixed length array encoder
int skip
Definition: ChunkIter.h:36
#define CHECK(condition)
Definition: Logger.h:222
size_t getNumElemsForBytesEncodedDataAtIndices(const int8_t *index_data, const std::vector< size_t > &selected_idx, const size_t byte_limit)
Definition: Chunk.cpp:151
void setIndexBuffer(AbstractBuffer *buf)
std::shared_ptr< ChunkMetadata > appendEncodedData(const Chunk &src_chunk, const size_t num_elements, const size_t start_idx)
Definition: Chunk.cpp:216
For unencoded strings.
std::string toString() const
Definition: Chunk.cpp:348
AbstractBuffer * createChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t page_size=0)
Definition: DataMgr.cpp:475
size_t getNumElemsForBytesInsertData(const DataBlockPtr &src_data, const size_t num_elems, const size_t start_idx, const size_t byte_limit, const bool replicating=false)
Definition: Chunk.cpp:161
ChunkIter begin_iterator(const std::shared_ptr< ChunkMetadata > &, int start_idx=0, int skip=1) const
Definition: Chunk.cpp:319
void createChunkBuffer(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId=0, const size_t page_size=0)
Definition: Chunk.cpp:133
SQLTypeInfo columnType
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:31
virtual std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const int8_t *index_data, int8_t *data, const std::vector< size_t > &selected_idx)=0
AbstractBuffer * index_buf_
Definition: Chunk.h:164
int8_t * numbersPtr
Definition: sqltypes.h:226
unencoded array encoder
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
Chunk(bool pinnable=true)
Definition: Chunk.h:43
virtual std::shared_ptr< ChunkMetadata > appendEncodedData(const int8_t *index_data, int8_t *data, const size_t start_idx, const size_t num_elements)=0
bool pinnable_
Definition: Chunk.h:168
std::shared_ptr< ChunkMetadata > appendData(DataBlockPtr &srcData, const size_t numAppendElems, const size_t startIdx, const bool replicating=false)
Definition: Chunk.cpp:228
static void translateColumnDescriptorsToChunkVec(const std::list< const ColumnDescriptor * > &colDescs, std::vector< Chunk > &chunkVec)
Definition: Chunk.cpp:340
virtual std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1)=0