OmniSciDB  91042dcc5b
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Chunk.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file Chunk.cpp
19  * @author Wei Hong <wei@mapd.com>
20  */
21 
22 #include "DataMgr/Chunk/Chunk.h"
27 #include "Shared/toString.h"
28 
29 namespace Chunk_NS {
30 std::shared_ptr<Chunk> Chunk::getChunk(const ColumnDescriptor* cd,
31  DataMgr* data_mgr,
32  const ChunkKey& key,
33  const MemoryLevel memoryLevel,
34  const int deviceId,
35  const size_t numBytes,
36  const size_t numElems,
37  const bool pinnable) {
38  std::shared_ptr<Chunk> chunkp = std::make_shared<Chunk>(Chunk(cd, pinnable));
39  chunkp->getChunkBuffer(data_mgr, key, memoryLevel, deviceId, numBytes, numElems);
40  return chunkp;
41 }
42 
43 std::shared_ptr<Chunk> Chunk::getChunk(const ColumnDescriptor* cd,
44  AbstractBuffer* data_buffer,
45  AbstractBuffer* index_buffer) {
46  std::shared_ptr<Chunk> chunkp = std::make_shared<Chunk>(Chunk(cd));
47  chunkp->setChunkBuffer(data_buffer, index_buffer);
48  return chunkp;
49 }
50 
// Chunk::isChunkOnDevice: asks data_mgr->isBufferOnDevice() whether the buffer(s)
// for `key` are present at (mem_level, device_id).
//  - two-buffer branch: both key+{1} and key+{2} must be on the device;
//  - otherwise: only `key` itself is checked.
// NOTE(review): listing line 51 (first line of the signature) and lines 55-56 (the
// `if` condition that the `} else {` below pairs with) are absent from this
// extract; restore them from the real source before building.
52  const ChunkKey& key,
53  const MemoryLevel mem_level,
54  const int device_id) {
57  ChunkKey subKey = key;
// NOTE(review): createChunkBuffer's comments label suffix 1 as the main buffer and
// suffix 2 as the index buffer, so `indexKey`/`dataKey` look swapped here. The
// result is unaffected because both lookups are ANDed together.
58  ChunkKey indexKey(subKey);
59  indexKey.push_back(1);
60  ChunkKey dataKey(subKey);
61  dataKey.push_back(2);
62  return data_mgr->isBufferOnDevice(indexKey, mem_level, device_id) &&
63  data_mgr->isBufferOnDevice(dataKey, mem_level, device_id);
64  } else {
65  return data_mgr->isBufferOnDevice(key, mem_level, device_id);
66  }
67 }
68 
// Attaches externally supplied buffers to this chunk.
//  - two-buffer branch: index_buffer is CHECKed to be non-null, both pointers are
//    stored, and the data buffer's encoder is given the index buffer;
//  - otherwise: only buffer_ is assigned (index_buf_ is left as it was).
// NOTE(review): listing lines 70-71 (the `if` condition paired with `} else {`
// below) and line 85 are absent from this extract; restore before building.
69 void Chunk::setChunkBuffer(AbstractBuffer* buffer, AbstractBuffer* index_buffer) {
72  CHECK(index_buffer);
73  buffer_ = buffer;
74  index_buf_ = index_buffer;
// The encoder's concrete type is selected from the column's SQL type; every
// downcast is CHECKed before use.
75  switch (column_desc_->columnType.get_type()) {
76  case kARRAY: {
77  auto array_encoder = dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
78  CHECK(array_encoder);
79  array_encoder->setIndexBuffer(index_buf_);
80  break;
81  }
82  case kTEXT:
83  case kVARCHAR:
84  case kCHAR: {
86  auto str_encoder = dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
87  CHECK(str_encoder);
88  str_encoder->setIndexBuffer(index_buf_);
89  break;
90  }
// Geo column types go through the same StringNoneEncoder path as the text types.
91  case kPOINT:
92  case kLINESTRING:
93  case kPOLYGON:
94  case kMULTIPOLYGON: {
95  auto str_encoder = dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
96  CHECK(str_encoder);
97  str_encoder->setIndexBuffer(index_buf_);
98  break;
99  }
// Any other column type in this branch is treated as a logic error.
100  default:
101  UNREACHABLE();
102  }
103  } else {
104  buffer_ = buffer;
105  }
106 }
107 
// Chunk::getChunkBuffer: fetches this chunk's buffer(s) from data_mgr.
//  - two-buffer branch: requests key+{1} as the data buffer (num_bytes) and
//    key+{2} as the index buffer, sized (num_elems + 1) * sizeof(StringOffsetT);
//  - otherwise: requests `key` and passes it to setChunkBuffer() with a null
//    index buffer.
// NOTE(review): listing line 108 (start of the signature), lines 114-115 (the `if`
// condition) and line 120 (the call that receives the two buffers, presumably
// `setChunkBuffer(`) are absent from this extract; confirm against the real source.
109  const ChunkKey& key,
110  const MemoryLevel mem_level,
111  const int device_id,
112  const size_t num_bytes,
113  const size_t num_elems) {
116  ChunkKey data_key = key;
117  data_key.push_back(1);
118  ChunkKey index_key = key;
119  index_key.push_back(2);
121  data_mgr->getChunkBuffer(data_key, mem_level, device_id, num_bytes),
122  data_mgr->getChunkBuffer(
123  index_key, mem_level, device_id, (num_elems + 1) * sizeof(StringOffsetT)));
124 
125  } else {
126  setChunkBuffer(data_mgr->getChunkBuffer(key, mem_level, device_id, num_bytes),
127  nullptr);
128  }
129 }
130 
// Chunk::createChunkBuffer: asks data_mgr to create this chunk's buffer(s).
//  - two-buffer branch: creates key+{1} into buffer_ and key+{2} into index_buf_,
//    both with the same page_size;
//  - otherwise: creates a single buffer for `key` into buffer_.
// NOTE(review): listing line 131 (start of the signature) and lines 136-137 (the
// `if` condition paired with `} else {` below) are absent from this extract.
132  const ChunkKey& key,
133  const MemoryLevel mem_level,
134  const int device_id,
135  const size_t page_size) {
138  ChunkKey subKey = key;
139  subKey.push_back(1); // 1 for the main buffer_
140  buffer_ = data_mgr->createChunkBuffer(subKey, mem_level, device_id, page_size);
// Reuse the same key object: swap the trailing 1 for a 2.
141  subKey.pop_back();
142  subKey.push_back(2); // 2 for the index buffer_
143  index_buf_ = data_mgr->createChunkBuffer(subKey, mem_level, device_id, page_size);
144  } else {
145  buffer_ = data_mgr->createChunkBuffer(key, mem_level, device_id, page_size);
146  }
147 }
148 
// Forwards index_data, start_idx, num_elems and byte_limit to a callee and yields
// a size_t.
// NOTE(review): listing lines 153-155 (everything between the signature and the
// argument list below, including the callee name) are absent from this extract.
// Note the argument order: this method takes (num_elems, start_idx) but passes
// (start_idx, num_elems) to the callee.
149 size_t Chunk::getNumElemsForBytesEncodedData(const int8_t* index_data,
150  const size_t num_elems,
151  const size_t start_idx,
152  const size_t byte_limit) {
156  index_data, start_idx, num_elems, byte_limit);
157 }
158 
// Chunk::getNumElemsForBytesInsertData: dispatches on the column's SQL type and
// returns the type-specific encoder's getNumElemsForBytesInsertData() result for
// the matching member of src_data (arraysPtr or stringsPtr).
// Note the argument order: this method takes (num_elems, start_idx) but the
// encoders are called with (start_idx, num_elems).
// NOTE(review): listing lines 159 (start of the signature), 164, 169 (the
// initializer of the FixedLengthArrayNoneEncoder pointer) and 181 are absent from
// this extract. The dynamic_cast results are dereferenced without a null check
// here, unlike in setChunkBuffer.
160  const size_t num_elems,
161  const size_t start_idx,
162  const size_t byte_limit,
163  const bool replicating) {
165  switch (column_desc_->columnType.get_type()) {
166  case kARRAY: {
// A positive get_size() selects the fixed-length array encoder.
167  if (column_desc_->columnType.get_size() > 0) {
168  FixedLengthArrayNoneEncoder* array_encoder =
170  return array_encoder->getNumElemsForBytesInsertData(
171  src_data.arraysPtr, start_idx, num_elems, byte_limit, replicating);
172  }
173  ArrayNoneEncoder* array_encoder =
174  dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
175  return array_encoder->getNumElemsForBytesInsertData(
176  src_data.arraysPtr, start_idx, num_elems, byte_limit, replicating);
177  }
178  case kTEXT:
179  case kVARCHAR:
180  case kCHAR: {
182  StringNoneEncoder* str_encoder =
183  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
184  return str_encoder->getNumElemsForBytesInsertData(
185  src_data.stringsPtr, start_idx, num_elems, byte_limit, replicating);
186  }
// Geo column types read from stringsPtr through StringNoneEncoder as well.
187  case kPOINT:
188  case kLINESTRING:
189  case kPOLYGON:
190  case kMULTIPOLYGON: {
191  StringNoneEncoder* str_encoder =
192  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
193  return str_encoder->getNumElemsForBytesInsertData(
194  src_data.stringsPtr, start_idx, num_elems, byte_limit, replicating);
195  }
// Unsupported column type: CHECK(false), with 0 as the nominal return value.
196  default:
197  CHECK(false);
198  return 0;
199  }
200 }
201 
// Reads the source chunk's raw data pointer (and its index pointer when this
// chunk's column type reports is_varlen_indeed(), otherwise nullptr) and passes
// them with selected_idx to a callee.
// NOTE(review): listing lines 209-210 (the statement(s) that begin the call whose
// arguments appear below) are absent from this extract.
202 std::shared_ptr<ChunkMetadata> Chunk::appendEncodedDataAtIndices(
203  const Chunk& src_chunk,
204  const std::vector<size_t>& selected_idx) {
// The type info comes from this chunk's column descriptor, not from src_chunk's.
205  const auto& ti = column_desc_->columnType;
206  int8_t* data_buffer_ptr = src_chunk.getBuffer()->getMemoryPtr();
207  const int8_t* index_buffer_ptr =
208  ti.is_varlen_indeed() ? src_chunk.getIndexBuf()->getMemoryPtr() : nullptr;
211  index_buffer_ptr, data_buffer_ptr, selected_idx);
212 }
213 
// Reads the source chunk's raw data pointer (and its index pointer when this
// chunk's column type reports is_varlen_indeed(), otherwise nullptr) and passes
// them with a start index and element count to a callee.
// Note the argument order: this method takes (num_elements, start_idx) but passes
// (start_idx, num_elements) to the callee.
// NOTE(review): listing lines 221-222 (the statement(s) that begin the call whose
// arguments appear below) are absent from this extract.
214 std::shared_ptr<ChunkMetadata> Chunk::appendEncodedData(const Chunk& src_chunk,
215  const size_t num_elements,
216  const size_t start_idx) {
// The type info comes from this chunk's column descriptor, not from src_chunk's.
217  const auto& ti = column_desc_->columnType;
218  int8_t* data_buffer_ptr = src_chunk.getBuffer()->getMemoryPtr();
219  const int8_t* index_buffer_ptr =
220  ti.is_varlen_indeed() ? src_chunk.getIndexBuf()->getMemoryPtr() : nullptr;
223  index_buffer_ptr, data_buffer_ptr, start_idx, num_elements);
224 }
225 
// Appends num_elems elements from src_data and returns the encoder's
// ChunkMetadata result.
//  - is_varlen() columns: dispatch on SQL type to ArrayNoneEncoder /
//    FixedLengthArrayNoneEncoder (arraysPtr) or StringNoneEncoder (stringsPtr),
//    starting at start_idx;
//  - all other columns: the buffer's own encoder consumes src_data.numbersPtr.
//    start_idx is not passed on this path.
// NOTE(review): listing line 236 (the initializer of the
// FixedLengthArrayNoneEncoder pointer) is absent from this extract. The
// dynamic_cast results are dereferenced without a null check.
226 std::shared_ptr<ChunkMetadata> Chunk::appendData(DataBlockPtr& src_data,
227  const size_t num_elems,
228  const size_t start_idx,
229  const bool replicating) {
230  const auto& ti = column_desc_->columnType;
231  if (ti.is_varlen()) {
232  switch (ti.get_type()) {
233  case kARRAY: {
// A positive get_size() selects the fixed-length array encoder.
234  if (ti.get_size() > 0) {
235  FixedLengthArrayNoneEncoder* array_encoder =
237  return array_encoder->appendData(
238  src_data.arraysPtr, start_idx, num_elems, replicating);
239  }
240  ArrayNoneEncoder* array_encoder =
241  dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
242  return array_encoder->appendData(
243  src_data.arraysPtr, start_idx, num_elems, replicating);
244  }
245  case kTEXT:
246  case kVARCHAR:
247  case kCHAR: {
// Text columns on this path are asserted to be unencoded (kENCODING_NONE).
248  CHECK_EQ(kENCODING_NONE, ti.get_compression());
249  StringNoneEncoder* str_encoder =
250  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
251  return str_encoder->appendData(
252  src_data.stringsPtr, start_idx, num_elems, replicating);
253  }
// Geo column types read from stringsPtr through StringNoneEncoder as well.
254  case kPOINT:
255  case kLINESTRING:
256  case kPOLYGON:
257  case kMULTIPOLYGON: {
258  StringNoneEncoder* str_encoder =
259  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
260  return str_encoder->appendData(
261  src_data.stringsPtr, start_idx, num_elems, replicating);
262  }
// No return after CHECK(false): control would leave the switch and reach the
// generic append below.
263  default:
264  CHECK(false);
265  }
266  }
267  return buffer_->getEncoder()->appendData(
268  src_data.numbersPtr, num_elems, ti, replicating);
269 }
270 
// Chunk::unpinBuffer: when the chunk is pinnable_, calls unPin() on buffer_ and on
// index_buf_, skipping whichever pointer is null. Does nothing for non-pinnable
// chunks.
// NOTE(review): listing line 271 (the function's signature line) is absent from
// this extract.
272  if (pinnable_) {
273  if (buffer_) {
274  buffer_->unPin();
275  }
276  if (index_buf_) {
277  index_buf_->unPin();
278  }
279  }
280 }
281 
// Chunk::initEncoder: for the column types handled below, downcasts the data
// buffer's encoder and hands it index_buf_ via setIndexBuffer().
// NOTE(review): listing lines 282-285 (the signature and whatever precedes the
// switch, including the opener of the block closed at listing line 314) and line
// 296 are absent from this extract. The dynamic_cast results are dereferenced
// without a null check here, unlike in setChunkBuffer.
286  switch (column_desc_->columnType.get_type()) {
287  case kARRAY: {
288  ArrayNoneEncoder* array_encoder =
289  dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
290  array_encoder->setIndexBuffer(index_buf_);
291  break;
292  }
293  case kTEXT:
294  case kVARCHAR:
295  case kCHAR: {
297  StringNoneEncoder* str_encoder =
298  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
299  str_encoder->setIndexBuffer(index_buf_);
300  break;
301  }
// Geo column types use StringNoneEncoder as well.
302  case kPOINT:
303  case kLINESTRING:
304  case kPOLYGON:
305  case kMULTIPOLYGON: {
306  StringNoneEncoder* str_encoder =
307  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
308  str_encoder->setIndexBuffer(index_buf_);
309  break;
310  }
// Any other column type reaching this switch is asserted against.
311  default:
312  CHECK(false);
313  }
314  }
315 }
316 
// Builds a ChunkIter positioned at element start_idx.
//  - skip_size < 0: the cursor starts in the index buffer, at
//    start_idx * sizeof(StringOffsetT) bytes from its beginning;
//  - otherwise: the cursor starts in the data buffer at start_idx * skip_size
//    bytes, end_pos is the end of the data buffer, and second_buf is null.
// num_elems is copied from chunk_metadata->numElements in both cases.
// NOTE(review): listing lines 321 and 323 (which presumably set it.type_info and
// it.skip_size) and lines 327-328 (the rest of the variable-length branch) are
// absent from this extract. start_idx is a signed int used in pointer arithmetic;
// callers are assumed to pass a non-negative value. Confirm.
317 ChunkIter Chunk::begin_iterator(const std::shared_ptr<ChunkMetadata>& chunk_metadata,
318  int start_idx,
319  int skip) const {
320  ChunkIter it;
322  it.skip = skip;
324  if (it.skip_size < 0) { // if it's variable length
325  it.current_pos = it.start_pos =
326  index_buf_->getMemoryPtr() + start_idx * sizeof(StringOffsetT);
329  } else {
330  it.current_pos = it.start_pos = buffer_->getMemoryPtr() + start_idx * it.skip_size;
331  it.end_pos = buffer_->getMemoryPtr() + buffer_->size();
332  it.second_buf = nullptr;
333  }
334  it.num_elems = chunk_metadata->numElements;
335  return it;
336 }
337 
// Chunk::translateColumnDescriptorsToChunkVec: appends one Chunk to chunkVec for
// each descriptor in colDescs, in list order. Each Chunk is constructed in place
// from the descriptor pointer. Existing elements of chunkVec are kept.
// NOTE(review): listing line 338 (the first line of the signature) is absent from
// this extract.
339  const std::list<const ColumnDescriptor*>& colDescs,
340  std::vector<Chunk>& chunkVec) {
341  for (auto cd : colDescs) {
342  chunkVec.emplace_back(cd);
343  }
344 }
345 
346 std::string Chunk::toString() const {
347  return ::typeName(this) + "(buffer=" + ::toString(buffer_) +
348  ", index_buf=" + ::toString(index_buf_) +
349  ", column_desc=" + ::toString(column_desc_) + ")";
350 }
351 } // namespace Chunk_NS
AbstractBuffer * getIndexBuf() const
Definition: Chunk.h:147
#define CHECK_EQ(x, y)
Definition: Logger.h:219
std::vector< int > ChunkKey
Definition: types.h:37
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
int8_t * start_pos
Definition: ChunkIter.h:33
HOST DEVICE int get_size() const
Definition: sqltypes.h:339
int8_t * current_pos
Definition: ChunkIter.h:32
SQLTypeInfo type_info
Definition: ChunkIter.h:30
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:227
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:228
const ColumnDescriptor * column_desc_
Definition: Chunk.h:164
bool is_varlen() const
Definition: sqltypes.h:545
virtual int8_t * getMemoryPtr()=0
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
#define UNREACHABLE()
Definition: Logger.h:255
size_t getNumElemsForBytesEncodedData(const int8_t *index_data, const size_t num_elems, const size_t start_idx, const size_t byte_limit)
Definition: Chunk.cpp:149
void getChunkBuffer(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId=0, const size_t num_bytes=0, const size_t num_elems=0)
Definition: Chunk.cpp:108
std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const Chunk &src_chunk, const std::vector< size_t > &selected_idx)
Definition: Chunk.cpp:202
void setChunkBuffer(AbstractBuffer *buffer, AbstractBuffer *index_buffer)
Definition: Chunk.cpp:69
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:329
void initEncoder(const SQLTypeInfo &tmp_sql_type)
size_t getNumElemsForBytesInsertData(const std::vector< std::string > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
int32_t StringOffsetT
Definition: sqltypes.h:1090
bool is_fixlen_array() const
Definition: sqltypes.h:529
int8_t * end_pos
Definition: ChunkIter.h:34
size_t num_elems
Definition: ChunkIter.h:37
AbstractBuffer * buffer_
Definition: Chunk.h:162
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
void unpinBuffer()
Definition: Chunk.cpp:271
bool isChunkOnDevice(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int device_id)
Definition: Chunk.cpp:51
virtual size_t getNumElemsForBytesEncodedData(const int8_t *index_data, const int start_idx, const size_t num_elements, const size_t byte_limit)=0
An AbstractBuffer is a unit of data management for a data manager.
specifies the content in-memory of a row in the column metadata table
void setIndexBuffer(AbstractBuffer *buf)
int skip_size
Definition: ChunkIter.h:36
Definition: sqltypes.h:52
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:337
AbstractBuffer * getBuffer() const
Definition: Chunk.h:145
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
bool isBufferOnDevice(const ChunkKey &key, const MemoryLevel memLevel, const int deviceId)
Definition: DataMgr.cpp:460
AbstractBuffer * getChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t numBytes=0)
Definition: DataMgr.cpp:482
int8_t * second_buf
Definition: ChunkIter.h:31
Definition: sqltypes.h:41
std::string typeName(const T *v)
Definition: toString.h:102
void initEncoder()
Definition: Chunk.cpp:282
unencoded fixed length array encoder
int skip
Definition: ChunkIter.h:35
#define CHECK(condition)
Definition: Logger.h:211
void setIndexBuffer(AbstractBuffer *buf)
std::shared_ptr< ChunkMetadata > appendEncodedData(const Chunk &src_chunk, const size_t num_elements, const size_t start_idx)
Definition: Chunk.cpp:214
For unencoded strings.
std::string toString() const
Definition: Chunk.cpp:346
AbstractBuffer * createChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t page_size=0)
Definition: DataMgr.cpp:473
size_t getNumElemsForBytesInsertData(const DataBlockPtr &src_data, const size_t num_elems, const size_t start_idx, const size_t byte_limit, const bool replicating=false)
Definition: Chunk.cpp:159
ChunkIter begin_iterator(const std::shared_ptr< ChunkMetadata > &, int start_idx=0, int skip=1) const
Definition: Chunk.cpp:317
void createChunkBuffer(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId=0, const size_t page_size=0)
Definition: Chunk.cpp:131
SQLTypeInfo columnType
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:30
virtual std::shared_ptr< ChunkMetadata > appendEncodedDataAtIndices(const int8_t *index_data, int8_t *data, const std::vector< size_t > &selected_idx)=0
AbstractBuffer * index_buf_
Definition: Chunk.h:163
int8_t * numbersPtr
Definition: sqltypes.h:226
unencoded array encoder
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
Chunk(bool pinnable=true)
Definition: Chunk.h:43
virtual std::shared_ptr< ChunkMetadata > appendEncodedData(const int8_t *index_data, int8_t *data, const size_t start_idx, const size_t num_elements)=0
bool pinnable_
Definition: Chunk.h:167
std::shared_ptr< ChunkMetadata > appendData(DataBlockPtr &srcData, const size_t numAppendElems, const size_t startIdx, const bool replicating=false)
Definition: Chunk.cpp:226
static void translateColumnDescriptorsToChunkVec(const std::list< const ColumnDescriptor * > &colDescs, std::vector< Chunk > &chunkVec)
Definition: Chunk.cpp:338
virtual std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1)=0