OmniSciDB  8a228a1076
Chunk.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file Chunk.cpp
19  * @author Wei Hong <wei@mapd.com>
20  */
21 
22 #include "DataMgr/Chunk/Chunk.h"
26 
27 namespace Chunk_NS {
28 std::shared_ptr<Chunk> Chunk::getChunk(const ColumnDescriptor* cd,
29  DataMgr* data_mgr,
30  const ChunkKey& key,
31  const MemoryLevel memoryLevel,
32  const int deviceId,
33  const size_t numBytes,
34  const size_t numElems) {
35  std::shared_ptr<Chunk> chunkp = std::make_shared<Chunk>(Chunk(cd));
36  chunkp->getChunkBuffer(data_mgr, key, memoryLevel, deviceId, numBytes, numElems);
37  return chunkp;
38 }
39 
// Chunk::isChunkOnDevice: reports whether the buffer(s) of this chunk are present
// at (mem_level, device_id), as answered by data_mgr->isBufferOnDevice().
// NOTE(review): the first signature line (listing line 40) and the condition that
// opens the branch below (listing lines 44-45) are absent from this listing.
41  const ChunkKey& key,
42  const MemoryLevel mem_level,
43  const int device_id) {
// Two-buffer branch: both key+{1} and key+{2} must be on the device.
// NOTE(review): getChunkBuffer in this file comments suffix 1 as the main buffer
// and suffix 2 as the index buffer, so the indexKey/dataKey names here look
// swapped. The result is unaffected because both buffers are required.
46  ChunkKey subKey = key;
47  ChunkKey indexKey(subKey);
48  indexKey.push_back(1);
49  ChunkKey dataKey(subKey);
50  dataKey.push_back(2);
51  return data_mgr->isBufferOnDevice(indexKey, mem_level, device_id) &&
52  data_mgr->isBufferOnDevice(dataKey, mem_level, device_id);
53  } else {
// Single-buffer branch: the chunk key is used as-is.
54  return data_mgr->isBufferOnDevice(key, mem_level, device_id);
55  }
56 }
57 
// Chunk::getChunkBuffer: obtains this chunk's buffer(s) from data_mgr and stores
// them in buffer_ (and index_buf_ in the two-buffer branch).
// NOTE(review): the first signature line (listing line 58) and the condition that
// opens the branch below (listing lines 64-65) are absent from this listing.
59  const ChunkKey& key,
60  const MemoryLevel mem_level,
61  const int device_id,
62  const size_t num_bytes,
63  const size_t num_elems) {
// Two-buffer branch: the same key is reused with suffix 1, then suffix 2.
66  ChunkKey subKey = key;
67  subKey.push_back(1); // 1 for the main buffer_
68  buffer_ = data_mgr->getChunkBuffer(subKey, mem_level, device_id, num_bytes);
69  subKey.pop_back();
70  subKey.push_back(2); // 2 for the index buffer_
71  index_buf_ = data_mgr->getChunkBuffer(
72  subKey,
73  mem_level,
74  device_id,
75  (num_elems + 1) * sizeof(StringOffsetT)); // always record n+1 offsets so string
76  // length can be calculated
// Hand the index buffer to the encoder of the main buffer. The encoder class is
// selected by column type, and CHECK fails if the cast yields null.
77  switch (column_desc_->columnType.get_type()) {
78  case kARRAY: {
79  auto array_encoder = dynamic_cast<ArrayNoneEncoder*>(buffer_->encoder.get());
80  CHECK(array_encoder);
81  array_encoder->setIndexBuffer(index_buf_);
82  break;
83  }
84  case kTEXT:
85  case kVARCHAR:
86  case kCHAR: {
// NOTE(review): listing line 87 is absent here.
88  auto str_encoder = dynamic_cast<StringNoneEncoder*>(buffer_->encoder.get());
89  CHECK(str_encoder);
90  str_encoder->setIndexBuffer(index_buf_);
91  break;
92  }
// Geo types go through the same StringNoneEncoder path as the string types above.
93  case kPOINT:
94  case kLINESTRING:
95  case kPOLYGON:
96  case kMULTIPOLYGON: {
97  auto str_encoder = dynamic_cast<StringNoneEncoder*>(buffer_->encoder.get());
98  CHECK(str_encoder);
99  str_encoder->setIndexBuffer(index_buf_);
100  break;
101  }
// Any other column type is treated as a programming error in this branch.
102  default:
103  UNREACHABLE();
104  }
105  } else {
// Single-buffer branch: only buffer_ is fetched, using the key unchanged.
106  buffer_ = data_mgr->getChunkBuffer(key, mem_level, device_id, num_bytes);
107  }
108 }
109 
// Chunk::createChunkBuffer: asks data_mgr to create this chunk's buffer(s) and
// stores the results in buffer_ (and index_buf_ in the two-buffer branch).
// NOTE(review): the first signature line (listing line 110) and the condition that
// opens the branch below (listing lines 115-116) are absent from this listing.
111  const ChunkKey& key,
112  const MemoryLevel mem_level,
113  const int device_id,
114  const size_t page_size) {
// Two-buffer branch: both buffers are created with the same page_size.
117  ChunkKey subKey = key;
118  subKey.push_back(1); // 1 for the main buffer_
119  buffer_ = data_mgr->createChunkBuffer(subKey, mem_level, device_id, page_size);
120  subKey.pop_back();
121  subKey.push_back(2); // 2 for the index buffer_
122  index_buf_ = data_mgr->createChunkBuffer(subKey, mem_level, device_id, page_size);
123  } else {
// Single-buffer branch: only buffer_ is created, using the key unchanged.
124  buffer_ = data_mgr->createChunkBuffer(key, mem_level, device_id, page_size);
125  }
126 }
127 
// Chunk::getNumElemsForBytesInsertData: forwards to the type-specific encoder's
// getNumElemsForBytesInsertData and returns its result; the encoder class is
// chosen from the column type.
// NOTE(review): the first signature line (listing line 128) and listing lines 133
// and 150 are absent from this listing.
// NOTE(review): the dynamic_cast results below are dereferenced without a null
// check, unlike the CHECK-guarded casts in getChunkBuffer in this file.
129  const size_t num_elems,
130  const size_t start_idx,
131  const size_t byte_limit,
132  const bool replicating) {
134  switch (column_desc_->columnType.get_type()) {
135  case kARRAY: {
// A positive get_size() selects the fixed-length array encoder; otherwise the
// general array encoder is used.
136  if (column_desc_->columnType.get_size() > 0) {
137  FixedLengthArrayNoneEncoder* array_encoder =
138  dynamic_cast<FixedLengthArrayNoneEncoder*>(buffer_->encoder.get());
139  return array_encoder->getNumElemsForBytesInsertData(
140  src_data.arraysPtr, start_idx, num_elems, byte_limit, replicating);
141  }
142  ArrayNoneEncoder* array_encoder =
143  dynamic_cast<ArrayNoneEncoder*>(buffer_->encoder.get());
144  return array_encoder->getNumElemsForBytesInsertData(
145  src_data.arraysPtr, start_idx, num_elems, byte_limit, replicating);
146  }
147  case kTEXT:
148  case kVARCHAR:
149  case kCHAR: {
151  StringNoneEncoder* str_encoder =
152  dynamic_cast<StringNoneEncoder*>(buffer_->encoder.get());
153  return str_encoder->getNumElemsForBytesInsertData(
154  src_data.stringsPtr, start_idx, num_elems, byte_limit, replicating);
155  }
// Geo types read from stringsPtr through StringNoneEncoder, like the string types.
156  case kPOINT:
157  case kLINESTRING:
158  case kPOLYGON:
159  case kMULTIPOLYGON: {
160  StringNoneEncoder* str_encoder =
161  dynamic_cast<StringNoneEncoder*>(buffer_->encoder.get());
162  return str_encoder->getNumElemsForBytesInsertData(
163  src_data.stringsPtr, start_idx, num_elems, byte_limit, replicating);
164  }
// Any other column type fails the CHECK; the return 0 follows it only to give
// the function a value on this path.
165  default:
166  CHECK(false);
167  return 0;
168  }
169 }
170 
171 std::shared_ptr<ChunkMetadata> Chunk::appendData(DataBlockPtr& src_data,
172  const size_t num_elems,
173  const size_t start_idx,
174  const bool replicating) {
175  const auto& ti = column_desc_->columnType;
176  if (ti.is_varlen()) {
177  switch (ti.get_type()) {
178  case kARRAY: {
179  if (ti.get_size() > 0) {
180  FixedLengthArrayNoneEncoder* array_encoder =
181  dynamic_cast<FixedLengthArrayNoneEncoder*>(buffer_->encoder.get());
182  return array_encoder->appendData(
183  src_data.arraysPtr, start_idx, num_elems, replicating);
184  }
185  ArrayNoneEncoder* array_encoder =
186  dynamic_cast<ArrayNoneEncoder*>(buffer_->encoder.get());
187  return array_encoder->appendData(
188  src_data.arraysPtr, start_idx, num_elems, replicating);
189  }
190  case kTEXT:
191  case kVARCHAR:
192  case kCHAR: {
193  CHECK_EQ(kENCODING_NONE, ti.get_compression());
194  StringNoneEncoder* str_encoder =
195  dynamic_cast<StringNoneEncoder*>(buffer_->encoder.get());
196  return str_encoder->appendData(
197  src_data.stringsPtr, start_idx, num_elems, replicating);
198  }
199  case kPOINT:
200  case kLINESTRING:
201  case kPOLYGON:
202  case kMULTIPOLYGON: {
203  StringNoneEncoder* str_encoder =
204  dynamic_cast<StringNoneEncoder*>(buffer_->encoder.get());
205  return str_encoder->appendData(
206  src_data.stringsPtr, start_idx, num_elems, replicating);
207  }
208  default:
209  CHECK(false);
210  }
211  }
212  return buffer_->encoder->appendData(src_data.numbersPtr, num_elems, ti, replicating);
213 }
214 
// Chunk::unpinBuffer: calls unPin() on buffer_ and on index_buf_, skipping
// whichever pointer is null.
// NOTE(review): the signature line (listing line 215) is absent from this listing.
216  if (buffer_) {
217  buffer_->unPin();
218  }
219  if (index_buf_) {
220  index_buf_->unPin();
221  }
222 }
223 
// Chunk::initEncoder: the visible part hands index_buf_ to the type-specific
// encoder attached to buffer_.
// NOTE(review): listing lines 224-227 (the signature and whatever precedes the
// switch, including the condition whose closing brace is at listing line 256)
// and listing line 238 are absent from this listing.
// NOTE(review): the dynamic_cast results below are dereferenced without a null
// check, unlike the CHECK-guarded casts in getChunkBuffer in this file.
228  switch (column_desc_->columnType.get_type()) {
229  case kARRAY: {
230  ArrayNoneEncoder* array_encoder =
231  dynamic_cast<ArrayNoneEncoder*>(buffer_->encoder.get());
232  array_encoder->setIndexBuffer(index_buf_);
233  break;
234  }
235  case kTEXT:
236  case kVARCHAR:
237  case kCHAR: {
239  StringNoneEncoder* str_encoder =
240  dynamic_cast<StringNoneEncoder*>(buffer_->encoder.get());
241  str_encoder->setIndexBuffer(index_buf_);
242  break;
243  }
// Geo types use the same StringNoneEncoder path as the string types above.
244  case kPOINT:
245  case kLINESTRING:
246  case kPOLYGON:
247  case kMULTIPOLYGON: {
248  StringNoneEncoder* str_encoder =
249  dynamic_cast<StringNoneEncoder*>(buffer_->encoder.get());
250  str_encoder->setIndexBuffer(index_buf_);
251  break;
252  }
// Any other column type fails the CHECK.
253  default:
254  CHECK(false);
255  }
256  }
257 }
258 
// Builds a ChunkIter positioned at element start_idx of this chunk and returns
// it by value. num_elems is taken from chunk_metadata->numElements.
// NOTE(review): listing lines 263, 265 and 269-270 are absent from this listing;
// presumably they set it.type_info, it.skip_size, and the variable-length
// it.end_pos / it.second_buf. Confirm against the real file.
259 ChunkIter Chunk::begin_iterator(const std::shared_ptr<ChunkMetadata>& chunk_metadata,
260  int start_idx,
261  int skip) const {
262  ChunkIter it;
264  it.skip = skip;
// Variable-length case: the position is computed inside index_buf_, one
// StringOffsetT per element.
266  if (it.skip_size < 0) { // if it's variable length
267  it.current_pos = it.start_pos =
268  index_buf_->getMemoryPtr() + start_idx * sizeof(StringOffsetT);
271  } else {
// Fixed-length case: the position is computed inside buffer_ with a stride of
// skip_size bytes, end_pos is the end of the buffer's current size, and there is
// no second buffer.
272  it.current_pos = it.start_pos = buffer_->getMemoryPtr() + start_idx * it.skip_size;
273  it.end_pos = buffer_->getMemoryPtr() + buffer_->size();
274  it.second_buf = nullptr;
275  }
276  it.num_elems = chunk_metadata->numElements;
277  return it;
278 }
279 } // namespace Chunk_NS
void initEncoder(const SQLTypeInfo tmp_sql_type)
#define CHECK_EQ(x, y)
Definition: Logger.h:205
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
int8_t * start_pos
Definition: ChunkIter.h:33
int8_t * current_pos
Definition: ChunkIter.h:32
SQLTypeInfo type_info
Definition: ChunkIter.h:30
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:150
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:151
const ColumnDescriptor * column_desc_
Definition: Chunk.h:119
virtual size_t size() const =0
virtual int8_t * getMemoryPtr()=0
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
#define UNREACHABLE()
Definition: Logger.h:241
HOST DEVICE int get_size() const
Definition: sqltypes.h:269
void getChunkBuffer(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId=0, const size_t num_bytes=0, const size_t num_elems=0)
Definition: Chunk.cpp:58
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:267
bool is_varlen() const
Definition: sqltypes.h:431
size_t getNumElemsForBytesInsertData(const std::vector< std::string > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
int32_t StringOffsetT
Definition: sqltypes.h:867
int8_t * end_pos
Definition: ChunkIter.h:34
size_t num_elems
Definition: ChunkIter.h:37
AbstractBuffer * buffer_
Definition: Chunk.h:117
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
void unpinBuffer()
Definition: Chunk.cpp:215
bool isChunkOnDevice(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int device_id)
Definition: Chunk.cpp:40
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems)
Definition: Chunk.cpp:28
specifies the content in-memory of a row in the column metadata table
void setIndexBuffer(AbstractBuffer *buf)
int skip_size
Definition: ChunkIter.h:36
Definition: sqltypes.h:54
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
bool isBufferOnDevice(const ChunkKey &key, const MemoryLevel memLevel, const int deviceId)
Definition: DataMgr.cpp:406
AbstractBuffer * getChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t numBytes=0)
Definition: DataMgr.cpp:434
ChunkIter begin_iterator(const std::shared_ptr< ChunkMetadata > &, int start_idx=0, int skip=1) const
Definition: Chunk.cpp:259
int8_t * second_buf
Definition: ChunkIter.h:31
Definition: sqltypes.h:43
void initEncoder()
Definition: Chunk.cpp:224
unencoded fixed length array encoder
int skip
Definition: ChunkIter.h:35
#define CHECK(condition)
Definition: Logger.h:197
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:259
std::vector< int > ChunkKey
Definition: types.h:35
bool is_fixlen_array() const
Definition: sqltypes.h:426
void setIndexBuffer(AbstractBuffer *buf)
For unencoded strings.
AbstractBuffer * createChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t page_size=0)
Definition: DataMgr.cpp:425
size_t getNumElemsForBytesInsertData(const DataBlockPtr &src_data, const size_t num_elems, const size_t start_idx, const size_t byte_limit, const bool replicating=false)
Definition: Chunk.cpp:128
void createChunkBuffer(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId=0, const size_t page_size=0)
Definition: Chunk.cpp:110
SQLTypeInfo columnType
AbstractBuffer * index_buf_
Definition: Chunk.h:118
int8_t * numbersPtr
Definition: sqltypes.h:149
unencoded array encoder
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
std::unique_ptr< Encoder > encoder
std::shared_ptr< ChunkMetadata > appendData(DataBlockPtr &srcData, const size_t numAppendElems, const size_t startIdx, const bool replicating=false)
Definition: Chunk.cpp:171