OmniSciDB  340b00dbf6
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
Chunk.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file Chunk.cpp
19  * @author Wei Hong <wei@mapd.com>
20  */
21 
22 #include "DataMgr/Chunk/Chunk.h"
26 
27 namespace Chunk_NS {
28 std::shared_ptr<Chunk> Chunk::getChunk(const ColumnDescriptor* cd,
29  DataMgr* data_mgr,
30  const ChunkKey& key,
31  const MemoryLevel memoryLevel,
32  const int deviceId,
33  const size_t numBytes,
34  const size_t numElems) {
35  std::shared_ptr<Chunk> chunkp = std::make_shared<Chunk>(Chunk(cd));
36  chunkp->getChunkBuffer(data_mgr, key, memoryLevel, deviceId, numBytes, numElems);
37  return chunkp;
38 }
39 
41  const ChunkKey& key,
42  const MemoryLevel mem_level,
43  const int device_id) {
46  ChunkKey subKey = key;
47  ChunkKey indexKey(subKey);
48  indexKey.push_back(1);
49  ChunkKey dataKey(subKey);
50  dataKey.push_back(2);
51  return data_mgr->isBufferOnDevice(indexKey, mem_level, device_id) &&
52  data_mgr->isBufferOnDevice(dataKey, mem_level, device_id);
53  } else {
54  return data_mgr->isBufferOnDevice(key, mem_level, device_id);
55  }
56 }
57 
59  const ChunkKey& key,
60  const MemoryLevel mem_level,
61  const int device_id,
62  const size_t num_bytes,
63  const size_t num_elems) {
66  ChunkKey subKey = key;
67  subKey.push_back(1); // 1 for the main buffer_
68  buffer_ = data_mgr->getChunkBuffer(subKey, mem_level, device_id, num_bytes);
69  subKey.pop_back();
70  subKey.push_back(2); // 2 for the index buffer_
71  index_buf_ = data_mgr->getChunkBuffer(
72  subKey,
73  mem_level,
74  device_id,
75  (num_elems + 1) * sizeof(StringOffsetT)); // always record n+1 offsets so string
76  // length can be calculated
77  switch (column_desc_->columnType.get_type()) {
78  case kARRAY: {
79  auto array_encoder = dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
80  CHECK(array_encoder);
81  array_encoder->setIndexBuffer(index_buf_);
82  break;
83  }
84  case kTEXT:
85  case kVARCHAR:
86  case kCHAR: {
88  auto str_encoder = dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
89  CHECK(str_encoder);
90  str_encoder->setIndexBuffer(index_buf_);
91  break;
92  }
93  case kPOINT:
94  case kLINESTRING:
95  case kPOLYGON:
96  case kMULTIPOLYGON: {
97  auto str_encoder = dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
98  CHECK(str_encoder);
99  str_encoder->setIndexBuffer(index_buf_);
100  break;
101  }
102  default:
103  UNREACHABLE();
104  }
105  } else {
106  buffer_ = data_mgr->getChunkBuffer(key, mem_level, device_id, num_bytes);
107  }
108 }
109 
111  const ChunkKey& key,
112  const MemoryLevel mem_level,
113  const int device_id,
114  const size_t page_size) {
117  ChunkKey subKey = key;
118  subKey.push_back(1); // 1 for the main buffer_
119  buffer_ = data_mgr->createChunkBuffer(subKey, mem_level, device_id, page_size);
120  subKey.pop_back();
121  subKey.push_back(2); // 2 for the index buffer_
122  index_buf_ = data_mgr->createChunkBuffer(subKey, mem_level, device_id, page_size);
123  } else {
124  buffer_ = data_mgr->createChunkBuffer(key, mem_level, device_id, page_size);
125  }
126 }
127 
129  const size_t num_elems,
130  const size_t start_idx,
131  const size_t byte_limit,
132  const bool replicating) {
134  switch (column_desc_->columnType.get_type()) {
135  case kARRAY: {
136  if (column_desc_->columnType.get_size() > 0) {
137  FixedLengthArrayNoneEncoder* array_encoder =
139  return array_encoder->getNumElemsForBytesInsertData(
140  src_data.arraysPtr, start_idx, num_elems, byte_limit, replicating);
141  }
142  ArrayNoneEncoder* array_encoder =
143  dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
144  return array_encoder->getNumElemsForBytesInsertData(
145  src_data.arraysPtr, start_idx, num_elems, byte_limit, replicating);
146  }
147  case kTEXT:
148  case kVARCHAR:
149  case kCHAR: {
151  StringNoneEncoder* str_encoder =
152  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
153  return str_encoder->getNumElemsForBytesInsertData(
154  src_data.stringsPtr, start_idx, num_elems, byte_limit, replicating);
155  }
156  case kPOINT:
157  case kLINESTRING:
158  case kPOLYGON:
159  case kMULTIPOLYGON: {
160  StringNoneEncoder* str_encoder =
161  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
162  return str_encoder->getNumElemsForBytesInsertData(
163  src_data.stringsPtr, start_idx, num_elems, byte_limit, replicating);
164  }
165  default:
166  CHECK(false);
167  return 0;
168  }
169 }
170 
171 std::shared_ptr<ChunkMetadata> Chunk::appendData(DataBlockPtr& src_data,
172  const size_t num_elems,
173  const size_t start_idx,
174  const bool replicating) {
175  const auto& ti = column_desc_->columnType;
176  if (ti.is_varlen()) {
177  switch (ti.get_type()) {
178  case kARRAY: {
179  if (ti.get_size() > 0) {
180  FixedLengthArrayNoneEncoder* array_encoder =
182  return array_encoder->appendData(
183  src_data.arraysPtr, start_idx, num_elems, replicating);
184  }
185  ArrayNoneEncoder* array_encoder =
186  dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
187  return array_encoder->appendData(
188  src_data.arraysPtr, start_idx, num_elems, replicating);
189  }
190  case kTEXT:
191  case kVARCHAR:
192  case kCHAR: {
193  CHECK_EQ(kENCODING_NONE, ti.get_compression());
194  StringNoneEncoder* str_encoder =
195  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
196  return str_encoder->appendData(
197  src_data.stringsPtr, start_idx, num_elems, replicating);
198  }
199  case kPOINT:
200  case kLINESTRING:
201  case kPOLYGON:
202  case kMULTIPOLYGON: {
203  StringNoneEncoder* str_encoder =
204  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
205  return str_encoder->appendData(
206  src_data.stringsPtr, start_idx, num_elems, replicating);
207  }
208  default:
209  CHECK(false);
210  }
211  }
212  return buffer_->getEncoder()->appendData(
213  src_data.numbersPtr, num_elems, ti, replicating);
214 }
215 
217  if (buffer_) {
218  buffer_->unPin();
219  }
220  if (index_buf_) {
221  index_buf_->unPin();
222  }
223 }
224 
229  switch (column_desc_->columnType.get_type()) {
230  case kARRAY: {
231  ArrayNoneEncoder* array_encoder =
232  dynamic_cast<ArrayNoneEncoder*>(buffer_->getEncoder());
233  array_encoder->setIndexBuffer(index_buf_);
234  break;
235  }
236  case kTEXT:
237  case kVARCHAR:
238  case kCHAR: {
240  StringNoneEncoder* str_encoder =
241  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
242  str_encoder->setIndexBuffer(index_buf_);
243  break;
244  }
245  case kPOINT:
246  case kLINESTRING:
247  case kPOLYGON:
248  case kMULTIPOLYGON: {
249  StringNoneEncoder* str_encoder =
250  dynamic_cast<StringNoneEncoder*>(buffer_->getEncoder());
251  str_encoder->setIndexBuffer(index_buf_);
252  break;
253  }
254  default:
255  CHECK(false);
256  }
257  }
258 }
259 
260 ChunkIter Chunk::begin_iterator(const std::shared_ptr<ChunkMetadata>& chunk_metadata,
261  int start_idx,
262  int skip) const {
263  ChunkIter it;
265  it.skip = skip;
267  if (it.skip_size < 0) { // if it's variable length
268  it.current_pos = it.start_pos =
269  index_buf_->getMemoryPtr() + start_idx * sizeof(StringOffsetT);
272  } else {
273  it.current_pos = it.start_pos = buffer_->getMemoryPtr() + start_idx * it.skip_size;
274  it.end_pos = buffer_->getMemoryPtr() + buffer_->size();
275  it.second_buf = nullptr;
276  }
277  it.num_elems = chunk_metadata->numElements;
278  return it;
279 }
280 } // namespace Chunk_NS
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< int > ChunkKey
Definition: types.h:37
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
int8_t * start_pos
Definition: ChunkIter.h:33
HOST DEVICE int get_size() const
Definition: sqltypes.h:340
int8_t * current_pos
Definition: ChunkIter.h:32
SQLTypeInfo type_info
Definition: ChunkIter.h:30
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:221
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:222
const ColumnDescriptor * column_desc_
Definition: Chunk.h:119
bool is_varlen() const
Definition: sqltypes.h:506
virtual int8_t * getMemoryPtr()=0
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
#define UNREACHABLE()
Definition: Logger.h:241
void getChunkBuffer(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId=0, const size_t num_bytes=0, const size_t num_elems=0)
Definition: Chunk.cpp:58
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:330
void initEncoder(const SQLTypeInfo &tmp_sql_type)
size_t getNumElemsForBytesInsertData(const std::vector< std::string > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
int32_t StringOffsetT
Definition: sqltypes.h:947
bool is_fixlen_array() const
Definition: sqltypes.h:497
int8_t * end_pos
Definition: ChunkIter.h:34
size_t num_elems
Definition: ChunkIter.h:37
AbstractBuffer * buffer_
Definition: Chunk.h:117
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
void unpinBuffer()
Definition: Chunk.cpp:216
bool isChunkOnDevice(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int device_id)
Definition: Chunk.cpp:40
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems)
Definition: Chunk.cpp:28
specifies the content in-memory of a row in the column metadata table
void setIndexBuffer(AbstractBuffer *buf)
int skip_size
Definition: ChunkIter.h:36
Definition: sqltypes.h:54
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:338
std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1) override
bool isBufferOnDevice(const ChunkKey &key, const MemoryLevel memLevel, const int deviceId)
Definition: DataMgr.cpp:404
AbstractBuffer * getChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t numBytes=0)
Definition: DataMgr.cpp:425
int8_t * second_buf
Definition: ChunkIter.h:31
Definition: sqltypes.h:43
void initEncoder()
Definition: Chunk.cpp:225
unencoded fixed length array encoder
int skip
Definition: ChunkIter.h:35
#define CHECK(condition)
Definition: Logger.h:197
void setIndexBuffer(AbstractBuffer *buf)
For unencoded strings.
AbstractBuffer * createChunkBuffer(const ChunkKey &key, const MemoryLevel memoryLevel, const int deviceId=0, const size_t page_size=0)
Definition: DataMgr.cpp:416
size_t getNumElemsForBytesInsertData(const DataBlockPtr &src_data, const size_t num_elems, const size_t start_idx, const size_t byte_limit, const bool replicating=false)
Definition: Chunk.cpp:128
ChunkIter begin_iterator(const std::shared_ptr< ChunkMetadata > &, int start_idx=0, int skip=1) const
Definition: Chunk.cpp:260
void createChunkBuffer(DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId=0, const size_t page_size=0)
Definition: Chunk.cpp:110
SQLTypeInfo columnType
AbstractBuffer * index_buf_
Definition: Chunk.h:118
int8_t * numbersPtr
Definition: sqltypes.h:220
unencoded array encoder
size_t getNumElemsForBytesInsertData(const std::vector< ArrayDatum > *srcData, const int start_idx, const size_t numAppendElems, const size_t byteLimit, const bool replicating=false)
std::shared_ptr< ChunkMetadata > appendData(DataBlockPtr &srcData, const size_t numAppendElems, const size_t startIdx, const bool replicating=false)
Definition: Chunk.cpp:171
virtual std::shared_ptr< ChunkMetadata > appendData(int8_t *&src_data, const size_t num_elems_to_append, const SQLTypeInfo &ti, const bool replicating=false, const int64_t offset=-1)=0