OmniSciDB  c1a53651b2
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ChunkIter.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file ChunkIter.cpp
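19  * @brief Element accessors for ChunkIter: sequential and indexed reads of
19  *        fixed-size and variable-length chunk data, with optional expansion
19  *        of fixed-encoded values.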
20  *
21  */
22 
23 #include "ChunkIter.h"
24 
25 #include <cstdlib>
26 
27 DEVICE static void decompress(const SQLTypeInfo& ti,
28  int8_t* compressed,
30  Datum* datum) {
31  switch (ti.get_type()) {
32  case kSMALLINT:
33  result->length = sizeof(int16_t);
34  result->pointer = (int8_t*)&datum->smallintval;
35  switch (ti.get_compression()) {
36  case kENCODING_FIXED:
37  assert(ti.get_comp_param() == 8);
38  datum->smallintval = (int16_t) * (int8_t*)compressed;
39  break;
40  case kENCODING_RL:
41  case kENCODING_DIFF:
42  case kENCODING_SPARSE:
43  assert(false);
44  break;
45  default:
46  assert(false);
47  }
48  break;
49  case kINT:
50  result->length = sizeof(int32_t);
51  result->pointer = (int8_t*)&datum->intval;
52  switch (ti.get_compression()) {
53  case kENCODING_FIXED:
54  switch (ti.get_comp_param()) {
55  case 8:
56  datum->intval = (int32_t) * (int8_t*)compressed;
57  break;
58  case 16:
59  datum->intval = (int32_t) * (int16_t*)compressed;
60  break;
61  default:
62  assert(false);
63  }
64  break;
65  case kENCODING_RL:
66  case kENCODING_DIFF:
67  case kENCODING_SPARSE:
68  assert(false);
69  break;
70  default:
71  assert(false);
72  }
73  break;
74  case kBIGINT:
75  case kNUMERIC:
76  case kDECIMAL:
77  result->length = sizeof(int64_t);
78  result->pointer = (int8_t*)&datum->bigintval;
79  switch (ti.get_compression()) {
80  case kENCODING_FIXED:
81  switch (ti.get_comp_param()) {
82  case 8:
83  datum->bigintval = (int64_t) * (int8_t*)compressed;
84  break;
85  case 16:
86  datum->bigintval = (int64_t) * (int16_t*)compressed;
87  break;
88  case 32:
89  datum->bigintval = (int64_t) * (int32_t*)compressed;
90  break;
91  default:
92  assert(false);
93  }
94  break;
95  case kENCODING_RL:
96  case kENCODING_DIFF:
97  case kENCODING_SPARSE:
98  assert(false);
99  break;
100  default:
101  assert(false);
102  }
103  break;
104  case kTIME:
105  case kTIMESTAMP:
106  case kDATE:
107  switch (ti.get_compression()) {
108  case kENCODING_FIXED:
109  datum->bigintval = (int64_t) * (int32_t*)compressed;
110  break;
112  switch (ti.get_comp_param()) {
113  case 0:
114  case 32:
115  datum->bigintval = (int64_t) * (int32_t*)compressed;
116  break;
117  case 16:
118  datum->bigintval = (int64_t) * (int16_t*)compressed;
119  break;
120  default:
121  assert(false);
122  break;
123  }
124  break;
125  case kENCODING_RL:
126  case kENCODING_DIFF:
127  case kENCODING_DICT:
128  case kENCODING_SPARSE:
129  case kENCODING_NONE:
130  assert(false);
131  break;
132  default:
133  assert(false);
134  }
135  result->length = sizeof(int64_t);
136  result->pointer = (int8_t*)&datum->bigintval;
137  break;
138  default:
139  assert(false);
140  }
141  result->is_null = ti.is_null(*datum);
142 }
143 
145  it->current_pos = it->start_pos;
146 }
147 
149  bool uncompress,
151  bool* is_end) {
152  if (it->current_pos >= it->end_pos) {
153  *is_end = true;
154  result->length = 0;
155  result->pointer = NULL;
156  result->is_null = true;
157  return;
158  }
159  *is_end = false;
160 
161  if (it->skip_size > 0) {
162  // for fixed-size
163  if (uncompress && (it->type_info.get_compression() != kENCODING_NONE)) {
164  decompress(it->type_info, it->current_pos, result, &it->datum);
165  } else {
166  result->length = static_cast<size_t>(it->skip_size);
167  result->pointer = it->current_pos;
168  result->is_null = it->type_info.is_null(result->pointer);
169  }
170  it->current_pos += it->skip * it->skip_size;
171  } else {
172  StringOffsetT offset = *(StringOffsetT*)it->current_pos;
173  result->length = static_cast<size_t>(*((StringOffsetT*)it->current_pos + 1) - offset);
174  result->pointer = it->second_buf + offset;
175  // @TODO(wei) treat zero length as null for now
176  result->is_null = (result->length == 0);
177  it->current_pos += it->skip * sizeof(StringOffsetT);
178  }
179 }
180 
181 // @brief get nth element in Chunk. Does not change ChunkIter state
183  int n,
184  bool uncompress,
186  bool* is_end) {
187  if (static_cast<size_t>(n) >= it->num_elems || n < 0) {
188  *is_end = true;
189  result->length = 0;
190  result->pointer = NULL;
191  result->is_null = true;
192  return;
193  }
194  *is_end = false;
195 
196  if (it->skip_size > 0) {
197  // for fixed-size
198  int8_t* current_pos = it->start_pos + n * it->skip_size;
199  if (uncompress && (it->type_info.get_compression() != kENCODING_NONE)) {
200  decompress(it->type_info, current_pos, result, &it->datum);
201  } else {
202  result->length = static_cast<size_t>(it->skip_size);
203  result->pointer = current_pos;
204  result->is_null = it->type_info.is_null(result->pointer);
205  }
206  } else {
207  int8_t* current_pos = it->start_pos + n * sizeof(StringOffsetT);
208  StringOffsetT offset = *(StringOffsetT*)current_pos;
209  result->length = static_cast<size_t>(*((StringOffsetT*)current_pos + 1) - offset);
210  result->pointer = it->second_buf + offset;
211  // @TODO(wei) treat zero length as null for now
212  result->is_null = (result->length == 0);
213  }
214 }
215 
216 // @brief get nth element in Chunk. Does not change ChunkIter state
217 DEVICE void ChunkIter_get_nth(ChunkIter* it, int n, ArrayDatum* result, bool* is_end) {
219  VarlenArray_get_nth(reinterpret_cast<int8_t*>(it), n, result, is_end);
220  return;
221  }
222  if (static_cast<size_t>(n) >= it->num_elems || n < 0) {
223  *is_end = true;
224  result->length = 0;
225  result->pointer = NULL;
226  result->is_null = true;
227  return;
228  }
229  *is_end = false;
230 
231  if (it->skip_size > 0) {
232  // for fixed-size
233  int8_t* current_pos = it->start_pos + n * it->skip_size;
234  result->length = static_cast<size_t>(it->skip_size);
235  result->pointer = current_pos;
236  bool is_null = false;
237  if (!it->type_info.get_notnull()) {
238  // Nulls can only be recognized when iterating over a !notnull-typed chunk
239  is_null = it->type_info.is_null_fixlen_array(result->pointer, result->length);
240  }
241  result->is_null = is_null;
242  } else {
243  int8_t* current_pos = it->start_pos + n * sizeof(ArrayOffsetT);
244  int8_t* next_pos = current_pos + sizeof(ArrayOffsetT);
245  ArrayOffsetT offset = *(ArrayOffsetT*)current_pos;
246  ArrayOffsetT next_offset = *(ArrayOffsetT*)next_pos;
247  if (next_offset < 0) { // Encoded NULL array
248  result->length = 0;
249  result->pointer = NULL;
250  result->is_null = true;
251  } else {
252  if (offset < 0) {
253  offset = -offset; // Previous array may have been NULL, remove negativity
254  }
255  result->length = static_cast<size_t>(next_offset - offset);
256  result->pointer = it->second_buf + offset;
257  result->is_null = false;
258  }
259  }
260 }
261 
262 // @brief get nth varlen array element in Chunk. Does not change ChunkIter state
264  int n,
266  bool* is_end) {
268  VarlenArray_get_nth(reinterpret_cast<int8_t*>(it), n, result, is_end);
269  return;
270  }
271  *is_end = (static_cast<size_t>(n) >= it->num_elems || n < 0);
272 
273  if (!*is_end) {
274  int8_t* current_pos = it->start_pos + n * sizeof(ArrayOffsetT);
275  int8_t* next_pos = current_pos + sizeof(ArrayOffsetT);
276  ArrayOffsetT offset = *(ArrayOffsetT*)current_pos;
277  ArrayOffsetT next_offset = *(ArrayOffsetT*)next_pos;
278 
279  if (next_offset >= 0) {
280  // Previous array may have been NULL, remove offset negativity
281  if (offset < 0) {
282  offset = -offset;
283  }
284  result->length = static_cast<size_t>(next_offset - offset);
285  result->pointer = it->second_buf + offset;
286  result->is_null = false;
287  return;
288  }
289  }
290  // Encoded NULL array or out of bounds
291  result->length = 0;
292  result->pointer = NULL;
293  result->is_null = true;
294 }
295 
296 // @brief get nth varlen notnull array element in Chunk. Does not change ChunkIter state
298  int n,
300  bool* is_end) {
301  *is_end = (static_cast<size_t>(n) >= it->num_elems || n < 0);
302 
303  int8_t* current_pos = it->start_pos + n * sizeof(ArrayOffsetT);
304  int8_t* next_pos = current_pos + sizeof(ArrayOffsetT);
305  ArrayOffsetT offset = *(ArrayOffsetT*)current_pos;
306  ArrayOffsetT next_offset = *(ArrayOffsetT*)next_pos;
307 
308  result->length = static_cast<size_t>(next_offset - offset);
309  result->pointer = it->second_buf + offset;
310  result->is_null = false;
311 }
312 
313 // @brief get nth point coord array in Chunk. Does not change ChunkIter state
314 // Custom iterator for point coord arrays:
315 // int8_t[16] representing uncompressed double[2] coords
316 // int8_t[8] representing 32-bit compressed int32_t[2] coords
318  int n,
320  bool* is_end) {
321  if (static_cast<size_t>(n) >= it->num_elems || n < 0) {
322  *is_end = true;
323  result->length = 0;
324  result->pointer = NULL;
325  result->is_null = true;
326  return;
327  }
328  *is_end = false;
329 
330  assert(it->skip_size > 0);
331  int8_t* current_pos = it->start_pos + n * it->skip_size;
332  result->length = static_cast<size_t>(it->skip_size);
333  result->pointer = current_pos;
334  bool is_null = false;
335  if (!it->type_info.get_notnull()) {
336  // Nulls can only be recognized when iterating over a !notnull-typed chunk
337  is_null = it->type_info.is_null_point_coord_array(result->pointer, result->length);
338  }
339  result->is_null = is_null;
340 }
int8_t * start_pos
Definition: ChunkIter.h:34
int8_t * current_pos
Definition: ChunkIter.h:33
SQLTypeInfo type_info
Definition: ChunkIter.h:31
Definition: sqltypes.h:66
static DEVICE void decompress(const SQLTypeInfo &ti, int8_t *compressed, VarlenDatum *result, Datum *datum)
Definition: ChunkIter.cpp:27
Datum datum
Definition: ChunkIter.h:39
bool is_null
Definition: Datum.h:55
DEVICE void ChunkIter_get_nth_point_coords(ChunkIter *it, int n, ArrayDatum *result, bool *is_end)
Definition: ChunkIter.cpp:317
HOST DEVICE bool is_null_fixlen_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:916
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:381
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:182
int32_t intval
Definition: Datum.h:71
int8_t * pointer
Definition: Datum.h:54
int32_t StringOffsetT
Definition: sqltypes.h:1258
#define DEVICE
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:219
int8_t * end_pos
Definition: ChunkIter.h:35
size_t num_elems
Definition: ChunkIter.h:38
CONSTEXPR DEVICE bool is_null(const T &value)
int64_t bigintval
Definition: Datum.h:72
int16_t smallintval
Definition: Datum.h:70
DEVICE void ChunkIter_get_nth_varlen(ChunkIter *it, int n, ArrayDatum *result, bool *is_end)
Definition: ChunkIter.cpp:263
HOST DEVICE bool is_null(const Datum &d) const
Definition: sqltypes.h:854
int skip_size
Definition: ChunkIter.h:37
Definition: sqltypes.h:70
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:389
void ChunkIter_reset(ChunkIter *it)
Definition: ChunkIter.cpp:144
int32_t ArrayOffsetT
Definition: sqltypes.h:1259
int8_t * second_buf
Definition: ChunkIter.h:32
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:392
DEVICE void ChunkIter_get_next(ChunkIter *it, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:148
int skip
Definition: ChunkIter.h:36
Definition: sqltypes.h:62
constexpr double n
Definition: Utm.h:38
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:388
HOST static DEVICE bool isFlatBuffer(const void *buffer)
Definition: FlatBuffer.h:186
Definition: Datum.h:67
DEVICE void ChunkIter_get_nth_varlen_notnull(ChunkIter *it, int n, ArrayDatum *result, bool *is_end)
Definition: ChunkIter.cpp:297
HOST DEVICE bool is_null_point_coord_array(const int8_t *val, int array_size) const
Definition: sqltypes.h:949
DEVICE void VarlenArray_get_nth(int8_t *buf, int n, ArrayDatum *result, bool *is_end)
Definition: sqltypes.h:1503
size_t length
Definition: Datum.h:53