OmniSciDB  dfae7c3b14
JoinColumnIterator.h
Go to the documentation of this file.
1 /*
2  * Copyright 2019 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
21  const JoinColumn* join_column; // WARNING: pointer might be on GPU
22  const JoinColumnTypeInfo* type_info; // WARNING: pointer might be on GPU
23  const struct JoinChunk* join_chunk_array;
24  const int8_t* chunk_data; // bool(chunk_data) tells if this iterator is valid
27  size_t index;
28  size_t start;
29  size_t step;
30 
// Validity test: the iterator is usable while chunk_data is non-null. chunk_data is
// nullptr for a default-constructed iterator, and operator++ resets it to nullptr once
// the chunk index is no longer below join_column->num_chunks.
31  DEVICE FORCE_INLINE operator bool() const { return chunk_data; }
32 
// Returns the address of the current element inside the current chunk buffer: the byte
// offset is index_inside_chunk * join_column->elem_sz from chunk_data.
// No null check is performed here; presumably callers test the iterator with
// operator bool first -- TODO confirm.
// NOTE(review): this reads elem_sz through join_column, whereas the decode switch reads
// type_info->elem_sz; the two are presumably equal -- confirm.
33  DEVICE FORCE_INLINE const int8_t* ptr() const {
34  return &chunk_data[index_inside_chunk * join_column->elem_sz];
35  }
36 
38  switch (type_info->column_type) {
39  case SmallDate:
41  chunk_data,
42  type_info->elem_sz,
43  type_info->elem_sz == 4 ? NULL_INT : NULL_SMALLINT,
44  type_info->elem_sz == 4 ? NULL_INT : NULL_SMALLINT,
46  case Signed:
49  case Unsigned:
52  case Double:
54  default:
55 #ifndef __CUDACC__
56  CHECK(false);
57 #else
58  assert(0);
59 #endif
60  return 0;
61  }
62  }
63 
// Value produced when the iterator is dereferenced: the iterator's running `index`
// paired with the result of getElementSwitch().
64  struct IndexedElement {
65  size_t index;  // iterator position counter (advanced by `step` in operator++)
66  int64_t element;  // value returned by getElementSwitch() for the current element
67  }; // struct IndexedElement
68 
70  return {index, getElementSwitch()};
71  }
72 
74  index += step;
75  index_inside_chunk += step;
76  while (chunk_data &&
77  index_inside_chunk >= join_chunk_array[index_of_chunk].num_elems) {
78  index_inside_chunk -= join_chunk_array[index_of_chunk].num_elems;
80  if (index_of_chunk < join_column->num_chunks) {
81  chunk_data = join_chunk_array[index_of_chunk].col_buff;
82  } else {
83  chunk_data = nullptr;
84  }
85  }
86  return *this;
87  }
88 
// Default constructor: produces an invalid iterator (operator bool yields false).
// Only chunk_data is initialized; every other member is left uninitialized, so nothing
// except the validity test should be relied upon for such an object.
89  DEVICE JoinColumnIterator() : chunk_data(nullptr) {}
90 
92  const JoinColumn* join_column, // WARNING: pointer might be on GPU
93  const JoinColumnTypeInfo* type_info, // WARNING: pointer might be on GPU
94  size_t start,
95  size_t step)
96  : join_column(join_column)
97  , type_info(type_info)
98  , join_chunk_array(
99  reinterpret_cast<const struct JoinChunk*>(join_column->col_chunks_buff))
100  , chunk_data(join_column->num_elems > 0 ? join_chunk_array->col_buff : nullptr)
101  , index_of_chunk(0)
102  , index_inside_chunk(0)
103  , index(0)
104  , start(start)
105  , step(step) {
106  // Stagger the index differently for each thread iterating over the column.
107  auto temp = this->step;
108  this->step = this->start;
109  operator++();
110  this->step = temp;
111  }
112 }; // struct JoinColumnIterator
113 
117  // NOTE(sy): Someday we might want to merge JoinColumnTypeInfo into JoinColumn but
118  // this class is a good enough solution for now until we have time to do more cleanup.
119  const struct JoinColumn* join_column;
121 
123  return JoinColumnIterator(join_column, type_info, 0, 1);
124  }
125 
127 
128  struct Slice {
130  size_t start;
131  size_t step;
132 
134  return JoinColumnIterator(
135  join_column_typed->join_column, join_column_typed->type_info, start, step);
136  }
137 
139 
140  }; // struct Slice
141 
// Builds a Slice that records this object together with `start` and `step`. The Slice
// forwards both values to the JoinColumnIterator constructor, which advances the new
// iterator by `start` once and then by `step` on every subsequent increment.
142  DEVICE Slice slice(size_t start, size_t step) { return Slice{this, start, step}; }
143 
144 }; // struct JoinColumnTyped
145 
148  // NOTE(sy): Someday we'd prefer to JIT compile this iterator, producing faster,
149  // custom code for each combination of column types encountered at runtime.
150 
151  size_t num_cols;
153 
154  // NOTE(sy): Are these multiple iterator instances (one per column) required when
155  // we are always pointing to the same row in all N columns? Yes they are required,
156  // if the chunk sizes can be different from column to column. I don't know if they
157  // can or can't, so this code plays it safe for now.
158 
// Default constructor: sets num_cols to 0, so the validity test (operator bool) has no
// column iterators to inspect and reports false.
159  DEVICE JoinColumnTupleIterator() : num_cols(0) {}
160 
162  const JoinColumn* join_column_per_key,
163  const JoinColumnTypeInfo* type_info_per_key,
164  size_t start,
165  size_t step)
166  : num_cols(num_cols) {
167 #ifndef __CUDACC__
169 #else
170  assert(num_cols <= g_maximum_conditions_to_coalesce);
171 #endif
172  for (size_t i = 0; i < num_cols; ++i) {
173  join_column_iterators[i] =
174  JoinColumnIterator(&join_column_per_key[i],
175  type_info_per_key ? &type_info_per_key[i] : nullptr,
176  start,
177  step);
178  }
179  }
180 
// Validity test for the tuple iterator. Scans the first num_cols per-column iterators
// and returns true as soon as one of them is still valid; returns false when none is
// (which includes the num_cols == 0 case). The rationale comment below the return
// statement explains that early exit.
181  DEVICE FORCE_INLINE operator bool() const {
182  for (size_t i = 0; i < num_cols; ++i) {
183  if (join_column_iterators[i]) {
184  return true;
185  // If any column iterator is still valid, then the tuple is still valid.
186  }
187  }
188  return false;
189  }
190 
192  for (size_t i = 0; i < num_cols; ++i) {
193  ++join_column_iterators[i];
194  }
195  return *this;
196  }
197 
199 }; // struct JoinColumnTupleIterator
200 
204  size_t num_cols;
207 
209  : num_cols(0), join_column_per_key(nullptr), type_info_per_key(nullptr) {}
210 
// Constructs a tuple view over `num_cols` key columns.
//   num_cols            - number of columns in the tuple.
//   join_column_per_key - pointer to the per-column JoinColumn entries; indexed
//                         [0, num_cols) when a tuple iterator is built.
//   type_info_per_key   - pointer to the per-column JoinColumnTypeInfo entries; the
//                         tuple iterator constructor tolerates nullptr here and then
//                         passes nullptr type info to each column iterator.
// The pointers are stored as given; the pointed-to arrays are not copied.
211  DEVICE JoinColumnTuple(size_t num_cols,
212  const JoinColumn* join_column_per_key,
213  const JoinColumnTypeInfo* type_info_per_key)
214  : num_cols(num_cols)
215  , join_column_per_key(join_column_per_key)
216  , type_info_per_key(type_info_per_key) {}
217 
220  num_cols, join_column_per_key, type_info_per_key, 0, 1);
221  }
222 
224 
225  struct Slice {
227  size_t start;
228  size_t step;
229 
231  return JoinColumnTupleIterator(join_column_tuple->num_cols,
232  join_column_tuple->join_column_per_key,
233  join_column_tuple->type_info_per_key,
234  start,
235  step);
236  }
237 
239 
240  }; // struct Slice
241 
// Builds a Slice that records this tuple together with `start` and `step`. The Slice
// forwards both values to the JoinColumnTupleIterator constructor, which in turn hands
// them to every per-column JoinColumnIterator it creates.
242  DEVICE Slice slice(size_t start, size_t step) { return Slice{this, start, step}; }
243 
244 }; // struct JoinColumnTuple
const int8_t * chunk_data
const ColumnType column_type
DEVICE Slice slice(size_t start, size_t step)
DEVICE JoinColumnTupleIterator(size_t num_cols, const JoinColumn *join_column_per_key, const JoinColumnTypeInfo *type_info_per_key, size_t start, size_t step)
DEVICE FORCE_INLINE JoinColumnTupleIterator & operator*()
DEVICE FORCE_INLINE JoinColumnTupleIterator & operator++()
DEVICE JoinColumnTuple(size_t num_cols, const JoinColumn *join_column_per_key, const JoinColumnTypeInfo *type_info_per_key)
DEVICE NEVER_INLINE int64_t SUFFIX() fixed_width_int_decode_noinline(const int8_t *byte_stream, const int32_t byte_width, const int64_t pos)
Definition: DecodersImpl.h:83
DEVICE JoinColumnIterator end()
const size_t elem_sz
DEVICE FORCE_INLINE int64_t getElementSwitch() const
#define SUFFIX(name)
const struct JoinColumnTypeInfo * type_info
DEVICE JoinColumnTupleIterator begin()
const int8_t * col_buff
JoinColumnTyped * join_column_typed
Iterates over the rows of a JoinColumnTuple across multiple fragments/chunks.
#define DEVICE
DEVICE FORCE_INLINE const int8_t * ptr() const
JoinColumnTuple * join_column_tuple
Iterates over the rows of a JoinColumn across multiple fragments/chunks.
DEVICE JoinColumnIterator end()
DEVICE JoinColumnIterator begin()
DEVICE JoinColumnIterator begin()
const JoinColumn * join_column
DEVICE NEVER_INLINE int64_t SUFFIX() fixed_width_unsigned_decode_noinline(const int8_t *byte_stream, const int32_t byte_width, const int64_t pos)
Definition: DecodersImpl.h:90
#define NULL_INT
Definition: sqltypes.h:183
const struct JoinChunk * join_chunk_array
DEVICE JoinColumnIterator(const JoinColumn *join_column, const JoinColumnTypeInfo *type_info, size_t start, size_t step)
const struct JoinColumn * join_column
DEVICE FORCE_INLINE JoinColumnIterator & operator++()
DEVICE JoinColumnTupleIterator end()
DEVICE NEVER_INLINE int64_t SUFFIX() fixed_width_small_date_decode_noinline(const int8_t *byte_stream, const int32_t byte_width, const int32_t null_val, const int64_t ret_null_val, const int64_t pos)
Definition: DecodersImpl.h:141
#define CHECK_LE(x, y)
Definition: Logger.h:208
#define FORCE_INLINE
const JoinColumnTypeInfo * type_info_per_key
DEVICE JoinColumnTupleIterator begin()
DEVICE FORCE_INLINE IndexedElement operator*() const
DEVICE Slice slice(size_t start, size_t step)
#define NULL_SMALLINT
Definition: sqltypes.h:182
const JoinColumn * join_column_per_key
DEVICE JoinColumnTupleIterator end()
#define CHECK(condition)
Definition: Logger.h:197
DEVICE NEVER_INLINE double SUFFIX() fixed_width_double_decode_noinline(const int8_t *byte_stream, const int64_t pos)
Definition: DecodersImpl.h:126
const JoinColumnTypeInfo * type_info
size_t num_elems
const size_t g_maximum_conditions_to_coalesce