OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
JoinColumnIterator.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
21  const JoinColumn* join_column; // WARNING: pointer might be on GPU
22  const JoinColumnTypeInfo* type_info; // WARNING: pointer might be on GPU
23  const struct JoinChunk* join_chunk_array;
24  const int8_t* chunk_data; // bool(chunk_data) tells if this iterator is valid
27  size_t index;
28  size_t start;
29  size_t step;
30 
31  DEVICE FORCE_INLINE operator bool() const { return chunk_data; }  // true while chunk_data is non-null, i.e. this iterator is still valid
32 
33  DEVICE FORCE_INLINE const int8_t* ptr() const {
35  }
36 
38  switch (type_info->column_type) {
39  case SmallDate:
41  chunk_data,
46  case Signed:
49  case Unsigned:
52  case Double:
54  default:
55 #ifndef __CUDACC__
56  CHECK(false);
57 #else
58  assert(0);
59 #endif
60  return 0;
61  }
62  }
63 
64  struct IndexedElement {  // aggregate built by `return {index, getElementSwitch()};` below
65  size_t index;  // first initializer: the iterator's `index` member at that point
66  int64_t element;  // second initializer: the value returned by getElementSwitch()
67  }; // struct IndexedElement
68 
70  return {index, getElementSwitch()};
71  }
72 
74  index += step;
76  while (chunk_data &&
80  if (index_of_chunk < join_column->num_chunks) {
82  } else {
83  chunk_data = nullptr;
84  }
85  }
86  return *this;
87  }
88 
90  index -= step;
92  while (chunk_data &&
96  if (index_of_chunk < join_column->num_chunks) {
98  } else {
99  chunk_data = nullptr;
100  }
101  }
102  return *this;
103  }
104 
106 
108  const JoinColumn* join_column, // WARNING: pointer might be on GPU
109  const JoinColumnTypeInfo* type_info, // WARNING: pointer might be on GPU
110  size_t start,
111  size_t step)
112  : join_column(join_column)
113  , type_info(type_info)
115  reinterpret_cast<const struct JoinChunk*>(join_column->col_chunks_buff))
116  , chunk_data(join_column->num_elems > 0 ? join_chunk_array->col_buff : nullptr)
117  , index_of_chunk(0)
118  , index_inside_chunk(0)
119  , index(0)
120  , start(start)
121  , step(step) {
122  // Stagger the index differently for each thread iterating over the column.
123  auto temp = this->step;
124  this->step = this->start;
125  operator++();
126  this->step = temp;
127  }
128 }; // struct JoinColumnIterator
129 
133  // NOTE(sy): Someday we might want to merge JoinColumnTypeInfo into JoinColumn but
134  // this class is a good enough solution for now until we have time to do more cleanup.
135  const struct JoinColumn* join_column;
137 
140  }
141 
143 
144  struct Slice {
146  size_t start;
147  size_t step;
148 
150  return JoinColumnIterator(
152  }
153 
155 
156  }; // struct Slice
157 
158  DEVICE Slice slice(size_t start, size_t step) { return Slice{this, start, step}; }  // builds a Slice from a pointer to this object plus the given start and step
159 
160 }; // struct JoinColumnTyped
161 
164  // NOTE(sy): Someday we'd prefer to JIT-compile this iterator, producing faster,
165  // custom code for each combination of column types encountered at runtime.
166 
167  size_t num_cols;
169 
170  // NOTE(sy): Are these multiple iterator instances (one per column) required when
171  // we are always pointing to the same row in all N columns? Yes, they are required
172  // if the chunk sizes can differ from column to column. I don't know whether they
173  // can or can't, so this code plays it safe for now.
174 
176 
178  const JoinColumn* join_column_per_key,
179  const JoinColumnTypeInfo* type_info_per_key,
180  size_t start,
181  size_t step)
182  : num_cols(num_cols) {
183  for (size_t i = 0; i < num_cols; ++i) {
185  JoinColumnIterator(&join_column_per_key[i],
186  type_info_per_key ? &type_info_per_key[i] : nullptr,
187  start,
188  step);
189  }
190  }
191 
192  DEVICE FORCE_INLINE operator bool() const {  // true if at least one per-column iterator is still valid
193  for (size_t i = 0; i < num_cols; ++i) {  // only the first num_cols entries of join_column_iterators are examined
194  if (join_column_iterators[i]) {  // uses the element's own conversion to bool
195  return true;
196  // If any column iterator is still valid, then the tuple is still valid.
197  }
198  }
199  return false;  // none of the num_cols iterators converted to true
200  }
201 
203  for (size_t i = 0; i < num_cols; ++i) {
205  }
206  return *this;
207  }
208 
210 }; // struct JoinColumnTupleIterator
211 
215  size_t num_cols;
218 
220  : num_cols(0), join_column_per_key(nullptr), type_info_per_key(nullptr) {}
221 
225  : num_cols(num_cols)
226  , join_column_per_key(join_column_per_key)
227  , type_info_per_key(type_info_per_key) {}
228 
232  }
233 
235 
236  struct Slice {
238  size_t start;
239  size_t step;
240 
245  start,
246  step);
247  }
248 
250 
251  }; // struct Slice
252 
253  DEVICE Slice slice(size_t start, size_t step) { return Slice{this, start, step}; }  // builds a Slice from a pointer to this object plus the given start and step
254 
255 }; // struct JoinColumnTuple
const int8_t * chunk_data
const ColumnType column_type
DEVICE Slice slice(size_t start, size_t step)
struct JoinChunk * join_chunk_array
DEVICE JoinColumnTupleIterator(size_t num_cols, const JoinColumn *join_column_per_key, const JoinColumnTypeInfo *type_info_per_key, size_t start, size_t step)
DEVICE FORCE_INLINE JoinColumnTupleIterator & operator*()
DEVICE FORCE_INLINE JoinColumnTupleIterator & operator++()
DEVICE JoinColumnTuple(size_t num_cols, const JoinColumn *join_column_per_key, const JoinColumnTypeInfo *type_info_per_key)
DEVICE NEVER_INLINE int64_t SUFFIX() fixed_width_int_decode_noinline(const int8_t *byte_stream, const int32_t byte_width, const int64_t pos)
Definition: DecodersImpl.h:91
DEVICE JoinColumnIterator end()
const size_t elem_sz
DEVICE FORCE_INLINE JoinColumnIterator & operator--()
#define SUFFIX(name)
DEVICE JoinColumnTupleIterator begin()
const int8_t * col_buff
JoinColumnTyped * join_column_typed
Iterates over the rows of a JoinColumnTuple across multiple fragments/chunks.
#define NULL_INT
struct JoinColumnTypeInfo * type_info
#define DEVICE
JoinColumnIterator join_column_iterators[g_maximum_conditions_to_coalesce]
JoinColumnTuple * join_column_tuple
Iterates over the rows of a JoinColumn across multiple fragments/chunks.
DEVICE JoinColumnIterator end()
DEVICE JoinColumnIterator begin()
DEVICE JoinColumnIterator begin()
const JoinColumn * join_column
DEVICE NEVER_INLINE int64_t SUFFIX() fixed_width_unsigned_decode_noinline(const int8_t *byte_stream, const int32_t byte_width, const int64_t pos)
Definition: DecodersImpl.h:98
DEVICE FORCE_INLINE int64_t getElementSwitch() const
DEVICE JoinColumnIterator(const JoinColumn *join_column, const JoinColumnTypeInfo *type_info, size_t start, size_t step)
DEVICE FORCE_INLINE JoinColumnIterator & operator++()
DEVICE JoinColumnTupleIterator end()
DEVICE NEVER_INLINE int64_t SUFFIX() fixed_width_small_date_decode_noinline(const int8_t *byte_stream, const int32_t byte_width, const int32_t null_val, const int64_t ret_null_val, const int64_t pos)
Definition: DecodersImpl.h:149
#define FORCE_INLINE
const JoinColumnTypeInfo * type_info_per_key
DEVICE JoinColumnTupleIterator begin()
DEVICE Slice slice(size_t start, size_t step)
const JoinColumn * join_column_per_key
DEVICE JoinColumnTupleIterator end()
#define CHECK(condition)
Definition: Logger.h:291
#define NULL_SMALLINT
DEVICE NEVER_INLINE double SUFFIX() fixed_width_double_decode_noinline(const int8_t *byte_stream, const int64_t pos)
Definition: DecodersImpl.h:134
DEVICE FORCE_INLINE IndexedElement operator*() const
DEVICE FORCE_INLINE const int8_t * ptr() const
const JoinColumnTypeInfo * type_info
struct JoinColumn * join_column
size_t num_elems
const size_t g_maximum_conditions_to_coalesce