OmniSciDB  1dac507f6e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
JoinHashTableInterface.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2019 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "JoinHashTableInterface.h"
19 
20 namespace {
21 
22 template <typename T>
23 void innerDecodeJoinHashBufferToString(const int8_t* ptr1,
24  size_t entry_count,
25  size_t key_component_count,
26  bool raw,
27  std::string& txt) {
28  auto empty = get_empty_key<T>();
29  auto ptr = reinterpret_cast<const T*>(ptr1);
30  for (size_t e = 0; e < entry_count; ++e, ptr += key_component_count) {
31  if (e != 0) {
32  txt += " ";
33  }
34  if (*ptr == empty && !raw) {
35  txt += "*"; // null hash table entry
36  } else if (*ptr == empty - 1 && !raw) {
37  txt += "?"; // write_pending (should never happen here)
38  } else {
39  txt += "(";
40  for (size_t j = 0; j < key_component_count; ++j) {
41  if (j != 0) {
42  txt += ",";
43  }
44  txt += std::to_string(ptr[j]);
45  }
46  txt += ")";
47  }
48  }
49 }
50 
51 } // anonymous namespace
52 
54  size_t key_component_count, // number of key parts
55  size_t key_component_width, // width of a key part
56  const int8_t* ptr1, // keys
57  const int8_t* ptr2, // offsets
58  const int8_t* ptr3, // counts
59  const int8_t* ptr4, // payloads (rowids)
60  size_t buffer_size, // total memory size
61  bool raw) {
62  std::string txt;
63 
64  CHECK(key_component_width == 8 || key_component_width == 4);
65 
66  auto i64ptr1 = reinterpret_cast<const int64_t*>(ptr1);
67  auto i64ptr2 = reinterpret_cast<const int64_t*>(ptr2);
68  auto i32ptr2 = reinterpret_cast<const int32_t*>(ptr2);
69  auto i32ptr3 = reinterpret_cast<const int32_t*>(ptr3);
70  auto i32ptr4 = reinterpret_cast<const int32_t*>(ptr4);
71  auto i32ptr5 = reinterpret_cast<const int32_t*>(ptr1 + buffer_size);
72  auto i64ptr5 = reinterpret_cast<const int64_t*>(i32ptr5);
73 
74  CHECK_LE(i64ptr1, i64ptr2);
75  CHECK_LT(i64ptr2, i64ptr5);
76  CHECK_LT(i32ptr2, i32ptr3);
77  CHECK_LT(i32ptr3, i32ptr4);
78  CHECK_LT(i32ptr4, i32ptr5);
79 
80  size_t entry_count = (ptr3 - ptr2) / sizeof(int32_t);
81 
82  // first section: keys
83  if (i64ptr1 < i64ptr2) {
84  if (key_component_width == 8) {
85  innerDecodeJoinHashBufferToString<int64_t>(
86  ptr1, entry_count, key_component_count, raw, txt);
87  } else if (key_component_width == 4) {
88  innerDecodeJoinHashBufferToString<int32_t>(
89  ptr1, entry_count, key_component_count, raw, txt);
90  }
91 
92  txt += " | ";
93  }
94 
95  // second section: offsets
96  for (size_t i = 0; &i32ptr2[i] < i32ptr3; ++i) {
97  if (i != 0) {
98  txt += " ";
99  }
100  if (i32ptr2[i] == -1) {
101  txt += "*"; // null
102  } else {
103  txt += std::to_string(i32ptr2[i]);
104  }
105  }
106 
107  txt += " | ";
108 
109  // third section: counts
110  for (size_t i = 0; &i32ptr3[i] < i32ptr4; ++i) {
111  if (i != 0) {
112  txt += " ";
113  }
114  if (i32ptr3[i] == 0) {
115  txt += "*"; // null
116  } else {
117  txt += std::to_string(i32ptr3[i]);
118  }
119  }
120 
121  txt += " | ";
122 
123  // fourth section: payloads (rowids)
124  for (size_t i = 0; &i32ptr4[i] < i32ptr5; ++i) {
125  if (i != 0) {
126  txt += " ";
127  }
128  if (i32ptr4[i] == -1) {
129  txt += "*"; // null
130  } else {
131  txt += std::to_string(i32ptr4[i]);
132  }
133  }
134 
135  return txt;
136 }
137 
138 namespace {
139 
140 template <typename T>
142  const ExecutorDeviceType device_type,
143  const int device_id) noexcept {
144  auto mem =
145  reinterpret_cast<const T*>(hash_table->getJoinHashBuffer(device_type, device_id));
146  auto memsz = hash_table->getJoinHashBufferSize(device_type, device_id) / sizeof(T);
147  std::string txt;
148  for (size_t i = 0; i < memsz; ++i) {
149  if (i > 0) {
150  txt += ", ";
151  }
152  txt += std::to_string(mem[i]);
153  }
154  return txt;
155 }
156 
157 } // anonymous namespace
158 
160  const int device_id) const noexcept {
161  return decodeJoinHashBufferToStringFlat<int64_t>(this, device_type, device_id);
162 }
163 
165  const int device_id) const noexcept {
166  return decodeJoinHashBufferToStringFlat<int32_t>(this, device_type, device_id);
167 }
168 
169 std::ostream& operator<<(std::ostream& os, const DecodedJoinHashBufferEntry& e) {
170  os << "(";
171  bool first = true;
172  for (auto k : e.key) {
173  if (!first) {
174  os << ",";
175  } else {
176  first = false;
177  }
178  os << k;
179  }
180  os << ")";
181  os << ": ";
182  first = true;
183  for (auto p : e.payload) {
184  if (!first) {
185  os << " ";
186  } else {
187  first = false;
188  }
189  os << p;
190  }
191  return os;
192 }
193 
194 std::ostream& operator<<(std::ostream& os,
195  const std::set<DecodedJoinHashBufferEntry>& s) {
196  for (auto e : s) {
197  os << e << "\n";
198  }
199  return os;
200 }
201 
202 namespace {
203 
204 template <typename T>
205 void innerDecodeJoinHashBuffer(const int8_t* ptr1,
206  const int32_t* ptr2,
207  const int32_t* ptr3,
208  const int32_t* ptr4,
209  size_t entry_count,
210  size_t key_component_count,
211  std::set<DecodedJoinHashBufferEntry>& s) {
212  auto empty = get_empty_key<T>();
213  auto ptr = reinterpret_cast<const T*>(ptr1);
214  for (size_t e = 0; e < entry_count; ++e, ptr += key_component_count) {
215  if (*ptr == empty) {
216  continue;
217  }
218 
219  std::vector<int64_t> key;
220  for (size_t j = 0; j < key_component_count; ++j) {
221  key.push_back(ptr[j]);
222  }
223 
224  int32_t offset = ptr2[e];
225 
226  int32_t count = ptr3[e];
227 
228  std::set<int32_t> payload;
229  for (size_t j = 0; j < static_cast<size_t>(count); ++j) {
230  payload.insert(ptr4[offset + j]);
231  }
232 
233  s.insert({std::move(key), std::move(payload)});
234  }
235 }
236 
237 template <typename T>
238 void innerDecodeJoinHashBuffer(const int32_t* ptr2,
239  const int32_t* ptr3,
240  const int32_t* ptr4,
241  size_t entry_count,
242  std::set<DecodedJoinHashBufferEntry>& s) {
243  auto empty = -1;
244  auto ptr = reinterpret_cast<const T*>(ptr2);
245  for (size_t e = 0; e < entry_count; ++e, ++ptr) {
246  if (*ptr == empty) {
247  continue;
248  }
249 
250  std::vector<int64_t> key;
251  key.push_back(e);
252 
253  int32_t offset = ptr2[e];
254 
255  int32_t count = ptr3[e];
256 
257  std::set<int32_t> payload;
258  for (size_t j = 0; j < static_cast<size_t>(count); ++j) {
259  payload.insert(ptr4[offset + j]);
260  }
261 
262  s.insert({std::move(key), std::move(payload)});
263  }
264 }
265 
266 } // anonymous namespace
267 
268 std::set<DecodedJoinHashBufferEntry> decodeJoinHashBuffer(
269  size_t key_component_count, // number of key parts
270  size_t key_component_width, // width of a key part
271  const int8_t* ptr1, // keys
272  const int8_t* ptr2, // offsets
273  const int8_t* ptr3, // counts
274  const int8_t* ptr4, // payloads (rowids)
275  size_t buffer_size) { // total memory size
276  std::set<DecodedJoinHashBufferEntry> s;
277 
278  CHECK(key_component_width == 8 || key_component_width == 4);
279 
280  auto i64ptr1 = reinterpret_cast<const int64_t*>(ptr1);
281  auto i64ptr2 = reinterpret_cast<const int64_t*>(ptr2);
282  auto i32ptr2 = reinterpret_cast<const int32_t*>(ptr2);
283  auto i32ptr3 = reinterpret_cast<const int32_t*>(ptr3);
284  auto i32ptr4 = reinterpret_cast<const int32_t*>(ptr4);
285  auto i32ptr5 = reinterpret_cast<const int32_t*>(ptr1 + buffer_size);
286  auto i64ptr5 = reinterpret_cast<const int64_t*>(i32ptr5);
287 
288  CHECK_LE(i64ptr1, i64ptr2);
289  CHECK_LT(i64ptr2, i64ptr5);
290  CHECK_LT(i32ptr2, i32ptr3);
291  CHECK_LT(i32ptr3, i32ptr4);
292  CHECK_LT(i32ptr4, i32ptr5);
293 
294  size_t entry_count = (ptr3 - ptr2) / sizeof(int32_t);
295 
296  if (i64ptr1 < i64ptr2) { // BaselineJoinHashTable or OverlapsJoinHashTable
297  if (key_component_width == 8) {
298  innerDecodeJoinHashBuffer<int64_t>(
299  ptr1, i32ptr2, i32ptr3, i32ptr4, entry_count, key_component_count, s);
300  } else if (key_component_width == 4) {
301  innerDecodeJoinHashBuffer<int32_t>(
302  ptr1, i32ptr2, i32ptr3, i32ptr4, entry_count, key_component_count, s);
303  }
304  } else { // JoinHashTable
305  if (key_component_width == 8) {
306  innerDecodeJoinHashBuffer<int64_t>(i32ptr2, i32ptr3, i32ptr4, entry_count, s);
307  } else if (key_component_width == 4) {
308  innerDecodeJoinHashBuffer<int32_t>(i32ptr2, i32ptr3, i32ptr4, entry_count, s);
309  }
310  }
311 
312  return s;
313 }
ExecutorDeviceType
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:53
std::string decodeJoinHashBufferToStringFlat(const JoinHashTableInterface *hash_table, const ExecutorDeviceType device_type, const int device_id) noexcept
std::string decodeJoinHashBufferToString(size_t key_component_count, size_t key_component_width, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size, bool raw)
void innerDecodeJoinHashBuffer(const int8_t *ptr1, const int32_t *ptr2, const int32_t *ptr3, const int32_t *ptr4, size_t entry_count, size_t key_component_count, std::set< DecodedJoinHashBufferEntry > &s)
std::string to_string(char const *&&v)
virtual std::string toStringFlat64(const ExecutorDeviceType device_type, const int device_id) const noexcept
CHECK(cgen_state)
virtual std::string toStringFlat32(const ExecutorDeviceType device_type, const int device_id) const noexcept
void innerDecodeJoinHashBufferToString(const int8_t *ptr1, size_t entry_count, size_t key_component_count, bool raw, std::string &txt)
std::set< DecodedJoinHashBufferEntry > decodeJoinHashBuffer(size_t key_component_count, size_t key_component_width, const int8_t *ptr1, const int8_t *ptr2, const int8_t *ptr3, const int8_t *ptr4, size_t buffer_size)
#define CHECK_LT(x, y)
Definition: Logger.h:200
#define CHECK_LE(x, y)
Definition: Logger.h:201
std::set< int32_t > payload
std::vector< int64_t > key