OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ResultSetIteration.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "Execute.h"
24 #include "Geospatial/Compression.h"
25 #include "Geospatial/Types.h"
26 #include "ParserNode.h"
30 #include "ResultSet.h"
32 #include "RuntimeFunctions.h"
33 #include "Shared/SqlTypesLayout.h"
34 #include "Shared/likely.h"
35 #include "Shared/sqltypes.h"
36 #include "TypePunning.h"
37 
38 #include <boost/math/special_functions/fpclassify.hpp>
39 
40 #include <memory>
41 #include <utility>
42 
43 namespace {
44 
// Interprets ptr1, ptr2 as the sum and count pair used for AVG.
// NOTE(review): the return-type/name line and the first parameter line
// (presumably `const int8_t* ptr1`) are missing from this capture — the body
// below reads the running sum through `ptr1` and the count through `ptr2`.
 const int8_t compact_sz1,
 const int8_t* ptr2,
 const int8_t compact_sz2,
 const TargetInfo& target_info) {
  int64_t sum{0};
  // Only valid for AVG aggregates.
  CHECK(target_info.agg_kind == kAVG);
  // Float aggregate arguments are stored as 4-byte floats regardless of the
  // padded slot width reported by the query memory descriptor.
  const bool float_argument_input = takes_float_argument(target_info);
  const auto actual_compact_sz1 = float_argument_input ? sizeof(float) : compact_sz1;
  const auto& agg_ti = target_info.agg_arg_type;
  if (agg_ti.is_integer() || agg_ti.is_decimal()) {
    sum = read_int_from_buff(ptr1, actual_compact_sz1);
  } else if (agg_ti.is_fp()) {
    switch (actual_compact_sz1) {
      case 8: {
        double d = *reinterpret_cast<const double*>(ptr1);
        // Bit-copy the double into the int64 carrier; may_alias_ptr keeps the
        // type punning defined under strict aliasing.
        sum = *reinterpret_cast<const int64_t*>(may_alias_ptr(&d));
        break;
      }
      case 4: {
        double d = *reinterpret_cast<const float*>(ptr1);
        sum = *reinterpret_cast<const int64_t*>(may_alias_ptr(&d));
        break;
      }
      default:
        CHECK(false);
    }
  } else {
    CHECK(false);
  }
  const auto count = read_int_from_buff(ptr2, compact_sz2);
  // Combine the sum/count pair into the final value for this SQL type.
  return pair_to_double({sum, count}, target_info.sql_type, false);
}
79 
// Given the entire buffer for the result set, buff, finds the beginning of the
// column for slot_idx. Only makes sense for column-wise representation.
// NOTE(review): this capture is missing the QueryMemoryDescriptor parameter
// line (the body references `query_mem_desc`) and the call line that
// re-assigns crt_col_ptr past the AVG count slot — see the dangling argument
// list inside the kAVG branch below.
const int8_t* advance_col_buff_to_slot(const int8_t* buff,
 const std::vector<TargetInfo>& targets,
 const size_t slot_idx,
 const bool separate_varlen_storage) {
  auto crt_col_ptr = get_cols_ptr(buff, query_mem_desc);
  const auto buffer_col_count = query_mem_desc.getBufferColSlotCount();
  size_t agg_col_idx{0};
  for (size_t target_idx = 0; target_idx < targets.size(); ++target_idx) {
    if (agg_col_idx == slot_idx) {
      return crt_col_ptr;
    }
    CHECK_LT(agg_col_idx, buffer_col_count);
    const auto& agg_info = targets[target_idx];
    crt_col_ptr =
        advance_to_next_columnar_target_buff(crt_col_ptr, query_mem_desc, agg_col_idx);
    if (agg_info.is_agg && agg_info.agg_kind == kAVG) {
      // AVG occupies two slots (sum and count); the count slot may itself be
      // the requested one.
      if (agg_col_idx + 1 == slot_idx) {
        return crt_col_ptr;
      }
      crt_col_ptr, query_mem_desc, agg_col_idx + 1);
    }
    agg_col_idx = advance_slot(agg_col_idx, agg_info, separate_varlen_storage);
  }
  // slot_idx must belong to one of the targets; reaching here is a bug.
  CHECK(false);
  return nullptr;
}
110 } // namespace
111 
// Gets the byte offset, starting from the beginning of the row targets buffer, of
// the value in position slot_idx (only makes sense for row-wise representation).
// NOTE(review): the second parameter line (presumably
// `const QueryMemoryDescriptor& query_mem_desc) {`) is missing from this
// capture.
size_t result_set::get_byteoff_of_slot(const size_t slot_idx,
  return query_mem_desc.getPaddedColWidthForRange(0, slot_idx);
}
118 
// Materializes the row at global_entry_idx as one TargetValue per target.
// Returns an empty vector when the underlying entry is empty (unless
// fixup_count_distinct_pointers is set, which always addresses base storage).
// When targets_to_skip is non-empty, marked targets yield nullptr
// placeholders so the row stays positionally aligned with targets_.
std::vector<TargetValue> ResultSet::getRowAt(
    const size_t global_entry_idx,
    const bool translate_strings,
    const bool decimal_to_double,
    const bool fixup_count_distinct_pointers,
    const std::vector<bool>& targets_to_skip /* = {}*/) const {
  const auto storage_lookup_result =
      fixup_count_distinct_pointers
          ? StorageLookupResult{storage_.get(), global_entry_idx, 0}
          : findStorage(global_entry_idx);
  const auto storage = storage_lookup_result.storage_ptr;
  const auto local_entry_idx = storage_lookup_result.fixedup_entry_idx;
  if (!fixup_count_distinct_pointers && storage->isEmptyEntry(local_entry_idx)) {
    return {};
  }
  const auto buff = storage->buff_;
  CHECK(buff);
  std::vector<TargetValue> row;
  size_t agg_col_idx = 0;
  int8_t* rowwise_target_ptr{nullptr};
  int8_t* keys_ptr{nullptr};
  const int8_t* crt_col_ptr{nullptr};
  if (query_mem_desc_.didOutputColumnar()) {
    keys_ptr = buff;
    crt_col_ptr = get_cols_ptr(buff, storage->query_mem_desc_);
  } else {
    // Row-wise: target values follow the int64-aligned group-by key prefix.
    keys_ptr = row_ptr_rowwise(buff, query_mem_desc_, local_entry_idx);
    const auto key_bytes_with_padding =
        align_to_int64(get_key_bytes_rowwise(query_mem_desc_));
    rowwise_target_ptr = keys_ptr + key_bytes_with_padding;
  }
  for (size_t target_idx = 0; target_idx < storage->targets_.size(); ++target_idx) {
    const auto& agg_info = storage->targets_[target_idx];
    if (query_mem_desc_.didOutputColumnar()) {
      if (UNLIKELY(!targets_to_skip.empty())) {
        row.push_back(!targets_to_skip[target_idx]
                          ? getTargetValueFromBufferColwise(crt_col_ptr,
                                                            keys_ptr,
                                                            storage->query_mem_desc_,
                                                            local_entry_idx,
                                                            global_entry_idx,
                                                            agg_info,
                                                            target_idx,
                                                            agg_col_idx,
                                                            translate_strings,
                                                            decimal_to_double)
                          : nullptr);
      } else {
        row.push_back(getTargetValueFromBufferColwise(crt_col_ptr,
                                                      keys_ptr,
                                                      storage->query_mem_desc_,
                                                      local_entry_idx,
                                                      global_entry_idx,
                                                      agg_info,
                                                      target_idx,
                                                      agg_col_idx,
                                                      translate_strings,
                                                      decimal_to_double));
      }
      // Move to the start of the next target's column buffer.
      crt_col_ptr = advance_target_ptr_col_wise(crt_col_ptr,
                                                agg_info,
                                                agg_col_idx,
                                                storage->query_mem_desc_,
                                                separate_varlen_storage_valid_);
    } else {
      if (UNLIKELY(!targets_to_skip.empty())) {
        row.push_back(!targets_to_skip[target_idx]
                          ? getTargetValueFromBufferRowwise(rowwise_target_ptr,
                                                            keys_ptr,
                                                            global_entry_idx,
                                                            agg_info,
                                                            target_idx,
                                                            agg_col_idx,
                                                            translate_strings,
                                                            decimal_to_double,
                                                            fixup_count_distinct_pointers)
                          : nullptr);
      } else {
        row.push_back(getTargetValueFromBufferRowwise(rowwise_target_ptr,
                                                      keys_ptr,
                                                      global_entry_idx,
                                                      agg_info,
                                                      target_idx,
                                                      agg_col_idx,
                                                      translate_strings,
                                                      decimal_to_double,
                                                      fixup_count_distinct_pointers));
      }
      // Move to the next target's slot(s) within this row.
      rowwise_target_ptr = advance_target_ptr_row_wise(rowwise_target_ptr,
                                                       agg_info,
                                                       agg_col_idx,
                                                       query_mem_desc_,
                                                       separate_varlen_storage_valid_);
    }
    // Advance the slot index past this target (AVG and varlen targets may
    // consume two slots).
    agg_col_idx = advance_slot(agg_col_idx, agg_info, separate_varlen_storage_valid_);
  }

  return row;
}
218 
219 TargetValue ResultSet::getRowAt(const size_t row_idx,
220  const size_t col_idx,
221  const bool translate_strings,
222  const bool decimal_to_double /* = true */) const {
223  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
224  moveToBegin();
225  for (size_t i = 0; i < row_idx; ++i) {
226  auto crt_row = getNextRowUnlocked(translate_strings, decimal_to_double);
227  CHECK(!crt_row.empty());
228  }
229  auto crt_row = getNextRowUnlocked(translate_strings, decimal_to_double);
230  CHECK(!crt_row.empty());
231  return crt_row[col_idx];
232 }
233 
234 OneIntegerColumnRow ResultSet::getOneColRow(const size_t global_entry_idx) const {
235  const auto storage_lookup_result = findStorage(global_entry_idx);
236  const auto storage = storage_lookup_result.storage_ptr;
237  const auto local_entry_idx = storage_lookup_result.fixedup_entry_idx;
238  if (storage->isEmptyEntry(local_entry_idx)) {
239  return {0, false};
240  }
241  const auto buff = storage->buff_;
242  CHECK(buff);
243  CHECK(!query_mem_desc_.didOutputColumnar());
244  const auto keys_ptr = row_ptr_rowwise(buff, query_mem_desc_, local_entry_idx);
245  const auto key_bytes_with_padding =
246  align_to_int64(get_key_bytes_rowwise(query_mem_desc_));
247  const auto rowwise_target_ptr = keys_ptr + key_bytes_with_padding;
248  const auto tv = getTargetValueFromBufferRowwise(rowwise_target_ptr,
249  keys_ptr,
250  global_entry_idx,
251  targets_.front(),
252  0,
253  0,
254  false,
255  false,
256  false);
257  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
258  CHECK(scalar_tv);
259  const auto ival_ptr = boost::get<int64_t>(scalar_tv);
260  CHECK(ival_ptr);
261  return {*ival_ptr, true};
262 }
263 
264 std::vector<TargetValue> ResultSet::getRowAt(const size_t logical_index) const {
265  if (logical_index >= entryCount()) {
266  return {};
267  }
268  const auto entry_idx =
269  permutation_.empty() ? logical_index : permutation_[logical_index];
270  return getRowAt(entry_idx, true, false, false);
271 }
272 
273 std::vector<TargetValue> ResultSet::getRowAtNoTranslations(
274  const size_t logical_index,
275  const std::vector<bool>& targets_to_skip /* = {}*/) const {
276  if (logical_index >= entryCount()) {
277  return {};
278  }
279  const auto entry_idx =
280  permutation_.empty() ? logical_index : permutation_[logical_index];
281  return getRowAt(entry_idx, false, false, false, targets_to_skip);
282 }
283 
284 bool ResultSet::isRowAtEmpty(const size_t logical_index) const {
285  if (logical_index >= entryCount()) {
286  return true;
287  }
288  const auto entry_idx =
289  permutation_.empty() ? logical_index : permutation_[logical_index];
290  const auto storage_lookup_result = findStorage(entry_idx);
291  const auto storage = storage_lookup_result.storage_ptr;
292  const auto local_entry_idx = storage_lookup_result.fixedup_entry_idx;
293  return storage->isEmptyEntry(local_entry_idx);
294 }
295 
296 std::vector<TargetValue> ResultSet::getNextRow(const bool translate_strings,
297  const bool decimal_to_double) const {
298  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
299  if (!storage_ && !just_explain_) {
300  return {};
301  }
302  return getNextRowUnlocked(translate_strings, decimal_to_double);
303 }
304 
305 std::vector<TargetValue> ResultSet::getNextRowUnlocked(
306  const bool translate_strings,
307  const bool decimal_to_double) const {
308  if (just_explain_) {
309  if (fetched_so_far_) {
310  return {};
311  }
312  fetched_so_far_ = 1;
313  return {explanation_};
314  }
315  return getNextRowImpl(translate_strings, decimal_to_double);
316 }
317 
// Advances the cursor to the next non-empty entry and materializes it,
// honoring LIMIT (keep_first_) and OFFSET (drop_first_) bookkeeping.
std::vector<TargetValue> ResultSet::getNextRowImpl(const bool translate_strings,
                                                   const bool decimal_to_double) const {
  size_t entry_buff_idx = 0;
  do {
    if (keep_first_ && fetched_so_far_ >= drop_first_ + keep_first_) {
      return {};  // LIMIT reached
    }

    entry_buff_idx = advanceCursorToNextEntry();

    if (crt_row_buff_idx_ >= entryCount()) {
      CHECK_EQ(entryCount(), crt_row_buff_idx_);
      return {};  // buffer exhausted
    }
    ++crt_row_buff_idx_;
    ++fetched_so_far_;

    // Keep looping while we are still inside the OFFSET window.
  } while (drop_first_ && fetched_so_far_ <= drop_first_);

  auto row = getRowAt(entry_buff_idx, translate_strings, decimal_to_double, false);
  CHECK(!row.empty());

  return row;
}
342 
343 namespace {
344 
// Address of the entry_idx-th fixed-width value in a columnar slot whose
// elements are compact_sz1 bytes wide and stored back to back.
const int8_t* columnar_elem_ptr(const size_t entry_idx,
                                const int8_t* col1_ptr,
                                const int8_t compact_sz1) {
  const auto byte_offset = static_cast<size_t>(compact_sz1) * entry_idx;
  return col1_ptr + byte_offset;
}
350 
351 int64_t int_resize_cast(const int64_t ival, const size_t sz) {
352  switch (sz) {
353  case 8:
354  return ival;
355  case 4:
356  return static_cast<int32_t>(ival);
357  case 2:
358  return static_cast<int16_t>(ival);
359  case 1:
360  return static_cast<int8_t>(ival);
361  default:
362  UNREACHABLE();
363  }
364  UNREACHABLE();
365  return 0;
366 }
367 
368 } // namespace
369 
// NOTE(review): the enclosing function's signature line is missing from this
// capture. Judging by the members it touches (offsets_for_storage_,
// key_width_, rowwise pointer advancement), this initializes the per-target
// byte offsets for row-wise storage — confirm the exact name upstream.
  // Compute offsets for base storage and all appended storage
  for (size_t storage_idx = 0; storage_idx < result_set_->appended_storage_.size() + 1;
       ++storage_idx) {
    offsets_for_storage_.emplace_back();

    // Offsets are relative: start from a null base pointer and advance it
    // exactly as the row-wise reader would, so ptr1/ptr2 end up holding byte
    // offsets rather than real addresses.
    const int8_t* rowwise_target_ptr{0};

    size_t agg_col_idx = 0;
    for (size_t target_idx = 0; target_idx < result_set_->storage_->targets_.size();
         ++target_idx) {
      const auto& agg_info = result_set_->storage_->targets_[target_idx];

      auto ptr1 = rowwise_target_ptr;
      // A zero padded slot width means the slot takes the key width instead.
      const auto compact_sz1 =
          result_set_->query_mem_desc_.getPaddedSlotWidthBytes(agg_col_idx)
              ? result_set_->query_mem_desc_.getPaddedSlotWidthBytes(agg_col_idx)
              : key_width_;

      const int8_t* ptr2{nullptr};
      int8_t compact_sz2{0};
      if ((agg_info.is_agg && agg_info.agg_kind == kAVG)) {
        // AVG uses a second slot for the count.
        ptr2 = ptr1 + compact_sz1;
        compact_sz2 =
            result_set_->query_mem_desc_.getPaddedSlotWidthBytes(agg_col_idx + 1);
      } else if (is_real_str_or_array(agg_info)) {
        ptr2 = ptr1 + compact_sz1;
        if (!result_set_->separate_varlen_storage_valid_) {
          // None encoded strings explicitly attached to ResultSetStorage do not have a
          // second slot in the QueryMemoryDescriptor col width vector
          compact_sz2 =
              result_set_->query_mem_desc_.getPaddedSlotWidthBytes(agg_col_idx + 1);
        }
      }
      offsets_for_storage_[storage_idx].push_back(
          TargetOffsets{ptr1,
                        static_cast<size_t>(compact_sz1),
                        ptr2,
                        static_cast<size_t>(compact_sz2)});
      rowwise_target_ptr =
          advance_target_ptr_row_wise(rowwise_target_ptr,
                                      agg_info,
                                      agg_col_idx,
                                      result_set_->query_mem_desc_,
                                      result_set_->separate_varlen_storage_valid_);

      agg_col_idx = advance_slot(
          agg_col_idx, agg_info, result_set_->separate_varlen_storage_valid_);
    }
    // One TargetOffsets entry per target, for every storage fragment.
    CHECK_EQ(offsets_for_storage_[storage_idx].size(),
             result_set_->storage_->targets_.size());
  }
}
423 
// NOTE(review): the function's return-type/name line is missing from this
// capture; by its members (offsets_for_storage_, key_bytes_with_padding_,
// get_rowwise_ptr) this reads one target from a row-wise buffer and returns
// an InternalTargetValue — confirm the exact signature upstream.
    const int8_t* buff,
    const size_t entry_idx,
    const size_t target_logical_idx,
    const StorageLookupResult& storage_lookup_result) const {
  CHECK(buff);
  const int8_t* rowwise_target_ptr{nullptr};
  const int8_t* keys_ptr{nullptr};

  const size_t storage_idx = storage_lookup_result.storage_idx;

  CHECK_LT(storage_idx, offsets_for_storage_.size());
  CHECK_LT(target_logical_idx, offsets_for_storage_[storage_idx].size());

  const auto& offsets_for_target = offsets_for_storage_[storage_idx][target_logical_idx];
  const auto& agg_info = result_set_->storage_->targets_[target_logical_idx];
  const auto& type_info = agg_info.sql_type;

  keys_ptr = get_rowwise_ptr(buff, entry_idx);
  rowwise_target_ptr = keys_ptr + key_bytes_with_padding_;
  // Precomputed ptr1/ptr2 are byte offsets (built from a null base); rebase
  // them onto this row's target area.
  auto ptr1 = rowwise_target_ptr + reinterpret_cast<size_t>(offsets_for_target.ptr1);
  if (result_set_->query_mem_desc_.targetGroupbyIndicesSize() > 0) {
    if (result_set_->query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) >= 0) {
      // The target is stored inside the group-by key; read from there instead.
      ptr1 = keys_ptr +
             result_set_->query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) *
                 key_width_;
    }
  }
  const auto i1 =
      result_set_->lazyReadInt(read_int_from_buff(ptr1, offsets_for_target.compact_sz1),
                               target_logical_idx,
                               storage_lookup_result);
  if (agg_info.is_agg && agg_info.agg_kind == kAVG) {
    // AVG: the second slot holds the count.
    CHECK(offsets_for_target.ptr2);
    const auto ptr2 =
        rowwise_target_ptr + reinterpret_cast<size_t>(offsets_for_target.ptr2);
    const auto i2 = read_int_from_buff(ptr2, offsets_for_target.compact_sz2);
    return InternalTargetValue(i1, i2);
  } else {
    if (type_info.is_string() && type_info.get_compression() == kENCODING_NONE) {
      CHECK(!agg_info.is_agg);
      if (!result_set_->lazy_fetch_info_.empty()) {
        CHECK_LT(target_logical_idx, result_set_->lazy_fetch_info_.size());
        const auto& col_lazy_fetch = result_set_->lazy_fetch_info_[target_logical_idx];
        if (col_lazy_fetch.is_lazily_fetched) {
          // lazyReadInt already produced a pointer to an owned std::string.
          return InternalTargetValue(reinterpret_cast<const std::string*>(i1));
        }
      }
      if (result_set_->separate_varlen_storage_valid_) {
        if (i1 < 0) {
          CHECK_EQ(-1, i1);  // -1 is the null-string sentinel
          return InternalTargetValue(static_cast<const std::string*>(nullptr));
        }
        CHECK_LT(storage_lookup_result.storage_idx,
                 result_set_->serialized_varlen_buffer_.size());
        const auto& varlen_buffer_for_fragment =
            result_set_->serialized_varlen_buffer_[storage_lookup_result.storage_idx];
        CHECK_LT(static_cast<size_t>(i1), varlen_buffer_for_fragment.size());
        return InternalTargetValue(&varlen_buffer_for_fragment[i1]);
      }
      // Inline varlen storage: the second slot holds the string length.
      CHECK(offsets_for_target.ptr2);
      const auto ptr2 =
          rowwise_target_ptr + reinterpret_cast<size_t>(offsets_for_target.ptr2);
      const auto str_len = read_int_from_buff(ptr2, offsets_for_target.compact_sz2);
      CHECK_GE(str_len, 0);
      return result_set_->getVarlenOrderEntry(i1, str_len);
    } else if (agg_info.is_agg && agg_info.agg_kind == kMODE) {
      return InternalTargetValue(i1);  // AggMode*
    }
    return InternalTargetValue(
        type_info.is_fp() ? i1 : int_resize_cast(i1, type_info.get_logical_size()));
  }
}
497 
// NOTE(review): the enclosing function's signature line is missing from this
// capture; the body initializes the per-target pointers/widths for the
// columnar output format (the column-wise counterpart of the row-wise
// offset initialization above).
  // Compute offsets for base storage and all appended storage
  const auto key_width = result_set_->query_mem_desc_.getEffectiveKeyWidth();
  for (size_t storage_idx = 0; storage_idx < result_set_->appended_storage_.size() + 1;
       ++storage_idx) {
    offsets_for_storage_.emplace_back();

    // Index 0 is the base storage; subsequent indices are appended fragments.
    const int8_t* buff = storage_idx == 0
                             ? result_set_->storage_->buff_
                             : result_set_->appended_storage_[storage_idx - 1]->buff_;
    CHECK(buff);

    const auto& crt_query_mem_desc =
        storage_idx == 0
            ? result_set_->storage_->query_mem_desc_
            : result_set_->appended_storage_[storage_idx - 1]->query_mem_desc_;
    const int8_t* crt_col_ptr = get_cols_ptr(buff, crt_query_mem_desc);

    size_t agg_col_idx = 0;
    for (size_t target_idx = 0; target_idx < result_set_->storage_->targets_.size();
         ++target_idx) {
      const auto& agg_info = result_set_->storage_->targets_[target_idx];

      // A zero padded slot width means the slot takes the effective key width.
      const auto compact_sz1 =
          crt_query_mem_desc.getPaddedSlotWidthBytes(agg_col_idx)
              ? crt_query_mem_desc.getPaddedSlotWidthBytes(agg_col_idx)
              : key_width;

      const auto next_col_ptr = advance_to_next_columnar_target_buff(
          crt_col_ptr, crt_query_mem_desc, agg_col_idx);
      // AVG (sum + count) and real strings/arrays (ptr + len) use two slots.
      const bool uses_two_slots = (agg_info.is_agg && agg_info.agg_kind == kAVG) ||
                                  is_real_str_or_array(agg_info);
      const auto col2_ptr = uses_two_slots ? next_col_ptr : nullptr;
      const auto compact_sz2 =
          (agg_info.is_agg && agg_info.agg_kind == kAVG) || is_real_str_or_array(agg_info)
              ? crt_query_mem_desc.getPaddedSlotWidthBytes(agg_col_idx + 1)
              : 0;

      offsets_for_storage_[storage_idx].push_back(
          TargetOffsets{crt_col_ptr,
                        static_cast<size_t>(compact_sz1),
                        col2_ptr,
                        static_cast<size_t>(compact_sz2)});

      crt_col_ptr = next_col_ptr;
      if (uses_two_slots) {
        // NOTE(review): the call line that re-assigns crt_col_ptr (presumably
        // advance_to_next_columnar_target_buff) is missing from this capture;
        // only its argument list survives below.
          crt_col_ptr, crt_query_mem_desc, agg_col_idx + 1);
      }
      agg_col_idx = advance_slot(
          agg_col_idx, agg_info, result_set_->separate_varlen_storage_valid_);
    }
    // One TargetOffsets entry per target, for every storage fragment.
    CHECK_EQ(offsets_for_storage_[storage_idx].size(),
             result_set_->storage_->targets_.size());
  }
}
554 
// NOTE(review): the function's return-type/name line is missing from this
// capture; the columnar addressing (columnar_elem_ptr, entry_count_) marks
// this as the column-wise counterpart of the row-wise target reader above.
// Several read_int_from_buff(...) call lines are also missing below — only
// their argument lists survive.
    const int8_t* buff,
    const size_t entry_idx,
    const size_t target_logical_idx,
    const StorageLookupResult& storage_lookup_result) const {
  const size_t storage_idx = storage_lookup_result.storage_idx;

  CHECK_LT(storage_idx, offsets_for_storage_.size());
  CHECK_LT(target_logical_idx, offsets_for_storage_[storage_idx].size());

  const auto& offsets_for_target = offsets_for_storage_[storage_idx][target_logical_idx];
  const auto& agg_info = result_set_->storage_->targets_[target_logical_idx];
  const auto& type_info = agg_info.sql_type;
  auto ptr1 = offsets_for_target.ptr1;
  if (result_set_->query_mem_desc_.targetGroupbyIndicesSize() > 0) {
    if (result_set_->query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) >= 0) {
      // The target lives in the columnar group-by key area.
      ptr1 =
          buff + result_set_->query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) *
                     result_set_->query_mem_desc_.getEffectiveKeyWidth() *
                     result_set_->query_mem_desc_.entry_count_;
    }
  }

  // NOTE(review): a read_int_from_buff( call line is missing here.
  const auto i1 = result_set_->lazyReadInt(
      columnar_elem_ptr(entry_idx, ptr1, offsets_for_target.compact_sz1),
      offsets_for_target.compact_sz1),
      target_logical_idx,
      storage_lookup_result);
  if (agg_info.is_agg && agg_info.agg_kind == kAVG) {
    // AVG: the second slot holds the count.
    CHECK(offsets_for_target.ptr2);
    // NOTE(review): a columnar_elem_ptr( line is missing inside this call.
    const auto i2 = read_int_from_buff(
        entry_idx, offsets_for_target.ptr2, offsets_for_target.compact_sz2),
        offsets_for_target.compact_sz2);
    return InternalTargetValue(i1, i2);
  } else {
    // for TEXT ENCODING NONE:
    if (type_info.is_string() && type_info.get_compression() == kENCODING_NONE) {
      CHECK(!agg_info.is_agg);
      if (!result_set_->lazy_fetch_info_.empty()) {
        CHECK_LT(target_logical_idx, result_set_->lazy_fetch_info_.size());
        const auto& col_lazy_fetch = result_set_->lazy_fetch_info_[target_logical_idx];
        if (col_lazy_fetch.is_lazily_fetched) {
          // lazyReadInt already produced a pointer to an owned std::string.
          return InternalTargetValue(reinterpret_cast<const std::string*>(i1));
        }
      }
      if (result_set_->separate_varlen_storage_valid_) {
        if (i1 < 0) {
          CHECK_EQ(-1, i1);  // -1 is the null-string sentinel
          return InternalTargetValue(static_cast<const std::string*>(nullptr));
        }
        CHECK_LT(storage_lookup_result.storage_idx,
                 result_set_->serialized_varlen_buffer_.size());
        const auto& varlen_buffer_for_fragment =
            result_set_->serialized_varlen_buffer_[storage_lookup_result.storage_idx];
        CHECK_LT(static_cast<size_t>(i1), varlen_buffer_for_fragment.size());
        return InternalTargetValue(&varlen_buffer_for_fragment[i1]);
      }
      CHECK(offsets_for_target.ptr2);
      // NOTE(review): another partially-captured read; i2 is the string length.
      const auto i2 = read_int_from_buff(
          entry_idx, offsets_for_target.ptr2, offsets_for_target.compact_sz2),
          offsets_for_target.compact_sz2);
      CHECK_GE(i2, 0);
      return result_set_->getVarlenOrderEntry(i1, i2);
    }
    return InternalTargetValue(
        type_info.is_fp() ? i1 : int_resize_cast(i1, type_info.get_logical_size()));
  }
}
626 
// NOTE(review): the signature line is missing from this capture; given the
// trailing `const size_t str_len) const` and the callers above, this is
// presumably ResultSet::getVarlenOrderEntry(const int64_t str_ptr, ...).
// The device-type branch line before the GPU copy below is also missing.
    const size_t str_len) const {
  char* host_str_ptr{nullptr};
  std::vector<int8_t> cpu_buffer;
    // GPU result: copy the string bytes down to a host-side staging buffer.
    cpu_buffer.resize(str_len);
    const auto executor = query_mem_desc_.getExecutor();
    CHECK(executor);
    auto data_mgr = executor->getDataMgr();
    auto allocator = std::make_unique<CudaAllocator>(
        data_mgr, device_id_, getQueryEngineCudaStreamForDevice(device_id_));
    allocator->copyFromDevice(
        &cpu_buffer[0], reinterpret_cast<int8_t*>(str_ptr), str_len);
    host_str_ptr = reinterpret_cast<char*>(&cpu_buffer[0]);
  } else {
    // CPU result: the pointer is already host-addressable.
    host_str_ptr = reinterpret_cast<char*>(str_ptr);
  }
  // Copy into a string owned by the row set memory owner so the returned
  // pointer outlives this call (cpu_buffer is local).
  std::string str(host_str_ptr, str_len);
  return InternalTargetValue(row_set_mem_owner_->addString(str));
}
648 
// Resolves a possibly lazily-fetched target value: when the target at
// target_logical_idx is lazily fetched, ival indexes into the backing
// fragment and the real value is decoded from the column buffer; otherwise
// ival is returned unchanged.
int64_t ResultSet::lazyReadInt(const int64_t ival,
                               const size_t target_logical_idx,
                               const StorageLookupResult& storage_lookup_result) const {
  if (!lazy_fetch_info_.empty()) {
    CHECK_LT(target_logical_idx, lazy_fetch_info_.size());
    const auto& col_lazy_fetch = lazy_fetch_info_[target_logical_idx];
    if (col_lazy_fetch.is_lazily_fetched) {
      CHECK_LT(static_cast<size_t>(storage_lookup_result.storage_idx),
               col_buffers_.size());
      int64_t ival_copy = ival;
      auto& frag_col_buffers =
          getColumnFrag(static_cast<size_t>(storage_lookup_result.storage_idx),
                        target_logical_idx,
                        ival_copy);
      auto& frag_col_buffer = frag_col_buffers[col_lazy_fetch.local_col_id];
      CHECK_LT(target_logical_idx, targets_.size());
      const TargetInfo& target_info = targets_[target_logical_idx];
      // Lazy fetch only applies to projected (non-aggregate) targets.
      CHECK(!target_info.is_agg);
      if (target_info.sql_type.is_string() &&
          target_info.sql_type.get_compression() == kENCODING_NONE) {
        // None-encoded string: walk the chunk iterator to this row's datum.
        VarlenDatum vd;
        bool is_end{false};
        // NOTE(review): the call line (presumably ChunkIter_get_nth) is
        // missing from this capture; only its argument list survives.
            reinterpret_cast<ChunkIter*>(const_cast<int8_t*>(frag_col_buffer)),
            storage_lookup_result.fixedup_entry_idx,
            false,
            &vd,
            &is_end);
        CHECK(!is_end);
        if (vd.is_null) {
          return 0;
        }
        // Own the bytes via the row set memory owner; return the pointer as
        // an int64 carrier.
        std::string fetched_str(reinterpret_cast<char*>(vd.pointer), vd.length);
        return reinterpret_cast<int64_t>(row_set_mem_owner_->addString(fetched_str));
      }
      return result_set::lazy_decode(col_lazy_fetch, frag_col_buffer, ival_copy);
    }
  }
  return ival;
}
689 
// Not all entries in the buffer represent a valid row. Advance the internal cursor
// used for the getNextRow method to the next row which is valid.
// NOTE(review): the signature line (the iterator-taking overload of
// advanceCursorToNextEntry) and its initial LIMIT guard are missing from this
// capture — the body starts inside an early-out branch. Several
// permutation-lookup and validity-assignment lines are missing further down.
  iter.global_entry_idx_valid_ = false;
  return;
  }

  while (iter.crt_row_buff_idx_ < entryCount()) {
    // Map the buffer index through the permutation when one exists.
    const auto entry_idx = permutation_.empty() ? iter.crt_row_buff_idx_
    const auto storage_lookup_result = findStorage(entry_idx);
    const auto storage = storage_lookup_result.storage_ptr;
    const auto fixedup_entry_idx = storage_lookup_result.fixedup_entry_idx;
    if (!storage->isEmptyEntry(fixedup_entry_idx)) {
      // Valid row: skip it if still inside the OFFSET window, otherwise stop.
      if (iter.fetched_so_far_ < drop_first_) {
        ++iter.fetched_so_far_;
      } else {
        break;
      }
    }
    ++iter.crt_row_buff_idx_;
  }
  if (permutation_.empty()) {
  } else {
    iter.global_entry_idx_ = iter.crt_row_buff_idx_ == permutation_.size()
                                 ? iter.crt_row_buff_idx_
  }

  if (iter.global_entry_idx_valid_) {
    ++iter.crt_row_buff_idx_;
    ++iter.fetched_so_far_;
  }
}
729 
// Not all entries in the buffer represent a valid row. Advance the internal cursor
// used for the getNextRow method to the next row which is valid.
// NOTE(review): the signature line (the no-argument overload, presumably
// returning the resolved entry index) plus a few permutation-lookup and
// increment lines are missing from this capture.
  while (crt_row_buff_idx_ < entryCount()) {
    // Map the buffer index through the permutation when one exists.
    const auto entry_idx =
    const auto storage_lookup_result = findStorage(entry_idx);
    const auto storage = storage_lookup_result.storage_ptr;
    const auto fixedup_entry_idx = storage_lookup_result.fixedup_entry_idx;
    if (!storage->isEmptyEntry(fixedup_entry_idx)) {
      break;  // found the next non-empty entry
    }
  }
  if (permutation_.empty()) {
    return crt_row_buff_idx_;
  }
  return crt_row_buff_idx_ == permutation_.size() ? crt_row_buff_idx_
}
751 
752 size_t ResultSet::entryCount() const {
753  return permutation_.empty() ? query_mem_desc_.getEntryCount() : permutation_.size();
754 }
755 
756 size_t ResultSet::getBufferSizeBytes(const ExecutorDeviceType device_type) const {
757  CHECK(storage_);
758  return storage_->query_mem_desc_.getBufferSizeBytes(device_type);
759 }
760 
761 namespace {
762 
// NOTE(review): the declaration line (presumably
// `ScalarTargetValue make_scalar_tv(const T val)`) is missing from this
// capture. Generic case: widen the value to int64_t before wrapping.
template <class T>
 return ScalarTargetValue(static_cast<int64_t>(val));
}
767 
// NOTE(review): the specialization's declaration line (presumably the float
// overload of make_scalar_tv) is missing from this capture; the value is
// wrapped without widening.
template <>
 return ScalarTargetValue(val);
}
772 
// NOTE(review): the specialization's declaration line (presumably the double
// overload of make_scalar_tv) is missing from this capture; the value is
// wrapped without widening.
template <>
 return ScalarTargetValue(val);
}
777 
// Builds an array target value out of a raw buffer of fixed-width elements
// of type T.
// NOTE(review): the function's name/return-type line (presumably
// `ArrayTargetValue build_array_target_value(`) is missing from this capture.
template <class T>
    const int8_t* buff,
    const size_t buff_sz,
    std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) {
  std::vector<ScalarTargetValue> values;
  auto buff_elems = reinterpret_cast<const T*>(buff);
  // The buffer must contain a whole number of T elements.
  CHECK_EQ(size_t(0), buff_sz % sizeof(T));
  const size_t num_elems = buff_sz / sizeof(T);
  for (size_t i = 0; i < num_elems; ++i) {
    values.push_back(make_scalar_tv<T>(buff_elems[i]));
  }
  return ArrayTargetValue(values);
}
792 
// Builds an array target value for dictionary-encoded string elements,
// optionally translating string ids back into the strings themselves.
// NOTE(review): the function's name/return-type line (presumably
// `ArrayTargetValue build_string_array_target_value(`) is missing from this
// capture.
    const int32_t* buff,
    const size_t buff_sz,
    const int dict_id,
    const bool translate_strings,
    std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
    const Catalog_Namespace::Catalog* catalog) {
  std::vector<ScalarTargetValue> values;
  // The buffer must contain a whole number of 32-bit string ids.
  CHECK_EQ(size_t(0), buff_sz % sizeof(int32_t));
  const size_t num_elems = buff_sz / sizeof(int32_t);
  if (translate_strings) {
    for (size_t i = 0; i < num_elems; ++i) {
      const auto string_id = buff[i];

      if (string_id == NULL_INT) {
        values.emplace_back(NullableString(nullptr));
      } else {
        if (dict_id == 0) {
          // Dictionary id 0 is the literal (transient) string dictionary.
          StringDictionaryProxy* sdp = row_set_mem_owner->getLiteralStringDictProxy();
          values.emplace_back(sdp->getString(string_id));
        } else {
          values.emplace_back(NullableString(
              row_set_mem_owner
                  ->getOrAddStringDictProxy(dict_id, /*with_generation=*/false, catalog)
                  ->getString(string_id)));
        }
      }
    }
  } else {
    // No translation requested: surface the raw dictionary ids as integers.
    for (size_t i = 0; i < num_elems; i++) {
      values.emplace_back(static_cast<int64_t>(buff[i]));
    }
  }
  return ArrayTargetValue(values);
}
828 
// Dispatches array materialization on the element type of the array:
// strings go through the dictionary path, numerics by element width.
// NOTE(review): the function's name/return-type line and first parameter
// (presumably `TargetValue build_array_target_value(const SQLTypeInfo&
// array_ti,`) are missing from this capture.
    const int8_t* buff,
    const size_t buff_sz,
    const bool translate_strings,
    std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
    const Catalog_Namespace::Catalog* catalog) {
  CHECK(array_ti.is_array());
  const auto& elem_ti = array_ti.get_elem_type();
  if (elem_ti.is_string()) {
    return build_string_array_target_value(reinterpret_cast<const int32_t*>(buff),
                                           buff_sz,
                                           elem_ti.get_comp_param(),
                                           translate_strings,
                                           row_set_mem_owner,
                                           catalog);
  }
  switch (elem_ti.get_size()) {
    case 1:
      return build_array_target_value<int8_t>(buff, buff_sz, row_set_mem_owner);
    case 2:
      return build_array_target_value<int16_t>(buff, buff_sz, row_set_mem_owner);
    case 4:
      if (elem_ti.is_fp()) {
        return build_array_target_value<float>(buff, buff_sz, row_set_mem_owner);
      } else {
        return build_array_target_value<int32_t>(buff, buff_sz, row_set_mem_owner);
      }
    case 8:
      if (elem_ti.is_fp()) {
        return build_array_target_value<double>(buff, buff_sz, row_set_mem_owner);
      } else {
        return build_array_target_value<int64_t>(buff, buff_sz, row_set_mem_owner);
      }
    default:
      CHECK(false);  // unsupported element width
  }
  CHECK(false);
  return TargetValue(nullptr);
}
868 
// Folds a flat (ptr0, len0, ptr1, len1, ...) tuple into an explicit vector
// of (pointer, length) pairs, one per index in the sequence.
template <class Tuple, size_t... indices>
inline std::vector<std::pair<const int8_t*, const int64_t>> make_vals_vector(
    std::index_sequence<indices...>,
    const Tuple& tuple) {
  std::vector<std::pair<const int8_t*, const int64_t>> pairs;
  pairs.reserve(sizeof...(indices));
  (pairs.emplace_back(std::get<2 * indices>(tuple), std::get<2 * indices + 1>(tuple)),
   ...);
  return pairs;
}
876 
877 inline std::unique_ptr<ArrayDatum> lazy_fetch_chunk(const int8_t* ptr,
878  const int64_t varlen_ptr) {
879  auto ad = std::make_unique<ArrayDatum>();
880  bool is_end;
881  ChunkIter_get_nth(reinterpret_cast<ChunkIter*>(const_cast<int8_t*>(ptr)),
882  varlen_ptr,
883  ad.get(),
884  &is_end);
885  CHECK(!is_end);
886  return ad;
887 }
888 
// NOTE(review): the enclosing struct's declaration line is missing from this
// capture (presumably `struct GeoLazyFetchHandler {`); only its static fetch
// member and closing brace are visible.
  // Lazily fetches each (pointer, index) pair of a geo target through chunk
  // iterators, then fixes up nullness for POINT coordinate arrays.
  template <typename... T>
  static inline auto fetch(const SQLTypeInfo& geo_ti,
                           const ResultSet::GeoReturnType return_type,
                           T&&... vals) {
    constexpr int num_vals = sizeof...(vals);
    static_assert(
        num_vals % 2 == 0,
        "Must have consistent pointer/size pairs for lazy fetch of geo target values.");
    const auto vals_vector = make_vals_vector(std::make_index_sequence<num_vals / 2>{},
                                              std::make_tuple(vals...));
    std::array<VarlenDatumPtr, num_vals / 2> ad_arr;
    size_t ctr = 0;
    for (const auto& col_pair : vals_vector) {
      ad_arr[ctr] = lazy_fetch_chunk(col_pair.first, col_pair.second);
      // Regular chunk iterator used to fetch this datum sets the right nullness.
      // That includes the fixlen bounds array.
      // However it may incorrectly set it for the POINT coord array datum
      // if 1st byte happened to hold NULL_ARRAY_TINYINT. One should either use
      // the specialized iterator for POINT coords or rely on regular iterator +
      // reset + recheck, which is what is done below.
      auto is_point = (geo_ti.get_type() == kPOINT && ctr == 0);
      if (is_point) {
        // Resetting POINT coords array nullness here
        ad_arr[ctr]->is_null = false;
      }
      if (!geo_ti.get_notnull()) {
        // Recheck and set nullness
        if (ad_arr[ctr]->length == 0 || ad_arr[ctr]->pointer == NULL ||
            (is_point &&
             is_null_point(geo_ti, ad_arr[ctr]->pointer, ad_arr[ctr]->length))) {
          ad_arr[ctr]->is_null = true;
        }
      }
      ctr++;
    }
    return ad_arr;
  }
};
928 
929 inline std::unique_ptr<ArrayDatum> fetch_data_from_gpu(int64_t varlen_ptr,
930  const int64_t length,
931  Data_Namespace::DataMgr* data_mgr,
932  const int device_id) {
933  auto cpu_buf =
934  std::shared_ptr<int8_t>(new int8_t[length], std::default_delete<int8_t[]>());
935  auto allocator = std::make_unique<CudaAllocator>(
936  data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
937  allocator->copyFromDevice(cpu_buf.get(), reinterpret_cast<int8_t*>(varlen_ptr), length);
938  // Just fetching the data from gpu, not checking geo nullness
939  return std::make_unique<ArrayDatum>(length, cpu_buf, false);
940 }
941 
943  static inline auto yieldGpuPtrFetcher() {
944  return [](const int64_t ptr, const int64_t length) -> VarlenDatumPtr {
945  // Just fetching the data from gpu, not checking geo nullness
946  return std::make_unique<VarlenDatum>(length, reinterpret_cast<int8_t*>(ptr), false);
947  };
948  }
949 
950  static inline auto yieldGpuDatumFetcher(Data_Namespace::DataMgr* data_mgr_ptr,
951  const int device_id) {
952  return [data_mgr_ptr, device_id](const int64_t ptr,
953  const int64_t length) -> VarlenDatumPtr {
954  return fetch_data_from_gpu(ptr, length, data_mgr_ptr, device_id);
955  };
956  }
957 
  // Yields a fetcher for data already resident in host memory: wraps the
  // pointer in a VarlenDatum without copying and without checking geo
  // nullness. (The previous "fetching the data from gpu" comment was a
  // copy/paste artifact from the GPU fetchers above.)
  static inline auto yieldCpuDatumFetcher() {
    return [](const int64_t ptr, const int64_t length) -> VarlenDatumPtr {
      return std::make_unique<VarlenDatum>(length, reinterpret_cast<int8_t*>(ptr), false);
    };
  }
964 
965  template <typename... T>
966  static inline auto fetch(const SQLTypeInfo& geo_ti,
967  const ResultSet::GeoReturnType return_type,
968  Data_Namespace::DataMgr* data_mgr,
969  const bool fetch_data_from_gpu,
970  const int device_id,
971  T&&... vals) {
972  auto ad_arr_generator = [&](auto datum_fetcher) {
973  constexpr int num_vals = sizeof...(vals);
974  static_assert(
975  num_vals % 2 == 0,
976  "Must have consistent pointer/size pairs for lazy fetch of geo target values.");
977  const auto vals_vector = std::vector<int64_t>{vals...};
978 
979  std::array<VarlenDatumPtr, num_vals / 2> ad_arr;
980  size_t ctr = 0;
981  for (size_t i = 0; i < vals_vector.size(); i += 2, ctr++) {
982  if (vals_vector[i] == 0) {
983  // projected null
984  CHECK(!geo_ti.get_notnull());
985  ad_arr[ctr] = std::make_unique<ArrayDatum>(0, nullptr, true);
986  continue;
987  }
988  ad_arr[ctr] = datum_fetcher(vals_vector[i], vals_vector[i + 1]);
989  // All fetched datums come in with is_null set to false
990  if (!geo_ti.get_notnull()) {
991  bool is_null = false;
992  // Now need to set the nullness
993  if (ad_arr[ctr]->length == 0 || ad_arr[ctr]->pointer == NULL) {
994  is_null = true;
995  } else if (geo_ti.get_type() == kPOINT && ctr == 0 &&
996  is_null_point(geo_ti, ad_arr[ctr]->pointer, ad_arr[ctr]->length)) {
997  is_null = true; // recognizes compressed and uncompressed points
998  } else if (ad_arr[ctr]->length == 4 * sizeof(double)) {
999  // Bounds
1000  auto dti = SQLTypeInfo(kARRAY, 0, 0, false, kENCODING_NONE, 0, kDOUBLE);
1001  is_null = dti.is_null_fixlen_array(ad_arr[ctr]->pointer, ad_arr[ctr]->length);
1002  }
1003  ad_arr[ctr]->is_null = is_null;
1004  }
1005  }
1006  return ad_arr;
1007  };
1008 
1009  if (fetch_data_from_gpu) {
1011  return ad_arr_generator(yieldGpuPtrFetcher());
1012  } else {
1013  return ad_arr_generator(yieldGpuDatumFetcher(data_mgr, device_id));
1014  }
1015  } else {
1016  return ad_arr_generator(yieldCpuDatumFetcher());
1017  }
1018  }
1019 };
1020 
1021 template <SQLTypes GEO_SOURCE_TYPE, typename GeoTargetFetcher>
1023  template <typename... T>
1024  static inline TargetValue build(const SQLTypeInfo& geo_ti,
1025  const ResultSet::GeoReturnType return_type,
1026  T&&... vals) {
1027  auto ad_arr = GeoTargetFetcher::fetch(geo_ti, return_type, std::forward<T>(vals)...);
1028  static_assert(std::tuple_size<decltype(ad_arr)>::value > 0,
1029  "ArrayDatum array for Geo Target must contain at least one value.");
1030 
1031  // Fetcher sets the geo nullness based on geo typeinfo's notnull, type and
1032  // compression. Serializers will generate appropriate NULL geo where necessary.
1033  switch (return_type) {
1035  if (!geo_ti.get_notnull() && ad_arr[0]->is_null) {
1036  return GeoTargetValue();
1037  }
1039  GEO_SOURCE_TYPE>::GeoSerializerType::serialize(geo_ti,
1040  ad_arr);
1041  }
1043  if (!geo_ti.get_notnull() && ad_arr[0]->is_null) {
1044  // Generating NULL wkt string to represent NULL geo
1045  return NullableString(nullptr);
1046  }
1048  GEO_SOURCE_TYPE>::GeoSerializerType::serialize(geo_ti,
1049  ad_arr);
1050  }
1053  if (!geo_ti.get_notnull() && ad_arr[0]->is_null) {
1054  // NULL geo
1055  // Pass along null datum, instead of an empty/null GeoTargetValuePtr
1056  // return GeoTargetValuePtr();
1057  }
1059  GEO_SOURCE_TYPE>::GeoSerializerType::serialize(geo_ti,
1060  ad_arr);
1061  }
1062  default: {
1063  UNREACHABLE();
1064  return TargetValue(nullptr);
1065  }
1066  }
1067  }
1068 };
1069 
1070 template <typename T>
1071 inline std::pair<int64_t, int64_t> get_frag_id_and_local_idx(
1072  const std::vector<std::vector<T>>& frag_offsets,
1073  const size_t tab_or_col_idx,
1074  const int64_t global_idx) {
1075  CHECK_GE(global_idx, int64_t(0));
1076  for (int64_t frag_id = frag_offsets.size() - 1; frag_id > 0; --frag_id) {
1077  CHECK_LT(tab_or_col_idx, frag_offsets[frag_id].size());
1078  const auto frag_off = static_cast<int64_t>(frag_offsets[frag_id][tab_or_col_idx]);
1079  if (frag_off < global_idx) {
1080  return {frag_id, global_idx - frag_off};
1081  }
1082  }
1083  return {-1, -1};
1084 }
1085 
1086 } // namespace
1087 
1088 // clang-format off
1089 // formatted by clang-format 14.0.6
1091  bool const translate_strings,
1092  int64_t const val) const {
1093  if (ti.is_string()) {
1095  return makeStringTargetValue(ti, translate_strings, val);
1096  } else {
1097  return ti.is_any<kDOUBLE>() ? ScalarTargetValue(shared::bit_cast<double>(val))
1098  : ti.is_any<kFLOAT>() ? ScalarTargetValue(shared::bit_cast<float>(val))
1099  : ScalarTargetValue(val);
1100  }
1101 }
1102 
1104  bool const translate_strings) {
1107  : ti.is_string() ? translate_strings
1108  ? ScalarTargetValue(NullableString(nullptr))
1109  : ScalarTargetValue(static_cast<int64_t>(NULL_INT))
1111 }
1112 
1114  int64_t const lhs,
1115  int64_t const rhs) const {
1116  if (ti.is_string()) {
1118  return getString(ti, lhs) < getString(ti, rhs);
1119  } else {
1120  return ti.is_any<kDOUBLE>()
1121  ? shared::bit_cast<double>(lhs) < shared::bit_cast<double>(rhs)
1122  : ti.is_any<kFLOAT>()
1123  ? shared::bit_cast<float>(lhs) < shared::bit_cast<float>(rhs)
1124  : lhs < rhs;
1125  }
1126 }
1127 
1129  bool const translate_strings,
1130  int64_t const ival) {
1131  return ti.is_any<kDOUBLE>() ? shared::bit_cast<double>(ival) == NULL_DOUBLE
1132  : ti.is_any<kFLOAT>() ? shared::bit_cast<float>(ival) == NULL_FLOAT
1133  : ti.is_string() ? translate_strings ? ival == NULL_INT : ival == 0
1134  : ival == inline_int_null_val(ti);
1135 }
1136 // clang-format on
1137 
1138 const std::vector<const int8_t*>& ResultSet::getColumnFrag(const size_t storage_idx,
1139  const size_t col_logical_idx,
1140  int64_t& global_idx) const {
1141  CHECK_LT(static_cast<size_t>(storage_idx), col_buffers_.size());
1142  if (col_buffers_[storage_idx].size() > 1) {
1143  int64_t frag_id = 0;
1144  int64_t local_idx = global_idx;
1145  if (consistent_frag_sizes_[storage_idx][col_logical_idx] != -1) {
1146  frag_id = global_idx / consistent_frag_sizes_[storage_idx][col_logical_idx];
1147  local_idx = global_idx % consistent_frag_sizes_[storage_idx][col_logical_idx];
1148  } else {
1149  std::tie(frag_id, local_idx) = get_frag_id_and_local_idx(
1150  frag_offsets_[storage_idx], col_logical_idx, global_idx);
1151  CHECK_LE(local_idx, global_idx);
1152  }
1153  CHECK_GE(frag_id, int64_t(0));
1154  CHECK_LT(static_cast<size_t>(frag_id), col_buffers_[storage_idx].size());
1155  global_idx = local_idx;
1156  return col_buffers_[storage_idx][frag_id];
1157  } else {
1158  CHECK_EQ(size_t(1), col_buffers_[storage_idx].size());
1159  return col_buffers_[storage_idx][0];
1160  }
1161 }
1162 
1163 const VarlenOutputInfo* ResultSet::getVarlenOutputInfo(const size_t entry_idx) const {
1164  auto storage_lookup_result = findStorage(entry_idx);
1165  CHECK(storage_lookup_result.storage_ptr);
1166  return storage_lookup_result.storage_ptr->getVarlenOutputInfo();
1167 }
1168 
1173 void ResultSet::copyColumnIntoBuffer(const size_t column_idx,
1174  int8_t* output_buffer,
1175  const size_t output_buffer_size) const {
1177  CHECK_LT(column_idx, query_mem_desc_.getSlotCount());
1178  CHECK(output_buffer_size > 0);
1179  CHECK(output_buffer);
1180  const auto column_width_size = query_mem_desc_.getPaddedSlotWidthBytes(column_idx);
1181  size_t out_buff_offset = 0;
1182 
1183  // the main storage:
1184  const size_t crt_storage_row_count = storage_->query_mem_desc_.getEntryCount();
1185  const size_t crt_buffer_size = crt_storage_row_count * column_width_size;
1186  const size_t column_offset = storage_->query_mem_desc_.getColOffInBytes(column_idx);
1187  const int8_t* storage_buffer = storage_->getUnderlyingBuffer() + column_offset;
1188  CHECK(crt_buffer_size <= output_buffer_size);
1189  std::memcpy(output_buffer, storage_buffer, crt_buffer_size);
1190 
1191  out_buff_offset += crt_buffer_size;
1192 
1193  // the appended storages:
1194  for (size_t i = 0; i < appended_storage_.size(); i++) {
1195  const size_t crt_storage_row_count =
1196  appended_storage_[i]->query_mem_desc_.getEntryCount();
1197  if (crt_storage_row_count == 0) {
1198  // skip an empty appended storage
1199  continue;
1200  }
1201  CHECK_LT(out_buff_offset, output_buffer_size);
1202  const size_t crt_buffer_size = crt_storage_row_count * column_width_size;
1203  const size_t column_offset =
1204  appended_storage_[i]->query_mem_desc_.getColOffInBytes(column_idx);
1205  const int8_t* storage_buffer =
1206  appended_storage_[i]->getUnderlyingBuffer() + column_offset;
1207  CHECK(out_buff_offset + crt_buffer_size <= output_buffer_size);
1208  std::memcpy(output_buffer + out_buff_offset, storage_buffer, crt_buffer_size);
1209 
1210  out_buff_offset += crt_buffer_size;
1211  }
1212 }
1213 
// Dispatches an entry read to the matching accessor based on the compile-time
// query description type (perfect vs. baseline hash) and output layout
// (columnar vs. row-wise). `row_idx` is the logical row, `target_idx` the
// logical target and `slot_idx` the physical slot. Any other query type is a
// caller bug (UNREACHABLE).
template <typename ENTRY_TYPE, QueryDescriptionType QUERY_TYPE, bool COLUMNAR_FORMAT>
ENTRY_TYPE ResultSet::getEntryAt(const size_t row_idx,
                                 const size_t target_idx,
                                 const size_t slot_idx) const {
  if constexpr (QUERY_TYPE == QueryDescriptionType::GroupByPerfectHash) { // NOLINT
    if constexpr (COLUMNAR_FORMAT) { // NOLINT
      return getColumnarPerfectHashEntryAt<ENTRY_TYPE>(row_idx, target_idx, slot_idx);
    } else {
      return getRowWisePerfectHashEntryAt<ENTRY_TYPE>(row_idx, target_idx, slot_idx);
    }
  } else if constexpr (QUERY_TYPE == QueryDescriptionType::GroupByBaselineHash) {
    if constexpr (COLUMNAR_FORMAT) { // NOLINT
      return getColumnarBaselineEntryAt<ENTRY_TYPE>(row_idx, target_idx, slot_idx);
    } else {
      return getRowWiseBaselineEntryAt<ENTRY_TYPE>(row_idx, target_idx, slot_idx);
    }
  } else {
    UNREACHABLE() << "Invalid query type is used";
    return 0;
  }
}
1235 
1236 #define DEF_GET_ENTRY_AT(query_type, columnar_output) \
1237  template DATA_T ResultSet::getEntryAt<DATA_T, query_type, columnar_output>( \
1238  const size_t row_idx, const size_t target_idx, const size_t slot_idx) const;
1239 
1240 #define DATA_T int64_t
1244 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByBaselineHash, false)
1245 #undef DATA_T
1246 
1247 #define DATA_T int32_t
1249 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByPerfectHash, false)
1250 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByBaselineHash, true)
1251 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByBaselineHash, false)
1252 #undef DATA_T
1253 
1254 #define DATA_T int16_t
1256 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByPerfectHash, false)
1257 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByBaselineHash, true)
1258 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByBaselineHash, false)
1259 #undef DATA_T
1260 
1261 #define DATA_T int8_t
1263 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByPerfectHash, false)
1264 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByBaselineHash, true)
1265 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByBaselineHash, false)
1266 #undef DATA_T
1267 
1268 #define DATA_T float
1270 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByPerfectHash, false)
1271 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByBaselineHash, true)
1272 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByBaselineHash, false)
1273 #undef DATA_T
1274 
1275 #define DATA_T double
1277 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByPerfectHash, false)
1278 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByBaselineHash, true)
1279 DEF_GET_ENTRY_AT(QueryDescriptionType::GroupByBaselineHash, false)
1280 #undef DATA_T
1281 
1282 #undef DEF_GET_ENTRY_AT
1283 
1290 template <typename ENTRY_TYPE>
1291 ENTRY_TYPE ResultSet::getColumnarPerfectHashEntryAt(const size_t row_idx,
1292  const size_t target_idx,
1293  const size_t slot_idx) const {
1294  const size_t column_offset = storage_->query_mem_desc_.getColOffInBytes(slot_idx);
1295  const int8_t* storage_buffer = storage_->getUnderlyingBuffer() + column_offset;
1296  return reinterpret_cast<const ENTRY_TYPE*>(storage_buffer)[row_idx];
1297 }
1298 
1305 template <typename ENTRY_TYPE>
1306 ENTRY_TYPE ResultSet::getRowWisePerfectHashEntryAt(const size_t row_idx,
1307  const size_t target_idx,
1308  const size_t slot_idx) const {
1309  const size_t row_offset = storage_->query_mem_desc_.getRowSize() * row_idx;
1310  const size_t column_offset = storage_->query_mem_desc_.getColOffInBytes(slot_idx);
1311  const int8_t* storage_buffer =
1312  storage_->getUnderlyingBuffer() + row_offset + column_offset;
1313  return *reinterpret_cast<const ENTRY_TYPE*>(storage_buffer);
1314 }
1315 
1322 template <typename ENTRY_TYPE>
1323 ENTRY_TYPE ResultSet::getRowWiseBaselineEntryAt(const size_t row_idx,
1324  const size_t target_idx,
1325  const size_t slot_idx) const {
1326  CHECK_NE(storage_->query_mem_desc_.targetGroupbyIndicesSize(), size_t(0));
1327  const auto key_width = storage_->query_mem_desc_.getEffectiveKeyWidth();
1328  auto keys_ptr = row_ptr_rowwise(
1329  storage_->getUnderlyingBuffer(), storage_->query_mem_desc_, row_idx);
1330  const auto column_offset =
1331  (storage_->query_mem_desc_.getTargetGroupbyIndex(target_idx) < 0)
1332  ? storage_->query_mem_desc_.getColOffInBytes(slot_idx)
1333  : storage_->query_mem_desc_.getTargetGroupbyIndex(target_idx) * key_width;
1334  const auto storage_buffer = keys_ptr + column_offset;
1335  return *reinterpret_cast<const ENTRY_TYPE*>(storage_buffer);
1336 }
1337 
1344 template <typename ENTRY_TYPE>
1345 ENTRY_TYPE ResultSet::getColumnarBaselineEntryAt(const size_t row_idx,
1346  const size_t target_idx,
1347  const size_t slot_idx) const {
1348  CHECK_NE(storage_->query_mem_desc_.targetGroupbyIndicesSize(), size_t(0));
1349  const auto key_width = storage_->query_mem_desc_.getEffectiveKeyWidth();
1350  const auto column_offset =
1351  (storage_->query_mem_desc_.getTargetGroupbyIndex(target_idx) < 0)
1352  ? storage_->query_mem_desc_.getColOffInBytes(slot_idx)
1353  : storage_->query_mem_desc_.getTargetGroupbyIndex(target_idx) * key_width *
1354  storage_->query_mem_desc_.getEntryCount();
1355  const auto column_buffer = storage_->getUnderlyingBuffer() + column_offset;
1356  return reinterpret_cast<const ENTRY_TYPE*>(column_buffer)[row_idx];
1357 }
1358 
1359 // Interprets ptr1, ptr2 as the ptr and len pair used for variable length data.
1361  const int8_t compact_sz1,
1362  const int8_t* ptr2,
1363  const int8_t compact_sz2,
1364  const TargetInfo& target_info,
1365  const size_t target_logical_idx,
1366  const bool translate_strings,
1367  const size_t entry_buff_idx) const {
1368  auto varlen_ptr = read_int_from_buff(ptr1, compact_sz1);
1369  if (separate_varlen_storage_valid_ && !target_info.is_agg) {
1370  if (varlen_ptr < 0) {
1371  CHECK_EQ(-1, varlen_ptr);
1372  if (target_info.sql_type.get_type() == kARRAY) {
1373  return ArrayTargetValue(boost::optional<std::vector<ScalarTargetValue>>{});
1374  }
1375  return TargetValue(nullptr);
1376  }
1377  const auto storage_idx = getStorageIndex(entry_buff_idx);
1378  if (target_info.sql_type.is_string()) {
1379  CHECK(target_info.sql_type.get_compression() == kENCODING_NONE);
1380  CHECK_LT(storage_idx.first, serialized_varlen_buffer_.size());
1381  const auto& varlen_buffer_for_storage =
1382  serialized_varlen_buffer_[storage_idx.first];
1383  CHECK_LT(static_cast<size_t>(varlen_ptr), varlen_buffer_for_storage.size());
1384  return varlen_buffer_for_storage[varlen_ptr];
1385  } else if (target_info.sql_type.get_type() == kARRAY) {
1386  CHECK_LT(storage_idx.first, serialized_varlen_buffer_.size());
1387  const auto& varlen_buffer = serialized_varlen_buffer_[storage_idx.first];
1388  CHECK_LT(static_cast<size_t>(varlen_ptr), varlen_buffer.size());
1389 
1390  return build_array_target_value(
1391  target_info.sql_type,
1392  reinterpret_cast<const int8_t*>(varlen_buffer[varlen_ptr].data()),
1393  varlen_buffer[varlen_ptr].size(),
1394  translate_strings,
1396  catalog_);
1397  } else {
1398  CHECK(false);
1399  }
1400  }
1401  if (!lazy_fetch_info_.empty()) {
1402  CHECK_LT(target_logical_idx, lazy_fetch_info_.size());
1403  const auto& col_lazy_fetch = lazy_fetch_info_[target_logical_idx];
1404  if (col_lazy_fetch.is_lazily_fetched) {
1405  const auto storage_idx = getStorageIndex(entry_buff_idx);
1406  CHECK_LT(storage_idx.first, col_buffers_.size());
1407  auto& frag_col_buffers =
1408  getColumnFrag(storage_idx.first, target_logical_idx, varlen_ptr);
1409  bool is_end{false};
1410  auto col_buf = const_cast<int8_t*>(frag_col_buffers[col_lazy_fetch.local_col_id]);
1411  if (target_info.sql_type.is_string()) {
1412  VarlenDatum vd;
1414  reinterpret_cast<ChunkIter*>(col_buf), varlen_ptr, false, &vd, &is_end);
1415  CHECK(!is_end);
1416  if (vd.is_null) {
1417  return TargetValue(nullptr);
1418  }
1419  CHECK(vd.pointer);
1420  CHECK_GT(vd.length, 0u);
1421  std::string fetched_str(reinterpret_cast<char*>(vd.pointer), vd.length);
1422  return fetched_str;
1423  } else {
1424  CHECK(target_info.sql_type.is_array());
1425  ArrayDatum ad;
1426  if (FlatBufferManager::isFlatBuffer(col_buf)) {
1427  VarlenArray_get_nth(col_buf, varlen_ptr, &ad, &is_end);
1428  } else {
1430  reinterpret_cast<ChunkIter*>(col_buf), varlen_ptr, &ad, &is_end);
1431  }
1432  CHECK(!is_end);
1433  if (ad.is_null) {
1434  return ArrayTargetValue(boost::optional<std::vector<ScalarTargetValue>>{});
1435  }
1436  CHECK_GE(ad.length, 0u);
1437  if (ad.length > 0) {
1438  CHECK(ad.pointer);
1439  }
1440  return build_array_target_value(target_info.sql_type,
1441  ad.pointer,
1442  ad.length,
1443  translate_strings,
1445  catalog_);
1446  }
1447  }
1448  }
1449  if (!varlen_ptr) {
1450  if (target_info.sql_type.is_array()) {
1451  return ArrayTargetValue(boost::optional<std::vector<ScalarTargetValue>>{});
1452  }
1453  return TargetValue(nullptr);
1454  }
1455  auto length = read_int_from_buff(ptr2, compact_sz2);
1456  if (target_info.sql_type.is_array()) {
1457  const auto& elem_ti = target_info.sql_type.get_elem_type();
1458  length *= elem_ti.get_array_context_logical_size();
1459  }
1460  std::vector<int8_t> cpu_buffer;
1461  if (varlen_ptr && device_type_ == ExecutorDeviceType::GPU) {
1462  cpu_buffer.resize(length);
1463  const auto executor = query_mem_desc_.getExecutor();
1464  CHECK(executor);
1465  auto data_mgr = executor->getDataMgr();
1466  auto allocator = std::make_unique<CudaAllocator>(
1467  data_mgr, device_id_, getQueryEngineCudaStreamForDevice(device_id_));
1468 
1469  allocator->copyFromDevice(
1470  &cpu_buffer[0], reinterpret_cast<int8_t*>(varlen_ptr), length);
1471  varlen_ptr = reinterpret_cast<int64_t>(&cpu_buffer[0]);
1472  }
1473  if (target_info.sql_type.is_array()) {
1474  return build_array_target_value(target_info.sql_type,
1475  reinterpret_cast<const int8_t*>(varlen_ptr),
1476  length,
1477  translate_strings,
1479  catalog_);
1480  }
1481  return std::string(reinterpret_cast<char*>(varlen_ptr), length);
1482 }
1483 
1484 bool ResultSet::isGeoColOnGpu(const size_t col_idx) const {
1485  // This should match the logic in makeGeoTargetValue which ultimately calls
1486  // fetch_data_from_gpu when the geo column is on the device.
1487  // TODO(croot): somehow find a way to refactor this and makeGeoTargetValue to use a
1488  // utility function that handles this logic in one place
1489  CHECK_LT(col_idx, targets_.size());
1490  if (!IS_GEO(targets_[col_idx].sql_type.get_type())) {
1491  throw std::runtime_error("Column target at index " + std::to_string(col_idx) +
1492  " is not a geo column. It is of type " +
1493  targets_[col_idx].sql_type.get_type_name() + ".");
1494  }
1495 
1496  const auto& target_info = targets_[col_idx];
1497  if (separate_varlen_storage_valid_ && !target_info.is_agg) {
1498  return false;
1499  }
1500 
1501  if (!lazy_fetch_info_.empty()) {
1502  CHECK_LT(col_idx, lazy_fetch_info_.size());
1503  if (lazy_fetch_info_[col_idx].is_lazily_fetched) {
1504  return false;
1505  }
1506  }
1507 
1509 }
1510 
1511 // Reads a geo value from a series of ptrs to var len types
1512 // In Columnar format, geo_target_ptr is the geo column ptr (a pointer to the beginning
1513 // of that specific geo column) and should be appropriately adjusted with the
1514 // entry_buff_idx
1515 TargetValue ResultSet::makeGeoTargetValue(const int8_t* geo_target_ptr,
1516  const size_t slot_idx,
1517  const TargetInfo& target_info,
1518  const size_t target_logical_idx,
1519  const size_t entry_buff_idx) const {
1520  CHECK(target_info.sql_type.is_geometry());
1521 
1522  auto getNextTargetBufferRowWise = [&](const size_t slot_idx, const size_t range) {
1523  return geo_target_ptr + query_mem_desc_.getPaddedColWidthForRange(slot_idx, range);
1524  };
1525 
1526  auto getNextTargetBufferColWise = [&](const size_t slot_idx, const size_t range) {
1527  const auto storage_info = findStorage(entry_buff_idx);
1528  auto crt_geo_col_ptr = geo_target_ptr;
1529  for (size_t i = slot_idx; i < slot_idx + range; i++) {
1530  crt_geo_col_ptr = advance_to_next_columnar_target_buff(
1531  crt_geo_col_ptr, storage_info.storage_ptr->query_mem_desc_, i);
1532  }
1533  // adjusting the column pointer to represent a pointer to the geo target value
1534  return crt_geo_col_ptr +
1535  storage_info.fixedup_entry_idx *
1536  storage_info.storage_ptr->query_mem_desc_.getPaddedSlotWidthBytes(
1537  slot_idx + range);
1538  };
1539 
1540  auto getNextTargetBuffer = [&](const size_t slot_idx, const size_t range) {
1542  ? getNextTargetBufferColWise(slot_idx, range)
1543  : getNextTargetBufferRowWise(slot_idx, range);
1544  };
1545 
1546  auto getCoordsDataPtr = [&](const int8_t* geo_target_ptr) {
1547  return read_int_from_buff(getNextTargetBuffer(slot_idx, 0),
1549  };
1550 
1551  auto getCoordsLength = [&](const int8_t* geo_target_ptr) {
1552  return read_int_from_buff(getNextTargetBuffer(slot_idx, 1),
1554  };
1555 
1556  auto getRingSizesPtr = [&](const int8_t* geo_target_ptr) {
1557  return read_int_from_buff(getNextTargetBuffer(slot_idx, 2),
1559  };
1560 
1561  auto getRingSizesLength = [&](const int8_t* geo_target_ptr) {
1562  return read_int_from_buff(getNextTargetBuffer(slot_idx, 3),
1564  };
1565 
1566  auto getPolyRingsPtr = [&](const int8_t* geo_target_ptr) {
1567  return read_int_from_buff(getNextTargetBuffer(slot_idx, 4),
1569  };
1570 
1571  auto getPolyRingsLength = [&](const int8_t* geo_target_ptr) {
1572  return read_int_from_buff(getNextTargetBuffer(slot_idx, 5),
1574  };
1575 
1576  auto getFragColBuffers = [&]() -> decltype(auto) {
1577  const auto storage_idx = getStorageIndex(entry_buff_idx);
1578  CHECK_LT(storage_idx.first, col_buffers_.size());
1579  auto global_idx = getCoordsDataPtr(geo_target_ptr);
1580  return getColumnFrag(storage_idx.first, target_logical_idx, global_idx);
1581  };
1582 
1583  const bool is_gpu_fetch = device_type_ == ExecutorDeviceType::GPU;
1584 
1585  auto getDataMgr = [&]() {
1586  auto executor = query_mem_desc_.getExecutor();
1587  CHECK(executor);
1588  return executor->getDataMgr();
1589  };
1590 
1591  auto getSeparateVarlenStorage = [&]() -> decltype(auto) {
1592  const auto storage_idx = getStorageIndex(entry_buff_idx);
1593  CHECK_LT(storage_idx.first, serialized_varlen_buffer_.size());
1594  const auto& varlen_buffer = serialized_varlen_buffer_[storage_idx.first];
1595  return varlen_buffer;
1596  };
1597 
1598  if (separate_varlen_storage_valid_ && getCoordsDataPtr(geo_target_ptr) < 0) {
1599  CHECK_EQ(-1, getCoordsDataPtr(geo_target_ptr));
1600  return TargetValue(nullptr);
1601  }
1602 
1603  const ColumnLazyFetchInfo* col_lazy_fetch = nullptr;
1604  if (!lazy_fetch_info_.empty()) {
1605  CHECK_LT(target_logical_idx, lazy_fetch_info_.size());
1606  col_lazy_fetch = &lazy_fetch_info_[target_logical_idx];
1607  }
1608 
1609  switch (target_info.sql_type.get_type()) {
1610  case kPOINT: {
1611  if (query_mem_desc_.slotIsVarlenOutput(slot_idx)) {
1612  auto varlen_output_info = getVarlenOutputInfo(entry_buff_idx);
1613  CHECK(varlen_output_info);
1614  auto geo_data_ptr = read_int_from_buff(
1615  geo_target_ptr, query_mem_desc_.getPaddedSlotWidthBytes(slot_idx));
1616  auto cpu_data_ptr =
1617  reinterpret_cast<int64_t>(varlen_output_info->computeCpuOffset(geo_data_ptr));
1618  return GeoTargetValueBuilder<kPOINT, GeoQueryOutputFetchHandler>::build(
1619  target_info.sql_type,
1621  /*data_mgr=*/nullptr,
1622  /*is_gpu_fetch=*/false,
1623  device_id_,
1624  cpu_data_ptr,
1625  target_info.sql_type.get_compression() == kENCODING_GEOINT ? 8 : 16);
1626  } else if (separate_varlen_storage_valid_ && !target_info.is_agg) {
1627  const auto& varlen_buffer = getSeparateVarlenStorage();
1628  CHECK_LT(static_cast<size_t>(getCoordsDataPtr(geo_target_ptr)),
1629  varlen_buffer.size());
1630 
1631  return GeoTargetValueBuilder<kPOINT, GeoQueryOutputFetchHandler>::build(
1632  target_info.sql_type,
1634  nullptr,
1635  false,
1636  device_id_,
1637  reinterpret_cast<int64_t>(
1638  varlen_buffer[getCoordsDataPtr(geo_target_ptr)].data()),
1639  static_cast<int64_t>(varlen_buffer[getCoordsDataPtr(geo_target_ptr)].size()));
1640  } else if (col_lazy_fetch && col_lazy_fetch->is_lazily_fetched) {
1641  const auto& frag_col_buffers = getFragColBuffers();
1642  return GeoTargetValueBuilder<kPOINT, GeoLazyFetchHandler>::build(
1643  target_info.sql_type,
1645  frag_col_buffers[col_lazy_fetch->local_col_id],
1646  getCoordsDataPtr(geo_target_ptr));
1647  } else {
1648  return GeoTargetValueBuilder<kPOINT, GeoQueryOutputFetchHandler>::build(
1649  target_info.sql_type,
1651  is_gpu_fetch ? getDataMgr() : nullptr,
1652  is_gpu_fetch,
1653  device_id_,
1654  getCoordsDataPtr(geo_target_ptr),
1655  getCoordsLength(geo_target_ptr));
1656  }
1657  break;
1658  }
1659  case kMULTIPOINT: {
1660  if (separate_varlen_storage_valid_ && !target_info.is_agg) {
1661  const auto& varlen_buffer = getSeparateVarlenStorage();
1662  CHECK_LT(static_cast<size_t>(getCoordsDataPtr(geo_target_ptr)),
1663  varlen_buffer.size());
1664 
1665  return GeoTargetValueBuilder<kMULTIPOINT, GeoQueryOutputFetchHandler>::build(
1666  target_info.sql_type,
1668  nullptr,
1669  false,
1670  device_id_,
1671  reinterpret_cast<int64_t>(
1672  varlen_buffer[getCoordsDataPtr(geo_target_ptr)].data()),
1673  static_cast<int64_t>(varlen_buffer[getCoordsDataPtr(geo_target_ptr)].size()));
1674  } else if (col_lazy_fetch && col_lazy_fetch->is_lazily_fetched) {
1675  const auto& frag_col_buffers = getFragColBuffers();
1676  return GeoTargetValueBuilder<kMULTIPOINT, GeoLazyFetchHandler>::build(
1677  target_info.sql_type,
1679  frag_col_buffers[col_lazy_fetch->local_col_id],
1680  getCoordsDataPtr(geo_target_ptr));
1681  } else {
1682  return GeoTargetValueBuilder<kMULTIPOINT, GeoQueryOutputFetchHandler>::build(
1683  target_info.sql_type,
1685  is_gpu_fetch ? getDataMgr() : nullptr,
1686  is_gpu_fetch,
1687  device_id_,
1688  getCoordsDataPtr(geo_target_ptr),
1689  getCoordsLength(geo_target_ptr));
1690  }
1691  break;
1692  }
1693  case kLINESTRING: {
1694  if (separate_varlen_storage_valid_ && !target_info.is_agg) {
1695  const auto& varlen_buffer = getSeparateVarlenStorage();
1696  CHECK_LT(static_cast<size_t>(getCoordsDataPtr(geo_target_ptr)),
1697  varlen_buffer.size());
1698 
1699  return GeoTargetValueBuilder<kLINESTRING, GeoQueryOutputFetchHandler>::build(
1700  target_info.sql_type,
1702  nullptr,
1703  false,
1704  device_id_,
1705  reinterpret_cast<int64_t>(
1706  varlen_buffer[getCoordsDataPtr(geo_target_ptr)].data()),
1707  static_cast<int64_t>(varlen_buffer[getCoordsDataPtr(geo_target_ptr)].size()));
1708  } else if (col_lazy_fetch && col_lazy_fetch->is_lazily_fetched) {
1709  const auto& frag_col_buffers = getFragColBuffers();
1710  return GeoTargetValueBuilder<kLINESTRING, GeoLazyFetchHandler>::build(
1711  target_info.sql_type,
1713  frag_col_buffers[col_lazy_fetch->local_col_id],
1714  getCoordsDataPtr(geo_target_ptr));
1715  } else {
1716  return GeoTargetValueBuilder<kLINESTRING, GeoQueryOutputFetchHandler>::build(
1717  target_info.sql_type,
1719  is_gpu_fetch ? getDataMgr() : nullptr,
1720  is_gpu_fetch,
1721  device_id_,
1722  getCoordsDataPtr(geo_target_ptr),
1723  getCoordsLength(geo_target_ptr));
1724  }
1725  break;
1726  }
1727  case kMULTILINESTRING: {
1728  if (separate_varlen_storage_valid_ && !target_info.is_agg) {
1729  const auto& varlen_buffer = getSeparateVarlenStorage();
1730  CHECK_LT(static_cast<size_t>(getCoordsDataPtr(geo_target_ptr) + 1),
1731  varlen_buffer.size());
1732 
1733  return GeoTargetValueBuilder<kMULTILINESTRING, GeoQueryOutputFetchHandler>::build(
1734  target_info.sql_type,
1736  nullptr,
1737  false,
1738  device_id_,
1739  reinterpret_cast<int64_t>(
1740  varlen_buffer[getCoordsDataPtr(geo_target_ptr)].data()),
1741  static_cast<int64_t>(varlen_buffer[getCoordsDataPtr(geo_target_ptr)].size()),
1742  reinterpret_cast<int64_t>(
1743  varlen_buffer[getCoordsDataPtr(geo_target_ptr) + 1].data()),
1744  static_cast<int64_t>(
1745  varlen_buffer[getCoordsDataPtr(geo_target_ptr) + 1].size()));
1746  } else if (col_lazy_fetch && col_lazy_fetch->is_lazily_fetched) {
1747  const auto& frag_col_buffers = getFragColBuffers();
1748 
1749  return GeoTargetValueBuilder<kMULTILINESTRING, GeoLazyFetchHandler>::build(
1750  target_info.sql_type,
1752  frag_col_buffers[col_lazy_fetch->local_col_id],
1753  getCoordsDataPtr(geo_target_ptr),
1754  frag_col_buffers[col_lazy_fetch->local_col_id + 1],
1755  getCoordsDataPtr(geo_target_ptr));
1756  } else {
1757  return GeoTargetValueBuilder<kMULTILINESTRING, GeoQueryOutputFetchHandler>::build(
1758  target_info.sql_type,
1760  is_gpu_fetch ? getDataMgr() : nullptr,
1761  is_gpu_fetch,
1762  device_id_,
1763  getCoordsDataPtr(geo_target_ptr),
1764  getCoordsLength(geo_target_ptr),
1765  getRingSizesPtr(geo_target_ptr),
1766  getRingSizesLength(geo_target_ptr) * 4);
1767  }
1768  break;
1769  }
1770  case kPOLYGON: {
1771  if (separate_varlen_storage_valid_ && !target_info.is_agg) {
1772  const auto& varlen_buffer = getSeparateVarlenStorage();
1773  CHECK_LT(static_cast<size_t>(getCoordsDataPtr(geo_target_ptr) + 1),
1774  varlen_buffer.size());
1775 
1776  return GeoTargetValueBuilder<kPOLYGON, GeoQueryOutputFetchHandler>::build(
1777  target_info.sql_type,
1779  nullptr,
1780  false,
1781  device_id_,
1782  reinterpret_cast<int64_t>(
1783  varlen_buffer[getCoordsDataPtr(geo_target_ptr)].data()),
1784  static_cast<int64_t>(varlen_buffer[getCoordsDataPtr(geo_target_ptr)].size()),
1785  reinterpret_cast<int64_t>(
1786  varlen_buffer[getCoordsDataPtr(geo_target_ptr) + 1].data()),
1787  static_cast<int64_t>(
1788  varlen_buffer[getCoordsDataPtr(geo_target_ptr) + 1].size()));
1789  } else if (col_lazy_fetch && col_lazy_fetch->is_lazily_fetched) {
1790  const auto& frag_col_buffers = getFragColBuffers();
1791 
1792  return GeoTargetValueBuilder<kPOLYGON, GeoLazyFetchHandler>::build(
1793  target_info.sql_type,
1795  frag_col_buffers[col_lazy_fetch->local_col_id],
1796  getCoordsDataPtr(geo_target_ptr),
1797  frag_col_buffers[col_lazy_fetch->local_col_id + 1],
1798  getCoordsDataPtr(geo_target_ptr));
1799  } else {
1800  return GeoTargetValueBuilder<kPOLYGON, GeoQueryOutputFetchHandler>::build(
1801  target_info.sql_type,
1803  is_gpu_fetch ? getDataMgr() : nullptr,
1804  is_gpu_fetch,
1805  device_id_,
1806  getCoordsDataPtr(geo_target_ptr),
1807  getCoordsLength(geo_target_ptr),
1808  getRingSizesPtr(geo_target_ptr),
1809  getRingSizesLength(geo_target_ptr) * 4);
1810  }
1811  break;
1812  }
1813  case kMULTIPOLYGON: {
1814  if (separate_varlen_storage_valid_ && !target_info.is_agg) {
1815  const auto& varlen_buffer = getSeparateVarlenStorage();
1816  CHECK_LT(static_cast<size_t>(getCoordsDataPtr(geo_target_ptr) + 2),
1817  varlen_buffer.size());
1818 
1819  return GeoTargetValueBuilder<kMULTIPOLYGON, GeoQueryOutputFetchHandler>::build(
1820  target_info.sql_type,
1822  nullptr,
1823  false,
1824  device_id_,
1825  reinterpret_cast<int64_t>(
1826  varlen_buffer[getCoordsDataPtr(geo_target_ptr)].data()),
1827  static_cast<int64_t>(varlen_buffer[getCoordsDataPtr(geo_target_ptr)].size()),
1828  reinterpret_cast<int64_t>(
1829  varlen_buffer[getCoordsDataPtr(geo_target_ptr) + 1].data()),
1830  static_cast<int64_t>(
1831  varlen_buffer[getCoordsDataPtr(geo_target_ptr) + 1].size()),
1832  reinterpret_cast<int64_t>(
1833  varlen_buffer[getCoordsDataPtr(geo_target_ptr) + 2].data()),
1834  static_cast<int64_t>(
1835  varlen_buffer[getCoordsDataPtr(geo_target_ptr) + 2].size()));
1836  } else if (col_lazy_fetch && col_lazy_fetch->is_lazily_fetched) {
1837  const auto& frag_col_buffers = getFragColBuffers();
1838 
1839  return GeoTargetValueBuilder<kMULTIPOLYGON, GeoLazyFetchHandler>::build(
1840  target_info.sql_type,
1842  frag_col_buffers[col_lazy_fetch->local_col_id],
1843  getCoordsDataPtr(geo_target_ptr),
1844  frag_col_buffers[col_lazy_fetch->local_col_id + 1],
1845  getCoordsDataPtr(geo_target_ptr),
1846  frag_col_buffers[col_lazy_fetch->local_col_id + 2],
1847  getCoordsDataPtr(geo_target_ptr));
1848  } else {
1849  return GeoTargetValueBuilder<kMULTIPOLYGON, GeoQueryOutputFetchHandler>::build(
1850  target_info.sql_type,
1852  is_gpu_fetch ? getDataMgr() : nullptr,
1853  is_gpu_fetch,
1854  device_id_,
1855  getCoordsDataPtr(geo_target_ptr),
1856  getCoordsLength(geo_target_ptr),
1857  getRingSizesPtr(geo_target_ptr),
1858  getRingSizesLength(geo_target_ptr) * 4,
1859  getPolyRingsPtr(geo_target_ptr),
1860  getPolyRingsLength(geo_target_ptr) * 4);
1861  }
1862  break;
1863  }
1864  default:
1865  throw std::runtime_error("Unknown Geometry type encountered: " +
1866  target_info.sql_type.get_type_name());
1867  }
1868  UNREACHABLE();
1869  return TargetValue(nullptr);
1870 }
1871 
1872 std::string ResultSet::getString(SQLTypeInfo const& ti, int64_t const ival) const {
1873  StringDictionaryProxy* sdp;
1874  if (ti.get_comp_param()) {
1875  constexpr bool with_generation = false;
1876  sdp = catalog_ ? row_set_mem_owner_->getOrAddStringDictProxy(
1877  ti.get_comp_param(), with_generation, catalog_)
1878  : row_set_mem_owner_->getStringDictProxy(
1879  ti.get_comp_param()); // unit tests bypass the catalog
1880  } else {
1881  sdp = row_set_mem_owner_->getLiteralStringDictProxy();
1882  }
1883  return sdp->getString(ival);
1884 }
1885 
1887  bool const translate_strings,
1888  int64_t const ival) const {
1889  if (translate_strings) {
1890  if (static_cast<int32_t>(ival) == NULL_INT) { // TODO(alex): this isn't nice, fix it
1891  return NullableString(nullptr);
1892  } else {
1893  return NullableString(getString(chosen_type, ival));
1894  }
1895  } else {
1896  return static_cast<int64_t>(static_cast<int32_t>(ival));
1897  }
1898 }
1899 
1900 // Reads an integer or a float from ptr based on the type and the byte width.
// NOTE(review): this extracted listing elides some lines (1901, 1911, 1992);
// per the class index the signature is
// ResultSet::makeTargetValue(const int8_t* ptr, ...). Confirm against the
// original file before editing logic here.
1902  const int8_t compact_sz,
1903  const TargetInfo& target_info,
1904  const size_t target_logical_idx,
1905  const bool translate_strings,
1906  const bool decimal_to_double,
1907  const size_t entry_buff_idx) const {
// Start from the slot's padded width; adjusted below for types whose stored
// representation differs from the slot width.
1908  auto actual_compact_sz = compact_sz;
1909  const auto& type_info = target_info.sql_type;
1910  if (type_info.get_type() == kFLOAT && !query_mem_desc_.forceFourByteFloat()) {
// NOTE(review): a nested condition line (1911) is elided here — presumably a
// check on logical-sized-columns support; confirm against the source.
1912  actual_compact_sz = sizeof(float);
1913  } else {
1914  actual_compact_sz = sizeof(double);
1915  }
1916  if (target_info.is_agg &&
1917  (target_info.agg_kind == kAVG || target_info.agg_kind == kSUM ||
1918  target_info.agg_kind == kSUM_IF || target_info.agg_kind == kMIN ||
1919  target_info.agg_kind == kMAX || target_info.agg_kind == kSINGLE_VALUE)) {
1920  // The above listed aggregates use two floats in a single 8-byte slot. Set the
1921  // padded size to 4 bytes to properly read each value.
1922  actual_compact_sz = sizeof(float);
1923  }
1924  }
1925  if (get_compact_type(target_info).is_date_in_days()) {
1926  // Dates encoded in days are converted to 8 byte values on read.
1927  actual_compact_sz = sizeof(int64_t);
1928  }
1929 
1930  // String dictionary keys are read as 32-bit values regardless of encoding
1931  if (type_info.is_string() && type_info.get_compression() == kENCODING_DICT &&
1932  type_info.get_comp_param()) {
1933  actual_compact_sz = sizeof(int32_t);
1934  }
1935 
// Raw bits of the slot; reinterpreted below according to the chosen type.
1936  auto ival = read_int_from_buff(ptr, actual_compact_sz);
1937  const auto& chosen_type = get_compact_type(target_info);
1938  if (!lazy_fetch_info_.empty()) {
1939  CHECK_LT(target_logical_idx, lazy_fetch_info_.size());
1940  const auto& col_lazy_fetch = lazy_fetch_info_[target_logical_idx];
1941  if (col_lazy_fetch.is_lazily_fetched) {
// Lazily fetched column: ival holds an offset into the original fragment's
// column buffer; decode the actual value from there.
1942  CHECK_GE(ival, 0);
1943  const auto storage_idx = getStorageIndex(entry_buff_idx);
1944  CHECK_LT(storage_idx.first, col_buffers_.size());
1945  auto& frag_col_buffers = getColumnFrag(storage_idx.first, target_logical_idx, ival);
1946  CHECK_LT(size_t(col_lazy_fetch.local_col_id), frag_col_buffers.size());
1947  ival = result_set::lazy_decode(
1948  col_lazy_fetch, frag_col_buffers[col_lazy_fetch.local_col_id], ival);
1949  if (chosen_type.is_fp()) {
// lazy_decode returns floating-point values bit-cast into the int64_t;
// undo the type punning here.
1950  const auto dval = *reinterpret_cast<const double*>(may_alias_ptr(&ival));
1951  if (chosen_type.get_type() == kFLOAT) {
1952  return ScalarTargetValue(static_cast<float>(dval));
1953  } else {
1954  return ScalarTargetValue(dval);
1955  }
1956  }
1957  }
1958  }
// MODE aggregate: the slot stores a pointer to an AggMode accumulator.
1959  if (target_info.agg_kind == kMODE) {
1960  if (!isNullIval(chosen_type, translate_strings, ival)) {
1961  auto const* const* const agg_mode = reinterpret_cast<AggMode const* const*>(ptr);
1962  if (std::optional<int64_t> const mode = (*agg_mode)->mode()) {
1963  return convertToScalarTargetValue(chosen_type, translate_strings, *mode);
1964  }
1965  }
1966  return nullScalarTargetValue(chosen_type, translate_strings);
1967  }
1968  if (chosen_type.is_fp()) {
// APPROX_QUANTILE stores a TDigest pointer; NULL_DOUBLE bits mark
// validate-only runs where no digest was built.
1969  if (target_info.agg_kind == kAPPROX_QUANTILE) {
1970  return *reinterpret_cast<double const*>(ptr) == NULL_DOUBLE
1971  ? NULL_DOUBLE // sql_validate / just_validate
1972  : calculateQuantile(*reinterpret_cast<quantile::TDigest* const*>(ptr));
1973  }
1974  switch (actual_compact_sz) {
1975  case 8: {
1976  const auto dval = *reinterpret_cast<const double*>(ptr);
1977  return chosen_type.get_type() == kFLOAT
1978  ? ScalarTargetValue(static_cast<const float>(dval))
1979  : ScalarTargetValue(dval);
1980  }
1981  case 4: {
1982  CHECK_EQ(kFLOAT, chosen_type.get_type());
1983  return *reinterpret_cast<const float*>(ptr);
1984  }
1985  default:
1986  CHECK(false);
1987  }
1988  }
1989  if (chosen_type.is_integer() || chosen_type.is_boolean() || chosen_type.is_time() ||
1990  chosen_type.is_timeinterval()) {
1991  if (is_distinct_target(target_info)) {
// NOTE(review): line 1992 is elided here — per the index it returns
// count_distinct_set_size(ival, ...); confirm against the source.
1993  ival, query_mem_desc_.getCountDistinctDescriptor(target_logical_idx)));
1994  }
1995  // TODO(alex): remove int_resize_cast, make read_int_from_buff return the
1996  // right type instead
// Map the compact in-buffer null sentinel to the logical type's null value.
1997  if (inline_int_null_val(chosen_type) ==
1998  int_resize_cast(ival, chosen_type.get_logical_size())) {
1999  return inline_int_null_val(type_info);
2000  }
2001  return ival;
2002  }
2003  if (chosen_type.is_string() && chosen_type.get_compression() == kENCODING_DICT) {
2004  return makeStringTargetValue(chosen_type, translate_strings, ival);
2005  }
2006  if (chosen_type.is_decimal()) {
2007  if (decimal_to_double) {
// Aggregates over decimals accumulate in a 64-bit slot, so their null
// sentinel is the BIGINT null regardless of the decimal's declared width.
2008  if (target_info.is_agg &&
2009  (target_info.agg_kind == kAVG || target_info.agg_kind == kSUM ||
2010  target_info.agg_kind == kSUM_IF || target_info.agg_kind == kMIN ||
2011  target_info.agg_kind == kMAX) &&
2012  ival == inline_int_null_val(SQLTypeInfo(kBIGINT, false))) {
2013  return NULL_DOUBLE;
2014  }
2015  if (!chosen_type.get_notnull() &&
2016  ival ==
2017  inline_int_null_val(SQLTypeInfo(decimal_to_int_type(chosen_type), false))) {
2018  return NULL_DOUBLE;
2019  }
// Scale the raw decimal integer into a double.
2020  return static_cast<double>(ival) / exp_to_scale(chosen_type.get_scale());
2021  }
2022  return ival;
2023  }
2024  CHECK(false);
2025  return TargetValue(int64_t(0));
2026 }
2027 
2028 // Gets the TargetValue stored at position local_entry_idx in the col1_ptr and col2_ptr
2029 // column buffers. The second column is only used for AVG.
2030 // the global_entry_idx is passed to makeTargetValue to be used for
2031 // final lazy fetch (if there's any).
// NOTE(review): lines 2032, 2035, 2043 and 2080 are elided in this listing;
// per the class index the signature is
// ResultSet::getTargetValueFromBufferColwise(const int8_t* col_ptr, ...).
2033  const int8_t* col_ptr,
2034  const int8_t* keys_ptr,
2036  const size_t local_entry_idx,
2037  const size_t global_entry_idx,
2038  const TargetInfo& target_info,
2039  const size_t target_logical_idx,
2040  const size_t slot_idx,
2041  const bool translate_strings,
2042  const bool decimal_to_double) const {
2044  const auto col1_ptr = col_ptr;
2045  const auto compact_sz1 = query_mem_desc.getPaddedSlotWidthBytes(slot_idx);
// AVG and varlen (real string / array) targets consume two consecutive
// slots; only then is a second column pointer/width needed.
2046  const auto next_col_ptr =
2047  advance_to_next_columnar_target_buff(col1_ptr, query_mem_desc, slot_idx);
2048  const auto col2_ptr = ((target_info.is_agg && target_info.agg_kind == kAVG) ||
2049  is_real_str_or_array(target_info))
2050  ? next_col_ptr
2051  : nullptr;
2052  const auto compact_sz2 = ((target_info.is_agg && target_info.agg_kind == kAVG) ||
2053  is_real_str_or_array(target_info))
2054  ? query_mem_desc.getPaddedSlotWidthBytes(slot_idx + 1)
2055  : 0;
2056 
2057  // TODO(Saman): add required logics for count distinct
2058  // geospatial target values:
2059  if (target_info.sql_type.is_geometry()) {
2060  return makeGeoTargetValue(
2061  col1_ptr, slot_idx, target_info, target_logical_idx, global_entry_idx);
2062  }
2063 
2064  const auto ptr1 = columnar_elem_ptr(local_entry_idx, col1_ptr, compact_sz1);
2065  if (target_info.agg_kind == kAVG || is_real_str_or_array(target_info)) {
2066  CHECK(col2_ptr);
2067  CHECK(compact_sz2);
2068  const auto ptr2 = columnar_elem_ptr(local_entry_idx, col2_ptr, compact_sz2);
2069  return target_info.agg_kind == kAVG
2070  ? make_avg_target_value(ptr1, compact_sz1, ptr2, compact_sz2, target_info)
2071  : makeVarlenTargetValue(ptr1,
2072  compact_sz1,
2073  ptr2,
2074  compact_sz2,
2075  target_info,
2076  target_logical_idx,
2077  translate_strings,
2078  global_entry_idx);
2079  }
// NOTE(review): the first half of this condition (line 2080) is elided —
// presumably a check on targetGroupbyIndicesSize(); confirm against the source.
2081  query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) < 0) {
2082  return makeTargetValue(ptr1,
2083  compact_sz1,
2084  target_info,
2085  target_logical_idx,
2086  translate_strings,
2087  decimal_to_double,
2088  global_entry_idx);
2089  }
// The target is materialized in the group-by key columns: read it from the
// key buffer at the recorded group-by index instead of a target slot.
2090  const auto key_width = query_mem_desc_.getEffectiveKeyWidth();
2091  const auto key_idx = query_mem_desc_.getTargetGroupbyIndex(target_logical_idx);
2092  CHECK_GE(key_idx, 0);
2093  auto key_col_ptr = keys_ptr + key_idx * query_mem_desc_.getEntryCount() * key_width;
2094  return makeTargetValue(columnar_elem_ptr(local_entry_idx, key_col_ptr, key_width),
2095  key_width,
2096  target_info,
2097  target_logical_idx,
2098  translate_strings,
2099  decimal_to_double,
2100  global_entry_idx);
2101 }
2102 
2103 // Gets the TargetValue stored in slot_idx (and slot_idx for AVG) of
2104 // rowwise_target_ptr.
// NOTE(review): lines 2105, 2145, 2161 and 2182 are elided in this listing;
// per the class index the signature is
// ResultSet::getTargetValueFromBufferRowwise(int8_t* rowwise_target_ptr, ...).
2106  int8_t* rowwise_target_ptr,
2107  int8_t* keys_ptr,
2108  const size_t entry_buff_idx,
2109  const TargetInfo& target_info,
2110  const size_t target_logical_idx,
2111  const size_t slot_idx,
2112  const bool translate_strings,
2113  const bool decimal_to_double,
2114  const bool fixup_count_distinct_pointers) const {
// Fixup mode: rewrite serialized (remote) count-distinct handles in place to
// local buffer addresses, allocating zero-filled bitmaps for unmapped ones.
// No value is produced in this mode.
2115  if (UNLIKELY(fixup_count_distinct_pointers)) {
2116  if (is_distinct_target(target_info)) {
2117  auto count_distinct_ptr_ptr = reinterpret_cast<int64_t*>(rowwise_target_ptr);
2118  const auto remote_ptr = *count_distinct_ptr_ptr;
2119  if (remote_ptr) {
2120  const auto ptr = storage_->mappedPtr(remote_ptr);
2121  if (ptr) {
2122  *count_distinct_ptr_ptr = ptr;
2123  } else {
2124  // need to create a zero filled buffer for this remote_ptr
2125  const auto& count_distinct_desc =
2126  query_mem_desc_.count_distinct_descriptors_[target_logical_idx];
2127  const auto bitmap_byte_sz = count_distinct_desc.sub_bitmap_count == 1
2128  ? count_distinct_desc.bitmapSizeBytes()
2129  : count_distinct_desc.bitmapPaddedSizeBytes();
2130  auto count_distinct_buffer = row_set_mem_owner_->allocateCountDistinctBuffer(
2131  bitmap_byte_sz, /*thread_idx=*/0);
2132  *count_distinct_ptr_ptr = reinterpret_cast<int64_t>(count_distinct_buffer);
2133  }
2134  }
2135  }
2136  return int64_t(0);
2137  }
2138  if (target_info.sql_type.is_geometry()) {
2139  return makeGeoTargetValue(
2140  rowwise_target_ptr, slot_idx, target_info, target_logical_idx, entry_buff_idx);
2141  }
2142 
2143  auto ptr1 = rowwise_target_ptr;
2144  int8_t compact_sz1 = query_mem_desc_.getPaddedSlotWidthBytes(slot_idx);
// NOTE(review): the first half of this condition (line 2145) is elided —
// presumably the single-column-perfect-hash check; confirm against the source.
2146  !query_mem_desc_.hasKeylessHash() && !target_info.is_agg) {
2147  // Single column perfect hash group by can utilize one slot for both the key and the
2148  // target value if both values fit in 8 bytes. Use the target value actual size for
2149  // this case. If they don't, the target value should be 8 bytes, so we can still use
2150  // the actual size rather than the compact size.
2151  compact_sz1 = query_mem_desc_.getLogicalSlotWidthBytes(slot_idx);
2152  }
2153 
2154  // logic for deciding width of column
2155  if (target_info.agg_kind == kAVG || is_real_str_or_array(target_info)) {
2156  const auto ptr2 =
2157  rowwise_target_ptr + query_mem_desc_.getPaddedSlotWidthBytes(slot_idx);
2158  int8_t compact_sz2 = 0;
2159  // Skip reading the second slot if we have a none encoded string and are using
2160  // the none encoded strings buffer attached to ResultSetStorage
// NOTE(review): the first line of this condition (2161) is elided —
// presumably a check that separate varlen storage is NOT in use.
2162  (target_info.sql_type.is_array() ||
2163  (target_info.sql_type.is_string() &&
2164  target_info.sql_type.get_compression() == kENCODING_NONE)))) {
2165  compact_sz2 = query_mem_desc_.getPaddedSlotWidthBytes(slot_idx + 1);
2166  }
2167  if (separate_varlen_storage_valid_ && target_info.is_agg) {
2168  compact_sz2 = 8; // TODO(adb): is there a better way to do this?
2169  }
2170  CHECK(ptr2);
2171  return target_info.agg_kind == kAVG
2172  ? make_avg_target_value(ptr1, compact_sz1, ptr2, compact_sz2, target_info)
2173  : makeVarlenTargetValue(ptr1,
2174  compact_sz1,
2175  ptr2,
2176  compact_sz2,
2177  target_info,
2178  target_logical_idx,
2179  translate_strings,
2180  entry_buff_idx);
2181  }
// NOTE(review): the first half of this condition (line 2182) is elided —
// presumably a check on targetGroupbyIndicesSize(); confirm against the source.
2183  query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) < 0) {
2184  return makeTargetValue(ptr1,
2185  compact_sz1,
2186  target_info,
2187  target_logical_idx,
2188  translate_strings,
2189  decimal_to_double,
2190  entry_buff_idx);
2191  }
// The target value is materialized in the group-by key: read it from the key
// buffer at the recorded group-by index instead of a target slot.
2192  const auto key_width = query_mem_desc_.getEffectiveKeyWidth();
2193  ptr1 = keys_ptr + query_mem_desc_.getTargetGroupbyIndex(target_logical_idx) * key_width;
2194  return makeTargetValue(ptr1,
2195  key_width,
2196  target_info,
2197  target_logical_idx,
2198  translate_strings,
2199  decimal_to_double,
2200  entry_buff_idx);
2201 }
2202 
2203 // Returns true iff the entry at position entry_idx in buff contains a valid row.
2204 bool ResultSetStorage::isEmptyEntry(const size_t entry_idx, const int8_t* buff) const {
// NOTE(review): several guard lines (2205-2206, 2209, 2212-2215, 2220,
// 2222-2223, 2227, 2229-2230) are elided in this listing — the branch
// conditions below must be confirmed against the original file.
2207  return false;
2208  }
// Columnar layouts delegate to the columnar-specific check.
2210  return isEmptyEntryColumnar(entry_idx, buff);
2211  }
// Keyless-hash path: emptiness is detected by comparing the designated
// target slot against its initialization value.
2216  CHECK_LT(static_cast<size_t>(query_mem_desc_.getTargetIdxForKey()),
2217  target_init_vals_.size());
2218  const auto rowwise_target_ptr = row_ptr_rowwise(buff, query_mem_desc_, entry_idx);
2219  const auto target_slot_off = result_set::get_byteoff_of_slot(
2221  return read_int_from_buff(rowwise_target_ptr + target_slot_off,
2224  target_init_vals_[query_mem_desc_.getTargetIdxForKey()];
2225  } else {
// Keyed path: an empty entry is marked by the EMPTY_KEY sentinel in the
// first group-by key, read at the effective key width.
2226  const auto keys_ptr = row_ptr_rowwise(buff, query_mem_desc_, entry_idx);
2228  case 4:
2231  return *reinterpret_cast<const int32_t*>(keys_ptr) == EMPTY_KEY_32;
2232  case 8:
2233  return *reinterpret_cast<const int64_t*>(keys_ptr) == EMPTY_KEY_64;
2234  default:
2235  CHECK(false);
2236  return true;
2237  }
2238  }
2239 }
2240 
2241 /*
2242  * Returns true if the entry contain empty keys
2243  * This function should only be used with columnar format.
2244  */
2245 bool ResultSetStorage::isEmptyEntryColumnar(const size_t entry_idx,
2246  const int8_t* buff) const {
// NOTE(review): several guard lines (2247-2249, 2252, 2258-2261, 2265, 2268,
// 2270-2271, 2275, 2278) are elided in this listing — the branch conditions
// below must be confirmed against the original file.
2250  return false;
2251  }
2253  // For table functions the entry count should always be set to the actual output size
2254  // (i.e. there are not empty entries), so just assume value is non-empty
2255  CHECK_LT(entry_idx, getEntryCount());
2256  return false;
2257  }
// Keyless-hash path: compare the designated target slot against its
// initialization value to detect emptiness.
2262  CHECK_LT(static_cast<size_t>(query_mem_desc_.getTargetIdxForKey()),
2263  target_init_vals_.size());
2264  const auto col_buff = advance_col_buff_to_slot(
2266  const auto entry_buff =
2267  col_buff + entry_idx * query_mem_desc_.getPaddedSlotWidthBytes(
2269  return read_int_from_buff(entry_buff,
2272  target_init_vals_[query_mem_desc_.getTargetIdxForKey()];
2273  } else {
2274  // it's enough to find the first group key which is empty
2276  return reinterpret_cast<const int64_t*>(buff)[entry_idx] == EMPTY_KEY_64;
2277  } else {
// Read the first group-by column at its declared width and compare against
// the width-matched EMPTY_KEY sentinel.
2279  const auto target_buff = buff + query_mem_desc_.getPrependedGroupColOffInBytes(0);
2280  switch (query_mem_desc_.groupColWidth(0)) {
2281  case 8:
2282  return reinterpret_cast<const int64_t*>(target_buff)[entry_idx] == EMPTY_KEY_64;
2283  case 4:
2284  return reinterpret_cast<const int32_t*>(target_buff)[entry_idx] == EMPTY_KEY_32;
2285  case 2:
2286  return reinterpret_cast<const int16_t*>(target_buff)[entry_idx] == EMPTY_KEY_16;
2287  case 1:
2288  return reinterpret_cast<const int8_t*>(target_buff)[entry_idx] == EMPTY_KEY_8;
2289  default:
2290  CHECK(false);
2291  }
2292  }
2293  return false;
2294  }
2295  return false;
2296 }
2297 
2298 namespace {
2299 
// Binary search over [lo, hi) for the index of the first entry reported
// empty by is_empty_fn, assuming all non-empty entries precede all empty
// ones. Returns hi when no entry in the range is empty. Callers must pass a
// non-empty range (hi > lo).
template <typename T>
inline size_t make_bin_search(size_t lo, size_t hi, T&& is_empty_fn) {
  // Fast path: if the last entry is non-empty, nothing in the range is.
  if (!is_empty_fn(hi - 1)) {
    return hi;
  }
  size_t first_empty = hi - 1;
  // Invariant: entries at [0, lo) are non-empty; first_empty is empty.
  while (lo != first_empty) {
    const size_t mid = (lo + first_empty) / 2;
    if (is_empty_fn(mid)) {
      first_empty = mid;
    } else {
      lo = mid + 1;
    }
  }
  return first_empty;
}
2319 
2320 } // namespace
2321 
2323  // Note that table function result sets should never use this path as the row count
2324  // can be known statically (as the output buffers do not contain empty entries)
// NOTE(review): the signature line (2322) and some guards (2325-2326, 2332)
// are elided in this listing; per the class index this is
// ResultSetStorage::binSearchRowCount() const. Confirm the elided branch
// condition against the original file.
2327 
2328  if (!query_mem_desc_.getEntryCount()) {
2329  return 0;
2330  }
2331 
// Columnar path: keys are a flat array of 64-bit values, so each entry's
// emptiness can be probed by direct indexing.
2333  return make_bin_search(0, query_mem_desc_.getEntryCount(), [this](size_t idx) {
2334  return reinterpret_cast<const int64_t*>(buff_)[idx] == EMPTY_KEY_64;
2335  });
2336  } else {
// Rowwise path: locate each row's key area before comparing the sentinel.
2337  return make_bin_search(0, query_mem_desc_.getEntryCount(), [this](size_t idx) {
2338  const auto keys_ptr = row_ptr_rowwise(buff_, query_mem_desc_, idx);
2339  return *reinterpret_cast<const int64_t*>(keys_ptr) == EMPTY_KEY_64;
2340  });
2341  }
2342 }
2343 
// Convenience overload: checks emptiness against this storage's own buffer.
2344 bool ResultSetStorage::isEmptyEntry(const size_t entry_idx) const {
2345  return isEmptyEntry(entry_idx, buff_);
2346 }
2347 
2349  const InternalTargetValue& val,
2350  const bool float_argument_input) {
2351  if (ti.get_notnull()) {
2352  return false;
2353  }
2354  if (val.isInt()) {
2355  return val.i1 == null_val_bit_pattern(ti, float_argument_input);
2356  }
2357  if (val.isPair()) {
2358  return !val.i2;
2359  }
2360  if (val.isStr()) {
2361  return !val.i1;
2362  }
2363  CHECK(val.isNull());
2364  return true;
2365 }
InternalTargetValue getColumnInternal(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
#define CHECK_EQ(x, y)
Definition: Logger.h:297
bool slotIsVarlenOutput(const size_t slot_idx) const
std::pair< size_t, size_t > getStorageIndex(const size_t entry_idx) const
Definition: ResultSet.cpp:916
#define NULL_DOUBLE
Permutation permutation_
Definition: ResultSet.h:945
bool isEmptyEntry(const size_t entry_idx, const int8_t *buff) const
#define EMPTY_KEY_64
ENTRY_TYPE getRowWisePerfectHashEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:132
double decimal_to_double(const SQLTypeInfo &otype, int64_t oval)
static ScalarTargetValue nullScalarTargetValue(SQLTypeInfo const &, bool const translate_strings)
bool isPair() const
Definition: TargetValue.h:65
AppendedStorage appended_storage_
Definition: ResultSet.h:939
ENTRY_TYPE getColumnarPerfectHashEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
int64_t getTargetGroupbyIndex(const size_t target_idx) const
GeoReturnType geo_return_type_
Definition: ResultSet.h:979
bool isEmptyEntryColumnar(const size_t entry_idx, const int8_t *buff) const
ExecutorDeviceType
#define NULL_FLOAT
bool isStr() const
Definition: TargetValue.h:69
bool is_null
Definition: Datum.h:55
bool isLogicalSizedColumnsAllowed() const
T advance_to_next_columnar_target_buff(T target_ptr, const QueryMemoryDescriptor &query_mem_desc, const size_t target_slot_idx)
SQLTypeInfo sql_type
Definition: TargetInfo.h:52
TargetValue build_string_array_target_value(const int32_t *buff, const size_t buff_sz, const int dict_id, const bool translate_strings, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const Catalog_Namespace::Catalog *catalog)
size_t make_bin_search(size_t l, size_t r, T &&is_empty_fn)
std::unique_ptr< ArrayDatum > lazy_fetch_chunk(const int8_t *ptr, const int64_t varlen_ptr)
const Catalog_Namespace::Catalog * catalog_
Definition: ResultSet.h:947
std::vector< TargetValue > getNextRow(const bool translate_strings, const bool decimal_to_double) const
static bool isNull(const SQLTypeInfo &ti, const InternalTargetValue &val, const bool float_argument_input)
QueryMemoryDescriptor query_mem_desc_
Definition: ResultSet.h:937
#define UNREACHABLE()
Definition: Logger.h:333
#define CHECK_GE(x, y)
Definition: Logger.h:302
std::unique_ptr< ResultSetStorage > storage_
Definition: ResultSet.h:938
bool is_null_point(const SQLTypeInfo &geo_ti, const int8_t *coords, const size_t coords_sz)
std::string getString(int32_t string_id) const
High-level representation of SQL values.
ENTRY_TYPE getEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
size_t getEffectiveKeyWidth() const
Constants for Builtin SQL Types supported by HEAVY.AI.
TargetValue makeGeoTargetValue(const int8_t *geo_target_ptr, const size_t slot_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t entry_buff_idx) const
TargetValue getTargetValueFromBufferRowwise(int8_t *rowwise_target_ptr, int8_t *keys_ptr, const size_t entry_buff_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t slot_idx, const bool translate_strings, const bool decimal_to_double, const bool fixup_count_distinct_pointers) const
int64_t read_int_from_buff(const int8_t *ptr, const int8_t compact_sz)
size_t keep_first_
Definition: ResultSet.h:943
TargetValue make_avg_target_value(const int8_t *ptr1, const int8_t compact_sz1, const int8_t *ptr2, const int8_t compact_sz2, const TargetInfo &target_info)
double pair_to_double(const std::pair< int64_t, int64_t > &fp_pair, const SQLTypeInfo &ti, const bool float_argument_input)
bool takes_float_argument(const TargetInfo &target_info)
Definition: TargetInfo.h:102
std::vector< SerializedVarlenBufferStorage > serialized_varlen_buffer_
Definition: ResultSet.h:970
int64_t lazyReadInt(const int64_t ival, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:380
OneIntegerColumnRow getOneColRow(const size_t index) const
static bool isNullIval(SQLTypeInfo const &, bool const translate_strings, int64_t const ival)
TargetValue getTargetValueFromBufferColwise(const int8_t *col_ptr, const int8_t *keys_ptr, const QueryMemoryDescriptor &query_mem_desc, const size_t local_entry_idx, const size_t global_entry_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t slot_idx, const bool translate_strings, const bool decimal_to_double) const
T advance_target_ptr_row_wise(T target_ptr, const TargetInfo &target_info, const size_t slot_idx, const QueryMemoryDescriptor &query_mem_desc, const bool separate_varlen_storage)
#define CHECK_GT(x, y)
Definition: Logger.h:301
int64_t null_val_bit_pattern(const SQLTypeInfo &ti, const bool float_argument_input)
DEVICE void ChunkIter_get_nth(ChunkIter *it, int n, bool uncompress, VarlenDatum *result, bool *is_end)
Definition: ChunkIter.cpp:182
TargetValue build_array_target_value(const int8_t *buff, const size_t buff_sz, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
std::string to_string(char const *&&v)
SQLTypeInfo agg_arg_type
Definition: TargetInfo.h:53
int8_t * pointer
Definition: Datum.h:54
ScalarTargetValue makeStringTargetValue(SQLTypeInfo const &chosen_type, bool const translate_strings, int64_t const ival) const
#define NULL_INT
const ResultSet * result_set_
Definition: ResultSet.h:795
std::vector< TargetValue > getRowAtNoTranslations(const size_t index, const std::vector< bool > &targets_to_skip={}) const
const int8_t * advance_col_buff_to_slot(const int8_t *buff, const QueryMemoryDescriptor &query_mem_desc, const std::vector< TargetInfo > &targets, const size_t slot_idx, const bool separate_varlen_storage)
Definition: sqldefs.h:75
Serialization routines for geospatial types.
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:217
const SQLTypeInfo get_compact_type(const TargetInfo &target)
size_t global_entry_idx_
Definition: ResultSet.h:126
InternalTargetValue getVarlenOrderEntry(const int64_t str_ptr, const size_t str_len) const
const std::vector< TargetInfo > targets_
Definition: ResultSet.h:934
int8_t groupColWidth(const size_t key_idx) const
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: ResultSet.h:944
size_t get_byteoff_of_slot(const size_t slot_idx, const QueryMemoryDescriptor &query_mem_desc)
size_t drop_first_
Definition: ResultSet.h:942
bool is_agg
Definition: TargetInfo.h:50
size_t advance_slot(const size_t j, const TargetInfo &target_info, const bool separate_varlen_storage)
CONSTEXPR DEVICE bool is_null(const T &value)
static TargetValue build(const SQLTypeInfo &geo_ti, const ResultSet::GeoReturnType return_type, T &&...vals)
Classes representing a parse tree.
int64_t count_distinct_set_size(const int64_t set_handle, const CountDistinctDescriptor &count_distinct_desc)
Definition: CountDistinct.h:75
size_t getGroupbyColCount() const
#define CHECK_NE(x, y)
Definition: Logger.h:298
size_t targetGroupbyIndicesSize() const
size_t binSearchRowCount() const
boost::optional< std::vector< ScalarTargetValue >> ArrayTargetValue
Definition: TargetValue.h:181
int64_t lazy_decode(const ColumnLazyFetchInfo &col_lazy_fetch, const int8_t *byte_stream, const int64_t pos)
ScalarTargetValue convertToScalarTargetValue(SQLTypeInfo const &, bool const translate_strings, int64_t const val) const
CountDistinctDescriptors count_distinct_descriptors_
Definition: sqldefs.h:77
size_t getPaddedColWidthForRange(const size_t offset, const size_t range) const
StorageLookupResult findStorage(const size_t entry_idx) const
Definition: ResultSet.cpp:941
boost::optional< boost::variant< GeoPointTargetValue, GeoMultiPointTargetValue, GeoLineStringTargetValue, GeoMultiLineStringTargetValue, GeoPolyTargetValue, GeoMultiPolyTargetValue >> GeoTargetValue
Definition: TargetValue.h:187
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:98
bool isNull() const
Definition: TargetValue.h:67
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define EMPTY_KEY_8
void copyColumnIntoBuffer(const size_t column_idx, int8_t *output_buffer, const size_t output_buffer_size) const
bool g_enable_smem_group_by true
static double calculateQuantile(quantile::TDigest *const t_digest)
Definition: ResultSet.cpp:1037
T row_ptr_rowwise(T buff, const QueryMemoryDescriptor &query_mem_desc, const size_t entry_idx)
SQLAgg agg_kind
Definition: TargetInfo.h:51
const VarlenOutputInfo * getVarlenOutputInfo(const size_t entry_idx) const
QueryDescriptionType getQueryDescriptionType() const
SQLTypes decimal_to_int_type(const SQLTypeInfo &ti)
Definition: Datum.cpp:559
#define UNLIKELY(x)
Definition: likely.h:25
std::vector< TargetValue > getRowAt(const size_t index) const
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:299
bool is_real_str_or_array(const TargetInfo &target_info)
bool isSingleColumnGroupByWithPerfectHash() const
#define CHECK_LE(x, y)
Definition: Logger.h:300
void serialize(Archive &ar, RegisteredQueryHint &query_hint, const unsigned int version)
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:388
bool is_date_in_days() const
Definition: sqltypes.h:974
int64_t int_resize_cast(const int64_t ival, const size_t sz)
int get_array_context_logical_size() const
Definition: sqltypes.h:674
bool isGeoColOnGpu(const size_t col_idx) const
#define EMPTY_KEY_16
std::vector< std::vector< std::vector< const int8_t * > > > col_buffers_
Definition: ResultSet.h:958
#define DEF_GET_ENTRY_AT(query_type, columnar_output)
bool isRowAtEmpty(const size_t index) const
size_t entryCount() const
Returns the number of entries the result set is allocated to hold.
static auto fetch(const SQLTypeInfo &geo_ti, const ResultSet::GeoReturnType return_type, Data_Namespace::DataMgr *data_mgr, const bool fetch_data_from_gpu, const int device_id, T &&...vals)
std::string get_type_name() const
Definition: sqltypes.h:504
boost::variant< std::string, void * > NullableString
Definition: TargetValue.h:179
CUstream getQueryEngineCudaStreamForDevice(int device_num)
Definition: QueryEngine.cpp:7
TargetValue makeTargetValue(const int8_t *ptr, const int8_t compact_sz, const TargetInfo &target_info, const size_t target_logical_idx, const bool translate_strings, const bool decimal_to_double, const size_t entry_buff_idx) const
TO bit_cast(FROM &&from)
Definition: misc.h:298
static auto yieldGpuDatumFetcher(Data_Namespace::DataMgr *data_mgr_ptr, const int device_id)
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:389
const bool is_lazily_fetched
std::vector< std::vector< int64_t > > consistent_frag_sizes_
Definition: ResultSet.h:960
std::string getString(SQLTypeInfo const &, int64_t const ival) const
const ExecutorDeviceType device_type_
Definition: ResultSet.h:935
std::vector< TargetValue > getNextRowImpl(const bool translate_strings, const bool decimal_to_double) const
bool isInt() const
Definition: TargetValue.h:63
bool g_enable_watchdog false
Definition: Execute.cpp:79
static auto fetch(const SQLTypeInfo &geo_ti, const ResultSet::GeoReturnType return_type, T &&...vals)
#define CHECK(condition)
Definition: Logger.h:289
bool is_geometry() const
Definition: sqltypes.h:588
ScalarTargetValue make_scalar_tv(const T val)
size_t getBufferSizeBytes(const ExecutorDeviceType device_type) const
#define EMPTY_KEY_32
std::vector< ColumnLazyFetchInfo > lazy_fetch_info_
Definition: ResultSet.h:957
uint64_t exp_to_scale(const unsigned exp)
size_t crt_row_buff_idx_
Definition: ResultSet.h:940
int64_t inline_int_null_val(const SQL_TYPE_INFO &ti)
QueryDescriptionType
Definition: Types.h:29
std::vector< std::vector< std::vector< int64_t > > > frag_offsets_
Definition: ResultSet.h:959
bool is_any() const
Definition: sqltypes.h:573
Basic constructors and methods of the row set interface.
bool separate_varlen_storage_valid_
Definition: ResultSet.h:971
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
std::vector< TargetValue > getNextRowUnlocked(const bool translate_strings, const bool decimal_to_double) const
std::vector< std::pair< const int8_t *, const int64_t > > make_vals_vector(std::index_sequence< indices...>, const Tuple &tuple)
T advance_target_ptr_col_wise(T target_ptr, const TargetInfo &target_info, const size_t slot_idx, const QueryMemoryDescriptor &query_mem_desc, const bool separate_varlen_storage)
size_t advanceCursorToNextEntry() const
bool is_string() const
Definition: sqltypes.h:576
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:387
ENTRY_TYPE getColumnarBaselineEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
Definition: sqldefs.h:76
size_t crt_row_buff_idx_
Definition: ResultSet.h:125
HOST static DEVICE bool isFlatBuffer(const void *buffer)
Definition: FlatBuffer.h:186
InternalTargetValue getColumnInternal(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
bool isLessThan(SQLTypeInfo const &, int64_t const lhs, int64_t const rhs) const
std::vector< std::vector< TargetOffsets > > offsets_for_storage_
Definition: ResultSet.h:793
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:957
T get_cols_ptr(T buff, const QueryMemoryDescriptor &query_mem_desc)
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
Definition: sqldefs.h:74
bool global_entry_idx_valid_
Definition: ResultSet.h:127
std::unique_ptr< VarlenDatum > VarlenDatumPtr
#define IS_GEO(T)
Definition: sqltypes.h:298
bool isDirectColumnarConversionPossible() const
Definition: ResultSet.cpp:1468
size_t get_key_bytes_rowwise(const QueryMemoryDescriptor &query_mem_desc)
Definition: sqldefs.h:83
const int8_t * columnar_elem_ptr(const size_t entry_idx, const int8_t *col1_ptr, const int8_t compact_sz1)
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
bool is_array() const
Definition: sqltypes.h:584
TargetValue makeVarlenTargetValue(const int8_t *ptr1, const int8_t compact_sz1, const int8_t *ptr2, const int8_t compact_sz2, const TargetInfo &target_info, const size_t target_logical_idx, const bool translate_strings, const size_t entry_buff_idx) const
std::pair< int64_t, int64_t > get_frag_id_and_local_idx(const std::vector< std::vector< T >> &frag_offsets, const size_t tab_or_col_idx, const int64_t global_idx)
const std::vector< const int8_t * > & getColumnFrag(const size_t storge_idx, const size_t col_logical_idx, int64_t &global_idx) const
const Executor * getExecutor() const
std::unique_ptr< ArrayDatum > fetch_data_from_gpu(int64_t varlen_ptr, const int64_t length, Data_Namespace::DataMgr *data_mgr, const int device_id)
DEVICE void VarlenArray_get_nth(int8_t *buf, int n, ArrayDatum *result, bool *is_end)
Definition: sqltypes.h:1466
ENTRY_TYPE getRowWiseBaselineEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:180
int32_t getTargetIdxForKey() const
size_t length
Definition: Datum.h:53
size_t getPrependedGroupColOffInBytes(const size_t group_idx) const
const int device_id_
Definition: ResultSet.h:936