OmniSciDB  dfae7c3b14
ResultSet.cpp
/*
 * Copyright 2017 MapD Technologies, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @brief Basic constructors and methods of the row set interface.
 */

#include "ResultSet.h"

#include "Execute.h"
#include "GpuMemUtils.h"
#include "InPlaceSort.h"
#include "RuntimeFunctions.h"
#include "Shared/SqlTypesLayout.h"
#include "Shared/checked_alloc.h"
#include "Shared/likely.h"
#include "Shared/thread_count.h"
#include "Shared/threadpool.h"

#include <algorithm>
#include <bitset>
#include <future>
#include <numeric>

extern bool g_use_tbb_pool;

std::vector<int64_t> initialize_target_values_for_storage(
    const std::vector<TargetInfo>& targets) {
  std::vector<int64_t> target_init_vals;
  for (const auto& target_info : targets) {
    if (target_info.agg_kind == kCOUNT ||
        target_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
      target_init_vals.push_back(0);
      continue;
    }
    if (target_info.sql_type.is_column()) {
      int64_t init_val = null_val_bit_pattern(target_info.sql_type.get_subtype(),
                                              takes_float_argument(target_info));
      target_init_vals.push_back(target_info.is_agg ? init_val : 0);
    } else if (!target_info.sql_type.get_notnull()) {
      int64_t init_val =
          null_val_bit_pattern(target_info.sql_type, takes_float_argument(target_info));
      target_init_vals.push_back(target_info.is_agg ? init_val : 0);
    } else {
      target_init_vals.push_back(target_info.is_agg ? 0xdeadbeef : 0);
    }
    if (target_info.agg_kind == kAVG) {
      target_init_vals.push_back(0);
    } else if (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_geometry()) {
      for (int i = 1; i < 2 * target_info.sql_type.get_physical_coord_cols(); i++) {
        target_init_vals.push_back(0);
      }
    } else if (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_varlen()) {
      target_init_vals.push_back(0);
    }
  }
  return target_init_vals;
}

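// Illustrative sketch (not part of the original file, hypothetical target infos):
// for a target list {COUNT(*), AVG(x)} over a nullable DOUBLE column x, the loop
// above produces one zero slot for COUNT, the null bit pattern for AVG's sum slot,
// and a trailing zero for AVG's implicit count slot:
//
//   std::vector<TargetInfo> targets = {count_info, avg_info};
//   auto init_vals = initialize_target_values_for_storage(targets);
//   // init_vals == {0, null_val_bit_pattern(double_ti, false), 0}
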
ResultSetStorage::ResultSetStorage(const std::vector<TargetInfo>& targets,
                                   const QueryMemoryDescriptor& query_mem_desc,
                                   int8_t* buff,
                                   const bool buff_is_provided)
    : targets_(targets)
    , query_mem_desc_(query_mem_desc)
    , buff_(buff)
    , buff_is_provided_(buff_is_provided)
    , target_init_vals_(initialize_target_values_for_storage(targets)) {}

int8_t* ResultSetStorage::getUnderlyingBuffer() const {
  return buff_;
}

void ResultSet::keepFirstN(const size_t n) {
  CHECK_EQ(-1, cached_row_count_);
  keep_first_ = n;
}

void ResultSet::dropFirstN(const size_t n) {
  CHECK_EQ(-1, cached_row_count_);
  drop_first_ = n;
}

ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
                     const ExecutorDeviceType device_type,
                     const QueryMemoryDescriptor& query_mem_desc,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                     const Executor* executor)
    : targets_(targets)
    , device_type_(device_type)
    , device_id_(-1)
    , query_mem_desc_(query_mem_desc)
    , crt_row_buff_idx_(0)
    , fetched_so_far_(0)
    , drop_first_(0)
    , keep_first_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , executor_(executor)
    , data_mgr_(nullptr)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {}

ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
                     const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
                     const std::vector<std::vector<const int8_t*>>& col_buffers,
                     const std::vector<std::vector<int64_t>>& frag_offsets,
                     const std::vector<int64_t>& consistent_frag_sizes,
                     const ExecutorDeviceType device_type,
                     const int device_id,
                     const QueryMemoryDescriptor& query_mem_desc,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                     const Executor* executor)
    : targets_(targets)
    , device_type_(device_type)
    , device_id_(device_id)
    , query_mem_desc_(query_mem_desc)
    , crt_row_buff_idx_(0)
    , fetched_so_far_(0)
    , drop_first_(0)
    , keep_first_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , executor_(executor)
    , lazy_fetch_info_(lazy_fetch_info)
    , col_buffers_{col_buffers}
    , frag_offsets_{frag_offsets}
    , consistent_frag_sizes_{consistent_frag_sizes}
    , data_mgr_(nullptr)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {}

ResultSet::ResultSet(const std::shared_ptr<const Analyzer::Estimator> estimator,
                     const ExecutorDeviceType device_type,
                     const int device_id,
                     Data_Namespace::DataMgr* data_mgr)
    : device_type_(device_type)
    , device_id_(device_id)
    , query_mem_desc_{}
    , crt_row_buff_idx_(0)
    , estimator_(estimator)
    , data_mgr_(data_mgr)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {
  if (device_type == ExecutorDeviceType::GPU) {
    device_estimator_buffer_ = CudaAllocator::allocGpuAbstractBuffer(
        data_mgr_, estimator_->getBufferSize(), device_id_);
    data_mgr->getCudaMgr()->zeroDeviceMem(device_estimator_buffer_->getMemoryPtr(),
                                          estimator_->getBufferSize(),
                                          device_id_);
  } else {
    host_estimator_buffer_ =
        static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
  }
}

ResultSet::ResultSet(const std::string& explanation)
    : device_type_(ExecutorDeviceType::CPU)
    , device_id_(-1)
    , fetched_so_far_(0)
    , separate_varlen_storage_valid_(false)
    , explanation_(explanation)
    , just_explain_(true)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {}

ResultSet::ResultSet(int64_t queue_time_ms,
                     int64_t render_time_ms,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
    : device_type_(ExecutorDeviceType::CPU)
    , device_id_(-1)
    , fetched_so_far_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , timings_(QueryExecutionTimings{queue_time_ms, render_time_ms, 0, 0})
    , separate_varlen_storage_valid_(false)
    , just_explain_(true)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {}

ResultSet::~ResultSet() {
  if (storage_) {
    if (!storage_->buff_is_provided_) {
      CHECK(storage_->getUnderlyingBuffer());
      free(storage_->getUnderlyingBuffer());
    }
  }
  for (auto& storage : appended_storage_) {
    if (storage && !storage->buff_is_provided_) {
      free(storage->getUnderlyingBuffer());
    }
  }
  if (host_estimator_buffer_) {
    CHECK(device_type_ == ExecutorDeviceType::CPU || device_estimator_buffer_);
    free(host_estimator_buffer_);
  }
  if (device_estimator_buffer_) {
    CHECK(data_mgr_);
    data_mgr_->free(device_estimator_buffer_);
  }
}

ExecutorDeviceType ResultSet::getDeviceType() const {
  return device_type_;
}

const ResultSetStorage* ResultSet::allocateStorage() const {
  CHECK(!storage_);
  CHECK(row_set_mem_owner_);
  auto buff =
      row_set_mem_owner_->allocate(query_mem_desc_.getBufferSizeBytes(device_type_));
  storage_.reset(
      new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
  return storage_.get();
}

const ResultSetStorage* ResultSet::allocateStorage(
    int8_t* buff,
    const std::vector<int64_t>& target_init_vals) const {
  CHECK(buff);
  CHECK(!storage_);
  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
  storage_->target_init_vals_ = target_init_vals;
  return storage_.get();
}

const ResultSetStorage* ResultSet::allocateStorage(
    const std::vector<int64_t>& target_init_vals) const {
  CHECK(!storage_);
  CHECK(row_set_mem_owner_);
  auto buff =
      row_set_mem_owner_->allocate(query_mem_desc_.getBufferSizeBytes(device_type_));
  storage_.reset(
      new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
  storage_->target_init_vals_ = target_init_vals;
  return storage_.get();
}

size_t ResultSet::getCurrentRowBufferIndex() const {
  if (crt_row_buff_idx_ == 0) {
    throw std::runtime_error("current row buffer iteration index is undefined");
  }
  return crt_row_buff_idx_ - 1;
}

// Note: that.appended_storage_ does not get appended to this.
void ResultSet::append(ResultSet& that) {
  CHECK_EQ(-1, cached_row_count_);
  if (!that.storage_) {
    return;
  }
  appended_storage_.push_back(std::move(that.storage_));
  query_mem_desc_.setEntryCount(
      query_mem_desc_.getEntryCount() +
      appended_storage_.back()->query_mem_desc_.getEntryCount());
  chunks_.insert(chunks_.end(), that.chunks_.begin(), that.chunks_.end());
  col_buffers_.insert(
      col_buffers_.end(), that.col_buffers_.begin(), that.col_buffers_.end());
  frag_offsets_.insert(
      frag_offsets_.end(), that.frag_offsets_.begin(), that.frag_offsets_.end());
  consistent_frag_sizes_.insert(consistent_frag_sizes_.end(),
                                that.consistent_frag_sizes_.begin(),
                                that.consistent_frag_sizes_.end());
  chunk_iters_.insert(
      chunk_iters_.end(), that.chunk_iters_.begin(), that.chunk_iters_.end());
  if (separate_varlen_storage_valid_) {
    CHECK(that.separate_varlen_storage_valid_);
    serialized_varlen_buffer_.insert(serialized_varlen_buffer_.end(),
                                     that.serialized_varlen_buffer_.begin(),
                                     that.serialized_varlen_buffer_.end());
  }
  for (auto& buff : that.literal_buffers_) {
    literal_buffers_.push_back(std::move(buff));
  }
}

const ResultSetStorage* ResultSet::getStorage() const {
  return storage_.get();
}

size_t ResultSet::colCount() const {
  return just_explain_ ? 1 : targets_.size();
}

SQLTypeInfo ResultSet::getColType(const size_t col_idx) const {
  if (just_explain_) {
    return SQLTypeInfo(kTEXT, false);
  }
  CHECK_LT(col_idx, targets_.size());
  return targets_[col_idx].agg_kind == kAVG ? SQLTypeInfo(kDOUBLE, false)
                                            : targets_[col_idx].sql_type;
}

namespace {

size_t get_truncated_row_count(size_t total_row_count, size_t limit, size_t offset) {
  if (total_row_count < offset) {
    return 0;
  }

  size_t total_truncated_row_count = total_row_count - offset;

  if (limit) {
    return std::min(total_truncated_row_count, limit);
  }

  return total_truncated_row_count;
}

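// Worked example (illustrative): with total_row_count = 100, offset = 30 and
// limit = 50, the offset leaves 70 rows and the limit clamps them to 50:
//
//   get_truncated_row_count(100, 50, 30) == 50
//   get_truncated_row_count(100, 0, 30)  == 70  // a limit of 0 means "no limit"
//   get_truncated_row_count(20, 50, 30)  == 0   // offset past the end
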
} // namespace

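// rowCount() below picks the cheapest available strategy, in order: a constant for
// explain-only results, the (clamped) permutation size for sorted results, the
// memoized count when one is cached, a binary search for projection layouts, a
// parallel scan for large entry counts, and finally serial row iteration.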
size_t ResultSet::rowCount(const bool force_parallel) const {
  if (just_explain_) {
    return 1;
  }
  if (!permutation_.empty()) {
    if (drop_first_ > permutation_.size()) {
      return 0;
    }
    const auto limited_row_count = keep_first_ + drop_first_;
    return limited_row_count ? std::min(limited_row_count, permutation_.size())
                             : permutation_.size();
  }
  if (cached_row_count_ != -1) {
    CHECK_GE(cached_row_count_, 0);
    return cached_row_count_;
  }
  if (!storage_) {
    return 0;
  }
  if (permutation_.empty() &&
      query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection) {
    return binSearchRowCount();
  }
  if (force_parallel || entryCount() > 20000) {
    return parallelRowCount();
  }
  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
  moveToBegin();
  size_t row_count{0};
  while (true) {
    auto crt_row = getNextRowUnlocked(false, false);
    if (crt_row.empty()) {
      break;
    }
    ++row_count;
  }
  moveToBegin();
  return row_count;
}

void ResultSet::setCachedRowCount(const size_t row_count) const {
  CHECK(cached_row_count_ == -1 || cached_row_count_ == static_cast<int64_t>(row_count));
  cached_row_count_ = row_count;
}

size_t ResultSet::binSearchRowCount() const {
  if (!storage_) {
    return 0;
  }

  size_t row_count = storage_->binSearchRowCount();
  for (auto& s : appended_storage_) {
    row_count += s->binSearchRowCount();
  }

  return get_truncated_row_count(row_count, getLimit(), drop_first_);
}

size_t ResultSet::parallelRowCount() const {
  auto execute_parallel_row_count = [this](auto counter_threads) -> size_t {
    const size_t worker_count = cpu_threads();
    for (size_t i = 0,
                start_entry = 0,
                stride = (entryCount() + worker_count - 1) / worker_count;
         i < worker_count && start_entry < entryCount();
         ++i, start_entry += stride) {
      const auto end_entry = std::min(start_entry + stride, entryCount());
      counter_threads.spawn(
          [this](const size_t start, const size_t end) {
            size_t row_count{0};
            for (size_t i = start; i < end; ++i) {
              if (!isRowAtEmpty(i)) {
                ++row_count;
              }
            }
            return row_count;
          },
          start_entry,
          end_entry);
    }
    const auto row_counts = counter_threads.join();
    const size_t row_count = std::accumulate(row_counts.begin(), row_counts.end(), 0);
    return row_count;
  };
  // will fall back to futures threadpool if TBB is not enabled
  const auto row_count =
      g_use_tbb_pool
          ? execute_parallel_row_count(threadpool::ThreadPool<size_t>())
          : execute_parallel_row_count(threadpool::FuturesThreadPool<size_t>());

  return get_truncated_row_count(row_count, getLimit(), drop_first_);
}

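// Illustrative arithmetic for the stride partitioning above: with entryCount() ==
// 100000 and worker_count == 8, stride == (100000 + 7) / 8 == 12500, so worker i
// scans entries [i * 12500, min((i + 1) * 12500, 100000)) and the per-worker
// counts are summed by std::accumulate after join().
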
bool ResultSet::definitelyHasNoRows() const {
  return !storage_ && !estimator_ && !just_explain_;
}

const QueryMemoryDescriptor& ResultSet::getQueryMemDesc() const {
  CHECK(storage_);
  return storage_->query_mem_desc_;
}

const std::vector<TargetInfo>& ResultSet::getTargetInfos() const {
  return targets_;
}

const std::vector<int64_t>& ResultSet::getTargetInitVals() const {
  CHECK(storage_);
  return storage_->target_init_vals_;
}

int8_t* ResultSet::getDeviceEstimatorBuffer() const {
  CHECK(device_type_ == ExecutorDeviceType::GPU);
  CHECK(device_estimator_buffer_);
  return device_estimator_buffer_->getMemoryPtr();
}

int8_t* ResultSet::getHostEstimatorBuffer() const {
  return host_estimator_buffer_;
}

void ResultSet::syncEstimatorBuffer() const {
  CHECK(device_type_ == ExecutorDeviceType::GPU);
  CHECK(!host_estimator_buffer_);
  CHECK_EQ(size_t(0), estimator_->getBufferSize() % sizeof(int64_t));
  host_estimator_buffer_ =
      static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
  CHECK(device_estimator_buffer_);
  auto device_buffer_ptr = device_estimator_buffer_->getMemoryPtr();
  copy_from_gpu(data_mgr_,
                host_estimator_buffer_,
                reinterpret_cast<CUdeviceptr>(device_buffer_ptr),
                estimator_->getBufferSize(),
                device_id_);
}

void ResultSet::setQueueTime(const int64_t queue_time) {
  timings_.executor_queue_time = queue_time;
}

void ResultSet::setKernelQueueTime(const int64_t kernel_queue_time) {
  timings_.kernel_queue_time = kernel_queue_time;
}

void ResultSet::addCompilationQueueTime(const int64_t compilation_queue_time) {
  timings_.compilation_queue_time += compilation_queue_time;
}

int64_t ResultSet::getQueueTime() const {
  return timings_.executor_queue_time + timings_.kernel_queue_time +
         timings_.compilation_queue_time;
}

int64_t ResultSet::getRenderTime() const {
  return timings_.render_time;
}

void ResultSet::moveToBegin() const {
  crt_row_buff_idx_ = 0;
  fetched_so_far_ = 0;
}

bool ResultSet::isTruncated() const {
  return keep_first_ + drop_first_;
}

bool ResultSet::isExplain() const {
  return just_explain_;
}

int ResultSet::getDeviceId() const {
  return device_id_;
}

QueryMemoryDescriptor ResultSet::fixupQueryMemoryDescriptor(
    const QueryMemoryDescriptor& query_mem_desc) {
  auto query_mem_desc_copy = query_mem_desc;
  query_mem_desc_copy.resetGroupColWidths(
      std::vector<int8_t>(query_mem_desc_copy.getGroupbyColCount(), 8));
  if (query_mem_desc.didOutputColumnar()) {
    return query_mem_desc_copy;
  }
  query_mem_desc_copy.alignPaddedSlots();
  return query_mem_desc_copy;
}

void ResultSet::sort(const std::list<Analyzer::OrderEntry>& order_entries,
                     const size_t top_n) {
  auto timer = DEBUG_TIMER(__func__);

  if (!storage_) {
    return;
  }
  CHECK_EQ(-1, cached_row_count_);
  CHECK(!targets_.empty());
#ifdef HAVE_CUDA
  if (canUseFastBaselineSort(order_entries, top_n)) {
    baselineSort(order_entries, top_n);
    return;
  }
#endif // HAVE_CUDA
  if (query_mem_desc_.sortOnGpu()) {
    try {
      radixSortOnGpu(order_entries);
    } catch (const OutOfMemory&) {
      LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
      radixSortOnCpu(order_entries);
    } catch (const std::bad_alloc&) {
      LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
      radixSortOnCpu(order_entries);
    }
    return;
  }
  // This check isn't strictly required, but allows the index buffer to be 32-bit.
  if (query_mem_desc_.getEntryCount() > std::numeric_limits<uint32_t>::max()) {
    throw RowSortException("Sorting more than 4B elements not supported");
  }

  CHECK(permutation_.empty());

  const bool use_heap{order_entries.size() == 1 && top_n};
  if (use_heap && entryCount() > 100000) {
    if (g_enable_watchdog && (entryCount() > 20000000)) {
      throw WatchdogException("Sorting the result would be too slow");
    }
    parallelTop(order_entries, top_n);
    return;
  }

  if (g_enable_watchdog && (entryCount() > Executor::baseline_threshold)) {
    throw WatchdogException("Sorting the result would be too slow");
  }

  permutation_ = initPermutationBuffer(0, 1);

  auto compare = createComparator(order_entries, use_heap);

  if (use_heap) {
    topPermutation(permutation_, top_n, compare);
  } else {
    sortPermutation(compare);
  }
}

#ifdef HAVE_CUDA
void ResultSet::baselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
                             const size_t top_n) {
  auto timer = DEBUG_TIMER(__func__);
  // If we only have one GPU, it's usually faster to do a multi-threaded radix sort on CPU.
  if (getGpuCount() > 1) {
    try {
      doBaselineSort(ExecutorDeviceType::GPU, order_entries, top_n);
    } catch (...) {
      doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n);
    }
  } else {
    doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n);
  }
}
#endif // HAVE_CUDA

std::vector<uint32_t> ResultSet::initPermutationBuffer(const size_t start,
                                                       const size_t step) {
  auto timer = DEBUG_TIMER(__func__);
  CHECK_NE(size_t(0), step);
  std::vector<uint32_t> permutation;
  const auto total_entries = query_mem_desc_.getEntryCount();
  permutation.reserve(total_entries / step);
  for (size_t i = start; i < total_entries; i += step) {
    const auto storage_lookup_result = findStorage(i);
    const auto lhs_storage = storage_lookup_result.storage_ptr;
    const auto off = storage_lookup_result.fixedup_entry_idx;
    CHECK(lhs_storage);
    if (!lhs_storage->isEmptyEntry(off)) {
      permutation.emplace_back(i);
    }
  }
  return permutation;
}

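// Illustrative sketch: initPermutationBuffer(start, step) visits every step-th
// entry, so with 4 workers each worker w builds the sub-permutation of non-empty
// entries {w, w + 4, w + 8, ...}; initPermutationBuffer(0, 1) enumerates them all.
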
const std::vector<uint32_t>& ResultSet::getPermutationBuffer() const {
  return permutation_;
}

void ResultSet::parallelTop(const std::list<Analyzer::OrderEntry>& order_entries,
                            const size_t top_n) {
  auto timer = DEBUG_TIMER(__func__);
  const size_t step = cpu_threads();
  std::vector<std::vector<uint32_t>> strided_permutations(step);
  std::vector<std::future<void>> init_futures;
  for (size_t start = 0; start < step; ++start) {
    init_futures.emplace_back(
        std::async(std::launch::async, [this, start, step, &strided_permutations] {
          strided_permutations[start] = initPermutationBuffer(start, step);
        }));
  }
  for (auto& init_future : init_futures) {
    init_future.wait();
  }
  for (auto& init_future : init_futures) {
    init_future.get();
  }
  auto compare = createComparator(order_entries, true);
  std::vector<std::future<void>> top_futures;
  for (auto& strided_permutation : strided_permutations) {
    top_futures.emplace_back(
        std::async(std::launch::async, [&strided_permutation, &compare, top_n] {
          topPermutation(strided_permutation, top_n, compare);
        }));
  }
  for (auto& top_future : top_futures) {
    top_future.wait();
  }
  for (auto& top_future : top_futures) {
    top_future.get();
  }
  permutation_.reserve(strided_permutations.size() * top_n);
  for (const auto& strided_permutation : strided_permutations) {
    permutation_.insert(
        permutation_.end(), strided_permutation.begin(), strided_permutation.end());
  }
  topPermutation(permutation_, top_n, compare);
}

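// Note on the reduction above: each worker heap-selects the top_n candidates of
// its stride, the per-worker winners are concatenated into permutation_, and a
// final topPermutation() pass selects the global top_n from at most
// cpu_threads() * top_n survivors.
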
std::pair<size_t, size_t> ResultSet::getStorageIndex(const size_t entry_idx) const {
  size_t fixedup_entry_idx = entry_idx;
  auto entry_count = storage_->query_mem_desc_.getEntryCount();
  const bool is_rowwise_layout = !storage_->query_mem_desc_.didOutputColumnar();
  if (fixedup_entry_idx < entry_count) {
    return {0, fixedup_entry_idx};
  }
  fixedup_entry_idx -= entry_count;
  for (size_t i = 0; i < appended_storage_.size(); ++i) {
    const auto& desc = appended_storage_[i]->query_mem_desc_;
    CHECK_NE(is_rowwise_layout, desc.didOutputColumnar());
    entry_count = desc.getEntryCount();
    if (fixedup_entry_idx < entry_count) {
      return {i + 1, fixedup_entry_idx};
    }
    fixedup_entry_idx -= entry_count;
  }
  UNREACHABLE() << "entry_idx = " << entry_idx << ", query_mem_desc_.getEntryCount() = "
                << query_mem_desc_.getEntryCount();
  return {};
}

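// Worked example (illustrative): with storage_ holding 100 entries and two
// appended storages of 50 entries each, getStorageIndex(120) skips the first
// 100 entries and returns {1, 20}, i.e. fixed-up index 20 in appended_storage_[0].
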
ResultSet::StorageLookupResult ResultSet::findStorage(const size_t entry_idx) const {
  auto [stg_idx, fixedup_entry_idx] = getStorageIndex(entry_idx);
  return {stg_idx ? appended_storage_[stg_idx - 1].get() : storage_.get(),
          fixedup_entry_idx,
          stg_idx};
}

template <typename BUFFER_ITERATOR_TYPE>
void ResultSet::ResultSetComparator<
    BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumns() {
  for (const auto& order_entry : order_entries_) {
    if (is_distinct_target(result_set_->targets_[order_entry.tle_no - 1])) {
      count_distinct_materialized_buffers_.emplace_back(
          materializeCountDistinctColumn(order_entry));
    }
  }
}

template <typename BUFFER_ITERATOR_TYPE>
std::vector<int64_t>
ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumn(
    const Analyzer::OrderEntry& order_entry) const {
  const size_t num_storage_entries = result_set_->query_mem_desc_.getEntryCount();
  std::vector<int64_t> count_distinct_materialized_buffer(num_storage_entries);
  const CountDistinctDescriptor count_distinct_descriptor =
      result_set_->query_mem_desc_.getCountDistinctDescriptor(order_entry.tle_no - 1);
  const size_t num_non_empty_entries = result_set_->permutation_.size();
  const size_t worker_count = cpu_threads();
  // TODO(tlm): Allow use of tbb after we determine how to easily encapsulate the choice
  // between thread pool types
  threadpool::FuturesThreadPool<void> thread_pool;
  for (size_t i = 0,
              start_entry = 0,
              stride = (num_non_empty_entries + worker_count - 1) / worker_count;
       i < worker_count && start_entry < num_non_empty_entries;
       ++i, start_entry += stride) {
    const auto end_entry = std::min(start_entry + stride, num_non_empty_entries);
    thread_pool.spawn(
        [this](const size_t start,
               const size_t end,
               const Analyzer::OrderEntry& order_entry,
               const CountDistinctDescriptor& count_distinct_descriptor,
               std::vector<int64_t>& count_distinct_materialized_buffer) {
          for (size_t i = start; i < end; ++i) {
            const uint32_t permuted_idx = result_set_->permutation_[i];
            const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
            const auto storage = storage_lookup_result.storage_ptr;
            const auto off = storage_lookup_result.fixedup_entry_idx;
            const auto value = buffer_itr_.getColumnInternal(
                storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
            count_distinct_materialized_buffer[permuted_idx] =
                count_distinct_set_size(value.i1, count_distinct_descriptor);
          }
        },
        start_entry,
        end_entry,
        std::cref(order_entry),
        std::cref(count_distinct_descriptor),
        std::ref(count_distinct_materialized_buffer));
  }
  thread_pool.join();
  return count_distinct_materialized_buffer;
}

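// The materialization above trades memory for comparison speed: computing
// count_distinct_set_size() once per entry and indexing the result by the
// permuted index keeps the sort comparator O(1) per count-distinct column,
// instead of walking a bitmap or sparse set on every comparison.
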
template <typename BUFFER_ITERATOR_TYPE>
bool ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::operator()(
    const uint32_t lhs,
    const uint32_t rhs) const {
  // NB: The compare function must define a strict weak ordering, otherwise
  // std::sort will trigger a segmentation fault (or corrupt memory).
  const auto lhs_storage_lookup_result = result_set_->findStorage(lhs);
  const auto rhs_storage_lookup_result = result_set_->findStorage(rhs);
  const auto lhs_storage = lhs_storage_lookup_result.storage_ptr;
  const auto rhs_storage = rhs_storage_lookup_result.storage_ptr;
  const auto fixedup_lhs = lhs_storage_lookup_result.fixedup_entry_idx;
  const auto fixedup_rhs = rhs_storage_lookup_result.fixedup_entry_idx;
  size_t materialized_count_distinct_buffer_idx{0};

  for (const auto& order_entry : order_entries_) {
    CHECK_GE(order_entry.tle_no, 1);
    const auto& agg_info = result_set_->targets_[order_entry.tle_no - 1];
    const auto entry_ti = get_compact_type(agg_info);
    bool float_argument_input = takes_float_argument(agg_info);
    // Need to determine whether the float value has been stored as a float or
    // compacted to a different (often larger, 8-byte) slot; in the distributed
    // case the floats are actually 4 bytes.
    // TODO: takes_float_argument() is widely used; this problem may exist elsewhere.
    if (entry_ti.get_type() == kFLOAT) {
      const auto is_col_lazy =
          !result_set_->lazy_fetch_info_.empty() &&
          result_set_->lazy_fetch_info_[order_entry.tle_no - 1].is_lazily_fetched;
      if (result_set_->query_mem_desc_.getPaddedSlotWidthBytes(order_entry.tle_no - 1) ==
          sizeof(float)) {
        float_argument_input =
            result_set_->query_mem_desc_.didOutputColumnar() ? !is_col_lazy : true;
      }
    }

    const bool use_desc_cmp = use_heap_ ? !order_entry.is_desc : order_entry.is_desc;

    if (UNLIKELY(is_distinct_target(agg_info))) {
      CHECK_LT(materialized_count_distinct_buffer_idx,
               count_distinct_materialized_buffers_.size());
      const auto& count_distinct_materialized_buffer =
          count_distinct_materialized_buffers_[materialized_count_distinct_buffer_idx];
      const auto lhs_sz = count_distinct_materialized_buffer[lhs];
      const auto rhs_sz = count_distinct_materialized_buffer[rhs];
      ++materialized_count_distinct_buffer_idx;
      if (lhs_sz == rhs_sz) {
        continue;
      }
      return use_desc_cmp ? lhs_sz > rhs_sz : lhs_sz < rhs_sz;
    }

    const auto lhs_v = buffer_itr_.getColumnInternal(lhs_storage->buff_,
                                                     fixedup_lhs,
                                                     order_entry.tle_no - 1,
                                                     lhs_storage_lookup_result);
    const auto rhs_v = buffer_itr_.getColumnInternal(rhs_storage->buff_,
                                                     fixedup_rhs,
                                                     order_entry.tle_no - 1,
                                                     rhs_storage_lookup_result);

    if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
                 isNull(entry_ti, rhs_v, float_argument_input))) {
      return false;
    }
    if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
                 !isNull(entry_ti, rhs_v, float_argument_input))) {
      return use_heap_ ? !order_entry.nulls_first : order_entry.nulls_first;
    }
    if (UNLIKELY(isNull(entry_ti, rhs_v, float_argument_input) &&
                 !isNull(entry_ti, lhs_v, float_argument_input))) {
      return use_heap_ ? order_entry.nulls_first : !order_entry.nulls_first;
    }

    if (LIKELY(lhs_v.isInt())) {
      CHECK(rhs_v.isInt());
      if (UNLIKELY(entry_ti.is_string() &&
                   entry_ti.get_compression() == kENCODING_DICT)) {
        CHECK_EQ(4, entry_ti.get_logical_size());
        const auto string_dict_proxy = result_set_->executor_->getStringDictionaryProxy(
            entry_ti.get_comp_param(), result_set_->row_set_mem_owner_, false);
        auto lhs_str = string_dict_proxy->getString(lhs_v.i1);
        auto rhs_str = string_dict_proxy->getString(rhs_v.i1);
        if (lhs_str == rhs_str) {
          continue;
        }
        return use_desc_cmp ? lhs_str > rhs_str : lhs_str < rhs_str;
      }

      if (lhs_v.i1 == rhs_v.i1) {
        continue;
      }
      if (entry_ti.is_fp()) {
        if (float_argument_input) {
          const auto lhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&lhs_v.i1));
          const auto rhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&rhs_v.i1));
          return use_desc_cmp ? lhs_dval > rhs_dval : lhs_dval < rhs_dval;
        } else {
          const auto lhs_dval =
              *reinterpret_cast<const double*>(may_alias_ptr(&lhs_v.i1));
          const auto rhs_dval =
              *reinterpret_cast<const double*>(may_alias_ptr(&rhs_v.i1));
          return use_desc_cmp ? lhs_dval > rhs_dval : lhs_dval < rhs_dval;
        }
      }
      return use_desc_cmp ? lhs_v.i1 > rhs_v.i1 : lhs_v.i1 < rhs_v.i1;
    } else {
      if (lhs_v.isPair()) {
        CHECK(rhs_v.isPair());
        const auto lhs =
            pair_to_double({lhs_v.i1, lhs_v.i2}, entry_ti, float_argument_input);
        const auto rhs =
            pair_to_double({rhs_v.i1, rhs_v.i2}, entry_ti, float_argument_input);
        if (lhs == rhs) {
          continue;
        }
        return use_desc_cmp ? lhs > rhs : lhs < rhs;
      } else {
        CHECK(lhs_v.isStr() && rhs_v.isStr());
        const auto lhs = lhs_v.strVal();
        const auto rhs = rhs_v.strVal();
        if (lhs == rhs) {
          continue;
        }
        return use_desc_cmp ? lhs > rhs : lhs < rhs;
      }
    }
  }
  return false;
}

void ResultSet::topPermutation(
    std::vector<uint32_t>& to_sort,
    const size_t n,
    const std::function<bool(const uint32_t, const uint32_t)> compare) {
  auto timer = DEBUG_TIMER(__func__);
  std::make_heap(to_sort.begin(), to_sort.end(), compare);
  std::vector<uint32_t> permutation_top;
  permutation_top.reserve(n);
  for (size_t i = 0; i < n && !to_sort.empty(); ++i) {
    permutation_top.push_back(to_sort.front());
    std::pop_heap(to_sort.begin(), to_sort.end(), compare);
    to_sort.pop_back();
  }
  to_sort.swap(permutation_top);
}

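// Illustrative note: to keep the top n entries, the heap front must be the entry
// that sorts first, which is why createComparator() inverts the descending flag
// when use_heap is set (use_desc_cmp in operator() above). Sketch of the flow
// (internal calls, hypothetical values):
//
//   auto perm = initPermutationBuffer(0, 1);  // all non-empty entries
//   topPermutation(perm, 10, compare);        // perm now holds at most 10 entries
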
void ResultSet::sortPermutation(
    const std::function<bool(const uint32_t, const uint32_t)> compare) {
  auto timer = DEBUG_TIMER(__func__);
  std::sort(permutation_.begin(), permutation_.end(), compare);
}

void ResultSet::radixSortOnGpu(
    const std::list<Analyzer::OrderEntry>& order_entries) const {
  auto timer = DEBUG_TIMER(__func__);
  auto data_mgr = &executor_->catalog_->getDataMgr();
  const int device_id{0};
  CudaAllocator cuda_allocator(data_mgr, device_id);
  std::vector<int64_t*> group_by_buffers(executor_->blockSize());
  group_by_buffers[0] = reinterpret_cast<int64_t*>(storage_->getUnderlyingBuffer());
  auto dev_group_by_buffers =
      create_dev_group_by_buffers(&cuda_allocator,
                                  group_by_buffers,
                                  query_mem_desc_,
                                  executor_->blockSize(),
                                  executor_->gridSize(),
                                  device_id,
                                  ExecutorDispatchMode::KernelPerFragment,
                                  -1,
                                  true,
                                  true,
                                  false,
                                  nullptr);
  inplace_sort_gpu(
      order_entries, query_mem_desc_, dev_group_by_buffers, data_mgr, device_id);
  copy_group_by_buffers_from_gpu(
      data_mgr,
      group_by_buffers,
      query_mem_desc_.getBufferSizeBytes(ExecutorDeviceType::GPU),
      dev_group_by_buffers.second,
      query_mem_desc_,
      executor_->blockSize(),
      executor_->gridSize(),
      device_id,
      false);
}

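// Flow of the GPU path above: the result buffer is staged to the device with
// create_dev_group_by_buffers(), sorted in place by inplace_sort_gpu(), then
// copied back with copy_group_by_buffers_from_gpu(); any GPU out-of-memory error
// is caught by sort() and falls back to radixSortOnCpu().
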
void ResultSet::radixSortOnCpu(
    const std::list<Analyzer::OrderEntry>& order_entries) const {
  auto timer = DEBUG_TIMER(__func__);
  CHECK(!query_mem_desc_.hasKeylessHash());
  std::vector<int64_t> tmp_buff(query_mem_desc_.getEntryCount());
  std::vector<int32_t> idx_buff(query_mem_desc_.getEntryCount());
  CHECK_EQ(size_t(1), order_entries.size());
  auto buffer_ptr = storage_->getUnderlyingBuffer();
  for (const auto& order_entry : order_entries) {
    const auto target_idx = order_entry.tle_no - 1;
    const auto sortkey_val_buff = reinterpret_cast<int64_t*>(
        buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
    const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
    sort_groups_cpu(sortkey_val_buff,
                    &idx_buff[0],
                    query_mem_desc_.getEntryCount(),
                    order_entry.is_desc,
                    chosen_bytes);
    apply_permutation_cpu(reinterpret_cast<int64_t*>(buffer_ptr),
                          &idx_buff[0],
                          query_mem_desc_.getEntryCount(),
                          &tmp_buff[0],
                          sizeof(int64_t));
    for (size_t target_idx = 0; target_idx < query_mem_desc_.getSlotCount();
         ++target_idx) {
      if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
        continue;
      }
      const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
      const auto satellite_val_buff = reinterpret_cast<int64_t*>(
          buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
      apply_permutation_cpu(satellite_val_buff,
                            &idx_buff[0],
                            query_mem_desc_.getEntryCount(),
                            &tmp_buff[0],
                            chosen_bytes);
    }
  }
}

void ResultSetStorage::addCountDistinctSetPointerMapping(const int64_t remote_ptr,
                                                         const int64_t ptr) {
  const auto it_ok = count_distinct_sets_mapping_.emplace(remote_ptr, ptr);
  CHECK(it_ok.second);
}

int64_t ResultSetStorage::mappedPtr(const int64_t remote_ptr) const {
  const auto it = count_distinct_sets_mapping_.find(remote_ptr);
  // Completely-zero bitmaps are removed in a distributed transfer, so there may be
  // remote pointers that do not exist here; return 0 if no mapping is found.
  if (it == count_distinct_sets_mapping_.end()) {
    return int64_t(0);
  }
  return it->second;
}

size_t ResultSet::getLimit() const {
  return keep_first_;
}

std::shared_ptr<const std::vector<std::string>> ResultSet::getStringDictionaryPayloadCopy(
    const int dict_id) const {
  CHECK(executor_);
  const auto sdp =
      executor_->getStringDictionaryProxy(dict_id, row_set_mem_owner_, false);
  return sdp->getDictionary()->copyStrings();
}

bool can_use_parallel_algorithms(const ResultSet& rows) {
  return !rows.isTruncated();
}

bool use_parallel_algorithms(const ResultSet& rows) {
  return can_use_parallel_algorithms(rows) && rows.entryCount() >= 20000;
}

bool ResultSet::isDirectColumnarConversionPossible() const {
  if (!g_enable_direct_columnarization) {
    return false;
  } else if (query_mem_desc_.didOutputColumnar()) {
    return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::Projection ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByPerfectHash ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByBaselineHash);
  } else {
    return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByPerfectHash ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByBaselineHash);
  }
}

bool ResultSet::isZeroCopyColumnarConversionPossible(size_t column_idx) const {
  return query_mem_desc_.didOutputColumnar() &&
         query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection &&
         appended_storage_.empty() && storage_ &&
         (lazy_fetch_info_.empty() || !lazy_fetch_info_[column_idx].is_lazily_fetched);
}

const int8_t* ResultSet::getColumnarBuffer(size_t column_idx) const {
  CHECK(isZeroCopyColumnarConversionPossible(column_idx));
  return storage_->getUnderlyingBuffer() + query_mem_desc_.getColOffInBytes(column_idx);
}

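// Hypothetical caller-side sketch: a consumer should gate getColumnarBuffer()
// on the zero-copy check, falling back to a copying columnarization otherwise:
//
//   if (rs.isZeroCopyColumnarConversionPossible(col_idx)) {
//     const int8_t* col = rs.getColumnarBuffer(col_idx);  // no copy made
//   } else {
//     // materialize the column via the row-wise iteration path
//   }
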
// Returns a bitmap (and the total count) of all single-slot targets.
std::tuple<std::vector<bool>, size_t> ResultSet::getSingleSlotTargetBitmap() const {
  std::vector<bool> target_bitmap(targets_.size(), true);
  size_t num_single_slot_targets = 0;
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    const auto& sql_type = targets_[target_idx].sql_type;
    if (targets_[target_idx].is_agg && targets_[target_idx].agg_kind == kAVG) {
      target_bitmap[target_idx] = false;
    } else if (sql_type.is_varlen()) {
      target_bitmap[target_idx] = false;
    } else {
      num_single_slot_targets++;
    }
  }
  return std::make_tuple(std::move(target_bitmap), num_single_slot_targets);
}

std::tuple<std::vector<bool>, size_t> ResultSet::getSupportedSingleSlotTargetBitmap()
    const {
  CHECK(query_mem_desc_.didOutputColumnar());
  auto [single_slot_targets, num_single_slot_targets] = getSingleSlotTargetBitmap();

  for (size_t target_idx = 0; target_idx < single_slot_targets.size(); target_idx++) {
    const auto& target = targets_[target_idx];
    if (single_slot_targets[target_idx] &&
        (is_distinct_target(target) ||
         (target.is_agg && target.agg_kind == kSAMPLE && target.sql_type == kFLOAT))) {
      single_slot_targets[target_idx] = false;
      num_single_slot_targets--;
    }
  }
  CHECK_GE(num_single_slot_targets, size_t(0));
  return std::make_tuple(std::move(single_slot_targets), num_single_slot_targets);
}

// Returns the starting slot index for each target in the result set.
std::vector<size_t> ResultSet::getSlotIndicesForTargetIndices() const {
  std::vector<size_t> slot_indices(targets_.size(), 0);
  size_t slot_index = 0;
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    slot_indices[target_idx] = slot_index;
    slot_index = advance_slot(slot_index, targets_[target_idx], false);
  }
  return slot_indices;
}
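
// Illustrative mapping: for targets {SUM(x), AVG(y), MAX(z)}, AVG occupies two
// slots (sum and count), so the starting slot indices returned above would be
// {0, 1, 3} -- advance_slot() skips the extra slot that AVG consumes.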