OmniSciDB  fe05a0c208
ResultSet.cpp
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "ResultSet.h"
28 #include "Execute.h"
29 #include "GpuMemUtils.h"
30 #include "InPlaceSort.h"
32 #include "RuntimeFunctions.h"
33 #include "Shared/Intervals.h"
34 #include "Shared/SqlTypesLayout.h"
35 #include "Shared/checked_alloc.h"
36 #include "Shared/likely.h"
37 #include "Shared/thread_count.h"
38 #include "Shared/threadpool.h"
39 
40 #include <algorithm>
41 #include <bitset>
42 #include <future>
43 #include <numeric>
44 
45 extern bool g_use_tbb_pool;
46 
47 size_t g_parallel_top_min = 100e3;
48 size_t g_parallel_top_max = 20e6; // In effect only with g_enable_watchdog.
49 
50 void ResultSet::keepFirstN(const size_t n) {
51  CHECK_EQ(-1, cached_row_count_);
52  keep_first_ = n;
53 }
54 
55 void ResultSet::dropFirstN(const size_t n) {
56  CHECK_EQ(-1, cached_row_count_);
57  drop_first_ = n;
58 }
59 
60 ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
61  const ExecutorDeviceType device_type,
62  const QueryMemoryDescriptor& query_mem_desc,
63  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
64  const Catalog_Namespace::Catalog* catalog,
65  const unsigned block_size,
66  const unsigned grid_size)
67  : targets_(targets)
68  , device_type_(device_type)
69  , device_id_(-1)
70  , query_mem_desc_(query_mem_desc)
71  , crt_row_buff_idx_(0)
72  , fetched_so_far_(0)
73  , drop_first_(0)
74  , keep_first_(0)
75  , row_set_mem_owner_(row_set_mem_owner)
76  , catalog_(catalog)
77  , block_size_(block_size)
78  , grid_size_(grid_size)
79  , data_mgr_(nullptr)
80  , separate_varlen_storage_valid_(false)
81  , just_explain_(false)
82  , for_validation_only_(false)
83  , cached_row_count_(-1)
84  , geo_return_type_(GeoReturnType::WktString) {}
85 
86 ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
87  const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
88  const std::vector<std::vector<const int8_t*>>& col_buffers,
89  const std::vector<std::vector<int64_t>>& frag_offsets,
90  const std::vector<int64_t>& consistent_frag_sizes,
91  const ExecutorDeviceType device_type,
92  const int device_id,
93  const QueryMemoryDescriptor& query_mem_desc,
94  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
95  const Catalog_Namespace::Catalog* catalog,
96  const unsigned block_size,
97  const unsigned grid_size)
98  : targets_(targets)
99  , device_type_(device_type)
100  , device_id_(device_id)
101  , query_mem_desc_(query_mem_desc)
102  , crt_row_buff_idx_(0)
103  , fetched_so_far_(0)
104  , drop_first_(0)
105  , keep_first_(0)
106  , row_set_mem_owner_(row_set_mem_owner)
107  , catalog_(catalog)
108  , block_size_(block_size)
109  , grid_size_(grid_size)
110  , lazy_fetch_info_(lazy_fetch_info)
111  , col_buffers_{col_buffers}
112  , frag_offsets_{frag_offsets}
113  , consistent_frag_sizes_{consistent_frag_sizes}
114  , data_mgr_(nullptr)
115  , separate_varlen_storage_valid_(false)
116  , just_explain_(false)
117  , for_validation_only_(false)
118  , cached_row_count_(-1)
119  , geo_return_type_(GeoReturnType::WktString) {}
120 
121 ResultSet::ResultSet(const std::shared_ptr<const Analyzer::Estimator> estimator,
122  const ExecutorDeviceType device_type,
123  const int device_id,
124  Data_Namespace::DataMgr* data_mgr)
125  : device_type_(device_type)
126  , device_id_(device_id)
127  , query_mem_desc_{}
128  , crt_row_buff_idx_(0)
129  , estimator_(estimator)
130  , data_mgr_(data_mgr)
131  , separate_varlen_storage_valid_(false)
132  , just_explain_(false)
133  , for_validation_only_(false)
134  , cached_row_count_(-1)
135  , geo_return_type_(GeoReturnType::WktString) {
136  if (device_type == ExecutorDeviceType::GPU) {
137  device_estimator_buffer_ = CudaAllocator::allocGpuAbstractBuffer(
138  data_mgr_, estimator_->getBufferSize(), device_id_);
139  data_mgr->getCudaMgr()->zeroDeviceMem(device_estimator_buffer_->getMemoryPtr(),
140  estimator_->getBufferSize(),
141  device_id_);
142  } else {
143  host_estimator_buffer_ =
144  static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
145  }
146 }
147 
148 ResultSet::ResultSet(const std::string& explanation)
149  : device_type_(ExecutorDeviceType::CPU)
150  , device_id_(-1)
151  , fetched_so_far_(0)
152  , separate_varlen_storage_valid_(false)
153  , explanation_(explanation)
154  , just_explain_(true)
155  , for_validation_only_(false)
156  , cached_row_count_(-1)
157  , geo_return_type_(GeoReturnType::WktString) {}
158 
159 ResultSet::ResultSet(int64_t queue_time_ms,
160  int64_t render_time_ms,
161  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
162  : device_type_(ExecutorDeviceType::CPU)
163  , device_id_(-1)
164  , fetched_so_far_(0)
165  , row_set_mem_owner_(row_set_mem_owner)
166  , timings_(QueryExecutionTimings{queue_time_ms, render_time_ms, 0, 0})
167  , separate_varlen_storage_valid_(false)
168  , just_explain_(true)
169  , for_validation_only_(false)
170  , cached_row_count_(-1)
171  , geo_return_type_(GeoReturnType::WktString){};
172 
173 ResultSet::~ResultSet() {
174  if (storage_) {
175  if (!storage_->buff_is_provided_) {
176  CHECK(storage_->getUnderlyingBuffer());
177  free(storage_->getUnderlyingBuffer());
178  }
179  }
180  for (auto& storage : appended_storage_) {
181  if (storage && !storage->buff_is_provided_) {
182  free(storage->getUnderlyingBuffer());
183  }
184  }
185  if (host_estimator_buffer_) {
186  CHECK(device_type_ == ExecutorDeviceType::CPU || device_estimator_buffer_);
187  free(host_estimator_buffer_);
188  }
189  if (device_estimator_buffer_) {
190  CHECK(data_mgr_);
191  data_mgr_->free(device_estimator_buffer_);
192  }
193 }
194 
195 ExecutorDeviceType ResultSet::getDeviceType() const {
196  return device_type_;
197 }
198 
199 const ResultSetStorage* ResultSet::allocateStorage() const {
200  CHECK(!storage_);
201  CHECK(row_set_mem_owner_);
202  auto buff = row_set_mem_owner_->allocate(
203  query_mem_desc_.getBufferSizeBytes(device_type_), /*thread_idx=*/0);
204  storage_.reset(
205  new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
206  return storage_.get();
207 }
208 
209 const ResultSetStorage* ResultSet::allocateStorage(
210  int8_t* buff,
211  const std::vector<int64_t>& target_init_vals) const {
212  CHECK(buff);
213  CHECK(!storage_);
214  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
215  storage_->target_init_vals_ = target_init_vals;
216  return storage_.get();
217 }
218 
219 const ResultSetStorage* ResultSet::allocateStorage(
220  const std::vector<int64_t>& target_init_vals) const {
221  CHECK(!storage_);
222  CHECK(row_set_mem_owner_);
223  auto buff = row_set_mem_owner_->allocate(
224  query_mem_desc_.getBufferSizeBytes(device_type_), /*thread_idx=*/0);
225  storage_.reset(
226  new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
227  storage_->target_init_vals_ = target_init_vals;
228  return storage_.get();
229 }
230 
231 size_t ResultSet::getCurrentRowBufferIndex() const {
232  if (crt_row_buff_idx_ == 0) {
233  throw std::runtime_error("current row buffer iteration index is undefined");
234  }
235  return crt_row_buff_idx_ - 1;
236 }
237 
238 // Note: that.appended_storage_ does not get appended to this.
239 void ResultSet::append(ResultSet& that) {
240  CHECK_EQ(-1, cached_row_count_);
241  if (!that.storage_) {
242  return;
243  }
244  appended_storage_.push_back(std::move(that.storage_));
245  query_mem_desc_.setEntryCount(
246  query_mem_desc_.getEntryCount() +
247  appended_storage_.back()->query_mem_desc_.getEntryCount());
248  chunks_.insert(chunks_.end(), that.chunks_.begin(), that.chunks_.end());
249  col_buffers_.insert(
250  col_buffers_.end(), that.col_buffers_.begin(), that.col_buffers_.end());
251  frag_offsets_.insert(
252  frag_offsets_.end(), that.frag_offsets_.begin(), that.frag_offsets_.end());
253  consistent_frag_sizes_.insert(consistent_frag_sizes_.end(),
254  that.consistent_frag_sizes_.begin(),
255  that.consistent_frag_sizes_.end());
256  chunk_iters_.insert(
257  chunk_iters_.end(), that.chunk_iters_.begin(), that.chunk_iters_.end());
258  if (separate_varlen_storage_valid_) {
259  CHECK(that.separate_varlen_storage_valid_);
260  serialized_varlen_buffer_.insert(serialized_varlen_buffer_.end(),
261  that.serialized_varlen_buffer_.begin(),
262  that.serialized_varlen_buffer_.end());
263  }
264  for (auto& buff : that.literal_buffers_) {
265  literal_buffers_.push_back(std::move(buff));
266  }
267 }
268 
269 const ResultSetStorage* ResultSet::getStorage() const {
270  return storage_.get();
271 }
272 
273 size_t ResultSet::colCount() const {
274  return just_explain_ ? 1 : targets_.size();
275 }
276 
277 SQLTypeInfo ResultSet::getColType(const size_t col_idx) const {
278  if (just_explain_) {
279  return SQLTypeInfo(kTEXT, false);
280  }
281  CHECK_LT(col_idx, targets_.size());
282  return targets_[col_idx].agg_kind == kAVG ? SQLTypeInfo(kDOUBLE, false)
283  : targets_[col_idx].sql_type;
284 }
285 
286 namespace {
287 
288 size_t get_truncated_row_count(size_t total_row_count, size_t limit, size_t offset) {
289  if (total_row_count < offset) {
290  return 0;
291  }
292 
293  size_t total_truncated_row_count = total_row_count - offset;
294 
295  if (limit) {
296  return std::min(total_truncated_row_count, limit);
297  }
298 
299  return total_truncated_row_count;
300 }
301 
302 } // namespace
303 
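// Returns the logical row count, preferring cheap paths: the permutation size for
// sorted results, the cached count, a binary search for projection layouts, a
// parallel scan for large entry counts, and a full row iteration otherwise.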
304 size_t ResultSet::rowCount(const bool force_parallel) const {
305  if (just_explain_) {
306  return 1;
307  }
308  if (!permutation_.empty()) {
309  if (drop_first_ > permutation_.size()) {
310  return 0;
311  }
312  const auto limited_row_count = keep_first_ + drop_first_;
313  return limited_row_count ? std::min(limited_row_count, permutation_.size())
314  : permutation_.size();
315  }
316  if (cached_row_count_ != -1) {
317  CHECK_GE(cached_row_count_, 0);
318  return cached_row_count_;
319  }
320  if (!storage_) {
321  return 0;
322  }
323  if (permutation_.empty() &&
324  query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection) {
325  return binSearchRowCount();
326  }
327  if (force_parallel || entryCount() > 20000) {
328  return parallelRowCount();
329  }
330  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
331  moveToBegin();
332  size_t row_count{0};
333  while (true) {
334  auto crt_row = getNextRowUnlocked(false, false);
335  if (crt_row.empty()) {
336  break;
337  }
338  ++row_count;
339  }
340  moveToBegin();
341  return row_count;
342 }
343 
344 void ResultSet::setCachedRowCount(const size_t row_count) const {
345  CHECK(cached_row_count_ == -1 || cached_row_count_ == static_cast<int64_t>(row_count));
346  cached_row_count_ = row_count;
347 }
348 
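// Sums the row counts reported by each storage's binary search and applies the
// LIMIT/OFFSET truncation.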
349 size_t ResultSet::binSearchRowCount() const {
350  if (!storage_) {
351  return 0;
352  }
353 
354  size_t row_count = storage_->binSearchRowCount();
355  for (auto& s : appended_storage_) {
356  row_count += s->binSearchRowCount();
357  }
358 
359  return get_truncated_row_count(row_count, getLimit(), drop_first_);
360 }
361 
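// Counts non-empty entries by splitting the entry range across cpu_threads() workers.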
362 size_t ResultSet::parallelRowCount() const {
363  auto execute_parallel_row_count = [this](auto counter_threads) -> size_t {
364  const size_t worker_count = cpu_threads();
365  for (size_t i = 0,
366  start_entry = 0,
367  stride = (entryCount() + worker_count - 1) / worker_count;
368  i < worker_count && start_entry < entryCount();
369  ++i, start_entry += stride) {
370  const auto end_entry = std::min(start_entry + stride, entryCount());
371  counter_threads.spawn(
372  [this](const size_t start, const size_t end) {
373  size_t row_count{0};
374  for (size_t i = start; i < end; ++i) {
375  if (!isRowAtEmpty(i)) {
376  ++row_count;
377  }
378  }
379  return row_count;
380  },
381  start_entry,
382  end_entry);
383  }
384  const auto row_counts = counter_threads.join();
385  const size_t row_count = std::accumulate(row_counts.begin(), row_counts.end(), 0);
386  return row_count;
387  };
388  // will fall back to futures threadpool if TBB is not enabled
389  const auto row_count =
391  g_use_tbb_pool ? execute_parallel_row_count(threadpool::ThreadPool<size_t>())
392  : execute_parallel_row_count(threadpool::FuturesThreadPool<size_t>());
393 
394  return get_truncated_row_count(row_count, getLimit(), drop_first_);
395 }
396 
397 bool ResultSet::definitelyHasNoRows() const {
398  return !storage_ && !estimator_ && !just_explain_;
399 }
400 
401 const QueryMemoryDescriptor& ResultSet::getQueryMemDesc() const {
402  CHECK(storage_);
403  return storage_->query_mem_desc_;
404 }
405 
406 const std::vector<TargetInfo>& ResultSet::getTargetInfos() const {
407  return targets_;
408 }
409 
410 const std::vector<int64_t>& ResultSet::getTargetInitVals() const {
411  CHECK(storage_);
412  return storage_->target_init_vals_;
413 }
414 
415 int8_t* ResultSet::getDeviceEstimatorBuffer() const {
416  CHECK(device_type_ == ExecutorDeviceType::GPU);
417  CHECK(device_estimator_buffer_);
418  return device_estimator_buffer_->getMemoryPtr();
419 }
420 
421 int8_t* ResultSet::getHostEstimatorBuffer() const {
422  return host_estimator_buffer_;
423 }
424 
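// Copies the estimator buffer from the device into a freshly allocated host buffer.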
425 void ResultSet::syncEstimatorBuffer() const {
426  CHECK(device_type_ == ExecutorDeviceType::GPU);
427  CHECK(!host_estimator_buffer_);
428  CHECK_EQ(size_t(0), estimator_->getBufferSize() % sizeof(int64_t));
429  host_estimator_buffer_ =
430  static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
431  CHECK(device_estimator_buffer_);
432  auto device_buffer_ptr = device_estimator_buffer_->getMemoryPtr();
433  copy_from_gpu(data_mgr_,
434  host_estimator_buffer_,
435  reinterpret_cast<CUdeviceptr>(device_buffer_ptr),
436  estimator_->getBufferSize(),
437  device_id_);
438 }
439 
440 void ResultSet::setQueueTime(const int64_t queue_time) {
441  timings_.executor_queue_time = queue_time;
442 }
443 
444 void ResultSet::setKernelQueueTime(const int64_t kernel_queue_time) {
445  timings_.kernel_queue_time = kernel_queue_time;
446 }
447 
448 void ResultSet::addCompilationQueueTime(const int64_t compilation_queue_time) {
449  timings_.compilation_queue_time += compilation_queue_time;
450 }
451 
452 int64_t ResultSet::getQueueTime() const {
453  return timings_.executor_queue_time + timings_.kernel_queue_time +
454  timings_.compilation_queue_time;
455 }
456 
457 int64_t ResultSet::getRenderTime() const {
458  return timings_.render_time;
459 }
460 
461 void ResultSet::moveToBegin() const {
462  crt_row_buff_idx_ = 0;
463  fetched_so_far_ = 0;
464 }
465 
466 bool ResultSet::isTruncated() const {
467  return keep_first_ + drop_first_;
468 }
469 
470 bool ResultSet::isExplain() const {
471  return just_explain_;
472 }
473 
474 void ResultSet::setValidationOnlyRes() {
475  for_validation_only_ = true;
476 }
477 
478 bool ResultSet::isValidationOnlyRes() const {
479  return for_validation_only_;
480 }
481 
482 int ResultSet::getDeviceId() const {
483  return device_id_;
484 }
485 
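// Returns a copy of the descriptor with group-by columns widened to 8 bytes; row-wise
// layouts additionally get their padded slots re-aligned.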
486 QueryMemoryDescriptor ResultSet::fixupQueryMemoryDescriptor(
487  const QueryMemoryDescriptor& query_mem_desc) {
488  auto query_mem_desc_copy = query_mem_desc;
489  query_mem_desc_copy.resetGroupColWidths(
490  std::vector<int8_t>(query_mem_desc_copy.getGroupbyColCount(), 8));
491  if (query_mem_desc.didOutputColumnar()) {
492  return query_mem_desc_copy;
493  }
494  query_mem_desc_copy.alignPaddedSlots();
495  return query_mem_desc_copy;
496 }
497 
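// Sorts (or top-n sorts) the result according to order_entries; for the common case
// this builds and orders permutation_ rather than moving the rows themselves.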
498 void ResultSet::sort(const std::list<Analyzer::OrderEntry>& order_entries,
499  size_t top_n,
500  const Executor* executor) {
501  auto timer = DEBUG_TIMER(__func__);
502 
503  if (!storage_) {
504  return;
505  }
506  CHECK_EQ(-1, cached_row_count_);
507  CHECK(!targets_.empty());
508 #ifdef HAVE_CUDA
509  if (canUseFastBaselineSort(order_entries, top_n)) {
510  baselineSort(order_entries, top_n, executor);
511  return;
512  }
513 #endif // HAVE_CUDA
514  if (query_mem_desc_.sortOnGpu()) {
515  try {
516  radixSortOnGpu(order_entries);
517  } catch (const OutOfMemory&) {
518  LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
519  radixSortOnCpu(order_entries);
520  } catch (const std::bad_alloc&) {
521  LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
522  radixSortOnCpu(order_entries);
523  }
524  return;
525  }
526  // This check isn't strictly required, but allows the index buffer to be 32-bit.
527  if (query_mem_desc_.getEntryCount() > std::numeric_limits<uint32_t>::max()) {
528  throw RowSortException("Sorting more than 4B elements not supported");
529  }
530 
531  CHECK(permutation_.empty());
532 
533  if (top_n && g_parallel_top_min < entryCount()) {
534  if (g_enable_watchdog && g_parallel_top_max < entryCount()) {
535  throw WatchdogException("Sorting the result would be too slow");
536  }
537  parallelTop(order_entries, top_n, executor);
538  } else {
539  if (g_enable_watchdog && Executor::baseline_threshold < entryCount()) {
540  throw WatchdogException("Sorting the result would be too slow");
541  }
542  permutation_.resize(query_mem_desc_.getEntryCount());
543  // PermutationView is used to share common API with parallelTop().
544  PermutationView pv(permutation_.data(), 0, permutation_.size());
545  pv = initPermutationBuffer(pv, 0, permutation_.size());
546  if (top_n == 0) {
547  top_n = pv.size(); // top_n == 0 implies a full sort
548  }
549  pv = topPermutation(pv, top_n, createComparator(order_entries, pv, executor, false));
550  if (pv.size() < permutation_.size()) {
551  permutation_.resize(pv.size());
552  permutation_.shrink_to_fit();
553  }
554  }
555 }
556 
557 #ifdef HAVE_CUDA
558 void ResultSet::baselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
559  const size_t top_n,
560  const Executor* executor) {
561  auto timer = DEBUG_TIMER(__func__);
562  // If we only have one GPU, it's usually faster to do a multi-threaded radix sort on CPU
563  if (getGpuCount() > 1) {
564  try {
565  doBaselineSort(ExecutorDeviceType::GPU, order_entries, top_n, executor);
566  } catch (...) {
567  doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n, executor);
568  }
569  } else {
570  doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n, executor);
571  }
572 }
573 #endif // HAVE_CUDA
574 
575 // Append non-empty indexes i in [begin,end) from findStorage(i) to permutation.
576 PermutationView ResultSet::initPermutationBuffer(PermutationView permutation,
577  PermutationIdx const begin,
578  PermutationIdx const end) const {
579  auto timer = DEBUG_TIMER(__func__);
580  for (PermutationIdx i = begin; i < end; ++i) {
581  const auto storage_lookup_result = findStorage(i);
582  const auto lhs_storage = storage_lookup_result.storage_ptr;
583  const auto off = storage_lookup_result.fixedup_entry_idx;
584  CHECK(lhs_storage);
585  if (!lhs_storage->isEmptyEntry(off)) {
586  permutation.push_back(i);
587  }
588  }
589  return permutation;
590 }
591 
592 const Permutation& ResultSet::getPermutationBuffer() const {
593  return permutation_;
594 }
595 
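// Top-n sort in parallel: each worker top-sorts a subrange of permutation_, then the
// partial results are merged and top-sorted once more serially.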
596 void ResultSet::parallelTop(const std::list<Analyzer::OrderEntry>& order_entries,
597  const size_t top_n,
598  const Executor* executor) {
599  auto timer = DEBUG_TIMER(__func__);
600  const size_t nthreads = cpu_threads();
601 
602  // Split permutation_ into nthreads subranges and top-sort in-place.
603  permutation_.resize(query_mem_desc_.getEntryCount());
604  std::vector<PermutationView> permutation_views(nthreads);
605  const auto top_sort_interval = [&, top_n, executor](const auto interval) {
606  PermutationView pv(permutation_.data() + interval.begin, 0, interval.size());
607  pv = initPermutationBuffer(pv, interval.begin, interval.end);
608  const auto compare = createComparator(order_entries, pv, executor, true);
609  permutation_views[interval.index] = topPermutation(pv, top_n, compare);
610  };
611  threadpool::FuturesThreadPool<void> top_sort_threads;
612  for (auto interval : makeIntervals<PermutationIdx>(0, permutation_.size(), nthreads)) {
613  top_sort_threads.spawn(top_sort_interval, interval);
614  }
615  top_sort_threads.join();
616 
617  // In case you are considering implementing a parallel reduction, note that the
618  // ResultSetComparator constructor is O(N) in order to materialize some of the aggregate
619  // columns as necessary to perform a comparison. This cost is why reduction is chosen to
620  // be serial instead; only one more Comparator is needed below.
621 
622  // Left-copy disjoint top-sorted subranges into one contiguous range.
623  // ++++....+++.....+++++... -> ++++++++++++............
624  auto end = permutation_.begin() + permutation_views.front().size();
625  for (size_t i = 1; i < nthreads; ++i) {
626  std::copy(permutation_views[i].begin(), permutation_views[i].end(), end);
627  end += permutation_views[i].size();
628  }
629 
630  // Top sort final range.
631  PermutationView pv(permutation_.data(), end - permutation_.begin());
632  const auto compare = createComparator(order_entries, pv, executor, false);
633  pv = topPermutation(pv, top_n, compare);
634  permutation_.resize(pv.size());
635  permutation_.shrink_to_fit();
636 }
637 
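// Maps a global entry index to {storage index, index within that storage}, where
// storage index 0 is storage_ and i + 1 is appended_storage_[i].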
638 std::pair<size_t, size_t> ResultSet::getStorageIndex(const size_t entry_idx) const {
639  size_t fixedup_entry_idx = entry_idx;
640  auto entry_count = storage_->query_mem_desc_.getEntryCount();
641  const bool is_rowwise_layout = !storage_->query_mem_desc_.didOutputColumnar();
642  if (fixedup_entry_idx < entry_count) {
643  return {0, fixedup_entry_idx};
644  }
645  fixedup_entry_idx -= entry_count;
646  for (size_t i = 0; i < appended_storage_.size(); ++i) {
647  const auto& desc = appended_storage_[i]->query_mem_desc_;
648  CHECK_NE(is_rowwise_layout, desc.didOutputColumnar());
649  entry_count = desc.getEntryCount();
650  if (fixedup_entry_idx < entry_count) {
651  return {i + 1, fixedup_entry_idx};
652  }
653  fixedup_entry_idx -= entry_count;
654  }
655  UNREACHABLE() << "entry_idx = " << entry_idx << ", query_mem_desc_.getEntryCount() = "
656  << query_mem_desc_.getEntryCount();
657  return {};
658 }
659 
662 
663 ResultSet::StorageLookupResult ResultSet::findStorage(const size_t entry_idx) const {
664  auto [stg_idx, fixedup_entry_idx] = getStorageIndex(entry_idx);
665  return {stg_idx ? appended_storage_[stg_idx - 1].get() : storage_.get(),
666  fixedup_entry_idx,
667  stg_idx};
668 }
669 
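// Pre-computes COUNT DISTINCT set sizes for the order-by targets so the comparator
// can compare plain integers instead of walking the distinct-value sets.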
670 template <typename BUFFER_ITERATOR_TYPE>
671 void ResultSet::ResultSetComparator<
672  BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumns() {
673  for (const auto& order_entry : order_entries_) {
674  if (is_distinct_target(result_set_->targets_[order_entry.tle_no - 1])) {
675  count_distinct_materialized_buffers_.emplace_back(
676  materializeCountDistinctColumn(order_entry));
677  }
678  }
679 }
680 
681 template <typename BUFFER_ITERATOR_TYPE>
682 ResultSet::ApproxMedianBuffers ResultSet::ResultSetComparator<
683  BUFFER_ITERATOR_TYPE>::materializeApproxMedianColumns() const {
684  ResultSet::ApproxMedianBuffers approx_median_materialized_buffers;
685  for (const auto& order_entry : order_entries_) {
686  if (result_set_->targets_[order_entry.tle_no - 1].agg_kind == kAPPROX_MEDIAN) {
687  approx_median_materialized_buffers.emplace_back(
688  materializeApproxMedianColumn(order_entry));
689  }
690  }
691  return approx_median_materialized_buffers;
692 }
693 
694 template <typename BUFFER_ITERATOR_TYPE>
695 std::vector<int64_t>
696 ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumn(
697  const Analyzer::OrderEntry& order_entry) const {
698  const size_t num_storage_entries = result_set_->query_mem_desc_.getEntryCount();
699  std::vector<int64_t> count_distinct_materialized_buffer(num_storage_entries);
700  const CountDistinctDescriptor count_distinct_descriptor =
701  result_set_->query_mem_desc_.getCountDistinctDescriptor(order_entry.tle_no - 1);
702  const size_t num_non_empty_entries = permutation_.size();
703  const auto work = [&](const size_t start, const size_t end) {
704  for (size_t i = start; i < end; ++i) {
705  const PermutationIdx permuted_idx = permutation_[i];
706  const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
707  const auto storage = storage_lookup_result.storage_ptr;
708  const auto off = storage_lookup_result.fixedup_entry_idx;
709  const auto value = buffer_itr_.getColumnInternal(
710  storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
711  count_distinct_materialized_buffer[permuted_idx] =
712  count_distinct_set_size(value.i1, count_distinct_descriptor);
713  }
714  };
715  // TODO(tlm): Allow use of tbb after we determine how to easily encapsulate the choice
716  // between thread pool types
717  if (single_threaded_) {
718  work(0, num_non_empty_entries);
719  } else {
720  threadpool::FuturesThreadPool<void> thread_pool;
721  for (auto interval : makeIntervals<size_t>(0, num_non_empty_entries, cpu_threads())) {
722  thread_pool.spawn(work, interval.begin, interval.end);
723  }
724  thread_pool.join();
725  }
726  return count_distinct_materialized_buffer;
727 }
728 
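// Merges the t-digest's pending buffer and returns the requested quantile, mapping NaN
// to NULL_DOUBLE.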
729 double ResultSet::calculateQuantile(quantile::TDigest* const t_digest, double const q) {
730  static_assert(sizeof(int64_t) == sizeof(quantile::TDigest*));
731  CHECK(t_digest) << "t_digest=" << (void*)t_digest << ", q=" << q;
732  t_digest->mergeBuffer();
733  double const median = t_digest->quantile(q);
734  return boost::math::isnan(median) ? NULL_DOUBLE : median;
735 }
736 
737 template <typename BUFFER_ITERATOR_TYPE>
738 ResultSet::ApproxMedianBuffers::value_type
739 ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeApproxMedianColumn(
740  const Analyzer::OrderEntry& order_entry) const {
741  ResultSet::ApproxMedianBuffers::value_type materialized_buffer(
742  result_set_->query_mem_desc_.getEntryCount());
743  const size_t size = permutation_.size();
744  const auto work = [&](const size_t start, const size_t end) {
745  for (size_t i = start; i < end; ++i) {
746  const PermutationIdx permuted_idx = permutation_[i];
747  const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
748  const auto storage = storage_lookup_result.storage_ptr;
749  const auto off = storage_lookup_result.fixedup_entry_idx;
750  const auto value = buffer_itr_.getColumnInternal(
751  storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
752  materialized_buffer[permuted_idx] =
753  value.i1
754  ? calculateQuantile(reinterpret_cast<quantile::TDigest*>(value.i1), 0.5)
755  : NULL_DOUBLE;
756  }
757  };
758  if (single_threaded_) {
759  work(0, size);
760  } else {
761  threadpool::FuturesThreadPool<void> thread_pool;
762  for (auto interval : makeIntervals<size_t>(0, size, cpu_threads())) {
763  thread_pool.spawn(work, interval.begin, interval.end);
764  }
765  thread_pool.join();
766  }
767  return materialized_buffer;
768 }
769 
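// Comparator used to order permutation indices according to order_entries_, consulting
// the materialized COUNT DISTINCT and APPROX_MEDIAN buffers where needed.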
770 template <typename BUFFER_ITERATOR_TYPE>
771 bool ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::operator()(
772  const PermutationIdx lhs,
773  const PermutationIdx rhs) const {
774  // NB: The compare function must define a strict weak ordering, otherwise
775  // std::sort will trigger a segmentation fault (or corrupt memory).
776  const auto lhs_storage_lookup_result = result_set_->findStorage(lhs);
777  const auto rhs_storage_lookup_result = result_set_->findStorage(rhs);
778  const auto lhs_storage = lhs_storage_lookup_result.storage_ptr;
779  const auto rhs_storage = rhs_storage_lookup_result.storage_ptr;
780  const auto fixedup_lhs = lhs_storage_lookup_result.fixedup_entry_idx;
781  const auto fixedup_rhs = rhs_storage_lookup_result.fixedup_entry_idx;
782  size_t materialized_count_distinct_buffer_idx{0};
783  size_t materialized_approx_median_buffer_idx{0};
784 
785  for (const auto& order_entry : order_entries_) {
786  CHECK_GE(order_entry.tle_no, 1);
787  const auto& agg_info = result_set_->targets_[order_entry.tle_no - 1];
788  const auto entry_ti = get_compact_type(agg_info);
789  bool float_argument_input = takes_float_argument(agg_info);
790  // Need to determine if the float value has been stored as a 4-byte float or
791  // compacted to a different (often larger, 8-byte) slot; in the distributed case
792  // the floats are actually 4 bytes.
793  // TODO: takes_float_argument() is widely used; check whether this problem
794  // exists elsewhere.
795  if (entry_ti.get_type() == kFLOAT) {
796  const auto is_col_lazy =
797  !result_set_->lazy_fetch_info_.empty() &&
798  result_set_->lazy_fetch_info_[order_entry.tle_no - 1].is_lazily_fetched;
799  if (result_set_->query_mem_desc_.getPaddedSlotWidthBytes(order_entry.tle_no - 1) ==
800  sizeof(float)) {
801  float_argument_input =
802  result_set_->query_mem_desc_.didOutputColumnar() ? !is_col_lazy : true;
803  }
804  }
805 
806  if (UNLIKELY(is_distinct_target(agg_info))) {
807  CHECK_LT(materialized_count_distinct_buffer_idx,
808  count_distinct_materialized_buffers_.size());
809 
810  const auto& count_distinct_materialized_buffer =
811  count_distinct_materialized_buffers_[materialized_count_distinct_buffer_idx];
812  const auto lhs_sz = count_distinct_materialized_buffer[lhs];
813  const auto rhs_sz = count_distinct_materialized_buffer[rhs];
814  ++materialized_count_distinct_buffer_idx;
815  if (lhs_sz == rhs_sz) {
816  continue;
817  }
818  return (lhs_sz < rhs_sz) != order_entry.is_desc;
819  } else if (UNLIKELY(agg_info.agg_kind == kAPPROX_MEDIAN)) {
820  CHECK_LT(materialized_approx_median_buffer_idx,
821  approx_median_materialized_buffers_.size());
822  const auto& approx_median_materialized_buffer =
823  approx_median_materialized_buffers_[materialized_approx_median_buffer_idx];
824  const auto lhs_value = approx_median_materialized_buffer[lhs];
825  const auto rhs_value = approx_median_materialized_buffer[rhs];
826  ++materialized_approx_median_buffer_idx;
827  if (lhs_value == rhs_value) {
828  continue;
829  } else if (!entry_ti.get_notnull()) {
830  if (lhs_value == NULL_DOUBLE) {
831  return order_entry.nulls_first;
832  } else if (rhs_value == NULL_DOUBLE) {
833  return !order_entry.nulls_first;
834  }
835  }
836  return (lhs_value < rhs_value) != order_entry.is_desc;
837  }
838 
839  const auto lhs_v = buffer_itr_.getColumnInternal(lhs_storage->buff_,
840  fixedup_lhs,
841  order_entry.tle_no - 1,
842  lhs_storage_lookup_result);
843  const auto rhs_v = buffer_itr_.getColumnInternal(rhs_storage->buff_,
844  fixedup_rhs,
845  order_entry.tle_no - 1,
846  rhs_storage_lookup_result);
847 
848  if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
849  isNull(entry_ti, rhs_v, float_argument_input))) {
850  continue;
851  }
852  if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
853  !isNull(entry_ti, rhs_v, float_argument_input))) {
854  return order_entry.nulls_first;
855  }
856  if (UNLIKELY(isNull(entry_ti, rhs_v, float_argument_input) &&
857  !isNull(entry_ti, lhs_v, float_argument_input))) {
858  return !order_entry.nulls_first;
859  }
860 
861  if (LIKELY(lhs_v.isInt())) {
862  CHECK(rhs_v.isInt());
863  if (UNLIKELY(entry_ti.is_string() &&
864  entry_ti.get_compression() == kENCODING_DICT)) {
865  CHECK_EQ(4, entry_ti.get_logical_size());
866  CHECK(executor_);
867  const auto string_dict_proxy = executor_->getStringDictionaryProxy(
868  entry_ti.get_comp_param(), result_set_->row_set_mem_owner_, false);
869  auto lhs_str = string_dict_proxy->getString(lhs_v.i1);
870  auto rhs_str = string_dict_proxy->getString(rhs_v.i1);
871  if (lhs_str == rhs_str) {
872  continue;
873  }
874  return (lhs_str < rhs_str) != order_entry.is_desc;
875  }
876 
877  if (lhs_v.i1 == rhs_v.i1) {
878  continue;
879  }
880  if (entry_ti.is_fp()) {
881  if (float_argument_input) {
882  const auto lhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&lhs_v.i1));
883  const auto rhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&rhs_v.i1));
884  return (lhs_dval < rhs_dval) != order_entry.is_desc;
885  } else {
886  const auto lhs_dval =
887  *reinterpret_cast<const double*>(may_alias_ptr(&lhs_v.i1));
888  const auto rhs_dval =
889  *reinterpret_cast<const double*>(may_alias_ptr(&rhs_v.i1));
890  return (lhs_dval < rhs_dval) != order_entry.is_desc;
891  }
892  }
893  return (lhs_v.i1 < rhs_v.i1) != order_entry.is_desc;
894  } else {
895  if (lhs_v.isPair()) {
896  CHECK(rhs_v.isPair());
897  const auto lhs =
898  pair_to_double({lhs_v.i1, lhs_v.i2}, entry_ti, float_argument_input);
899  const auto rhs =
900  pair_to_double({rhs_v.i1, rhs_v.i2}, entry_ti, float_argument_input);
901  if (lhs == rhs) {
902  continue;
903  }
904  return (lhs < rhs) != order_entry.is_desc;
905  } else {
906  CHECK(lhs_v.isStr() && rhs_v.isStr());
907  const auto lhs = lhs_v.strVal();
908  const auto rhs = rhs_v.strVal();
909  if (lhs == rhs) {
910  continue;
911  }
912  return (lhs < rhs) != order_entry.is_desc;
913  }
914  }
915  }
916  return false;
917 }
918 
919 // Partially sorts permutation into the top n elements (least, according to compare).
920 // If permutation.size() <= n then sort entire permutation by compare.
921 // Return PermutationView with new size() = min(n, permutation.size()).
922 PermutationView ResultSet::topPermutation(PermutationView permutation,
923  const size_t n,
924  const Comparator& compare) {
925  auto timer = DEBUG_TIMER(__func__);
926  if (n < permutation.size()) {
927  std::partial_sort(
928  permutation.begin(), permutation.begin() + n, permutation.end(), compare);
929  permutation.resize(n);
930  } else {
931  std::sort(permutation.begin(), permutation.end(), compare);
932  }
933  return permutation;
934 }
935 
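// Sorts the group-by buffer in place on device 0 and copies the result back to host.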
936 void ResultSet::radixSortOnGpu(
937  const std::list<Analyzer::OrderEntry>& order_entries) const {
938  auto timer = DEBUG_TIMER(__func__);
939  auto data_mgr = &catalog_->getDataMgr();
940  const int device_id{0};
941  CudaAllocator cuda_allocator(data_mgr, device_id);
942  CHECK_GT(block_size_, 0);
943  CHECK_GT(grid_size_, 0);
944  std::vector<int64_t*> group_by_buffers(block_size_);
945  group_by_buffers[0] = reinterpret_cast<int64_t*>(storage_->getUnderlyingBuffer());
946  auto dev_group_by_buffers =
947  create_dev_group_by_buffers(&cuda_allocator,
948  group_by_buffers,
949  query_mem_desc_,
950  block_size_,
951  grid_size_,
952  device_id,
953  ExecutorDispatchMode::KernelPerFragment,
954  -1,
955  true,
956  true,
957  false,
958  nullptr);
959  inplace_sort_gpu(
960  order_entries, query_mem_desc_, dev_group_by_buffers, data_mgr, device_id);
961  copy_group_by_buffers_from_gpu(
962  data_mgr,
963  group_by_buffers,
964  query_mem_desc_.getBufferSizeBytes(ExecutorDeviceType::GPU),
965  dev_group_by_buffers.second,
966  query_mem_desc_,
967  block_size_,
968  grid_size_,
969  device_id,
970  false);
971 }
972 
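// CPU fallback: sorts the single order-by key column, then applies the resulting
// permutation to every other slot in the buffer.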
973 void ResultSet::radixSortOnCpu(
974  const std::list<Analyzer::OrderEntry>& order_entries) const {
975  auto timer = DEBUG_TIMER(__func__);
976  CHECK(!query_mem_desc_.hasKeylessHash());
977  std::vector<int64_t> tmp_buff(query_mem_desc_.getEntryCount());
978  std::vector<int32_t> idx_buff(query_mem_desc_.getEntryCount());
979  CHECK_EQ(size_t(1), order_entries.size());
980  auto buffer_ptr = storage_->getUnderlyingBuffer();
981  for (const auto& order_entry : order_entries) {
982  const auto target_idx = order_entry.tle_no - 1;
983  const auto sortkey_val_buff = reinterpret_cast<int64_t*>(
984  buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
985  const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
986  sort_groups_cpu(sortkey_val_buff,
987  &idx_buff[0],
988  query_mem_desc_.getEntryCount(),
989  order_entry.is_desc,
990  chosen_bytes);
991  apply_permutation_cpu(reinterpret_cast<int64_t*>(buffer_ptr),
992  &idx_buff[0],
993  query_mem_desc_.getEntryCount(),
994  &tmp_buff[0],
995  sizeof(int64_t));
996  for (size_t target_idx = 0; target_idx < query_mem_desc_.getSlotCount();
997  ++target_idx) {
998  if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
999  continue;
1000  }
1001  const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
1002  const auto satellite_val_buff = reinterpret_cast<int64_t*>(
1003  buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
1004  apply_permutation_cpu(satellite_val_buff,
1005  &idx_buff[0],
1006  query_mem_desc_.getEntryCount(),
1007  &tmp_buff[0],
1008  chosen_bytes);
1009  }
1010  }
1011 }
1012 
1013 size_t ResultSet::getLimit() const {
1014  return keep_first_;
1015 }
1016 
1017 std::shared_ptr<const std::vector<std::string>> ResultSet::getStringDictionaryPayloadCopy(
1018  const int dict_id) const {
1019  const auto sdp = row_set_mem_owner_->getOrAddStringDictProxy(
1020  dict_id, /*with_generation=*/false, catalog_);
1021  CHECK(sdp);
1022  return sdp->getDictionary()->copyStrings();
1023 }
1024 
1032 bool ResultSet::isDirectColumnarConversionPossible() const {
1033  if (!g_enable_direct_columnarization) {
1034  return false;
1035  } else if (query_mem_desc_.didOutputColumnar()) {
1036  return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
1037  QueryDescriptionType::Projection ||
1038  (query_mem_desc_.getQueryDescriptionType() ==
1039  QueryDescriptionType::GroupByPerfectHash ||
1040  query_mem_desc_.getQueryDescriptionType() ==
1041  QueryDescriptionType::GroupByBaselineHash));
1042  } else {
1043  return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
1044  QueryDescriptionType::GroupByPerfectHash ||
1045  query_mem_desc_.getQueryDescriptionType() ==
1046  QueryDescriptionType::GroupByBaselineHash);
1047  }
1048 }
1049 
1050 bool ResultSet::isZeroCopyColumnarConversionPossible(size_t column_idx) const {
1051  return query_mem_desc_.didOutputColumnar() &&
1052  query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection &&
1053  appended_storage_.empty() && storage_ &&
1054  (lazy_fetch_info_.empty() || !lazy_fetch_info_[column_idx].is_lazily_fetched);
1055 }
1056 
1057 const int8_t* ResultSet::getColumnarBuffer(size_t column_idx) const {
1058  CHECK(isZeroCopyColumnarConversionPossible(column_idx));
1059  return storage_->getUnderlyingBuffer() + query_mem_desc_.getColOffInBytes(column_idx);
1060 }
1061 
1062 // returns a bitmap (and total number) of all single slot targets
1063 std::tuple<std::vector<bool>, size_t> ResultSet::getSingleSlotTargetBitmap() const {
1064  std::vector<bool> target_bitmap(targets_.size(), true);
1065  size_t num_single_slot_targets = 0;
1066  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
1067  const auto& sql_type = targets_[target_idx].sql_type;
1068  if (targets_[target_idx].is_agg && targets_[target_idx].agg_kind == kAVG) {
1069  target_bitmap[target_idx] = false;
1070  } else if (sql_type.is_varlen()) {
1071  target_bitmap[target_idx] = false;
1072  } else {
1073  num_single_slot_targets++;
1074  }
1075  }
1076  return std::make_tuple(std::move(target_bitmap), num_single_slot_targets);
1077 }
1078 
1087 std::tuple<std::vector<bool>, size_t> ResultSet::getSupportedSingleSlotTargetBitmap()
1088  const {
1089  CHECK(isDirectColumnarConversionPossible());
1090  auto [single_slot_targets, num_single_slot_targets] = getSingleSlotTargetBitmap();
1091 
1092  for (size_t target_idx = 0; target_idx < single_slot_targets.size(); target_idx++) {
1093  const auto& target = targets_[target_idx];
1094  if (single_slot_targets[target_idx] &&
1095  (is_distinct_target(target) || target.agg_kind == kAPPROX_MEDIAN ||
1096  (target.is_agg && target.agg_kind == kSAMPLE && target.sql_type == kFLOAT))) {
1097  single_slot_targets[target_idx] = false;
1098  num_single_slot_targets--;
1099  }
1100  }
1101  CHECK_GE(num_single_slot_targets, size_t(0));
1102  return std::make_tuple(std::move(single_slot_targets), num_single_slot_targets);
1103 }
1104 
1105 // returns the starting slot index for all targets in the result set
1106 std::vector<size_t> ResultSet::getSlotIndicesForTargetIndices() const {
1107  std::vector<size_t> slot_indices(targets_.size(), 0);
1108  size_t slot_index = 0;
1109  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
1110  slot_indices[target_idx] = slot_index;
1111  slot_index = advance_slot(slot_index, targets_[target_idx], false);
1112  }
1113  return slot_indices;
1114 }
1115 
1116 // namespace result_set
1117 
1118 bool result_set::can_use_parallel_algorithms(const ResultSet& rows) {
1119  return !rows.isTruncated();
1120 }
1121 
1122 bool result_set::use_parallel_algorithms(const ResultSet& rows) {
1123  return result_set::can_use_parallel_algorithms(rows) && rows.entryCount() >= 20000;
1124 }