ResultSet.cpp
1 /*
2  * Copyright 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "ResultSet.h"
26 #include "Execute.h"
27 #include "GpuMemUtils.h"
28 #include "InPlaceSort.h"
30 #include "RuntimeFunctions.h"
31 #include "Shared/Intervals.h"
32 #include "Shared/SqlTypesLayout.h"
33 #include "Shared/checked_alloc.h"
34 #include "Shared/likely.h"
35 #include "Shared/thread_count.h"
36 #include "Shared/threading.h"
37 
38 #include <algorithm>
39 #include <atomic>
40 #include <bitset>
41 #include <future>
42 #include <numeric>
43 
44 size_t g_parallel_top_min = 100e3;
45 size_t g_parallel_top_max = 20e6; // In effect only with g_enable_watchdog.
46 
47 void ResultSet::keepFirstN(const size_t n) {
48  CHECK_EQ(-1, cached_row_count_);
49  keep_first_ = n;
50 }
51 
52 void ResultSet::dropFirstN(const size_t n) {
53  CHECK_EQ(-1, cached_row_count_);
54  drop_first_ = n;
55 }
56 
57 ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
58  const ExecutorDeviceType device_type,
59  const QueryMemoryDescriptor& query_mem_desc,
60  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
61  const Catalog_Namespace::Catalog* catalog,
62  const unsigned block_size,
63  const unsigned grid_size)
64  : targets_(targets)
65  , device_type_(device_type)
66  , device_id_(-1)
67  , query_mem_desc_(query_mem_desc)
68  , crt_row_buff_idx_(0)
69  , fetched_so_far_(0)
70  , drop_first_(0)
71  , keep_first_(0)
72  , row_set_mem_owner_(row_set_mem_owner)
73  , catalog_(catalog)
74  , block_size_(block_size)
75  , grid_size_(grid_size)
76  , data_mgr_(nullptr)
77  , separate_varlen_storage_valid_(false)
78  , just_explain_(false)
79  , for_validation_only_(false)
80  , cached_row_count_(-1)
81  , geo_return_type_(GeoReturnType::WktString) {}
82 
83 ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
84  const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
85  const std::vector<std::vector<const int8_t*>>& col_buffers,
86  const std::vector<std::vector<int64_t>>& frag_offsets,
87  const std::vector<int64_t>& consistent_frag_sizes,
88  const ExecutorDeviceType device_type,
89  const int device_id,
90  const QueryMemoryDescriptor& query_mem_desc,
91  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
92  const Catalog_Namespace::Catalog* catalog,
93  const unsigned block_size,
94  const unsigned grid_size)
95  : targets_(targets)
96  , device_type_(device_type)
97  , device_id_(device_id)
98  , query_mem_desc_(query_mem_desc)
99  , crt_row_buff_idx_(0)
100  , fetched_so_far_(0)
101  , drop_first_(0)
102  , keep_first_(0)
103  , row_set_mem_owner_(row_set_mem_owner)
104  , catalog_(catalog)
105  , block_size_(block_size)
106  , grid_size_(grid_size)
107  , lazy_fetch_info_(lazy_fetch_info)
108  , col_buffers_{col_buffers}
109  , frag_offsets_{frag_offsets}
110  , consistent_frag_sizes_{consistent_frag_sizes}
111  , data_mgr_(nullptr)
112  , separate_varlen_storage_valid_(false)
113  , just_explain_(false)
114  , for_validation_only_(false)
115  , cached_row_count_(-1)
116  , geo_return_type_(GeoReturnType::WktString) {}
117 
118 ResultSet::ResultSet(const std::shared_ptr<const Analyzer::Estimator> estimator,
119  const ExecutorDeviceType device_type,
120  const int device_id,
121  Data_Namespace::DataMgr* data_mgr)
122  : device_type_(device_type)
123  , device_id_(device_id)
124  , query_mem_desc_{}
125  , crt_row_buff_idx_(0)
126  , estimator_(estimator)
127  , data_mgr_(data_mgr)
128  , separate_varlen_storage_valid_(false)
129  , just_explain_(false)
130  , for_validation_only_(false)
131  , cached_row_count_(-1)
132  , geo_return_type_(GeoReturnType::WktString) {
133  if (device_type == ExecutorDeviceType::GPU) {
134  device_estimator_buffer_ = CudaAllocator::allocGpuAbstractBuffer(
135  data_mgr_, estimator_->getBufferSize(), device_id_);
136  data_mgr->getCudaMgr()->zeroDeviceMem(device_estimator_buffer_->getMemoryPtr(),
137  estimator_->getBufferSize(),
138  device_id_);
139  } else {
140  host_estimator_buffer_ =
141  static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
142  }
143 }
144 
145 ResultSet::ResultSet(const std::string& explanation)
146  : device_type_(ExecutorDeviceType::CPU)
147  , device_id_(-1)
148  , fetched_so_far_(0)
149  , separate_varlen_storage_valid_(false)
150  , explanation_(explanation)
151  , just_explain_(true)
152  , for_validation_only_(false)
153  , cached_row_count_(-1)
154  , geo_return_type_(GeoReturnType::WktString) {}
155 
156 ResultSet::ResultSet(int64_t queue_time_ms,
157  int64_t render_time_ms,
158  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
159  : device_type_(ExecutorDeviceType::CPU)
160  , device_id_(-1)
161  , fetched_so_far_(0)
162  , row_set_mem_owner_(row_set_mem_owner)
163  , timings_(QueryExecutionTimings{queue_time_ms, render_time_ms, 0, 0})
164  , separate_varlen_storage_valid_(false)
165  , just_explain_(true)
166  , for_validation_only_(false)
167  , cached_row_count_(-1)
168  , geo_return_type_(GeoReturnType::WktString){};
169 
170 ResultSet::~ResultSet() {
171  if (storage_) {
172  if (!storage_->buff_is_provided_) {
173  CHECK(storage_->getUnderlyingBuffer());
174  free(storage_->getUnderlyingBuffer());
175  }
176  }
177  for (auto& storage : appended_storage_) {
178  if (storage && !storage->buff_is_provided_) {
179  free(storage->getUnderlyingBuffer());
180  }
181  }
182  if (host_estimator_buffer_) {
183  CHECK(device_type_ == ExecutorDeviceType::CPU || device_estimator_buffer_);
184  free(host_estimator_buffer_);
185  }
186  if (device_estimator_buffer_) {
187  CHECK(data_mgr_);
188  data_mgr_->free(device_estimator_buffer_);
189  }
190 }
191 
192 ExecutorDeviceType ResultSet::getDeviceType() const {
193  return device_type_;
194 }
195 
196 const ResultSetStorage* ResultSet::allocateStorage() const {
197  CHECK(!storage_);
198  CHECK(row_set_mem_owner_);
199  auto buff = row_set_mem_owner_->allocate(
200  query_mem_desc_.getBufferSizeBytes(device_type_), /*thread_idx=*/0);
201  storage_.reset(
202  new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
203  return storage_.get();
204 }
205 
206 const ResultSetStorage* ResultSet::allocateStorage(
207  int8_t* buff,
208  const std::vector<int64_t>& target_init_vals,
209  std::shared_ptr<VarlenOutputInfo> varlen_output_info) const {
210  CHECK(buff);
211  CHECK(!storage_);
212  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
213  // TODO: add both to the constructor
214  storage_->target_init_vals_ = target_init_vals;
215  if (varlen_output_info) {
216  storage_->varlen_output_info_ = varlen_output_info;
217  }
218  return storage_.get();
219 }
220 
221 const ResultSetStorage* ResultSet::allocateStorage(
222  const std::vector<int64_t>& target_init_vals) const {
223  CHECK(!storage_);
224  CHECK(row_set_mem_owner_);
225  auto buff = row_set_mem_owner_->allocate(
226  query_mem_desc_.getBufferSizeBytes(device_type_), /*thread_idx=*/0);
227  storage_.reset(
228  new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
229  storage_->target_init_vals_ = target_init_vals;
230  return storage_.get();
231 }
232 
233 size_t ResultSet::getCurrentRowBufferIndex() const {
234  if (crt_row_buff_idx_ == 0) {
235  throw std::runtime_error("current row buffer iteration index is undefined");
236  }
237  return crt_row_buff_idx_ - 1;
238 }
239 
240 // Note: that.appended_storage_ does not get appended to this.
241 void ResultSet::append(ResultSet& that) {
242  CHECK_EQ(-1, cached_row_count_);
243  if (!that.storage_) {
244  return;
245  }
246  appended_storage_.push_back(std::move(that.storage_));
247  query_mem_desc_.setEntryCount(
248  query_mem_desc_.getEntryCount() +
249  appended_storage_.back()->query_mem_desc_.getEntryCount());
250  chunks_.insert(chunks_.end(), that.chunks_.begin(), that.chunks_.end());
251  col_buffers_.insert(
252  col_buffers_.end(), that.col_buffers_.begin(), that.col_buffers_.end());
253  frag_offsets_.insert(
254  frag_offsets_.end(), that.frag_offsets_.begin(), that.frag_offsets_.end());
255  consistent_frag_sizes_.insert(consistent_frag_sizes_.end(),
256  that.consistent_frag_sizes_.begin(),
257  that.consistent_frag_sizes_.end());
258  chunk_iters_.insert(
259  chunk_iters_.end(), that.chunk_iters_.begin(), that.chunk_iters_.end());
260  if (separate_varlen_storage_valid_) {
261  CHECK(that.separate_varlen_storage_valid_);
262  serialized_varlen_buffer_.insert(serialized_varlen_buffer_.end(),
263  that.serialized_varlen_buffer_.begin(),
264  that.serialized_varlen_buffer_.end());
265  }
266  for (auto& buff : that.literal_buffers_) {
267  literal_buffers_.push_back(std::move(buff));
268  }
269 }
270 
271 const ResultSetStorage* ResultSet::getStorage() const {
272  return storage_.get();
273 }
274 
275 size_t ResultSet::colCount() const {
276  return just_explain_ ? 1 : targets_.size();
277 }
278 
279 SQLTypeInfo ResultSet::getColType(const size_t col_idx) const {
280  if (just_explain_) {
281  return SQLTypeInfo(kTEXT, false);
282  }
283  CHECK_LT(col_idx, targets_.size());
284  return targets_[col_idx].agg_kind == kAVG ? SQLTypeInfo(kDOUBLE, false)
285  : targets_[col_idx].sql_type;
286 }
287 
288 namespace {
289 
290 size_t get_truncated_row_count(size_t total_row_count, size_t limit, size_t offset) {
291  if (total_row_count < offset) {
292  return 0;
293  }
294 
295  size_t total_truncated_row_count = total_row_count - offset;
296 
297  if (limit) {
298  return std::min(total_truncated_row_count, limit);
299  }
300 
301  return total_truncated_row_count;
302 }
303 
304 } // namespace
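The helper above implements LIMIT/OFFSET truncation: drop `offset` rows, then cap the remainder at `limit`, where a limit of zero means "no LIMIT". A standalone sketch of the same arithmetic with hypothetical row counts (the `truncated` helper below is illustrative and not part of this file):

// Illustrative check of get_truncated_row_count() semantics.
#include <algorithm>
#include <cassert>
#include <cstddef>

static size_t truncated(size_t total, size_t limit, size_t offset) {
  if (total < offset) {
    return 0;  // OFFSET is past the end, nothing remains
  }
  const size_t remaining = total - offset;
  return limit ? std::min(remaining, limit) : remaining;  // limit == 0 means no LIMIT
}

int main() {
  assert(truncated(100, 10, 95) == 5);   // only 5 rows remain after the offset
  assert(truncated(100, 10, 120) == 0);  // offset beyond the total row count
  assert(truncated(100, 0, 20) == 80);   // no limit: everything after the offset
  return 0;
}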
305 
306 size_t ResultSet::rowCount(const bool force_parallel) const {
307  if (just_explain_) {
308  return 1;
309  }
310  if (!permutation_.empty()) {
311  if (drop_first_ > permutation_.size()) {
312  return 0;
313  }
314  const auto limited_row_count = keep_first_ + drop_first_;
315  return limited_row_count ? std::min(limited_row_count, permutation_.size())
316  : permutation_.size();
317  }
318  if (cached_row_count_ != -1) {
319  CHECK_GE(cached_row_count_, 0);
320  return cached_row_count_;
321  }
322  if (!storage_) {
323  return 0;
324  }
325  if (permutation_.empty() &&
326  query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection) {
327  return binSearchRowCount();
328  }
329  if (force_parallel || entryCount() > 20000) {
330  return parallelRowCount();
331  }
332  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
333  moveToBegin();
334  size_t row_count{0};
335  while (true) {
336  auto crt_row = getNextRowUnlocked(false, false);
337  if (crt_row.empty()) {
338  break;
339  }
340  ++row_count;
341  }
342  moveToBegin();
343  return row_count;
344 }
345 
346 void ResultSet::setCachedRowCount(const size_t row_count) const {
347  CHECK(cached_row_count_ == -1 || cached_row_count_ == static_cast<int64_t>(row_count));
348  cached_row_count_ = row_count;
349 }
350 
351 size_t ResultSet::binSearchRowCount() const {
352  if (!storage_) {
353  return 0;
354  }
355 
356  size_t row_count = storage_->binSearchRowCount();
357  for (auto& s : appended_storage_) {
358  row_count += s->binSearchRowCount();
359  }
360 
361  return get_truncated_row_count(row_count, getLimit(), drop_first_);
362 }
363 
364 size_t ResultSet::parallelRowCount() const {
365  using namespace threading;
366  auto execute_parallel_row_count = [this, query_id = logger::query_id()](
367  const blocked_range<size_t>& r,
368  size_t row_count) {
369  auto qid_scope_guard = logger::set_thread_local_query_id(query_id);
370  for (size_t i = r.begin(); i < r.end(); ++i) {
371  if (!isRowAtEmpty(i)) {
372  ++row_count;
373  }
374  }
375  return row_count;
376  };
377  const auto row_count = parallel_reduce(blocked_range<size_t>(0, entryCount()),
378  size_t(0),
379  execute_parallel_row_count,
380  std::plus<int>());
381  return get_truncated_row_count(row_count, getLimit(), drop_first_);
382 }
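parallelRowCount() maps "count the non-empty entries" over a blocked range and reduces the per-chunk counts with std::plus<>. A rough stand-in for that map/reduce shape using std::async instead of the project's threading::parallel_reduce (illustrative only; the `empty_flags` vector and `count_nonempty` helper are hypothetical, with the flag lookup playing the role of isRowAtEmpty()):

// Sketch of a chunked count-and-sum reduction over entry emptiness flags.
#include <algorithm>
#include <cstddef>
#include <future>
#include <vector>

size_t count_nonempty(const std::vector<bool>& empty_flags, size_t nthreads) {
  std::vector<std::future<size_t>> parts;
  const size_t chunk = (empty_flags.size() + nthreads - 1) / nthreads;
  for (size_t begin = 0; begin < empty_flags.size(); begin += chunk) {
    const size_t end = std::min(begin + chunk, empty_flags.size());
    parts.push_back(std::async(std::launch::async, [&, begin, end] {
      size_t local = 0;
      for (size_t i = begin; i < end; ++i) {
        local += empty_flags[i] ? 0 : 1;  // mirrors !isRowAtEmpty(i)
      }
      return local;
    }));
  }
  size_t total = 0;
  for (auto& f : parts) {
    total += f.get();  // serial sum plays the role of the std::plus<> reduction
  }
  return total;
}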
383 
384 bool ResultSet::isEmpty() const {
385  if (entryCount() == 0) {
386  return true;
387  }
388  if (!storage_) {
389  return true;
390  }
391 
392  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
393  moveToBegin();
394  // The first returned row is non-empty iff the result set has at least one row;
395  // an empty row here means iteration is already exhausted.
396  auto crt_row = getNextRowUnlocked(false, false);
397  if (!crt_row.empty()) {
398  return false;
399  }
400  moveToBegin();
401  return true;
402 }
403 
404 bool ResultSet::definitelyHasNoRows() const {
405  return !storage_ && !estimator_ && !just_explain_;
406 }
407 
408 const QueryMemoryDescriptor& ResultSet::getQueryMemDesc() const {
409  CHECK(storage_);
410  return storage_->query_mem_desc_;
411 }
412 
413 const std::vector<TargetInfo>& ResultSet::getTargetInfos() const {
414  return targets_;
415 }
416 
417 const std::vector<int64_t>& ResultSet::getTargetInitVals() const {
418  CHECK(storage_);
419  return storage_->target_init_vals_;
420 }
421 
422 int8_t* ResultSet::getDeviceEstimatorBuffer() const {
423  CHECK(device_type_ == ExecutorDeviceType::GPU);
424  CHECK(device_estimator_buffer_);
425  return device_estimator_buffer_->getMemoryPtr();
426 }
427 
428 int8_t* ResultSet::getHostEstimatorBuffer() const {
429  return host_estimator_buffer_;
430 }
431 
432 void ResultSet::syncEstimatorBuffer() const {
433  CHECK(device_type_ == ExecutorDeviceType::GPU);
434  CHECK(!host_estimator_buffer_);
435  CHECK_EQ(size_t(0), estimator_->getBufferSize() % sizeof(int64_t));
436  host_estimator_buffer_ =
437  static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
438  CHECK(device_estimator_buffer_);
439  auto device_buffer_ptr = device_estimator_buffer_->getMemoryPtr();
440  auto allocator = data_mgr_->createGpuAllocator(device_id_);
441  allocator->copyFromDevice(
442  host_estimator_buffer_, device_buffer_ptr, estimator_->getBufferSize());
443 }
444 
445 void ResultSet::setQueueTime(const int64_t queue_time) {
446  timings_.executor_queue_time = queue_time;
447 }
448 
449 void ResultSet::setKernelQueueTime(const int64_t kernel_queue_time) {
450  timings_.kernel_queue_time = kernel_queue_time;
451 }
452 
453 void ResultSet::addCompilationQueueTime(const int64_t compilation_queue_time) {
454  timings_.compilation_queue_time += compilation_queue_time;
455 }
456 
457 int64_t ResultSet::getQueueTime() const {
458  return timings_.executor_queue_time + timings_.kernel_queue_time +
459  timings_.compilation_queue_time;
460 }
461 
462 int64_t ResultSet::getRenderTime() const {
463  return timings_.render_time;
464 }
465 
466 void ResultSet::moveToBegin() const {
467  crt_row_buff_idx_ = 0;
468  fetched_so_far_ = 0;
469 }
470 
471 bool ResultSet::isTruncated() const {
472  return keep_first_ + drop_first_;
473 }
474 
475 bool ResultSet::isExplain() const {
476  return just_explain_;
477 }
478 
479 void ResultSet::setValidationOnlyRes() {
480  for_validation_only_ = true;
481 }
482 
483 bool ResultSet::isValidationOnlyRes() const {
484  return for_validation_only_;
485 }
486 
487 int ResultSet::getDeviceId() const {
488  return device_id_;
489 }
490 
491 QueryMemoryDescriptor ResultSet::fixupQueryMemoryDescriptor(
492  const QueryMemoryDescriptor& query_mem_desc) {
493  auto query_mem_desc_copy = query_mem_desc;
494  query_mem_desc_copy.resetGroupColWidths(
495  std::vector<int8_t>(query_mem_desc_copy.getGroupbyColCount(), 8));
496  if (query_mem_desc.didOutputColumnar()) {
497  return query_mem_desc_copy;
498  }
499  query_mem_desc_copy.alignPaddedSlots();
500  return query_mem_desc_copy;
501 }
502 
503 void ResultSet::sort(const std::list<Analyzer::OrderEntry>& order_entries,
504  size_t top_n,
505  const Executor* executor) {
506  auto timer = DEBUG_TIMER(__func__);
507 
508  if (!storage_) {
509  return;
510  }
511  CHECK_EQ(-1, cached_row_count_);
512  CHECK(!targets_.empty());
513 #ifdef HAVE_CUDA
514  if (canUseFastBaselineSort(order_entries, top_n)) {
515  baselineSort(order_entries, top_n, executor);
516  return;
517  }
518 #endif // HAVE_CUDA
519  if (query_mem_desc_.sortOnGpu()) {
520  try {
521  radixSortOnGpu(order_entries);
522  } catch (const OutOfMemory&) {
523  LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
524  radixSortOnCpu(order_entries);
525  } catch (const std::bad_alloc&) {
526  LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
527  radixSortOnCpu(order_entries);
528  }
529  return;
530  }
531  // This check isn't strictly required, but allows the index buffer to be 32-bit.
532  if (query_mem_desc_.getEntryCount() > std::numeric_limits<uint32_t>::max()) {
533  throw RowSortException("Sorting more than 4B elements not supported");
534  }
535 
536  CHECK(permutation_.empty());
537 
538  if (top_n && g_parallel_top_min < entryCount()) {
539  if (g_enable_watchdog && g_parallel_top_max < entryCount()) {
540  throw WatchdogException("Sorting the result would be too slow");
541  }
542  parallelTop(order_entries, top_n, executor);
543  } else {
544  if (g_enable_watchdog && Executor::baseline_threshold < entryCount()) {
545  throw WatchdogException("Sorting the result would be too slow");
546  }
547  permutation_.resize(query_mem_desc_.getEntryCount());
548  // PermutationView is used to share common API with parallelTop().
549  PermutationView pv(permutation_.data(), 0, permutation_.size());
550  pv = initPermutationBuffer(pv, 0, permutation_.size());
551  if (top_n == 0) {
552  top_n = pv.size(); // top_n == 0 implies a full sort
553  }
554  pv = topPermutation(pv, top_n, createComparator(order_entries, pv, executor, false));
555  if (pv.size() < permutation_.size()) {
556  permutation_.resize(pv.size());
557  permutation_.shrink_to_fit();
558  }
559  }
560 }
561 
562 #ifdef HAVE_CUDA
563 void ResultSet::baselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
564  const size_t top_n,
565  const Executor* executor) {
566  auto timer = DEBUG_TIMER(__func__);
567  // If we only have one GPU, it's usually faster to do a multi-threaded radix sort on the CPU
568  if (getGpuCount() > 1) {
569  try {
570  doBaselineSort(ExecutorDeviceType::GPU, order_entries, top_n, executor);
571  } catch (...) {
572  doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n, executor);
573  }
574  } else {
575  doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n, executor);
576  }
577 }
578 #endif // HAVE_CUDA
579 
580 // Append non-empty indexes i in [begin,end) from findStorage(i) to permutation.
581 PermutationView ResultSet::initPermutationBuffer(PermutationView permutation,
582  PermutationIdx const begin,
583  PermutationIdx const end) const {
584  auto timer = DEBUG_TIMER(__func__);
585  for (PermutationIdx i = begin; i < end; ++i) {
586  const auto storage_lookup_result = findStorage(i);
587  const auto lhs_storage = storage_lookup_result.storage_ptr;
588  const auto off = storage_lookup_result.fixedup_entry_idx;
589  CHECK(lhs_storage);
590  if (!lhs_storage->isEmptyEntry(off)) {
591  permutation.push_back(i);
592  }
593  }
594  return permutation;
595 }
596 
597 const Permutation& ResultSet::getPermutationBuffer() const {
598  return permutation_;
599 }
600 
601 void ResultSet::parallelTop(const std::list<Analyzer::OrderEntry>& order_entries,
602  const size_t top_n,
603  const Executor* executor) {
604  auto timer = DEBUG_TIMER(__func__);
605  const size_t nthreads = cpu_threads();
606 
607  // Split permutation_ into nthreads subranges and top-sort in-place.
608  permutation_.resize(query_mem_desc_.getEntryCount());
609  std::vector<PermutationView> permutation_views(nthreads);
610  threading::task_group top_sort_threads;
611  for (auto interval : makeIntervals<PermutationIdx>(0, permutation_.size(), nthreads)) {
612  top_sort_threads.run([this,
613  &order_entries,
614  &permutation_views,
615  top_n,
616  executor,
617  query_id = logger::query_id(),
618  interval] {
619  auto qid_scope_guard = logger::set_thread_local_query_id(query_id);
620  PermutationView pv(permutation_.data() + interval.begin, 0, interval.size());
621  pv = initPermutationBuffer(pv, interval.begin, interval.end);
622  const auto compare = createComparator(order_entries, pv, executor, true);
623  permutation_views[interval.index] = topPermutation(pv, top_n, compare);
624  });
625  }
626  top_sort_threads.wait();
627 
628  // In case you are considering implementing a parallel reduction, note that the
629  // ResultSetComparator constructor is O(N) in order to materialize some of the aggregate
630  // columns as necessary to perform a comparison. This cost is why reduction is chosen to
631  // be serial instead; only one more Comparator is needed below.
632 
633  // Left-copy disjoint top-sorted subranges into one contiguous range.
634  // ++++....+++.....+++++... -> ++++++++++++............
635  auto end = permutation_.begin() + permutation_views.front().size();
636  for (size_t i = 1; i < nthreads; ++i) {
637  std::copy(permutation_views[i].begin(), permutation_views[i].end(), end);
638  end += permutation_views[i].size();
639  }
640 
641  // Top sort final range.
642  PermutationView pv(permutation_.data(), end - permutation_.begin());
643  const auto compare = createComparator(order_entries, pv, executor, false);
644  pv = topPermutation(pv, top_n, compare);
645  permutation_.resize(pv.size());
646  permutation_.shrink_to_fit();
647 }
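parallelTop() top-sorts nthreads disjoint subranges of permutation_ in place, left-copies each subrange's winners into one contiguous prefix, and then top-sorts the merged candidates once more. A sketch of that split / per-chunk top-n / merge / final top-n pattern on plain integers (a hypothetical, serial helper rather than the threaded PermutationView machinery):

// Split into chunks, keep each chunk's least-n elements, then select the overall least n.
#include <algorithm>
#include <cstddef>
#include <vector>

std::vector<int> top_n_by_chunks(std::vector<int> values, size_t n, size_t nchunks) {
  std::vector<int> candidates;
  const size_t chunk = (values.size() + nchunks - 1) / nchunks;
  for (size_t begin = 0; begin < values.size(); begin += chunk) {
    const size_t end = std::min(begin + chunk, values.size());
    std::vector<int> part(values.begin() + begin, values.begin() + end);
    const size_t k = std::min(n, part.size());
    std::partial_sort(part.begin(), part.begin() + k, part.end());
    candidates.insert(candidates.end(), part.begin(), part.begin() + k);  // chunk winners
  }
  const size_t k = std::min(n, candidates.size());
  std::partial_sort(candidates.begin(), candidates.begin() + k, candidates.end());
  candidates.resize(k);  // final top n over the merged per-chunk winners
  return candidates;
}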
648 
649 std::pair<size_t, size_t> ResultSet::getStorageIndex(const size_t entry_idx) const {
650  size_t fixedup_entry_idx = entry_idx;
651  auto entry_count = storage_->query_mem_desc_.getEntryCount();
652  const bool is_rowwise_layout = !storage_->query_mem_desc_.didOutputColumnar();
653  if (fixedup_entry_idx < entry_count) {
654  return {0, fixedup_entry_idx};
655  }
656  fixedup_entry_idx -= entry_count;
657  for (size_t i = 0; i < appended_storage_.size(); ++i) {
658  const auto& desc = appended_storage_[i]->query_mem_desc_;
659  CHECK_NE(is_rowwise_layout, desc.didOutputColumnar());
660  entry_count = desc.getEntryCount();
661  if (fixedup_entry_idx < entry_count) {
662  return {i + 1, fixedup_entry_idx};
663  }
664  fixedup_entry_idx -= entry_count;
665  }
666  UNREACHABLE() << "entry_idx = " << entry_idx << ", query_mem_desc_.getEntryCount() = "
667  << query_mem_desc_.getEntryCount();
668  return {};
669 }
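getStorageIndex() walks storage_ and then each appended storage, subtracting entry counts until the global entry index falls inside one of them. A minimal sketch of the same walk over hypothetical per-storage entry counts (the `locate` helper is illustrative only):

// Map a global entry index to (storage index, local index) given entry counts.
#include <cstddef>
#include <utility>
#include <vector>

std::pair<size_t, size_t> locate(size_t entry_idx, const std::vector<size_t>& entry_counts) {
  for (size_t storage_idx = 0; storage_idx < entry_counts.size(); ++storage_idx) {
    if (entry_idx < entry_counts[storage_idx]) {
      return {storage_idx, entry_idx};  // found the owning storage
    }
    entry_idx -= entry_counts[storage_idx];  // skip past this storage's entries
  }
  return {entry_counts.size(), 0};  // out of range; the real code treats this as UNREACHABLE
}
// e.g. with counts {100, 50, 50}, entry 125 maps to storage 1, local index 25.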
670 
673 
674 ResultSet::StorageLookupResult ResultSet::findStorage(const size_t entry_idx) const {
675  auto [stg_idx, fixedup_entry_idx] = getStorageIndex(entry_idx);
676  return {stg_idx ? appended_storage_[stg_idx - 1].get() : storage_.get(),
677  fixedup_entry_idx,
678  stg_idx};
679 }
680 
681 template <typename BUFFER_ITERATOR_TYPE>
682 void ResultSet::ResultSetComparator<
683  BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumns() {
684  for (const auto& order_entry : order_entries_) {
685  if (is_distinct_target(result_set_->targets_[order_entry.tle_no - 1])) {
686  count_distinct_materialized_buffers_.emplace_back(
687  materializeCountDistinctColumn(order_entry));
688  }
689  }
690 }
691 
692 template <typename BUFFER_ITERATOR_TYPE>
693 ResultSet::ApproxQuantileBuffers ResultSet::ResultSetComparator<
694  BUFFER_ITERATOR_TYPE>::materializeApproxQuantileColumns() const {
695  ResultSet::ApproxQuantileBuffers approx_quantile_materialized_buffers;
696  for (const auto& order_entry : order_entries_) {
697  if (result_set_->targets_[order_entry.tle_no - 1].agg_kind == kAPPROX_QUANTILE) {
698  approx_quantile_materialized_buffers.emplace_back(
699  materializeApproxQuantileColumn(order_entry));
700  }
701  }
702  return approx_quantile_materialized_buffers;
703 }
704 
705 template <typename BUFFER_ITERATOR_TYPE>
706 std::vector<int64_t>
707 ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumn(
708  const Analyzer::OrderEntry& order_entry) const {
709  const size_t num_storage_entries = result_set_->query_mem_desc_.getEntryCount();
710  std::vector<int64_t> count_distinct_materialized_buffer(num_storage_entries);
711  const CountDistinctDescriptor count_distinct_descriptor =
712  result_set_->query_mem_desc_.getCountDistinctDescriptor(order_entry.tle_no - 1);
713  const size_t num_non_empty_entries = permutation_.size();
714 
715  const auto work = [&, query_id = logger::query_id()](const size_t start,
716  const size_t end) {
717  auto qid_scope_guard = logger::set_thread_local_query_id(query_id);
718  for (size_t i = start; i < end; ++i) {
719  const PermutationIdx permuted_idx = permutation_[i];
720  const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
721  const auto storage = storage_lookup_result.storage_ptr;
722  const auto off = storage_lookup_result.fixedup_entry_idx;
723  const auto value = buffer_itr_.getColumnInternal(
724  storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
725  count_distinct_materialized_buffer[permuted_idx] =
726  count_distinct_set_size(value.i1, count_distinct_descriptor);
727  }
728  };
729  // TODO(tlm): Allow use of tbb after we determine how to easily encapsulate the choice
730  // between thread pool types
731  if (single_threaded_) {
732  work(0, num_non_empty_entries);
733  } else {
734  threading::task_group thread_pool;
735  for (auto interval : makeIntervals<size_t>(0, num_non_empty_entries, cpu_threads())) {
736  thread_pool.run([=] { work(interval.begin, interval.end); });
737  }
738  thread_pool.wait();
739  }
740  return count_distinct_materialized_buffer;
741 }
742 
743 double ResultSet::calculateQuantile(quantile::TDigest* const t_digest) {
744  static_assert(sizeof(int64_t) == sizeof(quantile::TDigest*));
745  CHECK(t_digest);
746  t_digest->mergeBuffer();
747  double const quantile = t_digest->quantile();
748  return boost::math::isnan(quantile) ? NULL_DOUBLE : quantile;
749 }
750 
751 template <typename BUFFER_ITERATOR_TYPE>
752 ResultSet::ApproxQuantileBuffers::value_type
753 ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeApproxQuantileColumn(
754  const Analyzer::OrderEntry& order_entry) const {
755  ResultSet::ApproxQuantileBuffers::value_type materialized_buffer(
756  result_set_->query_mem_desc_.getEntryCount());
757  const size_t size = permutation_.size();
758  const auto work = [&, query_id = logger::query_id()](const size_t start,
759  const size_t end) {
760  auto qid_scope_guard = logger::set_thread_local_query_id(query_id);
761  for (size_t i = start; i < end; ++i) {
762  const PermutationIdx permuted_idx = permutation_[i];
763  const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
764  const auto storage = storage_lookup_result.storage_ptr;
765  const auto off = storage_lookup_result.fixedup_entry_idx;
766  const auto value = buffer_itr_.getColumnInternal(
767  storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
768  materialized_buffer[permuted_idx] =
769  value.i1 ? calculateQuantile(reinterpret_cast<quantile::TDigest*>(value.i1))
770  : NULL_DOUBLE;
771  }
772  };
773  if (single_threaded_) {
774  work(0, size);
775  } else {
776  threading::task_group thread_pool;
777  for (auto interval : makeIntervals<size_t>(0, size, cpu_threads())) {
778  thread_pool.run([=] { work(interval.begin, interval.end); });
779  }
780  thread_pool.wait();
781  }
782  return materialized_buffer;
783 }
784 
785 template <typename BUFFER_ITERATOR_TYPE>
786 bool ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::operator()(
787  const PermutationIdx lhs,
788  const PermutationIdx rhs) const {
789  // NB: The compare function must define a strict weak ordering, otherwise
790  // std::sort will trigger a segmentation fault (or corrupt memory).
791  const auto lhs_storage_lookup_result = result_set_->findStorage(lhs);
792  const auto rhs_storage_lookup_result = result_set_->findStorage(rhs);
793  const auto lhs_storage = lhs_storage_lookup_result.storage_ptr;
794  const auto rhs_storage = rhs_storage_lookup_result.storage_ptr;
795  const auto fixedup_lhs = lhs_storage_lookup_result.fixedup_entry_idx;
796  const auto fixedup_rhs = rhs_storage_lookup_result.fixedup_entry_idx;
797  size_t materialized_count_distinct_buffer_idx{0};
798  size_t materialized_approx_quantile_buffer_idx{0};
799 
800  for (const auto& order_entry : order_entries_) {
801  CHECK_GE(order_entry.tle_no, 1);
802  const auto& agg_info = result_set_->targets_[order_entry.tle_no - 1];
803  const auto entry_ti = get_compact_type(agg_info);
804  bool float_argument_input = takes_float_argument(agg_info);
805  // Need to determine whether the float value has been stored as a float
806  // or compacted to a different (often larger, 8-byte) width; in the
807  // distributed case the floats are actually 4 bytes.
808  // TODO: takes_float_argument() is widely used; this problem may exist
809  // elsewhere as well.
810  if (entry_ti.get_type() == kFLOAT) {
811  const auto is_col_lazy =
812  !result_set_->lazy_fetch_info_.empty() &&
813  result_set_->lazy_fetch_info_[order_entry.tle_no - 1].is_lazily_fetched;
814  if (result_set_->query_mem_desc_.getPaddedSlotWidthBytes(order_entry.tle_no - 1) ==
815  sizeof(float)) {
816  float_argument_input =
817  result_set_->query_mem_desc_.didOutputColumnar() ? !is_col_lazy : true;
818  }
819  }
820 
821  if (UNLIKELY(is_distinct_target(agg_info))) {
822  CHECK_LT(materialized_count_distinct_buffer_idx,
823  count_distinct_materialized_buffers_.size());
824 
825  const auto& count_distinct_materialized_buffer =
826  count_distinct_materialized_buffers_[materialized_count_distinct_buffer_idx];
827  const auto lhs_sz = count_distinct_materialized_buffer[lhs];
828  const auto rhs_sz = count_distinct_materialized_buffer[rhs];
829  ++materialized_count_distinct_buffer_idx;
830  if (lhs_sz == rhs_sz) {
831  continue;
832  }
833  return (lhs_sz < rhs_sz) != order_entry.is_desc;
834  } else if (UNLIKELY(agg_info.agg_kind == kAPPROX_QUANTILE)) {
835  CHECK_LT(materialized_approx_quantile_buffer_idx,
836  approx_quantile_materialized_buffers_.size());
837  const auto& approx_quantile_materialized_buffer =
838  approx_quantile_materialized_buffers_[materialized_approx_quantile_buffer_idx];
839  const auto lhs_value = approx_quantile_materialized_buffer[lhs];
840  const auto rhs_value = approx_quantile_materialized_buffer[rhs];
841  ++materialized_approx_quantile_buffer_idx;
842  if (lhs_value == rhs_value) {
843  continue;
844  } else if (!entry_ti.get_notnull()) {
845  if (lhs_value == NULL_DOUBLE) {
846  return order_entry.nulls_first;
847  } else if (rhs_value == NULL_DOUBLE) {
848  return !order_entry.nulls_first;
849  }
850  }
851  return (lhs_value < rhs_value) != order_entry.is_desc;
852  }
853 
854  const auto lhs_v = buffer_itr_.getColumnInternal(lhs_storage->buff_,
855  fixedup_lhs,
856  order_entry.tle_no - 1,
857  lhs_storage_lookup_result);
858  const auto rhs_v = buffer_itr_.getColumnInternal(rhs_storage->buff_,
859  fixedup_rhs,
860  order_entry.tle_no - 1,
861  rhs_storage_lookup_result);
862 
863  if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
864  isNull(entry_ti, rhs_v, float_argument_input))) {
865  continue;
866  }
867  if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
868  !isNull(entry_ti, rhs_v, float_argument_input))) {
869  return order_entry.nulls_first;
870  }
871  if (UNLIKELY(isNull(entry_ti, rhs_v, float_argument_input) &&
872  !isNull(entry_ti, lhs_v, float_argument_input))) {
873  return !order_entry.nulls_first;
874  }
875 
876  if (LIKELY(lhs_v.isInt())) {
877  CHECK(rhs_v.isInt());
878  if (UNLIKELY(entry_ti.is_string() &&
879  entry_ti.get_compression() == kENCODING_DICT)) {
880  CHECK_EQ(4, entry_ti.get_logical_size());
881  CHECK(executor_);
882  const auto string_dict_proxy = executor_->getStringDictionaryProxy(
883  entry_ti.get_comp_param(), result_set_->row_set_mem_owner_, false);
884  auto lhs_str = string_dict_proxy->getString(lhs_v.i1);
885  auto rhs_str = string_dict_proxy->getString(rhs_v.i1);
886  if (lhs_str == rhs_str) {
887  continue;
888  }
889  return (lhs_str < rhs_str) != order_entry.is_desc;
890  }
891 
892  if (lhs_v.i1 == rhs_v.i1) {
893  continue;
894  }
895  if (entry_ti.is_fp()) {
896  if (float_argument_input) {
897  const auto lhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&lhs_v.i1));
898  const auto rhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&rhs_v.i1));
899  return (lhs_dval < rhs_dval) != order_entry.is_desc;
900  } else {
901  const auto lhs_dval =
902  *reinterpret_cast<const double*>(may_alias_ptr(&lhs_v.i1));
903  const auto rhs_dval =
904  *reinterpret_cast<const double*>(may_alias_ptr(&rhs_v.i1));
905  return (lhs_dval < rhs_dval) != order_entry.is_desc;
906  }
907  }
908  return (lhs_v.i1 < rhs_v.i1) != order_entry.is_desc;
909  } else {
910  if (lhs_v.isPair()) {
911  CHECK(rhs_v.isPair());
912  const auto lhs =
913  pair_to_double({lhs_v.i1, lhs_v.i2}, entry_ti, float_argument_input);
914  const auto rhs =
915  pair_to_double({rhs_v.i1, rhs_v.i2}, entry_ti, float_argument_input);
916  if (lhs == rhs) {
917  continue;
918  }
919  return (lhs < rhs) != order_entry.is_desc;
920  } else {
921  CHECK(lhs_v.isStr() && rhs_v.isStr());
922  const auto lhs = lhs_v.strVal();
923  const auto rhs = rhs_v.strVal();
924  if (lhs == rhs) {
925  continue;
926  }
927  return (lhs < rhs) != order_entry.is_desc;
928  }
929  }
930  }
931  return false;
932 }
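The comparator above relies on the `(lhs < rhs) != order_entry.is_desc` idiom: with is_desc false it behaves like operator<, with is_desc true it inverts the order, and equal keys always fall through (never reported as "less"), which keeps the ordering strictly weak as the note at the top of the function requires. A tiny standalone illustration of the idiom (not the ResultSetComparator itself):

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  const bool is_desc = true;
  std::vector<int> v{3, 1, 2};
  std::sort(v.begin(), v.end(), [is_desc](int lhs, int rhs) {
    if (lhs == rhs) {
      return false;  // never report "less" for equal keys: strict weak ordering holds
    }
    return (lhs < rhs) != is_desc;  // ascending when !is_desc, descending otherwise
  });
  assert((v == std::vector<int>{3, 2, 1}));
  return 0;
}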
933 
934 // Partially sort permutation into the top (i.e. least, by compare) n elements.
935 // If permutation.size() <= n then sort the entire permutation by compare.
936 // Return a PermutationView with new size() = min(n, permutation.size()).
937 PermutationView ResultSet::topPermutation(PermutationView permutation,
938  const size_t n,
939  const Comparator& compare) {
940  auto timer = DEBUG_TIMER(__func__);
941  if (n < permutation.size()) {
942  std::partial_sort(
943  permutation.begin(), permutation.begin() + n, permutation.end(), compare);
944  permutation.resize(n);
945  } else {
946  std::sort(permutation.begin(), permutation.end(), compare);
947  }
948  return permutation;
949 }
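A small demonstration of the behavior documented above: std::partial_sort places the least n elements (by compare) at the front and the view is then shrunk to n, while a full sort is used only when n covers the whole range. Standalone sketch on plain ints, not the PermutationView type:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> perm{7, 2, 9, 4, 1};
  const size_t n = 3;
  if (n < perm.size()) {
    std::partial_sort(perm.begin(), perm.begin() + n, perm.end());
    perm.resize(n);  // keep only the top (least) n elements
  } else {
    std::sort(perm.begin(), perm.end());
  }
  assert((perm == std::vector<int>{1, 2, 4}));
  return 0;
}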
950 
951 void ResultSet::radixSortOnGpu(
952  const std::list<Analyzer::OrderEntry>& order_entries) const {
953  auto timer = DEBUG_TIMER(__func__);
954  auto data_mgr = &catalog_->getDataMgr();
955  const int device_id{0};
956  auto allocator = data_mgr->createGpuAllocator(device_id);
957  CHECK_GT(block_size_, 0);
958  CHECK_GT(grid_size_, 0);
959  std::vector<int64_t*> group_by_buffers(block_size_);
960  group_by_buffers[0] = reinterpret_cast<int64_t*>(storage_->getUnderlyingBuffer());
961  auto dev_group_by_buffers =
962  create_dev_group_by_buffers(allocator.get(),
963  group_by_buffers,
964  query_mem_desc_,
965  block_size_,
966  grid_size_,
967  device_id,
968  ExecutorDispatchMode::KernelPerFragment,
969  /*num_input_rows=*/-1,
970  /*prepend_index_buffer=*/true,
971  /*always_init_group_by_on_host=*/true,
972  /*use_bump_allocator=*/false,
973  /*has_varlen_output=*/false,
974  /*insitu_allocator*=*/nullptr);
975  inplace_sort_gpu(
976  order_entries, query_mem_desc_, dev_group_by_buffers, data_mgr, device_id);
977  copy_group_by_buffers_from_gpu(
978  *allocator,
979  group_by_buffers,
980  query_mem_desc_.getBufferSizeBytes(ExecutorDeviceType::GPU),
981  dev_group_by_buffers.data,
982  query_mem_desc_,
983  block_size_,
984  grid_size_,
985  device_id,
986  /*use_bump_allocator=*/false,
987  /*has_varlen_output=*/false);
988 }
989 
990 void ResultSet::radixSortOnCpu(
991  const std::list<Analyzer::OrderEntry>& order_entries) const {
992  auto timer = DEBUG_TIMER(__func__);
993  CHECK(!query_mem_desc_.hasKeylessHash());
994  std::vector<int64_t> tmp_buff(query_mem_desc_.getEntryCount());
995  std::vector<int32_t> idx_buff(query_mem_desc_.getEntryCount());
996  CHECK_EQ(size_t(1), order_entries.size());
997  auto buffer_ptr = storage_->getUnderlyingBuffer();
998  for (const auto& order_entry : order_entries) {
999  const auto target_idx = order_entry.tle_no - 1;
1000  const auto sortkey_val_buff = reinterpret_cast<int64_t*>(
1001  buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
1002  const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
1003  sort_groups_cpu(sortkey_val_buff,
1004  &idx_buff[0],
1005  query_mem_desc_.getEntryCount(),
1006  order_entry.is_desc,
1007  chosen_bytes);
1008  apply_permutation_cpu(reinterpret_cast<int64_t*>(buffer_ptr),
1009  &idx_buff[0],
1010  query_mem_desc_.getEntryCount(),
1011  &tmp_buff[0],
1012  sizeof(int64_t));
1013  for (size_t target_idx = 0; target_idx < query_mem_desc_.getSlotCount();
1014  ++target_idx) {
1015  if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
1016  continue;
1017  }
1018  const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
1019  const auto satellite_val_buff = reinterpret_cast<int64_t*>(
1020  buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
1021  apply_permutation_cpu(satellite_val_buff,
1022  &idx_buff[0],
1023  query_mem_desc_.getEntryCount(),
1024  &tmp_buff[0],
1025  chosen_bytes);
1026  }
1027  }
1028 }
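radixSortOnCpu() sorts the single order-by column while recording the resulting row permutation in idx_buff, then applies that permutation to every other slot. A plain std::stable_sort / gather stand-in for sort_groups_cpu() and apply_permutation_cpu() (the `sort_by_key` helper is illustrative only and ignores per-slot byte widths):

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <vector>

void sort_by_key(std::vector<int64_t>& key_col,
                 std::vector<std::vector<int64_t>>& satellite_cols,
                 bool desc) {
  std::vector<int32_t> idx(key_col.size());
  std::iota(idx.begin(), idx.end(), 0);  // idx plays the role of idx_buff
  std::stable_sort(idx.begin(), idx.end(), [&](int32_t a, int32_t b) {
    return desc ? key_col[a] > key_col[b] : key_col[a] < key_col[b];
  });
  auto apply = [&](std::vector<int64_t>& col) {
    std::vector<int64_t> tmp(col.size());
    for (size_t i = 0; i < idx.size(); ++i) {
      tmp[i] = col[idx[i]];  // gather through the index buffer
    }
    col.swap(tmp);
  };
  apply(key_col);
  for (auto& col : satellite_cols) {
    apply(col);  // every satellite column is permuted the same way
  }
}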
1029 
1030 size_t ResultSet::getLimit() const {
1031  return keep_first_;
1032 }
1033 
1034 std::shared_ptr<const std::vector<std::string>> ResultSet::getStringDictionaryPayloadCopy(
1035  const int dict_id) const {
1036  const auto sdp = row_set_mem_owner_->getOrAddStringDictProxy(
1037  dict_id, /*with_generation=*/false, catalog_);
1038  CHECK(sdp);
1039  return sdp->getDictionary()->copyStrings();
1040 }
1041 
1049 bool ResultSet::isDirectColumnarConversionPossible() const {
1050  if (!g_enable_direct_columnarization) {
1051  return false;
1052  } else if (query_mem_desc_.didOutputColumnar()) {
1053  return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
1054  QueryDescriptionType::Projection ||
1055  (query_mem_desc_.getQueryDescriptionType() ==
1056  QueryDescriptionType::GroupByPerfectHash ||
1057  query_mem_desc_.getQueryDescriptionType() ==
1058  QueryDescriptionType::GroupByBaselineHash));
1059  } else {
1060  return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
1061  QueryDescriptionType::GroupByPerfectHash ||
1062  query_mem_desc_.getQueryDescriptionType() ==
1063  QueryDescriptionType::GroupByBaselineHash);
1064  }
1065 }
1066 
1067 bool ResultSet::isZeroCopyColumnarConversionPossible(size_t column_idx) const {
1068  return query_mem_desc_.didOutputColumnar() &&
1069  query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection &&
1070  appended_storage_.empty() && storage_ &&
1071  (lazy_fetch_info_.empty() || !lazy_fetch_info_[column_idx].is_lazily_fetched);
1072 }
1073 
1074 const int8_t* ResultSet::getColumnarBuffer(size_t column_idx) const {
1075  CHECK(isZeroCopyColumnarConversionPossible(column_idx));
1076  return storage_->getUnderlyingBuffer() + query_mem_desc_.getColOffInBytes(column_idx);
1077 }
1078 
1079 // returns a bitmap (and total number) of all single slot targets
1080 std::tuple<std::vector<bool>, size_t> ResultSet::getSingleSlotTargetBitmap() const {
1081  std::vector<bool> target_bitmap(targets_.size(), true);
1082  size_t num_single_slot_targets = 0;
1083  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
1084  const auto& sql_type = targets_[target_idx].sql_type;
1085  if (targets_[target_idx].is_agg && targets_[target_idx].agg_kind == kAVG) {
1086  target_bitmap[target_idx] = false;
1087  } else if (sql_type.is_varlen()) {
1088  target_bitmap[target_idx] = false;
1089  } else {
1090  num_single_slot_targets++;
1091  }
1092  }
1093  return std::make_tuple(std::move(target_bitmap), num_single_slot_targets);
1094 }
1095 
1104 std::tuple<std::vector<bool>, size_t> ResultSet::getSupportedSingleSlotTargetBitmap()
1105  const {
1106  CHECK(isDirectColumnarConversionPossible());
1107  auto [single_slot_targets, num_single_slot_targets] = getSingleSlotTargetBitmap();
1108 
1109  for (size_t target_idx = 0; target_idx < single_slot_targets.size(); target_idx++) {
1110  const auto& target = targets_[target_idx];
1111  if (single_slot_targets[target_idx] &&
1112  (is_distinct_target(target) || target.agg_kind == kAPPROX_QUANTILE ||
1113  (target.is_agg && target.agg_kind == kSAMPLE && target.sql_type == kFLOAT))) {
1114  single_slot_targets[target_idx] = false;
1115  num_single_slot_targets--;
1116  }
1117  }
1118  CHECK_GE(num_single_slot_targets, size_t(0));
1119  return std::make_tuple(std::move(single_slot_targets), num_single_slot_targets);
1120 }
1121 
1122 // returns the starting slot index for all targets in the result set
1123 std::vector<size_t> ResultSet::getSlotIndicesForTargetIndices() const {
1124  std::vector<size_t> slot_indices(targets_.size(), 0);
1125  size_t slot_index = 0;
1126  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
1127  slot_indices[target_idx] = slot_index;
1128  slot_index = advance_slot(slot_index, targets_[target_idx], false);
1129  }
1130  return slot_indices;
1131 }
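getSlotIndicesForTargetIndices() accumulates starting slots with advance_slot(), which moves by more than one slot for targets that occupy several physical slots (for example, AVG stores a sum and a count). A hypothetical illustration of that accumulation with slots-per-target supplied directly (the `starting_slots` helper is not part of this file; advance_slot() is the authoritative rule):

#include <cstddef>
#include <vector>

std::vector<size_t> starting_slots(const std::vector<size_t>& slots_per_target) {
  std::vector<size_t> start(slots_per_target.size(), 0);
  size_t slot = 0;
  for (size_t i = 0; i < slots_per_target.size(); ++i) {
    start[i] = slot;               // starting slot of target i
    slot += slots_per_target[i];   // AVG-style targets contribute 2 here, most others 1
  }
  return start;
}
// starting_slots({1, 2, 1}) == {0, 1, 3}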
1132 
1133 // namespace result_set
1134 
1135 bool result_set::can_use_parallel_algorithms(const ResultSet& rows) {
1136  return !rows.isTruncated();
1137 }
1138 
1139 bool result_set::use_parallel_algorithms(const ResultSet& rows) {
1140  return result_set::can_use_parallel_algorithms(rows) && rows.entryCount() >= 20000;
1141 }