ResultSet.cpp
/*
 * Copyright 2017 MapD Technologies, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @file    ResultSet.cpp
 * @brief   Basic constructors and methods of the row set interface.
 */

#include "ResultSet.h"
#include "Execute.h"
#include "GpuMemUtils.h"
#include "InPlaceSort.h"
#include "RuntimeFunctions.h"
#include "Shared/SqlTypesLayout.h"
#include "Shared/checked_alloc.h"
#include "Shared/likely.h"
#include "Shared/thread_count.h"
#include "Shared/threadpool.h"

#include <algorithm>
#include <bitset>
#include <future>
#include <numeric>

extern bool g_use_tbb_pool;

void ResultSet::keepFirstN(const size_t n) {
  CHECK_EQ(-1, cached_row_count_);
  keep_first_ = n;
}

void ResultSet::dropFirstN(const size_t n) {
  CHECK_EQ(-1, cached_row_count_);
  drop_first_ = n;
}
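
// Example (hypothetical caller): keep_first_ and drop_first_ implement LIMIT / OFFSET
// on an already-computed result. For "... LIMIT 10 OFFSET 5" a caller would do
//   rs->dropFirstN(5);
//   rs->keepFirstN(10);
// Both calls require that no row count has been cached yet (cached_row_count_ == -1).
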
ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
                     const ExecutorDeviceType device_type,
                     const QueryMemoryDescriptor& query_mem_desc,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                     const Catalog_Namespace::Catalog* catalog,
                     const unsigned block_size,
                     const unsigned grid_size)
    : targets_(targets)
    , device_type_(device_type)
    , device_id_(-1)
    , query_mem_desc_(query_mem_desc)
    , crt_row_buff_idx_(0)
    , fetched_so_far_(0)
    , drop_first_(0)
    , keep_first_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , catalog_(catalog)
    , block_size_(block_size)
    , grid_size_(grid_size)
    , data_mgr_(nullptr)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {}

ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
                     const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
                     const std::vector<std::vector<const int8_t*>>& col_buffers,
                     const std::vector<std::vector<int64_t>>& frag_offsets,
                     const std::vector<int64_t>& consistent_frag_sizes,
                     const ExecutorDeviceType device_type,
                     const int device_id,
                     const QueryMemoryDescriptor& query_mem_desc,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                     const Catalog_Namespace::Catalog* catalog,
                     const unsigned block_size,
                     const unsigned grid_size)
    : targets_(targets)
    , device_type_(device_type)
    , device_id_(device_id)
    , query_mem_desc_(query_mem_desc)
    , crt_row_buff_idx_(0)
    , fetched_so_far_(0)
    , drop_first_(0)
    , keep_first_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , catalog_(catalog)
    , block_size_(block_size)
    , grid_size_(grid_size)
    , lazy_fetch_info_(lazy_fetch_info)
    , col_buffers_{col_buffers}
    , frag_offsets_{frag_offsets}
    , consistent_frag_sizes_{consistent_frag_sizes}
    , data_mgr_(nullptr)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {}

ResultSet::ResultSet(const std::shared_ptr<const Analyzer::Estimator> estimator,
                     const ExecutorDeviceType device_type,
                     const int device_id,
                     Data_Namespace::DataMgr* data_mgr)
    : device_type_(device_type)
    , device_id_(device_id)
    , query_mem_desc_{}
    , crt_row_buff_idx_(0)
    , estimator_(estimator)
    , data_mgr_(data_mgr)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {
  if (device_type == ExecutorDeviceType::GPU) {
    device_estimator_buffer_ = CudaAllocator::allocGpuAbstractBuffer(
        data_mgr_, estimator_->getBufferSize(), device_id_);
    data_mgr->getCudaMgr()->zeroDeviceMem(device_estimator_buffer_->getMemoryPtr(),
                                          estimator_->getBufferSize(),
                                          device_id_);
  } else {
    host_estimator_buffer_ =
        static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
  }
}

ResultSet::ResultSet(const std::string& explanation)
    : device_type_(ExecutorDeviceType::CPU)
    , device_id_(-1)
    , fetched_so_far_(0)
    , separate_varlen_storage_valid_(false)
    , explanation_(explanation)
    , just_explain_(true)
    , for_validation_only_(false)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {}

ResultSet::ResultSet(int64_t queue_time_ms,
                     int64_t render_time_ms,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
    : device_type_(ExecutorDeviceType::CPU)
    , device_id_(-1)
    , fetched_so_far_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , timings_(QueryExecutionTimings{queue_time_ms, render_time_ms, 0, 0})
    , separate_varlen_storage_valid_(false)
    , just_explain_(true)
    , for_validation_only_(false)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString){};

ResultSet::~ResultSet() {
  if (storage_) {
    if (!storage_->buff_is_provided_) {
      CHECK(storage_->getUnderlyingBuffer());
      free(storage_->getUnderlyingBuffer());
    }
  }
  for (auto& storage : appended_storage_) {
    if (storage && !storage->buff_is_provided_) {
      free(storage->getUnderlyingBuffer());
    }
  }
  if (host_estimator_buffer_) {
    CHECK(device_type_ == ExecutorDeviceType::CPU || device_estimator_buffer_);
    free(host_estimator_buffer_);
  }
  if (device_estimator_buffer_) {
    CHECK(data_mgr_);
    data_mgr_->free(device_estimator_buffer_);
  }
}

ExecutorDeviceType ResultSet::getDeviceType() const {
  return device_type_;
}

const ResultSetStorage* ResultSet::allocateStorage() const {
  CHECK(!storage_);
  CHECK(row_set_mem_owner_);
  auto buff =
      row_set_mem_owner_->allocate(query_mem_desc_.getBufferSizeBytes(device_type_));
  storage_.reset(
      new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
  return storage_.get();
}

const ResultSetStorage* ResultSet::allocateStorage(
    int8_t* buff,
    const std::vector<int64_t>& target_init_vals) const {
  CHECK(buff);
  CHECK(!storage_);
  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
  storage_->target_init_vals_ = target_init_vals;
  return storage_.get();
}

const ResultSetStorage* ResultSet::allocateStorage(
    const std::vector<int64_t>& target_init_vals) const {
  CHECK(!storage_);
  CHECK(row_set_mem_owner_);
  auto buff =
      row_set_mem_owner_->allocate(query_mem_desc_.getBufferSizeBytes(device_type_));
  storage_.reset(
      new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
  storage_->target_init_vals_ = target_init_vals;
  return storage_.get();
}

size_t ResultSet::getCurrentRowBufferIndex() const {
  if (crt_row_buff_idx_ == 0) {
    throw std::runtime_error("current row buffer iteration index is undefined");
  }
  return crt_row_buff_idx_ - 1;
}

// Note: that.appended_storage_ does not get appended to this.
void ResultSet::append(ResultSet& that) {
  CHECK_EQ(-1, cached_row_count_);
  if (!that.storage_) {
    return;
  }
  appended_storage_.push_back(std::move(that.storage_));
  query_mem_desc_.setEntryCount(
      query_mem_desc_.getEntryCount() +
      appended_storage_.back()->query_mem_desc_.getEntryCount());
  chunks_.insert(chunks_.end(), that.chunks_.begin(), that.chunks_.end());
  col_buffers_.insert(
      col_buffers_.end(), that.col_buffers_.begin(), that.col_buffers_.end());
  frag_offsets_.insert(
      frag_offsets_.end(), that.frag_offsets_.begin(), that.frag_offsets_.end());
  consistent_frag_sizes_.insert(consistent_frag_sizes_.end(),
                                that.consistent_frag_sizes_.begin(),
                                that.consistent_frag_sizes_.end());
  chunk_iters_.insert(
      chunk_iters_.end(), that.chunk_iters_.begin(), that.chunk_iters_.end());
  if (separate_varlen_storage_valid_) {
    CHECK(that.separate_varlen_storage_valid_);
    serialized_varlen_buffer_.insert(serialized_varlen_buffer_.end(),
                                     that.serialized_varlen_buffer_.begin(),
                                     that.serialized_varlen_buffer_.end());
  }
  for (auto& buff : that.literal_buffers_) {
    literal_buffers_.push_back(std::move(buff));
  }
}

const ResultSetStorage* ResultSet::getStorage() const {
  return storage_.get();
}
size_t ResultSet::colCount() const {
  return just_explain_ ? 1 : targets_.size();
}

SQLTypeInfo ResultSet::getColType(const size_t col_idx) const {
  if (just_explain_) {
    return SQLTypeInfo(kTEXT, false);
  }
  CHECK_LT(col_idx, targets_.size());
  return targets_[col_idx].agg_kind == kAVG ? SQLTypeInfo(kDOUBLE, false)
                                            : targets_[col_idx].sql_type;
}

namespace {

size_t get_truncated_row_count(size_t total_row_count, size_t limit, size_t offset) {
  if (total_row_count < offset) {
    return 0;
  }

  size_t total_truncated_row_count = total_row_count - offset;

  if (limit) {
    return std::min(total_truncated_row_count, limit);
  }

  return total_truncated_row_count;
}

}  // namespace

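// Worked example for get_truncated_row_count(): the offset is applied first, then the
// result is capped at the limit (a limit of 0 means "no limit"):
//   get_truncated_row_count(/*total=*/100, /*limit=*/10, /*offset=*/95) == 5
//   get_truncated_row_count(/*total=*/100, /*limit=*/10, /*offset=*/20) == 10
//   get_truncated_row_count(/*total=*/100, /*limit=*/0,  /*offset=*/20) == 80
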
size_t ResultSet::rowCount(const bool force_parallel) const {
  if (just_explain_) {
    return 1;
  }
  if (!permutation_.empty()) {
    if (drop_first_ > permutation_.size()) {
      return 0;
    }
    const auto limited_row_count = keep_first_ + drop_first_;
    return limited_row_count ? std::min(limited_row_count, permutation_.size())
                             : permutation_.size();
  }
  if (cached_row_count_ != -1) {
    CHECK_GE(cached_row_count_, 0);
    return cached_row_count_;
  }
  if (!storage_) {
    return 0;
  }
  if (permutation_.empty() &&
      query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection) {
    return binSearchRowCount();
  }
  if (force_parallel || entryCount() > 20000) {
    return parallelRowCount();
  }
  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
  moveToBegin();
  size_t row_count{0};
  while (true) {
    auto crt_row = getNextRowUnlocked(false, false);
    if (crt_row.empty()) {
      break;
    }
    ++row_count;
  }
  moveToBegin();
  return row_count;
}

void ResultSet::setCachedRowCount(const size_t row_count) const {
  CHECK(cached_row_count_ == -1 || cached_row_count_ == static_cast<int64_t>(row_count));
  cached_row_count_ = row_count;
}

size_t ResultSet::binSearchRowCount() const {
  if (!storage_) {
    return 0;
  }

  size_t row_count = storage_->binSearchRowCount();
  for (auto& s : appended_storage_) {
    row_count += s->binSearchRowCount();
  }

  return get_truncated_row_count(row_count, getLimit(), drop_first_);
}

size_t ResultSet::parallelRowCount() const {
  auto execute_parallel_row_count = [this](auto counter_threads) -> size_t {
    const size_t worker_count = cpu_threads();
    for (size_t i = 0,
                start_entry = 0,
                stride = (entryCount() + worker_count - 1) / worker_count;
         i < worker_count && start_entry < entryCount();
         ++i, start_entry += stride) {
      const auto end_entry = std::min(start_entry + stride, entryCount());
      counter_threads.spawn(
          [this](const size_t start, const size_t end) {
            size_t row_count{0};
            for (size_t i = start; i < end; ++i) {
              if (!isRowAtEmpty(i)) {
                ++row_count;
              }
            }
            return row_count;
          },
          start_entry,
          end_entry);
    }
    const auto row_counts = counter_threads.join();
    const size_t row_count = std::accumulate(row_counts.begin(), row_counts.end(), 0);
    return row_count;
  };
  // Will fall back to the futures thread pool if TBB is not enabled.
  const auto row_count =
      g_use_tbb_pool ? execute_parallel_row_count(threadpool::ThreadPool<size_t>())
                     : execute_parallel_row_count(threadpool::FuturesThreadPool<size_t>());

  return get_truncated_row_count(row_count, getLimit(), drop_first_);
}

bool ResultSet::definitelyHasNoRows() const {
  return !storage_ && !estimator_ && !just_explain_;
}

const QueryMemoryDescriptor& ResultSet::getQueryMemDesc() const {
  CHECK(storage_);
  return storage_->query_mem_desc_;
}

const std::vector<TargetInfo>& ResultSet::getTargetInfos() const {
  return targets_;
}

const std::vector<int64_t>& ResultSet::getTargetInitVals() const {
  CHECK(storage_);
  return storage_->target_init_vals_;
}

int8_t* ResultSet::getDeviceEstimatorBuffer() const {
  CHECK(device_type_ == ExecutorDeviceType::GPU);
  CHECK(device_estimator_buffer_);
  return device_estimator_buffer_->getMemoryPtr();
}

int8_t* ResultSet::getHostEstimatorBuffer() const {
  return host_estimator_buffer_;
}

void ResultSet::syncEstimatorBuffer() const {
  CHECK(device_type_ == ExecutorDeviceType::GPU);
  CHECK(!host_estimator_buffer_);
  CHECK_EQ(size_t(0), estimator_->getBufferSize() % sizeof(int64_t));
  host_estimator_buffer_ =
      static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
  CHECK(device_estimator_buffer_);
  auto device_buffer_ptr = device_estimator_buffer_->getMemoryPtr();
  copy_from_gpu(data_mgr_,
                host_estimator_buffer_,
                reinterpret_cast<CUdeviceptr>(device_buffer_ptr),
                estimator_->getBufferSize(),
                device_id_);
}

void ResultSet::setQueueTime(const int64_t queue_time) {
  timings_.executor_queue_time = queue_time;
}

void ResultSet::setKernelQueueTime(const int64_t kernel_queue_time) {
  timings_.kernel_queue_time = kernel_queue_time;
}

void ResultSet::addCompilationQueueTime(const int64_t compilation_queue_time) {
  timings_.compilation_queue_time += compilation_queue_time;
}

int64_t ResultSet::getQueueTime() const {
  return timings_.executor_queue_time + timings_.kernel_queue_time +
         timings_.compilation_queue_time;
}

int64_t ResultSet::getRenderTime() const {
  return timings_.render_time;
}

void ResultSet::moveToBegin() const {
  crt_row_buff_idx_ = 0;
  fetched_so_far_ = 0;
}

bool ResultSet::isTruncated() const {
  return keep_first_ + drop_first_;
}

bool ResultSet::isExplain() const {
  return just_explain_;
}

void ResultSet::setValidationOnlyRes() {
  for_validation_only_ = true;
}

bool ResultSet::isValidationOnlyRes() const {
  return for_validation_only_;
}

int ResultSet::getDeviceId() const {
  return device_id_;
}

QueryMemoryDescriptor ResultSet::fixupQueryMemoryDescriptor(
    const QueryMemoryDescriptor& query_mem_desc) {
  auto query_mem_desc_copy = query_mem_desc;
  query_mem_desc_copy.resetGroupColWidths(
      std::vector<int8_t>(query_mem_desc_copy.getGroupbyColCount(), 8));
  if (query_mem_desc.didOutputColumnar()) {
    return query_mem_desc_copy;
  }
  query_mem_desc_copy.alignPaddedSlots();
  return query_mem_desc_copy;
}

void ResultSet::sort(const std::list<Analyzer::OrderEntry>& order_entries,
                     const size_t top_n,
                     const Executor* executor) {
  auto timer = DEBUG_TIMER(__func__);

  if (!storage_) {
    return;
  }
  CHECK_EQ(-1, cached_row_count_);
  CHECK(!targets_.empty());
#ifdef HAVE_CUDA
  if (canUseFastBaselineSort(order_entries, top_n)) {
    baselineSort(order_entries, top_n, executor);
    return;
  }
#endif  // HAVE_CUDA
  if (query_mem_desc_.sortOnGpu()) {
    try {
      radixSortOnGpu(order_entries);
    } catch (const OutOfMemory&) {
      LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
      radixSortOnCpu(order_entries);
    } catch (const std::bad_alloc&) {
      LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
      radixSortOnCpu(order_entries);
    }
    return;
  }
  // This check isn't strictly required, but allows the index buffer to be 32-bit.
  if (query_mem_desc_.getEntryCount() > std::numeric_limits<uint32_t>::max()) {
    throw RowSortException("Sorting more than 4B elements not supported");
  }

  CHECK(permutation_.empty());

  const bool use_heap{order_entries.size() == 1 && top_n};
  if (use_heap && entryCount() > 100000) {
    if (g_enable_watchdog && (entryCount() > 20000000)) {
      throw WatchdogException("Sorting the result would be too slow");
    }
    parallelTop(order_entries, top_n, executor);
    return;
  }

  if (g_enable_watchdog && (entryCount() > Executor::baseline_threshold)) {
    throw WatchdogException("Sorting the result would be too slow");
  }

  permutation_ = initPermutationBuffer(0, 1);

  auto compare = createComparator(order_entries, use_heap, executor);

  if (use_heap) {
    topPermutation(permutation_, top_n, compare);
  } else {
    sortPermutation(compare);
  }
}

#ifdef HAVE_CUDA
void ResultSet::baselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
                             const size_t top_n,
                             const Executor* executor) {
  auto timer = DEBUG_TIMER(__func__);
  // If we only have one GPU, it's usually faster to do a multi-threaded radix sort on CPU.
  if (getGpuCount() > 1) {
    try {
      doBaselineSort(ExecutorDeviceType::GPU, order_entries, top_n, executor);
    } catch (...) {
      doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n, executor);
    }
  } else {
    doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n, executor);
  }
}
#endif  // HAVE_CUDA

std::vector<uint32_t> ResultSet::initPermutationBuffer(const size_t start,
                                                       const size_t step) {
  auto timer = DEBUG_TIMER(__func__);
  CHECK_NE(size_t(0), step);
  std::vector<uint32_t> permutation;
  const auto total_entries = query_mem_desc_.getEntryCount();
  permutation.reserve(total_entries / step);
  for (size_t i = start; i < total_entries; i += step) {
    const auto storage_lookup_result = findStorage(i);
    const auto lhs_storage = storage_lookup_result.storage_ptr;
    const auto off = storage_lookup_result.fixedup_entry_idx;
    CHECK(lhs_storage);
    if (!lhs_storage->isEmptyEntry(off)) {
      permutation.emplace_back(i);
    }
  }
  return permutation;
}

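// Example: initPermutationBuffer(start, step) walks a strided subset of the entries and
// keeps only the non-empty ones. With 8 entries and step == 4 (e.g. cpu_threads() == 4
// in parallelTop()), the four stripes consider entries
//   start 0: {0, 4}    start 1: {1, 5}    start 2: {2, 6}    start 3: {3, 7}
// so the stripes are disjoint and together cover every entry exactly once.
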
const std::vector<uint32_t>& ResultSet::getPermutationBuffer() const {
  return permutation_;
}

void ResultSet::parallelTop(const std::list<Analyzer::OrderEntry>& order_entries,
                            const size_t top_n,
                            const Executor* executor) {
  auto timer = DEBUG_TIMER(__func__);
  const size_t step = cpu_threads();
  std::vector<std::vector<uint32_t>> strided_permutations(step);
  std::vector<std::future<void>> init_futures;
  for (size_t start = 0; start < step; ++start) {
    init_futures.emplace_back(
        std::async(std::launch::async, [this, start, step, &strided_permutations] {
          strided_permutations[start] = initPermutationBuffer(start, step);
        }));
  }
  for (auto& init_future : init_futures) {
    init_future.wait();
  }
  for (auto& init_future : init_futures) {
    init_future.get();
  }
  auto compare = createComparator(order_entries, true, executor);
  std::vector<std::future<void>> top_futures;
  for (auto& strided_permutation : strided_permutations) {
    top_futures.emplace_back(
        std::async(std::launch::async, [&strided_permutation, &compare, top_n] {
          topPermutation(strided_permutation, top_n, compare);
        }));
  }
  for (auto& top_future : top_futures) {
    top_future.wait();
  }
  for (auto& top_future : top_futures) {
    top_future.get();
  }
  permutation_.reserve(strided_permutations.size() * top_n);
  for (const auto& strided_permutation : strided_permutations) {
    permutation_.insert(
        permutation_.end(), strided_permutation.begin(), strided_permutation.end());
  }
  topPermutation(permutation_, top_n, compare);
}

std::pair<size_t, size_t> ResultSet::getStorageIndex(const size_t entry_idx) const {
  size_t fixedup_entry_idx = entry_idx;
  auto entry_count = storage_->query_mem_desc_.getEntryCount();
  const bool is_rowwise_layout = !storage_->query_mem_desc_.didOutputColumnar();
  if (fixedup_entry_idx < entry_count) {
    return {0, fixedup_entry_idx};
  }
  fixedup_entry_idx -= entry_count;
  for (size_t i = 0; i < appended_storage_.size(); ++i) {
    const auto& desc = appended_storage_[i]->query_mem_desc_;
    CHECK_NE(is_rowwise_layout, desc.didOutputColumnar());
    entry_count = desc.getEntryCount();
    if (fixedup_entry_idx < entry_count) {
      return {i + 1, fixedup_entry_idx};
    }
    fixedup_entry_idx -= entry_count;
  }
  UNREACHABLE() << "entry_idx = " << entry_idx << ", query_mem_desc_.getEntryCount() = "
                << query_mem_desc_.getEntryCount();
  return {};
}

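// Example: entry indices are global across storage_ followed by appended_storage_. If
// storage_ holds 1000 entries and appended_storage_[0] holds 500, then
//   getStorageIndex(999)  == {0, 999}   // still in storage_
//   getStorageIndex(1000) == {1, 0}     // first entry of appended_storage_[0]
//   getStorageIndex(1499) == {1, 499}   // last entry of appended_storage_[0]
// findStorage() below translates the returned pair back into the owning
// ResultSetStorage pointer plus the fixed-up local index.
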
ResultSet::StorageLookupResult ResultSet::findStorage(const size_t entry_idx) const {
  auto [stg_idx, fixedup_entry_idx] = getStorageIndex(entry_idx);
  return {stg_idx ? appended_storage_[stg_idx - 1].get() : storage_.get(),
          fixedup_entry_idx,
          stg_idx};
}

template <typename BUFFER_ITERATOR_TYPE>
void ResultSet::ResultSetComparator<
    BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumns() {
  for (const auto& order_entry : order_entries_) {
    if (is_distinct_target(result_set_->targets_[order_entry.tle_no - 1])) {
      count_distinct_materialized_buffers_.emplace_back(
          materializeCountDistinctColumn(order_entry));
    }
  }
}

template <typename BUFFER_ITERATOR_TYPE>
std::vector<int64_t>
ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumn(
    const Analyzer::OrderEntry& order_entry) const {
  const size_t num_storage_entries = result_set_->query_mem_desc_.getEntryCount();
  std::vector<int64_t> count_distinct_materialized_buffer(num_storage_entries);
  const CountDistinctDescriptor count_distinct_descriptor =
      result_set_->query_mem_desc_.getCountDistinctDescriptor(order_entry.tle_no - 1);
  const size_t num_non_empty_entries = result_set_->permutation_.size();
  const size_t worker_count = cpu_threads();
  // TODO(tlm): Allow use of tbb after we determine how to easily encapsulate the choice
  // between thread pool types
  threadpool::FuturesThreadPool<void> thread_pool;
  for (size_t i = 0,
              start_entry = 0,
              stride = (num_non_empty_entries + worker_count - 1) / worker_count;
       i < worker_count && start_entry < num_non_empty_entries;
       ++i, start_entry += stride) {
    const auto end_entry = std::min(start_entry + stride, num_non_empty_entries);
    thread_pool.spawn(
        [this](const size_t start,
               const size_t end,
               const Analyzer::OrderEntry& order_entry,
               const CountDistinctDescriptor& count_distinct_descriptor,
               std::vector<int64_t>& count_distinct_materialized_buffer) {
          for (size_t i = start; i < end; ++i) {
            const uint32_t permuted_idx = result_set_->permutation_[i];
            const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
            const auto storage = storage_lookup_result.storage_ptr;
            const auto off = storage_lookup_result.fixedup_entry_idx;
            const auto value = buffer_itr_.getColumnInternal(
                storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
            count_distinct_materialized_buffer[permuted_idx] =
                count_distinct_set_size(value.i1, count_distinct_descriptor);
          }
        },
        start_entry,
        end_entry,
        std::cref(order_entry),
        std::cref(count_distinct_descriptor),
        std::ref(count_distinct_materialized_buffer));
  }
  thread_pool.join();
  return count_distinct_materialized_buffer;
}

template <typename BUFFER_ITERATOR_TYPE>
bool ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::operator()(
    const uint32_t lhs,
    const uint32_t rhs) const {
  // NB: The compare function must define a strict weak ordering, otherwise
  // std::sort will trigger a segmentation fault (or corrupt memory).
  const auto lhs_storage_lookup_result = result_set_->findStorage(lhs);
  const auto rhs_storage_lookup_result = result_set_->findStorage(rhs);
  const auto lhs_storage = lhs_storage_lookup_result.storage_ptr;
  const auto rhs_storage = rhs_storage_lookup_result.storage_ptr;
  const auto fixedup_lhs = lhs_storage_lookup_result.fixedup_entry_idx;
  const auto fixedup_rhs = rhs_storage_lookup_result.fixedup_entry_idx;
  size_t materialized_count_distinct_buffer_idx{0};

  for (const auto& order_entry : order_entries_) {
    CHECK_GE(order_entry.tle_no, 1);
    const auto& agg_info = result_set_->targets_[order_entry.tle_no - 1];
    const auto entry_ti = get_compact_type(agg_info);
    bool float_argument_input = takes_float_argument(agg_info);
    // Need to determine whether the float value has been stored as a 4-byte float or
    // compacted into a wider (usually 8-byte) slot; in the distributed case the floats
    // are actually 4 bytes.
    // TODO: takes_float_argument() is widely used; check whether this problem exists
    // elsewhere.
    if (entry_ti.get_type() == kFLOAT) {
      const auto is_col_lazy =
          !result_set_->lazy_fetch_info_.empty() &&
          result_set_->lazy_fetch_info_[order_entry.tle_no - 1].is_lazily_fetched;
      if (result_set_->query_mem_desc_.getPaddedSlotWidthBytes(order_entry.tle_no - 1) ==
          sizeof(float)) {
        float_argument_input =
            result_set_->query_mem_desc_.didOutputColumnar() ? !is_col_lazy : true;
      }
    }

    const bool use_desc_cmp = use_heap_ ? !order_entry.is_desc : order_entry.is_desc;

    if (UNLIKELY(is_distinct_target(agg_info))) {
      CHECK_LT(materialized_count_distinct_buffer_idx,
               count_distinct_materialized_buffers_.size());
      const auto& count_distinct_materialized_buffer =
          count_distinct_materialized_buffers_[materialized_count_distinct_buffer_idx];
      const auto lhs_sz = count_distinct_materialized_buffer[lhs];
      const auto rhs_sz = count_distinct_materialized_buffer[rhs];
      ++materialized_count_distinct_buffer_idx;
      if (lhs_sz == rhs_sz) {
        continue;
      }
      return use_desc_cmp ? lhs_sz > rhs_sz : lhs_sz < rhs_sz;
    }

    const auto lhs_v = buffer_itr_.getColumnInternal(lhs_storage->buff_,
                                                     fixedup_lhs,
                                                     order_entry.tle_no - 1,
                                                     lhs_storage_lookup_result);
    const auto rhs_v = buffer_itr_.getColumnInternal(rhs_storage->buff_,
                                                     fixedup_rhs,
                                                     order_entry.tle_no - 1,
                                                     rhs_storage_lookup_result);

    if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
                 isNull(entry_ti, rhs_v, float_argument_input))) {
      return false;
    }
    if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
                 !isNull(entry_ti, rhs_v, float_argument_input))) {
      return use_heap_ ? !order_entry.nulls_first : order_entry.nulls_first;
    }
    if (UNLIKELY(isNull(entry_ti, rhs_v, float_argument_input) &&
                 !isNull(entry_ti, lhs_v, float_argument_input))) {
      return use_heap_ ? order_entry.nulls_first : !order_entry.nulls_first;
    }

    if (LIKELY(lhs_v.isInt())) {
      CHECK(rhs_v.isInt());
      if (UNLIKELY(entry_ti.is_string() &&
                   entry_ti.get_compression() == kENCODING_DICT)) {
        CHECK_EQ(4, entry_ti.get_logical_size());
        CHECK(executor_);
        const auto string_dict_proxy = executor_->getStringDictionaryProxy(
            entry_ti.get_comp_param(), result_set_->row_set_mem_owner_, false);
        auto lhs_str = string_dict_proxy->getString(lhs_v.i1);
        auto rhs_str = string_dict_proxy->getString(rhs_v.i1);
        if (lhs_str == rhs_str) {
          continue;
        }
        return use_desc_cmp ? lhs_str > rhs_str : lhs_str < rhs_str;
      }

      if (lhs_v.i1 == rhs_v.i1) {
        continue;
      }
      if (entry_ti.is_fp()) {
        if (float_argument_input) {
          const auto lhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&lhs_v.i1));
          const auto rhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&rhs_v.i1));
          return use_desc_cmp ? lhs_dval > rhs_dval : lhs_dval < rhs_dval;
        } else {
          const auto lhs_dval =
              *reinterpret_cast<const double*>(may_alias_ptr(&lhs_v.i1));
          const auto rhs_dval =
              *reinterpret_cast<const double*>(may_alias_ptr(&rhs_v.i1));
          return use_desc_cmp ? lhs_dval > rhs_dval : lhs_dval < rhs_dval;
        }
      }
      return use_desc_cmp ? lhs_v.i1 > rhs_v.i1 : lhs_v.i1 < rhs_v.i1;
    } else {
      if (lhs_v.isPair()) {
        CHECK(rhs_v.isPair());
        const auto lhs =
            pair_to_double({lhs_v.i1, lhs_v.i2}, entry_ti, float_argument_input);
        const auto rhs =
            pair_to_double({rhs_v.i1, rhs_v.i2}, entry_ti, float_argument_input);
        if (lhs == rhs) {
          continue;
        }
        return use_desc_cmp ? lhs > rhs : lhs < rhs;
      } else {
        CHECK(lhs_v.isStr() && rhs_v.isStr());
        const auto lhs = lhs_v.strVal();
        const auto rhs = rhs_v.strVal();
        if (lhs == rhs) {
          continue;
        }
        return use_desc_cmp ? lhs > rhs : lhs < rhs;
      }
    }
  }
  return false;
}

void ResultSet::topPermutation(
    std::vector<uint32_t>& to_sort,
    const size_t n,
    const std::function<bool(const uint32_t, const uint32_t)> compare) {
  auto timer = DEBUG_TIMER(__func__);
  std::make_heap(to_sort.begin(), to_sort.end(), compare);
  std::vector<uint32_t> permutation_top;
  permutation_top.reserve(n);
  for (size_t i = 0; i < n && !to_sort.empty(); ++i) {
    permutation_top.push_back(to_sort.front());
    std::pop_heap(to_sort.begin(), to_sort.end(), compare);
    to_sort.pop_back();
  }
  to_sort.swap(permutation_top);
}

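// Sketch of the selection performed by topPermutation(), shown on a plain vector
// (illustrative only, assuming a simple "less" comparator):
//
//   std::vector<uint32_t> v{5, 1, 4, 2, 3};
//   auto cmp = [](uint32_t a, uint32_t b) { return a < b; };
//   std::make_heap(v.begin(), v.end(), cmp);   // max-heap w.r.t. cmp
//   std::vector<uint32_t> top;
//   for (size_t i = 0; i < 2 && !v.empty(); ++i) {
//     top.push_back(v.front());                // current heap maximum
//     std::pop_heap(v.begin(), v.end(), cmp);
//     v.pop_back();
//   }
//   // top == {5, 4}
//
// The selected prefix comes out in heap-pop order, which is why the comparator built by
// createComparator() flips its sense when use_heap is set.
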
void ResultSet::sortPermutation(
    const std::function<bool(const uint32_t, const uint32_t)> compare) {
  auto timer = DEBUG_TIMER(__func__);
  std::sort(permutation_.begin(), permutation_.end(), compare);
}

void ResultSet::radixSortOnGpu(
    const std::list<Analyzer::OrderEntry>& order_entries) const {
  auto timer = DEBUG_TIMER(__func__);
  auto data_mgr = &catalog_->getDataMgr();
  const int device_id{0};
  CudaAllocator cuda_allocator(data_mgr, device_id);
  CHECK_GT(block_size_, 0);
  CHECK_GT(grid_size_, 0);
  std::vector<int64_t*> group_by_buffers(block_size_);
  group_by_buffers[0] = reinterpret_cast<int64_t*>(storage_->getUnderlyingBuffer());
  auto dev_group_by_buffers =
      create_dev_group_by_buffers(&cuda_allocator,
                                  group_by_buffers,
                                  query_mem_desc_,
                                  block_size_,
                                  grid_size_,
                                  device_id,
                                  ExecutorDispatchMode::KernelPerFragment,
                                  -1,
                                  true,
                                  true,
                                  false,
                                  nullptr);
  inplace_sort_gpu(
      order_entries, query_mem_desc_, dev_group_by_buffers, data_mgr, device_id);
  copy_group_by_buffers_from_gpu(
      data_mgr,
      group_by_buffers,
      query_mem_desc_.getBufferSizeBytes(ExecutorDeviceType::GPU),
      dev_group_by_buffers.second,
      query_mem_desc_,
      block_size_,
      grid_size_,
      device_id,
      false);
}

void ResultSet::radixSortOnCpu(
    const std::list<Analyzer::OrderEntry>& order_entries) const {
  auto timer = DEBUG_TIMER(__func__);
  CHECK(!query_mem_desc_.hasKeylessHash());
  std::vector<int64_t> tmp_buff(query_mem_desc_.getEntryCount());
  std::vector<int32_t> idx_buff(query_mem_desc_.getEntryCount());
  CHECK_EQ(size_t(1), order_entries.size());
  auto buffer_ptr = storage_->getUnderlyingBuffer();
  for (const auto& order_entry : order_entries) {
    const auto target_idx = order_entry.tle_no - 1;
    const auto sortkey_val_buff = reinterpret_cast<int64_t*>(
        buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
    const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
    sort_groups_cpu(sortkey_val_buff,
                    &idx_buff[0],
                    query_mem_desc_.getEntryCount(),
                    order_entry.is_desc,
                    chosen_bytes);
    apply_permutation_cpu(reinterpret_cast<int64_t*>(buffer_ptr),
                          &idx_buff[0],
                          query_mem_desc_.getEntryCount(),
                          &tmp_buff[0],
                          sizeof(int64_t));
    for (size_t target_idx = 0; target_idx < query_mem_desc_.getSlotCount();
         ++target_idx) {
      if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
        continue;
      }
      const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
      const auto satellite_val_buff = reinterpret_cast<int64_t*>(
          buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
      apply_permutation_cpu(satellite_val_buff,
                            &idx_buff[0],
                            query_mem_desc_.getEntryCount(),
                            &tmp_buff[0],
                            chosen_bytes);
    }
  }
}

size_t ResultSet::getLimit() const {
  return keep_first_;
}

std::shared_ptr<const std::vector<std::string>> ResultSet::getStringDictionaryPayloadCopy(
    const int dict_id) const {
  const auto sdp = row_set_mem_owner_->getOrAddStringDictProxy(
      dict_id, /*with_generation=*/false, catalog_);
  CHECK(sdp);
  return sdp->getDictionary()->copyStrings();
}

bool ResultSet::isDirectColumnarConversionPossible() const {
  if (!g_enable_direct_columnarization) {
    return false;
  } else if (query_mem_desc_.didOutputColumnar()) {
    return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::Projection ||
                                    (query_mem_desc_.getQueryDescriptionType() ==
                                         QueryDescriptionType::GroupByPerfectHash ||
                                     query_mem_desc_.getQueryDescriptionType() ==
                                         QueryDescriptionType::GroupByBaselineHash));
  } else {
    return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByPerfectHash ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByBaselineHash);
  }
}

bool ResultSet::isZeroCopyColumnarConversionPossible(size_t column_idx) const {
  return query_mem_desc_.didOutputColumnar() &&
         query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection &&
         appended_storage_.empty() && storage_ &&
         (lazy_fetch_info_.empty() || !lazy_fetch_info_[column_idx].is_lazily_fetched);
}

const int8_t* ResultSet::getColumnarBuffer(size_t column_idx) const {
  CHECK(isZeroCopyColumnarConversionPossible(column_idx));
  return storage_->getUnderlyingBuffer() + query_mem_desc_.getColOffInBytes(column_idx);
}

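// Hypothetical consumer sketch (names other than the two member functions above are
// made up): a columnar reader can avoid a copy when zero-copy conversion is possible.
//
//   if (rows->isZeroCopyColumnarConversionPossible(col_idx)) {
//     const int8_t* col_buf = rows->getColumnarBuffer(col_idx);
//     // col_buf points directly into the projection output buffer for col_idx.
//   } else {
//     // Fall back to row-wise iteration / materialization.
//   }
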
// returns a bitmap (and total number) of all single slot targets
std::tuple<std::vector<bool>, size_t> ResultSet::getSingleSlotTargetBitmap() const {
  std::vector<bool> target_bitmap(targets_.size(), true);
  size_t num_single_slot_targets = 0;
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    const auto& sql_type = targets_[target_idx].sql_type;
    if (targets_[target_idx].is_agg && targets_[target_idx].agg_kind == kAVG) {
      target_bitmap[target_idx] = false;
    } else if (sql_type.is_varlen()) {
      target_bitmap[target_idx] = false;
    } else {
      num_single_slot_targets++;
    }
  }
  return std::make_tuple(std::move(target_bitmap), num_single_slot_targets);
}

std::tuple<std::vector<bool>, size_t> ResultSet::getSupportedSingleSlotTargetBitmap()
    const {
  CHECK(isDirectColumnarConversionPossible());
  auto [single_slot_targets, num_single_slot_targets] = getSingleSlotTargetBitmap();

  for (size_t target_idx = 0; target_idx < single_slot_targets.size(); target_idx++) {
    const auto& target = targets_[target_idx];
    if (single_slot_targets[target_idx] &&
        (is_distinct_target(target) ||
         (target.is_agg && target.agg_kind == kSAMPLE && target.sql_type == kFLOAT))) {
      single_slot_targets[target_idx] = false;
      num_single_slot_targets--;
    }
  }
  CHECK_GE(num_single_slot_targets, size_t(0));
  return std::make_tuple(std::move(single_slot_targets), num_single_slot_targets);
}

// returns the starting slot index for all targets in the result set
std::vector<size_t> ResultSet::getSlotIndicesForTargetIndices() const {
  std::vector<size_t> slot_indices(targets_.size(), 0);
  size_t slot_index = 0;
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    slot_indices[target_idx] = slot_index;
    slot_index = advance_slot(slot_index, targets_[target_idx], false);
  }
  return slot_indices;
}

// namespace result_set

bool result_set::can_use_parallel_algorithms(const ResultSet& rows) {
  return !rows.isTruncated();
}

bool result_set::use_parallel_algorithms(const ResultSet& rows) {
  return result_set::can_use_parallel_algorithms(rows) && rows.entryCount() >= 20000;
}