OmniSciDB  06b3bd477c
ResultSet.cpp
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
25 #include "ResultSet.h"
26 
29 #include "Execute.h"
30 #include "GpuMemUtils.h"
31 #include "InPlaceSort.h"
33 #include "RuntimeFunctions.h"
34 #include "Shared/SqlTypesLayout.h"
35 #include "Shared/checked_alloc.h"
36 #include "Shared/likely.h"
37 #include "Shared/thread_count.h"
38 #include "Shared/threadpool.h"
39 
40 #include <algorithm>
41 #include <bitset>
42 #include <future>
43 #include <numeric>
44 
45 extern bool g_use_tbb_pool;
46 
47 std::vector<int64_t> initialize_target_values_for_storage(
48  const std::vector<TargetInfo>& targets) {
49  std::vector<int64_t> target_init_vals;
50  for (const auto& target_info : targets) {
51  if (target_info.agg_kind == kCOUNT ||
52  target_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
53  target_init_vals.push_back(0);
54  continue;
55  }
56  if (target_info.sql_type.is_column()) {
57  int64_t init_val = null_val_bit_pattern(target_info.sql_type.get_subtype(),
58  takes_float_argument(target_info));
59  target_init_vals.push_back(target_info.is_agg ? init_val : 0);
60  } else if (!target_info.sql_type.get_notnull()) {
61  int64_t init_val =
62  null_val_bit_pattern(target_info.sql_type, takes_float_argument(target_info));
63  target_init_vals.push_back(target_info.is_agg ? init_val : 0);
64  } else {
65  target_init_vals.push_back(target_info.is_agg ? 0xdeadbeef : 0);
66  }
67  if (target_info.agg_kind == kAVG) {
68  target_init_vals.push_back(0);
69  } else if (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_geometry()) {
70  for (int i = 1; i < 2 * target_info.sql_type.get_physical_coord_cols(); i++) {
71  target_init_vals.push_back(0);
72  }
73  } else if (target_info.agg_kind == kSAMPLE && target_info.sql_type.is_varlen()) {
74  target_init_vals.push_back(0);
75  }
76  }
77  return target_init_vals;
78 }
79 
80 ResultSetStorage::ResultSetStorage(const std::vector<TargetInfo>& targets,
81  const QueryMemoryDescriptor& query_mem_desc,
82  int8_t* buff,
83  const bool buff_is_provided)
84  : targets_(targets)
85  , query_mem_desc_(query_mem_desc)
86  , buff_(buff)
87  , buff_is_provided_(buff_is_provided)
88  , target_init_vals_(initialize_target_values_for_storage(targets)) {}
89 
90 int8_t* ResultSetStorage::getUnderlyingBuffer() const {
91  return buff_;
92 }
93 
94 void ResultSet::keepFirstN(const size_t n) {
95  CHECK_EQ(-1, cached_row_count_);
96  keep_first_ = n;
97 }
98 
99 void ResultSet::dropFirstN(const size_t n) {
100  CHECK_EQ(-1, cached_row_count_);
101  drop_first_ = n;
102 }
103 
104 ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
105  const ExecutorDeviceType device_type,
106  const QueryMemoryDescriptor& query_mem_desc,
107  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
108  const Executor* executor)
109  : targets_(targets)
110  , device_type_(device_type)
111  , device_id_(-1)
112  , query_mem_desc_(query_mem_desc)
113  , crt_row_buff_idx_(0)
114  , fetched_so_far_(0)
115  , drop_first_(0)
116  , keep_first_(0)
117  , row_set_mem_owner_(row_set_mem_owner)
118  , executor_(executor)
119  , data_mgr_(nullptr)
120  , separate_varlen_storage_valid_(false)
121  , just_explain_(false)
122  , cached_row_count_(-1)
123  , geo_return_type_(GeoReturnType::WktString) {}
124 
125 ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
126  const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
127  const std::vector<std::vector<const int8_t*>>& col_buffers,
128  const std::vector<std::vector<int64_t>>& frag_offsets,
129  const std::vector<int64_t>& consistent_frag_sizes,
130  const ExecutorDeviceType device_type,
131  const int device_id,
132  const QueryMemoryDescriptor& query_mem_desc,
133  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
134  const Executor* executor)
135  : targets_(targets)
136  , device_type_(device_type)
137  , device_id_(device_id)
138  , query_mem_desc_(query_mem_desc)
139  , crt_row_buff_idx_(0)
140  , fetched_so_far_(0)
141  , drop_first_(0)
142  , keep_first_(0)
143  , row_set_mem_owner_(row_set_mem_owner)
144  , executor_(executor)
145  , lazy_fetch_info_(lazy_fetch_info)
146  , col_buffers_{col_buffers}
147  , frag_offsets_{frag_offsets}
148  , consistent_frag_sizes_{consistent_frag_sizes}
149  , data_mgr_(nullptr)
150  , separate_varlen_storage_valid_(false)
151  , just_explain_(false)
152  , cached_row_count_(-1)
153  , geo_return_type_(GeoReturnType::WktString) {}
154 
155 ResultSet::ResultSet(const std::shared_ptr<const Analyzer::Estimator> estimator,
156  const ExecutorDeviceType device_type,
157  const int device_id,
158  Data_Namespace::DataMgr* data_mgr)
159  : device_type_(device_type)
160  , device_id_(device_id)
161  , query_mem_desc_{}
162  , crt_row_buff_idx_(0)
163  , estimator_(estimator)
164  , data_mgr_(data_mgr)
165  , separate_varlen_storage_valid_(false)
166  , just_explain_(false)
167  , cached_row_count_(-1)
168  , geo_return_type_(GeoReturnType::WktString) {
169  if (device_type == ExecutorDeviceType::GPU) {
170  device_estimator_buffer_ = CudaAllocator::allocGpuAbstractBuffer(
171  data_mgr_, estimator_->getBufferSize(), device_id_);
172  data_mgr->getCudaMgr()->zeroDeviceMem(device_estimator_buffer_->getMemoryPtr(),
173  estimator_->getBufferSize(),
174  device_id_);
175  } else {
176  host_estimator_buffer_ =
177  static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
178  }
179 }
180 
181 ResultSet::ResultSet(const std::string& explanation)
182  : device_type_(ExecutorDeviceType::CPU)
183  , device_id_(-1)
184  , fetched_so_far_(0)
185  , separate_varlen_storage_valid_(false)
186  , explanation_(explanation)
187  , just_explain_(true)
188  , cached_row_count_(-1)
189  , geo_return_type_(GeoReturnType::WktString) {}
190 
191 ResultSet::ResultSet(int64_t queue_time_ms,
192  int64_t render_time_ms,
193  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
194  : device_type_(ExecutorDeviceType::CPU)
195  , device_id_(-1)
196  , fetched_so_far_(0)
197  , row_set_mem_owner_(row_set_mem_owner)
198  , timings_(QueryExecutionTimings{queue_time_ms, render_time_ms, 0, 0})
199  , separate_varlen_storage_valid_(false)
200  , just_explain_(true)
201  , cached_row_count_(-1)
202  , geo_return_type_(GeoReturnType::WktString){};
203 
204 ResultSet::~ResultSet() {
205  if (storage_) {
206  if (!storage_->buff_is_provided_) {
207  CHECK(storage_->getUnderlyingBuffer());
208  free(storage_->getUnderlyingBuffer());
209  }
210  }
211  for (auto& storage : appended_storage_) {
212  if (storage && !storage->buff_is_provided_) {
213  free(storage->getUnderlyingBuffer());
214  }
215  }
216  if (host_estimator_buffer_) {
217  CHECK(device_type_ == ExecutorDeviceType::CPU || device_estimator_buffer_);
218  free(host_estimator_buffer_);
219  }
220  if (device_estimator_buffer_) {
221  CHECK(data_mgr_);
222  data_mgr_->free(device_estimator_buffer_);
223  }
224 }
225 
226 ExecutorDeviceType ResultSet::getDeviceType() const {
227  return device_type_;
228 }
229 
230 const ResultSetStorage* ResultSet::allocateStorage() const {
231  CHECK(!storage_);
232  CHECK(row_set_mem_owner_);
233  auto buff =
234  row_set_mem_owner_->allocate(query_mem_desc_.getBufferSizeBytes(device_type_));
235  storage_.reset(
236  new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
237  return storage_.get();
238 }
239 
240 const ResultSetStorage* ResultSet::allocateStorage(
241  int8_t* buff,
242  const std::vector<int64_t>& target_init_vals) const {
243  CHECK(buff);
244  CHECK(!storage_);
245  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
246  storage_->target_init_vals_ = target_init_vals;
247  return storage_.get();
248 }
249 
250 const ResultSetStorage* ResultSet::allocateStorage(
251  const std::vector<int64_t>& target_init_vals) const {
252  CHECK(!storage_);
253  CHECK(row_set_mem_owner_);
254  auto buff =
255  row_set_mem_owner_->allocate(query_mem_desc_.getBufferSizeBytes(device_type_));
256  storage_.reset(
257  new ResultSetStorage(targets_, query_mem_desc_, buff, /*buff_is_provided=*/true));
258  storage_->target_init_vals_ = target_init_vals;
259  return storage_.get();
260 }
261 
262 size_t ResultSet::getCurrentRowBufferIndex() const {
263  if (crt_row_buff_idx_ == 0) {
264  throw std::runtime_error("current row buffer iteration index is undefined");
265  }
266  return crt_row_buff_idx_ - 1;
267 }
268 
269 // Note: that.appended_storage_ does not get appended to this.
270 void ResultSet::append(ResultSet& that) {
271  CHECK_EQ(-1, cached_row_count_);
272  if (!that.storage_) {
273  return;
274  }
275  appended_storage_.push_back(std::move(that.storage_));
276  query_mem_desc_.setEntryCount(
277  query_mem_desc_.getEntryCount() +
278  appended_storage_.back()->query_mem_desc_.getEntryCount());
279  chunks_.insert(chunks_.end(), that.chunks_.begin(), that.chunks_.end());
280  col_buffers_.insert(
281  col_buffers_.end(), that.col_buffers_.begin(), that.col_buffers_.end());
282  frag_offsets_.insert(
283  frag_offsets_.end(), that.frag_offsets_.begin(), that.frag_offsets_.end());
284  consistent_frag_sizes_.insert(consistent_frag_sizes_.end(),
285  that.consistent_frag_sizes_.begin(),
286  that.consistent_frag_sizes_.end());
287  chunk_iters_.insert(
288  chunk_iters_.end(), that.chunk_iters_.begin(), that.chunk_iters_.end());
289  if (separate_varlen_storage_valid_) {
290  CHECK(that.separate_varlen_storage_valid_);
291  serialized_varlen_buffer_.insert(serialized_varlen_buffer_.end(),
292  that.serialized_varlen_buffer_.begin(),
293  that.serialized_varlen_buffer_.end());
294  }
295  for (auto& buff : that.literal_buffers_) {
296  literal_buffers_.push_back(std::move(buff));
297  }
298 }
299 
300 const ResultSetStorage* ResultSet::getStorage() const {
301  return storage_.get();
302 }
303 
304 size_t ResultSet::colCount() const {
305  return just_explain_ ? 1 : targets_.size();
306 }
307 
308 SQLTypeInfo ResultSet::getColType(const size_t col_idx) const {
309  if (just_explain_) {
310  return SQLTypeInfo(kTEXT, false);
311  }
312  CHECK_LT(col_idx, targets_.size());
313  return targets_[col_idx].agg_kind == kAVG ? SQLTypeInfo(kDOUBLE, false)
314  : targets_[col_idx].sql_type;
315 }
316 
317 namespace {
318 
319 size_t get_truncated_row_count(size_t total_row_count, size_t limit, size_t offset) {
320  if (total_row_count < offset) {
321  return 0;
322  }
323 
324  size_t total_truncated_row_count = total_row_count - offset;
325 
326  if (limit) {
327  return std::min(total_truncated_row_count, limit);
328  }
329 
330  return total_truncated_row_count;
331 }
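// Worked example (comment added for clarity; not in the original source):
// get_truncated_row_count(100, 50, 30) applies the offset first and then the limit,
// i.e. min(100 - 30, 50) = 50, while any offset >= total_row_count yields 0.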
332 
333 } // namespace
334 
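// Summary comment (added for clarity; not in the original source): rowCount() returns 1
// for explain-only results and the (possibly limited) permutation size for sorted results;
// otherwise it serves the cached count when valid, binary-searches projection layouts,
// counts in parallel above ~20,000 entries, and falls back to full row iteration.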
335 size_t ResultSet::rowCount(const bool force_parallel) const {
336  if (just_explain_) {
337  return 1;
338  }
339  if (!permutation_.empty()) {
340  const auto limited_row_count = keep_first_ + drop_first_;
341  return limited_row_count ? std::min(limited_row_count, permutation_.size())
342  : permutation_.size();
343  }
344  if (cached_row_count_ != -1) {
345  CHECK_GE(cached_row_count_, 0);
346  return cached_row_count_;
347  }
348  if (!storage_) {
349  return 0;
350  }
351  if (permutation_.empty() &&
352  query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection) {
353  return binSearchRowCount();
354  }
355  if (force_parallel || entryCount() > 20000) {
356  return parallelRowCount();
357  }
358  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
359  moveToBegin();
360  size_t row_count{0};
361  while (true) {
362  auto crt_row = getNextRowUnlocked(false, false);
363  if (crt_row.empty()) {
364  break;
365  }
366  ++row_count;
367  }
368  moveToBegin();
369  return row_count;
370 }
371 
372 void ResultSet::setCachedRowCount(const size_t row_count) const {
373  CHECK(cached_row_count_ == -1 || cached_row_count_ == static_cast<ssize_t>(row_count));
374  cached_row_count_ = row_count;
375 }
376 
377 size_t ResultSet::binSearchRowCount() const {
378  if (!storage_) {
379  return 0;
380  }
381 
382  size_t row_count = storage_->binSearchRowCount();
383  for (auto& s : appended_storage_) {
384  row_count += s->binSearchRowCount();
385  }
386 
387  return get_truncated_row_count(row_count, getLimit(), drop_first_);
388 }
389 
390 size_t ResultSet::parallelRowCount() const {
391  auto execute_parallel_row_count = [this](auto counter_threads) -> size_t {
392  const size_t worker_count = cpu_threads();
393  for (size_t i = 0,
394  start_entry = 0,
395  stride = (entryCount() + worker_count - 1) / worker_count;
396  i < worker_count && start_entry < entryCount();
397  ++i, start_entry += stride) {
398  const auto end_entry = std::min(start_entry + stride, entryCount());
399  counter_threads.append(
400  [this](const size_t start, const size_t end) {
401  size_t row_count{0};
402  for (size_t i = start; i < end; ++i) {
403  if (!isRowAtEmpty(i)) {
404  ++row_count;
405  }
406  }
407  return row_count;
408  },
409  start_entry,
410  end_entry);
411  }
412  const auto row_counts = counter_threads.join();
413  const size_t row_count = std::accumulate(row_counts.begin(), row_counts.end(), 0);
414  return row_count;
415  };
416  // will fall back to futures threadpool if TBB is not enabled
417  const auto row_count =
418  g_use_tbb_pool
419  ? execute_parallel_row_count(threadpool::ThreadPool<size_t>())
420  : execute_parallel_row_count(threadpool::FuturesThreadPool<size_t>());
421 
422  return get_truncated_row_count(row_count, getLimit(), drop_first_);
423 }
424 
425 bool ResultSet::definitelyHasNoRows() const {
426  return !storage_ && !estimator_ && !just_explain_;
427 }
428 
429 const QueryMemoryDescriptor& ResultSet::getQueryMemDesc() const {
430  CHECK(storage_);
431  return storage_->query_mem_desc_;
432 }
433 
434 const std::vector<TargetInfo>& ResultSet::getTargetInfos() const {
435  return targets_;
436 }
437 
438 const std::vector<int64_t>& ResultSet::getTargetInitVals() const {
439  CHECK(storage_);
440  return storage_->target_init_vals_;
441 }
442 
443 int8_t* ResultSet::getDeviceEstimatorBuffer() const {
444  CHECK(device_type_ == ExecutorDeviceType::GPU);
445  CHECK(device_estimator_buffer_);
446  return device_estimator_buffer_->getMemoryPtr();
447 }
448 
449 int8_t* ResultSet::getHostEstimatorBuffer() const {
450  return host_estimator_buffer_;
451 }
452 
453 void ResultSet::syncEstimatorBuffer() const {
454  CHECK(device_type_ == ExecutorDeviceType::GPU);
455  CHECK(!host_estimator_buffer_);
456  CHECK_EQ(size_t(0), estimator_->getBufferSize() % sizeof(int64_t));
457  host_estimator_buffer_ =
458  static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
459  CHECK(device_estimator_buffer_);
460  auto device_buffer_ptr = device_estimator_buffer_->getMemoryPtr();
461  copy_from_gpu(data_mgr_,
462  host_estimator_buffer_,
463  reinterpret_cast<CUdeviceptr>(device_buffer_ptr),
464  estimator_->getBufferSize(),
465  device_id_);
466 }
467 
468 void ResultSet::setQueueTime(const int64_t queue_time) {
469  timings_.executor_queue_time = queue_time;
470 }
471 
472 void ResultSet::setKernelQueueTime(const int64_t kernel_queue_time) {
473  timings_.kernel_queue_time = kernel_queue_time;
474 }
475 
476 void ResultSet::addCompilationQueueTime(const int64_t compilation_queue_time) {
477  timings_.compilation_queue_time += compilation_queue_time;
478 }
479 
480 int64_t ResultSet::getQueueTime() const {
481  return timings_.executor_queue_time + timings_.kernel_queue_time +
482  timings_.compilation_queue_time;
483 }
484 
485 int64_t ResultSet::getRenderTime() const {
486  return timings_.render_time;
487 }
488 
489 void ResultSet::moveToBegin() const {
490  crt_row_buff_idx_ = 0;
491  fetched_so_far_ = 0;
492 }
493 
494 bool ResultSet::isTruncated() const {
495  return keep_first_ + drop_first_;
496 }
497 
498 bool ResultSet::isExplain() const {
499  return just_explain_;
500 }
501 
502 int ResultSet::getDeviceId() const {
503  return device_id_;
504 }
505 
506 QueryMemoryDescriptor ResultSet::fixupQueryMemoryDescriptor(
507  const QueryMemoryDescriptor& query_mem_desc) {
508  auto query_mem_desc_copy = query_mem_desc;
509  query_mem_desc_copy.resetGroupColWidths(
510  std::vector<int8_t>(query_mem_desc_copy.getGroupbyColCount(), 8));
511  if (query_mem_desc.didOutputColumnar()) {
512  return query_mem_desc_copy;
513  }
514  query_mem_desc_copy.alignPaddedSlots();
515  return query_mem_desc_copy;
516 }
517 
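// Summary comment (added for clarity; not in the original source): sort() dispatches to
// the fast baseline GPU sort when possible, then to the GPU radix sort (falling back to
// the CPU radix sort on out-of-memory), and otherwise builds a permutation that is either
// heap-selected for single-key top-n queries (in parallel for large inputs) or fully
// sorted with the generic comparator; watchdog checks guard the slow paths.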
518 void ResultSet::sort(const std::list<Analyzer::OrderEntry>& order_entries,
519  const size_t top_n) {
520  auto timer = DEBUG_TIMER(__func__);
521  CHECK_EQ(-1, cached_row_count_);
522  CHECK(!targets_.empty());
523 #ifdef HAVE_CUDA
524  if (canUseFastBaselineSort(order_entries, top_n)) {
525  baselineSort(order_entries, top_n);
526  return;
527  }
528 #endif // HAVE_CUDA
529  if (query_mem_desc_.sortOnGpu()) {
530  try {
531  radixSortOnGpu(order_entries);
532  } catch (const OutOfMemory&) {
533  LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
534  radixSortOnCpu(order_entries);
535  } catch (const std::bad_alloc&) {
536  LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
537  radixSortOnCpu(order_entries);
538  }
539  return;
540  }
541  // This check isn't strictly required, but allows the index buffer to be 32-bit.
542  if (query_mem_desc_.getEntryCount() > std::numeric_limits<uint32_t>::max()) {
543  throw RowSortException("Sorting more than 4B elements not supported");
544  }
545 
546  CHECK(permutation_.empty());
547 
548  const bool use_heap{order_entries.size() == 1 && top_n};
549  if (use_heap && entryCount() > 100000) {
550  if (g_enable_watchdog && (entryCount() > 20000000)) {
551  throw WatchdogException("Sorting the result would be too slow");
552  }
553  parallelTop(order_entries, top_n);
554  return;
555  }
556 
557  if (g_enable_watchdog && (entryCount() > Executor::baseline_threshold)) {
558  throw WatchdogException("Sorting the result would be too slow");
559  }
560 
561  permutation_ = initPermutationBuffer(0, 1);
562 
563  auto compare = createComparator(order_entries, use_heap);
564 
565  if (use_heap) {
566  topPermutation(permutation_, top_n, compare);
567  } else {
568  sortPermutation(compare);
569  }
570 }
571 
572 #ifdef HAVE_CUDA
573 void ResultSet::baselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
574  const size_t top_n) {
575  auto timer = DEBUG_TIMER(__func__);
576  // If we only have one GPU, it's usually faster to do a multi-threaded radix sort on CPU
577  if (getGpuCount() > 1) {
578  try {
579  doBaselineSort(ExecutorDeviceType::GPU, order_entries, top_n);
580  } catch (...) {
581  doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n);
582  }
583  } else {
584  doBaselineSort(ExecutorDeviceType::CPU, order_entries, top_n);
585  }
586 }
587 #endif // HAVE_CUDA
588 
589 std::vector<uint32_t> ResultSet::initPermutationBuffer(const size_t start,
590  const size_t step) {
591  auto timer = DEBUG_TIMER(__func__);
592  CHECK_NE(size_t(0), step);
593  std::vector<uint32_t> permutation;
594  const auto total_entries = query_mem_desc_.getEntryCount();
595  permutation.reserve(total_entries / step);
596  for (size_t i = start; i < total_entries; i += step) {
597  const auto storage_lookup_result = findStorage(i);
598  const auto lhs_storage = storage_lookup_result.storage_ptr;
599  const auto off = storage_lookup_result.fixedup_entry_idx;
600  CHECK(lhs_storage);
601  if (!lhs_storage->isEmptyEntry(off)) {
602  permutation.emplace_back(i);
603  }
604  }
605  return permutation;
606 }
607 
608 const std::vector<uint32_t>& ResultSet::getPermutationBuffer() const {
609  return permutation_;
610 }
611 
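// Summary comment (added for clarity; not in the original source): parallelTop() builds
// strided permutation buffers on cpu_threads() workers, heap-selects the top_n entries of
// each stride, concatenates the per-thread winners, and runs a final topPermutation pass
// over the merged candidates.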
612 void ResultSet::parallelTop(const std::list<Analyzer::OrderEntry>& order_entries,
613  const size_t top_n) {
614  auto timer = DEBUG_TIMER(__func__);
615  const size_t step = cpu_threads();
616  std::vector<std::vector<uint32_t>> strided_permutations(step);
617  std::vector<std::future<void>> init_futures;
618  for (size_t start = 0; start < step; ++start) {
619  init_futures.emplace_back(
620  std::async(std::launch::async, [this, start, step, &strided_permutations] {
621  strided_permutations[start] = initPermutationBuffer(start, step);
622  }));
623  }
624  for (auto& init_future : init_futures) {
625  init_future.wait();
626  }
627  for (auto& init_future : init_futures) {
628  init_future.get();
629  }
630  auto compare = createComparator(order_entries, true);
631  std::vector<std::future<void>> top_futures;
632  for (auto& strided_permutation : strided_permutations) {
633  top_futures.emplace_back(
634  std::async(std::launch::async, [&strided_permutation, &compare, top_n] {
635  topPermutation(strided_permutation, top_n, compare);
636  }));
637  }
638  for (auto& top_future : top_futures) {
639  top_future.wait();
640  }
641  for (auto& top_future : top_futures) {
642  top_future.get();
643  }
644  permutation_.reserve(strided_permutations.size() * top_n);
645  for (const auto& strided_permutation : strided_permutations) {
646  permutation_.insert(
647  permutation_.end(), strided_permutation.begin(), strided_permutation.end());
648  }
649  topPermutation(permutation_, top_n, compare);
650 }
651 
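// Worked example (comment added for clarity; not in the original source): with an owned
// storage of 1000 entries and one appended storage of 500 entries, getStorageIndex(1200)
// returns {1, 200}: storage index 1 (the first appended storage) and local entry 200.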
652 std::pair<size_t, size_t> ResultSet::getStorageIndex(const size_t entry_idx) const {
653  size_t fixedup_entry_idx = entry_idx;
654  auto entry_count = storage_->query_mem_desc_.getEntryCount();
655  const bool is_rowwise_layout = !storage_->query_mem_desc_.didOutputColumnar();
656  if (fixedup_entry_idx < entry_count) {
657  return {0, fixedup_entry_idx};
658  }
659  fixedup_entry_idx -= entry_count;
660  for (size_t i = 0; i < appended_storage_.size(); ++i) {
661  const auto& desc = appended_storage_[i]->query_mem_desc_;
662  CHECK_NE(is_rowwise_layout, desc.didOutputColumnar());
663  entry_count = desc.getEntryCount();
664  if (fixedup_entry_idx < entry_count) {
665  return {i + 1, fixedup_entry_idx};
666  }
667  fixedup_entry_idx -= entry_count;
668  }
669  UNREACHABLE() << "entry_idx = " << entry_idx << ", query_mem_desc_.getEntryCount() = "
670  << query_mem_desc_.getEntryCount();
671  return {};
672 }
673 
676 
677 ResultSet::StorageLookupResult ResultSet::findStorage(const size_t entry_idx) const {
678  auto [stg_idx, fixedup_entry_idx] = getStorageIndex(entry_idx);
679  return {stg_idx ? appended_storage_[stg_idx - 1].get() : storage_.get(),
680  fixedup_entry_idx,
681  stg_idx};
682 }
683 
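// Summary comment (added for clarity; not in the original source): count-distinct targets
// cannot be compared directly from the entry buffers, so the comparator materializes one
// set-size value per permuted entry up front (in parallel), and the sort then compares
// those cached sizes instead of recomputing count_distinct_set_size() per comparison.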
684 template <typename BUFFER_ITERATOR_TYPE>
685 void ResultSet::ResultSetComparator<
686  BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumns() {
687  for (const auto& order_entry : order_entries_) {
688  if (is_distinct_target(result_set_->targets_[order_entry.tle_no - 1])) {
689  count_distinct_materialized_buffers_.emplace_back(
690  materializeCountDistinctColumn(order_entry));
691  }
692  }
693 }
694 
695 template <typename BUFFER_ITERATOR_TYPE>
696 std::vector<int64_t>
697 ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumn(
698  const Analyzer::OrderEntry& order_entry) const {
699  const size_t num_storage_entries = result_set_->query_mem_desc_.getEntryCount();
700  std::vector<int64_t> count_distinct_materialized_buffer(num_storage_entries);
701  const CountDistinctDescriptor count_distinct_descriptor =
702  result_set_->query_mem_desc_.getCountDistinctDescriptor(order_entry.tle_no - 1);
703  const size_t num_non_empty_entries = result_set_->permutation_.size();
704  const size_t worker_count = cpu_threads();
705  // TODO(tlm): Allow use of tbb after we determine how to easily encapsulate the choice
706  // between thread pool types
707  threadpool::FuturesThreadPool<void> thread_pool;  // (reconstructed line; pool type inferred from the TODO above)
708  for (size_t i = 0,
709  start_entry = 0,
710  stride = (num_non_empty_entries + worker_count - 1) / worker_count;
711  i < worker_count && start_entry < num_non_empty_entries;
712  ++i, start_entry += stride) {
713  const auto end_entry = std::min(start_entry + stride, num_non_empty_entries);
714  thread_pool.append(
715  [this](const size_t start,
716  const size_t end,
717  const Analyzer::OrderEntry& order_entry,
718  const CountDistinctDescriptor& count_distinct_descriptor,
719  std::vector<int64_t>& count_distinct_materialized_buffer) {
720  for (size_t i = start; i < end; ++i) {
721  const uint32_t permuted_idx = result_set_->permutation_[i];
722  const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
723  const auto storage = storage_lookup_result.storage_ptr;
724  const auto off = storage_lookup_result.fixedup_entry_idx;
725  const auto value = buffer_itr_.getColumnInternal(
726  storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
727  count_distinct_materialized_buffer[permuted_idx] =
728  count_distinct_set_size(value.i1, count_distinct_descriptor);
729  }
730  },
731  start_entry,
732  end_entry,
733  std::cref(order_entry),
734  std::cref(count_distinct_descriptor),
735  std::ref(count_distinct_materialized_buffer));
736  }
737  thread_pool.join();
738  return count_distinct_materialized_buffer;
739 }
740 
741 template <typename BUFFER_ITERATOR_TYPE>
742 bool ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::operator()(
743  const uint32_t lhs,
744  const uint32_t rhs) const {
745  // NB: The compare function must define a strict weak ordering, otherwise
746  // std::sort will trigger a segmentation fault (or corrupt memory).
747  const auto lhs_storage_lookup_result = result_set_->findStorage(lhs);
748  const auto rhs_storage_lookup_result = result_set_->findStorage(rhs);
749  const auto lhs_storage = lhs_storage_lookup_result.storage_ptr;
750  const auto rhs_storage = rhs_storage_lookup_result.storage_ptr;
751  const auto fixedup_lhs = lhs_storage_lookup_result.fixedup_entry_idx;
752  const auto fixedup_rhs = rhs_storage_lookup_result.fixedup_entry_idx;
753  size_t materialized_count_distinct_buffer_idx{0};
754 
755  for (const auto& order_entry : order_entries_) {
756  CHECK_GE(order_entry.tle_no, 1);
757  const auto& agg_info = result_set_->targets_[order_entry.tle_no - 1];
758  const auto entry_ti = get_compact_type(agg_info);
759  bool float_argument_input = takes_float_argument(agg_info);
760  // Need to determine if the float value has been stored as a float
761  // or compacted to a different (often larger, 8-byte) width;
762  // in the distributed case the floats are actually 4 bytes.
763  // TODO: the above takes_float_argument() is widely used; this problem may
764  // exist elsewhere.
765  if (entry_ti.get_type() == kFLOAT) {
766  const auto is_col_lazy =
767  !result_set_->lazy_fetch_info_.empty() &&
768  result_set_->lazy_fetch_info_[order_entry.tle_no - 1].is_lazily_fetched;
769  if (result_set_->query_mem_desc_.getPaddedSlotWidthBytes(order_entry.tle_no - 1) ==
770  sizeof(float)) {
771  float_argument_input =
772  result_set_->query_mem_desc_.didOutputColumnar() ? !is_col_lazy : true;
773  }
774  }
775 
776  const bool use_desc_cmp = use_heap_ ? !order_entry.is_desc : order_entry.is_desc;
777 
778  if (UNLIKELY(is_distinct_target(agg_info))) {
779  CHECK_LT(materialized_count_distinct_buffer_idx,
780  count_distinct_materialized_buffers_.size());
781  const auto& count_distinct_materialized_buffer =
782  count_distinct_materialized_buffers_[materialized_count_distinct_buffer_idx];
783  const auto lhs_sz = count_distinct_materialized_buffer[lhs];
784  const auto rhs_sz = count_distinct_materialized_buffer[rhs];
785  ++materialized_count_distinct_buffer_idx;
786  if (lhs_sz == rhs_sz) {
787  continue;
788  }
789  return use_desc_cmp ? lhs_sz > rhs_sz : lhs_sz < rhs_sz;
790  }
791 
792  const auto lhs_v = buffer_itr_.getColumnInternal(lhs_storage->buff_,
793  fixedup_lhs,
794  order_entry.tle_no - 1,
795  lhs_storage_lookup_result);
796  const auto rhs_v = buffer_itr_.getColumnInternal(rhs_storage->buff_,
797  fixedup_rhs,
798  order_entry.tle_no - 1,
799  rhs_storage_lookup_result);
800 
801  if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
802  isNull(entry_ti, rhs_v, float_argument_input))) {
803  return false;
804  }
805  if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
806  !isNull(entry_ti, rhs_v, float_argument_input))) {
807  return use_heap_ ? !order_entry.nulls_first : order_entry.nulls_first;
808  }
809  if (UNLIKELY(isNull(entry_ti, rhs_v, float_argument_input) &&
810  !isNull(entry_ti, lhs_v, float_argument_input))) {
811  return use_heap_ ? order_entry.nulls_first : !order_entry.nulls_first;
812  }
813 
814  if (LIKELY(lhs_v.isInt())) {
815  CHECK(rhs_v.isInt());
816  if (UNLIKELY(entry_ti.is_string() &&
817  entry_ti.get_compression() == kENCODING_DICT)) {
818  CHECK_EQ(4, entry_ti.get_logical_size());
819  const auto string_dict_proxy = result_set_->executor_->getStringDictionaryProxy(
820  entry_ti.get_comp_param(), result_set_->row_set_mem_owner_, false);
821  auto lhs_str = string_dict_proxy->getString(lhs_v.i1);
822  auto rhs_str = string_dict_proxy->getString(rhs_v.i1);
823  if (lhs_str == rhs_str) {
824  continue;
825  }
826  return use_desc_cmp ? lhs_str > rhs_str : lhs_str < rhs_str;
827  }
828 
829  if (lhs_v.i1 == rhs_v.i1) {
830  continue;
831  }
832  if (entry_ti.is_fp()) {
833  if (float_argument_input) {
834  const auto lhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&lhs_v.i1));
835  const auto rhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&rhs_v.i1));
836  return use_desc_cmp ? lhs_dval > rhs_dval : lhs_dval < rhs_dval;
837  } else {
838  const auto lhs_dval =
839  *reinterpret_cast<const double*>(may_alias_ptr(&lhs_v.i1));
840  const auto rhs_dval =
841  *reinterpret_cast<const double*>(may_alias_ptr(&rhs_v.i1));
842  return use_desc_cmp ? lhs_dval > rhs_dval : lhs_dval < rhs_dval;
843  }
844  }
845  return use_desc_cmp ? lhs_v.i1 > rhs_v.i1 : lhs_v.i1 < rhs_v.i1;
846  } else {
847  if (lhs_v.isPair()) {
848  CHECK(rhs_v.isPair());
849  const auto lhs =
850  pair_to_double({lhs_v.i1, lhs_v.i2}, entry_ti, float_argument_input);
851  const auto rhs =
852  pair_to_double({rhs_v.i1, rhs_v.i2}, entry_ti, float_argument_input);
853  if (lhs == rhs) {
854  continue;
855  }
856  return use_desc_cmp ? lhs > rhs : lhs < rhs;
857  } else {
858  CHECK(lhs_v.isStr() && rhs_v.isStr());
859  const auto lhs = lhs_v.strVal();
860  const auto rhs = rhs_v.strVal();
861  if (lhs == rhs) {
862  continue;
863  }
864  return use_desc_cmp ? lhs > rhs : lhs < rhs;
865  }
866  }
867  }
868  return false;
869 }
870 
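// Summary comment (added for clarity; not in the original source): topPermutation() builds
// a heap over the supplied comparator, pops at most n indices off the heap front into a
// result vector, and swaps that selected prefix back into the caller's vector.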
871 void ResultSet::topPermutation(
872  std::vector<uint32_t>& to_sort,
873  const size_t n,
874  const std::function<bool(const uint32_t, const uint32_t)> compare) {
875  auto timer = DEBUG_TIMER(__func__);
876  std::make_heap(to_sort.begin(), to_sort.end(), compare);
877  std::vector<uint32_t> permutation_top;
878  permutation_top.reserve(n);
879  for (size_t i = 0; i < n && !to_sort.empty(); ++i) {
880  permutation_top.push_back(to_sort.front());
881  std::pop_heap(to_sort.begin(), to_sort.end(), compare);
882  to_sort.pop_back();
883  }
884  to_sort.swap(permutation_top);
885 }
886 
887 void ResultSet::sortPermutation(
888  const std::function<bool(const uint32_t, const uint32_t)> compare) {
889  auto timer = DEBUG_TIMER(__func__);
890  std::sort(permutation_.begin(), permutation_.end(), compare);
891 }
892 
893 void ResultSet::radixSortOnGpu(
894  const std::list<Analyzer::OrderEntry>& order_entries) const {
895  auto timer = DEBUG_TIMER(__func__);
896  auto data_mgr = &executor_->catalog_->getDataMgr();
897  const int device_id{0};
898  CudaAllocator cuda_allocator(data_mgr, device_id);
899  std::vector<int64_t*> group_by_buffers(executor_->blockSize());
900  group_by_buffers[0] = reinterpret_cast<int64_t*>(storage_->getUnderlyingBuffer());
901  auto dev_group_by_buffers =
902  create_dev_group_by_buffers(&cuda_allocator,
903  group_by_buffers,
904  query_mem_desc_,
905  executor_->blockSize(),
906  executor_->gridSize(),
907  device_id,
908  ExecutorDispatchMode::KernelPerFragment,  // (reconstructed dispatch-mode argument; assumed)
909  -1,
910  true,
911  true,
912  false,
913  nullptr);
914  inplace_sort_gpu(
915  order_entries, query_mem_desc_, dev_group_by_buffers, data_mgr, device_id);
916  copy_group_by_buffers_from_gpu(
917  data_mgr,
918  group_by_buffers,
919  query_mem_desc_.getBufferSizeBytes(ExecutorDeviceType::GPU),
920  dev_group_by_buffers.second,
921  query_mem_desc_,
922  executor_->blockSize(),
923  executor_->gridSize(),
924  device_id,
925  false);
926 }
927 
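// Summary comment (added for clarity; not in the original source): radixSortOnCpu() sorts
// the single ORDER BY slot with sort_groups_cpu() to obtain an index permutation, then
// applies that permutation with apply_permutation_cpu() to the key buffer and to every
// other slot's values.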
928 void ResultSet::radixSortOnCpu(
929  const std::list<Analyzer::OrderEntry>& order_entries) const {
930  auto timer = DEBUG_TIMER(__func__);
931  CHECK(!query_mem_desc_.hasKeylessHash());
932  std::vector<int64_t> tmp_buff(query_mem_desc_.getEntryCount());
933  std::vector<int32_t> idx_buff(query_mem_desc_.getEntryCount());
934  CHECK_EQ(size_t(1), order_entries.size());
935  auto buffer_ptr = storage_->getUnderlyingBuffer();
936  for (const auto& order_entry : order_entries) {
937  const auto target_idx = order_entry.tle_no - 1;
938  const auto sortkey_val_buff = reinterpret_cast<int64_t*>(
939  buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
940  const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
941  sort_groups_cpu(sortkey_val_buff,
942  &idx_buff[0],
943  query_mem_desc_.getEntryCount(),
944  order_entry.is_desc,
945  chosen_bytes);
946  apply_permutation_cpu(reinterpret_cast<int64_t*>(buffer_ptr),
947  &idx_buff[0],
948  query_mem_desc_.getEntryCount(),
949  &tmp_buff[0],
950  sizeof(int64_t));
951  for (size_t target_idx = 0; target_idx < query_mem_desc_.getSlotCount();
952  ++target_idx) {
953  if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
954  continue;
955  }
956  const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
957  const auto satellite_val_buff = reinterpret_cast<int64_t*>(
958  buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
959  apply_permutation_cpu(satellite_val_buff,
960  &idx_buff[0],
961  query_mem_desc_.getEntryCount(),
962  &tmp_buff[0],
963  chosen_bytes);
964  }
965  }
966 }
967 
968 void ResultSetStorage::addCountDistinctSetPointerMapping(const int64_t remote_ptr,
969  const int64_t ptr) {
970  const auto it_ok = count_distinct_sets_mapping_.emplace(remote_ptr, ptr);
971  CHECK(it_ok.second);
972 }
973 
974 int64_t ResultSetStorage::mappedPtr(const int64_t remote_ptr) const {
975  const auto it = count_distinct_sets_mapping_.find(remote_ptr);
976  // Due to the removal of completely-zero bitmaps in a distributed transfer, there can
977  // be remote ptrs that do not exist. Return 0 if no pointer is found.
978  if (it == count_distinct_sets_mapping_.end()) {
979  return int64_t(0);
980  }
981  return it->second;
982 }
983 
984 size_t ResultSet::getLimit() const {
985  return keep_first_;
986 }
987 
988 std::shared_ptr<const std::vector<std::string>> ResultSet::getStringDictionaryPayloadCopy(
989  const int dict_id) const {
990  CHECK(executor_);
991  const auto sdp =
992  executor_->getStringDictionaryProxy(dict_id, row_set_mem_owner_, false);
993  return sdp->getDictionary()->copyStrings();
994 }
995 
996 bool can_use_parallel_algorithms(const ResultSet& rows) {
997  return !rows.isTruncated();
998 }
999 
1000 bool use_parallel_algorithms(const ResultSet& rows) {
1001  return can_use_parallel_algorithms(rows) && rows.entryCount() >= 20000;
1002 }
1003 
1011 bool ResultSet::isDirectColumnarConversionPossible() const {
1012  if (!g_enable_direct_columnarization) {
1013  return false;
1014  } else if (query_mem_desc_.didOutputColumnar()) {
1015  return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
1016  QueryDescriptionType::Projection ||
1017  (query_mem_desc_.getQueryDescriptionType() ==
1018  QueryDescriptionType::GroupByPerfectHash ||
1019  query_mem_desc_.getQueryDescriptionType() ==
1020  QueryDescriptionType::GroupByBaselineHash));
1021  } else {
1022  return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
1023  QueryDescriptionType::GroupByPerfectHash ||
1024  query_mem_desc_.getQueryDescriptionType() ==
1025  QueryDescriptionType::GroupByBaselineHash);
1026  }
1027 }
1028 
1029 bool ResultSet::isZeroCopyColumnarConversionPossible(size_t column_idx) const {
1030  return query_mem_desc_.didOutputColumnar() &&
1031  query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection &&
1032  appended_storage_.empty() && storage_ &&
1033  (lazy_fetch_info_.empty() || !lazy_fetch_info_[column_idx].is_lazily_fetched);
1034 }
1035 
1036 const int8_t* ResultSet::getColumnarBuffer(size_t column_idx) const {
1037  CHECK(isZeroCopyColumnarConversionPossible(column_idx));
1038  return storage_->getUnderlyingBuffer() + query_mem_desc_.getColOffInBytes(column_idx);
1039 }
1040 
1041 // returns a bitmap (and total number) of all single slot targets
1042 std::tuple<std::vector<bool>, size_t> ResultSet::getSingleSlotTargetBitmap() const {
1043  std::vector<bool> target_bitmap(targets_.size(), true);
1044  size_t num_single_slot_targets = 0;
1045  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
1046  const auto& sql_type = targets_[target_idx].sql_type;
1047  if (targets_[target_idx].is_agg && targets_[target_idx].agg_kind == kAVG) {
1048  target_bitmap[target_idx] = false;
1049  } else if (sql_type.is_varlen()) {
1050  target_bitmap[target_idx] = false;
1051  } else {
1052  num_single_slot_targets++;
1053  }
1054  }
1055  return std::make_tuple(std::move(target_bitmap), num_single_slot_targets);
1056 }
1057 
1066 std::tuple<std::vector<bool>, size_t> ResultSet::getSupportedSingleSlotTargetBitmap()
1067  const {
1068  CHECK(isDirectColumnarConversionPossible());
1069  auto [single_slot_targets, num_single_slot_targets] = getSingleSlotTargetBitmap();
1070 
1071  for (size_t target_idx = 0; target_idx < single_slot_targets.size(); target_idx++) {
1072  const auto& target = targets_[target_idx];
1073  if (single_slot_targets[target_idx] &&
1074  (is_distinct_target(target) ||
1075  (target.is_agg && target.agg_kind == kSAMPLE && target.sql_type == kFLOAT))) {
1076  single_slot_targets[target_idx] = false;
1077  num_single_slot_targets--;
1078  }
1079  }
1080  CHECK_GE(num_single_slot_targets, size_t(0));
1081  return std::make_tuple(std::move(single_slot_targets), num_single_slot_targets);
1082 }
1083 
1084 // returns the starting slot index for all targets in the result set
1085 std::vector<size_t> ResultSet::getSlotIndicesForTargetIndices() const {
1086  std::vector<size_t> slot_indices(targets_.size(), 0);
1087  size_t slot_index = 0;
1088  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
1089  slot_indices[target_idx] = slot_index;
1090  slot_index = advance_slot(slot_index, targets_[target_idx], false);
1091  }
1092  return slot_indices;
1093 }