                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                     const unsigned block_size,
                     const unsigned grid_size)
    , device_type_(device_type)
    , query_mem_desc_(query_mem_desc)
    , crt_row_buff_idx_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , block_size_(block_size)
    , grid_size_(grid_size)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , cached_row_count_(-1)
                     const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
                     const std::vector<std::vector<const int8_t*>>& col_buffers,
                     const std::vector<std::vector<int64_t>>& frag_offsets,
                     const std::vector<int64_t>& consistent_frag_sizes,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                     const unsigned block_size,
                     const unsigned grid_size)
    , device_type_(device_type)
    , device_id_(device_id)
    , query_mem_desc_(query_mem_desc)
    , crt_row_buff_idx_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , block_size_(block_size)
    , grid_size_(grid_size)
    , lazy_fetch_info_(lazy_fetch_info)
    , col_buffers_{col_buffers}
    , frag_offsets_{frag_offsets}
    , consistent_frag_sizes_{consistent_frag_sizes}
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {}
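
// The constructors above differ in how much execution context they capture: the second
// form also records lazy-fetch metadata and the per-fragment input buffers/offsets so
// deferred columns can be materialized when rows are read. Both default
// geo_return_type_ to GeoReturnType::WktString, i.e. geospatial targets come back as
// WKT strings unless a caller overrides the return type.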
    : device_type_(device_type)
    , device_id_(device_id)
    , crt_row_buff_idx_(0)
    , estimator_(estimator)
    , data_mgr_(data_mgr)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {
  if (device_type == ExecutorDeviceType::GPU) {
    device_estimator_buffer_ = CudaAllocator::allocGpuAbstractBuffer(
        data_mgr_, estimator_->getBufferSize(), device_id_);
    data_mgr->getCudaMgr()->zeroDeviceMem(device_estimator_buffer_->getMemoryPtr(),
                                          estimator_->getBufferSize(),
                                          device_id_);
  } else {
    host_estimator_buffer_ =
        static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
  }
}
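
// Estimator result sets hold a single fixed-size buffer sized by the estimator rather
// than row storage: on GPU the buffer is allocated through DataMgr and zeroed on the
// device, while on CPU a zero-initialized host buffer comes from checked_calloc.
// syncEstimatorBuffer() later copies the device-side buffer back to a host copy.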
    , separate_varlen_storage_valid_(false)
    , explanation_(explanation)
    , just_explain_(true)
    , for_validation_only_(false)
    , cached_row_count_(-1)

                     int64_t render_time_ms,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
    , row_set_mem_owner_(row_set_mem_owner)
    , separate_varlen_storage_valid_(false)
    , just_explain_(true)
    , for_validation_only_(false)
    , cached_row_count_(-1)
    , geo_return_type_(GeoReturnType::WktString) {}
  if (storage_) {
    if (!storage_->buff_is_provided_) {
      CHECK(storage_->getUnderlyingBuffer());
      free(storage_->getUnderlyingBuffer());
    }
  }
  for (auto& storage : appended_storage_) {
    if (storage && !storage->buff_is_provided_) {
      free(storage->getUnderlyingBuffer());
    }
  }
  if (host_estimator_buffer_) {
    free(host_estimator_buffer_);
  }
  if (device_estimator_buffer_) {
    data_mgr_->free(device_estimator_buffer_);
  }
}
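
// The destructor only releases buffers this ResultSet owns: storage whose
// buff_is_provided_ flag is set was allocated by someone else (typically the
// RowSetMemoryOwner) and is left alone, while the device-side estimator buffer is
// handed back through DataMgr.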
  CHECK(row_set_mem_owner_);
  auto buff =
      row_set_mem_owner_->allocate(query_mem_desc_.getBufferSizeBytes(device_type_));
  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
  return storage_.get();
}

    const std::vector<int64_t>& target_init_vals) const {
  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
  storage_->target_init_vals_ = target_init_vals;
  return storage_.get();
}

    const std::vector<int64_t>& target_init_vals) const {
  CHECK(row_set_mem_owner_);
  auto buff =
      row_set_mem_owner_->allocate(query_mem_desc_.getBufferSizeBytes(device_type_));
  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
  storage_->target_init_vals_ = target_init_vals;
  return storage_.get();
}
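
// Each allocateStorage() overload creates the single owned ResultSetStorage. When no
// buffer is passed in, memory comes from row_set_mem_owner_ and is sized by
// QueryMemoryDescriptor::getBufferSizeBytes() for the current device type; the
// target_init_vals overloads additionally record the per-target initialization values.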
  if (crt_row_buff_idx_ == 0) {
    throw std::runtime_error("current row buffer iteration index is undefined");
  }
  return crt_row_buff_idx_ - 1;
}
  if (!that.storage_) {
    return;
  }
  appended_storage_.push_back(std::move(that.storage_));
  query_mem_desc_.setEntryCount(
      query_mem_desc_.getEntryCount() +
      appended_storage_.back()->query_mem_desc_.getEntryCount());
  chunks_.insert(chunks_.end(), that.chunks_.begin(), that.chunks_.end());
  col_buffers_.insert(
      col_buffers_.end(), that.col_buffers_.begin(), that.col_buffers_.end());
  frag_offsets_.insert(
      frag_offsets_.end(), that.frag_offsets_.begin(), that.frag_offsets_.end());
  consistent_frag_sizes_.insert(consistent_frag_sizes_.end(),
                                that.consistent_frag_sizes_.begin(),
                                that.consistent_frag_sizes_.end());
  chunk_iters_.insert(
      chunk_iters_.end(), that.chunk_iters_.begin(), that.chunk_iters_.end());
  if (separate_varlen_storage_valid_) {
    CHECK(that.separate_varlen_storage_valid_);
    serialized_varlen_buffer_.insert(serialized_varlen_buffer_.end(),
                                     that.serialized_varlen_buffer_.begin(),
                                     that.serialized_varlen_buffer_.end());
  }
  for (auto& buff : that.literal_buffers_) {
    literal_buffers_.push_back(std::move(buff));
  }
}

const ResultSetStorage* ResultSet::getStorage() const {
  return storage_.get();
}
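
// append() steals the other result set's storage and concatenates all per-fragment
// state (chunks, column buffers, fragment offsets, literal buffers and, when enabled,
// the separate varlen storage); the entry count of this result set grows by the entry
// count of the appended storage.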
  return just_explain_ ? 1 : targets_.size();

                                            : targets_[col_idx].sql_type;

  if (total_row_count < offset) {
    return 0;
  }
  size_t total_truncated_row_count = total_row_count - offset;
  if (limit) {
    return std::min(total_truncated_row_count, limit);
  }
  return total_truncated_row_count;
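
// Illustrative example (not part of the original file): with total_row_count = 10,
// offset = 3 and limit = 5, the offset leaves 7 candidate rows and the limit caps the
// result at 5; with limit = 0 all 7 remaining rows are reported.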
  if (!permutation_.empty()) {
    if (drop_first_ > permutation_.size()) {
      return 0;
    }
    const auto limited_row_count = keep_first_ + drop_first_;
    return limited_row_count ? std::min(limited_row_count, permutation_.size())
                             : permutation_.size();
  }
  if (cached_row_count_ != -1) {
    return cached_row_count_;
  }
  if (permutation_.empty() &&
      query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection) {
    return binSearchRowCount();
  }
  if (force_parallel || entryCount() > 20000) {
    return parallelRowCount();
  }
  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
  size_t row_count{0};
  while (true) {
    auto crt_row = getNextRowUnlocked(false, false);
    if (crt_row.empty()) {
      break;
    }
    ++row_count;
  }
  return row_count;
}

void ResultSet::setCachedRowCount(const size_t row_count) const {
  CHECK(cached_row_count_ == -1 || cached_row_count_ == static_cast<int64_t>(row_count));
  cached_row_count_ = row_count;
}
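
// rowCount() picks the cheapest available strategy: the (possibly truncated)
// permutation size for sorted results, the memoized cached_row_count_, a binary-search
// count for projection buffers, a parallel scan when the entry count exceeds 20,000,
// and otherwise a locked row-by-row iteration. setCachedRowCount() memoizes the result
// so later calls can take the fast path.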
  size_t row_count = storage_->binSearchRowCount();
  for (auto& s : appended_storage_) {
    row_count += s->binSearchRowCount();
  }
  auto execute_parallel_row_count = [this](auto counter_threads) -> size_t {
    const size_t worker_count = cpu_threads();
    for (size_t i = 0,
                start_entry = 0,
                stride = (entryCount() + worker_count - 1) / worker_count;
         i < worker_count && start_entry < entryCount();
         ++i, start_entry += stride) {
      const auto end_entry = std::min(start_entry + stride, entryCount());
      counter_threads.spawn(
          [this](const size_t start, const size_t end) {
            size_t row_count{0};
            for (size_t i = start; i < end; ++i) {
              if (!isRowAtEmpty(i)) {
                ++row_count;
              }
            }
            return row_count;
          },
          start_entry,
          end_entry);
    }
    const auto row_counts = counter_threads.join();
    const size_t row_count =
        std::accumulate(row_counts.begin(), row_counts.end(), size_t(0));
    return row_count;
  };
  const auto row_count =
  return !storage_ && !estimator_ && !just_explain_;

  return storage_->query_mem_desc_;

  return storage_->target_init_vals_;

  CHECK(device_estimator_buffer_);
  return device_estimator_buffer_->getMemoryPtr();

  return host_estimator_buffer_;
  CHECK(!host_estimator_buffer_);
  CHECK_EQ(size_t(0), estimator_->getBufferSize() % sizeof(int64_t));
  host_estimator_buffer_ =
      static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
  CHECK(device_estimator_buffer_);
  auto device_buffer_ptr = device_estimator_buffer_->getMemoryPtr();
  copy_from_gpu(data_mgr_,
                host_estimator_buffer_,
                reinterpret_cast<CUdeviceptr>(device_buffer_ptr),
                estimator_->getBufferSize(),
                device_id_);
}
  timings_.executor_queue_time = queue_time;

  timings_.kernel_queue_time = kernel_queue_time;

  timings_.compilation_queue_time += compilation_queue_time;

  return timings_.executor_queue_time + timings_.kernel_queue_time +
         timings_.compilation_queue_time;

  return timings_.render_time;

  crt_row_buff_idx_ = 0;

  return keep_first_ + drop_first_;

  return just_explain_;

  for_validation_only_ = true;

  return for_validation_only_;
  query_mem_desc_copy.resetGroupColWidths(
      std::vector<int8_t>(query_mem_desc_copy.getGroupbyColCount(), 8));
  if (query_mem_desc_copy.didOutputColumnar()) {
    return query_mem_desc_copy;
  }
  query_mem_desc_copy.alignPaddedSlots();
  return query_mem_desc_copy;
}
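
// The fixed-up descriptor forces every group-by key column to a width of 8 bytes, the
// layout the row-wise reduction code expects; padded slot alignment is only applied to
// row-wise output, since columnar buffers return early above.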
                     const Executor* executor) {
  CHECK(!targets_.empty());
  if (canUseFastBaselineSort(order_entries, top_n)) {
    baselineSort(order_entries, top_n, executor);
    return;
  }
  if (query_mem_desc_.sortOnGpu()) {
    try {
      radixSortOnGpu(order_entries);
    } catch (const OutOfMemory&) {
      LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
      radixSortOnCpu(order_entries);
    } catch (const std::bad_alloc&) {
      LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
      radixSortOnCpu(order_entries);
    }
    return;
  }
  // This check allows the permutation index buffer to stay 32-bit.
  if (query_mem_desc_.getEntryCount() > std::numeric_limits<uint32_t>::max()) {
    throw RowSortException("Sorting more than 4B elements not supported");
  }

  CHECK(permutation_.empty());

  const bool use_heap{order_entries.size() == 1 && top_n};
  if (use_heap && entryCount() > 100000) {
    parallelTop(order_entries, top_n, executor);
    return;
  }

  permutation_ = initPermutationBuffer(0, 1);

  auto compare = createComparator(order_entries, use_heap, executor);

  if (use_heap) {
    topPermutation(permutation_, top_n, compare);
  } else {
    sortPermutation(compare);
  }
}
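
// sort() dispatches on the query layout: a GPU baseline sort when the fast path
// applies, a GPU radix sort with a CPU radix-sort fallback on out-of-memory, and
// otherwise a permutation-based CPU sort. With a single ORDER BY key and a LIMIT the
// permutation is selected with a heap (topPermutation); above 100,000 entries that
// selection is parallelized via parallelTop().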
                            const Executor* executor) {
  if (getGpuCount() > 1) {
  std::vector<uint32_t> permutation;
  const auto total_entries = query_mem_desc_.getEntryCount();
  permutation.reserve(total_entries / step);
  for (size_t i = start; i < total_entries; i += step) {
    const auto storage_lookup_result = findStorage(i);
    const auto lhs_storage = storage_lookup_result.storage_ptr;
    const auto off = storage_lookup_result.fixedup_entry_idx;
    if (!lhs_storage->isEmptyEntry(off)) {
      permutation.emplace_back(i);
    }
  }
  return permutation;
}
                            const Executor* executor) {
  const size_t step = cpu_threads();
  std::vector<std::vector<uint32_t>> strided_permutations(step);
  std::vector<std::future<void>> init_futures;
  for (size_t start = 0; start < step; ++start) {
    init_futures.emplace_back(
        std::async(std::launch::async, [this, start, step, &strided_permutations] {
          strided_permutations[start] = initPermutationBuffer(start, step);
        }));
  }
  for (auto& init_future : init_futures) {
    init_future.wait();
  }
  for (auto& init_future : init_futures) {
    init_future.get();
  }

  auto compare = createComparator(order_entries, true, executor);
  std::vector<std::future<void>> top_futures;
  for (auto& strided_permutation : strided_permutations) {
    top_futures.emplace_back(
        std::async(std::launch::async, [&strided_permutation, &compare, top_n] {
          topPermutation(strided_permutation, top_n, compare);
        }));
  }
  for (auto& top_future : top_futures) {
    top_future.wait();
  }
  for (auto& top_future : top_futures) {
    top_future.get();
  }

  permutation_.reserve(strided_permutations.size() * top_n);
  for (const auto& strided_permutation : strided_permutations) {
    permutation_.insert(
        permutation_.end(), strided_permutation.begin(), strided_permutation.end());
  }
  topPermutation(permutation_, top_n, compare);
}
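
// parallelTop() stripes the entries across cpu_threads() workers, builds a permutation
// per stripe, takes the top n of each stripe with the heap-based topPermutation(), and
// then runs topPermutation() once more over the concatenated candidates, so at most
// step * top_n entries survive to the final selection.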
  size_t fixedup_entry_idx = entry_idx;
  auto entry_count = storage_->query_mem_desc_.getEntryCount();
  const bool is_rowwise_layout = !storage_->query_mem_desc_.didOutputColumnar();
  if (fixedup_entry_idx < entry_count) {
    return {0, fixedup_entry_idx};
  }
  fixedup_entry_idx -= entry_count;
  for (size_t i = 0; i < appended_storage_.size(); ++i) {
    const auto& desc = appended_storage_[i]->query_mem_desc_;
    CHECK_NE(is_rowwise_layout, desc.didOutputColumnar());
    entry_count = desc.getEntryCount();
    if (fixedup_entry_idx < entry_count) {
      return {i + 1, fixedup_entry_idx};
    }
    fixedup_entry_idx -= entry_count;
  }
  UNREACHABLE() << "entry_idx = " << entry_idx << ", query_mem_desc_.getEntryCount() = "
                << query_mem_desc_.getEntryCount();
  return {};
}

  auto [stg_idx, fixedup_entry_idx] = getStorageIndex(entry_idx);
  return {stg_idx ? appended_storage_[stg_idx - 1].get() : storage_.get(),
          fixedup_entry_idx,
          stg_idx};
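
// Entry indices are global across storage_ followed by appended_storage_;
// getStorageIndex() walks the per-storage entry counts to translate a global index
// into a (storage index, local index) pair, and findStorage() resolves that pair to
// the owning ResultSetStorage plus the fixed-up local index.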
template <typename BUFFER_ITERATOR_TYPE>
void ResultSet::ResultSetComparator<
    BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumns() {
  for (const auto& order_entry : order_entries_) {
    if (is_distinct_target(result_set_->targets_[order_entry.tle_no - 1])) {
      count_distinct_materialized_buffers_.emplace_back(
          materializeCountDistinctColumn(order_entry));
    }
  }
}
template <typename BUFFER_ITERATOR_TYPE>
std::vector<int64_t>
ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumn(
    const Analyzer::OrderEntry& order_entry) const {
  const size_t num_storage_entries = result_set_->query_mem_desc_.getEntryCount();
  std::vector<int64_t> count_distinct_materialized_buffer(num_storage_entries);
  const CountDistinctDescriptor count_distinct_descriptor =
      result_set_->query_mem_desc_.getCountDistinctDescriptor(order_entry.tle_no - 1);
  const size_t num_non_empty_entries = result_set_->permutation_.size();
  const size_t worker_count = cpu_threads();
  for (size_t i = 0,
              start_entry = 0,
              stride = (num_non_empty_entries + worker_count - 1) / worker_count;
       i < worker_count && start_entry < num_non_empty_entries;
       ++i, start_entry += stride) {
    const auto end_entry = std::min(start_entry + stride, num_non_empty_entries);

        [this](const size_t start,
               const size_t end,
               const Analyzer::OrderEntry& order_entry,
               const CountDistinctDescriptor& count_distinct_descriptor,
               std::vector<int64_t>& count_distinct_materialized_buffer) {
          for (size_t i = start; i < end; ++i) {
            const uint32_t permuted_idx = result_set_->permutation_[i];
            const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
            const auto storage = storage_lookup_result.storage_ptr;
            const auto off = storage_lookup_result.fixedup_entry_idx;
            const auto value = buffer_itr_.getColumnInternal(
                storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
            count_distinct_materialized_buffer[permuted_idx] =
                count_distinct_set_size(value.i1, count_distinct_descriptor);
          }
        },
        start_entry,
        end_entry,
        std::cref(order_entry),
        std::cref(count_distinct_descriptor),
        std::ref(count_distinct_materialized_buffer));
  }
  return count_distinct_materialized_buffer;
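
// A count-distinct slot stores a handle to a bitmap or set, not a comparable value, so
// the comparator pre-computes count_distinct_set_size() for every permuted row into a
// buffer indexed by the permuted entry id. The work is split across worker threads in
// contiguous [start, end) ranges of the permutation.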
template <typename BUFFER_ITERATOR_TYPE>
bool ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::operator()(
    const uint32_t lhs,
    const uint32_t rhs) const {
  // The compare function must define a strict weak ordering; otherwise std::sort can
  // read out of bounds or corrupt memory.
  const auto lhs_storage_lookup_result = result_set_->findStorage(lhs);
  const auto rhs_storage_lookup_result = result_set_->findStorage(rhs);
  const auto lhs_storage = lhs_storage_lookup_result.storage_ptr;
  const auto rhs_storage = rhs_storage_lookup_result.storage_ptr;
  const auto fixedup_lhs = lhs_storage_lookup_result.fixedup_entry_idx;
  const auto fixedup_rhs = rhs_storage_lookup_result.fixedup_entry_idx;
  size_t materialized_count_distinct_buffer_idx{0};

  for (const auto& order_entry : order_entries_) {
    const auto& agg_info = result_set_->targets_[order_entry.tle_no - 1];
    const auto entry_ti = get_compact_type(agg_info);
    bool float_argument_input = takes_float_argument(agg_info);
    // A float target may be stored either as a 4-byte float or widened to 8 bytes;
    // check the padded slot width (and lazy fetch, for columnar output) to decide.
    if (entry_ti.get_type() == kFLOAT) {
      const auto is_col_lazy =
          !result_set_->lazy_fetch_info_.empty() &&
          result_set_->lazy_fetch_info_[order_entry.tle_no - 1].is_lazily_fetched;
      if (result_set_->query_mem_desc_.getPaddedSlotWidthBytes(order_entry.tle_no - 1) ==
          sizeof(float)) {
        float_argument_input =
            result_set_->query_mem_desc_.didOutputColumnar() ? !is_col_lazy : true;
      }
    }
    const bool use_desc_cmp = use_heap_ ? !order_entry.is_desc : order_entry.is_desc;

    if (UNLIKELY(is_distinct_target(agg_info))) {
      CHECK_LT(materialized_count_distinct_buffer_idx,
               count_distinct_materialized_buffers_.size());
      const auto& count_distinct_materialized_buffer =
          count_distinct_materialized_buffers_[materialized_count_distinct_buffer_idx];
      const auto lhs_sz = count_distinct_materialized_buffer[lhs];
      const auto rhs_sz = count_distinct_materialized_buffer[rhs];
      ++materialized_count_distinct_buffer_idx;
      if (lhs_sz == rhs_sz) {
        continue;
      }
      return use_desc_cmp ? lhs_sz > rhs_sz : lhs_sz < rhs_sz;
    }
    const auto lhs_v = buffer_itr_.getColumnInternal(lhs_storage->buff_,
                                                     fixedup_lhs,
                                                     order_entry.tle_no - 1,
                                                     lhs_storage_lookup_result);
    const auto rhs_v = buffer_itr_.getColumnInternal(rhs_storage->buff_,
                                                     fixedup_rhs,
                                                     order_entry.tle_no - 1,
                                                     rhs_storage_lookup_result);
    if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
                 isNull(entry_ti, rhs_v, float_argument_input))) {
      continue;
    }
    if (UNLIKELY(isNull(entry_ti, lhs_v, float_argument_input) &&
                 !isNull(entry_ti, rhs_v, float_argument_input))) {
      return use_heap_ ? !order_entry.nulls_first : order_entry.nulls_first;
    }
    if (UNLIKELY(isNull(entry_ti, rhs_v, float_argument_input) &&
                 !isNull(entry_ti, lhs_v, float_argument_input))) {
      return use_heap_ ? order_entry.nulls_first : !order_entry.nulls_first;
    }
    if (LIKELY(lhs_v.isInt())) {
      CHECK(rhs_v.isInt());
      if (UNLIKELY(entry_ti.is_string() &&
                   entry_ti.get_compression() == kENCODING_DICT)) {
        CHECK_EQ(4, entry_ti.get_logical_size());
        const auto string_dict_proxy = executor_->getStringDictionaryProxy(
            entry_ti.get_comp_param(), result_set_->row_set_mem_owner_, false);
        auto lhs_str = string_dict_proxy->getString(lhs_v.i1);
        auto rhs_str = string_dict_proxy->getString(rhs_v.i1);
        if (lhs_str == rhs_str) {
          continue;
        }
        return use_desc_cmp ? lhs_str > rhs_str : lhs_str < rhs_str;
      }
      if (lhs_v.i1 == rhs_v.i1) {
        continue;
      }
      if (entry_ti.is_fp()) {
        if (float_argument_input) {
          const auto lhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&lhs_v.i1));
          const auto rhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&rhs_v.i1));
          return use_desc_cmp ? lhs_dval > rhs_dval : lhs_dval < rhs_dval;
        } else {
          const auto lhs_dval =
              *reinterpret_cast<const double*>(may_alias_ptr(&lhs_v.i1));
          const auto rhs_dval =
              *reinterpret_cast<const double*>(may_alias_ptr(&rhs_v.i1));
          return use_desc_cmp ? lhs_dval > rhs_dval : lhs_dval < rhs_dval;
        }
      }
      return use_desc_cmp ? lhs_v.i1 > rhs_v.i1 : lhs_v.i1 < rhs_v.i1;
    }
    if (lhs_v.isPair()) {
      CHECK(rhs_v.isPair());
      const auto lhs =
          pair_to_double({lhs_v.i1, lhs_v.i2}, entry_ti, float_argument_input);
      const auto rhs =
          pair_to_double({rhs_v.i1, rhs_v.i2}, entry_ti, float_argument_input);
      if (lhs == rhs) {
        continue;
      }
      return use_desc_cmp ? lhs > rhs : lhs < rhs;
    }
    CHECK(lhs_v.isStr() && rhs_v.isStr());
    const auto lhs = lhs_v.strVal();
    const auto rhs = rhs_v.strVal();
    if (lhs == rhs) {
      continue;
    }
    return use_desc_cmp ? lhs > rhs : lhs < rhs;
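
// When use_heap_ is set, the comparison direction and the nulls-first handling are
// inverted (see use_desc_cmp above) so that the entry which should appear first in the
// final ordering sits at the top of the heap built by topPermutation().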
    std::vector<uint32_t>& to_sort,
    const size_t n,
    const std::function<bool(const uint32_t, const uint32_t)> compare) {
  std::make_heap(to_sort.begin(), to_sort.end(), compare);
  std::vector<uint32_t> permutation_top;
  permutation_top.reserve(n);
  for (size_t i = 0; i < n && !to_sort.empty(); ++i) {
    permutation_top.push_back(to_sort.front());
    std::pop_heap(to_sort.begin(), to_sort.end(), compare);
    to_sort.pop_back();
  }
  to_sort.swap(permutation_top);
}
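
// Illustrative sketch of the same selection pattern on plain integers (not part of
// this file): with compare = std::greater<int>{} the heap keeps the smallest value at
// the front, so the loop collects the 3 smallest elements in ascending order.
//   std::vector<int> v{5, 1, 4, 2, 3};
//   std::make_heap(v.begin(), v.end(), std::greater<int>{});
//   std::vector<int> top;
//   for (size_t i = 0; i < 3 && !v.empty(); ++i) {
//     top.push_back(v.front());
//     std::pop_heap(v.begin(), v.end(), std::greater<int>{});
//     v.pop_back();
//   }
//   // top == {1, 2, 3}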
    const std::function<bool(const uint32_t, const uint32_t)> compare) {
  std::sort(permutation_.begin(), permutation_.end(), compare);
    const std::list<Analyzer::OrderEntry>& order_entries) const {
  auto data_mgr = &catalog_->getDataMgr();
  const int device_id{0};
  std::vector<int64_t*> group_by_buffers(block_size_);
  group_by_buffers[0] = reinterpret_cast<int64_t*>(storage_->getUnderlyingBuffer());
  auto dev_group_by_buffers =

  inplace_sort_gpu(
      order_entries, query_mem_desc_, dev_group_by_buffers, data_mgr, device_id);

      dev_group_by_buffers.second,
    const std::list<Analyzer::OrderEntry>& order_entries) const {
  CHECK(!query_mem_desc_.hasKeylessHash());
  std::vector<int64_t> tmp_buff(query_mem_desc_.getEntryCount());
  std::vector<int32_t> idx_buff(query_mem_desc_.getEntryCount());
  CHECK_EQ(size_t(1), order_entries.size());
  auto buffer_ptr = storage_->getUnderlyingBuffer();
  for (const auto& order_entry : order_entries) {
    const auto target_idx = order_entry.tle_no - 1;
    const auto sortkey_val_buff = reinterpret_cast<int64_t*>(
        buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
    const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
    sort_groups_cpu(sortkey_val_buff,
                    &idx_buff[0],
                    query_mem_desc_.getEntryCount(),
                    order_entry.is_desc,
                    chosen_bytes);
    apply_permutation_cpu(reinterpret_cast<int64_t*>(buffer_ptr),
                          &idx_buff[0],
                          query_mem_desc_.getEntryCount(),
                          &tmp_buff[0],
                          sizeof(int64_t));
    for (size_t target_idx = 0; target_idx < query_mem_desc_.getSlotCount();
         ++target_idx) {
      if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
        continue;
      }
      const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
      const auto satellite_val_buff = reinterpret_cast<int64_t*>(
          buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
      apply_permutation_cpu(satellite_val_buff,
                            &idx_buff[0],
                            query_mem_desc_.getEntryCount(),
                            &tmp_buff[0],
                            chosen_bytes);
    }
  }
}
    const int dict_id) const {
  const auto sdp = row_set_mem_owner_->getOrAddStringDictProxy(

  return sdp->getDictionary()->copyStrings();
  } else if (query_mem_desc_.didOutputColumnar()) {
    return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::Projection ||
                                    (query_mem_desc_.getQueryDescriptionType() ==
                                         QueryDescriptionType::GroupByPerfectHash ||
                                     query_mem_desc_.getQueryDescriptionType() ==
                                         QueryDescriptionType::GroupByBaselineHash));
  } else {
    return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByPerfectHash ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByBaselineHash);
  }
}

  return query_mem_desc_.didOutputColumnar() &&
         query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection &&
         appended_storage_.empty() && storage_ &&
         (lazy_fetch_info_.empty() || !lazy_fetch_info_[column_idx].is_lazily_fetched);
  CHECK(isZeroCopyColumnarConversionPossible(column_idx));
  return storage_->getUnderlyingBuffer() + query_mem_desc_.getColOffInBytes(column_idx);
  std::vector<bool> target_bitmap(targets_.size(), true);
  size_t num_single_slot_targets = 0;
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    const auto& sql_type = targets_[target_idx].sql_type;
    if (targets_[target_idx].is_agg && targets_[target_idx].agg_kind == kAVG) {
      target_bitmap[target_idx] = false;
    } else if (sql_type.is_varlen()) {
      target_bitmap[target_idx] = false;
    } else {
      num_single_slot_targets++;
    }
  }
  return std::make_tuple(std::move(target_bitmap), num_single_slot_targets);
  CHECK(isDirectColumnarConversionPossible());
  auto [single_slot_targets, num_single_slot_targets] = getSingleSlotTargetBitmap();

  for (size_t target_idx = 0; target_idx < single_slot_targets.size(); target_idx++) {
    const auto& target = targets_[target_idx];
    if (single_slot_targets[target_idx] &&
        (is_distinct_target(target) ||
         (target.is_agg && target.agg_kind == kSAMPLE && target.sql_type == kFLOAT))) {
      single_slot_targets[target_idx] = false;
      num_single_slot_targets--;
    }
  }
  CHECK_GE(num_single_slot_targets, size_t(0));
  return std::make_tuple(std::move(single_slot_targets), num_single_slot_targets);
  std::vector<size_t> slot_indices(targets_.size(), 0);
  size_t slot_index = 0;
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    slot_indices[target_idx] = slot_index;
    slot_index = advance_slot(slot_index, targets_[target_idx], false);
  }
  return slot_indices;
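
// Illustrative mapping (assumed targets, not from this file): for targets
// [COUNT(*), AVG(x), SUM(y)] the AVG target occupies two physical slots (sum and
// count), so advance_slot() yields slot indices {0, 1, 3}: COUNT reads slot 0,
// AVG reads slots 1 and 2, and SUM reads slot 3.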
  return !rows.isTruncated();