#include <tbb/parallel_for.h>

void ResultSet::keepFirstN(const size_t n) {
  invalidateCachedRowCount();
  keep_first_ = n;
}

void ResultSet::dropFirstN(const size_t n) {
  invalidateCachedRowCount();
  drop_first_ = n;
}
ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
                     const ExecutorDeviceType device_type,
                     const QueryMemoryDescriptor& query_mem_desc,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                     const unsigned block_size,
                     const unsigned grid_size)
    : targets_(targets)
    , device_type_(device_type)
    , query_mem_desc_(query_mem_desc)
    , crt_row_buff_idx_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , block_size_(block_size)
    , grid_size_(grid_size)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , can_use_speculative_top_n_sort(std::nullopt) {}
ResultSet::ResultSet(const std::vector<TargetInfo>& targets,
                     const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
                     const std::vector<std::vector<const int8_t*>>& col_buffers,
                     const std::vector<std::vector<int64_t>>& frag_offsets,
                     const std::vector<int64_t>& consistent_frag_sizes,
                     const ExecutorDeviceType device_type,
                     const int device_id,
                     const QueryMemoryDescriptor& query_mem_desc,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
                     const unsigned block_size,
                     const unsigned grid_size)
    : targets_(targets)
    , device_type_(device_type)
    , device_id_(device_id)
    , query_mem_desc_(query_mem_desc)
    , crt_row_buff_idx_(0)
    , row_set_mem_owner_(row_set_mem_owner)
    , block_size_(block_size)
    , grid_size_(grid_size)
    , lazy_fetch_info_(lazy_fetch_info)
    , col_buffers_{col_buffers}
    , frag_offsets_{frag_offsets}
    , consistent_frag_sizes_{consistent_frag_sizes}
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , geo_return_type_(GeoReturnType::WktString)
    , query_exec_time_(0)
    , can_use_speculative_top_n_sort(std::nullopt) {}
ResultSet::ResultSet(const std::shared_ptr<const Analyzer::Estimator> estimator,
                     const ExecutorDeviceType device_type,
                     const int device_id,
                     Data_Namespace::DataMgr* data_mgr)
    : device_type_(device_type)
    , device_id_(device_id)
    , crt_row_buff_idx_(0)
    , estimator_(estimator)
    , separate_varlen_storage_valid_(false)
    , just_explain_(false)
    , for_validation_only_(false)
    , geo_return_type_(GeoReturnType::WktString)
    , query_exec_time_(0)
    , can_use_speculative_top_n_sort(std::nullopt) {
  if (device_type == ExecutorDeviceType::GPU) {
    device_estimator_buffer_ = CudaAllocator::allocGpuAbstractBuffer(
        data_mgr_, estimator_->getBufferSize(), device_id_);
    data_mgr->getCudaMgr()->zeroDeviceMem(device_estimator_buffer_->getMemoryPtr(),
                                          estimator_->getBufferSize(),
                                          device_id_,
                                          getQueryEngineCudaStreamForDevice(device_id_));
  } else {
    host_estimator_buffer_ =
        static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
  }
}
ResultSet::ResultSet(const std::string& explanation)
    : separate_varlen_storage_valid_(false)
    , explanation_(explanation)
    , just_explain_(true)
    , for_validation_only_(false)
    , query_exec_time_(0)
    , can_use_speculative_top_n_sort(std::nullopt) {}
ResultSet::ResultSet(int64_t queue_time_ms,
                     int64_t render_time_ms,
                     const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
    : row_set_mem_owner_(row_set_mem_owner)
    , separate_varlen_storage_valid_(false)
    , just_explain_(true)
    , for_validation_only_(false)
    , geo_return_type_(GeoReturnType::WktString)
    , query_exec_time_(0)
    , can_use_speculative_top_n_sort(std::nullopt) {}
ResultSet::~ResultSet() {
  if (storage_) {
    if (!storage_->buff_is_provided_) {
      CHECK(storage_->getUnderlyingBuffer());
      free(storage_->getUnderlyingBuffer());
    }
  }
  for (auto& storage : appended_storage_) {
    if (storage && !storage->buff_is_provided_) {
      free(storage->getUnderlyingBuffer());
    }
  }
  if (host_estimator_buffer_) {
    free(host_estimator_buffer_);
  }
  if (device_estimator_buffer_) {
    data_mgr_->free(device_estimator_buffer_);
  }
}
std::string ResultSet::summaryToString() const {
  std::ostringstream oss;
  oss << "Result Set Info" << std::endl;
  oss << "\tLayout: " << query_mem_desc_.queryDescTypeToString() << std::endl;
  oss << "\tColumns: " << colCount() << std::endl;
  oss << "\tRows: " << rowCount() << std::endl;
  oss << "\tEntry count: " << entryCount() << std::endl;
  const std::string is_empty = isEmpty() ? "True" : "False";
  oss << "\tIs empty: " << is_empty << std::endl;
  const std::string did_output_columnar = didOutputColumnar() ? "True" : "False";
  oss << "\tColumnar: " << did_output_columnar << std::endl;
  oss << "\tLazy-fetched columns: " << getNumColumnsLazyFetched() << std::endl;
  const std::string is_direct_columnar_conversion_possible =
      isDirectColumnarConversionPossible() ? "True" : "False";
  oss << "\tDirect columnar conversion possible: "
      << is_direct_columnar_conversion_possible << std::endl;

  size_t num_columns_zero_copy_columnarizable{0};
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    if (isZeroCopyColumnarConversionPossible(target_idx)) {
      num_columns_zero_copy_columnarizable++;
    }
  }
  oss << "\tZero-copy columnar conversion columns: "
      << num_columns_zero_copy_columnarizable << std::endl;

  oss << "\tPermutation size: " << permutation_.size() << std::endl;
  oss << "\tLimit: " << keep_first_ << std::endl;
  oss << "\tOffset: " << drop_first_ << std::endl;
  return oss.str();
}
const ResultSetStorage* ResultSet::allocateStorage() const {
  CHECK(row_set_mem_owner_);
  auto buff = row_set_mem_owner_->allocate(
      query_mem_desc_.getBufferSizeBytes(device_type_), 0);
  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
  return storage_.get();
}
const ResultSetStorage* ResultSet::allocateStorage(
    int8_t* buff,
    const std::vector<int64_t>& target_init_vals,
    std::shared_ptr<VarlenOutputInfo> varlen_output_info) const {
  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
  storage_->target_init_vals_ = target_init_vals;
  if (varlen_output_info) {
    storage_->varlen_output_info_ = varlen_output_info;
  }
  return storage_.get();
}

const ResultSetStorage* ResultSet::allocateStorage(
    const std::vector<int64_t>& target_init_vals) const {
  CHECK(row_set_mem_owner_);
  auto buff = row_set_mem_owner_->allocate(
      query_mem_desc_.getBufferSizeBytes(device_type_), 0);
  storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
  storage_->target_init_vals_ = target_init_vals;
  return storage_.get();
}
size_t ResultSet::getCurrentRowBufferIndex() const {
  if (crt_row_buff_idx_ == 0) {
    throw std::runtime_error("current row buffer iteration index is undefined");
  }
  return crt_row_buff_idx_ - 1;
}
void ResultSet::append(ResultSet& that) {
  invalidateCachedRowCount();
  if (!that.storage_) {
    return;
  }
  appended_storage_.push_back(std::move(that.storage_));
  query_mem_desc_.setEntryCount(
      query_mem_desc_.getEntryCount() +
      appended_storage_.back()->query_mem_desc_.getEntryCount());
  chunks_.insert(chunks_.end(), that.chunks_.begin(), that.chunks_.end());
  col_buffers_.insert(
      col_buffers_.end(), that.col_buffers_.begin(), that.col_buffers_.end());
  frag_offsets_.insert(
      frag_offsets_.end(), that.frag_offsets_.begin(), that.frag_offsets_.end());
  consistent_frag_sizes_.insert(consistent_frag_sizes_.end(),
                                that.consistent_frag_sizes_.begin(),
                                that.consistent_frag_sizes_.end());
  chunk_iters_.insert(
      chunk_iters_.end(), that.chunk_iters_.begin(), that.chunk_iters_.end());
  if (separate_varlen_storage_valid_) {
    CHECK(that.separate_varlen_storage_valid_);
    serialized_varlen_buffer_.insert(serialized_varlen_buffer_.end(),
                                     that.serialized_varlen_buffer_.begin(),
                                     that.serialized_varlen_buffer_.end());
  }
  for (auto& buff : that.literal_buffers_) {
    literal_buffers_.push_back(std::move(buff));
  }
}
ResultSetPtr ResultSet::copy() {
  auto executor = getExecutor();
  ResultSetPtr copied_rs = std::make_shared<ResultSet>(targets_,
                                                       device_type_,
                                                       query_mem_desc_,
                                                       row_set_mem_owner_,
                                                       executor->blockSize(),
                                                       executor->gridSize());

  auto allocate_and_copy_storage =
      [&](const ResultSetStorage* prev_storage) -> std::unique_ptr<ResultSetStorage> {
    const auto& prev_qmd = prev_storage->query_mem_desc_;
    const auto storage_size = prev_qmd.getBufferSizeBytes(device_type_);
    auto buff = row_set_mem_owner_->allocate(storage_size, 0);
    std::unique_ptr<ResultSetStorage> new_storage;
    new_storage.reset(new ResultSetStorage(
        prev_storage->targets_, prev_qmd, buff, true));
    new_storage->target_init_vals_ = prev_storage->target_init_vals_;
    if (prev_storage->varlen_output_info_) {
      new_storage->varlen_output_info_ = prev_storage->varlen_output_info_;
    }
    memcpy(new_storage->buff_, prev_storage->buff_, storage_size);
    new_storage->query_mem_desc_ = prev_qmd;
    return new_storage;
  };

  copied_rs->storage_ = allocate_and_copy_storage(storage_.get());
  if (!appended_storage_.empty()) {
    for (const auto& storage : appended_storage_) {
      copied_rs->appended_storage_.push_back(allocate_and_copy_storage(storage.get()));
    }
  }
  std::copy(chunks_.begin(), chunks_.end(), std::back_inserter(copied_rs->chunks_));
  std::copy(chunk_iters_.begin(),
            chunk_iters_.end(),
            std::back_inserter(copied_rs->chunk_iters_));
  std::copy(col_buffers_.begin(),
            col_buffers_.end(),
            std::back_inserter(copied_rs->col_buffers_));
  std::copy(frag_offsets_.begin(),
            frag_offsets_.end(),
            std::back_inserter(copied_rs->frag_offsets_));
  std::copy(consistent_frag_sizes_.begin(),
            consistent_frag_sizes_.end(),
            std::back_inserter(copied_rs->consistent_frag_sizes_));
  if (separate_varlen_storage_valid_) {
    std::copy(serialized_varlen_buffer_.begin(),
              serialized_varlen_buffer_.end(),
              std::back_inserter(copied_rs->serialized_varlen_buffer_));
  }
  std::copy(literal_buffers_.begin(),
            literal_buffers_.end(),
            std::back_inserter(copied_rs->literal_buffers_));
  std::copy(lazy_fetch_info_.begin(),
            lazy_fetch_info_.end(),
            std::back_inserter(copied_rs->lazy_fetch_info_));

  copied_rs->permutation_ = permutation_;
  copied_rs->drop_first_ = drop_first_;
  copied_rs->keep_first_ = keep_first_;
  copied_rs->separate_varlen_storage_valid_ = separate_varlen_storage_valid_;
  copied_rs->query_exec_time_ = query_exec_time_;
  copied_rs->input_table_keys_ = input_table_keys_;
  copied_rs->target_meta_info_ = target_meta_info_;
  copied_rs->geo_return_type_ = geo_return_type_;
  copied_rs->query_plan_ = query_plan_;
  if (can_use_speculative_top_n_sort) {
    copied_rs->can_use_speculative_top_n_sort = can_use_speculative_top_n_sort;
  }

  return copied_rs;
}

const ResultSetStorage* ResultSet::getStorage() const {
  return storage_.get();
}
size_t ResultSet::colCount() const {
  return just_explain_ ? 1 : targets_.size();
}

SQLTypeInfo ResultSet::getColType(const size_t col_idx) const {
  if (just_explain_) {
    return SQLTypeInfo(kTEXT, false);
  }
  CHECK_LT(col_idx, targets_.size());
  return targets_[col_idx].agg_kind == kAVG ? SQLTypeInfo(kDOUBLE, false)
                                            : targets_[col_idx].sql_type;
}

StringDictionaryProxy* ResultSet::getStringDictionaryProxy(
    const shared::StringDictKey& dict_key) const {
  constexpr bool with_generation = true;
  return (dict_key.db_id > 0 || dict_key.dict_id == DictRef::literalsDictId)
             ? row_set_mem_owner_->getOrAddStringDictProxy(dict_key, with_generation)
             : row_set_mem_owner_->getStringDictProxy(dict_key);
}
// ResultSet::CellCallback::operator(): rewrites a dictionary-encoded string id in place
// using the proxy id_map_ built for the target dictionary.
void operator()(int8_t const* const cell_ptr) const {
  using StringId = int32_t;
  StringId* const string_id_ptr =
      const_cast<StringId*>(reinterpret_cast<StringId const*>(cell_ptr));
  *string_id_ptr = id_map_[*string_id_ptr];
}
void ResultSet::translateDictEncodedColumns(std::vector<TargetInfo> const& targets,
                                            size_t const start_idx) {
  CHECK_EQ(targets.size(), storage_->targets_.size());
  RowIterationState state;
  for (size_t target_idx = start_idx; target_idx < targets.size(); ++target_idx) {
    auto const& type_lhs = targets[target_idx].sql_type;
    if (type_lhs.is_dict_encoded_string()) {
      auto& type_rhs =
          const_cast<SQLTypeInfo&>(storage_->targets_[target_idx].sql_type);
      CHECK(type_rhs.is_dict_encoded_string());
      if (type_lhs.getStringDictKey() != type_rhs.getStringDictKey()) {
        auto* const sdp_lhs = getStringDictionaryProxy(type_lhs.getStringDictKey());
        auto const* const sdp_rhs =
            getStringDictionaryProxy(type_rhs.getStringDictKey());
        state.cur_target_idx_ = target_idx;
        // ... (translate the column's string ids from the rhs to the lhs dictionary)
        type_rhs.set_comp_param(type_lhs.get_comp_param());
        type_rhs.setStringDictKey(type_lhs.getStringDictKey());
      }
    }
  }
}
// Driver for translateDictEncodedColumns(): invokes func on each cell of the column
// identified by state.cur_target_idx_.
void ResultSet::eachCellInColumn(RowIterationState& state, CellCallback const& func) {
  size_t const target_idx = state.cur_target_idx_;
  CHECK_LT(target_idx, lazy_fetch_info_.size());
  auto& col_lazy_fetch = lazy_fetch_info_[target_idx];
  CHECK(col_lazy_fetch.is_lazily_fetched);
  int const target_size = storage_->targets_[target_idx].sql_type.get_size();
  CHECK_LT(0, target_size) << storage_->targets_[target_idx].toString();
  size_t const nrows = storage_->binSearchRowCount();
  QueryMemoryDescriptor& storage_qmd = storage_->query_mem_desc_;
  // The slot-offset computation is elided in this excerpt: it falls back to
  // query_mem_desc_.getEffectiveKeyWidth() for key columns and inspects the following
  // target (next_target_idx = j + 1, next_agg_info) to advance past multi-slot targets.
  if (storage_qmd.didOutputColumnar()) {
    for (size_t i = 0; i < nrows; ++i) {
      // ... (read the lazy-fetch position `pos` for row i from the columnar buffer)
      auto& frag_col_buffers = getColumnFrag(0, target_idx, pos);
      CHECK_LT(size_t(col_lazy_fetch.local_col_id), frag_col_buffers.size());
      int8_t const* const col_frag = frag_col_buffers[col_lazy_fetch.local_col_id];
      func(col_frag + pos * target_size);
    }
  } else {
    size_t const key_bytes_with_padding =
        align_to_int64(get_key_bytes_rowwise(storage_qmd));
    for (size_t i = 0; i < nrows; ++i) {
      int8_t const* const keys_ptr = row_ptr_rowwise(storage_->buff_, storage_qmd, i);
      int8_t const* const rowwise_target_ptr = keys_ptr + key_bytes_with_padding;
      int64_t pos = *reinterpret_cast<int64_t const*>(rowwise_target_ptr);
      auto& frag_col_buffers = getColumnFrag(0, target_idx, pos);
      CHECK_LT(size_t(col_lazy_fetch.local_col_id), frag_col_buffers.size());
      int8_t const* const col_frag = frag_col_buffers[col_lazy_fetch.local_col_id];
      func(col_frag + pos * target_size);
    }
  }
}
size_t get_truncated_row_count(size_t total_row_count, size_t limit, size_t offset) {
  if (total_row_count < offset) {
    return 0;
  }

  size_t total_truncated_row_count = total_row_count - offset;

  if (limit) {
    return std::min(total_truncated_row_count, limit);
  }

  return total_truncated_row_count;
}
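// Illustrative example (editorial, not from the original source): with
// total_row_count = 100, offset = 30 and limit = 50 the function returns
// min(100 - 30, 50) = 50; with limit = 0 it returns all 70 remaining rows.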
size_t ResultSet::rowCountImpl(const bool force_parallel) const {
  // ... (explain-only and table-function results short-circuit above this point)
  if (!permutation_.empty()) {
    // keep_first_ / drop_first_ correspond to the SQL LIMIT / OFFSET
    return get_truncated_row_count(permutation_.size(), keep_first_, drop_first_);
  }
  if (!storage_) {
    return 0;
  }
  CHECK(permutation_.empty());
  if (query_mem_desc_.getQueryDescriptionType() == QueryDescriptionType::Projection) {
    return binSearchRowCount();
  }

  constexpr size_t auto_parallel_row_count_threshold{20000UL};
  if (force_parallel || entryCount() >= auto_parallel_row_count_threshold) {
    return parallelRowCount();
  }
  std::lock_guard<std::mutex> lock(row_iteration_mutex_);
  moveToBegin();
  size_t row_count{0};
  while (true) {
    auto crt_row = getNextRowUnlocked(false, false);
    if (crt_row.empty()) {
      break;
    }
    ++row_count;
  }
  moveToBegin();
  return row_count;
}

size_t ResultSet::rowCount(const bool force_parallel) const {
  const int64_t cached_row_count = cached_row_count_;
  if (cached_row_count != uninitialized_cached_row_count) {
    CHECK_GE(cached_row_count, 0);
    return cached_row_count;
  }
  setCachedRowCount(rowCountImpl(force_parallel));
  return cached_row_count_;
}
void ResultSet::setCachedRowCount(const size_t row_count) const {
  const int64_t signed_row_count = static_cast<int64_t>(row_count);
  const int64_t old_cached_row_count = cached_row_count_.exchange(signed_row_count);
  CHECK(old_cached_row_count == uninitialized_cached_row_count ||
        old_cached_row_count == signed_row_count);
}
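// Editorial note (hedged): cached_row_count_ acts as an atomic, sentinel-initialized
// cache. rowCount() returns the cached value once it differs from
// uninitialized_cached_row_count; setCachedRowCount() installs the value with
// exchange() and asserts that any concurrent writer computed the same count.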
size_t ResultSet::binSearchRowCount() const {
  if (!storage_) {
    return 0;
  }
  size_t row_count = storage_->binSearchRowCount();
  for (auto& s : appended_storage_) {
    row_count += s->binSearchRowCount();
  }
  return get_truncated_row_count(row_count, keep_first_, drop_first_);
}
size_t ResultSet::parallelRowCount() const {
  using namespace threading;
  auto execute_parallel_row_count =
      [this](const blocked_range<size_t>& r, size_t row_count) {
        for (size_t i = r.begin(); i < r.end(); ++i) {
          if (!isRowAtEmpty(i)) {
            ++row_count;
          }
        }
        return row_count;
      };
  const auto row_count = parallel_reduce(blocked_range<size_t>(0, entryCount()),
                                         size_t(0),
                                         execute_parallel_row_count,
                                         std::plus<size_t>());
  return get_truncated_row_count(row_count, keep_first_, drop_first_);
}

bool ResultSet::isEmpty() const {
  // De-duplicated with rowCount(): simply check whether any row survives.
  return rowCount() == size_t(0);
}
bool ResultSet::definitelyHasNoRows() const {
  return (!storage_ && !estimator_ && !just_explain_) || cached_row_count_ == 0;
}

const QueryMemoryDescriptor& ResultSet::getQueryMemDesc() const {
  CHECK(storage_);
  return storage_->query_mem_desc_;
}

const std::vector<int64_t>& ResultSet::getTargetInitVals() const {
  CHECK(storage_);
  return storage_->target_init_vals_;
}

int8_t* ResultSet::getDeviceEstimatorBuffer() const {
  CHECK(device_estimator_buffer_);
  return device_estimator_buffer_->getMemoryPtr();
}

int8_t* ResultSet::getHostEstimatorBuffer() const {
  return host_estimator_buffer_;
}
void ResultSet::syncEstimatorBuffer() const {
  CHECK(!host_estimator_buffer_);
  CHECK_EQ(size_t(0), estimator_->getBufferSize() % sizeof(int64_t));
  host_estimator_buffer_ =
      static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
  CHECK(device_estimator_buffer_);
  auto device_buffer_ptr = device_estimator_buffer_->getMemoryPtr();
  auto allocator = std::make_unique<CudaAllocator>(
      data_mgr_, device_id_, getQueryEngineCudaStreamForDevice(device_id_));
  allocator->copyFromDevice(
      host_estimator_buffer_, device_buffer_ptr, estimator_->getBufferSize());
}
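// Editorial note (hedged): the estimator result lives in a GPU buffer; this copies it
// into a zero-initialized host buffer (checked_calloc) so CPU-side code can read the
// estimator counts without any further device access.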
void ResultSet::setQueueTime(const int64_t queue_time) {
  timings_.executor_queue_time = queue_time;
}

void ResultSet::setKernelQueueTime(const int64_t kernel_queue_time) {
  timings_.kernel_queue_time = kernel_queue_time;
}

void ResultSet::addCompilationQueueTime(const int64_t compilation_queue_time) {
  timings_.compilation_queue_time += compilation_queue_time;
}

int64_t ResultSet::getQueueTime() const {
  return timings_.executor_queue_time + timings_.kernel_queue_time +
         timings_.compilation_queue_time;
}

int64_t ResultSet::getRenderTime() const {
  return timings_.render_time;
}

void ResultSet::moveToBegin() const {
  crt_row_buff_idx_ = 0;
}

bool ResultSet::isTruncated() const {
  return keep_first_ + drop_first_;
}

bool ResultSet::isExplain() const {
  return just_explain_;
}

void ResultSet::setValidationOnlyRes() {
  for_validation_only_ = true;
}

bool ResultSet::isValidationOnlyRes() const {
  return for_validation_only_;
}

QueryMemoryDescriptor ResultSet::fixupQueryMemoryDescriptor(
    const QueryMemoryDescriptor& query_mem_desc) {
  auto query_mem_desc_copy = query_mem_desc;
  query_mem_desc_copy.resetGroupColWidths(
      std::vector<int8_t>(query_mem_desc_copy.getGroupbyColCount(), 8));
  if (query_mem_desc.didOutputColumnar()) {
    return query_mem_desc_copy;
  }
  query_mem_desc_copy.alignPaddedSlots();
  return query_mem_desc_copy;
}
void ResultSet::sort(const std::list<Analyzer::OrderEntry>& order_entries,
                     size_t top_n,
                     const Executor* executor) {
  auto timer = DEBUG_TIMER(__func__);
  if (!storage_) {
    return;
  }
  invalidateCachedRowCount();
  CHECK(!targets_.empty());
  if (canUseFastBaselineSort(order_entries, top_n)) {
    baselineSort(order_entries, top_n, executor);
    return;
  }
  if (query_mem_desc_.sortOnGpu()) {
    try {
      radixSortOnGpu(order_entries);
    } catch (const OutOfMemory&) {
      LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
      radixSortOnCpu(order_entries);
    } catch (const std::bad_alloc&) {
      LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
      radixSortOnCpu(order_entries);
    }
    return;
  }
  if (query_mem_desc_.getEntryCount() > std::numeric_limits<uint32_t>::max()) {
    // ... (reject the sort: the 32-bit permutation index cannot address this many entries)
  }

  CHECK(permutation_.empty());

  if (top_n && g_parallel_top_min < entryCount()) {
    parallelTop(order_entries, top_n, executor);
  } else {
    permutation_.resize(query_mem_desc_.getEntryCount());
    PermutationView pv(permutation_.data(), 0, permutation_.size());
    pv = initPermutationBuffer(pv, 0, permutation_.size());
    pv = topPermutation(pv, top_n, createComparator(order_entries, pv, executor, false));
    if (pv.size() < permutation_.size()) {
      permutation_.resize(pv.size());
      permutation_.shrink_to_fit();
    }
  }
}
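// Editorial summary (hedged): sort() picks one of three paths -- a GPU radix sort with
// a CPU fallback when GPU memory runs out, a multi-threaded top-n sort (parallelTop)
// for large inputs, or a single-threaded permutation sort that fills permutation_,
// initializes it via initPermutationBuffer() and then top-sorts it with topPermutation().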
void ResultSet::baselineSort(const std::list<Analyzer::OrderEntry>& order_entries,
                             const size_t top_n,
                             const Executor* executor) {
  if (getGpuCount() > 1) {
    // ... (attempt the baseline sort on GPU, falling back to CPU on failure)
  } else {
    // ... (baseline sort on CPU)
  }
}

PermutationView ResultSet::initPermutationBuffer(PermutationView permutation,
                                                 PermutationIdx const begin,
                                                 PermutationIdx const end) const {
  for (PermutationIdx i = begin; i < end; ++i) {
    const auto storage_lookup_result = findStorage(i);
    const auto lhs_storage = storage_lookup_result.storage_ptr;
    const auto off = storage_lookup_result.fixedup_entry_idx;
    CHECK(lhs_storage);
    if (!lhs_storage->isEmptyEntry(off)) {
      permutation.push_back(i);
    }
  }
  return permutation;
}
void ResultSet::parallelTop(const std::list<Analyzer::OrderEntry>& order_entries,
                            const size_t top_n,
                            const Executor* executor) {
  auto timer = DEBUG_TIMER(__func__);
  const size_t nthreads = cpu_threads();

  // Split permutation_ into nthreads subranges and top-sort each in-place.
  permutation_.resize(query_mem_desc_.getEntryCount());
  std::vector<PermutationView> permutation_views(nthreads);
  threading::task_group top_sort_threads;
  for (auto interval : makeIntervals<PermutationIdx>(0, permutation_.size(), nthreads)) {
    top_sort_threads.run([this,
                          &order_entries,
                          &permutation_views,
                          top_n,
                          executor,
                          interval] {
      PermutationView pv(permutation_.data() + interval.begin, 0, interval.size());
      pv = initPermutationBuffer(pv, interval.begin, interval.end);
      const auto compare = createComparator(order_entries, pv, executor, true);
      permutation_views[interval.index] = topPermutation(pv, top_n, compare);
    });
  }
  top_sort_threads.wait();

  // Compact the per-thread results to the front of permutation_.
  auto end = permutation_.begin() + permutation_views.front().size();
  for (size_t i = 1; i < nthreads; ++i) {
    std::copy(permutation_views[i].begin(), permutation_views[i].end(), end);
    end += permutation_views[i].size();
  }

  // Top-sort the merged prefix to get the final top_n ordering.
  PermutationView pv(permutation_.data(), end - permutation_.begin());
  const auto compare = createComparator(order_entries, pv, executor, false);
  pv = topPermutation(pv, top_n, compare);
  permutation_.resize(pv.size());
  permutation_.shrink_to_fit();
}
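// Editorial note (hedged): each worker top-sorts its own sub-range of permutation_, the
// per-thread winners are then copied to the front of the buffer, and one final
// topPermutation() over that merged prefix yields the global top_n ordering.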
std::pair<size_t, size_t> ResultSet::getStorageIndex(const size_t entry_idx) const {
  size_t fixedup_entry_idx = entry_idx;
  auto entry_count = storage_->query_mem_desc_.getEntryCount();
  const bool is_rowwise_layout = !storage_->query_mem_desc_.didOutputColumnar();
  if (fixedup_entry_idx < entry_count) {
    return {0, fixedup_entry_idx};
  }
  fixedup_entry_idx -= entry_count;
  for (size_t i = 0; i < appended_storage_.size(); ++i) {
    const auto& desc = appended_storage_[i]->query_mem_desc_;
    CHECK_NE(is_rowwise_layout, desc.didOutputColumnar());
    entry_count = desc.getEntryCount();
    if (fixedup_entry_idx < entry_count) {
      return {i + 1, fixedup_entry_idx};
    }
    fixedup_entry_idx -= entry_count;
  }
  UNREACHABLE() << "entry_idx = " << entry_idx << ", query_mem_desc_.getEntryCount() = "
                << query_mem_desc_.getEntryCount();
}
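// Illustrative example (assumed entry counts, not from the source): with a first
// storage of 8 entries and appended storages of 8 and 4 entries, entry_idx 13 maps to
// {1, 5}: the first appended storage (storage index 1), local entry 5.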
ResultSet::StorageLookupResult ResultSet::findStorage(const size_t entry_idx) const {
  auto [stg_idx, fixedup_entry_idx] = getStorageIndex(entry_idx);
  return {stg_idx ? appended_storage_[stg_idx - 1].get() : storage_.get(),
          fixedup_entry_idx,
          stg_idx};
}
template <typename BUFFER_ITERATOR_TYPE>
void ResultSet::ResultSetComparator<
    BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumns() {
  for (const auto& order_entry : order_entries_) {
    if (is_distinct_target(result_set_->targets_[order_entry.tle_no - 1])) {
      count_distinct_materialized_buffers_.emplace_back(
          materializeCountDistinctColumn(order_entry));
    }
  }
}

struct IsAggKind {
  IsAggKind(std::vector<TargetInfo> const& targets, SQLAgg const agg_kind)
      : targets_(targets), agg_kind_(agg_kind) {}
  bool operator()(Analyzer::OrderEntry const& order_entry) const {
    return targets_[order_entry.tle_no - 1].agg_kind == agg_kind_;
  }
  std::vector<TargetInfo> const& targets_;
  SQLAgg const agg_kind_;
};
template <typename BUFFER_ITERATOR_TYPE>
ResultSet::ApproxQuantileBuffers ResultSet::ResultSetComparator<
    BUFFER_ITERATOR_TYPE>::materializeApproxQuantileColumns() const {
  ResultSet::ApproxQuantileBuffers approx_quantile_materialized_buffers;
  for (const auto& order_entry : order_entries_) {
    if (result_set_->targets_[order_entry.tle_no - 1].agg_kind == kAPPROX_QUANTILE) {
      approx_quantile_materialized_buffers.emplace_back(
          materializeApproxQuantileColumn(order_entry));
    }
  }
  return approx_quantile_materialized_buffers;
}
template <typename BUFFER_ITERATOR_TYPE>
ResultSet::ModeBuffers ResultSet::ResultSetComparator<
    BUFFER_ITERATOR_TYPE>::materializeModeColumns() const {
  ResultSet::ModeBuffers mode_buffers;
  IsAggKind const is_mode(result_set_->targets_, kMODE);
  mode_buffers.reserve(
      std::count_if(order_entries_.begin(), order_entries_.end(), is_mode));
  for (auto const& order_entry : order_entries_) {
    if (is_mode(order_entry)) {
      mode_buffers.emplace_back(materializeModeColumn(order_entry));
    }
  }
  return mode_buffers;
}
template <typename BUFFER_ITERATOR_TYPE>
std::vector<int64_t> ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::
    materializeCountDistinctColumn(const Analyzer::OrderEntry& order_entry) const {
  const size_t num_storage_entries = result_set_->query_mem_desc_.getEntryCount();
  std::vector<int64_t> count_distinct_materialized_buffer(num_storage_entries);
  const auto count_distinct_descriptor =
      result_set_->query_mem_desc_.getCountDistinctDescriptor(order_entry.tle_no - 1);
  const size_t num_non_empty_entries = permutation_.size();

  const auto work = [&](const size_t start, const size_t end) {
    for (size_t i = start; i < end; ++i) {
      const PermutationIdx permuted_idx = permutation_[i];
      const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
      const auto storage = storage_lookup_result.storage_ptr;
      const auto off = storage_lookup_result.fixedup_entry_idx;
      const auto value = buffer_itr_.getColumnInternal(
          storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
      count_distinct_materialized_buffer[permuted_idx] =
          count_distinct_set_size(value.i1, count_distinct_descriptor);
    }
  };
  if (single_threaded_) {
    work(0, num_non_empty_entries);
  } else {
    threading::task_group thread_pool;
    for (auto interval : makeIntervals<size_t>(0, num_non_empty_entries, cpu_threads())) {
      thread_pool.run([=] { work(interval.begin, interval.end); });
    }
    thread_pool.wait();
  }
  return count_distinct_materialized_buffer;
}
double ResultSet::calculateQuantile(quantile::TDigest* const t_digest) {
  CHECK(t_digest);
  t_digest->mergeBufferFinal();
  double const quantile = t_digest->quantile();
  return boost::math::isnan(quantile) ? NULL_DOUBLE : quantile;
}
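// Editorial note (hedged): mergeBufferFinal() folds any buffered centroids into the
// digest before querying it; a NaN result (e.g. from a digest that saw no values) is
// mapped to NULL_DOUBLE so downstream comparisons treat it as an ordinary null.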
template <typename BUFFER_ITERATOR_TYPE>
ResultSet::ApproxQuantileBuffers::value_type
ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeApproxQuantileColumn(
    const Analyzer::OrderEntry& order_entry) const {
  ResultSet::ApproxQuantileBuffers::value_type materialized_buffer(
      result_set_->query_mem_desc_.getEntryCount());
  const size_t size = permutation_.size();
  const auto work = [&](const size_t start, const size_t end) {
    for (size_t i = start; i < end; ++i) {
      const PermutationIdx permuted_idx = permutation_[i];
      const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
      const auto storage = storage_lookup_result.storage_ptr;
      const auto off = storage_lookup_result.fixedup_entry_idx;
      const auto value = buffer_itr_.getColumnInternal(
          storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
      materialized_buffer[permuted_idx] =
          value.i1 ? calculateQuantile(reinterpret_cast<quantile::TDigest*>(value.i1))
                   : NULL_DOUBLE;
    }
  };
  if (single_threaded_) {
    work(0, size);
  } else {
    threading::task_group thread_pool;
    for (auto interval : makeIntervals<size_t>(0, size, cpu_threads())) {
      thread_pool.run([=] { work(interval.begin, interval.end); });
    }
    thread_pool.wait();
  }
  return materialized_buffer;
}
// Helper used by ModeScatter below: extract the mode value stored behind i1.
int64_t materializeMode(int64_t const i1) {
  if (auto const* const agg_mode = reinterpret_cast<AggMode const*>(i1)) {
    if (std::optional<int64_t> const mode = agg_mode->mode()) {
      return *mode;
    }
  }
  return NULL_BIGINT;
}
template <typename BUFFER_ITERATOR_TYPE>
struct ModeScatter {
  logger::ThreadLocalIds const parent_thread_local_ids_;
  ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE> const* const rsc_;
  Analyzer::OrderEntry const& order_entry_;
  ResultSet::ModeBuffers::value_type& materialized_buffer_;

  void operator()(ModeBlockedRange const& r) const {
    for (size_t i = r.begin(); i != r.end(); ++i) {
      PermutationIdx const permuted_idx = rsc_->permutation_[i];
      auto const storage_lookup_result = rsc_->result_set_->findStorage(permuted_idx);
      auto const storage = storage_lookup_result.storage_ptr;
      auto const off = storage_lookup_result.fixedup_entry_idx;
      auto const value = rsc_->buffer_itr_.getColumnInternal(
          storage->buff_, off, order_entry_.tle_no - 1, storage_lookup_result);
      materialized_buffer_[permuted_idx] = materializeMode(value.i1);
    }
  }
};
template <typename BUFFER_ITERATOR_TYPE>
ResultSet::ModeBuffers::value_type
ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::materializeModeColumn(
    const Analyzer::OrderEntry& order_entry) const {
  ResultSet::ModeBuffers::value_type materialized_buffer(
      result_set_->query_mem_desc_.getEntryCount());
  ModeScatter<BUFFER_ITERATOR_TYPE> mode_scatter{
      logger::thread_local_ids(), this, order_entry, materialized_buffer};
  if (single_threaded_) {
    mode_scatter(ModeBlockedRange(0, permutation_.size()));
  } else {
    tbb::parallel_for(ModeBlockedRange(0, permutation_.size()), mode_scatter);
  }
  return materialized_buffer;
}
template <typename BUFFER_ITERATOR_TYPE>
bool ResultSet::ResultSetComparator<BUFFER_ITERATOR_TYPE>::operator()(
    const PermutationIdx lhs, const PermutationIdx rhs) const {
  const auto lhs_storage_lookup_result = result_set_->findStorage(lhs);
  const auto rhs_storage_lookup_result = result_set_->findStorage(rhs);
  const auto lhs_storage = lhs_storage_lookup_result.storage_ptr;
  const auto rhs_storage = rhs_storage_lookup_result.storage_ptr;
  const auto fixedup_lhs = lhs_storage_lookup_result.fixedup_entry_idx;
  const auto fixedup_rhs = rhs_storage_lookup_result.fixedup_entry_idx;
  size_t materialized_count_distinct_buffer_idx{0};
  size_t materialized_approx_quantile_buffer_idx{0};
  size_t materialized_mode_buffer_idx{0};

  for (const auto& order_entry : order_entries_) {
    CHECK_GE(order_entry.tle_no, 1);
    const auto& lhs_agg_info = lhs_storage->targets_[order_entry.tle_no - 1];
    const auto& rhs_agg_info = rhs_storage->targets_[order_entry.tle_no - 1];
    const auto lhs_entry_ti = get_compact_type(lhs_agg_info);
    const auto rhs_entry_ti = get_compact_type(rhs_agg_info);
    // Determine whether a float argument was stored as a 4-byte float or widened.
    bool float_argument_input = takes_float_argument(lhs_agg_info);
    if (lhs_entry_ti.get_type() == kFLOAT) {
      const auto is_col_lazy =
          !result_set_->lazy_fetch_info_.empty() &&
          result_set_->lazy_fetch_info_[order_entry.tle_no - 1].is_lazily_fetched;
      if (result_set_->query_mem_desc_.getPaddedSlotWidthBytes(order_entry.tle_no - 1) ==
          sizeof(float)) {
        float_argument_input =
            result_set_->query_mem_desc_.didOutputColumnar() ? !is_col_lazy : true;
      }
    }

    if (UNLIKELY(is_distinct_target(lhs_agg_info))) {
      CHECK_LT(materialized_count_distinct_buffer_idx,
               count_distinct_materialized_buffers_.size());

      const auto& count_distinct_materialized_buffer =
          count_distinct_materialized_buffers_[materialized_count_distinct_buffer_idx];
      const auto lhs_sz = count_distinct_materialized_buffer[lhs];
      const auto rhs_sz = count_distinct_materialized_buffer[rhs];
      ++materialized_count_distinct_buffer_idx;
      if (lhs_sz == rhs_sz) {
        continue;
      }
      return (lhs_sz < rhs_sz) != order_entry.is_desc;
    } else if (UNLIKELY(lhs_agg_info.agg_kind == kAPPROX_QUANTILE)) {
      CHECK_LT(materialized_approx_quantile_buffer_idx,
               approx_quantile_materialized_buffers_.size());
      const auto& approx_quantile_materialized_buffer =
          approx_quantile_materialized_buffers_[materialized_approx_quantile_buffer_idx];
      const auto lhs_value = approx_quantile_materialized_buffer[lhs];
      const auto rhs_value = approx_quantile_materialized_buffer[rhs];
      ++materialized_approx_quantile_buffer_idx;
      if (lhs_value == rhs_value) {
        continue;
      } else if (!lhs_entry_ti.get_notnull()) {
        if (lhs_value == NULL_DOUBLE) {
          return order_entry.nulls_first;
        } else if (rhs_value == NULL_DOUBLE) {
          return !order_entry.nulls_first;
        }
      }
      return (lhs_value < rhs_value) != order_entry.is_desc;
    } else if (UNLIKELY(lhs_agg_info.agg_kind == kMODE)) {
      CHECK_LT(materialized_mode_buffer_idx, mode_buffers_.size());
      auto const& mode_buffer = mode_buffers_[materialized_mode_buffer_idx++];
      int64_t const lhs_value = mode_buffer[lhs];
      int64_t const rhs_value = mode_buffer[rhs];
      if (lhs_value == rhs_value) {
        continue;
      } else if (lhs_value == NULL_BIGINT) {
        return order_entry.nulls_first;
      } else if (rhs_value == NULL_BIGINT) {
        return !order_entry.nulls_first;
      } else {
        return result_set_->isLessThan(lhs_entry_ti, lhs_value, rhs_value) !=
               order_entry.is_desc;
      }
    }

    const auto lhs_v = buffer_itr_.getColumnInternal(lhs_storage->buff_,
                                                     fixedup_lhs,
                                                     order_entry.tle_no - 1,
                                                     lhs_storage_lookup_result);
    const auto rhs_v = buffer_itr_.getColumnInternal(rhs_storage->buff_,
                                                     fixedup_rhs,
                                                     order_entry.tle_no - 1,
                                                     rhs_storage_lookup_result);

    if (UNLIKELY(isNull(lhs_entry_ti, lhs_v, float_argument_input) &&
                 isNull(rhs_entry_ti, rhs_v, float_argument_input))) {
      continue;
    }
    if (UNLIKELY(isNull(lhs_entry_ti, lhs_v, float_argument_input) &&
                 !isNull(rhs_entry_ti, rhs_v, float_argument_input))) {
      return order_entry.nulls_first;
    }
    if (UNLIKELY(isNull(rhs_entry_ti, rhs_v, float_argument_input) &&
                 !isNull(lhs_entry_ti, lhs_v, float_argument_input))) {
      return !order_entry.nulls_first;
    }

    if (LIKELY(lhs_v.isInt())) {
      CHECK(rhs_v.isInt());
      if (UNLIKELY(lhs_entry_ti.is_string() &&
                   lhs_entry_ti.get_compression() == kENCODING_DICT)) {
        CHECK_EQ(4, lhs_entry_ti.get_logical_size());
        const auto lhs_string_dict_proxy = executor_->getStringDictionaryProxy(
            lhs_entry_ti.getStringDictKey(), result_set_->row_set_mem_owner_, false);
        const auto rhs_string_dict_proxy = executor_->getStringDictionaryProxy(
            rhs_entry_ti.getStringDictKey(), result_set_->row_set_mem_owner_, false);
        const auto lhs_str = lhs_string_dict_proxy->getString(lhs_v.i1);
        const auto rhs_str = rhs_string_dict_proxy->getString(rhs_v.i1);
        if (lhs_str == rhs_str) {
          continue;
        }
        return (lhs_str < rhs_str) != order_entry.is_desc;
      }

      if (lhs_v.i1 == rhs_v.i1) {
        continue;
      }
      if (lhs_entry_ti.is_fp()) {
        if (float_argument_input) {
          const auto lhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&lhs_v.i1));
          const auto rhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&rhs_v.i1));
          return (lhs_dval < rhs_dval) != order_entry.is_desc;
        } else {
          const auto lhs_dval =
              *reinterpret_cast<const double*>(may_alias_ptr(&lhs_v.i1));
          const auto rhs_dval =
              *reinterpret_cast<const double*>(may_alias_ptr(&rhs_v.i1));
          return (lhs_dval < rhs_dval) != order_entry.is_desc;
        }
      }
      return (lhs_v.i1 < rhs_v.i1) != order_entry.is_desc;
    } else {
      if (lhs_v.isPair()) {
        CHECK(rhs_v.isPair());
        const auto lhs =
            pair_to_double({lhs_v.i1, lhs_v.i2}, lhs_entry_ti, float_argument_input);
        const auto rhs =
            pair_to_double({rhs_v.i1, rhs_v.i2}, rhs_entry_ti, float_argument_input);
        if (lhs == rhs) {
          continue;
        }
        return (lhs < rhs) != order_entry.is_desc;
      } else {
        CHECK(lhs_v.isStr() && rhs_v.isStr());
        const auto lhs = lhs_v.strVal();
        const auto rhs = rhs_v.strVal();
        if (lhs == rhs) {
          continue;
        }
        return (lhs < rhs) != order_entry.is_desc;
      }
    }
  }
  return false;
}
// Partial-sort the permutation into its top n elements (least by compare); if the
// permutation has at most n elements, sort the whole thing.
PermutationView ResultSet::topPermutation(PermutationView permutation,
                                          const size_t n,
                                          const Comparator& compare) {
  if (n < permutation.size()) {
    std::partial_sort(
        permutation.begin(), permutation.begin() + n, permutation.end(), compare);
    permutation.resize(n);
  } else {
    std::sort(permutation.begin(), permutation.end(), compare);
  }
  return permutation;
}
void ResultSet::radixSortOnGpu(
    const std::list<Analyzer::OrderEntry>& order_entries) const {
  // ... (acquisition of the DataMgr used for GPU allocations elided in this excerpt)
  const int device_id{0};
  auto allocator = std::make_unique<CudaAllocator>(
      data_mgr, device_id, getQueryEngineCudaStreamForDevice(device_id));
  std::vector<int64_t*> group_by_buffers(block_size_);
  group_by_buffers[0] = reinterpret_cast<int64_t*>(storage_->getUnderlyingBuffer());
  auto dev_group_by_buffers = create_dev_group_by_buffers(
      allocator.get(), group_by_buffers, query_mem_desc_, block_size_, grid_size_,
      device_id /* ...remaining dispatch and buffer flags elided in this excerpt... */);
  inplace_sort_gpu(
      order_entries, query_mem_desc_, dev_group_by_buffers, data_mgr, device_id);
  // Copy the sorted group-by buffers back from the device; arguments besides
  // dev_group_by_buffers.data are elided in this excerpt.
  copy_group_by_buffers_from_gpu(/* ... */ dev_group_by_buffers.data /* ... */);
}
void ResultSet::radixSortOnCpu(
    const std::list<Analyzer::OrderEntry>& order_entries) const {
  CHECK(!query_mem_desc_.hasKeylessHash());
  std::vector<int64_t> tmp_buff(query_mem_desc_.getEntryCount());
  std::vector<int32_t> idx_buff(query_mem_desc_.getEntryCount());
  CHECK_EQ(size_t(1), order_entries.size());
  auto buffer_ptr = storage_->getUnderlyingBuffer();
  for (const auto& order_entry : order_entries) {
    const auto target_idx = order_entry.tle_no - 1;
    const auto sortkey_val_buff = reinterpret_cast<int64_t*>(
        buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
    const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
    sort_groups_cpu(sortkey_val_buff,
                    &idx_buff[0],
                    query_mem_desc_.getEntryCount(),
                    order_entry.is_desc,
                    chosen_bytes);
    apply_permutation_cpu(reinterpret_cast<int64_t*>(buffer_ptr),
                          &idx_buff[0],
                          query_mem_desc_.getEntryCount(),
                          &tmp_buff[0],
                          sizeof(int64_t));
    for (size_t target_idx = 0; target_idx < query_mem_desc_.getSlotCount();
         ++target_idx) {
      if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
        continue;
      }
      const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
      const auto satellite_val_buff = reinterpret_cast<int64_t*>(
          buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
      apply_permutation_cpu(satellite_val_buff,
                            &idx_buff[0],
                            query_mem_desc_.getEntryCount(),
                            &tmp_buff[0],
                            chosen_bytes);
    }
  }
}
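// Editorial note (hedged): the CPU path sorts the single ORDER BY key column (building
// an index permutation in idx_buff via sort_groups_cpu) and then applies that same
// permutation to the group-key buffer and every other ("satellite") slot with
// apply_permutation_cpu, so whole rows stay aligned after the sort.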
const std::vector<std::string> ResultSet::getStringDictionaryPayloadCopy(
    const shared::StringDictKey& dict_key) const {
  const auto sdp =
      row_set_mem_owner_->getOrAddStringDictProxy(dict_key, /*with_generation=*/true);
  CHECK(sdp);
  return sdp->getDictionary()->copyStrings();
}
const std::pair<std::vector<int32_t>, std::vector<std::string>>
ResultSet::getUniqueStringsForDictEncodedTargetCol(const size_t col_idx) const {
  const auto col_type_info = getColType(col_idx);
  std::unordered_set<int32_t> unique_string_ids_set;
  const size_t num_entries = entryCount();
  std::vector<bool> targets_to_skip(colCount(), true);
  targets_to_skip[col_idx] = false;
  CHECK(col_type_info.is_dict_encoded_type());
  const int64_t null_val = inline_fixed_encoding_null_val(
      col_type_info.is_array() ? col_type_info.get_elem_type() : col_type_info);

  for (size_t row_idx = 0; row_idx < num_entries; ++row_idx) {
    const auto result_row = getRowAtNoTranslations(row_idx, targets_to_skip);
    if (!result_row.empty()) {
      if (const auto scalar_col_val =
              boost::get<ScalarTargetValue>(&result_row[col_idx])) {
        const int32_t string_id =
            static_cast<int32_t>(boost::get<int64_t>(*scalar_col_val));
        if (string_id != null_val) {
          unique_string_ids_set.emplace(string_id);
        }
      } else if (const auto array_col_val =
                     boost::get<ArrayTargetValue>(&result_row[col_idx])) {
        if (*array_col_val) {
          for (const ScalarTargetValue& scalar : array_col_val->get()) {
            const int32_t string_id = static_cast<int32_t>(boost::get<int64_t>(scalar));
            if (string_id != null_val) {
              unique_string_ids_set.emplace(string_id);
            }
          }
        }
      }
    }
  }

  const size_t num_unique_strings = unique_string_ids_set.size();
  std::vector<int32_t> unique_string_ids(num_unique_strings);
  size_t string_idx{0};
  for (const auto unique_string_id : unique_string_ids_set) {
    unique_string_ids[string_idx++] = unique_string_id;
  }

  const auto sdp = row_set_mem_owner_->getOrAddStringDictProxy(
      col_type_info.getStringDictKey(), /*with_generation=*/true);

  return std::make_pair(unique_string_ids, sdp->getStrings(unique_string_ids));
}
bool ResultSet::isDirectColumnarConversionPossible() const {
  if (!g_enable_direct_columnarization) {
    return false;
  } else if (query_mem_desc_.didOutputColumnar()) {
    return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::Projection ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::TableFunction ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByPerfectHash ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByBaselineHash);
  } else {
    CHECK(!(query_mem_desc_.getQueryDescriptionType() ==
            QueryDescriptionType::TableFunction));
    return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByPerfectHash ||
                                    query_mem_desc_.getQueryDescriptionType() ==
                                        QueryDescriptionType::GroupByBaselineHash);
  }
}
bool ResultSet::isZeroCopyColumnarConversionPossible(size_t column_idx) const {
  return query_mem_desc_.didOutputColumnar() &&
         (query_mem_desc_.getQueryDescriptionType() ==
              QueryDescriptionType::Projection ||
          query_mem_desc_.getQueryDescriptionType() ==
              QueryDescriptionType::TableFunction) &&
         appended_storage_.empty() && storage_ &&
         (lazy_fetch_info_.empty() || !lazy_fetch_info_[column_idx].is_lazily_fetched);
}

const int8_t* ResultSet::getColumnarBuffer(size_t column_idx) const {
  CHECK(isZeroCopyColumnarConversionPossible(column_idx));
  return storage_->getUnderlyingBuffer() + query_mem_desc_.getColOffInBytes(column_idx);
}
std::tuple<std::vector<bool>, size_t> ResultSet::getSingleSlotTargetBitmap() const {
  std::vector<bool> target_bitmap(targets_.size(), true);
  size_t num_single_slot_targets = 0;
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    const auto& sql_type = targets_[target_idx].sql_type;
    if (targets_[target_idx].is_agg && targets_[target_idx].agg_kind == kAVG) {
      target_bitmap[target_idx] = false;
    } else if (sql_type.is_varlen()) {
      target_bitmap[target_idx] = false;
    } else {
      num_single_slot_targets++;
    }
  }
  return std::make_tuple(std::move(target_bitmap), num_single_slot_targets);
}
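// Editorial note (hedged): AVG targets are excluded from the single-slot bitmap because
// they occupy two slots (a running sum and a count), and variable-length targets
// because their values are not stored as a single fixed-width slot.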
std::tuple<std::vector<bool>, size_t> ResultSet::getSupportedSingleSlotTargetBitmap()
    const {
  CHECK(isDirectColumnarConversionPossible());
  auto [single_slot_targets, num_single_slot_targets] = getSingleSlotTargetBitmap();

  for (size_t target_idx = 0; target_idx < single_slot_targets.size(); target_idx++) {
    const auto& target = targets_[target_idx];
    if (single_slot_targets[target_idx] &&
        (is_distinct_target(target) ||
         shared::is_any<kAPPROX_QUANTILE, kMODE>(target.agg_kind) ||
         (target.is_agg && target.agg_kind == kSAMPLE && target.sql_type == kFLOAT))) {
      single_slot_targets[target_idx] = false;
      num_single_slot_targets--;
    }
  }
  CHECK_GE(num_single_slot_targets, size_t(0));
  return std::make_tuple(std::move(single_slot_targets), num_single_slot_targets);
}
std::vector<size_t> ResultSet::getSlotIndicesForTargetIndices() const {
  std::vector<size_t> slot_indices(targets_.size(), 0);
  size_t slot_index = 0;
  for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
    slot_indices[target_idx] = slot_index;
    slot_index = advance_slot(slot_index, targets_[target_idx], false);
  }
  return slot_indices;
}

// result_set::can_use_parallel_algorithms()
bool can_use_parallel_algorithms(const ResultSet& rows) {
  return !rows.isTruncated();
}
std::optional<size_t> first_dict_encoded_idx(std::vector<TargetInfo> const& targets) {
  auto const itr = std::find_if(targets.begin(), targets.end(), IsDictEncodedStr{});
  return itr == targets.end() ? std::nullopt
                              : std::make_optional<size_t>(itr - targets.begin());
}