25 #ifndef QUERYENGINE_RESULTSET_H
26 #define QUERYENGINE_RESULTSET_H
98 using pointer = std::vector<TargetValue>*;
118 throw std::runtime_error(
"current row buffer iteration index is undefined");
133 bool translate_strings,
148 class TSerializedRows;
158 ResultSet(
const std::vector<TargetInfo>& targets,
161 const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
163 const unsigned block_size,
164 const unsigned grid_size);
166 ResultSet(
const std::vector<TargetInfo>& targets,
167 const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
168 const std::vector<std::vector<const int8_t*>>& col_buffers,
169 const std::vector<std::vector<int64_t>>& frag_offsets,
170 const std::vector<int64_t>& consistent_frag_sizes,
174 const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
176 const unsigned block_size,
177 const unsigned grid_size);
179 ResultSet(
const std::shared_ptr<const Analyzer::Estimator>,
184 ResultSet(
const std::string& explanation);
187 int64_t render_time_ms,
188 const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
193 bool translate_strings,
200 for (
size_t index = 0; index < from_logical_index; index++) {
209 return rowIterator(0, translate_strings, decimal_to_double);
224 storage_->updateEntryCount(new_entry_count);
227 std::vector<TargetValue>
getNextRow(
const bool translate_strings,
232 std::vector<TargetValue>
getRowAt(
const size_t index)
const;
235 const size_t col_idx,
236 const bool translate_strings,
246 const std::vector<bool>& targets_to_skip = {})
const;
250 void sort(
const std::list<Analyzer::OrderEntry>& order_entries,
252 const Executor* executor);
266 size_t rowCount(
const bool force_parallel =
false)
const;
324 if (
storage_->query_mem_desc_.didOutputColumnar()) {
325 storage_->fillOneEntryColWise(entry);
327 storage_->fillOneEntryRowWise(entry);
333 void holdChunks(
const std::list<std::shared_ptr<Chunk_NS::Chunk>>& chunks) {
350 void serialize(TSerializedRows& serialized_rows)
const;
352 static std::unique_ptr<ResultSet>
unserialize(
const TSerializedRows& serialized_rows,
373 int8_t* output_buffer,
374 const size_t output_buffer_size)
const;
407 const int dict_id)
const;
409 template <
typename ENTRY_TYPE, QueryDescriptionType QUERY_TYPE,
bool COLUMNAR_FORMAT>
411 const size_t target_idx,
412 const size_t slot_idx)
const;
419 std::vector<TargetValue>
getNextRowImpl(
const bool translate_strings,
425 std::vector<TargetValue>
getRowAt(
const size_t index,
426 const bool translate_strings,
428 const bool fixup_count_distinct_pointers,
429 const std::vector<bool>& targets_to_skip = {})
const;
432 template <
typename ENTRY_TYPE>
434 const size_t target_idx,
435 const size_t slot_idx)
const;
437 template <
typename ENTRY_TYPE>
439 const size_t target_idx,
440 const size_t slot_idx)
const;
442 template <
typename ENTRY_TYPE>
444 const size_t target_idx,
445 const size_t slot_idx)
const;
447 template <
typename ENTRY_TYPE>
449 const size_t target_idx,
450 const size_t slot_idx)
const;
458 void radixSortOnGpu(
const std::list<Analyzer::OrderEntry>& order_entries)
const;
460 void radixSortOnCpu(
const std::list<Analyzer::OrderEntry>& order_entries)
const;
464 const bool float_argument_input);
467 int8_t* rowwise_target_ptr,
469 const size_t entry_buff_idx,
471 const size_t target_logical_idx,
472 const size_t slot_idx,
473 const bool translate_strings,
474 const bool decimal_to_double,
475 const bool fixup_count_distinct_pointers)
const;
478 const int8_t* keys_ptr,
480 const size_t local_entry_idx,
481 const size_t global_entry_idx,
483 const size_t target_logical_idx,
484 const size_t slot_idx,
485 const bool translate_strings,
486 const bool decimal_to_double)
const;
489 const int8_t compact_sz,
491 const size_t target_logical_idx,
492 const bool translate_strings,
493 const bool decimal_to_double,
494 const size_t entry_buff_idx)
const;
497 const int8_t compact_sz1,
499 const int8_t compact_sz2,
501 const size_t target_logical_idx,
502 const bool translate_strings,
503 const size_t entry_buff_idx)
const;
515 const size_t slot_idx,
517 const size_t target_logical_idx,
518 const size_t entry_buff_idx)
const;
528 const size_t entry_idx,
529 const size_t target_logical_idx,
533 const size_t str_len)
const;
536 const size_t target_logical_idx,
542 std::pair<size_t, size_t>
getStorageIndex(
const size_t entry_idx)
const;
544 const std::vector<const int8_t*>&
getColumnFrag(
const size_t storge_idx,
545 const size_t col_logical_idx,
546 int64_t& global_idx)
const;
569 const size_t entry_idx,
570 const size_t target_logical_idx,
576 const size_t entry_idx)
const {
599 const size_t entry_idx,
600 const size_t target_logical_idx,
610 template <
typename BUFFER_ITERATOR_TYPE>
617 const Executor* executor)
635 bool operator()(
const uint32_t lhs,
const uint32_t rhs)
const;
649 const std::list<Analyzer::OrderEntry>& order_entries,
651 const Executor* executor) {
655 std::make_unique<ResultSetComparator<ColumnWiseTargetAccessor>>(
656 order_entries, use_heap,
this, executor);
657 return [
this](
const uint32_t lhs,
const uint32_t rhs) ->
bool {
662 order_entries, use_heap,
this, executor);
663 return [
this](
const uint32_t lhs,
const uint32_t rhs) ->
bool {
670 std::vector<uint32_t>& to_sort,
672 const std::function<
bool(
const uint32_t,
const uint32_t)> compare);
674 void sortPermutation(
const std::function<
bool(
const uint32_t,
const uint32_t)> compare);
678 void parallelTop(
const std::list<Analyzer::OrderEntry>& order_entries,
680 const Executor* executor);
682 void baselineSort(
const std::list<Analyzer::OrderEntry>& order_entries,
684 const Executor* executor);
687 const std::list<Analyzer::OrderEntry>& order_entries,
689 const Executor* executor);
700 std::vector<std::string>& varlen_bufer)
const;
718 mutable std::unique_ptr<ResultSetStorage>
storage_;
732 std::list<std::shared_ptr<Chunk_NS::Chunk>>
chunks_;
798 ResultSet*
reduce(std::vector<ResultSet*>&);
805 std::shared_ptr<ResultSet>
rs_;
813 namespace result_set {
821 #endif // QUERYENGINE_RESULTSET_H
void setSeparateVarlenStorageValid(const bool val)
const std::list< Analyzer::OrderEntry > order_entries_
void setGeoReturnType(const GeoReturnType val)
void serializeVarlenAggColumn(int8_t *buf, std::vector< std::string > &varlen_bufer) const
std::mutex row_iteration_mutex_
InternalTargetValue getColumnInternal(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
void syncEstimatorBuffer() const
void holdChunks(const std::list< std::shared_ptr< Chunk_NS::Chunk >> &chunks)
const QueryMemoryDescriptor & getQueryMemDesc() const
int64_t executor_queue_time
std::unique_ptr< ResultSetComparator< ColumnWiseTargetAccessor > > column_wise_comparator_
std::pair< size_t, size_t > getStorageIndex(const size_t entry_idx) const
std::shared_ptr< RowSetMemoryOwner > getRowSetMemOwner() const
bool isValidationOnlyRes() const
bool didOutputColumnar() const
void setValidationOnlyRes()
bool for_validation_only_
std::ptrdiff_t difference_type
ENTRY_TYPE getRowWisePerfectHashEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
class for a per-database catalog. also includes metadata for the current database and the current use...
void setEntryCount(const size_t val)
double decimal_to_double(const SQLTypeInfo &otype, int64_t oval)
void sort(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n, const Executor *executor)
AppendedStorage appended_storage_
ENTRY_TYPE getColumnarPerfectHashEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
GeoReturnType geo_return_type_
const BufferIteratorType buffer_itr_
Utility functions for easy access to the result set buffers.
std::shared_ptr< ResultSet > rs_
const Executor * executor_
std::vector< std::string > SerializedVarlenBufferStorage
void initializeStorage() const
QueryDescriptionType getQueryDescriptionType() const
ResultSetRowIterator(const ResultSet *rs, bool translate_strings, bool decimal_to_double)
const Catalog_Namespace::Catalog * catalog_
void unserializeCountDistinctColumns(const TSerializedRows &)
std::vector< TargetValue > getNextRow(const bool translate_strings, const bool decimal_to_double) const
static bool isNull(const SQLTypeInfo &ti, const InternalTargetValue &val, const bool float_argument_input)
ResultSet(const std::vector< TargetInfo > &targets, const ExecutorDeviceType device_type, const QueryMemoryDescriptor &query_mem_desc, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const Catalog_Namespace::Catalog *catalog, const unsigned block_size, const unsigned grid_size)
QueryMemoryDescriptor query_mem_desc_
const std::vector< TargetInfo > & getTargetInfos() const
std::unique_ptr< ResultSetStorage > storage_
void setKernelQueueTime(const int64_t kernel_queue_time)
bool operator==(const ResultSetRowIterator &other) const
ENTRY_TYPE getEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
size_t rowCount(const bool force_parallel=false) const
ResultSetRowIterator(const ResultSet *rs)
TargetValue makeGeoTargetValue(const int8_t *geo_target_ptr, const size_t slot_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t entry_buff_idx) const
TargetValue getTargetValueFromBufferRowwise(int8_t *rowwise_target_ptr, int8_t *keys_ptr, const size_t entry_buff_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t slot_idx, const bool translate_strings, const bool decimal_to_double, const bool fixup_count_distinct_pointers) const
void keepFirstN(const size_t n)
std::vector< std::shared_ptr< std::list< ChunkIter > > > chunk_iters_
const ResultSet * result_set_
void addCompilationQueueTime(const int64_t compilation_queue_time)
void parallelTop(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n, const Executor *executor)
void serialize(TSerializedRows &serialized_rows) const
std::vector< SerializedVarlenBufferStorage > serialized_varlen_buffer_
int64_t lazyReadInt(const int64_t ival, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
bool operator!=(const ResultSetRowIterator &other) const
OneIntegerColumnRow getOneColRow(const size_t index) const
TargetValue getTargetValueFromBufferColwise(const int8_t *col_ptr, const int8_t *keys_ptr, const QueryMemoryDescriptor &query_mem_desc, const size_t local_entry_idx, const size_t global_entry_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t slot_idx, const bool translate_strings, const bool decimal_to_double) const
ResultSetComparator(const std::list< Analyzer::OrderEntry > &order_entries, const bool use_heap, const ResultSet *result_set, const Executor *executor)
void rewriteVarlenAggregates(ResultSet *)
std::vector< int64_t > materializeCountDistinctColumn(const Analyzer::OrderEntry &order_entry) const
ResultSetRowIterator rowIterator(size_t from_logical_index, bool translate_strings, bool decimal_to_double) const
std::vector< uint32_t > permutation_
std::atomic< int64_t > cached_row_count_
const bool isPermutationBufferEmpty() const
size_t parallelRowCount() const
const size_t key_bytes_with_padding_
const ResultSet * result_set_
std::vector< uint32_t > initPermutationBuffer(const size_t start, const size_t step)
std::vector< TargetValue > getRowAtNoTranslations(const size_t index, const std::vector< bool > &targets_to_skip={}) const
const ResultSet * result_set_
void radixSortOnCpu(const std::list< Analyzer::OrderEntry > &order_entries) const
InternalTargetValue getColumnInternal(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
std::unique_ptr< ResultSetComparator< RowWiseTargetAccessor > > row_wise_comparator_
bool definitelyHasNoRows() const
ColumnWiseTargetAccessor(const ResultSet *result_set)
bool use_parallel_algorithms(const ResultSet &rows)
bool isZeroCopyColumnarConversionPossible(size_t column_idx) const
std::input_iterator_tag iterator_category
int8_t * getHostEstimatorBuffer() const
InternalTargetValue getVarlenOrderEntry(const int64_t str_ptr, const size_t str_len) const
const std::vector< TargetInfo > targets_
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
const ResultSetStorage * allocateStorage() const
std::shared_ptr< const std::vector< std::string > > getStringDictionaryPayloadCopy(const int dict_id) const
std::list< std::shared_ptr< Chunk_NS::Chunk > > chunks_
ApproxMedianBuffers materializeApproxMedianColumns() const
QueryExecutionTimings timings_
const ResultSet * result_set_
void setQueueTime(const int64_t queue_time)
void dropFirstN(const size_t n)
std::vector< std::vector< int8_t > > literal_buffers_
std::vector< std::vector< TargetOffsets > > offsets_for_storage_
bool canUseFastBaselineSort(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n)
int64_t kernel_queue_time
int64_t compilation_queue_time
std::vector< TargetValue > & reference
ResultSet * reduce(std::vector< ResultSet * > &)
std::tuple< std::vector< bool >, size_t > getSingleSlotTargetBitmap() const
ResultSetRowIterator & operator++(void)
std::shared_ptr< ResultSet > getOwnResultSet()
StorageLookupResult findStorage(const size_t entry_idx) const
An AbstractBuffer is a unit of data management for a data manager.
static void topPermutation(std::vector< uint32_t > &to_sort, const size_t n, const std::function< bool(const uint32_t, const uint32_t)> compare)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
value_type operator*() const
const std::vector< ColumnLazyFetchInfo > lazy_fetch_info_
RowWiseTargetAccessor(const ResultSet *result_set)
std::vector< std::vector< double >> ApproxMedianBuffers
void copyColumnIntoBuffer(const size_t column_idx, int8_t *output_buffer, const size_t output_buffer_size) const
RowSortException(const std::string &cause)
void fixupCountDistinctPointers()
void radixSortOnGpu(const std::list< Analyzer::OrderEntry > &order_entries) const
const ResultSetStorage * getStorage() const
QueryDescriptionType getQueryDescriptionType() const
Data_Namespace::DataMgr * data_mgr_
static double calculateQuantile(quantile::TDigest *const t_digest, double const q)
Basic constructors and methods of the row set interface.
int64_t getQueueTime() const
std::vector< TargetValue > getRowAt(const size_t index) const
void fillOneEntry(const std::vector< int64_t > &entry)
void updateStorageEntryCount(const size_t new_entry_count)
void serializeProjection(TSerializedRows &serialized_rows) const
ResultSetRowIterator operator++(int)
const std::shared_ptr< const Analyzer::Estimator > estimator_
SQLTypeInfo getColType(const size_t col_idx) const
GeoReturnType getGeoReturnType() const
ApproxMedianBuffers::value_type materializeApproxMedianColumn(const Analyzer::OrderEntry &order_entry) const
void holdChunkIterators(const std::shared_ptr< std::list< ChunkIter >> chunk_iters)
std::tuple< std::vector< bool >, size_t > getSupportedSingleSlotTargetBitmap() const
ExecutorDeviceType getDeviceType() const
const int8_t * getColumnarBuffer(size_t column_idx) const
size_t get_row_bytes(const QueryMemoryDescriptor &query_mem_desc)
std::vector< TargetValue > value_type
void sortPermutation(const std::function< bool(const uint32_t, const uint32_t)> compare)
bool isGeoColOnGpu(const size_t col_idx) const
void initializeOffsetsForStorage()
const int8_t * get_rowwise_ptr(const int8_t *buff, const size_t entry_idx) const
size_t getNDVEstimator() const
std::vector< std::vector< std::vector< const int8_t * > > > col_buffers_
bool isRowAtEmpty(const size_t index) const
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
size_t entryCount() const
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
TargetValue makeTargetValue(const int8_t *ptr, const int8_t compact_sz, const TargetInfo &target_info, const size_t target_logical_idx, const bool translate_strings, const bool decimal_to_double, const size_t entry_buff_idx) const
void baselineSort(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n, const Executor *executor)
size_t getCurrentRowBufferIndex() const
void append(ResultSet &that)
std::vector< std::vector< int64_t > > consistent_frag_sizes_
bool operator()(const uint32_t lhs, const uint32_t rhs) const
int8_t * host_estimator_buffer_
bool didOutputColumnar() const
const ExecutorDeviceType device_type_
std::vector< TargetValue > getNextRowImpl(const bool translate_strings, const bool decimal_to_double) const
size_t getCurrentRowBufferIndex() const
void holdLiterals(std::vector< int8_t > &literal_buff)
bool g_enable_watchdog false
#define DEBUG_TIMER(name)
size_t getBufferSizeBytes(const ExecutorDeviceType device_type) const
int8_t * getDeviceEstimatorBuffer() const
Estimators to be used when precise cardinality isn't useful.
int64_t getDistinctBufferRefFromBufferRowwise(int8_t *rowwise_target_ptr, const TargetInfo &target_info) const
std::vector< std::vector< std::vector< int64_t > > > frag_offsets_
void doBaselineSort(const ExecutorDeviceType device_type, const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n, const Executor *executor)
void materializeCountDistinctColumns()
bool separate_varlen_storage_valid_
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Executor(const ExecutorId id, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
std::vector< TargetValue > getNextRowUnlocked(const bool translate_strings, const bool decimal_to_double) const
std::vector< TargetValue > * pointer
const std::vector< int64_t > & getTargetInitVals() const
std::vector< size_t > getSlotIndicesForTargetIndices() const
const std::vector< uint32_t > & getPermutationBuffer() const
size_t advanceCursorToNextEntry() const
void create_active_buffer_set(BufferSet &count_distinct_active_buffer_set) const
std::set< int64_t > BufferSet
ResultSetRowIterator rowIterator(bool translate_strings, bool decimal_to_double) const
BUFFER_ITERATOR_TYPE BufferIteratorType
ENTRY_TYPE getColumnarBaselineEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
Data_Namespace::DataMgr * getDataManager() const
std::vector< std::vector< int64_t > > count_distinct_materialized_buffers_
Data_Namespace::AbstractBuffer * device_estimator_buffer_
InternalTargetValue getColumnInternal(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const
const ApproxMedianBuffers approx_median_materialized_buffers_
std::vector< std::vector< TargetOffsets > > offsets_for_storage_
void initializeOffsetsForStorage()
bool global_entry_idx_valid_
bool can_use_parallel_algorithms(const ResultSet &rows)
int64_t getRenderTime() const
void setCachedRowCount(const size_t row_count) const
bool isDirectColumnarConversionPossible() const
size_t get_key_bytes_rowwise(const QueryMemoryDescriptor &query_mem_desc)
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
TargetValue makeVarlenTargetValue(const int8_t *ptr1, const int8_t compact_sz1, const int8_t *ptr2, const int8_t compact_sz2, const TargetInfo &target_info, const size_t target_logical_idx, const bool translate_strings, const size_t entry_buff_idx) const
void serializeCountDistinctColumns(TSerializedRows &) const
const ResultSetStorage * storage_ptr
std::vector< std::unique_ptr< ResultSetStorage >> AppendedStorage
const std::vector< const int8_t * > & getColumnFrag(const size_t storge_idx, const size_t col_logical_idx, int64_t &global_idx) const
size_t binSearchRowCount() const
const std::vector< ColumnLazyFetchInfo > & getLazyFetchInfo() const
std::function< bool(const uint32_t, const uint32_t)> createComparator(const std::list< Analyzer::OrderEntry > &order_entries, const bool use_heap, const Executor *executor)
ENTRY_TYPE getRowWiseBaselineEntryAt(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
static std::unique_ptr< ResultSet > unserialize(const TSerializedRows &serialized_rows, const Executor *)
const size_t fixedup_entry_idx