25 #include "ThirdParty/robin_hood/robin_hood.h"
43 const bool output_columnar,
46 const int64_t num_rows,
47 const std::vector<std::vector<const int8_t*>>& col_buffers,
48 const std::vector<std::vector<uint64_t>>& frag_offsets,
51 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
53 const size_t thread_idx,
61 const int64_t num_rows,
62 const std::vector<std::vector<const int8_t*>>& col_buffers,
63 const std::vector<std::vector<uint64_t>>& frag_offsets,
64 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
115 const unsigned block_size_x,
116 const unsigned grid_size_x,
117 const bool zero_initialize_buffers);
121 const size_t entry_count,
124 const unsigned block_size_x,
125 const unsigned grid_size_x);
129 const size_t entry_count,
132 const unsigned block_size_x,
133 const unsigned grid_size_x,
135 const bool prepend_index_buffer)
const;
142 const bool output_columnar,
146 int64_t* groups_buffer,
147 const std::vector<int64_t>& init_vals,
148 const int32_t groups_buffer_entry_count,
149 const size_t warp_size,
153 int64_t* groups_buffer,
154 const std::vector<int64_t>& init_vals,
161 const std::vector<int64_t>& init_vals,
162 const std::vector<int64_t>& bitmap_sizes,
164 const std::vector<QuantileParam>& quantile_params);
186 const int8_t* init_agg_vals_dev_ptr,
189 const unsigned block_size_x,
190 const unsigned grid_size_x);
195 const int8_t* init_agg_vals_dev_ptr,
198 const unsigned block_size_x,
199 const unsigned grid_size_x,
200 const int8_t warp_size,
201 const bool can_sort_on_gpu,
202 const bool output_columnar,
210 const size_t projection_count);
214 const size_t projection_count,
215 const int device_id);
224 const unsigned total_thread_count,
225 const int device_id);
GpuGroupByBuffers setupTableFunctionGpuBuffers(const QueryMemoryDescriptor &query_mem_desc, const int device_id, const unsigned block_size_x, const unsigned grid_size_x, const bool zero_initialize_buffers)
ModeIndexSet allocateModes(const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
GpuGroupByBuffers prepareTopNHeapsDevBuffer(const QueryMemoryDescriptor &query_mem_desc, const int8_t *init_agg_vals_dev_ptr, const size_t n, const int device_id, const unsigned block_size_x, const unsigned grid_size_x)
void resetResultSet(const size_t index)
size_t count_distinct_bitmap_mem_bytes_
void initColumnsPerRow(const QueryMemoryDescriptor &query_mem_desc, int8_t *row_ptr, const std::vector< int64_t > &init_vals, const std::vector< int64_t > &bitmap_sizes, const ModeIndexSet &mode_index_set, const std::vector< QuantileParam > &quantile_params)
GpuGroupByBuffers createAndInitializeGroupByBufferGpu(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const int8_t *init_agg_vals_dev_ptr, const int device_id, const ExecutorDispatchMode dispatch_mode, const unsigned block_size_x, const unsigned grid_size_x, const int8_t warp_size, const bool can_sort_on_gpu, const bool output_columnar, RenderAllocator *render_allocator)
DeviceAllocator * device_allocator_
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
QueryMemoryInitializer(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const int device_id, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const bool output_columnar, const bool sort_on_gpu, const shared::TableKey &outer_table_key, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, RenderAllocatorMap *render_allocator_map, RenderInfo *render_info, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, DeviceAllocator *gpu_allocator, const size_t thread_idx, const Executor *executor)
std::vector< int64_t > allocateCountDistinctBuffers(const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
unsigned long long CUdeviceptr
const auto getGroupByBuffersPtr()
std::optional< double > QuantileParam
CUdeviceptr count_distinct_bitmap_mem_
size_t computeNumberOfBuffers(const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const Executor *executor) const
std::vector< QuantileParam > allocateTDigests(const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
void compactProjectionBuffersGpu(const QueryMemoryDescriptor &query_mem_desc, Data_Namespace::DataMgr *data_mgr, const GpuGroupByBuffers &gpu_group_by_buffers, const size_t projection_count, const int device_id)
std::vector< int64_t > init_agg_vals_
std::unique_ptr< ResultSet > getResultSetOwned(const size_t index)
void applyStreamingTopNOffsetCpu(const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit)
int64_t getAggInitValForIndex(const size_t index) const
const auto getCountDistinctBitmapPtr() const
int8_t * count_distinct_bitmap_crt_ptr_
void initGroupByBuffer(int64_t *buffer, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool output_columnar, const Executor *executor)
int8_t * varlen_output_buffer_host_ptr_
int64_t allocateCountDistinctSet()
void compactProjectionBuffersCpu(const QueryMemoryDescriptor &query_mem_desc, const size_t projection_count)
std::vector< int64_t * > group_by_buffers_
void initColumnarGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const Executor *executor)
void copyGroupByBuffersFromGpu(DeviceAllocator &device_allocator, const QueryMemoryDescriptor &query_mem_desc, const size_t entry_count, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit *ra_exe_unit, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const bool prepend_index_buffer) const
std::shared_ptr< VarlenOutputInfo > getVarlenOutputInfo()
const auto getNumBuffers() const
robin_hood::unordered_set< size_t > ModeIndexSet
Abstract class for managing device memory allocations.
const auto getCountDistinctBitmapBytes() const
Descriptor for the result set buffer layout.
const auto getVarlenOutputPtr() const
std::vector< Data_Namespace::AbstractBuffer * > temporary_buffers_
void copyFromTableFunctionGpuBuffers(Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const size_t entry_count, const GpuGroupByBuffers &gpu_group_by_buffers, const int device_id, const unsigned block_size_x, const unsigned grid_size_x)
void applyStreamingTopNOffsetGpu(Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit &ra_exe_unit, const unsigned total_thread_count, const int device_id)
const auto getGroupByBuffersSize() const
Basic constructors and methods of the row set interface.
std::shared_ptr< VarlenOutputInfo > varlen_output_info_
int8_t * count_distinct_bitmap_host_mem_
CUdeviceptr varlen_output_buffer_
ResultSet * getResultSet(const size_t index) const
std::vector< std::unique_ptr< ResultSet > > result_sets_
void allocateCountDistinctGpuMem(const QueryMemoryDescriptor &query_mem_desc)
const auto getVarlenOutputHostPtr() const
int64_t allocateCountDistinctBitmap(const size_t bitmap_byte_sz)
const auto getCountDistinctHostPtr() const
void initRowGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const int32_t groups_buffer_entry_count, const size_t warp_size, const Executor *executor)