43 const bool output_columnar,
45 const int64_t num_rows,
46 const std::vector<std::vector<const int8_t*>>& col_buffers,
47 const std::vector<std::vector<uint64_t>>& frag_offsets,
50 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
59 const int64_t num_rows,
60 const std::vector<std::vector<const int8_t*>>& col_buffers,
61 const std::vector<std::vector<uint64_t>>& frag_offsets,
62 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
109 const unsigned block_size_x,
110 const unsigned grid_size_x);
113 const size_t entry_count,
116 const unsigned block_size_x,
117 const unsigned grid_size_x);
122 const size_t entry_count,
125 const unsigned block_size_x,
126 const unsigned grid_size_x,
128 const bool prepend_index_buffer)
const;
135 const bool output_columnar,
139 int64_t* groups_buffer,
140 const std::vector<int64_t>& init_vals,
141 const int32_t groups_buffer_entry_count,
142 const size_t warp_size,
146 int64_t* groups_buffer,
147 const std::vector<int64_t>& init_vals,
153 const std::vector<int64_t>& init_vals,
154 const std::vector<int64_t>& bitmap_sizes,
155 const std::vector<bool>& tdigest_deferred);
177 const unsigned block_size_x,
178 const unsigned grid_size_x);
186 const unsigned block_size_x,
187 const unsigned grid_size_x,
188 const int8_t warp_size,
189 const bool can_sort_on_gpu,
190 const bool output_columnar,
199 const size_t projection_count);
203 const size_t projection_count,
204 const int device_id);
213 const unsigned total_thread_count,
214 const int device_id);
void initGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const int32_t groups_buffer_entry_count, const size_t warp_size, const Executor *executor)
void resetResultSet(const size_t index)
size_t count_distinct_bitmap_mem_bytes_
DeviceAllocator * device_allocator_
const size_t num_buffers_
std::vector< int64_t > allocateCountDistinctBuffers(const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
unsigned long long CUdeviceptr
std::vector< bool > allocateTDigests(const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
const auto getGroupByBuffersPtr()
CUdeviceptr count_distinct_bitmap_mem_
size_t computeNumberOfBuffers(const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const Executor *executor) const
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
void compactProjectionBuffersGpu(const QueryMemoryDescriptor &query_mem_desc, Data_Namespace::DataMgr *data_mgr, const GpuGroupByBuffers &gpu_group_by_buffers, const size_t projection_count, const int device_id)
std::vector< int64_t > init_agg_vals_
std::unique_ptr< ResultSet > getResultSetOwned(const size_t index)
void applyStreamingTopNOffsetCpu(const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit)
int64_t getAggInitValForIndex(const size_t index) const
const auto getCountDistinctBitmapPtr() const
int8_t * count_distinct_bitmap_crt_ptr_
void initGroupByBuffer(int64_t *buffer, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool output_columnar, const Executor *executor)
int64_t allocateCountDistinctSet()
void compactProjectionBuffersCpu(const QueryMemoryDescriptor &query_mem_desc, const size_t projection_count)
std::vector< int64_t * > group_by_buffers_
void initColumnarGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const Executor *executor)
const auto getNumBuffers() const
Abstract class for managing device memory allocations.
const auto getCountDistinctBitmapBytes() const
Descriptor for the result set buffer layout.
void copyGroupByBuffersFromGpu(Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const size_t entry_count, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit *ra_exe_unit, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const bool prepend_index_buffer) const
void initColumnPerRow(const QueryMemoryDescriptor &query_mem_desc, int8_t *row_ptr, const size_t bin, const std::vector< int64_t > &init_vals, const std::vector< int64_t > &bitmap_sizes, const std::vector< bool > &tdigest_deferred)
std::vector< Data_Namespace::AbstractBuffer * > temporary_buffers_
QueryMemoryInitializer(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const int device_id, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const bool output_columnar, const bool sort_on_gpu, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, RenderAllocatorMap *render_allocator_map, RenderInfo *render_info, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, DeviceAllocator *gpu_allocator, const Executor *executor)
void applyStreamingTopNOffsetGpu(Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit &ra_exe_unit, const unsigned total_thread_count, const int device_id)
const auto getGroupByBuffersSize() const
Basic constructors and methods of the row set interface.
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc)
int8_t * count_distinct_bitmap_host_mem_
ResultSet * getResultSet(const size_t index) const
std::vector< std::unique_ptr< ResultSet > > result_sets_
void allocateCountDistinctGpuMem(const QueryMemoryDescriptor &query_mem_desc)
int64_t allocateCountDistinctBitmap(const size_t bitmap_byte_sz)
const auto getCountDistinctHostPtr() const